{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.926099870528801, "eval_steps": 800, "global_step": 22800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.097165991902834e-09, "loss": 3.5744, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.048582995951417e-08, "loss": 3.6858, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.097165991902834e-08, "loss": 3.6628, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.214574898785425e-07, "loss": 3.3928, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.6194331983805668e-07, "loss": 3.0895, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.0242914979757083e-07, "loss": 2.6585, "step": 25 }, { "epoch": 0.0, "learning_rate": 2.42914979757085e-07, "loss": 2.1214, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.8340080971659917e-07, "loss": 1.7237, "step": 35 }, { "epoch": 0.0, "learning_rate": 3.2388663967611335e-07, "loss": 1.2235, "step": 40 }, { "epoch": 0.0, "learning_rate": 3.6437246963562754e-07, "loss": 1.1867, "step": 45 }, { "epoch": 0.0, "learning_rate": 4.0485829959514166e-07, "loss": 1.1259, "step": 50 }, { "epoch": 0.0, "learning_rate": 4.4534412955465585e-07, "loss": 1.0245, "step": 55 }, { "epoch": 0.0, "learning_rate": 4.8582995951417e-07, "loss": 0.977, "step": 60 }, { "epoch": 0.0, "learning_rate": 5.263157894736842e-07, "loss": 0.9554, "step": 65 }, { "epoch": 0.0, "learning_rate": 5.668016194331983e-07, "loss": 0.9017, "step": 70 }, { "epoch": 0.0, "learning_rate": 6.072874493927125e-07, "loss": 0.8987, "step": 75 }, { "epoch": 0.0, "learning_rate": 6.477732793522267e-07, "loss": 0.8863, "step": 80 }, { "epoch": 0.0, "learning_rate": 6.882591093117408e-07, "loss": 0.9425, "step": 85 }, { "epoch": 0.0, "learning_rate": 7.287449392712551e-07, "loss": 0.91, "step": 90 }, { "epoch": 0.0, "learning_rate": 7.692307692307693e-07, "loss": 0.848, "step": 95 }, { "epoch": 0.0, "learning_rate": 8.097165991902833e-07, "loss": 0.8213, "step": 100 }, { "epoch": 0.0, "learning_rate": 8.502024291497975e-07, "loss": 0.8434, "step": 105 }, { "epoch": 0.0, "learning_rate": 8.906882591093117e-07, "loss": 0.8409, "step": 110 }, { "epoch": 0.0, "learning_rate": 9.311740890688259e-07, "loss": 0.8398, "step": 115 }, { "epoch": 0.0, "learning_rate": 9.7165991902834e-07, "loss": 0.7942, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.0121457489878542e-06, "loss": 0.8221, "step": 125 }, { "epoch": 0.01, "learning_rate": 1.0526315789473683e-06, "loss": 0.8038, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.0931174089068826e-06, "loss": 0.803, "step": 135 }, { "epoch": 0.01, "learning_rate": 1.1336032388663967e-06, "loss": 0.7935, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.1740890688259108e-06, "loss": 0.8251, "step": 145 }, { "epoch": 0.01, "learning_rate": 1.214574898785425e-06, "loss": 0.8082, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.2550607287449393e-06, "loss": 0.7897, "step": 155 }, { "epoch": 0.01, "learning_rate": 1.2955465587044534e-06, "loss": 0.8286, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.3360323886639675e-06, "loss": 0.7742, "step": 165 }, { "epoch": 0.01, "learning_rate": 1.3765182186234816e-06, "loss": 0.787, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.4170040485829959e-06, "loss": 0.7862, "step": 175 }, { "epoch": 0.01, "learning_rate": 1.4574898785425101e-06, "loss": 0.7721, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.4979757085020242e-06, "loss": 0.7554, "step": 185 }, { "epoch": 0.01, "learning_rate": 1.5384615384615385e-06, "loss": 0.7941, "step": 190 }, { "epoch": 0.01, "learning_rate": 1.5789473684210526e-06, "loss": 0.7759, "step": 195 }, { "epoch": 0.01, "learning_rate": 1.6194331983805667e-06, "loss": 0.7249, "step": 200 }, { "epoch": 0.01, "learning_rate": 1.6599190283400807e-06, "loss": 0.7673, "step": 205 }, { "epoch": 0.01, "learning_rate": 1.700404858299595e-06, "loss": 0.7922, "step": 210 }, { "epoch": 0.01, "learning_rate": 1.7408906882591093e-06, "loss": 0.7546, "step": 215 }, { "epoch": 0.01, "learning_rate": 1.7813765182186234e-06, "loss": 0.7709, "step": 220 }, { "epoch": 0.01, "learning_rate": 1.8218623481781377e-06, "loss": 0.7383, "step": 225 }, { "epoch": 0.01, "learning_rate": 1.8623481781376518e-06, "loss": 0.7608, "step": 230 }, { "epoch": 0.01, "learning_rate": 1.9028340080971658e-06, "loss": 0.7663, "step": 235 }, { "epoch": 0.01, "learning_rate": 1.94331983805668e-06, "loss": 0.7567, "step": 240 }, { "epoch": 0.01, "learning_rate": 1.983805668016194e-06, "loss": 0.751, "step": 245 }, { "epoch": 0.01, "learning_rate": 1.9999999252295637e-06, "loss": 0.8148, "step": 250 }, { "epoch": 0.01, "learning_rate": 1.9999994682991603e-06, "loss": 0.7634, "step": 255 }, { "epoch": 0.01, "learning_rate": 1.999998595977674e-06, "loss": 0.7448, "step": 260 }, { "epoch": 0.01, "learning_rate": 1.999997308265467e-06, "loss": 0.7508, "step": 265 }, { "epoch": 0.01, "learning_rate": 1.999995605163075e-06, "loss": 0.7696, "step": 270 }, { "epoch": 0.01, "learning_rate": 1.9999934866712048e-06, "loss": 0.7676, "step": 275 }, { "epoch": 0.01, "learning_rate": 1.9999909527907367e-06, "loss": 0.7601, "step": 280 }, { "epoch": 0.01, "learning_rate": 1.9999880035227236e-06, "loss": 0.7779, "step": 285 }, { "epoch": 0.01, "learning_rate": 1.9999846388683895e-06, "loss": 0.7768, "step": 290 }, { "epoch": 0.01, "learning_rate": 1.9999808588291327e-06, "loss": 0.7713, "step": 295 }, { "epoch": 0.01, "learning_rate": 1.999976663406524e-06, "loss": 0.7666, "step": 300 }, { "epoch": 0.01, "learning_rate": 1.999972052602305e-06, "loss": 0.742, "step": 305 }, { "epoch": 0.01, "learning_rate": 1.999967026418392e-06, "loss": 0.783, "step": 310 }, { "epoch": 0.01, "learning_rate": 1.999961584856872e-06, "loss": 0.7269, "step": 315 }, { "epoch": 0.01, "learning_rate": 1.9999557279200056e-06, "loss": 0.7336, "step": 320 }, { "epoch": 0.01, "learning_rate": 1.9999494556102263e-06, "loss": 0.7072, "step": 325 }, { "epoch": 0.01, "learning_rate": 1.9999427679301387e-06, "loss": 0.7709, "step": 330 }, { "epoch": 0.01, "learning_rate": 1.999935664882522e-06, "loss": 0.7237, "step": 335 }, { "epoch": 0.01, "learning_rate": 1.9999281464703247e-06, "loss": 0.719, "step": 340 }, { "epoch": 0.01, "learning_rate": 1.999920212696672e-06, "loss": 0.748, "step": 345 }, { "epoch": 0.01, "learning_rate": 1.999911863564859e-06, "loss": 0.7167, "step": 350 }, { "epoch": 0.01, "learning_rate": 1.9999030990783527e-06, "loss": 0.7151, "step": 355 }, { "epoch": 0.01, "learning_rate": 1.999893919240795e-06, "loss": 0.7095, "step": 360 }, { "epoch": 0.01, "learning_rate": 1.9998843240559986e-06, "loss": 0.7703, "step": 365 }, { "epoch": 0.02, "learning_rate": 1.9998743135279497e-06, "loss": 0.7456, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.999863887660806e-06, "loss": 0.7532, "step": 375 }, { "epoch": 0.02, "learning_rate": 1.999853046458899e-06, "loss": 0.7014, "step": 380 }, { "epoch": 0.02, "learning_rate": 1.9998417899267313e-06, "loss": 0.7629, "step": 385 }, { "epoch": 0.02, "learning_rate": 1.999830118068979e-06, "loss": 0.7329, "step": 390 }, { "epoch": 0.02, "learning_rate": 1.999818030890491e-06, "loss": 0.723, "step": 395 }, { "epoch": 0.02, "learning_rate": 1.999805528396288e-06, "loss": 0.7549, "step": 400 }, { "epoch": 0.02, "learning_rate": 1.9997926105915627e-06, "loss": 0.7121, "step": 405 }, { "epoch": 0.02, "learning_rate": 1.999779277481682e-06, "loss": 0.7506, "step": 410 }, { "epoch": 0.02, "learning_rate": 1.9997655290721834e-06, "loss": 0.7284, "step": 415 }, { "epoch": 0.02, "learning_rate": 1.9997513653687786e-06, "loss": 0.7344, "step": 420 }, { "epoch": 0.02, "learning_rate": 1.999736786377351e-06, "loss": 0.7684, "step": 425 }, { "epoch": 0.02, "learning_rate": 1.9997217921039567e-06, "loss": 0.7427, "step": 430 }, { "epoch": 0.02, "learning_rate": 1.9997063825548237e-06, "loss": 0.7139, "step": 435 }, { "epoch": 0.02, "learning_rate": 1.9996905577363533e-06, "loss": 0.761, "step": 440 }, { "epoch": 0.02, "learning_rate": 1.9996743176551186e-06, "loss": 0.7545, "step": 445 }, { "epoch": 0.02, "learning_rate": 1.999657662317866e-06, "loss": 0.7431, "step": 450 }, { "epoch": 0.02, "learning_rate": 1.999640591731515e-06, "loss": 0.7225, "step": 455 }, { "epoch": 0.02, "learning_rate": 1.999623105903154e-06, "loss": 0.7284, "step": 460 }, { "epoch": 0.02, "learning_rate": 1.999605204840049e-06, "loss": 0.76, "step": 465 }, { "epoch": 0.02, "learning_rate": 1.9995868885496343e-06, "loss": 0.7413, "step": 470 }, { "epoch": 0.02, "learning_rate": 1.9995681570395195e-06, "loss": 0.7837, "step": 475 }, { "epoch": 0.02, "learning_rate": 1.9995490103174847e-06, "loss": 0.7347, "step": 480 }, { "epoch": 0.02, "learning_rate": 1.999529448391483e-06, "loss": 0.7576, "step": 485 }, { "epoch": 0.02, "learning_rate": 1.9995094712696413e-06, "loss": 0.7665, "step": 490 }, { "epoch": 0.02, "learning_rate": 1.9994890789602576e-06, "loss": 0.7353, "step": 495 }, { "epoch": 0.02, "learning_rate": 1.999468271471802e-06, "loss": 0.7344, "step": 500 }, { "epoch": 0.02, "learning_rate": 1.9994470488129185e-06, "loss": 0.7476, "step": 505 }, { "epoch": 0.02, "learning_rate": 1.9994254109924223e-06, "loss": 0.7257, "step": 510 }, { "epoch": 0.02, "learning_rate": 1.9994033580193017e-06, "loss": 0.7306, "step": 515 }, { "epoch": 0.02, "learning_rate": 1.999380889902718e-06, "loss": 0.7115, "step": 520 }, { "epoch": 0.02, "learning_rate": 1.9993580066520034e-06, "loss": 0.7452, "step": 525 }, { "epoch": 0.02, "learning_rate": 1.9993347082766636e-06, "loss": 0.7523, "step": 530 }, { "epoch": 0.02, "learning_rate": 1.9993109947863764e-06, "loss": 0.7091, "step": 535 }, { "epoch": 0.02, "learning_rate": 1.999286866190993e-06, "loss": 0.7383, "step": 540 }, { "epoch": 0.02, "learning_rate": 1.999262322500535e-06, "loss": 0.7043, "step": 545 }, { "epoch": 0.02, "learning_rate": 1.9992373637251982e-06, "loss": 0.7098, "step": 550 }, { "epoch": 0.02, "learning_rate": 1.999211989875351e-06, "loss": 0.7142, "step": 555 }, { "epoch": 0.02, "learning_rate": 1.999186200961532e-06, "loss": 0.7424, "step": 560 }, { "epoch": 0.02, "learning_rate": 1.9991599969944552e-06, "loss": 0.7348, "step": 565 }, { "epoch": 0.02, "learning_rate": 1.9991333779850043e-06, "loss": 0.7126, "step": 570 }, { "epoch": 0.02, "learning_rate": 1.999106343944237e-06, "loss": 0.7341, "step": 575 }, { "epoch": 0.02, "learning_rate": 1.9990788948833833e-06, "loss": 0.7445, "step": 580 }, { "epoch": 0.02, "learning_rate": 1.999051030813845e-06, "loss": 0.7181, "step": 585 }, { "epoch": 0.02, "learning_rate": 1.999022751747197e-06, "loss": 0.7295, "step": 590 }, { "epoch": 0.02, "learning_rate": 1.998994057695185e-06, "loss": 0.7159, "step": 595 }, { "epoch": 0.02, "learning_rate": 1.99896494866973e-06, "loss": 0.6844, "step": 600 }, { "epoch": 0.02, "learning_rate": 1.9989354246829222e-06, "loss": 0.7511, "step": 605 }, { "epoch": 0.02, "learning_rate": 1.9989054857470267e-06, "loss": 0.7322, "step": 610 }, { "epoch": 0.02, "learning_rate": 1.9988751318744787e-06, "loss": 0.7829, "step": 615 }, { "epoch": 0.03, "learning_rate": 1.998844363077888e-06, "loss": 0.7229, "step": 620 }, { "epoch": 0.03, "learning_rate": 1.998813179370035e-06, "loss": 0.738, "step": 625 }, { "epoch": 0.03, "learning_rate": 1.9987815807638733e-06, "loss": 0.6934, "step": 630 }, { "epoch": 0.03, "learning_rate": 1.9987495672725294e-06, "loss": 0.7005, "step": 635 }, { "epoch": 0.03, "learning_rate": 1.9987171389093e-06, "loss": 0.7692, "step": 640 }, { "epoch": 0.03, "learning_rate": 1.998684295687657e-06, "loss": 0.7101, "step": 645 }, { "epoch": 0.03, "learning_rate": 1.998651037621242e-06, "loss": 0.7813, "step": 650 }, { "epoch": 0.03, "learning_rate": 1.9986173647238715e-06, "loss": 0.7526, "step": 655 }, { "epoch": 0.03, "learning_rate": 1.9985832770095313e-06, "loss": 0.7235, "step": 660 }, { "epoch": 0.03, "learning_rate": 1.998548774492382e-06, "loss": 0.7201, "step": 665 }, { "epoch": 0.03, "learning_rate": 1.9985138571867557e-06, "loss": 0.7303, "step": 670 }, { "epoch": 0.03, "learning_rate": 1.998478525107157e-06, "loss": 0.7375, "step": 675 }, { "epoch": 0.03, "learning_rate": 1.998442778268262e-06, "loss": 0.7123, "step": 680 }, { "epoch": 0.03, "learning_rate": 1.99840661668492e-06, "loss": 0.7541, "step": 685 }, { "epoch": 0.03, "learning_rate": 1.998370040372151e-06, "loss": 0.7685, "step": 690 }, { "epoch": 0.03, "learning_rate": 1.99833304934515e-06, "loss": 0.7029, "step": 695 }, { "epoch": 0.03, "learning_rate": 1.9982956436192827e-06, "loss": 0.7797, "step": 700 }, { "epoch": 0.03, "learning_rate": 1.9982578232100866e-06, "loss": 0.7326, "step": 705 }, { "epoch": 0.03, "learning_rate": 1.9982195881332714e-06, "loss": 0.773, "step": 710 }, { "epoch": 0.03, "learning_rate": 1.9981809384047207e-06, "loss": 0.741, "step": 715 }, { "epoch": 0.03, "learning_rate": 1.9981418740404886e-06, "loss": 0.7518, "step": 720 }, { "epoch": 0.03, "learning_rate": 1.998102395056802e-06, "loss": 0.7338, "step": 725 }, { "epoch": 0.03, "learning_rate": 1.998062501470061e-06, "loss": 0.7192, "step": 730 }, { "epoch": 0.03, "learning_rate": 1.998022193296836e-06, "loss": 0.7429, "step": 735 }, { "epoch": 0.03, "learning_rate": 1.9979814705538715e-06, "loss": 0.6953, "step": 740 }, { "epoch": 0.03, "learning_rate": 1.997940333258083e-06, "loss": 0.7265, "step": 745 }, { "epoch": 0.03, "learning_rate": 1.9978987814265583e-06, "loss": 0.7105, "step": 750 }, { "epoch": 0.03, "learning_rate": 1.997856815076558e-06, "loss": 0.6994, "step": 755 }, { "epoch": 0.03, "learning_rate": 1.9978144342255147e-06, "loss": 0.7008, "step": 760 }, { "epoch": 0.03, "learning_rate": 1.9977716388910325e-06, "loss": 0.7301, "step": 765 }, { "epoch": 0.03, "learning_rate": 1.997728429090889e-06, "loss": 0.7662, "step": 770 }, { "epoch": 0.03, "learning_rate": 1.9976848048430323e-06, "loss": 0.7428, "step": 775 }, { "epoch": 0.03, "learning_rate": 1.9976407661655844e-06, "loss": 0.706, "step": 780 }, { "epoch": 0.03, "learning_rate": 1.997596313076838e-06, "loss": 0.6853, "step": 785 }, { "epoch": 0.03, "learning_rate": 1.9975514455952584e-06, "loss": 0.7363, "step": 790 }, { "epoch": 0.03, "learning_rate": 1.9975061637394834e-06, "loss": 0.7217, "step": 795 }, { "epoch": 0.03, "learning_rate": 1.997460467528323e-06, "loss": 0.7161, "step": 800 }, { "epoch": 0.03, "eval_loss": 0.6896045207977295, "eval_runtime": 140.4315, "eval_samples_per_second": 16.848, "eval_steps_per_second": 2.813, "step": 800 }, { "epoch": 0.03, "learning_rate": 1.997414356980759e-06, "loss": 0.7911, "step": 805 }, { "epoch": 0.03, "learning_rate": 1.9973678321159443e-06, "loss": 0.7037, "step": 810 }, { "epoch": 0.03, "learning_rate": 1.9973208929532063e-06, "loss": 0.7083, "step": 815 }, { "epoch": 0.03, "learning_rate": 1.9972735395120418e-06, "loss": 0.7183, "step": 820 }, { "epoch": 0.03, "learning_rate": 1.997225771812122e-06, "loss": 0.7227, "step": 825 }, { "epoch": 0.03, "learning_rate": 1.9971775898732893e-06, "loss": 0.7271, "step": 830 }, { "epoch": 0.03, "learning_rate": 1.9971289937155577e-06, "loss": 0.7271, "step": 835 }, { "epoch": 0.03, "learning_rate": 1.997079983359113e-06, "loss": 0.7065, "step": 840 }, { "epoch": 0.03, "learning_rate": 1.9970305588243145e-06, "loss": 0.706, "step": 845 }, { "epoch": 0.03, "learning_rate": 1.9969807201316925e-06, "loss": 0.7, "step": 850 }, { "epoch": 0.03, "learning_rate": 1.9969304673019494e-06, "loss": 0.7165, "step": 855 }, { "epoch": 0.03, "learning_rate": 1.99687980035596e-06, "loss": 0.729, "step": 860 }, { "epoch": 0.04, "learning_rate": 1.996828719314771e-06, "loss": 0.7199, "step": 865 }, { "epoch": 0.04, "learning_rate": 1.996777224199601e-06, "loss": 0.7041, "step": 870 }, { "epoch": 0.04, "learning_rate": 1.99672531503184e-06, "loss": 0.735, "step": 875 }, { "epoch": 0.04, "learning_rate": 1.996672991833051e-06, "loss": 0.7153, "step": 880 }, { "epoch": 0.04, "learning_rate": 1.996620254624969e-06, "loss": 0.714, "step": 885 }, { "epoch": 0.04, "learning_rate": 1.9965671034295e-06, "loss": 0.7309, "step": 890 }, { "epoch": 0.04, "learning_rate": 1.996513538268723e-06, "loss": 0.7808, "step": 895 }, { "epoch": 0.04, "learning_rate": 1.9964595591648883e-06, "loss": 0.7407, "step": 900 }, { "epoch": 0.04, "learning_rate": 1.9964051661404185e-06, "loss": 0.6831, "step": 905 }, { "epoch": 0.04, "learning_rate": 1.9963503592179078e-06, "loss": 0.7178, "step": 910 }, { "epoch": 0.04, "learning_rate": 1.996295138420122e-06, "loss": 0.7607, "step": 915 }, { "epoch": 0.04, "learning_rate": 1.9962395037700007e-06, "loss": 0.747, "step": 920 }, { "epoch": 0.04, "learning_rate": 1.996183455290653e-06, "loss": 0.6911, "step": 925 }, { "epoch": 0.04, "learning_rate": 1.996126993005361e-06, "loss": 0.7038, "step": 930 }, { "epoch": 0.04, "learning_rate": 1.996070116937579e-06, "loss": 0.7195, "step": 935 }, { "epoch": 0.04, "learning_rate": 1.9960128271109326e-06, "loss": 0.6974, "step": 940 }, { "epoch": 0.04, "learning_rate": 1.9959551235492195e-06, "loss": 0.7399, "step": 945 }, { "epoch": 0.04, "learning_rate": 1.9958970062764095e-06, "loss": 0.7475, "step": 950 }, { "epoch": 0.04, "learning_rate": 1.9958384753166437e-06, "loss": 0.7091, "step": 955 }, { "epoch": 0.04, "learning_rate": 1.995779530694236e-06, "loss": 0.6908, "step": 960 }, { "epoch": 0.04, "learning_rate": 1.9957201724336704e-06, "loss": 0.7052, "step": 965 }, { "epoch": 0.04, "learning_rate": 1.9956604005596043e-06, "loss": 0.6963, "step": 970 }, { "epoch": 0.04, "learning_rate": 1.9956002150968667e-06, "loss": 0.7064, "step": 975 }, { "epoch": 0.04, "learning_rate": 1.9955396160704582e-06, "loss": 0.6804, "step": 980 }, { "epoch": 0.04, "learning_rate": 1.99547860350555e-06, "loss": 0.6759, "step": 985 }, { "epoch": 0.04, "learning_rate": 1.995417177427488e-06, "loss": 0.7175, "step": 990 }, { "epoch": 0.04, "learning_rate": 1.9953553378617866e-06, "loss": 0.6926, "step": 995 }, { "epoch": 0.04, "learning_rate": 1.995293084834134e-06, "loss": 0.7109, "step": 1000 }, { "epoch": 0.04, "learning_rate": 1.9952304183703893e-06, "loss": 0.7129, "step": 1005 }, { "epoch": 0.04, "learning_rate": 1.9951673384965835e-06, "loss": 0.7117, "step": 1010 }, { "epoch": 0.04, "learning_rate": 1.99510384523892e-06, "loss": 0.7694, "step": 1015 }, { "epoch": 0.04, "learning_rate": 1.995039938623773e-06, "loss": 0.7381, "step": 1020 }, { "epoch": 0.04, "learning_rate": 1.9949756186776893e-06, "loss": 0.722, "step": 1025 }, { "epoch": 0.04, "learning_rate": 1.9949108854273855e-06, "loss": 0.7288, "step": 1030 }, { "epoch": 0.04, "learning_rate": 1.9948457388997528e-06, "loss": 0.7045, "step": 1035 }, { "epoch": 0.04, "learning_rate": 1.994780179121851e-06, "loss": 0.7623, "step": 1040 }, { "epoch": 0.04, "learning_rate": 1.994714206120914e-06, "loss": 0.725, "step": 1045 }, { "epoch": 0.04, "learning_rate": 1.9946478199243466e-06, "loss": 0.7203, "step": 1050 }, { "epoch": 0.04, "learning_rate": 1.9945810205597246e-06, "loss": 0.7011, "step": 1055 }, { "epoch": 0.04, "learning_rate": 1.9945138080547957e-06, "loss": 0.6946, "step": 1060 }, { "epoch": 0.04, "learning_rate": 1.99444618243748e-06, "loss": 0.7151, "step": 1065 }, { "epoch": 0.04, "learning_rate": 1.994378143735868e-06, "loss": 0.7074, "step": 1070 }, { "epoch": 0.04, "learning_rate": 1.9943096919782225e-06, "loss": 0.7, "step": 1075 }, { "epoch": 0.04, "learning_rate": 1.994240827192978e-06, "loss": 0.6957, "step": 1080 }, { "epoch": 0.04, "learning_rate": 1.9941715494087408e-06, "loss": 0.7348, "step": 1085 }, { "epoch": 0.04, "learning_rate": 1.9941018586542866e-06, "loss": 0.6984, "step": 1090 }, { "epoch": 0.04, "learning_rate": 1.9940317549585665e-06, "loss": 0.7252, "step": 1095 }, { "epoch": 0.04, "learning_rate": 1.9939612383506993e-06, "loss": 0.7706, "step": 1100 }, { "epoch": 0.04, "learning_rate": 1.993890308859978e-06, "loss": 0.7261, "step": 1105 }, { "epoch": 0.05, "learning_rate": 1.9938189665158654e-06, "loss": 0.6879, "step": 1110 }, { "epoch": 0.05, "learning_rate": 1.9937472113479966e-06, "loss": 0.7088, "step": 1115 }, { "epoch": 0.05, "learning_rate": 1.9936750433861787e-06, "loss": 0.7428, "step": 1120 }, { "epoch": 0.05, "learning_rate": 1.993602462660389e-06, "loss": 0.7111, "step": 1125 }, { "epoch": 0.05, "learning_rate": 1.993529469200777e-06, "loss": 0.7027, "step": 1130 }, { "epoch": 0.05, "learning_rate": 1.993456063037664e-06, "loss": 0.6969, "step": 1135 }, { "epoch": 0.05, "learning_rate": 1.9933822442015416e-06, "loss": 0.7343, "step": 1140 }, { "epoch": 0.05, "learning_rate": 1.993308012723074e-06, "loss": 0.7174, "step": 1145 }, { "epoch": 0.05, "learning_rate": 1.993233368633096e-06, "loss": 0.6997, "step": 1150 }, { "epoch": 0.05, "learning_rate": 1.993158311962614e-06, "loss": 0.693, "step": 1155 }, { "epoch": 0.05, "learning_rate": 1.9930828427428066e-06, "loss": 0.7136, "step": 1160 }, { "epoch": 0.05, "learning_rate": 1.9930069610050224e-06, "loss": 0.7211, "step": 1165 }, { "epoch": 0.05, "learning_rate": 1.9929306667807823e-06, "loss": 0.7144, "step": 1170 }, { "epoch": 0.05, "learning_rate": 1.992853960101778e-06, "loss": 0.6787, "step": 1175 }, { "epoch": 0.05, "learning_rate": 1.9927768409998733e-06, "loss": 0.7348, "step": 1180 }, { "epoch": 0.05, "learning_rate": 1.992699309507102e-06, "loss": 0.6718, "step": 1185 }, { "epoch": 0.05, "learning_rate": 1.992621365655671e-06, "loss": 0.7146, "step": 1190 }, { "epoch": 0.05, "learning_rate": 1.9925430094779566e-06, "loss": 0.6982, "step": 1195 }, { "epoch": 0.05, "learning_rate": 1.9924642410065075e-06, "loss": 0.7379, "step": 1200 }, { "epoch": 0.05, "learning_rate": 1.992385060274044e-06, "loss": 0.6983, "step": 1205 }, { "epoch": 0.05, "learning_rate": 1.9923054673134564e-06, "loss": 0.7893, "step": 1210 }, { "epoch": 0.05, "learning_rate": 1.992225462157807e-06, "loss": 0.714, "step": 1215 }, { "epoch": 0.05, "learning_rate": 1.99214504484033e-06, "loss": 0.7394, "step": 1220 }, { "epoch": 0.05, "learning_rate": 1.9920642153944288e-06, "loss": 0.7238, "step": 1225 }, { "epoch": 0.05, "learning_rate": 1.9919829738536806e-06, "loss": 0.6847, "step": 1230 }, { "epoch": 0.05, "learning_rate": 1.991901320251831e-06, "loss": 0.6936, "step": 1235 }, { "epoch": 0.05, "learning_rate": 1.9918192546227995e-06, "loss": 0.7271, "step": 1240 }, { "epoch": 0.05, "learning_rate": 1.991736777000675e-06, "loss": 0.7416, "step": 1245 }, { "epoch": 0.05, "learning_rate": 1.9916538874197176e-06, "loss": 0.7637, "step": 1250 }, { "epoch": 0.05, "learning_rate": 1.9915705859143594e-06, "loss": 0.6722, "step": 1255 }, { "epoch": 0.05, "learning_rate": 1.9914868725192025e-06, "loss": 0.6943, "step": 1260 }, { "epoch": 0.05, "learning_rate": 1.991402747269022e-06, "loss": 0.7433, "step": 1265 }, { "epoch": 0.05, "learning_rate": 1.991318210198761e-06, "loss": 0.7015, "step": 1270 }, { "epoch": 0.05, "learning_rate": 1.991233261343537e-06, "loss": 0.6772, "step": 1275 }, { "epoch": 0.05, "learning_rate": 1.9911479007386364e-06, "loss": 0.7278, "step": 1280 }, { "epoch": 0.05, "learning_rate": 1.991062128419517e-06, "loss": 0.7471, "step": 1285 }, { "epoch": 0.05, "learning_rate": 1.9909759444218085e-06, "loss": 0.7234, "step": 1290 }, { "epoch": 0.05, "learning_rate": 1.9908893487813106e-06, "loss": 0.7118, "step": 1295 }, { "epoch": 0.05, "learning_rate": 1.990802341533994e-06, "loss": 0.7351, "step": 1300 }, { "epoch": 0.05, "learning_rate": 1.9907149227160016e-06, "loss": 0.7084, "step": 1305 }, { "epoch": 0.05, "learning_rate": 1.9906270923636457e-06, "loss": 0.7174, "step": 1310 }, { "epoch": 0.05, "learning_rate": 1.9905388505134107e-06, "loss": 0.6935, "step": 1315 }, { "epoch": 0.05, "learning_rate": 1.990450197201951e-06, "loss": 0.7004, "step": 1320 }, { "epoch": 0.05, "learning_rate": 1.990361132466093e-06, "loss": 0.7077, "step": 1325 }, { "epoch": 0.05, "learning_rate": 1.9902716563428335e-06, "loss": 0.7226, "step": 1330 }, { "epoch": 0.05, "learning_rate": 1.9901817688693395e-06, "loss": 0.7025, "step": 1335 }, { "epoch": 0.05, "learning_rate": 1.99009147008295e-06, "loss": 0.7139, "step": 1340 }, { "epoch": 0.05, "learning_rate": 1.9900007600211735e-06, "loss": 0.6609, "step": 1345 }, { "epoch": 0.05, "learning_rate": 1.9899096387216914e-06, "loss": 0.7452, "step": 1350 }, { "epoch": 0.06, "learning_rate": 1.9898181062223536e-06, "loss": 0.7111, "step": 1355 }, { "epoch": 0.06, "learning_rate": 1.9897261625611822e-06, "loss": 0.6925, "step": 1360 }, { "epoch": 0.06, "learning_rate": 1.9896338077763704e-06, "loss": 0.7097, "step": 1365 }, { "epoch": 0.06, "learning_rate": 1.989541041906281e-06, "loss": 0.7146, "step": 1370 }, { "epoch": 0.06, "learning_rate": 1.9894478649894484e-06, "loss": 0.704, "step": 1375 }, { "epoch": 0.06, "learning_rate": 1.989354277064577e-06, "loss": 0.7466, "step": 1380 }, { "epoch": 0.06, "learning_rate": 1.9892602781705427e-06, "loss": 0.6958, "step": 1385 }, { "epoch": 0.06, "learning_rate": 1.9891658683463922e-06, "loss": 0.7421, "step": 1390 }, { "epoch": 0.06, "learning_rate": 1.989071047631342e-06, "loss": 0.6658, "step": 1395 }, { "epoch": 0.06, "learning_rate": 1.98897581606478e-06, "loss": 0.6868, "step": 1400 }, { "epoch": 0.06, "learning_rate": 1.988880173686265e-06, "loss": 0.7437, "step": 1405 }, { "epoch": 0.06, "learning_rate": 1.988784120535525e-06, "loss": 0.7484, "step": 1410 }, { "epoch": 0.06, "learning_rate": 1.988687656652461e-06, "loss": 0.7063, "step": 1415 }, { "epoch": 0.06, "learning_rate": 1.9885907820771415e-06, "loss": 0.713, "step": 1420 }, { "epoch": 0.06, "learning_rate": 1.988493496849809e-06, "loss": 0.7313, "step": 1425 }, { "epoch": 0.06, "learning_rate": 1.9883958010108736e-06, "loss": 0.6987, "step": 1430 }, { "epoch": 0.06, "learning_rate": 1.9882976946009186e-06, "loss": 0.7089, "step": 1435 }, { "epoch": 0.06, "learning_rate": 1.9881991776606956e-06, "loss": 0.6492, "step": 1440 }, { "epoch": 0.06, "learning_rate": 1.9881002502311285e-06, "loss": 0.6538, "step": 1445 }, { "epoch": 0.06, "learning_rate": 1.9880009123533095e-06, "loss": 0.7096, "step": 1450 }, { "epoch": 0.06, "learning_rate": 1.9879011640685043e-06, "loss": 0.7329, "step": 1455 }, { "epoch": 0.06, "learning_rate": 1.9878010054181463e-06, "loss": 0.7414, "step": 1460 }, { "epoch": 0.06, "learning_rate": 1.9877004364438414e-06, "loss": 0.7089, "step": 1465 }, { "epoch": 0.06, "learning_rate": 1.987599457187365e-06, "loss": 0.738, "step": 1470 }, { "epoch": 0.06, "learning_rate": 1.9874980676906617e-06, "loss": 0.7078, "step": 1475 }, { "epoch": 0.06, "learning_rate": 1.9873962679958494e-06, "loss": 0.6987, "step": 1480 }, { "epoch": 0.06, "learning_rate": 1.987294058145214e-06, "loss": 0.7456, "step": 1485 }, { "epoch": 0.06, "learning_rate": 1.987191438181213e-06, "loss": 0.7402, "step": 1490 }, { "epoch": 0.06, "learning_rate": 1.987088408146473e-06, "loss": 0.7308, "step": 1495 }, { "epoch": 0.06, "learning_rate": 1.986984968083793e-06, "loss": 0.7197, "step": 1500 }, { "epoch": 0.06, "learning_rate": 1.9868811180361402e-06, "loss": 0.7386, "step": 1505 }, { "epoch": 0.06, "learning_rate": 1.9867768580466536e-06, "loss": 0.7024, "step": 1510 }, { "epoch": 0.06, "learning_rate": 1.986672188158641e-06, "loss": 0.7241, "step": 1515 }, { "epoch": 0.06, "learning_rate": 1.9865671084155826e-06, "loss": 0.7006, "step": 1520 }, { "epoch": 0.06, "learning_rate": 1.986461618861127e-06, "loss": 0.7035, "step": 1525 }, { "epoch": 0.06, "learning_rate": 1.986355719539093e-06, "loss": 0.723, "step": 1530 }, { "epoch": 0.06, "learning_rate": 1.9862494104934717e-06, "loss": 0.7184, "step": 1535 }, { "epoch": 0.06, "learning_rate": 1.9861426917684214e-06, "loss": 0.7018, "step": 1540 }, { "epoch": 0.06, "learning_rate": 1.986035563408273e-06, "loss": 0.6943, "step": 1545 }, { "epoch": 0.06, "learning_rate": 1.9859280254575268e-06, "loss": 0.7434, "step": 1550 }, { "epoch": 0.06, "learning_rate": 1.9858200779608526e-06, "loss": 0.7122, "step": 1555 }, { "epoch": 0.06, "learning_rate": 1.9857117209630913e-06, "loss": 0.7187, "step": 1560 }, { "epoch": 0.06, "learning_rate": 1.9856029545092536e-06, "loss": 0.6825, "step": 1565 }, { "epoch": 0.06, "learning_rate": 1.985493778644519e-06, "loss": 0.6964, "step": 1570 }, { "epoch": 0.06, "learning_rate": 1.9853841934142396e-06, "loss": 0.7437, "step": 1575 }, { "epoch": 0.06, "learning_rate": 1.9852741988639356e-06, "loss": 0.7125, "step": 1580 }, { "epoch": 0.06, "learning_rate": 1.9851637950392974e-06, "loss": 0.7241, "step": 1585 }, { "epoch": 0.06, "learning_rate": 1.9850529819861863e-06, "loss": 0.7113, "step": 1590 }, { "epoch": 0.06, "learning_rate": 1.984941759750633e-06, "loss": 0.6725, "step": 1595 }, { "epoch": 0.06, "learning_rate": 1.984830128378838e-06, "loss": 0.7166, "step": 1600 }, { "epoch": 0.06, "eval_loss": 0.6776626706123352, "eval_runtime": 140.3492, "eval_samples_per_second": 16.858, "eval_steps_per_second": 2.814, "step": 1600 }, { "epoch": 0.07, "learning_rate": 1.9847180879171727e-06, "loss": 0.7111, "step": 1605 }, { "epoch": 0.07, "learning_rate": 1.9846056384121768e-06, "loss": 0.7004, "step": 1610 }, { "epoch": 0.07, "learning_rate": 1.9844927799105612e-06, "loss": 0.7221, "step": 1615 }, { "epoch": 0.07, "learning_rate": 1.984379512459207e-06, "loss": 0.7363, "step": 1620 }, { "epoch": 0.07, "learning_rate": 1.984265836105163e-06, "loss": 0.7107, "step": 1625 }, { "epoch": 0.07, "learning_rate": 1.9841517508956506e-06, "loss": 0.7081, "step": 1630 }, { "epoch": 0.07, "learning_rate": 1.9840372568780594e-06, "loss": 0.6796, "step": 1635 }, { "epoch": 0.07, "learning_rate": 1.9839223540999496e-06, "loss": 0.7207, "step": 1640 }, { "epoch": 0.07, "learning_rate": 1.9838070426090505e-06, "loss": 0.716, "step": 1645 }, { "epoch": 0.07, "learning_rate": 1.983691322453261e-06, "loss": 0.7306, "step": 1650 }, { "epoch": 0.07, "learning_rate": 1.983575193680651e-06, "loss": 0.724, "step": 1655 }, { "epoch": 0.07, "learning_rate": 1.983458656339459e-06, "loss": 0.7447, "step": 1660 }, { "epoch": 0.07, "learning_rate": 1.9833417104780942e-06, "loss": 0.6929, "step": 1665 }, { "epoch": 0.07, "learning_rate": 1.9832243561451346e-06, "loss": 0.7228, "step": 1670 }, { "epoch": 0.07, "learning_rate": 1.9831065933893275e-06, "loss": 0.6824, "step": 1675 }, { "epoch": 0.07, "learning_rate": 1.982988422259591e-06, "loss": 0.7056, "step": 1680 }, { "epoch": 0.07, "learning_rate": 1.9828698428050123e-06, "loss": 0.6943, "step": 1685 }, { "epoch": 0.07, "learning_rate": 1.982750855074849e-06, "loss": 0.7101, "step": 1690 }, { "epoch": 0.07, "learning_rate": 1.9826314591185263e-06, "loss": 0.6786, "step": 1695 }, { "epoch": 0.07, "learning_rate": 1.9825116549856408e-06, "loss": 0.6954, "step": 1700 }, { "epoch": 0.07, "learning_rate": 1.9823914427259584e-06, "loss": 0.7165, "step": 1705 }, { "epoch": 0.07, "learning_rate": 1.982270822389414e-06, "loss": 0.7208, "step": 1710 }, { "epoch": 0.07, "learning_rate": 1.9821497940261124e-06, "loss": 0.6981, "step": 1715 }, { "epoch": 0.07, "learning_rate": 1.982028357686327e-06, "loss": 0.6914, "step": 1720 }, { "epoch": 0.07, "learning_rate": 1.9819065134205026e-06, "loss": 0.7291, "step": 1725 }, { "epoch": 0.07, "learning_rate": 1.9817842612792513e-06, "loss": 0.6882, "step": 1730 }, { "epoch": 0.07, "learning_rate": 1.981661601313356e-06, "loss": 0.685, "step": 1735 }, { "epoch": 0.07, "learning_rate": 1.981538533573768e-06, "loss": 0.6954, "step": 1740 }, { "epoch": 0.07, "learning_rate": 1.9814150581116093e-06, "loss": 0.7104, "step": 1745 }, { "epoch": 0.07, "learning_rate": 1.9812911749781705e-06, "loss": 0.7026, "step": 1750 }, { "epoch": 0.07, "learning_rate": 1.981166884224911e-06, "loss": 0.6907, "step": 1755 }, { "epoch": 0.07, "learning_rate": 1.981042185903461e-06, "loss": 0.6988, "step": 1760 }, { "epoch": 0.07, "learning_rate": 1.980917080065618e-06, "loss": 0.6894, "step": 1765 }, { "epoch": 0.07, "learning_rate": 1.98079156676335e-06, "loss": 0.7308, "step": 1770 }, { "epoch": 0.07, "learning_rate": 1.9806656460487955e-06, "loss": 0.6688, "step": 1775 }, { "epoch": 0.07, "learning_rate": 1.9805393179742596e-06, "loss": 0.7028, "step": 1780 }, { "epoch": 0.07, "learning_rate": 1.980412582592218e-06, "loss": 0.6982, "step": 1785 }, { "epoch": 0.07, "learning_rate": 1.980285439955316e-06, "loss": 0.7326, "step": 1790 }, { "epoch": 0.07, "learning_rate": 1.980157890116367e-06, "loss": 0.7204, "step": 1795 }, { "epoch": 0.07, "learning_rate": 1.980029933128354e-06, "loss": 0.7016, "step": 1800 }, { "epoch": 0.07, "learning_rate": 1.9799015690444302e-06, "loss": 0.7076, "step": 1805 }, { "epoch": 0.07, "learning_rate": 1.9797727979179156e-06, "loss": 0.7121, "step": 1810 }, { "epoch": 0.07, "learning_rate": 1.9796436198023016e-06, "loss": 0.7204, "step": 1815 }, { "epoch": 0.07, "learning_rate": 1.9795140347512472e-06, "loss": 0.7178, "step": 1820 }, { "epoch": 0.07, "learning_rate": 1.979384042818581e-06, "loss": 0.7223, "step": 1825 }, { "epoch": 0.07, "learning_rate": 1.979253644058301e-06, "loss": 0.7066, "step": 1830 }, { "epoch": 0.07, "learning_rate": 1.979122838524573e-06, "loss": 0.6873, "step": 1835 }, { "epoch": 0.07, "learning_rate": 1.9789916262717328e-06, "loss": 0.6822, "step": 1840 }, { "epoch": 0.07, "learning_rate": 1.9788600073542848e-06, "loss": 0.6947, "step": 1845 }, { "epoch": 0.08, "learning_rate": 1.978727981826902e-06, "loss": 0.7092, "step": 1850 }, { "epoch": 0.08, "learning_rate": 1.978595549744427e-06, "loss": 0.7166, "step": 1855 }, { "epoch": 0.08, "learning_rate": 1.9784627111618715e-06, "loss": 0.6842, "step": 1860 }, { "epoch": 0.08, "learning_rate": 1.9783294661344145e-06, "loss": 0.7161, "step": 1865 }, { "epoch": 0.08, "learning_rate": 1.978195814717405e-06, "loss": 0.6881, "step": 1870 }, { "epoch": 0.08, "learning_rate": 1.978061756966361e-06, "loss": 0.7342, "step": 1875 }, { "epoch": 0.08, "learning_rate": 1.977927292936969e-06, "loss": 0.6767, "step": 1880 }, { "epoch": 0.08, "learning_rate": 1.9777924226850842e-06, "loss": 0.7096, "step": 1885 }, { "epoch": 0.08, "learning_rate": 1.97765714626673e-06, "loss": 0.694, "step": 1890 }, { "epoch": 0.08, "learning_rate": 1.977521463738099e-06, "loss": 0.7152, "step": 1895 }, { "epoch": 0.08, "learning_rate": 1.9773853751555537e-06, "loss": 0.6618, "step": 1900 }, { "epoch": 0.08, "learning_rate": 1.977248880575623e-06, "loss": 0.689, "step": 1905 }, { "epoch": 0.08, "learning_rate": 1.9771119800550054e-06, "loss": 0.6892, "step": 1910 }, { "epoch": 0.08, "learning_rate": 1.9769746736505694e-06, "loss": 0.7179, "step": 1915 }, { "epoch": 0.08, "learning_rate": 1.97683696141935e-06, "loss": 0.6888, "step": 1920 }, { "epoch": 0.08, "learning_rate": 1.9766988434185514e-06, "loss": 0.7041, "step": 1925 }, { "epoch": 0.08, "learning_rate": 1.976560319705547e-06, "loss": 0.6969, "step": 1930 }, { "epoch": 0.08, "learning_rate": 1.9764213903378786e-06, "loss": 0.7162, "step": 1935 }, { "epoch": 0.08, "learning_rate": 1.9762820553732563e-06, "loss": 0.7178, "step": 1940 }, { "epoch": 0.08, "learning_rate": 1.976142314869558e-06, "loss": 0.7309, "step": 1945 }, { "epoch": 0.08, "learning_rate": 1.976002168884831e-06, "loss": 0.7198, "step": 1950 }, { "epoch": 0.08, "learning_rate": 1.975861617477291e-06, "loss": 0.7131, "step": 1955 }, { "epoch": 0.08, "learning_rate": 1.9757206607053218e-06, "loss": 0.7087, "step": 1960 }, { "epoch": 0.08, "learning_rate": 1.9755792986274755e-06, "loss": 0.6708, "step": 1965 }, { "epoch": 0.08, "learning_rate": 1.975437531302472e-06, "loss": 0.7141, "step": 1970 }, { "epoch": 0.08, "learning_rate": 1.975295358789201e-06, "loss": 0.7152, "step": 1975 }, { "epoch": 0.08, "learning_rate": 1.9751527811467195e-06, "loss": 0.7172, "step": 1980 }, { "epoch": 0.08, "learning_rate": 1.9750097984342534e-06, "loss": 0.7472, "step": 1985 }, { "epoch": 0.08, "learning_rate": 1.9748664107111962e-06, "loss": 0.7129, "step": 1990 }, { "epoch": 0.08, "learning_rate": 1.9747226180371094e-06, "loss": 0.7066, "step": 1995 }, { "epoch": 0.08, "learning_rate": 1.974578420471724e-06, "loss": 0.7049, "step": 2000 }, { "epoch": 0.08, "learning_rate": 1.9744338180749376e-06, "loss": 0.7214, "step": 2005 }, { "epoch": 0.08, "learning_rate": 1.9742888109068175e-06, "loss": 0.7469, "step": 2010 }, { "epoch": 0.08, "learning_rate": 1.9741433990275987e-06, "loss": 0.7119, "step": 2015 }, { "epoch": 0.08, "learning_rate": 1.973997582497683e-06, "loss": 0.7487, "step": 2020 }, { "epoch": 0.08, "learning_rate": 1.973851361377642e-06, "loss": 0.7259, "step": 2025 }, { "epoch": 0.08, "learning_rate": 1.9737047357282143e-06, "loss": 0.7164, "step": 2030 }, { "epoch": 0.08, "learning_rate": 1.9735577056103074e-06, "loss": 0.7639, "step": 2035 }, { "epoch": 0.08, "learning_rate": 1.9734102710849956e-06, "loss": 0.7181, "step": 2040 }, { "epoch": 0.08, "learning_rate": 1.973262432213523e-06, "loss": 0.6833, "step": 2045 }, { "epoch": 0.08, "learning_rate": 1.973114189057299e-06, "loss": 0.7263, "step": 2050 }, { "epoch": 0.08, "learning_rate": 1.9729655416779044e-06, "loss": 0.6957, "step": 2055 }, { "epoch": 0.08, "learning_rate": 1.972816490137085e-06, "loss": 0.716, "step": 2060 }, { "epoch": 0.08, "learning_rate": 1.9726670344967554e-06, "loss": 0.699, "step": 2065 }, { "epoch": 0.08, "learning_rate": 1.9725171748189987e-06, "loss": 0.6858, "step": 2070 }, { "epoch": 0.08, "learning_rate": 1.9723669111660645e-06, "loss": 0.7036, "step": 2075 }, { "epoch": 0.08, "learning_rate": 1.9722162436003715e-06, "loss": 0.6958, "step": 2080 }, { "epoch": 0.08, "learning_rate": 1.9720651721845062e-06, "loss": 0.7303, "step": 2085 }, { "epoch": 0.08, "learning_rate": 1.971913696981222e-06, "loss": 0.6836, "step": 2090 }, { "epoch": 0.09, "learning_rate": 1.97176181805344e-06, "loss": 0.7349, "step": 2095 }, { "epoch": 0.09, "learning_rate": 1.9716095354642493e-06, "loss": 0.7105, "step": 2100 }, { "epoch": 0.09, "learning_rate": 1.971456849276907e-06, "loss": 0.7126, "step": 2105 }, { "epoch": 0.09, "learning_rate": 1.971303759554838e-06, "loss": 0.6959, "step": 2110 }, { "epoch": 0.09, "learning_rate": 1.9711502663616344e-06, "loss": 0.6699, "step": 2115 }, { "epoch": 0.09, "learning_rate": 1.9709963697610554e-06, "loss": 0.6671, "step": 2120 }, { "epoch": 0.09, "learning_rate": 1.970842069817029e-06, "loss": 0.7101, "step": 2125 }, { "epoch": 0.09, "learning_rate": 1.970687366593649e-06, "loss": 0.6985, "step": 2130 }, { "epoch": 0.09, "learning_rate": 1.970532260155179e-06, "loss": 0.7175, "step": 2135 }, { "epoch": 0.09, "learning_rate": 1.970376750566048e-06, "loss": 0.7251, "step": 2140 }, { "epoch": 0.09, "learning_rate": 1.9702208378908537e-06, "loss": 0.6949, "step": 2145 }, { "epoch": 0.09, "learning_rate": 1.970064522194361e-06, "loss": 0.7573, "step": 2150 }, { "epoch": 0.09, "learning_rate": 1.9699078035415014e-06, "loss": 0.6922, "step": 2155 }, { "epoch": 0.09, "learning_rate": 1.9697506819973753e-06, "loss": 0.7215, "step": 2160 }, { "epoch": 0.09, "learning_rate": 1.9695931576272493e-06, "loss": 0.7252, "step": 2165 }, { "epoch": 0.09, "learning_rate": 1.9694352304965572e-06, "loss": 0.7202, "step": 2170 }, { "epoch": 0.09, "learning_rate": 1.9692769006709013e-06, "loss": 0.6932, "step": 2175 }, { "epoch": 0.09, "learning_rate": 1.96911816821605e-06, "loss": 0.6522, "step": 2180 }, { "epoch": 0.09, "learning_rate": 1.9689590331979394e-06, "loss": 0.6933, "step": 2185 }, { "epoch": 0.09, "learning_rate": 1.968799495682673e-06, "loss": 0.6825, "step": 2190 }, { "epoch": 0.09, "learning_rate": 1.968639555736521e-06, "loss": 0.7037, "step": 2195 }, { "epoch": 0.09, "learning_rate": 1.968479213425922e-06, "loss": 0.6911, "step": 2200 }, { "epoch": 0.09, "learning_rate": 1.9683184688174795e-06, "loss": 0.7048, "step": 2205 }, { "epoch": 0.09, "learning_rate": 1.9681573219779658e-06, "loss": 0.7273, "step": 2210 }, { "epoch": 0.09, "learning_rate": 1.9679957729743204e-06, "loss": 0.7105, "step": 2215 }, { "epoch": 0.09, "learning_rate": 1.9678338218736486e-06, "loss": 0.6832, "step": 2220 }, { "epoch": 0.09, "learning_rate": 1.967671468743224e-06, "loss": 0.6829, "step": 2225 }, { "epoch": 0.09, "learning_rate": 1.9675087136504865e-06, "loss": 0.6971, "step": 2230 }, { "epoch": 0.09, "learning_rate": 1.9673455566630437e-06, "loss": 0.7298, "step": 2235 }, { "epoch": 0.09, "learning_rate": 1.9671819978486688e-06, "loss": 0.6841, "step": 2240 }, { "epoch": 0.09, "learning_rate": 1.967018037275303e-06, "loss": 0.6838, "step": 2245 }, { "epoch": 0.09, "learning_rate": 1.966853675011054e-06, "loss": 0.7053, "step": 2250 }, { "epoch": 0.09, "learning_rate": 1.966688911124197e-06, "loss": 0.6886, "step": 2255 }, { "epoch": 0.09, "learning_rate": 1.9665237456831725e-06, "loss": 0.7032, "step": 2260 }, { "epoch": 0.09, "learning_rate": 1.9663581787565898e-06, "loss": 0.7159, "step": 2265 }, { "epoch": 0.09, "learning_rate": 1.966192210413223e-06, "loss": 0.6896, "step": 2270 }, { "epoch": 0.09, "learning_rate": 1.966025840722015e-06, "loss": 0.7004, "step": 2275 }, { "epoch": 0.09, "learning_rate": 1.9658590697520735e-06, "loss": 0.6726, "step": 2280 }, { "epoch": 0.09, "learning_rate": 1.965691897572674e-06, "loss": 0.6911, "step": 2285 }, { "epoch": 0.09, "learning_rate": 1.9655243242532584e-06, "loss": 0.7235, "step": 2290 }, { "epoch": 0.09, "learning_rate": 1.9653563498634347e-06, "loss": 0.7014, "step": 2295 }, { "epoch": 0.09, "learning_rate": 1.9651879744729786e-06, "loss": 0.7192, "step": 2300 }, { "epoch": 0.09, "learning_rate": 1.9650191981518317e-06, "loss": 0.65, "step": 2305 }, { "epoch": 0.09, "learning_rate": 1.964850020970102e-06, "loss": 0.7128, "step": 2310 }, { "epoch": 0.09, "learning_rate": 1.9646804429980644e-06, "loss": 0.7265, "step": 2315 }, { "epoch": 0.09, "learning_rate": 1.96451046430616e-06, "loss": 0.7038, "step": 2320 }, { "epoch": 0.09, "learning_rate": 1.964340084964997e-06, "loss": 0.7132, "step": 2325 }, { "epoch": 0.09, "learning_rate": 1.964169305045348e-06, "loss": 0.6968, "step": 2330 }, { "epoch": 0.09, "learning_rate": 1.9639981246181555e-06, "loss": 0.7456, "step": 2335 }, { "epoch": 0.1, "learning_rate": 1.9638265437545247e-06, "loss": 0.7403, "step": 2340 }, { "epoch": 0.1, "learning_rate": 1.9636545625257297e-06, "loss": 0.6893, "step": 2345 }, { "epoch": 0.1, "learning_rate": 1.96348218100321e-06, "loss": 0.7341, "step": 2350 }, { "epoch": 0.1, "learning_rate": 1.9633093992585706e-06, "loss": 0.7459, "step": 2355 }, { "epoch": 0.1, "learning_rate": 1.963136217363585e-06, "loss": 0.716, "step": 2360 }, { "epoch": 0.1, "learning_rate": 1.9629626353901897e-06, "loss": 0.7086, "step": 2365 }, { "epoch": 0.1, "learning_rate": 1.9627886534104903e-06, "loss": 0.7041, "step": 2370 }, { "epoch": 0.1, "learning_rate": 1.962614271496757e-06, "loss": 0.7094, "step": 2375 }, { "epoch": 0.1, "learning_rate": 1.962439489721427e-06, "loss": 0.6794, "step": 2380 }, { "epoch": 0.1, "learning_rate": 1.962264308157102e-06, "loss": 0.6751, "step": 2385 }, { "epoch": 0.1, "learning_rate": 1.9620887268765523e-06, "loss": 0.7398, "step": 2390 }, { "epoch": 0.1, "learning_rate": 1.961912745952712e-06, "loss": 0.6622, "step": 2395 }, { "epoch": 0.1, "learning_rate": 1.961736365458682e-06, "loss": 0.7404, "step": 2400 }, { "epoch": 0.1, "eval_loss": 0.6729753613471985, "eval_runtime": 140.5864, "eval_samples_per_second": 16.83, "eval_steps_per_second": 2.81, "step": 2400 }, { "epoch": 0.1, "learning_rate": 1.96155958546773e-06, "loss": 0.6928, "step": 2405 }, { "epoch": 0.1, "learning_rate": 1.961382406053288e-06, "loss": 0.6832, "step": 2410 }, { "epoch": 0.1, "learning_rate": 1.961204827288955e-06, "loss": 0.6775, "step": 2415 }, { "epoch": 0.1, "learning_rate": 1.961026849248496e-06, "loss": 0.6995, "step": 2420 }, { "epoch": 0.1, "learning_rate": 1.9608484720058416e-06, "loss": 0.6508, "step": 2425 }, { "epoch": 0.1, "learning_rate": 1.960669695635087e-06, "loss": 0.6865, "step": 2430 }, { "epoch": 0.1, "learning_rate": 1.960490520210496e-06, "loss": 0.7273, "step": 2435 }, { "epoch": 0.1, "learning_rate": 1.9603109458064955e-06, "loss": 0.6513, "step": 2440 }, { "epoch": 0.1, "learning_rate": 1.9601309724976795e-06, "loss": 0.7051, "step": 2445 }, { "epoch": 0.1, "learning_rate": 1.9599506003588068e-06, "loss": 0.7164, "step": 2450 }, { "epoch": 0.1, "learning_rate": 1.9597698294648034e-06, "loss": 0.7269, "step": 2455 }, { "epoch": 0.1, "learning_rate": 1.959588659890759e-06, "loss": 0.7082, "step": 2460 }, { "epoch": 0.1, "learning_rate": 1.9594070917119306e-06, "loss": 0.7164, "step": 2465 }, { "epoch": 0.1, "learning_rate": 1.9592251250037394e-06, "loss": 0.7156, "step": 2470 }, { "epoch": 0.1, "learning_rate": 1.9590427598417733e-06, "loss": 0.6603, "step": 2475 }, { "epoch": 0.1, "learning_rate": 1.958859996301785e-06, "loss": 0.6811, "step": 2480 }, { "epoch": 0.1, "learning_rate": 1.958676834459693e-06, "loss": 0.701, "step": 2485 }, { "epoch": 0.1, "learning_rate": 1.9584932743915807e-06, "loss": 0.6546, "step": 2490 }, { "epoch": 0.1, "learning_rate": 1.9583093161736975e-06, "loss": 0.7022, "step": 2495 }, { "epoch": 0.1, "learning_rate": 1.9581249598824588e-06, "loss": 0.7066, "step": 2500 }, { "epoch": 0.1, "learning_rate": 1.957940205594444e-06, "loss": 0.6942, "step": 2505 }, { "epoch": 0.1, "learning_rate": 1.957755053386398e-06, "loss": 0.6983, "step": 2510 }, { "epoch": 0.1, "learning_rate": 1.957569503335232e-06, "loss": 0.7491, "step": 2515 }, { "epoch": 0.1, "learning_rate": 1.957383555518022e-06, "loss": 0.6916, "step": 2520 }, { "epoch": 0.1, "learning_rate": 1.9571972100120087e-06, "loss": 0.7035, "step": 2525 }, { "epoch": 0.1, "learning_rate": 1.9570104668945986e-06, "loss": 0.6987, "step": 2530 }, { "epoch": 0.1, "learning_rate": 1.956823326243363e-06, "loss": 0.7035, "step": 2535 }, { "epoch": 0.1, "learning_rate": 1.956635788136039e-06, "loss": 0.6854, "step": 2540 }, { "epoch": 0.1, "learning_rate": 1.9564478526505276e-06, "loss": 0.7194, "step": 2545 }, { "epoch": 0.1, "learning_rate": 1.9562595198648956e-06, "loss": 0.7026, "step": 2550 }, { "epoch": 0.1, "learning_rate": 1.9560707898573757e-06, "loss": 0.6887, "step": 2555 }, { "epoch": 0.1, "learning_rate": 1.9558816627063636e-06, "loss": 0.7127, "step": 2560 }, { "epoch": 0.1, "learning_rate": 1.9556921384904217e-06, "loss": 0.7126, "step": 2565 }, { "epoch": 0.1, "learning_rate": 1.9555022172882767e-06, "loss": 0.671, "step": 2570 }, { "epoch": 0.1, "learning_rate": 1.9553118991788203e-06, "loss": 0.7004, "step": 2575 }, { "epoch": 0.1, "learning_rate": 1.9551211842411083e-06, "loss": 0.7303, "step": 2580 }, { "epoch": 0.1, "learning_rate": 1.9549300725543627e-06, "loss": 0.7571, "step": 2585 }, { "epoch": 0.11, "learning_rate": 1.9547385641979696e-06, "loss": 0.6688, "step": 2590 }, { "epoch": 0.11, "learning_rate": 1.9545466592514795e-06, "loss": 0.7083, "step": 2595 }, { "epoch": 0.11, "learning_rate": 1.9543543577946086e-06, "loss": 0.7426, "step": 2600 }, { "epoch": 0.11, "learning_rate": 1.9541616599072365e-06, "loss": 0.6722, "step": 2605 }, { "epoch": 0.11, "learning_rate": 1.953968565669409e-06, "loss": 0.7215, "step": 2610 }, { "epoch": 0.11, "learning_rate": 1.9537750751613354e-06, "loss": 0.7354, "step": 2615 }, { "epoch": 0.11, "learning_rate": 1.95358118846339e-06, "loss": 0.7253, "step": 2620 }, { "epoch": 0.11, "learning_rate": 1.9533869056561113e-06, "loss": 0.6877, "step": 2625 }, { "epoch": 0.11, "learning_rate": 1.9531922268202033e-06, "loss": 0.72, "step": 2630 }, { "epoch": 0.11, "learning_rate": 1.952997152036534e-06, "loss": 0.6894, "step": 2635 }, { "epoch": 0.11, "learning_rate": 1.952801681386135e-06, "loss": 0.69, "step": 2640 }, { "epoch": 0.11, "learning_rate": 1.9526058149502035e-06, "loss": 0.689, "step": 2645 }, { "epoch": 0.11, "learning_rate": 1.952409552810101e-06, "loss": 0.7495, "step": 2650 }, { "epoch": 0.11, "learning_rate": 1.9522128950473525e-06, "loss": 0.7378, "step": 2655 }, { "epoch": 0.11, "learning_rate": 1.9520158417436486e-06, "loss": 0.7041, "step": 2660 }, { "epoch": 0.11, "learning_rate": 1.9518183929808433e-06, "loss": 0.711, "step": 2665 }, { "epoch": 0.11, "learning_rate": 1.9516205488409543e-06, "loss": 0.6908, "step": 2670 }, { "epoch": 0.11, "learning_rate": 1.9514223094061654e-06, "loss": 0.7107, "step": 2675 }, { "epoch": 0.11, "learning_rate": 1.951223674758823e-06, "loss": 0.7225, "step": 2680 }, { "epoch": 0.11, "learning_rate": 1.9510246449814385e-06, "loss": 0.7217, "step": 2685 }, { "epoch": 0.11, "learning_rate": 1.950825220156687e-06, "loss": 0.7274, "step": 2690 }, { "epoch": 0.11, "learning_rate": 1.950625400367408e-06, "loss": 0.6867, "step": 2695 }, { "epoch": 0.11, "learning_rate": 1.9504251856966043e-06, "loss": 0.7186, "step": 2700 }, { "epoch": 0.11, "learning_rate": 1.950224576227444e-06, "loss": 0.6889, "step": 2705 }, { "epoch": 0.11, "learning_rate": 1.9500235720432583e-06, "loss": 0.7005, "step": 2710 }, { "epoch": 0.11, "learning_rate": 1.9498221732275425e-06, "loss": 0.6874, "step": 2715 }, { "epoch": 0.11, "learning_rate": 1.9496203798639565e-06, "loss": 0.7033, "step": 2720 }, { "epoch": 0.11, "learning_rate": 1.9494181920363223e-06, "loss": 0.707, "step": 2725 }, { "epoch": 0.11, "learning_rate": 1.9492156098286283e-06, "loss": 0.7348, "step": 2730 }, { "epoch": 0.11, "learning_rate": 1.949012633325024e-06, "loss": 0.6636, "step": 2735 }, { "epoch": 0.11, "learning_rate": 1.9488092626098254e-06, "loss": 0.6975, "step": 2740 }, { "epoch": 0.11, "learning_rate": 1.9486054977675103e-06, "loss": 0.7124, "step": 2745 }, { "epoch": 0.11, "learning_rate": 1.9484013388827204e-06, "loss": 0.663, "step": 2750 }, { "epoch": 0.11, "learning_rate": 1.9481967860402627e-06, "loss": 0.6916, "step": 2755 }, { "epoch": 0.11, "learning_rate": 1.9479918393251056e-06, "loss": 0.7344, "step": 2760 }, { "epoch": 0.11, "learning_rate": 1.9477864988223828e-06, "loss": 0.6752, "step": 2765 }, { "epoch": 0.11, "learning_rate": 1.9475807646173908e-06, "loss": 0.6885, "step": 2770 }, { "epoch": 0.11, "learning_rate": 1.9473746367955893e-06, "loss": 0.6602, "step": 2775 }, { "epoch": 0.11, "learning_rate": 1.947168115442603e-06, "loss": 0.6764, "step": 2780 }, { "epoch": 0.11, "learning_rate": 1.946961200644218e-06, "loss": 0.6784, "step": 2785 }, { "epoch": 0.11, "learning_rate": 1.946753892486386e-06, "loss": 0.7325, "step": 2790 }, { "epoch": 0.11, "learning_rate": 1.94654619105522e-06, "loss": 0.6533, "step": 2795 }, { "epoch": 0.11, "learning_rate": 1.946338096436998e-06, "loss": 0.6741, "step": 2800 }, { "epoch": 0.11, "learning_rate": 1.9461296087181604e-06, "loss": 0.7046, "step": 2805 }, { "epoch": 0.11, "learning_rate": 1.9459207279853113e-06, "loss": 0.6797, "step": 2810 }, { "epoch": 0.11, "learning_rate": 1.945711454325218e-06, "loss": 0.7096, "step": 2815 }, { "epoch": 0.11, "learning_rate": 1.9455017878248107e-06, "loss": 0.698, "step": 2820 }, { "epoch": 0.11, "learning_rate": 1.9452917285711834e-06, "loss": 0.7036, "step": 2825 }, { "epoch": 0.11, "learning_rate": 1.945081276651593e-06, "loss": 0.7205, "step": 2830 }, { "epoch": 0.12, "learning_rate": 1.9448704321534588e-06, "loss": 0.6583, "step": 2835 }, { "epoch": 0.12, "learning_rate": 1.944659195164364e-06, "loss": 0.6757, "step": 2840 }, { "epoch": 0.12, "learning_rate": 1.9444475657720545e-06, "loss": 0.7033, "step": 2845 }, { "epoch": 0.12, "learning_rate": 1.9442355440644394e-06, "loss": 0.7029, "step": 2850 }, { "epoch": 0.12, "learning_rate": 1.944023130129591e-06, "loss": 0.6993, "step": 2855 }, { "epoch": 0.12, "learning_rate": 1.9438103240557446e-06, "loss": 0.704, "step": 2860 }, { "epoch": 0.12, "learning_rate": 1.9435971259312966e-06, "loss": 0.7104, "step": 2865 }, { "epoch": 0.12, "learning_rate": 1.9433835358448086e-06, "loss": 0.7017, "step": 2870 }, { "epoch": 0.12, "learning_rate": 1.943169553885004e-06, "loss": 0.7107, "step": 2875 }, { "epoch": 0.12, "learning_rate": 1.9429551801407687e-06, "loss": 0.6939, "step": 2880 }, { "epoch": 0.12, "learning_rate": 1.942740414701152e-06, "loss": 0.6737, "step": 2885 }, { "epoch": 0.12, "learning_rate": 1.9425252576553656e-06, "loss": 0.7174, "step": 2890 }, { "epoch": 0.12, "learning_rate": 1.942309709092784e-06, "loss": 0.7147, "step": 2895 }, { "epoch": 0.12, "learning_rate": 1.9420937691029435e-06, "loss": 0.7351, "step": 2900 }, { "epoch": 0.12, "learning_rate": 1.9418774377755444e-06, "loss": 0.6845, "step": 2905 }, { "epoch": 0.12, "learning_rate": 1.9416607152004485e-06, "loss": 0.7156, "step": 2910 }, { "epoch": 0.12, "learning_rate": 1.9414436014676806e-06, "loss": 0.7532, "step": 2915 }, { "epoch": 0.12, "learning_rate": 1.941226096667428e-06, "loss": 0.7421, "step": 2920 }, { "epoch": 0.12, "learning_rate": 1.94100820089004e-06, "loss": 0.7049, "step": 2925 }, { "epoch": 0.12, "learning_rate": 1.940789914226029e-06, "loss": 0.7652, "step": 2930 }, { "epoch": 0.12, "learning_rate": 1.940571236766069e-06, "loss": 0.6884, "step": 2935 }, { "epoch": 0.12, "learning_rate": 1.9403521686009964e-06, "loss": 0.6974, "step": 2940 }, { "epoch": 0.12, "learning_rate": 1.940132709821811e-06, "loss": 0.7362, "step": 2945 }, { "epoch": 0.12, "learning_rate": 1.9399128605196737e-06, "loss": 0.7167, "step": 2950 }, { "epoch": 0.12, "learning_rate": 1.9396926207859082e-06, "loss": 0.6806, "step": 2955 }, { "epoch": 0.12, "learning_rate": 1.939471990712e-06, "loss": 0.7069, "step": 2960 }, { "epoch": 0.12, "learning_rate": 1.939250970389597e-06, "loss": 0.7185, "step": 2965 }, { "epoch": 0.12, "learning_rate": 1.9390295599105085e-06, "loss": 0.6996, "step": 2970 }, { "epoch": 0.12, "learning_rate": 1.9388077593667075e-06, "loss": 0.7304, "step": 2975 }, { "epoch": 0.12, "learning_rate": 1.9385855688503276e-06, "loss": 0.7209, "step": 2980 }, { "epoch": 0.12, "learning_rate": 1.9383629884536644e-06, "loss": 0.7077, "step": 2985 }, { "epoch": 0.12, "learning_rate": 1.938140018269176e-06, "loss": 0.6838, "step": 2990 }, { "epoch": 0.12, "learning_rate": 1.937916658389483e-06, "loss": 0.6784, "step": 2995 }, { "epoch": 0.12, "learning_rate": 1.9376929089073665e-06, "loss": 0.7185, "step": 3000 }, { "epoch": 0.12, "learning_rate": 1.9374687699157703e-06, "loss": 0.7146, "step": 3005 }, { "epoch": 0.12, "learning_rate": 1.9372442415077994e-06, "loss": 0.7131, "step": 3010 }, { "epoch": 0.12, "learning_rate": 1.9370193237767213e-06, "loss": 0.7014, "step": 3015 }, { "epoch": 0.12, "learning_rate": 1.9367940168159648e-06, "loss": 0.697, "step": 3020 }, { "epoch": 0.12, "learning_rate": 1.9365683207191205e-06, "loss": 0.7009, "step": 3025 }, { "epoch": 0.12, "learning_rate": 1.9363422355799406e-06, "loss": 0.7125, "step": 3030 }, { "epoch": 0.12, "learning_rate": 1.936115761492339e-06, "loss": 0.6954, "step": 3035 }, { "epoch": 0.12, "learning_rate": 1.935888898550391e-06, "loss": 0.7465, "step": 3040 }, { "epoch": 0.12, "learning_rate": 1.935661646848333e-06, "loss": 0.7253, "step": 3045 }, { "epoch": 0.12, "learning_rate": 1.935434006480564e-06, "loss": 0.7223, "step": 3050 }, { "epoch": 0.12, "learning_rate": 1.935205977541644e-06, "loss": 0.7141, "step": 3055 }, { "epoch": 0.12, "learning_rate": 1.9349775601262935e-06, "loss": 0.732, "step": 3060 }, { "epoch": 0.12, "learning_rate": 1.9347487543293958e-06, "loss": 0.709, "step": 3065 }, { "epoch": 0.12, "learning_rate": 1.934519560245994e-06, "loss": 0.724, "step": 3070 }, { "epoch": 0.12, "learning_rate": 1.9342899779712946e-06, "loss": 0.6998, "step": 3075 }, { "epoch": 0.13, "learning_rate": 1.934060007600663e-06, "loss": 0.6945, "step": 3080 }, { "epoch": 0.13, "learning_rate": 1.9338296492296267e-06, "loss": 0.7068, "step": 3085 }, { "epoch": 0.13, "learning_rate": 1.9335989029538756e-06, "loss": 0.691, "step": 3090 }, { "epoch": 0.13, "learning_rate": 1.9333677688692595e-06, "loss": 0.7063, "step": 3095 }, { "epoch": 0.13, "learning_rate": 1.9331362470717886e-06, "loss": 0.7518, "step": 3100 }, { "epoch": 0.13, "learning_rate": 1.9329043376576357e-06, "loss": 0.6806, "step": 3105 }, { "epoch": 0.13, "learning_rate": 1.932672040723134e-06, "loss": 0.6878, "step": 3110 }, { "epoch": 0.13, "learning_rate": 1.9324393563647772e-06, "loss": 0.6961, "step": 3115 }, { "epoch": 0.13, "learning_rate": 1.932206284679221e-06, "loss": 0.7125, "step": 3120 }, { "epoch": 0.13, "learning_rate": 1.931972825763281e-06, "loss": 0.6893, "step": 3125 }, { "epoch": 0.13, "learning_rate": 1.931738979713934e-06, "loss": 0.6821, "step": 3130 }, { "epoch": 0.13, "learning_rate": 1.9315047466283177e-06, "loss": 0.735, "step": 3135 }, { "epoch": 0.13, "learning_rate": 1.9312701266037302e-06, "loss": 0.6873, "step": 3140 }, { "epoch": 0.13, "learning_rate": 1.931035119737631e-06, "loss": 0.6853, "step": 3145 }, { "epoch": 0.13, "learning_rate": 1.9307997261276393e-06, "loss": 0.7256, "step": 3150 }, { "epoch": 0.13, "learning_rate": 1.9305639458715365e-06, "loss": 0.7297, "step": 3155 }, { "epoch": 0.13, "learning_rate": 1.930327779067263e-06, "loss": 0.7162, "step": 3160 }, { "epoch": 0.13, "learning_rate": 1.9300912258129206e-06, "loss": 0.7133, "step": 3165 }, { "epoch": 0.13, "learning_rate": 1.9298542862067712e-06, "loss": 0.7255, "step": 3170 }, { "epoch": 0.13, "learning_rate": 1.9296169603472384e-06, "loss": 0.6895, "step": 3175 }, { "epoch": 0.13, "learning_rate": 1.929379248332904e-06, "loss": 0.729, "step": 3180 }, { "epoch": 0.13, "learning_rate": 1.9291411502625123e-06, "loss": 0.6971, "step": 3185 }, { "epoch": 0.13, "learning_rate": 1.928902666234967e-06, "loss": 0.7232, "step": 3190 }, { "epoch": 0.13, "learning_rate": 1.9286637963493323e-06, "loss": 0.6653, "step": 3195 }, { "epoch": 0.13, "learning_rate": 1.9284245407048323e-06, "loss": 0.7309, "step": 3200 }, { "epoch": 0.13, "eval_loss": 0.6701433062553406, "eval_runtime": 140.1425, "eval_samples_per_second": 16.883, "eval_steps_per_second": 2.819, "step": 3200 }, { "epoch": 0.13, "learning_rate": 1.928184899400853e-06, "loss": 0.7025, "step": 3205 }, { "epoch": 0.13, "learning_rate": 1.9279448725369375e-06, "loss": 0.711, "step": 3210 }, { "epoch": 0.13, "learning_rate": 1.927704460212792e-06, "loss": 0.6813, "step": 3215 }, { "epoch": 0.13, "learning_rate": 1.9274636625282816e-06, "loss": 0.7209, "step": 3220 }, { "epoch": 0.13, "learning_rate": 1.927222479583431e-06, "loss": 0.7279, "step": 3225 }, { "epoch": 0.13, "learning_rate": 1.9269809114784265e-06, "loss": 0.6779, "step": 3230 }, { "epoch": 0.13, "learning_rate": 1.926738958313612e-06, "loss": 0.7237, "step": 3235 }, { "epoch": 0.13, "learning_rate": 1.9264966201894945e-06, "loss": 0.7123, "step": 3240 }, { "epoch": 0.13, "learning_rate": 1.9262538972067375e-06, "loss": 0.7147, "step": 3245 }, { "epoch": 0.13, "learning_rate": 1.9260107894661666e-06, "loss": 0.7104, "step": 3250 }, { "epoch": 0.13, "learning_rate": 1.9257672970687673e-06, "loss": 0.7059, "step": 3255 }, { "epoch": 0.13, "learning_rate": 1.9255234201156834e-06, "loss": 0.7259, "step": 3260 }, { "epoch": 0.13, "learning_rate": 1.9252791587082195e-06, "loss": 0.6778, "step": 3265 }, { "epoch": 0.13, "learning_rate": 1.9250345129478396e-06, "loss": 0.7456, "step": 3270 }, { "epoch": 0.13, "learning_rate": 1.924789482936168e-06, "loss": 0.7254, "step": 3275 }, { "epoch": 0.13, "learning_rate": 1.924544068774987e-06, "loss": 0.732, "step": 3280 }, { "epoch": 0.13, "learning_rate": 1.92429827056624e-06, "loss": 0.6824, "step": 3285 }, { "epoch": 0.13, "learning_rate": 1.9240520884120296e-06, "loss": 0.6807, "step": 3290 }, { "epoch": 0.13, "learning_rate": 1.923805522414618e-06, "loss": 0.6679, "step": 3295 }, { "epoch": 0.13, "learning_rate": 1.923558572676426e-06, "loss": 0.6813, "step": 3300 }, { "epoch": 0.13, "learning_rate": 1.9233112393000344e-06, "loss": 0.6791, "step": 3305 }, { "epoch": 0.13, "learning_rate": 1.9230635223881836e-06, "loss": 0.6877, "step": 3310 }, { "epoch": 0.13, "learning_rate": 1.9228154220437733e-06, "loss": 0.7023, "step": 3315 }, { "epoch": 0.13, "learning_rate": 1.922566938369861e-06, "loss": 0.7001, "step": 3320 }, { "epoch": 0.14, "learning_rate": 1.9223180714696664e-06, "loss": 0.691, "step": 3325 }, { "epoch": 0.14, "learning_rate": 1.922068821446565e-06, "loss": 0.7149, "step": 3330 }, { "epoch": 0.14, "learning_rate": 1.9218191884040945e-06, "loss": 0.6826, "step": 3335 }, { "epoch": 0.14, "learning_rate": 1.9215691724459496e-06, "loss": 0.7069, "step": 3340 }, { "epoch": 0.14, "learning_rate": 1.9213187736759848e-06, "loss": 0.7044, "step": 3345 }, { "epoch": 0.14, "learning_rate": 1.9210679921982134e-06, "loss": 0.7026, "step": 3350 }, { "epoch": 0.14, "learning_rate": 1.9208168281168083e-06, "loss": 0.7151, "step": 3355 }, { "epoch": 0.14, "learning_rate": 1.9205652815361003e-06, "loss": 0.7147, "step": 3360 }, { "epoch": 0.14, "learning_rate": 1.92031335256058e-06, "loss": 0.6891, "step": 3365 }, { "epoch": 0.14, "learning_rate": 1.9200610412948967e-06, "loss": 0.7246, "step": 3370 }, { "epoch": 0.14, "learning_rate": 1.9198083478438584e-06, "loss": 0.6832, "step": 3375 }, { "epoch": 0.14, "learning_rate": 1.919555272312431e-06, "loss": 0.6708, "step": 3380 }, { "epoch": 0.14, "learning_rate": 1.91930181480574e-06, "loss": 0.7217, "step": 3385 }, { "epoch": 0.14, "learning_rate": 1.9190479754290703e-06, "loss": 0.7272, "step": 3390 }, { "epoch": 0.14, "learning_rate": 1.918793754287864e-06, "loss": 0.6629, "step": 3395 }, { "epoch": 0.14, "learning_rate": 1.918539151487722e-06, "loss": 0.6633, "step": 3400 }, { "epoch": 0.14, "learning_rate": 1.9182841671344053e-06, "loss": 0.7085, "step": 3405 }, { "epoch": 0.14, "learning_rate": 1.918028801333831e-06, "loss": 0.7321, "step": 3410 }, { "epoch": 0.14, "learning_rate": 1.9177730541920757e-06, "loss": 0.7141, "step": 3415 }, { "epoch": 0.14, "learning_rate": 1.9175169258153752e-06, "loss": 0.6976, "step": 3420 }, { "epoch": 0.14, "learning_rate": 1.9172604163101227e-06, "loss": 0.7034, "step": 3425 }, { "epoch": 0.14, "learning_rate": 1.9170035257828706e-06, "loss": 0.6478, "step": 3430 }, { "epoch": 0.14, "learning_rate": 1.9167462543403286e-06, "loss": 0.6949, "step": 3435 }, { "epoch": 0.14, "learning_rate": 1.9164886020893647e-06, "loss": 0.7224, "step": 3440 }, { "epoch": 0.14, "learning_rate": 1.9162305691370057e-06, "loss": 0.7016, "step": 3445 }, { "epoch": 0.14, "learning_rate": 1.9159721555904364e-06, "loss": 0.7065, "step": 3450 }, { "epoch": 0.14, "learning_rate": 1.9157133615569993e-06, "loss": 0.6808, "step": 3455 }, { "epoch": 0.14, "learning_rate": 1.9154541871441947e-06, "loss": 0.7026, "step": 3460 }, { "epoch": 0.14, "learning_rate": 1.9151946324596826e-06, "loss": 0.7035, "step": 3465 }, { "epoch": 0.14, "learning_rate": 1.9149346976112787e-06, "loss": 0.6941, "step": 3470 }, { "epoch": 0.14, "learning_rate": 1.9146743827069584e-06, "loss": 0.6988, "step": 3475 }, { "epoch": 0.14, "learning_rate": 1.9144136878548536e-06, "loss": 0.6812, "step": 3480 }, { "epoch": 0.14, "learning_rate": 1.914152613163255e-06, "loss": 0.6976, "step": 3485 }, { "epoch": 0.14, "learning_rate": 1.913891158740611e-06, "loss": 0.7263, "step": 3490 }, { "epoch": 0.14, "learning_rate": 1.913629324695527e-06, "loss": 0.6872, "step": 3495 }, { "epoch": 0.14, "learning_rate": 1.913367111136767e-06, "loss": 0.6588, "step": 3500 }, { "epoch": 0.14, "learning_rate": 1.913104518173252e-06, "loss": 0.6679, "step": 3505 }, { "epoch": 0.14, "learning_rate": 1.912841545914061e-06, "loss": 0.7026, "step": 3510 }, { "epoch": 0.14, "learning_rate": 1.9125781944684304e-06, "loss": 0.6763, "step": 3515 }, { "epoch": 0.14, "learning_rate": 1.9123144639457537e-06, "loss": 0.7106, "step": 3520 }, { "epoch": 0.14, "learning_rate": 1.9120503544555826e-06, "loss": 0.7148, "step": 3525 }, { "epoch": 0.14, "learning_rate": 1.911785866107626e-06, "loss": 0.7029, "step": 3530 }, { "epoch": 0.14, "learning_rate": 1.9115209990117495e-06, "loss": 0.7071, "step": 3535 }, { "epoch": 0.14, "learning_rate": 1.9112557532779773e-06, "loss": 0.6967, "step": 3540 }, { "epoch": 0.14, "learning_rate": 1.9109901290164897e-06, "loss": 0.6872, "step": 3545 }, { "epoch": 0.14, "learning_rate": 1.9107241263376255e-06, "loss": 0.6732, "step": 3550 }, { "epoch": 0.14, "learning_rate": 1.9104577453518783e-06, "loss": 0.7015, "step": 3555 }, { "epoch": 0.14, "learning_rate": 1.910190986169902e-06, "loss": 0.6894, "step": 3560 }, { "epoch": 0.14, "learning_rate": 1.9099238489025054e-06, "loss": 0.6748, "step": 3565 }, { "epoch": 0.15, "learning_rate": 1.909656333660655e-06, "loss": 0.6691, "step": 3570 }, { "epoch": 0.15, "learning_rate": 1.909388440555474e-06, "loss": 0.6763, "step": 3575 }, { "epoch": 0.15, "learning_rate": 1.909120169698244e-06, "loss": 0.7212, "step": 3580 }, { "epoch": 0.15, "learning_rate": 1.9088515212004006e-06, "loss": 0.6999, "step": 3585 }, { "epoch": 0.15, "learning_rate": 1.90858249517354e-06, "loss": 0.7089, "step": 3590 }, { "epoch": 0.15, "learning_rate": 1.9083130917294116e-06, "loss": 0.6819, "step": 3595 }, { "epoch": 0.15, "learning_rate": 1.9080433109799243e-06, "loss": 0.6902, "step": 3600 }, { "epoch": 0.15, "learning_rate": 1.9077731530371425e-06, "loss": 0.6825, "step": 3605 }, { "epoch": 0.15, "learning_rate": 1.9075026180132873e-06, "loss": 0.7008, "step": 3610 }, { "epoch": 0.15, "learning_rate": 1.9072317060207364e-06, "loss": 0.6789, "step": 3615 }, { "epoch": 0.15, "learning_rate": 1.9069604171720243e-06, "loss": 0.7221, "step": 3620 }, { "epoch": 0.15, "learning_rate": 1.9066887515798426e-06, "loss": 0.6761, "step": 3625 }, { "epoch": 0.15, "learning_rate": 1.9064167093570382e-06, "loss": 0.7134, "step": 3630 }, { "epoch": 0.15, "learning_rate": 1.9061442906166154e-06, "loss": 0.6633, "step": 3635 }, { "epoch": 0.15, "learning_rate": 1.9058714954717345e-06, "loss": 0.6869, "step": 3640 }, { "epoch": 0.15, "learning_rate": 1.9055983240357123e-06, "loss": 0.6967, "step": 3645 }, { "epoch": 0.15, "learning_rate": 1.9053247764220218e-06, "loss": 0.7272, "step": 3650 }, { "epoch": 0.15, "learning_rate": 1.905050852744292e-06, "loss": 0.6696, "step": 3655 }, { "epoch": 0.15, "learning_rate": 1.904776553116309e-06, "loss": 0.6606, "step": 3660 }, { "epoch": 0.15, "learning_rate": 1.9045018776520138e-06, "loss": 0.7055, "step": 3665 }, { "epoch": 0.15, "learning_rate": 1.9042268264655048e-06, "loss": 0.6879, "step": 3670 }, { "epoch": 0.15, "learning_rate": 1.9039513996710357e-06, "loss": 0.7322, "step": 3675 }, { "epoch": 0.15, "learning_rate": 1.903675597383016e-06, "loss": 0.7115, "step": 3680 }, { "epoch": 0.15, "learning_rate": 1.9033994197160124e-06, "loss": 0.6892, "step": 3685 }, { "epoch": 0.15, "learning_rate": 1.903122866784746e-06, "loss": 0.7278, "step": 3690 }, { "epoch": 0.15, "learning_rate": 1.9028459387040944e-06, "loss": 0.6844, "step": 3695 }, { "epoch": 0.15, "learning_rate": 1.9025686355890916e-06, "loss": 0.7143, "step": 3700 }, { "epoch": 0.15, "learning_rate": 1.9022909575549265e-06, "loss": 0.7161, "step": 3705 }, { "epoch": 0.15, "learning_rate": 1.9020129047169443e-06, "loss": 0.7013, "step": 3710 }, { "epoch": 0.15, "learning_rate": 1.9017344771906463e-06, "loss": 0.6588, "step": 3715 }, { "epoch": 0.15, "learning_rate": 1.9014556750916879e-06, "loss": 0.6451, "step": 3720 }, { "epoch": 0.15, "learning_rate": 1.9011764985358817e-06, "loss": 0.6991, "step": 3725 }, { "epoch": 0.15, "learning_rate": 1.900896947639195e-06, "loss": 0.6901, "step": 3730 }, { "epoch": 0.15, "learning_rate": 1.9006170225177508e-06, "loss": 0.7236, "step": 3735 }, { "epoch": 0.15, "learning_rate": 1.9003367232878273e-06, "loss": 0.6827, "step": 3740 }, { "epoch": 0.15, "learning_rate": 1.9000560500658592e-06, "loss": 0.6791, "step": 3745 }, { "epoch": 0.15, "learning_rate": 1.8997750029684347e-06, "loss": 0.6822, "step": 3750 }, { "epoch": 0.15, "learning_rate": 1.899493582112299e-06, "loss": 0.7139, "step": 3755 }, { "epoch": 0.15, "learning_rate": 1.8992117876143516e-06, "loss": 0.6901, "step": 3760 }, { "epoch": 0.15, "learning_rate": 1.8989296195916476e-06, "loss": 0.7012, "step": 3765 }, { "epoch": 0.15, "learning_rate": 1.8986470781613973e-06, "loss": 0.7305, "step": 3770 }, { "epoch": 0.15, "learning_rate": 1.8983641634409656e-06, "loss": 0.6812, "step": 3775 }, { "epoch": 0.15, "learning_rate": 1.8980808755478726e-06, "loss": 0.6781, "step": 3780 }, { "epoch": 0.15, "learning_rate": 1.8977972145997945e-06, "loss": 0.7271, "step": 3785 }, { "epoch": 0.15, "learning_rate": 1.897513180714561e-06, "loss": 0.7162, "step": 3790 }, { "epoch": 0.15, "learning_rate": 1.8972287740101572e-06, "loss": 0.6669, "step": 3795 }, { "epoch": 0.15, "learning_rate": 1.8969439946047232e-06, "loss": 0.7358, "step": 3800 }, { "epoch": 0.15, "learning_rate": 1.8966588426165544e-06, "loss": 0.7315, "step": 3805 }, { "epoch": 0.15, "learning_rate": 1.8963733181640999e-06, "loss": 0.7245, "step": 3810 }, { "epoch": 0.15, "learning_rate": 1.8960874213659643e-06, "loss": 0.7233, "step": 3815 }, { "epoch": 0.16, "learning_rate": 1.8958011523409067e-06, "loss": 0.7128, "step": 3820 }, { "epoch": 0.16, "learning_rate": 1.8955145112078408e-06, "loss": 0.6579, "step": 3825 }, { "epoch": 0.16, "learning_rate": 1.8952274980858344e-06, "loss": 0.7155, "step": 3830 }, { "epoch": 0.16, "learning_rate": 1.8949401130941109e-06, "loss": 0.6825, "step": 3835 }, { "epoch": 0.16, "learning_rate": 1.894652356352047e-06, "loss": 0.6838, "step": 3840 }, { "epoch": 0.16, "learning_rate": 1.8943642279791747e-06, "loss": 0.7339, "step": 3845 }, { "epoch": 0.16, "learning_rate": 1.8940757280951799e-06, "loss": 0.7334, "step": 3850 }, { "epoch": 0.16, "learning_rate": 1.8937868568199026e-06, "loss": 0.7113, "step": 3855 }, { "epoch": 0.16, "learning_rate": 1.893497614273338e-06, "loss": 0.7085, "step": 3860 }, { "epoch": 0.16, "learning_rate": 1.8932080005756346e-06, "loss": 0.7179, "step": 3865 }, { "epoch": 0.16, "learning_rate": 1.8929180158470953e-06, "loss": 0.6847, "step": 3870 }, { "epoch": 0.16, "learning_rate": 1.8926276602081777e-06, "loss": 0.7149, "step": 3875 }, { "epoch": 0.16, "learning_rate": 1.8923369337794926e-06, "loss": 0.6732, "step": 3880 }, { "epoch": 0.16, "learning_rate": 1.8920458366818055e-06, "loss": 0.6769, "step": 3885 }, { "epoch": 0.16, "learning_rate": 1.8917543690360351e-06, "loss": 0.6755, "step": 3890 }, { "epoch": 0.16, "learning_rate": 1.8914625309632552e-06, "loss": 0.7257, "step": 3895 }, { "epoch": 0.16, "learning_rate": 1.8911703225846921e-06, "loss": 0.6711, "step": 3900 }, { "epoch": 0.16, "learning_rate": 1.8908777440217274e-06, "loss": 0.6978, "step": 3905 }, { "epoch": 0.16, "learning_rate": 1.8905847953958951e-06, "loss": 0.706, "step": 3910 }, { "epoch": 0.16, "learning_rate": 1.8902914768288837e-06, "loss": 0.7315, "step": 3915 }, { "epoch": 0.16, "learning_rate": 1.8899977884425353e-06, "loss": 0.7385, "step": 3920 }, { "epoch": 0.16, "learning_rate": 1.8897037303588452e-06, "loss": 0.6868, "step": 3925 }, { "epoch": 0.16, "learning_rate": 1.889409302699963e-06, "loss": 0.6924, "step": 3930 }, { "epoch": 0.16, "learning_rate": 1.8891145055881907e-06, "loss": 0.6575, "step": 3935 }, { "epoch": 0.16, "learning_rate": 1.8888193391459853e-06, "loss": 0.7152, "step": 3940 }, { "epoch": 0.16, "learning_rate": 1.8885238034959556e-06, "loss": 0.6853, "step": 3945 }, { "epoch": 0.16, "learning_rate": 1.8882278987608653e-06, "loss": 0.6626, "step": 3950 }, { "epoch": 0.16, "learning_rate": 1.8879316250636302e-06, "loss": 0.6714, "step": 3955 }, { "epoch": 0.16, "learning_rate": 1.8876349825273197e-06, "loss": 0.6994, "step": 3960 }, { "epoch": 0.16, "learning_rate": 1.8873379712751567e-06, "loss": 0.7311, "step": 3965 }, { "epoch": 0.16, "learning_rate": 1.8870405914305173e-06, "loss": 0.7214, "step": 3970 }, { "epoch": 0.16, "learning_rate": 1.88674284311693e-06, "loss": 0.7004, "step": 3975 }, { "epoch": 0.16, "learning_rate": 1.8864447264580776e-06, "loss": 0.6956, "step": 3980 }, { "epoch": 0.16, "learning_rate": 1.8861462415777942e-06, "loss": 0.7378, "step": 3985 }, { "epoch": 0.16, "learning_rate": 1.885847388600069e-06, "loss": 0.7195, "step": 3990 }, { "epoch": 0.16, "learning_rate": 1.8855481676490417e-06, "loss": 0.6948, "step": 3995 }, { "epoch": 0.16, "learning_rate": 1.885248578849007e-06, "loss": 0.7367, "step": 4000 }, { "epoch": 0.16, "eval_loss": 0.666739821434021, "eval_runtime": 138.8501, "eval_samples_per_second": 17.04, "eval_steps_per_second": 2.845, "step": 4000 }, { "epoch": 0.16, "learning_rate": 1.884948622324411e-06, "loss": 0.6942, "step": 4005 }, { "epoch": 0.16, "learning_rate": 1.884648298199853e-06, "loss": 0.6911, "step": 4010 }, { "epoch": 0.16, "learning_rate": 1.8843476066000856e-06, "loss": 0.7338, "step": 4015 }, { "epoch": 0.16, "learning_rate": 1.884046547650013e-06, "loss": 0.6914, "step": 4020 }, { "epoch": 0.16, "learning_rate": 1.8837451214746922e-06, "loss": 0.6998, "step": 4025 }, { "epoch": 0.16, "learning_rate": 1.8834433281993336e-06, "loss": 0.7024, "step": 4030 }, { "epoch": 0.16, "learning_rate": 1.8831411679492992e-06, "loss": 0.6748, "step": 4035 }, { "epoch": 0.16, "learning_rate": 1.882838640850104e-06, "loss": 0.687, "step": 4040 }, { "epoch": 0.16, "learning_rate": 1.8825357470274148e-06, "loss": 0.661, "step": 4045 }, { "epoch": 0.16, "learning_rate": 1.8822324866070512e-06, "loss": 0.7209, "step": 4050 }, { "epoch": 0.16, "learning_rate": 1.8819288597149846e-06, "loss": 0.6597, "step": 4055 }, { "epoch": 0.16, "learning_rate": 1.88162486647734e-06, "loss": 0.6923, "step": 4060 }, { "epoch": 0.17, "learning_rate": 1.8813205070203924e-06, "loss": 0.6875, "step": 4065 }, { "epoch": 0.17, "learning_rate": 1.8810157814705705e-06, "loss": 0.7085, "step": 4070 }, { "epoch": 0.17, "learning_rate": 1.8807106899544547e-06, "loss": 0.6786, "step": 4075 }, { "epoch": 0.17, "learning_rate": 1.8804052325987775e-06, "loss": 0.7078, "step": 4080 }, { "epoch": 0.17, "learning_rate": 1.8800994095304227e-06, "loss": 0.6945, "step": 4085 }, { "epoch": 0.17, "learning_rate": 1.8797932208764273e-06, "loss": 0.7232, "step": 4090 }, { "epoch": 0.17, "learning_rate": 1.8794866667639791e-06, "loss": 0.6876, "step": 4095 }, { "epoch": 0.17, "learning_rate": 1.8791797473204176e-06, "loss": 0.7299, "step": 4100 }, { "epoch": 0.17, "learning_rate": 1.8788724626732347e-06, "loss": 0.6689, "step": 4105 }, { "epoch": 0.17, "learning_rate": 1.878564812950074e-06, "loss": 0.7345, "step": 4110 }, { "epoch": 0.17, "learning_rate": 1.8782567982787302e-06, "loss": 0.7271, "step": 4115 }, { "epoch": 0.17, "learning_rate": 1.8779484187871504e-06, "loss": 0.7328, "step": 4120 }, { "epoch": 0.17, "learning_rate": 1.8776396746034324e-06, "loss": 0.7043, "step": 4125 }, { "epoch": 0.17, "learning_rate": 1.8773305658558258e-06, "loss": 0.6841, "step": 4130 }, { "epoch": 0.17, "learning_rate": 1.8770210926727316e-06, "loss": 0.7154, "step": 4135 }, { "epoch": 0.17, "learning_rate": 1.8767112551827027e-06, "loss": 0.7329, "step": 4140 }, { "epoch": 0.17, "learning_rate": 1.8764010535144426e-06, "loss": 0.7226, "step": 4145 }, { "epoch": 0.17, "learning_rate": 1.8760904877968065e-06, "loss": 0.6958, "step": 4150 }, { "epoch": 0.17, "learning_rate": 1.8757795581588005e-06, "loss": 0.7373, "step": 4155 }, { "epoch": 0.17, "learning_rate": 1.8754682647295822e-06, "loss": 0.7313, "step": 4160 }, { "epoch": 0.17, "learning_rate": 1.87515660763846e-06, "loss": 0.7333, "step": 4165 }, { "epoch": 0.17, "learning_rate": 1.8748445870148941e-06, "loss": 0.6882, "step": 4170 }, { "epoch": 0.17, "learning_rate": 1.8745322029884946e-06, "loss": 0.681, "step": 4175 }, { "epoch": 0.17, "learning_rate": 1.8742194556890233e-06, "loss": 0.7227, "step": 4180 }, { "epoch": 0.17, "learning_rate": 1.8739063452463926e-06, "loss": 0.7047, "step": 4185 }, { "epoch": 0.17, "learning_rate": 1.873592871790666e-06, "loss": 0.6831, "step": 4190 }, { "epoch": 0.17, "learning_rate": 1.8732790354520579e-06, "loss": 0.6887, "step": 4195 }, { "epoch": 0.17, "learning_rate": 1.8729648363609324e-06, "loss": 0.6833, "step": 4200 }, { "epoch": 0.17, "learning_rate": 1.8726502746478058e-06, "loss": 0.6928, "step": 4205 }, { "epoch": 0.17, "learning_rate": 1.872335350443344e-06, "loss": 0.7192, "step": 4210 }, { "epoch": 0.17, "learning_rate": 1.872020063878364e-06, "loss": 0.6792, "step": 4215 }, { "epoch": 0.17, "learning_rate": 1.8717044150838326e-06, "loss": 0.6942, "step": 4220 }, { "epoch": 0.17, "learning_rate": 1.871388404190868e-06, "loss": 0.733, "step": 4225 }, { "epoch": 0.17, "learning_rate": 1.8710720313307382e-06, "loss": 0.6587, "step": 4230 }, { "epoch": 0.17, "learning_rate": 1.8707552966348618e-06, "loss": 0.7068, "step": 4235 }, { "epoch": 0.17, "learning_rate": 1.870438200234808e-06, "loss": 0.7121, "step": 4240 }, { "epoch": 0.17, "learning_rate": 1.8701207422622951e-06, "loss": 0.6572, "step": 4245 }, { "epoch": 0.17, "learning_rate": 1.869802922849193e-06, "loss": 0.6988, "step": 4250 }, { "epoch": 0.17, "learning_rate": 1.8694847421275206e-06, "loss": 0.7009, "step": 4255 }, { "epoch": 0.17, "learning_rate": 1.8691662002294481e-06, "loss": 0.7128, "step": 4260 }, { "epoch": 0.17, "learning_rate": 1.8688472972872947e-06, "loss": 0.6869, "step": 4265 }, { "epoch": 0.17, "learning_rate": 1.8685280334335296e-06, "loss": 0.6775, "step": 4270 }, { "epoch": 0.17, "learning_rate": 1.8682084088007728e-06, "loss": 0.6787, "step": 4275 }, { "epoch": 0.17, "learning_rate": 1.8678884235217928e-06, "loss": 0.7285, "step": 4280 }, { "epoch": 0.17, "learning_rate": 1.8675680777295097e-06, "loss": 0.6764, "step": 4285 }, { "epoch": 0.17, "learning_rate": 1.8672473715569916e-06, "loss": 0.7242, "step": 4290 }, { "epoch": 0.17, "learning_rate": 1.8669263051374572e-06, "loss": 0.6771, "step": 4295 }, { "epoch": 0.17, "learning_rate": 1.8666048786042752e-06, "loss": 0.7394, "step": 4300 }, { "epoch": 0.17, "learning_rate": 1.866283092090963e-06, "loss": 0.6957, "step": 4305 }, { "epoch": 0.18, "learning_rate": 1.8659609457311875e-06, "loss": 0.6816, "step": 4310 }, { "epoch": 0.18, "learning_rate": 1.865638439658766e-06, "loss": 0.6952, "step": 4315 }, { "epoch": 0.18, "learning_rate": 1.8653155740076647e-06, "loss": 0.6754, "step": 4320 }, { "epoch": 0.18, "learning_rate": 1.8649923489119992e-06, "loss": 0.7266, "step": 4325 }, { "epoch": 0.18, "learning_rate": 1.8646687645060343e-06, "loss": 0.7218, "step": 4330 }, { "epoch": 0.18, "learning_rate": 1.8643448209241841e-06, "loss": 0.6779, "step": 4335 }, { "epoch": 0.18, "learning_rate": 1.864020518301012e-06, "loss": 0.7003, "step": 4340 }, { "epoch": 0.18, "learning_rate": 1.8636958567712303e-06, "loss": 0.7015, "step": 4345 }, { "epoch": 0.18, "learning_rate": 1.8633708364697013e-06, "loss": 0.6808, "step": 4350 }, { "epoch": 0.18, "learning_rate": 1.8630454575314344e-06, "loss": 0.6816, "step": 4355 }, { "epoch": 0.18, "learning_rate": 1.8627197200915902e-06, "loss": 0.7126, "step": 4360 }, { "epoch": 0.18, "learning_rate": 1.862393624285477e-06, "loss": 0.6838, "step": 4365 }, { "epoch": 0.18, "learning_rate": 1.8620671702485517e-06, "loss": 0.6578, "step": 4370 }, { "epoch": 0.18, "learning_rate": 1.861740358116421e-06, "loss": 0.7344, "step": 4375 }, { "epoch": 0.18, "learning_rate": 1.8614131880248393e-06, "loss": 0.7172, "step": 4380 }, { "epoch": 0.18, "learning_rate": 1.8610856601097108e-06, "loss": 0.7621, "step": 4385 }, { "epoch": 0.18, "learning_rate": 1.8607577745070873e-06, "loss": 0.7071, "step": 4390 }, { "epoch": 0.18, "learning_rate": 1.8604295313531698e-06, "loss": 0.6889, "step": 4395 }, { "epoch": 0.18, "learning_rate": 1.8601009307843078e-06, "loss": 0.6992, "step": 4400 }, { "epoch": 0.18, "learning_rate": 1.8597719729369988e-06, "loss": 0.6843, "step": 4405 }, { "epoch": 0.18, "learning_rate": 1.8594426579478891e-06, "loss": 0.6791, "step": 4410 }, { "epoch": 0.18, "learning_rate": 1.8591129859537738e-06, "loss": 0.7045, "step": 4415 }, { "epoch": 0.18, "learning_rate": 1.858782957091595e-06, "loss": 0.6975, "step": 4420 }, { "epoch": 0.18, "learning_rate": 1.858452571498444e-06, "loss": 0.7386, "step": 4425 }, { "epoch": 0.18, "learning_rate": 1.8581218293115607e-06, "loss": 0.6974, "step": 4430 }, { "epoch": 0.18, "learning_rate": 1.8577907306683317e-06, "loss": 0.7313, "step": 4435 }, { "epoch": 0.18, "learning_rate": 1.857459275706293e-06, "loss": 0.6941, "step": 4440 }, { "epoch": 0.18, "learning_rate": 1.8571274645631281e-06, "loss": 0.7037, "step": 4445 }, { "epoch": 0.18, "learning_rate": 1.8567952973766685e-06, "loss": 0.655, "step": 4450 }, { "epoch": 0.18, "learning_rate": 1.856462774284893e-06, "loss": 0.712, "step": 4455 }, { "epoch": 0.18, "learning_rate": 1.8561298954259297e-06, "loss": 0.68, "step": 4460 }, { "epoch": 0.18, "learning_rate": 1.8557966609380528e-06, "loss": 0.6917, "step": 4465 }, { "epoch": 0.18, "learning_rate": 1.8554630709596855e-06, "loss": 0.6719, "step": 4470 }, { "epoch": 0.18, "learning_rate": 1.8551291256293977e-06, "loss": 0.7117, "step": 4475 }, { "epoch": 0.18, "learning_rate": 1.854794825085908e-06, "loss": 0.7157, "step": 4480 }, { "epoch": 0.18, "learning_rate": 1.8544601694680814e-06, "loss": 0.7147, "step": 4485 }, { "epoch": 0.18, "learning_rate": 1.8541251589149313e-06, "loss": 0.6964, "step": 4490 }, { "epoch": 0.18, "learning_rate": 1.853789793565618e-06, "loss": 0.7083, "step": 4495 }, { "epoch": 0.18, "learning_rate": 1.8534540735594492e-06, "loss": 0.6735, "step": 4500 }, { "epoch": 0.18, "learning_rate": 1.8531179990358806e-06, "loss": 0.6719, "step": 4505 }, { "epoch": 0.18, "learning_rate": 1.852781570134514e-06, "loss": 0.6729, "step": 4510 }, { "epoch": 0.18, "learning_rate": 1.8524447869950995e-06, "loss": 0.6988, "step": 4515 }, { "epoch": 0.18, "learning_rate": 1.8521076497575335e-06, "loss": 0.6907, "step": 4520 }, { "epoch": 0.18, "learning_rate": 1.8517701585618602e-06, "loss": 0.7268, "step": 4525 }, { "epoch": 0.18, "learning_rate": 1.8514323135482704e-06, "loss": 0.7261, "step": 4530 }, { "epoch": 0.18, "learning_rate": 1.8510941148571018e-06, "loss": 0.6763, "step": 4535 }, { "epoch": 0.18, "learning_rate": 1.8507555626288397e-06, "loss": 0.6801, "step": 4540 }, { "epoch": 0.18, "learning_rate": 1.8504166570041152e-06, "loss": 0.7502, "step": 4545 }, { "epoch": 0.18, "learning_rate": 1.8500773981237069e-06, "loss": 0.6682, "step": 4550 }, { "epoch": 0.19, "learning_rate": 1.8497377861285401e-06, "loss": 0.709, "step": 4555 }, { "epoch": 0.19, "learning_rate": 1.8493978211596865e-06, "loss": 0.7001, "step": 4560 }, { "epoch": 0.19, "learning_rate": 1.849057503358365e-06, "loss": 0.7111, "step": 4565 }, { "epoch": 0.19, "learning_rate": 1.8487168328659403e-06, "loss": 0.701, "step": 4570 }, { "epoch": 0.19, "learning_rate": 1.8483758098239237e-06, "loss": 0.691, "step": 4575 }, { "epoch": 0.19, "learning_rate": 1.8480344343739738e-06, "loss": 0.6979, "step": 4580 }, { "epoch": 0.19, "learning_rate": 1.8476927066578946e-06, "loss": 0.7074, "step": 4585 }, { "epoch": 0.19, "learning_rate": 1.8473506268176372e-06, "loss": 0.7229, "step": 4590 }, { "epoch": 0.19, "learning_rate": 1.8470081949952982e-06, "loss": 0.7015, "step": 4595 }, { "epoch": 0.19, "learning_rate": 1.846665411333121e-06, "loss": 0.7329, "step": 4600 }, { "epoch": 0.19, "learning_rate": 1.8463222759734948e-06, "loss": 0.6908, "step": 4605 }, { "epoch": 0.19, "learning_rate": 1.8459787890589554e-06, "loss": 0.7381, "step": 4610 }, { "epoch": 0.19, "learning_rate": 1.8456349507321836e-06, "loss": 0.6663, "step": 4615 }, { "epoch": 0.19, "learning_rate": 1.8452907611360076e-06, "loss": 0.7021, "step": 4620 }, { "epoch": 0.19, "learning_rate": 1.8449462204134002e-06, "loss": 0.6826, "step": 4625 }, { "epoch": 0.19, "learning_rate": 1.8446013287074811e-06, "loss": 0.6959, "step": 4630 }, { "epoch": 0.19, "learning_rate": 1.8442560861615148e-06, "loss": 0.7348, "step": 4635 }, { "epoch": 0.19, "learning_rate": 1.8439104929189124e-06, "loss": 0.6699, "step": 4640 }, { "epoch": 0.19, "learning_rate": 1.8435645491232307e-06, "loss": 0.7354, "step": 4645 }, { "epoch": 0.19, "learning_rate": 1.8432182549181707e-06, "loss": 0.6721, "step": 4650 }, { "epoch": 0.19, "learning_rate": 1.8428716104475806e-06, "loss": 0.7104, "step": 4655 }, { "epoch": 0.19, "learning_rate": 1.8425246158554537e-06, "loss": 0.6808, "step": 4660 }, { "epoch": 0.19, "learning_rate": 1.8421772712859282e-06, "loss": 0.6941, "step": 4665 }, { "epoch": 0.19, "learning_rate": 1.8418295768832883e-06, "loss": 0.687, "step": 4670 }, { "epoch": 0.19, "learning_rate": 1.841481532791963e-06, "loss": 0.6894, "step": 4675 }, { "epoch": 0.19, "learning_rate": 1.841133139156527e-06, "loss": 0.6663, "step": 4680 }, { "epoch": 0.19, "learning_rate": 1.8407843961216995e-06, "loss": 0.6423, "step": 4685 }, { "epoch": 0.19, "learning_rate": 1.8404353038323459e-06, "loss": 0.7073, "step": 4690 }, { "epoch": 0.19, "learning_rate": 1.8400858624334758e-06, "loss": 0.7149, "step": 4695 }, { "epoch": 0.19, "learning_rate": 1.8397360720702442e-06, "loss": 0.7024, "step": 4700 }, { "epoch": 0.19, "learning_rate": 1.8393859328879511e-06, "loss": 0.7039, "step": 4705 }, { "epoch": 0.19, "learning_rate": 1.839035445032041e-06, "loss": 0.6758, "step": 4710 }, { "epoch": 0.19, "learning_rate": 1.8386846086481036e-06, "loss": 0.6833, "step": 4715 }, { "epoch": 0.19, "learning_rate": 1.8383334238818736e-06, "loss": 0.6996, "step": 4720 }, { "epoch": 0.19, "learning_rate": 1.8379818908792295e-06, "loss": 0.692, "step": 4725 }, { "epoch": 0.19, "learning_rate": 1.8376300097861953e-06, "loss": 0.7099, "step": 4730 }, { "epoch": 0.19, "learning_rate": 1.8372777807489396e-06, "loss": 0.6859, "step": 4735 }, { "epoch": 0.19, "learning_rate": 1.8369252039137753e-06, "loss": 0.696, "step": 4740 }, { "epoch": 0.19, "learning_rate": 1.8365722794271594e-06, "loss": 0.6713, "step": 4745 }, { "epoch": 0.19, "learning_rate": 1.8362190074356935e-06, "loss": 0.7215, "step": 4750 }, { "epoch": 0.19, "learning_rate": 1.8358653880861245e-06, "loss": 0.7001, "step": 4755 }, { "epoch": 0.19, "learning_rate": 1.835511421525342e-06, "loss": 0.727, "step": 4760 }, { "epoch": 0.19, "learning_rate": 1.8351571079003812e-06, "loss": 0.6764, "step": 4765 }, { "epoch": 0.19, "learning_rate": 1.8348024473584208e-06, "loss": 0.6667, "step": 4770 }, { "epoch": 0.19, "learning_rate": 1.834447440046783e-06, "loss": 0.7291, "step": 4775 }, { "epoch": 0.19, "learning_rate": 1.8340920861129358e-06, "loss": 0.6699, "step": 4780 }, { "epoch": 0.19, "learning_rate": 1.8337363857044894e-06, "loss": 0.6825, "step": 4785 }, { "epoch": 0.19, "learning_rate": 1.833380338969199e-06, "loss": 0.6954, "step": 4790 }, { "epoch": 0.19, "learning_rate": 1.833023946054963e-06, "loss": 0.7002, "step": 4795 }, { "epoch": 0.19, "learning_rate": 1.8326672071098246e-06, "loss": 0.699, "step": 4800 }, { "epoch": 0.19, "eval_loss": 0.6643534302711487, "eval_runtime": 138.4591, "eval_samples_per_second": 17.088, "eval_steps_per_second": 2.853, "step": 4800 }, { "epoch": 0.2, "learning_rate": 1.8323101222819693e-06, "loss": 0.677, "step": 4805 }, { "epoch": 0.2, "learning_rate": 1.8319526917197275e-06, "loss": 0.7381, "step": 4810 }, { "epoch": 0.2, "learning_rate": 1.8315949155715722e-06, "loss": 0.6681, "step": 4815 }, { "epoch": 0.2, "learning_rate": 1.8312367939861214e-06, "loss": 0.6696, "step": 4820 }, { "epoch": 0.2, "learning_rate": 1.8308783271121346e-06, "loss": 0.713, "step": 4825 }, { "epoch": 0.2, "learning_rate": 1.830519515098517e-06, "loss": 0.7155, "step": 4830 }, { "epoch": 0.2, "learning_rate": 1.8301603580943148e-06, "loss": 0.694, "step": 4835 }, { "epoch": 0.2, "learning_rate": 1.8298008562487195e-06, "loss": 0.7343, "step": 4840 }, { "epoch": 0.2, "learning_rate": 1.829441009711065e-06, "loss": 0.7173, "step": 4845 }, { "epoch": 0.2, "learning_rate": 1.8290808186308276e-06, "loss": 0.7167, "step": 4850 }, { "epoch": 0.2, "learning_rate": 1.828720283157629e-06, "loss": 0.7265, "step": 4855 }, { "epoch": 0.2, "learning_rate": 1.8283594034412313e-06, "loss": 0.6793, "step": 4860 }, { "epoch": 0.2, "learning_rate": 1.8279981796315412e-06, "loss": 0.6807, "step": 4865 }, { "epoch": 0.2, "learning_rate": 1.8276366118786078e-06, "loss": 0.6737, "step": 4870 }, { "epoch": 0.2, "learning_rate": 1.8272747003326235e-06, "loss": 0.7202, "step": 4875 }, { "epoch": 0.2, "learning_rate": 1.8269124451439231e-06, "loss": 0.7226, "step": 4880 }, { "epoch": 0.2, "learning_rate": 1.8265498464629837e-06, "loss": 0.6772, "step": 4885 }, { "epoch": 0.2, "learning_rate": 1.8261869044404265e-06, "loss": 0.6868, "step": 4890 }, { "epoch": 0.2, "learning_rate": 1.825823619227014e-06, "loss": 0.703, "step": 4895 }, { "epoch": 0.2, "learning_rate": 1.825459990973652e-06, "loss": 0.7193, "step": 4900 }, { "epoch": 0.2, "learning_rate": 1.8250960198313878e-06, "loss": 0.7194, "step": 4905 }, { "epoch": 0.2, "learning_rate": 1.8247317059514126e-06, "loss": 0.6859, "step": 4910 }, { "epoch": 0.2, "learning_rate": 1.8243670494850592e-06, "loss": 0.6749, "step": 4915 }, { "epoch": 0.2, "learning_rate": 1.824002050583802e-06, "loss": 0.6542, "step": 4920 }, { "epoch": 0.2, "learning_rate": 1.8236367093992592e-06, "loss": 0.6592, "step": 4925 }, { "epoch": 0.2, "learning_rate": 1.82327102608319e-06, "loss": 0.6788, "step": 4930 }, { "epoch": 0.2, "learning_rate": 1.8229050007874961e-06, "loss": 0.6909, "step": 4935 }, { "epoch": 0.2, "learning_rate": 1.822538633664221e-06, "loss": 0.7105, "step": 4940 }, { "epoch": 0.2, "learning_rate": 1.8221719248655507e-06, "loss": 0.6731, "step": 4945 }, { "epoch": 0.2, "learning_rate": 1.821804874543813e-06, "loss": 0.685, "step": 4950 }, { "epoch": 0.2, "learning_rate": 1.821437482851477e-06, "loss": 0.7251, "step": 4955 }, { "epoch": 0.2, "learning_rate": 1.821069749941154e-06, "loss": 0.6517, "step": 4960 }, { "epoch": 0.2, "learning_rate": 1.8207016759655973e-06, "loss": 0.704, "step": 4965 }, { "epoch": 0.2, "learning_rate": 1.8203332610777017e-06, "loss": 0.7063, "step": 4970 }, { "epoch": 0.2, "learning_rate": 1.8199645054305037e-06, "loss": 0.7208, "step": 4975 }, { "epoch": 0.2, "learning_rate": 1.8195954091771805e-06, "loss": 0.7339, "step": 4980 }, { "epoch": 0.2, "learning_rate": 1.8192259724710518e-06, "loss": 0.6627, "step": 4985 }, { "epoch": 0.2, "learning_rate": 1.818856195465579e-06, "loss": 0.7021, "step": 4990 }, { "epoch": 0.2, "learning_rate": 1.8184860783143635e-06, "loss": 0.7108, "step": 4995 }, { "epoch": 0.2, "learning_rate": 1.8181156211711488e-06, "loss": 0.639, "step": 5000 }, { "epoch": 0.2, "learning_rate": 1.8177448241898196e-06, "loss": 0.7256, "step": 5005 }, { "epoch": 0.2, "learning_rate": 1.817373687524402e-06, "loss": 0.6977, "step": 5010 }, { "epoch": 0.2, "learning_rate": 1.817002211329063e-06, "loss": 0.6984, "step": 5015 }, { "epoch": 0.2, "learning_rate": 1.8166303957581103e-06, "loss": 0.6875, "step": 5020 }, { "epoch": 0.2, "learning_rate": 1.8162582409659932e-06, "loss": 0.6602, "step": 5025 }, { "epoch": 0.2, "learning_rate": 1.815885747107301e-06, "loss": 0.6819, "step": 5030 }, { "epoch": 0.2, "learning_rate": 1.815512914336765e-06, "loss": 0.6843, "step": 5035 }, { "epoch": 0.2, "learning_rate": 1.8151397428092563e-06, "loss": 0.7079, "step": 5040 }, { "epoch": 0.2, "learning_rate": 1.8147662326797872e-06, "loss": 0.7022, "step": 5045 }, { "epoch": 0.21, "learning_rate": 1.8143923841035107e-06, "loss": 0.6897, "step": 5050 }, { "epoch": 0.21, "learning_rate": 1.8140181972357201e-06, "loss": 0.6774, "step": 5055 }, { "epoch": 0.21, "learning_rate": 1.8136436722318496e-06, "loss": 0.6877, "step": 5060 }, { "epoch": 0.21, "learning_rate": 1.813268809247473e-06, "loss": 0.6957, "step": 5065 }, { "epoch": 0.21, "learning_rate": 1.8128936084383058e-06, "loss": 0.7274, "step": 5070 }, { "epoch": 0.21, "learning_rate": 1.812518069960203e-06, "loss": 0.6767, "step": 5075 }, { "epoch": 0.21, "learning_rate": 1.8121421939691599e-06, "loss": 0.6857, "step": 5080 }, { "epoch": 0.21, "learning_rate": 1.8117659806213122e-06, "loss": 0.6637, "step": 5085 }, { "epoch": 0.21, "learning_rate": 1.8113894300729356e-06, "loss": 0.6777, "step": 5090 }, { "epoch": 0.21, "learning_rate": 1.8110125424804458e-06, "loss": 0.6666, "step": 5095 }, { "epoch": 0.21, "learning_rate": 1.8106353180003988e-06, "loss": 0.6811, "step": 5100 }, { "epoch": 0.21, "learning_rate": 1.8102577567894905e-06, "loss": 0.6735, "step": 5105 }, { "epoch": 0.21, "learning_rate": 1.8098798590045563e-06, "loss": 0.7543, "step": 5110 }, { "epoch": 0.21, "learning_rate": 1.8095016248025717e-06, "loss": 0.6957, "step": 5115 }, { "epoch": 0.21, "learning_rate": 1.809123054340652e-06, "loss": 0.6969, "step": 5120 }, { "epoch": 0.21, "learning_rate": 1.8087441477760517e-06, "loss": 0.6775, "step": 5125 }, { "epoch": 0.21, "learning_rate": 1.8083649052661661e-06, "loss": 0.6794, "step": 5130 }, { "epoch": 0.21, "learning_rate": 1.8079853269685282e-06, "loss": 0.6253, "step": 5135 }, { "epoch": 0.21, "learning_rate": 1.8076054130408123e-06, "loss": 0.7048, "step": 5140 }, { "epoch": 0.21, "learning_rate": 1.8072251636408314e-06, "loss": 0.681, "step": 5145 }, { "epoch": 0.21, "learning_rate": 1.806844578926537e-06, "loss": 0.6644, "step": 5150 }, { "epoch": 0.21, "learning_rate": 1.8064636590560215e-06, "loss": 0.6972, "step": 5155 }, { "epoch": 0.21, "learning_rate": 1.8060824041875154e-06, "loss": 0.6831, "step": 5160 }, { "epoch": 0.21, "learning_rate": 1.8057008144793882e-06, "loss": 0.6738, "step": 5165 }, { "epoch": 0.21, "learning_rate": 1.8053188900901497e-06, "loss": 0.7293, "step": 5170 }, { "epoch": 0.21, "learning_rate": 1.8049366311784477e-06, "loss": 0.6975, "step": 5175 }, { "epoch": 0.21, "learning_rate": 1.8045540379030691e-06, "loss": 0.7275, "step": 5180 }, { "epoch": 0.21, "learning_rate": 1.8041711104229401e-06, "loss": 0.6867, "step": 5185 }, { "epoch": 0.21, "learning_rate": 1.803787848897125e-06, "loss": 0.6857, "step": 5190 }, { "epoch": 0.21, "learning_rate": 1.8034042534848277e-06, "loss": 0.6861, "step": 5195 }, { "epoch": 0.21, "learning_rate": 1.8030203243453901e-06, "loss": 0.6732, "step": 5200 }, { "epoch": 0.21, "learning_rate": 1.8026360616382933e-06, "loss": 0.7133, "step": 5205 }, { "epoch": 0.21, "learning_rate": 1.8022514655231565e-06, "loss": 0.7196, "step": 5210 }, { "epoch": 0.21, "learning_rate": 1.8018665361597373e-06, "loss": 0.6929, "step": 5215 }, { "epoch": 0.21, "learning_rate": 1.801481273707933e-06, "loss": 0.7227, "step": 5220 }, { "epoch": 0.21, "learning_rate": 1.8010956783277772e-06, "loss": 0.6655, "step": 5225 }, { "epoch": 0.21, "learning_rate": 1.8007097501794435e-06, "loss": 0.7454, "step": 5230 }, { "epoch": 0.21, "learning_rate": 1.8003234894232426e-06, "loss": 0.6815, "step": 5235 }, { "epoch": 0.21, "learning_rate": 1.7999368962196243e-06, "loss": 0.7456, "step": 5240 }, { "epoch": 0.21, "learning_rate": 1.7995499707291762e-06, "loss": 0.6822, "step": 5245 }, { "epoch": 0.21, "learning_rate": 1.799162713112623e-06, "loss": 0.7083, "step": 5250 }, { "epoch": 0.21, "learning_rate": 1.798775123530829e-06, "loss": 0.6874, "step": 5255 }, { "epoch": 0.21, "learning_rate": 1.798387202144795e-06, "loss": 0.6864, "step": 5260 }, { "epoch": 0.21, "learning_rate": 1.7979989491156603e-06, "loss": 0.6991, "step": 5265 }, { "epoch": 0.21, "learning_rate": 1.797610364604702e-06, "loss": 0.7077, "step": 5270 }, { "epoch": 0.21, "learning_rate": 1.7972214487733345e-06, "loss": 0.6461, "step": 5275 }, { "epoch": 0.21, "learning_rate": 1.7968322017831102e-06, "loss": 0.6968, "step": 5280 }, { "epoch": 0.21, "learning_rate": 1.7964426237957188e-06, "loss": 0.6635, "step": 5285 }, { "epoch": 0.21, "learning_rate": 1.7960527149729878e-06, "loss": 0.684, "step": 5290 }, { "epoch": 0.22, "learning_rate": 1.7956624754768818e-06, "loss": 0.6784, "step": 5295 }, { "epoch": 0.22, "learning_rate": 1.795271905469503e-06, "loss": 0.7481, "step": 5300 }, { "epoch": 0.22, "learning_rate": 1.7948810051130905e-06, "loss": 0.683, "step": 5305 }, { "epoch": 0.22, "learning_rate": 1.794489774570021e-06, "loss": 0.6661, "step": 5310 }, { "epoch": 0.22, "learning_rate": 1.7940982140028087e-06, "loss": 0.7184, "step": 5315 }, { "epoch": 0.22, "learning_rate": 1.793706323574104e-06, "loss": 0.6612, "step": 5320 }, { "epoch": 0.22, "learning_rate": 1.7933141034466948e-06, "loss": 0.703, "step": 5325 }, { "epoch": 0.22, "learning_rate": 1.792921553783506e-06, "loss": 0.7108, "step": 5330 }, { "epoch": 0.22, "learning_rate": 1.7925286747475994e-06, "loss": 0.7301, "step": 5335 }, { "epoch": 0.22, "learning_rate": 1.7921354665021735e-06, "loss": 0.6704, "step": 5340 }, { "epoch": 0.22, "learning_rate": 1.7917419292105636e-06, "loss": 0.7065, "step": 5345 }, { "epoch": 0.22, "learning_rate": 1.7913480630362417e-06, "loss": 0.7033, "step": 5350 }, { "epoch": 0.22, "learning_rate": 1.790953868142816e-06, "loss": 0.7258, "step": 5355 }, { "epoch": 0.22, "learning_rate": 1.790559344694032e-06, "loss": 0.6887, "step": 5360 }, { "epoch": 0.22, "learning_rate": 1.7901644928537715e-06, "loss": 0.7195, "step": 5365 }, { "epoch": 0.22, "learning_rate": 1.7897693127860524e-06, "loss": 0.7058, "step": 5370 }, { "epoch": 0.22, "learning_rate": 1.7893738046550286e-06, "loss": 0.7177, "step": 5375 }, { "epoch": 0.22, "learning_rate": 1.7889779686249912e-06, "loss": 0.6891, "step": 5380 }, { "epoch": 0.22, "learning_rate": 1.788581804860367e-06, "loss": 0.6551, "step": 5385 }, { "epoch": 0.22, "learning_rate": 1.7881853135257185e-06, "loss": 0.7061, "step": 5390 }, { "epoch": 0.22, "learning_rate": 1.7877884947857455e-06, "loss": 0.6706, "step": 5395 }, { "epoch": 0.22, "learning_rate": 1.7873913488052827e-06, "loss": 0.6719, "step": 5400 }, { "epoch": 0.22, "learning_rate": 1.7869938757493011e-06, "loss": 0.7022, "step": 5405 }, { "epoch": 0.22, "learning_rate": 1.7865960757829075e-06, "loss": 0.676, "step": 5410 }, { "epoch": 0.22, "learning_rate": 1.7861979490713445e-06, "loss": 0.7427, "step": 5415 }, { "epoch": 0.22, "learning_rate": 1.7857994957799906e-06, "loss": 0.698, "step": 5420 }, { "epoch": 0.22, "learning_rate": 1.78540071607436e-06, "loss": 0.7023, "step": 5425 }, { "epoch": 0.22, "learning_rate": 1.785001610120102e-06, "loss": 0.6985, "step": 5430 }, { "epoch": 0.22, "learning_rate": 1.784602178083002e-06, "loss": 0.7192, "step": 5435 }, { "epoch": 0.22, "learning_rate": 1.7842024201289801e-06, "loss": 0.6682, "step": 5440 }, { "epoch": 0.22, "learning_rate": 1.7838023364240929e-06, "loss": 0.694, "step": 5445 }, { "epoch": 0.22, "learning_rate": 1.7834019271345313e-06, "loss": 0.6484, "step": 5450 }, { "epoch": 0.22, "learning_rate": 1.7830011924266221e-06, "loss": 0.6921, "step": 5455 }, { "epoch": 0.22, "learning_rate": 1.7826001324668267e-06, "loss": 0.7135, "step": 5460 }, { "epoch": 0.22, "learning_rate": 1.7821987474217424e-06, "loss": 0.6631, "step": 5465 }, { "epoch": 0.22, "learning_rate": 1.7817970374581006e-06, "loss": 0.6572, "step": 5470 }, { "epoch": 0.22, "learning_rate": 1.781395002742768e-06, "loss": 0.7068, "step": 5475 }, { "epoch": 0.22, "learning_rate": 1.780992643442747e-06, "loss": 0.6417, "step": 5480 }, { "epoch": 0.22, "learning_rate": 1.7805899597251735e-06, "loss": 0.6847, "step": 5485 }, { "epoch": 0.22, "learning_rate": 1.7801869517573191e-06, "loss": 0.6862, "step": 5490 }, { "epoch": 0.22, "learning_rate": 1.77978361970659e-06, "loss": 0.6696, "step": 5495 }, { "epoch": 0.22, "learning_rate": 1.7793799637405265e-06, "loss": 0.737, "step": 5500 }, { "epoch": 0.22, "learning_rate": 1.7789759840268037e-06, "loss": 0.6826, "step": 5505 }, { "epoch": 0.22, "learning_rate": 1.7785716807332315e-06, "loss": 0.7288, "step": 5510 }, { "epoch": 0.22, "learning_rate": 1.778167054027754e-06, "loss": 0.6856, "step": 5515 }, { "epoch": 0.22, "learning_rate": 1.7777621040784496e-06, "loss": 0.6801, "step": 5520 }, { "epoch": 0.22, "learning_rate": 1.777356831053531e-06, "loss": 0.6737, "step": 5525 }, { "epoch": 0.22, "learning_rate": 1.7769512351213448e-06, "loss": 0.6547, "step": 5530 }, { "epoch": 0.22, "learning_rate": 1.7765453164503722e-06, "loss": 0.7255, "step": 5535 }, { "epoch": 0.23, "learning_rate": 1.7761390752092284e-06, "loss": 0.6757, "step": 5540 }, { "epoch": 0.23, "learning_rate": 1.7757325115666624e-06, "loss": 0.6752, "step": 5545 }, { "epoch": 0.23, "learning_rate": 1.7753256256915572e-06, "loss": 0.6901, "step": 5550 }, { "epoch": 0.23, "learning_rate": 1.7749184177529294e-06, "loss": 0.6747, "step": 5555 }, { "epoch": 0.23, "learning_rate": 1.7745108879199302e-06, "loss": 0.6852, "step": 5560 }, { "epoch": 0.23, "learning_rate": 1.7741030363618437e-06, "loss": 0.6804, "step": 5565 }, { "epoch": 0.23, "learning_rate": 1.7736948632480874e-06, "loss": 0.6783, "step": 5570 }, { "epoch": 0.23, "learning_rate": 1.7732863687482138e-06, "loss": 0.6799, "step": 5575 }, { "epoch": 0.23, "learning_rate": 1.772877553031907e-06, "loss": 0.7125, "step": 5580 }, { "epoch": 0.23, "learning_rate": 1.7724684162689863e-06, "loss": 0.6851, "step": 5585 }, { "epoch": 0.23, "learning_rate": 1.772058958629403e-06, "loss": 0.6906, "step": 5590 }, { "epoch": 0.23, "learning_rate": 1.7716491802832425e-06, "loss": 0.7292, "step": 5595 }, { "epoch": 0.23, "learning_rate": 1.771239081400723e-06, "loss": 0.7028, "step": 5600 }, { "epoch": 0.23, "eval_loss": 0.6604963541030884, "eval_runtime": 138.4769, "eval_samples_per_second": 17.086, "eval_steps_per_second": 2.852, "step": 5600 }, { "epoch": 0.23, "learning_rate": 1.7708286621521964e-06, "loss": 0.6984, "step": 5605 }, { "epoch": 0.23, "learning_rate": 1.7704179227081467e-06, "loss": 0.7075, "step": 5610 }, { "epoch": 0.23, "learning_rate": 1.7700068632391917e-06, "loss": 0.7014, "step": 5615 }, { "epoch": 0.23, "learning_rate": 1.7695954839160824e-06, "loss": 0.6986, "step": 5620 }, { "epoch": 0.23, "learning_rate": 1.7691837849097015e-06, "loss": 0.7069, "step": 5625 }, { "epoch": 0.23, "learning_rate": 1.7687717663910658e-06, "loss": 0.6893, "step": 5630 }, { "epoch": 0.23, "learning_rate": 1.7683594285313235e-06, "loss": 0.6479, "step": 5635 }, { "epoch": 0.23, "learning_rate": 1.7679467715017567e-06, "loss": 0.7326, "step": 5640 }, { "epoch": 0.23, "learning_rate": 1.7675337954737795e-06, "loss": 0.7032, "step": 5645 }, { "epoch": 0.23, "learning_rate": 1.7671205006189384e-06, "loss": 0.6587, "step": 5650 }, { "epoch": 0.23, "learning_rate": 1.7667068871089126e-06, "loss": 0.6494, "step": 5655 }, { "epoch": 0.23, "learning_rate": 1.7662929551155136e-06, "loss": 0.7001, "step": 5660 }, { "epoch": 0.23, "learning_rate": 1.7658787048106846e-06, "loss": 0.7043, "step": 5665 }, { "epoch": 0.23, "learning_rate": 1.7654641363665027e-06, "loss": 0.6735, "step": 5670 }, { "epoch": 0.23, "learning_rate": 1.765049249955175e-06, "loss": 0.706, "step": 5675 }, { "epoch": 0.23, "learning_rate": 1.764634045749042e-06, "loss": 0.7178, "step": 5680 }, { "epoch": 0.23, "learning_rate": 1.7642185239205763e-06, "loss": 0.6817, "step": 5685 }, { "epoch": 0.23, "learning_rate": 1.7638026846423813e-06, "loss": 0.739, "step": 5690 }, { "epoch": 0.23, "learning_rate": 1.7633865280871935e-06, "loss": 0.704, "step": 5695 }, { "epoch": 0.23, "learning_rate": 1.762970054427881e-06, "loss": 0.6859, "step": 5700 }, { "epoch": 0.23, "learning_rate": 1.7625532638374429e-06, "loss": 0.6645, "step": 5705 }, { "epoch": 0.23, "learning_rate": 1.7621361564890104e-06, "loss": 0.6885, "step": 5710 }, { "epoch": 0.23, "learning_rate": 1.7617187325558463e-06, "loss": 0.7211, "step": 5715 }, { "epoch": 0.23, "learning_rate": 1.7613009922113449e-06, "loss": 0.6563, "step": 5720 }, { "epoch": 0.23, "learning_rate": 1.760882935629032e-06, "loss": 0.6872, "step": 5725 }, { "epoch": 0.23, "learning_rate": 1.7604645629825647e-06, "loss": 0.7238, "step": 5730 }, { "epoch": 0.23, "learning_rate": 1.7600458744457312e-06, "loss": 0.6829, "step": 5735 }, { "epoch": 0.23, "learning_rate": 1.7596268701924513e-06, "loss": 0.6752, "step": 5740 }, { "epoch": 0.23, "learning_rate": 1.7592075503967751e-06, "loss": 0.7009, "step": 5745 }, { "epoch": 0.23, "learning_rate": 1.7587879152328852e-06, "loss": 0.6987, "step": 5750 }, { "epoch": 0.23, "learning_rate": 1.758367964875094e-06, "loss": 0.7154, "step": 5755 }, { "epoch": 0.23, "learning_rate": 1.7579476994978454e-06, "loss": 0.6551, "step": 5760 }, { "epoch": 0.23, "learning_rate": 1.7575271192757138e-06, "loss": 0.6711, "step": 5765 }, { "epoch": 0.23, "learning_rate": 1.7571062243834046e-06, "loss": 0.7064, "step": 5770 }, { "epoch": 0.23, "learning_rate": 1.7566850149957536e-06, "loss": 0.6889, "step": 5775 }, { "epoch": 0.23, "learning_rate": 1.7562634912877282e-06, "loss": 0.6998, "step": 5780 }, { "epoch": 0.23, "learning_rate": 1.7558416534344252e-06, "loss": 0.6513, "step": 5785 }, { "epoch": 0.24, "learning_rate": 1.7554195016110725e-06, "loss": 0.7019, "step": 5790 }, { "epoch": 0.24, "learning_rate": 1.7549970359930285e-06, "loss": 0.7119, "step": 5795 }, { "epoch": 0.24, "learning_rate": 1.7545742567557811e-06, "loss": 0.6858, "step": 5800 }, { "epoch": 0.24, "learning_rate": 1.7541511640749499e-06, "loss": 0.6887, "step": 5805 }, { "epoch": 0.24, "learning_rate": 1.753727758126283e-06, "loss": 0.6976, "step": 5810 }, { "epoch": 0.24, "learning_rate": 1.7533040390856608e-06, "loss": 0.6699, "step": 5815 }, { "epoch": 0.24, "learning_rate": 1.7528800071290914e-06, "loss": 0.6802, "step": 5820 }, { "epoch": 0.24, "learning_rate": 1.7524556624327147e-06, "loss": 0.6804, "step": 5825 }, { "epoch": 0.24, "learning_rate": 1.7520310051727992e-06, "loss": 0.6678, "step": 5830 }, { "epoch": 0.24, "learning_rate": 1.7516060355257443e-06, "loss": 0.6856, "step": 5835 }, { "epoch": 0.24, "learning_rate": 1.7511807536680782e-06, "loss": 0.6983, "step": 5840 }, { "epoch": 0.24, "learning_rate": 1.75075515977646e-06, "loss": 0.7212, "step": 5845 }, { "epoch": 0.24, "learning_rate": 1.7503292540276772e-06, "loss": 0.6783, "step": 5850 }, { "epoch": 0.24, "learning_rate": 1.7499030365986475e-06, "loss": 0.6932, "step": 5855 }, { "epoch": 0.24, "learning_rate": 1.749476507666418e-06, "loss": 0.7191, "step": 5860 }, { "epoch": 0.24, "learning_rate": 1.7490496674081648e-06, "loss": 0.6817, "step": 5865 }, { "epoch": 0.24, "learning_rate": 1.748622516001194e-06, "loss": 0.6568, "step": 5870 }, { "epoch": 0.24, "learning_rate": 1.7481950536229402e-06, "loss": 0.6861, "step": 5875 }, { "epoch": 0.24, "learning_rate": 1.7477672804509679e-06, "loss": 0.6995, "step": 5880 }, { "epoch": 0.24, "learning_rate": 1.7473391966629698e-06, "loss": 0.72, "step": 5885 }, { "epoch": 0.24, "learning_rate": 1.7469108024367687e-06, "loss": 0.6983, "step": 5890 }, { "epoch": 0.24, "learning_rate": 1.746482097950316e-06, "loss": 0.6961, "step": 5895 }, { "epoch": 0.24, "learning_rate": 1.7460530833816915e-06, "loss": 0.7183, "step": 5900 }, { "epoch": 0.24, "learning_rate": 1.745623758909104e-06, "loss": 0.7194, "step": 5905 }, { "epoch": 0.24, "learning_rate": 1.7451941247108914e-06, "loss": 0.6699, "step": 5910 }, { "epoch": 0.24, "learning_rate": 1.74476418096552e-06, "loss": 0.6807, "step": 5915 }, { "epoch": 0.24, "learning_rate": 1.7443339278515846e-06, "loss": 0.6835, "step": 5920 }, { "epoch": 0.24, "learning_rate": 1.7439033655478084e-06, "loss": 0.6874, "step": 5925 }, { "epoch": 0.24, "learning_rate": 1.7434724942330436e-06, "loss": 0.6832, "step": 5930 }, { "epoch": 0.24, "learning_rate": 1.7430413140862703e-06, "loss": 0.6661, "step": 5935 }, { "epoch": 0.24, "learning_rate": 1.7426098252865968e-06, "loss": 0.677, "step": 5940 }, { "epoch": 0.24, "learning_rate": 1.74217802801326e-06, "loss": 0.6587, "step": 5945 }, { "epoch": 0.24, "learning_rate": 1.7417459224456246e-06, "loss": 0.6613, "step": 5950 }, { "epoch": 0.24, "learning_rate": 1.741313508763184e-06, "loss": 0.7241, "step": 5955 }, { "epoch": 0.24, "learning_rate": 1.740880787145558e-06, "loss": 0.7194, "step": 5960 }, { "epoch": 0.24, "learning_rate": 1.7404477577724964e-06, "loss": 0.6832, "step": 5965 }, { "epoch": 0.24, "learning_rate": 1.7400144208238751e-06, "loss": 0.6761, "step": 5970 }, { "epoch": 0.24, "learning_rate": 1.7395807764796993e-06, "loss": 0.7259, "step": 5975 }, { "epoch": 0.24, "learning_rate": 1.7391468249201004e-06, "loss": 0.6704, "step": 5980 }, { "epoch": 0.24, "learning_rate": 1.7387125663253386e-06, "loss": 0.6836, "step": 5985 }, { "epoch": 0.24, "learning_rate": 1.7382780008758005e-06, "loss": 0.6555, "step": 5990 }, { "epoch": 0.24, "learning_rate": 1.7378431287520016e-06, "loss": 0.7281, "step": 5995 }, { "epoch": 0.24, "learning_rate": 1.7374079501345835e-06, "loss": 0.6644, "step": 6000 }, { "epoch": 0.24, "learning_rate": 1.7369724652043156e-06, "loss": 0.6314, "step": 6005 }, { "epoch": 0.24, "learning_rate": 1.7365366741420947e-06, "loss": 0.6719, "step": 6010 }, { "epoch": 0.24, "learning_rate": 1.736100577128945e-06, "loss": 0.6913, "step": 6015 }, { "epoch": 0.24, "learning_rate": 1.7356641743460166e-06, "loss": 0.6858, "step": 6020 }, { "epoch": 0.24, "learning_rate": 1.7352274659745878e-06, "loss": 0.6853, "step": 6025 }, { "epoch": 0.24, "learning_rate": 1.7347904521960635e-06, "loss": 0.6921, "step": 6030 }, { "epoch": 0.25, "learning_rate": 1.7343531331919756e-06, "loss": 0.6898, "step": 6035 }, { "epoch": 0.25, "learning_rate": 1.7339155091439823e-06, "loss": 0.6996, "step": 6040 }, { "epoch": 0.25, "learning_rate": 1.733477580233869e-06, "loss": 0.7071, "step": 6045 }, { "epoch": 0.25, "learning_rate": 1.7330393466435474e-06, "loss": 0.6903, "step": 6050 }, { "epoch": 0.25, "learning_rate": 1.7326008085550564e-06, "loss": 0.6929, "step": 6055 }, { "epoch": 0.25, "learning_rate": 1.73216196615056e-06, "loss": 0.7195, "step": 6060 }, { "epoch": 0.25, "learning_rate": 1.7317228196123504e-06, "loss": 0.6737, "step": 6065 }, { "epoch": 0.25, "learning_rate": 1.7312833691228445e-06, "loss": 0.7118, "step": 6070 }, { "epoch": 0.25, "learning_rate": 1.7308436148645871e-06, "loss": 0.7004, "step": 6075 }, { "epoch": 0.25, "learning_rate": 1.7304035570202476e-06, "loss": 0.666, "step": 6080 }, { "epoch": 0.25, "learning_rate": 1.7299631957726223e-06, "loss": 0.6574, "step": 6085 }, { "epoch": 0.25, "learning_rate": 1.7295225313046337e-06, "loss": 0.7057, "step": 6090 }, { "epoch": 0.25, "learning_rate": 1.72908156379933e-06, "loss": 0.6804, "step": 6095 }, { "epoch": 0.25, "learning_rate": 1.7286402934398848e-06, "loss": 0.7117, "step": 6100 }, { "epoch": 0.25, "learning_rate": 1.7281987204095985e-06, "loss": 0.7004, "step": 6105 }, { "epoch": 0.25, "learning_rate": 1.7277568448918962e-06, "loss": 0.6945, "step": 6110 }, { "epoch": 0.25, "learning_rate": 1.7273146670703295e-06, "loss": 0.6268, "step": 6115 }, { "epoch": 0.25, "learning_rate": 1.7268721871285753e-06, "loss": 0.6772, "step": 6120 }, { "epoch": 0.25, "learning_rate": 1.7264294052504358e-06, "loss": 0.7141, "step": 6125 }, { "epoch": 0.25, "learning_rate": 1.7259863216198385e-06, "loss": 0.6466, "step": 6130 }, { "epoch": 0.25, "learning_rate": 1.7255429364208366e-06, "loss": 0.6835, "step": 6135 }, { "epoch": 0.25, "learning_rate": 1.7250992498376086e-06, "loss": 0.6938, "step": 6140 }, { "epoch": 0.25, "learning_rate": 1.7246552620544584e-06, "loss": 0.7017, "step": 6145 }, { "epoch": 0.25, "learning_rate": 1.7242109732558137e-06, "loss": 0.7161, "step": 6150 }, { "epoch": 0.25, "learning_rate": 1.7237663836262289e-06, "loss": 0.6646, "step": 6155 }, { "epoch": 0.25, "learning_rate": 1.7233214933503826e-06, "loss": 0.6712, "step": 6160 }, { "epoch": 0.25, "learning_rate": 1.7228763026130783e-06, "loss": 0.6749, "step": 6165 }, { "epoch": 0.25, "learning_rate": 1.7224308115992443e-06, "loss": 0.647, "step": 6170 }, { "epoch": 0.25, "learning_rate": 1.7219850204939338e-06, "loss": 0.7009, "step": 6175 }, { "epoch": 0.25, "learning_rate": 1.7215389294823243e-06, "loss": 0.6568, "step": 6180 }, { "epoch": 0.25, "learning_rate": 1.7210925387497186e-06, "loss": 0.6501, "step": 6185 }, { "epoch": 0.25, "learning_rate": 1.7206458484815429e-06, "loss": 0.6926, "step": 6190 }, { "epoch": 0.25, "learning_rate": 1.7201988588633489e-06, "loss": 0.6973, "step": 6195 }, { "epoch": 0.25, "learning_rate": 1.7197515700808124e-06, "loss": 0.6979, "step": 6200 }, { "epoch": 0.25, "learning_rate": 1.7193039823197323e-06, "loss": 0.6634, "step": 6205 }, { "epoch": 0.25, "learning_rate": 1.7188560957660338e-06, "loss": 0.6958, "step": 6210 }, { "epoch": 0.25, "learning_rate": 1.7184079106057645e-06, "loss": 0.7073, "step": 6215 }, { "epoch": 0.25, "learning_rate": 1.7179594270250965e-06, "loss": 0.7147, "step": 6220 }, { "epoch": 0.25, "learning_rate": 1.7175106452103268e-06, "loss": 0.7319, "step": 6225 }, { "epoch": 0.25, "learning_rate": 1.7170615653478742e-06, "loss": 0.6786, "step": 6230 }, { "epoch": 0.25, "learning_rate": 1.7166121876242837e-06, "loss": 0.6643, "step": 6235 }, { "epoch": 0.25, "learning_rate": 1.716162512226222e-06, "loss": 0.7524, "step": 6240 }, { "epoch": 0.25, "learning_rate": 1.715712539340481e-06, "loss": 0.6517, "step": 6245 }, { "epoch": 0.25, "learning_rate": 1.715262269153975e-06, "loss": 0.6999, "step": 6250 }, { "epoch": 0.25, "learning_rate": 1.7148117018537432e-06, "loss": 0.6756, "step": 6255 }, { "epoch": 0.25, "learning_rate": 1.7143608376269462e-06, "loss": 0.7208, "step": 6260 }, { "epoch": 0.25, "learning_rate": 1.71390967666087e-06, "loss": 0.6793, "step": 6265 }, { "epoch": 0.25, "learning_rate": 1.7134582191429223e-06, "loss": 0.68, "step": 6270 }, { "epoch": 0.25, "learning_rate": 1.7130064652606352e-06, "loss": 0.6666, "step": 6275 }, { "epoch": 0.26, "learning_rate": 1.7125544152016627e-06, "loss": 0.6714, "step": 6280 }, { "epoch": 0.26, "learning_rate": 1.7121020691537831e-06, "loss": 0.7034, "step": 6285 }, { "epoch": 0.26, "learning_rate": 1.7116494273048966e-06, "loss": 0.6819, "step": 6290 }, { "epoch": 0.26, "learning_rate": 1.7111964898430266e-06, "loss": 0.66, "step": 6295 }, { "epoch": 0.26, "learning_rate": 1.71074325695632e-06, "loss": 0.7156, "step": 6300 }, { "epoch": 0.26, "learning_rate": 1.7102897288330454e-06, "loss": 0.6804, "step": 6305 }, { "epoch": 0.26, "learning_rate": 1.7098359056615942e-06, "loss": 0.6807, "step": 6310 }, { "epoch": 0.26, "learning_rate": 1.7093817876304807e-06, "loss": 0.6701, "step": 6315 }, { "epoch": 0.26, "learning_rate": 1.7089273749283418e-06, "loss": 0.6785, "step": 6320 }, { "epoch": 0.26, "learning_rate": 1.7084726677439364e-06, "loss": 0.6941, "step": 6325 }, { "epoch": 0.26, "learning_rate": 1.7080176662661463e-06, "loss": 0.6811, "step": 6330 }, { "epoch": 0.26, "learning_rate": 1.7075623706839745e-06, "loss": 0.6797, "step": 6335 }, { "epoch": 0.26, "learning_rate": 1.7071067811865474e-06, "loss": 0.6913, "step": 6340 }, { "epoch": 0.26, "learning_rate": 1.7066508979631129e-06, "loss": 0.6917, "step": 6345 }, { "epoch": 0.26, "learning_rate": 1.7061947212030402e-06, "loss": 0.6709, "step": 6350 }, { "epoch": 0.26, "learning_rate": 1.705738251095822e-06, "loss": 0.7023, "step": 6355 }, { "epoch": 0.26, "learning_rate": 1.7052814878310718e-06, "loss": 0.7109, "step": 6360 }, { "epoch": 0.26, "learning_rate": 1.7048244315985247e-06, "loss": 0.6795, "step": 6365 }, { "epoch": 0.26, "learning_rate": 1.7043670825880384e-06, "loss": 0.7013, "step": 6370 }, { "epoch": 0.26, "learning_rate": 1.7039094409895914e-06, "loss": 0.703, "step": 6375 }, { "epoch": 0.26, "learning_rate": 1.703451506993284e-06, "loss": 0.732, "step": 6380 }, { "epoch": 0.26, "learning_rate": 1.7029932807893382e-06, "loss": 0.6905, "step": 6385 }, { "epoch": 0.26, "learning_rate": 1.702534762568097e-06, "loss": 0.7172, "step": 6390 }, { "epoch": 0.26, "learning_rate": 1.7020759525200253e-06, "loss": 0.6674, "step": 6395 }, { "epoch": 0.26, "learning_rate": 1.701616850835708e-06, "loss": 0.6694, "step": 6400 }, { "epoch": 0.26, "eval_loss": 0.6586260795593262, "eval_runtime": 139.2072, "eval_samples_per_second": 16.996, "eval_steps_per_second": 2.837, "step": 6400 }, { "epoch": 0.26, "learning_rate": 1.7011574577058525e-06, "loss": 0.7282, "step": 6405 }, { "epoch": 0.26, "learning_rate": 1.7006977733212867e-06, "loss": 0.6736, "step": 6410 }, { "epoch": 0.26, "learning_rate": 1.7002377978729596e-06, "loss": 0.6672, "step": 6415 }, { "epoch": 0.26, "learning_rate": 1.6997775315519408e-06, "loss": 0.712, "step": 6420 }, { "epoch": 0.26, "learning_rate": 1.6993169745494209e-06, "loss": 0.6949, "step": 6425 }, { "epoch": 0.26, "learning_rate": 1.6988561270567115e-06, "loss": 0.6951, "step": 6430 }, { "epoch": 0.26, "learning_rate": 1.698394989265244e-06, "loss": 0.7019, "step": 6435 }, { "epoch": 0.26, "learning_rate": 1.6979335613665717e-06, "loss": 0.7143, "step": 6440 }, { "epoch": 0.26, "learning_rate": 1.6974718435523678e-06, "loss": 0.7253, "step": 6445 }, { "epoch": 0.26, "learning_rate": 1.6970098360144253e-06, "loss": 0.7103, "step": 6450 }, { "epoch": 0.26, "learning_rate": 1.6965475389446586e-06, "loss": 0.6953, "step": 6455 }, { "epoch": 0.26, "learning_rate": 1.6960849525351018e-06, "loss": 0.7247, "step": 6460 }, { "epoch": 0.26, "learning_rate": 1.6956220769779088e-06, "loss": 0.7015, "step": 6465 }, { "epoch": 0.26, "learning_rate": 1.6951589124653547e-06, "loss": 0.704, "step": 6470 }, { "epoch": 0.26, "learning_rate": 1.6946954591898336e-06, "loss": 0.7054, "step": 6475 }, { "epoch": 0.26, "learning_rate": 1.6942317173438604e-06, "loss": 0.6704, "step": 6480 }, { "epoch": 0.26, "learning_rate": 1.693767687120069e-06, "loss": 0.6564, "step": 6485 }, { "epoch": 0.26, "learning_rate": 1.6933033687112134e-06, "loss": 0.7054, "step": 6490 }, { "epoch": 0.26, "learning_rate": 1.6928387623101681e-06, "loss": 0.676, "step": 6495 }, { "epoch": 0.26, "learning_rate": 1.692373868109926e-06, "loss": 0.7034, "step": 6500 }, { "epoch": 0.26, "learning_rate": 1.6919086863036003e-06, "loss": 0.6781, "step": 6505 }, { "epoch": 0.26, "learning_rate": 1.6914432170844233e-06, "loss": 0.7057, "step": 6510 }, { "epoch": 0.26, "learning_rate": 1.690977460645747e-06, "loss": 0.6929, "step": 6515 }, { "epoch": 0.26, "learning_rate": 1.6905114171810429e-06, "loss": 0.6816, "step": 6520 }, { "epoch": 0.27, "learning_rate": 1.6900450868839009e-06, "loss": 0.6845, "step": 6525 }, { "epoch": 0.27, "learning_rate": 1.6895784699480306e-06, "loss": 0.6665, "step": 6530 }, { "epoch": 0.27, "learning_rate": 1.6891115665672608e-06, "loss": 0.6597, "step": 6535 }, { "epoch": 0.27, "learning_rate": 1.6886443769355393e-06, "loss": 0.6623, "step": 6540 }, { "epoch": 0.27, "learning_rate": 1.688176901246932e-06, "loss": 0.6535, "step": 6545 }, { "epoch": 0.27, "learning_rate": 1.6877091396956247e-06, "loss": 0.6902, "step": 6550 }, { "epoch": 0.27, "learning_rate": 1.6872410924759215e-06, "loss": 0.6447, "step": 6555 }, { "epoch": 0.27, "learning_rate": 1.686772759782245e-06, "loss": 0.7013, "step": 6560 }, { "epoch": 0.27, "learning_rate": 1.6863041418091366e-06, "loss": 0.7158, "step": 6565 }, { "epoch": 0.27, "learning_rate": 1.685835238751256e-06, "loss": 0.7109, "step": 6570 }, { "epoch": 0.27, "learning_rate": 1.6853660508033816e-06, "loss": 0.7231, "step": 6575 }, { "epoch": 0.27, "learning_rate": 1.6848965781604099e-06, "loss": 0.7305, "step": 6580 }, { "epoch": 0.27, "learning_rate": 1.6844268210173556e-06, "loss": 0.7013, "step": 6585 }, { "epoch": 0.27, "learning_rate": 1.6839567795693524e-06, "loss": 0.6849, "step": 6590 }, { "epoch": 0.27, "learning_rate": 1.6834864540116506e-06, "loss": 0.7316, "step": 6595 }, { "epoch": 0.27, "learning_rate": 1.6830158445396196e-06, "loss": 0.7193, "step": 6600 }, { "epoch": 0.27, "learning_rate": 1.6825449513487466e-06, "loss": 0.7277, "step": 6605 }, { "epoch": 0.27, "learning_rate": 1.6820737746346368e-06, "loss": 0.7037, "step": 6610 }, { "epoch": 0.27, "learning_rate": 1.6816023145930123e-06, "loss": 0.7078, "step": 6615 }, { "epoch": 0.27, "learning_rate": 1.681130571419714e-06, "loss": 0.6651, "step": 6620 }, { "epoch": 0.27, "learning_rate": 1.6806585453106997e-06, "loss": 0.6851, "step": 6625 }, { "epoch": 0.27, "learning_rate": 1.680186236462045e-06, "loss": 0.6877, "step": 6630 }, { "epoch": 0.27, "learning_rate": 1.6797136450699427e-06, "loss": 0.6976, "step": 6635 }, { "epoch": 0.27, "learning_rate": 1.6792407713307036e-06, "loss": 0.7244, "step": 6640 }, { "epoch": 0.27, "learning_rate": 1.678767615440755e-06, "loss": 0.6734, "step": 6645 }, { "epoch": 0.27, "learning_rate": 1.6782941775966416e-06, "loss": 0.6733, "step": 6650 }, { "epoch": 0.27, "learning_rate": 1.6778204579950255e-06, "loss": 0.6836, "step": 6655 }, { "epoch": 0.27, "learning_rate": 1.6773464568326859e-06, "loss": 0.7101, "step": 6660 }, { "epoch": 0.27, "learning_rate": 1.6768721743065186e-06, "loss": 0.695, "step": 6665 }, { "epoch": 0.27, "learning_rate": 1.6763976106135366e-06, "loss": 0.6774, "step": 6670 }, { "epoch": 0.27, "learning_rate": 1.6759227659508692e-06, "loss": 0.674, "step": 6675 }, { "epoch": 0.27, "learning_rate": 1.6754476405157631e-06, "loss": 0.6887, "step": 6680 }, { "epoch": 0.27, "learning_rate": 1.674972234505581e-06, "loss": 0.689, "step": 6685 }, { "epoch": 0.27, "learning_rate": 1.6744965481178026e-06, "loss": 0.6877, "step": 6690 }, { "epoch": 0.27, "learning_rate": 1.6740205815500236e-06, "loss": 0.6872, "step": 6695 }, { "epoch": 0.27, "learning_rate": 1.673544334999957e-06, "loss": 0.6999, "step": 6700 }, { "epoch": 0.27, "learning_rate": 1.6730678086654306e-06, "loss": 0.6983, "step": 6705 }, { "epoch": 0.27, "learning_rate": 1.6725910027443902e-06, "loss": 0.6488, "step": 6710 }, { "epoch": 0.27, "learning_rate": 1.6721139174348964e-06, "loss": 0.6741, "step": 6715 }, { "epoch": 0.27, "learning_rate": 1.671636552935126e-06, "loss": 0.6809, "step": 6720 }, { "epoch": 0.27, "learning_rate": 1.6711589094433725e-06, "loss": 0.7326, "step": 6725 }, { "epoch": 0.27, "learning_rate": 1.6706809871580446e-06, "loss": 0.6757, "step": 6730 }, { "epoch": 0.27, "learning_rate": 1.670202786277667e-06, "loss": 0.6515, "step": 6735 }, { "epoch": 0.27, "learning_rate": 1.6697243070008805e-06, "loss": 0.6747, "step": 6740 }, { "epoch": 0.27, "learning_rate": 1.6692455495264413e-06, "loss": 0.6816, "step": 6745 }, { "epoch": 0.27, "learning_rate": 1.6687665140532209e-06, "loss": 0.6918, "step": 6750 }, { "epoch": 0.27, "learning_rate": 1.6682872007802062e-06, "loss": 0.7191, "step": 6755 }, { "epoch": 0.27, "learning_rate": 1.6678076099064999e-06, "loss": 0.6649, "step": 6760 }, { "epoch": 0.27, "learning_rate": 1.66732774163132e-06, "loss": 0.6889, "step": 6765 }, { "epoch": 0.27, "learning_rate": 1.666847596154e-06, "loss": 0.6917, "step": 6770 }, { "epoch": 0.28, "learning_rate": 1.6663671736739874e-06, "loss": 0.6717, "step": 6775 }, { "epoch": 0.28, "learning_rate": 1.665886474390846e-06, "loss": 0.7177, "step": 6780 }, { "epoch": 0.28, "learning_rate": 1.6654054985042538e-06, "loss": 0.6676, "step": 6785 }, { "epoch": 0.28, "learning_rate": 1.6649242462140044e-06, "loss": 0.708, "step": 6790 }, { "epoch": 0.28, "learning_rate": 1.6644427177200053e-06, "loss": 0.6958, "step": 6795 }, { "epoch": 0.28, "learning_rate": 1.66396091322228e-06, "loss": 0.6862, "step": 6800 }, { "epoch": 0.28, "learning_rate": 1.663478832920965e-06, "loss": 0.6462, "step": 6805 }, { "epoch": 0.28, "learning_rate": 1.6629964770163128e-06, "loss": 0.6668, "step": 6810 }, { "epoch": 0.28, "learning_rate": 1.6625138457086897e-06, "loss": 0.6926, "step": 6815 }, { "epoch": 0.28, "learning_rate": 1.6620309391985767e-06, "loss": 0.6942, "step": 6820 }, { "epoch": 0.28, "learning_rate": 1.661547757686569e-06, "loss": 0.6988, "step": 6825 }, { "epoch": 0.28, "learning_rate": 1.6610643013733756e-06, "loss": 0.6548, "step": 6830 }, { "epoch": 0.28, "learning_rate": 1.6605805704598206e-06, "loss": 0.6827, "step": 6835 }, { "epoch": 0.28, "learning_rate": 1.660096565146841e-06, "loss": 0.6939, "step": 6840 }, { "epoch": 0.28, "learning_rate": 1.6596122856354885e-06, "loss": 0.728, "step": 6845 }, { "epoch": 0.28, "learning_rate": 1.6591277321269295e-06, "loss": 0.6762, "step": 6850 }, { "epoch": 0.28, "learning_rate": 1.6586429048224422e-06, "loss": 0.6697, "step": 6855 }, { "epoch": 0.28, "learning_rate": 1.6581578039234203e-06, "loss": 0.7086, "step": 6860 }, { "epoch": 0.28, "learning_rate": 1.6576724296313697e-06, "loss": 0.7378, "step": 6865 }, { "epoch": 0.28, "learning_rate": 1.657186782147912e-06, "loss": 0.6861, "step": 6870 }, { "epoch": 0.28, "learning_rate": 1.6567008616747797e-06, "loss": 0.6749, "step": 6875 }, { "epoch": 0.28, "learning_rate": 1.6562146684138205e-06, "loss": 0.67, "step": 6880 }, { "epoch": 0.28, "learning_rate": 1.655728202566995e-06, "loss": 0.6561, "step": 6885 }, { "epoch": 0.28, "learning_rate": 1.6552414643363766e-06, "loss": 0.7061, "step": 6890 }, { "epoch": 0.28, "learning_rate": 1.6547544539241516e-06, "loss": 0.6779, "step": 6895 }, { "epoch": 0.28, "learning_rate": 1.6542671715326209e-06, "loss": 0.6927, "step": 6900 }, { "epoch": 0.28, "learning_rate": 1.653779617364197e-06, "loss": 0.6633, "step": 6905 }, { "epoch": 0.28, "learning_rate": 1.6532917916214055e-06, "loss": 0.6755, "step": 6910 }, { "epoch": 0.28, "learning_rate": 1.6528036945068852e-06, "loss": 0.6774, "step": 6915 }, { "epoch": 0.28, "learning_rate": 1.652315326223387e-06, "loss": 0.7033, "step": 6920 }, { "epoch": 0.28, "learning_rate": 1.6518266869737754e-06, "loss": 0.6823, "step": 6925 }, { "epoch": 0.28, "learning_rate": 1.6513377769610264e-06, "loss": 0.6952, "step": 6930 }, { "epoch": 0.28, "learning_rate": 1.6508485963882293e-06, "loss": 0.676, "step": 6935 }, { "epoch": 0.28, "learning_rate": 1.650359145458585e-06, "loss": 0.7026, "step": 6940 }, { "epoch": 0.28, "learning_rate": 1.6498694243754075e-06, "loss": 0.6872, "step": 6945 }, { "epoch": 0.28, "learning_rate": 1.6493794333421228e-06, "loss": 0.6846, "step": 6950 }, { "epoch": 0.28, "learning_rate": 1.6488891725622688e-06, "loss": 0.6825, "step": 6955 }, { "epoch": 0.28, "learning_rate": 1.6483986422394955e-06, "loss": 0.723, "step": 6960 }, { "epoch": 0.28, "learning_rate": 1.6479078425775653e-06, "loss": 0.7463, "step": 6965 }, { "epoch": 0.28, "learning_rate": 1.6474167737803514e-06, "loss": 0.7006, "step": 6970 }, { "epoch": 0.28, "learning_rate": 1.64692543605184e-06, "loss": 0.6745, "step": 6975 }, { "epoch": 0.28, "learning_rate": 1.6464338295961283e-06, "loss": 0.6935, "step": 6980 }, { "epoch": 0.28, "learning_rate": 1.6459419546174253e-06, "loss": 0.7056, "step": 6985 }, { "epoch": 0.28, "learning_rate": 1.6454498113200521e-06, "loss": 0.6763, "step": 6990 }, { "epoch": 0.28, "learning_rate": 1.6449573999084404e-06, "loss": 0.68, "step": 6995 }, { "epoch": 0.28, "learning_rate": 1.6444647205871332e-06, "loss": 0.6595, "step": 7000 }, { "epoch": 0.28, "learning_rate": 1.6439717735607856e-06, "loss": 0.6996, "step": 7005 }, { "epoch": 0.28, "learning_rate": 1.643478559034164e-06, "loss": 0.7008, "step": 7010 }, { "epoch": 0.28, "learning_rate": 1.6429850772121446e-06, "loss": 0.6907, "step": 7015 }, { "epoch": 0.29, "learning_rate": 1.6424913282997159e-06, "loss": 0.6627, "step": 7020 }, { "epoch": 0.29, "learning_rate": 1.641997312501977e-06, "loss": 0.6939, "step": 7025 }, { "epoch": 0.29, "learning_rate": 1.6415030300241371e-06, "loss": 0.7002, "step": 7030 }, { "epoch": 0.29, "learning_rate": 1.6410084810715177e-06, "loss": 0.7098, "step": 7035 }, { "epoch": 0.29, "learning_rate": 1.6405136658495496e-06, "loss": 0.7114, "step": 7040 }, { "epoch": 0.29, "learning_rate": 1.640018584563775e-06, "loss": 0.6951, "step": 7045 }, { "epoch": 0.29, "learning_rate": 1.639523237419846e-06, "loss": 0.7112, "step": 7050 }, { "epoch": 0.29, "learning_rate": 1.6390276246235257e-06, "loss": 0.6827, "step": 7055 }, { "epoch": 0.29, "learning_rate": 1.6385317463806878e-06, "loss": 0.7133, "step": 7060 }, { "epoch": 0.29, "learning_rate": 1.6380356028973152e-06, "loss": 0.6766, "step": 7065 }, { "epoch": 0.29, "learning_rate": 1.6375391943795015e-06, "loss": 0.6767, "step": 7070 }, { "epoch": 0.29, "learning_rate": 1.6370425210334514e-06, "loss": 0.6731, "step": 7075 }, { "epoch": 0.29, "learning_rate": 1.6365455830654775e-06, "loss": 0.6974, "step": 7080 }, { "epoch": 0.29, "learning_rate": 1.6360483806820043e-06, "loss": 0.6972, "step": 7085 }, { "epoch": 0.29, "learning_rate": 1.635550914089565e-06, "loss": 0.6974, "step": 7090 }, { "epoch": 0.29, "learning_rate": 1.635053183494803e-06, "loss": 0.6649, "step": 7095 }, { "epoch": 0.29, "learning_rate": 1.6345551891044713e-06, "loss": 0.6754, "step": 7100 }, { "epoch": 0.29, "learning_rate": 1.6340569311254323e-06, "loss": 0.6877, "step": 7105 }, { "epoch": 0.29, "learning_rate": 1.6335584097646585e-06, "loss": 0.6841, "step": 7110 }, { "epoch": 0.29, "learning_rate": 1.6330596252292309e-06, "loss": 0.6675, "step": 7115 }, { "epoch": 0.29, "learning_rate": 1.63256057772634e-06, "loss": 0.6743, "step": 7120 }, { "epoch": 0.29, "learning_rate": 1.6320612674632864e-06, "loss": 0.7045, "step": 7125 }, { "epoch": 0.29, "learning_rate": 1.631561694647479e-06, "loss": 0.6891, "step": 7130 }, { "epoch": 0.29, "learning_rate": 1.6310618594864355e-06, "loss": 0.6722, "step": 7135 }, { "epoch": 0.29, "learning_rate": 1.6305617621877841e-06, "loss": 0.7012, "step": 7140 }, { "epoch": 0.29, "learning_rate": 1.6300614029592602e-06, "loss": 0.665, "step": 7145 }, { "epoch": 0.29, "learning_rate": 1.6295607820087084e-06, "loss": 0.6967, "step": 7150 }, { "epoch": 0.29, "learning_rate": 1.6290598995440835e-06, "loss": 0.7089, "step": 7155 }, { "epoch": 0.29, "learning_rate": 1.628558755773446e-06, "loss": 0.7176, "step": 7160 }, { "epoch": 0.29, "learning_rate": 1.6280573509049679e-06, "loss": 0.6847, "step": 7165 }, { "epoch": 0.29, "learning_rate": 1.6275556851469284e-06, "loss": 0.6968, "step": 7170 }, { "epoch": 0.29, "learning_rate": 1.6270537587077145e-06, "loss": 0.6586, "step": 7175 }, { "epoch": 0.29, "learning_rate": 1.6265515717958222e-06, "loss": 0.7039, "step": 7180 }, { "epoch": 0.29, "learning_rate": 1.6260491246198563e-06, "loss": 0.725, "step": 7185 }, { "epoch": 0.29, "learning_rate": 1.6255464173885275e-06, "loss": 0.7166, "step": 7190 }, { "epoch": 0.29, "learning_rate": 1.6250434503106578e-06, "loss": 0.67, "step": 7195 }, { "epoch": 0.29, "learning_rate": 1.6245402235951742e-06, "loss": 0.6697, "step": 7200 }, { "epoch": 0.29, "eval_loss": 0.6546275019645691, "eval_runtime": 138.8622, "eval_samples_per_second": 17.038, "eval_steps_per_second": 2.845, "step": 7200 }, { "epoch": 0.29, "learning_rate": 1.624036737451113e-06, "loss": 0.6907, "step": 7205 }, { "epoch": 0.29, "learning_rate": 1.623532992087618e-06, "loss": 0.7066, "step": 7210 }, { "epoch": 0.29, "learning_rate": 1.6230289877139403e-06, "loss": 0.7354, "step": 7215 }, { "epoch": 0.29, "learning_rate": 1.6225247245394393e-06, "loss": 0.672, "step": 7220 }, { "epoch": 0.29, "learning_rate": 1.622020202773582e-06, "loss": 0.676, "step": 7225 }, { "epoch": 0.29, "learning_rate": 1.6215154226259414e-06, "loss": 0.6798, "step": 7230 }, { "epoch": 0.29, "learning_rate": 1.621010384306199e-06, "loss": 0.689, "step": 7235 }, { "epoch": 0.29, "learning_rate": 1.620505088024144e-06, "loss": 0.7003, "step": 7240 }, { "epoch": 0.29, "learning_rate": 1.619999533989671e-06, "loss": 0.6669, "step": 7245 }, { "epoch": 0.29, "learning_rate": 1.6194937224127837e-06, "loss": 0.6849, "step": 7250 }, { "epoch": 0.29, "learning_rate": 1.6189876535035919e-06, "loss": 0.6997, "step": 7255 }, { "epoch": 0.29, "learning_rate": 1.6184813274723113e-06, "loss": 0.7292, "step": 7260 }, { "epoch": 0.3, "learning_rate": 1.6179747445292659e-06, "loss": 0.6555, "step": 7265 }, { "epoch": 0.3, "learning_rate": 1.6174679048848856e-06, "loss": 0.672, "step": 7270 }, { "epoch": 0.3, "learning_rate": 1.6169608087497077e-06, "loss": 0.6803, "step": 7275 }, { "epoch": 0.3, "learning_rate": 1.6164534563343752e-06, "loss": 0.6701, "step": 7280 }, { "epoch": 0.3, "learning_rate": 1.615945847849638e-06, "loss": 0.6493, "step": 7285 }, { "epoch": 0.3, "learning_rate": 1.615437983506352e-06, "loss": 0.7276, "step": 7290 }, { "epoch": 0.3, "learning_rate": 1.6149298635154795e-06, "loss": 0.6832, "step": 7295 }, { "epoch": 0.3, "learning_rate": 1.6144214880880895e-06, "loss": 0.6981, "step": 7300 }, { "epoch": 0.3, "learning_rate": 1.6139128574353568e-06, "loss": 0.6715, "step": 7305 }, { "epoch": 0.3, "learning_rate": 1.613403971768562e-06, "loss": 0.6732, "step": 7310 }, { "epoch": 0.3, "learning_rate": 1.6128948312990916e-06, "loss": 0.6906, "step": 7315 }, { "epoch": 0.3, "learning_rate": 1.6123854362384384e-06, "loss": 0.682, "step": 7320 }, { "epoch": 0.3, "learning_rate": 1.6118757867982002e-06, "loss": 0.701, "step": 7325 }, { "epoch": 0.3, "learning_rate": 1.6113658831900816e-06, "loss": 0.6969, "step": 7330 }, { "epoch": 0.3, "learning_rate": 1.6108557256258916e-06, "loss": 0.6774, "step": 7335 }, { "epoch": 0.3, "learning_rate": 1.6103453143175458e-06, "loss": 0.6849, "step": 7340 }, { "epoch": 0.3, "learning_rate": 1.6098346494770642e-06, "loss": 0.6714, "step": 7345 }, { "epoch": 0.3, "learning_rate": 1.6093237313165722e-06, "loss": 0.6781, "step": 7350 }, { "epoch": 0.3, "learning_rate": 1.6088125600483014e-06, "loss": 0.657, "step": 7355 }, { "epoch": 0.3, "learning_rate": 1.6083011358845878e-06, "loss": 0.6867, "step": 7360 }, { "epoch": 0.3, "learning_rate": 1.6077894590378722e-06, "loss": 0.6977, "step": 7365 }, { "epoch": 0.3, "learning_rate": 1.607277529720701e-06, "loss": 0.6775, "step": 7370 }, { "epoch": 0.3, "learning_rate": 1.6067653481457251e-06, "loss": 0.6962, "step": 7375 }, { "epoch": 0.3, "learning_rate": 1.6062529145257e-06, "loss": 0.6801, "step": 7380 }, { "epoch": 0.3, "learning_rate": 1.6057402290734867e-06, "loss": 0.6917, "step": 7385 }, { "epoch": 0.3, "learning_rate": 1.6052272920020502e-06, "loss": 0.6858, "step": 7390 }, { "epoch": 0.3, "learning_rate": 1.6047141035244596e-06, "loss": 0.689, "step": 7395 }, { "epoch": 0.3, "learning_rate": 1.6042006638538893e-06, "loss": 0.684, "step": 7400 }, { "epoch": 0.3, "learning_rate": 1.6036869732036175e-06, "loss": 0.6685, "step": 7405 }, { "epoch": 0.3, "learning_rate": 1.603173031787027e-06, "loss": 0.7001, "step": 7410 }, { "epoch": 0.3, "learning_rate": 1.602658839817605e-06, "loss": 0.6947, "step": 7415 }, { "epoch": 0.3, "learning_rate": 1.6021443975089415e-06, "loss": 0.6384, "step": 7420 }, { "epoch": 0.3, "learning_rate": 1.601629705074732e-06, "loss": 0.6828, "step": 7425 }, { "epoch": 0.3, "learning_rate": 1.6011147627287746e-06, "loss": 0.6856, "step": 7430 }, { "epoch": 0.3, "learning_rate": 1.6005995706849726e-06, "loss": 0.6655, "step": 7435 }, { "epoch": 0.3, "learning_rate": 1.6000841291573322e-06, "loss": 0.7021, "step": 7440 }, { "epoch": 0.3, "learning_rate": 1.599568438359963e-06, "loss": 0.6889, "step": 7445 }, { "epoch": 0.3, "learning_rate": 1.5990524985070785e-06, "loss": 0.6917, "step": 7450 }, { "epoch": 0.3, "learning_rate": 1.598536309812996e-06, "loss": 0.7008, "step": 7455 }, { "epoch": 0.3, "learning_rate": 1.598019872492135e-06, "loss": 0.6614, "step": 7460 }, { "epoch": 0.3, "learning_rate": 1.59750318675902e-06, "loss": 0.6887, "step": 7465 }, { "epoch": 0.3, "learning_rate": 1.5969862528282771e-06, "loss": 0.6733, "step": 7470 }, { "epoch": 0.3, "learning_rate": 1.5964690709146367e-06, "loss": 0.6935, "step": 7475 }, { "epoch": 0.3, "learning_rate": 1.5959516412329314e-06, "loss": 0.6731, "step": 7480 }, { "epoch": 0.3, "learning_rate": 1.5954339639980967e-06, "loss": 0.6846, "step": 7485 }, { "epoch": 0.3, "learning_rate": 1.5949160394251718e-06, "loss": 0.7281, "step": 7490 }, { "epoch": 0.3, "learning_rate": 1.5943978677292976e-06, "loss": 0.7238, "step": 7495 }, { "epoch": 0.3, "learning_rate": 1.593879449125718e-06, "loss": 0.6682, "step": 7500 }, { "epoch": 0.3, "learning_rate": 1.5933607838297804e-06, "loss": 0.6864, "step": 7505 }, { "epoch": 0.31, "learning_rate": 1.5928418720569332e-06, "loss": 0.6775, "step": 7510 }, { "epoch": 0.31, "learning_rate": 1.5923227140227278e-06, "loss": 0.7329, "step": 7515 }, { "epoch": 0.31, "learning_rate": 1.5918033099428182e-06, "loss": 0.6964, "step": 7520 }, { "epoch": 0.31, "learning_rate": 1.59128366003296e-06, "loss": 0.6862, "step": 7525 }, { "epoch": 0.31, "learning_rate": 1.5907637645090117e-06, "loss": 0.6552, "step": 7530 }, { "epoch": 0.31, "learning_rate": 1.5902436235869333e-06, "loss": 0.6738, "step": 7535 }, { "epoch": 0.31, "learning_rate": 1.5897232374827862e-06, "loss": 0.7062, "step": 7540 }, { "epoch": 0.31, "learning_rate": 1.589202606412735e-06, "loss": 0.6975, "step": 7545 }, { "epoch": 0.31, "learning_rate": 1.5886817305930452e-06, "loss": 0.6795, "step": 7550 }, { "epoch": 0.31, "learning_rate": 1.5881606102400836e-06, "loss": 0.6633, "step": 7555 }, { "epoch": 0.31, "learning_rate": 1.5876392455703198e-06, "loss": 0.6959, "step": 7560 }, { "epoch": 0.31, "learning_rate": 1.5871176368003231e-06, "loss": 0.6719, "step": 7565 }, { "epoch": 0.31, "learning_rate": 1.5865957841467666e-06, "loss": 0.7142, "step": 7570 }, { "epoch": 0.31, "learning_rate": 1.5860736878264222e-06, "loss": 0.6729, "step": 7575 }, { "epoch": 0.31, "learning_rate": 1.5855513480561649e-06, "loss": 0.6777, "step": 7580 }, { "epoch": 0.31, "learning_rate": 1.5850287650529698e-06, "loss": 0.6729, "step": 7585 }, { "epoch": 0.31, "learning_rate": 1.5845059390339134e-06, "loss": 0.7104, "step": 7590 }, { "epoch": 0.31, "learning_rate": 1.5839828702161727e-06, "loss": 0.6844, "step": 7595 }, { "epoch": 0.31, "learning_rate": 1.5834595588170266e-06, "loss": 0.6837, "step": 7600 }, { "epoch": 0.31, "learning_rate": 1.5829360050538537e-06, "loss": 0.7076, "step": 7605 }, { "epoch": 0.31, "learning_rate": 1.5824122091441337e-06, "loss": 0.6906, "step": 7610 }, { "epoch": 0.31, "learning_rate": 1.5818881713054469e-06, "loss": 0.7267, "step": 7615 }, { "epoch": 0.31, "learning_rate": 1.5813638917554742e-06, "loss": 0.7293, "step": 7620 }, { "epoch": 0.31, "learning_rate": 1.5808393707119967e-06, "loss": 0.693, "step": 7625 }, { "epoch": 0.31, "learning_rate": 1.5803146083928956e-06, "loss": 0.7122, "step": 7630 }, { "epoch": 0.31, "learning_rate": 1.579789605016153e-06, "loss": 0.6895, "step": 7635 }, { "epoch": 0.31, "learning_rate": 1.5792643607998506e-06, "loss": 0.6916, "step": 7640 }, { "epoch": 0.31, "learning_rate": 1.57873887596217e-06, "loss": 0.6745, "step": 7645 }, { "epoch": 0.31, "learning_rate": 1.5782131507213934e-06, "loss": 0.7242, "step": 7650 }, { "epoch": 0.31, "learning_rate": 1.5776871852959026e-06, "loss": 0.714, "step": 7655 }, { "epoch": 0.31, "learning_rate": 1.5771609799041788e-06, "loss": 0.7086, "step": 7660 }, { "epoch": 0.31, "learning_rate": 1.576634534764803e-06, "loss": 0.738, "step": 7665 }, { "epoch": 0.31, "learning_rate": 1.5761078500964562e-06, "loss": 0.669, "step": 7670 }, { "epoch": 0.31, "learning_rate": 1.5755809261179185e-06, "loss": 0.6587, "step": 7675 }, { "epoch": 0.31, "learning_rate": 1.5750537630480696e-06, "loss": 0.676, "step": 7680 }, { "epoch": 0.31, "learning_rate": 1.5745263611058886e-06, "loss": 0.6838, "step": 7685 }, { "epoch": 0.31, "learning_rate": 1.5739987205104535e-06, "loss": 0.6182, "step": 7690 }, { "epoch": 0.31, "learning_rate": 1.5734708414809415e-06, "loss": 0.6497, "step": 7695 }, { "epoch": 0.31, "learning_rate": 1.572942724236629e-06, "loss": 0.6681, "step": 7700 }, { "epoch": 0.31, "learning_rate": 1.5724143689968915e-06, "loss": 0.7043, "step": 7705 }, { "epoch": 0.31, "learning_rate": 1.5718857759812033e-06, "loss": 0.6924, "step": 7710 }, { "epoch": 0.31, "learning_rate": 1.571356945409137e-06, "loss": 0.6852, "step": 7715 }, { "epoch": 0.31, "learning_rate": 1.570827877500364e-06, "loss": 0.7008, "step": 7720 }, { "epoch": 0.31, "learning_rate": 1.5702985724746552e-06, "loss": 0.7029, "step": 7725 }, { "epoch": 0.31, "learning_rate": 1.5697690305518787e-06, "loss": 0.6684, "step": 7730 }, { "epoch": 0.31, "learning_rate": 1.569239251952002e-06, "loss": 0.7089, "step": 7735 }, { "epoch": 0.31, "learning_rate": 1.5687092368950908e-06, "loss": 0.6756, "step": 7740 }, { "epoch": 0.31, "learning_rate": 1.5681789856013076e-06, "loss": 0.6874, "step": 7745 }, { "epoch": 0.31, "learning_rate": 1.5676484982909154e-06, "loss": 0.7055, "step": 7750 }, { "epoch": 0.31, "learning_rate": 1.5671177751842733e-06, "loss": 0.7159, "step": 7755 }, { "epoch": 0.32, "learning_rate": 1.5665868165018395e-06, "loss": 0.6968, "step": 7760 }, { "epoch": 0.32, "learning_rate": 1.566055622464169e-06, "loss": 0.6533, "step": 7765 }, { "epoch": 0.32, "learning_rate": 1.565524193291916e-06, "loss": 0.6589, "step": 7770 }, { "epoch": 0.32, "learning_rate": 1.564992529205831e-06, "loss": 0.7165, "step": 7775 }, { "epoch": 0.32, "learning_rate": 1.5644606304267627e-06, "loss": 0.6588, "step": 7780 }, { "epoch": 0.32, "learning_rate": 1.5639284971756574e-06, "loss": 0.7004, "step": 7785 }, { "epoch": 0.32, "learning_rate": 1.5633961296735585e-06, "loss": 0.7019, "step": 7790 }, { "epoch": 0.32, "learning_rate": 1.562863528141607e-06, "loss": 0.6711, "step": 7795 }, { "epoch": 0.32, "learning_rate": 1.5623306928010408e-06, "loss": 0.6558, "step": 7800 }, { "epoch": 0.32, "learning_rate": 1.561797623873195e-06, "loss": 0.7162, "step": 7805 }, { "epoch": 0.32, "learning_rate": 1.5612643215795017e-06, "loss": 0.6986, "step": 7810 }, { "epoch": 0.32, "learning_rate": 1.5607307861414905e-06, "loss": 0.717, "step": 7815 }, { "epoch": 0.32, "learning_rate": 1.560197017780787e-06, "loss": 0.7249, "step": 7820 }, { "epoch": 0.32, "learning_rate": 1.5596630167191138e-06, "loss": 0.6965, "step": 7825 }, { "epoch": 0.32, "learning_rate": 1.5591287831782908e-06, "loss": 0.679, "step": 7830 }, { "epoch": 0.32, "learning_rate": 1.5585943173802333e-06, "loss": 0.6812, "step": 7835 }, { "epoch": 0.32, "learning_rate": 1.5580596195469547e-06, "loss": 0.6957, "step": 7840 }, { "epoch": 0.32, "learning_rate": 1.5575246899005629e-06, "loss": 0.6792, "step": 7845 }, { "epoch": 0.32, "learning_rate": 1.5569895286632634e-06, "loss": 0.6675, "step": 7850 }, { "epoch": 0.32, "learning_rate": 1.5564541360573578e-06, "loss": 0.6931, "step": 7855 }, { "epoch": 0.32, "learning_rate": 1.5559185123052427e-06, "loss": 0.7044, "step": 7860 }, { "epoch": 0.32, "learning_rate": 1.5553826576294127e-06, "loss": 0.6978, "step": 7865 }, { "epoch": 0.32, "learning_rate": 1.5548465722524561e-06, "loss": 0.693, "step": 7870 }, { "epoch": 0.32, "learning_rate": 1.554310256397059e-06, "loss": 0.7044, "step": 7875 }, { "epoch": 0.32, "learning_rate": 1.5537737102860015e-06, "loss": 0.6841, "step": 7880 }, { "epoch": 0.32, "learning_rate": 1.5532369341421609e-06, "loss": 0.674, "step": 7885 }, { "epoch": 0.32, "learning_rate": 1.5526999281885088e-06, "loss": 0.6544, "step": 7890 }, { "epoch": 0.32, "learning_rate": 1.552162692648113e-06, "loss": 0.6951, "step": 7895 }, { "epoch": 0.32, "learning_rate": 1.551625227744137e-06, "loss": 0.6969, "step": 7900 }, { "epoch": 0.32, "learning_rate": 1.5510875336998382e-06, "loss": 0.6827, "step": 7905 }, { "epoch": 0.32, "learning_rate": 1.5505496107385704e-06, "loss": 0.6296, "step": 7910 }, { "epoch": 0.32, "learning_rate": 1.550011459083782e-06, "loss": 0.6884, "step": 7915 }, { "epoch": 0.32, "learning_rate": 1.549473078959017e-06, "loss": 0.6726, "step": 7920 }, { "epoch": 0.32, "learning_rate": 1.548934470587913e-06, "loss": 0.6732, "step": 7925 }, { "epoch": 0.32, "learning_rate": 1.548395634194204e-06, "loss": 0.6986, "step": 7930 }, { "epoch": 0.32, "learning_rate": 1.5478565700017174e-06, "loss": 0.7457, "step": 7935 }, { "epoch": 0.32, "learning_rate": 1.547317278234376e-06, "loss": 0.713, "step": 7940 }, { "epoch": 0.32, "learning_rate": 1.5467777591161973e-06, "loss": 0.6842, "step": 7945 }, { "epoch": 0.32, "learning_rate": 1.5462380128712921e-06, "loss": 0.7125, "step": 7950 }, { "epoch": 0.32, "learning_rate": 1.545698039723867e-06, "loss": 0.7007, "step": 7955 }, { "epoch": 0.32, "learning_rate": 1.5451578398982216e-06, "loss": 0.6896, "step": 7960 }, { "epoch": 0.32, "learning_rate": 1.5446174136187503e-06, "loss": 0.7371, "step": 7965 }, { "epoch": 0.32, "learning_rate": 1.544076761109942e-06, "loss": 0.6922, "step": 7970 }, { "epoch": 0.32, "learning_rate": 1.5435358825963784e-06, "loss": 0.7154, "step": 7975 }, { "epoch": 0.32, "learning_rate": 1.542994778302736e-06, "loss": 0.7194, "step": 7980 }, { "epoch": 0.32, "learning_rate": 1.5424534484537847e-06, "loss": 0.6813, "step": 7985 }, { "epoch": 0.32, "learning_rate": 1.5419118932743883e-06, "loss": 0.7203, "step": 7990 }, { "epoch": 0.32, "learning_rate": 1.5413701129895045e-06, "loss": 0.6675, "step": 7995 }, { "epoch": 0.32, "learning_rate": 1.5408281078241835e-06, "loss": 0.7152, "step": 8000 }, { "epoch": 0.32, "eval_loss": 0.6539024114608765, "eval_runtime": 138.566, "eval_samples_per_second": 17.075, "eval_steps_per_second": 2.851, "step": 8000 }, { "epoch": 0.33, "learning_rate": 1.5402858780035697e-06, "loss": 0.6859, "step": 8005 }, { "epoch": 0.33, "learning_rate": 1.5397434237529012e-06, "loss": 0.7315, "step": 8010 }, { "epoch": 0.33, "learning_rate": 1.5392007452975077e-06, "loss": 0.7081, "step": 8015 }, { "epoch": 0.33, "learning_rate": 1.5386578428628142e-06, "loss": 0.682, "step": 8020 }, { "epoch": 0.33, "learning_rate": 1.5381147166743369e-06, "loss": 0.6741, "step": 8025 }, { "epoch": 0.33, "learning_rate": 1.5375713669576857e-06, "loss": 0.6979, "step": 8030 }, { "epoch": 0.33, "learning_rate": 1.5370277939385644e-06, "loss": 0.6927, "step": 8035 }, { "epoch": 0.33, "learning_rate": 1.536483997842767e-06, "loss": 0.6856, "step": 8040 }, { "epoch": 0.33, "learning_rate": 1.5359399788961826e-06, "loss": 0.6683, "step": 8045 }, { "epoch": 0.33, "learning_rate": 1.5353957373247917e-06, "loss": 0.6646, "step": 8050 }, { "epoch": 0.33, "learning_rate": 1.5348512733546674e-06, "loss": 0.6684, "step": 8055 }, { "epoch": 0.33, "learning_rate": 1.5343065872119759e-06, "loss": 0.6741, "step": 8060 }, { "epoch": 0.33, "learning_rate": 1.5337616791229744e-06, "loss": 0.6936, "step": 8065 }, { "epoch": 0.33, "learning_rate": 1.5332165493140133e-06, "loss": 0.6646, "step": 8070 }, { "epoch": 0.33, "learning_rate": 1.5326711980115343e-06, "loss": 0.6936, "step": 8075 }, { "epoch": 0.33, "learning_rate": 1.5321256254420724e-06, "loss": 0.7009, "step": 8080 }, { "epoch": 0.33, "learning_rate": 1.5315798318322532e-06, "loss": 0.7183, "step": 8085 }, { "epoch": 0.33, "learning_rate": 1.5310338174087946e-06, "loss": 0.6853, "step": 8090 }, { "epoch": 0.33, "learning_rate": 1.5304875823985066e-06, "loss": 0.6851, "step": 8095 }, { "epoch": 0.33, "learning_rate": 1.5299411270282898e-06, "loss": 0.6607, "step": 8100 }, { "epoch": 0.33, "learning_rate": 1.5293944515251376e-06, "loss": 0.6715, "step": 8105 }, { "epoch": 0.33, "learning_rate": 1.5288475561161342e-06, "loss": 0.6525, "step": 8110 }, { "epoch": 0.33, "learning_rate": 1.5283004410284549e-06, "loss": 0.6958, "step": 8115 }, { "epoch": 0.33, "learning_rate": 1.5277531064893669e-06, "loss": 0.7071, "step": 8120 }, { "epoch": 0.33, "learning_rate": 1.5272055527262278e-06, "loss": 0.683, "step": 8125 }, { "epoch": 0.33, "learning_rate": 1.526657779966487e-06, "loss": 0.71, "step": 8130 }, { "epoch": 0.33, "learning_rate": 1.5261097884376848e-06, "loss": 0.683, "step": 8135 }, { "epoch": 0.33, "learning_rate": 1.5255615783674512e-06, "loss": 0.6911, "step": 8140 }, { "epoch": 0.33, "learning_rate": 1.5250131499835088e-06, "loss": 0.7089, "step": 8145 }, { "epoch": 0.33, "learning_rate": 1.5244645035136694e-06, "loss": 0.6528, "step": 8150 }, { "epoch": 0.33, "learning_rate": 1.5239156391858363e-06, "loss": 0.6626, "step": 8155 }, { "epoch": 0.33, "learning_rate": 1.523366557228003e-06, "loss": 0.6674, "step": 8160 }, { "epoch": 0.33, "learning_rate": 1.5228172578682531e-06, "loss": 0.6858, "step": 8165 }, { "epoch": 0.33, "learning_rate": 1.5222677413347612e-06, "loss": 0.6944, "step": 8170 }, { "epoch": 0.33, "learning_rate": 1.521718007855791e-06, "loss": 0.6623, "step": 8175 }, { "epoch": 0.33, "learning_rate": 1.5211680576596976e-06, "loss": 0.6764, "step": 8180 }, { "epoch": 0.33, "learning_rate": 1.5206178909749254e-06, "loss": 0.7022, "step": 8185 }, { "epoch": 0.33, "learning_rate": 1.5200675080300086e-06, "loss": 0.6437, "step": 8190 }, { "epoch": 0.33, "learning_rate": 1.519516909053572e-06, "loss": 0.6888, "step": 8195 }, { "epoch": 0.33, "learning_rate": 1.518966094274329e-06, "loss": 0.6837, "step": 8200 }, { "epoch": 0.33, "learning_rate": 1.518415063921084e-06, "loss": 0.7228, "step": 8205 }, { "epoch": 0.33, "learning_rate": 1.5178638182227292e-06, "loss": 0.6647, "step": 8210 }, { "epoch": 0.33, "learning_rate": 1.5173123574082482e-06, "loss": 0.6992, "step": 8215 }, { "epoch": 0.33, "learning_rate": 1.5167606817067129e-06, "loss": 0.6828, "step": 8220 }, { "epoch": 0.33, "learning_rate": 1.5162087913472844e-06, "loss": 0.7303, "step": 8225 }, { "epoch": 0.33, "learning_rate": 1.5156566865592128e-06, "loss": 0.6794, "step": 8230 }, { "epoch": 0.33, "learning_rate": 1.5151043675718383e-06, "loss": 0.6442, "step": 8235 }, { "epoch": 0.33, "learning_rate": 1.5145518346145887e-06, "loss": 0.6669, "step": 8240 }, { "epoch": 0.33, "learning_rate": 1.5139990879169822e-06, "loss": 0.6902, "step": 8245 }, { "epoch": 0.34, "learning_rate": 1.513446127708624e-06, "loss": 0.6503, "step": 8250 }, { "epoch": 0.34, "learning_rate": 1.5128929542192102e-06, "loss": 0.6971, "step": 8255 }, { "epoch": 0.34, "learning_rate": 1.512339567678523e-06, "loss": 0.7052, "step": 8260 }, { "epoch": 0.34, "learning_rate": 1.511785968316435e-06, "loss": 0.6673, "step": 8265 }, { "epoch": 0.34, "learning_rate": 1.5112321563629066e-06, "loss": 0.6816, "step": 8270 }, { "epoch": 0.34, "learning_rate": 1.5106781320479862e-06, "loss": 0.6708, "step": 8275 }, { "epoch": 0.34, "learning_rate": 1.5101238956018109e-06, "loss": 0.7032, "step": 8280 }, { "epoch": 0.34, "learning_rate": 1.5095694472546058e-06, "loss": 0.7008, "step": 8285 }, { "epoch": 0.34, "learning_rate": 1.5090147872366835e-06, "loss": 0.719, "step": 8290 }, { "epoch": 0.34, "learning_rate": 1.5084599157784453e-06, "loss": 0.7094, "step": 8295 }, { "epoch": 0.34, "learning_rate": 1.5079048331103797e-06, "loss": 0.7306, "step": 8300 }, { "epoch": 0.34, "learning_rate": 1.5073495394630636e-06, "loss": 0.7004, "step": 8305 }, { "epoch": 0.34, "learning_rate": 1.5067940350671606e-06, "loss": 0.7107, "step": 8310 }, { "epoch": 0.34, "learning_rate": 1.506238320153423e-06, "loss": 0.7033, "step": 8315 }, { "epoch": 0.34, "learning_rate": 1.5056823949526898e-06, "loss": 0.6855, "step": 8320 }, { "epoch": 0.34, "learning_rate": 1.5051262596958872e-06, "loss": 0.7119, "step": 8325 }, { "epoch": 0.34, "learning_rate": 1.5045699146140289e-06, "loss": 0.6853, "step": 8330 }, { "epoch": 0.34, "learning_rate": 1.5040133599382162e-06, "loss": 0.71, "step": 8335 }, { "epoch": 0.34, "learning_rate": 1.503456595899637e-06, "loss": 0.6527, "step": 8340 }, { "epoch": 0.34, "learning_rate": 1.5028996227295664e-06, "loss": 0.6692, "step": 8345 }, { "epoch": 0.34, "learning_rate": 1.5023424406593654e-06, "loss": 0.6568, "step": 8350 }, { "epoch": 0.34, "learning_rate": 1.5017850499204835e-06, "loss": 0.658, "step": 8355 }, { "epoch": 0.34, "learning_rate": 1.501227450744455e-06, "loss": 0.6895, "step": 8360 }, { "epoch": 0.34, "learning_rate": 1.5006696433629032e-06, "loss": 0.7062, "step": 8365 }, { "epoch": 0.34, "learning_rate": 1.5001116280075353e-06, "loss": 0.6883, "step": 8370 }, { "epoch": 0.34, "learning_rate": 1.499553404910146e-06, "loss": 0.666, "step": 8375 }, { "epoch": 0.34, "learning_rate": 1.4989949743026169e-06, "loss": 0.709, "step": 8380 }, { "epoch": 0.34, "learning_rate": 1.4984363364169145e-06, "loss": 0.7225, "step": 8385 }, { "epoch": 0.34, "learning_rate": 1.4978774914850933e-06, "loss": 0.6657, "step": 8390 }, { "epoch": 0.34, "learning_rate": 1.4973184397392915e-06, "loss": 0.6774, "step": 8395 }, { "epoch": 0.34, "learning_rate": 1.4967591814117347e-06, "loss": 0.6617, "step": 8400 }, { "epoch": 0.34, "learning_rate": 1.496199716734734e-06, "loss": 0.669, "step": 8405 }, { "epoch": 0.34, "learning_rate": 1.4956400459406862e-06, "loss": 0.6798, "step": 8410 }, { "epoch": 0.34, "learning_rate": 1.4950801692620735e-06, "loss": 0.6421, "step": 8415 }, { "epoch": 0.34, "learning_rate": 1.494520086931464e-06, "loss": 0.6742, "step": 8420 }, { "epoch": 0.34, "learning_rate": 1.4939597991815107e-06, "loss": 0.6478, "step": 8425 }, { "epoch": 0.34, "learning_rate": 1.493399306244953e-06, "loss": 0.6817, "step": 8430 }, { "epoch": 0.34, "learning_rate": 1.492838608354614e-06, "loss": 0.7469, "step": 8435 }, { "epoch": 0.34, "learning_rate": 1.4922777057434031e-06, "loss": 0.6713, "step": 8440 }, { "epoch": 0.34, "learning_rate": 1.4917165986443142e-06, "loss": 0.7093, "step": 8445 }, { "epoch": 0.34, "learning_rate": 1.4911552872904266e-06, "loss": 0.6825, "step": 8450 }, { "epoch": 0.34, "learning_rate": 1.4905937719149035e-06, "loss": 0.664, "step": 8455 }, { "epoch": 0.34, "learning_rate": 1.4900320527509942e-06, "loss": 0.6827, "step": 8460 }, { "epoch": 0.34, "learning_rate": 1.489470130032032e-06, "loss": 0.7093, "step": 8465 }, { "epoch": 0.34, "learning_rate": 1.488908003991434e-06, "loss": 0.7162, "step": 8470 }, { "epoch": 0.34, "learning_rate": 1.4883456748627032e-06, "loss": 0.6738, "step": 8475 }, { "epoch": 0.34, "learning_rate": 1.4877831428794258e-06, "loss": 0.6977, "step": 8480 }, { "epoch": 0.34, "learning_rate": 1.4872204082752728e-06, "loss": 0.7097, "step": 8485 }, { "epoch": 0.34, "learning_rate": 1.4866574712839994e-06, "loss": 0.6716, "step": 8490 }, { "epoch": 0.35, "learning_rate": 1.4860943321394443e-06, "loss": 0.6891, "step": 8495 }, { "epoch": 0.35, "learning_rate": 1.4855309910755313e-06, "loss": 0.6471, "step": 8500 }, { "epoch": 0.35, "learning_rate": 1.4849674483262668e-06, "loss": 0.6544, "step": 8505 }, { "epoch": 0.35, "learning_rate": 1.4844037041257416e-06, "loss": 0.6554, "step": 8510 }, { "epoch": 0.35, "learning_rate": 1.4838397587081307e-06, "loss": 0.694, "step": 8515 }, { "epoch": 0.35, "learning_rate": 1.4832756123076912e-06, "loss": 0.6488, "step": 8520 }, { "epoch": 0.35, "learning_rate": 1.4827112651587656e-06, "loss": 0.6885, "step": 8525 }, { "epoch": 0.35, "learning_rate": 1.482146717495778e-06, "loss": 0.6827, "step": 8530 }, { "epoch": 0.35, "learning_rate": 1.481581969553237e-06, "loss": 0.7017, "step": 8535 }, { "epoch": 0.35, "learning_rate": 1.481017021565734e-06, "loss": 0.681, "step": 8540 }, { "epoch": 0.35, "learning_rate": 1.4804518737679432e-06, "loss": 0.6228, "step": 8545 }, { "epoch": 0.35, "learning_rate": 1.4798865263946223e-06, "loss": 0.7208, "step": 8550 }, { "epoch": 0.35, "learning_rate": 1.4793209796806117e-06, "loss": 0.6627, "step": 8555 }, { "epoch": 0.35, "learning_rate": 1.4787552338608341e-06, "loss": 0.6698, "step": 8560 }, { "epoch": 0.35, "learning_rate": 1.4781892891702965e-06, "loss": 0.6631, "step": 8565 }, { "epoch": 0.35, "learning_rate": 1.4776231458440862e-06, "loss": 0.6991, "step": 8570 }, { "epoch": 0.35, "learning_rate": 1.477056804117375e-06, "loss": 0.65, "step": 8575 }, { "epoch": 0.35, "learning_rate": 1.476490264225416e-06, "loss": 0.6846, "step": 8580 }, { "epoch": 0.35, "learning_rate": 1.475923526403545e-06, "loss": 0.6916, "step": 8585 }, { "epoch": 0.35, "learning_rate": 1.47535659088718e-06, "loss": 0.654, "step": 8590 }, { "epoch": 0.35, "learning_rate": 1.4747894579118208e-06, "loss": 0.7077, "step": 8595 }, { "epoch": 0.35, "learning_rate": 1.47422212771305e-06, "loss": 0.6887, "step": 8600 }, { "epoch": 0.35, "learning_rate": 1.4736546005265314e-06, "loss": 0.6919, "step": 8605 }, { "epoch": 0.35, "learning_rate": 1.4730868765880109e-06, "loss": 0.6471, "step": 8610 }, { "epoch": 0.35, "learning_rate": 1.4725189561333158e-06, "loss": 0.706, "step": 8615 }, { "epoch": 0.35, "learning_rate": 1.4719508393983555e-06, "loss": 0.6916, "step": 8620 }, { "epoch": 0.35, "learning_rate": 1.471382526619121e-06, "loss": 0.6624, "step": 8625 }, { "epoch": 0.35, "learning_rate": 1.4708140180316843e-06, "loss": 0.6888, "step": 8630 }, { "epoch": 0.35, "learning_rate": 1.470245313872199e-06, "loss": 0.6796, "step": 8635 }, { "epoch": 0.35, "learning_rate": 1.4696764143768997e-06, "loss": 0.6776, "step": 8640 }, { "epoch": 0.35, "learning_rate": 1.469107319782102e-06, "loss": 0.7217, "step": 8645 }, { "epoch": 0.35, "learning_rate": 1.4685380303242037e-06, "loss": 0.6858, "step": 8650 }, { "epoch": 0.35, "learning_rate": 1.4679685462396817e-06, "loss": 0.6937, "step": 8655 }, { "epoch": 0.35, "learning_rate": 1.467398867765096e-06, "loss": 0.6747, "step": 8660 }, { "epoch": 0.35, "learning_rate": 1.4668289951370848e-06, "loss": 0.6699, "step": 8665 }, { "epoch": 0.35, "learning_rate": 1.4662589285923686e-06, "loss": 0.7111, "step": 8670 }, { "epoch": 0.35, "learning_rate": 1.4656886683677486e-06, "loss": 0.667, "step": 8675 }, { "epoch": 0.35, "learning_rate": 1.4651182147001055e-06, "loss": 0.6586, "step": 8680 }, { "epoch": 0.35, "learning_rate": 1.464547567826401e-06, "loss": 0.6996, "step": 8685 }, { "epoch": 0.35, "learning_rate": 1.4639767279836766e-06, "loss": 0.7181, "step": 8690 }, { "epoch": 0.35, "learning_rate": 1.463405695409054e-06, "loss": 0.6878, "step": 8695 }, { "epoch": 0.35, "learning_rate": 1.462834470339736e-06, "loss": 0.6247, "step": 8700 }, { "epoch": 0.35, "learning_rate": 1.4622630530130037e-06, "loss": 0.7024, "step": 8705 }, { "epoch": 0.35, "learning_rate": 1.4616914436662195e-06, "loss": 0.6748, "step": 8710 }, { "epoch": 0.35, "learning_rate": 1.4611196425368247e-06, "loss": 0.6877, "step": 8715 }, { "epoch": 0.35, "learning_rate": 1.46054764986234e-06, "loss": 0.663, "step": 8720 }, { "epoch": 0.35, "learning_rate": 1.4599754658803671e-06, "loss": 0.6649, "step": 8725 }, { "epoch": 0.35, "learning_rate": 1.4594030908285858e-06, "loss": 0.6701, "step": 8730 }, { "epoch": 0.35, "learning_rate": 1.4588305249447557e-06, "loss": 0.6684, "step": 8735 }, { "epoch": 0.36, "learning_rate": 1.4582577684667156e-06, "loss": 0.7104, "step": 8740 }, { "epoch": 0.36, "learning_rate": 1.457684821632384e-06, "loss": 0.7183, "step": 8745 }, { "epoch": 0.36, "learning_rate": 1.457111684679757e-06, "loss": 0.7016, "step": 8750 }, { "epoch": 0.36, "learning_rate": 1.4565383578469119e-06, "loss": 0.6959, "step": 8755 }, { "epoch": 0.36, "learning_rate": 1.4559648413720033e-06, "loss": 0.7299, "step": 8760 }, { "epoch": 0.36, "learning_rate": 1.4553911354932646e-06, "loss": 0.6539, "step": 8765 }, { "epoch": 0.36, "learning_rate": 1.4548172404490089e-06, "loss": 0.685, "step": 8770 }, { "epoch": 0.36, "learning_rate": 1.4542431564776265e-06, "loss": 0.7004, "step": 8775 }, { "epoch": 0.36, "learning_rate": 1.453668883817587e-06, "loss": 0.6742, "step": 8780 }, { "epoch": 0.36, "learning_rate": 1.453094422707439e-06, "loss": 0.6744, "step": 8785 }, { "epoch": 0.36, "learning_rate": 1.4525197733858077e-06, "loss": 0.6871, "step": 8790 }, { "epoch": 0.36, "learning_rate": 1.451944936091398e-06, "loss": 0.6567, "step": 8795 }, { "epoch": 0.36, "learning_rate": 1.4513699110629921e-06, "loss": 0.6838, "step": 8800 }, { "epoch": 0.36, "eval_loss": 0.6491459012031555, "eval_runtime": 138.8275, "eval_samples_per_second": 17.043, "eval_steps_per_second": 2.845, "step": 8800 }, { "epoch": 0.36, "learning_rate": 1.45079469853945e-06, "loss": 0.6431, "step": 8805 }, { "epoch": 0.36, "learning_rate": 1.4502192987597113e-06, "loss": 0.6606, "step": 8810 }, { "epoch": 0.36, "learning_rate": 1.4496437119627905e-06, "loss": 0.6917, "step": 8815 }, { "epoch": 0.36, "learning_rate": 1.4490679383877825e-06, "loss": 0.6826, "step": 8820 }, { "epoch": 0.36, "learning_rate": 1.4484919782738581e-06, "loss": 0.6904, "step": 8825 }, { "epoch": 0.36, "learning_rate": 1.4479158318602658e-06, "loss": 0.6781, "step": 8830 }, { "epoch": 0.36, "learning_rate": 1.4473394993863325e-06, "loss": 0.7099, "step": 8835 }, { "epoch": 0.36, "learning_rate": 1.4467629810914615e-06, "loss": 0.6993, "step": 8840 }, { "epoch": 0.36, "learning_rate": 1.4461862772151333e-06, "loss": 0.6606, "step": 8845 }, { "epoch": 0.36, "learning_rate": 1.4456093879969057e-06, "loss": 0.7129, "step": 8850 }, { "epoch": 0.36, "learning_rate": 1.4450323136764136e-06, "loss": 0.6789, "step": 8855 }, { "epoch": 0.36, "learning_rate": 1.4444550544933684e-06, "loss": 0.6828, "step": 8860 }, { "epoch": 0.36, "learning_rate": 1.443877610687559e-06, "loss": 0.701, "step": 8865 }, { "epoch": 0.36, "learning_rate": 1.4432999824988503e-06, "loss": 0.6962, "step": 8870 }, { "epoch": 0.36, "learning_rate": 1.442722170167184e-06, "loss": 0.6399, "step": 8875 }, { "epoch": 0.36, "learning_rate": 1.4421441739325782e-06, "loss": 0.6943, "step": 8880 }, { "epoch": 0.36, "learning_rate": 1.4415659940351275e-06, "loss": 0.6932, "step": 8885 }, { "epoch": 0.36, "learning_rate": 1.440987630715003e-06, "loss": 0.7096, "step": 8890 }, { "epoch": 0.36, "learning_rate": 1.4404090842124519e-06, "loss": 0.6652, "step": 8895 }, { "epoch": 0.36, "learning_rate": 1.439830354767797e-06, "loss": 0.6988, "step": 8900 }, { "epoch": 0.36, "learning_rate": 1.4392514426214378e-06, "loss": 0.6862, "step": 8905 }, { "epoch": 0.36, "learning_rate": 1.4386723480138491e-06, "loss": 0.7313, "step": 8910 }, { "epoch": 0.36, "learning_rate": 1.438093071185582e-06, "loss": 0.6876, "step": 8915 }, { "epoch": 0.36, "learning_rate": 1.437513612377263e-06, "loss": 0.664, "step": 8920 }, { "epoch": 0.36, "learning_rate": 1.436933971829594e-06, "loss": 0.6918, "step": 8925 }, { "epoch": 0.36, "learning_rate": 1.4363541497833534e-06, "loss": 0.7093, "step": 8930 }, { "epoch": 0.36, "learning_rate": 1.4357741464793932e-06, "loss": 0.7008, "step": 8935 }, { "epoch": 0.36, "learning_rate": 1.4351939621586424e-06, "loss": 0.6459, "step": 8940 }, { "epoch": 0.36, "learning_rate": 1.4346135970621045e-06, "loss": 0.7083, "step": 8945 }, { "epoch": 0.36, "learning_rate": 1.4340330514308576e-06, "loss": 0.6674, "step": 8950 }, { "epoch": 0.36, "learning_rate": 1.4334523255060563e-06, "loss": 0.6906, "step": 8955 }, { "epoch": 0.36, "learning_rate": 1.432871419528928e-06, "loss": 0.6976, "step": 8960 }, { "epoch": 0.36, "learning_rate": 1.432290333740776e-06, "loss": 0.6487, "step": 8965 }, { "epoch": 0.36, "learning_rate": 1.4317090683829797e-06, "loss": 0.6716, "step": 8970 }, { "epoch": 0.36, "learning_rate": 1.43112762369699e-06, "loss": 0.6578, "step": 8975 }, { "epoch": 0.36, "learning_rate": 1.4305459999243353e-06, "loss": 0.6437, "step": 8980 }, { "epoch": 0.36, "learning_rate": 1.429964197306616e-06, "loss": 0.6527, "step": 8985 }, { "epoch": 0.37, "learning_rate": 1.4293822160855083e-06, "loss": 0.7384, "step": 8990 }, { "epoch": 0.37, "learning_rate": 1.4288000565027623e-06, "loss": 0.6817, "step": 8995 }, { "epoch": 0.37, "learning_rate": 1.4282177188002016e-06, "loss": 0.6805, "step": 9000 }, { "epoch": 0.37, "learning_rate": 1.427635203219725e-06, "loss": 0.6908, "step": 9005 }, { "epoch": 0.37, "learning_rate": 1.4270525100033036e-06, "loss": 0.6745, "step": 9010 }, { "epoch": 0.37, "learning_rate": 1.4264696393929832e-06, "loss": 0.6801, "step": 9015 }, { "epoch": 0.37, "learning_rate": 1.4258865916308834e-06, "loss": 0.6856, "step": 9020 }, { "epoch": 0.37, "learning_rate": 1.4253033669591971e-06, "loss": 0.6676, "step": 9025 }, { "epoch": 0.37, "learning_rate": 1.424719965620191e-06, "loss": 0.6524, "step": 9030 }, { "epoch": 0.37, "learning_rate": 1.4241363878562046e-06, "loss": 0.6867, "step": 9035 }, { "epoch": 0.37, "learning_rate": 1.4235526339096514e-06, "loss": 0.6674, "step": 9040 }, { "epoch": 0.37, "learning_rate": 1.422968704023017e-06, "loss": 0.659, "step": 9045 }, { "epoch": 0.37, "learning_rate": 1.4223845984388613e-06, "loss": 0.6706, "step": 9050 }, { "epoch": 0.37, "learning_rate": 1.421800317399817e-06, "loss": 0.6867, "step": 9055 }, { "epoch": 0.37, "learning_rate": 1.421215861148589e-06, "loss": 0.7136, "step": 9060 }, { "epoch": 0.37, "learning_rate": 1.420631229927955e-06, "loss": 0.7003, "step": 9065 }, { "epoch": 0.37, "learning_rate": 1.4200464239807664e-06, "loss": 0.6764, "step": 9070 }, { "epoch": 0.37, "learning_rate": 1.4194614435499458e-06, "loss": 0.6954, "step": 9075 }, { "epoch": 0.37, "learning_rate": 1.4188762888784897e-06, "loss": 0.6641, "step": 9080 }, { "epoch": 0.37, "learning_rate": 1.4182909602094662e-06, "loss": 0.6822, "step": 9085 }, { "epoch": 0.37, "learning_rate": 1.417705457786015e-06, "loss": 0.7138, "step": 9090 }, { "epoch": 0.37, "learning_rate": 1.4171197818513495e-06, "loss": 0.7152, "step": 9095 }, { "epoch": 0.37, "learning_rate": 1.4165339326487535e-06, "loss": 0.6324, "step": 9100 }, { "epoch": 0.37, "learning_rate": 1.4159479104215846e-06, "loss": 0.6677, "step": 9105 }, { "epoch": 0.37, "learning_rate": 1.4153617154132712e-06, "loss": 0.6521, "step": 9110 }, { "epoch": 0.37, "learning_rate": 1.414775347867313e-06, "loss": 0.7191, "step": 9115 }, { "epoch": 0.37, "learning_rate": 1.4141888080272825e-06, "loss": 0.6545, "step": 9120 }, { "epoch": 0.37, "learning_rate": 1.4136020961368228e-06, "loss": 0.7033, "step": 9125 }, { "epoch": 0.37, "learning_rate": 1.4130152124396497e-06, "loss": 0.6917, "step": 9130 }, { "epoch": 0.37, "learning_rate": 1.412428157179549e-06, "loss": 0.7083, "step": 9135 }, { "epoch": 0.37, "learning_rate": 1.4118409306003784e-06, "loss": 0.6786, "step": 9140 }, { "epoch": 0.37, "learning_rate": 1.4112535329460671e-06, "loss": 0.677, "step": 9145 }, { "epoch": 0.37, "learning_rate": 1.4106659644606139e-06, "loss": 0.6903, "step": 9150 }, { "epoch": 0.37, "learning_rate": 1.410078225388091e-06, "loss": 0.659, "step": 9155 }, { "epoch": 0.37, "learning_rate": 1.409490315972639e-06, "loss": 0.7028, "step": 9160 }, { "epoch": 0.37, "learning_rate": 1.4089022364584712e-06, "loss": 0.6607, "step": 9165 }, { "epoch": 0.37, "learning_rate": 1.4083139870898706e-06, "loss": 0.6595, "step": 9170 }, { "epoch": 0.37, "learning_rate": 1.4077255681111903e-06, "loss": 0.6557, "step": 9175 }, { "epoch": 0.37, "learning_rate": 1.4071369797668545e-06, "loss": 0.659, "step": 9180 }, { "epoch": 0.37, "learning_rate": 1.4065482223013585e-06, "loss": 0.6879, "step": 9185 }, { "epoch": 0.37, "learning_rate": 1.405959295959266e-06, "loss": 0.6708, "step": 9190 }, { "epoch": 0.37, "learning_rate": 1.4053702009852128e-06, "loss": 0.6839, "step": 9195 }, { "epoch": 0.37, "learning_rate": 1.4047809376239034e-06, "loss": 0.6835, "step": 9200 }, { "epoch": 0.37, "learning_rate": 1.4041915061201122e-06, "loss": 0.7229, "step": 9205 }, { "epoch": 0.37, "learning_rate": 1.4036019067186843e-06, "loss": 0.6944, "step": 9210 }, { "epoch": 0.37, "learning_rate": 1.4030121396645349e-06, "loss": 0.6232, "step": 9215 }, { "epoch": 0.37, "learning_rate": 1.402422205202647e-06, "loss": 0.7058, "step": 9220 }, { "epoch": 0.37, "learning_rate": 1.4018321035780747e-06, "loss": 0.6815, "step": 9225 }, { "epoch": 0.37, "learning_rate": 1.4012418350359414e-06, "loss": 0.6449, "step": 9230 }, { "epoch": 0.38, "learning_rate": 1.4006513998214386e-06, "loss": 0.6611, "step": 9235 }, { "epoch": 0.38, "learning_rate": 1.4000607981798292e-06, "loss": 0.6489, "step": 9240 }, { "epoch": 0.38, "learning_rate": 1.3994700303564431e-06, "loss": 0.6866, "step": 9245 }, { "epoch": 0.38, "learning_rate": 1.3988790965966801e-06, "loss": 0.6553, "step": 9250 }, { "epoch": 0.38, "learning_rate": 1.3982879971460096e-06, "loss": 0.6936, "step": 9255 }, { "epoch": 0.38, "learning_rate": 1.3976967322499683e-06, "loss": 0.6741, "step": 9260 }, { "epoch": 0.38, "learning_rate": 1.3971053021541633e-06, "loss": 0.6491, "step": 9265 }, { "epoch": 0.38, "learning_rate": 1.396513707104269e-06, "loss": 0.6819, "step": 9270 }, { "epoch": 0.38, "learning_rate": 1.395921947346029e-06, "loss": 0.6472, "step": 9275 }, { "epoch": 0.38, "learning_rate": 1.3953300231252554e-06, "loss": 0.6588, "step": 9280 }, { "epoch": 0.38, "learning_rate": 1.394737934687828e-06, "loss": 0.6795, "step": 9285 }, { "epoch": 0.38, "learning_rate": 1.3941456822796954e-06, "loss": 0.6876, "step": 9290 }, { "epoch": 0.38, "learning_rate": 1.393553266146874e-06, "loss": 0.6439, "step": 9295 }, { "epoch": 0.38, "learning_rate": 1.3929606865354484e-06, "loss": 0.6678, "step": 9300 }, { "epoch": 0.38, "learning_rate": 1.3923679436915711e-06, "loss": 0.6646, "step": 9305 }, { "epoch": 0.38, "learning_rate": 1.3917750378614619e-06, "loss": 0.6721, "step": 9310 }, { "epoch": 0.38, "learning_rate": 1.391181969291409e-06, "loss": 0.6741, "step": 9315 }, { "epoch": 0.38, "learning_rate": 1.3905887382277683e-06, "loss": 0.6298, "step": 9320 }, { "epoch": 0.38, "learning_rate": 1.3899953449169622e-06, "loss": 0.6536, "step": 9325 }, { "epoch": 0.38, "learning_rate": 1.3894017896054814e-06, "loss": 0.7037, "step": 9330 }, { "epoch": 0.38, "learning_rate": 1.3888080725398836e-06, "loss": 0.6777, "step": 9335 }, { "epoch": 0.38, "learning_rate": 1.3882141939667937e-06, "loss": 0.6805, "step": 9340 }, { "epoch": 0.38, "learning_rate": 1.3876201541329038e-06, "loss": 0.666, "step": 9345 }, { "epoch": 0.38, "learning_rate": 1.3870259532849726e-06, "loss": 0.6619, "step": 9350 }, { "epoch": 0.38, "learning_rate": 1.3864315916698264e-06, "loss": 0.7109, "step": 9355 }, { "epoch": 0.38, "learning_rate": 1.3858370695343573e-06, "loss": 0.6914, "step": 9360 }, { "epoch": 0.38, "learning_rate": 1.3852423871255252e-06, "loss": 0.6847, "step": 9365 }, { "epoch": 0.38, "learning_rate": 1.3846475446903555e-06, "loss": 0.6819, "step": 9370 }, { "epoch": 0.38, "learning_rate": 1.384052542475941e-06, "loss": 0.7004, "step": 9375 }, { "epoch": 0.38, "learning_rate": 1.3834573807294404e-06, "loss": 0.7206, "step": 9380 }, { "epoch": 0.38, "learning_rate": 1.382862059698078e-06, "loss": 0.7003, "step": 9385 }, { "epoch": 0.38, "learning_rate": 1.3822665796291459e-06, "loss": 0.686, "step": 9390 }, { "epoch": 0.38, "learning_rate": 1.3816709407700007e-06, "loss": 0.682, "step": 9395 }, { "epoch": 0.38, "learning_rate": 1.3810751433680662e-06, "loss": 0.6414, "step": 9400 }, { "epoch": 0.38, "learning_rate": 1.380479187670831e-06, "loss": 0.6494, "step": 9405 }, { "epoch": 0.38, "learning_rate": 1.37988307392585e-06, "loss": 0.6997, "step": 9410 }, { "epoch": 0.38, "learning_rate": 1.3792868023807441e-06, "loss": 0.6835, "step": 9415 }, { "epoch": 0.38, "learning_rate": 1.3786903732831982e-06, "loss": 0.6819, "step": 9420 }, { "epoch": 0.38, "learning_rate": 1.378093786880965e-06, "loss": 0.6568, "step": 9425 }, { "epoch": 0.38, "learning_rate": 1.3774970434218609e-06, "loss": 0.6811, "step": 9430 }, { "epoch": 0.38, "learning_rate": 1.3769001431537675e-06, "loss": 0.6692, "step": 9435 }, { "epoch": 0.38, "learning_rate": 1.3763030863246326e-06, "loss": 0.6732, "step": 9440 }, { "epoch": 0.38, "learning_rate": 1.3757058731824679e-06, "loss": 0.6602, "step": 9445 }, { "epoch": 0.38, "learning_rate": 1.3751085039753508e-06, "loss": 0.6597, "step": 9450 }, { "epoch": 0.38, "learning_rate": 1.3745109789514233e-06, "loss": 0.6906, "step": 9455 }, { "epoch": 0.38, "learning_rate": 1.3739132983588921e-06, "loss": 0.6466, "step": 9460 }, { "epoch": 0.38, "learning_rate": 1.3733154624460283e-06, "loss": 0.6613, "step": 9465 }, { "epoch": 0.38, "learning_rate": 1.372717471461168e-06, "loss": 0.6821, "step": 9470 }, { "epoch": 0.38, "learning_rate": 1.3721193256527116e-06, "loss": 0.6539, "step": 9475 }, { "epoch": 0.39, "learning_rate": 1.3715210252691232e-06, "loss": 0.6994, "step": 9480 }, { "epoch": 0.39, "learning_rate": 1.3709225705589318e-06, "loss": 0.6556, "step": 9485 }, { "epoch": 0.39, "learning_rate": 1.3703239617707306e-06, "loss": 0.7155, "step": 9490 }, { "epoch": 0.39, "learning_rate": 1.3697251991531756e-06, "loss": 0.6621, "step": 9495 }, { "epoch": 0.39, "learning_rate": 1.3691262829549883e-06, "loss": 0.6771, "step": 9500 }, { "epoch": 0.39, "learning_rate": 1.3685272134249535e-06, "loss": 0.7038, "step": 9505 }, { "epoch": 0.39, "learning_rate": 1.3679279908119187e-06, "loss": 0.6903, "step": 9510 }, { "epoch": 0.39, "learning_rate": 1.3673286153647967e-06, "loss": 0.7236, "step": 9515 }, { "epoch": 0.39, "learning_rate": 1.3667290873325618e-06, "loss": 0.6958, "step": 9520 }, { "epoch": 0.39, "learning_rate": 1.3661294069642537e-06, "loss": 0.653, "step": 9525 }, { "epoch": 0.39, "learning_rate": 1.3655295745089736e-06, "loss": 0.6925, "step": 9530 }, { "epoch": 0.39, "learning_rate": 1.3649295902158873e-06, "loss": 0.6607, "step": 9535 }, { "epoch": 0.39, "learning_rate": 1.3643294543342228e-06, "loss": 0.7378, "step": 9540 }, { "epoch": 0.39, "learning_rate": 1.363729167113271e-06, "loss": 0.6789, "step": 9545 }, { "epoch": 0.39, "learning_rate": 1.3631287288023866e-06, "loss": 0.6862, "step": 9550 }, { "epoch": 0.39, "learning_rate": 1.362528139650986e-06, "loss": 0.7204, "step": 9555 }, { "epoch": 0.39, "learning_rate": 1.361927399908549e-06, "loss": 0.6474, "step": 9560 }, { "epoch": 0.39, "learning_rate": 1.3613265098246176e-06, "loss": 0.6564, "step": 9565 }, { "epoch": 0.39, "learning_rate": 1.3607254696487962e-06, "loss": 0.6762, "step": 9570 }, { "epoch": 0.39, "learning_rate": 1.360124279630752e-06, "loss": 0.6903, "step": 9575 }, { "epoch": 0.39, "learning_rate": 1.3595229400202134e-06, "loss": 0.6778, "step": 9580 }, { "epoch": 0.39, "learning_rate": 1.3589214510669722e-06, "loss": 0.7117, "step": 9585 }, { "epoch": 0.39, "learning_rate": 1.3583198130208819e-06, "loss": 0.6531, "step": 9590 }, { "epoch": 0.39, "learning_rate": 1.3577180261318569e-06, "loss": 0.7066, "step": 9595 }, { "epoch": 0.39, "learning_rate": 1.357116090649875e-06, "loss": 0.7132, "step": 9600 }, { "epoch": 0.39, "eval_loss": 0.6456849575042725, "eval_runtime": 138.6043, "eval_samples_per_second": 17.07, "eval_steps_per_second": 2.85, "step": 9600 }, { "epoch": 0.39, "learning_rate": 1.3565140068249746e-06, "loss": 0.6633, "step": 9605 }, { "epoch": 0.39, "learning_rate": 1.3559117749072559e-06, "loss": 0.6652, "step": 9610 }, { "epoch": 0.39, "learning_rate": 1.3553093951468817e-06, "loss": 0.6863, "step": 9615 }, { "epoch": 0.39, "learning_rate": 1.354706867794074e-06, "loss": 0.6849, "step": 9620 }, { "epoch": 0.39, "learning_rate": 1.3541041930991187e-06, "loss": 0.6647, "step": 9625 }, { "epoch": 0.39, "learning_rate": 1.3535013713123606e-06, "loss": 0.6668, "step": 9630 }, { "epoch": 0.39, "learning_rate": 1.352898402684207e-06, "loss": 0.6574, "step": 9635 }, { "epoch": 0.39, "learning_rate": 1.3522952874651262e-06, "loss": 0.6735, "step": 9640 }, { "epoch": 0.39, "learning_rate": 1.3516920259056467e-06, "loss": 0.688, "step": 9645 }, { "epoch": 0.39, "learning_rate": 1.351088618256358e-06, "loss": 0.6553, "step": 9650 }, { "epoch": 0.39, "learning_rate": 1.3504850647679104e-06, "loss": 0.6818, "step": 9655 }, { "epoch": 0.39, "learning_rate": 1.349881365691015e-06, "loss": 0.6596, "step": 9660 }, { "epoch": 0.39, "learning_rate": 1.349277521276443e-06, "loss": 0.6655, "step": 9665 }, { "epoch": 0.39, "learning_rate": 1.3486735317750257e-06, "loss": 0.6465, "step": 9670 }, { "epoch": 0.39, "learning_rate": 1.3480693974376561e-06, "loss": 0.6647, "step": 9675 }, { "epoch": 0.39, "learning_rate": 1.3474651185152854e-06, "loss": 0.6955, "step": 9680 }, { "epoch": 0.39, "learning_rate": 1.3468606952589265e-06, "loss": 0.6851, "step": 9685 }, { "epoch": 0.39, "learning_rate": 1.346256127919651e-06, "loss": 0.6865, "step": 9690 }, { "epoch": 0.39, "learning_rate": 1.345651416748591e-06, "loss": 0.6518, "step": 9695 }, { "epoch": 0.39, "learning_rate": 1.3450465619969386e-06, "loss": 0.67, "step": 9700 }, { "epoch": 0.39, "learning_rate": 1.344441563915945e-06, "loss": 0.7331, "step": 9705 }, { "epoch": 0.39, "learning_rate": 1.3438364227569215e-06, "loss": 0.7002, "step": 9710 }, { "epoch": 0.39, "learning_rate": 1.3432311387712377e-06, "loss": 0.6777, "step": 9715 }, { "epoch": 0.39, "learning_rate": 1.3426257122103237e-06, "loss": 0.7075, "step": 9720 }, { "epoch": 0.4, "learning_rate": 1.3420201433256689e-06, "loss": 0.707, "step": 9725 }, { "epoch": 0.4, "learning_rate": 1.3414144323688204e-06, "loss": 0.67, "step": 9730 }, { "epoch": 0.4, "learning_rate": 1.3408085795913862e-06, "loss": 0.6853, "step": 9735 }, { "epoch": 0.4, "learning_rate": 1.3402025852450317e-06, "loss": 0.6859, "step": 9740 }, { "epoch": 0.4, "learning_rate": 1.3395964495814815e-06, "loss": 0.7109, "step": 9745 }, { "epoch": 0.4, "learning_rate": 1.33899017285252e-06, "loss": 0.7126, "step": 9750 }, { "epoch": 0.4, "learning_rate": 1.3383837553099882e-06, "loss": 0.6782, "step": 9755 }, { "epoch": 0.4, "learning_rate": 1.3377771972057876e-06, "loss": 0.6577, "step": 9760 }, { "epoch": 0.4, "learning_rate": 1.3371704987918763e-06, "loss": 0.6859, "step": 9765 }, { "epoch": 0.4, "learning_rate": 1.3365636603202724e-06, "loss": 0.705, "step": 9770 }, { "epoch": 0.4, "learning_rate": 1.3359566820430509e-06, "loss": 0.6359, "step": 9775 }, { "epoch": 0.4, "learning_rate": 1.335349564212345e-06, "loss": 0.6926, "step": 9780 }, { "epoch": 0.4, "learning_rate": 1.3347423070803465e-06, "loss": 0.6667, "step": 9785 }, { "epoch": 0.4, "learning_rate": 1.3341349108993047e-06, "loss": 0.7033, "step": 9790 }, { "epoch": 0.4, "learning_rate": 1.3335273759215269e-06, "loss": 0.6923, "step": 9795 }, { "epoch": 0.4, "learning_rate": 1.3329197023993774e-06, "loss": 0.706, "step": 9800 }, { "epoch": 0.4, "learning_rate": 1.3323118905852789e-06, "loss": 0.6059, "step": 9805 }, { "epoch": 0.4, "learning_rate": 1.331703940731711e-06, "loss": 0.7087, "step": 9810 }, { "epoch": 0.4, "learning_rate": 1.3310958530912106e-06, "loss": 0.7124, "step": 9815 }, { "epoch": 0.4, "learning_rate": 1.3304876279163725e-06, "loss": 0.6984, "step": 9820 }, { "epoch": 0.4, "learning_rate": 1.3298792654598474e-06, "loss": 0.6993, "step": 9825 }, { "epoch": 0.4, "learning_rate": 1.3292707659743442e-06, "loss": 0.7156, "step": 9830 }, { "epoch": 0.4, "learning_rate": 1.3286621297126285e-06, "loss": 0.7062, "step": 9835 }, { "epoch": 0.4, "learning_rate": 1.3280533569275223e-06, "loss": 0.6437, "step": 9840 }, { "epoch": 0.4, "learning_rate": 1.3274444478719048e-06, "loss": 0.6651, "step": 9845 }, { "epoch": 0.4, "learning_rate": 1.3268354027987108e-06, "loss": 0.6647, "step": 9850 }, { "epoch": 0.4, "learning_rate": 1.3262262219609329e-06, "loss": 0.6911, "step": 9855 }, { "epoch": 0.4, "learning_rate": 1.3256169056116195e-06, "loss": 0.6931, "step": 9860 }, { "epoch": 0.4, "learning_rate": 1.3250074540038751e-06, "loss": 0.6979, "step": 9865 }, { "epoch": 0.4, "learning_rate": 1.324397867390861e-06, "loss": 0.6642, "step": 9870 }, { "epoch": 0.4, "learning_rate": 1.3237881460257933e-06, "loss": 0.6457, "step": 9875 }, { "epoch": 0.4, "learning_rate": 1.3231782901619458e-06, "loss": 0.6684, "step": 9880 }, { "epoch": 0.4, "learning_rate": 1.3225683000526474e-06, "loss": 0.6744, "step": 9885 }, { "epoch": 0.4, "learning_rate": 1.321958175951282e-06, "loss": 0.6504, "step": 9890 }, { "epoch": 0.4, "learning_rate": 1.3213479181112903e-06, "loss": 0.7047, "step": 9895 }, { "epoch": 0.4, "learning_rate": 1.320737526786168e-06, "loss": 0.6802, "step": 9900 }, { "epoch": 0.4, "learning_rate": 1.320127002229466e-06, "loss": 0.6668, "step": 9905 }, { "epoch": 0.4, "learning_rate": 1.319516344694792e-06, "loss": 0.6759, "step": 9910 }, { "epoch": 0.4, "learning_rate": 1.3189055544358065e-06, "loss": 0.6614, "step": 9915 }, { "epoch": 0.4, "learning_rate": 1.3182946317062271e-06, "loss": 0.6613, "step": 9920 }, { "epoch": 0.4, "learning_rate": 1.3176835767598259e-06, "loss": 0.7015, "step": 9925 }, { "epoch": 0.4, "learning_rate": 1.3170723898504293e-06, "loss": 0.6607, "step": 9930 }, { "epoch": 0.4, "learning_rate": 1.3164610712319197e-06, "loss": 0.7008, "step": 9935 }, { "epoch": 0.4, "learning_rate": 1.3158496211582328e-06, "loss": 0.6727, "step": 9940 }, { "epoch": 0.4, "learning_rate": 1.3152380398833604e-06, "loss": 0.6481, "step": 9945 }, { "epoch": 0.4, "learning_rate": 1.3146263276613474e-06, "loss": 0.6803, "step": 9950 }, { "epoch": 0.4, "learning_rate": 1.3140144847462943e-06, "loss": 0.6999, "step": 9955 }, { "epoch": 0.4, "learning_rate": 1.313402511392355e-06, "loss": 0.7099, "step": 9960 }, { "epoch": 0.4, "learning_rate": 1.3127904078537378e-06, "loss": 0.6695, "step": 9965 }, { "epoch": 0.4, "learning_rate": 1.3121781743847054e-06, "loss": 0.6898, "step": 9970 }, { "epoch": 0.41, "learning_rate": 1.311565811239574e-06, "loss": 0.6724, "step": 9975 }, { "epoch": 0.41, "learning_rate": 1.3109533186727145e-06, "loss": 0.6905, "step": 9980 }, { "epoch": 0.41, "learning_rate": 1.3103406969385502e-06, "loss": 0.6442, "step": 9985 }, { "epoch": 0.41, "learning_rate": 1.3097279462915594e-06, "loss": 0.6735, "step": 9990 }, { "epoch": 0.41, "learning_rate": 1.3091150669862734e-06, "loss": 0.6962, "step": 9995 }, { "epoch": 0.41, "learning_rate": 1.3085020592772767e-06, "loss": 0.7271, "step": 10000 }, { "epoch": 0.41, "learning_rate": 1.3078889234192074e-06, "loss": 0.7095, "step": 10005 }, { "epoch": 0.41, "learning_rate": 1.3072756596667569e-06, "loss": 0.6467, "step": 10010 }, { "epoch": 0.41, "learning_rate": 1.3066622682746693e-06, "loss": 0.6728, "step": 10015 }, { "epoch": 0.41, "learning_rate": 1.3060487494977426e-06, "loss": 0.6724, "step": 10020 }, { "epoch": 0.41, "learning_rate": 1.3054351035908269e-06, "loss": 0.6336, "step": 10025 }, { "epoch": 0.41, "learning_rate": 1.3048213308088253e-06, "loss": 0.677, "step": 10030 }, { "epoch": 0.41, "learning_rate": 1.3042074314066937e-06, "loss": 0.7052, "step": 10035 }, { "epoch": 0.41, "learning_rate": 1.3035934056394404e-06, "loss": 0.6877, "step": 10040 }, { "epoch": 0.41, "learning_rate": 1.3029792537621269e-06, "loss": 0.6824, "step": 10045 }, { "epoch": 0.41, "learning_rate": 1.302364976029866e-06, "loss": 0.6759, "step": 10050 }, { "epoch": 0.41, "learning_rate": 1.3017505726978239e-06, "loss": 0.7549, "step": 10055 }, { "epoch": 0.41, "learning_rate": 1.3011360440212178e-06, "loss": 0.686, "step": 10060 }, { "epoch": 0.41, "learning_rate": 1.3005213902553177e-06, "loss": 0.6835, "step": 10065 }, { "epoch": 0.41, "learning_rate": 1.2999066116554457e-06, "loss": 0.7057, "step": 10070 }, { "epoch": 0.41, "learning_rate": 1.2992917084769755e-06, "loss": 0.6839, "step": 10075 }, { "epoch": 0.41, "learning_rate": 1.2986766809753322e-06, "loss": 0.676, "step": 10080 }, { "epoch": 0.41, "learning_rate": 1.298061529405993e-06, "loss": 0.7068, "step": 10085 }, { "epoch": 0.41, "learning_rate": 1.2974462540244868e-06, "loss": 0.7212, "step": 10090 }, { "epoch": 0.41, "learning_rate": 1.2968308550863932e-06, "loss": 0.6873, "step": 10095 }, { "epoch": 0.41, "learning_rate": 1.2962153328473435e-06, "loss": 0.6773, "step": 10100 }, { "epoch": 0.41, "learning_rate": 1.295599687563021e-06, "loss": 0.6805, "step": 10105 }, { "epoch": 0.41, "learning_rate": 1.2949839194891587e-06, "loss": 0.6523, "step": 10110 }, { "epoch": 0.41, "learning_rate": 1.2943680288815418e-06, "loss": 0.6765, "step": 10115 }, { "epoch": 0.41, "learning_rate": 1.2937520159960054e-06, "loss": 0.6897, "step": 10120 }, { "epoch": 0.41, "learning_rate": 1.2931358810884362e-06, "loss": 0.6637, "step": 10125 }, { "epoch": 0.41, "learning_rate": 1.2925196244147712e-06, "loss": 0.6691, "step": 10130 }, { "epoch": 0.41, "learning_rate": 1.291903246230998e-06, "loss": 0.6603, "step": 10135 }, { "epoch": 0.41, "learning_rate": 1.2912867467931551e-06, "loss": 0.7122, "step": 10140 }, { "epoch": 0.41, "learning_rate": 1.2906701263573306e-06, "loss": 0.6517, "step": 10145 }, { "epoch": 0.41, "learning_rate": 1.2900533851796632e-06, "loss": 0.7098, "step": 10150 }, { "epoch": 0.41, "learning_rate": 1.2894365235163425e-06, "loss": 0.6925, "step": 10155 }, { "epoch": 0.41, "learning_rate": 1.2888195416236065e-06, "loss": 0.6564, "step": 10160 }, { "epoch": 0.41, "learning_rate": 1.288202439757745e-06, "loss": 0.6779, "step": 10165 }, { "epoch": 0.41, "learning_rate": 1.2875852181750964e-06, "loss": 0.7095, "step": 10170 }, { "epoch": 0.41, "learning_rate": 1.2869678771320484e-06, "loss": 0.6668, "step": 10175 }, { "epoch": 0.41, "learning_rate": 1.2863504168850406e-06, "loss": 0.6538, "step": 10180 }, { "epoch": 0.41, "learning_rate": 1.2857328376905593e-06, "loss": 0.663, "step": 10185 }, { "epoch": 0.41, "learning_rate": 1.2851151398051427e-06, "loss": 0.648, "step": 10190 }, { "epoch": 0.41, "learning_rate": 1.2844973234853762e-06, "loss": 0.6934, "step": 10195 }, { "epoch": 0.41, "learning_rate": 1.2838793889878955e-06, "loss": 0.6829, "step": 10200 }, { "epoch": 0.41, "learning_rate": 1.2832613365693857e-06, "loss": 0.6484, "step": 10205 }, { "epoch": 0.41, "learning_rate": 1.2826431664865795e-06, "loss": 0.6949, "step": 10210 }, { "epoch": 0.41, "learning_rate": 1.2820248789962605e-06, "loss": 0.6975, "step": 10215 }, { "epoch": 0.42, "learning_rate": 1.281406474355259e-06, "loss": 0.6907, "step": 10220 }, { "epoch": 0.42, "learning_rate": 1.2807879528204554e-06, "loss": 0.6599, "step": 10225 }, { "epoch": 0.42, "learning_rate": 1.280169314648778e-06, "loss": 0.6655, "step": 10230 }, { "epoch": 0.42, "learning_rate": 1.2795505600972036e-06, "loss": 0.6827, "step": 10235 }, { "epoch": 0.42, "learning_rate": 1.2789316894227582e-06, "loss": 0.6679, "step": 10240 }, { "epoch": 0.42, "learning_rate": 1.2783127028825143e-06, "loss": 0.6862, "step": 10245 }, { "epoch": 0.42, "learning_rate": 1.2776936007335942e-06, "loss": 0.6659, "step": 10250 }, { "epoch": 0.42, "learning_rate": 1.2770743832331667e-06, "loss": 0.7268, "step": 10255 }, { "epoch": 0.42, "learning_rate": 1.2764550506384504e-06, "loss": 0.6597, "step": 10260 }, { "epoch": 0.42, "learning_rate": 1.2758356032067103e-06, "loss": 0.6416, "step": 10265 }, { "epoch": 0.42, "learning_rate": 1.2752160411952592e-06, "loss": 0.7008, "step": 10270 }, { "epoch": 0.42, "learning_rate": 1.2745963648614583e-06, "loss": 0.6704, "step": 10275 }, { "epoch": 0.42, "learning_rate": 1.2739765744627154e-06, "loss": 0.6444, "step": 10280 }, { "epoch": 0.42, "learning_rate": 1.2733566702564855e-06, "loss": 0.6594, "step": 10285 }, { "epoch": 0.42, "learning_rate": 1.2727366525002728e-06, "loss": 0.6597, "step": 10290 }, { "epoch": 0.42, "learning_rate": 1.2721165214516259e-06, "loss": 0.6624, "step": 10295 }, { "epoch": 0.42, "learning_rate": 1.2714962773681427e-06, "loss": 0.6726, "step": 10300 }, { "epoch": 0.42, "learning_rate": 1.270875920507467e-06, "loss": 0.6924, "step": 10305 }, { "epoch": 0.42, "learning_rate": 1.2702554511272894e-06, "loss": 0.631, "step": 10310 }, { "epoch": 0.42, "learning_rate": 1.2696348694853476e-06, "loss": 0.6702, "step": 10315 }, { "epoch": 0.42, "learning_rate": 1.2690141758394259e-06, "loss": 0.6808, "step": 10320 }, { "epoch": 0.42, "learning_rate": 1.2683933704473552e-06, "loss": 0.7162, "step": 10325 }, { "epoch": 0.42, "learning_rate": 1.2677724535670126e-06, "loss": 0.6696, "step": 10330 }, { "epoch": 0.42, "learning_rate": 1.267151425456321e-06, "loss": 0.6856, "step": 10335 }, { "epoch": 0.42, "learning_rate": 1.2665302863732508e-06, "loss": 0.6957, "step": 10340 }, { "epoch": 0.42, "learning_rate": 1.2659090365758174e-06, "loss": 0.6934, "step": 10345 }, { "epoch": 0.42, "learning_rate": 1.2652876763220828e-06, "loss": 0.6666, "step": 10350 }, { "epoch": 0.42, "learning_rate": 1.2646662058701547e-06, "loss": 0.7049, "step": 10355 }, { "epoch": 0.42, "learning_rate": 1.2640446254781855e-06, "loss": 0.685, "step": 10360 }, { "epoch": 0.42, "learning_rate": 1.263422935404376e-06, "loss": 0.6696, "step": 10365 }, { "epoch": 0.42, "learning_rate": 1.2628011359069694e-06, "loss": 0.6905, "step": 10370 }, { "epoch": 0.42, "learning_rate": 1.2621792272442569e-06, "loss": 0.7034, "step": 10375 }, { "epoch": 0.42, "learning_rate": 1.2615572096745732e-06, "loss": 0.6746, "step": 10380 }, { "epoch": 0.42, "learning_rate": 1.2609350834562992e-06, "loss": 0.6676, "step": 10385 }, { "epoch": 0.42, "learning_rate": 1.2603128488478605e-06, "loss": 0.6716, "step": 10390 }, { "epoch": 0.42, "learning_rate": 1.2596905061077283e-06, "loss": 0.6554, "step": 10395 }, { "epoch": 0.42, "learning_rate": 1.2590680554944184e-06, "loss": 0.6564, "step": 10400 }, { "epoch": 0.42, "eval_loss": 0.6432496309280396, "eval_runtime": 144.6412, "eval_samples_per_second": 16.358, "eval_steps_per_second": 2.731, "step": 10400 }, { "epoch": 0.42, "learning_rate": 1.2584454972664912e-06, "loss": 0.6727, "step": 10405 }, { "epoch": 0.42, "learning_rate": 1.257822831682552e-06, "loss": 0.6545, "step": 10410 }, { "epoch": 0.42, "learning_rate": 1.2572000590012505e-06, "loss": 0.6794, "step": 10415 }, { "epoch": 0.42, "learning_rate": 1.2565771794812812e-06, "loss": 0.682, "step": 10420 }, { "epoch": 0.42, "learning_rate": 1.255954193381383e-06, "loss": 0.6566, "step": 10425 }, { "epoch": 0.42, "learning_rate": 1.2553311009603389e-06, "loss": 0.6396, "step": 10430 }, { "epoch": 0.42, "learning_rate": 1.2547079024769756e-06, "loss": 0.6685, "step": 10435 }, { "epoch": 0.42, "learning_rate": 1.2540845981901648e-06, "loss": 0.6409, "step": 10440 }, { "epoch": 0.42, "learning_rate": 1.2534611883588213e-06, "loss": 0.6928, "step": 10445 }, { "epoch": 0.42, "learning_rate": 1.2528376732419048e-06, "loss": 0.7162, "step": 10450 }, { "epoch": 0.42, "learning_rate": 1.2522140530984173e-06, "loss": 0.6575, "step": 10455 }, { "epoch": 0.42, "learning_rate": 1.2515903281874057e-06, "loss": 0.7134, "step": 10460 }, { "epoch": 0.43, "learning_rate": 1.2509664987679599e-06, "loss": 0.6521, "step": 10465 }, { "epoch": 0.43, "learning_rate": 1.2503425650992124e-06, "loss": 0.6655, "step": 10470 }, { "epoch": 0.43, "learning_rate": 1.2497185274403407e-06, "loss": 0.6606, "step": 10475 }, { "epoch": 0.43, "learning_rate": 1.2490943860505645e-06, "loss": 0.6419, "step": 10480 }, { "epoch": 0.43, "learning_rate": 1.2484701411891465e-06, "loss": 0.6822, "step": 10485 }, { "epoch": 0.43, "learning_rate": 1.2478457931153927e-06, "loss": 0.6705, "step": 10490 }, { "epoch": 0.43, "learning_rate": 1.2472213420886518e-06, "loss": 0.638, "step": 10495 }, { "epoch": 0.43, "learning_rate": 1.2465967883683152e-06, "loss": 0.6684, "step": 10500 }, { "epoch": 0.43, "learning_rate": 1.2459721322138173e-06, "loss": 0.668, "step": 10505 }, { "epoch": 0.43, "learning_rate": 1.245347373884635e-06, "loss": 0.6665, "step": 10510 }, { "epoch": 0.43, "learning_rate": 1.244722513640287e-06, "loss": 0.6971, "step": 10515 }, { "epoch": 0.43, "learning_rate": 1.2440975517403352e-06, "loss": 0.6807, "step": 10520 }, { "epoch": 0.43, "learning_rate": 1.243472488444383e-06, "loss": 0.6471, "step": 10525 }, { "epoch": 0.43, "learning_rate": 1.2428473240120763e-06, "loss": 0.658, "step": 10530 }, { "epoch": 0.43, "learning_rate": 1.2422220587031033e-06, "loss": 0.7214, "step": 10535 }, { "epoch": 0.43, "learning_rate": 1.2415966927771938e-06, "loss": 0.6445, "step": 10540 }, { "epoch": 0.43, "learning_rate": 1.2409712264941189e-06, "loss": 0.6899, "step": 10545 }, { "epoch": 0.43, "learning_rate": 1.240345660113692e-06, "loss": 0.7066, "step": 10550 }, { "epoch": 0.43, "learning_rate": 1.2397199938957678e-06, "loss": 0.6361, "step": 10555 }, { "epoch": 0.43, "learning_rate": 1.2390942281002435e-06, "loss": 0.6871, "step": 10560 }, { "epoch": 0.43, "learning_rate": 1.238468362987056e-06, "loss": 0.6544, "step": 10565 }, { "epoch": 0.43, "learning_rate": 1.237842398816184e-06, "loss": 0.6198, "step": 10570 }, { "epoch": 0.43, "learning_rate": 1.237216335847648e-06, "loss": 0.6907, "step": 10575 }, { "epoch": 0.43, "learning_rate": 1.236590174341509e-06, "loss": 0.6411, "step": 10580 }, { "epoch": 0.43, "learning_rate": 1.2359639145578693e-06, "loss": 0.6724, "step": 10585 }, { "epoch": 0.43, "learning_rate": 1.2353375567568715e-06, "loss": 0.6606, "step": 10590 }, { "epoch": 0.43, "learning_rate": 1.234711101198699e-06, "loss": 0.6745, "step": 10595 }, { "epoch": 0.43, "learning_rate": 1.2340845481435764e-06, "loss": 0.6417, "step": 10600 }, { "epoch": 0.43, "learning_rate": 1.233457897851768e-06, "loss": 0.6824, "step": 10605 }, { "epoch": 0.43, "learning_rate": 1.2328311505835794e-06, "loss": 0.6726, "step": 10610 }, { "epoch": 0.43, "learning_rate": 1.2322043065993554e-06, "loss": 0.692, "step": 10615 }, { "epoch": 0.43, "learning_rate": 1.2315773661594817e-06, "loss": 0.7019, "step": 10620 }, { "epoch": 0.43, "learning_rate": 1.2309503295243844e-06, "loss": 0.6775, "step": 10625 }, { "epoch": 0.43, "learning_rate": 1.230323196954528e-06, "loss": 0.6774, "step": 10630 }, { "epoch": 0.43, "learning_rate": 1.229695968710419e-06, "loss": 0.7227, "step": 10635 }, { "epoch": 0.43, "learning_rate": 1.2290686450526021e-06, "loss": 0.6676, "step": 10640 }, { "epoch": 0.43, "learning_rate": 1.2284412262416621e-06, "loss": 0.6781, "step": 10645 }, { "epoch": 0.43, "learning_rate": 1.2278137125382235e-06, "loss": 0.666, "step": 10650 }, { "epoch": 0.43, "learning_rate": 1.2271861042029496e-06, "loss": 0.6926, "step": 10655 }, { "epoch": 0.43, "learning_rate": 1.2265584014965439e-06, "loss": 0.6458, "step": 10660 }, { "epoch": 0.43, "learning_rate": 1.2259306046797486e-06, "loss": 0.6518, "step": 10665 }, { "epoch": 0.43, "learning_rate": 1.2253027140133444e-06, "loss": 0.6911, "step": 10670 }, { "epoch": 0.43, "learning_rate": 1.2246747297581527e-06, "loss": 0.6738, "step": 10675 }, { "epoch": 0.43, "learning_rate": 1.2240466521750318e-06, "loss": 0.6601, "step": 10680 }, { "epoch": 0.43, "learning_rate": 1.22341848152488e-06, "loss": 0.6561, "step": 10685 }, { "epoch": 0.43, "learning_rate": 1.222790218068634e-06, "loss": 0.6899, "step": 10690 }, { "epoch": 0.43, "learning_rate": 1.2221618620672689e-06, "loss": 0.6685, "step": 10695 }, { "epoch": 0.43, "learning_rate": 1.2215334137817985e-06, "loss": 0.6926, "step": 10700 }, { "epoch": 0.43, "learning_rate": 1.2209048734732742e-06, "loss": 0.6241, "step": 10705 }, { "epoch": 0.44, "learning_rate": 1.2202762414027865e-06, "loss": 0.6556, "step": 10710 }, { "epoch": 0.44, "learning_rate": 1.219647517831464e-06, "loss": 0.6612, "step": 10715 }, { "epoch": 0.44, "learning_rate": 1.2190187030204727e-06, "loss": 0.6931, "step": 10720 }, { "epoch": 0.44, "learning_rate": 1.2183897972310168e-06, "loss": 0.6806, "step": 10725 }, { "epoch": 0.44, "learning_rate": 1.2177608007243384e-06, "loss": 0.7048, "step": 10730 }, { "epoch": 0.44, "learning_rate": 1.2171317137617172e-06, "loss": 0.6722, "step": 10735 }, { "epoch": 0.44, "learning_rate": 1.2165025366044698e-06, "loss": 0.7012, "step": 10740 }, { "epoch": 0.44, "learning_rate": 1.2158732695139523e-06, "loss": 0.6476, "step": 10745 }, { "epoch": 0.44, "learning_rate": 1.2152439127515557e-06, "loss": 0.661, "step": 10750 }, { "epoch": 0.44, "learning_rate": 1.2146144665787094e-06, "loss": 0.6862, "step": 10755 }, { "epoch": 0.44, "learning_rate": 1.2139849312568803e-06, "loss": 0.6949, "step": 10760 }, { "epoch": 0.44, "learning_rate": 1.2133553070475714e-06, "loss": 0.6565, "step": 10765 }, { "epoch": 0.44, "learning_rate": 1.2127255942123236e-06, "loss": 0.6803, "step": 10770 }, { "epoch": 0.44, "learning_rate": 1.2120957930127139e-06, "loss": 0.6909, "step": 10775 }, { "epoch": 0.44, "learning_rate": 1.211465903710356e-06, "loss": 0.6885, "step": 10780 }, { "epoch": 0.44, "learning_rate": 1.210835926566901e-06, "loss": 0.6837, "step": 10785 }, { "epoch": 0.44, "learning_rate": 1.2102058618440354e-06, "loss": 0.676, "step": 10790 }, { "epoch": 0.44, "learning_rate": 1.2095757098034829e-06, "loss": 0.6624, "step": 10795 }, { "epoch": 0.44, "learning_rate": 1.2089454707070033e-06, "loss": 0.6218, "step": 10800 }, { "epoch": 0.44, "learning_rate": 1.208315144816392e-06, "loss": 0.6528, "step": 10805 }, { "epoch": 0.44, "learning_rate": 1.2076847323934813e-06, "loss": 0.6933, "step": 10810 }, { "epoch": 0.44, "learning_rate": 1.2070542337001385e-06, "loss": 0.6646, "step": 10815 }, { "epoch": 0.44, "learning_rate": 1.206423648998268e-06, "loss": 0.6935, "step": 10820 }, { "epoch": 0.44, "learning_rate": 1.205792978549809e-06, "loss": 0.6853, "step": 10825 }, { "epoch": 0.44, "learning_rate": 1.2051622226167362e-06, "loss": 0.6648, "step": 10830 }, { "epoch": 0.44, "learning_rate": 1.2045313814610602e-06, "loss": 0.6587, "step": 10835 }, { "epoch": 0.44, "learning_rate": 1.203900455344827e-06, "loss": 0.6662, "step": 10840 }, { "epoch": 0.44, "learning_rate": 1.2032694445301182e-06, "loss": 0.6504, "step": 10845 }, { "epoch": 0.44, "learning_rate": 1.2026383492790495e-06, "loss": 0.7228, "step": 10850 }, { "epoch": 0.44, "learning_rate": 1.2020071698537727e-06, "loss": 0.6674, "step": 10855 }, { "epoch": 0.44, "learning_rate": 1.2013759065164748e-06, "loss": 0.7009, "step": 10860 }, { "epoch": 0.44, "learning_rate": 1.200744559529376e-06, "loss": 0.6588, "step": 10865 }, { "epoch": 0.44, "learning_rate": 1.2001131291547335e-06, "loss": 0.651, "step": 10870 }, { "epoch": 0.44, "learning_rate": 1.1994816156548368e-06, "loss": 0.644, "step": 10875 }, { "epoch": 0.44, "learning_rate": 1.1988500192920122e-06, "loss": 0.6669, "step": 10880 }, { "epoch": 0.44, "learning_rate": 1.1982183403286186e-06, "loss": 0.6718, "step": 10885 }, { "epoch": 0.44, "learning_rate": 1.1975865790270503e-06, "loss": 0.6594, "step": 10890 }, { "epoch": 0.44, "learning_rate": 1.1969547356497356e-06, "loss": 0.6279, "step": 10895 }, { "epoch": 0.44, "learning_rate": 1.1963228104591362e-06, "loss": 0.6866, "step": 10900 }, { "epoch": 0.44, "learning_rate": 1.1956908037177484e-06, "loss": 0.6836, "step": 10905 }, { "epoch": 0.44, "learning_rate": 1.1950587156881032e-06, "loss": 0.6323, "step": 10910 }, { "epoch": 0.44, "learning_rate": 1.1944265466327634e-06, "loss": 0.6714, "step": 10915 }, { "epoch": 0.44, "learning_rate": 1.193794296814327e-06, "loss": 0.6849, "step": 10920 }, { "epoch": 0.44, "learning_rate": 1.193161966495425e-06, "loss": 0.6654, "step": 10925 }, { "epoch": 0.44, "learning_rate": 1.1925295559387221e-06, "loss": 0.7011, "step": 10930 }, { "epoch": 0.44, "learning_rate": 1.191897065406916e-06, "loss": 0.6452, "step": 10935 }, { "epoch": 0.44, "learning_rate": 1.1912644951627375e-06, "loss": 0.6537, "step": 10940 }, { "epoch": 0.44, "learning_rate": 1.1906318454689512e-06, "loss": 0.6852, "step": 10945 }, { "epoch": 0.44, "learning_rate": 1.189999116588354e-06, "loss": 0.6316, "step": 10950 }, { "epoch": 0.44, "learning_rate": 1.189366308783776e-06, "loss": 0.6368, "step": 10955 }, { "epoch": 0.45, "learning_rate": 1.1887334223180804e-06, "loss": 0.6621, "step": 10960 }, { "epoch": 0.45, "learning_rate": 1.1881004574541625e-06, "loss": 0.6432, "step": 10965 }, { "epoch": 0.45, "learning_rate": 1.18746741445495e-06, "loss": 0.6771, "step": 10970 }, { "epoch": 0.45, "learning_rate": 1.1868342935834041e-06, "loss": 0.6967, "step": 10975 }, { "epoch": 0.45, "learning_rate": 1.1862010951025175e-06, "loss": 0.7035, "step": 10980 }, { "epoch": 0.45, "learning_rate": 1.1855678192753151e-06, "loss": 0.6717, "step": 10985 }, { "epoch": 0.45, "learning_rate": 1.1849344663648543e-06, "loss": 0.6407, "step": 10990 }, { "epoch": 0.45, "learning_rate": 1.1843010366342248e-06, "loss": 0.6802, "step": 10995 }, { "epoch": 0.45, "learning_rate": 1.1836675303465472e-06, "loss": 0.6332, "step": 11000 }, { "epoch": 0.45, "learning_rate": 1.1830339477649749e-06, "loss": 0.6221, "step": 11005 }, { "epoch": 0.45, "learning_rate": 1.1824002891526923e-06, "loss": 0.6445, "step": 11010 }, { "epoch": 0.45, "learning_rate": 1.1817665547729157e-06, "loss": 0.6818, "step": 11015 }, { "epoch": 0.45, "learning_rate": 1.181132744888893e-06, "loss": 0.6772, "step": 11020 }, { "epoch": 0.45, "learning_rate": 1.1804988597639034e-06, "loss": 0.6692, "step": 11025 }, { "epoch": 0.45, "learning_rate": 1.1798648996612572e-06, "loss": 0.6727, "step": 11030 }, { "epoch": 0.45, "learning_rate": 1.1792308648442958e-06, "loss": 0.7129, "step": 11035 }, { "epoch": 0.45, "learning_rate": 1.1785967555763915e-06, "loss": 0.636, "step": 11040 }, { "epoch": 0.45, "learning_rate": 1.1779625721209484e-06, "loss": 0.6769, "step": 11045 }, { "epoch": 0.45, "learning_rate": 1.1773283147414007e-06, "loss": 0.663, "step": 11050 }, { "epoch": 0.45, "learning_rate": 1.1766939837012128e-06, "loss": 0.6988, "step": 11055 }, { "epoch": 0.45, "learning_rate": 1.1760595792638807e-06, "loss": 0.6399, "step": 11060 }, { "epoch": 0.45, "learning_rate": 1.1754251016929307e-06, "loss": 0.6709, "step": 11065 }, { "epoch": 0.45, "learning_rate": 1.174790551251919e-06, "loss": 0.661, "step": 11070 }, { "epoch": 0.45, "learning_rate": 1.1741559282044322e-06, "loss": 0.6604, "step": 11075 }, { "epoch": 0.45, "learning_rate": 1.1735212328140876e-06, "loss": 0.6601, "step": 11080 }, { "epoch": 0.45, "learning_rate": 1.1728864653445314e-06, "loss": 0.6732, "step": 11085 }, { "epoch": 0.45, "learning_rate": 1.1722516260594412e-06, "loss": 0.6564, "step": 11090 }, { "epoch": 0.45, "learning_rate": 1.1716167152225237e-06, "loss": 0.6819, "step": 11095 }, { "epoch": 0.45, "learning_rate": 1.170981733097515e-06, "loss": 0.6739, "step": 11100 }, { "epoch": 0.45, "learning_rate": 1.1703466799481817e-06, "loss": 0.6658, "step": 11105 }, { "epoch": 0.45, "learning_rate": 1.1697115560383185e-06, "loss": 0.6999, "step": 11110 }, { "epoch": 0.45, "learning_rate": 1.1690763616317507e-06, "loss": 0.6797, "step": 11115 }, { "epoch": 0.45, "learning_rate": 1.168441096992333e-06, "loss": 0.6541, "step": 11120 }, { "epoch": 0.45, "learning_rate": 1.1678057623839484e-06, "loss": 0.6586, "step": 11125 }, { "epoch": 0.45, "learning_rate": 1.1671703580705094e-06, "loss": 0.6609, "step": 11130 }, { "epoch": 0.45, "learning_rate": 1.1665348843159574e-06, "loss": 0.6493, "step": 11135 }, { "epoch": 0.45, "learning_rate": 1.1658993413842624e-06, "loss": 0.6771, "step": 11140 }, { "epoch": 0.45, "learning_rate": 1.1652637295394244e-06, "loss": 0.6806, "step": 11145 }, { "epoch": 0.45, "learning_rate": 1.1646280490454696e-06, "loss": 0.6739, "step": 11150 }, { "epoch": 0.45, "learning_rate": 1.1639923001664555e-06, "loss": 0.6482, "step": 11155 }, { "epoch": 0.45, "learning_rate": 1.1633564831664656e-06, "loss": 0.6759, "step": 11160 }, { "epoch": 0.45, "learning_rate": 1.1627205983096135e-06, "loss": 0.6722, "step": 11165 }, { "epoch": 0.45, "learning_rate": 1.16208464586004e-06, "loss": 0.6531, "step": 11170 }, { "epoch": 0.45, "learning_rate": 1.1614486260819138e-06, "loss": 0.6934, "step": 11175 }, { "epoch": 0.45, "learning_rate": 1.1608125392394327e-06, "loss": 0.6637, "step": 11180 }, { "epoch": 0.45, "learning_rate": 1.1601763855968212e-06, "loss": 0.6879, "step": 11185 }, { "epoch": 0.45, "learning_rate": 1.1595401654183322e-06, "loss": 0.6499, "step": 11190 }, { "epoch": 0.45, "learning_rate": 1.1589038789682456e-06, "loss": 0.7014, "step": 11195 }, { "epoch": 0.45, "learning_rate": 1.1582675265108699e-06, "loss": 0.6136, "step": 11200 }, { "epoch": 0.45, "eval_loss": 0.6401504874229431, "eval_runtime": 144.3337, "eval_samples_per_second": 16.393, "eval_steps_per_second": 2.737, "step": 11200 }, { "epoch": 0.46, "learning_rate": 1.15763110831054e-06, "loss": 0.6646, "step": 11205 }, { "epoch": 0.46, "learning_rate": 1.1569946246316182e-06, "loss": 0.7085, "step": 11210 }, { "epoch": 0.46, "learning_rate": 1.156358075738495e-06, "loss": 0.6621, "step": 11215 }, { "epoch": 0.46, "learning_rate": 1.1557214618955868e-06, "loss": 0.6703, "step": 11220 }, { "epoch": 0.46, "learning_rate": 1.1550847833673374e-06, "loss": 0.7204, "step": 11225 }, { "epoch": 0.46, "learning_rate": 1.154448040418218e-06, "loss": 0.6923, "step": 11230 }, { "epoch": 0.46, "learning_rate": 1.1538112333127253e-06, "loss": 0.6608, "step": 11235 }, { "epoch": 0.46, "learning_rate": 1.1531743623153842e-06, "loss": 0.6824, "step": 11240 }, { "epoch": 0.46, "learning_rate": 1.1525374276907449e-06, "loss": 0.7322, "step": 11245 }, { "epoch": 0.46, "learning_rate": 1.1519004297033847e-06, "loss": 0.6432, "step": 11250 }, { "epoch": 0.46, "learning_rate": 1.1512633686179071e-06, "loss": 0.6795, "step": 11255 }, { "epoch": 0.46, "learning_rate": 1.1506262446989417e-06, "loss": 0.7229, "step": 11260 }, { "epoch": 0.46, "learning_rate": 1.149989058211144e-06, "loss": 0.6954, "step": 11265 }, { "epoch": 0.46, "learning_rate": 1.149351809419196e-06, "loss": 0.6879, "step": 11270 }, { "epoch": 0.46, "learning_rate": 1.148714498587805e-06, "loss": 0.6642, "step": 11275 }, { "epoch": 0.46, "learning_rate": 1.1480771259817048e-06, "loss": 0.7015, "step": 11280 }, { "epoch": 0.46, "learning_rate": 1.147439691865654e-06, "loss": 0.6467, "step": 11285 }, { "epoch": 0.46, "learning_rate": 1.1468021965044377e-06, "loss": 0.7045, "step": 11290 }, { "epoch": 0.46, "learning_rate": 1.1461646401628654e-06, "loss": 0.6635, "step": 11295 }, { "epoch": 0.46, "learning_rate": 1.1455270231057728e-06, "loss": 0.6943, "step": 11300 }, { "epoch": 0.46, "learning_rate": 1.14488934559802e-06, "loss": 0.6626, "step": 11305 }, { "epoch": 0.46, "learning_rate": 1.1442516079044932e-06, "loss": 0.6716, "step": 11310 }, { "epoch": 0.46, "learning_rate": 1.1436138102901031e-06, "loss": 0.6748, "step": 11315 }, { "epoch": 0.46, "learning_rate": 1.142975953019785e-06, "loss": 0.7028, "step": 11320 }, { "epoch": 0.46, "learning_rate": 1.1423380363584999e-06, "loss": 0.6044, "step": 11325 }, { "epoch": 0.46, "learning_rate": 1.1417000605712316e-06, "loss": 0.6831, "step": 11330 }, { "epoch": 0.46, "learning_rate": 1.1410620259229908e-06, "loss": 0.6632, "step": 11335 }, { "epoch": 0.46, "learning_rate": 1.1404239326788115e-06, "loss": 0.6393, "step": 11340 }, { "epoch": 0.46, "learning_rate": 1.1397857811037512e-06, "loss": 0.6501, "step": 11345 }, { "epoch": 0.46, "learning_rate": 1.1391475714628932e-06, "loss": 0.6398, "step": 11350 }, { "epoch": 0.46, "learning_rate": 1.138509304021344e-06, "loss": 0.6784, "step": 11355 }, { "epoch": 0.46, "learning_rate": 1.1378709790442346e-06, "loss": 0.7065, "step": 11360 }, { "epoch": 0.46, "learning_rate": 1.1372325967967196e-06, "loss": 0.6189, "step": 11365 }, { "epoch": 0.46, "learning_rate": 1.1365941575439772e-06, "loss": 0.6652, "step": 11370 }, { "epoch": 0.46, "learning_rate": 1.1359556615512099e-06, "loss": 0.6752, "step": 11375 }, { "epoch": 0.46, "learning_rate": 1.1353171090836427e-06, "loss": 0.6668, "step": 11380 }, { "epoch": 0.46, "learning_rate": 1.134678500406525e-06, "loss": 0.6587, "step": 11385 }, { "epoch": 0.46, "learning_rate": 1.13403983578513e-06, "loss": 0.6873, "step": 11390 }, { "epoch": 0.46, "learning_rate": 1.1334011154847527e-06, "loss": 0.6975, "step": 11395 }, { "epoch": 0.46, "learning_rate": 1.1327623397707122e-06, "loss": 0.6784, "step": 11400 }, { "epoch": 0.46, "learning_rate": 1.1321235089083502e-06, "loss": 0.6643, "step": 11405 }, { "epoch": 0.46, "learning_rate": 1.1314846231630315e-06, "loss": 0.6754, "step": 11410 }, { "epoch": 0.46, "learning_rate": 1.1308456828001441e-06, "loss": 0.6689, "step": 11415 }, { "epoch": 0.46, "learning_rate": 1.1302066880850975e-06, "loss": 0.6594, "step": 11420 }, { "epoch": 0.46, "learning_rate": 1.1295676392833253e-06, "loss": 0.6416, "step": 11425 }, { "epoch": 0.46, "learning_rate": 1.1289285366602826e-06, "loss": 0.7223, "step": 11430 }, { "epoch": 0.46, "learning_rate": 1.1282893804814468e-06, "loss": 0.6944, "step": 11435 }, { "epoch": 0.46, "learning_rate": 1.127650171012318e-06, "loss": 0.6598, "step": 11440 }, { "epoch": 0.46, "learning_rate": 1.1270109085184182e-06, "loss": 0.7102, "step": 11445 }, { "epoch": 0.47, "learning_rate": 1.1263715932652919e-06, "loss": 0.6803, "step": 11450 }, { "epoch": 0.47, "learning_rate": 1.1257322255185044e-06, "loss": 0.65, "step": 11455 }, { "epoch": 0.47, "learning_rate": 1.1250928055436443e-06, "loss": 0.7018, "step": 11460 }, { "epoch": 0.47, "learning_rate": 1.12445333360632e-06, "loss": 0.6409, "step": 11465 }, { "epoch": 0.47, "learning_rate": 1.1238138099721634e-06, "loss": 0.6724, "step": 11470 }, { "epoch": 0.47, "learning_rate": 1.1231742349068271e-06, "loss": 0.6854, "step": 11475 }, { "epoch": 0.47, "learning_rate": 1.1225346086759846e-06, "loss": 0.7102, "step": 11480 }, { "epoch": 0.47, "learning_rate": 1.1218949315453314e-06, "loss": 0.6584, "step": 11485 }, { "epoch": 0.47, "learning_rate": 1.1212552037805836e-06, "loss": 0.6584, "step": 11490 }, { "epoch": 0.47, "learning_rate": 1.1206154256474786e-06, "loss": 0.6846, "step": 11495 }, { "epoch": 0.47, "learning_rate": 1.119975597411775e-06, "loss": 0.7037, "step": 11500 }, { "epoch": 0.47, "learning_rate": 1.1193357193392512e-06, "loss": 0.6689, "step": 11505 }, { "epoch": 0.47, "learning_rate": 1.1186957916957078e-06, "loss": 0.6556, "step": 11510 }, { "epoch": 0.47, "learning_rate": 1.1180558147469642e-06, "loss": 0.6557, "step": 11515 }, { "epoch": 0.47, "learning_rate": 1.1174157887588623e-06, "loss": 0.6662, "step": 11520 }, { "epoch": 0.47, "learning_rate": 1.1167757139972626e-06, "loss": 0.6702, "step": 11525 }, { "epoch": 0.47, "learning_rate": 1.116135590728047e-06, "loss": 0.6682, "step": 11530 }, { "epoch": 0.47, "learning_rate": 1.115495419217117e-06, "loss": 0.6855, "step": 11535 }, { "epoch": 0.47, "learning_rate": 1.114855199730394e-06, "loss": 0.6796, "step": 11540 }, { "epoch": 0.47, "learning_rate": 1.1142149325338199e-06, "loss": 0.6481, "step": 11545 }, { "epoch": 0.47, "learning_rate": 1.1135746178933563e-06, "loss": 0.7167, "step": 11550 }, { "epoch": 0.47, "learning_rate": 1.112934256074984e-06, "loss": 0.6602, "step": 11555 }, { "epoch": 0.47, "learning_rate": 1.1122938473447038e-06, "loss": 0.6848, "step": 11560 }, { "epoch": 0.47, "learning_rate": 1.1116533919685361e-06, "loss": 0.6628, "step": 11565 }, { "epoch": 0.47, "learning_rate": 1.1110128902125201e-06, "loss": 0.6826, "step": 11570 }, { "epoch": 0.47, "learning_rate": 1.1103723423427153e-06, "loss": 0.6527, "step": 11575 }, { "epoch": 0.47, "learning_rate": 1.1097317486251992e-06, "loss": 0.6737, "step": 11580 }, { "epoch": 0.47, "learning_rate": 1.109091109326069e-06, "loss": 0.6854, "step": 11585 }, { "epoch": 0.47, "learning_rate": 1.1084504247114406e-06, "loss": 0.7145, "step": 11590 }, { "epoch": 0.47, "learning_rate": 1.107809695047449e-06, "loss": 0.6756, "step": 11595 }, { "epoch": 0.47, "learning_rate": 1.1071689206002474e-06, "loss": 0.6725, "step": 11600 }, { "epoch": 0.47, "learning_rate": 1.1065281016360083e-06, "loss": 0.7145, "step": 11605 }, { "epoch": 0.47, "learning_rate": 1.1058872384209224e-06, "loss": 0.6899, "step": 11610 }, { "epoch": 0.47, "learning_rate": 1.1052463312211983e-06, "loss": 0.6344, "step": 11615 }, { "epoch": 0.47, "learning_rate": 1.1046053803030637e-06, "loss": 0.654, "step": 11620 }, { "epoch": 0.47, "learning_rate": 1.1039643859327635e-06, "loss": 0.6741, "step": 11625 }, { "epoch": 0.47, "learning_rate": 1.1033233483765615e-06, "loss": 0.6814, "step": 11630 }, { "epoch": 0.47, "learning_rate": 1.1026822679007395e-06, "loss": 0.6565, "step": 11635 }, { "epoch": 0.47, "learning_rate": 1.1020411447715961e-06, "loss": 0.6916, "step": 11640 }, { "epoch": 0.47, "learning_rate": 1.1013999792554486e-06, "loss": 0.6894, "step": 11645 }, { "epoch": 0.47, "learning_rate": 1.1007587716186317e-06, "loss": 0.6698, "step": 11650 }, { "epoch": 0.47, "learning_rate": 1.1001175221274968e-06, "loss": 0.7096, "step": 11655 }, { "epoch": 0.47, "learning_rate": 1.0994762310484142e-06, "loss": 0.6887, "step": 11660 }, { "epoch": 0.47, "learning_rate": 1.0988348986477705e-06, "loss": 0.671, "step": 11665 }, { "epoch": 0.47, "learning_rate": 1.0981935251919693e-06, "loss": 0.6727, "step": 11670 }, { "epoch": 0.47, "learning_rate": 1.0975521109474318e-06, "loss": 0.6777, "step": 11675 }, { "epoch": 0.47, "learning_rate": 1.0969106561805952e-06, "loss": 0.6661, "step": 11680 }, { "epoch": 0.47, "learning_rate": 1.0962691611579154e-06, "loss": 0.6576, "step": 11685 }, { "epoch": 0.47, "learning_rate": 1.0956276261458629e-06, "loss": 0.6415, "step": 11690 }, { "epoch": 0.48, "learning_rate": 1.0949860514109264e-06, "loss": 0.6485, "step": 11695 }, { "epoch": 0.48, "learning_rate": 1.09434443721961e-06, "loss": 0.6966, "step": 11700 }, { "epoch": 0.48, "learning_rate": 1.0937027838384345e-06, "loss": 0.6746, "step": 11705 }, { "epoch": 0.48, "learning_rate": 1.093061091533938e-06, "loss": 0.6565, "step": 11710 }, { "epoch": 0.48, "learning_rate": 1.0924193605726733e-06, "loss": 0.7127, "step": 11715 }, { "epoch": 0.48, "learning_rate": 1.0917775912212099e-06, "loss": 0.6738, "step": 11720 }, { "epoch": 0.48, "learning_rate": 1.0911357837461332e-06, "loss": 0.6712, "step": 11725 }, { "epoch": 0.48, "learning_rate": 1.0904939384140445e-06, "loss": 0.6617, "step": 11730 }, { "epoch": 0.48, "learning_rate": 1.0898520554915607e-06, "loss": 0.6919, "step": 11735 }, { "epoch": 0.48, "learning_rate": 1.0892101352453142e-06, "loss": 0.6731, "step": 11740 }, { "epoch": 0.48, "learning_rate": 1.0885681779419537e-06, "loss": 0.6931, "step": 11745 }, { "epoch": 0.48, "learning_rate": 1.0879261838481426e-06, "loss": 0.631, "step": 11750 }, { "epoch": 0.48, "learning_rate": 1.0872841532305587e-06, "loss": 0.6515, "step": 11755 }, { "epoch": 0.48, "learning_rate": 1.0866420863558969e-06, "loss": 0.6674, "step": 11760 }, { "epoch": 0.48, "learning_rate": 1.0859999834908657e-06, "loss": 0.6514, "step": 11765 }, { "epoch": 0.48, "learning_rate": 1.0853578449021896e-06, "loss": 0.6393, "step": 11770 }, { "epoch": 0.48, "learning_rate": 1.084715670856607e-06, "loss": 0.6941, "step": 11775 }, { "epoch": 0.48, "learning_rate": 1.0840734616208712e-06, "loss": 0.664, "step": 11780 }, { "epoch": 0.48, "learning_rate": 1.0834312174617508e-06, "loss": 0.6636, "step": 11785 }, { "epoch": 0.48, "learning_rate": 1.0827889386460281e-06, "loss": 0.6756, "step": 11790 }, { "epoch": 0.48, "learning_rate": 1.0821466254405004e-06, "loss": 0.7116, "step": 11795 }, { "epoch": 0.48, "learning_rate": 1.0815042781119788e-06, "loss": 0.6647, "step": 11800 }, { "epoch": 0.48, "learning_rate": 1.0808618969272888e-06, "loss": 0.6404, "step": 11805 }, { "epoch": 0.48, "learning_rate": 1.0802194821532702e-06, "loss": 0.6711, "step": 11810 }, { "epoch": 0.48, "learning_rate": 1.079577034056776e-06, "loss": 0.6717, "step": 11815 }, { "epoch": 0.48, "learning_rate": 1.078934552904674e-06, "loss": 0.6705, "step": 11820 }, { "epoch": 0.48, "learning_rate": 1.0782920389638452e-06, "loss": 0.6713, "step": 11825 }, { "epoch": 0.48, "learning_rate": 1.0776494925011846e-06, "loss": 0.6474, "step": 11830 }, { "epoch": 0.48, "learning_rate": 1.0770069137836e-06, "loss": 0.6745, "step": 11835 }, { "epoch": 0.48, "learning_rate": 1.0763643030780126e-06, "loss": 0.6773, "step": 11840 }, { "epoch": 0.48, "learning_rate": 1.075721660651358e-06, "loss": 0.6767, "step": 11845 }, { "epoch": 0.48, "learning_rate": 1.0750789867705843e-06, "loss": 0.6758, "step": 11850 }, { "epoch": 0.48, "learning_rate": 1.0744362817026524e-06, "loss": 0.686, "step": 11855 }, { "epoch": 0.48, "learning_rate": 1.0737935457145364e-06, "loss": 0.6736, "step": 11860 }, { "epoch": 0.48, "learning_rate": 1.073150779073223e-06, "loss": 0.668, "step": 11865 }, { "epoch": 0.48, "learning_rate": 1.0725079820457123e-06, "loss": 0.7003, "step": 11870 }, { "epoch": 0.48, "learning_rate": 1.0718651548990163e-06, "loss": 0.682, "step": 11875 }, { "epoch": 0.48, "learning_rate": 1.0712222979001602e-06, "loss": 0.6445, "step": 11880 }, { "epoch": 0.48, "learning_rate": 1.0705794113161808e-06, "loss": 0.6872, "step": 11885 }, { "epoch": 0.48, "learning_rate": 1.0699364954141276e-06, "loss": 0.6936, "step": 11890 }, { "epoch": 0.48, "learning_rate": 1.0692935504610625e-06, "loss": 0.6195, "step": 11895 }, { "epoch": 0.48, "learning_rate": 1.068650576724059e-06, "loss": 0.6658, "step": 11900 }, { "epoch": 0.48, "learning_rate": 1.0680075744702034e-06, "loss": 0.6799, "step": 11905 }, { "epoch": 0.48, "learning_rate": 1.0673645439665925e-06, "loss": 0.6823, "step": 11910 }, { "epoch": 0.48, "learning_rate": 1.0667214854803357e-06, "loss": 0.6677, "step": 11915 }, { "epoch": 0.48, "learning_rate": 1.0660783992785541e-06, "loss": 0.6648, "step": 11920 }, { "epoch": 0.48, "learning_rate": 1.06543528562838e-06, "loss": 0.6313, "step": 11925 }, { "epoch": 0.48, "learning_rate": 1.0647921447969577e-06, "loss": 0.667, "step": 11930 }, { "epoch": 0.48, "learning_rate": 1.0641489770514418e-06, "loss": 0.6567, "step": 11935 }, { "epoch": 0.48, "learning_rate": 1.0635057826589987e-06, "loss": 0.6727, "step": 11940 }, { "epoch": 0.49, "learning_rate": 1.0628625618868056e-06, "loss": 0.6835, "step": 11945 }, { "epoch": 0.49, "learning_rate": 1.062219315002051e-06, "loss": 0.6329, "step": 11950 }, { "epoch": 0.49, "learning_rate": 1.061576042271934e-06, "loss": 0.6823, "step": 11955 }, { "epoch": 0.49, "learning_rate": 1.0609327439636647e-06, "loss": 0.6514, "step": 11960 }, { "epoch": 0.49, "learning_rate": 1.0602894203444633e-06, "loss": 0.716, "step": 11965 }, { "epoch": 0.49, "learning_rate": 1.0596460716815612e-06, "loss": 0.6778, "step": 11970 }, { "epoch": 0.49, "learning_rate": 1.059002698242199e-06, "loss": 0.6671, "step": 11975 }, { "epoch": 0.49, "learning_rate": 1.0583593002936298e-06, "loss": 0.6936, "step": 11980 }, { "epoch": 0.49, "learning_rate": 1.0577158781031147e-06, "loss": 0.7211, "step": 11985 }, { "epoch": 0.49, "learning_rate": 1.0570724319379254e-06, "loss": 0.6667, "step": 11990 }, { "epoch": 0.49, "learning_rate": 1.0564289620653446e-06, "loss": 0.7181, "step": 11995 }, { "epoch": 0.49, "learning_rate": 1.0557854687526632e-06, "loss": 0.6431, "step": 12000 }, { "epoch": 0.49, "eval_loss": 0.6379530429840088, "eval_runtime": 144.8913, "eval_samples_per_second": 16.329, "eval_steps_per_second": 2.726, "step": 12000 }, { "epoch": 0.49, "learning_rate": 1.0551419522671834e-06, "loss": 0.6469, "step": 12005 }, { "epoch": 0.49, "learning_rate": 1.0544984128762164e-06, "loss": 0.6672, "step": 12010 }, { "epoch": 0.49, "learning_rate": 1.0538548508470824e-06, "loss": 0.6599, "step": 12015 }, { "epoch": 0.49, "learning_rate": 1.053211266447112e-06, "loss": 0.6737, "step": 12020 }, { "epoch": 0.49, "learning_rate": 1.052567659943644e-06, "loss": 0.6275, "step": 12025 }, { "epoch": 0.49, "learning_rate": 1.0519240316040269e-06, "loss": 0.6603, "step": 12030 }, { "epoch": 0.49, "learning_rate": 1.0512803816956191e-06, "loss": 0.6718, "step": 12035 }, { "epoch": 0.49, "learning_rate": 1.0506367104857864e-06, "loss": 0.7219, "step": 12040 }, { "epoch": 0.49, "learning_rate": 1.049993018241905e-06, "loss": 0.6921, "step": 12045 }, { "epoch": 0.49, "learning_rate": 1.0493493052313582e-06, "loss": 0.6371, "step": 12050 }, { "epoch": 0.49, "learning_rate": 1.048705571721539e-06, "loss": 0.6545, "step": 12055 }, { "epoch": 0.49, "learning_rate": 1.0480618179798493e-06, "loss": 0.6818, "step": 12060 }, { "epoch": 0.49, "learning_rate": 1.047418044273698e-06, "loss": 0.6516, "step": 12065 }, { "epoch": 0.49, "learning_rate": 1.0467742508705039e-06, "loss": 0.649, "step": 12070 }, { "epoch": 0.49, "learning_rate": 1.0461304380376924e-06, "loss": 0.6821, "step": 12075 }, { "epoch": 0.49, "learning_rate": 1.0454866060426986e-06, "loss": 0.6675, "step": 12080 }, { "epoch": 0.49, "learning_rate": 1.0448427551529635e-06, "loss": 0.6436, "step": 12085 }, { "epoch": 0.49, "learning_rate": 1.0441988856359385e-06, "loss": 0.695, "step": 12090 }, { "epoch": 0.49, "learning_rate": 1.0435549977590806e-06, "loss": 0.662, "step": 12095 }, { "epoch": 0.49, "learning_rate": 1.0429110917898552e-06, "loss": 0.6499, "step": 12100 }, { "epoch": 0.49, "learning_rate": 1.0422671679957357e-06, "loss": 0.6987, "step": 12105 }, { "epoch": 0.49, "learning_rate": 1.0416232266442017e-06, "loss": 0.7055, "step": 12110 }, { "epoch": 0.49, "learning_rate": 1.0409792680027419e-06, "loss": 0.6261, "step": 12115 }, { "epoch": 0.49, "learning_rate": 1.0403352923388504e-06, "loss": 0.6435, "step": 12120 }, { "epoch": 0.49, "learning_rate": 1.039691299920029e-06, "loss": 0.6984, "step": 12125 }, { "epoch": 0.49, "learning_rate": 1.039047291013787e-06, "loss": 0.6501, "step": 12130 }, { "epoch": 0.49, "learning_rate": 1.0384032658876397e-06, "loss": 0.6991, "step": 12135 }, { "epoch": 0.49, "learning_rate": 1.03775922480911e-06, "loss": 0.6751, "step": 12140 }, { "epoch": 0.49, "learning_rate": 1.0371151680457266e-06, "loss": 0.649, "step": 12145 }, { "epoch": 0.49, "learning_rate": 1.0364710958650252e-06, "loss": 0.6668, "step": 12150 }, { "epoch": 0.49, "learning_rate": 1.0358270085345475e-06, "loss": 0.6789, "step": 12155 }, { "epoch": 0.49, "learning_rate": 1.0351829063218423e-06, "loss": 0.6708, "step": 12160 }, { "epoch": 0.49, "learning_rate": 1.0345387894944635e-06, "loss": 0.6833, "step": 12165 }, { "epoch": 0.49, "learning_rate": 1.033894658319972e-06, "loss": 0.6652, "step": 12170 }, { "epoch": 0.49, "learning_rate": 1.0332505130659344e-06, "loss": 0.7033, "step": 12175 }, { "epoch": 0.49, "learning_rate": 1.0326063539999228e-06, "loss": 0.6458, "step": 12180 }, { "epoch": 0.49, "learning_rate": 1.031962181389515e-06, "loss": 0.702, "step": 12185 }, { "epoch": 0.5, "learning_rate": 1.0313179955022951e-06, "loss": 0.6382, "step": 12190 }, { "epoch": 0.5, "learning_rate": 1.0306737966058526e-06, "loss": 0.6341, "step": 12195 }, { "epoch": 0.5, "learning_rate": 1.0300295849677811e-06, "loss": 0.6793, "step": 12200 }, { "epoch": 0.5, "learning_rate": 1.0293853608556817e-06, "loss": 0.6535, "step": 12205 }, { "epoch": 0.5, "learning_rate": 1.028741124537159e-06, "loss": 0.661, "step": 12210 }, { "epoch": 0.5, "learning_rate": 1.0280968762798227e-06, "loss": 0.642, "step": 12215 }, { "epoch": 0.5, "learning_rate": 1.0274526163512885e-06, "loss": 0.6661, "step": 12220 }, { "epoch": 0.5, "learning_rate": 1.0268083450191761e-06, "loss": 0.6825, "step": 12225 }, { "epoch": 0.5, "learning_rate": 1.0261640625511106e-06, "loss": 0.6685, "step": 12230 }, { "epoch": 0.5, "learning_rate": 1.0255197692147207e-06, "loss": 0.6488, "step": 12235 }, { "epoch": 0.5, "learning_rate": 1.024875465277641e-06, "loss": 0.646, "step": 12240 }, { "epoch": 0.5, "learning_rate": 1.024231151007509e-06, "loss": 0.6769, "step": 12245 }, { "epoch": 0.5, "learning_rate": 1.0235868266719679e-06, "loss": 0.6548, "step": 12250 }, { "epoch": 0.5, "learning_rate": 1.0229424925386638e-06, "loss": 0.6572, "step": 12255 }, { "epoch": 0.5, "learning_rate": 1.022298148875248e-06, "loss": 0.6832, "step": 12260 }, { "epoch": 0.5, "learning_rate": 1.0216537959493752e-06, "loss": 0.7031, "step": 12265 }, { "epoch": 0.5, "learning_rate": 1.0210094340287036e-06, "loss": 0.6427, "step": 12270 }, { "epoch": 0.5, "learning_rate": 1.0203650633808957e-06, "loss": 0.6727, "step": 12275 }, { "epoch": 0.5, "learning_rate": 1.0197206842736181e-06, "loss": 0.6859, "step": 12280 }, { "epoch": 0.5, "learning_rate": 1.0190762969745395e-06, "loss": 0.6617, "step": 12285 }, { "epoch": 0.5, "learning_rate": 1.018431901751333e-06, "loss": 0.6733, "step": 12290 }, { "epoch": 0.5, "learning_rate": 1.0177874988716746e-06, "loss": 0.6554, "step": 12295 }, { "epoch": 0.5, "learning_rate": 1.017143088603244e-06, "loss": 0.6412, "step": 12300 }, { "epoch": 0.5, "learning_rate": 1.0164986712137239e-06, "loss": 0.6979, "step": 12305 }, { "epoch": 0.5, "learning_rate": 1.0158542469707984e-06, "loss": 0.6387, "step": 12310 }, { "epoch": 0.5, "learning_rate": 1.0152098161421574e-06, "loss": 0.6824, "step": 12315 }, { "epoch": 0.5, "learning_rate": 1.0145653789954907e-06, "loss": 0.6187, "step": 12320 }, { "epoch": 0.5, "learning_rate": 1.0139209357984922e-06, "loss": 0.6574, "step": 12325 }, { "epoch": 0.5, "learning_rate": 1.0132764868188582e-06, "loss": 0.6943, "step": 12330 }, { "epoch": 0.5, "learning_rate": 1.0126320323242868e-06, "loss": 0.688, "step": 12335 }, { "epoch": 0.5, "learning_rate": 1.0119875725824792e-06, "loss": 0.7113, "step": 12340 }, { "epoch": 0.5, "learning_rate": 1.0113431078611381e-06, "loss": 0.6502, "step": 12345 }, { "epoch": 0.5, "learning_rate": 1.0106986384279685e-06, "loss": 0.6479, "step": 12350 }, { "epoch": 0.5, "learning_rate": 1.010054164550678e-06, "loss": 0.6594, "step": 12355 }, { "epoch": 0.5, "learning_rate": 1.0094096864969744e-06, "loss": 0.6451, "step": 12360 }, { "epoch": 0.5, "learning_rate": 1.0087652045345694e-06, "loss": 0.6453, "step": 12365 }, { "epoch": 0.5, "learning_rate": 1.0081207189311741e-06, "loss": 0.6711, "step": 12370 }, { "epoch": 0.5, "learning_rate": 1.0074762299545034e-06, "loss": 0.6659, "step": 12375 }, { "epoch": 0.5, "learning_rate": 1.0068317378722712e-06, "loss": 0.6456, "step": 12380 }, { "epoch": 0.5, "learning_rate": 1.006187242952195e-06, "loss": 0.6592, "step": 12385 }, { "epoch": 0.5, "learning_rate": 1.0055427454619916e-06, "loss": 0.6596, "step": 12390 }, { "epoch": 0.5, "learning_rate": 1.00489824566938e-06, "loss": 0.6679, "step": 12395 }, { "epoch": 0.5, "learning_rate": 1.00425374384208e-06, "loss": 0.6507, "step": 12400 }, { "epoch": 0.5, "learning_rate": 1.0036092402478114e-06, "loss": 0.6902, "step": 12405 }, { "epoch": 0.5, "learning_rate": 1.0029647351542958e-06, "loss": 0.6355, "step": 12410 }, { "epoch": 0.5, "learning_rate": 1.0023202288292552e-06, "loss": 0.6847, "step": 12415 }, { "epoch": 0.5, "learning_rate": 1.0016757215404117e-06, "loss": 0.6544, "step": 12420 }, { "epoch": 0.5, "learning_rate": 1.001031213555488e-06, "loss": 0.6403, "step": 12425 }, { "epoch": 0.5, "learning_rate": 1.000386705142207e-06, "loss": 0.6718, "step": 12430 }, { "epoch": 0.51, "learning_rate": 9.997421965682923e-07, "loss": 0.6819, "step": 12435 }, { "epoch": 0.51, "learning_rate": 9.99097688101467e-07, "loss": 0.6521, "step": 12440 }, { "epoch": 0.51, "learning_rate": 9.984531800094538e-07, "loss": 0.6456, "step": 12445 }, { "epoch": 0.51, "learning_rate": 9.978086725599764e-07, "loss": 0.6862, "step": 12450 }, { "epoch": 0.51, "learning_rate": 9.971641660207574e-07, "loss": 0.675, "step": 12455 }, { "epoch": 0.51, "learning_rate": 9.965196606595192e-07, "loss": 0.6853, "step": 12460 }, { "epoch": 0.51, "learning_rate": 9.958751567439835e-07, "loss": 0.7034, "step": 12465 }, { "epoch": 0.51, "learning_rate": 9.95230654541872e-07, "loss": 0.6665, "step": 12470 }, { "epoch": 0.51, "learning_rate": 9.94586154320905e-07, "loss": 0.7126, "step": 12475 }, { "epoch": 0.51, "learning_rate": 9.939416563488025e-07, "loss": 0.6668, "step": 12480 }, { "epoch": 0.51, "learning_rate": 9.932971608932832e-07, "loss": 0.7099, "step": 12485 }, { "epoch": 0.51, "learning_rate": 9.926526682220652e-07, "loss": 0.6525, "step": 12490 }, { "epoch": 0.51, "learning_rate": 9.920081786028647e-07, "loss": 0.6092, "step": 12495 }, { "epoch": 0.51, "learning_rate": 9.913636923033974e-07, "loss": 0.6663, "step": 12500 }, { "epoch": 0.51, "learning_rate": 9.907192095913772e-07, "loss": 0.6514, "step": 12505 }, { "epoch": 0.51, "learning_rate": 9.900747307345166e-07, "loss": 0.676, "step": 12510 }, { "epoch": 0.51, "learning_rate": 9.894302560005265e-07, "loss": 0.6726, "step": 12515 }, { "epoch": 0.51, "learning_rate": 9.88785785657116e-07, "loss": 0.6882, "step": 12520 }, { "epoch": 0.51, "learning_rate": 9.88141319971993e-07, "loss": 0.6961, "step": 12525 }, { "epoch": 0.51, "learning_rate": 9.874968592128624e-07, "loss": 0.6772, "step": 12530 }, { "epoch": 0.51, "learning_rate": 9.868524036474276e-07, "loss": 0.6945, "step": 12535 }, { "epoch": 0.51, "learning_rate": 9.862079535433902e-07, "loss": 0.6479, "step": 12540 }, { "epoch": 0.51, "learning_rate": 9.855635091684488e-07, "loss": 0.6826, "step": 12545 }, { "epoch": 0.51, "learning_rate": 9.849190707903004e-07, "loss": 0.6593, "step": 12550 }, { "epoch": 0.51, "learning_rate": 9.842746386766385e-07, "loss": 0.6512, "step": 12555 }, { "epoch": 0.51, "learning_rate": 9.836302130951548e-07, "loss": 0.6938, "step": 12560 }, { "epoch": 0.51, "learning_rate": 9.829857943135386e-07, "loss": 0.6384, "step": 12565 }, { "epoch": 0.51, "learning_rate": 9.823413825994754e-07, "loss": 0.7146, "step": 12570 }, { "epoch": 0.51, "learning_rate": 9.816969782206486e-07, "loss": 0.6924, "step": 12575 }, { "epoch": 0.51, "learning_rate": 9.810525814447372e-07, "loss": 0.6643, "step": 12580 }, { "epoch": 0.51, "learning_rate": 9.80408192539419e-07, "loss": 0.6833, "step": 12585 }, { "epoch": 0.51, "learning_rate": 9.797638117723675e-07, "loss": 0.7397, "step": 12590 }, { "epoch": 0.51, "learning_rate": 9.791194394112523e-07, "loss": 0.6674, "step": 12595 }, { "epoch": 0.51, "learning_rate": 9.784750757237405e-07, "loss": 0.6753, "step": 12600 }, { "epoch": 0.51, "learning_rate": 9.77830720977495e-07, "loss": 0.6833, "step": 12605 }, { "epoch": 0.51, "learning_rate": 9.77186375440175e-07, "loss": 0.6847, "step": 12610 }, { "epoch": 0.51, "learning_rate": 9.76542039379437e-07, "loss": 0.6786, "step": 12615 }, { "epoch": 0.51, "learning_rate": 9.75897713062931e-07, "loss": 0.6696, "step": 12620 }, { "epoch": 0.51, "learning_rate": 9.75253396758306e-07, "loss": 0.6623, "step": 12625 }, { "epoch": 0.51, "learning_rate": 9.746090907332043e-07, "loss": 0.6847, "step": 12630 }, { "epoch": 0.51, "learning_rate": 9.739647952552654e-07, "loss": 0.6885, "step": 12635 }, { "epoch": 0.51, "learning_rate": 9.733205105921247e-07, "loss": 0.6954, "step": 12640 }, { "epoch": 0.51, "learning_rate": 9.726762370114116e-07, "loss": 0.6695, "step": 12645 }, { "epoch": 0.51, "learning_rate": 9.720319747807521e-07, "loss": 0.6837, "step": 12650 }, { "epoch": 0.51, "learning_rate": 9.71387724167767e-07, "loss": 0.6701, "step": 12655 }, { "epoch": 0.51, "learning_rate": 9.707434854400723e-07, "loss": 0.6297, "step": 12660 }, { "epoch": 0.51, "learning_rate": 9.700992588652796e-07, "loss": 0.6432, "step": 12665 }, { "epoch": 0.51, "learning_rate": 9.694550447109946e-07, "loss": 0.7152, "step": 12670 }, { "epoch": 0.51, "learning_rate": 9.688108432448186e-07, "loss": 0.6285, "step": 12675 }, { "epoch": 0.52, "learning_rate": 9.681666547343467e-07, "loss": 0.6382, "step": 12680 }, { "epoch": 0.52, "learning_rate": 9.6752247944717e-07, "loss": 0.6702, "step": 12685 }, { "epoch": 0.52, "learning_rate": 9.668783176508724e-07, "loss": 0.6962, "step": 12690 }, { "epoch": 0.52, "learning_rate": 9.662341696130339e-07, "loss": 0.7102, "step": 12695 }, { "epoch": 0.52, "learning_rate": 9.655900356012279e-07, "loss": 0.6731, "step": 12700 }, { "epoch": 0.52, "learning_rate": 9.649459158830216e-07, "loss": 0.6251, "step": 12705 }, { "epoch": 0.52, "learning_rate": 9.643018107259774e-07, "loss": 0.6723, "step": 12710 }, { "epoch": 0.52, "learning_rate": 9.636577203976497e-07, "loss": 0.6475, "step": 12715 }, { "epoch": 0.52, "learning_rate": 9.630136451655894e-07, "loss": 0.6977, "step": 12720 }, { "epoch": 0.52, "learning_rate": 9.623695852973395e-07, "loss": 0.6807, "step": 12725 }, { "epoch": 0.52, "learning_rate": 9.617255410604363e-07, "loss": 0.6594, "step": 12730 }, { "epoch": 0.52, "learning_rate": 9.61081512722411e-07, "loss": 0.6928, "step": 12735 }, { "epoch": 0.52, "learning_rate": 9.604375005507862e-07, "loss": 0.6592, "step": 12740 }, { "epoch": 0.52, "learning_rate": 9.597935048130797e-07, "loss": 0.6674, "step": 12745 }, { "epoch": 0.52, "learning_rate": 9.591495257768019e-07, "loss": 0.6501, "step": 12750 }, { "epoch": 0.52, "learning_rate": 9.585055637094557e-07, "loss": 0.6283, "step": 12755 }, { "epoch": 0.52, "learning_rate": 9.578616188785378e-07, "loss": 0.6404, "step": 12760 }, { "epoch": 0.52, "learning_rate": 9.572176915515364e-07, "loss": 0.6723, "step": 12765 }, { "epoch": 0.52, "learning_rate": 9.56573781995934e-07, "loss": 0.6595, "step": 12770 }, { "epoch": 0.52, "learning_rate": 9.559298904792053e-07, "loss": 0.6458, "step": 12775 }, { "epoch": 0.52, "learning_rate": 9.552860172688165e-07, "loss": 0.6258, "step": 12780 }, { "epoch": 0.52, "learning_rate": 9.54642162632228e-07, "loss": 0.6791, "step": 12785 }, { "epoch": 0.52, "learning_rate": 9.539983268368897e-07, "loss": 0.669, "step": 12790 }, { "epoch": 0.52, "learning_rate": 9.533545101502466e-07, "loss": 0.6532, "step": 12795 }, { "epoch": 0.52, "learning_rate": 9.527107128397347e-07, "loss": 0.6997, "step": 12800 }, { "epoch": 0.52, "eval_loss": 0.6355204582214355, "eval_runtime": 144.7041, "eval_samples_per_second": 16.351, "eval_steps_per_second": 2.73, "step": 12800 }, { "epoch": 0.52, "learning_rate": 9.520669351727811e-07, "loss": 0.6886, "step": 12805 }, { "epoch": 0.52, "learning_rate": 9.514231774168063e-07, "loss": 0.6541, "step": 12810 }, { "epoch": 0.52, "learning_rate": 9.507794398392205e-07, "loss": 0.6731, "step": 12815 }, { "epoch": 0.52, "learning_rate": 9.501357227074279e-07, "loss": 0.6141, "step": 12820 }, { "epoch": 0.52, "learning_rate": 9.49492026288822e-07, "loss": 0.6469, "step": 12825 }, { "epoch": 0.52, "learning_rate": 9.488483508507892e-07, "loss": 0.6434, "step": 12830 }, { "epoch": 0.52, "learning_rate": 9.482046966607071e-07, "loss": 0.6507, "step": 12835 }, { "epoch": 0.52, "learning_rate": 9.475610639859428e-07, "loss": 0.6604, "step": 12840 }, { "epoch": 0.52, "learning_rate": 9.469174530938573e-07, "loss": 0.6497, "step": 12845 }, { "epoch": 0.52, "learning_rate": 9.462738642517995e-07, "loss": 0.6627, "step": 12850 }, { "epoch": 0.52, "learning_rate": 9.456302977271114e-07, "loss": 0.6689, "step": 12855 }, { "epoch": 0.52, "learning_rate": 9.449867537871251e-07, "loss": 0.7033, "step": 12860 }, { "epoch": 0.52, "learning_rate": 9.443432326991626e-07, "loss": 0.7198, "step": 12865 }, { "epoch": 0.52, "learning_rate": 9.436997347305377e-07, "loss": 0.6576, "step": 12870 }, { "epoch": 0.52, "learning_rate": 9.430562601485527e-07, "loss": 0.669, "step": 12875 }, { "epoch": 0.52, "learning_rate": 9.424128092205021e-07, "loss": 0.6525, "step": 12880 }, { "epoch": 0.52, "learning_rate": 9.417693822136701e-07, "loss": 0.6669, "step": 12885 }, { "epoch": 0.52, "learning_rate": 9.411259793953302e-07, "loss": 0.6975, "step": 12890 }, { "epoch": 0.52, "learning_rate": 9.404826010327467e-07, "loss": 0.6476, "step": 12895 }, { "epoch": 0.52, "learning_rate": 9.398392473931726e-07, "loss": 0.6884, "step": 12900 }, { "epoch": 0.52, "learning_rate": 9.391959187438522e-07, "loss": 0.6902, "step": 12905 }, { "epoch": 0.52, "learning_rate": 9.385526153520186e-07, "loss": 0.6382, "step": 12910 }, { "epoch": 0.52, "learning_rate": 9.379093374848939e-07, "loss": 0.6584, "step": 12915 }, { "epoch": 0.52, "learning_rate": 9.37266085409691e-07, "loss": 0.6544, "step": 12920 }, { "epoch": 0.52, "learning_rate": 9.366228593936098e-07, "loss": 0.6808, "step": 12925 }, { "epoch": 0.53, "learning_rate": 9.359796597038421e-07, "loss": 0.7172, "step": 12930 }, { "epoch": 0.53, "learning_rate": 9.353364866075672e-07, "loss": 0.6768, "step": 12935 }, { "epoch": 0.53, "learning_rate": 9.34693340371953e-07, "loss": 0.6933, "step": 12940 }, { "epoch": 0.53, "learning_rate": 9.340502212641578e-07, "loss": 0.6833, "step": 12945 }, { "epoch": 0.53, "learning_rate": 9.334071295513267e-07, "loss": 0.6569, "step": 12950 }, { "epoch": 0.53, "learning_rate": 9.32764065500595e-07, "loss": 0.645, "step": 12955 }, { "epoch": 0.53, "learning_rate": 9.321210293790859e-07, "loss": 0.6581, "step": 12960 }, { "epoch": 0.53, "learning_rate": 9.314780214539107e-07, "loss": 0.7163, "step": 12965 }, { "epoch": 0.53, "learning_rate": 9.308350419921699e-07, "loss": 0.6527, "step": 12970 }, { "epoch": 0.53, "learning_rate": 9.301920912609505e-07, "loss": 0.6956, "step": 12975 }, { "epoch": 0.53, "learning_rate": 9.295491695273301e-07, "loss": 0.6763, "step": 12980 }, { "epoch": 0.53, "learning_rate": 9.289062770583712e-07, "loss": 0.6666, "step": 12985 }, { "epoch": 0.53, "learning_rate": 9.282634141211269e-07, "loss": 0.687, "step": 12990 }, { "epoch": 0.53, "learning_rate": 9.276205809826368e-07, "loss": 0.6675, "step": 12995 }, { "epoch": 0.53, "learning_rate": 9.269777779099275e-07, "loss": 0.6656, "step": 13000 }, { "epoch": 0.53, "learning_rate": 9.263350051700147e-07, "loss": 0.6616, "step": 13005 }, { "epoch": 0.53, "learning_rate": 9.256922630298994e-07, "loss": 0.6458, "step": 13010 }, { "epoch": 0.53, "learning_rate": 9.250495517565722e-07, "loss": 0.6929, "step": 13015 }, { "epoch": 0.53, "learning_rate": 9.244068716170099e-07, "loss": 0.6717, "step": 13020 }, { "epoch": 0.53, "learning_rate": 9.237642228781749e-07, "loss": 0.6591, "step": 13025 }, { "epoch": 0.53, "learning_rate": 9.231216058070195e-07, "loss": 0.6807, "step": 13030 }, { "epoch": 0.53, "learning_rate": 9.224790206704798e-07, "loss": 0.6745, "step": 13035 }, { "epoch": 0.53, "learning_rate": 9.21836467735481e-07, "loss": 0.6207, "step": 13040 }, { "epoch": 0.53, "learning_rate": 9.21193947268934e-07, "loss": 0.6627, "step": 13045 }, { "epoch": 0.53, "learning_rate": 9.205514595377356e-07, "loss": 0.6786, "step": 13050 }, { "epoch": 0.53, "learning_rate": 9.199090048087706e-07, "loss": 0.6949, "step": 13055 }, { "epoch": 0.53, "learning_rate": 9.192665833489077e-07, "loss": 0.665, "step": 13060 }, { "epoch": 0.53, "learning_rate": 9.186241954250043e-07, "loss": 0.6553, "step": 13065 }, { "epoch": 0.53, "learning_rate": 9.179818413039028e-07, "loss": 0.6508, "step": 13070 }, { "epoch": 0.53, "learning_rate": 9.173395212524306e-07, "loss": 0.6393, "step": 13075 }, { "epoch": 0.53, "learning_rate": 9.166972355374031e-07, "loss": 0.6319, "step": 13080 }, { "epoch": 0.53, "learning_rate": 9.160549844256187e-07, "loss": 0.6485, "step": 13085 }, { "epoch": 0.53, "learning_rate": 9.154127681838642e-07, "loss": 0.6805, "step": 13090 }, { "epoch": 0.53, "learning_rate": 9.147705870789104e-07, "loss": 0.677, "step": 13095 }, { "epoch": 0.53, "learning_rate": 9.14128441377513e-07, "loss": 0.6738, "step": 13100 }, { "epoch": 0.53, "learning_rate": 9.134863313464149e-07, "loss": 0.6761, "step": 13105 }, { "epoch": 0.53, "learning_rate": 9.128442572523417e-07, "loss": 0.6966, "step": 13110 }, { "epoch": 0.53, "learning_rate": 9.122022193620068e-07, "loss": 0.6435, "step": 13115 }, { "epoch": 0.53, "learning_rate": 9.115602179421058e-07, "loss": 0.656, "step": 13120 }, { "epoch": 0.53, "learning_rate": 9.109182532593213e-07, "loss": 0.678, "step": 13125 }, { "epoch": 0.53, "learning_rate": 9.102763255803203e-07, "loss": 0.6255, "step": 13130 }, { "epoch": 0.53, "learning_rate": 9.096344351717527e-07, "loss": 0.6929, "step": 13135 }, { "epoch": 0.53, "learning_rate": 9.089925823002555e-07, "loss": 0.6523, "step": 13140 }, { "epoch": 0.53, "learning_rate": 9.083507672324474e-07, "loss": 0.6703, "step": 13145 }, { "epoch": 0.53, "learning_rate": 9.077089902349338e-07, "loss": 0.6668, "step": 13150 }, { "epoch": 0.53, "learning_rate": 9.070672515743037e-07, "loss": 0.6151, "step": 13155 }, { "epoch": 0.53, "learning_rate": 9.064255515171282e-07, "loss": 0.6626, "step": 13160 }, { "epoch": 0.53, "learning_rate": 9.057838903299656e-07, "loss": 0.6921, "step": 13165 }, { "epoch": 0.53, "learning_rate": 9.05142268279355e-07, "loss": 0.6709, "step": 13170 }, { "epoch": 0.54, "learning_rate": 9.045006856318215e-07, "loss": 0.6411, "step": 13175 }, { "epoch": 0.54, "learning_rate": 9.03859142653873e-07, "loss": 0.6394, "step": 13180 }, { "epoch": 0.54, "learning_rate": 9.03217639612e-07, "loss": 0.6713, "step": 13185 }, { "epoch": 0.54, "learning_rate": 9.025761767726784e-07, "loss": 0.6492, "step": 13190 }, { "epoch": 0.54, "learning_rate": 9.019347544023651e-07, "loss": 0.6249, "step": 13195 }, { "epoch": 0.54, "learning_rate": 9.012933727675023e-07, "loss": 0.6649, "step": 13200 }, { "epoch": 0.54, "learning_rate": 9.006520321345143e-07, "loss": 0.6133, "step": 13205 }, { "epoch": 0.54, "learning_rate": 9.000107327698078e-07, "loss": 0.6659, "step": 13210 }, { "epoch": 0.54, "learning_rate": 8.993694749397738e-07, "loss": 0.6644, "step": 13215 }, { "epoch": 0.54, "learning_rate": 8.987282589107842e-07, "loss": 0.6778, "step": 13220 }, { "epoch": 0.54, "learning_rate": 8.980870849491954e-07, "loss": 0.6317, "step": 13225 }, { "epoch": 0.54, "learning_rate": 8.974459533213457e-07, "loss": 0.6583, "step": 13230 }, { "epoch": 0.54, "learning_rate": 8.968048642935544e-07, "loss": 0.6837, "step": 13235 }, { "epoch": 0.54, "learning_rate": 8.961638181321257e-07, "loss": 0.6398, "step": 13240 }, { "epoch": 0.54, "learning_rate": 8.955228151033432e-07, "loss": 0.6956, "step": 13245 }, { "epoch": 0.54, "learning_rate": 8.948818554734756e-07, "loss": 0.736, "step": 13250 }, { "epoch": 0.54, "learning_rate": 8.942409395087706e-07, "loss": 0.653, "step": 13255 }, { "epoch": 0.54, "learning_rate": 8.936000674754592e-07, "loss": 0.6775, "step": 13260 }, { "epoch": 0.54, "learning_rate": 8.929592396397553e-07, "loss": 0.6798, "step": 13265 }, { "epoch": 0.54, "learning_rate": 8.923184562678517e-07, "loss": 0.656, "step": 13270 }, { "epoch": 0.54, "learning_rate": 8.916777176259256e-07, "loss": 0.6569, "step": 13275 }, { "epoch": 0.54, "learning_rate": 8.91037023980133e-07, "loss": 0.6893, "step": 13280 }, { "epoch": 0.54, "learning_rate": 8.903963755966129e-07, "loss": 0.6301, "step": 13285 }, { "epoch": 0.54, "learning_rate": 8.89755772741486e-07, "loss": 0.6393, "step": 13290 }, { "epoch": 0.54, "learning_rate": 8.891152156808516e-07, "loss": 0.6426, "step": 13295 }, { "epoch": 0.54, "learning_rate": 8.88474704680793e-07, "loss": 0.6431, "step": 13300 }, { "epoch": 0.54, "learning_rate": 8.878342400073717e-07, "loss": 0.6747, "step": 13305 }, { "epoch": 0.54, "learning_rate": 8.871938219266315e-07, "loss": 0.6519, "step": 13310 }, { "epoch": 0.54, "learning_rate": 8.865534507045973e-07, "loss": 0.6479, "step": 13315 }, { "epoch": 0.54, "learning_rate": 8.859131266072725e-07, "loss": 0.6343, "step": 13320 }, { "epoch": 0.54, "learning_rate": 8.852728499006434e-07, "loss": 0.7073, "step": 13325 }, { "epoch": 0.54, "learning_rate": 8.846326208506743e-07, "loss": 0.6508, "step": 13330 }, { "epoch": 0.54, "learning_rate": 8.839924397233108e-07, "loss": 0.6796, "step": 13335 }, { "epoch": 0.54, "learning_rate": 8.8335230678448e-07, "loss": 0.6686, "step": 13340 }, { "epoch": 0.54, "learning_rate": 8.827122223000856e-07, "loss": 0.6708, "step": 13345 }, { "epoch": 0.54, "learning_rate": 8.820721865360148e-07, "loss": 0.6726, "step": 13350 }, { "epoch": 0.54, "learning_rate": 8.814321997581318e-07, "loss": 0.6588, "step": 13355 }, { "epoch": 0.54, "learning_rate": 8.807922622322815e-07, "loss": 0.6799, "step": 13360 }, { "epoch": 0.54, "learning_rate": 8.801523742242897e-07, "loss": 0.653, "step": 13365 }, { "epoch": 0.54, "learning_rate": 8.795125359999586e-07, "loss": 0.6911, "step": 13370 }, { "epoch": 0.54, "learning_rate": 8.78872747825073e-07, "loss": 0.6704, "step": 13375 }, { "epoch": 0.54, "learning_rate": 8.782330099653937e-07, "loss": 0.6585, "step": 13380 }, { "epoch": 0.54, "learning_rate": 8.775933226866636e-07, "loss": 0.63, "step": 13385 }, { "epoch": 0.54, "learning_rate": 8.76953686254603e-07, "loss": 0.6628, "step": 13390 }, { "epoch": 0.54, "learning_rate": 8.763141009349104e-07, "loss": 0.723, "step": 13395 }, { "epoch": 0.54, "learning_rate": 8.756745669932655e-07, "loss": 0.6833, "step": 13400 }, { "epoch": 0.54, "learning_rate": 8.750350846953234e-07, "loss": 0.6724, "step": 13405 }, { "epoch": 0.54, "learning_rate": 8.743956543067213e-07, "loss": 0.671, "step": 13410 }, { "epoch": 0.54, "learning_rate": 8.737562760930713e-07, "loss": 0.6819, "step": 13415 }, { "epoch": 0.55, "learning_rate": 8.731169503199663e-07, "loss": 0.6681, "step": 13420 }, { "epoch": 0.55, "learning_rate": 8.724776772529775e-07, "loss": 0.6503, "step": 13425 }, { "epoch": 0.55, "learning_rate": 8.718384571576518e-07, "loss": 0.684, "step": 13430 }, { "epoch": 0.55, "learning_rate": 8.711992902995171e-07, "loss": 0.7102, "step": 13435 }, { "epoch": 0.55, "learning_rate": 8.705601769440767e-07, "loss": 0.624, "step": 13440 }, { "epoch": 0.55, "learning_rate": 8.699211173568128e-07, "loss": 0.6489, "step": 13445 }, { "epoch": 0.55, "learning_rate": 8.692821118031864e-07, "loss": 0.6841, "step": 13450 }, { "epoch": 0.55, "learning_rate": 8.686431605486331e-07, "loss": 0.6857, "step": 13455 }, { "epoch": 0.55, "learning_rate": 8.680042638585694e-07, "loss": 0.7022, "step": 13460 }, { "epoch": 0.55, "learning_rate": 8.673654219983861e-07, "loss": 0.6838, "step": 13465 }, { "epoch": 0.55, "learning_rate": 8.667266352334528e-07, "loss": 0.7022, "step": 13470 }, { "epoch": 0.55, "learning_rate": 8.660879038291169e-07, "loss": 0.6553, "step": 13475 }, { "epoch": 0.55, "learning_rate": 8.654492280507005e-07, "loss": 0.6301, "step": 13480 }, { "epoch": 0.55, "learning_rate": 8.648106081635054e-07, "loss": 0.6662, "step": 13485 }, { "epoch": 0.55, "learning_rate": 8.641720444328075e-07, "loss": 0.6445, "step": 13490 }, { "epoch": 0.55, "learning_rate": 8.635335371238609e-07, "loss": 0.6577, "step": 13495 }, { "epoch": 0.55, "learning_rate": 8.628950865018969e-07, "loss": 0.647, "step": 13500 }, { "epoch": 0.55, "learning_rate": 8.622566928321209e-07, "loss": 0.6851, "step": 13505 }, { "epoch": 0.55, "learning_rate": 8.616183563797177e-07, "loss": 0.6431, "step": 13510 }, { "epoch": 0.55, "learning_rate": 8.609800774098452e-07, "loss": 0.6909, "step": 13515 }, { "epoch": 0.55, "learning_rate": 8.603418561876394e-07, "loss": 0.6711, "step": 13520 }, { "epoch": 0.55, "learning_rate": 8.597036929782127e-07, "loss": 0.7302, "step": 13525 }, { "epoch": 0.55, "learning_rate": 8.590655880466511e-07, "loss": 0.6841, "step": 13530 }, { "epoch": 0.55, "learning_rate": 8.584275416580194e-07, "loss": 0.6897, "step": 13535 }, { "epoch": 0.55, "learning_rate": 8.577895540773552e-07, "loss": 0.6478, "step": 13540 }, { "epoch": 0.55, "learning_rate": 8.571516255696738e-07, "loss": 0.6627, "step": 13545 }, { "epoch": 0.55, "learning_rate": 8.565137563999646e-07, "loss": 0.677, "step": 13550 }, { "epoch": 0.55, "learning_rate": 8.55875946833193e-07, "loss": 0.6428, "step": 13555 }, { "epoch": 0.55, "learning_rate": 8.552381971343003e-07, "loss": 0.6605, "step": 13560 }, { "epoch": 0.55, "learning_rate": 8.546005075682012e-07, "loss": 0.6421, "step": 13565 }, { "epoch": 0.55, "learning_rate": 8.53962878399787e-07, "loss": 0.6669, "step": 13570 }, { "epoch": 0.55, "learning_rate": 8.53325309893923e-07, "loss": 0.6713, "step": 13575 }, { "epoch": 0.55, "learning_rate": 8.526878023154494e-07, "loss": 0.696, "step": 13580 }, { "epoch": 0.55, "learning_rate": 8.520503559291823e-07, "loss": 0.6294, "step": 13585 }, { "epoch": 0.55, "learning_rate": 8.514129709999103e-07, "loss": 0.6732, "step": 13590 }, { "epoch": 0.55, "learning_rate": 8.507756477923982e-07, "loss": 0.6502, "step": 13595 }, { "epoch": 0.55, "learning_rate": 8.501383865713839e-07, "loss": 0.6475, "step": 13600 }, { "epoch": 0.55, "eval_loss": 0.6325117349624634, "eval_runtime": 139.0789, "eval_samples_per_second": 17.012, "eval_steps_per_second": 2.84, "step": 13600 }, { "epoch": 0.55, "learning_rate": 8.495011876015805e-07, "loss": 0.6818, "step": 13605 }, { "epoch": 0.55, "learning_rate": 8.488640511476757e-07, "loss": 0.6347, "step": 13610 }, { "epoch": 0.55, "learning_rate": 8.482269774743291e-07, "loss": 0.653, "step": 13615 }, { "epoch": 0.55, "learning_rate": 8.475899668461765e-07, "loss": 0.6532, "step": 13620 }, { "epoch": 0.55, "learning_rate": 8.469530195278261e-07, "loss": 0.6745, "step": 13625 }, { "epoch": 0.55, "learning_rate": 8.4631613578386e-07, "loss": 0.6486, "step": 13630 }, { "epoch": 0.55, "learning_rate": 8.456793158788354e-07, "loss": 0.6595, "step": 13635 }, { "epoch": 0.55, "learning_rate": 8.450425600772802e-07, "loss": 0.6835, "step": 13640 }, { "epoch": 0.55, "learning_rate": 8.444058686436987e-07, "loss": 0.6539, "step": 13645 }, { "epoch": 0.55, "learning_rate": 8.437692418425657e-07, "loss": 0.6733, "step": 13650 }, { "epoch": 0.55, "learning_rate": 8.431326799383309e-07, "loss": 0.6533, "step": 13655 }, { "epoch": 0.55, "learning_rate": 8.424961831954174e-07, "loss": 0.6535, "step": 13660 }, { "epoch": 0.56, "learning_rate": 8.418597518782189e-07, "loss": 0.6637, "step": 13665 }, { "epoch": 0.56, "learning_rate": 8.412233862511048e-07, "loss": 0.661, "step": 13670 }, { "epoch": 0.56, "learning_rate": 8.40587086578415e-07, "loss": 0.6421, "step": 13675 }, { "epoch": 0.56, "learning_rate": 8.399508531244632e-07, "loss": 0.715, "step": 13680 }, { "epoch": 0.56, "learning_rate": 8.39314686153535e-07, "loss": 0.7132, "step": 13685 }, { "epoch": 0.56, "learning_rate": 8.386785859298885e-07, "loss": 0.6643, "step": 13690 }, { "epoch": 0.56, "learning_rate": 8.380425527177551e-07, "loss": 0.6568, "step": 13695 }, { "epoch": 0.56, "learning_rate": 8.374065867813365e-07, "loss": 0.65, "step": 13700 }, { "epoch": 0.56, "learning_rate": 8.36770688384808e-07, "loss": 0.691, "step": 13705 }, { "epoch": 0.56, "learning_rate": 8.361348577923158e-07, "loss": 0.6494, "step": 13710 }, { "epoch": 0.56, "learning_rate": 8.354990952679784e-07, "loss": 0.6998, "step": 13715 }, { "epoch": 0.56, "learning_rate": 8.348634010758869e-07, "loss": 0.6635, "step": 13720 }, { "epoch": 0.56, "learning_rate": 8.342277754801021e-07, "loss": 0.6726, "step": 13725 }, { "epoch": 0.56, "learning_rate": 8.33592218744658e-07, "loss": 0.6538, "step": 13730 }, { "epoch": 0.56, "learning_rate": 8.329567311335588e-07, "loss": 0.6708, "step": 13735 }, { "epoch": 0.56, "learning_rate": 8.323213129107805e-07, "loss": 0.6403, "step": 13740 }, { "epoch": 0.56, "learning_rate": 8.316859643402714e-07, "loss": 0.6683, "step": 13745 }, { "epoch": 0.56, "learning_rate": 8.310506856859485e-07, "loss": 0.6569, "step": 13750 }, { "epoch": 0.56, "learning_rate": 8.304154772117017e-07, "loss": 0.659, "step": 13755 }, { "epoch": 0.56, "learning_rate": 8.297803391813908e-07, "loss": 0.6665, "step": 13760 }, { "epoch": 0.56, "learning_rate": 8.291452718588463e-07, "loss": 0.6852, "step": 13765 }, { "epoch": 0.56, "learning_rate": 8.285102755078708e-07, "loss": 0.625, "step": 13770 }, { "epoch": 0.56, "learning_rate": 8.278753503922351e-07, "loss": 0.607, "step": 13775 }, { "epoch": 0.56, "learning_rate": 8.272404967756821e-07, "loss": 0.6543, "step": 13780 }, { "epoch": 0.56, "learning_rate": 8.266057149219242e-07, "loss": 0.6624, "step": 13785 }, { "epoch": 0.56, "learning_rate": 8.259710050946443e-07, "loss": 0.6695, "step": 13790 }, { "epoch": 0.56, "learning_rate": 8.253363675574959e-07, "loss": 0.6375, "step": 13795 }, { "epoch": 0.56, "learning_rate": 8.24701802574101e-07, "loss": 0.6712, "step": 13800 }, { "epoch": 0.56, "learning_rate": 8.240673104080533e-07, "loss": 0.6653, "step": 13805 }, { "epoch": 0.56, "learning_rate": 8.234328913229145e-07, "loss": 0.6578, "step": 13810 }, { "epoch": 0.56, "learning_rate": 8.227985455822174e-07, "loss": 0.697, "step": 13815 }, { "epoch": 0.56, "learning_rate": 8.221642734494631e-07, "loss": 0.6273, "step": 13820 }, { "epoch": 0.56, "learning_rate": 8.215300751881233e-07, "loss": 0.6841, "step": 13825 }, { "epoch": 0.56, "learning_rate": 8.208959510616384e-07, "loss": 0.6758, "step": 13830 }, { "epoch": 0.56, "learning_rate": 8.202619013334176e-07, "loss": 0.6531, "step": 13835 }, { "epoch": 0.56, "learning_rate": 8.1962792626684e-07, "loss": 0.679, "step": 13840 }, { "epoch": 0.56, "learning_rate": 8.189940261252531e-07, "loss": 0.6685, "step": 13845 }, { "epoch": 0.56, "learning_rate": 8.183602011719736e-07, "loss": 0.6558, "step": 13850 }, { "epoch": 0.56, "learning_rate": 8.177264516702873e-07, "loss": 0.6132, "step": 13855 }, { "epoch": 0.56, "learning_rate": 8.170927778834476e-07, "loss": 0.6571, "step": 13860 }, { "epoch": 0.56, "learning_rate": 8.164591800746774e-07, "loss": 0.685, "step": 13865 }, { "epoch": 0.56, "learning_rate": 8.158256585071674e-07, "loss": 0.6806, "step": 13870 }, { "epoch": 0.56, "learning_rate": 8.151922134440774e-07, "loss": 0.6552, "step": 13875 }, { "epoch": 0.56, "learning_rate": 8.145588451485347e-07, "loss": 0.6927, "step": 13880 }, { "epoch": 0.56, "learning_rate": 8.139255538836351e-07, "loss": 0.6594, "step": 13885 }, { "epoch": 0.56, "learning_rate": 8.132923399124424e-07, "loss": 0.6392, "step": 13890 }, { "epoch": 0.56, "learning_rate": 8.126592034979878e-07, "loss": 0.6697, "step": 13895 }, { "epoch": 0.56, "learning_rate": 8.120261449032706e-07, "loss": 0.6549, "step": 13900 }, { "epoch": 0.56, "learning_rate": 8.113931643912589e-07, "loss": 0.6544, "step": 13905 }, { "epoch": 0.57, "learning_rate": 8.107602622248859e-07, "loss": 0.6528, "step": 13910 }, { "epoch": 0.57, "learning_rate": 8.101274386670544e-07, "loss": 0.6406, "step": 13915 }, { "epoch": 0.57, "learning_rate": 8.094946939806336e-07, "loss": 0.6755, "step": 13920 }, { "epoch": 0.57, "learning_rate": 8.0886202842846e-07, "loss": 0.6685, "step": 13925 }, { "epoch": 0.57, "learning_rate": 8.082294422733382e-07, "loss": 0.6375, "step": 13930 }, { "epoch": 0.57, "learning_rate": 8.075969357780379e-07, "loss": 0.6974, "step": 13935 }, { "epoch": 0.57, "learning_rate": 8.069645092052975e-07, "loss": 0.6379, "step": 13940 }, { "epoch": 0.57, "learning_rate": 8.06332162817821e-07, "loss": 0.6283, "step": 13945 }, { "epoch": 0.57, "learning_rate": 8.056998968782797e-07, "loss": 0.6546, "step": 13950 }, { "epoch": 0.57, "learning_rate": 8.050677116493121e-07, "loss": 0.6807, "step": 13955 }, { "epoch": 0.57, "learning_rate": 8.044356073935214e-07, "loss": 0.6607, "step": 13960 }, { "epoch": 0.57, "learning_rate": 8.038035843734791e-07, "loss": 0.6562, "step": 13965 }, { "epoch": 0.57, "learning_rate": 8.031716428517212e-07, "loss": 0.6787, "step": 13970 }, { "epoch": 0.57, "learning_rate": 8.025397830907515e-07, "loss": 0.6575, "step": 13975 }, { "epoch": 0.57, "learning_rate": 8.019080053530385e-07, "loss": 0.6418, "step": 13980 }, { "epoch": 0.57, "learning_rate": 8.012763099010172e-07, "loss": 0.6619, "step": 13985 }, { "epoch": 0.57, "learning_rate": 8.00644696997089e-07, "loss": 0.6564, "step": 13990 }, { "epoch": 0.57, "learning_rate": 8.000131669036196e-07, "loss": 0.6619, "step": 13995 }, { "epoch": 0.57, "learning_rate": 7.993817198829419e-07, "loss": 0.6472, "step": 14000 }, { "epoch": 0.57, "learning_rate": 7.987503561973527e-07, "loss": 0.658, "step": 14005 }, { "epoch": 0.57, "learning_rate": 7.981190761091156e-07, "loss": 0.6977, "step": 14010 }, { "epoch": 0.57, "learning_rate": 7.974878798804587e-07, "loss": 0.6398, "step": 14015 }, { "epoch": 0.57, "learning_rate": 7.968567677735752e-07, "loss": 0.6633, "step": 14020 }, { "epoch": 0.57, "learning_rate": 7.962257400506241e-07, "loss": 0.6571, "step": 14025 }, { "epoch": 0.57, "learning_rate": 7.95594796973728e-07, "loss": 0.6622, "step": 14030 }, { "epoch": 0.57, "learning_rate": 7.949639388049758e-07, "loss": 0.6876, "step": 14035 }, { "epoch": 0.57, "learning_rate": 7.943331658064206e-07, "loss": 0.6471, "step": 14040 }, { "epoch": 0.57, "learning_rate": 7.937024782400793e-07, "loss": 0.6671, "step": 14045 }, { "epoch": 0.57, "learning_rate": 7.930718763679349e-07, "loss": 0.6538, "step": 14050 }, { "epoch": 0.57, "learning_rate": 7.924413604519333e-07, "loss": 0.6854, "step": 14055 }, { "epoch": 0.57, "learning_rate": 7.918109307539856e-07, "loss": 0.7438, "step": 14060 }, { "epoch": 0.57, "learning_rate": 7.911805875359667e-07, "loss": 0.6835, "step": 14065 }, { "epoch": 0.57, "learning_rate": 7.905503310597159e-07, "loss": 0.6572, "step": 14070 }, { "epoch": 0.57, "learning_rate": 7.899201615870361e-07, "loss": 0.6816, "step": 14075 }, { "epoch": 0.57, "learning_rate": 7.892900793796941e-07, "loss": 0.6017, "step": 14080 }, { "epoch": 0.57, "learning_rate": 7.886600846994207e-07, "loss": 0.6606, "step": 14085 }, { "epoch": 0.57, "learning_rate": 7.880301778079104e-07, "loss": 0.6908, "step": 14090 }, { "epoch": 0.57, "learning_rate": 7.874003589668207e-07, "loss": 0.6787, "step": 14095 }, { "epoch": 0.57, "learning_rate": 7.867706284377731e-07, "loss": 0.6872, "step": 14100 }, { "epoch": 0.57, "learning_rate": 7.86140986482352e-07, "loss": 0.6812, "step": 14105 }, { "epoch": 0.57, "learning_rate": 7.855114333621055e-07, "loss": 0.6552, "step": 14110 }, { "epoch": 0.57, "learning_rate": 7.848819693385443e-07, "loss": 0.6809, "step": 14115 }, { "epoch": 0.57, "learning_rate": 7.84252594673142e-07, "loss": 0.6787, "step": 14120 }, { "epoch": 0.57, "learning_rate": 7.83623309627336e-07, "loss": 0.6638, "step": 14125 }, { "epoch": 0.57, "learning_rate": 7.829941144625251e-07, "loss": 0.6821, "step": 14130 }, { "epoch": 0.57, "learning_rate": 7.823650094400722e-07, "loss": 0.6569, "step": 14135 }, { "epoch": 0.57, "learning_rate": 7.817359948213014e-07, "loss": 0.6722, "step": 14140 }, { "epoch": 0.57, "learning_rate": 7.811070708675005e-07, "loss": 0.6677, "step": 14145 }, { "epoch": 0.57, "learning_rate": 7.804782378399188e-07, "loss": 0.6305, "step": 14150 }, { "epoch": 0.57, "learning_rate": 7.798494959997679e-07, "loss": 0.6604, "step": 14155 }, { "epoch": 0.58, "learning_rate": 7.79220845608222e-07, "loss": 0.6459, "step": 14160 }, { "epoch": 0.58, "learning_rate": 7.785922869264167e-07, "loss": 0.6571, "step": 14165 }, { "epoch": 0.58, "learning_rate": 7.779638202154498e-07, "loss": 0.6593, "step": 14170 }, { "epoch": 0.58, "learning_rate": 7.773354457363814e-07, "loss": 0.672, "step": 14175 }, { "epoch": 0.58, "learning_rate": 7.767071637502323e-07, "loss": 0.6653, "step": 14180 }, { "epoch": 0.58, "learning_rate": 7.760789745179857e-07, "loss": 0.6623, "step": 14185 }, { "epoch": 0.58, "learning_rate": 7.754508783005855e-07, "loss": 0.6597, "step": 14190 }, { "epoch": 0.58, "learning_rate": 7.748228753589377e-07, "loss": 0.6594, "step": 14195 }, { "epoch": 0.58, "learning_rate": 7.741949659539094e-07, "loss": 0.6507, "step": 14200 }, { "epoch": 0.58, "learning_rate": 7.735671503463283e-07, "loss": 0.6891, "step": 14205 }, { "epoch": 0.58, "learning_rate": 7.729394287969842e-07, "loss": 0.7177, "step": 14210 }, { "epoch": 0.58, "learning_rate": 7.723118015666264e-07, "loss": 0.6398, "step": 14215 }, { "epoch": 0.58, "learning_rate": 7.716842689159662e-07, "loss": 0.689, "step": 14220 }, { "epoch": 0.58, "learning_rate": 7.710568311056754e-07, "loss": 0.6272, "step": 14225 }, { "epoch": 0.58, "learning_rate": 7.704294883963858e-07, "loss": 0.6754, "step": 14230 }, { "epoch": 0.58, "learning_rate": 7.698022410486904e-07, "loss": 0.6399, "step": 14235 }, { "epoch": 0.58, "learning_rate": 7.69175089323142e-07, "loss": 0.6666, "step": 14240 }, { "epoch": 0.58, "learning_rate": 7.685480334802543e-07, "loss": 0.6878, "step": 14245 }, { "epoch": 0.58, "learning_rate": 7.679210737805005e-07, "loss": 0.6787, "step": 14250 }, { "epoch": 0.58, "learning_rate": 7.672942104843145e-07, "loss": 0.6784, "step": 14255 }, { "epoch": 0.58, "learning_rate": 7.666674438520898e-07, "loss": 0.657, "step": 14260 }, { "epoch": 0.58, "learning_rate": 7.660407741441797e-07, "loss": 0.6817, "step": 14265 }, { "epoch": 0.58, "learning_rate": 7.654142016208976e-07, "loss": 0.6439, "step": 14270 }, { "epoch": 0.58, "learning_rate": 7.647877265425157e-07, "loss": 0.7055, "step": 14275 }, { "epoch": 0.58, "learning_rate": 7.641613491692669e-07, "loss": 0.6893, "step": 14280 }, { "epoch": 0.58, "learning_rate": 7.635350697613429e-07, "loss": 0.6435, "step": 14285 }, { "epoch": 0.58, "learning_rate": 7.629088885788942e-07, "loss": 0.6568, "step": 14290 }, { "epoch": 0.58, "learning_rate": 7.622828058820315e-07, "loss": 0.6865, "step": 14295 }, { "epoch": 0.58, "learning_rate": 7.616568219308239e-07, "loss": 0.6944, "step": 14300 }, { "epoch": 0.58, "learning_rate": 7.610309369852996e-07, "loss": 0.6978, "step": 14305 }, { "epoch": 0.58, "learning_rate": 7.604051513054462e-07, "loss": 0.6665, "step": 14310 }, { "epoch": 0.58, "learning_rate": 7.597794651512092e-07, "loss": 0.6395, "step": 14315 }, { "epoch": 0.58, "learning_rate": 7.591538787824933e-07, "loss": 0.6756, "step": 14320 }, { "epoch": 0.58, "learning_rate": 7.585283924591616e-07, "loss": 0.6442, "step": 14325 }, { "epoch": 0.58, "learning_rate": 7.579030064410357e-07, "loss": 0.6175, "step": 14330 }, { "epoch": 0.58, "learning_rate": 7.572777209878958e-07, "loss": 0.6561, "step": 14335 }, { "epoch": 0.58, "learning_rate": 7.566525363594796e-07, "loss": 0.6895, "step": 14340 }, { "epoch": 0.58, "learning_rate": 7.560274528154836e-07, "loss": 0.6491, "step": 14345 }, { "epoch": 0.58, "learning_rate": 7.55402470615562e-07, "loss": 0.7044, "step": 14350 }, { "epoch": 0.58, "learning_rate": 7.547775900193267e-07, "loss": 0.6438, "step": 14355 }, { "epoch": 0.58, "learning_rate": 7.541528112863484e-07, "loss": 0.709, "step": 14360 }, { "epoch": 0.58, "learning_rate": 7.53528134676154e-07, "loss": 0.6684, "step": 14365 }, { "epoch": 0.58, "learning_rate": 7.529035604482292e-07, "loss": 0.6646, "step": 14370 }, { "epoch": 0.58, "learning_rate": 7.522790888620165e-07, "loss": 0.6361, "step": 14375 }, { "epoch": 0.58, "learning_rate": 7.516547201769159e-07, "loss": 0.6411, "step": 14380 }, { "epoch": 0.58, "learning_rate": 7.51030454652285e-07, "loss": 0.6162, "step": 14385 }, { "epoch": 0.58, "learning_rate": 7.504062925474378e-07, "loss": 0.6916, "step": 14390 }, { "epoch": 0.58, "learning_rate": 7.497822341216465e-07, "loss": 0.6986, "step": 14395 }, { "epoch": 0.58, "learning_rate": 7.491582796341388e-07, "loss": 0.6316, "step": 14400 }, { "epoch": 0.58, "eval_loss": 0.629960834980011, "eval_runtime": 143.0305, "eval_samples_per_second": 16.542, "eval_steps_per_second": 2.762, "step": 14400 }, { "epoch": 0.59, "learning_rate": 7.485344293441006e-07, "loss": 0.7091, "step": 14405 }, { "epoch": 0.59, "learning_rate": 7.479106835106733e-07, "loss": 0.6732, "step": 14410 }, { "epoch": 0.59, "learning_rate": 7.472870423929558e-07, "loss": 0.6886, "step": 14415 }, { "epoch": 0.59, "learning_rate": 7.466635062500035e-07, "loss": 0.6929, "step": 14420 }, { "epoch": 0.59, "learning_rate": 7.460400753408274e-07, "loss": 0.6848, "step": 14425 }, { "epoch": 0.59, "learning_rate": 7.454167499243955e-07, "loss": 0.6989, "step": 14430 }, { "epoch": 0.59, "learning_rate": 7.447935302596317e-07, "loss": 0.656, "step": 14435 }, { "epoch": 0.59, "learning_rate": 7.441704166054159e-07, "loss": 0.6596, "step": 14440 }, { "epoch": 0.59, "learning_rate": 7.435474092205844e-07, "loss": 0.6879, "step": 14445 }, { "epoch": 0.59, "learning_rate": 7.429245083639288e-07, "loss": 0.6734, "step": 14450 }, { "epoch": 0.59, "learning_rate": 7.42301714294197e-07, "loss": 0.6389, "step": 14455 }, { "epoch": 0.59, "learning_rate": 7.416790272700919e-07, "loss": 0.6627, "step": 14460 }, { "epoch": 0.59, "learning_rate": 7.410564475502723e-07, "loss": 0.6592, "step": 14465 }, { "epoch": 0.59, "learning_rate": 7.404339753933526e-07, "loss": 0.6102, "step": 14470 }, { "epoch": 0.59, "learning_rate": 7.398116110579022e-07, "loss": 0.6535, "step": 14475 }, { "epoch": 0.59, "learning_rate": 7.39189354802446e-07, "loss": 0.6776, "step": 14480 }, { "epoch": 0.59, "learning_rate": 7.385672068854635e-07, "loss": 0.6815, "step": 14485 }, { "epoch": 0.59, "learning_rate": 7.379451675653897e-07, "loss": 0.6762, "step": 14490 }, { "epoch": 0.59, "learning_rate": 7.373232371006146e-07, "loss": 0.6405, "step": 14495 }, { "epoch": 0.59, "learning_rate": 7.367014157494822e-07, "loss": 0.6582, "step": 14500 }, { "epoch": 0.59, "learning_rate": 7.360797037702922e-07, "loss": 0.7052, "step": 14505 }, { "epoch": 0.59, "learning_rate": 7.354581014212978e-07, "loss": 0.7036, "step": 14510 }, { "epoch": 0.59, "learning_rate": 7.348366089607077e-07, "loss": 0.6619, "step": 14515 }, { "epoch": 0.59, "learning_rate": 7.342152266466842e-07, "loss": 0.6511, "step": 14520 }, { "epoch": 0.59, "learning_rate": 7.335939547373441e-07, "loss": 0.6736, "step": 14525 }, { "epoch": 0.59, "learning_rate": 7.329727934907586e-07, "loss": 0.6553, "step": 14530 }, { "epoch": 0.59, "learning_rate": 7.323517431649524e-07, "loss": 0.6332, "step": 14535 }, { "epoch": 0.59, "learning_rate": 7.317308040179045e-07, "loss": 0.6236, "step": 14540 }, { "epoch": 0.59, "learning_rate": 7.311099763075477e-07, "loss": 0.6322, "step": 14545 }, { "epoch": 0.59, "learning_rate": 7.304892602917681e-07, "loss": 0.6703, "step": 14550 }, { "epoch": 0.59, "learning_rate": 7.298686562284064e-07, "loss": 0.6805, "step": 14555 }, { "epoch": 0.59, "learning_rate": 7.292481643752553e-07, "loss": 0.6194, "step": 14560 }, { "epoch": 0.59, "learning_rate": 7.286277849900626e-07, "loss": 0.677, "step": 14565 }, { "epoch": 0.59, "learning_rate": 7.280075183305276e-07, "loss": 0.655, "step": 14570 }, { "epoch": 0.59, "learning_rate": 7.273873646543043e-07, "loss": 0.7065, "step": 14575 }, { "epoch": 0.59, "learning_rate": 7.267673242189991e-07, "loss": 0.6416, "step": 14580 }, { "epoch": 0.59, "learning_rate": 7.261473972821712e-07, "loss": 0.623, "step": 14585 }, { "epoch": 0.59, "learning_rate": 7.25527584101333e-07, "loss": 0.6771, "step": 14590 }, { "epoch": 0.59, "learning_rate": 7.249078849339492e-07, "loss": 0.6588, "step": 14595 }, { "epoch": 0.59, "learning_rate": 7.242883000374378e-07, "loss": 0.7181, "step": 14600 }, { "epoch": 0.59, "learning_rate": 7.23668829669169e-07, "loss": 0.7161, "step": 14605 }, { "epoch": 0.59, "learning_rate": 7.23049474086465e-07, "loss": 0.6712, "step": 14610 }, { "epoch": 0.59, "learning_rate": 7.224302335466013e-07, "loss": 0.6546, "step": 14615 }, { "epoch": 0.59, "learning_rate": 7.218111083068044e-07, "loss": 0.6682, "step": 14620 }, { "epoch": 0.59, "learning_rate": 7.211920986242539e-07, "loss": 0.6289, "step": 14625 }, { "epoch": 0.59, "learning_rate": 7.205732047560813e-07, "loss": 0.6735, "step": 14630 }, { "epoch": 0.59, "learning_rate": 7.199544269593692e-07, "loss": 0.6742, "step": 14635 }, { "epoch": 0.59, "learning_rate": 7.193357654911529e-07, "loss": 0.663, "step": 14640 }, { "epoch": 0.59, "learning_rate": 7.187172206084186e-07, "loss": 0.6653, "step": 14645 }, { "epoch": 0.6, "learning_rate": 7.180987925681047e-07, "loss": 0.633, "step": 14650 }, { "epoch": 0.6, "learning_rate": 7.174804816271012e-07, "loss": 0.6679, "step": 14655 }, { "epoch": 0.6, "learning_rate": 7.168622880422484e-07, "loss": 0.6849, "step": 14660 }, { "epoch": 0.6, "learning_rate": 7.162442120703389e-07, "loss": 0.6585, "step": 14665 }, { "epoch": 0.6, "learning_rate": 7.15626253968116e-07, "loss": 0.6842, "step": 14670 }, { "epoch": 0.6, "learning_rate": 7.150084139922745e-07, "loss": 0.6745, "step": 14675 }, { "epoch": 0.6, "learning_rate": 7.14390692399459e-07, "loss": 0.6614, "step": 14680 }, { "epoch": 0.6, "learning_rate": 7.137730894462662e-07, "loss": 0.6682, "step": 14685 }, { "epoch": 0.6, "learning_rate": 7.131556053892431e-07, "loss": 0.6302, "step": 14690 }, { "epoch": 0.6, "learning_rate": 7.125382404848867e-07, "loss": 0.6688, "step": 14695 }, { "epoch": 0.6, "learning_rate": 7.119209949896456e-07, "loss": 0.6558, "step": 14700 }, { "epoch": 0.6, "learning_rate": 7.113038691599178e-07, "loss": 0.6402, "step": 14705 }, { "epoch": 0.6, "learning_rate": 7.10686863252052e-07, "loss": 0.6774, "step": 14710 }, { "epoch": 0.6, "learning_rate": 7.100699775223476e-07, "loss": 0.6652, "step": 14715 }, { "epoch": 0.6, "learning_rate": 7.094532122270528e-07, "loss": 0.6132, "step": 14720 }, { "epoch": 0.6, "learning_rate": 7.088365676223671e-07, "loss": 0.6525, "step": 14725 }, { "epoch": 0.6, "learning_rate": 7.08220043964439e-07, "loss": 0.6803, "step": 14730 }, { "epoch": 0.6, "learning_rate": 7.076036415093673e-07, "loss": 0.6476, "step": 14735 }, { "epoch": 0.6, "learning_rate": 7.069873605132002e-07, "loss": 0.6711, "step": 14740 }, { "epoch": 0.6, "learning_rate": 7.063712012319352e-07, "loss": 0.6534, "step": 14745 }, { "epoch": 0.6, "learning_rate": 7.057551639215198e-07, "loss": 0.6575, "step": 14750 }, { "epoch": 0.6, "learning_rate": 7.051392488378503e-07, "loss": 0.6665, "step": 14755 }, { "epoch": 0.6, "learning_rate": 7.045234562367726e-07, "loss": 0.6546, "step": 14760 }, { "epoch": 0.6, "learning_rate": 7.039077863740817e-07, "loss": 0.6307, "step": 14765 }, { "epoch": 0.6, "learning_rate": 7.032922395055215e-07, "loss": 0.6717, "step": 14770 }, { "epoch": 0.6, "learning_rate": 7.026768158867847e-07, "loss": 0.7066, "step": 14775 }, { "epoch": 0.6, "learning_rate": 7.020615157735126e-07, "loss": 0.646, "step": 14780 }, { "epoch": 0.6, "learning_rate": 7.014463394212959e-07, "loss": 0.6559, "step": 14785 }, { "epoch": 0.6, "learning_rate": 7.008312870856738e-07, "loss": 0.6847, "step": 14790 }, { "epoch": 0.6, "learning_rate": 7.00216359022133e-07, "loss": 0.6717, "step": 14795 }, { "epoch": 0.6, "learning_rate": 6.9960155548611e-07, "loss": 0.6857, "step": 14800 }, { "epoch": 0.6, "learning_rate": 6.989868767329882e-07, "loss": 0.6655, "step": 14805 }, { "epoch": 0.6, "learning_rate": 6.983723230181e-07, "loss": 0.6216, "step": 14810 }, { "epoch": 0.6, "learning_rate": 6.97757894596726e-07, "loss": 0.7093, "step": 14815 }, { "epoch": 0.6, "learning_rate": 6.971435917240939e-07, "loss": 0.6597, "step": 14820 }, { "epoch": 0.6, "learning_rate": 6.965294146553802e-07, "loss": 0.6385, "step": 14825 }, { "epoch": 0.6, "learning_rate": 6.959153636457085e-07, "loss": 0.6444, "step": 14830 }, { "epoch": 0.6, "learning_rate": 6.953014389501504e-07, "loss": 0.6662, "step": 14835 }, { "epoch": 0.6, "learning_rate": 6.946876408237245e-07, "loss": 0.6718, "step": 14840 }, { "epoch": 0.6, "learning_rate": 6.940739695213976e-07, "loss": 0.6337, "step": 14845 }, { "epoch": 0.6, "learning_rate": 6.934604252980833e-07, "loss": 0.6349, "step": 14850 }, { "epoch": 0.6, "learning_rate": 6.928470084086424e-07, "loss": 0.6827, "step": 14855 }, { "epoch": 0.6, "learning_rate": 6.922337191078835e-07, "loss": 0.6323, "step": 14860 }, { "epoch": 0.6, "learning_rate": 6.916205576505607e-07, "loss": 0.6618, "step": 14865 }, { "epoch": 0.6, "learning_rate": 6.910075242913767e-07, "loss": 0.7118, "step": 14870 }, { "epoch": 0.6, "learning_rate": 6.903946192849801e-07, "loss": 0.6686, "step": 14875 }, { "epoch": 0.6, "learning_rate": 6.897818428859659e-07, "loss": 0.6446, "step": 14880 }, { "epoch": 0.6, "learning_rate": 6.891691953488767e-07, "loss": 0.7074, "step": 14885 }, { "epoch": 0.6, "learning_rate": 6.885566769282003e-07, "loss": 0.6567, "step": 14890 }, { "epoch": 0.61, "learning_rate": 6.87944287878372e-07, "loss": 0.6953, "step": 14895 }, { "epoch": 0.61, "learning_rate": 6.873320284537729e-07, "loss": 0.644, "step": 14900 }, { "epoch": 0.61, "learning_rate": 6.8671989890873e-07, "loss": 0.6672, "step": 14905 }, { "epoch": 0.61, "learning_rate": 6.861078994975167e-07, "loss": 0.6495, "step": 14910 }, { "epoch": 0.61, "learning_rate": 6.85496030474352e-07, "loss": 0.634, "step": 14915 }, { "epoch": 0.61, "learning_rate": 6.848842920934012e-07, "loss": 0.6863, "step": 14920 }, { "epoch": 0.61, "learning_rate": 6.842726846087754e-07, "loss": 0.688, "step": 14925 }, { "epoch": 0.61, "learning_rate": 6.836612082745305e-07, "loss": 0.6858, "step": 14930 }, { "epoch": 0.61, "learning_rate": 6.830498633446687e-07, "loss": 0.6644, "step": 14935 }, { "epoch": 0.61, "learning_rate": 6.824386500731373e-07, "loss": 0.6457, "step": 14940 }, { "epoch": 0.61, "learning_rate": 6.81827568713829e-07, "loss": 0.661, "step": 14945 }, { "epoch": 0.61, "learning_rate": 6.812166195205818e-07, "loss": 0.6476, "step": 14950 }, { "epoch": 0.61, "learning_rate": 6.806058027471784e-07, "loss": 0.6875, "step": 14955 }, { "epoch": 0.61, "learning_rate": 6.799951186473472e-07, "loss": 0.6149, "step": 14960 }, { "epoch": 0.61, "learning_rate": 6.793845674747606e-07, "loss": 0.6324, "step": 14965 }, { "epoch": 0.61, "learning_rate": 6.787741494830365e-07, "loss": 0.6664, "step": 14970 }, { "epoch": 0.61, "learning_rate": 6.781638649257373e-07, "loss": 0.6524, "step": 14975 }, { "epoch": 0.61, "learning_rate": 6.775537140563696e-07, "loss": 0.6808, "step": 14980 }, { "epoch": 0.61, "learning_rate": 6.769436971283852e-07, "loss": 0.6352, "step": 14985 }, { "epoch": 0.61, "learning_rate": 6.763338143951793e-07, "loss": 0.6175, "step": 14990 }, { "epoch": 0.61, "learning_rate": 6.757240661100925e-07, "loss": 0.6742, "step": 14995 }, { "epoch": 0.61, "learning_rate": 6.751144525264083e-07, "loss": 0.6516, "step": 15000 }, { "epoch": 0.61, "learning_rate": 6.745049738973552e-07, "loss": 0.6177, "step": 15005 }, { "epoch": 0.61, "learning_rate": 6.738956304761054e-07, "loss": 0.6248, "step": 15010 }, { "epoch": 0.61, "learning_rate": 6.732864225157747e-07, "loss": 0.6683, "step": 15015 }, { "epoch": 0.61, "learning_rate": 6.726773502694231e-07, "loss": 0.6681, "step": 15020 }, { "epoch": 0.61, "learning_rate": 6.720684139900533e-07, "loss": 0.6574, "step": 15025 }, { "epoch": 0.61, "learning_rate": 6.714596139306125e-07, "loss": 0.6806, "step": 15030 }, { "epoch": 0.61, "learning_rate": 6.708509503439912e-07, "loss": 0.6361, "step": 15035 }, { "epoch": 0.61, "learning_rate": 6.702424234830227e-07, "loss": 0.6768, "step": 15040 }, { "epoch": 0.61, "learning_rate": 6.696340336004837e-07, "loss": 0.6917, "step": 15045 }, { "epoch": 0.61, "learning_rate": 6.690257809490941e-07, "loss": 0.6971, "step": 15050 }, { "epoch": 0.61, "learning_rate": 6.684176657815166e-07, "loss": 0.6592, "step": 15055 }, { "epoch": 0.61, "learning_rate": 6.678096883503575e-07, "loss": 0.6655, "step": 15060 }, { "epoch": 0.61, "learning_rate": 6.672018489081648e-07, "loss": 0.6655, "step": 15065 }, { "epoch": 0.61, "learning_rate": 6.6659414770743e-07, "loss": 0.6384, "step": 15070 }, { "epoch": 0.61, "learning_rate": 6.659865850005868e-07, "loss": 0.6695, "step": 15075 }, { "epoch": 0.61, "learning_rate": 6.653791610400112e-07, "loss": 0.6991, "step": 15080 }, { "epoch": 0.61, "learning_rate": 6.647718760780226e-07, "loss": 0.655, "step": 15085 }, { "epoch": 0.61, "learning_rate": 6.641647303668809e-07, "loss": 0.6917, "step": 15090 }, { "epoch": 0.61, "learning_rate": 6.6355772415879e-07, "loss": 0.6641, "step": 15095 }, { "epoch": 0.61, "learning_rate": 6.629508577058945e-07, "loss": 0.6691, "step": 15100 }, { "epoch": 0.61, "learning_rate": 6.623441312602817e-07, "loss": 0.6896, "step": 15105 }, { "epoch": 0.61, "learning_rate": 6.617375450739802e-07, "loss": 0.6577, "step": 15110 }, { "epoch": 0.61, "learning_rate": 6.611310993989607e-07, "loss": 0.6675, "step": 15115 }, { "epoch": 0.61, "learning_rate": 6.60524794487136e-07, "loss": 0.6525, "step": 15120 }, { "epoch": 0.61, "learning_rate": 6.59918630590359e-07, "loss": 0.7091, "step": 15125 }, { "epoch": 0.61, "learning_rate": 6.593126079604256e-07, "loss": 0.6151, "step": 15130 }, { "epoch": 0.61, "learning_rate": 6.587067268490721e-07, "loss": 0.6501, "step": 15135 }, { "epoch": 0.61, "learning_rate": 6.581009875079759e-07, "loss": 0.5938, "step": 15140 }, { "epoch": 0.62, "learning_rate": 6.574953901887567e-07, "loss": 0.6959, "step": 15145 }, { "epoch": 0.62, "learning_rate": 6.568899351429736e-07, "loss": 0.6624, "step": 15150 }, { "epoch": 0.62, "learning_rate": 6.562846226221276e-07, "loss": 0.6503, "step": 15155 }, { "epoch": 0.62, "learning_rate": 6.556794528776601e-07, "loss": 0.6596, "step": 15160 }, { "epoch": 0.62, "learning_rate": 6.550744261609538e-07, "loss": 0.6702, "step": 15165 }, { "epoch": 0.62, "learning_rate": 6.544695427233312e-07, "loss": 0.6532, "step": 15170 }, { "epoch": 0.62, "learning_rate": 6.538648028160556e-07, "loss": 0.6611, "step": 15175 }, { "epoch": 0.62, "learning_rate": 6.532602066903309e-07, "loss": 0.6825, "step": 15180 }, { "epoch": 0.62, "learning_rate": 6.526557545973007e-07, "loss": 0.6508, "step": 15185 }, { "epoch": 0.62, "learning_rate": 6.520514467880495e-07, "loss": 0.6423, "step": 15190 }, { "epoch": 0.62, "learning_rate": 6.514472835136015e-07, "loss": 0.6711, "step": 15195 }, { "epoch": 0.62, "learning_rate": 6.508432650249206e-07, "loss": 0.6649, "step": 15200 }, { "epoch": 0.62, "eval_loss": 0.6282991766929626, "eval_runtime": 145.3768, "eval_samples_per_second": 16.275, "eval_steps_per_second": 2.717, "step": 15200 }, { "epoch": 0.62, "learning_rate": 6.502393915729112e-07, "loss": 0.6628, "step": 15205 }, { "epoch": 0.62, "learning_rate": 6.496356634084167e-07, "loss": 0.6933, "step": 15210 }, { "epoch": 0.62, "learning_rate": 6.490320807822207e-07, "loss": 0.6308, "step": 15215 }, { "epoch": 0.62, "learning_rate": 6.484286439450464e-07, "loss": 0.6538, "step": 15220 }, { "epoch": 0.62, "learning_rate": 6.478253531475557e-07, "loss": 0.6549, "step": 15225 }, { "epoch": 0.62, "learning_rate": 6.472222086403509e-07, "loss": 0.6538, "step": 15230 }, { "epoch": 0.62, "learning_rate": 6.466192106739725e-07, "loss": 0.6725, "step": 15235 }, { "epoch": 0.62, "learning_rate": 6.46016359498901e-07, "loss": 0.6694, "step": 15240 }, { "epoch": 0.62, "learning_rate": 6.454136553655553e-07, "loss": 0.6556, "step": 15245 }, { "epoch": 0.62, "learning_rate": 6.448110985242934e-07, "loss": 0.6585, "step": 15250 }, { "epoch": 0.62, "learning_rate": 6.442086892254126e-07, "loss": 0.6255, "step": 15255 }, { "epoch": 0.62, "learning_rate": 6.43606427719148e-07, "loss": 0.6198, "step": 15260 }, { "epoch": 0.62, "learning_rate": 6.43004314255674e-07, "loss": 0.647, "step": 15265 }, { "epoch": 0.62, "learning_rate": 6.424023490851031e-07, "loss": 0.6542, "step": 15270 }, { "epoch": 0.62, "learning_rate": 6.418005324574867e-07, "loss": 0.6648, "step": 15275 }, { "epoch": 0.62, "learning_rate": 6.411988646228142e-07, "loss": 0.6717, "step": 15280 }, { "epoch": 0.62, "learning_rate": 6.40597345831013e-07, "loss": 0.6903, "step": 15285 }, { "epoch": 0.62, "learning_rate": 6.399959763319487e-07, "loss": 0.6067, "step": 15290 }, { "epoch": 0.62, "learning_rate": 6.393947563754251e-07, "loss": 0.6792, "step": 15295 }, { "epoch": 0.62, "learning_rate": 6.387936862111838e-07, "loss": 0.6527, "step": 15300 }, { "epoch": 0.62, "learning_rate": 6.381927660889042e-07, "loss": 0.6708, "step": 15305 }, { "epoch": 0.62, "learning_rate": 6.375919962582028e-07, "loss": 0.6397, "step": 15310 }, { "epoch": 0.62, "learning_rate": 6.369913769686349e-07, "loss": 0.6297, "step": 15315 }, { "epoch": 0.62, "learning_rate": 6.363909084696916e-07, "loss": 0.6562, "step": 15320 }, { "epoch": 0.62, "learning_rate": 6.35790591010803e-07, "loss": 0.6346, "step": 15325 }, { "epoch": 0.62, "learning_rate": 6.351904248413359e-07, "loss": 0.6526, "step": 15330 }, { "epoch": 0.62, "learning_rate": 6.345904102105934e-07, "loss": 0.6721, "step": 15335 }, { "epoch": 0.62, "learning_rate": 6.339905473678171e-07, "loss": 0.6165, "step": 15340 }, { "epoch": 0.62, "learning_rate": 6.333908365621842e-07, "loss": 0.6632, "step": 15345 }, { "epoch": 0.62, "learning_rate": 6.327912780428095e-07, "loss": 0.6743, "step": 15350 }, { "epoch": 0.62, "learning_rate": 6.321918720587449e-07, "loss": 0.6507, "step": 15355 }, { "epoch": 0.62, "learning_rate": 6.315926188589776e-07, "loss": 0.6477, "step": 15360 }, { "epoch": 0.62, "learning_rate": 6.309935186924329e-07, "loss": 0.6308, "step": 15365 }, { "epoch": 0.62, "learning_rate": 6.303945718079713e-07, "loss": 0.6621, "step": 15370 }, { "epoch": 0.62, "learning_rate": 6.297957784543903e-07, "loss": 0.6398, "step": 15375 }, { "epoch": 0.62, "learning_rate": 6.291971388804238e-07, "loss": 0.668, "step": 15380 }, { "epoch": 0.62, "learning_rate": 6.28598653334741e-07, "loss": 0.6778, "step": 15385 }, { "epoch": 0.63, "learning_rate": 6.280003220659476e-07, "loss": 0.6458, "step": 15390 }, { "epoch": 0.63, "learning_rate": 6.274021453225854e-07, "loss": 0.6483, "step": 15395 }, { "epoch": 0.63, "learning_rate": 6.268041233531321e-07, "loss": 0.6463, "step": 15400 }, { "epoch": 0.63, "learning_rate": 6.262062564059999e-07, "loss": 0.7068, "step": 15405 }, { "epoch": 0.63, "learning_rate": 6.256085447295383e-07, "loss": 0.645, "step": 15410 }, { "epoch": 0.63, "learning_rate": 6.250109885720316e-07, "loss": 0.6908, "step": 15415 }, { "epoch": 0.63, "learning_rate": 6.24413588181699e-07, "loss": 0.6441, "step": 15420 }, { "epoch": 0.63, "learning_rate": 6.238163438066959e-07, "loss": 0.6636, "step": 15425 }, { "epoch": 0.63, "learning_rate": 6.232192556951116e-07, "loss": 0.6329, "step": 15430 }, { "epoch": 0.63, "learning_rate": 6.22622324094972e-07, "loss": 0.6181, "step": 15435 }, { "epoch": 0.63, "learning_rate": 6.220255492542374e-07, "loss": 0.6664, "step": 15440 }, { "epoch": 0.63, "learning_rate": 6.214289314208023e-07, "loss": 0.6294, "step": 15445 }, { "epoch": 0.63, "learning_rate": 6.208324708424975e-07, "loss": 0.6518, "step": 15450 }, { "epoch": 0.63, "learning_rate": 6.202361677670861e-07, "loss": 0.7155, "step": 15455 }, { "epoch": 0.63, "learning_rate": 6.196400224422682e-07, "loss": 0.6748, "step": 15460 }, { "epoch": 0.63, "learning_rate": 6.190440351156776e-07, "loss": 0.655, "step": 15465 }, { "epoch": 0.63, "learning_rate": 6.184482060348815e-07, "loss": 0.7019, "step": 15470 }, { "epoch": 0.63, "learning_rate": 6.178525354473829e-07, "loss": 0.6503, "step": 15475 }, { "epoch": 0.63, "learning_rate": 6.172570236006173e-07, "loss": 0.6535, "step": 15480 }, { "epoch": 0.63, "learning_rate": 6.166616707419556e-07, "loss": 0.6511, "step": 15485 }, { "epoch": 0.63, "learning_rate": 6.160664771187025e-07, "loss": 0.672, "step": 15490 }, { "epoch": 0.63, "learning_rate": 6.154714429780958e-07, "loss": 0.6731, "step": 15495 }, { "epoch": 0.63, "learning_rate": 6.148765685673078e-07, "loss": 0.6348, "step": 15500 }, { "epoch": 0.63, "learning_rate": 6.142818541334438e-07, "loss": 0.6413, "step": 15505 }, { "epoch": 0.63, "learning_rate": 6.136872999235431e-07, "loss": 0.6691, "step": 15510 }, { "epoch": 0.63, "learning_rate": 6.130929061845789e-07, "loss": 0.6727, "step": 15515 }, { "epoch": 0.63, "learning_rate": 6.124986731634565e-07, "loss": 0.6383, "step": 15520 }, { "epoch": 0.63, "learning_rate": 6.119046011070156e-07, "loss": 0.6822, "step": 15525 }, { "epoch": 0.63, "learning_rate": 6.113106902620282e-07, "loss": 0.6817, "step": 15530 }, { "epoch": 0.63, "learning_rate": 6.107169408752003e-07, "loss": 0.6315, "step": 15535 }, { "epoch": 0.63, "learning_rate": 6.101233531931688e-07, "loss": 0.6421, "step": 15540 }, { "epoch": 0.63, "learning_rate": 6.095299274625064e-07, "loss": 0.6542, "step": 15545 }, { "epoch": 0.63, "learning_rate": 6.089366639297166e-07, "loss": 0.6401, "step": 15550 }, { "epoch": 0.63, "learning_rate": 6.083435628412355e-07, "loss": 0.6219, "step": 15555 }, { "epoch": 0.63, "learning_rate": 6.077506244434328e-07, "loss": 0.676, "step": 15560 }, { "epoch": 0.63, "learning_rate": 6.07157848982609e-07, "loss": 0.6691, "step": 15565 }, { "epoch": 0.63, "learning_rate": 6.065652367049985e-07, "loss": 0.6419, "step": 15570 }, { "epoch": 0.63, "learning_rate": 6.059727878567675e-07, "loss": 0.6513, "step": 15575 }, { "epoch": 0.63, "learning_rate": 6.053805026840136e-07, "loss": 0.6663, "step": 15580 }, { "epoch": 0.63, "learning_rate": 6.047883814327675e-07, "loss": 0.6643, "step": 15585 }, { "epoch": 0.63, "learning_rate": 6.041964243489901e-07, "loss": 0.6548, "step": 15590 }, { "epoch": 0.63, "learning_rate": 6.036046316785761e-07, "loss": 0.6474, "step": 15595 }, { "epoch": 0.63, "learning_rate": 6.030130036673514e-07, "loss": 0.6343, "step": 15600 }, { "epoch": 0.63, "learning_rate": 6.024215405610722e-07, "loss": 0.6524, "step": 15605 }, { "epoch": 0.63, "learning_rate": 6.018302426054278e-07, "loss": 0.6536, "step": 15610 }, { "epoch": 0.63, "learning_rate": 6.012391100460377e-07, "loss": 0.6917, "step": 15615 }, { "epoch": 0.63, "learning_rate": 6.006481431284533e-07, "loss": 0.6504, "step": 15620 }, { "epoch": 0.63, "learning_rate": 6.000573420981578e-07, "loss": 0.6773, "step": 15625 }, { "epoch": 0.63, "learning_rate": 5.994667072005641e-07, "loss": 0.6295, "step": 15630 }, { "epoch": 0.64, "learning_rate": 5.988762386810173e-07, "loss": 0.6617, "step": 15635 }, { "epoch": 0.64, "learning_rate": 5.982859367847921e-07, "loss": 0.642, "step": 15640 }, { "epoch": 0.64, "learning_rate": 5.976958017570954e-07, "loss": 0.6194, "step": 15645 }, { "epoch": 0.64, "learning_rate": 5.971058338430643e-07, "loss": 0.6599, "step": 15650 }, { "epoch": 0.64, "learning_rate": 5.965160332877661e-07, "loss": 0.657, "step": 15655 }, { "epoch": 0.64, "learning_rate": 5.959264003361988e-07, "loss": 0.6272, "step": 15660 }, { "epoch": 0.64, "learning_rate": 5.953369352332904e-07, "loss": 0.66, "step": 15665 }, { "epoch": 0.64, "learning_rate": 5.947476382239007e-07, "loss": 0.6354, "step": 15670 }, { "epoch": 0.64, "learning_rate": 5.94158509552817e-07, "loss": 0.6648, "step": 15675 }, { "epoch": 0.64, "learning_rate": 5.935695494647595e-07, "loss": 0.6793, "step": 15680 }, { "epoch": 0.64, "learning_rate": 5.929807582043768e-07, "loss": 0.637, "step": 15685 }, { "epoch": 0.64, "learning_rate": 5.923921360162471e-07, "loss": 0.6285, "step": 15690 }, { "epoch": 0.64, "learning_rate": 5.9180368314488e-07, "loss": 0.6667, "step": 15695 }, { "epoch": 0.64, "learning_rate": 5.912153998347123e-07, "loss": 0.6879, "step": 15700 }, { "epoch": 0.64, "learning_rate": 5.90627286330113e-07, "loss": 0.6651, "step": 15705 }, { "epoch": 0.64, "learning_rate": 5.900393428753791e-07, "loss": 0.6321, "step": 15710 }, { "epoch": 0.64, "learning_rate": 5.894515697147365e-07, "loss": 0.6323, "step": 15715 }, { "epoch": 0.64, "learning_rate": 5.888639670923419e-07, "loss": 0.662, "step": 15720 }, { "epoch": 0.64, "learning_rate": 5.882765352522795e-07, "loss": 0.609, "step": 15725 }, { "epoch": 0.64, "learning_rate": 5.87689274438564e-07, "loss": 0.6352, "step": 15730 }, { "epoch": 0.64, "learning_rate": 5.871021848951387e-07, "loss": 0.6479, "step": 15735 }, { "epoch": 0.64, "learning_rate": 5.865152668658745e-07, "loss": 0.6526, "step": 15740 }, { "epoch": 0.64, "learning_rate": 5.859285205945732e-07, "loss": 0.6382, "step": 15745 }, { "epoch": 0.64, "learning_rate": 5.85341946324963e-07, "loss": 0.6363, "step": 15750 }, { "epoch": 0.64, "learning_rate": 5.847555443007022e-07, "loss": 0.6941, "step": 15755 }, { "epoch": 0.64, "learning_rate": 5.841693147653774e-07, "loss": 0.6592, "step": 15760 }, { "epoch": 0.64, "learning_rate": 5.835832579625031e-07, "loss": 0.6243, "step": 15765 }, { "epoch": 0.64, "learning_rate": 5.829973741355221e-07, "loss": 0.664, "step": 15770 }, { "epoch": 0.64, "learning_rate": 5.824116635278052e-07, "loss": 0.644, "step": 15775 }, { "epoch": 0.64, "learning_rate": 5.818261263826518e-07, "loss": 0.6605, "step": 15780 }, { "epoch": 0.64, "learning_rate": 5.812407629432891e-07, "loss": 0.6545, "step": 15785 }, { "epoch": 0.64, "learning_rate": 5.806555734528713e-07, "loss": 0.6652, "step": 15790 }, { "epoch": 0.64, "learning_rate": 5.800705581544821e-07, "loss": 0.6178, "step": 15795 }, { "epoch": 0.64, "learning_rate": 5.794857172911309e-07, "loss": 0.6393, "step": 15800 }, { "epoch": 0.64, "learning_rate": 5.789010511057555e-07, "loss": 0.6067, "step": 15805 }, { "epoch": 0.64, "learning_rate": 5.783165598412216e-07, "loss": 0.6481, "step": 15810 }, { "epoch": 0.64, "learning_rate": 5.777322437403218e-07, "loss": 0.6759, "step": 15815 }, { "epoch": 0.64, "learning_rate": 5.771481030457757e-07, "loss": 0.6192, "step": 15820 }, { "epoch": 0.64, "learning_rate": 5.765641380002299e-07, "loss": 0.6652, "step": 15825 }, { "epoch": 0.64, "learning_rate": 5.759803488462593e-07, "loss": 0.6694, "step": 15830 }, { "epoch": 0.64, "learning_rate": 5.753967358263642e-07, "loss": 0.6317, "step": 15835 }, { "epoch": 0.64, "learning_rate": 5.748132991829722e-07, "loss": 0.6434, "step": 15840 }, { "epoch": 0.64, "learning_rate": 5.742300391584384e-07, "loss": 0.6846, "step": 15845 }, { "epoch": 0.64, "learning_rate": 5.736469559950437e-07, "loss": 0.6559, "step": 15850 }, { "epoch": 0.64, "learning_rate": 5.730640499349957e-07, "loss": 0.6625, "step": 15855 }, { "epoch": 0.64, "learning_rate": 5.724813212204283e-07, "loss": 0.633, "step": 15860 }, { "epoch": 0.64, "learning_rate": 5.718987700934024e-07, "loss": 0.6763, "step": 15865 }, { "epoch": 0.64, "learning_rate": 5.713163967959045e-07, "loss": 0.667, "step": 15870 }, { "epoch": 0.64, "learning_rate": 5.70734201569847e-07, "loss": 0.6555, "step": 15875 }, { "epoch": 0.65, "learning_rate": 5.701521846570693e-07, "loss": 0.6228, "step": 15880 }, { "epoch": 0.65, "learning_rate": 5.695703462993362e-07, "loss": 0.6761, "step": 15885 }, { "epoch": 0.65, "learning_rate": 5.689886867383376e-07, "loss": 0.6576, "step": 15890 }, { "epoch": 0.65, "learning_rate": 5.684072062156906e-07, "loss": 0.6484, "step": 15895 }, { "epoch": 0.65, "learning_rate": 5.678259049729368e-07, "loss": 0.6562, "step": 15900 }, { "epoch": 0.65, "learning_rate": 5.672447832515439e-07, "loss": 0.667, "step": 15905 }, { "epoch": 0.65, "learning_rate": 5.66663841292904e-07, "loss": 0.669, "step": 15910 }, { "epoch": 0.65, "learning_rate": 5.660830793383364e-07, "loss": 0.6429, "step": 15915 }, { "epoch": 0.65, "learning_rate": 5.655024976290842e-07, "loss": 0.6619, "step": 15920 }, { "epoch": 0.65, "learning_rate": 5.649220964063154e-07, "loss": 0.662, "step": 15925 }, { "epoch": 0.65, "learning_rate": 5.643418759111247e-07, "loss": 0.6673, "step": 15930 }, { "epoch": 0.65, "learning_rate": 5.637618363845299e-07, "loss": 0.6804, "step": 15935 }, { "epoch": 0.65, "learning_rate": 5.631819780674742e-07, "loss": 0.6447, "step": 15940 }, { "epoch": 0.65, "learning_rate": 5.626023012008265e-07, "loss": 0.6468, "step": 15945 }, { "epoch": 0.65, "learning_rate": 5.620228060253791e-07, "loss": 0.6499, "step": 15950 }, { "epoch": 0.65, "learning_rate": 5.614434927818493e-07, "loss": 0.6622, "step": 15955 }, { "epoch": 0.65, "learning_rate": 5.608643617108781e-07, "loss": 0.6222, "step": 15960 }, { "epoch": 0.65, "learning_rate": 5.602854130530326e-07, "loss": 0.6698, "step": 15965 }, { "epoch": 0.65, "learning_rate": 5.597066470488026e-07, "loss": 0.6493, "step": 15970 }, { "epoch": 0.65, "learning_rate": 5.591280639386021e-07, "loss": 0.6689, "step": 15975 }, { "epoch": 0.65, "learning_rate": 5.585496639627702e-07, "loss": 0.6526, "step": 15980 }, { "epoch": 0.65, "learning_rate": 5.579714473615689e-07, "loss": 0.6491, "step": 15985 }, { "epoch": 0.65, "learning_rate": 5.573934143751841e-07, "loss": 0.6763, "step": 15990 }, { "epoch": 0.65, "learning_rate": 5.568155652437257e-07, "loss": 0.6604, "step": 15995 }, { "epoch": 0.65, "learning_rate": 5.562379002072277e-07, "loss": 0.6458, "step": 16000 }, { "epoch": 0.65, "eval_loss": 0.6262282133102417, "eval_runtime": 144.7729, "eval_samples_per_second": 16.343, "eval_steps_per_second": 2.728, "step": 16000 }, { "epoch": 0.65, "learning_rate": 5.556604195056469e-07, "loss": 0.6665, "step": 16005 }, { "epoch": 0.65, "learning_rate": 5.550831233788631e-07, "loss": 0.6382, "step": 16010 }, { "epoch": 0.65, "learning_rate": 5.545060120666811e-07, "loss": 0.6322, "step": 16015 }, { "epoch": 0.65, "learning_rate": 5.539290858088277e-07, "loss": 0.6352, "step": 16020 }, { "epoch": 0.65, "learning_rate": 5.533523448449522e-07, "loss": 0.6883, "step": 16025 }, { "epoch": 0.65, "learning_rate": 5.527757894146288e-07, "loss": 0.6319, "step": 16030 }, { "epoch": 0.65, "learning_rate": 5.521994197573534e-07, "loss": 0.6687, "step": 16035 }, { "epoch": 0.65, "learning_rate": 5.516232361125446e-07, "loss": 0.6326, "step": 16040 }, { "epoch": 0.65, "learning_rate": 5.510472387195439e-07, "loss": 0.6674, "step": 16045 }, { "epoch": 0.65, "learning_rate": 5.504714278176163e-07, "loss": 0.6257, "step": 16050 }, { "epoch": 0.65, "learning_rate": 5.498958036459484e-07, "loss": 0.7107, "step": 16055 }, { "epoch": 0.65, "learning_rate": 5.49320366443649e-07, "loss": 0.6777, "step": 16060 }, { "epoch": 0.65, "learning_rate": 5.487451164497503e-07, "loss": 0.6695, "step": 16065 }, { "epoch": 0.65, "learning_rate": 5.481700539032062e-07, "loss": 0.6443, "step": 16070 }, { "epoch": 0.65, "learning_rate": 5.475951790428918e-07, "loss": 0.6397, "step": 16075 }, { "epoch": 0.65, "learning_rate": 5.47020492107606e-07, "loss": 0.6617, "step": 16080 }, { "epoch": 0.65, "learning_rate": 5.464459933360687e-07, "loss": 0.6453, "step": 16085 }, { "epoch": 0.65, "learning_rate": 5.458716829669215e-07, "loss": 0.6691, "step": 16090 }, { "epoch": 0.65, "learning_rate": 5.452975612387274e-07, "loss": 0.6762, "step": 16095 }, { "epoch": 0.65, "learning_rate": 5.447236283899723e-07, "loss": 0.6542, "step": 16100 }, { "epoch": 0.65, "learning_rate": 5.441498846590629e-07, "loss": 0.6588, "step": 16105 }, { "epoch": 0.65, "learning_rate": 5.435763302843268e-07, "loss": 0.666, "step": 16110 }, { "epoch": 0.65, "learning_rate": 5.43002965504014e-07, "loss": 0.6717, "step": 16115 }, { "epoch": 0.65, "learning_rate": 5.424297905562953e-07, "loss": 0.6642, "step": 16120 }, { "epoch": 0.65, "learning_rate": 5.418568056792624e-07, "loss": 0.6994, "step": 16125 }, { "epoch": 0.66, "learning_rate": 5.412840111109277e-07, "loss": 0.6606, "step": 16130 }, { "epoch": 0.66, "learning_rate": 5.40711407089226e-07, "loss": 0.6669, "step": 16135 }, { "epoch": 0.66, "learning_rate": 5.401389938520118e-07, "loss": 0.6357, "step": 16140 }, { "epoch": 0.66, "learning_rate": 5.395667716370598e-07, "loss": 0.6473, "step": 16145 }, { "epoch": 0.66, "learning_rate": 5.389947406820672e-07, "loss": 0.6316, "step": 16150 }, { "epoch": 0.66, "learning_rate": 5.384229012246501e-07, "loss": 0.6444, "step": 16155 }, { "epoch": 0.66, "learning_rate": 5.378512535023457e-07, "loss": 0.6439, "step": 16160 }, { "epoch": 0.66, "learning_rate": 5.372797977526115e-07, "loss": 0.6778, "step": 16165 }, { "epoch": 0.66, "learning_rate": 5.367085342128257e-07, "loss": 0.6545, "step": 16170 }, { "epoch": 0.66, "learning_rate": 5.361374631202857e-07, "loss": 0.6666, "step": 16175 }, { "epoch": 0.66, "learning_rate": 5.35566584712209e-07, "loss": 0.657, "step": 16180 }, { "epoch": 0.66, "learning_rate": 5.349958992257346e-07, "loss": 0.6504, "step": 16185 }, { "epoch": 0.66, "learning_rate": 5.344254068979199e-07, "loss": 0.6331, "step": 16190 }, { "epoch": 0.66, "learning_rate": 5.338551079657418e-07, "loss": 0.6388, "step": 16195 }, { "epoch": 0.66, "learning_rate": 5.332850026660985e-07, "loss": 0.6554, "step": 16200 }, { "epoch": 0.66, "learning_rate": 5.327150912358065e-07, "loss": 0.6618, "step": 16205 }, { "epoch": 0.66, "learning_rate": 5.321453739116013e-07, "loss": 0.6708, "step": 16210 }, { "epoch": 0.66, "learning_rate": 5.315758509301396e-07, "loss": 0.6671, "step": 16215 }, { "epoch": 0.66, "learning_rate": 5.310065225279957e-07, "loss": 0.6527, "step": 16220 }, { "epoch": 0.66, "learning_rate": 5.304373889416638e-07, "loss": 0.7327, "step": 16225 }, { "epoch": 0.66, "learning_rate": 5.298684504075567e-07, "loss": 0.6462, "step": 16230 }, { "epoch": 0.66, "learning_rate": 5.292997071620072e-07, "loss": 0.6424, "step": 16235 }, { "epoch": 0.66, "learning_rate": 5.28731159441266e-07, "loss": 0.6477, "step": 16240 }, { "epoch": 0.66, "learning_rate": 5.281628074815023e-07, "loss": 0.6995, "step": 16245 }, { "epoch": 0.66, "learning_rate": 5.275946515188055e-07, "loss": 0.6511, "step": 16250 }, { "epoch": 0.66, "learning_rate": 5.270266917891822e-07, "loss": 0.6662, "step": 16255 }, { "epoch": 0.66, "learning_rate": 5.264589285285581e-07, "loss": 0.6326, "step": 16260 }, { "epoch": 0.66, "learning_rate": 5.258913619727765e-07, "loss": 0.6911, "step": 16265 }, { "epoch": 0.66, "learning_rate": 5.253239923576003e-07, "loss": 0.6766, "step": 16270 }, { "epoch": 0.66, "learning_rate": 5.247568199187099e-07, "loss": 0.665, "step": 16275 }, { "epoch": 0.66, "learning_rate": 5.241898448917032e-07, "loss": 0.6645, "step": 16280 }, { "epoch": 0.66, "learning_rate": 5.236230675120974e-07, "loss": 0.6634, "step": 16285 }, { "epoch": 0.66, "learning_rate": 5.230564880153266e-07, "loss": 0.6747, "step": 16290 }, { "epoch": 0.66, "learning_rate": 5.224901066367426e-07, "loss": 0.689, "step": 16295 }, { "epoch": 0.66, "learning_rate": 5.21923923611616e-07, "loss": 0.6862, "step": 16300 }, { "epoch": 0.66, "learning_rate": 5.21357939175134e-07, "loss": 0.6523, "step": 16305 }, { "epoch": 0.66, "learning_rate": 5.207921535624017e-07, "loss": 0.6355, "step": 16310 }, { "epoch": 0.66, "learning_rate": 5.202265670084409e-07, "loss": 0.6314, "step": 16315 }, { "epoch": 0.66, "learning_rate": 5.196611797481922e-07, "loss": 0.6796, "step": 16320 }, { "epoch": 0.66, "learning_rate": 5.190959920165124e-07, "loss": 0.7183, "step": 16325 }, { "epoch": 0.66, "learning_rate": 5.185310040481749e-07, "loss": 0.6641, "step": 16330 }, { "epoch": 0.66, "learning_rate": 5.179662160778716e-07, "loss": 0.659, "step": 16335 }, { "epoch": 0.66, "learning_rate": 5.174016283402101e-07, "loss": 0.6464, "step": 16340 }, { "epoch": 0.66, "learning_rate": 5.16837241069715e-07, "loss": 0.6658, "step": 16345 }, { "epoch": 0.66, "learning_rate": 5.162730545008287e-07, "loss": 0.6686, "step": 16350 }, { "epoch": 0.66, "learning_rate": 5.157090688679087e-07, "loss": 0.6493, "step": 16355 }, { "epoch": 0.66, "learning_rate": 5.1514528440523e-07, "loss": 0.6698, "step": 16360 }, { "epoch": 0.66, "learning_rate": 5.14581701346983e-07, "loss": 0.6519, "step": 16365 }, { "epoch": 0.66, "learning_rate": 5.140183199272765e-07, "loss": 0.6418, "step": 16370 }, { "epoch": 0.67, "learning_rate": 5.134551403801336e-07, "loss": 0.6559, "step": 16375 }, { "epoch": 0.67, "learning_rate": 5.128921629394937e-07, "loss": 0.6312, "step": 16380 }, { "epoch": 0.67, "learning_rate": 5.123293878392136e-07, "loss": 0.6356, "step": 16385 }, { "epoch": 0.67, "learning_rate": 5.11766815313065e-07, "loss": 0.6799, "step": 16390 }, { "epoch": 0.67, "learning_rate": 5.112044455947352e-07, "loss": 0.6431, "step": 16395 }, { "epoch": 0.67, "learning_rate": 5.106422789178275e-07, "loss": 0.6758, "step": 16400 }, { "epoch": 0.67, "learning_rate": 5.100803155158621e-07, "loss": 0.6414, "step": 16405 }, { "epoch": 0.67, "learning_rate": 5.09518555622273e-07, "loss": 0.679, "step": 16410 }, { "epoch": 0.67, "learning_rate": 5.089569994704099e-07, "loss": 0.6796, "step": 16415 }, { "epoch": 0.67, "learning_rate": 5.083956472935395e-07, "loss": 0.6583, "step": 16420 }, { "epoch": 0.67, "learning_rate": 5.07834499324842e-07, "loss": 0.6481, "step": 16425 }, { "epoch": 0.67, "learning_rate": 5.072735557974128e-07, "loss": 0.6472, "step": 16430 }, { "epoch": 0.67, "learning_rate": 5.067128169442642e-07, "loss": 0.6412, "step": 16435 }, { "epoch": 0.67, "learning_rate": 5.061522829983215e-07, "loss": 0.6273, "step": 16440 }, { "epoch": 0.67, "learning_rate": 5.055919541924258e-07, "loss": 0.6899, "step": 16445 }, { "epoch": 0.67, "learning_rate": 5.050318307593322e-07, "loss": 0.6384, "step": 16450 }, { "epoch": 0.67, "learning_rate": 5.044719129317121e-07, "loss": 0.6764, "step": 16455 }, { "epoch": 0.67, "learning_rate": 5.039122009421501e-07, "loss": 0.635, "step": 16460 }, { "epoch": 0.67, "learning_rate": 5.033526950231452e-07, "loss": 0.6379, "step": 16465 }, { "epoch": 0.67, "learning_rate": 5.027933954071121e-07, "loss": 0.656, "step": 16470 }, { "epoch": 0.67, "learning_rate": 5.022343023263789e-07, "loss": 0.667, "step": 16475 }, { "epoch": 0.67, "learning_rate": 5.016754160131871e-07, "loss": 0.6475, "step": 16480 }, { "epoch": 0.67, "learning_rate": 5.011167366996942e-07, "loss": 0.6578, "step": 16485 }, { "epoch": 0.67, "learning_rate": 5.005582646179705e-07, "loss": 0.642, "step": 16490 }, { "epoch": 0.67, "learning_rate": 5.000000000000002e-07, "loss": 0.655, "step": 16495 }, { "epoch": 0.67, "learning_rate": 4.994419430776813e-07, "loss": 0.6781, "step": 16500 }, { "epoch": 0.67, "learning_rate": 4.988840940828266e-07, "loss": 0.6668, "step": 16505 }, { "epoch": 0.67, "learning_rate": 4.983264532471613e-07, "loss": 0.6634, "step": 16510 }, { "epoch": 0.67, "learning_rate": 4.977690208023243e-07, "loss": 0.6537, "step": 16515 }, { "epoch": 0.67, "learning_rate": 4.972117969798687e-07, "loss": 0.6613, "step": 16520 }, { "epoch": 0.67, "learning_rate": 4.966547820112602e-07, "loss": 0.618, "step": 16525 }, { "epoch": 0.67, "learning_rate": 4.96097976127878e-07, "loss": 0.6319, "step": 16530 }, { "epoch": 0.67, "learning_rate": 4.955413795610139e-07, "loss": 0.63, "step": 16535 }, { "epoch": 0.67, "learning_rate": 4.949849925418741e-07, "loss": 0.6434, "step": 16540 }, { "epoch": 0.67, "learning_rate": 4.944288153015767e-07, "loss": 0.6651, "step": 16545 }, { "epoch": 0.67, "learning_rate": 4.938728480711523e-07, "loss": 0.641, "step": 16550 }, { "epoch": 0.67, "learning_rate": 4.933170910815456e-07, "loss": 0.6605, "step": 16555 }, { "epoch": 0.67, "learning_rate": 4.927615445636131e-07, "loss": 0.658, "step": 16560 }, { "epoch": 0.67, "learning_rate": 4.922062087481233e-07, "loss": 0.6616, "step": 16565 }, { "epoch": 0.67, "learning_rate": 4.916510838657589e-07, "loss": 0.6089, "step": 16570 }, { "epoch": 0.67, "learning_rate": 4.910961701471135e-07, "loss": 0.6732, "step": 16575 }, { "epoch": 0.67, "learning_rate": 4.905414678226932e-07, "loss": 0.6555, "step": 16580 }, { "epoch": 0.67, "learning_rate": 4.899869771229164e-07, "loss": 0.6712, "step": 16585 }, { "epoch": 0.67, "learning_rate": 4.894326982781144e-07, "loss": 0.6536, "step": 16590 }, { "epoch": 0.67, "learning_rate": 4.888786315185295e-07, "loss": 0.6191, "step": 16595 }, { "epoch": 0.67, "learning_rate": 4.883247770743156e-07, "loss": 0.6292, "step": 16600 }, { "epoch": 0.67, "learning_rate": 4.8777113517554e-07, "loss": 0.6654, "step": 16605 }, { "epoch": 0.67, "learning_rate": 4.872177060521803e-07, "loss": 0.6175, "step": 16610 }, { "epoch": 0.67, "learning_rate": 4.866644899341258e-07, "loss": 0.6802, "step": 16615 }, { "epoch": 0.68, "learning_rate": 4.861114870511783e-07, "loss": 0.6134, "step": 16620 }, { "epoch": 0.68, "learning_rate": 4.855586976330501e-07, "loss": 0.6653, "step": 16625 }, { "epoch": 0.68, "learning_rate": 4.850061219093651e-07, "loss": 0.691, "step": 16630 }, { "epoch": 0.68, "learning_rate": 4.844537601096582e-07, "loss": 0.6575, "step": 16635 }, { "epoch": 0.68, "learning_rate": 4.839016124633762e-07, "loss": 0.6805, "step": 16640 }, { "epoch": 0.68, "learning_rate": 4.833496791998762e-07, "loss": 0.6172, "step": 16645 }, { "epoch": 0.68, "learning_rate": 4.827979605484261e-07, "loss": 0.647, "step": 16650 }, { "epoch": 0.68, "learning_rate": 4.822464567382056e-07, "loss": 0.6365, "step": 16655 }, { "epoch": 0.68, "learning_rate": 4.816951679983046e-07, "loss": 0.6272, "step": 16660 }, { "epoch": 0.68, "learning_rate": 4.811440945577232e-07, "loss": 0.6359, "step": 16665 }, { "epoch": 0.68, "learning_rate": 4.805932366453725e-07, "loss": 0.6765, "step": 16670 }, { "epoch": 0.68, "learning_rate": 4.800425944900747e-07, "loss": 0.6651, "step": 16675 }, { "epoch": 0.68, "learning_rate": 4.794921683205613e-07, "loss": 0.644, "step": 16680 }, { "epoch": 0.68, "learning_rate": 4.789419583654742e-07, "loss": 0.6639, "step": 16685 }, { "epoch": 0.68, "learning_rate": 4.783919648533668e-07, "loss": 0.6714, "step": 16690 }, { "epoch": 0.68, "learning_rate": 4.778421880127009e-07, "loss": 0.6429, "step": 16695 }, { "epoch": 0.68, "learning_rate": 4.772926280718488e-07, "loss": 0.6389, "step": 16700 }, { "epoch": 0.68, "learning_rate": 4.767432852590939e-07, "loss": 0.6473, "step": 16705 }, { "epoch": 0.68, "learning_rate": 4.7619415980262765e-07, "loss": 0.6766, "step": 16710 }, { "epoch": 0.68, "learning_rate": 4.756452519305523e-07, "loss": 0.6198, "step": 16715 }, { "epoch": 0.68, "learning_rate": 4.7509656187087886e-07, "loss": 0.6646, "step": 16720 }, { "epoch": 0.68, "learning_rate": 4.745480898515293e-07, "loss": 0.6313, "step": 16725 }, { "epoch": 0.68, "learning_rate": 4.7399983610033346e-07, "loss": 0.6982, "step": 16730 }, { "epoch": 0.68, "learning_rate": 4.734518008450311e-07, "loss": 0.706, "step": 16735 }, { "epoch": 0.68, "learning_rate": 4.7290398431327207e-07, "loss": 0.6684, "step": 16740 }, { "epoch": 0.68, "learning_rate": 4.7235638673261404e-07, "loss": 0.6462, "step": 16745 }, { "epoch": 0.68, "learning_rate": 4.7180900833052394e-07, "loss": 0.6975, "step": 16750 }, { "epoch": 0.68, "learning_rate": 4.7126184933437873e-07, "loss": 0.6282, "step": 16755 }, { "epoch": 0.68, "learning_rate": 4.7071490997146323e-07, "loss": 0.6415, "step": 16760 }, { "epoch": 0.68, "learning_rate": 4.7016819046897126e-07, "loss": 0.6512, "step": 16765 }, { "epoch": 0.68, "learning_rate": 4.6962169105400495e-07, "loss": 0.6839, "step": 16770 }, { "epoch": 0.68, "learning_rate": 4.6907541195357613e-07, "loss": 0.6516, "step": 16775 }, { "epoch": 0.68, "learning_rate": 4.6852935339460407e-07, "loss": 0.6369, "step": 16780 }, { "epoch": 0.68, "learning_rate": 4.6798351560391636e-07, "loss": 0.6695, "step": 16785 }, { "epoch": 0.68, "learning_rate": 4.674378988082499e-07, "loss": 0.6873, "step": 16790 }, { "epoch": 0.68, "learning_rate": 4.66892503234249e-07, "loss": 0.6493, "step": 16795 }, { "epoch": 0.68, "learning_rate": 4.6634732910846563e-07, "loss": 0.6268, "step": 16800 }, { "epoch": 0.68, "eval_loss": 0.6238049268722534, "eval_runtime": 144.7581, "eval_samples_per_second": 16.345, "eval_steps_per_second": 2.729, "step": 16800 }, { "epoch": 0.68, "learning_rate": 4.6580237665736135e-07, "loss": 0.6724, "step": 16805 }, { "epoch": 0.68, "learning_rate": 4.652576461073042e-07, "loss": 0.6617, "step": 16810 }, { "epoch": 0.68, "learning_rate": 4.6471313768457053e-07, "loss": 0.6567, "step": 16815 }, { "epoch": 0.68, "learning_rate": 4.6416885161534394e-07, "loss": 0.6866, "step": 16820 }, { "epoch": 0.68, "learning_rate": 4.636247881257174e-07, "loss": 0.661, "step": 16825 }, { "epoch": 0.68, "learning_rate": 4.630809474416885e-07, "loss": 0.6815, "step": 16830 }, { "epoch": 0.68, "learning_rate": 4.625373297891647e-07, "loss": 0.5876, "step": 16835 }, { "epoch": 0.68, "learning_rate": 4.619939353939606e-07, "loss": 0.7043, "step": 16840 }, { "epoch": 0.68, "learning_rate": 4.6145076448179696e-07, "loss": 0.6551, "step": 16845 }, { "epoch": 0.68, "learning_rate": 4.609078172783023e-07, "loss": 0.658, "step": 16850 }, { "epoch": 0.68, "learning_rate": 4.603650940090118e-07, "loss": 0.6219, "step": 16855 }, { "epoch": 0.68, "learning_rate": 4.598225948993687e-07, "loss": 0.6702, "step": 16860 }, { "epoch": 0.69, "learning_rate": 4.5928032017472216e-07, "loss": 0.636, "step": 16865 }, { "epoch": 0.69, "learning_rate": 4.587382700603278e-07, "loss": 0.6706, "step": 16870 }, { "epoch": 0.69, "learning_rate": 4.581964447813499e-07, "loss": 0.6071, "step": 16875 }, { "epoch": 0.69, "learning_rate": 4.5765484456285617e-07, "loss": 0.6504, "step": 16880 }, { "epoch": 0.69, "learning_rate": 4.5711346962982366e-07, "loss": 0.6407, "step": 16885 }, { "epoch": 0.69, "learning_rate": 4.56572320207135e-07, "loss": 0.6647, "step": 16890 }, { "epoch": 0.69, "learning_rate": 4.5603139651957866e-07, "loss": 0.6111, "step": 16895 }, { "epoch": 0.69, "learning_rate": 4.5549069879184965e-07, "loss": 0.6486, "step": 16900 }, { "epoch": 0.69, "learning_rate": 4.549502272485487e-07, "loss": 0.638, "step": 16905 }, { "epoch": 0.69, "learning_rate": 4.544099821141838e-07, "loss": 0.6168, "step": 16910 }, { "epoch": 0.69, "learning_rate": 4.538699636131675e-07, "loss": 0.6655, "step": 16915 }, { "epoch": 0.69, "learning_rate": 4.533301719698187e-07, "loss": 0.6584, "step": 16920 }, { "epoch": 0.69, "learning_rate": 4.5279060740836305e-07, "loss": 0.6485, "step": 16925 }, { "epoch": 0.69, "learning_rate": 4.522512701529296e-07, "loss": 0.6726, "step": 16930 }, { "epoch": 0.69, "learning_rate": 4.517121604275551e-07, "loss": 0.6818, "step": 16935 }, { "epoch": 0.69, "learning_rate": 4.5117327845618136e-07, "loss": 0.6737, "step": 16940 }, { "epoch": 0.69, "learning_rate": 4.50634624462655e-07, "loss": 0.6375, "step": 16945 }, { "epoch": 0.69, "learning_rate": 4.5009619867072803e-07, "loss": 0.6622, "step": 16950 }, { "epoch": 0.69, "learning_rate": 4.4955800130405763e-07, "loss": 0.6516, "step": 16955 }, { "epoch": 0.69, "learning_rate": 4.4902003258620725e-07, "loss": 0.6557, "step": 16960 }, { "epoch": 0.69, "learning_rate": 4.484822927406431e-07, "loss": 0.6293, "step": 16965 }, { "epoch": 0.69, "learning_rate": 4.479447819907383e-07, "loss": 0.6313, "step": 16970 }, { "epoch": 0.69, "learning_rate": 4.474075005597705e-07, "loss": 0.6323, "step": 16975 }, { "epoch": 0.69, "learning_rate": 4.4687044867092147e-07, "loss": 0.6483, "step": 16980 }, { "epoch": 0.69, "learning_rate": 4.463336265472778e-07, "loss": 0.6485, "step": 16985 }, { "epoch": 0.69, "learning_rate": 4.4579703441183025e-07, "loss": 0.6649, "step": 16990 }, { "epoch": 0.69, "learning_rate": 4.4526067248747557e-07, "loss": 0.6729, "step": 16995 }, { "epoch": 0.69, "learning_rate": 4.447245409970132e-07, "loss": 0.6805, "step": 17000 }, { "epoch": 0.69, "learning_rate": 4.441886401631472e-07, "loss": 0.6739, "step": 17005 }, { "epoch": 0.69, "learning_rate": 4.4365297020848727e-07, "loss": 0.6435, "step": 17010 }, { "epoch": 0.69, "learning_rate": 4.431175313555445e-07, "loss": 0.6506, "step": 17015 }, { "epoch": 0.69, "learning_rate": 4.4258232382673634e-07, "loss": 0.6641, "step": 17020 }, { "epoch": 0.69, "learning_rate": 4.4204734784438367e-07, "loss": 0.6448, "step": 17025 }, { "epoch": 0.69, "learning_rate": 4.4151260363071064e-07, "loss": 0.6651, "step": 17030 }, { "epoch": 0.69, "learning_rate": 4.40978091407845e-07, "loss": 0.6762, "step": 17035 }, { "epoch": 0.69, "learning_rate": 4.404438113978185e-07, "loss": 0.6604, "step": 17040 }, { "epoch": 0.69, "learning_rate": 4.3990976382256694e-07, "loss": 0.6538, "step": 17045 }, { "epoch": 0.69, "learning_rate": 4.393759489039287e-07, "loss": 0.631, "step": 17050 }, { "epoch": 0.69, "learning_rate": 4.388423668636455e-07, "loss": 0.6838, "step": 17055 }, { "epoch": 0.69, "learning_rate": 4.3830901792336374e-07, "loss": 0.6582, "step": 17060 }, { "epoch": 0.69, "learning_rate": 4.377759023046306e-07, "loss": 0.6154, "step": 17065 }, { "epoch": 0.69, "learning_rate": 4.372430202288981e-07, "loss": 0.646, "step": 17070 }, { "epoch": 0.69, "learning_rate": 4.3671037191752157e-07, "loss": 0.6777, "step": 17075 }, { "epoch": 0.69, "learning_rate": 4.361779575917579e-07, "loss": 0.6359, "step": 17080 }, { "epoch": 0.69, "learning_rate": 4.356457774727674e-07, "loss": 0.6211, "step": 17085 }, { "epoch": 0.69, "learning_rate": 4.3511383178161263e-07, "loss": 0.6075, "step": 17090 }, { "epoch": 0.69, "learning_rate": 4.345821207392605e-07, "loss": 0.6124, "step": 17095 }, { "epoch": 0.69, "learning_rate": 4.340506445665775e-07, "loss": 0.6473, "step": 17100 }, { "epoch": 0.69, "learning_rate": 4.33519403484335e-07, "loss": 0.6244, "step": 17105 }, { "epoch": 0.69, "learning_rate": 4.3298839771320674e-07, "loss": 0.6249, "step": 17110 }, { "epoch": 0.7, "learning_rate": 4.3245762747376635e-07, "loss": 0.6282, "step": 17115 }, { "epoch": 0.7, "learning_rate": 4.3192709298649223e-07, "loss": 0.6779, "step": 17120 }, { "epoch": 0.7, "learning_rate": 4.3139679447176313e-07, "loss": 0.6534, "step": 17125 }, { "epoch": 0.7, "learning_rate": 4.3086673214986114e-07, "loss": 0.6536, "step": 17130 }, { "epoch": 0.7, "learning_rate": 4.303369062409693e-07, "loss": 0.6452, "step": 17135 }, { "epoch": 0.7, "learning_rate": 4.2980731696517203e-07, "loss": 0.6267, "step": 17140 }, { "epoch": 0.7, "learning_rate": 4.292779645424576e-07, "loss": 0.633, "step": 17145 }, { "epoch": 0.7, "learning_rate": 4.2874884919271257e-07, "loss": 0.6706, "step": 17150 }, { "epoch": 0.7, "learning_rate": 4.282199711357277e-07, "loss": 0.6658, "step": 17155 }, { "epoch": 0.7, "learning_rate": 4.276913305911952e-07, "loss": 0.6152, "step": 17160 }, { "epoch": 0.7, "learning_rate": 4.2716292777870624e-07, "loss": 0.6495, "step": 17165 }, { "epoch": 0.7, "learning_rate": 4.266347629177558e-07, "loss": 0.6657, "step": 17170 }, { "epoch": 0.7, "learning_rate": 4.2610683622773815e-07, "loss": 0.6627, "step": 17175 }, { "epoch": 0.7, "learning_rate": 4.255791479279504e-07, "loss": 0.6713, "step": 17180 }, { "epoch": 0.7, "learning_rate": 4.250516982375891e-07, "loss": 0.6723, "step": 17185 }, { "epoch": 0.7, "learning_rate": 4.245244873757521e-07, "loss": 0.6523, "step": 17190 }, { "epoch": 0.7, "learning_rate": 4.2399751556143903e-07, "loss": 0.6366, "step": 17195 }, { "epoch": 0.7, "learning_rate": 4.2347078301354824e-07, "loss": 0.6277, "step": 17200 }, { "epoch": 0.7, "learning_rate": 4.229442899508804e-07, "loss": 0.6854, "step": 17205 }, { "epoch": 0.7, "learning_rate": 4.224180365921366e-07, "loss": 0.648, "step": 17210 }, { "epoch": 0.7, "learning_rate": 4.2189202315591744e-07, "loss": 0.6133, "step": 17215 }, { "epoch": 0.7, "learning_rate": 4.2136624986072435e-07, "loss": 0.6783, "step": 17220 }, { "epoch": 0.7, "learning_rate": 4.208407169249587e-07, "loss": 0.6617, "step": 17225 }, { "epoch": 0.7, "learning_rate": 4.2031542456692303e-07, "loss": 0.639, "step": 17230 }, { "epoch": 0.7, "learning_rate": 4.197903730048188e-07, "loss": 0.6371, "step": 17235 }, { "epoch": 0.7, "learning_rate": 4.192655624567475e-07, "loss": 0.636, "step": 17240 }, { "epoch": 0.7, "learning_rate": 4.18740993140712e-07, "loss": 0.6729, "step": 17245 }, { "epoch": 0.7, "learning_rate": 4.182166652746123e-07, "loss": 0.6432, "step": 17250 }, { "epoch": 0.7, "learning_rate": 4.1769257907625077e-07, "loss": 0.6669, "step": 17255 }, { "epoch": 0.7, "learning_rate": 4.171687347633276e-07, "loss": 0.6517, "step": 17260 }, { "epoch": 0.7, "learning_rate": 4.166451325534437e-07, "loss": 0.6368, "step": 17265 }, { "epoch": 0.7, "learning_rate": 4.161217726640988e-07, "loss": 0.6766, "step": 17270 }, { "epoch": 0.7, "learning_rate": 4.1559865531269135e-07, "loss": 0.6816, "step": 17275 }, { "epoch": 0.7, "learning_rate": 4.150757807165212e-07, "loss": 0.6367, "step": 17280 }, { "epoch": 0.7, "learning_rate": 4.1455314909278405e-07, "loss": 0.6598, "step": 17285 }, { "epoch": 0.7, "learning_rate": 4.1403076065857755e-07, "loss": 0.6619, "step": 17290 }, { "epoch": 0.7, "learning_rate": 4.13508615630898e-07, "loss": 0.656, "step": 17295 }, { "epoch": 0.7, "learning_rate": 4.129867142266384e-07, "loss": 0.6077, "step": 17300 }, { "epoch": 0.7, "learning_rate": 4.124650566625931e-07, "loss": 0.6615, "step": 17305 }, { "epoch": 0.7, "learning_rate": 4.119436431554536e-07, "loss": 0.6512, "step": 17310 }, { "epoch": 0.7, "learning_rate": 4.1142247392181096e-07, "loss": 0.6808, "step": 17315 }, { "epoch": 0.7, "learning_rate": 4.1090154917815423e-07, "loss": 0.667, "step": 17320 }, { "epoch": 0.7, "learning_rate": 4.1038086914087045e-07, "loss": 0.655, "step": 17325 }, { "epoch": 0.7, "learning_rate": 4.0986043402624694e-07, "loss": 0.645, "step": 17330 }, { "epoch": 0.7, "learning_rate": 4.093402440504662e-07, "loss": 0.6364, "step": 17335 }, { "epoch": 0.7, "learning_rate": 4.0882029942961136e-07, "loss": 0.6864, "step": 17340 }, { "epoch": 0.7, "learning_rate": 4.0830060037966386e-07, "loss": 0.6425, "step": 17345 }, { "epoch": 0.7, "learning_rate": 4.077811471165005e-07, "loss": 0.6822, "step": 17350 }, { "epoch": 0.7, "learning_rate": 4.0726193985589884e-07, "loss": 0.6606, "step": 17355 }, { "epoch": 0.71, "learning_rate": 4.0674297881353227e-07, "loss": 0.6377, "step": 17360 }, { "epoch": 0.71, "learning_rate": 4.0622426420497345e-07, "loss": 0.7072, "step": 17365 }, { "epoch": 0.71, "learning_rate": 4.057057962456916e-07, "loss": 0.6939, "step": 17370 }, { "epoch": 0.71, "learning_rate": 4.0518757515105327e-07, "loss": 0.6147, "step": 17375 }, { "epoch": 0.71, "learning_rate": 4.046696011363243e-07, "loss": 0.6398, "step": 17380 }, { "epoch": 0.71, "learning_rate": 4.041518744166651e-07, "loss": 0.6739, "step": 17385 }, { "epoch": 0.71, "learning_rate": 4.0363439520713585e-07, "loss": 0.6546, "step": 17390 }, { "epoch": 0.71, "learning_rate": 4.0311716372269243e-07, "loss": 0.6565, "step": 17395 }, { "epoch": 0.71, "learning_rate": 4.026001801781883e-07, "loss": 0.5946, "step": 17400 }, { "epoch": 0.71, "learning_rate": 4.020834447883743e-07, "loss": 0.62, "step": 17405 }, { "epoch": 0.71, "learning_rate": 4.015669577678973e-07, "loss": 0.689, "step": 17410 }, { "epoch": 0.71, "learning_rate": 4.010507193313025e-07, "loss": 0.6858, "step": 17415 }, { "epoch": 0.71, "learning_rate": 4.005347296930295e-07, "loss": 0.6892, "step": 17420 }, { "epoch": 0.71, "learning_rate": 4.000189890674166e-07, "loss": 0.6309, "step": 17425 }, { "epoch": 0.71, "learning_rate": 3.995034976686986e-07, "loss": 0.6863, "step": 17430 }, { "epoch": 0.71, "learning_rate": 3.9898825571100483e-07, "loss": 0.621, "step": 17435 }, { "epoch": 0.71, "learning_rate": 3.984732634083633e-07, "loss": 0.6637, "step": 17440 }, { "epoch": 0.71, "learning_rate": 3.9795852097469704e-07, "loss": 0.6236, "step": 17445 }, { "epoch": 0.71, "learning_rate": 3.974440286238252e-07, "loss": 0.698, "step": 17450 }, { "epoch": 0.71, "learning_rate": 3.969297865694641e-07, "loss": 0.6756, "step": 17455 }, { "epoch": 0.71, "learning_rate": 3.9641579502522484e-07, "loss": 0.6504, "step": 17460 }, { "epoch": 0.71, "learning_rate": 3.95902054204616e-07, "loss": 0.6308, "step": 17465 }, { "epoch": 0.71, "learning_rate": 3.953885643210395e-07, "loss": 0.6394, "step": 17470 }, { "epoch": 0.71, "learning_rate": 3.948753255877956e-07, "loss": 0.635, "step": 17475 }, { "epoch": 0.71, "learning_rate": 3.9436233821807965e-07, "loss": 0.6351, "step": 17480 }, { "epoch": 0.71, "learning_rate": 3.9384960242498076e-07, "loss": 0.6077, "step": 17485 }, { "epoch": 0.71, "learning_rate": 3.933371184214862e-07, "loss": 0.5905, "step": 17490 }, { "epoch": 0.71, "learning_rate": 3.928248864204764e-07, "loss": 0.6507, "step": 17495 }, { "epoch": 0.71, "learning_rate": 3.923129066347288e-07, "loss": 0.639, "step": 17500 }, { "epoch": 0.71, "learning_rate": 3.9180117927691523e-07, "loss": 0.6503, "step": 17505 }, { "epoch": 0.71, "learning_rate": 3.912897045596022e-07, "loss": 0.651, "step": 17510 }, { "epoch": 0.71, "learning_rate": 3.90778482695253e-07, "loss": 0.6379, "step": 17515 }, { "epoch": 0.71, "learning_rate": 3.9026751389622337e-07, "loss": 0.6312, "step": 17520 }, { "epoch": 0.71, "learning_rate": 3.897567983747663e-07, "loss": 0.6215, "step": 17525 }, { "epoch": 0.71, "learning_rate": 3.892463363430283e-07, "loss": 0.654, "step": 17530 }, { "epoch": 0.71, "learning_rate": 3.8873612801305035e-07, "loss": 0.6402, "step": 17535 }, { "epoch": 0.71, "learning_rate": 3.8822617359676946e-07, "loss": 0.6927, "step": 17540 }, { "epoch": 0.71, "learning_rate": 3.877164733060154e-07, "loss": 0.6587, "step": 17545 }, { "epoch": 0.71, "learning_rate": 3.872070273525143e-07, "loss": 0.6217, "step": 17550 }, { "epoch": 0.71, "learning_rate": 3.866978359478844e-07, "loss": 0.6561, "step": 17555 }, { "epoch": 0.71, "learning_rate": 3.8618889930363984e-07, "loss": 0.6448, "step": 17560 }, { "epoch": 0.71, "learning_rate": 3.856802176311892e-07, "loss": 0.6062, "step": 17565 }, { "epoch": 0.71, "learning_rate": 3.8517179114183306e-07, "loss": 0.6583, "step": 17570 }, { "epoch": 0.71, "learning_rate": 3.8466362004676845e-07, "loss": 0.6847, "step": 17575 }, { "epoch": 0.71, "learning_rate": 3.841557045570849e-07, "loss": 0.6257, "step": 17580 }, { "epoch": 0.71, "learning_rate": 3.8364804488376556e-07, "loss": 0.6344, "step": 17585 }, { "epoch": 0.71, "learning_rate": 3.8314064123768885e-07, "loss": 0.6658, "step": 17590 }, { "epoch": 0.71, "learning_rate": 3.8263349382962483e-07, "loss": 0.6301, "step": 17595 }, { "epoch": 0.71, "learning_rate": 3.821266028702392e-07, "loss": 0.6709, "step": 17600 }, { "epoch": 0.71, "eval_loss": 0.6223161220550537, "eval_runtime": 139.812, "eval_samples_per_second": 16.923, "eval_steps_per_second": 2.825, "step": 17600 }, { "epoch": 0.72, "learning_rate": 3.8161996857008895e-07, "loss": 0.6462, "step": 17605 }, { "epoch": 0.72, "learning_rate": 3.811135911396259e-07, "loss": 0.6432, "step": 17610 }, { "epoch": 0.72, "learning_rate": 3.8060747078919574e-07, "loss": 0.6256, "step": 17615 }, { "epoch": 0.72, "learning_rate": 3.801016077290351e-07, "loss": 0.6452, "step": 17620 }, { "epoch": 0.72, "learning_rate": 3.7959600216927603e-07, "loss": 0.6282, "step": 17625 }, { "epoch": 0.72, "learning_rate": 3.7909065431994227e-07, "loss": 0.6568, "step": 17630 }, { "epoch": 0.72, "learning_rate": 3.785855643909507e-07, "loss": 0.6867, "step": 17635 }, { "epoch": 0.72, "learning_rate": 3.7808073259211194e-07, "loss": 0.622, "step": 17640 }, { "epoch": 0.72, "learning_rate": 3.7757615913312813e-07, "loss": 0.6691, "step": 17645 }, { "epoch": 0.72, "learning_rate": 3.770718442235955e-07, "loss": 0.6732, "step": 17650 }, { "epoch": 0.72, "learning_rate": 3.765677880730009e-07, "loss": 0.6415, "step": 17655 }, { "epoch": 0.72, "learning_rate": 3.7606399089072594e-07, "loss": 0.7083, "step": 17660 }, { "epoch": 0.72, "learning_rate": 3.755604528860431e-07, "loss": 0.6402, "step": 17665 }, { "epoch": 0.72, "learning_rate": 3.750571742681173e-07, "loss": 0.648, "step": 17670 }, { "epoch": 0.72, "learning_rate": 3.7455415524600697e-07, "loss": 0.6585, "step": 17675 }, { "epoch": 0.72, "learning_rate": 3.7405139602866145e-07, "loss": 0.6325, "step": 17680 }, { "epoch": 0.72, "learning_rate": 3.735488968249224e-07, "loss": 0.6709, "step": 17685 }, { "epoch": 0.72, "learning_rate": 3.730466578435235e-07, "loss": 0.6228, "step": 17690 }, { "epoch": 0.72, "learning_rate": 3.725446792930905e-07, "loss": 0.637, "step": 17695 }, { "epoch": 0.72, "learning_rate": 3.7204296138214197e-07, "loss": 0.6506, "step": 17700 }, { "epoch": 0.72, "learning_rate": 3.715415043190856e-07, "loss": 0.7209, "step": 17705 }, { "epoch": 0.72, "learning_rate": 3.710403083122232e-07, "loss": 0.6668, "step": 17710 }, { "epoch": 0.72, "learning_rate": 3.7053937356974717e-07, "loss": 0.6685, "step": 17715 }, { "epoch": 0.72, "learning_rate": 3.700387002997408e-07, "loss": 0.6666, "step": 17720 }, { "epoch": 0.72, "learning_rate": 3.695382887101804e-07, "loss": 0.6703, "step": 17725 }, { "epoch": 0.72, "learning_rate": 3.6903813900893175e-07, "loss": 0.6361, "step": 17730 }, { "epoch": 0.72, "learning_rate": 3.685382514037537e-07, "loss": 0.6159, "step": 17735 }, { "epoch": 0.72, "learning_rate": 3.6803862610229384e-07, "loss": 0.6266, "step": 17740 }, { "epoch": 0.72, "learning_rate": 3.6753926331209294e-07, "loss": 0.6465, "step": 17745 }, { "epoch": 0.72, "learning_rate": 3.670401632405826e-07, "loss": 0.6146, "step": 17750 }, { "epoch": 0.72, "learning_rate": 3.6654132609508325e-07, "loss": 0.6482, "step": 17755 }, { "epoch": 0.72, "learning_rate": 3.660427520828085e-07, "loss": 0.6261, "step": 17760 }, { "epoch": 0.72, "learning_rate": 3.655444414108615e-07, "loss": 0.6514, "step": 17765 }, { "epoch": 0.72, "learning_rate": 3.650463942862356e-07, "loss": 0.6404, "step": 17770 }, { "epoch": 0.72, "learning_rate": 3.645486109158161e-07, "loss": 0.6637, "step": 17775 }, { "epoch": 0.72, "learning_rate": 3.640510915063771e-07, "loss": 0.6227, "step": 17780 }, { "epoch": 0.72, "learning_rate": 3.635538362645849e-07, "loss": 0.6501, "step": 17785 }, { "epoch": 0.72, "learning_rate": 3.6305684539699366e-07, "loss": 0.6559, "step": 17790 }, { "epoch": 0.72, "learning_rate": 3.625601191100496e-07, "loss": 0.648, "step": 17795 }, { "epoch": 0.72, "learning_rate": 3.620636576100894e-07, "loss": 0.672, "step": 17800 }, { "epoch": 0.72, "learning_rate": 3.6156746110333736e-07, "loss": 0.6461, "step": 17805 }, { "epoch": 0.72, "learning_rate": 3.6107152979591015e-07, "loss": 0.6711, "step": 17810 }, { "epoch": 0.72, "learning_rate": 3.6057586389381325e-07, "loss": 0.6374, "step": 17815 }, { "epoch": 0.72, "learning_rate": 3.6008046360294163e-07, "loss": 0.6321, "step": 17820 }, { "epoch": 0.72, "learning_rate": 3.5958532912908015e-07, "loss": 0.607, "step": 17825 }, { "epoch": 0.72, "learning_rate": 3.590904606779035e-07, "loss": 0.6357, "step": 17830 }, { "epoch": 0.72, "learning_rate": 3.5859585845497666e-07, "loss": 0.6354, "step": 17835 }, { "epoch": 0.72, "learning_rate": 3.5810152266575167e-07, "loss": 0.6476, "step": 17840 }, { "epoch": 0.72, "learning_rate": 3.576074535155722e-07, "loss": 0.6301, "step": 17845 }, { "epoch": 0.73, "learning_rate": 3.5711365120966994e-07, "loss": 0.6725, "step": 17850 }, { "epoch": 0.73, "learning_rate": 3.56620115953166e-07, "loss": 0.6659, "step": 17855 }, { "epoch": 0.73, "learning_rate": 3.5612684795107104e-07, "loss": 0.6825, "step": 17860 }, { "epoch": 0.73, "learning_rate": 3.5563384740828406e-07, "loss": 0.6667, "step": 17865 }, { "epoch": 0.73, "learning_rate": 3.5514111452959317e-07, "loss": 0.653, "step": 17870 }, { "epoch": 0.73, "learning_rate": 3.54648649519675e-07, "loss": 0.6709, "step": 17875 }, { "epoch": 0.73, "learning_rate": 3.5415645258309556e-07, "loss": 0.664, "step": 17880 }, { "epoch": 0.73, "learning_rate": 3.536645239243099e-07, "loss": 0.624, "step": 17885 }, { "epoch": 0.73, "learning_rate": 3.531728637476594e-07, "loss": 0.6278, "step": 17890 }, { "epoch": 0.73, "learning_rate": 3.526814722573767e-07, "loss": 0.6323, "step": 17895 }, { "epoch": 0.73, "learning_rate": 3.52190349657581e-07, "loss": 0.6911, "step": 17900 }, { "epoch": 0.73, "learning_rate": 3.5169949615228014e-07, "loss": 0.6741, "step": 17905 }, { "epoch": 0.73, "learning_rate": 3.5120891194537093e-07, "loss": 0.6731, "step": 17910 }, { "epoch": 0.73, "learning_rate": 3.5071859724063756e-07, "loss": 0.6234, "step": 17915 }, { "epoch": 0.73, "learning_rate": 3.5022855224175254e-07, "loss": 0.6702, "step": 17920 }, { "epoch": 0.73, "learning_rate": 3.4973877715227584e-07, "loss": 0.6427, "step": 17925 }, { "epoch": 0.73, "learning_rate": 3.4924927217565614e-07, "loss": 0.6306, "step": 17930 }, { "epoch": 0.73, "learning_rate": 3.487600375152303e-07, "loss": 0.6943, "step": 17935 }, { "epoch": 0.73, "learning_rate": 3.482710733742207e-07, "loss": 0.6889, "step": 17940 }, { "epoch": 0.73, "learning_rate": 3.477823799557399e-07, "loss": 0.6084, "step": 17945 }, { "epoch": 0.73, "learning_rate": 3.4729395746278645e-07, "loss": 0.6656, "step": 17950 }, { "epoch": 0.73, "learning_rate": 3.468058060982468e-07, "loss": 0.658, "step": 17955 }, { "epoch": 0.73, "learning_rate": 3.463179260648946e-07, "loss": 0.649, "step": 17960 }, { "epoch": 0.73, "learning_rate": 3.458303175653916e-07, "loss": 0.6494, "step": 17965 }, { "epoch": 0.73, "learning_rate": 3.4534298080228563e-07, "loss": 0.6672, "step": 17970 }, { "epoch": 0.73, "learning_rate": 3.44855915978012e-07, "loss": 0.6541, "step": 17975 }, { "epoch": 0.73, "learning_rate": 3.443691232948938e-07, "loss": 0.6382, "step": 17980 }, { "epoch": 0.73, "learning_rate": 3.438826029551402e-07, "loss": 0.6651, "step": 17985 }, { "epoch": 0.73, "learning_rate": 3.43396355160847e-07, "loss": 0.6019, "step": 17990 }, { "epoch": 0.73, "learning_rate": 3.429103801139981e-07, "loss": 0.6356, "step": 17995 }, { "epoch": 0.73, "learning_rate": 3.4242467801646303e-07, "loss": 0.6722, "step": 18000 }, { "epoch": 0.73, "learning_rate": 3.4193924906999804e-07, "loss": 0.64, "step": 18005 }, { "epoch": 0.73, "learning_rate": 3.4145409347624597e-07, "loss": 0.6232, "step": 18010 }, { "epoch": 0.73, "learning_rate": 3.409692114367365e-07, "loss": 0.6801, "step": 18015 }, { "epoch": 0.73, "learning_rate": 3.4048460315288607e-07, "loss": 0.6688, "step": 18020 }, { "epoch": 0.73, "learning_rate": 3.400002688259953e-07, "loss": 0.6331, "step": 18025 }, { "epoch": 0.73, "learning_rate": 3.395162086572536e-07, "loss": 0.6097, "step": 18030 }, { "epoch": 0.73, "learning_rate": 3.39032422847735e-07, "loss": 0.6887, "step": 18035 }, { "epoch": 0.73, "learning_rate": 3.385489115983996e-07, "loss": 0.6126, "step": 18040 }, { "epoch": 0.73, "learning_rate": 3.380656751100943e-07, "loss": 0.6639, "step": 18045 }, { "epoch": 0.73, "learning_rate": 3.3758271358355115e-07, "loss": 0.6329, "step": 18050 }, { "epoch": 0.73, "learning_rate": 3.371000272193881e-07, "loss": 0.6381, "step": 18055 }, { "epoch": 0.73, "learning_rate": 3.366176162181086e-07, "loss": 0.674, "step": 18060 }, { "epoch": 0.73, "learning_rate": 3.361354807801021e-07, "loss": 0.6684, "step": 18065 }, { "epoch": 0.73, "learning_rate": 3.3565362110564446e-07, "loss": 0.6402, "step": 18070 }, { "epoch": 0.73, "learning_rate": 3.351720373948945e-07, "loss": 0.6506, "step": 18075 }, { "epoch": 0.73, "learning_rate": 3.346907298478989e-07, "loss": 0.6486, "step": 18080 }, { "epoch": 0.73, "learning_rate": 3.3420969866458823e-07, "loss": 0.6789, "step": 18085 }, { "epoch": 0.73, "learning_rate": 3.337289440447788e-07, "loss": 0.6283, "step": 18090 }, { "epoch": 0.73, "learning_rate": 3.332484661881716e-07, "loss": 0.6729, "step": 18095 }, { "epoch": 0.74, "learning_rate": 3.327682652943534e-07, "loss": 0.6307, "step": 18100 }, { "epoch": 0.74, "learning_rate": 3.322883415627953e-07, "loss": 0.6712, "step": 18105 }, { "epoch": 0.74, "learning_rate": 3.31808695192853e-07, "loss": 0.68, "step": 18110 }, { "epoch": 0.74, "learning_rate": 3.313293263837683e-07, "loss": 0.6577, "step": 18115 }, { "epoch": 0.74, "learning_rate": 3.308502353346663e-07, "loss": 0.6393, "step": 18120 }, { "epoch": 0.74, "learning_rate": 3.3037142224455703e-07, "loss": 0.6464, "step": 18125 }, { "epoch": 0.74, "learning_rate": 3.2989288731233587e-07, "loss": 0.6654, "step": 18130 }, { "epoch": 0.74, "learning_rate": 3.2941463073678186e-07, "loss": 0.6245, "step": 18135 }, { "epoch": 0.74, "learning_rate": 3.289366527165586e-07, "loss": 0.6602, "step": 18140 }, { "epoch": 0.74, "learning_rate": 3.2845895345021345e-07, "loss": 0.6258, "step": 18145 }, { "epoch": 0.74, "learning_rate": 3.279815331361795e-07, "loss": 0.6558, "step": 18150 }, { "epoch": 0.74, "learning_rate": 3.275043919727725e-07, "loss": 0.6414, "step": 18155 }, { "epoch": 0.74, "learning_rate": 3.2702753015819227e-07, "loss": 0.6347, "step": 18160 }, { "epoch": 0.74, "learning_rate": 3.265509478905241e-07, "loss": 0.6173, "step": 18165 }, { "epoch": 0.74, "learning_rate": 3.2607464536773546e-07, "loss": 0.6816, "step": 18170 }, { "epoch": 0.74, "learning_rate": 3.2559862278767815e-07, "loss": 0.6507, "step": 18175 }, { "epoch": 0.74, "learning_rate": 3.251228803480883e-07, "loss": 0.6762, "step": 18180 }, { "epoch": 0.74, "learning_rate": 3.2464741824658504e-07, "loss": 0.6747, "step": 18185 }, { "epoch": 0.74, "learning_rate": 3.2417223668067095e-07, "loss": 0.6143, "step": 18190 }, { "epoch": 0.74, "learning_rate": 3.2369733584773216e-07, "loss": 0.6404, "step": 18195 }, { "epoch": 0.74, "learning_rate": 3.23222715945039e-07, "loss": 0.654, "step": 18200 }, { "epoch": 0.74, "learning_rate": 3.227483771697441e-07, "loss": 0.6367, "step": 18205 }, { "epoch": 0.74, "learning_rate": 3.222743197188834e-07, "loss": 0.6811, "step": 18210 }, { "epoch": 0.74, "learning_rate": 3.2180054378937673e-07, "loss": 0.6155, "step": 18215 }, { "epoch": 0.74, "learning_rate": 3.2132704957802637e-07, "loss": 0.6127, "step": 18220 }, { "epoch": 0.74, "learning_rate": 3.208538372815172e-07, "loss": 0.648, "step": 18225 }, { "epoch": 0.74, "learning_rate": 3.203809070964181e-07, "loss": 0.6523, "step": 18230 }, { "epoch": 0.74, "learning_rate": 3.199082592191801e-07, "loss": 0.651, "step": 18235 }, { "epoch": 0.74, "learning_rate": 3.1943589384613667e-07, "loss": 0.6423, "step": 18240 }, { "epoch": 0.74, "learning_rate": 3.189638111735042e-07, "loss": 0.6186, "step": 18245 }, { "epoch": 0.74, "learning_rate": 3.1849201139738247e-07, "loss": 0.6621, "step": 18250 }, { "epoch": 0.74, "learning_rate": 3.1802049471375234e-07, "loss": 0.6333, "step": 18255 }, { "epoch": 0.74, "learning_rate": 3.175492613184777e-07, "loss": 0.6234, "step": 18260 }, { "epoch": 0.74, "learning_rate": 3.1707831140730537e-07, "loss": 0.6328, "step": 18265 }, { "epoch": 0.74, "learning_rate": 3.1660764517586337e-07, "loss": 0.6621, "step": 18270 }, { "epoch": 0.74, "learning_rate": 3.1613726281966255e-07, "loss": 0.6735, "step": 18275 }, { "epoch": 0.74, "learning_rate": 3.1566716453409536e-07, "loss": 0.6595, "step": 18280 }, { "epoch": 0.74, "learning_rate": 3.15197350514437e-07, "loss": 0.6837, "step": 18285 }, { "epoch": 0.74, "learning_rate": 3.1472782095584396e-07, "loss": 0.66, "step": 18290 }, { "epoch": 0.74, "learning_rate": 3.1425857605335427e-07, "loss": 0.6552, "step": 18295 }, { "epoch": 0.74, "learning_rate": 3.1378961600188903e-07, "loss": 0.6419, "step": 18300 }, { "epoch": 0.74, "learning_rate": 3.1332094099624963e-07, "loss": 0.6407, "step": 18305 }, { "epoch": 0.74, "learning_rate": 3.1285255123111944e-07, "loss": 0.6473, "step": 18310 }, { "epoch": 0.74, "learning_rate": 3.1238444690106394e-07, "loss": 0.6624, "step": 18315 }, { "epoch": 0.74, "learning_rate": 3.1191662820052954e-07, "loss": 0.6523, "step": 18320 }, { "epoch": 0.74, "learning_rate": 3.114490953238438e-07, "loss": 0.6311, "step": 18325 }, { "epoch": 0.74, "learning_rate": 3.1098184846521567e-07, "loss": 0.6484, "step": 18330 }, { "epoch": 0.74, "learning_rate": 3.1051488781873605e-07, "loss": 0.6783, "step": 18335 }, { "epoch": 0.74, "learning_rate": 3.1004821357837594e-07, "loss": 0.6586, "step": 18340 }, { "epoch": 0.75, "learning_rate": 3.0958182593798745e-07, "loss": 0.6285, "step": 18345 }, { "epoch": 0.75, "learning_rate": 3.0911572509130465e-07, "loss": 0.6312, "step": 18350 }, { "epoch": 0.75, "learning_rate": 3.086499112319414e-07, "loss": 0.6531, "step": 18355 }, { "epoch": 0.75, "learning_rate": 3.081843845533925e-07, "loss": 0.6358, "step": 18360 }, { "epoch": 0.75, "learning_rate": 3.0771914524903417e-07, "loss": 0.6551, "step": 18365 }, { "epoch": 0.75, "learning_rate": 3.0725419351212254e-07, "loss": 0.6368, "step": 18370 }, { "epoch": 0.75, "learning_rate": 3.067895295357945e-07, "loss": 0.6914, "step": 18375 }, { "epoch": 0.75, "learning_rate": 3.063251535130672e-07, "loss": 0.6461, "step": 18380 }, { "epoch": 0.75, "learning_rate": 3.058610656368389e-07, "loss": 0.6122, "step": 18385 }, { "epoch": 0.75, "learning_rate": 3.053972660998875e-07, "loss": 0.6502, "step": 18390 }, { "epoch": 0.75, "learning_rate": 3.0493375509487074e-07, "loss": 0.688, "step": 18395 }, { "epoch": 0.75, "learning_rate": 3.044705328143279e-07, "loss": 0.6114, "step": 18400 }, { "epoch": 0.75, "eval_loss": 0.6205956935882568, "eval_runtime": 140.0377, "eval_samples_per_second": 16.895, "eval_steps_per_second": 2.821, "step": 18400 }, { "epoch": 0.75, "learning_rate": 3.040075994506771e-07, "loss": 0.6604, "step": 18405 }, { "epoch": 0.75, "learning_rate": 3.0354495519621693e-07, "loss": 0.6973, "step": 18410 }, { "epoch": 0.75, "learning_rate": 3.030826002431253e-07, "loss": 0.6445, "step": 18415 }, { "epoch": 0.75, "learning_rate": 3.026205347834612e-07, "loss": 0.6553, "step": 18420 }, { "epoch": 0.75, "learning_rate": 3.021587590091622e-07, "loss": 0.6605, "step": 18425 }, { "epoch": 0.75, "learning_rate": 3.0169727311204564e-07, "loss": 0.6498, "step": 18430 }, { "epoch": 0.75, "learning_rate": 3.0123607728380953e-07, "loss": 0.6277, "step": 18435 }, { "epoch": 0.75, "learning_rate": 3.0077517171603007e-07, "loss": 0.6164, "step": 18440 }, { "epoch": 0.75, "learning_rate": 3.0031455660016316e-07, "loss": 0.6513, "step": 18445 }, { "epoch": 0.75, "learning_rate": 2.9985423212754503e-07, "loss": 0.6511, "step": 18450 }, { "epoch": 0.75, "learning_rate": 2.993941984893902e-07, "loss": 0.6822, "step": 18455 }, { "epoch": 0.75, "learning_rate": 2.9893445587679245e-07, "loss": 0.6549, "step": 18460 }, { "epoch": 0.75, "learning_rate": 2.984750044807247e-07, "loss": 0.6606, "step": 18465 }, { "epoch": 0.75, "learning_rate": 2.9801584449203967e-07, "loss": 0.6564, "step": 18470 }, { "epoch": 0.75, "learning_rate": 2.97556976101468e-07, "loss": 0.6355, "step": 18475 }, { "epoch": 0.75, "learning_rate": 2.9709839949961947e-07, "loss": 0.6713, "step": 18480 }, { "epoch": 0.75, "learning_rate": 2.966401148769834e-07, "loss": 0.6468, "step": 18485 }, { "epoch": 0.75, "learning_rate": 2.961821224239268e-07, "loss": 0.6286, "step": 18490 }, { "epoch": 0.75, "learning_rate": 2.957244223306955e-07, "loss": 0.6355, "step": 18495 }, { "epoch": 0.75, "learning_rate": 2.952670147874149e-07, "loss": 0.6472, "step": 18500 }, { "epoch": 0.75, "learning_rate": 2.948098999840877e-07, "loss": 0.6622, "step": 18505 }, { "epoch": 0.75, "learning_rate": 2.943530781105954e-07, "loss": 0.6634, "step": 18510 }, { "epoch": 0.75, "learning_rate": 2.938965493566975e-07, "loss": 0.6532, "step": 18515 }, { "epoch": 0.75, "learning_rate": 2.934403139120327e-07, "loss": 0.6362, "step": 18520 }, { "epoch": 0.75, "learning_rate": 2.9298437196611704e-07, "loss": 0.6463, "step": 18525 }, { "epoch": 0.75, "learning_rate": 2.925287237083445e-07, "loss": 0.6824, "step": 18530 }, { "epoch": 0.75, "learning_rate": 2.9207336932798787e-07, "loss": 0.6852, "step": 18535 }, { "epoch": 0.75, "learning_rate": 2.9161830901419714e-07, "loss": 0.6914, "step": 18540 }, { "epoch": 0.75, "learning_rate": 2.9116354295600056e-07, "loss": 0.6176, "step": 18545 }, { "epoch": 0.75, "learning_rate": 2.907090713423035e-07, "loss": 0.644, "step": 18550 }, { "epoch": 0.75, "learning_rate": 2.9025489436189034e-07, "loss": 0.6502, "step": 18555 }, { "epoch": 0.75, "learning_rate": 2.8980101220342167e-07, "loss": 0.6321, "step": 18560 }, { "epoch": 0.75, "learning_rate": 2.893474250554362e-07, "loss": 0.6527, "step": 18565 }, { "epoch": 0.75, "learning_rate": 2.8889413310635033e-07, "loss": 0.6831, "step": 18570 }, { "epoch": 0.75, "learning_rate": 2.884411365444577e-07, "loss": 0.6418, "step": 18575 }, { "epoch": 0.75, "learning_rate": 2.879884355579285e-07, "loss": 0.6457, "step": 18580 }, { "epoch": 0.75, "learning_rate": 2.875360303348116e-07, "loss": 0.622, "step": 18585 }, { "epoch": 0.76, "learning_rate": 2.870839210630317e-07, "loss": 0.6198, "step": 18590 }, { "epoch": 0.76, "learning_rate": 2.8663210793039114e-07, "loss": 0.6673, "step": 18595 }, { "epoch": 0.76, "learning_rate": 2.8618059112456907e-07, "loss": 0.6307, "step": 18600 }, { "epoch": 0.76, "learning_rate": 2.8572937083312196e-07, "loss": 0.6682, "step": 18605 }, { "epoch": 0.76, "learning_rate": 2.852784472434826e-07, "loss": 0.6635, "step": 18610 }, { "epoch": 0.76, "learning_rate": 2.8482782054296037e-07, "loss": 0.6525, "step": 18615 }, { "epoch": 0.76, "learning_rate": 2.843774909187425e-07, "loss": 0.6445, "step": 18620 }, { "epoch": 0.76, "learning_rate": 2.8392745855789144e-07, "loss": 0.629, "step": 18625 }, { "epoch": 0.76, "learning_rate": 2.8347772364734656e-07, "loss": 0.6769, "step": 18630 }, { "epoch": 0.76, "learning_rate": 2.830282863739244e-07, "loss": 0.6489, "step": 18635 }, { "epoch": 0.76, "learning_rate": 2.8257914692431703e-07, "loss": 0.6738, "step": 18640 }, { "epoch": 0.76, "learning_rate": 2.8213030548509296e-07, "loss": 0.6459, "step": 18645 }, { "epoch": 0.76, "learning_rate": 2.816817622426968e-07, "loss": 0.6447, "step": 18650 }, { "epoch": 0.76, "learning_rate": 2.8123351738345016e-07, "loss": 0.6607, "step": 18655 }, { "epoch": 0.76, "learning_rate": 2.807855710935496e-07, "loss": 0.6474, "step": 18660 }, { "epoch": 0.76, "learning_rate": 2.80337923559068e-07, "loss": 0.6493, "step": 18665 }, { "epoch": 0.76, "learning_rate": 2.798905749659548e-07, "loss": 0.6176, "step": 18670 }, { "epoch": 0.76, "learning_rate": 2.7944352550003425e-07, "loss": 0.6472, "step": 18675 }, { "epoch": 0.76, "learning_rate": 2.7899677534700706e-07, "loss": 0.6353, "step": 18680 }, { "epoch": 0.76, "learning_rate": 2.785503246924489e-07, "loss": 0.6279, "step": 18685 }, { "epoch": 0.76, "learning_rate": 2.781041737218121e-07, "loss": 0.6039, "step": 18690 }, { "epoch": 0.76, "learning_rate": 2.7765832262042364e-07, "loss": 0.6366, "step": 18695 }, { "epoch": 0.76, "learning_rate": 2.772127715734859e-07, "loss": 0.664, "step": 18700 }, { "epoch": 0.76, "learning_rate": 2.7676752076607747e-07, "loss": 0.6687, "step": 18705 }, { "epoch": 0.76, "learning_rate": 2.763225703831513e-07, "loss": 0.6712, "step": 18710 }, { "epoch": 0.76, "learning_rate": 2.7587792060953585e-07, "loss": 0.659, "step": 18715 }, { "epoch": 0.76, "learning_rate": 2.754335716299351e-07, "loss": 0.6291, "step": 18720 }, { "epoch": 0.76, "learning_rate": 2.749895236289276e-07, "loss": 0.6037, "step": 18725 }, { "epoch": 0.76, "learning_rate": 2.745457767909669e-07, "loss": 0.625, "step": 18730 }, { "epoch": 0.76, "learning_rate": 2.741023313003814e-07, "loss": 0.6479, "step": 18735 }, { "epoch": 0.76, "learning_rate": 2.736591873413751e-07, "loss": 0.6515, "step": 18740 }, { "epoch": 0.76, "learning_rate": 2.7321634509802584e-07, "loss": 0.647, "step": 18745 }, { "epoch": 0.76, "learning_rate": 2.72773804754286e-07, "loss": 0.6699, "step": 18750 }, { "epoch": 0.76, "learning_rate": 2.723315664939838e-07, "loss": 0.6759, "step": 18755 }, { "epoch": 0.76, "learning_rate": 2.718896305008207e-07, "loss": 0.6816, "step": 18760 }, { "epoch": 0.76, "learning_rate": 2.7144799695837283e-07, "loss": 0.6374, "step": 18765 }, { "epoch": 0.76, "learning_rate": 2.710066660500916e-07, "loss": 0.6381, "step": 18770 }, { "epoch": 0.76, "learning_rate": 2.705656379593015e-07, "loss": 0.6504, "step": 18775 }, { "epoch": 0.76, "learning_rate": 2.7012491286920215e-07, "loss": 0.6652, "step": 18780 }, { "epoch": 0.76, "learning_rate": 2.696844909628662e-07, "loss": 0.6412, "step": 18785 }, { "epoch": 0.76, "learning_rate": 2.6924437242324195e-07, "loss": 0.6511, "step": 18790 }, { "epoch": 0.76, "learning_rate": 2.6880455743315045e-07, "loss": 0.6489, "step": 18795 }, { "epoch": 0.76, "learning_rate": 2.683650461752867e-07, "loss": 0.6671, "step": 18800 }, { "epoch": 0.76, "learning_rate": 2.6792583883222044e-07, "loss": 0.6377, "step": 18805 }, { "epoch": 0.76, "learning_rate": 2.6748693558639426e-07, "loss": 0.6314, "step": 18810 }, { "epoch": 0.76, "learning_rate": 2.6704833662012484e-07, "loss": 0.6626, "step": 18815 }, { "epoch": 0.76, "learning_rate": 2.66610042115602e-07, "loss": 0.6428, "step": 18820 }, { "epoch": 0.76, "learning_rate": 2.6617205225489015e-07, "loss": 0.6139, "step": 18825 }, { "epoch": 0.76, "learning_rate": 2.65734367219926e-07, "loss": 0.6466, "step": 18830 }, { "epoch": 0.77, "learning_rate": 2.6529698719252e-07, "loss": 0.6723, "step": 18835 }, { "epoch": 0.77, "learning_rate": 2.6485991235435655e-07, "loss": 0.6314, "step": 18840 }, { "epoch": 0.77, "learning_rate": 2.644231428869924e-07, "loss": 0.68, "step": 18845 }, { "epoch": 0.77, "learning_rate": 2.6398667897185754e-07, "loss": 0.6589, "step": 18850 }, { "epoch": 0.77, "learning_rate": 2.6355052079025573e-07, "loss": 0.6587, "step": 18855 }, { "epoch": 0.77, "learning_rate": 2.6311466852336326e-07, "loss": 0.6506, "step": 18860 }, { "epoch": 0.77, "learning_rate": 2.6267912235222913e-07, "loss": 0.6344, "step": 18865 }, { "epoch": 0.77, "learning_rate": 2.622438824577753e-07, "loss": 0.6729, "step": 18870 }, { "epoch": 0.77, "learning_rate": 2.61808949020797e-07, "loss": 0.6573, "step": 18875 }, { "epoch": 0.77, "learning_rate": 2.613743222219618e-07, "loss": 0.6228, "step": 18880 }, { "epoch": 0.77, "learning_rate": 2.6094000224180934e-07, "loss": 0.636, "step": 18885 }, { "epoch": 0.77, "learning_rate": 2.6050598926075317e-07, "loss": 0.6215, "step": 18890 }, { "epoch": 0.77, "learning_rate": 2.6007228345907804e-07, "loss": 0.6473, "step": 18895 }, { "epoch": 0.77, "learning_rate": 2.596388850169413e-07, "loss": 0.6789, "step": 18900 }, { "epoch": 0.77, "learning_rate": 2.592057941143736e-07, "loss": 0.6479, "step": 18905 }, { "epoch": 0.77, "learning_rate": 2.5877301093127677e-07, "loss": 0.7127, "step": 18910 }, { "epoch": 0.77, "learning_rate": 2.583405356474252e-07, "loss": 0.6525, "step": 18915 }, { "epoch": 0.77, "learning_rate": 2.57908368442465e-07, "loss": 0.6547, "step": 18920 }, { "epoch": 0.77, "learning_rate": 2.574765094959153e-07, "loss": 0.6684, "step": 18925 }, { "epoch": 0.77, "learning_rate": 2.5704495898716615e-07, "loss": 0.6369, "step": 18930 }, { "epoch": 0.77, "learning_rate": 2.5661371709547983e-07, "loss": 0.6421, "step": 18935 }, { "epoch": 0.77, "learning_rate": 2.5618278399999094e-07, "loss": 0.6399, "step": 18940 }, { "epoch": 0.77, "learning_rate": 2.557521598797049e-07, "loss": 0.5765, "step": 18945 }, { "epoch": 0.77, "learning_rate": 2.553218449134994e-07, "loss": 0.6161, "step": 18950 }, { "epoch": 0.77, "learning_rate": 2.5489183928012325e-07, "loss": 0.6204, "step": 18955 }, { "epoch": 0.77, "learning_rate": 2.544621431581975e-07, "loss": 0.6343, "step": 18960 }, { "epoch": 0.77, "learning_rate": 2.5403275672621407e-07, "loss": 0.6635, "step": 18965 }, { "epoch": 0.77, "learning_rate": 2.536036801625359e-07, "loss": 0.6453, "step": 18970 }, { "epoch": 0.77, "learning_rate": 2.531749136453982e-07, "loss": 0.6791, "step": 18975 }, { "epoch": 0.77, "learning_rate": 2.527464573529068e-07, "loss": 0.6195, "step": 18980 }, { "epoch": 0.77, "learning_rate": 2.5231831146303817e-07, "loss": 0.6502, "step": 18985 }, { "epoch": 0.77, "learning_rate": 2.5189047615364124e-07, "loss": 0.6623, "step": 18990 }, { "epoch": 0.77, "learning_rate": 2.5146295160243457e-07, "loss": 0.6331, "step": 18995 }, { "epoch": 0.77, "learning_rate": 2.5103573798700816e-07, "loss": 0.6164, "step": 19000 }, { "epoch": 0.77, "learning_rate": 2.506088354848226e-07, "loss": 0.6363, "step": 19005 }, { "epoch": 0.77, "learning_rate": 2.5018224427321013e-07, "loss": 0.6733, "step": 19010 }, { "epoch": 0.77, "learning_rate": 2.497559645293726e-07, "loss": 0.6452, "step": 19015 }, { "epoch": 0.77, "learning_rate": 2.4932999643038264e-07, "loss": 0.6344, "step": 19020 }, { "epoch": 0.77, "learning_rate": 2.489043401531844e-07, "loss": 0.6154, "step": 19025 }, { "epoch": 0.77, "learning_rate": 2.484789958745913e-07, "loss": 0.6564, "step": 19030 }, { "epoch": 0.77, "learning_rate": 2.4805396377128753e-07, "loss": 0.6827, "step": 19035 }, { "epoch": 0.77, "learning_rate": 2.476292440198283e-07, "loss": 0.6415, "step": 19040 }, { "epoch": 0.77, "learning_rate": 2.4720483679663815e-07, "loss": 0.6611, "step": 19045 }, { "epoch": 0.77, "learning_rate": 2.4678074227801213e-07, "loss": 0.6308, "step": 19050 }, { "epoch": 0.77, "learning_rate": 2.463569606401151e-07, "loss": 0.6434, "step": 19055 }, { "epoch": 0.77, "learning_rate": 2.459334920589831e-07, "loss": 0.6625, "step": 19060 }, { "epoch": 0.77, "learning_rate": 2.455103367105207e-07, "loss": 0.6416, "step": 19065 }, { "epoch": 0.77, "learning_rate": 2.450874947705027e-07, "loss": 0.6458, "step": 19070 }, { "epoch": 0.77, "learning_rate": 2.446649664145748e-07, "loss": 0.6101, "step": 19075 }, { "epoch": 0.77, "learning_rate": 2.4424275181825103e-07, "loss": 0.6629, "step": 19080 }, { "epoch": 0.78, "learning_rate": 2.4382085115691575e-07, "loss": 0.6881, "step": 19085 }, { "epoch": 0.78, "learning_rate": 2.4339926460582263e-07, "loss": 0.6869, "step": 19090 }, { "epoch": 0.78, "learning_rate": 2.429779923400955e-07, "loss": 0.6448, "step": 19095 }, { "epoch": 0.78, "learning_rate": 2.42557034534727e-07, "loss": 0.6401, "step": 19100 }, { "epoch": 0.78, "learning_rate": 2.42136391364579e-07, "loss": 0.6611, "step": 19105 }, { "epoch": 0.78, "learning_rate": 2.4171606300438364e-07, "loss": 0.6471, "step": 19110 }, { "epoch": 0.78, "learning_rate": 2.4129604962874127e-07, "loss": 0.6922, "step": 19115 }, { "epoch": 0.78, "learning_rate": 2.4087635141212157e-07, "loss": 0.6707, "step": 19120 }, { "epoch": 0.78, "learning_rate": 2.404569685288642e-07, "loss": 0.6398, "step": 19125 }, { "epoch": 0.78, "learning_rate": 2.400379011531768e-07, "loss": 0.6756, "step": 19130 }, { "epoch": 0.78, "learning_rate": 2.396191494591363e-07, "loss": 0.6565, "step": 19135 }, { "epoch": 0.78, "learning_rate": 2.392007136206883e-07, "loss": 0.6649, "step": 19140 }, { "epoch": 0.78, "learning_rate": 2.387825938116478e-07, "loss": 0.6119, "step": 19145 }, { "epoch": 0.78, "learning_rate": 2.38364790205698e-07, "loss": 0.6636, "step": 19150 }, { "epoch": 0.78, "learning_rate": 2.3794730297639054e-07, "loss": 0.6411, "step": 19155 }, { "epoch": 0.78, "learning_rate": 2.3753013229714658e-07, "loss": 0.6213, "step": 19160 }, { "epoch": 0.78, "learning_rate": 2.3711327834125495e-07, "loss": 0.6499, "step": 19165 }, { "epoch": 0.78, "learning_rate": 2.3669674128187277e-07, "loss": 0.6644, "step": 19170 }, { "epoch": 0.78, "learning_rate": 2.3628052129202658e-07, "loss": 0.6425, "step": 19175 }, { "epoch": 0.78, "learning_rate": 2.3586461854461015e-07, "loss": 0.6316, "step": 19180 }, { "epoch": 0.78, "learning_rate": 2.3544903321238596e-07, "loss": 0.6716, "step": 19185 }, { "epoch": 0.78, "learning_rate": 2.3503376546798425e-07, "loss": 0.6026, "step": 19190 }, { "epoch": 0.78, "learning_rate": 2.3461881548390416e-07, "loss": 0.6799, "step": 19195 }, { "epoch": 0.78, "learning_rate": 2.3420418343251215e-07, "loss": 0.6355, "step": 19200 }, { "epoch": 0.78, "eval_loss": 0.6190813779830933, "eval_runtime": 143.7787, "eval_samples_per_second": 16.456, "eval_steps_per_second": 2.747, "step": 19200 }, { "epoch": 0.78, "learning_rate": 2.3378986948604217e-07, "loss": 0.649, "step": 19205 }, { "epoch": 0.78, "learning_rate": 2.3337587381659752e-07, "loss": 0.653, "step": 19210 }, { "epoch": 0.78, "learning_rate": 2.3296219659614802e-07, "loss": 0.6588, "step": 19215 }, { "epoch": 0.78, "learning_rate": 2.3254883799653124e-07, "loss": 0.6292, "step": 19220 }, { "epoch": 0.78, "learning_rate": 2.321357981894534e-07, "loss": 0.6651, "step": 19225 }, { "epoch": 0.78, "learning_rate": 2.3172307734648722e-07, "loss": 0.6464, "step": 19230 }, { "epoch": 0.78, "learning_rate": 2.3131067563907359e-07, "loss": 0.642, "step": 19235 }, { "epoch": 0.78, "learning_rate": 2.3089859323852002e-07, "loss": 0.6499, "step": 19240 }, { "epoch": 0.78, "learning_rate": 2.304868303160028e-07, "loss": 0.6595, "step": 19245 }, { "epoch": 0.78, "learning_rate": 2.3007538704256412e-07, "loss": 0.627, "step": 19250 }, { "epoch": 0.78, "learning_rate": 2.2966426358911384e-07, "loss": 0.6531, "step": 19255 }, { "epoch": 0.78, "learning_rate": 2.2925346012642964e-07, "loss": 0.6453, "step": 19260 }, { "epoch": 0.78, "learning_rate": 2.288429768251553e-07, "loss": 0.6343, "step": 19265 }, { "epoch": 0.78, "learning_rate": 2.2843281385580215e-07, "loss": 0.6788, "step": 19270 }, { "epoch": 0.78, "learning_rate": 2.2802297138874792e-07, "loss": 0.7187, "step": 19275 }, { "epoch": 0.78, "learning_rate": 2.276134495942381e-07, "loss": 0.6708, "step": 19280 }, { "epoch": 0.78, "learning_rate": 2.2720424864238452e-07, "loss": 0.6434, "step": 19285 }, { "epoch": 0.78, "learning_rate": 2.2679536870316506e-07, "loss": 0.6389, "step": 19290 }, { "epoch": 0.78, "learning_rate": 2.2638680994642567e-07, "loss": 0.6507, "step": 19295 }, { "epoch": 0.78, "learning_rate": 2.259785725418778e-07, "loss": 0.6413, "step": 19300 }, { "epoch": 0.78, "learning_rate": 2.2557065665909936e-07, "loss": 0.6542, "step": 19305 }, { "epoch": 0.78, "learning_rate": 2.251630624675357e-07, "loss": 0.6694, "step": 19310 }, { "epoch": 0.78, "learning_rate": 2.247557901364975e-07, "loss": 0.6521, "step": 19315 }, { "epoch": 0.78, "learning_rate": 2.2434883983516217e-07, "loss": 0.6519, "step": 19320 }, { "epoch": 0.78, "learning_rate": 2.239422117325732e-07, "loss": 0.608, "step": 19325 }, { "epoch": 0.79, "learning_rate": 2.2353590599764083e-07, "loss": 0.653, "step": 19330 }, { "epoch": 0.79, "learning_rate": 2.2312992279914067e-07, "loss": 0.6787, "step": 19335 }, { "epoch": 0.79, "learning_rate": 2.2272426230571428e-07, "loss": 0.5948, "step": 19340 }, { "epoch": 0.79, "learning_rate": 2.223189246858701e-07, "loss": 0.6391, "step": 19345 }, { "epoch": 0.79, "learning_rate": 2.2191391010798143e-07, "loss": 0.6242, "step": 19350 }, { "epoch": 0.79, "learning_rate": 2.2150921874028782e-07, "loss": 0.6666, "step": 19355 }, { "epoch": 0.79, "learning_rate": 2.211048507508949e-07, "loss": 0.672, "step": 19360 }, { "epoch": 0.79, "learning_rate": 2.2070080630777334e-07, "loss": 0.6048, "step": 19365 }, { "epoch": 0.79, "learning_rate": 2.202970855787597e-07, "loss": 0.6577, "step": 19370 }, { "epoch": 0.79, "learning_rate": 2.198936887315559e-07, "loss": 0.6343, "step": 19375 }, { "epoch": 0.79, "learning_rate": 2.1949061593372986e-07, "loss": 0.661, "step": 19380 }, { "epoch": 0.79, "learning_rate": 2.1908786735271434e-07, "loss": 0.635, "step": 19385 }, { "epoch": 0.79, "learning_rate": 2.1868544315580728e-07, "loss": 0.661, "step": 19390 }, { "epoch": 0.79, "learning_rate": 2.1828334351017286e-07, "loss": 0.6658, "step": 19395 }, { "epoch": 0.79, "learning_rate": 2.1788156858283923e-07, "loss": 0.6291, "step": 19400 }, { "epoch": 0.79, "learning_rate": 2.174801185407006e-07, "loss": 0.6638, "step": 19405 }, { "epoch": 0.79, "learning_rate": 2.1707899355051518e-07, "loss": 0.6319, "step": 19410 }, { "epoch": 0.79, "learning_rate": 2.166781937789075e-07, "loss": 0.6331, "step": 19415 }, { "epoch": 0.79, "learning_rate": 2.1627771939236606e-07, "loss": 0.5873, "step": 19420 }, { "epoch": 0.79, "learning_rate": 2.158775705572441e-07, "loss": 0.6696, "step": 19425 }, { "epoch": 0.79, "learning_rate": 2.1547774743976055e-07, "loss": 0.6585, "step": 19430 }, { "epoch": 0.79, "learning_rate": 2.1507825020599822e-07, "loss": 0.6483, "step": 19435 }, { "epoch": 0.79, "learning_rate": 2.146790790219044e-07, "loss": 0.6506, "step": 19440 }, { "epoch": 0.79, "learning_rate": 2.14280234053292e-07, "loss": 0.6411, "step": 19445 }, { "epoch": 0.79, "learning_rate": 2.1388171546583745e-07, "loss": 0.7025, "step": 19450 }, { "epoch": 0.79, "learning_rate": 2.1348352342508181e-07, "loss": 0.6499, "step": 19455 }, { "epoch": 0.79, "learning_rate": 2.1308565809643042e-07, "loss": 0.6437, "step": 19460 }, { "epoch": 0.79, "learning_rate": 2.1268811964515354e-07, "loss": 0.6432, "step": 19465 }, { "epoch": 0.79, "learning_rate": 2.1229090823638507e-07, "loss": 0.6752, "step": 19470 }, { "epoch": 0.79, "learning_rate": 2.1189402403512268e-07, "loss": 0.6625, "step": 19475 }, { "epoch": 0.79, "learning_rate": 2.1149746720622929e-07, "loss": 0.6549, "step": 19480 }, { "epoch": 0.79, "learning_rate": 2.111012379144309e-07, "loss": 0.6326, "step": 19485 }, { "epoch": 0.79, "learning_rate": 2.107053363243174e-07, "loss": 0.683, "step": 19490 }, { "epoch": 0.79, "learning_rate": 2.1030976260034338e-07, "loss": 0.6442, "step": 19495 }, { "epoch": 0.79, "learning_rate": 2.099145169068266e-07, "loss": 0.647, "step": 19500 }, { "epoch": 0.79, "learning_rate": 2.095195994079485e-07, "loss": 0.6238, "step": 19505 }, { "epoch": 0.79, "learning_rate": 2.091250102677542e-07, "loss": 0.6403, "step": 19510 }, { "epoch": 0.79, "learning_rate": 2.0873074965015335e-07, "loss": 0.6408, "step": 19515 }, { "epoch": 0.79, "learning_rate": 2.0833681771891743e-07, "loss": 0.6941, "step": 19520 }, { "epoch": 0.79, "learning_rate": 2.0794321463768273e-07, "loss": 0.6483, "step": 19525 }, { "epoch": 0.79, "learning_rate": 2.075499405699489e-07, "loss": 0.6438, "step": 19530 }, { "epoch": 0.79, "learning_rate": 2.071569956790782e-07, "loss": 0.6856, "step": 19535 }, { "epoch": 0.79, "learning_rate": 2.0676438012829668e-07, "loss": 0.6519, "step": 19540 }, { "epoch": 0.79, "learning_rate": 2.06372094080693e-07, "loss": 0.6411, "step": 19545 }, { "epoch": 0.79, "learning_rate": 2.0598013769922008e-07, "loss": 0.6776, "step": 19550 }, { "epoch": 0.79, "learning_rate": 2.0558851114669274e-07, "loss": 0.6476, "step": 19555 }, { "epoch": 0.79, "learning_rate": 2.051972145857891e-07, "loss": 0.6632, "step": 19560 }, { "epoch": 0.79, "learning_rate": 2.0480624817905113e-07, "loss": 0.6584, "step": 19565 }, { "epoch": 0.79, "learning_rate": 2.044156120888818e-07, "loss": 0.613, "step": 19570 }, { "epoch": 0.8, "learning_rate": 2.0402530647754844e-07, "loss": 0.6221, "step": 19575 }, { "epoch": 0.8, "learning_rate": 2.0363533150718093e-07, "loss": 0.6513, "step": 19580 }, { "epoch": 0.8, "learning_rate": 2.032456873397711e-07, "loss": 0.641, "step": 19585 }, { "epoch": 0.8, "learning_rate": 2.0285637413717395e-07, "loss": 0.6591, "step": 19590 }, { "epoch": 0.8, "learning_rate": 2.024673920611063e-07, "loss": 0.674, "step": 19595 }, { "epoch": 0.8, "learning_rate": 2.0207874127314862e-07, "loss": 0.668, "step": 19600 }, { "epoch": 0.8, "learning_rate": 2.0169042193474283e-07, "loss": 0.6521, "step": 19605 }, { "epoch": 0.8, "learning_rate": 2.0130243420719294e-07, "loss": 0.6629, "step": 19610 }, { "epoch": 0.8, "learning_rate": 2.0091477825166636e-07, "loss": 0.6329, "step": 19615 }, { "epoch": 0.8, "learning_rate": 2.0052745422919183e-07, "loss": 0.6239, "step": 19620 }, { "epoch": 0.8, "learning_rate": 2.0014046230065985e-07, "loss": 0.6888, "step": 19625 }, { "epoch": 0.8, "learning_rate": 1.9975380262682429e-07, "loss": 0.6387, "step": 19630 }, { "epoch": 0.8, "learning_rate": 1.993674753682998e-07, "loss": 0.6091, "step": 19635 }, { "epoch": 0.8, "learning_rate": 1.9898148068556332e-07, "loss": 0.6518, "step": 19640 }, { "epoch": 0.8, "learning_rate": 1.985958187389536e-07, "loss": 0.6446, "step": 19645 }, { "epoch": 0.8, "learning_rate": 1.982104896886716e-07, "loss": 0.6669, "step": 19650 }, { "epoch": 0.8, "learning_rate": 1.9782549369477952e-07, "loss": 0.6802, "step": 19655 }, { "epoch": 0.8, "learning_rate": 1.974408309172011e-07, "loss": 0.6449, "step": 19660 }, { "epoch": 0.8, "learning_rate": 1.970565015157223e-07, "loss": 0.5942, "step": 19665 }, { "epoch": 0.8, "learning_rate": 1.9667250564999006e-07, "loss": 0.68, "step": 19670 }, { "epoch": 0.8, "learning_rate": 1.962888434795129e-07, "loss": 0.6198, "step": 19675 }, { "epoch": 0.8, "learning_rate": 1.959055151636605e-07, "loss": 0.6153, "step": 19680 }, { "epoch": 0.8, "learning_rate": 1.9552252086166465e-07, "loss": 0.6893, "step": 19685 }, { "epoch": 0.8, "learning_rate": 1.9513986073261757e-07, "loss": 0.6095, "step": 19690 }, { "epoch": 0.8, "learning_rate": 1.9475753493547254e-07, "loss": 0.6591, "step": 19695 }, { "epoch": 0.8, "learning_rate": 1.943755436290454e-07, "loss": 0.6112, "step": 19700 }, { "epoch": 0.8, "learning_rate": 1.939938869720108e-07, "loss": 0.653, "step": 19705 }, { "epoch": 0.8, "learning_rate": 1.9361256512290624e-07, "loss": 0.6472, "step": 19710 }, { "epoch": 0.8, "learning_rate": 1.932315782401297e-07, "loss": 0.6746, "step": 19715 }, { "epoch": 0.8, "learning_rate": 1.9285092648193947e-07, "loss": 0.6383, "step": 19720 }, { "epoch": 0.8, "learning_rate": 1.9247061000645515e-07, "loss": 0.6605, "step": 19725 }, { "epoch": 0.8, "learning_rate": 1.920906289716565e-07, "loss": 0.6241, "step": 19730 }, { "epoch": 0.8, "learning_rate": 1.9171098353538494e-07, "loss": 0.6597, "step": 19735 }, { "epoch": 0.8, "learning_rate": 1.9133167385534167e-07, "loss": 0.6183, "step": 19740 }, { "epoch": 0.8, "learning_rate": 1.9095270008908815e-07, "loss": 0.6427, "step": 19745 }, { "epoch": 0.8, "learning_rate": 1.9057406239404784e-07, "loss": 0.645, "step": 19750 }, { "epoch": 0.8, "learning_rate": 1.9019576092750234e-07, "loss": 0.6908, "step": 19755 }, { "epoch": 0.8, "learning_rate": 1.898177958465953e-07, "loss": 0.5862, "step": 19760 }, { "epoch": 0.8, "learning_rate": 1.8944016730833045e-07, "loss": 0.6414, "step": 19765 }, { "epoch": 0.8, "learning_rate": 1.8906287546957122e-07, "loss": 0.6513, "step": 19770 }, { "epoch": 0.8, "learning_rate": 1.8868592048704125e-07, "loss": 0.6557, "step": 19775 }, { "epoch": 0.8, "learning_rate": 1.8830930251732403e-07, "loss": 0.6538, "step": 19780 }, { "epoch": 0.8, "learning_rate": 1.8793302171686398e-07, "loss": 0.6498, "step": 19785 }, { "epoch": 0.8, "learning_rate": 1.8755707824196476e-07, "loss": 0.6455, "step": 19790 }, { "epoch": 0.8, "learning_rate": 1.8718147224878954e-07, "loss": 0.6459, "step": 19795 }, { "epoch": 0.8, "learning_rate": 1.8680620389336267e-07, "loss": 0.626, "step": 19800 }, { "epoch": 0.8, "learning_rate": 1.8643127333156628e-07, "loss": 0.6689, "step": 19805 }, { "epoch": 0.8, "learning_rate": 1.8605668071914404e-07, "loss": 0.6425, "step": 19810 }, { "epoch": 0.8, "learning_rate": 1.8568242621169806e-07, "loss": 0.6661, "step": 19815 }, { "epoch": 0.81, "learning_rate": 1.8530850996469083e-07, "loss": 0.6463, "step": 19820 }, { "epoch": 0.81, "learning_rate": 1.8493493213344358e-07, "loss": 0.6842, "step": 19825 }, { "epoch": 0.81, "learning_rate": 1.8456169287313716e-07, "loss": 0.6676, "step": 19830 }, { "epoch": 0.81, "learning_rate": 1.8418879233881267e-07, "loss": 0.6673, "step": 19835 }, { "epoch": 0.81, "learning_rate": 1.8381623068536866e-07, "loss": 0.6137, "step": 19840 }, { "epoch": 0.81, "learning_rate": 1.8344400806756455e-07, "loss": 0.6686, "step": 19845 }, { "epoch": 0.81, "learning_rate": 1.8307212464001888e-07, "loss": 0.6486, "step": 19850 }, { "epoch": 0.81, "learning_rate": 1.827005805572077e-07, "loss": 0.6125, "step": 19855 }, { "epoch": 0.81, "learning_rate": 1.823293759734681e-07, "loss": 0.6306, "step": 19860 }, { "epoch": 0.81, "learning_rate": 1.8195851104299465e-07, "loss": 0.6946, "step": 19865 }, { "epoch": 0.81, "learning_rate": 1.8158798591984194e-07, "loss": 0.6081, "step": 19870 }, { "epoch": 0.81, "learning_rate": 1.8121780075792258e-07, "loss": 0.6554, "step": 19875 }, { "epoch": 0.81, "learning_rate": 1.8084795571100809e-07, "loss": 0.6768, "step": 19880 }, { "epoch": 0.81, "learning_rate": 1.8047845093272963e-07, "loss": 0.6378, "step": 19885 }, { "epoch": 0.81, "learning_rate": 1.8010928657657521e-07, "loss": 0.6416, "step": 19890 }, { "epoch": 0.81, "learning_rate": 1.7974046279589304e-07, "loss": 0.6047, "step": 19895 }, { "epoch": 0.81, "learning_rate": 1.793719797438895e-07, "loss": 0.6747, "step": 19900 }, { "epoch": 0.81, "learning_rate": 1.7900383757362913e-07, "loss": 0.6308, "step": 19905 }, { "epoch": 0.81, "learning_rate": 1.7863603643803481e-07, "loss": 0.6584, "step": 19910 }, { "epoch": 0.81, "learning_rate": 1.782685764898878e-07, "loss": 0.6723, "step": 19915 }, { "epoch": 0.81, "learning_rate": 1.779014578818283e-07, "loss": 0.6617, "step": 19920 }, { "epoch": 0.81, "learning_rate": 1.775346807663538e-07, "loss": 0.6726, "step": 19925 }, { "epoch": 0.81, "learning_rate": 1.771682452958202e-07, "loss": 0.6528, "step": 19930 }, { "epoch": 0.81, "learning_rate": 1.7680215162244228e-07, "loss": 0.6322, "step": 19935 }, { "epoch": 0.81, "learning_rate": 1.7643639989829128e-07, "loss": 0.6696, "step": 19940 }, { "epoch": 0.81, "learning_rate": 1.7607099027529792e-07, "loss": 0.6936, "step": 19945 }, { "epoch": 0.81, "learning_rate": 1.7570592290524966e-07, "loss": 0.6281, "step": 19950 }, { "epoch": 0.81, "learning_rate": 1.7534119793979286e-07, "loss": 0.6463, "step": 19955 }, { "epoch": 0.81, "learning_rate": 1.7497681553043086e-07, "loss": 0.6862, "step": 19960 }, { "epoch": 0.81, "learning_rate": 1.7461277582852473e-07, "loss": 0.643, "step": 19965 }, { "epoch": 0.81, "learning_rate": 1.7424907898529406e-07, "loss": 0.6482, "step": 19970 }, { "epoch": 0.81, "learning_rate": 1.7388572515181444e-07, "loss": 0.6563, "step": 19975 }, { "epoch": 0.81, "learning_rate": 1.7352271447902033e-07, "loss": 0.6614, "step": 19980 }, { "epoch": 0.81, "learning_rate": 1.731600471177037e-07, "loss": 0.6491, "step": 19985 }, { "epoch": 0.81, "learning_rate": 1.727977232185125e-07, "loss": 0.6552, "step": 19990 }, { "epoch": 0.81, "learning_rate": 1.7243574293195363e-07, "loss": 0.6329, "step": 19995 }, { "epoch": 0.81, "learning_rate": 1.7207410640838992e-07, "loss": 0.6362, "step": 20000 }, { "epoch": 0.81, "eval_loss": 0.6180657744407654, "eval_runtime": 139.6405, "eval_samples_per_second": 16.944, "eval_steps_per_second": 2.829, "step": 20000 }, { "epoch": 0.81, "learning_rate": 1.7171281379804282e-07, "loss": 0.6774, "step": 20005 }, { "epoch": 0.81, "learning_rate": 1.7135186525098965e-07, "loss": 0.6437, "step": 20010 }, { "epoch": 0.81, "learning_rate": 1.709912609171651e-07, "loss": 0.6578, "step": 20015 }, { "epoch": 0.81, "learning_rate": 1.7063100094636195e-07, "loss": 0.661, "step": 20020 }, { "epoch": 0.81, "learning_rate": 1.7027108548822788e-07, "loss": 0.6436, "step": 20025 }, { "epoch": 0.81, "learning_rate": 1.6991151469226928e-07, "loss": 0.7003, "step": 20030 }, { "epoch": 0.81, "learning_rate": 1.695522887078491e-07, "loss": 0.6573, "step": 20035 }, { "epoch": 0.81, "learning_rate": 1.6919340768418577e-07, "loss": 0.6348, "step": 20040 }, { "epoch": 0.81, "learning_rate": 1.6883487177035616e-07, "loss": 0.6514, "step": 20045 }, { "epoch": 0.81, "learning_rate": 1.6847668111529234e-07, "loss": 0.6425, "step": 20050 }, { "epoch": 0.81, "learning_rate": 1.681188358677842e-07, "loss": 0.6413, "step": 20055 }, { "epoch": 0.81, "learning_rate": 1.6776133617647724e-07, "loss": 0.6331, "step": 20060 }, { "epoch": 0.82, "learning_rate": 1.674041821898735e-07, "loss": 0.6977, "step": 20065 }, { "epoch": 0.82, "learning_rate": 1.670473740563323e-07, "loss": 0.6412, "step": 20070 }, { "epoch": 0.82, "learning_rate": 1.666909119240678e-07, "loss": 0.6476, "step": 20075 }, { "epoch": 0.82, "learning_rate": 1.6633479594115184e-07, "loss": 0.6352, "step": 20080 }, { "epoch": 0.82, "learning_rate": 1.6597902625551185e-07, "loss": 0.7082, "step": 20085 }, { "epoch": 0.82, "learning_rate": 1.6562360301493106e-07, "loss": 0.6911, "step": 20090 }, { "epoch": 0.82, "learning_rate": 1.6526852636704968e-07, "loss": 0.6373, "step": 20095 }, { "epoch": 0.82, "learning_rate": 1.6491379645936298e-07, "loss": 0.6378, "step": 20100 }, { "epoch": 0.82, "learning_rate": 1.6455941343922354e-07, "loss": 0.6746, "step": 20105 }, { "epoch": 0.82, "learning_rate": 1.642053774538379e-07, "loss": 0.6449, "step": 20110 }, { "epoch": 0.82, "learning_rate": 1.6385168865027012e-07, "loss": 0.6542, "step": 20115 }, { "epoch": 0.82, "learning_rate": 1.6349834717543975e-07, "loss": 0.637, "step": 20120 }, { "epoch": 0.82, "learning_rate": 1.63145353176121e-07, "loss": 0.6517, "step": 20125 }, { "epoch": 0.82, "learning_rate": 1.6279270679894507e-07, "loss": 0.6613, "step": 20130 }, { "epoch": 0.82, "learning_rate": 1.6244040819039772e-07, "loss": 0.6674, "step": 20135 }, { "epoch": 0.82, "learning_rate": 1.6208845749682144e-07, "loss": 0.641, "step": 20140 }, { "epoch": 0.82, "learning_rate": 1.617368548644129e-07, "loss": 0.6318, "step": 20145 }, { "epoch": 0.82, "learning_rate": 1.6138560043922488e-07, "loss": 0.6804, "step": 20150 }, { "epoch": 0.82, "learning_rate": 1.6103469436716587e-07, "loss": 0.6316, "step": 20155 }, { "epoch": 0.82, "learning_rate": 1.606841367939984e-07, "loss": 0.6841, "step": 20160 }, { "epoch": 0.82, "learning_rate": 1.603339278653414e-07, "loss": 0.6406, "step": 20165 }, { "epoch": 0.82, "learning_rate": 1.5998406772666916e-07, "loss": 0.6244, "step": 20170 }, { "epoch": 0.82, "learning_rate": 1.596345565233096e-07, "loss": 0.6496, "step": 20175 }, { "epoch": 0.82, "learning_rate": 1.592853944004473e-07, "loss": 0.6582, "step": 20180 }, { "epoch": 0.82, "learning_rate": 1.5893658150312071e-07, "loss": 0.6606, "step": 20185 }, { "epoch": 0.82, "learning_rate": 1.5858811797622418e-07, "loss": 0.6386, "step": 20190 }, { "epoch": 0.82, "learning_rate": 1.582400039645062e-07, "loss": 0.6196, "step": 20195 }, { "epoch": 0.82, "learning_rate": 1.5789223961257003e-07, "loss": 0.6316, "step": 20200 }, { "epoch": 0.82, "learning_rate": 1.5754482506487465e-07, "loss": 0.6206, "step": 20205 }, { "epoch": 0.82, "learning_rate": 1.5719776046573207e-07, "loss": 0.6245, "step": 20210 }, { "epoch": 0.82, "learning_rate": 1.5685104595931054e-07, "loss": 0.663, "step": 20215 }, { "epoch": 0.82, "learning_rate": 1.5650468168963249e-07, "loss": 0.6784, "step": 20220 }, { "epoch": 0.82, "learning_rate": 1.5615866780057385e-07, "loss": 0.5968, "step": 20225 }, { "epoch": 0.82, "learning_rate": 1.5581300443586643e-07, "loss": 0.6721, "step": 20230 }, { "epoch": 0.82, "learning_rate": 1.5546769173909534e-07, "loss": 0.659, "step": 20235 }, { "epoch": 0.82, "learning_rate": 1.551227298537011e-07, "loss": 0.6432, "step": 20240 }, { "epoch": 0.82, "learning_rate": 1.547781189229771e-07, "loss": 0.6608, "step": 20245 }, { "epoch": 0.82, "learning_rate": 1.54433859090072e-07, "loss": 0.665, "step": 20250 }, { "epoch": 0.82, "learning_rate": 1.5408995049798888e-07, "loss": 0.6538, "step": 20255 }, { "epoch": 0.82, "learning_rate": 1.537463932895836e-07, "loss": 0.6964, "step": 20260 }, { "epoch": 0.82, "learning_rate": 1.5340318760756731e-07, "loss": 0.642, "step": 20265 }, { "epoch": 0.82, "learning_rate": 1.5306033359450454e-07, "loss": 0.6475, "step": 20270 }, { "epoch": 0.82, "learning_rate": 1.5271783139281357e-07, "loss": 0.6927, "step": 20275 }, { "epoch": 0.82, "learning_rate": 1.523756811447674e-07, "loss": 0.6862, "step": 20280 }, { "epoch": 0.82, "learning_rate": 1.5203388299249176e-07, "loss": 0.6319, "step": 20285 }, { "epoch": 0.82, "learning_rate": 1.516924370779673e-07, "loss": 0.649, "step": 20290 }, { "epoch": 0.82, "learning_rate": 1.513513435430267e-07, "loss": 0.6765, "step": 20295 }, { "epoch": 0.82, "learning_rate": 1.5101060252935783e-07, "loss": 0.6901, "step": 20300 }, { "epoch": 0.82, "learning_rate": 1.50670214178502e-07, "loss": 0.6132, "step": 20305 }, { "epoch": 0.82, "learning_rate": 1.503301786318526e-07, "loss": 0.6503, "step": 20310 }, { "epoch": 0.83, "learning_rate": 1.4999049603065805e-07, "loss": 0.6773, "step": 20315 }, { "epoch": 0.83, "learning_rate": 1.496511665160195e-07, "loss": 0.6379, "step": 20320 }, { "epoch": 0.83, "learning_rate": 1.4931219022889107e-07, "loss": 0.6177, "step": 20325 }, { "epoch": 0.83, "learning_rate": 1.4897356731008125e-07, "loss": 0.6606, "step": 20330 }, { "epoch": 0.83, "learning_rate": 1.486352979002503e-07, "loss": 0.6543, "step": 20335 }, { "epoch": 0.83, "learning_rate": 1.4829738213991328e-07, "loss": 0.6527, "step": 20340 }, { "epoch": 0.83, "learning_rate": 1.4795982016943654e-07, "loss": 0.6489, "step": 20345 }, { "epoch": 0.83, "learning_rate": 1.476226121290408e-07, "loss": 0.6398, "step": 20350 }, { "epoch": 0.83, "learning_rate": 1.4728575815879973e-07, "loss": 0.6249, "step": 20355 }, { "epoch": 0.83, "learning_rate": 1.469492583986387e-07, "loss": 0.6764, "step": 20360 }, { "epoch": 0.83, "learning_rate": 1.4661311298833755e-07, "loss": 0.6486, "step": 20365 }, { "epoch": 0.83, "learning_rate": 1.4627732206752786e-07, "loss": 0.6423, "step": 20370 }, { "epoch": 0.83, "learning_rate": 1.4594188577569412e-07, "loss": 0.6952, "step": 20375 }, { "epoch": 0.83, "learning_rate": 1.4560680425217364e-07, "loss": 0.684, "step": 20380 }, { "epoch": 0.83, "learning_rate": 1.4527207763615647e-07, "loss": 0.6607, "step": 20385 }, { "epoch": 0.83, "learning_rate": 1.4493770606668565e-07, "loss": 0.6086, "step": 20390 }, { "epoch": 0.83, "learning_rate": 1.4460368968265524e-07, "loss": 0.625, "step": 20395 }, { "epoch": 0.83, "learning_rate": 1.4427002862281356e-07, "loss": 0.6509, "step": 20400 }, { "epoch": 0.83, "learning_rate": 1.439367230257602e-07, "loss": 0.6514, "step": 20405 }, { "epoch": 0.83, "learning_rate": 1.4360377302994708e-07, "loss": 0.6664, "step": 20410 }, { "epoch": 0.83, "learning_rate": 1.4327117877367933e-07, "loss": 0.6719, "step": 20415 }, { "epoch": 0.83, "learning_rate": 1.4293894039511324e-07, "loss": 0.6728, "step": 20420 }, { "epoch": 0.83, "learning_rate": 1.4260705803225838e-07, "loss": 0.617, "step": 20425 }, { "epoch": 0.83, "learning_rate": 1.4227553182297492e-07, "loss": 0.6401, "step": 20430 }, { "epoch": 0.83, "learning_rate": 1.4194436190497638e-07, "loss": 0.6788, "step": 20435 }, { "epoch": 0.83, "learning_rate": 1.416135484158284e-07, "loss": 0.6974, "step": 20440 }, { "epoch": 0.83, "learning_rate": 1.4128309149294694e-07, "loss": 0.6263, "step": 20445 }, { "epoch": 0.83, "learning_rate": 1.4095299127360183e-07, "loss": 0.6399, "step": 20450 }, { "epoch": 0.83, "learning_rate": 1.4062324789491352e-07, "loss": 0.6801, "step": 20455 }, { "epoch": 0.83, "learning_rate": 1.4029386149385425e-07, "loss": 0.6187, "step": 20460 }, { "epoch": 0.83, "learning_rate": 1.3996483220724876e-07, "loss": 0.63, "step": 20465 }, { "epoch": 0.83, "learning_rate": 1.396361601717726e-07, "loss": 0.6627, "step": 20470 }, { "epoch": 0.83, "learning_rate": 1.3930784552395381e-07, "loss": 0.6438, "step": 20475 }, { "epoch": 0.83, "learning_rate": 1.389798884001706e-07, "loss": 0.6468, "step": 20480 }, { "epoch": 0.83, "learning_rate": 1.3865228893665393e-07, "loss": 0.588, "step": 20485 }, { "epoch": 0.83, "learning_rate": 1.3832504726948623e-07, "loss": 0.6959, "step": 20490 }, { "epoch": 0.83, "learning_rate": 1.3799816353460003e-07, "loss": 0.658, "step": 20495 }, { "epoch": 0.83, "learning_rate": 1.3767163786778046e-07, "loss": 0.6279, "step": 20500 }, { "epoch": 0.83, "learning_rate": 1.3734547040466348e-07, "loss": 0.6533, "step": 20505 }, { "epoch": 0.83, "learning_rate": 1.3701966128073605e-07, "loss": 0.6677, "step": 20510 }, { "epoch": 0.83, "learning_rate": 1.3669421063133623e-07, "loss": 0.6677, "step": 20515 }, { "epoch": 0.83, "learning_rate": 1.3636911859165357e-07, "loss": 0.6375, "step": 20520 }, { "epoch": 0.83, "learning_rate": 1.3604438529672913e-07, "loss": 0.6312, "step": 20525 }, { "epoch": 0.83, "learning_rate": 1.3572001088145312e-07, "loss": 0.6548, "step": 20530 }, { "epoch": 0.83, "learning_rate": 1.3539599548056879e-07, "loss": 0.6392, "step": 20535 }, { "epoch": 0.83, "learning_rate": 1.350723392286689e-07, "loss": 0.6157, "step": 20540 }, { "epoch": 0.83, "learning_rate": 1.3474904226019736e-07, "loss": 0.6478, "step": 20545 }, { "epoch": 0.83, "learning_rate": 1.3442610470944925e-07, "loss": 0.6769, "step": 20550 }, { "epoch": 0.83, "learning_rate": 1.341035267105699e-07, "loss": 0.6771, "step": 20555 }, { "epoch": 0.84, "learning_rate": 1.3378130839755532e-07, "loss": 0.6579, "step": 20560 }, { "epoch": 0.84, "learning_rate": 1.3345944990425195e-07, "loss": 0.6595, "step": 20565 }, { "epoch": 0.84, "learning_rate": 1.3313795136435736e-07, "loss": 0.6163, "step": 20570 }, { "epoch": 0.84, "learning_rate": 1.3281681291141955e-07, "loss": 0.6473, "step": 20575 }, { "epoch": 0.84, "learning_rate": 1.3249603467883586e-07, "loss": 0.6347, "step": 20580 }, { "epoch": 0.84, "learning_rate": 1.3217561679985545e-07, "loss": 0.6629, "step": 20585 }, { "epoch": 0.84, "learning_rate": 1.3185555940757674e-07, "loss": 0.6555, "step": 20590 }, { "epoch": 0.84, "learning_rate": 1.3153586263494876e-07, "loss": 0.6274, "step": 20595 }, { "epoch": 0.84, "learning_rate": 1.3121652661477112e-07, "loss": 0.6048, "step": 20600 }, { "epoch": 0.84, "learning_rate": 1.3089755147969294e-07, "loss": 0.6473, "step": 20605 }, { "epoch": 0.84, "learning_rate": 1.3057893736221392e-07, "loss": 0.6855, "step": 20610 }, { "epoch": 0.84, "learning_rate": 1.3026068439468318e-07, "loss": 0.6465, "step": 20615 }, { "epoch": 0.84, "learning_rate": 1.2994279270930052e-07, "loss": 0.6463, "step": 20620 }, { "epoch": 0.84, "learning_rate": 1.2962526243811577e-07, "loss": 0.6503, "step": 20625 }, { "epoch": 0.84, "learning_rate": 1.2930809371302741e-07, "loss": 0.5882, "step": 20630 }, { "epoch": 0.84, "learning_rate": 1.289912866657854e-07, "loss": 0.6766, "step": 20635 }, { "epoch": 0.84, "learning_rate": 1.2867484142798813e-07, "loss": 0.6454, "step": 20640 }, { "epoch": 0.84, "learning_rate": 1.283587581310841e-07, "loss": 0.6341, "step": 20645 }, { "epoch": 0.84, "learning_rate": 1.2804303690637197e-07, "loss": 0.6512, "step": 20650 }, { "epoch": 0.84, "learning_rate": 1.2772767788499917e-07, "loss": 0.6695, "step": 20655 }, { "epoch": 0.84, "learning_rate": 1.274126811979639e-07, "loss": 0.6492, "step": 20660 }, { "epoch": 0.84, "learning_rate": 1.2709804697611193e-07, "loss": 0.6555, "step": 20665 }, { "epoch": 0.84, "learning_rate": 1.267837753501403e-07, "loss": 0.616, "step": 20670 }, { "epoch": 0.84, "learning_rate": 1.2646986645059454e-07, "loss": 0.6314, "step": 20675 }, { "epoch": 0.84, "learning_rate": 1.261563204078695e-07, "loss": 0.6754, "step": 20680 }, { "epoch": 0.84, "learning_rate": 1.2584313735220987e-07, "loss": 0.6445, "step": 20685 }, { "epoch": 0.84, "learning_rate": 1.255303174137089e-07, "loss": 0.6318, "step": 20690 }, { "epoch": 0.84, "learning_rate": 1.2521786072230933e-07, "loss": 0.6744, "step": 20695 }, { "epoch": 0.84, "learning_rate": 1.249057674078028e-07, "loss": 0.6755, "step": 20700 }, { "epoch": 0.84, "learning_rate": 1.2459403759983023e-07, "loss": 0.6621, "step": 20705 }, { "epoch": 0.84, "learning_rate": 1.2428267142788195e-07, "loss": 0.5975, "step": 20710 }, { "epoch": 0.84, "learning_rate": 1.2397166902129595e-07, "loss": 0.6464, "step": 20715 }, { "epoch": 0.84, "learning_rate": 1.2366103050926057e-07, "loss": 0.6505, "step": 20720 }, { "epoch": 0.84, "learning_rate": 1.2335075602081202e-07, "loss": 0.6541, "step": 20725 }, { "epoch": 0.84, "learning_rate": 1.2304084568483552e-07, "loss": 0.6424, "step": 20730 }, { "epoch": 0.84, "learning_rate": 1.2273129963006558e-07, "loss": 0.6359, "step": 20735 }, { "epoch": 0.84, "learning_rate": 1.224221179850846e-07, "loss": 0.6211, "step": 20740 }, { "epoch": 0.84, "learning_rate": 1.2211330087832404e-07, "loss": 0.6651, "step": 20745 }, { "epoch": 0.84, "learning_rate": 1.218048484380636e-07, "loss": 0.6969, "step": 20750 }, { "epoch": 0.84, "learning_rate": 1.2149676079243198e-07, "loss": 0.6476, "step": 20755 }, { "epoch": 0.84, "learning_rate": 1.211890380694065e-07, "loss": 0.6507, "step": 20760 }, { "epoch": 0.84, "learning_rate": 1.2088168039681168e-07, "loss": 0.6329, "step": 20765 }, { "epoch": 0.84, "learning_rate": 1.2057468790232195e-07, "loss": 0.6363, "step": 20770 }, { "epoch": 0.84, "learning_rate": 1.2026806071345885e-07, "loss": 0.6098, "step": 20775 }, { "epoch": 0.84, "learning_rate": 1.1996179895759262e-07, "loss": 0.5963, "step": 20780 }, { "epoch": 0.84, "learning_rate": 1.1965590276194215e-07, "loss": 0.6654, "step": 20785 }, { "epoch": 0.84, "learning_rate": 1.1935037225357392e-07, "loss": 0.6525, "step": 20790 }, { "epoch": 0.84, "learning_rate": 1.190452075594024e-07, "loss": 0.6529, "step": 20795 }, { "epoch": 0.84, "learning_rate": 1.1874040880619041e-07, "loss": 0.6519, "step": 20800 }, { "epoch": 0.84, "eval_loss": 0.6171961426734924, "eval_runtime": 139.723, "eval_samples_per_second": 16.934, "eval_steps_per_second": 2.827, "step": 20800 }, { "epoch": 0.85, "learning_rate": 1.184359761205489e-07, "loss": 0.6229, "step": 20805 }, { "epoch": 0.85, "learning_rate": 1.181319096289366e-07, "loss": 0.6511, "step": 20810 }, { "epoch": 0.85, "learning_rate": 1.1782820945765958e-07, "loss": 0.6513, "step": 20815 }, { "epoch": 0.85, "learning_rate": 1.1752487573287296e-07, "loss": 0.6551, "step": 20820 }, { "epoch": 0.85, "learning_rate": 1.1722190858057846e-07, "loss": 0.6649, "step": 20825 }, { "epoch": 0.85, "learning_rate": 1.169193081266262e-07, "loss": 0.631, "step": 20830 }, { "epoch": 0.85, "learning_rate": 1.1661707449671343e-07, "loss": 0.6674, "step": 20835 }, { "epoch": 0.85, "learning_rate": 1.1631520781638582e-07, "loss": 0.649, "step": 20840 }, { "epoch": 0.85, "learning_rate": 1.1601370821103607e-07, "loss": 0.5991, "step": 20845 }, { "epoch": 0.85, "learning_rate": 1.1571257580590421e-07, "loss": 0.6397, "step": 20850 }, { "epoch": 0.85, "learning_rate": 1.1541181072607831e-07, "loss": 0.6698, "step": 20855 }, { "epoch": 0.85, "learning_rate": 1.1511141309649364e-07, "loss": 0.6566, "step": 20860 }, { "epoch": 0.85, "learning_rate": 1.1481138304193228e-07, "loss": 0.6261, "step": 20865 }, { "epoch": 0.85, "learning_rate": 1.1451172068702464e-07, "loss": 0.6449, "step": 20870 }, { "epoch": 0.85, "learning_rate": 1.1421242615624771e-07, "loss": 0.6521, "step": 20875 }, { "epoch": 0.85, "learning_rate": 1.1391349957392571e-07, "loss": 0.6329, "step": 20880 }, { "epoch": 0.85, "learning_rate": 1.1361494106423008e-07, "loss": 0.6462, "step": 20885 }, { "epoch": 0.85, "learning_rate": 1.1331675075117963e-07, "loss": 0.6567, "step": 20890 }, { "epoch": 0.85, "learning_rate": 1.1301892875864005e-07, "loss": 0.6515, "step": 20895 }, { "epoch": 0.85, "learning_rate": 1.127214752103236e-07, "loss": 0.6775, "step": 20900 }, { "epoch": 0.85, "learning_rate": 1.1242439022979055e-07, "loss": 0.6874, "step": 20905 }, { "epoch": 0.85, "learning_rate": 1.1212767394044697e-07, "loss": 0.6659, "step": 20910 }, { "epoch": 0.85, "learning_rate": 1.1183132646554605e-07, "loss": 0.6481, "step": 20915 }, { "epoch": 0.85, "learning_rate": 1.1153534792818852e-07, "loss": 0.6514, "step": 20920 }, { "epoch": 0.85, "learning_rate": 1.1123973845132095e-07, "loss": 0.6444, "step": 20925 }, { "epoch": 0.85, "learning_rate": 1.1094449815773699e-07, "loss": 0.6424, "step": 20930 }, { "epoch": 0.85, "learning_rate": 1.1064962717007675e-07, "loss": 0.611, "step": 20935 }, { "epoch": 0.85, "learning_rate": 1.1035512561082738e-07, "loss": 0.6177, "step": 20940 }, { "epoch": 0.85, "learning_rate": 1.1006099360232212e-07, "loss": 0.6323, "step": 20945 }, { "epoch": 0.85, "learning_rate": 1.0976723126674059e-07, "loss": 0.6681, "step": 20950 }, { "epoch": 0.85, "learning_rate": 1.094738387261096e-07, "loss": 0.6665, "step": 20955 }, { "epoch": 0.85, "learning_rate": 1.0918081610230157e-07, "loss": 0.6414, "step": 20960 }, { "epoch": 0.85, "learning_rate": 1.0888816351703555e-07, "loss": 0.6165, "step": 20965 }, { "epoch": 0.85, "learning_rate": 1.0859588109187678e-07, "loss": 0.6792, "step": 20970 }, { "epoch": 0.85, "learning_rate": 1.0830396894823712e-07, "loss": 0.6507, "step": 20975 }, { "epoch": 0.85, "learning_rate": 1.0801242720737425e-07, "loss": 0.6051, "step": 20980 }, { "epoch": 0.85, "learning_rate": 1.0772125599039183e-07, "loss": 0.6421, "step": 20985 }, { "epoch": 0.85, "learning_rate": 1.0743045541824015e-07, "loss": 0.6227, "step": 20990 }, { "epoch": 0.85, "learning_rate": 1.0714002561171521e-07, "loss": 0.6296, "step": 20995 }, { "epoch": 0.85, "learning_rate": 1.0684996669145874e-07, "loss": 0.6638, "step": 21000 }, { "epoch": 0.85, "learning_rate": 1.0656027877795904e-07, "loss": 0.643, "step": 21005 }, { "epoch": 0.85, "learning_rate": 1.0627096199154983e-07, "loss": 0.6595, "step": 21010 }, { "epoch": 0.85, "learning_rate": 1.0598201645241079e-07, "loss": 0.6595, "step": 21015 }, { "epoch": 0.85, "learning_rate": 1.0569344228056708e-07, "loss": 0.6446, "step": 21020 }, { "epoch": 0.85, "learning_rate": 1.0540523959589042e-07, "loss": 0.6615, "step": 21025 }, { "epoch": 0.85, "learning_rate": 1.0511740851809747e-07, "loss": 0.6832, "step": 21030 }, { "epoch": 0.85, "learning_rate": 1.0482994916675047e-07, "loss": 0.6588, "step": 21035 }, { "epoch": 0.85, "learning_rate": 1.0454286166125814e-07, "loss": 0.6532, "step": 21040 }, { "epoch": 0.85, "learning_rate": 1.0425614612087363e-07, "loss": 0.6882, "step": 21045 }, { "epoch": 0.86, "learning_rate": 1.0396980266469623e-07, "loss": 0.6266, "step": 21050 }, { "epoch": 0.86, "learning_rate": 1.0368383141167059e-07, "loss": 0.6535, "step": 21055 }, { "epoch": 0.86, "learning_rate": 1.0339823248058677e-07, "loss": 0.6547, "step": 21060 }, { "epoch": 0.86, "learning_rate": 1.0311300599007988e-07, "loss": 0.6387, "step": 21065 }, { "epoch": 0.86, "learning_rate": 1.0282815205863038e-07, "loss": 0.6395, "step": 21070 }, { "epoch": 0.86, "learning_rate": 1.0254367080456449e-07, "loss": 0.6433, "step": 21075 }, { "epoch": 0.86, "learning_rate": 1.0225956234605316e-07, "loss": 0.6727, "step": 21080 }, { "epoch": 0.86, "learning_rate": 1.0197582680111228e-07, "loss": 0.6174, "step": 21085 }, { "epoch": 0.86, "learning_rate": 1.0169246428760359e-07, "loss": 0.6452, "step": 21090 }, { "epoch": 0.86, "learning_rate": 1.0140947492323315e-07, "loss": 0.628, "step": 21095 }, { "epoch": 0.86, "learning_rate": 1.0112685882555228e-07, "loss": 0.6451, "step": 21100 }, { "epoch": 0.86, "learning_rate": 1.0084461611195705e-07, "loss": 0.6135, "step": 21105 }, { "epoch": 0.86, "learning_rate": 1.0056274689968902e-07, "loss": 0.6015, "step": 21110 }, { "epoch": 0.86, "learning_rate": 1.0028125130583409e-07, "loss": 0.6446, "step": 21115 }, { "epoch": 0.86, "learning_rate": 1.0000012944732284e-07, "loss": 0.6519, "step": 21120 }, { "epoch": 0.86, "learning_rate": 9.971938144093129e-08, "loss": 0.6359, "step": 21125 }, { "epoch": 0.86, "learning_rate": 9.943900740327937e-08, "loss": 0.6572, "step": 21130 }, { "epoch": 0.86, "learning_rate": 9.915900745083194e-08, "loss": 0.6668, "step": 21135 }, { "epoch": 0.86, "learning_rate": 9.887938169989896e-08, "loss": 0.6255, "step": 21140 }, { "epoch": 0.86, "learning_rate": 9.860013026663428e-08, "loss": 0.6186, "step": 21145 }, { "epoch": 0.86, "learning_rate": 9.832125326703644e-08, "loss": 0.6505, "step": 21150 }, { "epoch": 0.86, "learning_rate": 9.804275081694846e-08, "loss": 0.6368, "step": 21155 }, { "epoch": 0.86, "learning_rate": 9.776462303205824e-08, "loss": 0.6271, "step": 21160 }, { "epoch": 0.86, "learning_rate": 9.748687002789734e-08, "loss": 0.6784, "step": 21165 }, { "epoch": 0.86, "learning_rate": 9.720949191984185e-08, "loss": 0.6309, "step": 21170 }, { "epoch": 0.86, "learning_rate": 9.693248882311256e-08, "loss": 0.6185, "step": 21175 }, { "epoch": 0.86, "learning_rate": 9.665586085277388e-08, "loss": 0.6288, "step": 21180 }, { "epoch": 0.86, "learning_rate": 9.637960812373457e-08, "loss": 0.7016, "step": 21185 }, { "epoch": 0.86, "learning_rate": 9.610373075074806e-08, "loss": 0.6614, "step": 21190 }, { "epoch": 0.86, "learning_rate": 9.582822884841101e-08, "loss": 0.6505, "step": 21195 }, { "epoch": 0.86, "learning_rate": 9.555310253116467e-08, "loss": 0.6784, "step": 21200 }, { "epoch": 0.86, "learning_rate": 9.527835191329392e-08, "loss": 0.6565, "step": 21205 }, { "epoch": 0.86, "learning_rate": 9.500397710892816e-08, "loss": 0.6305, "step": 21210 }, { "epoch": 0.86, "learning_rate": 9.472997823203999e-08, "loss": 0.6524, "step": 21215 }, { "epoch": 0.86, "learning_rate": 9.445635539644615e-08, "loss": 0.6717, "step": 21220 }, { "epoch": 0.86, "learning_rate": 9.418310871580737e-08, "loss": 0.6429, "step": 21225 }, { "epoch": 0.86, "learning_rate": 9.391023830362799e-08, "loss": 0.6434, "step": 21230 }, { "epoch": 0.86, "learning_rate": 9.363774427325577e-08, "loss": 0.6648, "step": 21235 }, { "epoch": 0.86, "learning_rate": 9.336562673788228e-08, "loss": 0.636, "step": 21240 }, { "epoch": 0.86, "learning_rate": 9.309388581054322e-08, "loss": 0.6771, "step": 21245 }, { "epoch": 0.86, "learning_rate": 9.282252160411719e-08, "loss": 0.6502, "step": 21250 }, { "epoch": 0.86, "learning_rate": 9.255153423132622e-08, "loss": 0.6437, "step": 21255 }, { "epoch": 0.86, "learning_rate": 9.22809238047365e-08, "loss": 0.6704, "step": 21260 }, { "epoch": 0.86, "learning_rate": 9.201069043675724e-08, "loss": 0.6404, "step": 21265 }, { "epoch": 0.86, "learning_rate": 9.174083423964062e-08, "loss": 0.6834, "step": 21270 }, { "epoch": 0.86, "learning_rate": 9.147135532548311e-08, "loss": 0.6516, "step": 21275 }, { "epoch": 0.86, "learning_rate": 9.120225380622371e-08, "loss": 0.671, "step": 21280 }, { "epoch": 0.86, "learning_rate": 9.093352979364466e-08, "loss": 0.6583, "step": 21285 }, { "epoch": 0.86, "learning_rate": 9.066518339937157e-08, "loss": 0.6467, "step": 21290 }, { "epoch": 0.86, "learning_rate": 9.03972147348735e-08, "loss": 0.5999, "step": 21295 }, { "epoch": 0.87, "learning_rate": 9.012962391146217e-08, "loss": 0.6589, "step": 21300 }, { "epoch": 0.87, "learning_rate": 8.986241104029224e-08, "loss": 0.647, "step": 21305 }, { "epoch": 0.87, "learning_rate": 8.959557623236202e-08, "loss": 0.6199, "step": 21310 }, { "epoch": 0.87, "learning_rate": 8.93291195985122e-08, "loss": 0.6762, "step": 21315 }, { "epoch": 0.87, "learning_rate": 8.906304124942632e-08, "loss": 0.6446, "step": 21320 }, { "epoch": 0.87, "learning_rate": 8.879734129563132e-08, "loss": 0.6504, "step": 21325 }, { "epoch": 0.87, "learning_rate": 8.853201984749658e-08, "loss": 0.6898, "step": 21330 }, { "epoch": 0.87, "learning_rate": 8.826707701523428e-08, "loss": 0.6575, "step": 21335 }, { "epoch": 0.87, "learning_rate": 8.800251290889927e-08, "loss": 0.6208, "step": 21340 }, { "epoch": 0.87, "learning_rate": 8.773832763838939e-08, "loss": 0.6662, "step": 21345 }, { "epoch": 0.87, "learning_rate": 8.74745213134448e-08, "loss": 0.6218, "step": 21350 }, { "epoch": 0.87, "learning_rate": 8.721109404364812e-08, "loss": 0.6747, "step": 21355 }, { "epoch": 0.87, "learning_rate": 8.694804593842519e-08, "loss": 0.693, "step": 21360 }, { "epoch": 0.87, "learning_rate": 8.668537710704371e-08, "loss": 0.6482, "step": 21365 }, { "epoch": 0.87, "learning_rate": 8.642308765861406e-08, "loss": 0.6946, "step": 21370 }, { "epoch": 0.87, "learning_rate": 8.616117770208864e-08, "loss": 0.655, "step": 21375 }, { "epoch": 0.87, "learning_rate": 8.58996473462631e-08, "loss": 0.6549, "step": 21380 }, { "epoch": 0.87, "learning_rate": 8.563849669977463e-08, "loss": 0.6444, "step": 21385 }, { "epoch": 0.87, "learning_rate": 8.537772587110281e-08, "loss": 0.646, "step": 21390 }, { "epoch": 0.87, "learning_rate": 8.511733496856999e-08, "loss": 0.6792, "step": 21395 }, { "epoch": 0.87, "learning_rate": 8.485732410033985e-08, "loss": 0.6037, "step": 21400 }, { "epoch": 0.87, "learning_rate": 8.459769337441868e-08, "loss": 0.6055, "step": 21405 }, { "epoch": 0.87, "learning_rate": 8.433844289865521e-08, "loss": 0.6427, "step": 21410 }, { "epoch": 0.87, "learning_rate": 8.407957278073952e-08, "loss": 0.6628, "step": 21415 }, { "epoch": 0.87, "learning_rate": 8.382108312820401e-08, "loss": 0.6569, "step": 21420 }, { "epoch": 0.87, "learning_rate": 8.356297404842305e-08, "loss": 0.659, "step": 21425 }, { "epoch": 0.87, "learning_rate": 8.330524564861297e-08, "loss": 0.6279, "step": 21430 }, { "epoch": 0.87, "learning_rate": 8.304789803583201e-08, "loss": 0.6281, "step": 21435 }, { "epoch": 0.87, "learning_rate": 8.279093131697968e-08, "loss": 0.6327, "step": 21440 }, { "epoch": 0.87, "learning_rate": 8.253434559879835e-08, "loss": 0.6402, "step": 21445 }, { "epoch": 0.87, "learning_rate": 8.227814098787111e-08, "loss": 0.6601, "step": 21450 }, { "epoch": 0.87, "learning_rate": 8.202231759062305e-08, "loss": 0.6355, "step": 21455 }, { "epoch": 0.87, "learning_rate": 8.17668755133214e-08, "loss": 0.663, "step": 21460 }, { "epoch": 0.87, "learning_rate": 8.151181486207414e-08, "loss": 0.6715, "step": 21465 }, { "epoch": 0.87, "learning_rate": 8.125713574283155e-08, "loss": 0.6456, "step": 21470 }, { "epoch": 0.87, "learning_rate": 8.100283826138477e-08, "loss": 0.6243, "step": 21475 }, { "epoch": 0.87, "learning_rate": 8.074892252336718e-08, "loss": 0.6273, "step": 21480 }, { "epoch": 0.87, "learning_rate": 8.049538863425298e-08, "loss": 0.6379, "step": 21485 }, { "epoch": 0.87, "learning_rate": 8.024223669935782e-08, "loss": 0.6303, "step": 21490 }, { "epoch": 0.87, "learning_rate": 7.9989466823839e-08, "loss": 0.6826, "step": 21495 }, { "epoch": 0.87, "learning_rate": 7.973707911269489e-08, "loss": 0.6236, "step": 21500 }, { "epoch": 0.87, "learning_rate": 7.948507367076518e-08, "loss": 0.6341, "step": 21505 }, { "epoch": 0.87, "learning_rate": 7.923345060273046e-08, "loss": 0.6677, "step": 21510 }, { "epoch": 0.87, "learning_rate": 7.898221001311312e-08, "loss": 0.6299, "step": 21515 }, { "epoch": 0.87, "learning_rate": 7.873135200627623e-08, "loss": 0.6272, "step": 21520 }, { "epoch": 0.87, "learning_rate": 7.848087668642377e-08, "loss": 0.6455, "step": 21525 }, { "epoch": 0.87, "learning_rate": 7.823078415760143e-08, "loss": 0.6406, "step": 21530 }, { "epoch": 0.87, "learning_rate": 7.798107452369517e-08, "loss": 0.7099, "step": 21535 }, { "epoch": 0.87, "learning_rate": 7.773174788843218e-08, "loss": 0.6831, "step": 21540 }, { "epoch": 0.88, "learning_rate": 7.74828043553808e-08, "loss": 0.6205, "step": 21545 }, { "epoch": 0.88, "learning_rate": 7.723424402794998e-08, "loss": 0.649, "step": 21550 }, { "epoch": 0.88, "learning_rate": 7.698606700938936e-08, "loss": 0.6636, "step": 21555 }, { "epoch": 0.88, "learning_rate": 7.673827340278937e-08, "loss": 0.6314, "step": 21560 }, { "epoch": 0.88, "learning_rate": 7.649086331108178e-08, "loss": 0.6969, "step": 21565 }, { "epoch": 0.88, "learning_rate": 7.624383683703839e-08, "loss": 0.6516, "step": 21570 }, { "epoch": 0.88, "learning_rate": 7.599719408327155e-08, "loss": 0.7174, "step": 21575 }, { "epoch": 0.88, "learning_rate": 7.575093515223496e-08, "loss": 0.6436, "step": 21580 }, { "epoch": 0.88, "learning_rate": 7.550506014622215e-08, "loss": 0.6571, "step": 21585 }, { "epoch": 0.88, "learning_rate": 7.525956916736753e-08, "loss": 0.6919, "step": 21590 }, { "epoch": 0.88, "learning_rate": 7.501446231764607e-08, "loss": 0.6461, "step": 21595 }, { "epoch": 0.88, "learning_rate": 7.47697396988729e-08, "loss": 0.6169, "step": 21600 }, { "epoch": 0.88, "eval_loss": 0.6164625883102417, "eval_runtime": 140.1585, "eval_samples_per_second": 16.881, "eval_steps_per_second": 2.818, "step": 21600 }, { "epoch": 0.88, "learning_rate": 7.452540141270358e-08, "loss": 0.643, "step": 21605 }, { "epoch": 0.88, "learning_rate": 7.428144756063415e-08, "loss": 0.6571, "step": 21610 }, { "epoch": 0.88, "learning_rate": 7.403787824400098e-08, "loss": 0.6473, "step": 21615 }, { "epoch": 0.88, "learning_rate": 7.379469356398072e-08, "loss": 0.6089, "step": 21620 }, { "epoch": 0.88, "learning_rate": 7.355189362158997e-08, "loss": 0.6356, "step": 21625 }, { "epoch": 0.88, "learning_rate": 7.330947851768588e-08, "loss": 0.6338, "step": 21630 }, { "epoch": 0.88, "learning_rate": 7.306744835296563e-08, "loss": 0.6296, "step": 21635 }, { "epoch": 0.88, "learning_rate": 7.282580322796606e-08, "loss": 0.6403, "step": 21640 }, { "epoch": 0.88, "learning_rate": 7.258454324306495e-08, "loss": 0.649, "step": 21645 }, { "epoch": 0.88, "learning_rate": 7.23436684984794e-08, "loss": 0.6433, "step": 21650 }, { "epoch": 0.88, "learning_rate": 7.210317909426656e-08, "loss": 0.6741, "step": 21655 }, { "epoch": 0.88, "learning_rate": 7.186307513032364e-08, "loss": 0.6607, "step": 21660 }, { "epoch": 0.88, "learning_rate": 7.162335670638797e-08, "loss": 0.6845, "step": 21665 }, { "epoch": 0.88, "learning_rate": 7.138402392203646e-08, "loss": 0.5908, "step": 21670 }, { "epoch": 0.88, "learning_rate": 7.114507687668559e-08, "loss": 0.6756, "step": 21675 }, { "epoch": 0.88, "learning_rate": 7.090651566959216e-08, "loss": 0.6435, "step": 21680 }, { "epoch": 0.88, "learning_rate": 7.066834039985237e-08, "loss": 0.6275, "step": 21685 }, { "epoch": 0.88, "learning_rate": 7.043055116640206e-08, "loss": 0.6286, "step": 21690 }, { "epoch": 0.88, "learning_rate": 7.019314806801679e-08, "loss": 0.6561, "step": 21695 }, { "epoch": 0.88, "learning_rate": 6.99561312033119e-08, "loss": 0.6653, "step": 21700 }, { "epoch": 0.88, "learning_rate": 6.971950067074206e-08, "loss": 0.6333, "step": 21705 }, { "epoch": 0.88, "learning_rate": 6.948325656860143e-08, "loss": 0.6574, "step": 21710 }, { "epoch": 0.88, "learning_rate": 6.924739899502396e-08, "loss": 0.6581, "step": 21715 }, { "epoch": 0.88, "learning_rate": 6.901192804798272e-08, "loss": 0.6574, "step": 21720 }, { "epoch": 0.88, "learning_rate": 6.877684382529025e-08, "loss": 0.6292, "step": 21725 }, { "epoch": 0.88, "learning_rate": 6.854214642459855e-08, "loss": 0.6288, "step": 21730 }, { "epoch": 0.88, "learning_rate": 6.830783594339895e-08, "loss": 0.6242, "step": 21735 }, { "epoch": 0.88, "learning_rate": 6.807391247902195e-08, "loss": 0.6551, "step": 21740 }, { "epoch": 0.88, "learning_rate": 6.784037612863702e-08, "loss": 0.6485, "step": 21745 }, { "epoch": 0.88, "learning_rate": 6.760722698925358e-08, "loss": 0.6398, "step": 21750 }, { "epoch": 0.88, "learning_rate": 6.737446515771961e-08, "loss": 0.6063, "step": 21755 }, { "epoch": 0.88, "learning_rate": 6.714209073072218e-08, "loss": 0.6095, "step": 21760 }, { "epoch": 0.88, "learning_rate": 6.691010380478779e-08, "loss": 0.6306, "step": 21765 }, { "epoch": 0.88, "learning_rate": 6.667850447628175e-08, "loss": 0.5991, "step": 21770 }, { "epoch": 0.88, "learning_rate": 6.644729284140826e-08, "loss": 0.6475, "step": 21775 }, { "epoch": 0.88, "learning_rate": 6.621646899621091e-08, "loss": 0.6737, "step": 21780 }, { "epoch": 0.88, "learning_rate": 6.598603303657179e-08, "loss": 0.6395, "step": 21785 }, { "epoch": 0.89, "learning_rate": 6.5755985058212e-08, "loss": 0.6428, "step": 21790 }, { "epoch": 0.89, "learning_rate": 6.552632515669121e-08, "loss": 0.6312, "step": 21795 }, { "epoch": 0.89, "learning_rate": 6.529705342740843e-08, "loss": 0.6315, "step": 21800 }, { "epoch": 0.89, "learning_rate": 6.506816996560127e-08, "loss": 0.6268, "step": 21805 }, { "epoch": 0.89, "learning_rate": 6.483967486634546e-08, "loss": 0.664, "step": 21810 }, { "epoch": 0.89, "learning_rate": 6.461156822455638e-08, "loss": 0.6397, "step": 21815 }, { "epoch": 0.89, "learning_rate": 6.438385013498726e-08, "loss": 0.6273, "step": 21820 }, { "epoch": 0.89, "learning_rate": 6.415652069223032e-08, "loss": 0.6245, "step": 21825 }, { "epoch": 0.89, "learning_rate": 6.392957999071602e-08, "loss": 0.6921, "step": 21830 }, { "epoch": 0.89, "learning_rate": 6.370302812471384e-08, "loss": 0.6685, "step": 21835 }, { "epoch": 0.89, "learning_rate": 6.34768651883314e-08, "loss": 0.6563, "step": 21840 }, { "epoch": 0.89, "learning_rate": 6.325109127551465e-08, "loss": 0.6676, "step": 21845 }, { "epoch": 0.89, "learning_rate": 6.302570648004834e-08, "loss": 0.682, "step": 21850 }, { "epoch": 0.89, "learning_rate": 6.280071089555516e-08, "loss": 0.63, "step": 21855 }, { "epoch": 0.89, "learning_rate": 6.257610461549634e-08, "loss": 0.6781, "step": 21860 }, { "epoch": 0.89, "learning_rate": 6.235188773317146e-08, "loss": 0.6647, "step": 21865 }, { "epoch": 0.89, "learning_rate": 6.212806034171836e-08, "loss": 0.6611, "step": 21870 }, { "epoch": 0.89, "learning_rate": 6.190462253411277e-08, "loss": 0.658, "step": 21875 }, { "epoch": 0.89, "learning_rate": 6.16815744031688e-08, "loss": 0.6362, "step": 21880 }, { "epoch": 0.89, "learning_rate": 6.145891604153886e-08, "loss": 0.64, "step": 21885 }, { "epoch": 0.89, "learning_rate": 6.123664754171331e-08, "loss": 0.6428, "step": 21890 }, { "epoch": 0.89, "learning_rate": 6.101476899602043e-08, "loss": 0.6626, "step": 21895 }, { "epoch": 0.89, "learning_rate": 6.079328049662668e-08, "loss": 0.6502, "step": 21900 }, { "epoch": 0.89, "learning_rate": 6.057218213553661e-08, "loss": 0.6694, "step": 21905 }, { "epoch": 0.89, "learning_rate": 6.035147400459217e-08, "loss": 0.642, "step": 21910 }, { "epoch": 0.89, "learning_rate": 6.013115619547404e-08, "loss": 0.6864, "step": 21915 }, { "epoch": 0.89, "learning_rate": 5.991122879970012e-08, "loss": 0.636, "step": 21920 }, { "epoch": 0.89, "learning_rate": 5.969169190862644e-08, "loss": 0.6338, "step": 21925 }, { "epoch": 0.89, "learning_rate": 5.947254561344628e-08, "loss": 0.6647, "step": 21930 }, { "epoch": 0.89, "learning_rate": 5.9253790005191705e-08, "loss": 0.65, "step": 21935 }, { "epoch": 0.89, "learning_rate": 5.90354251747317e-08, "loss": 0.6274, "step": 21940 }, { "epoch": 0.89, "learning_rate": 5.8817451212772815e-08, "loss": 0.6559, "step": 21945 }, { "epoch": 0.89, "learning_rate": 5.859986820985985e-08, "loss": 0.6318, "step": 21950 }, { "epoch": 0.89, "learning_rate": 5.838267625637494e-08, "loss": 0.6755, "step": 21955 }, { "epoch": 0.89, "learning_rate": 5.8165875442537594e-08, "loss": 0.6342, "step": 21960 }, { "epoch": 0.89, "learning_rate": 5.7949465858404766e-08, "loss": 0.6707, "step": 21965 }, { "epoch": 0.89, "learning_rate": 5.773344759387155e-08, "loss": 0.6339, "step": 21970 }, { "epoch": 0.89, "learning_rate": 5.751782073866984e-08, "loss": 0.6389, "step": 21975 }, { "epoch": 0.89, "learning_rate": 5.730258538236909e-08, "loss": 0.6287, "step": 21980 }, { "epoch": 0.89, "learning_rate": 5.708774161437635e-08, "loss": 0.6844, "step": 21985 }, { "epoch": 0.89, "learning_rate": 5.6873289523935775e-08, "loss": 0.6497, "step": 21990 }, { "epoch": 0.89, "learning_rate": 5.665922920012878e-08, "loss": 0.7022, "step": 21995 }, { "epoch": 0.89, "learning_rate": 5.644556073187445e-08, "loss": 0.6279, "step": 22000 }, { "epoch": 0.89, "learning_rate": 5.6232284207928584e-08, "loss": 0.6533, "step": 22005 }, { "epoch": 0.89, "learning_rate": 5.601939971688452e-08, "loss": 0.635, "step": 22010 }, { "epoch": 0.89, "learning_rate": 5.580690734717241e-08, "loss": 0.6474, "step": 22015 }, { "epoch": 0.89, "learning_rate": 5.559480718706e-08, "loss": 0.6732, "step": 22020 }, { "epoch": 0.89, "learning_rate": 5.5383099324651684e-08, "loss": 0.6351, "step": 22025 }, { "epoch": 0.89, "learning_rate": 5.5171783847889006e-08, "loss": 0.6832, "step": 22030 }, { "epoch": 0.9, "learning_rate": 5.496086084455087e-08, "loss": 0.6403, "step": 22035 }, { "epoch": 0.9, "learning_rate": 5.475033040225274e-08, "loss": 0.6231, "step": 22040 }, { "epoch": 0.9, "learning_rate": 5.454019260844678e-08, "loss": 0.6585, "step": 22045 }, { "epoch": 0.9, "learning_rate": 5.433044755042293e-08, "loss": 0.6478, "step": 22050 }, { "epoch": 0.9, "learning_rate": 5.4121095315307173e-08, "loss": 0.627, "step": 22055 }, { "epoch": 0.9, "learning_rate": 5.3912135990062726e-08, "loss": 0.6431, "step": 22060 }, { "epoch": 0.9, "learning_rate": 5.370356966148914e-08, "loss": 0.6224, "step": 22065 }, { "epoch": 0.9, "learning_rate": 5.3495396416223584e-08, "loss": 0.6851, "step": 22070 }, { "epoch": 0.9, "learning_rate": 5.3287616340739084e-08, "loss": 0.6471, "step": 22075 }, { "epoch": 0.9, "learning_rate": 5.308022952134561e-08, "loss": 0.6231, "step": 22080 }, { "epoch": 0.9, "learning_rate": 5.287323604419014e-08, "loss": 0.6358, "step": 22085 }, { "epoch": 0.9, "learning_rate": 5.266663599525578e-08, "loss": 0.6723, "step": 22090 }, { "epoch": 0.9, "learning_rate": 5.246042946036244e-08, "loss": 0.674, "step": 22095 }, { "epoch": 0.9, "learning_rate": 5.225461652516639e-08, "loss": 0.6099, "step": 22100 }, { "epoch": 0.9, "learning_rate": 5.204919727516066e-08, "loss": 0.6175, "step": 22105 }, { "epoch": 0.9, "learning_rate": 5.184417179567468e-08, "loss": 0.6383, "step": 22110 }, { "epoch": 0.9, "learning_rate": 5.163954017187399e-08, "loss": 0.6796, "step": 22115 }, { "epoch": 0.9, "learning_rate": 5.143530248876116e-08, "loss": 0.6428, "step": 22120 }, { "epoch": 0.9, "learning_rate": 5.123145883117452e-08, "loss": 0.7068, "step": 22125 }, { "epoch": 0.9, "learning_rate": 5.102800928378881e-08, "loss": 0.6485, "step": 22130 }, { "epoch": 0.9, "learning_rate": 5.082495393111563e-08, "loss": 0.6488, "step": 22135 }, { "epoch": 0.9, "learning_rate": 5.062229285750208e-08, "loss": 0.639, "step": 22140 }, { "epoch": 0.9, "learning_rate": 5.0420026147131925e-08, "loss": 0.6629, "step": 22145 }, { "epoch": 0.9, "learning_rate": 5.021815388402473e-08, "loss": 0.6624, "step": 22150 }, { "epoch": 0.9, "learning_rate": 5.0016676152036974e-08, "loss": 0.6282, "step": 22155 }, { "epoch": 0.9, "learning_rate": 4.981559303486038e-08, "loss": 0.6319, "step": 22160 }, { "epoch": 0.9, "learning_rate": 4.9614904616023134e-08, "loss": 0.6655, "step": 22165 }, { "epoch": 0.9, "learning_rate": 4.941461097888966e-08, "loss": 0.6915, "step": 22170 }, { "epoch": 0.9, "learning_rate": 4.921471220666018e-08, "loss": 0.5924, "step": 22175 }, { "epoch": 0.9, "learning_rate": 4.901520838237061e-08, "loss": 0.6699, "step": 22180 }, { "epoch": 0.9, "learning_rate": 4.8816099588893436e-08, "loss": 0.635, "step": 22185 }, { "epoch": 0.9, "learning_rate": 4.86173859089366e-08, "loss": 0.6572, "step": 22190 }, { "epoch": 0.9, "learning_rate": 4.8419067425044094e-08, "loss": 0.6296, "step": 22195 }, { "epoch": 0.9, "learning_rate": 4.822114421959545e-08, "loss": 0.6598, "step": 22200 }, { "epoch": 0.9, "learning_rate": 4.8023616374806564e-08, "loss": 0.6451, "step": 22205 }, { "epoch": 0.9, "learning_rate": 4.782648397272859e-08, "loss": 0.6616, "step": 22210 }, { "epoch": 0.9, "learning_rate": 4.762974709524858e-08, "loss": 0.6583, "step": 22215 }, { "epoch": 0.9, "learning_rate": 4.743340582408961e-08, "loss": 0.66, "step": 22220 }, { "epoch": 0.9, "learning_rate": 4.723746024080988e-08, "loss": 0.6213, "step": 22225 }, { "epoch": 0.9, "learning_rate": 4.70419104268035e-08, "loss": 0.6316, "step": 22230 }, { "epoch": 0.9, "learning_rate": 4.6846756463300054e-08, "loss": 0.6505, "step": 22235 }, { "epoch": 0.9, "learning_rate": 4.665199843136513e-08, "loss": 0.6645, "step": 22240 }, { "epoch": 0.9, "learning_rate": 4.645763641189937e-08, "loss": 0.6523, "step": 22245 }, { "epoch": 0.9, "learning_rate": 4.626367048563884e-08, "loss": 0.6516, "step": 22250 }, { "epoch": 0.9, "learning_rate": 4.607010073315565e-08, "loss": 0.638, "step": 22255 }, { "epoch": 0.9, "learning_rate": 4.587692723485681e-08, "loss": 0.5924, "step": 22260 }, { "epoch": 0.9, "learning_rate": 4.5684150070984804e-08, "loss": 0.6316, "step": 22265 }, { "epoch": 0.9, "learning_rate": 4.549176932161791e-08, "loss": 0.6585, "step": 22270 }, { "epoch": 0.9, "learning_rate": 4.5299785066669205e-08, "loss": 0.6547, "step": 22275 }, { "epoch": 0.9, "learning_rate": 4.5108197385887335e-08, "loss": 0.6432, "step": 22280 }, { "epoch": 0.91, "learning_rate": 4.491700635885598e-08, "loss": 0.6075, "step": 22285 }, { "epoch": 0.91, "learning_rate": 4.4726212064994493e-08, "loss": 0.5892, "step": 22290 }, { "epoch": 0.91, "learning_rate": 4.453581458355704e-08, "loss": 0.6202, "step": 22295 }, { "epoch": 0.91, "learning_rate": 4.4345813993632905e-08, "loss": 0.6405, "step": 22300 }, { "epoch": 0.91, "learning_rate": 4.4156210374147075e-08, "loss": 0.6393, "step": 22305 }, { "epoch": 0.91, "learning_rate": 4.396700380385898e-08, "loss": 0.6548, "step": 22310 }, { "epoch": 0.91, "learning_rate": 4.377819436136332e-08, "loss": 0.6813, "step": 22315 }, { "epoch": 0.91, "learning_rate": 4.358978212509012e-08, "loss": 0.6689, "step": 22320 }, { "epoch": 0.91, "learning_rate": 4.340176717330413e-08, "loss": 0.6631, "step": 22325 }, { "epoch": 0.91, "learning_rate": 4.3214149584105076e-08, "loss": 0.6586, "step": 22330 }, { "epoch": 0.91, "learning_rate": 4.3026929435427516e-08, "loss": 0.6643, "step": 22335 }, { "epoch": 0.91, "learning_rate": 4.2840106805041354e-08, "loss": 0.654, "step": 22340 }, { "epoch": 0.91, "learning_rate": 4.2653681770550955e-08, "loss": 0.6688, "step": 22345 }, { "epoch": 0.91, "learning_rate": 4.2467654409395484e-08, "loss": 0.6761, "step": 22350 }, { "epoch": 0.91, "learning_rate": 4.228202479884946e-08, "loss": 0.6667, "step": 22355 }, { "epoch": 0.91, "learning_rate": 4.209679301602165e-08, "loss": 0.6749, "step": 22360 }, { "epoch": 0.91, "learning_rate": 4.191195913785561e-08, "loss": 0.6396, "step": 22365 }, { "epoch": 0.91, "learning_rate": 4.1727523241129606e-08, "loss": 0.6696, "step": 22370 }, { "epoch": 0.91, "learning_rate": 4.154348540245711e-08, "loss": 0.6364, "step": 22375 }, { "epoch": 0.91, "learning_rate": 4.135984569828566e-08, "loss": 0.6495, "step": 22380 }, { "epoch": 0.91, "learning_rate": 4.1176604204897434e-08, "loss": 0.6496, "step": 22385 }, { "epoch": 0.91, "learning_rate": 4.099376099840968e-08, "loss": 0.6268, "step": 22390 }, { "epoch": 0.91, "learning_rate": 4.0811316154773515e-08, "loss": 0.6527, "step": 22395 }, { "epoch": 0.91, "learning_rate": 4.06292697497751e-08, "loss": 0.651, "step": 22400 }, { "epoch": 0.91, "eval_loss": 0.6161190867424011, "eval_runtime": 139.4449, "eval_samples_per_second": 16.967, "eval_steps_per_second": 2.833, "step": 22400 }, { "epoch": 0.91, "learning_rate": 4.044762185903494e-08, "loss": 0.6551, "step": 22405 }, { "epoch": 0.91, "learning_rate": 4.026637255800813e-08, "loss": 0.6677, "step": 22410 }, { "epoch": 0.91, "learning_rate": 4.008552192198378e-08, "loss": 0.6738, "step": 22415 }, { "epoch": 0.91, "learning_rate": 3.9905070026085784e-08, "loss": 0.6642, "step": 22420 }, { "epoch": 0.91, "learning_rate": 3.9725016945272416e-08, "loss": 0.6399, "step": 22425 }, { "epoch": 0.91, "learning_rate": 3.9545362754335955e-08, "loss": 0.6706, "step": 22430 }, { "epoch": 0.91, "learning_rate": 3.936610752790326e-08, "loss": 0.686, "step": 22435 }, { "epoch": 0.91, "learning_rate": 3.9187251340435653e-08, "loss": 0.5905, "step": 22440 }, { "epoch": 0.91, "learning_rate": 3.900879426622794e-08, "loss": 0.5991, "step": 22445 }, { "epoch": 0.91, "learning_rate": 3.8830736379409814e-08, "loss": 0.6265, "step": 22450 }, { "epoch": 0.91, "learning_rate": 3.865307775394533e-08, "loss": 0.6525, "step": 22455 }, { "epoch": 0.91, "learning_rate": 3.84758184636319e-08, "loss": 0.6228, "step": 22460 }, { "epoch": 0.91, "learning_rate": 3.829895858210186e-08, "loss": 0.6736, "step": 22465 }, { "epoch": 0.91, "learning_rate": 3.812249818282076e-08, "loss": 0.6303, "step": 22470 }, { "epoch": 0.91, "learning_rate": 3.79464373390892e-08, "loss": 0.6367, "step": 22475 }, { "epoch": 0.91, "learning_rate": 3.777077612404123e-08, "loss": 0.6334, "step": 22480 }, { "epoch": 0.91, "learning_rate": 3.75955146106447e-08, "loss": 0.6403, "step": 22485 }, { "epoch": 0.91, "learning_rate": 3.742065287170215e-08, "loss": 0.6089, "step": 22490 }, { "epoch": 0.91, "learning_rate": 3.724619097984916e-08, "loss": 0.6471, "step": 22495 }, { "epoch": 0.91, "learning_rate": 3.707212900755608e-08, "loss": 0.6479, "step": 22500 }, { "epoch": 0.91, "learning_rate": 3.689846702712651e-08, "loss": 0.6429, "step": 22505 }, { "epoch": 0.91, "learning_rate": 3.672520511069821e-08, "loss": 0.6175, "step": 22510 }, { "epoch": 0.91, "learning_rate": 3.655234333024271e-08, "loss": 0.6745, "step": 22515 }, { "epoch": 0.91, "learning_rate": 3.637988175756512e-08, "loss": 0.6604, "step": 22520 }, { "epoch": 0.91, "learning_rate": 3.6207820464304814e-08, "loss": 0.6285, "step": 22525 }, { "epoch": 0.92, "learning_rate": 3.603615952193417e-08, "loss": 0.6314, "step": 22530 }, { "epoch": 0.92, "learning_rate": 3.5864899001759706e-08, "loss": 0.6703, "step": 22535 }, { "epoch": 0.92, "learning_rate": 3.569403897492185e-08, "loss": 0.6586, "step": 22540 }, { "epoch": 0.92, "learning_rate": 3.552357951239427e-08, "loss": 0.6588, "step": 22545 }, { "epoch": 0.92, "learning_rate": 3.5353520684984096e-08, "loss": 0.6623, "step": 22550 }, { "epoch": 0.92, "learning_rate": 3.51838625633325e-08, "loss": 0.6619, "step": 22555 }, { "epoch": 0.92, "learning_rate": 3.501460521791399e-08, "loss": 0.7056, "step": 22560 }, { "epoch": 0.92, "learning_rate": 3.484574871903656e-08, "loss": 0.6647, "step": 22565 }, { "epoch": 0.92, "learning_rate": 3.467729313684153e-08, "loss": 0.6277, "step": 22570 }, { "epoch": 0.92, "learning_rate": 3.4509238541304384e-08, "loss": 0.6705, "step": 22575 }, { "epoch": 0.92, "learning_rate": 3.4341585002232945e-08, "loss": 0.654, "step": 22580 }, { "epoch": 0.92, "learning_rate": 3.4174332589269385e-08, "loss": 0.6352, "step": 22585 }, { "epoch": 0.92, "learning_rate": 3.4007481371888915e-08, "loss": 0.6517, "step": 22590 }, { "epoch": 0.92, "learning_rate": 3.384103141940009e-08, "loss": 0.6898, "step": 22595 }, { "epoch": 0.92, "learning_rate": 3.3674982800944604e-08, "loss": 0.6067, "step": 22600 }, { "epoch": 0.92, "learning_rate": 3.350933558549751e-08, "loss": 0.6752, "step": 22605 }, { "epoch": 0.92, "learning_rate": 3.334408984186765e-08, "loss": 0.664, "step": 22610 }, { "epoch": 0.92, "learning_rate": 3.317924563869634e-08, "loss": 0.6458, "step": 22615 }, { "epoch": 0.92, "learning_rate": 3.301480304445836e-08, "loss": 0.7108, "step": 22620 }, { "epoch": 0.92, "learning_rate": 3.2850762127462184e-08, "loss": 0.6561, "step": 22625 }, { "epoch": 0.92, "learning_rate": 3.268712295584841e-08, "loss": 0.7372, "step": 22630 }, { "epoch": 0.92, "learning_rate": 3.252388559759156e-08, "loss": 0.6515, "step": 22635 }, { "epoch": 0.92, "learning_rate": 3.2361050120499275e-08, "loss": 0.6417, "step": 22640 }, { "epoch": 0.92, "learning_rate": 3.219861659221168e-08, "loss": 0.6537, "step": 22645 }, { "epoch": 0.92, "learning_rate": 3.203658508020235e-08, "loss": 0.6385, "step": 22650 }, { "epoch": 0.92, "learning_rate": 3.1874955651777667e-08, "loss": 0.6112, "step": 22655 }, { "epoch": 0.92, "learning_rate": 3.171372837407738e-08, "loss": 0.6437, "step": 22660 }, { "epoch": 0.92, "learning_rate": 3.155290331407357e-08, "loss": 0.6222, "step": 22665 }, { "epoch": 0.92, "learning_rate": 3.1392480538571574e-08, "loss": 0.6439, "step": 22670 }, { "epoch": 0.92, "learning_rate": 3.123246011420999e-08, "loss": 0.6528, "step": 22675 }, { "epoch": 0.92, "learning_rate": 3.107284210745953e-08, "loss": 0.6654, "step": 22680 }, { "epoch": 0.92, "learning_rate": 3.0913626584624266e-08, "loss": 0.6474, "step": 22685 }, { "epoch": 0.92, "learning_rate": 3.0754813611840846e-08, "loss": 0.6528, "step": 22690 }, { "epoch": 0.92, "learning_rate": 3.0596403255078954e-08, "loss": 0.6708, "step": 22695 }, { "epoch": 0.92, "learning_rate": 3.043839558014083e-08, "loss": 0.6365, "step": 22700 }, { "epoch": 0.92, "learning_rate": 3.028079065266142e-08, "loss": 0.6687, "step": 22705 }, { "epoch": 0.92, "learning_rate": 3.012358853810859e-08, "loss": 0.6645, "step": 22710 }, { "epoch": 0.92, "learning_rate": 2.9966789301782535e-08, "loss": 0.6366, "step": 22715 }, { "epoch": 0.92, "learning_rate": 2.981039300881627e-08, "loss": 0.6575, "step": 22720 }, { "epoch": 0.92, "learning_rate": 2.9654399724175828e-08, "loss": 0.6966, "step": 22725 }, { "epoch": 0.92, "learning_rate": 2.949880951265904e-08, "loss": 0.6842, "step": 22730 }, { "epoch": 0.92, "learning_rate": 2.9343622438896875e-08, "loss": 0.6586, "step": 22735 }, { "epoch": 0.92, "learning_rate": 2.918883856735277e-08, "loss": 0.6629, "step": 22740 }, { "epoch": 0.92, "learning_rate": 2.9034457962322513e-08, "loss": 0.6283, "step": 22745 }, { "epoch": 0.92, "learning_rate": 2.8880480687934473e-08, "loss": 0.6272, "step": 22750 }, { "epoch": 0.92, "learning_rate": 2.8726906808149486e-08, "loss": 0.616, "step": 22755 }, { "epoch": 0.92, "learning_rate": 2.857373638676097e-08, "loss": 0.6605, "step": 22760 }, { "epoch": 0.92, "learning_rate": 2.8420969487394143e-08, "loss": 0.6212, "step": 22765 }, { "epoch": 0.92, "learning_rate": 2.826860617350746e-08, "loss": 0.6844, "step": 22770 }, { "epoch": 0.93, "learning_rate": 2.8116646508391183e-08, "loss": 0.645, "step": 22775 }, { "epoch": 0.93, "learning_rate": 2.7965090555168047e-08, "loss": 0.6497, "step": 22780 }, { "epoch": 0.93, "learning_rate": 2.7813938376793134e-08, "loss": 0.6361, "step": 22785 }, { "epoch": 0.93, "learning_rate": 2.7663190036053552e-08, "loss": 0.5859, "step": 22790 }, { "epoch": 0.93, "learning_rate": 2.75128455955691e-08, "loss": 0.6026, "step": 22795 }, { "epoch": 0.93, "learning_rate": 2.7362905117791268e-08, "loss": 0.6694, "step": 22800 } ], "logging_steps": 5, "max_steps": 24619, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 400, "total_flos": 3187222391169024.0, "trial_name": null, "trial_params": null }