{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8047349566481491, "eval_steps": 500, "global_step": 46500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.999999999999999e-06, "loss": 11.0832, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.1999999999999999e-05, "loss": 10.9995, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.7999999999999997e-05, "loss": 10.8287, "step": 15 }, { "epoch": 0.0, "learning_rate": 2.3999999999999997e-05, "loss": 10.5347, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.9999999999999997e-05, "loss": 10.1317, "step": 25 }, { "epoch": 0.0, "learning_rate": 3.5999999999999994e-05, "loss": 9.7568, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.2e-05, "loss": 9.4066, "step": 35 }, { "epoch": 0.0, "learning_rate": 4.7999999999999994e-05, "loss": 9.0828, "step": 40 }, { "epoch": 0.0, "learning_rate": 5.399999999999999e-05, "loss": 8.7974, "step": 45 }, { "epoch": 0.0, "learning_rate": 5.9999999999999995e-05, "loss": 8.4785, "step": 50 }, { "epoch": 0.0, "learning_rate": 6.599999999999999e-05, "loss": 8.2844, "step": 55 }, { "epoch": 0.0, "learning_rate": 7.199999999999999e-05, "loss": 8.1159, "step": 60 }, { "epoch": 0.0, "learning_rate": 7.8e-05, "loss": 8.04, "step": 65 }, { "epoch": 0.0, "learning_rate": 8.4e-05, "loss": 8.0479, "step": 70 }, { "epoch": 0.0, "learning_rate": 8.999999999999999e-05, "loss": 7.785, "step": 75 }, { "epoch": 0.0, "learning_rate": 9.599999999999999e-05, "loss": 7.8489, "step": 80 }, { "epoch": 0.0, "learning_rate": 0.000102, "loss": 7.7499, "step": 85 }, { "epoch": 0.0, "learning_rate": 0.00010799999999999998, "loss": 7.596, "step": 90 }, { "epoch": 0.0, "learning_rate": 0.00011399999999999999, "loss": 7.6188, "step": 95 }, { "epoch": 0.0, "learning_rate": 0.00011999999999999999, "loss": 7.5564, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.00012599999999999997, "loss": 7.5576, "step": 105 }, { "epoch": 0.0, "learning_rate": 0.00013199999999999998, "loss": 7.5225, "step": 110 }, { "epoch": 0.0, "learning_rate": 0.000138, "loss": 7.4858, "step": 115 }, { "epoch": 0.0, "learning_rate": 0.00014399999999999998, "loss": 7.2874, "step": 120 }, { "epoch": 0.0, "learning_rate": 0.00015, "loss": 7.3418, "step": 125 }, { "epoch": 0.0, "learning_rate": 0.000156, "loss": 7.2647, "step": 130 }, { "epoch": 0.0, "learning_rate": 0.000162, "loss": 7.1983, "step": 135 }, { "epoch": 0.0, "learning_rate": 0.000168, "loss": 7.0973, "step": 140 }, { "epoch": 0.0, "learning_rate": 0.00017399999999999997, "loss": 7.1693, "step": 145 }, { "epoch": 0.0, "learning_rate": 0.00017999999999999998, "loss": 7.1801, "step": 150 }, { "epoch": 0.0, "learning_rate": 0.000186, "loss": 7.0995, "step": 155 }, { "epoch": 0.0, "learning_rate": 0.00019199999999999998, "loss": 6.9925, "step": 160 }, { "epoch": 0.0, "learning_rate": 0.000198, "loss": 6.9763, "step": 165 }, { "epoch": 0.0, "learning_rate": 0.000204, "loss": 6.9261, "step": 170 }, { "epoch": 0.0, "learning_rate": 0.00020999999999999998, "loss": 6.9174, "step": 175 }, { "epoch": 0.0, "learning_rate": 0.00021599999999999996, "loss": 6.9063, "step": 180 }, { "epoch": 0.0, "learning_rate": 0.00022199999999999998, "loss": 6.8622, "step": 185 }, { "epoch": 0.0, "learning_rate": 0.00022799999999999999, "loss": 6.897, "step": 190 }, { "epoch": 0.0, "learning_rate": 0.000234, "loss": 6.8112, "step": 195 }, { "epoch": 0.0, "learning_rate": 0.00023999999999999998, "loss": 6.7515, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.00024599999999999996, "loss": 6.7586, "step": 205 }, { "epoch": 0.0, "learning_rate": 0.00025199999999999995, "loss": 6.5961, "step": 210 }, { "epoch": 0.0, "learning_rate": 0.000258, "loss": 6.6598, "step": 215 }, { "epoch": 0.0, "learning_rate": 0.00026399999999999997, "loss": 6.6071, "step": 220 }, { "epoch": 0.0, "learning_rate": 0.00027, "loss": 6.5918, "step": 225 }, { "epoch": 0.0, "learning_rate": 0.000276, "loss": 6.539, "step": 230 }, { "epoch": 0.0, "learning_rate": 0.00028199999999999997, "loss": 6.4539, "step": 235 }, { "epoch": 0.0, "learning_rate": 0.00028799999999999995, "loss": 6.4418, "step": 240 }, { "epoch": 0.0, "learning_rate": 0.000294, "loss": 6.5303, "step": 245 }, { "epoch": 0.0, "learning_rate": 0.0003, "loss": 6.4021, "step": 250 }, { "epoch": 0.0, "learning_rate": 0.00030599999999999996, "loss": 6.4768, "step": 255 }, { "epoch": 0.0, "learning_rate": 0.000312, "loss": 6.3929, "step": 260 }, { "epoch": 0.0, "learning_rate": 0.000318, "loss": 6.3891, "step": 265 }, { "epoch": 0.0, "learning_rate": 0.000324, "loss": 6.4381, "step": 270 }, { "epoch": 0.0, "learning_rate": 0.00033, "loss": 6.3544, "step": 275 }, { "epoch": 0.0, "learning_rate": 0.000336, "loss": 6.355, "step": 280 }, { "epoch": 0.0, "learning_rate": 0.00034199999999999996, "loss": 6.3141, "step": 285 }, { "epoch": 0.01, "learning_rate": 0.00034799999999999995, "loss": 6.2884, "step": 290 }, { "epoch": 0.01, "learning_rate": 0.00035399999999999993, "loss": 6.3167, "step": 295 }, { "epoch": 0.01, "learning_rate": 0.00035999999999999997, "loss": 6.2576, "step": 300 }, { "epoch": 0.01, "learning_rate": 0.00036599999999999995, "loss": 6.2405, "step": 305 }, { "epoch": 0.01, "learning_rate": 0.000372, "loss": 6.2546, "step": 310 }, { "epoch": 0.01, "learning_rate": 0.00037799999999999997, "loss": 6.1438, "step": 315 }, { "epoch": 0.01, "learning_rate": 0.00038399999999999996, "loss": 6.1532, "step": 320 }, { "epoch": 0.01, "learning_rate": 0.00039, "loss": 6.1473, "step": 325 }, { "epoch": 0.01, "learning_rate": 0.000396, "loss": 6.2154, "step": 330 }, { "epoch": 0.01, "learning_rate": 0.000402, "loss": 6.0882, "step": 335 }, { "epoch": 0.01, "learning_rate": 0.000408, "loss": 6.2324, "step": 340 }, { "epoch": 0.01, "learning_rate": 0.0004139999999999999, "loss": 6.2041, "step": 345 }, { "epoch": 0.01, "learning_rate": 0.00041999999999999996, "loss": 6.0757, "step": 350 }, { "epoch": 0.01, "learning_rate": 0.00042599999999999995, "loss": 6.0776, "step": 355 }, { "epoch": 0.01, "learning_rate": 0.00043199999999999993, "loss": 6.032, "step": 360 }, { "epoch": 0.01, "learning_rate": 0.00043799999999999997, "loss": 6.1352, "step": 365 }, { "epoch": 0.01, "learning_rate": 0.00044399999999999995, "loss": 5.9888, "step": 370 }, { "epoch": 0.01, "learning_rate": 0.00045, "loss": 6.0221, "step": 375 }, { "epoch": 0.01, "learning_rate": 0.00045599999999999997, "loss": 6.0419, "step": 380 }, { "epoch": 0.01, "learning_rate": 0.00046199999999999995, "loss": 5.9985, "step": 385 }, { "epoch": 0.01, "learning_rate": 0.000468, "loss": 5.8769, "step": 390 }, { "epoch": 0.01, "learning_rate": 0.000474, "loss": 5.9902, "step": 395 }, { "epoch": 0.01, "learning_rate": 0.00047999999999999996, "loss": 5.9365, "step": 400 }, { "epoch": 0.01, "learning_rate": 0.000486, "loss": 5.895, "step": 405 }, { "epoch": 0.01, "learning_rate": 0.0004919999999999999, "loss": 5.9953, "step": 410 }, { "epoch": 0.01, "learning_rate": 0.000498, "loss": 5.8948, "step": 415 }, { "epoch": 0.01, "learning_rate": 0.0005039999999999999, "loss": 5.9516, "step": 420 }, { "epoch": 0.01, "learning_rate": 0.0005099999999999999, "loss": 5.8119, "step": 425 }, { "epoch": 0.01, "learning_rate": 0.000516, "loss": 5.8849, "step": 430 }, { "epoch": 0.01, "learning_rate": 0.000522, "loss": 5.8657, "step": 435 }, { "epoch": 0.01, "learning_rate": 0.0005279999999999999, "loss": 5.7593, "step": 440 }, { "epoch": 0.01, "learning_rate": 0.000534, "loss": 5.8779, "step": 445 }, { "epoch": 0.01, "learning_rate": 0.00054, "loss": 5.8816, "step": 450 }, { "epoch": 0.01, "learning_rate": 0.0005459999999999999, "loss": 5.813, "step": 455 }, { "epoch": 0.01, "learning_rate": 0.000552, "loss": 5.8118, "step": 460 }, { "epoch": 0.01, "learning_rate": 0.000558, "loss": 5.7594, "step": 465 }, { "epoch": 0.01, "learning_rate": 0.0005639999999999999, "loss": 5.8195, "step": 470 }, { "epoch": 0.01, "learning_rate": 0.00057, "loss": 5.7352, "step": 475 }, { "epoch": 0.01, "learning_rate": 0.0005759999999999999, "loss": 5.6983, "step": 480 }, { "epoch": 0.01, "learning_rate": 0.0005819999999999999, "loss": 5.7638, "step": 485 }, { "epoch": 0.01, "learning_rate": 0.000588, "loss": 5.8061, "step": 490 }, { "epoch": 0.01, "learning_rate": 0.0005939999999999999, "loss": 5.8155, "step": 495 }, { "epoch": 0.01, "learning_rate": 0.0006, "loss": 5.667, "step": 500 }, { "epoch": 0.01, "eval_loss": 5.787242889404297, "eval_runtime": 151.0777, "eval_samples_per_second": 12.186, "eval_steps_per_second": 0.768, "step": 500 }, { "epoch": 0.01, "learning_rate": 0.0005999999887207687, "loss": 5.7718, "step": 505 }, { "epoch": 0.01, "learning_rate": 0.0005999999548830758, "loss": 5.673, "step": 510 }, { "epoch": 0.01, "learning_rate": 0.0005999998984869237, "loss": 5.6713, "step": 515 }, { "epoch": 0.01, "learning_rate": 0.0005999998195323167, "loss": 5.6457, "step": 520 }, { "epoch": 0.01, "learning_rate": 0.0005999997180192608, "loss": 5.7369, "step": 525 }, { "epoch": 0.01, "learning_rate": 0.0005999995939477636, "loss": 5.7537, "step": 530 }, { "epoch": 0.01, "learning_rate": 0.0005999994473178343, "loss": 5.721, "step": 535 }, { "epoch": 0.01, "learning_rate": 0.0005999992781294842, "loss": 5.6126, "step": 540 }, { "epoch": 0.01, "learning_rate": 0.0005999990863827257, "loss": 5.5628, "step": 545 }, { "epoch": 0.01, "learning_rate": 0.0005999988720775734, "loss": 5.5213, "step": 550 }, { "epoch": 0.01, "learning_rate": 0.0005999986352140434, "loss": 5.5386, "step": 555 }, { "epoch": 0.01, "learning_rate": 0.0005999983757921536, "loss": 5.6003, "step": 560 }, { "epoch": 0.01, "learning_rate": 0.0005999980938119233, "loss": 5.5606, "step": 565 }, { "epoch": 0.01, "learning_rate": 0.0005999977892733739, "loss": 5.6028, "step": 570 }, { "epoch": 0.01, "learning_rate": 0.0005999974621765282, "loss": 5.6032, "step": 575 }, { "epoch": 0.01, "learning_rate": 0.0005999971125214107, "loss": 5.5227, "step": 580 }, { "epoch": 0.01, "learning_rate": 0.0005999967403080478, "loss": 5.5631, "step": 585 }, { "epoch": 0.01, "learning_rate": 0.0005999963455364676, "loss": 5.5767, "step": 590 }, { "epoch": 0.01, "learning_rate": 0.0005999959282066996, "loss": 5.5166, "step": 595 }, { "epoch": 0.01, "learning_rate": 0.0005999954883187754, "loss": 5.5309, "step": 600 }, { "epoch": 0.01, "learning_rate": 0.0005999950258727278, "loss": 5.4728, "step": 605 }, { "epoch": 0.01, "learning_rate": 0.0005999945408685917, "loss": 5.4537, "step": 610 }, { "epoch": 0.01, "learning_rate": 0.0005999940333064035, "loss": 5.4461, "step": 615 }, { "epoch": 0.01, "learning_rate": 0.0005999935031862015, "loss": 5.4692, "step": 620 }, { "epoch": 0.01, "learning_rate": 0.0005999929505080256, "loss": 5.4129, "step": 625 }, { "epoch": 0.01, "learning_rate": 0.0005999923752719172, "loss": 5.4537, "step": 630 }, { "epoch": 0.01, "learning_rate": 0.0005999917774779196, "loss": 5.3996, "step": 635 }, { "epoch": 0.01, "learning_rate": 0.0005999911571260778, "loss": 5.4168, "step": 640 }, { "epoch": 0.01, "learning_rate": 0.0005999905142164384, "loss": 5.4616, "step": 645 }, { "epoch": 0.01, "learning_rate": 0.0005999898487490498, "loss": 5.4191, "step": 650 }, { "epoch": 0.01, "learning_rate": 0.0005999891607239619, "loss": 5.3498, "step": 655 }, { "epoch": 0.01, "learning_rate": 0.0005999884501412266, "loss": 5.5168, "step": 660 }, { "epoch": 0.01, "learning_rate": 0.0005999877170008972, "loss": 5.4529, "step": 665 }, { "epoch": 0.01, "learning_rate": 0.0005999869613030289, "loss": 5.4247, "step": 670 }, { "epoch": 0.01, "learning_rate": 0.0005999861830476786, "loss": 5.3704, "step": 675 }, { "epoch": 0.01, "learning_rate": 0.0005999853822349047, "loss": 5.314, "step": 680 }, { "epoch": 0.01, "learning_rate": 0.0005999845588647674, "loss": 5.263, "step": 685 }, { "epoch": 0.01, "learning_rate": 0.0005999837129373288, "loss": 5.3963, "step": 690 }, { "epoch": 0.01, "learning_rate": 0.0005999828444526523, "loss": 5.3916, "step": 695 }, { "epoch": 0.01, "learning_rate": 0.0005999819534108033, "loss": 5.3184, "step": 700 }, { "epoch": 0.01, "learning_rate": 0.0005999810398118487, "loss": 5.4009, "step": 705 }, { "epoch": 0.01, "learning_rate": 0.0005999801036558574, "loss": 5.3957, "step": 710 }, { "epoch": 0.01, "learning_rate": 0.0005999791449428996, "loss": 5.2775, "step": 715 }, { "epoch": 0.01, "learning_rate": 0.0005999781636730476, "loss": 5.2826, "step": 720 }, { "epoch": 0.01, "learning_rate": 0.0005999771598463749, "loss": 5.2916, "step": 725 }, { "epoch": 0.01, "learning_rate": 0.0005999761334629572, "loss": 5.3436, "step": 730 }, { "epoch": 0.01, "learning_rate": 0.0005999750845228717, "loss": 5.3674, "step": 735 }, { "epoch": 0.01, "learning_rate": 0.0005999740130261971, "loss": 5.2603, "step": 740 }, { "epoch": 0.01, "learning_rate": 0.0005999729189730139, "loss": 5.2142, "step": 745 }, { "epoch": 0.01, "learning_rate": 0.0005999718023634048, "loss": 5.3698, "step": 750 }, { "epoch": 0.01, "learning_rate": 0.0005999706631974534, "loss": 5.2052, "step": 755 }, { "epoch": 0.01, "learning_rate": 0.0005999695014752454, "loss": 5.2727, "step": 760 }, { "epoch": 0.01, "learning_rate": 0.0005999683171968683, "loss": 5.2406, "step": 765 }, { "epoch": 0.01, "learning_rate": 0.000599967110362411, "loss": 5.2282, "step": 770 }, { "epoch": 0.01, "learning_rate": 0.0005999658809719644, "loss": 5.1632, "step": 775 }, { "epoch": 0.01, "learning_rate": 0.0005999646290256208, "loss": 5.1701, "step": 780 }, { "epoch": 0.01, "learning_rate": 0.0005999633545234743, "loss": 5.1785, "step": 785 }, { "epoch": 0.01, "learning_rate": 0.0005999620574656209, "loss": 5.2017, "step": 790 }, { "epoch": 0.01, "learning_rate": 0.000599960737852158, "loss": 5.2983, "step": 795 }, { "epoch": 0.01, "learning_rate": 0.000599959395683185, "loss": 5.1982, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.0005999580309588026, "loss": 5.1416, "step": 805 }, { "epoch": 0.01, "learning_rate": 0.0005999566436791137, "loss": 5.1912, "step": 810 }, { "epoch": 0.01, "learning_rate": 0.0005999552338442223, "loss": 5.2655, "step": 815 }, { "epoch": 0.01, "learning_rate": 0.0005999538014542347, "loss": 5.188, "step": 820 }, { "epoch": 0.01, "learning_rate": 0.0005999523465092583, "loss": 5.1928, "step": 825 }, { "epoch": 0.01, "learning_rate": 0.0005999508690094028, "loss": 5.1108, "step": 830 }, { "epoch": 0.01, "learning_rate": 0.000599949368954779, "loss": 5.1586, "step": 835 }, { "epoch": 0.01, "learning_rate": 0.0005999478463455001, "loss": 5.1363, "step": 840 }, { "epoch": 0.01, "learning_rate": 0.0005999463011816801, "loss": 5.142, "step": 845 }, { "epoch": 0.01, "learning_rate": 0.0005999447334634358, "loss": 5.1496, "step": 850 }, { "epoch": 0.01, "learning_rate": 0.0005999431431908844, "loss": 5.052, "step": 855 }, { "epoch": 0.01, "learning_rate": 0.0005999415303641459, "loss": 5.0762, "step": 860 }, { "epoch": 0.01, "learning_rate": 0.0005999398949833416, "loss": 5.1514, "step": 865 }, { "epoch": 0.02, "learning_rate": 0.0005999382370485942, "loss": 5.1176, "step": 870 }, { "epoch": 0.02, "learning_rate": 0.0005999365565600288, "loss": 5.1016, "step": 875 }, { "epoch": 0.02, "learning_rate": 0.0005999348535177713, "loss": 5.1446, "step": 880 }, { "epoch": 0.02, "learning_rate": 0.00059993312792195, "loss": 4.9957, "step": 885 }, { "epoch": 0.02, "learning_rate": 0.0005999313797726947, "loss": 5.1482, "step": 890 }, { "epoch": 0.02, "learning_rate": 0.0005999296090701366, "loss": 5.0768, "step": 895 }, { "epoch": 0.02, "learning_rate": 0.0005999278158144091, "loss": 5.2004, "step": 900 }, { "epoch": 0.02, "learning_rate": 0.0005999260000056469, "loss": 5.0756, "step": 905 }, { "epoch": 0.02, "learning_rate": 0.0005999241616439867, "loss": 5.1679, "step": 910 }, { "epoch": 0.02, "learning_rate": 0.0005999223007295666, "loss": 5.0844, "step": 915 }, { "epoch": 0.02, "learning_rate": 0.0005999204172625265, "loss": 5.1273, "step": 920 }, { "epoch": 0.02, "learning_rate": 0.0005999185112430082, "loss": 5.0067, "step": 925 }, { "epoch": 0.02, "learning_rate": 0.0005999165826711547, "loss": 5.0033, "step": 930 }, { "epoch": 0.02, "learning_rate": 0.0005999146315471115, "loss": 5.0368, "step": 935 }, { "epoch": 0.02, "learning_rate": 0.0005999126578710249, "loss": 5.0261, "step": 940 }, { "epoch": 0.02, "learning_rate": 0.0005999106616430435, "loss": 5.0727, "step": 945 }, { "epoch": 0.02, "learning_rate": 0.0005999086428633173, "loss": 5.0198, "step": 950 }, { "epoch": 0.02, "learning_rate": 0.0005999066015319981, "loss": 5.0701, "step": 955 }, { "epoch": 0.02, "learning_rate": 0.0005999045376492396, "loss": 5.0427, "step": 960 }, { "epoch": 0.02, "learning_rate": 0.0005999024512151967, "loss": 5.1675, "step": 965 }, { "epoch": 0.02, "learning_rate": 0.0005999003422300267, "loss": 5.0997, "step": 970 }, { "epoch": 0.02, "learning_rate": 0.0005998982106938878, "loss": 5.1069, "step": 975 }, { "epoch": 0.02, "learning_rate": 0.0005998960566069404, "loss": 5.0461, "step": 980 }, { "epoch": 0.02, "learning_rate": 0.0005998938799693464, "loss": 5.0177, "step": 985 }, { "epoch": 0.02, "learning_rate": 0.0005998916807812696, "loss": 4.9937, "step": 990 }, { "epoch": 0.02, "learning_rate": 0.0005998894590428755, "loss": 5.0079, "step": 995 }, { "epoch": 0.02, "learning_rate": 0.0005998872147543309, "loss": 4.9904, "step": 1000 }, { "epoch": 0.02, "eval_loss": 5.063974857330322, "eval_runtime": 150.7726, "eval_samples_per_second": 12.21, "eval_steps_per_second": 0.769, "step": 1000 }, { "epoch": 0.02, "learning_rate": 0.0005998849479158045, "loss": 5.0673, "step": 1005 }, { "epoch": 0.02, "learning_rate": 0.0005998826585274671, "loss": 4.9611, "step": 1010 }, { "epoch": 0.02, "learning_rate": 0.0005998803465894905, "loss": 5.0122, "step": 1015 }, { "epoch": 0.02, "learning_rate": 0.0005998780121020487, "loss": 5.0227, "step": 1020 }, { "epoch": 0.02, "learning_rate": 0.0005998756550653174, "loss": 4.9941, "step": 1025 }, { "epoch": 0.02, "learning_rate": 0.0005998732754794736, "loss": 5.0426, "step": 1030 }, { "epoch": 0.02, "learning_rate": 0.0005998708733446962, "loss": 4.9343, "step": 1035 }, { "epoch": 0.02, "learning_rate": 0.000599868448661166, "loss": 5.0083, "step": 1040 }, { "epoch": 0.02, "learning_rate": 0.0005998660014290653, "loss": 5.0275, "step": 1045 }, { "epoch": 0.02, "learning_rate": 0.0005998635316485781, "loss": 5.0044, "step": 1050 }, { "epoch": 0.02, "learning_rate": 0.0005998610393198902, "loss": 5.0977, "step": 1055 }, { "epoch": 0.02, "learning_rate": 0.0005998585244431887, "loss": 4.9482, "step": 1060 }, { "epoch": 0.02, "learning_rate": 0.0005998559870186629, "loss": 5.007, "step": 1065 }, { "epoch": 0.02, "learning_rate": 0.0005998534270465038, "loss": 4.9826, "step": 1070 }, { "epoch": 0.02, "learning_rate": 0.0005998508445269035, "loss": 4.9924, "step": 1075 }, { "epoch": 0.02, "learning_rate": 0.0005998482394600566, "loss": 4.9346, "step": 1080 }, { "epoch": 0.02, "learning_rate": 0.0005998456118461587, "loss": 4.9776, "step": 1085 }, { "epoch": 0.02, "learning_rate": 0.0005998429616854074, "loss": 5.0336, "step": 1090 }, { "epoch": 0.02, "learning_rate": 0.0005998402889780021, "loss": 4.9965, "step": 1095 }, { "epoch": 0.02, "learning_rate": 0.0005998375937241438, "loss": 4.9999, "step": 1100 }, { "epoch": 0.02, "learning_rate": 0.0005998348759240351, "loss": 4.9897, "step": 1105 }, { "epoch": 0.02, "learning_rate": 0.0005998321355778803, "loss": 4.904, "step": 1110 }, { "epoch": 0.02, "learning_rate": 0.0005998293726858855, "loss": 5.0172, "step": 1115 }, { "epoch": 0.02, "learning_rate": 0.0005998265872482585, "loss": 4.9544, "step": 1120 }, { "epoch": 0.02, "learning_rate": 0.0005998237792652088, "loss": 4.9249, "step": 1125 }, { "epoch": 0.02, "learning_rate": 0.0005998209487369473, "loss": 5.0203, "step": 1130 }, { "epoch": 0.02, "learning_rate": 0.0005998180956636872, "loss": 4.9552, "step": 1135 }, { "epoch": 0.02, "learning_rate": 0.0005998152200456427, "loss": 4.9835, "step": 1140 }, { "epoch": 0.02, "learning_rate": 0.0005998123218830302, "loss": 4.9234, "step": 1145 }, { "epoch": 0.02, "learning_rate": 0.0005998094011760677, "loss": 4.9115, "step": 1150 }, { "epoch": 0.02, "learning_rate": 0.0005998064579249748, "loss": 4.914, "step": 1155 }, { "epoch": 0.02, "learning_rate": 0.0005998034921299726, "loss": 4.9211, "step": 1160 }, { "epoch": 0.02, "learning_rate": 0.0005998005037912842, "loss": 4.9112, "step": 1165 }, { "epoch": 0.02, "learning_rate": 0.0005997974929091345, "loss": 4.9139, "step": 1170 }, { "epoch": 0.02, "learning_rate": 0.0005997944594837497, "loss": 4.9096, "step": 1175 }, { "epoch": 0.02, "learning_rate": 0.0005997914035153581, "loss": 4.8065, "step": 1180 }, { "epoch": 0.02, "learning_rate": 0.0005997883250041892, "loss": 4.9562, "step": 1185 }, { "epoch": 0.02, "learning_rate": 0.0005997852239504747, "loss": 4.939, "step": 1190 }, { "epoch": 0.02, "learning_rate": 0.0005997821003544478, "loss": 4.8459, "step": 1195 }, { "epoch": 0.02, "learning_rate": 0.0005997789542163432, "loss": 4.8906, "step": 1200 }, { "epoch": 0.02, "learning_rate": 0.0005997757855363976, "loss": 4.8547, "step": 1205 }, { "epoch": 0.02, "learning_rate": 0.0005997725943148494, "loss": 4.9617, "step": 1210 }, { "epoch": 0.02, "learning_rate": 0.0005997693805519383, "loss": 4.8849, "step": 1215 }, { "epoch": 0.02, "learning_rate": 0.0005997661442479061, "loss": 4.8716, "step": 1220 }, { "epoch": 0.02, "learning_rate": 0.0005997628854029961, "loss": 4.83, "step": 1225 }, { "epoch": 0.02, "learning_rate": 0.0005997596040174534, "loss": 4.8464, "step": 1230 }, { "epoch": 0.02, "learning_rate": 0.0005997563000915248, "loss": 4.8273, "step": 1235 }, { "epoch": 0.02, "learning_rate": 0.0005997529736254585, "loss": 4.8679, "step": 1240 }, { "epoch": 0.02, "learning_rate": 0.0005997496246195049, "loss": 4.8066, "step": 1245 }, { "epoch": 0.02, "learning_rate": 0.0005997462530739157, "loss": 4.7826, "step": 1250 }, { "epoch": 0.02, "learning_rate": 0.0005997428589889444, "loss": 4.8587, "step": 1255 }, { "epoch": 0.02, "learning_rate": 0.0005997394423648463, "loss": 4.8974, "step": 1260 }, { "epoch": 0.02, "learning_rate": 0.0005997360032018783, "loss": 4.8758, "step": 1265 }, { "epoch": 0.02, "learning_rate": 0.000599732541500299, "loss": 4.8788, "step": 1270 }, { "epoch": 0.02, "learning_rate": 0.0005997290572603687, "loss": 4.8967, "step": 1275 }, { "epoch": 0.02, "learning_rate": 0.0005997255504823492, "loss": 4.8774, "step": 1280 }, { "epoch": 0.02, "learning_rate": 0.0005997220211665045, "loss": 4.8542, "step": 1285 }, { "epoch": 0.02, "learning_rate": 0.0005997184693130999, "loss": 4.7275, "step": 1290 }, { "epoch": 0.02, "learning_rate": 0.0005997148949224023, "loss": 4.807, "step": 1295 }, { "epoch": 0.02, "learning_rate": 0.0005997112979946809, "loss": 4.8433, "step": 1300 }, { "epoch": 0.02, "learning_rate": 0.0005997076785302057, "loss": 4.9526, "step": 1305 }, { "epoch": 0.02, "learning_rate": 0.000599704036529249, "loss": 4.8017, "step": 1310 }, { "epoch": 0.02, "learning_rate": 0.0005997003719920847, "loss": 4.8443, "step": 1315 }, { "epoch": 0.02, "learning_rate": 0.0005996966849189884, "loss": 4.823, "step": 1320 }, { "epoch": 0.02, "learning_rate": 0.0005996929753102374, "loss": 4.8656, "step": 1325 }, { "epoch": 0.02, "learning_rate": 0.0005996892431661104, "loss": 4.7528, "step": 1330 }, { "epoch": 0.02, "learning_rate": 0.0005996854884868884, "loss": 4.804, "step": 1335 }, { "epoch": 0.02, "learning_rate": 0.0005996817112728534, "loss": 4.8494, "step": 1340 }, { "epoch": 0.02, "learning_rate": 0.0005996779115242896, "loss": 4.8679, "step": 1345 }, { "epoch": 0.02, "learning_rate": 0.0005996740892414827, "loss": 4.7677, "step": 1350 }, { "epoch": 0.02, "learning_rate": 0.0005996702444247201, "loss": 4.7851, "step": 1355 }, { "epoch": 0.02, "learning_rate": 0.0005996663770742908, "loss": 4.8101, "step": 1360 }, { "epoch": 0.02, "learning_rate": 0.0005996624871904858, "loss": 4.8664, "step": 1365 }, { "epoch": 0.02, "learning_rate": 0.0005996585747735975, "loss": 4.8919, "step": 1370 }, { "epoch": 0.02, "learning_rate": 0.0005996546398239201, "loss": 4.7878, "step": 1375 }, { "epoch": 0.02, "learning_rate": 0.0005996506823417495, "loss": 4.8748, "step": 1380 }, { "epoch": 0.02, "learning_rate": 0.0005996467023273832, "loss": 4.8132, "step": 1385 }, { "epoch": 0.02, "learning_rate": 0.0005996426997811207, "loss": 4.8323, "step": 1390 }, { "epoch": 0.02, "learning_rate": 0.0005996386747032627, "loss": 4.7567, "step": 1395 }, { "epoch": 0.02, "learning_rate": 0.0005996346270941119, "loss": 4.7993, "step": 1400 }, { "epoch": 0.02, "learning_rate": 0.0005996305569539728, "loss": 4.7315, "step": 1405 }, { "epoch": 0.02, "learning_rate": 0.0005996264642831516, "loss": 4.8493, "step": 1410 }, { "epoch": 0.02, "learning_rate": 0.0005996223490819556, "loss": 4.7694, "step": 1415 }, { "epoch": 0.02, "learning_rate": 0.0005996182113506947, "loss": 4.7332, "step": 1420 }, { "epoch": 0.02, "learning_rate": 0.0005996140510896796, "loss": 4.8223, "step": 1425 }, { "epoch": 0.02, "learning_rate": 0.0005996098682992235, "loss": 4.7319, "step": 1430 }, { "epoch": 0.02, "learning_rate": 0.0005996056629796408, "loss": 4.8006, "step": 1435 }, { "epoch": 0.02, "learning_rate": 0.0005996014351312476, "loss": 4.844, "step": 1440 }, { "epoch": 0.03, "learning_rate": 0.0005995971847543621, "loss": 4.7525, "step": 1445 }, { "epoch": 0.03, "learning_rate": 0.0005995929118493034, "loss": 4.7649, "step": 1450 }, { "epoch": 0.03, "learning_rate": 0.0005995886164163934, "loss": 4.7703, "step": 1455 }, { "epoch": 0.03, "learning_rate": 0.0005995842984559547, "loss": 4.8856, "step": 1460 }, { "epoch": 0.03, "learning_rate": 0.0005995799579683121, "loss": 4.7844, "step": 1465 }, { "epoch": 0.03, "learning_rate": 0.0005995755949537919, "loss": 4.7843, "step": 1470 }, { "epoch": 0.03, "learning_rate": 0.0005995712094127224, "loss": 4.7452, "step": 1475 }, { "epoch": 0.03, "learning_rate": 0.0005995668013454332, "loss": 4.7533, "step": 1480 }, { "epoch": 0.03, "learning_rate": 0.0005995623707522558, "loss": 4.7856, "step": 1485 }, { "epoch": 0.03, "learning_rate": 0.0005995579176335233, "loss": 4.8798, "step": 1490 }, { "epoch": 0.03, "learning_rate": 0.0005995534419895707, "loss": 4.7446, "step": 1495 }, { "epoch": 0.03, "learning_rate": 0.0005995489438207344, "loss": 4.7522, "step": 1500 }, { "epoch": 0.03, "eval_loss": 4.799483776092529, "eval_runtime": 150.7804, "eval_samples_per_second": 12.21, "eval_steps_per_second": 0.769, "step": 1500 }, { "epoch": 0.03, "learning_rate": 0.0005995444231273526, "loss": 4.6847, "step": 1505 }, { "epoch": 0.03, "learning_rate": 0.0005995398799097654, "loss": 4.846, "step": 1510 }, { "epoch": 0.03, "learning_rate": 0.0005995353141683144, "loss": 4.7079, "step": 1515 }, { "epoch": 0.03, "learning_rate": 0.0005995307259033428, "loss": 4.7785, "step": 1520 }, { "epoch": 0.03, "learning_rate": 0.0005995261151151957, "loss": 4.8516, "step": 1525 }, { "epoch": 0.03, "learning_rate": 0.0005995214818042198, "loss": 4.7627, "step": 1530 }, { "epoch": 0.03, "learning_rate": 0.0005995168259707635, "loss": 4.7492, "step": 1535 }, { "epoch": 0.03, "learning_rate": 0.0005995121476151769, "loss": 4.767, "step": 1540 }, { "epoch": 0.03, "learning_rate": 0.0005995074467378117, "loss": 4.7328, "step": 1545 }, { "epoch": 0.03, "learning_rate": 0.0005995027233390215, "loss": 4.622, "step": 1550 }, { "epoch": 0.03, "learning_rate": 0.0005994979774191614, "loss": 4.7061, "step": 1555 }, { "epoch": 0.03, "learning_rate": 0.0005994932089785883, "loss": 4.7718, "step": 1560 }, { "epoch": 0.03, "learning_rate": 0.0005994884180176608, "loss": 4.8252, "step": 1565 }, { "epoch": 0.03, "learning_rate": 0.0005994836045367391, "loss": 4.7833, "step": 1570 }, { "epoch": 0.03, "learning_rate": 0.0005994787685361852, "loss": 4.7973, "step": 1575 }, { "epoch": 0.03, "learning_rate": 0.0005994739100163627, "loss": 4.8151, "step": 1580 }, { "epoch": 0.03, "learning_rate": 0.0005994690289776368, "loss": 4.6815, "step": 1585 }, { "epoch": 0.03, "learning_rate": 0.0005994641254203749, "loss": 4.6592, "step": 1590 }, { "epoch": 0.03, "learning_rate": 0.0005994591993449455, "loss": 4.6454, "step": 1595 }, { "epoch": 0.03, "learning_rate": 0.0005994542507517188, "loss": 4.7652, "step": 1600 }, { "epoch": 0.03, "learning_rate": 0.0005994492796410673, "loss": 4.6894, "step": 1605 }, { "epoch": 0.03, "learning_rate": 0.0005994442860133647, "loss": 4.7193, "step": 1610 }, { "epoch": 0.03, "learning_rate": 0.0005994392698689862, "loss": 4.6321, "step": 1615 }, { "epoch": 0.03, "learning_rate": 0.0005994342312083093, "loss": 4.7376, "step": 1620 }, { "epoch": 0.03, "learning_rate": 0.0005994291700317127, "loss": 4.7212, "step": 1625 }, { "epoch": 0.03, "learning_rate": 0.0005994240863395772, "loss": 4.6763, "step": 1630 }, { "epoch": 0.03, "learning_rate": 0.0005994189801322849, "loss": 4.6659, "step": 1635 }, { "epoch": 0.03, "learning_rate": 0.0005994138514102198, "loss": 4.6895, "step": 1640 }, { "epoch": 0.03, "learning_rate": 0.0005994087001737674, "loss": 4.7019, "step": 1645 }, { "epoch": 0.03, "learning_rate": 0.0005994035264233154, "loss": 4.6882, "step": 1650 }, { "epoch": 0.03, "learning_rate": 0.0005993983301592525, "loss": 4.724, "step": 1655 }, { "epoch": 0.03, "learning_rate": 0.0005993931113819697, "loss": 4.6239, "step": 1660 }, { "epoch": 0.03, "learning_rate": 0.0005993878700918592, "loss": 4.5808, "step": 1665 }, { "epoch": 0.03, "learning_rate": 0.0005993826062893152, "loss": 4.6235, "step": 1670 }, { "epoch": 0.03, "learning_rate": 0.0005993773199747335, "loss": 4.6813, "step": 1675 }, { "epoch": 0.03, "learning_rate": 0.0005993720111485115, "loss": 4.6841, "step": 1680 }, { "epoch": 0.03, "learning_rate": 0.0005993666798110488, "loss": 4.6773, "step": 1685 }, { "epoch": 0.03, "learning_rate": 0.0005993613259627458, "loss": 4.6088, "step": 1690 }, { "epoch": 0.03, "learning_rate": 0.0005993559496040054, "loss": 4.592, "step": 1695 }, { "epoch": 0.03, "learning_rate": 0.0005993505507352317, "loss": 4.7731, "step": 1700 }, { "epoch": 0.03, "learning_rate": 0.0005993451293568307, "loss": 4.7196, "step": 1705 }, { "epoch": 0.03, "learning_rate": 0.0005993396854692102, "loss": 4.6916, "step": 1710 }, { "epoch": 0.03, "learning_rate": 0.0005993342190727793, "loss": 4.6838, "step": 1715 }, { "epoch": 0.03, "learning_rate": 0.0005993287301679494, "loss": 4.6393, "step": 1720 }, { "epoch": 0.03, "learning_rate": 0.0005993232187551328, "loss": 4.705, "step": 1725 }, { "epoch": 0.03, "learning_rate": 0.0005993176848347444, "loss": 4.6213, "step": 1730 }, { "epoch": 0.03, "learning_rate": 0.0005993121284071999, "loss": 4.7283, "step": 1735 }, { "epoch": 0.03, "learning_rate": 0.0005993065494729173, "loss": 4.6904, "step": 1740 }, { "epoch": 0.03, "learning_rate": 0.0005993009480323162, "loss": 4.7068, "step": 1745 }, { "epoch": 0.03, "learning_rate": 0.0005992953240858177, "loss": 4.6864, "step": 1750 }, { "epoch": 0.03, "learning_rate": 0.0005992896776338446, "loss": 4.6971, "step": 1755 }, { "epoch": 0.03, "learning_rate": 0.0005992840086768216, "loss": 4.7385, "step": 1760 }, { "epoch": 0.03, "learning_rate": 0.000599278317215175, "loss": 4.6804, "step": 1765 }, { "epoch": 0.03, "learning_rate": 0.0005992726032493326, "loss": 4.7191, "step": 1770 }, { "epoch": 0.03, "learning_rate": 0.0005992668667797244, "loss": 4.5827, "step": 1775 }, { "epoch": 0.03, "learning_rate": 0.0005992611078067813, "loss": 4.7553, "step": 1780 }, { "epoch": 0.03, "learning_rate": 0.0005992553263309367, "loss": 4.6555, "step": 1785 }, { "epoch": 0.03, "learning_rate": 0.0005992495223526253, "loss": 4.5248, "step": 1790 }, { "epoch": 0.03, "learning_rate": 0.0005992436958722834, "loss": 4.4933, "step": 1795 }, { "epoch": 0.03, "learning_rate": 0.0005992378468903489, "loss": 4.622, "step": 1800 }, { "epoch": 0.03, "learning_rate": 0.0005992319754072622, "loss": 4.5995, "step": 1805 }, { "epoch": 0.03, "learning_rate": 0.0005992260814234644, "loss": 4.6473, "step": 1810 }, { "epoch": 0.03, "learning_rate": 0.0005992201649393989, "loss": 4.6518, "step": 1815 }, { "epoch": 0.03, "learning_rate": 0.0005992142259555103, "loss": 4.6183, "step": 1820 }, { "epoch": 0.03, "learning_rate": 0.0005992082644722454, "loss": 4.5708, "step": 1825 }, { "epoch": 0.03, "learning_rate": 0.0005992022804900525, "loss": 4.5672, "step": 1830 }, { "epoch": 0.03, "learning_rate": 0.0005991962740093813, "loss": 4.5723, "step": 1835 }, { "epoch": 0.03, "learning_rate": 0.0005991902450306839, "loss": 4.574, "step": 1840 }, { "epoch": 0.03, "learning_rate": 0.0005991841935544133, "loss": 4.6351, "step": 1845 }, { "epoch": 0.03, "learning_rate": 0.0005991781195810246, "loss": 4.6235, "step": 1850 }, { "epoch": 0.03, "learning_rate": 0.0005991720231109745, "loss": 4.6356, "step": 1855 }, { "epoch": 0.03, "learning_rate": 0.0005991659041447217, "loss": 4.6934, "step": 1860 }, { "epoch": 0.03, "learning_rate": 0.000599159762682726, "loss": 4.6972, "step": 1865 }, { "epoch": 0.03, "learning_rate": 0.0005991535987254494, "loss": 4.5976, "step": 1870 }, { "epoch": 0.03, "learning_rate": 0.0005991474122733552, "loss": 4.5597, "step": 1875 }, { "epoch": 0.03, "learning_rate": 0.0005991412033269087, "loss": 4.552, "step": 1880 }, { "epoch": 0.03, "learning_rate": 0.0005991349718865768, "loss": 4.5887, "step": 1885 }, { "epoch": 0.03, "learning_rate": 0.000599128717952828, "loss": 4.6184, "step": 1890 }, { "epoch": 0.03, "learning_rate": 0.0005991224415261328, "loss": 4.6031, "step": 1895 }, { "epoch": 0.03, "learning_rate": 0.0005991161426069629, "loss": 4.6395, "step": 1900 }, { "epoch": 0.03, "learning_rate": 0.000599109821195792, "loss": 4.6755, "step": 1905 }, { "epoch": 0.03, "learning_rate": 0.0005991034772930955, "loss": 4.5333, "step": 1910 }, { "epoch": 0.03, "learning_rate": 0.0005990971108993503, "loss": 4.5863, "step": 1915 }, { "epoch": 0.03, "learning_rate": 0.0005990907220150353, "loss": 4.5263, "step": 1920 }, { "epoch": 0.03, "learning_rate": 0.0005990843106406308, "loss": 4.5766, "step": 1925 }, { "epoch": 0.03, "learning_rate": 0.0005990778767766188, "loss": 4.6123, "step": 1930 }, { "epoch": 0.03, "learning_rate": 0.0005990714204234833, "loss": 4.5906, "step": 1935 }, { "epoch": 0.03, "learning_rate": 0.0005990649415817097, "loss": 4.592, "step": 1940 }, { "epoch": 0.03, "learning_rate": 0.0005990584402517853, "loss": 4.5306, "step": 1945 }, { "epoch": 0.03, "learning_rate": 0.0005990519164341987, "loss": 4.538, "step": 1950 }, { "epoch": 0.03, "learning_rate": 0.0005990453701294406, "loss": 4.7278, "step": 1955 }, { "epoch": 0.03, "learning_rate": 0.0005990388013380033, "loss": 4.5994, "step": 1960 }, { "epoch": 0.03, "learning_rate": 0.0005990322100603806, "loss": 4.6766, "step": 1965 }, { "epoch": 0.03, "learning_rate": 0.0005990255962970684, "loss": 4.5969, "step": 1970 }, { "epoch": 0.03, "learning_rate": 0.0005990189600485637, "loss": 4.5924, "step": 1975 }, { "epoch": 0.03, "learning_rate": 0.0005990123013153657, "loss": 4.6688, "step": 1980 }, { "epoch": 0.03, "learning_rate": 0.000599005620097975, "loss": 4.6841, "step": 1985 }, { "epoch": 0.03, "learning_rate": 0.0005989989163968941, "loss": 4.6268, "step": 1990 }, { "epoch": 0.03, "learning_rate": 0.000598992190212627, "loss": 4.6271, "step": 1995 }, { "epoch": 0.03, "learning_rate": 0.0005989854415456796, "loss": 4.5934, "step": 2000 }, { "epoch": 0.03, "eval_loss": 4.622138977050781, "eval_runtime": 150.27, "eval_samples_per_second": 12.251, "eval_steps_per_second": 0.772, "step": 2000 }, { "epoch": 0.03, "learning_rate": 0.0005989786703965592, "loss": 4.6169, "step": 2005 }, { "epoch": 0.03, "learning_rate": 0.000598971876765775, "loss": 4.5989, "step": 2010 }, { "epoch": 0.03, "learning_rate": 0.0005989650606538379, "loss": 4.5947, "step": 2015 }, { "epoch": 0.03, "learning_rate": 0.0005989582220612605, "loss": 4.6156, "step": 2020 }, { "epoch": 0.04, "learning_rate": 0.0005989513609885568, "loss": 4.6182, "step": 2025 }, { "epoch": 0.04, "learning_rate": 0.0005989444774362429, "loss": 4.6214, "step": 2030 }, { "epoch": 0.04, "learning_rate": 0.0005989375714048363, "loss": 4.6239, "step": 2035 }, { "epoch": 0.04, "learning_rate": 0.0005989306428948565, "loss": 4.5639, "step": 2040 }, { "epoch": 0.04, "learning_rate": 0.0005989236919068243, "loss": 4.5897, "step": 2045 }, { "epoch": 0.04, "learning_rate": 0.0005989167184412624, "loss": 4.6397, "step": 2050 }, { "epoch": 0.04, "learning_rate": 0.0005989097224986952, "loss": 4.6781, "step": 2055 }, { "epoch": 0.04, "learning_rate": 0.0005989027040796488, "loss": 4.6692, "step": 2060 }, { "epoch": 0.04, "learning_rate": 0.0005988956631846509, "loss": 4.5066, "step": 2065 }, { "epoch": 0.04, "learning_rate": 0.0005988885998142308, "loss": 4.5364, "step": 2070 }, { "epoch": 0.04, "learning_rate": 0.0005988815139689199, "loss": 4.5657, "step": 2075 }, { "epoch": 0.04, "learning_rate": 0.0005988744056492508, "loss": 4.4681, "step": 2080 }, { "epoch": 0.04, "learning_rate": 0.0005988672748557583, "loss": 4.5448, "step": 2085 }, { "epoch": 0.04, "learning_rate": 0.0005988601215889782, "loss": 4.5786, "step": 2090 }, { "epoch": 0.04, "learning_rate": 0.0005988529458494488, "loss": 4.5426, "step": 2095 }, { "epoch": 0.04, "learning_rate": 0.0005988457476377092, "loss": 4.5494, "step": 2100 }, { "epoch": 0.04, "learning_rate": 0.0005988385269543011, "loss": 4.5845, "step": 2105 }, { "epoch": 0.04, "learning_rate": 0.0005988312837997673, "loss": 4.5108, "step": 2110 }, { "epoch": 0.04, "learning_rate": 0.0005988240181746524, "loss": 4.5646, "step": 2115 }, { "epoch": 0.04, "learning_rate": 0.0005988167300795029, "loss": 4.5582, "step": 2120 }, { "epoch": 0.04, "learning_rate": 0.0005988094195148666, "loss": 4.5773, "step": 2125 }, { "epoch": 0.04, "learning_rate": 0.0005988020864812933, "loss": 4.5119, "step": 2130 }, { "epoch": 0.04, "learning_rate": 0.0005987947309793344, "loss": 4.4035, "step": 2135 }, { "epoch": 0.04, "learning_rate": 0.0005987873530095432, "loss": 4.4899, "step": 2140 }, { "epoch": 0.04, "learning_rate": 0.0005987799525724742, "loss": 4.56, "step": 2145 }, { "epoch": 0.04, "learning_rate": 0.000598772529668684, "loss": 4.4948, "step": 2150 }, { "epoch": 0.04, "learning_rate": 0.0005987650842987307, "loss": 4.4912, "step": 2155 }, { "epoch": 0.04, "learning_rate": 0.0005987576164631743, "loss": 4.6292, "step": 2160 }, { "epoch": 0.04, "learning_rate": 0.0005987501261625762, "loss": 4.563, "step": 2165 }, { "epoch": 0.04, "learning_rate": 0.0005987426133974997, "loss": 4.5965, "step": 2170 }, { "epoch": 0.04, "learning_rate": 0.0005987350781685097, "loss": 4.546, "step": 2175 }, { "epoch": 0.04, "learning_rate": 0.0005987275204761729, "loss": 4.6035, "step": 2180 }, { "epoch": 0.04, "learning_rate": 0.0005987199403210574, "loss": 4.6014, "step": 2185 }, { "epoch": 0.04, "learning_rate": 0.0005987123377037333, "loss": 4.5493, "step": 2190 }, { "epoch": 0.04, "learning_rate": 0.0005987047126247723, "loss": 4.5345, "step": 2195 }, { "epoch": 0.04, "learning_rate": 0.0005986970650847478, "loss": 4.5793, "step": 2200 }, { "epoch": 0.04, "learning_rate": 0.0005986893950842347, "loss": 4.5242, "step": 2205 }, { "epoch": 0.04, "learning_rate": 0.0005986817026238099, "loss": 4.5865, "step": 2210 }, { "epoch": 0.04, "learning_rate": 0.0005986739877040518, "loss": 4.5695, "step": 2215 }, { "epoch": 0.04, "learning_rate": 0.0005986662503255405, "loss": 4.5274, "step": 2220 }, { "epoch": 0.04, "learning_rate": 0.0005986584904888579, "loss": 4.5146, "step": 2225 }, { "epoch": 0.04, "learning_rate": 0.0005986507081945872, "loss": 4.4585, "step": 2230 }, { "epoch": 0.04, "learning_rate": 0.000598642903443314, "loss": 4.5071, "step": 2235 }, { "epoch": 0.04, "learning_rate": 0.0005986350762356249, "loss": 4.6897, "step": 2240 }, { "epoch": 0.04, "learning_rate": 0.0005986272265721086, "loss": 4.5492, "step": 2245 }, { "epoch": 0.04, "learning_rate": 0.0005986193544533553, "loss": 4.4015, "step": 2250 }, { "epoch": 0.04, "learning_rate": 0.000598611459879957, "loss": 4.5966, "step": 2255 }, { "epoch": 0.04, "learning_rate": 0.000598603542852507, "loss": 4.5583, "step": 2260 }, { "epoch": 0.04, "learning_rate": 0.0005985956033716011, "loss": 4.4743, "step": 2265 }, { "epoch": 0.04, "learning_rate": 0.0005985876414378361, "loss": 4.557, "step": 2270 }, { "epoch": 0.04, "learning_rate": 0.0005985796570518108, "loss": 4.5134, "step": 2275 }, { "epoch": 0.04, "learning_rate": 0.0005985716502141254, "loss": 4.5188, "step": 2280 }, { "epoch": 0.04, "learning_rate": 0.0005985636209253821, "loss": 4.5209, "step": 2285 }, { "epoch": 0.04, "learning_rate": 0.0005985555691861845, "loss": 4.4911, "step": 2290 }, { "epoch": 0.04, "learning_rate": 0.0005985474949971383, "loss": 4.5194, "step": 2295 }, { "epoch": 0.04, "learning_rate": 0.0005985393983588503, "loss": 4.5059, "step": 2300 }, { "epoch": 0.04, "learning_rate": 0.0005985312792719297, "loss": 4.4679, "step": 2305 }, { "epoch": 0.04, "learning_rate": 0.0005985231377369869, "loss": 4.5583, "step": 2310 }, { "epoch": 0.04, "learning_rate": 0.000598514973754634, "loss": 4.5782, "step": 2315 }, { "epoch": 0.04, "learning_rate": 0.0005985067873254848, "loss": 4.5104, "step": 2320 }, { "epoch": 0.04, "learning_rate": 0.0005984985784501551, "loss": 4.5072, "step": 2325 }, { "epoch": 0.04, "learning_rate": 0.0005984903471292621, "loss": 4.435, "step": 2330 }, { "epoch": 0.04, "learning_rate": 0.0005984820933634247, "loss": 4.4558, "step": 2335 }, { "epoch": 0.04, "learning_rate": 0.0005984738171532636, "loss": 4.5055, "step": 2340 }, { "epoch": 0.04, "learning_rate": 0.0005984655184994011, "loss": 4.521, "step": 2345 }, { "epoch": 0.04, "learning_rate": 0.0005984571974024614, "loss": 4.4612, "step": 2350 }, { "epoch": 0.04, "learning_rate": 0.0005984488538630698, "loss": 4.4435, "step": 2355 }, { "epoch": 0.04, "learning_rate": 0.0005984404878818539, "loss": 4.5881, "step": 2360 }, { "epoch": 0.04, "learning_rate": 0.0005984320994594429, "loss": 4.5542, "step": 2365 }, { "epoch": 0.04, "learning_rate": 0.0005984236885964674, "loss": 4.3867, "step": 2370 }, { "epoch": 0.04, "learning_rate": 0.00059841525529356, "loss": 4.5786, "step": 2375 }, { "epoch": 0.04, "learning_rate": 0.0005984067995513547, "loss": 4.5679, "step": 2380 }, { "epoch": 0.04, "learning_rate": 0.0005983983213704874, "loss": 4.4752, "step": 2385 }, { "epoch": 0.04, "learning_rate": 0.0005983898207515957, "loss": 4.4446, "step": 2390 }, { "epoch": 0.04, "learning_rate": 0.0005983812976953185, "loss": 4.4343, "step": 2395 }, { "epoch": 0.04, "learning_rate": 0.000598372752202297, "loss": 4.3717, "step": 2400 }, { "epoch": 0.04, "learning_rate": 0.0005983641842731736, "loss": 4.4689, "step": 2405 }, { "epoch": 0.04, "learning_rate": 0.0005983555939085927, "loss": 4.4949, "step": 2410 }, { "epoch": 0.04, "learning_rate": 0.0005983469811092002, "loss": 4.5136, "step": 2415 }, { "epoch": 0.04, "learning_rate": 0.0005983383458756437, "loss": 4.4673, "step": 2420 }, { "epoch": 0.04, "learning_rate": 0.0005983296882085726, "loss": 4.521, "step": 2425 }, { "epoch": 0.04, "learning_rate": 0.0005983210081086378, "loss": 4.4922, "step": 2430 }, { "epoch": 0.04, "learning_rate": 0.0005983123055764921, "loss": 4.5239, "step": 2435 }, { "epoch": 0.04, "learning_rate": 0.0005983035806127897, "loss": 4.4424, "step": 2440 }, { "epoch": 0.04, "learning_rate": 0.0005982948332181871, "loss": 4.4227, "step": 2445 }, { "epoch": 0.04, "learning_rate": 0.0005982860633933415, "loss": 4.5284, "step": 2450 }, { "epoch": 0.04, "learning_rate": 0.0005982772711389128, "loss": 4.5241, "step": 2455 }, { "epoch": 0.04, "learning_rate": 0.0005982684564555619, "loss": 4.4501, "step": 2460 }, { "epoch": 0.04, "learning_rate": 0.0005982596193439517, "loss": 4.4509, "step": 2465 }, { "epoch": 0.04, "learning_rate": 0.0005982507598047466, "loss": 4.4052, "step": 2470 }, { "epoch": 0.04, "learning_rate": 0.000598241877838613, "loss": 4.4956, "step": 2475 }, { "epoch": 0.04, "learning_rate": 0.0005982329734462186, "loss": 4.3242, "step": 2480 }, { "epoch": 0.04, "learning_rate": 0.0005982240466282329, "loss": 4.392, "step": 2485 }, { "epoch": 0.04, "learning_rate": 0.0005982150973853274, "loss": 4.5077, "step": 2490 }, { "epoch": 0.04, "learning_rate": 0.0005982061257181749, "loss": 4.4919, "step": 2495 }, { "epoch": 0.04, "learning_rate": 0.00059819713162745, "loss": 4.5178, "step": 2500 }, { "epoch": 0.04, "eval_loss": 4.496661186218262, "eval_runtime": 150.774, "eval_samples_per_second": 12.21, "eval_steps_per_second": 0.769, "step": 2500 }, { "epoch": 0.04, "learning_rate": 0.0005981881151138291, "loss": 4.5157, "step": 2505 }, { "epoch": 0.04, "learning_rate": 0.00059817907617799, "loss": 4.4359, "step": 2510 }, { "epoch": 0.04, "learning_rate": 0.0005981700148206126, "loss": 4.4987, "step": 2515 }, { "epoch": 0.04, "learning_rate": 0.0005981609310423781, "loss": 4.3829, "step": 2520 }, { "epoch": 0.04, "learning_rate": 0.0005981518248439697, "loss": 4.4533, "step": 2525 }, { "epoch": 0.04, "learning_rate": 0.0005981426962260722, "loss": 4.4787, "step": 2530 }, { "epoch": 0.04, "learning_rate": 0.0005981335451893718, "loss": 4.4843, "step": 2535 }, { "epoch": 0.04, "learning_rate": 0.0005981243717345567, "loss": 4.4322, "step": 2540 }, { "epoch": 0.04, "learning_rate": 0.0005981151758623167, "loss": 4.504, "step": 2545 }, { "epoch": 0.04, "learning_rate": 0.0005981059575733432, "loss": 4.4743, "step": 2550 }, { "epoch": 0.04, "learning_rate": 0.0005980967168683297, "loss": 4.5483, "step": 2555 }, { "epoch": 0.04, "learning_rate": 0.0005980874537479706, "loss": 4.4412, "step": 2560 }, { "epoch": 0.04, "learning_rate": 0.0005980781682129627, "loss": 4.4774, "step": 2565 }, { "epoch": 0.04, "learning_rate": 0.0005980688602640042, "loss": 4.3842, "step": 2570 }, { "epoch": 0.04, "learning_rate": 0.000598059529901795, "loss": 4.453, "step": 2575 }, { "epoch": 0.04, "learning_rate": 0.0005980501771270366, "loss": 4.3691, "step": 2580 }, { "epoch": 0.04, "learning_rate": 0.0005980408019404324, "loss": 4.4649, "step": 2585 }, { "epoch": 0.04, "learning_rate": 0.0005980314043426874, "loss": 4.4571, "step": 2590 }, { "epoch": 0.04, "learning_rate": 0.0005980219843345082, "loss": 4.2869, "step": 2595 }, { "epoch": 0.04, "learning_rate": 0.000598012541916603, "loss": 4.442, "step": 2600 }, { "epoch": 0.05, "learning_rate": 0.000598003077089682, "loss": 4.4842, "step": 2605 }, { "epoch": 0.05, "learning_rate": 0.0005979935898544568, "loss": 4.4487, "step": 2610 }, { "epoch": 0.05, "learning_rate": 0.0005979840802116409, "loss": 4.4768, "step": 2615 }, { "epoch": 0.05, "learning_rate": 0.0005979745481619493, "loss": 4.4794, "step": 2620 }, { "epoch": 0.05, "learning_rate": 0.0005979649937060988, "loss": 4.4843, "step": 2625 }, { "epoch": 0.05, "learning_rate": 0.0005979554168448078, "loss": 4.4868, "step": 2630 }, { "epoch": 0.05, "learning_rate": 0.0005979458175787963, "loss": 4.52, "step": 2635 }, { "epoch": 0.05, "learning_rate": 0.0005979361959087865, "loss": 4.5309, "step": 2640 }, { "epoch": 0.05, "learning_rate": 0.0005979265518355015, "loss": 4.4523, "step": 2645 }, { "epoch": 0.05, "learning_rate": 0.0005979168853596668, "loss": 4.4153, "step": 2650 }, { "epoch": 0.05, "learning_rate": 0.000597907196482009, "loss": 4.3197, "step": 2655 }, { "epoch": 0.05, "learning_rate": 0.0005978974852032568, "loss": 4.4264, "step": 2660 }, { "epoch": 0.05, "learning_rate": 0.0005978877515241404, "loss": 4.4645, "step": 2665 }, { "epoch": 0.05, "learning_rate": 0.0005978779954453917, "loss": 4.4296, "step": 2670 }, { "epoch": 0.05, "learning_rate": 0.0005978682169677444, "loss": 4.4271, "step": 2675 }, { "epoch": 0.05, "learning_rate": 0.0005978584160919339, "loss": 4.4266, "step": 2680 }, { "epoch": 0.05, "learning_rate": 0.0005978485928186968, "loss": 4.4608, "step": 2685 }, { "epoch": 0.05, "learning_rate": 0.000597838747148772, "loss": 4.455, "step": 2690 }, { "epoch": 0.05, "learning_rate": 0.0005978288790828998, "loss": 4.4432, "step": 2695 }, { "epoch": 0.05, "learning_rate": 0.0005978189886218223, "loss": 4.4275, "step": 2700 }, { "epoch": 0.05, "learning_rate": 0.0005978090757662831, "loss": 4.4242, "step": 2705 }, { "epoch": 0.05, "learning_rate": 0.0005977991405170277, "loss": 4.4197, "step": 2710 }, { "epoch": 0.05, "learning_rate": 0.0005977891828748031, "loss": 4.3557, "step": 2715 }, { "epoch": 0.05, "learning_rate": 0.0005977792028403581, "loss": 4.4138, "step": 2720 }, { "epoch": 0.05, "learning_rate": 0.0005977692004144431, "loss": 4.4321, "step": 2725 }, { "epoch": 0.05, "learning_rate": 0.0005977591755978104, "loss": 4.3348, "step": 2730 }, { "epoch": 0.05, "learning_rate": 0.0005977491283912135, "loss": 4.3667, "step": 2735 }, { "epoch": 0.05, "learning_rate": 0.0005977390587954082, "loss": 4.5263, "step": 2740 }, { "epoch": 0.05, "learning_rate": 0.0005977289668111515, "loss": 4.4123, "step": 2745 }, { "epoch": 0.05, "learning_rate": 0.0005977188524392025, "loss": 4.4297, "step": 2750 }, { "epoch": 0.05, "learning_rate": 0.0005977087156803214, "loss": 4.4391, "step": 2755 }, { "epoch": 0.05, "learning_rate": 0.0005976985565352707, "loss": 4.4199, "step": 2760 }, { "epoch": 0.05, "learning_rate": 0.0005976883750048142, "loss": 4.4723, "step": 2765 }, { "epoch": 0.05, "learning_rate": 0.0005976781710897175, "loss": 4.3106, "step": 2770 }, { "epoch": 0.05, "learning_rate": 0.000597667944790748, "loss": 4.4756, "step": 2775 }, { "epoch": 0.05, "learning_rate": 0.0005976576961086745, "loss": 4.4701, "step": 2780 }, { "epoch": 0.05, "learning_rate": 0.0005976474250442678, "loss": 4.316, "step": 2785 }, { "epoch": 0.05, "learning_rate": 0.0005976371315983, "loss": 4.3714, "step": 2790 }, { "epoch": 0.05, "learning_rate": 0.0005976268157715453, "loss": 4.2413, "step": 2795 }, { "epoch": 0.05, "learning_rate": 0.0005976164775647795, "loss": 4.4715, "step": 2800 }, { "epoch": 0.05, "learning_rate": 0.0005976061169787798, "loss": 4.4679, "step": 2805 }, { "epoch": 0.05, "learning_rate": 0.0005975957340143254, "loss": 4.3745, "step": 2810 }, { "epoch": 0.05, "learning_rate": 0.0005975853286721968, "loss": 4.348, "step": 2815 }, { "epoch": 0.05, "learning_rate": 0.0005975749009531767, "loss": 4.4823, "step": 2820 }, { "epoch": 0.05, "learning_rate": 0.0005975644508580489, "loss": 4.3, "step": 2825 }, { "epoch": 0.05, "learning_rate": 0.0005975539783875995, "loss": 4.3079, "step": 2830 }, { "epoch": 0.05, "learning_rate": 0.000597543483542616, "loss": 4.4583, "step": 2835 }, { "epoch": 0.05, "learning_rate": 0.0005975329663238872, "loss": 4.3141, "step": 2840 }, { "epoch": 0.05, "learning_rate": 0.0005975224267322043, "loss": 4.4546, "step": 2845 }, { "epoch": 0.05, "learning_rate": 0.0005975118647683597, "loss": 4.4115, "step": 2850 }, { "epoch": 0.05, "learning_rate": 0.0005975012804331476, "loss": 4.3319, "step": 2855 }, { "epoch": 0.05, "learning_rate": 0.0005974906737273638, "loss": 4.3583, "step": 2860 }, { "epoch": 0.05, "learning_rate": 0.000597480044651806, "loss": 4.3746, "step": 2865 }, { "epoch": 0.05, "learning_rate": 0.0005974693932072734, "loss": 4.3578, "step": 2870 }, { "epoch": 0.05, "learning_rate": 0.000597458719394567, "loss": 4.4066, "step": 2875 }, { "epoch": 0.05, "learning_rate": 0.0005974480232144891, "loss": 4.3589, "step": 2880 }, { "epoch": 0.05, "learning_rate": 0.0005974373046678445, "loss": 4.4172, "step": 2885 }, { "epoch": 0.05, "learning_rate": 0.0005974265637554389, "loss": 4.4344, "step": 2890 }, { "epoch": 0.05, "learning_rate": 0.0005974158004780799, "loss": 4.3983, "step": 2895 }, { "epoch": 0.05, "learning_rate": 0.000597405014836577, "loss": 4.3429, "step": 2900 }, { "epoch": 0.05, "learning_rate": 0.0005973942068317411, "loss": 4.3561, "step": 2905 }, { "epoch": 0.05, "learning_rate": 0.0005973833764643851, "loss": 4.3462, "step": 2910 }, { "epoch": 0.05, "learning_rate": 0.0005973725237353231, "loss": 4.3672, "step": 2915 }, { "epoch": 0.05, "learning_rate": 0.0005973616486453715, "loss": 4.4243, "step": 2920 }, { "epoch": 0.05, "learning_rate": 0.0005973507511953478, "loss": 4.4428, "step": 2925 }, { "epoch": 0.05, "learning_rate": 0.0005973398313860714, "loss": 4.3471, "step": 2930 }, { "epoch": 0.05, "learning_rate": 0.0005973288892183636, "loss": 4.2282, "step": 2935 }, { "epoch": 0.05, "learning_rate": 0.0005973179246930472, "loss": 4.3884, "step": 2940 }, { "epoch": 0.05, "learning_rate": 0.0005973069378109466, "loss": 4.3978, "step": 2945 }, { "epoch": 0.05, "learning_rate": 0.0005972959285728877, "loss": 4.4103, "step": 2950 }, { "epoch": 0.05, "learning_rate": 0.0005972848969796988, "loss": 4.3988, "step": 2955 }, { "epoch": 0.05, "learning_rate": 0.0005972738430322092, "loss": 4.4573, "step": 2960 }, { "epoch": 0.05, "learning_rate": 0.0005972627667312501, "loss": 4.3979, "step": 2965 }, { "epoch": 0.05, "learning_rate": 0.0005972516680776543, "loss": 4.3337, "step": 2970 }, { "epoch": 0.05, "learning_rate": 0.0005972405470722565, "loss": 4.4286, "step": 2975 }, { "epoch": 0.05, "learning_rate": 0.000597229403715893, "loss": 4.3805, "step": 2980 }, { "epoch": 0.05, "learning_rate": 0.0005972182380094015, "loss": 4.3886, "step": 2985 }, { "epoch": 0.05, "learning_rate": 0.0005972070499536217, "loss": 4.3983, "step": 2990 }, { "epoch": 0.05, "learning_rate": 0.000597195839549395, "loss": 4.4782, "step": 2995 }, { "epoch": 0.05, "learning_rate": 0.0005971846067975642, "loss": 4.3503, "step": 3000 }, { "epoch": 0.05, "eval_loss": 4.413268089294434, "eval_runtime": 150.2737, "eval_samples_per_second": 12.251, "eval_steps_per_second": 0.772, "step": 3000 }, { "epoch": 0.05, "learning_rate": 0.000597173351698974, "loss": 4.4155, "step": 3005 }, { "epoch": 0.05, "learning_rate": 0.0005971620742544709, "loss": 4.4261, "step": 3010 }, { "epoch": 0.05, "learning_rate": 0.0005971507744649025, "loss": 4.348, "step": 3015 }, { "epoch": 0.05, "learning_rate": 0.000597139452331119, "loss": 4.4353, "step": 3020 }, { "epoch": 0.05, "learning_rate": 0.0005971281078539712, "loss": 4.3666, "step": 3025 }, { "epoch": 0.05, "learning_rate": 0.0005971167410343127, "loss": 4.4247, "step": 3030 }, { "epoch": 0.05, "learning_rate": 0.0005971053518729979, "loss": 4.4093, "step": 3035 }, { "epoch": 0.05, "learning_rate": 0.0005970939403708832, "loss": 4.4617, "step": 3040 }, { "epoch": 0.05, "learning_rate": 0.0005970825065288269, "loss": 4.3769, "step": 3045 }, { "epoch": 0.05, "learning_rate": 0.0005970710503476885, "loss": 4.3765, "step": 3050 }, { "epoch": 0.05, "learning_rate": 0.0005970595718283296, "loss": 4.4443, "step": 3055 }, { "epoch": 0.05, "learning_rate": 0.0005970480709716134, "loss": 4.4562, "step": 3060 }, { "epoch": 0.05, "learning_rate": 0.0005970365477784046, "loss": 4.4314, "step": 3065 }, { "epoch": 0.05, "learning_rate": 0.0005970250022495696, "loss": 4.3781, "step": 3070 }, { "epoch": 0.05, "learning_rate": 0.0005970134343859767, "loss": 4.2969, "step": 3075 }, { "epoch": 0.05, "learning_rate": 0.0005970018441884957, "loss": 4.3114, "step": 3080 }, { "epoch": 0.05, "learning_rate": 0.0005969902316579982, "loss": 4.4437, "step": 3085 }, { "epoch": 0.05, "learning_rate": 0.0005969785967953572, "loss": 4.4663, "step": 3090 }, { "epoch": 0.05, "learning_rate": 0.0005969669396014479, "loss": 4.397, "step": 3095 }, { "epoch": 0.05, "learning_rate": 0.0005969552600771465, "loss": 4.3915, "step": 3100 }, { "epoch": 0.05, "learning_rate": 0.0005969435582233314, "loss": 4.3023, "step": 3105 }, { "epoch": 0.05, "learning_rate": 0.0005969318340408827, "loss": 4.2951, "step": 3110 }, { "epoch": 0.05, "learning_rate": 0.0005969200875306817, "loss": 4.4636, "step": 3115 }, { "epoch": 0.05, "learning_rate": 0.0005969083186936118, "loss": 4.3918, "step": 3120 }, { "epoch": 0.05, "learning_rate": 0.000596896527530558, "loss": 4.3059, "step": 3125 }, { "epoch": 0.05, "learning_rate": 0.000596884714042407, "loss": 4.2898, "step": 3130 }, { "epoch": 0.05, "learning_rate": 0.0005968728782300469, "loss": 4.3249, "step": 3135 }, { "epoch": 0.05, "learning_rate": 0.0005968610200943678, "loss": 4.3538, "step": 3140 }, { "epoch": 0.05, "learning_rate": 0.0005968491396362614, "loss": 4.2482, "step": 3145 }, { "epoch": 0.05, "learning_rate": 0.000596837236856621, "loss": 4.3515, "step": 3150 }, { "epoch": 0.05, "learning_rate": 0.0005968253117563417, "loss": 4.4214, "step": 3155 }, { "epoch": 0.05, "learning_rate": 0.0005968133643363202, "loss": 4.3378, "step": 3160 }, { "epoch": 0.05, "learning_rate": 0.0005968013945974549, "loss": 4.3318, "step": 3165 }, { "epoch": 0.05, "learning_rate": 0.0005967894025406456, "loss": 4.2822, "step": 3170 }, { "epoch": 0.05, "learning_rate": 0.0005967773881667944, "loss": 4.2495, "step": 3175 }, { "epoch": 0.06, "learning_rate": 0.0005967653514768046, "loss": 4.268, "step": 3180 }, { "epoch": 0.06, "learning_rate": 0.0005967532924715811, "loss": 4.3754, "step": 3185 }, { "epoch": 0.06, "learning_rate": 0.000596741211152031, "loss": 4.4025, "step": 3190 }, { "epoch": 0.06, "learning_rate": 0.0005967291075190625, "loss": 4.4083, "step": 3195 }, { "epoch": 0.06, "learning_rate": 0.0005967169815735858, "loss": 4.3213, "step": 3200 }, { "epoch": 0.06, "learning_rate": 0.0005967048333165128, "loss": 4.3461, "step": 3205 }, { "epoch": 0.06, "learning_rate": 0.0005966926627487569, "loss": 4.3768, "step": 3210 }, { "epoch": 0.06, "learning_rate": 0.0005966804698712333, "loss": 4.4196, "step": 3215 }, { "epoch": 0.06, "learning_rate": 0.0005966682546848589, "loss": 4.5157, "step": 3220 }, { "epoch": 0.06, "learning_rate": 0.000596656017190552, "loss": 4.5326, "step": 3225 }, { "epoch": 0.06, "learning_rate": 0.0005966437573892331, "loss": 4.4102, "step": 3230 }, { "epoch": 0.06, "learning_rate": 0.0005966314752818239, "loss": 4.405, "step": 3235 }, { "epoch": 0.06, "learning_rate": 0.0005966191708692478, "loss": 4.4479, "step": 3240 }, { "epoch": 0.06, "learning_rate": 0.0005966068441524304, "loss": 4.2742, "step": 3245 }, { "epoch": 0.06, "learning_rate": 0.0005965944951322984, "loss": 4.4105, "step": 3250 }, { "epoch": 0.06, "learning_rate": 0.0005965821238097803, "loss": 4.3222, "step": 3255 }, { "epoch": 0.06, "learning_rate": 0.0005965697301858064, "loss": 4.3689, "step": 3260 }, { "epoch": 0.06, "learning_rate": 0.0005965573142613088, "loss": 4.3729, "step": 3265 }, { "epoch": 0.06, "learning_rate": 0.0005965448760372209, "loss": 4.3759, "step": 3270 }, { "epoch": 0.06, "learning_rate": 0.0005965324155144782, "loss": 4.3199, "step": 3275 }, { "epoch": 0.06, "learning_rate": 0.0005965199326940174, "loss": 4.352, "step": 3280 }, { "epoch": 0.06, "learning_rate": 0.0005965074275767774, "loss": 4.3888, "step": 3285 }, { "epoch": 0.06, "learning_rate": 0.0005964949001636985, "loss": 4.2492, "step": 3290 }, { "epoch": 0.06, "learning_rate": 0.0005964823504557226, "loss": 4.2244, "step": 3295 }, { "epoch": 0.06, "learning_rate": 0.0005964697784537933, "loss": 4.2883, "step": 3300 }, { "epoch": 0.06, "learning_rate": 0.0005964571841588561, "loss": 4.371, "step": 3305 }, { "epoch": 0.06, "learning_rate": 0.0005964445675718579, "loss": 4.334, "step": 3310 }, { "epoch": 0.06, "learning_rate": 0.0005964319286937475, "loss": 4.3165, "step": 3315 }, { "epoch": 0.06, "learning_rate": 0.0005964192675254753, "loss": 4.2529, "step": 3320 }, { "epoch": 0.06, "learning_rate": 0.0005964065840679933, "loss": 4.3381, "step": 3325 }, { "epoch": 0.06, "learning_rate": 0.0005963938783222553, "loss": 4.283, "step": 3330 }, { "epoch": 0.06, "learning_rate": 0.0005963811502892165, "loss": 4.3161, "step": 3335 }, { "epoch": 0.06, "learning_rate": 0.0005963683999698342, "loss": 4.3748, "step": 3340 }, { "epoch": 0.06, "learning_rate": 0.000596355627365067, "loss": 4.3108, "step": 3345 }, { "epoch": 0.06, "learning_rate": 0.0005963428324758755, "loss": 4.3598, "step": 3350 }, { "epoch": 0.06, "learning_rate": 0.0005963300153032217, "loss": 4.3048, "step": 3355 }, { "epoch": 0.06, "learning_rate": 0.0005963171758480695, "loss": 4.3148, "step": 3360 }, { "epoch": 0.06, "learning_rate": 0.0005963043141113842, "loss": 4.1902, "step": 3365 }, { "epoch": 0.06, "learning_rate": 0.000596291430094133, "loss": 4.2621, "step": 3370 }, { "epoch": 0.06, "learning_rate": 0.0005962785237972848, "loss": 4.3392, "step": 3375 }, { "epoch": 0.06, "learning_rate": 0.00059626559522181, "loss": 4.4537, "step": 3380 }, { "epoch": 0.06, "learning_rate": 0.0005962526443686807, "loss": 4.3562, "step": 3385 }, { "epoch": 0.06, "learning_rate": 0.0005962396712388709, "loss": 4.3759, "step": 3390 }, { "epoch": 0.06, "learning_rate": 0.000596226675833356, "loss": 4.3024, "step": 3395 }, { "epoch": 0.06, "learning_rate": 0.0005962136581531132, "loss": 4.3345, "step": 3400 }, { "epoch": 0.06, "learning_rate": 0.0005962006181991214, "loss": 4.3228, "step": 3405 }, { "epoch": 0.06, "learning_rate": 0.0005961875559723611, "loss": 4.235, "step": 3410 }, { "epoch": 0.06, "learning_rate": 0.0005961744714738146, "loss": 4.2656, "step": 3415 }, { "epoch": 0.06, "learning_rate": 0.0005961613647044656, "loss": 4.404, "step": 3420 }, { "epoch": 0.06, "learning_rate": 0.0005961482356652998, "loss": 4.2853, "step": 3425 }, { "epoch": 0.06, "learning_rate": 0.0005961350843573044, "loss": 4.294, "step": 3430 }, { "epoch": 0.06, "learning_rate": 0.0005961219107814684, "loss": 4.2044, "step": 3435 }, { "epoch": 0.06, "learning_rate": 0.0005961087149387824, "loss": 4.2688, "step": 3440 }, { "epoch": 0.06, "learning_rate": 0.0005960954968302383, "loss": 4.2032, "step": 3445 }, { "epoch": 0.06, "learning_rate": 0.0005960822564568305, "loss": 4.3575, "step": 3450 }, { "epoch": 0.06, "learning_rate": 0.0005960689938195544, "loss": 4.368, "step": 3455 }, { "epoch": 0.06, "learning_rate": 0.0005960557089194072, "loss": 4.2639, "step": 3460 }, { "epoch": 0.06, "learning_rate": 0.0005960424017573881, "loss": 4.3362, "step": 3465 }, { "epoch": 0.06, "learning_rate": 0.0005960290723344974, "loss": 4.2979, "step": 3470 }, { "epoch": 0.06, "learning_rate": 0.0005960157206517376, "loss": 4.3052, "step": 3475 }, { "epoch": 0.06, "learning_rate": 0.0005960023467101128, "loss": 4.3042, "step": 3480 }, { "epoch": 0.06, "learning_rate": 0.0005959889505106284, "loss": 4.2859, "step": 3485 }, { "epoch": 0.06, "learning_rate": 0.0005959755320542919, "loss": 4.3849, "step": 3490 }, { "epoch": 0.06, "learning_rate": 0.0005959620913421123, "loss": 4.3201, "step": 3495 }, { "epoch": 0.06, "learning_rate": 0.0005959486283751001, "loss": 4.41, "step": 3500 }, { "epoch": 0.06, "eval_loss": 4.344966411590576, "eval_runtime": 150.7763, "eval_samples_per_second": 12.21, "eval_steps_per_second": 0.769, "step": 3500 }, { "epoch": 0.06, "learning_rate": 0.0005959351431542678, "loss": 4.3984, "step": 3505 }, { "epoch": 0.06, "learning_rate": 0.0005959216356806294, "loss": 4.3778, "step": 3510 }, { "epoch": 0.06, "learning_rate": 0.0005959081059552005, "loss": 4.3246, "step": 3515 }, { "epoch": 0.06, "learning_rate": 0.0005958945539789987, "loss": 4.2603, "step": 3520 }, { "epoch": 0.06, "learning_rate": 0.0005958809797530427, "loss": 4.2545, "step": 3525 }, { "epoch": 0.06, "learning_rate": 0.0005958673832783534, "loss": 4.268, "step": 3530 }, { "epoch": 0.06, "learning_rate": 0.0005958537645559533, "loss": 4.2577, "step": 3535 }, { "epoch": 0.06, "learning_rate": 0.0005958401235868663, "loss": 4.3378, "step": 3540 }, { "epoch": 0.06, "learning_rate": 0.0005958264603721181, "loss": 4.3697, "step": 3545 }, { "epoch": 0.06, "learning_rate": 0.0005958127749127361, "loss": 4.4162, "step": 3550 }, { "epoch": 0.06, "learning_rate": 0.0005957990672097495, "loss": 4.351, "step": 3555 }, { "epoch": 0.06, "learning_rate": 0.0005957853372641891, "loss": 4.3266, "step": 3560 }, { "epoch": 0.06, "learning_rate": 0.0005957715850770871, "loss": 4.2015, "step": 3565 }, { "epoch": 0.06, "learning_rate": 0.0005957578106494778, "loss": 4.1827, "step": 3570 }, { "epoch": 0.06, "learning_rate": 0.0005957440139823968, "loss": 4.4158, "step": 3575 }, { "epoch": 0.06, "learning_rate": 0.0005957301950768816, "loss": 4.2008, "step": 3580 }, { "epoch": 0.06, "learning_rate": 0.0005957163539339714, "loss": 4.1551, "step": 3585 }, { "epoch": 0.06, "learning_rate": 0.0005957024905547068, "loss": 4.2472, "step": 3590 }, { "epoch": 0.06, "learning_rate": 0.0005956886049401305, "loss": 4.2492, "step": 3595 }, { "epoch": 0.06, "learning_rate": 0.0005956746970912863, "loss": 4.2584, "step": 3600 }, { "epoch": 0.06, "learning_rate": 0.0005956607670092204, "loss": 4.3039, "step": 3605 }, { "epoch": 0.06, "learning_rate": 0.0005956468146949799, "loss": 4.2622, "step": 3610 }, { "epoch": 0.06, "learning_rate": 0.0005956328401496141, "loss": 4.2605, "step": 3615 }, { "epoch": 0.06, "learning_rate": 0.000595618843374174, "loss": 4.2973, "step": 3620 }, { "epoch": 0.06, "learning_rate": 0.0005956048243697117, "loss": 4.218, "step": 3625 }, { "epoch": 0.06, "learning_rate": 0.0005955907831372817, "loss": 4.3062, "step": 3630 }, { "epoch": 0.06, "learning_rate": 0.0005955767196779395, "loss": 4.3318, "step": 3635 }, { "epoch": 0.06, "learning_rate": 0.0005955626339927431, "loss": 4.4257, "step": 3640 }, { "epoch": 0.06, "learning_rate": 0.000595548526082751, "loss": 4.1505, "step": 3645 }, { "epoch": 0.06, "learning_rate": 0.0005955343959490246, "loss": 4.2879, "step": 3650 }, { "epoch": 0.06, "learning_rate": 0.0005955202435926262, "loss": 4.2549, "step": 3655 }, { "epoch": 0.06, "learning_rate": 0.00059550606901462, "loss": 4.2972, "step": 3660 }, { "epoch": 0.06, "learning_rate": 0.0005954918722160718, "loss": 4.253, "step": 3665 }, { "epoch": 0.06, "learning_rate": 0.0005954776531980492, "loss": 4.3401, "step": 3670 }, { "epoch": 0.06, "learning_rate": 0.0005954634119616215, "loss": 4.2288, "step": 3675 }, { "epoch": 0.06, "learning_rate": 0.0005954491485078592, "loss": 4.2791, "step": 3680 }, { "epoch": 0.06, "learning_rate": 0.0005954348628378353, "loss": 4.2321, "step": 3685 }, { "epoch": 0.06, "learning_rate": 0.0005954205549526236, "loss": 4.2922, "step": 3690 }, { "epoch": 0.06, "learning_rate": 0.0005954062248533003, "loss": 4.2489, "step": 3695 }, { "epoch": 0.06, "learning_rate": 0.0005953918725409429, "loss": 4.3511, "step": 3700 }, { "epoch": 0.06, "learning_rate": 0.0005953774980166304, "loss": 4.3664, "step": 3705 }, { "epoch": 0.06, "learning_rate": 0.000595363101281444, "loss": 4.2423, "step": 3710 }, { "epoch": 0.06, "learning_rate": 0.0005953486823364659, "loss": 4.2646, "step": 3715 }, { "epoch": 0.06, "learning_rate": 0.0005953342411827807, "loss": 4.2715, "step": 3720 }, { "epoch": 0.06, "learning_rate": 0.000595319777821474, "loss": 4.3321, "step": 3725 }, { "epoch": 0.06, "learning_rate": 0.0005953052922536335, "loss": 4.2929, "step": 3730 }, { "epoch": 0.06, "learning_rate": 0.0005952907844803486, "loss": 4.2246, "step": 3735 }, { "epoch": 0.06, "learning_rate": 0.0005952762545027099, "loss": 4.2096, "step": 3740 }, { "epoch": 0.06, "learning_rate": 0.0005952617023218101, "loss": 4.312, "step": 3745 }, { "epoch": 0.06, "learning_rate": 0.0005952471279387435, "loss": 4.2608, "step": 3750 }, { "epoch": 0.06, "learning_rate": 0.000595232531354606, "loss": 4.3276, "step": 3755 }, { "epoch": 0.07, "learning_rate": 0.0005952179125704952, "loss": 4.335, "step": 3760 }, { "epoch": 0.07, "learning_rate": 0.0005952032715875105, "loss": 4.2907, "step": 3765 }, { "epoch": 0.07, "learning_rate": 0.0005951886084067525, "loss": 4.3741, "step": 3770 }, { "epoch": 0.07, "learning_rate": 0.0005951739230293239, "loss": 4.3517, "step": 3775 }, { "epoch": 0.07, "learning_rate": 0.0005951592154563292, "loss": 4.277, "step": 3780 }, { "epoch": 0.07, "learning_rate": 0.000595144485688874, "loss": 4.3368, "step": 3785 }, { "epoch": 0.07, "learning_rate": 0.0005951297337280662, "loss": 4.2605, "step": 3790 }, { "epoch": 0.07, "learning_rate": 0.0005951149595750149, "loss": 4.3184, "step": 3795 }, { "epoch": 0.07, "learning_rate": 0.0005951001632308311, "loss": 4.3056, "step": 3800 }, { "epoch": 0.07, "learning_rate": 0.0005950853446966273, "loss": 4.3516, "step": 3805 }, { "epoch": 0.07, "learning_rate": 0.000595070503973518, "loss": 4.3533, "step": 3810 }, { "epoch": 0.07, "learning_rate": 0.0005950556410626189, "loss": 4.2064, "step": 3815 }, { "epoch": 0.07, "learning_rate": 0.0005950407559650477, "loss": 4.2559, "step": 3820 }, { "epoch": 0.07, "learning_rate": 0.0005950258486819239, "loss": 4.347, "step": 3825 }, { "epoch": 0.07, "learning_rate": 0.0005950109192143681, "loss": 4.2478, "step": 3830 }, { "epoch": 0.07, "learning_rate": 0.0005949959675635031, "loss": 4.3066, "step": 3835 }, { "epoch": 0.07, "learning_rate": 0.0005949809937304532, "loss": 4.1078, "step": 3840 }, { "epoch": 0.07, "learning_rate": 0.0005949659977163444, "loss": 4.3285, "step": 3845 }, { "epoch": 0.07, "learning_rate": 0.0005949509795223042, "loss": 4.211, "step": 3850 }, { "epoch": 0.07, "learning_rate": 0.0005949359391494619, "loss": 4.2925, "step": 3855 }, { "epoch": 0.07, "learning_rate": 0.0005949208765989485, "loss": 4.2087, "step": 3860 }, { "epoch": 0.07, "learning_rate": 0.0005949057918718966, "loss": 4.3738, "step": 3865 }, { "epoch": 0.07, "learning_rate": 0.0005948906849694406, "loss": 4.234, "step": 3870 }, { "epoch": 0.07, "learning_rate": 0.0005948755558927163, "loss": 4.2358, "step": 3875 }, { "epoch": 0.07, "learning_rate": 0.0005948604046428614, "loss": 4.2694, "step": 3880 }, { "epoch": 0.07, "learning_rate": 0.0005948452312210153, "loss": 4.2782, "step": 3885 }, { "epoch": 0.07, "learning_rate": 0.0005948300356283188, "loss": 4.3455, "step": 3890 }, { "epoch": 0.07, "learning_rate": 0.0005948148178659147, "loss": 4.2975, "step": 3895 }, { "epoch": 0.07, "learning_rate": 0.0005947995779349471, "loss": 4.3128, "step": 3900 }, { "epoch": 0.07, "learning_rate": 0.0005947843158365621, "loss": 4.2444, "step": 3905 }, { "epoch": 0.07, "learning_rate": 0.0005947690315719073, "loss": 4.2566, "step": 3910 }, { "epoch": 0.07, "learning_rate": 0.0005947537251421318, "loss": 4.2086, "step": 3915 }, { "epoch": 0.07, "learning_rate": 0.000594738396548387, "loss": 4.2687, "step": 3920 }, { "epoch": 0.07, "learning_rate": 0.0005947230457918253, "loss": 4.2127, "step": 3925 }, { "epoch": 0.07, "learning_rate": 0.0005947076728736009, "loss": 4.3346, "step": 3930 }, { "epoch": 0.07, "learning_rate": 0.0005946922777948699, "loss": 4.2569, "step": 3935 }, { "epoch": 0.07, "learning_rate": 0.0005946768605567898, "loss": 4.327, "step": 3940 }, { "epoch": 0.07, "learning_rate": 0.0005946614211605201, "loss": 4.3121, "step": 3945 }, { "epoch": 0.07, "learning_rate": 0.0005946459596072216, "loss": 4.1934, "step": 3950 }, { "epoch": 0.07, "learning_rate": 0.0005946304758980569, "loss": 4.0914, "step": 3955 }, { "epoch": 0.07, "learning_rate": 0.0005946149700341904, "loss": 4.284, "step": 3960 }, { "epoch": 0.07, "learning_rate": 0.000594599442016788, "loss": 4.2219, "step": 3965 }, { "epoch": 0.07, "learning_rate": 0.0005945838918470174, "loss": 4.2636, "step": 3970 }, { "epoch": 0.07, "learning_rate": 0.0005945683195260478, "loss": 4.2416, "step": 3975 }, { "epoch": 0.07, "learning_rate": 0.0005945527250550503, "loss": 4.1594, "step": 3980 }, { "epoch": 0.07, "learning_rate": 0.0005945371084351975, "loss": 4.2529, "step": 3985 }, { "epoch": 0.07, "learning_rate": 0.0005945214696676635, "loss": 4.2743, "step": 3990 }, { "epoch": 0.07, "learning_rate": 0.0005945058087536243, "loss": 4.1218, "step": 3995 }, { "epoch": 0.07, "learning_rate": 0.0005944901256942578, "loss": 4.2187, "step": 4000 }, { "epoch": 0.07, "eval_loss": 4.265633583068848, "eval_runtime": 150.1723, "eval_samples_per_second": 12.259, "eval_steps_per_second": 0.772, "step": 4000 }, { "epoch": 0.07, "learning_rate": 0.000594474420490743, "loss": 4.2666, "step": 4005 }, { "epoch": 0.07, "learning_rate": 0.0005944586931442609, "loss": 4.12, "step": 4010 }, { "epoch": 0.07, "learning_rate": 0.0005944429436559942, "loss": 4.2916, "step": 4015 }, { "epoch": 0.07, "learning_rate": 0.0005944271720271272, "loss": 4.1823, "step": 4020 }, { "epoch": 0.07, "learning_rate": 0.0005944113782588456, "loss": 4.2687, "step": 4025 }, { "epoch": 0.07, "learning_rate": 0.0005943955623523373, "loss": 4.2593, "step": 4030 }, { "epoch": 0.07, "learning_rate": 0.0005943797243087915, "loss": 4.2273, "step": 4035 }, { "epoch": 0.07, "learning_rate": 0.000594363864129399, "loss": 4.2475, "step": 4040 }, { "epoch": 0.07, "learning_rate": 0.0005943479818153526, "loss": 4.2453, "step": 4045 }, { "epoch": 0.07, "learning_rate": 0.0005943320773678463, "loss": 4.2077, "step": 4050 }, { "epoch": 0.07, "learning_rate": 0.0005943161507880765, "loss": 4.211, "step": 4055 }, { "epoch": 0.07, "learning_rate": 0.0005943002020772403, "loss": 4.3064, "step": 4060 }, { "epoch": 0.07, "learning_rate": 0.0005942842312365372, "loss": 4.2258, "step": 4065 }, { "epoch": 0.07, "learning_rate": 0.000594268238267168, "loss": 4.1974, "step": 4070 }, { "epoch": 0.07, "learning_rate": 0.0005942522231703357, "loss": 4.3158, "step": 4075 }, { "epoch": 0.07, "learning_rate": 0.000594236185947244, "loss": 4.2207, "step": 4080 }, { "epoch": 0.07, "learning_rate": 0.0005942201265990991, "loss": 4.2394, "step": 4085 }, { "epoch": 0.07, "learning_rate": 0.0005942040451271085, "loss": 4.2613, "step": 4090 }, { "epoch": 0.07, "learning_rate": 0.0005941879415324815, "loss": 4.2127, "step": 4095 }, { "epoch": 0.07, "learning_rate": 0.000594171815816429, "loss": 4.2353, "step": 4100 }, { "epoch": 0.07, "learning_rate": 0.0005941556679801636, "loss": 4.2169, "step": 4105 }, { "epoch": 0.07, "learning_rate": 0.0005941394980248994, "loss": 4.1401, "step": 4110 }, { "epoch": 0.07, "learning_rate": 0.0005941233059518524, "loss": 4.2061, "step": 4115 }, { "epoch": 0.07, "learning_rate": 0.0005941070917622401, "loss": 4.1151, "step": 4120 }, { "epoch": 0.07, "learning_rate": 0.000594090855457282, "loss": 4.1882, "step": 4125 }, { "epoch": 0.07, "learning_rate": 0.0005940745970381985, "loss": 4.1692, "step": 4130 }, { "epoch": 0.07, "learning_rate": 0.0005940583165062126, "loss": 4.2102, "step": 4135 }, { "epoch": 0.07, "learning_rate": 0.0005940420138625482, "loss": 4.1483, "step": 4140 }, { "epoch": 0.07, "learning_rate": 0.0005940256891084313, "loss": 4.2406, "step": 4145 }, { "epoch": 0.07, "learning_rate": 0.0005940093422450895, "loss": 4.2565, "step": 4150 }, { "epoch": 0.07, "learning_rate": 0.000593992973273752, "loss": 4.316, "step": 4155 }, { "epoch": 0.07, "learning_rate": 0.0005939765821956495, "loss": 4.27, "step": 4160 }, { "epoch": 0.07, "learning_rate": 0.0005939601690120146, "loss": 4.2913, "step": 4165 }, { "epoch": 0.07, "learning_rate": 0.0005939437337240816, "loss": 4.1995, "step": 4170 }, { "epoch": 0.07, "learning_rate": 0.0005939272763330862, "loss": 4.1578, "step": 4175 }, { "epoch": 0.07, "learning_rate": 0.000593910796840266, "loss": 4.2062, "step": 4180 }, { "epoch": 0.07, "learning_rate": 0.0005938942952468601, "loss": 4.2138, "step": 4185 }, { "epoch": 0.07, "learning_rate": 0.0005938777715541095, "loss": 4.1583, "step": 4190 }, { "epoch": 0.07, "learning_rate": 0.0005938612257632565, "loss": 4.1554, "step": 4195 }, { "epoch": 0.07, "learning_rate": 0.0005938446578755453, "loss": 4.1384, "step": 4200 }, { "epoch": 0.07, "learning_rate": 0.0005938280678922218, "loss": 4.1932, "step": 4205 }, { "epoch": 0.07, "learning_rate": 0.0005938114558145334, "loss": 4.262, "step": 4210 }, { "epoch": 0.07, "learning_rate": 0.0005937948216437294, "loss": 4.3102, "step": 4215 }, { "epoch": 0.07, "learning_rate": 0.0005937781653810603, "loss": 4.1807, "step": 4220 }, { "epoch": 0.07, "learning_rate": 0.0005937614870277788, "loss": 4.1832, "step": 4225 }, { "epoch": 0.07, "learning_rate": 0.0005937447865851391, "loss": 4.2501, "step": 4230 }, { "epoch": 0.07, "learning_rate": 0.0005937280640543968, "loss": 4.2523, "step": 4235 }, { "epoch": 0.07, "learning_rate": 0.0005937113194368095, "loss": 4.1573, "step": 4240 }, { "epoch": 0.07, "learning_rate": 0.0005936945527336362, "loss": 4.1715, "step": 4245 }, { "epoch": 0.07, "learning_rate": 0.0005936777639461375, "loss": 4.2046, "step": 4250 }, { "epoch": 0.07, "learning_rate": 0.0005936609530755761, "loss": 4.2866, "step": 4255 }, { "epoch": 0.07, "learning_rate": 0.0005936441201232162, "loss": 4.1373, "step": 4260 }, { "epoch": 0.07, "learning_rate": 0.0005936272650903232, "loss": 4.2981, "step": 4265 }, { "epoch": 0.07, "learning_rate": 0.0005936103879781647, "loss": 4.1283, "step": 4270 }, { "epoch": 0.07, "learning_rate": 0.0005935934887880099, "loss": 4.2305, "step": 4275 }, { "epoch": 0.07, "learning_rate": 0.0005935765675211292, "loss": 4.1224, "step": 4280 }, { "epoch": 0.07, "learning_rate": 0.0005935596241787953, "loss": 4.2186, "step": 4285 }, { "epoch": 0.07, "learning_rate": 0.0005935426587622821, "loss": 4.2636, "step": 4290 }, { "epoch": 0.07, "learning_rate": 0.0005935256712728653, "loss": 4.1941, "step": 4295 }, { "epoch": 0.07, "learning_rate": 0.0005935086617118224, "loss": 4.1388, "step": 4300 }, { "epoch": 0.07, "learning_rate": 0.0005934916300804322, "loss": 4.1762, "step": 4305 }, { "epoch": 0.07, "learning_rate": 0.0005934745763799756, "loss": 4.2763, "step": 4310 }, { "epoch": 0.07, "learning_rate": 0.0005934575006117349, "loss": 4.2378, "step": 4315 }, { "epoch": 0.07, "learning_rate": 0.0005934404027769941, "loss": 4.2348, "step": 4320 }, { "epoch": 0.07, "learning_rate": 0.000593423282877039, "loss": 4.203, "step": 4325 }, { "epoch": 0.07, "learning_rate": 0.0005934061409131567, "loss": 4.2091, "step": 4330 }, { "epoch": 0.08, "learning_rate": 0.0005933889768866362, "loss": 4.1984, "step": 4335 }, { "epoch": 0.08, "learning_rate": 0.0005933717907987682, "loss": 4.2025, "step": 4340 }, { "epoch": 0.08, "learning_rate": 0.0005933545826508453, "loss": 4.2632, "step": 4345 }, { "epoch": 0.08, "learning_rate": 0.000593337352444161, "loss": 4.2896, "step": 4350 }, { "epoch": 0.08, "learning_rate": 0.0005933201001800111, "loss": 4.2299, "step": 4355 }, { "epoch": 0.08, "learning_rate": 0.0005933028258596931, "loss": 4.1947, "step": 4360 }, { "epoch": 0.08, "learning_rate": 0.0005932855294845055, "loss": 4.176, "step": 4365 }, { "epoch": 0.08, "learning_rate": 0.0005932682110557494, "loss": 4.087, "step": 4370 }, { "epoch": 0.08, "learning_rate": 0.0005932508705747266, "loss": 4.1089, "step": 4375 }, { "epoch": 0.08, "learning_rate": 0.0005932335080427413, "loss": 4.1443, "step": 4380 }, { "epoch": 0.08, "learning_rate": 0.000593216123461099, "loss": 4.2938, "step": 4385 }, { "epoch": 0.08, "learning_rate": 0.0005931987168311069, "loss": 4.2061, "step": 4390 }, { "epoch": 0.08, "learning_rate": 0.000593181288154074, "loss": 4.1735, "step": 4395 }, { "epoch": 0.08, "learning_rate": 0.0005931638374313107, "loss": 4.248, "step": 4400 }, { "epoch": 0.08, "learning_rate": 0.0005931463646641292, "loss": 4.1558, "step": 4405 }, { "epoch": 0.08, "learning_rate": 0.0005931288698538434, "loss": 4.2654, "step": 4410 }, { "epoch": 0.08, "learning_rate": 0.000593111353001769, "loss": 4.0433, "step": 4415 }, { "epoch": 0.08, "learning_rate": 0.000593093814109223, "loss": 4.217, "step": 4420 }, { "epoch": 0.08, "learning_rate": 0.0005930762531775242, "loss": 4.1692, "step": 4425 }, { "epoch": 0.08, "learning_rate": 0.0005930586702079931, "loss": 4.0659, "step": 4430 }, { "epoch": 0.08, "learning_rate": 0.000593041065201952, "loss": 4.219, "step": 4435 }, { "epoch": 0.08, "learning_rate": 0.0005930234381607246, "loss": 4.235, "step": 4440 }, { "epoch": 0.08, "learning_rate": 0.0005930057890856364, "loss": 4.1474, "step": 4445 }, { "epoch": 0.08, "learning_rate": 0.0005929881179780144, "loss": 4.2813, "step": 4450 }, { "epoch": 0.08, "learning_rate": 0.0005929704248391875, "loss": 4.1081, "step": 4455 }, { "epoch": 0.08, "learning_rate": 0.0005929527096704862, "loss": 4.1982, "step": 4460 }, { "epoch": 0.08, "learning_rate": 0.0005929349724732424, "loss": 4.2255, "step": 4465 }, { "epoch": 0.08, "learning_rate": 0.0005929172132487899, "loss": 4.1753, "step": 4470 }, { "epoch": 0.08, "learning_rate": 0.0005928994319984643, "loss": 4.2111, "step": 4475 }, { "epoch": 0.08, "learning_rate": 0.0005928816287236023, "loss": 4.3088, "step": 4480 }, { "epoch": 0.08, "learning_rate": 0.0005928638034255429, "loss": 4.3208, "step": 4485 }, { "epoch": 0.08, "learning_rate": 0.0005928459561056264, "loss": 4.2369, "step": 4490 }, { "epoch": 0.08, "learning_rate": 0.0005928280867651947, "loss": 4.252, "step": 4495 }, { "epoch": 0.08, "learning_rate": 0.0005928101954055918, "loss": 4.1711, "step": 4500 }, { "epoch": 0.08, "eval_loss": 4.222765922546387, "eval_runtime": 150.6785, "eval_samples_per_second": 12.218, "eval_steps_per_second": 0.77, "step": 4500 }, { "epoch": 0.08, "learning_rate": 0.0005927922820281627, "loss": 4.2227, "step": 4505 }, { "epoch": 0.08, "learning_rate": 0.0005927743466342545, "loss": 4.1118, "step": 4510 }, { "epoch": 0.08, "learning_rate": 0.000592756389225216, "loss": 4.1699, "step": 4515 }, { "epoch": 0.08, "learning_rate": 0.0005927384098023973, "loss": 4.1542, "step": 4520 }, { "epoch": 0.08, "learning_rate": 0.0005927204083671505, "loss": 4.1589, "step": 4525 }, { "epoch": 0.08, "learning_rate": 0.0005927023849208291, "loss": 4.1576, "step": 4530 }, { "epoch": 0.08, "learning_rate": 0.0005926843394647884, "loss": 4.1702, "step": 4535 }, { "epoch": 0.08, "learning_rate": 0.0005926662720003855, "loss": 4.1987, "step": 4540 }, { "epoch": 0.08, "learning_rate": 0.0005926481825289788, "loss": 4.1527, "step": 4545 }, { "epoch": 0.08, "learning_rate": 0.0005926300710519285, "loss": 4.147, "step": 4550 }, { "epoch": 0.08, "learning_rate": 0.0005926119375705965, "loss": 4.0946, "step": 4555 }, { "epoch": 0.08, "learning_rate": 0.0005925937820863465, "loss": 4.2065, "step": 4560 }, { "epoch": 0.08, "learning_rate": 0.0005925756046005436, "loss": 4.2049, "step": 4565 }, { "epoch": 0.08, "learning_rate": 0.0005925574051145546, "loss": 4.1496, "step": 4570 }, { "epoch": 0.08, "learning_rate": 0.0005925391836297481, "loss": 4.0682, "step": 4575 }, { "epoch": 0.08, "learning_rate": 0.0005925209401474943, "loss": 4.1447, "step": 4580 }, { "epoch": 0.08, "learning_rate": 0.0005925026746691649, "loss": 4.1023, "step": 4585 }, { "epoch": 0.08, "learning_rate": 0.0005924843871961334, "loss": 4.207, "step": 4590 }, { "epoch": 0.08, "learning_rate": 0.0005924660777297749, "loss": 4.1938, "step": 4595 }, { "epoch": 0.08, "learning_rate": 0.0005924477462714664, "loss": 4.1987, "step": 4600 }, { "epoch": 0.08, "learning_rate": 0.000592429392822586, "loss": 4.2236, "step": 4605 }, { "epoch": 0.08, "learning_rate": 0.0005924110173845139, "loss": 4.2157, "step": 4610 }, { "epoch": 0.08, "learning_rate": 0.0005923926199586319, "loss": 4.0652, "step": 4615 }, { "epoch": 0.08, "learning_rate": 0.0005923742005463235, "loss": 4.1559, "step": 4620 }, { "epoch": 0.08, "learning_rate": 0.0005923557591489735, "loss": 4.1209, "step": 4625 }, { "epoch": 0.08, "learning_rate": 0.0005923372957679688, "loss": 4.0978, "step": 4630 }, { "epoch": 0.08, "learning_rate": 0.0005923188104046975, "loss": 4.1673, "step": 4635 }, { "epoch": 0.08, "learning_rate": 0.0005923003030605499, "loss": 4.1935, "step": 4640 }, { "epoch": 0.08, "learning_rate": 0.0005922817737369175, "loss": 4.0448, "step": 4645 }, { "epoch": 0.08, "learning_rate": 0.0005922632224351936, "loss": 3.9366, "step": 4650 }, { "epoch": 0.08, "learning_rate": 0.0005922446491567733, "loss": 4.1289, "step": 4655 }, { "epoch": 0.08, "learning_rate": 0.000592226053903053, "loss": 4.1816, "step": 4660 }, { "epoch": 0.08, "learning_rate": 0.000592207436675431, "loss": 4.1968, "step": 4665 }, { "epoch": 0.08, "learning_rate": 0.0005921887974753075, "loss": 4.2072, "step": 4670 }, { "epoch": 0.08, "learning_rate": 0.0005921701363040837, "loss": 4.3708, "step": 4675 }, { "epoch": 0.08, "learning_rate": 0.0005921514531631631, "loss": 4.1242, "step": 4680 }, { "epoch": 0.08, "learning_rate": 0.0005921327480539505, "loss": 4.2098, "step": 4685 }, { "epoch": 0.08, "learning_rate": 0.0005921140209778524, "loss": 4.0701, "step": 4690 }, { "epoch": 0.08, "learning_rate": 0.0005920952719362769, "loss": 4.2459, "step": 4695 }, { "epoch": 0.08, "learning_rate": 0.0005920765009306341, "loss": 4.1828, "step": 4700 }, { "epoch": 0.08, "learning_rate": 0.000592057707962335, "loss": 4.0961, "step": 4705 }, { "epoch": 0.08, "learning_rate": 0.0005920388930327932, "loss": 4.1396, "step": 4710 }, { "epoch": 0.08, "learning_rate": 0.0005920200561434234, "loss": 4.191, "step": 4715 }, { "epoch": 0.08, "learning_rate": 0.0005920011972956418, "loss": 4.1557, "step": 4720 }, { "epoch": 0.08, "learning_rate": 0.0005919823164908667, "loss": 4.1885, "step": 4725 }, { "epoch": 0.08, "learning_rate": 0.0005919634137305179, "loss": 4.0797, "step": 4730 }, { "epoch": 0.08, "learning_rate": 0.0005919444890160165, "loss": 4.1649, "step": 4735 }, { "epoch": 0.08, "learning_rate": 0.0005919255423487859, "loss": 4.0826, "step": 4740 }, { "epoch": 0.08, "learning_rate": 0.0005919065737302505, "loss": 4.1142, "step": 4745 }, { "epoch": 0.08, "learning_rate": 0.0005918875831618367, "loss": 4.1676, "step": 4750 }, { "epoch": 0.08, "learning_rate": 0.0005918685706449726, "loss": 4.0972, "step": 4755 }, { "epoch": 0.08, "learning_rate": 0.0005918495361810878, "loss": 4.1348, "step": 4760 }, { "epoch": 0.08, "learning_rate": 0.0005918304797716136, "loss": 4.1432, "step": 4765 }, { "epoch": 0.08, "learning_rate": 0.0005918114014179829, "loss": 4.2454, "step": 4770 }, { "epoch": 0.08, "learning_rate": 0.0005917923011216304, "loss": 4.1003, "step": 4775 }, { "epoch": 0.08, "learning_rate": 0.0005917731788839921, "loss": 4.1819, "step": 4780 }, { "epoch": 0.08, "learning_rate": 0.0005917540347065062, "loss": 4.188, "step": 4785 }, { "epoch": 0.08, "learning_rate": 0.0005917348685906121, "loss": 4.1215, "step": 4790 }, { "epoch": 0.08, "learning_rate": 0.000591715680537751, "loss": 4.1963, "step": 4795 }, { "epoch": 0.08, "learning_rate": 0.0005916964705493656, "loss": 4.1186, "step": 4800 }, { "epoch": 0.08, "learning_rate": 0.0005916772386269007, "loss": 4.1362, "step": 4805 }, { "epoch": 0.08, "learning_rate": 0.0005916579847718022, "loss": 4.2354, "step": 4810 }, { "epoch": 0.08, "learning_rate": 0.000591638708985518, "loss": 4.1049, "step": 4815 }, { "epoch": 0.08, "learning_rate": 0.0005916194112694976, "loss": 4.1688, "step": 4820 }, { "epoch": 0.08, "learning_rate": 0.0005916000916251918, "loss": 4.0749, "step": 4825 }, { "epoch": 0.08, "learning_rate": 0.0005915807500540537, "loss": 4.1612, "step": 4830 }, { "epoch": 0.08, "learning_rate": 0.0005915613865575374, "loss": 4.1247, "step": 4835 }, { "epoch": 0.08, "learning_rate": 0.0005915420011370992, "loss": 4.2301, "step": 4840 }, { "epoch": 0.08, "learning_rate": 0.0005915225937941965, "loss": 4.1453, "step": 4845 }, { "epoch": 0.08, "learning_rate": 0.0005915031645302889, "loss": 4.2323, "step": 4850 }, { "epoch": 0.08, "learning_rate": 0.0005914837133468372, "loss": 4.2572, "step": 4855 }, { "epoch": 0.08, "learning_rate": 0.000591464240245304, "loss": 4.1619, "step": 4860 }, { "epoch": 0.08, "learning_rate": 0.0005914447452271537, "loss": 4.1835, "step": 4865 }, { "epoch": 0.08, "learning_rate": 0.0005914252282938524, "loss": 4.1756, "step": 4870 }, { "epoch": 0.08, "learning_rate": 0.0005914056894468671, "loss": 4.2158, "step": 4875 }, { "epoch": 0.08, "learning_rate": 0.0005913861286876677, "loss": 4.1847, "step": 4880 }, { "epoch": 0.08, "learning_rate": 0.0005913665460177246, "loss": 4.1548, "step": 4885 }, { "epoch": 0.08, "learning_rate": 0.0005913469414385105, "loss": 4.0973, "step": 4890 }, { "epoch": 0.08, "learning_rate": 0.0005913273149514996, "loss": 4.1362, "step": 4895 }, { "epoch": 0.08, "learning_rate": 0.0005913076665581675, "loss": 4.1658, "step": 4900 }, { "epoch": 0.08, "learning_rate": 0.000591287996259992, "loss": 4.1478, "step": 4905 }, { "epoch": 0.08, "learning_rate": 0.0005912683040584519, "loss": 4.1785, "step": 4910 }, { "epoch": 0.09, "learning_rate": 0.000591248589955028, "loss": 4.2769, "step": 4915 }, { "epoch": 0.09, "learning_rate": 0.0005912288539512029, "loss": 4.1097, "step": 4920 }, { "epoch": 0.09, "learning_rate": 0.0005912090960484603, "loss": 4.1856, "step": 4925 }, { "epoch": 0.09, "learning_rate": 0.0005911893162482862, "loss": 4.2468, "step": 4930 }, { "epoch": 0.09, "learning_rate": 0.0005911695145521679, "loss": 4.2657, "step": 4935 }, { "epoch": 0.09, "learning_rate": 0.0005911496909615943, "loss": 4.0184, "step": 4940 }, { "epoch": 0.09, "learning_rate": 0.000591129845478056, "loss": 4.0345, "step": 4945 }, { "epoch": 0.09, "learning_rate": 0.0005911099781030454, "loss": 4.0501, "step": 4950 }, { "epoch": 0.09, "learning_rate": 0.0005910900888380564, "loss": 4.1289, "step": 4955 }, { "epoch": 0.09, "learning_rate": 0.0005910701776845845, "loss": 4.1621, "step": 4960 }, { "epoch": 0.09, "learning_rate": 0.0005910502446441269, "loss": 4.0817, "step": 4965 }, { "epoch": 0.09, "learning_rate": 0.0005910302897181826, "loss": 4.1124, "step": 4970 }, { "epoch": 0.09, "learning_rate": 0.0005910103129082519, "loss": 4.1758, "step": 4975 }, { "epoch": 0.09, "learning_rate": 0.0005909903142158372, "loss": 4.0607, "step": 4980 }, { "epoch": 0.09, "learning_rate": 0.0005909702936424421, "loss": 4.1467, "step": 4985 }, { "epoch": 0.09, "learning_rate": 0.000590950251189572, "loss": 4.2868, "step": 4990 }, { "epoch": 0.09, "learning_rate": 0.0005909301868587343, "loss": 4.1972, "step": 4995 }, { "epoch": 0.09, "learning_rate": 0.0005909101006514374, "loss": 4.1593, "step": 5000 }, { "epoch": 0.09, "eval_loss": 4.196778774261475, "eval_runtime": 150.8836, "eval_samples_per_second": 12.201, "eval_steps_per_second": 0.769, "step": 5000 }, { "epoch": 0.09, "learning_rate": 0.000590889992569192, "loss": 4.1782, "step": 5005 }, { "epoch": 0.09, "learning_rate": 0.0005908698626135098, "loss": 4.1774, "step": 5010 }, { "epoch": 0.09, "learning_rate": 0.0005908497107859046, "loss": 4.1228, "step": 5015 }, { "epoch": 0.09, "learning_rate": 0.0005908295370878918, "loss": 4.1645, "step": 5020 }, { "epoch": 0.09, "learning_rate": 0.0005908093415209883, "loss": 4.1598, "step": 5025 }, { "epoch": 0.09, "learning_rate": 0.0005907891240867127, "loss": 4.033, "step": 5030 }, { "epoch": 0.09, "learning_rate": 0.0005907688847865851, "loss": 4.1476, "step": 5035 }, { "epoch": 0.09, "learning_rate": 0.0005907486236221278, "loss": 4.0968, "step": 5040 }, { "epoch": 0.09, "learning_rate": 0.0005907283405948638, "loss": 4.273, "step": 5045 }, { "epoch": 0.09, "learning_rate": 0.0005907080357063187, "loss": 4.1311, "step": 5050 }, { "epoch": 0.09, "learning_rate": 0.000590687708958019, "loss": 3.9782, "step": 5055 }, { "epoch": 0.09, "learning_rate": 0.0005906673603514936, "loss": 4.1972, "step": 5060 }, { "epoch": 0.09, "learning_rate": 0.0005906469898882721, "loss": 4.0807, "step": 5065 }, { "epoch": 0.09, "learning_rate": 0.0005906265975698867, "loss": 4.0946, "step": 5070 }, { "epoch": 0.09, "learning_rate": 0.0005906061833978705, "loss": 4.1808, "step": 5075 }, { "epoch": 0.09, "learning_rate": 0.0005905857473737586, "loss": 4.0355, "step": 5080 }, { "epoch": 0.09, "learning_rate": 0.0005905652894990878, "loss": 4.2127, "step": 5085 }, { "epoch": 0.09, "learning_rate": 0.0005905448097753963, "loss": 4.0982, "step": 5090 }, { "epoch": 0.09, "learning_rate": 0.0005905243082042242, "loss": 4.1078, "step": 5095 }, { "epoch": 0.09, "learning_rate": 0.0005905037847871129, "loss": 4.1049, "step": 5100 }, { "epoch": 0.09, "learning_rate": 0.000590483239525606, "loss": 4.1008, "step": 5105 }, { "epoch": 0.09, "learning_rate": 0.0005904626724212481, "loss": 4.0823, "step": 5110 }, { "epoch": 0.09, "learning_rate": 0.0005904420834755858, "loss": 4.0588, "step": 5115 }, { "epoch": 0.09, "learning_rate": 0.0005904214726901672, "loss": 4.1389, "step": 5120 }, { "epoch": 0.09, "learning_rate": 0.0005904008400665424, "loss": 4.1278, "step": 5125 }, { "epoch": 0.09, "learning_rate": 0.0005903801856062627, "loss": 4.1523, "step": 5130 }, { "epoch": 0.09, "learning_rate": 0.0005903595093108812, "loss": 4.23, "step": 5135 }, { "epoch": 0.09, "learning_rate": 0.0005903388111819526, "loss": 4.046, "step": 5140 }, { "epoch": 0.09, "learning_rate": 0.0005903180912210335, "loss": 4.0819, "step": 5145 }, { "epoch": 0.09, "learning_rate": 0.0005902973494296816, "loss": 4.1195, "step": 5150 }, { "epoch": 0.09, "learning_rate": 0.0005902765858094569, "loss": 4.0833, "step": 5155 }, { "epoch": 0.09, "learning_rate": 0.0005902558003619205, "loss": 4.2782, "step": 5160 }, { "epoch": 0.09, "learning_rate": 0.0005902349930886357, "loss": 4.1168, "step": 5165 }, { "epoch": 0.09, "learning_rate": 0.0005902141639911666, "loss": 4.1654, "step": 5170 }, { "epoch": 0.09, "learning_rate": 0.0005901933130710797, "loss": 4.0838, "step": 5175 }, { "epoch": 0.09, "learning_rate": 0.000590172440329943, "loss": 4.0421, "step": 5180 }, { "epoch": 0.09, "learning_rate": 0.0005901515457693258, "loss": 4.1311, "step": 5185 }, { "epoch": 0.09, "learning_rate": 0.0005901306293907995, "loss": 4.0227, "step": 5190 }, { "epoch": 0.09, "learning_rate": 0.0005901096911959365, "loss": 4.2226, "step": 5195 }, { "epoch": 0.09, "learning_rate": 0.0005900887311863118, "loss": 4.1306, "step": 5200 }, { "epoch": 0.09, "learning_rate": 0.000590067749363501, "loss": 4.0604, "step": 5205 }, { "epoch": 0.09, "learning_rate": 0.0005900467457290821, "loss": 4.0125, "step": 5210 }, { "epoch": 0.09, "learning_rate": 0.0005900257202846344, "loss": 4.1222, "step": 5215 }, { "epoch": 0.09, "learning_rate": 0.0005900046730317388, "loss": 4.1159, "step": 5220 }, { "epoch": 0.09, "learning_rate": 0.0005899836039719782, "loss": 4.1893, "step": 5225 }, { "epoch": 0.09, "learning_rate": 0.0005899625131069367, "loss": 4.1073, "step": 5230 }, { "epoch": 0.09, "learning_rate": 0.0005899414004382001, "loss": 4.0826, "step": 5235 }, { "epoch": 0.09, "learning_rate": 0.0005899202659673563, "loss": 4.0734, "step": 5240 }, { "epoch": 0.09, "learning_rate": 0.0005898991096959941, "loss": 4.1146, "step": 5245 }, { "epoch": 0.09, "learning_rate": 0.0005898779316257047, "loss": 4.0675, "step": 5250 }, { "epoch": 0.09, "learning_rate": 0.0005898567317580804, "loss": 4.0328, "step": 5255 }, { "epoch": 0.09, "learning_rate": 0.0005898355100947154, "loss": 4.0624, "step": 5260 }, { "epoch": 0.09, "learning_rate": 0.0005898142666372055, "loss": 4.0561, "step": 5265 }, { "epoch": 0.09, "learning_rate": 0.0005897930013871478, "loss": 4.0266, "step": 5270 }, { "epoch": 0.09, "learning_rate": 0.0005897717143461418, "loss": 3.9118, "step": 5275 }, { "epoch": 0.09, "learning_rate": 0.0005897504055157878, "loss": 4.1576, "step": 5280 }, { "epoch": 0.09, "learning_rate": 0.0005897290748976883, "loss": 4.0451, "step": 5285 }, { "epoch": 0.09, "learning_rate": 0.0005897077224934471, "loss": 4.2013, "step": 5290 }, { "epoch": 0.09, "learning_rate": 0.00058968634830467, "loss": 4.1097, "step": 5295 }, { "epoch": 0.09, "learning_rate": 0.0005896649523329642, "loss": 4.1492, "step": 5300 }, { "epoch": 0.09, "learning_rate": 0.0005896435345799383, "loss": 4.0192, "step": 5305 }, { "epoch": 0.09, "learning_rate": 0.0005896220950472031, "loss": 4.1101, "step": 5310 }, { "epoch": 0.09, "learning_rate": 0.0005896006337363707, "loss": 4.1892, "step": 5315 }, { "epoch": 0.09, "learning_rate": 0.0005895791506490548, "loss": 4.1187, "step": 5320 }, { "epoch": 0.09, "learning_rate": 0.0005895576457868708, "loss": 4.0377, "step": 5325 }, { "epoch": 0.09, "learning_rate": 0.0005895361191514358, "loss": 4.0711, "step": 5330 }, { "epoch": 0.09, "learning_rate": 0.0005895145707443684, "loss": 4.0841, "step": 5335 }, { "epoch": 0.09, "learning_rate": 0.0005894930005672892, "loss": 4.0504, "step": 5340 }, { "epoch": 0.09, "learning_rate": 0.0005894714086218199, "loss": 4.1208, "step": 5345 }, { "epoch": 0.09, "learning_rate": 0.0005894497949095841, "loss": 4.1813, "step": 5350 }, { "epoch": 0.09, "learning_rate": 0.0005894281594322073, "loss": 4.1122, "step": 5355 }, { "epoch": 0.09, "learning_rate": 0.0005894065021913161, "loss": 4.0417, "step": 5360 }, { "epoch": 0.09, "learning_rate": 0.0005893848231885391, "loss": 4.0673, "step": 5365 }, { "epoch": 0.09, "learning_rate": 0.0005893631224255066, "loss": 3.9842, "step": 5370 }, { "epoch": 0.09, "learning_rate": 0.0005893413999038501, "loss": 4.0419, "step": 5375 }, { "epoch": 0.09, "learning_rate": 0.0005893196556252034, "loss": 4.0908, "step": 5380 }, { "epoch": 0.09, "learning_rate": 0.0005892978895912011, "loss": 4.1134, "step": 5385 }, { "epoch": 0.09, "learning_rate": 0.0005892761018034803, "loss": 4.0354, "step": 5390 }, { "epoch": 0.09, "learning_rate": 0.0005892542922636791, "loss": 4.1377, "step": 5395 }, { "epoch": 0.09, "learning_rate": 0.0005892324609734373, "loss": 4.1254, "step": 5400 }, { "epoch": 0.09, "learning_rate": 0.000589210607934397, "loss": 4.0462, "step": 5405 }, { "epoch": 0.09, "learning_rate": 0.000589188733148201, "loss": 4.1928, "step": 5410 }, { "epoch": 0.09, "learning_rate": 0.0005891668366164945, "loss": 4.109, "step": 5415 }, { "epoch": 0.09, "learning_rate": 0.0005891449183409238, "loss": 3.9687, "step": 5420 }, { "epoch": 0.09, "learning_rate": 0.0005891229783231371, "loss": 4.083, "step": 5425 }, { "epoch": 0.09, "learning_rate": 0.0005891010165647841, "loss": 4.0274, "step": 5430 }, { "epoch": 0.09, "learning_rate": 0.0005890790330675162, "loss": 4.0976, "step": 5435 }, { "epoch": 0.09, "learning_rate": 0.0005890570278329867, "loss": 4.0589, "step": 5440 }, { "epoch": 0.09, "learning_rate": 0.00058903500086285, "loss": 4.1165, "step": 5445 }, { "epoch": 0.09, "learning_rate": 0.0005890129521587625, "loss": 4.1122, "step": 5450 }, { "epoch": 0.09, "learning_rate": 0.0005889908817223822, "loss": 4.094, "step": 5455 }, { "epoch": 0.09, "learning_rate": 0.0005889687895553687, "loss": 4.0328, "step": 5460 }, { "epoch": 0.09, "learning_rate": 0.0005889466756593831, "loss": 4.0535, "step": 5465 }, { "epoch": 0.09, "learning_rate": 0.0005889245400360883, "loss": 4.1173, "step": 5470 }, { "epoch": 0.09, "learning_rate": 0.0005889023826871489, "loss": 4.1297, "step": 5475 }, { "epoch": 0.09, "learning_rate": 0.0005888802036142308, "loss": 4.0227, "step": 5480 }, { "epoch": 0.09, "learning_rate": 0.0005888580028190019, "loss": 4.1865, "step": 5485 }, { "epoch": 0.1, "learning_rate": 0.0005888357803031317, "loss": 4.0646, "step": 5490 }, { "epoch": 0.1, "learning_rate": 0.000588813536068291, "loss": 4.0189, "step": 5495 }, { "epoch": 0.1, "learning_rate": 0.0005887912701161524, "loss": 4.0756, "step": 5500 }, { "epoch": 0.1, "eval_loss": 4.124629974365234, "eval_runtime": 150.3809, "eval_samples_per_second": 12.242, "eval_steps_per_second": 0.771, "step": 5500 }, { "epoch": 0.1, "learning_rate": 0.0005887689824483905, "loss": 4.0788, "step": 5505 }, { "epoch": 0.1, "learning_rate": 0.000588746673066681, "loss": 4.0131, "step": 5510 }, { "epoch": 0.1, "learning_rate": 0.0005887243419727013, "loss": 4.0943, "step": 5515 }, { "epoch": 0.1, "learning_rate": 0.000588701989168131, "loss": 4.1031, "step": 5520 }, { "epoch": 0.1, "learning_rate": 0.0005886796146546507, "loss": 4.0249, "step": 5525 }, { "epoch": 0.1, "learning_rate": 0.0005886572184339427, "loss": 4.1008, "step": 5530 }, { "epoch": 0.1, "learning_rate": 0.0005886348005076913, "loss": 4.0013, "step": 5535 }, { "epoch": 0.1, "learning_rate": 0.0005886123608775821, "loss": 4.0819, "step": 5540 }, { "epoch": 0.1, "learning_rate": 0.0005885898995453024, "loss": 3.9572, "step": 5545 }, { "epoch": 0.1, "learning_rate": 0.0005885674165125415, "loss": 4.0845, "step": 5550 }, { "epoch": 0.1, "learning_rate": 0.0005885449117809896, "loss": 4.0497, "step": 5555 }, { "epoch": 0.1, "learning_rate": 0.000588522385352339, "loss": 4.1316, "step": 5560 }, { "epoch": 0.1, "learning_rate": 0.0005884998372282838, "loss": 4.0639, "step": 5565 }, { "epoch": 0.1, "learning_rate": 0.0005884772674105193, "loss": 4.1424, "step": 5570 }, { "epoch": 0.1, "learning_rate": 0.0005884546759007428, "loss": 4.1344, "step": 5575 }, { "epoch": 0.1, "learning_rate": 0.000588432062700653, "loss": 4.1834, "step": 5580 }, { "epoch": 0.1, "learning_rate": 0.0005884094278119502, "loss": 4.0697, "step": 5585 }, { "epoch": 0.1, "learning_rate": 0.0005883867712363365, "loss": 4.1778, "step": 5590 }, { "epoch": 0.1, "learning_rate": 0.0005883640929755156, "loss": 3.9826, "step": 5595 }, { "epoch": 0.1, "learning_rate": 0.0005883413930311928, "loss": 4.1734, "step": 5600 }, { "epoch": 0.1, "learning_rate": 0.0005883186714050749, "loss": 3.9475, "step": 5605 }, { "epoch": 0.1, "learning_rate": 0.0005882959280988705, "loss": 4.0677, "step": 5610 }, { "epoch": 0.1, "learning_rate": 0.0005882731631142898, "loss": 3.981, "step": 5615 }, { "epoch": 0.1, "learning_rate": 0.0005882503764530447, "loss": 4.1465, "step": 5620 }, { "epoch": 0.1, "learning_rate": 0.0005882275681168485, "loss": 4.1284, "step": 5625 }, { "epoch": 0.1, "learning_rate": 0.0005882047381074163, "loss": 4.128, "step": 5630 }, { "epoch": 0.1, "learning_rate": 0.0005881818864264648, "loss": 3.9485, "step": 5635 }, { "epoch": 0.1, "learning_rate": 0.0005881590130757124, "loss": 4.0819, "step": 5640 }, { "epoch": 0.1, "learning_rate": 0.000588136118056879, "loss": 4.0794, "step": 5645 }, { "epoch": 0.1, "learning_rate": 0.0005881132013716862, "loss": 4.0057, "step": 5650 }, { "epoch": 0.1, "learning_rate": 0.0005880902630218571, "loss": 4.126, "step": 5655 }, { "epoch": 0.1, "learning_rate": 0.0005880673030091169, "loss": 4.1839, "step": 5660 }, { "epoch": 0.1, "learning_rate": 0.0005880443213351917, "loss": 4.0048, "step": 5665 }, { "epoch": 0.1, "learning_rate": 0.0005880213180018097, "loss": 3.9995, "step": 5670 }, { "epoch": 0.1, "learning_rate": 0.0005879982930107007, "loss": 4.047, "step": 5675 }, { "epoch": 0.1, "learning_rate": 0.000587975246363596, "loss": 4.0557, "step": 5680 }, { "epoch": 0.1, "learning_rate": 0.0005879521780622287, "loss": 3.9903, "step": 5685 }, { "epoch": 0.1, "learning_rate": 0.0005879290881083333, "loss": 4.0698, "step": 5690 }, { "epoch": 0.1, "learning_rate": 0.0005879059765036461, "loss": 4.0808, "step": 5695 }, { "epoch": 0.1, "learning_rate": 0.000587882843249905, "loss": 3.9814, "step": 5700 }, { "epoch": 0.1, "learning_rate": 0.0005878596883488493, "loss": 4.0462, "step": 5705 }, { "epoch": 0.1, "learning_rate": 0.0005878365118022205, "loss": 4.057, "step": 5710 }, { "epoch": 0.1, "learning_rate": 0.000587813313611761, "loss": 4.0648, "step": 5715 }, { "epoch": 0.1, "learning_rate": 0.0005877900937792154, "loss": 4.0972, "step": 5720 }, { "epoch": 0.1, "learning_rate": 0.0005877668523063297, "loss": 4.1129, "step": 5725 }, { "epoch": 0.1, "learning_rate": 0.0005877435891948514, "loss": 4.055, "step": 5730 }, { "epoch": 0.1, "learning_rate": 0.0005877203044465299, "loss": 4.0274, "step": 5735 }, { "epoch": 0.1, "learning_rate": 0.0005876969980631161, "loss": 4.1068, "step": 5740 }, { "epoch": 0.1, "learning_rate": 0.0005876736700463623, "loss": 4.0272, "step": 5745 }, { "epoch": 0.1, "learning_rate": 0.000587650320398023, "loss": 4.0066, "step": 5750 }, { "epoch": 0.1, "learning_rate": 0.0005876269491198536, "loss": 4.049, "step": 5755 }, { "epoch": 0.1, "learning_rate": 0.0005876035562136119, "loss": 3.9857, "step": 5760 }, { "epoch": 0.1, "learning_rate": 0.0005875801416810566, "loss": 3.9977, "step": 5765 }, { "epoch": 0.1, "learning_rate": 0.0005875567055239485, "loss": 3.978, "step": 5770 }, { "epoch": 0.1, "learning_rate": 0.0005875332477440499, "loss": 3.966, "step": 5775 }, { "epoch": 0.1, "learning_rate": 0.0005875097683431245, "loss": 3.9962, "step": 5780 }, { "epoch": 0.1, "learning_rate": 0.0005874862673229381, "loss": 4.0646, "step": 5785 }, { "epoch": 0.1, "learning_rate": 0.0005874627446852578, "loss": 4.0243, "step": 5790 }, { "epoch": 0.1, "learning_rate": 0.0005874392004318521, "loss": 3.9927, "step": 5795 }, { "epoch": 0.1, "learning_rate": 0.0005874156345644919, "loss": 4.0585, "step": 5800 }, { "epoch": 0.1, "learning_rate": 0.0005873920470849488, "loss": 4.0704, "step": 5805 }, { "epoch": 0.1, "learning_rate": 0.0005873684379949967, "loss": 4.1157, "step": 5810 }, { "epoch": 0.1, "learning_rate": 0.0005873448072964108, "loss": 4.0976, "step": 5815 }, { "epoch": 0.1, "learning_rate": 0.0005873211549909679, "loss": 4.0073, "step": 5820 }, { "epoch": 0.1, "learning_rate": 0.0005872974810804467, "loss": 3.999, "step": 5825 }, { "epoch": 0.1, "learning_rate": 0.0005872737855666273, "loss": 4.0376, "step": 5830 }, { "epoch": 0.1, "learning_rate": 0.0005872500684512915, "loss": 4.0754, "step": 5835 }, { "epoch": 0.1, "learning_rate": 0.0005872263297362227, "loss": 4.0787, "step": 5840 }, { "epoch": 0.1, "learning_rate": 0.000587202569423206, "loss": 4.0246, "step": 5845 }, { "epoch": 0.1, "learning_rate": 0.0005871787875140278, "loss": 4.0841, "step": 5850 }, { "epoch": 0.1, "learning_rate": 0.0005871549840104766, "loss": 4.0973, "step": 5855 }, { "epoch": 0.1, "learning_rate": 0.0005871311589143424, "loss": 4.1085, "step": 5860 }, { "epoch": 0.1, "learning_rate": 0.0005871073122274162, "loss": 4.0267, "step": 5865 }, { "epoch": 0.1, "learning_rate": 0.0005870834439514919, "loss": 4.0323, "step": 5870 }, { "epoch": 0.1, "learning_rate": 0.0005870595540883638, "loss": 4.0608, "step": 5875 }, { "epoch": 0.1, "learning_rate": 0.0005870356426398283, "loss": 4.0103, "step": 5880 }, { "epoch": 0.1, "learning_rate": 0.0005870117096076835, "loss": 3.9965, "step": 5885 }, { "epoch": 0.1, "learning_rate": 0.0005869877549937291, "loss": 4.087, "step": 5890 }, { "epoch": 0.1, "learning_rate": 0.0005869637787997663, "loss": 4.0467, "step": 5895 }, { "epoch": 0.1, "learning_rate": 0.000586939781027598, "loss": 4.0311, "step": 5900 }, { "epoch": 0.1, "learning_rate": 0.0005869157616790287, "loss": 4.0099, "step": 5905 }, { "epoch": 0.1, "learning_rate": 0.0005868917207558646, "loss": 4.0817, "step": 5910 }, { "epoch": 0.1, "learning_rate": 0.0005868676582599134, "loss": 3.966, "step": 5915 }, { "epoch": 0.1, "learning_rate": 0.0005868435741929845, "loss": 4.0285, "step": 5920 }, { "epoch": 0.1, "learning_rate": 0.0005868194685568886, "loss": 3.9462, "step": 5925 }, { "epoch": 0.1, "learning_rate": 0.0005867953413534389, "loss": 4.0755, "step": 5930 }, { "epoch": 0.1, "learning_rate": 0.0005867711925844492, "loss": 3.9712, "step": 5935 }, { "epoch": 0.1, "learning_rate": 0.0005867470222517355, "loss": 3.9844, "step": 5940 }, { "epoch": 0.1, "learning_rate": 0.0005867228303571153, "loss": 3.9981, "step": 5945 }, { "epoch": 0.1, "learning_rate": 0.0005866986169024077, "loss": 4.0011, "step": 5950 }, { "epoch": 0.1, "learning_rate": 0.0005866743818894334, "loss": 3.9929, "step": 5955 }, { "epoch": 0.1, "learning_rate": 0.0005866501253200148, "loss": 4.0131, "step": 5960 }, { "epoch": 0.1, "learning_rate": 0.0005866258471959758, "loss": 4.0858, "step": 5965 }, { "epoch": 0.1, "learning_rate": 0.0005866015475191421, "loss": 4.0778, "step": 5970 }, { "epoch": 0.1, "learning_rate": 0.0005865772262913407, "loss": 4.0723, "step": 5975 }, { "epoch": 0.1, "learning_rate": 0.0005865528835144007, "loss": 4.0553, "step": 5980 }, { "epoch": 0.1, "learning_rate": 0.0005865285191901524, "loss": 4.0564, "step": 5985 }, { "epoch": 0.1, "learning_rate": 0.0005865041333204278, "loss": 3.9492, "step": 5990 }, { "epoch": 0.1, "learning_rate": 0.0005864797259070607, "loss": 3.9973, "step": 5995 }, { "epoch": 0.1, "learning_rate": 0.0005864552969518865, "loss": 4.0681, "step": 6000 }, { "epoch": 0.1, "eval_loss": 4.073433876037598, "eval_runtime": 149.973, "eval_samples_per_second": 12.276, "eval_steps_per_second": 0.773, "step": 6000 }, { "epoch": 0.1, "learning_rate": 0.0005864308464567419, "loss": 4.0337, "step": 6005 }, { "epoch": 0.1, "learning_rate": 0.0005864063744234657, "loss": 4.0838, "step": 6010 }, { "epoch": 0.1, "learning_rate": 0.0005863818808538978, "loss": 4.003, "step": 6015 }, { "epoch": 0.1, "learning_rate": 0.0005863573657498803, "loss": 4.0435, "step": 6020 }, { "epoch": 0.1, "learning_rate": 0.0005863328291132564, "loss": 4.0459, "step": 6025 }, { "epoch": 0.1, "learning_rate": 0.0005863082709458711, "loss": 4.0842, "step": 6030 }, { "epoch": 0.1, "learning_rate": 0.0005862836912495713, "loss": 4.0807, "step": 6035 }, { "epoch": 0.1, "learning_rate": 0.0005862590900262049, "loss": 4.1169, "step": 6040 }, { "epoch": 0.1, "learning_rate": 0.0005862344672776221, "loss": 4.1037, "step": 6045 }, { "epoch": 0.1, "learning_rate": 0.0005862098230056743, "loss": 3.976, "step": 6050 }, { "epoch": 0.1, "learning_rate": 0.0005861851572122146, "loss": 4.0173, "step": 6055 }, { "epoch": 0.1, "learning_rate": 0.0005861604698990978, "loss": 4.0725, "step": 6060 }, { "epoch": 0.1, "learning_rate": 0.0005861357610681802, "loss": 4.0116, "step": 6065 }, { "epoch": 0.11, "learning_rate": 0.0005861110307213197, "loss": 4.0785, "step": 6070 }, { "epoch": 0.11, "learning_rate": 0.0005860862788603761, "loss": 4.0062, "step": 6075 }, { "epoch": 0.11, "learning_rate": 0.0005860615054872104, "loss": 4.0221, "step": 6080 }, { "epoch": 0.11, "learning_rate": 0.0005860367106036854, "loss": 4.0759, "step": 6085 }, { "epoch": 0.11, "learning_rate": 0.000586011894211666, "loss": 4.0736, "step": 6090 }, { "epoch": 0.11, "learning_rate": 0.0005859870563130177, "loss": 4.1106, "step": 6095 }, { "epoch": 0.11, "learning_rate": 0.0005859621969096085, "loss": 3.976, "step": 6100 }, { "epoch": 0.11, "learning_rate": 0.0005859373160033076, "loss": 3.9863, "step": 6105 }, { "epoch": 0.11, "learning_rate": 0.0005859124135959861, "loss": 4.0505, "step": 6110 }, { "epoch": 0.11, "learning_rate": 0.0005858874896895163, "loss": 4.0183, "step": 6115 }, { "epoch": 0.11, "learning_rate": 0.0005858625442857722, "loss": 3.9544, "step": 6120 }, { "epoch": 0.11, "learning_rate": 0.00058583757738663, "loss": 4.0514, "step": 6125 }, { "epoch": 0.11, "learning_rate": 0.0005858125889939669, "loss": 3.9714, "step": 6130 }, { "epoch": 0.11, "learning_rate": 0.0005857875791096617, "loss": 4.0567, "step": 6135 }, { "epoch": 0.11, "learning_rate": 0.0005857625477355954, "loss": 4.0103, "step": 6140 }, { "epoch": 0.11, "learning_rate": 0.00058573749487365, "loss": 3.9502, "step": 6145 }, { "epoch": 0.11, "learning_rate": 0.0005857124205257091, "loss": 4.0495, "step": 6150 }, { "epoch": 0.11, "learning_rate": 0.0005856873246936588, "loss": 4.1271, "step": 6155 }, { "epoch": 0.11, "learning_rate": 0.0005856622073793856, "loss": 4.0907, "step": 6160 }, { "epoch": 0.11, "learning_rate": 0.0005856370685847784, "loss": 4.0462, "step": 6165 }, { "epoch": 0.11, "learning_rate": 0.0005856119083117276, "loss": 4.026, "step": 6170 }, { "epoch": 0.11, "learning_rate": 0.0005855867265621249, "loss": 4.027, "step": 6175 }, { "epoch": 0.11, "learning_rate": 0.0005855615233378641, "loss": 3.9725, "step": 6180 }, { "epoch": 0.11, "learning_rate": 0.0005855362986408401, "loss": 4.0381, "step": 6185 }, { "epoch": 0.11, "learning_rate": 0.0005855110524729499, "loss": 4.0856, "step": 6190 }, { "epoch": 0.11, "learning_rate": 0.0005854857848360917, "loss": 4.059, "step": 6195 }, { "epoch": 0.11, "learning_rate": 0.0005854604957321657, "loss": 4.0826, "step": 6200 }, { "epoch": 0.11, "learning_rate": 0.0005854351851630732, "loss": 4.0564, "step": 6205 }, { "epoch": 0.11, "learning_rate": 0.0005854098531307177, "loss": 3.918, "step": 6210 }, { "epoch": 0.11, "learning_rate": 0.000585384499637004, "loss": 4.0235, "step": 6215 }, { "epoch": 0.11, "learning_rate": 0.0005853591246838384, "loss": 4.0396, "step": 6220 }, { "epoch": 0.11, "learning_rate": 0.0005853337282731291, "loss": 4.0262, "step": 6225 }, { "epoch": 0.11, "learning_rate": 0.0005853083104067858, "loss": 4.0257, "step": 6230 }, { "epoch": 0.11, "learning_rate": 0.0005852828710867197, "loss": 3.9727, "step": 6235 }, { "epoch": 0.11, "learning_rate": 0.0005852574103148437, "loss": 4.0742, "step": 6240 }, { "epoch": 0.11, "learning_rate": 0.0005852319280930724, "loss": 4.0149, "step": 6245 }, { "epoch": 0.11, "learning_rate": 0.000585206424423322, "loss": 4.0074, "step": 6250 }, { "epoch": 0.11, "learning_rate": 0.00058518089930751, "loss": 4.0104, "step": 6255 }, { "epoch": 0.11, "learning_rate": 0.000585155352747556, "loss": 4.0947, "step": 6260 }, { "epoch": 0.11, "learning_rate": 0.0005851297847453808, "loss": 4.0355, "step": 6265 }, { "epoch": 0.11, "learning_rate": 0.000585104195302907, "loss": 4.1322, "step": 6270 }, { "epoch": 0.11, "learning_rate": 0.000585078584422059, "loss": 4.2945, "step": 6275 }, { "epoch": 0.11, "learning_rate": 0.0005850529521047623, "loss": 4.0278, "step": 6280 }, { "epoch": 0.11, "learning_rate": 0.0005850272983529446, "loss": 4.0641, "step": 6285 }, { "epoch": 0.11, "learning_rate": 0.0005850016231685348, "loss": 4.0774, "step": 6290 }, { "epoch": 0.11, "learning_rate": 0.0005849759265534635, "loss": 4.0438, "step": 6295 }, { "epoch": 0.11, "learning_rate": 0.000584950208509663, "loss": 4.0762, "step": 6300 }, { "epoch": 0.11, "learning_rate": 0.000584924469039067, "loss": 3.9799, "step": 6305 }, { "epoch": 0.11, "learning_rate": 0.0005848987081436115, "loss": 4.0686, "step": 6310 }, { "epoch": 0.11, "learning_rate": 0.0005848729258252329, "loss": 3.993, "step": 6315 }, { "epoch": 0.11, "learning_rate": 0.0005848471220858705, "loss": 3.9435, "step": 6320 }, { "epoch": 0.11, "learning_rate": 0.0005848212969274643, "loss": 4.056, "step": 6325 }, { "epoch": 0.11, "learning_rate": 0.0005847954503519561, "loss": 4.0116, "step": 6330 }, { "epoch": 0.11, "learning_rate": 0.0005847695823612897, "loss": 3.9138, "step": 6335 }, { "epoch": 0.11, "learning_rate": 0.0005847436929574101, "loss": 4.0694, "step": 6340 }, { "epoch": 0.11, "learning_rate": 0.000584717782142264, "loss": 4.1302, "step": 6345 }, { "epoch": 0.11, "learning_rate": 0.0005846918499177998, "loss": 4.1207, "step": 6350 }, { "epoch": 0.11, "learning_rate": 0.0005846658962859677, "loss": 4.0034, "step": 6355 }, { "epoch": 0.11, "learning_rate": 0.000584639921248719, "loss": 3.8664, "step": 6360 }, { "epoch": 0.11, "learning_rate": 0.000584613924808007, "loss": 3.914, "step": 6365 }, { "epoch": 0.11, "learning_rate": 0.0005845879069657865, "loss": 3.9682, "step": 6370 }, { "epoch": 0.11, "learning_rate": 0.0005845618677240138, "loss": 3.9832, "step": 6375 }, { "epoch": 0.11, "learning_rate": 0.000584535807084647, "loss": 3.9809, "step": 6380 }, { "epoch": 0.11, "learning_rate": 0.0005845097250496457, "loss": 3.9714, "step": 6385 }, { "epoch": 0.11, "learning_rate": 0.0005844836216209714, "loss": 3.992, "step": 6390 }, { "epoch": 0.11, "learning_rate": 0.0005844574968005865, "loss": 3.9963, "step": 6395 }, { "epoch": 0.11, "learning_rate": 0.0005844313505904557, "loss": 4.062, "step": 6400 }, { "epoch": 0.11, "learning_rate": 0.0005844051829925451, "loss": 3.9474, "step": 6405 }, { "epoch": 0.11, "learning_rate": 0.0005843789940088223, "loss": 3.9946, "step": 6410 }, { "epoch": 0.11, "learning_rate": 0.0005843527836412565, "loss": 4.0315, "step": 6415 }, { "epoch": 0.11, "learning_rate": 0.0005843265518918185, "loss": 4.0435, "step": 6420 }, { "epoch": 0.11, "learning_rate": 0.0005843002987624812, "loss": 4.0449, "step": 6425 }, { "epoch": 0.11, "learning_rate": 0.0005842740242552182, "loss": 3.9634, "step": 6430 }, { "epoch": 0.11, "learning_rate": 0.0005842477283720058, "loss": 4.1614, "step": 6435 }, { "epoch": 0.11, "learning_rate": 0.0005842214111148207, "loss": 4.0237, "step": 6440 }, { "epoch": 0.11, "learning_rate": 0.0005841950724856422, "loss": 4.0143, "step": 6445 }, { "epoch": 0.11, "learning_rate": 0.0005841687124864506, "loss": 4.0254, "step": 6450 }, { "epoch": 0.11, "learning_rate": 0.0005841423311192283, "loss": 3.9378, "step": 6455 }, { "epoch": 0.11, "learning_rate": 0.000584115928385959, "loss": 4.0162, "step": 6460 }, { "epoch": 0.11, "learning_rate": 0.0005840895042886278, "loss": 4.0237, "step": 6465 }, { "epoch": 0.11, "learning_rate": 0.0005840630588292218, "loss": 3.951, "step": 6470 }, { "epoch": 0.11, "learning_rate": 0.0005840365920097296, "loss": 3.9824, "step": 6475 }, { "epoch": 0.11, "learning_rate": 0.0005840101038321414, "loss": 3.8698, "step": 6480 }, { "epoch": 0.11, "learning_rate": 0.0005839835942984488, "loss": 3.9789, "step": 6485 }, { "epoch": 0.11, "learning_rate": 0.0005839570634106455, "loss": 3.9527, "step": 6490 }, { "epoch": 0.11, "learning_rate": 0.0005839305111707262, "loss": 4.0282, "step": 6495 }, { "epoch": 0.11, "learning_rate": 0.0005839039375806875, "loss": 4.0262, "step": 6500 }, { "epoch": 0.11, "eval_loss": 4.031991481781006, "eval_runtime": 150.4726, "eval_samples_per_second": 12.235, "eval_steps_per_second": 0.771, "step": 6500 }, { "epoch": 0.11, "learning_rate": 0.0005838773426425277, "loss": 3.972, "step": 6505 }, { "epoch": 0.11, "learning_rate": 0.0005838507263582467, "loss": 3.908, "step": 6510 }, { "epoch": 0.11, "learning_rate": 0.0005838240887298457, "loss": 3.945, "step": 6515 }, { "epoch": 0.11, "learning_rate": 0.0005837974297593278, "loss": 3.9304, "step": 6520 }, { "epoch": 0.11, "learning_rate": 0.0005837707494486976, "loss": 4.0567, "step": 6525 }, { "epoch": 0.11, "learning_rate": 0.0005837440477999613, "loss": 4.0024, "step": 6530 }, { "epoch": 0.11, "learning_rate": 0.0005837173248151269, "loss": 4.0126, "step": 6535 }, { "epoch": 0.11, "learning_rate": 0.0005836905804962036, "loss": 3.9493, "step": 6540 }, { "epoch": 0.11, "learning_rate": 0.0005836638148452027, "loss": 4.0822, "step": 6545 }, { "epoch": 0.11, "learning_rate": 0.0005836370278641365, "loss": 3.9565, "step": 6550 }, { "epoch": 0.11, "learning_rate": 0.0005836102195550195, "loss": 4.0498, "step": 6555 }, { "epoch": 0.11, "learning_rate": 0.0005835833899198675, "loss": 4.138, "step": 6560 }, { "epoch": 0.11, "learning_rate": 0.0005835565389606978, "loss": 4.028, "step": 6565 }, { "epoch": 0.11, "learning_rate": 0.0005835296666795297, "loss": 3.8704, "step": 6570 }, { "epoch": 0.11, "learning_rate": 0.0005835027730783836, "loss": 4.0168, "step": 6575 }, { "epoch": 0.11, "learning_rate": 0.000583475858159282, "loss": 3.9846, "step": 6580 }, { "epoch": 0.11, "learning_rate": 0.0005834489219242486, "loss": 3.9255, "step": 6585 }, { "epoch": 0.11, "learning_rate": 0.000583421964375309, "loss": 3.9709, "step": 6590 }, { "epoch": 0.11, "learning_rate": 0.0005833949855144901, "loss": 3.971, "step": 6595 }, { "epoch": 0.11, "learning_rate": 0.0005833679853438208, "loss": 4.0187, "step": 6600 }, { "epoch": 0.11, "learning_rate": 0.0005833409638653311, "loss": 4.0978, "step": 6605 }, { "epoch": 0.11, "learning_rate": 0.000583313921081053, "loss": 4.0233, "step": 6610 }, { "epoch": 0.11, "learning_rate": 0.0005832868569930201, "loss": 3.9346, "step": 6615 }, { "epoch": 0.11, "learning_rate": 0.0005832597716032673, "loss": 3.9806, "step": 6620 }, { "epoch": 0.11, "learning_rate": 0.0005832326649138315, "loss": 3.9501, "step": 6625 }, { "epoch": 0.11, "learning_rate": 0.0005832055369267507, "loss": 3.8945, "step": 6630 }, { "epoch": 0.11, "learning_rate": 0.000583178387644065, "loss": 4.0411, "step": 6635 }, { "epoch": 0.11, "learning_rate": 0.0005831512170678157, "loss": 4.033, "step": 6640 }, { "epoch": 0.11, "learning_rate": 0.0005831240252000461, "loss": 3.9629, "step": 6645 }, { "epoch": 0.12, "learning_rate": 0.0005830968120428008, "loss": 3.9754, "step": 6650 }, { "epoch": 0.12, "learning_rate": 0.0005830695775981261, "loss": 3.961, "step": 6655 }, { "epoch": 0.12, "learning_rate": 0.0005830423218680698, "loss": 3.8399, "step": 6660 }, { "epoch": 0.12, "learning_rate": 0.0005830150448546816, "loss": 3.8657, "step": 6665 }, { "epoch": 0.12, "learning_rate": 0.0005829877465600122, "loss": 3.9245, "step": 6670 }, { "epoch": 0.12, "learning_rate": 0.0005829604269861149, "loss": 4.0341, "step": 6675 }, { "epoch": 0.12, "learning_rate": 0.0005829330861350433, "loss": 3.9792, "step": 6680 }, { "epoch": 0.12, "learning_rate": 0.0005829057240088538, "loss": 4.0263, "step": 6685 }, { "epoch": 0.12, "learning_rate": 0.0005828783406096037, "loss": 3.9451, "step": 6690 }, { "epoch": 0.12, "learning_rate": 0.0005828509359393522, "loss": 3.9552, "step": 6695 }, { "epoch": 0.12, "learning_rate": 0.0005828235100001597, "loss": 4.0922, "step": 6700 }, { "epoch": 0.12, "learning_rate": 0.0005827960627940888, "loss": 3.9879, "step": 6705 }, { "epoch": 0.12, "learning_rate": 0.0005827685943232034, "loss": 4.0245, "step": 6710 }, { "epoch": 0.12, "learning_rate": 0.0005827411045895687, "loss": 4.0227, "step": 6715 }, { "epoch": 0.12, "learning_rate": 0.0005827135935952521, "loss": 3.8635, "step": 6720 }, { "epoch": 0.12, "learning_rate": 0.0005826860613423221, "loss": 3.9943, "step": 6725 }, { "epoch": 0.12, "learning_rate": 0.0005826585078328489, "loss": 4.047, "step": 6730 }, { "epoch": 0.12, "learning_rate": 0.0005826309330689047, "loss": 3.9673, "step": 6735 }, { "epoch": 0.12, "learning_rate": 0.0005826033370525627, "loss": 3.9391, "step": 6740 }, { "epoch": 0.12, "learning_rate": 0.0005825757197858981, "loss": 4.0063, "step": 6745 }, { "epoch": 0.12, "learning_rate": 0.0005825480812709876, "loss": 3.9123, "step": 6750 }, { "epoch": 0.12, "learning_rate": 0.0005825204215099093, "loss": 4.0236, "step": 6755 }, { "epoch": 0.12, "learning_rate": 0.0005824927405047432, "loss": 3.9006, "step": 6760 }, { "epoch": 0.12, "learning_rate": 0.0005824650382575709, "loss": 3.9333, "step": 6765 }, { "epoch": 0.12, "learning_rate": 0.0005824373147704753, "loss": 3.9388, "step": 6770 }, { "epoch": 0.12, "learning_rate": 0.000582409570045541, "loss": 3.9877, "step": 6775 }, { "epoch": 0.12, "learning_rate": 0.0005823818040848545, "loss": 4.0653, "step": 6780 }, { "epoch": 0.12, "learning_rate": 0.0005823540168905034, "loss": 3.9874, "step": 6785 }, { "epoch": 0.12, "learning_rate": 0.0005823262084645773, "loss": 4.0155, "step": 6790 }, { "epoch": 0.12, "learning_rate": 0.0005822983788091672, "loss": 3.9023, "step": 6795 }, { "epoch": 0.12, "learning_rate": 0.0005822705279263658, "loss": 4.119, "step": 6800 }, { "epoch": 0.12, "learning_rate": 0.0005822426558182674, "loss": 3.99, "step": 6805 }, { "epoch": 0.12, "learning_rate": 0.0005822147624869676, "loss": 4.0494, "step": 6810 }, { "epoch": 0.12, "learning_rate": 0.000582186847934564, "loss": 3.9168, "step": 6815 }, { "epoch": 0.12, "learning_rate": 0.0005821589121631557, "loss": 3.8806, "step": 6820 }, { "epoch": 0.12, "learning_rate": 0.0005821309551748432, "loss": 3.7961, "step": 6825 }, { "epoch": 0.12, "learning_rate": 0.0005821029769717288, "loss": 3.7826, "step": 6830 }, { "epoch": 0.12, "learning_rate": 0.0005820749775559163, "loss": 3.9815, "step": 6835 }, { "epoch": 0.12, "learning_rate": 0.000582046956929511, "loss": 3.9875, "step": 6840 }, { "epoch": 0.12, "learning_rate": 0.0005820189150946201, "loss": 4.0475, "step": 6845 }, { "epoch": 0.12, "learning_rate": 0.0005819908520533521, "loss": 3.9757, "step": 6850 }, { "epoch": 0.12, "learning_rate": 0.0005819627678078172, "loss": 4.0318, "step": 6855 }, { "epoch": 0.12, "learning_rate": 0.0005819346623601271, "loss": 3.994, "step": 6860 }, { "epoch": 0.12, "learning_rate": 0.0005819065357123955, "loss": 4.0183, "step": 6865 }, { "epoch": 0.12, "learning_rate": 0.0005818783878667371, "loss": 3.8478, "step": 6870 }, { "epoch": 0.12, "learning_rate": 0.0005818502188252684, "loss": 3.9599, "step": 6875 }, { "epoch": 0.12, "learning_rate": 0.0005818220285901078, "loss": 3.9233, "step": 6880 }, { "epoch": 0.12, "learning_rate": 0.0005817938171633749, "loss": 3.9714, "step": 6885 }, { "epoch": 0.12, "learning_rate": 0.0005817655845471912, "loss": 4.0335, "step": 6890 }, { "epoch": 0.12, "learning_rate": 0.0005817373307436795, "loss": 3.9459, "step": 6895 }, { "epoch": 0.12, "learning_rate": 0.0005817090557549644, "loss": 3.8784, "step": 6900 }, { "epoch": 0.12, "learning_rate": 0.0005816807595831721, "loss": 3.9412, "step": 6905 }, { "epoch": 0.12, "learning_rate": 0.0005816524422304303, "loss": 3.8951, "step": 6910 }, { "epoch": 0.12, "learning_rate": 0.0005816241036988683, "loss": 3.9854, "step": 6915 }, { "epoch": 0.12, "learning_rate": 0.0005815957439906169, "loss": 3.8417, "step": 6920 }, { "epoch": 0.12, "learning_rate": 0.0005815673631078088, "loss": 3.9827, "step": 6925 }, { "epoch": 0.12, "learning_rate": 0.0005815389610525778, "loss": 4.0293, "step": 6930 }, { "epoch": 0.12, "learning_rate": 0.00058151053782706, "loss": 3.9579, "step": 6935 }, { "epoch": 0.12, "learning_rate": 0.0005814820934333925, "loss": 4.0445, "step": 6940 }, { "epoch": 0.12, "learning_rate": 0.0005814536278737141, "loss": 3.9389, "step": 6945 }, { "epoch": 0.12, "learning_rate": 0.0005814251411501652, "loss": 3.947, "step": 6950 }, { "epoch": 0.12, "learning_rate": 0.0005813966332648881, "loss": 3.9215, "step": 6955 }, { "epoch": 0.12, "learning_rate": 0.0005813681042200262, "loss": 3.8809, "step": 6960 }, { "epoch": 0.12, "learning_rate": 0.0005813395540177249, "loss": 3.9399, "step": 6965 }, { "epoch": 0.12, "learning_rate": 0.000581310982660131, "loss": 3.906, "step": 6970 }, { "epoch": 0.12, "learning_rate": 0.000581282390149393, "loss": 3.9693, "step": 6975 }, { "epoch": 0.12, "learning_rate": 0.0005812537764876606, "loss": 3.9784, "step": 6980 }, { "epoch": 0.12, "learning_rate": 0.0005812251416770857, "loss": 4.0167, "step": 6985 }, { "epoch": 0.12, "learning_rate": 0.0005811964857198214, "loss": 4.0349, "step": 6990 }, { "epoch": 0.12, "learning_rate": 0.0005811678086180225, "loss": 3.9012, "step": 6995 }, { "epoch": 0.12, "learning_rate": 0.0005811391103738454, "loss": 4.0148, "step": 7000 }, { "epoch": 0.12, "eval_loss": 3.98817777633667, "eval_runtime": 150.0793, "eval_samples_per_second": 12.267, "eval_steps_per_second": 0.773, "step": 7000 }, { "epoch": 0.12, "learning_rate": 0.000581110390989448, "loss": 3.9029, "step": 7005 }, { "epoch": 0.12, "learning_rate": 0.0005810816504669897, "loss": 4.0553, "step": 7010 }, { "epoch": 0.12, "learning_rate": 0.000581052888808632, "loss": 4.097, "step": 7015 }, { "epoch": 0.12, "learning_rate": 0.0005810241060165374, "loss": 3.9799, "step": 7020 }, { "epoch": 0.12, "learning_rate": 0.0005809953020928703, "loss": 3.9113, "step": 7025 }, { "epoch": 0.12, "learning_rate": 0.0005809664770397964, "loss": 3.9452, "step": 7030 }, { "epoch": 0.12, "learning_rate": 0.0005809376308594835, "loss": 3.9655, "step": 7035 }, { "epoch": 0.12, "learning_rate": 0.0005809087635541005, "loss": 3.8256, "step": 7040 }, { "epoch": 0.12, "learning_rate": 0.000580879875125818, "loss": 3.9917, "step": 7045 }, { "epoch": 0.12, "learning_rate": 0.0005808509655768086, "loss": 3.9427, "step": 7050 }, { "epoch": 0.12, "learning_rate": 0.0005808220349092458, "loss": 4.0305, "step": 7055 }, { "epoch": 0.12, "learning_rate": 0.0005807930831253052, "loss": 3.9139, "step": 7060 }, { "epoch": 0.12, "learning_rate": 0.0005807641102271637, "loss": 3.9324, "step": 7065 }, { "epoch": 0.12, "learning_rate": 0.0005807351162170001, "loss": 3.9821, "step": 7070 }, { "epoch": 0.12, "learning_rate": 0.0005807061010969944, "loss": 3.9877, "step": 7075 }, { "epoch": 0.12, "learning_rate": 0.0005806770648693286, "loss": 4.0239, "step": 7080 }, { "epoch": 0.12, "learning_rate": 0.000580648007536186, "loss": 3.9869, "step": 7085 }, { "epoch": 0.12, "learning_rate": 0.0005806189290997514, "loss": 3.9419, "step": 7090 }, { "epoch": 0.12, "learning_rate": 0.0005805898295622115, "loss": 4.0456, "step": 7095 }, { "epoch": 0.12, "learning_rate": 0.0005805607089257545, "loss": 3.9479, "step": 7100 }, { "epoch": 0.12, "learning_rate": 0.00058053156719257, "loss": 3.9563, "step": 7105 }, { "epoch": 0.12, "learning_rate": 0.0005805024043648493, "loss": 3.9586, "step": 7110 }, { "epoch": 0.12, "learning_rate": 0.0005804732204447854, "loss": 3.9508, "step": 7115 }, { "epoch": 0.12, "learning_rate": 0.0005804440154345728, "loss": 3.9228, "step": 7120 }, { "epoch": 0.12, "learning_rate": 0.0005804147893364073, "loss": 3.9427, "step": 7125 }, { "epoch": 0.12, "learning_rate": 0.000580385542152487, "loss": 3.9727, "step": 7130 }, { "epoch": 0.12, "learning_rate": 0.0005803562738850107, "loss": 3.8936, "step": 7135 }, { "epoch": 0.12, "learning_rate": 0.0005803269845361794, "loss": 3.8108, "step": 7140 }, { "epoch": 0.12, "learning_rate": 0.0005802976741081956, "loss": 3.943, "step": 7145 }, { "epoch": 0.12, "learning_rate": 0.0005802683426032631, "loss": 3.9068, "step": 7150 }, { "epoch": 0.12, "learning_rate": 0.0005802389900235878, "loss": 3.931, "step": 7155 }, { "epoch": 0.12, "learning_rate": 0.0005802096163713764, "loss": 4.0288, "step": 7160 }, { "epoch": 0.12, "learning_rate": 0.0005801802216488381, "loss": 3.8147, "step": 7165 }, { "epoch": 0.12, "learning_rate": 0.0005801508058581829, "loss": 3.8442, "step": 7170 }, { "epoch": 0.12, "learning_rate": 0.0005801213690016228, "loss": 3.9372, "step": 7175 }, { "epoch": 0.12, "learning_rate": 0.0005800919110813716, "loss": 4.0047, "step": 7180 }, { "epoch": 0.12, "learning_rate": 0.0005800624320996439, "loss": 3.9978, "step": 7185 }, { "epoch": 0.12, "learning_rate": 0.0005800329320586568, "loss": 3.9016, "step": 7190 }, { "epoch": 0.12, "learning_rate": 0.0005800034109606282, "loss": 3.9061, "step": 7195 }, { "epoch": 0.12, "learning_rate": 0.0005799738688077781, "loss": 3.9114, "step": 7200 }, { "epoch": 0.12, "learning_rate": 0.0005799443056023279, "loss": 3.8848, "step": 7205 }, { "epoch": 0.12, "learning_rate": 0.0005799147213465006, "loss": 3.9432, "step": 7210 }, { "epoch": 0.12, "learning_rate": 0.0005798851160425209, "loss": 3.9281, "step": 7215 }, { "epoch": 0.12, "learning_rate": 0.0005798554896926149, "loss": 3.8136, "step": 7220 }, { "epoch": 0.13, "learning_rate": 0.0005798258422990101, "loss": 3.9658, "step": 7225 }, { "epoch": 0.13, "learning_rate": 0.0005797961738639362, "loss": 3.9506, "step": 7230 }, { "epoch": 0.13, "learning_rate": 0.000579766484389624, "loss": 4.0028, "step": 7235 }, { "epoch": 0.13, "learning_rate": 0.0005797367738783059, "loss": 3.9895, "step": 7240 }, { "epoch": 0.13, "learning_rate": 0.000579707042332216, "loss": 3.9246, "step": 7245 }, { "epoch": 0.13, "learning_rate": 0.00057967728975359, "loss": 3.9121, "step": 7250 }, { "epoch": 0.13, "learning_rate": 0.0005796475161446651, "loss": 3.8231, "step": 7255 }, { "epoch": 0.13, "learning_rate": 0.0005796177215076801, "loss": 3.9446, "step": 7260 }, { "epoch": 0.13, "learning_rate": 0.0005795879058448756, "loss": 3.9487, "step": 7265 }, { "epoch": 0.13, "learning_rate": 0.0005795580691584934, "loss": 3.9777, "step": 7270 }, { "epoch": 0.13, "learning_rate": 0.0005795282114507772, "loss": 3.8589, "step": 7275 }, { "epoch": 0.13, "learning_rate": 0.0005794983327239719, "loss": 3.9822, "step": 7280 }, { "epoch": 0.13, "learning_rate": 0.0005794684329803244, "loss": 3.9954, "step": 7285 }, { "epoch": 0.13, "learning_rate": 0.000579438512222083, "loss": 3.9695, "step": 7290 }, { "epoch": 0.13, "learning_rate": 0.0005794085704514977, "loss": 3.9504, "step": 7295 }, { "epoch": 0.13, "learning_rate": 0.0005793786076708198, "loss": 3.9779, "step": 7300 }, { "epoch": 0.13, "learning_rate": 0.0005793486238823024, "loss": 3.8648, "step": 7305 }, { "epoch": 0.13, "learning_rate": 0.0005793186190882001, "loss": 3.9049, "step": 7310 }, { "epoch": 0.13, "learning_rate": 0.000579288593290769, "loss": 3.9022, "step": 7315 }, { "epoch": 0.13, "learning_rate": 0.0005792585464922673, "loss": 3.9056, "step": 7320 }, { "epoch": 0.13, "learning_rate": 0.0005792284786949538, "loss": 3.9115, "step": 7325 }, { "epoch": 0.13, "learning_rate": 0.0005791983899010899, "loss": 3.953, "step": 7330 }, { "epoch": 0.13, "learning_rate": 0.0005791682801129379, "loss": 3.9481, "step": 7335 }, { "epoch": 0.13, "learning_rate": 0.000579138149332762, "loss": 3.8981, "step": 7340 }, { "epoch": 0.13, "learning_rate": 0.0005791079975628277, "loss": 4.0269, "step": 7345 }, { "epoch": 0.13, "learning_rate": 0.0005790778248054025, "loss": 3.9883, "step": 7350 }, { "epoch": 0.13, "learning_rate": 0.0005790476310627551, "loss": 3.9876, "step": 7355 }, { "epoch": 0.13, "learning_rate": 0.000579017416337156, "loss": 3.988, "step": 7360 }, { "epoch": 0.13, "learning_rate": 0.000578987180630877, "loss": 3.9376, "step": 7365 }, { "epoch": 0.13, "learning_rate": 0.0005789569239461918, "loss": 3.9417, "step": 7370 }, { "epoch": 0.13, "learning_rate": 0.0005789266462853757, "loss": 4.0789, "step": 7375 }, { "epoch": 0.13, "learning_rate": 0.0005788963476507052, "loss": 3.9385, "step": 7380 }, { "epoch": 0.13, "learning_rate": 0.0005788660280444586, "loss": 3.929, "step": 7385 }, { "epoch": 0.13, "learning_rate": 0.0005788356874689159, "loss": 4.0291, "step": 7390 }, { "epoch": 0.13, "learning_rate": 0.0005788053259263587, "loss": 3.8872, "step": 7395 }, { "epoch": 0.13, "learning_rate": 0.0005787749434190695, "loss": 3.8675, "step": 7400 }, { "epoch": 0.13, "learning_rate": 0.0005787445399493335, "loss": 4.0703, "step": 7405 }, { "epoch": 0.13, "learning_rate": 0.0005787141155194365, "loss": 3.9236, "step": 7410 }, { "epoch": 0.13, "learning_rate": 0.0005786836701316665, "loss": 3.9127, "step": 7415 }, { "epoch": 0.13, "learning_rate": 0.0005786532037883128, "loss": 3.8897, "step": 7420 }, { "epoch": 0.13, "learning_rate": 0.0005786227164916662, "loss": 3.9272, "step": 7425 }, { "epoch": 0.13, "learning_rate": 0.0005785922082440192, "loss": 3.9871, "step": 7430 }, { "epoch": 0.13, "learning_rate": 0.0005785616790476659, "loss": 3.9186, "step": 7435 }, { "epoch": 0.13, "learning_rate": 0.0005785311289049019, "loss": 3.974, "step": 7440 }, { "epoch": 0.13, "learning_rate": 0.0005785005578180245, "loss": 3.9538, "step": 7445 }, { "epoch": 0.13, "learning_rate": 0.0005784699657893325, "loss": 4.0123, "step": 7450 }, { "epoch": 0.13, "learning_rate": 0.0005784393528211261, "loss": 3.9068, "step": 7455 }, { "epoch": 0.13, "learning_rate": 0.0005784087189157074, "loss": 3.7432, "step": 7460 }, { "epoch": 0.13, "learning_rate": 0.0005783780640753798, "loss": 3.9067, "step": 7465 }, { "epoch": 0.13, "learning_rate": 0.0005783473883024486, "loss": 3.9339, "step": 7470 }, { "epoch": 0.13, "learning_rate": 0.0005783166915992202, "loss": 3.8964, "step": 7475 }, { "epoch": 0.13, "learning_rate": 0.000578285973968003, "loss": 3.8972, "step": 7480 }, { "epoch": 0.13, "learning_rate": 0.0005782552354111067, "loss": 3.9025, "step": 7485 }, { "epoch": 0.13, "learning_rate": 0.0005782244759308427, "loss": 3.979, "step": 7490 }, { "epoch": 0.13, "learning_rate": 0.0005781936955295241, "loss": 3.9776, "step": 7495 }, { "epoch": 0.13, "learning_rate": 0.0005781628942094653, "loss": 3.9969, "step": 7500 }, { "epoch": 0.13, "eval_loss": 3.961434841156006, "eval_runtime": 149.9847, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 7500 }, { "epoch": 0.13, "learning_rate": 0.0005781320719729822, "loss": 3.9575, "step": 7505 }, { "epoch": 0.13, "learning_rate": 0.0005781012288223929, "loss": 3.8435, "step": 7510 }, { "epoch": 0.13, "learning_rate": 0.0005780703647600165, "loss": 3.9905, "step": 7515 }, { "epoch": 0.13, "learning_rate": 0.0005780394797881735, "loss": 3.8025, "step": 7520 }, { "epoch": 0.13, "learning_rate": 0.0005780085739091867, "loss": 3.9212, "step": 7525 }, { "epoch": 0.13, "learning_rate": 0.00057797764712538, "loss": 3.8277, "step": 7530 }, { "epoch": 0.13, "learning_rate": 0.0005779466994390788, "loss": 3.8959, "step": 7535 }, { "epoch": 0.13, "learning_rate": 0.0005779157308526102, "loss": 3.9607, "step": 7540 }, { "epoch": 0.13, "learning_rate": 0.000577884741368303, "loss": 3.9904, "step": 7545 }, { "epoch": 0.13, "learning_rate": 0.0005778537309884873, "loss": 4.0796, "step": 7550 }, { "epoch": 0.13, "learning_rate": 0.000577822699715495, "loss": 3.893, "step": 7555 }, { "epoch": 0.13, "learning_rate": 0.0005777916475516597, "loss": 3.953, "step": 7560 }, { "epoch": 0.13, "learning_rate": 0.0005777605744993159, "loss": 3.9167, "step": 7565 }, { "epoch": 0.13, "learning_rate": 0.0005777294805608006, "loss": 3.8872, "step": 7570 }, { "epoch": 0.13, "learning_rate": 0.0005776983657384515, "loss": 3.9342, "step": 7575 }, { "epoch": 0.13, "learning_rate": 0.0005776672300346086, "loss": 3.9996, "step": 7580 }, { "epoch": 0.13, "learning_rate": 0.000577636073451613, "loss": 3.9381, "step": 7585 }, { "epoch": 0.13, "learning_rate": 0.0005776048959918076, "loss": 4.0558, "step": 7590 }, { "epoch": 0.13, "learning_rate": 0.0005775736976575367, "loss": 3.904, "step": 7595 }, { "epoch": 0.13, "learning_rate": 0.0005775424784511462, "loss": 3.9933, "step": 7600 }, { "epoch": 0.13, "learning_rate": 0.0005775112383749839, "loss": 3.995, "step": 7605 }, { "epoch": 0.13, "learning_rate": 0.0005774799774313985, "loss": 3.9822, "step": 7610 }, { "epoch": 0.13, "learning_rate": 0.000577448695622741, "loss": 3.7759, "step": 7615 }, { "epoch": 0.13, "learning_rate": 0.0005774173929513633, "loss": 3.7732, "step": 7620 }, { "epoch": 0.13, "learning_rate": 0.0005773860694196195, "loss": 3.8441, "step": 7625 }, { "epoch": 0.13, "learning_rate": 0.000577354725029865, "loss": 3.9408, "step": 7630 }, { "epoch": 0.13, "learning_rate": 0.0005773233597844564, "loss": 3.9715, "step": 7635 }, { "epoch": 0.13, "learning_rate": 0.0005772919736857524, "loss": 3.8511, "step": 7640 }, { "epoch": 0.13, "learning_rate": 0.0005772605667361131, "loss": 3.9275, "step": 7645 }, { "epoch": 0.13, "learning_rate": 0.0005772291389379002, "loss": 3.8361, "step": 7650 }, { "epoch": 0.13, "learning_rate": 0.0005771976902934768, "loss": 3.8592, "step": 7655 }, { "epoch": 0.13, "learning_rate": 0.0005771662208052077, "loss": 3.8605, "step": 7660 }, { "epoch": 0.13, "learning_rate": 0.0005771347304754593, "loss": 3.885, "step": 7665 }, { "epoch": 0.13, "learning_rate": 0.0005771032193065994, "loss": 3.8989, "step": 7670 }, { "epoch": 0.13, "learning_rate": 0.0005770716873009975, "loss": 3.9576, "step": 7675 }, { "epoch": 0.13, "learning_rate": 0.0005770401344610248, "loss": 3.8623, "step": 7680 }, { "epoch": 0.13, "learning_rate": 0.0005770085607890537, "loss": 3.8787, "step": 7685 }, { "epoch": 0.13, "learning_rate": 0.0005769769662874585, "loss": 3.9269, "step": 7690 }, { "epoch": 0.13, "learning_rate": 0.000576945350958615, "loss": 3.947, "step": 7695 }, { "epoch": 0.13, "learning_rate": 0.0005769137148049002, "loss": 3.869, "step": 7700 }, { "epoch": 0.13, "learning_rate": 0.0005768820578286935, "loss": 3.9183, "step": 7705 }, { "epoch": 0.13, "learning_rate": 0.0005768503800323748, "loss": 3.94, "step": 7710 }, { "epoch": 0.13, "learning_rate": 0.0005768186814183265, "loss": 3.7839, "step": 7715 }, { "epoch": 0.13, "learning_rate": 0.000576786961988932, "loss": 3.8936, "step": 7720 }, { "epoch": 0.13, "learning_rate": 0.0005767552217465765, "loss": 3.9323, "step": 7725 }, { "epoch": 0.13, "learning_rate": 0.0005767234606936465, "loss": 3.8951, "step": 7730 }, { "epoch": 0.13, "learning_rate": 0.0005766916788325306, "loss": 3.8801, "step": 7735 }, { "epoch": 0.13, "learning_rate": 0.0005766598761656186, "loss": 3.9485, "step": 7740 }, { "epoch": 0.13, "learning_rate": 0.0005766280526953016, "loss": 3.9617, "step": 7745 }, { "epoch": 0.13, "learning_rate": 0.0005765962084239727, "loss": 4.004, "step": 7750 }, { "epoch": 0.13, "learning_rate": 0.0005765643433540265, "loss": 3.9779, "step": 7755 }, { "epoch": 0.13, "learning_rate": 0.0005765324574878592, "loss": 3.8105, "step": 7760 }, { "epoch": 0.13, "learning_rate": 0.0005765005508278681, "loss": 3.9296, "step": 7765 }, { "epoch": 0.13, "learning_rate": 0.0005764686233764527, "loss": 3.9916, "step": 7770 }, { "epoch": 0.13, "learning_rate": 0.0005764366751360137, "loss": 3.8613, "step": 7775 }, { "epoch": 0.13, "learning_rate": 0.0005764047061089534, "loss": 3.8955, "step": 7780 }, { "epoch": 0.13, "learning_rate": 0.0005763727162976758, "loss": 3.9348, "step": 7785 }, { "epoch": 0.13, "learning_rate": 0.0005763407057045863, "loss": 3.9172, "step": 7790 }, { "epoch": 0.13, "learning_rate": 0.000576308674332092, "loss": 3.8628, "step": 7795 }, { "epoch": 0.13, "learning_rate": 0.0005762766221826015, "loss": 3.9669, "step": 7800 }, { "epoch": 0.14, "learning_rate": 0.0005762445492585247, "loss": 3.9162, "step": 7805 }, { "epoch": 0.14, "learning_rate": 0.0005762124555622737, "loss": 3.9174, "step": 7810 }, { "epoch": 0.14, "learning_rate": 0.0005761803410962615, "loss": 3.7882, "step": 7815 }, { "epoch": 0.14, "learning_rate": 0.0005761482058629032, "loss": 3.9603, "step": 7820 }, { "epoch": 0.14, "learning_rate": 0.0005761160498646149, "loss": 3.8465, "step": 7825 }, { "epoch": 0.14, "learning_rate": 0.0005760838731038147, "loss": 3.8924, "step": 7830 }, { "epoch": 0.14, "learning_rate": 0.0005760516755829225, "loss": 3.9661, "step": 7835 }, { "epoch": 0.14, "learning_rate": 0.0005760194573043586, "loss": 3.8856, "step": 7840 }, { "epoch": 0.14, "learning_rate": 0.0005759872182705463, "loss": 3.9037, "step": 7845 }, { "epoch": 0.14, "learning_rate": 0.0005759549584839096, "loss": 3.8099, "step": 7850 }, { "epoch": 0.14, "learning_rate": 0.0005759226779468742, "loss": 3.9186, "step": 7855 }, { "epoch": 0.14, "learning_rate": 0.0005758903766618676, "loss": 3.8689, "step": 7860 }, { "epoch": 0.14, "learning_rate": 0.0005758580546313185, "loss": 3.9651, "step": 7865 }, { "epoch": 0.14, "learning_rate": 0.0005758257118576574, "loss": 3.8181, "step": 7870 }, { "epoch": 0.14, "learning_rate": 0.0005757933483433165, "loss": 3.9611, "step": 7875 }, { "epoch": 0.14, "learning_rate": 0.000575760964090729, "loss": 3.8883, "step": 7880 }, { "epoch": 0.14, "learning_rate": 0.0005757285591023305, "loss": 3.9339, "step": 7885 }, { "epoch": 0.14, "learning_rate": 0.0005756961333805574, "loss": 3.9402, "step": 7890 }, { "epoch": 0.14, "learning_rate": 0.0005756636869278478, "loss": 3.9722, "step": 7895 }, { "epoch": 0.14, "learning_rate": 0.0005756312197466418, "loss": 3.8141, "step": 7900 }, { "epoch": 0.14, "learning_rate": 0.0005755987318393807, "loss": 3.851, "step": 7905 }, { "epoch": 0.14, "learning_rate": 0.0005755662232085075, "loss": 3.9061, "step": 7910 }, { "epoch": 0.14, "learning_rate": 0.0005755336938564663, "loss": 3.846, "step": 7915 }, { "epoch": 0.14, "learning_rate": 0.0005755011437857037, "loss": 3.8962, "step": 7920 }, { "epoch": 0.14, "learning_rate": 0.0005754685729986668, "loss": 3.9421, "step": 7925 }, { "epoch": 0.14, "learning_rate": 0.000575435981497805, "loss": 3.8796, "step": 7930 }, { "epoch": 0.14, "learning_rate": 0.0005754033692855691, "loss": 3.8234, "step": 7935 }, { "epoch": 0.14, "learning_rate": 0.0005753707363644111, "loss": 3.8632, "step": 7940 }, { "epoch": 0.14, "learning_rate": 0.000575338082736785, "loss": 3.8991, "step": 7945 }, { "epoch": 0.14, "learning_rate": 0.0005753054084051462, "loss": 4.0148, "step": 7950 }, { "epoch": 0.14, "learning_rate": 0.0005752727133719516, "loss": 3.9543, "step": 7955 }, { "epoch": 0.14, "learning_rate": 0.0005752399976396596, "loss": 3.8603, "step": 7960 }, { "epoch": 0.14, "learning_rate": 0.0005752072612107306, "loss": 3.8823, "step": 7965 }, { "epoch": 0.14, "learning_rate": 0.0005751745040876258, "loss": 3.8419, "step": 7970 }, { "epoch": 0.14, "learning_rate": 0.0005751417262728085, "loss": 3.9414, "step": 7975 }, { "epoch": 0.14, "learning_rate": 0.0005751089277687436, "loss": 3.911, "step": 7980 }, { "epoch": 0.14, "learning_rate": 0.0005750761085778972, "loss": 3.9677, "step": 7985 }, { "epoch": 0.14, "learning_rate": 0.0005750432687027372, "loss": 3.9097, "step": 7990 }, { "epoch": 0.14, "learning_rate": 0.000575010408145733, "loss": 3.827, "step": 7995 }, { "epoch": 0.14, "learning_rate": 0.0005749775269093554, "loss": 3.8829, "step": 8000 }, { "epoch": 0.14, "eval_loss": 3.9269142150878906, "eval_runtime": 150.3729, "eval_samples_per_second": 12.243, "eval_steps_per_second": 0.771, "step": 8000 }, { "epoch": 0.14, "learning_rate": 0.000574944624996077, "loss": 3.8017, "step": 8005 }, { "epoch": 0.14, "learning_rate": 0.000574911702408372, "loss": 3.9319, "step": 8010 }, { "epoch": 0.14, "learning_rate": 0.0005748787591487158, "loss": 3.9119, "step": 8015 }, { "epoch": 0.14, "learning_rate": 0.0005748457952195856, "loss": 3.9751, "step": 8020 }, { "epoch": 0.14, "learning_rate": 0.0005748128106234602, "loss": 3.8432, "step": 8025 }, { "epoch": 0.14, "learning_rate": 0.0005747798053628198, "loss": 3.9297, "step": 8030 }, { "epoch": 0.14, "learning_rate": 0.0005747467794401464, "loss": 3.8792, "step": 8035 }, { "epoch": 0.14, "learning_rate": 0.0005747137328579231, "loss": 3.8893, "step": 8040 }, { "epoch": 0.14, "learning_rate": 0.000574680665618635, "loss": 3.8906, "step": 8045 }, { "epoch": 0.14, "learning_rate": 0.0005746475777247686, "loss": 3.8385, "step": 8050 }, { "epoch": 0.14, "learning_rate": 0.0005746144691788119, "loss": 3.7674, "step": 8055 }, { "epoch": 0.14, "learning_rate": 0.0005745813399832544, "loss": 3.8042, "step": 8060 }, { "epoch": 0.14, "learning_rate": 0.0005745481901405874, "loss": 3.9017, "step": 8065 }, { "epoch": 0.14, "learning_rate": 0.0005745150196533037, "loss": 3.8173, "step": 8070 }, { "epoch": 0.14, "learning_rate": 0.0005744818285238972, "loss": 3.8445, "step": 8075 }, { "epoch": 0.14, "learning_rate": 0.000574448616754864, "loss": 3.9037, "step": 8080 }, { "epoch": 0.14, "learning_rate": 0.0005744153843487012, "loss": 3.8863, "step": 8085 }, { "epoch": 0.14, "learning_rate": 0.000574382131307908, "loss": 3.8032, "step": 8090 }, { "epoch": 0.14, "learning_rate": 0.0005743488576349846, "loss": 3.8524, "step": 8095 }, { "epoch": 0.14, "learning_rate": 0.0005743155633324332, "loss": 3.7371, "step": 8100 }, { "epoch": 0.14, "learning_rate": 0.0005742822484027573, "loss": 3.8358, "step": 8105 }, { "epoch": 0.14, "learning_rate": 0.0005742489128484619, "loss": 4.0695, "step": 8110 }, { "epoch": 0.14, "learning_rate": 0.0005742155566720538, "loss": 3.8984, "step": 8115 }, { "epoch": 0.14, "learning_rate": 0.0005741821798760412, "loss": 3.8755, "step": 8120 }, { "epoch": 0.14, "learning_rate": 0.0005741487824629339, "loss": 3.9444, "step": 8125 }, { "epoch": 0.14, "learning_rate": 0.000574115364435243, "loss": 3.9359, "step": 8130 }, { "epoch": 0.14, "learning_rate": 0.0005740819257954814, "loss": 3.9182, "step": 8135 }, { "epoch": 0.14, "learning_rate": 0.0005740484665461639, "loss": 3.9721, "step": 8140 }, { "epoch": 0.14, "learning_rate": 0.0005740149866898061, "loss": 3.8413, "step": 8145 }, { "epoch": 0.14, "learning_rate": 0.0005739814862289256, "loss": 3.9575, "step": 8150 }, { "epoch": 0.14, "learning_rate": 0.0005739479651660415, "loss": 3.8612, "step": 8155 }, { "epoch": 0.14, "learning_rate": 0.0005739144235036744, "loss": 3.8002, "step": 8160 }, { "epoch": 0.14, "learning_rate": 0.0005738808612443464, "loss": 3.8984, "step": 8165 }, { "epoch": 0.14, "learning_rate": 0.0005738472783905811, "loss": 3.8158, "step": 8170 }, { "epoch": 0.14, "learning_rate": 0.0005738136749449041, "loss": 3.8328, "step": 8175 }, { "epoch": 0.14, "learning_rate": 0.000573780050909842, "loss": 3.8829, "step": 8180 }, { "epoch": 0.14, "learning_rate": 0.0005737464062879231, "loss": 3.7717, "step": 8185 }, { "epoch": 0.14, "learning_rate": 0.0005737127410816775, "loss": 3.879, "step": 8190 }, { "epoch": 0.14, "learning_rate": 0.0005736790552936364, "loss": 3.9169, "step": 8195 }, { "epoch": 0.14, "learning_rate": 0.0005736453489263329, "loss": 3.8645, "step": 8200 }, { "epoch": 0.14, "learning_rate": 0.0005736116219823016, "loss": 3.8988, "step": 8205 }, { "epoch": 0.14, "learning_rate": 0.0005735778744640787, "loss": 3.8545, "step": 8210 }, { "epoch": 0.14, "learning_rate": 0.0005735441063742016, "loss": 3.7853, "step": 8215 }, { "epoch": 0.14, "learning_rate": 0.0005735103177152096, "loss": 3.8307, "step": 8220 }, { "epoch": 0.14, "learning_rate": 0.0005734765084896435, "loss": 3.9213, "step": 8225 }, { "epoch": 0.14, "learning_rate": 0.0005734426787000453, "loss": 3.9432, "step": 8230 }, { "epoch": 0.14, "learning_rate": 0.0005734088283489593, "loss": 3.868, "step": 8235 }, { "epoch": 0.14, "learning_rate": 0.0005733749574389304, "loss": 3.827, "step": 8240 }, { "epoch": 0.14, "learning_rate": 0.0005733410659725058, "loss": 3.9695, "step": 8245 }, { "epoch": 0.14, "learning_rate": 0.0005733071539522339, "loss": 3.8528, "step": 8250 }, { "epoch": 0.14, "learning_rate": 0.0005732732213806647, "loss": 3.8285, "step": 8255 }, { "epoch": 0.14, "learning_rate": 0.0005732392682603497, "loss": 3.9186, "step": 8260 }, { "epoch": 0.14, "learning_rate": 0.0005732052945938421, "loss": 3.8465, "step": 8265 }, { "epoch": 0.14, "learning_rate": 0.0005731713003836965, "loss": 3.8373, "step": 8270 }, { "epoch": 0.14, "learning_rate": 0.0005731372856324691, "loss": 3.9738, "step": 8275 }, { "epoch": 0.14, "learning_rate": 0.0005731032503427176, "loss": 3.8417, "step": 8280 }, { "epoch": 0.14, "learning_rate": 0.0005730691945170013, "loss": 3.9056, "step": 8285 }, { "epoch": 0.14, "learning_rate": 0.0005730351181578811, "loss": 3.9564, "step": 8290 }, { "epoch": 0.14, "learning_rate": 0.0005730010212679192, "loss": 3.8577, "step": 8295 }, { "epoch": 0.14, "learning_rate": 0.0005729669038496796, "loss": 3.8578, "step": 8300 }, { "epoch": 0.14, "learning_rate": 0.0005729327659057279, "loss": 3.9018, "step": 8305 }, { "epoch": 0.14, "learning_rate": 0.0005728986074386309, "loss": 3.9108, "step": 8310 }, { "epoch": 0.14, "learning_rate": 0.0005728644284509572, "loss": 3.8183, "step": 8315 }, { "epoch": 0.14, "learning_rate": 0.0005728302289452769, "loss": 3.8654, "step": 8320 }, { "epoch": 0.14, "learning_rate": 0.0005727960089241615, "loss": 3.8433, "step": 8325 }, { "epoch": 0.14, "learning_rate": 0.0005727617683901845, "loss": 3.8515, "step": 8330 }, { "epoch": 0.14, "learning_rate": 0.0005727275073459202, "loss": 3.8286, "step": 8335 }, { "epoch": 0.14, "learning_rate": 0.0005726932257939452, "loss": 3.8778, "step": 8340 }, { "epoch": 0.14, "learning_rate": 0.000572658923736837, "loss": 3.7871, "step": 8345 }, { "epoch": 0.14, "learning_rate": 0.0005726246011771752, "loss": 3.9097, "step": 8350 }, { "epoch": 0.14, "learning_rate": 0.0005725902581175406, "loss": 3.837, "step": 8355 }, { "epoch": 0.14, "learning_rate": 0.0005725558945605155, "loss": 3.7465, "step": 8360 }, { "epoch": 0.14, "learning_rate": 0.0005725215105086839, "loss": 3.9166, "step": 8365 }, { "epoch": 0.14, "learning_rate": 0.0005724871059646316, "loss": 3.9286, "step": 8370 }, { "epoch": 0.14, "learning_rate": 0.0005724526809309452, "loss": 3.785, "step": 8375 }, { "epoch": 0.15, "learning_rate": 0.0005724182354102136, "loss": 3.8503, "step": 8380 }, { "epoch": 0.15, "learning_rate": 0.0005723837694050267, "loss": 3.8255, "step": 8385 }, { "epoch": 0.15, "learning_rate": 0.0005723492829179764, "loss": 3.887, "step": 8390 }, { "epoch": 0.15, "learning_rate": 0.0005723147759516558, "loss": 3.7949, "step": 8395 }, { "epoch": 0.15, "learning_rate": 0.0005722802485086595, "loss": 3.8919, "step": 8400 }, { "epoch": 0.15, "learning_rate": 0.0005722457005915841, "loss": 3.8261, "step": 8405 }, { "epoch": 0.15, "learning_rate": 0.000572211132203027, "loss": 3.8836, "step": 8410 }, { "epoch": 0.15, "learning_rate": 0.000572176543345588, "loss": 3.7968, "step": 8415 }, { "epoch": 0.15, "learning_rate": 0.0005721419340218678, "loss": 3.8791, "step": 8420 }, { "epoch": 0.15, "learning_rate": 0.000572107304234469, "loss": 3.8896, "step": 8425 }, { "epoch": 0.15, "learning_rate": 0.0005720726539859953, "loss": 3.8461, "step": 8430 }, { "epoch": 0.15, "learning_rate": 0.0005720379832790524, "loss": 3.8805, "step": 8435 }, { "epoch": 0.15, "learning_rate": 0.0005720032921162473, "loss": 3.7594, "step": 8440 }, { "epoch": 0.15, "learning_rate": 0.0005719685805001886, "loss": 3.906, "step": 8445 }, { "epoch": 0.15, "learning_rate": 0.0005719338484334866, "loss": 3.8963, "step": 8450 }, { "epoch": 0.15, "learning_rate": 0.0005718990959187527, "loss": 3.8806, "step": 8455 }, { "epoch": 0.15, "learning_rate": 0.0005718643229586002, "loss": 3.9259, "step": 8460 }, { "epoch": 0.15, "learning_rate": 0.0005718295295556441, "loss": 3.8151, "step": 8465 }, { "epoch": 0.15, "learning_rate": 0.0005717947157125004, "loss": 3.8314, "step": 8470 }, { "epoch": 0.15, "learning_rate": 0.000571759881431787, "loss": 3.8986, "step": 8475 }, { "epoch": 0.15, "learning_rate": 0.0005717250267161232, "loss": 3.8939, "step": 8480 }, { "epoch": 0.15, "learning_rate": 0.00057169015156813, "loss": 3.9307, "step": 8485 }, { "epoch": 0.15, "learning_rate": 0.0005716552559904297, "loss": 3.7868, "step": 8490 }, { "epoch": 0.15, "learning_rate": 0.0005716203399856466, "loss": 3.8096, "step": 8495 }, { "epoch": 0.15, "learning_rate": 0.0005715854035564058, "loss": 3.8729, "step": 8500 }, { "epoch": 0.15, "eval_loss": 3.8896238803863525, "eval_runtime": 150.0738, "eval_samples_per_second": 12.267, "eval_steps_per_second": 0.773, "step": 8500 }, { "epoch": 0.15, "learning_rate": 0.0005715504467053345, "loss": 3.7995, "step": 8505 }, { "epoch": 0.15, "learning_rate": 0.0005715154694350613, "loss": 3.8546, "step": 8510 }, { "epoch": 0.15, "learning_rate": 0.0005714804717482163, "loss": 3.8225, "step": 8515 }, { "epoch": 0.15, "learning_rate": 0.0005714454536474311, "loss": 3.866, "step": 8520 }, { "epoch": 0.15, "learning_rate": 0.000571410415135339, "loss": 3.8313, "step": 8525 }, { "epoch": 0.15, "learning_rate": 0.0005713753562145746, "loss": 3.8833, "step": 8530 }, { "epoch": 0.15, "learning_rate": 0.0005713402768877743, "loss": 3.8299, "step": 8535 }, { "epoch": 0.15, "learning_rate": 0.0005713051771575756, "loss": 3.9089, "step": 8540 }, { "epoch": 0.15, "learning_rate": 0.000571270057026618, "loss": 3.8661, "step": 8545 }, { "epoch": 0.15, "learning_rate": 0.0005712349164975424, "loss": 3.7924, "step": 8550 }, { "epoch": 0.15, "learning_rate": 0.0005711997555729912, "loss": 3.9294, "step": 8555 }, { "epoch": 0.15, "learning_rate": 0.0005711645742556082, "loss": 3.7973, "step": 8560 }, { "epoch": 0.15, "learning_rate": 0.0005711293725480389, "loss": 3.7566, "step": 8565 }, { "epoch": 0.15, "learning_rate": 0.0005710941504529304, "loss": 3.9148, "step": 8570 }, { "epoch": 0.15, "learning_rate": 0.0005710589079729309, "loss": 3.8453, "step": 8575 }, { "epoch": 0.15, "learning_rate": 0.0005710236451106909, "loss": 3.8899, "step": 8580 }, { "epoch": 0.15, "learning_rate": 0.0005709883618688617, "loss": 3.8985, "step": 8585 }, { "epoch": 0.15, "learning_rate": 0.0005709530582500964, "loss": 3.8628, "step": 8590 }, { "epoch": 0.15, "learning_rate": 0.0005709177342570498, "loss": 3.7169, "step": 8595 }, { "epoch": 0.15, "learning_rate": 0.0005708823898923779, "loss": 3.8679, "step": 8600 }, { "epoch": 0.15, "learning_rate": 0.0005708470251587387, "loss": 3.9674, "step": 8605 }, { "epoch": 0.15, "learning_rate": 0.0005708116400587912, "loss": 3.9565, "step": 8610 }, { "epoch": 0.15, "learning_rate": 0.0005707762345951963, "loss": 3.786, "step": 8615 }, { "epoch": 0.15, "learning_rate": 0.0005707408087706163, "loss": 3.8328, "step": 8620 }, { "epoch": 0.15, "learning_rate": 0.0005707053625877149, "loss": 3.8651, "step": 8625 }, { "epoch": 0.15, "learning_rate": 0.0005706698960491576, "loss": 3.8354, "step": 8630 }, { "epoch": 0.15, "learning_rate": 0.0005706344091576113, "loss": 3.9148, "step": 8635 }, { "epoch": 0.15, "learning_rate": 0.0005705989019157443, "loss": 3.743, "step": 8640 }, { "epoch": 0.15, "learning_rate": 0.0005705633743262269, "loss": 3.9586, "step": 8645 }, { "epoch": 0.15, "learning_rate": 0.0005705278263917301, "loss": 3.9242, "step": 8650 }, { "epoch": 0.15, "learning_rate": 0.0005704922581149275, "loss": 3.7486, "step": 8655 }, { "epoch": 0.15, "learning_rate": 0.0005704566694984931, "loss": 3.8465, "step": 8660 }, { "epoch": 0.15, "learning_rate": 0.0005704210605451033, "loss": 3.9023, "step": 8665 }, { "epoch": 0.15, "learning_rate": 0.0005703854312574357, "loss": 3.9225, "step": 8670 }, { "epoch": 0.15, "learning_rate": 0.0005703497816381692, "loss": 3.9354, "step": 8675 }, { "epoch": 0.15, "learning_rate": 0.0005703141116899847, "loss": 3.8554, "step": 8680 }, { "epoch": 0.15, "learning_rate": 0.0005702784214155645, "loss": 3.8595, "step": 8685 }, { "epoch": 0.15, "learning_rate": 0.000570242710817592, "loss": 3.8911, "step": 8690 }, { "epoch": 0.15, "learning_rate": 0.0005702069798987526, "loss": 3.9046, "step": 8695 }, { "epoch": 0.15, "learning_rate": 0.0005701712286617332, "loss": 3.9157, "step": 8700 }, { "epoch": 0.15, "learning_rate": 0.0005701354571092219, "loss": 3.7783, "step": 8705 }, { "epoch": 0.15, "learning_rate": 0.0005700996652439088, "loss": 3.8195, "step": 8710 }, { "epoch": 0.15, "learning_rate": 0.000570063853068485, "loss": 3.8212, "step": 8715 }, { "epoch": 0.15, "learning_rate": 0.0005700280205856435, "loss": 3.8575, "step": 8720 }, { "epoch": 0.15, "learning_rate": 0.0005699921677980788, "loss": 3.8246, "step": 8725 }, { "epoch": 0.15, "learning_rate": 0.0005699562947084867, "loss": 3.7655, "step": 8730 }, { "epoch": 0.15, "learning_rate": 0.0005699204013195648, "loss": 3.6899, "step": 8735 }, { "epoch": 0.15, "learning_rate": 0.000569884487634012, "loss": 3.8449, "step": 8740 }, { "epoch": 0.15, "learning_rate": 0.0005698485536545289, "loss": 3.8651, "step": 8745 }, { "epoch": 0.15, "learning_rate": 0.0005698125993838176, "loss": 3.8758, "step": 8750 }, { "epoch": 0.15, "learning_rate": 0.0005697766248245813, "loss": 3.7971, "step": 8755 }, { "epoch": 0.15, "learning_rate": 0.0005697406299795257, "loss": 3.8458, "step": 8760 }, { "epoch": 0.15, "learning_rate": 0.000569704614851357, "loss": 3.8787, "step": 8765 }, { "epoch": 0.15, "learning_rate": 0.0005696685794427835, "loss": 3.7689, "step": 8770 }, { "epoch": 0.15, "learning_rate": 0.0005696325237565148, "loss": 3.8765, "step": 8775 }, { "epoch": 0.15, "learning_rate": 0.0005695964477952623, "loss": 3.8841, "step": 8780 }, { "epoch": 0.15, "learning_rate": 0.0005695603515617384, "loss": 3.8682, "step": 8785 }, { "epoch": 0.15, "learning_rate": 0.0005695242350586576, "loss": 3.9042, "step": 8790 }, { "epoch": 0.15, "learning_rate": 0.0005694880982887356, "loss": 3.8366, "step": 8795 }, { "epoch": 0.15, "learning_rate": 0.0005694519412546899, "loss": 3.72, "step": 8800 }, { "epoch": 0.15, "learning_rate": 0.0005694157639592388, "loss": 3.8754, "step": 8805 }, { "epoch": 0.15, "learning_rate": 0.0005693795664051032, "loss": 3.7116, "step": 8810 }, { "epoch": 0.15, "learning_rate": 0.0005693433485950048, "loss": 3.8559, "step": 8815 }, { "epoch": 0.15, "learning_rate": 0.0005693071105316668, "loss": 3.8147, "step": 8820 }, { "epoch": 0.15, "learning_rate": 0.0005692708522178144, "loss": 3.8115, "step": 8825 }, { "epoch": 0.15, "learning_rate": 0.0005692345736561738, "loss": 3.8211, "step": 8830 }, { "epoch": 0.15, "learning_rate": 0.0005691982748494732, "loss": 3.7904, "step": 8835 }, { "epoch": 0.15, "learning_rate": 0.0005691619558004418, "loss": 3.725, "step": 8840 }, { "epoch": 0.15, "learning_rate": 0.0005691256165118109, "loss": 3.7322, "step": 8845 }, { "epoch": 0.15, "learning_rate": 0.0005690892569863127, "loss": 3.8021, "step": 8850 }, { "epoch": 0.15, "learning_rate": 0.0005690528772266816, "loss": 3.8315, "step": 8855 }, { "epoch": 0.15, "learning_rate": 0.0005690164772356529, "loss": 3.8825, "step": 8860 }, { "epoch": 0.15, "learning_rate": 0.0005689800570159639, "loss": 3.9128, "step": 8865 }, { "epoch": 0.15, "learning_rate": 0.0005689436165703531, "loss": 3.7504, "step": 8870 }, { "epoch": 0.15, "learning_rate": 0.0005689071559015605, "loss": 3.8452, "step": 8875 }, { "epoch": 0.15, "learning_rate": 0.000568870675012328, "loss": 3.8525, "step": 8880 }, { "epoch": 0.15, "learning_rate": 0.0005688341739053988, "loss": 3.8366, "step": 8885 }, { "epoch": 0.15, "learning_rate": 0.0005687976525835173, "loss": 3.9315, "step": 8890 }, { "epoch": 0.15, "learning_rate": 0.0005687611110494299, "loss": 3.863, "step": 8895 }, { "epoch": 0.15, "learning_rate": 0.0005687245493058845, "loss": 3.9057, "step": 8900 }, { "epoch": 0.15, "learning_rate": 0.00056868796735563, "loss": 3.8194, "step": 8905 }, { "epoch": 0.15, "learning_rate": 0.0005686513652014175, "loss": 3.8638, "step": 8910 }, { "epoch": 0.15, "learning_rate": 0.0005686147428459991, "loss": 3.8555, "step": 8915 }, { "epoch": 0.15, "learning_rate": 0.0005685781002921286, "loss": 3.7245, "step": 8920 }, { "epoch": 0.15, "learning_rate": 0.0005685414375425615, "loss": 3.8012, "step": 8925 }, { "epoch": 0.15, "learning_rate": 0.0005685047546000544, "loss": 3.8829, "step": 8930 }, { "epoch": 0.15, "learning_rate": 0.000568468051467366, "loss": 3.7756, "step": 8935 }, { "epoch": 0.15, "learning_rate": 0.0005684313281472559, "loss": 3.8913, "step": 8940 }, { "epoch": 0.15, "learning_rate": 0.0005683945846424856, "loss": 3.7455, "step": 8945 }, { "epoch": 0.15, "learning_rate": 0.0005683578209558181, "loss": 3.7563, "step": 8950 }, { "epoch": 0.15, "learning_rate": 0.0005683210370900177, "loss": 3.8759, "step": 8955 }, { "epoch": 0.16, "learning_rate": 0.0005682842330478505, "loss": 3.8912, "step": 8960 }, { "epoch": 0.16, "learning_rate": 0.0005682474088320839, "loss": 3.9026, "step": 8965 }, { "epoch": 0.16, "learning_rate": 0.0005682105644454869, "loss": 3.8464, "step": 8970 }, { "epoch": 0.16, "learning_rate": 0.00056817369989083, "loss": 3.8083, "step": 8975 }, { "epoch": 0.16, "learning_rate": 0.0005681368151708852, "loss": 3.8982, "step": 8980 }, { "epoch": 0.16, "learning_rate": 0.0005680999102884261, "loss": 3.8016, "step": 8985 }, { "epoch": 0.16, "learning_rate": 0.0005680629852462278, "loss": 3.9751, "step": 8990 }, { "epoch": 0.16, "learning_rate": 0.0005680260400470668, "loss": 3.8363, "step": 8995 }, { "epoch": 0.16, "learning_rate": 0.0005679890746937211, "loss": 3.8684, "step": 9000 }, { "epoch": 0.16, "eval_loss": 3.870915412902832, "eval_runtime": 150.275, "eval_samples_per_second": 12.251, "eval_steps_per_second": 0.772, "step": 9000 }, { "epoch": 0.16, "learning_rate": 0.0005679520891889705, "loss": 3.9171, "step": 9005 }, { "epoch": 0.16, "learning_rate": 0.000567915083535596, "loss": 3.9465, "step": 9010 }, { "epoch": 0.16, "learning_rate": 0.0005678780577363802, "loss": 3.9283, "step": 9015 }, { "epoch": 0.16, "learning_rate": 0.0005678410117941075, "loss": 3.8472, "step": 9020 }, { "epoch": 0.16, "learning_rate": 0.0005678039457115632, "loss": 3.9198, "step": 9025 }, { "epoch": 0.16, "learning_rate": 0.0005677668594915347, "loss": 3.8925, "step": 9030 }, { "epoch": 0.16, "learning_rate": 0.0005677297531368106, "loss": 3.893, "step": 9035 }, { "epoch": 0.16, "learning_rate": 0.0005676926266501812, "loss": 3.8164, "step": 9040 }, { "epoch": 0.16, "learning_rate": 0.0005676554800344382, "loss": 3.8225, "step": 9045 }, { "epoch": 0.16, "learning_rate": 0.0005676183132923748, "loss": 3.8328, "step": 9050 }, { "epoch": 0.16, "learning_rate": 0.0005675811264267858, "loss": 3.8067, "step": 9055 }, { "epoch": 0.16, "learning_rate": 0.0005675439194404673, "loss": 3.8443, "step": 9060 }, { "epoch": 0.16, "learning_rate": 0.0005675066923362174, "loss": 3.7139, "step": 9065 }, { "epoch": 0.16, "learning_rate": 0.000567469445116835, "loss": 3.733, "step": 9070 }, { "epoch": 0.16, "learning_rate": 0.0005674321777851211, "loss": 3.7259, "step": 9075 }, { "epoch": 0.16, "learning_rate": 0.0005673948903438781, "loss": 3.8794, "step": 9080 }, { "epoch": 0.16, "learning_rate": 0.0005673575827959097, "loss": 3.7507, "step": 9085 }, { "epoch": 0.16, "learning_rate": 0.0005673202551440212, "loss": 3.8077, "step": 9090 }, { "epoch": 0.16, "learning_rate": 0.0005672829073910196, "loss": 3.778, "step": 9095 }, { "epoch": 0.16, "learning_rate": 0.0005672455395397132, "loss": 3.8871, "step": 9100 }, { "epoch": 0.16, "learning_rate": 0.0005672081515929118, "loss": 3.8326, "step": 9105 }, { "epoch": 0.16, "learning_rate": 0.0005671707435534267, "loss": 3.8089, "step": 9110 }, { "epoch": 0.16, "learning_rate": 0.0005671333154240709, "loss": 3.8371, "step": 9115 }, { "epoch": 0.16, "learning_rate": 0.000567095867207659, "loss": 3.7742, "step": 9120 }, { "epoch": 0.16, "learning_rate": 0.0005670583989070066, "loss": 3.8002, "step": 9125 }, { "epoch": 0.16, "learning_rate": 0.0005670209105249313, "loss": 3.7759, "step": 9130 }, { "epoch": 0.16, "learning_rate": 0.000566983402064252, "loss": 3.8101, "step": 9135 }, { "epoch": 0.16, "learning_rate": 0.0005669458735277891, "loss": 3.8326, "step": 9140 }, { "epoch": 0.16, "learning_rate": 0.0005669083249183646, "loss": 3.9168, "step": 9145 }, { "epoch": 0.16, "learning_rate": 0.000566870756238802, "loss": 3.8423, "step": 9150 }, { "epoch": 0.16, "learning_rate": 0.0005668331674919261, "loss": 3.7503, "step": 9155 }, { "epoch": 0.16, "learning_rate": 0.0005667955586805635, "loss": 3.9171, "step": 9160 }, { "epoch": 0.16, "learning_rate": 0.0005667579298075424, "loss": 3.844, "step": 9165 }, { "epoch": 0.16, "learning_rate": 0.0005667202808756919, "loss": 3.9111, "step": 9170 }, { "epoch": 0.16, "learning_rate": 0.0005666826118878432, "loss": 3.7965, "step": 9175 }, { "epoch": 0.16, "learning_rate": 0.0005666449228468288, "loss": 3.8016, "step": 9180 }, { "epoch": 0.16, "learning_rate": 0.0005666072137554829, "loss": 3.7283, "step": 9185 }, { "epoch": 0.16, "learning_rate": 0.0005665694846166408, "loss": 3.7984, "step": 9190 }, { "epoch": 0.16, "learning_rate": 0.0005665317354331395, "loss": 3.7922, "step": 9195 }, { "epoch": 0.16, "learning_rate": 0.0005664939662078178, "loss": 3.7183, "step": 9200 }, { "epoch": 0.16, "learning_rate": 0.0005664561769435155, "loss": 3.9538, "step": 9205 }, { "epoch": 0.16, "learning_rate": 0.0005664183676430743, "loss": 3.7491, "step": 9210 }, { "epoch": 0.16, "learning_rate": 0.0005663805383093372, "loss": 3.8107, "step": 9215 }, { "epoch": 0.16, "learning_rate": 0.0005663426889451488, "loss": 3.8527, "step": 9220 }, { "epoch": 0.16, "learning_rate": 0.0005663048195533552, "loss": 3.8206, "step": 9225 }, { "epoch": 0.16, "learning_rate": 0.000566266930136804, "loss": 3.8189, "step": 9230 }, { "epoch": 0.16, "learning_rate": 0.0005662290206983441, "loss": 3.7953, "step": 9235 }, { "epoch": 0.16, "learning_rate": 0.0005661910912408264, "loss": 3.8083, "step": 9240 }, { "epoch": 0.16, "learning_rate": 0.0005661531417671027, "loss": 3.7971, "step": 9245 }, { "epoch": 0.16, "learning_rate": 0.0005661151722800269, "loss": 3.6886, "step": 9250 }, { "epoch": 0.16, "learning_rate": 0.0005660771827824539, "loss": 3.7777, "step": 9255 }, { "epoch": 0.16, "learning_rate": 0.0005660391732772404, "loss": 3.6483, "step": 9260 }, { "epoch": 0.16, "learning_rate": 0.0005660011437672445, "loss": 3.8698, "step": 9265 }, { "epoch": 0.16, "learning_rate": 0.0005659630942553258, "loss": 3.8082, "step": 9270 }, { "epoch": 0.16, "learning_rate": 0.0005659250247443454, "loss": 3.8567, "step": 9275 }, { "epoch": 0.16, "learning_rate": 0.0005658869352371661, "loss": 3.8349, "step": 9280 }, { "epoch": 0.16, "learning_rate": 0.0005658488257366518, "loss": 3.8353, "step": 9285 }, { "epoch": 0.16, "learning_rate": 0.0005658106962456684, "loss": 3.7528, "step": 9290 }, { "epoch": 0.16, "learning_rate": 0.0005657725467670828, "loss": 3.7924, "step": 9295 }, { "epoch": 0.16, "learning_rate": 0.0005657343773037637, "loss": 3.8372, "step": 9300 }, { "epoch": 0.16, "learning_rate": 0.0005656961878585813, "loss": 3.7955, "step": 9305 }, { "epoch": 0.16, "learning_rate": 0.0005656579784344074, "loss": 3.7235, "step": 9310 }, { "epoch": 0.16, "learning_rate": 0.0005656197490341149, "loss": 3.8506, "step": 9315 }, { "epoch": 0.16, "learning_rate": 0.0005655814996605785, "loss": 3.844, "step": 9320 }, { "epoch": 0.16, "learning_rate": 0.0005655432303166744, "loss": 3.7618, "step": 9325 }, { "epoch": 0.16, "learning_rate": 0.0005655049410052804, "loss": 3.7935, "step": 9330 }, { "epoch": 0.16, "learning_rate": 0.0005654666317292754, "loss": 3.9014, "step": 9335 }, { "epoch": 0.16, "learning_rate": 0.0005654283024915403, "loss": 3.829, "step": 9340 }, { "epoch": 0.16, "learning_rate": 0.0005653899532949571, "loss": 3.7477, "step": 9345 }, { "epoch": 0.16, "learning_rate": 0.0005653515841424095, "loss": 3.9187, "step": 9350 }, { "epoch": 0.16, "learning_rate": 0.0005653131950367826, "loss": 3.8836, "step": 9355 }, { "epoch": 0.16, "learning_rate": 0.0005652747859809632, "loss": 3.8617, "step": 9360 }, { "epoch": 0.16, "learning_rate": 0.0005652363569778395, "loss": 3.8054, "step": 9365 }, { "epoch": 0.16, "learning_rate": 0.0005651979080303009, "loss": 3.8272, "step": 9370 }, { "epoch": 0.16, "learning_rate": 0.000565159439141239, "loss": 3.8475, "step": 9375 }, { "epoch": 0.16, "learning_rate": 0.000565120950313546, "loss": 3.823, "step": 9380 }, { "epoch": 0.16, "learning_rate": 0.0005650824415501163, "loss": 3.9115, "step": 9385 }, { "epoch": 0.16, "learning_rate": 0.0005650439128538455, "loss": 3.837, "step": 9390 }, { "epoch": 0.16, "learning_rate": 0.0005650053642276307, "loss": 3.7632, "step": 9395 }, { "epoch": 0.16, "learning_rate": 0.0005649667956743708, "loss": 3.7894, "step": 9400 }, { "epoch": 0.16, "learning_rate": 0.0005649282071969658, "loss": 3.8228, "step": 9405 }, { "epoch": 0.16, "learning_rate": 0.0005648895987983172, "loss": 3.8639, "step": 9410 }, { "epoch": 0.16, "learning_rate": 0.0005648509704813284, "loss": 3.8556, "step": 9415 }, { "epoch": 0.16, "learning_rate": 0.000564812322248904, "loss": 3.856, "step": 9420 }, { "epoch": 0.16, "learning_rate": 0.00056477365410395, "loss": 3.8072, "step": 9425 }, { "epoch": 0.16, "learning_rate": 0.0005647349660493743, "loss": 3.8082, "step": 9430 }, { "epoch": 0.16, "learning_rate": 0.0005646962580880858, "loss": 3.7869, "step": 9435 }, { "epoch": 0.16, "learning_rate": 0.0005646575302229953, "loss": 3.8706, "step": 9440 }, { "epoch": 0.16, "learning_rate": 0.0005646187824570148, "loss": 3.7245, "step": 9445 }, { "epoch": 0.16, "learning_rate": 0.000564580014793058, "loss": 3.7688, "step": 9450 }, { "epoch": 0.16, "learning_rate": 0.0005645412272340401, "loss": 3.7546, "step": 9455 }, { "epoch": 0.16, "learning_rate": 0.0005645024197828777, "loss": 3.7613, "step": 9460 }, { "epoch": 0.16, "learning_rate": 0.0005644635924424888, "loss": 3.7864, "step": 9465 }, { "epoch": 0.16, "learning_rate": 0.0005644247452157932, "loss": 3.8333, "step": 9470 }, { "epoch": 0.16, "learning_rate": 0.0005643858781057118, "loss": 3.8384, "step": 9475 }, { "epoch": 0.16, "learning_rate": 0.0005643469911151673, "loss": 3.8734, "step": 9480 }, { "epoch": 0.16, "learning_rate": 0.0005643080842470839, "loss": 3.8705, "step": 9485 }, { "epoch": 0.16, "learning_rate": 0.0005642691575043872, "loss": 3.7985, "step": 9490 }, { "epoch": 0.16, "learning_rate": 0.0005642302108900041, "loss": 3.8142, "step": 9495 }, { "epoch": 0.16, "learning_rate": 0.0005641912444068634, "loss": 3.8861, "step": 9500 }, { "epoch": 0.16, "eval_loss": 3.8443562984466553, "eval_runtime": 150.875, "eval_samples_per_second": 12.202, "eval_steps_per_second": 0.769, "step": 9500 }, { "epoch": 0.16, "learning_rate": 0.0005641522580578949, "loss": 3.8243, "step": 9505 }, { "epoch": 0.16, "learning_rate": 0.0005641132518460305, "loss": 3.817, "step": 9510 }, { "epoch": 0.16, "learning_rate": 0.0005640742257742031, "loss": 3.7974, "step": 9515 }, { "epoch": 0.16, "learning_rate": 0.0005640351798453472, "loss": 3.8722, "step": 9520 }, { "epoch": 0.16, "learning_rate": 0.000563996114062399, "loss": 3.8327, "step": 9525 }, { "epoch": 0.16, "learning_rate": 0.000563957028428296, "loss": 3.7176, "step": 9530 }, { "epoch": 0.17, "learning_rate": 0.0005639179229459772, "loss": 3.8109, "step": 9535 }, { "epoch": 0.17, "learning_rate": 0.0005638787976183831, "loss": 3.7644, "step": 9540 }, { "epoch": 0.17, "learning_rate": 0.0005638396524484558, "loss": 3.8023, "step": 9545 }, { "epoch": 0.17, "learning_rate": 0.0005638004874391389, "loss": 3.7181, "step": 9550 }, { "epoch": 0.17, "learning_rate": 0.0005637613025933772, "loss": 3.8218, "step": 9555 }, { "epoch": 0.17, "learning_rate": 0.0005637220979141174, "loss": 3.821, "step": 9560 }, { "epoch": 0.17, "learning_rate": 0.0005636828734043072, "loss": 3.7989, "step": 9565 }, { "epoch": 0.17, "learning_rate": 0.0005636436290668965, "loss": 3.8229, "step": 9570 }, { "epoch": 0.17, "learning_rate": 0.0005636043649048359, "loss": 3.7486, "step": 9575 }, { "epoch": 0.17, "learning_rate": 0.0005635650809210781, "loss": 3.7537, "step": 9580 }, { "epoch": 0.17, "learning_rate": 0.0005635257771185768, "loss": 3.8646, "step": 9585 }, { "epoch": 0.17, "learning_rate": 0.0005634864535002878, "loss": 3.8951, "step": 9590 }, { "epoch": 0.17, "learning_rate": 0.0005634471100691678, "loss": 3.8565, "step": 9595 }, { "epoch": 0.17, "learning_rate": 0.0005634077468281752, "loss": 3.865, "step": 9600 }, { "epoch": 0.17, "learning_rate": 0.00056336836378027, "loss": 3.7137, "step": 9605 }, { "epoch": 0.17, "learning_rate": 0.0005633289609284135, "loss": 3.875, "step": 9610 }, { "epoch": 0.17, "learning_rate": 0.0005632895382755689, "loss": 3.7517, "step": 9615 }, { "epoch": 0.17, "learning_rate": 0.0005632500958247002, "loss": 3.8388, "step": 9620 }, { "epoch": 0.17, "learning_rate": 0.0005632106335787734, "loss": 3.7177, "step": 9625 }, { "epoch": 0.17, "learning_rate": 0.0005631711515407561, "loss": 3.9332, "step": 9630 }, { "epoch": 0.17, "learning_rate": 0.0005631316497136167, "loss": 3.7879, "step": 9635 }, { "epoch": 0.17, "learning_rate": 0.0005630921281003259, "loss": 3.8254, "step": 9640 }, { "epoch": 0.17, "learning_rate": 0.0005630525867038553, "loss": 3.7538, "step": 9645 }, { "epoch": 0.17, "learning_rate": 0.0005630130255271784, "loss": 3.8595, "step": 9650 }, { "epoch": 0.17, "learning_rate": 0.0005629734445732698, "loss": 3.8395, "step": 9655 }, { "epoch": 0.17, "learning_rate": 0.0005629338438451059, "loss": 3.8212, "step": 9660 }, { "epoch": 0.17, "learning_rate": 0.0005628942233456645, "loss": 3.6969, "step": 9665 }, { "epoch": 0.17, "learning_rate": 0.0005628545830779248, "loss": 3.8934, "step": 9670 }, { "epoch": 0.17, "learning_rate": 0.0005628149230448677, "loss": 3.784, "step": 9675 }, { "epoch": 0.17, "learning_rate": 0.0005627752432494751, "loss": 3.7458, "step": 9680 }, { "epoch": 0.17, "learning_rate": 0.0005627355436947309, "loss": 3.7764, "step": 9685 }, { "epoch": 0.17, "learning_rate": 0.0005626958243836205, "loss": 3.8557, "step": 9690 }, { "epoch": 0.17, "learning_rate": 0.0005626560853191302, "loss": 3.7871, "step": 9695 }, { "epoch": 0.17, "learning_rate": 0.0005626163265042485, "loss": 3.8723, "step": 9700 }, { "epoch": 0.17, "learning_rate": 0.0005625765479419649, "loss": 3.7628, "step": 9705 }, { "epoch": 0.17, "learning_rate": 0.0005625367496352706, "loss": 3.7876, "step": 9710 }, { "epoch": 0.17, "learning_rate": 0.0005624969315871581, "loss": 3.8288, "step": 9715 }, { "epoch": 0.17, "learning_rate": 0.0005624570938006217, "loss": 3.7054, "step": 9720 }, { "epoch": 0.17, "learning_rate": 0.000562417236278657, "loss": 3.8978, "step": 9725 }, { "epoch": 0.17, "learning_rate": 0.0005623773590242608, "loss": 3.7314, "step": 9730 }, { "epoch": 0.17, "learning_rate": 0.000562337462040432, "loss": 3.844, "step": 9735 }, { "epoch": 0.17, "learning_rate": 0.0005622975453301706, "loss": 3.8081, "step": 9740 }, { "epoch": 0.17, "learning_rate": 0.0005622576088964779, "loss": 3.7213, "step": 9745 }, { "epoch": 0.17, "learning_rate": 0.0005622176527423571, "loss": 3.7476, "step": 9750 }, { "epoch": 0.17, "learning_rate": 0.0005621776768708126, "loss": 3.8561, "step": 9755 }, { "epoch": 0.17, "learning_rate": 0.0005621376812848505, "loss": 3.8914, "step": 9760 }, { "epoch": 0.17, "learning_rate": 0.0005620976659874782, "loss": 3.706, "step": 9765 }, { "epoch": 0.17, "learning_rate": 0.0005620576309817046, "loss": 3.7814, "step": 9770 }, { "epoch": 0.17, "learning_rate": 0.0005620175762705402, "loss": 3.7574, "step": 9775 }, { "epoch": 0.17, "learning_rate": 0.000561977501856997, "loss": 3.8686, "step": 9780 }, { "epoch": 0.17, "learning_rate": 0.0005619374077440882, "loss": 3.8096, "step": 9785 }, { "epoch": 0.17, "learning_rate": 0.0005618972939348286, "loss": 3.8072, "step": 9790 }, { "epoch": 0.17, "learning_rate": 0.0005618571604322349, "loss": 3.8532, "step": 9795 }, { "epoch": 0.17, "learning_rate": 0.0005618170072393246, "loss": 3.7328, "step": 9800 }, { "epoch": 0.17, "learning_rate": 0.0005617768343591172, "loss": 3.8532, "step": 9805 }, { "epoch": 0.17, "learning_rate": 0.0005617366417946335, "loss": 3.8652, "step": 9810 }, { "epoch": 0.17, "learning_rate": 0.0005616964295488957, "loss": 3.7735, "step": 9815 }, { "epoch": 0.17, "learning_rate": 0.0005616561976249275, "loss": 3.7548, "step": 9820 }, { "epoch": 0.17, "learning_rate": 0.0005616159460257542, "loss": 3.764, "step": 9825 }, { "epoch": 0.17, "learning_rate": 0.0005615756747544026, "loss": 3.7759, "step": 9830 }, { "epoch": 0.17, "learning_rate": 0.0005615353838139006, "loss": 3.7218, "step": 9835 }, { "epoch": 0.17, "learning_rate": 0.0005614950732072784, "loss": 3.8552, "step": 9840 }, { "epoch": 0.17, "learning_rate": 0.0005614547429375667, "loss": 3.7065, "step": 9845 }, { "epoch": 0.17, "learning_rate": 0.0005614143930077982, "loss": 3.8019, "step": 9850 }, { "epoch": 0.17, "learning_rate": 0.0005613740234210072, "loss": 3.7688, "step": 9855 }, { "epoch": 0.17, "learning_rate": 0.0005613336341802291, "loss": 3.7568, "step": 9860 }, { "epoch": 0.17, "learning_rate": 0.000561293225288501, "loss": 3.7632, "step": 9865 }, { "epoch": 0.17, "learning_rate": 0.0005612527967488614, "loss": 3.6562, "step": 9870 }, { "epoch": 0.17, "learning_rate": 0.0005612123485643505, "loss": 3.8351, "step": 9875 }, { "epoch": 0.17, "learning_rate": 0.0005611718807380096, "loss": 3.8006, "step": 9880 }, { "epoch": 0.17, "learning_rate": 0.0005611313932728818, "loss": 3.6692, "step": 9885 }, { "epoch": 0.17, "learning_rate": 0.0005610908861720114, "loss": 3.7825, "step": 9890 }, { "epoch": 0.17, "learning_rate": 0.0005610503594384446, "loss": 3.7926, "step": 9895 }, { "epoch": 0.17, "learning_rate": 0.0005610098130752286, "loss": 3.8703, "step": 9900 }, { "epoch": 0.17, "learning_rate": 0.0005609692470854122, "loss": 3.7963, "step": 9905 }, { "epoch": 0.17, "learning_rate": 0.0005609286614720459, "loss": 3.8204, "step": 9910 }, { "epoch": 0.17, "learning_rate": 0.0005608880562381815, "loss": 3.8013, "step": 9915 }, { "epoch": 0.17, "learning_rate": 0.0005608474313868724, "loss": 3.7682, "step": 9920 }, { "epoch": 0.17, "learning_rate": 0.0005608067869211733, "loss": 3.7958, "step": 9925 }, { "epoch": 0.17, "learning_rate": 0.0005607661228441402, "loss": 3.7584, "step": 9930 }, { "epoch": 0.17, "learning_rate": 0.0005607254391588312, "loss": 3.8284, "step": 9935 }, { "epoch": 0.17, "learning_rate": 0.0005606847358683054, "loss": 3.7367, "step": 9940 }, { "epoch": 0.17, "learning_rate": 0.0005606440129756234, "loss": 3.7325, "step": 9945 }, { "epoch": 0.17, "learning_rate": 0.0005606032704838475, "loss": 3.6778, "step": 9950 }, { "epoch": 0.17, "learning_rate": 0.0005605625083960412, "loss": 3.7351, "step": 9955 }, { "epoch": 0.17, "learning_rate": 0.0005605217267152696, "loss": 3.812, "step": 9960 }, { "epoch": 0.17, "learning_rate": 0.0005604809254445992, "loss": 3.7126, "step": 9965 }, { "epoch": 0.17, "learning_rate": 0.0005604401045870983, "loss": 3.8023, "step": 9970 }, { "epoch": 0.17, "learning_rate": 0.0005603992641458362, "loss": 3.7676, "step": 9975 }, { "epoch": 0.17, "learning_rate": 0.000560358404123884, "loss": 3.8314, "step": 9980 }, { "epoch": 0.17, "learning_rate": 0.0005603175245243141, "loss": 3.7735, "step": 9985 }, { "epoch": 0.17, "learning_rate": 0.0005602766253502005, "loss": 3.752, "step": 9990 }, { "epoch": 0.17, "learning_rate": 0.0005602357066046185, "loss": 3.8366, "step": 9995 }, { "epoch": 0.17, "learning_rate": 0.0005601947682906452, "loss": 3.7659, "step": 10000 }, { "epoch": 0.17, "eval_loss": 3.824028253555298, "eval_runtime": 150.1823, "eval_samples_per_second": 12.258, "eval_steps_per_second": 0.772, "step": 10000 }, { "epoch": 0.17, "learning_rate": 0.0005601538104113585, "loss": 3.7734, "step": 10005 }, { "epoch": 0.17, "learning_rate": 0.0005601128329698388, "loss": 3.7532, "step": 10010 }, { "epoch": 0.17, "learning_rate": 0.000560071835969167, "loss": 3.828, "step": 10015 }, { "epoch": 0.17, "learning_rate": 0.000560030819412426, "loss": 3.8327, "step": 10020 }, { "epoch": 0.17, "learning_rate": 0.0005599897833027, "loss": 3.9457, "step": 10025 }, { "epoch": 0.17, "learning_rate": 0.0005599487276430749, "loss": 3.7388, "step": 10030 }, { "epoch": 0.17, "learning_rate": 0.0005599076524366375, "loss": 3.8464, "step": 10035 }, { "epoch": 0.17, "learning_rate": 0.0005598665576864768, "loss": 3.7601, "step": 10040 }, { "epoch": 0.17, "learning_rate": 0.0005598254433956826, "loss": 3.7211, "step": 10045 }, { "epoch": 0.17, "learning_rate": 0.0005597843095673466, "loss": 3.7223, "step": 10050 }, { "epoch": 0.17, "learning_rate": 0.000559743156204562, "loss": 3.6862, "step": 10055 }, { "epoch": 0.17, "learning_rate": 0.0005597019833104232, "loss": 3.8612, "step": 10060 }, { "epoch": 0.17, "learning_rate": 0.0005596607908880263, "loss": 3.7786, "step": 10065 }, { "epoch": 0.17, "learning_rate": 0.0005596195789404685, "loss": 3.9116, "step": 10070 }, { "epoch": 0.17, "learning_rate": 0.000559578347470849, "loss": 3.8694, "step": 10075 }, { "epoch": 0.17, "learning_rate": 0.0005595370964822681, "loss": 3.7191, "step": 10080 }, { "epoch": 0.17, "learning_rate": 0.0005594958259778276, "loss": 3.8221, "step": 10085 }, { "epoch": 0.17, "learning_rate": 0.0005594545359606308, "loss": 3.8228, "step": 10090 }, { "epoch": 0.17, "learning_rate": 0.0005594132264337827, "loss": 3.8175, "step": 10095 }, { "epoch": 0.17, "learning_rate": 0.0005593718974003895, "loss": 3.7765, "step": 10100 }, { "epoch": 0.17, "learning_rate": 0.0005593305488635587, "loss": 3.7567, "step": 10105 }, { "epoch": 0.17, "learning_rate": 0.0005592891808263998, "loss": 3.8263, "step": 10110 }, { "epoch": 0.18, "learning_rate": 0.0005592477932920232, "loss": 3.664, "step": 10115 }, { "epoch": 0.18, "learning_rate": 0.0005592063862635414, "loss": 3.873, "step": 10120 }, { "epoch": 0.18, "learning_rate": 0.0005591649597440675, "loss": 3.7406, "step": 10125 }, { "epoch": 0.18, "learning_rate": 0.000559123513736717, "loss": 3.7542, "step": 10130 }, { "epoch": 0.18, "learning_rate": 0.0005590820482446063, "loss": 3.8525, "step": 10135 }, { "epoch": 0.18, "learning_rate": 0.0005590405632708533, "loss": 3.7978, "step": 10140 }, { "epoch": 0.18, "learning_rate": 0.0005589990588185773, "loss": 3.7052, "step": 10145 }, { "epoch": 0.18, "learning_rate": 0.0005589575348908996, "loss": 3.8114, "step": 10150 }, { "epoch": 0.18, "learning_rate": 0.0005589159914909425, "loss": 3.7365, "step": 10155 }, { "epoch": 0.18, "learning_rate": 0.0005588744286218297, "loss": 3.7698, "step": 10160 }, { "epoch": 0.18, "learning_rate": 0.0005588328462866864, "loss": 3.7538, "step": 10165 }, { "epoch": 0.18, "learning_rate": 0.0005587912444886397, "loss": 3.7885, "step": 10170 }, { "epoch": 0.18, "learning_rate": 0.0005587496232308177, "loss": 3.761, "step": 10175 }, { "epoch": 0.18, "learning_rate": 0.0005587079825163502, "loss": 3.7494, "step": 10180 }, { "epoch": 0.18, "learning_rate": 0.0005586663223483681, "loss": 3.7903, "step": 10185 }, { "epoch": 0.18, "learning_rate": 0.0005586246427300043, "loss": 3.7783, "step": 10190 }, { "epoch": 0.18, "learning_rate": 0.0005585829436643928, "loss": 3.7313, "step": 10195 }, { "epoch": 0.18, "learning_rate": 0.0005585412251546691, "loss": 3.8423, "step": 10200 }, { "epoch": 0.18, "learning_rate": 0.0005584994872039704, "loss": 3.8002, "step": 10205 }, { "epoch": 0.18, "learning_rate": 0.0005584577298154349, "loss": 3.7193, "step": 10210 }, { "epoch": 0.18, "learning_rate": 0.0005584159529922028, "loss": 3.7501, "step": 10215 }, { "epoch": 0.18, "learning_rate": 0.0005583741567374153, "loss": 3.8655, "step": 10220 }, { "epoch": 0.18, "learning_rate": 0.0005583323410542154, "loss": 3.7713, "step": 10225 }, { "epoch": 0.18, "learning_rate": 0.0005582905059457473, "loss": 3.7608, "step": 10230 }, { "epoch": 0.18, "learning_rate": 0.000558248651415157, "loss": 3.7848, "step": 10235 }, { "epoch": 0.18, "learning_rate": 0.0005582067774655916, "loss": 3.8751, "step": 10240 }, { "epoch": 0.18, "learning_rate": 0.0005581648841001997, "loss": 3.8157, "step": 10245 }, { "epoch": 0.18, "learning_rate": 0.0005581229713221318, "loss": 3.7949, "step": 10250 }, { "epoch": 0.18, "learning_rate": 0.0005580810391345391, "loss": 3.7647, "step": 10255 }, { "epoch": 0.18, "learning_rate": 0.000558039087540575, "loss": 3.7398, "step": 10260 }, { "epoch": 0.18, "learning_rate": 0.0005579971165433939, "loss": 3.7206, "step": 10265 }, { "epoch": 0.18, "learning_rate": 0.000557955126146152, "loss": 3.9106, "step": 10270 }, { "epoch": 0.18, "learning_rate": 0.0005579131163520064, "loss": 3.7303, "step": 10275 }, { "epoch": 0.18, "learning_rate": 0.0005578710871641165, "loss": 3.6985, "step": 10280 }, { "epoch": 0.18, "learning_rate": 0.0005578290385856421, "loss": 3.7783, "step": 10285 }, { "epoch": 0.18, "learning_rate": 0.0005577869706197456, "loss": 3.7213, "step": 10290 }, { "epoch": 0.18, "learning_rate": 0.00055774488326959, "loss": 3.8223, "step": 10295 }, { "epoch": 0.18, "learning_rate": 0.00055770277653834, "loss": 3.8479, "step": 10300 }, { "epoch": 0.18, "learning_rate": 0.000557660650429162, "loss": 3.7619, "step": 10305 }, { "epoch": 0.18, "learning_rate": 0.0005576185049452235, "loss": 3.7465, "step": 10310 }, { "epoch": 0.18, "learning_rate": 0.0005575763400896938, "loss": 3.8019, "step": 10315 }, { "epoch": 0.18, "learning_rate": 0.0005575341558657433, "loss": 3.7722, "step": 10320 }, { "epoch": 0.18, "learning_rate": 0.0005574919522765441, "loss": 3.7072, "step": 10325 }, { "epoch": 0.18, "learning_rate": 0.0005574497293252698, "loss": 3.8018, "step": 10330 }, { "epoch": 0.18, "learning_rate": 0.0005574074870150952, "loss": 3.7442, "step": 10335 }, { "epoch": 0.18, "learning_rate": 0.0005573652253491968, "loss": 3.7484, "step": 10340 }, { "epoch": 0.18, "learning_rate": 0.0005573229443307524, "loss": 3.7656, "step": 10345 }, { "epoch": 0.18, "learning_rate": 0.0005572806439629414, "loss": 3.7769, "step": 10350 }, { "epoch": 0.18, "learning_rate": 0.0005572383242489445, "loss": 3.7266, "step": 10355 }, { "epoch": 0.18, "learning_rate": 0.0005571959851919439, "loss": 3.8326, "step": 10360 }, { "epoch": 0.18, "learning_rate": 0.0005571536267951234, "loss": 3.7286, "step": 10365 }, { "epoch": 0.18, "learning_rate": 0.000557111249061668, "loss": 3.7353, "step": 10370 }, { "epoch": 0.18, "learning_rate": 0.0005570688519947643, "loss": 3.7861, "step": 10375 }, { "epoch": 0.18, "learning_rate": 0.0005570264355976005, "loss": 3.746, "step": 10380 }, { "epoch": 0.18, "learning_rate": 0.0005569839998733659, "loss": 3.8178, "step": 10385 }, { "epoch": 0.18, "learning_rate": 0.0005569415448252516, "loss": 3.7118, "step": 10390 }, { "epoch": 0.18, "learning_rate": 0.0005568990704564498, "loss": 3.8499, "step": 10395 }, { "epoch": 0.18, "learning_rate": 0.0005568565767701546, "loss": 3.7467, "step": 10400 }, { "epoch": 0.18, "learning_rate": 0.0005568140637695611, "loss": 3.7022, "step": 10405 }, { "epoch": 0.18, "learning_rate": 0.0005567715314578664, "loss": 3.7888, "step": 10410 }, { "epoch": 0.18, "learning_rate": 0.0005567289798382682, "loss": 3.661, "step": 10415 }, { "epoch": 0.18, "learning_rate": 0.0005566864089139665, "loss": 3.7638, "step": 10420 }, { "epoch": 0.18, "learning_rate": 0.0005566438186881624, "loss": 3.7379, "step": 10425 }, { "epoch": 0.18, "learning_rate": 0.0005566012091640585, "loss": 3.7724, "step": 10430 }, { "epoch": 0.18, "learning_rate": 0.0005565585803448585, "loss": 3.8414, "step": 10435 }, { "epoch": 0.18, "learning_rate": 0.0005565159322337682, "loss": 3.7841, "step": 10440 }, { "epoch": 0.18, "learning_rate": 0.0005564732648339945, "loss": 3.7352, "step": 10445 }, { "epoch": 0.18, "learning_rate": 0.0005564305781487456, "loss": 3.796, "step": 10450 }, { "epoch": 0.18, "learning_rate": 0.0005563878721812314, "loss": 3.681, "step": 10455 }, { "epoch": 0.18, "learning_rate": 0.0005563451469346632, "loss": 3.6562, "step": 10460 }, { "epoch": 0.18, "learning_rate": 0.0005563024024122537, "loss": 3.7886, "step": 10465 }, { "epoch": 0.18, "learning_rate": 0.0005562596386172172, "loss": 3.7527, "step": 10470 }, { "epoch": 0.18, "learning_rate": 0.0005562168555527689, "loss": 3.7763, "step": 10475 }, { "epoch": 0.18, "learning_rate": 0.0005561740532221265, "loss": 3.7259, "step": 10480 }, { "epoch": 0.18, "learning_rate": 0.0005561312316285079, "loss": 3.7692, "step": 10485 }, { "epoch": 0.18, "learning_rate": 0.0005560883907751334, "loss": 3.753, "step": 10490 }, { "epoch": 0.18, "learning_rate": 0.0005560455306652243, "loss": 3.7146, "step": 10495 }, { "epoch": 0.18, "learning_rate": 0.0005560026513020036, "loss": 3.7457, "step": 10500 }, { "epoch": 0.18, "eval_loss": 3.808900833129883, "eval_runtime": 150.2733, "eval_samples_per_second": 12.251, "eval_steps_per_second": 0.772, "step": 10500 }, { "epoch": 0.18, "learning_rate": 0.0005559597526886955, "loss": 3.7659, "step": 10505 }, { "epoch": 0.18, "learning_rate": 0.0005559168348285258, "loss": 3.7797, "step": 10510 }, { "epoch": 0.18, "learning_rate": 0.0005558738977247216, "loss": 3.7639, "step": 10515 }, { "epoch": 0.18, "learning_rate": 0.0005558309413805117, "loss": 3.7582, "step": 10520 }, { "epoch": 0.18, "learning_rate": 0.0005557879657991261, "loss": 3.7758, "step": 10525 }, { "epoch": 0.18, "learning_rate": 0.0005557449709837963, "loss": 3.6406, "step": 10530 }, { "epoch": 0.18, "learning_rate": 0.0005557019569377555, "loss": 3.7329, "step": 10535 }, { "epoch": 0.18, "learning_rate": 0.0005556589236642378, "loss": 3.762, "step": 10540 }, { "epoch": 0.18, "learning_rate": 0.0005556158711664793, "loss": 3.7226, "step": 10545 }, { "epoch": 0.18, "learning_rate": 0.0005555727994477175, "loss": 3.7183, "step": 10550 }, { "epoch": 0.18, "learning_rate": 0.0005555297085111907, "loss": 3.7314, "step": 10555 }, { "epoch": 0.18, "learning_rate": 0.0005554865983601396, "loss": 3.829, "step": 10560 }, { "epoch": 0.18, "learning_rate": 0.0005554434689978056, "loss": 3.7891, "step": 10565 }, { "epoch": 0.18, "learning_rate": 0.0005554003204274318, "loss": 3.765, "step": 10570 }, { "epoch": 0.18, "learning_rate": 0.0005553571526522628, "loss": 3.7701, "step": 10575 }, { "epoch": 0.18, "learning_rate": 0.0005553139656755446, "loss": 3.7329, "step": 10580 }, { "epoch": 0.18, "learning_rate": 0.0005552707595005247, "loss": 3.8377, "step": 10585 }, { "epoch": 0.18, "learning_rate": 0.000555227534130452, "loss": 3.6951, "step": 10590 }, { "epoch": 0.18, "learning_rate": 0.0005551842895685766, "loss": 3.7439, "step": 10595 }, { "epoch": 0.18, "learning_rate": 0.0005551410258181506, "loss": 3.764, "step": 10600 }, { "epoch": 0.18, "learning_rate": 0.0005550977428824269, "loss": 3.7233, "step": 10605 }, { "epoch": 0.18, "learning_rate": 0.0005550544407646603, "loss": 3.8349, "step": 10610 }, { "epoch": 0.18, "learning_rate": 0.000555011119468107, "loss": 3.606, "step": 10615 }, { "epoch": 0.18, "learning_rate": 0.0005549677789960242, "loss": 3.7585, "step": 10620 }, { "epoch": 0.18, "learning_rate": 0.0005549244193516713, "loss": 3.6967, "step": 10625 }, { "epoch": 0.18, "learning_rate": 0.0005548810405383084, "loss": 3.7009, "step": 10630 }, { "epoch": 0.18, "learning_rate": 0.0005548376425591976, "loss": 3.7488, "step": 10635 }, { "epoch": 0.18, "learning_rate": 0.0005547942254176021, "loss": 3.8379, "step": 10640 }, { "epoch": 0.18, "learning_rate": 0.0005547507891167866, "loss": 3.6371, "step": 10645 }, { "epoch": 0.18, "learning_rate": 0.0005547073336600174, "loss": 3.7803, "step": 10650 }, { "epoch": 0.18, "learning_rate": 0.000554663859050562, "loss": 3.7893, "step": 10655 }, { "epoch": 0.18, "learning_rate": 0.0005546203652916895, "loss": 3.7123, "step": 10660 }, { "epoch": 0.18, "learning_rate": 0.0005545768523866706, "loss": 3.7559, "step": 10665 }, { "epoch": 0.18, "learning_rate": 0.0005545333203387768, "loss": 3.7527, "step": 10670 }, { "epoch": 0.18, "learning_rate": 0.0005544897691512821, "loss": 3.747, "step": 10675 }, { "epoch": 0.18, "learning_rate": 0.0005544461988274608, "loss": 3.7641, "step": 10680 }, { "epoch": 0.18, "learning_rate": 0.0005544026093705894, "loss": 3.8162, "step": 10685 }, { "epoch": 0.19, "learning_rate": 0.0005543590007839455, "loss": 3.8019, "step": 10690 }, { "epoch": 0.19, "learning_rate": 0.0005543153730708084, "loss": 3.8212, "step": 10695 }, { "epoch": 0.19, "learning_rate": 0.0005542717262344586, "loss": 3.6983, "step": 10700 }, { "epoch": 0.19, "learning_rate": 0.0005542280602781782, "loss": 3.7003, "step": 10705 }, { "epoch": 0.19, "learning_rate": 0.0005541843752052504, "loss": 3.8489, "step": 10710 }, { "epoch": 0.19, "learning_rate": 0.0005541406710189603, "loss": 3.7563, "step": 10715 }, { "epoch": 0.19, "learning_rate": 0.0005540969477225942, "loss": 3.8294, "step": 10720 }, { "epoch": 0.19, "learning_rate": 0.0005540532053194399, "loss": 3.786, "step": 10725 }, { "epoch": 0.19, "learning_rate": 0.0005540094438127865, "loss": 3.7697, "step": 10730 }, { "epoch": 0.19, "learning_rate": 0.0005539656632059247, "loss": 3.7351, "step": 10735 }, { "epoch": 0.19, "learning_rate": 0.0005539218635021466, "loss": 3.8407, "step": 10740 }, { "epoch": 0.19, "learning_rate": 0.0005538780447047457, "loss": 3.7196, "step": 10745 }, { "epoch": 0.19, "learning_rate": 0.0005538342068170169, "loss": 3.6508, "step": 10750 }, { "epoch": 0.19, "learning_rate": 0.0005537903498422567, "loss": 3.7522, "step": 10755 }, { "epoch": 0.19, "learning_rate": 0.0005537464737837628, "loss": 3.6545, "step": 10760 }, { "epoch": 0.19, "learning_rate": 0.0005537025786448346, "loss": 3.7622, "step": 10765 }, { "epoch": 0.19, "learning_rate": 0.0005536586644287726, "loss": 3.7217, "step": 10770 }, { "epoch": 0.19, "learning_rate": 0.000553614731138879, "loss": 3.6806, "step": 10775 }, { "epoch": 0.19, "learning_rate": 0.0005535707787784574, "loss": 3.8162, "step": 10780 }, { "epoch": 0.19, "learning_rate": 0.0005535268073508128, "loss": 3.7956, "step": 10785 }, { "epoch": 0.19, "learning_rate": 0.0005534828168592515, "loss": 3.7212, "step": 10790 }, { "epoch": 0.19, "learning_rate": 0.0005534388073070815, "loss": 3.8734, "step": 10795 }, { "epoch": 0.19, "learning_rate": 0.0005533947786976121, "loss": 3.7601, "step": 10800 }, { "epoch": 0.19, "learning_rate": 0.0005533507310341539, "loss": 3.7531, "step": 10805 }, { "epoch": 0.19, "learning_rate": 0.0005533066643200191, "loss": 3.8357, "step": 10810 }, { "epoch": 0.19, "learning_rate": 0.0005532625785585214, "loss": 3.7786, "step": 10815 }, { "epoch": 0.19, "learning_rate": 0.0005532184737529757, "loss": 3.8007, "step": 10820 }, { "epoch": 0.19, "learning_rate": 0.0005531743499066985, "loss": 3.8569, "step": 10825 }, { "epoch": 0.19, "learning_rate": 0.0005531302070230077, "loss": 3.729, "step": 10830 }, { "epoch": 0.19, "learning_rate": 0.0005530860451052226, "loss": 3.7075, "step": 10835 }, { "epoch": 0.19, "learning_rate": 0.0005530418641566639, "loss": 3.8074, "step": 10840 }, { "epoch": 0.19, "learning_rate": 0.0005529976641806539, "loss": 3.7291, "step": 10845 }, { "epoch": 0.19, "learning_rate": 0.0005529534451805162, "loss": 3.8529, "step": 10850 }, { "epoch": 0.19, "learning_rate": 0.0005529092071595756, "loss": 3.7, "step": 10855 }, { "epoch": 0.19, "learning_rate": 0.000552864950121159, "loss": 3.65, "step": 10860 }, { "epoch": 0.19, "learning_rate": 0.0005528206740685938, "loss": 3.729, "step": 10865 }, { "epoch": 0.19, "learning_rate": 0.0005527763790052099, "loss": 3.822, "step": 10870 }, { "epoch": 0.19, "learning_rate": 0.0005527320649343375, "loss": 3.7941, "step": 10875 }, { "epoch": 0.19, "learning_rate": 0.0005526877318593092, "loss": 3.7147, "step": 10880 }, { "epoch": 0.19, "learning_rate": 0.0005526433797834584, "loss": 3.6646, "step": 10885 }, { "epoch": 0.19, "learning_rate": 0.0005525990087101202, "loss": 3.827, "step": 10890 }, { "epoch": 0.19, "learning_rate": 0.0005525546186426311, "loss": 3.7701, "step": 10895 }, { "epoch": 0.19, "learning_rate": 0.000552510209584329, "loss": 3.6971, "step": 10900 }, { "epoch": 0.19, "learning_rate": 0.0005524657815385532, "loss": 3.712, "step": 10905 }, { "epoch": 0.19, "learning_rate": 0.0005524213345086446, "loss": 3.7302, "step": 10910 }, { "epoch": 0.19, "learning_rate": 0.0005523768684979453, "loss": 3.7991, "step": 10915 }, { "epoch": 0.19, "learning_rate": 0.0005523323835097988, "loss": 3.8046, "step": 10920 }, { "epoch": 0.19, "learning_rate": 0.0005522878795475503, "loss": 3.746, "step": 10925 }, { "epoch": 0.19, "learning_rate": 0.0005522433566145462, "loss": 3.6934, "step": 10930 }, { "epoch": 0.19, "learning_rate": 0.0005521988147141343, "loss": 3.7359, "step": 10935 }, { "epoch": 0.19, "learning_rate": 0.0005521542538496642, "loss": 3.6942, "step": 10940 }, { "epoch": 0.19, "learning_rate": 0.0005521096740244864, "loss": 3.6852, "step": 10945 }, { "epoch": 0.19, "learning_rate": 0.0005520650752419531, "loss": 3.7642, "step": 10950 }, { "epoch": 0.19, "learning_rate": 0.0005520204575054182, "loss": 3.7516, "step": 10955 }, { "epoch": 0.19, "learning_rate": 0.0005519758208182362, "loss": 3.8863, "step": 10960 }, { "epoch": 0.19, "learning_rate": 0.0005519311651837639, "loss": 3.8518, "step": 10965 }, { "epoch": 0.19, "learning_rate": 0.0005518864906053592, "loss": 3.6166, "step": 10970 }, { "epoch": 0.19, "learning_rate": 0.0005518417970863812, "loss": 3.6787, "step": 10975 }, { "epoch": 0.19, "learning_rate": 0.0005517970846301908, "loss": 3.6608, "step": 10980 }, { "epoch": 0.19, "learning_rate": 0.00055175235324015, "loss": 3.7437, "step": 10985 }, { "epoch": 0.19, "learning_rate": 0.0005517076029196225, "loss": 3.7547, "step": 10990 }, { "epoch": 0.19, "learning_rate": 0.0005516628336719732, "loss": 3.8382, "step": 10995 }, { "epoch": 0.19, "learning_rate": 0.0005516180455005686, "loss": 3.9065, "step": 11000 }, { "epoch": 0.19, "eval_loss": 3.82816481590271, "eval_runtime": 150.3797, "eval_samples_per_second": 12.242, "eval_steps_per_second": 0.771, "step": 11000 }, { "epoch": 0.19, "learning_rate": 0.0005515732384087765, "loss": 3.8359, "step": 11005 }, { "epoch": 0.19, "learning_rate": 0.0005515284123999662, "loss": 3.7338, "step": 11010 }, { "epoch": 0.19, "learning_rate": 0.0005514835674775083, "loss": 3.6409, "step": 11015 }, { "epoch": 0.19, "learning_rate": 0.000551438703644775, "loss": 3.6348, "step": 11020 }, { "epoch": 0.19, "learning_rate": 0.0005513938209051396, "loss": 3.8714, "step": 11025 }, { "epoch": 0.19, "learning_rate": 0.0005513489192619775, "loss": 3.7836, "step": 11030 }, { "epoch": 0.19, "learning_rate": 0.0005513039987186647, "loss": 3.6187, "step": 11035 }, { "epoch": 0.19, "learning_rate": 0.0005512590592785791, "loss": 3.7686, "step": 11040 }, { "epoch": 0.19, "learning_rate": 0.0005512141009451, "loss": 3.7887, "step": 11045 }, { "epoch": 0.19, "learning_rate": 0.0005511691237216078, "loss": 3.7462, "step": 11050 }, { "epoch": 0.19, "learning_rate": 0.0005511241276114849, "loss": 3.7589, "step": 11055 }, { "epoch": 0.19, "learning_rate": 0.0005510791126181145, "loss": 3.6582, "step": 11060 }, { "epoch": 0.19, "learning_rate": 0.0005510340787448815, "loss": 3.6191, "step": 11065 }, { "epoch": 0.19, "learning_rate": 0.0005509890259951725, "loss": 3.7391, "step": 11070 }, { "epoch": 0.19, "learning_rate": 0.000550943954372375, "loss": 3.7873, "step": 11075 }, { "epoch": 0.19, "learning_rate": 0.0005508988638798781, "loss": 3.58, "step": 11080 }, { "epoch": 0.19, "learning_rate": 0.0005508537545210726, "loss": 3.7628, "step": 11085 }, { "epoch": 0.19, "learning_rate": 0.0005508086262993502, "loss": 3.684, "step": 11090 }, { "epoch": 0.19, "learning_rate": 0.0005507634792181046, "loss": 3.808, "step": 11095 }, { "epoch": 0.19, "learning_rate": 0.0005507183132807304, "loss": 3.6461, "step": 11100 }, { "epoch": 0.19, "learning_rate": 0.0005506731284906242, "loss": 3.7431, "step": 11105 }, { "epoch": 0.19, "learning_rate": 0.0005506279248511832, "loss": 3.6644, "step": 11110 }, { "epoch": 0.19, "learning_rate": 0.0005505827023658067, "loss": 3.8145, "step": 11115 }, { "epoch": 0.19, "learning_rate": 0.0005505374610378953, "loss": 3.7549, "step": 11120 }, { "epoch": 0.19, "learning_rate": 0.0005504922008708507, "loss": 3.6777, "step": 11125 }, { "epoch": 0.19, "learning_rate": 0.0005504469218680763, "loss": 3.6589, "step": 11130 }, { "epoch": 0.19, "learning_rate": 0.0005504016240329772, "loss": 3.7951, "step": 11135 }, { "epoch": 0.19, "learning_rate": 0.000550356307368959, "loss": 3.6839, "step": 11140 }, { "epoch": 0.19, "learning_rate": 0.0005503109718794296, "loss": 3.8131, "step": 11145 }, { "epoch": 0.19, "learning_rate": 0.0005502656175677979, "loss": 3.7538, "step": 11150 }, { "epoch": 0.19, "learning_rate": 0.0005502202444374744, "loss": 3.5535, "step": 11155 }, { "epoch": 0.19, "learning_rate": 0.0005501748524918709, "loss": 3.6548, "step": 11160 }, { "epoch": 0.19, "learning_rate": 0.0005501294417344006, "loss": 3.7523, "step": 11165 }, { "epoch": 0.19, "learning_rate": 0.0005500840121684781, "loss": 3.778, "step": 11170 }, { "epoch": 0.19, "learning_rate": 0.0005500385637975196, "loss": 3.6966, "step": 11175 }, { "epoch": 0.19, "learning_rate": 0.0005499930966249426, "loss": 3.7497, "step": 11180 }, { "epoch": 0.19, "learning_rate": 0.0005499476106541659, "loss": 3.7591, "step": 11185 }, { "epoch": 0.19, "learning_rate": 0.0005499021058886096, "loss": 3.7704, "step": 11190 }, { "epoch": 0.19, "learning_rate": 0.0005498565823316959, "loss": 3.8197, "step": 11195 }, { "epoch": 0.19, "learning_rate": 0.0005498110399868477, "loss": 3.6668, "step": 11200 }, { "epoch": 0.19, "learning_rate": 0.0005497654788574896, "loss": 3.7984, "step": 11205 }, { "epoch": 0.19, "learning_rate": 0.0005497198989470474, "loss": 3.7465, "step": 11210 }, { "epoch": 0.19, "learning_rate": 0.0005496743002589487, "loss": 3.6981, "step": 11215 }, { "epoch": 0.19, "learning_rate": 0.000549628682796622, "loss": 3.8666, "step": 11220 }, { "epoch": 0.19, "learning_rate": 0.0005495830465634978, "loss": 3.7241, "step": 11225 }, { "epoch": 0.19, "learning_rate": 0.0005495373915630076, "loss": 3.7521, "step": 11230 }, { "epoch": 0.19, "learning_rate": 0.0005494917177985844, "loss": 3.717, "step": 11235 }, { "epoch": 0.19, "learning_rate": 0.0005494460252736627, "loss": 3.8369, "step": 11240 }, { "epoch": 0.19, "learning_rate": 0.0005494003139916782, "loss": 3.6984, "step": 11245 }, { "epoch": 0.19, "learning_rate": 0.0005493545839560682, "loss": 3.7683, "step": 11250 }, { "epoch": 0.19, "learning_rate": 0.0005493088351702716, "loss": 3.753, "step": 11255 }, { "epoch": 0.19, "learning_rate": 0.0005492630676377281, "loss": 3.6862, "step": 11260 }, { "epoch": 0.19, "learning_rate": 0.0005492172813618795, "loss": 3.7831, "step": 11265 }, { "epoch": 0.2, "learning_rate": 0.0005491714763461684, "loss": 3.6696, "step": 11270 }, { "epoch": 0.2, "learning_rate": 0.0005491256525940395, "loss": 3.792, "step": 11275 }, { "epoch": 0.2, "learning_rate": 0.000549079810108938, "loss": 3.7118, "step": 11280 }, { "epoch": 0.2, "learning_rate": 0.0005490339488943115, "loss": 3.7159, "step": 11285 }, { "epoch": 0.2, "learning_rate": 0.0005489880689536083, "loss": 3.8581, "step": 11290 }, { "epoch": 0.2, "learning_rate": 0.0005489421702902783, "loss": 3.7271, "step": 11295 }, { "epoch": 0.2, "learning_rate": 0.0005488962529077728, "loss": 3.7899, "step": 11300 }, { "epoch": 0.2, "learning_rate": 0.0005488503168095449, "loss": 3.7817, "step": 11305 }, { "epoch": 0.2, "learning_rate": 0.0005488043619990483, "loss": 3.7262, "step": 11310 }, { "epoch": 0.2, "learning_rate": 0.0005487583884797389, "loss": 3.7328, "step": 11315 }, { "epoch": 0.2, "learning_rate": 0.0005487123962550735, "loss": 3.8268, "step": 11320 }, { "epoch": 0.2, "learning_rate": 0.0005486663853285106, "loss": 3.6743, "step": 11325 }, { "epoch": 0.2, "learning_rate": 0.0005486203557035098, "loss": 3.7256, "step": 11330 }, { "epoch": 0.2, "learning_rate": 0.0005485743073835325, "loss": 3.6811, "step": 11335 }, { "epoch": 0.2, "learning_rate": 0.0005485282403720411, "loss": 3.6535, "step": 11340 }, { "epoch": 0.2, "learning_rate": 0.0005484821546724998, "loss": 3.7042, "step": 11345 }, { "epoch": 0.2, "learning_rate": 0.000548436050288374, "loss": 3.7035, "step": 11350 }, { "epoch": 0.2, "learning_rate": 0.0005483899272231303, "loss": 3.6523, "step": 11355 }, { "epoch": 0.2, "learning_rate": 0.000548343785480237, "loss": 3.6676, "step": 11360 }, { "epoch": 0.2, "learning_rate": 0.0005482976250631639, "loss": 3.7115, "step": 11365 }, { "epoch": 0.2, "learning_rate": 0.000548251445975382, "loss": 3.7758, "step": 11370 }, { "epoch": 0.2, "learning_rate": 0.0005482052482203634, "loss": 3.7202, "step": 11375 }, { "epoch": 0.2, "learning_rate": 0.0005481590318015823, "loss": 3.7921, "step": 11380 }, { "epoch": 0.2, "learning_rate": 0.0005481127967225137, "loss": 3.5875, "step": 11385 }, { "epoch": 0.2, "learning_rate": 0.0005480665429866344, "loss": 3.872, "step": 11390 }, { "epoch": 0.2, "learning_rate": 0.0005480202705974225, "loss": 3.7499, "step": 11395 }, { "epoch": 0.2, "learning_rate": 0.0005479739795583573, "loss": 3.8315, "step": 11400 }, { "epoch": 0.2, "learning_rate": 0.0005479276698729195, "loss": 3.7723, "step": 11405 }, { "epoch": 0.2, "learning_rate": 0.0005478813415445916, "loss": 3.7126, "step": 11410 }, { "epoch": 0.2, "learning_rate": 0.0005478349945768574, "loss": 3.8067, "step": 11415 }, { "epoch": 0.2, "learning_rate": 0.0005477886289732015, "loss": 3.6816, "step": 11420 }, { "epoch": 0.2, "learning_rate": 0.0005477422447371107, "loss": 3.8123, "step": 11425 }, { "epoch": 0.2, "learning_rate": 0.0005476958418720728, "loss": 3.6074, "step": 11430 }, { "epoch": 0.2, "learning_rate": 0.000547649420381577, "loss": 3.7801, "step": 11435 }, { "epoch": 0.2, "learning_rate": 0.0005476029802691138, "loss": 3.7547, "step": 11440 }, { "epoch": 0.2, "learning_rate": 0.0005475565215381755, "loss": 3.7165, "step": 11445 }, { "epoch": 0.2, "learning_rate": 0.0005475100441922556, "loss": 3.721, "step": 11450 }, { "epoch": 0.2, "learning_rate": 0.0005474635482348488, "loss": 3.6334, "step": 11455 }, { "epoch": 0.2, "learning_rate": 0.0005474170336694514, "loss": 3.7961, "step": 11460 }, { "epoch": 0.2, "learning_rate": 0.000547370500499561, "loss": 3.7415, "step": 11465 }, { "epoch": 0.2, "learning_rate": 0.0005473239487286767, "loss": 3.6964, "step": 11470 }, { "epoch": 0.2, "learning_rate": 0.0005472773783602991, "loss": 3.7686, "step": 11475 }, { "epoch": 0.2, "learning_rate": 0.0005472307893979298, "loss": 3.7813, "step": 11480 }, { "epoch": 0.2, "learning_rate": 0.0005471841818450722, "loss": 3.8297, "step": 11485 }, { "epoch": 0.2, "learning_rate": 0.000547137555705231, "loss": 3.6398, "step": 11490 }, { "epoch": 0.2, "learning_rate": 0.0005470909109819122, "loss": 3.8471, "step": 11495 }, { "epoch": 0.2, "learning_rate": 0.0005470442476786231, "loss": 3.636, "step": 11500 }, { "epoch": 0.2, "eval_loss": 3.7674803733825684, "eval_runtime": 150.2732, "eval_samples_per_second": 12.251, "eval_steps_per_second": 0.772, "step": 11500 }, { "epoch": 0.2, "learning_rate": 0.0005469975657988727, "loss": 3.7453, "step": 11505 }, { "epoch": 0.2, "learning_rate": 0.0005469508653461713, "loss": 3.6952, "step": 11510 }, { "epoch": 0.2, "learning_rate": 0.0005469041463240302, "loss": 3.6867, "step": 11515 }, { "epoch": 0.2, "learning_rate": 0.0005468574087359628, "loss": 3.7732, "step": 11520 }, { "epoch": 0.2, "learning_rate": 0.0005468106525854834, "loss": 3.7456, "step": 11525 }, { "epoch": 0.2, "learning_rate": 0.0005467638778761078, "loss": 3.7178, "step": 11530 }, { "epoch": 0.2, "learning_rate": 0.0005467170846113532, "loss": 3.6693, "step": 11535 }, { "epoch": 0.2, "learning_rate": 0.0005466702727947383, "loss": 3.7435, "step": 11540 }, { "epoch": 0.2, "learning_rate": 0.0005466234424297829, "loss": 3.7004, "step": 11545 }, { "epoch": 0.2, "learning_rate": 0.0005465765935200086, "loss": 3.8126, "step": 11550 }, { "epoch": 0.2, "learning_rate": 0.0005465297260689383, "loss": 3.7205, "step": 11555 }, { "epoch": 0.2, "learning_rate": 0.0005464828400800959, "loss": 3.7958, "step": 11560 }, { "epoch": 0.2, "learning_rate": 0.0005464359355570071, "loss": 3.745, "step": 11565 }, { "epoch": 0.2, "learning_rate": 0.0005463890125031991, "loss": 3.681, "step": 11570 }, { "epoch": 0.2, "learning_rate": 0.0005463420709222, "loss": 3.6648, "step": 11575 }, { "epoch": 0.2, "learning_rate": 0.0005462951108175396, "loss": 3.7188, "step": 11580 }, { "epoch": 0.2, "learning_rate": 0.0005462481321927492, "loss": 3.6209, "step": 11585 }, { "epoch": 0.2, "learning_rate": 0.0005462011350513611, "loss": 3.8103, "step": 11590 }, { "epoch": 0.2, "learning_rate": 0.0005461541193969095, "loss": 3.7334, "step": 11595 }, { "epoch": 0.2, "learning_rate": 0.0005461070852329296, "loss": 3.8183, "step": 11600 }, { "epoch": 0.2, "learning_rate": 0.0005460600325629582, "loss": 3.7361, "step": 11605 }, { "epoch": 0.2, "learning_rate": 0.0005460129613905334, "loss": 3.6082, "step": 11610 }, { "epoch": 0.2, "learning_rate": 0.0005459658717191948, "loss": 3.7858, "step": 11615 }, { "epoch": 0.2, "learning_rate": 0.0005459187635524831, "loss": 3.7071, "step": 11620 }, { "epoch": 0.2, "learning_rate": 0.0005458716368939407, "loss": 3.8105, "step": 11625 }, { "epoch": 0.2, "learning_rate": 0.0005458244917471112, "loss": 3.7418, "step": 11630 }, { "epoch": 0.2, "learning_rate": 0.0005457773281155398, "loss": 3.7896, "step": 11635 }, { "epoch": 0.2, "learning_rate": 0.000545730146002773, "loss": 3.7541, "step": 11640 }, { "epoch": 0.2, "learning_rate": 0.0005456829454123585, "loss": 3.7075, "step": 11645 }, { "epoch": 0.2, "learning_rate": 0.0005456357263478456, "loss": 3.7605, "step": 11650 }, { "epoch": 0.2, "learning_rate": 0.000545588488812785, "loss": 3.7757, "step": 11655 }, { "epoch": 0.2, "learning_rate": 0.0005455412328107287, "loss": 3.7652, "step": 11660 }, { "epoch": 0.2, "learning_rate": 0.0005454939583452299, "loss": 3.7985, "step": 11665 }, { "epoch": 0.2, "learning_rate": 0.0005454466654198437, "loss": 3.7654, "step": 11670 }, { "epoch": 0.2, "learning_rate": 0.0005453993540381261, "loss": 3.6536, "step": 11675 }, { "epoch": 0.2, "learning_rate": 0.0005453520242036348, "loss": 3.7901, "step": 11680 }, { "epoch": 0.2, "learning_rate": 0.0005453046759199286, "loss": 3.6965, "step": 11685 }, { "epoch": 0.2, "learning_rate": 0.000545257309190568, "loss": 3.7089, "step": 11690 }, { "epoch": 0.2, "learning_rate": 0.0005452099240191146, "loss": 3.7599, "step": 11695 }, { "epoch": 0.2, "learning_rate": 0.0005451625204091317, "loss": 3.6116, "step": 11700 }, { "epoch": 0.2, "learning_rate": 0.0005451150983641835, "loss": 3.7526, "step": 11705 }, { "epoch": 0.2, "learning_rate": 0.0005450676578878362, "loss": 3.7258, "step": 11710 }, { "epoch": 0.2, "learning_rate": 0.0005450201989836571, "loss": 3.7765, "step": 11715 }, { "epoch": 0.2, "learning_rate": 0.0005449727216552147, "loss": 3.7091, "step": 11720 }, { "epoch": 0.2, "learning_rate": 0.000544925225906079, "loss": 3.6662, "step": 11725 }, { "epoch": 0.2, "learning_rate": 0.0005448777117398216, "loss": 3.6813, "step": 11730 }, { "epoch": 0.2, "learning_rate": 0.0005448301791600152, "loss": 3.6477, "step": 11735 }, { "epoch": 0.2, "learning_rate": 0.0005447826281702342, "loss": 3.6587, "step": 11740 }, { "epoch": 0.2, "learning_rate": 0.000544735058774054, "loss": 3.7099, "step": 11745 }, { "epoch": 0.2, "learning_rate": 0.0005446874709750516, "loss": 3.7078, "step": 11750 }, { "epoch": 0.2, "learning_rate": 0.0005446398647768053, "loss": 3.7051, "step": 11755 }, { "epoch": 0.2, "learning_rate": 0.000544592240182895, "loss": 3.7258, "step": 11760 }, { "epoch": 0.2, "learning_rate": 0.0005445445971969017, "loss": 3.6229, "step": 11765 }, { "epoch": 0.2, "learning_rate": 0.0005444969358224082, "loss": 3.8427, "step": 11770 }, { "epoch": 0.2, "learning_rate": 0.0005444492560629979, "loss": 3.6114, "step": 11775 }, { "epoch": 0.2, "learning_rate": 0.0005444015579222564, "loss": 3.7371, "step": 11780 }, { "epoch": 0.2, "learning_rate": 0.0005443538414037702, "loss": 3.704, "step": 11785 }, { "epoch": 0.2, "learning_rate": 0.0005443061065111275, "loss": 3.7573, "step": 11790 }, { "epoch": 0.2, "learning_rate": 0.0005442583532479177, "loss": 3.7781, "step": 11795 }, { "epoch": 0.2, "learning_rate": 0.0005442105816177315, "loss": 3.7478, "step": 11800 }, { "epoch": 0.2, "learning_rate": 0.0005441627916241611, "loss": 3.6679, "step": 11805 }, { "epoch": 0.2, "learning_rate": 0.0005441149832708, "loss": 3.5633, "step": 11810 }, { "epoch": 0.2, "learning_rate": 0.0005440671565612434, "loss": 3.6967, "step": 11815 }, { "epoch": 0.2, "learning_rate": 0.0005440193114990873, "loss": 3.6898, "step": 11820 }, { "epoch": 0.2, "learning_rate": 0.0005439714480879297, "loss": 3.6566, "step": 11825 }, { "epoch": 0.2, "learning_rate": 0.0005439235663313695, "loss": 3.6392, "step": 11830 }, { "epoch": 0.2, "learning_rate": 0.0005438756662330071, "loss": 3.7343, "step": 11835 }, { "epoch": 0.2, "learning_rate": 0.0005438277477964445, "loss": 3.7082, "step": 11840 }, { "epoch": 0.2, "learning_rate": 0.0005437798110252849, "loss": 3.6861, "step": 11845 }, { "epoch": 0.21, "learning_rate": 0.0005437318559231328, "loss": 3.8254, "step": 11850 }, { "epoch": 0.21, "learning_rate": 0.0005436838824935943, "loss": 3.7302, "step": 11855 }, { "epoch": 0.21, "learning_rate": 0.0005436358907402767, "loss": 3.7656, "step": 11860 }, { "epoch": 0.21, "learning_rate": 0.0005435878806667886, "loss": 3.7166, "step": 11865 }, { "epoch": 0.21, "learning_rate": 0.0005435398522767404, "loss": 3.6894, "step": 11870 }, { "epoch": 0.21, "learning_rate": 0.0005434918055737434, "loss": 3.7122, "step": 11875 }, { "epoch": 0.21, "learning_rate": 0.0005434437405614104, "loss": 3.7014, "step": 11880 }, { "epoch": 0.21, "learning_rate": 0.0005433956572433558, "loss": 3.7079, "step": 11885 }, { "epoch": 0.21, "learning_rate": 0.0005433475556231951, "loss": 3.6731, "step": 11890 }, { "epoch": 0.21, "learning_rate": 0.0005432994357045455, "loss": 3.7536, "step": 11895 }, { "epoch": 0.21, "learning_rate": 0.0005432512974910252, "loss": 3.58, "step": 11900 }, { "epoch": 0.21, "learning_rate": 0.0005432031409862538, "loss": 3.7737, "step": 11905 }, { "epoch": 0.21, "learning_rate": 0.0005431549661938527, "loss": 3.6355, "step": 11910 }, { "epoch": 0.21, "learning_rate": 0.0005431067731174443, "loss": 3.7166, "step": 11915 }, { "epoch": 0.21, "learning_rate": 0.0005430585617606524, "loss": 3.7184, "step": 11920 }, { "epoch": 0.21, "learning_rate": 0.0005430103321271025, "loss": 3.7067, "step": 11925 }, { "epoch": 0.21, "learning_rate": 0.0005429620842204209, "loss": 3.8456, "step": 11930 }, { "epoch": 0.21, "learning_rate": 0.0005429138180442358, "loss": 3.6251, "step": 11935 }, { "epoch": 0.21, "learning_rate": 0.0005428655336021763, "loss": 3.7128, "step": 11940 }, { "epoch": 0.21, "learning_rate": 0.0005428172308978735, "loss": 3.7401, "step": 11945 }, { "epoch": 0.21, "learning_rate": 0.0005427689099349593, "loss": 3.7788, "step": 11950 }, { "epoch": 0.21, "learning_rate": 0.0005427205707170674, "loss": 3.759, "step": 11955 }, { "epoch": 0.21, "learning_rate": 0.0005426722132478323, "loss": 3.78, "step": 11960 }, { "epoch": 0.21, "learning_rate": 0.0005426238375308906, "loss": 3.7712, "step": 11965 }, { "epoch": 0.21, "learning_rate": 0.0005425754435698796, "loss": 3.7476, "step": 11970 }, { "epoch": 0.21, "learning_rate": 0.0005425270313684385, "loss": 3.7033, "step": 11975 }, { "epoch": 0.21, "learning_rate": 0.0005424786009302075, "loss": 3.6609, "step": 11980 }, { "epoch": 0.21, "learning_rate": 0.0005424301522588285, "loss": 3.7616, "step": 11985 }, { "epoch": 0.21, "learning_rate": 0.0005423816853579444, "loss": 3.6577, "step": 11990 }, { "epoch": 0.21, "learning_rate": 0.0005423332002311998, "loss": 3.7224, "step": 11995 }, { "epoch": 0.21, "learning_rate": 0.0005422846968822404, "loss": 3.6866, "step": 12000 }, { "epoch": 0.21, "eval_loss": 3.7599782943725586, "eval_runtime": 150.9725, "eval_samples_per_second": 12.194, "eval_steps_per_second": 0.768, "step": 12000 }, { "epoch": 0.21, "learning_rate": 0.0005422361753147136, "loss": 3.7354, "step": 12005 }, { "epoch": 0.21, "learning_rate": 0.0005421876355322677, "loss": 3.7213, "step": 12010 }, { "epoch": 0.21, "learning_rate": 0.000542139077538553, "loss": 3.7977, "step": 12015 }, { "epoch": 0.21, "learning_rate": 0.0005420905013372205, "loss": 3.786, "step": 12020 }, { "epoch": 0.21, "learning_rate": 0.000542041906931923, "loss": 3.7917, "step": 12025 }, { "epoch": 0.21, "learning_rate": 0.0005419932943263146, "loss": 3.7851, "step": 12030 }, { "epoch": 0.21, "learning_rate": 0.0005419446635240505, "loss": 3.7055, "step": 12035 }, { "epoch": 0.21, "learning_rate": 0.0005418960145287878, "loss": 3.7075, "step": 12040 }, { "epoch": 0.21, "learning_rate": 0.0005418473473441845, "loss": 3.77, "step": 12045 }, { "epoch": 0.21, "learning_rate": 0.0005417986619739001, "loss": 3.6612, "step": 12050 }, { "epoch": 0.21, "learning_rate": 0.0005417499584215955, "loss": 3.5958, "step": 12055 }, { "epoch": 0.21, "learning_rate": 0.0005417012366909329, "loss": 3.7396, "step": 12060 }, { "epoch": 0.21, "learning_rate": 0.0005416524967855761, "loss": 3.6948, "step": 12065 }, { "epoch": 0.21, "learning_rate": 0.00054160373870919, "loss": 3.7575, "step": 12070 }, { "epoch": 0.21, "learning_rate": 0.0005415549624654409, "loss": 3.7255, "step": 12075 }, { "epoch": 0.21, "learning_rate": 0.0005415061680579966, "loss": 3.751, "step": 12080 }, { "epoch": 0.21, "learning_rate": 0.0005414573554905262, "loss": 3.7404, "step": 12085 }, { "epoch": 0.21, "learning_rate": 0.0005414085247667002, "loss": 3.8004, "step": 12090 }, { "epoch": 0.21, "learning_rate": 0.0005413596758901901, "loss": 3.649, "step": 12095 }, { "epoch": 0.21, "learning_rate": 0.0005413108088646695, "loss": 3.7088, "step": 12100 }, { "epoch": 0.21, "learning_rate": 0.0005412619236938126, "loss": 3.6278, "step": 12105 }, { "epoch": 0.21, "learning_rate": 0.0005412130203812957, "loss": 3.7007, "step": 12110 }, { "epoch": 0.21, "learning_rate": 0.0005411640989307958, "loss": 3.7458, "step": 12115 }, { "epoch": 0.21, "learning_rate": 0.0005411151593459915, "loss": 3.7283, "step": 12120 }, { "epoch": 0.21, "learning_rate": 0.0005410662016305629, "loss": 3.7408, "step": 12125 }, { "epoch": 0.21, "learning_rate": 0.0005410172257881915, "loss": 3.7432, "step": 12130 }, { "epoch": 0.21, "learning_rate": 0.0005409682318225599, "loss": 3.7822, "step": 12135 }, { "epoch": 0.21, "learning_rate": 0.0005409192197373522, "loss": 3.7222, "step": 12140 }, { "epoch": 0.21, "learning_rate": 0.0005408701895362538, "loss": 3.741, "step": 12145 }, { "epoch": 0.21, "learning_rate": 0.0005408211412229517, "loss": 3.5775, "step": 12150 }, { "epoch": 0.21, "learning_rate": 0.0005407720748011338, "loss": 3.7984, "step": 12155 }, { "epoch": 0.21, "learning_rate": 0.00054072299027449, "loss": 3.752, "step": 12160 }, { "epoch": 0.21, "learning_rate": 0.0005406738876467109, "loss": 3.706, "step": 12165 }, { "epoch": 0.21, "learning_rate": 0.000540624766921489, "loss": 3.7555, "step": 12170 }, { "epoch": 0.21, "learning_rate": 0.0005405756281025176, "loss": 3.7497, "step": 12175 }, { "epoch": 0.21, "learning_rate": 0.0005405264711934922, "loss": 3.6543, "step": 12180 }, { "epoch": 0.21, "learning_rate": 0.0005404772961981086, "loss": 3.6224, "step": 12185 }, { "epoch": 0.21, "learning_rate": 0.0005404281031200648, "loss": 3.6435, "step": 12190 }, { "epoch": 0.21, "learning_rate": 0.0005403788919630598, "loss": 3.7747, "step": 12195 }, { "epoch": 0.21, "learning_rate": 0.0005403296627307942, "loss": 3.5874, "step": 12200 }, { "epoch": 0.21, "learning_rate": 0.0005402804154269695, "loss": 3.5728, "step": 12205 }, { "epoch": 0.21, "learning_rate": 0.0005402311500552889, "loss": 3.7743, "step": 12210 }, { "epoch": 0.21, "learning_rate": 0.0005401818666194571, "loss": 3.6319, "step": 12215 }, { "epoch": 0.21, "learning_rate": 0.0005401325651231798, "loss": 3.6402, "step": 12220 }, { "epoch": 0.21, "learning_rate": 0.0005400832455701644, "loss": 3.6527, "step": 12225 }, { "epoch": 0.21, "learning_rate": 0.0005400339079641192, "loss": 3.7487, "step": 12230 }, { "epoch": 0.21, "learning_rate": 0.0005399845523087543, "loss": 3.6764, "step": 12235 }, { "epoch": 0.21, "learning_rate": 0.000539935178607781, "loss": 3.756, "step": 12240 }, { "epoch": 0.21, "learning_rate": 0.0005398857868649118, "loss": 3.5799, "step": 12245 }, { "epoch": 0.21, "learning_rate": 0.0005398363770838609, "loss": 3.6562, "step": 12250 }, { "epoch": 0.21, "learning_rate": 0.0005397869492683436, "loss": 3.6615, "step": 12255 }, { "epoch": 0.21, "learning_rate": 0.0005397375034220767, "loss": 3.7017, "step": 12260 }, { "epoch": 0.21, "learning_rate": 0.0005396880395487781, "loss": 3.7083, "step": 12265 }, { "epoch": 0.21, "learning_rate": 0.0005396385576521672, "loss": 3.7206, "step": 12270 }, { "epoch": 0.21, "learning_rate": 0.000539589057735965, "loss": 3.6814, "step": 12275 }, { "epoch": 0.21, "learning_rate": 0.0005395395398038935, "loss": 3.5883, "step": 12280 }, { "epoch": 0.21, "learning_rate": 0.0005394900038596763, "loss": 3.6049, "step": 12285 }, { "epoch": 0.21, "learning_rate": 0.0005394404499070381, "loss": 3.7757, "step": 12290 }, { "epoch": 0.21, "learning_rate": 0.0005393908779497052, "loss": 3.7323, "step": 12295 }, { "epoch": 0.21, "learning_rate": 0.0005393412879914051, "loss": 3.7704, "step": 12300 }, { "epoch": 0.21, "learning_rate": 0.0005392916800358667, "loss": 3.7201, "step": 12305 }, { "epoch": 0.21, "learning_rate": 0.0005392420540868205, "loss": 3.7927, "step": 12310 }, { "epoch": 0.21, "learning_rate": 0.0005391924101479978, "loss": 3.656, "step": 12315 }, { "epoch": 0.21, "learning_rate": 0.0005391427482231317, "loss": 3.7216, "step": 12320 }, { "epoch": 0.21, "learning_rate": 0.0005390930683159566, "loss": 3.7133, "step": 12325 }, { "epoch": 0.21, "learning_rate": 0.000539043370430208, "loss": 3.6312, "step": 12330 }, { "epoch": 0.21, "learning_rate": 0.000538993654569623, "loss": 3.643, "step": 12335 }, { "epoch": 0.21, "learning_rate": 0.00053894392073794, "loss": 3.5749, "step": 12340 }, { "epoch": 0.21, "learning_rate": 0.0005388941689388988, "loss": 3.5516, "step": 12345 }, { "epoch": 0.21, "learning_rate": 0.0005388443991762405, "loss": 3.7479, "step": 12350 }, { "epoch": 0.21, "learning_rate": 0.0005387946114537072, "loss": 3.6775, "step": 12355 }, { "epoch": 0.21, "learning_rate": 0.0005387448057750431, "loss": 3.6478, "step": 12360 }, { "epoch": 0.21, "learning_rate": 0.0005386949821439932, "loss": 3.7257, "step": 12365 }, { "epoch": 0.21, "learning_rate": 0.0005386451405643038, "loss": 3.6918, "step": 12370 }, { "epoch": 0.21, "learning_rate": 0.000538595281039723, "loss": 3.6817, "step": 12375 }, { "epoch": 0.21, "learning_rate": 0.0005385454035739996, "loss": 3.6628, "step": 12380 }, { "epoch": 0.21, "learning_rate": 0.0005384955081708847, "loss": 3.6527, "step": 12385 }, { "epoch": 0.21, "learning_rate": 0.0005384455948341296, "loss": 3.7236, "step": 12390 }, { "epoch": 0.21, "learning_rate": 0.0005383956635674878, "loss": 3.5499, "step": 12395 }, { "epoch": 0.21, "learning_rate": 0.0005383457143747141, "loss": 3.7324, "step": 12400 }, { "epoch": 0.21, "learning_rate": 0.0005382957472595638, "loss": 3.6974, "step": 12405 }, { "epoch": 0.21, "learning_rate": 0.0005382457622257949, "loss": 3.6924, "step": 12410 }, { "epoch": 0.21, "learning_rate": 0.0005381957592771654, "loss": 3.5955, "step": 12415 }, { "epoch": 0.21, "learning_rate": 0.0005381457384174358, "loss": 3.6657, "step": 12420 }, { "epoch": 0.22, "learning_rate": 0.000538095699650367, "loss": 3.7176, "step": 12425 }, { "epoch": 0.22, "learning_rate": 0.0005380456429797218, "loss": 3.6794, "step": 12430 }, { "epoch": 0.22, "learning_rate": 0.0005379955684092643, "loss": 3.7586, "step": 12435 }, { "epoch": 0.22, "learning_rate": 0.0005379454759427597, "loss": 3.7462, "step": 12440 }, { "epoch": 0.22, "learning_rate": 0.0005378953655839747, "loss": 3.6993, "step": 12445 }, { "epoch": 0.22, "learning_rate": 0.0005378452373366776, "loss": 3.6243, "step": 12450 }, { "epoch": 0.22, "learning_rate": 0.0005377950912046374, "loss": 3.6997, "step": 12455 }, { "epoch": 0.22, "learning_rate": 0.0005377449271916251, "loss": 3.6549, "step": 12460 }, { "epoch": 0.22, "learning_rate": 0.0005376947453014126, "loss": 3.7064, "step": 12465 }, { "epoch": 0.22, "learning_rate": 0.0005376445455377736, "loss": 3.6477, "step": 12470 }, { "epoch": 0.22, "learning_rate": 0.0005375943279044825, "loss": 3.7082, "step": 12475 }, { "epoch": 0.22, "learning_rate": 0.0005375440924053156, "loss": 3.6758, "step": 12480 }, { "epoch": 0.22, "learning_rate": 0.0005374938390440505, "loss": 3.6828, "step": 12485 }, { "epoch": 0.22, "learning_rate": 0.0005374435678244658, "loss": 3.6934, "step": 12490 }, { "epoch": 0.22, "learning_rate": 0.0005373932787503417, "loss": 3.6921, "step": 12495 }, { "epoch": 0.22, "learning_rate": 0.0005373429718254595, "loss": 3.6586, "step": 12500 }, { "epoch": 0.22, "eval_loss": 3.7146527767181396, "eval_runtime": 150.1722, "eval_samples_per_second": 12.259, "eval_steps_per_second": 0.772, "step": 12500 }, { "epoch": 0.22, "learning_rate": 0.0005372926470536023, "loss": 3.759, "step": 12505 }, { "epoch": 0.22, "learning_rate": 0.0005372423044385541, "loss": 3.6515, "step": 12510 }, { "epoch": 0.22, "learning_rate": 0.0005371919439841005, "loss": 3.8031, "step": 12515 }, { "epoch": 0.22, "learning_rate": 0.0005371415656940283, "loss": 3.6402, "step": 12520 }, { "epoch": 0.22, "learning_rate": 0.0005370911695721256, "loss": 3.7482, "step": 12525 }, { "epoch": 0.22, "learning_rate": 0.0005370407556221821, "loss": 3.7288, "step": 12530 }, { "epoch": 0.22, "learning_rate": 0.0005369903238479886, "loss": 3.6749, "step": 12535 }, { "epoch": 0.22, "learning_rate": 0.0005369398742533373, "loss": 3.6862, "step": 12540 }, { "epoch": 0.22, "learning_rate": 0.0005368894068420217, "loss": 3.6132, "step": 12545 }, { "epoch": 0.22, "learning_rate": 0.0005368389216178368, "loss": 3.666, "step": 12550 }, { "epoch": 0.22, "learning_rate": 0.0005367884185845787, "loss": 3.5704, "step": 12555 }, { "epoch": 0.22, "learning_rate": 0.0005367378977460452, "loss": 3.7454, "step": 12560 }, { "epoch": 0.22, "learning_rate": 0.0005366873591060349, "loss": 3.7277, "step": 12565 }, { "epoch": 0.22, "learning_rate": 0.0005366368026683483, "loss": 3.7656, "step": 12570 }, { "epoch": 0.22, "learning_rate": 0.0005365862284367869, "loss": 3.8027, "step": 12575 }, { "epoch": 0.22, "learning_rate": 0.0005365356364151535, "loss": 3.6515, "step": 12580 }, { "epoch": 0.22, "learning_rate": 0.0005364850266072525, "loss": 3.6866, "step": 12585 }, { "epoch": 0.22, "learning_rate": 0.0005364343990168895, "loss": 3.6147, "step": 12590 }, { "epoch": 0.22, "learning_rate": 0.0005363837536478714, "loss": 3.6964, "step": 12595 }, { "epoch": 0.22, "learning_rate": 0.0005363330905040064, "loss": 3.6638, "step": 12600 }, { "epoch": 0.22, "learning_rate": 0.0005362824095891043, "loss": 3.6581, "step": 12605 }, { "epoch": 0.22, "learning_rate": 0.0005362317109069758, "loss": 3.7257, "step": 12610 }, { "epoch": 0.22, "learning_rate": 0.0005361809944614334, "loss": 3.6829, "step": 12615 }, { "epoch": 0.22, "learning_rate": 0.0005361302602562907, "loss": 3.6478, "step": 12620 }, { "epoch": 0.22, "learning_rate": 0.0005360795082953624, "loss": 3.6036, "step": 12625 }, { "epoch": 0.22, "learning_rate": 0.0005360287385824651, "loss": 3.6498, "step": 12630 }, { "epoch": 0.22, "learning_rate": 0.0005359779511214162, "loss": 3.749, "step": 12635 }, { "epoch": 0.22, "learning_rate": 0.0005359271459160347, "loss": 3.6121, "step": 12640 }, { "epoch": 0.22, "learning_rate": 0.0005358763229701411, "loss": 3.6755, "step": 12645 }, { "epoch": 0.22, "learning_rate": 0.0005358254822875567, "loss": 3.6591, "step": 12650 }, { "epoch": 0.22, "learning_rate": 0.0005357746238721047, "loss": 3.6631, "step": 12655 }, { "epoch": 0.22, "learning_rate": 0.0005357237477276093, "loss": 3.6449, "step": 12660 }, { "epoch": 0.22, "learning_rate": 0.0005356728538578961, "loss": 3.6601, "step": 12665 }, { "epoch": 0.22, "learning_rate": 0.0005356219422667922, "loss": 3.6639, "step": 12670 }, { "epoch": 0.22, "learning_rate": 0.0005355710129581257, "loss": 3.693, "step": 12675 }, { "epoch": 0.22, "learning_rate": 0.0005355200659357262, "loss": 3.6938, "step": 12680 }, { "epoch": 0.22, "learning_rate": 0.0005354691012034249, "loss": 3.715, "step": 12685 }, { "epoch": 0.22, "learning_rate": 0.000535418118765054, "loss": 3.521, "step": 12690 }, { "epoch": 0.22, "learning_rate": 0.000535367118624447, "loss": 3.692, "step": 12695 }, { "epoch": 0.22, "learning_rate": 0.0005353161007854389, "loss": 3.6524, "step": 12700 }, { "epoch": 0.22, "learning_rate": 0.0005352650652518659, "loss": 3.6092, "step": 12705 }, { "epoch": 0.22, "learning_rate": 0.0005352140120275659, "loss": 3.712, "step": 12710 }, { "epoch": 0.22, "learning_rate": 0.0005351629411163776, "loss": 3.7175, "step": 12715 }, { "epoch": 0.22, "learning_rate": 0.0005351118525221412, "loss": 3.6494, "step": 12720 }, { "epoch": 0.22, "learning_rate": 0.0005350607462486985, "loss": 3.7582, "step": 12725 }, { "epoch": 0.22, "learning_rate": 0.0005350096222998924, "loss": 3.6472, "step": 12730 }, { "epoch": 0.22, "learning_rate": 0.0005349584806795671, "loss": 3.6468, "step": 12735 }, { "epoch": 0.22, "learning_rate": 0.0005349073213915681, "loss": 3.6471, "step": 12740 }, { "epoch": 0.22, "learning_rate": 0.0005348561444397425, "loss": 3.7198, "step": 12745 }, { "epoch": 0.22, "learning_rate": 0.0005348049498279384, "loss": 3.5422, "step": 12750 }, { "epoch": 0.22, "learning_rate": 0.0005347537375600055, "loss": 3.6621, "step": 12755 }, { "epoch": 0.22, "learning_rate": 0.0005347025076397945, "loss": 3.6559, "step": 12760 }, { "epoch": 0.22, "learning_rate": 0.0005346512600711579, "loss": 3.6488, "step": 12765 }, { "epoch": 0.22, "learning_rate": 0.000534599994857949, "loss": 3.6929, "step": 12770 }, { "epoch": 0.22, "learning_rate": 0.0005345487120040229, "loss": 3.605, "step": 12775 }, { "epoch": 0.22, "learning_rate": 0.0005344974115132356, "loss": 3.7029, "step": 12780 }, { "epoch": 0.22, "learning_rate": 0.0005344460933894448, "loss": 3.5666, "step": 12785 }, { "epoch": 0.22, "learning_rate": 0.0005343947576365093, "loss": 3.6138, "step": 12790 }, { "epoch": 0.22, "learning_rate": 0.0005343434042582892, "loss": 3.6309, "step": 12795 }, { "epoch": 0.22, "learning_rate": 0.0005342920332586462, "loss": 3.6865, "step": 12800 }, { "epoch": 0.22, "learning_rate": 0.0005342406446414429, "loss": 3.5163, "step": 12805 }, { "epoch": 0.22, "learning_rate": 0.0005341892384105436, "loss": 3.7673, "step": 12810 }, { "epoch": 0.22, "learning_rate": 0.0005341378145698139, "loss": 3.7516, "step": 12815 }, { "epoch": 0.22, "learning_rate": 0.0005340863731231204, "loss": 3.6601, "step": 12820 }, { "epoch": 0.22, "learning_rate": 0.0005340349140743314, "loss": 3.6487, "step": 12825 }, { "epoch": 0.22, "learning_rate": 0.0005339834374273162, "loss": 3.594, "step": 12830 }, { "epoch": 0.22, "learning_rate": 0.0005339319431859455, "loss": 3.6715, "step": 12835 }, { "epoch": 0.22, "learning_rate": 0.0005338804313540918, "loss": 3.6531, "step": 12840 }, { "epoch": 0.22, "learning_rate": 0.0005338289019356283, "loss": 3.6891, "step": 12845 }, { "epoch": 0.22, "learning_rate": 0.0005337773549344294, "loss": 3.722, "step": 12850 }, { "epoch": 0.22, "learning_rate": 0.0005337257903543719, "loss": 3.6754, "step": 12855 }, { "epoch": 0.22, "learning_rate": 0.0005336742081993325, "loss": 3.723, "step": 12860 }, { "epoch": 0.22, "learning_rate": 0.0005336226084731903, "loss": 3.5262, "step": 12865 }, { "epoch": 0.22, "learning_rate": 0.0005335709911798253, "loss": 3.6648, "step": 12870 }, { "epoch": 0.22, "learning_rate": 0.0005335193563231188, "loss": 3.755, "step": 12875 }, { "epoch": 0.22, "learning_rate": 0.0005334677039069534, "loss": 3.6505, "step": 12880 }, { "epoch": 0.22, "learning_rate": 0.0005334160339352131, "loss": 3.5853, "step": 12885 }, { "epoch": 0.22, "learning_rate": 0.0005333643464117834, "loss": 3.6742, "step": 12890 }, { "epoch": 0.22, "learning_rate": 0.0005333126413405508, "loss": 3.7451, "step": 12895 }, { "epoch": 0.22, "learning_rate": 0.0005332609187254033, "loss": 3.5786, "step": 12900 }, { "epoch": 0.22, "learning_rate": 0.0005332091785702301, "loss": 3.6852, "step": 12905 }, { "epoch": 0.22, "learning_rate": 0.0005331574208789219, "loss": 3.7323, "step": 12910 }, { "epoch": 0.22, "learning_rate": 0.0005331056456553705, "loss": 3.5534, "step": 12915 }, { "epoch": 0.22, "learning_rate": 0.0005330538529034692, "loss": 3.6558, "step": 12920 }, { "epoch": 0.22, "learning_rate": 0.0005330020426271126, "loss": 3.5388, "step": 12925 }, { "epoch": 0.22, "learning_rate": 0.0005329502148301965, "loss": 3.7418, "step": 12930 }, { "epoch": 0.22, "learning_rate": 0.0005328983695166181, "loss": 3.6543, "step": 12935 }, { "epoch": 0.22, "learning_rate": 0.0005328465066902759, "loss": 3.6607, "step": 12940 }, { "epoch": 0.22, "learning_rate": 0.0005327946263550698, "loss": 3.796, "step": 12945 }, { "epoch": 0.22, "learning_rate": 0.0005327427285149006, "loss": 3.6999, "step": 12950 }, { "epoch": 0.22, "learning_rate": 0.0005326908131736712, "loss": 3.5925, "step": 12955 }, { "epoch": 0.22, "learning_rate": 0.0005326388803352851, "loss": 3.675, "step": 12960 }, { "epoch": 0.22, "learning_rate": 0.0005325869300036475, "loss": 3.72, "step": 12965 }, { "epoch": 0.22, "learning_rate": 0.0005325349621826648, "loss": 3.7205, "step": 12970 }, { "epoch": 0.22, "learning_rate": 0.0005324829768762445, "loss": 3.6847, "step": 12975 }, { "epoch": 0.22, "learning_rate": 0.000532430974088296, "loss": 3.7384, "step": 12980 }, { "epoch": 0.22, "learning_rate": 0.0005323789538227293, "loss": 3.6207, "step": 12985 }, { "epoch": 0.22, "learning_rate": 0.0005323269160834562, "loss": 3.6368, "step": 12990 }, { "epoch": 0.22, "learning_rate": 0.0005322748608743897, "loss": 3.5439, "step": 12995 }, { "epoch": 0.22, "learning_rate": 0.0005322227881994441, "loss": 3.6824, "step": 13000 }, { "epoch": 0.22, "eval_loss": 3.6955907344818115, "eval_runtime": 150.2757, "eval_samples_per_second": 12.251, "eval_steps_per_second": 0.772, "step": 13000 }, { "epoch": 0.23, "learning_rate": 0.0005321706980625349, "loss": 3.6233, "step": 13005 }, { "epoch": 0.23, "learning_rate": 0.0005321185904675791, "loss": 3.6698, "step": 13010 }, { "epoch": 0.23, "learning_rate": 0.0005320664654184947, "loss": 3.693, "step": 13015 }, { "epoch": 0.23, "learning_rate": 0.0005320143229192016, "loss": 3.6562, "step": 13020 }, { "epoch": 0.23, "learning_rate": 0.0005319621629736205, "loss": 3.7259, "step": 13025 }, { "epoch": 0.23, "learning_rate": 0.0005319099855856734, "loss": 3.6133, "step": 13030 }, { "epoch": 0.23, "learning_rate": 0.0005318577907592841, "loss": 3.626, "step": 13035 }, { "epoch": 0.23, "learning_rate": 0.000531805578498377, "loss": 3.6176, "step": 13040 }, { "epoch": 0.23, "learning_rate": 0.0005317533488068784, "loss": 3.7022, "step": 13045 }, { "epoch": 0.23, "learning_rate": 0.0005317011016887157, "loss": 3.5367, "step": 13050 }, { "epoch": 0.23, "learning_rate": 0.0005316488371478177, "loss": 3.6384, "step": 13055 }, { "epoch": 0.23, "learning_rate": 0.0005315965551881143, "loss": 3.6755, "step": 13060 }, { "epoch": 0.23, "learning_rate": 0.0005315442558135368, "loss": 3.7178, "step": 13065 }, { "epoch": 0.23, "learning_rate": 0.0005314919390280179, "loss": 3.6934, "step": 13070 }, { "epoch": 0.23, "learning_rate": 0.0005314396048354916, "loss": 3.634, "step": 13075 }, { "epoch": 0.23, "learning_rate": 0.0005313872532398932, "loss": 3.5333, "step": 13080 }, { "epoch": 0.23, "learning_rate": 0.000531334884245159, "loss": 3.6866, "step": 13085 }, { "epoch": 0.23, "learning_rate": 0.0005312824978552272, "loss": 3.6536, "step": 13090 }, { "epoch": 0.23, "learning_rate": 0.0005312300940740369, "loss": 3.7084, "step": 13095 }, { "epoch": 0.23, "learning_rate": 0.0005311776729055286, "loss": 3.637, "step": 13100 }, { "epoch": 0.23, "learning_rate": 0.0005311252343536439, "loss": 3.7074, "step": 13105 }, { "epoch": 0.23, "learning_rate": 0.0005310727784223261, "loss": 3.7197, "step": 13110 }, { "epoch": 0.23, "learning_rate": 0.0005310203051155198, "loss": 3.6676, "step": 13115 }, { "epoch": 0.23, "learning_rate": 0.0005309678144371703, "loss": 3.5821, "step": 13120 }, { "epoch": 0.23, "learning_rate": 0.000530915306391225, "loss": 3.6428, "step": 13125 }, { "epoch": 0.23, "learning_rate": 0.0005308627809816319, "loss": 3.7832, "step": 13130 }, { "epoch": 0.23, "learning_rate": 0.000530810238212341, "loss": 3.6682, "step": 13135 }, { "epoch": 0.23, "learning_rate": 0.000530757678087303, "loss": 3.6524, "step": 13140 }, { "epoch": 0.23, "learning_rate": 0.0005307051006104701, "loss": 3.6169, "step": 13145 }, { "epoch": 0.23, "learning_rate": 0.0005306525057857961, "loss": 3.6496, "step": 13150 }, { "epoch": 0.23, "learning_rate": 0.0005305998936172357, "loss": 3.6896, "step": 13155 }, { "epoch": 0.23, "learning_rate": 0.0005305472641087452, "loss": 3.6579, "step": 13160 }, { "epoch": 0.23, "learning_rate": 0.000530494617264282, "loss": 3.7858, "step": 13165 }, { "epoch": 0.23, "learning_rate": 0.0005304419530878046, "loss": 3.7619, "step": 13170 }, { "epoch": 0.23, "learning_rate": 0.0005303892715832736, "loss": 3.784, "step": 13175 }, { "epoch": 0.23, "learning_rate": 0.0005303365727546499, "loss": 3.7082, "step": 13180 }, { "epoch": 0.23, "learning_rate": 0.0005302838566058964, "loss": 3.6019, "step": 13185 }, { "epoch": 0.23, "learning_rate": 0.0005302311231409773, "loss": 3.641, "step": 13190 }, { "epoch": 0.23, "learning_rate": 0.0005301783723638574, "loss": 3.6698, "step": 13195 }, { "epoch": 0.23, "learning_rate": 0.0005301256042785036, "loss": 3.6359, "step": 13200 }, { "epoch": 0.23, "learning_rate": 0.0005300728188888838, "loss": 3.7043, "step": 13205 }, { "epoch": 0.23, "learning_rate": 0.0005300200161989672, "loss": 3.7608, "step": 13210 }, { "epoch": 0.23, "learning_rate": 0.000529967196212724, "loss": 3.6878, "step": 13215 }, { "epoch": 0.23, "learning_rate": 0.0005299143589341263, "loss": 3.6146, "step": 13220 }, { "epoch": 0.23, "learning_rate": 0.0005298615043671472, "loss": 3.7112, "step": 13225 }, { "epoch": 0.23, "learning_rate": 0.0005298086325157609, "loss": 3.6229, "step": 13230 }, { "epoch": 0.23, "learning_rate": 0.0005297557433839432, "loss": 3.6452, "step": 13235 }, { "epoch": 0.23, "learning_rate": 0.0005297028369756711, "loss": 3.6943, "step": 13240 }, { "epoch": 0.23, "learning_rate": 0.0005296499132949227, "loss": 3.6575, "step": 13245 }, { "epoch": 0.23, "learning_rate": 0.0005295969723456781, "loss": 3.5772, "step": 13250 }, { "epoch": 0.23, "learning_rate": 0.0005295440141319176, "loss": 3.6112, "step": 13255 }, { "epoch": 0.23, "learning_rate": 0.0005294910386576237, "loss": 3.7853, "step": 13260 }, { "epoch": 0.23, "learning_rate": 0.0005294380459267797, "loss": 3.6779, "step": 13265 }, { "epoch": 0.23, "learning_rate": 0.0005293850359433706, "loss": 3.7071, "step": 13270 }, { "epoch": 0.23, "learning_rate": 0.0005293320087113824, "loss": 3.7668, "step": 13275 }, { "epoch": 0.23, "learning_rate": 0.0005292789642348024, "loss": 3.7479, "step": 13280 }, { "epoch": 0.23, "learning_rate": 0.0005292259025176194, "loss": 3.6091, "step": 13285 }, { "epoch": 0.23, "learning_rate": 0.0005291728235638232, "loss": 3.6423, "step": 13290 }, { "epoch": 0.23, "learning_rate": 0.0005291197273774053, "loss": 3.631, "step": 13295 }, { "epoch": 0.23, "learning_rate": 0.000529066613962358, "loss": 3.6896, "step": 13300 }, { "epoch": 0.23, "learning_rate": 0.0005290134833226752, "loss": 3.726, "step": 13305 }, { "epoch": 0.23, "learning_rate": 0.0005289603354623523, "loss": 3.6359, "step": 13310 }, { "epoch": 0.23, "learning_rate": 0.0005289071703853855, "loss": 3.6416, "step": 13315 }, { "epoch": 0.23, "learning_rate": 0.0005288539880957727, "loss": 3.7228, "step": 13320 }, { "epoch": 0.23, "learning_rate": 0.0005288007885975127, "loss": 3.6365, "step": 13325 }, { "epoch": 0.23, "learning_rate": 0.0005287475718946061, "loss": 3.7192, "step": 13330 }, { "epoch": 0.23, "learning_rate": 0.0005286943379910543, "loss": 3.7571, "step": 13335 }, { "epoch": 0.23, "learning_rate": 0.0005286410868908604, "loss": 3.6936, "step": 13340 }, { "epoch": 0.23, "learning_rate": 0.0005285878185980285, "loss": 3.6531, "step": 13345 }, { "epoch": 0.23, "learning_rate": 0.0005285345331165641, "loss": 3.6293, "step": 13350 }, { "epoch": 0.23, "learning_rate": 0.0005284812304504742, "loss": 3.7047, "step": 13355 }, { "epoch": 0.23, "learning_rate": 0.0005284279106037666, "loss": 3.6494, "step": 13360 }, { "epoch": 0.23, "learning_rate": 0.0005283745735804508, "loss": 3.7135, "step": 13365 }, { "epoch": 0.23, "learning_rate": 0.0005283212193845374, "loss": 3.6306, "step": 13370 }, { "epoch": 0.23, "learning_rate": 0.0005282678480200385, "loss": 3.5678, "step": 13375 }, { "epoch": 0.23, "learning_rate": 0.0005282144594909674, "loss": 3.6169, "step": 13380 }, { "epoch": 0.23, "learning_rate": 0.0005281610538013384, "loss": 3.6236, "step": 13385 }, { "epoch": 0.23, "learning_rate": 0.0005281076309551674, "loss": 3.6551, "step": 13390 }, { "epoch": 0.23, "learning_rate": 0.0005280541909564717, "loss": 3.7224, "step": 13395 }, { "epoch": 0.23, "learning_rate": 0.0005280007338092696, "loss": 3.5341, "step": 13400 }, { "epoch": 0.23, "learning_rate": 0.0005279472595175807, "loss": 3.6211, "step": 13405 }, { "epoch": 0.23, "learning_rate": 0.0005278937680854262, "loss": 3.5654, "step": 13410 }, { "epoch": 0.23, "learning_rate": 0.0005278402595168282, "loss": 3.7164, "step": 13415 }, { "epoch": 0.23, "learning_rate": 0.0005277867338158104, "loss": 3.7283, "step": 13420 }, { "epoch": 0.23, "learning_rate": 0.0005277331909863976, "loss": 3.6716, "step": 13425 }, { "epoch": 0.23, "learning_rate": 0.000527679631032616, "loss": 3.7008, "step": 13430 }, { "epoch": 0.23, "learning_rate": 0.0005276260539584928, "loss": 3.6196, "step": 13435 }, { "epoch": 0.23, "learning_rate": 0.0005275724597680571, "loss": 3.6388, "step": 13440 }, { "epoch": 0.23, "learning_rate": 0.0005275188484653386, "loss": 3.6706, "step": 13445 }, { "epoch": 0.23, "learning_rate": 0.0005274652200543688, "loss": 3.6409, "step": 13450 }, { "epoch": 0.23, "learning_rate": 0.0005274115745391801, "loss": 3.5821, "step": 13455 }, { "epoch": 0.23, "learning_rate": 0.0005273579119238065, "loss": 3.6809, "step": 13460 }, { "epoch": 0.23, "learning_rate": 0.0005273042322122832, "loss": 3.7153, "step": 13465 }, { "epoch": 0.23, "learning_rate": 0.0005272505354086465, "loss": 3.6738, "step": 13470 }, { "epoch": 0.23, "learning_rate": 0.0005271968215169341, "loss": 3.6512, "step": 13475 }, { "epoch": 0.23, "learning_rate": 0.0005271430905411851, "loss": 3.6398, "step": 13480 }, { "epoch": 0.23, "learning_rate": 0.0005270893424854399, "loss": 3.5679, "step": 13485 }, { "epoch": 0.23, "learning_rate": 0.0005270355773537398, "loss": 3.6169, "step": 13490 }, { "epoch": 0.23, "learning_rate": 0.000526981795150128, "loss": 3.6797, "step": 13495 }, { "epoch": 0.23, "learning_rate": 0.0005269279958786484, "loss": 3.565, "step": 13500 }, { "epoch": 0.23, "eval_loss": 3.6858224868774414, "eval_runtime": 149.7698, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.775, "step": 13500 }, { "epoch": 0.23, "learning_rate": 0.0005268741795433464, "loss": 3.7488, "step": 13505 }, { "epoch": 0.23, "learning_rate": 0.0005268203461482689, "loss": 3.6822, "step": 13510 }, { "epoch": 0.23, "learning_rate": 0.0005267664956974638, "loss": 3.5031, "step": 13515 }, { "epoch": 0.23, "learning_rate": 0.0005267126281949804, "loss": 3.6234, "step": 13520 }, { "epoch": 0.23, "learning_rate": 0.0005266587436448692, "loss": 3.6111, "step": 13525 }, { "epoch": 0.23, "learning_rate": 0.000526604842051182, "loss": 3.5746, "step": 13530 }, { "epoch": 0.23, "learning_rate": 0.0005265509234179722, "loss": 3.6438, "step": 13535 }, { "epoch": 0.23, "learning_rate": 0.0005264969877492939, "loss": 3.6441, "step": 13540 }, { "epoch": 0.23, "learning_rate": 0.0005264430350492029, "loss": 3.5713, "step": 13545 }, { "epoch": 0.23, "learning_rate": 0.0005263890653217562, "loss": 3.604, "step": 13550 }, { "epoch": 0.23, "learning_rate": 0.000526335078571012, "loss": 3.6513, "step": 13555 }, { "epoch": 0.23, "learning_rate": 0.0005262810748010298, "loss": 3.6257, "step": 13560 }, { "epoch": 0.23, "learning_rate": 0.0005262270540158705, "loss": 3.6804, "step": 13565 }, { "epoch": 0.23, "learning_rate": 0.0005261730162195962, "loss": 3.7044, "step": 13570 }, { "epoch": 0.23, "learning_rate": 0.0005261189614162701, "loss": 3.6778, "step": 13575 }, { "epoch": 0.24, "learning_rate": 0.000526064889609957, "loss": 3.6329, "step": 13580 }, { "epoch": 0.24, "learning_rate": 0.0005260108008047227, "loss": 3.6702, "step": 13585 }, { "epoch": 0.24, "learning_rate": 0.0005259566950046344, "loss": 3.6102, "step": 13590 }, { "epoch": 0.24, "learning_rate": 0.0005259025722137608, "loss": 3.5786, "step": 13595 }, { "epoch": 0.24, "learning_rate": 0.0005258484324361715, "loss": 3.599, "step": 13600 }, { "epoch": 0.24, "learning_rate": 0.0005257942756759375, "loss": 3.6527, "step": 13605 }, { "epoch": 0.24, "learning_rate": 0.0005257401019371312, "loss": 3.7808, "step": 13610 }, { "epoch": 0.24, "learning_rate": 0.0005256859112238259, "loss": 3.5285, "step": 13615 }, { "epoch": 0.24, "learning_rate": 0.000525631703540097, "loss": 3.6049, "step": 13620 }, { "epoch": 0.24, "learning_rate": 0.0005255774788900201, "loss": 3.431, "step": 13625 }, { "epoch": 0.24, "learning_rate": 0.000525523237277673, "loss": 3.5476, "step": 13630 }, { "epoch": 0.24, "learning_rate": 0.0005254689787071342, "loss": 3.5949, "step": 13635 }, { "epoch": 0.24, "learning_rate": 0.0005254147031824837, "loss": 3.7066, "step": 13640 }, { "epoch": 0.24, "learning_rate": 0.0005253604107078027, "loss": 3.6354, "step": 13645 }, { "epoch": 0.24, "learning_rate": 0.0005253061012871738, "loss": 3.5766, "step": 13650 }, { "epoch": 0.24, "learning_rate": 0.0005252517749246809, "loss": 3.7445, "step": 13655 }, { "epoch": 0.24, "learning_rate": 0.0005251974316244088, "loss": 3.7481, "step": 13660 }, { "epoch": 0.24, "learning_rate": 0.000525143071390444, "loss": 3.521, "step": 13665 }, { "epoch": 0.24, "learning_rate": 0.000525088694226874, "loss": 3.7559, "step": 13670 }, { "epoch": 0.24, "learning_rate": 0.0005250343001377877, "loss": 3.6996, "step": 13675 }, { "epoch": 0.24, "learning_rate": 0.0005249798891272755, "loss": 3.6476, "step": 13680 }, { "epoch": 0.24, "learning_rate": 0.0005249254611994286, "loss": 3.7529, "step": 13685 }, { "epoch": 0.24, "learning_rate": 0.0005248710163583396, "loss": 3.7192, "step": 13690 }, { "epoch": 0.24, "learning_rate": 0.0005248165546081027, "loss": 3.6813, "step": 13695 }, { "epoch": 0.24, "learning_rate": 0.0005247620759528131, "loss": 3.6612, "step": 13700 }, { "epoch": 0.24, "learning_rate": 0.0005247075803965672, "loss": 3.5626, "step": 13705 }, { "epoch": 0.24, "learning_rate": 0.000524653067943463, "loss": 3.6624, "step": 13710 }, { "epoch": 0.24, "learning_rate": 0.0005245985385975993, "loss": 3.671, "step": 13715 }, { "epoch": 0.24, "learning_rate": 0.0005245439923630765, "loss": 3.5639, "step": 13720 }, { "epoch": 0.24, "learning_rate": 0.0005244894292439963, "loss": 3.5779, "step": 13725 }, { "epoch": 0.24, "learning_rate": 0.0005244348492444615, "loss": 3.6789, "step": 13730 }, { "epoch": 0.24, "learning_rate": 0.0005243802523685763, "loss": 3.6376, "step": 13735 }, { "epoch": 0.24, "learning_rate": 0.0005243256386204461, "loss": 3.5759, "step": 13740 }, { "epoch": 0.24, "learning_rate": 0.0005242710080041774, "loss": 3.6651, "step": 13745 }, { "epoch": 0.24, "learning_rate": 0.0005242163605238783, "loss": 3.7021, "step": 13750 }, { "epoch": 0.24, "learning_rate": 0.000524161696183658, "loss": 3.5692, "step": 13755 }, { "epoch": 0.24, "learning_rate": 0.0005241070149876269, "loss": 3.6352, "step": 13760 }, { "epoch": 0.24, "learning_rate": 0.000524052316939897, "loss": 3.799, "step": 13765 }, { "epoch": 0.24, "learning_rate": 0.000523997602044581, "loss": 3.6014, "step": 13770 }, { "epoch": 0.24, "learning_rate": 0.0005239428703057933, "loss": 3.5783, "step": 13775 }, { "epoch": 0.24, "learning_rate": 0.0005238881217276493, "loss": 3.6207, "step": 13780 }, { "epoch": 0.24, "learning_rate": 0.0005238333563142661, "loss": 3.652, "step": 13785 }, { "epoch": 0.24, "learning_rate": 0.0005237785740697617, "loss": 3.5884, "step": 13790 }, { "epoch": 0.24, "learning_rate": 0.0005237237749982552, "loss": 3.6441, "step": 13795 }, { "epoch": 0.24, "learning_rate": 0.0005236689591038674, "loss": 3.6444, "step": 13800 }, { "epoch": 0.24, "learning_rate": 0.0005236141263907203, "loss": 3.5998, "step": 13805 }, { "epoch": 0.24, "learning_rate": 0.0005235592768629368, "loss": 3.6522, "step": 13810 }, { "epoch": 0.24, "learning_rate": 0.0005235044105246415, "loss": 3.6656, "step": 13815 }, { "epoch": 0.24, "learning_rate": 0.0005234495273799598, "loss": 3.6451, "step": 13820 }, { "epoch": 0.24, "learning_rate": 0.000523394627433019, "loss": 3.7121, "step": 13825 }, { "epoch": 0.24, "learning_rate": 0.0005233397106879469, "loss": 4.2102, "step": 13830 }, { "epoch": 0.24, "learning_rate": 0.0005232847771488733, "loss": 4.3941, "step": 13835 }, { "epoch": 0.24, "learning_rate": 0.0005232298268199288, "loss": 4.4938, "step": 13840 }, { "epoch": 0.24, "learning_rate": 0.0005231748597052452, "loss": 4.4764, "step": 13845 }, { "epoch": 0.24, "learning_rate": 0.000523119875808956, "loss": 4.1145, "step": 13850 }, { "epoch": 0.24, "learning_rate": 0.0005230648751351956, "loss": 3.8547, "step": 13855 }, { "epoch": 0.24, "learning_rate": 0.0005230098576880997, "loss": 3.8067, "step": 13860 }, { "epoch": 0.24, "learning_rate": 0.0005229548234718056, "loss": 3.6504, "step": 13865 }, { "epoch": 0.24, "learning_rate": 0.0005228997724904512, "loss": 3.7026, "step": 13870 }, { "epoch": 0.24, "learning_rate": 0.0005228447047481763, "loss": 3.7336, "step": 13875 }, { "epoch": 0.24, "learning_rate": 0.0005227896202491218, "loss": 3.6148, "step": 13880 }, { "epoch": 0.24, "learning_rate": 0.0005227345189974295, "loss": 3.7376, "step": 13885 }, { "epoch": 0.24, "learning_rate": 0.000522679400997243, "loss": 3.7209, "step": 13890 }, { "epoch": 0.24, "learning_rate": 0.0005226242662527066, "loss": 3.7498, "step": 13895 }, { "epoch": 0.24, "learning_rate": 0.0005225691147679664, "loss": 3.7111, "step": 13900 }, { "epoch": 0.24, "learning_rate": 0.0005225139465471694, "loss": 3.6576, "step": 13905 }, { "epoch": 0.24, "learning_rate": 0.000522458761594464, "loss": 3.6185, "step": 13910 }, { "epoch": 0.24, "learning_rate": 0.0005224035599139997, "loss": 3.641, "step": 13915 }, { "epoch": 0.24, "learning_rate": 0.0005223483415099276, "loss": 3.704, "step": 13920 }, { "epoch": 0.24, "learning_rate": 0.0005222931063863998, "loss": 3.6244, "step": 13925 }, { "epoch": 0.24, "learning_rate": 0.0005222378545475695, "loss": 3.5334, "step": 13930 }, { "epoch": 0.24, "learning_rate": 0.0005221825859975915, "loss": 3.651, "step": 13935 }, { "epoch": 0.24, "learning_rate": 0.0005221273007406218, "loss": 3.5782, "step": 13940 }, { "epoch": 0.24, "learning_rate": 0.0005220719987808174, "loss": 3.6259, "step": 13945 }, { "epoch": 0.24, "learning_rate": 0.0005220166801223369, "loss": 3.6128, "step": 13950 }, { "epoch": 0.24, "learning_rate": 0.0005219613447693396, "loss": 3.6453, "step": 13955 }, { "epoch": 0.24, "learning_rate": 0.0005219059927259869, "loss": 3.7525, "step": 13960 }, { "epoch": 0.24, "learning_rate": 0.0005218506239964409, "loss": 3.6357, "step": 13965 }, { "epoch": 0.24, "learning_rate": 0.0005217952385848648, "loss": 3.6617, "step": 13970 }, { "epoch": 0.24, "learning_rate": 0.0005217398364954234, "loss": 3.5647, "step": 13975 }, { "epoch": 0.24, "learning_rate": 0.0005216844177322828, "loss": 3.6491, "step": 13980 }, { "epoch": 0.24, "learning_rate": 0.00052162898229961, "loss": 3.644, "step": 13985 }, { "epoch": 0.24, "learning_rate": 0.0005215735302015737, "loss": 3.7162, "step": 13990 }, { "epoch": 0.24, "learning_rate": 0.0005215180614423434, "loss": 3.5794, "step": 13995 }, { "epoch": 0.24, "learning_rate": 0.0005214625760260901, "loss": 3.5723, "step": 14000 }, { "epoch": 0.24, "eval_loss": 3.667832851409912, "eval_runtime": 149.7768, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.774, "step": 14000 }, { "epoch": 0.24, "learning_rate": 0.0005214070739569861, "loss": 3.5753, "step": 14005 }, { "epoch": 0.24, "learning_rate": 0.0005213515552392048, "loss": 3.6657, "step": 14010 }, { "epoch": 0.24, "learning_rate": 0.0005212960198769209, "loss": 3.6619, "step": 14015 }, { "epoch": 0.24, "learning_rate": 0.0005212404678743105, "loss": 3.6746, "step": 14020 }, { "epoch": 0.24, "learning_rate": 0.000521184899235551, "loss": 3.6758, "step": 14025 }, { "epoch": 0.24, "learning_rate": 0.0005211293139648203, "loss": 3.5972, "step": 14030 }, { "epoch": 0.24, "learning_rate": 0.0005210737120662985, "loss": 3.5256, "step": 14035 }, { "epoch": 0.24, "learning_rate": 0.0005210180935441667, "loss": 3.7113, "step": 14040 }, { "epoch": 0.24, "learning_rate": 0.0005209624584026068, "loss": 3.5753, "step": 14045 }, { "epoch": 0.24, "learning_rate": 0.0005209068066458026, "loss": 3.6638, "step": 14050 }, { "epoch": 0.24, "learning_rate": 0.0005208511382779385, "loss": 3.6401, "step": 14055 }, { "epoch": 0.24, "learning_rate": 0.0005207954533032008, "loss": 3.659, "step": 14060 }, { "epoch": 0.24, "learning_rate": 0.0005207397517257765, "loss": 3.7119, "step": 14065 }, { "epoch": 0.24, "learning_rate": 0.0005206840335498541, "loss": 3.6507, "step": 14070 }, { "epoch": 0.24, "learning_rate": 0.0005206282987796234, "loss": 3.5722, "step": 14075 }, { "epoch": 0.24, "learning_rate": 0.0005205725474192753, "loss": 3.6486, "step": 14080 }, { "epoch": 0.24, "learning_rate": 0.0005205167794730022, "loss": 3.772, "step": 14085 }, { "epoch": 0.24, "learning_rate": 0.0005204609949449972, "loss": 3.6892, "step": 14090 }, { "epoch": 0.24, "learning_rate": 0.0005204051938394554, "loss": 3.7151, "step": 14095 }, { "epoch": 0.24, "learning_rate": 0.0005203493761605724, "loss": 3.6335, "step": 14100 }, { "epoch": 0.24, "learning_rate": 0.0005202935419125458, "loss": 3.68, "step": 14105 }, { "epoch": 0.24, "learning_rate": 0.0005202376910995736, "loss": 3.6781, "step": 14110 }, { "epoch": 0.24, "learning_rate": 0.0005201818237258558, "loss": 3.7159, "step": 14115 }, { "epoch": 0.24, "learning_rate": 0.0005201259397955934, "loss": 3.6356, "step": 14120 }, { "epoch": 0.24, "learning_rate": 0.0005200700393129884, "loss": 3.5929, "step": 14125 }, { "epoch": 0.24, "learning_rate": 0.0005200141222822443, "loss": 3.702, "step": 14130 }, { "epoch": 0.24, "learning_rate": 0.0005199581887075657, "loss": 3.572, "step": 14135 }, { "epoch": 0.24, "learning_rate": 0.0005199022385931585, "loss": 3.5301, "step": 14140 }, { "epoch": 0.24, "learning_rate": 0.0005198462719432301, "loss": 3.7308, "step": 14145 }, { "epoch": 0.24, "learning_rate": 0.0005197902887619887, "loss": 3.6635, "step": 14150 }, { "epoch": 0.24, "learning_rate": 0.0005197342890536441, "loss": 3.6117, "step": 14155 }, { "epoch": 0.25, "learning_rate": 0.0005196782728224069, "loss": 3.6756, "step": 14160 }, { "epoch": 0.25, "learning_rate": 0.0005196222400724894, "loss": 3.6662, "step": 14165 }, { "epoch": 0.25, "learning_rate": 0.0005195661908081052, "loss": 3.5442, "step": 14170 }, { "epoch": 0.25, "learning_rate": 0.0005195101250334684, "loss": 3.6221, "step": 14175 }, { "epoch": 0.25, "learning_rate": 0.0005194540427527955, "loss": 3.7254, "step": 14180 }, { "epoch": 0.25, "learning_rate": 0.0005193979439703031, "loss": 3.6972, "step": 14185 }, { "epoch": 0.25, "learning_rate": 0.0005193418286902098, "loss": 3.5639, "step": 14190 }, { "epoch": 0.25, "learning_rate": 0.0005192856969167349, "loss": 3.7267, "step": 14195 }, { "epoch": 0.25, "learning_rate": 0.0005192295486540995, "loss": 3.6168, "step": 14200 }, { "epoch": 0.25, "learning_rate": 0.0005191733839065257, "loss": 3.5785, "step": 14205 }, { "epoch": 0.25, "learning_rate": 0.0005191172026782365, "loss": 3.6425, "step": 14210 }, { "epoch": 0.25, "learning_rate": 0.0005190610049734568, "loss": 3.6147, "step": 14215 }, { "epoch": 0.25, "learning_rate": 0.0005190047907964121, "loss": 3.6009, "step": 14220 }, { "epoch": 0.25, "learning_rate": 0.0005189485601513295, "loss": 3.6146, "step": 14225 }, { "epoch": 0.25, "learning_rate": 0.0005188923130424372, "loss": 3.6774, "step": 14230 }, { "epoch": 0.25, "learning_rate": 0.0005188360494739649, "loss": 3.6255, "step": 14235 }, { "epoch": 0.25, "learning_rate": 0.0005187797694501431, "loss": 3.4911, "step": 14240 }, { "epoch": 0.25, "learning_rate": 0.0005187234729752039, "loss": 3.5676, "step": 14245 }, { "epoch": 0.25, "learning_rate": 0.0005186671600533805, "loss": 3.6448, "step": 14250 }, { "epoch": 0.25, "learning_rate": 0.0005186108306889073, "loss": 3.5538, "step": 14255 }, { "epoch": 0.25, "learning_rate": 0.00051855448488602, "loss": 3.4816, "step": 14260 }, { "epoch": 0.25, "learning_rate": 0.0005184981226489555, "loss": 3.5797, "step": 14265 }, { "epoch": 0.25, "learning_rate": 0.0005184417439819519, "loss": 3.4676, "step": 14270 }, { "epoch": 0.25, "learning_rate": 0.0005183853488892488, "loss": 3.6309, "step": 14275 }, { "epoch": 0.25, "learning_rate": 0.0005183289373750866, "loss": 3.5128, "step": 14280 }, { "epoch": 0.25, "learning_rate": 0.0005182725094437072, "loss": 3.6644, "step": 14285 }, { "epoch": 0.25, "learning_rate": 0.0005182160650993537, "loss": 3.6486, "step": 14290 }, { "epoch": 0.25, "learning_rate": 0.0005181596043462704, "loss": 3.6108, "step": 14295 }, { "epoch": 0.25, "learning_rate": 0.000518103127188703, "loss": 3.5354, "step": 14300 }, { "epoch": 0.25, "learning_rate": 0.0005180466336308983, "loss": 3.5014, "step": 14305 }, { "epoch": 0.25, "learning_rate": 0.000517990123677104, "loss": 3.416, "step": 14310 }, { "epoch": 0.25, "learning_rate": 0.0005179335973315697, "loss": 3.5127, "step": 14315 }, { "epoch": 0.25, "learning_rate": 0.0005178770545985458, "loss": 3.6262, "step": 14320 }, { "epoch": 0.25, "learning_rate": 0.0005178204954822839, "loss": 3.6022, "step": 14325 }, { "epoch": 0.25, "learning_rate": 0.000517763919987037, "loss": 3.6538, "step": 14330 }, { "epoch": 0.25, "learning_rate": 0.0005177073281170595, "loss": 3.6462, "step": 14335 }, { "epoch": 0.25, "learning_rate": 0.0005176507198766066, "loss": 3.5705, "step": 14340 }, { "epoch": 0.25, "learning_rate": 0.0005175940952699351, "loss": 3.5812, "step": 14345 }, { "epoch": 0.25, "learning_rate": 0.0005175374543013025, "loss": 3.6557, "step": 14350 }, { "epoch": 0.25, "learning_rate": 0.0005174807969749685, "loss": 3.6208, "step": 14355 }, { "epoch": 0.25, "learning_rate": 0.000517424123295193, "loss": 3.5958, "step": 14360 }, { "epoch": 0.25, "learning_rate": 0.0005173674332662377, "loss": 3.5134, "step": 14365 }, { "epoch": 0.25, "learning_rate": 0.0005173107268923655, "loss": 3.477, "step": 14370 }, { "epoch": 0.25, "learning_rate": 0.0005172540041778403, "loss": 3.5057, "step": 14375 }, { "epoch": 0.25, "learning_rate": 0.0005171972651269273, "loss": 3.6122, "step": 14380 }, { "epoch": 0.25, "learning_rate": 0.0005171405097438933, "loss": 3.6689, "step": 14385 }, { "epoch": 0.25, "learning_rate": 0.0005170837380330056, "loss": 3.5244, "step": 14390 }, { "epoch": 0.25, "learning_rate": 0.0005170269499985334, "loss": 3.6451, "step": 14395 }, { "epoch": 0.25, "learning_rate": 0.0005169701456447468, "loss": 3.6317, "step": 14400 }, { "epoch": 0.25, "learning_rate": 0.0005169133249759171, "loss": 3.5989, "step": 14405 }, { "epoch": 0.25, "learning_rate": 0.0005168564879963172, "loss": 3.6657, "step": 14410 }, { "epoch": 0.25, "learning_rate": 0.0005167996347102207, "loss": 3.6195, "step": 14415 }, { "epoch": 0.25, "learning_rate": 0.0005167427651219027, "loss": 3.6496, "step": 14420 }, { "epoch": 0.25, "learning_rate": 0.0005166858792356396, "loss": 3.719, "step": 14425 }, { "epoch": 0.25, "learning_rate": 0.0005166289770557089, "loss": 3.5693, "step": 14430 }, { "epoch": 0.25, "learning_rate": 0.0005165720585863893, "loss": 3.5917, "step": 14435 }, { "epoch": 0.25, "learning_rate": 0.0005165151238319608, "loss": 3.5493, "step": 14440 }, { "epoch": 0.25, "learning_rate": 0.0005164581727967046, "loss": 3.5514, "step": 14445 }, { "epoch": 0.25, "learning_rate": 0.0005164012054849032, "loss": 3.5368, "step": 14450 }, { "epoch": 0.25, "learning_rate": 0.0005163442219008402, "loss": 3.5879, "step": 14455 }, { "epoch": 0.25, "learning_rate": 0.0005162872220488004, "loss": 3.6387, "step": 14460 }, { "epoch": 0.25, "learning_rate": 0.00051623020593307, "loss": 3.5715, "step": 14465 }, { "epoch": 0.25, "learning_rate": 0.0005161731735579361, "loss": 3.6949, "step": 14470 }, { "epoch": 0.25, "learning_rate": 0.0005161161249276876, "loss": 3.6781, "step": 14475 }, { "epoch": 0.25, "learning_rate": 0.0005160590600466141, "loss": 3.537, "step": 14480 }, { "epoch": 0.25, "learning_rate": 0.0005160019789190065, "loss": 3.664, "step": 14485 }, { "epoch": 0.25, "learning_rate": 0.0005159448815491571, "loss": 3.6726, "step": 14490 }, { "epoch": 0.25, "learning_rate": 0.0005158877679413593, "loss": 3.6492, "step": 14495 }, { "epoch": 0.25, "learning_rate": 0.0005158306380999078, "loss": 3.5562, "step": 14500 }, { "epoch": 0.25, "eval_loss": 3.648855447769165, "eval_runtime": 149.8786, "eval_samples_per_second": 12.283, "eval_steps_per_second": 0.774, "step": 14500 }, { "epoch": 0.25, "learning_rate": 0.0005157734920290984, "loss": 3.634, "step": 14505 }, { "epoch": 0.25, "learning_rate": 0.0005157163297332282, "loss": 3.5898, "step": 14510 }, { "epoch": 0.25, "learning_rate": 0.0005156591512165955, "loss": 3.6621, "step": 14515 }, { "epoch": 0.25, "learning_rate": 0.0005156019564834999, "loss": 3.6402, "step": 14520 }, { "epoch": 0.25, "learning_rate": 0.0005155447455382421, "loss": 3.6102, "step": 14525 }, { "epoch": 0.25, "learning_rate": 0.0005154875183851241, "loss": 3.624, "step": 14530 }, { "epoch": 0.25, "learning_rate": 0.0005154302750284491, "loss": 3.6164, "step": 14535 }, { "epoch": 0.25, "learning_rate": 0.0005153730154725214, "loss": 3.4685, "step": 14540 }, { "epoch": 0.25, "learning_rate": 0.0005153157397216468, "loss": 3.6154, "step": 14545 }, { "epoch": 0.25, "learning_rate": 0.000515258447780132, "loss": 3.6433, "step": 14550 }, { "epoch": 0.25, "learning_rate": 0.0005152011396522851, "loss": 3.5996, "step": 14555 }, { "epoch": 0.25, "learning_rate": 0.0005151438153424155, "loss": 3.6255, "step": 14560 }, { "epoch": 0.25, "learning_rate": 0.0005150864748548335, "loss": 3.5906, "step": 14565 }, { "epoch": 0.25, "learning_rate": 0.0005150291181938508, "loss": 3.6043, "step": 14570 }, { "epoch": 0.25, "learning_rate": 0.0005149717453637805, "loss": 3.4846, "step": 14575 }, { "epoch": 0.25, "learning_rate": 0.0005149143563689367, "loss": 3.5817, "step": 14580 }, { "epoch": 0.25, "learning_rate": 0.0005148569512136346, "loss": 3.5475, "step": 14585 }, { "epoch": 0.25, "learning_rate": 0.0005147995299021911, "loss": 3.6521, "step": 14590 }, { "epoch": 0.25, "learning_rate": 0.0005147420924389236, "loss": 3.6627, "step": 14595 }, { "epoch": 0.25, "learning_rate": 0.0005146846388281514, "loss": 3.6745, "step": 14600 }, { "epoch": 0.25, "learning_rate": 0.0005146271690741945, "loss": 3.6828, "step": 14605 }, { "epoch": 0.25, "learning_rate": 0.0005145696831813744, "loss": 3.6828, "step": 14610 }, { "epoch": 0.25, "learning_rate": 0.0005145121811540139, "loss": 3.5683, "step": 14615 }, { "epoch": 0.25, "learning_rate": 0.0005144546629964367, "loss": 3.6632, "step": 14620 }, { "epoch": 0.25, "learning_rate": 0.0005143971287129679, "loss": 3.6587, "step": 14625 }, { "epoch": 0.25, "learning_rate": 0.0005143395783079337, "loss": 3.603, "step": 14630 }, { "epoch": 0.25, "learning_rate": 0.0005142820117856618, "loss": 3.6555, "step": 14635 }, { "epoch": 0.25, "learning_rate": 0.0005142244291504808, "loss": 3.7154, "step": 14640 }, { "epoch": 0.25, "learning_rate": 0.0005141668304067205, "loss": 3.6161, "step": 14645 }, { "epoch": 0.25, "learning_rate": 0.0005141092155587123, "loss": 3.6059, "step": 14650 }, { "epoch": 0.25, "learning_rate": 0.0005140515846107882, "loss": 3.6758, "step": 14655 }, { "epoch": 0.25, "learning_rate": 0.000513993937567282, "loss": 3.6174, "step": 14660 }, { "epoch": 0.25, "learning_rate": 0.0005139362744325285, "loss": 3.5476, "step": 14665 }, { "epoch": 0.25, "learning_rate": 0.0005138785952108635, "loss": 3.6128, "step": 14670 }, { "epoch": 0.25, "learning_rate": 0.0005138208999066242, "loss": 3.5876, "step": 14675 }, { "epoch": 0.25, "learning_rate": 0.0005137631885241491, "loss": 3.5896, "step": 14680 }, { "epoch": 0.25, "learning_rate": 0.0005137054610677777, "loss": 3.5601, "step": 14685 }, { "epoch": 0.25, "learning_rate": 0.0005136477175418508, "loss": 3.6276, "step": 14690 }, { "epoch": 0.25, "learning_rate": 0.0005135899579507106, "loss": 3.5822, "step": 14695 }, { "epoch": 0.25, "learning_rate": 0.0005135321822987001, "loss": 3.4708, "step": 14700 }, { "epoch": 0.25, "learning_rate": 0.0005134743905901639, "loss": 3.6286, "step": 14705 }, { "epoch": 0.25, "learning_rate": 0.0005134165828294475, "loss": 3.5722, "step": 14710 }, { "epoch": 0.25, "learning_rate": 0.0005133587590208979, "loss": 3.6132, "step": 14715 }, { "epoch": 0.25, "learning_rate": 0.0005133009191688629, "loss": 3.7012, "step": 14720 }, { "epoch": 0.25, "learning_rate": 0.000513243063277692, "loss": 3.6159, "step": 14725 }, { "epoch": 0.25, "learning_rate": 0.0005131851913517358, "loss": 3.6005, "step": 14730 }, { "epoch": 0.26, "learning_rate": 0.0005131273033953456, "loss": 3.592, "step": 14735 }, { "epoch": 0.26, "learning_rate": 0.0005130693994128744, "loss": 3.6199, "step": 14740 }, { "epoch": 0.26, "learning_rate": 0.0005130114794086763, "loss": 3.5501, "step": 14745 }, { "epoch": 0.26, "learning_rate": 0.0005129535433871067, "loss": 3.6375, "step": 14750 }, { "epoch": 0.26, "learning_rate": 0.000512895591352522, "loss": 3.5636, "step": 14755 }, { "epoch": 0.26, "learning_rate": 0.00051283762330928, "loss": 3.6531, "step": 14760 }, { "epoch": 0.26, "learning_rate": 0.0005127796392617393, "loss": 3.5518, "step": 14765 }, { "epoch": 0.26, "learning_rate": 0.0005127216392142604, "loss": 3.5923, "step": 14770 }, { "epoch": 0.26, "learning_rate": 0.0005126636231712042, "loss": 3.5783, "step": 14775 }, { "epoch": 0.26, "learning_rate": 0.0005126055911369335, "loss": 3.69, "step": 14780 }, { "epoch": 0.26, "learning_rate": 0.0005125475431158121, "loss": 3.7556, "step": 14785 }, { "epoch": 0.26, "learning_rate": 0.0005124894791122045, "loss": 3.6912, "step": 14790 }, { "epoch": 0.26, "learning_rate": 0.0005124313991304773, "loss": 3.6166, "step": 14795 }, { "epoch": 0.26, "learning_rate": 0.0005123733031749974, "loss": 3.5407, "step": 14800 }, { "epoch": 0.26, "learning_rate": 0.0005123151912501336, "loss": 3.6813, "step": 14805 }, { "epoch": 0.26, "learning_rate": 0.0005122570633602554, "loss": 3.6641, "step": 14810 }, { "epoch": 0.26, "learning_rate": 0.0005121989195097339, "loss": 3.6209, "step": 14815 }, { "epoch": 0.26, "learning_rate": 0.0005121407597029413, "loss": 3.6123, "step": 14820 }, { "epoch": 0.26, "learning_rate": 0.0005120825839442506, "loss": 3.5358, "step": 14825 }, { "epoch": 0.26, "learning_rate": 0.0005120243922380366, "loss": 3.6142, "step": 14830 }, { "epoch": 0.26, "learning_rate": 0.0005119661845886747, "loss": 3.5662, "step": 14835 }, { "epoch": 0.26, "learning_rate": 0.0005119079610005422, "loss": 3.6078, "step": 14840 }, { "epoch": 0.26, "learning_rate": 0.0005118497214780171, "loss": 3.6417, "step": 14845 }, { "epoch": 0.26, "learning_rate": 0.0005117914660254785, "loss": 3.5625, "step": 14850 }, { "epoch": 0.26, "learning_rate": 0.0005117331946473071, "loss": 3.619, "step": 14855 }, { "epoch": 0.26, "learning_rate": 0.0005116749073478846, "loss": 3.616, "step": 14860 }, { "epoch": 0.26, "learning_rate": 0.0005116166041315939, "loss": 3.5866, "step": 14865 }, { "epoch": 0.26, "learning_rate": 0.0005115582850028192, "loss": 3.6377, "step": 14870 }, { "epoch": 0.26, "learning_rate": 0.0005114999499659454, "loss": 3.6135, "step": 14875 }, { "epoch": 0.26, "learning_rate": 0.0005114415990253595, "loss": 3.5777, "step": 14880 }, { "epoch": 0.26, "learning_rate": 0.0005113832321854491, "loss": 3.5029, "step": 14885 }, { "epoch": 0.26, "learning_rate": 0.0005113248494506027, "loss": 3.5864, "step": 14890 }, { "epoch": 0.26, "learning_rate": 0.0005112664508252107, "loss": 3.5529, "step": 14895 }, { "epoch": 0.26, "learning_rate": 0.0005112080363136644, "loss": 3.588, "step": 14900 }, { "epoch": 0.26, "learning_rate": 0.0005111496059203561, "loss": 3.614, "step": 14905 }, { "epoch": 0.26, "learning_rate": 0.0005110911596496797, "loss": 3.6759, "step": 14910 }, { "epoch": 0.26, "learning_rate": 0.0005110326975060298, "loss": 3.4662, "step": 14915 }, { "epoch": 0.26, "learning_rate": 0.0005109742194938026, "loss": 3.6857, "step": 14920 }, { "epoch": 0.26, "learning_rate": 0.0005109157256173954, "loss": 3.6664, "step": 14925 }, { "epoch": 0.26, "learning_rate": 0.0005108572158812064, "loss": 3.5142, "step": 14930 }, { "epoch": 0.26, "learning_rate": 0.0005107986902896355, "loss": 3.5656, "step": 14935 }, { "epoch": 0.26, "learning_rate": 0.0005107401488470834, "loss": 3.7301, "step": 14940 }, { "epoch": 0.26, "learning_rate": 0.0005106815915579522, "loss": 3.5186, "step": 14945 }, { "epoch": 0.26, "learning_rate": 0.0005106230184266448, "loss": 3.65, "step": 14950 }, { "epoch": 0.26, "learning_rate": 0.0005105644294575661, "loss": 3.6077, "step": 14955 }, { "epoch": 0.26, "learning_rate": 0.0005105058246551214, "loss": 3.6377, "step": 14960 }, { "epoch": 0.26, "learning_rate": 0.0005104472040237174, "loss": 3.5337, "step": 14965 }, { "epoch": 0.26, "learning_rate": 0.0005103885675677622, "loss": 3.5585, "step": 14970 }, { "epoch": 0.26, "learning_rate": 0.0005103299152916651, "loss": 3.6477, "step": 14975 }, { "epoch": 0.26, "learning_rate": 0.0005102712471998361, "loss": 3.5803, "step": 14980 }, { "epoch": 0.26, "learning_rate": 0.000510212563296687, "loss": 3.5321, "step": 14985 }, { "epoch": 0.26, "learning_rate": 0.0005101538635866304, "loss": 3.6679, "step": 14990 }, { "epoch": 0.26, "learning_rate": 0.0005100951480740804, "loss": 3.6513, "step": 14995 }, { "epoch": 0.26, "learning_rate": 0.0005100364167634518, "loss": 3.5916, "step": 15000 }, { "epoch": 0.26, "eval_loss": 3.638385534286499, "eval_runtime": 149.9801, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 15000 }, { "epoch": 0.26, "learning_rate": 0.0005099776696591613, "loss": 3.6363, "step": 15005 }, { "epoch": 0.26, "learning_rate": 0.0005099189067656261, "loss": 3.5853, "step": 15010 }, { "epoch": 0.26, "learning_rate": 0.0005098601280872649, "loss": 3.6406, "step": 15015 }, { "epoch": 0.26, "learning_rate": 0.0005098013336284975, "loss": 3.5502, "step": 15020 }, { "epoch": 0.26, "learning_rate": 0.0005097425233937451, "loss": 3.6303, "step": 15025 }, { "epoch": 0.26, "learning_rate": 0.0005096836973874298, "loss": 3.5962, "step": 15030 }, { "epoch": 0.26, "learning_rate": 0.0005096248556139753, "loss": 3.4167, "step": 15035 }, { "epoch": 0.26, "learning_rate": 0.0005095659980778056, "loss": 3.5678, "step": 15040 }, { "epoch": 0.26, "learning_rate": 0.0005095071247833473, "loss": 3.5376, "step": 15045 }, { "epoch": 0.26, "learning_rate": 0.0005094482357350266, "loss": 3.6403, "step": 15050 }, { "epoch": 0.26, "learning_rate": 0.000509389330937272, "loss": 3.6371, "step": 15055 }, { "epoch": 0.26, "learning_rate": 0.000509330410394513, "loss": 3.5036, "step": 15060 }, { "epoch": 0.26, "learning_rate": 0.0005092714741111797, "loss": 3.6479, "step": 15065 }, { "epoch": 0.26, "learning_rate": 0.0005092125220917042, "loss": 3.6069, "step": 15070 }, { "epoch": 0.26, "learning_rate": 0.0005091535543405192, "loss": 3.6093, "step": 15075 }, { "epoch": 0.26, "learning_rate": 0.0005090945708620588, "loss": 3.6439, "step": 15080 }, { "epoch": 0.26, "learning_rate": 0.0005090355716607582, "loss": 3.5337, "step": 15085 }, { "epoch": 0.26, "learning_rate": 0.0005089765567410539, "loss": 3.604, "step": 15090 }, { "epoch": 0.26, "learning_rate": 0.0005089175261073837, "loss": 3.5584, "step": 15095 }, { "epoch": 0.26, "learning_rate": 0.0005088584797641861, "loss": 3.5309, "step": 15100 }, { "epoch": 0.26, "learning_rate": 0.0005087994177159012, "loss": 3.6083, "step": 15105 }, { "epoch": 0.26, "learning_rate": 0.0005087403399669702, "loss": 3.5913, "step": 15110 }, { "epoch": 0.26, "learning_rate": 0.0005086812465218354, "loss": 3.5442, "step": 15115 }, { "epoch": 0.26, "learning_rate": 0.0005086221373849403, "loss": 3.6318, "step": 15120 }, { "epoch": 0.26, "learning_rate": 0.0005085630125607297, "loss": 3.6189, "step": 15125 }, { "epoch": 0.26, "learning_rate": 0.0005085038720536493, "loss": 3.5056, "step": 15130 }, { "epoch": 0.26, "learning_rate": 0.0005084447158681465, "loss": 3.5779, "step": 15135 }, { "epoch": 0.26, "learning_rate": 0.0005083855440086691, "loss": 3.5665, "step": 15140 }, { "epoch": 0.26, "learning_rate": 0.0005083263564796669, "loss": 3.5793, "step": 15145 }, { "epoch": 0.26, "learning_rate": 0.0005082671532855903, "loss": 3.5825, "step": 15150 }, { "epoch": 0.26, "learning_rate": 0.0005082079344308912, "loss": 3.7259, "step": 15155 }, { "epoch": 0.26, "learning_rate": 0.0005081486999200224, "loss": 3.6756, "step": 15160 }, { "epoch": 0.26, "learning_rate": 0.0005080894497574381, "loss": 3.5817, "step": 15165 }, { "epoch": 0.26, "learning_rate": 0.0005080301839475936, "loss": 3.518, "step": 15170 }, { "epoch": 0.26, "learning_rate": 0.0005079709024949455, "loss": 3.5779, "step": 15175 }, { "epoch": 0.26, "learning_rate": 0.0005079116054039513, "loss": 3.7219, "step": 15180 }, { "epoch": 0.26, "learning_rate": 0.0005078522926790698, "loss": 3.5903, "step": 15185 }, { "epoch": 0.26, "learning_rate": 0.0005077929643247613, "loss": 3.5368, "step": 15190 }, { "epoch": 0.26, "learning_rate": 0.0005077336203454867, "loss": 3.6035, "step": 15195 }, { "epoch": 0.26, "learning_rate": 0.0005076742607457086, "loss": 3.6253, "step": 15200 }, { "epoch": 0.26, "learning_rate": 0.0005076148855298903, "loss": 3.6759, "step": 15205 }, { "epoch": 0.26, "learning_rate": 0.0005075554947024967, "loss": 3.6473, "step": 15210 }, { "epoch": 0.26, "learning_rate": 0.0005074960882679935, "loss": 3.5805, "step": 15215 }, { "epoch": 0.26, "learning_rate": 0.0005074366662308478, "loss": 3.5368, "step": 15220 }, { "epoch": 0.26, "learning_rate": 0.000507377228595528, "loss": 3.5171, "step": 15225 }, { "epoch": 0.26, "learning_rate": 0.0005073177753665034, "loss": 3.6021, "step": 15230 }, { "epoch": 0.26, "learning_rate": 0.0005072583065482446, "loss": 3.6194, "step": 15235 }, { "epoch": 0.26, "learning_rate": 0.0005071988221452233, "loss": 3.6315, "step": 15240 }, { "epoch": 0.26, "learning_rate": 0.0005071393221619124, "loss": 3.6864, "step": 15245 }, { "epoch": 0.26, "learning_rate": 0.0005070798066027862, "loss": 3.5701, "step": 15250 }, { "epoch": 0.26, "learning_rate": 0.0005070202754723196, "loss": 3.6381, "step": 15255 }, { "epoch": 0.26, "learning_rate": 0.0005069607287749893, "loss": 3.572, "step": 15260 }, { "epoch": 0.26, "learning_rate": 0.000506901166515273, "loss": 3.6789, "step": 15265 }, { "epoch": 0.26, "learning_rate": 0.0005068415886976493, "loss": 3.536, "step": 15270 }, { "epoch": 0.26, "learning_rate": 0.000506781995326598, "loss": 3.5427, "step": 15275 }, { "epoch": 0.26, "learning_rate": 0.0005067223864066005, "loss": 3.5554, "step": 15280 }, { "epoch": 0.26, "learning_rate": 0.000506662761942139, "loss": 3.5994, "step": 15285 }, { "epoch": 0.26, "learning_rate": 0.000506603121937697, "loss": 3.6482, "step": 15290 }, { "epoch": 0.26, "learning_rate": 0.000506543466397759, "loss": 3.6878, "step": 15295 }, { "epoch": 0.26, "learning_rate": 0.0005064837953268108, "loss": 3.489, "step": 15300 }, { "epoch": 0.26, "learning_rate": 0.0005064241087293393, "loss": 3.4993, "step": 15305 }, { "epoch": 0.26, "learning_rate": 0.0005063644066098329, "loss": 3.5804, "step": 15310 }, { "epoch": 0.27, "learning_rate": 0.0005063046889727807, "loss": 3.5341, "step": 15315 }, { "epoch": 0.27, "learning_rate": 0.0005062449558226732, "loss": 3.5108, "step": 15320 }, { "epoch": 0.27, "learning_rate": 0.0005061852071640019, "loss": 3.6813, "step": 15325 }, { "epoch": 0.27, "learning_rate": 0.0005061254430012598, "loss": 3.4356, "step": 15330 }, { "epoch": 0.27, "learning_rate": 0.0005060656633389406, "loss": 3.6614, "step": 15335 }, { "epoch": 0.27, "learning_rate": 0.0005060058681815399, "loss": 3.6952, "step": 15340 }, { "epoch": 0.27, "learning_rate": 0.0005059460575335534, "loss": 3.6666, "step": 15345 }, { "epoch": 0.27, "learning_rate": 0.000505886231399479, "loss": 3.5999, "step": 15350 }, { "epoch": 0.27, "learning_rate": 0.000505826389783815, "loss": 3.6896, "step": 15355 }, { "epoch": 0.27, "learning_rate": 0.0005057665326910614, "loss": 3.5638, "step": 15360 }, { "epoch": 0.27, "learning_rate": 0.0005057066601257191, "loss": 3.5589, "step": 15365 }, { "epoch": 0.27, "learning_rate": 0.0005056467720922903, "loss": 3.6379, "step": 15370 }, { "epoch": 0.27, "learning_rate": 0.000505586868595278, "loss": 3.5751, "step": 15375 }, { "epoch": 0.27, "learning_rate": 0.0005055269496391868, "loss": 3.5328, "step": 15380 }, { "epoch": 0.27, "learning_rate": 0.0005054670152285223, "loss": 3.5601, "step": 15385 }, { "epoch": 0.27, "learning_rate": 0.0005054070653677913, "loss": 3.5779, "step": 15390 }, { "epoch": 0.27, "learning_rate": 0.0005053471000615017, "loss": 3.637, "step": 15395 }, { "epoch": 0.27, "learning_rate": 0.0005052871193141625, "loss": 3.5866, "step": 15400 }, { "epoch": 0.27, "learning_rate": 0.000505227123130284, "loss": 3.6035, "step": 15405 }, { "epoch": 0.27, "learning_rate": 0.0005051671115143776, "loss": 3.6226, "step": 15410 }, { "epoch": 0.27, "learning_rate": 0.000505107084470956, "loss": 3.5384, "step": 15415 }, { "epoch": 0.27, "learning_rate": 0.0005050470420045327, "loss": 3.512, "step": 15420 }, { "epoch": 0.27, "learning_rate": 0.0005049869841196228, "loss": 3.6033, "step": 15425 }, { "epoch": 0.27, "learning_rate": 0.0005049269108207421, "loss": 3.5151, "step": 15430 }, { "epoch": 0.27, "learning_rate": 0.0005048668221124081, "loss": 3.4719, "step": 15435 }, { "epoch": 0.27, "learning_rate": 0.0005048067179991389, "loss": 3.6271, "step": 15440 }, { "epoch": 0.27, "learning_rate": 0.000504746598485454, "loss": 3.7077, "step": 15445 }, { "epoch": 0.27, "learning_rate": 0.0005046864635758744, "loss": 3.6447, "step": 15450 }, { "epoch": 0.27, "learning_rate": 0.0005046263132749217, "loss": 3.601, "step": 15455 }, { "epoch": 0.27, "learning_rate": 0.0005045661475871189, "loss": 3.6165, "step": 15460 }, { "epoch": 0.27, "learning_rate": 0.0005045059665169902, "loss": 3.6336, "step": 15465 }, { "epoch": 0.27, "learning_rate": 0.0005044457700690609, "loss": 3.4781, "step": 15470 }, { "epoch": 0.27, "learning_rate": 0.0005043855582478574, "loss": 3.5317, "step": 15475 }, { "epoch": 0.27, "learning_rate": 0.0005043253310579075, "loss": 3.5082, "step": 15480 }, { "epoch": 0.27, "learning_rate": 0.0005042650885037397, "loss": 3.6004, "step": 15485 }, { "epoch": 0.27, "learning_rate": 0.0005042048305898843, "loss": 3.6375, "step": 15490 }, { "epoch": 0.27, "learning_rate": 0.000504144557320872, "loss": 3.4989, "step": 15495 }, { "epoch": 0.27, "learning_rate": 0.0005040842687012351, "loss": 3.6445, "step": 15500 }, { "epoch": 0.27, "eval_loss": 3.6320202350616455, "eval_runtime": 150.4615, "eval_samples_per_second": 12.236, "eval_steps_per_second": 0.771, "step": 15500 }, { "epoch": 0.27, "learning_rate": 0.0005040239647355073, "loss": 3.521, "step": 15505 }, { "epoch": 0.27, "learning_rate": 0.000503963645428223, "loss": 3.5892, "step": 15510 }, { "epoch": 0.27, "learning_rate": 0.0005039033107839177, "loss": 3.5733, "step": 15515 }, { "epoch": 0.27, "learning_rate": 0.0005038429608071285, "loss": 3.6047, "step": 15520 }, { "epoch": 0.27, "learning_rate": 0.0005037825955023933, "loss": 3.6021, "step": 15525 }, { "epoch": 0.27, "learning_rate": 0.0005037222148742514, "loss": 3.6138, "step": 15530 }, { "epoch": 0.27, "learning_rate": 0.0005036618189272428, "loss": 3.6236, "step": 15535 }, { "epoch": 0.27, "learning_rate": 0.0005036014076659094, "loss": 3.5677, "step": 15540 }, { "epoch": 0.27, "learning_rate": 0.0005035409810947934, "loss": 3.498, "step": 15545 }, { "epoch": 0.27, "learning_rate": 0.0005034805392184389, "loss": 3.5847, "step": 15550 }, { "epoch": 0.27, "learning_rate": 0.0005034200820413906, "loss": 3.6563, "step": 15555 }, { "epoch": 0.27, "learning_rate": 0.0005033596095681946, "loss": 3.6285, "step": 15560 }, { "epoch": 0.27, "learning_rate": 0.0005032991218033981, "loss": 3.524, "step": 15565 }, { "epoch": 0.27, "learning_rate": 0.0005032386187515497, "loss": 3.5815, "step": 15570 }, { "epoch": 0.27, "learning_rate": 0.0005031781004171986, "loss": 3.5129, "step": 15575 }, { "epoch": 0.27, "learning_rate": 0.0005031175668048957, "loss": 3.5887, "step": 15580 }, { "epoch": 0.27, "learning_rate": 0.0005030570179191927, "loss": 3.5245, "step": 15585 }, { "epoch": 0.27, "learning_rate": 0.0005029964537646427, "loss": 3.6023, "step": 15590 }, { "epoch": 0.27, "learning_rate": 0.0005029358743457997, "loss": 3.5239, "step": 15595 }, { "epoch": 0.27, "learning_rate": 0.0005028752796672189, "loss": 3.5323, "step": 15600 }, { "epoch": 0.27, "learning_rate": 0.0005028146697334568, "loss": 3.5991, "step": 15605 }, { "epoch": 0.27, "learning_rate": 0.000502754044549071, "loss": 3.4643, "step": 15610 }, { "epoch": 0.27, "learning_rate": 0.0005026934041186201, "loss": 3.5993, "step": 15615 }, { "epoch": 0.27, "learning_rate": 0.0005026327484466641, "loss": 3.5821, "step": 15620 }, { "epoch": 0.27, "learning_rate": 0.0005025720775377637, "loss": 3.498, "step": 15625 }, { "epoch": 0.27, "learning_rate": 0.0005025113913964813, "loss": 3.5981, "step": 15630 }, { "epoch": 0.27, "learning_rate": 0.0005024506900273803, "loss": 3.5101, "step": 15635 }, { "epoch": 0.27, "learning_rate": 0.0005023899734350248, "loss": 3.5864, "step": 15640 }, { "epoch": 0.27, "learning_rate": 0.0005023292416239805, "loss": 3.5627, "step": 15645 }, { "epoch": 0.27, "learning_rate": 0.0005022684945988141, "loss": 3.5497, "step": 15650 }, { "epoch": 0.27, "learning_rate": 0.0005022077323640937, "loss": 3.6516, "step": 15655 }, { "epoch": 0.27, "learning_rate": 0.0005021469549243881, "loss": 3.4684, "step": 15660 }, { "epoch": 0.27, "learning_rate": 0.0005020861622842673, "loss": 3.5867, "step": 15665 }, { "epoch": 0.27, "learning_rate": 0.000502025354448303, "loss": 3.5082, "step": 15670 }, { "epoch": 0.27, "learning_rate": 0.0005019645314210672, "loss": 3.6077, "step": 15675 }, { "epoch": 0.27, "learning_rate": 0.0005019036932071338, "loss": 3.5891, "step": 15680 }, { "epoch": 0.27, "learning_rate": 0.0005018428398110774, "loss": 3.6244, "step": 15685 }, { "epoch": 0.27, "learning_rate": 0.0005017819712374739, "loss": 3.4976, "step": 15690 }, { "epoch": 0.27, "learning_rate": 0.0005017210874909003, "loss": 3.6083, "step": 15695 }, { "epoch": 0.27, "learning_rate": 0.0005016601885759346, "loss": 3.5849, "step": 15700 }, { "epoch": 0.27, "learning_rate": 0.0005015992744971562, "loss": 3.6031, "step": 15705 }, { "epoch": 0.27, "learning_rate": 0.0005015383452591456, "loss": 3.5034, "step": 15710 }, { "epoch": 0.27, "learning_rate": 0.0005014774008664844, "loss": 3.5678, "step": 15715 }, { "epoch": 0.27, "learning_rate": 0.0005014164413237552, "loss": 3.6184, "step": 15720 }, { "epoch": 0.27, "learning_rate": 0.0005013554666355417, "loss": 3.6015, "step": 15725 }, { "epoch": 0.27, "learning_rate": 0.0005012944768064291, "loss": 3.6218, "step": 15730 }, { "epoch": 0.27, "learning_rate": 0.0005012334718410036, "loss": 3.8092, "step": 15735 }, { "epoch": 0.27, "learning_rate": 0.0005011724517438521, "loss": 4.6015, "step": 15740 }, { "epoch": 0.27, "learning_rate": 0.0005011114165195634, "loss": 4.2182, "step": 15745 }, { "epoch": 0.27, "learning_rate": 0.0005010503661727268, "loss": 3.7612, "step": 15750 }, { "epoch": 0.27, "learning_rate": 0.0005009893007079329, "loss": 3.7264, "step": 15755 }, { "epoch": 0.27, "learning_rate": 0.0005009282201297739, "loss": 3.6667, "step": 15760 }, { "epoch": 0.27, "learning_rate": 0.0005008671244428423, "loss": 3.6342, "step": 15765 }, { "epoch": 0.27, "learning_rate": 0.0005008060136517324, "loss": 3.6282, "step": 15770 }, { "epoch": 0.27, "learning_rate": 0.0005007448877610395, "loss": 3.6548, "step": 15775 }, { "epoch": 0.27, "learning_rate": 0.0005006837467753597, "loss": 3.6451, "step": 15780 }, { "epoch": 0.27, "learning_rate": 0.0005006225906992906, "loss": 3.7284, "step": 15785 }, { "epoch": 0.27, "learning_rate": 0.000500561419537431, "loss": 3.6631, "step": 15790 }, { "epoch": 0.27, "learning_rate": 0.0005005002332943804, "loss": 3.6146, "step": 15795 }, { "epoch": 0.27, "learning_rate": 0.0005004390319747399, "loss": 3.6857, "step": 15800 }, { "epoch": 0.27, "learning_rate": 0.0005003778155831113, "loss": 3.5865, "step": 15805 }, { "epoch": 0.27, "learning_rate": 0.000500316584124098, "loss": 3.7437, "step": 15810 }, { "epoch": 0.27, "learning_rate": 0.0005002553376023042, "loss": 3.5996, "step": 15815 }, { "epoch": 0.27, "learning_rate": 0.0005001940760223352, "loss": 3.7551, "step": 15820 }, { "epoch": 0.27, "learning_rate": 0.0005001327993887977, "loss": 3.6479, "step": 15825 }, { "epoch": 0.27, "learning_rate": 0.0005000715077062994, "loss": 3.6632, "step": 15830 }, { "epoch": 0.27, "learning_rate": 0.0005000102009794491, "loss": 3.5554, "step": 15835 }, { "epoch": 0.27, "learning_rate": 0.0004999488792128567, "loss": 3.6261, "step": 15840 }, { "epoch": 0.27, "learning_rate": 0.0004998875424111332, "loss": 3.5204, "step": 15845 }, { "epoch": 0.27, "learning_rate": 0.0004998261905788909, "loss": 3.6914, "step": 15850 }, { "epoch": 0.27, "learning_rate": 0.0004997648237207433, "loss": 3.4878, "step": 15855 }, { "epoch": 0.27, "learning_rate": 0.0004997034418413046, "loss": 3.5916, "step": 15860 }, { "epoch": 0.27, "learning_rate": 0.0004996420449451907, "loss": 3.6535, "step": 15865 }, { "epoch": 0.27, "learning_rate": 0.000499580633037018, "loss": 3.6224, "step": 15870 }, { "epoch": 0.27, "learning_rate": 0.0004995192061214047, "loss": 3.5223, "step": 15875 }, { "epoch": 0.27, "learning_rate": 0.0004994577642029695, "loss": 3.563, "step": 15880 }, { "epoch": 0.27, "learning_rate": 0.0004993963072863326, "loss": 3.561, "step": 15885 }, { "epoch": 0.27, "learning_rate": 0.0004993348353761154, "loss": 3.6209, "step": 15890 }, { "epoch": 0.28, "learning_rate": 0.0004992733484769401, "loss": 3.3646, "step": 15895 }, { "epoch": 0.28, "learning_rate": 0.0004992118465934302, "loss": 3.5317, "step": 15900 }, { "epoch": 0.28, "learning_rate": 0.0004991503297302105, "loss": 3.5534, "step": 15905 }, { "epoch": 0.28, "learning_rate": 0.0004990887978919066, "loss": 3.6341, "step": 15910 }, { "epoch": 0.28, "learning_rate": 0.0004990272510831454, "loss": 3.5312, "step": 15915 }, { "epoch": 0.28, "learning_rate": 0.0004989656893085549, "loss": 3.5533, "step": 15920 }, { "epoch": 0.28, "learning_rate": 0.0004989041125727643, "loss": 3.5574, "step": 15925 }, { "epoch": 0.28, "learning_rate": 0.0004988425208804037, "loss": 3.5899, "step": 15930 }, { "epoch": 0.28, "learning_rate": 0.0004987809142361046, "loss": 3.5567, "step": 15935 }, { "epoch": 0.28, "learning_rate": 0.0004987192926444995, "loss": 3.5264, "step": 15940 }, { "epoch": 0.28, "learning_rate": 0.000498657656110222, "loss": 3.6206, "step": 15945 }, { "epoch": 0.28, "learning_rate": 0.0004985960046379069, "loss": 3.5102, "step": 15950 }, { "epoch": 0.28, "learning_rate": 0.0004985343382321901, "loss": 3.5516, "step": 15955 }, { "epoch": 0.28, "learning_rate": 0.0004984726568977083, "loss": 3.6478, "step": 15960 }, { "epoch": 0.28, "learning_rate": 0.0004984109606391002, "loss": 3.6414, "step": 15965 }, { "epoch": 0.28, "learning_rate": 0.0004983492494610045, "loss": 3.5679, "step": 15970 }, { "epoch": 0.28, "learning_rate": 0.0004982875233680618, "loss": 3.5373, "step": 15975 }, { "epoch": 0.28, "learning_rate": 0.0004982257823649136, "loss": 3.6789, "step": 15980 }, { "epoch": 0.28, "learning_rate": 0.0004981640264562023, "loss": 3.642, "step": 15985 }, { "epoch": 0.28, "learning_rate": 0.0004981022556465719, "loss": 3.5777, "step": 15990 }, { "epoch": 0.28, "learning_rate": 0.0004980404699406672, "loss": 3.6623, "step": 15995 }, { "epoch": 0.28, "learning_rate": 0.000497978669343134, "loss": 3.6401, "step": 16000 }, { "epoch": 0.28, "eval_loss": 3.6124560832977295, "eval_runtime": 150.0796, "eval_samples_per_second": 12.267, "eval_steps_per_second": 0.773, "step": 16000 }, { "epoch": 0.28, "learning_rate": 0.0004979168538586195, "loss": 3.5841, "step": 16005 }, { "epoch": 0.28, "learning_rate": 0.0004978550234917719, "loss": 3.595, "step": 16010 }, { "epoch": 0.28, "learning_rate": 0.0004977931782472405, "loss": 3.5558, "step": 16015 }, { "epoch": 0.28, "learning_rate": 0.0004977313181296759, "loss": 3.592, "step": 16020 }, { "epoch": 0.28, "learning_rate": 0.0004976694431437295, "loss": 3.5335, "step": 16025 }, { "epoch": 0.28, "learning_rate": 0.0004976075532940539, "loss": 3.4794, "step": 16030 }, { "epoch": 0.28, "learning_rate": 0.000497545648585303, "loss": 3.6644, "step": 16035 }, { "epoch": 0.28, "learning_rate": 0.0004974837290221318, "loss": 3.5437, "step": 16040 }, { "epoch": 0.28, "learning_rate": 0.0004974217946091962, "loss": 3.6994, "step": 16045 }, { "epoch": 0.28, "learning_rate": 0.0004973598453511535, "loss": 3.6072, "step": 16050 }, { "epoch": 0.28, "learning_rate": 0.0004972978812526619, "loss": 3.5775, "step": 16055 }, { "epoch": 0.28, "learning_rate": 0.0004972359023183808, "loss": 3.5683, "step": 16060 }, { "epoch": 0.28, "learning_rate": 0.0004971739085529705, "loss": 3.4989, "step": 16065 }, { "epoch": 0.28, "learning_rate": 0.0004971118999610929, "loss": 3.5584, "step": 16070 }, { "epoch": 0.28, "learning_rate": 0.0004970498765474106, "loss": 3.5787, "step": 16075 }, { "epoch": 0.28, "learning_rate": 0.0004969878383165873, "loss": 3.6137, "step": 16080 }, { "epoch": 0.28, "learning_rate": 0.0004969257852732883, "loss": 3.7059, "step": 16085 }, { "epoch": 0.28, "learning_rate": 0.0004968637174221794, "loss": 3.6148, "step": 16090 }, { "epoch": 0.28, "learning_rate": 0.000496801634767928, "loss": 3.5649, "step": 16095 }, { "epoch": 0.28, "learning_rate": 0.000496739537315202, "loss": 3.6258, "step": 16100 }, { "epoch": 0.28, "learning_rate": 0.0004966774250686713, "loss": 3.5316, "step": 16105 }, { "epoch": 0.28, "learning_rate": 0.000496615298033006, "loss": 3.5973, "step": 16110 }, { "epoch": 0.28, "learning_rate": 0.000496553156212878, "loss": 3.6305, "step": 16115 }, { "epoch": 0.28, "learning_rate": 0.0004964909996129599, "loss": 3.5401, "step": 16120 }, { "epoch": 0.28, "learning_rate": 0.0004964288282379258, "loss": 3.5479, "step": 16125 }, { "epoch": 0.28, "learning_rate": 0.0004963666420924504, "loss": 3.5814, "step": 16130 }, { "epoch": 0.28, "learning_rate": 0.0004963044411812099, "loss": 3.4288, "step": 16135 }, { "epoch": 0.28, "learning_rate": 0.0004962422255088815, "loss": 3.5357, "step": 16140 }, { "epoch": 0.28, "learning_rate": 0.0004961799950801434, "loss": 3.5193, "step": 16145 }, { "epoch": 0.28, "learning_rate": 0.000496117749899675, "loss": 3.5514, "step": 16150 }, { "epoch": 0.28, "learning_rate": 0.000496055489972157, "loss": 3.5557, "step": 16155 }, { "epoch": 0.28, "learning_rate": 0.0004959932153022709, "loss": 3.5929, "step": 16160 }, { "epoch": 0.28, "learning_rate": 0.0004959309258946996, "loss": 3.542, "step": 16165 }, { "epoch": 0.28, "learning_rate": 0.0004958686217541266, "loss": 3.5837, "step": 16170 }, { "epoch": 0.28, "learning_rate": 0.0004958063028852372, "loss": 3.4923, "step": 16175 }, { "epoch": 0.28, "learning_rate": 0.0004957439692927173, "loss": 3.6609, "step": 16180 }, { "epoch": 0.28, "learning_rate": 0.0004956816209812539, "loss": 3.4934, "step": 16185 }, { "epoch": 0.28, "learning_rate": 0.0004956192579555356, "loss": 3.5329, "step": 16190 }, { "epoch": 0.28, "learning_rate": 0.0004955568802202517, "loss": 3.5227, "step": 16195 }, { "epoch": 0.28, "learning_rate": 0.0004954944877800924, "loss": 3.5705, "step": 16200 }, { "epoch": 0.28, "learning_rate": 0.0004954320806397497, "loss": 3.5531, "step": 16205 }, { "epoch": 0.28, "learning_rate": 0.000495369658803916, "loss": 3.4774, "step": 16210 }, { "epoch": 0.28, "learning_rate": 0.0004953072222772853, "loss": 3.5052, "step": 16215 }, { "epoch": 0.28, "learning_rate": 0.0004952447710645524, "loss": 3.5444, "step": 16220 }, { "epoch": 0.28, "learning_rate": 0.0004951823051704134, "loss": 3.5683, "step": 16225 }, { "epoch": 0.28, "learning_rate": 0.0004951198245995652, "loss": 3.4055, "step": 16230 }, { "epoch": 0.28, "learning_rate": 0.0004950573293567062, "loss": 3.6428, "step": 16235 }, { "epoch": 0.28, "learning_rate": 0.0004949948194465357, "loss": 3.6984, "step": 16240 }, { "epoch": 0.28, "learning_rate": 0.0004949322948737541, "loss": 3.5885, "step": 16245 }, { "epoch": 0.28, "learning_rate": 0.0004948697556430631, "loss": 3.4286, "step": 16250 }, { "epoch": 0.28, "learning_rate": 0.0004948072017591649, "loss": 3.565, "step": 16255 }, { "epoch": 0.28, "learning_rate": 0.0004947446332267636, "loss": 3.5906, "step": 16260 }, { "epoch": 0.28, "learning_rate": 0.000494682050050564, "loss": 3.634, "step": 16265 }, { "epoch": 0.28, "learning_rate": 0.000494619452235272, "loss": 3.5922, "step": 16270 }, { "epoch": 0.28, "learning_rate": 0.0004945568397855945, "loss": 3.538, "step": 16275 }, { "epoch": 0.28, "learning_rate": 0.0004944942127062397, "loss": 3.572, "step": 16280 }, { "epoch": 0.28, "learning_rate": 0.0004944315710019169, "loss": 3.6377, "step": 16285 }, { "epoch": 0.28, "learning_rate": 0.0004943689146773365, "loss": 3.6228, "step": 16290 }, { "epoch": 0.28, "learning_rate": 0.0004943062437372098, "loss": 3.5855, "step": 16295 }, { "epoch": 0.28, "learning_rate": 0.0004942435581862493, "loss": 3.5372, "step": 16300 }, { "epoch": 0.28, "learning_rate": 0.0004941808580291688, "loss": 3.5609, "step": 16305 }, { "epoch": 0.28, "learning_rate": 0.000494118143270683, "loss": 3.6014, "step": 16310 }, { "epoch": 0.28, "learning_rate": 0.0004940554139155074, "loss": 3.5558, "step": 16315 }, { "epoch": 0.28, "learning_rate": 0.0004939926699683594, "loss": 3.5162, "step": 16320 }, { "epoch": 0.28, "learning_rate": 0.0004939299114339568, "loss": 3.6057, "step": 16325 }, { "epoch": 0.28, "learning_rate": 0.0004938671383170187, "loss": 3.6958, "step": 16330 }, { "epoch": 0.28, "learning_rate": 0.0004938043506222653, "loss": 3.6499, "step": 16335 }, { "epoch": 0.28, "learning_rate": 0.0004937415483544181, "loss": 3.5561, "step": 16340 }, { "epoch": 0.28, "learning_rate": 0.0004936787315181993, "loss": 3.5672, "step": 16345 }, { "epoch": 0.28, "learning_rate": 0.0004936159001183325, "loss": 3.5341, "step": 16350 }, { "epoch": 0.28, "learning_rate": 0.0004935530541595422, "loss": 3.5887, "step": 16355 }, { "epoch": 0.28, "learning_rate": 0.0004934901936465543, "loss": 3.5734, "step": 16360 }, { "epoch": 0.28, "learning_rate": 0.0004934273185840953, "loss": 3.5032, "step": 16365 }, { "epoch": 0.28, "learning_rate": 0.0004933644289768933, "loss": 3.6115, "step": 16370 }, { "epoch": 0.28, "learning_rate": 0.0004933015248296772, "loss": 3.5705, "step": 16375 }, { "epoch": 0.28, "learning_rate": 0.0004932386061471772, "loss": 3.6443, "step": 16380 }, { "epoch": 0.28, "learning_rate": 0.0004931756729341242, "loss": 3.4791, "step": 16385 }, { "epoch": 0.28, "learning_rate": 0.0004931127251952507, "loss": 3.6197, "step": 16390 }, { "epoch": 0.28, "learning_rate": 0.00049304976293529, "loss": 3.4558, "step": 16395 }, { "epoch": 0.28, "learning_rate": 0.0004929867861589764, "loss": 3.5077, "step": 16400 }, { "epoch": 0.28, "learning_rate": 0.0004929237948710455, "loss": 3.5552, "step": 16405 }, { "epoch": 0.28, "learning_rate": 0.0004928607890762341, "loss": 3.5842, "step": 16410 }, { "epoch": 0.28, "learning_rate": 0.0004927977687792796, "loss": 3.5561, "step": 16415 }, { "epoch": 0.28, "learning_rate": 0.000492734733984921, "loss": 3.4731, "step": 16420 }, { "epoch": 0.28, "learning_rate": 0.0004926716846978982, "loss": 3.5924, "step": 16425 }, { "epoch": 0.28, "learning_rate": 0.0004926086209229521, "loss": 3.4558, "step": 16430 }, { "epoch": 0.28, "learning_rate": 0.0004925455426648248, "loss": 3.457, "step": 16435 }, { "epoch": 0.28, "learning_rate": 0.0004924824499282595, "loss": 3.5853, "step": 16440 }, { "epoch": 0.28, "learning_rate": 0.0004924193427180004, "loss": 3.6165, "step": 16445 }, { "epoch": 0.28, "learning_rate": 0.0004923562210387928, "loss": 3.4844, "step": 16450 }, { "epoch": 0.28, "learning_rate": 0.0004922930848953834, "loss": 3.629, "step": 16455 }, { "epoch": 0.28, "learning_rate": 0.0004922299342925193, "loss": 3.4659, "step": 16460 }, { "epoch": 0.28, "learning_rate": 0.0004921667692349493, "loss": 3.6049, "step": 16465 }, { "epoch": 0.29, "learning_rate": 0.0004921035897274232, "loss": 3.5527, "step": 16470 }, { "epoch": 0.29, "learning_rate": 0.0004920403957746916, "loss": 3.5113, "step": 16475 }, { "epoch": 0.29, "learning_rate": 0.0004919771873815065, "loss": 3.5445, "step": 16480 }, { "epoch": 0.29, "learning_rate": 0.0004919139645526207, "loss": 3.5608, "step": 16485 }, { "epoch": 0.29, "learning_rate": 0.0004918507272927883, "loss": 3.5982, "step": 16490 }, { "epoch": 0.29, "learning_rate": 0.0004917874756067644, "loss": 3.5369, "step": 16495 }, { "epoch": 0.29, "learning_rate": 0.0004917242094993054, "loss": 3.4741, "step": 16500 }, { "epoch": 0.29, "eval_loss": 3.5877273082733154, "eval_runtime": 150.0518, "eval_samples_per_second": 12.269, "eval_steps_per_second": 0.773, "step": 16500 }, { "epoch": 0.29, "learning_rate": 0.0004916609289751683, "loss": 3.5372, "step": 16505 }, { "epoch": 0.29, "learning_rate": 0.0004915976340391116, "loss": 3.5657, "step": 16510 }, { "epoch": 0.29, "learning_rate": 0.0004915343246958947, "loss": 3.5311, "step": 16515 }, { "epoch": 0.29, "learning_rate": 0.0004914710009502782, "loss": 3.5391, "step": 16520 }, { "epoch": 0.29, "learning_rate": 0.0004914076628070238, "loss": 3.6073, "step": 16525 }, { "epoch": 0.29, "learning_rate": 0.000491344310270894, "loss": 3.4663, "step": 16530 }, { "epoch": 0.29, "learning_rate": 0.0004912809433466527, "loss": 3.5429, "step": 16535 }, { "epoch": 0.29, "learning_rate": 0.0004912175620390648, "loss": 3.5616, "step": 16540 }, { "epoch": 0.29, "learning_rate": 0.0004911541663528961, "loss": 3.5202, "step": 16545 }, { "epoch": 0.29, "learning_rate": 0.0004910907562929139, "loss": 3.6226, "step": 16550 }, { "epoch": 0.29, "learning_rate": 0.0004910273318638861, "loss": 3.5045, "step": 16555 }, { "epoch": 0.29, "learning_rate": 0.0004909638930705819, "loss": 3.4955, "step": 16560 }, { "epoch": 0.29, "learning_rate": 0.0004909004399177717, "loss": 3.5726, "step": 16565 }, { "epoch": 0.29, "learning_rate": 0.0004908369724102267, "loss": 3.4443, "step": 16570 }, { "epoch": 0.29, "learning_rate": 0.0004907734905527193, "loss": 3.5395, "step": 16575 }, { "epoch": 0.29, "learning_rate": 0.0004907099943500233, "loss": 3.4818, "step": 16580 }, { "epoch": 0.29, "learning_rate": 0.000490646483806913, "loss": 3.6123, "step": 16585 }, { "epoch": 0.29, "learning_rate": 0.0004905829589281642, "loss": 3.5348, "step": 16590 }, { "epoch": 0.29, "learning_rate": 0.0004905194197185536, "loss": 3.5266, "step": 16595 }, { "epoch": 0.29, "learning_rate": 0.0004904558661828589, "loss": 3.5143, "step": 16600 }, { "epoch": 0.29, "learning_rate": 0.0004903922983258593, "loss": 3.6398, "step": 16605 }, { "epoch": 0.29, "learning_rate": 0.0004903287161523346, "loss": 3.5811, "step": 16610 }, { "epoch": 0.29, "learning_rate": 0.0004902651196670658, "loss": 3.4796, "step": 16615 }, { "epoch": 0.29, "learning_rate": 0.0004902015088748352, "loss": 3.4956, "step": 16620 }, { "epoch": 0.29, "learning_rate": 0.0004901378837804258, "loss": 3.4245, "step": 16625 }, { "epoch": 0.29, "learning_rate": 0.000490074244388622, "loss": 3.6016, "step": 16630 }, { "epoch": 0.29, "learning_rate": 0.0004900105907042092, "loss": 3.5701, "step": 16635 }, { "epoch": 0.29, "learning_rate": 0.0004899469227319737, "loss": 3.5832, "step": 16640 }, { "epoch": 0.29, "learning_rate": 0.0004898832404767031, "loss": 3.5255, "step": 16645 }, { "epoch": 0.29, "learning_rate": 0.0004898195439431858, "loss": 3.5867, "step": 16650 }, { "epoch": 0.29, "learning_rate": 0.0004897558331362119, "loss": 3.4916, "step": 16655 }, { "epoch": 0.29, "learning_rate": 0.0004896921080605716, "loss": 3.5749, "step": 16660 }, { "epoch": 0.29, "learning_rate": 0.000489628368721057, "loss": 3.5705, "step": 16665 }, { "epoch": 0.29, "learning_rate": 0.000489564615122461, "loss": 3.5197, "step": 16670 }, { "epoch": 0.29, "learning_rate": 0.0004895008472695772, "loss": 3.669, "step": 16675 }, { "epoch": 0.29, "learning_rate": 0.0004894370651672011, "loss": 3.5425, "step": 16680 }, { "epoch": 0.29, "learning_rate": 0.0004893732688201284, "loss": 3.5529, "step": 16685 }, { "epoch": 0.29, "learning_rate": 0.0004893094582331566, "loss": 3.5687, "step": 16690 }, { "epoch": 0.29, "learning_rate": 0.0004892456334110835, "loss": 3.5815, "step": 16695 }, { "epoch": 0.29, "learning_rate": 0.0004891817943587087, "loss": 3.5539, "step": 16700 }, { "epoch": 0.29, "learning_rate": 0.0004891179410808326, "loss": 3.6557, "step": 16705 }, { "epoch": 0.29, "learning_rate": 0.0004890540735822566, "loss": 3.6539, "step": 16710 }, { "epoch": 0.29, "learning_rate": 0.000488990191867783, "loss": 3.675, "step": 16715 }, { "epoch": 0.29, "learning_rate": 0.0004889262959422155, "loss": 3.5966, "step": 16720 }, { "epoch": 0.29, "learning_rate": 0.0004888623858103589, "loss": 3.4819, "step": 16725 }, { "epoch": 0.29, "learning_rate": 0.0004887984614770188, "loss": 3.4654, "step": 16730 }, { "epoch": 0.29, "learning_rate": 0.0004887345229470019, "loss": 3.5918, "step": 16735 }, { "epoch": 0.29, "learning_rate": 0.0004886705702251161, "loss": 3.5793, "step": 16740 }, { "epoch": 0.29, "learning_rate": 0.0004886066033161704, "loss": 3.5092, "step": 16745 }, { "epoch": 0.29, "learning_rate": 0.0004885426222249747, "loss": 3.6574, "step": 16750 }, { "epoch": 0.29, "learning_rate": 0.0004884786269563401, "loss": 3.5397, "step": 16755 }, { "epoch": 0.29, "learning_rate": 0.0004884146175150786, "loss": 3.4925, "step": 16760 }, { "epoch": 0.29, "learning_rate": 0.0004883505939060036, "loss": 3.5832, "step": 16765 }, { "epoch": 0.29, "learning_rate": 0.0004882865561339293, "loss": 3.5626, "step": 16770 }, { "epoch": 0.29, "learning_rate": 0.00048822250420367064, "loss": 3.5953, "step": 16775 }, { "epoch": 0.29, "learning_rate": 0.00048815843812004446, "loss": 3.6348, "step": 16780 }, { "epoch": 0.29, "learning_rate": 0.000488094357887868, "loss": 3.5284, "step": 16785 }, { "epoch": 0.29, "learning_rate": 0.00048803026351195984, "loss": 3.5496, "step": 16790 }, { "epoch": 0.29, "learning_rate": 0.00048796615499713947, "loss": 3.554, "step": 16795 }, { "epoch": 0.29, "learning_rate": 0.0004879020323482276, "loss": 3.5148, "step": 16800 }, { "epoch": 0.29, "learning_rate": 0.0004878378955700458, "loss": 3.5522, "step": 16805 }, { "epoch": 0.29, "learning_rate": 0.00048777374466741697, "loss": 3.5446, "step": 16810 }, { "epoch": 0.29, "learning_rate": 0.0004877095796451648, "loss": 3.5773, "step": 16815 }, { "epoch": 0.29, "learning_rate": 0.0004876454005081144, "loss": 3.6213, "step": 16820 }, { "epoch": 0.29, "learning_rate": 0.0004875812072610913, "loss": 3.621, "step": 16825 }, { "epoch": 0.29, "learning_rate": 0.0004875169999089229, "loss": 3.524, "step": 16830 }, { "epoch": 0.29, "learning_rate": 0.0004874527784564371, "loss": 3.5176, "step": 16835 }, { "epoch": 0.29, "learning_rate": 0.0004873885429084631, "loss": 3.4054, "step": 16840 }, { "epoch": 0.29, "learning_rate": 0.00048732429326983097, "loss": 3.5634, "step": 16845 }, { "epoch": 0.29, "learning_rate": 0.000487260029545372, "loss": 3.6623, "step": 16850 }, { "epoch": 0.29, "learning_rate": 0.0004871957517399184, "loss": 3.5978, "step": 16855 }, { "epoch": 0.29, "learning_rate": 0.00048713145985830386, "loss": 3.5235, "step": 16860 }, { "epoch": 0.29, "learning_rate": 0.00048706715390536235, "loss": 3.5122, "step": 16865 }, { "epoch": 0.29, "learning_rate": 0.00048700283388592976, "loss": 3.5124, "step": 16870 }, { "epoch": 0.29, "learning_rate": 0.00048693849980484234, "loss": 3.5231, "step": 16875 }, { "epoch": 0.29, "learning_rate": 0.00048687415166693773, "loss": 3.6416, "step": 16880 }, { "epoch": 0.29, "learning_rate": 0.0004868097894770547, "loss": 3.5731, "step": 16885 }, { "epoch": 0.29, "learning_rate": 0.00048674541324003287, "loss": 3.5577, "step": 16890 }, { "epoch": 0.29, "learning_rate": 0.00048668102296071296, "loss": 3.3237, "step": 16895 }, { "epoch": 0.29, "learning_rate": 0.00048661661864393696, "loss": 3.5944, "step": 16900 }, { "epoch": 0.29, "learning_rate": 0.00048655220029454756, "loss": 3.5177, "step": 16905 }, { "epoch": 0.29, "learning_rate": 0.0004864877679173889, "loss": 3.511, "step": 16910 }, { "epoch": 0.29, "learning_rate": 0.0004864233215173056, "loss": 3.5407, "step": 16915 }, { "epoch": 0.29, "learning_rate": 0.0004863588610991441, "loss": 3.5129, "step": 16920 }, { "epoch": 0.29, "learning_rate": 0.0004862943866677512, "loss": 3.5516, "step": 16925 }, { "epoch": 0.29, "learning_rate": 0.0004862298982279752, "loss": 3.6088, "step": 16930 }, { "epoch": 0.29, "learning_rate": 0.0004861653957846653, "loss": 3.4369, "step": 16935 }, { "epoch": 0.29, "learning_rate": 0.0004861008793426717, "loss": 3.4892, "step": 16940 }, { "epoch": 0.29, "learning_rate": 0.0004860363489068457, "loss": 3.5787, "step": 16945 }, { "epoch": 0.29, "learning_rate": 0.00048597180448203977, "loss": 3.5381, "step": 16950 }, { "epoch": 0.29, "learning_rate": 0.00048590724607310714, "loss": 3.4371, "step": 16955 }, { "epoch": 0.29, "learning_rate": 0.0004858426736849024, "loss": 3.4722, "step": 16960 }, { "epoch": 0.29, "learning_rate": 0.00048577808732228105, "loss": 3.5033, "step": 16965 }, { "epoch": 0.29, "learning_rate": 0.00048571348699009964, "loss": 3.5394, "step": 16970 }, { "epoch": 0.29, "learning_rate": 0.0004856488726932158, "loss": 3.5476, "step": 16975 }, { "epoch": 0.29, "learning_rate": 0.00048558424443648804, "loss": 3.5903, "step": 16980 }, { "epoch": 0.29, "learning_rate": 0.00048551960222477635, "loss": 3.4827, "step": 16985 }, { "epoch": 0.29, "learning_rate": 0.0004854549460629412, "loss": 3.4285, "step": 16990 }, { "epoch": 0.29, "learning_rate": 0.00048539027595584464, "loss": 3.5585, "step": 16995 }, { "epoch": 0.29, "learning_rate": 0.0004853255919083494, "loss": 3.4662, "step": 17000 }, { "epoch": 0.29, "eval_loss": 3.576127052307129, "eval_runtime": 150.0801, "eval_samples_per_second": 12.267, "eval_steps_per_second": 0.773, "step": 17000 }, { "epoch": 0.29, "learning_rate": 0.00048526089392531945, "loss": 3.4641, "step": 17005 }, { "epoch": 0.29, "learning_rate": 0.0004851961820116197, "loss": 3.5871, "step": 17010 }, { "epoch": 0.29, "learning_rate": 0.0004851314561721162, "loss": 3.6005, "step": 17015 }, { "epoch": 0.29, "learning_rate": 0.000485066716411676, "loss": 3.5406, "step": 17020 }, { "epoch": 0.29, "learning_rate": 0.0004850019627351672, "loss": 3.5104, "step": 17025 }, { "epoch": 0.29, "learning_rate": 0.0004849371951474588, "loss": 3.5767, "step": 17030 }, { "epoch": 0.29, "learning_rate": 0.00048487241365342116, "loss": 3.5849, "step": 17035 }, { "epoch": 0.29, "learning_rate": 0.00048480761825792556, "loss": 3.5134, "step": 17040 }, { "epoch": 0.29, "learning_rate": 0.0004847428089658442, "loss": 3.5021, "step": 17045 }, { "epoch": 0.3, "learning_rate": 0.0004846779857820503, "loss": 3.5575, "step": 17050 }, { "epoch": 0.3, "learning_rate": 0.00048461314871141835, "loss": 3.5833, "step": 17055 }, { "epoch": 0.3, "learning_rate": 0.00048454829775882374, "loss": 3.4563, "step": 17060 }, { "epoch": 0.3, "learning_rate": 0.00048448343292914296, "loss": 3.5297, "step": 17065 }, { "epoch": 0.3, "learning_rate": 0.0004844185542272534, "loss": 3.576, "step": 17070 }, { "epoch": 0.3, "learning_rate": 0.00048435366165803384, "loss": 3.5006, "step": 17075 }, { "epoch": 0.3, "learning_rate": 0.0004842887552263637, "loss": 3.579, "step": 17080 }, { "epoch": 0.3, "learning_rate": 0.0004842238349371235, "loss": 3.565, "step": 17085 }, { "epoch": 0.3, "learning_rate": 0.00048415890079519516, "loss": 3.4344, "step": 17090 }, { "epoch": 0.3, "learning_rate": 0.0004840939528054612, "loss": 3.4845, "step": 17095 }, { "epoch": 0.3, "learning_rate": 0.00048402899097280556, "loss": 3.5118, "step": 17100 }, { "epoch": 0.3, "learning_rate": 0.0004839640153021129, "loss": 3.4574, "step": 17105 }, { "epoch": 0.3, "learning_rate": 0.000483899025798269, "loss": 3.5173, "step": 17110 }, { "epoch": 0.3, "learning_rate": 0.00048383402246616087, "loss": 3.5195, "step": 17115 }, { "epoch": 0.3, "learning_rate": 0.0004837690053106765, "loss": 3.4535, "step": 17120 }, { "epoch": 0.3, "learning_rate": 0.0004837039743367045, "loss": 3.4877, "step": 17125 }, { "epoch": 0.3, "learning_rate": 0.0004836389295491353, "loss": 3.5545, "step": 17130 }, { "epoch": 0.3, "learning_rate": 0.00048357387095285964, "loss": 3.4785, "step": 17135 }, { "epoch": 0.3, "learning_rate": 0.0004835087985527697, "loss": 3.5197, "step": 17140 }, { "epoch": 0.3, "learning_rate": 0.0004834437123537586, "loss": 3.5171, "step": 17145 }, { "epoch": 0.3, "learning_rate": 0.0004833786123607205, "loss": 3.5258, "step": 17150 }, { "epoch": 0.3, "learning_rate": 0.0004833134985785505, "loss": 3.6473, "step": 17155 }, { "epoch": 0.3, "learning_rate": 0.0004832483710121448, "loss": 3.5945, "step": 17160 }, { "epoch": 0.3, "learning_rate": 0.0004831832296664009, "loss": 3.577, "step": 17165 }, { "epoch": 0.3, "learning_rate": 0.00048311807454621685, "loss": 3.5094, "step": 17170 }, { "epoch": 0.3, "learning_rate": 0.00048305290565649195, "loss": 3.489, "step": 17175 }, { "epoch": 0.3, "learning_rate": 0.0004829877230021268, "loss": 3.524, "step": 17180 }, { "epoch": 0.3, "learning_rate": 0.00048292252658802264, "loss": 3.5145, "step": 17185 }, { "epoch": 0.3, "learning_rate": 0.00048285731641908196, "loss": 3.5293, "step": 17190 }, { "epoch": 0.3, "learning_rate": 0.0004827920925002082, "loss": 3.4996, "step": 17195 }, { "epoch": 0.3, "learning_rate": 0.00048272685483630585, "loss": 3.6237, "step": 17200 }, { "epoch": 0.3, "learning_rate": 0.00048266160343228066, "loss": 3.4632, "step": 17205 }, { "epoch": 0.3, "learning_rate": 0.0004825963382930388, "loss": 3.5058, "step": 17210 }, { "epoch": 0.3, "learning_rate": 0.0004825310594234882, "loss": 3.521, "step": 17215 }, { "epoch": 0.3, "learning_rate": 0.00048246576682853736, "loss": 3.4941, "step": 17220 }, { "epoch": 0.3, "learning_rate": 0.00048240046051309593, "loss": 3.6331, "step": 17225 }, { "epoch": 0.3, "learning_rate": 0.0004823351404820747, "loss": 3.5374, "step": 17230 }, { "epoch": 0.3, "learning_rate": 0.00048226980674038534, "loss": 3.5544, "step": 17235 }, { "epoch": 0.3, "learning_rate": 0.00048220445929294067, "loss": 3.4389, "step": 17240 }, { "epoch": 0.3, "learning_rate": 0.00048213909814465443, "loss": 3.5835, "step": 17245 }, { "epoch": 0.3, "learning_rate": 0.00048207372330044137, "loss": 3.4002, "step": 17250 }, { "epoch": 0.3, "learning_rate": 0.0004820083347652176, "loss": 3.5722, "step": 17255 }, { "epoch": 0.3, "learning_rate": 0.0004819429325438997, "loss": 3.6024, "step": 17260 }, { "epoch": 0.3, "learning_rate": 0.0004818775166414057, "loss": 3.6439, "step": 17265 }, { "epoch": 0.3, "learning_rate": 0.0004818120870626547, "loss": 3.4983, "step": 17270 }, { "epoch": 0.3, "learning_rate": 0.00048174664381256643, "loss": 3.4879, "step": 17275 }, { "epoch": 0.3, "learning_rate": 0.00048168118689606197, "loss": 3.5338, "step": 17280 }, { "epoch": 0.3, "learning_rate": 0.00048161571631806337, "loss": 3.4761, "step": 17285 }, { "epoch": 0.3, "learning_rate": 0.0004815502320834936, "loss": 3.5412, "step": 17290 }, { "epoch": 0.3, "learning_rate": 0.00048148473419727694, "loss": 3.5941, "step": 17295 }, { "epoch": 0.3, "learning_rate": 0.0004814192226643383, "loss": 3.5358, "step": 17300 }, { "epoch": 0.3, "learning_rate": 0.00048135369748960395, "loss": 3.459, "step": 17305 }, { "epoch": 0.3, "learning_rate": 0.0004812881586780008, "loss": 3.4669, "step": 17310 }, { "epoch": 0.3, "learning_rate": 0.0004812226062344573, "loss": 3.5167, "step": 17315 }, { "epoch": 0.3, "learning_rate": 0.00048115704016390265, "loss": 3.6433, "step": 17320 }, { "epoch": 0.3, "learning_rate": 0.0004810914604712669, "loss": 3.4723, "step": 17325 }, { "epoch": 0.3, "learning_rate": 0.00048102586716148145, "loss": 3.6129, "step": 17330 }, { "epoch": 0.3, "learning_rate": 0.00048096026023947846, "loss": 3.5888, "step": 17335 }, { "epoch": 0.3, "learning_rate": 0.00048089463971019133, "loss": 3.5604, "step": 17340 }, { "epoch": 0.3, "learning_rate": 0.0004808290055785544, "loss": 3.5614, "step": 17345 }, { "epoch": 0.3, "learning_rate": 0.0004807633578495029, "loss": 3.5267, "step": 17350 }, { "epoch": 0.3, "learning_rate": 0.0004806976965279734, "loss": 3.5437, "step": 17355 }, { "epoch": 0.3, "learning_rate": 0.0004806320216189031, "loss": 3.577, "step": 17360 }, { "epoch": 0.3, "learning_rate": 0.0004805663331272306, "loss": 3.5452, "step": 17365 }, { "epoch": 0.3, "learning_rate": 0.00048050063105789515, "loss": 3.5119, "step": 17370 }, { "epoch": 0.3, "learning_rate": 0.0004804349154158373, "loss": 3.5662, "step": 17375 }, { "epoch": 0.3, "learning_rate": 0.0004803691862059985, "loss": 3.5817, "step": 17380 }, { "epoch": 0.3, "learning_rate": 0.0004803034434333214, "loss": 3.4176, "step": 17385 }, { "epoch": 0.3, "learning_rate": 0.0004802376871027493, "loss": 3.5071, "step": 17390 }, { "epoch": 0.3, "learning_rate": 0.00048017191721922683, "loss": 3.4767, "step": 17395 }, { "epoch": 0.3, "learning_rate": 0.00048010613378769945, "loss": 3.4762, "step": 17400 }, { "epoch": 0.3, "learning_rate": 0.00048004033681311407, "loss": 3.5571, "step": 17405 }, { "epoch": 0.3, "learning_rate": 0.00047997452630041783, "loss": 3.4705, "step": 17410 }, { "epoch": 0.3, "learning_rate": 0.00047990870225455976, "loss": 3.4789, "step": 17415 }, { "epoch": 0.3, "learning_rate": 0.00047984286468048923, "loss": 3.5571, "step": 17420 }, { "epoch": 0.3, "learning_rate": 0.00047977701358315687, "loss": 3.5161, "step": 17425 }, { "epoch": 0.3, "learning_rate": 0.00047971114896751455, "loss": 3.6078, "step": 17430 }, { "epoch": 0.3, "learning_rate": 0.00047964527083851476, "loss": 3.4624, "step": 17435 }, { "epoch": 0.3, "learning_rate": 0.00047957937920111126, "loss": 3.4843, "step": 17440 }, { "epoch": 0.3, "learning_rate": 0.00047951347406025886, "loss": 3.5239, "step": 17445 }, { "epoch": 0.3, "learning_rate": 0.00047944755542091307, "loss": 3.5355, "step": 17450 }, { "epoch": 0.3, "learning_rate": 0.00047938162328803086, "loss": 3.427, "step": 17455 }, { "epoch": 0.3, "learning_rate": 0.0004793156776665699, "loss": 3.5715, "step": 17460 }, { "epoch": 0.3, "learning_rate": 0.00047924971856148896, "loss": 3.6401, "step": 17465 }, { "epoch": 0.3, "learning_rate": 0.0004791837459777478, "loss": 3.5878, "step": 17470 }, { "epoch": 0.3, "learning_rate": 0.0004791177599203072, "loss": 3.4139, "step": 17475 }, { "epoch": 0.3, "learning_rate": 0.00047905176039412904, "loss": 3.5894, "step": 17480 }, { "epoch": 0.3, "learning_rate": 0.0004789857474041762, "loss": 3.5054, "step": 17485 }, { "epoch": 0.3, "learning_rate": 0.0004789197209554124, "loss": 3.4073, "step": 17490 }, { "epoch": 0.3, "learning_rate": 0.0004788536810528025, "loss": 3.546, "step": 17495 }, { "epoch": 0.3, "learning_rate": 0.0004787876277013124, "loss": 3.4948, "step": 17500 }, { "epoch": 0.3, "eval_loss": 3.5631024837493896, "eval_runtime": 149.9514, "eval_samples_per_second": 12.277, "eval_steps_per_second": 0.774, "step": 17500 }, { "epoch": 0.3, "learning_rate": 0.000478721560905909, "loss": 3.5123, "step": 17505 }, { "epoch": 0.3, "learning_rate": 0.0004786554806715601, "loss": 3.4436, "step": 17510 }, { "epoch": 0.3, "learning_rate": 0.0004785893870032347, "loss": 3.551, "step": 17515 }, { "epoch": 0.3, "learning_rate": 0.0004785232799059026, "loss": 3.5769, "step": 17520 }, { "epoch": 0.3, "learning_rate": 0.0004784571593845349, "loss": 3.5486, "step": 17525 }, { "epoch": 0.3, "learning_rate": 0.00047839102544410325, "loss": 3.6552, "step": 17530 }, { "epoch": 0.3, "learning_rate": 0.00047832487808958083, "loss": 3.5692, "step": 17535 }, { "epoch": 0.3, "learning_rate": 0.0004782587173259414, "loss": 3.4071, "step": 17540 }, { "epoch": 0.3, "learning_rate": 0.00047819254315816006, "loss": 3.5011, "step": 17545 }, { "epoch": 0.3, "learning_rate": 0.00047812635559121256, "loss": 3.5682, "step": 17550 }, { "epoch": 0.3, "learning_rate": 0.00047806015463007615, "loss": 3.4182, "step": 17555 }, { "epoch": 0.3, "learning_rate": 0.00047799394027972866, "loss": 3.5072, "step": 17560 }, { "epoch": 0.3, "learning_rate": 0.0004779277125451489, "loss": 3.4896, "step": 17565 }, { "epoch": 0.3, "learning_rate": 0.00047786147143131716, "loss": 3.3961, "step": 17570 }, { "epoch": 0.3, "learning_rate": 0.00047779521694321425, "loss": 3.5795, "step": 17575 }, { "epoch": 0.3, "learning_rate": 0.0004777289490858222, "loss": 3.4299, "step": 17580 }, { "epoch": 0.3, "learning_rate": 0.00047766266786412415, "loss": 3.5548, "step": 17585 }, { "epoch": 0.3, "learning_rate": 0.00047759637328310386, "loss": 3.4617, "step": 17590 }, { "epoch": 0.3, "learning_rate": 0.00047753006534774646, "loss": 3.4899, "step": 17595 }, { "epoch": 0.3, "learning_rate": 0.000477463744063038, "loss": 3.5963, "step": 17600 }, { "epoch": 0.3, "learning_rate": 0.00047739740943396553, "loss": 3.4351, "step": 17605 }, { "epoch": 0.3, "learning_rate": 0.0004773310614655169, "loss": 3.5722, "step": 17610 }, { "epoch": 0.3, "learning_rate": 0.00047726470016268134, "loss": 3.5019, "step": 17615 }, { "epoch": 0.3, "learning_rate": 0.00047719832553044876, "loss": 3.4795, "step": 17620 }, { "epoch": 0.31, "learning_rate": 0.0004771319375738103, "loss": 3.4939, "step": 17625 }, { "epoch": 0.31, "learning_rate": 0.0004770655362977578, "loss": 3.459, "step": 17630 }, { "epoch": 0.31, "learning_rate": 0.0004769991217072844, "loss": 3.4321, "step": 17635 }, { "epoch": 0.31, "learning_rate": 0.0004769326938073843, "loss": 3.588, "step": 17640 }, { "epoch": 0.31, "learning_rate": 0.0004768662526030523, "loss": 3.6168, "step": 17645 }, { "epoch": 0.31, "learning_rate": 0.0004767997980992846, "loss": 3.4792, "step": 17650 }, { "epoch": 0.31, "learning_rate": 0.0004767333303010781, "loss": 3.4928, "step": 17655 }, { "epoch": 0.31, "learning_rate": 0.0004766668492134309, "loss": 3.6081, "step": 17660 }, { "epoch": 0.31, "learning_rate": 0.0004766003548413421, "loss": 3.5809, "step": 17665 }, { "epoch": 0.31, "learning_rate": 0.00047653384718981164, "loss": 3.5654, "step": 17670 }, { "epoch": 0.31, "learning_rate": 0.00047646732626384066, "loss": 3.4998, "step": 17675 }, { "epoch": 0.31, "learning_rate": 0.000476400792068431, "loss": 3.3947, "step": 17680 }, { "epoch": 0.31, "learning_rate": 0.00047633424460858585, "loss": 3.6234, "step": 17685 }, { "epoch": 0.31, "learning_rate": 0.0004762676838893093, "loss": 3.4851, "step": 17690 }, { "epoch": 0.31, "learning_rate": 0.00047620110991560623, "loss": 3.4785, "step": 17695 }, { "epoch": 0.31, "learning_rate": 0.00047613452269248274, "loss": 3.5703, "step": 17700 }, { "epoch": 0.31, "learning_rate": 0.00047606792222494566, "loss": 3.5448, "step": 17705 }, { "epoch": 0.31, "learning_rate": 0.0004760013085180033, "loss": 3.4537, "step": 17710 }, { "epoch": 0.31, "learning_rate": 0.0004759346815766646, "loss": 3.515, "step": 17715 }, { "epoch": 0.31, "learning_rate": 0.0004758680414059393, "loss": 3.5411, "step": 17720 }, { "epoch": 0.31, "learning_rate": 0.0004758013880108387, "loss": 3.5154, "step": 17725 }, { "epoch": 0.31, "learning_rate": 0.0004757347213963747, "loss": 3.4914, "step": 17730 }, { "epoch": 0.31, "learning_rate": 0.0004756680415675603, "loss": 3.4521, "step": 17735 }, { "epoch": 0.31, "learning_rate": 0.0004756013485294094, "loss": 3.5936, "step": 17740 }, { "epoch": 0.31, "learning_rate": 0.0004755346422869371, "loss": 3.4493, "step": 17745 }, { "epoch": 0.31, "learning_rate": 0.0004754679228451592, "loss": 3.5317, "step": 17750 }, { "epoch": 0.31, "learning_rate": 0.00047540119020909286, "loss": 3.5307, "step": 17755 }, { "epoch": 0.31, "learning_rate": 0.00047533444438375587, "loss": 3.5228, "step": 17760 }, { "epoch": 0.31, "learning_rate": 0.0004752676853741673, "loss": 3.4958, "step": 17765 }, { "epoch": 0.31, "learning_rate": 0.0004752009131853469, "loss": 3.4806, "step": 17770 }, { "epoch": 0.31, "learning_rate": 0.0004751341278223159, "loss": 3.4653, "step": 17775 }, { "epoch": 0.31, "learning_rate": 0.0004750673292900959, "loss": 3.5797, "step": 17780 }, { "epoch": 0.31, "learning_rate": 0.00047500051759371, "loss": 3.5707, "step": 17785 }, { "epoch": 0.31, "learning_rate": 0.000474933692738182, "loss": 3.5677, "step": 17790 }, { "epoch": 0.31, "learning_rate": 0.00047486685472853687, "loss": 3.5105, "step": 17795 }, { "epoch": 0.31, "learning_rate": 0.0004748000035698003, "loss": 3.5625, "step": 17800 }, { "epoch": 0.31, "learning_rate": 0.00047473313926699954, "loss": 3.5567, "step": 17805 }, { "epoch": 0.31, "learning_rate": 0.00047466626182516203, "loss": 3.5687, "step": 17810 }, { "epoch": 0.31, "learning_rate": 0.00047459937124931685, "loss": 3.5199, "step": 17815 }, { "epoch": 0.31, "learning_rate": 0.0004745324675444938, "loss": 3.5174, "step": 17820 }, { "epoch": 0.31, "learning_rate": 0.00047446555071572354, "loss": 3.4744, "step": 17825 }, { "epoch": 0.31, "learning_rate": 0.0004743986207680381, "loss": 3.3464, "step": 17830 }, { "epoch": 0.31, "learning_rate": 0.00047433167770647, "loss": 3.55, "step": 17835 }, { "epoch": 0.31, "learning_rate": 0.0004742647215360533, "loss": 3.4027, "step": 17840 }, { "epoch": 0.31, "learning_rate": 0.00047419775226182264, "loss": 3.5334, "step": 17845 }, { "epoch": 0.31, "learning_rate": 0.0004741307698888137, "loss": 3.5083, "step": 17850 }, { "epoch": 0.31, "learning_rate": 0.00047406377442206325, "loss": 3.628, "step": 17855 }, { "epoch": 0.31, "learning_rate": 0.00047399676586660914, "loss": 3.6128, "step": 17860 }, { "epoch": 0.31, "learning_rate": 0.0004739297442274899, "loss": 3.5811, "step": 17865 }, { "epoch": 0.31, "learning_rate": 0.00047386270950974525, "loss": 3.5103, "step": 17870 }, { "epoch": 0.31, "learning_rate": 0.0004737956617184159, "loss": 3.5447, "step": 17875 }, { "epoch": 0.31, "learning_rate": 0.00047372860085854345, "loss": 3.6269, "step": 17880 }, { "epoch": 0.31, "learning_rate": 0.0004736615269351706, "loss": 3.5275, "step": 17885 }, { "epoch": 0.31, "learning_rate": 0.00047359443995334083, "loss": 3.4888, "step": 17890 }, { "epoch": 0.31, "learning_rate": 0.00047352733991809894, "loss": 3.5246, "step": 17895 }, { "epoch": 0.31, "learning_rate": 0.0004734602268344903, "loss": 3.4484, "step": 17900 }, { "epoch": 0.31, "learning_rate": 0.0004733931007075617, "loss": 3.5838, "step": 17905 }, { "epoch": 0.31, "learning_rate": 0.00047332596154236046, "loss": 3.5475, "step": 17910 }, { "epoch": 0.31, "learning_rate": 0.00047325880934393524, "loss": 3.5593, "step": 17915 }, { "epoch": 0.31, "learning_rate": 0.00047319164411733545, "loss": 3.6149, "step": 17920 }, { "epoch": 0.31, "learning_rate": 0.00047312446586761164, "loss": 3.4426, "step": 17925 }, { "epoch": 0.31, "learning_rate": 0.0004730572745998153, "loss": 3.524, "step": 17930 }, { "epoch": 0.31, "learning_rate": 0.0004729900703189988, "loss": 3.5848, "step": 17935 }, { "epoch": 0.31, "learning_rate": 0.00047292285303021544, "loss": 3.4916, "step": 17940 }, { "epoch": 0.31, "learning_rate": 0.0004728556227385199, "loss": 3.5414, "step": 17945 }, { "epoch": 0.31, "learning_rate": 0.0004727883794489673, "loss": 3.5673, "step": 17950 }, { "epoch": 0.31, "learning_rate": 0.0004727211231666141, "loss": 3.5469, "step": 17955 }, { "epoch": 0.31, "learning_rate": 0.0004726538538965177, "loss": 3.4563, "step": 17960 }, { "epoch": 0.31, "learning_rate": 0.00047258657164373624, "loss": 3.512, "step": 17965 }, { "epoch": 0.31, "learning_rate": 0.00047251927641332915, "loss": 3.481, "step": 17970 }, { "epoch": 0.31, "learning_rate": 0.0004724519682103566, "loss": 3.5488, "step": 17975 }, { "epoch": 0.31, "learning_rate": 0.0004723846470398798, "loss": 3.3891, "step": 17980 }, { "epoch": 0.31, "learning_rate": 0.000472317312906961, "loss": 3.533, "step": 17985 }, { "epoch": 0.31, "learning_rate": 0.0004722499658166634, "loss": 3.6264, "step": 17990 }, { "epoch": 0.31, "learning_rate": 0.00047218260577405123, "loss": 3.5065, "step": 17995 }, { "epoch": 0.31, "learning_rate": 0.0004721152327841894, "loss": 3.5378, "step": 18000 }, { "epoch": 0.31, "eval_loss": 3.560037136077881, "eval_runtime": 149.8718, "eval_samples_per_second": 12.284, "eval_steps_per_second": 0.774, "step": 18000 }, { "epoch": 0.31, "learning_rate": 0.00047204784685214425, "loss": 3.637, "step": 18005 }, { "epoch": 0.31, "learning_rate": 0.00047198044798298273, "loss": 3.5406, "step": 18010 }, { "epoch": 0.31, "learning_rate": 0.0004719130361817728, "loss": 3.5347, "step": 18015 }, { "epoch": 0.31, "learning_rate": 0.00047184561145358376, "loss": 3.5563, "step": 18020 }, { "epoch": 0.31, "learning_rate": 0.0004717781738034853, "loss": 3.6075, "step": 18025 }, { "epoch": 0.31, "learning_rate": 0.00047171072323654847, "loss": 3.411, "step": 18030 }, { "epoch": 0.31, "learning_rate": 0.0004716432597578454, "loss": 3.505, "step": 18035 }, { "epoch": 0.31, "learning_rate": 0.00047157578337244873, "loss": 3.6218, "step": 18040 }, { "epoch": 0.31, "learning_rate": 0.0004715082940854326, "loss": 3.4998, "step": 18045 }, { "epoch": 0.31, "learning_rate": 0.0004714407919018716, "loss": 3.5515, "step": 18050 }, { "epoch": 0.31, "learning_rate": 0.0004713732768268417, "loss": 3.6125, "step": 18055 }, { "epoch": 0.31, "learning_rate": 0.0004713057488654197, "loss": 3.5094, "step": 18060 }, { "epoch": 0.31, "learning_rate": 0.0004712382080226833, "loss": 3.5357, "step": 18065 }, { "epoch": 0.31, "learning_rate": 0.0004711706543037112, "loss": 3.5171, "step": 18070 }, { "epoch": 0.31, "learning_rate": 0.0004711030877135831, "loss": 3.5248, "step": 18075 }, { "epoch": 0.31, "learning_rate": 0.0004710355082573798, "loss": 3.3577, "step": 18080 }, { "epoch": 0.31, "learning_rate": 0.00047096791594018273, "loss": 3.4937, "step": 18085 }, { "epoch": 0.31, "learning_rate": 0.0004709003107670747, "loss": 3.5847, "step": 18090 }, { "epoch": 0.31, "learning_rate": 0.00047083269274313897, "loss": 3.5041, "step": 18095 }, { "epoch": 0.31, "learning_rate": 0.0004707650618734604, "loss": 3.518, "step": 18100 }, { "epoch": 0.31, "learning_rate": 0.0004706974181631243, "loss": 3.3611, "step": 18105 }, { "epoch": 0.31, "learning_rate": 0.0004706297616172172, "loss": 3.5533, "step": 18110 }, { "epoch": 0.31, "learning_rate": 0.00047056209224082636, "loss": 3.4176, "step": 18115 }, { "epoch": 0.31, "learning_rate": 0.0004704944100390404, "loss": 3.5398, "step": 18120 }, { "epoch": 0.31, "learning_rate": 0.0004704267150169485, "loss": 3.4949, "step": 18125 }, { "epoch": 0.31, "learning_rate": 0.0004703590071796411, "loss": 3.5204, "step": 18130 }, { "epoch": 0.31, "learning_rate": 0.0004702912865322095, "loss": 3.4492, "step": 18135 }, { "epoch": 0.31, "learning_rate": 0.0004702235530797458, "loss": 3.6095, "step": 18140 }, { "epoch": 0.31, "learning_rate": 0.0004701558068273433, "loss": 3.4533, "step": 18145 }, { "epoch": 0.31, "learning_rate": 0.0004700880477800962, "loss": 3.4747, "step": 18150 }, { "epoch": 0.31, "learning_rate": 0.0004700202759430995, "loss": 3.6068, "step": 18155 }, { "epoch": 0.31, "learning_rate": 0.0004699524913214495, "loss": 3.5339, "step": 18160 }, { "epoch": 0.31, "learning_rate": 0.0004698846939202431, "loss": 3.5689, "step": 18165 }, { "epoch": 0.31, "learning_rate": 0.00046981688374457835, "loss": 3.4747, "step": 18170 }, { "epoch": 0.31, "learning_rate": 0.0004697490607995543, "loss": 3.5512, "step": 18175 }, { "epoch": 0.31, "learning_rate": 0.00046968122509027075, "loss": 3.5927, "step": 18180 }, { "epoch": 0.31, "learning_rate": 0.00046961337662182874, "loss": 3.4706, "step": 18185 }, { "epoch": 0.31, "learning_rate": 0.00046954551539933007, "loss": 3.556, "step": 18190 }, { "epoch": 0.31, "learning_rate": 0.0004694776414278775, "loss": 3.4009, "step": 18195 }, { "epoch": 0.31, "learning_rate": 0.00046940975471257483, "loss": 3.4902, "step": 18200 }, { "epoch": 0.32, "learning_rate": 0.00046934185525852683, "loss": 3.4386, "step": 18205 }, { "epoch": 0.32, "learning_rate": 0.0004692739430708392, "loss": 3.4121, "step": 18210 }, { "epoch": 0.32, "learning_rate": 0.0004692060181546186, "loss": 3.5268, "step": 18215 }, { "epoch": 0.32, "learning_rate": 0.00046913808051497253, "loss": 3.5333, "step": 18220 }, { "epoch": 0.32, "learning_rate": 0.00046907013015700977, "loss": 3.5135, "step": 18225 }, { "epoch": 0.32, "learning_rate": 0.0004690021670858395, "loss": 3.4469, "step": 18230 }, { "epoch": 0.32, "learning_rate": 0.0004689341913065725, "loss": 3.4252, "step": 18235 }, { "epoch": 0.32, "learning_rate": 0.0004688662028243201, "loss": 3.5202, "step": 18240 }, { "epoch": 0.32, "learning_rate": 0.00046879820164419454, "loss": 3.596, "step": 18245 }, { "epoch": 0.32, "learning_rate": 0.00046873018777130945, "loss": 3.5158, "step": 18250 }, { "epoch": 0.32, "learning_rate": 0.0004686621612107789, "loss": 3.4576, "step": 18255 }, { "epoch": 0.32, "learning_rate": 0.00046859412196771817, "loss": 3.4635, "step": 18260 }, { "epoch": 0.32, "learning_rate": 0.00046852607004724354, "loss": 3.5205, "step": 18265 }, { "epoch": 0.32, "learning_rate": 0.0004684580054544721, "loss": 3.5507, "step": 18270 }, { "epoch": 0.32, "learning_rate": 0.0004683899281945221, "loss": 3.4769, "step": 18275 }, { "epoch": 0.32, "learning_rate": 0.00046832183827251244, "loss": 3.4863, "step": 18280 }, { "epoch": 0.32, "learning_rate": 0.00046825373569356305, "loss": 3.5763, "step": 18285 }, { "epoch": 0.32, "learning_rate": 0.0004681856204627951, "loss": 3.4605, "step": 18290 }, { "epoch": 0.32, "learning_rate": 0.0004681174925853305, "loss": 3.4173, "step": 18295 }, { "epoch": 0.32, "learning_rate": 0.00046804935206629187, "loss": 3.4477, "step": 18300 }, { "epoch": 0.32, "learning_rate": 0.00046798119891080343, "loss": 3.4775, "step": 18305 }, { "epoch": 0.32, "learning_rate": 0.0004679130331239895, "loss": 3.5202, "step": 18310 }, { "epoch": 0.32, "learning_rate": 0.0004678448547109761, "loss": 3.4684, "step": 18315 }, { "epoch": 0.32, "learning_rate": 0.00046777666367688996, "loss": 3.4995, "step": 18320 }, { "epoch": 0.32, "learning_rate": 0.0004677084600268584, "loss": 3.4431, "step": 18325 }, { "epoch": 0.32, "learning_rate": 0.0004676402437660102, "loss": 3.5776, "step": 18330 }, { "epoch": 0.32, "learning_rate": 0.0004675720148994747, "loss": 3.5402, "step": 18335 }, { "epoch": 0.32, "learning_rate": 0.00046750377343238256, "loss": 3.5223, "step": 18340 }, { "epoch": 0.32, "learning_rate": 0.00046743551936986513, "loss": 3.332, "step": 18345 }, { "epoch": 0.32, "learning_rate": 0.0004673672527170547, "loss": 3.5664, "step": 18350 }, { "epoch": 0.32, "learning_rate": 0.00046729897347908467, "loss": 3.5212, "step": 18355 }, { "epoch": 0.32, "learning_rate": 0.0004672306816610892, "loss": 3.4357, "step": 18360 }, { "epoch": 0.32, "learning_rate": 0.0004671623772682034, "loss": 3.4875, "step": 18365 }, { "epoch": 0.32, "learning_rate": 0.00046709406030556364, "loss": 3.6394, "step": 18370 }, { "epoch": 0.32, "learning_rate": 0.00046702573077830677, "loss": 3.5538, "step": 18375 }, { "epoch": 0.32, "learning_rate": 0.0004669573886915711, "loss": 3.4598, "step": 18380 }, { "epoch": 0.32, "learning_rate": 0.0004668890340504954, "loss": 3.6014, "step": 18385 }, { "epoch": 0.32, "learning_rate": 0.0004668206668602196, "loss": 3.5697, "step": 18390 }, { "epoch": 0.32, "learning_rate": 0.0004667522871258846, "loss": 3.3325, "step": 18395 }, { "epoch": 0.32, "learning_rate": 0.0004666838948526321, "loss": 3.5063, "step": 18400 }, { "epoch": 0.32, "learning_rate": 0.00046661549004560515, "loss": 3.5601, "step": 18405 }, { "epoch": 0.32, "learning_rate": 0.0004665470727099471, "loss": 3.5294, "step": 18410 }, { "epoch": 0.32, "learning_rate": 0.00046647864285080274, "loss": 3.4877, "step": 18415 }, { "epoch": 0.32, "learning_rate": 0.00046641020047331764, "loss": 3.5173, "step": 18420 }, { "epoch": 0.32, "learning_rate": 0.0004663417455826383, "loss": 3.5529, "step": 18425 }, { "epoch": 0.32, "learning_rate": 0.0004662732781839122, "loss": 3.4485, "step": 18430 }, { "epoch": 0.32, "learning_rate": 0.00046620479828228767, "loss": 3.6021, "step": 18435 }, { "epoch": 0.32, "learning_rate": 0.00046613630588291413, "loss": 3.5351, "step": 18440 }, { "epoch": 0.32, "learning_rate": 0.0004660678009909419, "loss": 3.5565, "step": 18445 }, { "epoch": 0.32, "learning_rate": 0.000465999283611522, "loss": 3.5563, "step": 18450 }, { "epoch": 0.32, "learning_rate": 0.0004659307537498068, "loss": 3.4886, "step": 18455 }, { "epoch": 0.32, "learning_rate": 0.00046586221141094923, "loss": 3.5287, "step": 18460 }, { "epoch": 0.32, "learning_rate": 0.0004657936566001034, "loss": 3.3976, "step": 18465 }, { "epoch": 0.32, "learning_rate": 0.00046572508932242424, "loss": 3.5678, "step": 18470 }, { "epoch": 0.32, "learning_rate": 0.00046565650958306775, "loss": 3.5914, "step": 18475 }, { "epoch": 0.32, "learning_rate": 0.0004655879173871908, "loss": 3.4357, "step": 18480 }, { "epoch": 0.32, "learning_rate": 0.00046551931273995087, "loss": 3.5462, "step": 18485 }, { "epoch": 0.32, "learning_rate": 0.000465450695646507, "loss": 3.6432, "step": 18490 }, { "epoch": 0.32, "learning_rate": 0.0004653820661120188, "loss": 3.509, "step": 18495 }, { "epoch": 0.32, "learning_rate": 0.0004653134241416467, "loss": 3.3627, "step": 18500 }, { "epoch": 0.32, "eval_loss": 3.539888858795166, "eval_runtime": 149.9774, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 18500 }, { "epoch": 0.32, "learning_rate": 0.0004652447697405524, "loss": 3.4926, "step": 18505 }, { "epoch": 0.32, "learning_rate": 0.00046517610291389825, "loss": 3.4943, "step": 18510 }, { "epoch": 0.32, "learning_rate": 0.0004651074236668477, "loss": 3.5992, "step": 18515 }, { "epoch": 0.32, "learning_rate": 0.000465038732004565, "loss": 3.4454, "step": 18520 }, { "epoch": 0.32, "learning_rate": 0.00046497002793221545, "loss": 3.4608, "step": 18525 }, { "epoch": 0.32, "learning_rate": 0.0004649013114549653, "loss": 3.426, "step": 18530 }, { "epoch": 0.32, "learning_rate": 0.0004648325825779817, "loss": 3.5191, "step": 18535 }, { "epoch": 0.32, "learning_rate": 0.00046476384130643254, "loss": 3.6492, "step": 18540 }, { "epoch": 0.32, "learning_rate": 0.000464695087645487, "loss": 3.4617, "step": 18545 }, { "epoch": 0.32, "learning_rate": 0.00046462632160031484, "loss": 3.506, "step": 18550 }, { "epoch": 0.32, "learning_rate": 0.000464557543176087, "loss": 3.5133, "step": 18555 }, { "epoch": 0.32, "learning_rate": 0.0004644887523779753, "loss": 3.5006, "step": 18560 }, { "epoch": 0.32, "learning_rate": 0.0004644199492111524, "loss": 3.2805, "step": 18565 }, { "epoch": 0.32, "learning_rate": 0.00046435113368079197, "loss": 3.5573, "step": 18570 }, { "epoch": 0.32, "learning_rate": 0.00046428230579206853, "loss": 3.5132, "step": 18575 }, { "epoch": 0.32, "learning_rate": 0.0004642134655501576, "loss": 3.661, "step": 18580 }, { "epoch": 0.32, "learning_rate": 0.0004641446129602358, "loss": 3.3854, "step": 18585 }, { "epoch": 0.32, "learning_rate": 0.00046407574802748017, "loss": 3.5102, "step": 18590 }, { "epoch": 0.32, "learning_rate": 0.00046400687075706925, "loss": 3.4589, "step": 18595 }, { "epoch": 0.32, "learning_rate": 0.0004639379811541821, "loss": 3.4866, "step": 18600 }, { "epoch": 0.32, "learning_rate": 0.00046386907922399894, "loss": 3.5285, "step": 18605 }, { "epoch": 0.32, "learning_rate": 0.00046380016497170096, "loss": 3.5076, "step": 18610 }, { "epoch": 0.32, "learning_rate": 0.0004637312384024699, "loss": 3.4932, "step": 18615 }, { "epoch": 0.32, "learning_rate": 0.0004636622995214889, "loss": 3.4528, "step": 18620 }, { "epoch": 0.32, "learning_rate": 0.0004635933483339417, "loss": 3.586, "step": 18625 }, { "epoch": 0.32, "learning_rate": 0.0004635243848450131, "loss": 3.4847, "step": 18630 }, { "epoch": 0.32, "learning_rate": 0.0004634554090598888, "loss": 3.525, "step": 18635 }, { "epoch": 0.32, "learning_rate": 0.00046338642098375544, "loss": 3.4765, "step": 18640 }, { "epoch": 0.32, "learning_rate": 0.0004633174206218006, "loss": 3.577, "step": 18645 }, { "epoch": 0.32, "learning_rate": 0.0004632484079792127, "loss": 3.4143, "step": 18650 }, { "epoch": 0.32, "learning_rate": 0.00046317938306118117, "loss": 3.5152, "step": 18655 }, { "epoch": 0.32, "learning_rate": 0.0004631103458728963, "loss": 3.4873, "step": 18660 }, { "epoch": 0.32, "learning_rate": 0.0004630412964195493, "loss": 3.4814, "step": 18665 }, { "epoch": 0.32, "learning_rate": 0.00046297223470633247, "loss": 3.5824, "step": 18670 }, { "epoch": 0.32, "learning_rate": 0.0004629031607384388, "loss": 3.4629, "step": 18675 }, { "epoch": 0.32, "learning_rate": 0.00046283407452106225, "loss": 3.5677, "step": 18680 }, { "epoch": 0.32, "learning_rate": 0.00046276497605939786, "loss": 3.4402, "step": 18685 }, { "epoch": 0.32, "learning_rate": 0.0004626958653586414, "loss": 3.4675, "step": 18690 }, { "epoch": 0.32, "learning_rate": 0.0004626267424239896, "loss": 3.4872, "step": 18695 }, { "epoch": 0.32, "learning_rate": 0.0004625576072606403, "loss": 3.4781, "step": 18700 }, { "epoch": 0.32, "learning_rate": 0.00046248845987379204, "loss": 3.5359, "step": 18705 }, { "epoch": 0.32, "learning_rate": 0.00046241930026864425, "loss": 3.5084, "step": 18710 }, { "epoch": 0.32, "learning_rate": 0.0004623501284503976, "loss": 3.4936, "step": 18715 }, { "epoch": 0.32, "learning_rate": 0.0004622809444242533, "loss": 3.5277, "step": 18720 }, { "epoch": 0.32, "learning_rate": 0.0004622117481954135, "loss": 3.5329, "step": 18725 }, { "epoch": 0.32, "learning_rate": 0.00046214253976908167, "loss": 3.459, "step": 18730 }, { "epoch": 0.32, "learning_rate": 0.00046207331915046183, "loss": 3.5331, "step": 18735 }, { "epoch": 0.32, "learning_rate": 0.00046200408634475894, "loss": 3.4192, "step": 18740 }, { "epoch": 0.32, "learning_rate": 0.00046193484135717913, "loss": 3.4791, "step": 18745 }, { "epoch": 0.32, "learning_rate": 0.0004618655841929291, "loss": 3.4814, "step": 18750 }, { "epoch": 0.32, "learning_rate": 0.0004617963148572167, "loss": 3.4538, "step": 18755 }, { "epoch": 0.32, "learning_rate": 0.00046172703335525047, "loss": 3.4978, "step": 18760 }, { "epoch": 0.32, "learning_rate": 0.0004616577396922403, "loss": 3.5329, "step": 18765 }, { "epoch": 0.32, "learning_rate": 0.0004615884338733966, "loss": 3.5177, "step": 18770 }, { "epoch": 0.32, "learning_rate": 0.0004615191159039308, "loss": 3.5178, "step": 18775 }, { "epoch": 0.33, "learning_rate": 0.0004614497857890552, "loss": 3.5395, "step": 18780 }, { "epoch": 0.33, "learning_rate": 0.0004613804435339832, "loss": 3.5156, "step": 18785 }, { "epoch": 0.33, "learning_rate": 0.00046131108914392884, "loss": 3.4949, "step": 18790 }, { "epoch": 0.33, "learning_rate": 0.0004612417226241073, "loss": 3.4067, "step": 18795 }, { "epoch": 0.33, "learning_rate": 0.0004611723439797346, "loss": 3.4388, "step": 18800 }, { "epoch": 0.33, "learning_rate": 0.0004611029532160276, "loss": 3.5248, "step": 18805 }, { "epoch": 0.33, "learning_rate": 0.0004610335503382041, "loss": 3.5521, "step": 18810 }, { "epoch": 0.33, "learning_rate": 0.0004609641353514829, "loss": 3.5943, "step": 18815 }, { "epoch": 0.33, "learning_rate": 0.0004608947082610838, "loss": 3.5184, "step": 18820 }, { "epoch": 0.33, "learning_rate": 0.0004608252690722271, "loss": 3.4862, "step": 18825 }, { "epoch": 0.33, "learning_rate": 0.0004607558177901344, "loss": 3.6086, "step": 18830 }, { "epoch": 0.33, "learning_rate": 0.000460686354420028, "loss": 3.4269, "step": 18835 }, { "epoch": 0.33, "learning_rate": 0.0004606168789671314, "loss": 3.583, "step": 18840 }, { "epoch": 0.33, "learning_rate": 0.00046054739143666857, "loss": 3.4697, "step": 18845 }, { "epoch": 0.33, "learning_rate": 0.0004604778918338647, "loss": 3.5615, "step": 18850 }, { "epoch": 0.33, "learning_rate": 0.0004604083801639457, "loss": 3.5213, "step": 18855 }, { "epoch": 0.33, "learning_rate": 0.00046033885643213866, "loss": 3.443, "step": 18860 }, { "epoch": 0.33, "learning_rate": 0.0004602693206436714, "loss": 3.4458, "step": 18865 }, { "epoch": 0.33, "learning_rate": 0.0004601997728037726, "loss": 3.4955, "step": 18870 }, { "epoch": 0.33, "learning_rate": 0.00046013021291767183, "loss": 3.5146, "step": 18875 }, { "epoch": 0.33, "learning_rate": 0.0004600606409905997, "loss": 3.5533, "step": 18880 }, { "epoch": 0.33, "learning_rate": 0.00045999105702778775, "loss": 3.415, "step": 18885 }, { "epoch": 0.33, "learning_rate": 0.0004599214610344682, "loss": 3.4769, "step": 18890 }, { "epoch": 0.33, "learning_rate": 0.00045985185301587435, "loss": 3.4555, "step": 18895 }, { "epoch": 0.33, "learning_rate": 0.0004597822329772403, "loss": 3.4481, "step": 18900 }, { "epoch": 0.33, "learning_rate": 0.00045971260092380137, "loss": 3.391, "step": 18905 }, { "epoch": 0.33, "learning_rate": 0.00045964295686079325, "loss": 3.3999, "step": 18910 }, { "epoch": 0.33, "learning_rate": 0.000459573300793453, "loss": 3.5864, "step": 18915 }, { "epoch": 0.33, "learning_rate": 0.00045950363272701824, "loss": 3.5394, "step": 18920 }, { "epoch": 0.33, "learning_rate": 0.00045943395266672787, "loss": 3.5128, "step": 18925 }, { "epoch": 0.33, "learning_rate": 0.00045936426061782135, "loss": 3.5267, "step": 18930 }, { "epoch": 0.33, "learning_rate": 0.00045929455658553895, "loss": 3.4345, "step": 18935 }, { "epoch": 0.33, "learning_rate": 0.00045922484057512247, "loss": 3.6213, "step": 18940 }, { "epoch": 0.33, "learning_rate": 0.00045915511259181385, "loss": 3.5013, "step": 18945 }, { "epoch": 0.33, "learning_rate": 0.00045908537264085657, "loss": 3.561, "step": 18950 }, { "epoch": 0.33, "learning_rate": 0.0004590156207274945, "loss": 3.5136, "step": 18955 }, { "epoch": 0.33, "learning_rate": 0.00045894585685697263, "loss": 3.4877, "step": 18960 }, { "epoch": 0.33, "learning_rate": 0.000458876081034537, "loss": 3.4736, "step": 18965 }, { "epoch": 0.33, "learning_rate": 0.0004588062932654343, "loss": 3.5516, "step": 18970 }, { "epoch": 0.33, "learning_rate": 0.00045873649355491217, "loss": 3.4, "step": 18975 }, { "epoch": 0.33, "learning_rate": 0.0004586666819082192, "loss": 3.5071, "step": 18980 }, { "epoch": 0.33, "learning_rate": 0.0004585968583306049, "loss": 3.5604, "step": 18985 }, { "epoch": 0.33, "learning_rate": 0.00045852702282731976, "loss": 3.5457, "step": 18990 }, { "epoch": 0.33, "learning_rate": 0.0004584571754036149, "loss": 3.4559, "step": 18995 }, { "epoch": 0.33, "learning_rate": 0.0004583873160647425, "loss": 3.6102, "step": 19000 }, { "epoch": 0.33, "eval_loss": 3.5276401042938232, "eval_runtime": 149.8717, "eval_samples_per_second": 12.284, "eval_steps_per_second": 0.774, "step": 19000 }, { "epoch": 0.33, "learning_rate": 0.0004583174448159557, "loss": 3.4795, "step": 19005 }, { "epoch": 0.33, "learning_rate": 0.0004582475616625084, "loss": 3.4671, "step": 19010 }, { "epoch": 0.33, "learning_rate": 0.00045817766660965543, "loss": 3.5596, "step": 19015 }, { "epoch": 0.33, "learning_rate": 0.0004581077596626526, "loss": 3.6208, "step": 19020 }, { "epoch": 0.33, "learning_rate": 0.0004580378408267565, "loss": 3.5096, "step": 19025 }, { "epoch": 0.33, "learning_rate": 0.00045796791010722487, "loss": 3.4556, "step": 19030 }, { "epoch": 0.33, "learning_rate": 0.00045789796750931574, "loss": 3.4603, "step": 19035 }, { "epoch": 0.33, "learning_rate": 0.00045782801303828873, "loss": 3.4485, "step": 19040 }, { "epoch": 0.33, "learning_rate": 0.0004577580466994041, "loss": 3.4624, "step": 19045 }, { "epoch": 0.33, "learning_rate": 0.00045768806849792276, "loss": 3.4523, "step": 19050 }, { "epoch": 0.33, "learning_rate": 0.00045761807843910685, "loss": 3.5471, "step": 19055 }, { "epoch": 0.33, "learning_rate": 0.0004575480765282192, "loss": 3.3223, "step": 19060 }, { "epoch": 0.33, "learning_rate": 0.00045747806277052363, "loss": 3.4825, "step": 19065 }, { "epoch": 0.33, "learning_rate": 0.00045740803717128484, "loss": 3.3537, "step": 19070 }, { "epoch": 0.33, "learning_rate": 0.0004573379997357682, "loss": 3.3989, "step": 19075 }, { "epoch": 0.33, "learning_rate": 0.0004572679504692405, "loss": 3.5855, "step": 19080 }, { "epoch": 0.33, "learning_rate": 0.00045719788937696883, "loss": 3.5294, "step": 19085 }, { "epoch": 0.33, "learning_rate": 0.0004571278164642215, "loss": 3.4447, "step": 19090 }, { "epoch": 0.33, "learning_rate": 0.0004570577317362677, "loss": 3.6258, "step": 19095 }, { "epoch": 0.33, "learning_rate": 0.00045698763519837725, "loss": 3.4541, "step": 19100 }, { "epoch": 0.33, "learning_rate": 0.00045691752685582133, "loss": 3.5693, "step": 19105 }, { "epoch": 0.33, "learning_rate": 0.0004568474067138716, "loss": 3.4704, "step": 19110 }, { "epoch": 0.33, "learning_rate": 0.00045677727477780056, "loss": 3.5682, "step": 19115 }, { "epoch": 0.33, "learning_rate": 0.000456707131052882, "loss": 3.4419, "step": 19120 }, { "epoch": 0.33, "learning_rate": 0.00045663697554439034, "loss": 3.4335, "step": 19125 }, { "epoch": 0.33, "learning_rate": 0.0004565668082576008, "loss": 3.5477, "step": 19130 }, { "epoch": 0.33, "learning_rate": 0.00045649662919778976, "loss": 3.5055, "step": 19135 }, { "epoch": 0.33, "learning_rate": 0.00045642643837023406, "loss": 3.6236, "step": 19140 }, { "epoch": 0.33, "learning_rate": 0.0004563562357802121, "loss": 3.5487, "step": 19145 }, { "epoch": 0.33, "learning_rate": 0.00045628602143300243, "loss": 3.4841, "step": 19150 }, { "epoch": 0.33, "learning_rate": 0.0004562157953338848, "loss": 3.4994, "step": 19155 }, { "epoch": 0.33, "learning_rate": 0.00045614555748814005, "loss": 3.4607, "step": 19160 }, { "epoch": 0.33, "learning_rate": 0.00045607530790104956, "loss": 3.4797, "step": 19165 }, { "epoch": 0.33, "learning_rate": 0.00045600504657789594, "loss": 3.5536, "step": 19170 }, { "epoch": 0.33, "learning_rate": 0.00045593477352396213, "loss": 3.5152, "step": 19175 }, { "epoch": 0.33, "learning_rate": 0.0004558644887445326, "loss": 3.5431, "step": 19180 }, { "epoch": 0.33, "learning_rate": 0.0004557941922448923, "loss": 3.4514, "step": 19185 }, { "epoch": 0.33, "learning_rate": 0.00045572388403032724, "loss": 3.3985, "step": 19190 }, { "epoch": 0.33, "learning_rate": 0.0004556535641061241, "loss": 3.4395, "step": 19195 }, { "epoch": 0.33, "learning_rate": 0.00045558323247757063, "loss": 3.4796, "step": 19200 }, { "epoch": 0.33, "learning_rate": 0.00045551288914995536, "loss": 3.4244, "step": 19205 }, { "epoch": 0.33, "learning_rate": 0.00045544253412856806, "loss": 3.4544, "step": 19210 }, { "epoch": 0.33, "learning_rate": 0.00045537216741869865, "loss": 3.5251, "step": 19215 }, { "epoch": 0.33, "learning_rate": 0.00045530178902563847, "loss": 3.5224, "step": 19220 }, { "epoch": 0.33, "learning_rate": 0.0004552313989546798, "loss": 3.2957, "step": 19225 }, { "epoch": 0.33, "learning_rate": 0.0004551609972111154, "loss": 3.4405, "step": 19230 }, { "epoch": 0.33, "learning_rate": 0.0004550905838002392, "loss": 3.4844, "step": 19235 }, { "epoch": 0.33, "learning_rate": 0.000455020158727346, "loss": 3.549, "step": 19240 }, { "epoch": 0.33, "learning_rate": 0.0004549497219977312, "loss": 3.5311, "step": 19245 }, { "epoch": 0.33, "learning_rate": 0.0004548792736166915, "loss": 3.4819, "step": 19250 }, { "epoch": 0.33, "learning_rate": 0.00045480881358952416, "loss": 3.492, "step": 19255 }, { "epoch": 0.33, "learning_rate": 0.00045473834192152744, "loss": 3.4925, "step": 19260 }, { "epoch": 0.33, "learning_rate": 0.00045466785861800034, "loss": 3.4899, "step": 19265 }, { "epoch": 0.33, "learning_rate": 0.00045459736368424304, "loss": 3.5294, "step": 19270 }, { "epoch": 0.33, "learning_rate": 0.0004545268571255563, "loss": 3.4631, "step": 19275 }, { "epoch": 0.33, "learning_rate": 0.0004544563389472417, "loss": 3.436, "step": 19280 }, { "epoch": 0.33, "learning_rate": 0.0004543858091546021, "loss": 3.4474, "step": 19285 }, { "epoch": 0.33, "learning_rate": 0.00045431526775294086, "loss": 3.4655, "step": 19290 }, { "epoch": 0.33, "learning_rate": 0.00045424471474756227, "loss": 3.4941, "step": 19295 }, { "epoch": 0.33, "learning_rate": 0.00045417415014377166, "loss": 3.5932, "step": 19300 }, { "epoch": 0.33, "learning_rate": 0.0004541035739468751, "loss": 3.3979, "step": 19305 }, { "epoch": 0.33, "learning_rate": 0.00045403298616217963, "loss": 3.4772, "step": 19310 }, { "epoch": 0.33, "learning_rate": 0.00045396238679499293, "loss": 3.5518, "step": 19315 }, { "epoch": 0.33, "learning_rate": 0.0004538917758506237, "loss": 3.4183, "step": 19320 }, { "epoch": 0.33, "learning_rate": 0.0004538211533343817, "loss": 3.5486, "step": 19325 }, { "epoch": 0.33, "learning_rate": 0.0004537505192515773, "loss": 3.3749, "step": 19330 }, { "epoch": 0.33, "learning_rate": 0.00045367987360752174, "loss": 3.4757, "step": 19335 }, { "epoch": 0.33, "learning_rate": 0.00045360921640752737, "loss": 3.4974, "step": 19340 }, { "epoch": 0.33, "learning_rate": 0.0004535385476569071, "loss": 3.4206, "step": 19345 }, { "epoch": 0.33, "learning_rate": 0.000453467867360975, "loss": 3.5497, "step": 19350 }, { "epoch": 0.33, "learning_rate": 0.00045339717552504564, "loss": 3.4373, "step": 19355 }, { "epoch": 0.34, "learning_rate": 0.0004533264721544349, "loss": 3.4262, "step": 19360 }, { "epoch": 0.34, "learning_rate": 0.0004532557572544593, "loss": 3.5594, "step": 19365 }, { "epoch": 0.34, "learning_rate": 0.0004531850308304361, "loss": 3.4997, "step": 19370 }, { "epoch": 0.34, "learning_rate": 0.00045311429288768374, "loss": 3.464, "step": 19375 }, { "epoch": 0.34, "learning_rate": 0.00045304354343152117, "loss": 3.4962, "step": 19380 }, { "epoch": 0.34, "learning_rate": 0.00045297278246726844, "loss": 3.5346, "step": 19385 }, { "epoch": 0.34, "learning_rate": 0.00045290201000024654, "loss": 3.5029, "step": 19390 }, { "epoch": 0.34, "learning_rate": 0.000452831226035777, "loss": 3.5546, "step": 19395 }, { "epoch": 0.34, "learning_rate": 0.00045276043057918263, "loss": 3.4901, "step": 19400 }, { "epoch": 0.34, "learning_rate": 0.0004526896236357866, "loss": 3.4695, "step": 19405 }, { "epoch": 0.34, "learning_rate": 0.0004526188052109135, "loss": 3.5047, "step": 19410 }, { "epoch": 0.34, "learning_rate": 0.00045254797530988844, "loss": 3.451, "step": 19415 }, { "epoch": 0.34, "learning_rate": 0.0004524771339380374, "loss": 3.436, "step": 19420 }, { "epoch": 0.34, "learning_rate": 0.00045240628110068725, "loss": 3.5381, "step": 19425 }, { "epoch": 0.34, "learning_rate": 0.00045233541680316593, "loss": 3.4288, "step": 19430 }, { "epoch": 0.34, "learning_rate": 0.0004522645410508019, "loss": 3.5495, "step": 19435 }, { "epoch": 0.34, "learning_rate": 0.0004521936538489248, "loss": 3.4837, "step": 19440 }, { "epoch": 0.34, "learning_rate": 0.00045212275520286477, "loss": 3.4782, "step": 19445 }, { "epoch": 0.34, "learning_rate": 0.00045205184511795335, "loss": 3.4985, "step": 19450 }, { "epoch": 0.34, "learning_rate": 0.0004519809235995223, "loss": 3.535, "step": 19455 }, { "epoch": 0.34, "learning_rate": 0.0004519099906529047, "loss": 3.5755, "step": 19460 }, { "epoch": 0.34, "learning_rate": 0.00045183904628343444, "loss": 3.5931, "step": 19465 }, { "epoch": 0.34, "learning_rate": 0.000451768090496446, "loss": 3.5447, "step": 19470 }, { "epoch": 0.34, "learning_rate": 0.0004516971232972749, "loss": 3.435, "step": 19475 }, { "epoch": 0.34, "learning_rate": 0.0004516261446912576, "loss": 3.43, "step": 19480 }, { "epoch": 0.34, "learning_rate": 0.0004515551546837313, "loss": 3.4134, "step": 19485 }, { "epoch": 0.34, "learning_rate": 0.0004514841532800341, "loss": 3.393, "step": 19490 }, { "epoch": 0.34, "learning_rate": 0.00045141314048550486, "loss": 3.4855, "step": 19495 }, { "epoch": 0.34, "learning_rate": 0.0004513421163054834, "loss": 3.4721, "step": 19500 }, { "epoch": 0.34, "eval_loss": 3.5094563961029053, "eval_runtime": 149.8708, "eval_samples_per_second": 12.284, "eval_steps_per_second": 0.774, "step": 19500 }, { "epoch": 0.34, "learning_rate": 0.00045127108074531045, "loss": 3.4603, "step": 19505 }, { "epoch": 0.34, "learning_rate": 0.00045120003381032755, "loss": 3.3118, "step": 19510 }, { "epoch": 0.34, "learning_rate": 0.00045112897550587705, "loss": 3.4607, "step": 19515 }, { "epoch": 0.34, "learning_rate": 0.0004510579058373019, "loss": 3.4662, "step": 19520 }, { "epoch": 0.34, "learning_rate": 0.0004509868248099465, "loss": 3.5243, "step": 19525 }, { "epoch": 0.34, "learning_rate": 0.0004509157324291557, "loss": 3.4312, "step": 19530 }, { "epoch": 0.34, "learning_rate": 0.0004508446287002752, "loss": 3.3722, "step": 19535 }, { "epoch": 0.34, "learning_rate": 0.0004507735136286517, "loss": 3.3688, "step": 19540 }, { "epoch": 0.34, "learning_rate": 0.00045070238721963264, "loss": 3.3627, "step": 19545 }, { "epoch": 0.34, "learning_rate": 0.0004506312494785665, "loss": 3.4587, "step": 19550 }, { "epoch": 0.34, "learning_rate": 0.00045056010041080236, "loss": 3.3304, "step": 19555 }, { "epoch": 0.34, "learning_rate": 0.0004504889400216902, "loss": 3.4772, "step": 19560 }, { "epoch": 0.34, "learning_rate": 0.00045041776831658103, "loss": 3.4572, "step": 19565 }, { "epoch": 0.34, "learning_rate": 0.0004503465853008266, "loss": 3.504, "step": 19570 }, { "epoch": 0.34, "learning_rate": 0.0004502753909797794, "loss": 3.4871, "step": 19575 }, { "epoch": 0.34, "learning_rate": 0.00045020418535879293, "loss": 3.5595, "step": 19580 }, { "epoch": 0.34, "learning_rate": 0.0004501329684432215, "loss": 3.5133, "step": 19585 }, { "epoch": 0.34, "learning_rate": 0.0004500617402384204, "loss": 3.4042, "step": 19590 }, { "epoch": 0.34, "learning_rate": 0.0004499905007497454, "loss": 3.6127, "step": 19595 }, { "epoch": 0.34, "learning_rate": 0.00044991924998255344, "loss": 3.344, "step": 19600 }, { "epoch": 0.34, "learning_rate": 0.0004498479879422022, "loss": 3.4879, "step": 19605 }, { "epoch": 0.34, "learning_rate": 0.0004497767146340502, "loss": 3.5619, "step": 19610 }, { "epoch": 0.34, "learning_rate": 0.00044970543006345686, "loss": 3.4435, "step": 19615 }, { "epoch": 0.34, "learning_rate": 0.00044963413423578245, "loss": 3.5145, "step": 19620 }, { "epoch": 0.34, "learning_rate": 0.00044956282715638796, "loss": 3.5095, "step": 19625 }, { "epoch": 0.34, "learning_rate": 0.00044949150883063545, "loss": 3.5551, "step": 19630 }, { "epoch": 0.34, "learning_rate": 0.00044942017926388755, "loss": 3.5651, "step": 19635 }, { "epoch": 0.34, "learning_rate": 0.00044934883846150793, "loss": 3.3975, "step": 19640 }, { "epoch": 0.34, "learning_rate": 0.0004492774864288611, "loss": 3.5149, "step": 19645 }, { "epoch": 0.34, "learning_rate": 0.00044920612317131224, "loss": 3.4926, "step": 19650 }, { "epoch": 0.34, "learning_rate": 0.0004491347486942277, "loss": 3.3984, "step": 19655 }, { "epoch": 0.34, "learning_rate": 0.00044906336300297423, "loss": 3.3962, "step": 19660 }, { "epoch": 0.34, "learning_rate": 0.0004489919661029198, "loss": 3.4718, "step": 19665 }, { "epoch": 0.34, "learning_rate": 0.00044892055799943323, "loss": 3.4969, "step": 19670 }, { "epoch": 0.34, "learning_rate": 0.00044884913869788377, "loss": 3.3791, "step": 19675 }, { "epoch": 0.34, "learning_rate": 0.0004487777082036421, "loss": 3.4044, "step": 19680 }, { "epoch": 0.34, "learning_rate": 0.00044870626652207903, "loss": 3.4759, "step": 19685 }, { "epoch": 0.34, "learning_rate": 0.0004486348136585669, "loss": 3.4839, "step": 19690 }, { "epoch": 0.34, "learning_rate": 0.00044856334961847867, "loss": 3.5427, "step": 19695 }, { "epoch": 0.34, "learning_rate": 0.00044849187440718774, "loss": 3.5028, "step": 19700 }, { "epoch": 0.34, "learning_rate": 0.0004484203880300689, "loss": 3.5073, "step": 19705 }, { "epoch": 0.34, "learning_rate": 0.00044834889049249755, "loss": 3.4769, "step": 19710 }, { "epoch": 0.34, "learning_rate": 0.0004482773817998499, "loss": 3.5275, "step": 19715 }, { "epoch": 0.34, "learning_rate": 0.0004482058619575031, "loss": 3.5522, "step": 19720 }, { "epoch": 0.34, "learning_rate": 0.00044813433097083493, "loss": 3.5725, "step": 19725 }, { "epoch": 0.34, "learning_rate": 0.0004480627888452243, "loss": 3.3296, "step": 19730 }, { "epoch": 0.34, "learning_rate": 0.0004479912355860508, "loss": 3.575, "step": 19735 }, { "epoch": 0.34, "learning_rate": 0.00044791967119869473, "loss": 3.485, "step": 19740 }, { "epoch": 0.34, "learning_rate": 0.00044784809568853754, "loss": 3.5094, "step": 19745 }, { "epoch": 0.34, "learning_rate": 0.0004477765090609612, "loss": 3.4432, "step": 19750 }, { "epoch": 0.34, "learning_rate": 0.0004477049113213488, "loss": 3.4815, "step": 19755 }, { "epoch": 0.34, "learning_rate": 0.000447633302475084, "loss": 3.5307, "step": 19760 }, { "epoch": 0.34, "learning_rate": 0.0004475616825275514, "loss": 3.479, "step": 19765 }, { "epoch": 0.34, "learning_rate": 0.0004474900514841366, "loss": 3.4396, "step": 19770 }, { "epoch": 0.34, "learning_rate": 0.0004474184093502258, "loss": 3.5377, "step": 19775 }, { "epoch": 0.34, "learning_rate": 0.0004473467561312061, "loss": 3.5034, "step": 19780 }, { "epoch": 0.34, "learning_rate": 0.00044727509183246553, "loss": 3.4338, "step": 19785 }, { "epoch": 0.34, "learning_rate": 0.0004472034164593927, "loss": 3.492, "step": 19790 }, { "epoch": 0.34, "learning_rate": 0.0004471317300173776, "loss": 3.478, "step": 19795 }, { "epoch": 0.34, "learning_rate": 0.0004470600325118102, "loss": 3.5671, "step": 19800 }, { "epoch": 0.34, "learning_rate": 0.00044698832394808206, "loss": 3.3984, "step": 19805 }, { "epoch": 0.34, "learning_rate": 0.0004469166043315853, "loss": 3.5496, "step": 19810 }, { "epoch": 0.34, "learning_rate": 0.0004468448736677129, "loss": 3.4328, "step": 19815 }, { "epoch": 0.34, "learning_rate": 0.0004467731319618585, "loss": 3.4768, "step": 19820 }, { "epoch": 0.34, "learning_rate": 0.0004467013792194168, "loss": 3.4958, "step": 19825 }, { "epoch": 0.34, "learning_rate": 0.00044662961544578313, "loss": 3.4901, "step": 19830 }, { "epoch": 0.34, "learning_rate": 0.00044655784064635406, "loss": 3.4205, "step": 19835 }, { "epoch": 0.34, "learning_rate": 0.00044648605482652625, "loss": 3.5096, "step": 19840 }, { "epoch": 0.34, "learning_rate": 0.0004464142579916979, "loss": 3.5586, "step": 19845 }, { "epoch": 0.34, "learning_rate": 0.00044634245014726773, "loss": 3.5259, "step": 19850 }, { "epoch": 0.34, "learning_rate": 0.00044627063129863535, "loss": 3.396, "step": 19855 }, { "epoch": 0.34, "learning_rate": 0.0004461988014512011, "loss": 3.4005, "step": 19860 }, { "epoch": 0.34, "learning_rate": 0.00044612696061036613, "loss": 3.4798, "step": 19865 }, { "epoch": 0.34, "learning_rate": 0.00044605510878153277, "loss": 3.5662, "step": 19870 }, { "epoch": 0.34, "learning_rate": 0.00044598324597010366, "loss": 3.4116, "step": 19875 }, { "epoch": 0.34, "learning_rate": 0.0004459113721814826, "loss": 3.4119, "step": 19880 }, { "epoch": 0.34, "learning_rate": 0.00044583948742107416, "loss": 3.4519, "step": 19885 }, { "epoch": 0.34, "learning_rate": 0.00044576759169428365, "loss": 3.4095, "step": 19890 }, { "epoch": 0.34, "learning_rate": 0.0004456956850065173, "loss": 3.5294, "step": 19895 }, { "epoch": 0.34, "learning_rate": 0.0004456237673631822, "loss": 3.3993, "step": 19900 }, { "epoch": 0.34, "learning_rate": 0.00044555183876968595, "loss": 3.4356, "step": 19905 }, { "epoch": 0.34, "learning_rate": 0.0004454798992314375, "loss": 3.4959, "step": 19910 }, { "epoch": 0.34, "learning_rate": 0.0004454079487538462, "loss": 3.5206, "step": 19915 }, { "epoch": 0.34, "learning_rate": 0.00044533598734232226, "loss": 3.3217, "step": 19920 }, { "epoch": 0.34, "learning_rate": 0.000445264015002277, "loss": 3.486, "step": 19925 }, { "epoch": 0.34, "learning_rate": 0.00044519203173912214, "loss": 3.5203, "step": 19930 }, { "epoch": 0.34, "learning_rate": 0.00044512003755827075, "loss": 3.4889, "step": 19935 }, { "epoch": 0.35, "learning_rate": 0.0004450480324651362, "loss": 3.4719, "step": 19940 }, { "epoch": 0.35, "learning_rate": 0.00044497601646513294, "loss": 3.4491, "step": 19945 }, { "epoch": 0.35, "learning_rate": 0.00044490398956367635, "loss": 3.4391, "step": 19950 }, { "epoch": 0.35, "learning_rate": 0.0004448319517661823, "loss": 3.3457, "step": 19955 }, { "epoch": 0.35, "learning_rate": 0.00044475990307806765, "loss": 3.3828, "step": 19960 }, { "epoch": 0.35, "learning_rate": 0.00044468784350475024, "loss": 3.4094, "step": 19965 }, { "epoch": 0.35, "learning_rate": 0.0004446157730516485, "loss": 3.3578, "step": 19970 }, { "epoch": 0.35, "learning_rate": 0.00044454369172418183, "loss": 3.4901, "step": 19975 }, { "epoch": 0.35, "learning_rate": 0.0004444715995277702, "loss": 3.4578, "step": 19980 }, { "epoch": 0.35, "learning_rate": 0.0004443994964678347, "loss": 3.5705, "step": 19985 }, { "epoch": 0.35, "learning_rate": 0.0004443273825497972, "loss": 3.356, "step": 19990 }, { "epoch": 0.35, "learning_rate": 0.00044425525777908015, "loss": 3.5245, "step": 19995 }, { "epoch": 0.35, "learning_rate": 0.00044418312216110703, "loss": 3.506, "step": 20000 }, { "epoch": 0.35, "eval_loss": 3.5002212524414062, "eval_runtime": 149.672, "eval_samples_per_second": 12.3, "eval_steps_per_second": 0.775, "step": 20000 }, { "epoch": 0.35, "learning_rate": 0.00044411097570130204, "loss": 3.4232, "step": 20005 }, { "epoch": 0.35, "learning_rate": 0.0004440388184050902, "loss": 3.4982, "step": 20010 }, { "epoch": 0.35, "learning_rate": 0.00044396665027789746, "loss": 3.4513, "step": 20015 }, { "epoch": 0.35, "learning_rate": 0.00044389447132515037, "loss": 3.3997, "step": 20020 }, { "epoch": 0.35, "learning_rate": 0.0004438222815522765, "loss": 3.5083, "step": 20025 }, { "epoch": 0.35, "learning_rate": 0.0004437500809647042, "loss": 3.4299, "step": 20030 }, { "epoch": 0.35, "learning_rate": 0.0004436778695678625, "loss": 3.4987, "step": 20035 }, { "epoch": 0.35, "learning_rate": 0.0004436056473671813, "loss": 3.5377, "step": 20040 }, { "epoch": 0.35, "learning_rate": 0.0004435334143680914, "loss": 3.5088, "step": 20045 }, { "epoch": 0.35, "learning_rate": 0.00044346117057602425, "loss": 3.4873, "step": 20050 }, { "epoch": 0.35, "learning_rate": 0.0004433889159964125, "loss": 3.474, "step": 20055 }, { "epoch": 0.35, "learning_rate": 0.00044331665063468894, "loss": 3.5821, "step": 20060 }, { "epoch": 0.35, "learning_rate": 0.00044324437449628785, "loss": 3.5429, "step": 20065 }, { "epoch": 0.35, "learning_rate": 0.00044317208758664386, "loss": 3.2625, "step": 20070 }, { "epoch": 0.35, "learning_rate": 0.00044309978991119263, "loss": 3.3802, "step": 20075 }, { "epoch": 0.35, "learning_rate": 0.0004430274814753706, "loss": 3.5245, "step": 20080 }, { "epoch": 0.35, "learning_rate": 0.000442955162284615, "loss": 3.5747, "step": 20085 }, { "epoch": 0.35, "learning_rate": 0.0004428828323443638, "loss": 3.4744, "step": 20090 }, { "epoch": 0.35, "learning_rate": 0.00044281049166005585, "loss": 3.5422, "step": 20095 }, { "epoch": 0.35, "learning_rate": 0.0004427381402371308, "loss": 3.3606, "step": 20100 }, { "epoch": 0.35, "learning_rate": 0.00044266577808102915, "loss": 3.4586, "step": 20105 }, { "epoch": 0.35, "learning_rate": 0.0004425934051971921, "loss": 3.4529, "step": 20110 }, { "epoch": 0.35, "learning_rate": 0.00044252102159106187, "loss": 3.4736, "step": 20115 }, { "epoch": 0.35, "learning_rate": 0.00044244862726808114, "loss": 3.415, "step": 20120 }, { "epoch": 0.35, "learning_rate": 0.0004423762222336936, "loss": 3.4178, "step": 20125 }, { "epoch": 0.35, "learning_rate": 0.00044230380649334396, "loss": 3.4815, "step": 20130 }, { "epoch": 0.35, "learning_rate": 0.0004422313800524773, "loss": 3.4722, "step": 20135 }, { "epoch": 0.35, "learning_rate": 0.0004421589429165399, "loss": 3.5723, "step": 20140 }, { "epoch": 0.35, "learning_rate": 0.00044208649509097837, "loss": 3.4964, "step": 20145 }, { "epoch": 0.35, "learning_rate": 0.00044201403658124055, "loss": 3.3457, "step": 20150 }, { "epoch": 0.35, "learning_rate": 0.00044194156739277524, "loss": 3.5067, "step": 20155 }, { "epoch": 0.35, "learning_rate": 0.00044186908753103137, "loss": 3.4389, "step": 20160 }, { "epoch": 0.35, "learning_rate": 0.0004417965970014591, "loss": 3.3161, "step": 20165 }, { "epoch": 0.35, "learning_rate": 0.00044172409580950947, "loss": 3.5093, "step": 20170 }, { "epoch": 0.35, "learning_rate": 0.0004416515839606343, "loss": 3.3348, "step": 20175 }, { "epoch": 0.35, "learning_rate": 0.00044157906146028586, "loss": 3.4568, "step": 20180 }, { "epoch": 0.35, "learning_rate": 0.00044150652831391755, "loss": 3.4137, "step": 20185 }, { "epoch": 0.35, "learning_rate": 0.0004414339845269835, "loss": 3.3819, "step": 20190 }, { "epoch": 0.35, "learning_rate": 0.00044136143010493884, "loss": 3.5321, "step": 20195 }, { "epoch": 0.35, "learning_rate": 0.00044128886505323897, "loss": 3.458, "step": 20200 }, { "epoch": 0.35, "learning_rate": 0.00044121628937734053, "loss": 3.4455, "step": 20205 }, { "epoch": 0.35, "learning_rate": 0.0004411437030827008, "loss": 3.4663, "step": 20210 }, { "epoch": 0.35, "learning_rate": 0.0004410711061747781, "loss": 3.5031, "step": 20215 }, { "epoch": 0.35, "learning_rate": 0.00044099849865903107, "loss": 3.526, "step": 20220 }, { "epoch": 0.35, "learning_rate": 0.0004409258805409196, "loss": 3.4007, "step": 20225 }, { "epoch": 0.35, "learning_rate": 0.0004408532518259041, "loss": 3.4619, "step": 20230 }, { "epoch": 0.35, "learning_rate": 0.000440780612519446, "loss": 3.4583, "step": 20235 }, { "epoch": 0.35, "learning_rate": 0.0004407079626270072, "loss": 3.4842, "step": 20240 }, { "epoch": 0.35, "learning_rate": 0.0004406353021540509, "loss": 3.5053, "step": 20245 }, { "epoch": 0.35, "learning_rate": 0.00044056263110604045, "loss": 3.5203, "step": 20250 }, { "epoch": 0.35, "learning_rate": 0.00044048994948844063, "loss": 3.5216, "step": 20255 }, { "epoch": 0.35, "learning_rate": 0.0004404172573067166, "loss": 3.4012, "step": 20260 }, { "epoch": 0.35, "learning_rate": 0.0004403445545663344, "loss": 3.4549, "step": 20265 }, { "epoch": 0.35, "learning_rate": 0.000440271841272761, "loss": 3.3967, "step": 20270 }, { "epoch": 0.35, "learning_rate": 0.00044019911743146397, "loss": 3.4056, "step": 20275 }, { "epoch": 0.35, "learning_rate": 0.0004401263830479118, "loss": 3.5852, "step": 20280 }, { "epoch": 0.35, "learning_rate": 0.00044005363812757377, "loss": 3.5015, "step": 20285 }, { "epoch": 0.35, "learning_rate": 0.00043998088267591996, "loss": 3.5004, "step": 20290 }, { "epoch": 0.35, "learning_rate": 0.00043990811669842114, "loss": 3.4759, "step": 20295 }, { "epoch": 0.35, "learning_rate": 0.00043983534020054907, "loss": 3.4747, "step": 20300 }, { "epoch": 0.35, "learning_rate": 0.00043976255318777587, "loss": 3.4997, "step": 20305 }, { "epoch": 0.35, "learning_rate": 0.0004396897556655751, "loss": 3.4021, "step": 20310 }, { "epoch": 0.35, "learning_rate": 0.00043961694763942053, "loss": 3.5991, "step": 20315 }, { "epoch": 0.35, "learning_rate": 0.00043954412911478705, "loss": 3.4963, "step": 20320 }, { "epoch": 0.35, "learning_rate": 0.0004394713000971501, "loss": 3.3867, "step": 20325 }, { "epoch": 0.35, "learning_rate": 0.0004393984605919863, "loss": 3.493, "step": 20330 }, { "epoch": 0.35, "learning_rate": 0.0004393256106047726, "loss": 3.4838, "step": 20335 }, { "epoch": 0.35, "learning_rate": 0.0004392527501409871, "loss": 3.4146, "step": 20340 }, { "epoch": 0.35, "learning_rate": 0.0004391798792061084, "loss": 3.4319, "step": 20345 }, { "epoch": 0.35, "learning_rate": 0.000439106997805616, "loss": 3.3523, "step": 20350 }, { "epoch": 0.35, "learning_rate": 0.0004390341059449903, "loss": 3.4308, "step": 20355 }, { "epoch": 0.35, "learning_rate": 0.00043896120362971246, "loss": 3.3949, "step": 20360 }, { "epoch": 0.35, "learning_rate": 0.0004388882908652642, "loss": 3.5024, "step": 20365 }, { "epoch": 0.35, "learning_rate": 0.0004388153676571283, "loss": 3.4641, "step": 20370 }, { "epoch": 0.35, "learning_rate": 0.00043874243401078813, "loss": 3.4261, "step": 20375 }, { "epoch": 0.35, "learning_rate": 0.00043866948993172793, "loss": 3.4187, "step": 20380 }, { "epoch": 0.35, "learning_rate": 0.00043859653542543286, "loss": 3.4059, "step": 20385 }, { "epoch": 0.35, "learning_rate": 0.00043852357049738855, "loss": 3.381, "step": 20390 }, { "epoch": 0.35, "learning_rate": 0.0004384505951530817, "loss": 3.5046, "step": 20395 }, { "epoch": 0.35, "learning_rate": 0.0004383776093979996, "loss": 3.4859, "step": 20400 }, { "epoch": 0.35, "learning_rate": 0.00043830461323763047, "loss": 3.5565, "step": 20405 }, { "epoch": 0.35, "learning_rate": 0.0004382316066774633, "loss": 3.4298, "step": 20410 }, { "epoch": 0.35, "learning_rate": 0.00043815858972298763, "loss": 3.4938, "step": 20415 }, { "epoch": 0.35, "learning_rate": 0.00043808556237969403, "loss": 3.5072, "step": 20420 }, { "epoch": 0.35, "learning_rate": 0.00043801252465307396, "loss": 3.5391, "step": 20425 }, { "epoch": 0.35, "learning_rate": 0.0004379394765486192, "loss": 3.4818, "step": 20430 }, { "epoch": 0.35, "learning_rate": 0.0004378664180718228, "loss": 3.4382, "step": 20435 }, { "epoch": 0.35, "learning_rate": 0.0004377933492281783, "loss": 3.5275, "step": 20440 }, { "epoch": 0.35, "learning_rate": 0.0004377202700231801, "loss": 3.4213, "step": 20445 }, { "epoch": 0.35, "learning_rate": 0.00043764718046232346, "loss": 3.359, "step": 20450 }, { "epoch": 0.35, "learning_rate": 0.0004375740805511043, "loss": 3.5426, "step": 20455 }, { "epoch": 0.35, "learning_rate": 0.00043750097029501925, "loss": 3.4999, "step": 20460 }, { "epoch": 0.35, "learning_rate": 0.0004374278496995659, "loss": 3.4504, "step": 20465 }, { "epoch": 0.35, "learning_rate": 0.0004373547187702426, "loss": 3.4479, "step": 20470 }, { "epoch": 0.35, "learning_rate": 0.00043728157751254834, "loss": 3.4309, "step": 20475 }, { "epoch": 0.35, "learning_rate": 0.0004372084259319831, "loss": 3.468, "step": 20480 }, { "epoch": 0.35, "learning_rate": 0.00043713526403404746, "loss": 3.4853, "step": 20485 }, { "epoch": 0.35, "learning_rate": 0.0004370620918242426, "loss": 3.4808, "step": 20490 }, { "epoch": 0.35, "learning_rate": 0.000436988909308071, "loss": 3.4663, "step": 20495 }, { "epoch": 0.35, "learning_rate": 0.0004369157164910354, "loss": 3.3601, "step": 20500 }, { "epoch": 0.35, "eval_loss": 3.4890780448913574, "eval_runtime": 149.7679, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.775, "step": 20500 }, { "epoch": 0.35, "learning_rate": 0.00043684251337863957, "loss": 3.503, "step": 20505 }, { "epoch": 0.35, "learning_rate": 0.00043676929997638807, "loss": 3.4588, "step": 20510 }, { "epoch": 0.36, "learning_rate": 0.00043669607628978616, "loss": 3.5404, "step": 20515 }, { "epoch": 0.36, "learning_rate": 0.00043662284232433995, "loss": 3.4366, "step": 20520 }, { "epoch": 0.36, "learning_rate": 0.0004365495980855561, "loss": 3.3995, "step": 20525 }, { "epoch": 0.36, "learning_rate": 0.00043647634357894225, "loss": 3.3957, "step": 20530 }, { "epoch": 0.36, "learning_rate": 0.0004364030788100069, "loss": 3.5176, "step": 20535 }, { "epoch": 0.36, "learning_rate": 0.00043632980378425894, "loss": 3.5703, "step": 20540 }, { "epoch": 0.36, "learning_rate": 0.00043625651850720846, "loss": 3.4949, "step": 20545 }, { "epoch": 0.36, "learning_rate": 0.00043618322298436614, "loss": 3.4727, "step": 20550 }, { "epoch": 0.36, "learning_rate": 0.0004361099172212433, "loss": 3.4264, "step": 20555 }, { "epoch": 0.36, "learning_rate": 0.0004360366012233523, "loss": 3.4806, "step": 20560 }, { "epoch": 0.36, "learning_rate": 0.00043596327499620613, "loss": 3.4705, "step": 20565 }, { "epoch": 0.36, "learning_rate": 0.00043588993854531835, "loss": 3.4814, "step": 20570 }, { "epoch": 0.36, "learning_rate": 0.0004358165918762037, "loss": 3.4198, "step": 20575 }, { "epoch": 0.36, "learning_rate": 0.00043574323499437736, "loss": 3.3648, "step": 20580 }, { "epoch": 0.36, "learning_rate": 0.0004356698679053554, "loss": 3.5215, "step": 20585 }, { "epoch": 0.36, "learning_rate": 0.0004355964906146547, "loss": 3.4305, "step": 20590 }, { "epoch": 0.36, "learning_rate": 0.0004355231031277928, "loss": 3.3691, "step": 20595 }, { "epoch": 0.36, "learning_rate": 0.0004354497054502882, "loss": 3.4175, "step": 20600 }, { "epoch": 0.36, "learning_rate": 0.00043537629758765977, "loss": 3.4657, "step": 20605 }, { "epoch": 0.36, "learning_rate": 0.0004353028795454275, "loss": 3.5085, "step": 20610 }, { "epoch": 0.36, "learning_rate": 0.00043522945132911235, "loss": 3.4169, "step": 20615 }, { "epoch": 0.36, "learning_rate": 0.0004351560129442354, "loss": 3.4685, "step": 20620 }, { "epoch": 0.36, "learning_rate": 0.00043508256439631884, "loss": 3.5605, "step": 20625 }, { "epoch": 0.36, "learning_rate": 0.00043500910569088577, "loss": 3.4576, "step": 20630 }, { "epoch": 0.36, "learning_rate": 0.0004349356368334599, "loss": 3.5073, "step": 20635 }, { "epoch": 0.36, "learning_rate": 0.0004348621578295657, "loss": 3.456, "step": 20640 }, { "epoch": 0.36, "learning_rate": 0.00043478866868472833, "loss": 3.3731, "step": 20645 }, { "epoch": 0.36, "learning_rate": 0.0004347151694044738, "loss": 3.4255, "step": 20650 }, { "epoch": 0.36, "learning_rate": 0.00043464165999432903, "loss": 3.4228, "step": 20655 }, { "epoch": 0.36, "learning_rate": 0.0004345681404598215, "loss": 3.4786, "step": 20660 }, { "epoch": 0.36, "learning_rate": 0.0004344946108064794, "loss": 3.4763, "step": 20665 }, { "epoch": 0.36, "learning_rate": 0.0004344210710398318, "loss": 3.4635, "step": 20670 }, { "epoch": 0.36, "learning_rate": 0.00043434752116540854, "loss": 3.4831, "step": 20675 }, { "epoch": 0.36, "learning_rate": 0.0004342739611887403, "loss": 3.4503, "step": 20680 }, { "epoch": 0.36, "learning_rate": 0.0004342003911153583, "loss": 3.4472, "step": 20685 }, { "epoch": 0.36, "learning_rate": 0.00043412681095079465, "loss": 3.346, "step": 20690 }, { "epoch": 0.36, "learning_rate": 0.00043405322070058216, "loss": 3.3185, "step": 20695 }, { "epoch": 0.36, "learning_rate": 0.0004339796203702545, "loss": 3.5082, "step": 20700 }, { "epoch": 0.36, "learning_rate": 0.00043390600996534606, "loss": 3.5576, "step": 20705 }, { "epoch": 0.36, "learning_rate": 0.0004338323894913919, "loss": 3.4343, "step": 20710 }, { "epoch": 0.36, "learning_rate": 0.00043375875895392793, "loss": 3.5085, "step": 20715 }, { "epoch": 0.36, "learning_rate": 0.0004336851183584908, "loss": 3.3682, "step": 20720 }, { "epoch": 0.36, "learning_rate": 0.0004336114677106179, "loss": 3.419, "step": 20725 }, { "epoch": 0.36, "learning_rate": 0.0004335378070158474, "loss": 3.5034, "step": 20730 }, { "epoch": 0.36, "learning_rate": 0.00043346413627971803, "loss": 3.4361, "step": 20735 }, { "epoch": 0.36, "learning_rate": 0.00043339045550776976, "loss": 3.402, "step": 20740 }, { "epoch": 0.36, "learning_rate": 0.00043331676470554286, "loss": 3.4727, "step": 20745 }, { "epoch": 0.36, "learning_rate": 0.0004332430638785784, "loss": 3.5097, "step": 20750 }, { "epoch": 0.36, "learning_rate": 0.0004331693530324184, "loss": 3.4251, "step": 20755 }, { "epoch": 0.36, "learning_rate": 0.00043309563217260564, "loss": 3.4258, "step": 20760 }, { "epoch": 0.36, "learning_rate": 0.0004330219013046834, "loss": 3.3033, "step": 20765 }, { "epoch": 0.36, "learning_rate": 0.0004329481604341959, "loss": 3.4339, "step": 20770 }, { "epoch": 0.36, "learning_rate": 0.000432874409566688, "loss": 3.4677, "step": 20775 }, { "epoch": 0.36, "learning_rate": 0.0004328006487077056, "loss": 3.3985, "step": 20780 }, { "epoch": 0.36, "learning_rate": 0.00043272687786279497, "loss": 3.4579, "step": 20785 }, { "epoch": 0.36, "learning_rate": 0.00043265309703750327, "loss": 3.3846, "step": 20790 }, { "epoch": 0.36, "learning_rate": 0.0004325793062373786, "loss": 3.5574, "step": 20795 }, { "epoch": 0.36, "learning_rate": 0.00043250550546796953, "loss": 3.4578, "step": 20800 }, { "epoch": 0.36, "learning_rate": 0.0004324316947348255, "loss": 3.4623, "step": 20805 }, { "epoch": 0.36, "learning_rate": 0.0004323578740434967, "loss": 3.5179, "step": 20810 }, { "epoch": 0.36, "learning_rate": 0.0004322840433995342, "loss": 3.459, "step": 20815 }, { "epoch": 0.36, "learning_rate": 0.0004322102028084896, "loss": 3.478, "step": 20820 }, { "epoch": 0.36, "learning_rate": 0.00043213635227591516, "loss": 3.3421, "step": 20825 }, { "epoch": 0.36, "learning_rate": 0.0004320624918073643, "loss": 3.3589, "step": 20830 }, { "epoch": 0.36, "learning_rate": 0.0004319886214083908, "loss": 3.4584, "step": 20835 }, { "epoch": 0.36, "learning_rate": 0.0004319147410845495, "loss": 3.3751, "step": 20840 }, { "epoch": 0.36, "learning_rate": 0.0004318408508413957, "loss": 3.6159, "step": 20845 }, { "epoch": 0.36, "learning_rate": 0.00043176695068448555, "loss": 3.3952, "step": 20850 }, { "epoch": 0.36, "learning_rate": 0.000431693040619376, "loss": 3.5016, "step": 20855 }, { "epoch": 0.36, "learning_rate": 0.0004316191206516248, "loss": 3.412, "step": 20860 }, { "epoch": 0.36, "learning_rate": 0.0004315451907867902, "loss": 3.5053, "step": 20865 }, { "epoch": 0.36, "learning_rate": 0.00043147125103043147, "loss": 3.4214, "step": 20870 }, { "epoch": 0.36, "learning_rate": 0.00043139730138810835, "loss": 3.4585, "step": 20875 }, { "epoch": 0.36, "learning_rate": 0.0004313233418653816, "loss": 3.3709, "step": 20880 }, { "epoch": 0.36, "learning_rate": 0.00043124937246781254, "loss": 3.419, "step": 20885 }, { "epoch": 0.36, "learning_rate": 0.00043117539320096333, "loss": 3.4385, "step": 20890 }, { "epoch": 0.36, "learning_rate": 0.00043110140407039686, "loss": 3.4396, "step": 20895 }, { "epoch": 0.36, "learning_rate": 0.0004310274050816767, "loss": 3.3851, "step": 20900 }, { "epoch": 0.36, "learning_rate": 0.00043095339624036705, "loss": 3.4022, "step": 20905 }, { "epoch": 0.36, "learning_rate": 0.0004308793775520333, "loss": 3.4697, "step": 20910 }, { "epoch": 0.36, "learning_rate": 0.00043080534902224094, "loss": 3.3228, "step": 20915 }, { "epoch": 0.36, "learning_rate": 0.00043073131065655685, "loss": 3.4832, "step": 20920 }, { "epoch": 0.36, "learning_rate": 0.0004306572624605481, "loss": 3.4429, "step": 20925 }, { "epoch": 0.36, "learning_rate": 0.0004305832044397828, "loss": 3.3656, "step": 20930 }, { "epoch": 0.36, "learning_rate": 0.00043050913659982984, "loss": 3.4722, "step": 20935 }, { "epoch": 0.36, "learning_rate": 0.00043043505894625864, "loss": 3.4771, "step": 20940 }, { "epoch": 0.36, "learning_rate": 0.0004303609714846394, "loss": 3.4003, "step": 20945 }, { "epoch": 0.36, "learning_rate": 0.00043028687422054317, "loss": 3.5443, "step": 20950 }, { "epoch": 0.36, "learning_rate": 0.0004302127671595418, "loss": 3.4675, "step": 20955 }, { "epoch": 0.36, "learning_rate": 0.0004301386503072077, "loss": 3.4031, "step": 20960 }, { "epoch": 0.36, "learning_rate": 0.000430064523669114, "loss": 3.3901, "step": 20965 }, { "epoch": 0.36, "learning_rate": 0.0004299903872508346, "loss": 3.4691, "step": 20970 }, { "epoch": 0.36, "learning_rate": 0.00042991624105794435, "loss": 3.4782, "step": 20975 }, { "epoch": 0.36, "learning_rate": 0.0004298420850960186, "loss": 3.481, "step": 20980 }, { "epoch": 0.36, "learning_rate": 0.0004297679193706335, "loss": 3.4205, "step": 20985 }, { "epoch": 0.36, "learning_rate": 0.00042969374388736586, "loss": 3.4272, "step": 20990 }, { "epoch": 0.36, "learning_rate": 0.0004296195586517933, "loss": 3.4391, "step": 20995 }, { "epoch": 0.36, "learning_rate": 0.00042954536366949435, "loss": 3.5532, "step": 21000 }, { "epoch": 0.36, "eval_loss": 3.4810240268707275, "eval_runtime": 149.6804, "eval_samples_per_second": 12.3, "eval_steps_per_second": 0.775, "step": 21000 }, { "epoch": 0.36, "learning_rate": 0.0004294711589460479, "loss": 3.5336, "step": 21005 }, { "epoch": 0.36, "learning_rate": 0.00042939694448703385, "loss": 3.5589, "step": 21010 }, { "epoch": 0.36, "learning_rate": 0.0004293227202980327, "loss": 3.3975, "step": 21015 }, { "epoch": 0.36, "learning_rate": 0.0004292484863846257, "loss": 3.4718, "step": 21020 }, { "epoch": 0.36, "learning_rate": 0.000429174242752395, "loss": 3.4397, "step": 21025 }, { "epoch": 0.36, "learning_rate": 0.0004290999894069232, "loss": 3.5044, "step": 21030 }, { "epoch": 0.36, "learning_rate": 0.0004290257263537938, "loss": 3.4743, "step": 21035 }, { "epoch": 0.36, "learning_rate": 0.0004289514535985911, "loss": 3.4612, "step": 21040 }, { "epoch": 0.36, "learning_rate": 0.0004288771711468999, "loss": 3.5065, "step": 21045 }, { "epoch": 0.36, "learning_rate": 0.0004288028790043059, "loss": 3.3358, "step": 21050 }, { "epoch": 0.36, "learning_rate": 0.00042872857717639556, "loss": 3.4931, "step": 21055 }, { "epoch": 0.36, "learning_rate": 0.0004286542656687559, "loss": 3.4695, "step": 21060 }, { "epoch": 0.36, "learning_rate": 0.00042857994448697485, "loss": 3.3413, "step": 21065 }, { "epoch": 0.36, "learning_rate": 0.0004285056136366409, "loss": 3.497, "step": 21070 }, { "epoch": 0.36, "learning_rate": 0.00042843127312334345, "loss": 3.4309, "step": 21075 }, { "epoch": 0.36, "learning_rate": 0.00042835692295267237, "loss": 3.5423, "step": 21080 }, { "epoch": 0.36, "learning_rate": 0.0004282825631302185, "loss": 3.4508, "step": 21085 }, { "epoch": 0.36, "learning_rate": 0.0004282081936615734, "loss": 3.4325, "step": 21090 }, { "epoch": 0.37, "learning_rate": 0.0004281338145523291, "loss": 3.4188, "step": 21095 }, { "epoch": 0.37, "learning_rate": 0.00042805942580807863, "loss": 3.3961, "step": 21100 }, { "epoch": 0.37, "learning_rate": 0.00042798502743441577, "loss": 3.5294, "step": 21105 }, { "epoch": 0.37, "learning_rate": 0.0004279106194369346, "loss": 3.5052, "step": 21110 }, { "epoch": 0.37, "learning_rate": 0.0004278362018212304, "loss": 3.4474, "step": 21115 }, { "epoch": 0.37, "learning_rate": 0.0004277617745928991, "loss": 3.4268, "step": 21120 }, { "epoch": 0.37, "learning_rate": 0.00042768733775753705, "loss": 3.4271, "step": 21125 }, { "epoch": 0.37, "learning_rate": 0.0004276128913207415, "loss": 3.319, "step": 21130 }, { "epoch": 0.37, "learning_rate": 0.00042753843528811055, "loss": 3.4287, "step": 21135 }, { "epoch": 0.37, "learning_rate": 0.000427463969665243, "loss": 3.502, "step": 21140 }, { "epoch": 0.37, "learning_rate": 0.0004273894944577381, "loss": 3.3916, "step": 21145 }, { "epoch": 0.37, "learning_rate": 0.00042731500967119613, "loss": 3.4751, "step": 21150 }, { "epoch": 0.37, "learning_rate": 0.0004272405153112179, "loss": 3.3869, "step": 21155 }, { "epoch": 0.37, "learning_rate": 0.000427166011383405, "loss": 3.3328, "step": 21160 }, { "epoch": 0.37, "learning_rate": 0.0004270914978933598, "loss": 3.5021, "step": 21165 }, { "epoch": 0.37, "learning_rate": 0.00042701697484668515, "loss": 3.5497, "step": 21170 }, { "epoch": 0.37, "learning_rate": 0.0004269424422489851, "loss": 3.4791, "step": 21175 }, { "epoch": 0.37, "learning_rate": 0.00042686790010586386, "loss": 3.4404, "step": 21180 }, { "epoch": 0.37, "learning_rate": 0.0004267933484229267, "loss": 3.3707, "step": 21185 }, { "epoch": 0.37, "learning_rate": 0.0004267187872057797, "loss": 3.4202, "step": 21190 }, { "epoch": 0.37, "learning_rate": 0.00042664421646002916, "loss": 3.4687, "step": 21195 }, { "epoch": 0.37, "learning_rate": 0.00042656963619128267, "loss": 3.4317, "step": 21200 }, { "epoch": 0.37, "learning_rate": 0.00042649504640514816, "loss": 3.3562, "step": 21205 }, { "epoch": 0.37, "learning_rate": 0.0004264204471072345, "loss": 3.5205, "step": 21210 }, { "epoch": 0.37, "learning_rate": 0.00042634583830315103, "loss": 3.3985, "step": 21215 }, { "epoch": 0.37, "learning_rate": 0.00042627121999850806, "loss": 3.4669, "step": 21220 }, { "epoch": 0.37, "learning_rate": 0.0004261965921989165, "loss": 3.3633, "step": 21225 }, { "epoch": 0.37, "learning_rate": 0.00042612195490998795, "loss": 3.4944, "step": 21230 }, { "epoch": 0.37, "learning_rate": 0.0004260473081373347, "loss": 3.4313, "step": 21235 }, { "epoch": 0.37, "learning_rate": 0.00042597265188656995, "loss": 3.3019, "step": 21240 }, { "epoch": 0.37, "learning_rate": 0.0004258979861633073, "loss": 3.5405, "step": 21245 }, { "epoch": 0.37, "learning_rate": 0.0004258233109731613, "loss": 3.4114, "step": 21250 }, { "epoch": 0.37, "learning_rate": 0.0004257486263217473, "loss": 3.5809, "step": 21255 }, { "epoch": 0.37, "learning_rate": 0.00042567393221468094, "loss": 3.4726, "step": 21260 }, { "epoch": 0.37, "learning_rate": 0.00042559922865757896, "loss": 3.4404, "step": 21265 }, { "epoch": 0.37, "learning_rate": 0.0004255245156560586, "loss": 3.4897, "step": 21270 }, { "epoch": 0.37, "learning_rate": 0.00042544979321573794, "loss": 3.4024, "step": 21275 }, { "epoch": 0.37, "learning_rate": 0.00042537506134223595, "loss": 3.4146, "step": 21280 }, { "epoch": 0.37, "learning_rate": 0.0004253003200411717, "loss": 3.2768, "step": 21285 }, { "epoch": 0.37, "learning_rate": 0.0004252255693181657, "loss": 3.3862, "step": 21290 }, { "epoch": 0.37, "learning_rate": 0.0004251508091788385, "loss": 3.4472, "step": 21295 }, { "epoch": 0.37, "learning_rate": 0.00042507603962881193, "loss": 3.3695, "step": 21300 }, { "epoch": 0.37, "learning_rate": 0.00042500126067370814, "loss": 3.5154, "step": 21305 }, { "epoch": 0.37, "learning_rate": 0.00042492647231915016, "loss": 3.466, "step": 21310 }, { "epoch": 0.37, "learning_rate": 0.0004248516745707618, "loss": 3.5312, "step": 21315 }, { "epoch": 0.37, "learning_rate": 0.0004247768674341672, "loss": 3.4309, "step": 21320 }, { "epoch": 0.37, "learning_rate": 0.0004247020509149918, "loss": 3.3506, "step": 21325 }, { "epoch": 0.37, "learning_rate": 0.00042462722501886114, "loss": 3.3954, "step": 21330 }, { "epoch": 0.37, "learning_rate": 0.00042455238975140186, "loss": 3.4317, "step": 21335 }, { "epoch": 0.37, "learning_rate": 0.0004244775451182413, "loss": 3.4081, "step": 21340 }, { "epoch": 0.37, "learning_rate": 0.0004244026911250072, "loss": 3.4154, "step": 21345 }, { "epoch": 0.37, "learning_rate": 0.0004243278277773283, "loss": 3.378, "step": 21350 }, { "epoch": 0.37, "learning_rate": 0.000424252955080834, "loss": 3.4763, "step": 21355 }, { "epoch": 0.37, "learning_rate": 0.0004241780730411541, "loss": 3.3921, "step": 21360 }, { "epoch": 0.37, "learning_rate": 0.0004241031816639197, "loss": 3.4287, "step": 21365 }, { "epoch": 0.37, "learning_rate": 0.0004240282809547619, "loss": 3.4556, "step": 21370 }, { "epoch": 0.37, "learning_rate": 0.00042395337091931314, "loss": 3.5046, "step": 21375 }, { "epoch": 0.37, "learning_rate": 0.0004238784515632061, "loss": 3.3785, "step": 21380 }, { "epoch": 0.37, "learning_rate": 0.0004238035228920743, "loss": 3.4726, "step": 21385 }, { "epoch": 0.37, "learning_rate": 0.00042372858491155203, "loss": 3.5175, "step": 21390 }, { "epoch": 0.37, "learning_rate": 0.0004236536376272744, "loss": 3.4565, "step": 21395 }, { "epoch": 0.37, "learning_rate": 0.00042357868104487683, "loss": 3.4254, "step": 21400 }, { "epoch": 0.37, "learning_rate": 0.00042350371516999586, "loss": 3.4177, "step": 21405 }, { "epoch": 0.37, "learning_rate": 0.0004234287400082684, "loss": 3.4484, "step": 21410 }, { "epoch": 0.37, "learning_rate": 0.0004233537555653322, "loss": 3.4054, "step": 21415 }, { "epoch": 0.37, "learning_rate": 0.00042327876184682594, "loss": 3.3857, "step": 21420 }, { "epoch": 0.37, "learning_rate": 0.0004232037588583885, "loss": 3.451, "step": 21425 }, { "epoch": 0.37, "learning_rate": 0.0004231287466056597, "loss": 3.4559, "step": 21430 }, { "epoch": 0.37, "learning_rate": 0.0004230537250942803, "loss": 3.4196, "step": 21435 }, { "epoch": 0.37, "learning_rate": 0.0004229786943298914, "loss": 3.454, "step": 21440 }, { "epoch": 0.37, "learning_rate": 0.0004229036543181349, "loss": 3.4613, "step": 21445 }, { "epoch": 0.37, "learning_rate": 0.0004228286050646535, "loss": 3.4041, "step": 21450 }, { "epoch": 0.37, "learning_rate": 0.00042275354657509036, "loss": 3.4881, "step": 21455 }, { "epoch": 0.37, "learning_rate": 0.00042267847885508976, "loss": 3.5192, "step": 21460 }, { "epoch": 0.37, "learning_rate": 0.00042260340191029624, "loss": 3.4635, "step": 21465 }, { "epoch": 0.37, "learning_rate": 0.0004225283157463553, "loss": 3.4071, "step": 21470 }, { "epoch": 0.37, "learning_rate": 0.0004224532203689128, "loss": 3.4137, "step": 21475 }, { "epoch": 0.37, "learning_rate": 0.0004223781157836158, "loss": 3.3856, "step": 21480 }, { "epoch": 0.37, "learning_rate": 0.0004223030019961116, "loss": 3.4598, "step": 21485 }, { "epoch": 0.37, "learning_rate": 0.00042222787901204854, "loss": 3.4487, "step": 21490 }, { "epoch": 0.37, "learning_rate": 0.0004221527468370754, "loss": 3.4663, "step": 21495 }, { "epoch": 0.37, "learning_rate": 0.00042207760547684155, "loss": 3.4861, "step": 21500 }, { "epoch": 0.37, "eval_loss": 3.4763381481170654, "eval_runtime": 149.6778, "eval_samples_per_second": 12.3, "eval_steps_per_second": 0.775, "step": 21500 }, { "epoch": 0.37, "learning_rate": 0.0004220024549369975, "loss": 3.5112, "step": 21505 }, { "epoch": 0.37, "learning_rate": 0.0004219272952231942, "loss": 3.4445, "step": 21510 }, { "epoch": 0.37, "learning_rate": 0.000421852126341083, "loss": 3.4347, "step": 21515 }, { "epoch": 0.37, "learning_rate": 0.00042177694829631653, "loss": 3.4521, "step": 21520 }, { "epoch": 0.37, "learning_rate": 0.00042170176109454754, "loss": 3.3605, "step": 21525 }, { "epoch": 0.37, "learning_rate": 0.0004216265647414298, "loss": 3.3925, "step": 21530 }, { "epoch": 0.37, "learning_rate": 0.00042155135924261785, "loss": 3.4833, "step": 21535 }, { "epoch": 0.37, "learning_rate": 0.00042147614460376646, "loss": 3.435, "step": 21540 }, { "epoch": 0.37, "learning_rate": 0.0004214009208305317, "loss": 3.4267, "step": 21545 }, { "epoch": 0.37, "learning_rate": 0.0004213256879285698, "loss": 3.4272, "step": 21550 }, { "epoch": 0.37, "learning_rate": 0.00042125044590353796, "loss": 3.3872, "step": 21555 }, { "epoch": 0.37, "learning_rate": 0.00042117519476109395, "loss": 3.406, "step": 21560 }, { "epoch": 0.37, "learning_rate": 0.0004210999345068963, "loss": 3.3582, "step": 21565 }, { "epoch": 0.37, "learning_rate": 0.0004210246651466043, "loss": 3.4148, "step": 21570 }, { "epoch": 0.37, "learning_rate": 0.00042094938668587765, "loss": 3.4063, "step": 21575 }, { "epoch": 0.37, "learning_rate": 0.000420874099130377, "loss": 3.472, "step": 21580 }, { "epoch": 0.37, "learning_rate": 0.0004207988024857636, "loss": 3.5276, "step": 21585 }, { "epoch": 0.37, "learning_rate": 0.00042072349675769923, "loss": 3.5215, "step": 21590 }, { "epoch": 0.37, "learning_rate": 0.00042064818195184663, "loss": 3.5187, "step": 21595 }, { "epoch": 0.37, "learning_rate": 0.0004205728580738691, "loss": 3.4223, "step": 21600 }, { "epoch": 0.37, "learning_rate": 0.0004204975251294306, "loss": 3.3929, "step": 21605 }, { "epoch": 0.37, "learning_rate": 0.00042042218312419576, "loss": 3.5108, "step": 21610 }, { "epoch": 0.37, "learning_rate": 0.00042034683206382983, "loss": 3.571, "step": 21615 }, { "epoch": 0.37, "learning_rate": 0.0004202714719539989, "loss": 3.5817, "step": 21620 }, { "epoch": 0.37, "learning_rate": 0.0004201961028003697, "loss": 3.4792, "step": 21625 }, { "epoch": 0.37, "learning_rate": 0.00042012072460860956, "loss": 3.4894, "step": 21630 }, { "epoch": 0.37, "learning_rate": 0.0004200453373843865, "loss": 3.4205, "step": 21635 }, { "epoch": 0.37, "learning_rate": 0.0004199699411333693, "loss": 3.5227, "step": 21640 }, { "epoch": 0.37, "learning_rate": 0.00041989453586122744, "loss": 3.3261, "step": 21645 }, { "epoch": 0.37, "learning_rate": 0.00041981912157363087, "loss": 3.4374, "step": 21650 }, { "epoch": 0.37, "learning_rate": 0.0004197436982762504, "loss": 3.4541, "step": 21655 }, { "epoch": 0.37, "learning_rate": 0.00041966826597475757, "loss": 3.4462, "step": 21660 }, { "epoch": 0.37, "learning_rate": 0.00041959282467482435, "loss": 3.4077, "step": 21665 }, { "epoch": 0.38, "learning_rate": 0.00041951737438212367, "loss": 3.3667, "step": 21670 }, { "epoch": 0.38, "learning_rate": 0.0004194419151023289, "loss": 3.4438, "step": 21675 }, { "epoch": 0.38, "learning_rate": 0.0004193664468411143, "loss": 3.4317, "step": 21680 }, { "epoch": 0.38, "learning_rate": 0.00041929096960415457, "loss": 3.4413, "step": 21685 }, { "epoch": 0.38, "learning_rate": 0.0004192154833971254, "loss": 3.507, "step": 21690 }, { "epoch": 0.38, "learning_rate": 0.00041913998822570267, "loss": 3.5172, "step": 21695 }, { "epoch": 0.38, "learning_rate": 0.0004190644840955636, "loss": 3.4648, "step": 21700 }, { "epoch": 0.38, "learning_rate": 0.0004189889710123854, "loss": 3.4228, "step": 21705 }, { "epoch": 0.38, "learning_rate": 0.00041891344898184647, "loss": 3.383, "step": 21710 }, { "epoch": 0.38, "learning_rate": 0.00041883791800962554, "loss": 3.4611, "step": 21715 }, { "epoch": 0.38, "learning_rate": 0.0004187623781014021, "loss": 3.4332, "step": 21720 }, { "epoch": 0.38, "learning_rate": 0.00041868682926285677, "loss": 3.3851, "step": 21725 }, { "epoch": 0.38, "learning_rate": 0.0004186112714996699, "loss": 3.4926, "step": 21730 }, { "epoch": 0.38, "learning_rate": 0.00041853570481752335, "loss": 3.3628, "step": 21735 }, { "epoch": 0.38, "learning_rate": 0.0004184601292220993, "loss": 3.4518, "step": 21740 }, { "epoch": 0.38, "learning_rate": 0.00041838454471908066, "loss": 3.2995, "step": 21745 }, { "epoch": 0.38, "learning_rate": 0.000418308951314151, "loss": 3.4446, "step": 21750 }, { "epoch": 0.38, "learning_rate": 0.00041823334901299435, "loss": 3.5188, "step": 21755 }, { "epoch": 0.38, "learning_rate": 0.00041815773782129587, "loss": 3.4001, "step": 21760 }, { "epoch": 0.38, "learning_rate": 0.0004180821177447411, "loss": 3.4094, "step": 21765 }, { "epoch": 0.38, "learning_rate": 0.00041800648878901627, "loss": 3.3152, "step": 21770 }, { "epoch": 0.38, "learning_rate": 0.00041793085095980816, "loss": 3.3421, "step": 21775 }, { "epoch": 0.38, "learning_rate": 0.0004178552042628045, "loss": 3.4248, "step": 21780 }, { "epoch": 0.38, "learning_rate": 0.00041777954870369344, "loss": 3.401, "step": 21785 }, { "epoch": 0.38, "learning_rate": 0.00041770388428816397, "loss": 3.5127, "step": 21790 }, { "epoch": 0.38, "learning_rate": 0.00041762821102190554, "loss": 3.4763, "step": 21795 }, { "epoch": 0.38, "learning_rate": 0.0004175525289106085, "loss": 3.475, "step": 21800 }, { "epoch": 0.38, "learning_rate": 0.00041747683795996385, "loss": 3.3599, "step": 21805 }, { "epoch": 0.38, "learning_rate": 0.000417401138175663, "loss": 3.4129, "step": 21810 }, { "epoch": 0.38, "learning_rate": 0.00041732542956339816, "loss": 3.4717, "step": 21815 }, { "epoch": 0.38, "learning_rate": 0.0004172497121288623, "loss": 3.4032, "step": 21820 }, { "epoch": 0.38, "learning_rate": 0.00041717398587774913, "loss": 3.442, "step": 21825 }, { "epoch": 0.38, "learning_rate": 0.00041709825081575263, "loss": 3.4066, "step": 21830 }, { "epoch": 0.38, "learning_rate": 0.0004170225069485677, "loss": 3.4771, "step": 21835 }, { "epoch": 0.38, "learning_rate": 0.00041694675428189006, "loss": 3.4156, "step": 21840 }, { "epoch": 0.38, "learning_rate": 0.0004168709928214159, "loss": 3.3453, "step": 21845 }, { "epoch": 0.38, "learning_rate": 0.00041679522257284196, "loss": 3.5143, "step": 21850 }, { "epoch": 0.38, "learning_rate": 0.00041671944354186585, "loss": 3.5789, "step": 21855 }, { "epoch": 0.38, "learning_rate": 0.00041664365573418577, "loss": 3.406, "step": 21860 }, { "epoch": 0.38, "learning_rate": 0.00041656785915550055, "loss": 3.4958, "step": 21865 }, { "epoch": 0.38, "learning_rate": 0.0004164920538115098, "loss": 3.4977, "step": 21870 }, { "epoch": 0.38, "learning_rate": 0.0004164162397079135, "loss": 3.4383, "step": 21875 }, { "epoch": 0.38, "learning_rate": 0.00041634041685041266, "loss": 3.5104, "step": 21880 }, { "epoch": 0.38, "learning_rate": 0.0004162645852447087, "loss": 3.4549, "step": 21885 }, { "epoch": 0.38, "learning_rate": 0.0004161887448965039, "loss": 3.4399, "step": 21890 }, { "epoch": 0.38, "learning_rate": 0.0004161128958115008, "loss": 3.3542, "step": 21895 }, { "epoch": 0.38, "learning_rate": 0.0004160370379954029, "loss": 3.4418, "step": 21900 }, { "epoch": 0.38, "learning_rate": 0.00041596117145391466, "loss": 3.3958, "step": 21905 }, { "epoch": 0.38, "learning_rate": 0.0004158852961927405, "loss": 3.3919, "step": 21910 }, { "epoch": 0.38, "learning_rate": 0.0004158094122175859, "loss": 3.3159, "step": 21915 }, { "epoch": 0.38, "learning_rate": 0.0004157335195341571, "loss": 3.5038, "step": 21920 }, { "epoch": 0.38, "learning_rate": 0.0004156576181481608, "loss": 3.3401, "step": 21925 }, { "epoch": 0.38, "learning_rate": 0.00041558170806530435, "loss": 3.4432, "step": 21930 }, { "epoch": 0.38, "learning_rate": 0.0004155057892912957, "loss": 3.3199, "step": 21935 }, { "epoch": 0.38, "learning_rate": 0.00041542986183184367, "loss": 3.5088, "step": 21940 }, { "epoch": 0.38, "learning_rate": 0.00041535392569265765, "loss": 3.5437, "step": 21945 }, { "epoch": 0.38, "learning_rate": 0.0004152779808794476, "loss": 3.3675, "step": 21950 }, { "epoch": 0.38, "learning_rate": 0.0004152020273979242, "loss": 3.4963, "step": 21955 }, { "epoch": 0.38, "learning_rate": 0.00041512606525379866, "loss": 3.422, "step": 21960 }, { "epoch": 0.38, "learning_rate": 0.0004150500944527831, "loss": 3.3885, "step": 21965 }, { "epoch": 0.38, "learning_rate": 0.00041497411500059, "loss": 3.4297, "step": 21970 }, { "epoch": 0.38, "learning_rate": 0.00041489812690293274, "loss": 3.447, "step": 21975 }, { "epoch": 0.38, "learning_rate": 0.00041482213016552513, "loss": 3.4361, "step": 21980 }, { "epoch": 0.38, "learning_rate": 0.0004147461247940818, "loss": 3.423, "step": 21985 }, { "epoch": 0.38, "learning_rate": 0.0004146701107943179, "loss": 3.4564, "step": 21990 }, { "epoch": 0.38, "learning_rate": 0.00041459408817194946, "loss": 3.3803, "step": 21995 }, { "epoch": 0.38, "learning_rate": 0.00041451805693269283, "loss": 3.4608, "step": 22000 }, { "epoch": 0.38, "eval_loss": 3.4630024433135986, "eval_runtime": 149.7666, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.775, "step": 22000 }, { "epoch": 0.38, "learning_rate": 0.0004144420170822652, "loss": 3.4355, "step": 22005 }, { "epoch": 0.38, "learning_rate": 0.0004143659686263844, "loss": 3.4343, "step": 22010 }, { "epoch": 0.38, "learning_rate": 0.0004142899115707688, "loss": 3.3277, "step": 22015 }, { "epoch": 0.38, "learning_rate": 0.0004142138459211377, "loss": 3.4995, "step": 22020 }, { "epoch": 0.38, "learning_rate": 0.0004141377716832107, "loss": 3.4184, "step": 22025 }, { "epoch": 0.38, "learning_rate": 0.00041406168886270816, "loss": 3.3363, "step": 22030 }, { "epoch": 0.38, "learning_rate": 0.00041398559746535113, "loss": 3.3641, "step": 22035 }, { "epoch": 0.38, "learning_rate": 0.0004139094974968614, "loss": 3.4262, "step": 22040 }, { "epoch": 0.38, "learning_rate": 0.0004138333889629613, "loss": 3.4048, "step": 22045 }, { "epoch": 0.38, "learning_rate": 0.0004137572718693737, "loss": 3.4102, "step": 22050 }, { "epoch": 0.38, "learning_rate": 0.00041368114622182214, "loss": 3.4041, "step": 22055 }, { "epoch": 0.38, "learning_rate": 0.0004136050120260311, "loss": 3.4719, "step": 22060 }, { "epoch": 0.38, "learning_rate": 0.0004135288692877254, "loss": 3.4398, "step": 22065 }, { "epoch": 0.38, "learning_rate": 0.0004134527180126305, "loss": 3.3921, "step": 22070 }, { "epoch": 0.38, "learning_rate": 0.0004133765582064726, "loss": 3.4075, "step": 22075 }, { "epoch": 0.38, "learning_rate": 0.00041330038987497856, "loss": 3.4711, "step": 22080 }, { "epoch": 0.38, "learning_rate": 0.000413224213023876, "loss": 3.4251, "step": 22085 }, { "epoch": 0.38, "learning_rate": 0.00041314802765889276, "loss": 3.3675, "step": 22090 }, { "epoch": 0.38, "learning_rate": 0.0004130718337857578, "loss": 3.362, "step": 22095 }, { "epoch": 0.38, "learning_rate": 0.0004129956314102003, "loss": 3.4152, "step": 22100 }, { "epoch": 0.38, "learning_rate": 0.0004129194205379506, "loss": 3.4164, "step": 22105 }, { "epoch": 0.38, "learning_rate": 0.00041284320117473906, "loss": 3.476, "step": 22110 }, { "epoch": 0.38, "learning_rate": 0.00041276697332629707, "loss": 3.3858, "step": 22115 }, { "epoch": 0.38, "learning_rate": 0.0004126907369983566, "loss": 3.3862, "step": 22120 }, { "epoch": 0.38, "learning_rate": 0.0004126144921966503, "loss": 3.3574, "step": 22125 }, { "epoch": 0.38, "learning_rate": 0.00041253823892691135, "loss": 3.358, "step": 22130 }, { "epoch": 0.38, "learning_rate": 0.0004124619771948735, "loss": 3.4601, "step": 22135 }, { "epoch": 0.38, "learning_rate": 0.00041238570700627143, "loss": 3.566, "step": 22140 }, { "epoch": 0.38, "learning_rate": 0.00041230942836684, "loss": 3.4623, "step": 22145 }, { "epoch": 0.38, "learning_rate": 0.00041223314128231535, "loss": 3.4965, "step": 22150 }, { "epoch": 0.38, "learning_rate": 0.00041215684575843345, "loss": 3.3462, "step": 22155 }, { "epoch": 0.38, "learning_rate": 0.00041208054180093175, "loss": 3.3736, "step": 22160 }, { "epoch": 0.38, "learning_rate": 0.00041200422941554753, "loss": 3.503, "step": 22165 }, { "epoch": 0.38, "learning_rate": 0.0004119279086080193, "loss": 3.5064, "step": 22170 }, { "epoch": 0.38, "learning_rate": 0.00041185157938408603, "loss": 3.4897, "step": 22175 }, { "epoch": 0.38, "learning_rate": 0.00041177524174948715, "loss": 3.4477, "step": 22180 }, { "epoch": 0.38, "learning_rate": 0.000411698895709963, "loss": 3.3709, "step": 22185 }, { "epoch": 0.38, "learning_rate": 0.0004116225412712544, "loss": 3.4257, "step": 22190 }, { "epoch": 0.38, "learning_rate": 0.0004115461784391026, "loss": 3.3655, "step": 22195 }, { "epoch": 0.38, "learning_rate": 0.0004114698072192499, "loss": 3.4449, "step": 22200 }, { "epoch": 0.38, "learning_rate": 0.000411393427617439, "loss": 3.4398, "step": 22205 }, { "epoch": 0.38, "learning_rate": 0.0004113170396394133, "loss": 3.1629, "step": 22210 }, { "epoch": 0.38, "learning_rate": 0.0004112406432909165, "loss": 3.3966, "step": 22215 }, { "epoch": 0.38, "learning_rate": 0.0004111642385776935, "loss": 3.3941, "step": 22220 }, { "epoch": 0.38, "learning_rate": 0.0004110878255054896, "loss": 3.3217, "step": 22225 }, { "epoch": 0.38, "learning_rate": 0.0004110114040800504, "loss": 3.3977, "step": 22230 }, { "epoch": 0.38, "learning_rate": 0.00041093497430712267, "loss": 3.4394, "step": 22235 }, { "epoch": 0.38, "learning_rate": 0.00041085853619245323, "loss": 3.4824, "step": 22240 }, { "epoch": 0.38, "learning_rate": 0.00041078208974179017, "loss": 3.4449, "step": 22245 }, { "epoch": 0.39, "learning_rate": 0.00041070563496088165, "loss": 3.4106, "step": 22250 }, { "epoch": 0.39, "learning_rate": 0.00041062917185547666, "loss": 3.4365, "step": 22255 }, { "epoch": 0.39, "learning_rate": 0.00041055270043132493, "loss": 3.4706, "step": 22260 }, { "epoch": 0.39, "learning_rate": 0.0004104762206941768, "loss": 3.3919, "step": 22265 }, { "epoch": 0.39, "learning_rate": 0.000410399732649783, "loss": 3.4725, "step": 22270 }, { "epoch": 0.39, "learning_rate": 0.00041032323630389516, "loss": 3.5326, "step": 22275 }, { "epoch": 0.39, "learning_rate": 0.00041024673166226524, "loss": 3.2729, "step": 22280 }, { "epoch": 0.39, "learning_rate": 0.00041017021873064613, "loss": 3.4428, "step": 22285 }, { "epoch": 0.39, "learning_rate": 0.0004100936975147913, "loss": 3.4666, "step": 22290 }, { "epoch": 0.39, "learning_rate": 0.0004100171680204545, "loss": 3.4196, "step": 22295 }, { "epoch": 0.39, "learning_rate": 0.0004099406302533905, "loss": 3.4659, "step": 22300 }, { "epoch": 0.39, "learning_rate": 0.00040986408421935454, "loss": 3.4275, "step": 22305 }, { "epoch": 0.39, "learning_rate": 0.00040978752992410264, "loss": 3.4052, "step": 22310 }, { "epoch": 0.39, "learning_rate": 0.000409710967373391, "loss": 3.5221, "step": 22315 }, { "epoch": 0.39, "learning_rate": 0.0004096343965729769, "loss": 3.5078, "step": 22320 }, { "epoch": 0.39, "learning_rate": 0.00040955781752861805, "loss": 3.4321, "step": 22325 }, { "epoch": 0.39, "learning_rate": 0.00040948123024607286, "loss": 3.4621, "step": 22330 }, { "epoch": 0.39, "learning_rate": 0.00040940463473110013, "loss": 3.3809, "step": 22335 }, { "epoch": 0.39, "learning_rate": 0.0004093280309894596, "loss": 3.4517, "step": 22340 }, { "epoch": 0.39, "learning_rate": 0.00040925141902691144, "loss": 3.4528, "step": 22345 }, { "epoch": 0.39, "learning_rate": 0.00040917479884921654, "loss": 3.4547, "step": 22350 }, { "epoch": 0.39, "learning_rate": 0.0004090981704621362, "loss": 3.4308, "step": 22355 }, { "epoch": 0.39, "learning_rate": 0.0004090215338714325, "loss": 3.2692, "step": 22360 }, { "epoch": 0.39, "learning_rate": 0.0004089448890828683, "loss": 3.4193, "step": 22365 }, { "epoch": 0.39, "learning_rate": 0.00040886823610220674, "loss": 3.4402, "step": 22370 }, { "epoch": 0.39, "learning_rate": 0.0004087915749352117, "loss": 3.4521, "step": 22375 }, { "epoch": 0.39, "learning_rate": 0.00040871490558764786, "loss": 3.3807, "step": 22380 }, { "epoch": 0.39, "learning_rate": 0.0004086382280652802, "loss": 3.4643, "step": 22385 }, { "epoch": 0.39, "learning_rate": 0.0004085615423738747, "loss": 3.4981, "step": 22390 }, { "epoch": 0.39, "learning_rate": 0.0004084848485191974, "loss": 3.5087, "step": 22395 }, { "epoch": 0.39, "learning_rate": 0.00040840814650701553, "loss": 3.453, "step": 22400 }, { "epoch": 0.39, "learning_rate": 0.0004083314363430967, "loss": 3.3977, "step": 22405 }, { "epoch": 0.39, "learning_rate": 0.00040825471803320894, "loss": 3.357, "step": 22410 }, { "epoch": 0.39, "learning_rate": 0.0004081779915831213, "loss": 3.4247, "step": 22415 }, { "epoch": 0.39, "learning_rate": 0.00040810125699860297, "loss": 3.3161, "step": 22420 }, { "epoch": 0.39, "learning_rate": 0.00040802451428542414, "loss": 3.4269, "step": 22425 }, { "epoch": 0.39, "learning_rate": 0.0004079477634493556, "loss": 3.4299, "step": 22430 }, { "epoch": 0.39, "learning_rate": 0.00040787100449616833, "loss": 3.4994, "step": 22435 }, { "epoch": 0.39, "learning_rate": 0.0004077942374316344, "loss": 3.3577, "step": 22440 }, { "epoch": 0.39, "learning_rate": 0.0004077174622615262, "loss": 3.353, "step": 22445 }, { "epoch": 0.39, "learning_rate": 0.0004076406789916169, "loss": 3.5383, "step": 22450 }, { "epoch": 0.39, "learning_rate": 0.00040756388762768023, "loss": 3.4003, "step": 22455 }, { "epoch": 0.39, "learning_rate": 0.0004074870881754904, "loss": 3.4799, "step": 22460 }, { "epoch": 0.39, "learning_rate": 0.0004074102806408225, "loss": 3.3984, "step": 22465 }, { "epoch": 0.39, "learning_rate": 0.0004073334650294519, "loss": 3.4481, "step": 22470 }, { "epoch": 0.39, "learning_rate": 0.00040725664134715485, "loss": 3.4943, "step": 22475 }, { "epoch": 0.39, "learning_rate": 0.00040717980959970794, "loss": 3.4733, "step": 22480 }, { "epoch": 0.39, "learning_rate": 0.0004071029697928887, "loss": 3.4076, "step": 22485 }, { "epoch": 0.39, "learning_rate": 0.0004070261219324751, "loss": 3.4849, "step": 22490 }, { "epoch": 0.39, "learning_rate": 0.00040694926602424553, "loss": 3.4302, "step": 22495 }, { "epoch": 0.39, "learning_rate": 0.0004068724020739793, "loss": 3.4945, "step": 22500 }, { "epoch": 0.39, "eval_loss": 3.4543726444244385, "eval_runtime": 149.6841, "eval_samples_per_second": 12.299, "eval_steps_per_second": 0.775, "step": 22500 }, { "epoch": 0.39, "learning_rate": 0.00040679553008745613, "loss": 3.5376, "step": 22505 }, { "epoch": 0.39, "learning_rate": 0.00040671865007045654, "loss": 3.2163, "step": 22510 }, { "epoch": 0.39, "learning_rate": 0.00040664176202876123, "loss": 3.4521, "step": 22515 }, { "epoch": 0.39, "learning_rate": 0.00040656486596815194, "loss": 3.3826, "step": 22520 }, { "epoch": 0.39, "learning_rate": 0.00040648796189441094, "loss": 3.3821, "step": 22525 }, { "epoch": 0.39, "learning_rate": 0.00040641104981332097, "loss": 3.4799, "step": 22530 }, { "epoch": 0.39, "learning_rate": 0.00040633412973066534, "loss": 3.3088, "step": 22535 }, { "epoch": 0.39, "learning_rate": 0.0004062572016522281, "loss": 3.4578, "step": 22540 }, { "epoch": 0.39, "learning_rate": 0.00040618026558379396, "loss": 3.4492, "step": 22545 }, { "epoch": 0.39, "learning_rate": 0.00040610332153114795, "loss": 3.4271, "step": 22550 }, { "epoch": 0.39, "learning_rate": 0.0004060263695000759, "loss": 3.3281, "step": 22555 }, { "epoch": 0.39, "learning_rate": 0.00040594940949636424, "loss": 3.5284, "step": 22560 }, { "epoch": 0.39, "learning_rate": 0.00040587244152579996, "loss": 3.4063, "step": 22565 }, { "epoch": 0.39, "learning_rate": 0.00040579546559417083, "loss": 3.3343, "step": 22570 }, { "epoch": 0.39, "learning_rate": 0.00040571848170726463, "loss": 3.4706, "step": 22575 }, { "epoch": 0.39, "learning_rate": 0.0004056414898708706, "loss": 3.4401, "step": 22580 }, { "epoch": 0.39, "learning_rate": 0.00040556449009077777, "loss": 3.4597, "step": 22585 }, { "epoch": 0.39, "learning_rate": 0.00040548748237277644, "loss": 3.4925, "step": 22590 }, { "epoch": 0.39, "learning_rate": 0.000405410466722657, "loss": 3.2829, "step": 22595 }, { "epoch": 0.39, "learning_rate": 0.0004053334431462106, "loss": 3.4492, "step": 22600 }, { "epoch": 0.39, "learning_rate": 0.0004052564116492292, "loss": 3.315, "step": 22605 }, { "epoch": 0.39, "learning_rate": 0.0004051793722375051, "loss": 3.4348, "step": 22610 }, { "epoch": 0.39, "learning_rate": 0.0004051023249168312, "loss": 3.4744, "step": 22615 }, { "epoch": 0.39, "learning_rate": 0.0004050252696930011, "loss": 3.4817, "step": 22620 }, { "epoch": 0.39, "learning_rate": 0.0004049482065718089, "loss": 3.4592, "step": 22625 }, { "epoch": 0.39, "learning_rate": 0.0004048711355590495, "loss": 3.3313, "step": 22630 }, { "epoch": 0.39, "learning_rate": 0.00040479405666051817, "loss": 3.4485, "step": 22635 }, { "epoch": 0.39, "learning_rate": 0.00040471696988201076, "loss": 3.3691, "step": 22640 }, { "epoch": 0.39, "learning_rate": 0.00040463987522932395, "loss": 3.4079, "step": 22645 }, { "epoch": 0.39, "learning_rate": 0.0004045627727082548, "loss": 3.4411, "step": 22650 }, { "epoch": 0.39, "learning_rate": 0.000404485662324601, "loss": 3.4073, "step": 22655 }, { "epoch": 0.39, "learning_rate": 0.0004044085440841609, "loss": 3.3304, "step": 22660 }, { "epoch": 0.39, "learning_rate": 0.0004043314179927332, "loss": 3.3583, "step": 22665 }, { "epoch": 0.39, "learning_rate": 0.0004042542840561178, "loss": 3.4128, "step": 22670 }, { "epoch": 0.39, "learning_rate": 0.0004041771422801144, "loss": 3.4874, "step": 22675 }, { "epoch": 0.39, "learning_rate": 0.00040409999267052385, "loss": 3.4342, "step": 22680 }, { "epoch": 0.39, "learning_rate": 0.0004040228352331474, "loss": 3.4489, "step": 22685 }, { "epoch": 0.39, "learning_rate": 0.00040394566997378674, "loss": 3.4437, "step": 22690 }, { "epoch": 0.39, "learning_rate": 0.00040386849689824444, "loss": 3.3225, "step": 22695 }, { "epoch": 0.39, "learning_rate": 0.0004037913160123235, "loss": 3.4923, "step": 22700 }, { "epoch": 0.39, "learning_rate": 0.0004037141273218275, "loss": 3.5339, "step": 22705 }, { "epoch": 0.39, "learning_rate": 0.0004036369308325607, "loss": 3.4442, "step": 22710 }, { "epoch": 0.39, "learning_rate": 0.0004035597265503278, "loss": 3.2786, "step": 22715 }, { "epoch": 0.39, "learning_rate": 0.0004034825144809342, "loss": 3.3097, "step": 22720 }, { "epoch": 0.39, "learning_rate": 0.0004034052946301858, "loss": 3.3156, "step": 22725 }, { "epoch": 0.39, "learning_rate": 0.0004033280670038893, "loss": 3.4123, "step": 22730 }, { "epoch": 0.39, "learning_rate": 0.0004032508316078517, "loss": 3.3769, "step": 22735 }, { "epoch": 0.39, "learning_rate": 0.00040317358844788064, "loss": 3.4664, "step": 22740 }, { "epoch": 0.39, "learning_rate": 0.0004030963375297845, "loss": 3.4246, "step": 22745 }, { "epoch": 0.39, "learning_rate": 0.00040301907885937217, "loss": 3.4092, "step": 22750 }, { "epoch": 0.39, "learning_rate": 0.0004029418124424531, "loss": 3.4981, "step": 22755 }, { "epoch": 0.39, "learning_rate": 0.00040286453828483723, "loss": 3.3302, "step": 22760 }, { "epoch": 0.39, "learning_rate": 0.00040278725639233525, "loss": 3.3779, "step": 22765 }, { "epoch": 0.39, "learning_rate": 0.0004027099667707584, "loss": 3.4032, "step": 22770 }, { "epoch": 0.39, "learning_rate": 0.0004026326694259185, "loss": 3.4799, "step": 22775 }, { "epoch": 0.39, "learning_rate": 0.00040255536436362767, "loss": 3.4285, "step": 22780 }, { "epoch": 0.39, "learning_rate": 0.00040247805158969915, "loss": 3.4642, "step": 22785 }, { "epoch": 0.39, "learning_rate": 0.00040240073110994637, "loss": 3.4589, "step": 22790 }, { "epoch": 0.39, "learning_rate": 0.0004023234029301833, "loss": 3.4435, "step": 22795 }, { "epoch": 0.39, "learning_rate": 0.0004022460670562248, "loss": 3.3843, "step": 22800 }, { "epoch": 0.39, "learning_rate": 0.0004021687234938861, "loss": 3.5109, "step": 22805 }, { "epoch": 0.39, "learning_rate": 0.000402091372248983, "loss": 3.3412, "step": 22810 }, { "epoch": 0.39, "learning_rate": 0.00040201401332733177, "loss": 3.2672, "step": 22815 }, { "epoch": 0.39, "learning_rate": 0.00040193664673474967, "loss": 3.3403, "step": 22820 }, { "epoch": 0.4, "learning_rate": 0.0004018592724770541, "loss": 3.4237, "step": 22825 }, { "epoch": 0.4, "learning_rate": 0.00040178189056006334, "loss": 3.2901, "step": 22830 }, { "epoch": 0.4, "learning_rate": 0.0004017045009895959, "loss": 3.5062, "step": 22835 }, { "epoch": 0.4, "learning_rate": 0.00040162710377147134, "loss": 3.45, "step": 22840 }, { "epoch": 0.4, "learning_rate": 0.0004015496989115093, "loss": 3.435, "step": 22845 }, { "epoch": 0.4, "learning_rate": 0.0004014722864155304, "loss": 3.4186, "step": 22850 }, { "epoch": 0.4, "learning_rate": 0.0004013948662893557, "loss": 3.3804, "step": 22855 }, { "epoch": 0.4, "learning_rate": 0.0004013174385388065, "loss": 3.4171, "step": 22860 }, { "epoch": 0.4, "learning_rate": 0.0004012400031697053, "loss": 3.3592, "step": 22865 }, { "epoch": 0.4, "learning_rate": 0.00040116256018787465, "loss": 3.4479, "step": 22870 }, { "epoch": 0.4, "learning_rate": 0.00040108510959913795, "loss": 3.283, "step": 22875 }, { "epoch": 0.4, "learning_rate": 0.00040100765140931905, "loss": 3.5205, "step": 22880 }, { "epoch": 0.4, "learning_rate": 0.00040093018562424235, "loss": 3.3975, "step": 22885 }, { "epoch": 0.4, "learning_rate": 0.00040085271224973303, "loss": 3.4526, "step": 22890 }, { "epoch": 0.4, "learning_rate": 0.0004007752312916167, "loss": 3.4228, "step": 22895 }, { "epoch": 0.4, "learning_rate": 0.00040069774275571933, "loss": 3.4961, "step": 22900 }, { "epoch": 0.4, "learning_rate": 0.00040062024664786773, "loss": 3.2809, "step": 22905 }, { "epoch": 0.4, "learning_rate": 0.0004005427429738893, "loss": 3.3984, "step": 22910 }, { "epoch": 0.4, "learning_rate": 0.00040046523173961195, "loss": 3.3236, "step": 22915 }, { "epoch": 0.4, "learning_rate": 0.00040038771295086397, "loss": 3.4506, "step": 22920 }, { "epoch": 0.4, "learning_rate": 0.0004003101866134744, "loss": 3.4803, "step": 22925 }, { "epoch": 0.4, "learning_rate": 0.00040023265273327296, "loss": 3.4333, "step": 22930 }, { "epoch": 0.4, "learning_rate": 0.0004001551113160897, "loss": 3.4086, "step": 22935 }, { "epoch": 0.4, "learning_rate": 0.00040007756236775543, "loss": 3.329, "step": 22940 }, { "epoch": 0.4, "learning_rate": 0.0004000000058941012, "loss": 3.4441, "step": 22945 }, { "epoch": 0.4, "learning_rate": 0.0003999224419009591, "loss": 3.4387, "step": 22950 }, { "epoch": 0.4, "learning_rate": 0.00039984487039416153, "loss": 3.3819, "step": 22955 }, { "epoch": 0.4, "learning_rate": 0.0003997672913795413, "loss": 3.3857, "step": 22960 }, { "epoch": 0.4, "learning_rate": 0.00039968970486293213, "loss": 3.4039, "step": 22965 }, { "epoch": 0.4, "learning_rate": 0.000399612110850168, "loss": 3.4457, "step": 22970 }, { "epoch": 0.4, "learning_rate": 0.00039953450934708366, "loss": 3.4657, "step": 22975 }, { "epoch": 0.4, "learning_rate": 0.00039945690035951434, "loss": 3.3894, "step": 22980 }, { "epoch": 0.4, "learning_rate": 0.00039937928389329575, "loss": 3.5035, "step": 22985 }, { "epoch": 0.4, "learning_rate": 0.0003993016599542644, "loss": 3.3818, "step": 22990 }, { "epoch": 0.4, "learning_rate": 0.0003992240285482571, "loss": 3.3472, "step": 22995 }, { "epoch": 0.4, "learning_rate": 0.0003991463896811114, "loss": 3.3529, "step": 23000 }, { "epoch": 0.4, "eval_loss": 3.442578077316284, "eval_runtime": 149.8744, "eval_samples_per_second": 12.284, "eval_steps_per_second": 0.774, "step": 23000 }, { "epoch": 0.4, "learning_rate": 0.00039906874335866535, "loss": 3.3731, "step": 23005 }, { "epoch": 0.4, "learning_rate": 0.0003989910895867574, "loss": 3.4402, "step": 23010 }, { "epoch": 0.4, "learning_rate": 0.000398913428371227, "loss": 3.4643, "step": 23015 }, { "epoch": 0.4, "learning_rate": 0.0003988357597179135, "loss": 3.501, "step": 23020 }, { "epoch": 0.4, "learning_rate": 0.0003987580836326575, "loss": 3.4019, "step": 23025 }, { "epoch": 0.4, "learning_rate": 0.0003986804001212998, "loss": 3.351, "step": 23030 }, { "epoch": 0.4, "learning_rate": 0.0003986027091896817, "loss": 3.313, "step": 23035 }, { "epoch": 0.4, "learning_rate": 0.0003985250108436452, "loss": 3.4688, "step": 23040 }, { "epoch": 0.4, "learning_rate": 0.0003984473050890327, "loss": 3.3642, "step": 23045 }, { "epoch": 0.4, "learning_rate": 0.0003983695919316875, "loss": 3.3882, "step": 23050 }, { "epoch": 0.4, "learning_rate": 0.00039829187137745316, "loss": 3.3845, "step": 23055 }, { "epoch": 0.4, "learning_rate": 0.0003982141434321738, "loss": 3.3129, "step": 23060 }, { "epoch": 0.4, "learning_rate": 0.00039813640810169415, "loss": 3.2984, "step": 23065 }, { "epoch": 0.4, "learning_rate": 0.0003980586653918596, "loss": 3.4537, "step": 23070 }, { "epoch": 0.4, "learning_rate": 0.00039798091530851586, "loss": 3.4673, "step": 23075 }, { "epoch": 0.4, "learning_rate": 0.0003979031578575096, "loss": 3.4474, "step": 23080 }, { "epoch": 0.4, "learning_rate": 0.0003978253930446875, "loss": 3.4899, "step": 23085 }, { "epoch": 0.4, "learning_rate": 0.00039774762087589713, "loss": 3.4094, "step": 23090 }, { "epoch": 0.4, "learning_rate": 0.0003976698413569868, "loss": 3.4574, "step": 23095 }, { "epoch": 0.4, "learning_rate": 0.00039759205449380476, "loss": 3.4646, "step": 23100 }, { "epoch": 0.4, "learning_rate": 0.0003975142602922005, "loss": 3.4299, "step": 23105 }, { "epoch": 0.4, "learning_rate": 0.0003974364587580236, "loss": 3.4025, "step": 23110 }, { "epoch": 0.4, "learning_rate": 0.0003973586498971243, "loss": 3.4642, "step": 23115 }, { "epoch": 0.4, "learning_rate": 0.00039728083371535355, "loss": 3.3419, "step": 23120 }, { "epoch": 0.4, "learning_rate": 0.0003972030102185626, "loss": 3.4246, "step": 23125 }, { "epoch": 0.4, "learning_rate": 0.00039712517941260344, "loss": 3.4522, "step": 23130 }, { "epoch": 0.4, "learning_rate": 0.0003970473413033286, "loss": 3.331, "step": 23135 }, { "epoch": 0.4, "learning_rate": 0.000396969495896591, "loss": 3.3998, "step": 23140 }, { "epoch": 0.4, "learning_rate": 0.00039689164319824436, "loss": 3.3278, "step": 23145 }, { "epoch": 0.4, "learning_rate": 0.0003968137832141426, "loss": 3.4126, "step": 23150 }, { "epoch": 0.4, "learning_rate": 0.0003967359159501406, "loss": 3.4785, "step": 23155 }, { "epoch": 0.4, "learning_rate": 0.00039665804141209346, "loss": 3.3449, "step": 23160 }, { "epoch": 0.4, "learning_rate": 0.0003965801596058569, "loss": 3.2645, "step": 23165 }, { "epoch": 0.4, "learning_rate": 0.00039650227053728736, "loss": 3.4129, "step": 23170 }, { "epoch": 0.4, "learning_rate": 0.0003964243742122417, "loss": 3.4324, "step": 23175 }, { "epoch": 0.4, "learning_rate": 0.00039634647063657724, "loss": 3.3419, "step": 23180 }, { "epoch": 0.4, "learning_rate": 0.00039626855981615184, "loss": 3.4273, "step": 23185 }, { "epoch": 0.4, "learning_rate": 0.00039619064175682416, "loss": 3.3218, "step": 23190 }, { "epoch": 0.4, "learning_rate": 0.0003961127164644533, "loss": 3.5256, "step": 23195 }, { "epoch": 0.4, "learning_rate": 0.0003960347839448986, "loss": 3.4073, "step": 23200 }, { "epoch": 0.4, "learning_rate": 0.0003959568442040203, "loss": 3.4519, "step": 23205 }, { "epoch": 0.4, "learning_rate": 0.0003958788972476792, "loss": 3.4716, "step": 23210 }, { "epoch": 0.4, "learning_rate": 0.0003958009430817364, "loss": 3.3856, "step": 23215 }, { "epoch": 0.4, "learning_rate": 0.0003957229817120536, "loss": 3.3233, "step": 23220 }, { "epoch": 0.4, "learning_rate": 0.00039564501314449317, "loss": 3.4366, "step": 23225 }, { "epoch": 0.4, "learning_rate": 0.00039556703738491785, "loss": 3.3467, "step": 23230 }, { "epoch": 0.4, "learning_rate": 0.00039548905443919117, "loss": 3.324, "step": 23235 }, { "epoch": 0.4, "learning_rate": 0.000395411064313177, "loss": 3.3125, "step": 23240 }, { "epoch": 0.4, "learning_rate": 0.0003953330670127397, "loss": 3.3889, "step": 23245 }, { "epoch": 0.4, "learning_rate": 0.0003952550625437443, "loss": 3.4227, "step": 23250 }, { "epoch": 0.4, "learning_rate": 0.0003951770509120565, "loss": 3.454, "step": 23255 }, { "epoch": 0.4, "learning_rate": 0.00039509903212354215, "loss": 3.4451, "step": 23260 }, { "epoch": 0.4, "learning_rate": 0.00039502100618406797, "loss": 3.3362, "step": 23265 }, { "epoch": 0.4, "learning_rate": 0.0003949429730995011, "loss": 3.4359, "step": 23270 }, { "epoch": 0.4, "learning_rate": 0.0003948649328757093, "loss": 3.3884, "step": 23275 }, { "epoch": 0.4, "learning_rate": 0.00039478688551856055, "loss": 3.4077, "step": 23280 }, { "epoch": 0.4, "learning_rate": 0.00039470883103392393, "loss": 3.3771, "step": 23285 }, { "epoch": 0.4, "learning_rate": 0.00039463076942766853, "loss": 3.4804, "step": 23290 }, { "epoch": 0.4, "learning_rate": 0.0003945527007056644, "loss": 3.3656, "step": 23295 }, { "epoch": 0.4, "learning_rate": 0.0003944746248737816, "loss": 3.4062, "step": 23300 }, { "epoch": 0.4, "learning_rate": 0.0003943965419378911, "loss": 3.3631, "step": 23305 }, { "epoch": 0.4, "learning_rate": 0.00039431845190386454, "loss": 3.4399, "step": 23310 }, { "epoch": 0.4, "learning_rate": 0.0003942403547775738, "loss": 3.4739, "step": 23315 }, { "epoch": 0.4, "learning_rate": 0.0003941622505648913, "loss": 3.3574, "step": 23320 }, { "epoch": 0.4, "learning_rate": 0.00039408413927169016, "loss": 3.463, "step": 23325 }, { "epoch": 0.4, "learning_rate": 0.00039400602090384384, "loss": 3.3802, "step": 23330 }, { "epoch": 0.4, "learning_rate": 0.0003939278954672267, "loss": 3.4331, "step": 23335 }, { "epoch": 0.4, "learning_rate": 0.00039384976296771303, "loss": 3.5204, "step": 23340 }, { "epoch": 0.4, "learning_rate": 0.00039377162341117816, "loss": 3.4733, "step": 23345 }, { "epoch": 0.4, "learning_rate": 0.00039369347680349786, "loss": 3.3425, "step": 23350 }, { "epoch": 0.4, "learning_rate": 0.00039361532315054825, "loss": 3.4375, "step": 23355 }, { "epoch": 0.4, "learning_rate": 0.00039353716245820613, "loss": 3.4392, "step": 23360 }, { "epoch": 0.4, "learning_rate": 0.0003934589947323487, "loss": 3.3395, "step": 23365 }, { "epoch": 0.4, "learning_rate": 0.00039338081997885384, "loss": 3.3579, "step": 23370 }, { "epoch": 0.4, "learning_rate": 0.0003933026382036, "loss": 3.4948, "step": 23375 }, { "epoch": 0.4, "learning_rate": 0.0003932244494124658, "loss": 3.4524, "step": 23380 }, { "epoch": 0.4, "learning_rate": 0.00039314625361133086, "loss": 3.4354, "step": 23385 }, { "epoch": 0.4, "learning_rate": 0.00039306805080607495, "loss": 3.3287, "step": 23390 }, { "epoch": 0.4, "learning_rate": 0.0003929898410025786, "loss": 3.3441, "step": 23395 }, { "epoch": 0.4, "learning_rate": 0.00039291162420672284, "loss": 3.3456, "step": 23400 }, { "epoch": 0.41, "learning_rate": 0.00039283340042438905, "loss": 3.3949, "step": 23405 }, { "epoch": 0.41, "learning_rate": 0.0003927551696614593, "loss": 3.3505, "step": 23410 }, { "epoch": 0.41, "learning_rate": 0.0003926769319238162, "loss": 3.4162, "step": 23415 }, { "epoch": 0.41, "learning_rate": 0.00039259868721734274, "loss": 3.3707, "step": 23420 }, { "epoch": 0.41, "learning_rate": 0.0003925204355479226, "loss": 3.3314, "step": 23425 }, { "epoch": 0.41, "learning_rate": 0.00039244217692143975, "loss": 3.4314, "step": 23430 }, { "epoch": 0.41, "learning_rate": 0.0003923639113437791, "loss": 3.4921, "step": 23435 }, { "epoch": 0.41, "learning_rate": 0.00039228563882082564, "loss": 3.3425, "step": 23440 }, { "epoch": 0.41, "learning_rate": 0.000392207359358465, "loss": 3.3939, "step": 23445 }, { "epoch": 0.41, "learning_rate": 0.00039212907296258363, "loss": 3.391, "step": 23450 }, { "epoch": 0.41, "learning_rate": 0.00039205077963906806, "loss": 3.3738, "step": 23455 }, { "epoch": 0.41, "learning_rate": 0.0003919724793938056, "loss": 3.5164, "step": 23460 }, { "epoch": 0.41, "learning_rate": 0.00039189417223268406, "loss": 3.3694, "step": 23465 }, { "epoch": 0.41, "learning_rate": 0.00039181585816159176, "loss": 3.354, "step": 23470 }, { "epoch": 0.41, "learning_rate": 0.0003917375371864174, "loss": 3.4643, "step": 23475 }, { "epoch": 0.41, "learning_rate": 0.0003916592093130505, "loss": 3.3646, "step": 23480 }, { "epoch": 0.41, "learning_rate": 0.00039158087454738063, "loss": 3.38, "step": 23485 }, { "epoch": 0.41, "learning_rate": 0.0003915025328952984, "loss": 3.5276, "step": 23490 }, { "epoch": 0.41, "learning_rate": 0.00039142418436269473, "loss": 3.5326, "step": 23495 }, { "epoch": 0.41, "learning_rate": 0.00039134582895546094, "loss": 3.4432, "step": 23500 }, { "epoch": 0.41, "eval_loss": 3.4460275173187256, "eval_runtime": 149.7826, "eval_samples_per_second": 12.291, "eval_steps_per_second": 0.774, "step": 23500 }, { "epoch": 0.41, "learning_rate": 0.0003912674666794889, "loss": 3.5093, "step": 23505 }, { "epoch": 0.41, "learning_rate": 0.00039118909754067097, "loss": 3.4232, "step": 23510 }, { "epoch": 0.41, "learning_rate": 0.0003911107215449005, "loss": 3.3742, "step": 23515 }, { "epoch": 0.41, "learning_rate": 0.00039103233869807055, "loss": 3.5677, "step": 23520 }, { "epoch": 0.41, "learning_rate": 0.00039095394900607537, "loss": 3.412, "step": 23525 }, { "epoch": 0.41, "learning_rate": 0.0003908755524748092, "loss": 3.4859, "step": 23530 }, { "epoch": 0.41, "learning_rate": 0.00039079714911016727, "loss": 3.3516, "step": 23535 }, { "epoch": 0.41, "learning_rate": 0.0003907187389180451, "loss": 3.4358, "step": 23540 }, { "epoch": 0.41, "learning_rate": 0.0003906403219043386, "loss": 3.4573, "step": 23545 }, { "epoch": 0.41, "learning_rate": 0.0003905618980749445, "loss": 3.4623, "step": 23550 }, { "epoch": 0.41, "learning_rate": 0.00039048346743575976, "loss": 3.5096, "step": 23555 }, { "epoch": 0.41, "learning_rate": 0.00039040502999268203, "loss": 3.3192, "step": 23560 }, { "epoch": 0.41, "learning_rate": 0.0003903265857516094, "loss": 3.4335, "step": 23565 }, { "epoch": 0.41, "learning_rate": 0.00039024813471844034, "loss": 3.4312, "step": 23570 }, { "epoch": 0.41, "learning_rate": 0.00039016967689907406, "loss": 3.3351, "step": 23575 }, { "epoch": 0.41, "learning_rate": 0.00039009121229941027, "loss": 3.4376, "step": 23580 }, { "epoch": 0.41, "learning_rate": 0.0003900127409253489, "loss": 3.378, "step": 23585 }, { "epoch": 0.41, "learning_rate": 0.0003899342627827909, "loss": 3.4918, "step": 23590 }, { "epoch": 0.41, "learning_rate": 0.00038985577787763713, "loss": 3.5062, "step": 23595 }, { "epoch": 0.41, "learning_rate": 0.0003897772862157893, "loss": 3.3182, "step": 23600 }, { "epoch": 0.41, "learning_rate": 0.00038969878780314976, "loss": 3.4342, "step": 23605 }, { "epoch": 0.41, "learning_rate": 0.000389620282645621, "loss": 3.3611, "step": 23610 }, { "epoch": 0.41, "learning_rate": 0.0003895417707491064, "loss": 3.2832, "step": 23615 }, { "epoch": 0.41, "learning_rate": 0.00038946325211950945, "loss": 3.4133, "step": 23620 }, { "epoch": 0.41, "learning_rate": 0.0003893847267627344, "loss": 3.3847, "step": 23625 }, { "epoch": 0.41, "learning_rate": 0.0003893061946846861, "loss": 3.2987, "step": 23630 }, { "epoch": 0.41, "learning_rate": 0.00038922765589126954, "loss": 3.3355, "step": 23635 }, { "epoch": 0.41, "learning_rate": 0.00038914911038839063, "loss": 3.2932, "step": 23640 }, { "epoch": 0.41, "learning_rate": 0.0003890705581819554, "loss": 3.4283, "step": 23645 }, { "epoch": 0.41, "learning_rate": 0.00038899199927787067, "loss": 3.3789, "step": 23650 }, { "epoch": 0.41, "learning_rate": 0.00038891343368204374, "loss": 3.4124, "step": 23655 }, { "epoch": 0.41, "learning_rate": 0.0003888348614003823, "loss": 3.388, "step": 23660 }, { "epoch": 0.41, "learning_rate": 0.0003887562824387945, "loss": 3.4347, "step": 23665 }, { "epoch": 0.41, "learning_rate": 0.00038867769680318913, "loss": 3.4287, "step": 23670 }, { "epoch": 0.41, "learning_rate": 0.0003885991044994755, "loss": 3.4319, "step": 23675 }, { "epoch": 0.41, "learning_rate": 0.0003885205055335632, "loss": 3.3868, "step": 23680 }, { "epoch": 0.41, "learning_rate": 0.0003884418999113625, "loss": 3.4081, "step": 23685 }, { "epoch": 0.41, "learning_rate": 0.00038836328763878423, "loss": 3.5162, "step": 23690 }, { "epoch": 0.41, "learning_rate": 0.0003882846687217396, "loss": 3.439, "step": 23695 }, { "epoch": 0.41, "learning_rate": 0.00038820604316614036, "loss": 3.4161, "step": 23700 }, { "epoch": 0.41, "learning_rate": 0.0003881274109778987, "loss": 3.408, "step": 23705 }, { "epoch": 0.41, "learning_rate": 0.0003880487721629273, "loss": 3.3882, "step": 23710 }, { "epoch": 0.41, "learning_rate": 0.0003879701267271396, "loss": 3.2077, "step": 23715 }, { "epoch": 0.41, "learning_rate": 0.00038789147467644915, "loss": 3.346, "step": 23720 }, { "epoch": 0.41, "learning_rate": 0.00038781281601677023, "loss": 3.4345, "step": 23725 }, { "epoch": 0.41, "learning_rate": 0.0003877341507540176, "loss": 3.2781, "step": 23730 }, { "epoch": 0.41, "learning_rate": 0.00038765547889410643, "loss": 3.3699, "step": 23735 }, { "epoch": 0.41, "learning_rate": 0.0003875768004429525, "loss": 3.4467, "step": 23740 }, { "epoch": 0.41, "learning_rate": 0.00038749811540647204, "loss": 3.3993, "step": 23745 }, { "epoch": 0.41, "learning_rate": 0.00038741942379058163, "loss": 3.3744, "step": 23750 }, { "epoch": 0.41, "learning_rate": 0.00038734072560119866, "loss": 3.4018, "step": 23755 }, { "epoch": 0.41, "learning_rate": 0.00038726202084424076, "loss": 3.412, "step": 23760 }, { "epoch": 0.41, "learning_rate": 0.000387183309525626, "loss": 3.4137, "step": 23765 }, { "epoch": 0.41, "learning_rate": 0.0003871045916512732, "loss": 3.4427, "step": 23770 }, { "epoch": 0.41, "learning_rate": 0.00038702586722710153, "loss": 3.4318, "step": 23775 }, { "epoch": 0.41, "learning_rate": 0.0003869471362590306, "loss": 3.4418, "step": 23780 }, { "epoch": 0.41, "learning_rate": 0.00038686839875298073, "loss": 3.3053, "step": 23785 }, { "epoch": 0.41, "learning_rate": 0.0003867896547148724, "loss": 3.4195, "step": 23790 }, { "epoch": 0.41, "learning_rate": 0.00038671090415062683, "loss": 3.343, "step": 23795 }, { "epoch": 0.41, "learning_rate": 0.00038663214706616565, "loss": 3.4823, "step": 23800 }, { "epoch": 0.41, "learning_rate": 0.000386553383467411, "loss": 3.2957, "step": 23805 }, { "epoch": 0.41, "learning_rate": 0.0003864746133602855, "loss": 3.3719, "step": 23810 }, { "epoch": 0.41, "learning_rate": 0.0003863958367507122, "loss": 3.3919, "step": 23815 }, { "epoch": 0.41, "learning_rate": 0.00038631705364461485, "loss": 3.3362, "step": 23820 }, { "epoch": 0.41, "learning_rate": 0.00038623826404791733, "loss": 3.4481, "step": 23825 }, { "epoch": 0.41, "learning_rate": 0.0003861594679665443, "loss": 3.4172, "step": 23830 }, { "epoch": 0.41, "learning_rate": 0.00038608066540642095, "loss": 3.3761, "step": 23835 }, { "epoch": 0.41, "learning_rate": 0.0003860018563734727, "loss": 3.4139, "step": 23840 }, { "epoch": 0.41, "learning_rate": 0.0003859230408736256, "loss": 3.4002, "step": 23845 }, { "epoch": 0.41, "learning_rate": 0.00038584421891280606, "loss": 3.3431, "step": 23850 }, { "epoch": 0.41, "learning_rate": 0.0003857653904969413, "loss": 3.4767, "step": 23855 }, { "epoch": 0.41, "learning_rate": 0.00038568655563195873, "loss": 3.3485, "step": 23860 }, { "epoch": 0.41, "learning_rate": 0.0003856077143237863, "loss": 3.4446, "step": 23865 }, { "epoch": 0.41, "learning_rate": 0.0003855288665783525, "loss": 3.4077, "step": 23870 }, { "epoch": 0.41, "learning_rate": 0.0003854500124015861, "loss": 3.4456, "step": 23875 }, { "epoch": 0.41, "learning_rate": 0.00038537115179941686, "loss": 3.4435, "step": 23880 }, { "epoch": 0.41, "learning_rate": 0.0003852922847777745, "loss": 3.3529, "step": 23885 }, { "epoch": 0.41, "learning_rate": 0.00038521341134258947, "loss": 3.3197, "step": 23890 }, { "epoch": 0.41, "learning_rate": 0.0003851345314997926, "loss": 3.3096, "step": 23895 }, { "epoch": 0.41, "learning_rate": 0.0003850556452553152, "loss": 3.4025, "step": 23900 }, { "epoch": 0.41, "learning_rate": 0.0003849767526150892, "loss": 3.3826, "step": 23905 }, { "epoch": 0.41, "learning_rate": 0.000384897853585047, "loss": 3.3757, "step": 23910 }, { "epoch": 0.41, "learning_rate": 0.00038481894817112114, "loss": 3.4822, "step": 23915 }, { "epoch": 0.41, "learning_rate": 0.00038474003637924516, "loss": 3.3612, "step": 23920 }, { "epoch": 0.41, "learning_rate": 0.0003846611182153528, "loss": 3.4059, "step": 23925 }, { "epoch": 0.41, "learning_rate": 0.00038458219368537813, "loss": 3.2958, "step": 23930 }, { "epoch": 0.41, "learning_rate": 0.000384503262795256, "loss": 3.3191, "step": 23935 }, { "epoch": 0.41, "learning_rate": 0.00038442432555092163, "loss": 3.4621, "step": 23940 }, { "epoch": 0.41, "learning_rate": 0.00038434538195831054, "loss": 3.3922, "step": 23945 }, { "epoch": 0.41, "learning_rate": 0.00038426643202335906, "loss": 3.3505, "step": 23950 }, { "epoch": 0.41, "learning_rate": 0.00038418747575200374, "loss": 3.4525, "step": 23955 }, { "epoch": 0.41, "learning_rate": 0.0003841085131501818, "loss": 3.4467, "step": 23960 }, { "epoch": 0.41, "learning_rate": 0.0003840295442238306, "loss": 3.4365, "step": 23965 }, { "epoch": 0.41, "learning_rate": 0.00038395056897888824, "loss": 3.471, "step": 23970 }, { "epoch": 0.41, "learning_rate": 0.00038387158742129345, "loss": 3.3748, "step": 23975 }, { "epoch": 0.42, "learning_rate": 0.00038379259955698516, "loss": 3.4801, "step": 23980 }, { "epoch": 0.42, "learning_rate": 0.0003837136053919028, "loss": 3.3963, "step": 23985 }, { "epoch": 0.42, "learning_rate": 0.0003836346049319862, "loss": 3.4244, "step": 23990 }, { "epoch": 0.42, "learning_rate": 0.000383555598183176, "loss": 3.469, "step": 23995 }, { "epoch": 0.42, "learning_rate": 0.0003834765851514131, "loss": 3.4334, "step": 24000 }, { "epoch": 0.42, "eval_loss": 3.4270379543304443, "eval_runtime": 150.6758, "eval_samples_per_second": 12.218, "eval_steps_per_second": 0.77, "step": 24000 }, { "epoch": 0.42, "learning_rate": 0.00038339756584263875, "loss": 3.3244, "step": 24005 }, { "epoch": 0.42, "learning_rate": 0.00038331854026279485, "loss": 3.3813, "step": 24010 }, { "epoch": 0.42, "learning_rate": 0.00038323950841782375, "loss": 3.3714, "step": 24015 }, { "epoch": 0.42, "learning_rate": 0.0003831604703136683, "loss": 3.3798, "step": 24020 }, { "epoch": 0.42, "learning_rate": 0.0003830814259562715, "loss": 3.3194, "step": 24025 }, { "epoch": 0.42, "learning_rate": 0.0003830023753515773, "loss": 3.4367, "step": 24030 }, { "epoch": 0.42, "learning_rate": 0.00038292331850552986, "loss": 3.4029, "step": 24035 }, { "epoch": 0.42, "learning_rate": 0.00038284425542407395, "loss": 3.4108, "step": 24040 }, { "epoch": 0.42, "learning_rate": 0.0003827651861131545, "loss": 3.3577, "step": 24045 }, { "epoch": 0.42, "learning_rate": 0.00038268611057871727, "loss": 3.4693, "step": 24050 }, { "epoch": 0.42, "learning_rate": 0.0003826070288267082, "loss": 3.3681, "step": 24055 }, { "epoch": 0.42, "learning_rate": 0.00038252794086307405, "loss": 3.4132, "step": 24060 }, { "epoch": 0.42, "learning_rate": 0.0003824488466937616, "loss": 3.4845, "step": 24065 }, { "epoch": 0.42, "learning_rate": 0.0003823697463247184, "loss": 3.4444, "step": 24070 }, { "epoch": 0.42, "learning_rate": 0.00038229063976189247, "loss": 3.4076, "step": 24075 }, { "epoch": 0.42, "learning_rate": 0.00038221152701123216, "loss": 3.3896, "step": 24080 }, { "epoch": 0.42, "learning_rate": 0.00038213240807868633, "loss": 3.3713, "step": 24085 }, { "epoch": 0.42, "learning_rate": 0.0003820532829702043, "loss": 3.3014, "step": 24090 }, { "epoch": 0.42, "learning_rate": 0.0003819741516917359, "loss": 3.4203, "step": 24095 }, { "epoch": 0.42, "learning_rate": 0.00038189501424923156, "loss": 3.3145, "step": 24100 }, { "epoch": 0.42, "learning_rate": 0.0003818158706486417, "loss": 3.36, "step": 24105 }, { "epoch": 0.42, "learning_rate": 0.0003817367208959177, "loss": 3.3946, "step": 24110 }, { "epoch": 0.42, "learning_rate": 0.0003816575649970112, "loss": 3.3836, "step": 24115 }, { "epoch": 0.42, "learning_rate": 0.0003815784029578743, "loss": 3.4324, "step": 24120 }, { "epoch": 0.42, "learning_rate": 0.0003814992347844595, "loss": 3.4453, "step": 24125 }, { "epoch": 0.42, "learning_rate": 0.0003814200604827201, "loss": 3.4197, "step": 24130 }, { "epoch": 0.42, "learning_rate": 0.00038134088005860924, "loss": 3.4852, "step": 24135 }, { "epoch": 0.42, "learning_rate": 0.00038126169351808127, "loss": 3.408, "step": 24140 }, { "epoch": 0.42, "learning_rate": 0.0003811825008670902, "loss": 3.4374, "step": 24145 }, { "epoch": 0.42, "learning_rate": 0.0003811033021115912, "loss": 3.4174, "step": 24150 }, { "epoch": 0.42, "learning_rate": 0.00038102409725753947, "loss": 3.4054, "step": 24155 }, { "epoch": 0.42, "learning_rate": 0.000380944886310891, "loss": 3.3431, "step": 24160 }, { "epoch": 0.42, "learning_rate": 0.00038086566927760185, "loss": 3.5198, "step": 24165 }, { "epoch": 0.42, "learning_rate": 0.00038078644616362876, "loss": 3.3664, "step": 24170 }, { "epoch": 0.42, "learning_rate": 0.00038070721697492895, "loss": 3.3576, "step": 24175 }, { "epoch": 0.42, "learning_rate": 0.0003806279817174601, "loss": 3.4815, "step": 24180 }, { "epoch": 0.42, "learning_rate": 0.0003805487403971802, "loss": 3.4107, "step": 24185 }, { "epoch": 0.42, "learning_rate": 0.00038046949302004783, "loss": 3.3978, "step": 24190 }, { "epoch": 0.42, "learning_rate": 0.00038039023959202193, "loss": 3.3128, "step": 24195 }, { "epoch": 0.42, "learning_rate": 0.0003803109801190621, "loss": 3.4749, "step": 24200 }, { "epoch": 0.42, "learning_rate": 0.0003802317146071281, "loss": 3.3715, "step": 24205 }, { "epoch": 0.42, "learning_rate": 0.00038015244306218034, "loss": 3.3911, "step": 24210 }, { "epoch": 0.42, "learning_rate": 0.00038007316549017964, "loss": 3.364, "step": 24215 }, { "epoch": 0.42, "learning_rate": 0.00037999388189708733, "loss": 3.4206, "step": 24220 }, { "epoch": 0.42, "learning_rate": 0.00037991459228886495, "loss": 3.4327, "step": 24225 }, { "epoch": 0.42, "learning_rate": 0.0003798352966714749, "loss": 3.3105, "step": 24230 }, { "epoch": 0.42, "learning_rate": 0.0003797559950508796, "loss": 3.4319, "step": 24235 }, { "epoch": 0.42, "learning_rate": 0.00037967668743304225, "loss": 3.3862, "step": 24240 }, { "epoch": 0.42, "learning_rate": 0.00037959737382392637, "loss": 3.3361, "step": 24245 }, { "epoch": 0.42, "learning_rate": 0.0003795180542294958, "loss": 3.4328, "step": 24250 }, { "epoch": 0.42, "learning_rate": 0.00037943872865571525, "loss": 3.4733, "step": 24255 }, { "epoch": 0.42, "learning_rate": 0.00037935939710854934, "loss": 3.3578, "step": 24260 }, { "epoch": 0.42, "learning_rate": 0.0003792800595939634, "loss": 3.3603, "step": 24265 }, { "epoch": 0.42, "learning_rate": 0.00037920071611792337, "loss": 3.4714, "step": 24270 }, { "epoch": 0.42, "learning_rate": 0.0003791213666863953, "loss": 3.4574, "step": 24275 }, { "epoch": 0.42, "learning_rate": 0.00037904201130534604, "loss": 3.3614, "step": 24280 }, { "epoch": 0.42, "learning_rate": 0.0003789626499807426, "loss": 3.3123, "step": 24285 }, { "epoch": 0.42, "learning_rate": 0.0003788832827185525, "loss": 3.3467, "step": 24290 }, { "epoch": 0.42, "learning_rate": 0.00037880390952474386, "loss": 3.4375, "step": 24295 }, { "epoch": 0.42, "learning_rate": 0.00037872453040528516, "loss": 3.3211, "step": 24300 }, { "epoch": 0.42, "learning_rate": 0.0003786451453661451, "loss": 3.3171, "step": 24305 }, { "epoch": 0.42, "learning_rate": 0.00037856575441329315, "loss": 3.43, "step": 24310 }, { "epoch": 0.42, "learning_rate": 0.0003784863575526992, "loss": 3.36, "step": 24315 }, { "epoch": 0.42, "learning_rate": 0.00037840695479033335, "loss": 3.4438, "step": 24320 }, { "epoch": 0.42, "learning_rate": 0.0003783275461321663, "loss": 3.323, "step": 24325 }, { "epoch": 0.42, "learning_rate": 0.00037824813158416937, "loss": 3.4666, "step": 24330 }, { "epoch": 0.42, "learning_rate": 0.00037816871115231376, "loss": 3.4379, "step": 24335 }, { "epoch": 0.42, "learning_rate": 0.00037808928484257175, "loss": 3.501, "step": 24340 }, { "epoch": 0.42, "learning_rate": 0.0003780098526609158, "loss": 3.4066, "step": 24345 }, { "epoch": 0.42, "learning_rate": 0.00037793041461331857, "loss": 3.2325, "step": 24350 }, { "epoch": 0.42, "learning_rate": 0.0003778509707057536, "loss": 3.3971, "step": 24355 }, { "epoch": 0.42, "learning_rate": 0.00037777152094419474, "loss": 3.384, "step": 24360 }, { "epoch": 0.42, "learning_rate": 0.0003776920653346159, "loss": 3.3162, "step": 24365 }, { "epoch": 0.42, "learning_rate": 0.00037761260388299205, "loss": 3.4612, "step": 24370 }, { "epoch": 0.42, "learning_rate": 0.00037753313659529805, "loss": 3.4324, "step": 24375 }, { "epoch": 0.42, "learning_rate": 0.0003774536634775097, "loss": 3.4222, "step": 24380 }, { "epoch": 0.42, "learning_rate": 0.0003773741845356026, "loss": 3.3924, "step": 24385 }, { "epoch": 0.42, "learning_rate": 0.0003772946997755534, "loss": 3.4305, "step": 24390 }, { "epoch": 0.42, "learning_rate": 0.0003772152092033389, "loss": 3.402, "step": 24395 }, { "epoch": 0.42, "learning_rate": 0.0003771357128249365, "loss": 3.5204, "step": 24400 }, { "epoch": 0.42, "learning_rate": 0.0003770562106463237, "loss": 3.3448, "step": 24405 }, { "epoch": 0.42, "learning_rate": 0.0003769767026734788, "loss": 3.2582, "step": 24410 }, { "epoch": 0.42, "learning_rate": 0.0003768971889123803, "loss": 3.4989, "step": 24415 }, { "epoch": 0.42, "learning_rate": 0.0003768176693690074, "loss": 3.2957, "step": 24420 }, { "epoch": 0.42, "learning_rate": 0.0003767381440493394, "loss": 3.4028, "step": 24425 }, { "epoch": 0.42, "learning_rate": 0.0003766586129593562, "loss": 3.4257, "step": 24430 }, { "epoch": 0.42, "learning_rate": 0.0003765790761050383, "loss": 3.4178, "step": 24435 }, { "epoch": 0.42, "learning_rate": 0.0003764995334923662, "loss": 3.4097, "step": 24440 }, { "epoch": 0.42, "learning_rate": 0.0003764199851273213, "loss": 3.4026, "step": 24445 }, { "epoch": 0.42, "learning_rate": 0.0003763404310158851, "loss": 3.4466, "step": 24450 }, { "epoch": 0.42, "learning_rate": 0.00037626087116403973, "loss": 3.3257, "step": 24455 }, { "epoch": 0.42, "learning_rate": 0.00037618130557776774, "loss": 3.4463, "step": 24460 }, { "epoch": 0.42, "learning_rate": 0.000376101734263052, "loss": 3.2616, "step": 24465 }, { "epoch": 0.42, "learning_rate": 0.00037602215722587587, "loss": 3.3278, "step": 24470 }, { "epoch": 0.42, "learning_rate": 0.0003759425744722231, "loss": 3.3324, "step": 24475 }, { "epoch": 0.42, "learning_rate": 0.00037586298600807795, "loss": 3.3937, "step": 24480 }, { "epoch": 0.42, "learning_rate": 0.00037578339183942503, "loss": 3.3143, "step": 24485 }, { "epoch": 0.42, "learning_rate": 0.00037570379197224935, "loss": 3.3917, "step": 24490 }, { "epoch": 0.42, "learning_rate": 0.0003756241864125365, "loss": 3.3902, "step": 24495 }, { "epoch": 0.42, "learning_rate": 0.0003755445751662725, "loss": 3.4043, "step": 24500 }, { "epoch": 0.42, "eval_loss": 3.4218356609344482, "eval_runtime": 149.9736, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 24500 }, { "epoch": 0.42, "learning_rate": 0.0003754649582394435, "loss": 3.3665, "step": 24505 }, { "epoch": 0.42, "learning_rate": 0.0003753853356380365, "loss": 3.4224, "step": 24510 }, { "epoch": 0.42, "learning_rate": 0.00037530570736803853, "loss": 3.2309, "step": 24515 }, { "epoch": 0.42, "learning_rate": 0.0003752260734354374, "loss": 3.32, "step": 24520 }, { "epoch": 0.42, "learning_rate": 0.000375146433846221, "loss": 3.3945, "step": 24525 }, { "epoch": 0.42, "learning_rate": 0.0003750667886063778, "loss": 3.4484, "step": 24530 }, { "epoch": 0.42, "learning_rate": 0.00037498713772189695, "loss": 3.3952, "step": 24535 }, { "epoch": 0.42, "learning_rate": 0.0003749074811987676, "loss": 3.4158, "step": 24540 }, { "epoch": 0.42, "learning_rate": 0.00037482781904297954, "loss": 3.45, "step": 24545 }, { "epoch": 0.42, "learning_rate": 0.00037474815126052304, "loss": 3.3781, "step": 24550 }, { "epoch": 0.42, "learning_rate": 0.0003746684778573885, "loss": 3.285, "step": 24555 }, { "epoch": 0.43, "learning_rate": 0.0003745887988395672, "loss": 3.3847, "step": 24560 }, { "epoch": 0.43, "learning_rate": 0.00037450911421305056, "loss": 3.4632, "step": 24565 }, { "epoch": 0.43, "learning_rate": 0.00037442942398383027, "loss": 3.364, "step": 24570 }, { "epoch": 0.43, "learning_rate": 0.0003743497281578987, "loss": 3.3277, "step": 24575 }, { "epoch": 0.43, "learning_rate": 0.0003742700267412487, "loss": 3.4421, "step": 24580 }, { "epoch": 0.43, "learning_rate": 0.00037419031973987335, "loss": 3.3185, "step": 24585 }, { "epoch": 0.43, "learning_rate": 0.00037411060715976613, "loss": 3.45, "step": 24590 }, { "epoch": 0.43, "learning_rate": 0.000374030889006921, "loss": 3.4009, "step": 24595 }, { "epoch": 0.43, "learning_rate": 0.0003739511652873325, "loss": 3.4462, "step": 24600 }, { "epoch": 0.43, "learning_rate": 0.0003738714360069954, "loss": 3.2398, "step": 24605 }, { "epoch": 0.43, "learning_rate": 0.00037379170117190477, "loss": 3.4085, "step": 24610 }, { "epoch": 0.43, "learning_rate": 0.0003737119607880565, "loss": 3.4324, "step": 24615 }, { "epoch": 0.43, "learning_rate": 0.00037363221486144644, "loss": 3.4352, "step": 24620 }, { "epoch": 0.43, "learning_rate": 0.0003735524633980713, "loss": 3.451, "step": 24625 }, { "epoch": 0.43, "learning_rate": 0.00037347270640392783, "loss": 3.3561, "step": 24630 }, { "epoch": 0.43, "learning_rate": 0.0003733929438850133, "loss": 3.3655, "step": 24635 }, { "epoch": 0.43, "learning_rate": 0.0003733131758473256, "loss": 3.4145, "step": 24640 }, { "epoch": 0.43, "learning_rate": 0.0003732334022968628, "loss": 3.4254, "step": 24645 }, { "epoch": 0.43, "learning_rate": 0.0003731536232396235, "loss": 3.341, "step": 24650 }, { "epoch": 0.43, "learning_rate": 0.0003730738386816065, "loss": 3.2968, "step": 24655 }, { "epoch": 0.43, "learning_rate": 0.00037299404862881145, "loss": 3.3911, "step": 24660 }, { "epoch": 0.43, "learning_rate": 0.000372914253087238, "loss": 3.3917, "step": 24665 }, { "epoch": 0.43, "learning_rate": 0.00037283445206288637, "loss": 3.3416, "step": 24670 }, { "epoch": 0.43, "learning_rate": 0.00037275464556175725, "loss": 3.3614, "step": 24675 }, { "epoch": 0.43, "learning_rate": 0.0003726748335898516, "loss": 3.3741, "step": 24680 }, { "epoch": 0.43, "learning_rate": 0.00037259501615317096, "loss": 3.343, "step": 24685 }, { "epoch": 0.43, "learning_rate": 0.0003725151932577172, "loss": 3.3623, "step": 24690 }, { "epoch": 0.43, "learning_rate": 0.0003724353649094924, "loss": 3.4274, "step": 24695 }, { "epoch": 0.43, "learning_rate": 0.0003723555311144995, "loss": 3.3472, "step": 24700 }, { "epoch": 0.43, "learning_rate": 0.00037227569187874146, "loss": 3.4346, "step": 24705 }, { "epoch": 0.43, "learning_rate": 0.0003721958472082217, "loss": 3.5295, "step": 24710 }, { "epoch": 0.43, "learning_rate": 0.0003721159971089443, "loss": 3.5022, "step": 24715 }, { "epoch": 0.43, "learning_rate": 0.00037203614158691354, "loss": 3.3116, "step": 24720 }, { "epoch": 0.43, "learning_rate": 0.0003719562806481341, "loss": 3.2844, "step": 24725 }, { "epoch": 0.43, "learning_rate": 0.0003718764142986111, "loss": 3.4344, "step": 24730 }, { "epoch": 0.43, "learning_rate": 0.0003717965425443501, "loss": 3.4131, "step": 24735 }, { "epoch": 0.43, "learning_rate": 0.00037171666539135705, "loss": 3.3643, "step": 24740 }, { "epoch": 0.43, "learning_rate": 0.00037163678284563836, "loss": 3.2979, "step": 24745 }, { "epoch": 0.43, "learning_rate": 0.00037155689491320066, "loss": 3.358, "step": 24750 }, { "epoch": 0.43, "learning_rate": 0.00037147700160005125, "loss": 3.4448, "step": 24755 }, { "epoch": 0.43, "learning_rate": 0.0003713971029121975, "loss": 3.3963, "step": 24760 }, { "epoch": 0.43, "learning_rate": 0.0003713171988556478, "loss": 3.2131, "step": 24765 }, { "epoch": 0.43, "learning_rate": 0.0003712372894364099, "loss": 3.392, "step": 24770 }, { "epoch": 0.43, "learning_rate": 0.00037115737466049306, "loss": 3.3659, "step": 24775 }, { "epoch": 0.43, "learning_rate": 0.0003710774545339064, "loss": 3.3049, "step": 24780 }, { "epoch": 0.43, "learning_rate": 0.00037099752906265943, "loss": 3.3554, "step": 24785 }, { "epoch": 0.43, "learning_rate": 0.00037091759825276207, "loss": 3.318, "step": 24790 }, { "epoch": 0.43, "learning_rate": 0.00037083766211022467, "loss": 3.2913, "step": 24795 }, { "epoch": 0.43, "learning_rate": 0.00037075772064105815, "loss": 3.4315, "step": 24800 }, { "epoch": 0.43, "learning_rate": 0.00037067777385127383, "loss": 3.3689, "step": 24805 }, { "epoch": 0.43, "learning_rate": 0.00037059782174688296, "loss": 3.4208, "step": 24810 }, { "epoch": 0.43, "learning_rate": 0.00037051786433389775, "loss": 3.2799, "step": 24815 }, { "epoch": 0.43, "learning_rate": 0.0003704379016183306, "loss": 3.5046, "step": 24820 }, { "epoch": 0.43, "learning_rate": 0.00037035793360619423, "loss": 3.4735, "step": 24825 }, { "epoch": 0.43, "learning_rate": 0.00037027796030350183, "loss": 3.3752, "step": 24830 }, { "epoch": 0.43, "learning_rate": 0.0003701979817162669, "loss": 3.4892, "step": 24835 }, { "epoch": 0.43, "learning_rate": 0.0003701179978505035, "loss": 3.2982, "step": 24840 }, { "epoch": 0.43, "learning_rate": 0.0003700380087122262, "loss": 3.5165, "step": 24845 }, { "epoch": 0.43, "learning_rate": 0.0003699580143074494, "loss": 3.3722, "step": 24850 }, { "epoch": 0.43, "learning_rate": 0.00036987801464218853, "loss": 3.456, "step": 24855 }, { "epoch": 0.43, "learning_rate": 0.000369798009722459, "loss": 3.3965, "step": 24860 }, { "epoch": 0.43, "learning_rate": 0.00036971799955427695, "loss": 3.4197, "step": 24865 }, { "epoch": 0.43, "learning_rate": 0.0003696379841436586, "loss": 3.3727, "step": 24870 }, { "epoch": 0.43, "learning_rate": 0.00036955796349662066, "loss": 3.3547, "step": 24875 }, { "epoch": 0.43, "learning_rate": 0.00036947793761918043, "loss": 3.3268, "step": 24880 }, { "epoch": 0.43, "learning_rate": 0.00036939790651735536, "loss": 3.4494, "step": 24885 }, { "epoch": 0.43, "learning_rate": 0.0003693178701971633, "loss": 3.3767, "step": 24890 }, { "epoch": 0.43, "learning_rate": 0.0003692378286646227, "loss": 3.4077, "step": 24895 }, { "epoch": 0.43, "learning_rate": 0.00036915778192575223, "loss": 3.4866, "step": 24900 }, { "epoch": 0.43, "learning_rate": 0.00036907772998657107, "loss": 3.2929, "step": 24905 }, { "epoch": 0.43, "learning_rate": 0.00036899767285309846, "loss": 3.5017, "step": 24910 }, { "epoch": 0.43, "learning_rate": 0.0003689176105313545, "loss": 3.3838, "step": 24915 }, { "epoch": 0.43, "learning_rate": 0.0003688375430273595, "loss": 3.2769, "step": 24920 }, { "epoch": 0.43, "learning_rate": 0.000368757470347134, "loss": 3.3815, "step": 24925 }, { "epoch": 0.43, "learning_rate": 0.0003686773924966992, "loss": 3.381, "step": 24930 }, { "epoch": 0.43, "learning_rate": 0.00036859730948207627, "loss": 3.3977, "step": 24935 }, { "epoch": 0.43, "learning_rate": 0.0003685172213092873, "loss": 3.5147, "step": 24940 }, { "epoch": 0.43, "learning_rate": 0.0003684371279843546, "loss": 3.3731, "step": 24945 }, { "epoch": 0.43, "learning_rate": 0.0003683570295133004, "loss": 3.4501, "step": 24950 }, { "epoch": 0.43, "learning_rate": 0.0003682769259021479, "loss": 3.3245, "step": 24955 }, { "epoch": 0.43, "learning_rate": 0.00036819681715692063, "loss": 3.4224, "step": 24960 }, { "epoch": 0.43, "learning_rate": 0.00036811670328364214, "loss": 3.3934, "step": 24965 }, { "epoch": 0.43, "learning_rate": 0.0003680365842883367, "loss": 3.3077, "step": 24970 }, { "epoch": 0.43, "learning_rate": 0.0003679564601770287, "loss": 3.3332, "step": 24975 }, { "epoch": 0.43, "learning_rate": 0.0003678763309557432, "loss": 3.3636, "step": 24980 }, { "epoch": 0.43, "learning_rate": 0.00036779619663050554, "loss": 3.3194, "step": 24985 }, { "epoch": 0.43, "learning_rate": 0.00036771605720734127, "loss": 3.487, "step": 24990 }, { "epoch": 0.43, "learning_rate": 0.00036763591269227663, "loss": 3.3416, "step": 24995 }, { "epoch": 0.43, "learning_rate": 0.0003675557630913379, "loss": 3.3194, "step": 25000 }, { "epoch": 0.43, "eval_loss": 3.4098989963531494, "eval_runtime": 149.8725, "eval_samples_per_second": 12.284, "eval_steps_per_second": 0.774, "step": 25000 }, { "epoch": 0.43, "learning_rate": 0.00036747560841055206, "loss": 3.4664, "step": 25005 }, { "epoch": 0.43, "learning_rate": 0.00036739544865594623, "loss": 3.3433, "step": 25010 }, { "epoch": 0.43, "learning_rate": 0.00036731528383354803, "loss": 3.3198, "step": 25015 }, { "epoch": 0.43, "learning_rate": 0.0003672351139493855, "loss": 3.3991, "step": 25020 }, { "epoch": 0.43, "learning_rate": 0.00036715493900948704, "loss": 3.319, "step": 25025 }, { "epoch": 0.43, "learning_rate": 0.00036707475901988124, "loss": 3.3713, "step": 25030 }, { "epoch": 0.43, "learning_rate": 0.0003669945739865974, "loss": 3.34, "step": 25035 }, { "epoch": 0.43, "learning_rate": 0.0003669143839156648, "loss": 3.4226, "step": 25040 }, { "epoch": 0.43, "learning_rate": 0.00036683418881311355, "loss": 3.3767, "step": 25045 }, { "epoch": 0.43, "learning_rate": 0.00036675398868497394, "loss": 3.4798, "step": 25050 }, { "epoch": 0.43, "learning_rate": 0.0003666737835372763, "loss": 3.3018, "step": 25055 }, { "epoch": 0.43, "learning_rate": 0.0003665935733760519, "loss": 3.4508, "step": 25060 }, { "epoch": 0.43, "learning_rate": 0.00036651335820733213, "loss": 3.351, "step": 25065 }, { "epoch": 0.43, "learning_rate": 0.0003664331380371486, "loss": 3.4597, "step": 25070 }, { "epoch": 0.43, "learning_rate": 0.00036635291287153365, "loss": 3.3536, "step": 25075 }, { "epoch": 0.43, "learning_rate": 0.00036627268271651955, "loss": 3.4136, "step": 25080 }, { "epoch": 0.43, "learning_rate": 0.00036619244757813953, "loss": 3.24, "step": 25085 }, { "epoch": 0.43, "learning_rate": 0.0003661122074624266, "loss": 3.3397, "step": 25090 }, { "epoch": 0.43, "learning_rate": 0.00036603196237541445, "loss": 3.3929, "step": 25095 }, { "epoch": 0.43, "learning_rate": 0.00036595171232313715, "loss": 3.3473, "step": 25100 }, { "epoch": 0.43, "learning_rate": 0.0003658714573116292, "loss": 3.3839, "step": 25105 }, { "epoch": 0.43, "learning_rate": 0.0003657911973469252, "loss": 3.3912, "step": 25110 }, { "epoch": 0.43, "learning_rate": 0.0003657109324350602, "loss": 3.2797, "step": 25115 }, { "epoch": 0.43, "learning_rate": 0.0003656306625820699, "loss": 3.3835, "step": 25120 }, { "epoch": 0.43, "learning_rate": 0.0003655503877939902, "loss": 3.3576, "step": 25125 }, { "epoch": 0.43, "learning_rate": 0.00036547010807685726, "loss": 3.4066, "step": 25130 }, { "epoch": 0.43, "learning_rate": 0.0003653898234367078, "loss": 3.4692, "step": 25135 }, { "epoch": 0.44, "learning_rate": 0.0003653095338795786, "loss": 3.3662, "step": 25140 }, { "epoch": 0.44, "learning_rate": 0.0003652292394115072, "loss": 3.2443, "step": 25145 }, { "epoch": 0.44, "learning_rate": 0.00036514894003853126, "loss": 3.4306, "step": 25150 }, { "epoch": 0.44, "learning_rate": 0.00036506863576668894, "loss": 3.3949, "step": 25155 }, { "epoch": 0.44, "learning_rate": 0.0003649883266020187, "loss": 3.3633, "step": 25160 }, { "epoch": 0.44, "learning_rate": 0.0003649080125505593, "loss": 3.4211, "step": 25165 }, { "epoch": 0.44, "learning_rate": 0.00036482769361835, "loss": 3.3037, "step": 25170 }, { "epoch": 0.44, "learning_rate": 0.0003647473698114305, "loss": 3.2987, "step": 25175 }, { "epoch": 0.44, "learning_rate": 0.0003646670411358405, "loss": 3.32, "step": 25180 }, { "epoch": 0.44, "learning_rate": 0.00036458670759762044, "loss": 3.3066, "step": 25185 }, { "epoch": 0.44, "learning_rate": 0.000364506369202811, "loss": 3.446, "step": 25190 }, { "epoch": 0.44, "learning_rate": 0.00036442602595745314, "loss": 3.4656, "step": 25195 }, { "epoch": 0.44, "learning_rate": 0.00036434567786758836, "loss": 3.3859, "step": 25200 }, { "epoch": 0.44, "learning_rate": 0.0003642653249392584, "loss": 3.363, "step": 25205 }, { "epoch": 0.44, "learning_rate": 0.0003641849671785053, "loss": 3.3929, "step": 25210 }, { "epoch": 0.44, "learning_rate": 0.0003641046045913716, "loss": 3.3327, "step": 25215 }, { "epoch": 0.44, "learning_rate": 0.0003640242371839002, "loss": 3.4705, "step": 25220 }, { "epoch": 0.44, "learning_rate": 0.00036394386496213423, "loss": 3.4551, "step": 25225 }, { "epoch": 0.44, "learning_rate": 0.00036386348793211737, "loss": 3.4754, "step": 25230 }, { "epoch": 0.44, "learning_rate": 0.0003637831060998935, "loss": 3.341, "step": 25235 }, { "epoch": 0.44, "learning_rate": 0.00036370271947150693, "loss": 3.4458, "step": 25240 }, { "epoch": 0.44, "learning_rate": 0.0003636223280530024, "loss": 3.3916, "step": 25245 }, { "epoch": 0.44, "learning_rate": 0.0003635419318504248, "loss": 3.3774, "step": 25250 }, { "epoch": 0.44, "learning_rate": 0.00036346153086981955, "loss": 3.3418, "step": 25255 }, { "epoch": 0.44, "learning_rate": 0.00036338112511723235, "loss": 3.2995, "step": 25260 }, { "epoch": 0.44, "learning_rate": 0.00036330071459870946, "loss": 3.318, "step": 25265 }, { "epoch": 0.44, "learning_rate": 0.00036322029932029726, "loss": 3.3669, "step": 25270 }, { "epoch": 0.44, "learning_rate": 0.00036313987928804256, "loss": 3.4883, "step": 25275 }, { "epoch": 0.44, "learning_rate": 0.00036305945450799243, "loss": 3.3465, "step": 25280 }, { "epoch": 0.44, "learning_rate": 0.0003629790249861946, "loss": 3.3789, "step": 25285 }, { "epoch": 0.44, "learning_rate": 0.00036289859072869687, "loss": 3.408, "step": 25290 }, { "epoch": 0.44, "learning_rate": 0.0003628181517415473, "loss": 3.3993, "step": 25295 }, { "epoch": 0.44, "learning_rate": 0.0003627377080307947, "loss": 3.323, "step": 25300 }, { "epoch": 0.44, "learning_rate": 0.00036265725960248806, "loss": 3.3462, "step": 25305 }, { "epoch": 0.44, "learning_rate": 0.0003625768064626766, "loss": 3.3265, "step": 25310 }, { "epoch": 0.44, "learning_rate": 0.0003624963486174101, "loss": 3.2759, "step": 25315 }, { "epoch": 0.44, "learning_rate": 0.00036241588607273823, "loss": 3.4092, "step": 25320 }, { "epoch": 0.44, "learning_rate": 0.0003623354188347118, "loss": 3.2942, "step": 25325 }, { "epoch": 0.44, "learning_rate": 0.00036225494690938126, "loss": 3.3437, "step": 25330 }, { "epoch": 0.44, "learning_rate": 0.00036217447030279773, "loss": 3.3779, "step": 25335 }, { "epoch": 0.44, "learning_rate": 0.0003620939890210128, "loss": 3.3958, "step": 25340 }, { "epoch": 0.44, "learning_rate": 0.00036201350307007795, "loss": 3.438, "step": 25345 }, { "epoch": 0.44, "learning_rate": 0.0003619330124560456, "loss": 3.4422, "step": 25350 }, { "epoch": 0.44, "learning_rate": 0.0003618525171849681, "loss": 3.3307, "step": 25355 }, { "epoch": 0.44, "learning_rate": 0.0003617720172628982, "loss": 3.4132, "step": 25360 }, { "epoch": 0.44, "learning_rate": 0.0003616915126958893, "loss": 3.3374, "step": 25365 }, { "epoch": 0.44, "learning_rate": 0.0003616110034899948, "loss": 3.3578, "step": 25370 }, { "epoch": 0.44, "learning_rate": 0.0003615304896512685, "loss": 3.4068, "step": 25375 }, { "epoch": 0.44, "learning_rate": 0.0003614499711857648, "loss": 3.2033, "step": 25380 }, { "epoch": 0.44, "learning_rate": 0.00036136944809953816, "loss": 3.3313, "step": 25385 }, { "epoch": 0.44, "learning_rate": 0.0003612889203986436, "loss": 3.2963, "step": 25390 }, { "epoch": 0.44, "learning_rate": 0.00036120838808913625, "loss": 3.3687, "step": 25395 }, { "epoch": 0.44, "learning_rate": 0.0003611278511770718, "loss": 3.3163, "step": 25400 }, { "epoch": 0.44, "learning_rate": 0.00036104730966850625, "loss": 3.4108, "step": 25405 }, { "epoch": 0.44, "learning_rate": 0.0003609667635694959, "loss": 3.4129, "step": 25410 }, { "epoch": 0.44, "learning_rate": 0.0003608862128860973, "loss": 3.3316, "step": 25415 }, { "epoch": 0.44, "learning_rate": 0.00036080565762436757, "loss": 3.2811, "step": 25420 }, { "epoch": 0.44, "learning_rate": 0.00036072509779036397, "loss": 3.4021, "step": 25425 }, { "epoch": 0.44, "learning_rate": 0.00036064453339014436, "loss": 3.4404, "step": 25430 }, { "epoch": 0.44, "learning_rate": 0.00036056396442976657, "loss": 3.4088, "step": 25435 }, { "epoch": 0.44, "learning_rate": 0.00036048339091528895, "loss": 3.3295, "step": 25440 }, { "epoch": 0.44, "learning_rate": 0.0003604028128527704, "loss": 3.4301, "step": 25445 }, { "epoch": 0.44, "learning_rate": 0.00036032223024826983, "loss": 3.3048, "step": 25450 }, { "epoch": 0.44, "learning_rate": 0.0003602416431078468, "loss": 3.2633, "step": 25455 }, { "epoch": 0.44, "learning_rate": 0.0003601610514375608, "loss": 3.3771, "step": 25460 }, { "epoch": 0.44, "learning_rate": 0.00036008045524347213, "loss": 3.3616, "step": 25465 }, { "epoch": 0.44, "learning_rate": 0.00035999985453164115, "loss": 3.3544, "step": 25470 }, { "epoch": 0.44, "learning_rate": 0.00035991924930812855, "loss": 3.3366, "step": 25475 }, { "epoch": 0.44, "learning_rate": 0.0003598386395789955, "loss": 3.405, "step": 25480 }, { "epoch": 0.44, "learning_rate": 0.0003597580253503034, "loss": 3.3767, "step": 25485 }, { "epoch": 0.44, "learning_rate": 0.0003596774066281141, "loss": 3.2993, "step": 25490 }, { "epoch": 0.44, "learning_rate": 0.00035959678341848964, "loss": 3.4348, "step": 25495 }, { "epoch": 0.44, "learning_rate": 0.00035951615572749247, "loss": 3.2641, "step": 25500 }, { "epoch": 0.44, "eval_loss": 3.403398036956787, "eval_runtime": 149.6768, "eval_samples_per_second": 12.3, "eval_steps_per_second": 0.775, "step": 25500 }, { "epoch": 0.44, "learning_rate": 0.00035943552356118547, "loss": 3.3219, "step": 25505 }, { "epoch": 0.44, "learning_rate": 0.00035935488692563167, "loss": 3.3383, "step": 25510 }, { "epoch": 0.44, "learning_rate": 0.0003592742458268945, "loss": 3.3004, "step": 25515 }, { "epoch": 0.44, "learning_rate": 0.00035919360027103785, "loss": 3.2707, "step": 25520 }, { "epoch": 0.44, "learning_rate": 0.0003591129502641259, "loss": 3.3976, "step": 25525 }, { "epoch": 0.44, "learning_rate": 0.00035903229581222293, "loss": 3.3484, "step": 25530 }, { "epoch": 0.44, "learning_rate": 0.000358951636921394, "loss": 3.3598, "step": 25535 }, { "epoch": 0.44, "learning_rate": 0.00035887097359770393, "loss": 3.258, "step": 25540 }, { "epoch": 0.44, "learning_rate": 0.0003587903058472184, "loss": 3.3335, "step": 25545 }, { "epoch": 0.44, "learning_rate": 0.00035870963367600324, "loss": 3.3303, "step": 25550 }, { "epoch": 0.44, "learning_rate": 0.00035862895709012443, "loss": 3.4024, "step": 25555 }, { "epoch": 0.44, "learning_rate": 0.0003585482760956486, "loss": 3.3626, "step": 25560 }, { "epoch": 0.44, "learning_rate": 0.0003584675906986423, "loss": 3.3502, "step": 25565 }, { "epoch": 0.44, "learning_rate": 0.00035838690090517303, "loss": 3.4236, "step": 25570 }, { "epoch": 0.44, "learning_rate": 0.0003583062067213079, "loss": 3.3696, "step": 25575 }, { "epoch": 0.44, "learning_rate": 0.00035822550815311485, "loss": 3.3772, "step": 25580 }, { "epoch": 0.44, "learning_rate": 0.000358144805206662, "loss": 3.445, "step": 25585 }, { "epoch": 0.44, "learning_rate": 0.00035806409788801785, "loss": 3.4372, "step": 25590 }, { "epoch": 0.44, "learning_rate": 0.0003579833862032511, "loss": 3.4523, "step": 25595 }, { "epoch": 0.44, "learning_rate": 0.00035790267015843076, "loss": 3.3395, "step": 25600 }, { "epoch": 0.44, "learning_rate": 0.0003578219497596264, "loss": 3.3512, "step": 25605 }, { "epoch": 0.44, "learning_rate": 0.0003577412250129079, "loss": 3.3397, "step": 25610 }, { "epoch": 0.44, "learning_rate": 0.0003576604959243451, "loss": 3.3981, "step": 25615 }, { "epoch": 0.44, "learning_rate": 0.00035757976250000846, "loss": 3.3503, "step": 25620 }, { "epoch": 0.44, "learning_rate": 0.0003574990247459688, "loss": 3.3889, "step": 25625 }, { "epoch": 0.44, "learning_rate": 0.0003574182826682972, "loss": 3.3094, "step": 25630 }, { "epoch": 0.44, "learning_rate": 0.00035733753627306496, "loss": 3.3504, "step": 25635 }, { "epoch": 0.44, "learning_rate": 0.0003572567855663439, "loss": 3.3083, "step": 25640 }, { "epoch": 0.44, "learning_rate": 0.0003571760305542059, "loss": 3.4901, "step": 25645 }, { "epoch": 0.44, "learning_rate": 0.0003570952712427235, "loss": 3.3134, "step": 25650 }, { "epoch": 0.44, "learning_rate": 0.0003570145076379693, "loss": 3.4654, "step": 25655 }, { "epoch": 0.44, "learning_rate": 0.0003569337397460164, "loss": 3.347, "step": 25660 }, { "epoch": 0.44, "learning_rate": 0.0003568529675729379, "loss": 3.4081, "step": 25665 }, { "epoch": 0.44, "learning_rate": 0.00035677219112480765, "loss": 3.381, "step": 25670 }, { "epoch": 0.44, "learning_rate": 0.0003566914104076996, "loss": 3.2434, "step": 25675 }, { "epoch": 0.44, "learning_rate": 0.00035661062542768804, "loss": 3.4147, "step": 25680 }, { "epoch": 0.44, "learning_rate": 0.00035652983619084754, "loss": 3.3619, "step": 25685 }, { "epoch": 0.44, "learning_rate": 0.00035644904270325315, "loss": 3.3556, "step": 25690 }, { "epoch": 0.44, "learning_rate": 0.00035636824497098, "loss": 3.362, "step": 25695 }, { "epoch": 0.44, "learning_rate": 0.00035628744300010367, "loss": 3.3565, "step": 25700 }, { "epoch": 0.44, "learning_rate": 0.00035620663679670014, "loss": 3.3171, "step": 25705 }, { "epoch": 0.44, "learning_rate": 0.0003561258263668456, "loss": 3.3843, "step": 25710 }, { "epoch": 0.45, "learning_rate": 0.0003560450117166165, "loss": 3.4695, "step": 25715 }, { "epoch": 0.45, "learning_rate": 0.0003559641928520897, "loss": 3.432, "step": 25720 }, { "epoch": 0.45, "learning_rate": 0.0003558833697793425, "loss": 3.4059, "step": 25725 }, { "epoch": 0.45, "learning_rate": 0.00035580254250445234, "loss": 3.3132, "step": 25730 }, { "epoch": 0.45, "learning_rate": 0.0003557217110334969, "loss": 3.4028, "step": 25735 }, { "epoch": 0.45, "learning_rate": 0.0003556408753725543, "loss": 3.3004, "step": 25740 }, { "epoch": 0.45, "learning_rate": 0.000355560035527703, "loss": 3.3349, "step": 25745 }, { "epoch": 0.45, "learning_rate": 0.000355479191505022, "loss": 3.3751, "step": 25750 }, { "epoch": 0.45, "learning_rate": 0.00035539834331058996, "loss": 3.3602, "step": 25755 }, { "epoch": 0.45, "learning_rate": 0.00035531749095048633, "loss": 3.3683, "step": 25760 }, { "epoch": 0.45, "learning_rate": 0.00035523663443079104, "loss": 3.4222, "step": 25765 }, { "epoch": 0.45, "learning_rate": 0.00035515577375758384, "loss": 3.3746, "step": 25770 }, { "epoch": 0.45, "learning_rate": 0.00035507490893694517, "loss": 3.3384, "step": 25775 }, { "epoch": 0.45, "learning_rate": 0.00035499403997495555, "loss": 3.3536, "step": 25780 }, { "epoch": 0.45, "learning_rate": 0.00035491316687769596, "loss": 3.3968, "step": 25785 }, { "epoch": 0.45, "learning_rate": 0.0003548322896512477, "loss": 3.3728, "step": 25790 }, { "epoch": 0.45, "learning_rate": 0.00035475140830169225, "loss": 3.2715, "step": 25795 }, { "epoch": 0.45, "learning_rate": 0.0003546705228351115, "loss": 3.4149, "step": 25800 }, { "epoch": 0.45, "learning_rate": 0.0003545896332575876, "loss": 3.4619, "step": 25805 }, { "epoch": 0.45, "learning_rate": 0.000354508739575203, "loss": 3.3945, "step": 25810 }, { "epoch": 0.45, "learning_rate": 0.0003544278417940406, "loss": 3.204, "step": 25815 }, { "epoch": 0.45, "learning_rate": 0.0003543469399201834, "loss": 3.3092, "step": 25820 }, { "epoch": 0.45, "learning_rate": 0.0003542660339597149, "loss": 3.3261, "step": 25825 }, { "epoch": 0.45, "learning_rate": 0.0003541851239187188, "loss": 3.2748, "step": 25830 }, { "epoch": 0.45, "learning_rate": 0.0003541042098032789, "loss": 3.2542, "step": 25835 }, { "epoch": 0.45, "learning_rate": 0.0003540232916194798, "loss": 3.3005, "step": 25840 }, { "epoch": 0.45, "learning_rate": 0.00035394236937340604, "loss": 3.2868, "step": 25845 }, { "epoch": 0.45, "learning_rate": 0.00035386144307114257, "loss": 3.3108, "step": 25850 }, { "epoch": 0.45, "learning_rate": 0.0003537805127187746, "loss": 3.3181, "step": 25855 }, { "epoch": 0.45, "learning_rate": 0.0003536995783223876, "loss": 3.4812, "step": 25860 }, { "epoch": 0.45, "learning_rate": 0.00035361863988806765, "loss": 3.369, "step": 25865 }, { "epoch": 0.45, "learning_rate": 0.00035353769742190073, "loss": 3.3039, "step": 25870 }, { "epoch": 0.45, "learning_rate": 0.0003534567509299733, "loss": 3.3852, "step": 25875 }, { "epoch": 0.45, "learning_rate": 0.0003533758004183722, "loss": 3.4023, "step": 25880 }, { "epoch": 0.45, "learning_rate": 0.0003532948458931844, "loss": 3.4037, "step": 25885 }, { "epoch": 0.45, "learning_rate": 0.0003532138873604974, "loss": 3.416, "step": 25890 }, { "epoch": 0.45, "learning_rate": 0.00035313292482639876, "loss": 3.3859, "step": 25895 }, { "epoch": 0.45, "learning_rate": 0.0003530519582969765, "loss": 3.2529, "step": 25900 }, { "epoch": 0.45, "learning_rate": 0.00035297098777831886, "loss": 3.3192, "step": 25905 }, { "epoch": 0.45, "learning_rate": 0.00035289001327651445, "loss": 3.3498, "step": 25910 }, { "epoch": 0.45, "learning_rate": 0.00035280903479765205, "loss": 3.4666, "step": 25915 }, { "epoch": 0.45, "learning_rate": 0.00035272805234782075, "loss": 3.3699, "step": 25920 }, { "epoch": 0.45, "learning_rate": 0.00035264706593311025, "loss": 3.2185, "step": 25925 }, { "epoch": 0.45, "learning_rate": 0.00035256607555961023, "loss": 3.4041, "step": 25930 }, { "epoch": 0.45, "learning_rate": 0.00035248508123341066, "loss": 3.2361, "step": 25935 }, { "epoch": 0.45, "learning_rate": 0.00035240408296060205, "loss": 3.289, "step": 25940 }, { "epoch": 0.45, "learning_rate": 0.0003523230807472749, "loss": 3.4608, "step": 25945 }, { "epoch": 0.45, "learning_rate": 0.00035224207459952025, "loss": 3.3191, "step": 25950 }, { "epoch": 0.45, "learning_rate": 0.0003521610645234294, "loss": 3.4509, "step": 25955 }, { "epoch": 0.45, "learning_rate": 0.00035208005052509366, "loss": 3.3884, "step": 25960 }, { "epoch": 0.45, "learning_rate": 0.00035199903261060516, "loss": 3.2898, "step": 25965 }, { "epoch": 0.45, "learning_rate": 0.00035191801078605586, "loss": 3.3371, "step": 25970 }, { "epoch": 0.45, "learning_rate": 0.00035183698505753815, "loss": 3.3688, "step": 25975 }, { "epoch": 0.45, "learning_rate": 0.000351755955431145, "loss": 3.3285, "step": 25980 }, { "epoch": 0.45, "learning_rate": 0.00035167492191296904, "loss": 3.3297, "step": 25985 }, { "epoch": 0.45, "learning_rate": 0.0003515938845091039, "loss": 3.3815, "step": 25990 }, { "epoch": 0.45, "learning_rate": 0.00035151284322564307, "loss": 3.3746, "step": 25995 }, { "epoch": 0.45, "learning_rate": 0.0003514317980686803, "loss": 3.3711, "step": 26000 }, { "epoch": 0.45, "eval_loss": 3.395720958709717, "eval_runtime": 149.6727, "eval_samples_per_second": 12.3, "eval_steps_per_second": 0.775, "step": 26000 }, { "epoch": 0.45, "learning_rate": 0.0003513507490443101, "loss": 3.3657, "step": 26005 }, { "epoch": 0.45, "learning_rate": 0.0003512696961586266, "loss": 3.3346, "step": 26010 }, { "epoch": 0.45, "learning_rate": 0.00035118863941772477, "loss": 3.4844, "step": 26015 }, { "epoch": 0.45, "learning_rate": 0.00035110757882769955, "loss": 3.3899, "step": 26020 }, { "epoch": 0.45, "learning_rate": 0.0003510265143946463, "loss": 3.399, "step": 26025 }, { "epoch": 0.45, "learning_rate": 0.00035094544612466073, "loss": 3.2881, "step": 26030 }, { "epoch": 0.45, "learning_rate": 0.0003508643740238387, "loss": 3.4151, "step": 26035 }, { "epoch": 0.45, "learning_rate": 0.00035078329809827633, "loss": 3.2765, "step": 26040 }, { "epoch": 0.45, "learning_rate": 0.0003507022183540703, "loss": 3.4003, "step": 26045 }, { "epoch": 0.45, "learning_rate": 0.0003506211347973173, "loss": 3.4405, "step": 26050 }, { "epoch": 0.45, "learning_rate": 0.00035054004743411435, "loss": 3.3197, "step": 26055 }, { "epoch": 0.45, "learning_rate": 0.0003504589562705589, "loss": 3.27, "step": 26060 }, { "epoch": 0.45, "learning_rate": 0.00035037786131274836, "loss": 3.335, "step": 26065 }, { "epoch": 0.45, "learning_rate": 0.00035029676256678104, "loss": 3.2878, "step": 26070 }, { "epoch": 0.45, "learning_rate": 0.0003502156600387549, "loss": 3.3474, "step": 26075 }, { "epoch": 0.45, "learning_rate": 0.0003501345537347684, "loss": 3.4218, "step": 26080 }, { "epoch": 0.45, "learning_rate": 0.00035005344366092047, "loss": 3.4272, "step": 26085 }, { "epoch": 0.45, "learning_rate": 0.0003499723298233101, "loss": 3.3154, "step": 26090 }, { "epoch": 0.45, "learning_rate": 0.00034989121222803665, "loss": 3.3007, "step": 26095 }, { "epoch": 0.45, "learning_rate": 0.0003498100908811996, "loss": 3.5048, "step": 26100 }, { "epoch": 0.45, "learning_rate": 0.000349728965788899, "loss": 3.411, "step": 26105 }, { "epoch": 0.45, "learning_rate": 0.00034964783695723515, "loss": 3.3317, "step": 26110 }, { "epoch": 0.45, "learning_rate": 0.00034956670439230836, "loss": 3.424, "step": 26115 }, { "epoch": 0.45, "learning_rate": 0.0003494855681002194, "loss": 3.3243, "step": 26120 }, { "epoch": 0.45, "learning_rate": 0.00034940442808706926, "loss": 3.4425, "step": 26125 }, { "epoch": 0.45, "learning_rate": 0.00034932328435895946, "loss": 3.413, "step": 26130 }, { "epoch": 0.45, "learning_rate": 0.0003492421369219914, "loss": 3.4129, "step": 26135 }, { "epoch": 0.45, "learning_rate": 0.00034916098578226696, "loss": 3.4568, "step": 26140 }, { "epoch": 0.45, "learning_rate": 0.00034907983094588846, "loss": 3.2578, "step": 26145 }, { "epoch": 0.45, "learning_rate": 0.0003489986724189581, "loss": 3.4228, "step": 26150 }, { "epoch": 0.45, "learning_rate": 0.0003489175102075787, "loss": 3.2936, "step": 26155 }, { "epoch": 0.45, "learning_rate": 0.00034883634431785325, "loss": 3.4025, "step": 26160 }, { "epoch": 0.45, "learning_rate": 0.0003487551747558849, "loss": 3.308, "step": 26165 }, { "epoch": 0.45, "learning_rate": 0.0003486740015277774, "loss": 3.351, "step": 26170 }, { "epoch": 0.45, "learning_rate": 0.0003485928246396344, "loss": 3.2869, "step": 26175 }, { "epoch": 0.45, "learning_rate": 0.00034851164409756, "loss": 3.3198, "step": 26180 }, { "epoch": 0.45, "learning_rate": 0.0003484304599076586, "loss": 3.3552, "step": 26185 }, { "epoch": 0.45, "learning_rate": 0.00034834927207603474, "loss": 3.4593, "step": 26190 }, { "epoch": 0.45, "learning_rate": 0.00034826808060879356, "loss": 3.3524, "step": 26195 }, { "epoch": 0.45, "learning_rate": 0.00034818688551204, "loss": 3.2367, "step": 26200 }, { "epoch": 0.45, "learning_rate": 0.0003481056867918795, "loss": 3.2815, "step": 26205 }, { "epoch": 0.45, "learning_rate": 0.0003480244844544181, "loss": 3.4033, "step": 26210 }, { "epoch": 0.45, "learning_rate": 0.0003479432785057615, "loss": 3.3144, "step": 26215 }, { "epoch": 0.45, "learning_rate": 0.000347862068952016, "loss": 3.3572, "step": 26220 }, { "epoch": 0.45, "learning_rate": 0.00034778085579928827, "loss": 3.3663, "step": 26225 }, { "epoch": 0.45, "learning_rate": 0.00034769963905368495, "loss": 3.3651, "step": 26230 }, { "epoch": 0.45, "learning_rate": 0.0003476184187213135, "loss": 3.3908, "step": 26235 }, { "epoch": 0.45, "learning_rate": 0.00034753719480828076, "loss": 3.3412, "step": 26240 }, { "epoch": 0.45, "learning_rate": 0.00034745596732069456, "loss": 3.1838, "step": 26245 }, { "epoch": 0.45, "learning_rate": 0.00034737473626466296, "loss": 3.3703, "step": 26250 }, { "epoch": 0.45, "learning_rate": 0.00034729350164629393, "loss": 3.4167, "step": 26255 }, { "epoch": 0.45, "learning_rate": 0.000347212263471696, "loss": 3.445, "step": 26260 }, { "epoch": 0.45, "learning_rate": 0.00034713102174697773, "loss": 3.3665, "step": 26265 }, { "epoch": 0.45, "learning_rate": 0.0003470497764782483, "loss": 3.4791, "step": 26270 }, { "epoch": 0.45, "learning_rate": 0.0003469685276716167, "loss": 3.327, "step": 26275 }, { "epoch": 0.45, "learning_rate": 0.00034688727533319254, "loss": 3.479, "step": 26280 }, { "epoch": 0.45, "learning_rate": 0.0003468060194690856, "loss": 3.2287, "step": 26285 }, { "epoch": 0.45, "learning_rate": 0.0003467247600854059, "loss": 3.3918, "step": 26290 }, { "epoch": 0.46, "learning_rate": 0.0003466434971882636, "loss": 3.511, "step": 26295 }, { "epoch": 0.46, "learning_rate": 0.0003465622307837695, "loss": 3.3726, "step": 26300 }, { "epoch": 0.46, "learning_rate": 0.0003464809608780341, "loss": 3.2298, "step": 26305 }, { "epoch": 0.46, "learning_rate": 0.0003463996874771688, "loss": 3.3942, "step": 26310 }, { "epoch": 0.46, "learning_rate": 0.00034631841058728474, "loss": 3.4149, "step": 26315 }, { "epoch": 0.46, "learning_rate": 0.0003462371302144935, "loss": 3.3502, "step": 26320 }, { "epoch": 0.46, "learning_rate": 0.00034615584636490715, "loss": 3.2583, "step": 26325 }, { "epoch": 0.46, "learning_rate": 0.00034607455904463766, "loss": 3.357, "step": 26330 }, { "epoch": 0.46, "learning_rate": 0.0003459932682597974, "loss": 3.3674, "step": 26335 }, { "epoch": 0.46, "learning_rate": 0.00034591197401649914, "loss": 3.4351, "step": 26340 }, { "epoch": 0.46, "learning_rate": 0.0003458306763208557, "loss": 3.3208, "step": 26345 }, { "epoch": 0.46, "learning_rate": 0.00034574937517898034, "loss": 3.3437, "step": 26350 }, { "epoch": 0.46, "learning_rate": 0.00034566807059698647, "loss": 3.4414, "step": 26355 }, { "epoch": 0.46, "learning_rate": 0.0003455867625809876, "loss": 3.3091, "step": 26360 }, { "epoch": 0.46, "learning_rate": 0.00034550545113709786, "loss": 3.3744, "step": 26365 }, { "epoch": 0.46, "learning_rate": 0.0003454241362714314, "loss": 3.354, "step": 26370 }, { "epoch": 0.46, "learning_rate": 0.0003453428179901028, "loss": 3.3764, "step": 26375 }, { "epoch": 0.46, "learning_rate": 0.0003452614962992265, "loss": 3.345, "step": 26380 }, { "epoch": 0.46, "learning_rate": 0.00034518017120491765, "loss": 3.3632, "step": 26385 }, { "epoch": 0.46, "learning_rate": 0.0003450988427132915, "loss": 3.3498, "step": 26390 }, { "epoch": 0.46, "learning_rate": 0.0003450175108304636, "loss": 3.3696, "step": 26395 }, { "epoch": 0.46, "learning_rate": 0.00034493617556254953, "loss": 3.3475, "step": 26400 }, { "epoch": 0.46, "learning_rate": 0.00034485483691566537, "loss": 3.3516, "step": 26405 }, { "epoch": 0.46, "learning_rate": 0.0003447734948959273, "loss": 3.3455, "step": 26410 }, { "epoch": 0.46, "learning_rate": 0.0003446921495094519, "loss": 3.3736, "step": 26415 }, { "epoch": 0.46, "learning_rate": 0.00034461080076235597, "loss": 3.4373, "step": 26420 }, { "epoch": 0.46, "learning_rate": 0.0003445294486607564, "loss": 3.3134, "step": 26425 }, { "epoch": 0.46, "learning_rate": 0.00034444809321077045, "loss": 3.3514, "step": 26430 }, { "epoch": 0.46, "learning_rate": 0.0003443667344185157, "loss": 3.4766, "step": 26435 }, { "epoch": 0.46, "learning_rate": 0.00034428537229011, "loss": 3.3528, "step": 26440 }, { "epoch": 0.46, "learning_rate": 0.0003442040068316711, "loss": 3.3033, "step": 26445 }, { "epoch": 0.46, "learning_rate": 0.0003441226380493174, "loss": 3.4208, "step": 26450 }, { "epoch": 0.46, "learning_rate": 0.0003440412659491677, "loss": 3.3896, "step": 26455 }, { "epoch": 0.46, "learning_rate": 0.00034395989053734034, "loss": 3.3191, "step": 26460 }, { "epoch": 0.46, "learning_rate": 0.0003438785118199545, "loss": 3.3925, "step": 26465 }, { "epoch": 0.46, "learning_rate": 0.0003437971298031295, "loss": 3.3533, "step": 26470 }, { "epoch": 0.46, "learning_rate": 0.00034371574449298476, "loss": 3.3886, "step": 26475 }, { "epoch": 0.46, "learning_rate": 0.0003436343558956401, "loss": 3.3418, "step": 26480 }, { "epoch": 0.46, "learning_rate": 0.00034355296401721545, "loss": 3.4094, "step": 26485 }, { "epoch": 0.46, "learning_rate": 0.0003434715688638312, "loss": 3.4429, "step": 26490 }, { "epoch": 0.46, "learning_rate": 0.00034339017044160766, "loss": 3.3538, "step": 26495 }, { "epoch": 0.46, "learning_rate": 0.0003433087687566657, "loss": 3.3811, "step": 26500 }, { "epoch": 0.46, "eval_loss": 3.386625289916992, "eval_runtime": 149.9818, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 26500 }, { "epoch": 0.46, "learning_rate": 0.0003432273638151263, "loss": 3.4065, "step": 26505 }, { "epoch": 0.46, "learning_rate": 0.00034314595562311064, "loss": 3.3432, "step": 26510 }, { "epoch": 0.46, "learning_rate": 0.0003430645441867404, "loss": 3.4248, "step": 26515 }, { "epoch": 0.46, "learning_rate": 0.00034298312951213696, "loss": 3.3458, "step": 26520 }, { "epoch": 0.46, "learning_rate": 0.00034290171160542243, "loss": 3.4478, "step": 26525 }, { "epoch": 0.46, "learning_rate": 0.0003428202904727192, "loss": 3.3292, "step": 26530 }, { "epoch": 0.46, "learning_rate": 0.0003427388661201495, "loss": 3.4159, "step": 26535 }, { "epoch": 0.46, "learning_rate": 0.00034265743855383616, "loss": 3.3999, "step": 26540 }, { "epoch": 0.46, "learning_rate": 0.000342576007779902, "loss": 3.4405, "step": 26545 }, { "epoch": 0.46, "learning_rate": 0.0003424945738044702, "loss": 3.2289, "step": 26550 }, { "epoch": 0.46, "learning_rate": 0.00034241313663366436, "loss": 3.3146, "step": 26555 }, { "epoch": 0.46, "learning_rate": 0.00034233169627360797, "loss": 3.2682, "step": 26560 }, { "epoch": 0.46, "learning_rate": 0.0003422502527304249, "loss": 3.401, "step": 26565 }, { "epoch": 0.46, "learning_rate": 0.0003421688060102394, "loss": 3.4786, "step": 26570 }, { "epoch": 0.46, "learning_rate": 0.0003420873561191758, "loss": 3.3598, "step": 26575 }, { "epoch": 0.46, "learning_rate": 0.00034200590306335873, "loss": 3.3288, "step": 26580 }, { "epoch": 0.46, "learning_rate": 0.000341924446848913, "loss": 3.3177, "step": 26585 }, { "epoch": 0.46, "learning_rate": 0.0003418429874819637, "loss": 3.3151, "step": 26590 }, { "epoch": 0.46, "learning_rate": 0.0003417615249686363, "loss": 3.3541, "step": 26595 }, { "epoch": 0.46, "learning_rate": 0.0003416800593150561, "loss": 3.3449, "step": 26600 }, { "epoch": 0.46, "learning_rate": 0.00034159859052734917, "loss": 3.3314, "step": 26605 }, { "epoch": 0.46, "learning_rate": 0.00034151711861164135, "loss": 3.3612, "step": 26610 }, { "epoch": 0.46, "learning_rate": 0.000341435643574059, "loss": 3.3928, "step": 26615 }, { "epoch": 0.46, "learning_rate": 0.00034135416542072864, "loss": 3.3273, "step": 26620 }, { "epoch": 0.46, "learning_rate": 0.00034127268415777687, "loss": 3.3225, "step": 26625 }, { "epoch": 0.46, "learning_rate": 0.00034119119979133085, "loss": 3.3973, "step": 26630 }, { "epoch": 0.46, "learning_rate": 0.00034110971232751775, "loss": 3.3591, "step": 26635 }, { "epoch": 0.46, "learning_rate": 0.00034102822177246487, "loss": 3.1721, "step": 26640 }, { "epoch": 0.46, "learning_rate": 0.0003409467281323, "loss": 3.1719, "step": 26645 }, { "epoch": 0.46, "learning_rate": 0.00034086523141315105, "loss": 3.3968, "step": 26650 }, { "epoch": 0.46, "learning_rate": 0.00034078373162114615, "loss": 3.3185, "step": 26655 }, { "epoch": 0.46, "learning_rate": 0.00034070222876241365, "loss": 3.3102, "step": 26660 }, { "epoch": 0.46, "learning_rate": 0.0003406207228430821, "loss": 3.4479, "step": 26665 }, { "epoch": 0.46, "learning_rate": 0.00034053921386928036, "loss": 3.4227, "step": 26670 }, { "epoch": 0.46, "learning_rate": 0.00034045770184713755, "loss": 3.4028, "step": 26675 }, { "epoch": 0.46, "learning_rate": 0.0003403761867827829, "loss": 3.4202, "step": 26680 }, { "epoch": 0.46, "learning_rate": 0.0003402946686823459, "loss": 3.3627, "step": 26685 }, { "epoch": 0.46, "learning_rate": 0.0003402131475519563, "loss": 3.4673, "step": 26690 }, { "epoch": 0.46, "learning_rate": 0.0003401316233977441, "loss": 3.4651, "step": 26695 }, { "epoch": 0.46, "learning_rate": 0.0003400500962258395, "loss": 3.2451, "step": 26700 }, { "epoch": 0.46, "learning_rate": 0.0003399685660423729, "loss": 3.3571, "step": 26705 }, { "epoch": 0.46, "learning_rate": 0.000339887032853475, "loss": 3.3063, "step": 26710 }, { "epoch": 0.46, "learning_rate": 0.0003398054966652767, "loss": 3.1844, "step": 26715 }, { "epoch": 0.46, "learning_rate": 0.00033972395748390905, "loss": 3.3216, "step": 26720 }, { "epoch": 0.46, "learning_rate": 0.00033964241531550326, "loss": 3.2759, "step": 26725 }, { "epoch": 0.46, "learning_rate": 0.00033956087016619104, "loss": 3.2445, "step": 26730 }, { "epoch": 0.46, "learning_rate": 0.0003394793220421042, "loss": 3.2716, "step": 26735 }, { "epoch": 0.46, "learning_rate": 0.0003393977709493746, "loss": 3.4037, "step": 26740 }, { "epoch": 0.46, "learning_rate": 0.00033931621689413464, "loss": 3.3809, "step": 26745 }, { "epoch": 0.46, "learning_rate": 0.0003392346598825165, "loss": 3.3758, "step": 26750 }, { "epoch": 0.46, "learning_rate": 0.00033915309992065316, "loss": 3.3075, "step": 26755 }, { "epoch": 0.46, "learning_rate": 0.00033907153701467733, "loss": 3.463, "step": 26760 }, { "epoch": 0.46, "learning_rate": 0.00033898997117072206, "loss": 3.3957, "step": 26765 }, { "epoch": 0.46, "learning_rate": 0.00033890840239492084, "loss": 3.3882, "step": 26770 }, { "epoch": 0.46, "learning_rate": 0.0003388268306934072, "loss": 3.3903, "step": 26775 }, { "epoch": 0.46, "learning_rate": 0.0003387452560723148, "loss": 3.3708, "step": 26780 }, { "epoch": 0.46, "learning_rate": 0.00033866367853777784, "loss": 3.3458, "step": 26785 }, { "epoch": 0.46, "learning_rate": 0.00033858209809593023, "loss": 3.3112, "step": 26790 }, { "epoch": 0.46, "learning_rate": 0.0003385005147529067, "loss": 3.2947, "step": 26795 }, { "epoch": 0.46, "learning_rate": 0.0003384189285148417, "loss": 3.3433, "step": 26800 }, { "epoch": 0.46, "learning_rate": 0.0003383373393878702, "loss": 3.3527, "step": 26805 }, { "epoch": 0.46, "learning_rate": 0.00033825574737812725, "loss": 3.1773, "step": 26810 }, { "epoch": 0.46, "learning_rate": 0.00033817415249174824, "loss": 3.3979, "step": 26815 }, { "epoch": 0.46, "learning_rate": 0.0003380925547348684, "loss": 3.4185, "step": 26820 }, { "epoch": 0.46, "learning_rate": 0.0003380109541136238, "loss": 3.3596, "step": 26825 }, { "epoch": 0.46, "learning_rate": 0.00033792935063415024, "loss": 3.4408, "step": 26830 }, { "epoch": 0.46, "learning_rate": 0.00033784774430258393, "loss": 3.2784, "step": 26835 }, { "epoch": 0.46, "learning_rate": 0.0003377661351250612, "loss": 3.3703, "step": 26840 }, { "epoch": 0.46, "learning_rate": 0.0003376845231077186, "loss": 3.3976, "step": 26845 }, { "epoch": 0.46, "learning_rate": 0.00033760290825669305, "loss": 3.4119, "step": 26850 }, { "epoch": 0.46, "learning_rate": 0.0003375212905781215, "loss": 3.4389, "step": 26855 }, { "epoch": 0.46, "learning_rate": 0.00033743967007814123, "loss": 3.4103, "step": 26860 }, { "epoch": 0.46, "learning_rate": 0.0003373580467628895, "loss": 3.3018, "step": 26865 }, { "epoch": 0.47, "learning_rate": 0.0003372764206385042, "loss": 3.3944, "step": 26870 }, { "epoch": 0.47, "learning_rate": 0.00033719479171112317, "loss": 3.3529, "step": 26875 }, { "epoch": 0.47, "learning_rate": 0.00033711315998688436, "loss": 3.3456, "step": 26880 }, { "epoch": 0.47, "learning_rate": 0.0003370315254719261, "loss": 3.3234, "step": 26885 }, { "epoch": 0.47, "learning_rate": 0.00033694988817238694, "loss": 3.2187, "step": 26890 }, { "epoch": 0.47, "learning_rate": 0.0003368682480944056, "loss": 3.4029, "step": 26895 }, { "epoch": 0.47, "learning_rate": 0.00033678660524412093, "loss": 3.3446, "step": 26900 }, { "epoch": 0.47, "learning_rate": 0.00033670495962767193, "loss": 3.3761, "step": 26905 }, { "epoch": 0.47, "learning_rate": 0.00033662331125119824, "loss": 3.2874, "step": 26910 }, { "epoch": 0.47, "learning_rate": 0.00033654166012083916, "loss": 3.2817, "step": 26915 }, { "epoch": 0.47, "learning_rate": 0.00033646000624273453, "loss": 3.4034, "step": 26920 }, { "epoch": 0.47, "learning_rate": 0.00033637834962302435, "loss": 3.4035, "step": 26925 }, { "epoch": 0.47, "learning_rate": 0.0003362966902678486, "loss": 3.3829, "step": 26930 }, { "epoch": 0.47, "learning_rate": 0.00033621502818334786, "loss": 3.3859, "step": 26935 }, { "epoch": 0.47, "learning_rate": 0.0003361333633756626, "loss": 3.2862, "step": 26940 }, { "epoch": 0.47, "learning_rate": 0.0003360516958509335, "loss": 3.3791, "step": 26945 }, { "epoch": 0.47, "learning_rate": 0.0003359700256153017, "loss": 3.3683, "step": 26950 }, { "epoch": 0.47, "learning_rate": 0.00033588835267490827, "loss": 3.3453, "step": 26955 }, { "epoch": 0.47, "learning_rate": 0.0003358066770358947, "loss": 3.3683, "step": 26960 }, { "epoch": 0.47, "learning_rate": 0.0003357249987044025, "loss": 3.3918, "step": 26965 }, { "epoch": 0.47, "learning_rate": 0.0003356433176865735, "loss": 3.3034, "step": 26970 }, { "epoch": 0.47, "learning_rate": 0.00033556163398854966, "loss": 3.3282, "step": 26975 }, { "epoch": 0.47, "learning_rate": 0.0003354799476164732, "loss": 3.4101, "step": 26980 }, { "epoch": 0.47, "learning_rate": 0.0003353982585764865, "loss": 3.3256, "step": 26985 }, { "epoch": 0.47, "learning_rate": 0.0003353165668747322, "loss": 3.3154, "step": 26990 }, { "epoch": 0.47, "learning_rate": 0.00033523487251735303, "loss": 3.2524, "step": 26995 }, { "epoch": 0.47, "learning_rate": 0.00033515317551049213, "loss": 3.3979, "step": 27000 }, { "epoch": 0.47, "eval_loss": 3.3817105293273926, "eval_runtime": 149.7679, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.775, "step": 27000 }, { "epoch": 0.47, "learning_rate": 0.0003350714758602924, "loss": 3.3361, "step": 27005 }, { "epoch": 0.47, "learning_rate": 0.0003349897735728975, "loss": 3.3722, "step": 27010 }, { "epoch": 0.47, "learning_rate": 0.00033490806865445097, "loss": 3.4067, "step": 27015 }, { "epoch": 0.47, "learning_rate": 0.0003348263611110966, "loss": 3.3635, "step": 27020 }, { "epoch": 0.47, "learning_rate": 0.00033474465094897836, "loss": 3.4518, "step": 27025 }, { "epoch": 0.47, "learning_rate": 0.0003346629381742404, "loss": 3.3886, "step": 27030 }, { "epoch": 0.47, "learning_rate": 0.00033458122279302706, "loss": 3.3627, "step": 27035 }, { "epoch": 0.47, "learning_rate": 0.0003344995048114832, "loss": 3.2823, "step": 27040 }, { "epoch": 0.47, "learning_rate": 0.00033441778423575315, "loss": 3.3224, "step": 27045 }, { "epoch": 0.47, "learning_rate": 0.00033433606107198223, "loss": 3.4366, "step": 27050 }, { "epoch": 0.47, "learning_rate": 0.00033425433532631546, "loss": 3.2814, "step": 27055 }, { "epoch": 0.47, "learning_rate": 0.0003341726070048983, "loss": 3.4437, "step": 27060 }, { "epoch": 0.47, "learning_rate": 0.0003340908761138762, "loss": 3.3634, "step": 27065 }, { "epoch": 0.47, "learning_rate": 0.00033400914265939484, "loss": 3.3136, "step": 27070 }, { "epoch": 0.47, "learning_rate": 0.00033392740664760033, "loss": 3.2769, "step": 27075 }, { "epoch": 0.47, "learning_rate": 0.0003338456680846387, "loss": 3.3517, "step": 27080 }, { "epoch": 0.47, "learning_rate": 0.00033376392697665624, "loss": 3.299, "step": 27085 }, { "epoch": 0.47, "learning_rate": 0.00033368218332979955, "loss": 3.3863, "step": 27090 }, { "epoch": 0.47, "learning_rate": 0.0003336004371502152, "loss": 3.3336, "step": 27095 }, { "epoch": 0.47, "learning_rate": 0.0003335186884440503, "loss": 3.2838, "step": 27100 }, { "epoch": 0.47, "learning_rate": 0.00033343693721745186, "loss": 3.3958, "step": 27105 }, { "epoch": 0.47, "learning_rate": 0.0003333551834765669, "loss": 3.2276, "step": 27110 }, { "epoch": 0.47, "learning_rate": 0.00033327342722754325, "loss": 3.3027, "step": 27115 }, { "epoch": 0.47, "learning_rate": 0.0003331916684765284, "loss": 3.3461, "step": 27120 }, { "epoch": 0.47, "learning_rate": 0.00033310990722967, "loss": 3.4142, "step": 27125 }, { "epoch": 0.47, "learning_rate": 0.00033302814349311645, "loss": 3.4698, "step": 27130 }, { "epoch": 0.47, "learning_rate": 0.00033294637727301565, "loss": 3.3017, "step": 27135 }, { "epoch": 0.47, "learning_rate": 0.0003328646085755163, "loss": 3.2897, "step": 27140 }, { "epoch": 0.47, "learning_rate": 0.0003327828374067667, "loss": 3.4139, "step": 27145 }, { "epoch": 0.47, "learning_rate": 0.00033270106377291563, "loss": 3.4206, "step": 27150 }, { "epoch": 0.47, "learning_rate": 0.00033261928768011235, "loss": 3.3275, "step": 27155 }, { "epoch": 0.47, "learning_rate": 0.0003325375091345057, "loss": 3.304, "step": 27160 }, { "epoch": 0.47, "learning_rate": 0.0003324557281422451, "loss": 3.2816, "step": 27165 }, { "epoch": 0.47, "learning_rate": 0.0003323739447094802, "loss": 3.3583, "step": 27170 }, { "epoch": 0.47, "learning_rate": 0.0003322921588423604, "loss": 3.2299, "step": 27175 }, { "epoch": 0.47, "learning_rate": 0.0003322103705470359, "loss": 3.2416, "step": 27180 }, { "epoch": 0.47, "learning_rate": 0.00033212857982965657, "loss": 3.3558, "step": 27185 }, { "epoch": 0.47, "learning_rate": 0.0003320467866963726, "loss": 3.3987, "step": 27190 }, { "epoch": 0.47, "learning_rate": 0.0003319649911533346, "loss": 3.3418, "step": 27195 }, { "epoch": 0.47, "learning_rate": 0.0003318831932066931, "loss": 3.3419, "step": 27200 }, { "epoch": 0.47, "learning_rate": 0.00033180139286259885, "loss": 3.4227, "step": 27205 }, { "epoch": 0.47, "learning_rate": 0.00033171959012720276, "loss": 3.3857, "step": 27210 }, { "epoch": 0.47, "learning_rate": 0.00033163778500665606, "loss": 3.2885, "step": 27215 }, { "epoch": 0.47, "learning_rate": 0.00033155597750711023, "loss": 3.3045, "step": 27220 }, { "epoch": 0.47, "learning_rate": 0.00033147416763471644, "loss": 3.3942, "step": 27225 }, { "epoch": 0.47, "learning_rate": 0.0003313923553956266, "loss": 3.3544, "step": 27230 }, { "epoch": 0.47, "learning_rate": 0.0003313105407959926, "loss": 3.2217, "step": 27235 }, { "epoch": 0.47, "learning_rate": 0.00033122872384196627, "loss": 3.2347, "step": 27240 }, { "epoch": 0.47, "learning_rate": 0.0003311469045397001, "loss": 3.2675, "step": 27245 }, { "epoch": 0.47, "learning_rate": 0.00033106508289534615, "loss": 3.274, "step": 27250 }, { "epoch": 0.47, "learning_rate": 0.0003309832589150573, "loss": 3.3786, "step": 27255 }, { "epoch": 0.47, "learning_rate": 0.00033090143260498616, "loss": 3.3703, "step": 27260 }, { "epoch": 0.47, "learning_rate": 0.0003308196039712856, "loss": 3.2512, "step": 27265 }, { "epoch": 0.47, "learning_rate": 0.00033073777302010875, "loss": 3.3575, "step": 27270 }, { "epoch": 0.47, "learning_rate": 0.0003306559397576089, "loss": 3.2458, "step": 27275 }, { "epoch": 0.47, "learning_rate": 0.0003305741041899396, "loss": 3.3806, "step": 27280 }, { "epoch": 0.47, "learning_rate": 0.0003304922663232543, "loss": 3.3828, "step": 27285 }, { "epoch": 0.47, "learning_rate": 0.00033041042616370676, "loss": 3.3612, "step": 27290 }, { "epoch": 0.47, "learning_rate": 0.0003303285837174511, "loss": 3.2727, "step": 27295 }, { "epoch": 0.47, "learning_rate": 0.00033024673899064143, "loss": 3.2924, "step": 27300 }, { "epoch": 0.47, "learning_rate": 0.00033016489198943184, "loss": 3.3078, "step": 27305 }, { "epoch": 0.47, "learning_rate": 0.0003300830427199771, "loss": 3.2827, "step": 27310 }, { "epoch": 0.47, "learning_rate": 0.00033000119118843164, "loss": 3.328, "step": 27315 }, { "epoch": 0.47, "learning_rate": 0.0003299193374009505, "loss": 3.3192, "step": 27320 }, { "epoch": 0.47, "learning_rate": 0.00032983748136368836, "loss": 3.4029, "step": 27325 }, { "epoch": 0.47, "learning_rate": 0.00032975562308280057, "loss": 3.4059, "step": 27330 }, { "epoch": 0.47, "learning_rate": 0.00032967376256444247, "loss": 3.3607, "step": 27335 }, { "epoch": 0.47, "learning_rate": 0.00032959189981476947, "loss": 3.2867, "step": 27340 }, { "epoch": 0.47, "learning_rate": 0.00032951003483993734, "loss": 3.3518, "step": 27345 }, { "epoch": 0.47, "learning_rate": 0.00032942816764610164, "loss": 3.2867, "step": 27350 }, { "epoch": 0.47, "learning_rate": 0.0003293462982394187, "loss": 3.3899, "step": 27355 }, { "epoch": 0.47, "learning_rate": 0.0003292644266260445, "loss": 3.22, "step": 27360 }, { "epoch": 0.47, "learning_rate": 0.0003291825528121354, "loss": 3.4481, "step": 27365 }, { "epoch": 0.47, "learning_rate": 0.00032910067680384786, "loss": 3.2794, "step": 27370 }, { "epoch": 0.47, "learning_rate": 0.00032901879860733856, "loss": 3.3851, "step": 27375 }, { "epoch": 0.47, "learning_rate": 0.0003289369182287644, "loss": 3.3057, "step": 27380 }, { "epoch": 0.47, "learning_rate": 0.00032885503567428224, "loss": 3.3024, "step": 27385 }, { "epoch": 0.47, "learning_rate": 0.0003287731509500492, "loss": 3.3614, "step": 27390 }, { "epoch": 0.47, "learning_rate": 0.0003286912640622227, "loss": 3.2706, "step": 27395 }, { "epoch": 0.47, "learning_rate": 0.0003286093750169603, "loss": 3.2274, "step": 27400 }, { "epoch": 0.47, "learning_rate": 0.0003285274838204194, "loss": 3.3175, "step": 27405 }, { "epoch": 0.47, "learning_rate": 0.000328445590478758, "loss": 3.4169, "step": 27410 }, { "epoch": 0.47, "learning_rate": 0.00032836369499813383, "loss": 3.3511, "step": 27415 }, { "epoch": 0.47, "learning_rate": 0.00032828179738470534, "loss": 3.4634, "step": 27420 }, { "epoch": 0.47, "learning_rate": 0.00032819989764463055, "loss": 3.3259, "step": 27425 }, { "epoch": 0.47, "learning_rate": 0.00032811799578406794, "loss": 3.3551, "step": 27430 }, { "epoch": 0.47, "learning_rate": 0.0003280360918091762, "loss": 3.2537, "step": 27435 }, { "epoch": 0.47, "learning_rate": 0.00032795418572611405, "loss": 3.2446, "step": 27440 }, { "epoch": 0.47, "learning_rate": 0.00032787227754104033, "loss": 3.289, "step": 27445 }, { "epoch": 0.48, "learning_rate": 0.00032779036726011425, "loss": 3.3049, "step": 27450 }, { "epoch": 0.48, "learning_rate": 0.0003277084548894949, "loss": 3.3346, "step": 27455 }, { "epoch": 0.48, "learning_rate": 0.00032762654043534185, "loss": 3.3805, "step": 27460 }, { "epoch": 0.48, "learning_rate": 0.0003275446239038145, "loss": 3.343, "step": 27465 }, { "epoch": 0.48, "learning_rate": 0.0003274627053010725, "loss": 3.2794, "step": 27470 }, { "epoch": 0.48, "learning_rate": 0.000327380784633276, "loss": 3.34, "step": 27475 }, { "epoch": 0.48, "learning_rate": 0.00032729886190658463, "loss": 3.3971, "step": 27480 }, { "epoch": 0.48, "learning_rate": 0.0003272169371271589, "loss": 3.3285, "step": 27485 }, { "epoch": 0.48, "learning_rate": 0.0003271350103011589, "loss": 3.2486, "step": 27490 }, { "epoch": 0.48, "learning_rate": 0.0003270530814347452, "loss": 3.4702, "step": 27495 }, { "epoch": 0.48, "learning_rate": 0.00032697115053407843, "loss": 3.4144, "step": 27500 }, { "epoch": 0.48, "eval_loss": 3.368710994720459, "eval_runtime": 149.7624, "eval_samples_per_second": 12.293, "eval_steps_per_second": 0.775, "step": 27500 }, { "epoch": 0.48, "learning_rate": 0.00032688921760531936, "loss": 3.3579, "step": 27505 }, { "epoch": 0.48, "learning_rate": 0.00032680728265462885, "loss": 3.2252, "step": 27510 }, { "epoch": 0.48, "learning_rate": 0.0003267253456881681, "loss": 3.3385, "step": 27515 }, { "epoch": 0.48, "learning_rate": 0.00032664340671209846, "loss": 3.4116, "step": 27520 }, { "epoch": 0.48, "learning_rate": 0.00032656146573258114, "loss": 3.3403, "step": 27525 }, { "epoch": 0.48, "learning_rate": 0.0003264795227557776, "loss": 3.1292, "step": 27530 }, { "epoch": 0.48, "learning_rate": 0.00032639757778784965, "loss": 3.3242, "step": 27535 }, { "epoch": 0.48, "learning_rate": 0.0003263156308349593, "loss": 3.259, "step": 27540 }, { "epoch": 0.48, "learning_rate": 0.00032623368190326827, "loss": 3.4236, "step": 27545 }, { "epoch": 0.48, "learning_rate": 0.0003261517309989389, "loss": 3.3361, "step": 27550 }, { "epoch": 0.48, "learning_rate": 0.00032606977812813323, "loss": 3.4113, "step": 27555 }, { "epoch": 0.48, "learning_rate": 0.00032598782329701397, "loss": 3.2926, "step": 27560 }, { "epoch": 0.48, "learning_rate": 0.0003259058665117436, "loss": 3.3761, "step": 27565 }, { "epoch": 0.48, "learning_rate": 0.00032582390777848464, "loss": 3.371, "step": 27570 }, { "epoch": 0.48, "learning_rate": 0.00032574194710340037, "loss": 3.455, "step": 27575 }, { "epoch": 0.48, "learning_rate": 0.00032565998449265353, "loss": 3.3533, "step": 27580 }, { "epoch": 0.48, "learning_rate": 0.0003255780199524073, "loss": 3.4031, "step": 27585 }, { "epoch": 0.48, "learning_rate": 0.0003254960534888252, "loss": 3.3147, "step": 27590 }, { "epoch": 0.48, "learning_rate": 0.0003254140851080704, "loss": 3.2974, "step": 27595 }, { "epoch": 0.48, "learning_rate": 0.0003253321148163067, "loss": 3.3253, "step": 27600 }, { "epoch": 0.48, "learning_rate": 0.00032525014261969786, "loss": 3.4219, "step": 27605 }, { "epoch": 0.48, "learning_rate": 0.00032516816852440756, "loss": 3.3182, "step": 27610 }, { "epoch": 0.48, "learning_rate": 0.0003250861925366001, "loss": 3.4381, "step": 27615 }, { "epoch": 0.48, "learning_rate": 0.00032500421466243946, "loss": 3.2992, "step": 27620 }, { "epoch": 0.48, "learning_rate": 0.00032492223490809, "loss": 3.3346, "step": 27625 }, { "epoch": 0.48, "learning_rate": 0.0003248402532797163, "loss": 3.4011, "step": 27630 }, { "epoch": 0.48, "learning_rate": 0.00032475826978348274, "loss": 3.3957, "step": 27635 }, { "epoch": 0.48, "learning_rate": 0.0003246762844255542, "loss": 3.4597, "step": 27640 }, { "epoch": 0.48, "learning_rate": 0.00032459429721209564, "loss": 3.418, "step": 27645 }, { "epoch": 0.48, "learning_rate": 0.00032451230814927183, "loss": 3.2963, "step": 27650 }, { "epoch": 0.48, "learning_rate": 0.0003244303172432482, "loss": 3.2607, "step": 27655 }, { "epoch": 0.48, "learning_rate": 0.00032434832450018987, "loss": 3.3302, "step": 27660 }, { "epoch": 0.48, "learning_rate": 0.00032426632992626234, "loss": 3.3218, "step": 27665 }, { "epoch": 0.48, "learning_rate": 0.00032418433352763106, "loss": 3.2973, "step": 27670 }, { "epoch": 0.48, "learning_rate": 0.00032410233531046185, "loss": 3.2454, "step": 27675 }, { "epoch": 0.48, "learning_rate": 0.0003240203352809206, "loss": 3.4339, "step": 27680 }, { "epoch": 0.48, "learning_rate": 0.00032393833344517334, "loss": 3.3274, "step": 27685 }, { "epoch": 0.48, "learning_rate": 0.000323856329809386, "loss": 3.2768, "step": 27690 }, { "epoch": 0.48, "learning_rate": 0.0003237743243797249, "loss": 3.2868, "step": 27695 }, { "epoch": 0.48, "learning_rate": 0.0003236923171623564, "loss": 3.3539, "step": 27700 }, { "epoch": 0.48, "learning_rate": 0.0003236103081634472, "loss": 3.2585, "step": 27705 }, { "epoch": 0.48, "learning_rate": 0.00032352829738916365, "loss": 3.279, "step": 27710 }, { "epoch": 0.48, "learning_rate": 0.0003234462848456729, "loss": 3.3725, "step": 27715 }, { "epoch": 0.48, "learning_rate": 0.0003233642705391416, "loss": 3.3193, "step": 27720 }, { "epoch": 0.48, "learning_rate": 0.0003232822544757369, "loss": 3.2873, "step": 27725 }, { "epoch": 0.48, "learning_rate": 0.00032320023666162606, "loss": 3.2741, "step": 27730 }, { "epoch": 0.48, "learning_rate": 0.0003231182171029761, "loss": 3.3445, "step": 27735 }, { "epoch": 0.48, "learning_rate": 0.0003230361958059549, "loss": 3.1675, "step": 27740 }, { "epoch": 0.48, "learning_rate": 0.00032295417277672985, "loss": 3.3555, "step": 27745 }, { "epoch": 0.48, "learning_rate": 0.00032287214802146854, "loss": 3.3363, "step": 27750 }, { "epoch": 0.48, "learning_rate": 0.000322790121546339, "loss": 3.3608, "step": 27755 }, { "epoch": 0.48, "learning_rate": 0.00032270809335750895, "loss": 3.3135, "step": 27760 }, { "epoch": 0.48, "learning_rate": 0.00032262606346114683, "loss": 3.3241, "step": 27765 }, { "epoch": 0.48, "learning_rate": 0.00032254403186342067, "loss": 3.3791, "step": 27770 }, { "epoch": 0.48, "learning_rate": 0.00032246199857049877, "loss": 3.2657, "step": 27775 }, { "epoch": 0.48, "learning_rate": 0.0003223799635885498, "loss": 3.3029, "step": 27780 }, { "epoch": 0.48, "learning_rate": 0.0003222979269237423, "loss": 3.3314, "step": 27785 }, { "epoch": 0.48, "learning_rate": 0.0003222158885822447, "loss": 3.1889, "step": 27790 }, { "epoch": 0.48, "learning_rate": 0.00032213384857022635, "loss": 3.3532, "step": 27795 }, { "epoch": 0.48, "learning_rate": 0.000322051806893856, "loss": 3.2229, "step": 27800 }, { "epoch": 0.48, "learning_rate": 0.0003219697635593029, "loss": 3.2576, "step": 27805 }, { "epoch": 0.48, "learning_rate": 0.000321887718572736, "loss": 3.4196, "step": 27810 }, { "epoch": 0.48, "learning_rate": 0.00032180567194032476, "loss": 3.2642, "step": 27815 }, { "epoch": 0.48, "learning_rate": 0.0003217236236682389, "loss": 3.3044, "step": 27820 }, { "epoch": 0.48, "learning_rate": 0.0003216415737626479, "loss": 3.277, "step": 27825 }, { "epoch": 0.48, "learning_rate": 0.0003215595222297214, "loss": 3.3251, "step": 27830 }, { "epoch": 0.48, "learning_rate": 0.0003214774690756293, "loss": 3.291, "step": 27835 }, { "epoch": 0.48, "learning_rate": 0.0003213954143065416, "loss": 3.3613, "step": 27840 }, { "epoch": 0.48, "learning_rate": 0.00032131335792862846, "loss": 3.3399, "step": 27845 }, { "epoch": 0.48, "learning_rate": 0.00032123129994805993, "loss": 3.2345, "step": 27850 }, { "epoch": 0.48, "learning_rate": 0.0003211492403710065, "loss": 3.2515, "step": 27855 }, { "epoch": 0.48, "learning_rate": 0.00032106717920363855, "loss": 3.369, "step": 27860 }, { "epoch": 0.48, "learning_rate": 0.0003209851164521268, "loss": 3.2912, "step": 27865 }, { "epoch": 0.48, "learning_rate": 0.00032090305212264175, "loss": 3.355, "step": 27870 }, { "epoch": 0.48, "learning_rate": 0.00032082098622135424, "loss": 3.3553, "step": 27875 }, { "epoch": 0.48, "learning_rate": 0.0003207389187544354, "loss": 3.3148, "step": 27880 }, { "epoch": 0.48, "learning_rate": 0.0003206568497280562, "loss": 3.2627, "step": 27885 }, { "epoch": 0.48, "learning_rate": 0.0003205747791483876, "loss": 3.388, "step": 27890 }, { "epoch": 0.48, "learning_rate": 0.0003204927070216011, "loss": 3.2773, "step": 27895 }, { "epoch": 0.48, "learning_rate": 0.0003204106333538681, "loss": 3.363, "step": 27900 }, { "epoch": 0.48, "learning_rate": 0.00032032855815136, "loss": 3.442, "step": 27905 }, { "epoch": 0.48, "learning_rate": 0.00032024648142024866, "loss": 3.2348, "step": 27910 }, { "epoch": 0.48, "learning_rate": 0.00032016440316670554, "loss": 3.2679, "step": 27915 }, { "epoch": 0.48, "learning_rate": 0.0003200823233969027, "loss": 3.2793, "step": 27920 }, { "epoch": 0.48, "learning_rate": 0.00032000024211701207, "loss": 3.2952, "step": 27925 }, { "epoch": 0.48, "learning_rate": 0.00031991815933320574, "loss": 3.1779, "step": 27930 }, { "epoch": 0.48, "learning_rate": 0.00031983607505165585, "loss": 3.2989, "step": 27935 }, { "epoch": 0.48, "learning_rate": 0.0003197539892785348, "loss": 3.3453, "step": 27940 }, { "epoch": 0.48, "learning_rate": 0.0003196719020200151, "loss": 3.2617, "step": 27945 }, { "epoch": 0.48, "learning_rate": 0.000319589813282269, "loss": 3.4275, "step": 27950 }, { "epoch": 0.48, "learning_rate": 0.00031950772307146936, "loss": 3.4124, "step": 27955 }, { "epoch": 0.48, "learning_rate": 0.00031942563139378906, "loss": 3.4406, "step": 27960 }, { "epoch": 0.48, "learning_rate": 0.00031934353825540074, "loss": 3.4792, "step": 27965 }, { "epoch": 0.48, "learning_rate": 0.0003192614436624774, "loss": 3.3811, "step": 27970 }, { "epoch": 0.48, "learning_rate": 0.00031917934762119236, "loss": 3.3476, "step": 27975 }, { "epoch": 0.48, "learning_rate": 0.0003190972501377186, "loss": 3.366, "step": 27980 }, { "epoch": 0.48, "learning_rate": 0.0003190151512182296, "loss": 3.3788, "step": 27985 }, { "epoch": 0.48, "learning_rate": 0.00031893305086889853, "loss": 3.3044, "step": 27990 }, { "epoch": 0.48, "learning_rate": 0.0003188509490958991, "loss": 3.3833, "step": 27995 }, { "epoch": 0.48, "learning_rate": 0.00031876884590540495, "loss": 3.3097, "step": 28000 }, { "epoch": 0.48, "eval_loss": 3.3711462020874023, "eval_runtime": 149.7727, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.775, "step": 28000 }, { "epoch": 0.48, "learning_rate": 0.0003186867413035898, "loss": 3.2273, "step": 28005 }, { "epoch": 0.48, "learning_rate": 0.0003186046352966274, "loss": 3.2312, "step": 28010 }, { "epoch": 0.48, "learning_rate": 0.00031852252789069184, "loss": 3.3193, "step": 28015 }, { "epoch": 0.48, "learning_rate": 0.000318440419091957, "loss": 3.3245, "step": 28020 }, { "epoch": 0.49, "learning_rate": 0.0003183583089065973, "loss": 3.3313, "step": 28025 }, { "epoch": 0.49, "learning_rate": 0.00031827619734078676, "loss": 3.3352, "step": 28030 }, { "epoch": 0.49, "learning_rate": 0.0003181940844006999, "loss": 3.3963, "step": 28035 }, { "epoch": 0.49, "learning_rate": 0.00031811197009251103, "loss": 3.3619, "step": 28040 }, { "epoch": 0.49, "learning_rate": 0.0003180298544223949, "loss": 3.3542, "step": 28045 }, { "epoch": 0.49, "learning_rate": 0.00031794773739652623, "loss": 3.3766, "step": 28050 }, { "epoch": 0.49, "learning_rate": 0.0003178656190210795, "loss": 3.4085, "step": 28055 }, { "epoch": 0.49, "learning_rate": 0.0003177834993022299, "loss": 3.3648, "step": 28060 }, { "epoch": 0.49, "learning_rate": 0.00031770137824615227, "loss": 3.4042, "step": 28065 }, { "epoch": 0.49, "learning_rate": 0.0003176192558590217, "loss": 3.3609, "step": 28070 }, { "epoch": 0.49, "learning_rate": 0.00031753713214701334, "loss": 3.4013, "step": 28075 }, { "epoch": 0.49, "learning_rate": 0.00031745500711630257, "loss": 3.3054, "step": 28080 }, { "epoch": 0.49, "learning_rate": 0.0003173728807730647, "loss": 3.373, "step": 28085 }, { "epoch": 0.49, "learning_rate": 0.0003172907531234752, "loss": 3.3296, "step": 28090 }, { "epoch": 0.49, "learning_rate": 0.00031720862417370976, "loss": 3.3439, "step": 28095 }, { "epoch": 0.49, "learning_rate": 0.0003171264939299439, "loss": 3.3268, "step": 28100 }, { "epoch": 0.49, "learning_rate": 0.00031704436239835356, "loss": 3.3792, "step": 28105 }, { "epoch": 0.49, "learning_rate": 0.00031696222958511443, "loss": 3.3698, "step": 28110 }, { "epoch": 0.49, "learning_rate": 0.00031688009549640263, "loss": 3.3603, "step": 28115 }, { "epoch": 0.49, "learning_rate": 0.0003167979601383941, "loss": 3.2905, "step": 28120 }, { "epoch": 0.49, "learning_rate": 0.00031671582351726514, "loss": 3.1407, "step": 28125 }, { "epoch": 0.49, "learning_rate": 0.00031663368563919183, "loss": 3.3483, "step": 28130 }, { "epoch": 0.49, "learning_rate": 0.0003165515465103506, "loss": 3.3586, "step": 28135 }, { "epoch": 0.49, "learning_rate": 0.000316469406136918, "loss": 3.3597, "step": 28140 }, { "epoch": 0.49, "learning_rate": 0.0003163872645250704, "loss": 3.4144, "step": 28145 }, { "epoch": 0.49, "learning_rate": 0.0003163051216809845, "loss": 3.3252, "step": 28150 }, { "epoch": 0.49, "learning_rate": 0.00031622297761083696, "loss": 3.2158, "step": 28155 }, { "epoch": 0.49, "learning_rate": 0.0003161408323208047, "loss": 3.268, "step": 28160 }, { "epoch": 0.49, "learning_rate": 0.00031605868581706457, "loss": 3.2614, "step": 28165 }, { "epoch": 0.49, "learning_rate": 0.0003159765381057935, "loss": 3.32, "step": 28170 }, { "epoch": 0.49, "learning_rate": 0.00031589438919316874, "loss": 3.4281, "step": 28175 }, { "epoch": 0.49, "learning_rate": 0.0003158122390853673, "loss": 3.4212, "step": 28180 }, { "epoch": 0.49, "learning_rate": 0.0003157300877885665, "loss": 3.254, "step": 28185 }, { "epoch": 0.49, "learning_rate": 0.00031564793530894386, "loss": 3.3829, "step": 28190 }, { "epoch": 0.49, "learning_rate": 0.00031556578165267647, "loss": 3.2446, "step": 28195 }, { "epoch": 0.49, "learning_rate": 0.00031548362682594215, "loss": 3.3505, "step": 28200 }, { "epoch": 0.49, "learning_rate": 0.0003154014708349186, "loss": 3.3956, "step": 28205 }, { "epoch": 0.49, "learning_rate": 0.0003153193136857832, "loss": 3.3643, "step": 28210 }, { "epoch": 0.49, "learning_rate": 0.00031523715538471394, "loss": 3.2072, "step": 28215 }, { "epoch": 0.49, "learning_rate": 0.0003151549959378887, "loss": 3.3384, "step": 28220 }, { "epoch": 0.49, "learning_rate": 0.00031507283535148553, "loss": 3.3184, "step": 28225 }, { "epoch": 0.49, "learning_rate": 0.0003149906736316823, "loss": 3.3089, "step": 28230 }, { "epoch": 0.49, "learning_rate": 0.00031490851078465717, "loss": 3.2242, "step": 28235 }, { "epoch": 0.49, "learning_rate": 0.00031482634681658856, "loss": 3.3506, "step": 28240 }, { "epoch": 0.49, "learning_rate": 0.00031474418173365465, "loss": 3.2773, "step": 28245 }, { "epoch": 0.49, "learning_rate": 0.0003146620155420337, "loss": 3.3073, "step": 28250 }, { "epoch": 0.49, "learning_rate": 0.0003145798482479044, "loss": 3.3887, "step": 28255 }, { "epoch": 0.49, "learning_rate": 0.00031449767985744525, "loss": 3.2862, "step": 28260 }, { "epoch": 0.49, "learning_rate": 0.0003144155103768349, "loss": 3.3459, "step": 28265 }, { "epoch": 0.49, "learning_rate": 0.00031433333981225204, "loss": 3.3802, "step": 28270 }, { "epoch": 0.49, "learning_rate": 0.00031425116816987536, "loss": 3.3579, "step": 28275 }, { "epoch": 0.49, "learning_rate": 0.00031416899545588404, "loss": 3.4191, "step": 28280 }, { "epoch": 0.49, "learning_rate": 0.0003140868216764568, "loss": 3.3477, "step": 28285 }, { "epoch": 0.49, "learning_rate": 0.0003140046468377728, "loss": 3.3956, "step": 28290 }, { "epoch": 0.49, "learning_rate": 0.00031392247094601104, "loss": 3.4055, "step": 28295 }, { "epoch": 0.49, "learning_rate": 0.00031384029400735087, "loss": 3.3708, "step": 28300 }, { "epoch": 0.49, "learning_rate": 0.00031375811602797155, "loss": 3.4004, "step": 28305 }, { "epoch": 0.49, "learning_rate": 0.00031367593701405236, "loss": 3.307, "step": 28310 }, { "epoch": 0.49, "learning_rate": 0.00031359375697177284, "loss": 3.3867, "step": 28315 }, { "epoch": 0.49, "learning_rate": 0.00031351157590731244, "loss": 3.3628, "step": 28320 }, { "epoch": 0.49, "learning_rate": 0.0003134293938268508, "loss": 3.3526, "step": 28325 }, { "epoch": 0.49, "learning_rate": 0.00031334721073656763, "loss": 3.2619, "step": 28330 }, { "epoch": 0.49, "learning_rate": 0.0003132650266426425, "loss": 3.2303, "step": 28335 }, { "epoch": 0.49, "learning_rate": 0.00031318284155125534, "loss": 3.3438, "step": 28340 }, { "epoch": 0.49, "learning_rate": 0.00031310065546858606, "loss": 3.3175, "step": 28345 }, { "epoch": 0.49, "learning_rate": 0.00031301846840081475, "loss": 3.348, "step": 28350 }, { "epoch": 0.49, "learning_rate": 0.0003129362803541213, "loss": 3.4054, "step": 28355 }, { "epoch": 0.49, "learning_rate": 0.00031285409133468574, "loss": 3.2905, "step": 28360 }, { "epoch": 0.49, "learning_rate": 0.00031277190134868855, "loss": 3.3457, "step": 28365 }, { "epoch": 0.49, "learning_rate": 0.00031268971040230973, "loss": 3.2979, "step": 28370 }, { "epoch": 0.49, "learning_rate": 0.00031260751850172974, "loss": 3.3189, "step": 28375 }, { "epoch": 0.49, "learning_rate": 0.00031252532565312894, "loss": 3.2012, "step": 28380 }, { "epoch": 0.49, "learning_rate": 0.0003124431318626879, "loss": 3.2903, "step": 28385 }, { "epoch": 0.49, "learning_rate": 0.00031236093713658705, "loss": 3.394, "step": 28390 }, { "epoch": 0.49, "learning_rate": 0.0003122787414810072, "loss": 3.376, "step": 28395 }, { "epoch": 0.49, "learning_rate": 0.0003121965449021288, "loss": 3.424, "step": 28400 }, { "epoch": 0.49, "learning_rate": 0.0003121143474061328, "loss": 3.2631, "step": 28405 }, { "epoch": 0.49, "learning_rate": 0.0003120321489991999, "loss": 3.3431, "step": 28410 }, { "epoch": 0.49, "learning_rate": 0.0003119499496875111, "loss": 3.3405, "step": 28415 }, { "epoch": 0.49, "learning_rate": 0.0003118677494772473, "loss": 3.288, "step": 28420 }, { "epoch": 0.49, "learning_rate": 0.0003117855483745896, "loss": 3.3591, "step": 28425 }, { "epoch": 0.49, "learning_rate": 0.0003117033463857191, "loss": 3.3612, "step": 28430 }, { "epoch": 0.49, "learning_rate": 0.0003116211435168168, "loss": 3.2559, "step": 28435 }, { "epoch": 0.49, "learning_rate": 0.00031153893977406405, "loss": 3.3729, "step": 28440 }, { "epoch": 0.49, "learning_rate": 0.00031145673516364225, "loss": 3.3824, "step": 28445 }, { "epoch": 0.49, "learning_rate": 0.00031137452969173275, "loss": 3.3824, "step": 28450 }, { "epoch": 0.49, "learning_rate": 0.00031129232336451676, "loss": 3.3525, "step": 28455 }, { "epoch": 0.49, "learning_rate": 0.000311210116188176, "loss": 3.2539, "step": 28460 }, { "epoch": 0.49, "learning_rate": 0.000311127908168892, "loss": 3.2978, "step": 28465 }, { "epoch": 0.49, "learning_rate": 0.0003110456993128464, "loss": 3.2967, "step": 28470 }, { "epoch": 0.49, "learning_rate": 0.00031096348962622063, "loss": 3.352, "step": 28475 }, { "epoch": 0.49, "learning_rate": 0.00031088127911519666, "loss": 3.3765, "step": 28480 }, { "epoch": 0.49, "learning_rate": 0.00031079906778595627, "loss": 3.4548, "step": 28485 }, { "epoch": 0.49, "learning_rate": 0.00031071685564468146, "loss": 3.2177, "step": 28490 }, { "epoch": 0.49, "learning_rate": 0.00031063464269755393, "loss": 3.2067, "step": 28495 }, { "epoch": 0.49, "learning_rate": 0.00031055242895075574, "loss": 3.2593, "step": 28500 }, { "epoch": 0.49, "eval_loss": 3.359267473220825, "eval_runtime": 149.9726, "eval_samples_per_second": 12.276, "eval_steps_per_second": 0.773, "step": 28500 }, { "epoch": 0.49, "learning_rate": 0.00031047021441046906, "loss": 3.3636, "step": 28505 }, { "epoch": 0.49, "learning_rate": 0.0003103879990828759, "loss": 3.3139, "step": 28510 }, { "epoch": 0.49, "learning_rate": 0.00031030578297415836, "loss": 3.379, "step": 28515 }, { "epoch": 0.49, "learning_rate": 0.0003102235660904988, "loss": 3.3304, "step": 28520 }, { "epoch": 0.49, "learning_rate": 0.00031014134843807944, "loss": 3.3501, "step": 28525 }, { "epoch": 0.49, "learning_rate": 0.00031005913002308275, "loss": 3.3744, "step": 28530 }, { "epoch": 0.49, "learning_rate": 0.000309976910851691, "loss": 3.2913, "step": 28535 }, { "epoch": 0.49, "learning_rate": 0.0003098946909300866, "loss": 3.2803, "step": 28540 }, { "epoch": 0.49, "learning_rate": 0.00030981247026445224, "loss": 3.3851, "step": 28545 }, { "epoch": 0.49, "learning_rate": 0.0003097302488609704, "loss": 3.2995, "step": 28550 }, { "epoch": 0.49, "learning_rate": 0.00030964802672582364, "loss": 3.356, "step": 28555 }, { "epoch": 0.49, "learning_rate": 0.00030956580386519477, "loss": 3.2917, "step": 28560 }, { "epoch": 0.49, "learning_rate": 0.00030948358028526644, "loss": 3.3783, "step": 28565 }, { "epoch": 0.49, "learning_rate": 0.0003094013559922215, "loss": 3.2218, "step": 28570 }, { "epoch": 0.49, "learning_rate": 0.0003093191309922428, "loss": 3.163, "step": 28575 }, { "epoch": 0.49, "learning_rate": 0.00030923690529151315, "loss": 3.3771, "step": 28580 }, { "epoch": 0.49, "learning_rate": 0.0003091546788962156, "loss": 3.3725, "step": 28585 }, { "epoch": 0.49, "learning_rate": 0.0003090724518125331, "loss": 3.3931, "step": 28590 }, { "epoch": 0.49, "learning_rate": 0.0003089902240466487, "loss": 3.3111, "step": 28595 }, { "epoch": 0.49, "learning_rate": 0.0003089079956047456, "loss": 3.3732, "step": 28600 }, { "epoch": 0.5, "learning_rate": 0.00030882576649300675, "loss": 3.442, "step": 28605 }, { "epoch": 0.5, "learning_rate": 0.0003087435367176157, "loss": 3.393, "step": 28610 }, { "epoch": 0.5, "learning_rate": 0.0003086613062847553, "loss": 3.3046, "step": 28615 }, { "epoch": 0.5, "learning_rate": 0.0003085790752006091, "loss": 3.3409, "step": 28620 }, { "epoch": 0.5, "learning_rate": 0.0003084968434713605, "loss": 3.3822, "step": 28625 }, { "epoch": 0.5, "learning_rate": 0.0003084146111031927, "loss": 3.2859, "step": 28630 }, { "epoch": 0.5, "learning_rate": 0.0003083323781022894, "loss": 3.3541, "step": 28635 }, { "epoch": 0.5, "learning_rate": 0.00030825014447483383, "loss": 3.286, "step": 28640 }, { "epoch": 0.5, "learning_rate": 0.0003081679102270098, "loss": 3.3848, "step": 28645 }, { "epoch": 0.5, "learning_rate": 0.00030808567536500073, "loss": 3.3427, "step": 28650 }, { "epoch": 0.5, "learning_rate": 0.0003080034398949903, "loss": 3.2654, "step": 28655 }, { "epoch": 0.5, "learning_rate": 0.0003079212038231622, "loss": 3.2767, "step": 28660 }, { "epoch": 0.5, "learning_rate": 0.00030783896715570015, "loss": 3.2923, "step": 28665 }, { "epoch": 0.5, "learning_rate": 0.00030775672989878796, "loss": 3.2185, "step": 28670 }, { "epoch": 0.5, "learning_rate": 0.0003076744920586095, "loss": 3.317, "step": 28675 }, { "epoch": 0.5, "learning_rate": 0.00030759225364134846, "loss": 3.2964, "step": 28680 }, { "epoch": 0.5, "learning_rate": 0.0003075100146531889, "loss": 3.3482, "step": 28685 }, { "epoch": 0.5, "learning_rate": 0.0003074277751003148, "loss": 3.456, "step": 28690 }, { "epoch": 0.5, "learning_rate": 0.00030734553498891, "loss": 3.3257, "step": 28695 }, { "epoch": 0.5, "learning_rate": 0.00030726329432515865, "loss": 3.4125, "step": 28700 }, { "epoch": 0.5, "learning_rate": 0.00030718105311524475, "loss": 3.3209, "step": 28705 }, { "epoch": 0.5, "learning_rate": 0.00030709881136535246, "loss": 3.2977, "step": 28710 }, { "epoch": 0.5, "learning_rate": 0.000307016569081666, "loss": 3.4025, "step": 28715 }, { "epoch": 0.5, "learning_rate": 0.0003069343262703694, "loss": 3.2998, "step": 28720 }, { "epoch": 0.5, "learning_rate": 0.00030685208293764713, "loss": 3.3609, "step": 28725 }, { "epoch": 0.5, "learning_rate": 0.00030676983908968324, "loss": 3.3702, "step": 28730 }, { "epoch": 0.5, "learning_rate": 0.00030668759473266214, "loss": 3.3413, "step": 28735 }, { "epoch": 0.5, "learning_rate": 0.00030660534987276823, "loss": 3.3318, "step": 28740 }, { "epoch": 0.5, "learning_rate": 0.00030652310451618586, "loss": 3.3335, "step": 28745 }, { "epoch": 0.5, "learning_rate": 0.00030644085866909953, "loss": 3.1442, "step": 28750 }, { "epoch": 0.5, "learning_rate": 0.00030635861233769353, "loss": 3.4118, "step": 28755 }, { "epoch": 0.5, "learning_rate": 0.00030627636552815247, "loss": 3.3533, "step": 28760 }, { "epoch": 0.5, "learning_rate": 0.0003061941182466609, "loss": 3.3464, "step": 28765 }, { "epoch": 0.5, "learning_rate": 0.0003061118704994035, "loss": 3.3673, "step": 28770 }, { "epoch": 0.5, "learning_rate": 0.0003060296222925646, "loss": 3.2905, "step": 28775 }, { "epoch": 0.5, "learning_rate": 0.0003059473736323291, "loss": 3.3231, "step": 28780 }, { "epoch": 0.5, "learning_rate": 0.0003058651245248815, "loss": 3.3518, "step": 28785 }, { "epoch": 0.5, "learning_rate": 0.00030578287497640675, "loss": 3.2899, "step": 28790 }, { "epoch": 0.5, "learning_rate": 0.0003057006249930893, "loss": 3.4217, "step": 28795 }, { "epoch": 0.5, "learning_rate": 0.0003056183745811141, "loss": 3.3105, "step": 28800 }, { "epoch": 0.5, "learning_rate": 0.00030553612374666593, "loss": 3.3716, "step": 28805 }, { "epoch": 0.5, "learning_rate": 0.00030545387249592964, "loss": 3.3545, "step": 28810 }, { "epoch": 0.5, "learning_rate": 0.00030537162083509007, "loss": 3.2589, "step": 28815 }, { "epoch": 0.5, "learning_rate": 0.0003052893687703321, "loss": 3.3462, "step": 28820 }, { "epoch": 0.5, "learning_rate": 0.00030520711630784074, "loss": 3.3051, "step": 28825 }, { "epoch": 0.5, "learning_rate": 0.00030512486345380096, "loss": 3.2761, "step": 28830 }, { "epoch": 0.5, "learning_rate": 0.0003050426102143976, "loss": 3.2527, "step": 28835 }, { "epoch": 0.5, "learning_rate": 0.000304960356595816, "loss": 3.2873, "step": 28840 }, { "epoch": 0.5, "learning_rate": 0.00030487810260424077, "loss": 3.3043, "step": 28845 }, { "epoch": 0.5, "learning_rate": 0.0003047958482458574, "loss": 3.2753, "step": 28850 }, { "epoch": 0.5, "learning_rate": 0.0003047135935268508, "loss": 3.4139, "step": 28855 }, { "epoch": 0.5, "learning_rate": 0.000304631338453406, "loss": 3.2713, "step": 28860 }, { "epoch": 0.5, "learning_rate": 0.00030454908303170845, "loss": 3.2992, "step": 28865 }, { "epoch": 0.5, "learning_rate": 0.0003044668272679431, "loss": 3.3822, "step": 28870 }, { "epoch": 0.5, "learning_rate": 0.00030438457116829516, "loss": 3.3267, "step": 28875 }, { "epoch": 0.5, "learning_rate": 0.00030430231473895, "loss": 3.2699, "step": 28880 }, { "epoch": 0.5, "learning_rate": 0.00030422005798609277, "loss": 3.2666, "step": 28885 }, { "epoch": 0.5, "learning_rate": 0.0003041378009159089, "loss": 3.3945, "step": 28890 }, { "epoch": 0.5, "learning_rate": 0.00030405554353458357, "loss": 3.2789, "step": 28895 }, { "epoch": 0.5, "learning_rate": 0.00030397328584830214, "loss": 3.3467, "step": 28900 }, { "epoch": 0.5, "learning_rate": 0.00030389102786325, "loss": 3.3268, "step": 28905 }, { "epoch": 0.5, "learning_rate": 0.0003038087695856125, "loss": 3.3759, "step": 28910 }, { "epoch": 0.5, "learning_rate": 0.000303726511021575, "loss": 3.3726, "step": 28915 }, { "epoch": 0.5, "learning_rate": 0.000303644252177323, "loss": 3.3361, "step": 28920 }, { "epoch": 0.5, "learning_rate": 0.00030356199305904183, "loss": 3.381, "step": 28925 }, { "epoch": 0.5, "learning_rate": 0.0003034797336729173, "loss": 3.35, "step": 28930 }, { "epoch": 0.5, "learning_rate": 0.0003033974740251343, "loss": 3.3854, "step": 28935 }, { "epoch": 0.5, "learning_rate": 0.0003033152141218788, "loss": 3.359, "step": 28940 }, { "epoch": 0.5, "learning_rate": 0.0003032329539693362, "loss": 3.2957, "step": 28945 }, { "epoch": 0.5, "learning_rate": 0.0003031506935736921, "loss": 3.2719, "step": 28950 }, { "epoch": 0.5, "learning_rate": 0.000303068432941132, "loss": 3.2425, "step": 28955 }, { "epoch": 0.5, "learning_rate": 0.00030298617207784133, "loss": 3.3014, "step": 28960 }, { "epoch": 0.5, "learning_rate": 0.00030290391099000585, "loss": 3.2075, "step": 28965 }, { "epoch": 0.5, "learning_rate": 0.00030282164968381125, "loss": 3.3241, "step": 28970 }, { "epoch": 0.5, "learning_rate": 0.00030273938816544293, "loss": 3.2996, "step": 28975 }, { "epoch": 0.5, "learning_rate": 0.0003026571264410867, "loss": 3.3114, "step": 28980 }, { "epoch": 0.5, "learning_rate": 0.0003025748645169282, "loss": 3.2326, "step": 28985 }, { "epoch": 0.5, "learning_rate": 0.0003024926023991531, "loss": 3.3887, "step": 28990 }, { "epoch": 0.5, "learning_rate": 0.00030241034009394706, "loss": 3.4096, "step": 28995 }, { "epoch": 0.5, "learning_rate": 0.0003023280776074957, "loss": 3.2992, "step": 29000 }, { "epoch": 0.5, "eval_loss": 3.3513615131378174, "eval_runtime": 150.0716, "eval_samples_per_second": 12.267, "eval_steps_per_second": 0.773, "step": 29000 }, { "epoch": 0.5, "learning_rate": 0.00030224581494598497, "loss": 3.2705, "step": 29005 }, { "epoch": 0.5, "learning_rate": 0.00030216355211560046, "loss": 3.1582, "step": 29010 }, { "epoch": 0.5, "learning_rate": 0.0003020812891225279, "loss": 3.3338, "step": 29015 }, { "epoch": 0.5, "learning_rate": 0.00030199902597295307, "loss": 3.355, "step": 29020 }, { "epoch": 0.5, "learning_rate": 0.00030191676267306164, "loss": 3.3007, "step": 29025 }, { "epoch": 0.5, "learning_rate": 0.0003018344992290396, "loss": 3.3631, "step": 29030 }, { "epoch": 0.5, "learning_rate": 0.00030175223564707257, "loss": 3.3573, "step": 29035 }, { "epoch": 0.5, "learning_rate": 0.0003016699719333463, "loss": 3.3362, "step": 29040 }, { "epoch": 0.5, "learning_rate": 0.00030158770809404686, "loss": 3.324, "step": 29045 }, { "epoch": 0.5, "learning_rate": 0.00030150544413535987, "loss": 3.3532, "step": 29050 }, { "epoch": 0.5, "learning_rate": 0.0003014231800634711, "loss": 3.3602, "step": 29055 }, { "epoch": 0.5, "learning_rate": 0.00030134091588456663, "loss": 3.3985, "step": 29060 }, { "epoch": 0.5, "learning_rate": 0.000301258651604832, "loss": 3.3109, "step": 29065 }, { "epoch": 0.5, "learning_rate": 0.00030117638723045333, "loss": 3.234, "step": 29070 }, { "epoch": 0.5, "learning_rate": 0.00030109412276761635, "loss": 3.3347, "step": 29075 }, { "epoch": 0.5, "learning_rate": 0.0003010118582225069, "loss": 3.334, "step": 29080 }, { "epoch": 0.5, "learning_rate": 0.000300929593601311, "loss": 3.5063, "step": 29085 }, { "epoch": 0.5, "learning_rate": 0.0003008473289102143, "loss": 3.2729, "step": 29090 }, { "epoch": 0.5, "learning_rate": 0.0003007650641554029, "loss": 3.3154, "step": 29095 }, { "epoch": 0.5, "learning_rate": 0.00030068279934306253, "loss": 3.2814, "step": 29100 }, { "epoch": 0.5, "learning_rate": 0.00030060053447937915, "loss": 3.2991, "step": 29105 }, { "epoch": 0.5, "learning_rate": 0.00030051826957053874, "loss": 3.2739, "step": 29110 }, { "epoch": 0.5, "learning_rate": 0.000300436004622727, "loss": 3.3853, "step": 29115 }, { "epoch": 0.5, "learning_rate": 0.0003003537396421301, "loss": 3.3941, "step": 29120 }, { "epoch": 0.5, "learning_rate": 0.00030027147463493364, "loss": 3.2138, "step": 29125 }, { "epoch": 0.5, "learning_rate": 0.00030018920960732376, "loss": 3.2438, "step": 29130 }, { "epoch": 0.5, "learning_rate": 0.0003001069445654863, "loss": 3.1981, "step": 29135 }, { "epoch": 0.5, "learning_rate": 0.00030002467951560706, "loss": 3.2727, "step": 29140 }, { "epoch": 0.5, "learning_rate": 0.00029994241446387206, "loss": 3.3465, "step": 29145 }, { "epoch": 0.5, "learning_rate": 0.0002998601494164673, "loss": 3.3148, "step": 29150 }, { "epoch": 0.5, "learning_rate": 0.0002997778843795786, "loss": 3.2841, "step": 29155 }, { "epoch": 0.5, "learning_rate": 0.00029969561935939173, "loss": 3.3489, "step": 29160 }, { "epoch": 0.5, "learning_rate": 0.0002996133543620928, "loss": 3.2372, "step": 29165 }, { "epoch": 0.5, "learning_rate": 0.0002995310893938676, "loss": 3.2644, "step": 29170 }, { "epoch": 0.5, "learning_rate": 0.00029944882446090207, "loss": 3.2412, "step": 29175 }, { "epoch": 0.5, "learning_rate": 0.0002993665595693822, "loss": 3.3525, "step": 29180 }, { "epoch": 0.51, "learning_rate": 0.0002992842947254937, "loss": 3.2611, "step": 29185 }, { "epoch": 0.51, "learning_rate": 0.00029920202993542267, "loss": 3.3687, "step": 29190 }, { "epoch": 0.51, "learning_rate": 0.0002991197652053548, "loss": 3.4074, "step": 29195 }, { "epoch": 0.51, "learning_rate": 0.0002990375005414761, "loss": 3.3301, "step": 29200 }, { "epoch": 0.51, "learning_rate": 0.0002989552359499725, "loss": 3.2824, "step": 29205 }, { "epoch": 0.51, "learning_rate": 0.0002988729714370297, "loss": 3.3212, "step": 29210 }, { "epoch": 0.51, "learning_rate": 0.0002987907070088338, "loss": 3.3241, "step": 29215 }, { "epoch": 0.51, "learning_rate": 0.0002987084426715704, "loss": 3.3126, "step": 29220 }, { "epoch": 0.51, "learning_rate": 0.0002986261784314255, "loss": 3.2104, "step": 29225 }, { "epoch": 0.51, "learning_rate": 0.0002985439142945851, "loss": 3.3995, "step": 29230 }, { "epoch": 0.51, "learning_rate": 0.0002984616502672348, "loss": 3.3807, "step": 29235 }, { "epoch": 0.51, "learning_rate": 0.00029837938635556043, "loss": 3.2622, "step": 29240 }, { "epoch": 0.51, "learning_rate": 0.0002982971225657479, "loss": 3.2761, "step": 29245 }, { "epoch": 0.51, "learning_rate": 0.00029821485890398313, "loss": 3.3754, "step": 29250 }, { "epoch": 0.51, "learning_rate": 0.00029813259537645186, "loss": 3.4256, "step": 29255 }, { "epoch": 0.51, "learning_rate": 0.0002980503319893397, "loss": 3.3391, "step": 29260 }, { "epoch": 0.51, "learning_rate": 0.00029796806874883265, "loss": 3.3717, "step": 29265 }, { "epoch": 0.51, "learning_rate": 0.00029788580566111645, "loss": 3.3585, "step": 29270 }, { "epoch": 0.51, "learning_rate": 0.0002978035427323767, "loss": 3.3437, "step": 29275 }, { "epoch": 0.51, "learning_rate": 0.0002977212799687995, "loss": 3.3646, "step": 29280 }, { "epoch": 0.51, "learning_rate": 0.00029763901737657017, "loss": 3.2832, "step": 29285 }, { "epoch": 0.51, "learning_rate": 0.0002975567549618747, "loss": 3.1794, "step": 29290 }, { "epoch": 0.51, "learning_rate": 0.0002974744927308987, "loss": 3.3764, "step": 29295 }, { "epoch": 0.51, "learning_rate": 0.000297392230689828, "loss": 3.3258, "step": 29300 }, { "epoch": 0.51, "learning_rate": 0.000297309968844848, "loss": 3.3501, "step": 29305 }, { "epoch": 0.51, "learning_rate": 0.0002972277072021448, "loss": 3.3172, "step": 29310 }, { "epoch": 0.51, "learning_rate": 0.0002971454457679037, "loss": 3.3571, "step": 29315 }, { "epoch": 0.51, "learning_rate": 0.0002970631845483104, "loss": 3.2708, "step": 29320 }, { "epoch": 0.51, "learning_rate": 0.0002969809235495506, "loss": 3.4065, "step": 29325 }, { "epoch": 0.51, "learning_rate": 0.00029689866277780993, "loss": 3.2756, "step": 29330 }, { "epoch": 0.51, "learning_rate": 0.000296816402239274, "loss": 3.3239, "step": 29335 }, { "epoch": 0.51, "learning_rate": 0.00029673414194012816, "loss": 3.284, "step": 29340 }, { "epoch": 0.51, "learning_rate": 0.00029665188188655807, "loss": 3.3406, "step": 29345 }, { "epoch": 0.51, "learning_rate": 0.0002965696220847494, "loss": 3.338, "step": 29350 }, { "epoch": 0.51, "learning_rate": 0.0002964873625408875, "loss": 3.3738, "step": 29355 }, { "epoch": 0.51, "learning_rate": 0.0002964051032611581, "loss": 3.1847, "step": 29360 }, { "epoch": 0.51, "learning_rate": 0.0002963228442517463, "loss": 3.3021, "step": 29365 }, { "epoch": 0.51, "learning_rate": 0.0002962405855188378, "loss": 3.3367, "step": 29370 }, { "epoch": 0.51, "learning_rate": 0.00029615832706861815, "loss": 3.2833, "step": 29375 }, { "epoch": 0.51, "learning_rate": 0.00029607606890727246, "loss": 3.2644, "step": 29380 }, { "epoch": 0.51, "learning_rate": 0.0002959938110409864, "loss": 3.4083, "step": 29385 }, { "epoch": 0.51, "learning_rate": 0.0002959115534759452, "loss": 3.3432, "step": 29390 }, { "epoch": 0.51, "learning_rate": 0.0002958292962183342, "loss": 3.2759, "step": 29395 }, { "epoch": 0.51, "learning_rate": 0.0002957470392743387, "loss": 3.2871, "step": 29400 }, { "epoch": 0.51, "learning_rate": 0.00029566478265014403, "loss": 3.2679, "step": 29405 }, { "epoch": 0.51, "learning_rate": 0.00029558252635193563, "loss": 3.3223, "step": 29410 }, { "epoch": 0.51, "learning_rate": 0.0002955002703858986, "loss": 3.2751, "step": 29415 }, { "epoch": 0.51, "learning_rate": 0.000295418014758218, "loss": 3.3319, "step": 29420 }, { "epoch": 0.51, "learning_rate": 0.00029533575947507927, "loss": 3.3044, "step": 29425 }, { "epoch": 0.51, "learning_rate": 0.0002952535045426676, "loss": 3.2715, "step": 29430 }, { "epoch": 0.51, "learning_rate": 0.00029517124996716816, "loss": 3.147, "step": 29435 }, { "epoch": 0.51, "learning_rate": 0.0002950889957547658, "loss": 3.3518, "step": 29440 }, { "epoch": 0.51, "learning_rate": 0.0002950067419116457, "loss": 3.2539, "step": 29445 }, { "epoch": 0.51, "learning_rate": 0.00029492448844399327, "loss": 3.26, "step": 29450 }, { "epoch": 0.51, "learning_rate": 0.0002948422353579931, "loss": 3.3907, "step": 29455 }, { "epoch": 0.51, "learning_rate": 0.0002947599826598306, "loss": 3.299, "step": 29460 }, { "epoch": 0.51, "learning_rate": 0.0002946777303556904, "loss": 3.3628, "step": 29465 }, { "epoch": 0.51, "learning_rate": 0.0002945954784517575, "loss": 3.2462, "step": 29470 }, { "epoch": 0.51, "learning_rate": 0.00029451322695421713, "loss": 3.3751, "step": 29475 }, { "epoch": 0.51, "learning_rate": 0.0002944309758692538, "loss": 3.2978, "step": 29480 }, { "epoch": 0.51, "learning_rate": 0.00029434872520305277, "loss": 3.3679, "step": 29485 }, { "epoch": 0.51, "learning_rate": 0.00029426647496179847, "loss": 3.2476, "step": 29490 }, { "epoch": 0.51, "learning_rate": 0.00029418422515167596, "loss": 3.3317, "step": 29495 }, { "epoch": 0.51, "learning_rate": 0.0002941019757788698, "loss": 3.3763, "step": 29500 }, { "epoch": 0.51, "eval_loss": 3.340322494506836, "eval_runtime": 149.8765, "eval_samples_per_second": 12.283, "eval_steps_per_second": 0.774, "step": 29500 }, { "epoch": 0.51, "learning_rate": 0.0002940197268495649, "loss": 3.2741, "step": 29505 }, { "epoch": 0.51, "learning_rate": 0.00029393747836994595, "loss": 3.2172, "step": 29510 }, { "epoch": 0.51, "learning_rate": 0.0002938552303461975, "loss": 3.282, "step": 29515 }, { "epoch": 0.51, "learning_rate": 0.0002937729827845042, "loss": 3.3107, "step": 29520 }, { "epoch": 0.51, "learning_rate": 0.00029369073569105066, "loss": 3.1937, "step": 29525 }, { "epoch": 0.51, "learning_rate": 0.00029360848907202154, "loss": 3.3407, "step": 29530 }, { "epoch": 0.51, "learning_rate": 0.0002935262429336013, "loss": 3.3098, "step": 29535 }, { "epoch": 0.51, "learning_rate": 0.0002934439972819744, "loss": 3.4239, "step": 29540 }, { "epoch": 0.51, "learning_rate": 0.0002933617521233252, "loss": 3.2941, "step": 29545 }, { "epoch": 0.51, "learning_rate": 0.0002932795074638383, "loss": 3.1586, "step": 29550 }, { "epoch": 0.51, "learning_rate": 0.000293197263309698, "loss": 3.3078, "step": 29555 }, { "epoch": 0.51, "learning_rate": 0.0002931150196670886, "loss": 3.303, "step": 29560 }, { "epoch": 0.51, "learning_rate": 0.00029303277654219457, "loss": 3.1798, "step": 29565 }, { "epoch": 0.51, "learning_rate": 0.0002929505339411999, "loss": 3.3173, "step": 29570 }, { "epoch": 0.51, "learning_rate": 0.000292868291870289, "loss": 3.4086, "step": 29575 }, { "epoch": 0.51, "learning_rate": 0.000292786050335646, "loss": 3.4117, "step": 29580 }, { "epoch": 0.51, "learning_rate": 0.00029270380934345495, "loss": 3.1563, "step": 29585 }, { "epoch": 0.51, "learning_rate": 0.00029262156889990024, "loss": 3.3599, "step": 29590 }, { "epoch": 0.51, "learning_rate": 0.0002925393290111656, "loss": 3.3668, "step": 29595 }, { "epoch": 0.51, "learning_rate": 0.00029245708968343514, "loss": 3.3687, "step": 29600 }, { "epoch": 0.51, "learning_rate": 0.00029237485092289294, "loss": 3.2999, "step": 29605 }, { "epoch": 0.51, "learning_rate": 0.0002922926127357229, "loss": 3.2961, "step": 29610 }, { "epoch": 0.51, "learning_rate": 0.00029221037512810886, "loss": 3.2225, "step": 29615 }, { "epoch": 0.51, "learning_rate": 0.00029212813810623473, "loss": 3.2368, "step": 29620 }, { "epoch": 0.51, "learning_rate": 0.0002920459016762842, "loss": 3.2402, "step": 29625 }, { "epoch": 0.51, "learning_rate": 0.0002919636658444411, "loss": 3.3306, "step": 29630 }, { "epoch": 0.51, "learning_rate": 0.00029188143061688917, "loss": 3.2542, "step": 29635 }, { "epoch": 0.51, "learning_rate": 0.00029179919599981217, "loss": 3.3902, "step": 29640 }, { "epoch": 0.51, "learning_rate": 0.0002917169619993934, "loss": 3.1883, "step": 29645 }, { "epoch": 0.51, "learning_rate": 0.0002916347286218167, "loss": 3.3464, "step": 29650 }, { "epoch": 0.51, "learning_rate": 0.0002915524958732656, "loss": 3.254, "step": 29655 }, { "epoch": 0.51, "learning_rate": 0.0002914702637599234, "loss": 3.3093, "step": 29660 }, { "epoch": 0.51, "learning_rate": 0.00029138803228797384, "loss": 3.3814, "step": 29665 }, { "epoch": 0.51, "learning_rate": 0.00029130580146359997, "loss": 3.3697, "step": 29670 }, { "epoch": 0.51, "learning_rate": 0.00029122357129298537, "loss": 3.3207, "step": 29675 }, { "epoch": 0.51, "learning_rate": 0.0002911413417823131, "loss": 3.3784, "step": 29680 }, { "epoch": 0.51, "learning_rate": 0.00029105911293776654, "loss": 3.326, "step": 29685 }, { "epoch": 0.51, "learning_rate": 0.00029097688476552904, "loss": 3.4872, "step": 29690 }, { "epoch": 0.51, "learning_rate": 0.0002908946572717834, "loss": 3.2418, "step": 29695 }, { "epoch": 0.51, "learning_rate": 0.0002908124304627129, "loss": 3.383, "step": 29700 }, { "epoch": 0.51, "learning_rate": 0.0002907302043445005, "loss": 3.4032, "step": 29705 }, { "epoch": 0.51, "learning_rate": 0.0002906479789233293, "loss": 3.2928, "step": 29710 }, { "epoch": 0.51, "learning_rate": 0.0002905657542053822, "loss": 3.2659, "step": 29715 }, { "epoch": 0.51, "learning_rate": 0.000290483530196842, "loss": 3.3656, "step": 29720 }, { "epoch": 0.51, "learning_rate": 0.0002904013069038915, "loss": 3.3738, "step": 29725 }, { "epoch": 0.51, "learning_rate": 0.00029031908433271354, "loss": 3.3268, "step": 29730 }, { "epoch": 0.51, "learning_rate": 0.00029023686248949083, "loss": 3.2683, "step": 29735 }, { "epoch": 0.51, "learning_rate": 0.00029015464138040613, "loss": 3.447, "step": 29740 }, { "epoch": 0.51, "learning_rate": 0.00029007242101164183, "loss": 3.2916, "step": 29745 }, { "epoch": 0.51, "learning_rate": 0.0002899902013893806, "loss": 3.2699, "step": 29750 }, { "epoch": 0.51, "learning_rate": 0.00028990798251980494, "loss": 3.319, "step": 29755 }, { "epoch": 0.52, "learning_rate": 0.00028982576440909723, "loss": 3.2191, "step": 29760 }, { "epoch": 0.52, "learning_rate": 0.00028974354706344003, "loss": 3.3557, "step": 29765 }, { "epoch": 0.52, "learning_rate": 0.0002896613304890154, "loss": 3.3007, "step": 29770 }, { "epoch": 0.52, "learning_rate": 0.00028957911469200577, "loss": 3.3596, "step": 29775 }, { "epoch": 0.52, "learning_rate": 0.00028949689967859327, "loss": 3.3322, "step": 29780 }, { "epoch": 0.52, "learning_rate": 0.0002894146854549601, "loss": 3.4363, "step": 29785 }, { "epoch": 0.52, "learning_rate": 0.0002893324720272884, "loss": 3.3202, "step": 29790 }, { "epoch": 0.52, "learning_rate": 0.0002892502594017601, "loss": 3.3451, "step": 29795 }, { "epoch": 0.52, "learning_rate": 0.0002891680475845572, "loss": 3.3622, "step": 29800 }, { "epoch": 0.52, "learning_rate": 0.0002890858365818616, "loss": 3.2766, "step": 29805 }, { "epoch": 0.52, "learning_rate": 0.0002890036263998551, "loss": 3.348, "step": 29810 }, { "epoch": 0.52, "learning_rate": 0.0002889214170447196, "loss": 3.3661, "step": 29815 }, { "epoch": 0.52, "learning_rate": 0.00028883920852263686, "loss": 3.275, "step": 29820 }, { "epoch": 0.52, "learning_rate": 0.0002887570008397883, "loss": 3.3184, "step": 29825 }, { "epoch": 0.52, "learning_rate": 0.00028867479400235567, "loss": 3.154, "step": 29830 }, { "epoch": 0.52, "learning_rate": 0.0002885925880165205, "loss": 3.2861, "step": 29835 }, { "epoch": 0.52, "learning_rate": 0.0002885103828884642, "loss": 3.2612, "step": 29840 }, { "epoch": 0.52, "learning_rate": 0.0002884281786243684, "loss": 3.3055, "step": 29845 }, { "epoch": 0.52, "learning_rate": 0.00028834597523041413, "loss": 3.3218, "step": 29850 }, { "epoch": 0.52, "learning_rate": 0.00028826377271278284, "loss": 3.3651, "step": 29855 }, { "epoch": 0.52, "learning_rate": 0.00028818157107765563, "loss": 3.3722, "step": 29860 }, { "epoch": 0.52, "learning_rate": 0.0002880993703312137, "loss": 3.3252, "step": 29865 }, { "epoch": 0.52, "learning_rate": 0.0002880171704796383, "loss": 3.2013, "step": 29870 }, { "epoch": 0.52, "learning_rate": 0.0002879349715291101, "loss": 3.3523, "step": 29875 }, { "epoch": 0.52, "learning_rate": 0.00028785277348581024, "loss": 3.2081, "step": 29880 }, { "epoch": 0.52, "learning_rate": 0.0002877705763559196, "loss": 3.3849, "step": 29885 }, { "epoch": 0.52, "learning_rate": 0.00028768838014561884, "loss": 3.3127, "step": 29890 }, { "epoch": 0.52, "learning_rate": 0.000287606184861089, "loss": 3.4062, "step": 29895 }, { "epoch": 0.52, "learning_rate": 0.0002875239905085104, "loss": 3.2684, "step": 29900 }, { "epoch": 0.52, "learning_rate": 0.0002874417970940638, "loss": 3.4488, "step": 29905 }, { "epoch": 0.52, "learning_rate": 0.00028735960462392964, "loss": 3.3177, "step": 29910 }, { "epoch": 0.52, "learning_rate": 0.0002872774131042885, "loss": 3.2003, "step": 29915 }, { "epoch": 0.52, "learning_rate": 0.0002871952225413208, "loss": 3.2894, "step": 29920 }, { "epoch": 0.52, "learning_rate": 0.00028711303294120654, "loss": 3.2948, "step": 29925 }, { "epoch": 0.52, "learning_rate": 0.0002870308443101262, "loss": 3.2572, "step": 29930 }, { "epoch": 0.52, "learning_rate": 0.00028694865665425997, "loss": 3.309, "step": 29935 }, { "epoch": 0.52, "learning_rate": 0.00028686646997978777, "loss": 3.3262, "step": 29940 }, { "epoch": 0.52, "learning_rate": 0.0002867842842928899, "loss": 3.3213, "step": 29945 }, { "epoch": 0.52, "learning_rate": 0.000286702099599746, "loss": 3.1783, "step": 29950 }, { "epoch": 0.52, "learning_rate": 0.000286619915906536, "loss": 3.3147, "step": 29955 }, { "epoch": 0.52, "learning_rate": 0.0002865377332194399, "loss": 3.2999, "step": 29960 }, { "epoch": 0.52, "learning_rate": 0.0002864555515446371, "loss": 3.1541, "step": 29965 }, { "epoch": 0.52, "learning_rate": 0.0002863733708883076, "loss": 3.2152, "step": 29970 }, { "epoch": 0.52, "learning_rate": 0.00028629119125663064, "loss": 3.2402, "step": 29975 }, { "epoch": 0.52, "learning_rate": 0.0002862090126557859, "loss": 3.4334, "step": 29980 }, { "epoch": 0.52, "learning_rate": 0.00028612683509195267, "loss": 3.2809, "step": 29985 }, { "epoch": 0.52, "learning_rate": 0.00028604465857131033, "loss": 3.4323, "step": 29990 }, { "epoch": 0.52, "learning_rate": 0.0002859624831000383, "loss": 3.3852, "step": 29995 }, { "epoch": 0.52, "learning_rate": 0.0002858803086843155, "loss": 3.3623, "step": 30000 }, { "epoch": 0.52, "eval_loss": 3.3538341522216797, "eval_runtime": 149.973, "eval_samples_per_second": 12.276, "eval_steps_per_second": 0.773, "step": 30000 }, { "epoch": 0.52, "learning_rate": 0.0002857981353303211, "loss": 3.3106, "step": 30005 }, { "epoch": 0.52, "learning_rate": 0.0002857159630442341, "loss": 3.4119, "step": 30010 }, { "epoch": 0.52, "learning_rate": 0.00028563379183223355, "loss": 3.3264, "step": 30015 }, { "epoch": 0.52, "learning_rate": 0.0002855516217004982, "loss": 3.2663, "step": 30020 }, { "epoch": 0.52, "learning_rate": 0.00028546945265520694, "loss": 3.349, "step": 30025 }, { "epoch": 0.52, "learning_rate": 0.00028538728470253825, "loss": 3.3401, "step": 30030 }, { "epoch": 0.52, "learning_rate": 0.0002853051178486709, "loss": 3.4039, "step": 30035 }, { "epoch": 0.52, "learning_rate": 0.00028522295209978336, "loss": 3.3008, "step": 30040 }, { "epoch": 0.52, "learning_rate": 0.0002851407874620541, "loss": 3.356, "step": 30045 }, { "epoch": 0.52, "learning_rate": 0.00028505862394166156, "loss": 3.1896, "step": 30050 }, { "epoch": 0.52, "learning_rate": 0.00028497646154478383, "loss": 3.3458, "step": 30055 }, { "epoch": 0.52, "learning_rate": 0.0002848943002775992, "loss": 3.3157, "step": 30060 }, { "epoch": 0.52, "learning_rate": 0.0002848121401462858, "loss": 3.3277, "step": 30065 }, { "epoch": 0.52, "learning_rate": 0.00028472998115702157, "loss": 3.4176, "step": 30070 }, { "epoch": 0.52, "learning_rate": 0.00028464782331598463, "loss": 3.3079, "step": 30075 }, { "epoch": 0.52, "learning_rate": 0.00028456566662935265, "loss": 3.2774, "step": 30080 }, { "epoch": 0.52, "learning_rate": 0.0002844835111033034, "loss": 3.3172, "step": 30085 }, { "epoch": 0.52, "learning_rate": 0.00028440135674401453, "loss": 3.1986, "step": 30090 }, { "epoch": 0.52, "learning_rate": 0.0002843192035576638, "loss": 3.3215, "step": 30095 }, { "epoch": 0.52, "learning_rate": 0.0002842370515504286, "loss": 3.3483, "step": 30100 }, { "epoch": 0.52, "learning_rate": 0.00028415490072848627, "loss": 3.3005, "step": 30105 }, { "epoch": 0.52, "learning_rate": 0.0002840727510980142, "loss": 3.2421, "step": 30110 }, { "epoch": 0.52, "learning_rate": 0.0002839906026651897, "loss": 3.2194, "step": 30115 }, { "epoch": 0.52, "learning_rate": 0.0002839084554361897, "loss": 3.3219, "step": 30120 }, { "epoch": 0.52, "learning_rate": 0.0002838263094171916, "loss": 3.2303, "step": 30125 }, { "epoch": 0.52, "learning_rate": 0.00028374416461437194, "loss": 3.226, "step": 30130 }, { "epoch": 0.52, "learning_rate": 0.00028366202103390786, "loss": 3.2418, "step": 30135 }, { "epoch": 0.52, "learning_rate": 0.0002835798786819761, "loss": 3.1874, "step": 30140 }, { "epoch": 0.52, "learning_rate": 0.0002834977375647532, "loss": 3.3188, "step": 30145 }, { "epoch": 0.52, "learning_rate": 0.0002834155976884161, "loss": 3.1829, "step": 30150 }, { "epoch": 0.52, "learning_rate": 0.0002833334590591408, "loss": 3.3849, "step": 30155 }, { "epoch": 0.52, "learning_rate": 0.00028325132168310413, "loss": 3.3192, "step": 30160 }, { "epoch": 0.52, "learning_rate": 0.0002831691855664821, "loss": 3.3352, "step": 30165 }, { "epoch": 0.52, "learning_rate": 0.0002830870507154511, "loss": 3.3424, "step": 30170 }, { "epoch": 0.52, "learning_rate": 0.0002830049171361873, "loss": 3.23, "step": 30175 }, { "epoch": 0.52, "learning_rate": 0.00028292278483486665, "loss": 3.257, "step": 30180 }, { "epoch": 0.52, "learning_rate": 0.0002828406538176649, "loss": 3.3752, "step": 30185 }, { "epoch": 0.52, "learning_rate": 0.0002827585240907581, "loss": 3.2752, "step": 30190 }, { "epoch": 0.52, "learning_rate": 0.000282676395660322, "loss": 3.2853, "step": 30195 }, { "epoch": 0.52, "learning_rate": 0.00028259426853253215, "loss": 3.3301, "step": 30200 }, { "epoch": 0.52, "learning_rate": 0.00028251214271356406, "loss": 3.258, "step": 30205 }, { "epoch": 0.52, "learning_rate": 0.0002824300182095932, "loss": 3.2661, "step": 30210 }, { "epoch": 0.52, "learning_rate": 0.00028234789502679493, "loss": 3.3113, "step": 30215 }, { "epoch": 0.52, "learning_rate": 0.0002822657731713445, "loss": 3.327, "step": 30220 }, { "epoch": 0.52, "learning_rate": 0.0002821836526494171, "loss": 3.2908, "step": 30225 }, { "epoch": 0.52, "learning_rate": 0.0002821015334671877, "loss": 3.3357, "step": 30230 }, { "epoch": 0.52, "learning_rate": 0.00028201941563083113, "loss": 3.394, "step": 30235 }, { "epoch": 0.52, "learning_rate": 0.0002819372991465225, "loss": 3.3686, "step": 30240 }, { "epoch": 0.52, "learning_rate": 0.0002818551840204363, "loss": 3.4033, "step": 30245 }, { "epoch": 0.52, "learning_rate": 0.00028177307025874725, "loss": 3.3786, "step": 30250 }, { "epoch": 0.52, "learning_rate": 0.00028169095786763007, "loss": 3.2397, "step": 30255 }, { "epoch": 0.52, "learning_rate": 0.000281608846853259, "loss": 3.3121, "step": 30260 }, { "epoch": 0.52, "learning_rate": 0.0002815267372218083, "loss": 3.2628, "step": 30265 }, { "epoch": 0.52, "learning_rate": 0.00028144462897945223, "loss": 3.3744, "step": 30270 }, { "epoch": 0.52, "learning_rate": 0.0002813625221323651, "loss": 3.2426, "step": 30275 }, { "epoch": 0.52, "learning_rate": 0.0002812804166867208, "loss": 3.4355, "step": 30280 }, { "epoch": 0.52, "learning_rate": 0.0002811983126486931, "loss": 3.245, "step": 30285 }, { "epoch": 0.52, "learning_rate": 0.000281116210024456, "loss": 3.3295, "step": 30290 }, { "epoch": 0.52, "learning_rate": 0.0002810341088201831, "loss": 3.3174, "step": 30295 }, { "epoch": 0.52, "learning_rate": 0.00028095200904204804, "loss": 3.1724, "step": 30300 }, { "epoch": 0.52, "learning_rate": 0.0002808699106962244, "loss": 3.2641, "step": 30305 }, { "epoch": 0.52, "learning_rate": 0.00028078781378888524, "loss": 3.2328, "step": 30310 }, { "epoch": 0.52, "learning_rate": 0.00028070571832620404, "loss": 3.2577, "step": 30315 }, { "epoch": 0.52, "learning_rate": 0.00028062362431435404, "loss": 3.3785, "step": 30320 }, { "epoch": 0.52, "learning_rate": 0.0002805415317595081, "loss": 3.2063, "step": 30325 }, { "epoch": 0.52, "learning_rate": 0.0002804594406678394, "loss": 3.3733, "step": 30330 }, { "epoch": 0.52, "learning_rate": 0.0002803773510455205, "loss": 3.3888, "step": 30335 }, { "epoch": 0.53, "learning_rate": 0.0002802952628987243, "loss": 3.2477, "step": 30340 }, { "epoch": 0.53, "learning_rate": 0.00028021317623362326, "loss": 3.3568, "step": 30345 }, { "epoch": 0.53, "learning_rate": 0.00028013109105638995, "loss": 3.284, "step": 30350 }, { "epoch": 0.53, "learning_rate": 0.0002800490073731969, "loss": 3.3265, "step": 30355 }, { "epoch": 0.53, "learning_rate": 0.0002799669251902162, "loss": 3.242, "step": 30360 }, { "epoch": 0.53, "learning_rate": 0.00027988484451361994, "loss": 3.391, "step": 30365 }, { "epoch": 0.53, "learning_rate": 0.00027980276534958037, "loss": 3.3313, "step": 30370 }, { "epoch": 0.53, "learning_rate": 0.00027972068770426936, "loss": 3.2538, "step": 30375 }, { "epoch": 0.53, "learning_rate": 0.0002796386115838587, "loss": 3.3219, "step": 30380 }, { "epoch": 0.53, "learning_rate": 0.00027955653699452006, "loss": 3.3282, "step": 30385 }, { "epoch": 0.53, "learning_rate": 0.0002794744639424251, "loss": 3.3222, "step": 30390 }, { "epoch": 0.53, "learning_rate": 0.00027939239243374517, "loss": 3.3605, "step": 30395 }, { "epoch": 0.53, "learning_rate": 0.0002793103224746518, "loss": 3.296, "step": 30400 }, { "epoch": 0.53, "learning_rate": 0.00027922825407131625, "loss": 3.3659, "step": 30405 }, { "epoch": 0.53, "learning_rate": 0.0002791461872299094, "loss": 3.4053, "step": 30410 }, { "epoch": 0.53, "learning_rate": 0.00027906412195660235, "loss": 3.3414, "step": 30415 }, { "epoch": 0.53, "learning_rate": 0.00027898205825756614, "loss": 3.2646, "step": 30420 }, { "epoch": 0.53, "learning_rate": 0.00027889999613897137, "loss": 3.2449, "step": 30425 }, { "epoch": 0.53, "learning_rate": 0.00027881793560698885, "loss": 3.3722, "step": 30430 }, { "epoch": 0.53, "learning_rate": 0.00027873587666778893, "loss": 3.2959, "step": 30435 }, { "epoch": 0.53, "learning_rate": 0.00027865381932754214, "loss": 3.2453, "step": 30440 }, { "epoch": 0.53, "learning_rate": 0.0002785717635924187, "loss": 3.305, "step": 30445 }, { "epoch": 0.53, "learning_rate": 0.00027848970946858884, "loss": 3.337, "step": 30450 }, { "epoch": 0.53, "learning_rate": 0.0002784076569622227, "loss": 3.2714, "step": 30455 }, { "epoch": 0.53, "learning_rate": 0.00027832560607948993, "loss": 3.3498, "step": 30460 }, { "epoch": 0.53, "learning_rate": 0.00027824355682656057, "loss": 3.2591, "step": 30465 }, { "epoch": 0.53, "learning_rate": 0.0002781615092096042, "loss": 3.3538, "step": 30470 }, { "epoch": 0.53, "learning_rate": 0.0002780794632347904, "loss": 3.3029, "step": 30475 }, { "epoch": 0.53, "learning_rate": 0.00027799741890828874, "loss": 3.4062, "step": 30480 }, { "epoch": 0.53, "learning_rate": 0.00027791537623626843, "loss": 3.2511, "step": 30485 }, { "epoch": 0.53, "learning_rate": 0.0002778333352248985, "loss": 3.2801, "step": 30490 }, { "epoch": 0.53, "learning_rate": 0.00027775129588034823, "loss": 3.3308, "step": 30495 }, { "epoch": 0.53, "learning_rate": 0.00027766925820878647, "loss": 3.3449, "step": 30500 }, { "epoch": 0.53, "eval_loss": 3.326901435852051, "eval_runtime": 149.9761, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 30500 }, { "epoch": 0.53, "learning_rate": 0.00027758722221638205, "loss": 3.2604, "step": 30505 }, { "epoch": 0.53, "learning_rate": 0.0002775051879093038, "loss": 3.3171, "step": 30510 }, { "epoch": 0.53, "learning_rate": 0.00027742315529371994, "loss": 3.3948, "step": 30515 }, { "epoch": 0.53, "learning_rate": 0.00027734112437579917, "loss": 3.3968, "step": 30520 }, { "epoch": 0.53, "learning_rate": 0.00027725909516170964, "loss": 3.2516, "step": 30525 }, { "epoch": 0.53, "learning_rate": 0.00027717706765761967, "loss": 3.323, "step": 30530 }, { "epoch": 0.53, "learning_rate": 0.00027709504186969737, "loss": 3.3059, "step": 30535 }, { "epoch": 0.53, "learning_rate": 0.00027701301780411045, "loss": 3.2289, "step": 30540 }, { "epoch": 0.53, "learning_rate": 0.0002769309954670267, "loss": 3.3094, "step": 30545 }, { "epoch": 0.53, "learning_rate": 0.0002768489748646139, "loss": 3.3621, "step": 30550 }, { "epoch": 0.53, "learning_rate": 0.00027676695600303955, "loss": 3.409, "step": 30555 }, { "epoch": 0.53, "learning_rate": 0.0002766849388884711, "loss": 3.3604, "step": 30560 }, { "epoch": 0.53, "learning_rate": 0.00027660292352707576, "loss": 3.2526, "step": 30565 }, { "epoch": 0.53, "learning_rate": 0.0002765209099250206, "loss": 3.2604, "step": 30570 }, { "epoch": 0.53, "learning_rate": 0.0002764388980884726, "loss": 3.269, "step": 30575 }, { "epoch": 0.53, "learning_rate": 0.0002763568880235989, "loss": 3.3991, "step": 30580 }, { "epoch": 0.53, "learning_rate": 0.00027627487973656606, "loss": 3.368, "step": 30585 }, { "epoch": 0.53, "learning_rate": 0.0002761928732335405, "loss": 3.2534, "step": 30590 }, { "epoch": 0.53, "learning_rate": 0.0002761108685206889, "loss": 3.2685, "step": 30595 }, { "epoch": 0.53, "learning_rate": 0.00027602886560417763, "loss": 3.3091, "step": 30600 }, { "epoch": 0.53, "learning_rate": 0.00027594686449017276, "loss": 3.3925, "step": 30605 }, { "epoch": 0.53, "learning_rate": 0.00027586486518484056, "loss": 3.3245, "step": 30610 }, { "epoch": 0.53, "learning_rate": 0.00027578286769434663, "loss": 3.3573, "step": 30615 }, { "epoch": 0.53, "learning_rate": 0.00027570087202485703, "loss": 3.3753, "step": 30620 }, { "epoch": 0.53, "learning_rate": 0.00027561887818253726, "loss": 3.287, "step": 30625 }, { "epoch": 0.53, "learning_rate": 0.0002755368861735529, "loss": 3.3379, "step": 30630 }, { "epoch": 0.53, "learning_rate": 0.0002754548960040694, "loss": 3.2588, "step": 30635 }, { "epoch": 0.53, "learning_rate": 0.0002753729076802519, "loss": 3.2747, "step": 30640 }, { "epoch": 0.53, "learning_rate": 0.0002752909212082655, "loss": 3.2554, "step": 30645 }, { "epoch": 0.53, "learning_rate": 0.00027520893659427516, "loss": 3.3417, "step": 30650 }, { "epoch": 0.53, "learning_rate": 0.00027512695384444575, "loss": 3.2509, "step": 30655 }, { "epoch": 0.53, "learning_rate": 0.00027504497296494207, "loss": 3.2155, "step": 30660 }, { "epoch": 0.53, "learning_rate": 0.00027496299396192846, "loss": 3.3851, "step": 30665 }, { "epoch": 0.53, "learning_rate": 0.0002748810168415693, "loss": 3.2289, "step": 30670 }, { "epoch": 0.53, "learning_rate": 0.000274799041610029, "loss": 3.3077, "step": 30675 }, { "epoch": 0.53, "learning_rate": 0.0002747170682734716, "loss": 3.2613, "step": 30680 }, { "epoch": 0.53, "learning_rate": 0.0002746350968380613, "loss": 3.3562, "step": 30685 }, { "epoch": 0.53, "learning_rate": 0.0002745531273099615, "loss": 3.2328, "step": 30690 }, { "epoch": 0.53, "learning_rate": 0.0002744711596953361, "loss": 3.3524, "step": 30695 }, { "epoch": 0.53, "learning_rate": 0.0002743891940003487, "loss": 3.3606, "step": 30700 }, { "epoch": 0.53, "learning_rate": 0.00027430723023116264, "loss": 3.2986, "step": 30705 }, { "epoch": 0.53, "learning_rate": 0.0002742252683939412, "loss": 3.2556, "step": 30710 }, { "epoch": 0.53, "learning_rate": 0.0002741433084948476, "loss": 3.28, "step": 30715 }, { "epoch": 0.53, "learning_rate": 0.00027406135054004456, "loss": 3.2094, "step": 30720 }, { "epoch": 0.53, "learning_rate": 0.00027397939453569506, "loss": 3.393, "step": 30725 }, { "epoch": 0.53, "learning_rate": 0.0002738974404879617, "loss": 3.2284, "step": 30730 }, { "epoch": 0.53, "learning_rate": 0.00027381548840300706, "loss": 3.2894, "step": 30735 }, { "epoch": 0.53, "learning_rate": 0.0002737335382869936, "loss": 3.3592, "step": 30740 }, { "epoch": 0.53, "learning_rate": 0.0002736515901460834, "loss": 3.2587, "step": 30745 }, { "epoch": 0.53, "learning_rate": 0.00027356964398643854, "loss": 3.2823, "step": 30750 }, { "epoch": 0.53, "learning_rate": 0.00027348769981422094, "loss": 3.3475, "step": 30755 }, { "epoch": 0.53, "learning_rate": 0.00027340575763559256, "loss": 3.3608, "step": 30760 }, { "epoch": 0.53, "learning_rate": 0.0002733238174567149, "loss": 3.3273, "step": 30765 }, { "epoch": 0.53, "learning_rate": 0.0002732418792837494, "loss": 3.241, "step": 30770 }, { "epoch": 0.53, "learning_rate": 0.00027315994312285746, "loss": 3.293, "step": 30775 }, { "epoch": 0.53, "learning_rate": 0.00027307800898020027, "loss": 3.1938, "step": 30780 }, { "epoch": 0.53, "learning_rate": 0.00027299607686193876, "loss": 3.3112, "step": 30785 }, { "epoch": 0.53, "learning_rate": 0.000272914146774234, "loss": 3.3405, "step": 30790 }, { "epoch": 0.53, "learning_rate": 0.0002728322187232465, "loss": 3.2946, "step": 30795 }, { "epoch": 0.53, "learning_rate": 0.00027275029271513695, "loss": 3.3734, "step": 30800 }, { "epoch": 0.53, "learning_rate": 0.00027266836875606567, "loss": 3.3478, "step": 30805 }, { "epoch": 0.53, "learning_rate": 0.000272586446852193, "loss": 3.2108, "step": 30810 }, { "epoch": 0.53, "learning_rate": 0.0002725045270096792, "loss": 3.2706, "step": 30815 }, { "epoch": 0.53, "learning_rate": 0.0002724226092346839, "loss": 3.3201, "step": 30820 }, { "epoch": 0.53, "learning_rate": 0.0002723406935333671, "loss": 3.2457, "step": 30825 }, { "epoch": 0.53, "learning_rate": 0.0002722587799118883, "loss": 3.2723, "step": 30830 }, { "epoch": 0.53, "learning_rate": 0.00027217686837640714, "loss": 3.3165, "step": 30835 }, { "epoch": 0.53, "learning_rate": 0.00027209495893308296, "loss": 3.2665, "step": 30840 }, { "epoch": 0.53, "learning_rate": 0.00027201305158807483, "loss": 3.3003, "step": 30845 }, { "epoch": 0.53, "learning_rate": 0.0002719311463475418, "loss": 3.2617, "step": 30850 }, { "epoch": 0.53, "learning_rate": 0.0002718492432176426, "loss": 3.3741, "step": 30855 }, { "epoch": 0.53, "learning_rate": 0.0002717673422045361, "loss": 3.4239, "step": 30860 }, { "epoch": 0.53, "learning_rate": 0.0002716854433143809, "loss": 3.3527, "step": 30865 }, { "epoch": 0.53, "learning_rate": 0.0002716035465533351, "loss": 3.3153, "step": 30870 }, { "epoch": 0.53, "learning_rate": 0.00027152165192755707, "loss": 3.2969, "step": 30875 }, { "epoch": 0.53, "learning_rate": 0.00027143975944320494, "loss": 3.3132, "step": 30880 }, { "epoch": 0.53, "learning_rate": 0.00027135786910643646, "loss": 3.238, "step": 30885 }, { "epoch": 0.53, "learning_rate": 0.0002712759809234096, "loss": 3.3268, "step": 30890 }, { "epoch": 0.53, "learning_rate": 0.0002711940949002816, "loss": 3.3004, "step": 30895 }, { "epoch": 0.53, "learning_rate": 0.00027111221104321004, "loss": 3.3266, "step": 30900 }, { "epoch": 0.53, "learning_rate": 0.0002710303293583522, "loss": 3.3325, "step": 30905 }, { "epoch": 0.53, "learning_rate": 0.0002709484498518651, "loss": 3.3649, "step": 30910 }, { "epoch": 0.54, "learning_rate": 0.0002708665725299058, "loss": 3.3601, "step": 30915 }, { "epoch": 0.54, "learning_rate": 0.0002707846973986308, "loss": 3.3293, "step": 30920 }, { "epoch": 0.54, "learning_rate": 0.0002707028244641969, "loss": 3.2142, "step": 30925 }, { "epoch": 0.54, "learning_rate": 0.00027062095373276036, "loss": 3.1972, "step": 30930 }, { "epoch": 0.54, "learning_rate": 0.0002705390852104776, "loss": 3.2593, "step": 30935 }, { "epoch": 0.54, "learning_rate": 0.0002704572189035047, "loss": 3.3917, "step": 30940 }, { "epoch": 0.54, "learning_rate": 0.0002703753548179976, "loss": 3.2728, "step": 30945 }, { "epoch": 0.54, "learning_rate": 0.0002702934929601118, "loss": 3.4312, "step": 30950 }, { "epoch": 0.54, "learning_rate": 0.0002702116333360031, "loss": 3.3181, "step": 30955 }, { "epoch": 0.54, "learning_rate": 0.000270129775951827, "loss": 3.3649, "step": 30960 }, { "epoch": 0.54, "learning_rate": 0.0002700479208137386, "loss": 3.3491, "step": 30965 }, { "epoch": 0.54, "learning_rate": 0.0002699660679278932, "loss": 3.194, "step": 30970 }, { "epoch": 0.54, "learning_rate": 0.00026988421730044545, "loss": 3.3569, "step": 30975 }, { "epoch": 0.54, "learning_rate": 0.0002698023689375502, "loss": 3.1581, "step": 30980 }, { "epoch": 0.54, "learning_rate": 0.0002697205228453621, "loss": 3.2575, "step": 30985 }, { "epoch": 0.54, "learning_rate": 0.00026963867903003543, "loss": 3.2312, "step": 30990 }, { "epoch": 0.54, "learning_rate": 0.00026955683749772466, "loss": 3.3679, "step": 30995 }, { "epoch": 0.54, "learning_rate": 0.0002694749982545835, "loss": 3.2296, "step": 31000 }, { "epoch": 0.54, "eval_loss": 3.32407808303833, "eval_runtime": 149.9754, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 31000 }, { "epoch": 0.54, "learning_rate": 0.00026939316130676616, "loss": 3.2916, "step": 31005 }, { "epoch": 0.54, "learning_rate": 0.00026931132666042614, "loss": 3.316, "step": 31010 }, { "epoch": 0.54, "learning_rate": 0.0002692294943217171, "loss": 3.183, "step": 31015 }, { "epoch": 0.54, "learning_rate": 0.0002691476642967925, "loss": 3.3019, "step": 31020 }, { "epoch": 0.54, "learning_rate": 0.0002690658365918054, "loss": 3.1547, "step": 31025 }, { "epoch": 0.54, "learning_rate": 0.00026898401121290875, "loss": 3.3068, "step": 31030 }, { "epoch": 0.54, "learning_rate": 0.0002689021881662555, "loss": 3.2116, "step": 31035 }, { "epoch": 0.54, "learning_rate": 0.0002688203674579984, "loss": 3.2976, "step": 31040 }, { "epoch": 0.54, "learning_rate": 0.0002687385490942899, "loss": 3.3166, "step": 31045 }, { "epoch": 0.54, "learning_rate": 0.00026865673308128224, "loss": 3.2547, "step": 31050 }, { "epoch": 0.54, "learning_rate": 0.0002685749194251276, "loss": 3.3457, "step": 31055 }, { "epoch": 0.54, "learning_rate": 0.000268493108131978, "loss": 3.2873, "step": 31060 }, { "epoch": 0.54, "learning_rate": 0.00026841129920798513, "loss": 3.3012, "step": 31065 }, { "epoch": 0.54, "learning_rate": 0.00026832949265930087, "loss": 3.1554, "step": 31070 }, { "epoch": 0.54, "learning_rate": 0.0002682476884920763, "loss": 3.4911, "step": 31075 }, { "epoch": 0.54, "learning_rate": 0.0002681658867124628, "loss": 3.2928, "step": 31080 }, { "epoch": 0.54, "learning_rate": 0.0002680840873266115, "loss": 3.0558, "step": 31085 }, { "epoch": 0.54, "learning_rate": 0.00026800229034067326, "loss": 3.2551, "step": 31090 }, { "epoch": 0.54, "learning_rate": 0.0002679204957607989, "loss": 3.2682, "step": 31095 }, { "epoch": 0.54, "learning_rate": 0.0002678387035931387, "loss": 3.3801, "step": 31100 }, { "epoch": 0.54, "learning_rate": 0.0002677569138438433, "loss": 3.195, "step": 31105 }, { "epoch": 0.54, "learning_rate": 0.00026767512651906263, "loss": 3.2651, "step": 31110 }, { "epoch": 0.54, "learning_rate": 0.0002675933416249468, "loss": 3.3951, "step": 31115 }, { "epoch": 0.54, "learning_rate": 0.00026751155916764573, "loss": 3.3247, "step": 31120 }, { "epoch": 0.54, "learning_rate": 0.00026742977915330884, "loss": 3.3234, "step": 31125 }, { "epoch": 0.54, "learning_rate": 0.00026734800158808555, "loss": 3.3103, "step": 31130 }, { "epoch": 0.54, "learning_rate": 0.00026726622647812526, "loss": 3.3172, "step": 31135 }, { "epoch": 0.54, "learning_rate": 0.00026718445382957695, "loss": 3.4098, "step": 31140 }, { "epoch": 0.54, "learning_rate": 0.00026710268364858963, "loss": 3.3378, "step": 31145 }, { "epoch": 0.54, "learning_rate": 0.00026702091594131187, "loss": 3.3174, "step": 31150 }, { "epoch": 0.54, "learning_rate": 0.00026693915071389216, "loss": 3.2108, "step": 31155 }, { "epoch": 0.54, "learning_rate": 0.00026685738797247886, "loss": 3.3841, "step": 31160 }, { "epoch": 0.54, "learning_rate": 0.00026677562772322016, "loss": 3.4254, "step": 31165 }, { "epoch": 0.54, "learning_rate": 0.0002666938699722641, "loss": 3.3229, "step": 31170 }, { "epoch": 0.54, "learning_rate": 0.0002666121147257582, "loss": 3.1803, "step": 31175 }, { "epoch": 0.54, "learning_rate": 0.00026653036198985016, "loss": 3.2278, "step": 31180 }, { "epoch": 0.54, "learning_rate": 0.00026644861177068746, "loss": 3.2767, "step": 31185 }, { "epoch": 0.54, "learning_rate": 0.0002663668640744171, "loss": 3.3793, "step": 31190 }, { "epoch": 0.54, "learning_rate": 0.0002662851189071862, "loss": 3.2449, "step": 31195 }, { "epoch": 0.54, "learning_rate": 0.00026620337627514165, "loss": 3.3237, "step": 31200 }, { "epoch": 0.54, "learning_rate": 0.00026612163618443003, "loss": 3.3144, "step": 31205 }, { "epoch": 0.54, "learning_rate": 0.0002660398986411976, "loss": 3.3152, "step": 31210 }, { "epoch": 0.54, "learning_rate": 0.0002659581636515908, "loss": 3.2679, "step": 31215 }, { "epoch": 0.54, "learning_rate": 0.0002658764312217557, "loss": 3.2625, "step": 31220 }, { "epoch": 0.54, "learning_rate": 0.0002657947013578381, "loss": 3.2968, "step": 31225 }, { "epoch": 0.54, "learning_rate": 0.0002657129740659836, "loss": 3.2104, "step": 31230 }, { "epoch": 0.54, "learning_rate": 0.0002656312493523377, "loss": 3.331, "step": 31235 }, { "epoch": 0.54, "learning_rate": 0.0002655495272230457, "loss": 3.2342, "step": 31240 }, { "epoch": 0.54, "learning_rate": 0.0002654678076842528, "loss": 3.2085, "step": 31245 }, { "epoch": 0.54, "learning_rate": 0.0002653860907421038, "loss": 3.311, "step": 31250 }, { "epoch": 0.54, "learning_rate": 0.0002653043764027433, "loss": 3.2037, "step": 31255 }, { "epoch": 0.54, "learning_rate": 0.0002652226646723159, "loss": 3.1527, "step": 31260 }, { "epoch": 0.54, "learning_rate": 0.0002651409555569659, "loss": 3.271, "step": 31265 }, { "epoch": 0.54, "learning_rate": 0.0002650592490628373, "loss": 3.3012, "step": 31270 }, { "epoch": 0.54, "learning_rate": 0.00026497754519607433, "loss": 3.3312, "step": 31275 }, { "epoch": 0.54, "learning_rate": 0.0002648958439628202, "loss": 3.3229, "step": 31280 }, { "epoch": 0.54, "learning_rate": 0.00026481414536921893, "loss": 3.2941, "step": 31285 }, { "epoch": 0.54, "learning_rate": 0.00026473244942141344, "loss": 3.2946, "step": 31290 }, { "epoch": 0.54, "learning_rate": 0.000264650756125547, "loss": 3.3225, "step": 31295 }, { "epoch": 0.54, "learning_rate": 0.0002645690654877627, "loss": 3.2401, "step": 31300 }, { "epoch": 0.54, "learning_rate": 0.00026448737751420305, "loss": 3.328, "step": 31305 }, { "epoch": 0.54, "learning_rate": 0.0002644056922110105, "loss": 3.3317, "step": 31310 }, { "epoch": 0.54, "learning_rate": 0.0002643240095843275, "loss": 3.2121, "step": 31315 }, { "epoch": 0.54, "learning_rate": 0.0002642423296402962, "loss": 3.2404, "step": 31320 }, { "epoch": 0.54, "learning_rate": 0.00026416065238505846, "loss": 3.2064, "step": 31325 }, { "epoch": 0.54, "learning_rate": 0.000264078977824756, "loss": 3.261, "step": 31330 }, { "epoch": 0.54, "learning_rate": 0.00026399730596553023, "loss": 3.174, "step": 31335 }, { "epoch": 0.54, "learning_rate": 0.0002639156368135226, "loss": 3.3145, "step": 31340 }, { "epoch": 0.54, "learning_rate": 0.00026383397037487414, "loss": 3.4055, "step": 31345 }, { "epoch": 0.54, "learning_rate": 0.00026375230665572584, "loss": 3.2682, "step": 31350 }, { "epoch": 0.54, "learning_rate": 0.0002636706456622182, "loss": 3.3942, "step": 31355 }, { "epoch": 0.54, "learning_rate": 0.0002635889874004918, "loss": 3.1798, "step": 31360 }, { "epoch": 0.54, "learning_rate": 0.00026350733187668706, "loss": 3.2755, "step": 31365 }, { "epoch": 0.54, "learning_rate": 0.00026342567909694385, "loss": 3.1718, "step": 31370 }, { "epoch": 0.54, "learning_rate": 0.00026334402906740223, "loss": 3.1946, "step": 31375 }, { "epoch": 0.54, "learning_rate": 0.00026326238179420167, "loss": 3.2054, "step": 31380 }, { "epoch": 0.54, "learning_rate": 0.0002631807372834818, "loss": 3.257, "step": 31385 }, { "epoch": 0.54, "learning_rate": 0.0002630990955413817, "loss": 3.3149, "step": 31390 }, { "epoch": 0.54, "learning_rate": 0.00026301745657404045, "loss": 3.3137, "step": 31395 }, { "epoch": 0.54, "learning_rate": 0.00026293582038759716, "loss": 3.3703, "step": 31400 }, { "epoch": 0.54, "learning_rate": 0.00026285418698819004, "loss": 3.2994, "step": 31405 }, { "epoch": 0.54, "learning_rate": 0.00026277255638195774, "loss": 3.2856, "step": 31410 }, { "epoch": 0.54, "learning_rate": 0.00026269092857503836, "loss": 3.3122, "step": 31415 }, { "epoch": 0.54, "learning_rate": 0.00026260930357357, "loss": 3.2749, "step": 31420 }, { "epoch": 0.54, "learning_rate": 0.00026252768138369044, "loss": 3.161, "step": 31425 }, { "epoch": 0.54, "learning_rate": 0.0002624460620115372, "loss": 3.2431, "step": 31430 }, { "epoch": 0.54, "learning_rate": 0.0002623644454632475, "loss": 3.2436, "step": 31435 }, { "epoch": 0.54, "learning_rate": 0.0002622828317449587, "loss": 3.3021, "step": 31440 }, { "epoch": 0.54, "learning_rate": 0.00026220122086280764, "loss": 3.2542, "step": 31445 }, { "epoch": 0.54, "learning_rate": 0.0002621196128229311, "loss": 3.2806, "step": 31450 }, { "epoch": 0.54, "learning_rate": 0.0002620380076314656, "loss": 3.2708, "step": 31455 }, { "epoch": 0.54, "learning_rate": 0.00026195640529454726, "loss": 3.3919, "step": 31460 }, { "epoch": 0.54, "learning_rate": 0.0002618748058183123, "loss": 3.2865, "step": 31465 }, { "epoch": 0.54, "learning_rate": 0.0002617932092088965, "loss": 3.2749, "step": 31470 }, { "epoch": 0.54, "learning_rate": 0.0002617116154724356, "loss": 3.2656, "step": 31475 }, { "epoch": 0.54, "learning_rate": 0.00026163002461506503, "loss": 3.212, "step": 31480 }, { "epoch": 0.54, "learning_rate": 0.00026154843664291997, "loss": 3.2677, "step": 31485 }, { "epoch": 0.54, "learning_rate": 0.0002614668515621354, "loss": 3.2452, "step": 31490 }, { "epoch": 0.55, "learning_rate": 0.00026138526937884604, "loss": 3.2805, "step": 31495 }, { "epoch": 0.55, "learning_rate": 0.0002613036900991865, "loss": 3.3033, "step": 31500 }, { "epoch": 0.55, "eval_loss": 3.316983938217163, "eval_runtime": 150.2836, "eval_samples_per_second": 12.25, "eval_steps_per_second": 0.772, "step": 31500 }, { "epoch": 0.55, "learning_rate": 0.00026122211372929137, "loss": 3.3315, "step": 31505 }, { "epoch": 0.55, "learning_rate": 0.00026114054027529443, "loss": 3.3347, "step": 31510 }, { "epoch": 0.55, "learning_rate": 0.0002610589697433297, "loss": 3.3488, "step": 31515 }, { "epoch": 0.55, "learning_rate": 0.0002609774021395308, "loss": 3.3674, "step": 31520 }, { "epoch": 0.55, "learning_rate": 0.00026089583747003134, "loss": 3.1443, "step": 31525 }, { "epoch": 0.55, "learning_rate": 0.0002608142757409646, "loss": 3.3235, "step": 31530 }, { "epoch": 0.55, "learning_rate": 0.00026073271695846337, "loss": 3.2616, "step": 31535 }, { "epoch": 0.55, "learning_rate": 0.0002606511611286606, "loss": 3.328, "step": 31540 }, { "epoch": 0.55, "learning_rate": 0.0002605696082576889, "loss": 3.339, "step": 31545 }, { "epoch": 0.55, "learning_rate": 0.0002604880583516805, "loss": 3.2483, "step": 31550 }, { "epoch": 0.55, "learning_rate": 0.00026040651141676774, "loss": 3.3018, "step": 31555 }, { "epoch": 0.55, "learning_rate": 0.0002603249674590823, "loss": 3.3246, "step": 31560 }, { "epoch": 0.55, "learning_rate": 0.00026024342648475594, "loss": 3.4165, "step": 31565 }, { "epoch": 0.55, "learning_rate": 0.00026016188849992025, "loss": 3.2257, "step": 31570 }, { "epoch": 0.55, "learning_rate": 0.0002600803535107063, "loss": 3.2111, "step": 31575 }, { "epoch": 0.55, "learning_rate": 0.0002599988215232453, "loss": 3.2358, "step": 31580 }, { "epoch": 0.55, "learning_rate": 0.00025991729254366786, "loss": 3.3897, "step": 31585 }, { "epoch": 0.55, "learning_rate": 0.00025983576657810463, "loss": 3.2663, "step": 31590 }, { "epoch": 0.55, "learning_rate": 0.0002597542436326858, "loss": 3.2689, "step": 31595 }, { "epoch": 0.55, "learning_rate": 0.00025967272371354164, "loss": 3.2168, "step": 31600 }, { "epoch": 0.55, "learning_rate": 0.00025959120682680215, "loss": 3.2391, "step": 31605 }, { "epoch": 0.55, "learning_rate": 0.00025950969297859665, "loss": 3.2852, "step": 31610 }, { "epoch": 0.55, "learning_rate": 0.00025942818217505474, "loss": 3.3367, "step": 31615 }, { "epoch": 0.55, "learning_rate": 0.0002593466744223056, "loss": 3.283, "step": 31620 }, { "epoch": 0.55, "learning_rate": 0.00025926516972647827, "loss": 3.3326, "step": 31625 }, { "epoch": 0.55, "learning_rate": 0.0002591836680937014, "loss": 3.3459, "step": 31630 }, { "epoch": 0.55, "learning_rate": 0.00025910216953010343, "loss": 3.4016, "step": 31635 }, { "epoch": 0.55, "learning_rate": 0.00025902067404181276, "loss": 3.2182, "step": 31640 }, { "epoch": 0.55, "learning_rate": 0.0002589391816349574, "loss": 3.2924, "step": 31645 }, { "epoch": 0.55, "learning_rate": 0.0002588576923156651, "loss": 3.3283, "step": 31650 }, { "epoch": 0.55, "learning_rate": 0.0002587762060900634, "loss": 3.346, "step": 31655 }, { "epoch": 0.55, "learning_rate": 0.00025869472296428, "loss": 3.1884, "step": 31660 }, { "epoch": 0.55, "learning_rate": 0.00025861324294444157, "loss": 3.2707, "step": 31665 }, { "epoch": 0.55, "learning_rate": 0.0002585317660366752, "loss": 3.2709, "step": 31670 }, { "epoch": 0.55, "learning_rate": 0.00025845029224710745, "loss": 3.3347, "step": 31675 }, { "epoch": 0.55, "learning_rate": 0.0002583688215818648, "loss": 3.268, "step": 31680 }, { "epoch": 0.55, "learning_rate": 0.00025828735404707356, "loss": 3.2481, "step": 31685 }, { "epoch": 0.55, "learning_rate": 0.0002582058896488594, "loss": 3.3265, "step": 31690 }, { "epoch": 0.55, "learning_rate": 0.0002581244283933481, "loss": 3.2392, "step": 31695 }, { "epoch": 0.55, "learning_rate": 0.0002580429702866652, "loss": 3.3131, "step": 31700 }, { "epoch": 0.55, "learning_rate": 0.000257961515334936, "loss": 3.2571, "step": 31705 }, { "epoch": 0.55, "learning_rate": 0.0002578800635442854, "loss": 3.3224, "step": 31710 }, { "epoch": 0.55, "learning_rate": 0.00025779861492083805, "loss": 3.3081, "step": 31715 }, { "epoch": 0.55, "learning_rate": 0.0002577171694707186, "loss": 3.3205, "step": 31720 }, { "epoch": 0.55, "learning_rate": 0.00025763572720005135, "loss": 3.1753, "step": 31725 }, { "epoch": 0.55, "learning_rate": 0.00025755428811496023, "loss": 3.2786, "step": 31730 }, { "epoch": 0.55, "learning_rate": 0.0002574728522215693, "loss": 3.1329, "step": 31735 }, { "epoch": 0.55, "learning_rate": 0.00025739141952600176, "loss": 3.1854, "step": 31740 }, { "epoch": 0.55, "learning_rate": 0.0002573099900343811, "loss": 3.2668, "step": 31745 }, { "epoch": 0.55, "learning_rate": 0.0002572285637528305, "loss": 3.3433, "step": 31750 }, { "epoch": 0.55, "learning_rate": 0.00025714714068747263, "loss": 3.4145, "step": 31755 }, { "epoch": 0.55, "learning_rate": 0.00025706572084443027, "loss": 3.4166, "step": 31760 }, { "epoch": 0.55, "learning_rate": 0.00025698430422982555, "loss": 3.2479, "step": 31765 }, { "epoch": 0.55, "learning_rate": 0.00025690289084978076, "loss": 3.2204, "step": 31770 }, { "epoch": 0.55, "learning_rate": 0.00025682148071041767, "loss": 3.1905, "step": 31775 }, { "epoch": 0.55, "learning_rate": 0.0002567400738178579, "loss": 3.3027, "step": 31780 }, { "epoch": 0.55, "learning_rate": 0.0002566586701782231, "loss": 3.3398, "step": 31785 }, { "epoch": 0.55, "learning_rate": 0.000256577269797634, "loss": 3.3163, "step": 31790 }, { "epoch": 0.55, "learning_rate": 0.0002564958726822117, "loss": 3.2045, "step": 31795 }, { "epoch": 0.55, "learning_rate": 0.0002564144788380767, "loss": 3.313, "step": 31800 }, { "epoch": 0.55, "learning_rate": 0.00025633308827134973, "loss": 3.2722, "step": 31805 }, { "epoch": 0.55, "learning_rate": 0.00025625170098815065, "loss": 3.3247, "step": 31810 }, { "epoch": 0.55, "learning_rate": 0.00025617031699459947, "loss": 3.3358, "step": 31815 }, { "epoch": 0.55, "learning_rate": 0.00025608893629681576, "loss": 3.0963, "step": 31820 }, { "epoch": 0.55, "learning_rate": 0.00025600755890091897, "loss": 3.2378, "step": 31825 }, { "epoch": 0.55, "learning_rate": 0.0002559261848130284, "loss": 3.2526, "step": 31830 }, { "epoch": 0.55, "learning_rate": 0.0002558448140392629, "loss": 3.3501, "step": 31835 }, { "epoch": 0.55, "learning_rate": 0.000255763446585741, "loss": 3.1792, "step": 31840 }, { "epoch": 0.55, "learning_rate": 0.00025568208245858116, "loss": 3.27, "step": 31845 }, { "epoch": 0.55, "learning_rate": 0.00025560072166390163, "loss": 3.1158, "step": 31850 }, { "epoch": 0.55, "learning_rate": 0.0002555193642078203, "loss": 3.289, "step": 31855 }, { "epoch": 0.55, "learning_rate": 0.0002554380100964549, "loss": 3.2798, "step": 31860 }, { "epoch": 0.55, "learning_rate": 0.00025535665933592265, "loss": 3.3566, "step": 31865 }, { "epoch": 0.55, "learning_rate": 0.0002552753119323409, "loss": 3.3437, "step": 31870 }, { "epoch": 0.55, "learning_rate": 0.00025519396789182644, "loss": 3.2963, "step": 31875 }, { "epoch": 0.55, "learning_rate": 0.0002551126272204959, "loss": 3.3405, "step": 31880 }, { "epoch": 0.55, "learning_rate": 0.00025503128992446586, "loss": 3.2588, "step": 31885 }, { "epoch": 0.55, "learning_rate": 0.0002549499560098524, "loss": 3.2521, "step": 31890 }, { "epoch": 0.55, "learning_rate": 0.00025486862548277126, "loss": 3.2144, "step": 31895 }, { "epoch": 0.55, "learning_rate": 0.0002547872983493382, "loss": 3.3133, "step": 31900 }, { "epoch": 0.55, "learning_rate": 0.00025470597461566864, "loss": 3.2138, "step": 31905 }, { "epoch": 0.55, "learning_rate": 0.00025462465428787763, "loss": 3.2776, "step": 31910 }, { "epoch": 0.55, "learning_rate": 0.0002545433373720801, "loss": 3.2242, "step": 31915 }, { "epoch": 0.55, "learning_rate": 0.0002544620238743906, "loss": 3.2321, "step": 31920 }, { "epoch": 0.55, "learning_rate": 0.00025438071380092355, "loss": 3.2964, "step": 31925 }, { "epoch": 0.55, "learning_rate": 0.00025429940715779304, "loss": 3.2827, "step": 31930 }, { "epoch": 0.55, "learning_rate": 0.0002542181039511128, "loss": 3.265, "step": 31935 }, { "epoch": 0.55, "learning_rate": 0.00025413680418699675, "loss": 3.2511, "step": 31940 }, { "epoch": 0.55, "learning_rate": 0.00025405550787155774, "loss": 3.3294, "step": 31945 }, { "epoch": 0.55, "learning_rate": 0.0002539742150109092, "loss": 3.3519, "step": 31950 }, { "epoch": 0.55, "learning_rate": 0.0002538929256111637, "loss": 3.3194, "step": 31955 }, { "epoch": 0.55, "learning_rate": 0.000253811639678434, "loss": 3.3208, "step": 31960 }, { "epoch": 0.55, "learning_rate": 0.00025373035721883234, "loss": 3.2799, "step": 31965 }, { "epoch": 0.55, "learning_rate": 0.0002536490782384706, "loss": 3.3969, "step": 31970 }, { "epoch": 0.55, "learning_rate": 0.00025356780274346063, "loss": 3.261, "step": 31975 }, { "epoch": 0.55, "learning_rate": 0.00025348653073991386, "loss": 3.1804, "step": 31980 }, { "epoch": 0.55, "learning_rate": 0.0002534052622339417, "loss": 3.322, "step": 31985 }, { "epoch": 0.55, "learning_rate": 0.00025332399723165507, "loss": 3.2808, "step": 31990 }, { "epoch": 0.55, "learning_rate": 0.0002532427357391646, "loss": 3.266, "step": 31995 }, { "epoch": 0.55, "learning_rate": 0.0002531614777625807, "loss": 3.3233, "step": 32000 }, { "epoch": 0.55, "eval_loss": 3.3087377548217773, "eval_runtime": 149.8766, "eval_samples_per_second": 12.283, "eval_steps_per_second": 0.774, "step": 32000 }, { "epoch": 0.55, "learning_rate": 0.0002530802233080136, "loss": 3.4058, "step": 32005 }, { "epoch": 0.55, "learning_rate": 0.00025299897238157337, "loss": 3.3462, "step": 32010 }, { "epoch": 0.55, "learning_rate": 0.0002529177249893696, "loss": 3.3441, "step": 32015 }, { "epoch": 0.55, "learning_rate": 0.0002528364811375115, "loss": 3.3583, "step": 32020 }, { "epoch": 0.55, "learning_rate": 0.0002527552408321083, "loss": 3.2635, "step": 32025 }, { "epoch": 0.55, "learning_rate": 0.00025267400407926886, "loss": 3.2558, "step": 32030 }, { "epoch": 0.55, "learning_rate": 0.0002525927708851018, "loss": 3.3147, "step": 32035 }, { "epoch": 0.55, "learning_rate": 0.00025251154125571545, "loss": 3.2104, "step": 32040 }, { "epoch": 0.55, "learning_rate": 0.00025243031519721777, "loss": 3.4015, "step": 32045 }, { "epoch": 0.55, "learning_rate": 0.00025234909271571663, "loss": 3.1764, "step": 32050 }, { "epoch": 0.55, "learning_rate": 0.0002522678738173195, "loss": 3.282, "step": 32055 }, { "epoch": 0.55, "learning_rate": 0.00025218665850813355, "loss": 3.2438, "step": 32060 }, { "epoch": 0.55, "learning_rate": 0.000252105446794266, "loss": 3.2136, "step": 32065 }, { "epoch": 0.56, "learning_rate": 0.0002520242386818233, "loss": 3.3125, "step": 32070 }, { "epoch": 0.56, "learning_rate": 0.000251943034176912, "loss": 3.3091, "step": 32075 }, { "epoch": 0.56, "learning_rate": 0.00025186183328563817, "loss": 3.2398, "step": 32080 }, { "epoch": 0.56, "learning_rate": 0.0002517806360141078, "loss": 3.2099, "step": 32085 }, { "epoch": 0.56, "learning_rate": 0.00025169944236842664, "loss": 3.3446, "step": 32090 }, { "epoch": 0.56, "learning_rate": 0.00025161825235469975, "loss": 3.3282, "step": 32095 }, { "epoch": 0.56, "learning_rate": 0.0002515370659790323, "loss": 3.2385, "step": 32100 }, { "epoch": 0.56, "learning_rate": 0.00025145588324752914, "loss": 3.3214, "step": 32105 }, { "epoch": 0.56, "learning_rate": 0.0002513747041662948, "loss": 3.2566, "step": 32110 }, { "epoch": 0.56, "learning_rate": 0.00025129352874143343, "loss": 3.329, "step": 32115 }, { "epoch": 0.56, "learning_rate": 0.00025121235697904925, "loss": 3.2985, "step": 32120 }, { "epoch": 0.56, "learning_rate": 0.00025113118888524564, "loss": 3.2731, "step": 32125 }, { "epoch": 0.56, "learning_rate": 0.0002510500244661263, "loss": 3.2967, "step": 32130 }, { "epoch": 0.56, "learning_rate": 0.0002509688637277941, "loss": 3.2774, "step": 32135 }, { "epoch": 0.56, "learning_rate": 0.00025088770667635213, "loss": 3.2615, "step": 32140 }, { "epoch": 0.56, "learning_rate": 0.000250806553317903, "loss": 3.2842, "step": 32145 }, { "epoch": 0.56, "learning_rate": 0.00025072540365854895, "loss": 3.1892, "step": 32150 }, { "epoch": 0.56, "learning_rate": 0.0002506442577043919, "loss": 3.1842, "step": 32155 }, { "epoch": 0.56, "learning_rate": 0.00025056311546153376, "loss": 3.246, "step": 32160 }, { "epoch": 0.56, "learning_rate": 0.000250481976936076, "loss": 3.3429, "step": 32165 }, { "epoch": 0.56, "learning_rate": 0.00025040084213411983, "loss": 3.2132, "step": 32170 }, { "epoch": 0.56, "learning_rate": 0.0002503197110617661, "loss": 3.3142, "step": 32175 }, { "epoch": 0.56, "learning_rate": 0.0002502385837251155, "loss": 3.2998, "step": 32180 }, { "epoch": 0.56, "learning_rate": 0.00025015746013026834, "loss": 3.1796, "step": 32185 }, { "epoch": 0.56, "learning_rate": 0.0002500763402833248, "loss": 3.2882, "step": 32190 }, { "epoch": 0.56, "learning_rate": 0.00024999522419038466, "loss": 3.2778, "step": 32195 }, { "epoch": 0.56, "learning_rate": 0.0002499141118575473, "loss": 3.2608, "step": 32200 }, { "epoch": 0.56, "learning_rate": 0.00024983300329091206, "loss": 3.2805, "step": 32205 }, { "epoch": 0.56, "learning_rate": 0.00024975189849657785, "loss": 3.3096, "step": 32210 }, { "epoch": 0.56, "learning_rate": 0.0002496707974806434, "loss": 3.2804, "step": 32215 }, { "epoch": 0.56, "learning_rate": 0.0002495897002492071, "loss": 3.3137, "step": 32220 }, { "epoch": 0.56, "learning_rate": 0.0002495086068083669, "loss": 3.3505, "step": 32225 }, { "epoch": 0.56, "learning_rate": 0.0002494275171642207, "loss": 3.2128, "step": 32230 }, { "epoch": 0.56, "learning_rate": 0.00024934643132286605, "loss": 3.3917, "step": 32235 }, { "epoch": 0.56, "learning_rate": 0.00024926534929040013, "loss": 3.2804, "step": 32240 }, { "epoch": 0.56, "learning_rate": 0.00024918427107292003, "loss": 3.309, "step": 32245 }, { "epoch": 0.56, "learning_rate": 0.00024910319667652227, "loss": 3.2208, "step": 32250 }, { "epoch": 0.56, "learning_rate": 0.00024902212610730326, "loss": 3.3962, "step": 32255 }, { "epoch": 0.56, "learning_rate": 0.0002489410593713591, "loss": 3.3368, "step": 32260 }, { "epoch": 0.56, "learning_rate": 0.0002488599964747856, "loss": 3.2554, "step": 32265 }, { "epoch": 0.56, "learning_rate": 0.0002487789374236784, "loss": 3.3785, "step": 32270 }, { "epoch": 0.56, "learning_rate": 0.00024869788222413254, "loss": 3.2223, "step": 32275 }, { "epoch": 0.56, "learning_rate": 0.0002486168308822429, "loss": 3.1955, "step": 32280 }, { "epoch": 0.56, "learning_rate": 0.0002485357834041043, "loss": 3.2661, "step": 32285 }, { "epoch": 0.56, "learning_rate": 0.0002484547397958111, "loss": 3.3976, "step": 32290 }, { "epoch": 0.56, "learning_rate": 0.0002483737000634574, "loss": 3.3036, "step": 32295 }, { "epoch": 0.56, "learning_rate": 0.00024829266421313664, "loss": 3.2412, "step": 32300 }, { "epoch": 0.56, "learning_rate": 0.00024821163225094265, "loss": 3.2499, "step": 32305 }, { "epoch": 0.56, "learning_rate": 0.00024813060418296846, "loss": 3.2339, "step": 32310 }, { "epoch": 0.56, "learning_rate": 0.000248049580015307, "loss": 3.2515, "step": 32315 }, { "epoch": 0.56, "learning_rate": 0.000247968559754051, "loss": 3.2655, "step": 32320 }, { "epoch": 0.56, "learning_rate": 0.0002478875434052925, "loss": 3.1095, "step": 32325 }, { "epoch": 0.56, "learning_rate": 0.0002478065309751237, "loss": 3.322, "step": 32330 }, { "epoch": 0.56, "learning_rate": 0.0002477255224696364, "loss": 3.2905, "step": 32335 }, { "epoch": 0.56, "learning_rate": 0.0002476445178949218, "loss": 3.2749, "step": 32340 }, { "epoch": 0.56, "learning_rate": 0.00024756351725707107, "loss": 3.2526, "step": 32345 }, { "epoch": 0.56, "learning_rate": 0.00024748252056217533, "loss": 3.3218, "step": 32350 }, { "epoch": 0.56, "learning_rate": 0.0002474015278163248, "loss": 3.3869, "step": 32355 }, { "epoch": 0.56, "learning_rate": 0.0002473205390256097, "loss": 3.2438, "step": 32360 }, { "epoch": 0.56, "learning_rate": 0.00024723955419612016, "loss": 3.3304, "step": 32365 }, { "epoch": 0.56, "learning_rate": 0.0002471585733339458, "loss": 3.2835, "step": 32370 }, { "epoch": 0.56, "learning_rate": 0.0002470775964451759, "loss": 3.1467, "step": 32375 }, { "epoch": 0.56, "learning_rate": 0.0002469966235358995, "loss": 3.322, "step": 32380 }, { "epoch": 0.56, "learning_rate": 0.0002469156546122053, "loss": 3.1773, "step": 32385 }, { "epoch": 0.56, "learning_rate": 0.00024683468968018197, "loss": 3.1448, "step": 32390 }, { "epoch": 0.56, "learning_rate": 0.0002467537287459174, "loss": 3.2498, "step": 32395 }, { "epoch": 0.56, "learning_rate": 0.00024667277181549967, "loss": 3.2981, "step": 32400 }, { "epoch": 0.56, "learning_rate": 0.00024659181889501603, "loss": 3.3788, "step": 32405 }, { "epoch": 0.56, "learning_rate": 0.000246510869990554, "loss": 3.2933, "step": 32410 }, { "epoch": 0.56, "learning_rate": 0.0002464299251082004, "loss": 3.3849, "step": 32415 }, { "epoch": 0.56, "learning_rate": 0.0002463489842540418, "loss": 3.3092, "step": 32420 }, { "epoch": 0.56, "learning_rate": 0.00024626804743416484, "loss": 3.3249, "step": 32425 }, { "epoch": 0.56, "learning_rate": 0.0002461871146546551, "loss": 3.2616, "step": 32430 }, { "epoch": 0.56, "learning_rate": 0.00024610618592159876, "loss": 3.1845, "step": 32435 }, { "epoch": 0.56, "learning_rate": 0.0002460252612410809, "loss": 3.3216, "step": 32440 }, { "epoch": 0.56, "learning_rate": 0.0002459443406191868, "loss": 3.3316, "step": 32445 }, { "epoch": 0.56, "learning_rate": 0.0002458634240620013, "loss": 3.1841, "step": 32450 }, { "epoch": 0.56, "learning_rate": 0.00024578251157560886, "loss": 3.3593, "step": 32455 }, { "epoch": 0.56, "learning_rate": 0.00024570160316609365, "loss": 3.263, "step": 32460 }, { "epoch": 0.56, "learning_rate": 0.00024562069883953956, "loss": 3.2655, "step": 32465 }, { "epoch": 0.56, "learning_rate": 0.00024553979860203034, "loss": 3.1221, "step": 32470 }, { "epoch": 0.56, "learning_rate": 0.0002454589024596492, "loss": 3.1684, "step": 32475 }, { "epoch": 0.56, "learning_rate": 0.00024537801041847894, "loss": 3.2231, "step": 32480 }, { "epoch": 0.56, "learning_rate": 0.0002452971224846024, "loss": 3.3318, "step": 32485 }, { "epoch": 0.56, "learning_rate": 0.0002452162386641019, "loss": 3.2675, "step": 32490 }, { "epoch": 0.56, "learning_rate": 0.0002451353589630595, "loss": 3.2606, "step": 32495 }, { "epoch": 0.56, "learning_rate": 0.00024505448338755706, "loss": 3.2408, "step": 32500 }, { "epoch": 0.56, "eval_loss": 3.3065123558044434, "eval_runtime": 149.9745, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 32500 }, { "epoch": 0.56, "learning_rate": 0.00024497361194367564, "loss": 3.2712, "step": 32505 }, { "epoch": 0.56, "learning_rate": 0.0002448927446374967, "loss": 3.2798, "step": 32510 }, { "epoch": 0.56, "learning_rate": 0.00024481188147510097, "loss": 3.2018, "step": 32515 }, { "epoch": 0.56, "learning_rate": 0.0002447310224625689, "loss": 3.2207, "step": 32520 }, { "epoch": 0.56, "learning_rate": 0.00024465016760598075, "loss": 3.3062, "step": 32525 }, { "epoch": 0.56, "learning_rate": 0.00024456931691141625, "loss": 3.2997, "step": 32530 }, { "epoch": 0.56, "learning_rate": 0.00024448847038495506, "loss": 3.2389, "step": 32535 }, { "epoch": 0.56, "learning_rate": 0.0002444076280326764, "loss": 3.2905, "step": 32540 }, { "epoch": 0.56, "learning_rate": 0.00024432678986065914, "loss": 3.1733, "step": 32545 }, { "epoch": 0.56, "learning_rate": 0.00024424595587498217, "loss": 3.2917, "step": 32550 }, { "epoch": 0.56, "learning_rate": 0.0002441651260817235, "loss": 3.262, "step": 32555 }, { "epoch": 0.56, "learning_rate": 0.0002440843004869611, "loss": 3.2987, "step": 32560 }, { "epoch": 0.56, "learning_rate": 0.0002440034790967727, "loss": 3.3115, "step": 32565 }, { "epoch": 0.56, "learning_rate": 0.00024392266191723583, "loss": 3.2615, "step": 32570 }, { "epoch": 0.56, "learning_rate": 0.0002438418489544273, "loss": 3.3166, "step": 32575 }, { "epoch": 0.56, "learning_rate": 0.00024376104021442408, "loss": 3.3056, "step": 32580 }, { "epoch": 0.56, "learning_rate": 0.00024368023570330225, "loss": 3.236, "step": 32585 }, { "epoch": 0.56, "learning_rate": 0.00024359943542713805, "loss": 3.1418, "step": 32590 }, { "epoch": 0.56, "learning_rate": 0.00024351863939200735, "loss": 3.3424, "step": 32595 }, { "epoch": 0.56, "learning_rate": 0.00024343784760398545, "loss": 3.2941, "step": 32600 }, { "epoch": 0.56, "learning_rate": 0.00024335706006914766, "loss": 3.2504, "step": 32605 }, { "epoch": 0.56, "learning_rate": 0.00024327627679356854, "loss": 3.1296, "step": 32610 }, { "epoch": 0.56, "learning_rate": 0.00024319549778332273, "loss": 3.1769, "step": 32615 }, { "epoch": 0.56, "learning_rate": 0.00024311472304448432, "loss": 3.2691, "step": 32620 }, { "epoch": 0.56, "learning_rate": 0.00024303395258312724, "loss": 3.2445, "step": 32625 }, { "epoch": 0.56, "learning_rate": 0.00024295318640532506, "loss": 3.2382, "step": 32630 }, { "epoch": 0.56, "learning_rate": 0.00024287242451715086, "loss": 3.1444, "step": 32635 }, { "epoch": 0.56, "learning_rate": 0.0002427916669246775, "loss": 3.3026, "step": 32640 }, { "epoch": 0.56, "learning_rate": 0.00024271091363397757, "loss": 3.2935, "step": 32645 }, { "epoch": 0.57, "learning_rate": 0.00024263016465112344, "loss": 3.1797, "step": 32650 }, { "epoch": 0.57, "learning_rate": 0.00024254941998218694, "loss": 3.2365, "step": 32655 }, { "epoch": 0.57, "learning_rate": 0.00024246867963323952, "loss": 3.3014, "step": 32660 }, { "epoch": 0.57, "learning_rate": 0.00024238794361035253, "loss": 3.2298, "step": 32665 }, { "epoch": 0.57, "learning_rate": 0.00024230721191959703, "loss": 3.2835, "step": 32670 }, { "epoch": 0.57, "learning_rate": 0.0002422264845670434, "loss": 3.2102, "step": 32675 }, { "epoch": 0.57, "learning_rate": 0.0002421457615587622, "loss": 3.329, "step": 32680 }, { "epoch": 0.57, "learning_rate": 0.00024206504290082313, "loss": 3.2735, "step": 32685 }, { "epoch": 0.57, "learning_rate": 0.0002419843285992959, "loss": 3.2008, "step": 32690 }, { "epoch": 0.57, "learning_rate": 0.0002419036186602499, "loss": 3.263, "step": 32695 }, { "epoch": 0.57, "learning_rate": 0.00024182291308975394, "loss": 3.3097, "step": 32700 }, { "epoch": 0.57, "learning_rate": 0.00024174221189387693, "loss": 3.202, "step": 32705 }, { "epoch": 0.57, "learning_rate": 0.00024166151507868687, "loss": 3.2625, "step": 32710 }, { "epoch": 0.57, "learning_rate": 0.000241580822650252, "loss": 3.2967, "step": 32715 }, { "epoch": 0.57, "learning_rate": 0.0002415001346146398, "loss": 3.1632, "step": 32720 }, { "epoch": 0.57, "learning_rate": 0.00024141945097791765, "loss": 3.2288, "step": 32725 }, { "epoch": 0.57, "learning_rate": 0.00024133877174615267, "loss": 3.2721, "step": 32730 }, { "epoch": 0.57, "learning_rate": 0.0002412580969254114, "loss": 3.2332, "step": 32735 }, { "epoch": 0.57, "learning_rate": 0.00024117742652176007, "loss": 3.2993, "step": 32740 }, { "epoch": 0.57, "learning_rate": 0.00024109676054126486, "loss": 3.3246, "step": 32745 }, { "epoch": 0.57, "learning_rate": 0.00024101609898999145, "loss": 3.2481, "step": 32750 }, { "epoch": 0.57, "learning_rate": 0.00024093544187400511, "loss": 3.1872, "step": 32755 }, { "epoch": 0.57, "learning_rate": 0.00024085478919937082, "loss": 3.1915, "step": 32760 }, { "epoch": 0.57, "learning_rate": 0.0002407741409721532, "loss": 3.2906, "step": 32765 }, { "epoch": 0.57, "learning_rate": 0.00024069349719841663, "loss": 3.3152, "step": 32770 }, { "epoch": 0.57, "learning_rate": 0.00024061285788422524, "loss": 3.3742, "step": 32775 }, { "epoch": 0.57, "learning_rate": 0.00024053222303564264, "loss": 3.2066, "step": 32780 }, { "epoch": 0.57, "learning_rate": 0.00024045159265873194, "loss": 3.2123, "step": 32785 }, { "epoch": 0.57, "learning_rate": 0.00024037096675955634, "loss": 3.3466, "step": 32790 }, { "epoch": 0.57, "learning_rate": 0.00024029034534417848, "loss": 3.3364, "step": 32795 }, { "epoch": 0.57, "learning_rate": 0.00024020972841866056, "loss": 3.263, "step": 32800 }, { "epoch": 0.57, "learning_rate": 0.00024012911598906466, "loss": 3.138, "step": 32805 }, { "epoch": 0.57, "learning_rate": 0.00024004850806145256, "loss": 3.308, "step": 32810 }, { "epoch": 0.57, "learning_rate": 0.0002399679046418853, "loss": 3.3403, "step": 32815 }, { "epoch": 0.57, "learning_rate": 0.0002398873057364239, "loss": 3.3254, "step": 32820 }, { "epoch": 0.57, "learning_rate": 0.00023980671135112906, "loss": 3.3335, "step": 32825 }, { "epoch": 0.57, "learning_rate": 0.00023972612149206107, "loss": 3.1811, "step": 32830 }, { "epoch": 0.57, "learning_rate": 0.00023964553616527987, "loss": 3.2545, "step": 32835 }, { "epoch": 0.57, "learning_rate": 0.00023956495537684504, "loss": 3.2524, "step": 32840 }, { "epoch": 0.57, "learning_rate": 0.00023948437913281576, "loss": 3.187, "step": 32845 }, { "epoch": 0.57, "learning_rate": 0.000239403807439251, "loss": 3.234, "step": 32850 }, { "epoch": 0.57, "learning_rate": 0.00023932324030220947, "loss": 3.3456, "step": 32855 }, { "epoch": 0.57, "learning_rate": 0.0002392426777277494, "loss": 3.3587, "step": 32860 }, { "epoch": 0.57, "learning_rate": 0.00023916211972192843, "loss": 3.1395, "step": 32865 }, { "epoch": 0.57, "learning_rate": 0.00023908156629080427, "loss": 3.3324, "step": 32870 }, { "epoch": 0.57, "learning_rate": 0.0002390010174404342, "loss": 3.2773, "step": 32875 }, { "epoch": 0.57, "learning_rate": 0.00023892047317687493, "loss": 3.3637, "step": 32880 }, { "epoch": 0.57, "learning_rate": 0.00023883993350618316, "loss": 3.216, "step": 32885 }, { "epoch": 0.57, "learning_rate": 0.00023875939843441484, "loss": 3.2905, "step": 32890 }, { "epoch": 0.57, "learning_rate": 0.00023867886796762598, "loss": 3.2365, "step": 32895 }, { "epoch": 0.57, "learning_rate": 0.0002385983421118719, "loss": 3.2825, "step": 32900 }, { "epoch": 0.57, "learning_rate": 0.0002385178208732078, "loss": 3.2922, "step": 32905 }, { "epoch": 0.57, "learning_rate": 0.00023843730425768863, "loss": 3.1794, "step": 32910 }, { "epoch": 0.57, "learning_rate": 0.00023835679227136858, "loss": 3.2932, "step": 32915 }, { "epoch": 0.57, "learning_rate": 0.00023827628492030184, "loss": 3.3405, "step": 32920 }, { "epoch": 0.57, "learning_rate": 0.0002381957822105421, "loss": 3.3375, "step": 32925 }, { "epoch": 0.57, "learning_rate": 0.00023811528414814285, "loss": 3.1655, "step": 32930 }, { "epoch": 0.57, "learning_rate": 0.0002380347907391571, "loss": 3.2732, "step": 32935 }, { "epoch": 0.57, "learning_rate": 0.0002379543019896375, "loss": 3.2293, "step": 32940 }, { "epoch": 0.57, "learning_rate": 0.00023787381790563636, "loss": 3.2525, "step": 32945 }, { "epoch": 0.57, "learning_rate": 0.00023779333849320572, "loss": 3.2663, "step": 32950 }, { "epoch": 0.57, "learning_rate": 0.0002377128637583973, "loss": 3.328, "step": 32955 }, { "epoch": 0.57, "learning_rate": 0.00023763239370726237, "loss": 3.2032, "step": 32960 }, { "epoch": 0.57, "learning_rate": 0.00023755192834585165, "loss": 3.2314, "step": 32965 }, { "epoch": 0.57, "learning_rate": 0.0002374714676802159, "loss": 3.3097, "step": 32970 }, { "epoch": 0.57, "learning_rate": 0.00023739101171640537, "loss": 3.2052, "step": 32975 }, { "epoch": 0.57, "learning_rate": 0.00023731056046046985, "loss": 3.3136, "step": 32980 }, { "epoch": 0.57, "learning_rate": 0.00023723011391845903, "loss": 3.3039, "step": 32985 }, { "epoch": 0.57, "learning_rate": 0.0002371496720964218, "loss": 3.2194, "step": 32990 }, { "epoch": 0.57, "learning_rate": 0.0002370692350004072, "loss": 3.2503, "step": 32995 }, { "epoch": 0.57, "learning_rate": 0.00023698880263646357, "loss": 3.2548, "step": 33000 }, { "epoch": 0.57, "eval_loss": 3.29677677154541, "eval_runtime": 149.7729, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.775, "step": 33000 }, { "epoch": 0.57, "learning_rate": 0.00023690837501063905, "loss": 3.2068, "step": 33005 }, { "epoch": 0.57, "learning_rate": 0.0002368279521289815, "loss": 3.3154, "step": 33010 }, { "epoch": 0.57, "learning_rate": 0.0002367475339975381, "loss": 3.3981, "step": 33015 }, { "epoch": 0.57, "learning_rate": 0.00023666712062235602, "loss": 3.1803, "step": 33020 }, { "epoch": 0.57, "learning_rate": 0.0002365867120094818, "loss": 3.2611, "step": 33025 }, { "epoch": 0.57, "learning_rate": 0.0002365063081649619, "loss": 3.331, "step": 33030 }, { "epoch": 0.57, "learning_rate": 0.00023642590909484236, "loss": 3.2699, "step": 33035 }, { "epoch": 0.57, "learning_rate": 0.00023634551480516852, "loss": 3.2775, "step": 33040 }, { "epoch": 0.57, "learning_rate": 0.00023626512530198574, "loss": 3.2124, "step": 33045 }, { "epoch": 0.57, "learning_rate": 0.00023618474059133887, "loss": 3.267, "step": 33050 }, { "epoch": 0.57, "learning_rate": 0.00023610436067927253, "loss": 3.3642, "step": 33055 }, { "epoch": 0.57, "learning_rate": 0.00023602398557183074, "loss": 3.288, "step": 33060 }, { "epoch": 0.57, "learning_rate": 0.0002359436152750575, "loss": 3.2004, "step": 33065 }, { "epoch": 0.57, "learning_rate": 0.00023586324979499597, "loss": 3.1586, "step": 33070 }, { "epoch": 0.57, "learning_rate": 0.00023578288913768942, "loss": 3.2883, "step": 33075 }, { "epoch": 0.57, "learning_rate": 0.00023570253330918044, "loss": 3.2835, "step": 33080 }, { "epoch": 0.57, "learning_rate": 0.00023562218231551143, "loss": 3.328, "step": 33085 }, { "epoch": 0.57, "learning_rate": 0.0002355418361627245, "loss": 3.1934, "step": 33090 }, { "epoch": 0.57, "learning_rate": 0.00023546149485686104, "loss": 3.3223, "step": 33095 }, { "epoch": 0.57, "learning_rate": 0.0002353811584039625, "loss": 3.26, "step": 33100 }, { "epoch": 0.57, "learning_rate": 0.00023530082681006957, "loss": 3.3357, "step": 33105 }, { "epoch": 0.57, "learning_rate": 0.00023522050008122292, "loss": 3.3325, "step": 33110 }, { "epoch": 0.57, "learning_rate": 0.0002351401782234628, "loss": 3.2175, "step": 33115 }, { "epoch": 0.57, "learning_rate": 0.0002350598612428288, "loss": 3.2222, "step": 33120 }, { "epoch": 0.57, "learning_rate": 0.00023497954914536033, "loss": 3.3419, "step": 33125 }, { "epoch": 0.57, "learning_rate": 0.00023489924193709658, "loss": 3.2388, "step": 33130 }, { "epoch": 0.57, "learning_rate": 0.00023481893962407622, "loss": 3.2658, "step": 33135 }, { "epoch": 0.57, "learning_rate": 0.00023473864221233766, "loss": 3.3275, "step": 33140 }, { "epoch": 0.57, "learning_rate": 0.00023465834970791863, "loss": 3.3339, "step": 33145 }, { "epoch": 0.57, "learning_rate": 0.0002345780621168568, "loss": 3.2137, "step": 33150 }, { "epoch": 0.57, "learning_rate": 0.00023449777944518948, "loss": 3.1605, "step": 33155 }, { "epoch": 0.57, "learning_rate": 0.0002344175016989534, "loss": 3.2205, "step": 33160 }, { "epoch": 0.57, "learning_rate": 0.0002343372288841852, "loss": 3.3544, "step": 33165 }, { "epoch": 0.57, "learning_rate": 0.00023425696100692078, "loss": 3.3108, "step": 33170 }, { "epoch": 0.57, "learning_rate": 0.00023417669807319598, "loss": 3.2136, "step": 33175 }, { "epoch": 0.57, "learning_rate": 0.00023409644008904611, "loss": 3.2587, "step": 33180 }, { "epoch": 0.57, "learning_rate": 0.00023401618706050617, "loss": 3.3205, "step": 33185 }, { "epoch": 0.57, "learning_rate": 0.00023393593899361095, "loss": 3.2509, "step": 33190 }, { "epoch": 0.57, "learning_rate": 0.00023385569589439445, "loss": 3.2005, "step": 33195 }, { "epoch": 0.57, "learning_rate": 0.00023377545776889064, "loss": 3.2256, "step": 33200 }, { "epoch": 0.57, "learning_rate": 0.000233695224623133, "loss": 3.231, "step": 33205 }, { "epoch": 0.57, "learning_rate": 0.0002336149964631546, "loss": 3.2971, "step": 33210 }, { "epoch": 0.57, "learning_rate": 0.00023353477329498843, "loss": 3.2918, "step": 33215 }, { "epoch": 0.57, "learning_rate": 0.00023345455512466658, "loss": 3.2987, "step": 33220 }, { "epoch": 0.57, "learning_rate": 0.00023337434195822108, "loss": 3.3542, "step": 33225 }, { "epoch": 0.58, "learning_rate": 0.00023329413380168365, "loss": 3.3258, "step": 33230 }, { "epoch": 0.58, "learning_rate": 0.00023321393066108557, "loss": 3.2357, "step": 33235 }, { "epoch": 0.58, "learning_rate": 0.00023313373254245763, "loss": 3.2245, "step": 33240 }, { "epoch": 0.58, "learning_rate": 0.00023305353945183023, "loss": 3.3036, "step": 33245 }, { "epoch": 0.58, "learning_rate": 0.00023297335139523356, "loss": 3.3151, "step": 33250 }, { "epoch": 0.58, "learning_rate": 0.00023289316837869743, "loss": 3.2807, "step": 33255 }, { "epoch": 0.58, "learning_rate": 0.00023281299040825104, "loss": 3.2591, "step": 33260 }, { "epoch": 0.58, "learning_rate": 0.0002327328174899236, "loss": 3.3142, "step": 33265 }, { "epoch": 0.58, "learning_rate": 0.00023265264962974339, "loss": 3.2627, "step": 33270 }, { "epoch": 0.58, "learning_rate": 0.00023257248683373878, "loss": 3.3279, "step": 33275 }, { "epoch": 0.58, "learning_rate": 0.00023249232910793768, "loss": 3.234, "step": 33280 }, { "epoch": 0.58, "learning_rate": 0.0002324121764583674, "loss": 3.2011, "step": 33285 }, { "epoch": 0.58, "learning_rate": 0.00023233202889105507, "loss": 3.2268, "step": 33290 }, { "epoch": 0.58, "learning_rate": 0.00023225188641202746, "loss": 3.1476, "step": 33295 }, { "epoch": 0.58, "learning_rate": 0.00023217174902731078, "loss": 3.2587, "step": 33300 }, { "epoch": 0.58, "learning_rate": 0.00023209161674293087, "loss": 3.1833, "step": 33305 }, { "epoch": 0.58, "learning_rate": 0.00023201148956491336, "loss": 3.1849, "step": 33310 }, { "epoch": 0.58, "learning_rate": 0.00023193136749928342, "loss": 3.2485, "step": 33315 }, { "epoch": 0.58, "learning_rate": 0.00023185125055206592, "loss": 3.2812, "step": 33320 }, { "epoch": 0.58, "learning_rate": 0.00023177113872928498, "loss": 3.1795, "step": 33325 }, { "epoch": 0.58, "learning_rate": 0.00023169103203696473, "loss": 3.3434, "step": 33330 }, { "epoch": 0.58, "learning_rate": 0.00023161093048112887, "loss": 3.3059, "step": 33335 }, { "epoch": 0.58, "learning_rate": 0.00023153083406780045, "loss": 3.3019, "step": 33340 }, { "epoch": 0.58, "learning_rate": 0.00023145074280300258, "loss": 3.2374, "step": 33345 }, { "epoch": 0.58, "learning_rate": 0.00023137065669275739, "loss": 3.2438, "step": 33350 }, { "epoch": 0.58, "learning_rate": 0.0002312905757430871, "loss": 3.1102, "step": 33355 }, { "epoch": 0.58, "learning_rate": 0.00023121049996001344, "loss": 3.3015, "step": 33360 }, { "epoch": 0.58, "learning_rate": 0.0002311304293495576, "loss": 3.284, "step": 33365 }, { "epoch": 0.58, "learning_rate": 0.00023105036391774065, "loss": 3.3114, "step": 33370 }, { "epoch": 0.58, "learning_rate": 0.00023097030367058284, "loss": 3.2978, "step": 33375 }, { "epoch": 0.58, "learning_rate": 0.00023089024861410446, "loss": 3.2116, "step": 33380 }, { "epoch": 0.58, "learning_rate": 0.00023081019875432517, "loss": 3.3001, "step": 33385 }, { "epoch": 0.58, "learning_rate": 0.00023073015409726434, "loss": 3.317, "step": 33390 }, { "epoch": 0.58, "learning_rate": 0.00023065011464894103, "loss": 3.2739, "step": 33395 }, { "epoch": 0.58, "learning_rate": 0.00023057008041537362, "loss": 3.2719, "step": 33400 }, { "epoch": 0.58, "learning_rate": 0.0002304900514025803, "loss": 3.2217, "step": 33405 }, { "epoch": 0.58, "learning_rate": 0.00023041002761657888, "loss": 3.32, "step": 33410 }, { "epoch": 0.58, "learning_rate": 0.00023033000906338676, "loss": 3.2321, "step": 33415 }, { "epoch": 0.58, "learning_rate": 0.000230249995749021, "loss": 3.2446, "step": 33420 }, { "epoch": 0.58, "learning_rate": 0.000230169987679498, "loss": 3.2752, "step": 33425 }, { "epoch": 0.58, "learning_rate": 0.000230089984860834, "loss": 3.1693, "step": 33430 }, { "epoch": 0.58, "learning_rate": 0.0002300099872990449, "loss": 3.3488, "step": 33435 }, { "epoch": 0.58, "learning_rate": 0.0002299299950001461, "loss": 3.2554, "step": 33440 }, { "epoch": 0.58, "learning_rate": 0.00022985000797015262, "loss": 3.2451, "step": 33445 }, { "epoch": 0.58, "learning_rate": 0.00022977002621507893, "loss": 3.0189, "step": 33450 }, { "epoch": 0.58, "learning_rate": 0.00022969004974093932, "loss": 3.3202, "step": 33455 }, { "epoch": 0.58, "learning_rate": 0.0002296100785537477, "loss": 3.2214, "step": 33460 }, { "epoch": 0.58, "learning_rate": 0.00022953011265951738, "loss": 3.13, "step": 33465 }, { "epoch": 0.58, "learning_rate": 0.0002294501520642616, "loss": 3.2021, "step": 33470 }, { "epoch": 0.58, "learning_rate": 0.00022937019677399267, "loss": 3.2777, "step": 33475 }, { "epoch": 0.58, "learning_rate": 0.00022929024679472303, "loss": 3.279, "step": 33480 }, { "epoch": 0.58, "learning_rate": 0.00022921030213246437, "loss": 3.3105, "step": 33485 }, { "epoch": 0.58, "learning_rate": 0.0002291303627932282, "loss": 3.2978, "step": 33490 }, { "epoch": 0.58, "learning_rate": 0.0002290504287830257, "loss": 3.2142, "step": 33495 }, { "epoch": 0.58, "learning_rate": 0.00022897050010786725, "loss": 3.2172, "step": 33500 }, { "epoch": 0.58, "eval_loss": 3.2933146953582764, "eval_runtime": 149.7693, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.775, "step": 33500 }, { "epoch": 0.58, "learning_rate": 0.00022889057677376312, "loss": 3.3861, "step": 33505 }, { "epoch": 0.58, "learning_rate": 0.0002288106587867232, "loss": 3.1333, "step": 33510 }, { "epoch": 0.58, "learning_rate": 0.00022873074615275695, "loss": 3.2619, "step": 33515 }, { "epoch": 0.58, "learning_rate": 0.00022865083887787328, "loss": 3.2127, "step": 33520 }, { "epoch": 0.58, "learning_rate": 0.00022857093696808102, "loss": 3.3108, "step": 33525 }, { "epoch": 0.58, "learning_rate": 0.00022849104042938805, "loss": 3.3339, "step": 33530 }, { "epoch": 0.58, "learning_rate": 0.0002284111492678024, "loss": 3.2757, "step": 33535 }, { "epoch": 0.58, "learning_rate": 0.0002283312634893315, "loss": 3.2759, "step": 33540 }, { "epoch": 0.58, "learning_rate": 0.0002282513830999822, "loss": 3.2947, "step": 33545 }, { "epoch": 0.58, "learning_rate": 0.00022817150810576134, "loss": 3.2764, "step": 33550 }, { "epoch": 0.58, "learning_rate": 0.00022809163851267482, "loss": 3.2314, "step": 33555 }, { "epoch": 0.58, "learning_rate": 0.00022801177432672863, "loss": 3.2722, "step": 33560 }, { "epoch": 0.58, "learning_rate": 0.000227931915553928, "loss": 3.1763, "step": 33565 }, { "epoch": 0.58, "learning_rate": 0.00022785206220027798, "loss": 3.3469, "step": 33570 }, { "epoch": 0.58, "learning_rate": 0.00022777221427178326, "loss": 3.1805, "step": 33575 }, { "epoch": 0.58, "learning_rate": 0.0002276923717744478, "loss": 3.1481, "step": 33580 }, { "epoch": 0.58, "learning_rate": 0.00022761253471427536, "loss": 3.2782, "step": 33585 }, { "epoch": 0.58, "learning_rate": 0.00022753270309726933, "loss": 3.3483, "step": 33590 }, { "epoch": 0.58, "learning_rate": 0.00022745287692943266, "loss": 3.2516, "step": 33595 }, { "epoch": 0.58, "learning_rate": 0.00022737305621676787, "loss": 3.2987, "step": 33600 }, { "epoch": 0.58, "learning_rate": 0.00022729324096527708, "loss": 3.2064, "step": 33605 }, { "epoch": 0.58, "learning_rate": 0.00022721343118096185, "loss": 3.3007, "step": 33610 }, { "epoch": 0.58, "learning_rate": 0.00022713362686982357, "loss": 3.2484, "step": 33615 }, { "epoch": 0.58, "learning_rate": 0.0002270538280378632, "loss": 3.2258, "step": 33620 }, { "epoch": 0.58, "learning_rate": 0.00022697403469108116, "loss": 3.306, "step": 33625 }, { "epoch": 0.58, "learning_rate": 0.00022689424683547735, "loss": 3.1254, "step": 33630 }, { "epoch": 0.58, "learning_rate": 0.00022681446447705153, "loss": 3.1907, "step": 33635 }, { "epoch": 0.58, "learning_rate": 0.00022673468762180295, "loss": 3.2499, "step": 33640 }, { "epoch": 0.58, "learning_rate": 0.00022665491627573033, "loss": 3.2333, "step": 33645 }, { "epoch": 0.58, "learning_rate": 0.00022657515044483223, "loss": 3.1616, "step": 33650 }, { "epoch": 0.58, "learning_rate": 0.0002264953901351064, "loss": 3.1961, "step": 33655 }, { "epoch": 0.58, "learning_rate": 0.0002264156353525506, "loss": 3.3953, "step": 33660 }, { "epoch": 0.58, "learning_rate": 0.00022633588610316185, "loss": 3.2886, "step": 33665 }, { "epoch": 0.58, "learning_rate": 0.00022625614239293692, "loss": 3.3128, "step": 33670 }, { "epoch": 0.58, "learning_rate": 0.00022617640422787226, "loss": 3.318, "step": 33675 }, { "epoch": 0.58, "learning_rate": 0.00022609667161396357, "loss": 3.1784, "step": 33680 }, { "epoch": 0.58, "learning_rate": 0.00022601694455720648, "loss": 3.3266, "step": 33685 }, { "epoch": 0.58, "learning_rate": 0.00022593722306359594, "loss": 3.2716, "step": 33690 }, { "epoch": 0.58, "learning_rate": 0.0002258575071391267, "loss": 3.2966, "step": 33695 }, { "epoch": 0.58, "learning_rate": 0.00022577779678979303, "loss": 3.2999, "step": 33700 }, { "epoch": 0.58, "learning_rate": 0.0002256980920215886, "loss": 3.1279, "step": 33705 }, { "epoch": 0.58, "learning_rate": 0.00022561839284050678, "loss": 3.3071, "step": 33710 }, { "epoch": 0.58, "learning_rate": 0.00022553869925254064, "loss": 3.2625, "step": 33715 }, { "epoch": 0.58, "learning_rate": 0.0002254590112636828, "loss": 3.2814, "step": 33720 }, { "epoch": 0.58, "learning_rate": 0.0002253793288799253, "loss": 3.25, "step": 33725 }, { "epoch": 0.58, "learning_rate": 0.00022529965210725972, "loss": 3.2336, "step": 33730 }, { "epoch": 0.58, "learning_rate": 0.0002252199809516775, "loss": 3.3184, "step": 33735 }, { "epoch": 0.58, "learning_rate": 0.00022514031541916955, "loss": 3.3121, "step": 33740 }, { "epoch": 0.58, "learning_rate": 0.00022506065551572614, "loss": 3.319, "step": 33745 }, { "epoch": 0.58, "learning_rate": 0.00022498100124733738, "loss": 3.2058, "step": 33750 }, { "epoch": 0.58, "learning_rate": 0.000224901352619993, "loss": 3.2683, "step": 33755 }, { "epoch": 0.58, "learning_rate": 0.00022482170963968195, "loss": 3.3285, "step": 33760 }, { "epoch": 0.58, "learning_rate": 0.000224742072312393, "loss": 3.3569, "step": 33765 }, { "epoch": 0.58, "learning_rate": 0.0002246624406441145, "loss": 3.2333, "step": 33770 }, { "epoch": 0.58, "learning_rate": 0.00022458281464083442, "loss": 3.1978, "step": 33775 }, { "epoch": 0.58, "learning_rate": 0.0002245031943085402, "loss": 3.1741, "step": 33780 }, { "epoch": 0.58, "learning_rate": 0.00022442357965321889, "loss": 3.0947, "step": 33785 }, { "epoch": 0.58, "learning_rate": 0.00022434397068085693, "loss": 3.2276, "step": 33790 }, { "epoch": 0.58, "learning_rate": 0.00022426436739744068, "loss": 3.1653, "step": 33795 }, { "epoch": 0.58, "learning_rate": 0.0002241847698089559, "loss": 3.3112, "step": 33800 }, { "epoch": 0.59, "learning_rate": 0.00022410517792138801, "loss": 3.1834, "step": 33805 }, { "epoch": 0.59, "learning_rate": 0.00022402559174072162, "loss": 3.2776, "step": 33810 }, { "epoch": 0.59, "learning_rate": 0.0002239460112729413, "loss": 3.1643, "step": 33815 }, { "epoch": 0.59, "learning_rate": 0.00022386643652403133, "loss": 3.3707, "step": 33820 }, { "epoch": 0.59, "learning_rate": 0.00022378686749997505, "loss": 3.2157, "step": 33825 }, { "epoch": 0.59, "learning_rate": 0.0002237073042067559, "loss": 3.2477, "step": 33830 }, { "epoch": 0.59, "learning_rate": 0.00022362774665035632, "loss": 3.2571, "step": 33835 }, { "epoch": 0.59, "learning_rate": 0.00022354819483675885, "loss": 3.169, "step": 33840 }, { "epoch": 0.59, "learning_rate": 0.00022346864877194526, "loss": 3.2855, "step": 33845 }, { "epoch": 0.59, "learning_rate": 0.00022338910846189707, "loss": 3.2139, "step": 33850 }, { "epoch": 0.59, "learning_rate": 0.00022330957391259544, "loss": 3.331, "step": 33855 }, { "epoch": 0.59, "learning_rate": 0.0002232300451300207, "loss": 3.3466, "step": 33860 }, { "epoch": 0.59, "learning_rate": 0.00022315052212015316, "loss": 3.2267, "step": 33865 }, { "epoch": 0.59, "learning_rate": 0.00022307100488897243, "loss": 3.1652, "step": 33870 }, { "epoch": 0.59, "learning_rate": 0.0002229914934424579, "loss": 3.2467, "step": 33875 }, { "epoch": 0.59, "learning_rate": 0.00022291198778658853, "loss": 3.3519, "step": 33880 }, { "epoch": 0.59, "learning_rate": 0.00022283248792734256, "loss": 3.1367, "step": 33885 }, { "epoch": 0.59, "learning_rate": 0.00022275299387069798, "loss": 3.2817, "step": 33890 }, { "epoch": 0.59, "learning_rate": 0.00022267350562263233, "loss": 3.1939, "step": 33895 }, { "epoch": 0.59, "learning_rate": 0.00022259402318912283, "loss": 3.1678, "step": 33900 }, { "epoch": 0.59, "learning_rate": 0.00022251454657614615, "loss": 3.2199, "step": 33905 }, { "epoch": 0.59, "learning_rate": 0.00022243507578967837, "loss": 3.289, "step": 33910 }, { "epoch": 0.59, "learning_rate": 0.00022235561083569532, "loss": 3.3205, "step": 33915 }, { "epoch": 0.59, "learning_rate": 0.00022227615172017254, "loss": 3.2538, "step": 33920 }, { "epoch": 0.59, "learning_rate": 0.0002221966984490847, "loss": 3.1713, "step": 33925 }, { "epoch": 0.59, "learning_rate": 0.00022211725102840659, "loss": 3.1795, "step": 33930 }, { "epoch": 0.59, "learning_rate": 0.00022203780946411188, "loss": 3.2756, "step": 33935 }, { "epoch": 0.59, "learning_rate": 0.0002219583737621744, "loss": 3.2947, "step": 33940 }, { "epoch": 0.59, "learning_rate": 0.00022187894392856726, "loss": 3.2601, "step": 33945 }, { "epoch": 0.59, "learning_rate": 0.0002217995199692631, "loss": 3.2433, "step": 33950 }, { "epoch": 0.59, "learning_rate": 0.00022172010189023446, "loss": 3.1556, "step": 33955 }, { "epoch": 0.59, "learning_rate": 0.00022164068969745282, "loss": 3.3248, "step": 33960 }, { "epoch": 0.59, "learning_rate": 0.00022156128339688978, "loss": 3.2672, "step": 33965 }, { "epoch": 0.59, "learning_rate": 0.00022148188299451623, "loss": 3.1232, "step": 33970 }, { "epoch": 0.59, "learning_rate": 0.0002214024884963026, "loss": 3.3216, "step": 33975 }, { "epoch": 0.59, "learning_rate": 0.00022132309990821914, "loss": 3.3364, "step": 33980 }, { "epoch": 0.59, "learning_rate": 0.00022124371723623542, "loss": 3.3008, "step": 33985 }, { "epoch": 0.59, "learning_rate": 0.0002211643404863204, "loss": 3.1925, "step": 33990 }, { "epoch": 0.59, "learning_rate": 0.00022108496966444295, "loss": 3.1433, "step": 33995 }, { "epoch": 0.59, "learning_rate": 0.0002210056047765714, "loss": 3.1537, "step": 34000 }, { "epoch": 0.59, "eval_loss": 3.282989501953125, "eval_runtime": 151.8721, "eval_samples_per_second": 12.122, "eval_steps_per_second": 0.764, "step": 34000 }, { "epoch": 0.59, "learning_rate": 0.00022092624582867348, "loss": 3.2759, "step": 34005 }, { "epoch": 0.59, "learning_rate": 0.00022084689282671677, "loss": 3.3142, "step": 34010 }, { "epoch": 0.59, "learning_rate": 0.00022076754577666794, "loss": 3.3801, "step": 34015 }, { "epoch": 0.59, "learning_rate": 0.00022068820468449366, "loss": 3.283, "step": 34020 }, { "epoch": 0.59, "learning_rate": 0.00022060886955615986, "loss": 3.2798, "step": 34025 }, { "epoch": 0.59, "learning_rate": 0.00022052954039763218, "loss": 3.1747, "step": 34030 }, { "epoch": 0.59, "learning_rate": 0.00022045021721487593, "loss": 3.2212, "step": 34035 }, { "epoch": 0.59, "learning_rate": 0.00022037090001385552, "loss": 3.3031, "step": 34040 }, { "epoch": 0.59, "learning_rate": 0.0002202915888005354, "loss": 3.219, "step": 34045 }, { "epoch": 0.59, "learning_rate": 0.00022021228358087922, "loss": 3.2706, "step": 34050 }, { "epoch": 0.59, "learning_rate": 0.00022013298436085045, "loss": 3.2221, "step": 34055 }, { "epoch": 0.59, "learning_rate": 0.00022005369114641205, "loss": 3.2401, "step": 34060 }, { "epoch": 0.59, "learning_rate": 0.0002199744039435263, "loss": 3.2445, "step": 34065 }, { "epoch": 0.59, "learning_rate": 0.0002198951227581552, "loss": 3.2795, "step": 34070 }, { "epoch": 0.59, "learning_rate": 0.0002198158475962603, "loss": 3.3098, "step": 34075 }, { "epoch": 0.59, "learning_rate": 0.00021973657846380282, "loss": 3.2402, "step": 34080 }, { "epoch": 0.59, "learning_rate": 0.00021965731536674333, "loss": 3.2837, "step": 34085 }, { "epoch": 0.59, "learning_rate": 0.0002195780583110419, "loss": 3.2242, "step": 34090 }, { "epoch": 0.59, "learning_rate": 0.0002194988073026583, "loss": 3.3176, "step": 34095 }, { "epoch": 0.59, "learning_rate": 0.00021941956234755185, "loss": 3.2141, "step": 34100 }, { "epoch": 0.59, "learning_rate": 0.00021934032345168132, "loss": 3.2779, "step": 34105 }, { "epoch": 0.59, "learning_rate": 0.00021926109062100525, "loss": 3.189, "step": 34110 }, { "epoch": 0.59, "learning_rate": 0.00021918186386148122, "loss": 3.2255, "step": 34115 }, { "epoch": 0.59, "learning_rate": 0.00021910264317906685, "loss": 3.2848, "step": 34120 }, { "epoch": 0.59, "learning_rate": 0.00021902342857971918, "loss": 3.125, "step": 34125 }, { "epoch": 0.59, "learning_rate": 0.0002189442200693946, "loss": 3.2491, "step": 34130 }, { "epoch": 0.59, "learning_rate": 0.0002188650176540494, "loss": 3.2903, "step": 34135 }, { "epoch": 0.59, "learning_rate": 0.00021878582133963895, "loss": 3.1633, "step": 34140 }, { "epoch": 0.59, "learning_rate": 0.0002187066311321186, "loss": 3.183, "step": 34145 }, { "epoch": 0.59, "learning_rate": 0.00021862744703744286, "loss": 3.227, "step": 34150 }, { "epoch": 0.59, "learning_rate": 0.0002185482690615661, "loss": 3.1407, "step": 34155 }, { "epoch": 0.59, "learning_rate": 0.0002184690972104422, "loss": 3.259, "step": 34160 }, { "epoch": 0.59, "learning_rate": 0.0002183899314900243, "loss": 3.2113, "step": 34165 }, { "epoch": 0.59, "learning_rate": 0.0002183107719062652, "loss": 3.2791, "step": 34170 }, { "epoch": 0.59, "learning_rate": 0.00021823161846511746, "loss": 3.2624, "step": 34175 }, { "epoch": 0.59, "learning_rate": 0.00021815247117253298, "loss": 3.2633, "step": 34180 }, { "epoch": 0.59, "learning_rate": 0.0002180733300344632, "loss": 3.1451, "step": 34185 }, { "epoch": 0.59, "learning_rate": 0.00021799419505685915, "loss": 3.2792, "step": 34190 }, { "epoch": 0.59, "learning_rate": 0.00021791506624567128, "loss": 3.2983, "step": 34195 }, { "epoch": 0.59, "learning_rate": 0.0002178359436068498, "loss": 3.0929, "step": 34200 }, { "epoch": 0.59, "learning_rate": 0.00021775682714634428, "loss": 3.2645, "step": 34205 }, { "epoch": 0.59, "learning_rate": 0.00021767771687010386, "loss": 3.2737, "step": 34210 }, { "epoch": 0.59, "learning_rate": 0.0002175986127840773, "loss": 3.2691, "step": 34215 }, { "epoch": 0.59, "learning_rate": 0.00021751951489421266, "loss": 3.2498, "step": 34220 }, { "epoch": 0.59, "learning_rate": 0.00021744042320645787, "loss": 3.3095, "step": 34225 }, { "epoch": 0.59, "learning_rate": 0.00021736133772676006, "loss": 3.3028, "step": 34230 }, { "epoch": 0.59, "learning_rate": 0.00021728225846106612, "loss": 3.2369, "step": 34235 }, { "epoch": 0.59, "learning_rate": 0.00021720318541532262, "loss": 3.245, "step": 34240 }, { "epoch": 0.59, "learning_rate": 0.00021712411859547512, "loss": 3.2824, "step": 34245 }, { "epoch": 0.59, "learning_rate": 0.00021704505800746913, "loss": 3.3172, "step": 34250 }, { "epoch": 0.59, "learning_rate": 0.00021696600365724966, "loss": 3.2327, "step": 34255 }, { "epoch": 0.59, "learning_rate": 0.0002168869555507612, "loss": 3.198, "step": 34260 }, { "epoch": 0.59, "learning_rate": 0.00021680791369394783, "loss": 3.2636, "step": 34265 }, { "epoch": 0.59, "learning_rate": 0.00021672887809275285, "loss": 3.317, "step": 34270 }, { "epoch": 0.59, "learning_rate": 0.00021664984875311948, "loss": 3.2125, "step": 34275 }, { "epoch": 0.59, "learning_rate": 0.0002165708256809904, "loss": 3.2339, "step": 34280 }, { "epoch": 0.59, "learning_rate": 0.0002164918088823076, "loss": 3.2303, "step": 34285 }, { "epoch": 0.59, "learning_rate": 0.0002164127983630129, "loss": 3.2303, "step": 34290 }, { "epoch": 0.59, "learning_rate": 0.00021633379412904727, "loss": 3.3098, "step": 34295 }, { "epoch": 0.59, "learning_rate": 0.00021625479618635153, "loss": 3.296, "step": 34300 }, { "epoch": 0.59, "learning_rate": 0.00021617580454086599, "loss": 3.1825, "step": 34305 }, { "epoch": 0.59, "learning_rate": 0.0002160968191985303, "loss": 3.2411, "step": 34310 }, { "epoch": 0.59, "learning_rate": 0.00021601784016528396, "loss": 3.2966, "step": 34315 }, { "epoch": 0.59, "learning_rate": 0.00021593886744706548, "loss": 3.2849, "step": 34320 }, { "epoch": 0.59, "learning_rate": 0.00021585990104981345, "loss": 3.2534, "step": 34325 }, { "epoch": 0.59, "learning_rate": 0.00021578094097946557, "loss": 3.2557, "step": 34330 }, { "epoch": 0.59, "learning_rate": 0.00021570198724195933, "loss": 3.2512, "step": 34335 }, { "epoch": 0.59, "learning_rate": 0.00021562303984323176, "loss": 3.2774, "step": 34340 }, { "epoch": 0.59, "learning_rate": 0.00021554409878921904, "loss": 3.2232, "step": 34345 }, { "epoch": 0.59, "learning_rate": 0.00021546516408585726, "loss": 3.28, "step": 34350 }, { "epoch": 0.59, "learning_rate": 0.0002153862357390819, "loss": 3.2287, "step": 34355 }, { "epoch": 0.59, "learning_rate": 0.000215307313754828, "loss": 3.2912, "step": 34360 }, { "epoch": 0.59, "learning_rate": 0.00021522839813903013, "loss": 3.2309, "step": 34365 }, { "epoch": 0.59, "learning_rate": 0.00021514948889762224, "loss": 3.2931, "step": 34370 }, { "epoch": 0.59, "learning_rate": 0.00021507058603653783, "loss": 3.2682, "step": 34375 }, { "epoch": 0.59, "learning_rate": 0.00021499168956171016, "loss": 3.2441, "step": 34380 }, { "epoch": 0.6, "learning_rate": 0.00021491279947907178, "loss": 3.3051, "step": 34385 }, { "epoch": 0.6, "learning_rate": 0.0002148339157945549, "loss": 3.2857, "step": 34390 }, { "epoch": 0.6, "learning_rate": 0.000214755038514091, "loss": 3.274, "step": 34395 }, { "epoch": 0.6, "learning_rate": 0.00021467616764361128, "loss": 3.2249, "step": 34400 }, { "epoch": 0.6, "learning_rate": 0.0002145973031890465, "loss": 3.2408, "step": 34405 }, { "epoch": 0.6, "learning_rate": 0.00021451844515632686, "loss": 3.2944, "step": 34410 }, { "epoch": 0.6, "learning_rate": 0.00021443959355138214, "loss": 3.2375, "step": 34415 }, { "epoch": 0.6, "learning_rate": 0.0002143607483801414, "loss": 3.3012, "step": 34420 }, { "epoch": 0.6, "learning_rate": 0.0002142819096485335, "loss": 3.3001, "step": 34425 }, { "epoch": 0.6, "learning_rate": 0.00021420307736248667, "loss": 3.2736, "step": 34430 }, { "epoch": 0.6, "learning_rate": 0.0002141242515279287, "loss": 3.3536, "step": 34435 }, { "epoch": 0.6, "learning_rate": 0.00021404543215078697, "loss": 3.2831, "step": 34440 }, { "epoch": 0.6, "learning_rate": 0.00021396661923698827, "loss": 3.2456, "step": 34445 }, { "epoch": 0.6, "learning_rate": 0.00021388781279245886, "loss": 3.2275, "step": 34450 }, { "epoch": 0.6, "learning_rate": 0.00021380901282312456, "loss": 3.2121, "step": 34455 }, { "epoch": 0.6, "learning_rate": 0.00021373021933491075, "loss": 3.3182, "step": 34460 }, { "epoch": 0.6, "learning_rate": 0.00021365143233374238, "loss": 3.2926, "step": 34465 }, { "epoch": 0.6, "learning_rate": 0.00021357265182554385, "loss": 3.2554, "step": 34470 }, { "epoch": 0.6, "learning_rate": 0.00021349387781623887, "loss": 3.2501, "step": 34475 }, { "epoch": 0.6, "learning_rate": 0.0002134151103117509, "loss": 3.2919, "step": 34480 }, { "epoch": 0.6, "learning_rate": 0.00021333634931800297, "loss": 3.2388, "step": 34485 }, { "epoch": 0.6, "learning_rate": 0.0002132575948409174, "loss": 3.3324, "step": 34490 }, { "epoch": 0.6, "learning_rate": 0.0002131788468864162, "loss": 3.1517, "step": 34495 }, { "epoch": 0.6, "learning_rate": 0.0002131001054604207, "loss": 3.1907, "step": 34500 }, { "epoch": 0.6, "eval_loss": 3.2766971588134766, "eval_runtime": 150.1846, "eval_samples_per_second": 12.258, "eval_steps_per_second": 0.772, "step": 34500 }, { "epoch": 0.6, "learning_rate": 0.00021302137056885198, "loss": 3.1657, "step": 34505 }, { "epoch": 0.6, "learning_rate": 0.00021294264221763032, "loss": 3.2792, "step": 34510 }, { "epoch": 0.6, "learning_rate": 0.0002128639204126758, "loss": 3.3099, "step": 34515 }, { "epoch": 0.6, "learning_rate": 0.00021278520515990808, "loss": 3.345, "step": 34520 }, { "epoch": 0.6, "learning_rate": 0.0002127064964652459, "loss": 3.2582, "step": 34525 }, { "epoch": 0.6, "learning_rate": 0.0002126277943346077, "loss": 3.2024, "step": 34530 }, { "epoch": 0.6, "learning_rate": 0.00021254909877391163, "loss": 3.2492, "step": 34535 }, { "epoch": 0.6, "learning_rate": 0.00021247040978907524, "loss": 3.2715, "step": 34540 }, { "epoch": 0.6, "learning_rate": 0.00021239172738601539, "loss": 3.2769, "step": 34545 }, { "epoch": 0.6, "learning_rate": 0.00021231305157064868, "loss": 3.1182, "step": 34550 }, { "epoch": 0.6, "learning_rate": 0.00021223438234889102, "loss": 3.2583, "step": 34555 }, { "epoch": 0.6, "learning_rate": 0.00021215571972665808, "loss": 3.3762, "step": 34560 }, { "epoch": 0.6, "learning_rate": 0.00021207706370986483, "loss": 3.3264, "step": 34565 }, { "epoch": 0.6, "learning_rate": 0.00021199841430442586, "loss": 3.2889, "step": 34570 }, { "epoch": 0.6, "learning_rate": 0.00021191977151625505, "loss": 3.2835, "step": 34575 }, { "epoch": 0.6, "learning_rate": 0.00021184113535126597, "loss": 3.1812, "step": 34580 }, { "epoch": 0.6, "learning_rate": 0.00021176250581537176, "loss": 3.2657, "step": 34585 }, { "epoch": 0.6, "learning_rate": 0.00021168388291448485, "loss": 3.2983, "step": 34590 }, { "epoch": 0.6, "learning_rate": 0.0002116052666545175, "loss": 3.2066, "step": 34595 }, { "epoch": 0.6, "learning_rate": 0.0002115266570413809, "loss": 3.2254, "step": 34600 }, { "epoch": 0.6, "learning_rate": 0.00021144805408098634, "loss": 3.2641, "step": 34605 }, { "epoch": 0.6, "learning_rate": 0.00021136945777924427, "loss": 3.2156, "step": 34610 }, { "epoch": 0.6, "learning_rate": 0.00021129086814206474, "loss": 3.3646, "step": 34615 }, { "epoch": 0.6, "learning_rate": 0.00021121228517535738, "loss": 3.1861, "step": 34620 }, { "epoch": 0.6, "learning_rate": 0.00021113370888503106, "loss": 3.2872, "step": 34625 }, { "epoch": 0.6, "learning_rate": 0.00021105513927699446, "loss": 3.1778, "step": 34630 }, { "epoch": 0.6, "learning_rate": 0.0002109765763571555, "loss": 3.3283, "step": 34635 }, { "epoch": 0.6, "learning_rate": 0.00021089802013142173, "loss": 3.1781, "step": 34640 }, { "epoch": 0.6, "learning_rate": 0.00021081947060570035, "loss": 3.2741, "step": 34645 }, { "epoch": 0.6, "learning_rate": 0.00021074092778589767, "loss": 3.252, "step": 34650 }, { "epoch": 0.6, "learning_rate": 0.0002106623916779197, "loss": 3.2506, "step": 34655 }, { "epoch": 0.6, "learning_rate": 0.00021058386228767206, "loss": 3.2566, "step": 34660 }, { "epoch": 0.6, "learning_rate": 0.00021050533962105975, "loss": 3.2547, "step": 34665 }, { "epoch": 0.6, "learning_rate": 0.00021042682368398717, "loss": 3.2342, "step": 34670 }, { "epoch": 0.6, "learning_rate": 0.00021034831448235857, "loss": 3.2487, "step": 34675 }, { "epoch": 0.6, "learning_rate": 0.00021026981202207712, "loss": 3.308, "step": 34680 }, { "epoch": 0.6, "learning_rate": 0.00021019131630904602, "loss": 3.3426, "step": 34685 }, { "epoch": 0.6, "learning_rate": 0.00021011282734916757, "loss": 3.2593, "step": 34690 }, { "epoch": 0.6, "learning_rate": 0.0002100343451483439, "loss": 3.2209, "step": 34695 }, { "epoch": 0.6, "learning_rate": 0.00020995586971247647, "loss": 3.1753, "step": 34700 }, { "epoch": 0.6, "learning_rate": 0.00020987740104746607, "loss": 3.3474, "step": 34705 }, { "epoch": 0.6, "learning_rate": 0.00020979893915921332, "loss": 3.2738, "step": 34710 }, { "epoch": 0.6, "learning_rate": 0.00020972048405361798, "loss": 3.1795, "step": 34715 }, { "epoch": 0.6, "learning_rate": 0.00020964203573657959, "loss": 3.1401, "step": 34720 }, { "epoch": 0.6, "learning_rate": 0.00020956359421399714, "loss": 3.2246, "step": 34725 }, { "epoch": 0.6, "learning_rate": 0.0002094851594917689, "loss": 3.2825, "step": 34730 }, { "epoch": 0.6, "learning_rate": 0.00020940673157579268, "loss": 3.2241, "step": 34735 }, { "epoch": 0.6, "learning_rate": 0.00020932831047196595, "loss": 3.2764, "step": 34740 }, { "epoch": 0.6, "learning_rate": 0.0002092498961861857, "loss": 3.2176, "step": 34745 }, { "epoch": 0.6, "learning_rate": 0.0002091714887243482, "loss": 3.2499, "step": 34750 }, { "epoch": 0.6, "learning_rate": 0.00020909308809234913, "loss": 3.2197, "step": 34755 }, { "epoch": 0.6, "learning_rate": 0.000209014694296084, "loss": 3.2597, "step": 34760 }, { "epoch": 0.6, "learning_rate": 0.00020893630734144755, "loss": 3.1445, "step": 34765 }, { "epoch": 0.6, "learning_rate": 0.00020885792723433406, "loss": 3.2218, "step": 34770 }, { "epoch": 0.6, "learning_rate": 0.0002087795539806375, "loss": 3.2191, "step": 34775 }, { "epoch": 0.6, "learning_rate": 0.00020870118758625086, "loss": 3.3003, "step": 34780 }, { "epoch": 0.6, "learning_rate": 0.00020862282805706706, "loss": 3.293, "step": 34785 }, { "epoch": 0.6, "learning_rate": 0.0002085444753989782, "loss": 3.2902, "step": 34790 }, { "epoch": 0.6, "learning_rate": 0.00020846612961787612, "loss": 3.2605, "step": 34795 }, { "epoch": 0.6, "learning_rate": 0.0002083877907196521, "loss": 3.2403, "step": 34800 }, { "epoch": 0.6, "learning_rate": 0.00020830945871019656, "loss": 3.1848, "step": 34805 }, { "epoch": 0.6, "learning_rate": 0.00020823113359539993, "loss": 3.2072, "step": 34810 }, { "epoch": 0.6, "learning_rate": 0.00020815281538115164, "loss": 3.235, "step": 34815 }, { "epoch": 0.6, "learning_rate": 0.00020807450407334097, "loss": 3.2259, "step": 34820 }, { "epoch": 0.6, "learning_rate": 0.0002079961996778566, "loss": 3.1593, "step": 34825 }, { "epoch": 0.6, "learning_rate": 0.0002079179022005864, "loss": 3.2826, "step": 34830 }, { "epoch": 0.6, "learning_rate": 0.00020783961164741804, "loss": 3.2567, "step": 34835 }, { "epoch": 0.6, "learning_rate": 0.00020776132802423851, "loss": 3.2129, "step": 34840 }, { "epoch": 0.6, "learning_rate": 0.00020768305133693447, "loss": 3.2653, "step": 34845 }, { "epoch": 0.6, "learning_rate": 0.00020760478159139196, "loss": 3.1523, "step": 34850 }, { "epoch": 0.6, "learning_rate": 0.0002075265187934962, "loss": 3.284, "step": 34855 }, { "epoch": 0.6, "learning_rate": 0.0002074482629491323, "loss": 3.1118, "step": 34860 }, { "epoch": 0.6, "learning_rate": 0.0002073700140641848, "loss": 3.2638, "step": 34865 }, { "epoch": 0.6, "learning_rate": 0.00020729177214453745, "loss": 3.1445, "step": 34870 }, { "epoch": 0.6, "learning_rate": 0.0002072135371960738, "loss": 3.2557, "step": 34875 }, { "epoch": 0.6, "learning_rate": 0.00020713530922467652, "loss": 3.2218, "step": 34880 }, { "epoch": 0.6, "learning_rate": 0.00020705708823622805, "loss": 3.3213, "step": 34885 }, { "epoch": 0.6, "learning_rate": 0.00020697887423661032, "loss": 3.2697, "step": 34890 }, { "epoch": 0.6, "learning_rate": 0.00020690066723170447, "loss": 3.2742, "step": 34895 }, { "epoch": 0.6, "learning_rate": 0.0002068224672273914, "loss": 3.2105, "step": 34900 }, { "epoch": 0.6, "learning_rate": 0.00020674427422955115, "loss": 3.2227, "step": 34905 }, { "epoch": 0.6, "learning_rate": 0.00020666608824406363, "loss": 3.2625, "step": 34910 }, { "epoch": 0.6, "learning_rate": 0.00020658790927680787, "loss": 3.1875, "step": 34915 }, { "epoch": 0.6, "learning_rate": 0.00020650973733366262, "loss": 3.233, "step": 34920 }, { "epoch": 0.6, "learning_rate": 0.00020643157242050602, "loss": 3.2415, "step": 34925 }, { "epoch": 0.6, "learning_rate": 0.00020635341454321575, "loss": 3.2621, "step": 34930 }, { "epoch": 0.6, "learning_rate": 0.00020627526370766862, "loss": 3.3244, "step": 34935 }, { "epoch": 0.6, "learning_rate": 0.0002061971199197413, "loss": 3.2816, "step": 34940 }, { "epoch": 0.6, "learning_rate": 0.00020611898318530995, "loss": 3.1986, "step": 34945 }, { "epoch": 0.6, "learning_rate": 0.00020604085351024986, "loss": 3.2335, "step": 34950 }, { "epoch": 0.6, "learning_rate": 0.00020596273090043614, "loss": 3.2378, "step": 34955 }, { "epoch": 0.61, "learning_rate": 0.00020588461536174304, "loss": 3.1589, "step": 34960 }, { "epoch": 0.61, "learning_rate": 0.0002058065069000445, "loss": 3.2316, "step": 34965 }, { "epoch": 0.61, "learning_rate": 0.000205728405521214, "loss": 3.2408, "step": 34970 }, { "epoch": 0.61, "learning_rate": 0.0002056503112311242, "loss": 3.2371, "step": 34975 }, { "epoch": 0.61, "learning_rate": 0.00020557222403564757, "loss": 3.2762, "step": 34980 }, { "epoch": 0.61, "learning_rate": 0.0002054941439406556, "loss": 3.2734, "step": 34985 }, { "epoch": 0.61, "learning_rate": 0.0002054160709520198, "loss": 3.1853, "step": 34990 }, { "epoch": 0.61, "learning_rate": 0.00020533800507561067, "loss": 3.2824, "step": 34995 }, { "epoch": 0.61, "learning_rate": 0.00020525994631729837, "loss": 3.19, "step": 35000 }, { "epoch": 0.61, "eval_loss": 3.272491693496704, "eval_runtime": 149.9711, "eval_samples_per_second": 12.276, "eval_steps_per_second": 0.773, "step": 35000 }, { "epoch": 0.61, "learning_rate": 0.0002051818946829527, "loss": 3.2679, "step": 35005 }, { "epoch": 0.61, "learning_rate": 0.00020510385017844258, "loss": 3.1934, "step": 35010 }, { "epoch": 0.61, "learning_rate": 0.00020502581280963656, "loss": 3.2659, "step": 35015 }, { "epoch": 0.61, "learning_rate": 0.0002049477825824026, "loss": 3.1719, "step": 35020 }, { "epoch": 0.61, "learning_rate": 0.0002048697595026084, "loss": 3.2524, "step": 35025 }, { "epoch": 0.61, "learning_rate": 0.00020479174357612077, "loss": 3.147, "step": 35030 }, { "epoch": 0.61, "learning_rate": 0.00020471373480880593, "loss": 3.0605, "step": 35035 }, { "epoch": 0.61, "learning_rate": 0.00020463573320652996, "loss": 3.2339, "step": 35040 }, { "epoch": 0.61, "learning_rate": 0.00020455773877515814, "loss": 3.3027, "step": 35045 }, { "epoch": 0.61, "learning_rate": 0.00020447975152055518, "loss": 3.2568, "step": 35050 }, { "epoch": 0.61, "learning_rate": 0.00020440177144858554, "loss": 3.2592, "step": 35055 }, { "epoch": 0.61, "learning_rate": 0.00020432379856511257, "loss": 3.2078, "step": 35060 }, { "epoch": 0.61, "learning_rate": 0.0002042458328759996, "loss": 3.1336, "step": 35065 }, { "epoch": 0.61, "learning_rate": 0.00020416787438710937, "loss": 3.2229, "step": 35070 }, { "epoch": 0.61, "learning_rate": 0.00020408992310430382, "loss": 3.3117, "step": 35075 }, { "epoch": 0.61, "learning_rate": 0.00020401197903344463, "loss": 3.2499, "step": 35080 }, { "epoch": 0.61, "learning_rate": 0.00020393404218039252, "loss": 3.2453, "step": 35085 }, { "epoch": 0.61, "learning_rate": 0.0002038561125510082, "loss": 3.1057, "step": 35090 }, { "epoch": 0.61, "learning_rate": 0.0002037781901511514, "loss": 3.2666, "step": 35095 }, { "epoch": 0.61, "learning_rate": 0.00020370027498668158, "loss": 3.0863, "step": 35100 }, { "epoch": 0.61, "learning_rate": 0.0002036223670634577, "loss": 3.2022, "step": 35105 }, { "epoch": 0.61, "learning_rate": 0.0002035444663873378, "loss": 3.3174, "step": 35110 }, { "epoch": 0.61, "learning_rate": 0.00020346657296417968, "loss": 3.242, "step": 35115 }, { "epoch": 0.61, "learning_rate": 0.00020338868679984048, "loss": 3.2349, "step": 35120 }, { "epoch": 0.61, "learning_rate": 0.00020331080790017702, "loss": 3.3197, "step": 35125 }, { "epoch": 0.61, "learning_rate": 0.00020323293627104528, "loss": 3.202, "step": 35130 }, { "epoch": 0.61, "learning_rate": 0.0002031550719183008, "loss": 3.0995, "step": 35135 }, { "epoch": 0.61, "learning_rate": 0.00020307721484779854, "loss": 3.0756, "step": 35140 }, { "epoch": 0.61, "learning_rate": 0.000202999365065393, "loss": 3.2164, "step": 35145 }, { "epoch": 0.61, "learning_rate": 0.0002029215225769381, "loss": 3.1971, "step": 35150 }, { "epoch": 0.61, "learning_rate": 0.00020284368738828716, "loss": 3.1615, "step": 35155 }, { "epoch": 0.61, "learning_rate": 0.00020276585950529312, "loss": 3.2523, "step": 35160 }, { "epoch": 0.61, "learning_rate": 0.000202688038933808, "loss": 3.3051, "step": 35165 }, { "epoch": 0.61, "learning_rate": 0.00020261022567968372, "loss": 3.1922, "step": 35170 }, { "epoch": 0.61, "learning_rate": 0.0002025324197487713, "loss": 3.2387, "step": 35175 }, { "epoch": 0.61, "learning_rate": 0.00020245462114692138, "loss": 3.3196, "step": 35180 }, { "epoch": 0.61, "learning_rate": 0.0002023768298799842, "loss": 3.1601, "step": 35185 }, { "epoch": 0.61, "learning_rate": 0.000202299045953809, "loss": 3.2727, "step": 35190 }, { "epoch": 0.61, "learning_rate": 0.00020222126937424477, "loss": 3.2983, "step": 35195 }, { "epoch": 0.61, "learning_rate": 0.00020214350014714, "loss": 3.294, "step": 35200 }, { "epoch": 0.61, "learning_rate": 0.00020206573827834254, "loss": 3.2709, "step": 35205 }, { "epoch": 0.61, "learning_rate": 0.0002019879837736997, "loss": 3.2138, "step": 35210 }, { "epoch": 0.61, "learning_rate": 0.00020191023663905814, "loss": 3.2238, "step": 35215 }, { "epoch": 0.61, "learning_rate": 0.00020183249688026405, "loss": 3.2299, "step": 35220 }, { "epoch": 0.61, "learning_rate": 0.0002017547645031631, "loss": 3.2146, "step": 35225 }, { "epoch": 0.61, "learning_rate": 0.00020167703951360039, "loss": 3.1378, "step": 35230 }, { "epoch": 0.61, "learning_rate": 0.0002015993219174205, "loss": 3.3198, "step": 35235 }, { "epoch": 0.61, "learning_rate": 0.0002015216117204671, "loss": 3.1608, "step": 35240 }, { "epoch": 0.61, "learning_rate": 0.00020144390892858392, "loss": 3.2125, "step": 35245 }, { "epoch": 0.61, "learning_rate": 0.0002013662135476137, "loss": 3.2722, "step": 35250 }, { "epoch": 0.61, "learning_rate": 0.00020128852558339868, "loss": 2.9673, "step": 35255 }, { "epoch": 0.61, "learning_rate": 0.00020121084504178078, "loss": 3.0968, "step": 35260 }, { "epoch": 0.61, "learning_rate": 0.00020113317192860093, "loss": 3.2643, "step": 35265 }, { "epoch": 0.61, "learning_rate": 0.00020105550624969988, "loss": 3.0082, "step": 35270 }, { "epoch": 0.61, "learning_rate": 0.00020097784801091767, "loss": 3.228, "step": 35275 }, { "epoch": 0.61, "learning_rate": 0.00020090019721809375, "loss": 3.2276, "step": 35280 }, { "epoch": 0.61, "learning_rate": 0.00020082255387706733, "loss": 3.1601, "step": 35285 }, { "epoch": 0.61, "learning_rate": 0.00020074491799367642, "loss": 3.0885, "step": 35290 }, { "epoch": 0.61, "learning_rate": 0.000200667289573759, "loss": 3.2486, "step": 35295 }, { "epoch": 0.61, "learning_rate": 0.00020058966862315238, "loss": 3.2406, "step": 35300 }, { "epoch": 0.61, "learning_rate": 0.00020051205514769313, "loss": 3.1888, "step": 35305 }, { "epoch": 0.61, "learning_rate": 0.00020043444915321762, "loss": 3.1852, "step": 35310 }, { "epoch": 0.61, "learning_rate": 0.00020035685064556118, "loss": 3.2038, "step": 35315 }, { "epoch": 0.61, "learning_rate": 0.0002002792596305589, "loss": 3.2482, "step": 35320 }, { "epoch": 0.61, "learning_rate": 0.0002002016761140452, "loss": 3.2151, "step": 35325 }, { "epoch": 0.61, "learning_rate": 0.00020012410010185406, "loss": 3.2906, "step": 35330 }, { "epoch": 0.61, "learning_rate": 0.0002000465315998188, "loss": 3.2794, "step": 35335 }, { "epoch": 0.61, "learning_rate": 0.000199968970613772, "loss": 3.1682, "step": 35340 }, { "epoch": 0.61, "learning_rate": 0.00019989141714954592, "loss": 3.2599, "step": 35345 }, { "epoch": 0.61, "learning_rate": 0.0001998138712129723, "loss": 3.2017, "step": 35350 }, { "epoch": 0.61, "learning_rate": 0.00019973633280988206, "loss": 3.3271, "step": 35355 }, { "epoch": 0.61, "learning_rate": 0.0001996588019461059, "loss": 3.1123, "step": 35360 }, { "epoch": 0.61, "learning_rate": 0.00019958127862747344, "loss": 3.2482, "step": 35365 }, { "epoch": 0.61, "learning_rate": 0.00019950376285981426, "loss": 3.3316, "step": 35370 }, { "epoch": 0.61, "learning_rate": 0.00019942625464895703, "loss": 3.2091, "step": 35375 }, { "epoch": 0.61, "learning_rate": 0.00019934875400072998, "loss": 3.34, "step": 35380 }, { "epoch": 0.61, "learning_rate": 0.00019927126092096094, "loss": 3.1714, "step": 35385 }, { "epoch": 0.61, "learning_rate": 0.00019919377541547677, "loss": 3.142, "step": 35390 }, { "epoch": 0.61, "learning_rate": 0.00019911629749010408, "loss": 3.2751, "step": 35395 }, { "epoch": 0.61, "learning_rate": 0.00019903882715066875, "loss": 3.2666, "step": 35400 }, { "epoch": 0.61, "learning_rate": 0.00019896136440299622, "loss": 3.3066, "step": 35405 }, { "epoch": 0.61, "learning_rate": 0.0001988839092529113, "loss": 3.296, "step": 35410 }, { "epoch": 0.61, "learning_rate": 0.00019880646170623829, "loss": 3.3092, "step": 35415 }, { "epoch": 0.61, "learning_rate": 0.00019872902176880058, "loss": 3.2801, "step": 35420 }, { "epoch": 0.61, "learning_rate": 0.00019865158944642146, "loss": 3.1055, "step": 35425 }, { "epoch": 0.61, "learning_rate": 0.0001985741647449235, "loss": 3.2495, "step": 35430 }, { "epoch": 0.61, "learning_rate": 0.0001984967476701284, "loss": 3.3053, "step": 35435 }, { "epoch": 0.61, "learning_rate": 0.00019841933822785788, "loss": 3.2339, "step": 35440 }, { "epoch": 0.61, "learning_rate": 0.0001983419364239324, "loss": 3.2144, "step": 35445 }, { "epoch": 0.61, "learning_rate": 0.00019826454226417236, "loss": 3.2215, "step": 35450 }, { "epoch": 0.61, "learning_rate": 0.00019818715575439729, "loss": 3.2162, "step": 35455 }, { "epoch": 0.61, "learning_rate": 0.00019810977690042634, "loss": 3.3478, "step": 35460 }, { "epoch": 0.61, "learning_rate": 0.0001980324057080781, "loss": 3.3434, "step": 35465 }, { "epoch": 0.61, "learning_rate": 0.00019795504218317024, "loss": 3.2074, "step": 35470 }, { "epoch": 0.61, "learning_rate": 0.0001978776863315203, "loss": 3.2835, "step": 35475 }, { "epoch": 0.61, "learning_rate": 0.00019780033815894495, "loss": 3.2004, "step": 35480 }, { "epoch": 0.61, "learning_rate": 0.00019772299767126039, "loss": 3.3079, "step": 35485 }, { "epoch": 0.61, "learning_rate": 0.00019764566487428235, "loss": 3.2486, "step": 35490 }, { "epoch": 0.61, "learning_rate": 0.0001975683397738257, "loss": 3.3032, "step": 35495 }, { "epoch": 0.61, "learning_rate": 0.0001974910223757049, "loss": 3.2286, "step": 35500 }, { "epoch": 0.61, "eval_loss": 3.266571283340454, "eval_runtime": 150.2623, "eval_samples_per_second": 12.252, "eval_steps_per_second": 0.772, "step": 35500 }, { "epoch": 0.61, "learning_rate": 0.00019741371268573388, "loss": 3.1576, "step": 35505 }, { "epoch": 0.61, "learning_rate": 0.0001973364107097259, "loss": 3.2607, "step": 35510 }, { "epoch": 0.61, "learning_rate": 0.00019725911645349382, "loss": 3.1451, "step": 35515 }, { "epoch": 0.61, "learning_rate": 0.0001971818299228495, "loss": 3.2446, "step": 35520 }, { "epoch": 0.61, "learning_rate": 0.00019710455112360468, "loss": 3.3296, "step": 35525 }, { "epoch": 0.61, "learning_rate": 0.00019702728006157033, "loss": 3.3272, "step": 35530 }, { "epoch": 0.61, "learning_rate": 0.00019695001674255669, "loss": 3.2667, "step": 35535 }, { "epoch": 0.62, "learning_rate": 0.00019687276117237384, "loss": 3.3462, "step": 35540 }, { "epoch": 0.62, "learning_rate": 0.00019679551335683064, "loss": 3.226, "step": 35545 }, { "epoch": 0.62, "learning_rate": 0.00019671827330173597, "loss": 3.2596, "step": 35550 }, { "epoch": 0.62, "learning_rate": 0.0001966410410128979, "loss": 3.2741, "step": 35555 }, { "epoch": 0.62, "learning_rate": 0.00019656381649612375, "loss": 3.1513, "step": 35560 }, { "epoch": 0.62, "learning_rate": 0.00019648659975722064, "loss": 3.2949, "step": 35565 }, { "epoch": 0.62, "learning_rate": 0.00019640939080199462, "loss": 3.2267, "step": 35570 }, { "epoch": 0.62, "learning_rate": 0.00019633218963625157, "loss": 3.2633, "step": 35575 }, { "epoch": 0.62, "learning_rate": 0.0001962549962657965, "loss": 3.2205, "step": 35580 }, { "epoch": 0.62, "learning_rate": 0.00019617781069643402, "loss": 3.2456, "step": 35585 }, { "epoch": 0.62, "learning_rate": 0.00019610063293396823, "loss": 3.1869, "step": 35590 }, { "epoch": 0.62, "learning_rate": 0.0001960234629842023, "loss": 3.2434, "step": 35595 }, { "epoch": 0.62, "learning_rate": 0.00019594630085293906, "loss": 3.1685, "step": 35600 }, { "epoch": 0.62, "learning_rate": 0.00019586914654598066, "loss": 3.2325, "step": 35605 }, { "epoch": 0.62, "learning_rate": 0.0001957920000691289, "loss": 3.2937, "step": 35610 }, { "epoch": 0.62, "learning_rate": 0.00019571486142818458, "loss": 3.2478, "step": 35615 }, { "epoch": 0.62, "learning_rate": 0.00019563773062894834, "loss": 3.2374, "step": 35620 }, { "epoch": 0.62, "learning_rate": 0.00019556060767721983, "loss": 3.1036, "step": 35625 }, { "epoch": 0.62, "learning_rate": 0.0001954834925787984, "loss": 3.1363, "step": 35630 }, { "epoch": 0.62, "learning_rate": 0.00019540638533948267, "loss": 3.1992, "step": 35635 }, { "epoch": 0.62, "learning_rate": 0.00019532928596507071, "loss": 3.2498, "step": 35640 }, { "epoch": 0.62, "learning_rate": 0.00019525219446136013, "loss": 3.3513, "step": 35645 }, { "epoch": 0.62, "learning_rate": 0.00019517511083414757, "loss": 3.2661, "step": 35650 }, { "epoch": 0.62, "learning_rate": 0.00019509803508922953, "loss": 3.2577, "step": 35655 }, { "epoch": 0.62, "learning_rate": 0.00019502096723240158, "loss": 3.1112, "step": 35660 }, { "epoch": 0.62, "learning_rate": 0.00019494390726945888, "loss": 3.2319, "step": 35665 }, { "epoch": 0.62, "learning_rate": 0.00019486685520619606, "loss": 3.2341, "step": 35670 }, { "epoch": 0.62, "learning_rate": 0.0001947898110484069, "loss": 3.1483, "step": 35675 }, { "epoch": 0.62, "learning_rate": 0.0001947127748018847, "loss": 3.2717, "step": 35680 }, { "epoch": 0.62, "learning_rate": 0.0001946357464724222, "loss": 3.2706, "step": 35685 }, { "epoch": 0.62, "learning_rate": 0.00019455872606581167, "loss": 3.2078, "step": 35690 }, { "epoch": 0.62, "learning_rate": 0.00019448171358784464, "loss": 3.2178, "step": 35695 }, { "epoch": 0.62, "learning_rate": 0.0001944047090443118, "loss": 3.1824, "step": 35700 }, { "epoch": 0.62, "learning_rate": 0.00019432771244100373, "loss": 3.2617, "step": 35705 }, { "epoch": 0.62, "learning_rate": 0.00019425072378371014, "loss": 3.2644, "step": 35710 }, { "epoch": 0.62, "learning_rate": 0.00019417374307822012, "loss": 3.2957, "step": 35715 }, { "epoch": 0.62, "learning_rate": 0.00019409677033032242, "loss": 3.2295, "step": 35720 }, { "epoch": 0.62, "learning_rate": 0.00019401980554580468, "loss": 3.2526, "step": 35725 }, { "epoch": 0.62, "learning_rate": 0.00019394284873045447, "loss": 3.2768, "step": 35730 }, { "epoch": 0.62, "learning_rate": 0.0001938658998900585, "loss": 3.1854, "step": 35735 }, { "epoch": 0.62, "learning_rate": 0.0001937889590304029, "loss": 3.0675, "step": 35740 }, { "epoch": 0.62, "learning_rate": 0.0001937120261572734, "loss": 3.3383, "step": 35745 }, { "epoch": 0.62, "learning_rate": 0.00019363510127645467, "loss": 3.1992, "step": 35750 }, { "epoch": 0.62, "learning_rate": 0.00019355818439373127, "loss": 3.262, "step": 35755 }, { "epoch": 0.62, "learning_rate": 0.00019348127551488682, "loss": 3.2469, "step": 35760 }, { "epoch": 0.62, "learning_rate": 0.00019340437464570462, "loss": 3.2135, "step": 35765 }, { "epoch": 0.62, "learning_rate": 0.00019332748179196724, "loss": 3.2866, "step": 35770 }, { "epoch": 0.62, "learning_rate": 0.00019325059695945648, "loss": 3.2102, "step": 35775 }, { "epoch": 0.62, "learning_rate": 0.00019317372015395366, "loss": 3.2206, "step": 35780 }, { "epoch": 0.62, "learning_rate": 0.00019309685138123962, "loss": 3.1866, "step": 35785 }, { "epoch": 0.62, "learning_rate": 0.00019301999064709455, "loss": 3.2248, "step": 35790 }, { "epoch": 0.62, "learning_rate": 0.00019294313795729793, "loss": 3.1514, "step": 35795 }, { "epoch": 0.62, "learning_rate": 0.00019286629331762868, "loss": 3.1978, "step": 35800 }, { "epoch": 0.62, "learning_rate": 0.00019278945673386504, "loss": 3.2829, "step": 35805 }, { "epoch": 0.62, "learning_rate": 0.0001927126282117848, "loss": 3.2408, "step": 35810 }, { "epoch": 0.62, "learning_rate": 0.0001926358077571652, "loss": 3.2113, "step": 35815 }, { "epoch": 0.62, "learning_rate": 0.00019255899537578262, "loss": 3.223, "step": 35820 }, { "epoch": 0.62, "learning_rate": 0.00019248219107341287, "loss": 3.1289, "step": 35825 }, { "epoch": 0.62, "learning_rate": 0.0001924053948558313, "loss": 3.2393, "step": 35830 }, { "epoch": 0.62, "learning_rate": 0.00019232860672881272, "loss": 3.1435, "step": 35835 }, { "epoch": 0.62, "learning_rate": 0.00019225182669813104, "loss": 3.1946, "step": 35840 }, { "epoch": 0.62, "learning_rate": 0.0001921750547695598, "loss": 3.1769, "step": 35845 }, { "epoch": 0.62, "learning_rate": 0.00019209829094887196, "loss": 3.198, "step": 35850 }, { "epoch": 0.62, "learning_rate": 0.0001920215352418396, "loss": 3.2421, "step": 35855 }, { "epoch": 0.62, "learning_rate": 0.00019194478765423433, "loss": 3.0731, "step": 35860 }, { "epoch": 0.62, "learning_rate": 0.00019186804819182725, "loss": 3.2731, "step": 35865 }, { "epoch": 0.62, "learning_rate": 0.00019179131686038883, "loss": 3.1812, "step": 35870 }, { "epoch": 0.62, "learning_rate": 0.00019171459366568893, "loss": 3.2939, "step": 35875 }, { "epoch": 0.62, "learning_rate": 0.00019163787861349647, "loss": 3.1732, "step": 35880 }, { "epoch": 0.62, "learning_rate": 0.00019156117170958023, "loss": 3.2444, "step": 35885 }, { "epoch": 0.62, "learning_rate": 0.00019148447295970817, "loss": 3.1963, "step": 35890 }, { "epoch": 0.62, "learning_rate": 0.00019140778236964752, "loss": 3.3122, "step": 35895 }, { "epoch": 0.62, "learning_rate": 0.00019133109994516528, "loss": 3.2267, "step": 35900 }, { "epoch": 0.62, "learning_rate": 0.00019125442569202725, "loss": 3.2838, "step": 35905 }, { "epoch": 0.62, "learning_rate": 0.00019117775961599912, "loss": 3.3105, "step": 35910 }, { "epoch": 0.62, "learning_rate": 0.0001911011017228458, "loss": 3.2765, "step": 35915 }, { "epoch": 0.62, "learning_rate": 0.00019102445201833152, "loss": 3.3233, "step": 35920 }, { "epoch": 0.62, "learning_rate": 0.00019094781050822007, "loss": 3.2274, "step": 35925 }, { "epoch": 0.62, "learning_rate": 0.0001908711771982742, "loss": 3.2296, "step": 35930 }, { "epoch": 0.62, "learning_rate": 0.0001907945520942566, "loss": 3.1826, "step": 35935 }, { "epoch": 0.62, "learning_rate": 0.000190717935201929, "loss": 3.3203, "step": 35940 }, { "epoch": 0.62, "learning_rate": 0.0001906413265270526, "loss": 3.1889, "step": 35945 }, { "epoch": 0.62, "learning_rate": 0.00019056472607538807, "loss": 2.9914, "step": 35950 }, { "epoch": 0.62, "learning_rate": 0.0001904881338526953, "loss": 3.2326, "step": 35955 }, { "epoch": 0.62, "learning_rate": 0.00019041154986473356, "loss": 3.2476, "step": 35960 }, { "epoch": 0.62, "learning_rate": 0.00019033497411726158, "loss": 3.1318, "step": 35965 }, { "epoch": 0.62, "learning_rate": 0.00019025840661603764, "loss": 3.266, "step": 35970 }, { "epoch": 0.62, "learning_rate": 0.00019018184736681906, "loss": 3.1782, "step": 35975 }, { "epoch": 0.62, "learning_rate": 0.0001901052963753628, "loss": 3.1955, "step": 35980 }, { "epoch": 0.62, "learning_rate": 0.0001900287536474249, "loss": 3.2449, "step": 35985 }, { "epoch": 0.62, "learning_rate": 0.0001899522191887612, "loss": 2.9662, "step": 35990 }, { "epoch": 0.62, "learning_rate": 0.0001898756930051267, "loss": 3.1855, "step": 35995 }, { "epoch": 0.62, "learning_rate": 0.00018979917510227574, "loss": 3.1923, "step": 36000 }, { "epoch": 0.62, "eval_loss": 3.261223554611206, "eval_runtime": 150.469, "eval_samples_per_second": 12.235, "eval_steps_per_second": 0.771, "step": 36000 }, { "epoch": 0.62, "learning_rate": 0.00018972266548596193, "loss": 3.2353, "step": 36005 }, { "epoch": 0.62, "learning_rate": 0.00018964616416193848, "loss": 3.2502, "step": 36010 }, { "epoch": 0.62, "learning_rate": 0.00018956967113595806, "loss": 3.2247, "step": 36015 }, { "epoch": 0.62, "learning_rate": 0.00018949318641377233, "loss": 3.2834, "step": 36020 }, { "epoch": 0.62, "learning_rate": 0.00018941671000113276, "loss": 3.2689, "step": 36025 }, { "epoch": 0.62, "learning_rate": 0.00018934024190378972, "loss": 3.2487, "step": 36030 }, { "epoch": 0.62, "learning_rate": 0.00018926378212749347, "loss": 3.1112, "step": 36035 }, { "epoch": 0.62, "learning_rate": 0.00018918733067799324, "loss": 3.1531, "step": 36040 }, { "epoch": 0.62, "learning_rate": 0.0001891108875610378, "loss": 3.2902, "step": 36045 }, { "epoch": 0.62, "learning_rate": 0.00018903445278237543, "loss": 3.278, "step": 36050 }, { "epoch": 0.62, "learning_rate": 0.00018895802634775346, "loss": 3.1309, "step": 36055 }, { "epoch": 0.62, "learning_rate": 0.0001888816082629188, "loss": 3.1394, "step": 36060 }, { "epoch": 0.62, "learning_rate": 0.00018880519853361775, "loss": 3.2527, "step": 36065 }, { "epoch": 0.62, "learning_rate": 0.00018872879716559584, "loss": 3.1727, "step": 36070 }, { "epoch": 0.62, "learning_rate": 0.00018865240416459823, "loss": 3.2331, "step": 36075 }, { "epoch": 0.62, "learning_rate": 0.00018857601953636922, "loss": 3.1652, "step": 36080 }, { "epoch": 0.62, "learning_rate": 0.00018849964328665242, "loss": 3.3314, "step": 36085 }, { "epoch": 0.62, "learning_rate": 0.00018842327542119096, "loss": 3.2538, "step": 36090 }, { "epoch": 0.62, "learning_rate": 0.00018834691594572746, "loss": 3.2119, "step": 36095 }, { "epoch": 0.62, "learning_rate": 0.0001882705648660036, "loss": 3.2479, "step": 36100 }, { "epoch": 0.62, "learning_rate": 0.0001881942221877608, "loss": 3.2294, "step": 36105 }, { "epoch": 0.62, "learning_rate": 0.0001881178879167393, "loss": 3.1991, "step": 36110 }, { "epoch": 0.63, "learning_rate": 0.0001880415620586794, "loss": 3.2136, "step": 36115 }, { "epoch": 0.63, "learning_rate": 0.0001879652446193201, "loss": 3.1497, "step": 36120 }, { "epoch": 0.63, "learning_rate": 0.0001878889356044003, "loss": 3.301, "step": 36125 }, { "epoch": 0.63, "learning_rate": 0.0001878126350196581, "loss": 3.1176, "step": 36130 }, { "epoch": 0.63, "learning_rate": 0.00018773634287083072, "loss": 3.3595, "step": 36135 }, { "epoch": 0.63, "learning_rate": 0.00018766005916365493, "loss": 3.2179, "step": 36140 }, { "epoch": 0.63, "learning_rate": 0.000187583783903867, "loss": 3.2545, "step": 36145 }, { "epoch": 0.63, "learning_rate": 0.00018750751709720247, "loss": 3.2765, "step": 36150 }, { "epoch": 0.63, "learning_rate": 0.0001874312587493961, "loss": 3.2513, "step": 36155 }, { "epoch": 0.63, "learning_rate": 0.00018735500886618215, "loss": 3.1349, "step": 36160 }, { "epoch": 0.63, "learning_rate": 0.00018727876745329425, "loss": 3.278, "step": 36165 }, { "epoch": 0.63, "learning_rate": 0.00018720253451646524, "loss": 3.1892, "step": 36170 }, { "epoch": 0.63, "learning_rate": 0.00018712631006142773, "loss": 3.2387, "step": 36175 }, { "epoch": 0.63, "learning_rate": 0.00018705009409391324, "loss": 3.2239, "step": 36180 }, { "epoch": 0.63, "learning_rate": 0.0001869738866196527, "loss": 3.1069, "step": 36185 }, { "epoch": 0.63, "learning_rate": 0.00018689768764437663, "loss": 3.3351, "step": 36190 }, { "epoch": 0.63, "learning_rate": 0.00018682149717381489, "loss": 3.2367, "step": 36195 }, { "epoch": 0.63, "learning_rate": 0.00018674531521369644, "loss": 3.2287, "step": 36200 }, { "epoch": 0.63, "learning_rate": 0.00018666914176975, "loss": 3.3816, "step": 36205 }, { "epoch": 0.63, "learning_rate": 0.00018659297684770317, "loss": 3.2868, "step": 36210 }, { "epoch": 0.63, "learning_rate": 0.00018651682045328334, "loss": 3.2455, "step": 36215 }, { "epoch": 0.63, "learning_rate": 0.00018644067259221696, "loss": 3.2449, "step": 36220 }, { "epoch": 0.63, "learning_rate": 0.00018636453327023004, "loss": 3.2737, "step": 36225 }, { "epoch": 0.63, "learning_rate": 0.00018628840249304793, "loss": 3.2192, "step": 36230 }, { "epoch": 0.63, "learning_rate": 0.0001862122802663951, "loss": 3.2535, "step": 36235 }, { "epoch": 0.63, "learning_rate": 0.00018613616659599567, "loss": 3.1544, "step": 36240 }, { "epoch": 0.63, "learning_rate": 0.00018606006148757292, "loss": 3.2894, "step": 36245 }, { "epoch": 0.63, "learning_rate": 0.0001859839649468496, "loss": 3.1255, "step": 36250 }, { "epoch": 0.63, "learning_rate": 0.00018590787697954795, "loss": 3.132, "step": 36255 }, { "epoch": 0.63, "learning_rate": 0.00018583179759138916, "loss": 3.1732, "step": 36260 }, { "epoch": 0.63, "learning_rate": 0.00018575572678809402, "loss": 3.2779, "step": 36265 }, { "epoch": 0.63, "learning_rate": 0.00018567966457538274, "loss": 3.2261, "step": 36270 }, { "epoch": 0.63, "learning_rate": 0.00018560361095897485, "loss": 3.1753, "step": 36275 }, { "epoch": 0.63, "learning_rate": 0.00018552756594458923, "loss": 3.2143, "step": 36280 }, { "epoch": 0.63, "learning_rate": 0.00018545152953794385, "loss": 3.2923, "step": 36285 }, { "epoch": 0.63, "learning_rate": 0.00018537550174475639, "loss": 3.1306, "step": 36290 }, { "epoch": 0.63, "learning_rate": 0.00018529948257074385, "loss": 3.2825, "step": 36295 }, { "epoch": 0.63, "learning_rate": 0.00018522347202162232, "loss": 3.3198, "step": 36300 }, { "epoch": 0.63, "learning_rate": 0.00018514747010310744, "loss": 3.289, "step": 36305 }, { "epoch": 0.63, "learning_rate": 0.00018507147682091435, "loss": 3.267, "step": 36310 }, { "epoch": 0.63, "learning_rate": 0.00018499549218075703, "loss": 3.3149, "step": 36315 }, { "epoch": 0.63, "learning_rate": 0.00018491951618834947, "loss": 3.1908, "step": 36320 }, { "epoch": 0.63, "learning_rate": 0.00018484354884940437, "loss": 3.2189, "step": 36325 }, { "epoch": 0.63, "learning_rate": 0.00018476759016963426, "loss": 3.1788, "step": 36330 }, { "epoch": 0.63, "learning_rate": 0.00018469164015475092, "loss": 3.232, "step": 36335 }, { "epoch": 0.63, "learning_rate": 0.00018461569881046526, "loss": 3.3037, "step": 36340 }, { "epoch": 0.63, "learning_rate": 0.00018453976614248762, "loss": 3.2038, "step": 36345 }, { "epoch": 0.63, "learning_rate": 0.00018446384215652786, "loss": 3.124, "step": 36350 }, { "epoch": 0.63, "learning_rate": 0.00018438792685829513, "loss": 3.2536, "step": 36355 }, { "epoch": 0.63, "learning_rate": 0.00018431202025349783, "loss": 3.1973, "step": 36360 }, { "epoch": 0.63, "learning_rate": 0.00018423612234784364, "loss": 3.2563, "step": 36365 }, { "epoch": 0.63, "learning_rate": 0.00018416023314703973, "loss": 3.2235, "step": 36370 }, { "epoch": 0.63, "learning_rate": 0.00018408435265679268, "loss": 3.3684, "step": 36375 }, { "epoch": 0.63, "learning_rate": 0.0001840084808828082, "loss": 3.2167, "step": 36380 }, { "epoch": 0.63, "learning_rate": 0.0001839326178307916, "loss": 3.2903, "step": 36385 }, { "epoch": 0.63, "learning_rate": 0.00018385676350644723, "loss": 3.0828, "step": 36390 }, { "epoch": 0.63, "learning_rate": 0.0001837809179154791, "loss": 3.3216, "step": 36395 }, { "epoch": 0.63, "learning_rate": 0.0001837050810635902, "loss": 3.2892, "step": 36400 }, { "epoch": 0.63, "learning_rate": 0.00018362925295648322, "loss": 3.1734, "step": 36405 }, { "epoch": 0.63, "learning_rate": 0.00018355343359986018, "loss": 3.1735, "step": 36410 }, { "epoch": 0.63, "learning_rate": 0.000183477622999422, "loss": 3.1576, "step": 36415 }, { "epoch": 0.63, "learning_rate": 0.00018340182116086947, "loss": 3.2127, "step": 36420 }, { "epoch": 0.63, "learning_rate": 0.00018332602808990238, "loss": 3.1634, "step": 36425 }, { "epoch": 0.63, "learning_rate": 0.00018325024379222006, "loss": 3.3005, "step": 36430 }, { "epoch": 0.63, "learning_rate": 0.00018317446827352115, "loss": 3.2287, "step": 36435 }, { "epoch": 0.63, "learning_rate": 0.00018309870153950347, "loss": 3.23, "step": 36440 }, { "epoch": 0.63, "learning_rate": 0.00018302294359586426, "loss": 3.208, "step": 36445 }, { "epoch": 0.63, "learning_rate": 0.0001829471944483002, "loss": 3.2683, "step": 36450 }, { "epoch": 0.63, "learning_rate": 0.00018287145410250723, "loss": 3.1638, "step": 36455 }, { "epoch": 0.63, "learning_rate": 0.00018279572256418077, "loss": 3.2345, "step": 36460 }, { "epoch": 0.63, "learning_rate": 0.00018271999983901515, "loss": 3.2936, "step": 36465 }, { "epoch": 0.63, "learning_rate": 0.00018264428593270452, "loss": 3.2147, "step": 36470 }, { "epoch": 0.63, "learning_rate": 0.0001825685808509422, "loss": 3.1711, "step": 36475 }, { "epoch": 0.63, "learning_rate": 0.00018249288459942073, "loss": 3.1448, "step": 36480 }, { "epoch": 0.63, "learning_rate": 0.0001824171971838322, "loss": 3.2079, "step": 36485 }, { "epoch": 0.63, "learning_rate": 0.00018234151860986773, "loss": 3.1737, "step": 36490 }, { "epoch": 0.63, "learning_rate": 0.00018226584888321812, "loss": 3.2266, "step": 36495 }, { "epoch": 0.63, "learning_rate": 0.00018219018800957334, "loss": 3.154, "step": 36500 }, { "epoch": 0.63, "eval_loss": 3.2557082176208496, "eval_runtime": 151.364, "eval_samples_per_second": 12.163, "eval_steps_per_second": 0.766, "step": 36500 }, { "epoch": 0.63, "learning_rate": 0.00018211453599462256, "loss": 3.2151, "step": 36505 }, { "epoch": 0.63, "learning_rate": 0.00018203889284405467, "loss": 3.3157, "step": 36510 }, { "epoch": 0.63, "learning_rate": 0.00018196325856355736, "loss": 3.1872, "step": 36515 }, { "epoch": 0.63, "learning_rate": 0.00018188763315881817, "loss": 3.2263, "step": 36520 }, { "epoch": 0.63, "learning_rate": 0.00018181201663552353, "loss": 3.2157, "step": 36525 }, { "epoch": 0.63, "learning_rate": 0.00018173640899935958, "loss": 3.2489, "step": 36530 }, { "epoch": 0.63, "learning_rate": 0.0001816608102560117, "loss": 3.2881, "step": 36535 }, { "epoch": 0.63, "learning_rate": 0.0001815852204111643, "loss": 3.2426, "step": 36540 }, { "epoch": 0.63, "learning_rate": 0.00018150963947050145, "loss": 3.2455, "step": 36545 }, { "epoch": 0.63, "learning_rate": 0.00018143406743970642, "loss": 3.3231, "step": 36550 }, { "epoch": 0.63, "learning_rate": 0.00018135850432446197, "loss": 3.3465, "step": 36555 }, { "epoch": 0.63, "learning_rate": 0.00018128295013044993, "loss": 3.2252, "step": 36560 }, { "epoch": 0.63, "learning_rate": 0.0001812074048633517, "loss": 3.2743, "step": 36565 }, { "epoch": 0.63, "learning_rate": 0.00018113186852884773, "loss": 3.2134, "step": 36570 }, { "epoch": 0.63, "learning_rate": 0.00018105634113261805, "loss": 3.1525, "step": 36575 }, { "epoch": 0.63, "learning_rate": 0.00018098082268034197, "loss": 3.2405, "step": 36580 }, { "epoch": 0.63, "learning_rate": 0.00018090531317769805, "loss": 3.1579, "step": 36585 }, { "epoch": 0.63, "learning_rate": 0.0001808298126303643, "loss": 3.3345, "step": 36590 }, { "epoch": 0.63, "learning_rate": 0.00018075432104401782, "loss": 3.2023, "step": 36595 }, { "epoch": 0.63, "learning_rate": 0.00018067883842433534, "loss": 3.3086, "step": 36600 }, { "epoch": 0.63, "learning_rate": 0.00018060336477699262, "loss": 3.2832, "step": 36605 }, { "epoch": 0.63, "learning_rate": 0.00018052790010766496, "loss": 3.311, "step": 36610 }, { "epoch": 0.63, "learning_rate": 0.00018045244442202707, "loss": 3.263, "step": 36615 }, { "epoch": 0.63, "learning_rate": 0.00018037699772575262, "loss": 3.2083, "step": 36620 }, { "epoch": 0.63, "learning_rate": 0.0001803015600245148, "loss": 3.3219, "step": 36625 }, { "epoch": 0.63, "learning_rate": 0.00018022613132398624, "loss": 3.271, "step": 36630 }, { "epoch": 0.63, "learning_rate": 0.00018015071162983884, "loss": 3.1227, "step": 36635 }, { "epoch": 0.63, "learning_rate": 0.00018007530094774375, "loss": 3.2264, "step": 36640 }, { "epoch": 0.63, "learning_rate": 0.00017999989928337132, "loss": 3.2169, "step": 36645 }, { "epoch": 0.63, "learning_rate": 0.00017992450664239149, "loss": 3.1578, "step": 36650 }, { "epoch": 0.63, "learning_rate": 0.0001798491230304734, "loss": 3.1818, "step": 36655 }, { "epoch": 0.63, "learning_rate": 0.00017977374845328547, "loss": 3.1602, "step": 36660 }, { "epoch": 0.63, "learning_rate": 0.0001796983829164956, "loss": 3.1913, "step": 36665 }, { "epoch": 0.63, "learning_rate": 0.0001796230264257707, "loss": 3.3033, "step": 36670 }, { "epoch": 0.63, "learning_rate": 0.00017954767898677726, "loss": 3.2693, "step": 36675 }, { "epoch": 0.63, "learning_rate": 0.00017947234060518115, "loss": 3.2891, "step": 36680 }, { "epoch": 0.63, "learning_rate": 0.00017939701128664727, "loss": 3.2005, "step": 36685 }, { "epoch": 0.63, "learning_rate": 0.00017932169103684016, "loss": 3.1168, "step": 36690 }, { "epoch": 0.64, "learning_rate": 0.0001792463798614233, "loss": 3.2606, "step": 36695 }, { "epoch": 0.64, "learning_rate": 0.0001791710777660599, "loss": 3.1485, "step": 36700 }, { "epoch": 0.64, "learning_rate": 0.00017909578475641213, "loss": 3.0826, "step": 36705 }, { "epoch": 0.64, "learning_rate": 0.00017902050083814174, "loss": 3.2234, "step": 36710 }, { "epoch": 0.64, "learning_rate": 0.00017894522601690974, "loss": 3.36, "step": 36715 }, { "epoch": 0.64, "learning_rate": 0.00017886996029837634, "loss": 3.2038, "step": 36720 }, { "epoch": 0.64, "learning_rate": 0.0001787947036882011, "loss": 3.1815, "step": 36725 }, { "epoch": 0.64, "learning_rate": 0.00017871945619204294, "loss": 3.265, "step": 36730 }, { "epoch": 0.64, "learning_rate": 0.0001786442178155602, "loss": 3.2302, "step": 36735 }, { "epoch": 0.64, "learning_rate": 0.00017856898856441036, "loss": 3.1937, "step": 36740 }, { "epoch": 0.64, "learning_rate": 0.00017849376844425024, "loss": 3.1804, "step": 36745 }, { "epoch": 0.64, "learning_rate": 0.00017841855746073595, "loss": 3.2216, "step": 36750 }, { "epoch": 0.64, "learning_rate": 0.00017834335561952306, "loss": 3.3077, "step": 36755 }, { "epoch": 0.64, "learning_rate": 0.0001782681629262664, "loss": 3.1248, "step": 36760 }, { "epoch": 0.64, "learning_rate": 0.0001781929793866201, "loss": 3.1452, "step": 36765 }, { "epoch": 0.64, "learning_rate": 0.00017811780500623733, "loss": 3.2719, "step": 36770 }, { "epoch": 0.64, "learning_rate": 0.000178042639790771, "loss": 3.2455, "step": 36775 }, { "epoch": 0.64, "learning_rate": 0.00017796748374587323, "loss": 3.1196, "step": 36780 }, { "epoch": 0.64, "learning_rate": 0.0001778923368771952, "loss": 3.329, "step": 36785 }, { "epoch": 0.64, "learning_rate": 0.0001778171991903876, "loss": 3.1396, "step": 36790 }, { "epoch": 0.64, "learning_rate": 0.00017774207069110064, "loss": 3.2559, "step": 36795 }, { "epoch": 0.64, "learning_rate": 0.00017766695138498332, "loss": 3.2072, "step": 36800 }, { "epoch": 0.64, "learning_rate": 0.00017759184127768424, "loss": 3.3362, "step": 36805 }, { "epoch": 0.64, "learning_rate": 0.00017751674037485136, "loss": 3.2532, "step": 36810 }, { "epoch": 0.64, "learning_rate": 0.00017744164868213193, "loss": 3.2233, "step": 36815 }, { "epoch": 0.64, "learning_rate": 0.00017736656620517246, "loss": 3.2606, "step": 36820 }, { "epoch": 0.64, "learning_rate": 0.0001772914929496187, "loss": 3.1516, "step": 36825 }, { "epoch": 0.64, "learning_rate": 0.00017721642892111573, "loss": 3.1829, "step": 36830 }, { "epoch": 0.64, "learning_rate": 0.00017714137412530806, "loss": 3.2415, "step": 36835 }, { "epoch": 0.64, "learning_rate": 0.00017706632856783952, "loss": 3.2393, "step": 36840 }, { "epoch": 0.64, "learning_rate": 0.00017699129225435303, "loss": 3.192, "step": 36845 }, { "epoch": 0.64, "learning_rate": 0.00017691626519049085, "loss": 3.3051, "step": 36850 }, { "epoch": 0.64, "learning_rate": 0.00017684124738189476, "loss": 3.2575, "step": 36855 }, { "epoch": 0.64, "learning_rate": 0.00017676623883420574, "loss": 3.1901, "step": 36860 }, { "epoch": 0.64, "learning_rate": 0.00017669123955306394, "loss": 3.1218, "step": 36865 }, { "epoch": 0.64, "learning_rate": 0.00017661624954410905, "loss": 3.2743, "step": 36870 }, { "epoch": 0.64, "learning_rate": 0.0001765412688129798, "loss": 3.2173, "step": 36875 }, { "epoch": 0.64, "learning_rate": 0.0001764662973653145, "loss": 3.1673, "step": 36880 }, { "epoch": 0.64, "learning_rate": 0.00017639133520675043, "loss": 3.3145, "step": 36885 }, { "epoch": 0.64, "learning_rate": 0.00017631638234292448, "loss": 3.2983, "step": 36890 }, { "epoch": 0.64, "learning_rate": 0.00017624143877947282, "loss": 3.1276, "step": 36895 }, { "epoch": 0.64, "learning_rate": 0.00017616650452203066, "loss": 3.0952, "step": 36900 }, { "epoch": 0.64, "learning_rate": 0.00017609157957623265, "loss": 3.1819, "step": 36905 }, { "epoch": 0.64, "learning_rate": 0.00017601666394771287, "loss": 3.13, "step": 36910 }, { "epoch": 0.64, "learning_rate": 0.00017594175764210453, "loss": 3.1295, "step": 36915 }, { "epoch": 0.64, "learning_rate": 0.00017586686066504035, "loss": 3.2718, "step": 36920 }, { "epoch": 0.64, "learning_rate": 0.00017579197302215204, "loss": 3.3466, "step": 36925 }, { "epoch": 0.64, "learning_rate": 0.0001757170947190707, "loss": 3.1818, "step": 36930 }, { "epoch": 0.64, "learning_rate": 0.00017564222576142694, "loss": 3.2946, "step": 36935 }, { "epoch": 0.64, "learning_rate": 0.0001755673661548505, "loss": 3.2415, "step": 36940 }, { "epoch": 0.64, "learning_rate": 0.0001754925159049705, "loss": 3.2438, "step": 36945 }, { "epoch": 0.64, "learning_rate": 0.0001754176750174151, "loss": 3.2137, "step": 36950 }, { "epoch": 0.64, "learning_rate": 0.00017534284349781203, "loss": 3.1935, "step": 36955 }, { "epoch": 0.64, "learning_rate": 0.00017526802135178836, "loss": 3.281, "step": 36960 }, { "epoch": 0.64, "learning_rate": 0.00017519320858497018, "loss": 3.185, "step": 36965 }, { "epoch": 0.64, "learning_rate": 0.0001751184052029832, "loss": 3.2558, "step": 36970 }, { "epoch": 0.64, "learning_rate": 0.0001750436112114521, "loss": 3.3523, "step": 36975 }, { "epoch": 0.64, "learning_rate": 0.00017496882661600095, "loss": 3.2183, "step": 36980 }, { "epoch": 0.64, "learning_rate": 0.00017489405142225332, "loss": 3.308, "step": 36985 }, { "epoch": 0.64, "learning_rate": 0.0001748192856358319, "loss": 3.2556, "step": 36990 }, { "epoch": 0.64, "learning_rate": 0.00017474452926235864, "loss": 3.2036, "step": 36995 }, { "epoch": 0.64, "learning_rate": 0.00017466978230745486, "loss": 3.2368, "step": 37000 }, { "epoch": 0.64, "eval_loss": 3.2513468265533447, "eval_runtime": 149.7793, "eval_samples_per_second": 12.291, "eval_steps_per_second": 0.774, "step": 37000 }, { "epoch": 0.64, "learning_rate": 0.00017459504477674104, "loss": 3.2079, "step": 37005 }, { "epoch": 0.64, "learning_rate": 0.00017452031667583728, "loss": 3.0015, "step": 37010 }, { "epoch": 0.64, "learning_rate": 0.00017444559801036262, "loss": 3.2196, "step": 37015 }, { "epoch": 0.64, "learning_rate": 0.00017437088878593542, "loss": 3.2114, "step": 37020 }, { "epoch": 0.64, "learning_rate": 0.0001742961890081738, "loss": 3.3074, "step": 37025 }, { "epoch": 0.64, "learning_rate": 0.00017422149868269427, "loss": 3.1725, "step": 37030 }, { "epoch": 0.64, "learning_rate": 0.00017414681781511356, "loss": 3.2638, "step": 37035 }, { "epoch": 0.64, "learning_rate": 0.00017407214641104715, "loss": 3.1967, "step": 37040 }, { "epoch": 0.64, "learning_rate": 0.00017399748447610987, "loss": 3.1839, "step": 37045 }, { "epoch": 0.64, "learning_rate": 0.00017392283201591625, "loss": 3.251, "step": 37050 }, { "epoch": 0.64, "learning_rate": 0.0001738481890360794, "loss": 3.1605, "step": 37055 }, { "epoch": 0.64, "learning_rate": 0.0001737735555422121, "loss": 3.2344, "step": 37060 }, { "epoch": 0.64, "learning_rate": 0.00017369893153992662, "loss": 3.1954, "step": 37065 }, { "epoch": 0.64, "learning_rate": 0.00017362431703483425, "loss": 3.2549, "step": 37070 }, { "epoch": 0.64, "learning_rate": 0.00017354971203254553, "loss": 3.2404, "step": 37075 }, { "epoch": 0.64, "learning_rate": 0.00017347511653867045, "loss": 3.249, "step": 37080 }, { "epoch": 0.64, "learning_rate": 0.00017340053055881803, "loss": 3.2248, "step": 37085 }, { "epoch": 0.64, "learning_rate": 0.00017332595409859706, "loss": 3.2577, "step": 37090 }, { "epoch": 0.64, "learning_rate": 0.0001732513871636151, "loss": 3.249, "step": 37095 }, { "epoch": 0.64, "learning_rate": 0.00017317682975947926, "loss": 3.2418, "step": 37100 }, { "epoch": 0.64, "learning_rate": 0.0001731022818917959, "loss": 3.1526, "step": 37105 }, { "epoch": 0.64, "learning_rate": 0.00017302774356617052, "loss": 3.2873, "step": 37110 }, { "epoch": 0.64, "learning_rate": 0.00017295321478820805, "loss": 3.1383, "step": 37115 }, { "epoch": 0.64, "learning_rate": 0.00017287869556351283, "loss": 3.2247, "step": 37120 }, { "epoch": 0.64, "learning_rate": 0.0001728041858976882, "loss": 3.1668, "step": 37125 }, { "epoch": 0.64, "learning_rate": 0.00017272968579633695, "loss": 3.2901, "step": 37130 }, { "epoch": 0.64, "learning_rate": 0.00017265519526506108, "loss": 3.2611, "step": 37135 }, { "epoch": 0.64, "learning_rate": 0.00017258071430946174, "loss": 3.1702, "step": 37140 }, { "epoch": 0.64, "learning_rate": 0.0001725062429351398, "loss": 3.2486, "step": 37145 }, { "epoch": 0.64, "learning_rate": 0.00017243178114769505, "loss": 3.2305, "step": 37150 }, { "epoch": 0.64, "learning_rate": 0.00017235732895272648, "loss": 3.2179, "step": 37155 }, { "epoch": 0.64, "learning_rate": 0.0001722828863558327, "loss": 3.2362, "step": 37160 }, { "epoch": 0.64, "learning_rate": 0.00017220845336261118, "loss": 3.2262, "step": 37165 }, { "epoch": 0.64, "learning_rate": 0.00017213402997865915, "loss": 3.1945, "step": 37170 }, { "epoch": 0.64, "learning_rate": 0.0001720596162095729, "loss": 3.2624, "step": 37175 }, { "epoch": 0.64, "learning_rate": 0.0001719852120609476, "loss": 3.0769, "step": 37180 }, { "epoch": 0.64, "learning_rate": 0.00017191081753837836, "loss": 3.1758, "step": 37185 }, { "epoch": 0.64, "learning_rate": 0.00017183643264745925, "loss": 3.2337, "step": 37190 }, { "epoch": 0.64, "learning_rate": 0.00017176205739378344, "loss": 3.1392, "step": 37195 }, { "epoch": 0.64, "learning_rate": 0.0001716876917829439, "loss": 3.2752, "step": 37200 }, { "epoch": 0.64, "learning_rate": 0.00017161333582053217, "loss": 3.3017, "step": 37205 }, { "epoch": 0.64, "learning_rate": 0.0001715389895121397, "loss": 3.1933, "step": 37210 }, { "epoch": 0.64, "learning_rate": 0.00017146465286335688, "loss": 3.2897, "step": 37215 }, { "epoch": 0.64, "learning_rate": 0.00017139032587977333, "loss": 3.2704, "step": 37220 }, { "epoch": 0.64, "learning_rate": 0.00017131600856697834, "loss": 3.221, "step": 37225 }, { "epoch": 0.64, "learning_rate": 0.00017124170093055983, "loss": 3.2712, "step": 37230 }, { "epoch": 0.64, "learning_rate": 0.00017116740297610565, "loss": 3.1784, "step": 37235 }, { "epoch": 0.64, "learning_rate": 0.0001710931147092025, "loss": 3.1287, "step": 37240 }, { "epoch": 0.64, "learning_rate": 0.00017101883613543636, "loss": 3.3075, "step": 37245 }, { "epoch": 0.64, "learning_rate": 0.00017094456726039284, "loss": 3.2108, "step": 37250 }, { "epoch": 0.64, "learning_rate": 0.00017087030808965654, "loss": 3.167, "step": 37255 }, { "epoch": 0.64, "learning_rate": 0.00017079605862881114, "loss": 3.0637, "step": 37260 }, { "epoch": 0.64, "learning_rate": 0.00017072181888344004, "loss": 3.2903, "step": 37265 }, { "epoch": 0.64, "learning_rate": 0.0001706475888591256, "loss": 3.3106, "step": 37270 }, { "epoch": 0.65, "learning_rate": 0.0001705733685614495, "loss": 3.2914, "step": 37275 }, { "epoch": 0.65, "learning_rate": 0.00017049915799599298, "loss": 3.2675, "step": 37280 }, { "epoch": 0.65, "learning_rate": 0.00017042495716833588, "loss": 3.2125, "step": 37285 }, { "epoch": 0.65, "learning_rate": 0.00017035076608405803, "loss": 3.197, "step": 37290 }, { "epoch": 0.65, "learning_rate": 0.00017027658474873817, "loss": 3.1445, "step": 37295 }, { "epoch": 0.65, "learning_rate": 0.00017020241316795416, "loss": 3.2512, "step": 37300 }, { "epoch": 0.65, "learning_rate": 0.0001701282513472837, "loss": 3.2431, "step": 37305 }, { "epoch": 0.65, "learning_rate": 0.00017005409929230297, "loss": 3.1504, "step": 37310 }, { "epoch": 0.65, "learning_rate": 0.00016997995700858808, "loss": 3.185, "step": 37315 }, { "epoch": 0.65, "learning_rate": 0.00016990582450171412, "loss": 3.0925, "step": 37320 }, { "epoch": 0.65, "learning_rate": 0.00016983170177725532, "loss": 3.0949, "step": 37325 }, { "epoch": 0.65, "learning_rate": 0.00016975758884078566, "loss": 3.2816, "step": 37330 }, { "epoch": 0.65, "learning_rate": 0.00016968348569787773, "loss": 3.3073, "step": 37335 }, { "epoch": 0.65, "learning_rate": 0.0001696093923541037, "loss": 3.3084, "step": 37340 }, { "epoch": 0.65, "learning_rate": 0.0001695353088150352, "loss": 3.1617, "step": 37345 }, { "epoch": 0.65, "learning_rate": 0.00016946123508624288, "loss": 3.2592, "step": 37350 }, { "epoch": 0.65, "learning_rate": 0.0001693871711732967, "loss": 3.2328, "step": 37355 }, { "epoch": 0.65, "learning_rate": 0.00016931311708176587, "loss": 3.217, "step": 37360 }, { "epoch": 0.65, "learning_rate": 0.0001692390728172187, "loss": 3.2719, "step": 37365 }, { "epoch": 0.65, "learning_rate": 0.00016916503838522331, "loss": 3.069, "step": 37370 }, { "epoch": 0.65, "learning_rate": 0.0001690910137913465, "loss": 3.3553, "step": 37375 }, { "epoch": 0.65, "learning_rate": 0.00016901699904115454, "loss": 3.1695, "step": 37380 }, { "epoch": 0.65, "learning_rate": 0.00016894299414021294, "loss": 3.2769, "step": 37385 }, { "epoch": 0.65, "learning_rate": 0.00016886899909408643, "loss": 3.126, "step": 37390 }, { "epoch": 0.65, "learning_rate": 0.00016879501390833928, "loss": 3.265, "step": 37395 }, { "epoch": 0.65, "learning_rate": 0.0001687210385885347, "loss": 3.2026, "step": 37400 }, { "epoch": 0.65, "learning_rate": 0.00016864707314023517, "loss": 3.2575, "step": 37405 }, { "epoch": 0.65, "learning_rate": 0.0001685731175690026, "loss": 3.2137, "step": 37410 }, { "epoch": 0.65, "learning_rate": 0.00016849917188039803, "loss": 3.2626, "step": 37415 }, { "epoch": 0.65, "learning_rate": 0.0001684252360799817, "loss": 3.2544, "step": 37420 }, { "epoch": 0.65, "learning_rate": 0.00016835131017331337, "loss": 3.3139, "step": 37425 }, { "epoch": 0.65, "learning_rate": 0.00016827739416595187, "loss": 3.2438, "step": 37430 }, { "epoch": 0.65, "learning_rate": 0.00016820348806345525, "loss": 3.1817, "step": 37435 }, { "epoch": 0.65, "learning_rate": 0.0001681295918713809, "loss": 3.2638, "step": 37440 }, { "epoch": 0.65, "learning_rate": 0.00016805570559528526, "loss": 3.2733, "step": 37445 }, { "epoch": 0.65, "learning_rate": 0.0001679818292407245, "loss": 3.2396, "step": 37450 }, { "epoch": 0.65, "learning_rate": 0.0001679079628132536, "loss": 3.2589, "step": 37455 }, { "epoch": 0.65, "learning_rate": 0.00016783410631842691, "loss": 3.2473, "step": 37460 }, { "epoch": 0.65, "learning_rate": 0.0001677602597617981, "loss": 3.19, "step": 37465 }, { "epoch": 0.65, "learning_rate": 0.0001676864231489199, "loss": 3.2841, "step": 37470 }, { "epoch": 0.65, "learning_rate": 0.00016761259648534477, "loss": 3.3538, "step": 37475 }, { "epoch": 0.65, "learning_rate": 0.00016753877977662384, "loss": 3.3771, "step": 37480 }, { "epoch": 0.65, "learning_rate": 0.0001674649730283079, "loss": 3.1082, "step": 37485 }, { "epoch": 0.65, "learning_rate": 0.00016739117624594665, "loss": 3.1594, "step": 37490 }, { "epoch": 0.65, "learning_rate": 0.0001673173894350894, "loss": 3.2678, "step": 37495 }, { "epoch": 0.65, "learning_rate": 0.00016724361260128434, "loss": 3.2161, "step": 37500 }, { "epoch": 0.65, "eval_loss": 3.246263027191162, "eval_runtime": 150.2834, "eval_samples_per_second": 12.25, "eval_steps_per_second": 0.772, "step": 37500 }, { "epoch": 0.65, "learning_rate": 0.0001671698457500794, "loss": 3.1662, "step": 37505 }, { "epoch": 0.65, "learning_rate": 0.00016709608888702124, "loss": 3.2584, "step": 37510 }, { "epoch": 0.65, "learning_rate": 0.00016702234201765613, "loss": 3.2162, "step": 37515 }, { "epoch": 0.65, "learning_rate": 0.00016694860514752938, "loss": 3.1176, "step": 37520 }, { "epoch": 0.65, "learning_rate": 0.00016687487828218557, "loss": 3.1931, "step": 37525 }, { "epoch": 0.65, "learning_rate": 0.00016680116142716873, "loss": 3.2113, "step": 37530 }, { "epoch": 0.65, "learning_rate": 0.00016672745458802193, "loss": 3.2477, "step": 37535 }, { "epoch": 0.65, "learning_rate": 0.00016665375777028755, "loss": 3.1498, "step": 37540 }, { "epoch": 0.65, "learning_rate": 0.00016658007097950715, "loss": 3.3016, "step": 37545 }, { "epoch": 0.65, "learning_rate": 0.00016650639422122158, "loss": 3.194, "step": 37550 }, { "epoch": 0.65, "learning_rate": 0.0001664327275009711, "loss": 3.137, "step": 37555 }, { "epoch": 0.65, "learning_rate": 0.00016635907082429514, "loss": 3.2968, "step": 37560 }, { "epoch": 0.65, "learning_rate": 0.0001662854241967319, "loss": 3.2957, "step": 37565 }, { "epoch": 0.65, "learning_rate": 0.00016621178762381964, "loss": 3.2532, "step": 37570 }, { "epoch": 0.65, "learning_rate": 0.00016613816111109525, "loss": 3.1561, "step": 37575 }, { "epoch": 0.65, "learning_rate": 0.000166064544664095, "loss": 3.1695, "step": 37580 }, { "epoch": 0.65, "learning_rate": 0.00016599093828835484, "loss": 3.1958, "step": 37585 }, { "epoch": 0.65, "learning_rate": 0.00016591734198940906, "loss": 3.18, "step": 37590 }, { "epoch": 0.65, "learning_rate": 0.00016584375577279219, "loss": 3.1963, "step": 37595 }, { "epoch": 0.65, "learning_rate": 0.00016577017964403728, "loss": 3.1112, "step": 37600 }, { "epoch": 0.65, "learning_rate": 0.00016569661360867687, "loss": 3.1904, "step": 37605 }, { "epoch": 0.65, "learning_rate": 0.00016562305767224304, "loss": 3.2197, "step": 37610 }, { "epoch": 0.65, "learning_rate": 0.00016554951184026646, "loss": 3.2805, "step": 37615 }, { "epoch": 0.65, "learning_rate": 0.00016547597611827746, "loss": 3.237, "step": 37620 }, { "epoch": 0.65, "learning_rate": 0.00016540245051180572, "loss": 3.1968, "step": 37625 }, { "epoch": 0.65, "learning_rate": 0.00016532893502637993, "loss": 3.2983, "step": 37630 }, { "epoch": 0.65, "learning_rate": 0.00016525542966752804, "loss": 3.2977, "step": 37635 }, { "epoch": 0.65, "learning_rate": 0.00016518193444077728, "loss": 3.2517, "step": 37640 }, { "epoch": 0.65, "learning_rate": 0.000165108449351654, "loss": 3.1764, "step": 37645 }, { "epoch": 0.65, "learning_rate": 0.0001650349744056841, "loss": 3.1407, "step": 37650 }, { "epoch": 0.65, "learning_rate": 0.0001649615096083925, "loss": 3.3731, "step": 37655 }, { "epoch": 0.65, "learning_rate": 0.00016488805496530328, "loss": 3.2043, "step": 37660 }, { "epoch": 0.65, "learning_rate": 0.0001648146104819399, "loss": 3.214, "step": 37665 }, { "epoch": 0.65, "learning_rate": 0.0001647411761638249, "loss": 3.1209, "step": 37670 }, { "epoch": 0.65, "learning_rate": 0.00016466775201648032, "loss": 3.2196, "step": 37675 }, { "epoch": 0.65, "learning_rate": 0.00016459433804542727, "loss": 3.3123, "step": 37680 }, { "epoch": 0.65, "learning_rate": 0.00016452093425618606, "loss": 3.245, "step": 37685 }, { "epoch": 0.65, "learning_rate": 0.00016444754065427625, "loss": 3.1629, "step": 37690 }, { "epoch": 0.65, "learning_rate": 0.00016437415724521672, "loss": 3.2326, "step": 37695 }, { "epoch": 0.65, "learning_rate": 0.00016430078403452535, "loss": 3.1992, "step": 37700 }, { "epoch": 0.65, "learning_rate": 0.00016422742102771969, "loss": 3.2132, "step": 37705 }, { "epoch": 0.65, "learning_rate": 0.00016415406823031616, "loss": 3.179, "step": 37710 }, { "epoch": 0.65, "learning_rate": 0.00016408072564783047, "loss": 3.2493, "step": 37715 }, { "epoch": 0.65, "learning_rate": 0.00016400739328577767, "loss": 3.203, "step": 37720 }, { "epoch": 0.65, "learning_rate": 0.00016393407114967184, "loss": 3.1359, "step": 37725 }, { "epoch": 0.65, "learning_rate": 0.00016386075924502657, "loss": 3.2161, "step": 37730 }, { "epoch": 0.65, "learning_rate": 0.0001637874575773546, "loss": 3.1806, "step": 37735 }, { "epoch": 0.65, "learning_rate": 0.00016371416615216765, "loss": 3.2406, "step": 37740 }, { "epoch": 0.65, "learning_rate": 0.00016364088497497706, "loss": 3.1397, "step": 37745 }, { "epoch": 0.65, "learning_rate": 0.00016356761405129287, "loss": 3.1923, "step": 37750 }, { "epoch": 0.65, "learning_rate": 0.0001634943533866251, "loss": 3.3604, "step": 37755 }, { "epoch": 0.65, "learning_rate": 0.00016342110298648232, "loss": 3.2568, "step": 37760 }, { "epoch": 0.65, "learning_rate": 0.00016334786285637264, "loss": 3.2019, "step": 37765 }, { "epoch": 0.65, "learning_rate": 0.00016327463300180335, "loss": 3.1786, "step": 37770 }, { "epoch": 0.65, "learning_rate": 0.00016320141342828095, "loss": 3.0524, "step": 37775 }, { "epoch": 0.65, "learning_rate": 0.00016312820414131107, "loss": 3.3086, "step": 37780 }, { "epoch": 0.65, "learning_rate": 0.00016305500514639888, "loss": 3.1939, "step": 37785 }, { "epoch": 0.65, "learning_rate": 0.00016298181644904854, "loss": 3.307, "step": 37790 }, { "epoch": 0.65, "learning_rate": 0.00016290863805476331, "loss": 3.2686, "step": 37795 }, { "epoch": 0.65, "learning_rate": 0.00016283546996904595, "loss": 3.1524, "step": 37800 }, { "epoch": 0.65, "learning_rate": 0.0001627623121973982, "loss": 3.1401, "step": 37805 }, { "epoch": 0.65, "learning_rate": 0.0001626891647453213, "loss": 3.1872, "step": 37810 }, { "epoch": 0.65, "learning_rate": 0.00016261602761831554, "loss": 3.4207, "step": 37815 }, { "epoch": 0.65, "learning_rate": 0.00016254290082188044, "loss": 3.2811, "step": 37820 }, { "epoch": 0.65, "learning_rate": 0.00016246978436151472, "loss": 3.2386, "step": 37825 }, { "epoch": 0.65, "learning_rate": 0.00016239667824271626, "loss": 3.2014, "step": 37830 }, { "epoch": 0.65, "learning_rate": 0.0001623235824709825, "loss": 3.1844, "step": 37835 }, { "epoch": 0.65, "learning_rate": 0.00016225049705180988, "loss": 3.1639, "step": 37840 }, { "epoch": 0.65, "learning_rate": 0.00016217742199069375, "loss": 3.2266, "step": 37845 }, { "epoch": 0.66, "learning_rate": 0.00016210435729312918, "loss": 3.1546, "step": 37850 }, { "epoch": 0.66, "learning_rate": 0.0001620313029646103, "loss": 3.2589, "step": 37855 }, { "epoch": 0.66, "learning_rate": 0.00016195825901063024, "loss": 3.1717, "step": 37860 }, { "epoch": 0.66, "learning_rate": 0.00016188522543668187, "loss": 3.1791, "step": 37865 }, { "epoch": 0.66, "learning_rate": 0.00016181220224825647, "loss": 3.2002, "step": 37870 }, { "epoch": 0.66, "learning_rate": 0.00016173918945084538, "loss": 3.0784, "step": 37875 }, { "epoch": 0.66, "learning_rate": 0.00016166618704993867, "loss": 3.2606, "step": 37880 }, { "epoch": 0.66, "learning_rate": 0.00016159319505102565, "loss": 3.2025, "step": 37885 }, { "epoch": 0.66, "learning_rate": 0.00016152021345959528, "loss": 3.119, "step": 37890 }, { "epoch": 0.66, "learning_rate": 0.00016144724228113488, "loss": 3.3311, "step": 37895 }, { "epoch": 0.66, "learning_rate": 0.00016137428152113197, "loss": 3.2303, "step": 37900 }, { "epoch": 0.66, "learning_rate": 0.0001613013311850726, "loss": 3.306, "step": 37905 }, { "epoch": 0.66, "learning_rate": 0.00016122839127844218, "loss": 3.2077, "step": 37910 }, { "epoch": 0.66, "learning_rate": 0.0001611554618067258, "loss": 3.1467, "step": 37915 }, { "epoch": 0.66, "learning_rate": 0.000161082542775407, "loss": 3.2522, "step": 37920 }, { "epoch": 0.66, "learning_rate": 0.0001610096341899689, "loss": 3.2779, "step": 37925 }, { "epoch": 0.66, "learning_rate": 0.00016093673605589416, "loss": 3.1941, "step": 37930 }, { "epoch": 0.66, "learning_rate": 0.00016086384837866414, "loss": 3.1619, "step": 37935 }, { "epoch": 0.66, "learning_rate": 0.00016079097116375958, "loss": 3.2503, "step": 37940 }, { "epoch": 0.66, "learning_rate": 0.00016071810441666077, "loss": 3.2323, "step": 37945 }, { "epoch": 0.66, "learning_rate": 0.00016064524814284644, "loss": 3.1909, "step": 37950 }, { "epoch": 0.66, "learning_rate": 0.00016057240234779543, "loss": 3.1922, "step": 37955 }, { "epoch": 0.66, "learning_rate": 0.0001604995670369852, "loss": 3.2015, "step": 37960 }, { "epoch": 0.66, "learning_rate": 0.00016042674221589242, "loss": 3.1244, "step": 37965 }, { "epoch": 0.66, "learning_rate": 0.00016035392788999364, "loss": 3.2906, "step": 37970 }, { "epoch": 0.66, "learning_rate": 0.00016028112406476353, "loss": 3.2155, "step": 37975 }, { "epoch": 0.66, "learning_rate": 0.00016020833074567695, "loss": 3.2215, "step": 37980 }, { "epoch": 0.66, "learning_rate": 0.0001601355479382075, "loss": 3.2019, "step": 37985 }, { "epoch": 0.66, "learning_rate": 0.00016006277564782796, "loss": 3.1993, "step": 37990 }, { "epoch": 0.66, "learning_rate": 0.00015999001388001074, "loss": 3.3097, "step": 37995 }, { "epoch": 0.66, "learning_rate": 0.00015991726264022677, "loss": 3.1445, "step": 38000 }, { "epoch": 0.66, "eval_loss": 3.2400145530700684, "eval_runtime": 149.8729, "eval_samples_per_second": 12.284, "eval_steps_per_second": 0.774, "step": 38000 }, { "epoch": 0.66, "learning_rate": 0.0001598445219339467, "loss": 3.2372, "step": 38005 }, { "epoch": 0.66, "learning_rate": 0.00015977179176664035, "loss": 3.2157, "step": 38010 }, { "epoch": 0.66, "learning_rate": 0.00015969907214377662, "loss": 3.2457, "step": 38015 }, { "epoch": 0.66, "learning_rate": 0.00015962636307082363, "loss": 3.2293, "step": 38020 }, { "epoch": 0.66, "learning_rate": 0.00015955366455324874, "loss": 3.1265, "step": 38025 }, { "epoch": 0.66, "learning_rate": 0.0001594809765965184, "loss": 3.2626, "step": 38030 }, { "epoch": 0.66, "learning_rate": 0.00015940829920609858, "loss": 3.2296, "step": 38035 }, { "epoch": 0.66, "learning_rate": 0.00015933563238745412, "loss": 3.2178, "step": 38040 }, { "epoch": 0.66, "learning_rate": 0.00015926297614604924, "loss": 3.2407, "step": 38045 }, { "epoch": 0.66, "learning_rate": 0.00015919033048734726, "loss": 3.2555, "step": 38050 }, { "epoch": 0.66, "learning_rate": 0.00015911769541681066, "loss": 3.2065, "step": 38055 }, { "epoch": 0.66, "learning_rate": 0.00015904507093990152, "loss": 3.2034, "step": 38060 }, { "epoch": 0.66, "learning_rate": 0.00015897245706208063, "loss": 3.3155, "step": 38065 }, { "epoch": 0.66, "learning_rate": 0.00015889985378880818, "loss": 3.2745, "step": 38070 }, { "epoch": 0.66, "learning_rate": 0.00015882726112554364, "loss": 3.2874, "step": 38075 }, { "epoch": 0.66, "learning_rate": 0.00015875467907774558, "loss": 3.1474, "step": 38080 }, { "epoch": 0.66, "learning_rate": 0.00015868210765087167, "loss": 3.2925, "step": 38085 }, { "epoch": 0.66, "learning_rate": 0.00015860954685037912, "loss": 3.1178, "step": 38090 }, { "epoch": 0.66, "learning_rate": 0.000158536996681724, "loss": 3.2575, "step": 38095 }, { "epoch": 0.66, "learning_rate": 0.0001584644571503618, "loss": 3.2266, "step": 38100 }, { "epoch": 0.66, "learning_rate": 0.000158391928261747, "loss": 3.0909, "step": 38105 }, { "epoch": 0.66, "learning_rate": 0.00015831941002133343, "loss": 3.1911, "step": 38110 }, { "epoch": 0.66, "learning_rate": 0.0001582469024345742, "loss": 3.1309, "step": 38115 }, { "epoch": 0.66, "learning_rate": 0.00015817440550692152, "loss": 3.1826, "step": 38120 }, { "epoch": 0.66, "learning_rate": 0.0001581019192438265, "loss": 3.212, "step": 38125 }, { "epoch": 0.66, "learning_rate": 0.00015802944365074005, "loss": 3.3265, "step": 38130 }, { "epoch": 0.66, "learning_rate": 0.00015795697873311184, "loss": 3.1416, "step": 38135 }, { "epoch": 0.66, "learning_rate": 0.00015788452449639077, "loss": 3.0383, "step": 38140 }, { "epoch": 0.66, "learning_rate": 0.00015781208094602537, "loss": 3.159, "step": 38145 }, { "epoch": 0.66, "learning_rate": 0.0001577396480874625, "loss": 3.2361, "step": 38150 }, { "epoch": 0.66, "learning_rate": 0.0001576672259261492, "loss": 3.2279, "step": 38155 }, { "epoch": 0.66, "learning_rate": 0.00015759481446753098, "loss": 3.248, "step": 38160 }, { "epoch": 0.66, "learning_rate": 0.00015752241371705286, "loss": 3.1824, "step": 38165 }, { "epoch": 0.66, "learning_rate": 0.00015745002368015913, "loss": 3.1641, "step": 38170 }, { "epoch": 0.66, "learning_rate": 0.00015737764436229306, "loss": 3.1412, "step": 38175 }, { "epoch": 0.66, "learning_rate": 0.0001573052757688972, "loss": 3.2713, "step": 38180 }, { "epoch": 0.66, "learning_rate": 0.00015723291790541334, "loss": 3.226, "step": 38185 }, { "epoch": 0.66, "learning_rate": 0.00015716057077728223, "loss": 3.3351, "step": 38190 }, { "epoch": 0.66, "learning_rate": 0.0001570882343899443, "loss": 3.2517, "step": 38195 }, { "epoch": 0.66, "learning_rate": 0.00015701590874883883, "loss": 3.1853, "step": 38200 }, { "epoch": 0.66, "learning_rate": 0.00015694359385940403, "loss": 3.1761, "step": 38205 }, { "epoch": 0.66, "learning_rate": 0.0001568712897270779, "loss": 3.1338, "step": 38210 }, { "epoch": 0.66, "learning_rate": 0.0001567989963572973, "loss": 3.2637, "step": 38215 }, { "epoch": 0.66, "learning_rate": 0.00015672671375549814, "loss": 3.2493, "step": 38220 }, { "epoch": 0.66, "learning_rate": 0.00015665444192711604, "loss": 3.2718, "step": 38225 }, { "epoch": 0.66, "learning_rate": 0.00015658218087758507, "loss": 3.2829, "step": 38230 }, { "epoch": 0.66, "learning_rate": 0.00015650993061233924, "loss": 3.3045, "step": 38235 }, { "epoch": 0.66, "learning_rate": 0.00015643769113681124, "loss": 3.2175, "step": 38240 }, { "epoch": 0.66, "learning_rate": 0.000156365462456433, "loss": 3.2631, "step": 38245 }, { "epoch": 0.66, "learning_rate": 0.00015629324457663605, "loss": 3.3236, "step": 38250 }, { "epoch": 0.66, "learning_rate": 0.00015622103750285047, "loss": 3.2581, "step": 38255 }, { "epoch": 0.66, "learning_rate": 0.00015614884124050607, "loss": 3.287, "step": 38260 }, { "epoch": 0.66, "learning_rate": 0.00015607665579503164, "loss": 3.3278, "step": 38265 }, { "epoch": 0.66, "learning_rate": 0.00015600448117185492, "loss": 3.1982, "step": 38270 }, { "epoch": 0.66, "learning_rate": 0.00015593231737640352, "loss": 3.2156, "step": 38275 }, { "epoch": 0.66, "learning_rate": 0.00015586016441410341, "loss": 3.2382, "step": 38280 }, { "epoch": 0.66, "learning_rate": 0.00015578802229038007, "loss": 3.2535, "step": 38285 }, { "epoch": 0.66, "learning_rate": 0.00015571589101065855, "loss": 3.3024, "step": 38290 }, { "epoch": 0.66, "learning_rate": 0.00015564377058036258, "loss": 3.1432, "step": 38295 }, { "epoch": 0.66, "learning_rate": 0.00015557166100491518, "loss": 3.1777, "step": 38300 }, { "epoch": 0.66, "learning_rate": 0.00015549956228973875, "loss": 3.2004, "step": 38305 }, { "epoch": 0.66, "learning_rate": 0.00015542747444025455, "loss": 3.0625, "step": 38310 }, { "epoch": 0.66, "learning_rate": 0.00015535539746188348, "loss": 3.1938, "step": 38315 }, { "epoch": 0.66, "learning_rate": 0.00015528333136004523, "loss": 3.221, "step": 38320 }, { "epoch": 0.66, "learning_rate": 0.00015521127614015877, "loss": 3.2017, "step": 38325 }, { "epoch": 0.66, "learning_rate": 0.00015513923180764233, "loss": 3.2288, "step": 38330 }, { "epoch": 0.66, "learning_rate": 0.00015506719836791313, "loss": 3.2243, "step": 38335 }, { "epoch": 0.66, "learning_rate": 0.00015499517582638798, "loss": 3.232, "step": 38340 }, { "epoch": 0.66, "learning_rate": 0.00015492316418848248, "loss": 3.1439, "step": 38345 }, { "epoch": 0.66, "learning_rate": 0.0001548511634596115, "loss": 3.1259, "step": 38350 }, { "epoch": 0.66, "learning_rate": 0.00015477917364518915, "loss": 3.1856, "step": 38355 }, { "epoch": 0.66, "learning_rate": 0.0001547071947506287, "loss": 3.2, "step": 38360 }, { "epoch": 0.66, "learning_rate": 0.00015463522678134248, "loss": 3.1343, "step": 38365 }, { "epoch": 0.66, "learning_rate": 0.00015456326974274233, "loss": 3.2869, "step": 38370 }, { "epoch": 0.66, "learning_rate": 0.00015449132364023897, "loss": 3.0858, "step": 38375 }, { "epoch": 0.66, "learning_rate": 0.00015441938847924233, "loss": 3.1788, "step": 38380 }, { "epoch": 0.66, "learning_rate": 0.0001543474642651616, "loss": 3.2329, "step": 38385 }, { "epoch": 0.66, "learning_rate": 0.00015427555100340498, "loss": 3.196, "step": 38390 }, { "epoch": 0.66, "learning_rate": 0.0001542036486993802, "loss": 3.1304, "step": 38395 }, { "epoch": 0.66, "learning_rate": 0.00015413175735849388, "loss": 3.2239, "step": 38400 }, { "epoch": 0.66, "learning_rate": 0.00015405987698615184, "loss": 3.0827, "step": 38405 }, { "epoch": 0.66, "learning_rate": 0.0001539880075877591, "loss": 3.2181, "step": 38410 }, { "epoch": 0.66, "learning_rate": 0.00015391614916871978, "loss": 3.2241, "step": 38415 }, { "epoch": 0.66, "learning_rate": 0.00015384430173443747, "loss": 3.2218, "step": 38420 }, { "epoch": 0.66, "learning_rate": 0.00015377246529031465, "loss": 3.098, "step": 38425 }, { "epoch": 0.67, "learning_rate": 0.00015370063984175304, "loss": 3.2373, "step": 38430 }, { "epoch": 0.67, "learning_rate": 0.00015362882539415352, "loss": 3.2509, "step": 38435 }, { "epoch": 0.67, "learning_rate": 0.0001535570219529162, "loss": 3.1837, "step": 38440 }, { "epoch": 0.67, "learning_rate": 0.0001534852295234402, "loss": 3.2769, "step": 38445 }, { "epoch": 0.67, "learning_rate": 0.0001534134481111242, "loss": 3.1648, "step": 38450 }, { "epoch": 0.67, "learning_rate": 0.00015334167772136566, "loss": 3.1912, "step": 38455 }, { "epoch": 0.67, "learning_rate": 0.00015326991835956132, "loss": 3.1258, "step": 38460 }, { "epoch": 0.67, "learning_rate": 0.00015319817003110715, "loss": 3.2684, "step": 38465 }, { "epoch": 0.67, "learning_rate": 0.00015312643274139812, "loss": 3.2462, "step": 38470 }, { "epoch": 0.67, "learning_rate": 0.0001530547064958288, "loss": 3.1875, "step": 38475 }, { "epoch": 0.67, "learning_rate": 0.00015298299129979238, "loss": 3.2456, "step": 38480 }, { "epoch": 0.67, "learning_rate": 0.00015291128715868164, "loss": 3.1945, "step": 38485 }, { "epoch": 0.67, "learning_rate": 0.00015283959407788825, "loss": 3.2784, "step": 38490 }, { "epoch": 0.67, "learning_rate": 0.00015276791206280307, "loss": 3.0896, "step": 38495 }, { "epoch": 0.67, "learning_rate": 0.0001526962411188165, "loss": 3.1892, "step": 38500 }, { "epoch": 0.67, "eval_loss": 3.235760450363159, "eval_runtime": 149.9771, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 38500 }, { "epoch": 0.67, "learning_rate": 0.0001526245812513178, "loss": 3.1669, "step": 38505 }, { "epoch": 0.67, "learning_rate": 0.00015255293246569504, "loss": 3.2019, "step": 38510 }, { "epoch": 0.67, "learning_rate": 0.00015248129476733626, "loss": 3.2363, "step": 38515 }, { "epoch": 0.67, "learning_rate": 0.00015240966816162807, "loss": 3.2336, "step": 38520 }, { "epoch": 0.67, "learning_rate": 0.0001523380526539563, "loss": 3.2384, "step": 38525 }, { "epoch": 0.67, "learning_rate": 0.0001522664482497065, "loss": 3.1823, "step": 38530 }, { "epoch": 0.67, "learning_rate": 0.00015219485495426242, "loss": 3.2383, "step": 38535 }, { "epoch": 0.67, "learning_rate": 0.00015212327277300784, "loss": 3.2228, "step": 38540 }, { "epoch": 0.67, "learning_rate": 0.00015205170171132536, "loss": 3.0597, "step": 38545 }, { "epoch": 0.67, "learning_rate": 0.0001519801417745965, "loss": 3.2417, "step": 38550 }, { "epoch": 0.67, "learning_rate": 0.00015190859296820264, "loss": 3.2426, "step": 38555 }, { "epoch": 0.67, "learning_rate": 0.0001518370552975234, "loss": 3.1934, "step": 38560 }, { "epoch": 0.67, "learning_rate": 0.00015176552876793847, "loss": 3.2366, "step": 38565 }, { "epoch": 0.67, "learning_rate": 0.000151694013384826, "loss": 3.1862, "step": 38570 }, { "epoch": 0.67, "learning_rate": 0.00015162250915356357, "loss": 3.2587, "step": 38575 }, { "epoch": 0.67, "learning_rate": 0.00015155101607952827, "loss": 3.1882, "step": 38580 }, { "epoch": 0.67, "learning_rate": 0.00015147953416809565, "loss": 3.2082, "step": 38585 }, { "epoch": 0.67, "learning_rate": 0.00015140806342464077, "loss": 3.204, "step": 38590 }, { "epoch": 0.67, "learning_rate": 0.00015133660385453814, "loss": 3.2401, "step": 38595 }, { "epoch": 0.67, "learning_rate": 0.00015126515546316102, "loss": 3.1105, "step": 38600 }, { "epoch": 0.67, "learning_rate": 0.00015119371825588192, "loss": 3.2315, "step": 38605 }, { "epoch": 0.67, "learning_rate": 0.00015112229223807262, "loss": 3.2599, "step": 38610 }, { "epoch": 0.67, "learning_rate": 0.00015105087741510386, "loss": 3.1626, "step": 38615 }, { "epoch": 0.67, "learning_rate": 0.00015097947379234583, "loss": 3.0691, "step": 38620 }, { "epoch": 0.67, "learning_rate": 0.00015090808137516767, "loss": 3.1392, "step": 38625 }, { "epoch": 0.67, "learning_rate": 0.0001508367001689377, "loss": 3.2198, "step": 38630 }, { "epoch": 0.67, "learning_rate": 0.00015076533017902347, "loss": 3.1879, "step": 38635 }, { "epoch": 0.67, "learning_rate": 0.00015069397141079154, "loss": 3.228, "step": 38640 }, { "epoch": 0.67, "learning_rate": 0.00015062262386960772, "loss": 3.2194, "step": 38645 }, { "epoch": 0.67, "learning_rate": 0.0001505512875608371, "loss": 3.2814, "step": 38650 }, { "epoch": 0.67, "learning_rate": 0.00015047996248984376, "loss": 3.1777, "step": 38655 }, { "epoch": 0.67, "learning_rate": 0.00015040864866199097, "loss": 3.221, "step": 38660 }, { "epoch": 0.67, "learning_rate": 0.00015033734608264115, "loss": 3.2044, "step": 38665 }, { "epoch": 0.67, "learning_rate": 0.00015026605475715581, "loss": 3.2661, "step": 38670 }, { "epoch": 0.67, "learning_rate": 0.00015019477469089586, "loss": 3.2046, "step": 38675 }, { "epoch": 0.67, "learning_rate": 0.0001501235058892211, "loss": 3.1087, "step": 38680 }, { "epoch": 0.67, "learning_rate": 0.00015005224835749063, "loss": 3.2581, "step": 38685 }, { "epoch": 0.67, "learning_rate": 0.00014998100210106258, "loss": 3.1716, "step": 38690 }, { "epoch": 0.67, "learning_rate": 0.00014990976712529424, "loss": 3.1427, "step": 38695 }, { "epoch": 0.67, "learning_rate": 0.00014983854343554232, "loss": 3.1025, "step": 38700 }, { "epoch": 0.67, "learning_rate": 0.0001497673310371624, "loss": 3.1843, "step": 38705 }, { "epoch": 0.67, "learning_rate": 0.0001496961299355092, "loss": 3.2923, "step": 38710 }, { "epoch": 0.67, "learning_rate": 0.0001496249401359368, "loss": 3.2623, "step": 38715 }, { "epoch": 0.67, "learning_rate": 0.00014955376164379818, "loss": 3.1085, "step": 38720 }, { "epoch": 0.67, "learning_rate": 0.0001494825944644456, "loss": 3.2231, "step": 38725 }, { "epoch": 0.67, "learning_rate": 0.0001494114386032306, "loss": 3.2339, "step": 38730 }, { "epoch": 0.67, "learning_rate": 0.00014934029406550367, "loss": 3.1435, "step": 38735 }, { "epoch": 0.67, "learning_rate": 0.0001492691608566145, "loss": 3.2924, "step": 38740 }, { "epoch": 0.67, "learning_rate": 0.00014919803898191198, "loss": 3.3259, "step": 38745 }, { "epoch": 0.67, "learning_rate": 0.00014912692844674393, "loss": 3.2163, "step": 38750 }, { "epoch": 0.67, "learning_rate": 0.0001490558292564578, "loss": 3.1284, "step": 38755 }, { "epoch": 0.67, "learning_rate": 0.0001489847414163997, "loss": 3.2856, "step": 38760 }, { "epoch": 0.67, "learning_rate": 0.0001489136649319151, "loss": 3.2836, "step": 38765 }, { "epoch": 0.67, "learning_rate": 0.00014884259980834866, "loss": 3.1405, "step": 38770 }, { "epoch": 0.67, "learning_rate": 0.0001487715460510439, "loss": 3.1253, "step": 38775 }, { "epoch": 0.67, "learning_rate": 0.0001487005036653439, "loss": 3.2167, "step": 38780 }, { "epoch": 0.67, "learning_rate": 0.0001486294726565908, "loss": 3.1719, "step": 38785 }, { "epoch": 0.67, "learning_rate": 0.00014855845303012535, "loss": 3.1742, "step": 38790 }, { "epoch": 0.67, "learning_rate": 0.0001484874447912882, "loss": 3.1847, "step": 38795 }, { "epoch": 0.67, "learning_rate": 0.00014841644794541872, "loss": 3.2533, "step": 38800 }, { "epoch": 0.67, "learning_rate": 0.00014834546249785542, "loss": 3.1358, "step": 38805 }, { "epoch": 0.67, "learning_rate": 0.00014827448845393634, "loss": 3.2148, "step": 38810 }, { "epoch": 0.67, "learning_rate": 0.00014820352581899787, "loss": 3.1691, "step": 38815 }, { "epoch": 0.67, "learning_rate": 0.00014813257459837649, "loss": 3.1114, "step": 38820 }, { "epoch": 0.67, "learning_rate": 0.00014806163479740714, "loss": 3.1567, "step": 38825 }, { "epoch": 0.67, "learning_rate": 0.00014799070642142412, "loss": 3.189, "step": 38830 }, { "epoch": 0.67, "learning_rate": 0.00014791978947576115, "loss": 3.1744, "step": 38835 }, { "epoch": 0.67, "learning_rate": 0.00014784888396575038, "loss": 3.2016, "step": 38840 }, { "epoch": 0.67, "learning_rate": 0.00014777798989672387, "loss": 3.1499, "step": 38845 }, { "epoch": 0.67, "learning_rate": 0.00014770710727401241, "loss": 3.2737, "step": 38850 }, { "epoch": 0.67, "learning_rate": 0.00014763623610294588, "loss": 3.2374, "step": 38855 }, { "epoch": 0.67, "learning_rate": 0.00014756537638885376, "loss": 3.2429, "step": 38860 }, { "epoch": 0.67, "learning_rate": 0.00014749452813706405, "loss": 3.3057, "step": 38865 }, { "epoch": 0.67, "learning_rate": 0.00014742369135290409, "loss": 3.2106, "step": 38870 }, { "epoch": 0.67, "learning_rate": 0.00014735286604170074, "loss": 3.1396, "step": 38875 }, { "epoch": 0.67, "learning_rate": 0.00014728205220877953, "loss": 3.3025, "step": 38880 }, { "epoch": 0.67, "learning_rate": 0.00014721124985946521, "loss": 3.1966, "step": 38885 }, { "epoch": 0.67, "learning_rate": 0.00014714045899908215, "loss": 3.229, "step": 38890 }, { "epoch": 0.67, "learning_rate": 0.0001470696796329529, "loss": 3.2342, "step": 38895 }, { "epoch": 0.67, "learning_rate": 0.0001469989117664001, "loss": 3.2281, "step": 38900 }, { "epoch": 0.67, "learning_rate": 0.00014692815540474506, "loss": 3.1531, "step": 38905 }, { "epoch": 0.67, "learning_rate": 0.00014685741055330809, "loss": 3.2777, "step": 38910 }, { "epoch": 0.67, "learning_rate": 0.00014678667721740925, "loss": 3.0968, "step": 38915 }, { "epoch": 0.67, "learning_rate": 0.00014671595540236687, "loss": 3.2377, "step": 38920 }, { "epoch": 0.67, "learning_rate": 0.00014664524511349918, "loss": 3.2756, "step": 38925 }, { "epoch": 0.67, "learning_rate": 0.00014657454635612314, "loss": 3.2936, "step": 38930 }, { "epoch": 0.67, "learning_rate": 0.00014650385913555475, "loss": 3.1701, "step": 38935 }, { "epoch": 0.67, "learning_rate": 0.00014643318345710976, "loss": 3.2626, "step": 38940 }, { "epoch": 0.67, "learning_rate": 0.00014636251932610226, "loss": 3.1047, "step": 38945 }, { "epoch": 0.67, "learning_rate": 0.0001462918667478458, "loss": 3.2449, "step": 38950 }, { "epoch": 0.67, "learning_rate": 0.00014622122572765335, "loss": 3.1803, "step": 38955 }, { "epoch": 0.67, "learning_rate": 0.00014615059627083658, "loss": 3.199, "step": 38960 }, { "epoch": 0.67, "learning_rate": 0.00014607997838270652, "loss": 3.2114, "step": 38965 }, { "epoch": 0.67, "learning_rate": 0.00014600937206857328, "loss": 3.1264, "step": 38970 }, { "epoch": 0.67, "learning_rate": 0.00014593877733374595, "loss": 3.1626, "step": 38975 }, { "epoch": 0.67, "learning_rate": 0.0001458681941835331, "loss": 3.1494, "step": 38980 }, { "epoch": 0.67, "learning_rate": 0.00014579762262324216, "loss": 3.1752, "step": 38985 }, { "epoch": 0.67, "learning_rate": 0.00014572706265817976, "loss": 3.2887, "step": 38990 }, { "epoch": 0.67, "learning_rate": 0.00014565651429365153, "loss": 3.1689, "step": 38995 }, { "epoch": 0.67, "learning_rate": 0.0001455859775349624, "loss": 3.1576, "step": 39000 }, { "epoch": 0.67, "eval_loss": 3.232856512069702, "eval_runtime": 149.8711, "eval_samples_per_second": 12.284, "eval_steps_per_second": 0.774, "step": 39000 }, { "epoch": 0.68, "learning_rate": 0.00014551545238741643, "loss": 3.1477, "step": 39005 }, { "epoch": 0.68, "learning_rate": 0.00014544493885631677, "loss": 3.1183, "step": 39010 }, { "epoch": 0.68, "learning_rate": 0.0001453744369469656, "loss": 3.2491, "step": 39015 }, { "epoch": 0.68, "learning_rate": 0.00014530394666466436, "loss": 3.2484, "step": 39020 }, { "epoch": 0.68, "learning_rate": 0.00014523346801471347, "loss": 3.1706, "step": 39025 }, { "epoch": 0.68, "learning_rate": 0.00014516300100241256, "loss": 3.2258, "step": 39030 }, { "epoch": 0.68, "learning_rate": 0.00014509254563306053, "loss": 3.212, "step": 39035 }, { "epoch": 0.68, "learning_rate": 0.00014502210191195522, "loss": 3.1339, "step": 39040 }, { "epoch": 0.68, "learning_rate": 0.00014495166984439356, "loss": 3.0903, "step": 39045 }, { "epoch": 0.68, "learning_rate": 0.00014488124943567174, "loss": 3.2413, "step": 39050 }, { "epoch": 0.68, "learning_rate": 0.00014481084069108488, "loss": 3.0555, "step": 39055 }, { "epoch": 0.68, "learning_rate": 0.00014474044361592755, "loss": 3.1508, "step": 39060 }, { "epoch": 0.68, "learning_rate": 0.0001446700582154932, "loss": 3.2852, "step": 39065 }, { "epoch": 0.68, "learning_rate": 0.0001445996844950744, "loss": 3.1775, "step": 39070 }, { "epoch": 0.68, "learning_rate": 0.0001445293224599629, "loss": 3.1098, "step": 39075 }, { "epoch": 0.68, "learning_rate": 0.00014445897211544948, "loss": 3.2777, "step": 39080 }, { "epoch": 0.68, "learning_rate": 0.00014438863346682437, "loss": 3.2472, "step": 39085 }, { "epoch": 0.68, "learning_rate": 0.00014431830651937658, "loss": 3.1457, "step": 39090 }, { "epoch": 0.68, "learning_rate": 0.0001442479912783941, "loss": 3.1604, "step": 39095 }, { "epoch": 0.68, "learning_rate": 0.00014417768774916456, "loss": 3.2105, "step": 39100 }, { "epoch": 0.68, "learning_rate": 0.00014410739593697432, "loss": 3.2452, "step": 39105 }, { "epoch": 0.68, "learning_rate": 0.00014403711584710886, "loss": 3.1147, "step": 39110 }, { "epoch": 0.68, "learning_rate": 0.00014396684748485308, "loss": 3.138, "step": 39115 }, { "epoch": 0.68, "learning_rate": 0.00014389659085549075, "loss": 3.1713, "step": 39120 }, { "epoch": 0.68, "learning_rate": 0.00014382634596430476, "loss": 3.2346, "step": 39125 }, { "epoch": 0.68, "learning_rate": 0.00014375611281657716, "loss": 3.1103, "step": 39130 }, { "epoch": 0.68, "learning_rate": 0.00014368589141758903, "loss": 3.2913, "step": 39135 }, { "epoch": 0.68, "learning_rate": 0.00014361568177262088, "loss": 3.0587, "step": 39140 }, { "epoch": 0.68, "learning_rate": 0.00014354548388695201, "loss": 3.183, "step": 39145 }, { "epoch": 0.68, "learning_rate": 0.00014347529776586094, "loss": 3.2254, "step": 39150 }, { "epoch": 0.68, "learning_rate": 0.0001434051234146253, "loss": 3.2179, "step": 39155 }, { "epoch": 0.68, "learning_rate": 0.00014333496083852174, "loss": 3.1301, "step": 39160 }, { "epoch": 0.68, "learning_rate": 0.00014326481004282632, "loss": 3.0906, "step": 39165 }, { "epoch": 0.68, "learning_rate": 0.00014319467103281406, "loss": 3.0157, "step": 39170 }, { "epoch": 0.68, "learning_rate": 0.0001431245438137587, "loss": 3.2439, "step": 39175 }, { "epoch": 0.68, "learning_rate": 0.00014305442839093375, "loss": 3.1518, "step": 39180 }, { "epoch": 0.68, "learning_rate": 0.00014298432476961148, "loss": 3.1854, "step": 39185 }, { "epoch": 0.68, "learning_rate": 0.00014291423295506318, "loss": 3.2038, "step": 39190 }, { "epoch": 0.68, "learning_rate": 0.00014284415295255973, "loss": 3.226, "step": 39195 }, { "epoch": 0.68, "learning_rate": 0.00014277408476737034, "loss": 3.2791, "step": 39200 }, { "epoch": 0.68, "learning_rate": 0.00014270402840476412, "loss": 3.3441, "step": 39205 }, { "epoch": 0.68, "learning_rate": 0.00014263398387000885, "loss": 3.131, "step": 39210 }, { "epoch": 0.68, "learning_rate": 0.00014256395116837137, "loss": 3.2233, "step": 39215 }, { "epoch": 0.68, "learning_rate": 0.00014249393030511814, "loss": 3.1594, "step": 39220 }, { "epoch": 0.68, "learning_rate": 0.00014242392128551405, "loss": 3.2299, "step": 39225 }, { "epoch": 0.68, "learning_rate": 0.0001423539241148234, "loss": 3.2021, "step": 39230 }, { "epoch": 0.68, "learning_rate": 0.00014228393879830984, "loss": 3.2461, "step": 39235 }, { "epoch": 0.68, "learning_rate": 0.00014221396534123582, "loss": 3.1472, "step": 39240 }, { "epoch": 0.68, "learning_rate": 0.00014214400374886293, "loss": 3.1902, "step": 39245 }, { "epoch": 0.68, "learning_rate": 0.00014207405402645197, "loss": 3.2461, "step": 39250 }, { "epoch": 0.68, "learning_rate": 0.0001420041161792627, "loss": 3.2336, "step": 39255 }, { "epoch": 0.68, "learning_rate": 0.00014193419021255428, "loss": 3.2692, "step": 39260 }, { "epoch": 0.68, "learning_rate": 0.00014186427613158468, "loss": 3.1938, "step": 39265 }, { "epoch": 0.68, "learning_rate": 0.00014179437394161107, "loss": 3.1897, "step": 39270 }, { "epoch": 0.68, "learning_rate": 0.00014172448364788972, "loss": 3.2023, "step": 39275 }, { "epoch": 0.68, "learning_rate": 0.00014165460525567602, "loss": 3.1319, "step": 39280 }, { "epoch": 0.68, "learning_rate": 0.00014158473877022454, "loss": 3.3315, "step": 39285 }, { "epoch": 0.68, "learning_rate": 0.00014151488419678887, "loss": 3.2045, "step": 39290 }, { "epoch": 0.68, "learning_rate": 0.00014144504154062166, "loss": 3.298, "step": 39295 }, { "epoch": 0.68, "learning_rate": 0.00014137521080697478, "loss": 3.2035, "step": 39300 }, { "epoch": 0.68, "learning_rate": 0.0001413053920010991, "loss": 3.2147, "step": 39305 }, { "epoch": 0.68, "learning_rate": 0.00014123558512824452, "loss": 3.2121, "step": 39310 }, { "epoch": 0.68, "learning_rate": 0.0001411657901936604, "loss": 3.1644, "step": 39315 }, { "epoch": 0.68, "learning_rate": 0.00014109600720259484, "loss": 3.2276, "step": 39320 }, { "epoch": 0.68, "learning_rate": 0.00014102623616029516, "loss": 3.2369, "step": 39325 }, { "epoch": 0.68, "learning_rate": 0.00014095647707200782, "loss": 3.0479, "step": 39330 }, { "epoch": 0.68, "learning_rate": 0.0001408867299429782, "loss": 3.174, "step": 39335 }, { "epoch": 0.68, "learning_rate": 0.00014081699477845112, "loss": 3.248, "step": 39340 }, { "epoch": 0.68, "learning_rate": 0.00014074727158367028, "loss": 3.1176, "step": 39345 }, { "epoch": 0.68, "learning_rate": 0.00014067756036387842, "loss": 3.218, "step": 39350 }, { "epoch": 0.68, "learning_rate": 0.00014060786112431755, "loss": 3.2828, "step": 39355 }, { "epoch": 0.68, "learning_rate": 0.0001405381738702285, "loss": 3.1822, "step": 39360 }, { "epoch": 0.68, "learning_rate": 0.0001404684986068517, "loss": 3.2422, "step": 39365 }, { "epoch": 0.68, "learning_rate": 0.00014039883533942615, "loss": 3.2066, "step": 39370 }, { "epoch": 0.68, "learning_rate": 0.0001403291840731903, "loss": 3.2333, "step": 39375 }, { "epoch": 0.68, "learning_rate": 0.0001402595448133815, "loss": 3.3032, "step": 39380 }, { "epoch": 0.68, "learning_rate": 0.00014018991756523628, "loss": 3.2043, "step": 39385 }, { "epoch": 0.68, "learning_rate": 0.00014012030233399013, "loss": 3.1163, "step": 39390 }, { "epoch": 0.68, "learning_rate": 0.00014005069912487793, "loss": 3.1967, "step": 39395 }, { "epoch": 0.68, "learning_rate": 0.0001399811079431335, "loss": 3.3533, "step": 39400 }, { "epoch": 0.68, "learning_rate": 0.00013991152879398963, "loss": 3.1829, "step": 39405 }, { "epoch": 0.68, "learning_rate": 0.00013984196168267837, "loss": 3.245, "step": 39410 }, { "epoch": 0.68, "learning_rate": 0.0001397724066144307, "loss": 3.1686, "step": 39415 }, { "epoch": 0.68, "learning_rate": 0.00013970286359447697, "loss": 3.2045, "step": 39420 }, { "epoch": 0.68, "learning_rate": 0.00013963333262804643, "loss": 3.3767, "step": 39425 }, { "epoch": 0.68, "learning_rate": 0.0001395638137203674, "loss": 3.2496, "step": 39430 }, { "epoch": 0.68, "learning_rate": 0.00013949430687666732, "loss": 3.2372, "step": 39435 }, { "epoch": 0.68, "learning_rate": 0.0001394248121021727, "loss": 3.1356, "step": 39440 }, { "epoch": 0.68, "learning_rate": 0.00013935532940210937, "loss": 3.2368, "step": 39445 }, { "epoch": 0.68, "learning_rate": 0.00013928585878170212, "loss": 3.2078, "step": 39450 }, { "epoch": 0.68, "learning_rate": 0.0001392164002461744, "loss": 3.2284, "step": 39455 }, { "epoch": 0.68, "learning_rate": 0.00013914695380074947, "loss": 3.312, "step": 39460 }, { "epoch": 0.68, "learning_rate": 0.0001390775194506493, "loss": 3.0963, "step": 39465 }, { "epoch": 0.68, "learning_rate": 0.00013900809720109478, "loss": 3.1184, "step": 39470 }, { "epoch": 0.68, "learning_rate": 0.00013893868705730657, "loss": 3.0958, "step": 39475 }, { "epoch": 0.68, "learning_rate": 0.0001388692890245034, "loss": 3.2181, "step": 39480 }, { "epoch": 0.68, "learning_rate": 0.00013879990310790405, "loss": 3.2215, "step": 39485 }, { "epoch": 0.68, "learning_rate": 0.00013873052931272583, "loss": 3.2281, "step": 39490 }, { "epoch": 0.68, "learning_rate": 0.0001386611676441852, "loss": 3.2299, "step": 39495 }, { "epoch": 0.68, "learning_rate": 0.00013859181810749817, "loss": 3.2065, "step": 39500 }, { "epoch": 0.68, "eval_loss": 3.226215362548828, "eval_runtime": 149.8725, "eval_samples_per_second": 12.284, "eval_steps_per_second": 0.774, "step": 39500 }, { "epoch": 0.68, "learning_rate": 0.00013852248070787897, "loss": 3.1109, "step": 39505 }, { "epoch": 0.68, "learning_rate": 0.00013845315545054179, "loss": 3.1988, "step": 39510 }, { "epoch": 0.68, "learning_rate": 0.00013838384234069937, "loss": 3.2595, "step": 39515 }, { "epoch": 0.68, "learning_rate": 0.00013831454138356366, "loss": 3.1322, "step": 39520 }, { "epoch": 0.68, "learning_rate": 0.00013824525258434602, "loss": 3.2045, "step": 39525 }, { "epoch": 0.68, "learning_rate": 0.0001381759759482563, "loss": 3.2601, "step": 39530 }, { "epoch": 0.68, "learning_rate": 0.00013810671148050373, "loss": 3.249, "step": 39535 }, { "epoch": 0.68, "learning_rate": 0.00013803745918629688, "loss": 3.3018, "step": 39540 }, { "epoch": 0.68, "learning_rate": 0.00013796821907084307, "loss": 3.1712, "step": 39545 }, { "epoch": 0.68, "learning_rate": 0.00013789899113934878, "loss": 3.1496, "step": 39550 }, { "epoch": 0.68, "learning_rate": 0.00013782977539701958, "loss": 3.1739, "step": 39555 }, { "epoch": 0.68, "learning_rate": 0.00013776057184906008, "loss": 3.3045, "step": 39560 }, { "epoch": 0.68, "learning_rate": 0.0001376913805006742, "loss": 3.2423, "step": 39565 }, { "epoch": 0.68, "learning_rate": 0.00013762220135706468, "loss": 3.1416, "step": 39570 }, { "epoch": 0.68, "learning_rate": 0.00013755303442343337, "loss": 3.2441, "step": 39575 }, { "epoch": 0.68, "learning_rate": 0.00013748387970498156, "loss": 3.2493, "step": 39580 }, { "epoch": 0.69, "learning_rate": 0.0001374147372069089, "loss": 3.224, "step": 39585 }, { "epoch": 0.69, "learning_rate": 0.00013734560693441484, "loss": 3.28, "step": 39590 }, { "epoch": 0.69, "learning_rate": 0.00013727648889269754, "loss": 3.2218, "step": 39595 }, { "epoch": 0.69, "learning_rate": 0.00013720738308695428, "loss": 3.1533, "step": 39600 }, { "epoch": 0.69, "learning_rate": 0.00013713828952238168, "loss": 3.2559, "step": 39605 }, { "epoch": 0.69, "learning_rate": 0.00013706920820417498, "loss": 3.1483, "step": 39610 }, { "epoch": 0.69, "learning_rate": 0.0001370001391375287, "loss": 3.2323, "step": 39615 }, { "epoch": 0.69, "learning_rate": 0.00013693108232763667, "loss": 3.1266, "step": 39620 }, { "epoch": 0.69, "learning_rate": 0.00013686203777969156, "loss": 3.1826, "step": 39625 }, { "epoch": 0.69, "learning_rate": 0.00013679300549888514, "loss": 3.2438, "step": 39630 }, { "epoch": 0.69, "learning_rate": 0.00013672398549040827, "loss": 3.1957, "step": 39635 }, { "epoch": 0.69, "learning_rate": 0.00013665497775945082, "loss": 3.2472, "step": 39640 }, { "epoch": 0.69, "learning_rate": 0.00013658598231120205, "loss": 3.0956, "step": 39645 }, { "epoch": 0.69, "learning_rate": 0.00013651699915084986, "loss": 3.2534, "step": 39650 }, { "epoch": 0.69, "learning_rate": 0.00013644802828358154, "loss": 3.1855, "step": 39655 }, { "epoch": 0.69, "learning_rate": 0.0001363790697145833, "loss": 3.2826, "step": 39660 }, { "epoch": 0.69, "learning_rate": 0.00013631012344904038, "loss": 3.176, "step": 39665 }, { "epoch": 0.69, "learning_rate": 0.00013624118949213737, "loss": 3.1286, "step": 39670 }, { "epoch": 0.69, "learning_rate": 0.00013617226784905765, "loss": 3.2014, "step": 39675 }, { "epoch": 0.69, "learning_rate": 0.0001361033585249838, "loss": 3.1897, "step": 39680 }, { "epoch": 0.69, "learning_rate": 0.00013603446152509743, "loss": 3.3175, "step": 39685 }, { "epoch": 0.69, "learning_rate": 0.00013596557685457923, "loss": 3.1729, "step": 39690 }, { "epoch": 0.69, "learning_rate": 0.0001358967045186089, "loss": 3.216, "step": 39695 }, { "epoch": 0.69, "learning_rate": 0.00013582784452236546, "loss": 3.1958, "step": 39700 }, { "epoch": 0.69, "learning_rate": 0.00013575899687102674, "loss": 3.2406, "step": 39705 }, { "epoch": 0.69, "learning_rate": 0.00013569016156976977, "loss": 3.1854, "step": 39710 }, { "epoch": 0.69, "learning_rate": 0.00013562133862377054, "loss": 3.2161, "step": 39715 }, { "epoch": 0.69, "learning_rate": 0.0001355525280382041, "loss": 3.0194, "step": 39720 }, { "epoch": 0.69, "learning_rate": 0.00013548372981824492, "loss": 3.0951, "step": 39725 }, { "epoch": 0.69, "learning_rate": 0.0001354149439690662, "loss": 3.1332, "step": 39730 }, { "epoch": 0.69, "learning_rate": 0.00013534617049584002, "loss": 3.2012, "step": 39735 }, { "epoch": 0.69, "learning_rate": 0.00013527740940373807, "loss": 3.2941, "step": 39740 }, { "epoch": 0.69, "learning_rate": 0.00013520866069793077, "loss": 3.2922, "step": 39745 }, { "epoch": 0.69, "learning_rate": 0.0001351399243835875, "loss": 3.2285, "step": 39750 }, { "epoch": 0.69, "learning_rate": 0.00013507120046587728, "loss": 3.1846, "step": 39755 }, { "epoch": 0.69, "learning_rate": 0.00013500248894996727, "loss": 3.2841, "step": 39760 }, { "epoch": 0.69, "learning_rate": 0.00013493378984102462, "loss": 3.2459, "step": 39765 }, { "epoch": 0.69, "learning_rate": 0.00013486510314421503, "loss": 3.2431, "step": 39770 }, { "epoch": 0.69, "learning_rate": 0.00013479642886470326, "loss": 3.1897, "step": 39775 }, { "epoch": 0.69, "learning_rate": 0.0001347277670076536, "loss": 3.1831, "step": 39780 }, { "epoch": 0.69, "learning_rate": 0.00013465911757822865, "loss": 3.2116, "step": 39785 }, { "epoch": 0.69, "learning_rate": 0.00013459048058159085, "loss": 3.0972, "step": 39790 }, { "epoch": 0.69, "learning_rate": 0.00013452185602290113, "loss": 3.2108, "step": 39795 }, { "epoch": 0.69, "learning_rate": 0.0001344532439073197, "loss": 3.2728, "step": 39800 }, { "epoch": 0.69, "learning_rate": 0.00013438464424000602, "loss": 3.188, "step": 39805 }, { "epoch": 0.69, "learning_rate": 0.0001343160570261184, "loss": 3.1234, "step": 39810 }, { "epoch": 0.69, "learning_rate": 0.000134247482270814, "loss": 3.1929, "step": 39815 }, { "epoch": 0.69, "learning_rate": 0.00013417891997924956, "loss": 3.1941, "step": 39820 }, { "epoch": 0.69, "learning_rate": 0.00013411037015658052, "loss": 3.2134, "step": 39825 }, { "epoch": 0.69, "learning_rate": 0.00013404183280796137, "loss": 3.1629, "step": 39830 }, { "epoch": 0.69, "learning_rate": 0.0001339733079385461, "loss": 3.315, "step": 39835 }, { "epoch": 0.69, "learning_rate": 0.00013390479555348698, "loss": 3.1403, "step": 39840 }, { "epoch": 0.69, "learning_rate": 0.00013383629565793612, "loss": 3.1475, "step": 39845 }, { "epoch": 0.69, "learning_rate": 0.00013376780825704422, "loss": 3.1861, "step": 39850 }, { "epoch": 0.69, "learning_rate": 0.00013369933335596112, "loss": 3.2026, "step": 39855 }, { "epoch": 0.69, "learning_rate": 0.00013363087095983608, "loss": 3.2458, "step": 39860 }, { "epoch": 0.69, "learning_rate": 0.00013356242107381673, "loss": 3.0653, "step": 39865 }, { "epoch": 0.69, "learning_rate": 0.0001334939837030504, "loss": 3.1658, "step": 39870 }, { "epoch": 0.69, "learning_rate": 0.0001334255588526832, "loss": 3.2229, "step": 39875 }, { "epoch": 0.69, "learning_rate": 0.00013335714652786018, "loss": 3.3034, "step": 39880 }, { "epoch": 0.69, "learning_rate": 0.0001332887467337259, "loss": 3.1056, "step": 39885 }, { "epoch": 0.69, "learning_rate": 0.00013322035947542344, "loss": 3.2603, "step": 39890 }, { "epoch": 0.69, "learning_rate": 0.00013315198475809505, "loss": 3.1717, "step": 39895 }, { "epoch": 0.69, "learning_rate": 0.00013308362258688245, "loss": 3.1708, "step": 39900 }, { "epoch": 0.69, "learning_rate": 0.000133015272966926, "loss": 3.0321, "step": 39905 }, { "epoch": 0.69, "learning_rate": 0.00013294693590336522, "loss": 3.1176, "step": 39910 }, { "epoch": 0.69, "learning_rate": 0.00013287861140133875, "loss": 3.1338, "step": 39915 }, { "epoch": 0.69, "learning_rate": 0.0001328102994659841, "loss": 3.1293, "step": 39920 }, { "epoch": 0.69, "learning_rate": 0.0001327420001024382, "loss": 3.1591, "step": 39925 }, { "epoch": 0.69, "learning_rate": 0.00013267371331583672, "loss": 3.2051, "step": 39930 }, { "epoch": 0.69, "learning_rate": 0.00013260543911131447, "loss": 3.2059, "step": 39935 }, { "epoch": 0.69, "learning_rate": 0.00013253717749400531, "loss": 3.1315, "step": 39940 }, { "epoch": 0.69, "learning_rate": 0.00013246892846904205, "loss": 3.2699, "step": 39945 }, { "epoch": 0.69, "learning_rate": 0.0001324006920415569, "loss": 3.2865, "step": 39950 }, { "epoch": 0.69, "learning_rate": 0.00013233246821668077, "loss": 3.1292, "step": 39955 }, { "epoch": 0.69, "learning_rate": 0.0001322642569995438, "loss": 3.237, "step": 39960 }, { "epoch": 0.69, "learning_rate": 0.000132196058395275, "loss": 3.1434, "step": 39965 }, { "epoch": 0.69, "learning_rate": 0.00013212787240900265, "loss": 3.2707, "step": 39970 }, { "epoch": 0.69, "learning_rate": 0.00013205969904585385, "loss": 3.2334, "step": 39975 }, { "epoch": 0.69, "learning_rate": 0.00013199153831095511, "loss": 3.1555, "step": 39980 }, { "epoch": 0.69, "learning_rate": 0.0001319233902094317, "loss": 3.1761, "step": 39985 }, { "epoch": 0.69, "learning_rate": 0.00013185525474640792, "loss": 3.3449, "step": 39990 }, { "epoch": 0.69, "learning_rate": 0.00013178713192700726, "loss": 3.1922, "step": 39995 }, { "epoch": 0.69, "learning_rate": 0.00013171902175635212, "loss": 3.2252, "step": 40000 }, { "epoch": 0.69, "eval_loss": 3.221252679824829, "eval_runtime": 149.7766, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.774, "step": 40000 }, { "epoch": 0.69, "learning_rate": 0.0001316509242395642, "loss": 3.2361, "step": 40005 }, { "epoch": 0.69, "learning_rate": 0.00013158283938176403, "loss": 3.1515, "step": 40010 }, { "epoch": 0.69, "learning_rate": 0.00013151476718807118, "loss": 3.2082, "step": 40015 }, { "epoch": 0.69, "learning_rate": 0.00013144670766360438, "loss": 3.1433, "step": 40020 }, { "epoch": 0.69, "learning_rate": 0.00013137866081348122, "loss": 3.0056, "step": 40025 }, { "epoch": 0.69, "learning_rate": 0.00013131062664281876, "loss": 3.1968, "step": 40030 }, { "epoch": 0.69, "learning_rate": 0.0001312426051567326, "loss": 3.1794, "step": 40035 }, { "epoch": 0.69, "learning_rate": 0.00013117459636033767, "loss": 3.086, "step": 40040 }, { "epoch": 0.69, "learning_rate": 0.00013110660025874787, "loss": 3.2869, "step": 40045 }, { "epoch": 0.69, "learning_rate": 0.00013103861685707623, "loss": 3.2714, "step": 40050 }, { "epoch": 0.69, "learning_rate": 0.0001309706461604345, "loss": 3.3275, "step": 40055 }, { "epoch": 0.69, "learning_rate": 0.00013090268817393409, "loss": 3.1481, "step": 40060 }, { "epoch": 0.69, "learning_rate": 0.00013083474290268487, "loss": 3.1364, "step": 40065 }, { "epoch": 0.69, "learning_rate": 0.00013076681035179604, "loss": 3.2324, "step": 40070 }, { "epoch": 0.69, "learning_rate": 0.0001306988905263758, "loss": 3.2283, "step": 40075 }, { "epoch": 0.69, "learning_rate": 0.00013063098343153116, "loss": 3.2573, "step": 40080 }, { "epoch": 0.69, "learning_rate": 0.0001305630890723687, "loss": 3.1392, "step": 40085 }, { "epoch": 0.69, "learning_rate": 0.00013049520745399363, "loss": 3.3101, "step": 40090 }, { "epoch": 0.69, "learning_rate": 0.00013042733858151023, "loss": 3.1184, "step": 40095 }, { "epoch": 0.69, "learning_rate": 0.00013035948246002191, "loss": 3.2271, "step": 40100 }, { "epoch": 0.69, "learning_rate": 0.00013029163909463102, "loss": 3.2393, "step": 40105 }, { "epoch": 0.69, "learning_rate": 0.00013022380849043926, "loss": 3.2122, "step": 40110 }, { "epoch": 0.69, "learning_rate": 0.00013015599065254707, "loss": 3.2095, "step": 40115 }, { "epoch": 0.69, "learning_rate": 0.00013008818558605376, "loss": 3.2102, "step": 40120 }, { "epoch": 0.69, "learning_rate": 0.00013002039329605824, "loss": 3.0793, "step": 40125 }, { "epoch": 0.69, "learning_rate": 0.00012995261378765798, "loss": 3.1341, "step": 40130 }, { "epoch": 0.69, "learning_rate": 0.0001298848470659496, "loss": 3.2486, "step": 40135 }, { "epoch": 0.69, "learning_rate": 0.0001298170931360291, "loss": 3.1172, "step": 40140 }, { "epoch": 0.69, "learning_rate": 0.00012974935200299077, "loss": 3.0749, "step": 40145 }, { "epoch": 0.69, "learning_rate": 0.0001296816236719288, "loss": 3.1997, "step": 40150 }, { "epoch": 0.69, "learning_rate": 0.00012961390814793583, "loss": 3.1917, "step": 40155 }, { "epoch": 0.7, "learning_rate": 0.00012954620543610367, "loss": 3.2264, "step": 40160 }, { "epoch": 0.7, "learning_rate": 0.00012947851554152354, "loss": 3.2491, "step": 40165 }, { "epoch": 0.7, "learning_rate": 0.00012941083846928488, "loss": 3.1931, "step": 40170 }, { "epoch": 0.7, "learning_rate": 0.00012934317422447702, "loss": 3.218, "step": 40175 }, { "epoch": 0.7, "learning_rate": 0.0001292755228121879, "loss": 3.2307, "step": 40180 }, { "epoch": 0.7, "learning_rate": 0.00012920788423750434, "loss": 3.2845, "step": 40185 }, { "epoch": 0.7, "learning_rate": 0.00012914025850551284, "loss": 3.1866, "step": 40190 }, { "epoch": 0.7, "learning_rate": 0.00012907264562129814, "loss": 3.1706, "step": 40195 }, { "epoch": 0.7, "learning_rate": 0.00012900504558994438, "loss": 3.2382, "step": 40200 }, { "epoch": 0.7, "learning_rate": 0.00012893745841653499, "loss": 3.1685, "step": 40205 }, { "epoch": 0.7, "learning_rate": 0.00012886988410615207, "loss": 3.1907, "step": 40210 }, { "epoch": 0.7, "learning_rate": 0.00012880232266387678, "loss": 3.2388, "step": 40215 }, { "epoch": 0.7, "learning_rate": 0.0001287347740947895, "loss": 3.2157, "step": 40220 }, { "epoch": 0.7, "learning_rate": 0.00012866723840396934, "loss": 3.1247, "step": 40225 }, { "epoch": 0.7, "learning_rate": 0.0001285997155964949, "loss": 3.2234, "step": 40230 }, { "epoch": 0.7, "learning_rate": 0.00012853220567744344, "loss": 3.2356, "step": 40235 }, { "epoch": 0.7, "learning_rate": 0.00012846470865189138, "loss": 3.2794, "step": 40240 }, { "epoch": 0.7, "learning_rate": 0.0001283972245249141, "loss": 3.1414, "step": 40245 }, { "epoch": 0.7, "learning_rate": 0.0001283297533015861, "loss": 3.1587, "step": 40250 }, { "epoch": 0.7, "learning_rate": 0.00012826229498698075, "loss": 3.2113, "step": 40255 }, { "epoch": 0.7, "learning_rate": 0.00012819484958617077, "loss": 3.2561, "step": 40260 }, { "epoch": 0.7, "learning_rate": 0.0001281274171042276, "loss": 3.1379, "step": 40265 }, { "epoch": 0.7, "learning_rate": 0.00012805999754622182, "loss": 3.1382, "step": 40270 }, { "epoch": 0.7, "learning_rate": 0.0001279925909172231, "loss": 3.2634, "step": 40275 }, { "epoch": 0.7, "learning_rate": 0.00012792519722229989, "loss": 3.2751, "step": 40280 }, { "epoch": 0.7, "learning_rate": 0.00012785781646652006, "loss": 3.0939, "step": 40285 }, { "epoch": 0.7, "learning_rate": 0.00012779044865495024, "loss": 3.2182, "step": 40290 }, { "epoch": 0.7, "learning_rate": 0.0001277230937926561, "loss": 3.0571, "step": 40295 }, { "epoch": 0.7, "learning_rate": 0.00012765575188470246, "loss": 3.1138, "step": 40300 }, { "epoch": 0.7, "learning_rate": 0.00012758842293615285, "loss": 3.1653, "step": 40305 }, { "epoch": 0.7, "learning_rate": 0.00012752110695207037, "loss": 3.2168, "step": 40310 }, { "epoch": 0.7, "learning_rate": 0.0001274538039375167, "loss": 3.1544, "step": 40315 }, { "epoch": 0.7, "learning_rate": 0.00012738651389755274, "loss": 3.2413, "step": 40320 }, { "epoch": 0.7, "learning_rate": 0.00012731923683723828, "loss": 3.2262, "step": 40325 }, { "epoch": 0.7, "learning_rate": 0.00012725197276163222, "loss": 3.1795, "step": 40330 }, { "epoch": 0.7, "learning_rate": 0.00012718472167579236, "loss": 3.1318, "step": 40335 }, { "epoch": 0.7, "learning_rate": 0.00012711748358477592, "loss": 3.2327, "step": 40340 }, { "epoch": 0.7, "learning_rate": 0.00012705025849363868, "loss": 3.3024, "step": 40345 }, { "epoch": 0.7, "learning_rate": 0.00012698304640743565, "loss": 3.156, "step": 40350 }, { "epoch": 0.7, "learning_rate": 0.00012691584733122083, "loss": 3.0815, "step": 40355 }, { "epoch": 0.7, "learning_rate": 0.00012684866127004715, "loss": 3.1712, "step": 40360 }, { "epoch": 0.7, "learning_rate": 0.00012678148822896685, "loss": 3.1828, "step": 40365 }, { "epoch": 0.7, "learning_rate": 0.0001267143282130309, "loss": 3.2036, "step": 40370 }, { "epoch": 0.7, "learning_rate": 0.0001266471812272894, "loss": 3.1898, "step": 40375 }, { "epoch": 0.7, "learning_rate": 0.00012658004727679145, "loss": 3.206, "step": 40380 }, { "epoch": 0.7, "learning_rate": 0.0001265129263665851, "loss": 3.2307, "step": 40385 }, { "epoch": 0.7, "learning_rate": 0.00012644581850171764, "loss": 3.1573, "step": 40390 }, { "epoch": 0.7, "learning_rate": 0.00012637872368723534, "loss": 3.1681, "step": 40395 }, { "epoch": 0.7, "learning_rate": 0.000126311641928183, "loss": 3.2237, "step": 40400 }, { "epoch": 0.7, "learning_rate": 0.0001262445732296051, "loss": 3.1012, "step": 40405 }, { "epoch": 0.7, "learning_rate": 0.00012617751759654488, "loss": 3.1711, "step": 40410 }, { "epoch": 0.7, "learning_rate": 0.00012611047503404435, "loss": 3.2486, "step": 40415 }, { "epoch": 0.7, "learning_rate": 0.00012604344554714518, "loss": 3.1923, "step": 40420 }, { "epoch": 0.7, "learning_rate": 0.00012597642914088716, "loss": 3.2736, "step": 40425 }, { "epoch": 0.7, "learning_rate": 0.00012590942582030992, "loss": 3.2386, "step": 40430 }, { "epoch": 0.7, "learning_rate": 0.00012584243559045168, "loss": 3.1725, "step": 40435 }, { "epoch": 0.7, "learning_rate": 0.0001257754584563496, "loss": 3.1664, "step": 40440 }, { "epoch": 0.7, "learning_rate": 0.0001257084944230404, "loss": 3.1511, "step": 40445 }, { "epoch": 0.7, "learning_rate": 0.00012564154349555893, "loss": 3.0612, "step": 40450 }, { "epoch": 0.7, "learning_rate": 0.00012557460567893999, "loss": 3.3118, "step": 40455 }, { "epoch": 0.7, "learning_rate": 0.00012550768097821673, "loss": 3.0661, "step": 40460 }, { "epoch": 0.7, "learning_rate": 0.0001254407693984215, "loss": 3.2882, "step": 40465 }, { "epoch": 0.7, "learning_rate": 0.00012537387094458603, "loss": 3.1967, "step": 40470 }, { "epoch": 0.7, "learning_rate": 0.00012530698562174044, "loss": 3.1915, "step": 40475 }, { "epoch": 0.7, "learning_rate": 0.00012524011343491412, "loss": 3.1552, "step": 40480 }, { "epoch": 0.7, "learning_rate": 0.00012517325438913572, "loss": 3.0782, "step": 40485 }, { "epoch": 0.7, "learning_rate": 0.00012510640848943265, "loss": 3.1107, "step": 40490 }, { "epoch": 0.7, "learning_rate": 0.00012503957574083133, "loss": 3.1968, "step": 40495 }, { "epoch": 0.7, "learning_rate": 0.0001249727561483573, "loss": 3.1461, "step": 40500 }, { "epoch": 0.7, "eval_loss": 3.2169432640075684, "eval_runtime": 149.8523, "eval_samples_per_second": 12.285, "eval_steps_per_second": 0.774, "step": 40500 }, { "epoch": 0.7, "learning_rate": 0.00012490594971703483, "loss": 2.9939, "step": 40505 }, { "epoch": 0.7, "learning_rate": 0.00012483915645188776, "loss": 3.0932, "step": 40510 }, { "epoch": 0.7, "learning_rate": 0.00012477237635793843, "loss": 3.1378, "step": 40515 }, { "epoch": 0.7, "learning_rate": 0.00012470560944020825, "loss": 3.1682, "step": 40520 }, { "epoch": 0.7, "learning_rate": 0.00012463885570371808, "loss": 3.0671, "step": 40525 }, { "epoch": 0.7, "learning_rate": 0.00012457211515348705, "loss": 3.0951, "step": 40530 }, { "epoch": 0.7, "learning_rate": 0.000124505387794534, "loss": 3.2597, "step": 40535 }, { "epoch": 0.7, "learning_rate": 0.0001244386736318764, "loss": 3.1131, "step": 40540 }, { "epoch": 0.7, "learning_rate": 0.0001243719726705307, "loss": 3.0366, "step": 40545 }, { "epoch": 0.7, "learning_rate": 0.00012430528491551277, "loss": 3.148, "step": 40550 }, { "epoch": 0.7, "learning_rate": 0.00012423861037183693, "loss": 3.1749, "step": 40555 }, { "epoch": 0.7, "learning_rate": 0.00012417194904451668, "loss": 3.1621, "step": 40560 }, { "epoch": 0.7, "learning_rate": 0.00012410530093856487, "loss": 3.1866, "step": 40565 }, { "epoch": 0.7, "learning_rate": 0.000124038666058993, "loss": 3.2424, "step": 40570 }, { "epoch": 0.7, "learning_rate": 0.00012397204441081164, "loss": 3.3355, "step": 40575 }, { "epoch": 0.7, "learning_rate": 0.00012390543599903042, "loss": 3.1592, "step": 40580 }, { "epoch": 0.7, "learning_rate": 0.00012383884082865781, "loss": 3.105, "step": 40585 }, { "epoch": 0.7, "learning_rate": 0.0001237722589047017, "loss": 3.2395, "step": 40590 }, { "epoch": 0.7, "learning_rate": 0.00012370569023216856, "loss": 3.1991, "step": 40595 }, { "epoch": 0.7, "learning_rate": 0.00012363913481606403, "loss": 3.1987, "step": 40600 }, { "epoch": 0.7, "learning_rate": 0.00012357259266139272, "loss": 3.0725, "step": 40605 }, { "epoch": 0.7, "learning_rate": 0.00012350606377315815, "loss": 3.2065, "step": 40610 }, { "epoch": 0.7, "learning_rate": 0.00012343954815636314, "loss": 3.2754, "step": 40615 }, { "epoch": 0.7, "learning_rate": 0.0001233730458160093, "loss": 3.1027, "step": 40620 }, { "epoch": 0.7, "learning_rate": 0.0001233065567570972, "loss": 3.244, "step": 40625 }, { "epoch": 0.7, "learning_rate": 0.00012324008098462652, "loss": 3.1625, "step": 40630 }, { "epoch": 0.7, "learning_rate": 0.00012317361850359583, "loss": 3.1442, "step": 40635 }, { "epoch": 0.7, "learning_rate": 0.0001231071693190027, "loss": 3.2685, "step": 40640 }, { "epoch": 0.7, "learning_rate": 0.00012304073343584403, "loss": 3.1912, "step": 40645 }, { "epoch": 0.7, "learning_rate": 0.00012297431085911526, "loss": 3.1647, "step": 40650 }, { "epoch": 0.7, "learning_rate": 0.00012290790159381106, "loss": 3.1712, "step": 40655 }, { "epoch": 0.7, "learning_rate": 0.00012284150564492513, "loss": 3.3168, "step": 40660 }, { "epoch": 0.7, "learning_rate": 0.0001227751230174499, "loss": 3.2013, "step": 40665 }, { "epoch": 0.7, "learning_rate": 0.0001227087537163773, "loss": 3.2584, "step": 40670 }, { "epoch": 0.7, "learning_rate": 0.00012264239774669776, "loss": 3.2529, "step": 40675 }, { "epoch": 0.7, "learning_rate": 0.00012257605511340104, "loss": 3.1225, "step": 40680 }, { "epoch": 0.7, "learning_rate": 0.00012250972582147562, "loss": 3.0349, "step": 40685 }, { "epoch": 0.7, "learning_rate": 0.00012244340987590914, "loss": 3.1949, "step": 40690 }, { "epoch": 0.7, "learning_rate": 0.00012237710728168835, "loss": 3.2146, "step": 40695 }, { "epoch": 0.7, "learning_rate": 0.00012231081804379892, "loss": 3.2274, "step": 40700 }, { "epoch": 0.7, "learning_rate": 0.0001222445421672251, "loss": 3.1047, "step": 40705 }, { "epoch": 0.7, "learning_rate": 0.00012217827965695083, "loss": 3.276, "step": 40710 }, { "epoch": 0.7, "learning_rate": 0.00012211203051795864, "loss": 3.1447, "step": 40715 }, { "epoch": 0.7, "learning_rate": 0.00012204579475522996, "loss": 3.1452, "step": 40720 }, { "epoch": 0.7, "learning_rate": 0.00012197957237374574, "loss": 3.187, "step": 40725 }, { "epoch": 0.7, "learning_rate": 0.00012191336337848511, "loss": 3.1845, "step": 40730 }, { "epoch": 0.7, "learning_rate": 0.00012184716777442699, "loss": 3.1049, "step": 40735 }, { "epoch": 0.71, "learning_rate": 0.00012178098556654884, "loss": 3.1475, "step": 40740 }, { "epoch": 0.71, "learning_rate": 0.00012171481675982707, "loss": 3.0952, "step": 40745 }, { "epoch": 0.71, "learning_rate": 0.00012164866135923754, "loss": 3.204, "step": 40750 }, { "epoch": 0.71, "learning_rate": 0.0001215825193697546, "loss": 3.3169, "step": 40755 }, { "epoch": 0.71, "learning_rate": 0.00012151639079635188, "loss": 3.324, "step": 40760 }, { "epoch": 0.71, "learning_rate": 0.00012145027564400183, "loss": 3.1329, "step": 40765 }, { "epoch": 0.71, "learning_rate": 0.00012138417391767586, "loss": 3.2409, "step": 40770 }, { "epoch": 0.71, "learning_rate": 0.00012131808562234478, "loss": 3.1679, "step": 40775 }, { "epoch": 0.71, "learning_rate": 0.00012125201076297802, "loss": 3.2133, "step": 40780 }, { "epoch": 0.71, "learning_rate": 0.00012118594934454375, "loss": 3.2173, "step": 40785 }, { "epoch": 0.71, "learning_rate": 0.0001211199013720098, "loss": 2.9781, "step": 40790 }, { "epoch": 0.71, "learning_rate": 0.0001210538668503425, "loss": 3.2979, "step": 40795 }, { "epoch": 0.71, "learning_rate": 0.00012098784578450726, "loss": 3.1413, "step": 40800 }, { "epoch": 0.71, "learning_rate": 0.0001209218381794688, "loss": 3.1493, "step": 40805 }, { "epoch": 0.71, "learning_rate": 0.00012085584404019011, "loss": 3.1366, "step": 40810 }, { "epoch": 0.71, "learning_rate": 0.00012078986337163399, "loss": 3.1048, "step": 40815 }, { "epoch": 0.71, "learning_rate": 0.00012072389617876167, "loss": 3.1932, "step": 40820 }, { "epoch": 0.71, "learning_rate": 0.00012065794246653348, "loss": 3.1397, "step": 40825 }, { "epoch": 0.71, "learning_rate": 0.00012059200223990913, "loss": 3.1325, "step": 40830 }, { "epoch": 0.71, "learning_rate": 0.00012052607550384668, "loss": 3.1377, "step": 40835 }, { "epoch": 0.71, "learning_rate": 0.00012046016226330344, "loss": 3.1563, "step": 40840 }, { "epoch": 0.71, "learning_rate": 0.00012039426252323596, "loss": 3.246, "step": 40845 }, { "epoch": 0.71, "learning_rate": 0.00012032837628859946, "loss": 3.0618, "step": 40850 }, { "epoch": 0.71, "learning_rate": 0.0001202625035643483, "loss": 3.1616, "step": 40855 }, { "epoch": 0.71, "learning_rate": 0.00012019664435543569, "loss": 3.1188, "step": 40860 }, { "epoch": 0.71, "learning_rate": 0.00012013079866681387, "loss": 3.2018, "step": 40865 }, { "epoch": 0.71, "learning_rate": 0.00012006496650343427, "loss": 3.1535, "step": 40870 }, { "epoch": 0.71, "learning_rate": 0.00011999914787024704, "loss": 3.1538, "step": 40875 }, { "epoch": 0.71, "learning_rate": 0.00011993334277220142, "loss": 3.2631, "step": 40880 }, { "epoch": 0.71, "learning_rate": 0.00011986755121424558, "loss": 3.1214, "step": 40885 }, { "epoch": 0.71, "learning_rate": 0.00011980177320132663, "loss": 3.1057, "step": 40890 }, { "epoch": 0.71, "learning_rate": 0.0001197360087383909, "loss": 3.1804, "step": 40895 }, { "epoch": 0.71, "learning_rate": 0.0001196702578303835, "loss": 3.0932, "step": 40900 }, { "epoch": 0.71, "learning_rate": 0.00011960452048224853, "loss": 3.1851, "step": 40905 }, { "epoch": 0.71, "learning_rate": 0.00011953879669892913, "loss": 3.1617, "step": 40910 }, { "epoch": 0.71, "learning_rate": 0.00011947308648536734, "loss": 3.1573, "step": 40915 }, { "epoch": 0.71, "learning_rate": 0.00011940738984650419, "loss": 3.1449, "step": 40920 }, { "epoch": 0.71, "learning_rate": 0.00011934170678727987, "loss": 3.1135, "step": 40925 }, { "epoch": 0.71, "learning_rate": 0.00011927603731263337, "loss": 3.1522, "step": 40930 }, { "epoch": 0.71, "learning_rate": 0.00011921038142750264, "loss": 3.1436, "step": 40935 }, { "epoch": 0.71, "learning_rate": 0.0001191447391368247, "loss": 3.1961, "step": 40940 }, { "epoch": 0.71, "learning_rate": 0.00011907911044553541, "loss": 3.2341, "step": 40945 }, { "epoch": 0.71, "learning_rate": 0.00011901349535856993, "loss": 3.3183, "step": 40950 }, { "epoch": 0.71, "learning_rate": 0.00011894789388086205, "loss": 3.1467, "step": 40955 }, { "epoch": 0.71, "learning_rate": 0.00011888230601734466, "loss": 3.099, "step": 40960 }, { "epoch": 0.71, "learning_rate": 0.00011881673177294967, "loss": 3.1508, "step": 40965 }, { "epoch": 0.71, "learning_rate": 0.00011875117115260775, "loss": 3.2019, "step": 40970 }, { "epoch": 0.71, "learning_rate": 0.00011868562416124903, "loss": 3.2991, "step": 40975 }, { "epoch": 0.71, "learning_rate": 0.00011862009080380213, "loss": 3.0779, "step": 40980 }, { "epoch": 0.71, "learning_rate": 0.00011855457108519482, "loss": 3.1401, "step": 40985 }, { "epoch": 0.71, "learning_rate": 0.0001184890650103539, "loss": 3.1752, "step": 40990 }, { "epoch": 0.71, "learning_rate": 0.00011842357258420507, "loss": 3.241, "step": 40995 }, { "epoch": 0.71, "learning_rate": 0.00011835809381167289, "loss": 3.2169, "step": 41000 }, { "epoch": 0.71, "eval_loss": 3.2123522758483887, "eval_runtime": 149.7773, "eval_samples_per_second": 12.292, "eval_steps_per_second": 0.774, "step": 41000 }, { "epoch": 0.71, "learning_rate": 0.00011829262869768125, "loss": 3.1618, "step": 41005 }, { "epoch": 0.71, "learning_rate": 0.00011822717724715272, "loss": 3.1831, "step": 41010 }, { "epoch": 0.71, "learning_rate": 0.0001181617394650089, "loss": 3.1159, "step": 41015 }, { "epoch": 0.71, "learning_rate": 0.00011809631535617035, "loss": 3.2193, "step": 41020 }, { "epoch": 0.71, "learning_rate": 0.00011803090492555653, "loss": 3.2243, "step": 41025 }, { "epoch": 0.71, "learning_rate": 0.0001179655081780862, "loss": 3.1949, "step": 41030 }, { "epoch": 0.71, "learning_rate": 0.00011790012511867675, "loss": 3.2647, "step": 41035 }, { "epoch": 0.71, "learning_rate": 0.00011783475575224461, "loss": 3.1304, "step": 41040 }, { "epoch": 0.71, "learning_rate": 0.0001177694000837053, "loss": 3.045, "step": 41045 }, { "epoch": 0.71, "learning_rate": 0.00011770405811797305, "loss": 3.1695, "step": 41050 }, { "epoch": 0.71, "learning_rate": 0.00011763872985996152, "loss": 3.1831, "step": 41055 }, { "epoch": 0.71, "learning_rate": 0.00011757341531458297, "loss": 3.0754, "step": 41060 }, { "epoch": 0.71, "learning_rate": 0.0001175081144867485, "loss": 3.0875, "step": 41065 }, { "epoch": 0.71, "learning_rate": 0.00011744282738136864, "loss": 3.1625, "step": 41070 }, { "epoch": 0.71, "learning_rate": 0.00011737755400335259, "loss": 3.2232, "step": 41075 }, { "epoch": 0.71, "learning_rate": 0.00011731229435760843, "loss": 3.2507, "step": 41080 }, { "epoch": 0.71, "learning_rate": 0.00011724704844904368, "loss": 3.1869, "step": 41085 }, { "epoch": 0.71, "learning_rate": 0.00011718181628256413, "loss": 3.2847, "step": 41090 }, { "epoch": 0.71, "learning_rate": 0.00011711659786307514, "loss": 3.199, "step": 41095 }, { "epoch": 0.71, "learning_rate": 0.00011705139319548073, "loss": 3.1731, "step": 41100 }, { "epoch": 0.71, "learning_rate": 0.00011698620228468384, "loss": 3.0682, "step": 41105 }, { "epoch": 0.71, "learning_rate": 0.00011692102513558682, "loss": 3.1529, "step": 41110 }, { "epoch": 0.71, "learning_rate": 0.00011685586175309026, "loss": 3.31, "step": 41115 }, { "epoch": 0.71, "learning_rate": 0.00011679071214209438, "loss": 3.1318, "step": 41120 }, { "epoch": 0.71, "learning_rate": 0.00011672557630749801, "loss": 3.2206, "step": 41125 }, { "epoch": 0.71, "learning_rate": 0.00011666045425419895, "loss": 3.1701, "step": 41130 }, { "epoch": 0.71, "learning_rate": 0.00011659534598709431, "loss": 3.1972, "step": 41135 }, { "epoch": 0.71, "learning_rate": 0.00011653025151107965, "loss": 3.1705, "step": 41140 }, { "epoch": 0.71, "learning_rate": 0.0001164651708310497, "loss": 3.1075, "step": 41145 }, { "epoch": 0.71, "learning_rate": 0.00011640010395189838, "loss": 3.2107, "step": 41150 }, { "epoch": 0.71, "learning_rate": 0.0001163350508785183, "loss": 3.1441, "step": 41155 }, { "epoch": 0.71, "learning_rate": 0.00011627001161580116, "loss": 3.2071, "step": 41160 }, { "epoch": 0.71, "learning_rate": 0.00011620498616863753, "loss": 3.0613, "step": 41165 }, { "epoch": 0.71, "learning_rate": 0.00011613997454191691, "loss": 3.2416, "step": 41170 }, { "epoch": 0.71, "learning_rate": 0.00011607497674052802, "loss": 3.1232, "step": 41175 }, { "epoch": 0.71, "learning_rate": 0.00011600999276935832, "loss": 3.1565, "step": 41180 }, { "epoch": 0.71, "learning_rate": 0.00011594502263329418, "loss": 3.1478, "step": 41185 }, { "epoch": 0.71, "learning_rate": 0.0001158800663372211, "loss": 3.0881, "step": 41190 }, { "epoch": 0.71, "learning_rate": 0.00011581512388602334, "loss": 3.174, "step": 41195 }, { "epoch": 0.71, "learning_rate": 0.0001157501952845844, "loss": 3.0763, "step": 41200 }, { "epoch": 0.71, "learning_rate": 0.00011568528053778651, "loss": 3.153, "step": 41205 }, { "epoch": 0.71, "learning_rate": 0.00011562037965051084, "loss": 3.2635, "step": 41210 }, { "epoch": 0.71, "learning_rate": 0.0001155554926276379, "loss": 3.2358, "step": 41215 }, { "epoch": 0.71, "learning_rate": 0.00011549061947404656, "loss": 3.167, "step": 41220 }, { "epoch": 0.71, "learning_rate": 0.00011542576019461489, "loss": 3.144, "step": 41225 }, { "epoch": 0.71, "learning_rate": 0.00011536091479422028, "loss": 3.1572, "step": 41230 }, { "epoch": 0.71, "learning_rate": 0.00011529608327773859, "loss": 3.128, "step": 41235 }, { "epoch": 0.71, "learning_rate": 0.00011523126565004486, "loss": 3.2118, "step": 41240 }, { "epoch": 0.71, "learning_rate": 0.00011516646191601301, "loss": 3.273, "step": 41245 }, { "epoch": 0.71, "learning_rate": 0.00011510167208051588, "loss": 3.1155, "step": 41250 }, { "epoch": 0.71, "learning_rate": 0.00011503689614842551, "loss": 3.2507, "step": 41255 }, { "epoch": 0.71, "learning_rate": 0.00011497213412461262, "loss": 3.231, "step": 41260 }, { "epoch": 0.71, "learning_rate": 0.000114907386013947, "loss": 3.1433, "step": 41265 }, { "epoch": 0.71, "learning_rate": 0.00011484265182129739, "loss": 3.1993, "step": 41270 }, { "epoch": 0.71, "learning_rate": 0.00011477793155153134, "loss": 3.3162, "step": 41275 }, { "epoch": 0.71, "learning_rate": 0.00011471322520951567, "loss": 3.2125, "step": 41280 }, { "epoch": 0.71, "learning_rate": 0.0001146485328001159, "loss": 3.1264, "step": 41285 }, { "epoch": 0.71, "learning_rate": 0.00011458385432819654, "loss": 3.2733, "step": 41290 }, { "epoch": 0.71, "learning_rate": 0.00011451918979862108, "loss": 3.2649, "step": 41295 }, { "epoch": 0.71, "learning_rate": 0.00011445453921625198, "loss": 3.2322, "step": 41300 }, { "epoch": 0.71, "learning_rate": 0.00011438990258595055, "loss": 3.1406, "step": 41305 }, { "epoch": 0.71, "learning_rate": 0.0001143252799125773, "loss": 3.1362, "step": 41310 }, { "epoch": 0.72, "learning_rate": 0.00011426067120099141, "loss": 3.1726, "step": 41315 }, { "epoch": 0.72, "learning_rate": 0.00011419607645605117, "loss": 3.1266, "step": 41320 }, { "epoch": 0.72, "learning_rate": 0.00011413149568261374, "loss": 3.1741, "step": 41325 }, { "epoch": 0.72, "learning_rate": 0.00011406692888553518, "loss": 3.1985, "step": 41330 }, { "epoch": 0.72, "learning_rate": 0.00011400237606967078, "loss": 3.0828, "step": 41335 }, { "epoch": 0.72, "learning_rate": 0.00011393783723987457, "loss": 3.2476, "step": 41340 }, { "epoch": 0.72, "learning_rate": 0.00011387331240099926, "loss": 3.2025, "step": 41345 }, { "epoch": 0.72, "learning_rate": 0.00011380880155789706, "loss": 3.3122, "step": 41350 }, { "epoch": 0.72, "learning_rate": 0.00011374430471541874, "loss": 3.1677, "step": 41355 }, { "epoch": 0.72, "learning_rate": 0.00011367982187841411, "loss": 2.9866, "step": 41360 }, { "epoch": 0.72, "learning_rate": 0.00011361535305173218, "loss": 3.1992, "step": 41365 }, { "epoch": 0.72, "learning_rate": 0.00011355089824022028, "loss": 3.1555, "step": 41370 }, { "epoch": 0.72, "learning_rate": 0.00011348645744872542, "loss": 3.2704, "step": 41375 }, { "epoch": 0.72, "learning_rate": 0.00011342203068209307, "loss": 3.1413, "step": 41380 }, { "epoch": 0.72, "learning_rate": 0.00011335761794516771, "loss": 3.1366, "step": 41385 }, { "epoch": 0.72, "learning_rate": 0.00011329321924279318, "loss": 3.1669, "step": 41390 }, { "epoch": 0.72, "learning_rate": 0.00011322883457981149, "loss": 3.1667, "step": 41395 }, { "epoch": 0.72, "learning_rate": 0.00011316446396106438, "loss": 3.1504, "step": 41400 }, { "epoch": 0.72, "learning_rate": 0.00011310010739139204, "loss": 3.2718, "step": 41405 }, { "epoch": 0.72, "learning_rate": 0.00011303576487563366, "loss": 3.1071, "step": 41410 }, { "epoch": 0.72, "learning_rate": 0.00011297143641862781, "loss": 3.2227, "step": 41415 }, { "epoch": 0.72, "learning_rate": 0.00011290712202521133, "loss": 3.2243, "step": 41420 }, { "epoch": 0.72, "learning_rate": 0.00011284282170022033, "loss": 3.185, "step": 41425 }, { "epoch": 0.72, "learning_rate": 0.0001127785354484901, "loss": 3.1816, "step": 41430 }, { "epoch": 0.72, "learning_rate": 0.00011271426327485448, "loss": 3.204, "step": 41435 }, { "epoch": 0.72, "learning_rate": 0.00011265000518414633, "loss": 3.1829, "step": 41440 }, { "epoch": 0.72, "learning_rate": 0.00011258576118119786, "loss": 3.0784, "step": 41445 }, { "epoch": 0.72, "learning_rate": 0.00011252153127083944, "loss": 3.0765, "step": 41450 }, { "epoch": 0.72, "learning_rate": 0.00011245731545790121, "loss": 3.0026, "step": 41455 }, { "epoch": 0.72, "learning_rate": 0.00011239311374721167, "loss": 3.1984, "step": 41460 }, { "epoch": 0.72, "learning_rate": 0.0001123289261435984, "loss": 3.1799, "step": 41465 }, { "epoch": 0.72, "learning_rate": 0.00011226475265188832, "loss": 3.2619, "step": 41470 }, { "epoch": 0.72, "learning_rate": 0.00011220059327690649, "loss": 3.1734, "step": 41475 }, { "epoch": 0.72, "learning_rate": 0.00011213644802347769, "loss": 3.1921, "step": 41480 }, { "epoch": 0.72, "learning_rate": 0.00011207231689642522, "loss": 3.1058, "step": 41485 }, { "epoch": 0.72, "learning_rate": 0.00011200819990057129, "loss": 3.0331, "step": 41490 }, { "epoch": 0.72, "learning_rate": 0.00011194409704073752, "loss": 3.0883, "step": 41495 }, { "epoch": 0.72, "learning_rate": 0.00011188000832174376, "loss": 3.1265, "step": 41500 }, { "epoch": 0.72, "eval_loss": 3.207731008529663, "eval_runtime": 149.6797, "eval_samples_per_second": 12.3, "eval_steps_per_second": 0.775, "step": 41500 }, { "epoch": 0.72, "learning_rate": 0.00011181593374840916, "loss": 3.0709, "step": 41505 }, { "epoch": 0.72, "learning_rate": 0.00011175187332555206, "loss": 3.2107, "step": 41510 }, { "epoch": 0.72, "learning_rate": 0.00011168782705798929, "loss": 3.1451, "step": 41515 }, { "epoch": 0.72, "learning_rate": 0.00011162379495053683, "loss": 3.118, "step": 41520 }, { "epoch": 0.72, "learning_rate": 0.00011155977700800959, "loss": 3.0218, "step": 41525 }, { "epoch": 0.72, "learning_rate": 0.00011149577323522128, "loss": 3.0796, "step": 41530 }, { "epoch": 0.72, "learning_rate": 0.00011143178363698489, "loss": 3.2099, "step": 41535 }, { "epoch": 0.72, "learning_rate": 0.00011136780821811194, "loss": 3.0831, "step": 41540 }, { "epoch": 0.72, "learning_rate": 0.00011130384698341308, "loss": 3.2227, "step": 41545 }, { "epoch": 0.72, "learning_rate": 0.00011123989993769789, "loss": 3.151, "step": 41550 }, { "epoch": 0.72, "learning_rate": 0.00011117596708577473, "loss": 3.1856, "step": 41555 }, { "epoch": 0.72, "learning_rate": 0.00011111204843245124, "loss": 3.086, "step": 41560 }, { "epoch": 0.72, "learning_rate": 0.00011104814398253367, "loss": 3.2135, "step": 41565 }, { "epoch": 0.72, "learning_rate": 0.00011098425374082733, "loss": 3.1735, "step": 41570 }, { "epoch": 0.72, "learning_rate": 0.0001109203777121364, "loss": 3.2179, "step": 41575 }, { "epoch": 0.72, "learning_rate": 0.00011085651590126405, "loss": 3.2251, "step": 41580 }, { "epoch": 0.72, "learning_rate": 0.00011079266831301225, "loss": 3.1628, "step": 41585 }, { "epoch": 0.72, "learning_rate": 0.00011072883495218224, "loss": 3.2514, "step": 41590 }, { "epoch": 0.72, "learning_rate": 0.00011066501582357384, "loss": 3.2295, "step": 41595 }, { "epoch": 0.72, "learning_rate": 0.00011060121093198591, "loss": 3.2016, "step": 41600 }, { "epoch": 0.72, "learning_rate": 0.00011053742028221629, "loss": 3.1608, "step": 41605 }, { "epoch": 0.72, "learning_rate": 0.00011047364387906157, "loss": 3.1431, "step": 41610 }, { "epoch": 0.72, "learning_rate": 0.0001104098817273176, "loss": 3.1741, "step": 41615 }, { "epoch": 0.72, "learning_rate": 0.00011034613383177893, "loss": 3.2561, "step": 41620 }, { "epoch": 0.72, "learning_rate": 0.000110282400197239, "loss": 3.1301, "step": 41625 }, { "epoch": 0.72, "learning_rate": 0.00011021868082849031, "loss": 3.1035, "step": 41630 }, { "epoch": 0.72, "learning_rate": 0.00011015497573032411, "loss": 3.2929, "step": 41635 }, { "epoch": 0.72, "learning_rate": 0.00011009128490753086, "loss": 3.233, "step": 41640 }, { "epoch": 0.72, "learning_rate": 0.00011002760836489986, "loss": 3.0038, "step": 41645 }, { "epoch": 0.72, "learning_rate": 0.00010996394610721889, "loss": 3.2885, "step": 41650 }, { "epoch": 0.72, "learning_rate": 0.00010990029813927538, "loss": 3.1889, "step": 41655 }, { "epoch": 0.72, "learning_rate": 0.00010983666446585519, "loss": 3.2143, "step": 41660 }, { "epoch": 0.72, "learning_rate": 0.00010977304509174315, "loss": 3.1514, "step": 41665 }, { "epoch": 0.72, "learning_rate": 0.00010970944002172335, "loss": 3.1891, "step": 41670 }, { "epoch": 0.72, "learning_rate": 0.00010964584926057843, "loss": 3.1459, "step": 41675 }, { "epoch": 0.72, "learning_rate": 0.00010958227281309008, "loss": 3.069, "step": 41680 }, { "epoch": 0.72, "learning_rate": 0.00010951871068403893, "loss": 3.3585, "step": 41685 }, { "epoch": 0.72, "learning_rate": 0.00010945516287820446, "loss": 3.2503, "step": 41690 }, { "epoch": 0.72, "learning_rate": 0.0001093916294003653, "loss": 3.1437, "step": 41695 }, { "epoch": 0.72, "learning_rate": 0.00010932811025529875, "loss": 3.0947, "step": 41700 }, { "epoch": 0.72, "learning_rate": 0.00010926460544778115, "loss": 3.2289, "step": 41705 }, { "epoch": 0.72, "learning_rate": 0.00010920111498258771, "loss": 3.1003, "step": 41710 }, { "epoch": 0.72, "learning_rate": 0.00010913763886449249, "loss": 3.1294, "step": 41715 }, { "epoch": 0.72, "learning_rate": 0.00010907417709826878, "loss": 3.126, "step": 41720 }, { "epoch": 0.72, "learning_rate": 0.00010901072968868858, "loss": 3.0671, "step": 41725 }, { "epoch": 0.72, "learning_rate": 0.00010894729664052247, "loss": 3.2908, "step": 41730 }, { "epoch": 0.72, "learning_rate": 0.00010888387795854067, "loss": 3.1179, "step": 41735 }, { "epoch": 0.72, "learning_rate": 0.00010882047364751177, "loss": 3.1891, "step": 41740 }, { "epoch": 0.72, "learning_rate": 0.00010875708371220335, "loss": 3.193, "step": 41745 }, { "epoch": 0.72, "learning_rate": 0.00010869370815738237, "loss": 3.2572, "step": 41750 }, { "epoch": 0.72, "learning_rate": 0.00010863034698781387, "loss": 3.1651, "step": 41755 }, { "epoch": 0.72, "learning_rate": 0.00010856700020826264, "loss": 3.1652, "step": 41760 }, { "epoch": 0.72, "learning_rate": 0.00010850366782349193, "loss": 3.1472, "step": 41765 }, { "epoch": 0.72, "learning_rate": 0.00010844034983826389, "loss": 3.2266, "step": 41770 }, { "epoch": 0.72, "learning_rate": 0.00010837704625734002, "loss": 3.2599, "step": 41775 }, { "epoch": 0.72, "learning_rate": 0.00010831375708547998, "loss": 3.2315, "step": 41780 }, { "epoch": 0.72, "learning_rate": 0.00010825048232744318, "loss": 3.2559, "step": 41785 }, { "epoch": 0.72, "learning_rate": 0.00010818722198798737, "loss": 3.1243, "step": 41790 }, { "epoch": 0.72, "learning_rate": 0.00010812397607186936, "loss": 3.1431, "step": 41795 }, { "epoch": 0.72, "learning_rate": 0.00010806074458384519, "loss": 3.2221, "step": 41800 }, { "epoch": 0.72, "learning_rate": 0.00010799752752866928, "loss": 3.2885, "step": 41805 }, { "epoch": 0.72, "learning_rate": 0.00010793432491109518, "loss": 3.2099, "step": 41810 }, { "epoch": 0.72, "learning_rate": 0.00010787113673587563, "loss": 3.2519, "step": 41815 }, { "epoch": 0.72, "learning_rate": 0.00010780796300776196, "loss": 3.1942, "step": 41820 }, { "epoch": 0.72, "learning_rate": 0.00010774480373150449, "loss": 3.0997, "step": 41825 }, { "epoch": 0.72, "learning_rate": 0.0001076816589118525, "loss": 3.1281, "step": 41830 }, { "epoch": 0.72, "learning_rate": 0.00010761852855355405, "loss": 3.1984, "step": 41835 }, { "epoch": 0.72, "learning_rate": 0.00010755541266135639, "loss": 3.1747, "step": 41840 }, { "epoch": 0.72, "learning_rate": 0.00010749231124000543, "loss": 3.116, "step": 41845 }, { "epoch": 0.72, "learning_rate": 0.00010742922429424612, "loss": 3.093, "step": 41850 }, { "epoch": 0.72, "learning_rate": 0.00010736615182882218, "loss": 3.2103, "step": 41855 }, { "epoch": 0.72, "learning_rate": 0.00010730309384847643, "loss": 3.2, "step": 41860 }, { "epoch": 0.72, "learning_rate": 0.00010724005035795035, "loss": 3.0806, "step": 41865 }, { "epoch": 0.72, "learning_rate": 0.00010717702136198471, "loss": 3.2047, "step": 41870 }, { "epoch": 0.72, "learning_rate": 0.00010711400686531885, "loss": 3.1632, "step": 41875 }, { "epoch": 0.72, "learning_rate": 0.00010705100687269118, "loss": 3.0442, "step": 41880 }, { "epoch": 0.72, "learning_rate": 0.00010698802138883891, "loss": 3.2539, "step": 41885 }, { "epoch": 0.72, "learning_rate": 0.00010692505041849818, "loss": 3.1052, "step": 41890 }, { "epoch": 0.73, "learning_rate": 0.00010686209396640425, "loss": 3.2667, "step": 41895 }, { "epoch": 0.73, "learning_rate": 0.00010679915203729107, "loss": 3.2379, "step": 41900 }, { "epoch": 0.73, "learning_rate": 0.00010673622463589149, "loss": 3.2051, "step": 41905 }, { "epoch": 0.73, "learning_rate": 0.00010667331176693735, "loss": 3.1852, "step": 41910 }, { "epoch": 0.73, "learning_rate": 0.00010661041343515931, "loss": 3.1764, "step": 41915 }, { "epoch": 0.73, "learning_rate": 0.00010654752964528715, "loss": 3.191, "step": 41920 }, { "epoch": 0.73, "learning_rate": 0.00010648466040204932, "loss": 3.2177, "step": 41925 }, { "epoch": 0.73, "learning_rate": 0.00010642180571017332, "loss": 3.176, "step": 41930 }, { "epoch": 0.73, "learning_rate": 0.00010635896557438544, "loss": 3.1663, "step": 41935 }, { "epoch": 0.73, "learning_rate": 0.00010629613999941095, "loss": 3.1828, "step": 41940 }, { "epoch": 0.73, "learning_rate": 0.00010623332898997389, "loss": 3.246, "step": 41945 }, { "epoch": 0.73, "learning_rate": 0.00010617053255079758, "loss": 3.1608, "step": 41950 }, { "epoch": 0.73, "learning_rate": 0.00010610775068660385, "loss": 3.1494, "step": 41955 }, { "epoch": 0.73, "learning_rate": 0.00010604498340211361, "loss": 3.173, "step": 41960 }, { "epoch": 0.73, "learning_rate": 0.0001059822307020466, "loss": 3.2291, "step": 41965 }, { "epoch": 0.73, "learning_rate": 0.0001059194925911214, "loss": 3.1392, "step": 41970 }, { "epoch": 0.73, "learning_rate": 0.00010585676907405583, "loss": 3.1625, "step": 41975 }, { "epoch": 0.73, "learning_rate": 0.00010579406015556625, "loss": 3.271, "step": 41980 }, { "epoch": 0.73, "learning_rate": 0.00010573136584036807, "loss": 3.1903, "step": 41985 }, { "epoch": 0.73, "learning_rate": 0.00010566868613317558, "loss": 3.1708, "step": 41990 }, { "epoch": 0.73, "learning_rate": 0.0001056060210387018, "loss": 3.1984, "step": 41995 }, { "epoch": 0.73, "learning_rate": 0.00010554337056165913, "loss": 3.1468, "step": 42000 }, { "epoch": 0.73, "eval_loss": 3.204759359359741, "eval_runtime": 149.9776, "eval_samples_per_second": 12.275, "eval_steps_per_second": 0.773, "step": 42000 }, { "epoch": 0.73, "learning_rate": 0.00010548073470675853, "loss": 3.0838, "step": 42005 }, { "epoch": 0.73, "learning_rate": 0.00010541811347870956, "loss": 3.1097, "step": 42010 }, { "epoch": 0.73, "learning_rate": 0.00010535550688222135, "loss": 3.0943, "step": 42015 }, { "epoch": 0.73, "learning_rate": 0.00010529291492200146, "loss": 3.212, "step": 42020 }, { "epoch": 0.73, "learning_rate": 0.00010523033760275644, "loss": 3.1342, "step": 42025 }, { "epoch": 0.73, "learning_rate": 0.00010516777492919206, "loss": 3.1234, "step": 42030 }, { "epoch": 0.73, "learning_rate": 0.00010510522690601228, "loss": 3.09, "step": 42035 }, { "epoch": 0.73, "learning_rate": 0.00010504269353792073, "loss": 3.1094, "step": 42040 }, { "epoch": 0.73, "learning_rate": 0.00010498017482961946, "loss": 3.1887, "step": 42045 }, { "epoch": 0.73, "learning_rate": 0.00010491767078580953, "loss": 3.1215, "step": 42050 }, { "epoch": 0.73, "learning_rate": 0.00010485518141119113, "loss": 3.1981, "step": 42055 }, { "epoch": 0.73, "learning_rate": 0.00010479270671046284, "loss": 3.1392, "step": 42060 }, { "epoch": 0.73, "learning_rate": 0.00010473024668832265, "loss": 3.1256, "step": 42065 }, { "epoch": 0.73, "learning_rate": 0.00010466780134946715, "loss": 3.1203, "step": 42070 }, { "epoch": 0.73, "learning_rate": 0.00010460537069859185, "loss": 3.0632, "step": 42075 }, { "epoch": 0.73, "learning_rate": 0.0001045429547403915, "loss": 3.1372, "step": 42080 }, { "epoch": 0.73, "learning_rate": 0.00010448055347955913, "loss": 3.2098, "step": 42085 }, { "epoch": 0.73, "learning_rate": 0.00010441816692078702, "loss": 3.1605, "step": 42090 }, { "epoch": 0.73, "learning_rate": 0.0001043557950687665, "loss": 3.1536, "step": 42095 }, { "epoch": 0.73, "learning_rate": 0.0001042934379281876, "loss": 3.2327, "step": 42100 }, { "epoch": 0.73, "learning_rate": 0.00010423109550373913, "loss": 3.122, "step": 42105 }, { "epoch": 0.73, "learning_rate": 0.00010416876780010898, "loss": 3.1947, "step": 42110 }, { "epoch": 0.73, "learning_rate": 0.0001041064548219838, "loss": 3.2346, "step": 42115 }, { "epoch": 0.73, "learning_rate": 0.00010404415657404935, "loss": 3.2128, "step": 42120 }, { "epoch": 0.73, "learning_rate": 0.00010398187306099007, "loss": 3.2819, "step": 42125 }, { "epoch": 0.73, "learning_rate": 0.00010391960428748938, "loss": 3.1558, "step": 42130 }, { "epoch": 0.73, "learning_rate": 0.00010385735025822954, "loss": 3.1389, "step": 42135 }, { "epoch": 0.73, "learning_rate": 0.00010379511097789162, "loss": 3.2378, "step": 42140 }, { "epoch": 0.73, "learning_rate": 0.00010373288645115594, "loss": 3.1614, "step": 42145 }, { "epoch": 0.73, "learning_rate": 0.00010367067668270137, "loss": 3.106, "step": 42150 }, { "epoch": 0.73, "learning_rate": 0.00010360848167720561, "loss": 3.1168, "step": 42155 }, { "epoch": 0.73, "learning_rate": 0.00010354630143934578, "loss": 3.2302, "step": 42160 }, { "epoch": 0.73, "learning_rate": 0.00010348413597379714, "loss": 3.1316, "step": 42165 }, { "epoch": 0.73, "learning_rate": 0.00010342198528523423, "loss": 3.2556, "step": 42170 }, { "epoch": 0.73, "learning_rate": 0.00010335984937833069, "loss": 3.1538, "step": 42175 }, { "epoch": 0.73, "learning_rate": 0.00010329772825775871, "loss": 3.0533, "step": 42180 }, { "epoch": 0.73, "learning_rate": 0.0001032356219281895, "loss": 3.0731, "step": 42185 }, { "epoch": 0.73, "learning_rate": 0.00010317353039429312, "loss": 3.1566, "step": 42190 }, { "epoch": 0.73, "learning_rate": 0.00010311145366073845, "loss": 3.0046, "step": 42195 }, { "epoch": 0.73, "learning_rate": 0.00010304939173219347, "loss": 3.1053, "step": 42200 }, { "epoch": 0.73, "learning_rate": 0.00010298734461332493, "loss": 3.1393, "step": 42205 }, { "epoch": 0.73, "learning_rate": 0.00010292531230879836, "loss": 3.1764, "step": 42210 }, { "epoch": 0.73, "learning_rate": 0.00010286329482327833, "loss": 3.2309, "step": 42215 }, { "epoch": 0.73, "learning_rate": 0.00010280129216142815, "loss": 3.2166, "step": 42220 }, { "epoch": 0.73, "learning_rate": 0.00010273930432791024, "loss": 3.0617, "step": 42225 }, { "epoch": 0.73, "learning_rate": 0.00010267733132738571, "loss": 3.2347, "step": 42230 }, { "epoch": 0.73, "learning_rate": 0.00010261537316451457, "loss": 3.1509, "step": 42235 }, { "epoch": 0.73, "learning_rate": 0.00010255342984395583, "loss": 3.1592, "step": 42240 }, { "epoch": 0.73, "learning_rate": 0.00010249150137036725, "loss": 3.2813, "step": 42245 }, { "epoch": 0.73, "learning_rate": 0.00010242958774840544, "loss": 3.0992, "step": 42250 }, { "epoch": 0.73, "learning_rate": 0.00010236768898272622, "loss": 3.1728, "step": 42255 }, { "epoch": 0.73, "learning_rate": 0.00010230580507798392, "loss": 3.1918, "step": 42260 }, { "epoch": 0.73, "learning_rate": 0.00010224393603883195, "loss": 3.1306, "step": 42265 }, { "epoch": 0.73, "learning_rate": 0.00010218208186992248, "loss": 3.1408, "step": 42270 }, { "epoch": 0.73, "learning_rate": 0.00010212024257590652, "loss": 3.1967, "step": 42275 }, { "epoch": 0.73, "learning_rate": 0.00010205841816143437, "loss": 3.2242, "step": 42280 }, { "epoch": 0.73, "learning_rate": 0.0001019966086311547, "loss": 3.2118, "step": 42285 }, { "epoch": 0.73, "learning_rate": 0.00010193481398971532, "loss": 3.195, "step": 42290 }, { "epoch": 0.73, "learning_rate": 0.00010187303424176285, "loss": 3.2024, "step": 42295 }, { "epoch": 0.73, "learning_rate": 0.00010181126939194269, "loss": 3.2437, "step": 42300 }, { "epoch": 0.73, "learning_rate": 0.00010174951944489953, "loss": 3.1947, "step": 42305 }, { "epoch": 0.73, "learning_rate": 0.00010168778440527656, "loss": 3.2454, "step": 42310 }, { "epoch": 0.73, "learning_rate": 0.00010162606427771568, "loss": 3.2738, "step": 42315 }, { "epoch": 0.73, "learning_rate": 0.0001015643590668582, "loss": 3.1613, "step": 42320 }, { "epoch": 0.73, "learning_rate": 0.00010150266877734395, "loss": 3.117, "step": 42325 }, { "epoch": 0.73, "learning_rate": 0.00010144099341381165, "loss": 3.143, "step": 42330 }, { "epoch": 0.73, "learning_rate": 0.00010137933298089923, "loss": 3.2231, "step": 42335 }, { "epoch": 0.73, "learning_rate": 0.00010131768748324284, "loss": 3.1593, "step": 42340 }, { "epoch": 0.73, "learning_rate": 0.00010125605692547824, "loss": 3.1333, "step": 42345 }, { "epoch": 0.73, "learning_rate": 0.00010119444131223962, "loss": 3.1118, "step": 42350 }, { "epoch": 0.73, "learning_rate": 0.00010113284064816004, "loss": 3.1835, "step": 42355 }, { "epoch": 0.73, "learning_rate": 0.00010107125493787187, "loss": 3.2863, "step": 42360 }, { "epoch": 0.73, "learning_rate": 0.00010100968418600574, "loss": 3.1489, "step": 42365 }, { "epoch": 0.73, "learning_rate": 0.00010094812839719143, "loss": 3.1761, "step": 42370 }, { "epoch": 0.73, "learning_rate": 0.00010088658757605786, "loss": 3.0487, "step": 42375 }, { "epoch": 0.73, "learning_rate": 0.00010082506172723244, "loss": 3.0552, "step": 42380 }, { "epoch": 0.73, "learning_rate": 0.00010076355085534155, "loss": 3.1523, "step": 42385 }, { "epoch": 0.73, "learning_rate": 0.00010070205496501075, "loss": 3.1273, "step": 42390 }, { "epoch": 0.73, "learning_rate": 0.00010064057406086381, "loss": 3.1326, "step": 42395 }, { "epoch": 0.73, "learning_rate": 0.00010057910814752414, "loss": 3.0923, "step": 42400 }, { "epoch": 0.73, "learning_rate": 0.00010051765722961349, "loss": 3.09, "step": 42405 }, { "epoch": 0.73, "learning_rate": 0.00010045622131175262, "loss": 3.2578, "step": 42410 }, { "epoch": 0.73, "learning_rate": 0.00010039480039856145, "loss": 3.0941, "step": 42415 }, { "epoch": 0.73, "learning_rate": 0.0001003333944946581, "loss": 3.2674, "step": 42420 }, { "epoch": 0.73, "learning_rate": 0.00010027200360466032, "loss": 3.1428, "step": 42425 }, { "epoch": 0.73, "learning_rate": 0.00010021062773318431, "loss": 3.2387, "step": 42430 }, { "epoch": 0.73, "learning_rate": 0.00010014926688484507, "loss": 3.0526, "step": 42435 }, { "epoch": 0.73, "learning_rate": 0.00010008792106425693, "loss": 3.151, "step": 42440 }, { "epoch": 0.73, "learning_rate": 0.00010002659027603253, "loss": 3.113, "step": 42445 }, { "epoch": 0.73, "learning_rate": 9.996527452478354e-05, "loss": 3.2201, "step": 42450 }, { "epoch": 0.73, "learning_rate": 9.990397381512088e-05, "loss": 3.1862, "step": 42455 }, { "epoch": 0.73, "learning_rate": 9.984268815165389e-05, "loss": 3.2709, "step": 42460 }, { "epoch": 0.73, "learning_rate": 9.978141753899098e-05, "loss": 3.2524, "step": 42465 }, { "epoch": 0.73, "learning_rate": 9.972016198173938e-05, "loss": 3.12, "step": 42470 }, { "epoch": 0.74, "learning_rate": 9.965892148450506e-05, "loss": 3.2258, "step": 42475 }, { "epoch": 0.74, "learning_rate": 9.959769605189322e-05, "loss": 3.163, "step": 42480 }, { "epoch": 0.74, "learning_rate": 9.953648568850758e-05, "loss": 3.2217, "step": 42485 }, { "epoch": 0.74, "learning_rate": 9.947529039895088e-05, "loss": 3.1551, "step": 42490 }, { "epoch": 0.74, "learning_rate": 9.941411018782465e-05, "loss": 3.0359, "step": 42495 }, { "epoch": 0.74, "learning_rate": 9.935294505972927e-05, "loss": 3.2666, "step": 42500 }, { "epoch": 0.74, "eval_loss": 3.201066255569458, "eval_runtime": 149.9729, "eval_samples_per_second": 12.276, "eval_steps_per_second": 0.773, "step": 42500 }, { "epoch": 0.74, "learning_rate": 9.929179501926418e-05, "loss": 3.1589, "step": 42505 }, { "epoch": 0.74, "learning_rate": 9.923066007102752e-05, "loss": 3.2029, "step": 42510 }, { "epoch": 0.74, "learning_rate": 9.91695402196163e-05, "loss": 3.2643, "step": 42515 }, { "epoch": 0.74, "learning_rate": 9.910843546962638e-05, "loss": 3.1305, "step": 42520 }, { "epoch": 0.74, "learning_rate": 9.904734582565261e-05, "loss": 3.1963, "step": 42525 }, { "epoch": 0.74, "learning_rate": 9.898627129228842e-05, "loss": 3.2523, "step": 42530 }, { "epoch": 0.74, "learning_rate": 9.89252118741266e-05, "loss": 3.2724, "step": 42535 }, { "epoch": 0.74, "learning_rate": 9.886416757575832e-05, "loss": 3.1106, "step": 42540 }, { "epoch": 0.74, "learning_rate": 9.880313840177383e-05, "loss": 3.0961, "step": 42545 }, { "epoch": 0.74, "learning_rate": 9.874212435676224e-05, "loss": 3.1022, "step": 42550 }, { "epoch": 0.74, "learning_rate": 9.868112544531136e-05, "loss": 3.2171, "step": 42555 }, { "epoch": 0.74, "learning_rate": 9.862014167200818e-05, "loss": 3.1618, "step": 42560 }, { "epoch": 0.74, "learning_rate": 9.855917304143833e-05, "loss": 3.1864, "step": 42565 }, { "epoch": 0.74, "learning_rate": 9.849821955818625e-05, "loss": 3.1131, "step": 42570 }, { "epoch": 0.74, "learning_rate": 9.843728122683545e-05, "loss": 3.276, "step": 42575 }, { "epoch": 0.74, "learning_rate": 9.837635805196792e-05, "loss": 3.1902, "step": 42580 }, { "epoch": 0.74, "learning_rate": 9.831545003816512e-05, "loss": 3.0883, "step": 42585 }, { "epoch": 0.74, "learning_rate": 9.825455719000695e-05, "loss": 3.1129, "step": 42590 }, { "epoch": 0.74, "learning_rate": 9.819367951207196e-05, "loss": 3.077, "step": 42595 }, { "epoch": 0.74, "learning_rate": 9.813281700893813e-05, "loss": 3.1294, "step": 42600 }, { "epoch": 0.74, "learning_rate": 9.807196968518187e-05, "loss": 3.2189, "step": 42605 }, { "epoch": 0.74, "learning_rate": 9.801113754537855e-05, "loss": 3.2169, "step": 42610 }, { "epoch": 0.74, "learning_rate": 9.795032059410263e-05, "loss": 3.2404, "step": 42615 }, { "epoch": 0.74, "learning_rate": 9.788951883592709e-05, "loss": 3.1659, "step": 42620 }, { "epoch": 0.74, "learning_rate": 9.782873227542393e-05, "loss": 3.0816, "step": 42625 }, { "epoch": 0.74, "learning_rate": 9.7767960917164e-05, "loss": 3.1314, "step": 42630 }, { "epoch": 0.74, "learning_rate": 9.770720476571688e-05, "loss": 3.2118, "step": 42635 }, { "epoch": 0.74, "learning_rate": 9.764646382565133e-05, "loss": 3.1068, "step": 42640 }, { "epoch": 0.74, "learning_rate": 9.758573810153467e-05, "loss": 3.1281, "step": 42645 }, { "epoch": 0.74, "learning_rate": 9.752502759793312e-05, "loss": 3.1041, "step": 42650 }, { "epoch": 0.74, "learning_rate": 9.746433231941186e-05, "loss": 3.3104, "step": 42655 }, { "epoch": 0.74, "learning_rate": 9.740365227053469e-05, "loss": 3.2378, "step": 42660 }, { "epoch": 0.74, "learning_rate": 9.734298745586472e-05, "loss": 3.2226, "step": 42665 }, { "epoch": 0.74, "learning_rate": 9.728233787996356e-05, "loss": 3.1264, "step": 42670 }, { "epoch": 0.74, "learning_rate": 9.72217035473915e-05, "loss": 3.165, "step": 42675 }, { "epoch": 0.74, "learning_rate": 9.716108446270822e-05, "loss": 3.1995, "step": 42680 }, { "epoch": 0.74, "learning_rate": 9.710048063047184e-05, "loss": 3.187, "step": 42685 }, { "epoch": 0.74, "learning_rate": 9.703989205523936e-05, "loss": 3.1576, "step": 42690 }, { "epoch": 0.74, "learning_rate": 9.697931874156707e-05, "loss": 3.2503, "step": 42695 }, { "epoch": 0.74, "learning_rate": 9.691876069400931e-05, "loss": 3.1001, "step": 42700 }, { "epoch": 0.74, "learning_rate": 9.685821791712011e-05, "loss": 3.2139, "step": 42705 }, { "epoch": 0.74, "learning_rate": 9.679769041545181e-05, "loss": 3.0835, "step": 42710 }, { "epoch": 0.74, "learning_rate": 9.673717819355571e-05, "loss": 3.2361, "step": 42715 }, { "epoch": 0.74, "learning_rate": 9.66766812559823e-05, "loss": 3.1695, "step": 42720 }, { "epoch": 0.74, "learning_rate": 9.661619960728026e-05, "loss": 3.1172, "step": 42725 }, { "epoch": 0.74, "learning_rate": 9.655573325199778e-05, "loss": 3.2464, "step": 42730 }, { "epoch": 0.74, "learning_rate": 9.649528219468151e-05, "loss": 3.22, "step": 42735 }, { "epoch": 0.74, "learning_rate": 9.643484643987698e-05, "loss": 3.144, "step": 42740 }, { "epoch": 0.74, "learning_rate": 9.637442599212894e-05, "loss": 3.16, "step": 42745 }, { "epoch": 0.74, "learning_rate": 9.631402085598038e-05, "loss": 3.1275, "step": 42750 }, { "epoch": 0.74, "learning_rate": 9.625363103597352e-05, "loss": 3.2621, "step": 42755 }, { "epoch": 0.74, "learning_rate": 9.61932565366495e-05, "loss": 3.1393, "step": 42760 }, { "epoch": 0.74, "learning_rate": 9.613289736254807e-05, "loss": 3.1324, "step": 42765 }, { "epoch": 0.74, "learning_rate": 9.607255351820802e-05, "loss": 2.9853, "step": 42770 }, { "epoch": 0.74, "learning_rate": 9.601222500816679e-05, "loss": 3.1869, "step": 42775 }, { "epoch": 0.74, "learning_rate": 9.595191183696073e-05, "loss": 3.0711, "step": 42780 }, { "epoch": 0.74, "learning_rate": 9.589161400912524e-05, "loss": 3.069, "step": 42785 }, { "epoch": 0.74, "learning_rate": 9.583133152919437e-05, "loss": 3.1541, "step": 42790 }, { "epoch": 0.74, "learning_rate": 9.577106440170101e-05, "loss": 3.1853, "step": 42795 }, { "epoch": 0.74, "learning_rate": 9.571081263117695e-05, "loss": 3.1427, "step": 42800 }, { "epoch": 0.74, "learning_rate": 9.565057622215274e-05, "loss": 3.0839, "step": 42805 }, { "epoch": 0.74, "learning_rate": 9.5590355179158e-05, "loss": 3.1947, "step": 42810 }, { "epoch": 0.74, "learning_rate": 9.553014950672097e-05, "loss": 3.1567, "step": 42815 }, { "epoch": 0.74, "learning_rate": 9.54699592093688e-05, "loss": 3.2465, "step": 42820 }, { "epoch": 0.74, "learning_rate": 9.540978429162751e-05, "loss": 3.1557, "step": 42825 }, { "epoch": 0.74, "learning_rate": 9.534962475802193e-05, "loss": 3.0594, "step": 42830 }, { "epoch": 0.74, "learning_rate": 9.528948061307565e-05, "loss": 3.0756, "step": 42835 }, { "epoch": 0.74, "learning_rate": 9.522935186131141e-05, "loss": 3.2361, "step": 42840 }, { "epoch": 0.74, "learning_rate": 9.516923850725044e-05, "loss": 3.1698, "step": 42845 }, { "epoch": 0.74, "learning_rate": 9.510914055541304e-05, "loss": 3.2094, "step": 42850 }, { "epoch": 0.74, "learning_rate": 9.504905801031819e-05, "loss": 3.1183, "step": 42855 }, { "epoch": 0.74, "learning_rate": 9.498899087648373e-05, "loss": 3.207, "step": 42860 }, { "epoch": 0.74, "learning_rate": 9.492893915842661e-05, "loss": 3.1721, "step": 42865 }, { "epoch": 0.74, "learning_rate": 9.486890286066229e-05, "loss": 3.2029, "step": 42870 }, { "epoch": 0.74, "learning_rate": 9.480888198770516e-05, "loss": 3.1417, "step": 42875 }, { "epoch": 0.74, "learning_rate": 9.474887654406857e-05, "loss": 3.1568, "step": 42880 }, { "epoch": 0.74, "learning_rate": 9.468888653426444e-05, "loss": 3.1464, "step": 42885 }, { "epoch": 0.74, "learning_rate": 9.462891196280393e-05, "loss": 3.179, "step": 42890 }, { "epoch": 0.74, "learning_rate": 9.456895283419674e-05, "loss": 3.1786, "step": 42895 }, { "epoch": 0.74, "learning_rate": 9.450900915295147e-05, "loss": 3.0743, "step": 42900 }, { "epoch": 0.74, "learning_rate": 9.444908092357558e-05, "loss": 3.1999, "step": 42905 }, { "epoch": 0.74, "learning_rate": 9.438916815057541e-05, "loss": 3.1467, "step": 42910 }, { "epoch": 0.74, "learning_rate": 9.432927083845592e-05, "loss": 3.201, "step": 42915 }, { "epoch": 0.74, "learning_rate": 9.426938899172132e-05, "loss": 3.2788, "step": 42920 }, { "epoch": 0.74, "learning_rate": 9.420952261487434e-05, "loss": 3.2115, "step": 42925 }, { "epoch": 0.74, "learning_rate": 9.41496717124166e-05, "loss": 3.1825, "step": 42930 }, { "epoch": 0.74, "learning_rate": 9.408983628884856e-05, "loss": 3.1825, "step": 42935 }, { "epoch": 0.74, "learning_rate": 9.403001634866948e-05, "loss": 3.0352, "step": 42940 }, { "epoch": 0.74, "learning_rate": 9.397021189637765e-05, "loss": 3.0817, "step": 42945 }, { "epoch": 0.74, "learning_rate": 9.391042293647012e-05, "loss": 3.1235, "step": 42950 }, { "epoch": 0.74, "learning_rate": 9.385064947344241e-05, "loss": 3.1679, "step": 42955 }, { "epoch": 0.74, "learning_rate": 9.379089151178945e-05, "loss": 3.1045, "step": 42960 }, { "epoch": 0.74, "learning_rate": 9.373114905600464e-05, "loss": 3.146, "step": 42965 }, { "epoch": 0.74, "learning_rate": 9.367142211058023e-05, "loss": 3.2207, "step": 42970 }, { "epoch": 0.74, "learning_rate": 9.361171068000762e-05, "loss": 3.2523, "step": 42975 }, { "epoch": 0.74, "learning_rate": 9.355201476877647e-05, "loss": 3.078, "step": 42980 }, { "epoch": 0.74, "learning_rate": 9.349233438137589e-05, "loss": 3.1585, "step": 42985 }, { "epoch": 0.74, "learning_rate": 9.343266952229341e-05, "loss": 3.171, "step": 42990 }, { "epoch": 0.74, "learning_rate": 9.337302019601545e-05, "loss": 3.2222, "step": 42995 }, { "epoch": 0.74, "learning_rate": 9.331338640702764e-05, "loss": 3.1117, "step": 43000 }, { "epoch": 0.74, "eval_loss": 3.1977977752685547, "eval_runtime": 149.6717, "eval_samples_per_second": 12.3, "eval_steps_per_second": 0.775, "step": 43000 }, { "epoch": 0.74, "learning_rate": 9.32537681598137e-05, "loss": 3.2042, "step": 43005 }, { "epoch": 0.74, "learning_rate": 9.319416545885693e-05, "loss": 3.1313, "step": 43010 }, { "epoch": 0.74, "learning_rate": 9.313457830863906e-05, "loss": 3.169, "step": 43015 }, { "epoch": 0.74, "learning_rate": 9.307500671364064e-05, "loss": 3.136, "step": 43020 }, { "epoch": 0.74, "learning_rate": 9.301545067834142e-05, "loss": 3.1929, "step": 43025 }, { "epoch": 0.74, "learning_rate": 9.295591020721944e-05, "loss": 3.1781, "step": 43030 }, { "epoch": 0.74, "learning_rate": 9.289638530475186e-05, "loss": 3.1836, "step": 43035 }, { "epoch": 0.74, "learning_rate": 9.283687597541477e-05, "loss": 3.1999, "step": 43040 }, { "epoch": 0.74, "learning_rate": 9.277738222368294e-05, "loss": 3.0913, "step": 43045 }, { "epoch": 0.75, "learning_rate": 9.271790405402996e-05, "loss": 3.2435, "step": 43050 }, { "epoch": 0.75, "learning_rate": 9.265844147092832e-05, "loss": 3.295, "step": 43055 }, { "epoch": 0.75, "learning_rate": 9.259899447884915e-05, "loss": 3.1696, "step": 43060 }, { "epoch": 0.75, "learning_rate": 9.253956308226277e-05, "loss": 3.0823, "step": 43065 }, { "epoch": 0.75, "learning_rate": 9.248014728563801e-05, "loss": 3.1608, "step": 43070 }, { "epoch": 0.75, "learning_rate": 9.242074709344258e-05, "loss": 3.1251, "step": 43075 }, { "epoch": 0.75, "learning_rate": 9.236136251014336e-05, "loss": 3.1621, "step": 43080 }, { "epoch": 0.75, "learning_rate": 9.230199354020533e-05, "loss": 3.2057, "step": 43085 }, { "epoch": 0.75, "learning_rate": 9.224264018809304e-05, "loss": 3.1949, "step": 43090 }, { "epoch": 0.75, "learning_rate": 9.218330245826948e-05, "loss": 3.1971, "step": 43095 }, { "epoch": 0.75, "learning_rate": 9.212398035519641e-05, "loss": 3.0869, "step": 43100 }, { "epoch": 0.75, "learning_rate": 9.206467388333491e-05, "loss": 3.093, "step": 43105 }, { "epoch": 0.75, "learning_rate": 9.200538304714416e-05, "loss": 3.1078, "step": 43110 }, { "epoch": 0.75, "learning_rate": 9.194610785108253e-05, "loss": 3.2347, "step": 43115 }, { "epoch": 0.75, "learning_rate": 9.188684829960746e-05, "loss": 3.1788, "step": 43120 }, { "epoch": 0.75, "learning_rate": 9.182760439717482e-05, "loss": 3.1977, "step": 43125 }, { "epoch": 0.75, "learning_rate": 9.176837614823945e-05, "loss": 3.1689, "step": 43130 }, { "epoch": 0.75, "learning_rate": 9.170916355725505e-05, "loss": 3.1488, "step": 43135 }, { "epoch": 0.75, "learning_rate": 9.164996662867398e-05, "loss": 3.1792, "step": 43140 }, { "epoch": 0.75, "learning_rate": 9.159078536694774e-05, "loss": 3.1396, "step": 43145 }, { "epoch": 0.75, "learning_rate": 9.153161977652634e-05, "loss": 3.1108, "step": 43150 }, { "epoch": 0.75, "learning_rate": 9.147246986185874e-05, "loss": 3.0777, "step": 43155 }, { "epoch": 0.75, "learning_rate": 9.141333562739275e-05, "loss": 3.2488, "step": 43160 }, { "epoch": 0.75, "learning_rate": 9.135421707757483e-05, "loss": 3.2371, "step": 43165 }, { "epoch": 0.75, "learning_rate": 9.129511421685058e-05, "loss": 3.1491, "step": 43170 }, { "epoch": 0.75, "learning_rate": 9.123602704966415e-05, "loss": 3.2448, "step": 43175 }, { "epoch": 0.75, "learning_rate": 9.117695558045858e-05, "loss": 3.1201, "step": 43180 }, { "epoch": 0.75, "learning_rate": 9.111789981367576e-05, "loss": 3.1906, "step": 43185 }, { "epoch": 0.75, "learning_rate": 9.105885975375637e-05, "loss": 3.0943, "step": 43190 }, { "epoch": 0.75, "learning_rate": 9.099983540513981e-05, "loss": 3.2098, "step": 43195 }, { "epoch": 0.75, "learning_rate": 9.094082677226462e-05, "loss": 3.2461, "step": 43200 }, { "epoch": 0.75, "learning_rate": 9.088183385956785e-05, "loss": 3.2476, "step": 43205 }, { "epoch": 0.75, "learning_rate": 9.082285667148545e-05, "loss": 3.1644, "step": 43210 }, { "epoch": 0.75, "learning_rate": 9.076389521245225e-05, "loss": 3.2083, "step": 43215 }, { "epoch": 0.75, "learning_rate": 9.070494948690165e-05, "loss": 3.2944, "step": 43220 }, { "epoch": 0.75, "learning_rate": 9.064601949926638e-05, "loss": 3.3015, "step": 43225 }, { "epoch": 0.75, "learning_rate": 9.058710525397748e-05, "loss": 3.0546, "step": 43230 }, { "epoch": 0.75, "learning_rate": 9.052820675546508e-05, "loss": 3.1223, "step": 43235 }, { "epoch": 0.75, "learning_rate": 9.046932400815798e-05, "loss": 3.1651, "step": 43240 }, { "epoch": 0.75, "learning_rate": 9.041045701648382e-05, "loss": 2.9694, "step": 43245 }, { "epoch": 0.75, "learning_rate": 9.035160578486924e-05, "loss": 3.0904, "step": 43250 }, { "epoch": 0.75, "learning_rate": 9.029277031773958e-05, "loss": 3.2837, "step": 43255 }, { "epoch": 0.75, "learning_rate": 9.023395061951864e-05, "loss": 3.1519, "step": 43260 }, { "epoch": 0.75, "learning_rate": 9.017514669462969e-05, "loss": 3.2259, "step": 43265 }, { "epoch": 0.75, "learning_rate": 9.011635854749439e-05, "loss": 3.1521, "step": 43270 }, { "epoch": 0.75, "learning_rate": 9.005758618253316e-05, "loss": 3.1211, "step": 43275 }, { "epoch": 0.75, "learning_rate": 8.999882960416572e-05, "loss": 3.2336, "step": 43280 }, { "epoch": 0.75, "learning_rate": 8.994008881680989e-05, "loss": 3.1052, "step": 43285 }, { "epoch": 0.75, "learning_rate": 8.988136382488292e-05, "loss": 3.1502, "step": 43290 }, { "epoch": 0.75, "learning_rate": 8.982265463280054e-05, "loss": 3.1397, "step": 43295 }, { "epoch": 0.75, "learning_rate": 8.976396124497731e-05, "loss": 3.2139, "step": 43300 }, { "epoch": 0.75, "learning_rate": 8.97052836658268e-05, "loss": 3.1242, "step": 43305 }, { "epoch": 0.75, "learning_rate": 8.964662189976128e-05, "loss": 3.2127, "step": 43310 }, { "epoch": 0.75, "learning_rate": 8.95879759511917e-05, "loss": 3.1406, "step": 43315 }, { "epoch": 0.75, "learning_rate": 8.9529345824528e-05, "loss": 3.1651, "step": 43320 }, { "epoch": 0.75, "learning_rate": 8.947073152417877e-05, "loss": 3.1555, "step": 43325 }, { "epoch": 0.75, "learning_rate": 8.941213305455166e-05, "loss": 3.2542, "step": 43330 }, { "epoch": 0.75, "learning_rate": 8.9353550420053e-05, "loss": 3.2307, "step": 43335 }, { "epoch": 0.75, "learning_rate": 8.929498362508762e-05, "loss": 3.239, "step": 43340 }, { "epoch": 0.75, "learning_rate": 8.92364326740597e-05, "loss": 3.096, "step": 43345 }, { "epoch": 0.75, "learning_rate": 8.91778975713719e-05, "loss": 3.2674, "step": 43350 }, { "epoch": 0.75, "learning_rate": 8.911937832142569e-05, "loss": 3.0949, "step": 43355 }, { "epoch": 0.75, "learning_rate": 8.906087492862166e-05, "loss": 3.157, "step": 43360 }, { "epoch": 0.75, "learning_rate": 8.900238739735859e-05, "loss": 3.1264, "step": 43365 }, { "epoch": 0.75, "learning_rate": 8.894391573203475e-05, "loss": 3.2266, "step": 43370 }, { "epoch": 0.75, "learning_rate": 8.888545993704678e-05, "loss": 3.1276, "step": 43375 }, { "epoch": 0.75, "learning_rate": 8.882702001679017e-05, "loss": 3.1758, "step": 43380 }, { "epoch": 0.75, "learning_rate": 8.876859597565965e-05, "loss": 3.1014, "step": 43385 }, { "epoch": 0.75, "learning_rate": 8.871018781804796e-05, "loss": 3.261, "step": 43390 }, { "epoch": 0.75, "learning_rate": 8.865179554834739e-05, "loss": 3.098, "step": 43395 }, { "epoch": 0.75, "learning_rate": 8.859341917094863e-05, "loss": 3.3551, "step": 43400 }, { "epoch": 0.75, "learning_rate": 8.853505869024127e-05, "loss": 3.2433, "step": 43405 }, { "epoch": 0.75, "learning_rate": 8.847671411061389e-05, "loss": 3.1784, "step": 43410 }, { "epoch": 0.75, "learning_rate": 8.841838543645353e-05, "loss": 3.1108, "step": 43415 }, { "epoch": 0.75, "learning_rate": 8.83600726721461e-05, "loss": 3.2022, "step": 43420 }, { "epoch": 0.75, "learning_rate": 8.830177582207669e-05, "loss": 3.0931, "step": 43425 }, { "epoch": 0.75, "learning_rate": 8.824349489062876e-05, "loss": 3.24, "step": 43430 }, { "epoch": 0.75, "learning_rate": 8.818522988218483e-05, "loss": 3.1067, "step": 43435 }, { "epoch": 0.75, "learning_rate": 8.812698080112607e-05, "loss": 3.0216, "step": 43440 }, { "epoch": 0.75, "learning_rate": 8.806874765183241e-05, "loss": 3.09, "step": 43445 }, { "epoch": 0.75, "learning_rate": 8.80105304386829e-05, "loss": 3.1525, "step": 43450 }, { "epoch": 0.75, "learning_rate": 8.795232916605506e-05, "loss": 3.2424, "step": 43455 }, { "epoch": 0.75, "learning_rate": 8.789414383832535e-05, "loss": 3.2136, "step": 43460 }, { "epoch": 0.75, "learning_rate": 8.783597445986901e-05, "loss": 3.1622, "step": 43465 }, { "epoch": 0.75, "learning_rate": 8.777782103506007e-05, "loss": 3.2465, "step": 43470 }, { "epoch": 0.75, "learning_rate": 8.771968356827123e-05, "loss": 3.1255, "step": 43475 }, { "epoch": 0.75, "learning_rate": 8.766156206387442e-05, "loss": 3.1675, "step": 43480 }, { "epoch": 0.75, "learning_rate": 8.760345652623987e-05, "loss": 3.1265, "step": 43485 }, { "epoch": 0.75, "learning_rate": 8.75453669597369e-05, "loss": 3.0942, "step": 43490 }, { "epoch": 0.75, "learning_rate": 8.748729336873353e-05, "loss": 3.1477, "step": 43495 }, { "epoch": 0.75, "learning_rate": 8.742923575759649e-05, "loss": 3.1318, "step": 43500 }, { "epoch": 0.75, "eval_loss": 3.1929843425750732, "eval_runtime": 150.078, "eval_samples_per_second": 12.267, "eval_steps_per_second": 0.773, "step": 43500 }, { "epoch": 0.75, "learning_rate": 8.737119413069158e-05, "loss": 3.221, "step": 43505 }, { "epoch": 0.75, "learning_rate": 8.73131684923832e-05, "loss": 3.0582, "step": 43510 }, { "epoch": 0.75, "learning_rate": 8.725515884703453e-05, "loss": 3.1917, "step": 43515 }, { "epoch": 0.75, "learning_rate": 8.719716519900763e-05, "loss": 3.1326, "step": 43520 }, { "epoch": 0.75, "learning_rate": 8.713918755266321e-05, "loss": 3.0365, "step": 43525 }, { "epoch": 0.75, "learning_rate": 8.708122591236109e-05, "loss": 3.181, "step": 43530 }, { "epoch": 0.75, "learning_rate": 8.702328028245956e-05, "loss": 3.2332, "step": 43535 }, { "epoch": 0.75, "learning_rate": 8.69653506673159e-05, "loss": 3.1744, "step": 43540 }, { "epoch": 0.75, "learning_rate": 8.690743707128605e-05, "loss": 3.0682, "step": 43545 }, { "epoch": 0.75, "learning_rate": 8.68495394987249e-05, "loss": 3.3009, "step": 43550 }, { "epoch": 0.75, "learning_rate": 8.679165795398587e-05, "loss": 3.126, "step": 43555 }, { "epoch": 0.75, "learning_rate": 8.67337924414216e-05, "loss": 3.2137, "step": 43560 }, { "epoch": 0.75, "learning_rate": 8.667594296538315e-05, "loss": 3.1615, "step": 43565 }, { "epoch": 0.75, "learning_rate": 8.661810953022052e-05, "loss": 3.1869, "step": 43570 }, { "epoch": 0.75, "learning_rate": 8.656029214028249e-05, "loss": 3.1582, "step": 43575 }, { "epoch": 0.75, "learning_rate": 8.650249079991654e-05, "loss": 3.0512, "step": 43580 }, { "epoch": 0.75, "learning_rate": 8.644470551346921e-05, "loss": 3.1223, "step": 43585 }, { "epoch": 0.75, "learning_rate": 8.638693628528559e-05, "loss": 3.2332, "step": 43590 }, { "epoch": 0.75, "learning_rate": 8.63291831197096e-05, "loss": 3.0745, "step": 43595 }, { "epoch": 0.75, "learning_rate": 8.627144602108399e-05, "loss": 3.1756, "step": 43600 }, { "epoch": 0.75, "learning_rate": 8.621372499375021e-05, "loss": 3.2514, "step": 43605 }, { "epoch": 0.75, "learning_rate": 8.615602004204876e-05, "loss": 3.1066, "step": 43610 }, { "epoch": 0.75, "learning_rate": 8.609833117031879e-05, "loss": 3.1709, "step": 43615 }, { "epoch": 0.75, "learning_rate": 8.604065838289788e-05, "loss": 3.0782, "step": 43620 }, { "epoch": 0.75, "learning_rate": 8.598300168412301e-05, "loss": 3.1607, "step": 43625 }, { "epoch": 0.76, "learning_rate": 8.592536107832962e-05, "loss": 3.1411, "step": 43630 }, { "epoch": 0.76, "learning_rate": 8.586773656985185e-05, "loss": 3.0961, "step": 43635 }, { "epoch": 0.76, "learning_rate": 8.581012816302309e-05, "loss": 3.202, "step": 43640 }, { "epoch": 0.76, "learning_rate": 8.575253586217478e-05, "loss": 3.2263, "step": 43645 }, { "epoch": 0.76, "learning_rate": 8.569495967163786e-05, "loss": 3.0951, "step": 43650 }, { "epoch": 0.76, "learning_rate": 8.56373995957417e-05, "loss": 3.1947, "step": 43655 }, { "epoch": 0.76, "learning_rate": 8.557985563881439e-05, "loss": 3.2083, "step": 43660 }, { "epoch": 0.76, "learning_rate": 8.552232780518324e-05, "loss": 3.169, "step": 43665 }, { "epoch": 0.76, "learning_rate": 8.546481609917367e-05, "loss": 3.1267, "step": 43670 }, { "epoch": 0.76, "learning_rate": 8.540732052511058e-05, "loss": 3.288, "step": 43675 }, { "epoch": 0.76, "learning_rate": 8.534984108731717e-05, "loss": 3.0646, "step": 43680 }, { "epoch": 0.76, "learning_rate": 8.529237779011557e-05, "loss": 3.2094, "step": 43685 }, { "epoch": 0.76, "learning_rate": 8.523493063782699e-05, "loss": 3.1029, "step": 43690 }, { "epoch": 0.76, "learning_rate": 8.517749963477087e-05, "loss": 3.1713, "step": 43695 }, { "epoch": 0.76, "learning_rate": 8.512008478526575e-05, "loss": 3.1246, "step": 43700 }, { "epoch": 0.76, "learning_rate": 8.50626860936291e-05, "loss": 3.1438, "step": 43705 }, { "epoch": 0.76, "learning_rate": 8.500530356417692e-05, "loss": 3.2288, "step": 43710 }, { "epoch": 0.76, "learning_rate": 8.494793720122407e-05, "loss": 3.049, "step": 43715 }, { "epoch": 0.76, "learning_rate": 8.489058700908424e-05, "loss": 3.1557, "step": 43720 }, { "epoch": 0.76, "learning_rate": 8.483325299206972e-05, "loss": 3.1421, "step": 43725 }, { "epoch": 0.76, "learning_rate": 8.4775935154492e-05, "loss": 3.1228, "step": 43730 }, { "epoch": 0.76, "learning_rate": 8.47186335006609e-05, "loss": 3.2315, "step": 43735 }, { "epoch": 0.76, "learning_rate": 8.466134803488532e-05, "loss": 3.2098, "step": 43740 }, { "epoch": 0.76, "learning_rate": 8.460407876147273e-05, "loss": 3.0859, "step": 43745 }, { "epoch": 0.76, "learning_rate": 8.454682568472946e-05, "loss": 3.1922, "step": 43750 }, { "epoch": 0.76, "learning_rate": 8.44895888089608e-05, "loss": 3.1085, "step": 43755 }, { "epoch": 0.76, "learning_rate": 8.44323681384706e-05, "loss": 3.1564, "step": 43760 }, { "epoch": 0.76, "learning_rate": 8.437516367756153e-05, "loss": 3.2095, "step": 43765 }, { "epoch": 0.76, "learning_rate": 8.431797543053512e-05, "loss": 3.2012, "step": 43770 }, { "epoch": 0.76, "learning_rate": 8.426080340169158e-05, "loss": 3.1939, "step": 43775 }, { "epoch": 0.76, "learning_rate": 8.42036475953299e-05, "loss": 3.1871, "step": 43780 }, { "epoch": 0.76, "learning_rate": 8.414650801574806e-05, "loss": 3.1464, "step": 43785 }, { "epoch": 0.76, "learning_rate": 8.408938466724257e-05, "loss": 3.2299, "step": 43790 }, { "epoch": 0.76, "learning_rate": 8.403227755410887e-05, "loss": 3.1247, "step": 43795 }, { "epoch": 0.76, "learning_rate": 8.397518668064109e-05, "loss": 3.1662, "step": 43800 }, { "epoch": 0.76, "learning_rate": 8.391811205113204e-05, "loss": 3.1429, "step": 43805 }, { "epoch": 0.76, "learning_rate": 8.386105366987363e-05, "loss": 3.1746, "step": 43810 }, { "epoch": 0.76, "learning_rate": 8.380401154115631e-05, "loss": 3.2565, "step": 43815 }, { "epoch": 0.76, "learning_rate": 8.374698566926937e-05, "loss": 3.0521, "step": 43820 }, { "epoch": 0.76, "learning_rate": 8.368997605850078e-05, "loss": 3.1862, "step": 43825 }, { "epoch": 0.76, "learning_rate": 8.363298271313735e-05, "loss": 3.1848, "step": 43830 }, { "epoch": 0.76, "learning_rate": 8.357600563746485e-05, "loss": 3.1177, "step": 43835 }, { "epoch": 0.76, "learning_rate": 8.351904483576758e-05, "loss": 3.1069, "step": 43840 }, { "epoch": 0.76, "learning_rate": 8.346210031232866e-05, "loss": 3.2112, "step": 43845 }, { "epoch": 0.76, "learning_rate": 8.340517207143007e-05, "loss": 3.0668, "step": 43850 }, { "epoch": 0.76, "learning_rate": 8.334826011735252e-05, "loss": 3.0612, "step": 43855 }, { "epoch": 0.76, "learning_rate": 8.32913644543754e-05, "loss": 3.2284, "step": 43860 }, { "epoch": 0.76, "learning_rate": 8.323448508677713e-05, "loss": 3.1964, "step": 43865 }, { "epoch": 0.76, "learning_rate": 8.317762201883468e-05, "loss": 3.1696, "step": 43870 }, { "epoch": 0.76, "learning_rate": 8.312077525482387e-05, "loss": 3.0291, "step": 43875 }, { "epoch": 0.76, "learning_rate": 8.306394479901928e-05, "loss": 3.0917, "step": 43880 }, { "epoch": 0.76, "learning_rate": 8.300713065569415e-05, "loss": 3.1189, "step": 43885 }, { "epoch": 0.76, "learning_rate": 8.295033282912081e-05, "loss": 3.2172, "step": 43890 }, { "epoch": 0.76, "learning_rate": 8.289355132357011e-05, "loss": 3.213, "step": 43895 }, { "epoch": 0.76, "learning_rate": 8.283678614331165e-05, "loss": 3.2169, "step": 43900 }, { "epoch": 0.76, "learning_rate": 8.278003729261396e-05, "loss": 3.0397, "step": 43905 }, { "epoch": 0.76, "learning_rate": 8.272330477574414e-05, "loss": 3.144, "step": 43910 }, { "epoch": 0.76, "learning_rate": 8.266658859696835e-05, "loss": 3.0974, "step": 43915 }, { "epoch": 0.76, "learning_rate": 8.26098887605514e-05, "loss": 3.171, "step": 43920 }, { "epoch": 0.76, "learning_rate": 8.255320527075651e-05, "loss": 3.1021, "step": 43925 }, { "epoch": 0.76, "learning_rate": 8.249653813184628e-05, "loss": 3.2077, "step": 43930 }, { "epoch": 0.76, "learning_rate": 8.243988734808168e-05, "loss": 3.1471, "step": 43935 }, { "epoch": 0.76, "learning_rate": 8.23832529237225e-05, "loss": 3.0669, "step": 43940 }, { "epoch": 0.76, "learning_rate": 8.232663486302761e-05, "loss": 3.1868, "step": 43945 }, { "epoch": 0.76, "learning_rate": 8.227003317025403e-05, "loss": 3.2153, "step": 43950 }, { "epoch": 0.76, "learning_rate": 8.22134478496582e-05, "loss": 3.091, "step": 43955 }, { "epoch": 0.76, "learning_rate": 8.215687890549499e-05, "loss": 3.1243, "step": 43960 }, { "epoch": 0.76, "learning_rate": 8.210032634201792e-05, "loss": 3.2038, "step": 43965 }, { "epoch": 0.76, "learning_rate": 8.204379016347979e-05, "loss": 3.2283, "step": 43970 }, { "epoch": 0.76, "learning_rate": 8.198727037413156e-05, "loss": 3.2619, "step": 43975 }, { "epoch": 0.76, "learning_rate": 8.193076697822316e-05, "loss": 3.1312, "step": 43980 }, { "epoch": 0.76, "learning_rate": 8.187427998000364e-05, "loss": 3.0785, "step": 43985 }, { "epoch": 0.76, "learning_rate": 8.181780938372037e-05, "loss": 2.9717, "step": 43990 }, { "epoch": 0.76, "learning_rate": 8.176135519361964e-05, "loss": 3.0844, "step": 43995 }, { "epoch": 0.76, "learning_rate": 8.170491741394658e-05, "loss": 3.2462, "step": 44000 }, { "epoch": 0.76, "eval_loss": 3.190524101257324, "eval_runtime": 149.6752, "eval_samples_per_second": 12.3, "eval_steps_per_second": 0.775, "step": 44000 }, { "epoch": 0.76, "learning_rate": 8.164849604894487e-05, "loss": 3.2107, "step": 44005 }, { "epoch": 0.76, "learning_rate": 8.159209110285734e-05, "loss": 3.1158, "step": 44010 }, { "epoch": 0.76, "learning_rate": 8.153570257992521e-05, "loss": 3.1124, "step": 44015 }, { "epoch": 0.76, "learning_rate": 8.147933048438856e-05, "loss": 3.2516, "step": 44020 }, { "epoch": 0.76, "learning_rate": 8.142297482048653e-05, "loss": 3.1914, "step": 44025 }, { "epoch": 0.76, "learning_rate": 8.136663559245643e-05, "loss": 3.1275, "step": 44030 }, { "epoch": 0.76, "learning_rate": 8.131031280453494e-05, "loss": 3.1475, "step": 44035 }, { "epoch": 0.76, "learning_rate": 8.125400646095717e-05, "loss": 3.2376, "step": 44040 }, { "epoch": 0.76, "learning_rate": 8.119771656595696e-05, "loss": 3.1365, "step": 44045 }, { "epoch": 0.76, "learning_rate": 8.114144312376731e-05, "loss": 3.1429, "step": 44050 }, { "epoch": 0.76, "learning_rate": 8.108518613861941e-05, "loss": 3.1624, "step": 44055 }, { "epoch": 0.76, "learning_rate": 8.102894561474349e-05, "loss": 3.1822, "step": 44060 }, { "epoch": 0.76, "learning_rate": 8.097272155636877e-05, "loss": 3.0874, "step": 44065 }, { "epoch": 0.76, "learning_rate": 8.091651396772286e-05, "loss": 3.232, "step": 44070 }, { "epoch": 0.76, "learning_rate": 8.086032285303232e-05, "loss": 3.2127, "step": 44075 }, { "epoch": 0.76, "learning_rate": 8.080414821652244e-05, "loss": 3.2376, "step": 44080 }, { "epoch": 0.76, "learning_rate": 8.074799006241716e-05, "loss": 3.156, "step": 44085 }, { "epoch": 0.76, "learning_rate": 8.069184839493947e-05, "loss": 3.232, "step": 44090 }, { "epoch": 0.76, "learning_rate": 8.063572321831084e-05, "loss": 3.0889, "step": 44095 }, { "epoch": 0.76, "learning_rate": 8.05796145367516e-05, "loss": 3.2148, "step": 44100 }, { "epoch": 0.76, "learning_rate": 8.052352235448084e-05, "loss": 3.1608, "step": 44105 }, { "epoch": 0.76, "learning_rate": 8.046744667571629e-05, "loss": 3.0584, "step": 44110 }, { "epoch": 0.76, "learning_rate": 8.041138750467478e-05, "loss": 3.2204, "step": 44115 }, { "epoch": 0.76, "learning_rate": 8.035534484557153e-05, "loss": 3.0466, "step": 44120 }, { "epoch": 0.76, "learning_rate": 8.029931870262073e-05, "loss": 3.1303, "step": 44125 }, { "epoch": 0.76, "learning_rate": 8.024330908003516e-05, "loss": 3.1986, "step": 44130 }, { "epoch": 0.76, "learning_rate": 8.018731598202655e-05, "loss": 3.1084, "step": 44135 }, { "epoch": 0.76, "learning_rate": 8.013133941280516e-05, "loss": 3.2342, "step": 44140 }, { "epoch": 0.76, "learning_rate": 8.007537937658032e-05, "loss": 3.1171, "step": 44145 }, { "epoch": 0.76, "learning_rate": 8.001943587755984e-05, "loss": 3.1854, "step": 44150 }, { "epoch": 0.76, "learning_rate": 7.996350891995045e-05, "loss": 3.175, "step": 44155 }, { "epoch": 0.76, "learning_rate": 7.990759850795746e-05, "loss": 3.2055, "step": 44160 }, { "epoch": 0.76, "learning_rate": 7.985170464578506e-05, "loss": 3.0755, "step": 44165 }, { "epoch": 0.76, "learning_rate": 7.97958273376363e-05, "loss": 3.0841, "step": 44170 }, { "epoch": 0.76, "learning_rate": 7.97399665877128e-05, "loss": 3.1463, "step": 44175 }, { "epoch": 0.76, "learning_rate": 7.9684122400215e-05, "loss": 3.1655, "step": 44180 }, { "epoch": 0.76, "learning_rate": 7.962829477934207e-05, "loss": 3.1207, "step": 44185 }, { "epoch": 0.76, "learning_rate": 7.957248372929192e-05, "loss": 3.1037, "step": 44190 }, { "epoch": 0.76, "learning_rate": 7.95166892542614e-05, "loss": 3.125, "step": 44195 }, { "epoch": 0.76, "learning_rate": 7.946091135844601e-05, "loss": 3.1905, "step": 44200 }, { "epoch": 0.77, "learning_rate": 7.940515004603963e-05, "loss": 3.2434, "step": 44205 }, { "epoch": 0.77, "learning_rate": 7.934940532123552e-05, "loss": 3.1113, "step": 44210 }, { "epoch": 0.77, "learning_rate": 7.929367718822534e-05, "loss": 3.2207, "step": 44215 }, { "epoch": 0.77, "learning_rate": 7.923796565119941e-05, "loss": 3.1306, "step": 44220 }, { "epoch": 0.77, "learning_rate": 7.918227071434729e-05, "loss": 3.2167, "step": 44225 }, { "epoch": 0.77, "learning_rate": 7.912659238185654e-05, "loss": 3.0545, "step": 44230 }, { "epoch": 0.77, "learning_rate": 7.907093065791418e-05, "loss": 3.1117, "step": 44235 }, { "epoch": 0.77, "learning_rate": 7.90152855467056e-05, "loss": 3.2648, "step": 44240 }, { "epoch": 0.77, "learning_rate": 7.895965705241489e-05, "loss": 3.1396, "step": 44245 }, { "epoch": 0.77, "learning_rate": 7.890404517922529e-05, "loss": 3.1897, "step": 44250 }, { "epoch": 0.77, "learning_rate": 7.884844993131836e-05, "loss": 2.9301, "step": 44255 }, { "epoch": 0.77, "learning_rate": 7.879287131287463e-05, "loss": 3.1565, "step": 44260 }, { "epoch": 0.77, "learning_rate": 7.873730932807332e-05, "loss": 3.163, "step": 44265 }, { "epoch": 0.77, "learning_rate": 7.868176398109228e-05, "loss": 3.097, "step": 44270 }, { "epoch": 0.77, "learning_rate": 7.862623527610847e-05, "loss": 3.1801, "step": 44275 }, { "epoch": 0.77, "learning_rate": 7.857072321729732e-05, "loss": 3.0713, "step": 44280 }, { "epoch": 0.77, "learning_rate": 7.851522780883277e-05, "loss": 2.9592, "step": 44285 }, { "epoch": 0.77, "learning_rate": 7.845974905488812e-05, "loss": 3.1761, "step": 44290 }, { "epoch": 0.77, "learning_rate": 7.840428695963497e-05, "loss": 3.1655, "step": 44295 }, { "epoch": 0.77, "learning_rate": 7.834884152724367e-05, "loss": 3.0895, "step": 44300 }, { "epoch": 0.77, "learning_rate": 7.829341276188372e-05, "loss": 3.0921, "step": 44305 }, { "epoch": 0.77, "learning_rate": 7.823800066772273e-05, "loss": 3.2266, "step": 44310 }, { "epoch": 0.77, "learning_rate": 7.81826052489277e-05, "loss": 3.2352, "step": 44315 }, { "epoch": 0.77, "learning_rate": 7.812722650966392e-05, "loss": 3.1349, "step": 44320 }, { "epoch": 0.77, "learning_rate": 7.807186445409554e-05, "loss": 3.2131, "step": 44325 }, { "epoch": 0.77, "learning_rate": 7.801651908638578e-05, "loss": 3.1752, "step": 44330 }, { "epoch": 0.77, "learning_rate": 7.796119041069595e-05, "loss": 3.2089, "step": 44335 }, { "epoch": 0.77, "learning_rate": 7.790587843118677e-05, "loss": 3.1466, "step": 44340 }, { "epoch": 0.77, "learning_rate": 7.785058315201728e-05, "loss": 3.1338, "step": 44345 }, { "epoch": 0.77, "learning_rate": 7.77953045773454e-05, "loss": 3.0996, "step": 44350 }, { "epoch": 0.77, "learning_rate": 7.7740042711328e-05, "loss": 3.1595, "step": 44355 }, { "epoch": 0.77, "learning_rate": 7.768479755812024e-05, "loss": 3.1891, "step": 44360 }, { "epoch": 0.77, "learning_rate": 7.762956912187625e-05, "loss": 3.1434, "step": 44365 }, { "epoch": 0.77, "learning_rate": 7.757435740674913e-05, "loss": 3.1736, "step": 44370 }, { "epoch": 0.77, "learning_rate": 7.751916241689042e-05, "loss": 3.1572, "step": 44375 }, { "epoch": 0.77, "learning_rate": 7.746398415645048e-05, "loss": 3.0956, "step": 44380 }, { "epoch": 0.77, "learning_rate": 7.740882262957847e-05, "loss": 3.0352, "step": 44385 }, { "epoch": 0.77, "learning_rate": 7.735367784042214e-05, "loss": 3.11, "step": 44390 }, { "epoch": 0.77, "learning_rate": 7.729854979312828e-05, "loss": 3.1855, "step": 44395 }, { "epoch": 0.77, "learning_rate": 7.724343849184217e-05, "loss": 3.2492, "step": 44400 }, { "epoch": 0.77, "learning_rate": 7.718834394070786e-05, "loss": 3.1713, "step": 44405 }, { "epoch": 0.77, "learning_rate": 7.713326614386818e-05, "loss": 3.157, "step": 44410 }, { "epoch": 0.77, "learning_rate": 7.707820510546466e-05, "loss": 3.1427, "step": 44415 }, { "epoch": 0.77, "learning_rate": 7.702316082963773e-05, "loss": 3.1726, "step": 44420 }, { "epoch": 0.77, "learning_rate": 7.696813332052634e-05, "loss": 3.2024, "step": 44425 }, { "epoch": 0.77, "learning_rate": 7.691312258226834e-05, "loss": 3.1871, "step": 44430 }, { "epoch": 0.77, "learning_rate": 7.685812861900023e-05, "loss": 3.1701, "step": 44435 }, { "epoch": 0.77, "learning_rate": 7.680315143485724e-05, "loss": 3.1798, "step": 44440 }, { "epoch": 0.77, "learning_rate": 7.67481910339733e-05, "loss": 3.1527, "step": 44445 }, { "epoch": 0.77, "learning_rate": 7.669324742048133e-05, "loss": 3.2117, "step": 44450 }, { "epoch": 0.77, "learning_rate": 7.663832059851276e-05, "loss": 3.2039, "step": 44455 }, { "epoch": 0.77, "learning_rate": 7.658341057219775e-05, "loss": 3.1415, "step": 44460 }, { "epoch": 0.77, "learning_rate": 7.652851734566529e-05, "loss": 3.1103, "step": 44465 }, { "epoch": 0.77, "learning_rate": 7.647364092304292e-05, "loss": 3.1017, "step": 44470 }, { "epoch": 0.77, "learning_rate": 7.641878130845732e-05, "loss": 3.0508, "step": 44475 }, { "epoch": 0.77, "learning_rate": 7.636393850603352e-05, "loss": 3.142, "step": 44480 }, { "epoch": 0.77, "learning_rate": 7.630911251989544e-05, "loss": 3.1147, "step": 44485 }, { "epoch": 0.77, "learning_rate": 7.62543033541657e-05, "loss": 3.1511, "step": 44490 }, { "epoch": 0.77, "learning_rate": 7.619951101296557e-05, "loss": 3.2319, "step": 44495 }, { "epoch": 0.77, "learning_rate": 7.614473550041533e-05, "loss": 3.0392, "step": 44500 }, { "epoch": 0.77, "eval_loss": 3.186605215072632, "eval_runtime": 149.8734, "eval_samples_per_second": 12.284, "eval_steps_per_second": 0.774, "step": 44500 }, { "epoch": 0.77, "learning_rate": 7.608997682063378e-05, "loss": 3.1118, "step": 44505 }, { "epoch": 0.77, "learning_rate": 7.603523497773841e-05, "loss": 3.1806, "step": 44510 }, { "epoch": 0.77, "learning_rate": 7.598050997584561e-05, "loss": 3.0252, "step": 44515 }, { "epoch": 0.77, "learning_rate": 7.592580181907033e-05, "loss": 3.2698, "step": 44520 }, { "epoch": 0.77, "learning_rate": 7.587111051152633e-05, "loss": 3.1393, "step": 44525 }, { "epoch": 0.77, "learning_rate": 7.58164360573263e-05, "loss": 3.1737, "step": 44530 }, { "epoch": 0.77, "learning_rate": 7.576177846058128e-05, "loss": 3.2284, "step": 44535 }, { "epoch": 0.77, "learning_rate": 7.570713772540138e-05, "loss": 3.2084, "step": 44540 }, { "epoch": 0.77, "learning_rate": 7.56525138558952e-05, "loss": 3.0973, "step": 44545 }, { "epoch": 0.77, "learning_rate": 7.559790685617017e-05, "loss": 3.1736, "step": 44550 }, { "epoch": 0.77, "learning_rate": 7.554331673033255e-05, "loss": 3.1482, "step": 44555 }, { "epoch": 0.77, "learning_rate": 7.54887434824873e-05, "loss": 3.2329, "step": 44560 }, { "epoch": 0.77, "learning_rate": 7.543418711673774e-05, "loss": 3.0866, "step": 44565 }, { "epoch": 0.77, "learning_rate": 7.537964763718653e-05, "loss": 3.21, "step": 44570 }, { "epoch": 0.77, "learning_rate": 7.532512504793467e-05, "loss": 3.103, "step": 44575 }, { "epoch": 0.77, "learning_rate": 7.527061935308184e-05, "loss": 3.1107, "step": 44580 }, { "epoch": 0.77, "learning_rate": 7.521613055672693e-05, "loss": 3.1379, "step": 44585 }, { "epoch": 0.77, "learning_rate": 7.516165866296681e-05, "loss": 3.1808, "step": 44590 }, { "epoch": 0.77, "learning_rate": 7.510720367589777e-05, "loss": 3.1765, "step": 44595 }, { "epoch": 0.77, "learning_rate": 7.505276559961444e-05, "loss": 3.2069, "step": 44600 }, { "epoch": 0.77, "learning_rate": 7.499834443821023e-05, "loss": 3.1409, "step": 44605 }, { "epoch": 0.77, "learning_rate": 7.494394019577759e-05, "loss": 3.1709, "step": 44610 }, { "epoch": 0.77, "learning_rate": 7.488955287640706e-05, "loss": 3.1446, "step": 44615 }, { "epoch": 0.77, "learning_rate": 7.483518248418858e-05, "loss": 3.0126, "step": 44620 }, { "epoch": 0.77, "learning_rate": 7.47808290232104e-05, "loss": 3.1909, "step": 44625 }, { "epoch": 0.77, "learning_rate": 7.47264924975596e-05, "loss": 3.1479, "step": 44630 }, { "epoch": 0.77, "learning_rate": 7.46721729113222e-05, "loss": 3.084, "step": 44635 }, { "epoch": 0.77, "learning_rate": 7.461787026858255e-05, "loss": 3.1349, "step": 44640 }, { "epoch": 0.77, "learning_rate": 7.456358457342386e-05, "loss": 3.1268, "step": 44645 }, { "epoch": 0.77, "learning_rate": 7.450931582992839e-05, "loss": 3.1649, "step": 44650 }, { "epoch": 0.77, "learning_rate": 7.445506404217674e-05, "loss": 3.1787, "step": 44655 }, { "epoch": 0.77, "learning_rate": 7.440082921424836e-05, "loss": 3.0321, "step": 44660 }, { "epoch": 0.77, "learning_rate": 7.434661135022149e-05, "loss": 3.1882, "step": 44665 }, { "epoch": 0.77, "learning_rate": 7.429241045417287e-05, "loss": 3.1999, "step": 44670 }, { "epoch": 0.77, "learning_rate": 7.423822653017831e-05, "loss": 3.1162, "step": 44675 }, { "epoch": 0.77, "learning_rate": 7.418405958231217e-05, "loss": 3.1264, "step": 44680 }, { "epoch": 0.77, "learning_rate": 7.412990961464742e-05, "loss": 3.2139, "step": 44685 }, { "epoch": 0.77, "learning_rate": 7.40757766312559e-05, "loss": 3.0781, "step": 44690 }, { "epoch": 0.77, "learning_rate": 7.402166063620805e-05, "loss": 3.1487, "step": 44695 }, { "epoch": 0.77, "learning_rate": 7.39675616335733e-05, "loss": 3.0855, "step": 44700 }, { "epoch": 0.77, "learning_rate": 7.391347962741948e-05, "loss": 3.1442, "step": 44705 }, { "epoch": 0.77, "learning_rate": 7.385941462181327e-05, "loss": 3.2047, "step": 44710 }, { "epoch": 0.77, "learning_rate": 7.38053666208203e-05, "loss": 3.184, "step": 44715 }, { "epoch": 0.77, "learning_rate": 7.375133562850443e-05, "loss": 3.2336, "step": 44720 }, { "epoch": 0.77, "learning_rate": 7.369732164892854e-05, "loss": 3.1874, "step": 44725 }, { "epoch": 0.77, "learning_rate": 7.364332468615436e-05, "loss": 3.2044, "step": 44730 }, { "epoch": 0.77, "learning_rate": 7.35893447442421e-05, "loss": 3.209, "step": 44735 }, { "epoch": 0.77, "learning_rate": 7.353538182725077e-05, "loss": 3.1973, "step": 44740 }, { "epoch": 0.77, "learning_rate": 7.348143593923814e-05, "loss": 3.2708, "step": 44745 }, { "epoch": 0.77, "learning_rate": 7.342750708426053e-05, "loss": 3.1732, "step": 44750 }, { "epoch": 0.77, "learning_rate": 7.337359526637332e-05, "loss": 3.1316, "step": 44755 }, { "epoch": 0.77, "learning_rate": 7.331970048963027e-05, "loss": 3.0108, "step": 44760 }, { "epoch": 0.77, "learning_rate": 7.326582275808409e-05, "loss": 3.1858, "step": 44765 }, { "epoch": 0.77, "learning_rate": 7.3211962075786e-05, "loss": 3.1266, "step": 44770 }, { "epoch": 0.77, "learning_rate": 7.3158118446786e-05, "loss": 3.2553, "step": 44775 }, { "epoch": 0.77, "learning_rate": 7.310429187513305e-05, "loss": 3.0143, "step": 44780 }, { "epoch": 0.78, "learning_rate": 7.305048236487452e-05, "loss": 3.2031, "step": 44785 }, { "epoch": 0.78, "learning_rate": 7.299668992005664e-05, "loss": 3.1179, "step": 44790 }, { "epoch": 0.78, "learning_rate": 7.294291454472429e-05, "loss": 3.0976, "step": 44795 }, { "epoch": 0.78, "learning_rate": 7.28891562429211e-05, "loss": 3.1156, "step": 44800 }, { "epoch": 0.78, "learning_rate": 7.283541501868935e-05, "loss": 3.2027, "step": 44805 }, { "epoch": 0.78, "learning_rate": 7.27816908760703e-05, "loss": 3.1327, "step": 44810 }, { "epoch": 0.78, "learning_rate": 7.272798381910361e-05, "loss": 3.0802, "step": 44815 }, { "epoch": 0.78, "learning_rate": 7.267429385182777e-05, "loss": 3.1307, "step": 44820 }, { "epoch": 0.78, "learning_rate": 7.262062097828005e-05, "loss": 3.065, "step": 44825 }, { "epoch": 0.78, "learning_rate": 7.256696520249623e-05, "loss": 3.0924, "step": 44830 }, { "epoch": 0.78, "learning_rate": 7.251332652851111e-05, "loss": 3.1293, "step": 44835 }, { "epoch": 0.78, "learning_rate": 7.245970496035803e-05, "loss": 3.1039, "step": 44840 }, { "epoch": 0.78, "learning_rate": 7.240610050206896e-05, "loss": 3.1689, "step": 44845 }, { "epoch": 0.78, "learning_rate": 7.235251315767475e-05, "loss": 3.2031, "step": 44850 }, { "epoch": 0.78, "learning_rate": 7.22989429312048e-05, "loss": 3.0999, "step": 44855 }, { "epoch": 0.78, "learning_rate": 7.224538982668746e-05, "loss": 3.1289, "step": 44860 }, { "epoch": 0.78, "learning_rate": 7.21918538481497e-05, "loss": 3.145, "step": 44865 }, { "epoch": 0.78, "learning_rate": 7.213833499961684e-05, "loss": 3.2122, "step": 44870 }, { "epoch": 0.78, "learning_rate": 7.208483328511349e-05, "loss": 3.1842, "step": 44875 }, { "epoch": 0.78, "learning_rate": 7.203134870866264e-05, "loss": 3.1976, "step": 44880 }, { "epoch": 0.78, "learning_rate": 7.197788127428595e-05, "loss": 3.2405, "step": 44885 }, { "epoch": 0.78, "learning_rate": 7.192443098600421e-05, "loss": 3.0957, "step": 44890 }, { "epoch": 0.78, "learning_rate": 7.187099784783617e-05, "loss": 3.1705, "step": 44895 }, { "epoch": 0.78, "learning_rate": 7.181758186380007e-05, "loss": 2.9637, "step": 44900 }, { "epoch": 0.78, "learning_rate": 7.176418303791237e-05, "loss": 3.2228, "step": 44905 }, { "epoch": 0.78, "learning_rate": 7.171080137418832e-05, "loss": 3.1422, "step": 44910 }, { "epoch": 0.78, "learning_rate": 7.165743687664224e-05, "loss": 3.225, "step": 44915 }, { "epoch": 0.78, "learning_rate": 7.160408954928651e-05, "loss": 3.1538, "step": 44920 }, { "epoch": 0.78, "learning_rate": 7.155075939613279e-05, "loss": 3.0714, "step": 44925 }, { "epoch": 0.78, "learning_rate": 7.14974464211912e-05, "loss": 3.1305, "step": 44930 }, { "epoch": 0.78, "learning_rate": 7.144415062847047e-05, "loss": 3.1041, "step": 44935 }, { "epoch": 0.78, "learning_rate": 7.139087202197842e-05, "loss": 3.122, "step": 44940 }, { "epoch": 0.78, "learning_rate": 7.133761060572123e-05, "loss": 3.1977, "step": 44945 }, { "epoch": 0.78, "learning_rate": 7.128436638370367e-05, "loss": 3.0151, "step": 44950 }, { "epoch": 0.78, "learning_rate": 7.123113935992975e-05, "loss": 3.1652, "step": 44955 }, { "epoch": 0.78, "learning_rate": 7.117792953840168e-05, "loss": 3.2035, "step": 44960 }, { "epoch": 0.78, "learning_rate": 7.112473692312054e-05, "loss": 3.1325, "step": 44965 }, { "epoch": 0.78, "learning_rate": 7.10715615180864e-05, "loss": 3.0583, "step": 44970 }, { "epoch": 0.78, "learning_rate": 7.101840332729739e-05, "loss": 3.0872, "step": 44975 }, { "epoch": 0.78, "learning_rate": 7.096526235475102e-05, "loss": 3.2089, "step": 44980 }, { "epoch": 0.78, "learning_rate": 7.091213860444318e-05, "loss": 2.9796, "step": 44985 }, { "epoch": 0.78, "learning_rate": 7.085903208036837e-05, "loss": 3.0638, "step": 44990 }, { "epoch": 0.78, "learning_rate": 7.080594278652017e-05, "loss": 3.1517, "step": 44995 }, { "epoch": 0.78, "learning_rate": 7.07528707268903e-05, "loss": 3.1721, "step": 45000 }, { "epoch": 0.78, "eval_loss": 3.183523654937744, "eval_runtime": 150.1695, "eval_samples_per_second": 12.259, "eval_steps_per_second": 0.772, "step": 45000 }, { "epoch": 0.78, "learning_rate": 7.069981590546978e-05, "loss": 3.0841, "step": 45005 }, { "epoch": 0.78, "learning_rate": 7.064677832624794e-05, "loss": 3.2613, "step": 45010 }, { "epoch": 0.78, "learning_rate": 7.059375799321287e-05, "loss": 3.0345, "step": 45015 }, { "epoch": 0.78, "learning_rate": 7.054075491035168e-05, "loss": 3.196, "step": 45020 }, { "epoch": 0.78, "learning_rate": 7.04877690816497e-05, "loss": 3.1549, "step": 45025 }, { "epoch": 0.78, "learning_rate": 7.043480051109114e-05, "loss": 3.0979, "step": 45030 }, { "epoch": 0.78, "learning_rate": 7.038184920265916e-05, "loss": 3.1891, "step": 45035 }, { "epoch": 0.78, "learning_rate": 7.032891516033538e-05, "loss": 3.119, "step": 45040 }, { "epoch": 0.78, "learning_rate": 7.027599838810011e-05, "loss": 3.169, "step": 45045 }, { "epoch": 0.78, "learning_rate": 7.022309888993245e-05, "loss": 3.1685, "step": 45050 }, { "epoch": 0.78, "learning_rate": 7.017021666981009e-05, "loss": 3.2098, "step": 45055 }, { "epoch": 0.78, "learning_rate": 7.011735173170964e-05, "loss": 3.1718, "step": 45060 }, { "epoch": 0.78, "learning_rate": 7.006450407960622e-05, "loss": 3.1111, "step": 45065 }, { "epoch": 0.78, "learning_rate": 7.001167371747369e-05, "loss": 3.1789, "step": 45070 }, { "epoch": 0.78, "learning_rate": 6.995886064928464e-05, "loss": 3.0517, "step": 45075 }, { "epoch": 0.78, "learning_rate": 6.990606487901033e-05, "loss": 3.1516, "step": 45080 }, { "epoch": 0.78, "learning_rate": 6.985328641062064e-05, "loss": 3.1516, "step": 45085 }, { "epoch": 0.78, "learning_rate": 6.98005252480844e-05, "loss": 3.2177, "step": 45090 }, { "epoch": 0.78, "learning_rate": 6.97477813953689e-05, "loss": 3.0959, "step": 45095 }, { "epoch": 0.78, "learning_rate": 6.969505485644023e-05, "loss": 3.1638, "step": 45100 }, { "epoch": 0.78, "learning_rate": 6.964234563526314e-05, "loss": 3.2965, "step": 45105 }, { "epoch": 0.78, "learning_rate": 6.958965373580101e-05, "loss": 3.2153, "step": 45110 }, { "epoch": 0.78, "learning_rate": 6.953697916201618e-05, "loss": 3.1298, "step": 45115 }, { "epoch": 0.78, "learning_rate": 6.948432191786936e-05, "loss": 3.0617, "step": 45120 }, { "epoch": 0.78, "learning_rate": 6.94316820073202e-05, "loss": 3.1779, "step": 45125 }, { "epoch": 0.78, "learning_rate": 6.93790594343269e-05, "loss": 3.1907, "step": 45130 }, { "epoch": 0.78, "learning_rate": 6.932645420284636e-05, "loss": 3.1058, "step": 45135 }, { "epoch": 0.78, "learning_rate": 6.927386631683435e-05, "loss": 3.0789, "step": 45140 }, { "epoch": 0.78, "learning_rate": 6.92212957802452e-05, "loss": 3.0602, "step": 45145 }, { "epoch": 0.78, "learning_rate": 6.916874259703172e-05, "loss": 3.197, "step": 45150 }, { "epoch": 0.78, "learning_rate": 6.911620677114592e-05, "loss": 3.1129, "step": 45155 }, { "epoch": 0.78, "learning_rate": 6.90636883065381e-05, "loss": 3.2127, "step": 45160 }, { "epoch": 0.78, "learning_rate": 6.901118720715731e-05, "loss": 3.1139, "step": 45165 }, { "epoch": 0.78, "learning_rate": 6.895870347695151e-05, "loss": 3.2134, "step": 45170 }, { "epoch": 0.78, "learning_rate": 6.890623711986715e-05, "loss": 3.1768, "step": 45175 }, { "epoch": 0.78, "learning_rate": 6.88537881398494e-05, "loss": 3.1486, "step": 45180 }, { "epoch": 0.78, "learning_rate": 6.88013565408422e-05, "loss": 3.2026, "step": 45185 }, { "epoch": 0.78, "learning_rate": 6.874894232678804e-05, "loss": 3.1052, "step": 45190 }, { "epoch": 0.78, "learning_rate": 6.869654550162833e-05, "loss": 3.1453, "step": 45195 }, { "epoch": 0.78, "learning_rate": 6.864416606930299e-05, "loss": 3.1664, "step": 45200 }, { "epoch": 0.78, "learning_rate": 6.859180403375072e-05, "loss": 3.1615, "step": 45205 }, { "epoch": 0.78, "learning_rate": 6.853945939890878e-05, "loss": 3.0932, "step": 45210 }, { "epoch": 0.78, "learning_rate": 6.848713216871323e-05, "loss": 3.1201, "step": 45215 }, { "epoch": 0.78, "learning_rate": 6.843482234709893e-05, "loss": 3.0702, "step": 45220 }, { "epoch": 0.78, "learning_rate": 6.838252993799932e-05, "loss": 3.0548, "step": 45225 }, { "epoch": 0.78, "learning_rate": 6.833025494534629e-05, "loss": 3.1788, "step": 45230 }, { "epoch": 0.78, "learning_rate": 6.827799737307088e-05, "loss": 3.2091, "step": 45235 }, { "epoch": 0.78, "learning_rate": 6.822575722510249e-05, "loss": 3.29, "step": 45240 }, { "epoch": 0.78, "learning_rate": 6.817353450536926e-05, "loss": 3.0955, "step": 45245 }, { "epoch": 0.78, "learning_rate": 6.812132921779832e-05, "loss": 3.219, "step": 45250 }, { "epoch": 0.78, "learning_rate": 6.806914136631487e-05, "loss": 3.2833, "step": 45255 }, { "epoch": 0.78, "learning_rate": 6.801697095484346e-05, "loss": 3.1134, "step": 45260 }, { "epoch": 0.78, "learning_rate": 6.796481798730692e-05, "loss": 3.1767, "step": 45265 }, { "epoch": 0.78, "learning_rate": 6.791268246762687e-05, "loss": 3.1696, "step": 45270 }, { "epoch": 0.78, "learning_rate": 6.78605643997238e-05, "loss": 3.1449, "step": 45275 }, { "epoch": 0.78, "learning_rate": 6.780846378751643e-05, "loss": 3.1968, "step": 45280 }, { "epoch": 0.78, "learning_rate": 6.775638063492272e-05, "loss": 3.0654, "step": 45285 }, { "epoch": 0.78, "learning_rate": 6.770431494585892e-05, "loss": 3.1028, "step": 45290 }, { "epoch": 0.78, "learning_rate": 6.765226672424007e-05, "loss": 3.1161, "step": 45295 }, { "epoch": 0.78, "learning_rate": 6.76002359739802e-05, "loss": 3.2315, "step": 45300 }, { "epoch": 0.78, "learning_rate": 6.754822269899145e-05, "loss": 3.0886, "step": 45305 }, { "epoch": 0.78, "learning_rate": 6.749622690318497e-05, "loss": 3.1833, "step": 45310 }, { "epoch": 0.78, "learning_rate": 6.744424859047076e-05, "loss": 3.2618, "step": 45315 }, { "epoch": 0.78, "learning_rate": 6.739228776475721e-05, "loss": 3.1447, "step": 45320 }, { "epoch": 0.78, "learning_rate": 6.734034442995154e-05, "loss": 3.0796, "step": 45325 }, { "epoch": 0.78, "learning_rate": 6.72884185899596e-05, "loss": 3.0618, "step": 45330 }, { "epoch": 0.78, "learning_rate": 6.723651024868586e-05, "loss": 3.1823, "step": 45335 }, { "epoch": 0.78, "learning_rate": 6.718461941003375e-05, "loss": 3.0862, "step": 45340 }, { "epoch": 0.78, "learning_rate": 6.713274607790507e-05, "loss": 3.2958, "step": 45345 }, { "epoch": 0.78, "learning_rate": 6.708089025620049e-05, "loss": 3.1402, "step": 45350 }, { "epoch": 0.78, "learning_rate": 6.702905194881928e-05, "loss": 3.3028, "step": 45355 }, { "epoch": 0.79, "learning_rate": 6.69772311596593e-05, "loss": 3.1589, "step": 45360 }, { "epoch": 0.79, "learning_rate": 6.692542789261742e-05, "loss": 3.0914, "step": 45365 }, { "epoch": 0.79, "learning_rate": 6.687364215158884e-05, "loss": 3.1536, "step": 45370 }, { "epoch": 0.79, "learning_rate": 6.682187394046763e-05, "loss": 3.1351, "step": 45375 }, { "epoch": 0.79, "learning_rate": 6.677012326314649e-05, "loss": 3.1095, "step": 45380 }, { "epoch": 0.79, "learning_rate": 6.671839012351682e-05, "loss": 3.1706, "step": 45385 }, { "epoch": 0.79, "learning_rate": 6.666667452546854e-05, "loss": 3.1585, "step": 45390 }, { "epoch": 0.79, "learning_rate": 6.661497647289062e-05, "loss": 3.1016, "step": 45395 }, { "epoch": 0.79, "learning_rate": 6.656329596967038e-05, "loss": 3.1136, "step": 45400 }, { "epoch": 0.79, "learning_rate": 6.651163301969397e-05, "loss": 3.1383, "step": 45405 }, { "epoch": 0.79, "learning_rate": 6.645998762684613e-05, "loss": 3.1012, "step": 45410 }, { "epoch": 0.79, "learning_rate": 6.640835979501025e-05, "loss": 3.1399, "step": 45415 }, { "epoch": 0.79, "learning_rate": 6.635674952806867e-05, "loss": 3.158, "step": 45420 }, { "epoch": 0.79, "learning_rate": 6.630515682990212e-05, "loss": 3.2877, "step": 45425 }, { "epoch": 0.79, "learning_rate": 6.62535817043901e-05, "loss": 3.0882, "step": 45430 }, { "epoch": 0.79, "learning_rate": 6.620202415541081e-05, "loss": 3.2474, "step": 45435 }, { "epoch": 0.79, "learning_rate": 6.615048418684106e-05, "loss": 3.1571, "step": 45440 }, { "epoch": 0.79, "learning_rate": 6.609896180255649e-05, "loss": 3.1484, "step": 45445 }, { "epoch": 0.79, "learning_rate": 6.604745700643126e-05, "loss": 3.1594, "step": 45450 }, { "epoch": 0.79, "learning_rate": 6.599596980233828e-05, "loss": 3.2426, "step": 45455 }, { "epoch": 0.79, "learning_rate": 6.594450019414913e-05, "loss": 3.169, "step": 45460 }, { "epoch": 0.79, "learning_rate": 6.589304818573405e-05, "loss": 3.1357, "step": 45465 }, { "epoch": 0.79, "learning_rate": 6.584161378096187e-05, "loss": 3.0433, "step": 45470 }, { "epoch": 0.79, "learning_rate": 6.57901969837004e-05, "loss": 3.1571, "step": 45475 }, { "epoch": 0.79, "learning_rate": 6.573879779781578e-05, "loss": 3.1311, "step": 45480 }, { "epoch": 0.79, "learning_rate": 6.568741622717303e-05, "loss": 3.191, "step": 45485 }, { "epoch": 0.79, "learning_rate": 6.563605227563572e-05, "loss": 3.1198, "step": 45490 }, { "epoch": 0.79, "learning_rate": 6.558470594706609e-05, "loss": 3.1676, "step": 45495 }, { "epoch": 0.79, "learning_rate": 6.553337724532533e-05, "loss": 3.0357, "step": 45500 }, { "epoch": 0.79, "eval_loss": 3.180738925933838, "eval_runtime": 151.371, "eval_samples_per_second": 12.162, "eval_steps_per_second": 0.766, "step": 45500 }, { "epoch": 0.79, "learning_rate": 6.548206617427295e-05, "loss": 3.1704, "step": 45505 }, { "epoch": 0.79, "learning_rate": 6.543077273776732e-05, "loss": 3.1313, "step": 45510 }, { "epoch": 0.79, "learning_rate": 6.537949693966543e-05, "loss": 3.0594, "step": 45515 }, { "epoch": 0.79, "learning_rate": 6.53282387838229e-05, "loss": 3.0647, "step": 45520 }, { "epoch": 0.79, "learning_rate": 6.52769982740942e-05, "loss": 3.1225, "step": 45525 }, { "epoch": 0.79, "learning_rate": 6.522577541433235e-05, "loss": 3.1439, "step": 45530 }, { "epoch": 0.79, "learning_rate": 6.517457020838887e-05, "loss": 3.1825, "step": 45535 }, { "epoch": 0.79, "learning_rate": 6.51233826601143e-05, "loss": 3.2263, "step": 45540 }, { "epoch": 0.79, "learning_rate": 6.507221277335767e-05, "loss": 3.167, "step": 45545 }, { "epoch": 0.79, "learning_rate": 6.502106055196654e-05, "loss": 3.0891, "step": 45550 }, { "epoch": 0.79, "learning_rate": 6.496992599978757e-05, "loss": 3.0816, "step": 45555 }, { "epoch": 0.79, "learning_rate": 6.491880912066549e-05, "loss": 3.1269, "step": 45560 }, { "epoch": 0.79, "learning_rate": 6.486770991844429e-05, "loss": 3.1808, "step": 45565 }, { "epoch": 0.79, "learning_rate": 6.481662839696626e-05, "loss": 3.2526, "step": 45570 }, { "epoch": 0.79, "learning_rate": 6.476556456007242e-05, "loss": 3.0343, "step": 45575 }, { "epoch": 0.79, "learning_rate": 6.47145184116027e-05, "loss": 3.268, "step": 45580 }, { "epoch": 0.79, "learning_rate": 6.466348995539531e-05, "loss": 3.0679, "step": 45585 }, { "epoch": 0.79, "learning_rate": 6.46124791952873e-05, "loss": 3.1898, "step": 45590 }, { "epoch": 0.79, "learning_rate": 6.456148613511462e-05, "loss": 3.0314, "step": 45595 }, { "epoch": 0.79, "learning_rate": 6.451051077871154e-05, "loss": 3.1434, "step": 45600 }, { "epoch": 0.79, "learning_rate": 6.445955312991122e-05, "loss": 3.2544, "step": 45605 }, { "epoch": 0.79, "learning_rate": 6.440861319254536e-05, "loss": 3.0762, "step": 45610 }, { "epoch": 0.79, "learning_rate": 6.435769097044431e-05, "loss": 2.9781, "step": 45615 }, { "epoch": 0.79, "learning_rate": 6.430678646743735e-05, "loss": 3.1502, "step": 45620 }, { "epoch": 0.79, "learning_rate": 6.425589968735217e-05, "loss": 3.0586, "step": 45625 }, { "epoch": 0.79, "learning_rate": 6.420503063401511e-05, "loss": 3.0531, "step": 45630 }, { "epoch": 0.79, "learning_rate": 6.415417931125134e-05, "loss": 3.0447, "step": 45635 }, { "epoch": 0.79, "learning_rate": 6.41033457228845e-05, "loss": 3.1198, "step": 45640 }, { "epoch": 0.79, "learning_rate": 6.405252987273723e-05, "loss": 3.1565, "step": 45645 }, { "epoch": 0.79, "learning_rate": 6.400173176463047e-05, "loss": 3.1355, "step": 45650 }, { "epoch": 0.79, "learning_rate": 6.395095140238393e-05, "loss": 3.0957, "step": 45655 }, { "epoch": 0.79, "learning_rate": 6.390018878981627e-05, "loss": 3.0195, "step": 45660 }, { "epoch": 0.79, "learning_rate": 6.384944393074434e-05, "loss": 3.1708, "step": 45665 }, { "epoch": 0.79, "learning_rate": 6.379871682898392e-05, "loss": 3.0612, "step": 45670 }, { "epoch": 0.79, "learning_rate": 6.374800748834953e-05, "loss": 3.1129, "step": 45675 }, { "epoch": 0.79, "learning_rate": 6.369731591265423e-05, "loss": 3.1009, "step": 45680 }, { "epoch": 0.79, "learning_rate": 6.364664210570973e-05, "loss": 3.0948, "step": 45685 }, { "epoch": 0.79, "learning_rate": 6.359598607132644e-05, "loss": 3.0709, "step": 45690 }, { "epoch": 0.79, "learning_rate": 6.35453478133134e-05, "loss": 3.043, "step": 45695 }, { "epoch": 0.79, "learning_rate": 6.349472733547844e-05, "loss": 3.1692, "step": 45700 }, { "epoch": 0.79, "learning_rate": 6.344412464162795e-05, "loss": 3.247, "step": 45705 }, { "epoch": 0.79, "learning_rate": 6.33935397355669e-05, "loss": 3.1907, "step": 45710 }, { "epoch": 0.79, "learning_rate": 6.334297262109912e-05, "loss": 3.3283, "step": 45715 }, { "epoch": 0.79, "learning_rate": 6.329242330202686e-05, "loss": 3.2082, "step": 45720 }, { "epoch": 0.79, "learning_rate": 6.324189178215133e-05, "loss": 3.1577, "step": 45725 }, { "epoch": 0.79, "learning_rate": 6.319137806527219e-05, "loss": 3.096, "step": 45730 }, { "epoch": 0.79, "learning_rate": 6.314088215518776e-05, "loss": 3.1203, "step": 45735 }, { "epoch": 0.79, "learning_rate": 6.309040405569515e-05, "loss": 3.082, "step": 45740 }, { "epoch": 0.79, "learning_rate": 6.303994377058999e-05, "loss": 3.1651, "step": 45745 }, { "epoch": 0.79, "learning_rate": 6.298950130366659e-05, "loss": 3.2569, "step": 45750 }, { "epoch": 0.79, "learning_rate": 6.293907665871808e-05, "loss": 3.0798, "step": 45755 }, { "epoch": 0.79, "learning_rate": 6.288866983953612e-05, "loss": 3.2079, "step": 45760 }, { "epoch": 0.79, "learning_rate": 6.283828084991102e-05, "loss": 3.2477, "step": 45765 }, { "epoch": 0.79, "learning_rate": 6.278790969363175e-05, "loss": 3.2101, "step": 45770 }, { "epoch": 0.79, "learning_rate": 6.27375563744859e-05, "loss": 3.2387, "step": 45775 }, { "epoch": 0.79, "learning_rate": 6.268722089625992e-05, "loss": 3.1574, "step": 45780 }, { "epoch": 0.79, "learning_rate": 6.263690326273874e-05, "loss": 3.1126, "step": 45785 }, { "epoch": 0.79, "learning_rate": 6.258660347770597e-05, "loss": 3.2076, "step": 45790 }, { "epoch": 0.79, "learning_rate": 6.253632154494388e-05, "loss": 3.1809, "step": 45795 }, { "epoch": 0.79, "learning_rate": 6.248605746823339e-05, "loss": 3.131, "step": 45800 }, { "epoch": 0.79, "learning_rate": 6.243581125135417e-05, "loss": 3.1605, "step": 45805 }, { "epoch": 0.79, "learning_rate": 6.238558289808456e-05, "loss": 3.1935, "step": 45810 }, { "epoch": 0.79, "learning_rate": 6.233537241220117e-05, "loss": 3.1822, "step": 45815 }, { "epoch": 0.79, "learning_rate": 6.228517979747987e-05, "loss": 3.1973, "step": 45820 }, { "epoch": 0.79, "learning_rate": 6.223500505769476e-05, "loss": 3.0712, "step": 45825 }, { "epoch": 0.79, "learning_rate": 6.218484819661864e-05, "loss": 3.2074, "step": 45830 }, { "epoch": 0.79, "learning_rate": 6.213470921802335e-05, "loss": 3.1901, "step": 45835 }, { "epoch": 0.79, "learning_rate": 6.208458812567873e-05, "loss": 3.1254, "step": 45840 }, { "epoch": 0.79, "learning_rate": 6.203448492335385e-05, "loss": 3.2218, "step": 45845 }, { "epoch": 0.79, "learning_rate": 6.198439961481615e-05, "loss": 3.194, "step": 45850 }, { "epoch": 0.79, "learning_rate": 6.193433220383174e-05, "loss": 3.1396, "step": 45855 }, { "epoch": 0.79, "learning_rate": 6.188428269416561e-05, "loss": 3.2048, "step": 45860 }, { "epoch": 0.79, "learning_rate": 6.183425108958094e-05, "loss": 3.085, "step": 45865 }, { "epoch": 0.79, "learning_rate": 6.178423739384006e-05, "loss": 3.1692, "step": 45870 }, { "epoch": 0.79, "learning_rate": 6.173424161070372e-05, "loss": 3.1756, "step": 45875 }, { "epoch": 0.79, "learning_rate": 6.168426374393125e-05, "loss": 3.1431, "step": 45880 }, { "epoch": 0.79, "learning_rate": 6.163430379728086e-05, "loss": 3.2415, "step": 45885 }, { "epoch": 0.79, "learning_rate": 6.158436177450931e-05, "loss": 3.27, "step": 45890 }, { "epoch": 0.79, "learning_rate": 6.153443767937175e-05, "loss": 3.0112, "step": 45895 }, { "epoch": 0.79, "learning_rate": 6.148453151562243e-05, "loss": 3.1826, "step": 45900 }, { "epoch": 0.79, "learning_rate": 6.143464328701394e-05, "loss": 3.2231, "step": 45905 }, { "epoch": 0.79, "learning_rate": 6.13847729972976e-05, "loss": 3.1354, "step": 45910 }, { "epoch": 0.79, "learning_rate": 6.133492065022358e-05, "loss": 3.0341, "step": 45915 }, { "epoch": 0.79, "learning_rate": 6.128508624954025e-05, "loss": 3.1803, "step": 45920 }, { "epoch": 0.79, "learning_rate": 6.12352697989951e-05, "loss": 3.0783, "step": 45925 }, { "epoch": 0.79, "learning_rate": 6.118547130233401e-05, "loss": 3.1693, "step": 45930 }, { "epoch": 0.79, "learning_rate": 6.113569076330152e-05, "loss": 3.1872, "step": 45935 }, { "epoch": 0.8, "learning_rate": 6.108592818564105e-05, "loss": 3.1872, "step": 45940 }, { "epoch": 0.8, "learning_rate": 6.103618357309423e-05, "loss": 3.1362, "step": 45945 }, { "epoch": 0.8, "learning_rate": 6.0986456929401795e-05, "loss": 3.0724, "step": 45950 }, { "epoch": 0.8, "learning_rate": 6.09367482583029e-05, "loss": 3.2822, "step": 45955 }, { "epoch": 0.8, "learning_rate": 6.0887057563535266e-05, "loss": 3.1595, "step": 45960 }, { "epoch": 0.8, "learning_rate": 6.0837384848835634e-05, "loss": 3.0659, "step": 45965 }, { "epoch": 0.8, "learning_rate": 6.0787730117938885e-05, "loss": 3.192, "step": 45970 }, { "epoch": 0.8, "learning_rate": 6.073809337457879e-05, "loss": 3.1099, "step": 45975 }, { "epoch": 0.8, "learning_rate": 6.068847462248797e-05, "loss": 3.1255, "step": 45980 }, { "epoch": 0.8, "learning_rate": 6.063887386539741e-05, "loss": 3.2569, "step": 45985 }, { "epoch": 0.8, "learning_rate": 6.058929110703685e-05, "loss": 3.1493, "step": 45990 }, { "epoch": 0.8, "learning_rate": 6.0539726351134585e-05, "loss": 3.1143, "step": 45995 }, { "epoch": 0.8, "learning_rate": 6.049017960141767e-05, "loss": 3.1716, "step": 46000 }, { "epoch": 0.8, "eval_loss": 3.1786859035491943, "eval_runtime": 150.5699, "eval_samples_per_second": 12.227, "eval_steps_per_second": 0.77, "step": 46000 }, { "epoch": 0.8, "learning_rate": 6.04406508616118e-05, "loss": 3.0864, "step": 46005 }, { "epoch": 0.8, "learning_rate": 6.0391140135441295e-05, "loss": 3.2294, "step": 46010 }, { "epoch": 0.8, "learning_rate": 6.0341647426629056e-05, "loss": 3.2553, "step": 46015 }, { "epoch": 0.8, "learning_rate": 6.029217273889672e-05, "loss": 3.1414, "step": 46020 }, { "epoch": 0.8, "learning_rate": 6.0242716075964424e-05, "loss": 3.1521, "step": 46025 }, { "epoch": 0.8, "learning_rate": 6.0193277441551244e-05, "loss": 3.2215, "step": 46030 }, { "epoch": 0.8, "learning_rate": 6.014385683937457e-05, "loss": 3.1964, "step": 46035 }, { "epoch": 0.8, "learning_rate": 6.009445427315066e-05, "loss": 2.9809, "step": 46040 }, { "epoch": 0.8, "learning_rate": 6.0045069746594275e-05, "loss": 3.1355, "step": 46045 }, { "epoch": 0.8, "learning_rate": 5.999570326341888e-05, "loss": 3.1077, "step": 46050 }, { "epoch": 0.8, "learning_rate": 5.994635482733654e-05, "loss": 3.167, "step": 46055 }, { "epoch": 0.8, "learning_rate": 5.989702444205812e-05, "loss": 3.2413, "step": 46060 }, { "epoch": 0.8, "learning_rate": 5.9847712111292946e-05, "loss": 3.1135, "step": 46065 }, { "epoch": 0.8, "learning_rate": 5.9798417838749094e-05, "loss": 3.2011, "step": 46070 }, { "epoch": 0.8, "learning_rate": 5.974914162813319e-05, "loss": 3.1403, "step": 46075 }, { "epoch": 0.8, "learning_rate": 5.969988348315048e-05, "loss": 3.1994, "step": 46080 }, { "epoch": 0.8, "learning_rate": 5.965064340750513e-05, "loss": 3.1759, "step": 46085 }, { "epoch": 0.8, "learning_rate": 5.960142140489967e-05, "loss": 3.1331, "step": 46090 }, { "epoch": 0.8, "learning_rate": 5.955221747903512e-05, "loss": 3.2708, "step": 46095 }, { "epoch": 0.8, "learning_rate": 5.9503031633611654e-05, "loss": 3.103, "step": 46100 }, { "epoch": 0.8, "learning_rate": 5.94538638723276e-05, "loss": 3.1251, "step": 46105 }, { "epoch": 0.8, "learning_rate": 5.9404714198880275e-05, "loss": 3.1968, "step": 46110 }, { "epoch": 0.8, "learning_rate": 5.9355582616965425e-05, "loss": 3.0474, "step": 46115 }, { "epoch": 0.8, "learning_rate": 5.9306469130277456e-05, "loss": 3.168, "step": 46120 }, { "epoch": 0.8, "learning_rate": 5.925737374250949e-05, "loss": 3.1837, "step": 46125 }, { "epoch": 0.8, "learning_rate": 5.920829645735322e-05, "loss": 3.1468, "step": 46130 }, { "epoch": 0.8, "learning_rate": 5.9159237278498947e-05, "loss": 3.0686, "step": 46135 }, { "epoch": 0.8, "learning_rate": 5.911019620963582e-05, "loss": 3.1152, "step": 46140 }, { "epoch": 0.8, "learning_rate": 5.906117325445141e-05, "loss": 3.2782, "step": 46145 }, { "epoch": 0.8, "learning_rate": 5.901216841663194e-05, "loss": 3.089, "step": 46150 }, { "epoch": 0.8, "learning_rate": 5.89631816998624e-05, "loss": 3.1593, "step": 46155 }, { "epoch": 0.8, "learning_rate": 5.891421310782621e-05, "loss": 3.2204, "step": 46160 }, { "epoch": 0.8, "learning_rate": 5.886526264420573e-05, "loss": 3.1318, "step": 46165 }, { "epoch": 0.8, "learning_rate": 5.881633031268179e-05, "loss": 3.1237, "step": 46170 }, { "epoch": 0.8, "learning_rate": 5.876741611693361e-05, "loss": 3.0531, "step": 46175 }, { "epoch": 0.8, "learning_rate": 5.8718520060639494e-05, "loss": 3.1773, "step": 46180 }, { "epoch": 0.8, "learning_rate": 5.866964214747615e-05, "loss": 3.1155, "step": 46185 }, { "epoch": 0.8, "learning_rate": 5.8620782381118826e-05, "loss": 3.1278, "step": 46190 }, { "epoch": 0.8, "learning_rate": 5.8571940765241755e-05, "loss": 3.0726, "step": 46195 }, { "epoch": 0.8, "learning_rate": 5.85231173035173e-05, "loss": 3.2045, "step": 46200 }, { "epoch": 0.8, "learning_rate": 5.8474311999616954e-05, "loss": 3.1445, "step": 46205 }, { "epoch": 0.8, "learning_rate": 5.842552485721055e-05, "loss": 3.2156, "step": 46210 }, { "epoch": 0.8, "learning_rate": 5.8376755879966554e-05, "loss": 3.1789, "step": 46215 }, { "epoch": 0.8, "learning_rate": 5.8328005071552364e-05, "loss": 3.1581, "step": 46220 }, { "epoch": 0.8, "learning_rate": 5.827927243563348e-05, "loss": 3.2274, "step": 46225 }, { "epoch": 0.8, "learning_rate": 5.823055797587458e-05, "loss": 3.0204, "step": 46230 }, { "epoch": 0.8, "learning_rate": 5.8181861695938695e-05, "loss": 3.1658, "step": 46235 }, { "epoch": 0.8, "learning_rate": 5.813318359948742e-05, "loss": 3.0678, "step": 46240 }, { "epoch": 0.8, "learning_rate": 5.8084523690181336e-05, "loss": 3.1534, "step": 46245 }, { "epoch": 0.8, "learning_rate": 5.803588197167922e-05, "loss": 3.1554, "step": 46250 }, { "epoch": 0.8, "learning_rate": 5.798725844763864e-05, "loss": 3.1348, "step": 46255 }, { "epoch": 0.8, "learning_rate": 5.793865312171604e-05, "loss": 3.1477, "step": 46260 }, { "epoch": 0.8, "learning_rate": 5.7890065997566126e-05, "loss": 3.2401, "step": 46265 }, { "epoch": 0.8, "learning_rate": 5.784149707884248e-05, "loss": 3.2408, "step": 46270 }, { "epoch": 0.8, "learning_rate": 5.779294636919721e-05, "loss": 3.1458, "step": 46275 }, { "epoch": 0.8, "learning_rate": 5.774441387228104e-05, "loss": 3.0876, "step": 46280 }, { "epoch": 0.8, "learning_rate": 5.769589959174343e-05, "loss": 3.081, "step": 46285 }, { "epoch": 0.8, "learning_rate": 5.7647403531232415e-05, "loss": 3.1654, "step": 46290 }, { "epoch": 0.8, "learning_rate": 5.7598925694394605e-05, "loss": 3.1191, "step": 46295 }, { "epoch": 0.8, "learning_rate": 5.755046608487528e-05, "loss": 3.1663, "step": 46300 }, { "epoch": 0.8, "learning_rate": 5.750202470631833e-05, "loss": 3.1126, "step": 46305 }, { "epoch": 0.8, "learning_rate": 5.74536015623664e-05, "loss": 3.1349, "step": 46310 }, { "epoch": 0.8, "learning_rate": 5.740519665666059e-05, "loss": 3.1206, "step": 46315 }, { "epoch": 0.8, "learning_rate": 5.7356809992840715e-05, "loss": 3.078, "step": 46320 }, { "epoch": 0.8, "learning_rate": 5.730844157454523e-05, "loss": 3.0895, "step": 46325 }, { "epoch": 0.8, "learning_rate": 5.7260091405411124e-05, "loss": 3.2058, "step": 46330 }, { "epoch": 0.8, "learning_rate": 5.721175948907406e-05, "loss": 3.2039, "step": 46335 }, { "epoch": 0.8, "learning_rate": 5.71634458291685e-05, "loss": 3.1625, "step": 46340 }, { "epoch": 0.8, "learning_rate": 5.71151504293273e-05, "loss": 3.1704, "step": 46345 }, { "epoch": 0.8, "learning_rate": 5.706687329318202e-05, "loss": 3.112, "step": 46350 }, { "epoch": 0.8, "learning_rate": 5.701861442436285e-05, "loss": 3.1327, "step": 46355 }, { "epoch": 0.8, "learning_rate": 5.6970373826498536e-05, "loss": 3.1424, "step": 46360 }, { "epoch": 0.8, "learning_rate": 5.692215150321667e-05, "loss": 3.1835, "step": 46365 }, { "epoch": 0.8, "learning_rate": 5.6873947458143255e-05, "loss": 3.0898, "step": 46370 }, { "epoch": 0.8, "learning_rate": 5.682576169490302e-05, "loss": 3.0496, "step": 46375 }, { "epoch": 0.8, "learning_rate": 5.6777594217119224e-05, "loss": 3.1112, "step": 46380 }, { "epoch": 0.8, "learning_rate": 5.672944502841379e-05, "loss": 3.201, "step": 46385 }, { "epoch": 0.8, "learning_rate": 5.668131413240744e-05, "loss": 3.0926, "step": 46390 }, { "epoch": 0.8, "learning_rate": 5.6633201532719256e-05, "loss": 3.1919, "step": 46395 }, { "epoch": 0.8, "learning_rate": 5.6585107232967095e-05, "loss": 3.2124, "step": 46400 }, { "epoch": 0.8, "learning_rate": 5.653703123676738e-05, "loss": 3.2094, "step": 46405 }, { "epoch": 0.8, "learning_rate": 5.6488973547735184e-05, "loss": 3.0477, "step": 46410 }, { "epoch": 0.8, "learning_rate": 5.6440934169484106e-05, "loss": 3.1221, "step": 46415 }, { "epoch": 0.8, "learning_rate": 5.639291310562667e-05, "loss": 3.0849, "step": 46420 }, { "epoch": 0.8, "learning_rate": 5.634491035977371e-05, "loss": 3.053, "step": 46425 }, { "epoch": 0.8, "learning_rate": 5.6296925935534746e-05, "loss": 3.1511, "step": 46430 }, { "epoch": 0.8, "learning_rate": 5.624895983651801e-05, "loss": 3.1053, "step": 46435 }, { "epoch": 0.8, "learning_rate": 5.620101206633021e-05, "loss": 3.2083, "step": 46440 }, { "epoch": 0.8, "learning_rate": 5.6153082628576937e-05, "loss": 3.1903, "step": 46445 }, { "epoch": 0.8, "learning_rate": 5.610517152686212e-05, "loss": 3.1463, "step": 46450 }, { "epoch": 0.8, "learning_rate": 5.605727876478852e-05, "loss": 3.1161, "step": 46455 }, { "epoch": 0.8, "learning_rate": 5.600940434595733e-05, "loss": 3.1253, "step": 46460 }, { "epoch": 0.8, "learning_rate": 5.596154827396842e-05, "loss": 3.2649, "step": 46465 }, { "epoch": 0.8, "learning_rate": 5.591371055242049e-05, "loss": 3.1203, "step": 46470 }, { "epoch": 0.8, "learning_rate": 5.5865891184910706e-05, "loss": 3.1973, "step": 46475 }, { "epoch": 0.8, "learning_rate": 5.581809017503457e-05, "loss": 3.196, "step": 46480 }, { "epoch": 0.8, "learning_rate": 5.5770307526386703e-05, "loss": 3.0937, "step": 46485 }, { "epoch": 0.8, "learning_rate": 5.572254324256004e-05, "loss": 3.1002, "step": 46490 }, { "epoch": 0.8, "learning_rate": 5.567479732714616e-05, "loss": 3.2265, "step": 46495 }, { "epoch": 0.8, "learning_rate": 5.5627069783735534e-05, "loss": 3.0773, "step": 46500 }, { "epoch": 0.8, "eval_loss": 3.175149440765381, "eval_runtime": 150.553, "eval_samples_per_second": 12.228, "eval_steps_per_second": 0.77, "step": 46500 } ], "logging_steps": 5, "max_steps": 57783, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.3814529059671327e+18, "trial_name": null, "trial_params": null }