diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,56563 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8047349566481491, + "eval_steps": 500, + "global_step": 46500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.999999999999999e-06, + "loss": 11.0832, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.1999999999999999e-05, + "loss": 10.9995, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.7999999999999997e-05, + "loss": 10.8287, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 2.3999999999999997e-05, + "loss": 10.5347, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 2.9999999999999997e-05, + "loss": 10.1317, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 3.5999999999999994e-05, + "loss": 9.7568, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.2e-05, + "loss": 9.4066, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 4.7999999999999994e-05, + "loss": 9.0828, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 5.399999999999999e-05, + "loss": 8.7974, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 5.9999999999999995e-05, + "loss": 8.4785, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 6.599999999999999e-05, + "loss": 8.2844, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 7.199999999999999e-05, + "loss": 8.1159, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 7.8e-05, + "loss": 8.04, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 8.4e-05, + "loss": 8.0479, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 8.999999999999999e-05, + "loss": 7.785, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 9.599999999999999e-05, + "loss": 7.8489, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 0.000102, + "loss": 7.7499, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010799999999999998, + "loss": 7.596, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011399999999999999, + "loss": 7.6188, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011999999999999999, + "loss": 7.5564, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012599999999999997, + "loss": 7.5576, + "step": 105 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013199999999999998, + "loss": 7.5225, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 0.000138, + "loss": 7.4858, + "step": 115 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014399999999999998, + "loss": 7.2874, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015, + "loss": 7.3418, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 0.000156, + "loss": 7.2647, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 0.000162, + "loss": 7.1983, + "step": 135 + }, + { + "epoch": 0.0, + "learning_rate": 0.000168, + "loss": 7.0973, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017399999999999997, + "loss": 7.1693, + "step": 145 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017999999999999998, + "loss": 7.1801, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 0.000186, + "loss": 7.0995, + "step": 155 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019199999999999998, + "loss": 6.9925, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 0.000198, + "loss": 6.9763, + "step": 165 + }, + { + "epoch": 0.0, + "learning_rate": 0.000204, + "loss": 6.9261, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 0.00020999999999999998, + "loss": 6.9174, + "step": 175 + }, + { + "epoch": 0.0, + "learning_rate": 0.00021599999999999996, + "loss": 6.9063, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 0.00022199999999999998, + "loss": 6.8622, + "step": 185 + }, + { + "epoch": 0.0, + "learning_rate": 0.00022799999999999999, + "loss": 6.897, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 0.000234, + "loss": 6.8112, + "step": 195 + }, + { + "epoch": 0.0, + "learning_rate": 0.00023999999999999998, + "loss": 6.7515, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 0.00024599999999999996, + "loss": 6.7586, + "step": 205 + }, + { + "epoch": 0.0, + "learning_rate": 0.00025199999999999995, + "loss": 6.5961, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 0.000258, + "loss": 6.6598, + "step": 215 + }, + { + "epoch": 0.0, + "learning_rate": 0.00026399999999999997, + "loss": 6.6071, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 0.00027, + "loss": 6.5918, + "step": 225 + }, + { + "epoch": 0.0, + "learning_rate": 0.000276, + "loss": 6.539, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 0.00028199999999999997, + "loss": 6.4539, + "step": 235 + }, + { + "epoch": 0.0, + "learning_rate": 0.00028799999999999995, + "loss": 6.4418, + "step": 240 + }, + { + "epoch": 0.0, + "learning_rate": 0.000294, + "loss": 6.5303, + "step": 245 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003, + "loss": 6.4021, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 0.00030599999999999996, + "loss": 6.4768, + "step": 255 + }, + { + "epoch": 0.0, + "learning_rate": 0.000312, + "loss": 6.3929, + "step": 260 + }, + { + "epoch": 0.0, + "learning_rate": 0.000318, + "loss": 6.3891, + "step": 265 + }, + { + "epoch": 0.0, + "learning_rate": 0.000324, + "loss": 6.4381, + "step": 270 + }, + { + "epoch": 0.0, + "learning_rate": 0.00033, + "loss": 6.3544, + "step": 275 + }, + { + "epoch": 0.0, + "learning_rate": 0.000336, + "loss": 6.355, + "step": 280 + }, + { + "epoch": 0.0, + "learning_rate": 0.00034199999999999996, + "loss": 6.3141, + "step": 285 + }, + { + "epoch": 0.01, + "learning_rate": 0.00034799999999999995, + "loss": 6.2884, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035399999999999993, + "loss": 6.3167, + "step": 295 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035999999999999997, + "loss": 6.2576, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 0.00036599999999999995, + "loss": 6.2405, + "step": 305 + }, + { + "epoch": 0.01, + "learning_rate": 0.000372, + "loss": 6.2546, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037799999999999997, + "loss": 6.1438, + "step": 315 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038399999999999996, + "loss": 6.1532, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039, + "loss": 6.1473, + "step": 325 + }, + { + "epoch": 0.01, + "learning_rate": 0.000396, + "loss": 6.2154, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 0.000402, + "loss": 6.0882, + "step": 335 + }, + { + "epoch": 0.01, + "learning_rate": 0.000408, + "loss": 6.2324, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004139999999999999, + "loss": 6.2041, + "step": 345 + }, + { + "epoch": 0.01, + "learning_rate": 0.00041999999999999996, + "loss": 6.0757, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 0.00042599999999999995, + "loss": 6.0776, + "step": 355 + }, + { + "epoch": 0.01, + "learning_rate": 0.00043199999999999993, + "loss": 6.032, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 0.00043799999999999997, + "loss": 6.1352, + "step": 365 + }, + { + "epoch": 0.01, + "learning_rate": 0.00044399999999999995, + "loss": 5.9888, + "step": 370 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045, + "loss": 6.0221, + "step": 375 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045599999999999997, + "loss": 6.0419, + "step": 380 + }, + { + "epoch": 0.01, + "learning_rate": 0.00046199999999999995, + "loss": 5.9985, + "step": 385 + }, + { + "epoch": 0.01, + "learning_rate": 0.000468, + "loss": 5.8769, + "step": 390 + }, + { + "epoch": 0.01, + "learning_rate": 0.000474, + "loss": 5.9902, + "step": 395 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047999999999999996, + "loss": 5.9365, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 0.000486, + "loss": 5.895, + "step": 405 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004919999999999999, + "loss": 5.9953, + "step": 410 + }, + { + "epoch": 0.01, + "learning_rate": 0.000498, + "loss": 5.8948, + "step": 415 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005039999999999999, + "loss": 5.9516, + "step": 420 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005099999999999999, + "loss": 5.8119, + "step": 425 + }, + { + "epoch": 0.01, + "learning_rate": 0.000516, + "loss": 5.8849, + "step": 430 + }, + { + "epoch": 0.01, + "learning_rate": 0.000522, + "loss": 5.8657, + "step": 435 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005279999999999999, + "loss": 5.7593, + "step": 440 + }, + { + "epoch": 0.01, + "learning_rate": 0.000534, + "loss": 5.8779, + "step": 445 + }, + { + "epoch": 0.01, + "learning_rate": 0.00054, + "loss": 5.8816, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005459999999999999, + "loss": 5.813, + "step": 455 + }, + { + "epoch": 0.01, + "learning_rate": 0.000552, + "loss": 5.8118, + "step": 460 + }, + { + "epoch": 0.01, + "learning_rate": 0.000558, + "loss": 5.7594, + "step": 465 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005639999999999999, + "loss": 5.8195, + "step": 470 + }, + { + "epoch": 0.01, + "learning_rate": 0.00057, + "loss": 5.7352, + "step": 475 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005759999999999999, + "loss": 5.6983, + "step": 480 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005819999999999999, + "loss": 5.7638, + "step": 485 + }, + { + "epoch": 0.01, + "learning_rate": 0.000588, + "loss": 5.8061, + "step": 490 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005939999999999999, + "loss": 5.8155, + "step": 495 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006, + "loss": 5.667, + "step": 500 + }, + { + "epoch": 0.01, + "eval_loss": 5.787242889404297, + "eval_runtime": 151.0777, + "eval_samples_per_second": 12.186, + "eval_steps_per_second": 0.768, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999999887207687, + "loss": 5.7718, + "step": 505 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999999548830758, + "loss": 5.673, + "step": 510 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999998984869237, + "loss": 5.6713, + "step": 515 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999998195323167, + "loss": 5.6457, + "step": 520 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999997180192608, + "loss": 5.7369, + "step": 525 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999995939477636, + "loss": 5.7537, + "step": 530 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999994473178343, + "loss": 5.721, + "step": 535 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999992781294842, + "loss": 5.6126, + "step": 540 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999990863827257, + "loss": 5.5628, + "step": 545 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999988720775734, + "loss": 5.5213, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999986352140434, + "loss": 5.5386, + "step": 555 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999983757921536, + "loss": 5.6003, + "step": 560 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999980938119233, + "loss": 5.5606, + "step": 565 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999977892733739, + "loss": 5.6028, + "step": 570 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999974621765282, + "loss": 5.6032, + "step": 575 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999971125214107, + "loss": 5.5227, + "step": 580 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999967403080478, + "loss": 5.5631, + "step": 585 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999963455364676, + "loss": 5.5767, + "step": 590 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999959282066996, + "loss": 5.5166, + "step": 595 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999954883187754, + "loss": 5.5309, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999950258727278, + "loss": 5.4728, + "step": 605 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999945408685917, + "loss": 5.4537, + "step": 610 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999940333064035, + "loss": 5.4461, + "step": 615 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999935031862015, + "loss": 5.4692, + "step": 620 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999929505080256, + "loss": 5.4129, + "step": 625 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999923752719172, + "loss": 5.4537, + "step": 630 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999917774779196, + "loss": 5.3996, + "step": 635 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999911571260778, + "loss": 5.4168, + "step": 640 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999905142164384, + "loss": 5.4616, + "step": 645 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999898487490498, + "loss": 5.4191, + "step": 650 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999891607239619, + "loss": 5.3498, + "step": 655 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999884501412266, + "loss": 5.5168, + "step": 660 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999877170008972, + "loss": 5.4529, + "step": 665 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999869613030289, + "loss": 5.4247, + "step": 670 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999861830476786, + "loss": 5.3704, + "step": 675 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999853822349047, + "loss": 5.314, + "step": 680 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999845588647674, + "loss": 5.263, + "step": 685 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999837129373288, + "loss": 5.3963, + "step": 690 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999828444526523, + "loss": 5.3916, + "step": 695 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999819534108033, + "loss": 5.3184, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999810398118487, + "loss": 5.4009, + "step": 705 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999801036558574, + "loss": 5.3957, + "step": 710 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999791449428996, + "loss": 5.2775, + "step": 715 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999781636730476, + "loss": 5.2826, + "step": 720 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999771598463749, + "loss": 5.2916, + "step": 725 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999761334629572, + "loss": 5.3436, + "step": 730 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999750845228717, + "loss": 5.3674, + "step": 735 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999740130261971, + "loss": 5.2603, + "step": 740 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999729189730139, + "loss": 5.2142, + "step": 745 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999718023634048, + "loss": 5.3698, + "step": 750 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999706631974534, + "loss": 5.2052, + "step": 755 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999695014752454, + "loss": 5.2727, + "step": 760 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999683171968683, + "loss": 5.2406, + "step": 765 + }, + { + "epoch": 0.01, + "learning_rate": 0.000599967110362411, + "loss": 5.2282, + "step": 770 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999658809719644, + "loss": 5.1632, + "step": 775 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999646290256208, + "loss": 5.1701, + "step": 780 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999633545234743, + "loss": 5.1785, + "step": 785 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999620574656209, + "loss": 5.2017, + "step": 790 + }, + { + "epoch": 0.01, + "learning_rate": 0.000599960737852158, + "loss": 5.2983, + "step": 795 + }, + { + "epoch": 0.01, + "learning_rate": 0.000599959395683185, + "loss": 5.1982, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999580309588026, + "loss": 5.1416, + "step": 805 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999566436791137, + "loss": 5.1912, + "step": 810 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999552338442223, + "loss": 5.2655, + "step": 815 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999538014542347, + "loss": 5.188, + "step": 820 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999523465092583, + "loss": 5.1928, + "step": 825 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999508690094028, + "loss": 5.1108, + "step": 830 + }, + { + "epoch": 0.01, + "learning_rate": 0.000599949368954779, + "loss": 5.1586, + "step": 835 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999478463455001, + "loss": 5.1363, + "step": 840 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999463011816801, + "loss": 5.142, + "step": 845 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999447334634358, + "loss": 5.1496, + "step": 850 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999431431908844, + "loss": 5.052, + "step": 855 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999415303641459, + "loss": 5.0762, + "step": 860 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005999398949833416, + "loss": 5.1514, + "step": 865 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999382370485942, + "loss": 5.1176, + "step": 870 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999365565600288, + "loss": 5.1016, + "step": 875 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999348535177713, + "loss": 5.1446, + "step": 880 + }, + { + "epoch": 0.02, + "learning_rate": 0.00059993312792195, + "loss": 4.9957, + "step": 885 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999313797726947, + "loss": 5.1482, + "step": 890 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999296090701366, + "loss": 5.0768, + "step": 895 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999278158144091, + "loss": 5.2004, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999260000056469, + "loss": 5.0756, + "step": 905 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999241616439867, + "loss": 5.1679, + "step": 910 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999223007295666, + "loss": 5.0844, + "step": 915 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999204172625265, + "loss": 5.1273, + "step": 920 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999185112430082, + "loss": 5.0067, + "step": 925 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999165826711547, + "loss": 5.0033, + "step": 930 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999146315471115, + "loss": 5.0368, + "step": 935 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999126578710249, + "loss": 5.0261, + "step": 940 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999106616430435, + "loss": 5.0727, + "step": 945 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999086428633173, + "loss": 5.0198, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999066015319981, + "loss": 5.0701, + "step": 955 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999045376492396, + "loss": 5.0427, + "step": 960 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999024512151967, + "loss": 5.1675, + "step": 965 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005999003422300267, + "loss": 5.0997, + "step": 970 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998982106938878, + "loss": 5.1069, + "step": 975 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998960566069404, + "loss": 5.0461, + "step": 980 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998938799693464, + "loss": 5.0177, + "step": 985 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998916807812696, + "loss": 4.9937, + "step": 990 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998894590428755, + "loss": 5.0079, + "step": 995 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998872147543309, + "loss": 4.9904, + "step": 1000 + }, + { + "epoch": 0.02, + "eval_loss": 5.063974857330322, + "eval_runtime": 150.7726, + "eval_samples_per_second": 12.21, + "eval_steps_per_second": 0.769, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998849479158045, + "loss": 5.0673, + "step": 1005 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998826585274671, + "loss": 4.9611, + "step": 1010 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998803465894905, + "loss": 5.0122, + "step": 1015 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998780121020487, + "loss": 5.0227, + "step": 1020 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998756550653174, + "loss": 4.9941, + "step": 1025 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998732754794736, + "loss": 5.0426, + "step": 1030 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998708733446962, + "loss": 4.9343, + "step": 1035 + }, + { + "epoch": 0.02, + "learning_rate": 0.000599868448661166, + "loss": 5.0083, + "step": 1040 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998660014290653, + "loss": 5.0275, + "step": 1045 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998635316485781, + "loss": 5.0044, + "step": 1050 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998610393198902, + "loss": 5.0977, + "step": 1055 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998585244431887, + "loss": 4.9482, + "step": 1060 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998559870186629, + "loss": 5.007, + "step": 1065 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998534270465038, + "loss": 4.9826, + "step": 1070 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998508445269035, + "loss": 4.9924, + "step": 1075 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998482394600566, + "loss": 4.9346, + "step": 1080 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998456118461587, + "loss": 4.9776, + "step": 1085 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998429616854074, + "loss": 5.0336, + "step": 1090 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998402889780021, + "loss": 4.9965, + "step": 1095 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998375937241438, + "loss": 4.9999, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998348759240351, + "loss": 4.9897, + "step": 1105 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998321355778803, + "loss": 4.904, + "step": 1110 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998293726858855, + "loss": 5.0172, + "step": 1115 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998265872482585, + "loss": 4.9544, + "step": 1120 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998237792652088, + "loss": 4.9249, + "step": 1125 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998209487369473, + "loss": 5.0203, + "step": 1130 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998180956636872, + "loss": 4.9552, + "step": 1135 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998152200456427, + "loss": 4.9835, + "step": 1140 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998123218830302, + "loss": 4.9234, + "step": 1145 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998094011760677, + "loss": 4.9115, + "step": 1150 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998064579249748, + "loss": 4.914, + "step": 1155 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998034921299726, + "loss": 4.9211, + "step": 1160 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005998005037912842, + "loss": 4.9112, + "step": 1165 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997974929091345, + "loss": 4.9139, + "step": 1170 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997944594837497, + "loss": 4.9096, + "step": 1175 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997914035153581, + "loss": 4.8065, + "step": 1180 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997883250041892, + "loss": 4.9562, + "step": 1185 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997852239504747, + "loss": 4.939, + "step": 1190 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997821003544478, + "loss": 4.8459, + "step": 1195 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997789542163432, + "loss": 4.8906, + "step": 1200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997757855363976, + "loss": 4.8547, + "step": 1205 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997725943148494, + "loss": 4.9617, + "step": 1210 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997693805519383, + "loss": 4.8849, + "step": 1215 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997661442479061, + "loss": 4.8716, + "step": 1220 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997628854029961, + "loss": 4.83, + "step": 1225 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997596040174534, + "loss": 4.8464, + "step": 1230 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997563000915248, + "loss": 4.8273, + "step": 1235 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997529736254585, + "loss": 4.8679, + "step": 1240 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997496246195049, + "loss": 4.8066, + "step": 1245 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997462530739157, + "loss": 4.7826, + "step": 1250 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997428589889444, + "loss": 4.8587, + "step": 1255 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997394423648463, + "loss": 4.8974, + "step": 1260 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997360032018783, + "loss": 4.8758, + "step": 1265 + }, + { + "epoch": 0.02, + "learning_rate": 0.000599732541500299, + "loss": 4.8788, + "step": 1270 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997290572603687, + "loss": 4.8967, + "step": 1275 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997255504823492, + "loss": 4.8774, + "step": 1280 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997220211665045, + "loss": 4.8542, + "step": 1285 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997184693130999, + "loss": 4.7275, + "step": 1290 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997148949224023, + "loss": 4.807, + "step": 1295 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997112979946809, + "loss": 4.8433, + "step": 1300 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997076785302057, + "loss": 4.9526, + "step": 1305 + }, + { + "epoch": 0.02, + "learning_rate": 0.000599704036529249, + "loss": 4.8017, + "step": 1310 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005997003719920847, + "loss": 4.8443, + "step": 1315 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996966849189884, + "loss": 4.823, + "step": 1320 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996929753102374, + "loss": 4.8656, + "step": 1325 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996892431661104, + "loss": 4.7528, + "step": 1330 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996854884868884, + "loss": 4.804, + "step": 1335 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996817112728534, + "loss": 4.8494, + "step": 1340 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996779115242896, + "loss": 4.8679, + "step": 1345 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996740892414827, + "loss": 4.7677, + "step": 1350 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996702444247201, + "loss": 4.7851, + "step": 1355 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996663770742908, + "loss": 4.8101, + "step": 1360 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996624871904858, + "loss": 4.8664, + "step": 1365 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996585747735975, + "loss": 4.8919, + "step": 1370 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996546398239201, + "loss": 4.7878, + "step": 1375 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996506823417495, + "loss": 4.8748, + "step": 1380 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996467023273832, + "loss": 4.8132, + "step": 1385 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996426997811207, + "loss": 4.8323, + "step": 1390 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996386747032627, + "loss": 4.7567, + "step": 1395 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996346270941119, + "loss": 4.7993, + "step": 1400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996305569539728, + "loss": 4.7315, + "step": 1405 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996264642831516, + "loss": 4.8493, + "step": 1410 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996223490819556, + "loss": 4.7694, + "step": 1415 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996182113506947, + "loss": 4.7332, + "step": 1420 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996140510896796, + "loss": 4.8223, + "step": 1425 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996098682992235, + "loss": 4.7319, + "step": 1430 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996056629796408, + "loss": 4.8006, + "step": 1435 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005996014351312476, + "loss": 4.844, + "step": 1440 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995971847543621, + "loss": 4.7525, + "step": 1445 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995929118493034, + "loss": 4.7649, + "step": 1450 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995886164163934, + "loss": 4.7703, + "step": 1455 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995842984559547, + "loss": 4.8856, + "step": 1460 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995799579683121, + "loss": 4.7844, + "step": 1465 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995755949537919, + "loss": 4.7843, + "step": 1470 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995712094127224, + "loss": 4.7452, + "step": 1475 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995668013454332, + "loss": 4.7533, + "step": 1480 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995623707522558, + "loss": 4.7856, + "step": 1485 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995579176335233, + "loss": 4.8798, + "step": 1490 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995534419895707, + "loss": 4.7446, + "step": 1495 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995489438207344, + "loss": 4.7522, + "step": 1500 + }, + { + "epoch": 0.03, + "eval_loss": 4.799483776092529, + "eval_runtime": 150.7804, + "eval_samples_per_second": 12.21, + "eval_steps_per_second": 0.769, + "step": 1500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995444231273526, + "loss": 4.6847, + "step": 1505 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995398799097654, + "loss": 4.846, + "step": 1510 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995353141683144, + "loss": 4.7079, + "step": 1515 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995307259033428, + "loss": 4.7785, + "step": 1520 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995261151151957, + "loss": 4.8516, + "step": 1525 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995214818042198, + "loss": 4.7627, + "step": 1530 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995168259707635, + "loss": 4.7492, + "step": 1535 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995121476151769, + "loss": 4.767, + "step": 1540 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995074467378117, + "loss": 4.7328, + "step": 1545 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005995027233390215, + "loss": 4.622, + "step": 1550 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994979774191614, + "loss": 4.7061, + "step": 1555 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994932089785883, + "loss": 4.7718, + "step": 1560 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994884180176608, + "loss": 4.8252, + "step": 1565 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994836045367391, + "loss": 4.7833, + "step": 1570 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994787685361852, + "loss": 4.7973, + "step": 1575 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994739100163627, + "loss": 4.8151, + "step": 1580 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994690289776368, + "loss": 4.6815, + "step": 1585 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994641254203749, + "loss": 4.6592, + "step": 1590 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994591993449455, + "loss": 4.6454, + "step": 1595 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994542507517188, + "loss": 4.7652, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994492796410673, + "loss": 4.6894, + "step": 1605 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994442860133647, + "loss": 4.7193, + "step": 1610 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994392698689862, + "loss": 4.6321, + "step": 1615 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994342312083093, + "loss": 4.7376, + "step": 1620 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994291700317127, + "loss": 4.7212, + "step": 1625 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994240863395772, + "loss": 4.6763, + "step": 1630 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994189801322849, + "loss": 4.6659, + "step": 1635 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994138514102198, + "loss": 4.6895, + "step": 1640 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994087001737674, + "loss": 4.7019, + "step": 1645 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005994035264233154, + "loss": 4.6882, + "step": 1650 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993983301592525, + "loss": 4.724, + "step": 1655 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993931113819697, + "loss": 4.6239, + "step": 1660 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993878700918592, + "loss": 4.5808, + "step": 1665 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993826062893152, + "loss": 4.6235, + "step": 1670 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993773199747335, + "loss": 4.6813, + "step": 1675 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993720111485115, + "loss": 4.6841, + "step": 1680 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993666798110488, + "loss": 4.6773, + "step": 1685 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993613259627458, + "loss": 4.6088, + "step": 1690 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993559496040054, + "loss": 4.592, + "step": 1695 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993505507352317, + "loss": 4.7731, + "step": 1700 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993451293568307, + "loss": 4.7196, + "step": 1705 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993396854692102, + "loss": 4.6916, + "step": 1710 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993342190727793, + "loss": 4.6838, + "step": 1715 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993287301679494, + "loss": 4.6393, + "step": 1720 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993232187551328, + "loss": 4.705, + "step": 1725 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993176848347444, + "loss": 4.6213, + "step": 1730 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993121284071999, + "loss": 4.7283, + "step": 1735 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993065494729173, + "loss": 4.6904, + "step": 1740 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005993009480323162, + "loss": 4.7068, + "step": 1745 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992953240858177, + "loss": 4.6864, + "step": 1750 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992896776338446, + "loss": 4.6971, + "step": 1755 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992840086768216, + "loss": 4.7385, + "step": 1760 + }, + { + "epoch": 0.03, + "learning_rate": 0.000599278317215175, + "loss": 4.6804, + "step": 1765 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992726032493326, + "loss": 4.7191, + "step": 1770 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992668667797244, + "loss": 4.5827, + "step": 1775 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992611078067813, + "loss": 4.7553, + "step": 1780 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992553263309367, + "loss": 4.6555, + "step": 1785 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992495223526253, + "loss": 4.5248, + "step": 1790 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992436958722834, + "loss": 4.4933, + "step": 1795 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992378468903489, + "loss": 4.622, + "step": 1800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992319754072622, + "loss": 4.5995, + "step": 1805 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992260814234644, + "loss": 4.6473, + "step": 1810 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992201649393989, + "loss": 4.6518, + "step": 1815 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992142259555103, + "loss": 4.6183, + "step": 1820 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992082644722454, + "loss": 4.5708, + "step": 1825 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005992022804900525, + "loss": 4.5672, + "step": 1830 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991962740093813, + "loss": 4.5723, + "step": 1835 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991902450306839, + "loss": 4.574, + "step": 1840 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991841935544133, + "loss": 4.6351, + "step": 1845 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991781195810246, + "loss": 4.6235, + "step": 1850 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991720231109745, + "loss": 4.6356, + "step": 1855 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991659041447217, + "loss": 4.6934, + "step": 1860 + }, + { + "epoch": 0.03, + "learning_rate": 0.000599159762682726, + "loss": 4.6972, + "step": 1865 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991535987254494, + "loss": 4.5976, + "step": 1870 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991474122733552, + "loss": 4.5597, + "step": 1875 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991412033269087, + "loss": 4.552, + "step": 1880 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991349718865768, + "loss": 4.5887, + "step": 1885 + }, + { + "epoch": 0.03, + "learning_rate": 0.000599128717952828, + "loss": 4.6184, + "step": 1890 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991224415261328, + "loss": 4.6031, + "step": 1895 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991161426069629, + "loss": 4.6395, + "step": 1900 + }, + { + "epoch": 0.03, + "learning_rate": 0.000599109821195792, + "loss": 4.6755, + "step": 1905 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005991034772930955, + "loss": 4.5333, + "step": 1910 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990971108993503, + "loss": 4.5863, + "step": 1915 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990907220150353, + "loss": 4.5263, + "step": 1920 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990843106406308, + "loss": 4.5766, + "step": 1925 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990778767766188, + "loss": 4.6123, + "step": 1930 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990714204234833, + "loss": 4.5906, + "step": 1935 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990649415817097, + "loss": 4.592, + "step": 1940 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990584402517853, + "loss": 4.5306, + "step": 1945 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990519164341987, + "loss": 4.538, + "step": 1950 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990453701294406, + "loss": 4.7278, + "step": 1955 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990388013380033, + "loss": 4.5994, + "step": 1960 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990322100603806, + "loss": 4.6766, + "step": 1965 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990255962970684, + "loss": 4.5969, + "step": 1970 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990189600485637, + "loss": 4.5924, + "step": 1975 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005990123013153657, + "loss": 4.6688, + "step": 1980 + }, + { + "epoch": 0.03, + "learning_rate": 0.000599005620097975, + "loss": 4.6841, + "step": 1985 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005989989163968941, + "loss": 4.6268, + "step": 1990 + }, + { + "epoch": 0.03, + "learning_rate": 0.000598992190212627, + "loss": 4.6271, + "step": 1995 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005989854415456796, + "loss": 4.5934, + "step": 2000 + }, + { + "epoch": 0.03, + "eval_loss": 4.622138977050781, + "eval_runtime": 150.27, + "eval_samples_per_second": 12.251, + "eval_steps_per_second": 0.772, + "step": 2000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005989786703965592, + "loss": 4.6169, + "step": 2005 + }, + { + "epoch": 0.03, + "learning_rate": 0.000598971876765775, + "loss": 4.5989, + "step": 2010 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005989650606538379, + "loss": 4.5947, + "step": 2015 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005989582220612605, + "loss": 4.6156, + "step": 2020 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005989513609885568, + "loss": 4.6182, + "step": 2025 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005989444774362429, + "loss": 4.6214, + "step": 2030 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005989375714048363, + "loss": 4.6239, + "step": 2035 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005989306428948565, + "loss": 4.5639, + "step": 2040 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005989236919068243, + "loss": 4.5897, + "step": 2045 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005989167184412624, + "loss": 4.6397, + "step": 2050 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005989097224986952, + "loss": 4.6781, + "step": 2055 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005989027040796488, + "loss": 4.6692, + "step": 2060 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988956631846509, + "loss": 4.5066, + "step": 2065 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988885998142308, + "loss": 4.5364, + "step": 2070 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988815139689199, + "loss": 4.5657, + "step": 2075 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988744056492508, + "loss": 4.4681, + "step": 2080 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988672748557583, + "loss": 4.5448, + "step": 2085 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988601215889782, + "loss": 4.5786, + "step": 2090 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988529458494488, + "loss": 4.5426, + "step": 2095 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988457476377092, + "loss": 4.5494, + "step": 2100 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988385269543011, + "loss": 4.5845, + "step": 2105 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988312837997673, + "loss": 4.5108, + "step": 2110 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988240181746524, + "loss": 4.5646, + "step": 2115 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988167300795029, + "loss": 4.5582, + "step": 2120 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988094195148666, + "loss": 4.5773, + "step": 2125 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005988020864812933, + "loss": 4.5119, + "step": 2130 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987947309793344, + "loss": 4.4035, + "step": 2135 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987873530095432, + "loss": 4.4899, + "step": 2140 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987799525724742, + "loss": 4.56, + "step": 2145 + }, + { + "epoch": 0.04, + "learning_rate": 0.000598772529668684, + "loss": 4.4948, + "step": 2150 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987650842987307, + "loss": 4.4912, + "step": 2155 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987576164631743, + "loss": 4.6292, + "step": 2160 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987501261625762, + "loss": 4.563, + "step": 2165 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987426133974997, + "loss": 4.5965, + "step": 2170 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987350781685097, + "loss": 4.546, + "step": 2175 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987275204761729, + "loss": 4.6035, + "step": 2180 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987199403210574, + "loss": 4.6014, + "step": 2185 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987123377037333, + "loss": 4.5493, + "step": 2190 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005987047126247723, + "loss": 4.5345, + "step": 2195 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986970650847478, + "loss": 4.5793, + "step": 2200 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986893950842347, + "loss": 4.5242, + "step": 2205 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986817026238099, + "loss": 4.5865, + "step": 2210 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986739877040518, + "loss": 4.5695, + "step": 2215 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986662503255405, + "loss": 4.5274, + "step": 2220 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986584904888579, + "loss": 4.5146, + "step": 2225 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986507081945872, + "loss": 4.4585, + "step": 2230 + }, + { + "epoch": 0.04, + "learning_rate": 0.000598642903443314, + "loss": 4.5071, + "step": 2235 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986350762356249, + "loss": 4.6897, + "step": 2240 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986272265721086, + "loss": 4.5492, + "step": 2245 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005986193544533553, + "loss": 4.4015, + "step": 2250 + }, + { + "epoch": 0.04, + "learning_rate": 0.000598611459879957, + "loss": 4.5966, + "step": 2255 + }, + { + "epoch": 0.04, + "learning_rate": 0.000598603542852507, + "loss": 4.5583, + "step": 2260 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985956033716011, + "loss": 4.4743, + "step": 2265 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985876414378361, + "loss": 4.557, + "step": 2270 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985796570518108, + "loss": 4.5134, + "step": 2275 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985716502141254, + "loss": 4.5188, + "step": 2280 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985636209253821, + "loss": 4.5209, + "step": 2285 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985555691861845, + "loss": 4.4911, + "step": 2290 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985474949971383, + "loss": 4.5194, + "step": 2295 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985393983588503, + "loss": 4.5059, + "step": 2300 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985312792719297, + "loss": 4.4679, + "step": 2305 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985231377369869, + "loss": 4.5583, + "step": 2310 + }, + { + "epoch": 0.04, + "learning_rate": 0.000598514973754634, + "loss": 4.5782, + "step": 2315 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005985067873254848, + "loss": 4.5104, + "step": 2320 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984985784501551, + "loss": 4.5072, + "step": 2325 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984903471292621, + "loss": 4.435, + "step": 2330 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984820933634247, + "loss": 4.4558, + "step": 2335 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984738171532636, + "loss": 4.5055, + "step": 2340 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984655184994011, + "loss": 4.521, + "step": 2345 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984571974024614, + "loss": 4.4612, + "step": 2350 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984488538630698, + "loss": 4.4435, + "step": 2355 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984404878818539, + "loss": 4.5881, + "step": 2360 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984320994594429, + "loss": 4.5542, + "step": 2365 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984236885964674, + "loss": 4.3867, + "step": 2370 + }, + { + "epoch": 0.04, + "learning_rate": 0.00059841525529356, + "loss": 4.5786, + "step": 2375 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005984067995513547, + "loss": 4.5679, + "step": 2380 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983983213704874, + "loss": 4.4752, + "step": 2385 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983898207515957, + "loss": 4.4446, + "step": 2390 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983812976953185, + "loss": 4.4343, + "step": 2395 + }, + { + "epoch": 0.04, + "learning_rate": 0.000598372752202297, + "loss": 4.3717, + "step": 2400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983641842731736, + "loss": 4.4689, + "step": 2405 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983555939085927, + "loss": 4.4949, + "step": 2410 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983469811092002, + "loss": 4.5136, + "step": 2415 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983383458756437, + "loss": 4.4673, + "step": 2420 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983296882085726, + "loss": 4.521, + "step": 2425 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983210081086378, + "loss": 4.4922, + "step": 2430 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983123055764921, + "loss": 4.5239, + "step": 2435 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005983035806127897, + "loss": 4.4424, + "step": 2440 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982948332181871, + "loss": 4.4227, + "step": 2445 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982860633933415, + "loss": 4.5284, + "step": 2450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982772711389128, + "loss": 4.5241, + "step": 2455 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982684564555619, + "loss": 4.4501, + "step": 2460 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982596193439517, + "loss": 4.4509, + "step": 2465 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982507598047466, + "loss": 4.4052, + "step": 2470 + }, + { + "epoch": 0.04, + "learning_rate": 0.000598241877838613, + "loss": 4.4956, + "step": 2475 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982329734462186, + "loss": 4.3242, + "step": 2480 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982240466282329, + "loss": 4.392, + "step": 2485 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982150973853274, + "loss": 4.5077, + "step": 2490 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005982061257181749, + "loss": 4.4919, + "step": 2495 + }, + { + "epoch": 0.04, + "learning_rate": 0.00059819713162745, + "loss": 4.5178, + "step": 2500 + }, + { + "epoch": 0.04, + "eval_loss": 4.496661186218262, + "eval_runtime": 150.774, + "eval_samples_per_second": 12.21, + "eval_steps_per_second": 0.769, + "step": 2500 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005981881151138291, + "loss": 4.5157, + "step": 2505 + }, + { + "epoch": 0.04, + "learning_rate": 0.00059817907617799, + "loss": 4.4359, + "step": 2510 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005981700148206126, + "loss": 4.4987, + "step": 2515 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005981609310423781, + "loss": 4.3829, + "step": 2520 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005981518248439697, + "loss": 4.4533, + "step": 2525 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005981426962260722, + "loss": 4.4787, + "step": 2530 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005981335451893718, + "loss": 4.4843, + "step": 2535 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005981243717345567, + "loss": 4.4322, + "step": 2540 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005981151758623167, + "loss": 4.504, + "step": 2545 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005981059575733432, + "loss": 4.4743, + "step": 2550 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005980967168683297, + "loss": 4.5483, + "step": 2555 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005980874537479706, + "loss": 4.4412, + "step": 2560 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005980781682129627, + "loss": 4.4774, + "step": 2565 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005980688602640042, + "loss": 4.3842, + "step": 2570 + }, + { + "epoch": 0.04, + "learning_rate": 0.000598059529901795, + "loss": 4.453, + "step": 2575 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005980501771270366, + "loss": 4.3691, + "step": 2580 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005980408019404324, + "loss": 4.4649, + "step": 2585 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005980314043426874, + "loss": 4.4571, + "step": 2590 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005980219843345082, + "loss": 4.2869, + "step": 2595 + }, + { + "epoch": 0.04, + "learning_rate": 0.000598012541916603, + "loss": 4.442, + "step": 2600 + }, + { + "epoch": 0.05, + "learning_rate": 0.000598003077089682, + "loss": 4.4842, + "step": 2605 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005979935898544568, + "loss": 4.4487, + "step": 2610 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005979840802116409, + "loss": 4.4768, + "step": 2615 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005979745481619493, + "loss": 4.4794, + "step": 2620 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005979649937060988, + "loss": 4.4843, + "step": 2625 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005979554168448078, + "loss": 4.4868, + "step": 2630 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005979458175787963, + "loss": 4.52, + "step": 2635 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005979361959087865, + "loss": 4.5309, + "step": 2640 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005979265518355015, + "loss": 4.4523, + "step": 2645 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005979168853596668, + "loss": 4.4153, + "step": 2650 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597907196482009, + "loss": 4.3197, + "step": 2655 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005978974852032568, + "loss": 4.4264, + "step": 2660 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005978877515241404, + "loss": 4.4645, + "step": 2665 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005978779954453917, + "loss": 4.4296, + "step": 2670 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005978682169677444, + "loss": 4.4271, + "step": 2675 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005978584160919339, + "loss": 4.4266, + "step": 2680 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005978485928186968, + "loss": 4.4608, + "step": 2685 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597838747148772, + "loss": 4.455, + "step": 2690 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005978288790828998, + "loss": 4.4432, + "step": 2695 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005978189886218223, + "loss": 4.4275, + "step": 2700 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005978090757662831, + "loss": 4.4242, + "step": 2705 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977991405170277, + "loss": 4.4197, + "step": 2710 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977891828748031, + "loss": 4.3557, + "step": 2715 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977792028403581, + "loss": 4.4138, + "step": 2720 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977692004144431, + "loss": 4.4321, + "step": 2725 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977591755978104, + "loss": 4.3348, + "step": 2730 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977491283912135, + "loss": 4.3667, + "step": 2735 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977390587954082, + "loss": 4.5263, + "step": 2740 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977289668111515, + "loss": 4.4123, + "step": 2745 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977188524392025, + "loss": 4.4297, + "step": 2750 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005977087156803214, + "loss": 4.4391, + "step": 2755 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005976985565352707, + "loss": 4.4199, + "step": 2760 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005976883750048142, + "loss": 4.4723, + "step": 2765 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005976781710897175, + "loss": 4.3106, + "step": 2770 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597667944790748, + "loss": 4.4756, + "step": 2775 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005976576961086745, + "loss": 4.4701, + "step": 2780 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005976474250442678, + "loss": 4.316, + "step": 2785 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005976371315983, + "loss": 4.3714, + "step": 2790 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005976268157715453, + "loss": 4.2413, + "step": 2795 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005976164775647795, + "loss": 4.4715, + "step": 2800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005976061169787798, + "loss": 4.4679, + "step": 2805 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005975957340143254, + "loss": 4.3745, + "step": 2810 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005975853286721968, + "loss": 4.348, + "step": 2815 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005975749009531767, + "loss": 4.4823, + "step": 2820 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005975644508580489, + "loss": 4.3, + "step": 2825 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005975539783875995, + "loss": 4.3079, + "step": 2830 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597543483542616, + "loss": 4.4583, + "step": 2835 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005975329663238872, + "loss": 4.3141, + "step": 2840 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005975224267322043, + "loss": 4.4546, + "step": 2845 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005975118647683597, + "loss": 4.4115, + "step": 2850 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005975012804331476, + "loss": 4.3319, + "step": 2855 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005974906737273638, + "loss": 4.3583, + "step": 2860 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597480044651806, + "loss": 4.3746, + "step": 2865 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005974693932072734, + "loss": 4.3578, + "step": 2870 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597458719394567, + "loss": 4.4066, + "step": 2875 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005974480232144891, + "loss": 4.3589, + "step": 2880 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005974373046678445, + "loss": 4.4172, + "step": 2885 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005974265637554389, + "loss": 4.4344, + "step": 2890 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005974158004780799, + "loss": 4.3983, + "step": 2895 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597405014836577, + "loss": 4.3429, + "step": 2900 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005973942068317411, + "loss": 4.3561, + "step": 2905 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005973833764643851, + "loss": 4.3462, + "step": 2910 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005973725237353231, + "loss": 4.3672, + "step": 2915 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005973616486453715, + "loss": 4.4243, + "step": 2920 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005973507511953478, + "loss": 4.4428, + "step": 2925 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005973398313860714, + "loss": 4.3471, + "step": 2930 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005973288892183636, + "loss": 4.2282, + "step": 2935 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005973179246930472, + "loss": 4.3884, + "step": 2940 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005973069378109466, + "loss": 4.3978, + "step": 2945 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005972959285728877, + "loss": 4.4103, + "step": 2950 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005972848969796988, + "loss": 4.3988, + "step": 2955 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005972738430322092, + "loss": 4.4573, + "step": 2960 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005972627667312501, + "loss": 4.3979, + "step": 2965 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005972516680776543, + "loss": 4.3337, + "step": 2970 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005972405470722565, + "loss": 4.4286, + "step": 2975 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597229403715893, + "loss": 4.3805, + "step": 2980 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005972182380094015, + "loss": 4.3886, + "step": 2985 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005972070499536217, + "loss": 4.3983, + "step": 2990 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597195839549395, + "loss": 4.4782, + "step": 2995 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005971846067975642, + "loss": 4.3503, + "step": 3000 + }, + { + "epoch": 0.05, + "eval_loss": 4.413268089294434, + "eval_runtime": 150.2737, + "eval_samples_per_second": 12.251, + "eval_steps_per_second": 0.772, + "step": 3000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597173351698974, + "loss": 4.4155, + "step": 3005 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005971620742544709, + "loss": 4.4261, + "step": 3010 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005971507744649025, + "loss": 4.348, + "step": 3015 + }, + { + "epoch": 0.05, + "learning_rate": 0.000597139452331119, + "loss": 4.4353, + "step": 3020 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005971281078539712, + "loss": 4.3666, + "step": 3025 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005971167410343127, + "loss": 4.4247, + "step": 3030 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005971053518729979, + "loss": 4.4093, + "step": 3035 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005970939403708832, + "loss": 4.4617, + "step": 3040 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005970825065288269, + "loss": 4.3769, + "step": 3045 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005970710503476885, + "loss": 4.3765, + "step": 3050 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005970595718283296, + "loss": 4.4443, + "step": 3055 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005970480709716134, + "loss": 4.4562, + "step": 3060 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005970365477784046, + "loss": 4.4314, + "step": 3065 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005970250022495696, + "loss": 4.3781, + "step": 3070 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005970134343859767, + "loss": 4.2969, + "step": 3075 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005970018441884957, + "loss": 4.3114, + "step": 3080 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005969902316579982, + "loss": 4.4437, + "step": 3085 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005969785967953572, + "loss": 4.4663, + "step": 3090 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005969669396014479, + "loss": 4.397, + "step": 3095 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005969552600771465, + "loss": 4.3915, + "step": 3100 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005969435582233314, + "loss": 4.3023, + "step": 3105 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005969318340408827, + "loss": 4.2951, + "step": 3110 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005969200875306817, + "loss": 4.4636, + "step": 3115 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005969083186936118, + "loss": 4.3918, + "step": 3120 + }, + { + "epoch": 0.05, + "learning_rate": 0.000596896527530558, + "loss": 4.3059, + "step": 3125 + }, + { + "epoch": 0.05, + "learning_rate": 0.000596884714042407, + "loss": 4.2898, + "step": 3130 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005968728782300469, + "loss": 4.3249, + "step": 3135 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005968610200943678, + "loss": 4.3538, + "step": 3140 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005968491396362614, + "loss": 4.2482, + "step": 3145 + }, + { + "epoch": 0.05, + "learning_rate": 0.000596837236856621, + "loss": 4.3515, + "step": 3150 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005968253117563417, + "loss": 4.4214, + "step": 3155 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005968133643363202, + "loss": 4.3378, + "step": 3160 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005968013945974549, + "loss": 4.3318, + "step": 3165 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005967894025406456, + "loss": 4.2822, + "step": 3170 + }, + { + "epoch": 0.05, + "learning_rate": 0.0005967773881667944, + "loss": 4.2495, + "step": 3175 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005967653514768046, + "loss": 4.268, + "step": 3180 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005967532924715811, + "loss": 4.3754, + "step": 3185 + }, + { + "epoch": 0.06, + "learning_rate": 0.000596741211152031, + "loss": 4.4025, + "step": 3190 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005967291075190625, + "loss": 4.4083, + "step": 3195 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005967169815735858, + "loss": 4.3213, + "step": 3200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005967048333165128, + "loss": 4.3461, + "step": 3205 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005966926627487569, + "loss": 4.3768, + "step": 3210 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005966804698712333, + "loss": 4.4196, + "step": 3215 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005966682546848589, + "loss": 4.5157, + "step": 3220 + }, + { + "epoch": 0.06, + "learning_rate": 0.000596656017190552, + "loss": 4.5326, + "step": 3225 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005966437573892331, + "loss": 4.4102, + "step": 3230 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005966314752818239, + "loss": 4.405, + "step": 3235 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005966191708692478, + "loss": 4.4479, + "step": 3240 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005966068441524304, + "loss": 4.2742, + "step": 3245 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005965944951322984, + "loss": 4.4105, + "step": 3250 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005965821238097803, + "loss": 4.3222, + "step": 3255 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005965697301858064, + "loss": 4.3689, + "step": 3260 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005965573142613088, + "loss": 4.3729, + "step": 3265 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005965448760372209, + "loss": 4.3759, + "step": 3270 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005965324155144782, + "loss": 4.3199, + "step": 3275 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005965199326940174, + "loss": 4.352, + "step": 3280 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005965074275767774, + "loss": 4.3888, + "step": 3285 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005964949001636985, + "loss": 4.2492, + "step": 3290 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005964823504557226, + "loss": 4.2244, + "step": 3295 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005964697784537933, + "loss": 4.2883, + "step": 3300 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005964571841588561, + "loss": 4.371, + "step": 3305 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005964445675718579, + "loss": 4.334, + "step": 3310 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005964319286937475, + "loss": 4.3165, + "step": 3315 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005964192675254753, + "loss": 4.2529, + "step": 3320 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005964065840679933, + "loss": 4.3381, + "step": 3325 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005963938783222553, + "loss": 4.283, + "step": 3330 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005963811502892165, + "loss": 4.3161, + "step": 3335 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005963683999698342, + "loss": 4.3748, + "step": 3340 + }, + { + "epoch": 0.06, + "learning_rate": 0.000596355627365067, + "loss": 4.3108, + "step": 3345 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005963428324758755, + "loss": 4.3598, + "step": 3350 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005963300153032217, + "loss": 4.3048, + "step": 3355 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005963171758480695, + "loss": 4.3148, + "step": 3360 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005963043141113842, + "loss": 4.1902, + "step": 3365 + }, + { + "epoch": 0.06, + "learning_rate": 0.000596291430094133, + "loss": 4.2621, + "step": 3370 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005962785237972848, + "loss": 4.3392, + "step": 3375 + }, + { + "epoch": 0.06, + "learning_rate": 0.00059626559522181, + "loss": 4.4537, + "step": 3380 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005962526443686807, + "loss": 4.3562, + "step": 3385 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005962396712388709, + "loss": 4.3759, + "step": 3390 + }, + { + "epoch": 0.06, + "learning_rate": 0.000596226675833356, + "loss": 4.3024, + "step": 3395 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005962136581531132, + "loss": 4.3345, + "step": 3400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005962006181991214, + "loss": 4.3228, + "step": 3405 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005961875559723611, + "loss": 4.235, + "step": 3410 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005961744714738146, + "loss": 4.2656, + "step": 3415 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005961613647044656, + "loss": 4.404, + "step": 3420 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005961482356652998, + "loss": 4.2853, + "step": 3425 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005961350843573044, + "loss": 4.294, + "step": 3430 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005961219107814684, + "loss": 4.2044, + "step": 3435 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005961087149387824, + "loss": 4.2688, + "step": 3440 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005960954968302383, + "loss": 4.2032, + "step": 3445 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005960822564568305, + "loss": 4.3575, + "step": 3450 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005960689938195544, + "loss": 4.368, + "step": 3455 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005960557089194072, + "loss": 4.2639, + "step": 3460 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005960424017573881, + "loss": 4.3362, + "step": 3465 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005960290723344974, + "loss": 4.2979, + "step": 3470 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005960157206517376, + "loss": 4.3052, + "step": 3475 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005960023467101128, + "loss": 4.3042, + "step": 3480 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005959889505106284, + "loss": 4.2859, + "step": 3485 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005959755320542919, + "loss": 4.3849, + "step": 3490 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005959620913421123, + "loss": 4.3201, + "step": 3495 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005959486283751001, + "loss": 4.41, + "step": 3500 + }, + { + "epoch": 0.06, + "eval_loss": 4.344966411590576, + "eval_runtime": 150.7763, + "eval_samples_per_second": 12.21, + "eval_steps_per_second": 0.769, + "step": 3500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005959351431542678, + "loss": 4.3984, + "step": 3505 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005959216356806294, + "loss": 4.3778, + "step": 3510 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005959081059552005, + "loss": 4.3246, + "step": 3515 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005958945539789987, + "loss": 4.2603, + "step": 3520 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005958809797530427, + "loss": 4.2545, + "step": 3525 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005958673832783534, + "loss": 4.268, + "step": 3530 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005958537645559533, + "loss": 4.2577, + "step": 3535 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005958401235868663, + "loss": 4.3378, + "step": 3540 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005958264603721181, + "loss": 4.3697, + "step": 3545 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005958127749127361, + "loss": 4.4162, + "step": 3550 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005957990672097495, + "loss": 4.351, + "step": 3555 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005957853372641891, + "loss": 4.3266, + "step": 3560 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005957715850770871, + "loss": 4.2015, + "step": 3565 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005957578106494778, + "loss": 4.1827, + "step": 3570 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005957440139823968, + "loss": 4.4158, + "step": 3575 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005957301950768816, + "loss": 4.2008, + "step": 3580 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005957163539339714, + "loss": 4.1551, + "step": 3585 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005957024905547068, + "loss": 4.2472, + "step": 3590 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005956886049401305, + "loss": 4.2492, + "step": 3595 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005956746970912863, + "loss": 4.2584, + "step": 3600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005956607670092204, + "loss": 4.3039, + "step": 3605 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005956468146949799, + "loss": 4.2622, + "step": 3610 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005956328401496141, + "loss": 4.2605, + "step": 3615 + }, + { + "epoch": 0.06, + "learning_rate": 0.000595618843374174, + "loss": 4.2973, + "step": 3620 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005956048243697117, + "loss": 4.218, + "step": 3625 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005955907831372817, + "loss": 4.3062, + "step": 3630 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005955767196779395, + "loss": 4.3318, + "step": 3635 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005955626339927431, + "loss": 4.4257, + "step": 3640 + }, + { + "epoch": 0.06, + "learning_rate": 0.000595548526082751, + "loss": 4.1505, + "step": 3645 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005955343959490246, + "loss": 4.2879, + "step": 3650 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005955202435926262, + "loss": 4.2549, + "step": 3655 + }, + { + "epoch": 0.06, + "learning_rate": 0.00059550606901462, + "loss": 4.2972, + "step": 3660 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005954918722160718, + "loss": 4.253, + "step": 3665 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005954776531980492, + "loss": 4.3401, + "step": 3670 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005954634119616215, + "loss": 4.2288, + "step": 3675 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005954491485078592, + "loss": 4.2791, + "step": 3680 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005954348628378353, + "loss": 4.2321, + "step": 3685 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005954205549526236, + "loss": 4.2922, + "step": 3690 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005954062248533003, + "loss": 4.2489, + "step": 3695 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005953918725409429, + "loss": 4.3511, + "step": 3700 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005953774980166304, + "loss": 4.3664, + "step": 3705 + }, + { + "epoch": 0.06, + "learning_rate": 0.000595363101281444, + "loss": 4.2423, + "step": 3710 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005953486823364659, + "loss": 4.2646, + "step": 3715 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005953342411827807, + "loss": 4.2715, + "step": 3720 + }, + { + "epoch": 0.06, + "learning_rate": 0.000595319777821474, + "loss": 4.3321, + "step": 3725 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005953052922536335, + "loss": 4.2929, + "step": 3730 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005952907844803486, + "loss": 4.2246, + "step": 3735 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005952762545027099, + "loss": 4.2096, + "step": 3740 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005952617023218101, + "loss": 4.312, + "step": 3745 + }, + { + "epoch": 0.06, + "learning_rate": 0.0005952471279387435, + "loss": 4.2608, + "step": 3750 + }, + { + "epoch": 0.06, + "learning_rate": 0.000595232531354606, + "loss": 4.3276, + "step": 3755 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005952179125704952, + "loss": 4.335, + "step": 3760 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005952032715875105, + "loss": 4.2907, + "step": 3765 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005951886084067525, + "loss": 4.3741, + "step": 3770 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005951739230293239, + "loss": 4.3517, + "step": 3775 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005951592154563292, + "loss": 4.277, + "step": 3780 + }, + { + "epoch": 0.07, + "learning_rate": 0.000595144485688874, + "loss": 4.3368, + "step": 3785 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005951297337280662, + "loss": 4.2605, + "step": 3790 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005951149595750149, + "loss": 4.3184, + "step": 3795 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005951001632308311, + "loss": 4.3056, + "step": 3800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005950853446966273, + "loss": 4.3516, + "step": 3805 + }, + { + "epoch": 0.07, + "learning_rate": 0.000595070503973518, + "loss": 4.3533, + "step": 3810 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005950556410626189, + "loss": 4.2064, + "step": 3815 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005950407559650477, + "loss": 4.2559, + "step": 3820 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005950258486819239, + "loss": 4.347, + "step": 3825 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005950109192143681, + "loss": 4.2478, + "step": 3830 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005949959675635031, + "loss": 4.3066, + "step": 3835 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005949809937304532, + "loss": 4.1078, + "step": 3840 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005949659977163444, + "loss": 4.3285, + "step": 3845 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005949509795223042, + "loss": 4.211, + "step": 3850 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005949359391494619, + "loss": 4.2925, + "step": 3855 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005949208765989485, + "loss": 4.2087, + "step": 3860 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005949057918718966, + "loss": 4.3738, + "step": 3865 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005948906849694406, + "loss": 4.234, + "step": 3870 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005948755558927163, + "loss": 4.2358, + "step": 3875 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005948604046428614, + "loss": 4.2694, + "step": 3880 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005948452312210153, + "loss": 4.2782, + "step": 3885 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005948300356283188, + "loss": 4.3455, + "step": 3890 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005948148178659147, + "loss": 4.2975, + "step": 3895 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005947995779349471, + "loss": 4.3128, + "step": 3900 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005947843158365621, + "loss": 4.2444, + "step": 3905 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005947690315719073, + "loss": 4.2566, + "step": 3910 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005947537251421318, + "loss": 4.2086, + "step": 3915 + }, + { + "epoch": 0.07, + "learning_rate": 0.000594738396548387, + "loss": 4.2687, + "step": 3920 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005947230457918253, + "loss": 4.2127, + "step": 3925 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005947076728736009, + "loss": 4.3346, + "step": 3930 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005946922777948699, + "loss": 4.2569, + "step": 3935 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005946768605567898, + "loss": 4.327, + "step": 3940 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005946614211605201, + "loss": 4.3121, + "step": 3945 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005946459596072216, + "loss": 4.1934, + "step": 3950 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005946304758980569, + "loss": 4.0914, + "step": 3955 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005946149700341904, + "loss": 4.284, + "step": 3960 + }, + { + "epoch": 0.07, + "learning_rate": 0.000594599442016788, + "loss": 4.2219, + "step": 3965 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005945838918470174, + "loss": 4.2636, + "step": 3970 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005945683195260478, + "loss": 4.2416, + "step": 3975 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005945527250550503, + "loss": 4.1594, + "step": 3980 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005945371084351975, + "loss": 4.2529, + "step": 3985 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005945214696676635, + "loss": 4.2743, + "step": 3990 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005945058087536243, + "loss": 4.1218, + "step": 3995 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005944901256942578, + "loss": 4.2187, + "step": 4000 + }, + { + "epoch": 0.07, + "eval_loss": 4.265633583068848, + "eval_runtime": 150.1723, + "eval_samples_per_second": 12.259, + "eval_steps_per_second": 0.772, + "step": 4000 + }, + { + "epoch": 0.07, + "learning_rate": 0.000594474420490743, + "loss": 4.2666, + "step": 4005 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005944586931442609, + "loss": 4.12, + "step": 4010 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005944429436559942, + "loss": 4.2916, + "step": 4015 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005944271720271272, + "loss": 4.1823, + "step": 4020 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005944113782588456, + "loss": 4.2687, + "step": 4025 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005943955623523373, + "loss": 4.2593, + "step": 4030 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005943797243087915, + "loss": 4.2273, + "step": 4035 + }, + { + "epoch": 0.07, + "learning_rate": 0.000594363864129399, + "loss": 4.2475, + "step": 4040 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005943479818153526, + "loss": 4.2453, + "step": 4045 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005943320773678463, + "loss": 4.2077, + "step": 4050 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005943161507880765, + "loss": 4.211, + "step": 4055 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005943002020772403, + "loss": 4.3064, + "step": 4060 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005942842312365372, + "loss": 4.2258, + "step": 4065 + }, + { + "epoch": 0.07, + "learning_rate": 0.000594268238267168, + "loss": 4.1974, + "step": 4070 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005942522231703357, + "loss": 4.3158, + "step": 4075 + }, + { + "epoch": 0.07, + "learning_rate": 0.000594236185947244, + "loss": 4.2207, + "step": 4080 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005942201265990991, + "loss": 4.2394, + "step": 4085 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005942040451271085, + "loss": 4.2613, + "step": 4090 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005941879415324815, + "loss": 4.2127, + "step": 4095 + }, + { + "epoch": 0.07, + "learning_rate": 0.000594171815816429, + "loss": 4.2353, + "step": 4100 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005941556679801636, + "loss": 4.2169, + "step": 4105 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005941394980248994, + "loss": 4.1401, + "step": 4110 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005941233059518524, + "loss": 4.2061, + "step": 4115 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005941070917622401, + "loss": 4.1151, + "step": 4120 + }, + { + "epoch": 0.07, + "learning_rate": 0.000594090855457282, + "loss": 4.1882, + "step": 4125 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005940745970381985, + "loss": 4.1692, + "step": 4130 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005940583165062126, + "loss": 4.2102, + "step": 4135 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005940420138625482, + "loss": 4.1483, + "step": 4140 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005940256891084313, + "loss": 4.2406, + "step": 4145 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005940093422450895, + "loss": 4.2565, + "step": 4150 + }, + { + "epoch": 0.07, + "learning_rate": 0.000593992973273752, + "loss": 4.316, + "step": 4155 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005939765821956495, + "loss": 4.27, + "step": 4160 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005939601690120146, + "loss": 4.2913, + "step": 4165 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005939437337240816, + "loss": 4.1995, + "step": 4170 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005939272763330862, + "loss": 4.1578, + "step": 4175 + }, + { + "epoch": 0.07, + "learning_rate": 0.000593910796840266, + "loss": 4.2062, + "step": 4180 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005938942952468601, + "loss": 4.2138, + "step": 4185 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005938777715541095, + "loss": 4.1583, + "step": 4190 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005938612257632565, + "loss": 4.1554, + "step": 4195 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005938446578755453, + "loss": 4.1384, + "step": 4200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005938280678922218, + "loss": 4.1932, + "step": 4205 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005938114558145334, + "loss": 4.262, + "step": 4210 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005937948216437294, + "loss": 4.3102, + "step": 4215 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005937781653810603, + "loss": 4.1807, + "step": 4220 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005937614870277788, + "loss": 4.1832, + "step": 4225 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005937447865851391, + "loss": 4.2501, + "step": 4230 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005937280640543968, + "loss": 4.2523, + "step": 4235 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005937113194368095, + "loss": 4.1573, + "step": 4240 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005936945527336362, + "loss": 4.1715, + "step": 4245 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005936777639461375, + "loss": 4.2046, + "step": 4250 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005936609530755761, + "loss": 4.2866, + "step": 4255 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005936441201232162, + "loss": 4.1373, + "step": 4260 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005936272650903232, + "loss": 4.2981, + "step": 4265 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005936103879781647, + "loss": 4.1283, + "step": 4270 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005935934887880099, + "loss": 4.2305, + "step": 4275 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005935765675211292, + "loss": 4.1224, + "step": 4280 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005935596241787953, + "loss": 4.2186, + "step": 4285 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005935426587622821, + "loss": 4.2636, + "step": 4290 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005935256712728653, + "loss": 4.1941, + "step": 4295 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005935086617118224, + "loss": 4.1388, + "step": 4300 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005934916300804322, + "loss": 4.1762, + "step": 4305 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005934745763799756, + "loss": 4.2763, + "step": 4310 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005934575006117349, + "loss": 4.2378, + "step": 4315 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005934404027769941, + "loss": 4.2348, + "step": 4320 + }, + { + "epoch": 0.07, + "learning_rate": 0.000593423282877039, + "loss": 4.203, + "step": 4325 + }, + { + "epoch": 0.07, + "learning_rate": 0.0005934061409131567, + "loss": 4.2091, + "step": 4330 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005933889768866362, + "loss": 4.1984, + "step": 4335 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005933717907987682, + "loss": 4.2025, + "step": 4340 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005933545826508453, + "loss": 4.2632, + "step": 4345 + }, + { + "epoch": 0.08, + "learning_rate": 0.000593337352444161, + "loss": 4.2896, + "step": 4350 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005933201001800111, + "loss": 4.2299, + "step": 4355 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005933028258596931, + "loss": 4.1947, + "step": 4360 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005932855294845055, + "loss": 4.176, + "step": 4365 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005932682110557494, + "loss": 4.087, + "step": 4370 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005932508705747266, + "loss": 4.1089, + "step": 4375 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005932335080427413, + "loss": 4.1443, + "step": 4380 + }, + { + "epoch": 0.08, + "learning_rate": 0.000593216123461099, + "loss": 4.2938, + "step": 4385 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005931987168311069, + "loss": 4.2061, + "step": 4390 + }, + { + "epoch": 0.08, + "learning_rate": 0.000593181288154074, + "loss": 4.1735, + "step": 4395 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005931638374313107, + "loss": 4.248, + "step": 4400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005931463646641292, + "loss": 4.1558, + "step": 4405 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005931288698538434, + "loss": 4.2654, + "step": 4410 + }, + { + "epoch": 0.08, + "learning_rate": 0.000593111353001769, + "loss": 4.0433, + "step": 4415 + }, + { + "epoch": 0.08, + "learning_rate": 0.000593093814109223, + "loss": 4.217, + "step": 4420 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005930762531775242, + "loss": 4.1692, + "step": 4425 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005930586702079931, + "loss": 4.0659, + "step": 4430 + }, + { + "epoch": 0.08, + "learning_rate": 0.000593041065201952, + "loss": 4.219, + "step": 4435 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005930234381607246, + "loss": 4.235, + "step": 4440 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005930057890856364, + "loss": 4.1474, + "step": 4445 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005929881179780144, + "loss": 4.2813, + "step": 4450 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005929704248391875, + "loss": 4.1081, + "step": 4455 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005929527096704862, + "loss": 4.1982, + "step": 4460 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005929349724732424, + "loss": 4.2255, + "step": 4465 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005929172132487899, + "loss": 4.1753, + "step": 4470 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005928994319984643, + "loss": 4.2111, + "step": 4475 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005928816287236023, + "loss": 4.3088, + "step": 4480 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005928638034255429, + "loss": 4.3208, + "step": 4485 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005928459561056264, + "loss": 4.2369, + "step": 4490 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005928280867651947, + "loss": 4.252, + "step": 4495 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005928101954055918, + "loss": 4.1711, + "step": 4500 + }, + { + "epoch": 0.08, + "eval_loss": 4.222765922546387, + "eval_runtime": 150.6785, + "eval_samples_per_second": 12.218, + "eval_steps_per_second": 0.77, + "step": 4500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005927922820281627, + "loss": 4.2227, + "step": 4505 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005927743466342545, + "loss": 4.1118, + "step": 4510 + }, + { + "epoch": 0.08, + "learning_rate": 0.000592756389225216, + "loss": 4.1699, + "step": 4515 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005927384098023973, + "loss": 4.1542, + "step": 4520 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005927204083671505, + "loss": 4.1589, + "step": 4525 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005927023849208291, + "loss": 4.1576, + "step": 4530 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005926843394647884, + "loss": 4.1702, + "step": 4535 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005926662720003855, + "loss": 4.1987, + "step": 4540 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005926481825289788, + "loss": 4.1527, + "step": 4545 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005926300710519285, + "loss": 4.147, + "step": 4550 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005926119375705965, + "loss": 4.0946, + "step": 4555 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005925937820863465, + "loss": 4.2065, + "step": 4560 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005925756046005436, + "loss": 4.2049, + "step": 4565 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005925574051145546, + "loss": 4.1496, + "step": 4570 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005925391836297481, + "loss": 4.0682, + "step": 4575 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005925209401474943, + "loss": 4.1447, + "step": 4580 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005925026746691649, + "loss": 4.1023, + "step": 4585 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005924843871961334, + "loss": 4.207, + "step": 4590 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005924660777297749, + "loss": 4.1938, + "step": 4595 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005924477462714664, + "loss": 4.1987, + "step": 4600 + }, + { + "epoch": 0.08, + "learning_rate": 0.000592429392822586, + "loss": 4.2236, + "step": 4605 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005924110173845139, + "loss": 4.2157, + "step": 4610 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005923926199586319, + "loss": 4.0652, + "step": 4615 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005923742005463235, + "loss": 4.1559, + "step": 4620 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005923557591489735, + "loss": 4.1209, + "step": 4625 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005923372957679688, + "loss": 4.0978, + "step": 4630 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005923188104046975, + "loss": 4.1673, + "step": 4635 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005923003030605499, + "loss": 4.1935, + "step": 4640 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005922817737369175, + "loss": 4.0448, + "step": 4645 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005922632224351936, + "loss": 3.9366, + "step": 4650 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005922446491567733, + "loss": 4.1289, + "step": 4655 + }, + { + "epoch": 0.08, + "learning_rate": 0.000592226053903053, + "loss": 4.1816, + "step": 4660 + }, + { + "epoch": 0.08, + "learning_rate": 0.000592207436675431, + "loss": 4.1968, + "step": 4665 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005921887974753075, + "loss": 4.2072, + "step": 4670 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005921701363040837, + "loss": 4.3708, + "step": 4675 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005921514531631631, + "loss": 4.1242, + "step": 4680 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005921327480539505, + "loss": 4.2098, + "step": 4685 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005921140209778524, + "loss": 4.0701, + "step": 4690 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005920952719362769, + "loss": 4.2459, + "step": 4695 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005920765009306341, + "loss": 4.1828, + "step": 4700 + }, + { + "epoch": 0.08, + "learning_rate": 0.000592057707962335, + "loss": 4.0961, + "step": 4705 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005920388930327932, + "loss": 4.1396, + "step": 4710 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005920200561434234, + "loss": 4.191, + "step": 4715 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005920011972956418, + "loss": 4.1557, + "step": 4720 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005919823164908667, + "loss": 4.1885, + "step": 4725 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005919634137305179, + "loss": 4.0797, + "step": 4730 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005919444890160165, + "loss": 4.1649, + "step": 4735 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005919255423487859, + "loss": 4.0826, + "step": 4740 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005919065737302505, + "loss": 4.1142, + "step": 4745 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005918875831618367, + "loss": 4.1676, + "step": 4750 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005918685706449726, + "loss": 4.0972, + "step": 4755 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005918495361810878, + "loss": 4.1348, + "step": 4760 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005918304797716136, + "loss": 4.1432, + "step": 4765 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005918114014179829, + "loss": 4.2454, + "step": 4770 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005917923011216304, + "loss": 4.1003, + "step": 4775 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005917731788839921, + "loss": 4.1819, + "step": 4780 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005917540347065062, + "loss": 4.188, + "step": 4785 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005917348685906121, + "loss": 4.1215, + "step": 4790 + }, + { + "epoch": 0.08, + "learning_rate": 0.000591715680537751, + "loss": 4.1963, + "step": 4795 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005916964705493656, + "loss": 4.1186, + "step": 4800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005916772386269007, + "loss": 4.1362, + "step": 4805 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005916579847718022, + "loss": 4.2354, + "step": 4810 + }, + { + "epoch": 0.08, + "learning_rate": 0.000591638708985518, + "loss": 4.1049, + "step": 4815 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005916194112694976, + "loss": 4.1688, + "step": 4820 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005916000916251918, + "loss": 4.0749, + "step": 4825 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005915807500540537, + "loss": 4.1612, + "step": 4830 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005915613865575374, + "loss": 4.1247, + "step": 4835 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005915420011370992, + "loss": 4.2301, + "step": 4840 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005915225937941965, + "loss": 4.1453, + "step": 4845 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005915031645302889, + "loss": 4.2323, + "step": 4850 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005914837133468372, + "loss": 4.2572, + "step": 4855 + }, + { + "epoch": 0.08, + "learning_rate": 0.000591464240245304, + "loss": 4.1619, + "step": 4860 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005914447452271537, + "loss": 4.1835, + "step": 4865 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005914252282938524, + "loss": 4.1756, + "step": 4870 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005914056894468671, + "loss": 4.2158, + "step": 4875 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005913861286876677, + "loss": 4.1847, + "step": 4880 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005913665460177246, + "loss": 4.1548, + "step": 4885 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005913469414385105, + "loss": 4.0973, + "step": 4890 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005913273149514996, + "loss": 4.1362, + "step": 4895 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005913076665581675, + "loss": 4.1658, + "step": 4900 + }, + { + "epoch": 0.08, + "learning_rate": 0.000591287996259992, + "loss": 4.1478, + "step": 4905 + }, + { + "epoch": 0.08, + "learning_rate": 0.0005912683040584519, + "loss": 4.1785, + "step": 4910 + }, + { + "epoch": 0.09, + "learning_rate": 0.000591248589955028, + "loss": 4.2769, + "step": 4915 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005912288539512029, + "loss": 4.1097, + "step": 4920 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005912090960484603, + "loss": 4.1856, + "step": 4925 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005911893162482862, + "loss": 4.2468, + "step": 4930 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005911695145521679, + "loss": 4.2657, + "step": 4935 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005911496909615943, + "loss": 4.0184, + "step": 4940 + }, + { + "epoch": 0.09, + "learning_rate": 0.000591129845478056, + "loss": 4.0345, + "step": 4945 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005911099781030454, + "loss": 4.0501, + "step": 4950 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005910900888380564, + "loss": 4.1289, + "step": 4955 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005910701776845845, + "loss": 4.1621, + "step": 4960 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005910502446441269, + "loss": 4.0817, + "step": 4965 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005910302897181826, + "loss": 4.1124, + "step": 4970 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005910103129082519, + "loss": 4.1758, + "step": 4975 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005909903142158372, + "loss": 4.0607, + "step": 4980 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005909702936424421, + "loss": 4.1467, + "step": 4985 + }, + { + "epoch": 0.09, + "learning_rate": 0.000590950251189572, + "loss": 4.2868, + "step": 4990 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005909301868587343, + "loss": 4.1972, + "step": 4995 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005909101006514374, + "loss": 4.1593, + "step": 5000 + }, + { + "epoch": 0.09, + "eval_loss": 4.196778774261475, + "eval_runtime": 150.8836, + "eval_samples_per_second": 12.201, + "eval_steps_per_second": 0.769, + "step": 5000 + }, + { + "epoch": 0.09, + "learning_rate": 0.000590889992569192, + "loss": 4.1782, + "step": 5005 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005908698626135098, + "loss": 4.1774, + "step": 5010 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005908497107859046, + "loss": 4.1228, + "step": 5015 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005908295370878918, + "loss": 4.1645, + "step": 5020 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005908093415209883, + "loss": 4.1598, + "step": 5025 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005907891240867127, + "loss": 4.033, + "step": 5030 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005907688847865851, + "loss": 4.1476, + "step": 5035 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005907486236221278, + "loss": 4.0968, + "step": 5040 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005907283405948638, + "loss": 4.273, + "step": 5045 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005907080357063187, + "loss": 4.1311, + "step": 5050 + }, + { + "epoch": 0.09, + "learning_rate": 0.000590687708958019, + "loss": 3.9782, + "step": 5055 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005906673603514936, + "loss": 4.1972, + "step": 5060 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005906469898882721, + "loss": 4.0807, + "step": 5065 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005906265975698867, + "loss": 4.0946, + "step": 5070 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005906061833978705, + "loss": 4.1808, + "step": 5075 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005905857473737586, + "loss": 4.0355, + "step": 5080 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005905652894990878, + "loss": 4.2127, + "step": 5085 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005905448097753963, + "loss": 4.0982, + "step": 5090 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005905243082042242, + "loss": 4.1078, + "step": 5095 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005905037847871129, + "loss": 4.1049, + "step": 5100 + }, + { + "epoch": 0.09, + "learning_rate": 0.000590483239525606, + "loss": 4.1008, + "step": 5105 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005904626724212481, + "loss": 4.0823, + "step": 5110 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005904420834755858, + "loss": 4.0588, + "step": 5115 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005904214726901672, + "loss": 4.1389, + "step": 5120 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005904008400665424, + "loss": 4.1278, + "step": 5125 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005903801856062627, + "loss": 4.1523, + "step": 5130 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005903595093108812, + "loss": 4.23, + "step": 5135 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005903388111819526, + "loss": 4.046, + "step": 5140 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005903180912210335, + "loss": 4.0819, + "step": 5145 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005902973494296816, + "loss": 4.1195, + "step": 5150 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005902765858094569, + "loss": 4.0833, + "step": 5155 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005902558003619205, + "loss": 4.2782, + "step": 5160 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005902349930886357, + "loss": 4.1168, + "step": 5165 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005902141639911666, + "loss": 4.1654, + "step": 5170 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005901933130710797, + "loss": 4.0838, + "step": 5175 + }, + { + "epoch": 0.09, + "learning_rate": 0.000590172440329943, + "loss": 4.0421, + "step": 5180 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005901515457693258, + "loss": 4.1311, + "step": 5185 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005901306293907995, + "loss": 4.0227, + "step": 5190 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005901096911959365, + "loss": 4.2226, + "step": 5195 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005900887311863118, + "loss": 4.1306, + "step": 5200 + }, + { + "epoch": 0.09, + "learning_rate": 0.000590067749363501, + "loss": 4.0604, + "step": 5205 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005900467457290821, + "loss": 4.0125, + "step": 5210 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005900257202846344, + "loss": 4.1222, + "step": 5215 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005900046730317388, + "loss": 4.1159, + "step": 5220 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005899836039719782, + "loss": 4.1893, + "step": 5225 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005899625131069367, + "loss": 4.1073, + "step": 5230 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005899414004382001, + "loss": 4.0826, + "step": 5235 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005899202659673563, + "loss": 4.0734, + "step": 5240 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005898991096959941, + "loss": 4.1146, + "step": 5245 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005898779316257047, + "loss": 4.0675, + "step": 5250 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005898567317580804, + "loss": 4.0328, + "step": 5255 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005898355100947154, + "loss": 4.0624, + "step": 5260 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005898142666372055, + "loss": 4.0561, + "step": 5265 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005897930013871478, + "loss": 4.0266, + "step": 5270 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005897717143461418, + "loss": 3.9118, + "step": 5275 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005897504055157878, + "loss": 4.1576, + "step": 5280 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005897290748976883, + "loss": 4.0451, + "step": 5285 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005897077224934471, + "loss": 4.2013, + "step": 5290 + }, + { + "epoch": 0.09, + "learning_rate": 0.00058968634830467, + "loss": 4.1097, + "step": 5295 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005896649523329642, + "loss": 4.1492, + "step": 5300 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005896435345799383, + "loss": 4.0192, + "step": 5305 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005896220950472031, + "loss": 4.1101, + "step": 5310 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005896006337363707, + "loss": 4.1892, + "step": 5315 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005895791506490548, + "loss": 4.1187, + "step": 5320 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005895576457868708, + "loss": 4.0377, + "step": 5325 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005895361191514358, + "loss": 4.0711, + "step": 5330 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005895145707443684, + "loss": 4.0841, + "step": 5335 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005894930005672892, + "loss": 4.0504, + "step": 5340 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005894714086218199, + "loss": 4.1208, + "step": 5345 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005894497949095841, + "loss": 4.1813, + "step": 5350 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005894281594322073, + "loss": 4.1122, + "step": 5355 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005894065021913161, + "loss": 4.0417, + "step": 5360 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005893848231885391, + "loss": 4.0673, + "step": 5365 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005893631224255066, + "loss": 3.9842, + "step": 5370 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005893413999038501, + "loss": 4.0419, + "step": 5375 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005893196556252034, + "loss": 4.0908, + "step": 5380 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005892978895912011, + "loss": 4.1134, + "step": 5385 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005892761018034803, + "loss": 4.0354, + "step": 5390 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005892542922636791, + "loss": 4.1377, + "step": 5395 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005892324609734373, + "loss": 4.1254, + "step": 5400 + }, + { + "epoch": 0.09, + "learning_rate": 0.000589210607934397, + "loss": 4.0462, + "step": 5405 + }, + { + "epoch": 0.09, + "learning_rate": 0.000589188733148201, + "loss": 4.1928, + "step": 5410 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005891668366164945, + "loss": 4.109, + "step": 5415 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005891449183409238, + "loss": 3.9687, + "step": 5420 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005891229783231371, + "loss": 4.083, + "step": 5425 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005891010165647841, + "loss": 4.0274, + "step": 5430 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005890790330675162, + "loss": 4.0976, + "step": 5435 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005890570278329867, + "loss": 4.0589, + "step": 5440 + }, + { + "epoch": 0.09, + "learning_rate": 0.00058903500086285, + "loss": 4.1165, + "step": 5445 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005890129521587625, + "loss": 4.1122, + "step": 5450 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005889908817223822, + "loss": 4.094, + "step": 5455 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005889687895553687, + "loss": 4.0328, + "step": 5460 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005889466756593831, + "loss": 4.0535, + "step": 5465 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005889245400360883, + "loss": 4.1173, + "step": 5470 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005889023826871489, + "loss": 4.1297, + "step": 5475 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005888802036142308, + "loss": 4.0227, + "step": 5480 + }, + { + "epoch": 0.09, + "learning_rate": 0.0005888580028190019, + "loss": 4.1865, + "step": 5485 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005888357803031317, + "loss": 4.0646, + "step": 5490 + }, + { + "epoch": 0.1, + "learning_rate": 0.000588813536068291, + "loss": 4.0189, + "step": 5495 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005887912701161524, + "loss": 4.0756, + "step": 5500 + }, + { + "epoch": 0.1, + "eval_loss": 4.124629974365234, + "eval_runtime": 150.3809, + "eval_samples_per_second": 12.242, + "eval_steps_per_second": 0.771, + "step": 5500 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005887689824483905, + "loss": 4.0788, + "step": 5505 + }, + { + "epoch": 0.1, + "learning_rate": 0.000588746673066681, + "loss": 4.0131, + "step": 5510 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005887243419727013, + "loss": 4.0943, + "step": 5515 + }, + { + "epoch": 0.1, + "learning_rate": 0.000588701989168131, + "loss": 4.1031, + "step": 5520 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005886796146546507, + "loss": 4.0249, + "step": 5525 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005886572184339427, + "loss": 4.1008, + "step": 5530 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005886348005076913, + "loss": 4.0013, + "step": 5535 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005886123608775821, + "loss": 4.0819, + "step": 5540 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005885898995453024, + "loss": 3.9572, + "step": 5545 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005885674165125415, + "loss": 4.0845, + "step": 5550 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005885449117809896, + "loss": 4.0497, + "step": 5555 + }, + { + "epoch": 0.1, + "learning_rate": 0.000588522385352339, + "loss": 4.1316, + "step": 5560 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005884998372282838, + "loss": 4.0639, + "step": 5565 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005884772674105193, + "loss": 4.1424, + "step": 5570 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005884546759007428, + "loss": 4.1344, + "step": 5575 + }, + { + "epoch": 0.1, + "learning_rate": 0.000588432062700653, + "loss": 4.1834, + "step": 5580 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005884094278119502, + "loss": 4.0697, + "step": 5585 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005883867712363365, + "loss": 4.1778, + "step": 5590 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005883640929755156, + "loss": 3.9826, + "step": 5595 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005883413930311928, + "loss": 4.1734, + "step": 5600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005883186714050749, + "loss": 3.9475, + "step": 5605 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005882959280988705, + "loss": 4.0677, + "step": 5610 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005882731631142898, + "loss": 3.981, + "step": 5615 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005882503764530447, + "loss": 4.1465, + "step": 5620 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005882275681168485, + "loss": 4.1284, + "step": 5625 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005882047381074163, + "loss": 4.128, + "step": 5630 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005881818864264648, + "loss": 3.9485, + "step": 5635 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005881590130757124, + "loss": 4.0819, + "step": 5640 + }, + { + "epoch": 0.1, + "learning_rate": 0.000588136118056879, + "loss": 4.0794, + "step": 5645 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005881132013716862, + "loss": 4.0057, + "step": 5650 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005880902630218571, + "loss": 4.126, + "step": 5655 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005880673030091169, + "loss": 4.1839, + "step": 5660 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005880443213351917, + "loss": 4.0048, + "step": 5665 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005880213180018097, + "loss": 3.9995, + "step": 5670 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005879982930107007, + "loss": 4.047, + "step": 5675 + }, + { + "epoch": 0.1, + "learning_rate": 0.000587975246363596, + "loss": 4.0557, + "step": 5680 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005879521780622287, + "loss": 3.9903, + "step": 5685 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005879290881083333, + "loss": 4.0698, + "step": 5690 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005879059765036461, + "loss": 4.0808, + "step": 5695 + }, + { + "epoch": 0.1, + "learning_rate": 0.000587882843249905, + "loss": 3.9814, + "step": 5700 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005878596883488493, + "loss": 4.0462, + "step": 5705 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005878365118022205, + "loss": 4.057, + "step": 5710 + }, + { + "epoch": 0.1, + "learning_rate": 0.000587813313611761, + "loss": 4.0648, + "step": 5715 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005877900937792154, + "loss": 4.0972, + "step": 5720 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005877668523063297, + "loss": 4.1129, + "step": 5725 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005877435891948514, + "loss": 4.055, + "step": 5730 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005877203044465299, + "loss": 4.0274, + "step": 5735 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005876969980631161, + "loss": 4.1068, + "step": 5740 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005876736700463623, + "loss": 4.0272, + "step": 5745 + }, + { + "epoch": 0.1, + "learning_rate": 0.000587650320398023, + "loss": 4.0066, + "step": 5750 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005876269491198536, + "loss": 4.049, + "step": 5755 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005876035562136119, + "loss": 3.9857, + "step": 5760 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005875801416810566, + "loss": 3.9977, + "step": 5765 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005875567055239485, + "loss": 3.978, + "step": 5770 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005875332477440499, + "loss": 3.966, + "step": 5775 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005875097683431245, + "loss": 3.9962, + "step": 5780 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005874862673229381, + "loss": 4.0646, + "step": 5785 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005874627446852578, + "loss": 4.0243, + "step": 5790 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005874392004318521, + "loss": 3.9927, + "step": 5795 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005874156345644919, + "loss": 4.0585, + "step": 5800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005873920470849488, + "loss": 4.0704, + "step": 5805 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005873684379949967, + "loss": 4.1157, + "step": 5810 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005873448072964108, + "loss": 4.0976, + "step": 5815 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005873211549909679, + "loss": 4.0073, + "step": 5820 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005872974810804467, + "loss": 3.999, + "step": 5825 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005872737855666273, + "loss": 4.0376, + "step": 5830 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005872500684512915, + "loss": 4.0754, + "step": 5835 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005872263297362227, + "loss": 4.0787, + "step": 5840 + }, + { + "epoch": 0.1, + "learning_rate": 0.000587202569423206, + "loss": 4.0246, + "step": 5845 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005871787875140278, + "loss": 4.0841, + "step": 5850 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005871549840104766, + "loss": 4.0973, + "step": 5855 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005871311589143424, + "loss": 4.1085, + "step": 5860 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005871073122274162, + "loss": 4.0267, + "step": 5865 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005870834439514919, + "loss": 4.0323, + "step": 5870 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005870595540883638, + "loss": 4.0608, + "step": 5875 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005870356426398283, + "loss": 4.0103, + "step": 5880 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005870117096076835, + "loss": 3.9965, + "step": 5885 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005869877549937291, + "loss": 4.087, + "step": 5890 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005869637787997663, + "loss": 4.0467, + "step": 5895 + }, + { + "epoch": 0.1, + "learning_rate": 0.000586939781027598, + "loss": 4.0311, + "step": 5900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005869157616790287, + "loss": 4.0099, + "step": 5905 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005868917207558646, + "loss": 4.0817, + "step": 5910 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005868676582599134, + "loss": 3.966, + "step": 5915 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005868435741929845, + "loss": 4.0285, + "step": 5920 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005868194685568886, + "loss": 3.9462, + "step": 5925 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005867953413534389, + "loss": 4.0755, + "step": 5930 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005867711925844492, + "loss": 3.9712, + "step": 5935 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005867470222517355, + "loss": 3.9844, + "step": 5940 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005867228303571153, + "loss": 3.9981, + "step": 5945 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005866986169024077, + "loss": 4.0011, + "step": 5950 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005866743818894334, + "loss": 3.9929, + "step": 5955 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005866501253200148, + "loss": 4.0131, + "step": 5960 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005866258471959758, + "loss": 4.0858, + "step": 5965 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005866015475191421, + "loss": 4.0778, + "step": 5970 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005865772262913407, + "loss": 4.0723, + "step": 5975 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005865528835144007, + "loss": 4.0553, + "step": 5980 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005865285191901524, + "loss": 4.0564, + "step": 5985 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005865041333204278, + "loss": 3.9492, + "step": 5990 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005864797259070607, + "loss": 3.9973, + "step": 5995 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005864552969518865, + "loss": 4.0681, + "step": 6000 + }, + { + "epoch": 0.1, + "eval_loss": 4.073433876037598, + "eval_runtime": 149.973, + "eval_samples_per_second": 12.276, + "eval_steps_per_second": 0.773, + "step": 6000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005864308464567419, + "loss": 4.0337, + "step": 6005 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005864063744234657, + "loss": 4.0838, + "step": 6010 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005863818808538978, + "loss": 4.003, + "step": 6015 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005863573657498803, + "loss": 4.0435, + "step": 6020 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005863328291132564, + "loss": 4.0459, + "step": 6025 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005863082709458711, + "loss": 4.0842, + "step": 6030 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005862836912495713, + "loss": 4.0807, + "step": 6035 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005862590900262049, + "loss": 4.1169, + "step": 6040 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005862344672776221, + "loss": 4.1037, + "step": 6045 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005862098230056743, + "loss": 3.976, + "step": 6050 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005861851572122146, + "loss": 4.0173, + "step": 6055 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005861604698990978, + "loss": 4.0725, + "step": 6060 + }, + { + "epoch": 0.1, + "learning_rate": 0.0005861357610681802, + "loss": 4.0116, + "step": 6065 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005861110307213197, + "loss": 4.0785, + "step": 6070 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005860862788603761, + "loss": 4.0062, + "step": 6075 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005860615054872104, + "loss": 4.0221, + "step": 6080 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005860367106036854, + "loss": 4.0759, + "step": 6085 + }, + { + "epoch": 0.11, + "learning_rate": 0.000586011894211666, + "loss": 4.0736, + "step": 6090 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005859870563130177, + "loss": 4.1106, + "step": 6095 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005859621969096085, + "loss": 3.976, + "step": 6100 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005859373160033076, + "loss": 3.9863, + "step": 6105 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005859124135959861, + "loss": 4.0505, + "step": 6110 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005858874896895163, + "loss": 4.0183, + "step": 6115 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005858625442857722, + "loss": 3.9544, + "step": 6120 + }, + { + "epoch": 0.11, + "learning_rate": 0.00058583757738663, + "loss": 4.0514, + "step": 6125 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005858125889939669, + "loss": 3.9714, + "step": 6130 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005857875791096617, + "loss": 4.0567, + "step": 6135 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005857625477355954, + "loss": 4.0103, + "step": 6140 + }, + { + "epoch": 0.11, + "learning_rate": 0.00058573749487365, + "loss": 3.9502, + "step": 6145 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005857124205257091, + "loss": 4.0495, + "step": 6150 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005856873246936588, + "loss": 4.1271, + "step": 6155 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005856622073793856, + "loss": 4.0907, + "step": 6160 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005856370685847784, + "loss": 4.0462, + "step": 6165 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005856119083117276, + "loss": 4.026, + "step": 6170 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005855867265621249, + "loss": 4.027, + "step": 6175 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005855615233378641, + "loss": 3.9725, + "step": 6180 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005855362986408401, + "loss": 4.0381, + "step": 6185 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005855110524729499, + "loss": 4.0856, + "step": 6190 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005854857848360917, + "loss": 4.059, + "step": 6195 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005854604957321657, + "loss": 4.0826, + "step": 6200 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005854351851630732, + "loss": 4.0564, + "step": 6205 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005854098531307177, + "loss": 3.918, + "step": 6210 + }, + { + "epoch": 0.11, + "learning_rate": 0.000585384499637004, + "loss": 4.0235, + "step": 6215 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005853591246838384, + "loss": 4.0396, + "step": 6220 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005853337282731291, + "loss": 4.0262, + "step": 6225 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005853083104067858, + "loss": 4.0257, + "step": 6230 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005852828710867197, + "loss": 3.9727, + "step": 6235 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005852574103148437, + "loss": 4.0742, + "step": 6240 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005852319280930724, + "loss": 4.0149, + "step": 6245 + }, + { + "epoch": 0.11, + "learning_rate": 0.000585206424423322, + "loss": 4.0074, + "step": 6250 + }, + { + "epoch": 0.11, + "learning_rate": 0.00058518089930751, + "loss": 4.0104, + "step": 6255 + }, + { + "epoch": 0.11, + "learning_rate": 0.000585155352747556, + "loss": 4.0947, + "step": 6260 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005851297847453808, + "loss": 4.0355, + "step": 6265 + }, + { + "epoch": 0.11, + "learning_rate": 0.000585104195302907, + "loss": 4.1322, + "step": 6270 + }, + { + "epoch": 0.11, + "learning_rate": 0.000585078584422059, + "loss": 4.2945, + "step": 6275 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005850529521047623, + "loss": 4.0278, + "step": 6280 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005850272983529446, + "loss": 4.0641, + "step": 6285 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005850016231685348, + "loss": 4.0774, + "step": 6290 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005849759265534635, + "loss": 4.0438, + "step": 6295 + }, + { + "epoch": 0.11, + "learning_rate": 0.000584950208509663, + "loss": 4.0762, + "step": 6300 + }, + { + "epoch": 0.11, + "learning_rate": 0.000584924469039067, + "loss": 3.9799, + "step": 6305 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005848987081436115, + "loss": 4.0686, + "step": 6310 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005848729258252329, + "loss": 3.993, + "step": 6315 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005848471220858705, + "loss": 3.9435, + "step": 6320 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005848212969274643, + "loss": 4.056, + "step": 6325 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005847954503519561, + "loss": 4.0116, + "step": 6330 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005847695823612897, + "loss": 3.9138, + "step": 6335 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005847436929574101, + "loss": 4.0694, + "step": 6340 + }, + { + "epoch": 0.11, + "learning_rate": 0.000584717782142264, + "loss": 4.1302, + "step": 6345 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005846918499177998, + "loss": 4.1207, + "step": 6350 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005846658962859677, + "loss": 4.0034, + "step": 6355 + }, + { + "epoch": 0.11, + "learning_rate": 0.000584639921248719, + "loss": 3.8664, + "step": 6360 + }, + { + "epoch": 0.11, + "learning_rate": 0.000584613924808007, + "loss": 3.914, + "step": 6365 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005845879069657865, + "loss": 3.9682, + "step": 6370 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005845618677240138, + "loss": 3.9832, + "step": 6375 + }, + { + "epoch": 0.11, + "learning_rate": 0.000584535807084647, + "loss": 3.9809, + "step": 6380 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005845097250496457, + "loss": 3.9714, + "step": 6385 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005844836216209714, + "loss": 3.992, + "step": 6390 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005844574968005865, + "loss": 3.9963, + "step": 6395 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005844313505904557, + "loss": 4.062, + "step": 6400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005844051829925451, + "loss": 3.9474, + "step": 6405 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005843789940088223, + "loss": 3.9946, + "step": 6410 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005843527836412565, + "loss": 4.0315, + "step": 6415 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005843265518918185, + "loss": 4.0435, + "step": 6420 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005843002987624812, + "loss": 4.0449, + "step": 6425 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005842740242552182, + "loss": 3.9634, + "step": 6430 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005842477283720058, + "loss": 4.1614, + "step": 6435 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005842214111148207, + "loss": 4.0237, + "step": 6440 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005841950724856422, + "loss": 4.0143, + "step": 6445 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005841687124864506, + "loss": 4.0254, + "step": 6450 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005841423311192283, + "loss": 3.9378, + "step": 6455 + }, + { + "epoch": 0.11, + "learning_rate": 0.000584115928385959, + "loss": 4.0162, + "step": 6460 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005840895042886278, + "loss": 4.0237, + "step": 6465 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005840630588292218, + "loss": 3.951, + "step": 6470 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005840365920097296, + "loss": 3.9824, + "step": 6475 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005840101038321414, + "loss": 3.8698, + "step": 6480 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005839835942984488, + "loss": 3.9789, + "step": 6485 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005839570634106455, + "loss": 3.9527, + "step": 6490 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005839305111707262, + "loss": 4.0282, + "step": 6495 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005839039375806875, + "loss": 4.0262, + "step": 6500 + }, + { + "epoch": 0.11, + "eval_loss": 4.031991481781006, + "eval_runtime": 150.4726, + "eval_samples_per_second": 12.235, + "eval_steps_per_second": 0.771, + "step": 6500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005838773426425277, + "loss": 3.972, + "step": 6505 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005838507263582467, + "loss": 3.908, + "step": 6510 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005838240887298457, + "loss": 3.945, + "step": 6515 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005837974297593278, + "loss": 3.9304, + "step": 6520 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005837707494486976, + "loss": 4.0567, + "step": 6525 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005837440477999613, + "loss": 4.0024, + "step": 6530 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005837173248151269, + "loss": 4.0126, + "step": 6535 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005836905804962036, + "loss": 3.9493, + "step": 6540 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005836638148452027, + "loss": 4.0822, + "step": 6545 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005836370278641365, + "loss": 3.9565, + "step": 6550 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005836102195550195, + "loss": 4.0498, + "step": 6555 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005835833899198675, + "loss": 4.138, + "step": 6560 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005835565389606978, + "loss": 4.028, + "step": 6565 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005835296666795297, + "loss": 3.8704, + "step": 6570 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005835027730783836, + "loss": 4.0168, + "step": 6575 + }, + { + "epoch": 0.11, + "learning_rate": 0.000583475858159282, + "loss": 3.9846, + "step": 6580 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005834489219242486, + "loss": 3.9255, + "step": 6585 + }, + { + "epoch": 0.11, + "learning_rate": 0.000583421964375309, + "loss": 3.9709, + "step": 6590 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005833949855144901, + "loss": 3.971, + "step": 6595 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005833679853438208, + "loss": 4.0187, + "step": 6600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005833409638653311, + "loss": 4.0978, + "step": 6605 + }, + { + "epoch": 0.11, + "learning_rate": 0.000583313921081053, + "loss": 4.0233, + "step": 6610 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005832868569930201, + "loss": 3.9346, + "step": 6615 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005832597716032673, + "loss": 3.9806, + "step": 6620 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005832326649138315, + "loss": 3.9501, + "step": 6625 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005832055369267507, + "loss": 3.8945, + "step": 6630 + }, + { + "epoch": 0.11, + "learning_rate": 0.000583178387644065, + "loss": 4.0411, + "step": 6635 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005831512170678157, + "loss": 4.033, + "step": 6640 + }, + { + "epoch": 0.11, + "learning_rate": 0.0005831240252000461, + "loss": 3.9629, + "step": 6645 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005830968120428008, + "loss": 3.9754, + "step": 6650 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005830695775981261, + "loss": 3.961, + "step": 6655 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005830423218680698, + "loss": 3.8399, + "step": 6660 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005830150448546816, + "loss": 3.8657, + "step": 6665 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005829877465600122, + "loss": 3.9245, + "step": 6670 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005829604269861149, + "loss": 4.0341, + "step": 6675 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005829330861350433, + "loss": 3.9792, + "step": 6680 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005829057240088538, + "loss": 4.0263, + "step": 6685 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005828783406096037, + "loss": 3.9451, + "step": 6690 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005828509359393522, + "loss": 3.9552, + "step": 6695 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005828235100001597, + "loss": 4.0922, + "step": 6700 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005827960627940888, + "loss": 3.9879, + "step": 6705 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005827685943232034, + "loss": 4.0245, + "step": 6710 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005827411045895687, + "loss": 4.0227, + "step": 6715 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005827135935952521, + "loss": 3.8635, + "step": 6720 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005826860613423221, + "loss": 3.9943, + "step": 6725 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005826585078328489, + "loss": 4.047, + "step": 6730 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005826309330689047, + "loss": 3.9673, + "step": 6735 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005826033370525627, + "loss": 3.9391, + "step": 6740 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005825757197858981, + "loss": 4.0063, + "step": 6745 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005825480812709876, + "loss": 3.9123, + "step": 6750 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005825204215099093, + "loss": 4.0236, + "step": 6755 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005824927405047432, + "loss": 3.9006, + "step": 6760 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005824650382575709, + "loss": 3.9333, + "step": 6765 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005824373147704753, + "loss": 3.9388, + "step": 6770 + }, + { + "epoch": 0.12, + "learning_rate": 0.000582409570045541, + "loss": 3.9877, + "step": 6775 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005823818040848545, + "loss": 4.0653, + "step": 6780 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005823540168905034, + "loss": 3.9874, + "step": 6785 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005823262084645773, + "loss": 4.0155, + "step": 6790 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005822983788091672, + "loss": 3.9023, + "step": 6795 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005822705279263658, + "loss": 4.119, + "step": 6800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005822426558182674, + "loss": 3.99, + "step": 6805 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005822147624869676, + "loss": 4.0494, + "step": 6810 + }, + { + "epoch": 0.12, + "learning_rate": 0.000582186847934564, + "loss": 3.9168, + "step": 6815 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005821589121631557, + "loss": 3.8806, + "step": 6820 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005821309551748432, + "loss": 3.7961, + "step": 6825 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005821029769717288, + "loss": 3.7826, + "step": 6830 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005820749775559163, + "loss": 3.9815, + "step": 6835 + }, + { + "epoch": 0.12, + "learning_rate": 0.000582046956929511, + "loss": 3.9875, + "step": 6840 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005820189150946201, + "loss": 4.0475, + "step": 6845 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005819908520533521, + "loss": 3.9757, + "step": 6850 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005819627678078172, + "loss": 4.0318, + "step": 6855 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005819346623601271, + "loss": 3.994, + "step": 6860 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005819065357123955, + "loss": 4.0183, + "step": 6865 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005818783878667371, + "loss": 3.8478, + "step": 6870 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005818502188252684, + "loss": 3.9599, + "step": 6875 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005818220285901078, + "loss": 3.9233, + "step": 6880 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005817938171633749, + "loss": 3.9714, + "step": 6885 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005817655845471912, + "loss": 4.0335, + "step": 6890 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005817373307436795, + "loss": 3.9459, + "step": 6895 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005817090557549644, + "loss": 3.8784, + "step": 6900 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005816807595831721, + "loss": 3.9412, + "step": 6905 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005816524422304303, + "loss": 3.8951, + "step": 6910 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005816241036988683, + "loss": 3.9854, + "step": 6915 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005815957439906169, + "loss": 3.8417, + "step": 6920 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005815673631078088, + "loss": 3.9827, + "step": 6925 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005815389610525778, + "loss": 4.0293, + "step": 6930 + }, + { + "epoch": 0.12, + "learning_rate": 0.00058151053782706, + "loss": 3.9579, + "step": 6935 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005814820934333925, + "loss": 4.0445, + "step": 6940 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005814536278737141, + "loss": 3.9389, + "step": 6945 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005814251411501652, + "loss": 3.947, + "step": 6950 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005813966332648881, + "loss": 3.9215, + "step": 6955 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005813681042200262, + "loss": 3.8809, + "step": 6960 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005813395540177249, + "loss": 3.9399, + "step": 6965 + }, + { + "epoch": 0.12, + "learning_rate": 0.000581310982660131, + "loss": 3.906, + "step": 6970 + }, + { + "epoch": 0.12, + "learning_rate": 0.000581282390149393, + "loss": 3.9693, + "step": 6975 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005812537764876606, + "loss": 3.9784, + "step": 6980 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005812251416770857, + "loss": 4.0167, + "step": 6985 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005811964857198214, + "loss": 4.0349, + "step": 6990 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005811678086180225, + "loss": 3.9012, + "step": 6995 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005811391103738454, + "loss": 4.0148, + "step": 7000 + }, + { + "epoch": 0.12, + "eval_loss": 3.98817777633667, + "eval_runtime": 150.0793, + "eval_samples_per_second": 12.267, + "eval_steps_per_second": 0.773, + "step": 7000 + }, + { + "epoch": 0.12, + "learning_rate": 0.000581110390989448, + "loss": 3.9029, + "step": 7005 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005810816504669897, + "loss": 4.0553, + "step": 7010 + }, + { + "epoch": 0.12, + "learning_rate": 0.000581052888808632, + "loss": 4.097, + "step": 7015 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005810241060165374, + "loss": 3.9799, + "step": 7020 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005809953020928703, + "loss": 3.9113, + "step": 7025 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005809664770397964, + "loss": 3.9452, + "step": 7030 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005809376308594835, + "loss": 3.9655, + "step": 7035 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005809087635541005, + "loss": 3.8256, + "step": 7040 + }, + { + "epoch": 0.12, + "learning_rate": 0.000580879875125818, + "loss": 3.9917, + "step": 7045 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005808509655768086, + "loss": 3.9427, + "step": 7050 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005808220349092458, + "loss": 4.0305, + "step": 7055 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005807930831253052, + "loss": 3.9139, + "step": 7060 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005807641102271637, + "loss": 3.9324, + "step": 7065 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005807351162170001, + "loss": 3.9821, + "step": 7070 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005807061010969944, + "loss": 3.9877, + "step": 7075 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005806770648693286, + "loss": 4.0239, + "step": 7080 + }, + { + "epoch": 0.12, + "learning_rate": 0.000580648007536186, + "loss": 3.9869, + "step": 7085 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005806189290997514, + "loss": 3.9419, + "step": 7090 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005805898295622115, + "loss": 4.0456, + "step": 7095 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005805607089257545, + "loss": 3.9479, + "step": 7100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00058053156719257, + "loss": 3.9563, + "step": 7105 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005805024043648493, + "loss": 3.9586, + "step": 7110 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005804732204447854, + "loss": 3.9508, + "step": 7115 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005804440154345728, + "loss": 3.9228, + "step": 7120 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005804147893364073, + "loss": 3.9427, + "step": 7125 + }, + { + "epoch": 0.12, + "learning_rate": 0.000580385542152487, + "loss": 3.9727, + "step": 7130 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005803562738850107, + "loss": 3.8936, + "step": 7135 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005803269845361794, + "loss": 3.8108, + "step": 7140 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005802976741081956, + "loss": 3.943, + "step": 7145 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005802683426032631, + "loss": 3.9068, + "step": 7150 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005802389900235878, + "loss": 3.931, + "step": 7155 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005802096163713764, + "loss": 4.0288, + "step": 7160 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005801802216488381, + "loss": 3.8147, + "step": 7165 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005801508058581829, + "loss": 3.8442, + "step": 7170 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005801213690016228, + "loss": 3.9372, + "step": 7175 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005800919110813716, + "loss": 4.0047, + "step": 7180 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005800624320996439, + "loss": 3.9978, + "step": 7185 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005800329320586568, + "loss": 3.9016, + "step": 7190 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005800034109606282, + "loss": 3.9061, + "step": 7195 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005799738688077781, + "loss": 3.9114, + "step": 7200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005799443056023279, + "loss": 3.8848, + "step": 7205 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005799147213465006, + "loss": 3.9432, + "step": 7210 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005798851160425209, + "loss": 3.9281, + "step": 7215 + }, + { + "epoch": 0.12, + "learning_rate": 0.0005798554896926149, + "loss": 3.8136, + "step": 7220 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005798258422990101, + "loss": 3.9658, + "step": 7225 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005797961738639362, + "loss": 3.9506, + "step": 7230 + }, + { + "epoch": 0.13, + "learning_rate": 0.000579766484389624, + "loss": 4.0028, + "step": 7235 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005797367738783059, + "loss": 3.9895, + "step": 7240 + }, + { + "epoch": 0.13, + "learning_rate": 0.000579707042332216, + "loss": 3.9246, + "step": 7245 + }, + { + "epoch": 0.13, + "learning_rate": 0.00057967728975359, + "loss": 3.9121, + "step": 7250 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005796475161446651, + "loss": 3.8231, + "step": 7255 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005796177215076801, + "loss": 3.9446, + "step": 7260 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005795879058448756, + "loss": 3.9487, + "step": 7265 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005795580691584934, + "loss": 3.9777, + "step": 7270 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005795282114507772, + "loss": 3.8589, + "step": 7275 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005794983327239719, + "loss": 3.9822, + "step": 7280 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005794684329803244, + "loss": 3.9954, + "step": 7285 + }, + { + "epoch": 0.13, + "learning_rate": 0.000579438512222083, + "loss": 3.9695, + "step": 7290 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005794085704514977, + "loss": 3.9504, + "step": 7295 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005793786076708198, + "loss": 3.9779, + "step": 7300 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005793486238823024, + "loss": 3.8648, + "step": 7305 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005793186190882001, + "loss": 3.9049, + "step": 7310 + }, + { + "epoch": 0.13, + "learning_rate": 0.000579288593290769, + "loss": 3.9022, + "step": 7315 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005792585464922673, + "loss": 3.9056, + "step": 7320 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005792284786949538, + "loss": 3.9115, + "step": 7325 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005791983899010899, + "loss": 3.953, + "step": 7330 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005791682801129379, + "loss": 3.9481, + "step": 7335 + }, + { + "epoch": 0.13, + "learning_rate": 0.000579138149332762, + "loss": 3.8981, + "step": 7340 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005791079975628277, + "loss": 4.0269, + "step": 7345 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005790778248054025, + "loss": 3.9883, + "step": 7350 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005790476310627551, + "loss": 3.9876, + "step": 7355 + }, + { + "epoch": 0.13, + "learning_rate": 0.000579017416337156, + "loss": 3.988, + "step": 7360 + }, + { + "epoch": 0.13, + "learning_rate": 0.000578987180630877, + "loss": 3.9376, + "step": 7365 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005789569239461918, + "loss": 3.9417, + "step": 7370 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005789266462853757, + "loss": 4.0789, + "step": 7375 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005788963476507052, + "loss": 3.9385, + "step": 7380 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005788660280444586, + "loss": 3.929, + "step": 7385 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005788356874689159, + "loss": 4.0291, + "step": 7390 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005788053259263587, + "loss": 3.8872, + "step": 7395 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005787749434190695, + "loss": 3.8675, + "step": 7400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005787445399493335, + "loss": 4.0703, + "step": 7405 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005787141155194365, + "loss": 3.9236, + "step": 7410 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005786836701316665, + "loss": 3.9127, + "step": 7415 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005786532037883128, + "loss": 3.8897, + "step": 7420 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005786227164916662, + "loss": 3.9272, + "step": 7425 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005785922082440192, + "loss": 3.9871, + "step": 7430 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005785616790476659, + "loss": 3.9186, + "step": 7435 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005785311289049019, + "loss": 3.974, + "step": 7440 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005785005578180245, + "loss": 3.9538, + "step": 7445 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005784699657893325, + "loss": 4.0123, + "step": 7450 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005784393528211261, + "loss": 3.9068, + "step": 7455 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005784087189157074, + "loss": 3.7432, + "step": 7460 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005783780640753798, + "loss": 3.9067, + "step": 7465 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005783473883024486, + "loss": 3.9339, + "step": 7470 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005783166915992202, + "loss": 3.8964, + "step": 7475 + }, + { + "epoch": 0.13, + "learning_rate": 0.000578285973968003, + "loss": 3.8972, + "step": 7480 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005782552354111067, + "loss": 3.9025, + "step": 7485 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005782244759308427, + "loss": 3.979, + "step": 7490 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005781936955295241, + "loss": 3.9776, + "step": 7495 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005781628942094653, + "loss": 3.9969, + "step": 7500 + }, + { + "epoch": 0.13, + "eval_loss": 3.961434841156006, + "eval_runtime": 149.9847, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 7500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005781320719729822, + "loss": 3.9575, + "step": 7505 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005781012288223929, + "loss": 3.8435, + "step": 7510 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005780703647600165, + "loss": 3.9905, + "step": 7515 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005780394797881735, + "loss": 3.8025, + "step": 7520 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005780085739091867, + "loss": 3.9212, + "step": 7525 + }, + { + "epoch": 0.13, + "learning_rate": 0.00057797764712538, + "loss": 3.8277, + "step": 7530 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005779466994390788, + "loss": 3.8959, + "step": 7535 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005779157308526102, + "loss": 3.9607, + "step": 7540 + }, + { + "epoch": 0.13, + "learning_rate": 0.000577884741368303, + "loss": 3.9904, + "step": 7545 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005778537309884873, + "loss": 4.0796, + "step": 7550 + }, + { + "epoch": 0.13, + "learning_rate": 0.000577822699715495, + "loss": 3.893, + "step": 7555 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005777916475516597, + "loss": 3.953, + "step": 7560 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005777605744993159, + "loss": 3.9167, + "step": 7565 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005777294805608006, + "loss": 3.8872, + "step": 7570 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005776983657384515, + "loss": 3.9342, + "step": 7575 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005776672300346086, + "loss": 3.9996, + "step": 7580 + }, + { + "epoch": 0.13, + "learning_rate": 0.000577636073451613, + "loss": 3.9381, + "step": 7585 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005776048959918076, + "loss": 4.0558, + "step": 7590 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005775736976575367, + "loss": 3.904, + "step": 7595 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005775424784511462, + "loss": 3.9933, + "step": 7600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005775112383749839, + "loss": 3.995, + "step": 7605 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005774799774313985, + "loss": 3.9822, + "step": 7610 + }, + { + "epoch": 0.13, + "learning_rate": 0.000577448695622741, + "loss": 3.7759, + "step": 7615 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005774173929513633, + "loss": 3.7732, + "step": 7620 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005773860694196195, + "loss": 3.8441, + "step": 7625 + }, + { + "epoch": 0.13, + "learning_rate": 0.000577354725029865, + "loss": 3.9408, + "step": 7630 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005773233597844564, + "loss": 3.9715, + "step": 7635 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005772919736857524, + "loss": 3.8511, + "step": 7640 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005772605667361131, + "loss": 3.9275, + "step": 7645 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005772291389379002, + "loss": 3.8361, + "step": 7650 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005771976902934768, + "loss": 3.8592, + "step": 7655 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005771662208052077, + "loss": 3.8605, + "step": 7660 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005771347304754593, + "loss": 3.885, + "step": 7665 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005771032193065994, + "loss": 3.8989, + "step": 7670 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005770716873009975, + "loss": 3.9576, + "step": 7675 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005770401344610248, + "loss": 3.8623, + "step": 7680 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005770085607890537, + "loss": 3.8787, + "step": 7685 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005769769662874585, + "loss": 3.9269, + "step": 7690 + }, + { + "epoch": 0.13, + "learning_rate": 0.000576945350958615, + "loss": 3.947, + "step": 7695 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005769137148049002, + "loss": 3.869, + "step": 7700 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005768820578286935, + "loss": 3.9183, + "step": 7705 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005768503800323748, + "loss": 3.94, + "step": 7710 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005768186814183265, + "loss": 3.7839, + "step": 7715 + }, + { + "epoch": 0.13, + "learning_rate": 0.000576786961988932, + "loss": 3.8936, + "step": 7720 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005767552217465765, + "loss": 3.9323, + "step": 7725 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005767234606936465, + "loss": 3.8951, + "step": 7730 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005766916788325306, + "loss": 3.8801, + "step": 7735 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005766598761656186, + "loss": 3.9485, + "step": 7740 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005766280526953016, + "loss": 3.9617, + "step": 7745 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005765962084239727, + "loss": 4.004, + "step": 7750 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005765643433540265, + "loss": 3.9779, + "step": 7755 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005765324574878592, + "loss": 3.8105, + "step": 7760 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005765005508278681, + "loss": 3.9296, + "step": 7765 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005764686233764527, + "loss": 3.9916, + "step": 7770 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005764366751360137, + "loss": 3.8613, + "step": 7775 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005764047061089534, + "loss": 3.8955, + "step": 7780 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005763727162976758, + "loss": 3.9348, + "step": 7785 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005763407057045863, + "loss": 3.9172, + "step": 7790 + }, + { + "epoch": 0.13, + "learning_rate": 0.000576308674332092, + "loss": 3.8628, + "step": 7795 + }, + { + "epoch": 0.13, + "learning_rate": 0.0005762766221826015, + "loss": 3.9669, + "step": 7800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005762445492585247, + "loss": 3.9162, + "step": 7805 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005762124555622737, + "loss": 3.9174, + "step": 7810 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005761803410962615, + "loss": 3.7882, + "step": 7815 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005761482058629032, + "loss": 3.9603, + "step": 7820 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005761160498646149, + "loss": 3.8465, + "step": 7825 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005760838731038147, + "loss": 3.8924, + "step": 7830 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005760516755829225, + "loss": 3.9661, + "step": 7835 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005760194573043586, + "loss": 3.8856, + "step": 7840 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005759872182705463, + "loss": 3.9037, + "step": 7845 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005759549584839096, + "loss": 3.8099, + "step": 7850 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005759226779468742, + "loss": 3.9186, + "step": 7855 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005758903766618676, + "loss": 3.8689, + "step": 7860 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005758580546313185, + "loss": 3.9651, + "step": 7865 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005758257118576574, + "loss": 3.8181, + "step": 7870 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005757933483433165, + "loss": 3.9611, + "step": 7875 + }, + { + "epoch": 0.14, + "learning_rate": 0.000575760964090729, + "loss": 3.8883, + "step": 7880 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005757285591023305, + "loss": 3.9339, + "step": 7885 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005756961333805574, + "loss": 3.9402, + "step": 7890 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005756636869278478, + "loss": 3.9722, + "step": 7895 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005756312197466418, + "loss": 3.8141, + "step": 7900 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005755987318393807, + "loss": 3.851, + "step": 7905 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005755662232085075, + "loss": 3.9061, + "step": 7910 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005755336938564663, + "loss": 3.846, + "step": 7915 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005755011437857037, + "loss": 3.8962, + "step": 7920 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005754685729986668, + "loss": 3.9421, + "step": 7925 + }, + { + "epoch": 0.14, + "learning_rate": 0.000575435981497805, + "loss": 3.8796, + "step": 7930 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005754033692855691, + "loss": 3.8234, + "step": 7935 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005753707363644111, + "loss": 3.8632, + "step": 7940 + }, + { + "epoch": 0.14, + "learning_rate": 0.000575338082736785, + "loss": 3.8991, + "step": 7945 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005753054084051462, + "loss": 4.0148, + "step": 7950 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005752727133719516, + "loss": 3.9543, + "step": 7955 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005752399976396596, + "loss": 3.8603, + "step": 7960 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005752072612107306, + "loss": 3.8823, + "step": 7965 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005751745040876258, + "loss": 3.8419, + "step": 7970 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005751417262728085, + "loss": 3.9414, + "step": 7975 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005751089277687436, + "loss": 3.911, + "step": 7980 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005750761085778972, + "loss": 3.9677, + "step": 7985 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005750432687027372, + "loss": 3.9097, + "step": 7990 + }, + { + "epoch": 0.14, + "learning_rate": 0.000575010408145733, + "loss": 3.827, + "step": 7995 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005749775269093554, + "loss": 3.8829, + "step": 8000 + }, + { + "epoch": 0.14, + "eval_loss": 3.9269142150878906, + "eval_runtime": 150.3729, + "eval_samples_per_second": 12.243, + "eval_steps_per_second": 0.771, + "step": 8000 + }, + { + "epoch": 0.14, + "learning_rate": 0.000574944624996077, + "loss": 3.8017, + "step": 8005 + }, + { + "epoch": 0.14, + "learning_rate": 0.000574911702408372, + "loss": 3.9319, + "step": 8010 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005748787591487158, + "loss": 3.9119, + "step": 8015 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005748457952195856, + "loss": 3.9751, + "step": 8020 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005748128106234602, + "loss": 3.8432, + "step": 8025 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005747798053628198, + "loss": 3.9297, + "step": 8030 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005747467794401464, + "loss": 3.8792, + "step": 8035 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005747137328579231, + "loss": 3.8893, + "step": 8040 + }, + { + "epoch": 0.14, + "learning_rate": 0.000574680665618635, + "loss": 3.8906, + "step": 8045 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005746475777247686, + "loss": 3.8385, + "step": 8050 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005746144691788119, + "loss": 3.7674, + "step": 8055 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005745813399832544, + "loss": 3.8042, + "step": 8060 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005745481901405874, + "loss": 3.9017, + "step": 8065 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005745150196533037, + "loss": 3.8173, + "step": 8070 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005744818285238972, + "loss": 3.8445, + "step": 8075 + }, + { + "epoch": 0.14, + "learning_rate": 0.000574448616754864, + "loss": 3.9037, + "step": 8080 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005744153843487012, + "loss": 3.8863, + "step": 8085 + }, + { + "epoch": 0.14, + "learning_rate": 0.000574382131307908, + "loss": 3.8032, + "step": 8090 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005743488576349846, + "loss": 3.8524, + "step": 8095 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005743155633324332, + "loss": 3.7371, + "step": 8100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005742822484027573, + "loss": 3.8358, + "step": 8105 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005742489128484619, + "loss": 4.0695, + "step": 8110 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005742155566720538, + "loss": 3.8984, + "step": 8115 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005741821798760412, + "loss": 3.8755, + "step": 8120 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005741487824629339, + "loss": 3.9444, + "step": 8125 + }, + { + "epoch": 0.14, + "learning_rate": 0.000574115364435243, + "loss": 3.9359, + "step": 8130 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005740819257954814, + "loss": 3.9182, + "step": 8135 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005740484665461639, + "loss": 3.9721, + "step": 8140 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005740149866898061, + "loss": 3.8413, + "step": 8145 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005739814862289256, + "loss": 3.9575, + "step": 8150 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005739479651660415, + "loss": 3.8612, + "step": 8155 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005739144235036744, + "loss": 3.8002, + "step": 8160 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005738808612443464, + "loss": 3.8984, + "step": 8165 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005738472783905811, + "loss": 3.8158, + "step": 8170 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005738136749449041, + "loss": 3.8328, + "step": 8175 + }, + { + "epoch": 0.14, + "learning_rate": 0.000573780050909842, + "loss": 3.8829, + "step": 8180 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005737464062879231, + "loss": 3.7717, + "step": 8185 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005737127410816775, + "loss": 3.879, + "step": 8190 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005736790552936364, + "loss": 3.9169, + "step": 8195 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005736453489263329, + "loss": 3.8645, + "step": 8200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005736116219823016, + "loss": 3.8988, + "step": 8205 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005735778744640787, + "loss": 3.8545, + "step": 8210 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005735441063742016, + "loss": 3.7853, + "step": 8215 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005735103177152096, + "loss": 3.8307, + "step": 8220 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005734765084896435, + "loss": 3.9213, + "step": 8225 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005734426787000453, + "loss": 3.9432, + "step": 8230 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005734088283489593, + "loss": 3.868, + "step": 8235 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005733749574389304, + "loss": 3.827, + "step": 8240 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005733410659725058, + "loss": 3.9695, + "step": 8245 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005733071539522339, + "loss": 3.8528, + "step": 8250 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005732732213806647, + "loss": 3.8285, + "step": 8255 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005732392682603497, + "loss": 3.9186, + "step": 8260 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005732052945938421, + "loss": 3.8465, + "step": 8265 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005731713003836965, + "loss": 3.8373, + "step": 8270 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005731372856324691, + "loss": 3.9738, + "step": 8275 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005731032503427176, + "loss": 3.8417, + "step": 8280 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005730691945170013, + "loss": 3.9056, + "step": 8285 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005730351181578811, + "loss": 3.9564, + "step": 8290 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005730010212679192, + "loss": 3.8577, + "step": 8295 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005729669038496796, + "loss": 3.8578, + "step": 8300 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005729327659057279, + "loss": 3.9018, + "step": 8305 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005728986074386309, + "loss": 3.9108, + "step": 8310 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005728644284509572, + "loss": 3.8183, + "step": 8315 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005728302289452769, + "loss": 3.8654, + "step": 8320 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005727960089241615, + "loss": 3.8433, + "step": 8325 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005727617683901845, + "loss": 3.8515, + "step": 8330 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005727275073459202, + "loss": 3.8286, + "step": 8335 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005726932257939452, + "loss": 3.8778, + "step": 8340 + }, + { + "epoch": 0.14, + "learning_rate": 0.000572658923736837, + "loss": 3.7871, + "step": 8345 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005726246011771752, + "loss": 3.9097, + "step": 8350 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005725902581175406, + "loss": 3.837, + "step": 8355 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005725558945605155, + "loss": 3.7465, + "step": 8360 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005725215105086839, + "loss": 3.9166, + "step": 8365 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005724871059646316, + "loss": 3.9286, + "step": 8370 + }, + { + "epoch": 0.14, + "learning_rate": 0.0005724526809309452, + "loss": 3.785, + "step": 8375 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005724182354102136, + "loss": 3.8503, + "step": 8380 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005723837694050267, + "loss": 3.8255, + "step": 8385 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005723492829179764, + "loss": 3.887, + "step": 8390 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005723147759516558, + "loss": 3.7949, + "step": 8395 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005722802485086595, + "loss": 3.8919, + "step": 8400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005722457005915841, + "loss": 3.8261, + "step": 8405 + }, + { + "epoch": 0.15, + "learning_rate": 0.000572211132203027, + "loss": 3.8836, + "step": 8410 + }, + { + "epoch": 0.15, + "learning_rate": 0.000572176543345588, + "loss": 3.7968, + "step": 8415 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005721419340218678, + "loss": 3.8791, + "step": 8420 + }, + { + "epoch": 0.15, + "learning_rate": 0.000572107304234469, + "loss": 3.8896, + "step": 8425 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005720726539859953, + "loss": 3.8461, + "step": 8430 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005720379832790524, + "loss": 3.8805, + "step": 8435 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005720032921162473, + "loss": 3.7594, + "step": 8440 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005719685805001886, + "loss": 3.906, + "step": 8445 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005719338484334866, + "loss": 3.8963, + "step": 8450 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005718990959187527, + "loss": 3.8806, + "step": 8455 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005718643229586002, + "loss": 3.9259, + "step": 8460 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005718295295556441, + "loss": 3.8151, + "step": 8465 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005717947157125004, + "loss": 3.8314, + "step": 8470 + }, + { + "epoch": 0.15, + "learning_rate": 0.000571759881431787, + "loss": 3.8986, + "step": 8475 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005717250267161232, + "loss": 3.8939, + "step": 8480 + }, + { + "epoch": 0.15, + "learning_rate": 0.00057169015156813, + "loss": 3.9307, + "step": 8485 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005716552559904297, + "loss": 3.7868, + "step": 8490 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005716203399856466, + "loss": 3.8096, + "step": 8495 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005715854035564058, + "loss": 3.8729, + "step": 8500 + }, + { + "epoch": 0.15, + "eval_loss": 3.8896238803863525, + "eval_runtime": 150.0738, + "eval_samples_per_second": 12.267, + "eval_steps_per_second": 0.773, + "step": 8500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005715504467053345, + "loss": 3.7995, + "step": 8505 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005715154694350613, + "loss": 3.8546, + "step": 8510 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005714804717482163, + "loss": 3.8225, + "step": 8515 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005714454536474311, + "loss": 3.866, + "step": 8520 + }, + { + "epoch": 0.15, + "learning_rate": 0.000571410415135339, + "loss": 3.8313, + "step": 8525 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005713753562145746, + "loss": 3.8833, + "step": 8530 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005713402768877743, + "loss": 3.8299, + "step": 8535 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005713051771575756, + "loss": 3.9089, + "step": 8540 + }, + { + "epoch": 0.15, + "learning_rate": 0.000571270057026618, + "loss": 3.8661, + "step": 8545 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005712349164975424, + "loss": 3.7924, + "step": 8550 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005711997555729912, + "loss": 3.9294, + "step": 8555 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005711645742556082, + "loss": 3.7973, + "step": 8560 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005711293725480389, + "loss": 3.7566, + "step": 8565 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005710941504529304, + "loss": 3.9148, + "step": 8570 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005710589079729309, + "loss": 3.8453, + "step": 8575 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005710236451106909, + "loss": 3.8899, + "step": 8580 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005709883618688617, + "loss": 3.8985, + "step": 8585 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005709530582500964, + "loss": 3.8628, + "step": 8590 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005709177342570498, + "loss": 3.7169, + "step": 8595 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005708823898923779, + "loss": 3.8679, + "step": 8600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005708470251587387, + "loss": 3.9674, + "step": 8605 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005708116400587912, + "loss": 3.9565, + "step": 8610 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005707762345951963, + "loss": 3.786, + "step": 8615 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005707408087706163, + "loss": 3.8328, + "step": 8620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005707053625877149, + "loss": 3.8651, + "step": 8625 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005706698960491576, + "loss": 3.8354, + "step": 8630 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005706344091576113, + "loss": 3.9148, + "step": 8635 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005705989019157443, + "loss": 3.743, + "step": 8640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005705633743262269, + "loss": 3.9586, + "step": 8645 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005705278263917301, + "loss": 3.9242, + "step": 8650 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005704922581149275, + "loss": 3.7486, + "step": 8655 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005704566694984931, + "loss": 3.8465, + "step": 8660 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005704210605451033, + "loss": 3.9023, + "step": 8665 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005703854312574357, + "loss": 3.9225, + "step": 8670 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005703497816381692, + "loss": 3.9354, + "step": 8675 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005703141116899847, + "loss": 3.8554, + "step": 8680 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005702784214155645, + "loss": 3.8595, + "step": 8685 + }, + { + "epoch": 0.15, + "learning_rate": 0.000570242710817592, + "loss": 3.8911, + "step": 8690 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005702069798987526, + "loss": 3.9046, + "step": 8695 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005701712286617332, + "loss": 3.9157, + "step": 8700 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005701354571092219, + "loss": 3.7783, + "step": 8705 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005700996652439088, + "loss": 3.8195, + "step": 8710 + }, + { + "epoch": 0.15, + "learning_rate": 0.000570063853068485, + "loss": 3.8212, + "step": 8715 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005700280205856435, + "loss": 3.8575, + "step": 8720 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005699921677980788, + "loss": 3.8246, + "step": 8725 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005699562947084867, + "loss": 3.7655, + "step": 8730 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005699204013195648, + "loss": 3.6899, + "step": 8735 + }, + { + "epoch": 0.15, + "learning_rate": 0.000569884487634012, + "loss": 3.8449, + "step": 8740 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005698485536545289, + "loss": 3.8651, + "step": 8745 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005698125993838176, + "loss": 3.8758, + "step": 8750 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005697766248245813, + "loss": 3.7971, + "step": 8755 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005697406299795257, + "loss": 3.8458, + "step": 8760 + }, + { + "epoch": 0.15, + "learning_rate": 0.000569704614851357, + "loss": 3.8787, + "step": 8765 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005696685794427835, + "loss": 3.7689, + "step": 8770 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005696325237565148, + "loss": 3.8765, + "step": 8775 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005695964477952623, + "loss": 3.8841, + "step": 8780 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005695603515617384, + "loss": 3.8682, + "step": 8785 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005695242350586576, + "loss": 3.9042, + "step": 8790 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005694880982887356, + "loss": 3.8366, + "step": 8795 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005694519412546899, + "loss": 3.72, + "step": 8800 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005694157639592388, + "loss": 3.8754, + "step": 8805 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005693795664051032, + "loss": 3.7116, + "step": 8810 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005693433485950048, + "loss": 3.8559, + "step": 8815 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005693071105316668, + "loss": 3.8147, + "step": 8820 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005692708522178144, + "loss": 3.8115, + "step": 8825 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005692345736561738, + "loss": 3.8211, + "step": 8830 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005691982748494732, + "loss": 3.7904, + "step": 8835 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005691619558004418, + "loss": 3.725, + "step": 8840 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005691256165118109, + "loss": 3.7322, + "step": 8845 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005690892569863127, + "loss": 3.8021, + "step": 8850 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005690528772266816, + "loss": 3.8315, + "step": 8855 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005690164772356529, + "loss": 3.8825, + "step": 8860 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005689800570159639, + "loss": 3.9128, + "step": 8865 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005689436165703531, + "loss": 3.7504, + "step": 8870 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005689071559015605, + "loss": 3.8452, + "step": 8875 + }, + { + "epoch": 0.15, + "learning_rate": 0.000568870675012328, + "loss": 3.8525, + "step": 8880 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005688341739053988, + "loss": 3.8366, + "step": 8885 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005687976525835173, + "loss": 3.9315, + "step": 8890 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005687611110494299, + "loss": 3.863, + "step": 8895 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005687245493058845, + "loss": 3.9057, + "step": 8900 + }, + { + "epoch": 0.15, + "learning_rate": 0.00056868796735563, + "loss": 3.8194, + "step": 8905 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005686513652014175, + "loss": 3.8638, + "step": 8910 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005686147428459991, + "loss": 3.8555, + "step": 8915 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005685781002921286, + "loss": 3.7245, + "step": 8920 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005685414375425615, + "loss": 3.8012, + "step": 8925 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005685047546000544, + "loss": 3.8829, + "step": 8930 + }, + { + "epoch": 0.15, + "learning_rate": 0.000568468051467366, + "loss": 3.7756, + "step": 8935 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005684313281472559, + "loss": 3.8913, + "step": 8940 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005683945846424856, + "loss": 3.7455, + "step": 8945 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005683578209558181, + "loss": 3.7563, + "step": 8950 + }, + { + "epoch": 0.15, + "learning_rate": 0.0005683210370900177, + "loss": 3.8759, + "step": 8955 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005682842330478505, + "loss": 3.8912, + "step": 8960 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005682474088320839, + "loss": 3.9026, + "step": 8965 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005682105644454869, + "loss": 3.8464, + "step": 8970 + }, + { + "epoch": 0.16, + "learning_rate": 0.00056817369989083, + "loss": 3.8083, + "step": 8975 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005681368151708852, + "loss": 3.8982, + "step": 8980 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005680999102884261, + "loss": 3.8016, + "step": 8985 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005680629852462278, + "loss": 3.9751, + "step": 8990 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005680260400470668, + "loss": 3.8363, + "step": 8995 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005679890746937211, + "loss": 3.8684, + "step": 9000 + }, + { + "epoch": 0.16, + "eval_loss": 3.870915412902832, + "eval_runtime": 150.275, + "eval_samples_per_second": 12.251, + "eval_steps_per_second": 0.772, + "step": 9000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005679520891889705, + "loss": 3.9171, + "step": 9005 + }, + { + "epoch": 0.16, + "learning_rate": 0.000567915083535596, + "loss": 3.9465, + "step": 9010 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005678780577363802, + "loss": 3.9283, + "step": 9015 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005678410117941075, + "loss": 3.8472, + "step": 9020 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005678039457115632, + "loss": 3.9198, + "step": 9025 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005677668594915347, + "loss": 3.8925, + "step": 9030 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005677297531368106, + "loss": 3.893, + "step": 9035 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005676926266501812, + "loss": 3.8164, + "step": 9040 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005676554800344382, + "loss": 3.8225, + "step": 9045 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005676183132923748, + "loss": 3.8328, + "step": 9050 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005675811264267858, + "loss": 3.8067, + "step": 9055 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005675439194404673, + "loss": 3.8443, + "step": 9060 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005675066923362174, + "loss": 3.7139, + "step": 9065 + }, + { + "epoch": 0.16, + "learning_rate": 0.000567469445116835, + "loss": 3.733, + "step": 9070 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005674321777851211, + "loss": 3.7259, + "step": 9075 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005673948903438781, + "loss": 3.8794, + "step": 9080 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005673575827959097, + "loss": 3.7507, + "step": 9085 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005673202551440212, + "loss": 3.8077, + "step": 9090 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005672829073910196, + "loss": 3.778, + "step": 9095 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005672455395397132, + "loss": 3.8871, + "step": 9100 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005672081515929118, + "loss": 3.8326, + "step": 9105 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005671707435534267, + "loss": 3.8089, + "step": 9110 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005671333154240709, + "loss": 3.8371, + "step": 9115 + }, + { + "epoch": 0.16, + "learning_rate": 0.000567095867207659, + "loss": 3.7742, + "step": 9120 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005670583989070066, + "loss": 3.8002, + "step": 9125 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005670209105249313, + "loss": 3.7759, + "step": 9130 + }, + { + "epoch": 0.16, + "learning_rate": 0.000566983402064252, + "loss": 3.8101, + "step": 9135 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005669458735277891, + "loss": 3.8326, + "step": 9140 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005669083249183646, + "loss": 3.9168, + "step": 9145 + }, + { + "epoch": 0.16, + "learning_rate": 0.000566870756238802, + "loss": 3.8423, + "step": 9150 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005668331674919261, + "loss": 3.7503, + "step": 9155 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005667955586805635, + "loss": 3.9171, + "step": 9160 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005667579298075424, + "loss": 3.844, + "step": 9165 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005667202808756919, + "loss": 3.9111, + "step": 9170 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005666826118878432, + "loss": 3.7965, + "step": 9175 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005666449228468288, + "loss": 3.8016, + "step": 9180 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005666072137554829, + "loss": 3.7283, + "step": 9185 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005665694846166408, + "loss": 3.7984, + "step": 9190 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005665317354331395, + "loss": 3.7922, + "step": 9195 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005664939662078178, + "loss": 3.7183, + "step": 9200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005664561769435155, + "loss": 3.9538, + "step": 9205 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005664183676430743, + "loss": 3.7491, + "step": 9210 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005663805383093372, + "loss": 3.8107, + "step": 9215 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005663426889451488, + "loss": 3.8527, + "step": 9220 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005663048195533552, + "loss": 3.8206, + "step": 9225 + }, + { + "epoch": 0.16, + "learning_rate": 0.000566266930136804, + "loss": 3.8189, + "step": 9230 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005662290206983441, + "loss": 3.7953, + "step": 9235 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005661910912408264, + "loss": 3.8083, + "step": 9240 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005661531417671027, + "loss": 3.7971, + "step": 9245 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005661151722800269, + "loss": 3.6886, + "step": 9250 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005660771827824539, + "loss": 3.7777, + "step": 9255 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005660391732772404, + "loss": 3.6483, + "step": 9260 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005660011437672445, + "loss": 3.8698, + "step": 9265 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005659630942553258, + "loss": 3.8082, + "step": 9270 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005659250247443454, + "loss": 3.8567, + "step": 9275 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005658869352371661, + "loss": 3.8349, + "step": 9280 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005658488257366518, + "loss": 3.8353, + "step": 9285 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005658106962456684, + "loss": 3.7528, + "step": 9290 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005657725467670828, + "loss": 3.7924, + "step": 9295 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005657343773037637, + "loss": 3.8372, + "step": 9300 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005656961878585813, + "loss": 3.7955, + "step": 9305 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005656579784344074, + "loss": 3.7235, + "step": 9310 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005656197490341149, + "loss": 3.8506, + "step": 9315 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005655814996605785, + "loss": 3.844, + "step": 9320 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005655432303166744, + "loss": 3.7618, + "step": 9325 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005655049410052804, + "loss": 3.7935, + "step": 9330 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005654666317292754, + "loss": 3.9014, + "step": 9335 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005654283024915403, + "loss": 3.829, + "step": 9340 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005653899532949571, + "loss": 3.7477, + "step": 9345 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005653515841424095, + "loss": 3.9187, + "step": 9350 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005653131950367826, + "loss": 3.8836, + "step": 9355 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005652747859809632, + "loss": 3.8617, + "step": 9360 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005652363569778395, + "loss": 3.8054, + "step": 9365 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005651979080303009, + "loss": 3.8272, + "step": 9370 + }, + { + "epoch": 0.16, + "learning_rate": 0.000565159439141239, + "loss": 3.8475, + "step": 9375 + }, + { + "epoch": 0.16, + "learning_rate": 0.000565120950313546, + "loss": 3.823, + "step": 9380 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005650824415501163, + "loss": 3.9115, + "step": 9385 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005650439128538455, + "loss": 3.837, + "step": 9390 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005650053642276307, + "loss": 3.7632, + "step": 9395 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005649667956743708, + "loss": 3.7894, + "step": 9400 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005649282071969658, + "loss": 3.8228, + "step": 9405 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005648895987983172, + "loss": 3.8639, + "step": 9410 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005648509704813284, + "loss": 3.8556, + "step": 9415 + }, + { + "epoch": 0.16, + "learning_rate": 0.000564812322248904, + "loss": 3.856, + "step": 9420 + }, + { + "epoch": 0.16, + "learning_rate": 0.00056477365410395, + "loss": 3.8072, + "step": 9425 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005647349660493743, + "loss": 3.8082, + "step": 9430 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005646962580880858, + "loss": 3.7869, + "step": 9435 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005646575302229953, + "loss": 3.8706, + "step": 9440 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005646187824570148, + "loss": 3.7245, + "step": 9445 + }, + { + "epoch": 0.16, + "learning_rate": 0.000564580014793058, + "loss": 3.7688, + "step": 9450 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005645412272340401, + "loss": 3.7546, + "step": 9455 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005645024197828777, + "loss": 3.7613, + "step": 9460 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005644635924424888, + "loss": 3.7864, + "step": 9465 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005644247452157932, + "loss": 3.8333, + "step": 9470 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005643858781057118, + "loss": 3.8384, + "step": 9475 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005643469911151673, + "loss": 3.8734, + "step": 9480 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005643080842470839, + "loss": 3.8705, + "step": 9485 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005642691575043872, + "loss": 3.7985, + "step": 9490 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005642302108900041, + "loss": 3.8142, + "step": 9495 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005641912444068634, + "loss": 3.8861, + "step": 9500 + }, + { + "epoch": 0.16, + "eval_loss": 3.8443562984466553, + "eval_runtime": 150.875, + "eval_samples_per_second": 12.202, + "eval_steps_per_second": 0.769, + "step": 9500 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005641522580578949, + "loss": 3.8243, + "step": 9505 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005641132518460305, + "loss": 3.817, + "step": 9510 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005640742257742031, + "loss": 3.7974, + "step": 9515 + }, + { + "epoch": 0.16, + "learning_rate": 0.0005640351798453472, + "loss": 3.8722, + "step": 9520 + }, + { + "epoch": 0.16, + "learning_rate": 0.000563996114062399, + "loss": 3.8327, + "step": 9525 + }, + { + "epoch": 0.16, + "learning_rate": 0.000563957028428296, + "loss": 3.7176, + "step": 9530 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005639179229459772, + "loss": 3.8109, + "step": 9535 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005638787976183831, + "loss": 3.7644, + "step": 9540 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005638396524484558, + "loss": 3.8023, + "step": 9545 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005638004874391389, + "loss": 3.7181, + "step": 9550 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005637613025933772, + "loss": 3.8218, + "step": 9555 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005637220979141174, + "loss": 3.821, + "step": 9560 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005636828734043072, + "loss": 3.7989, + "step": 9565 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005636436290668965, + "loss": 3.8229, + "step": 9570 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005636043649048359, + "loss": 3.7486, + "step": 9575 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005635650809210781, + "loss": 3.7537, + "step": 9580 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005635257771185768, + "loss": 3.8646, + "step": 9585 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005634864535002878, + "loss": 3.8951, + "step": 9590 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005634471100691678, + "loss": 3.8565, + "step": 9595 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005634077468281752, + "loss": 3.865, + "step": 9600 + }, + { + "epoch": 0.17, + "learning_rate": 0.00056336836378027, + "loss": 3.7137, + "step": 9605 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005633289609284135, + "loss": 3.875, + "step": 9610 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005632895382755689, + "loss": 3.7517, + "step": 9615 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005632500958247002, + "loss": 3.8388, + "step": 9620 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005632106335787734, + "loss": 3.7177, + "step": 9625 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005631711515407561, + "loss": 3.9332, + "step": 9630 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005631316497136167, + "loss": 3.7879, + "step": 9635 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005630921281003259, + "loss": 3.8254, + "step": 9640 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005630525867038553, + "loss": 3.7538, + "step": 9645 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005630130255271784, + "loss": 3.8595, + "step": 9650 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005629734445732698, + "loss": 3.8395, + "step": 9655 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005629338438451059, + "loss": 3.8212, + "step": 9660 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005628942233456645, + "loss": 3.6969, + "step": 9665 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005628545830779248, + "loss": 3.8934, + "step": 9670 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005628149230448677, + "loss": 3.784, + "step": 9675 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005627752432494751, + "loss": 3.7458, + "step": 9680 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005627355436947309, + "loss": 3.7764, + "step": 9685 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005626958243836205, + "loss": 3.8557, + "step": 9690 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005626560853191302, + "loss": 3.7871, + "step": 9695 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005626163265042485, + "loss": 3.8723, + "step": 9700 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005625765479419649, + "loss": 3.7628, + "step": 9705 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005625367496352706, + "loss": 3.7876, + "step": 9710 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005624969315871581, + "loss": 3.8288, + "step": 9715 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005624570938006217, + "loss": 3.7054, + "step": 9720 + }, + { + "epoch": 0.17, + "learning_rate": 0.000562417236278657, + "loss": 3.8978, + "step": 9725 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005623773590242608, + "loss": 3.7314, + "step": 9730 + }, + { + "epoch": 0.17, + "learning_rate": 0.000562337462040432, + "loss": 3.844, + "step": 9735 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005622975453301706, + "loss": 3.8081, + "step": 9740 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005622576088964779, + "loss": 3.7213, + "step": 9745 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005622176527423571, + "loss": 3.7476, + "step": 9750 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005621776768708126, + "loss": 3.8561, + "step": 9755 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005621376812848505, + "loss": 3.8914, + "step": 9760 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005620976659874782, + "loss": 3.706, + "step": 9765 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005620576309817046, + "loss": 3.7814, + "step": 9770 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005620175762705402, + "loss": 3.7574, + "step": 9775 + }, + { + "epoch": 0.17, + "learning_rate": 0.000561977501856997, + "loss": 3.8686, + "step": 9780 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005619374077440882, + "loss": 3.8096, + "step": 9785 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005618972939348286, + "loss": 3.8072, + "step": 9790 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005618571604322349, + "loss": 3.8532, + "step": 9795 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005618170072393246, + "loss": 3.7328, + "step": 9800 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005617768343591172, + "loss": 3.8532, + "step": 9805 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005617366417946335, + "loss": 3.8652, + "step": 9810 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005616964295488957, + "loss": 3.7735, + "step": 9815 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005616561976249275, + "loss": 3.7548, + "step": 9820 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005616159460257542, + "loss": 3.764, + "step": 9825 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005615756747544026, + "loss": 3.7759, + "step": 9830 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005615353838139006, + "loss": 3.7218, + "step": 9835 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005614950732072784, + "loss": 3.8552, + "step": 9840 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005614547429375667, + "loss": 3.7065, + "step": 9845 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005614143930077982, + "loss": 3.8019, + "step": 9850 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005613740234210072, + "loss": 3.7688, + "step": 9855 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005613336341802291, + "loss": 3.7568, + "step": 9860 + }, + { + "epoch": 0.17, + "learning_rate": 0.000561293225288501, + "loss": 3.7632, + "step": 9865 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005612527967488614, + "loss": 3.6562, + "step": 9870 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005612123485643505, + "loss": 3.8351, + "step": 9875 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005611718807380096, + "loss": 3.8006, + "step": 9880 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005611313932728818, + "loss": 3.6692, + "step": 9885 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005610908861720114, + "loss": 3.7825, + "step": 9890 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005610503594384446, + "loss": 3.7926, + "step": 9895 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005610098130752286, + "loss": 3.8703, + "step": 9900 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005609692470854122, + "loss": 3.7963, + "step": 9905 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005609286614720459, + "loss": 3.8204, + "step": 9910 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005608880562381815, + "loss": 3.8013, + "step": 9915 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005608474313868724, + "loss": 3.7682, + "step": 9920 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005608067869211733, + "loss": 3.7958, + "step": 9925 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005607661228441402, + "loss": 3.7584, + "step": 9930 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005607254391588312, + "loss": 3.8284, + "step": 9935 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005606847358683054, + "loss": 3.7367, + "step": 9940 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005606440129756234, + "loss": 3.7325, + "step": 9945 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005606032704838475, + "loss": 3.6778, + "step": 9950 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005605625083960412, + "loss": 3.7351, + "step": 9955 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005605217267152696, + "loss": 3.812, + "step": 9960 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005604809254445992, + "loss": 3.7126, + "step": 9965 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005604401045870983, + "loss": 3.8023, + "step": 9970 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005603992641458362, + "loss": 3.7676, + "step": 9975 + }, + { + "epoch": 0.17, + "learning_rate": 0.000560358404123884, + "loss": 3.8314, + "step": 9980 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005603175245243141, + "loss": 3.7735, + "step": 9985 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005602766253502005, + "loss": 3.752, + "step": 9990 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005602357066046185, + "loss": 3.8366, + "step": 9995 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005601947682906452, + "loss": 3.7659, + "step": 10000 + }, + { + "epoch": 0.17, + "eval_loss": 3.824028253555298, + "eval_runtime": 150.1823, + "eval_samples_per_second": 12.258, + "eval_steps_per_second": 0.772, + "step": 10000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005601538104113585, + "loss": 3.7734, + "step": 10005 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005601128329698388, + "loss": 3.7532, + "step": 10010 + }, + { + "epoch": 0.17, + "learning_rate": 0.000560071835969167, + "loss": 3.828, + "step": 10015 + }, + { + "epoch": 0.17, + "learning_rate": 0.000560030819412426, + "loss": 3.8327, + "step": 10020 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005599897833027, + "loss": 3.9457, + "step": 10025 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005599487276430749, + "loss": 3.7388, + "step": 10030 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005599076524366375, + "loss": 3.8464, + "step": 10035 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005598665576864768, + "loss": 3.7601, + "step": 10040 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005598254433956826, + "loss": 3.7211, + "step": 10045 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005597843095673466, + "loss": 3.7223, + "step": 10050 + }, + { + "epoch": 0.17, + "learning_rate": 0.000559743156204562, + "loss": 3.6862, + "step": 10055 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005597019833104232, + "loss": 3.8612, + "step": 10060 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005596607908880263, + "loss": 3.7786, + "step": 10065 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005596195789404685, + "loss": 3.9116, + "step": 10070 + }, + { + "epoch": 0.17, + "learning_rate": 0.000559578347470849, + "loss": 3.8694, + "step": 10075 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005595370964822681, + "loss": 3.7191, + "step": 10080 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005594958259778276, + "loss": 3.8221, + "step": 10085 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005594545359606308, + "loss": 3.8228, + "step": 10090 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005594132264337827, + "loss": 3.8175, + "step": 10095 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005593718974003895, + "loss": 3.7765, + "step": 10100 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005593305488635587, + "loss": 3.7567, + "step": 10105 + }, + { + "epoch": 0.17, + "learning_rate": 0.0005592891808263998, + "loss": 3.8263, + "step": 10110 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005592477932920232, + "loss": 3.664, + "step": 10115 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005592063862635414, + "loss": 3.873, + "step": 10120 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005591649597440675, + "loss": 3.7406, + "step": 10125 + }, + { + "epoch": 0.18, + "learning_rate": 0.000559123513736717, + "loss": 3.7542, + "step": 10130 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005590820482446063, + "loss": 3.8525, + "step": 10135 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005590405632708533, + "loss": 3.7978, + "step": 10140 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005589990588185773, + "loss": 3.7052, + "step": 10145 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005589575348908996, + "loss": 3.8114, + "step": 10150 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005589159914909425, + "loss": 3.7365, + "step": 10155 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005588744286218297, + "loss": 3.7698, + "step": 10160 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005588328462866864, + "loss": 3.7538, + "step": 10165 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005587912444886397, + "loss": 3.7885, + "step": 10170 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005587496232308177, + "loss": 3.761, + "step": 10175 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005587079825163502, + "loss": 3.7494, + "step": 10180 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005586663223483681, + "loss": 3.7903, + "step": 10185 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005586246427300043, + "loss": 3.7783, + "step": 10190 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005585829436643928, + "loss": 3.7313, + "step": 10195 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005585412251546691, + "loss": 3.8423, + "step": 10200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005584994872039704, + "loss": 3.8002, + "step": 10205 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005584577298154349, + "loss": 3.7193, + "step": 10210 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005584159529922028, + "loss": 3.7501, + "step": 10215 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005583741567374153, + "loss": 3.8655, + "step": 10220 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005583323410542154, + "loss": 3.7713, + "step": 10225 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005582905059457473, + "loss": 3.7608, + "step": 10230 + }, + { + "epoch": 0.18, + "learning_rate": 0.000558248651415157, + "loss": 3.7848, + "step": 10235 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005582067774655916, + "loss": 3.8751, + "step": 10240 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005581648841001997, + "loss": 3.8157, + "step": 10245 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005581229713221318, + "loss": 3.7949, + "step": 10250 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005580810391345391, + "loss": 3.7647, + "step": 10255 + }, + { + "epoch": 0.18, + "learning_rate": 0.000558039087540575, + "loss": 3.7398, + "step": 10260 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005579971165433939, + "loss": 3.7206, + "step": 10265 + }, + { + "epoch": 0.18, + "learning_rate": 0.000557955126146152, + "loss": 3.9106, + "step": 10270 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005579131163520064, + "loss": 3.7303, + "step": 10275 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005578710871641165, + "loss": 3.6985, + "step": 10280 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005578290385856421, + "loss": 3.7783, + "step": 10285 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005577869706197456, + "loss": 3.7213, + "step": 10290 + }, + { + "epoch": 0.18, + "learning_rate": 0.00055774488326959, + "loss": 3.8223, + "step": 10295 + }, + { + "epoch": 0.18, + "learning_rate": 0.00055770277653834, + "loss": 3.8479, + "step": 10300 + }, + { + "epoch": 0.18, + "learning_rate": 0.000557660650429162, + "loss": 3.7619, + "step": 10305 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005576185049452235, + "loss": 3.7465, + "step": 10310 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005575763400896938, + "loss": 3.8019, + "step": 10315 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005575341558657433, + "loss": 3.7722, + "step": 10320 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005574919522765441, + "loss": 3.7072, + "step": 10325 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005574497293252698, + "loss": 3.8018, + "step": 10330 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005574074870150952, + "loss": 3.7442, + "step": 10335 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005573652253491968, + "loss": 3.7484, + "step": 10340 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005573229443307524, + "loss": 3.7656, + "step": 10345 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005572806439629414, + "loss": 3.7769, + "step": 10350 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005572383242489445, + "loss": 3.7266, + "step": 10355 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005571959851919439, + "loss": 3.8326, + "step": 10360 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005571536267951234, + "loss": 3.7286, + "step": 10365 + }, + { + "epoch": 0.18, + "learning_rate": 0.000557111249061668, + "loss": 3.7353, + "step": 10370 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005570688519947643, + "loss": 3.7861, + "step": 10375 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005570264355976005, + "loss": 3.746, + "step": 10380 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005569839998733659, + "loss": 3.8178, + "step": 10385 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005569415448252516, + "loss": 3.7118, + "step": 10390 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005568990704564498, + "loss": 3.8499, + "step": 10395 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005568565767701546, + "loss": 3.7467, + "step": 10400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005568140637695611, + "loss": 3.7022, + "step": 10405 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005567715314578664, + "loss": 3.7888, + "step": 10410 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005567289798382682, + "loss": 3.661, + "step": 10415 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005566864089139665, + "loss": 3.7638, + "step": 10420 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005566438186881624, + "loss": 3.7379, + "step": 10425 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005566012091640585, + "loss": 3.7724, + "step": 10430 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005565585803448585, + "loss": 3.8414, + "step": 10435 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005565159322337682, + "loss": 3.7841, + "step": 10440 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005564732648339945, + "loss": 3.7352, + "step": 10445 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005564305781487456, + "loss": 3.796, + "step": 10450 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005563878721812314, + "loss": 3.681, + "step": 10455 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005563451469346632, + "loss": 3.6562, + "step": 10460 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005563024024122537, + "loss": 3.7886, + "step": 10465 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005562596386172172, + "loss": 3.7527, + "step": 10470 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005562168555527689, + "loss": 3.7763, + "step": 10475 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005561740532221265, + "loss": 3.7259, + "step": 10480 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005561312316285079, + "loss": 3.7692, + "step": 10485 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005560883907751334, + "loss": 3.753, + "step": 10490 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005560455306652243, + "loss": 3.7146, + "step": 10495 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005560026513020036, + "loss": 3.7457, + "step": 10500 + }, + { + "epoch": 0.18, + "eval_loss": 3.808900833129883, + "eval_runtime": 150.2733, + "eval_samples_per_second": 12.251, + "eval_steps_per_second": 0.772, + "step": 10500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005559597526886955, + "loss": 3.7659, + "step": 10505 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005559168348285258, + "loss": 3.7797, + "step": 10510 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005558738977247216, + "loss": 3.7639, + "step": 10515 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005558309413805117, + "loss": 3.7582, + "step": 10520 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005557879657991261, + "loss": 3.7758, + "step": 10525 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005557449709837963, + "loss": 3.6406, + "step": 10530 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005557019569377555, + "loss": 3.7329, + "step": 10535 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005556589236642378, + "loss": 3.762, + "step": 10540 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005556158711664793, + "loss": 3.7226, + "step": 10545 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005555727994477175, + "loss": 3.7183, + "step": 10550 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005555297085111907, + "loss": 3.7314, + "step": 10555 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005554865983601396, + "loss": 3.829, + "step": 10560 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005554434689978056, + "loss": 3.7891, + "step": 10565 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005554003204274318, + "loss": 3.765, + "step": 10570 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005553571526522628, + "loss": 3.7701, + "step": 10575 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005553139656755446, + "loss": 3.7329, + "step": 10580 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005552707595005247, + "loss": 3.8377, + "step": 10585 + }, + { + "epoch": 0.18, + "learning_rate": 0.000555227534130452, + "loss": 3.6951, + "step": 10590 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005551842895685766, + "loss": 3.7439, + "step": 10595 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005551410258181506, + "loss": 3.764, + "step": 10600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005550977428824269, + "loss": 3.7233, + "step": 10605 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005550544407646603, + "loss": 3.8349, + "step": 10610 + }, + { + "epoch": 0.18, + "learning_rate": 0.000555011119468107, + "loss": 3.606, + "step": 10615 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005549677789960242, + "loss": 3.7585, + "step": 10620 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005549244193516713, + "loss": 3.6967, + "step": 10625 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005548810405383084, + "loss": 3.7009, + "step": 10630 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005548376425591976, + "loss": 3.7488, + "step": 10635 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005547942254176021, + "loss": 3.8379, + "step": 10640 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005547507891167866, + "loss": 3.6371, + "step": 10645 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005547073336600174, + "loss": 3.7803, + "step": 10650 + }, + { + "epoch": 0.18, + "learning_rate": 0.000554663859050562, + "loss": 3.7893, + "step": 10655 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005546203652916895, + "loss": 3.7123, + "step": 10660 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005545768523866706, + "loss": 3.7559, + "step": 10665 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005545333203387768, + "loss": 3.7527, + "step": 10670 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005544897691512821, + "loss": 3.747, + "step": 10675 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005544461988274608, + "loss": 3.7641, + "step": 10680 + }, + { + "epoch": 0.18, + "learning_rate": 0.0005544026093705894, + "loss": 3.8162, + "step": 10685 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005543590007839455, + "loss": 3.8019, + "step": 10690 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005543153730708084, + "loss": 3.8212, + "step": 10695 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005542717262344586, + "loss": 3.6983, + "step": 10700 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005542280602781782, + "loss": 3.7003, + "step": 10705 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005541843752052504, + "loss": 3.8489, + "step": 10710 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005541406710189603, + "loss": 3.7563, + "step": 10715 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005540969477225942, + "loss": 3.8294, + "step": 10720 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005540532053194399, + "loss": 3.786, + "step": 10725 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005540094438127865, + "loss": 3.7697, + "step": 10730 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005539656632059247, + "loss": 3.7351, + "step": 10735 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005539218635021466, + "loss": 3.8407, + "step": 10740 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005538780447047457, + "loss": 3.7196, + "step": 10745 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005538342068170169, + "loss": 3.6508, + "step": 10750 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005537903498422567, + "loss": 3.7522, + "step": 10755 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005537464737837628, + "loss": 3.6545, + "step": 10760 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005537025786448346, + "loss": 3.7622, + "step": 10765 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005536586644287726, + "loss": 3.7217, + "step": 10770 + }, + { + "epoch": 0.19, + "learning_rate": 0.000553614731138879, + "loss": 3.6806, + "step": 10775 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005535707787784574, + "loss": 3.8162, + "step": 10780 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005535268073508128, + "loss": 3.7956, + "step": 10785 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005534828168592515, + "loss": 3.7212, + "step": 10790 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005534388073070815, + "loss": 3.8734, + "step": 10795 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005533947786976121, + "loss": 3.7601, + "step": 10800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005533507310341539, + "loss": 3.7531, + "step": 10805 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005533066643200191, + "loss": 3.8357, + "step": 10810 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005532625785585214, + "loss": 3.7786, + "step": 10815 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005532184737529757, + "loss": 3.8007, + "step": 10820 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005531743499066985, + "loss": 3.8569, + "step": 10825 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005531302070230077, + "loss": 3.729, + "step": 10830 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005530860451052226, + "loss": 3.7075, + "step": 10835 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005530418641566639, + "loss": 3.8074, + "step": 10840 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005529976641806539, + "loss": 3.7291, + "step": 10845 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005529534451805162, + "loss": 3.8529, + "step": 10850 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005529092071595756, + "loss": 3.7, + "step": 10855 + }, + { + "epoch": 0.19, + "learning_rate": 0.000552864950121159, + "loss": 3.65, + "step": 10860 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005528206740685938, + "loss": 3.729, + "step": 10865 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005527763790052099, + "loss": 3.822, + "step": 10870 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005527320649343375, + "loss": 3.7941, + "step": 10875 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005526877318593092, + "loss": 3.7147, + "step": 10880 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005526433797834584, + "loss": 3.6646, + "step": 10885 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005525990087101202, + "loss": 3.827, + "step": 10890 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005525546186426311, + "loss": 3.7701, + "step": 10895 + }, + { + "epoch": 0.19, + "learning_rate": 0.000552510209584329, + "loss": 3.6971, + "step": 10900 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005524657815385532, + "loss": 3.712, + "step": 10905 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005524213345086446, + "loss": 3.7302, + "step": 10910 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005523768684979453, + "loss": 3.7991, + "step": 10915 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005523323835097988, + "loss": 3.8046, + "step": 10920 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005522878795475503, + "loss": 3.746, + "step": 10925 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005522433566145462, + "loss": 3.6934, + "step": 10930 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005521988147141343, + "loss": 3.7359, + "step": 10935 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005521542538496642, + "loss": 3.6942, + "step": 10940 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005521096740244864, + "loss": 3.6852, + "step": 10945 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005520650752419531, + "loss": 3.7642, + "step": 10950 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005520204575054182, + "loss": 3.7516, + "step": 10955 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005519758208182362, + "loss": 3.8863, + "step": 10960 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005519311651837639, + "loss": 3.8518, + "step": 10965 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005518864906053592, + "loss": 3.6166, + "step": 10970 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005518417970863812, + "loss": 3.6787, + "step": 10975 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005517970846301908, + "loss": 3.6608, + "step": 10980 + }, + { + "epoch": 0.19, + "learning_rate": 0.00055175235324015, + "loss": 3.7437, + "step": 10985 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005517076029196225, + "loss": 3.7547, + "step": 10990 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005516628336719732, + "loss": 3.8382, + "step": 10995 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005516180455005686, + "loss": 3.9065, + "step": 11000 + }, + { + "epoch": 0.19, + "eval_loss": 3.82816481590271, + "eval_runtime": 150.3797, + "eval_samples_per_second": 12.242, + "eval_steps_per_second": 0.771, + "step": 11000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005515732384087765, + "loss": 3.8359, + "step": 11005 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005515284123999662, + "loss": 3.7338, + "step": 11010 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005514835674775083, + "loss": 3.6409, + "step": 11015 + }, + { + "epoch": 0.19, + "learning_rate": 0.000551438703644775, + "loss": 3.6348, + "step": 11020 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005513938209051396, + "loss": 3.8714, + "step": 11025 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005513489192619775, + "loss": 3.7836, + "step": 11030 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005513039987186647, + "loss": 3.6187, + "step": 11035 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005512590592785791, + "loss": 3.7686, + "step": 11040 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005512141009451, + "loss": 3.7887, + "step": 11045 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005511691237216078, + "loss": 3.7462, + "step": 11050 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005511241276114849, + "loss": 3.7589, + "step": 11055 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005510791126181145, + "loss": 3.6582, + "step": 11060 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005510340787448815, + "loss": 3.6191, + "step": 11065 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005509890259951725, + "loss": 3.7391, + "step": 11070 + }, + { + "epoch": 0.19, + "learning_rate": 0.000550943954372375, + "loss": 3.7873, + "step": 11075 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005508988638798781, + "loss": 3.58, + "step": 11080 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005508537545210726, + "loss": 3.7628, + "step": 11085 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005508086262993502, + "loss": 3.684, + "step": 11090 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005507634792181046, + "loss": 3.808, + "step": 11095 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005507183132807304, + "loss": 3.6461, + "step": 11100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005506731284906242, + "loss": 3.7431, + "step": 11105 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005506279248511832, + "loss": 3.6644, + "step": 11110 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005505827023658067, + "loss": 3.8145, + "step": 11115 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005505374610378953, + "loss": 3.7549, + "step": 11120 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005504922008708507, + "loss": 3.6777, + "step": 11125 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005504469218680763, + "loss": 3.6589, + "step": 11130 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005504016240329772, + "loss": 3.7951, + "step": 11135 + }, + { + "epoch": 0.19, + "learning_rate": 0.000550356307368959, + "loss": 3.6839, + "step": 11140 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005503109718794296, + "loss": 3.8131, + "step": 11145 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005502656175677979, + "loss": 3.7538, + "step": 11150 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005502202444374744, + "loss": 3.5535, + "step": 11155 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005501748524918709, + "loss": 3.6548, + "step": 11160 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005501294417344006, + "loss": 3.7523, + "step": 11165 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005500840121684781, + "loss": 3.778, + "step": 11170 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005500385637975196, + "loss": 3.6966, + "step": 11175 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005499930966249426, + "loss": 3.7497, + "step": 11180 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005499476106541659, + "loss": 3.7591, + "step": 11185 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005499021058886096, + "loss": 3.7704, + "step": 11190 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005498565823316959, + "loss": 3.8197, + "step": 11195 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005498110399868477, + "loss": 3.6668, + "step": 11200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005497654788574896, + "loss": 3.7984, + "step": 11205 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005497198989470474, + "loss": 3.7465, + "step": 11210 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005496743002589487, + "loss": 3.6981, + "step": 11215 + }, + { + "epoch": 0.19, + "learning_rate": 0.000549628682796622, + "loss": 3.8666, + "step": 11220 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005495830465634978, + "loss": 3.7241, + "step": 11225 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005495373915630076, + "loss": 3.7521, + "step": 11230 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005494917177985844, + "loss": 3.717, + "step": 11235 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005494460252736627, + "loss": 3.8369, + "step": 11240 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005494003139916782, + "loss": 3.6984, + "step": 11245 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005493545839560682, + "loss": 3.7683, + "step": 11250 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005493088351702716, + "loss": 3.753, + "step": 11255 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005492630676377281, + "loss": 3.6862, + "step": 11260 + }, + { + "epoch": 0.19, + "learning_rate": 0.0005492172813618795, + "loss": 3.7831, + "step": 11265 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005491714763461684, + "loss": 3.6696, + "step": 11270 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005491256525940395, + "loss": 3.792, + "step": 11275 + }, + { + "epoch": 0.2, + "learning_rate": 0.000549079810108938, + "loss": 3.7118, + "step": 11280 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005490339488943115, + "loss": 3.7159, + "step": 11285 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005489880689536083, + "loss": 3.8581, + "step": 11290 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005489421702902783, + "loss": 3.7271, + "step": 11295 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005488962529077728, + "loss": 3.7899, + "step": 11300 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005488503168095449, + "loss": 3.7817, + "step": 11305 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005488043619990483, + "loss": 3.7262, + "step": 11310 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005487583884797389, + "loss": 3.7328, + "step": 11315 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005487123962550735, + "loss": 3.8268, + "step": 11320 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005486663853285106, + "loss": 3.6743, + "step": 11325 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005486203557035098, + "loss": 3.7256, + "step": 11330 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005485743073835325, + "loss": 3.6811, + "step": 11335 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005485282403720411, + "loss": 3.6535, + "step": 11340 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005484821546724998, + "loss": 3.7042, + "step": 11345 + }, + { + "epoch": 0.2, + "learning_rate": 0.000548436050288374, + "loss": 3.7035, + "step": 11350 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005483899272231303, + "loss": 3.6523, + "step": 11355 + }, + { + "epoch": 0.2, + "learning_rate": 0.000548343785480237, + "loss": 3.6676, + "step": 11360 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005482976250631639, + "loss": 3.7115, + "step": 11365 + }, + { + "epoch": 0.2, + "learning_rate": 0.000548251445975382, + "loss": 3.7758, + "step": 11370 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005482052482203634, + "loss": 3.7202, + "step": 11375 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005481590318015823, + "loss": 3.7921, + "step": 11380 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005481127967225137, + "loss": 3.5875, + "step": 11385 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005480665429866344, + "loss": 3.872, + "step": 11390 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005480202705974225, + "loss": 3.7499, + "step": 11395 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005479739795583573, + "loss": 3.8315, + "step": 11400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005479276698729195, + "loss": 3.7723, + "step": 11405 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005478813415445916, + "loss": 3.7126, + "step": 11410 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005478349945768574, + "loss": 3.8067, + "step": 11415 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005477886289732015, + "loss": 3.6816, + "step": 11420 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005477422447371107, + "loss": 3.8123, + "step": 11425 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005476958418720728, + "loss": 3.6074, + "step": 11430 + }, + { + "epoch": 0.2, + "learning_rate": 0.000547649420381577, + "loss": 3.7801, + "step": 11435 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005476029802691138, + "loss": 3.7547, + "step": 11440 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005475565215381755, + "loss": 3.7165, + "step": 11445 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005475100441922556, + "loss": 3.721, + "step": 11450 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005474635482348488, + "loss": 3.6334, + "step": 11455 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005474170336694514, + "loss": 3.7961, + "step": 11460 + }, + { + "epoch": 0.2, + "learning_rate": 0.000547370500499561, + "loss": 3.7415, + "step": 11465 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005473239487286767, + "loss": 3.6964, + "step": 11470 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005472773783602991, + "loss": 3.7686, + "step": 11475 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005472307893979298, + "loss": 3.7813, + "step": 11480 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005471841818450722, + "loss": 3.8297, + "step": 11485 + }, + { + "epoch": 0.2, + "learning_rate": 0.000547137555705231, + "loss": 3.6398, + "step": 11490 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005470909109819122, + "loss": 3.8471, + "step": 11495 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005470442476786231, + "loss": 3.636, + "step": 11500 + }, + { + "epoch": 0.2, + "eval_loss": 3.7674803733825684, + "eval_runtime": 150.2732, + "eval_samples_per_second": 12.251, + "eval_steps_per_second": 0.772, + "step": 11500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005469975657988727, + "loss": 3.7453, + "step": 11505 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005469508653461713, + "loss": 3.6952, + "step": 11510 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005469041463240302, + "loss": 3.6867, + "step": 11515 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005468574087359628, + "loss": 3.7732, + "step": 11520 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005468106525854834, + "loss": 3.7456, + "step": 11525 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005467638778761078, + "loss": 3.7178, + "step": 11530 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005467170846113532, + "loss": 3.6693, + "step": 11535 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005466702727947383, + "loss": 3.7435, + "step": 11540 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005466234424297829, + "loss": 3.7004, + "step": 11545 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005465765935200086, + "loss": 3.8126, + "step": 11550 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005465297260689383, + "loss": 3.7205, + "step": 11555 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005464828400800959, + "loss": 3.7958, + "step": 11560 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005464359355570071, + "loss": 3.745, + "step": 11565 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005463890125031991, + "loss": 3.681, + "step": 11570 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005463420709222, + "loss": 3.6648, + "step": 11575 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005462951108175396, + "loss": 3.7188, + "step": 11580 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005462481321927492, + "loss": 3.6209, + "step": 11585 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005462011350513611, + "loss": 3.8103, + "step": 11590 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005461541193969095, + "loss": 3.7334, + "step": 11595 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005461070852329296, + "loss": 3.8183, + "step": 11600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005460600325629582, + "loss": 3.7361, + "step": 11605 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005460129613905334, + "loss": 3.6082, + "step": 11610 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005459658717191948, + "loss": 3.7858, + "step": 11615 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005459187635524831, + "loss": 3.7071, + "step": 11620 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005458716368939407, + "loss": 3.8105, + "step": 11625 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005458244917471112, + "loss": 3.7418, + "step": 11630 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005457773281155398, + "loss": 3.7896, + "step": 11635 + }, + { + "epoch": 0.2, + "learning_rate": 0.000545730146002773, + "loss": 3.7541, + "step": 11640 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005456829454123585, + "loss": 3.7075, + "step": 11645 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005456357263478456, + "loss": 3.7605, + "step": 11650 + }, + { + "epoch": 0.2, + "learning_rate": 0.000545588488812785, + "loss": 3.7757, + "step": 11655 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005455412328107287, + "loss": 3.7652, + "step": 11660 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005454939583452299, + "loss": 3.7985, + "step": 11665 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005454466654198437, + "loss": 3.7654, + "step": 11670 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005453993540381261, + "loss": 3.6536, + "step": 11675 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005453520242036348, + "loss": 3.7901, + "step": 11680 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005453046759199286, + "loss": 3.6965, + "step": 11685 + }, + { + "epoch": 0.2, + "learning_rate": 0.000545257309190568, + "loss": 3.7089, + "step": 11690 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005452099240191146, + "loss": 3.7599, + "step": 11695 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005451625204091317, + "loss": 3.6116, + "step": 11700 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005451150983641835, + "loss": 3.7526, + "step": 11705 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005450676578878362, + "loss": 3.7258, + "step": 11710 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005450201989836571, + "loss": 3.7765, + "step": 11715 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005449727216552147, + "loss": 3.7091, + "step": 11720 + }, + { + "epoch": 0.2, + "learning_rate": 0.000544925225906079, + "loss": 3.6662, + "step": 11725 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005448777117398216, + "loss": 3.6813, + "step": 11730 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005448301791600152, + "loss": 3.6477, + "step": 11735 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005447826281702342, + "loss": 3.6587, + "step": 11740 + }, + { + "epoch": 0.2, + "learning_rate": 0.000544735058774054, + "loss": 3.7099, + "step": 11745 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005446874709750516, + "loss": 3.7078, + "step": 11750 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005446398647768053, + "loss": 3.7051, + "step": 11755 + }, + { + "epoch": 0.2, + "learning_rate": 0.000544592240182895, + "loss": 3.7258, + "step": 11760 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005445445971969017, + "loss": 3.6229, + "step": 11765 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005444969358224082, + "loss": 3.8427, + "step": 11770 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005444492560629979, + "loss": 3.6114, + "step": 11775 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005444015579222564, + "loss": 3.7371, + "step": 11780 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005443538414037702, + "loss": 3.704, + "step": 11785 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005443061065111275, + "loss": 3.7573, + "step": 11790 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005442583532479177, + "loss": 3.7781, + "step": 11795 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005442105816177315, + "loss": 3.7478, + "step": 11800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005441627916241611, + "loss": 3.6679, + "step": 11805 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005441149832708, + "loss": 3.5633, + "step": 11810 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005440671565612434, + "loss": 3.6967, + "step": 11815 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005440193114990873, + "loss": 3.6898, + "step": 11820 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005439714480879297, + "loss": 3.6566, + "step": 11825 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005439235663313695, + "loss": 3.6392, + "step": 11830 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005438756662330071, + "loss": 3.7343, + "step": 11835 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005438277477964445, + "loss": 3.7082, + "step": 11840 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005437798110252849, + "loss": 3.6861, + "step": 11845 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005437318559231328, + "loss": 3.8254, + "step": 11850 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005436838824935943, + "loss": 3.7302, + "step": 11855 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005436358907402767, + "loss": 3.7656, + "step": 11860 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005435878806667886, + "loss": 3.7166, + "step": 11865 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005435398522767404, + "loss": 3.6894, + "step": 11870 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005434918055737434, + "loss": 3.7122, + "step": 11875 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005434437405614104, + "loss": 3.7014, + "step": 11880 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005433956572433558, + "loss": 3.7079, + "step": 11885 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005433475556231951, + "loss": 3.6731, + "step": 11890 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005432994357045455, + "loss": 3.7536, + "step": 11895 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005432512974910252, + "loss": 3.58, + "step": 11900 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005432031409862538, + "loss": 3.7737, + "step": 11905 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005431549661938527, + "loss": 3.6355, + "step": 11910 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005431067731174443, + "loss": 3.7166, + "step": 11915 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005430585617606524, + "loss": 3.7184, + "step": 11920 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005430103321271025, + "loss": 3.7067, + "step": 11925 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005429620842204209, + "loss": 3.8456, + "step": 11930 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005429138180442358, + "loss": 3.6251, + "step": 11935 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005428655336021763, + "loss": 3.7128, + "step": 11940 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005428172308978735, + "loss": 3.7401, + "step": 11945 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005427689099349593, + "loss": 3.7788, + "step": 11950 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005427205707170674, + "loss": 3.759, + "step": 11955 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005426722132478323, + "loss": 3.78, + "step": 11960 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005426238375308906, + "loss": 3.7712, + "step": 11965 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005425754435698796, + "loss": 3.7476, + "step": 11970 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005425270313684385, + "loss": 3.7033, + "step": 11975 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005424786009302075, + "loss": 3.6609, + "step": 11980 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005424301522588285, + "loss": 3.7616, + "step": 11985 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005423816853579444, + "loss": 3.6577, + "step": 11990 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005423332002311998, + "loss": 3.7224, + "step": 11995 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005422846968822404, + "loss": 3.6866, + "step": 12000 + }, + { + "epoch": 0.21, + "eval_loss": 3.7599782943725586, + "eval_runtime": 150.9725, + "eval_samples_per_second": 12.194, + "eval_steps_per_second": 0.768, + "step": 12000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005422361753147136, + "loss": 3.7354, + "step": 12005 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005421876355322677, + "loss": 3.7213, + "step": 12010 + }, + { + "epoch": 0.21, + "learning_rate": 0.000542139077538553, + "loss": 3.7977, + "step": 12015 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005420905013372205, + "loss": 3.786, + "step": 12020 + }, + { + "epoch": 0.21, + "learning_rate": 0.000542041906931923, + "loss": 3.7917, + "step": 12025 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005419932943263146, + "loss": 3.7851, + "step": 12030 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005419446635240505, + "loss": 3.7055, + "step": 12035 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005418960145287878, + "loss": 3.7075, + "step": 12040 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005418473473441845, + "loss": 3.77, + "step": 12045 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005417986619739001, + "loss": 3.6612, + "step": 12050 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005417499584215955, + "loss": 3.5958, + "step": 12055 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005417012366909329, + "loss": 3.7396, + "step": 12060 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005416524967855761, + "loss": 3.6948, + "step": 12065 + }, + { + "epoch": 0.21, + "learning_rate": 0.00054160373870919, + "loss": 3.7575, + "step": 12070 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005415549624654409, + "loss": 3.7255, + "step": 12075 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005415061680579966, + "loss": 3.751, + "step": 12080 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005414573554905262, + "loss": 3.7404, + "step": 12085 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005414085247667002, + "loss": 3.8004, + "step": 12090 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005413596758901901, + "loss": 3.649, + "step": 12095 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005413108088646695, + "loss": 3.7088, + "step": 12100 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005412619236938126, + "loss": 3.6278, + "step": 12105 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005412130203812957, + "loss": 3.7007, + "step": 12110 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005411640989307958, + "loss": 3.7458, + "step": 12115 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005411151593459915, + "loss": 3.7283, + "step": 12120 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005410662016305629, + "loss": 3.7408, + "step": 12125 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005410172257881915, + "loss": 3.7432, + "step": 12130 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005409682318225599, + "loss": 3.7822, + "step": 12135 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005409192197373522, + "loss": 3.7222, + "step": 12140 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005408701895362538, + "loss": 3.741, + "step": 12145 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005408211412229517, + "loss": 3.5775, + "step": 12150 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005407720748011338, + "loss": 3.7984, + "step": 12155 + }, + { + "epoch": 0.21, + "learning_rate": 0.00054072299027449, + "loss": 3.752, + "step": 12160 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005406738876467109, + "loss": 3.706, + "step": 12165 + }, + { + "epoch": 0.21, + "learning_rate": 0.000540624766921489, + "loss": 3.7555, + "step": 12170 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005405756281025176, + "loss": 3.7497, + "step": 12175 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005405264711934922, + "loss": 3.6543, + "step": 12180 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005404772961981086, + "loss": 3.6224, + "step": 12185 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005404281031200648, + "loss": 3.6435, + "step": 12190 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005403788919630598, + "loss": 3.7747, + "step": 12195 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005403296627307942, + "loss": 3.5874, + "step": 12200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005402804154269695, + "loss": 3.5728, + "step": 12205 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005402311500552889, + "loss": 3.7743, + "step": 12210 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005401818666194571, + "loss": 3.6319, + "step": 12215 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005401325651231798, + "loss": 3.6402, + "step": 12220 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005400832455701644, + "loss": 3.6527, + "step": 12225 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005400339079641192, + "loss": 3.7487, + "step": 12230 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005399845523087543, + "loss": 3.6764, + "step": 12235 + }, + { + "epoch": 0.21, + "learning_rate": 0.000539935178607781, + "loss": 3.756, + "step": 12240 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005398857868649118, + "loss": 3.5799, + "step": 12245 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005398363770838609, + "loss": 3.6562, + "step": 12250 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005397869492683436, + "loss": 3.6615, + "step": 12255 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005397375034220767, + "loss": 3.7017, + "step": 12260 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005396880395487781, + "loss": 3.7083, + "step": 12265 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005396385576521672, + "loss": 3.7206, + "step": 12270 + }, + { + "epoch": 0.21, + "learning_rate": 0.000539589057735965, + "loss": 3.6814, + "step": 12275 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005395395398038935, + "loss": 3.5883, + "step": 12280 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005394900038596763, + "loss": 3.6049, + "step": 12285 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005394404499070381, + "loss": 3.7757, + "step": 12290 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005393908779497052, + "loss": 3.7323, + "step": 12295 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005393412879914051, + "loss": 3.7704, + "step": 12300 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005392916800358667, + "loss": 3.7201, + "step": 12305 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005392420540868205, + "loss": 3.7927, + "step": 12310 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005391924101479978, + "loss": 3.656, + "step": 12315 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005391427482231317, + "loss": 3.7216, + "step": 12320 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005390930683159566, + "loss": 3.7133, + "step": 12325 + }, + { + "epoch": 0.21, + "learning_rate": 0.000539043370430208, + "loss": 3.6312, + "step": 12330 + }, + { + "epoch": 0.21, + "learning_rate": 0.000538993654569623, + "loss": 3.643, + "step": 12335 + }, + { + "epoch": 0.21, + "learning_rate": 0.00053894392073794, + "loss": 3.5749, + "step": 12340 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005388941689388988, + "loss": 3.5516, + "step": 12345 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005388443991762405, + "loss": 3.7479, + "step": 12350 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005387946114537072, + "loss": 3.6775, + "step": 12355 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005387448057750431, + "loss": 3.6478, + "step": 12360 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005386949821439932, + "loss": 3.7257, + "step": 12365 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005386451405643038, + "loss": 3.6918, + "step": 12370 + }, + { + "epoch": 0.21, + "learning_rate": 0.000538595281039723, + "loss": 3.6817, + "step": 12375 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005385454035739996, + "loss": 3.6628, + "step": 12380 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005384955081708847, + "loss": 3.6527, + "step": 12385 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005384455948341296, + "loss": 3.7236, + "step": 12390 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005383956635674878, + "loss": 3.5499, + "step": 12395 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005383457143747141, + "loss": 3.7324, + "step": 12400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005382957472595638, + "loss": 3.6974, + "step": 12405 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005382457622257949, + "loss": 3.6924, + "step": 12410 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005381957592771654, + "loss": 3.5955, + "step": 12415 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005381457384174358, + "loss": 3.6657, + "step": 12420 + }, + { + "epoch": 0.22, + "learning_rate": 0.000538095699650367, + "loss": 3.7176, + "step": 12425 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005380456429797218, + "loss": 3.6794, + "step": 12430 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005379955684092643, + "loss": 3.7586, + "step": 12435 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005379454759427597, + "loss": 3.7462, + "step": 12440 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005378953655839747, + "loss": 3.6993, + "step": 12445 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005378452373366776, + "loss": 3.6243, + "step": 12450 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005377950912046374, + "loss": 3.6997, + "step": 12455 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005377449271916251, + "loss": 3.6549, + "step": 12460 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005376947453014126, + "loss": 3.7064, + "step": 12465 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005376445455377736, + "loss": 3.6477, + "step": 12470 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005375943279044825, + "loss": 3.7082, + "step": 12475 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005375440924053156, + "loss": 3.6758, + "step": 12480 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005374938390440505, + "loss": 3.6828, + "step": 12485 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005374435678244658, + "loss": 3.6934, + "step": 12490 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005373932787503417, + "loss": 3.6921, + "step": 12495 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005373429718254595, + "loss": 3.6586, + "step": 12500 + }, + { + "epoch": 0.22, + "eval_loss": 3.7146527767181396, + "eval_runtime": 150.1722, + "eval_samples_per_second": 12.259, + "eval_steps_per_second": 0.772, + "step": 12500 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005372926470536023, + "loss": 3.759, + "step": 12505 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005372423044385541, + "loss": 3.6515, + "step": 12510 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005371919439841005, + "loss": 3.8031, + "step": 12515 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005371415656940283, + "loss": 3.6402, + "step": 12520 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005370911695721256, + "loss": 3.7482, + "step": 12525 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005370407556221821, + "loss": 3.7288, + "step": 12530 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005369903238479886, + "loss": 3.6749, + "step": 12535 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005369398742533373, + "loss": 3.6862, + "step": 12540 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005368894068420217, + "loss": 3.6132, + "step": 12545 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005368389216178368, + "loss": 3.666, + "step": 12550 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005367884185845787, + "loss": 3.5704, + "step": 12555 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005367378977460452, + "loss": 3.7454, + "step": 12560 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005366873591060349, + "loss": 3.7277, + "step": 12565 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005366368026683483, + "loss": 3.7656, + "step": 12570 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005365862284367869, + "loss": 3.8027, + "step": 12575 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005365356364151535, + "loss": 3.6515, + "step": 12580 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005364850266072525, + "loss": 3.6866, + "step": 12585 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005364343990168895, + "loss": 3.6147, + "step": 12590 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005363837536478714, + "loss": 3.6964, + "step": 12595 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005363330905040064, + "loss": 3.6638, + "step": 12600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005362824095891043, + "loss": 3.6581, + "step": 12605 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005362317109069758, + "loss": 3.7257, + "step": 12610 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005361809944614334, + "loss": 3.6829, + "step": 12615 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005361302602562907, + "loss": 3.6478, + "step": 12620 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005360795082953624, + "loss": 3.6036, + "step": 12625 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005360287385824651, + "loss": 3.6498, + "step": 12630 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005359779511214162, + "loss": 3.749, + "step": 12635 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005359271459160347, + "loss": 3.6121, + "step": 12640 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005358763229701411, + "loss": 3.6755, + "step": 12645 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005358254822875567, + "loss": 3.6591, + "step": 12650 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005357746238721047, + "loss": 3.6631, + "step": 12655 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005357237477276093, + "loss": 3.6449, + "step": 12660 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005356728538578961, + "loss": 3.6601, + "step": 12665 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005356219422667922, + "loss": 3.6639, + "step": 12670 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005355710129581257, + "loss": 3.693, + "step": 12675 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005355200659357262, + "loss": 3.6938, + "step": 12680 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005354691012034249, + "loss": 3.715, + "step": 12685 + }, + { + "epoch": 0.22, + "learning_rate": 0.000535418118765054, + "loss": 3.521, + "step": 12690 + }, + { + "epoch": 0.22, + "learning_rate": 0.000535367118624447, + "loss": 3.692, + "step": 12695 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005353161007854389, + "loss": 3.6524, + "step": 12700 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005352650652518659, + "loss": 3.6092, + "step": 12705 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005352140120275659, + "loss": 3.712, + "step": 12710 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005351629411163776, + "loss": 3.7175, + "step": 12715 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005351118525221412, + "loss": 3.6494, + "step": 12720 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005350607462486985, + "loss": 3.7582, + "step": 12725 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005350096222998924, + "loss": 3.6472, + "step": 12730 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005349584806795671, + "loss": 3.6468, + "step": 12735 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005349073213915681, + "loss": 3.6471, + "step": 12740 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005348561444397425, + "loss": 3.7198, + "step": 12745 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005348049498279384, + "loss": 3.5422, + "step": 12750 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005347537375600055, + "loss": 3.6621, + "step": 12755 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005347025076397945, + "loss": 3.6559, + "step": 12760 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005346512600711579, + "loss": 3.6488, + "step": 12765 + }, + { + "epoch": 0.22, + "learning_rate": 0.000534599994857949, + "loss": 3.6929, + "step": 12770 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005345487120040229, + "loss": 3.605, + "step": 12775 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005344974115132356, + "loss": 3.7029, + "step": 12780 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005344460933894448, + "loss": 3.5666, + "step": 12785 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005343947576365093, + "loss": 3.6138, + "step": 12790 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005343434042582892, + "loss": 3.6309, + "step": 12795 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005342920332586462, + "loss": 3.6865, + "step": 12800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005342406446414429, + "loss": 3.5163, + "step": 12805 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005341892384105436, + "loss": 3.7673, + "step": 12810 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005341378145698139, + "loss": 3.7516, + "step": 12815 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005340863731231204, + "loss": 3.6601, + "step": 12820 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005340349140743314, + "loss": 3.6487, + "step": 12825 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005339834374273162, + "loss": 3.594, + "step": 12830 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005339319431859455, + "loss": 3.6715, + "step": 12835 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005338804313540918, + "loss": 3.6531, + "step": 12840 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005338289019356283, + "loss": 3.6891, + "step": 12845 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005337773549344294, + "loss": 3.722, + "step": 12850 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005337257903543719, + "loss": 3.6754, + "step": 12855 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005336742081993325, + "loss": 3.723, + "step": 12860 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005336226084731903, + "loss": 3.5262, + "step": 12865 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005335709911798253, + "loss": 3.6648, + "step": 12870 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005335193563231188, + "loss": 3.755, + "step": 12875 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005334677039069534, + "loss": 3.6505, + "step": 12880 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005334160339352131, + "loss": 3.5853, + "step": 12885 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005333643464117834, + "loss": 3.6742, + "step": 12890 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005333126413405508, + "loss": 3.7451, + "step": 12895 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005332609187254033, + "loss": 3.5786, + "step": 12900 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005332091785702301, + "loss": 3.6852, + "step": 12905 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005331574208789219, + "loss": 3.7323, + "step": 12910 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005331056456553705, + "loss": 3.5534, + "step": 12915 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005330538529034692, + "loss": 3.6558, + "step": 12920 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005330020426271126, + "loss": 3.5388, + "step": 12925 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005329502148301965, + "loss": 3.7418, + "step": 12930 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005328983695166181, + "loss": 3.6543, + "step": 12935 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005328465066902759, + "loss": 3.6607, + "step": 12940 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005327946263550698, + "loss": 3.796, + "step": 12945 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005327427285149006, + "loss": 3.6999, + "step": 12950 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005326908131736712, + "loss": 3.5925, + "step": 12955 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005326388803352851, + "loss": 3.675, + "step": 12960 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005325869300036475, + "loss": 3.72, + "step": 12965 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005325349621826648, + "loss": 3.7205, + "step": 12970 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005324829768762445, + "loss": 3.6847, + "step": 12975 + }, + { + "epoch": 0.22, + "learning_rate": 0.000532430974088296, + "loss": 3.7384, + "step": 12980 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005323789538227293, + "loss": 3.6207, + "step": 12985 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005323269160834562, + "loss": 3.6368, + "step": 12990 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005322748608743897, + "loss": 3.5439, + "step": 12995 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005322227881994441, + "loss": 3.6824, + "step": 13000 + }, + { + "epoch": 0.22, + "eval_loss": 3.6955907344818115, + "eval_runtime": 150.2757, + "eval_samples_per_second": 12.251, + "eval_steps_per_second": 0.772, + "step": 13000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005321706980625349, + "loss": 3.6233, + "step": 13005 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005321185904675791, + "loss": 3.6698, + "step": 13010 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005320664654184947, + "loss": 3.693, + "step": 13015 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005320143229192016, + "loss": 3.6562, + "step": 13020 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005319621629736205, + "loss": 3.7259, + "step": 13025 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005319099855856734, + "loss": 3.6133, + "step": 13030 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005318577907592841, + "loss": 3.626, + "step": 13035 + }, + { + "epoch": 0.23, + "learning_rate": 0.000531805578498377, + "loss": 3.6176, + "step": 13040 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005317533488068784, + "loss": 3.7022, + "step": 13045 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005317011016887157, + "loss": 3.5367, + "step": 13050 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005316488371478177, + "loss": 3.6384, + "step": 13055 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005315965551881143, + "loss": 3.6755, + "step": 13060 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005315442558135368, + "loss": 3.7178, + "step": 13065 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005314919390280179, + "loss": 3.6934, + "step": 13070 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005314396048354916, + "loss": 3.634, + "step": 13075 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005313872532398932, + "loss": 3.5333, + "step": 13080 + }, + { + "epoch": 0.23, + "learning_rate": 0.000531334884245159, + "loss": 3.6866, + "step": 13085 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005312824978552272, + "loss": 3.6536, + "step": 13090 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005312300940740369, + "loss": 3.7084, + "step": 13095 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005311776729055286, + "loss": 3.637, + "step": 13100 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005311252343536439, + "loss": 3.7074, + "step": 13105 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005310727784223261, + "loss": 3.7197, + "step": 13110 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005310203051155198, + "loss": 3.6676, + "step": 13115 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005309678144371703, + "loss": 3.5821, + "step": 13120 + }, + { + "epoch": 0.23, + "learning_rate": 0.000530915306391225, + "loss": 3.6428, + "step": 13125 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005308627809816319, + "loss": 3.7832, + "step": 13130 + }, + { + "epoch": 0.23, + "learning_rate": 0.000530810238212341, + "loss": 3.6682, + "step": 13135 + }, + { + "epoch": 0.23, + "learning_rate": 0.000530757678087303, + "loss": 3.6524, + "step": 13140 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005307051006104701, + "loss": 3.6169, + "step": 13145 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005306525057857961, + "loss": 3.6496, + "step": 13150 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005305998936172357, + "loss": 3.6896, + "step": 13155 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005305472641087452, + "loss": 3.6579, + "step": 13160 + }, + { + "epoch": 0.23, + "learning_rate": 0.000530494617264282, + "loss": 3.7858, + "step": 13165 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005304419530878046, + "loss": 3.7619, + "step": 13170 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005303892715832736, + "loss": 3.784, + "step": 13175 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005303365727546499, + "loss": 3.7082, + "step": 13180 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005302838566058964, + "loss": 3.6019, + "step": 13185 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005302311231409773, + "loss": 3.641, + "step": 13190 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005301783723638574, + "loss": 3.6698, + "step": 13195 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005301256042785036, + "loss": 3.6359, + "step": 13200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005300728188888838, + "loss": 3.7043, + "step": 13205 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005300200161989672, + "loss": 3.7608, + "step": 13210 + }, + { + "epoch": 0.23, + "learning_rate": 0.000529967196212724, + "loss": 3.6878, + "step": 13215 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005299143589341263, + "loss": 3.6146, + "step": 13220 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005298615043671472, + "loss": 3.7112, + "step": 13225 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005298086325157609, + "loss": 3.6229, + "step": 13230 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005297557433839432, + "loss": 3.6452, + "step": 13235 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005297028369756711, + "loss": 3.6943, + "step": 13240 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005296499132949227, + "loss": 3.6575, + "step": 13245 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005295969723456781, + "loss": 3.5772, + "step": 13250 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005295440141319176, + "loss": 3.6112, + "step": 13255 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005294910386576237, + "loss": 3.7853, + "step": 13260 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005294380459267797, + "loss": 3.6779, + "step": 13265 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005293850359433706, + "loss": 3.7071, + "step": 13270 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005293320087113824, + "loss": 3.7668, + "step": 13275 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005292789642348024, + "loss": 3.7479, + "step": 13280 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005292259025176194, + "loss": 3.6091, + "step": 13285 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005291728235638232, + "loss": 3.6423, + "step": 13290 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005291197273774053, + "loss": 3.631, + "step": 13295 + }, + { + "epoch": 0.23, + "learning_rate": 0.000529066613962358, + "loss": 3.6896, + "step": 13300 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005290134833226752, + "loss": 3.726, + "step": 13305 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005289603354623523, + "loss": 3.6359, + "step": 13310 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005289071703853855, + "loss": 3.6416, + "step": 13315 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005288539880957727, + "loss": 3.7228, + "step": 13320 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005288007885975127, + "loss": 3.6365, + "step": 13325 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005287475718946061, + "loss": 3.7192, + "step": 13330 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005286943379910543, + "loss": 3.7571, + "step": 13335 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005286410868908604, + "loss": 3.6936, + "step": 13340 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005285878185980285, + "loss": 3.6531, + "step": 13345 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005285345331165641, + "loss": 3.6293, + "step": 13350 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005284812304504742, + "loss": 3.7047, + "step": 13355 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005284279106037666, + "loss": 3.6494, + "step": 13360 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005283745735804508, + "loss": 3.7135, + "step": 13365 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005283212193845374, + "loss": 3.6306, + "step": 13370 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005282678480200385, + "loss": 3.5678, + "step": 13375 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005282144594909674, + "loss": 3.6169, + "step": 13380 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005281610538013384, + "loss": 3.6236, + "step": 13385 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005281076309551674, + "loss": 3.6551, + "step": 13390 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005280541909564717, + "loss": 3.7224, + "step": 13395 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005280007338092696, + "loss": 3.5341, + "step": 13400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005279472595175807, + "loss": 3.6211, + "step": 13405 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005278937680854262, + "loss": 3.5654, + "step": 13410 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005278402595168282, + "loss": 3.7164, + "step": 13415 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005277867338158104, + "loss": 3.7283, + "step": 13420 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005277331909863976, + "loss": 3.6716, + "step": 13425 + }, + { + "epoch": 0.23, + "learning_rate": 0.000527679631032616, + "loss": 3.7008, + "step": 13430 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005276260539584928, + "loss": 3.6196, + "step": 13435 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005275724597680571, + "loss": 3.6388, + "step": 13440 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005275188484653386, + "loss": 3.6706, + "step": 13445 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005274652200543688, + "loss": 3.6409, + "step": 13450 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005274115745391801, + "loss": 3.5821, + "step": 13455 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005273579119238065, + "loss": 3.6809, + "step": 13460 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005273042322122832, + "loss": 3.7153, + "step": 13465 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005272505354086465, + "loss": 3.6738, + "step": 13470 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005271968215169341, + "loss": 3.6512, + "step": 13475 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005271430905411851, + "loss": 3.6398, + "step": 13480 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005270893424854399, + "loss": 3.5679, + "step": 13485 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005270355773537398, + "loss": 3.6169, + "step": 13490 + }, + { + "epoch": 0.23, + "learning_rate": 0.000526981795150128, + "loss": 3.6797, + "step": 13495 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005269279958786484, + "loss": 3.565, + "step": 13500 + }, + { + "epoch": 0.23, + "eval_loss": 3.6858224868774414, + "eval_runtime": 149.7698, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.775, + "step": 13500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005268741795433464, + "loss": 3.7488, + "step": 13505 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005268203461482689, + "loss": 3.6822, + "step": 13510 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005267664956974638, + "loss": 3.5031, + "step": 13515 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005267126281949804, + "loss": 3.6234, + "step": 13520 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005266587436448692, + "loss": 3.6111, + "step": 13525 + }, + { + "epoch": 0.23, + "learning_rate": 0.000526604842051182, + "loss": 3.5746, + "step": 13530 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005265509234179722, + "loss": 3.6438, + "step": 13535 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005264969877492939, + "loss": 3.6441, + "step": 13540 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005264430350492029, + "loss": 3.5713, + "step": 13545 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005263890653217562, + "loss": 3.604, + "step": 13550 + }, + { + "epoch": 0.23, + "learning_rate": 0.000526335078571012, + "loss": 3.6513, + "step": 13555 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005262810748010298, + "loss": 3.6257, + "step": 13560 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005262270540158705, + "loss": 3.6804, + "step": 13565 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005261730162195962, + "loss": 3.7044, + "step": 13570 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005261189614162701, + "loss": 3.6778, + "step": 13575 + }, + { + "epoch": 0.24, + "learning_rate": 0.000526064889609957, + "loss": 3.6329, + "step": 13580 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005260108008047227, + "loss": 3.6702, + "step": 13585 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005259566950046344, + "loss": 3.6102, + "step": 13590 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005259025722137608, + "loss": 3.5786, + "step": 13595 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005258484324361715, + "loss": 3.599, + "step": 13600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005257942756759375, + "loss": 3.6527, + "step": 13605 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005257401019371312, + "loss": 3.7808, + "step": 13610 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005256859112238259, + "loss": 3.5285, + "step": 13615 + }, + { + "epoch": 0.24, + "learning_rate": 0.000525631703540097, + "loss": 3.6049, + "step": 13620 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005255774788900201, + "loss": 3.431, + "step": 13625 + }, + { + "epoch": 0.24, + "learning_rate": 0.000525523237277673, + "loss": 3.5476, + "step": 13630 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005254689787071342, + "loss": 3.5949, + "step": 13635 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005254147031824837, + "loss": 3.7066, + "step": 13640 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005253604107078027, + "loss": 3.6354, + "step": 13645 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005253061012871738, + "loss": 3.5766, + "step": 13650 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005252517749246809, + "loss": 3.7445, + "step": 13655 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005251974316244088, + "loss": 3.7481, + "step": 13660 + }, + { + "epoch": 0.24, + "learning_rate": 0.000525143071390444, + "loss": 3.521, + "step": 13665 + }, + { + "epoch": 0.24, + "learning_rate": 0.000525088694226874, + "loss": 3.7559, + "step": 13670 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005250343001377877, + "loss": 3.6996, + "step": 13675 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005249798891272755, + "loss": 3.6476, + "step": 13680 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005249254611994286, + "loss": 3.7529, + "step": 13685 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005248710163583396, + "loss": 3.7192, + "step": 13690 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005248165546081027, + "loss": 3.6813, + "step": 13695 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005247620759528131, + "loss": 3.6612, + "step": 13700 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005247075803965672, + "loss": 3.5626, + "step": 13705 + }, + { + "epoch": 0.24, + "learning_rate": 0.000524653067943463, + "loss": 3.6624, + "step": 13710 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005245985385975993, + "loss": 3.671, + "step": 13715 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005245439923630765, + "loss": 3.5639, + "step": 13720 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005244894292439963, + "loss": 3.5779, + "step": 13725 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005244348492444615, + "loss": 3.6789, + "step": 13730 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005243802523685763, + "loss": 3.6376, + "step": 13735 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005243256386204461, + "loss": 3.5759, + "step": 13740 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005242710080041774, + "loss": 3.6651, + "step": 13745 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005242163605238783, + "loss": 3.7021, + "step": 13750 + }, + { + "epoch": 0.24, + "learning_rate": 0.000524161696183658, + "loss": 3.5692, + "step": 13755 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005241070149876269, + "loss": 3.6352, + "step": 13760 + }, + { + "epoch": 0.24, + "learning_rate": 0.000524052316939897, + "loss": 3.799, + "step": 13765 + }, + { + "epoch": 0.24, + "learning_rate": 0.000523997602044581, + "loss": 3.6014, + "step": 13770 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005239428703057933, + "loss": 3.5783, + "step": 13775 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005238881217276493, + "loss": 3.6207, + "step": 13780 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005238333563142661, + "loss": 3.652, + "step": 13785 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005237785740697617, + "loss": 3.5884, + "step": 13790 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005237237749982552, + "loss": 3.6441, + "step": 13795 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005236689591038674, + "loss": 3.6444, + "step": 13800 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005236141263907203, + "loss": 3.5998, + "step": 13805 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005235592768629368, + "loss": 3.6522, + "step": 13810 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005235044105246415, + "loss": 3.6656, + "step": 13815 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005234495273799598, + "loss": 3.6451, + "step": 13820 + }, + { + "epoch": 0.24, + "learning_rate": 0.000523394627433019, + "loss": 3.7121, + "step": 13825 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005233397106879469, + "loss": 4.2102, + "step": 13830 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005232847771488733, + "loss": 4.3941, + "step": 13835 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005232298268199288, + "loss": 4.4938, + "step": 13840 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005231748597052452, + "loss": 4.4764, + "step": 13845 + }, + { + "epoch": 0.24, + "learning_rate": 0.000523119875808956, + "loss": 4.1145, + "step": 13850 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005230648751351956, + "loss": 3.8547, + "step": 13855 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005230098576880997, + "loss": 3.8067, + "step": 13860 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005229548234718056, + "loss": 3.6504, + "step": 13865 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005228997724904512, + "loss": 3.7026, + "step": 13870 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005228447047481763, + "loss": 3.7336, + "step": 13875 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005227896202491218, + "loss": 3.6148, + "step": 13880 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005227345189974295, + "loss": 3.7376, + "step": 13885 + }, + { + "epoch": 0.24, + "learning_rate": 0.000522679400997243, + "loss": 3.7209, + "step": 13890 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005226242662527066, + "loss": 3.7498, + "step": 13895 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005225691147679664, + "loss": 3.7111, + "step": 13900 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005225139465471694, + "loss": 3.6576, + "step": 13905 + }, + { + "epoch": 0.24, + "learning_rate": 0.000522458761594464, + "loss": 3.6185, + "step": 13910 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005224035599139997, + "loss": 3.641, + "step": 13915 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005223483415099276, + "loss": 3.704, + "step": 13920 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005222931063863998, + "loss": 3.6244, + "step": 13925 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005222378545475695, + "loss": 3.5334, + "step": 13930 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005221825859975915, + "loss": 3.651, + "step": 13935 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005221273007406218, + "loss": 3.5782, + "step": 13940 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005220719987808174, + "loss": 3.6259, + "step": 13945 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005220166801223369, + "loss": 3.6128, + "step": 13950 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005219613447693396, + "loss": 3.6453, + "step": 13955 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005219059927259869, + "loss": 3.7525, + "step": 13960 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005218506239964409, + "loss": 3.6357, + "step": 13965 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005217952385848648, + "loss": 3.6617, + "step": 13970 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005217398364954234, + "loss": 3.5647, + "step": 13975 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005216844177322828, + "loss": 3.6491, + "step": 13980 + }, + { + "epoch": 0.24, + "learning_rate": 0.00052162898229961, + "loss": 3.644, + "step": 13985 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005215735302015737, + "loss": 3.7162, + "step": 13990 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005215180614423434, + "loss": 3.5794, + "step": 13995 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005214625760260901, + "loss": 3.5723, + "step": 14000 + }, + { + "epoch": 0.24, + "eval_loss": 3.667832851409912, + "eval_runtime": 149.7768, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.774, + "step": 14000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005214070739569861, + "loss": 3.5753, + "step": 14005 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005213515552392048, + "loss": 3.6657, + "step": 14010 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005212960198769209, + "loss": 3.6619, + "step": 14015 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005212404678743105, + "loss": 3.6746, + "step": 14020 + }, + { + "epoch": 0.24, + "learning_rate": 0.000521184899235551, + "loss": 3.6758, + "step": 14025 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005211293139648203, + "loss": 3.5972, + "step": 14030 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005210737120662985, + "loss": 3.5256, + "step": 14035 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005210180935441667, + "loss": 3.7113, + "step": 14040 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005209624584026068, + "loss": 3.5753, + "step": 14045 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005209068066458026, + "loss": 3.6638, + "step": 14050 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005208511382779385, + "loss": 3.6401, + "step": 14055 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005207954533032008, + "loss": 3.659, + "step": 14060 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005207397517257765, + "loss": 3.7119, + "step": 14065 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005206840335498541, + "loss": 3.6507, + "step": 14070 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005206282987796234, + "loss": 3.5722, + "step": 14075 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005205725474192753, + "loss": 3.6486, + "step": 14080 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005205167794730022, + "loss": 3.772, + "step": 14085 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005204609949449972, + "loss": 3.6892, + "step": 14090 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005204051938394554, + "loss": 3.7151, + "step": 14095 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005203493761605724, + "loss": 3.6335, + "step": 14100 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005202935419125458, + "loss": 3.68, + "step": 14105 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005202376910995736, + "loss": 3.6781, + "step": 14110 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005201818237258558, + "loss": 3.7159, + "step": 14115 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005201259397955934, + "loss": 3.6356, + "step": 14120 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005200700393129884, + "loss": 3.5929, + "step": 14125 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005200141222822443, + "loss": 3.702, + "step": 14130 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005199581887075657, + "loss": 3.572, + "step": 14135 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005199022385931585, + "loss": 3.5301, + "step": 14140 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005198462719432301, + "loss": 3.7308, + "step": 14145 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005197902887619887, + "loss": 3.6635, + "step": 14150 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005197342890536441, + "loss": 3.6117, + "step": 14155 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005196782728224069, + "loss": 3.6756, + "step": 14160 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005196222400724894, + "loss": 3.6662, + "step": 14165 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005195661908081052, + "loss": 3.5442, + "step": 14170 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005195101250334684, + "loss": 3.6221, + "step": 14175 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005194540427527955, + "loss": 3.7254, + "step": 14180 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005193979439703031, + "loss": 3.6972, + "step": 14185 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005193418286902098, + "loss": 3.5639, + "step": 14190 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005192856969167349, + "loss": 3.7267, + "step": 14195 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005192295486540995, + "loss": 3.6168, + "step": 14200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005191733839065257, + "loss": 3.5785, + "step": 14205 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005191172026782365, + "loss": 3.6425, + "step": 14210 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005190610049734568, + "loss": 3.6147, + "step": 14215 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005190047907964121, + "loss": 3.6009, + "step": 14220 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005189485601513295, + "loss": 3.6146, + "step": 14225 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005188923130424372, + "loss": 3.6774, + "step": 14230 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005188360494739649, + "loss": 3.6255, + "step": 14235 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005187797694501431, + "loss": 3.4911, + "step": 14240 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005187234729752039, + "loss": 3.5676, + "step": 14245 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005186671600533805, + "loss": 3.6448, + "step": 14250 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005186108306889073, + "loss": 3.5538, + "step": 14255 + }, + { + "epoch": 0.25, + "learning_rate": 0.00051855448488602, + "loss": 3.4816, + "step": 14260 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005184981226489555, + "loss": 3.5797, + "step": 14265 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005184417439819519, + "loss": 3.4676, + "step": 14270 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005183853488892488, + "loss": 3.6309, + "step": 14275 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005183289373750866, + "loss": 3.5128, + "step": 14280 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005182725094437072, + "loss": 3.6644, + "step": 14285 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005182160650993537, + "loss": 3.6486, + "step": 14290 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005181596043462704, + "loss": 3.6108, + "step": 14295 + }, + { + "epoch": 0.25, + "learning_rate": 0.000518103127188703, + "loss": 3.5354, + "step": 14300 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005180466336308983, + "loss": 3.5014, + "step": 14305 + }, + { + "epoch": 0.25, + "learning_rate": 0.000517990123677104, + "loss": 3.416, + "step": 14310 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005179335973315697, + "loss": 3.5127, + "step": 14315 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005178770545985458, + "loss": 3.6262, + "step": 14320 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005178204954822839, + "loss": 3.6022, + "step": 14325 + }, + { + "epoch": 0.25, + "learning_rate": 0.000517763919987037, + "loss": 3.6538, + "step": 14330 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005177073281170595, + "loss": 3.6462, + "step": 14335 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005176507198766066, + "loss": 3.5705, + "step": 14340 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005175940952699351, + "loss": 3.5812, + "step": 14345 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005175374543013025, + "loss": 3.6557, + "step": 14350 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005174807969749685, + "loss": 3.6208, + "step": 14355 + }, + { + "epoch": 0.25, + "learning_rate": 0.000517424123295193, + "loss": 3.5958, + "step": 14360 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005173674332662377, + "loss": 3.5134, + "step": 14365 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005173107268923655, + "loss": 3.477, + "step": 14370 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005172540041778403, + "loss": 3.5057, + "step": 14375 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005171972651269273, + "loss": 3.6122, + "step": 14380 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005171405097438933, + "loss": 3.6689, + "step": 14385 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005170837380330056, + "loss": 3.5244, + "step": 14390 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005170269499985334, + "loss": 3.6451, + "step": 14395 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005169701456447468, + "loss": 3.6317, + "step": 14400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005169133249759171, + "loss": 3.5989, + "step": 14405 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005168564879963172, + "loss": 3.6657, + "step": 14410 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005167996347102207, + "loss": 3.6195, + "step": 14415 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005167427651219027, + "loss": 3.6496, + "step": 14420 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005166858792356396, + "loss": 3.719, + "step": 14425 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005166289770557089, + "loss": 3.5693, + "step": 14430 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005165720585863893, + "loss": 3.5917, + "step": 14435 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005165151238319608, + "loss": 3.5493, + "step": 14440 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005164581727967046, + "loss": 3.5514, + "step": 14445 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005164012054849032, + "loss": 3.5368, + "step": 14450 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005163442219008402, + "loss": 3.5879, + "step": 14455 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005162872220488004, + "loss": 3.6387, + "step": 14460 + }, + { + "epoch": 0.25, + "learning_rate": 0.00051623020593307, + "loss": 3.5715, + "step": 14465 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005161731735579361, + "loss": 3.6949, + "step": 14470 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005161161249276876, + "loss": 3.6781, + "step": 14475 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005160590600466141, + "loss": 3.537, + "step": 14480 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005160019789190065, + "loss": 3.664, + "step": 14485 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005159448815491571, + "loss": 3.6726, + "step": 14490 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005158877679413593, + "loss": 3.6492, + "step": 14495 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005158306380999078, + "loss": 3.5562, + "step": 14500 + }, + { + "epoch": 0.25, + "eval_loss": 3.648855447769165, + "eval_runtime": 149.8786, + "eval_samples_per_second": 12.283, + "eval_steps_per_second": 0.774, + "step": 14500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005157734920290984, + "loss": 3.634, + "step": 14505 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005157163297332282, + "loss": 3.5898, + "step": 14510 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005156591512165955, + "loss": 3.6621, + "step": 14515 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005156019564834999, + "loss": 3.6402, + "step": 14520 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005155447455382421, + "loss": 3.6102, + "step": 14525 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005154875183851241, + "loss": 3.624, + "step": 14530 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005154302750284491, + "loss": 3.6164, + "step": 14535 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005153730154725214, + "loss": 3.4685, + "step": 14540 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005153157397216468, + "loss": 3.6154, + "step": 14545 + }, + { + "epoch": 0.25, + "learning_rate": 0.000515258447780132, + "loss": 3.6433, + "step": 14550 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005152011396522851, + "loss": 3.5996, + "step": 14555 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005151438153424155, + "loss": 3.6255, + "step": 14560 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005150864748548335, + "loss": 3.5906, + "step": 14565 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005150291181938508, + "loss": 3.6043, + "step": 14570 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005149717453637805, + "loss": 3.4846, + "step": 14575 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005149143563689367, + "loss": 3.5817, + "step": 14580 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005148569512136346, + "loss": 3.5475, + "step": 14585 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005147995299021911, + "loss": 3.6521, + "step": 14590 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005147420924389236, + "loss": 3.6627, + "step": 14595 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005146846388281514, + "loss": 3.6745, + "step": 14600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005146271690741945, + "loss": 3.6828, + "step": 14605 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005145696831813744, + "loss": 3.6828, + "step": 14610 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005145121811540139, + "loss": 3.5683, + "step": 14615 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005144546629964367, + "loss": 3.6632, + "step": 14620 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005143971287129679, + "loss": 3.6587, + "step": 14625 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005143395783079337, + "loss": 3.603, + "step": 14630 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005142820117856618, + "loss": 3.6555, + "step": 14635 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005142244291504808, + "loss": 3.7154, + "step": 14640 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005141668304067205, + "loss": 3.6161, + "step": 14645 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005141092155587123, + "loss": 3.6059, + "step": 14650 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005140515846107882, + "loss": 3.6758, + "step": 14655 + }, + { + "epoch": 0.25, + "learning_rate": 0.000513993937567282, + "loss": 3.6174, + "step": 14660 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005139362744325285, + "loss": 3.5476, + "step": 14665 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005138785952108635, + "loss": 3.6128, + "step": 14670 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005138208999066242, + "loss": 3.5876, + "step": 14675 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005137631885241491, + "loss": 3.5896, + "step": 14680 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005137054610677777, + "loss": 3.5601, + "step": 14685 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005136477175418508, + "loss": 3.6276, + "step": 14690 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005135899579507106, + "loss": 3.5822, + "step": 14695 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005135321822987001, + "loss": 3.4708, + "step": 14700 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005134743905901639, + "loss": 3.6286, + "step": 14705 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005134165828294475, + "loss": 3.5722, + "step": 14710 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005133587590208979, + "loss": 3.6132, + "step": 14715 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005133009191688629, + "loss": 3.7012, + "step": 14720 + }, + { + "epoch": 0.25, + "learning_rate": 0.000513243063277692, + "loss": 3.6159, + "step": 14725 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005131851913517358, + "loss": 3.6005, + "step": 14730 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005131273033953456, + "loss": 3.592, + "step": 14735 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005130693994128744, + "loss": 3.6199, + "step": 14740 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005130114794086763, + "loss": 3.5501, + "step": 14745 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005129535433871067, + "loss": 3.6375, + "step": 14750 + }, + { + "epoch": 0.26, + "learning_rate": 0.000512895591352522, + "loss": 3.5636, + "step": 14755 + }, + { + "epoch": 0.26, + "learning_rate": 0.00051283762330928, + "loss": 3.6531, + "step": 14760 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005127796392617393, + "loss": 3.5518, + "step": 14765 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005127216392142604, + "loss": 3.5923, + "step": 14770 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005126636231712042, + "loss": 3.5783, + "step": 14775 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005126055911369335, + "loss": 3.69, + "step": 14780 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005125475431158121, + "loss": 3.7556, + "step": 14785 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005124894791122045, + "loss": 3.6912, + "step": 14790 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005124313991304773, + "loss": 3.6166, + "step": 14795 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005123733031749974, + "loss": 3.5407, + "step": 14800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005123151912501336, + "loss": 3.6813, + "step": 14805 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005122570633602554, + "loss": 3.6641, + "step": 14810 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005121989195097339, + "loss": 3.6209, + "step": 14815 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005121407597029413, + "loss": 3.6123, + "step": 14820 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005120825839442506, + "loss": 3.5358, + "step": 14825 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005120243922380366, + "loss": 3.6142, + "step": 14830 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005119661845886747, + "loss": 3.5662, + "step": 14835 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005119079610005422, + "loss": 3.6078, + "step": 14840 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005118497214780171, + "loss": 3.6417, + "step": 14845 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005117914660254785, + "loss": 3.5625, + "step": 14850 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005117331946473071, + "loss": 3.619, + "step": 14855 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005116749073478846, + "loss": 3.616, + "step": 14860 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005116166041315939, + "loss": 3.5866, + "step": 14865 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005115582850028192, + "loss": 3.6377, + "step": 14870 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005114999499659454, + "loss": 3.6135, + "step": 14875 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005114415990253595, + "loss": 3.5777, + "step": 14880 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005113832321854491, + "loss": 3.5029, + "step": 14885 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005113248494506027, + "loss": 3.5864, + "step": 14890 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005112664508252107, + "loss": 3.5529, + "step": 14895 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005112080363136644, + "loss": 3.588, + "step": 14900 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005111496059203561, + "loss": 3.614, + "step": 14905 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005110911596496797, + "loss": 3.6759, + "step": 14910 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005110326975060298, + "loss": 3.4662, + "step": 14915 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005109742194938026, + "loss": 3.6857, + "step": 14920 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005109157256173954, + "loss": 3.6664, + "step": 14925 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005108572158812064, + "loss": 3.5142, + "step": 14930 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005107986902896355, + "loss": 3.5656, + "step": 14935 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005107401488470834, + "loss": 3.7301, + "step": 14940 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005106815915579522, + "loss": 3.5186, + "step": 14945 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005106230184266448, + "loss": 3.65, + "step": 14950 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005105644294575661, + "loss": 3.6077, + "step": 14955 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005105058246551214, + "loss": 3.6377, + "step": 14960 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005104472040237174, + "loss": 3.5337, + "step": 14965 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005103885675677622, + "loss": 3.5585, + "step": 14970 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005103299152916651, + "loss": 3.6477, + "step": 14975 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005102712471998361, + "loss": 3.5803, + "step": 14980 + }, + { + "epoch": 0.26, + "learning_rate": 0.000510212563296687, + "loss": 3.5321, + "step": 14985 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005101538635866304, + "loss": 3.6679, + "step": 14990 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005100951480740804, + "loss": 3.6513, + "step": 14995 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005100364167634518, + "loss": 3.5916, + "step": 15000 + }, + { + "epoch": 0.26, + "eval_loss": 3.638385534286499, + "eval_runtime": 149.9801, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 15000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005099776696591613, + "loss": 3.6363, + "step": 15005 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005099189067656261, + "loss": 3.5853, + "step": 15010 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005098601280872649, + "loss": 3.6406, + "step": 15015 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005098013336284975, + "loss": 3.5502, + "step": 15020 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005097425233937451, + "loss": 3.6303, + "step": 15025 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005096836973874298, + "loss": 3.5962, + "step": 15030 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005096248556139753, + "loss": 3.4167, + "step": 15035 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005095659980778056, + "loss": 3.5678, + "step": 15040 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005095071247833473, + "loss": 3.5376, + "step": 15045 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005094482357350266, + "loss": 3.6403, + "step": 15050 + }, + { + "epoch": 0.26, + "learning_rate": 0.000509389330937272, + "loss": 3.6371, + "step": 15055 + }, + { + "epoch": 0.26, + "learning_rate": 0.000509330410394513, + "loss": 3.5036, + "step": 15060 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005092714741111797, + "loss": 3.6479, + "step": 15065 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005092125220917042, + "loss": 3.6069, + "step": 15070 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005091535543405192, + "loss": 3.6093, + "step": 15075 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005090945708620588, + "loss": 3.6439, + "step": 15080 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005090355716607582, + "loss": 3.5337, + "step": 15085 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005089765567410539, + "loss": 3.604, + "step": 15090 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005089175261073837, + "loss": 3.5584, + "step": 15095 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005088584797641861, + "loss": 3.5309, + "step": 15100 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005087994177159012, + "loss": 3.6083, + "step": 15105 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005087403399669702, + "loss": 3.5913, + "step": 15110 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005086812465218354, + "loss": 3.5442, + "step": 15115 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005086221373849403, + "loss": 3.6318, + "step": 15120 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005085630125607297, + "loss": 3.6189, + "step": 15125 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005085038720536493, + "loss": 3.5056, + "step": 15130 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005084447158681465, + "loss": 3.5779, + "step": 15135 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005083855440086691, + "loss": 3.5665, + "step": 15140 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005083263564796669, + "loss": 3.5793, + "step": 15145 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005082671532855903, + "loss": 3.5825, + "step": 15150 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005082079344308912, + "loss": 3.7259, + "step": 15155 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005081486999200224, + "loss": 3.6756, + "step": 15160 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005080894497574381, + "loss": 3.5817, + "step": 15165 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005080301839475936, + "loss": 3.518, + "step": 15170 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005079709024949455, + "loss": 3.5779, + "step": 15175 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005079116054039513, + "loss": 3.7219, + "step": 15180 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005078522926790698, + "loss": 3.5903, + "step": 15185 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005077929643247613, + "loss": 3.5368, + "step": 15190 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005077336203454867, + "loss": 3.6035, + "step": 15195 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005076742607457086, + "loss": 3.6253, + "step": 15200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005076148855298903, + "loss": 3.6759, + "step": 15205 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005075554947024967, + "loss": 3.6473, + "step": 15210 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005074960882679935, + "loss": 3.5805, + "step": 15215 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005074366662308478, + "loss": 3.5368, + "step": 15220 + }, + { + "epoch": 0.26, + "learning_rate": 0.000507377228595528, + "loss": 3.5171, + "step": 15225 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005073177753665034, + "loss": 3.6021, + "step": 15230 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005072583065482446, + "loss": 3.6194, + "step": 15235 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005071988221452233, + "loss": 3.6315, + "step": 15240 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005071393221619124, + "loss": 3.6864, + "step": 15245 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005070798066027862, + "loss": 3.5701, + "step": 15250 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005070202754723196, + "loss": 3.6381, + "step": 15255 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005069607287749893, + "loss": 3.572, + "step": 15260 + }, + { + "epoch": 0.26, + "learning_rate": 0.000506901166515273, + "loss": 3.6789, + "step": 15265 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005068415886976493, + "loss": 3.536, + "step": 15270 + }, + { + "epoch": 0.26, + "learning_rate": 0.000506781995326598, + "loss": 3.5427, + "step": 15275 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005067223864066005, + "loss": 3.5554, + "step": 15280 + }, + { + "epoch": 0.26, + "learning_rate": 0.000506662761942139, + "loss": 3.5994, + "step": 15285 + }, + { + "epoch": 0.26, + "learning_rate": 0.000506603121937697, + "loss": 3.6482, + "step": 15290 + }, + { + "epoch": 0.26, + "learning_rate": 0.000506543466397759, + "loss": 3.6878, + "step": 15295 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005064837953268108, + "loss": 3.489, + "step": 15300 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005064241087293393, + "loss": 3.4993, + "step": 15305 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005063644066098329, + "loss": 3.5804, + "step": 15310 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005063046889727807, + "loss": 3.5341, + "step": 15315 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005062449558226732, + "loss": 3.5108, + "step": 15320 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005061852071640019, + "loss": 3.6813, + "step": 15325 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005061254430012598, + "loss": 3.4356, + "step": 15330 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005060656633389406, + "loss": 3.6614, + "step": 15335 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005060058681815399, + "loss": 3.6952, + "step": 15340 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005059460575335534, + "loss": 3.6666, + "step": 15345 + }, + { + "epoch": 0.27, + "learning_rate": 0.000505886231399479, + "loss": 3.5999, + "step": 15350 + }, + { + "epoch": 0.27, + "learning_rate": 0.000505826389783815, + "loss": 3.6896, + "step": 15355 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005057665326910614, + "loss": 3.5638, + "step": 15360 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005057066601257191, + "loss": 3.5589, + "step": 15365 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005056467720922903, + "loss": 3.6379, + "step": 15370 + }, + { + "epoch": 0.27, + "learning_rate": 0.000505586868595278, + "loss": 3.5751, + "step": 15375 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005055269496391868, + "loss": 3.5328, + "step": 15380 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005054670152285223, + "loss": 3.5601, + "step": 15385 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005054070653677913, + "loss": 3.5779, + "step": 15390 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005053471000615017, + "loss": 3.637, + "step": 15395 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005052871193141625, + "loss": 3.5866, + "step": 15400 + }, + { + "epoch": 0.27, + "learning_rate": 0.000505227123130284, + "loss": 3.6035, + "step": 15405 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005051671115143776, + "loss": 3.6226, + "step": 15410 + }, + { + "epoch": 0.27, + "learning_rate": 0.000505107084470956, + "loss": 3.5384, + "step": 15415 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005050470420045327, + "loss": 3.512, + "step": 15420 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005049869841196228, + "loss": 3.6033, + "step": 15425 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005049269108207421, + "loss": 3.5151, + "step": 15430 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005048668221124081, + "loss": 3.4719, + "step": 15435 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005048067179991389, + "loss": 3.6271, + "step": 15440 + }, + { + "epoch": 0.27, + "learning_rate": 0.000504746598485454, + "loss": 3.7077, + "step": 15445 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005046864635758744, + "loss": 3.6447, + "step": 15450 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005046263132749217, + "loss": 3.601, + "step": 15455 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005045661475871189, + "loss": 3.6165, + "step": 15460 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005045059665169902, + "loss": 3.6336, + "step": 15465 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005044457700690609, + "loss": 3.4781, + "step": 15470 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005043855582478574, + "loss": 3.5317, + "step": 15475 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005043253310579075, + "loss": 3.5082, + "step": 15480 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005042650885037397, + "loss": 3.6004, + "step": 15485 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005042048305898843, + "loss": 3.6375, + "step": 15490 + }, + { + "epoch": 0.27, + "learning_rate": 0.000504144557320872, + "loss": 3.4989, + "step": 15495 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005040842687012351, + "loss": 3.6445, + "step": 15500 + }, + { + "epoch": 0.27, + "eval_loss": 3.6320202350616455, + "eval_runtime": 150.4615, + "eval_samples_per_second": 12.236, + "eval_steps_per_second": 0.771, + "step": 15500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005040239647355073, + "loss": 3.521, + "step": 15505 + }, + { + "epoch": 0.27, + "learning_rate": 0.000503963645428223, + "loss": 3.5892, + "step": 15510 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005039033107839177, + "loss": 3.5733, + "step": 15515 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005038429608071285, + "loss": 3.6047, + "step": 15520 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005037825955023933, + "loss": 3.6021, + "step": 15525 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005037222148742514, + "loss": 3.6138, + "step": 15530 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005036618189272428, + "loss": 3.6236, + "step": 15535 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005036014076659094, + "loss": 3.5677, + "step": 15540 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005035409810947934, + "loss": 3.498, + "step": 15545 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005034805392184389, + "loss": 3.5847, + "step": 15550 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005034200820413906, + "loss": 3.6563, + "step": 15555 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005033596095681946, + "loss": 3.6285, + "step": 15560 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005032991218033981, + "loss": 3.524, + "step": 15565 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005032386187515497, + "loss": 3.5815, + "step": 15570 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005031781004171986, + "loss": 3.5129, + "step": 15575 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005031175668048957, + "loss": 3.5887, + "step": 15580 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005030570179191927, + "loss": 3.5245, + "step": 15585 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005029964537646427, + "loss": 3.6023, + "step": 15590 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005029358743457997, + "loss": 3.5239, + "step": 15595 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005028752796672189, + "loss": 3.5323, + "step": 15600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005028146697334568, + "loss": 3.5991, + "step": 15605 + }, + { + "epoch": 0.27, + "learning_rate": 0.000502754044549071, + "loss": 3.4643, + "step": 15610 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005026934041186201, + "loss": 3.5993, + "step": 15615 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005026327484466641, + "loss": 3.5821, + "step": 15620 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005025720775377637, + "loss": 3.498, + "step": 15625 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005025113913964813, + "loss": 3.5981, + "step": 15630 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005024506900273803, + "loss": 3.5101, + "step": 15635 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005023899734350248, + "loss": 3.5864, + "step": 15640 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005023292416239805, + "loss": 3.5627, + "step": 15645 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005022684945988141, + "loss": 3.5497, + "step": 15650 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005022077323640937, + "loss": 3.6516, + "step": 15655 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005021469549243881, + "loss": 3.4684, + "step": 15660 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005020861622842673, + "loss": 3.5867, + "step": 15665 + }, + { + "epoch": 0.27, + "learning_rate": 0.000502025354448303, + "loss": 3.5082, + "step": 15670 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005019645314210672, + "loss": 3.6077, + "step": 15675 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005019036932071338, + "loss": 3.5891, + "step": 15680 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005018428398110774, + "loss": 3.6244, + "step": 15685 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005017819712374739, + "loss": 3.4976, + "step": 15690 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005017210874909003, + "loss": 3.6083, + "step": 15695 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005016601885759346, + "loss": 3.5849, + "step": 15700 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005015992744971562, + "loss": 3.6031, + "step": 15705 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005015383452591456, + "loss": 3.5034, + "step": 15710 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005014774008664844, + "loss": 3.5678, + "step": 15715 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005014164413237552, + "loss": 3.6184, + "step": 15720 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005013554666355417, + "loss": 3.6015, + "step": 15725 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005012944768064291, + "loss": 3.6218, + "step": 15730 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005012334718410036, + "loss": 3.8092, + "step": 15735 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005011724517438521, + "loss": 4.6015, + "step": 15740 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005011114165195634, + "loss": 4.2182, + "step": 15745 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005010503661727268, + "loss": 3.7612, + "step": 15750 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005009893007079329, + "loss": 3.7264, + "step": 15755 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005009282201297739, + "loss": 3.6667, + "step": 15760 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005008671244428423, + "loss": 3.6342, + "step": 15765 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005008060136517324, + "loss": 3.6282, + "step": 15770 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005007448877610395, + "loss": 3.6548, + "step": 15775 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005006837467753597, + "loss": 3.6451, + "step": 15780 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005006225906992906, + "loss": 3.7284, + "step": 15785 + }, + { + "epoch": 0.27, + "learning_rate": 0.000500561419537431, + "loss": 3.6631, + "step": 15790 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005005002332943804, + "loss": 3.6146, + "step": 15795 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005004390319747399, + "loss": 3.6857, + "step": 15800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005003778155831113, + "loss": 3.5865, + "step": 15805 + }, + { + "epoch": 0.27, + "learning_rate": 0.000500316584124098, + "loss": 3.7437, + "step": 15810 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005002553376023042, + "loss": 3.5996, + "step": 15815 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005001940760223352, + "loss": 3.7551, + "step": 15820 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005001327993887977, + "loss": 3.6479, + "step": 15825 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005000715077062994, + "loss": 3.6632, + "step": 15830 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005000102009794491, + "loss": 3.5554, + "step": 15835 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004999488792128567, + "loss": 3.6261, + "step": 15840 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004998875424111332, + "loss": 3.5204, + "step": 15845 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004998261905788909, + "loss": 3.6914, + "step": 15850 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004997648237207433, + "loss": 3.4878, + "step": 15855 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004997034418413046, + "loss": 3.5916, + "step": 15860 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004996420449451907, + "loss": 3.6535, + "step": 15865 + }, + { + "epoch": 0.27, + "learning_rate": 0.000499580633037018, + "loss": 3.6224, + "step": 15870 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004995192061214047, + "loss": 3.5223, + "step": 15875 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004994577642029695, + "loss": 3.563, + "step": 15880 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004993963072863326, + "loss": 3.561, + "step": 15885 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004993348353761154, + "loss": 3.6209, + "step": 15890 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004992733484769401, + "loss": 3.3646, + "step": 15895 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004992118465934302, + "loss": 3.5317, + "step": 15900 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004991503297302105, + "loss": 3.5534, + "step": 15905 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004990887978919066, + "loss": 3.6341, + "step": 15910 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004990272510831454, + "loss": 3.5312, + "step": 15915 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004989656893085549, + "loss": 3.5533, + "step": 15920 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004989041125727643, + "loss": 3.5574, + "step": 15925 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004988425208804037, + "loss": 3.5899, + "step": 15930 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004987809142361046, + "loss": 3.5567, + "step": 15935 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004987192926444995, + "loss": 3.5264, + "step": 15940 + }, + { + "epoch": 0.28, + "learning_rate": 0.000498657656110222, + "loss": 3.6206, + "step": 15945 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004985960046379069, + "loss": 3.5102, + "step": 15950 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004985343382321901, + "loss": 3.5516, + "step": 15955 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004984726568977083, + "loss": 3.6478, + "step": 15960 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004984109606391002, + "loss": 3.6414, + "step": 15965 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004983492494610045, + "loss": 3.5679, + "step": 15970 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004982875233680618, + "loss": 3.5373, + "step": 15975 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004982257823649136, + "loss": 3.6789, + "step": 15980 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004981640264562023, + "loss": 3.642, + "step": 15985 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004981022556465719, + "loss": 3.5777, + "step": 15990 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004980404699406672, + "loss": 3.6623, + "step": 15995 + }, + { + "epoch": 0.28, + "learning_rate": 0.000497978669343134, + "loss": 3.6401, + "step": 16000 + }, + { + "epoch": 0.28, + "eval_loss": 3.6124560832977295, + "eval_runtime": 150.0796, + "eval_samples_per_second": 12.267, + "eval_steps_per_second": 0.773, + "step": 16000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004979168538586195, + "loss": 3.5841, + "step": 16005 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004978550234917719, + "loss": 3.595, + "step": 16010 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004977931782472405, + "loss": 3.5558, + "step": 16015 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004977313181296759, + "loss": 3.592, + "step": 16020 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004976694431437295, + "loss": 3.5335, + "step": 16025 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004976075532940539, + "loss": 3.4794, + "step": 16030 + }, + { + "epoch": 0.28, + "learning_rate": 0.000497545648585303, + "loss": 3.6644, + "step": 16035 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004974837290221318, + "loss": 3.5437, + "step": 16040 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004974217946091962, + "loss": 3.6994, + "step": 16045 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004973598453511535, + "loss": 3.6072, + "step": 16050 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004972978812526619, + "loss": 3.5775, + "step": 16055 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004972359023183808, + "loss": 3.5683, + "step": 16060 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004971739085529705, + "loss": 3.4989, + "step": 16065 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004971118999610929, + "loss": 3.5584, + "step": 16070 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004970498765474106, + "loss": 3.5787, + "step": 16075 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004969878383165873, + "loss": 3.6137, + "step": 16080 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004969257852732883, + "loss": 3.7059, + "step": 16085 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004968637174221794, + "loss": 3.6148, + "step": 16090 + }, + { + "epoch": 0.28, + "learning_rate": 0.000496801634767928, + "loss": 3.5649, + "step": 16095 + }, + { + "epoch": 0.28, + "learning_rate": 0.000496739537315202, + "loss": 3.6258, + "step": 16100 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004966774250686713, + "loss": 3.5316, + "step": 16105 + }, + { + "epoch": 0.28, + "learning_rate": 0.000496615298033006, + "loss": 3.5973, + "step": 16110 + }, + { + "epoch": 0.28, + "learning_rate": 0.000496553156212878, + "loss": 3.6305, + "step": 16115 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004964909996129599, + "loss": 3.5401, + "step": 16120 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004964288282379258, + "loss": 3.5479, + "step": 16125 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004963666420924504, + "loss": 3.5814, + "step": 16130 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004963044411812099, + "loss": 3.4288, + "step": 16135 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004962422255088815, + "loss": 3.5357, + "step": 16140 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004961799950801434, + "loss": 3.5193, + "step": 16145 + }, + { + "epoch": 0.28, + "learning_rate": 0.000496117749899675, + "loss": 3.5514, + "step": 16150 + }, + { + "epoch": 0.28, + "learning_rate": 0.000496055489972157, + "loss": 3.5557, + "step": 16155 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004959932153022709, + "loss": 3.5929, + "step": 16160 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004959309258946996, + "loss": 3.542, + "step": 16165 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004958686217541266, + "loss": 3.5837, + "step": 16170 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004958063028852372, + "loss": 3.4923, + "step": 16175 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004957439692927173, + "loss": 3.6609, + "step": 16180 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004956816209812539, + "loss": 3.4934, + "step": 16185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004956192579555356, + "loss": 3.5329, + "step": 16190 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004955568802202517, + "loss": 3.5227, + "step": 16195 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004954944877800924, + "loss": 3.5705, + "step": 16200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004954320806397497, + "loss": 3.5531, + "step": 16205 + }, + { + "epoch": 0.28, + "learning_rate": 0.000495369658803916, + "loss": 3.4774, + "step": 16210 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004953072222772853, + "loss": 3.5052, + "step": 16215 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004952447710645524, + "loss": 3.5444, + "step": 16220 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004951823051704134, + "loss": 3.5683, + "step": 16225 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004951198245995652, + "loss": 3.4055, + "step": 16230 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004950573293567062, + "loss": 3.6428, + "step": 16235 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004949948194465357, + "loss": 3.6984, + "step": 16240 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004949322948737541, + "loss": 3.5885, + "step": 16245 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004948697556430631, + "loss": 3.4286, + "step": 16250 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004948072017591649, + "loss": 3.565, + "step": 16255 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004947446332267636, + "loss": 3.5906, + "step": 16260 + }, + { + "epoch": 0.28, + "learning_rate": 0.000494682050050564, + "loss": 3.634, + "step": 16265 + }, + { + "epoch": 0.28, + "learning_rate": 0.000494619452235272, + "loss": 3.5922, + "step": 16270 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004945568397855945, + "loss": 3.538, + "step": 16275 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004944942127062397, + "loss": 3.572, + "step": 16280 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004944315710019169, + "loss": 3.6377, + "step": 16285 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004943689146773365, + "loss": 3.6228, + "step": 16290 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004943062437372098, + "loss": 3.5855, + "step": 16295 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004942435581862493, + "loss": 3.5372, + "step": 16300 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004941808580291688, + "loss": 3.5609, + "step": 16305 + }, + { + "epoch": 0.28, + "learning_rate": 0.000494118143270683, + "loss": 3.6014, + "step": 16310 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004940554139155074, + "loss": 3.5558, + "step": 16315 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004939926699683594, + "loss": 3.5162, + "step": 16320 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004939299114339568, + "loss": 3.6057, + "step": 16325 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004938671383170187, + "loss": 3.6958, + "step": 16330 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004938043506222653, + "loss": 3.6499, + "step": 16335 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004937415483544181, + "loss": 3.5561, + "step": 16340 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004936787315181993, + "loss": 3.5672, + "step": 16345 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004936159001183325, + "loss": 3.5341, + "step": 16350 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004935530541595422, + "loss": 3.5887, + "step": 16355 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004934901936465543, + "loss": 3.5734, + "step": 16360 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004934273185840953, + "loss": 3.5032, + "step": 16365 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004933644289768933, + "loss": 3.6115, + "step": 16370 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004933015248296772, + "loss": 3.5705, + "step": 16375 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004932386061471772, + "loss": 3.6443, + "step": 16380 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004931756729341242, + "loss": 3.4791, + "step": 16385 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004931127251952507, + "loss": 3.6197, + "step": 16390 + }, + { + "epoch": 0.28, + "learning_rate": 0.00049304976293529, + "loss": 3.4558, + "step": 16395 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004929867861589764, + "loss": 3.5077, + "step": 16400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004929237948710455, + "loss": 3.5552, + "step": 16405 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004928607890762341, + "loss": 3.5842, + "step": 16410 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004927977687792796, + "loss": 3.5561, + "step": 16415 + }, + { + "epoch": 0.28, + "learning_rate": 0.000492734733984921, + "loss": 3.4731, + "step": 16420 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004926716846978982, + "loss": 3.5924, + "step": 16425 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004926086209229521, + "loss": 3.4558, + "step": 16430 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004925455426648248, + "loss": 3.457, + "step": 16435 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004924824499282595, + "loss": 3.5853, + "step": 16440 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004924193427180004, + "loss": 3.6165, + "step": 16445 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004923562210387928, + "loss": 3.4844, + "step": 16450 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004922930848953834, + "loss": 3.629, + "step": 16455 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004922299342925193, + "loss": 3.4659, + "step": 16460 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004921667692349493, + "loss": 3.6049, + "step": 16465 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004921035897274232, + "loss": 3.5527, + "step": 16470 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004920403957746916, + "loss": 3.5113, + "step": 16475 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004919771873815065, + "loss": 3.5445, + "step": 16480 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004919139645526207, + "loss": 3.5608, + "step": 16485 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004918507272927883, + "loss": 3.5982, + "step": 16490 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004917874756067644, + "loss": 3.5369, + "step": 16495 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004917242094993054, + "loss": 3.4741, + "step": 16500 + }, + { + "epoch": 0.29, + "eval_loss": 3.5877273082733154, + "eval_runtime": 150.0518, + "eval_samples_per_second": 12.269, + "eval_steps_per_second": 0.773, + "step": 16500 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004916609289751683, + "loss": 3.5372, + "step": 16505 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004915976340391116, + "loss": 3.5657, + "step": 16510 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004915343246958947, + "loss": 3.5311, + "step": 16515 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004914710009502782, + "loss": 3.5391, + "step": 16520 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004914076628070238, + "loss": 3.6073, + "step": 16525 + }, + { + "epoch": 0.29, + "learning_rate": 0.000491344310270894, + "loss": 3.4663, + "step": 16530 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004912809433466527, + "loss": 3.5429, + "step": 16535 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004912175620390648, + "loss": 3.5616, + "step": 16540 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004911541663528961, + "loss": 3.5202, + "step": 16545 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004910907562929139, + "loss": 3.6226, + "step": 16550 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004910273318638861, + "loss": 3.5045, + "step": 16555 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004909638930705819, + "loss": 3.4955, + "step": 16560 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004909004399177717, + "loss": 3.5726, + "step": 16565 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004908369724102267, + "loss": 3.4443, + "step": 16570 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004907734905527193, + "loss": 3.5395, + "step": 16575 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004907099943500233, + "loss": 3.4818, + "step": 16580 + }, + { + "epoch": 0.29, + "learning_rate": 0.000490646483806913, + "loss": 3.6123, + "step": 16585 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004905829589281642, + "loss": 3.5348, + "step": 16590 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004905194197185536, + "loss": 3.5266, + "step": 16595 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004904558661828589, + "loss": 3.5143, + "step": 16600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004903922983258593, + "loss": 3.6398, + "step": 16605 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004903287161523346, + "loss": 3.5811, + "step": 16610 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004902651196670658, + "loss": 3.4796, + "step": 16615 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004902015088748352, + "loss": 3.4956, + "step": 16620 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004901378837804258, + "loss": 3.4245, + "step": 16625 + }, + { + "epoch": 0.29, + "learning_rate": 0.000490074244388622, + "loss": 3.6016, + "step": 16630 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004900105907042092, + "loss": 3.5701, + "step": 16635 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004899469227319737, + "loss": 3.5832, + "step": 16640 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004898832404767031, + "loss": 3.5255, + "step": 16645 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004898195439431858, + "loss": 3.5867, + "step": 16650 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004897558331362119, + "loss": 3.4916, + "step": 16655 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004896921080605716, + "loss": 3.5749, + "step": 16660 + }, + { + "epoch": 0.29, + "learning_rate": 0.000489628368721057, + "loss": 3.5705, + "step": 16665 + }, + { + "epoch": 0.29, + "learning_rate": 0.000489564615122461, + "loss": 3.5197, + "step": 16670 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004895008472695772, + "loss": 3.669, + "step": 16675 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004894370651672011, + "loss": 3.5425, + "step": 16680 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004893732688201284, + "loss": 3.5529, + "step": 16685 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004893094582331566, + "loss": 3.5687, + "step": 16690 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004892456334110835, + "loss": 3.5815, + "step": 16695 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004891817943587087, + "loss": 3.5539, + "step": 16700 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004891179410808326, + "loss": 3.6557, + "step": 16705 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004890540735822566, + "loss": 3.6539, + "step": 16710 + }, + { + "epoch": 0.29, + "learning_rate": 0.000488990191867783, + "loss": 3.675, + "step": 16715 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004889262959422155, + "loss": 3.5966, + "step": 16720 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004888623858103589, + "loss": 3.4819, + "step": 16725 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004887984614770188, + "loss": 3.4654, + "step": 16730 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004887345229470019, + "loss": 3.5918, + "step": 16735 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004886705702251161, + "loss": 3.5793, + "step": 16740 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004886066033161704, + "loss": 3.5092, + "step": 16745 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004885426222249747, + "loss": 3.6574, + "step": 16750 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004884786269563401, + "loss": 3.5397, + "step": 16755 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004884146175150786, + "loss": 3.4925, + "step": 16760 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004883505939060036, + "loss": 3.5832, + "step": 16765 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004882865561339293, + "loss": 3.5626, + "step": 16770 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048822250420367064, + "loss": 3.5953, + "step": 16775 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048815843812004446, + "loss": 3.6348, + "step": 16780 + }, + { + "epoch": 0.29, + "learning_rate": 0.000488094357887868, + "loss": 3.5284, + "step": 16785 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048803026351195984, + "loss": 3.5496, + "step": 16790 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048796615499713947, + "loss": 3.554, + "step": 16795 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004879020323482276, + "loss": 3.5148, + "step": 16800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004878378955700458, + "loss": 3.5522, + "step": 16805 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048777374466741697, + "loss": 3.5446, + "step": 16810 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004877095796451648, + "loss": 3.5773, + "step": 16815 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004876454005081144, + "loss": 3.6213, + "step": 16820 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004875812072610913, + "loss": 3.621, + "step": 16825 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004875169999089229, + "loss": 3.524, + "step": 16830 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004874527784564371, + "loss": 3.5176, + "step": 16835 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004873885429084631, + "loss": 3.4054, + "step": 16840 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048732429326983097, + "loss": 3.5634, + "step": 16845 + }, + { + "epoch": 0.29, + "learning_rate": 0.000487260029545372, + "loss": 3.6623, + "step": 16850 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004871957517399184, + "loss": 3.5978, + "step": 16855 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048713145985830386, + "loss": 3.5235, + "step": 16860 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048706715390536235, + "loss": 3.5122, + "step": 16865 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048700283388592976, + "loss": 3.5124, + "step": 16870 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048693849980484234, + "loss": 3.5231, + "step": 16875 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048687415166693773, + "loss": 3.6416, + "step": 16880 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004868097894770547, + "loss": 3.5731, + "step": 16885 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048674541324003287, + "loss": 3.5577, + "step": 16890 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048668102296071296, + "loss": 3.3237, + "step": 16895 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048661661864393696, + "loss": 3.5944, + "step": 16900 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048655220029454756, + "loss": 3.5177, + "step": 16905 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004864877679173889, + "loss": 3.511, + "step": 16910 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004864233215173056, + "loss": 3.5407, + "step": 16915 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004863588610991441, + "loss": 3.5129, + "step": 16920 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004862943866677512, + "loss": 3.5516, + "step": 16925 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004862298982279752, + "loss": 3.6088, + "step": 16930 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004861653957846653, + "loss": 3.4369, + "step": 16935 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004861008793426717, + "loss": 3.4892, + "step": 16940 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004860363489068457, + "loss": 3.5787, + "step": 16945 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048597180448203977, + "loss": 3.5381, + "step": 16950 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048590724607310714, + "loss": 3.4371, + "step": 16955 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004858426736849024, + "loss": 3.4722, + "step": 16960 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048577808732228105, + "loss": 3.5033, + "step": 16965 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048571348699009964, + "loss": 3.5394, + "step": 16970 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004856488726932158, + "loss": 3.5476, + "step": 16975 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048558424443648804, + "loss": 3.5903, + "step": 16980 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048551960222477635, + "loss": 3.4827, + "step": 16985 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004854549460629412, + "loss": 3.4285, + "step": 16990 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048539027595584464, + "loss": 3.5585, + "step": 16995 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004853255919083494, + "loss": 3.4662, + "step": 17000 + }, + { + "epoch": 0.29, + "eval_loss": 3.576127052307129, + "eval_runtime": 150.0801, + "eval_samples_per_second": 12.267, + "eval_steps_per_second": 0.773, + "step": 17000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048526089392531945, + "loss": 3.4641, + "step": 17005 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004851961820116197, + "loss": 3.5871, + "step": 17010 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004851314561721162, + "loss": 3.6005, + "step": 17015 + }, + { + "epoch": 0.29, + "learning_rate": 0.000485066716411676, + "loss": 3.5406, + "step": 17020 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004850019627351672, + "loss": 3.5104, + "step": 17025 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004849371951474588, + "loss": 3.5767, + "step": 17030 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048487241365342116, + "loss": 3.5849, + "step": 17035 + }, + { + "epoch": 0.29, + "learning_rate": 0.00048480761825792556, + "loss": 3.5134, + "step": 17040 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004847428089658442, + "loss": 3.5021, + "step": 17045 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004846779857820503, + "loss": 3.5575, + "step": 17050 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048461314871141835, + "loss": 3.5833, + "step": 17055 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048454829775882374, + "loss": 3.4563, + "step": 17060 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048448343292914296, + "loss": 3.5297, + "step": 17065 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004844185542272534, + "loss": 3.576, + "step": 17070 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048435366165803384, + "loss": 3.5006, + "step": 17075 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004842887552263637, + "loss": 3.579, + "step": 17080 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004842238349371235, + "loss": 3.565, + "step": 17085 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048415890079519516, + "loss": 3.4344, + "step": 17090 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004840939528054612, + "loss": 3.4845, + "step": 17095 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048402899097280556, + "loss": 3.5118, + "step": 17100 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004839640153021129, + "loss": 3.4574, + "step": 17105 + }, + { + "epoch": 0.3, + "learning_rate": 0.000483899025798269, + "loss": 3.5173, + "step": 17110 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048383402246616087, + "loss": 3.5195, + "step": 17115 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004837690053106765, + "loss": 3.4535, + "step": 17120 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004837039743367045, + "loss": 3.4877, + "step": 17125 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004836389295491353, + "loss": 3.5545, + "step": 17130 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048357387095285964, + "loss": 3.4785, + "step": 17135 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004835087985527697, + "loss": 3.5197, + "step": 17140 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004834437123537586, + "loss": 3.5171, + "step": 17145 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004833786123607205, + "loss": 3.5258, + "step": 17150 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004833134985785505, + "loss": 3.6473, + "step": 17155 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004832483710121448, + "loss": 3.5945, + "step": 17160 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004831832296664009, + "loss": 3.577, + "step": 17165 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048311807454621685, + "loss": 3.5094, + "step": 17170 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048305290565649195, + "loss": 3.489, + "step": 17175 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004829877230021268, + "loss": 3.524, + "step": 17180 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048292252658802264, + "loss": 3.5145, + "step": 17185 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048285731641908196, + "loss": 3.5293, + "step": 17190 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004827920925002082, + "loss": 3.4996, + "step": 17195 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048272685483630585, + "loss": 3.6237, + "step": 17200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048266160343228066, + "loss": 3.4632, + "step": 17205 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004825963382930388, + "loss": 3.5058, + "step": 17210 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004825310594234882, + "loss": 3.521, + "step": 17215 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048246576682853736, + "loss": 3.4941, + "step": 17220 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048240046051309593, + "loss": 3.6331, + "step": 17225 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004823351404820747, + "loss": 3.5374, + "step": 17230 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048226980674038534, + "loss": 3.5544, + "step": 17235 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048220445929294067, + "loss": 3.4389, + "step": 17240 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048213909814465443, + "loss": 3.5835, + "step": 17245 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048207372330044137, + "loss": 3.4002, + "step": 17250 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004820083347652176, + "loss": 3.5722, + "step": 17255 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004819429325438997, + "loss": 3.6024, + "step": 17260 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004818775166414057, + "loss": 3.6439, + "step": 17265 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004818120870626547, + "loss": 3.4983, + "step": 17270 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048174664381256643, + "loss": 3.4879, + "step": 17275 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048168118689606197, + "loss": 3.5338, + "step": 17280 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048161571631806337, + "loss": 3.4761, + "step": 17285 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004815502320834936, + "loss": 3.5412, + "step": 17290 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048148473419727694, + "loss": 3.5941, + "step": 17295 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004814192226643383, + "loss": 3.5358, + "step": 17300 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048135369748960395, + "loss": 3.459, + "step": 17305 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004812881586780008, + "loss": 3.4669, + "step": 17310 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004812226062344573, + "loss": 3.5167, + "step": 17315 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048115704016390265, + "loss": 3.6433, + "step": 17320 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004810914604712669, + "loss": 3.4723, + "step": 17325 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048102586716148145, + "loss": 3.6129, + "step": 17330 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048096026023947846, + "loss": 3.5888, + "step": 17335 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048089463971019133, + "loss": 3.5604, + "step": 17340 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004808290055785544, + "loss": 3.5614, + "step": 17345 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004807633578495029, + "loss": 3.5267, + "step": 17350 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004806976965279734, + "loss": 3.5437, + "step": 17355 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004806320216189031, + "loss": 3.577, + "step": 17360 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004805663331272306, + "loss": 3.5452, + "step": 17365 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048050063105789515, + "loss": 3.5119, + "step": 17370 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004804349154158373, + "loss": 3.5662, + "step": 17375 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004803691862059985, + "loss": 3.5817, + "step": 17380 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004803034434333214, + "loss": 3.4176, + "step": 17385 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004802376871027493, + "loss": 3.5071, + "step": 17390 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048017191721922683, + "loss": 3.4767, + "step": 17395 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048010613378769945, + "loss": 3.4762, + "step": 17400 + }, + { + "epoch": 0.3, + "learning_rate": 0.00048004033681311407, + "loss": 3.5571, + "step": 17405 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047997452630041783, + "loss": 3.4705, + "step": 17410 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047990870225455976, + "loss": 3.4789, + "step": 17415 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047984286468048923, + "loss": 3.5571, + "step": 17420 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047977701358315687, + "loss": 3.5161, + "step": 17425 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047971114896751455, + "loss": 3.6078, + "step": 17430 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047964527083851476, + "loss": 3.4624, + "step": 17435 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047957937920111126, + "loss": 3.4843, + "step": 17440 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047951347406025886, + "loss": 3.5239, + "step": 17445 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047944755542091307, + "loss": 3.5355, + "step": 17450 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047938162328803086, + "loss": 3.427, + "step": 17455 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004793156776665699, + "loss": 3.5715, + "step": 17460 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047924971856148896, + "loss": 3.6401, + "step": 17465 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004791837459777478, + "loss": 3.5878, + "step": 17470 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004791177599203072, + "loss": 3.4139, + "step": 17475 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047905176039412904, + "loss": 3.5894, + "step": 17480 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004789857474041762, + "loss": 3.5054, + "step": 17485 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004789197209554124, + "loss": 3.4073, + "step": 17490 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004788536810528025, + "loss": 3.546, + "step": 17495 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004787876277013124, + "loss": 3.4948, + "step": 17500 + }, + { + "epoch": 0.3, + "eval_loss": 3.5631024837493896, + "eval_runtime": 149.9514, + "eval_samples_per_second": 12.277, + "eval_steps_per_second": 0.774, + "step": 17500 + }, + { + "epoch": 0.3, + "learning_rate": 0.000478721560905909, + "loss": 3.5123, + "step": 17505 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004786554806715601, + "loss": 3.4436, + "step": 17510 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004785893870032347, + "loss": 3.551, + "step": 17515 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004785232799059026, + "loss": 3.5769, + "step": 17520 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004784571593845349, + "loss": 3.5486, + "step": 17525 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047839102544410325, + "loss": 3.6552, + "step": 17530 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047832487808958083, + "loss": 3.5692, + "step": 17535 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004782587173259414, + "loss": 3.4071, + "step": 17540 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047819254315816006, + "loss": 3.5011, + "step": 17545 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047812635559121256, + "loss": 3.5682, + "step": 17550 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047806015463007615, + "loss": 3.4182, + "step": 17555 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047799394027972866, + "loss": 3.5072, + "step": 17560 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004779277125451489, + "loss": 3.4896, + "step": 17565 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047786147143131716, + "loss": 3.3961, + "step": 17570 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047779521694321425, + "loss": 3.5795, + "step": 17575 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004777289490858222, + "loss": 3.4299, + "step": 17580 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047766266786412415, + "loss": 3.5548, + "step": 17585 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047759637328310386, + "loss": 3.4617, + "step": 17590 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047753006534774646, + "loss": 3.4899, + "step": 17595 + }, + { + "epoch": 0.3, + "learning_rate": 0.000477463744063038, + "loss": 3.5963, + "step": 17600 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047739740943396553, + "loss": 3.4351, + "step": 17605 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004773310614655169, + "loss": 3.5722, + "step": 17610 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047726470016268134, + "loss": 3.5019, + "step": 17615 + }, + { + "epoch": 0.3, + "learning_rate": 0.00047719832553044876, + "loss": 3.4795, + "step": 17620 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004771319375738103, + "loss": 3.4939, + "step": 17625 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004770655362977578, + "loss": 3.459, + "step": 17630 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004769991217072844, + "loss": 3.4321, + "step": 17635 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004769326938073843, + "loss": 3.588, + "step": 17640 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004768662526030523, + "loss": 3.6168, + "step": 17645 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004767997980992846, + "loss": 3.4792, + "step": 17650 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004767333303010781, + "loss": 3.4928, + "step": 17655 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004766668492134309, + "loss": 3.6081, + "step": 17660 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004766003548413421, + "loss": 3.5809, + "step": 17665 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047653384718981164, + "loss": 3.5654, + "step": 17670 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047646732626384066, + "loss": 3.4998, + "step": 17675 + }, + { + "epoch": 0.31, + "learning_rate": 0.000476400792068431, + "loss": 3.3947, + "step": 17680 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047633424460858585, + "loss": 3.6234, + "step": 17685 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004762676838893093, + "loss": 3.4851, + "step": 17690 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047620110991560623, + "loss": 3.4785, + "step": 17695 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047613452269248274, + "loss": 3.5703, + "step": 17700 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047606792222494566, + "loss": 3.5448, + "step": 17705 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004760013085180033, + "loss": 3.4537, + "step": 17710 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004759346815766646, + "loss": 3.515, + "step": 17715 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004758680414059393, + "loss": 3.5411, + "step": 17720 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004758013880108387, + "loss": 3.5154, + "step": 17725 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004757347213963747, + "loss": 3.4914, + "step": 17730 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004756680415675603, + "loss": 3.4521, + "step": 17735 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004756013485294094, + "loss": 3.5936, + "step": 17740 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004755346422869371, + "loss": 3.4493, + "step": 17745 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004754679228451592, + "loss": 3.5317, + "step": 17750 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047540119020909286, + "loss": 3.5307, + "step": 17755 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047533444438375587, + "loss": 3.5228, + "step": 17760 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004752676853741673, + "loss": 3.4958, + "step": 17765 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004752009131853469, + "loss": 3.4806, + "step": 17770 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004751341278223159, + "loss": 3.4653, + "step": 17775 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004750673292900959, + "loss": 3.5797, + "step": 17780 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047500051759371, + "loss": 3.5707, + "step": 17785 + }, + { + "epoch": 0.31, + "learning_rate": 0.000474933692738182, + "loss": 3.5677, + "step": 17790 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047486685472853687, + "loss": 3.5105, + "step": 17795 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004748000035698003, + "loss": 3.5625, + "step": 17800 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047473313926699954, + "loss": 3.5567, + "step": 17805 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047466626182516203, + "loss": 3.5687, + "step": 17810 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047459937124931685, + "loss": 3.5199, + "step": 17815 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004745324675444938, + "loss": 3.5174, + "step": 17820 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047446555071572354, + "loss": 3.4744, + "step": 17825 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004743986207680381, + "loss": 3.3464, + "step": 17830 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047433167770647, + "loss": 3.55, + "step": 17835 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004742647215360533, + "loss": 3.4027, + "step": 17840 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047419775226182264, + "loss": 3.5334, + "step": 17845 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004741307698888137, + "loss": 3.5083, + "step": 17850 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047406377442206325, + "loss": 3.628, + "step": 17855 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047399676586660914, + "loss": 3.6128, + "step": 17860 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004739297442274899, + "loss": 3.5811, + "step": 17865 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047386270950974525, + "loss": 3.5103, + "step": 17870 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004737956617184159, + "loss": 3.5447, + "step": 17875 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047372860085854345, + "loss": 3.6269, + "step": 17880 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004736615269351706, + "loss": 3.5275, + "step": 17885 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047359443995334083, + "loss": 3.4888, + "step": 17890 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047352733991809894, + "loss": 3.5246, + "step": 17895 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004734602268344903, + "loss": 3.4484, + "step": 17900 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004733931007075617, + "loss": 3.5838, + "step": 17905 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047332596154236046, + "loss": 3.5475, + "step": 17910 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047325880934393524, + "loss": 3.5593, + "step": 17915 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047319164411733545, + "loss": 3.6149, + "step": 17920 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047312446586761164, + "loss": 3.4426, + "step": 17925 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004730572745998153, + "loss": 3.524, + "step": 17930 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004729900703189988, + "loss": 3.5848, + "step": 17935 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047292285303021544, + "loss": 3.4916, + "step": 17940 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004728556227385199, + "loss": 3.5414, + "step": 17945 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004727883794489673, + "loss": 3.5673, + "step": 17950 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004727211231666141, + "loss": 3.5469, + "step": 17955 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004726538538965177, + "loss": 3.4563, + "step": 17960 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047258657164373624, + "loss": 3.512, + "step": 17965 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047251927641332915, + "loss": 3.481, + "step": 17970 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004724519682103566, + "loss": 3.5488, + "step": 17975 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004723846470398798, + "loss": 3.3891, + "step": 17980 + }, + { + "epoch": 0.31, + "learning_rate": 0.000472317312906961, + "loss": 3.533, + "step": 17985 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004722499658166634, + "loss": 3.6264, + "step": 17990 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047218260577405123, + "loss": 3.5065, + "step": 17995 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004721152327841894, + "loss": 3.5378, + "step": 18000 + }, + { + "epoch": 0.31, + "eval_loss": 3.560037136077881, + "eval_runtime": 149.8718, + "eval_samples_per_second": 12.284, + "eval_steps_per_second": 0.774, + "step": 18000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047204784685214425, + "loss": 3.637, + "step": 18005 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047198044798298273, + "loss": 3.5406, + "step": 18010 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004719130361817728, + "loss": 3.5347, + "step": 18015 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047184561145358376, + "loss": 3.5563, + "step": 18020 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004717781738034853, + "loss": 3.6075, + "step": 18025 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047171072323654847, + "loss": 3.411, + "step": 18030 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004716432597578454, + "loss": 3.505, + "step": 18035 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047157578337244873, + "loss": 3.6218, + "step": 18040 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004715082940854326, + "loss": 3.4998, + "step": 18045 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004714407919018716, + "loss": 3.5515, + "step": 18050 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004713732768268417, + "loss": 3.6125, + "step": 18055 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004713057488654197, + "loss": 3.5094, + "step": 18060 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004712382080226833, + "loss": 3.5357, + "step": 18065 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004711706543037112, + "loss": 3.5171, + "step": 18070 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004711030877135831, + "loss": 3.5248, + "step": 18075 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004710355082573798, + "loss": 3.3577, + "step": 18080 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047096791594018273, + "loss": 3.4937, + "step": 18085 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004709003107670747, + "loss": 3.5847, + "step": 18090 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047083269274313897, + "loss": 3.5041, + "step": 18095 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004707650618734604, + "loss": 3.518, + "step": 18100 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004706974181631243, + "loss": 3.3611, + "step": 18105 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004706297616172172, + "loss": 3.5533, + "step": 18110 + }, + { + "epoch": 0.31, + "learning_rate": 0.00047056209224082636, + "loss": 3.4176, + "step": 18115 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004704944100390404, + "loss": 3.5398, + "step": 18120 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004704267150169485, + "loss": 3.4949, + "step": 18125 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004703590071796411, + "loss": 3.5204, + "step": 18130 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004702912865322095, + "loss": 3.4492, + "step": 18135 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004702235530797458, + "loss": 3.6095, + "step": 18140 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004701558068273433, + "loss": 3.4533, + "step": 18145 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004700880477800962, + "loss": 3.4747, + "step": 18150 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004700202759430995, + "loss": 3.6068, + "step": 18155 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004699524913214495, + "loss": 3.5339, + "step": 18160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004698846939202431, + "loss": 3.5689, + "step": 18165 + }, + { + "epoch": 0.31, + "learning_rate": 0.00046981688374457835, + "loss": 3.4747, + "step": 18170 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004697490607995543, + "loss": 3.5512, + "step": 18175 + }, + { + "epoch": 0.31, + "learning_rate": 0.00046968122509027075, + "loss": 3.5927, + "step": 18180 + }, + { + "epoch": 0.31, + "learning_rate": 0.00046961337662182874, + "loss": 3.4706, + "step": 18185 + }, + { + "epoch": 0.31, + "learning_rate": 0.00046954551539933007, + "loss": 3.556, + "step": 18190 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004694776414278775, + "loss": 3.4009, + "step": 18195 + }, + { + "epoch": 0.31, + "learning_rate": 0.00046940975471257483, + "loss": 3.4902, + "step": 18200 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046934185525852683, + "loss": 3.4386, + "step": 18205 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004692739430708392, + "loss": 3.4121, + "step": 18210 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004692060181546186, + "loss": 3.5268, + "step": 18215 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046913808051497253, + "loss": 3.5333, + "step": 18220 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046907013015700977, + "loss": 3.5135, + "step": 18225 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004690021670858395, + "loss": 3.4469, + "step": 18230 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004689341913065725, + "loss": 3.4252, + "step": 18235 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004688662028243201, + "loss": 3.5202, + "step": 18240 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046879820164419454, + "loss": 3.596, + "step": 18245 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046873018777130945, + "loss": 3.5158, + "step": 18250 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004686621612107789, + "loss": 3.4576, + "step": 18255 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046859412196771817, + "loss": 3.4635, + "step": 18260 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046852607004724354, + "loss": 3.5205, + "step": 18265 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004684580054544721, + "loss": 3.5507, + "step": 18270 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004683899281945221, + "loss": 3.4769, + "step": 18275 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046832183827251244, + "loss": 3.4863, + "step": 18280 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046825373569356305, + "loss": 3.5763, + "step": 18285 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004681856204627951, + "loss": 3.4605, + "step": 18290 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004681174925853305, + "loss": 3.4173, + "step": 18295 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046804935206629187, + "loss": 3.4477, + "step": 18300 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046798119891080343, + "loss": 3.4775, + "step": 18305 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004679130331239895, + "loss": 3.5202, + "step": 18310 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004678448547109761, + "loss": 3.4684, + "step": 18315 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046777666367688996, + "loss": 3.4995, + "step": 18320 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004677084600268584, + "loss": 3.4431, + "step": 18325 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004676402437660102, + "loss": 3.5776, + "step": 18330 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004675720148994747, + "loss": 3.5402, + "step": 18335 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046750377343238256, + "loss": 3.5223, + "step": 18340 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046743551936986513, + "loss": 3.332, + "step": 18345 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004673672527170547, + "loss": 3.5664, + "step": 18350 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046729897347908467, + "loss": 3.5212, + "step": 18355 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004672306816610892, + "loss": 3.4357, + "step": 18360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004671623772682034, + "loss": 3.4875, + "step": 18365 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046709406030556364, + "loss": 3.6394, + "step": 18370 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046702573077830677, + "loss": 3.5538, + "step": 18375 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004669573886915711, + "loss": 3.4598, + "step": 18380 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004668890340504954, + "loss": 3.6014, + "step": 18385 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004668206668602196, + "loss": 3.5697, + "step": 18390 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004667522871258846, + "loss": 3.3325, + "step": 18395 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004666838948526321, + "loss": 3.5063, + "step": 18400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046661549004560515, + "loss": 3.5601, + "step": 18405 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004665470727099471, + "loss": 3.5294, + "step": 18410 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046647864285080274, + "loss": 3.4877, + "step": 18415 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046641020047331764, + "loss": 3.5173, + "step": 18420 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004663417455826383, + "loss": 3.5529, + "step": 18425 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004662732781839122, + "loss": 3.4485, + "step": 18430 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046620479828228767, + "loss": 3.6021, + "step": 18435 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046613630588291413, + "loss": 3.5351, + "step": 18440 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004660678009909419, + "loss": 3.5565, + "step": 18445 + }, + { + "epoch": 0.32, + "learning_rate": 0.000465999283611522, + "loss": 3.5563, + "step": 18450 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004659307537498068, + "loss": 3.4886, + "step": 18455 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046586221141094923, + "loss": 3.5287, + "step": 18460 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004657936566001034, + "loss": 3.3976, + "step": 18465 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046572508932242424, + "loss": 3.5678, + "step": 18470 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046565650958306775, + "loss": 3.5914, + "step": 18475 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004655879173871908, + "loss": 3.4357, + "step": 18480 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046551931273995087, + "loss": 3.5462, + "step": 18485 + }, + { + "epoch": 0.32, + "learning_rate": 0.000465450695646507, + "loss": 3.6432, + "step": 18490 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004653820661120188, + "loss": 3.509, + "step": 18495 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004653134241416467, + "loss": 3.3627, + "step": 18500 + }, + { + "epoch": 0.32, + "eval_loss": 3.539888858795166, + "eval_runtime": 149.9774, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 18500 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004652447697405524, + "loss": 3.4926, + "step": 18505 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046517610291389825, + "loss": 3.4943, + "step": 18510 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004651074236668477, + "loss": 3.5992, + "step": 18515 + }, + { + "epoch": 0.32, + "learning_rate": 0.000465038732004565, + "loss": 3.4454, + "step": 18520 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046497002793221545, + "loss": 3.4608, + "step": 18525 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004649013114549653, + "loss": 3.426, + "step": 18530 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004648325825779817, + "loss": 3.5191, + "step": 18535 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046476384130643254, + "loss": 3.6492, + "step": 18540 + }, + { + "epoch": 0.32, + "learning_rate": 0.000464695087645487, + "loss": 3.4617, + "step": 18545 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046462632160031484, + "loss": 3.506, + "step": 18550 + }, + { + "epoch": 0.32, + "learning_rate": 0.000464557543176087, + "loss": 3.5133, + "step": 18555 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004644887523779753, + "loss": 3.5006, + "step": 18560 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004644199492111524, + "loss": 3.2805, + "step": 18565 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046435113368079197, + "loss": 3.5573, + "step": 18570 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046428230579206853, + "loss": 3.5132, + "step": 18575 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004642134655501576, + "loss": 3.661, + "step": 18580 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004641446129602358, + "loss": 3.3854, + "step": 18585 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046407574802748017, + "loss": 3.5102, + "step": 18590 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046400687075706925, + "loss": 3.4589, + "step": 18595 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004639379811541821, + "loss": 3.4866, + "step": 18600 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046386907922399894, + "loss": 3.5285, + "step": 18605 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046380016497170096, + "loss": 3.5076, + "step": 18610 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004637312384024699, + "loss": 3.4932, + "step": 18615 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004636622995214889, + "loss": 3.4528, + "step": 18620 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004635933483339417, + "loss": 3.586, + "step": 18625 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004635243848450131, + "loss": 3.4847, + "step": 18630 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004634554090598888, + "loss": 3.525, + "step": 18635 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046338642098375544, + "loss": 3.4765, + "step": 18640 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004633174206218006, + "loss": 3.577, + "step": 18645 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004632484079792127, + "loss": 3.4143, + "step": 18650 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046317938306118117, + "loss": 3.5152, + "step": 18655 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004631103458728963, + "loss": 3.4873, + "step": 18660 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004630412964195493, + "loss": 3.4814, + "step": 18665 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046297223470633247, + "loss": 3.5824, + "step": 18670 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004629031607384388, + "loss": 3.4629, + "step": 18675 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046283407452106225, + "loss": 3.5677, + "step": 18680 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046276497605939786, + "loss": 3.4402, + "step": 18685 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004626958653586414, + "loss": 3.4675, + "step": 18690 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004626267424239896, + "loss": 3.4872, + "step": 18695 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004625576072606403, + "loss": 3.4781, + "step": 18700 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046248845987379204, + "loss": 3.5359, + "step": 18705 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046241930026864425, + "loss": 3.5084, + "step": 18710 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004623501284503976, + "loss": 3.4936, + "step": 18715 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004622809444242533, + "loss": 3.5277, + "step": 18720 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004622117481954135, + "loss": 3.5329, + "step": 18725 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046214253976908167, + "loss": 3.459, + "step": 18730 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046207331915046183, + "loss": 3.5331, + "step": 18735 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046200408634475894, + "loss": 3.4192, + "step": 18740 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046193484135717913, + "loss": 3.4791, + "step": 18745 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004618655841929291, + "loss": 3.4814, + "step": 18750 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004617963148572167, + "loss": 3.4538, + "step": 18755 + }, + { + "epoch": 0.32, + "learning_rate": 0.00046172703335525047, + "loss": 3.4978, + "step": 18760 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004616577396922403, + "loss": 3.5329, + "step": 18765 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004615884338733966, + "loss": 3.5177, + "step": 18770 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004615191159039308, + "loss": 3.5178, + "step": 18775 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004614497857890552, + "loss": 3.5395, + "step": 18780 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004613804435339832, + "loss": 3.5156, + "step": 18785 + }, + { + "epoch": 0.33, + "learning_rate": 0.00046131108914392884, + "loss": 3.4949, + "step": 18790 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004612417226241073, + "loss": 3.4067, + "step": 18795 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004611723439797346, + "loss": 3.4388, + "step": 18800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004611029532160276, + "loss": 3.5248, + "step": 18805 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004610335503382041, + "loss": 3.5521, + "step": 18810 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004609641353514829, + "loss": 3.5943, + "step": 18815 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004608947082610838, + "loss": 3.5184, + "step": 18820 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004608252690722271, + "loss": 3.4862, + "step": 18825 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004607558177901344, + "loss": 3.6086, + "step": 18830 + }, + { + "epoch": 0.33, + "learning_rate": 0.000460686354420028, + "loss": 3.4269, + "step": 18835 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004606168789671314, + "loss": 3.583, + "step": 18840 + }, + { + "epoch": 0.33, + "learning_rate": 0.00046054739143666857, + "loss": 3.4697, + "step": 18845 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004604778918338647, + "loss": 3.5615, + "step": 18850 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004604083801639457, + "loss": 3.5213, + "step": 18855 + }, + { + "epoch": 0.33, + "learning_rate": 0.00046033885643213866, + "loss": 3.443, + "step": 18860 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004602693206436714, + "loss": 3.4458, + "step": 18865 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004601997728037726, + "loss": 3.4955, + "step": 18870 + }, + { + "epoch": 0.33, + "learning_rate": 0.00046013021291767183, + "loss": 3.5146, + "step": 18875 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004600606409905997, + "loss": 3.5533, + "step": 18880 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045999105702778775, + "loss": 3.415, + "step": 18885 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004599214610344682, + "loss": 3.4769, + "step": 18890 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045985185301587435, + "loss": 3.4555, + "step": 18895 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004597822329772403, + "loss": 3.4481, + "step": 18900 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045971260092380137, + "loss": 3.391, + "step": 18905 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045964295686079325, + "loss": 3.3999, + "step": 18910 + }, + { + "epoch": 0.33, + "learning_rate": 0.000459573300793453, + "loss": 3.5864, + "step": 18915 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045950363272701824, + "loss": 3.5394, + "step": 18920 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045943395266672787, + "loss": 3.5128, + "step": 18925 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045936426061782135, + "loss": 3.5267, + "step": 18930 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045929455658553895, + "loss": 3.4345, + "step": 18935 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045922484057512247, + "loss": 3.6213, + "step": 18940 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045915511259181385, + "loss": 3.5013, + "step": 18945 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045908537264085657, + "loss": 3.561, + "step": 18950 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004590156207274945, + "loss": 3.5136, + "step": 18955 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045894585685697263, + "loss": 3.4877, + "step": 18960 + }, + { + "epoch": 0.33, + "learning_rate": 0.000458876081034537, + "loss": 3.4736, + "step": 18965 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004588062932654343, + "loss": 3.5516, + "step": 18970 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045873649355491217, + "loss": 3.4, + "step": 18975 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004586666819082192, + "loss": 3.5071, + "step": 18980 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004585968583306049, + "loss": 3.5604, + "step": 18985 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045852702282731976, + "loss": 3.5457, + "step": 18990 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004584571754036149, + "loss": 3.4559, + "step": 18995 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004583873160647425, + "loss": 3.6102, + "step": 19000 + }, + { + "epoch": 0.33, + "eval_loss": 3.5276401042938232, + "eval_runtime": 149.8717, + "eval_samples_per_second": 12.284, + "eval_steps_per_second": 0.774, + "step": 19000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004583174448159557, + "loss": 3.4795, + "step": 19005 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004582475616625084, + "loss": 3.4671, + "step": 19010 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045817766660965543, + "loss": 3.5596, + "step": 19015 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004581077596626526, + "loss": 3.6208, + "step": 19020 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004580378408267565, + "loss": 3.5096, + "step": 19025 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045796791010722487, + "loss": 3.4556, + "step": 19030 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045789796750931574, + "loss": 3.4603, + "step": 19035 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045782801303828873, + "loss": 3.4485, + "step": 19040 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004577580466994041, + "loss": 3.4624, + "step": 19045 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045768806849792276, + "loss": 3.4523, + "step": 19050 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045761807843910685, + "loss": 3.5471, + "step": 19055 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004575480765282192, + "loss": 3.3223, + "step": 19060 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045747806277052363, + "loss": 3.4825, + "step": 19065 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045740803717128484, + "loss": 3.3537, + "step": 19070 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004573379997357682, + "loss": 3.3989, + "step": 19075 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004572679504692405, + "loss": 3.5855, + "step": 19080 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045719788937696883, + "loss": 3.5294, + "step": 19085 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004571278164642215, + "loss": 3.4447, + "step": 19090 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004570577317362677, + "loss": 3.6258, + "step": 19095 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045698763519837725, + "loss": 3.4541, + "step": 19100 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045691752685582133, + "loss": 3.5693, + "step": 19105 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004568474067138716, + "loss": 3.4704, + "step": 19110 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045677727477780056, + "loss": 3.5682, + "step": 19115 + }, + { + "epoch": 0.33, + "learning_rate": 0.000456707131052882, + "loss": 3.4419, + "step": 19120 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045663697554439034, + "loss": 3.4335, + "step": 19125 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004565668082576008, + "loss": 3.5477, + "step": 19130 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045649662919778976, + "loss": 3.5055, + "step": 19135 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045642643837023406, + "loss": 3.6236, + "step": 19140 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004563562357802121, + "loss": 3.5487, + "step": 19145 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045628602143300243, + "loss": 3.4841, + "step": 19150 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004562157953338848, + "loss": 3.4994, + "step": 19155 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045614555748814005, + "loss": 3.4607, + "step": 19160 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045607530790104956, + "loss": 3.4797, + "step": 19165 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045600504657789594, + "loss": 3.5536, + "step": 19170 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045593477352396213, + "loss": 3.5152, + "step": 19175 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004558644887445326, + "loss": 3.5431, + "step": 19180 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004557941922448923, + "loss": 3.4514, + "step": 19185 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045572388403032724, + "loss": 3.3985, + "step": 19190 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004556535641061241, + "loss": 3.4395, + "step": 19195 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045558323247757063, + "loss": 3.4796, + "step": 19200 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045551288914995536, + "loss": 3.4244, + "step": 19205 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045544253412856806, + "loss": 3.4544, + "step": 19210 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045537216741869865, + "loss": 3.5251, + "step": 19215 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045530178902563847, + "loss": 3.5224, + "step": 19220 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004552313989546798, + "loss": 3.2957, + "step": 19225 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004551609972111154, + "loss": 3.4405, + "step": 19230 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004550905838002392, + "loss": 3.4844, + "step": 19235 + }, + { + "epoch": 0.33, + "learning_rate": 0.000455020158727346, + "loss": 3.549, + "step": 19240 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004549497219977312, + "loss": 3.5311, + "step": 19245 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004548792736166915, + "loss": 3.4819, + "step": 19250 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045480881358952416, + "loss": 3.492, + "step": 19255 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045473834192152744, + "loss": 3.4925, + "step": 19260 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045466785861800034, + "loss": 3.4899, + "step": 19265 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045459736368424304, + "loss": 3.5294, + "step": 19270 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004545268571255563, + "loss": 3.4631, + "step": 19275 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004544563389472417, + "loss": 3.436, + "step": 19280 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004543858091546021, + "loss": 3.4474, + "step": 19285 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045431526775294086, + "loss": 3.4655, + "step": 19290 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045424471474756227, + "loss": 3.4941, + "step": 19295 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045417415014377166, + "loss": 3.5932, + "step": 19300 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004541035739468751, + "loss": 3.3979, + "step": 19305 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045403298616217963, + "loss": 3.4772, + "step": 19310 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045396238679499293, + "loss": 3.5518, + "step": 19315 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004538917758506237, + "loss": 3.4183, + "step": 19320 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004538211533343817, + "loss": 3.5486, + "step": 19325 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004537505192515773, + "loss": 3.3749, + "step": 19330 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045367987360752174, + "loss": 3.4757, + "step": 19335 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045360921640752737, + "loss": 3.4974, + "step": 19340 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004535385476569071, + "loss": 3.4206, + "step": 19345 + }, + { + "epoch": 0.33, + "learning_rate": 0.000453467867360975, + "loss": 3.5497, + "step": 19350 + }, + { + "epoch": 0.33, + "learning_rate": 0.00045339717552504564, + "loss": 3.4373, + "step": 19355 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004533264721544349, + "loss": 3.4262, + "step": 19360 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004532557572544593, + "loss": 3.5594, + "step": 19365 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004531850308304361, + "loss": 3.4997, + "step": 19370 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045311429288768374, + "loss": 3.464, + "step": 19375 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045304354343152117, + "loss": 3.4962, + "step": 19380 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045297278246726844, + "loss": 3.5346, + "step": 19385 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045290201000024654, + "loss": 3.5029, + "step": 19390 + }, + { + "epoch": 0.34, + "learning_rate": 0.000452831226035777, + "loss": 3.5546, + "step": 19395 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045276043057918263, + "loss": 3.4901, + "step": 19400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004526896236357866, + "loss": 3.4695, + "step": 19405 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004526188052109135, + "loss": 3.5047, + "step": 19410 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045254797530988844, + "loss": 3.451, + "step": 19415 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004524771339380374, + "loss": 3.436, + "step": 19420 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045240628110068725, + "loss": 3.5381, + "step": 19425 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045233541680316593, + "loss": 3.4288, + "step": 19430 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004522645410508019, + "loss": 3.5495, + "step": 19435 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004521936538489248, + "loss": 3.4837, + "step": 19440 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045212275520286477, + "loss": 3.4782, + "step": 19445 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045205184511795335, + "loss": 3.4985, + "step": 19450 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004519809235995223, + "loss": 3.535, + "step": 19455 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004519099906529047, + "loss": 3.5755, + "step": 19460 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045183904628343444, + "loss": 3.5931, + "step": 19465 + }, + { + "epoch": 0.34, + "learning_rate": 0.000451768090496446, + "loss": 3.5447, + "step": 19470 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004516971232972749, + "loss": 3.435, + "step": 19475 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004516261446912576, + "loss": 3.43, + "step": 19480 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004515551546837313, + "loss": 3.4134, + "step": 19485 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004514841532800341, + "loss": 3.393, + "step": 19490 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045141314048550486, + "loss": 3.4855, + "step": 19495 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004513421163054834, + "loss": 3.4721, + "step": 19500 + }, + { + "epoch": 0.34, + "eval_loss": 3.5094563961029053, + "eval_runtime": 149.8708, + "eval_samples_per_second": 12.284, + "eval_steps_per_second": 0.774, + "step": 19500 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045127108074531045, + "loss": 3.4603, + "step": 19505 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045120003381032755, + "loss": 3.3118, + "step": 19510 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045112897550587705, + "loss": 3.4607, + "step": 19515 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004510579058373019, + "loss": 3.4662, + "step": 19520 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004509868248099465, + "loss": 3.5243, + "step": 19525 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004509157324291557, + "loss": 3.4312, + "step": 19530 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004508446287002752, + "loss": 3.3722, + "step": 19535 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004507735136286517, + "loss": 3.3688, + "step": 19540 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045070238721963264, + "loss": 3.3627, + "step": 19545 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004506312494785665, + "loss": 3.4587, + "step": 19550 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045056010041080236, + "loss": 3.3304, + "step": 19555 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004504889400216902, + "loss": 3.4772, + "step": 19560 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045041776831658103, + "loss": 3.4572, + "step": 19565 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004503465853008266, + "loss": 3.504, + "step": 19570 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004502753909797794, + "loss": 3.4871, + "step": 19575 + }, + { + "epoch": 0.34, + "learning_rate": 0.00045020418535879293, + "loss": 3.5595, + "step": 19580 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004501329684432215, + "loss": 3.5133, + "step": 19585 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004500617402384204, + "loss": 3.4042, + "step": 19590 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004499905007497454, + "loss": 3.6127, + "step": 19595 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044991924998255344, + "loss": 3.344, + "step": 19600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004498479879422022, + "loss": 3.4879, + "step": 19605 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004497767146340502, + "loss": 3.5619, + "step": 19610 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044970543006345686, + "loss": 3.4435, + "step": 19615 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044963413423578245, + "loss": 3.5145, + "step": 19620 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044956282715638796, + "loss": 3.5095, + "step": 19625 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044949150883063545, + "loss": 3.5551, + "step": 19630 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044942017926388755, + "loss": 3.5651, + "step": 19635 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044934883846150793, + "loss": 3.3975, + "step": 19640 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004492774864288611, + "loss": 3.5149, + "step": 19645 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044920612317131224, + "loss": 3.4926, + "step": 19650 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004491347486942277, + "loss": 3.3984, + "step": 19655 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044906336300297423, + "loss": 3.3962, + "step": 19660 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004489919661029198, + "loss": 3.4718, + "step": 19665 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044892055799943323, + "loss": 3.4969, + "step": 19670 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044884913869788377, + "loss": 3.3791, + "step": 19675 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004487777082036421, + "loss": 3.4044, + "step": 19680 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044870626652207903, + "loss": 3.4759, + "step": 19685 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004486348136585669, + "loss": 3.4839, + "step": 19690 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044856334961847867, + "loss": 3.5427, + "step": 19695 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044849187440718774, + "loss": 3.5028, + "step": 19700 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004484203880300689, + "loss": 3.5073, + "step": 19705 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044834889049249755, + "loss": 3.4769, + "step": 19710 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004482773817998499, + "loss": 3.5275, + "step": 19715 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004482058619575031, + "loss": 3.5522, + "step": 19720 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044813433097083493, + "loss": 3.5725, + "step": 19725 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004480627888452243, + "loss": 3.3296, + "step": 19730 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004479912355860508, + "loss": 3.575, + "step": 19735 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044791967119869473, + "loss": 3.485, + "step": 19740 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044784809568853754, + "loss": 3.5094, + "step": 19745 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004477765090609612, + "loss": 3.4432, + "step": 19750 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004477049113213488, + "loss": 3.4815, + "step": 19755 + }, + { + "epoch": 0.34, + "learning_rate": 0.000447633302475084, + "loss": 3.5307, + "step": 19760 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004475616825275514, + "loss": 3.479, + "step": 19765 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004474900514841366, + "loss": 3.4396, + "step": 19770 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004474184093502258, + "loss": 3.5377, + "step": 19775 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004473467561312061, + "loss": 3.5034, + "step": 19780 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044727509183246553, + "loss": 3.4338, + "step": 19785 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004472034164593927, + "loss": 3.492, + "step": 19790 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004471317300173776, + "loss": 3.478, + "step": 19795 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004470600325118102, + "loss": 3.5671, + "step": 19800 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044698832394808206, + "loss": 3.3984, + "step": 19805 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004469166043315853, + "loss": 3.5496, + "step": 19810 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004468448736677129, + "loss": 3.4328, + "step": 19815 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004467731319618585, + "loss": 3.4768, + "step": 19820 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004467013792194168, + "loss": 3.4958, + "step": 19825 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044662961544578313, + "loss": 3.4901, + "step": 19830 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044655784064635406, + "loss": 3.4205, + "step": 19835 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044648605482652625, + "loss": 3.5096, + "step": 19840 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004464142579916979, + "loss": 3.5586, + "step": 19845 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044634245014726773, + "loss": 3.5259, + "step": 19850 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044627063129863535, + "loss": 3.396, + "step": 19855 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004461988014512011, + "loss": 3.4005, + "step": 19860 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044612696061036613, + "loss": 3.4798, + "step": 19865 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044605510878153277, + "loss": 3.5662, + "step": 19870 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044598324597010366, + "loss": 3.4116, + "step": 19875 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004459113721814826, + "loss": 3.4119, + "step": 19880 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044583948742107416, + "loss": 3.4519, + "step": 19885 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044576759169428365, + "loss": 3.4095, + "step": 19890 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004456956850065173, + "loss": 3.5294, + "step": 19895 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004456237673631822, + "loss": 3.3993, + "step": 19900 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044555183876968595, + "loss": 3.4356, + "step": 19905 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004454798992314375, + "loss": 3.4959, + "step": 19910 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004454079487538462, + "loss": 3.5206, + "step": 19915 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044533598734232226, + "loss": 3.3217, + "step": 19920 + }, + { + "epoch": 0.34, + "learning_rate": 0.000445264015002277, + "loss": 3.486, + "step": 19925 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044519203173912214, + "loss": 3.5203, + "step": 19930 + }, + { + "epoch": 0.34, + "learning_rate": 0.00044512003755827075, + "loss": 3.4889, + "step": 19935 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004450480324651362, + "loss": 3.4719, + "step": 19940 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044497601646513294, + "loss": 3.4491, + "step": 19945 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044490398956367635, + "loss": 3.4391, + "step": 19950 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004448319517661823, + "loss": 3.3457, + "step": 19955 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044475990307806765, + "loss": 3.3828, + "step": 19960 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044468784350475024, + "loss": 3.4094, + "step": 19965 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004446157730516485, + "loss": 3.3578, + "step": 19970 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044454369172418183, + "loss": 3.4901, + "step": 19975 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004444715995277702, + "loss": 3.4578, + "step": 19980 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004443994964678347, + "loss": 3.5705, + "step": 19985 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004443273825497972, + "loss": 3.356, + "step": 19990 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044425525777908015, + "loss": 3.5245, + "step": 19995 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044418312216110703, + "loss": 3.506, + "step": 20000 + }, + { + "epoch": 0.35, + "eval_loss": 3.5002212524414062, + "eval_runtime": 149.672, + "eval_samples_per_second": 12.3, + "eval_steps_per_second": 0.775, + "step": 20000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044411097570130204, + "loss": 3.4232, + "step": 20005 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004440388184050902, + "loss": 3.4982, + "step": 20010 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044396665027789746, + "loss": 3.4513, + "step": 20015 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044389447132515037, + "loss": 3.3997, + "step": 20020 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004438222815522765, + "loss": 3.5083, + "step": 20025 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004437500809647042, + "loss": 3.4299, + "step": 20030 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004436778695678625, + "loss": 3.4987, + "step": 20035 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004436056473671813, + "loss": 3.5377, + "step": 20040 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004435334143680914, + "loss": 3.5088, + "step": 20045 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044346117057602425, + "loss": 3.4873, + "step": 20050 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004433889159964125, + "loss": 3.474, + "step": 20055 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044331665063468894, + "loss": 3.5821, + "step": 20060 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044324437449628785, + "loss": 3.5429, + "step": 20065 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044317208758664386, + "loss": 3.2625, + "step": 20070 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044309978991119263, + "loss": 3.3802, + "step": 20075 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004430274814753706, + "loss": 3.5245, + "step": 20080 + }, + { + "epoch": 0.35, + "learning_rate": 0.000442955162284615, + "loss": 3.5747, + "step": 20085 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004428828323443638, + "loss": 3.4744, + "step": 20090 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044281049166005585, + "loss": 3.5422, + "step": 20095 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004427381402371308, + "loss": 3.3606, + "step": 20100 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044266577808102915, + "loss": 3.4586, + "step": 20105 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004425934051971921, + "loss": 3.4529, + "step": 20110 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044252102159106187, + "loss": 3.4736, + "step": 20115 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044244862726808114, + "loss": 3.415, + "step": 20120 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004423762222336936, + "loss": 3.4178, + "step": 20125 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044230380649334396, + "loss": 3.4815, + "step": 20130 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004422313800524773, + "loss": 3.4722, + "step": 20135 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004421589429165399, + "loss": 3.5723, + "step": 20140 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044208649509097837, + "loss": 3.4964, + "step": 20145 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044201403658124055, + "loss": 3.3457, + "step": 20150 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044194156739277524, + "loss": 3.5067, + "step": 20155 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044186908753103137, + "loss": 3.4389, + "step": 20160 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004417965970014591, + "loss": 3.3161, + "step": 20165 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044172409580950947, + "loss": 3.5093, + "step": 20170 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004416515839606343, + "loss": 3.3348, + "step": 20175 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044157906146028586, + "loss": 3.4568, + "step": 20180 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044150652831391755, + "loss": 3.4137, + "step": 20185 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004414339845269835, + "loss": 3.3819, + "step": 20190 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044136143010493884, + "loss": 3.5321, + "step": 20195 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044128886505323897, + "loss": 3.458, + "step": 20200 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044121628937734053, + "loss": 3.4455, + "step": 20205 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004411437030827008, + "loss": 3.4663, + "step": 20210 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004410711061747781, + "loss": 3.5031, + "step": 20215 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044099849865903107, + "loss": 3.526, + "step": 20220 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004409258805409196, + "loss": 3.4007, + "step": 20225 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004408532518259041, + "loss": 3.4619, + "step": 20230 + }, + { + "epoch": 0.35, + "learning_rate": 0.000440780612519446, + "loss": 3.4583, + "step": 20235 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004407079626270072, + "loss": 3.4842, + "step": 20240 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004406353021540509, + "loss": 3.5053, + "step": 20245 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044056263110604045, + "loss": 3.5203, + "step": 20250 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044048994948844063, + "loss": 3.5216, + "step": 20255 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004404172573067166, + "loss": 3.4012, + "step": 20260 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004403445545663344, + "loss": 3.4549, + "step": 20265 + }, + { + "epoch": 0.35, + "learning_rate": 0.000440271841272761, + "loss": 3.3967, + "step": 20270 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044019911743146397, + "loss": 3.4056, + "step": 20275 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004401263830479118, + "loss": 3.5852, + "step": 20280 + }, + { + "epoch": 0.35, + "learning_rate": 0.00044005363812757377, + "loss": 3.5015, + "step": 20285 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043998088267591996, + "loss": 3.5004, + "step": 20290 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043990811669842114, + "loss": 3.4759, + "step": 20295 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043983534020054907, + "loss": 3.4747, + "step": 20300 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043976255318777587, + "loss": 3.4997, + "step": 20305 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004396897556655751, + "loss": 3.4021, + "step": 20310 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043961694763942053, + "loss": 3.5991, + "step": 20315 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043954412911478705, + "loss": 3.4963, + "step": 20320 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004394713000971501, + "loss": 3.3867, + "step": 20325 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004393984605919863, + "loss": 3.493, + "step": 20330 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004393256106047726, + "loss": 3.4838, + "step": 20335 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004392527501409871, + "loss": 3.4146, + "step": 20340 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004391798792061084, + "loss": 3.4319, + "step": 20345 + }, + { + "epoch": 0.35, + "learning_rate": 0.000439106997805616, + "loss": 3.3523, + "step": 20350 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004390341059449903, + "loss": 3.4308, + "step": 20355 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043896120362971246, + "loss": 3.3949, + "step": 20360 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004388882908652642, + "loss": 3.5024, + "step": 20365 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004388153676571283, + "loss": 3.4641, + "step": 20370 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043874243401078813, + "loss": 3.4261, + "step": 20375 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043866948993172793, + "loss": 3.4187, + "step": 20380 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043859653542543286, + "loss": 3.4059, + "step": 20385 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043852357049738855, + "loss": 3.381, + "step": 20390 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004384505951530817, + "loss": 3.5046, + "step": 20395 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004383776093979996, + "loss": 3.4859, + "step": 20400 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043830461323763047, + "loss": 3.5565, + "step": 20405 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004382316066774633, + "loss": 3.4298, + "step": 20410 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043815858972298763, + "loss": 3.4938, + "step": 20415 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043808556237969403, + "loss": 3.5072, + "step": 20420 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043801252465307396, + "loss": 3.5391, + "step": 20425 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004379394765486192, + "loss": 3.4818, + "step": 20430 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004378664180718228, + "loss": 3.4382, + "step": 20435 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004377933492281783, + "loss": 3.5275, + "step": 20440 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004377202700231801, + "loss": 3.4213, + "step": 20445 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043764718046232346, + "loss": 3.359, + "step": 20450 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004375740805511043, + "loss": 3.5426, + "step": 20455 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043750097029501925, + "loss": 3.4999, + "step": 20460 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004374278496995659, + "loss": 3.4504, + "step": 20465 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004373547187702426, + "loss": 3.4479, + "step": 20470 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043728157751254834, + "loss": 3.4309, + "step": 20475 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004372084259319831, + "loss": 3.468, + "step": 20480 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043713526403404746, + "loss": 3.4853, + "step": 20485 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004370620918242426, + "loss": 3.4808, + "step": 20490 + }, + { + "epoch": 0.35, + "learning_rate": 0.000436988909308071, + "loss": 3.4663, + "step": 20495 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004369157164910354, + "loss": 3.3601, + "step": 20500 + }, + { + "epoch": 0.35, + "eval_loss": 3.4890780448913574, + "eval_runtime": 149.7679, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.775, + "step": 20500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043684251337863957, + "loss": 3.503, + "step": 20505 + }, + { + "epoch": 0.35, + "learning_rate": 0.00043676929997638807, + "loss": 3.4588, + "step": 20510 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043669607628978616, + "loss": 3.5404, + "step": 20515 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043662284232433995, + "loss": 3.4366, + "step": 20520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004365495980855561, + "loss": 3.3995, + "step": 20525 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043647634357894225, + "loss": 3.3957, + "step": 20530 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004364030788100069, + "loss": 3.5176, + "step": 20535 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043632980378425894, + "loss": 3.5703, + "step": 20540 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043625651850720846, + "loss": 3.4949, + "step": 20545 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043618322298436614, + "loss": 3.4727, + "step": 20550 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004361099172212433, + "loss": 3.4264, + "step": 20555 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004360366012233523, + "loss": 3.4806, + "step": 20560 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043596327499620613, + "loss": 3.4705, + "step": 20565 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043588993854531835, + "loss": 3.4814, + "step": 20570 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004358165918762037, + "loss": 3.4198, + "step": 20575 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043574323499437736, + "loss": 3.3648, + "step": 20580 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004356698679053554, + "loss": 3.5215, + "step": 20585 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004355964906146547, + "loss": 3.4305, + "step": 20590 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004355231031277928, + "loss": 3.3691, + "step": 20595 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004354497054502882, + "loss": 3.4175, + "step": 20600 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043537629758765977, + "loss": 3.4657, + "step": 20605 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004353028795454275, + "loss": 3.5085, + "step": 20610 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043522945132911235, + "loss": 3.4169, + "step": 20615 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004351560129442354, + "loss": 3.4685, + "step": 20620 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043508256439631884, + "loss": 3.5605, + "step": 20625 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043500910569088577, + "loss": 3.4576, + "step": 20630 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004349356368334599, + "loss": 3.5073, + "step": 20635 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004348621578295657, + "loss": 3.456, + "step": 20640 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043478866868472833, + "loss": 3.3731, + "step": 20645 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004347151694044738, + "loss": 3.4255, + "step": 20650 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043464165999432903, + "loss": 3.4228, + "step": 20655 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004345681404598215, + "loss": 3.4786, + "step": 20660 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004344946108064794, + "loss": 3.4763, + "step": 20665 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004344210710398318, + "loss": 3.4635, + "step": 20670 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043434752116540854, + "loss": 3.4831, + "step": 20675 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004342739611887403, + "loss": 3.4503, + "step": 20680 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004342003911153583, + "loss": 3.4472, + "step": 20685 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043412681095079465, + "loss": 3.346, + "step": 20690 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043405322070058216, + "loss": 3.3185, + "step": 20695 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004339796203702545, + "loss": 3.5082, + "step": 20700 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043390600996534606, + "loss": 3.5576, + "step": 20705 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004338323894913919, + "loss": 3.4343, + "step": 20710 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043375875895392793, + "loss": 3.5085, + "step": 20715 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004336851183584908, + "loss": 3.3682, + "step": 20720 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004336114677106179, + "loss": 3.419, + "step": 20725 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004335378070158474, + "loss": 3.5034, + "step": 20730 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043346413627971803, + "loss": 3.4361, + "step": 20735 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043339045550776976, + "loss": 3.402, + "step": 20740 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043331676470554286, + "loss": 3.4727, + "step": 20745 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004332430638785784, + "loss": 3.5097, + "step": 20750 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004331693530324184, + "loss": 3.4251, + "step": 20755 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043309563217260564, + "loss": 3.4258, + "step": 20760 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004330219013046834, + "loss": 3.3033, + "step": 20765 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004329481604341959, + "loss": 3.4339, + "step": 20770 + }, + { + "epoch": 0.36, + "learning_rate": 0.000432874409566688, + "loss": 3.4677, + "step": 20775 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004328006487077056, + "loss": 3.3985, + "step": 20780 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043272687786279497, + "loss": 3.4579, + "step": 20785 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043265309703750327, + "loss": 3.3846, + "step": 20790 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004325793062373786, + "loss": 3.5574, + "step": 20795 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043250550546796953, + "loss": 3.4578, + "step": 20800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004324316947348255, + "loss": 3.4623, + "step": 20805 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004323578740434967, + "loss": 3.5179, + "step": 20810 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004322840433995342, + "loss": 3.459, + "step": 20815 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004322102028084896, + "loss": 3.478, + "step": 20820 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043213635227591516, + "loss": 3.3421, + "step": 20825 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004320624918073643, + "loss": 3.3589, + "step": 20830 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004319886214083908, + "loss": 3.4584, + "step": 20835 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004319147410845495, + "loss": 3.3751, + "step": 20840 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004318408508413957, + "loss": 3.6159, + "step": 20845 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043176695068448555, + "loss": 3.3952, + "step": 20850 + }, + { + "epoch": 0.36, + "learning_rate": 0.000431693040619376, + "loss": 3.5016, + "step": 20855 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004316191206516248, + "loss": 3.412, + "step": 20860 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004315451907867902, + "loss": 3.5053, + "step": 20865 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043147125103043147, + "loss": 3.4214, + "step": 20870 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043139730138810835, + "loss": 3.4585, + "step": 20875 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004313233418653816, + "loss": 3.3709, + "step": 20880 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043124937246781254, + "loss": 3.419, + "step": 20885 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043117539320096333, + "loss": 3.4385, + "step": 20890 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043110140407039686, + "loss": 3.4396, + "step": 20895 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004310274050816767, + "loss": 3.3851, + "step": 20900 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043095339624036705, + "loss": 3.4022, + "step": 20905 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004308793775520333, + "loss": 3.4697, + "step": 20910 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043080534902224094, + "loss": 3.3228, + "step": 20915 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043073131065655685, + "loss": 3.4832, + "step": 20920 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004306572624605481, + "loss": 3.4429, + "step": 20925 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004305832044397828, + "loss": 3.3656, + "step": 20930 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043050913659982984, + "loss": 3.4722, + "step": 20935 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043043505894625864, + "loss": 3.4771, + "step": 20940 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004303609714846394, + "loss": 3.4003, + "step": 20945 + }, + { + "epoch": 0.36, + "learning_rate": 0.00043028687422054317, + "loss": 3.5443, + "step": 20950 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004302127671595418, + "loss": 3.4675, + "step": 20955 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004301386503072077, + "loss": 3.4031, + "step": 20960 + }, + { + "epoch": 0.36, + "learning_rate": 0.000430064523669114, + "loss": 3.3901, + "step": 20965 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004299903872508346, + "loss": 3.4691, + "step": 20970 + }, + { + "epoch": 0.36, + "learning_rate": 0.00042991624105794435, + "loss": 3.4782, + "step": 20975 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004298420850960186, + "loss": 3.481, + "step": 20980 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004297679193706335, + "loss": 3.4205, + "step": 20985 + }, + { + "epoch": 0.36, + "learning_rate": 0.00042969374388736586, + "loss": 3.4272, + "step": 20990 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004296195586517933, + "loss": 3.4391, + "step": 20995 + }, + { + "epoch": 0.36, + "learning_rate": 0.00042954536366949435, + "loss": 3.5532, + "step": 21000 + }, + { + "epoch": 0.36, + "eval_loss": 3.4810240268707275, + "eval_runtime": 149.6804, + "eval_samples_per_second": 12.3, + "eval_steps_per_second": 0.775, + "step": 21000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004294711589460479, + "loss": 3.5336, + "step": 21005 + }, + { + "epoch": 0.36, + "learning_rate": 0.00042939694448703385, + "loss": 3.5589, + "step": 21010 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004293227202980327, + "loss": 3.3975, + "step": 21015 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004292484863846257, + "loss": 3.4718, + "step": 21020 + }, + { + "epoch": 0.36, + "learning_rate": 0.000429174242752395, + "loss": 3.4397, + "step": 21025 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004290999894069232, + "loss": 3.5044, + "step": 21030 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004290257263537938, + "loss": 3.4743, + "step": 21035 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004289514535985911, + "loss": 3.4612, + "step": 21040 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004288771711468999, + "loss": 3.5065, + "step": 21045 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004288028790043059, + "loss": 3.3358, + "step": 21050 + }, + { + "epoch": 0.36, + "learning_rate": 0.00042872857717639556, + "loss": 3.4931, + "step": 21055 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004286542656687559, + "loss": 3.4695, + "step": 21060 + }, + { + "epoch": 0.36, + "learning_rate": 0.00042857994448697485, + "loss": 3.3413, + "step": 21065 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004285056136366409, + "loss": 3.497, + "step": 21070 + }, + { + "epoch": 0.36, + "learning_rate": 0.00042843127312334345, + "loss": 3.4309, + "step": 21075 + }, + { + "epoch": 0.36, + "learning_rate": 0.00042835692295267237, + "loss": 3.5423, + "step": 21080 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004282825631302185, + "loss": 3.4508, + "step": 21085 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004282081936615734, + "loss": 3.4325, + "step": 21090 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004281338145523291, + "loss": 3.4188, + "step": 21095 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042805942580807863, + "loss": 3.3961, + "step": 21100 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042798502743441577, + "loss": 3.5294, + "step": 21105 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004279106194369346, + "loss": 3.5052, + "step": 21110 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004278362018212304, + "loss": 3.4474, + "step": 21115 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004277617745928991, + "loss": 3.4268, + "step": 21120 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042768733775753705, + "loss": 3.4271, + "step": 21125 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004276128913207415, + "loss": 3.319, + "step": 21130 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042753843528811055, + "loss": 3.4287, + "step": 21135 + }, + { + "epoch": 0.37, + "learning_rate": 0.000427463969665243, + "loss": 3.502, + "step": 21140 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004273894944577381, + "loss": 3.3916, + "step": 21145 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042731500967119613, + "loss": 3.4751, + "step": 21150 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004272405153112179, + "loss": 3.3869, + "step": 21155 + }, + { + "epoch": 0.37, + "learning_rate": 0.000427166011383405, + "loss": 3.3328, + "step": 21160 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004270914978933598, + "loss": 3.5021, + "step": 21165 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042701697484668515, + "loss": 3.5497, + "step": 21170 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004269424422489851, + "loss": 3.4791, + "step": 21175 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042686790010586386, + "loss": 3.4404, + "step": 21180 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004267933484229267, + "loss": 3.3707, + "step": 21185 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004267187872057797, + "loss": 3.4202, + "step": 21190 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042664421646002916, + "loss": 3.4687, + "step": 21195 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042656963619128267, + "loss": 3.4317, + "step": 21200 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042649504640514816, + "loss": 3.3562, + "step": 21205 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004264204471072345, + "loss": 3.5205, + "step": 21210 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042634583830315103, + "loss": 3.3985, + "step": 21215 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042627121999850806, + "loss": 3.4669, + "step": 21220 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004261965921989165, + "loss": 3.3633, + "step": 21225 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042612195490998795, + "loss": 3.4944, + "step": 21230 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004260473081373347, + "loss": 3.4313, + "step": 21235 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042597265188656995, + "loss": 3.3019, + "step": 21240 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004258979861633073, + "loss": 3.5405, + "step": 21245 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004258233109731613, + "loss": 3.4114, + "step": 21250 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004257486263217473, + "loss": 3.5809, + "step": 21255 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042567393221468094, + "loss": 3.4726, + "step": 21260 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042559922865757896, + "loss": 3.4404, + "step": 21265 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004255245156560586, + "loss": 3.4897, + "step": 21270 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042544979321573794, + "loss": 3.4024, + "step": 21275 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042537506134223595, + "loss": 3.4146, + "step": 21280 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004253003200411717, + "loss": 3.2768, + "step": 21285 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004252255693181657, + "loss": 3.3862, + "step": 21290 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004251508091788385, + "loss": 3.4472, + "step": 21295 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042507603962881193, + "loss": 3.3695, + "step": 21300 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042500126067370814, + "loss": 3.5154, + "step": 21305 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042492647231915016, + "loss": 3.466, + "step": 21310 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004248516745707618, + "loss": 3.5312, + "step": 21315 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004247768674341672, + "loss": 3.4309, + "step": 21320 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004247020509149918, + "loss": 3.3506, + "step": 21325 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042462722501886114, + "loss": 3.3954, + "step": 21330 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042455238975140186, + "loss": 3.4317, + "step": 21335 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004244775451182413, + "loss": 3.4081, + "step": 21340 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004244026911250072, + "loss": 3.4154, + "step": 21345 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004243278277773283, + "loss": 3.378, + "step": 21350 + }, + { + "epoch": 0.37, + "learning_rate": 0.000424252955080834, + "loss": 3.4763, + "step": 21355 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004241780730411541, + "loss": 3.3921, + "step": 21360 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004241031816639197, + "loss": 3.4287, + "step": 21365 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004240282809547619, + "loss": 3.4556, + "step": 21370 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042395337091931314, + "loss": 3.5046, + "step": 21375 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004238784515632061, + "loss": 3.3785, + "step": 21380 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004238035228920743, + "loss": 3.4726, + "step": 21385 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042372858491155203, + "loss": 3.5175, + "step": 21390 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004236536376272744, + "loss": 3.4565, + "step": 21395 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042357868104487683, + "loss": 3.4254, + "step": 21400 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042350371516999586, + "loss": 3.4177, + "step": 21405 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004234287400082684, + "loss": 3.4484, + "step": 21410 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004233537555653322, + "loss": 3.4054, + "step": 21415 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042327876184682594, + "loss": 3.3857, + "step": 21420 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004232037588583885, + "loss": 3.451, + "step": 21425 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004231287466056597, + "loss": 3.4559, + "step": 21430 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004230537250942803, + "loss": 3.4196, + "step": 21435 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004229786943298914, + "loss": 3.454, + "step": 21440 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004229036543181349, + "loss": 3.4613, + "step": 21445 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004228286050646535, + "loss": 3.4041, + "step": 21450 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042275354657509036, + "loss": 3.4881, + "step": 21455 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042267847885508976, + "loss": 3.5192, + "step": 21460 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042260340191029624, + "loss": 3.4635, + "step": 21465 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004225283157463553, + "loss": 3.4071, + "step": 21470 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004224532203689128, + "loss": 3.4137, + "step": 21475 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004223781157836158, + "loss": 3.3856, + "step": 21480 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004223030019961116, + "loss": 3.4598, + "step": 21485 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042222787901204854, + "loss": 3.4487, + "step": 21490 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004221527468370754, + "loss": 3.4663, + "step": 21495 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042207760547684155, + "loss": 3.4861, + "step": 21500 + }, + { + "epoch": 0.37, + "eval_loss": 3.4763381481170654, + "eval_runtime": 149.6778, + "eval_samples_per_second": 12.3, + "eval_steps_per_second": 0.775, + "step": 21500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004220024549369975, + "loss": 3.5112, + "step": 21505 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004219272952231942, + "loss": 3.4445, + "step": 21510 + }, + { + "epoch": 0.37, + "learning_rate": 0.000421852126341083, + "loss": 3.4347, + "step": 21515 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042177694829631653, + "loss": 3.4521, + "step": 21520 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042170176109454754, + "loss": 3.3605, + "step": 21525 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004216265647414298, + "loss": 3.3925, + "step": 21530 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042155135924261785, + "loss": 3.4833, + "step": 21535 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042147614460376646, + "loss": 3.435, + "step": 21540 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004214009208305317, + "loss": 3.4267, + "step": 21545 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004213256879285698, + "loss": 3.4272, + "step": 21550 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042125044590353796, + "loss": 3.3872, + "step": 21555 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042117519476109395, + "loss": 3.406, + "step": 21560 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004210999345068963, + "loss": 3.3582, + "step": 21565 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004210246651466043, + "loss": 3.4148, + "step": 21570 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042094938668587765, + "loss": 3.4063, + "step": 21575 + }, + { + "epoch": 0.37, + "learning_rate": 0.000420874099130377, + "loss": 3.472, + "step": 21580 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004207988024857636, + "loss": 3.5276, + "step": 21585 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042072349675769923, + "loss": 3.5215, + "step": 21590 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042064818195184663, + "loss": 3.5187, + "step": 21595 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004205728580738691, + "loss": 3.4223, + "step": 21600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004204975251294306, + "loss": 3.3929, + "step": 21605 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042042218312419576, + "loss": 3.5108, + "step": 21610 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042034683206382983, + "loss": 3.571, + "step": 21615 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004202714719539989, + "loss": 3.5817, + "step": 21620 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004201961028003697, + "loss": 3.4792, + "step": 21625 + }, + { + "epoch": 0.37, + "learning_rate": 0.00042012072460860956, + "loss": 3.4894, + "step": 21630 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004200453373843865, + "loss": 3.4205, + "step": 21635 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004199699411333693, + "loss": 3.5227, + "step": 21640 + }, + { + "epoch": 0.37, + "learning_rate": 0.00041989453586122744, + "loss": 3.3261, + "step": 21645 + }, + { + "epoch": 0.37, + "learning_rate": 0.00041981912157363087, + "loss": 3.4374, + "step": 21650 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004197436982762504, + "loss": 3.4541, + "step": 21655 + }, + { + "epoch": 0.37, + "learning_rate": 0.00041966826597475757, + "loss": 3.4462, + "step": 21660 + }, + { + "epoch": 0.37, + "learning_rate": 0.00041959282467482435, + "loss": 3.4077, + "step": 21665 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041951737438212367, + "loss": 3.3667, + "step": 21670 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004194419151023289, + "loss": 3.4438, + "step": 21675 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004193664468411143, + "loss": 3.4317, + "step": 21680 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041929096960415457, + "loss": 3.4413, + "step": 21685 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004192154833971254, + "loss": 3.507, + "step": 21690 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041913998822570267, + "loss": 3.5172, + "step": 21695 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004190644840955636, + "loss": 3.4648, + "step": 21700 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004189889710123854, + "loss": 3.4228, + "step": 21705 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041891344898184647, + "loss": 3.383, + "step": 21710 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041883791800962554, + "loss": 3.4611, + "step": 21715 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004187623781014021, + "loss": 3.4332, + "step": 21720 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041868682926285677, + "loss": 3.3851, + "step": 21725 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004186112714996699, + "loss": 3.4926, + "step": 21730 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041853570481752335, + "loss": 3.3628, + "step": 21735 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004184601292220993, + "loss": 3.4518, + "step": 21740 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041838454471908066, + "loss": 3.2995, + "step": 21745 + }, + { + "epoch": 0.38, + "learning_rate": 0.000418308951314151, + "loss": 3.4446, + "step": 21750 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041823334901299435, + "loss": 3.5188, + "step": 21755 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041815773782129587, + "loss": 3.4001, + "step": 21760 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004180821177447411, + "loss": 3.4094, + "step": 21765 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041800648878901627, + "loss": 3.3152, + "step": 21770 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041793085095980816, + "loss": 3.3421, + "step": 21775 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004178552042628045, + "loss": 3.4248, + "step": 21780 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041777954870369344, + "loss": 3.401, + "step": 21785 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041770388428816397, + "loss": 3.5127, + "step": 21790 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041762821102190554, + "loss": 3.4763, + "step": 21795 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004175525289106085, + "loss": 3.475, + "step": 21800 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041747683795996385, + "loss": 3.3599, + "step": 21805 + }, + { + "epoch": 0.38, + "learning_rate": 0.000417401138175663, + "loss": 3.4129, + "step": 21810 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041732542956339816, + "loss": 3.4717, + "step": 21815 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004172497121288623, + "loss": 3.4032, + "step": 21820 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041717398587774913, + "loss": 3.442, + "step": 21825 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041709825081575263, + "loss": 3.4066, + "step": 21830 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004170225069485677, + "loss": 3.4771, + "step": 21835 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041694675428189006, + "loss": 3.4156, + "step": 21840 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004168709928214159, + "loss": 3.3453, + "step": 21845 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041679522257284196, + "loss": 3.5143, + "step": 21850 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041671944354186585, + "loss": 3.5789, + "step": 21855 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041664365573418577, + "loss": 3.406, + "step": 21860 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041656785915550055, + "loss": 3.4958, + "step": 21865 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004164920538115098, + "loss": 3.4977, + "step": 21870 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004164162397079135, + "loss": 3.4383, + "step": 21875 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041634041685041266, + "loss": 3.5104, + "step": 21880 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004162645852447087, + "loss": 3.4549, + "step": 21885 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004161887448965039, + "loss": 3.4399, + "step": 21890 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004161128958115008, + "loss": 3.3542, + "step": 21895 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004160370379954029, + "loss": 3.4418, + "step": 21900 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041596117145391466, + "loss": 3.3958, + "step": 21905 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004158852961927405, + "loss": 3.3919, + "step": 21910 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004158094122175859, + "loss": 3.3159, + "step": 21915 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004157335195341571, + "loss": 3.5038, + "step": 21920 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004156576181481608, + "loss": 3.3401, + "step": 21925 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041558170806530435, + "loss": 3.4432, + "step": 21930 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004155057892912957, + "loss": 3.3199, + "step": 21935 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041542986183184367, + "loss": 3.5088, + "step": 21940 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041535392569265765, + "loss": 3.5437, + "step": 21945 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004152779808794476, + "loss": 3.3675, + "step": 21950 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004152020273979242, + "loss": 3.4963, + "step": 21955 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041512606525379866, + "loss": 3.422, + "step": 21960 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004150500944527831, + "loss": 3.3885, + "step": 21965 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041497411500059, + "loss": 3.4297, + "step": 21970 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041489812690293274, + "loss": 3.447, + "step": 21975 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041482213016552513, + "loss": 3.4361, + "step": 21980 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004147461247940818, + "loss": 3.423, + "step": 21985 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004146701107943179, + "loss": 3.4564, + "step": 21990 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041459408817194946, + "loss": 3.3803, + "step": 21995 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041451805693269283, + "loss": 3.4608, + "step": 22000 + }, + { + "epoch": 0.38, + "eval_loss": 3.4630024433135986, + "eval_runtime": 149.7666, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.775, + "step": 22000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004144420170822652, + "loss": 3.4355, + "step": 22005 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004143659686263844, + "loss": 3.4343, + "step": 22010 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004142899115707688, + "loss": 3.3277, + "step": 22015 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004142138459211377, + "loss": 3.4995, + "step": 22020 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004141377716832107, + "loss": 3.4184, + "step": 22025 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041406168886270816, + "loss": 3.3363, + "step": 22030 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041398559746535113, + "loss": 3.3641, + "step": 22035 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004139094974968614, + "loss": 3.4262, + "step": 22040 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004138333889629613, + "loss": 3.4048, + "step": 22045 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004137572718693737, + "loss": 3.4102, + "step": 22050 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041368114622182214, + "loss": 3.4041, + "step": 22055 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004136050120260311, + "loss": 3.4719, + "step": 22060 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004135288692877254, + "loss": 3.4398, + "step": 22065 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004134527180126305, + "loss": 3.3921, + "step": 22070 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004133765582064726, + "loss": 3.4075, + "step": 22075 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041330038987497856, + "loss": 3.4711, + "step": 22080 + }, + { + "epoch": 0.38, + "learning_rate": 0.000413224213023876, + "loss": 3.4251, + "step": 22085 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041314802765889276, + "loss": 3.3675, + "step": 22090 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004130718337857578, + "loss": 3.362, + "step": 22095 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004129956314102003, + "loss": 3.4152, + "step": 22100 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004129194205379506, + "loss": 3.4164, + "step": 22105 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041284320117473906, + "loss": 3.476, + "step": 22110 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041276697332629707, + "loss": 3.3858, + "step": 22115 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004126907369983566, + "loss": 3.3862, + "step": 22120 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004126144921966503, + "loss": 3.3574, + "step": 22125 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041253823892691135, + "loss": 3.358, + "step": 22130 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004124619771948735, + "loss": 3.4601, + "step": 22135 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041238570700627143, + "loss": 3.566, + "step": 22140 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041230942836684, + "loss": 3.4623, + "step": 22145 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041223314128231535, + "loss": 3.4965, + "step": 22150 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041215684575843345, + "loss": 3.3462, + "step": 22155 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041208054180093175, + "loss": 3.3736, + "step": 22160 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041200422941554753, + "loss": 3.503, + "step": 22165 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004119279086080193, + "loss": 3.5064, + "step": 22170 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041185157938408603, + "loss": 3.4897, + "step": 22175 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041177524174948715, + "loss": 3.4477, + "step": 22180 + }, + { + "epoch": 0.38, + "learning_rate": 0.000411698895709963, + "loss": 3.3709, + "step": 22185 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004116225412712544, + "loss": 3.4257, + "step": 22190 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004115461784391026, + "loss": 3.3655, + "step": 22195 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004114698072192499, + "loss": 3.4449, + "step": 22200 + }, + { + "epoch": 0.38, + "learning_rate": 0.000411393427617439, + "loss": 3.4398, + "step": 22205 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004113170396394133, + "loss": 3.1629, + "step": 22210 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004112406432909165, + "loss": 3.3966, + "step": 22215 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004111642385776935, + "loss": 3.3941, + "step": 22220 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004110878255054896, + "loss": 3.3217, + "step": 22225 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004110114040800504, + "loss": 3.3977, + "step": 22230 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041093497430712267, + "loss": 3.4394, + "step": 22235 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041085853619245323, + "loss": 3.4824, + "step": 22240 + }, + { + "epoch": 0.38, + "learning_rate": 0.00041078208974179017, + "loss": 3.4449, + "step": 22245 + }, + { + "epoch": 0.39, + "learning_rate": 0.00041070563496088165, + "loss": 3.4106, + "step": 22250 + }, + { + "epoch": 0.39, + "learning_rate": 0.00041062917185547666, + "loss": 3.4365, + "step": 22255 + }, + { + "epoch": 0.39, + "learning_rate": 0.00041055270043132493, + "loss": 3.4706, + "step": 22260 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004104762206941768, + "loss": 3.3919, + "step": 22265 + }, + { + "epoch": 0.39, + "learning_rate": 0.000410399732649783, + "loss": 3.4725, + "step": 22270 + }, + { + "epoch": 0.39, + "learning_rate": 0.00041032323630389516, + "loss": 3.5326, + "step": 22275 + }, + { + "epoch": 0.39, + "learning_rate": 0.00041024673166226524, + "loss": 3.2729, + "step": 22280 + }, + { + "epoch": 0.39, + "learning_rate": 0.00041017021873064613, + "loss": 3.4428, + "step": 22285 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004100936975147913, + "loss": 3.4666, + "step": 22290 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004100171680204545, + "loss": 3.4196, + "step": 22295 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004099406302533905, + "loss": 3.4659, + "step": 22300 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040986408421935454, + "loss": 3.4275, + "step": 22305 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040978752992410264, + "loss": 3.4052, + "step": 22310 + }, + { + "epoch": 0.39, + "learning_rate": 0.000409710967373391, + "loss": 3.5221, + "step": 22315 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004096343965729769, + "loss": 3.5078, + "step": 22320 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040955781752861805, + "loss": 3.4321, + "step": 22325 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040948123024607286, + "loss": 3.4621, + "step": 22330 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040940463473110013, + "loss": 3.3809, + "step": 22335 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004093280309894596, + "loss": 3.4517, + "step": 22340 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040925141902691144, + "loss": 3.4528, + "step": 22345 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040917479884921654, + "loss": 3.4547, + "step": 22350 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004090981704621362, + "loss": 3.4308, + "step": 22355 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004090215338714325, + "loss": 3.2692, + "step": 22360 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004089448890828683, + "loss": 3.4193, + "step": 22365 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040886823610220674, + "loss": 3.4402, + "step": 22370 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004087915749352117, + "loss": 3.4521, + "step": 22375 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040871490558764786, + "loss": 3.3807, + "step": 22380 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004086382280652802, + "loss": 3.4643, + "step": 22385 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004085615423738747, + "loss": 3.4981, + "step": 22390 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004084848485191974, + "loss": 3.5087, + "step": 22395 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040840814650701553, + "loss": 3.453, + "step": 22400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004083314363430967, + "loss": 3.3977, + "step": 22405 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040825471803320894, + "loss": 3.357, + "step": 22410 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004081779915831213, + "loss": 3.4247, + "step": 22415 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040810125699860297, + "loss": 3.3161, + "step": 22420 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040802451428542414, + "loss": 3.4269, + "step": 22425 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004079477634493556, + "loss": 3.4299, + "step": 22430 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040787100449616833, + "loss": 3.4994, + "step": 22435 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004077942374316344, + "loss": 3.3577, + "step": 22440 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004077174622615262, + "loss": 3.353, + "step": 22445 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004076406789916169, + "loss": 3.5383, + "step": 22450 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040756388762768023, + "loss": 3.4003, + "step": 22455 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004074870881754904, + "loss": 3.4799, + "step": 22460 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004074102806408225, + "loss": 3.3984, + "step": 22465 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004073334650294519, + "loss": 3.4481, + "step": 22470 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040725664134715485, + "loss": 3.4943, + "step": 22475 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040717980959970794, + "loss": 3.4733, + "step": 22480 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004071029697928887, + "loss": 3.4076, + "step": 22485 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004070261219324751, + "loss": 3.4849, + "step": 22490 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040694926602424553, + "loss": 3.4302, + "step": 22495 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004068724020739793, + "loss": 3.4945, + "step": 22500 + }, + { + "epoch": 0.39, + "eval_loss": 3.4543726444244385, + "eval_runtime": 149.6841, + "eval_samples_per_second": 12.299, + "eval_steps_per_second": 0.775, + "step": 22500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040679553008745613, + "loss": 3.5376, + "step": 22505 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040671865007045654, + "loss": 3.2163, + "step": 22510 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040664176202876123, + "loss": 3.4521, + "step": 22515 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040656486596815194, + "loss": 3.3826, + "step": 22520 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040648796189441094, + "loss": 3.3821, + "step": 22525 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040641104981332097, + "loss": 3.4799, + "step": 22530 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040633412973066534, + "loss": 3.3088, + "step": 22535 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004062572016522281, + "loss": 3.4578, + "step": 22540 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040618026558379396, + "loss": 3.4492, + "step": 22545 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040610332153114795, + "loss": 3.4271, + "step": 22550 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004060263695000759, + "loss": 3.3281, + "step": 22555 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040594940949636424, + "loss": 3.5284, + "step": 22560 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040587244152579996, + "loss": 3.4063, + "step": 22565 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040579546559417083, + "loss": 3.3343, + "step": 22570 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040571848170726463, + "loss": 3.4706, + "step": 22575 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004056414898708706, + "loss": 3.4401, + "step": 22580 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040556449009077777, + "loss": 3.4597, + "step": 22585 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040548748237277644, + "loss": 3.4925, + "step": 22590 + }, + { + "epoch": 0.39, + "learning_rate": 0.000405410466722657, + "loss": 3.2829, + "step": 22595 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004053334431462106, + "loss": 3.4492, + "step": 22600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004052564116492292, + "loss": 3.315, + "step": 22605 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004051793722375051, + "loss": 3.4348, + "step": 22610 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004051023249168312, + "loss": 3.4744, + "step": 22615 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004050252696930011, + "loss": 3.4817, + "step": 22620 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004049482065718089, + "loss": 3.4592, + "step": 22625 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004048711355590495, + "loss": 3.3313, + "step": 22630 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040479405666051817, + "loss": 3.4485, + "step": 22635 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040471696988201076, + "loss": 3.3691, + "step": 22640 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040463987522932395, + "loss": 3.4079, + "step": 22645 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004045627727082548, + "loss": 3.4411, + "step": 22650 + }, + { + "epoch": 0.39, + "learning_rate": 0.000404485662324601, + "loss": 3.4073, + "step": 22655 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004044085440841609, + "loss": 3.3304, + "step": 22660 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004043314179927332, + "loss": 3.3583, + "step": 22665 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004042542840561178, + "loss": 3.4128, + "step": 22670 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004041771422801144, + "loss": 3.4874, + "step": 22675 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040409999267052385, + "loss": 3.4342, + "step": 22680 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004040228352331474, + "loss": 3.4489, + "step": 22685 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040394566997378674, + "loss": 3.4437, + "step": 22690 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040386849689824444, + "loss": 3.3225, + "step": 22695 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004037913160123235, + "loss": 3.4923, + "step": 22700 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004037141273218275, + "loss": 3.5339, + "step": 22705 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004036369308325607, + "loss": 3.4442, + "step": 22710 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004035597265503278, + "loss": 3.2786, + "step": 22715 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004034825144809342, + "loss": 3.3097, + "step": 22720 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004034052946301858, + "loss": 3.3156, + "step": 22725 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004033280670038893, + "loss": 3.4123, + "step": 22730 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004032508316078517, + "loss": 3.3769, + "step": 22735 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040317358844788064, + "loss": 3.4664, + "step": 22740 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004030963375297845, + "loss": 3.4246, + "step": 22745 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040301907885937217, + "loss": 3.4092, + "step": 22750 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004029418124424531, + "loss": 3.4981, + "step": 22755 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040286453828483723, + "loss": 3.3302, + "step": 22760 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040278725639233525, + "loss": 3.3779, + "step": 22765 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004027099667707584, + "loss": 3.4032, + "step": 22770 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004026326694259185, + "loss": 3.4799, + "step": 22775 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040255536436362767, + "loss": 3.4285, + "step": 22780 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040247805158969915, + "loss": 3.4642, + "step": 22785 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040240073110994637, + "loss": 3.4589, + "step": 22790 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004023234029301833, + "loss": 3.4435, + "step": 22795 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004022460670562248, + "loss": 3.3843, + "step": 22800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004021687234938861, + "loss": 3.5109, + "step": 22805 + }, + { + "epoch": 0.39, + "learning_rate": 0.000402091372248983, + "loss": 3.3412, + "step": 22810 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040201401332733177, + "loss": 3.2672, + "step": 22815 + }, + { + "epoch": 0.39, + "learning_rate": 0.00040193664673474967, + "loss": 3.3403, + "step": 22820 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004018592724770541, + "loss": 3.4237, + "step": 22825 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040178189056006334, + "loss": 3.2901, + "step": 22830 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004017045009895959, + "loss": 3.5062, + "step": 22835 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040162710377147134, + "loss": 3.45, + "step": 22840 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004015496989115093, + "loss": 3.435, + "step": 22845 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004014722864155304, + "loss": 3.4186, + "step": 22850 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004013948662893557, + "loss": 3.3804, + "step": 22855 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004013174385388065, + "loss": 3.4171, + "step": 22860 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004012400031697053, + "loss": 3.3592, + "step": 22865 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040116256018787465, + "loss": 3.4479, + "step": 22870 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040108510959913795, + "loss": 3.283, + "step": 22875 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040100765140931905, + "loss": 3.5205, + "step": 22880 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040093018562424235, + "loss": 3.3975, + "step": 22885 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040085271224973303, + "loss": 3.4526, + "step": 22890 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004007752312916167, + "loss": 3.4228, + "step": 22895 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040069774275571933, + "loss": 3.4961, + "step": 22900 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040062024664786773, + "loss": 3.2809, + "step": 22905 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004005427429738893, + "loss": 3.3984, + "step": 22910 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040046523173961195, + "loss": 3.3236, + "step": 22915 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040038771295086397, + "loss": 3.4506, + "step": 22920 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004003101866134744, + "loss": 3.4803, + "step": 22925 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040023265273327296, + "loss": 3.4333, + "step": 22930 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004001551113160897, + "loss": 3.4086, + "step": 22935 + }, + { + "epoch": 0.4, + "learning_rate": 0.00040007756236775543, + "loss": 3.329, + "step": 22940 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004000000058941012, + "loss": 3.4441, + "step": 22945 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003999224419009591, + "loss": 3.4387, + "step": 22950 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039984487039416153, + "loss": 3.3819, + "step": 22955 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003997672913795413, + "loss": 3.3857, + "step": 22960 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039968970486293213, + "loss": 3.4039, + "step": 22965 + }, + { + "epoch": 0.4, + "learning_rate": 0.000399612110850168, + "loss": 3.4457, + "step": 22970 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039953450934708366, + "loss": 3.4657, + "step": 22975 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039945690035951434, + "loss": 3.3894, + "step": 22980 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039937928389329575, + "loss": 3.5035, + "step": 22985 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003993016599542644, + "loss": 3.3818, + "step": 22990 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003992240285482571, + "loss": 3.3472, + "step": 22995 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003991463896811114, + "loss": 3.3529, + "step": 23000 + }, + { + "epoch": 0.4, + "eval_loss": 3.442578077316284, + "eval_runtime": 149.8744, + "eval_samples_per_second": 12.284, + "eval_steps_per_second": 0.774, + "step": 23000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039906874335866535, + "loss": 3.3731, + "step": 23005 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003989910895867574, + "loss": 3.4402, + "step": 23010 + }, + { + "epoch": 0.4, + "learning_rate": 0.000398913428371227, + "loss": 3.4643, + "step": 23015 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003988357597179135, + "loss": 3.501, + "step": 23020 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003987580836326575, + "loss": 3.4019, + "step": 23025 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003986804001212998, + "loss": 3.351, + "step": 23030 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003986027091896817, + "loss": 3.313, + "step": 23035 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003985250108436452, + "loss": 3.4688, + "step": 23040 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003984473050890327, + "loss": 3.3642, + "step": 23045 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003983695919316875, + "loss": 3.3882, + "step": 23050 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039829187137745316, + "loss": 3.3845, + "step": 23055 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003982141434321738, + "loss": 3.3129, + "step": 23060 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039813640810169415, + "loss": 3.2984, + "step": 23065 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003980586653918596, + "loss": 3.4537, + "step": 23070 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039798091530851586, + "loss": 3.4673, + "step": 23075 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003979031578575096, + "loss": 3.4474, + "step": 23080 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003978253930446875, + "loss": 3.4899, + "step": 23085 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039774762087589713, + "loss": 3.4094, + "step": 23090 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003976698413569868, + "loss": 3.4574, + "step": 23095 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039759205449380476, + "loss": 3.4646, + "step": 23100 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003975142602922005, + "loss": 3.4299, + "step": 23105 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003974364587580236, + "loss": 3.4025, + "step": 23110 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003973586498971243, + "loss": 3.4642, + "step": 23115 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039728083371535355, + "loss": 3.3419, + "step": 23120 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003972030102185626, + "loss": 3.4246, + "step": 23125 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039712517941260344, + "loss": 3.4522, + "step": 23130 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003970473413033286, + "loss": 3.331, + "step": 23135 + }, + { + "epoch": 0.4, + "learning_rate": 0.000396969495896591, + "loss": 3.3998, + "step": 23140 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039689164319824436, + "loss": 3.3278, + "step": 23145 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003968137832141426, + "loss": 3.4126, + "step": 23150 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003967359159501406, + "loss": 3.4785, + "step": 23155 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039665804141209346, + "loss": 3.3449, + "step": 23160 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003965801596058569, + "loss": 3.2645, + "step": 23165 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039650227053728736, + "loss": 3.4129, + "step": 23170 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003964243742122417, + "loss": 3.4324, + "step": 23175 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039634647063657724, + "loss": 3.3419, + "step": 23180 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039626855981615184, + "loss": 3.4273, + "step": 23185 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039619064175682416, + "loss": 3.3218, + "step": 23190 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003961127164644533, + "loss": 3.5256, + "step": 23195 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003960347839448986, + "loss": 3.4073, + "step": 23200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003959568442040203, + "loss": 3.4519, + "step": 23205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003958788972476792, + "loss": 3.4716, + "step": 23210 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003958009430817364, + "loss": 3.3856, + "step": 23215 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003957229817120536, + "loss": 3.3233, + "step": 23220 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039564501314449317, + "loss": 3.4366, + "step": 23225 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039556703738491785, + "loss": 3.3467, + "step": 23230 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039548905443919117, + "loss": 3.324, + "step": 23235 + }, + { + "epoch": 0.4, + "learning_rate": 0.000395411064313177, + "loss": 3.3125, + "step": 23240 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003953330670127397, + "loss": 3.3889, + "step": 23245 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003952550625437443, + "loss": 3.4227, + "step": 23250 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003951770509120565, + "loss": 3.454, + "step": 23255 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039509903212354215, + "loss": 3.4451, + "step": 23260 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039502100618406797, + "loss": 3.3362, + "step": 23265 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003949429730995011, + "loss": 3.4359, + "step": 23270 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003948649328757093, + "loss": 3.3884, + "step": 23275 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039478688551856055, + "loss": 3.4077, + "step": 23280 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039470883103392393, + "loss": 3.3771, + "step": 23285 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039463076942766853, + "loss": 3.4804, + "step": 23290 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003945527007056644, + "loss": 3.3656, + "step": 23295 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003944746248737816, + "loss": 3.4062, + "step": 23300 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003943965419378911, + "loss": 3.3631, + "step": 23305 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039431845190386454, + "loss": 3.4399, + "step": 23310 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003942403547775738, + "loss": 3.4739, + "step": 23315 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003941622505648913, + "loss": 3.3574, + "step": 23320 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039408413927169016, + "loss": 3.463, + "step": 23325 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039400602090384384, + "loss": 3.3802, + "step": 23330 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003939278954672267, + "loss": 3.4331, + "step": 23335 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039384976296771303, + "loss": 3.5204, + "step": 23340 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039377162341117816, + "loss": 3.4733, + "step": 23345 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039369347680349786, + "loss": 3.3425, + "step": 23350 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039361532315054825, + "loss": 3.4375, + "step": 23355 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039353716245820613, + "loss": 3.4392, + "step": 23360 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003934589947323487, + "loss": 3.3395, + "step": 23365 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039338081997885384, + "loss": 3.3579, + "step": 23370 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003933026382036, + "loss": 3.4948, + "step": 23375 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003932244494124658, + "loss": 3.4524, + "step": 23380 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039314625361133086, + "loss": 3.4354, + "step": 23385 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039306805080607495, + "loss": 3.3287, + "step": 23390 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003929898410025786, + "loss": 3.3441, + "step": 23395 + }, + { + "epoch": 0.4, + "learning_rate": 0.00039291162420672284, + "loss": 3.3456, + "step": 23400 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039283340042438905, + "loss": 3.3949, + "step": 23405 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003927551696614593, + "loss": 3.3505, + "step": 23410 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003926769319238162, + "loss": 3.4162, + "step": 23415 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039259868721734274, + "loss": 3.3707, + "step": 23420 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003925204355479226, + "loss": 3.3314, + "step": 23425 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039244217692143975, + "loss": 3.4314, + "step": 23430 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003923639113437791, + "loss": 3.4921, + "step": 23435 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039228563882082564, + "loss": 3.3425, + "step": 23440 + }, + { + "epoch": 0.41, + "learning_rate": 0.000392207359358465, + "loss": 3.3939, + "step": 23445 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039212907296258363, + "loss": 3.391, + "step": 23450 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039205077963906806, + "loss": 3.3738, + "step": 23455 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003919724793938056, + "loss": 3.5164, + "step": 23460 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039189417223268406, + "loss": 3.3694, + "step": 23465 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039181585816159176, + "loss": 3.354, + "step": 23470 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003917375371864174, + "loss": 3.4643, + "step": 23475 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003916592093130505, + "loss": 3.3646, + "step": 23480 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039158087454738063, + "loss": 3.38, + "step": 23485 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003915025328952984, + "loss": 3.5276, + "step": 23490 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039142418436269473, + "loss": 3.5326, + "step": 23495 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039134582895546094, + "loss": 3.4432, + "step": 23500 + }, + { + "epoch": 0.41, + "eval_loss": 3.4460275173187256, + "eval_runtime": 149.7826, + "eval_samples_per_second": 12.291, + "eval_steps_per_second": 0.774, + "step": 23500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003912674666794889, + "loss": 3.5093, + "step": 23505 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039118909754067097, + "loss": 3.4232, + "step": 23510 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003911107215449005, + "loss": 3.3742, + "step": 23515 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039103233869807055, + "loss": 3.5677, + "step": 23520 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039095394900607537, + "loss": 3.412, + "step": 23525 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003908755524748092, + "loss": 3.4859, + "step": 23530 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039079714911016727, + "loss": 3.3516, + "step": 23535 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003907187389180451, + "loss": 3.4358, + "step": 23540 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003906403219043386, + "loss": 3.4573, + "step": 23545 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003905618980749445, + "loss": 3.4623, + "step": 23550 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039048346743575976, + "loss": 3.5096, + "step": 23555 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039040502999268203, + "loss": 3.3192, + "step": 23560 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003903265857516094, + "loss": 3.4335, + "step": 23565 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039024813471844034, + "loss": 3.4312, + "step": 23570 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039016967689907406, + "loss": 3.3351, + "step": 23575 + }, + { + "epoch": 0.41, + "learning_rate": 0.00039009121229941027, + "loss": 3.4376, + "step": 23580 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003900127409253489, + "loss": 3.378, + "step": 23585 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003899342627827909, + "loss": 3.4918, + "step": 23590 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038985577787763713, + "loss": 3.5062, + "step": 23595 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003897772862157893, + "loss": 3.3182, + "step": 23600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038969878780314976, + "loss": 3.4342, + "step": 23605 + }, + { + "epoch": 0.41, + "learning_rate": 0.000389620282645621, + "loss": 3.3611, + "step": 23610 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003895417707491064, + "loss": 3.2832, + "step": 23615 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038946325211950945, + "loss": 3.4133, + "step": 23620 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003893847267627344, + "loss": 3.3847, + "step": 23625 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003893061946846861, + "loss": 3.2987, + "step": 23630 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038922765589126954, + "loss": 3.3355, + "step": 23635 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038914911038839063, + "loss": 3.2932, + "step": 23640 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003890705581819554, + "loss": 3.4283, + "step": 23645 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038899199927787067, + "loss": 3.3789, + "step": 23650 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038891343368204374, + "loss": 3.4124, + "step": 23655 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003888348614003823, + "loss": 3.388, + "step": 23660 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003887562824387945, + "loss": 3.4347, + "step": 23665 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038867769680318913, + "loss": 3.4287, + "step": 23670 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003885991044994755, + "loss": 3.4319, + "step": 23675 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003885205055335632, + "loss": 3.3868, + "step": 23680 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003884418999113625, + "loss": 3.4081, + "step": 23685 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038836328763878423, + "loss": 3.5162, + "step": 23690 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003882846687217396, + "loss": 3.439, + "step": 23695 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038820604316614036, + "loss": 3.4161, + "step": 23700 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003881274109778987, + "loss": 3.408, + "step": 23705 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003880487721629273, + "loss": 3.3882, + "step": 23710 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003879701267271396, + "loss": 3.2077, + "step": 23715 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038789147467644915, + "loss": 3.346, + "step": 23720 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038781281601677023, + "loss": 3.4345, + "step": 23725 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003877341507540176, + "loss": 3.2781, + "step": 23730 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038765547889410643, + "loss": 3.3699, + "step": 23735 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003875768004429525, + "loss": 3.4467, + "step": 23740 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038749811540647204, + "loss": 3.3993, + "step": 23745 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038741942379058163, + "loss": 3.3744, + "step": 23750 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038734072560119866, + "loss": 3.4018, + "step": 23755 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038726202084424076, + "loss": 3.412, + "step": 23760 + }, + { + "epoch": 0.41, + "learning_rate": 0.000387183309525626, + "loss": 3.4137, + "step": 23765 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003871045916512732, + "loss": 3.4427, + "step": 23770 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038702586722710153, + "loss": 3.4318, + "step": 23775 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003869471362590306, + "loss": 3.4418, + "step": 23780 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038686839875298073, + "loss": 3.3053, + "step": 23785 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003867896547148724, + "loss": 3.4195, + "step": 23790 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038671090415062683, + "loss": 3.343, + "step": 23795 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038663214706616565, + "loss": 3.4823, + "step": 23800 + }, + { + "epoch": 0.41, + "learning_rate": 0.000386553383467411, + "loss": 3.2957, + "step": 23805 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003864746133602855, + "loss": 3.3719, + "step": 23810 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003863958367507122, + "loss": 3.3919, + "step": 23815 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038631705364461485, + "loss": 3.3362, + "step": 23820 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038623826404791733, + "loss": 3.4481, + "step": 23825 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003861594679665443, + "loss": 3.4172, + "step": 23830 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038608066540642095, + "loss": 3.3761, + "step": 23835 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003860018563734727, + "loss": 3.4139, + "step": 23840 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003859230408736256, + "loss": 3.4002, + "step": 23845 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038584421891280606, + "loss": 3.3431, + "step": 23850 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003857653904969413, + "loss": 3.4767, + "step": 23855 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038568655563195873, + "loss": 3.3485, + "step": 23860 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003856077143237863, + "loss": 3.4446, + "step": 23865 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003855288665783525, + "loss": 3.4077, + "step": 23870 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003854500124015861, + "loss": 3.4456, + "step": 23875 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038537115179941686, + "loss": 3.4435, + "step": 23880 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003852922847777745, + "loss": 3.3529, + "step": 23885 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038521341134258947, + "loss": 3.3197, + "step": 23890 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003851345314997926, + "loss": 3.3096, + "step": 23895 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003850556452553152, + "loss": 3.4025, + "step": 23900 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003849767526150892, + "loss": 3.3826, + "step": 23905 + }, + { + "epoch": 0.41, + "learning_rate": 0.000384897853585047, + "loss": 3.3757, + "step": 23910 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038481894817112114, + "loss": 3.4822, + "step": 23915 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038474003637924516, + "loss": 3.3612, + "step": 23920 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003846611182153528, + "loss": 3.4059, + "step": 23925 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038458219368537813, + "loss": 3.2958, + "step": 23930 + }, + { + "epoch": 0.41, + "learning_rate": 0.000384503262795256, + "loss": 3.3191, + "step": 23935 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038442432555092163, + "loss": 3.4621, + "step": 23940 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038434538195831054, + "loss": 3.3922, + "step": 23945 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038426643202335906, + "loss": 3.3505, + "step": 23950 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038418747575200374, + "loss": 3.4525, + "step": 23955 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003841085131501818, + "loss": 3.4467, + "step": 23960 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003840295442238306, + "loss": 3.4365, + "step": 23965 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038395056897888824, + "loss": 3.471, + "step": 23970 + }, + { + "epoch": 0.41, + "learning_rate": 0.00038387158742129345, + "loss": 3.3748, + "step": 23975 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038379259955698516, + "loss": 3.4801, + "step": 23980 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003837136053919028, + "loss": 3.3963, + "step": 23985 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003836346049319862, + "loss": 3.4244, + "step": 23990 + }, + { + "epoch": 0.42, + "learning_rate": 0.000383555598183176, + "loss": 3.469, + "step": 23995 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003834765851514131, + "loss": 3.4334, + "step": 24000 + }, + { + "epoch": 0.42, + "eval_loss": 3.4270379543304443, + "eval_runtime": 150.6758, + "eval_samples_per_second": 12.218, + "eval_steps_per_second": 0.77, + "step": 24000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038339756584263875, + "loss": 3.3244, + "step": 24005 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038331854026279485, + "loss": 3.3813, + "step": 24010 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038323950841782375, + "loss": 3.3714, + "step": 24015 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003831604703136683, + "loss": 3.3798, + "step": 24020 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003830814259562715, + "loss": 3.3194, + "step": 24025 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003830023753515773, + "loss": 3.4367, + "step": 24030 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038292331850552986, + "loss": 3.4029, + "step": 24035 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038284425542407395, + "loss": 3.4108, + "step": 24040 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003827651861131545, + "loss": 3.3577, + "step": 24045 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038268611057871727, + "loss": 3.4693, + "step": 24050 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003826070288267082, + "loss": 3.3681, + "step": 24055 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038252794086307405, + "loss": 3.4132, + "step": 24060 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003824488466937616, + "loss": 3.4845, + "step": 24065 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003823697463247184, + "loss": 3.4444, + "step": 24070 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038229063976189247, + "loss": 3.4076, + "step": 24075 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038221152701123216, + "loss": 3.3896, + "step": 24080 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038213240807868633, + "loss": 3.3713, + "step": 24085 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003820532829702043, + "loss": 3.3014, + "step": 24090 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003819741516917359, + "loss": 3.4203, + "step": 24095 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038189501424923156, + "loss": 3.3145, + "step": 24100 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003818158706486417, + "loss": 3.36, + "step": 24105 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003817367208959177, + "loss": 3.3946, + "step": 24110 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003816575649970112, + "loss": 3.3836, + "step": 24115 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003815784029578743, + "loss": 3.4324, + "step": 24120 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003814992347844595, + "loss": 3.4453, + "step": 24125 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003814200604827201, + "loss": 3.4197, + "step": 24130 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038134088005860924, + "loss": 3.4852, + "step": 24135 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038126169351808127, + "loss": 3.408, + "step": 24140 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003811825008670902, + "loss": 3.4374, + "step": 24145 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003811033021115912, + "loss": 3.4174, + "step": 24150 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038102409725753947, + "loss": 3.4054, + "step": 24155 + }, + { + "epoch": 0.42, + "learning_rate": 0.000380944886310891, + "loss": 3.3431, + "step": 24160 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038086566927760185, + "loss": 3.5198, + "step": 24165 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038078644616362876, + "loss": 3.3664, + "step": 24170 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038070721697492895, + "loss": 3.3576, + "step": 24175 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003806279817174601, + "loss": 3.4815, + "step": 24180 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003805487403971802, + "loss": 3.4107, + "step": 24185 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038046949302004783, + "loss": 3.3978, + "step": 24190 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038039023959202193, + "loss": 3.3128, + "step": 24195 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003803109801190621, + "loss": 3.4749, + "step": 24200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003802317146071281, + "loss": 3.3715, + "step": 24205 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038015244306218034, + "loss": 3.3911, + "step": 24210 + }, + { + "epoch": 0.42, + "learning_rate": 0.00038007316549017964, + "loss": 3.364, + "step": 24215 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037999388189708733, + "loss": 3.4206, + "step": 24220 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037991459228886495, + "loss": 3.4327, + "step": 24225 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003798352966714749, + "loss": 3.3105, + "step": 24230 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003797559950508796, + "loss": 3.4319, + "step": 24235 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037967668743304225, + "loss": 3.3862, + "step": 24240 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037959737382392637, + "loss": 3.3361, + "step": 24245 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003795180542294958, + "loss": 3.4328, + "step": 24250 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037943872865571525, + "loss": 3.4733, + "step": 24255 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037935939710854934, + "loss": 3.3578, + "step": 24260 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003792800595939634, + "loss": 3.3603, + "step": 24265 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037920071611792337, + "loss": 3.4714, + "step": 24270 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003791213666863953, + "loss": 3.4574, + "step": 24275 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037904201130534604, + "loss": 3.3614, + "step": 24280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003789626499807426, + "loss": 3.3123, + "step": 24285 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003788832827185525, + "loss": 3.3467, + "step": 24290 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037880390952474386, + "loss": 3.4375, + "step": 24295 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037872453040528516, + "loss": 3.3211, + "step": 24300 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003786451453661451, + "loss": 3.3171, + "step": 24305 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037856575441329315, + "loss": 3.43, + "step": 24310 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003784863575526992, + "loss": 3.36, + "step": 24315 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037840695479033335, + "loss": 3.4438, + "step": 24320 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003783275461321663, + "loss": 3.323, + "step": 24325 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037824813158416937, + "loss": 3.4666, + "step": 24330 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037816871115231376, + "loss": 3.4379, + "step": 24335 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037808928484257175, + "loss": 3.501, + "step": 24340 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003780098526609158, + "loss": 3.4066, + "step": 24345 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037793041461331857, + "loss": 3.2325, + "step": 24350 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003778509707057536, + "loss": 3.3971, + "step": 24355 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037777152094419474, + "loss": 3.384, + "step": 24360 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003776920653346159, + "loss": 3.3162, + "step": 24365 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037761260388299205, + "loss": 3.4612, + "step": 24370 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037753313659529805, + "loss": 3.4324, + "step": 24375 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003774536634775097, + "loss": 3.4222, + "step": 24380 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003773741845356026, + "loss": 3.3924, + "step": 24385 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003772946997755534, + "loss": 3.4305, + "step": 24390 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003772152092033389, + "loss": 3.402, + "step": 24395 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003771357128249365, + "loss": 3.5204, + "step": 24400 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003770562106463237, + "loss": 3.3448, + "step": 24405 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003769767026734788, + "loss": 3.2582, + "step": 24410 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003768971889123803, + "loss": 3.4989, + "step": 24415 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003768176693690074, + "loss": 3.2957, + "step": 24420 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003767381440493394, + "loss": 3.4028, + "step": 24425 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003766586129593562, + "loss": 3.4257, + "step": 24430 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003765790761050383, + "loss": 3.4178, + "step": 24435 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003764995334923662, + "loss": 3.4097, + "step": 24440 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003764199851273213, + "loss": 3.4026, + "step": 24445 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003763404310158851, + "loss": 3.4466, + "step": 24450 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037626087116403973, + "loss": 3.3257, + "step": 24455 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037618130557776774, + "loss": 3.4463, + "step": 24460 + }, + { + "epoch": 0.42, + "learning_rate": 0.000376101734263052, + "loss": 3.2616, + "step": 24465 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037602215722587587, + "loss": 3.3278, + "step": 24470 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003759425744722231, + "loss": 3.3324, + "step": 24475 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037586298600807795, + "loss": 3.3937, + "step": 24480 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037578339183942503, + "loss": 3.3143, + "step": 24485 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037570379197224935, + "loss": 3.3917, + "step": 24490 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003756241864125365, + "loss": 3.3902, + "step": 24495 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003755445751662725, + "loss": 3.4043, + "step": 24500 + }, + { + "epoch": 0.42, + "eval_loss": 3.4218356609344482, + "eval_runtime": 149.9736, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 24500 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003754649582394435, + "loss": 3.3665, + "step": 24505 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003753853356380365, + "loss": 3.4224, + "step": 24510 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037530570736803853, + "loss": 3.2309, + "step": 24515 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003752260734354374, + "loss": 3.32, + "step": 24520 + }, + { + "epoch": 0.42, + "learning_rate": 0.000375146433846221, + "loss": 3.3945, + "step": 24525 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003750667886063778, + "loss": 3.4484, + "step": 24530 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037498713772189695, + "loss": 3.3952, + "step": 24535 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003749074811987676, + "loss": 3.4158, + "step": 24540 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037482781904297954, + "loss": 3.45, + "step": 24545 + }, + { + "epoch": 0.42, + "learning_rate": 0.00037474815126052304, + "loss": 3.3781, + "step": 24550 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003746684778573885, + "loss": 3.285, + "step": 24555 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003745887988395672, + "loss": 3.3847, + "step": 24560 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037450911421305056, + "loss": 3.4632, + "step": 24565 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037442942398383027, + "loss": 3.364, + "step": 24570 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003743497281578987, + "loss": 3.3277, + "step": 24575 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003742700267412487, + "loss": 3.4421, + "step": 24580 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037419031973987335, + "loss": 3.3185, + "step": 24585 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037411060715976613, + "loss": 3.45, + "step": 24590 + }, + { + "epoch": 0.43, + "learning_rate": 0.000374030889006921, + "loss": 3.4009, + "step": 24595 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003739511652873325, + "loss": 3.4462, + "step": 24600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003738714360069954, + "loss": 3.2398, + "step": 24605 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037379170117190477, + "loss": 3.4085, + "step": 24610 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003737119607880565, + "loss": 3.4324, + "step": 24615 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037363221486144644, + "loss": 3.4352, + "step": 24620 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003735524633980713, + "loss": 3.451, + "step": 24625 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037347270640392783, + "loss": 3.3561, + "step": 24630 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003733929438850133, + "loss": 3.3655, + "step": 24635 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003733131758473256, + "loss": 3.4145, + "step": 24640 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003732334022968628, + "loss": 3.4254, + "step": 24645 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003731536232396235, + "loss": 3.341, + "step": 24650 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003730738386816065, + "loss": 3.2968, + "step": 24655 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037299404862881145, + "loss": 3.3911, + "step": 24660 + }, + { + "epoch": 0.43, + "learning_rate": 0.000372914253087238, + "loss": 3.3917, + "step": 24665 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037283445206288637, + "loss": 3.3416, + "step": 24670 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037275464556175725, + "loss": 3.3614, + "step": 24675 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003726748335898516, + "loss": 3.3741, + "step": 24680 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037259501615317096, + "loss": 3.343, + "step": 24685 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003725151932577172, + "loss": 3.3623, + "step": 24690 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003724353649094924, + "loss": 3.4274, + "step": 24695 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003723555311144995, + "loss": 3.3472, + "step": 24700 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037227569187874146, + "loss": 3.4346, + "step": 24705 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003721958472082217, + "loss": 3.5295, + "step": 24710 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003721159971089443, + "loss": 3.5022, + "step": 24715 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037203614158691354, + "loss": 3.3116, + "step": 24720 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003719562806481341, + "loss": 3.2844, + "step": 24725 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003718764142986111, + "loss": 3.4344, + "step": 24730 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003717965425443501, + "loss": 3.4131, + "step": 24735 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037171666539135705, + "loss": 3.3643, + "step": 24740 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037163678284563836, + "loss": 3.2979, + "step": 24745 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037155689491320066, + "loss": 3.358, + "step": 24750 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037147700160005125, + "loss": 3.4448, + "step": 24755 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003713971029121975, + "loss": 3.3963, + "step": 24760 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003713171988556478, + "loss": 3.2131, + "step": 24765 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003712372894364099, + "loss": 3.392, + "step": 24770 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037115737466049306, + "loss": 3.3659, + "step": 24775 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003710774545339064, + "loss": 3.3049, + "step": 24780 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037099752906265943, + "loss": 3.3554, + "step": 24785 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037091759825276207, + "loss": 3.318, + "step": 24790 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037083766211022467, + "loss": 3.2913, + "step": 24795 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037075772064105815, + "loss": 3.4315, + "step": 24800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037067777385127383, + "loss": 3.3689, + "step": 24805 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037059782174688296, + "loss": 3.4208, + "step": 24810 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037051786433389775, + "loss": 3.2799, + "step": 24815 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003704379016183306, + "loss": 3.5046, + "step": 24820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037035793360619423, + "loss": 3.4735, + "step": 24825 + }, + { + "epoch": 0.43, + "learning_rate": 0.00037027796030350183, + "loss": 3.3752, + "step": 24830 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003701979817162669, + "loss": 3.4892, + "step": 24835 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003701179978505035, + "loss": 3.2982, + "step": 24840 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003700380087122262, + "loss": 3.5165, + "step": 24845 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003699580143074494, + "loss": 3.3722, + "step": 24850 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036987801464218853, + "loss": 3.456, + "step": 24855 + }, + { + "epoch": 0.43, + "learning_rate": 0.000369798009722459, + "loss": 3.3965, + "step": 24860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036971799955427695, + "loss": 3.4197, + "step": 24865 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003696379841436586, + "loss": 3.3727, + "step": 24870 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036955796349662066, + "loss": 3.3547, + "step": 24875 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036947793761918043, + "loss": 3.3268, + "step": 24880 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036939790651735536, + "loss": 3.4494, + "step": 24885 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003693178701971633, + "loss": 3.3767, + "step": 24890 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003692378286646227, + "loss": 3.4077, + "step": 24895 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036915778192575223, + "loss": 3.4866, + "step": 24900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036907772998657107, + "loss": 3.2929, + "step": 24905 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036899767285309846, + "loss": 3.5017, + "step": 24910 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003689176105313545, + "loss": 3.3838, + "step": 24915 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003688375430273595, + "loss": 3.2769, + "step": 24920 + }, + { + "epoch": 0.43, + "learning_rate": 0.000368757470347134, + "loss": 3.3815, + "step": 24925 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003686773924966992, + "loss": 3.381, + "step": 24930 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036859730948207627, + "loss": 3.3977, + "step": 24935 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003685172213092873, + "loss": 3.5147, + "step": 24940 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003684371279843546, + "loss": 3.3731, + "step": 24945 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003683570295133004, + "loss": 3.4501, + "step": 24950 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003682769259021479, + "loss": 3.3245, + "step": 24955 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036819681715692063, + "loss": 3.4224, + "step": 24960 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036811670328364214, + "loss": 3.3934, + "step": 24965 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003680365842883367, + "loss": 3.3077, + "step": 24970 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003679564601770287, + "loss": 3.3332, + "step": 24975 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003678763309557432, + "loss": 3.3636, + "step": 24980 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036779619663050554, + "loss": 3.3194, + "step": 24985 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036771605720734127, + "loss": 3.487, + "step": 24990 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036763591269227663, + "loss": 3.3416, + "step": 24995 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003675557630913379, + "loss": 3.3194, + "step": 25000 + }, + { + "epoch": 0.43, + "eval_loss": 3.4098989963531494, + "eval_runtime": 149.8725, + "eval_samples_per_second": 12.284, + "eval_steps_per_second": 0.774, + "step": 25000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036747560841055206, + "loss": 3.4664, + "step": 25005 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036739544865594623, + "loss": 3.3433, + "step": 25010 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036731528383354803, + "loss": 3.3198, + "step": 25015 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003672351139493855, + "loss": 3.3991, + "step": 25020 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036715493900948704, + "loss": 3.319, + "step": 25025 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036707475901988124, + "loss": 3.3713, + "step": 25030 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003669945739865974, + "loss": 3.34, + "step": 25035 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003669143839156648, + "loss": 3.4226, + "step": 25040 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036683418881311355, + "loss": 3.3767, + "step": 25045 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036675398868497394, + "loss": 3.4798, + "step": 25050 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003666737835372763, + "loss": 3.3018, + "step": 25055 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003665935733760519, + "loss": 3.4508, + "step": 25060 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036651335820733213, + "loss": 3.351, + "step": 25065 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003664331380371486, + "loss": 3.4597, + "step": 25070 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036635291287153365, + "loss": 3.3536, + "step": 25075 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036627268271651955, + "loss": 3.4136, + "step": 25080 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036619244757813953, + "loss": 3.24, + "step": 25085 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003661122074624266, + "loss": 3.3397, + "step": 25090 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036603196237541445, + "loss": 3.3929, + "step": 25095 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036595171232313715, + "loss": 3.3473, + "step": 25100 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003658714573116292, + "loss": 3.3839, + "step": 25105 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003657911973469252, + "loss": 3.3912, + "step": 25110 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003657109324350602, + "loss": 3.2797, + "step": 25115 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003656306625820699, + "loss": 3.3835, + "step": 25120 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003655503877939902, + "loss": 3.3576, + "step": 25125 + }, + { + "epoch": 0.43, + "learning_rate": 0.00036547010807685726, + "loss": 3.4066, + "step": 25130 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003653898234367078, + "loss": 3.4692, + "step": 25135 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003653095338795786, + "loss": 3.3662, + "step": 25140 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003652292394115072, + "loss": 3.2443, + "step": 25145 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036514894003853126, + "loss": 3.4306, + "step": 25150 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036506863576668894, + "loss": 3.3949, + "step": 25155 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003649883266020187, + "loss": 3.3633, + "step": 25160 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003649080125505593, + "loss": 3.4211, + "step": 25165 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036482769361835, + "loss": 3.3037, + "step": 25170 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003647473698114305, + "loss": 3.2987, + "step": 25175 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003646670411358405, + "loss": 3.32, + "step": 25180 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036458670759762044, + "loss": 3.3066, + "step": 25185 + }, + { + "epoch": 0.44, + "learning_rate": 0.000364506369202811, + "loss": 3.446, + "step": 25190 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036442602595745314, + "loss": 3.4656, + "step": 25195 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036434567786758836, + "loss": 3.3859, + "step": 25200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003642653249392584, + "loss": 3.363, + "step": 25205 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003641849671785053, + "loss": 3.3929, + "step": 25210 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003641046045913716, + "loss": 3.3327, + "step": 25215 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003640242371839002, + "loss": 3.4705, + "step": 25220 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036394386496213423, + "loss": 3.4551, + "step": 25225 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036386348793211737, + "loss": 3.4754, + "step": 25230 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003637831060998935, + "loss": 3.341, + "step": 25235 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036370271947150693, + "loss": 3.4458, + "step": 25240 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003636223280530024, + "loss": 3.3916, + "step": 25245 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003635419318504248, + "loss": 3.3774, + "step": 25250 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036346153086981955, + "loss": 3.3418, + "step": 25255 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036338112511723235, + "loss": 3.2995, + "step": 25260 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036330071459870946, + "loss": 3.318, + "step": 25265 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036322029932029726, + "loss": 3.3669, + "step": 25270 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036313987928804256, + "loss": 3.4883, + "step": 25275 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036305945450799243, + "loss": 3.3465, + "step": 25280 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003629790249861946, + "loss": 3.3789, + "step": 25285 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036289859072869687, + "loss": 3.408, + "step": 25290 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003628181517415473, + "loss": 3.3993, + "step": 25295 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003627377080307947, + "loss": 3.323, + "step": 25300 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036265725960248806, + "loss": 3.3462, + "step": 25305 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003625768064626766, + "loss": 3.3265, + "step": 25310 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003624963486174101, + "loss": 3.2759, + "step": 25315 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036241588607273823, + "loss": 3.4092, + "step": 25320 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003623354188347118, + "loss": 3.2942, + "step": 25325 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036225494690938126, + "loss": 3.3437, + "step": 25330 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036217447030279773, + "loss": 3.3779, + "step": 25335 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003620939890210128, + "loss": 3.3958, + "step": 25340 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036201350307007795, + "loss": 3.438, + "step": 25345 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003619330124560456, + "loss": 3.4422, + "step": 25350 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003618525171849681, + "loss": 3.3307, + "step": 25355 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003617720172628982, + "loss": 3.4132, + "step": 25360 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003616915126958893, + "loss": 3.3374, + "step": 25365 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003616110034899948, + "loss": 3.3578, + "step": 25370 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003615304896512685, + "loss": 3.4068, + "step": 25375 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003614499711857648, + "loss": 3.2033, + "step": 25380 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036136944809953816, + "loss": 3.3313, + "step": 25385 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003612889203986436, + "loss": 3.2963, + "step": 25390 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036120838808913625, + "loss": 3.3687, + "step": 25395 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003611278511770718, + "loss": 3.3163, + "step": 25400 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036104730966850625, + "loss": 3.4108, + "step": 25405 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003609667635694959, + "loss": 3.4129, + "step": 25410 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003608862128860973, + "loss": 3.3316, + "step": 25415 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036080565762436757, + "loss": 3.2811, + "step": 25420 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036072509779036397, + "loss": 3.4021, + "step": 25425 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036064453339014436, + "loss": 3.4404, + "step": 25430 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036056396442976657, + "loss": 3.4088, + "step": 25435 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036048339091528895, + "loss": 3.3295, + "step": 25440 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003604028128527704, + "loss": 3.4301, + "step": 25445 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036032223024826983, + "loss": 3.3048, + "step": 25450 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003602416431078468, + "loss": 3.2633, + "step": 25455 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003601610514375608, + "loss": 3.3771, + "step": 25460 + }, + { + "epoch": 0.44, + "learning_rate": 0.00036008045524347213, + "loss": 3.3616, + "step": 25465 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035999985453164115, + "loss": 3.3544, + "step": 25470 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035991924930812855, + "loss": 3.3366, + "step": 25475 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003598386395789955, + "loss": 3.405, + "step": 25480 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003597580253503034, + "loss": 3.3767, + "step": 25485 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003596774066281141, + "loss": 3.2993, + "step": 25490 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035959678341848964, + "loss": 3.4348, + "step": 25495 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035951615572749247, + "loss": 3.2641, + "step": 25500 + }, + { + "epoch": 0.44, + "eval_loss": 3.403398036956787, + "eval_runtime": 149.6768, + "eval_samples_per_second": 12.3, + "eval_steps_per_second": 0.775, + "step": 25500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035943552356118547, + "loss": 3.3219, + "step": 25505 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035935488692563167, + "loss": 3.3383, + "step": 25510 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003592742458268945, + "loss": 3.3004, + "step": 25515 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035919360027103785, + "loss": 3.2707, + "step": 25520 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003591129502641259, + "loss": 3.3976, + "step": 25525 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035903229581222293, + "loss": 3.3484, + "step": 25530 + }, + { + "epoch": 0.44, + "learning_rate": 0.000358951636921394, + "loss": 3.3598, + "step": 25535 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035887097359770393, + "loss": 3.258, + "step": 25540 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003587903058472184, + "loss": 3.3335, + "step": 25545 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035870963367600324, + "loss": 3.3303, + "step": 25550 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035862895709012443, + "loss": 3.4024, + "step": 25555 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003585482760956486, + "loss": 3.3626, + "step": 25560 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003584675906986423, + "loss": 3.3502, + "step": 25565 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035838690090517303, + "loss": 3.4236, + "step": 25570 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003583062067213079, + "loss": 3.3696, + "step": 25575 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035822550815311485, + "loss": 3.3772, + "step": 25580 + }, + { + "epoch": 0.44, + "learning_rate": 0.000358144805206662, + "loss": 3.445, + "step": 25585 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035806409788801785, + "loss": 3.4372, + "step": 25590 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003579833862032511, + "loss": 3.4523, + "step": 25595 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035790267015843076, + "loss": 3.3395, + "step": 25600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003578219497596264, + "loss": 3.3512, + "step": 25605 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003577412250129079, + "loss": 3.3397, + "step": 25610 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003576604959243451, + "loss": 3.3981, + "step": 25615 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035757976250000846, + "loss": 3.3503, + "step": 25620 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003574990247459688, + "loss": 3.3889, + "step": 25625 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003574182826682972, + "loss": 3.3094, + "step": 25630 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035733753627306496, + "loss": 3.3504, + "step": 25635 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003572567855663439, + "loss": 3.3083, + "step": 25640 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003571760305542059, + "loss": 3.4901, + "step": 25645 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003570952712427235, + "loss": 3.3134, + "step": 25650 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003570145076379693, + "loss": 3.4654, + "step": 25655 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003569337397460164, + "loss": 3.347, + "step": 25660 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003568529675729379, + "loss": 3.4081, + "step": 25665 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035677219112480765, + "loss": 3.381, + "step": 25670 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003566914104076996, + "loss": 3.2434, + "step": 25675 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035661062542768804, + "loss": 3.4147, + "step": 25680 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035652983619084754, + "loss": 3.3619, + "step": 25685 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035644904270325315, + "loss": 3.3556, + "step": 25690 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035636824497098, + "loss": 3.362, + "step": 25695 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035628744300010367, + "loss": 3.3565, + "step": 25700 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035620663679670014, + "loss": 3.3171, + "step": 25705 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003561258263668456, + "loss": 3.3843, + "step": 25710 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003560450117166165, + "loss": 3.4695, + "step": 25715 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003559641928520897, + "loss": 3.432, + "step": 25720 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003558833697793425, + "loss": 3.4059, + "step": 25725 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035580254250445234, + "loss": 3.3132, + "step": 25730 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003557217110334969, + "loss": 3.4028, + "step": 25735 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003556408753725543, + "loss": 3.3004, + "step": 25740 + }, + { + "epoch": 0.45, + "learning_rate": 0.000355560035527703, + "loss": 3.3349, + "step": 25745 + }, + { + "epoch": 0.45, + "learning_rate": 0.000355479191505022, + "loss": 3.3751, + "step": 25750 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035539834331058996, + "loss": 3.3602, + "step": 25755 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035531749095048633, + "loss": 3.3683, + "step": 25760 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035523663443079104, + "loss": 3.4222, + "step": 25765 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035515577375758384, + "loss": 3.3746, + "step": 25770 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035507490893694517, + "loss": 3.3384, + "step": 25775 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035499403997495555, + "loss": 3.3536, + "step": 25780 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035491316687769596, + "loss": 3.3968, + "step": 25785 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003548322896512477, + "loss": 3.3728, + "step": 25790 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035475140830169225, + "loss": 3.2715, + "step": 25795 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003546705228351115, + "loss": 3.4149, + "step": 25800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003545896332575876, + "loss": 3.4619, + "step": 25805 + }, + { + "epoch": 0.45, + "learning_rate": 0.000354508739575203, + "loss": 3.3945, + "step": 25810 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003544278417940406, + "loss": 3.204, + "step": 25815 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003543469399201834, + "loss": 3.3092, + "step": 25820 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003542660339597149, + "loss": 3.3261, + "step": 25825 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003541851239187188, + "loss": 3.2748, + "step": 25830 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003541042098032789, + "loss": 3.2542, + "step": 25835 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003540232916194798, + "loss": 3.3005, + "step": 25840 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035394236937340604, + "loss": 3.2868, + "step": 25845 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035386144307114257, + "loss": 3.3108, + "step": 25850 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003537805127187746, + "loss": 3.3181, + "step": 25855 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003536995783223876, + "loss": 3.4812, + "step": 25860 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035361863988806765, + "loss": 3.369, + "step": 25865 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035353769742190073, + "loss": 3.3039, + "step": 25870 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003534567509299733, + "loss": 3.3852, + "step": 25875 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003533758004183722, + "loss": 3.4023, + "step": 25880 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003532948458931844, + "loss": 3.4037, + "step": 25885 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003532138873604974, + "loss": 3.416, + "step": 25890 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035313292482639876, + "loss": 3.3859, + "step": 25895 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003530519582969765, + "loss": 3.2529, + "step": 25900 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035297098777831886, + "loss": 3.3192, + "step": 25905 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035289001327651445, + "loss": 3.3498, + "step": 25910 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035280903479765205, + "loss": 3.4666, + "step": 25915 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035272805234782075, + "loss": 3.3699, + "step": 25920 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035264706593311025, + "loss": 3.2185, + "step": 25925 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035256607555961023, + "loss": 3.4041, + "step": 25930 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035248508123341066, + "loss": 3.2361, + "step": 25935 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035240408296060205, + "loss": 3.289, + "step": 25940 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003523230807472749, + "loss": 3.4608, + "step": 25945 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035224207459952025, + "loss": 3.3191, + "step": 25950 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003521610645234294, + "loss": 3.4509, + "step": 25955 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035208005052509366, + "loss": 3.3884, + "step": 25960 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035199903261060516, + "loss": 3.2898, + "step": 25965 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035191801078605586, + "loss": 3.3371, + "step": 25970 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035183698505753815, + "loss": 3.3688, + "step": 25975 + }, + { + "epoch": 0.45, + "learning_rate": 0.000351755955431145, + "loss": 3.3285, + "step": 25980 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035167492191296904, + "loss": 3.3297, + "step": 25985 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003515938845091039, + "loss": 3.3815, + "step": 25990 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035151284322564307, + "loss": 3.3746, + "step": 25995 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003514317980686803, + "loss": 3.3711, + "step": 26000 + }, + { + "epoch": 0.45, + "eval_loss": 3.395720958709717, + "eval_runtime": 149.6727, + "eval_samples_per_second": 12.3, + "eval_steps_per_second": 0.775, + "step": 26000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003513507490443101, + "loss": 3.3657, + "step": 26005 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003512696961586266, + "loss": 3.3346, + "step": 26010 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035118863941772477, + "loss": 3.4844, + "step": 26015 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035110757882769955, + "loss": 3.3899, + "step": 26020 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003510265143946463, + "loss": 3.399, + "step": 26025 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035094544612466073, + "loss": 3.2881, + "step": 26030 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003508643740238387, + "loss": 3.4151, + "step": 26035 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035078329809827633, + "loss": 3.2765, + "step": 26040 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003507022183540703, + "loss": 3.4003, + "step": 26045 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003506211347973173, + "loss": 3.4405, + "step": 26050 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035054004743411435, + "loss": 3.3197, + "step": 26055 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003504589562705589, + "loss": 3.27, + "step": 26060 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035037786131274836, + "loss": 3.335, + "step": 26065 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035029676256678104, + "loss": 3.2878, + "step": 26070 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003502156600387549, + "loss": 3.3474, + "step": 26075 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003501345537347684, + "loss": 3.4218, + "step": 26080 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035005344366092047, + "loss": 3.4272, + "step": 26085 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003499723298233101, + "loss": 3.3154, + "step": 26090 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034989121222803665, + "loss": 3.3007, + "step": 26095 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003498100908811996, + "loss": 3.5048, + "step": 26100 + }, + { + "epoch": 0.45, + "learning_rate": 0.000349728965788899, + "loss": 3.411, + "step": 26105 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034964783695723515, + "loss": 3.3317, + "step": 26110 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034956670439230836, + "loss": 3.424, + "step": 26115 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003494855681002194, + "loss": 3.3243, + "step": 26120 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034940442808706926, + "loss": 3.4425, + "step": 26125 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034932328435895946, + "loss": 3.413, + "step": 26130 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003492421369219914, + "loss": 3.4129, + "step": 26135 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034916098578226696, + "loss": 3.4568, + "step": 26140 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034907983094588846, + "loss": 3.2578, + "step": 26145 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003489986724189581, + "loss": 3.4228, + "step": 26150 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003489175102075787, + "loss": 3.2936, + "step": 26155 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034883634431785325, + "loss": 3.4025, + "step": 26160 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003487551747558849, + "loss": 3.308, + "step": 26165 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003486740015277774, + "loss": 3.351, + "step": 26170 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003485928246396344, + "loss": 3.2869, + "step": 26175 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034851164409756, + "loss": 3.3198, + "step": 26180 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003484304599076586, + "loss": 3.3552, + "step": 26185 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034834927207603474, + "loss": 3.4593, + "step": 26190 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034826808060879356, + "loss": 3.3524, + "step": 26195 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034818688551204, + "loss": 3.2367, + "step": 26200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003481056867918795, + "loss": 3.2815, + "step": 26205 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003480244844544181, + "loss": 3.4033, + "step": 26210 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003479432785057615, + "loss": 3.3144, + "step": 26215 + }, + { + "epoch": 0.45, + "learning_rate": 0.000347862068952016, + "loss": 3.3572, + "step": 26220 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034778085579928827, + "loss": 3.3663, + "step": 26225 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034769963905368495, + "loss": 3.3651, + "step": 26230 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003476184187213135, + "loss": 3.3908, + "step": 26235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034753719480828076, + "loss": 3.3412, + "step": 26240 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034745596732069456, + "loss": 3.1838, + "step": 26245 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034737473626466296, + "loss": 3.3703, + "step": 26250 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034729350164629393, + "loss": 3.4167, + "step": 26255 + }, + { + "epoch": 0.45, + "learning_rate": 0.000347212263471696, + "loss": 3.445, + "step": 26260 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034713102174697773, + "loss": 3.3665, + "step": 26265 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003470497764782483, + "loss": 3.4791, + "step": 26270 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003469685276716167, + "loss": 3.327, + "step": 26275 + }, + { + "epoch": 0.45, + "learning_rate": 0.00034688727533319254, + "loss": 3.479, + "step": 26280 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003468060194690856, + "loss": 3.2287, + "step": 26285 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003467247600854059, + "loss": 3.3918, + "step": 26290 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003466434971882636, + "loss": 3.511, + "step": 26295 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003465622307837695, + "loss": 3.3726, + "step": 26300 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003464809608780341, + "loss": 3.2298, + "step": 26305 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003463996874771688, + "loss": 3.3942, + "step": 26310 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034631841058728474, + "loss": 3.4149, + "step": 26315 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003462371302144935, + "loss": 3.3502, + "step": 26320 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034615584636490715, + "loss": 3.2583, + "step": 26325 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034607455904463766, + "loss": 3.357, + "step": 26330 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003459932682597974, + "loss": 3.3674, + "step": 26335 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034591197401649914, + "loss": 3.4351, + "step": 26340 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003458306763208557, + "loss": 3.3208, + "step": 26345 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034574937517898034, + "loss": 3.3437, + "step": 26350 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034566807059698647, + "loss": 3.4414, + "step": 26355 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003455867625809876, + "loss": 3.3091, + "step": 26360 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034550545113709786, + "loss": 3.3744, + "step": 26365 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003454241362714314, + "loss": 3.354, + "step": 26370 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003453428179901028, + "loss": 3.3764, + "step": 26375 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003452614962992265, + "loss": 3.345, + "step": 26380 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034518017120491765, + "loss": 3.3632, + "step": 26385 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003450988427132915, + "loss": 3.3498, + "step": 26390 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003450175108304636, + "loss": 3.3696, + "step": 26395 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034493617556254953, + "loss": 3.3475, + "step": 26400 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034485483691566537, + "loss": 3.3516, + "step": 26405 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003447734948959273, + "loss": 3.3455, + "step": 26410 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003446921495094519, + "loss": 3.3736, + "step": 26415 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034461080076235597, + "loss": 3.4373, + "step": 26420 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003445294486607564, + "loss": 3.3134, + "step": 26425 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034444809321077045, + "loss": 3.3514, + "step": 26430 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003443667344185157, + "loss": 3.4766, + "step": 26435 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034428537229011, + "loss": 3.3528, + "step": 26440 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003442040068316711, + "loss": 3.3033, + "step": 26445 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003441226380493174, + "loss": 3.4208, + "step": 26450 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003440412659491677, + "loss": 3.3896, + "step": 26455 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034395989053734034, + "loss": 3.3191, + "step": 26460 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003438785118199545, + "loss": 3.3925, + "step": 26465 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003437971298031295, + "loss": 3.3533, + "step": 26470 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034371574449298476, + "loss": 3.3886, + "step": 26475 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003436343558956401, + "loss": 3.3418, + "step": 26480 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034355296401721545, + "loss": 3.4094, + "step": 26485 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003434715688638312, + "loss": 3.4429, + "step": 26490 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034339017044160766, + "loss": 3.3538, + "step": 26495 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003433087687566657, + "loss": 3.3811, + "step": 26500 + }, + { + "epoch": 0.46, + "eval_loss": 3.386625289916992, + "eval_runtime": 149.9818, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 26500 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003432273638151263, + "loss": 3.4065, + "step": 26505 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034314595562311064, + "loss": 3.3432, + "step": 26510 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003430645441867404, + "loss": 3.4248, + "step": 26515 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034298312951213696, + "loss": 3.3458, + "step": 26520 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034290171160542243, + "loss": 3.4478, + "step": 26525 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003428202904727192, + "loss": 3.3292, + "step": 26530 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003427388661201495, + "loss": 3.4159, + "step": 26535 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034265743855383616, + "loss": 3.3999, + "step": 26540 + }, + { + "epoch": 0.46, + "learning_rate": 0.000342576007779902, + "loss": 3.4405, + "step": 26545 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003424945738044702, + "loss": 3.2289, + "step": 26550 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034241313663366436, + "loss": 3.3146, + "step": 26555 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034233169627360797, + "loss": 3.2682, + "step": 26560 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003422502527304249, + "loss": 3.401, + "step": 26565 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003421688060102394, + "loss": 3.4786, + "step": 26570 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003420873561191758, + "loss": 3.3598, + "step": 26575 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034200590306335873, + "loss": 3.3288, + "step": 26580 + }, + { + "epoch": 0.46, + "learning_rate": 0.000341924446848913, + "loss": 3.3177, + "step": 26585 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003418429874819637, + "loss": 3.3151, + "step": 26590 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003417615249686363, + "loss": 3.3541, + "step": 26595 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003416800593150561, + "loss": 3.3449, + "step": 26600 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034159859052734917, + "loss": 3.3314, + "step": 26605 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034151711861164135, + "loss": 3.3612, + "step": 26610 + }, + { + "epoch": 0.46, + "learning_rate": 0.000341435643574059, + "loss": 3.3928, + "step": 26615 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034135416542072864, + "loss": 3.3273, + "step": 26620 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034127268415777687, + "loss": 3.3225, + "step": 26625 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034119119979133085, + "loss": 3.3973, + "step": 26630 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034110971232751775, + "loss": 3.3591, + "step": 26635 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034102822177246487, + "loss": 3.1721, + "step": 26640 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003409467281323, + "loss": 3.1719, + "step": 26645 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034086523141315105, + "loss": 3.3968, + "step": 26650 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034078373162114615, + "loss": 3.3185, + "step": 26655 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034070222876241365, + "loss": 3.3102, + "step": 26660 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003406207228430821, + "loss": 3.4479, + "step": 26665 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034053921386928036, + "loss": 3.4227, + "step": 26670 + }, + { + "epoch": 0.46, + "learning_rate": 0.00034045770184713755, + "loss": 3.4028, + "step": 26675 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003403761867827829, + "loss": 3.4202, + "step": 26680 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003402946686823459, + "loss": 3.3627, + "step": 26685 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003402131475519563, + "loss": 3.4673, + "step": 26690 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003401316233977441, + "loss": 3.4651, + "step": 26695 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003400500962258395, + "loss": 3.2451, + "step": 26700 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003399685660423729, + "loss": 3.3571, + "step": 26705 + }, + { + "epoch": 0.46, + "learning_rate": 0.000339887032853475, + "loss": 3.3063, + "step": 26710 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003398054966652767, + "loss": 3.1844, + "step": 26715 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033972395748390905, + "loss": 3.3216, + "step": 26720 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033964241531550326, + "loss": 3.2759, + "step": 26725 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033956087016619104, + "loss": 3.2445, + "step": 26730 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003394793220421042, + "loss": 3.2716, + "step": 26735 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003393977709493746, + "loss": 3.4037, + "step": 26740 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033931621689413464, + "loss": 3.3809, + "step": 26745 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003392346598825165, + "loss": 3.3758, + "step": 26750 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033915309992065316, + "loss": 3.3075, + "step": 26755 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033907153701467733, + "loss": 3.463, + "step": 26760 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033898997117072206, + "loss": 3.3957, + "step": 26765 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033890840239492084, + "loss": 3.3882, + "step": 26770 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003388268306934072, + "loss": 3.3903, + "step": 26775 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003387452560723148, + "loss": 3.3708, + "step": 26780 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033866367853777784, + "loss": 3.3458, + "step": 26785 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033858209809593023, + "loss": 3.3112, + "step": 26790 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003385005147529067, + "loss": 3.2947, + "step": 26795 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003384189285148417, + "loss": 3.3433, + "step": 26800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003383373393878702, + "loss": 3.3527, + "step": 26805 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033825574737812725, + "loss": 3.1773, + "step": 26810 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033817415249174824, + "loss": 3.3979, + "step": 26815 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003380925547348684, + "loss": 3.4185, + "step": 26820 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003380109541136238, + "loss": 3.3596, + "step": 26825 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033792935063415024, + "loss": 3.4408, + "step": 26830 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033784774430258393, + "loss": 3.2784, + "step": 26835 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003377661351250612, + "loss": 3.3703, + "step": 26840 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003376845231077186, + "loss": 3.3976, + "step": 26845 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033760290825669305, + "loss": 3.4119, + "step": 26850 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003375212905781215, + "loss": 3.4389, + "step": 26855 + }, + { + "epoch": 0.46, + "learning_rate": 0.00033743967007814123, + "loss": 3.4103, + "step": 26860 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003373580467628895, + "loss": 3.3018, + "step": 26865 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003372764206385042, + "loss": 3.3944, + "step": 26870 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033719479171112317, + "loss": 3.3529, + "step": 26875 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033711315998688436, + "loss": 3.3456, + "step": 26880 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003370315254719261, + "loss": 3.3234, + "step": 26885 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033694988817238694, + "loss": 3.2187, + "step": 26890 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003368682480944056, + "loss": 3.4029, + "step": 26895 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033678660524412093, + "loss": 3.3446, + "step": 26900 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033670495962767193, + "loss": 3.3761, + "step": 26905 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033662331125119824, + "loss": 3.2874, + "step": 26910 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033654166012083916, + "loss": 3.2817, + "step": 26915 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033646000624273453, + "loss": 3.4034, + "step": 26920 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033637834962302435, + "loss": 3.4035, + "step": 26925 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003362966902678486, + "loss": 3.3829, + "step": 26930 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033621502818334786, + "loss": 3.3859, + "step": 26935 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003361333633756626, + "loss": 3.2862, + "step": 26940 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003360516958509335, + "loss": 3.3791, + "step": 26945 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003359700256153017, + "loss": 3.3683, + "step": 26950 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033588835267490827, + "loss": 3.3453, + "step": 26955 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003358066770358947, + "loss": 3.3683, + "step": 26960 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003357249987044025, + "loss": 3.3918, + "step": 26965 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003356433176865735, + "loss": 3.3034, + "step": 26970 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033556163398854966, + "loss": 3.3282, + "step": 26975 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003354799476164732, + "loss": 3.4101, + "step": 26980 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003353982585764865, + "loss": 3.3256, + "step": 26985 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003353165668747322, + "loss": 3.3154, + "step": 26990 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033523487251735303, + "loss": 3.2524, + "step": 26995 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033515317551049213, + "loss": 3.3979, + "step": 27000 + }, + { + "epoch": 0.47, + "eval_loss": 3.3817105293273926, + "eval_runtime": 149.7679, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.775, + "step": 27000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003350714758602924, + "loss": 3.3361, + "step": 27005 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003349897735728975, + "loss": 3.3722, + "step": 27010 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033490806865445097, + "loss": 3.4067, + "step": 27015 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003348263611110966, + "loss": 3.3635, + "step": 27020 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033474465094897836, + "loss": 3.4518, + "step": 27025 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003346629381742404, + "loss": 3.3886, + "step": 27030 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033458122279302706, + "loss": 3.3627, + "step": 27035 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003344995048114832, + "loss": 3.2823, + "step": 27040 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033441778423575315, + "loss": 3.3224, + "step": 27045 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033433606107198223, + "loss": 3.4366, + "step": 27050 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033425433532631546, + "loss": 3.2814, + "step": 27055 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003341726070048983, + "loss": 3.4437, + "step": 27060 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003340908761138762, + "loss": 3.3634, + "step": 27065 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033400914265939484, + "loss": 3.3136, + "step": 27070 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033392740664760033, + "loss": 3.2769, + "step": 27075 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003338456680846387, + "loss": 3.3517, + "step": 27080 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033376392697665624, + "loss": 3.299, + "step": 27085 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033368218332979955, + "loss": 3.3863, + "step": 27090 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003336004371502152, + "loss": 3.3336, + "step": 27095 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003335186884440503, + "loss": 3.2838, + "step": 27100 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033343693721745186, + "loss": 3.3958, + "step": 27105 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003333551834765669, + "loss": 3.2276, + "step": 27110 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033327342722754325, + "loss": 3.3027, + "step": 27115 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003331916684765284, + "loss": 3.3461, + "step": 27120 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033310990722967, + "loss": 3.4142, + "step": 27125 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033302814349311645, + "loss": 3.4698, + "step": 27130 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033294637727301565, + "loss": 3.3017, + "step": 27135 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003328646085755163, + "loss": 3.2897, + "step": 27140 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003327828374067667, + "loss": 3.4139, + "step": 27145 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033270106377291563, + "loss": 3.4206, + "step": 27150 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033261928768011235, + "loss": 3.3275, + "step": 27155 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003325375091345057, + "loss": 3.304, + "step": 27160 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003324557281422451, + "loss": 3.2816, + "step": 27165 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003323739447094802, + "loss": 3.3583, + "step": 27170 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003322921588423604, + "loss": 3.2299, + "step": 27175 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003322103705470359, + "loss": 3.2416, + "step": 27180 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033212857982965657, + "loss": 3.3558, + "step": 27185 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003320467866963726, + "loss": 3.3987, + "step": 27190 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003319649911533346, + "loss": 3.3418, + "step": 27195 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003318831932066931, + "loss": 3.3419, + "step": 27200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033180139286259885, + "loss": 3.4227, + "step": 27205 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033171959012720276, + "loss": 3.3857, + "step": 27210 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033163778500665606, + "loss": 3.2885, + "step": 27215 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033155597750711023, + "loss": 3.3045, + "step": 27220 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033147416763471644, + "loss": 3.3942, + "step": 27225 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003313923553956266, + "loss": 3.3544, + "step": 27230 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003313105407959926, + "loss": 3.2217, + "step": 27235 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033122872384196627, + "loss": 3.2347, + "step": 27240 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003311469045397001, + "loss": 3.2675, + "step": 27245 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033106508289534615, + "loss": 3.274, + "step": 27250 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003309832589150573, + "loss": 3.3786, + "step": 27255 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033090143260498616, + "loss": 3.3703, + "step": 27260 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003308196039712856, + "loss": 3.2512, + "step": 27265 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033073777302010875, + "loss": 3.3575, + "step": 27270 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003306559397576089, + "loss": 3.2458, + "step": 27275 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003305741041899396, + "loss": 3.3806, + "step": 27280 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003304922663232543, + "loss": 3.3828, + "step": 27285 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033041042616370676, + "loss": 3.3612, + "step": 27290 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003303285837174511, + "loss": 3.2727, + "step": 27295 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033024673899064143, + "loss": 3.2924, + "step": 27300 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033016489198943184, + "loss": 3.3078, + "step": 27305 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003300830427199771, + "loss": 3.2827, + "step": 27310 + }, + { + "epoch": 0.47, + "learning_rate": 0.00033000119118843164, + "loss": 3.328, + "step": 27315 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003299193374009505, + "loss": 3.3192, + "step": 27320 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032983748136368836, + "loss": 3.4029, + "step": 27325 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032975562308280057, + "loss": 3.4059, + "step": 27330 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032967376256444247, + "loss": 3.3607, + "step": 27335 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032959189981476947, + "loss": 3.2867, + "step": 27340 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032951003483993734, + "loss": 3.3518, + "step": 27345 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032942816764610164, + "loss": 3.2867, + "step": 27350 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003293462982394187, + "loss": 3.3899, + "step": 27355 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003292644266260445, + "loss": 3.22, + "step": 27360 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003291825528121354, + "loss": 3.4481, + "step": 27365 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032910067680384786, + "loss": 3.2794, + "step": 27370 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032901879860733856, + "loss": 3.3851, + "step": 27375 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003289369182287644, + "loss": 3.3057, + "step": 27380 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032885503567428224, + "loss": 3.3024, + "step": 27385 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003287731509500492, + "loss": 3.3614, + "step": 27390 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003286912640622227, + "loss": 3.2706, + "step": 27395 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003286093750169603, + "loss": 3.2274, + "step": 27400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003285274838204194, + "loss": 3.3175, + "step": 27405 + }, + { + "epoch": 0.47, + "learning_rate": 0.000328445590478758, + "loss": 3.4169, + "step": 27410 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032836369499813383, + "loss": 3.3511, + "step": 27415 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032828179738470534, + "loss": 3.4634, + "step": 27420 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032819989764463055, + "loss": 3.3259, + "step": 27425 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032811799578406794, + "loss": 3.3551, + "step": 27430 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003280360918091762, + "loss": 3.2537, + "step": 27435 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032795418572611405, + "loss": 3.2446, + "step": 27440 + }, + { + "epoch": 0.47, + "learning_rate": 0.00032787227754104033, + "loss": 3.289, + "step": 27445 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032779036726011425, + "loss": 3.3049, + "step": 27450 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003277084548894949, + "loss": 3.3346, + "step": 27455 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032762654043534185, + "loss": 3.3805, + "step": 27460 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003275446239038145, + "loss": 3.343, + "step": 27465 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003274627053010725, + "loss": 3.2794, + "step": 27470 + }, + { + "epoch": 0.48, + "learning_rate": 0.000327380784633276, + "loss": 3.34, + "step": 27475 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032729886190658463, + "loss": 3.3971, + "step": 27480 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003272169371271589, + "loss": 3.3285, + "step": 27485 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003271350103011589, + "loss": 3.2486, + "step": 27490 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003270530814347452, + "loss": 3.4702, + "step": 27495 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032697115053407843, + "loss": 3.4144, + "step": 27500 + }, + { + "epoch": 0.48, + "eval_loss": 3.368710994720459, + "eval_runtime": 149.7624, + "eval_samples_per_second": 12.293, + "eval_steps_per_second": 0.775, + "step": 27500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032688921760531936, + "loss": 3.3579, + "step": 27505 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032680728265462885, + "loss": 3.2252, + "step": 27510 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003267253456881681, + "loss": 3.3385, + "step": 27515 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032664340671209846, + "loss": 3.4116, + "step": 27520 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032656146573258114, + "loss": 3.3403, + "step": 27525 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003264795227557776, + "loss": 3.1292, + "step": 27530 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032639757778784965, + "loss": 3.3242, + "step": 27535 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003263156308349593, + "loss": 3.259, + "step": 27540 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032623368190326827, + "loss": 3.4236, + "step": 27545 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003261517309989389, + "loss": 3.3361, + "step": 27550 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032606977812813323, + "loss": 3.4113, + "step": 27555 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032598782329701397, + "loss": 3.2926, + "step": 27560 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003259058665117436, + "loss": 3.3761, + "step": 27565 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032582390777848464, + "loss": 3.371, + "step": 27570 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032574194710340037, + "loss": 3.455, + "step": 27575 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032565998449265353, + "loss": 3.3533, + "step": 27580 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003255780199524073, + "loss": 3.4031, + "step": 27585 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003254960534888252, + "loss": 3.3147, + "step": 27590 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003254140851080704, + "loss": 3.2974, + "step": 27595 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003253321148163067, + "loss": 3.3253, + "step": 27600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032525014261969786, + "loss": 3.4219, + "step": 27605 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032516816852440756, + "loss": 3.3182, + "step": 27610 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003250861925366001, + "loss": 3.4381, + "step": 27615 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032500421466243946, + "loss": 3.2992, + "step": 27620 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032492223490809, + "loss": 3.3346, + "step": 27625 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003248402532797163, + "loss": 3.4011, + "step": 27630 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032475826978348274, + "loss": 3.3957, + "step": 27635 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003246762844255542, + "loss": 3.4597, + "step": 27640 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032459429721209564, + "loss": 3.418, + "step": 27645 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032451230814927183, + "loss": 3.2963, + "step": 27650 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003244303172432482, + "loss": 3.2607, + "step": 27655 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032434832450018987, + "loss": 3.3302, + "step": 27660 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032426632992626234, + "loss": 3.3218, + "step": 27665 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032418433352763106, + "loss": 3.2973, + "step": 27670 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032410233531046185, + "loss": 3.2454, + "step": 27675 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003240203352809206, + "loss": 3.4339, + "step": 27680 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032393833344517334, + "loss": 3.3274, + "step": 27685 + }, + { + "epoch": 0.48, + "learning_rate": 0.000323856329809386, + "loss": 3.2768, + "step": 27690 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003237743243797249, + "loss": 3.2868, + "step": 27695 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003236923171623564, + "loss": 3.3539, + "step": 27700 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003236103081634472, + "loss": 3.2585, + "step": 27705 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032352829738916365, + "loss": 3.279, + "step": 27710 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003234462848456729, + "loss": 3.3725, + "step": 27715 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003233642705391416, + "loss": 3.3193, + "step": 27720 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003232822544757369, + "loss": 3.2873, + "step": 27725 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032320023666162606, + "loss": 3.2741, + "step": 27730 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003231182171029761, + "loss": 3.3445, + "step": 27735 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003230361958059549, + "loss": 3.1675, + "step": 27740 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032295417277672985, + "loss": 3.3555, + "step": 27745 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032287214802146854, + "loss": 3.3363, + "step": 27750 + }, + { + "epoch": 0.48, + "learning_rate": 0.000322790121546339, + "loss": 3.3608, + "step": 27755 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032270809335750895, + "loss": 3.3135, + "step": 27760 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032262606346114683, + "loss": 3.3241, + "step": 27765 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032254403186342067, + "loss": 3.3791, + "step": 27770 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032246199857049877, + "loss": 3.2657, + "step": 27775 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003223799635885498, + "loss": 3.3029, + "step": 27780 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003222979269237423, + "loss": 3.3314, + "step": 27785 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003222158885822447, + "loss": 3.1889, + "step": 27790 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032213384857022635, + "loss": 3.3532, + "step": 27795 + }, + { + "epoch": 0.48, + "learning_rate": 0.000322051806893856, + "loss": 3.2229, + "step": 27800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003219697635593029, + "loss": 3.2576, + "step": 27805 + }, + { + "epoch": 0.48, + "learning_rate": 0.000321887718572736, + "loss": 3.4196, + "step": 27810 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032180567194032476, + "loss": 3.2642, + "step": 27815 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003217236236682389, + "loss": 3.3044, + "step": 27820 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003216415737626479, + "loss": 3.277, + "step": 27825 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003215595222297214, + "loss": 3.3251, + "step": 27830 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003214774690756293, + "loss": 3.291, + "step": 27835 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003213954143065416, + "loss": 3.3613, + "step": 27840 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032131335792862846, + "loss": 3.3399, + "step": 27845 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032123129994805993, + "loss": 3.2345, + "step": 27850 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003211492403710065, + "loss": 3.2515, + "step": 27855 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032106717920363855, + "loss": 3.369, + "step": 27860 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003209851164521268, + "loss": 3.2912, + "step": 27865 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032090305212264175, + "loss": 3.355, + "step": 27870 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032082098622135424, + "loss": 3.3553, + "step": 27875 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003207389187544354, + "loss": 3.3148, + "step": 27880 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003206568497280562, + "loss": 3.2627, + "step": 27885 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003205747791483876, + "loss": 3.388, + "step": 27890 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003204927070216011, + "loss": 3.2773, + "step": 27895 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003204106333538681, + "loss": 3.363, + "step": 27900 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032032855815136, + "loss": 3.442, + "step": 27905 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032024648142024866, + "loss": 3.2348, + "step": 27910 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032016440316670554, + "loss": 3.2679, + "step": 27915 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003200823233969027, + "loss": 3.2793, + "step": 27920 + }, + { + "epoch": 0.48, + "learning_rate": 0.00032000024211701207, + "loss": 3.2952, + "step": 27925 + }, + { + "epoch": 0.48, + "learning_rate": 0.00031991815933320574, + "loss": 3.1779, + "step": 27930 + }, + { + "epoch": 0.48, + "learning_rate": 0.00031983607505165585, + "loss": 3.2989, + "step": 27935 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003197539892785348, + "loss": 3.3453, + "step": 27940 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003196719020200151, + "loss": 3.2617, + "step": 27945 + }, + { + "epoch": 0.48, + "learning_rate": 0.000319589813282269, + "loss": 3.4275, + "step": 27950 + }, + { + "epoch": 0.48, + "learning_rate": 0.00031950772307146936, + "loss": 3.4124, + "step": 27955 + }, + { + "epoch": 0.48, + "learning_rate": 0.00031942563139378906, + "loss": 3.4406, + "step": 27960 + }, + { + "epoch": 0.48, + "learning_rate": 0.00031934353825540074, + "loss": 3.4792, + "step": 27965 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003192614436624774, + "loss": 3.3811, + "step": 27970 + }, + { + "epoch": 0.48, + "learning_rate": 0.00031917934762119236, + "loss": 3.3476, + "step": 27975 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003190972501377186, + "loss": 3.366, + "step": 27980 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003190151512182296, + "loss": 3.3788, + "step": 27985 + }, + { + "epoch": 0.48, + "learning_rate": 0.00031893305086889853, + "loss": 3.3044, + "step": 27990 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003188509490958991, + "loss": 3.3833, + "step": 27995 + }, + { + "epoch": 0.48, + "learning_rate": 0.00031876884590540495, + "loss": 3.3097, + "step": 28000 + }, + { + "epoch": 0.48, + "eval_loss": 3.3711462020874023, + "eval_runtime": 149.7727, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.775, + "step": 28000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003186867413035898, + "loss": 3.2273, + "step": 28005 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003186046352966274, + "loss": 3.2312, + "step": 28010 + }, + { + "epoch": 0.48, + "learning_rate": 0.00031852252789069184, + "loss": 3.3193, + "step": 28015 + }, + { + "epoch": 0.48, + "learning_rate": 0.000318440419091957, + "loss": 3.3245, + "step": 28020 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003183583089065973, + "loss": 3.3313, + "step": 28025 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031827619734078676, + "loss": 3.3352, + "step": 28030 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003181940844006999, + "loss": 3.3963, + "step": 28035 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031811197009251103, + "loss": 3.3619, + "step": 28040 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003180298544223949, + "loss": 3.3542, + "step": 28045 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031794773739652623, + "loss": 3.3766, + "step": 28050 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003178656190210795, + "loss": 3.4085, + "step": 28055 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003177834993022299, + "loss": 3.3648, + "step": 28060 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031770137824615227, + "loss": 3.4042, + "step": 28065 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003176192558590217, + "loss": 3.3609, + "step": 28070 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031753713214701334, + "loss": 3.4013, + "step": 28075 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031745500711630257, + "loss": 3.3054, + "step": 28080 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003173728807730647, + "loss": 3.373, + "step": 28085 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003172907531234752, + "loss": 3.3296, + "step": 28090 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031720862417370976, + "loss": 3.3439, + "step": 28095 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003171264939299439, + "loss": 3.3268, + "step": 28100 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031704436239835356, + "loss": 3.3792, + "step": 28105 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031696222958511443, + "loss": 3.3698, + "step": 28110 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031688009549640263, + "loss": 3.3603, + "step": 28115 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003167979601383941, + "loss": 3.2905, + "step": 28120 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031671582351726514, + "loss": 3.1407, + "step": 28125 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031663368563919183, + "loss": 3.3483, + "step": 28130 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003165515465103506, + "loss": 3.3586, + "step": 28135 + }, + { + "epoch": 0.49, + "learning_rate": 0.000316469406136918, + "loss": 3.3597, + "step": 28140 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003163872645250704, + "loss": 3.4144, + "step": 28145 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003163051216809845, + "loss": 3.3252, + "step": 28150 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031622297761083696, + "loss": 3.2158, + "step": 28155 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003161408323208047, + "loss": 3.268, + "step": 28160 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031605868581706457, + "loss": 3.2614, + "step": 28165 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003159765381057935, + "loss": 3.32, + "step": 28170 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031589438919316874, + "loss": 3.4281, + "step": 28175 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003158122390853673, + "loss": 3.4212, + "step": 28180 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003157300877885665, + "loss": 3.254, + "step": 28185 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031564793530894386, + "loss": 3.3829, + "step": 28190 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031556578165267647, + "loss": 3.2446, + "step": 28195 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031548362682594215, + "loss": 3.3505, + "step": 28200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003154014708349186, + "loss": 3.3956, + "step": 28205 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003153193136857832, + "loss": 3.3643, + "step": 28210 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031523715538471394, + "loss": 3.2072, + "step": 28215 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003151549959378887, + "loss": 3.3384, + "step": 28220 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031507283535148553, + "loss": 3.3184, + "step": 28225 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003149906736316823, + "loss": 3.3089, + "step": 28230 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031490851078465717, + "loss": 3.2242, + "step": 28235 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031482634681658856, + "loss": 3.3506, + "step": 28240 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031474418173365465, + "loss": 3.2773, + "step": 28245 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003146620155420337, + "loss": 3.3073, + "step": 28250 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003145798482479044, + "loss": 3.3887, + "step": 28255 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031449767985744525, + "loss": 3.2862, + "step": 28260 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003144155103768349, + "loss": 3.3459, + "step": 28265 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031433333981225204, + "loss": 3.3802, + "step": 28270 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031425116816987536, + "loss": 3.3579, + "step": 28275 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031416899545588404, + "loss": 3.4191, + "step": 28280 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003140868216764568, + "loss": 3.3477, + "step": 28285 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003140046468377728, + "loss": 3.3956, + "step": 28290 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031392247094601104, + "loss": 3.4055, + "step": 28295 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031384029400735087, + "loss": 3.3708, + "step": 28300 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031375811602797155, + "loss": 3.4004, + "step": 28305 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031367593701405236, + "loss": 3.307, + "step": 28310 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031359375697177284, + "loss": 3.3867, + "step": 28315 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031351157590731244, + "loss": 3.3628, + "step": 28320 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003134293938268508, + "loss": 3.3526, + "step": 28325 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031334721073656763, + "loss": 3.2619, + "step": 28330 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003132650266426425, + "loss": 3.2303, + "step": 28335 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031318284155125534, + "loss": 3.3438, + "step": 28340 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031310065546858606, + "loss": 3.3175, + "step": 28345 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031301846840081475, + "loss": 3.348, + "step": 28350 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003129362803541213, + "loss": 3.4054, + "step": 28355 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031285409133468574, + "loss": 3.2905, + "step": 28360 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031277190134868855, + "loss": 3.3457, + "step": 28365 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031268971040230973, + "loss": 3.2979, + "step": 28370 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031260751850172974, + "loss": 3.3189, + "step": 28375 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031252532565312894, + "loss": 3.2012, + "step": 28380 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003124431318626879, + "loss": 3.2903, + "step": 28385 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031236093713658705, + "loss": 3.394, + "step": 28390 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003122787414810072, + "loss": 3.376, + "step": 28395 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003121965449021288, + "loss": 3.424, + "step": 28400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003121143474061328, + "loss": 3.2631, + "step": 28405 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003120321489991999, + "loss": 3.3431, + "step": 28410 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003119499496875111, + "loss": 3.3405, + "step": 28415 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003118677494772473, + "loss": 3.288, + "step": 28420 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003117855483745896, + "loss": 3.3591, + "step": 28425 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003117033463857191, + "loss": 3.3612, + "step": 28430 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003116211435168168, + "loss": 3.2559, + "step": 28435 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031153893977406405, + "loss": 3.3729, + "step": 28440 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031145673516364225, + "loss": 3.3824, + "step": 28445 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031137452969173275, + "loss": 3.3824, + "step": 28450 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031129232336451676, + "loss": 3.3525, + "step": 28455 + }, + { + "epoch": 0.49, + "learning_rate": 0.000311210116188176, + "loss": 3.2539, + "step": 28460 + }, + { + "epoch": 0.49, + "learning_rate": 0.000311127908168892, + "loss": 3.2978, + "step": 28465 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003110456993128464, + "loss": 3.2967, + "step": 28470 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031096348962622063, + "loss": 3.352, + "step": 28475 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031088127911519666, + "loss": 3.3765, + "step": 28480 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031079906778595627, + "loss": 3.4548, + "step": 28485 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031071685564468146, + "loss": 3.2177, + "step": 28490 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031063464269755393, + "loss": 3.2067, + "step": 28495 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031055242895075574, + "loss": 3.2593, + "step": 28500 + }, + { + "epoch": 0.49, + "eval_loss": 3.359267473220825, + "eval_runtime": 149.9726, + "eval_samples_per_second": 12.276, + "eval_steps_per_second": 0.773, + "step": 28500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031047021441046906, + "loss": 3.3636, + "step": 28505 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003103879990828759, + "loss": 3.3139, + "step": 28510 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031030578297415836, + "loss": 3.379, + "step": 28515 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003102235660904988, + "loss": 3.3304, + "step": 28520 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031014134843807944, + "loss": 3.3501, + "step": 28525 + }, + { + "epoch": 0.49, + "learning_rate": 0.00031005913002308275, + "loss": 3.3744, + "step": 28530 + }, + { + "epoch": 0.49, + "learning_rate": 0.000309976910851691, + "loss": 3.2913, + "step": 28535 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003098946909300866, + "loss": 3.2803, + "step": 28540 + }, + { + "epoch": 0.49, + "learning_rate": 0.00030981247026445224, + "loss": 3.3851, + "step": 28545 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003097302488609704, + "loss": 3.2995, + "step": 28550 + }, + { + "epoch": 0.49, + "learning_rate": 0.00030964802672582364, + "loss": 3.356, + "step": 28555 + }, + { + "epoch": 0.49, + "learning_rate": 0.00030956580386519477, + "loss": 3.2917, + "step": 28560 + }, + { + "epoch": 0.49, + "learning_rate": 0.00030948358028526644, + "loss": 3.3783, + "step": 28565 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003094013559922215, + "loss": 3.2218, + "step": 28570 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003093191309922428, + "loss": 3.163, + "step": 28575 + }, + { + "epoch": 0.49, + "learning_rate": 0.00030923690529151315, + "loss": 3.3771, + "step": 28580 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003091546788962156, + "loss": 3.3725, + "step": 28585 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003090724518125331, + "loss": 3.3931, + "step": 28590 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003089902240466487, + "loss": 3.3111, + "step": 28595 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003089079956047456, + "loss": 3.3732, + "step": 28600 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030882576649300675, + "loss": 3.442, + "step": 28605 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003087435367176157, + "loss": 3.393, + "step": 28610 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003086613062847553, + "loss": 3.3046, + "step": 28615 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003085790752006091, + "loss": 3.3409, + "step": 28620 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003084968434713605, + "loss": 3.3822, + "step": 28625 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003084146111031927, + "loss": 3.2859, + "step": 28630 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003083323781022894, + "loss": 3.3541, + "step": 28635 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030825014447483383, + "loss": 3.286, + "step": 28640 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003081679102270098, + "loss": 3.3848, + "step": 28645 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030808567536500073, + "loss": 3.3427, + "step": 28650 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003080034398949903, + "loss": 3.2654, + "step": 28655 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003079212038231622, + "loss": 3.2767, + "step": 28660 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030783896715570015, + "loss": 3.2923, + "step": 28665 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030775672989878796, + "loss": 3.2185, + "step": 28670 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003076744920586095, + "loss": 3.317, + "step": 28675 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030759225364134846, + "loss": 3.2964, + "step": 28680 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003075100146531889, + "loss": 3.3482, + "step": 28685 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003074277751003148, + "loss": 3.456, + "step": 28690 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030734553498891, + "loss": 3.3257, + "step": 28695 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030726329432515865, + "loss": 3.4125, + "step": 28700 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030718105311524475, + "loss": 3.3209, + "step": 28705 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030709881136535246, + "loss": 3.2977, + "step": 28710 + }, + { + "epoch": 0.5, + "learning_rate": 0.000307016569081666, + "loss": 3.4025, + "step": 28715 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003069343262703694, + "loss": 3.2998, + "step": 28720 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030685208293764713, + "loss": 3.3609, + "step": 28725 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030676983908968324, + "loss": 3.3702, + "step": 28730 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030668759473266214, + "loss": 3.3413, + "step": 28735 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030660534987276823, + "loss": 3.3318, + "step": 28740 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030652310451618586, + "loss": 3.3335, + "step": 28745 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030644085866909953, + "loss": 3.1442, + "step": 28750 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030635861233769353, + "loss": 3.4118, + "step": 28755 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030627636552815247, + "loss": 3.3533, + "step": 28760 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003061941182466609, + "loss": 3.3464, + "step": 28765 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003061118704994035, + "loss": 3.3673, + "step": 28770 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003060296222925646, + "loss": 3.2905, + "step": 28775 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003059473736323291, + "loss": 3.3231, + "step": 28780 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003058651245248815, + "loss": 3.3518, + "step": 28785 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030578287497640675, + "loss": 3.2899, + "step": 28790 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003057006249930893, + "loss": 3.4217, + "step": 28795 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003056183745811141, + "loss": 3.3105, + "step": 28800 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030553612374666593, + "loss": 3.3716, + "step": 28805 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030545387249592964, + "loss": 3.3545, + "step": 28810 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030537162083509007, + "loss": 3.2589, + "step": 28815 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003052893687703321, + "loss": 3.3462, + "step": 28820 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030520711630784074, + "loss": 3.3051, + "step": 28825 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030512486345380096, + "loss": 3.2761, + "step": 28830 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003050426102143976, + "loss": 3.2527, + "step": 28835 + }, + { + "epoch": 0.5, + "learning_rate": 0.000304960356595816, + "loss": 3.2873, + "step": 28840 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030487810260424077, + "loss": 3.3043, + "step": 28845 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003047958482458574, + "loss": 3.2753, + "step": 28850 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003047135935268508, + "loss": 3.4139, + "step": 28855 + }, + { + "epoch": 0.5, + "learning_rate": 0.000304631338453406, + "loss": 3.2713, + "step": 28860 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030454908303170845, + "loss": 3.2992, + "step": 28865 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003044668272679431, + "loss": 3.3822, + "step": 28870 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030438457116829516, + "loss": 3.3267, + "step": 28875 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030430231473895, + "loss": 3.2699, + "step": 28880 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030422005798609277, + "loss": 3.2666, + "step": 28885 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003041378009159089, + "loss": 3.3945, + "step": 28890 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030405554353458357, + "loss": 3.2789, + "step": 28895 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030397328584830214, + "loss": 3.3467, + "step": 28900 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030389102786325, + "loss": 3.3268, + "step": 28905 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003038087695856125, + "loss": 3.3759, + "step": 28910 + }, + { + "epoch": 0.5, + "learning_rate": 0.000303726511021575, + "loss": 3.3726, + "step": 28915 + }, + { + "epoch": 0.5, + "learning_rate": 0.000303644252177323, + "loss": 3.3361, + "step": 28920 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030356199305904183, + "loss": 3.381, + "step": 28925 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003034797336729173, + "loss": 3.35, + "step": 28930 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003033974740251343, + "loss": 3.3854, + "step": 28935 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003033152141218788, + "loss": 3.359, + "step": 28940 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003032329539693362, + "loss": 3.2957, + "step": 28945 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003031506935736921, + "loss": 3.2719, + "step": 28950 + }, + { + "epoch": 0.5, + "learning_rate": 0.000303068432941132, + "loss": 3.2425, + "step": 28955 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030298617207784133, + "loss": 3.3014, + "step": 28960 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030290391099000585, + "loss": 3.2075, + "step": 28965 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030282164968381125, + "loss": 3.3241, + "step": 28970 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030273938816544293, + "loss": 3.2996, + "step": 28975 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003026571264410867, + "loss": 3.3114, + "step": 28980 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003025748645169282, + "loss": 3.2326, + "step": 28985 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003024926023991531, + "loss": 3.3887, + "step": 28990 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030241034009394706, + "loss": 3.4096, + "step": 28995 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003023280776074957, + "loss": 3.2992, + "step": 29000 + }, + { + "epoch": 0.5, + "eval_loss": 3.3513615131378174, + "eval_runtime": 150.0716, + "eval_samples_per_second": 12.267, + "eval_steps_per_second": 0.773, + "step": 29000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030224581494598497, + "loss": 3.2705, + "step": 29005 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030216355211560046, + "loss": 3.1582, + "step": 29010 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003020812891225279, + "loss": 3.3338, + "step": 29015 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030199902597295307, + "loss": 3.355, + "step": 29020 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030191676267306164, + "loss": 3.3007, + "step": 29025 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003018344992290396, + "loss": 3.3631, + "step": 29030 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030175223564707257, + "loss": 3.3573, + "step": 29035 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003016699719333463, + "loss": 3.3362, + "step": 29040 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030158770809404686, + "loss": 3.324, + "step": 29045 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030150544413535987, + "loss": 3.3532, + "step": 29050 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003014231800634711, + "loss": 3.3602, + "step": 29055 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030134091588456663, + "loss": 3.3985, + "step": 29060 + }, + { + "epoch": 0.5, + "learning_rate": 0.000301258651604832, + "loss": 3.3109, + "step": 29065 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030117638723045333, + "loss": 3.234, + "step": 29070 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030109412276761635, + "loss": 3.3347, + "step": 29075 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003010118582225069, + "loss": 3.334, + "step": 29080 + }, + { + "epoch": 0.5, + "learning_rate": 0.000300929593601311, + "loss": 3.5063, + "step": 29085 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003008473289102143, + "loss": 3.2729, + "step": 29090 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003007650641554029, + "loss": 3.3154, + "step": 29095 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030068279934306253, + "loss": 3.2814, + "step": 29100 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030060053447937915, + "loss": 3.2991, + "step": 29105 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030051826957053874, + "loss": 3.2739, + "step": 29110 + }, + { + "epoch": 0.5, + "learning_rate": 0.000300436004622727, + "loss": 3.3853, + "step": 29115 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003003537396421301, + "loss": 3.3941, + "step": 29120 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030027147463493364, + "loss": 3.2138, + "step": 29125 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030018920960732376, + "loss": 3.2438, + "step": 29130 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003001069445654863, + "loss": 3.1981, + "step": 29135 + }, + { + "epoch": 0.5, + "learning_rate": 0.00030002467951560706, + "loss": 3.2727, + "step": 29140 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029994241446387206, + "loss": 3.3465, + "step": 29145 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002998601494164673, + "loss": 3.3148, + "step": 29150 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002997778843795786, + "loss": 3.2841, + "step": 29155 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029969561935939173, + "loss": 3.3489, + "step": 29160 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002996133543620928, + "loss": 3.2372, + "step": 29165 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002995310893938676, + "loss": 3.2644, + "step": 29170 + }, + { + "epoch": 0.5, + "learning_rate": 0.00029944882446090207, + "loss": 3.2412, + "step": 29175 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002993665595693822, + "loss": 3.3525, + "step": 29180 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002992842947254937, + "loss": 3.2611, + "step": 29185 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029920202993542267, + "loss": 3.3687, + "step": 29190 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002991197652053548, + "loss": 3.4074, + "step": 29195 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002990375005414761, + "loss": 3.3301, + "step": 29200 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002989552359499725, + "loss": 3.2824, + "step": 29205 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002988729714370297, + "loss": 3.3212, + "step": 29210 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002987907070088338, + "loss": 3.3241, + "step": 29215 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002987084426715704, + "loss": 3.3126, + "step": 29220 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002986261784314255, + "loss": 3.2104, + "step": 29225 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002985439142945851, + "loss": 3.3995, + "step": 29230 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002984616502672348, + "loss": 3.3807, + "step": 29235 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029837938635556043, + "loss": 3.2622, + "step": 29240 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002982971225657479, + "loss": 3.2761, + "step": 29245 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029821485890398313, + "loss": 3.3754, + "step": 29250 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029813259537645186, + "loss": 3.4256, + "step": 29255 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002980503319893397, + "loss": 3.3391, + "step": 29260 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029796806874883265, + "loss": 3.3717, + "step": 29265 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029788580566111645, + "loss": 3.3585, + "step": 29270 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002978035427323767, + "loss": 3.3437, + "step": 29275 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002977212799687995, + "loss": 3.3646, + "step": 29280 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029763901737657017, + "loss": 3.2832, + "step": 29285 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002975567549618747, + "loss": 3.1794, + "step": 29290 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002974744927308987, + "loss": 3.3764, + "step": 29295 + }, + { + "epoch": 0.51, + "learning_rate": 0.000297392230689828, + "loss": 3.3258, + "step": 29300 + }, + { + "epoch": 0.51, + "learning_rate": 0.000297309968844848, + "loss": 3.3501, + "step": 29305 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002972277072021448, + "loss": 3.3172, + "step": 29310 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002971454457679037, + "loss": 3.3571, + "step": 29315 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002970631845483104, + "loss": 3.2708, + "step": 29320 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002969809235495506, + "loss": 3.4065, + "step": 29325 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029689866277780993, + "loss": 3.2756, + "step": 29330 + }, + { + "epoch": 0.51, + "learning_rate": 0.000296816402239274, + "loss": 3.3239, + "step": 29335 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029673414194012816, + "loss": 3.284, + "step": 29340 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029665188188655807, + "loss": 3.3406, + "step": 29345 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002965696220847494, + "loss": 3.338, + "step": 29350 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002964873625408875, + "loss": 3.3738, + "step": 29355 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002964051032611581, + "loss": 3.1847, + "step": 29360 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002963228442517463, + "loss": 3.3021, + "step": 29365 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002962405855188378, + "loss": 3.3367, + "step": 29370 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029615832706861815, + "loss": 3.2833, + "step": 29375 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029607606890727246, + "loss": 3.2644, + "step": 29380 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002959938110409864, + "loss": 3.4083, + "step": 29385 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002959115534759452, + "loss": 3.3432, + "step": 29390 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002958292962183342, + "loss": 3.2759, + "step": 29395 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002957470392743387, + "loss": 3.2871, + "step": 29400 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029566478265014403, + "loss": 3.2679, + "step": 29405 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029558252635193563, + "loss": 3.3223, + "step": 29410 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002955002703858986, + "loss": 3.2751, + "step": 29415 + }, + { + "epoch": 0.51, + "learning_rate": 0.000295418014758218, + "loss": 3.3319, + "step": 29420 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029533575947507927, + "loss": 3.3044, + "step": 29425 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002952535045426676, + "loss": 3.2715, + "step": 29430 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029517124996716816, + "loss": 3.147, + "step": 29435 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002950889957547658, + "loss": 3.3518, + "step": 29440 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002950067419116457, + "loss": 3.2539, + "step": 29445 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029492448844399327, + "loss": 3.26, + "step": 29450 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002948422353579931, + "loss": 3.3907, + "step": 29455 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002947599826598306, + "loss": 3.299, + "step": 29460 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002946777303556904, + "loss": 3.3628, + "step": 29465 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002945954784517575, + "loss": 3.2462, + "step": 29470 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029451322695421713, + "loss": 3.3751, + "step": 29475 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002944309758692538, + "loss": 3.2978, + "step": 29480 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029434872520305277, + "loss": 3.3679, + "step": 29485 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029426647496179847, + "loss": 3.2476, + "step": 29490 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029418422515167596, + "loss": 3.3317, + "step": 29495 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002941019757788698, + "loss": 3.3763, + "step": 29500 + }, + { + "epoch": 0.51, + "eval_loss": 3.340322494506836, + "eval_runtime": 149.8765, + "eval_samples_per_second": 12.283, + "eval_steps_per_second": 0.774, + "step": 29500 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002940197268495649, + "loss": 3.2741, + "step": 29505 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029393747836994595, + "loss": 3.2172, + "step": 29510 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002938552303461975, + "loss": 3.282, + "step": 29515 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002937729827845042, + "loss": 3.3107, + "step": 29520 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029369073569105066, + "loss": 3.1937, + "step": 29525 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029360848907202154, + "loss": 3.3407, + "step": 29530 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002935262429336013, + "loss": 3.3098, + "step": 29535 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002934439972819744, + "loss": 3.4239, + "step": 29540 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002933617521233252, + "loss": 3.2941, + "step": 29545 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002932795074638383, + "loss": 3.1586, + "step": 29550 + }, + { + "epoch": 0.51, + "learning_rate": 0.000293197263309698, + "loss": 3.3078, + "step": 29555 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002931150196670886, + "loss": 3.303, + "step": 29560 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029303277654219457, + "loss": 3.1798, + "step": 29565 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002929505339411999, + "loss": 3.3173, + "step": 29570 + }, + { + "epoch": 0.51, + "learning_rate": 0.000292868291870289, + "loss": 3.4086, + "step": 29575 + }, + { + "epoch": 0.51, + "learning_rate": 0.000292786050335646, + "loss": 3.4117, + "step": 29580 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029270380934345495, + "loss": 3.1563, + "step": 29585 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029262156889990024, + "loss": 3.3599, + "step": 29590 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002925393290111656, + "loss": 3.3668, + "step": 29595 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029245708968343514, + "loss": 3.3687, + "step": 29600 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029237485092289294, + "loss": 3.2999, + "step": 29605 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002922926127357229, + "loss": 3.2961, + "step": 29610 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029221037512810886, + "loss": 3.2225, + "step": 29615 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029212813810623473, + "loss": 3.2368, + "step": 29620 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002920459016762842, + "loss": 3.2402, + "step": 29625 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002919636658444411, + "loss": 3.3306, + "step": 29630 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029188143061688917, + "loss": 3.2542, + "step": 29635 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029179919599981217, + "loss": 3.3902, + "step": 29640 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002917169619993934, + "loss": 3.1883, + "step": 29645 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002916347286218167, + "loss": 3.3464, + "step": 29650 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002915524958732656, + "loss": 3.254, + "step": 29655 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002914702637599234, + "loss": 3.3093, + "step": 29660 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029138803228797384, + "loss": 3.3814, + "step": 29665 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029130580146359997, + "loss": 3.3697, + "step": 29670 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029122357129298537, + "loss": 3.3207, + "step": 29675 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002911413417823131, + "loss": 3.3784, + "step": 29680 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029105911293776654, + "loss": 3.326, + "step": 29685 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029097688476552904, + "loss": 3.4872, + "step": 29690 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002908946572717834, + "loss": 3.2418, + "step": 29695 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002908124304627129, + "loss": 3.383, + "step": 29700 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002907302043445005, + "loss": 3.4032, + "step": 29705 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002906479789233293, + "loss": 3.2928, + "step": 29710 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002905657542053822, + "loss": 3.2659, + "step": 29715 + }, + { + "epoch": 0.51, + "learning_rate": 0.000290483530196842, + "loss": 3.3656, + "step": 29720 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002904013069038915, + "loss": 3.3738, + "step": 29725 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029031908433271354, + "loss": 3.3268, + "step": 29730 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029023686248949083, + "loss": 3.2683, + "step": 29735 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029015464138040613, + "loss": 3.447, + "step": 29740 + }, + { + "epoch": 0.51, + "learning_rate": 0.00029007242101164183, + "loss": 3.2916, + "step": 29745 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002899902013893806, + "loss": 3.2699, + "step": 29750 + }, + { + "epoch": 0.51, + "learning_rate": 0.00028990798251980494, + "loss": 3.319, + "step": 29755 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028982576440909723, + "loss": 3.2191, + "step": 29760 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028974354706344003, + "loss": 3.3557, + "step": 29765 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002896613304890154, + "loss": 3.3007, + "step": 29770 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028957911469200577, + "loss": 3.3596, + "step": 29775 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028949689967859327, + "loss": 3.3322, + "step": 29780 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002894146854549601, + "loss": 3.4363, + "step": 29785 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002893324720272884, + "loss": 3.3202, + "step": 29790 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002892502594017601, + "loss": 3.3451, + "step": 29795 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002891680475845572, + "loss": 3.3622, + "step": 29800 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002890858365818616, + "loss": 3.2766, + "step": 29805 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002890036263998551, + "loss": 3.348, + "step": 29810 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002889214170447196, + "loss": 3.3661, + "step": 29815 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028883920852263686, + "loss": 3.275, + "step": 29820 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002887570008397883, + "loss": 3.3184, + "step": 29825 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028867479400235567, + "loss": 3.154, + "step": 29830 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002885925880165205, + "loss": 3.2861, + "step": 29835 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002885103828884642, + "loss": 3.2612, + "step": 29840 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002884281786243684, + "loss": 3.3055, + "step": 29845 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028834597523041413, + "loss": 3.3218, + "step": 29850 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028826377271278284, + "loss": 3.3651, + "step": 29855 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028818157107765563, + "loss": 3.3722, + "step": 29860 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002880993703312137, + "loss": 3.3252, + "step": 29865 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002880171704796383, + "loss": 3.2013, + "step": 29870 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002879349715291101, + "loss": 3.3523, + "step": 29875 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028785277348581024, + "loss": 3.2081, + "step": 29880 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002877705763559196, + "loss": 3.3849, + "step": 29885 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028768838014561884, + "loss": 3.3127, + "step": 29890 + }, + { + "epoch": 0.52, + "learning_rate": 0.000287606184861089, + "loss": 3.4062, + "step": 29895 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002875239905085104, + "loss": 3.2684, + "step": 29900 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002874417970940638, + "loss": 3.4488, + "step": 29905 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028735960462392964, + "loss": 3.3177, + "step": 29910 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002872774131042885, + "loss": 3.2003, + "step": 29915 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002871952225413208, + "loss": 3.2894, + "step": 29920 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028711303294120654, + "loss": 3.2948, + "step": 29925 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002870308443101262, + "loss": 3.2572, + "step": 29930 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028694865665425997, + "loss": 3.309, + "step": 29935 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028686646997978777, + "loss": 3.3262, + "step": 29940 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002867842842928899, + "loss": 3.3213, + "step": 29945 + }, + { + "epoch": 0.52, + "learning_rate": 0.000286702099599746, + "loss": 3.1783, + "step": 29950 + }, + { + "epoch": 0.52, + "learning_rate": 0.000286619915906536, + "loss": 3.3147, + "step": 29955 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002865377332194399, + "loss": 3.2999, + "step": 29960 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002864555515446371, + "loss": 3.1541, + "step": 29965 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002863733708883076, + "loss": 3.2152, + "step": 29970 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028629119125663064, + "loss": 3.2402, + "step": 29975 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002862090126557859, + "loss": 3.4334, + "step": 29980 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028612683509195267, + "loss": 3.2809, + "step": 29985 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028604465857131033, + "loss": 3.4323, + "step": 29990 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002859624831000383, + "loss": 3.3852, + "step": 29995 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002858803086843155, + "loss": 3.3623, + "step": 30000 + }, + { + "epoch": 0.52, + "eval_loss": 3.3538341522216797, + "eval_runtime": 149.973, + "eval_samples_per_second": 12.276, + "eval_steps_per_second": 0.773, + "step": 30000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002857981353303211, + "loss": 3.3106, + "step": 30005 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002857159630442341, + "loss": 3.4119, + "step": 30010 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028563379183223355, + "loss": 3.3264, + "step": 30015 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002855516217004982, + "loss": 3.2663, + "step": 30020 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028546945265520694, + "loss": 3.349, + "step": 30025 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028538728470253825, + "loss": 3.3401, + "step": 30030 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002853051178486709, + "loss": 3.4039, + "step": 30035 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028522295209978336, + "loss": 3.3008, + "step": 30040 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002851407874620541, + "loss": 3.356, + "step": 30045 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028505862394166156, + "loss": 3.1896, + "step": 30050 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028497646154478383, + "loss": 3.3458, + "step": 30055 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002848943002775992, + "loss": 3.3157, + "step": 30060 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002848121401462858, + "loss": 3.3277, + "step": 30065 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028472998115702157, + "loss": 3.4176, + "step": 30070 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028464782331598463, + "loss": 3.3079, + "step": 30075 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028456566662935265, + "loss": 3.2774, + "step": 30080 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002844835111033034, + "loss": 3.3172, + "step": 30085 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028440135674401453, + "loss": 3.1986, + "step": 30090 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002843192035576638, + "loss": 3.3215, + "step": 30095 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002842370515504286, + "loss": 3.3483, + "step": 30100 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028415490072848627, + "loss": 3.3005, + "step": 30105 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002840727510980142, + "loss": 3.2421, + "step": 30110 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002839906026651897, + "loss": 3.2194, + "step": 30115 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002839084554361897, + "loss": 3.3219, + "step": 30120 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002838263094171916, + "loss": 3.2303, + "step": 30125 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028374416461437194, + "loss": 3.226, + "step": 30130 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028366202103390786, + "loss": 3.2418, + "step": 30135 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002835798786819761, + "loss": 3.1874, + "step": 30140 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002834977375647532, + "loss": 3.3188, + "step": 30145 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002834155976884161, + "loss": 3.1829, + "step": 30150 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002833334590591408, + "loss": 3.3849, + "step": 30155 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028325132168310413, + "loss": 3.3192, + "step": 30160 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002831691855664821, + "loss": 3.3352, + "step": 30165 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002830870507154511, + "loss": 3.3424, + "step": 30170 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002830049171361873, + "loss": 3.23, + "step": 30175 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028292278483486665, + "loss": 3.257, + "step": 30180 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002828406538176649, + "loss": 3.3752, + "step": 30185 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002827585240907581, + "loss": 3.2752, + "step": 30190 + }, + { + "epoch": 0.52, + "learning_rate": 0.000282676395660322, + "loss": 3.2853, + "step": 30195 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028259426853253215, + "loss": 3.3301, + "step": 30200 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028251214271356406, + "loss": 3.258, + "step": 30205 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002824300182095932, + "loss": 3.2661, + "step": 30210 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028234789502679493, + "loss": 3.3113, + "step": 30215 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002822657731713445, + "loss": 3.327, + "step": 30220 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002821836526494171, + "loss": 3.2908, + "step": 30225 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002821015334671877, + "loss": 3.3357, + "step": 30230 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028201941563083113, + "loss": 3.394, + "step": 30235 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002819372991465225, + "loss": 3.3686, + "step": 30240 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002818551840204363, + "loss": 3.4033, + "step": 30245 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028177307025874725, + "loss": 3.3786, + "step": 30250 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028169095786763007, + "loss": 3.2397, + "step": 30255 + }, + { + "epoch": 0.52, + "learning_rate": 0.000281608846853259, + "loss": 3.3121, + "step": 30260 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002815267372218083, + "loss": 3.2628, + "step": 30265 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028144462897945223, + "loss": 3.3744, + "step": 30270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002813625221323651, + "loss": 3.2426, + "step": 30275 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002812804166867208, + "loss": 3.4355, + "step": 30280 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002811983126486931, + "loss": 3.245, + "step": 30285 + }, + { + "epoch": 0.52, + "learning_rate": 0.000281116210024456, + "loss": 3.3295, + "step": 30290 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002810341088201831, + "loss": 3.3174, + "step": 30295 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028095200904204804, + "loss": 3.1724, + "step": 30300 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002808699106962244, + "loss": 3.2641, + "step": 30305 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028078781378888524, + "loss": 3.2328, + "step": 30310 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028070571832620404, + "loss": 3.2577, + "step": 30315 + }, + { + "epoch": 0.52, + "learning_rate": 0.00028062362431435404, + "loss": 3.3785, + "step": 30320 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002805415317595081, + "loss": 3.2063, + "step": 30325 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002804594406678394, + "loss": 3.3733, + "step": 30330 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002803773510455205, + "loss": 3.3888, + "step": 30335 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002802952628987243, + "loss": 3.2477, + "step": 30340 + }, + { + "epoch": 0.53, + "learning_rate": 0.00028021317623362326, + "loss": 3.3568, + "step": 30345 + }, + { + "epoch": 0.53, + "learning_rate": 0.00028013109105638995, + "loss": 3.284, + "step": 30350 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002800490073731969, + "loss": 3.3265, + "step": 30355 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002799669251902162, + "loss": 3.242, + "step": 30360 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027988484451361994, + "loss": 3.391, + "step": 30365 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027980276534958037, + "loss": 3.3313, + "step": 30370 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027972068770426936, + "loss": 3.2538, + "step": 30375 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002796386115838587, + "loss": 3.3219, + "step": 30380 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027955653699452006, + "loss": 3.3282, + "step": 30385 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002794744639424251, + "loss": 3.3222, + "step": 30390 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027939239243374517, + "loss": 3.3605, + "step": 30395 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002793103224746518, + "loss": 3.296, + "step": 30400 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027922825407131625, + "loss": 3.3659, + "step": 30405 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002791461872299094, + "loss": 3.4053, + "step": 30410 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027906412195660235, + "loss": 3.3414, + "step": 30415 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027898205825756614, + "loss": 3.2646, + "step": 30420 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027889999613897137, + "loss": 3.2449, + "step": 30425 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027881793560698885, + "loss": 3.3722, + "step": 30430 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027873587666778893, + "loss": 3.2959, + "step": 30435 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027865381932754214, + "loss": 3.2453, + "step": 30440 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002785717635924187, + "loss": 3.305, + "step": 30445 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027848970946858884, + "loss": 3.337, + "step": 30450 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002784076569622227, + "loss": 3.2714, + "step": 30455 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027832560607948993, + "loss": 3.3498, + "step": 30460 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027824355682656057, + "loss": 3.2591, + "step": 30465 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002781615092096042, + "loss": 3.3538, + "step": 30470 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002780794632347904, + "loss": 3.3029, + "step": 30475 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027799741890828874, + "loss": 3.4062, + "step": 30480 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027791537623626843, + "loss": 3.2511, + "step": 30485 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002778333352248985, + "loss": 3.2801, + "step": 30490 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027775129588034823, + "loss": 3.3308, + "step": 30495 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027766925820878647, + "loss": 3.3449, + "step": 30500 + }, + { + "epoch": 0.53, + "eval_loss": 3.326901435852051, + "eval_runtime": 149.9761, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 30500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027758722221638205, + "loss": 3.2604, + "step": 30505 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002775051879093038, + "loss": 3.3171, + "step": 30510 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027742315529371994, + "loss": 3.3948, + "step": 30515 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027734112437579917, + "loss": 3.3968, + "step": 30520 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027725909516170964, + "loss": 3.2516, + "step": 30525 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027717706765761967, + "loss": 3.323, + "step": 30530 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027709504186969737, + "loss": 3.3059, + "step": 30535 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027701301780411045, + "loss": 3.2289, + "step": 30540 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002769309954670267, + "loss": 3.3094, + "step": 30545 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002768489748646139, + "loss": 3.3621, + "step": 30550 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027676695600303955, + "loss": 3.409, + "step": 30555 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002766849388884711, + "loss": 3.3604, + "step": 30560 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027660292352707576, + "loss": 3.2526, + "step": 30565 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002765209099250206, + "loss": 3.2604, + "step": 30570 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002764388980884726, + "loss": 3.269, + "step": 30575 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002763568880235989, + "loss": 3.3991, + "step": 30580 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027627487973656606, + "loss": 3.368, + "step": 30585 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002761928732335405, + "loss": 3.2534, + "step": 30590 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002761108685206889, + "loss": 3.2685, + "step": 30595 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027602886560417763, + "loss": 3.3091, + "step": 30600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027594686449017276, + "loss": 3.3925, + "step": 30605 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027586486518484056, + "loss": 3.3245, + "step": 30610 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027578286769434663, + "loss": 3.3573, + "step": 30615 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027570087202485703, + "loss": 3.3753, + "step": 30620 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027561887818253726, + "loss": 3.287, + "step": 30625 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002755368861735529, + "loss": 3.3379, + "step": 30630 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002754548960040694, + "loss": 3.2588, + "step": 30635 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002753729076802519, + "loss": 3.2747, + "step": 30640 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002752909212082655, + "loss": 3.2554, + "step": 30645 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027520893659427516, + "loss": 3.3417, + "step": 30650 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027512695384444575, + "loss": 3.2509, + "step": 30655 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027504497296494207, + "loss": 3.2155, + "step": 30660 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027496299396192846, + "loss": 3.3851, + "step": 30665 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002748810168415693, + "loss": 3.2289, + "step": 30670 + }, + { + "epoch": 0.53, + "learning_rate": 0.000274799041610029, + "loss": 3.3077, + "step": 30675 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002747170682734716, + "loss": 3.2613, + "step": 30680 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002746350968380613, + "loss": 3.3562, + "step": 30685 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002745531273099615, + "loss": 3.2328, + "step": 30690 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002744711596953361, + "loss": 3.3524, + "step": 30695 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002743891940003487, + "loss": 3.3606, + "step": 30700 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027430723023116264, + "loss": 3.2986, + "step": 30705 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002742252683939412, + "loss": 3.2556, + "step": 30710 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002741433084948476, + "loss": 3.28, + "step": 30715 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027406135054004456, + "loss": 3.2094, + "step": 30720 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027397939453569506, + "loss": 3.393, + "step": 30725 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002738974404879617, + "loss": 3.2284, + "step": 30730 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027381548840300706, + "loss": 3.2894, + "step": 30735 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002737335382869936, + "loss": 3.3592, + "step": 30740 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002736515901460834, + "loss": 3.2587, + "step": 30745 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027356964398643854, + "loss": 3.2823, + "step": 30750 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027348769981422094, + "loss": 3.3475, + "step": 30755 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027340575763559256, + "loss": 3.3608, + "step": 30760 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002733238174567149, + "loss": 3.3273, + "step": 30765 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002732418792837494, + "loss": 3.241, + "step": 30770 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027315994312285746, + "loss": 3.293, + "step": 30775 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027307800898020027, + "loss": 3.1938, + "step": 30780 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027299607686193876, + "loss": 3.3112, + "step": 30785 + }, + { + "epoch": 0.53, + "learning_rate": 0.000272914146774234, + "loss": 3.3405, + "step": 30790 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002728322187232465, + "loss": 3.2946, + "step": 30795 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027275029271513695, + "loss": 3.3734, + "step": 30800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027266836875606567, + "loss": 3.3478, + "step": 30805 + }, + { + "epoch": 0.53, + "learning_rate": 0.000272586446852193, + "loss": 3.2108, + "step": 30810 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002725045270096792, + "loss": 3.2706, + "step": 30815 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002724226092346839, + "loss": 3.3201, + "step": 30820 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002723406935333671, + "loss": 3.2457, + "step": 30825 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002722587799118883, + "loss": 3.2723, + "step": 30830 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027217686837640714, + "loss": 3.3165, + "step": 30835 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027209495893308296, + "loss": 3.2665, + "step": 30840 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027201305158807483, + "loss": 3.3003, + "step": 30845 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002719311463475418, + "loss": 3.2617, + "step": 30850 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002718492432176426, + "loss": 3.3741, + "step": 30855 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002717673422045361, + "loss": 3.4239, + "step": 30860 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002716854433143809, + "loss": 3.3527, + "step": 30865 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002716035465533351, + "loss": 3.3153, + "step": 30870 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027152165192755707, + "loss": 3.2969, + "step": 30875 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027143975944320494, + "loss": 3.3132, + "step": 30880 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027135786910643646, + "loss": 3.238, + "step": 30885 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002712759809234096, + "loss": 3.3268, + "step": 30890 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002711940949002816, + "loss": 3.3004, + "step": 30895 + }, + { + "epoch": 0.53, + "learning_rate": 0.00027111221104321004, + "loss": 3.3266, + "step": 30900 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002710303293583522, + "loss": 3.3325, + "step": 30905 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002709484498518651, + "loss": 3.3649, + "step": 30910 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002708665725299058, + "loss": 3.3601, + "step": 30915 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002707846973986308, + "loss": 3.3293, + "step": 30920 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002707028244641969, + "loss": 3.2142, + "step": 30925 + }, + { + "epoch": 0.54, + "learning_rate": 0.00027062095373276036, + "loss": 3.1972, + "step": 30930 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002705390852104776, + "loss": 3.2593, + "step": 30935 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002704572189035047, + "loss": 3.3917, + "step": 30940 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002703753548179976, + "loss": 3.2728, + "step": 30945 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002702934929601118, + "loss": 3.4312, + "step": 30950 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002702116333360031, + "loss": 3.3181, + "step": 30955 + }, + { + "epoch": 0.54, + "learning_rate": 0.000270129775951827, + "loss": 3.3649, + "step": 30960 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002700479208137386, + "loss": 3.3491, + "step": 30965 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002699660679278932, + "loss": 3.194, + "step": 30970 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026988421730044545, + "loss": 3.3569, + "step": 30975 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002698023689375502, + "loss": 3.1581, + "step": 30980 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002697205228453621, + "loss": 3.2575, + "step": 30985 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026963867903003543, + "loss": 3.2312, + "step": 30990 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026955683749772466, + "loss": 3.3679, + "step": 30995 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002694749982545835, + "loss": 3.2296, + "step": 31000 + }, + { + "epoch": 0.54, + "eval_loss": 3.32407808303833, + "eval_runtime": 149.9754, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 31000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026939316130676616, + "loss": 3.2916, + "step": 31005 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026931132666042614, + "loss": 3.316, + "step": 31010 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002692294943217171, + "loss": 3.183, + "step": 31015 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002691476642967925, + "loss": 3.3019, + "step": 31020 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002690658365918054, + "loss": 3.1547, + "step": 31025 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026898401121290875, + "loss": 3.3068, + "step": 31030 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002689021881662555, + "loss": 3.2116, + "step": 31035 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002688203674579984, + "loss": 3.2976, + "step": 31040 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002687385490942899, + "loss": 3.3166, + "step": 31045 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026865673308128224, + "loss": 3.2547, + "step": 31050 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002685749194251276, + "loss": 3.3457, + "step": 31055 + }, + { + "epoch": 0.54, + "learning_rate": 0.000268493108131978, + "loss": 3.2873, + "step": 31060 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026841129920798513, + "loss": 3.3012, + "step": 31065 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026832949265930087, + "loss": 3.1554, + "step": 31070 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002682476884920763, + "loss": 3.4911, + "step": 31075 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002681658867124628, + "loss": 3.2928, + "step": 31080 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002680840873266115, + "loss": 3.0558, + "step": 31085 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026800229034067326, + "loss": 3.2551, + "step": 31090 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002679204957607989, + "loss": 3.2682, + "step": 31095 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002678387035931387, + "loss": 3.3801, + "step": 31100 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002677569138438433, + "loss": 3.195, + "step": 31105 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026767512651906263, + "loss": 3.2651, + "step": 31110 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002675933416249468, + "loss": 3.3951, + "step": 31115 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026751155916764573, + "loss": 3.3247, + "step": 31120 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026742977915330884, + "loss": 3.3234, + "step": 31125 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026734800158808555, + "loss": 3.3103, + "step": 31130 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026726622647812526, + "loss": 3.3172, + "step": 31135 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026718445382957695, + "loss": 3.4098, + "step": 31140 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026710268364858963, + "loss": 3.3378, + "step": 31145 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026702091594131187, + "loss": 3.3174, + "step": 31150 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026693915071389216, + "loss": 3.2108, + "step": 31155 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026685738797247886, + "loss": 3.3841, + "step": 31160 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026677562772322016, + "loss": 3.4254, + "step": 31165 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002666938699722641, + "loss": 3.3229, + "step": 31170 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002666121147257582, + "loss": 3.1803, + "step": 31175 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026653036198985016, + "loss": 3.2278, + "step": 31180 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026644861177068746, + "loss": 3.2767, + "step": 31185 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002663668640744171, + "loss": 3.3793, + "step": 31190 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002662851189071862, + "loss": 3.2449, + "step": 31195 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026620337627514165, + "loss": 3.3237, + "step": 31200 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026612163618443003, + "loss": 3.3144, + "step": 31205 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002660398986411976, + "loss": 3.3152, + "step": 31210 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002659581636515908, + "loss": 3.2679, + "step": 31215 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002658764312217557, + "loss": 3.2625, + "step": 31220 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002657947013578381, + "loss": 3.2968, + "step": 31225 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002657129740659836, + "loss": 3.2104, + "step": 31230 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002656312493523377, + "loss": 3.331, + "step": 31235 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002655495272230457, + "loss": 3.2342, + "step": 31240 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002654678076842528, + "loss": 3.2085, + "step": 31245 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002653860907421038, + "loss": 3.311, + "step": 31250 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002653043764027433, + "loss": 3.2037, + "step": 31255 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002652226646723159, + "loss": 3.1527, + "step": 31260 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002651409555569659, + "loss": 3.271, + "step": 31265 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002650592490628373, + "loss": 3.3012, + "step": 31270 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026497754519607433, + "loss": 3.3312, + "step": 31275 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002648958439628202, + "loss": 3.3229, + "step": 31280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026481414536921893, + "loss": 3.2941, + "step": 31285 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026473244942141344, + "loss": 3.2946, + "step": 31290 + }, + { + "epoch": 0.54, + "learning_rate": 0.000264650756125547, + "loss": 3.3225, + "step": 31295 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002645690654877627, + "loss": 3.2401, + "step": 31300 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026448737751420305, + "loss": 3.328, + "step": 31305 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002644056922110105, + "loss": 3.3317, + "step": 31310 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002643240095843275, + "loss": 3.2121, + "step": 31315 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002642423296402962, + "loss": 3.2404, + "step": 31320 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026416065238505846, + "loss": 3.2064, + "step": 31325 + }, + { + "epoch": 0.54, + "learning_rate": 0.000264078977824756, + "loss": 3.261, + "step": 31330 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026399730596553023, + "loss": 3.174, + "step": 31335 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002639156368135226, + "loss": 3.3145, + "step": 31340 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026383397037487414, + "loss": 3.4055, + "step": 31345 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026375230665572584, + "loss": 3.2682, + "step": 31350 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002636706456622182, + "loss": 3.3942, + "step": 31355 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002635889874004918, + "loss": 3.1798, + "step": 31360 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026350733187668706, + "loss": 3.2755, + "step": 31365 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026342567909694385, + "loss": 3.1718, + "step": 31370 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026334402906740223, + "loss": 3.1946, + "step": 31375 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026326238179420167, + "loss": 3.2054, + "step": 31380 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002631807372834818, + "loss": 3.257, + "step": 31385 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002630990955413817, + "loss": 3.3149, + "step": 31390 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026301745657404045, + "loss": 3.3137, + "step": 31395 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026293582038759716, + "loss": 3.3703, + "step": 31400 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026285418698819004, + "loss": 3.2994, + "step": 31405 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026277255638195774, + "loss": 3.2856, + "step": 31410 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026269092857503836, + "loss": 3.3122, + "step": 31415 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026260930357357, + "loss": 3.2749, + "step": 31420 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026252768138369044, + "loss": 3.161, + "step": 31425 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002624460620115372, + "loss": 3.2431, + "step": 31430 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002623644454632475, + "loss": 3.2436, + "step": 31435 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002622828317449587, + "loss": 3.3021, + "step": 31440 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026220122086280764, + "loss": 3.2542, + "step": 31445 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002621196128229311, + "loss": 3.2806, + "step": 31450 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002620380076314656, + "loss": 3.2708, + "step": 31455 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026195640529454726, + "loss": 3.3919, + "step": 31460 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002618748058183123, + "loss": 3.2865, + "step": 31465 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002617932092088965, + "loss": 3.2749, + "step": 31470 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002617116154724356, + "loss": 3.2656, + "step": 31475 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026163002461506503, + "loss": 3.212, + "step": 31480 + }, + { + "epoch": 0.54, + "learning_rate": 0.00026154843664291997, + "loss": 3.2677, + "step": 31485 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002614668515621354, + "loss": 3.2452, + "step": 31490 + }, + { + "epoch": 0.55, + "learning_rate": 0.00026138526937884604, + "loss": 3.2805, + "step": 31495 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002613036900991865, + "loss": 3.3033, + "step": 31500 + }, + { + "epoch": 0.55, + "eval_loss": 3.316983938217163, + "eval_runtime": 150.2836, + "eval_samples_per_second": 12.25, + "eval_steps_per_second": 0.772, + "step": 31500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00026122211372929137, + "loss": 3.3315, + "step": 31505 + }, + { + "epoch": 0.55, + "learning_rate": 0.00026114054027529443, + "loss": 3.3347, + "step": 31510 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002610589697433297, + "loss": 3.3488, + "step": 31515 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002609774021395308, + "loss": 3.3674, + "step": 31520 + }, + { + "epoch": 0.55, + "learning_rate": 0.00026089583747003134, + "loss": 3.1443, + "step": 31525 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002608142757409646, + "loss": 3.3235, + "step": 31530 + }, + { + "epoch": 0.55, + "learning_rate": 0.00026073271695846337, + "loss": 3.2616, + "step": 31535 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002606511611286606, + "loss": 3.328, + "step": 31540 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002605696082576889, + "loss": 3.339, + "step": 31545 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002604880583516805, + "loss": 3.2483, + "step": 31550 + }, + { + "epoch": 0.55, + "learning_rate": 0.00026040651141676774, + "loss": 3.3018, + "step": 31555 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002603249674590823, + "loss": 3.3246, + "step": 31560 + }, + { + "epoch": 0.55, + "learning_rate": 0.00026024342648475594, + "loss": 3.4165, + "step": 31565 + }, + { + "epoch": 0.55, + "learning_rate": 0.00026016188849992025, + "loss": 3.2257, + "step": 31570 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002600803535107063, + "loss": 3.2111, + "step": 31575 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002599988215232453, + "loss": 3.2358, + "step": 31580 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025991729254366786, + "loss": 3.3897, + "step": 31585 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025983576657810463, + "loss": 3.2663, + "step": 31590 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002597542436326858, + "loss": 3.2689, + "step": 31595 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025967272371354164, + "loss": 3.2168, + "step": 31600 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025959120682680215, + "loss": 3.2391, + "step": 31605 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025950969297859665, + "loss": 3.2852, + "step": 31610 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025942818217505474, + "loss": 3.3367, + "step": 31615 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002593466744223056, + "loss": 3.283, + "step": 31620 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025926516972647827, + "loss": 3.3326, + "step": 31625 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002591836680937014, + "loss": 3.3459, + "step": 31630 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025910216953010343, + "loss": 3.4016, + "step": 31635 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025902067404181276, + "loss": 3.2182, + "step": 31640 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002589391816349574, + "loss": 3.2924, + "step": 31645 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002588576923156651, + "loss": 3.3283, + "step": 31650 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002587762060900634, + "loss": 3.346, + "step": 31655 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025869472296428, + "loss": 3.1884, + "step": 31660 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025861324294444157, + "loss": 3.2707, + "step": 31665 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002585317660366752, + "loss": 3.2709, + "step": 31670 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025845029224710745, + "loss": 3.3347, + "step": 31675 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002583688215818648, + "loss": 3.268, + "step": 31680 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025828735404707356, + "loss": 3.2481, + "step": 31685 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002582058896488594, + "loss": 3.3265, + "step": 31690 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002581244283933481, + "loss": 3.2392, + "step": 31695 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002580429702866652, + "loss": 3.3131, + "step": 31700 + }, + { + "epoch": 0.55, + "learning_rate": 0.000257961515334936, + "loss": 3.2571, + "step": 31705 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002578800635442854, + "loss": 3.3224, + "step": 31710 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025779861492083805, + "loss": 3.3081, + "step": 31715 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002577171694707186, + "loss": 3.3205, + "step": 31720 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025763572720005135, + "loss": 3.1753, + "step": 31725 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025755428811496023, + "loss": 3.2786, + "step": 31730 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002574728522215693, + "loss": 3.1329, + "step": 31735 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025739141952600176, + "loss": 3.1854, + "step": 31740 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002573099900343811, + "loss": 3.2668, + "step": 31745 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002572285637528305, + "loss": 3.3433, + "step": 31750 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025714714068747263, + "loss": 3.4145, + "step": 31755 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025706572084443027, + "loss": 3.4166, + "step": 31760 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025698430422982555, + "loss": 3.2479, + "step": 31765 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025690289084978076, + "loss": 3.2204, + "step": 31770 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025682148071041767, + "loss": 3.1905, + "step": 31775 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002567400738178579, + "loss": 3.3027, + "step": 31780 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002566586701782231, + "loss": 3.3398, + "step": 31785 + }, + { + "epoch": 0.55, + "learning_rate": 0.000256577269797634, + "loss": 3.3163, + "step": 31790 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002564958726822117, + "loss": 3.2045, + "step": 31795 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002564144788380767, + "loss": 3.313, + "step": 31800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025633308827134973, + "loss": 3.2722, + "step": 31805 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025625170098815065, + "loss": 3.3247, + "step": 31810 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025617031699459947, + "loss": 3.3358, + "step": 31815 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025608893629681576, + "loss": 3.0963, + "step": 31820 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025600755890091897, + "loss": 3.2378, + "step": 31825 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002559261848130284, + "loss": 3.2526, + "step": 31830 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002558448140392629, + "loss": 3.3501, + "step": 31835 + }, + { + "epoch": 0.55, + "learning_rate": 0.000255763446585741, + "loss": 3.1792, + "step": 31840 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025568208245858116, + "loss": 3.27, + "step": 31845 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025560072166390163, + "loss": 3.1158, + "step": 31850 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002555193642078203, + "loss": 3.289, + "step": 31855 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002554380100964549, + "loss": 3.2798, + "step": 31860 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025535665933592265, + "loss": 3.3566, + "step": 31865 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002552753119323409, + "loss": 3.3437, + "step": 31870 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025519396789182644, + "loss": 3.2963, + "step": 31875 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002551126272204959, + "loss": 3.3405, + "step": 31880 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025503128992446586, + "loss": 3.2588, + "step": 31885 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002549499560098524, + "loss": 3.2521, + "step": 31890 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025486862548277126, + "loss": 3.2144, + "step": 31895 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002547872983493382, + "loss": 3.3133, + "step": 31900 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025470597461566864, + "loss": 3.2138, + "step": 31905 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025462465428787763, + "loss": 3.2776, + "step": 31910 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002545433373720801, + "loss": 3.2242, + "step": 31915 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002544620238743906, + "loss": 3.2321, + "step": 31920 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025438071380092355, + "loss": 3.2964, + "step": 31925 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025429940715779304, + "loss": 3.2827, + "step": 31930 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002542181039511128, + "loss": 3.265, + "step": 31935 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025413680418699675, + "loss": 3.2511, + "step": 31940 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025405550787155774, + "loss": 3.3294, + "step": 31945 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002539742150109092, + "loss": 3.3519, + "step": 31950 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002538929256111637, + "loss": 3.3194, + "step": 31955 + }, + { + "epoch": 0.55, + "learning_rate": 0.000253811639678434, + "loss": 3.3208, + "step": 31960 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025373035721883234, + "loss": 3.2799, + "step": 31965 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002536490782384706, + "loss": 3.3969, + "step": 31970 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025356780274346063, + "loss": 3.261, + "step": 31975 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025348653073991386, + "loss": 3.1804, + "step": 31980 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002534052622339417, + "loss": 3.322, + "step": 31985 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025332399723165507, + "loss": 3.2808, + "step": 31990 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002532427357391646, + "loss": 3.266, + "step": 31995 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002531614777625807, + "loss": 3.3233, + "step": 32000 + }, + { + "epoch": 0.55, + "eval_loss": 3.3087377548217773, + "eval_runtime": 149.8766, + "eval_samples_per_second": 12.283, + "eval_steps_per_second": 0.774, + "step": 32000 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002530802233080136, + "loss": 3.4058, + "step": 32005 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025299897238157337, + "loss": 3.3462, + "step": 32010 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002529177249893696, + "loss": 3.3441, + "step": 32015 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002528364811375115, + "loss": 3.3583, + "step": 32020 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002527552408321083, + "loss": 3.2635, + "step": 32025 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025267400407926886, + "loss": 3.2558, + "step": 32030 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002525927708851018, + "loss": 3.3147, + "step": 32035 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025251154125571545, + "loss": 3.2104, + "step": 32040 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025243031519721777, + "loss": 3.4015, + "step": 32045 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025234909271571663, + "loss": 3.1764, + "step": 32050 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002522678738173195, + "loss": 3.282, + "step": 32055 + }, + { + "epoch": 0.55, + "learning_rate": 0.00025218665850813355, + "loss": 3.2438, + "step": 32060 + }, + { + "epoch": 0.55, + "learning_rate": 0.000252105446794266, + "loss": 3.2136, + "step": 32065 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002520242386818233, + "loss": 3.3125, + "step": 32070 + }, + { + "epoch": 0.56, + "learning_rate": 0.000251943034176912, + "loss": 3.3091, + "step": 32075 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025186183328563817, + "loss": 3.2398, + "step": 32080 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002517806360141078, + "loss": 3.2099, + "step": 32085 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025169944236842664, + "loss": 3.3446, + "step": 32090 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025161825235469975, + "loss": 3.3282, + "step": 32095 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002515370659790323, + "loss": 3.2385, + "step": 32100 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025145588324752914, + "loss": 3.3214, + "step": 32105 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002513747041662948, + "loss": 3.2566, + "step": 32110 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025129352874143343, + "loss": 3.329, + "step": 32115 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025121235697904925, + "loss": 3.2985, + "step": 32120 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025113118888524564, + "loss": 3.2731, + "step": 32125 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002510500244661263, + "loss": 3.2967, + "step": 32130 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002509688637277941, + "loss": 3.2774, + "step": 32135 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025088770667635213, + "loss": 3.2615, + "step": 32140 + }, + { + "epoch": 0.56, + "learning_rate": 0.000250806553317903, + "loss": 3.2842, + "step": 32145 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025072540365854895, + "loss": 3.1892, + "step": 32150 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002506442577043919, + "loss": 3.1842, + "step": 32155 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025056311546153376, + "loss": 3.246, + "step": 32160 + }, + { + "epoch": 0.56, + "learning_rate": 0.000250481976936076, + "loss": 3.3429, + "step": 32165 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025040084213411983, + "loss": 3.2132, + "step": 32170 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002503197110617661, + "loss": 3.3142, + "step": 32175 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002502385837251155, + "loss": 3.2998, + "step": 32180 + }, + { + "epoch": 0.56, + "learning_rate": 0.00025015746013026834, + "loss": 3.1796, + "step": 32185 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002500763402833248, + "loss": 3.2882, + "step": 32190 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024999522419038466, + "loss": 3.2778, + "step": 32195 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002499141118575473, + "loss": 3.2608, + "step": 32200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024983300329091206, + "loss": 3.2805, + "step": 32205 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024975189849657785, + "loss": 3.3096, + "step": 32210 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002496707974806434, + "loss": 3.2804, + "step": 32215 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002495897002492071, + "loss": 3.3137, + "step": 32220 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002495086068083669, + "loss": 3.3505, + "step": 32225 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002494275171642207, + "loss": 3.2128, + "step": 32230 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024934643132286605, + "loss": 3.3917, + "step": 32235 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024926534929040013, + "loss": 3.2804, + "step": 32240 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024918427107292003, + "loss": 3.309, + "step": 32245 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024910319667652227, + "loss": 3.2208, + "step": 32250 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024902212610730326, + "loss": 3.3962, + "step": 32255 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002489410593713591, + "loss": 3.3368, + "step": 32260 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002488599964747856, + "loss": 3.2554, + "step": 32265 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002487789374236784, + "loss": 3.3785, + "step": 32270 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024869788222413254, + "loss": 3.2223, + "step": 32275 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002486168308822429, + "loss": 3.1955, + "step": 32280 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002485357834041043, + "loss": 3.2661, + "step": 32285 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002484547397958111, + "loss": 3.3976, + "step": 32290 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002483737000634574, + "loss": 3.3036, + "step": 32295 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024829266421313664, + "loss": 3.2412, + "step": 32300 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024821163225094265, + "loss": 3.2499, + "step": 32305 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024813060418296846, + "loss": 3.2339, + "step": 32310 + }, + { + "epoch": 0.56, + "learning_rate": 0.000248049580015307, + "loss": 3.2515, + "step": 32315 + }, + { + "epoch": 0.56, + "learning_rate": 0.000247968559754051, + "loss": 3.2655, + "step": 32320 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002478875434052925, + "loss": 3.1095, + "step": 32325 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002478065309751237, + "loss": 3.322, + "step": 32330 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002477255224696364, + "loss": 3.2905, + "step": 32335 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002476445178949218, + "loss": 3.2749, + "step": 32340 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024756351725707107, + "loss": 3.2526, + "step": 32345 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024748252056217533, + "loss": 3.3218, + "step": 32350 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002474015278163248, + "loss": 3.3869, + "step": 32355 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002473205390256097, + "loss": 3.2438, + "step": 32360 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024723955419612016, + "loss": 3.3304, + "step": 32365 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002471585733339458, + "loss": 3.2835, + "step": 32370 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002470775964451759, + "loss": 3.1467, + "step": 32375 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002469966235358995, + "loss": 3.322, + "step": 32380 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002469156546122053, + "loss": 3.1773, + "step": 32385 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024683468968018197, + "loss": 3.1448, + "step": 32390 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002467537287459174, + "loss": 3.2498, + "step": 32395 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024667277181549967, + "loss": 3.2981, + "step": 32400 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024659181889501603, + "loss": 3.3788, + "step": 32405 + }, + { + "epoch": 0.56, + "learning_rate": 0.000246510869990554, + "loss": 3.2933, + "step": 32410 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002464299251082004, + "loss": 3.3849, + "step": 32415 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002463489842540418, + "loss": 3.3092, + "step": 32420 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024626804743416484, + "loss": 3.3249, + "step": 32425 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002461871146546551, + "loss": 3.2616, + "step": 32430 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024610618592159876, + "loss": 3.1845, + "step": 32435 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002460252612410809, + "loss": 3.3216, + "step": 32440 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002459443406191868, + "loss": 3.3316, + "step": 32445 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002458634240620013, + "loss": 3.1841, + "step": 32450 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024578251157560886, + "loss": 3.3593, + "step": 32455 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024570160316609365, + "loss": 3.263, + "step": 32460 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024562069883953956, + "loss": 3.2655, + "step": 32465 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024553979860203034, + "loss": 3.1221, + "step": 32470 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002454589024596492, + "loss": 3.1684, + "step": 32475 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024537801041847894, + "loss": 3.2231, + "step": 32480 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002452971224846024, + "loss": 3.3318, + "step": 32485 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002452162386641019, + "loss": 3.2675, + "step": 32490 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002451353589630595, + "loss": 3.2606, + "step": 32495 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024505448338755706, + "loss": 3.2408, + "step": 32500 + }, + { + "epoch": 0.56, + "eval_loss": 3.3065123558044434, + "eval_runtime": 149.9745, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 32500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024497361194367564, + "loss": 3.2712, + "step": 32505 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002448927446374967, + "loss": 3.2798, + "step": 32510 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024481188147510097, + "loss": 3.2018, + "step": 32515 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002447310224625689, + "loss": 3.2207, + "step": 32520 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024465016760598075, + "loss": 3.3062, + "step": 32525 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024456931691141625, + "loss": 3.2997, + "step": 32530 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024448847038495506, + "loss": 3.2389, + "step": 32535 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002444076280326764, + "loss": 3.2905, + "step": 32540 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024432678986065914, + "loss": 3.1733, + "step": 32545 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024424595587498217, + "loss": 3.2917, + "step": 32550 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002441651260817235, + "loss": 3.262, + "step": 32555 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002440843004869611, + "loss": 3.2987, + "step": 32560 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002440034790967727, + "loss": 3.3115, + "step": 32565 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024392266191723583, + "loss": 3.2615, + "step": 32570 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002438418489544273, + "loss": 3.3166, + "step": 32575 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024376104021442408, + "loss": 3.3056, + "step": 32580 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024368023570330225, + "loss": 3.236, + "step": 32585 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024359943542713805, + "loss": 3.1418, + "step": 32590 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024351863939200735, + "loss": 3.3424, + "step": 32595 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024343784760398545, + "loss": 3.2941, + "step": 32600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024335706006914766, + "loss": 3.2504, + "step": 32605 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024327627679356854, + "loss": 3.1296, + "step": 32610 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024319549778332273, + "loss": 3.1769, + "step": 32615 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024311472304448432, + "loss": 3.2691, + "step": 32620 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024303395258312724, + "loss": 3.2445, + "step": 32625 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024295318640532506, + "loss": 3.2382, + "step": 32630 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024287242451715086, + "loss": 3.1444, + "step": 32635 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002427916669246775, + "loss": 3.3026, + "step": 32640 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024271091363397757, + "loss": 3.2935, + "step": 32645 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024263016465112344, + "loss": 3.1797, + "step": 32650 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024254941998218694, + "loss": 3.2365, + "step": 32655 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024246867963323952, + "loss": 3.3014, + "step": 32660 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024238794361035253, + "loss": 3.2298, + "step": 32665 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024230721191959703, + "loss": 3.2835, + "step": 32670 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002422264845670434, + "loss": 3.2102, + "step": 32675 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002421457615587622, + "loss": 3.329, + "step": 32680 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024206504290082313, + "loss": 3.2735, + "step": 32685 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002419843285992959, + "loss": 3.2008, + "step": 32690 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002419036186602499, + "loss": 3.263, + "step": 32695 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024182291308975394, + "loss": 3.3097, + "step": 32700 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024174221189387693, + "loss": 3.202, + "step": 32705 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024166151507868687, + "loss": 3.2625, + "step": 32710 + }, + { + "epoch": 0.57, + "learning_rate": 0.000241580822650252, + "loss": 3.2967, + "step": 32715 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002415001346146398, + "loss": 3.1632, + "step": 32720 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024141945097791765, + "loss": 3.2288, + "step": 32725 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024133877174615267, + "loss": 3.2721, + "step": 32730 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002412580969254114, + "loss": 3.2332, + "step": 32735 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024117742652176007, + "loss": 3.2993, + "step": 32740 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024109676054126486, + "loss": 3.3246, + "step": 32745 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024101609898999145, + "loss": 3.2481, + "step": 32750 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024093544187400511, + "loss": 3.1872, + "step": 32755 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024085478919937082, + "loss": 3.1915, + "step": 32760 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002407741409721532, + "loss": 3.2906, + "step": 32765 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024069349719841663, + "loss": 3.3152, + "step": 32770 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024061285788422524, + "loss": 3.3742, + "step": 32775 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024053222303564264, + "loss": 3.2066, + "step": 32780 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024045159265873194, + "loss": 3.2123, + "step": 32785 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024037096675955634, + "loss": 3.3466, + "step": 32790 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024029034534417848, + "loss": 3.3364, + "step": 32795 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024020972841866056, + "loss": 3.263, + "step": 32800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024012911598906466, + "loss": 3.138, + "step": 32805 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024004850806145256, + "loss": 3.308, + "step": 32810 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002399679046418853, + "loss": 3.3403, + "step": 32815 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002398873057364239, + "loss": 3.3254, + "step": 32820 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023980671135112906, + "loss": 3.3335, + "step": 32825 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023972612149206107, + "loss": 3.1811, + "step": 32830 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023964553616527987, + "loss": 3.2545, + "step": 32835 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023956495537684504, + "loss": 3.2524, + "step": 32840 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023948437913281576, + "loss": 3.187, + "step": 32845 + }, + { + "epoch": 0.57, + "learning_rate": 0.000239403807439251, + "loss": 3.234, + "step": 32850 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023932324030220947, + "loss": 3.3456, + "step": 32855 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002392426777277494, + "loss": 3.3587, + "step": 32860 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023916211972192843, + "loss": 3.1395, + "step": 32865 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023908156629080427, + "loss": 3.3324, + "step": 32870 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002390010174404342, + "loss": 3.2773, + "step": 32875 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023892047317687493, + "loss": 3.3637, + "step": 32880 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023883993350618316, + "loss": 3.216, + "step": 32885 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023875939843441484, + "loss": 3.2905, + "step": 32890 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023867886796762598, + "loss": 3.2365, + "step": 32895 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002385983421118719, + "loss": 3.2825, + "step": 32900 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002385178208732078, + "loss": 3.2922, + "step": 32905 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023843730425768863, + "loss": 3.1794, + "step": 32910 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023835679227136858, + "loss": 3.2932, + "step": 32915 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023827628492030184, + "loss": 3.3405, + "step": 32920 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002381957822105421, + "loss": 3.3375, + "step": 32925 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023811528414814285, + "loss": 3.1655, + "step": 32930 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002380347907391571, + "loss": 3.2732, + "step": 32935 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002379543019896375, + "loss": 3.2293, + "step": 32940 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023787381790563636, + "loss": 3.2525, + "step": 32945 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023779333849320572, + "loss": 3.2663, + "step": 32950 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002377128637583973, + "loss": 3.328, + "step": 32955 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023763239370726237, + "loss": 3.2032, + "step": 32960 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023755192834585165, + "loss": 3.2314, + "step": 32965 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002374714676802159, + "loss": 3.3097, + "step": 32970 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023739101171640537, + "loss": 3.2052, + "step": 32975 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023731056046046985, + "loss": 3.3136, + "step": 32980 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023723011391845903, + "loss": 3.3039, + "step": 32985 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002371496720964218, + "loss": 3.2194, + "step": 32990 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002370692350004072, + "loss": 3.2503, + "step": 32995 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023698880263646357, + "loss": 3.2548, + "step": 33000 + }, + { + "epoch": 0.57, + "eval_loss": 3.29677677154541, + "eval_runtime": 149.7729, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.775, + "step": 33000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023690837501063905, + "loss": 3.2068, + "step": 33005 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002368279521289815, + "loss": 3.3154, + "step": 33010 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002367475339975381, + "loss": 3.3981, + "step": 33015 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023666712062235602, + "loss": 3.1803, + "step": 33020 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002365867120094818, + "loss": 3.2611, + "step": 33025 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002365063081649619, + "loss": 3.331, + "step": 33030 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023642590909484236, + "loss": 3.2699, + "step": 33035 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023634551480516852, + "loss": 3.2775, + "step": 33040 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023626512530198574, + "loss": 3.2124, + "step": 33045 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023618474059133887, + "loss": 3.267, + "step": 33050 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023610436067927253, + "loss": 3.3642, + "step": 33055 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023602398557183074, + "loss": 3.288, + "step": 33060 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002359436152750575, + "loss": 3.2004, + "step": 33065 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023586324979499597, + "loss": 3.1586, + "step": 33070 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023578288913768942, + "loss": 3.2883, + "step": 33075 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023570253330918044, + "loss": 3.2835, + "step": 33080 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023562218231551143, + "loss": 3.328, + "step": 33085 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002355418361627245, + "loss": 3.1934, + "step": 33090 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023546149485686104, + "loss": 3.3223, + "step": 33095 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002353811584039625, + "loss": 3.26, + "step": 33100 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023530082681006957, + "loss": 3.3357, + "step": 33105 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023522050008122292, + "loss": 3.3325, + "step": 33110 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002351401782234628, + "loss": 3.2175, + "step": 33115 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002350598612428288, + "loss": 3.2222, + "step": 33120 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023497954914536033, + "loss": 3.3419, + "step": 33125 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023489924193709658, + "loss": 3.2388, + "step": 33130 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023481893962407622, + "loss": 3.2658, + "step": 33135 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023473864221233766, + "loss": 3.3275, + "step": 33140 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023465834970791863, + "loss": 3.3339, + "step": 33145 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002345780621168568, + "loss": 3.2137, + "step": 33150 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023449777944518948, + "loss": 3.1605, + "step": 33155 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002344175016989534, + "loss": 3.2205, + "step": 33160 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002343372288841852, + "loss": 3.3544, + "step": 33165 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023425696100692078, + "loss": 3.3108, + "step": 33170 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023417669807319598, + "loss": 3.2136, + "step": 33175 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023409644008904611, + "loss": 3.2587, + "step": 33180 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023401618706050617, + "loss": 3.3205, + "step": 33185 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023393593899361095, + "loss": 3.2509, + "step": 33190 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023385569589439445, + "loss": 3.2005, + "step": 33195 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023377545776889064, + "loss": 3.2256, + "step": 33200 + }, + { + "epoch": 0.57, + "learning_rate": 0.000233695224623133, + "loss": 3.231, + "step": 33205 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002336149964631546, + "loss": 3.2971, + "step": 33210 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023353477329498843, + "loss": 3.2918, + "step": 33215 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023345455512466658, + "loss": 3.2987, + "step": 33220 + }, + { + "epoch": 0.57, + "learning_rate": 0.00023337434195822108, + "loss": 3.3542, + "step": 33225 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023329413380168365, + "loss": 3.3258, + "step": 33230 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023321393066108557, + "loss": 3.2357, + "step": 33235 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023313373254245763, + "loss": 3.2245, + "step": 33240 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023305353945183023, + "loss": 3.3036, + "step": 33245 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023297335139523356, + "loss": 3.3151, + "step": 33250 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023289316837869743, + "loss": 3.2807, + "step": 33255 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023281299040825104, + "loss": 3.2591, + "step": 33260 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002327328174899236, + "loss": 3.3142, + "step": 33265 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023265264962974339, + "loss": 3.2627, + "step": 33270 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023257248683373878, + "loss": 3.3279, + "step": 33275 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023249232910793768, + "loss": 3.234, + "step": 33280 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002324121764583674, + "loss": 3.2011, + "step": 33285 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023233202889105507, + "loss": 3.2268, + "step": 33290 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023225188641202746, + "loss": 3.1476, + "step": 33295 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023217174902731078, + "loss": 3.2587, + "step": 33300 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023209161674293087, + "loss": 3.1833, + "step": 33305 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023201148956491336, + "loss": 3.1849, + "step": 33310 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023193136749928342, + "loss": 3.2485, + "step": 33315 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023185125055206592, + "loss": 3.2812, + "step": 33320 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023177113872928498, + "loss": 3.1795, + "step": 33325 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023169103203696473, + "loss": 3.3434, + "step": 33330 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023161093048112887, + "loss": 3.3059, + "step": 33335 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023153083406780045, + "loss": 3.3019, + "step": 33340 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023145074280300258, + "loss": 3.2374, + "step": 33345 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023137065669275739, + "loss": 3.2438, + "step": 33350 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002312905757430871, + "loss": 3.1102, + "step": 33355 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023121049996001344, + "loss": 3.3015, + "step": 33360 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002311304293495576, + "loss": 3.284, + "step": 33365 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023105036391774065, + "loss": 3.3114, + "step": 33370 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023097030367058284, + "loss": 3.2978, + "step": 33375 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023089024861410446, + "loss": 3.2116, + "step": 33380 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023081019875432517, + "loss": 3.3001, + "step": 33385 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023073015409726434, + "loss": 3.317, + "step": 33390 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023065011464894103, + "loss": 3.2739, + "step": 33395 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023057008041537362, + "loss": 3.2719, + "step": 33400 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002304900514025803, + "loss": 3.2217, + "step": 33405 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023041002761657888, + "loss": 3.32, + "step": 33410 + }, + { + "epoch": 0.58, + "learning_rate": 0.00023033000906338676, + "loss": 3.2321, + "step": 33415 + }, + { + "epoch": 0.58, + "learning_rate": 0.000230249995749021, + "loss": 3.2446, + "step": 33420 + }, + { + "epoch": 0.58, + "learning_rate": 0.000230169987679498, + "loss": 3.2752, + "step": 33425 + }, + { + "epoch": 0.58, + "learning_rate": 0.000230089984860834, + "loss": 3.1693, + "step": 33430 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002300099872990449, + "loss": 3.3488, + "step": 33435 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002299299950001461, + "loss": 3.2554, + "step": 33440 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022985000797015262, + "loss": 3.2451, + "step": 33445 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022977002621507893, + "loss": 3.0189, + "step": 33450 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022969004974093932, + "loss": 3.3202, + "step": 33455 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002296100785537477, + "loss": 3.2214, + "step": 33460 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022953011265951738, + "loss": 3.13, + "step": 33465 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002294501520642616, + "loss": 3.2021, + "step": 33470 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022937019677399267, + "loss": 3.2777, + "step": 33475 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022929024679472303, + "loss": 3.279, + "step": 33480 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022921030213246437, + "loss": 3.3105, + "step": 33485 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002291303627932282, + "loss": 3.2978, + "step": 33490 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002290504287830257, + "loss": 3.2142, + "step": 33495 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022897050010786725, + "loss": 3.2172, + "step": 33500 + }, + { + "epoch": 0.58, + "eval_loss": 3.2933146953582764, + "eval_runtime": 149.7693, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.775, + "step": 33500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022889057677376312, + "loss": 3.3861, + "step": 33505 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002288106587867232, + "loss": 3.1333, + "step": 33510 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022873074615275695, + "loss": 3.2619, + "step": 33515 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022865083887787328, + "loss": 3.2127, + "step": 33520 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022857093696808102, + "loss": 3.3108, + "step": 33525 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022849104042938805, + "loss": 3.3339, + "step": 33530 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002284111492678024, + "loss": 3.2757, + "step": 33535 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002283312634893315, + "loss": 3.2759, + "step": 33540 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002282513830999822, + "loss": 3.2947, + "step": 33545 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022817150810576134, + "loss": 3.2764, + "step": 33550 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022809163851267482, + "loss": 3.2314, + "step": 33555 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022801177432672863, + "loss": 3.2722, + "step": 33560 + }, + { + "epoch": 0.58, + "learning_rate": 0.000227931915553928, + "loss": 3.1763, + "step": 33565 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022785206220027798, + "loss": 3.3469, + "step": 33570 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022777221427178326, + "loss": 3.1805, + "step": 33575 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002276923717744478, + "loss": 3.1481, + "step": 33580 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022761253471427536, + "loss": 3.2782, + "step": 33585 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022753270309726933, + "loss": 3.3483, + "step": 33590 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022745287692943266, + "loss": 3.2516, + "step": 33595 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022737305621676787, + "loss": 3.2987, + "step": 33600 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022729324096527708, + "loss": 3.2064, + "step": 33605 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022721343118096185, + "loss": 3.3007, + "step": 33610 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022713362686982357, + "loss": 3.2484, + "step": 33615 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002270538280378632, + "loss": 3.2258, + "step": 33620 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022697403469108116, + "loss": 3.306, + "step": 33625 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022689424683547735, + "loss": 3.1254, + "step": 33630 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022681446447705153, + "loss": 3.1907, + "step": 33635 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022673468762180295, + "loss": 3.2499, + "step": 33640 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022665491627573033, + "loss": 3.2333, + "step": 33645 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022657515044483223, + "loss": 3.1616, + "step": 33650 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002264953901351064, + "loss": 3.1961, + "step": 33655 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002264156353525506, + "loss": 3.3953, + "step": 33660 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022633588610316185, + "loss": 3.2886, + "step": 33665 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022625614239293692, + "loss": 3.3128, + "step": 33670 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022617640422787226, + "loss": 3.318, + "step": 33675 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022609667161396357, + "loss": 3.1784, + "step": 33680 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022601694455720648, + "loss": 3.3266, + "step": 33685 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022593722306359594, + "loss": 3.2716, + "step": 33690 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002258575071391267, + "loss": 3.2966, + "step": 33695 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022577779678979303, + "loss": 3.2999, + "step": 33700 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002256980920215886, + "loss": 3.1279, + "step": 33705 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022561839284050678, + "loss": 3.3071, + "step": 33710 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022553869925254064, + "loss": 3.2625, + "step": 33715 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002254590112636828, + "loss": 3.2814, + "step": 33720 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002253793288799253, + "loss": 3.25, + "step": 33725 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022529965210725972, + "loss": 3.2336, + "step": 33730 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002252199809516775, + "loss": 3.3184, + "step": 33735 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022514031541916955, + "loss": 3.3121, + "step": 33740 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022506065551572614, + "loss": 3.319, + "step": 33745 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022498100124733738, + "loss": 3.2058, + "step": 33750 + }, + { + "epoch": 0.58, + "learning_rate": 0.000224901352619993, + "loss": 3.2683, + "step": 33755 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022482170963968195, + "loss": 3.3285, + "step": 33760 + }, + { + "epoch": 0.58, + "learning_rate": 0.000224742072312393, + "loss": 3.3569, + "step": 33765 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002246624406441145, + "loss": 3.2333, + "step": 33770 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022458281464083442, + "loss": 3.1978, + "step": 33775 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002245031943085402, + "loss": 3.1741, + "step": 33780 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022442357965321889, + "loss": 3.0947, + "step": 33785 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022434397068085693, + "loss": 3.2276, + "step": 33790 + }, + { + "epoch": 0.58, + "learning_rate": 0.00022426436739744068, + "loss": 3.1653, + "step": 33795 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002241847698089559, + "loss": 3.3112, + "step": 33800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022410517792138801, + "loss": 3.1834, + "step": 33805 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022402559174072162, + "loss": 3.2776, + "step": 33810 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002239460112729413, + "loss": 3.1643, + "step": 33815 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022386643652403133, + "loss": 3.3707, + "step": 33820 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022378686749997505, + "loss": 3.2157, + "step": 33825 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002237073042067559, + "loss": 3.2477, + "step": 33830 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022362774665035632, + "loss": 3.2571, + "step": 33835 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022354819483675885, + "loss": 3.169, + "step": 33840 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022346864877194526, + "loss": 3.2855, + "step": 33845 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022338910846189707, + "loss": 3.2139, + "step": 33850 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022330957391259544, + "loss": 3.331, + "step": 33855 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002232300451300207, + "loss": 3.3466, + "step": 33860 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022315052212015316, + "loss": 3.2267, + "step": 33865 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022307100488897243, + "loss": 3.1652, + "step": 33870 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002229914934424579, + "loss": 3.2467, + "step": 33875 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022291198778658853, + "loss": 3.3519, + "step": 33880 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022283248792734256, + "loss": 3.1367, + "step": 33885 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022275299387069798, + "loss": 3.2817, + "step": 33890 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022267350562263233, + "loss": 3.1939, + "step": 33895 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022259402318912283, + "loss": 3.1678, + "step": 33900 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022251454657614615, + "loss": 3.2199, + "step": 33905 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022243507578967837, + "loss": 3.289, + "step": 33910 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022235561083569532, + "loss": 3.3205, + "step": 33915 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022227615172017254, + "loss": 3.2538, + "step": 33920 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002221966984490847, + "loss": 3.1713, + "step": 33925 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022211725102840659, + "loss": 3.1795, + "step": 33930 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022203780946411188, + "loss": 3.2756, + "step": 33935 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002219583737621744, + "loss": 3.2947, + "step": 33940 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022187894392856726, + "loss": 3.2601, + "step": 33945 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002217995199692631, + "loss": 3.2433, + "step": 33950 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022172010189023446, + "loss": 3.1556, + "step": 33955 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022164068969745282, + "loss": 3.3248, + "step": 33960 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022156128339688978, + "loss": 3.2672, + "step": 33965 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022148188299451623, + "loss": 3.1232, + "step": 33970 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002214024884963026, + "loss": 3.3216, + "step": 33975 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022132309990821914, + "loss": 3.3364, + "step": 33980 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022124371723623542, + "loss": 3.3008, + "step": 33985 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002211643404863204, + "loss": 3.1925, + "step": 33990 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022108496966444295, + "loss": 3.1433, + "step": 33995 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002210056047765714, + "loss": 3.1537, + "step": 34000 + }, + { + "epoch": 0.59, + "eval_loss": 3.282989501953125, + "eval_runtime": 151.8721, + "eval_samples_per_second": 12.122, + "eval_steps_per_second": 0.764, + "step": 34000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022092624582867348, + "loss": 3.2759, + "step": 34005 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022084689282671677, + "loss": 3.3142, + "step": 34010 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022076754577666794, + "loss": 3.3801, + "step": 34015 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022068820468449366, + "loss": 3.283, + "step": 34020 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022060886955615986, + "loss": 3.2798, + "step": 34025 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022052954039763218, + "loss": 3.1747, + "step": 34030 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022045021721487593, + "loss": 3.2212, + "step": 34035 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022037090001385552, + "loss": 3.3031, + "step": 34040 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002202915888005354, + "loss": 3.219, + "step": 34045 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022021228358087922, + "loss": 3.2706, + "step": 34050 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022013298436085045, + "loss": 3.2221, + "step": 34055 + }, + { + "epoch": 0.59, + "learning_rate": 0.00022005369114641205, + "loss": 3.2401, + "step": 34060 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002199744039435263, + "loss": 3.2445, + "step": 34065 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002198951227581552, + "loss": 3.2795, + "step": 34070 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002198158475962603, + "loss": 3.3098, + "step": 34075 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021973657846380282, + "loss": 3.2402, + "step": 34080 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021965731536674333, + "loss": 3.2837, + "step": 34085 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002195780583110419, + "loss": 3.2242, + "step": 34090 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002194988073026583, + "loss": 3.3176, + "step": 34095 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021941956234755185, + "loss": 3.2141, + "step": 34100 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021934032345168132, + "loss": 3.2779, + "step": 34105 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021926109062100525, + "loss": 3.189, + "step": 34110 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021918186386148122, + "loss": 3.2255, + "step": 34115 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021910264317906685, + "loss": 3.2848, + "step": 34120 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021902342857971918, + "loss": 3.125, + "step": 34125 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002189442200693946, + "loss": 3.2491, + "step": 34130 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002188650176540494, + "loss": 3.2903, + "step": 34135 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021878582133963895, + "loss": 3.1633, + "step": 34140 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002187066311321186, + "loss": 3.183, + "step": 34145 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021862744703744286, + "loss": 3.227, + "step": 34150 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002185482690615661, + "loss": 3.1407, + "step": 34155 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002184690972104422, + "loss": 3.259, + "step": 34160 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002183899314900243, + "loss": 3.2113, + "step": 34165 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002183107719062652, + "loss": 3.2791, + "step": 34170 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021823161846511746, + "loss": 3.2624, + "step": 34175 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021815247117253298, + "loss": 3.2633, + "step": 34180 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002180733300344632, + "loss": 3.1451, + "step": 34185 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021799419505685915, + "loss": 3.2792, + "step": 34190 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021791506624567128, + "loss": 3.2983, + "step": 34195 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002178359436068498, + "loss": 3.0929, + "step": 34200 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021775682714634428, + "loss": 3.2645, + "step": 34205 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021767771687010386, + "loss": 3.2737, + "step": 34210 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002175986127840773, + "loss": 3.2691, + "step": 34215 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021751951489421266, + "loss": 3.2498, + "step": 34220 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021744042320645787, + "loss": 3.3095, + "step": 34225 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021736133772676006, + "loss": 3.3028, + "step": 34230 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021728225846106612, + "loss": 3.2369, + "step": 34235 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021720318541532262, + "loss": 3.245, + "step": 34240 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021712411859547512, + "loss": 3.2824, + "step": 34245 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021704505800746913, + "loss": 3.3172, + "step": 34250 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021696600365724966, + "loss": 3.2327, + "step": 34255 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002168869555507612, + "loss": 3.198, + "step": 34260 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021680791369394783, + "loss": 3.2636, + "step": 34265 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021672887809275285, + "loss": 3.317, + "step": 34270 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021664984875311948, + "loss": 3.2125, + "step": 34275 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002165708256809904, + "loss": 3.2339, + "step": 34280 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002164918088823076, + "loss": 3.2303, + "step": 34285 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002164127983630129, + "loss": 3.2303, + "step": 34290 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021633379412904727, + "loss": 3.3098, + "step": 34295 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021625479618635153, + "loss": 3.296, + "step": 34300 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021617580454086599, + "loss": 3.1825, + "step": 34305 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002160968191985303, + "loss": 3.2411, + "step": 34310 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021601784016528396, + "loss": 3.2966, + "step": 34315 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021593886744706548, + "loss": 3.2849, + "step": 34320 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021585990104981345, + "loss": 3.2534, + "step": 34325 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021578094097946557, + "loss": 3.2557, + "step": 34330 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021570198724195933, + "loss": 3.2512, + "step": 34335 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021562303984323176, + "loss": 3.2774, + "step": 34340 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021554409878921904, + "loss": 3.2232, + "step": 34345 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021546516408585726, + "loss": 3.28, + "step": 34350 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002153862357390819, + "loss": 3.2287, + "step": 34355 + }, + { + "epoch": 0.59, + "learning_rate": 0.000215307313754828, + "loss": 3.2912, + "step": 34360 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021522839813903013, + "loss": 3.2309, + "step": 34365 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021514948889762224, + "loss": 3.2931, + "step": 34370 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021507058603653783, + "loss": 3.2682, + "step": 34375 + }, + { + "epoch": 0.59, + "learning_rate": 0.00021499168956171016, + "loss": 3.2441, + "step": 34380 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021491279947907178, + "loss": 3.3051, + "step": 34385 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002148339157945549, + "loss": 3.2857, + "step": 34390 + }, + { + "epoch": 0.6, + "learning_rate": 0.000214755038514091, + "loss": 3.274, + "step": 34395 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021467616764361128, + "loss": 3.2249, + "step": 34400 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002145973031890465, + "loss": 3.2408, + "step": 34405 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021451844515632686, + "loss": 3.2944, + "step": 34410 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021443959355138214, + "loss": 3.2375, + "step": 34415 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002143607483801414, + "loss": 3.3012, + "step": 34420 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002142819096485335, + "loss": 3.3001, + "step": 34425 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021420307736248667, + "loss": 3.2736, + "step": 34430 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002141242515279287, + "loss": 3.3536, + "step": 34435 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021404543215078697, + "loss": 3.2831, + "step": 34440 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021396661923698827, + "loss": 3.2456, + "step": 34445 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021388781279245886, + "loss": 3.2275, + "step": 34450 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021380901282312456, + "loss": 3.2121, + "step": 34455 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021373021933491075, + "loss": 3.3182, + "step": 34460 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021365143233374238, + "loss": 3.2926, + "step": 34465 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021357265182554385, + "loss": 3.2554, + "step": 34470 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021349387781623887, + "loss": 3.2501, + "step": 34475 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002134151103117509, + "loss": 3.2919, + "step": 34480 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021333634931800297, + "loss": 3.2388, + "step": 34485 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002132575948409174, + "loss": 3.3324, + "step": 34490 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002131788468864162, + "loss": 3.1517, + "step": 34495 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002131001054604207, + "loss": 3.1907, + "step": 34500 + }, + { + "epoch": 0.6, + "eval_loss": 3.2766971588134766, + "eval_runtime": 150.1846, + "eval_samples_per_second": 12.258, + "eval_steps_per_second": 0.772, + "step": 34500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021302137056885198, + "loss": 3.1657, + "step": 34505 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021294264221763032, + "loss": 3.2792, + "step": 34510 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002128639204126758, + "loss": 3.3099, + "step": 34515 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021278520515990808, + "loss": 3.345, + "step": 34520 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002127064964652459, + "loss": 3.2582, + "step": 34525 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002126277943346077, + "loss": 3.2024, + "step": 34530 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021254909877391163, + "loss": 3.2492, + "step": 34535 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021247040978907524, + "loss": 3.2715, + "step": 34540 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021239172738601539, + "loss": 3.2769, + "step": 34545 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021231305157064868, + "loss": 3.1182, + "step": 34550 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021223438234889102, + "loss": 3.2583, + "step": 34555 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021215571972665808, + "loss": 3.3762, + "step": 34560 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021207706370986483, + "loss": 3.3264, + "step": 34565 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021199841430442586, + "loss": 3.2889, + "step": 34570 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021191977151625505, + "loss": 3.2835, + "step": 34575 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021184113535126597, + "loss": 3.1812, + "step": 34580 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021176250581537176, + "loss": 3.2657, + "step": 34585 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021168388291448485, + "loss": 3.2983, + "step": 34590 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002116052666545175, + "loss": 3.2066, + "step": 34595 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002115266570413809, + "loss": 3.2254, + "step": 34600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021144805408098634, + "loss": 3.2641, + "step": 34605 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021136945777924427, + "loss": 3.2156, + "step": 34610 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021129086814206474, + "loss": 3.3646, + "step": 34615 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021121228517535738, + "loss": 3.1861, + "step": 34620 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021113370888503106, + "loss": 3.2872, + "step": 34625 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021105513927699446, + "loss": 3.1778, + "step": 34630 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002109765763571555, + "loss": 3.3283, + "step": 34635 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021089802013142173, + "loss": 3.1781, + "step": 34640 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021081947060570035, + "loss": 3.2741, + "step": 34645 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021074092778589767, + "loss": 3.252, + "step": 34650 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002106623916779197, + "loss": 3.2506, + "step": 34655 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021058386228767206, + "loss": 3.2566, + "step": 34660 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021050533962105975, + "loss": 3.2547, + "step": 34665 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021042682368398717, + "loss": 3.2342, + "step": 34670 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021034831448235857, + "loss": 3.2487, + "step": 34675 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021026981202207712, + "loss": 3.308, + "step": 34680 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021019131630904602, + "loss": 3.3426, + "step": 34685 + }, + { + "epoch": 0.6, + "learning_rate": 0.00021011282734916757, + "loss": 3.2593, + "step": 34690 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002100343451483439, + "loss": 3.2209, + "step": 34695 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020995586971247647, + "loss": 3.1753, + "step": 34700 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020987740104746607, + "loss": 3.3474, + "step": 34705 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020979893915921332, + "loss": 3.2738, + "step": 34710 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020972048405361798, + "loss": 3.1795, + "step": 34715 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020964203573657959, + "loss": 3.1401, + "step": 34720 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020956359421399714, + "loss": 3.2246, + "step": 34725 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002094851594917689, + "loss": 3.2825, + "step": 34730 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020940673157579268, + "loss": 3.2241, + "step": 34735 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020932831047196595, + "loss": 3.2764, + "step": 34740 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002092498961861857, + "loss": 3.2176, + "step": 34745 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002091714887243482, + "loss": 3.2499, + "step": 34750 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020909308809234913, + "loss": 3.2197, + "step": 34755 + }, + { + "epoch": 0.6, + "learning_rate": 0.000209014694296084, + "loss": 3.2597, + "step": 34760 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020893630734144755, + "loss": 3.1445, + "step": 34765 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020885792723433406, + "loss": 3.2218, + "step": 34770 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002087795539806375, + "loss": 3.2191, + "step": 34775 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020870118758625086, + "loss": 3.3003, + "step": 34780 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020862282805706706, + "loss": 3.293, + "step": 34785 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002085444753989782, + "loss": 3.2902, + "step": 34790 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020846612961787612, + "loss": 3.2605, + "step": 34795 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002083877907196521, + "loss": 3.2403, + "step": 34800 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020830945871019656, + "loss": 3.1848, + "step": 34805 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020823113359539993, + "loss": 3.2072, + "step": 34810 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020815281538115164, + "loss": 3.235, + "step": 34815 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020807450407334097, + "loss": 3.2259, + "step": 34820 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002079961996778566, + "loss": 3.1593, + "step": 34825 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002079179022005864, + "loss": 3.2826, + "step": 34830 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020783961164741804, + "loss": 3.2567, + "step": 34835 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020776132802423851, + "loss": 3.2129, + "step": 34840 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020768305133693447, + "loss": 3.2653, + "step": 34845 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020760478159139196, + "loss": 3.1523, + "step": 34850 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002075265187934962, + "loss": 3.284, + "step": 34855 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002074482629491323, + "loss": 3.1118, + "step": 34860 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002073700140641848, + "loss": 3.2638, + "step": 34865 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020729177214453745, + "loss": 3.1445, + "step": 34870 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002072135371960738, + "loss": 3.2557, + "step": 34875 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020713530922467652, + "loss": 3.2218, + "step": 34880 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020705708823622805, + "loss": 3.3213, + "step": 34885 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020697887423661032, + "loss": 3.2697, + "step": 34890 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020690066723170447, + "loss": 3.2742, + "step": 34895 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002068224672273914, + "loss": 3.2105, + "step": 34900 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020674427422955115, + "loss": 3.2227, + "step": 34905 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020666608824406363, + "loss": 3.2625, + "step": 34910 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020658790927680787, + "loss": 3.1875, + "step": 34915 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020650973733366262, + "loss": 3.233, + "step": 34920 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020643157242050602, + "loss": 3.2415, + "step": 34925 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020635341454321575, + "loss": 3.2621, + "step": 34930 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020627526370766862, + "loss": 3.3244, + "step": 34935 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002061971199197413, + "loss": 3.2816, + "step": 34940 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020611898318530995, + "loss": 3.1986, + "step": 34945 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020604085351024986, + "loss": 3.2335, + "step": 34950 + }, + { + "epoch": 0.6, + "learning_rate": 0.00020596273090043614, + "loss": 3.2378, + "step": 34955 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020588461536174304, + "loss": 3.1589, + "step": 34960 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002058065069000445, + "loss": 3.2316, + "step": 34965 + }, + { + "epoch": 0.61, + "learning_rate": 0.000205728405521214, + "loss": 3.2408, + "step": 34970 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002056503112311242, + "loss": 3.2371, + "step": 34975 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020557222403564757, + "loss": 3.2762, + "step": 34980 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002054941439406556, + "loss": 3.2734, + "step": 34985 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002054160709520198, + "loss": 3.1853, + "step": 34990 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020533800507561067, + "loss": 3.2824, + "step": 34995 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020525994631729837, + "loss": 3.19, + "step": 35000 + }, + { + "epoch": 0.61, + "eval_loss": 3.272491693496704, + "eval_runtime": 149.9711, + "eval_samples_per_second": 12.276, + "eval_steps_per_second": 0.773, + "step": 35000 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002051818946829527, + "loss": 3.2679, + "step": 35005 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020510385017844258, + "loss": 3.1934, + "step": 35010 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020502581280963656, + "loss": 3.2659, + "step": 35015 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002049477825824026, + "loss": 3.1719, + "step": 35020 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002048697595026084, + "loss": 3.2524, + "step": 35025 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020479174357612077, + "loss": 3.147, + "step": 35030 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020471373480880593, + "loss": 3.0605, + "step": 35035 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020463573320652996, + "loss": 3.2339, + "step": 35040 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020455773877515814, + "loss": 3.3027, + "step": 35045 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020447975152055518, + "loss": 3.2568, + "step": 35050 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020440177144858554, + "loss": 3.2592, + "step": 35055 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020432379856511257, + "loss": 3.2078, + "step": 35060 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002042458328759996, + "loss": 3.1336, + "step": 35065 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020416787438710937, + "loss": 3.2229, + "step": 35070 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020408992310430382, + "loss": 3.3117, + "step": 35075 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020401197903344463, + "loss": 3.2499, + "step": 35080 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020393404218039252, + "loss": 3.2453, + "step": 35085 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002038561125510082, + "loss": 3.1057, + "step": 35090 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002037781901511514, + "loss": 3.2666, + "step": 35095 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020370027498668158, + "loss": 3.0863, + "step": 35100 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002036223670634577, + "loss": 3.2022, + "step": 35105 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002035444663873378, + "loss": 3.3174, + "step": 35110 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020346657296417968, + "loss": 3.242, + "step": 35115 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020338868679984048, + "loss": 3.2349, + "step": 35120 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020331080790017702, + "loss": 3.3197, + "step": 35125 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020323293627104528, + "loss": 3.202, + "step": 35130 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002031550719183008, + "loss": 3.0995, + "step": 35135 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020307721484779854, + "loss": 3.0756, + "step": 35140 + }, + { + "epoch": 0.61, + "learning_rate": 0.000202999365065393, + "loss": 3.2164, + "step": 35145 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002029215225769381, + "loss": 3.1971, + "step": 35150 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020284368738828716, + "loss": 3.1615, + "step": 35155 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020276585950529312, + "loss": 3.2523, + "step": 35160 + }, + { + "epoch": 0.61, + "learning_rate": 0.000202688038933808, + "loss": 3.3051, + "step": 35165 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020261022567968372, + "loss": 3.1922, + "step": 35170 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002025324197487713, + "loss": 3.2387, + "step": 35175 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020245462114692138, + "loss": 3.3196, + "step": 35180 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002023768298799842, + "loss": 3.1601, + "step": 35185 + }, + { + "epoch": 0.61, + "learning_rate": 0.000202299045953809, + "loss": 3.2727, + "step": 35190 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020222126937424477, + "loss": 3.2983, + "step": 35195 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020214350014714, + "loss": 3.294, + "step": 35200 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020206573827834254, + "loss": 3.2709, + "step": 35205 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002019879837736997, + "loss": 3.2138, + "step": 35210 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020191023663905814, + "loss": 3.2238, + "step": 35215 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020183249688026405, + "loss": 3.2299, + "step": 35220 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002017547645031631, + "loss": 3.2146, + "step": 35225 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020167703951360039, + "loss": 3.1378, + "step": 35230 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002015993219174205, + "loss": 3.3198, + "step": 35235 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002015216117204671, + "loss": 3.1608, + "step": 35240 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020144390892858392, + "loss": 3.2125, + "step": 35245 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002013662135476137, + "loss": 3.2722, + "step": 35250 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020128852558339868, + "loss": 2.9673, + "step": 35255 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020121084504178078, + "loss": 3.0968, + "step": 35260 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020113317192860093, + "loss": 3.2643, + "step": 35265 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020105550624969988, + "loss": 3.0082, + "step": 35270 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020097784801091767, + "loss": 3.228, + "step": 35275 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020090019721809375, + "loss": 3.2276, + "step": 35280 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020082255387706733, + "loss": 3.1601, + "step": 35285 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020074491799367642, + "loss": 3.0885, + "step": 35290 + }, + { + "epoch": 0.61, + "learning_rate": 0.000200667289573759, + "loss": 3.2486, + "step": 35295 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020058966862315238, + "loss": 3.2406, + "step": 35300 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020051205514769313, + "loss": 3.1888, + "step": 35305 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020043444915321762, + "loss": 3.1852, + "step": 35310 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020035685064556118, + "loss": 3.2038, + "step": 35315 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002002792596305589, + "loss": 3.2482, + "step": 35320 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002002016761140452, + "loss": 3.2151, + "step": 35325 + }, + { + "epoch": 0.61, + "learning_rate": 0.00020012410010185406, + "loss": 3.2906, + "step": 35330 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002000465315998188, + "loss": 3.2794, + "step": 35335 + }, + { + "epoch": 0.61, + "learning_rate": 0.000199968970613772, + "loss": 3.1682, + "step": 35340 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019989141714954592, + "loss": 3.2599, + "step": 35345 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001998138712129723, + "loss": 3.2017, + "step": 35350 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019973633280988206, + "loss": 3.3271, + "step": 35355 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001996588019461059, + "loss": 3.1123, + "step": 35360 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019958127862747344, + "loss": 3.2482, + "step": 35365 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019950376285981426, + "loss": 3.3316, + "step": 35370 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019942625464895703, + "loss": 3.2091, + "step": 35375 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019934875400072998, + "loss": 3.34, + "step": 35380 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019927126092096094, + "loss": 3.1714, + "step": 35385 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019919377541547677, + "loss": 3.142, + "step": 35390 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019911629749010408, + "loss": 3.2751, + "step": 35395 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019903882715066875, + "loss": 3.2666, + "step": 35400 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019896136440299622, + "loss": 3.3066, + "step": 35405 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001988839092529113, + "loss": 3.296, + "step": 35410 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019880646170623829, + "loss": 3.3092, + "step": 35415 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019872902176880058, + "loss": 3.2801, + "step": 35420 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019865158944642146, + "loss": 3.1055, + "step": 35425 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001985741647449235, + "loss": 3.2495, + "step": 35430 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001984967476701284, + "loss": 3.3053, + "step": 35435 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019841933822785788, + "loss": 3.2339, + "step": 35440 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001983419364239324, + "loss": 3.2144, + "step": 35445 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019826454226417236, + "loss": 3.2215, + "step": 35450 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019818715575439729, + "loss": 3.2162, + "step": 35455 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019810977690042634, + "loss": 3.3478, + "step": 35460 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001980324057080781, + "loss": 3.3434, + "step": 35465 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019795504218317024, + "loss": 3.2074, + "step": 35470 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001978776863315203, + "loss": 3.2835, + "step": 35475 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019780033815894495, + "loss": 3.2004, + "step": 35480 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019772299767126039, + "loss": 3.3079, + "step": 35485 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019764566487428235, + "loss": 3.2486, + "step": 35490 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001975683397738257, + "loss": 3.3032, + "step": 35495 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001974910223757049, + "loss": 3.2286, + "step": 35500 + }, + { + "epoch": 0.61, + "eval_loss": 3.266571283340454, + "eval_runtime": 150.2623, + "eval_samples_per_second": 12.252, + "eval_steps_per_second": 0.772, + "step": 35500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019741371268573388, + "loss": 3.1576, + "step": 35505 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001973364107097259, + "loss": 3.2607, + "step": 35510 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019725911645349382, + "loss": 3.1451, + "step": 35515 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001971818299228495, + "loss": 3.2446, + "step": 35520 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019710455112360468, + "loss": 3.3296, + "step": 35525 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019702728006157033, + "loss": 3.3272, + "step": 35530 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019695001674255669, + "loss": 3.2667, + "step": 35535 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019687276117237384, + "loss": 3.3462, + "step": 35540 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019679551335683064, + "loss": 3.226, + "step": 35545 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019671827330173597, + "loss": 3.2596, + "step": 35550 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001966410410128979, + "loss": 3.2741, + "step": 35555 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019656381649612375, + "loss": 3.1513, + "step": 35560 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019648659975722064, + "loss": 3.2949, + "step": 35565 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019640939080199462, + "loss": 3.2267, + "step": 35570 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019633218963625157, + "loss": 3.2633, + "step": 35575 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001962549962657965, + "loss": 3.2205, + "step": 35580 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019617781069643402, + "loss": 3.2456, + "step": 35585 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019610063293396823, + "loss": 3.1869, + "step": 35590 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001960234629842023, + "loss": 3.2434, + "step": 35595 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019594630085293906, + "loss": 3.1685, + "step": 35600 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019586914654598066, + "loss": 3.2325, + "step": 35605 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001957920000691289, + "loss": 3.2937, + "step": 35610 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019571486142818458, + "loss": 3.2478, + "step": 35615 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019563773062894834, + "loss": 3.2374, + "step": 35620 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019556060767721983, + "loss": 3.1036, + "step": 35625 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001954834925787984, + "loss": 3.1363, + "step": 35630 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019540638533948267, + "loss": 3.1992, + "step": 35635 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019532928596507071, + "loss": 3.2498, + "step": 35640 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019525219446136013, + "loss": 3.3513, + "step": 35645 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019517511083414757, + "loss": 3.2661, + "step": 35650 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019509803508922953, + "loss": 3.2577, + "step": 35655 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019502096723240158, + "loss": 3.1112, + "step": 35660 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019494390726945888, + "loss": 3.2319, + "step": 35665 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019486685520619606, + "loss": 3.2341, + "step": 35670 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001947898110484069, + "loss": 3.1483, + "step": 35675 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001947127748018847, + "loss": 3.2717, + "step": 35680 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001946357464724222, + "loss": 3.2706, + "step": 35685 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019455872606581167, + "loss": 3.2078, + "step": 35690 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019448171358784464, + "loss": 3.2178, + "step": 35695 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001944047090443118, + "loss": 3.1824, + "step": 35700 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019432771244100373, + "loss": 3.2617, + "step": 35705 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019425072378371014, + "loss": 3.2644, + "step": 35710 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019417374307822012, + "loss": 3.2957, + "step": 35715 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019409677033032242, + "loss": 3.2295, + "step": 35720 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019401980554580468, + "loss": 3.2526, + "step": 35725 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019394284873045447, + "loss": 3.2768, + "step": 35730 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001938658998900585, + "loss": 3.1854, + "step": 35735 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001937889590304029, + "loss": 3.0675, + "step": 35740 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001937120261572734, + "loss": 3.3383, + "step": 35745 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019363510127645467, + "loss": 3.1992, + "step": 35750 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019355818439373127, + "loss": 3.262, + "step": 35755 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019348127551488682, + "loss": 3.2469, + "step": 35760 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019340437464570462, + "loss": 3.2135, + "step": 35765 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019332748179196724, + "loss": 3.2866, + "step": 35770 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019325059695945648, + "loss": 3.2102, + "step": 35775 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019317372015395366, + "loss": 3.2206, + "step": 35780 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019309685138123962, + "loss": 3.1866, + "step": 35785 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019301999064709455, + "loss": 3.2248, + "step": 35790 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019294313795729793, + "loss": 3.1514, + "step": 35795 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019286629331762868, + "loss": 3.1978, + "step": 35800 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019278945673386504, + "loss": 3.2829, + "step": 35805 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001927126282117848, + "loss": 3.2408, + "step": 35810 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001926358077571652, + "loss": 3.2113, + "step": 35815 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019255899537578262, + "loss": 3.223, + "step": 35820 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019248219107341287, + "loss": 3.1289, + "step": 35825 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001924053948558313, + "loss": 3.2393, + "step": 35830 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019232860672881272, + "loss": 3.1435, + "step": 35835 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019225182669813104, + "loss": 3.1946, + "step": 35840 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001921750547695598, + "loss": 3.1769, + "step": 35845 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019209829094887196, + "loss": 3.198, + "step": 35850 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001920215352418396, + "loss": 3.2421, + "step": 35855 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019194478765423433, + "loss": 3.0731, + "step": 35860 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019186804819182725, + "loss": 3.2731, + "step": 35865 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019179131686038883, + "loss": 3.1812, + "step": 35870 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019171459366568893, + "loss": 3.2939, + "step": 35875 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019163787861349647, + "loss": 3.1732, + "step": 35880 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019156117170958023, + "loss": 3.2444, + "step": 35885 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019148447295970817, + "loss": 3.1963, + "step": 35890 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019140778236964752, + "loss": 3.3122, + "step": 35895 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019133109994516528, + "loss": 3.2267, + "step": 35900 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019125442569202725, + "loss": 3.2838, + "step": 35905 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019117775961599912, + "loss": 3.3105, + "step": 35910 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001911011017228458, + "loss": 3.2765, + "step": 35915 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019102445201833152, + "loss": 3.3233, + "step": 35920 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019094781050822007, + "loss": 3.2274, + "step": 35925 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001908711771982742, + "loss": 3.2296, + "step": 35930 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001907945520942566, + "loss": 3.1826, + "step": 35935 + }, + { + "epoch": 0.62, + "learning_rate": 0.000190717935201929, + "loss": 3.3203, + "step": 35940 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001906413265270526, + "loss": 3.1889, + "step": 35945 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019056472607538807, + "loss": 2.9914, + "step": 35950 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001904881338526953, + "loss": 3.2326, + "step": 35955 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019041154986473356, + "loss": 3.2476, + "step": 35960 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019033497411726158, + "loss": 3.1318, + "step": 35965 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019025840661603764, + "loss": 3.266, + "step": 35970 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019018184736681906, + "loss": 3.1782, + "step": 35975 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001901052963753628, + "loss": 3.1955, + "step": 35980 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001900287536474249, + "loss": 3.2449, + "step": 35985 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001899522191887612, + "loss": 2.9662, + "step": 35990 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001898756930051267, + "loss": 3.1855, + "step": 35995 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018979917510227574, + "loss": 3.1923, + "step": 36000 + }, + { + "epoch": 0.62, + "eval_loss": 3.261223554611206, + "eval_runtime": 150.469, + "eval_samples_per_second": 12.235, + "eval_steps_per_second": 0.771, + "step": 36000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018972266548596193, + "loss": 3.2353, + "step": 36005 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018964616416193848, + "loss": 3.2502, + "step": 36010 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018956967113595806, + "loss": 3.2247, + "step": 36015 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018949318641377233, + "loss": 3.2834, + "step": 36020 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018941671000113276, + "loss": 3.2689, + "step": 36025 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018934024190378972, + "loss": 3.2487, + "step": 36030 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018926378212749347, + "loss": 3.1112, + "step": 36035 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018918733067799324, + "loss": 3.1531, + "step": 36040 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001891108875610378, + "loss": 3.2902, + "step": 36045 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018903445278237543, + "loss": 3.278, + "step": 36050 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018895802634775346, + "loss": 3.1309, + "step": 36055 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001888816082629188, + "loss": 3.1394, + "step": 36060 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018880519853361775, + "loss": 3.2527, + "step": 36065 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018872879716559584, + "loss": 3.1727, + "step": 36070 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018865240416459823, + "loss": 3.2331, + "step": 36075 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018857601953636922, + "loss": 3.1652, + "step": 36080 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018849964328665242, + "loss": 3.3314, + "step": 36085 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018842327542119096, + "loss": 3.2538, + "step": 36090 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018834691594572746, + "loss": 3.2119, + "step": 36095 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001882705648660036, + "loss": 3.2479, + "step": 36100 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001881942221877608, + "loss": 3.2294, + "step": 36105 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001881178879167393, + "loss": 3.1991, + "step": 36110 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001880415620586794, + "loss": 3.2136, + "step": 36115 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001879652446193201, + "loss": 3.1497, + "step": 36120 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001878889356044003, + "loss": 3.301, + "step": 36125 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001878126350196581, + "loss": 3.1176, + "step": 36130 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018773634287083072, + "loss": 3.3595, + "step": 36135 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018766005916365493, + "loss": 3.2179, + "step": 36140 + }, + { + "epoch": 0.63, + "learning_rate": 0.000187583783903867, + "loss": 3.2545, + "step": 36145 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018750751709720247, + "loss": 3.2765, + "step": 36150 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001874312587493961, + "loss": 3.2513, + "step": 36155 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018735500886618215, + "loss": 3.1349, + "step": 36160 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018727876745329425, + "loss": 3.278, + "step": 36165 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018720253451646524, + "loss": 3.1892, + "step": 36170 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018712631006142773, + "loss": 3.2387, + "step": 36175 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018705009409391324, + "loss": 3.2239, + "step": 36180 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001869738866196527, + "loss": 3.1069, + "step": 36185 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018689768764437663, + "loss": 3.3351, + "step": 36190 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018682149717381489, + "loss": 3.2367, + "step": 36195 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018674531521369644, + "loss": 3.2287, + "step": 36200 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018666914176975, + "loss": 3.3816, + "step": 36205 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018659297684770317, + "loss": 3.2868, + "step": 36210 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018651682045328334, + "loss": 3.2455, + "step": 36215 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018644067259221696, + "loss": 3.2449, + "step": 36220 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018636453327023004, + "loss": 3.2737, + "step": 36225 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018628840249304793, + "loss": 3.2192, + "step": 36230 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001862122802663951, + "loss": 3.2535, + "step": 36235 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018613616659599567, + "loss": 3.1544, + "step": 36240 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018606006148757292, + "loss": 3.2894, + "step": 36245 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001859839649468496, + "loss": 3.1255, + "step": 36250 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018590787697954795, + "loss": 3.132, + "step": 36255 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018583179759138916, + "loss": 3.1732, + "step": 36260 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018575572678809402, + "loss": 3.2779, + "step": 36265 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018567966457538274, + "loss": 3.2261, + "step": 36270 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018560361095897485, + "loss": 3.1753, + "step": 36275 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018552756594458923, + "loss": 3.2143, + "step": 36280 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018545152953794385, + "loss": 3.2923, + "step": 36285 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018537550174475639, + "loss": 3.1306, + "step": 36290 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018529948257074385, + "loss": 3.2825, + "step": 36295 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018522347202162232, + "loss": 3.3198, + "step": 36300 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018514747010310744, + "loss": 3.289, + "step": 36305 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018507147682091435, + "loss": 3.267, + "step": 36310 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018499549218075703, + "loss": 3.3149, + "step": 36315 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018491951618834947, + "loss": 3.1908, + "step": 36320 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018484354884940437, + "loss": 3.2189, + "step": 36325 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018476759016963426, + "loss": 3.1788, + "step": 36330 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018469164015475092, + "loss": 3.232, + "step": 36335 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018461569881046526, + "loss": 3.3037, + "step": 36340 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018453976614248762, + "loss": 3.2038, + "step": 36345 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018446384215652786, + "loss": 3.124, + "step": 36350 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018438792685829513, + "loss": 3.2536, + "step": 36355 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018431202025349783, + "loss": 3.1973, + "step": 36360 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018423612234784364, + "loss": 3.2563, + "step": 36365 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018416023314703973, + "loss": 3.2235, + "step": 36370 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018408435265679268, + "loss": 3.3684, + "step": 36375 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001840084808828082, + "loss": 3.2167, + "step": 36380 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001839326178307916, + "loss": 3.2903, + "step": 36385 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018385676350644723, + "loss": 3.0828, + "step": 36390 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001837809179154791, + "loss": 3.3216, + "step": 36395 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001837050810635902, + "loss": 3.2892, + "step": 36400 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018362925295648322, + "loss": 3.1734, + "step": 36405 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018355343359986018, + "loss": 3.1735, + "step": 36410 + }, + { + "epoch": 0.63, + "learning_rate": 0.000183477622999422, + "loss": 3.1576, + "step": 36415 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018340182116086947, + "loss": 3.2127, + "step": 36420 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018332602808990238, + "loss": 3.1634, + "step": 36425 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018325024379222006, + "loss": 3.3005, + "step": 36430 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018317446827352115, + "loss": 3.2287, + "step": 36435 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018309870153950347, + "loss": 3.23, + "step": 36440 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018302294359586426, + "loss": 3.208, + "step": 36445 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001829471944483002, + "loss": 3.2683, + "step": 36450 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018287145410250723, + "loss": 3.1638, + "step": 36455 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018279572256418077, + "loss": 3.2345, + "step": 36460 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018271999983901515, + "loss": 3.2936, + "step": 36465 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018264428593270452, + "loss": 3.2147, + "step": 36470 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001825685808509422, + "loss": 3.1711, + "step": 36475 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018249288459942073, + "loss": 3.1448, + "step": 36480 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001824171971838322, + "loss": 3.2079, + "step": 36485 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018234151860986773, + "loss": 3.1737, + "step": 36490 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018226584888321812, + "loss": 3.2266, + "step": 36495 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018219018800957334, + "loss": 3.154, + "step": 36500 + }, + { + "epoch": 0.63, + "eval_loss": 3.2557082176208496, + "eval_runtime": 151.364, + "eval_samples_per_second": 12.163, + "eval_steps_per_second": 0.766, + "step": 36500 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018211453599462256, + "loss": 3.2151, + "step": 36505 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018203889284405467, + "loss": 3.3157, + "step": 36510 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018196325856355736, + "loss": 3.1872, + "step": 36515 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018188763315881817, + "loss": 3.2263, + "step": 36520 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018181201663552353, + "loss": 3.2157, + "step": 36525 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018173640899935958, + "loss": 3.2489, + "step": 36530 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001816608102560117, + "loss": 3.2881, + "step": 36535 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001815852204111643, + "loss": 3.2426, + "step": 36540 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018150963947050145, + "loss": 3.2455, + "step": 36545 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018143406743970642, + "loss": 3.3231, + "step": 36550 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018135850432446197, + "loss": 3.3465, + "step": 36555 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018128295013044993, + "loss": 3.2252, + "step": 36560 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001812074048633517, + "loss": 3.2743, + "step": 36565 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018113186852884773, + "loss": 3.2134, + "step": 36570 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018105634113261805, + "loss": 3.1525, + "step": 36575 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018098082268034197, + "loss": 3.2405, + "step": 36580 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018090531317769805, + "loss": 3.1579, + "step": 36585 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001808298126303643, + "loss": 3.3345, + "step": 36590 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018075432104401782, + "loss": 3.2023, + "step": 36595 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018067883842433534, + "loss": 3.3086, + "step": 36600 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018060336477699262, + "loss": 3.2832, + "step": 36605 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018052790010766496, + "loss": 3.311, + "step": 36610 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018045244442202707, + "loss": 3.263, + "step": 36615 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018037699772575262, + "loss": 3.2083, + "step": 36620 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001803015600245148, + "loss": 3.3219, + "step": 36625 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018022613132398624, + "loss": 3.271, + "step": 36630 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018015071162983884, + "loss": 3.1227, + "step": 36635 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018007530094774375, + "loss": 3.2264, + "step": 36640 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017999989928337132, + "loss": 3.2169, + "step": 36645 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017992450664239149, + "loss": 3.1578, + "step": 36650 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001798491230304734, + "loss": 3.1818, + "step": 36655 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017977374845328547, + "loss": 3.1602, + "step": 36660 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001796983829164956, + "loss": 3.1913, + "step": 36665 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001796230264257707, + "loss": 3.3033, + "step": 36670 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017954767898677726, + "loss": 3.2693, + "step": 36675 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017947234060518115, + "loss": 3.2891, + "step": 36680 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017939701128664727, + "loss": 3.2005, + "step": 36685 + }, + { + "epoch": 0.63, + "learning_rate": 0.00017932169103684016, + "loss": 3.1168, + "step": 36690 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001792463798614233, + "loss": 3.2606, + "step": 36695 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001791710777660599, + "loss": 3.1485, + "step": 36700 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017909578475641213, + "loss": 3.0826, + "step": 36705 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017902050083814174, + "loss": 3.2234, + "step": 36710 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017894522601690974, + "loss": 3.36, + "step": 36715 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017886996029837634, + "loss": 3.2038, + "step": 36720 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001787947036882011, + "loss": 3.1815, + "step": 36725 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017871945619204294, + "loss": 3.265, + "step": 36730 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001786442178155602, + "loss": 3.2302, + "step": 36735 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017856898856441036, + "loss": 3.1937, + "step": 36740 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017849376844425024, + "loss": 3.1804, + "step": 36745 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017841855746073595, + "loss": 3.2216, + "step": 36750 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017834335561952306, + "loss": 3.3077, + "step": 36755 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001782681629262664, + "loss": 3.1248, + "step": 36760 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001781929793866201, + "loss": 3.1452, + "step": 36765 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017811780500623733, + "loss": 3.2719, + "step": 36770 + }, + { + "epoch": 0.64, + "learning_rate": 0.000178042639790771, + "loss": 3.2455, + "step": 36775 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017796748374587323, + "loss": 3.1196, + "step": 36780 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001778923368771952, + "loss": 3.329, + "step": 36785 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001778171991903876, + "loss": 3.1396, + "step": 36790 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017774207069110064, + "loss": 3.2559, + "step": 36795 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017766695138498332, + "loss": 3.2072, + "step": 36800 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017759184127768424, + "loss": 3.3362, + "step": 36805 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017751674037485136, + "loss": 3.2532, + "step": 36810 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017744164868213193, + "loss": 3.2233, + "step": 36815 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017736656620517246, + "loss": 3.2606, + "step": 36820 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001772914929496187, + "loss": 3.1516, + "step": 36825 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017721642892111573, + "loss": 3.1829, + "step": 36830 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017714137412530806, + "loss": 3.2415, + "step": 36835 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017706632856783952, + "loss": 3.2393, + "step": 36840 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017699129225435303, + "loss": 3.192, + "step": 36845 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017691626519049085, + "loss": 3.3051, + "step": 36850 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017684124738189476, + "loss": 3.2575, + "step": 36855 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017676623883420574, + "loss": 3.1901, + "step": 36860 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017669123955306394, + "loss": 3.1218, + "step": 36865 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017661624954410905, + "loss": 3.2743, + "step": 36870 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001765412688129798, + "loss": 3.2173, + "step": 36875 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001764662973653145, + "loss": 3.1673, + "step": 36880 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017639133520675043, + "loss": 3.3145, + "step": 36885 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017631638234292448, + "loss": 3.2983, + "step": 36890 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017624143877947282, + "loss": 3.1276, + "step": 36895 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017616650452203066, + "loss": 3.0952, + "step": 36900 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017609157957623265, + "loss": 3.1819, + "step": 36905 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017601666394771287, + "loss": 3.13, + "step": 36910 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017594175764210453, + "loss": 3.1295, + "step": 36915 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017586686066504035, + "loss": 3.2718, + "step": 36920 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017579197302215204, + "loss": 3.3466, + "step": 36925 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001757170947190707, + "loss": 3.1818, + "step": 36930 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017564222576142694, + "loss": 3.2946, + "step": 36935 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001755673661548505, + "loss": 3.2415, + "step": 36940 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001754925159049705, + "loss": 3.2438, + "step": 36945 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001754176750174151, + "loss": 3.2137, + "step": 36950 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017534284349781203, + "loss": 3.1935, + "step": 36955 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017526802135178836, + "loss": 3.281, + "step": 36960 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017519320858497018, + "loss": 3.185, + "step": 36965 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001751184052029832, + "loss": 3.2558, + "step": 36970 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001750436112114521, + "loss": 3.3523, + "step": 36975 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017496882661600095, + "loss": 3.2183, + "step": 36980 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017489405142225332, + "loss": 3.308, + "step": 36985 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001748192856358319, + "loss": 3.2556, + "step": 36990 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017474452926235864, + "loss": 3.2036, + "step": 36995 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017466978230745486, + "loss": 3.2368, + "step": 37000 + }, + { + "epoch": 0.64, + "eval_loss": 3.2513468265533447, + "eval_runtime": 149.7793, + "eval_samples_per_second": 12.291, + "eval_steps_per_second": 0.774, + "step": 37000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017459504477674104, + "loss": 3.2079, + "step": 37005 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017452031667583728, + "loss": 3.0015, + "step": 37010 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017444559801036262, + "loss": 3.2196, + "step": 37015 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017437088878593542, + "loss": 3.2114, + "step": 37020 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001742961890081738, + "loss": 3.3074, + "step": 37025 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017422149868269427, + "loss": 3.1725, + "step": 37030 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017414681781511356, + "loss": 3.2638, + "step": 37035 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017407214641104715, + "loss": 3.1967, + "step": 37040 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017399748447610987, + "loss": 3.1839, + "step": 37045 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017392283201591625, + "loss": 3.251, + "step": 37050 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001738481890360794, + "loss": 3.1605, + "step": 37055 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001737735555422121, + "loss": 3.2344, + "step": 37060 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017369893153992662, + "loss": 3.1954, + "step": 37065 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017362431703483425, + "loss": 3.2549, + "step": 37070 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017354971203254553, + "loss": 3.2404, + "step": 37075 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017347511653867045, + "loss": 3.249, + "step": 37080 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017340053055881803, + "loss": 3.2248, + "step": 37085 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017332595409859706, + "loss": 3.2577, + "step": 37090 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001732513871636151, + "loss": 3.249, + "step": 37095 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017317682975947926, + "loss": 3.2418, + "step": 37100 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001731022818917959, + "loss": 3.1526, + "step": 37105 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017302774356617052, + "loss": 3.2873, + "step": 37110 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017295321478820805, + "loss": 3.1383, + "step": 37115 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017287869556351283, + "loss": 3.2247, + "step": 37120 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001728041858976882, + "loss": 3.1668, + "step": 37125 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017272968579633695, + "loss": 3.2901, + "step": 37130 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017265519526506108, + "loss": 3.2611, + "step": 37135 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017258071430946174, + "loss": 3.1702, + "step": 37140 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001725062429351398, + "loss": 3.2486, + "step": 37145 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017243178114769505, + "loss": 3.2305, + "step": 37150 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017235732895272648, + "loss": 3.2179, + "step": 37155 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001722828863558327, + "loss": 3.2362, + "step": 37160 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017220845336261118, + "loss": 3.2262, + "step": 37165 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017213402997865915, + "loss": 3.1945, + "step": 37170 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001720596162095729, + "loss": 3.2624, + "step": 37175 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001719852120609476, + "loss": 3.0769, + "step": 37180 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017191081753837836, + "loss": 3.1758, + "step": 37185 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017183643264745925, + "loss": 3.2337, + "step": 37190 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017176205739378344, + "loss": 3.1392, + "step": 37195 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001716876917829439, + "loss": 3.2752, + "step": 37200 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017161333582053217, + "loss": 3.3017, + "step": 37205 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001715389895121397, + "loss": 3.1933, + "step": 37210 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017146465286335688, + "loss": 3.2897, + "step": 37215 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017139032587977333, + "loss": 3.2704, + "step": 37220 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017131600856697834, + "loss": 3.221, + "step": 37225 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017124170093055983, + "loss": 3.2712, + "step": 37230 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017116740297610565, + "loss": 3.1784, + "step": 37235 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001710931147092025, + "loss": 3.1287, + "step": 37240 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017101883613543636, + "loss": 3.3075, + "step": 37245 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017094456726039284, + "loss": 3.2108, + "step": 37250 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017087030808965654, + "loss": 3.167, + "step": 37255 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017079605862881114, + "loss": 3.0637, + "step": 37260 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017072181888344004, + "loss": 3.2903, + "step": 37265 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001706475888591256, + "loss": 3.3106, + "step": 37270 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001705733685614495, + "loss": 3.2914, + "step": 37275 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017049915799599298, + "loss": 3.2675, + "step": 37280 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017042495716833588, + "loss": 3.2125, + "step": 37285 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017035076608405803, + "loss": 3.197, + "step": 37290 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017027658474873817, + "loss": 3.1445, + "step": 37295 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017020241316795416, + "loss": 3.2512, + "step": 37300 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001701282513472837, + "loss": 3.2431, + "step": 37305 + }, + { + "epoch": 0.65, + "learning_rate": 0.00017005409929230297, + "loss": 3.1504, + "step": 37310 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016997995700858808, + "loss": 3.185, + "step": 37315 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016990582450171412, + "loss": 3.0925, + "step": 37320 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016983170177725532, + "loss": 3.0949, + "step": 37325 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016975758884078566, + "loss": 3.2816, + "step": 37330 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016968348569787773, + "loss": 3.3073, + "step": 37335 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001696093923541037, + "loss": 3.3084, + "step": 37340 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001695353088150352, + "loss": 3.1617, + "step": 37345 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016946123508624288, + "loss": 3.2592, + "step": 37350 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001693871711732967, + "loss": 3.2328, + "step": 37355 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016931311708176587, + "loss": 3.217, + "step": 37360 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001692390728172187, + "loss": 3.2719, + "step": 37365 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016916503838522331, + "loss": 3.069, + "step": 37370 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001690910137913465, + "loss": 3.3553, + "step": 37375 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016901699904115454, + "loss": 3.1695, + "step": 37380 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016894299414021294, + "loss": 3.2769, + "step": 37385 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016886899909408643, + "loss": 3.126, + "step": 37390 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016879501390833928, + "loss": 3.265, + "step": 37395 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001687210385885347, + "loss": 3.2026, + "step": 37400 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016864707314023517, + "loss": 3.2575, + "step": 37405 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001685731175690026, + "loss": 3.2137, + "step": 37410 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016849917188039803, + "loss": 3.2626, + "step": 37415 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001684252360799817, + "loss": 3.2544, + "step": 37420 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016835131017331337, + "loss": 3.3139, + "step": 37425 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016827739416595187, + "loss": 3.2438, + "step": 37430 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016820348806345525, + "loss": 3.1817, + "step": 37435 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001681295918713809, + "loss": 3.2638, + "step": 37440 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016805570559528526, + "loss": 3.2733, + "step": 37445 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001679818292407245, + "loss": 3.2396, + "step": 37450 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001679079628132536, + "loss": 3.2589, + "step": 37455 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016783410631842691, + "loss": 3.2473, + "step": 37460 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001677602597617981, + "loss": 3.19, + "step": 37465 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001676864231489199, + "loss": 3.2841, + "step": 37470 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016761259648534477, + "loss": 3.3538, + "step": 37475 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016753877977662384, + "loss": 3.3771, + "step": 37480 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001674649730283079, + "loss": 3.1082, + "step": 37485 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016739117624594665, + "loss": 3.1594, + "step": 37490 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001673173894350894, + "loss": 3.2678, + "step": 37495 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016724361260128434, + "loss": 3.2161, + "step": 37500 + }, + { + "epoch": 0.65, + "eval_loss": 3.246263027191162, + "eval_runtime": 150.2834, + "eval_samples_per_second": 12.25, + "eval_steps_per_second": 0.772, + "step": 37500 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001671698457500794, + "loss": 3.1662, + "step": 37505 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016709608888702124, + "loss": 3.2584, + "step": 37510 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016702234201765613, + "loss": 3.2162, + "step": 37515 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016694860514752938, + "loss": 3.1176, + "step": 37520 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016687487828218557, + "loss": 3.1931, + "step": 37525 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016680116142716873, + "loss": 3.2113, + "step": 37530 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016672745458802193, + "loss": 3.2477, + "step": 37535 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016665375777028755, + "loss": 3.1498, + "step": 37540 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016658007097950715, + "loss": 3.3016, + "step": 37545 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016650639422122158, + "loss": 3.194, + "step": 37550 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001664327275009711, + "loss": 3.137, + "step": 37555 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016635907082429514, + "loss": 3.2968, + "step": 37560 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001662854241967319, + "loss": 3.2957, + "step": 37565 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016621178762381964, + "loss": 3.2532, + "step": 37570 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016613816111109525, + "loss": 3.1561, + "step": 37575 + }, + { + "epoch": 0.65, + "learning_rate": 0.000166064544664095, + "loss": 3.1695, + "step": 37580 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016599093828835484, + "loss": 3.1958, + "step": 37585 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016591734198940906, + "loss": 3.18, + "step": 37590 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016584375577279219, + "loss": 3.1963, + "step": 37595 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016577017964403728, + "loss": 3.1112, + "step": 37600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016569661360867687, + "loss": 3.1904, + "step": 37605 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016562305767224304, + "loss": 3.2197, + "step": 37610 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016554951184026646, + "loss": 3.2805, + "step": 37615 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016547597611827746, + "loss": 3.237, + "step": 37620 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016540245051180572, + "loss": 3.1968, + "step": 37625 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016532893502637993, + "loss": 3.2983, + "step": 37630 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016525542966752804, + "loss": 3.2977, + "step": 37635 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016518193444077728, + "loss": 3.2517, + "step": 37640 + }, + { + "epoch": 0.65, + "learning_rate": 0.000165108449351654, + "loss": 3.1764, + "step": 37645 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001650349744056841, + "loss": 3.1407, + "step": 37650 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001649615096083925, + "loss": 3.3731, + "step": 37655 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016488805496530328, + "loss": 3.2043, + "step": 37660 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001648146104819399, + "loss": 3.214, + "step": 37665 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001647411761638249, + "loss": 3.1209, + "step": 37670 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016466775201648032, + "loss": 3.2196, + "step": 37675 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016459433804542727, + "loss": 3.3123, + "step": 37680 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016452093425618606, + "loss": 3.245, + "step": 37685 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016444754065427625, + "loss": 3.1629, + "step": 37690 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016437415724521672, + "loss": 3.2326, + "step": 37695 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016430078403452535, + "loss": 3.1992, + "step": 37700 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016422742102771969, + "loss": 3.2132, + "step": 37705 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016415406823031616, + "loss": 3.179, + "step": 37710 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016408072564783047, + "loss": 3.2493, + "step": 37715 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016400739328577767, + "loss": 3.203, + "step": 37720 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016393407114967184, + "loss": 3.1359, + "step": 37725 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016386075924502657, + "loss": 3.2161, + "step": 37730 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001637874575773546, + "loss": 3.1806, + "step": 37735 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016371416615216765, + "loss": 3.2406, + "step": 37740 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016364088497497706, + "loss": 3.1397, + "step": 37745 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016356761405129287, + "loss": 3.1923, + "step": 37750 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001634943533866251, + "loss": 3.3604, + "step": 37755 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016342110298648232, + "loss": 3.2568, + "step": 37760 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016334786285637264, + "loss": 3.2019, + "step": 37765 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016327463300180335, + "loss": 3.1786, + "step": 37770 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016320141342828095, + "loss": 3.0524, + "step": 37775 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016312820414131107, + "loss": 3.3086, + "step": 37780 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016305500514639888, + "loss": 3.1939, + "step": 37785 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016298181644904854, + "loss": 3.307, + "step": 37790 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016290863805476331, + "loss": 3.2686, + "step": 37795 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016283546996904595, + "loss": 3.1524, + "step": 37800 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001627623121973982, + "loss": 3.1401, + "step": 37805 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001626891647453213, + "loss": 3.1872, + "step": 37810 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016261602761831554, + "loss": 3.4207, + "step": 37815 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016254290082188044, + "loss": 3.2811, + "step": 37820 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016246978436151472, + "loss": 3.2386, + "step": 37825 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016239667824271626, + "loss": 3.2014, + "step": 37830 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001623235824709825, + "loss": 3.1844, + "step": 37835 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016225049705180988, + "loss": 3.1639, + "step": 37840 + }, + { + "epoch": 0.65, + "learning_rate": 0.00016217742199069375, + "loss": 3.2266, + "step": 37845 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016210435729312918, + "loss": 3.1546, + "step": 37850 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001620313029646103, + "loss": 3.2589, + "step": 37855 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016195825901063024, + "loss": 3.1717, + "step": 37860 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016188522543668187, + "loss": 3.1791, + "step": 37865 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016181220224825647, + "loss": 3.2002, + "step": 37870 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016173918945084538, + "loss": 3.0784, + "step": 37875 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016166618704993867, + "loss": 3.2606, + "step": 37880 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016159319505102565, + "loss": 3.2025, + "step": 37885 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016152021345959528, + "loss": 3.119, + "step": 37890 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016144724228113488, + "loss": 3.3311, + "step": 37895 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016137428152113197, + "loss": 3.2303, + "step": 37900 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001613013311850726, + "loss": 3.306, + "step": 37905 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016122839127844218, + "loss": 3.2077, + "step": 37910 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001611554618067258, + "loss": 3.1467, + "step": 37915 + }, + { + "epoch": 0.66, + "learning_rate": 0.000161082542775407, + "loss": 3.2522, + "step": 37920 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001610096341899689, + "loss": 3.2779, + "step": 37925 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016093673605589416, + "loss": 3.1941, + "step": 37930 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016086384837866414, + "loss": 3.1619, + "step": 37935 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016079097116375958, + "loss": 3.2503, + "step": 37940 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016071810441666077, + "loss": 3.2323, + "step": 37945 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016064524814284644, + "loss": 3.1909, + "step": 37950 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016057240234779543, + "loss": 3.1922, + "step": 37955 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001604995670369852, + "loss": 3.2015, + "step": 37960 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016042674221589242, + "loss": 3.1244, + "step": 37965 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016035392788999364, + "loss": 3.2906, + "step": 37970 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016028112406476353, + "loss": 3.2155, + "step": 37975 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016020833074567695, + "loss": 3.2215, + "step": 37980 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001601355479382075, + "loss": 3.2019, + "step": 37985 + }, + { + "epoch": 0.66, + "learning_rate": 0.00016006277564782796, + "loss": 3.1993, + "step": 37990 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015999001388001074, + "loss": 3.3097, + "step": 37995 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015991726264022677, + "loss": 3.1445, + "step": 38000 + }, + { + "epoch": 0.66, + "eval_loss": 3.2400145530700684, + "eval_runtime": 149.8729, + "eval_samples_per_second": 12.284, + "eval_steps_per_second": 0.774, + "step": 38000 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001598445219339467, + "loss": 3.2372, + "step": 38005 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015977179176664035, + "loss": 3.2157, + "step": 38010 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015969907214377662, + "loss": 3.2457, + "step": 38015 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015962636307082363, + "loss": 3.2293, + "step": 38020 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015955366455324874, + "loss": 3.1265, + "step": 38025 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001594809765965184, + "loss": 3.2626, + "step": 38030 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015940829920609858, + "loss": 3.2296, + "step": 38035 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015933563238745412, + "loss": 3.2178, + "step": 38040 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015926297614604924, + "loss": 3.2407, + "step": 38045 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015919033048734726, + "loss": 3.2555, + "step": 38050 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015911769541681066, + "loss": 3.2065, + "step": 38055 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015904507093990152, + "loss": 3.2034, + "step": 38060 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015897245706208063, + "loss": 3.3155, + "step": 38065 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015889985378880818, + "loss": 3.2745, + "step": 38070 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015882726112554364, + "loss": 3.2874, + "step": 38075 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015875467907774558, + "loss": 3.1474, + "step": 38080 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015868210765087167, + "loss": 3.2925, + "step": 38085 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015860954685037912, + "loss": 3.1178, + "step": 38090 + }, + { + "epoch": 0.66, + "learning_rate": 0.000158536996681724, + "loss": 3.2575, + "step": 38095 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001584644571503618, + "loss": 3.2266, + "step": 38100 + }, + { + "epoch": 0.66, + "learning_rate": 0.000158391928261747, + "loss": 3.0909, + "step": 38105 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015831941002133343, + "loss": 3.1911, + "step": 38110 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001582469024345742, + "loss": 3.1309, + "step": 38115 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015817440550692152, + "loss": 3.1826, + "step": 38120 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001581019192438265, + "loss": 3.212, + "step": 38125 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015802944365074005, + "loss": 3.3265, + "step": 38130 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015795697873311184, + "loss": 3.1416, + "step": 38135 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015788452449639077, + "loss": 3.0383, + "step": 38140 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015781208094602537, + "loss": 3.159, + "step": 38145 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001577396480874625, + "loss": 3.2361, + "step": 38150 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001576672259261492, + "loss": 3.2279, + "step": 38155 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015759481446753098, + "loss": 3.248, + "step": 38160 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015752241371705286, + "loss": 3.1824, + "step": 38165 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015745002368015913, + "loss": 3.1641, + "step": 38170 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015737764436229306, + "loss": 3.1412, + "step": 38175 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001573052757688972, + "loss": 3.2713, + "step": 38180 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015723291790541334, + "loss": 3.226, + "step": 38185 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015716057077728223, + "loss": 3.3351, + "step": 38190 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001570882343899443, + "loss": 3.2517, + "step": 38195 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015701590874883883, + "loss": 3.1853, + "step": 38200 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015694359385940403, + "loss": 3.1761, + "step": 38205 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001568712897270779, + "loss": 3.1338, + "step": 38210 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001567989963572973, + "loss": 3.2637, + "step": 38215 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015672671375549814, + "loss": 3.2493, + "step": 38220 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015665444192711604, + "loss": 3.2718, + "step": 38225 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015658218087758507, + "loss": 3.2829, + "step": 38230 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015650993061233924, + "loss": 3.3045, + "step": 38235 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015643769113681124, + "loss": 3.2175, + "step": 38240 + }, + { + "epoch": 0.66, + "learning_rate": 0.000156365462456433, + "loss": 3.2631, + "step": 38245 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015629324457663605, + "loss": 3.3236, + "step": 38250 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015622103750285047, + "loss": 3.2581, + "step": 38255 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015614884124050607, + "loss": 3.287, + "step": 38260 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015607665579503164, + "loss": 3.3278, + "step": 38265 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015600448117185492, + "loss": 3.1982, + "step": 38270 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015593231737640352, + "loss": 3.2156, + "step": 38275 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015586016441410341, + "loss": 3.2382, + "step": 38280 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015578802229038007, + "loss": 3.2535, + "step": 38285 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015571589101065855, + "loss": 3.3024, + "step": 38290 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015564377058036258, + "loss": 3.1432, + "step": 38295 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015557166100491518, + "loss": 3.1777, + "step": 38300 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015549956228973875, + "loss": 3.2004, + "step": 38305 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015542747444025455, + "loss": 3.0625, + "step": 38310 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015535539746188348, + "loss": 3.1938, + "step": 38315 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015528333136004523, + "loss": 3.221, + "step": 38320 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015521127614015877, + "loss": 3.2017, + "step": 38325 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015513923180764233, + "loss": 3.2288, + "step": 38330 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015506719836791313, + "loss": 3.2243, + "step": 38335 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015499517582638798, + "loss": 3.232, + "step": 38340 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015492316418848248, + "loss": 3.1439, + "step": 38345 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001548511634596115, + "loss": 3.1259, + "step": 38350 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015477917364518915, + "loss": 3.1856, + "step": 38355 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001547071947506287, + "loss": 3.2, + "step": 38360 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015463522678134248, + "loss": 3.1343, + "step": 38365 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015456326974274233, + "loss": 3.2869, + "step": 38370 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015449132364023897, + "loss": 3.0858, + "step": 38375 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015441938847924233, + "loss": 3.1788, + "step": 38380 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001543474642651616, + "loss": 3.2329, + "step": 38385 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015427555100340498, + "loss": 3.196, + "step": 38390 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001542036486993802, + "loss": 3.1304, + "step": 38395 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015413175735849388, + "loss": 3.2239, + "step": 38400 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015405987698615184, + "loss": 3.0827, + "step": 38405 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001539880075877591, + "loss": 3.2181, + "step": 38410 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015391614916871978, + "loss": 3.2241, + "step": 38415 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015384430173443747, + "loss": 3.2218, + "step": 38420 + }, + { + "epoch": 0.66, + "learning_rate": 0.00015377246529031465, + "loss": 3.098, + "step": 38425 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015370063984175304, + "loss": 3.2373, + "step": 38430 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015362882539415352, + "loss": 3.2509, + "step": 38435 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001535570219529162, + "loss": 3.1837, + "step": 38440 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001534852295234402, + "loss": 3.2769, + "step": 38445 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001534134481111242, + "loss": 3.1648, + "step": 38450 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015334167772136566, + "loss": 3.1912, + "step": 38455 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015326991835956132, + "loss": 3.1258, + "step": 38460 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015319817003110715, + "loss": 3.2684, + "step": 38465 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015312643274139812, + "loss": 3.2462, + "step": 38470 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001530547064958288, + "loss": 3.1875, + "step": 38475 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015298299129979238, + "loss": 3.2456, + "step": 38480 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015291128715868164, + "loss": 3.1945, + "step": 38485 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015283959407788825, + "loss": 3.2784, + "step": 38490 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015276791206280307, + "loss": 3.0896, + "step": 38495 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001526962411188165, + "loss": 3.1892, + "step": 38500 + }, + { + "epoch": 0.67, + "eval_loss": 3.235760450363159, + "eval_runtime": 149.9771, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 38500 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001526245812513178, + "loss": 3.1669, + "step": 38505 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015255293246569504, + "loss": 3.2019, + "step": 38510 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015248129476733626, + "loss": 3.2363, + "step": 38515 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015240966816162807, + "loss": 3.2336, + "step": 38520 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001523380526539563, + "loss": 3.2384, + "step": 38525 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001522664482497065, + "loss": 3.1823, + "step": 38530 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015219485495426242, + "loss": 3.2383, + "step": 38535 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015212327277300784, + "loss": 3.2228, + "step": 38540 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015205170171132536, + "loss": 3.0597, + "step": 38545 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001519801417745965, + "loss": 3.2417, + "step": 38550 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015190859296820264, + "loss": 3.2426, + "step": 38555 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001518370552975234, + "loss": 3.1934, + "step": 38560 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015176552876793847, + "loss": 3.2366, + "step": 38565 + }, + { + "epoch": 0.67, + "learning_rate": 0.000151694013384826, + "loss": 3.1862, + "step": 38570 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015162250915356357, + "loss": 3.2587, + "step": 38575 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015155101607952827, + "loss": 3.1882, + "step": 38580 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015147953416809565, + "loss": 3.2082, + "step": 38585 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015140806342464077, + "loss": 3.204, + "step": 38590 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015133660385453814, + "loss": 3.2401, + "step": 38595 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015126515546316102, + "loss": 3.1105, + "step": 38600 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015119371825588192, + "loss": 3.2315, + "step": 38605 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015112229223807262, + "loss": 3.2599, + "step": 38610 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015105087741510386, + "loss": 3.1626, + "step": 38615 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015097947379234583, + "loss": 3.0691, + "step": 38620 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015090808137516767, + "loss": 3.1392, + "step": 38625 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001508367001689377, + "loss": 3.2198, + "step": 38630 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015076533017902347, + "loss": 3.1879, + "step": 38635 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015069397141079154, + "loss": 3.228, + "step": 38640 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015062262386960772, + "loss": 3.2194, + "step": 38645 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001505512875608371, + "loss": 3.2814, + "step": 38650 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015047996248984376, + "loss": 3.1777, + "step": 38655 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015040864866199097, + "loss": 3.221, + "step": 38660 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015033734608264115, + "loss": 3.2044, + "step": 38665 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015026605475715581, + "loss": 3.2661, + "step": 38670 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015019477469089586, + "loss": 3.2046, + "step": 38675 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001501235058892211, + "loss": 3.1087, + "step": 38680 + }, + { + "epoch": 0.67, + "learning_rate": 0.00015005224835749063, + "loss": 3.2581, + "step": 38685 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014998100210106258, + "loss": 3.1716, + "step": 38690 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014990976712529424, + "loss": 3.1427, + "step": 38695 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014983854343554232, + "loss": 3.1025, + "step": 38700 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001497673310371624, + "loss": 3.1843, + "step": 38705 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001496961299355092, + "loss": 3.2923, + "step": 38710 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001496249401359368, + "loss": 3.2623, + "step": 38715 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014955376164379818, + "loss": 3.1085, + "step": 38720 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001494825944644456, + "loss": 3.2231, + "step": 38725 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001494114386032306, + "loss": 3.2339, + "step": 38730 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014934029406550367, + "loss": 3.1435, + "step": 38735 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001492691608566145, + "loss": 3.2924, + "step": 38740 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014919803898191198, + "loss": 3.3259, + "step": 38745 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014912692844674393, + "loss": 3.2163, + "step": 38750 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001490558292564578, + "loss": 3.1284, + "step": 38755 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001489847414163997, + "loss": 3.2856, + "step": 38760 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001489136649319151, + "loss": 3.2836, + "step": 38765 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014884259980834866, + "loss": 3.1405, + "step": 38770 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001487715460510439, + "loss": 3.1253, + "step": 38775 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001487005036653439, + "loss": 3.2167, + "step": 38780 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001486294726565908, + "loss": 3.1719, + "step": 38785 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014855845303012535, + "loss": 3.1742, + "step": 38790 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001484874447912882, + "loss": 3.1847, + "step": 38795 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014841644794541872, + "loss": 3.2533, + "step": 38800 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014834546249785542, + "loss": 3.1358, + "step": 38805 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014827448845393634, + "loss": 3.2148, + "step": 38810 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014820352581899787, + "loss": 3.1691, + "step": 38815 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014813257459837649, + "loss": 3.1114, + "step": 38820 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014806163479740714, + "loss": 3.1567, + "step": 38825 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014799070642142412, + "loss": 3.189, + "step": 38830 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014791978947576115, + "loss": 3.1744, + "step": 38835 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014784888396575038, + "loss": 3.2016, + "step": 38840 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014777798989672387, + "loss": 3.1499, + "step": 38845 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014770710727401241, + "loss": 3.2737, + "step": 38850 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014763623610294588, + "loss": 3.2374, + "step": 38855 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014756537638885376, + "loss": 3.2429, + "step": 38860 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014749452813706405, + "loss": 3.3057, + "step": 38865 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014742369135290409, + "loss": 3.2106, + "step": 38870 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014735286604170074, + "loss": 3.1396, + "step": 38875 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014728205220877953, + "loss": 3.3025, + "step": 38880 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014721124985946521, + "loss": 3.1966, + "step": 38885 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014714045899908215, + "loss": 3.229, + "step": 38890 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001470696796329529, + "loss": 3.2342, + "step": 38895 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001469989117664001, + "loss": 3.2281, + "step": 38900 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014692815540474506, + "loss": 3.1531, + "step": 38905 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014685741055330809, + "loss": 3.2777, + "step": 38910 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014678667721740925, + "loss": 3.0968, + "step": 38915 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014671595540236687, + "loss": 3.2377, + "step": 38920 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014664524511349918, + "loss": 3.2756, + "step": 38925 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014657454635612314, + "loss": 3.2936, + "step": 38930 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014650385913555475, + "loss": 3.1701, + "step": 38935 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014643318345710976, + "loss": 3.2626, + "step": 38940 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014636251932610226, + "loss": 3.1047, + "step": 38945 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001462918667478458, + "loss": 3.2449, + "step": 38950 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014622122572765335, + "loss": 3.1803, + "step": 38955 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014615059627083658, + "loss": 3.199, + "step": 38960 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014607997838270652, + "loss": 3.2114, + "step": 38965 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014600937206857328, + "loss": 3.1264, + "step": 38970 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014593877733374595, + "loss": 3.1626, + "step": 38975 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001458681941835331, + "loss": 3.1494, + "step": 38980 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014579762262324216, + "loss": 3.1752, + "step": 38985 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014572706265817976, + "loss": 3.2887, + "step": 38990 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014565651429365153, + "loss": 3.1689, + "step": 38995 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001455859775349624, + "loss": 3.1576, + "step": 39000 + }, + { + "epoch": 0.67, + "eval_loss": 3.232856512069702, + "eval_runtime": 149.8711, + "eval_samples_per_second": 12.284, + "eval_steps_per_second": 0.774, + "step": 39000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014551545238741643, + "loss": 3.1477, + "step": 39005 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014544493885631677, + "loss": 3.1183, + "step": 39010 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001453744369469656, + "loss": 3.2491, + "step": 39015 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014530394666466436, + "loss": 3.2484, + "step": 39020 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014523346801471347, + "loss": 3.1706, + "step": 39025 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014516300100241256, + "loss": 3.2258, + "step": 39030 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014509254563306053, + "loss": 3.212, + "step": 39035 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014502210191195522, + "loss": 3.1339, + "step": 39040 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014495166984439356, + "loss": 3.0903, + "step": 39045 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014488124943567174, + "loss": 3.2413, + "step": 39050 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014481084069108488, + "loss": 3.0555, + "step": 39055 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014474044361592755, + "loss": 3.1508, + "step": 39060 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001446700582154932, + "loss": 3.2852, + "step": 39065 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001445996844950744, + "loss": 3.1775, + "step": 39070 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001445293224599629, + "loss": 3.1098, + "step": 39075 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014445897211544948, + "loss": 3.2777, + "step": 39080 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014438863346682437, + "loss": 3.2472, + "step": 39085 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014431830651937658, + "loss": 3.1457, + "step": 39090 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001442479912783941, + "loss": 3.1604, + "step": 39095 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014417768774916456, + "loss": 3.2105, + "step": 39100 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014410739593697432, + "loss": 3.2452, + "step": 39105 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014403711584710886, + "loss": 3.1147, + "step": 39110 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014396684748485308, + "loss": 3.138, + "step": 39115 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014389659085549075, + "loss": 3.1713, + "step": 39120 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014382634596430476, + "loss": 3.2346, + "step": 39125 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014375611281657716, + "loss": 3.1103, + "step": 39130 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014368589141758903, + "loss": 3.2913, + "step": 39135 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014361568177262088, + "loss": 3.0587, + "step": 39140 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014354548388695201, + "loss": 3.183, + "step": 39145 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014347529776586094, + "loss": 3.2254, + "step": 39150 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001434051234146253, + "loss": 3.2179, + "step": 39155 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014333496083852174, + "loss": 3.1301, + "step": 39160 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014326481004282632, + "loss": 3.0906, + "step": 39165 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014319467103281406, + "loss": 3.0157, + "step": 39170 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001431245438137587, + "loss": 3.2439, + "step": 39175 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014305442839093375, + "loss": 3.1518, + "step": 39180 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014298432476961148, + "loss": 3.1854, + "step": 39185 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014291423295506318, + "loss": 3.2038, + "step": 39190 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014284415295255973, + "loss": 3.226, + "step": 39195 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014277408476737034, + "loss": 3.2791, + "step": 39200 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014270402840476412, + "loss": 3.3441, + "step": 39205 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014263398387000885, + "loss": 3.131, + "step": 39210 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014256395116837137, + "loss": 3.2233, + "step": 39215 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014249393030511814, + "loss": 3.1594, + "step": 39220 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014242392128551405, + "loss": 3.2299, + "step": 39225 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001423539241148234, + "loss": 3.2021, + "step": 39230 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014228393879830984, + "loss": 3.2461, + "step": 39235 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014221396534123582, + "loss": 3.1472, + "step": 39240 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014214400374886293, + "loss": 3.1902, + "step": 39245 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014207405402645197, + "loss": 3.2461, + "step": 39250 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001420041161792627, + "loss": 3.2336, + "step": 39255 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014193419021255428, + "loss": 3.2692, + "step": 39260 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014186427613158468, + "loss": 3.1938, + "step": 39265 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014179437394161107, + "loss": 3.1897, + "step": 39270 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014172448364788972, + "loss": 3.2023, + "step": 39275 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014165460525567602, + "loss": 3.1319, + "step": 39280 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014158473877022454, + "loss": 3.3315, + "step": 39285 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014151488419678887, + "loss": 3.2045, + "step": 39290 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014144504154062166, + "loss": 3.298, + "step": 39295 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014137521080697478, + "loss": 3.2035, + "step": 39300 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001413053920010991, + "loss": 3.2147, + "step": 39305 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014123558512824452, + "loss": 3.2121, + "step": 39310 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001411657901936604, + "loss": 3.1644, + "step": 39315 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014109600720259484, + "loss": 3.2276, + "step": 39320 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014102623616029516, + "loss": 3.2369, + "step": 39325 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014095647707200782, + "loss": 3.0479, + "step": 39330 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001408867299429782, + "loss": 3.174, + "step": 39335 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014081699477845112, + "loss": 3.248, + "step": 39340 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014074727158367028, + "loss": 3.1176, + "step": 39345 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014067756036387842, + "loss": 3.218, + "step": 39350 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014060786112431755, + "loss": 3.2828, + "step": 39355 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001405381738702285, + "loss": 3.1822, + "step": 39360 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001404684986068517, + "loss": 3.2422, + "step": 39365 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014039883533942615, + "loss": 3.2066, + "step": 39370 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001403291840731903, + "loss": 3.2333, + "step": 39375 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001402595448133815, + "loss": 3.3032, + "step": 39380 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014018991756523628, + "loss": 3.2043, + "step": 39385 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014012030233399013, + "loss": 3.1163, + "step": 39390 + }, + { + "epoch": 0.68, + "learning_rate": 0.00014005069912487793, + "loss": 3.1967, + "step": 39395 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001399811079431335, + "loss": 3.3533, + "step": 39400 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013991152879398963, + "loss": 3.1829, + "step": 39405 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013984196168267837, + "loss": 3.245, + "step": 39410 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001397724066144307, + "loss": 3.1686, + "step": 39415 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013970286359447697, + "loss": 3.2045, + "step": 39420 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013963333262804643, + "loss": 3.3767, + "step": 39425 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001395638137203674, + "loss": 3.2496, + "step": 39430 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013949430687666732, + "loss": 3.2372, + "step": 39435 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001394248121021727, + "loss": 3.1356, + "step": 39440 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013935532940210937, + "loss": 3.2368, + "step": 39445 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013928585878170212, + "loss": 3.2078, + "step": 39450 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001392164002461744, + "loss": 3.2284, + "step": 39455 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013914695380074947, + "loss": 3.312, + "step": 39460 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001390775194506493, + "loss": 3.0963, + "step": 39465 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013900809720109478, + "loss": 3.1184, + "step": 39470 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013893868705730657, + "loss": 3.0958, + "step": 39475 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001388692890245034, + "loss": 3.2181, + "step": 39480 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013879990310790405, + "loss": 3.2215, + "step": 39485 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013873052931272583, + "loss": 3.2281, + "step": 39490 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001386611676441852, + "loss": 3.2299, + "step": 39495 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013859181810749817, + "loss": 3.2065, + "step": 39500 + }, + { + "epoch": 0.68, + "eval_loss": 3.226215362548828, + "eval_runtime": 149.8725, + "eval_samples_per_second": 12.284, + "eval_steps_per_second": 0.774, + "step": 39500 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013852248070787897, + "loss": 3.1109, + "step": 39505 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013845315545054179, + "loss": 3.1988, + "step": 39510 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013838384234069937, + "loss": 3.2595, + "step": 39515 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013831454138356366, + "loss": 3.1322, + "step": 39520 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013824525258434602, + "loss": 3.2045, + "step": 39525 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001381759759482563, + "loss": 3.2601, + "step": 39530 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013810671148050373, + "loss": 3.249, + "step": 39535 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013803745918629688, + "loss": 3.3018, + "step": 39540 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013796821907084307, + "loss": 3.1712, + "step": 39545 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013789899113934878, + "loss": 3.1496, + "step": 39550 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013782977539701958, + "loss": 3.1739, + "step": 39555 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013776057184906008, + "loss": 3.3045, + "step": 39560 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001376913805006742, + "loss": 3.2423, + "step": 39565 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013762220135706468, + "loss": 3.1416, + "step": 39570 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013755303442343337, + "loss": 3.2441, + "step": 39575 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013748387970498156, + "loss": 3.2493, + "step": 39580 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001374147372069089, + "loss": 3.224, + "step": 39585 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013734560693441484, + "loss": 3.28, + "step": 39590 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013727648889269754, + "loss": 3.2218, + "step": 39595 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013720738308695428, + "loss": 3.1533, + "step": 39600 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013713828952238168, + "loss": 3.2559, + "step": 39605 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013706920820417498, + "loss": 3.1483, + "step": 39610 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001370001391375287, + "loss": 3.2323, + "step": 39615 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013693108232763667, + "loss": 3.1266, + "step": 39620 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013686203777969156, + "loss": 3.1826, + "step": 39625 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013679300549888514, + "loss": 3.2438, + "step": 39630 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013672398549040827, + "loss": 3.1957, + "step": 39635 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013665497775945082, + "loss": 3.2472, + "step": 39640 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013658598231120205, + "loss": 3.0956, + "step": 39645 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013651699915084986, + "loss": 3.2534, + "step": 39650 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013644802828358154, + "loss": 3.1855, + "step": 39655 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001363790697145833, + "loss": 3.2826, + "step": 39660 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013631012344904038, + "loss": 3.176, + "step": 39665 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013624118949213737, + "loss": 3.1286, + "step": 39670 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013617226784905765, + "loss": 3.2014, + "step": 39675 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001361033585249838, + "loss": 3.1897, + "step": 39680 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013603446152509743, + "loss": 3.3175, + "step": 39685 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013596557685457923, + "loss": 3.1729, + "step": 39690 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001358967045186089, + "loss": 3.216, + "step": 39695 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013582784452236546, + "loss": 3.1958, + "step": 39700 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013575899687102674, + "loss": 3.2406, + "step": 39705 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013569016156976977, + "loss": 3.1854, + "step": 39710 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013562133862377054, + "loss": 3.2161, + "step": 39715 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001355525280382041, + "loss": 3.0194, + "step": 39720 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013548372981824492, + "loss": 3.0951, + "step": 39725 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001354149439690662, + "loss": 3.1332, + "step": 39730 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013534617049584002, + "loss": 3.2012, + "step": 39735 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013527740940373807, + "loss": 3.2941, + "step": 39740 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013520866069793077, + "loss": 3.2922, + "step": 39745 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001351399243835875, + "loss": 3.2285, + "step": 39750 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013507120046587728, + "loss": 3.1846, + "step": 39755 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013500248894996727, + "loss": 3.2841, + "step": 39760 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013493378984102462, + "loss": 3.2459, + "step": 39765 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013486510314421503, + "loss": 3.2431, + "step": 39770 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013479642886470326, + "loss": 3.1897, + "step": 39775 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001347277670076536, + "loss": 3.1831, + "step": 39780 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013465911757822865, + "loss": 3.2116, + "step": 39785 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013459048058159085, + "loss": 3.0972, + "step": 39790 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013452185602290113, + "loss": 3.2108, + "step": 39795 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001344532439073197, + "loss": 3.2728, + "step": 39800 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013438464424000602, + "loss": 3.188, + "step": 39805 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001343160570261184, + "loss": 3.1234, + "step": 39810 + }, + { + "epoch": 0.69, + "learning_rate": 0.000134247482270814, + "loss": 3.1929, + "step": 39815 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013417891997924956, + "loss": 3.1941, + "step": 39820 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013411037015658052, + "loss": 3.2134, + "step": 39825 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013404183280796137, + "loss": 3.1629, + "step": 39830 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001339733079385461, + "loss": 3.315, + "step": 39835 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013390479555348698, + "loss": 3.1403, + "step": 39840 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013383629565793612, + "loss": 3.1475, + "step": 39845 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013376780825704422, + "loss": 3.1861, + "step": 39850 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013369933335596112, + "loss": 3.2026, + "step": 39855 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013363087095983608, + "loss": 3.2458, + "step": 39860 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013356242107381673, + "loss": 3.0653, + "step": 39865 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001334939837030504, + "loss": 3.1658, + "step": 39870 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001334255588526832, + "loss": 3.2229, + "step": 39875 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013335714652786018, + "loss": 3.3034, + "step": 39880 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001332887467337259, + "loss": 3.1056, + "step": 39885 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013322035947542344, + "loss": 3.2603, + "step": 39890 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013315198475809505, + "loss": 3.1717, + "step": 39895 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013308362258688245, + "loss": 3.1708, + "step": 39900 + }, + { + "epoch": 0.69, + "learning_rate": 0.000133015272966926, + "loss": 3.0321, + "step": 39905 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013294693590336522, + "loss": 3.1176, + "step": 39910 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013287861140133875, + "loss": 3.1338, + "step": 39915 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001328102994659841, + "loss": 3.1293, + "step": 39920 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001327420001024382, + "loss": 3.1591, + "step": 39925 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013267371331583672, + "loss": 3.2051, + "step": 39930 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013260543911131447, + "loss": 3.2059, + "step": 39935 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013253717749400531, + "loss": 3.1315, + "step": 39940 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013246892846904205, + "loss": 3.2699, + "step": 39945 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001324006920415569, + "loss": 3.2865, + "step": 39950 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013233246821668077, + "loss": 3.1292, + "step": 39955 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001322642569995438, + "loss": 3.237, + "step": 39960 + }, + { + "epoch": 0.69, + "learning_rate": 0.000132196058395275, + "loss": 3.1434, + "step": 39965 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013212787240900265, + "loss": 3.2707, + "step": 39970 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013205969904585385, + "loss": 3.2334, + "step": 39975 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013199153831095511, + "loss": 3.1555, + "step": 39980 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001319233902094317, + "loss": 3.1761, + "step": 39985 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013185525474640792, + "loss": 3.3449, + "step": 39990 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013178713192700726, + "loss": 3.1922, + "step": 39995 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013171902175635212, + "loss": 3.2252, + "step": 40000 + }, + { + "epoch": 0.69, + "eval_loss": 3.221252679824829, + "eval_runtime": 149.7766, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.774, + "step": 40000 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001316509242395642, + "loss": 3.2361, + "step": 40005 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013158283938176403, + "loss": 3.1515, + "step": 40010 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013151476718807118, + "loss": 3.2082, + "step": 40015 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013144670766360438, + "loss": 3.1433, + "step": 40020 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013137866081348122, + "loss": 3.0056, + "step": 40025 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013131062664281876, + "loss": 3.1968, + "step": 40030 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001312426051567326, + "loss": 3.1794, + "step": 40035 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013117459636033767, + "loss": 3.086, + "step": 40040 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013110660025874787, + "loss": 3.2869, + "step": 40045 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013103861685707623, + "loss": 3.2714, + "step": 40050 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001309706461604345, + "loss": 3.3275, + "step": 40055 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013090268817393409, + "loss": 3.1481, + "step": 40060 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013083474290268487, + "loss": 3.1364, + "step": 40065 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013076681035179604, + "loss": 3.2324, + "step": 40070 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001306988905263758, + "loss": 3.2283, + "step": 40075 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013063098343153116, + "loss": 3.2573, + "step": 40080 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001305630890723687, + "loss": 3.1392, + "step": 40085 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013049520745399363, + "loss": 3.3101, + "step": 40090 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013042733858151023, + "loss": 3.1184, + "step": 40095 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013035948246002191, + "loss": 3.2271, + "step": 40100 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013029163909463102, + "loss": 3.2393, + "step": 40105 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013022380849043926, + "loss": 3.2122, + "step": 40110 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013015599065254707, + "loss": 3.2095, + "step": 40115 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013008818558605376, + "loss": 3.2102, + "step": 40120 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013002039329605824, + "loss": 3.0793, + "step": 40125 + }, + { + "epoch": 0.69, + "learning_rate": 0.00012995261378765798, + "loss": 3.1341, + "step": 40130 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001298848470659496, + "loss": 3.2486, + "step": 40135 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001298170931360291, + "loss": 3.1172, + "step": 40140 + }, + { + "epoch": 0.69, + "learning_rate": 0.00012974935200299077, + "loss": 3.0749, + "step": 40145 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001296816236719288, + "loss": 3.1997, + "step": 40150 + }, + { + "epoch": 0.69, + "learning_rate": 0.00012961390814793583, + "loss": 3.1917, + "step": 40155 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012954620543610367, + "loss": 3.2264, + "step": 40160 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012947851554152354, + "loss": 3.2491, + "step": 40165 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012941083846928488, + "loss": 3.1931, + "step": 40170 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012934317422447702, + "loss": 3.218, + "step": 40175 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001292755228121879, + "loss": 3.2307, + "step": 40180 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012920788423750434, + "loss": 3.2845, + "step": 40185 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012914025850551284, + "loss": 3.1866, + "step": 40190 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012907264562129814, + "loss": 3.1706, + "step": 40195 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012900504558994438, + "loss": 3.2382, + "step": 40200 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012893745841653499, + "loss": 3.1685, + "step": 40205 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012886988410615207, + "loss": 3.1907, + "step": 40210 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012880232266387678, + "loss": 3.2388, + "step": 40215 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001287347740947895, + "loss": 3.2157, + "step": 40220 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012866723840396934, + "loss": 3.1247, + "step": 40225 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001285997155964949, + "loss": 3.2234, + "step": 40230 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012853220567744344, + "loss": 3.2356, + "step": 40235 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012846470865189138, + "loss": 3.2794, + "step": 40240 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001283972245249141, + "loss": 3.1414, + "step": 40245 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001283297533015861, + "loss": 3.1587, + "step": 40250 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012826229498698075, + "loss": 3.2113, + "step": 40255 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012819484958617077, + "loss": 3.2561, + "step": 40260 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001281274171042276, + "loss": 3.1379, + "step": 40265 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012805999754622182, + "loss": 3.1382, + "step": 40270 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001279925909172231, + "loss": 3.2634, + "step": 40275 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012792519722229989, + "loss": 3.2751, + "step": 40280 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012785781646652006, + "loss": 3.0939, + "step": 40285 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012779044865495024, + "loss": 3.2182, + "step": 40290 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001277230937926561, + "loss": 3.0571, + "step": 40295 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012765575188470246, + "loss": 3.1138, + "step": 40300 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012758842293615285, + "loss": 3.1653, + "step": 40305 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012752110695207037, + "loss": 3.2168, + "step": 40310 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001274538039375167, + "loss": 3.1544, + "step": 40315 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012738651389755274, + "loss": 3.2413, + "step": 40320 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012731923683723828, + "loss": 3.2262, + "step": 40325 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012725197276163222, + "loss": 3.1795, + "step": 40330 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012718472167579236, + "loss": 3.1318, + "step": 40335 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012711748358477592, + "loss": 3.2327, + "step": 40340 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012705025849363868, + "loss": 3.3024, + "step": 40345 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012698304640743565, + "loss": 3.156, + "step": 40350 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012691584733122083, + "loss": 3.0815, + "step": 40355 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012684866127004715, + "loss": 3.1712, + "step": 40360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012678148822896685, + "loss": 3.1828, + "step": 40365 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001267143282130309, + "loss": 3.2036, + "step": 40370 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001266471812272894, + "loss": 3.1898, + "step": 40375 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012658004727679145, + "loss": 3.206, + "step": 40380 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001265129263665851, + "loss": 3.2307, + "step": 40385 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012644581850171764, + "loss": 3.1573, + "step": 40390 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012637872368723534, + "loss": 3.1681, + "step": 40395 + }, + { + "epoch": 0.7, + "learning_rate": 0.000126311641928183, + "loss": 3.2237, + "step": 40400 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001262445732296051, + "loss": 3.1012, + "step": 40405 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012617751759654488, + "loss": 3.1711, + "step": 40410 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012611047503404435, + "loss": 3.2486, + "step": 40415 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012604344554714518, + "loss": 3.1923, + "step": 40420 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012597642914088716, + "loss": 3.2736, + "step": 40425 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012590942582030992, + "loss": 3.2386, + "step": 40430 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012584243559045168, + "loss": 3.1725, + "step": 40435 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001257754584563496, + "loss": 3.1664, + "step": 40440 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001257084944230404, + "loss": 3.1511, + "step": 40445 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012564154349555893, + "loss": 3.0612, + "step": 40450 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012557460567893999, + "loss": 3.3118, + "step": 40455 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012550768097821673, + "loss": 3.0661, + "step": 40460 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001254407693984215, + "loss": 3.2882, + "step": 40465 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012537387094458603, + "loss": 3.1967, + "step": 40470 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012530698562174044, + "loss": 3.1915, + "step": 40475 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012524011343491412, + "loss": 3.1552, + "step": 40480 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012517325438913572, + "loss": 3.0782, + "step": 40485 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012510640848943265, + "loss": 3.1107, + "step": 40490 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012503957574083133, + "loss": 3.1968, + "step": 40495 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001249727561483573, + "loss": 3.1461, + "step": 40500 + }, + { + "epoch": 0.7, + "eval_loss": 3.2169432640075684, + "eval_runtime": 149.8523, + "eval_samples_per_second": 12.285, + "eval_steps_per_second": 0.774, + "step": 40500 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012490594971703483, + "loss": 2.9939, + "step": 40505 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012483915645188776, + "loss": 3.0932, + "step": 40510 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012477237635793843, + "loss": 3.1378, + "step": 40515 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012470560944020825, + "loss": 3.1682, + "step": 40520 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012463885570371808, + "loss": 3.0671, + "step": 40525 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012457211515348705, + "loss": 3.0951, + "step": 40530 + }, + { + "epoch": 0.7, + "learning_rate": 0.000124505387794534, + "loss": 3.2597, + "step": 40535 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001244386736318764, + "loss": 3.1131, + "step": 40540 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001243719726705307, + "loss": 3.0366, + "step": 40545 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012430528491551277, + "loss": 3.148, + "step": 40550 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012423861037183693, + "loss": 3.1749, + "step": 40555 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012417194904451668, + "loss": 3.1621, + "step": 40560 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012410530093856487, + "loss": 3.1866, + "step": 40565 + }, + { + "epoch": 0.7, + "learning_rate": 0.000124038666058993, + "loss": 3.2424, + "step": 40570 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012397204441081164, + "loss": 3.3355, + "step": 40575 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012390543599903042, + "loss": 3.1592, + "step": 40580 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012383884082865781, + "loss": 3.105, + "step": 40585 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001237722589047017, + "loss": 3.2395, + "step": 40590 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012370569023216856, + "loss": 3.1991, + "step": 40595 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012363913481606403, + "loss": 3.1987, + "step": 40600 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012357259266139272, + "loss": 3.0725, + "step": 40605 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012350606377315815, + "loss": 3.2065, + "step": 40610 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012343954815636314, + "loss": 3.2754, + "step": 40615 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001233730458160093, + "loss": 3.1027, + "step": 40620 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001233065567570972, + "loss": 3.244, + "step": 40625 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012324008098462652, + "loss": 3.1625, + "step": 40630 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012317361850359583, + "loss": 3.1442, + "step": 40635 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001231071693190027, + "loss": 3.2685, + "step": 40640 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012304073343584403, + "loss": 3.1912, + "step": 40645 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012297431085911526, + "loss": 3.1647, + "step": 40650 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012290790159381106, + "loss": 3.1712, + "step": 40655 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012284150564492513, + "loss": 3.3168, + "step": 40660 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001227751230174499, + "loss": 3.2013, + "step": 40665 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001227087537163773, + "loss": 3.2584, + "step": 40670 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012264239774669776, + "loss": 3.2529, + "step": 40675 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012257605511340104, + "loss": 3.1225, + "step": 40680 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012250972582147562, + "loss": 3.0349, + "step": 40685 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012244340987590914, + "loss": 3.1949, + "step": 40690 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012237710728168835, + "loss": 3.2146, + "step": 40695 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012231081804379892, + "loss": 3.2274, + "step": 40700 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001222445421672251, + "loss": 3.1047, + "step": 40705 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012217827965695083, + "loss": 3.276, + "step": 40710 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012211203051795864, + "loss": 3.1447, + "step": 40715 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012204579475522996, + "loss": 3.1452, + "step": 40720 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012197957237374574, + "loss": 3.187, + "step": 40725 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012191336337848511, + "loss": 3.1845, + "step": 40730 + }, + { + "epoch": 0.7, + "learning_rate": 0.00012184716777442699, + "loss": 3.1049, + "step": 40735 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012178098556654884, + "loss": 3.1475, + "step": 40740 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012171481675982707, + "loss": 3.0952, + "step": 40745 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012164866135923754, + "loss": 3.204, + "step": 40750 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001215825193697546, + "loss": 3.3169, + "step": 40755 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012151639079635188, + "loss": 3.324, + "step": 40760 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012145027564400183, + "loss": 3.1329, + "step": 40765 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012138417391767586, + "loss": 3.2409, + "step": 40770 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012131808562234478, + "loss": 3.1679, + "step": 40775 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012125201076297802, + "loss": 3.2133, + "step": 40780 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012118594934454375, + "loss": 3.2173, + "step": 40785 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001211199013720098, + "loss": 2.9781, + "step": 40790 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001210538668503425, + "loss": 3.2979, + "step": 40795 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012098784578450726, + "loss": 3.1413, + "step": 40800 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001209218381794688, + "loss": 3.1493, + "step": 40805 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012085584404019011, + "loss": 3.1366, + "step": 40810 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012078986337163399, + "loss": 3.1048, + "step": 40815 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012072389617876167, + "loss": 3.1932, + "step": 40820 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012065794246653348, + "loss": 3.1397, + "step": 40825 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012059200223990913, + "loss": 3.1325, + "step": 40830 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012052607550384668, + "loss": 3.1377, + "step": 40835 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012046016226330344, + "loss": 3.1563, + "step": 40840 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012039426252323596, + "loss": 3.246, + "step": 40845 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012032837628859946, + "loss": 3.0618, + "step": 40850 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001202625035643483, + "loss": 3.1616, + "step": 40855 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012019664435543569, + "loss": 3.1188, + "step": 40860 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012013079866681387, + "loss": 3.2018, + "step": 40865 + }, + { + "epoch": 0.71, + "learning_rate": 0.00012006496650343427, + "loss": 3.1535, + "step": 40870 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011999914787024704, + "loss": 3.1538, + "step": 40875 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011993334277220142, + "loss": 3.2631, + "step": 40880 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011986755121424558, + "loss": 3.1214, + "step": 40885 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011980177320132663, + "loss": 3.1057, + "step": 40890 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001197360087383909, + "loss": 3.1804, + "step": 40895 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001196702578303835, + "loss": 3.0932, + "step": 40900 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011960452048224853, + "loss": 3.1851, + "step": 40905 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011953879669892913, + "loss": 3.1617, + "step": 40910 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011947308648536734, + "loss": 3.1573, + "step": 40915 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011940738984650419, + "loss": 3.1449, + "step": 40920 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011934170678727987, + "loss": 3.1135, + "step": 40925 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011927603731263337, + "loss": 3.1522, + "step": 40930 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011921038142750264, + "loss": 3.1436, + "step": 40935 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001191447391368247, + "loss": 3.1961, + "step": 40940 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011907911044553541, + "loss": 3.2341, + "step": 40945 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011901349535856993, + "loss": 3.3183, + "step": 40950 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011894789388086205, + "loss": 3.1467, + "step": 40955 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011888230601734466, + "loss": 3.099, + "step": 40960 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011881673177294967, + "loss": 3.1508, + "step": 40965 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011875117115260775, + "loss": 3.2019, + "step": 40970 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011868562416124903, + "loss": 3.2991, + "step": 40975 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011862009080380213, + "loss": 3.0779, + "step": 40980 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011855457108519482, + "loss": 3.1401, + "step": 40985 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001184890650103539, + "loss": 3.1752, + "step": 40990 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011842357258420507, + "loss": 3.241, + "step": 40995 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011835809381167289, + "loss": 3.2169, + "step": 41000 + }, + { + "epoch": 0.71, + "eval_loss": 3.2123522758483887, + "eval_runtime": 149.7773, + "eval_samples_per_second": 12.292, + "eval_steps_per_second": 0.774, + "step": 41000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011829262869768125, + "loss": 3.1618, + "step": 41005 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011822717724715272, + "loss": 3.1831, + "step": 41010 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001181617394650089, + "loss": 3.1159, + "step": 41015 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011809631535617035, + "loss": 3.2193, + "step": 41020 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011803090492555653, + "loss": 3.2243, + "step": 41025 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001179655081780862, + "loss": 3.1949, + "step": 41030 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011790012511867675, + "loss": 3.2647, + "step": 41035 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011783475575224461, + "loss": 3.1304, + "step": 41040 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001177694000837053, + "loss": 3.045, + "step": 41045 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011770405811797305, + "loss": 3.1695, + "step": 41050 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011763872985996152, + "loss": 3.1831, + "step": 41055 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011757341531458297, + "loss": 3.0754, + "step": 41060 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001175081144867485, + "loss": 3.0875, + "step": 41065 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011744282738136864, + "loss": 3.1625, + "step": 41070 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011737755400335259, + "loss": 3.2232, + "step": 41075 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011731229435760843, + "loss": 3.2507, + "step": 41080 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011724704844904368, + "loss": 3.1869, + "step": 41085 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011718181628256413, + "loss": 3.2847, + "step": 41090 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011711659786307514, + "loss": 3.199, + "step": 41095 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011705139319548073, + "loss": 3.1731, + "step": 41100 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011698620228468384, + "loss": 3.0682, + "step": 41105 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011692102513558682, + "loss": 3.1529, + "step": 41110 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011685586175309026, + "loss": 3.31, + "step": 41115 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011679071214209438, + "loss": 3.1318, + "step": 41120 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011672557630749801, + "loss": 3.2206, + "step": 41125 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011666045425419895, + "loss": 3.1701, + "step": 41130 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011659534598709431, + "loss": 3.1972, + "step": 41135 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011653025151107965, + "loss": 3.1705, + "step": 41140 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001164651708310497, + "loss": 3.1075, + "step": 41145 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011640010395189838, + "loss": 3.2107, + "step": 41150 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001163350508785183, + "loss": 3.1441, + "step": 41155 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011627001161580116, + "loss": 3.2071, + "step": 41160 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011620498616863753, + "loss": 3.0613, + "step": 41165 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011613997454191691, + "loss": 3.2416, + "step": 41170 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011607497674052802, + "loss": 3.1232, + "step": 41175 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011600999276935832, + "loss": 3.1565, + "step": 41180 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011594502263329418, + "loss": 3.1478, + "step": 41185 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001158800663372211, + "loss": 3.0881, + "step": 41190 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011581512388602334, + "loss": 3.174, + "step": 41195 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001157501952845844, + "loss": 3.0763, + "step": 41200 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011568528053778651, + "loss": 3.153, + "step": 41205 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011562037965051084, + "loss": 3.2635, + "step": 41210 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001155554926276379, + "loss": 3.2358, + "step": 41215 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011549061947404656, + "loss": 3.167, + "step": 41220 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011542576019461489, + "loss": 3.144, + "step": 41225 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011536091479422028, + "loss": 3.1572, + "step": 41230 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011529608327773859, + "loss": 3.128, + "step": 41235 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011523126565004486, + "loss": 3.2118, + "step": 41240 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011516646191601301, + "loss": 3.273, + "step": 41245 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011510167208051588, + "loss": 3.1155, + "step": 41250 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011503689614842551, + "loss": 3.2507, + "step": 41255 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011497213412461262, + "loss": 3.231, + "step": 41260 + }, + { + "epoch": 0.71, + "learning_rate": 0.000114907386013947, + "loss": 3.1433, + "step": 41265 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011484265182129739, + "loss": 3.1993, + "step": 41270 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011477793155153134, + "loss": 3.3162, + "step": 41275 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011471322520951567, + "loss": 3.2125, + "step": 41280 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001146485328001159, + "loss": 3.1264, + "step": 41285 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011458385432819654, + "loss": 3.2733, + "step": 41290 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011451918979862108, + "loss": 3.2649, + "step": 41295 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011445453921625198, + "loss": 3.2322, + "step": 41300 + }, + { + "epoch": 0.71, + "learning_rate": 0.00011438990258595055, + "loss": 3.1406, + "step": 41305 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001143252799125773, + "loss": 3.1362, + "step": 41310 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011426067120099141, + "loss": 3.1726, + "step": 41315 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011419607645605117, + "loss": 3.1266, + "step": 41320 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011413149568261374, + "loss": 3.1741, + "step": 41325 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011406692888553518, + "loss": 3.1985, + "step": 41330 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011400237606967078, + "loss": 3.0828, + "step": 41335 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011393783723987457, + "loss": 3.2476, + "step": 41340 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011387331240099926, + "loss": 3.2025, + "step": 41345 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011380880155789706, + "loss": 3.3122, + "step": 41350 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011374430471541874, + "loss": 3.1677, + "step": 41355 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011367982187841411, + "loss": 2.9866, + "step": 41360 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011361535305173218, + "loss": 3.1992, + "step": 41365 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011355089824022028, + "loss": 3.1555, + "step": 41370 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011348645744872542, + "loss": 3.2704, + "step": 41375 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011342203068209307, + "loss": 3.1413, + "step": 41380 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011335761794516771, + "loss": 3.1366, + "step": 41385 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011329321924279318, + "loss": 3.1669, + "step": 41390 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011322883457981149, + "loss": 3.1667, + "step": 41395 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011316446396106438, + "loss": 3.1504, + "step": 41400 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011310010739139204, + "loss": 3.2718, + "step": 41405 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011303576487563366, + "loss": 3.1071, + "step": 41410 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011297143641862781, + "loss": 3.2227, + "step": 41415 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011290712202521133, + "loss": 3.2243, + "step": 41420 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011284282170022033, + "loss": 3.185, + "step": 41425 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001127785354484901, + "loss": 3.1816, + "step": 41430 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011271426327485448, + "loss": 3.204, + "step": 41435 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011265000518414633, + "loss": 3.1829, + "step": 41440 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011258576118119786, + "loss": 3.0784, + "step": 41445 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011252153127083944, + "loss": 3.0765, + "step": 41450 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011245731545790121, + "loss": 3.0026, + "step": 41455 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011239311374721167, + "loss": 3.1984, + "step": 41460 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001123289261435984, + "loss": 3.1799, + "step": 41465 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011226475265188832, + "loss": 3.2619, + "step": 41470 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011220059327690649, + "loss": 3.1734, + "step": 41475 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011213644802347769, + "loss": 3.1921, + "step": 41480 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011207231689642522, + "loss": 3.1058, + "step": 41485 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011200819990057129, + "loss": 3.0331, + "step": 41490 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011194409704073752, + "loss": 3.0883, + "step": 41495 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011188000832174376, + "loss": 3.1265, + "step": 41500 + }, + { + "epoch": 0.72, + "eval_loss": 3.207731008529663, + "eval_runtime": 149.6797, + "eval_samples_per_second": 12.3, + "eval_steps_per_second": 0.775, + "step": 41500 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011181593374840916, + "loss": 3.0709, + "step": 41505 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011175187332555206, + "loss": 3.2107, + "step": 41510 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011168782705798929, + "loss": 3.1451, + "step": 41515 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011162379495053683, + "loss": 3.118, + "step": 41520 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011155977700800959, + "loss": 3.0218, + "step": 41525 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011149577323522128, + "loss": 3.0796, + "step": 41530 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011143178363698489, + "loss": 3.2099, + "step": 41535 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011136780821811194, + "loss": 3.0831, + "step": 41540 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011130384698341308, + "loss": 3.2227, + "step": 41545 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011123989993769789, + "loss": 3.151, + "step": 41550 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011117596708577473, + "loss": 3.1856, + "step": 41555 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011111204843245124, + "loss": 3.086, + "step": 41560 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011104814398253367, + "loss": 3.2135, + "step": 41565 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011098425374082733, + "loss": 3.1735, + "step": 41570 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001109203777121364, + "loss": 3.2179, + "step": 41575 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011085651590126405, + "loss": 3.2251, + "step": 41580 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011079266831301225, + "loss": 3.1628, + "step": 41585 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011072883495218224, + "loss": 3.2514, + "step": 41590 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011066501582357384, + "loss": 3.2295, + "step": 41595 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011060121093198591, + "loss": 3.2016, + "step": 41600 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011053742028221629, + "loss": 3.1608, + "step": 41605 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011047364387906157, + "loss": 3.1431, + "step": 41610 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001104098817273176, + "loss": 3.1741, + "step": 41615 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011034613383177893, + "loss": 3.2561, + "step": 41620 + }, + { + "epoch": 0.72, + "learning_rate": 0.000110282400197239, + "loss": 3.1301, + "step": 41625 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011021868082849031, + "loss": 3.1035, + "step": 41630 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011015497573032411, + "loss": 3.2929, + "step": 41635 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011009128490753086, + "loss": 3.233, + "step": 41640 + }, + { + "epoch": 0.72, + "learning_rate": 0.00011002760836489986, + "loss": 3.0038, + "step": 41645 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010996394610721889, + "loss": 3.2885, + "step": 41650 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010990029813927538, + "loss": 3.1889, + "step": 41655 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010983666446585519, + "loss": 3.2143, + "step": 41660 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010977304509174315, + "loss": 3.1514, + "step": 41665 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010970944002172335, + "loss": 3.1891, + "step": 41670 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010964584926057843, + "loss": 3.1459, + "step": 41675 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010958227281309008, + "loss": 3.069, + "step": 41680 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010951871068403893, + "loss": 3.3585, + "step": 41685 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010945516287820446, + "loss": 3.2503, + "step": 41690 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001093916294003653, + "loss": 3.1437, + "step": 41695 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010932811025529875, + "loss": 3.0947, + "step": 41700 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010926460544778115, + "loss": 3.2289, + "step": 41705 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010920111498258771, + "loss": 3.1003, + "step": 41710 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010913763886449249, + "loss": 3.1294, + "step": 41715 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010907417709826878, + "loss": 3.126, + "step": 41720 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010901072968868858, + "loss": 3.0671, + "step": 41725 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010894729664052247, + "loss": 3.2908, + "step": 41730 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010888387795854067, + "loss": 3.1179, + "step": 41735 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010882047364751177, + "loss": 3.1891, + "step": 41740 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010875708371220335, + "loss": 3.193, + "step": 41745 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010869370815738237, + "loss": 3.2572, + "step": 41750 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010863034698781387, + "loss": 3.1651, + "step": 41755 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010856700020826264, + "loss": 3.1652, + "step": 41760 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010850366782349193, + "loss": 3.1472, + "step": 41765 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010844034983826389, + "loss": 3.2266, + "step": 41770 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010837704625734002, + "loss": 3.2599, + "step": 41775 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010831375708547998, + "loss": 3.2315, + "step": 41780 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010825048232744318, + "loss": 3.2559, + "step": 41785 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010818722198798737, + "loss": 3.1243, + "step": 41790 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010812397607186936, + "loss": 3.1431, + "step": 41795 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010806074458384519, + "loss": 3.2221, + "step": 41800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010799752752866928, + "loss": 3.2885, + "step": 41805 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010793432491109518, + "loss": 3.2099, + "step": 41810 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010787113673587563, + "loss": 3.2519, + "step": 41815 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010780796300776196, + "loss": 3.1942, + "step": 41820 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010774480373150449, + "loss": 3.0997, + "step": 41825 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001076816589118525, + "loss": 3.1281, + "step": 41830 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010761852855355405, + "loss": 3.1984, + "step": 41835 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010755541266135639, + "loss": 3.1747, + "step": 41840 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010749231124000543, + "loss": 3.116, + "step": 41845 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010742922429424612, + "loss": 3.093, + "step": 41850 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010736615182882218, + "loss": 3.2103, + "step": 41855 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010730309384847643, + "loss": 3.2, + "step": 41860 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010724005035795035, + "loss": 3.0806, + "step": 41865 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010717702136198471, + "loss": 3.2047, + "step": 41870 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010711400686531885, + "loss": 3.1632, + "step": 41875 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010705100687269118, + "loss": 3.0442, + "step": 41880 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010698802138883891, + "loss": 3.2539, + "step": 41885 + }, + { + "epoch": 0.72, + "learning_rate": 0.00010692505041849818, + "loss": 3.1052, + "step": 41890 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010686209396640425, + "loss": 3.2667, + "step": 41895 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010679915203729107, + "loss": 3.2379, + "step": 41900 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010673622463589149, + "loss": 3.2051, + "step": 41905 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010667331176693735, + "loss": 3.1852, + "step": 41910 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010661041343515931, + "loss": 3.1764, + "step": 41915 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010654752964528715, + "loss": 3.191, + "step": 41920 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010648466040204932, + "loss": 3.2177, + "step": 41925 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010642180571017332, + "loss": 3.176, + "step": 41930 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010635896557438544, + "loss": 3.1663, + "step": 41935 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010629613999941095, + "loss": 3.1828, + "step": 41940 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010623332898997389, + "loss": 3.246, + "step": 41945 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010617053255079758, + "loss": 3.1608, + "step": 41950 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010610775068660385, + "loss": 3.1494, + "step": 41955 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010604498340211361, + "loss": 3.173, + "step": 41960 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001059822307020466, + "loss": 3.2291, + "step": 41965 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001059194925911214, + "loss": 3.1392, + "step": 41970 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010585676907405583, + "loss": 3.1625, + "step": 41975 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010579406015556625, + "loss": 3.271, + "step": 41980 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010573136584036807, + "loss": 3.1903, + "step": 41985 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010566868613317558, + "loss": 3.1708, + "step": 41990 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001056060210387018, + "loss": 3.1984, + "step": 41995 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010554337056165913, + "loss": 3.1468, + "step": 42000 + }, + { + "epoch": 0.73, + "eval_loss": 3.204759359359741, + "eval_runtime": 149.9776, + "eval_samples_per_second": 12.275, + "eval_steps_per_second": 0.773, + "step": 42000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010548073470675853, + "loss": 3.0838, + "step": 42005 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010541811347870956, + "loss": 3.1097, + "step": 42010 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010535550688222135, + "loss": 3.0943, + "step": 42015 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010529291492200146, + "loss": 3.212, + "step": 42020 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010523033760275644, + "loss": 3.1342, + "step": 42025 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010516777492919206, + "loss": 3.1234, + "step": 42030 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010510522690601228, + "loss": 3.09, + "step": 42035 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010504269353792073, + "loss": 3.1094, + "step": 42040 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010498017482961946, + "loss": 3.1887, + "step": 42045 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010491767078580953, + "loss": 3.1215, + "step": 42050 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010485518141119113, + "loss": 3.1981, + "step": 42055 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010479270671046284, + "loss": 3.1392, + "step": 42060 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010473024668832265, + "loss": 3.1256, + "step": 42065 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010466780134946715, + "loss": 3.1203, + "step": 42070 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010460537069859185, + "loss": 3.0632, + "step": 42075 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001045429547403915, + "loss": 3.1372, + "step": 42080 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010448055347955913, + "loss": 3.2098, + "step": 42085 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010441816692078702, + "loss": 3.1605, + "step": 42090 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001043557950687665, + "loss": 3.1536, + "step": 42095 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001042934379281876, + "loss": 3.2327, + "step": 42100 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010423109550373913, + "loss": 3.122, + "step": 42105 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010416876780010898, + "loss": 3.1947, + "step": 42110 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001041064548219838, + "loss": 3.2346, + "step": 42115 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010404415657404935, + "loss": 3.2128, + "step": 42120 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010398187306099007, + "loss": 3.2819, + "step": 42125 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010391960428748938, + "loss": 3.1558, + "step": 42130 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010385735025822954, + "loss": 3.1389, + "step": 42135 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010379511097789162, + "loss": 3.2378, + "step": 42140 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010373288645115594, + "loss": 3.1614, + "step": 42145 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010367067668270137, + "loss": 3.106, + "step": 42150 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010360848167720561, + "loss": 3.1168, + "step": 42155 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010354630143934578, + "loss": 3.2302, + "step": 42160 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010348413597379714, + "loss": 3.1316, + "step": 42165 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010342198528523423, + "loss": 3.2556, + "step": 42170 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010335984937833069, + "loss": 3.1538, + "step": 42175 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010329772825775871, + "loss": 3.0533, + "step": 42180 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001032356219281895, + "loss": 3.0731, + "step": 42185 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010317353039429312, + "loss": 3.1566, + "step": 42190 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010311145366073845, + "loss": 3.0046, + "step": 42195 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010304939173219347, + "loss": 3.1053, + "step": 42200 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010298734461332493, + "loss": 3.1393, + "step": 42205 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010292531230879836, + "loss": 3.1764, + "step": 42210 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010286329482327833, + "loss": 3.2309, + "step": 42215 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010280129216142815, + "loss": 3.2166, + "step": 42220 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010273930432791024, + "loss": 3.0617, + "step": 42225 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010267733132738571, + "loss": 3.2347, + "step": 42230 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010261537316451457, + "loss": 3.1509, + "step": 42235 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010255342984395583, + "loss": 3.1592, + "step": 42240 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010249150137036725, + "loss": 3.2813, + "step": 42245 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010242958774840544, + "loss": 3.0992, + "step": 42250 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010236768898272622, + "loss": 3.1728, + "step": 42255 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010230580507798392, + "loss": 3.1918, + "step": 42260 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010224393603883195, + "loss": 3.1306, + "step": 42265 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010218208186992248, + "loss": 3.1408, + "step": 42270 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010212024257590652, + "loss": 3.1967, + "step": 42275 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010205841816143437, + "loss": 3.2242, + "step": 42280 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001019966086311547, + "loss": 3.2118, + "step": 42285 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010193481398971532, + "loss": 3.195, + "step": 42290 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010187303424176285, + "loss": 3.2024, + "step": 42295 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010181126939194269, + "loss": 3.2437, + "step": 42300 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010174951944489953, + "loss": 3.1947, + "step": 42305 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010168778440527656, + "loss": 3.2454, + "step": 42310 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010162606427771568, + "loss": 3.2738, + "step": 42315 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001015643590668582, + "loss": 3.1613, + "step": 42320 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010150266877734395, + "loss": 3.117, + "step": 42325 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010144099341381165, + "loss": 3.143, + "step": 42330 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010137933298089923, + "loss": 3.2231, + "step": 42335 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010131768748324284, + "loss": 3.1593, + "step": 42340 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010125605692547824, + "loss": 3.1333, + "step": 42345 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010119444131223962, + "loss": 3.1118, + "step": 42350 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010113284064816004, + "loss": 3.1835, + "step": 42355 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010107125493787187, + "loss": 3.2863, + "step": 42360 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010100968418600574, + "loss": 3.1489, + "step": 42365 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010094812839719143, + "loss": 3.1761, + "step": 42370 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010088658757605786, + "loss": 3.0487, + "step": 42375 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010082506172723244, + "loss": 3.0552, + "step": 42380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010076355085534155, + "loss": 3.1523, + "step": 42385 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010070205496501075, + "loss": 3.1273, + "step": 42390 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010064057406086381, + "loss": 3.1326, + "step": 42395 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010057910814752414, + "loss": 3.0923, + "step": 42400 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010051765722961349, + "loss": 3.09, + "step": 42405 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010045622131175262, + "loss": 3.2578, + "step": 42410 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010039480039856145, + "loss": 3.0941, + "step": 42415 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001003333944946581, + "loss": 3.2674, + "step": 42420 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010027200360466032, + "loss": 3.1428, + "step": 42425 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010021062773318431, + "loss": 3.2387, + "step": 42430 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010014926688484507, + "loss": 3.0526, + "step": 42435 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010008792106425693, + "loss": 3.151, + "step": 42440 + }, + { + "epoch": 0.73, + "learning_rate": 0.00010002659027603253, + "loss": 3.113, + "step": 42445 + }, + { + "epoch": 0.73, + "learning_rate": 9.996527452478354e-05, + "loss": 3.2201, + "step": 42450 + }, + { + "epoch": 0.73, + "learning_rate": 9.990397381512088e-05, + "loss": 3.1862, + "step": 42455 + }, + { + "epoch": 0.73, + "learning_rate": 9.984268815165389e-05, + "loss": 3.2709, + "step": 42460 + }, + { + "epoch": 0.73, + "learning_rate": 9.978141753899098e-05, + "loss": 3.2524, + "step": 42465 + }, + { + "epoch": 0.73, + "learning_rate": 9.972016198173938e-05, + "loss": 3.12, + "step": 42470 + }, + { + "epoch": 0.74, + "learning_rate": 9.965892148450506e-05, + "loss": 3.2258, + "step": 42475 + }, + { + "epoch": 0.74, + "learning_rate": 9.959769605189322e-05, + "loss": 3.163, + "step": 42480 + }, + { + "epoch": 0.74, + "learning_rate": 9.953648568850758e-05, + "loss": 3.2217, + "step": 42485 + }, + { + "epoch": 0.74, + "learning_rate": 9.947529039895088e-05, + "loss": 3.1551, + "step": 42490 + }, + { + "epoch": 0.74, + "learning_rate": 9.941411018782465e-05, + "loss": 3.0359, + "step": 42495 + }, + { + "epoch": 0.74, + "learning_rate": 9.935294505972927e-05, + "loss": 3.2666, + "step": 42500 + }, + { + "epoch": 0.74, + "eval_loss": 3.201066255569458, + "eval_runtime": 149.9729, + "eval_samples_per_second": 12.276, + "eval_steps_per_second": 0.773, + "step": 42500 + }, + { + "epoch": 0.74, + "learning_rate": 9.929179501926418e-05, + "loss": 3.1589, + "step": 42505 + }, + { + "epoch": 0.74, + "learning_rate": 9.923066007102752e-05, + "loss": 3.2029, + "step": 42510 + }, + { + "epoch": 0.74, + "learning_rate": 9.91695402196163e-05, + "loss": 3.2643, + "step": 42515 + }, + { + "epoch": 0.74, + "learning_rate": 9.910843546962638e-05, + "loss": 3.1305, + "step": 42520 + }, + { + "epoch": 0.74, + "learning_rate": 9.904734582565261e-05, + "loss": 3.1963, + "step": 42525 + }, + { + "epoch": 0.74, + "learning_rate": 9.898627129228842e-05, + "loss": 3.2523, + "step": 42530 + }, + { + "epoch": 0.74, + "learning_rate": 9.89252118741266e-05, + "loss": 3.2724, + "step": 42535 + }, + { + "epoch": 0.74, + "learning_rate": 9.886416757575832e-05, + "loss": 3.1106, + "step": 42540 + }, + { + "epoch": 0.74, + "learning_rate": 9.880313840177383e-05, + "loss": 3.0961, + "step": 42545 + }, + { + "epoch": 0.74, + "learning_rate": 9.874212435676224e-05, + "loss": 3.1022, + "step": 42550 + }, + { + "epoch": 0.74, + "learning_rate": 9.868112544531136e-05, + "loss": 3.2171, + "step": 42555 + }, + { + "epoch": 0.74, + "learning_rate": 9.862014167200818e-05, + "loss": 3.1618, + "step": 42560 + }, + { + "epoch": 0.74, + "learning_rate": 9.855917304143833e-05, + "loss": 3.1864, + "step": 42565 + }, + { + "epoch": 0.74, + "learning_rate": 9.849821955818625e-05, + "loss": 3.1131, + "step": 42570 + }, + { + "epoch": 0.74, + "learning_rate": 9.843728122683545e-05, + "loss": 3.276, + "step": 42575 + }, + { + "epoch": 0.74, + "learning_rate": 9.837635805196792e-05, + "loss": 3.1902, + "step": 42580 + }, + { + "epoch": 0.74, + "learning_rate": 9.831545003816512e-05, + "loss": 3.0883, + "step": 42585 + }, + { + "epoch": 0.74, + "learning_rate": 9.825455719000695e-05, + "loss": 3.1129, + "step": 42590 + }, + { + "epoch": 0.74, + "learning_rate": 9.819367951207196e-05, + "loss": 3.077, + "step": 42595 + }, + { + "epoch": 0.74, + "learning_rate": 9.813281700893813e-05, + "loss": 3.1294, + "step": 42600 + }, + { + "epoch": 0.74, + "learning_rate": 9.807196968518187e-05, + "loss": 3.2189, + "step": 42605 + }, + { + "epoch": 0.74, + "learning_rate": 9.801113754537855e-05, + "loss": 3.2169, + "step": 42610 + }, + { + "epoch": 0.74, + "learning_rate": 9.795032059410263e-05, + "loss": 3.2404, + "step": 42615 + }, + { + "epoch": 0.74, + "learning_rate": 9.788951883592709e-05, + "loss": 3.1659, + "step": 42620 + }, + { + "epoch": 0.74, + "learning_rate": 9.782873227542393e-05, + "loss": 3.0816, + "step": 42625 + }, + { + "epoch": 0.74, + "learning_rate": 9.7767960917164e-05, + "loss": 3.1314, + "step": 42630 + }, + { + "epoch": 0.74, + "learning_rate": 9.770720476571688e-05, + "loss": 3.2118, + "step": 42635 + }, + { + "epoch": 0.74, + "learning_rate": 9.764646382565133e-05, + "loss": 3.1068, + "step": 42640 + }, + { + "epoch": 0.74, + "learning_rate": 9.758573810153467e-05, + "loss": 3.1281, + "step": 42645 + }, + { + "epoch": 0.74, + "learning_rate": 9.752502759793312e-05, + "loss": 3.1041, + "step": 42650 + }, + { + "epoch": 0.74, + "learning_rate": 9.746433231941186e-05, + "loss": 3.3104, + "step": 42655 + }, + { + "epoch": 0.74, + "learning_rate": 9.740365227053469e-05, + "loss": 3.2378, + "step": 42660 + }, + { + "epoch": 0.74, + "learning_rate": 9.734298745586472e-05, + "loss": 3.2226, + "step": 42665 + }, + { + "epoch": 0.74, + "learning_rate": 9.728233787996356e-05, + "loss": 3.1264, + "step": 42670 + }, + { + "epoch": 0.74, + "learning_rate": 9.72217035473915e-05, + "loss": 3.165, + "step": 42675 + }, + { + "epoch": 0.74, + "learning_rate": 9.716108446270822e-05, + "loss": 3.1995, + "step": 42680 + }, + { + "epoch": 0.74, + "learning_rate": 9.710048063047184e-05, + "loss": 3.187, + "step": 42685 + }, + { + "epoch": 0.74, + "learning_rate": 9.703989205523936e-05, + "loss": 3.1576, + "step": 42690 + }, + { + "epoch": 0.74, + "learning_rate": 9.697931874156707e-05, + "loss": 3.2503, + "step": 42695 + }, + { + "epoch": 0.74, + "learning_rate": 9.691876069400931e-05, + "loss": 3.1001, + "step": 42700 + }, + { + "epoch": 0.74, + "learning_rate": 9.685821791712011e-05, + "loss": 3.2139, + "step": 42705 + }, + { + "epoch": 0.74, + "learning_rate": 9.679769041545181e-05, + "loss": 3.0835, + "step": 42710 + }, + { + "epoch": 0.74, + "learning_rate": 9.673717819355571e-05, + "loss": 3.2361, + "step": 42715 + }, + { + "epoch": 0.74, + "learning_rate": 9.66766812559823e-05, + "loss": 3.1695, + "step": 42720 + }, + { + "epoch": 0.74, + "learning_rate": 9.661619960728026e-05, + "loss": 3.1172, + "step": 42725 + }, + { + "epoch": 0.74, + "learning_rate": 9.655573325199778e-05, + "loss": 3.2464, + "step": 42730 + }, + { + "epoch": 0.74, + "learning_rate": 9.649528219468151e-05, + "loss": 3.22, + "step": 42735 + }, + { + "epoch": 0.74, + "learning_rate": 9.643484643987698e-05, + "loss": 3.144, + "step": 42740 + }, + { + "epoch": 0.74, + "learning_rate": 9.637442599212894e-05, + "loss": 3.16, + "step": 42745 + }, + { + "epoch": 0.74, + "learning_rate": 9.631402085598038e-05, + "loss": 3.1275, + "step": 42750 + }, + { + "epoch": 0.74, + "learning_rate": 9.625363103597352e-05, + "loss": 3.2621, + "step": 42755 + }, + { + "epoch": 0.74, + "learning_rate": 9.61932565366495e-05, + "loss": 3.1393, + "step": 42760 + }, + { + "epoch": 0.74, + "learning_rate": 9.613289736254807e-05, + "loss": 3.1324, + "step": 42765 + }, + { + "epoch": 0.74, + "learning_rate": 9.607255351820802e-05, + "loss": 2.9853, + "step": 42770 + }, + { + "epoch": 0.74, + "learning_rate": 9.601222500816679e-05, + "loss": 3.1869, + "step": 42775 + }, + { + "epoch": 0.74, + "learning_rate": 9.595191183696073e-05, + "loss": 3.0711, + "step": 42780 + }, + { + "epoch": 0.74, + "learning_rate": 9.589161400912524e-05, + "loss": 3.069, + "step": 42785 + }, + { + "epoch": 0.74, + "learning_rate": 9.583133152919437e-05, + "loss": 3.1541, + "step": 42790 + }, + { + "epoch": 0.74, + "learning_rate": 9.577106440170101e-05, + "loss": 3.1853, + "step": 42795 + }, + { + "epoch": 0.74, + "learning_rate": 9.571081263117695e-05, + "loss": 3.1427, + "step": 42800 + }, + { + "epoch": 0.74, + "learning_rate": 9.565057622215274e-05, + "loss": 3.0839, + "step": 42805 + }, + { + "epoch": 0.74, + "learning_rate": 9.5590355179158e-05, + "loss": 3.1947, + "step": 42810 + }, + { + "epoch": 0.74, + "learning_rate": 9.553014950672097e-05, + "loss": 3.1567, + "step": 42815 + }, + { + "epoch": 0.74, + "learning_rate": 9.54699592093688e-05, + "loss": 3.2465, + "step": 42820 + }, + { + "epoch": 0.74, + "learning_rate": 9.540978429162751e-05, + "loss": 3.1557, + "step": 42825 + }, + { + "epoch": 0.74, + "learning_rate": 9.534962475802193e-05, + "loss": 3.0594, + "step": 42830 + }, + { + "epoch": 0.74, + "learning_rate": 9.528948061307565e-05, + "loss": 3.0756, + "step": 42835 + }, + { + "epoch": 0.74, + "learning_rate": 9.522935186131141e-05, + "loss": 3.2361, + "step": 42840 + }, + { + "epoch": 0.74, + "learning_rate": 9.516923850725044e-05, + "loss": 3.1698, + "step": 42845 + }, + { + "epoch": 0.74, + "learning_rate": 9.510914055541304e-05, + "loss": 3.2094, + "step": 42850 + }, + { + "epoch": 0.74, + "learning_rate": 9.504905801031819e-05, + "loss": 3.1183, + "step": 42855 + }, + { + "epoch": 0.74, + "learning_rate": 9.498899087648373e-05, + "loss": 3.207, + "step": 42860 + }, + { + "epoch": 0.74, + "learning_rate": 9.492893915842661e-05, + "loss": 3.1721, + "step": 42865 + }, + { + "epoch": 0.74, + "learning_rate": 9.486890286066229e-05, + "loss": 3.2029, + "step": 42870 + }, + { + "epoch": 0.74, + "learning_rate": 9.480888198770516e-05, + "loss": 3.1417, + "step": 42875 + }, + { + "epoch": 0.74, + "learning_rate": 9.474887654406857e-05, + "loss": 3.1568, + "step": 42880 + }, + { + "epoch": 0.74, + "learning_rate": 9.468888653426444e-05, + "loss": 3.1464, + "step": 42885 + }, + { + "epoch": 0.74, + "learning_rate": 9.462891196280393e-05, + "loss": 3.179, + "step": 42890 + }, + { + "epoch": 0.74, + "learning_rate": 9.456895283419674e-05, + "loss": 3.1786, + "step": 42895 + }, + { + "epoch": 0.74, + "learning_rate": 9.450900915295147e-05, + "loss": 3.0743, + "step": 42900 + }, + { + "epoch": 0.74, + "learning_rate": 9.444908092357558e-05, + "loss": 3.1999, + "step": 42905 + }, + { + "epoch": 0.74, + "learning_rate": 9.438916815057541e-05, + "loss": 3.1467, + "step": 42910 + }, + { + "epoch": 0.74, + "learning_rate": 9.432927083845592e-05, + "loss": 3.201, + "step": 42915 + }, + { + "epoch": 0.74, + "learning_rate": 9.426938899172132e-05, + "loss": 3.2788, + "step": 42920 + }, + { + "epoch": 0.74, + "learning_rate": 9.420952261487434e-05, + "loss": 3.2115, + "step": 42925 + }, + { + "epoch": 0.74, + "learning_rate": 9.41496717124166e-05, + "loss": 3.1825, + "step": 42930 + }, + { + "epoch": 0.74, + "learning_rate": 9.408983628884856e-05, + "loss": 3.1825, + "step": 42935 + }, + { + "epoch": 0.74, + "learning_rate": 9.403001634866948e-05, + "loss": 3.0352, + "step": 42940 + }, + { + "epoch": 0.74, + "learning_rate": 9.397021189637765e-05, + "loss": 3.0817, + "step": 42945 + }, + { + "epoch": 0.74, + "learning_rate": 9.391042293647012e-05, + "loss": 3.1235, + "step": 42950 + }, + { + "epoch": 0.74, + "learning_rate": 9.385064947344241e-05, + "loss": 3.1679, + "step": 42955 + }, + { + "epoch": 0.74, + "learning_rate": 9.379089151178945e-05, + "loss": 3.1045, + "step": 42960 + }, + { + "epoch": 0.74, + "learning_rate": 9.373114905600464e-05, + "loss": 3.146, + "step": 42965 + }, + { + "epoch": 0.74, + "learning_rate": 9.367142211058023e-05, + "loss": 3.2207, + "step": 42970 + }, + { + "epoch": 0.74, + "learning_rate": 9.361171068000762e-05, + "loss": 3.2523, + "step": 42975 + }, + { + "epoch": 0.74, + "learning_rate": 9.355201476877647e-05, + "loss": 3.078, + "step": 42980 + }, + { + "epoch": 0.74, + "learning_rate": 9.349233438137589e-05, + "loss": 3.1585, + "step": 42985 + }, + { + "epoch": 0.74, + "learning_rate": 9.343266952229341e-05, + "loss": 3.171, + "step": 42990 + }, + { + "epoch": 0.74, + "learning_rate": 9.337302019601545e-05, + "loss": 3.2222, + "step": 42995 + }, + { + "epoch": 0.74, + "learning_rate": 9.331338640702764e-05, + "loss": 3.1117, + "step": 43000 + }, + { + "epoch": 0.74, + "eval_loss": 3.1977977752685547, + "eval_runtime": 149.6717, + "eval_samples_per_second": 12.3, + "eval_steps_per_second": 0.775, + "step": 43000 + }, + { + "epoch": 0.74, + "learning_rate": 9.32537681598137e-05, + "loss": 3.2042, + "step": 43005 + }, + { + "epoch": 0.74, + "learning_rate": 9.319416545885693e-05, + "loss": 3.1313, + "step": 43010 + }, + { + "epoch": 0.74, + "learning_rate": 9.313457830863906e-05, + "loss": 3.169, + "step": 43015 + }, + { + "epoch": 0.74, + "learning_rate": 9.307500671364064e-05, + "loss": 3.136, + "step": 43020 + }, + { + "epoch": 0.74, + "learning_rate": 9.301545067834142e-05, + "loss": 3.1929, + "step": 43025 + }, + { + "epoch": 0.74, + "learning_rate": 9.295591020721944e-05, + "loss": 3.1781, + "step": 43030 + }, + { + "epoch": 0.74, + "learning_rate": 9.289638530475186e-05, + "loss": 3.1836, + "step": 43035 + }, + { + "epoch": 0.74, + "learning_rate": 9.283687597541477e-05, + "loss": 3.1999, + "step": 43040 + }, + { + "epoch": 0.74, + "learning_rate": 9.277738222368294e-05, + "loss": 3.0913, + "step": 43045 + }, + { + "epoch": 0.75, + "learning_rate": 9.271790405402996e-05, + "loss": 3.2435, + "step": 43050 + }, + { + "epoch": 0.75, + "learning_rate": 9.265844147092832e-05, + "loss": 3.295, + "step": 43055 + }, + { + "epoch": 0.75, + "learning_rate": 9.259899447884915e-05, + "loss": 3.1696, + "step": 43060 + }, + { + "epoch": 0.75, + "learning_rate": 9.253956308226277e-05, + "loss": 3.0823, + "step": 43065 + }, + { + "epoch": 0.75, + "learning_rate": 9.248014728563801e-05, + "loss": 3.1608, + "step": 43070 + }, + { + "epoch": 0.75, + "learning_rate": 9.242074709344258e-05, + "loss": 3.1251, + "step": 43075 + }, + { + "epoch": 0.75, + "learning_rate": 9.236136251014336e-05, + "loss": 3.1621, + "step": 43080 + }, + { + "epoch": 0.75, + "learning_rate": 9.230199354020533e-05, + "loss": 3.2057, + "step": 43085 + }, + { + "epoch": 0.75, + "learning_rate": 9.224264018809304e-05, + "loss": 3.1949, + "step": 43090 + }, + { + "epoch": 0.75, + "learning_rate": 9.218330245826948e-05, + "loss": 3.1971, + "step": 43095 + }, + { + "epoch": 0.75, + "learning_rate": 9.212398035519641e-05, + "loss": 3.0869, + "step": 43100 + }, + { + "epoch": 0.75, + "learning_rate": 9.206467388333491e-05, + "loss": 3.093, + "step": 43105 + }, + { + "epoch": 0.75, + "learning_rate": 9.200538304714416e-05, + "loss": 3.1078, + "step": 43110 + }, + { + "epoch": 0.75, + "learning_rate": 9.194610785108253e-05, + "loss": 3.2347, + "step": 43115 + }, + { + "epoch": 0.75, + "learning_rate": 9.188684829960746e-05, + "loss": 3.1788, + "step": 43120 + }, + { + "epoch": 0.75, + "learning_rate": 9.182760439717482e-05, + "loss": 3.1977, + "step": 43125 + }, + { + "epoch": 0.75, + "learning_rate": 9.176837614823945e-05, + "loss": 3.1689, + "step": 43130 + }, + { + "epoch": 0.75, + "learning_rate": 9.170916355725505e-05, + "loss": 3.1488, + "step": 43135 + }, + { + "epoch": 0.75, + "learning_rate": 9.164996662867398e-05, + "loss": 3.1792, + "step": 43140 + }, + { + "epoch": 0.75, + "learning_rate": 9.159078536694774e-05, + "loss": 3.1396, + "step": 43145 + }, + { + "epoch": 0.75, + "learning_rate": 9.153161977652634e-05, + "loss": 3.1108, + "step": 43150 + }, + { + "epoch": 0.75, + "learning_rate": 9.147246986185874e-05, + "loss": 3.0777, + "step": 43155 + }, + { + "epoch": 0.75, + "learning_rate": 9.141333562739275e-05, + "loss": 3.2488, + "step": 43160 + }, + { + "epoch": 0.75, + "learning_rate": 9.135421707757483e-05, + "loss": 3.2371, + "step": 43165 + }, + { + "epoch": 0.75, + "learning_rate": 9.129511421685058e-05, + "loss": 3.1491, + "step": 43170 + }, + { + "epoch": 0.75, + "learning_rate": 9.123602704966415e-05, + "loss": 3.2448, + "step": 43175 + }, + { + "epoch": 0.75, + "learning_rate": 9.117695558045858e-05, + "loss": 3.1201, + "step": 43180 + }, + { + "epoch": 0.75, + "learning_rate": 9.111789981367576e-05, + "loss": 3.1906, + "step": 43185 + }, + { + "epoch": 0.75, + "learning_rate": 9.105885975375637e-05, + "loss": 3.0943, + "step": 43190 + }, + { + "epoch": 0.75, + "learning_rate": 9.099983540513981e-05, + "loss": 3.2098, + "step": 43195 + }, + { + "epoch": 0.75, + "learning_rate": 9.094082677226462e-05, + "loss": 3.2461, + "step": 43200 + }, + { + "epoch": 0.75, + "learning_rate": 9.088183385956785e-05, + "loss": 3.2476, + "step": 43205 + }, + { + "epoch": 0.75, + "learning_rate": 9.082285667148545e-05, + "loss": 3.1644, + "step": 43210 + }, + { + "epoch": 0.75, + "learning_rate": 9.076389521245225e-05, + "loss": 3.2083, + "step": 43215 + }, + { + "epoch": 0.75, + "learning_rate": 9.070494948690165e-05, + "loss": 3.2944, + "step": 43220 + }, + { + "epoch": 0.75, + "learning_rate": 9.064601949926638e-05, + "loss": 3.3015, + "step": 43225 + }, + { + "epoch": 0.75, + "learning_rate": 9.058710525397748e-05, + "loss": 3.0546, + "step": 43230 + }, + { + "epoch": 0.75, + "learning_rate": 9.052820675546508e-05, + "loss": 3.1223, + "step": 43235 + }, + { + "epoch": 0.75, + "learning_rate": 9.046932400815798e-05, + "loss": 3.1651, + "step": 43240 + }, + { + "epoch": 0.75, + "learning_rate": 9.041045701648382e-05, + "loss": 2.9694, + "step": 43245 + }, + { + "epoch": 0.75, + "learning_rate": 9.035160578486924e-05, + "loss": 3.0904, + "step": 43250 + }, + { + "epoch": 0.75, + "learning_rate": 9.029277031773958e-05, + "loss": 3.2837, + "step": 43255 + }, + { + "epoch": 0.75, + "learning_rate": 9.023395061951864e-05, + "loss": 3.1519, + "step": 43260 + }, + { + "epoch": 0.75, + "learning_rate": 9.017514669462969e-05, + "loss": 3.2259, + "step": 43265 + }, + { + "epoch": 0.75, + "learning_rate": 9.011635854749439e-05, + "loss": 3.1521, + "step": 43270 + }, + { + "epoch": 0.75, + "learning_rate": 9.005758618253316e-05, + "loss": 3.1211, + "step": 43275 + }, + { + "epoch": 0.75, + "learning_rate": 8.999882960416572e-05, + "loss": 3.2336, + "step": 43280 + }, + { + "epoch": 0.75, + "learning_rate": 8.994008881680989e-05, + "loss": 3.1052, + "step": 43285 + }, + { + "epoch": 0.75, + "learning_rate": 8.988136382488292e-05, + "loss": 3.1502, + "step": 43290 + }, + { + "epoch": 0.75, + "learning_rate": 8.982265463280054e-05, + "loss": 3.1397, + "step": 43295 + }, + { + "epoch": 0.75, + "learning_rate": 8.976396124497731e-05, + "loss": 3.2139, + "step": 43300 + }, + { + "epoch": 0.75, + "learning_rate": 8.97052836658268e-05, + "loss": 3.1242, + "step": 43305 + }, + { + "epoch": 0.75, + "learning_rate": 8.964662189976128e-05, + "loss": 3.2127, + "step": 43310 + }, + { + "epoch": 0.75, + "learning_rate": 8.95879759511917e-05, + "loss": 3.1406, + "step": 43315 + }, + { + "epoch": 0.75, + "learning_rate": 8.9529345824528e-05, + "loss": 3.1651, + "step": 43320 + }, + { + "epoch": 0.75, + "learning_rate": 8.947073152417877e-05, + "loss": 3.1555, + "step": 43325 + }, + { + "epoch": 0.75, + "learning_rate": 8.941213305455166e-05, + "loss": 3.2542, + "step": 43330 + }, + { + "epoch": 0.75, + "learning_rate": 8.9353550420053e-05, + "loss": 3.2307, + "step": 43335 + }, + { + "epoch": 0.75, + "learning_rate": 8.929498362508762e-05, + "loss": 3.239, + "step": 43340 + }, + { + "epoch": 0.75, + "learning_rate": 8.92364326740597e-05, + "loss": 3.096, + "step": 43345 + }, + { + "epoch": 0.75, + "learning_rate": 8.91778975713719e-05, + "loss": 3.2674, + "step": 43350 + }, + { + "epoch": 0.75, + "learning_rate": 8.911937832142569e-05, + "loss": 3.0949, + "step": 43355 + }, + { + "epoch": 0.75, + "learning_rate": 8.906087492862166e-05, + "loss": 3.157, + "step": 43360 + }, + { + "epoch": 0.75, + "learning_rate": 8.900238739735859e-05, + "loss": 3.1264, + "step": 43365 + }, + { + "epoch": 0.75, + "learning_rate": 8.894391573203475e-05, + "loss": 3.2266, + "step": 43370 + }, + { + "epoch": 0.75, + "learning_rate": 8.888545993704678e-05, + "loss": 3.1276, + "step": 43375 + }, + { + "epoch": 0.75, + "learning_rate": 8.882702001679017e-05, + "loss": 3.1758, + "step": 43380 + }, + { + "epoch": 0.75, + "learning_rate": 8.876859597565965e-05, + "loss": 3.1014, + "step": 43385 + }, + { + "epoch": 0.75, + "learning_rate": 8.871018781804796e-05, + "loss": 3.261, + "step": 43390 + }, + { + "epoch": 0.75, + "learning_rate": 8.865179554834739e-05, + "loss": 3.098, + "step": 43395 + }, + { + "epoch": 0.75, + "learning_rate": 8.859341917094863e-05, + "loss": 3.3551, + "step": 43400 + }, + { + "epoch": 0.75, + "learning_rate": 8.853505869024127e-05, + "loss": 3.2433, + "step": 43405 + }, + { + "epoch": 0.75, + "learning_rate": 8.847671411061389e-05, + "loss": 3.1784, + "step": 43410 + }, + { + "epoch": 0.75, + "learning_rate": 8.841838543645353e-05, + "loss": 3.1108, + "step": 43415 + }, + { + "epoch": 0.75, + "learning_rate": 8.83600726721461e-05, + "loss": 3.2022, + "step": 43420 + }, + { + "epoch": 0.75, + "learning_rate": 8.830177582207669e-05, + "loss": 3.0931, + "step": 43425 + }, + { + "epoch": 0.75, + "learning_rate": 8.824349489062876e-05, + "loss": 3.24, + "step": 43430 + }, + { + "epoch": 0.75, + "learning_rate": 8.818522988218483e-05, + "loss": 3.1067, + "step": 43435 + }, + { + "epoch": 0.75, + "learning_rate": 8.812698080112607e-05, + "loss": 3.0216, + "step": 43440 + }, + { + "epoch": 0.75, + "learning_rate": 8.806874765183241e-05, + "loss": 3.09, + "step": 43445 + }, + { + "epoch": 0.75, + "learning_rate": 8.80105304386829e-05, + "loss": 3.1525, + "step": 43450 + }, + { + "epoch": 0.75, + "learning_rate": 8.795232916605506e-05, + "loss": 3.2424, + "step": 43455 + }, + { + "epoch": 0.75, + "learning_rate": 8.789414383832535e-05, + "loss": 3.2136, + "step": 43460 + }, + { + "epoch": 0.75, + "learning_rate": 8.783597445986901e-05, + "loss": 3.1622, + "step": 43465 + }, + { + "epoch": 0.75, + "learning_rate": 8.777782103506007e-05, + "loss": 3.2465, + "step": 43470 + }, + { + "epoch": 0.75, + "learning_rate": 8.771968356827123e-05, + "loss": 3.1255, + "step": 43475 + }, + { + "epoch": 0.75, + "learning_rate": 8.766156206387442e-05, + "loss": 3.1675, + "step": 43480 + }, + { + "epoch": 0.75, + "learning_rate": 8.760345652623987e-05, + "loss": 3.1265, + "step": 43485 + }, + { + "epoch": 0.75, + "learning_rate": 8.75453669597369e-05, + "loss": 3.0942, + "step": 43490 + }, + { + "epoch": 0.75, + "learning_rate": 8.748729336873353e-05, + "loss": 3.1477, + "step": 43495 + }, + { + "epoch": 0.75, + "learning_rate": 8.742923575759649e-05, + "loss": 3.1318, + "step": 43500 + }, + { + "epoch": 0.75, + "eval_loss": 3.1929843425750732, + "eval_runtime": 150.078, + "eval_samples_per_second": 12.267, + "eval_steps_per_second": 0.773, + "step": 43500 + }, + { + "epoch": 0.75, + "learning_rate": 8.737119413069158e-05, + "loss": 3.221, + "step": 43505 + }, + { + "epoch": 0.75, + "learning_rate": 8.73131684923832e-05, + "loss": 3.0582, + "step": 43510 + }, + { + "epoch": 0.75, + "learning_rate": 8.725515884703453e-05, + "loss": 3.1917, + "step": 43515 + }, + { + "epoch": 0.75, + "learning_rate": 8.719716519900763e-05, + "loss": 3.1326, + "step": 43520 + }, + { + "epoch": 0.75, + "learning_rate": 8.713918755266321e-05, + "loss": 3.0365, + "step": 43525 + }, + { + "epoch": 0.75, + "learning_rate": 8.708122591236109e-05, + "loss": 3.181, + "step": 43530 + }, + { + "epoch": 0.75, + "learning_rate": 8.702328028245956e-05, + "loss": 3.2332, + "step": 43535 + }, + { + "epoch": 0.75, + "learning_rate": 8.69653506673159e-05, + "loss": 3.1744, + "step": 43540 + }, + { + "epoch": 0.75, + "learning_rate": 8.690743707128605e-05, + "loss": 3.0682, + "step": 43545 + }, + { + "epoch": 0.75, + "learning_rate": 8.68495394987249e-05, + "loss": 3.3009, + "step": 43550 + }, + { + "epoch": 0.75, + "learning_rate": 8.679165795398587e-05, + "loss": 3.126, + "step": 43555 + }, + { + "epoch": 0.75, + "learning_rate": 8.67337924414216e-05, + "loss": 3.2137, + "step": 43560 + }, + { + "epoch": 0.75, + "learning_rate": 8.667594296538315e-05, + "loss": 3.1615, + "step": 43565 + }, + { + "epoch": 0.75, + "learning_rate": 8.661810953022052e-05, + "loss": 3.1869, + "step": 43570 + }, + { + "epoch": 0.75, + "learning_rate": 8.656029214028249e-05, + "loss": 3.1582, + "step": 43575 + }, + { + "epoch": 0.75, + "learning_rate": 8.650249079991654e-05, + "loss": 3.0512, + "step": 43580 + }, + { + "epoch": 0.75, + "learning_rate": 8.644470551346921e-05, + "loss": 3.1223, + "step": 43585 + }, + { + "epoch": 0.75, + "learning_rate": 8.638693628528559e-05, + "loss": 3.2332, + "step": 43590 + }, + { + "epoch": 0.75, + "learning_rate": 8.63291831197096e-05, + "loss": 3.0745, + "step": 43595 + }, + { + "epoch": 0.75, + "learning_rate": 8.627144602108399e-05, + "loss": 3.1756, + "step": 43600 + }, + { + "epoch": 0.75, + "learning_rate": 8.621372499375021e-05, + "loss": 3.2514, + "step": 43605 + }, + { + "epoch": 0.75, + "learning_rate": 8.615602004204876e-05, + "loss": 3.1066, + "step": 43610 + }, + { + "epoch": 0.75, + "learning_rate": 8.609833117031879e-05, + "loss": 3.1709, + "step": 43615 + }, + { + "epoch": 0.75, + "learning_rate": 8.604065838289788e-05, + "loss": 3.0782, + "step": 43620 + }, + { + "epoch": 0.75, + "learning_rate": 8.598300168412301e-05, + "loss": 3.1607, + "step": 43625 + }, + { + "epoch": 0.76, + "learning_rate": 8.592536107832962e-05, + "loss": 3.1411, + "step": 43630 + }, + { + "epoch": 0.76, + "learning_rate": 8.586773656985185e-05, + "loss": 3.0961, + "step": 43635 + }, + { + "epoch": 0.76, + "learning_rate": 8.581012816302309e-05, + "loss": 3.202, + "step": 43640 + }, + { + "epoch": 0.76, + "learning_rate": 8.575253586217478e-05, + "loss": 3.2263, + "step": 43645 + }, + { + "epoch": 0.76, + "learning_rate": 8.569495967163786e-05, + "loss": 3.0951, + "step": 43650 + }, + { + "epoch": 0.76, + "learning_rate": 8.56373995957417e-05, + "loss": 3.1947, + "step": 43655 + }, + { + "epoch": 0.76, + "learning_rate": 8.557985563881439e-05, + "loss": 3.2083, + "step": 43660 + }, + { + "epoch": 0.76, + "learning_rate": 8.552232780518324e-05, + "loss": 3.169, + "step": 43665 + }, + { + "epoch": 0.76, + "learning_rate": 8.546481609917367e-05, + "loss": 3.1267, + "step": 43670 + }, + { + "epoch": 0.76, + "learning_rate": 8.540732052511058e-05, + "loss": 3.288, + "step": 43675 + }, + { + "epoch": 0.76, + "learning_rate": 8.534984108731717e-05, + "loss": 3.0646, + "step": 43680 + }, + { + "epoch": 0.76, + "learning_rate": 8.529237779011557e-05, + "loss": 3.2094, + "step": 43685 + }, + { + "epoch": 0.76, + "learning_rate": 8.523493063782699e-05, + "loss": 3.1029, + "step": 43690 + }, + { + "epoch": 0.76, + "learning_rate": 8.517749963477087e-05, + "loss": 3.1713, + "step": 43695 + }, + { + "epoch": 0.76, + "learning_rate": 8.512008478526575e-05, + "loss": 3.1246, + "step": 43700 + }, + { + "epoch": 0.76, + "learning_rate": 8.50626860936291e-05, + "loss": 3.1438, + "step": 43705 + }, + { + "epoch": 0.76, + "learning_rate": 8.500530356417692e-05, + "loss": 3.2288, + "step": 43710 + }, + { + "epoch": 0.76, + "learning_rate": 8.494793720122407e-05, + "loss": 3.049, + "step": 43715 + }, + { + "epoch": 0.76, + "learning_rate": 8.489058700908424e-05, + "loss": 3.1557, + "step": 43720 + }, + { + "epoch": 0.76, + "learning_rate": 8.483325299206972e-05, + "loss": 3.1421, + "step": 43725 + }, + { + "epoch": 0.76, + "learning_rate": 8.4775935154492e-05, + "loss": 3.1228, + "step": 43730 + }, + { + "epoch": 0.76, + "learning_rate": 8.47186335006609e-05, + "loss": 3.2315, + "step": 43735 + }, + { + "epoch": 0.76, + "learning_rate": 8.466134803488532e-05, + "loss": 3.2098, + "step": 43740 + }, + { + "epoch": 0.76, + "learning_rate": 8.460407876147273e-05, + "loss": 3.0859, + "step": 43745 + }, + { + "epoch": 0.76, + "learning_rate": 8.454682568472946e-05, + "loss": 3.1922, + "step": 43750 + }, + { + "epoch": 0.76, + "learning_rate": 8.44895888089608e-05, + "loss": 3.1085, + "step": 43755 + }, + { + "epoch": 0.76, + "learning_rate": 8.44323681384706e-05, + "loss": 3.1564, + "step": 43760 + }, + { + "epoch": 0.76, + "learning_rate": 8.437516367756153e-05, + "loss": 3.2095, + "step": 43765 + }, + { + "epoch": 0.76, + "learning_rate": 8.431797543053512e-05, + "loss": 3.2012, + "step": 43770 + }, + { + "epoch": 0.76, + "learning_rate": 8.426080340169158e-05, + "loss": 3.1939, + "step": 43775 + }, + { + "epoch": 0.76, + "learning_rate": 8.42036475953299e-05, + "loss": 3.1871, + "step": 43780 + }, + { + "epoch": 0.76, + "learning_rate": 8.414650801574806e-05, + "loss": 3.1464, + "step": 43785 + }, + { + "epoch": 0.76, + "learning_rate": 8.408938466724257e-05, + "loss": 3.2299, + "step": 43790 + }, + { + "epoch": 0.76, + "learning_rate": 8.403227755410887e-05, + "loss": 3.1247, + "step": 43795 + }, + { + "epoch": 0.76, + "learning_rate": 8.397518668064109e-05, + "loss": 3.1662, + "step": 43800 + }, + { + "epoch": 0.76, + "learning_rate": 8.391811205113204e-05, + "loss": 3.1429, + "step": 43805 + }, + { + "epoch": 0.76, + "learning_rate": 8.386105366987363e-05, + "loss": 3.1746, + "step": 43810 + }, + { + "epoch": 0.76, + "learning_rate": 8.380401154115631e-05, + "loss": 3.2565, + "step": 43815 + }, + { + "epoch": 0.76, + "learning_rate": 8.374698566926937e-05, + "loss": 3.0521, + "step": 43820 + }, + { + "epoch": 0.76, + "learning_rate": 8.368997605850078e-05, + "loss": 3.1862, + "step": 43825 + }, + { + "epoch": 0.76, + "learning_rate": 8.363298271313735e-05, + "loss": 3.1848, + "step": 43830 + }, + { + "epoch": 0.76, + "learning_rate": 8.357600563746485e-05, + "loss": 3.1177, + "step": 43835 + }, + { + "epoch": 0.76, + "learning_rate": 8.351904483576758e-05, + "loss": 3.1069, + "step": 43840 + }, + { + "epoch": 0.76, + "learning_rate": 8.346210031232866e-05, + "loss": 3.2112, + "step": 43845 + }, + { + "epoch": 0.76, + "learning_rate": 8.340517207143007e-05, + "loss": 3.0668, + "step": 43850 + }, + { + "epoch": 0.76, + "learning_rate": 8.334826011735252e-05, + "loss": 3.0612, + "step": 43855 + }, + { + "epoch": 0.76, + "learning_rate": 8.32913644543754e-05, + "loss": 3.2284, + "step": 43860 + }, + { + "epoch": 0.76, + "learning_rate": 8.323448508677713e-05, + "loss": 3.1964, + "step": 43865 + }, + { + "epoch": 0.76, + "learning_rate": 8.317762201883468e-05, + "loss": 3.1696, + "step": 43870 + }, + { + "epoch": 0.76, + "learning_rate": 8.312077525482387e-05, + "loss": 3.0291, + "step": 43875 + }, + { + "epoch": 0.76, + "learning_rate": 8.306394479901928e-05, + "loss": 3.0917, + "step": 43880 + }, + { + "epoch": 0.76, + "learning_rate": 8.300713065569415e-05, + "loss": 3.1189, + "step": 43885 + }, + { + "epoch": 0.76, + "learning_rate": 8.295033282912081e-05, + "loss": 3.2172, + "step": 43890 + }, + { + "epoch": 0.76, + "learning_rate": 8.289355132357011e-05, + "loss": 3.213, + "step": 43895 + }, + { + "epoch": 0.76, + "learning_rate": 8.283678614331165e-05, + "loss": 3.2169, + "step": 43900 + }, + { + "epoch": 0.76, + "learning_rate": 8.278003729261396e-05, + "loss": 3.0397, + "step": 43905 + }, + { + "epoch": 0.76, + "learning_rate": 8.272330477574414e-05, + "loss": 3.144, + "step": 43910 + }, + { + "epoch": 0.76, + "learning_rate": 8.266658859696835e-05, + "loss": 3.0974, + "step": 43915 + }, + { + "epoch": 0.76, + "learning_rate": 8.26098887605514e-05, + "loss": 3.171, + "step": 43920 + }, + { + "epoch": 0.76, + "learning_rate": 8.255320527075651e-05, + "loss": 3.1021, + "step": 43925 + }, + { + "epoch": 0.76, + "learning_rate": 8.249653813184628e-05, + "loss": 3.2077, + "step": 43930 + }, + { + "epoch": 0.76, + "learning_rate": 8.243988734808168e-05, + "loss": 3.1471, + "step": 43935 + }, + { + "epoch": 0.76, + "learning_rate": 8.23832529237225e-05, + "loss": 3.0669, + "step": 43940 + }, + { + "epoch": 0.76, + "learning_rate": 8.232663486302761e-05, + "loss": 3.1868, + "step": 43945 + }, + { + "epoch": 0.76, + "learning_rate": 8.227003317025403e-05, + "loss": 3.2153, + "step": 43950 + }, + { + "epoch": 0.76, + "learning_rate": 8.22134478496582e-05, + "loss": 3.091, + "step": 43955 + }, + { + "epoch": 0.76, + "learning_rate": 8.215687890549499e-05, + "loss": 3.1243, + "step": 43960 + }, + { + "epoch": 0.76, + "learning_rate": 8.210032634201792e-05, + "loss": 3.2038, + "step": 43965 + }, + { + "epoch": 0.76, + "learning_rate": 8.204379016347979e-05, + "loss": 3.2283, + "step": 43970 + }, + { + "epoch": 0.76, + "learning_rate": 8.198727037413156e-05, + "loss": 3.2619, + "step": 43975 + }, + { + "epoch": 0.76, + "learning_rate": 8.193076697822316e-05, + "loss": 3.1312, + "step": 43980 + }, + { + "epoch": 0.76, + "learning_rate": 8.187427998000364e-05, + "loss": 3.0785, + "step": 43985 + }, + { + "epoch": 0.76, + "learning_rate": 8.181780938372037e-05, + "loss": 2.9717, + "step": 43990 + }, + { + "epoch": 0.76, + "learning_rate": 8.176135519361964e-05, + "loss": 3.0844, + "step": 43995 + }, + { + "epoch": 0.76, + "learning_rate": 8.170491741394658e-05, + "loss": 3.2462, + "step": 44000 + }, + { + "epoch": 0.76, + "eval_loss": 3.190524101257324, + "eval_runtime": 149.6752, + "eval_samples_per_second": 12.3, + "eval_steps_per_second": 0.775, + "step": 44000 + }, + { + "epoch": 0.76, + "learning_rate": 8.164849604894487e-05, + "loss": 3.2107, + "step": 44005 + }, + { + "epoch": 0.76, + "learning_rate": 8.159209110285734e-05, + "loss": 3.1158, + "step": 44010 + }, + { + "epoch": 0.76, + "learning_rate": 8.153570257992521e-05, + "loss": 3.1124, + "step": 44015 + }, + { + "epoch": 0.76, + "learning_rate": 8.147933048438856e-05, + "loss": 3.2516, + "step": 44020 + }, + { + "epoch": 0.76, + "learning_rate": 8.142297482048653e-05, + "loss": 3.1914, + "step": 44025 + }, + { + "epoch": 0.76, + "learning_rate": 8.136663559245643e-05, + "loss": 3.1275, + "step": 44030 + }, + { + "epoch": 0.76, + "learning_rate": 8.131031280453494e-05, + "loss": 3.1475, + "step": 44035 + }, + { + "epoch": 0.76, + "learning_rate": 8.125400646095717e-05, + "loss": 3.2376, + "step": 44040 + }, + { + "epoch": 0.76, + "learning_rate": 8.119771656595696e-05, + "loss": 3.1365, + "step": 44045 + }, + { + "epoch": 0.76, + "learning_rate": 8.114144312376731e-05, + "loss": 3.1429, + "step": 44050 + }, + { + "epoch": 0.76, + "learning_rate": 8.108518613861941e-05, + "loss": 3.1624, + "step": 44055 + }, + { + "epoch": 0.76, + "learning_rate": 8.102894561474349e-05, + "loss": 3.1822, + "step": 44060 + }, + { + "epoch": 0.76, + "learning_rate": 8.097272155636877e-05, + "loss": 3.0874, + "step": 44065 + }, + { + "epoch": 0.76, + "learning_rate": 8.091651396772286e-05, + "loss": 3.232, + "step": 44070 + }, + { + "epoch": 0.76, + "learning_rate": 8.086032285303232e-05, + "loss": 3.2127, + "step": 44075 + }, + { + "epoch": 0.76, + "learning_rate": 8.080414821652244e-05, + "loss": 3.2376, + "step": 44080 + }, + { + "epoch": 0.76, + "learning_rate": 8.074799006241716e-05, + "loss": 3.156, + "step": 44085 + }, + { + "epoch": 0.76, + "learning_rate": 8.069184839493947e-05, + "loss": 3.232, + "step": 44090 + }, + { + "epoch": 0.76, + "learning_rate": 8.063572321831084e-05, + "loss": 3.0889, + "step": 44095 + }, + { + "epoch": 0.76, + "learning_rate": 8.05796145367516e-05, + "loss": 3.2148, + "step": 44100 + }, + { + "epoch": 0.76, + "learning_rate": 8.052352235448084e-05, + "loss": 3.1608, + "step": 44105 + }, + { + "epoch": 0.76, + "learning_rate": 8.046744667571629e-05, + "loss": 3.0584, + "step": 44110 + }, + { + "epoch": 0.76, + "learning_rate": 8.041138750467478e-05, + "loss": 3.2204, + "step": 44115 + }, + { + "epoch": 0.76, + "learning_rate": 8.035534484557153e-05, + "loss": 3.0466, + "step": 44120 + }, + { + "epoch": 0.76, + "learning_rate": 8.029931870262073e-05, + "loss": 3.1303, + "step": 44125 + }, + { + "epoch": 0.76, + "learning_rate": 8.024330908003516e-05, + "loss": 3.1986, + "step": 44130 + }, + { + "epoch": 0.76, + "learning_rate": 8.018731598202655e-05, + "loss": 3.1084, + "step": 44135 + }, + { + "epoch": 0.76, + "learning_rate": 8.013133941280516e-05, + "loss": 3.2342, + "step": 44140 + }, + { + "epoch": 0.76, + "learning_rate": 8.007537937658032e-05, + "loss": 3.1171, + "step": 44145 + }, + { + "epoch": 0.76, + "learning_rate": 8.001943587755984e-05, + "loss": 3.1854, + "step": 44150 + }, + { + "epoch": 0.76, + "learning_rate": 7.996350891995045e-05, + "loss": 3.175, + "step": 44155 + }, + { + "epoch": 0.76, + "learning_rate": 7.990759850795746e-05, + "loss": 3.2055, + "step": 44160 + }, + { + "epoch": 0.76, + "learning_rate": 7.985170464578506e-05, + "loss": 3.0755, + "step": 44165 + }, + { + "epoch": 0.76, + "learning_rate": 7.97958273376363e-05, + "loss": 3.0841, + "step": 44170 + }, + { + "epoch": 0.76, + "learning_rate": 7.97399665877128e-05, + "loss": 3.1463, + "step": 44175 + }, + { + "epoch": 0.76, + "learning_rate": 7.9684122400215e-05, + "loss": 3.1655, + "step": 44180 + }, + { + "epoch": 0.76, + "learning_rate": 7.962829477934207e-05, + "loss": 3.1207, + "step": 44185 + }, + { + "epoch": 0.76, + "learning_rate": 7.957248372929192e-05, + "loss": 3.1037, + "step": 44190 + }, + { + "epoch": 0.76, + "learning_rate": 7.95166892542614e-05, + "loss": 3.125, + "step": 44195 + }, + { + "epoch": 0.76, + "learning_rate": 7.946091135844601e-05, + "loss": 3.1905, + "step": 44200 + }, + { + "epoch": 0.77, + "learning_rate": 7.940515004603963e-05, + "loss": 3.2434, + "step": 44205 + }, + { + "epoch": 0.77, + "learning_rate": 7.934940532123552e-05, + "loss": 3.1113, + "step": 44210 + }, + { + "epoch": 0.77, + "learning_rate": 7.929367718822534e-05, + "loss": 3.2207, + "step": 44215 + }, + { + "epoch": 0.77, + "learning_rate": 7.923796565119941e-05, + "loss": 3.1306, + "step": 44220 + }, + { + "epoch": 0.77, + "learning_rate": 7.918227071434729e-05, + "loss": 3.2167, + "step": 44225 + }, + { + "epoch": 0.77, + "learning_rate": 7.912659238185654e-05, + "loss": 3.0545, + "step": 44230 + }, + { + "epoch": 0.77, + "learning_rate": 7.907093065791418e-05, + "loss": 3.1117, + "step": 44235 + }, + { + "epoch": 0.77, + "learning_rate": 7.90152855467056e-05, + "loss": 3.2648, + "step": 44240 + }, + { + "epoch": 0.77, + "learning_rate": 7.895965705241489e-05, + "loss": 3.1396, + "step": 44245 + }, + { + "epoch": 0.77, + "learning_rate": 7.890404517922529e-05, + "loss": 3.1897, + "step": 44250 + }, + { + "epoch": 0.77, + "learning_rate": 7.884844993131836e-05, + "loss": 2.9301, + "step": 44255 + }, + { + "epoch": 0.77, + "learning_rate": 7.879287131287463e-05, + "loss": 3.1565, + "step": 44260 + }, + { + "epoch": 0.77, + "learning_rate": 7.873730932807332e-05, + "loss": 3.163, + "step": 44265 + }, + { + "epoch": 0.77, + "learning_rate": 7.868176398109228e-05, + "loss": 3.097, + "step": 44270 + }, + { + "epoch": 0.77, + "learning_rate": 7.862623527610847e-05, + "loss": 3.1801, + "step": 44275 + }, + { + "epoch": 0.77, + "learning_rate": 7.857072321729732e-05, + "loss": 3.0713, + "step": 44280 + }, + { + "epoch": 0.77, + "learning_rate": 7.851522780883277e-05, + "loss": 2.9592, + "step": 44285 + }, + { + "epoch": 0.77, + "learning_rate": 7.845974905488812e-05, + "loss": 3.1761, + "step": 44290 + }, + { + "epoch": 0.77, + "learning_rate": 7.840428695963497e-05, + "loss": 3.1655, + "step": 44295 + }, + { + "epoch": 0.77, + "learning_rate": 7.834884152724367e-05, + "loss": 3.0895, + "step": 44300 + }, + { + "epoch": 0.77, + "learning_rate": 7.829341276188372e-05, + "loss": 3.0921, + "step": 44305 + }, + { + "epoch": 0.77, + "learning_rate": 7.823800066772273e-05, + "loss": 3.2266, + "step": 44310 + }, + { + "epoch": 0.77, + "learning_rate": 7.81826052489277e-05, + "loss": 3.2352, + "step": 44315 + }, + { + "epoch": 0.77, + "learning_rate": 7.812722650966392e-05, + "loss": 3.1349, + "step": 44320 + }, + { + "epoch": 0.77, + "learning_rate": 7.807186445409554e-05, + "loss": 3.2131, + "step": 44325 + }, + { + "epoch": 0.77, + "learning_rate": 7.801651908638578e-05, + "loss": 3.1752, + "step": 44330 + }, + { + "epoch": 0.77, + "learning_rate": 7.796119041069595e-05, + "loss": 3.2089, + "step": 44335 + }, + { + "epoch": 0.77, + "learning_rate": 7.790587843118677e-05, + "loss": 3.1466, + "step": 44340 + }, + { + "epoch": 0.77, + "learning_rate": 7.785058315201728e-05, + "loss": 3.1338, + "step": 44345 + }, + { + "epoch": 0.77, + "learning_rate": 7.77953045773454e-05, + "loss": 3.0996, + "step": 44350 + }, + { + "epoch": 0.77, + "learning_rate": 7.7740042711328e-05, + "loss": 3.1595, + "step": 44355 + }, + { + "epoch": 0.77, + "learning_rate": 7.768479755812024e-05, + "loss": 3.1891, + "step": 44360 + }, + { + "epoch": 0.77, + "learning_rate": 7.762956912187625e-05, + "loss": 3.1434, + "step": 44365 + }, + { + "epoch": 0.77, + "learning_rate": 7.757435740674913e-05, + "loss": 3.1736, + "step": 44370 + }, + { + "epoch": 0.77, + "learning_rate": 7.751916241689042e-05, + "loss": 3.1572, + "step": 44375 + }, + { + "epoch": 0.77, + "learning_rate": 7.746398415645048e-05, + "loss": 3.0956, + "step": 44380 + }, + { + "epoch": 0.77, + "learning_rate": 7.740882262957847e-05, + "loss": 3.0352, + "step": 44385 + }, + { + "epoch": 0.77, + "learning_rate": 7.735367784042214e-05, + "loss": 3.11, + "step": 44390 + }, + { + "epoch": 0.77, + "learning_rate": 7.729854979312828e-05, + "loss": 3.1855, + "step": 44395 + }, + { + "epoch": 0.77, + "learning_rate": 7.724343849184217e-05, + "loss": 3.2492, + "step": 44400 + }, + { + "epoch": 0.77, + "learning_rate": 7.718834394070786e-05, + "loss": 3.1713, + "step": 44405 + }, + { + "epoch": 0.77, + "learning_rate": 7.713326614386818e-05, + "loss": 3.157, + "step": 44410 + }, + { + "epoch": 0.77, + "learning_rate": 7.707820510546466e-05, + "loss": 3.1427, + "step": 44415 + }, + { + "epoch": 0.77, + "learning_rate": 7.702316082963773e-05, + "loss": 3.1726, + "step": 44420 + }, + { + "epoch": 0.77, + "learning_rate": 7.696813332052634e-05, + "loss": 3.2024, + "step": 44425 + }, + { + "epoch": 0.77, + "learning_rate": 7.691312258226834e-05, + "loss": 3.1871, + "step": 44430 + }, + { + "epoch": 0.77, + "learning_rate": 7.685812861900023e-05, + "loss": 3.1701, + "step": 44435 + }, + { + "epoch": 0.77, + "learning_rate": 7.680315143485724e-05, + "loss": 3.1798, + "step": 44440 + }, + { + "epoch": 0.77, + "learning_rate": 7.67481910339733e-05, + "loss": 3.1527, + "step": 44445 + }, + { + "epoch": 0.77, + "learning_rate": 7.669324742048133e-05, + "loss": 3.2117, + "step": 44450 + }, + { + "epoch": 0.77, + "learning_rate": 7.663832059851276e-05, + "loss": 3.2039, + "step": 44455 + }, + { + "epoch": 0.77, + "learning_rate": 7.658341057219775e-05, + "loss": 3.1415, + "step": 44460 + }, + { + "epoch": 0.77, + "learning_rate": 7.652851734566529e-05, + "loss": 3.1103, + "step": 44465 + }, + { + "epoch": 0.77, + "learning_rate": 7.647364092304292e-05, + "loss": 3.1017, + "step": 44470 + }, + { + "epoch": 0.77, + "learning_rate": 7.641878130845732e-05, + "loss": 3.0508, + "step": 44475 + }, + { + "epoch": 0.77, + "learning_rate": 7.636393850603352e-05, + "loss": 3.142, + "step": 44480 + }, + { + "epoch": 0.77, + "learning_rate": 7.630911251989544e-05, + "loss": 3.1147, + "step": 44485 + }, + { + "epoch": 0.77, + "learning_rate": 7.62543033541657e-05, + "loss": 3.1511, + "step": 44490 + }, + { + "epoch": 0.77, + "learning_rate": 7.619951101296557e-05, + "loss": 3.2319, + "step": 44495 + }, + { + "epoch": 0.77, + "learning_rate": 7.614473550041533e-05, + "loss": 3.0392, + "step": 44500 + }, + { + "epoch": 0.77, + "eval_loss": 3.186605215072632, + "eval_runtime": 149.8734, + "eval_samples_per_second": 12.284, + "eval_steps_per_second": 0.774, + "step": 44500 + }, + { + "epoch": 0.77, + "learning_rate": 7.608997682063378e-05, + "loss": 3.1118, + "step": 44505 + }, + { + "epoch": 0.77, + "learning_rate": 7.603523497773841e-05, + "loss": 3.1806, + "step": 44510 + }, + { + "epoch": 0.77, + "learning_rate": 7.598050997584561e-05, + "loss": 3.0252, + "step": 44515 + }, + { + "epoch": 0.77, + "learning_rate": 7.592580181907033e-05, + "loss": 3.2698, + "step": 44520 + }, + { + "epoch": 0.77, + "learning_rate": 7.587111051152633e-05, + "loss": 3.1393, + "step": 44525 + }, + { + "epoch": 0.77, + "learning_rate": 7.58164360573263e-05, + "loss": 3.1737, + "step": 44530 + }, + { + "epoch": 0.77, + "learning_rate": 7.576177846058128e-05, + "loss": 3.2284, + "step": 44535 + }, + { + "epoch": 0.77, + "learning_rate": 7.570713772540138e-05, + "loss": 3.2084, + "step": 44540 + }, + { + "epoch": 0.77, + "learning_rate": 7.56525138558952e-05, + "loss": 3.0973, + "step": 44545 + }, + { + "epoch": 0.77, + "learning_rate": 7.559790685617017e-05, + "loss": 3.1736, + "step": 44550 + }, + { + "epoch": 0.77, + "learning_rate": 7.554331673033255e-05, + "loss": 3.1482, + "step": 44555 + }, + { + "epoch": 0.77, + "learning_rate": 7.54887434824873e-05, + "loss": 3.2329, + "step": 44560 + }, + { + "epoch": 0.77, + "learning_rate": 7.543418711673774e-05, + "loss": 3.0866, + "step": 44565 + }, + { + "epoch": 0.77, + "learning_rate": 7.537964763718653e-05, + "loss": 3.21, + "step": 44570 + }, + { + "epoch": 0.77, + "learning_rate": 7.532512504793467e-05, + "loss": 3.103, + "step": 44575 + }, + { + "epoch": 0.77, + "learning_rate": 7.527061935308184e-05, + "loss": 3.1107, + "step": 44580 + }, + { + "epoch": 0.77, + "learning_rate": 7.521613055672693e-05, + "loss": 3.1379, + "step": 44585 + }, + { + "epoch": 0.77, + "learning_rate": 7.516165866296681e-05, + "loss": 3.1808, + "step": 44590 + }, + { + "epoch": 0.77, + "learning_rate": 7.510720367589777e-05, + "loss": 3.1765, + "step": 44595 + }, + { + "epoch": 0.77, + "learning_rate": 7.505276559961444e-05, + "loss": 3.2069, + "step": 44600 + }, + { + "epoch": 0.77, + "learning_rate": 7.499834443821023e-05, + "loss": 3.1409, + "step": 44605 + }, + { + "epoch": 0.77, + "learning_rate": 7.494394019577759e-05, + "loss": 3.1709, + "step": 44610 + }, + { + "epoch": 0.77, + "learning_rate": 7.488955287640706e-05, + "loss": 3.1446, + "step": 44615 + }, + { + "epoch": 0.77, + "learning_rate": 7.483518248418858e-05, + "loss": 3.0126, + "step": 44620 + }, + { + "epoch": 0.77, + "learning_rate": 7.47808290232104e-05, + "loss": 3.1909, + "step": 44625 + }, + { + "epoch": 0.77, + "learning_rate": 7.47264924975596e-05, + "loss": 3.1479, + "step": 44630 + }, + { + "epoch": 0.77, + "learning_rate": 7.46721729113222e-05, + "loss": 3.084, + "step": 44635 + }, + { + "epoch": 0.77, + "learning_rate": 7.461787026858255e-05, + "loss": 3.1349, + "step": 44640 + }, + { + "epoch": 0.77, + "learning_rate": 7.456358457342386e-05, + "loss": 3.1268, + "step": 44645 + }, + { + "epoch": 0.77, + "learning_rate": 7.450931582992839e-05, + "loss": 3.1649, + "step": 44650 + }, + { + "epoch": 0.77, + "learning_rate": 7.445506404217674e-05, + "loss": 3.1787, + "step": 44655 + }, + { + "epoch": 0.77, + "learning_rate": 7.440082921424836e-05, + "loss": 3.0321, + "step": 44660 + }, + { + "epoch": 0.77, + "learning_rate": 7.434661135022149e-05, + "loss": 3.1882, + "step": 44665 + }, + { + "epoch": 0.77, + "learning_rate": 7.429241045417287e-05, + "loss": 3.1999, + "step": 44670 + }, + { + "epoch": 0.77, + "learning_rate": 7.423822653017831e-05, + "loss": 3.1162, + "step": 44675 + }, + { + "epoch": 0.77, + "learning_rate": 7.418405958231217e-05, + "loss": 3.1264, + "step": 44680 + }, + { + "epoch": 0.77, + "learning_rate": 7.412990961464742e-05, + "loss": 3.2139, + "step": 44685 + }, + { + "epoch": 0.77, + "learning_rate": 7.40757766312559e-05, + "loss": 3.0781, + "step": 44690 + }, + { + "epoch": 0.77, + "learning_rate": 7.402166063620805e-05, + "loss": 3.1487, + "step": 44695 + }, + { + "epoch": 0.77, + "learning_rate": 7.39675616335733e-05, + "loss": 3.0855, + "step": 44700 + }, + { + "epoch": 0.77, + "learning_rate": 7.391347962741948e-05, + "loss": 3.1442, + "step": 44705 + }, + { + "epoch": 0.77, + "learning_rate": 7.385941462181327e-05, + "loss": 3.2047, + "step": 44710 + }, + { + "epoch": 0.77, + "learning_rate": 7.38053666208203e-05, + "loss": 3.184, + "step": 44715 + }, + { + "epoch": 0.77, + "learning_rate": 7.375133562850443e-05, + "loss": 3.2336, + "step": 44720 + }, + { + "epoch": 0.77, + "learning_rate": 7.369732164892854e-05, + "loss": 3.1874, + "step": 44725 + }, + { + "epoch": 0.77, + "learning_rate": 7.364332468615436e-05, + "loss": 3.2044, + "step": 44730 + }, + { + "epoch": 0.77, + "learning_rate": 7.35893447442421e-05, + "loss": 3.209, + "step": 44735 + }, + { + "epoch": 0.77, + "learning_rate": 7.353538182725077e-05, + "loss": 3.1973, + "step": 44740 + }, + { + "epoch": 0.77, + "learning_rate": 7.348143593923814e-05, + "loss": 3.2708, + "step": 44745 + }, + { + "epoch": 0.77, + "learning_rate": 7.342750708426053e-05, + "loss": 3.1732, + "step": 44750 + }, + { + "epoch": 0.77, + "learning_rate": 7.337359526637332e-05, + "loss": 3.1316, + "step": 44755 + }, + { + "epoch": 0.77, + "learning_rate": 7.331970048963027e-05, + "loss": 3.0108, + "step": 44760 + }, + { + "epoch": 0.77, + "learning_rate": 7.326582275808409e-05, + "loss": 3.1858, + "step": 44765 + }, + { + "epoch": 0.77, + "learning_rate": 7.3211962075786e-05, + "loss": 3.1266, + "step": 44770 + }, + { + "epoch": 0.77, + "learning_rate": 7.3158118446786e-05, + "loss": 3.2553, + "step": 44775 + }, + { + "epoch": 0.77, + "learning_rate": 7.310429187513305e-05, + "loss": 3.0143, + "step": 44780 + }, + { + "epoch": 0.78, + "learning_rate": 7.305048236487452e-05, + "loss": 3.2031, + "step": 44785 + }, + { + "epoch": 0.78, + "learning_rate": 7.299668992005664e-05, + "loss": 3.1179, + "step": 44790 + }, + { + "epoch": 0.78, + "learning_rate": 7.294291454472429e-05, + "loss": 3.0976, + "step": 44795 + }, + { + "epoch": 0.78, + "learning_rate": 7.28891562429211e-05, + "loss": 3.1156, + "step": 44800 + }, + { + "epoch": 0.78, + "learning_rate": 7.283541501868935e-05, + "loss": 3.2027, + "step": 44805 + }, + { + "epoch": 0.78, + "learning_rate": 7.27816908760703e-05, + "loss": 3.1327, + "step": 44810 + }, + { + "epoch": 0.78, + "learning_rate": 7.272798381910361e-05, + "loss": 3.0802, + "step": 44815 + }, + { + "epoch": 0.78, + "learning_rate": 7.267429385182777e-05, + "loss": 3.1307, + "step": 44820 + }, + { + "epoch": 0.78, + "learning_rate": 7.262062097828005e-05, + "loss": 3.065, + "step": 44825 + }, + { + "epoch": 0.78, + "learning_rate": 7.256696520249623e-05, + "loss": 3.0924, + "step": 44830 + }, + { + "epoch": 0.78, + "learning_rate": 7.251332652851111e-05, + "loss": 3.1293, + "step": 44835 + }, + { + "epoch": 0.78, + "learning_rate": 7.245970496035803e-05, + "loss": 3.1039, + "step": 44840 + }, + { + "epoch": 0.78, + "learning_rate": 7.240610050206896e-05, + "loss": 3.1689, + "step": 44845 + }, + { + "epoch": 0.78, + "learning_rate": 7.235251315767475e-05, + "loss": 3.2031, + "step": 44850 + }, + { + "epoch": 0.78, + "learning_rate": 7.22989429312048e-05, + "loss": 3.0999, + "step": 44855 + }, + { + "epoch": 0.78, + "learning_rate": 7.224538982668746e-05, + "loss": 3.1289, + "step": 44860 + }, + { + "epoch": 0.78, + "learning_rate": 7.21918538481497e-05, + "loss": 3.145, + "step": 44865 + }, + { + "epoch": 0.78, + "learning_rate": 7.213833499961684e-05, + "loss": 3.2122, + "step": 44870 + }, + { + "epoch": 0.78, + "learning_rate": 7.208483328511349e-05, + "loss": 3.1842, + "step": 44875 + }, + { + "epoch": 0.78, + "learning_rate": 7.203134870866264e-05, + "loss": 3.1976, + "step": 44880 + }, + { + "epoch": 0.78, + "learning_rate": 7.197788127428595e-05, + "loss": 3.2405, + "step": 44885 + }, + { + "epoch": 0.78, + "learning_rate": 7.192443098600421e-05, + "loss": 3.0957, + "step": 44890 + }, + { + "epoch": 0.78, + "learning_rate": 7.187099784783617e-05, + "loss": 3.1705, + "step": 44895 + }, + { + "epoch": 0.78, + "learning_rate": 7.181758186380007e-05, + "loss": 2.9637, + "step": 44900 + }, + { + "epoch": 0.78, + "learning_rate": 7.176418303791237e-05, + "loss": 3.2228, + "step": 44905 + }, + { + "epoch": 0.78, + "learning_rate": 7.171080137418832e-05, + "loss": 3.1422, + "step": 44910 + }, + { + "epoch": 0.78, + "learning_rate": 7.165743687664224e-05, + "loss": 3.225, + "step": 44915 + }, + { + "epoch": 0.78, + "learning_rate": 7.160408954928651e-05, + "loss": 3.1538, + "step": 44920 + }, + { + "epoch": 0.78, + "learning_rate": 7.155075939613279e-05, + "loss": 3.0714, + "step": 44925 + }, + { + "epoch": 0.78, + "learning_rate": 7.14974464211912e-05, + "loss": 3.1305, + "step": 44930 + }, + { + "epoch": 0.78, + "learning_rate": 7.144415062847047e-05, + "loss": 3.1041, + "step": 44935 + }, + { + "epoch": 0.78, + "learning_rate": 7.139087202197842e-05, + "loss": 3.122, + "step": 44940 + }, + { + "epoch": 0.78, + "learning_rate": 7.133761060572123e-05, + "loss": 3.1977, + "step": 44945 + }, + { + "epoch": 0.78, + "learning_rate": 7.128436638370367e-05, + "loss": 3.0151, + "step": 44950 + }, + { + "epoch": 0.78, + "learning_rate": 7.123113935992975e-05, + "loss": 3.1652, + "step": 44955 + }, + { + "epoch": 0.78, + "learning_rate": 7.117792953840168e-05, + "loss": 3.2035, + "step": 44960 + }, + { + "epoch": 0.78, + "learning_rate": 7.112473692312054e-05, + "loss": 3.1325, + "step": 44965 + }, + { + "epoch": 0.78, + "learning_rate": 7.10715615180864e-05, + "loss": 3.0583, + "step": 44970 + }, + { + "epoch": 0.78, + "learning_rate": 7.101840332729739e-05, + "loss": 3.0872, + "step": 44975 + }, + { + "epoch": 0.78, + "learning_rate": 7.096526235475102e-05, + "loss": 3.2089, + "step": 44980 + }, + { + "epoch": 0.78, + "learning_rate": 7.091213860444318e-05, + "loss": 2.9796, + "step": 44985 + }, + { + "epoch": 0.78, + "learning_rate": 7.085903208036837e-05, + "loss": 3.0638, + "step": 44990 + }, + { + "epoch": 0.78, + "learning_rate": 7.080594278652017e-05, + "loss": 3.1517, + "step": 44995 + }, + { + "epoch": 0.78, + "learning_rate": 7.07528707268903e-05, + "loss": 3.1721, + "step": 45000 + }, + { + "epoch": 0.78, + "eval_loss": 3.183523654937744, + "eval_runtime": 150.1695, + "eval_samples_per_second": 12.259, + "eval_steps_per_second": 0.772, + "step": 45000 + }, + { + "epoch": 0.78, + "learning_rate": 7.069981590546978e-05, + "loss": 3.0841, + "step": 45005 + }, + { + "epoch": 0.78, + "learning_rate": 7.064677832624794e-05, + "loss": 3.2613, + "step": 45010 + }, + { + "epoch": 0.78, + "learning_rate": 7.059375799321287e-05, + "loss": 3.0345, + "step": 45015 + }, + { + "epoch": 0.78, + "learning_rate": 7.054075491035168e-05, + "loss": 3.196, + "step": 45020 + }, + { + "epoch": 0.78, + "learning_rate": 7.04877690816497e-05, + "loss": 3.1549, + "step": 45025 + }, + { + "epoch": 0.78, + "learning_rate": 7.043480051109114e-05, + "loss": 3.0979, + "step": 45030 + }, + { + "epoch": 0.78, + "learning_rate": 7.038184920265916e-05, + "loss": 3.1891, + "step": 45035 + }, + { + "epoch": 0.78, + "learning_rate": 7.032891516033538e-05, + "loss": 3.119, + "step": 45040 + }, + { + "epoch": 0.78, + "learning_rate": 7.027599838810011e-05, + "loss": 3.169, + "step": 45045 + }, + { + "epoch": 0.78, + "learning_rate": 7.022309888993245e-05, + "loss": 3.1685, + "step": 45050 + }, + { + "epoch": 0.78, + "learning_rate": 7.017021666981009e-05, + "loss": 3.2098, + "step": 45055 + }, + { + "epoch": 0.78, + "learning_rate": 7.011735173170964e-05, + "loss": 3.1718, + "step": 45060 + }, + { + "epoch": 0.78, + "learning_rate": 7.006450407960622e-05, + "loss": 3.1111, + "step": 45065 + }, + { + "epoch": 0.78, + "learning_rate": 7.001167371747369e-05, + "loss": 3.1789, + "step": 45070 + }, + { + "epoch": 0.78, + "learning_rate": 6.995886064928464e-05, + "loss": 3.0517, + "step": 45075 + }, + { + "epoch": 0.78, + "learning_rate": 6.990606487901033e-05, + "loss": 3.1516, + "step": 45080 + }, + { + "epoch": 0.78, + "learning_rate": 6.985328641062064e-05, + "loss": 3.1516, + "step": 45085 + }, + { + "epoch": 0.78, + "learning_rate": 6.98005252480844e-05, + "loss": 3.2177, + "step": 45090 + }, + { + "epoch": 0.78, + "learning_rate": 6.97477813953689e-05, + "loss": 3.0959, + "step": 45095 + }, + { + "epoch": 0.78, + "learning_rate": 6.969505485644023e-05, + "loss": 3.1638, + "step": 45100 + }, + { + "epoch": 0.78, + "learning_rate": 6.964234563526314e-05, + "loss": 3.2965, + "step": 45105 + }, + { + "epoch": 0.78, + "learning_rate": 6.958965373580101e-05, + "loss": 3.2153, + "step": 45110 + }, + { + "epoch": 0.78, + "learning_rate": 6.953697916201618e-05, + "loss": 3.1298, + "step": 45115 + }, + { + "epoch": 0.78, + "learning_rate": 6.948432191786936e-05, + "loss": 3.0617, + "step": 45120 + }, + { + "epoch": 0.78, + "learning_rate": 6.94316820073202e-05, + "loss": 3.1779, + "step": 45125 + }, + { + "epoch": 0.78, + "learning_rate": 6.93790594343269e-05, + "loss": 3.1907, + "step": 45130 + }, + { + "epoch": 0.78, + "learning_rate": 6.932645420284636e-05, + "loss": 3.1058, + "step": 45135 + }, + { + "epoch": 0.78, + "learning_rate": 6.927386631683435e-05, + "loss": 3.0789, + "step": 45140 + }, + { + "epoch": 0.78, + "learning_rate": 6.92212957802452e-05, + "loss": 3.0602, + "step": 45145 + }, + { + "epoch": 0.78, + "learning_rate": 6.916874259703172e-05, + "loss": 3.197, + "step": 45150 + }, + { + "epoch": 0.78, + "learning_rate": 6.911620677114592e-05, + "loss": 3.1129, + "step": 45155 + }, + { + "epoch": 0.78, + "learning_rate": 6.90636883065381e-05, + "loss": 3.2127, + "step": 45160 + }, + { + "epoch": 0.78, + "learning_rate": 6.901118720715731e-05, + "loss": 3.1139, + "step": 45165 + }, + { + "epoch": 0.78, + "learning_rate": 6.895870347695151e-05, + "loss": 3.2134, + "step": 45170 + }, + { + "epoch": 0.78, + "learning_rate": 6.890623711986715e-05, + "loss": 3.1768, + "step": 45175 + }, + { + "epoch": 0.78, + "learning_rate": 6.88537881398494e-05, + "loss": 3.1486, + "step": 45180 + }, + { + "epoch": 0.78, + "learning_rate": 6.88013565408422e-05, + "loss": 3.2026, + "step": 45185 + }, + { + "epoch": 0.78, + "learning_rate": 6.874894232678804e-05, + "loss": 3.1052, + "step": 45190 + }, + { + "epoch": 0.78, + "learning_rate": 6.869654550162833e-05, + "loss": 3.1453, + "step": 45195 + }, + { + "epoch": 0.78, + "learning_rate": 6.864416606930299e-05, + "loss": 3.1664, + "step": 45200 + }, + { + "epoch": 0.78, + "learning_rate": 6.859180403375072e-05, + "loss": 3.1615, + "step": 45205 + }, + { + "epoch": 0.78, + "learning_rate": 6.853945939890878e-05, + "loss": 3.0932, + "step": 45210 + }, + { + "epoch": 0.78, + "learning_rate": 6.848713216871323e-05, + "loss": 3.1201, + "step": 45215 + }, + { + "epoch": 0.78, + "learning_rate": 6.843482234709893e-05, + "loss": 3.0702, + "step": 45220 + }, + { + "epoch": 0.78, + "learning_rate": 6.838252993799932e-05, + "loss": 3.0548, + "step": 45225 + }, + { + "epoch": 0.78, + "learning_rate": 6.833025494534629e-05, + "loss": 3.1788, + "step": 45230 + }, + { + "epoch": 0.78, + "learning_rate": 6.827799737307088e-05, + "loss": 3.2091, + "step": 45235 + }, + { + "epoch": 0.78, + "learning_rate": 6.822575722510249e-05, + "loss": 3.29, + "step": 45240 + }, + { + "epoch": 0.78, + "learning_rate": 6.817353450536926e-05, + "loss": 3.0955, + "step": 45245 + }, + { + "epoch": 0.78, + "learning_rate": 6.812132921779832e-05, + "loss": 3.219, + "step": 45250 + }, + { + "epoch": 0.78, + "learning_rate": 6.806914136631487e-05, + "loss": 3.2833, + "step": 45255 + }, + { + "epoch": 0.78, + "learning_rate": 6.801697095484346e-05, + "loss": 3.1134, + "step": 45260 + }, + { + "epoch": 0.78, + "learning_rate": 6.796481798730692e-05, + "loss": 3.1767, + "step": 45265 + }, + { + "epoch": 0.78, + "learning_rate": 6.791268246762687e-05, + "loss": 3.1696, + "step": 45270 + }, + { + "epoch": 0.78, + "learning_rate": 6.78605643997238e-05, + "loss": 3.1449, + "step": 45275 + }, + { + "epoch": 0.78, + "learning_rate": 6.780846378751643e-05, + "loss": 3.1968, + "step": 45280 + }, + { + "epoch": 0.78, + "learning_rate": 6.775638063492272e-05, + "loss": 3.0654, + "step": 45285 + }, + { + "epoch": 0.78, + "learning_rate": 6.770431494585892e-05, + "loss": 3.1028, + "step": 45290 + }, + { + "epoch": 0.78, + "learning_rate": 6.765226672424007e-05, + "loss": 3.1161, + "step": 45295 + }, + { + "epoch": 0.78, + "learning_rate": 6.76002359739802e-05, + "loss": 3.2315, + "step": 45300 + }, + { + "epoch": 0.78, + "learning_rate": 6.754822269899145e-05, + "loss": 3.0886, + "step": 45305 + }, + { + "epoch": 0.78, + "learning_rate": 6.749622690318497e-05, + "loss": 3.1833, + "step": 45310 + }, + { + "epoch": 0.78, + "learning_rate": 6.744424859047076e-05, + "loss": 3.2618, + "step": 45315 + }, + { + "epoch": 0.78, + "learning_rate": 6.739228776475721e-05, + "loss": 3.1447, + "step": 45320 + }, + { + "epoch": 0.78, + "learning_rate": 6.734034442995154e-05, + "loss": 3.0796, + "step": 45325 + }, + { + "epoch": 0.78, + "learning_rate": 6.72884185899596e-05, + "loss": 3.0618, + "step": 45330 + }, + { + "epoch": 0.78, + "learning_rate": 6.723651024868586e-05, + "loss": 3.1823, + "step": 45335 + }, + { + "epoch": 0.78, + "learning_rate": 6.718461941003375e-05, + "loss": 3.0862, + "step": 45340 + }, + { + "epoch": 0.78, + "learning_rate": 6.713274607790507e-05, + "loss": 3.2958, + "step": 45345 + }, + { + "epoch": 0.78, + "learning_rate": 6.708089025620049e-05, + "loss": 3.1402, + "step": 45350 + }, + { + "epoch": 0.78, + "learning_rate": 6.702905194881928e-05, + "loss": 3.3028, + "step": 45355 + }, + { + "epoch": 0.79, + "learning_rate": 6.69772311596593e-05, + "loss": 3.1589, + "step": 45360 + }, + { + "epoch": 0.79, + "learning_rate": 6.692542789261742e-05, + "loss": 3.0914, + "step": 45365 + }, + { + "epoch": 0.79, + "learning_rate": 6.687364215158884e-05, + "loss": 3.1536, + "step": 45370 + }, + { + "epoch": 0.79, + "learning_rate": 6.682187394046763e-05, + "loss": 3.1351, + "step": 45375 + }, + { + "epoch": 0.79, + "learning_rate": 6.677012326314649e-05, + "loss": 3.1095, + "step": 45380 + }, + { + "epoch": 0.79, + "learning_rate": 6.671839012351682e-05, + "loss": 3.1706, + "step": 45385 + }, + { + "epoch": 0.79, + "learning_rate": 6.666667452546854e-05, + "loss": 3.1585, + "step": 45390 + }, + { + "epoch": 0.79, + "learning_rate": 6.661497647289062e-05, + "loss": 3.1016, + "step": 45395 + }, + { + "epoch": 0.79, + "learning_rate": 6.656329596967038e-05, + "loss": 3.1136, + "step": 45400 + }, + { + "epoch": 0.79, + "learning_rate": 6.651163301969397e-05, + "loss": 3.1383, + "step": 45405 + }, + { + "epoch": 0.79, + "learning_rate": 6.645998762684613e-05, + "loss": 3.1012, + "step": 45410 + }, + { + "epoch": 0.79, + "learning_rate": 6.640835979501025e-05, + "loss": 3.1399, + "step": 45415 + }, + { + "epoch": 0.79, + "learning_rate": 6.635674952806867e-05, + "loss": 3.158, + "step": 45420 + }, + { + "epoch": 0.79, + "learning_rate": 6.630515682990212e-05, + "loss": 3.2877, + "step": 45425 + }, + { + "epoch": 0.79, + "learning_rate": 6.62535817043901e-05, + "loss": 3.0882, + "step": 45430 + }, + { + "epoch": 0.79, + "learning_rate": 6.620202415541081e-05, + "loss": 3.2474, + "step": 45435 + }, + { + "epoch": 0.79, + "learning_rate": 6.615048418684106e-05, + "loss": 3.1571, + "step": 45440 + }, + { + "epoch": 0.79, + "learning_rate": 6.609896180255649e-05, + "loss": 3.1484, + "step": 45445 + }, + { + "epoch": 0.79, + "learning_rate": 6.604745700643126e-05, + "loss": 3.1594, + "step": 45450 + }, + { + "epoch": 0.79, + "learning_rate": 6.599596980233828e-05, + "loss": 3.2426, + "step": 45455 + }, + { + "epoch": 0.79, + "learning_rate": 6.594450019414913e-05, + "loss": 3.169, + "step": 45460 + }, + { + "epoch": 0.79, + "learning_rate": 6.589304818573405e-05, + "loss": 3.1357, + "step": 45465 + }, + { + "epoch": 0.79, + "learning_rate": 6.584161378096187e-05, + "loss": 3.0433, + "step": 45470 + }, + { + "epoch": 0.79, + "learning_rate": 6.57901969837004e-05, + "loss": 3.1571, + "step": 45475 + }, + { + "epoch": 0.79, + "learning_rate": 6.573879779781578e-05, + "loss": 3.1311, + "step": 45480 + }, + { + "epoch": 0.79, + "learning_rate": 6.568741622717303e-05, + "loss": 3.191, + "step": 45485 + }, + { + "epoch": 0.79, + "learning_rate": 6.563605227563572e-05, + "loss": 3.1198, + "step": 45490 + }, + { + "epoch": 0.79, + "learning_rate": 6.558470594706609e-05, + "loss": 3.1676, + "step": 45495 + }, + { + "epoch": 0.79, + "learning_rate": 6.553337724532533e-05, + "loss": 3.0357, + "step": 45500 + }, + { + "epoch": 0.79, + "eval_loss": 3.180738925933838, + "eval_runtime": 151.371, + "eval_samples_per_second": 12.162, + "eval_steps_per_second": 0.766, + "step": 45500 + }, + { + "epoch": 0.79, + "learning_rate": 6.548206617427295e-05, + "loss": 3.1704, + "step": 45505 + }, + { + "epoch": 0.79, + "learning_rate": 6.543077273776732e-05, + "loss": 3.1313, + "step": 45510 + }, + { + "epoch": 0.79, + "learning_rate": 6.537949693966543e-05, + "loss": 3.0594, + "step": 45515 + }, + { + "epoch": 0.79, + "learning_rate": 6.53282387838229e-05, + "loss": 3.0647, + "step": 45520 + }, + { + "epoch": 0.79, + "learning_rate": 6.52769982740942e-05, + "loss": 3.1225, + "step": 45525 + }, + { + "epoch": 0.79, + "learning_rate": 6.522577541433235e-05, + "loss": 3.1439, + "step": 45530 + }, + { + "epoch": 0.79, + "learning_rate": 6.517457020838887e-05, + "loss": 3.1825, + "step": 45535 + }, + { + "epoch": 0.79, + "learning_rate": 6.51233826601143e-05, + "loss": 3.2263, + "step": 45540 + }, + { + "epoch": 0.79, + "learning_rate": 6.507221277335767e-05, + "loss": 3.167, + "step": 45545 + }, + { + "epoch": 0.79, + "learning_rate": 6.502106055196654e-05, + "loss": 3.0891, + "step": 45550 + }, + { + "epoch": 0.79, + "learning_rate": 6.496992599978757e-05, + "loss": 3.0816, + "step": 45555 + }, + { + "epoch": 0.79, + "learning_rate": 6.491880912066549e-05, + "loss": 3.1269, + "step": 45560 + }, + { + "epoch": 0.79, + "learning_rate": 6.486770991844429e-05, + "loss": 3.1808, + "step": 45565 + }, + { + "epoch": 0.79, + "learning_rate": 6.481662839696626e-05, + "loss": 3.2526, + "step": 45570 + }, + { + "epoch": 0.79, + "learning_rate": 6.476556456007242e-05, + "loss": 3.0343, + "step": 45575 + }, + { + "epoch": 0.79, + "learning_rate": 6.47145184116027e-05, + "loss": 3.268, + "step": 45580 + }, + { + "epoch": 0.79, + "learning_rate": 6.466348995539531e-05, + "loss": 3.0679, + "step": 45585 + }, + { + "epoch": 0.79, + "learning_rate": 6.46124791952873e-05, + "loss": 3.1898, + "step": 45590 + }, + { + "epoch": 0.79, + "learning_rate": 6.456148613511462e-05, + "loss": 3.0314, + "step": 45595 + }, + { + "epoch": 0.79, + "learning_rate": 6.451051077871154e-05, + "loss": 3.1434, + "step": 45600 + }, + { + "epoch": 0.79, + "learning_rate": 6.445955312991122e-05, + "loss": 3.2544, + "step": 45605 + }, + { + "epoch": 0.79, + "learning_rate": 6.440861319254536e-05, + "loss": 3.0762, + "step": 45610 + }, + { + "epoch": 0.79, + "learning_rate": 6.435769097044431e-05, + "loss": 2.9781, + "step": 45615 + }, + { + "epoch": 0.79, + "learning_rate": 6.430678646743735e-05, + "loss": 3.1502, + "step": 45620 + }, + { + "epoch": 0.79, + "learning_rate": 6.425589968735217e-05, + "loss": 3.0586, + "step": 45625 + }, + { + "epoch": 0.79, + "learning_rate": 6.420503063401511e-05, + "loss": 3.0531, + "step": 45630 + }, + { + "epoch": 0.79, + "learning_rate": 6.415417931125134e-05, + "loss": 3.0447, + "step": 45635 + }, + { + "epoch": 0.79, + "learning_rate": 6.41033457228845e-05, + "loss": 3.1198, + "step": 45640 + }, + { + "epoch": 0.79, + "learning_rate": 6.405252987273723e-05, + "loss": 3.1565, + "step": 45645 + }, + { + "epoch": 0.79, + "learning_rate": 6.400173176463047e-05, + "loss": 3.1355, + "step": 45650 + }, + { + "epoch": 0.79, + "learning_rate": 6.395095140238393e-05, + "loss": 3.0957, + "step": 45655 + }, + { + "epoch": 0.79, + "learning_rate": 6.390018878981627e-05, + "loss": 3.0195, + "step": 45660 + }, + { + "epoch": 0.79, + "learning_rate": 6.384944393074434e-05, + "loss": 3.1708, + "step": 45665 + }, + { + "epoch": 0.79, + "learning_rate": 6.379871682898392e-05, + "loss": 3.0612, + "step": 45670 + }, + { + "epoch": 0.79, + "learning_rate": 6.374800748834953e-05, + "loss": 3.1129, + "step": 45675 + }, + { + "epoch": 0.79, + "learning_rate": 6.369731591265423e-05, + "loss": 3.1009, + "step": 45680 + }, + { + "epoch": 0.79, + "learning_rate": 6.364664210570973e-05, + "loss": 3.0948, + "step": 45685 + }, + { + "epoch": 0.79, + "learning_rate": 6.359598607132644e-05, + "loss": 3.0709, + "step": 45690 + }, + { + "epoch": 0.79, + "learning_rate": 6.35453478133134e-05, + "loss": 3.043, + "step": 45695 + }, + { + "epoch": 0.79, + "learning_rate": 6.349472733547844e-05, + "loss": 3.1692, + "step": 45700 + }, + { + "epoch": 0.79, + "learning_rate": 6.344412464162795e-05, + "loss": 3.247, + "step": 45705 + }, + { + "epoch": 0.79, + "learning_rate": 6.33935397355669e-05, + "loss": 3.1907, + "step": 45710 + }, + { + "epoch": 0.79, + "learning_rate": 6.334297262109912e-05, + "loss": 3.3283, + "step": 45715 + }, + { + "epoch": 0.79, + "learning_rate": 6.329242330202686e-05, + "loss": 3.2082, + "step": 45720 + }, + { + "epoch": 0.79, + "learning_rate": 6.324189178215133e-05, + "loss": 3.1577, + "step": 45725 + }, + { + "epoch": 0.79, + "learning_rate": 6.319137806527219e-05, + "loss": 3.096, + "step": 45730 + }, + { + "epoch": 0.79, + "learning_rate": 6.314088215518776e-05, + "loss": 3.1203, + "step": 45735 + }, + { + "epoch": 0.79, + "learning_rate": 6.309040405569515e-05, + "loss": 3.082, + "step": 45740 + }, + { + "epoch": 0.79, + "learning_rate": 6.303994377058999e-05, + "loss": 3.1651, + "step": 45745 + }, + { + "epoch": 0.79, + "learning_rate": 6.298950130366659e-05, + "loss": 3.2569, + "step": 45750 + }, + { + "epoch": 0.79, + "learning_rate": 6.293907665871808e-05, + "loss": 3.0798, + "step": 45755 + }, + { + "epoch": 0.79, + "learning_rate": 6.288866983953612e-05, + "loss": 3.2079, + "step": 45760 + }, + { + "epoch": 0.79, + "learning_rate": 6.283828084991102e-05, + "loss": 3.2477, + "step": 45765 + }, + { + "epoch": 0.79, + "learning_rate": 6.278790969363175e-05, + "loss": 3.2101, + "step": 45770 + }, + { + "epoch": 0.79, + "learning_rate": 6.27375563744859e-05, + "loss": 3.2387, + "step": 45775 + }, + { + "epoch": 0.79, + "learning_rate": 6.268722089625992e-05, + "loss": 3.1574, + "step": 45780 + }, + { + "epoch": 0.79, + "learning_rate": 6.263690326273874e-05, + "loss": 3.1126, + "step": 45785 + }, + { + "epoch": 0.79, + "learning_rate": 6.258660347770597e-05, + "loss": 3.2076, + "step": 45790 + }, + { + "epoch": 0.79, + "learning_rate": 6.253632154494388e-05, + "loss": 3.1809, + "step": 45795 + }, + { + "epoch": 0.79, + "learning_rate": 6.248605746823339e-05, + "loss": 3.131, + "step": 45800 + }, + { + "epoch": 0.79, + "learning_rate": 6.243581125135417e-05, + "loss": 3.1605, + "step": 45805 + }, + { + "epoch": 0.79, + "learning_rate": 6.238558289808456e-05, + "loss": 3.1935, + "step": 45810 + }, + { + "epoch": 0.79, + "learning_rate": 6.233537241220117e-05, + "loss": 3.1822, + "step": 45815 + }, + { + "epoch": 0.79, + "learning_rate": 6.228517979747987e-05, + "loss": 3.1973, + "step": 45820 + }, + { + "epoch": 0.79, + "learning_rate": 6.223500505769476e-05, + "loss": 3.0712, + "step": 45825 + }, + { + "epoch": 0.79, + "learning_rate": 6.218484819661864e-05, + "loss": 3.2074, + "step": 45830 + }, + { + "epoch": 0.79, + "learning_rate": 6.213470921802335e-05, + "loss": 3.1901, + "step": 45835 + }, + { + "epoch": 0.79, + "learning_rate": 6.208458812567873e-05, + "loss": 3.1254, + "step": 45840 + }, + { + "epoch": 0.79, + "learning_rate": 6.203448492335385e-05, + "loss": 3.2218, + "step": 45845 + }, + { + "epoch": 0.79, + "learning_rate": 6.198439961481615e-05, + "loss": 3.194, + "step": 45850 + }, + { + "epoch": 0.79, + "learning_rate": 6.193433220383174e-05, + "loss": 3.1396, + "step": 45855 + }, + { + "epoch": 0.79, + "learning_rate": 6.188428269416561e-05, + "loss": 3.2048, + "step": 45860 + }, + { + "epoch": 0.79, + "learning_rate": 6.183425108958094e-05, + "loss": 3.085, + "step": 45865 + }, + { + "epoch": 0.79, + "learning_rate": 6.178423739384006e-05, + "loss": 3.1692, + "step": 45870 + }, + { + "epoch": 0.79, + "learning_rate": 6.173424161070372e-05, + "loss": 3.1756, + "step": 45875 + }, + { + "epoch": 0.79, + "learning_rate": 6.168426374393125e-05, + "loss": 3.1431, + "step": 45880 + }, + { + "epoch": 0.79, + "learning_rate": 6.163430379728086e-05, + "loss": 3.2415, + "step": 45885 + }, + { + "epoch": 0.79, + "learning_rate": 6.158436177450931e-05, + "loss": 3.27, + "step": 45890 + }, + { + "epoch": 0.79, + "learning_rate": 6.153443767937175e-05, + "loss": 3.0112, + "step": 45895 + }, + { + "epoch": 0.79, + "learning_rate": 6.148453151562243e-05, + "loss": 3.1826, + "step": 45900 + }, + { + "epoch": 0.79, + "learning_rate": 6.143464328701394e-05, + "loss": 3.2231, + "step": 45905 + }, + { + "epoch": 0.79, + "learning_rate": 6.13847729972976e-05, + "loss": 3.1354, + "step": 45910 + }, + { + "epoch": 0.79, + "learning_rate": 6.133492065022358e-05, + "loss": 3.0341, + "step": 45915 + }, + { + "epoch": 0.79, + "learning_rate": 6.128508624954025e-05, + "loss": 3.1803, + "step": 45920 + }, + { + "epoch": 0.79, + "learning_rate": 6.12352697989951e-05, + "loss": 3.0783, + "step": 45925 + }, + { + "epoch": 0.79, + "learning_rate": 6.118547130233401e-05, + "loss": 3.1693, + "step": 45930 + }, + { + "epoch": 0.79, + "learning_rate": 6.113569076330152e-05, + "loss": 3.1872, + "step": 45935 + }, + { + "epoch": 0.8, + "learning_rate": 6.108592818564105e-05, + "loss": 3.1872, + "step": 45940 + }, + { + "epoch": 0.8, + "learning_rate": 6.103618357309423e-05, + "loss": 3.1362, + "step": 45945 + }, + { + "epoch": 0.8, + "learning_rate": 6.0986456929401795e-05, + "loss": 3.0724, + "step": 45950 + }, + { + "epoch": 0.8, + "learning_rate": 6.09367482583029e-05, + "loss": 3.2822, + "step": 45955 + }, + { + "epoch": 0.8, + "learning_rate": 6.0887057563535266e-05, + "loss": 3.1595, + "step": 45960 + }, + { + "epoch": 0.8, + "learning_rate": 6.0837384848835634e-05, + "loss": 3.0659, + "step": 45965 + }, + { + "epoch": 0.8, + "learning_rate": 6.0787730117938885e-05, + "loss": 3.192, + "step": 45970 + }, + { + "epoch": 0.8, + "learning_rate": 6.073809337457879e-05, + "loss": 3.1099, + "step": 45975 + }, + { + "epoch": 0.8, + "learning_rate": 6.068847462248797e-05, + "loss": 3.1255, + "step": 45980 + }, + { + "epoch": 0.8, + "learning_rate": 6.063887386539741e-05, + "loss": 3.2569, + "step": 45985 + }, + { + "epoch": 0.8, + "learning_rate": 6.058929110703685e-05, + "loss": 3.1493, + "step": 45990 + }, + { + "epoch": 0.8, + "learning_rate": 6.0539726351134585e-05, + "loss": 3.1143, + "step": 45995 + }, + { + "epoch": 0.8, + "learning_rate": 6.049017960141767e-05, + "loss": 3.1716, + "step": 46000 + }, + { + "epoch": 0.8, + "eval_loss": 3.1786859035491943, + "eval_runtime": 150.5699, + "eval_samples_per_second": 12.227, + "eval_steps_per_second": 0.77, + "step": 46000 + }, + { + "epoch": 0.8, + "learning_rate": 6.04406508616118e-05, + "loss": 3.0864, + "step": 46005 + }, + { + "epoch": 0.8, + "learning_rate": 6.0391140135441295e-05, + "loss": 3.2294, + "step": 46010 + }, + { + "epoch": 0.8, + "learning_rate": 6.0341647426629056e-05, + "loss": 3.2553, + "step": 46015 + }, + { + "epoch": 0.8, + "learning_rate": 6.029217273889672e-05, + "loss": 3.1414, + "step": 46020 + }, + { + "epoch": 0.8, + "learning_rate": 6.0242716075964424e-05, + "loss": 3.1521, + "step": 46025 + }, + { + "epoch": 0.8, + "learning_rate": 6.0193277441551244e-05, + "loss": 3.2215, + "step": 46030 + }, + { + "epoch": 0.8, + "learning_rate": 6.014385683937457e-05, + "loss": 3.1964, + "step": 46035 + }, + { + "epoch": 0.8, + "learning_rate": 6.009445427315066e-05, + "loss": 2.9809, + "step": 46040 + }, + { + "epoch": 0.8, + "learning_rate": 6.0045069746594275e-05, + "loss": 3.1355, + "step": 46045 + }, + { + "epoch": 0.8, + "learning_rate": 5.999570326341888e-05, + "loss": 3.1077, + "step": 46050 + }, + { + "epoch": 0.8, + "learning_rate": 5.994635482733654e-05, + "loss": 3.167, + "step": 46055 + }, + { + "epoch": 0.8, + "learning_rate": 5.989702444205812e-05, + "loss": 3.2413, + "step": 46060 + }, + { + "epoch": 0.8, + "learning_rate": 5.9847712111292946e-05, + "loss": 3.1135, + "step": 46065 + }, + { + "epoch": 0.8, + "learning_rate": 5.9798417838749094e-05, + "loss": 3.2011, + "step": 46070 + }, + { + "epoch": 0.8, + "learning_rate": 5.974914162813319e-05, + "loss": 3.1403, + "step": 46075 + }, + { + "epoch": 0.8, + "learning_rate": 5.969988348315048e-05, + "loss": 3.1994, + "step": 46080 + }, + { + "epoch": 0.8, + "learning_rate": 5.965064340750513e-05, + "loss": 3.1759, + "step": 46085 + }, + { + "epoch": 0.8, + "learning_rate": 5.960142140489967e-05, + "loss": 3.1331, + "step": 46090 + }, + { + "epoch": 0.8, + "learning_rate": 5.955221747903512e-05, + "loss": 3.2708, + "step": 46095 + }, + { + "epoch": 0.8, + "learning_rate": 5.9503031633611654e-05, + "loss": 3.103, + "step": 46100 + }, + { + "epoch": 0.8, + "learning_rate": 5.94538638723276e-05, + "loss": 3.1251, + "step": 46105 + }, + { + "epoch": 0.8, + "learning_rate": 5.9404714198880275e-05, + "loss": 3.1968, + "step": 46110 + }, + { + "epoch": 0.8, + "learning_rate": 5.9355582616965425e-05, + "loss": 3.0474, + "step": 46115 + }, + { + "epoch": 0.8, + "learning_rate": 5.9306469130277456e-05, + "loss": 3.168, + "step": 46120 + }, + { + "epoch": 0.8, + "learning_rate": 5.925737374250949e-05, + "loss": 3.1837, + "step": 46125 + }, + { + "epoch": 0.8, + "learning_rate": 5.920829645735322e-05, + "loss": 3.1468, + "step": 46130 + }, + { + "epoch": 0.8, + "learning_rate": 5.9159237278498947e-05, + "loss": 3.0686, + "step": 46135 + }, + { + "epoch": 0.8, + "learning_rate": 5.911019620963582e-05, + "loss": 3.1152, + "step": 46140 + }, + { + "epoch": 0.8, + "learning_rate": 5.906117325445141e-05, + "loss": 3.2782, + "step": 46145 + }, + { + "epoch": 0.8, + "learning_rate": 5.901216841663194e-05, + "loss": 3.089, + "step": 46150 + }, + { + "epoch": 0.8, + "learning_rate": 5.89631816998624e-05, + "loss": 3.1593, + "step": 46155 + }, + { + "epoch": 0.8, + "learning_rate": 5.891421310782621e-05, + "loss": 3.2204, + "step": 46160 + }, + { + "epoch": 0.8, + "learning_rate": 5.886526264420573e-05, + "loss": 3.1318, + "step": 46165 + }, + { + "epoch": 0.8, + "learning_rate": 5.881633031268179e-05, + "loss": 3.1237, + "step": 46170 + }, + { + "epoch": 0.8, + "learning_rate": 5.876741611693361e-05, + "loss": 3.0531, + "step": 46175 + }, + { + "epoch": 0.8, + "learning_rate": 5.8718520060639494e-05, + "loss": 3.1773, + "step": 46180 + }, + { + "epoch": 0.8, + "learning_rate": 5.866964214747615e-05, + "loss": 3.1155, + "step": 46185 + }, + { + "epoch": 0.8, + "learning_rate": 5.8620782381118826e-05, + "loss": 3.1278, + "step": 46190 + }, + { + "epoch": 0.8, + "learning_rate": 5.8571940765241755e-05, + "loss": 3.0726, + "step": 46195 + }, + { + "epoch": 0.8, + "learning_rate": 5.85231173035173e-05, + "loss": 3.2045, + "step": 46200 + }, + { + "epoch": 0.8, + "learning_rate": 5.8474311999616954e-05, + "loss": 3.1445, + "step": 46205 + }, + { + "epoch": 0.8, + "learning_rate": 5.842552485721055e-05, + "loss": 3.2156, + "step": 46210 + }, + { + "epoch": 0.8, + "learning_rate": 5.8376755879966554e-05, + "loss": 3.1789, + "step": 46215 + }, + { + "epoch": 0.8, + "learning_rate": 5.8328005071552364e-05, + "loss": 3.1581, + "step": 46220 + }, + { + "epoch": 0.8, + "learning_rate": 5.827927243563348e-05, + "loss": 3.2274, + "step": 46225 + }, + { + "epoch": 0.8, + "learning_rate": 5.823055797587458e-05, + "loss": 3.0204, + "step": 46230 + }, + { + "epoch": 0.8, + "learning_rate": 5.8181861695938695e-05, + "loss": 3.1658, + "step": 46235 + }, + { + "epoch": 0.8, + "learning_rate": 5.813318359948742e-05, + "loss": 3.0678, + "step": 46240 + }, + { + "epoch": 0.8, + "learning_rate": 5.8084523690181336e-05, + "loss": 3.1534, + "step": 46245 + }, + { + "epoch": 0.8, + "learning_rate": 5.803588197167922e-05, + "loss": 3.1554, + "step": 46250 + }, + { + "epoch": 0.8, + "learning_rate": 5.798725844763864e-05, + "loss": 3.1348, + "step": 46255 + }, + { + "epoch": 0.8, + "learning_rate": 5.793865312171604e-05, + "loss": 3.1477, + "step": 46260 + }, + { + "epoch": 0.8, + "learning_rate": 5.7890065997566126e-05, + "loss": 3.2401, + "step": 46265 + }, + { + "epoch": 0.8, + "learning_rate": 5.784149707884248e-05, + "loss": 3.2408, + "step": 46270 + }, + { + "epoch": 0.8, + "learning_rate": 5.779294636919721e-05, + "loss": 3.1458, + "step": 46275 + }, + { + "epoch": 0.8, + "learning_rate": 5.774441387228104e-05, + "loss": 3.0876, + "step": 46280 + }, + { + "epoch": 0.8, + "learning_rate": 5.769589959174343e-05, + "loss": 3.081, + "step": 46285 + }, + { + "epoch": 0.8, + "learning_rate": 5.7647403531232415e-05, + "loss": 3.1654, + "step": 46290 + }, + { + "epoch": 0.8, + "learning_rate": 5.7598925694394605e-05, + "loss": 3.1191, + "step": 46295 + }, + { + "epoch": 0.8, + "learning_rate": 5.755046608487528e-05, + "loss": 3.1663, + "step": 46300 + }, + { + "epoch": 0.8, + "learning_rate": 5.750202470631833e-05, + "loss": 3.1126, + "step": 46305 + }, + { + "epoch": 0.8, + "learning_rate": 5.74536015623664e-05, + "loss": 3.1349, + "step": 46310 + }, + { + "epoch": 0.8, + "learning_rate": 5.740519665666059e-05, + "loss": 3.1206, + "step": 46315 + }, + { + "epoch": 0.8, + "learning_rate": 5.7356809992840715e-05, + "loss": 3.078, + "step": 46320 + }, + { + "epoch": 0.8, + "learning_rate": 5.730844157454523e-05, + "loss": 3.0895, + "step": 46325 + }, + { + "epoch": 0.8, + "learning_rate": 5.7260091405411124e-05, + "loss": 3.2058, + "step": 46330 + }, + { + "epoch": 0.8, + "learning_rate": 5.721175948907406e-05, + "loss": 3.2039, + "step": 46335 + }, + { + "epoch": 0.8, + "learning_rate": 5.71634458291685e-05, + "loss": 3.1625, + "step": 46340 + }, + { + "epoch": 0.8, + "learning_rate": 5.71151504293273e-05, + "loss": 3.1704, + "step": 46345 + }, + { + "epoch": 0.8, + "learning_rate": 5.706687329318202e-05, + "loss": 3.112, + "step": 46350 + }, + { + "epoch": 0.8, + "learning_rate": 5.701861442436285e-05, + "loss": 3.1327, + "step": 46355 + }, + { + "epoch": 0.8, + "learning_rate": 5.6970373826498536e-05, + "loss": 3.1424, + "step": 46360 + }, + { + "epoch": 0.8, + "learning_rate": 5.692215150321667e-05, + "loss": 3.1835, + "step": 46365 + }, + { + "epoch": 0.8, + "learning_rate": 5.6873947458143255e-05, + "loss": 3.0898, + "step": 46370 + }, + { + "epoch": 0.8, + "learning_rate": 5.682576169490302e-05, + "loss": 3.0496, + "step": 46375 + }, + { + "epoch": 0.8, + "learning_rate": 5.6777594217119224e-05, + "loss": 3.1112, + "step": 46380 + }, + { + "epoch": 0.8, + "learning_rate": 5.672944502841379e-05, + "loss": 3.201, + "step": 46385 + }, + { + "epoch": 0.8, + "learning_rate": 5.668131413240744e-05, + "loss": 3.0926, + "step": 46390 + }, + { + "epoch": 0.8, + "learning_rate": 5.6633201532719256e-05, + "loss": 3.1919, + "step": 46395 + }, + { + "epoch": 0.8, + "learning_rate": 5.6585107232967095e-05, + "loss": 3.2124, + "step": 46400 + }, + { + "epoch": 0.8, + "learning_rate": 5.653703123676738e-05, + "loss": 3.2094, + "step": 46405 + }, + { + "epoch": 0.8, + "learning_rate": 5.6488973547735184e-05, + "loss": 3.0477, + "step": 46410 + }, + { + "epoch": 0.8, + "learning_rate": 5.6440934169484106e-05, + "loss": 3.1221, + "step": 46415 + }, + { + "epoch": 0.8, + "learning_rate": 5.639291310562667e-05, + "loss": 3.0849, + "step": 46420 + }, + { + "epoch": 0.8, + "learning_rate": 5.634491035977371e-05, + "loss": 3.053, + "step": 46425 + }, + { + "epoch": 0.8, + "learning_rate": 5.6296925935534746e-05, + "loss": 3.1511, + "step": 46430 + }, + { + "epoch": 0.8, + "learning_rate": 5.624895983651801e-05, + "loss": 3.1053, + "step": 46435 + }, + { + "epoch": 0.8, + "learning_rate": 5.620101206633021e-05, + "loss": 3.2083, + "step": 46440 + }, + { + "epoch": 0.8, + "learning_rate": 5.6153082628576937e-05, + "loss": 3.1903, + "step": 46445 + }, + { + "epoch": 0.8, + "learning_rate": 5.610517152686212e-05, + "loss": 3.1463, + "step": 46450 + }, + { + "epoch": 0.8, + "learning_rate": 5.605727876478852e-05, + "loss": 3.1161, + "step": 46455 + }, + { + "epoch": 0.8, + "learning_rate": 5.600940434595733e-05, + "loss": 3.1253, + "step": 46460 + }, + { + "epoch": 0.8, + "learning_rate": 5.596154827396842e-05, + "loss": 3.2649, + "step": 46465 + }, + { + "epoch": 0.8, + "learning_rate": 5.591371055242049e-05, + "loss": 3.1203, + "step": 46470 + }, + { + "epoch": 0.8, + "learning_rate": 5.5865891184910706e-05, + "loss": 3.1973, + "step": 46475 + }, + { + "epoch": 0.8, + "learning_rate": 5.581809017503457e-05, + "loss": 3.196, + "step": 46480 + }, + { + "epoch": 0.8, + "learning_rate": 5.5770307526386703e-05, + "loss": 3.0937, + "step": 46485 + }, + { + "epoch": 0.8, + "learning_rate": 5.572254324256004e-05, + "loss": 3.1002, + "step": 46490 + }, + { + "epoch": 0.8, + "learning_rate": 5.567479732714616e-05, + "loss": 3.2265, + "step": 46495 + }, + { + "epoch": 0.8, + "learning_rate": 5.5627069783735534e-05, + "loss": 3.0773, + "step": 46500 + }, + { + "epoch": 0.8, + "eval_loss": 3.175149440765381, + "eval_runtime": 150.553, + "eval_samples_per_second": 12.228, + "eval_steps_per_second": 0.77, + "step": 46500 + } + ], + "logging_steps": 5, + "max_steps": 57783, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 1.3814529059671327e+18, + "trial_name": null, + "trial_params": null +}