{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 3360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 0.00013451892828543385, "loss": 3.9488, "step": 5 }, { "epoch": 0.18, "learning_rate": 0.00012668528006706028, "loss": 3.8298, "step": 10 }, { "epoch": 0.27, "learning_rate": 0.00011431137524750748, "loss": 3.7557, "step": 15 }, { "epoch": 0.36, "learning_rate": 9.836442450346448e-05, "loss": 3.5311, "step": 20 }, { "epoch": 0.45, "learning_rate": 8.009092691870492e-05, "loss": 3.3526, "step": 25 }, { "epoch": 0.54, "learning_rate": 6.0919236939313083e-05, "loss": 3.5934, "step": 30 }, { "epoch": 0.62, "learning_rate": 4.2347916539754844e-05, "loss": 3.3983, "step": 35 }, { "epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 3.6609, "step": 40 }, { "epoch": 0.8, "learning_rate": 1.2652524389394753e-05, "loss": 3.4898, "step": 45 }, { "epoch": 0.89, "learning_rate": 3.849603540845984e-06, "loss": 3.5749, "step": 50 }, { "epoch": 0.98, "learning_rate": 1.0792048977778093e-07, "loss": 3.4588, "step": 55 }, { "epoch": 1.07, "learning_rate": 1.7199452243268996e-06, "loss": 3.2536, "step": 60 }, { "epoch": 1.16, "learning_rate": 8.559673257059505e-06, "loss": 3.2439, "step": 65 }, { "epoch": 1.25, "learning_rate": 2.009247481060283e-05, "loss": 3.2968, "step": 70 }, { "epoch": 1.34, "learning_rate": 3.541688434458052e-05, "loss": 3.4346, "step": 75 }, { "epoch": 1.43, "learning_rate": 5.333506393059682e-05, "loss": 3.3563, "step": 80 }, { "epoch": 1.52, "learning_rate": 7.244643268047132e-05, "loss": 3.2696, "step": 85 }, { "epoch": 1.61, "learning_rate": 9.125714365012444e-05, "loss": 3.4046, "step": 90 }, { "epoch": 1.7, "learning_rate": 0.00010829685091793463, "loss": 3.4708, "step": 95 }, { "epoch": 1.79, "learning_rate": 0.00012223363969730684, "loss": 3.2387, "step": 100 }, { "epoch": 1.88, "learning_rate": 0.00013197813593027427, "loss": 3.3163, "step": 105 }, { "epoch": 1.96, "learning_rate": 0.00013676865759867644, "loss": 3.2581, "step": 110 }, { "epoch": 2.05, "learning_rate": 0.000136230751870351, "loss": 3.1941, "step": 115 }, { "epoch": 2.14, "learning_rate": 0.00013040646433810595, "loss": 3.0392, "step": 120 }, { "epoch": 2.23, "learning_rate": 0.00011975105251098516, "loss": 3.0188, "step": 125 }, { "epoch": 2.32, "learning_rate": 0.00010509740044895205, "loss": 3.1013, "step": 130 }, { "epoch": 2.41, "learning_rate": 8.759091608374473e-05, "loss": 3.0585, "step": 135 }, { "epoch": 2.5, "learning_rate": 6.860000000000001e-05, "loss": 3.1302, "step": 140 }, { "epoch": 2.59, "learning_rate": 4.9609083916255386e-05, "loss": 3.2358, "step": 145 }, { "epoch": 2.68, "learning_rate": 3.210259955104798e-05, "loss": 3.119, "step": 150 }, { "epoch": 2.77, "learning_rate": 1.744894748901483e-05, "loss": 2.9946, "step": 155 }, { "epoch": 2.86, "learning_rate": 6.793535661894062e-06, "loss": 3.0184, "step": 160 }, { "epoch": 2.95, "learning_rate": 9.692481296490106e-07, "loss": 2.9798, "step": 165 }, { "epoch": 3.04, "learning_rate": 4.313424013235498e-07, "loss": 3.1282, "step": 170 }, { "epoch": 3.12, "learning_rate": 5.22186406972573e-06, "loss": 3.0772, "step": 175 }, { "epoch": 3.21, "learning_rate": 1.496636030269314e-05, "loss": 2.8216, "step": 180 }, { "epoch": 3.3, "learning_rate": 2.890314908206528e-05, "loss": 2.7665, "step": 185 }, { "epoch": 3.39, "learning_rate": 4.594285634987545e-05, "loss": 3.0073, "step": 190 }, { "epoch": 3.48, "learning_rate": 6.475356731952864e-05, "loss": 3.0372, "step": 195 }, { "epoch": 3.57, "learning_rate": 8.386493606940314e-05, "loss": 2.807, "step": 200 }, { "epoch": 3.66, "learning_rate": 0.0001017831156554194, "loss": 3.1058, "step": 205 }, { "epoch": 3.75, "learning_rate": 0.0001171075251893971, "loss": 2.961, "step": 210 }, { "epoch": 3.84, "learning_rate": 0.0001286403267429405, "loss": 3.1032, "step": 215 }, { "epoch": 3.93, "learning_rate": 0.0001354800547756731, "loss": 2.7667, "step": 220 }, { "epoch": 4.02, "learning_rate": 0.00013709207951022223, "loss": 3.0024, "step": 225 }, { "epoch": 4.11, "learning_rate": 0.00013335039645915404, "loss": 2.8538, "step": 230 }, { "epoch": 4.2, "learning_rate": 0.00012454747561060531, "loss": 2.8202, "step": 235 }, { "epoch": 4.29, "learning_rate": 0.00011137140040750914, "loss": 2.6845, "step": 240 }, { "epoch": 4.38, "learning_rate": 9.485208346024522e-05, "loss": 2.6865, "step": 245 }, { "epoch": 4.46, "learning_rate": 7.62807630606869e-05, "loss": 2.8686, "step": 250 }, { "epoch": 4.55, "learning_rate": 5.710907308129509e-05, "loss": 2.9936, "step": 255 }, { "epoch": 4.64, "learning_rate": 3.883557549653544e-05, "loss": 2.5979, "step": 260 }, { "epoch": 4.73, "learning_rate": 2.2888624752492583e-05, "loss": 2.7179, "step": 265 }, { "epoch": 4.82, "learning_rate": 1.0514719932939762e-05, "loss": 2.9387, "step": 270 }, { "epoch": 4.91, "learning_rate": 2.681071714566175e-06, "loss": 2.6822, "step": 275 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 2.7684, "step": 280 }, { "epoch": 5.09, "learning_rate": 2.6810717145661523e-06, "loss": 2.6722, "step": 285 }, { "epoch": 5.18, "learning_rate": 1.0514719932939649e-05, "loss": 2.597, "step": 290 }, { "epoch": 5.27, "learning_rate": 2.2888624752492607e-05, "loss": 2.7343, "step": 295 }, { "epoch": 5.36, "learning_rate": 3.8835575496535365e-05, "loss": 2.567, "step": 300 }, { "epoch": 5.45, "learning_rate": 5.7109073081294886e-05, "loss": 2.6375, "step": 305 }, { "epoch": 5.54, "learning_rate": 7.628076306068694e-05, "loss": 2.734, "step": 310 }, { "epoch": 5.62, "learning_rate": 9.485208346024515e-05, "loss": 2.6448, "step": 315 }, { "epoch": 5.71, "learning_rate": 0.00011137140040750908, "loss": 2.6255, "step": 320 }, { "epoch": 5.8, "learning_rate": 0.0001245474756106052, "loss": 2.6455, "step": 325 }, { "epoch": 5.89, "learning_rate": 0.00013335039645915407, "loss": 2.5969, "step": 330 }, { "epoch": 5.98, "learning_rate": 0.00013709207951022223, "loss": 2.6923, "step": 335 }, { "epoch": 6.07, "learning_rate": 0.00013548005477567314, "loss": 2.3761, "step": 340 }, { "epoch": 6.16, "learning_rate": 0.00012864032674294047, "loss": 2.4563, "step": 345 }, { "epoch": 6.25, "learning_rate": 0.00011710752518939715, "loss": 2.4791, "step": 350 }, { "epoch": 6.34, "learning_rate": 0.00010178311565541947, "loss": 2.446, "step": 355 }, { "epoch": 6.43, "learning_rate": 8.386493606940322e-05, "loss": 2.5515, "step": 360 }, { "epoch": 6.52, "learning_rate": 6.475356731952872e-05, "loss": 2.5469, "step": 365 }, { "epoch": 6.61, "learning_rate": 4.594285634987565e-05, "loss": 2.6391, "step": 370 }, { "epoch": 6.7, "learning_rate": 2.890314908206545e-05, "loss": 2.36, "step": 375 }, { "epoch": 6.79, "learning_rate": 1.496636030269327e-05, "loss": 2.4806, "step": 380 }, { "epoch": 6.88, "learning_rate": 5.221864069725715e-06, "loss": 2.6083, "step": 385 }, { "epoch": 6.96, "learning_rate": 4.3134240132355735e-07, "loss": 2.6457, "step": 390 }, { "epoch": 7.05, "learning_rate": 9.692481296490106e-07, "loss": 2.4165, "step": 395 }, { "epoch": 7.14, "learning_rate": 6.793535661894024e-06, "loss": 2.301, "step": 400 }, { "epoch": 7.23, "learning_rate": 1.744894748901478e-05, "loss": 2.4478, "step": 405 }, { "epoch": 7.32, "learning_rate": 3.2102599551047805e-05, "loss": 2.3692, "step": 410 }, { "epoch": 7.41, "learning_rate": 4.960908391625518e-05, "loss": 2.3269, "step": 415 }, { "epoch": 7.5, "learning_rate": 6.859999999999982e-05, "loss": 2.1706, "step": 420 }, { "epoch": 7.59, "learning_rate": 8.759091608374469e-05, "loss": 2.3618, "step": 425 }, { "epoch": 7.68, "learning_rate": 0.00010509740044895209, "loss": 2.284, "step": 430 }, { "epoch": 7.77, "learning_rate": 0.00011975105251098514, "loss": 2.3587, "step": 435 }, { "epoch": 7.86, "learning_rate": 0.00013040646433810593, "loss": 2.4467, "step": 440 }, { "epoch": 7.95, "learning_rate": 0.000136230751870351, "loss": 2.5326, "step": 445 }, { "epoch": 8.04, "learning_rate": 0.00013676865759867642, "loss": 2.3045, "step": 450 }, { "epoch": 8.12, "learning_rate": 0.00013197813593027432, "loss": 2.1819, "step": 455 }, { "epoch": 8.21, "learning_rate": 0.00012223363969730697, "loss": 2.2893, "step": 460 }, { "epoch": 8.3, "learning_rate": 0.00010829685091793466, "loss": 2.3117, "step": 465 }, { "epoch": 8.39, "learning_rate": 9.12571436501247e-05, "loss": 2.272, "step": 470 }, { "epoch": 8.48, "learning_rate": 7.24464326804714e-05, "loss": 2.3461, "step": 475 }, { "epoch": 8.57, "learning_rate": 5.33350639305969e-05, "loss": 2.1348, "step": 480 }, { "epoch": 8.66, "learning_rate": 3.541688434458043e-05, "loss": 2.2985, "step": 485 }, { "epoch": 8.75, "learning_rate": 2.0092474810602934e-05, "loss": 2.077, "step": 490 }, { "epoch": 8.84, "learning_rate": 8.559673257059573e-06, "loss": 2.2565, "step": 495 }, { "epoch": 8.93, "learning_rate": 1.719945224326892e-06, "loss": 2.1992, "step": 500 }, { "epoch": 9.02, "learning_rate": 1.0792048977777332e-07, "loss": 2.1455, "step": 505 }, { "epoch": 9.11, "learning_rate": 3.849603540845977e-06, "loss": 2.1314, "step": 510 }, { "epoch": 9.2, "learning_rate": 1.2652524389394722e-05, "loss": 1.9046, "step": 515 }, { "epoch": 9.29, "learning_rate": 2.582859959249101e-05, "loss": 2.0235, "step": 520 }, { "epoch": 9.38, "learning_rate": 4.234791653975475e-05, "loss": 2.0746, "step": 525 }, { "epoch": 9.46, "learning_rate": 6.091923693931295e-05, "loss": 2.0545, "step": 530 }, { "epoch": 9.55, "learning_rate": 8.0090926918705e-05, "loss": 2.0609, "step": 535 }, { "epoch": 9.64, "learning_rate": 9.83644245034643e-05, "loss": 2.1458, "step": 540 }, { "epoch": 9.73, "learning_rate": 0.00011431137524750748, "loss": 1.9214, "step": 545 }, { "epoch": 9.82, "learning_rate": 0.00012668528006706028, "loss": 2.2293, "step": 550 }, { "epoch": 9.91, "learning_rate": 0.00013451892828543387, "loss": 2.2592, "step": 555 }, { "epoch": 10.0, "learning_rate": 0.0001372, "loss": 2.1707, "step": 560 }, { "epoch": 10.09, "learning_rate": 0.00013451892828543393, "loss": 1.7839, "step": 565 }, { "epoch": 10.18, "learning_rate": 0.0001266852800670604, "loss": 1.931, "step": 570 }, { "epoch": 10.27, "learning_rate": 0.00011431137524750779, "loss": 1.981, "step": 575 }, { "epoch": 10.36, "learning_rate": 9.836442450346467e-05, "loss": 1.9186, "step": 580 }, { "epoch": 10.45, "learning_rate": 8.009092691870492e-05, "loss": 1.9439, "step": 585 }, { "epoch": 10.54, "learning_rate": 6.0919236939312867e-05, "loss": 1.7981, "step": 590 }, { "epoch": 10.62, "learning_rate": 4.23479165397549e-05, "loss": 1.8503, "step": 595 }, { "epoch": 10.71, "learning_rate": 2.5828599592491143e-05, "loss": 2.1472, "step": 600 }, { "epoch": 10.8, "learning_rate": 1.265252438939482e-05, "loss": 1.8879, "step": 605 }, { "epoch": 10.89, "learning_rate": 3.849603540846114e-06, "loss": 2.0268, "step": 610 }, { "epoch": 10.98, "learning_rate": 1.0792048977779616e-07, "loss": 2.0845, "step": 615 }, { "epoch": 11.07, "learning_rate": 1.7199452243269073e-06, "loss": 2.1026, "step": 620 }, { "epoch": 11.16, "learning_rate": 8.559673257059612e-06, "loss": 1.8359, "step": 625 }, { "epoch": 11.25, "learning_rate": 2.0092474810602812e-05, "loss": 1.6721, "step": 630 }, { "epoch": 11.34, "learning_rate": 3.541688434458027e-05, "loss": 1.6882, "step": 635 }, { "epoch": 11.43, "learning_rate": 5.333506393059674e-05, "loss": 1.7527, "step": 640 }, { "epoch": 11.52, "learning_rate": 7.244643268047099e-05, "loss": 1.7121, "step": 645 }, { "epoch": 11.61, "learning_rate": 9.125714365012432e-05, "loss": 1.8074, "step": 650 }, { "epoch": 11.7, "learning_rate": 0.00010829685091793471, "loss": 1.8594, "step": 655 }, { "epoch": 11.79, "learning_rate": 0.000122233639697307, "loss": 1.8073, "step": 660 }, { "epoch": 11.88, "learning_rate": 0.00013197813593027427, "loss": 1.8472, "step": 665 }, { "epoch": 11.96, "learning_rate": 0.00013676865759867642, "loss": 1.8812, "step": 670 }, { "epoch": 12.05, "learning_rate": 0.000136230751870351, "loss": 1.5812, "step": 675 }, { "epoch": 12.14, "learning_rate": 0.0001304064643381061, "loss": 1.6567, "step": 680 }, { "epoch": 12.23, "learning_rate": 0.00011975105251098525, "loss": 1.6487, "step": 685 }, { "epoch": 12.32, "learning_rate": 0.000105097400448952, "loss": 1.698, "step": 690 }, { "epoch": 12.41, "learning_rate": 8.759091608374439e-05, "loss": 1.7092, "step": 695 }, { "epoch": 12.5, "learning_rate": 6.859999999999999e-05, "loss": 1.5692, "step": 700 }, { "epoch": 12.59, "learning_rate": 4.960908391625558e-05, "loss": 1.7959, "step": 705 }, { "epoch": 12.68, "learning_rate": 3.210259955104795e-05, "loss": 1.769, "step": 710 }, { "epoch": 12.77, "learning_rate": 1.7448947489015055e-05, "loss": 1.672, "step": 715 }, { "epoch": 12.86, "learning_rate": 6.793535661894092e-06, "loss": 1.6729, "step": 720 }, { "epoch": 12.95, "learning_rate": 9.692481296490868e-07, "loss": 1.6378, "step": 725 }, { "epoch": 13.04, "learning_rate": 4.3134240132354215e-07, "loss": 1.5157, "step": 730 }, { "epoch": 13.12, "learning_rate": 5.221864069725745e-06, "loss": 1.5695, "step": 735 }, { "epoch": 13.21, "learning_rate": 1.496636030269301e-05, "loss": 1.4221, "step": 740 }, { "epoch": 13.3, "learning_rate": 2.890314908206531e-05, "loss": 1.5325, "step": 745 }, { "epoch": 13.39, "learning_rate": 4.5942856349875256e-05, "loss": 1.5675, "step": 750 }, { "epoch": 13.48, "learning_rate": 6.475356731952856e-05, "loss": 1.4491, "step": 755 }, { "epoch": 13.57, "learning_rate": 8.386493606940281e-05, "loss": 1.4567, "step": 760 }, { "epoch": 13.66, "learning_rate": 0.00010178311565541931, "loss": 1.4152, "step": 765 }, { "epoch": 13.75, "learning_rate": 0.00011710752518939722, "loss": 1.6904, "step": 770 }, { "epoch": 13.84, "learning_rate": 0.00012864032674294042, "loss": 1.7226, "step": 775 }, { "epoch": 13.93, "learning_rate": 0.0001354800547756731, "loss": 1.6131, "step": 780 }, { "epoch": 14.02, "learning_rate": 0.00013709207951022223, "loss": 1.4874, "step": 785 }, { "epoch": 14.11, "learning_rate": 0.00013335039645915404, "loss": 1.321, "step": 790 }, { "epoch": 14.2, "learning_rate": 0.00012454747561060542, "loss": 1.5378, "step": 795 }, { "epoch": 14.29, "learning_rate": 0.00011137140040750922, "loss": 1.5117, "step": 800 }, { "epoch": 14.38, "learning_rate": 9.485208346024507e-05, "loss": 1.3334, "step": 805 }, { "epoch": 14.46, "learning_rate": 7.62807630606871e-05, "loss": 1.418, "step": 810 }, { "epoch": 14.55, "learning_rate": 5.710907308129505e-05, "loss": 1.4658, "step": 815 }, { "epoch": 14.64, "learning_rate": 3.883557549653573e-05, "loss": 1.6222, "step": 820 }, { "epoch": 14.73, "learning_rate": 2.2888624752492553e-05, "loss": 1.4192, "step": 825 }, { "epoch": 14.82, "learning_rate": 1.0514719932939869e-05, "loss": 1.346, "step": 830 }, { "epoch": 14.91, "learning_rate": 2.681071714566198e-06, "loss": 1.3806, "step": 835 }, { "epoch": 15.0, "learning_rate": 0.0, "loss": 1.384, "step": 840 }, { "epoch": 15.09, "learning_rate": 2.6810717145661294e-06, "loss": 1.2256, "step": 845 }, { "epoch": 15.18, "learning_rate": 1.0514719932939732e-05, "loss": 1.3474, "step": 850 }, { "epoch": 15.27, "learning_rate": 2.2888624752492363e-05, "loss": 1.3215, "step": 855 }, { "epoch": 15.36, "learning_rate": 3.88355754965355e-05, "loss": 1.2651, "step": 860 }, { "epoch": 15.45, "learning_rate": 5.710907308129481e-05, "loss": 1.3224, "step": 865 }, { "epoch": 15.54, "learning_rate": 7.628076306068686e-05, "loss": 1.1728, "step": 870 }, { "epoch": 15.62, "learning_rate": 9.485208346024484e-05, "loss": 1.3578, "step": 875 }, { "epoch": 15.71, "learning_rate": 0.00011137140040750902, "loss": 1.1825, "step": 880 }, { "epoch": 15.8, "learning_rate": 0.0001245474756106053, "loss": 1.3246, "step": 885 }, { "epoch": 15.89, "learning_rate": 0.00013335039645915393, "loss": 1.332, "step": 890 }, { "epoch": 15.98, "learning_rate": 0.00013709207951022223, "loss": 1.388, "step": 895 }, { "epoch": 16.07, "learning_rate": 0.00013548005477567304, "loss": 1.2832, "step": 900 }, { "epoch": 16.16, "learning_rate": 0.00012864032674294074, "loss": 1.2149, "step": 905 }, { "epoch": 16.25, "learning_rate": 0.00011710752518939739, "loss": 1.0402, "step": 910 }, { "epoch": 16.34, "learning_rate": 0.00010178311565541954, "loss": 1.2199, "step": 915 }, { "epoch": 16.43, "learning_rate": 8.386493606940354e-05, "loss": 1.2836, "step": 920 }, { "epoch": 16.52, "learning_rate": 6.47535673195288e-05, "loss": 1.2179, "step": 925 }, { "epoch": 16.61, "learning_rate": 4.59428563498755e-05, "loss": 1.2299, "step": 930 }, { "epoch": 16.7, "learning_rate": 2.8903149082065114e-05, "loss": 1.072, "step": 935 }, { "epoch": 16.79, "learning_rate": 1.4966360302693468e-05, "loss": 1.2192, "step": 940 }, { "epoch": 16.88, "learning_rate": 5.221864069725844e-06, "loss": 1.2081, "step": 945 }, { "epoch": 16.96, "learning_rate": 4.3134240132356497e-07, "loss": 1.3213, "step": 950 }, { "epoch": 17.05, "learning_rate": 9.692481296489572e-07, "loss": 1.0468, "step": 955 }, { "epoch": 17.14, "learning_rate": 6.793535661893986e-06, "loss": 1.1071, "step": 960 }, { "epoch": 17.23, "learning_rate": 1.7448947489014885e-05, "loss": 1.0509, "step": 965 }, { "epoch": 17.32, "learning_rate": 3.210259955104815e-05, "loss": 0.96, "step": 970 }, { "epoch": 17.41, "learning_rate": 4.9609083916254864e-05, "loss": 1.1523, "step": 975 }, { "epoch": 17.5, "learning_rate": 6.859999999999973e-05, "loss": 0.9922, "step": 980 }, { "epoch": 17.59, "learning_rate": 8.759091608374462e-05, "loss": 1.1601, "step": 985 }, { "epoch": 17.68, "learning_rate": 0.0001050974004489518, "loss": 0.9979, "step": 990 }, { "epoch": 17.77, "learning_rate": 0.00011975105251098509, "loss": 1.1048, "step": 995 }, { "epoch": 17.86, "learning_rate": 0.00013040646433810598, "loss": 1.0999, "step": 1000 }, { "epoch": 17.95, "learning_rate": 0.00013623075187035104, "loss": 1.2753, "step": 1005 }, { "epoch": 18.04, "learning_rate": 0.0001367686575986765, "loss": 1.0046, "step": 1010 }, { "epoch": 18.12, "learning_rate": 0.00013197813593027435, "loss": 0.9683, "step": 1015 }, { "epoch": 18.21, "learning_rate": 0.00012223363969730686, "loss": 1.0153, "step": 1020 }, { "epoch": 18.3, "learning_rate": 0.00010829685091793493, "loss": 0.9562, "step": 1025 }, { "epoch": 18.39, "learning_rate": 9.125714365012455e-05, "loss": 1.0145, "step": 1030 }, { "epoch": 18.48, "learning_rate": 7.244643268047124e-05, "loss": 1.0414, "step": 1035 }, { "epoch": 18.57, "learning_rate": 5.333506393059651e-05, "loss": 1.0547, "step": 1040 }, { "epoch": 18.66, "learning_rate": 3.541688434458093e-05, "loss": 1.038, "step": 1045 }, { "epoch": 18.75, "learning_rate": 2.0092474810602995e-05, "loss": 0.9887, "step": 1050 }, { "epoch": 18.84, "learning_rate": 8.559673257059497e-06, "loss": 1.0939, "step": 1055 }, { "epoch": 18.93, "learning_rate": 1.7199452243269606e-06, "loss": 1.0271, "step": 1060 }, { "epoch": 19.02, "learning_rate": 1.0792048977778093e-07, "loss": 0.8933, "step": 1065 }, { "epoch": 19.11, "learning_rate": 3.84960354084603e-06, "loss": 0.9411, "step": 1070 }, { "epoch": 19.2, "learning_rate": 1.2652524389394958e-05, "loss": 0.8984, "step": 1075 }, { "epoch": 19.29, "learning_rate": 2.5828599592490564e-05, "loss": 0.8203, "step": 1080 }, { "epoch": 19.38, "learning_rate": 4.234791653975466e-05, "loss": 0.8312, "step": 1085 }, { "epoch": 19.46, "learning_rate": 6.09192369393131e-05, "loss": 0.7923, "step": 1090 }, { "epoch": 19.55, "learning_rate": 8.009092691870466e-05, "loss": 0.8649, "step": 1095 }, { "epoch": 19.64, "learning_rate": 9.836442450346445e-05, "loss": 0.939, "step": 1100 }, { "epoch": 19.73, "learning_rate": 0.0001143113752475076, "loss": 0.8501, "step": 1105 }, { "epoch": 19.82, "learning_rate": 0.0001266852800670605, "loss": 0.9123, "step": 1110 }, { "epoch": 19.91, "learning_rate": 0.0001345189282854337, "loss": 0.9598, "step": 1115 }, { "epoch": 20.0, "learning_rate": 0.0001372, "loss": 0.9292, "step": 1120 }, { "epoch": 20.09, "learning_rate": 0.00013451892828543382, "loss": 0.8035, "step": 1125 }, { "epoch": 20.18, "learning_rate": 0.00012668528006706069, "loss": 0.792, "step": 1130 }, { "epoch": 20.27, "learning_rate": 0.00011431137524750785, "loss": 0.8557, "step": 1135 }, { "epoch": 20.36, "learning_rate": 9.836442450346476e-05, "loss": 0.8628, "step": 1140 }, { "epoch": 20.45, "learning_rate": 8.0090926918705e-05, "loss": 0.8219, "step": 1145 }, { "epoch": 20.54, "learning_rate": 6.091923693931392e-05, "loss": 0.8509, "step": 1150 }, { "epoch": 20.62, "learning_rate": 4.234791653975543e-05, "loss": 0.8817, "step": 1155 }, { "epoch": 20.71, "learning_rate": 2.5828599592491204e-05, "loss": 0.8186, "step": 1160 }, { "epoch": 20.8, "learning_rate": 1.2652524389394875e-05, "loss": 0.8184, "step": 1165 }, { "epoch": 20.89, "learning_rate": 3.849603540845984e-06, "loss": 0.8315, "step": 1170 }, { "epoch": 20.98, "learning_rate": 1.0792048977777332e-07, "loss": 0.8831, "step": 1175 }, { "epoch": 21.07, "learning_rate": 1.7199452243269987e-06, "loss": 0.737, "step": 1180 }, { "epoch": 21.16, "learning_rate": 8.559673257059337e-06, "loss": 0.776, "step": 1185 }, { "epoch": 21.25, "learning_rate": 2.009247481060276e-05, "loss": 0.6944, "step": 1190 }, { "epoch": 21.34, "learning_rate": 3.541688434458063e-05, "loss": 0.6937, "step": 1195 }, { "epoch": 21.43, "learning_rate": 5.333506393059618e-05, "loss": 0.7423, "step": 1200 }, { "epoch": 21.52, "learning_rate": 7.24464326804709e-05, "loss": 0.7534, "step": 1205 }, { "epoch": 21.61, "learning_rate": 9.125714365012422e-05, "loss": 0.6734, "step": 1210 }, { "epoch": 21.7, "learning_rate": 0.00010829685091793466, "loss": 0.7151, "step": 1215 }, { "epoch": 21.79, "learning_rate": 0.00012223363969730635, "loss": 0.8225, "step": 1220 }, { "epoch": 21.88, "learning_rate": 0.00013197813593027405, "loss": 0.6728, "step": 1225 }, { "epoch": 21.96, "learning_rate": 0.0001367686575986764, "loss": 0.7781, "step": 1230 }, { "epoch": 22.05, "learning_rate": 0.00013623075187035101, "loss": 0.724, "step": 1235 }, { "epoch": 22.14, "learning_rate": 0.00013040646433810593, "loss": 0.5813, "step": 1240 }, { "epoch": 22.23, "learning_rate": 0.00011975105251098498, "loss": 0.7065, "step": 1245 }, { "epoch": 22.32, "learning_rate": 0.00010509740044895168, "loss": 0.6825, "step": 1250 }, { "epoch": 22.41, "learning_rate": 8.759091608374493e-05, "loss": 0.7155, "step": 1255 }, { "epoch": 22.5, "learning_rate": 6.860000000000005e-05, "loss": 0.7597, "step": 1260 }, { "epoch": 22.59, "learning_rate": 4.9609083916255196e-05, "loss": 0.749, "step": 1265 }, { "epoch": 22.68, "learning_rate": 3.2102599551048435e-05, "loss": 0.7066, "step": 1270 }, { "epoch": 22.77, "learning_rate": 1.7448947489015106e-05, "loss": 0.6666, "step": 1275 }, { "epoch": 22.86, "learning_rate": 6.7935356618941304e-06, "loss": 0.6573, "step": 1280 }, { "epoch": 22.95, "learning_rate": 9.692481296490182e-07, "loss": 0.7112, "step": 1285 }, { "epoch": 23.04, "learning_rate": 4.313424013234736e-07, "loss": 0.5373, "step": 1290 }, { "epoch": 23.12, "learning_rate": 5.221864069725524e-06, "loss": 0.5479, "step": 1295 }, { "epoch": 23.21, "learning_rate": 1.4966360302692958e-05, "loss": 0.5882, "step": 1300 }, { "epoch": 23.3, "learning_rate": 2.8903149082065243e-05, "loss": 0.5555, "step": 1305 }, { "epoch": 23.39, "learning_rate": 4.5942856349875636e-05, "loss": 0.5804, "step": 1310 }, { "epoch": 23.48, "learning_rate": 6.475356731952897e-05, "loss": 0.5679, "step": 1315 }, { "epoch": 23.57, "learning_rate": 8.386493606940368e-05, "loss": 0.6024, "step": 1320 }, { "epoch": 23.66, "learning_rate": 0.00010178311565541925, "loss": 0.552, "step": 1325 }, { "epoch": 23.75, "learning_rate": 0.00011710752518939715, "loss": 0.6404, "step": 1330 }, { "epoch": 23.84, "learning_rate": 0.0001286403267429406, "loss": 0.581, "step": 1335 }, { "epoch": 23.93, "learning_rate": 0.00013548005477567298, "loss": 0.6956, "step": 1340 }, { "epoch": 24.02, "learning_rate": 0.00013709207951022223, "loss": 0.6156, "step": 1345 }, { "epoch": 24.11, "learning_rate": 0.00013335039645915407, "loss": 0.5472, "step": 1350 }, { "epoch": 24.2, "learning_rate": 0.0001245474756106052, "loss": 0.5691, "step": 1355 }, { "epoch": 24.29, "learning_rate": 0.00011137140040750965, "loss": 0.5632, "step": 1360 }, { "epoch": 24.38, "learning_rate": 9.485208346024561e-05, "loss": 0.6406, "step": 1365 }, { "epoch": 24.46, "learning_rate": 7.628076306068718e-05, "loss": 0.5618, "step": 1370 }, { "epoch": 24.55, "learning_rate": 5.710907308129514e-05, "loss": 0.5619, "step": 1375 }, { "epoch": 24.64, "learning_rate": 3.8835575496535365e-05, "loss": 0.5453, "step": 1380 }, { "epoch": 24.73, "learning_rate": 2.288862475249225e-05, "loss": 0.6172, "step": 1385 }, { "epoch": 24.82, "learning_rate": 1.0514719932939396e-05, "loss": 0.5714, "step": 1390 }, { "epoch": 24.91, "learning_rate": 2.681071714566221e-06, "loss": 0.5045, "step": 1395 }, { "epoch": 25.0, "learning_rate": 0.0, "loss": 0.5848, "step": 1400 }, { "epoch": 25.09, "learning_rate": 2.681071714566236e-06, "loss": 0.4364, "step": 1405 }, { "epoch": 25.18, "learning_rate": 1.0514719932939435e-05, "loss": 0.4791, "step": 1410 }, { "epoch": 25.27, "learning_rate": 2.2888624752492302e-05, "loss": 0.4854, "step": 1415 }, { "epoch": 25.36, "learning_rate": 3.8835575496535426e-05, "loss": 0.3908, "step": 1420 }, { "epoch": 25.45, "learning_rate": 5.7109073081295205e-05, "loss": 0.4823, "step": 1425 }, { "epoch": 25.54, "learning_rate": 7.628076306068627e-05, "loss": 0.478, "step": 1430 }, { "epoch": 25.62, "learning_rate": 9.485208346024477e-05, "loss": 0.4768, "step": 1435 }, { "epoch": 25.71, "learning_rate": 0.00011137140040750896, "loss": 0.4795, "step": 1440 }, { "epoch": 25.8, "learning_rate": 0.0001245474756106047, "loss": 0.4712, "step": 1445 }, { "epoch": 25.89, "learning_rate": 0.00013335039645915377, "loss": 0.5162, "step": 1450 }, { "epoch": 25.98, "learning_rate": 0.0001370920795102222, "loss": 0.6066, "step": 1455 }, { "epoch": 26.07, "learning_rate": 0.0001354800547756732, "loss": 0.4381, "step": 1460 }, { "epoch": 26.16, "learning_rate": 0.00012864032674294058, "loss": 0.4356, "step": 1465 }, { "epoch": 26.25, "learning_rate": 0.0001171075251893971, "loss": 0.4588, "step": 1470 }, { "epoch": 26.34, "learning_rate": 0.00010178311565541919, "loss": 0.4378, "step": 1475 }, { "epoch": 26.43, "learning_rate": 8.386493606940363e-05, "loss": 0.4972, "step": 1480 }, { "epoch": 26.52, "learning_rate": 6.47535673195289e-05, "loss": 0.5317, "step": 1485 }, { "epoch": 26.61, "learning_rate": 4.5942856349875575e-05, "loss": 0.4449, "step": 1490 }, { "epoch": 26.7, "learning_rate": 2.8903149082065182e-05, "loss": 0.5294, "step": 1495 }, { "epoch": 26.79, "learning_rate": 1.496636030269352e-05, "loss": 0.4735, "step": 1500 }, { "epoch": 26.88, "learning_rate": 5.221864069725874e-06, "loss": 0.4522, "step": 1505 }, { "epoch": 26.96, "learning_rate": 4.313424013235802e-07, "loss": 0.4696, "step": 1510 }, { "epoch": 27.05, "learning_rate": 9.69248129648866e-07, "loss": 0.3814, "step": 1515 }, { "epoch": 27.14, "learning_rate": 6.793535661893734e-06, "loss": 0.3927, "step": 1520 }, { "epoch": 27.23, "learning_rate": 1.7448947489014506e-05, "loss": 0.4072, "step": 1525 }, { "epoch": 27.32, "learning_rate": 3.210259955104767e-05, "loss": 0.4177, "step": 1530 }, { "epoch": 27.41, "learning_rate": 4.9609083916255264e-05, "loss": 0.3721, "step": 1535 }, { "epoch": 27.5, "learning_rate": 6.860000000000014e-05, "loss": 0.3918, "step": 1540 }, { "epoch": 27.59, "learning_rate": 8.7590916083745e-05, "loss": 0.391, "step": 1545 }, { "epoch": 27.68, "learning_rate": 0.00010509740044895174, "loss": 0.368, "step": 1550 }, { "epoch": 27.77, "learning_rate": 0.00011975105251098503, "loss": 0.4225, "step": 1555 }, { "epoch": 27.86, "learning_rate": 0.00013040646433810595, "loss": 0.3877, "step": 1560 }, { "epoch": 27.95, "learning_rate": 0.00013623075187035101, "loss": 0.4493, "step": 1565 }, { "epoch": 28.04, "learning_rate": 0.00013676865759867652, "loss": 0.4004, "step": 1570 }, { "epoch": 28.12, "learning_rate": 0.0001319781359302744, "loss": 0.3739, "step": 1575 }, { "epoch": 28.21, "learning_rate": 0.00012223363969730692, "loss": 0.3801, "step": 1580 }, { "epoch": 28.3, "learning_rate": 0.00010829685091793539, "loss": 0.4067, "step": 1585 }, { "epoch": 28.39, "learning_rate": 9.125714365012509e-05, "loss": 0.3734, "step": 1590 }, { "epoch": 28.48, "learning_rate": 7.244643268047182e-05, "loss": 0.3794, "step": 1595 }, { "epoch": 28.57, "learning_rate": 5.3335063930597066e-05, "loss": 0.4073, "step": 1600 }, { "epoch": 28.66, "learning_rate": 3.541688434458058e-05, "loss": 0.4003, "step": 1605 }, { "epoch": 28.75, "learning_rate": 2.0092474810602707e-05, "loss": 0.3867, "step": 1610 }, { "epoch": 28.84, "learning_rate": 8.559673257059307e-06, "loss": 0.368, "step": 1615 }, { "epoch": 28.93, "learning_rate": 1.7199452243269835e-06, "loss": 0.3861, "step": 1620 }, { "epoch": 29.02, "learning_rate": 1.0792048977777332e-07, "loss": 0.3113, "step": 1625 }, { "epoch": 29.11, "learning_rate": 3.849603540846007e-06, "loss": 0.2884, "step": 1630 }, { "epoch": 29.2, "learning_rate": 1.2652524389394912e-05, "loss": 0.341, "step": 1635 }, { "epoch": 29.29, "learning_rate": 2.5828599592490496e-05, "loss": 0.2955, "step": 1640 }, { "epoch": 29.38, "learning_rate": 4.234791653975459e-05, "loss": 0.282, "step": 1645 }, { "epoch": 29.46, "learning_rate": 6.0919236939313016e-05, "loss": 0.2748, "step": 1650 }, { "epoch": 29.55, "learning_rate": 8.009092691870409e-05, "loss": 0.3101, "step": 1655 }, { "epoch": 29.64, "learning_rate": 9.836442450346394e-05, "loss": 0.334, "step": 1660 }, { "epoch": 29.73, "learning_rate": 0.00011431137524750716, "loss": 0.3008, "step": 1665 }, { "epoch": 29.82, "learning_rate": 0.0001266852800670602, "loss": 0.3448, "step": 1670 }, { "epoch": 29.91, "learning_rate": 0.00013451892828543358, "loss": 0.3626, "step": 1675 }, { "epoch": 30.0, "learning_rate": 0.0001372, "loss": 0.3804, "step": 1680 }, { "epoch": 30.09, "learning_rate": 0.0001345189282854337, "loss": 0.2925, "step": 1685 }, { "epoch": 30.18, "learning_rate": 0.00012668528006706047, "loss": 0.3217, "step": 1690 }, { "epoch": 30.27, "learning_rate": 0.00011431137524750754, "loss": 0.3431, "step": 1695 }, { "epoch": 30.36, "learning_rate": 9.83644245034644e-05, "loss": 0.2936, "step": 1700 }, { "epoch": 30.45, "learning_rate": 8.009092691870459e-05, "loss": 0.3465, "step": 1705 }, { "epoch": 30.54, "learning_rate": 6.091923693931352e-05, "loss": 0.3303, "step": 1710 }, { "epoch": 30.62, "learning_rate": 4.234791653975505e-05, "loss": 0.3262, "step": 1715 }, { "epoch": 30.71, "learning_rate": 2.582859959249089e-05, "loss": 0.3351, "step": 1720 }, { "epoch": 30.8, "learning_rate": 1.2652524389395202e-05, "loss": 0.3175, "step": 1725 }, { "epoch": 30.89, "learning_rate": 3.849603540846175e-06, "loss": 0.2689, "step": 1730 }, { "epoch": 30.98, "learning_rate": 1.0792048977780377e-07, "loss": 0.3047, "step": 1735 }, { "epoch": 31.07, "learning_rate": 1.7199452243268694e-06, "loss": 0.2869, "step": 1740 }, { "epoch": 31.16, "learning_rate": 8.559673257059063e-06, "loss": 0.2604, "step": 1745 }, { "epoch": 31.25, "learning_rate": 2.0092474810602348e-05, "loss": 0.2769, "step": 1750 }, { "epoch": 31.34, "learning_rate": 3.541688434458014e-05, "loss": 0.2263, "step": 1755 }, { "epoch": 31.43, "learning_rate": 5.333506393059658e-05, "loss": 0.2346, "step": 1760 }, { "epoch": 31.52, "learning_rate": 7.244643268047132e-05, "loss": 0.2531, "step": 1765 }, { "epoch": 31.61, "learning_rate": 9.125714365012463e-05, "loss": 0.26, "step": 1770 }, { "epoch": 31.7, "learning_rate": 0.00010829685091793499, "loss": 0.2807, "step": 1775 }, { "epoch": 31.79, "learning_rate": 0.00012223363969730662, "loss": 0.2235, "step": 1780 }, { "epoch": 31.88, "learning_rate": 0.0001319781359302742, "loss": 0.2793, "step": 1785 }, { "epoch": 31.96, "learning_rate": 0.00013676865759867644, "loss": 0.2978, "step": 1790 }, { "epoch": 32.05, "learning_rate": 0.00013623075187035093, "loss": 0.274, "step": 1795 }, { "epoch": 32.14, "learning_rate": 0.00013040646433810576, "loss": 0.2887, "step": 1800 }, { "epoch": 32.23, "learning_rate": 0.00011975105251098601, "loss": 0.2648, "step": 1805 }, { "epoch": 32.32, "learning_rate": 0.00010509740044895298, "loss": 0.2468, "step": 1810 }, { "epoch": 32.41, "learning_rate": 8.759091608374549e-05, "loss": 0.2918, "step": 1815 }, { "epoch": 32.5, "learning_rate": 6.860000000000064e-05, "loss": 0.2799, "step": 1820 }, { "epoch": 32.59, "learning_rate": 4.9609083916255745e-05, "loss": 0.2562, "step": 1825 }, { "epoch": 32.68, "learning_rate": 3.2102599551048096e-05, "loss": 0.2663, "step": 1830 }, { "epoch": 32.77, "learning_rate": 1.744894748901484e-05, "loss": 0.2774, "step": 1835 }, { "epoch": 32.86, "learning_rate": 6.793535661894382e-06, "loss": 0.2487, "step": 1840 }, { "epoch": 32.95, "learning_rate": 9.692481296491097e-07, "loss": 0.2686, "step": 1845 }, { "epoch": 33.04, "learning_rate": 4.313424013235193e-07, "loss": 0.2175, "step": 1850 }, { "epoch": 33.12, "learning_rate": 5.221864069725684e-06, "loss": 0.218, "step": 1855 }, { "epoch": 33.21, "learning_rate": 1.4966360302693209e-05, "loss": 0.216, "step": 1860 }, { "epoch": 33.3, "learning_rate": 2.890314908206557e-05, "loss": 0.2596, "step": 1865 }, { "epoch": 33.39, "learning_rate": 4.5942856349876015e-05, "loss": 0.2206, "step": 1870 }, { "epoch": 33.48, "learning_rate": 6.475356731952742e-05, "loss": 0.1793, "step": 1875 }, { "epoch": 33.57, "learning_rate": 8.386493606940219e-05, "loss": 0.2108, "step": 1880 }, { "epoch": 33.66, "learning_rate": 0.00010178311565541875, "loss": 0.2287, "step": 1885 }, { "epoch": 33.75, "learning_rate": 0.00011710752518939675, "loss": 0.1961, "step": 1890 }, { "epoch": 33.84, "learning_rate": 0.00012864032674294034, "loss": 0.2072, "step": 1895 }, { "epoch": 33.93, "learning_rate": 0.00013548005477567306, "loss": 0.2405, "step": 1900 }, { "epoch": 34.02, "learning_rate": 0.00013709207951022223, "loss": 0.2522, "step": 1905 }, { "epoch": 34.11, "learning_rate": 0.00013335039645915423, "loss": 0.2533, "step": 1910 }, { "epoch": 34.2, "learning_rate": 0.00012454747561060553, "loss": 0.2087, "step": 1915 }, { "epoch": 34.29, "learning_rate": 0.00011137140040750936, "loss": 0.2236, "step": 1920 }, { "epoch": 34.38, "learning_rate": 9.485208346024522e-05, "loss": 0.2418, "step": 1925 }, { "epoch": 34.46, "learning_rate": 7.628076306068678e-05, "loss": 0.2321, "step": 1930 }, { "epoch": 34.55, "learning_rate": 5.710907308129474e-05, "loss": 0.2204, "step": 1935 }, { "epoch": 34.64, "learning_rate": 3.8835575496535006e-05, "loss": 0.253, "step": 1940 }, { "epoch": 34.73, "learning_rate": 2.2888624752493407e-05, "loss": 0.1896, "step": 1945 }, { "epoch": 34.82, "learning_rate": 1.051471993294022e-05, "loss": 0.2327, "step": 1950 }, { "epoch": 34.91, "learning_rate": 2.6810717145663806e-06, "loss": 0.1881, "step": 1955 }, { "epoch": 35.0, "learning_rate": 0.0, "loss": 0.2131, "step": 1960 }, { "epoch": 35.09, "learning_rate": 2.6810717145660837e-06, "loss": 0.2035, "step": 1965 }, { "epoch": 35.18, "learning_rate": 1.0514719932939649e-05, "loss": 0.1686, "step": 1970 }, { "epoch": 35.27, "learning_rate": 2.28886247524926e-05, "loss": 0.1647, "step": 1975 }, { "epoch": 35.36, "learning_rate": 3.883557549653492e-05, "loss": 0.1727, "step": 1980 }, { "epoch": 35.45, "learning_rate": 5.710907308129464e-05, "loss": 0.1767, "step": 1985 }, { "epoch": 35.54, "learning_rate": 7.62807630606867e-05, "loss": 0.1769, "step": 1990 }, { "epoch": 35.62, "learning_rate": 9.485208346024514e-05, "loss": 0.1662, "step": 1995 }, { "epoch": 35.71, "learning_rate": 0.00011137140040750926, "loss": 0.1953, "step": 2000 }, { "epoch": 35.8, "learning_rate": 0.00012454747561060548, "loss": 0.186, "step": 2005 }, { "epoch": 35.89, "learning_rate": 0.0001333503964591542, "loss": 0.2162, "step": 2010 }, { "epoch": 35.98, "learning_rate": 0.00013709207951022217, "loss": 0.215, "step": 2015 }, { "epoch": 36.07, "learning_rate": 0.0001354800547756733, "loss": 0.2198, "step": 2020 }, { "epoch": 36.16, "learning_rate": 0.00012864032674294085, "loss": 0.189, "step": 2025 }, { "epoch": 36.25, "learning_rate": 0.00011710752518939751, "loss": 0.2029, "step": 2030 }, { "epoch": 36.34, "learning_rate": 0.0001017831156554197, "loss": 0.1957, "step": 2035 }, { "epoch": 36.43, "learning_rate": 8.386493606940322e-05, "loss": 0.2062, "step": 2040 }, { "epoch": 36.52, "learning_rate": 6.47535673195285e-05, "loss": 0.2046, "step": 2045 }, { "epoch": 36.61, "learning_rate": 4.594285634987612e-05, "loss": 0.1769, "step": 2050 }, { "epoch": 36.7, "learning_rate": 2.8903149082065656e-05, "loss": 0.193, "step": 2055 }, { "epoch": 36.79, "learning_rate": 1.496636030269327e-05, "loss": 0.1815, "step": 2060 }, { "epoch": 36.88, "learning_rate": 5.2218640697257225e-06, "loss": 0.1812, "step": 2065 }, { "epoch": 36.96, "learning_rate": 4.3134240132353453e-07, "loss": 0.177, "step": 2070 }, { "epoch": 37.05, "learning_rate": 9.692481296490944e-07, "loss": 0.1544, "step": 2075 }, { "epoch": 37.14, "learning_rate": 6.793535661894336e-06, "loss": 0.1606, "step": 2080 }, { "epoch": 37.23, "learning_rate": 1.7448947489014123e-05, "loss": 0.1656, "step": 2085 }, { "epoch": 37.32, "learning_rate": 3.210259955104718e-05, "loss": 0.1505, "step": 2090 }, { "epoch": 37.41, "learning_rate": 4.9609083916254715e-05, "loss": 0.1481, "step": 2095 }, { "epoch": 37.5, "learning_rate": 6.859999999999957e-05, "loss": 0.1534, "step": 2100 }, { "epoch": 37.59, "learning_rate": 8.759091608374445e-05, "loss": 0.1499, "step": 2105 }, { "epoch": 37.68, "learning_rate": 0.00010509740044895207, "loss": 0.1818, "step": 2110 }, { "epoch": 37.77, "learning_rate": 0.0001197510525109853, "loss": 0.1553, "step": 2115 }, { "epoch": 37.86, "learning_rate": 0.0001304064643381057, "loss": 0.1696, "step": 2120 }, { "epoch": 37.95, "learning_rate": 0.0001362307518703509, "loss": 0.1848, "step": 2125 }, { "epoch": 38.04, "learning_rate": 0.00013676865759867644, "loss": 0.1581, "step": 2130 }, { "epoch": 38.12, "learning_rate": 0.00013197813593027424, "loss": 0.1639, "step": 2135 }, { "epoch": 38.21, "learning_rate": 0.00012223363969730668, "loss": 0.1736, "step": 2140 }, { "epoch": 38.3, "learning_rate": 0.00010829685091793427, "loss": 0.1716, "step": 2145 }, { "epoch": 38.39, "learning_rate": 9.125714365012379e-05, "loss": 0.1616, "step": 2150 }, { "epoch": 38.48, "learning_rate": 7.244643268047237e-05, "loss": 0.1674, "step": 2155 }, { "epoch": 38.57, "learning_rate": 5.333506393059762e-05, "loss": 0.181, "step": 2160 }, { "epoch": 38.66, "learning_rate": 3.541688434458107e-05, "loss": 0.1737, "step": 2165 }, { "epoch": 38.75, "learning_rate": 2.009247481060311e-05, "loss": 0.1692, "step": 2170 }, { "epoch": 38.84, "learning_rate": 8.559673257059581e-06, "loss": 0.1796, "step": 2175 }, { "epoch": 38.93, "learning_rate": 1.719945224326892e-06, "loss": 0.149, "step": 2180 }, { "epoch": 39.02, "learning_rate": 1.0792048977779616e-07, "loss": 0.1672, "step": 2185 }, { "epoch": 39.11, "learning_rate": 3.849603540845817e-06, "loss": 0.1322, "step": 2190 }, { "epoch": 39.2, "learning_rate": 1.2652524389394578e-05, "loss": 0.134, "step": 2195 }, { "epoch": 39.29, "learning_rate": 2.5828599592490815e-05, "loss": 0.1382, "step": 2200 }, { "epoch": 39.38, "learning_rate": 4.234791653975496e-05, "loss": 0.131, "step": 2205 }, { "epoch": 39.46, "learning_rate": 6.0919236939313415e-05, "loss": 0.1396, "step": 2210 }, { "epoch": 39.55, "learning_rate": 8.009092691870546e-05, "loss": 0.1475, "step": 2215 }, { "epoch": 39.64, "learning_rate": 9.836442450346518e-05, "loss": 0.1374, "step": 2220 }, { "epoch": 39.73, "learning_rate": 0.00011431137524750674, "loss": 0.1528, "step": 2225 }, { "epoch": 39.82, "learning_rate": 0.0001266852800670599, "loss": 0.1468, "step": 2230 }, { "epoch": 39.91, "learning_rate": 0.00013451892828543368, "loss": 0.1589, "step": 2235 }, { "epoch": 40.0, "learning_rate": 0.0001372, "loss": 0.153, "step": 2240 }, { "epoch": 40.09, "learning_rate": 0.00013451892828543387, "loss": 0.1516, "step": 2245 }, { "epoch": 40.18, "learning_rate": 0.00012668528006706025, "loss": 0.1432, "step": 2250 }, { "epoch": 40.27, "learning_rate": 0.00011431137524750726, "loss": 0.1714, "step": 2255 }, { "epoch": 40.36, "learning_rate": 9.836442450346578e-05, "loss": 0.1401, "step": 2260 }, { "epoch": 40.45, "learning_rate": 8.009092691870612e-05, "loss": 0.1529, "step": 2265 }, { "epoch": 40.54, "learning_rate": 6.0919236939314086e-05, "loss": 0.1557, "step": 2270 }, { "epoch": 40.62, "learning_rate": 4.234791653975558e-05, "loss": 0.1562, "step": 2275 }, { "epoch": 40.71, "learning_rate": 2.582859959249134e-05, "loss": 0.1449, "step": 2280 }, { "epoch": 40.8, "learning_rate": 1.2652524389394402e-05, "loss": 0.1595, "step": 2285 }, { "epoch": 40.89, "learning_rate": 3.849603540846038e-06, "loss": 0.1353, "step": 2290 }, { "epoch": 40.98, "learning_rate": 1.0792048977783424e-07, "loss": 0.1344, "step": 2295 }, { "epoch": 41.07, "learning_rate": 1.7199452243265265e-06, "loss": 0.1224, "step": 2300 }, { "epoch": 41.16, "learning_rate": 8.559673257059253e-06, "loss": 0.1157, "step": 2305 }, { "epoch": 41.25, "learning_rate": 2.0092474810601945e-05, "loss": 0.1231, "step": 2310 }, { "epoch": 41.34, "learning_rate": 3.541688434458049e-05, "loss": 0.1233, "step": 2315 }, { "epoch": 41.43, "learning_rate": 5.3335063930596016e-05, "loss": 0.1129, "step": 2320 }, { "epoch": 41.52, "learning_rate": 7.244643268047171e-05, "loss": 0.1297, "step": 2325 }, { "epoch": 41.61, "learning_rate": 9.125714365012407e-05, "loss": 0.1117, "step": 2330 }, { "epoch": 41.7, "learning_rate": 0.00010829685091793371, "loss": 0.117, "step": 2335 }, { "epoch": 41.79, "learning_rate": 0.00012223363969730686, "loss": 0.1357, "step": 2340 }, { "epoch": 41.88, "learning_rate": 0.00013197813593027397, "loss": 0.1191, "step": 2345 }, { "epoch": 41.96, "learning_rate": 0.00013676865759867647, "loss": 0.1514, "step": 2350 }, { "epoch": 42.05, "learning_rate": 0.00013623075187035104, "loss": 0.1221, "step": 2355 }, { "epoch": 42.14, "learning_rate": 0.00013040646433810557, "loss": 0.1356, "step": 2360 }, { "epoch": 42.23, "learning_rate": 0.0001197510525109864, "loss": 0.1469, "step": 2365 }, { "epoch": 42.32, "learning_rate": 0.00010509740044895266, "loss": 0.1364, "step": 2370 }, { "epoch": 42.41, "learning_rate": 8.759091608374603e-05, "loss": 0.146, "step": 2375 }, { "epoch": 42.5, "learning_rate": 6.860000000000023e-05, "loss": 0.1439, "step": 2380 }, { "epoch": 42.59, "learning_rate": 4.960908391625629e-05, "loss": 0.1339, "step": 2385 }, { "epoch": 42.68, "learning_rate": 3.210259955104775e-05, "loss": 0.1282, "step": 2390 }, { "epoch": 42.77, "learning_rate": 1.744894748901522e-05, "loss": 0.1341, "step": 2395 }, { "epoch": 42.86, "learning_rate": 6.793535661894625e-06, "loss": 0.1195, "step": 2400 }, { "epoch": 42.95, "learning_rate": 9.69248129649041e-07, "loss": 0.128, "step": 2405 }, { "epoch": 43.04, "learning_rate": 4.3134240132345835e-07, "loss": 0.1227, "step": 2410 }, { "epoch": 43.12, "learning_rate": 5.221864069725836e-06, "loss": 0.1054, "step": 2415 }, { "epoch": 43.21, "learning_rate": 1.4966360302692852e-05, "loss": 0.113, "step": 2420 }, { "epoch": 43.3, "learning_rate": 2.8903149082065897e-05, "loss": 0.1135, "step": 2425 }, { "epoch": 43.39, "learning_rate": 4.594285634987549e-05, "loss": 0.0998, "step": 2430 }, { "epoch": 43.48, "learning_rate": 6.475356731952782e-05, "loss": 0.1042, "step": 2435 }, { "epoch": 43.57, "learning_rate": 8.386493606940162e-05, "loss": 0.1103, "step": 2440 }, { "epoch": 43.66, "learning_rate": 0.0001017831156554191, "loss": 0.0937, "step": 2445 }, { "epoch": 43.75, "learning_rate": 0.00011710752518939633, "loss": 0.1037, "step": 2450 }, { "epoch": 43.84, "learning_rate": 0.0001286403267429405, "loss": 0.1108, "step": 2455 }, { "epoch": 43.93, "learning_rate": 0.00013548005477567295, "loss": 0.1088, "step": 2460 }, { "epoch": 44.02, "learning_rate": 0.0001370920795102222, "loss": 0.132, "step": 2465 }, { "epoch": 44.11, "learning_rate": 0.00013335039645915412, "loss": 0.1115, "step": 2470 }, { "epoch": 44.2, "learning_rate": 0.00012454747561060588, "loss": 0.1322, "step": 2475 }, { "epoch": 44.29, "learning_rate": 0.00011137140040750903, "loss": 0.1218, "step": 2480 }, { "epoch": 44.38, "learning_rate": 9.485208346024576e-05, "loss": 0.1266, "step": 2485 }, { "epoch": 44.46, "learning_rate": 7.628076306068638e-05, "loss": 0.1337, "step": 2490 }, { "epoch": 44.55, "learning_rate": 5.7109073081295306e-05, "loss": 0.1423, "step": 2495 }, { "epoch": 44.64, "learning_rate": 3.883557549653465e-05, "loss": 0.1254, "step": 2500 }, { "epoch": 44.73, "learning_rate": 2.2888624752493833e-05, "loss": 0.1102, "step": 2505 }, { "epoch": 44.82, "learning_rate": 1.0514719932940006e-05, "loss": 0.1057, "step": 2510 }, { "epoch": 44.91, "learning_rate": 2.6810717145665407e-06, "loss": 0.1127, "step": 2515 }, { "epoch": 45.0, "learning_rate": 0.0, "loss": 0.1, "step": 2520 }, { "epoch": 45.09, "learning_rate": 2.6810717145659236e-06, "loss": 0.0923, "step": 2525 }, { "epoch": 45.18, "learning_rate": 1.0514719932939862e-05, "loss": 0.1096, "step": 2530 }, { "epoch": 45.27, "learning_rate": 2.288862475249218e-05, "loss": 0.1037, "step": 2535 }, { "epoch": 45.36, "learning_rate": 3.88355754965344e-05, "loss": 0.0967, "step": 2540 }, { "epoch": 45.45, "learning_rate": 5.7109073081295035e-05, "loss": 0.0873, "step": 2545 }, { "epoch": 45.54, "learning_rate": 7.628076306068611e-05, "loss": 0.1035, "step": 2550 }, { "epoch": 45.62, "learning_rate": 9.485208346024552e-05, "loss": 0.0923, "step": 2555 }, { "epoch": 45.71, "learning_rate": 0.00011137140040750883, "loss": 0.0814, "step": 2560 }, { "epoch": 45.8, "learning_rate": 0.0001245474756106057, "loss": 0.0889, "step": 2565 }, { "epoch": 45.89, "learning_rate": 0.00013335039645915404, "loss": 0.1126, "step": 2570 }, { "epoch": 45.98, "learning_rate": 0.00013709207951022217, "loss": 0.1126, "step": 2575 }, { "epoch": 46.07, "learning_rate": 0.00013548005477567344, "loss": 0.115, "step": 2580 }, { "epoch": 46.16, "learning_rate": 0.00012864032674294066, "loss": 0.1107, "step": 2585 }, { "epoch": 46.25, "learning_rate": 0.00011710752518939791, "loss": 0.1185, "step": 2590 }, { "epoch": 46.34, "learning_rate": 0.00010178311565541935, "loss": 0.0988, "step": 2595 }, { "epoch": 46.43, "learning_rate": 8.386493606940379e-05, "loss": 0.106, "step": 2600 }, { "epoch": 46.52, "learning_rate": 6.475356731952809e-05, "loss": 0.1046, "step": 2605 }, { "epoch": 46.61, "learning_rate": 4.594285634987574e-05, "loss": 0.1119, "step": 2610 }, { "epoch": 46.7, "learning_rate": 2.890314908206612e-05, "loss": 0.1081, "step": 2615 }, { "epoch": 46.79, "learning_rate": 1.496636030269302e-05, "loss": 0.0968, "step": 2620 }, { "epoch": 46.88, "learning_rate": 5.221864069725935e-06, "loss": 0.1116, "step": 2625 }, { "epoch": 46.96, "learning_rate": 4.3134240132348884e-07, "loss": 0.1083, "step": 2630 }, { "epoch": 47.05, "learning_rate": 9.692481296489953e-07, "loss": 0.0932, "step": 2635 }, { "epoch": 47.14, "learning_rate": 6.793535661894512e-06, "loss": 0.0854, "step": 2640 }, { "epoch": 47.23, "learning_rate": 1.7448947489013744e-05, "loss": 0.0843, "step": 2645 }, { "epoch": 47.32, "learning_rate": 3.210259955104752e-05, "loss": 0.0891, "step": 2650 }, { "epoch": 47.41, "learning_rate": 4.960908391625416e-05, "loss": 0.0791, "step": 2655 }, { "epoch": 47.5, "learning_rate": 6.859999999999997e-05, "loss": 0.09, "step": 2660 }, { "epoch": 47.59, "learning_rate": 8.75909160837439e-05, "loss": 0.0897, "step": 2665 }, { "epoch": 47.68, "learning_rate": 0.00010509740044895241, "loss": 0.0856, "step": 2670 }, { "epoch": 47.77, "learning_rate": 0.00011975105251098491, "loss": 0.0932, "step": 2675 }, { "epoch": 47.86, "learning_rate": 0.00013040646433810547, "loss": 0.0998, "step": 2680 }, { "epoch": 47.95, "learning_rate": 0.000136230751870351, "loss": 0.1009, "step": 2685 }, { "epoch": 48.04, "learning_rate": 0.00013676865759867652, "loss": 0.0975, "step": 2690 }, { "epoch": 48.12, "learning_rate": 0.00013197813593027408, "loss": 0.1072, "step": 2695 }, { "epoch": 48.21, "learning_rate": 0.00012223363969730703, "loss": 0.105, "step": 2700 }, { "epoch": 48.3, "learning_rate": 0.00010829685091793393, "loss": 0.102, "step": 2705 }, { "epoch": 48.39, "learning_rate": 9.125714365012433e-05, "loss": 0.0928, "step": 2710 }, { "epoch": 48.48, "learning_rate": 7.244643268047198e-05, "loss": 0.117, "step": 2715 }, { "epoch": 48.57, "learning_rate": 5.333506393059818e-05, "loss": 0.0985, "step": 2720 }, { "epoch": 48.66, "learning_rate": 3.541688434458072e-05, "loss": 0.1006, "step": 2725 }, { "epoch": 48.75, "learning_rate": 2.0092474810603514e-05, "loss": 0.0912, "step": 2730 }, { "epoch": 48.84, "learning_rate": 8.559673257059383e-06, "loss": 0.0824, "step": 2735 }, { "epoch": 48.93, "learning_rate": 1.7199452243270216e-06, "loss": 0.0914, "step": 2740 }, { "epoch": 49.02, "learning_rate": 1.07920489777819e-07, "loss": 0.103, "step": 2745 }, { "epoch": 49.11, "learning_rate": 3.849603540845946e-06, "loss": 0.0818, "step": 2750 }, { "epoch": 49.2, "learning_rate": 1.265252438939425e-05, "loss": 0.083, "step": 2755 }, { "epoch": 49.29, "learning_rate": 2.5828599592491126e-05, "loss": 0.0802, "step": 2760 }, { "epoch": 49.38, "learning_rate": 4.2347916539754424e-05, "loss": 0.0776, "step": 2765 }, { "epoch": 49.46, "learning_rate": 6.091923693931382e-05, "loss": 0.0897, "step": 2770 }, { "epoch": 49.55, "learning_rate": 8.00909269187049e-05, "loss": 0.0723, "step": 2775 }, { "epoch": 49.64, "learning_rate": 9.836442450346554e-05, "loss": 0.0913, "step": 2780 }, { "epoch": 49.73, "learning_rate": 0.00011431137524750631, "loss": 0.0869, "step": 2785 }, { "epoch": 49.82, "learning_rate": 0.00012668528006706012, "loss": 0.0839, "step": 2790 }, { "epoch": 49.91, "learning_rate": 0.00013451892828543352, "loss": 0.0867, "step": 2795 }, { "epoch": 50.0, "learning_rate": 0.0001372, "loss": 0.0961, "step": 2800 }, { "epoch": 50.09, "learning_rate": 0.00013451892828543404, "loss": 0.0873, "step": 2805 }, { "epoch": 50.18, "learning_rate": 0.00012668528006706004, "loss": 0.0834, "step": 2810 }, { "epoch": 50.27, "learning_rate": 0.00011431137524750768, "loss": 0.0971, "step": 2815 }, { "epoch": 50.36, "learning_rate": 9.836442450346542e-05, "loss": 0.0968, "step": 2820 }, { "epoch": 50.45, "learning_rate": 8.009092691870668e-05, "loss": 0.0985, "step": 2825 }, { "epoch": 50.54, "learning_rate": 6.0919236939313686e-05, "loss": 0.092, "step": 2830 }, { "epoch": 50.62, "learning_rate": 4.2347916539756105e-05, "loss": 0.0898, "step": 2835 }, { "epoch": 50.71, "learning_rate": 2.582859959249102e-05, "loss": 0.0918, "step": 2840 }, { "epoch": 50.8, "learning_rate": 1.2652524389395302e-05, "loss": 0.0936, "step": 2845 }, { "epoch": 50.89, "learning_rate": 3.849603540845908e-06, "loss": 0.0775, "step": 2850 }, { "epoch": 50.98, "learning_rate": 1.0792048977781139e-07, "loss": 0.086, "step": 2855 }, { "epoch": 51.07, "learning_rate": 1.719945224326618e-06, "loss": 0.071, "step": 2860 }, { "epoch": 51.16, "learning_rate": 8.559673257059451e-06, "loss": 0.0789, "step": 2865 }, { "epoch": 51.25, "learning_rate": 2.0092474810602236e-05, "loss": 0.0844, "step": 2870 }, { "epoch": 51.34, "learning_rate": 3.541688434458084e-05, "loss": 0.0717, "step": 2875 }, { "epoch": 51.43, "learning_rate": 5.333506393059641e-05, "loss": 0.0714, "step": 2880 }, { "epoch": 51.52, "learning_rate": 7.244643268047212e-05, "loss": 0.0735, "step": 2885 }, { "epoch": 51.61, "learning_rate": 9.125714365012261e-05, "loss": 0.0773, "step": 2890 }, { "epoch": 51.7, "learning_rate": 0.00010829685091793405, "loss": 0.0718, "step": 2895 }, { "epoch": 51.79, "learning_rate": 0.0001222336396973059, "loss": 0.0782, "step": 2900 }, { "epoch": 51.88, "learning_rate": 0.00013197813593027413, "loss": 0.0685, "step": 2905 }, { "epoch": 51.96, "learning_rate": 0.00013676865759867633, "loss": 0.0792, "step": 2910 }, { "epoch": 52.05, "learning_rate": 0.000136230751870351, "loss": 0.076, "step": 2915 }, { "epoch": 52.14, "learning_rate": 0.00013040646433810625, "loss": 0.0802, "step": 2920 }, { "epoch": 52.23, "learning_rate": 0.00011975105251098613, "loss": 0.0753, "step": 2925 }, { "epoch": 52.32, "learning_rate": 0.0001050974004489523, "loss": 0.0935, "step": 2930 }, { "epoch": 52.41, "learning_rate": 8.759091608374565e-05, "loss": 0.0924, "step": 2935 }, { "epoch": 52.5, "learning_rate": 6.859999999999984e-05, "loss": 0.0846, "step": 2940 }, { "epoch": 52.59, "learning_rate": 4.96090839162559e-05, "loss": 0.0729, "step": 2945 }, { "epoch": 52.68, "learning_rate": 3.210259955104741e-05, "loss": 0.0807, "step": 2950 }, { "epoch": 52.77, "learning_rate": 1.7448947489014953e-05, "loss": 0.0955, "step": 2955 }, { "epoch": 52.86, "learning_rate": 6.793535661894451e-06, "loss": 0.0765, "step": 2960 }, { "epoch": 52.95, "learning_rate": 9.692481296493e-07, "loss": 0.0808, "step": 2965 }, { "epoch": 53.04, "learning_rate": 4.3134240132350404e-07, "loss": 0.0749, "step": 2970 }, { "epoch": 53.12, "learning_rate": 5.221864069725242e-06, "loss": 0.0756, "step": 2975 }, { "epoch": 53.21, "learning_rate": 1.4966360302693102e-05, "loss": 0.0691, "step": 2980 }, { "epoch": 53.3, "learning_rate": 2.8903149082064643e-05, "loss": 0.0656, "step": 2985 }, { "epoch": 53.39, "learning_rate": 4.5942856349875866e-05, "loss": 0.0701, "step": 2990 }, { "epoch": 53.48, "learning_rate": 6.475356731952822e-05, "loss": 0.0626, "step": 2995 }, { "epoch": 53.57, "learning_rate": 8.386493606940203e-05, "loss": 0.066, "step": 3000 }, { "epoch": 53.66, "learning_rate": 0.00010178311565541946, "loss": 0.0758, "step": 3005 }, { "epoch": 53.75, "learning_rate": 0.00011710752518939663, "loss": 0.0757, "step": 3010 }, { "epoch": 53.84, "learning_rate": 0.00012864032674294072, "loss": 0.0673, "step": 3015 }, { "epoch": 53.93, "learning_rate": 0.00013548005477567304, "loss": 0.0775, "step": 3020 }, { "epoch": 54.02, "learning_rate": 0.00013709207951022217, "loss": 0.0768, "step": 3025 }, { "epoch": 54.11, "learning_rate": 0.00013335039645915464, "loss": 0.0761, "step": 3030 }, { "epoch": 54.2, "learning_rate": 0.00012454747561060564, "loss": 0.0757, "step": 3035 }, { "epoch": 54.29, "learning_rate": 0.00011137140040751024, "loss": 0.0755, "step": 3040 }, { "epoch": 54.38, "learning_rate": 9.48520834602454e-05, "loss": 0.0803, "step": 3045 }, { "epoch": 54.46, "learning_rate": 7.628076306068793e-05, "loss": 0.0809, "step": 3050 }, { "epoch": 54.55, "learning_rate": 5.71090730812949e-05, "loss": 0.0821, "step": 3055 }, { "epoch": 54.64, "learning_rate": 3.8835575496536036e-05, "loss": 0.0792, "step": 3060 }, { "epoch": 54.73, "learning_rate": 2.288862475249353e-05, "loss": 0.073, "step": 3065 }, { "epoch": 54.82, "learning_rate": 1.0514719932939786e-05, "loss": 0.0754, "step": 3070 }, { "epoch": 54.91, "learning_rate": 2.6810717145664263e-06, "loss": 0.0837, "step": 3075 }, { "epoch": 55.0, "learning_rate": 0.0, "loss": 0.0772, "step": 3080 }, { "epoch": 55.09, "learning_rate": 2.681071714566038e-06, "loss": 0.0661, "step": 3085 }, { "epoch": 55.18, "learning_rate": 1.0514719932940075e-05, "loss": 0.0746, "step": 3090 }, { "epoch": 55.27, "learning_rate": 2.2888624752492478e-05, "loss": 0.0666, "step": 3095 }, { "epoch": 55.36, "learning_rate": 3.883557549653476e-05, "loss": 0.0662, "step": 3100 }, { "epoch": 55.45, "learning_rate": 5.710907308129351e-05, "loss": 0.0673, "step": 3105 }, { "epoch": 55.54, "learning_rate": 7.628076306068652e-05, "loss": 0.0648, "step": 3110 }, { "epoch": 55.62, "learning_rate": 9.485208346024408e-05, "loss": 0.0616, "step": 3115 }, { "epoch": 55.71, "learning_rate": 0.00011137140040750914, "loss": 0.066, "step": 3120 }, { "epoch": 55.8, "learning_rate": 0.00012454747561060483, "loss": 0.0587, "step": 3125 }, { "epoch": 55.89, "learning_rate": 0.00013335039645915415, "loss": 0.0635, "step": 3130 }, { "epoch": 55.98, "learning_rate": 0.0001370920795102222, "loss": 0.0683, "step": 3135 }, { "epoch": 56.07, "learning_rate": 0.00013548005477567333, "loss": 0.0734, "step": 3140 }, { "epoch": 56.16, "learning_rate": 0.00012864032674294044, "loss": 0.0734, "step": 3145 }, { "epoch": 56.25, "learning_rate": 0.00011710752518939762, "loss": 0.0714, "step": 3150 }, { "epoch": 56.34, "learning_rate": 0.00010178311565541898, "loss": 0.0763, "step": 3155 }, { "epoch": 56.43, "learning_rate": 8.38649360694034e-05, "loss": 0.076, "step": 3160 }, { "epoch": 56.52, "learning_rate": 6.47535673195277e-05, "loss": 0.065, "step": 3165 }, { "epoch": 56.61, "learning_rate": 4.594285634987719e-05, "loss": 0.0772, "step": 3170 }, { "epoch": 56.7, "learning_rate": 2.8903149082065792e-05, "loss": 0.0719, "step": 3175 }, { "epoch": 56.79, "learning_rate": 1.4966360302693987e-05, "loss": 0.0661, "step": 3180 }, { "epoch": 56.88, "learning_rate": 5.2218640697257835e-06, "loss": 0.0754, "step": 3185 }, { "epoch": 56.96, "learning_rate": 4.313424013236564e-07, "loss": 0.0747, "step": 3190 }, { "epoch": 57.05, "learning_rate": 9.69248129649064e-07, "loss": 0.067, "step": 3195 }, { "epoch": 57.14, "learning_rate": 6.793535661893841e-06, "loss": 0.0616, "step": 3200 }, { "epoch": 57.23, "learning_rate": 1.744894748901401e-05, "loss": 0.0606, "step": 3205 }, { "epoch": 57.32, "learning_rate": 3.2102599551047865e-05, "loss": 0.0688, "step": 3210 }, { "epoch": 57.41, "learning_rate": 4.9609083916254546e-05, "loss": 0.0625, "step": 3215 }, { "epoch": 57.5, "learning_rate": 6.860000000000037e-05, "loss": 0.0574, "step": 3220 }, { "epoch": 57.59, "learning_rate": 8.75909160837443e-05, "loss": 0.0555, "step": 3225 }, { "epoch": 57.68, "learning_rate": 0.00010509740044895277, "loss": 0.0597, "step": 3230 }, { "epoch": 57.77, "learning_rate": 0.00011975105251098518, "loss": 0.0566, "step": 3235 }, { "epoch": 57.86, "learning_rate": 0.00013040646433810563, "loss": 0.0627, "step": 3240 }, { "epoch": 57.95, "learning_rate": 0.00013623075187035074, "loss": 0.0736, "step": 3245 }, { "epoch": 58.04, "learning_rate": 0.00013676865759867647, "loss": 0.07, "step": 3250 }, { "epoch": 58.12, "learning_rate": 0.00013197813593027467, "loss": 0.0627, "step": 3255 }, { "epoch": 58.21, "learning_rate": 0.00012223363969730676, "loss": 0.0709, "step": 3260 }, { "epoch": 58.3, "learning_rate": 0.0001082968509179352, "loss": 0.0699, "step": 3265 }, { "epoch": 58.39, "learning_rate": 9.125714365012395e-05, "loss": 0.0791, "step": 3270 }, { "epoch": 58.48, "learning_rate": 7.244643268047157e-05, "loss": 0.0732, "step": 3275 }, { "epoch": 58.57, "learning_rate": 5.333506393059779e-05, "loss": 0.0646, "step": 3280 }, { "epoch": 58.66, "learning_rate": 3.541688434458037e-05, "loss": 0.0675, "step": 3285 }, { "epoch": 58.75, "learning_rate": 2.0092474810603233e-05, "loss": 0.0586, "step": 3290 }, { "epoch": 58.84, "learning_rate": 8.559673257059193e-06, "loss": 0.0643, "step": 3295 }, { "epoch": 58.93, "learning_rate": 1.7199452243269301e-06, "loss": 0.0693, "step": 3300 }, { "epoch": 59.02, "learning_rate": 1.0792048977784186e-07, "loss": 0.0652, "step": 3305 }, { "epoch": 59.11, "learning_rate": 3.849603540845436e-06, "loss": 0.058, "step": 3310 }, { "epoch": 59.2, "learning_rate": 1.2652524389394478e-05, "loss": 0.0575, "step": 3315 }, { "epoch": 59.29, "learning_rate": 2.5828599592489917e-05, "loss": 0.0572, "step": 3320 }, { "epoch": 59.38, "learning_rate": 4.234791653975481e-05, "loss": 0.0532, "step": 3325 }, { "epoch": 59.46, "learning_rate": 6.0919236939312284e-05, "loss": 0.057, "step": 3330 }, { "epoch": 59.55, "learning_rate": 8.00909269187053e-05, "loss": 0.0541, "step": 3335 }, { "epoch": 59.64, "learning_rate": 9.836442450346415e-05, "loss": 0.0572, "step": 3340 }, { "epoch": 59.73, "learning_rate": 0.00011431137524750662, "loss": 0.0581, "step": 3345 }, { "epoch": 59.82, "learning_rate": 0.00012668528006705928, "loss": 0.0531, "step": 3350 }, { "epoch": 59.91, "learning_rate": 0.00013451892828543363, "loss": 0.0639, "step": 3355 }, { "epoch": 60.0, "learning_rate": 0.0001372, "loss": 0.0604, "step": 3360 } ], "max_steps": 3360, "num_train_epochs": 60, "total_flos": 3472571105280000.0, "trial_name": null, "trial_params": null }