{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.99943704259711, "eval_steps": 500, "global_step": 11988, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.6680567139282736e-08, "loss": 1.5794, "step": 1 }, { "epoch": 0.0, "learning_rate": 8.340283569641369e-08, "loss": 1.6302, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.6680567139282737e-07, "loss": 1.5803, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.5020850708924106e-07, "loss": 1.6049, "step": 15 }, { "epoch": 0.01, "learning_rate": 3.3361134278565475e-07, "loss": 1.6456, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.170141784820684e-07, "loss": 1.6027, "step": 25 }, { "epoch": 0.01, "learning_rate": 5.004170141784821e-07, "loss": 1.5919, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.838198498748958e-07, "loss": 1.5533, "step": 35 }, { "epoch": 0.01, "learning_rate": 6.672226855713095e-07, "loss": 1.5715, "step": 40 }, { "epoch": 0.01, "learning_rate": 7.506255212677231e-07, "loss": 1.5834, "step": 45 }, { "epoch": 0.01, "learning_rate": 8.340283569641368e-07, "loss": 1.6214, "step": 50 }, { "epoch": 0.01, "learning_rate": 9.174311926605506e-07, "loss": 1.5622, "step": 55 }, { "epoch": 0.02, "learning_rate": 1.0008340283569642e-06, "loss": 1.5693, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.084236864053378e-06, "loss": 1.5967, "step": 65 }, { "epoch": 0.02, "learning_rate": 1.1676396997497916e-06, "loss": 1.6005, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.2510425354462053e-06, "loss": 1.5626, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.334445371142619e-06, "loss": 1.5255, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.4178482068390325e-06, "loss": 1.5522, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.5012510425354462e-06, "loss": 1.5442, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.5846538782318598e-06, "loss": 1.5968, "step": 95 }, { "epoch": 0.03, "learning_rate": 1.6680567139282735e-06, "loss": 1.5844, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.7514595496246874e-06, "loss": 1.5577, "step": 105 }, { "epoch": 0.03, "learning_rate": 1.8348623853211011e-06, "loss": 1.551, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.918265221017515e-06, "loss": 1.4929, "step": 115 }, { "epoch": 0.03, "learning_rate": 2.0016680567139285e-06, "loss": 1.5102, "step": 120 }, { "epoch": 0.03, "learning_rate": 2.085070892410342e-06, "loss": 1.5341, "step": 125 }, { "epoch": 0.03, "learning_rate": 2.168473728106756e-06, "loss": 1.4989, "step": 130 }, { "epoch": 0.03, "learning_rate": 2.2518765638031695e-06, "loss": 1.4657, "step": 135 }, { "epoch": 0.04, "learning_rate": 2.3352793994995832e-06, "loss": 1.4592, "step": 140 }, { "epoch": 0.04, "learning_rate": 2.418682235195997e-06, "loss": 1.4634, "step": 145 }, { "epoch": 0.04, "learning_rate": 2.5020850708924106e-06, "loss": 1.4714, "step": 150 }, { "epoch": 0.04, "learning_rate": 2.5854879065888243e-06, "loss": 1.4757, "step": 155 }, { "epoch": 0.04, "learning_rate": 2.668890742285238e-06, "loss": 1.4458, "step": 160 }, { "epoch": 0.04, "learning_rate": 2.7522935779816517e-06, "loss": 1.4174, "step": 165 }, { "epoch": 0.04, "learning_rate": 2.835696413678065e-06, "loss": 1.4536, "step": 170 }, { "epoch": 0.04, "learning_rate": 2.919099249374479e-06, "loss": 1.4057, "step": 175 }, { "epoch": 0.05, "learning_rate": 3.0025020850708923e-06, "loss": 1.4007, "step": 180 }, { "epoch": 0.05, "learning_rate": 3.0859049207673064e-06, "loss": 1.395, "step": 185 }, { "epoch": 0.05, "learning_rate": 3.1693077564637197e-06, "loss": 1.3612, "step": 190 }, { "epoch": 0.05, "learning_rate": 3.252710592160134e-06, "loss": 1.4045, "step": 195 }, { "epoch": 0.05, "learning_rate": 3.336113427856547e-06, "loss": 1.3799, "step": 200 }, { "epoch": 0.05, "learning_rate": 3.419516263552961e-06, "loss": 1.375, "step": 205 }, { "epoch": 0.05, "learning_rate": 3.502919099249375e-06, "loss": 1.4005, "step": 210 }, { "epoch": 0.05, "learning_rate": 3.5863219349457885e-06, "loss": 1.3637, "step": 215 }, { "epoch": 0.06, "learning_rate": 3.6697247706422022e-06, "loss": 1.3975, "step": 220 }, { "epoch": 0.06, "learning_rate": 3.753127606338616e-06, "loss": 1.4067, "step": 225 }, { "epoch": 0.06, "learning_rate": 3.83653044203503e-06, "loss": 1.3825, "step": 230 }, { "epoch": 0.06, "learning_rate": 3.919933277731443e-06, "loss": 1.3479, "step": 235 }, { "epoch": 0.06, "learning_rate": 4.003336113427857e-06, "loss": 1.3161, "step": 240 }, { "epoch": 0.06, "learning_rate": 4.086738949124271e-06, "loss": 1.3312, "step": 245 }, { "epoch": 0.06, "learning_rate": 4.170141784820684e-06, "loss": 1.3528, "step": 250 }, { "epoch": 0.06, "learning_rate": 4.253544620517098e-06, "loss": 1.3515, "step": 255 }, { "epoch": 0.07, "learning_rate": 4.336947456213512e-06, "loss": 1.3219, "step": 260 }, { "epoch": 0.07, "learning_rate": 4.420350291909925e-06, "loss": 1.3382, "step": 265 }, { "epoch": 0.07, "learning_rate": 4.503753127606339e-06, "loss": 1.2932, "step": 270 }, { "epoch": 0.07, "learning_rate": 4.587155963302753e-06, "loss": 1.3276, "step": 275 }, { "epoch": 0.07, "learning_rate": 4.6705587989991665e-06, "loss": 1.296, "step": 280 }, { "epoch": 0.07, "learning_rate": 4.75396163469558e-06, "loss": 1.3689, "step": 285 }, { "epoch": 0.07, "learning_rate": 4.837364470391994e-06, "loss": 1.3019, "step": 290 }, { "epoch": 0.07, "learning_rate": 4.9207673060884075e-06, "loss": 1.3385, "step": 295 }, { "epoch": 0.08, "learning_rate": 5.004170141784821e-06, "loss": 1.3242, "step": 300 }, { "epoch": 0.08, "learning_rate": 5.087572977481234e-06, "loss": 1.292, "step": 305 }, { "epoch": 0.08, "learning_rate": 5.170975813177649e-06, "loss": 1.2686, "step": 310 }, { "epoch": 0.08, "learning_rate": 5.254378648874062e-06, "loss": 1.3063, "step": 315 }, { "epoch": 0.08, "learning_rate": 5.337781484570476e-06, "loss": 1.2968, "step": 320 }, { "epoch": 0.08, "learning_rate": 5.421184320266889e-06, "loss": 1.3175, "step": 325 }, { "epoch": 0.08, "learning_rate": 5.504587155963303e-06, "loss": 1.3273, "step": 330 }, { "epoch": 0.08, "learning_rate": 5.587989991659717e-06, "loss": 1.2955, "step": 335 }, { "epoch": 0.09, "learning_rate": 5.67139282735613e-06, "loss": 1.2991, "step": 340 }, { "epoch": 0.09, "learning_rate": 5.754795663052544e-06, "loss": 1.324, "step": 345 }, { "epoch": 0.09, "learning_rate": 5.838198498748958e-06, "loss": 1.3077, "step": 350 }, { "epoch": 0.09, "learning_rate": 5.921601334445372e-06, "loss": 1.263, "step": 355 }, { "epoch": 0.09, "learning_rate": 6.005004170141785e-06, "loss": 1.3247, "step": 360 }, { "epoch": 0.09, "learning_rate": 6.088407005838199e-06, "loss": 1.2586, "step": 365 }, { "epoch": 0.09, "learning_rate": 6.171809841534613e-06, "loss": 1.3098, "step": 370 }, { "epoch": 0.09, "learning_rate": 6.2552126772310265e-06, "loss": 1.2605, "step": 375 }, { "epoch": 0.1, "learning_rate": 6.338615512927439e-06, "loss": 1.3087, "step": 380 }, { "epoch": 0.1, "learning_rate": 6.422018348623854e-06, "loss": 1.3049, "step": 385 }, { "epoch": 0.1, "learning_rate": 6.505421184320268e-06, "loss": 1.3244, "step": 390 }, { "epoch": 0.1, "learning_rate": 6.588824020016681e-06, "loss": 1.2739, "step": 395 }, { "epoch": 0.1, "learning_rate": 6.672226855713094e-06, "loss": 1.3107, "step": 400 }, { "epoch": 0.1, "learning_rate": 6.755629691409509e-06, "loss": 1.274, "step": 405 }, { "epoch": 0.1, "learning_rate": 6.839032527105922e-06, "loss": 1.3037, "step": 410 }, { "epoch": 0.1, "learning_rate": 6.922435362802335e-06, "loss": 1.2839, "step": 415 }, { "epoch": 0.11, "learning_rate": 7.00583819849875e-06, "loss": 1.2556, "step": 420 }, { "epoch": 0.11, "learning_rate": 7.089241034195163e-06, "loss": 1.3045, "step": 425 }, { "epoch": 0.11, "learning_rate": 7.172643869891577e-06, "loss": 1.312, "step": 430 }, { "epoch": 0.11, "learning_rate": 7.25604670558799e-06, "loss": 1.2672, "step": 435 }, { "epoch": 0.11, "learning_rate": 7.3394495412844045e-06, "loss": 1.2706, "step": 440 }, { "epoch": 0.11, "learning_rate": 7.422852376980818e-06, "loss": 1.284, "step": 445 }, { "epoch": 0.11, "learning_rate": 7.506255212677232e-06, "loss": 1.307, "step": 450 }, { "epoch": 0.11, "learning_rate": 7.589658048373645e-06, "loss": 1.2843, "step": 455 }, { "epoch": 0.12, "learning_rate": 7.67306088407006e-06, "loss": 1.2414, "step": 460 }, { "epoch": 0.12, "learning_rate": 7.756463719766472e-06, "loss": 1.2559, "step": 465 }, { "epoch": 0.12, "learning_rate": 7.839866555462887e-06, "loss": 1.2834, "step": 470 }, { "epoch": 0.12, "learning_rate": 7.923269391159301e-06, "loss": 1.2546, "step": 475 }, { "epoch": 0.12, "learning_rate": 8.006672226855714e-06, "loss": 1.2882, "step": 480 }, { "epoch": 0.12, "learning_rate": 8.090075062552127e-06, "loss": 1.277, "step": 485 }, { "epoch": 0.12, "learning_rate": 8.173477898248541e-06, "loss": 1.2793, "step": 490 }, { "epoch": 0.12, "learning_rate": 8.256880733944956e-06, "loss": 1.283, "step": 495 }, { "epoch": 0.13, "learning_rate": 8.340283569641369e-06, "loss": 1.2517, "step": 500 }, { "epoch": 0.13, "learning_rate": 8.423686405337782e-06, "loss": 1.2271, "step": 505 }, { "epoch": 0.13, "learning_rate": 8.507089241034196e-06, "loss": 1.2878, "step": 510 }, { "epoch": 0.13, "learning_rate": 8.59049207673061e-06, "loss": 1.2581, "step": 515 }, { "epoch": 0.13, "learning_rate": 8.673894912427023e-06, "loss": 1.2431, "step": 520 }, { "epoch": 0.13, "learning_rate": 8.757297748123436e-06, "loss": 1.2796, "step": 525 }, { "epoch": 0.13, "learning_rate": 8.84070058381985e-06, "loss": 1.2476, "step": 530 }, { "epoch": 0.13, "learning_rate": 8.924103419516265e-06, "loss": 1.3065, "step": 535 }, { "epoch": 0.14, "learning_rate": 9.007506255212678e-06, "loss": 1.2921, "step": 540 }, { "epoch": 0.14, "learning_rate": 9.090909090909091e-06, "loss": 1.2653, "step": 545 }, { "epoch": 0.14, "learning_rate": 9.174311926605506e-06, "loss": 1.257, "step": 550 }, { "epoch": 0.14, "learning_rate": 9.257714762301918e-06, "loss": 1.2876, "step": 555 }, { "epoch": 0.14, "learning_rate": 9.341117597998333e-06, "loss": 1.2991, "step": 560 }, { "epoch": 0.14, "learning_rate": 9.424520433694746e-06, "loss": 1.2091, "step": 565 }, { "epoch": 0.14, "learning_rate": 9.50792326939116e-06, "loss": 1.2401, "step": 570 }, { "epoch": 0.14, "learning_rate": 9.591326105087573e-06, "loss": 1.2866, "step": 575 }, { "epoch": 0.15, "learning_rate": 9.674728940783988e-06, "loss": 1.2962, "step": 580 }, { "epoch": 0.15, "learning_rate": 9.7581317764804e-06, "loss": 1.2282, "step": 585 }, { "epoch": 0.15, "learning_rate": 9.841534612176815e-06, "loss": 1.2257, "step": 590 }, { "epoch": 0.15, "learning_rate": 9.924937447873228e-06, "loss": 1.246, "step": 595 }, { "epoch": 0.15, "learning_rate": 1.0008340283569642e-05, "loss": 1.2714, "step": 600 }, { "epoch": 0.15, "learning_rate": 1.0091743119266055e-05, "loss": 1.2128, "step": 605 }, { "epoch": 0.15, "learning_rate": 1.0175145954962468e-05, "loss": 1.2687, "step": 610 }, { "epoch": 0.15, "learning_rate": 1.0258548790658884e-05, "loss": 1.2261, "step": 615 }, { "epoch": 0.16, "learning_rate": 1.0341951626355297e-05, "loss": 1.2628, "step": 620 }, { "epoch": 0.16, "learning_rate": 1.0425354462051712e-05, "loss": 1.2711, "step": 625 }, { "epoch": 0.16, "learning_rate": 1.0508757297748125e-05, "loss": 1.2513, "step": 630 }, { "epoch": 0.16, "learning_rate": 1.0592160133444537e-05, "loss": 1.2849, "step": 635 }, { "epoch": 0.16, "learning_rate": 1.0675562969140952e-05, "loss": 1.2722, "step": 640 }, { "epoch": 0.16, "learning_rate": 1.0758965804837365e-05, "loss": 1.2653, "step": 645 }, { "epoch": 0.16, "learning_rate": 1.0842368640533778e-05, "loss": 1.2662, "step": 650 }, { "epoch": 0.16, "learning_rate": 1.0925771476230194e-05, "loss": 1.2725, "step": 655 }, { "epoch": 0.17, "learning_rate": 1.1009174311926607e-05, "loss": 1.2622, "step": 660 }, { "epoch": 0.17, "learning_rate": 1.1092577147623021e-05, "loss": 1.2297, "step": 665 }, { "epoch": 0.17, "learning_rate": 1.1175979983319434e-05, "loss": 1.2577, "step": 670 }, { "epoch": 0.17, "learning_rate": 1.1259382819015847e-05, "loss": 1.2539, "step": 675 }, { "epoch": 0.17, "learning_rate": 1.134278565471226e-05, "loss": 1.2798, "step": 680 }, { "epoch": 0.17, "learning_rate": 1.1426188490408674e-05, "loss": 1.2469, "step": 685 }, { "epoch": 0.17, "learning_rate": 1.1509591326105089e-05, "loss": 1.2443, "step": 690 }, { "epoch": 0.17, "learning_rate": 1.1592994161801503e-05, "loss": 1.2484, "step": 695 }, { "epoch": 0.18, "learning_rate": 1.1676396997497916e-05, "loss": 1.2727, "step": 700 }, { "epoch": 0.18, "learning_rate": 1.1759799833194329e-05, "loss": 1.2523, "step": 705 }, { "epoch": 0.18, "learning_rate": 1.1843202668890744e-05, "loss": 1.2152, "step": 710 }, { "epoch": 0.18, "learning_rate": 1.1926605504587156e-05, "loss": 1.2479, "step": 715 }, { "epoch": 0.18, "learning_rate": 1.201000834028357e-05, "loss": 1.2382, "step": 720 }, { "epoch": 0.18, "learning_rate": 1.2093411175979984e-05, "loss": 1.2411, "step": 725 }, { "epoch": 0.18, "learning_rate": 1.2176814011676398e-05, "loss": 1.242, "step": 730 }, { "epoch": 0.18, "learning_rate": 1.2260216847372813e-05, "loss": 1.2853, "step": 735 }, { "epoch": 0.19, "learning_rate": 1.2343619683069226e-05, "loss": 1.2433, "step": 740 }, { "epoch": 0.19, "learning_rate": 1.2427022518765639e-05, "loss": 1.2487, "step": 745 }, { "epoch": 0.19, "learning_rate": 1.2510425354462053e-05, "loss": 1.2814, "step": 750 }, { "epoch": 0.19, "learning_rate": 1.2593828190158466e-05, "loss": 1.2637, "step": 755 }, { "epoch": 0.19, "learning_rate": 1.2677231025854879e-05, "loss": 1.2466, "step": 760 }, { "epoch": 0.19, "learning_rate": 1.2760633861551295e-05, "loss": 1.2418, "step": 765 }, { "epoch": 0.19, "learning_rate": 1.2844036697247708e-05, "loss": 1.2388, "step": 770 }, { "epoch": 0.19, "learning_rate": 1.2927439532944122e-05, "loss": 1.2694, "step": 775 }, { "epoch": 0.2, "learning_rate": 1.3010842368640535e-05, "loss": 1.2334, "step": 780 }, { "epoch": 0.2, "learning_rate": 1.3094245204336948e-05, "loss": 1.2626, "step": 785 }, { "epoch": 0.2, "learning_rate": 1.3177648040033363e-05, "loss": 1.2359, "step": 790 }, { "epoch": 0.2, "learning_rate": 1.3261050875729775e-05, "loss": 1.2345, "step": 795 }, { "epoch": 0.2, "learning_rate": 1.3344453711426188e-05, "loss": 1.2468, "step": 800 }, { "epoch": 0.2, "learning_rate": 1.3427856547122604e-05, "loss": 1.2172, "step": 805 }, { "epoch": 0.2, "learning_rate": 1.3511259382819017e-05, "loss": 1.2613, "step": 810 }, { "epoch": 0.2, "learning_rate": 1.359466221851543e-05, "loss": 1.2485, "step": 815 }, { "epoch": 0.21, "learning_rate": 1.3678065054211845e-05, "loss": 1.2679, "step": 820 }, { "epoch": 0.21, "learning_rate": 1.3761467889908258e-05, "loss": 1.2337, "step": 825 }, { "epoch": 0.21, "learning_rate": 1.384487072560467e-05, "loss": 1.2615, "step": 830 }, { "epoch": 0.21, "learning_rate": 1.3928273561301085e-05, "loss": 1.2498, "step": 835 }, { "epoch": 0.21, "learning_rate": 1.40116763969975e-05, "loss": 1.256, "step": 840 }, { "epoch": 0.21, "learning_rate": 1.4095079232693914e-05, "loss": 1.2859, "step": 845 }, { "epoch": 0.21, "learning_rate": 1.4178482068390327e-05, "loss": 1.2939, "step": 850 }, { "epoch": 0.21, "learning_rate": 1.426188490408674e-05, "loss": 1.2304, "step": 855 }, { "epoch": 0.22, "learning_rate": 1.4345287739783154e-05, "loss": 1.2289, "step": 860 }, { "epoch": 0.22, "learning_rate": 1.4428690575479567e-05, "loss": 1.2199, "step": 865 }, { "epoch": 0.22, "learning_rate": 1.451209341117598e-05, "loss": 1.2682, "step": 870 }, { "epoch": 0.22, "learning_rate": 1.4595496246872394e-05, "loss": 1.2405, "step": 875 }, { "epoch": 0.22, "learning_rate": 1.4678899082568809e-05, "loss": 1.2202, "step": 880 }, { "epoch": 0.22, "learning_rate": 1.4762301918265223e-05, "loss": 1.2261, "step": 885 }, { "epoch": 0.22, "learning_rate": 1.4845704753961636e-05, "loss": 1.2549, "step": 890 }, { "epoch": 0.22, "learning_rate": 1.4929107589658049e-05, "loss": 1.2336, "step": 895 }, { "epoch": 0.23, "learning_rate": 1.5012510425354464e-05, "loss": 1.2415, "step": 900 }, { "epoch": 0.23, "learning_rate": 1.5095913261050877e-05, "loss": 1.2268, "step": 905 }, { "epoch": 0.23, "learning_rate": 1.517931609674729e-05, "loss": 1.2437, "step": 910 }, { "epoch": 0.23, "learning_rate": 1.5262718932443706e-05, "loss": 1.2189, "step": 915 }, { "epoch": 0.23, "learning_rate": 1.534612176814012e-05, "loss": 1.2188, "step": 920 }, { "epoch": 0.23, "learning_rate": 1.542952460383653e-05, "loss": 1.2668, "step": 925 }, { "epoch": 0.23, "learning_rate": 1.5512927439532944e-05, "loss": 1.2171, "step": 930 }, { "epoch": 0.23, "learning_rate": 1.559633027522936e-05, "loss": 1.2774, "step": 935 }, { "epoch": 0.24, "learning_rate": 1.5679733110925773e-05, "loss": 1.2533, "step": 940 }, { "epoch": 0.24, "learning_rate": 1.5763135946622186e-05, "loss": 1.2429, "step": 945 }, { "epoch": 0.24, "learning_rate": 1.5846538782318602e-05, "loss": 1.2213, "step": 950 }, { "epoch": 0.24, "learning_rate": 1.5929941618015015e-05, "loss": 1.2406, "step": 955 }, { "epoch": 0.24, "learning_rate": 1.6013344453711428e-05, "loss": 1.2353, "step": 960 }, { "epoch": 0.24, "learning_rate": 1.609674728940784e-05, "loss": 1.2483, "step": 965 }, { "epoch": 0.24, "learning_rate": 1.6180150125104254e-05, "loss": 1.2302, "step": 970 }, { "epoch": 0.24, "learning_rate": 1.626355296080067e-05, "loss": 1.2601, "step": 975 }, { "epoch": 0.25, "learning_rate": 1.6346955796497083e-05, "loss": 1.2705, "step": 980 }, { "epoch": 0.25, "learning_rate": 1.6430358632193495e-05, "loss": 1.257, "step": 985 }, { "epoch": 0.25, "learning_rate": 1.6513761467889912e-05, "loss": 1.2427, "step": 990 }, { "epoch": 0.25, "learning_rate": 1.6597164303586325e-05, "loss": 1.2647, "step": 995 }, { "epoch": 0.25, "learning_rate": 1.6680567139282737e-05, "loss": 1.2564, "step": 1000 }, { "epoch": 0.25, "learning_rate": 1.676396997497915e-05, "loss": 1.2585, "step": 1005 }, { "epoch": 0.25, "learning_rate": 1.6847372810675563e-05, "loss": 1.2295, "step": 1010 }, { "epoch": 0.25, "learning_rate": 1.6930775646371976e-05, "loss": 1.2153, "step": 1015 }, { "epoch": 0.26, "learning_rate": 1.7014178482068392e-05, "loss": 1.2389, "step": 1020 }, { "epoch": 0.26, "learning_rate": 1.7097581317764805e-05, "loss": 1.2583, "step": 1025 }, { "epoch": 0.26, "learning_rate": 1.718098415346122e-05, "loss": 1.2156, "step": 1030 }, { "epoch": 0.26, "learning_rate": 1.7264386989157634e-05, "loss": 1.2594, "step": 1035 }, { "epoch": 0.26, "learning_rate": 1.7347789824854047e-05, "loss": 1.2076, "step": 1040 }, { "epoch": 0.26, "learning_rate": 1.743119266055046e-05, "loss": 1.2261, "step": 1045 }, { "epoch": 0.26, "learning_rate": 1.7514595496246873e-05, "loss": 1.264, "step": 1050 }, { "epoch": 0.26, "learning_rate": 1.7597998331943285e-05, "loss": 1.2215, "step": 1055 }, { "epoch": 0.27, "learning_rate": 1.76814011676397e-05, "loss": 1.2392, "step": 1060 }, { "epoch": 0.27, "learning_rate": 1.7764804003336114e-05, "loss": 1.2899, "step": 1065 }, { "epoch": 0.27, "learning_rate": 1.784820683903253e-05, "loss": 1.2522, "step": 1070 }, { "epoch": 0.27, "learning_rate": 1.7931609674728944e-05, "loss": 1.2231, "step": 1075 }, { "epoch": 0.27, "learning_rate": 1.8015012510425356e-05, "loss": 1.2421, "step": 1080 }, { "epoch": 0.27, "learning_rate": 1.809841534612177e-05, "loss": 1.2435, "step": 1085 }, { "epoch": 0.27, "learning_rate": 1.8181818181818182e-05, "loss": 1.2508, "step": 1090 }, { "epoch": 0.27, "learning_rate": 1.8265221017514595e-05, "loss": 1.2094, "step": 1095 }, { "epoch": 0.28, "learning_rate": 1.834862385321101e-05, "loss": 1.2518, "step": 1100 }, { "epoch": 0.28, "learning_rate": 1.8432026688907424e-05, "loss": 1.22, "step": 1105 }, { "epoch": 0.28, "learning_rate": 1.8515429524603837e-05, "loss": 1.2842, "step": 1110 }, { "epoch": 0.28, "learning_rate": 1.8598832360300253e-05, "loss": 1.2219, "step": 1115 }, { "epoch": 0.28, "learning_rate": 1.8682235195996666e-05, "loss": 1.2477, "step": 1120 }, { "epoch": 0.28, "learning_rate": 1.876563803169308e-05, "loss": 1.2666, "step": 1125 }, { "epoch": 0.28, "learning_rate": 1.884904086738949e-05, "loss": 1.2305, "step": 1130 }, { "epoch": 0.28, "learning_rate": 1.8932443703085904e-05, "loss": 1.2361, "step": 1135 }, { "epoch": 0.29, "learning_rate": 1.901584653878232e-05, "loss": 1.2792, "step": 1140 }, { "epoch": 0.29, "learning_rate": 1.9099249374478733e-05, "loss": 1.243, "step": 1145 }, { "epoch": 0.29, "learning_rate": 1.9182652210175146e-05, "loss": 1.2817, "step": 1150 }, { "epoch": 0.29, "learning_rate": 1.9266055045871563e-05, "loss": 1.2483, "step": 1155 }, { "epoch": 0.29, "learning_rate": 1.9349457881567975e-05, "loss": 1.248, "step": 1160 }, { "epoch": 0.29, "learning_rate": 1.9432860717264388e-05, "loss": 1.2669, "step": 1165 }, { "epoch": 0.29, "learning_rate": 1.95162635529608e-05, "loss": 1.227, "step": 1170 }, { "epoch": 0.29, "learning_rate": 1.9599666388657217e-05, "loss": 1.1834, "step": 1175 }, { "epoch": 0.3, "learning_rate": 1.968306922435363e-05, "loss": 1.2384, "step": 1180 }, { "epoch": 0.3, "learning_rate": 1.9766472060050043e-05, "loss": 1.216, "step": 1185 }, { "epoch": 0.3, "learning_rate": 1.9849874895746456e-05, "loss": 1.2611, "step": 1190 }, { "epoch": 0.3, "learning_rate": 1.9933277731442872e-05, "loss": 1.2651, "step": 1195 }, { "epoch": 0.3, "learning_rate": 1.9999999576057108e-05, "loss": 1.2429, "step": 1200 }, { "epoch": 0.3, "learning_rate": 1.999998473805958e-05, "loss": 1.2281, "step": 1205 }, { "epoch": 0.3, "learning_rate": 1.9999948702953286e-05, "loss": 1.2642, "step": 1210 }, { "epoch": 0.3, "learning_rate": 1.9999891470814604e-05, "loss": 1.2357, "step": 1215 }, { "epoch": 0.31, "learning_rate": 1.9999813041764854e-05, "loss": 1.2365, "step": 1220 }, { "epoch": 0.31, "learning_rate": 1.9999713415970277e-05, "loss": 1.2494, "step": 1225 }, { "epoch": 0.31, "learning_rate": 1.999959259364206e-05, "loss": 1.2132, "step": 1230 }, { "epoch": 0.31, "learning_rate": 1.9999450575036306e-05, "loss": 1.2248, "step": 1235 }, { "epoch": 0.31, "learning_rate": 1.999928736045406e-05, "loss": 1.2604, "step": 1240 }, { "epoch": 0.31, "learning_rate": 1.9999102950241278e-05, "loss": 1.2113, "step": 1245 }, { "epoch": 0.31, "learning_rate": 1.999889734478887e-05, "loss": 1.231, "step": 1250 }, { "epoch": 0.31, "learning_rate": 1.9998670544532654e-05, "loss": 1.2155, "step": 1255 }, { "epoch": 0.32, "learning_rate": 1.999842254995338e-05, "loss": 1.2456, "step": 1260 }, { "epoch": 0.32, "learning_rate": 1.999815336157673e-05, "loss": 1.2034, "step": 1265 }, { "epoch": 0.32, "learning_rate": 1.9997862979973308e-05, "loss": 1.2483, "step": 1270 }, { "epoch": 0.32, "learning_rate": 1.9997551405758634e-05, "loss": 1.2034, "step": 1275 }, { "epoch": 0.32, "learning_rate": 1.999721863959316e-05, "loss": 1.232, "step": 1280 }, { "epoch": 0.32, "learning_rate": 1.9996864682182253e-05, "loss": 1.2475, "step": 1285 }, { "epoch": 0.32, "learning_rate": 1.9996489534276207e-05, "loss": 1.2395, "step": 1290 }, { "epoch": 0.32, "learning_rate": 1.999609319667022e-05, "loss": 1.2213, "step": 1295 }, { "epoch": 0.33, "learning_rate": 1.999567567020442e-05, "loss": 1.1962, "step": 1300 }, { "epoch": 0.33, "learning_rate": 1.9995236955763842e-05, "loss": 1.2385, "step": 1305 }, { "epoch": 0.33, "learning_rate": 1.9994777054278435e-05, "loss": 1.253, "step": 1310 }, { "epoch": 0.33, "learning_rate": 1.9994295966723062e-05, "loss": 1.2265, "step": 1315 }, { "epoch": 0.33, "learning_rate": 1.999379369411749e-05, "loss": 1.2343, "step": 1320 }, { "epoch": 0.33, "learning_rate": 1.9993270237526384e-05, "loss": 1.2356, "step": 1325 }, { "epoch": 0.33, "learning_rate": 1.9992725598059333e-05, "loss": 1.2971, "step": 1330 }, { "epoch": 0.33, "learning_rate": 1.9992159776870815e-05, "loss": 1.2501, "step": 1335 }, { "epoch": 0.34, "learning_rate": 1.999157277516021e-05, "loss": 1.2352, "step": 1340 }, { "epoch": 0.34, "learning_rate": 1.999096459417179e-05, "loss": 1.2674, "step": 1345 }, { "epoch": 0.34, "learning_rate": 1.9990335235194727e-05, "loss": 1.2292, "step": 1350 }, { "epoch": 0.34, "learning_rate": 1.998968469956308e-05, "loss": 1.2395, "step": 1355 }, { "epoch": 0.34, "learning_rate": 1.99890129886558e-05, "loss": 1.281, "step": 1360 }, { "epoch": 0.34, "learning_rate": 1.9988320103896727e-05, "loss": 1.2313, "step": 1365 }, { "epoch": 0.34, "learning_rate": 1.998760604675457e-05, "loss": 1.2341, "step": 1370 }, { "epoch": 0.34, "learning_rate": 1.9986870818742932e-05, "loss": 1.2367, "step": 1375 }, { "epoch": 0.35, "learning_rate": 1.9986114421420284e-05, "loss": 1.2316, "step": 1380 }, { "epoch": 0.35, "learning_rate": 1.998533685638997e-05, "loss": 1.2585, "step": 1385 }, { "epoch": 0.35, "learning_rate": 1.998453812530021e-05, "loss": 1.2089, "step": 1390 }, { "epoch": 0.35, "learning_rate": 1.9983718229844083e-05, "loss": 1.2296, "step": 1395 }, { "epoch": 0.35, "learning_rate": 1.9982877171759534e-05, "loss": 1.2558, "step": 1400 }, { "epoch": 0.35, "learning_rate": 1.9982014952829366e-05, "loss": 1.2609, "step": 1405 }, { "epoch": 0.35, "learning_rate": 1.9981131574881233e-05, "loss": 1.2533, "step": 1410 }, { "epoch": 0.35, "learning_rate": 1.998022703978765e-05, "loss": 1.2486, "step": 1415 }, { "epoch": 0.36, "learning_rate": 1.9979301349465966e-05, "loss": 1.2397, "step": 1420 }, { "epoch": 0.36, "learning_rate": 1.9978354505878382e-05, "loss": 1.2433, "step": 1425 }, { "epoch": 0.36, "learning_rate": 1.9977386511031943e-05, "loss": 1.2011, "step": 1430 }, { "epoch": 0.36, "learning_rate": 1.9976397366978508e-05, "loss": 1.2004, "step": 1435 }, { "epoch": 0.36, "learning_rate": 1.997538707581479e-05, "loss": 1.2433, "step": 1440 }, { "epoch": 0.36, "learning_rate": 1.9974355639682317e-05, "loss": 1.2627, "step": 1445 }, { "epoch": 0.36, "learning_rate": 1.997330306076743e-05, "loss": 1.2574, "step": 1450 }, { "epoch": 0.36, "learning_rate": 1.9972229341301305e-05, "loss": 1.2648, "step": 1455 }, { "epoch": 0.37, "learning_rate": 1.9971134483559918e-05, "loss": 1.2643, "step": 1460 }, { "epoch": 0.37, "learning_rate": 1.997001848986405e-05, "loss": 1.1965, "step": 1465 }, { "epoch": 0.37, "learning_rate": 1.9968881362579293e-05, "loss": 1.247, "step": 1470 }, { "epoch": 0.37, "learning_rate": 1.996772310411603e-05, "loss": 1.2171, "step": 1475 }, { "epoch": 0.37, "learning_rate": 1.996654371692944e-05, "loss": 1.2069, "step": 1480 }, { "epoch": 0.37, "learning_rate": 1.9965343203519484e-05, "loss": 1.2287, "step": 1485 }, { "epoch": 0.37, "learning_rate": 1.9964121566430907e-05, "loss": 1.2397, "step": 1490 }, { "epoch": 0.37, "learning_rate": 1.996287880825323e-05, "loss": 1.2209, "step": 1495 }, { "epoch": 0.38, "learning_rate": 1.9961614931620748e-05, "loss": 1.2728, "step": 1500 }, { "epoch": 0.38, "learning_rate": 1.9960329939212516e-05, "loss": 1.274, "step": 1505 }, { "epoch": 0.38, "learning_rate": 1.995902383375235e-05, "loss": 1.2206, "step": 1510 }, { "epoch": 0.38, "learning_rate": 1.9957696618008824e-05, "loss": 1.2284, "step": 1515 }, { "epoch": 0.38, "learning_rate": 1.995634829479525e-05, "loss": 1.2286, "step": 1520 }, { "epoch": 0.38, "learning_rate": 1.9954978866969695e-05, "loss": 1.2909, "step": 1525 }, { "epoch": 0.38, "learning_rate": 1.9953588337434947e-05, "loss": 1.2531, "step": 1530 }, { "epoch": 0.38, "learning_rate": 1.9952176709138538e-05, "loss": 1.2369, "step": 1535 }, { "epoch": 0.39, "learning_rate": 1.995074398507271e-05, "loss": 1.1946, "step": 1540 }, { "epoch": 0.39, "learning_rate": 1.9949290168274437e-05, "loss": 1.1781, "step": 1545 }, { "epoch": 0.39, "learning_rate": 1.9947815261825382e-05, "loss": 1.2127, "step": 1550 }, { "epoch": 0.39, "learning_rate": 1.9946319268851938e-05, "loss": 1.2108, "step": 1555 }, { "epoch": 0.39, "learning_rate": 1.9944802192525176e-05, "loss": 1.2077, "step": 1560 }, { "epoch": 0.39, "learning_rate": 1.994326403606086e-05, "loss": 1.2504, "step": 1565 }, { "epoch": 0.39, "learning_rate": 1.994170480271945e-05, "loss": 1.1973, "step": 1570 }, { "epoch": 0.39, "learning_rate": 1.994012449580607e-05, "loss": 1.2734, "step": 1575 }, { "epoch": 0.4, "learning_rate": 1.9938523118670524e-05, "loss": 1.2345, "step": 1580 }, { "epoch": 0.4, "learning_rate": 1.9936900674707268e-05, "loss": 1.2033, "step": 1585 }, { "epoch": 0.4, "learning_rate": 1.9935257167355426e-05, "loss": 1.2398, "step": 1590 }, { "epoch": 0.4, "learning_rate": 1.9933592600098753e-05, "loss": 1.2644, "step": 1595 }, { "epoch": 0.4, "learning_rate": 1.9931906976465668e-05, "loss": 1.2478, "step": 1600 }, { "epoch": 0.4, "learning_rate": 1.9930200300029205e-05, "loss": 1.2749, "step": 1605 }, { "epoch": 0.4, "learning_rate": 1.9928472574407025e-05, "loss": 1.215, "step": 1610 }, { "epoch": 0.4, "learning_rate": 1.992672380326142e-05, "loss": 1.2136, "step": 1615 }, { "epoch": 0.41, "learning_rate": 1.9924953990299285e-05, "loss": 1.2478, "step": 1620 }, { "epoch": 0.41, "learning_rate": 1.9923163139272113e-05, "loss": 1.229, "step": 1625 }, { "epoch": 0.41, "learning_rate": 1.9921351253976004e-05, "loss": 1.2486, "step": 1630 }, { "epoch": 0.41, "learning_rate": 1.9919518338251624e-05, "loss": 1.2388, "step": 1635 }, { "epoch": 0.41, "learning_rate": 1.991766439598424e-05, "loss": 1.223, "step": 1640 }, { "epoch": 0.41, "learning_rate": 1.991578943110368e-05, "loss": 1.2118, "step": 1645 }, { "epoch": 0.41, "learning_rate": 1.991389344758433e-05, "loss": 1.1814, "step": 1650 }, { "epoch": 0.41, "learning_rate": 1.9911976449445127e-05, "loss": 1.2561, "step": 1655 }, { "epoch": 0.42, "learning_rate": 1.9910038440749574e-05, "loss": 1.241, "step": 1660 }, { "epoch": 0.42, "learning_rate": 1.9908079425605683e-05, "loss": 1.2132, "step": 1665 }, { "epoch": 0.42, "learning_rate": 1.9906099408166014e-05, "loss": 1.2527, "step": 1670 }, { "epoch": 0.42, "learning_rate": 1.9904098392627628e-05, "loss": 1.2471, "step": 1675 }, { "epoch": 0.42, "learning_rate": 1.9902076383232117e-05, "loss": 1.2288, "step": 1680 }, { "epoch": 0.42, "learning_rate": 1.9900033384265556e-05, "loss": 1.2191, "step": 1685 }, { "epoch": 0.42, "learning_rate": 1.989796940005852e-05, "loss": 1.2297, "step": 1690 }, { "epoch": 0.42, "learning_rate": 1.989588443498607e-05, "loss": 1.2367, "step": 1695 }, { "epoch": 0.43, "learning_rate": 1.9893778493467726e-05, "loss": 1.2076, "step": 1700 }, { "epoch": 0.43, "learning_rate": 1.989165157996749e-05, "loss": 1.2709, "step": 1705 }, { "epoch": 0.43, "learning_rate": 1.9889503698993812e-05, "loss": 1.18, "step": 1710 }, { "epoch": 0.43, "learning_rate": 1.9887334855099576e-05, "loss": 1.2411, "step": 1715 }, { "epoch": 0.43, "learning_rate": 1.9885145052882124e-05, "loss": 1.2113, "step": 1720 }, { "epoch": 0.43, "learning_rate": 1.9882934296983197e-05, "loss": 1.2129, "step": 1725 }, { "epoch": 0.43, "learning_rate": 1.9880702592088976e-05, "loss": 1.2166, "step": 1730 }, { "epoch": 0.43, "learning_rate": 1.9878449942930033e-05, "loss": 1.2357, "step": 1735 }, { "epoch": 0.44, "learning_rate": 1.987617635428134e-05, "loss": 1.2102, "step": 1740 }, { "epoch": 0.44, "learning_rate": 1.9873881830962256e-05, "loss": 1.209, "step": 1745 }, { "epoch": 0.44, "learning_rate": 1.9871566377836514e-05, "loss": 1.2481, "step": 1750 }, { "epoch": 0.44, "learning_rate": 1.986922999981221e-05, "loss": 1.271, "step": 1755 }, { "epoch": 0.44, "learning_rate": 1.9866872701841805e-05, "loss": 1.2477, "step": 1760 }, { "epoch": 0.44, "learning_rate": 1.986449448892209e-05, "loss": 1.2176, "step": 1765 }, { "epoch": 0.44, "learning_rate": 1.98620953660942e-05, "loss": 1.2003, "step": 1770 }, { "epoch": 0.44, "learning_rate": 1.985967533844359e-05, "loss": 1.2234, "step": 1775 }, { "epoch": 0.45, "learning_rate": 1.9857234411100017e-05, "loss": 1.2328, "step": 1780 }, { "epoch": 0.45, "learning_rate": 1.9854772589237564e-05, "loss": 1.228, "step": 1785 }, { "epoch": 0.45, "learning_rate": 1.985228987807458e-05, "loss": 1.1777, "step": 1790 }, { "epoch": 0.45, "learning_rate": 1.9849786282873706e-05, "loss": 1.2283, "step": 1795 }, { "epoch": 0.45, "learning_rate": 1.9847261808941847e-05, "loss": 1.2399, "step": 1800 }, { "epoch": 0.45, "learning_rate": 1.9844716461630168e-05, "loss": 1.2385, "step": 1805 }, { "epoch": 0.45, "learning_rate": 1.9842150246334072e-05, "loss": 1.2663, "step": 1810 }, { "epoch": 0.45, "learning_rate": 1.983956316849321e-05, "loss": 1.231, "step": 1815 }, { "epoch": 0.46, "learning_rate": 1.9836955233591443e-05, "loss": 1.2313, "step": 1820 }, { "epoch": 0.46, "learning_rate": 1.9834326447156847e-05, "loss": 1.231, "step": 1825 }, { "epoch": 0.46, "learning_rate": 1.9831676814761696e-05, "loss": 1.2181, "step": 1830 }, { "epoch": 0.46, "learning_rate": 1.9829006342022457e-05, "loss": 1.2396, "step": 1835 }, { "epoch": 0.46, "learning_rate": 1.982631503459977e-05, "loss": 1.2336, "step": 1840 }, { "epoch": 0.46, "learning_rate": 1.9823602898198433e-05, "loss": 1.2473, "step": 1845 }, { "epoch": 0.46, "learning_rate": 1.98208699385674e-05, "loss": 1.2243, "step": 1850 }, { "epoch": 0.46, "learning_rate": 1.9818116161499767e-05, "loss": 1.2343, "step": 1855 }, { "epoch": 0.47, "learning_rate": 1.981534157283275e-05, "loss": 1.2409, "step": 1860 }, { "epoch": 0.47, "learning_rate": 1.981254617844769e-05, "loss": 1.2374, "step": 1865 }, { "epoch": 0.47, "learning_rate": 1.9809729984270022e-05, "loss": 1.2239, "step": 1870 }, { "epoch": 0.47, "learning_rate": 1.9806892996269266e-05, "loss": 1.211, "step": 1875 }, { "epoch": 0.47, "learning_rate": 1.980403522045903e-05, "loss": 1.2613, "step": 1880 }, { "epoch": 0.47, "learning_rate": 1.980115666289699e-05, "loss": 1.2404, "step": 1885 }, { "epoch": 0.47, "learning_rate": 1.979825732968485e-05, "loss": 1.197, "step": 1890 }, { "epoch": 0.47, "learning_rate": 1.9795337226968375e-05, "loss": 1.2453, "step": 1895 }, { "epoch": 0.48, "learning_rate": 1.979239636093735e-05, "loss": 1.2299, "step": 1900 }, { "epoch": 0.48, "learning_rate": 1.9789434737825566e-05, "loss": 1.284, "step": 1905 }, { "epoch": 0.48, "learning_rate": 1.9786452363910822e-05, "loss": 1.1972, "step": 1910 }, { "epoch": 0.48, "learning_rate": 1.9783449245514894e-05, "loss": 1.2398, "step": 1915 }, { "epoch": 0.48, "learning_rate": 1.9780425389003533e-05, "loss": 1.2369, "step": 1920 }, { "epoch": 0.48, "learning_rate": 1.9777380800786456e-05, "loss": 1.2192, "step": 1925 }, { "epoch": 0.48, "learning_rate": 1.977431548731732e-05, "loss": 1.2018, "step": 1930 }, { "epoch": 0.48, "learning_rate": 1.9771229455093703e-05, "loss": 1.2104, "step": 1935 }, { "epoch": 0.49, "learning_rate": 1.976812271065712e-05, "loss": 1.1689, "step": 1940 }, { "epoch": 0.49, "learning_rate": 1.976499526059298e-05, "loss": 1.2483, "step": 1945 }, { "epoch": 0.49, "learning_rate": 1.9761847111530583e-05, "loss": 1.188, "step": 1950 }, { "epoch": 0.49, "learning_rate": 1.97586782701431e-05, "loss": 1.252, "step": 1955 }, { "epoch": 0.49, "learning_rate": 1.9755488743147576e-05, "loss": 1.2482, "step": 1960 }, { "epoch": 0.49, "learning_rate": 1.9752278537304895e-05, "loss": 1.2301, "step": 1965 }, { "epoch": 0.49, "learning_rate": 1.974904765941977e-05, "loss": 1.2089, "step": 1970 }, { "epoch": 0.49, "learning_rate": 1.9745796116340747e-05, "loss": 1.2412, "step": 1975 }, { "epoch": 0.5, "learning_rate": 1.9742523914960157e-05, "loss": 1.1948, "step": 1980 }, { "epoch": 0.5, "learning_rate": 1.973923106221414e-05, "loss": 1.2514, "step": 1985 }, { "epoch": 0.5, "learning_rate": 1.97359175650826e-05, "loss": 1.2316, "step": 1990 }, { "epoch": 0.5, "learning_rate": 1.9732583430589204e-05, "loss": 1.2518, "step": 1995 }, { "epoch": 0.5, "learning_rate": 1.9729228665801362e-05, "loss": 1.2086, "step": 2000 }, { "epoch": 0.5, "learning_rate": 1.9725853277830217e-05, "loss": 1.2396, "step": 2005 }, { "epoch": 0.5, "learning_rate": 1.9722457273830633e-05, "loss": 1.2779, "step": 2010 }, { "epoch": 0.5, "learning_rate": 1.9719040661001156e-05, "loss": 1.223, "step": 2015 }, { "epoch": 0.51, "learning_rate": 1.9715603446584037e-05, "loss": 1.2345, "step": 2020 }, { "epoch": 0.51, "learning_rate": 1.9712145637865185e-05, "loss": 1.2228, "step": 2025 }, { "epoch": 0.51, "learning_rate": 1.9708667242174163e-05, "loss": 1.2139, "step": 2030 }, { "epoch": 0.51, "learning_rate": 1.9705168266884183e-05, "loss": 1.2298, "step": 2035 }, { "epoch": 0.51, "learning_rate": 1.9701648719412064e-05, "loss": 1.223, "step": 2040 }, { "epoch": 0.51, "learning_rate": 1.9698108607218244e-05, "loss": 1.2068, "step": 2045 }, { "epoch": 0.51, "learning_rate": 1.9694547937806752e-05, "loss": 1.207, "step": 2050 }, { "epoch": 0.51, "learning_rate": 1.9690966718725188e-05, "loss": 1.2506, "step": 2055 }, { "epoch": 0.52, "learning_rate": 1.9687364957564705e-05, "loss": 1.2071, "step": 2060 }, { "epoch": 0.52, "learning_rate": 1.9683742661960017e-05, "loss": 1.2172, "step": 2065 }, { "epoch": 0.52, "learning_rate": 1.968009983958935e-05, "loss": 1.238, "step": 2070 }, { "epoch": 0.52, "learning_rate": 1.9676436498174448e-05, "loss": 1.2553, "step": 2075 }, { "epoch": 0.52, "learning_rate": 1.967275264548054e-05, "loss": 1.2319, "step": 2080 }, { "epoch": 0.52, "learning_rate": 1.9669048289316353e-05, "loss": 1.2454, "step": 2085 }, { "epoch": 0.52, "learning_rate": 1.9665323437534058e-05, "loss": 1.2822, "step": 2090 }, { "epoch": 0.52, "learning_rate": 1.9661578098029273e-05, "loss": 1.2203, "step": 2095 }, { "epoch": 0.53, "learning_rate": 1.965781227874105e-05, "loss": 1.2587, "step": 2100 }, { "epoch": 0.53, "learning_rate": 1.9654025987651845e-05, "loss": 1.2404, "step": 2105 }, { "epoch": 0.53, "learning_rate": 1.965021923278752e-05, "loss": 1.2558, "step": 2110 }, { "epoch": 0.53, "learning_rate": 1.96463920222173e-05, "loss": 1.1959, "step": 2115 }, { "epoch": 0.53, "learning_rate": 1.9642544364053782e-05, "loss": 1.2459, "step": 2120 }, { "epoch": 0.53, "learning_rate": 1.9638676266452896e-05, "loss": 1.2445, "step": 2125 }, { "epoch": 0.53, "learning_rate": 1.963478773761391e-05, "loss": 1.2029, "step": 2130 }, { "epoch": 0.53, "learning_rate": 1.963087878577939e-05, "loss": 1.2436, "step": 2135 }, { "epoch": 0.54, "learning_rate": 1.9626949419235194e-05, "loss": 1.1873, "step": 2140 }, { "epoch": 0.54, "learning_rate": 1.9622999646310458e-05, "loss": 1.2446, "step": 2145 }, { "epoch": 0.54, "learning_rate": 1.9619029475377573e-05, "loss": 1.2412, "step": 2150 }, { "epoch": 0.54, "learning_rate": 1.9615038914852163e-05, "loss": 1.2353, "step": 2155 }, { "epoch": 0.54, "learning_rate": 1.961102797319308e-05, "loss": 1.2612, "step": 2160 }, { "epoch": 0.54, "learning_rate": 1.960699665890237e-05, "loss": 1.2478, "step": 2165 }, { "epoch": 0.54, "learning_rate": 1.9602944980525267e-05, "loss": 1.2141, "step": 2170 }, { "epoch": 0.54, "learning_rate": 1.959887294665017e-05, "loss": 1.2029, "step": 2175 }, { "epoch": 0.55, "learning_rate": 1.959478056590863e-05, "loss": 1.2375, "step": 2180 }, { "epoch": 0.55, "learning_rate": 1.9590667846975324e-05, "loss": 1.2282, "step": 2185 }, { "epoch": 0.55, "learning_rate": 1.9586534798568032e-05, "loss": 1.2168, "step": 2190 }, { "epoch": 0.55, "learning_rate": 1.9582381429447648e-05, "loss": 1.2338, "step": 2195 }, { "epoch": 0.55, "learning_rate": 1.957820774841812e-05, "loss": 1.2464, "step": 2200 }, { "epoch": 0.55, "learning_rate": 1.957401376432646e-05, "loss": 1.2439, "step": 2205 }, { "epoch": 0.55, "learning_rate": 1.9569799486062712e-05, "loss": 1.2075, "step": 2210 }, { "epoch": 0.55, "learning_rate": 1.9565564922559947e-05, "loss": 1.2106, "step": 2215 }, { "epoch": 0.56, "learning_rate": 1.9561310082794224e-05, "loss": 1.213, "step": 2220 }, { "epoch": 0.56, "learning_rate": 1.955703497578459e-05, "loss": 1.2596, "step": 2225 }, { "epoch": 0.56, "learning_rate": 1.9552739610593048e-05, "loss": 1.2297, "step": 2230 }, { "epoch": 0.56, "learning_rate": 1.9548423996324544e-05, "loss": 1.2218, "step": 2235 }, { "epoch": 0.56, "learning_rate": 1.9544088142126947e-05, "loss": 1.1932, "step": 2240 }, { "epoch": 0.56, "learning_rate": 1.9539732057191027e-05, "loss": 1.2259, "step": 2245 }, { "epoch": 0.56, "learning_rate": 1.9535355750750444e-05, "loss": 1.2368, "step": 2250 }, { "epoch": 0.56, "learning_rate": 1.9530959232081713e-05, "loss": 1.2543, "step": 2255 }, { "epoch": 0.57, "learning_rate": 1.952654251050419e-05, "loss": 1.2087, "step": 2260 }, { "epoch": 0.57, "learning_rate": 1.9522105595380073e-05, "loss": 1.1934, "step": 2265 }, { "epoch": 0.57, "learning_rate": 1.951764849611435e-05, "loss": 1.2332, "step": 2270 }, { "epoch": 0.57, "learning_rate": 1.9513171222154796e-05, "loss": 1.2608, "step": 2275 }, { "epoch": 0.57, "learning_rate": 1.950867378299195e-05, "loss": 1.2398, "step": 2280 }, { "epoch": 0.57, "learning_rate": 1.9504156188159098e-05, "loss": 1.2344, "step": 2285 }, { "epoch": 0.57, "learning_rate": 1.9499618447232252e-05, "loss": 1.2497, "step": 2290 }, { "epoch": 0.57, "learning_rate": 1.9495060569830126e-05, "loss": 1.2288, "step": 2295 }, { "epoch": 0.58, "learning_rate": 1.9490482565614118e-05, "loss": 1.2423, "step": 2300 }, { "epoch": 0.58, "learning_rate": 1.9485884444288282e-05, "loss": 1.2051, "step": 2305 }, { "epoch": 0.58, "learning_rate": 1.948126621559932e-05, "loss": 1.2416, "step": 2310 }, { "epoch": 0.58, "learning_rate": 1.9476627889336564e-05, "loss": 1.1961, "step": 2315 }, { "epoch": 0.58, "learning_rate": 1.947196947533194e-05, "loss": 1.2133, "step": 2320 }, { "epoch": 0.58, "learning_rate": 1.9467290983459945e-05, "loss": 1.22, "step": 2325 }, { "epoch": 0.58, "learning_rate": 1.946259242363765e-05, "loss": 1.2378, "step": 2330 }, { "epoch": 0.58, "learning_rate": 1.9457873805824664e-05, "loss": 1.2228, "step": 2335 }, { "epoch": 0.59, "learning_rate": 1.9453135140023095e-05, "loss": 1.2394, "step": 2340 }, { "epoch": 0.59, "learning_rate": 1.944837643627757e-05, "loss": 1.2511, "step": 2345 }, { "epoch": 0.59, "learning_rate": 1.9443597704675176e-05, "loss": 1.2271, "step": 2350 }, { "epoch": 0.59, "learning_rate": 1.9438798955345458e-05, "loss": 1.2294, "step": 2355 }, { "epoch": 0.59, "learning_rate": 1.9433980198460395e-05, "loss": 1.2267, "step": 2360 }, { "epoch": 0.59, "learning_rate": 1.9429141444234373e-05, "loss": 1.2628, "step": 2365 }, { "epoch": 0.59, "learning_rate": 1.9424282702924163e-05, "loss": 1.2055, "step": 2370 }, { "epoch": 0.59, "learning_rate": 1.9419403984828915e-05, "loss": 1.2282, "step": 2375 }, { "epoch": 0.6, "learning_rate": 1.9414505300290113e-05, "loss": 1.2312, "step": 2380 }, { "epoch": 0.6, "learning_rate": 1.940958665969157e-05, "loss": 1.2224, "step": 2385 }, { "epoch": 0.6, "learning_rate": 1.940464807345939e-05, "loss": 1.2158, "step": 2390 }, { "epoch": 0.6, "learning_rate": 1.9399689552061977e-05, "loss": 1.2207, "step": 2395 }, { "epoch": 0.6, "learning_rate": 1.9394711106009967e-05, "loss": 1.2208, "step": 2400 }, { "epoch": 0.6, "learning_rate": 1.9389712745856245e-05, "loss": 1.2302, "step": 2405 }, { "epoch": 0.6, "learning_rate": 1.9384694482195912e-05, "loss": 1.1974, "step": 2410 }, { "epoch": 0.6, "learning_rate": 1.9379656325666248e-05, "loss": 1.2391, "step": 2415 }, { "epoch": 0.61, "learning_rate": 1.93745982869467e-05, "loss": 1.2614, "step": 2420 }, { "epoch": 0.61, "learning_rate": 1.9369520376758872e-05, "loss": 1.242, "step": 2425 }, { "epoch": 0.61, "learning_rate": 1.9364422605866476e-05, "loss": 1.2564, "step": 2430 }, { "epoch": 0.61, "learning_rate": 1.935930498507533e-05, "loss": 1.239, "step": 2435 }, { "epoch": 0.61, "learning_rate": 1.935416752523333e-05, "loss": 1.22, "step": 2440 }, { "epoch": 0.61, "learning_rate": 1.9349010237230423e-05, "loss": 1.2761, "step": 2445 }, { "epoch": 0.61, "learning_rate": 1.934383313199858e-05, "loss": 1.2167, "step": 2450 }, { "epoch": 0.61, "learning_rate": 1.9338636220511784e-05, "loss": 1.2333, "step": 2455 }, { "epoch": 0.62, "learning_rate": 1.933341951378601e-05, "loss": 1.2117, "step": 2460 }, { "epoch": 0.62, "learning_rate": 1.9328183022879172e-05, "loss": 1.2154, "step": 2465 }, { "epoch": 0.62, "learning_rate": 1.9322926758891145e-05, "loss": 1.1964, "step": 2470 }, { "epoch": 0.62, "learning_rate": 1.93176507329637e-05, "loss": 1.1968, "step": 2475 }, { "epoch": 0.62, "learning_rate": 1.9312354956280505e-05, "loss": 1.2335, "step": 2480 }, { "epoch": 0.62, "learning_rate": 1.930703944006709e-05, "loss": 1.219, "step": 2485 }, { "epoch": 0.62, "learning_rate": 1.930170419559084e-05, "loss": 1.2363, "step": 2490 }, { "epoch": 0.62, "learning_rate": 1.9296349234160934e-05, "loss": 1.2142, "step": 2495 }, { "epoch": 0.63, "learning_rate": 1.929097456712837e-05, "loss": 1.253, "step": 2500 }, { "epoch": 0.63, "learning_rate": 1.92855802058859e-05, "loss": 1.2343, "step": 2505 }, { "epoch": 0.63, "learning_rate": 1.9280166161868026e-05, "loss": 1.2021, "step": 2510 }, { "epoch": 0.63, "learning_rate": 1.9274732446550977e-05, "loss": 1.2367, "step": 2515 }, { "epoch": 0.63, "learning_rate": 1.926927907145268e-05, "loss": 1.2306, "step": 2520 }, { "epoch": 0.63, "learning_rate": 1.926380604813272e-05, "loss": 1.2297, "step": 2525 }, { "epoch": 0.63, "learning_rate": 1.9258313388192354e-05, "loss": 1.2179, "step": 2530 }, { "epoch": 0.63, "learning_rate": 1.9252801103274444e-05, "loss": 1.1992, "step": 2535 }, { "epoch": 0.64, "learning_rate": 1.9247269205063458e-05, "loss": 1.2267, "step": 2540 }, { "epoch": 0.64, "learning_rate": 1.9241717705285442e-05, "loss": 1.1992, "step": 2545 }, { "epoch": 0.64, "learning_rate": 1.9236146615707985e-05, "loss": 1.2355, "step": 2550 }, { "epoch": 0.64, "learning_rate": 1.9230555948140206e-05, "loss": 1.2317, "step": 2555 }, { "epoch": 0.64, "learning_rate": 1.922494571443272e-05, "loss": 1.2213, "step": 2560 }, { "epoch": 0.64, "learning_rate": 1.9219315926477623e-05, "loss": 1.2224, "step": 2565 }, { "epoch": 0.64, "learning_rate": 1.9213666596208452e-05, "loss": 1.2453, "step": 2570 }, { "epoch": 0.64, "learning_rate": 1.920799773560017e-05, "loss": 1.2307, "step": 2575 }, { "epoch": 0.65, "learning_rate": 1.9202309356669147e-05, "loss": 1.237, "step": 2580 }, { "epoch": 0.65, "learning_rate": 1.9196601471473122e-05, "loss": 1.2232, "step": 2585 }, { "epoch": 0.65, "learning_rate": 1.919087409211117e-05, "loss": 1.25, "step": 2590 }, { "epoch": 0.65, "learning_rate": 1.9185127230723705e-05, "loss": 1.2474, "step": 2595 }, { "epoch": 0.65, "learning_rate": 1.9179360899492424e-05, "loss": 1.2273, "step": 2600 }, { "epoch": 0.65, "learning_rate": 1.9173575110640308e-05, "loss": 1.2288, "step": 2605 }, { "epoch": 0.65, "learning_rate": 1.916776987643157e-05, "loss": 1.2216, "step": 2610 }, { "epoch": 0.65, "learning_rate": 1.9161945209171652e-05, "loss": 1.2462, "step": 2615 }, { "epoch": 0.66, "learning_rate": 1.9156101121207176e-05, "loss": 1.2296, "step": 2620 }, { "epoch": 0.66, "learning_rate": 1.9150237624925946e-05, "loss": 1.2043, "step": 2625 }, { "epoch": 0.66, "learning_rate": 1.914435473275689e-05, "loss": 1.2386, "step": 2630 }, { "epoch": 0.66, "learning_rate": 1.9138452457170063e-05, "loss": 1.2452, "step": 2635 }, { "epoch": 0.66, "learning_rate": 1.91325308106766e-05, "loss": 1.2039, "step": 2640 }, { "epoch": 0.66, "learning_rate": 1.91265898058287e-05, "loss": 1.2232, "step": 2645 }, { "epoch": 0.66, "learning_rate": 1.9120629455219593e-05, "loss": 1.2459, "step": 2650 }, { "epoch": 0.66, "learning_rate": 1.911464977148352e-05, "loss": 1.2158, "step": 2655 }, { "epoch": 0.67, "learning_rate": 1.9108650767295697e-05, "loss": 1.2249, "step": 2660 }, { "epoch": 0.67, "learning_rate": 1.9102632455372302e-05, "loss": 1.1819, "step": 2665 }, { "epoch": 0.67, "learning_rate": 1.9096594848470436e-05, "loss": 1.1932, "step": 2670 }, { "epoch": 0.67, "learning_rate": 1.9090537959388098e-05, "loss": 1.2307, "step": 2675 }, { "epoch": 0.67, "learning_rate": 1.9084461800964164e-05, "loss": 1.2405, "step": 2680 }, { "epoch": 0.67, "learning_rate": 1.9078366386078343e-05, "loss": 1.2681, "step": 2685 }, { "epoch": 0.67, "learning_rate": 1.9072251727651185e-05, "loss": 1.2417, "step": 2690 }, { "epoch": 0.67, "learning_rate": 1.906611783864401e-05, "loss": 1.1916, "step": 2695 }, { "epoch": 0.68, "learning_rate": 1.905996473205891e-05, "loss": 1.2146, "step": 2700 }, { "epoch": 0.68, "learning_rate": 1.9053792420938714e-05, "loss": 1.2401, "step": 2705 }, { "epoch": 0.68, "learning_rate": 1.9047600918366952e-05, "loss": 1.215, "step": 2710 }, { "epoch": 0.68, "learning_rate": 1.9041390237467845e-05, "loss": 1.2557, "step": 2715 }, { "epoch": 0.68, "learning_rate": 1.9035160391406262e-05, "loss": 1.2505, "step": 2720 }, { "epoch": 0.68, "learning_rate": 1.902891139338769e-05, "loss": 1.218, "step": 2725 }, { "epoch": 0.68, "learning_rate": 1.902264325665822e-05, "loss": 1.2362, "step": 2730 }, { "epoch": 0.68, "learning_rate": 1.9016355994504514e-05, "loss": 1.2511, "step": 2735 }, { "epoch": 0.69, "learning_rate": 1.9010049620253767e-05, "loss": 1.2317, "step": 2740 }, { "epoch": 0.69, "learning_rate": 1.9003724147273688e-05, "loss": 1.2383, "step": 2745 }, { "epoch": 0.69, "learning_rate": 1.8997379588972472e-05, "loss": 1.2282, "step": 2750 }, { "epoch": 0.69, "learning_rate": 1.899101595879877e-05, "loss": 1.2081, "step": 2755 }, { "epoch": 0.69, "learning_rate": 1.8984633270241662e-05, "loss": 1.2424, "step": 2760 }, { "epoch": 0.69, "learning_rate": 1.8978231536830616e-05, "loss": 1.2059, "step": 2765 }, { "epoch": 0.69, "learning_rate": 1.897181077213548e-05, "loss": 1.211, "step": 2770 }, { "epoch": 0.69, "learning_rate": 1.8965370989766443e-05, "loss": 1.2084, "step": 2775 }, { "epoch": 0.7, "learning_rate": 1.8958912203373995e-05, "loss": 1.2641, "step": 2780 }, { "epoch": 0.7, "learning_rate": 1.895243442664892e-05, "loss": 1.2338, "step": 2785 }, { "epoch": 0.7, "learning_rate": 1.894593767332226e-05, "loss": 1.237, "step": 2790 }, { "epoch": 0.7, "learning_rate": 1.8939421957165263e-05, "loss": 1.2618, "step": 2795 }, { "epoch": 0.7, "learning_rate": 1.893288729198939e-05, "loss": 1.1891, "step": 2800 }, { "epoch": 0.7, "learning_rate": 1.8926333691646267e-05, "loss": 1.1951, "step": 2805 }, { "epoch": 0.7, "learning_rate": 1.8919761170027646e-05, "loss": 1.2211, "step": 2810 }, { "epoch": 0.7, "learning_rate": 1.8913169741065394e-05, "loss": 1.2308, "step": 2815 }, { "epoch": 0.71, "learning_rate": 1.8906559418731463e-05, "loss": 1.2544, "step": 2820 }, { "epoch": 0.71, "learning_rate": 1.889993021703784e-05, "loss": 1.1971, "step": 2825 }, { "epoch": 0.71, "learning_rate": 1.889328215003654e-05, "loss": 1.2254, "step": 2830 }, { "epoch": 0.71, "learning_rate": 1.8886615231819566e-05, "loss": 1.2344, "step": 2835 }, { "epoch": 0.71, "learning_rate": 1.8879929476518874e-05, "loss": 1.2376, "step": 2840 }, { "epoch": 0.71, "learning_rate": 1.887322489830636e-05, "loss": 1.234, "step": 2845 }, { "epoch": 0.71, "learning_rate": 1.8866501511393807e-05, "loss": 1.2289, "step": 2850 }, { "epoch": 0.71, "learning_rate": 1.8859759330032872e-05, "loss": 1.2122, "step": 2855 }, { "epoch": 0.72, "learning_rate": 1.8852998368515062e-05, "loss": 1.256, "step": 2860 }, { "epoch": 0.72, "learning_rate": 1.8846218641171674e-05, "loss": 1.2181, "step": 2865 }, { "epoch": 0.72, "learning_rate": 1.8839420162373796e-05, "loss": 1.2542, "step": 2870 }, { "epoch": 0.72, "learning_rate": 1.8832602946532256e-05, "loss": 1.2109, "step": 2875 }, { "epoch": 0.72, "learning_rate": 1.8825767008097603e-05, "loss": 1.2321, "step": 2880 }, { "epoch": 0.72, "learning_rate": 1.8818912361560072e-05, "loss": 1.2449, "step": 2885 }, { "epoch": 0.72, "learning_rate": 1.881203902144956e-05, "loss": 1.1804, "step": 2890 }, { "epoch": 0.72, "learning_rate": 1.8805147002335574e-05, "loss": 1.2743, "step": 2895 }, { "epoch": 0.73, "learning_rate": 1.879823631882723e-05, "loss": 1.2579, "step": 2900 }, { "epoch": 0.73, "learning_rate": 1.8791306985573203e-05, "loss": 1.231, "step": 2905 }, { "epoch": 0.73, "learning_rate": 1.878435901726169e-05, "loss": 1.2431, "step": 2910 }, { "epoch": 0.73, "learning_rate": 1.8777392428620405e-05, "loss": 1.2122, "step": 2915 }, { "epoch": 0.73, "learning_rate": 1.8770407234416522e-05, "loss": 1.2906, "step": 2920 }, { "epoch": 0.73, "learning_rate": 1.8763403449456653e-05, "loss": 1.2345, "step": 2925 }, { "epoch": 0.73, "learning_rate": 1.8756381088586826e-05, "loss": 1.2166, "step": 2930 }, { "epoch": 0.73, "learning_rate": 1.8749340166692435e-05, "loss": 1.2425, "step": 2935 }, { "epoch": 0.74, "learning_rate": 1.874228069869822e-05, "loss": 1.2132, "step": 2940 }, { "epoch": 0.74, "learning_rate": 1.8735202699568237e-05, "loss": 1.203, "step": 2945 }, { "epoch": 0.74, "learning_rate": 1.872810618430582e-05, "loss": 1.2579, "step": 2950 }, { "epoch": 0.74, "learning_rate": 1.8720991167953553e-05, "loss": 1.261, "step": 2955 }, { "epoch": 0.74, "learning_rate": 1.8713857665593235e-05, "loss": 1.2144, "step": 2960 }, { "epoch": 0.74, "learning_rate": 1.8706705692345854e-05, "loss": 1.2518, "step": 2965 }, { "epoch": 0.74, "learning_rate": 1.8699535263371548e-05, "loss": 1.1973, "step": 2970 }, { "epoch": 0.74, "learning_rate": 1.8692346393869575e-05, "loss": 1.2221, "step": 2975 }, { "epoch": 0.75, "learning_rate": 1.8685139099078286e-05, "loss": 1.1996, "step": 2980 }, { "epoch": 0.75, "learning_rate": 1.867791339427508e-05, "loss": 1.2191, "step": 2985 }, { "epoch": 0.75, "learning_rate": 1.867066929477639e-05, "loss": 1.2154, "step": 2990 }, { "epoch": 0.75, "learning_rate": 1.8663406815937634e-05, "loss": 1.241, "step": 2995 }, { "epoch": 0.75, "learning_rate": 1.8656125973153193e-05, "loss": 1.2254, "step": 3000 }, { "epoch": 0.75, "learning_rate": 1.8648826781856372e-05, "loss": 1.2285, "step": 3005 }, { "epoch": 0.75, "learning_rate": 1.8641509257519367e-05, "loss": 1.2554, "step": 3010 }, { "epoch": 0.75, "learning_rate": 1.863417341565324e-05, "loss": 1.2438, "step": 3015 }, { "epoch": 0.76, "learning_rate": 1.862681927180788e-05, "loss": 1.2142, "step": 3020 }, { "epoch": 0.76, "learning_rate": 1.8619446841571966e-05, "loss": 1.2233, "step": 3025 }, { "epoch": 0.76, "learning_rate": 1.861205614057294e-05, "loss": 1.1655, "step": 3030 }, { "epoch": 0.76, "learning_rate": 1.8604647184476986e-05, "loss": 1.2342, "step": 3035 }, { "epoch": 0.76, "learning_rate": 1.859721998898896e-05, "loss": 1.2451, "step": 3040 }, { "epoch": 0.76, "learning_rate": 1.8589774569852405e-05, "loss": 1.2025, "step": 3045 }, { "epoch": 0.76, "learning_rate": 1.858231094284947e-05, "loss": 1.2153, "step": 3050 }, { "epoch": 0.76, "learning_rate": 1.8574829123800916e-05, "loss": 1.2244, "step": 3055 }, { "epoch": 0.77, "learning_rate": 1.856732912856606e-05, "loss": 1.2093, "step": 3060 }, { "epoch": 0.77, "learning_rate": 1.8559810973042748e-05, "loss": 1.2439, "step": 3065 }, { "epoch": 0.77, "learning_rate": 1.855227467316732e-05, "loss": 1.24, "step": 3070 }, { "epoch": 0.77, "learning_rate": 1.854472024491458e-05, "loss": 1.2732, "step": 3075 }, { "epoch": 0.77, "learning_rate": 1.853714770429775e-05, "loss": 1.2401, "step": 3080 }, { "epoch": 0.77, "learning_rate": 1.8529557067368452e-05, "loss": 1.2335, "step": 3085 }, { "epoch": 0.77, "learning_rate": 1.8521948350216673e-05, "loss": 1.1939, "step": 3090 }, { "epoch": 0.77, "learning_rate": 1.8514321568970714e-05, "loss": 1.2455, "step": 3095 }, { "epoch": 0.78, "learning_rate": 1.8506676739797168e-05, "loss": 1.2048, "step": 3100 }, { "epoch": 0.78, "learning_rate": 1.849901387890089e-05, "loss": 1.2241, "step": 3105 }, { "epoch": 0.78, "learning_rate": 1.8491333002524955e-05, "loss": 1.2531, "step": 3110 }, { "epoch": 0.78, "learning_rate": 1.848363412695063e-05, "loss": 1.2372, "step": 3115 }, { "epoch": 0.78, "learning_rate": 1.8475917268497315e-05, "loss": 1.2394, "step": 3120 }, { "epoch": 0.78, "learning_rate": 1.8468182443522556e-05, "loss": 1.1985, "step": 3125 }, { "epoch": 0.78, "learning_rate": 1.8460429668421972e-05, "loss": 1.2258, "step": 3130 }, { "epoch": 0.78, "learning_rate": 1.8452658959629225e-05, "loss": 1.2346, "step": 3135 }, { "epoch": 0.79, "learning_rate": 1.8444870333615993e-05, "loss": 1.2279, "step": 3140 }, { "epoch": 0.79, "learning_rate": 1.8437063806891946e-05, "loss": 1.227, "step": 3145 }, { "epoch": 0.79, "learning_rate": 1.842923939600468e-05, "loss": 1.2126, "step": 3150 }, { "epoch": 0.79, "learning_rate": 1.842139711753971e-05, "loss": 1.2266, "step": 3155 }, { "epoch": 0.79, "learning_rate": 1.8413536988120434e-05, "loss": 1.2316, "step": 3160 }, { "epoch": 0.79, "learning_rate": 1.8405659024408064e-05, "loss": 1.1792, "step": 3165 }, { "epoch": 0.79, "learning_rate": 1.839776324310164e-05, "loss": 1.2202, "step": 3170 }, { "epoch": 0.79, "learning_rate": 1.8389849660937968e-05, "loss": 1.1974, "step": 3175 }, { "epoch": 0.8, "learning_rate": 1.838191829469156e-05, "loss": 1.1633, "step": 3180 }, { "epoch": 0.8, "learning_rate": 1.8373969161174665e-05, "loss": 1.1885, "step": 3185 }, { "epoch": 0.8, "learning_rate": 1.8366002277237162e-05, "loss": 1.2157, "step": 3190 }, { "epoch": 0.8, "learning_rate": 1.8358017659766572e-05, "loss": 1.2212, "step": 3195 }, { "epoch": 0.8, "learning_rate": 1.8350015325688e-05, "loss": 1.2215, "step": 3200 }, { "epoch": 0.8, "learning_rate": 1.8341995291964103e-05, "loss": 1.1971, "step": 3205 }, { "epoch": 0.8, "learning_rate": 1.8333957575595066e-05, "loss": 1.2531, "step": 3210 }, { "epoch": 0.8, "learning_rate": 1.8325902193618547e-05, "loss": 1.2007, "step": 3215 }, { "epoch": 0.81, "learning_rate": 1.831782916310965e-05, "loss": 1.255, "step": 3220 }, { "epoch": 0.81, "learning_rate": 1.8309738501180897e-05, "loss": 1.2902, "step": 3225 }, { "epoch": 0.81, "learning_rate": 1.8301630224982173e-05, "loss": 1.2184, "step": 3230 }, { "epoch": 0.81, "learning_rate": 1.8293504351700712e-05, "loss": 1.2425, "step": 3235 }, { "epoch": 0.81, "learning_rate": 1.8285360898561035e-05, "loss": 1.2283, "step": 3240 }, { "epoch": 0.81, "learning_rate": 1.827719988282494e-05, "loss": 1.2181, "step": 3245 }, { "epoch": 0.81, "learning_rate": 1.826902132179144e-05, "loss": 1.2436, "step": 3250 }, { "epoch": 0.81, "learning_rate": 1.8260825232796758e-05, "loss": 1.239, "step": 3255 }, { "epoch": 0.82, "learning_rate": 1.8252611633214247e-05, "loss": 1.2097, "step": 3260 }, { "epoch": 0.82, "learning_rate": 1.82443805404544e-05, "loss": 1.1906, "step": 3265 }, { "epoch": 0.82, "learning_rate": 1.8236131971964775e-05, "loss": 1.2391, "step": 3270 }, { "epoch": 0.82, "learning_rate": 1.8227865945229978e-05, "loss": 1.2248, "step": 3275 }, { "epoch": 0.82, "learning_rate": 1.821958247777163e-05, "loss": 1.2385, "step": 3280 }, { "epoch": 0.82, "learning_rate": 1.8211281587148305e-05, "loss": 1.2571, "step": 3285 }, { "epoch": 0.82, "learning_rate": 1.8202963290955523e-05, "loss": 1.2158, "step": 3290 }, { "epoch": 0.82, "learning_rate": 1.819462760682569e-05, "loss": 1.2233, "step": 3295 }, { "epoch": 0.83, "learning_rate": 1.818627455242808e-05, "loss": 1.2529, "step": 3300 }, { "epoch": 0.83, "learning_rate": 1.817790414546877e-05, "loss": 1.2594, "step": 3305 }, { "epoch": 0.83, "learning_rate": 1.816951640369064e-05, "loss": 1.2181, "step": 3310 }, { "epoch": 0.83, "learning_rate": 1.81611113448733e-05, "loss": 1.2211, "step": 3315 }, { "epoch": 0.83, "learning_rate": 1.8152688986833073e-05, "loss": 1.1897, "step": 3320 }, { "epoch": 0.83, "learning_rate": 1.8144249347422946e-05, "loss": 1.1894, "step": 3325 }, { "epoch": 0.83, "learning_rate": 1.813579244453255e-05, "loss": 1.2001, "step": 3330 }, { "epoch": 0.83, "learning_rate": 1.8127318296088093e-05, "loss": 1.2317, "step": 3335 }, { "epoch": 0.84, "learning_rate": 1.8118826920052352e-05, "loss": 1.2341, "step": 3340 }, { "epoch": 0.84, "learning_rate": 1.8110318334424617e-05, "loss": 1.2312, "step": 3345 }, { "epoch": 0.84, "learning_rate": 1.8101792557240653e-05, "loss": 1.2295, "step": 3350 }, { "epoch": 0.84, "learning_rate": 1.8093249606572673e-05, "loss": 1.2464, "step": 3355 }, { "epoch": 0.84, "learning_rate": 1.8084689500529288e-05, "loss": 1.2346, "step": 3360 }, { "epoch": 0.84, "learning_rate": 1.8076112257255477e-05, "loss": 1.1914, "step": 3365 }, { "epoch": 0.84, "learning_rate": 1.8067517894932548e-05, "loss": 1.229, "step": 3370 }, { "epoch": 0.84, "learning_rate": 1.8058906431778085e-05, "loss": 1.2008, "step": 3375 }, { "epoch": 0.85, "learning_rate": 1.8050277886045932e-05, "loss": 1.2341, "step": 3380 }, { "epoch": 0.85, "learning_rate": 1.8041632276026138e-05, "loss": 1.257, "step": 3385 }, { "epoch": 0.85, "learning_rate": 1.8032969620044923e-05, "loss": 1.2306, "step": 3390 }, { "epoch": 0.85, "learning_rate": 1.8024289936464644e-05, "loss": 1.2187, "step": 3395 }, { "epoch": 0.85, "learning_rate": 1.8015593243683747e-05, "loss": 1.2279, "step": 3400 }, { "epoch": 0.85, "learning_rate": 1.8006879560136733e-05, "loss": 1.2241, "step": 3405 }, { "epoch": 0.85, "learning_rate": 1.7998148904294124e-05, "loss": 1.1996, "step": 3410 }, { "epoch": 0.85, "learning_rate": 1.7989401294662414e-05, "loss": 1.2356, "step": 3415 }, { "epoch": 0.86, "learning_rate": 1.7980636749784028e-05, "loss": 1.2351, "step": 3420 }, { "epoch": 0.86, "learning_rate": 1.7971855288237302e-05, "loss": 1.2429, "step": 3425 }, { "epoch": 0.86, "learning_rate": 1.7963056928636424e-05, "loss": 1.2373, "step": 3430 }, { "epoch": 0.86, "learning_rate": 1.7954241689631397e-05, "loss": 1.2198, "step": 3435 }, { "epoch": 0.86, "learning_rate": 1.7945409589908013e-05, "loss": 1.2012, "step": 3440 }, { "epoch": 0.86, "learning_rate": 1.7936560648187793e-05, "loss": 1.1995, "step": 3445 }, { "epoch": 0.86, "learning_rate": 1.7927694883227968e-05, "loss": 1.2004, "step": 3450 }, { "epoch": 0.86, "learning_rate": 1.7918812313821422e-05, "loss": 1.2491, "step": 3455 }, { "epoch": 0.87, "learning_rate": 1.7909912958796663e-05, "loss": 1.2045, "step": 3460 }, { "epoch": 0.87, "learning_rate": 1.7900996837017778e-05, "loss": 1.2369, "step": 3465 }, { "epoch": 0.87, "learning_rate": 1.7892063967384403e-05, "loss": 1.1898, "step": 3470 }, { "epoch": 0.87, "learning_rate": 1.788311436883166e-05, "loss": 1.2239, "step": 3475 }, { "epoch": 0.87, "learning_rate": 1.7874148060330142e-05, "loss": 1.2119, "step": 3480 }, { "epoch": 0.87, "learning_rate": 1.786516506088586e-05, "loss": 1.2084, "step": 3485 }, { "epoch": 0.87, "learning_rate": 1.7856165389540208e-05, "loss": 1.2211, "step": 3490 }, { "epoch": 0.87, "learning_rate": 1.784714906536991e-05, "loss": 1.2327, "step": 3495 }, { "epoch": 0.88, "learning_rate": 1.7838116107487e-05, "loss": 1.2076, "step": 3500 }, { "epoch": 0.88, "learning_rate": 1.7829066535038765e-05, "loss": 1.246, "step": 3505 }, { "epoch": 0.88, "learning_rate": 1.782000036720771e-05, "loss": 1.2143, "step": 3510 }, { "epoch": 0.88, "learning_rate": 1.7810917623211524e-05, "loss": 1.227, "step": 3515 }, { "epoch": 0.88, "learning_rate": 1.7801818322303018e-05, "loss": 1.2292, "step": 3520 }, { "epoch": 0.88, "learning_rate": 1.779270248377012e-05, "loss": 1.2026, "step": 3525 }, { "epoch": 0.88, "learning_rate": 1.7783570126935793e-05, "loss": 1.2531, "step": 3530 }, { "epoch": 0.88, "learning_rate": 1.777442127115803e-05, "loss": 1.207, "step": 3535 }, { "epoch": 0.89, "learning_rate": 1.7765255935829784e-05, "loss": 1.223, "step": 3540 }, { "epoch": 0.89, "learning_rate": 1.7756074140378943e-05, "loss": 1.2487, "step": 3545 }, { "epoch": 0.89, "learning_rate": 1.7746875904268294e-05, "loss": 1.2205, "step": 3550 }, { "epoch": 0.89, "learning_rate": 1.773766124699547e-05, "loss": 1.2194, "step": 3555 }, { "epoch": 0.89, "learning_rate": 1.7728430188092902e-05, "loss": 1.2442, "step": 3560 }, { "epoch": 0.89, "learning_rate": 1.7719182747127805e-05, "loss": 1.2298, "step": 3565 }, { "epoch": 0.89, "learning_rate": 1.77099189437021e-05, "loss": 1.2517, "step": 3570 }, { "epoch": 0.89, "learning_rate": 1.770063879745241e-05, "loss": 1.2253, "step": 3575 }, { "epoch": 0.9, "learning_rate": 1.769134232804999e-05, "loss": 1.2334, "step": 3580 }, { "epoch": 0.9, "learning_rate": 1.76820295552007e-05, "loss": 1.2115, "step": 3585 }, { "epoch": 0.9, "learning_rate": 1.7672700498644946e-05, "loss": 1.2548, "step": 3590 }, { "epoch": 0.9, "learning_rate": 1.766335517815767e-05, "loss": 1.2436, "step": 3595 }, { "epoch": 0.9, "learning_rate": 1.7653993613548276e-05, "loss": 1.2099, "step": 3600 }, { "epoch": 0.9, "learning_rate": 1.7644615824660606e-05, "loss": 1.1964, "step": 3605 }, { "epoch": 0.9, "learning_rate": 1.763522183137289e-05, "loss": 1.2008, "step": 3610 }, { "epoch": 0.9, "learning_rate": 1.7625811653597714e-05, "loss": 1.2142, "step": 3615 }, { "epoch": 0.91, "learning_rate": 1.7616385311281957e-05, "loss": 1.216, "step": 3620 }, { "epoch": 0.91, "learning_rate": 1.7606942824406773e-05, "loss": 1.21, "step": 3625 }, { "epoch": 0.91, "learning_rate": 1.7597484212987534e-05, "loss": 1.2378, "step": 3630 }, { "epoch": 0.91, "learning_rate": 1.7588009497073794e-05, "loss": 1.2175, "step": 3635 }, { "epoch": 0.91, "learning_rate": 1.7578518696749242e-05, "loss": 1.205, "step": 3640 }, { "epoch": 0.91, "learning_rate": 1.7569011832131656e-05, "loss": 1.244, "step": 3645 }, { "epoch": 0.91, "learning_rate": 1.755948892337288e-05, "loss": 1.233, "step": 3650 }, { "epoch": 0.91, "learning_rate": 1.754994999065875e-05, "loss": 1.2029, "step": 3655 }, { "epoch": 0.92, "learning_rate": 1.754039505420908e-05, "loss": 1.2124, "step": 3660 }, { "epoch": 0.92, "learning_rate": 1.7530824134277603e-05, "loss": 1.2085, "step": 3665 }, { "epoch": 0.92, "learning_rate": 1.7521237251151932e-05, "loss": 1.224, "step": 3670 }, { "epoch": 0.92, "learning_rate": 1.751163442515352e-05, "loss": 1.2417, "step": 3675 }, { "epoch": 0.92, "learning_rate": 1.7502015676637617e-05, "loss": 1.222, "step": 3680 }, { "epoch": 0.92, "learning_rate": 1.749238102599321e-05, "loss": 1.228, "step": 3685 }, { "epoch": 0.92, "learning_rate": 1.748273049364301e-05, "loss": 1.2294, "step": 3690 }, { "epoch": 0.92, "learning_rate": 1.7473064100043385e-05, "loss": 1.2362, "step": 3695 }, { "epoch": 0.93, "learning_rate": 1.746338186568433e-05, "loss": 1.2208, "step": 3700 }, { "epoch": 0.93, "learning_rate": 1.745368381108941e-05, "loss": 1.2384, "step": 3705 }, { "epoch": 0.93, "learning_rate": 1.744396995681573e-05, "loss": 1.2414, "step": 3710 }, { "epoch": 0.93, "learning_rate": 1.743424032345388e-05, "loss": 1.2051, "step": 3715 }, { "epoch": 0.93, "learning_rate": 1.7424494931627906e-05, "loss": 1.2498, "step": 3720 }, { "epoch": 0.93, "learning_rate": 1.7414733801995248e-05, "loss": 1.2451, "step": 3725 }, { "epoch": 0.93, "learning_rate": 1.7404956955246715e-05, "loss": 1.2211, "step": 3730 }, { "epoch": 0.93, "learning_rate": 1.7395164412106425e-05, "loss": 1.2128, "step": 3735 }, { "epoch": 0.94, "learning_rate": 1.7385356193331768e-05, "loss": 1.2428, "step": 3740 }, { "epoch": 0.94, "learning_rate": 1.7375532319713366e-05, "loss": 1.2245, "step": 3745 }, { "epoch": 0.94, "learning_rate": 1.7365692812075024e-05, "loss": 1.2182, "step": 3750 }, { "epoch": 0.94, "learning_rate": 1.735583769127368e-05, "loss": 1.2326, "step": 3755 }, { "epoch": 0.94, "learning_rate": 1.7345966978199377e-05, "loss": 1.1819, "step": 3760 }, { "epoch": 0.94, "learning_rate": 1.7336080693775207e-05, "loss": 1.2439, "step": 3765 }, { "epoch": 0.94, "learning_rate": 1.7326178858957257e-05, "loss": 1.2811, "step": 3770 }, { "epoch": 0.94, "learning_rate": 1.73162614947346e-05, "loss": 1.177, "step": 3775 }, { "epoch": 0.95, "learning_rate": 1.7306328622129197e-05, "loss": 1.1923, "step": 3780 }, { "epoch": 0.95, "learning_rate": 1.729638026219591e-05, "loss": 1.2649, "step": 3785 }, { "epoch": 0.95, "learning_rate": 1.7286416436022415e-05, "loss": 1.2402, "step": 3790 }, { "epoch": 0.95, "learning_rate": 1.7276437164729173e-05, "loss": 1.2265, "step": 3795 }, { "epoch": 0.95, "learning_rate": 1.726644246946939e-05, "loss": 1.224, "step": 3800 }, { "epoch": 0.95, "learning_rate": 1.7256432371428956e-05, "loss": 1.2217, "step": 3805 }, { "epoch": 0.95, "learning_rate": 1.7246406891826422e-05, "loss": 1.2344, "step": 3810 }, { "epoch": 0.95, "learning_rate": 1.7236366051912942e-05, "loss": 1.2321, "step": 3815 }, { "epoch": 0.96, "learning_rate": 1.722630987297222e-05, "loss": 1.2377, "step": 3820 }, { "epoch": 0.96, "learning_rate": 1.721623837632048e-05, "loss": 1.233, "step": 3825 }, { "epoch": 0.96, "learning_rate": 1.7206151583306417e-05, "loss": 1.1784, "step": 3830 }, { "epoch": 0.96, "learning_rate": 1.7196049515311152e-05, "loss": 1.182, "step": 3835 }, { "epoch": 0.96, "learning_rate": 1.7185932193748177e-05, "loss": 1.2103, "step": 3840 }, { "epoch": 0.96, "learning_rate": 1.7175799640063323e-05, "loss": 1.233, "step": 3845 }, { "epoch": 0.96, "learning_rate": 1.7165651875734706e-05, "loss": 1.2027, "step": 3850 }, { "epoch": 0.96, "learning_rate": 1.7155488922272688e-05, "loss": 1.2033, "step": 3855 }, { "epoch": 0.97, "learning_rate": 1.7145310801219823e-05, "loss": 1.2419, "step": 3860 }, { "epoch": 0.97, "learning_rate": 1.7135117534150812e-05, "loss": 1.2297, "step": 3865 }, { "epoch": 0.97, "learning_rate": 1.7124909142672475e-05, "loss": 1.249, "step": 3870 }, { "epoch": 0.97, "learning_rate": 1.7114685648423687e-05, "loss": 1.2036, "step": 3875 }, { "epoch": 0.97, "learning_rate": 1.710444707307532e-05, "loss": 1.1964, "step": 3880 }, { "epoch": 0.97, "learning_rate": 1.7094193438330237e-05, "loss": 1.1657, "step": 3885 }, { "epoch": 0.97, "learning_rate": 1.708392476592321e-05, "loss": 1.2331, "step": 3890 }, { "epoch": 0.97, "learning_rate": 1.7073641077620887e-05, "loss": 1.2685, "step": 3895 }, { "epoch": 0.98, "learning_rate": 1.7063342395221746e-05, "loss": 1.2614, "step": 3900 }, { "epoch": 0.98, "learning_rate": 1.7053028740556058e-05, "loss": 1.2203, "step": 3905 }, { "epoch": 0.98, "learning_rate": 1.704270013548581e-05, "loss": 1.2429, "step": 3910 }, { "epoch": 0.98, "learning_rate": 1.7032356601904698e-05, "loss": 1.2069, "step": 3915 }, { "epoch": 0.98, "learning_rate": 1.7021998161738056e-05, "loss": 1.2329, "step": 3920 }, { "epoch": 0.98, "learning_rate": 1.701162483694282e-05, "loss": 1.2007, "step": 3925 }, { "epoch": 0.98, "learning_rate": 1.7001236649507467e-05, "loss": 1.1881, "step": 3930 }, { "epoch": 0.98, "learning_rate": 1.6990833621451983e-05, "loss": 1.2406, "step": 3935 }, { "epoch": 0.99, "learning_rate": 1.698041577482782e-05, "loss": 1.1988, "step": 3940 }, { "epoch": 0.99, "learning_rate": 1.696998313171783e-05, "loss": 1.2386, "step": 3945 }, { "epoch": 0.99, "learning_rate": 1.6959535714236235e-05, "loss": 1.2411, "step": 3950 }, { "epoch": 0.99, "learning_rate": 1.694907354452857e-05, "loss": 1.1831, "step": 3955 }, { "epoch": 0.99, "learning_rate": 1.693859664477165e-05, "loss": 1.2234, "step": 3960 }, { "epoch": 0.99, "learning_rate": 1.6928105037173506e-05, "loss": 1.2108, "step": 3965 }, { "epoch": 0.99, "learning_rate": 1.6917598743973344e-05, "loss": 1.2129, "step": 3970 }, { "epoch": 0.99, "learning_rate": 1.69070777874415e-05, "loss": 1.2221, "step": 3975 }, { "epoch": 1.0, "learning_rate": 1.6896542189879398e-05, "loss": 1.2281, "step": 3980 }, { "epoch": 1.0, "learning_rate": 1.6885991973619493e-05, "loss": 1.194, "step": 3985 }, { "epoch": 1.0, "learning_rate": 1.6875427161025227e-05, "loss": 1.2642, "step": 3990 }, { "epoch": 1.0, "learning_rate": 1.686484777449098e-05, "loss": 1.2257, "step": 3995 }, { "epoch": 1.0, "eval_loss": 1.2097705602645874, "eval_runtime": 1779.6498, "eval_samples_per_second": 15.906, "eval_steps_per_second": 0.995, "step": 3996 }, { "epoch": 1.0, "learning_rate": 1.6854253836442027e-05, "loss": 1.2392, "step": 4000 }, { "epoch": 1.0, "learning_rate": 1.684364536933449e-05, "loss": 1.2356, "step": 4005 }, { "epoch": 1.0, "learning_rate": 1.6833022395655286e-05, "loss": 1.2443, "step": 4010 }, { "epoch": 1.0, "learning_rate": 1.6822384937922086e-05, "loss": 1.2047, "step": 4015 }, { "epoch": 1.01, "learning_rate": 1.681173301868325e-05, "loss": 1.2419, "step": 4020 }, { "epoch": 1.01, "learning_rate": 1.6801066660517808e-05, "loss": 1.2678, "step": 4025 }, { "epoch": 1.01, "learning_rate": 1.679038588603539e-05, "loss": 1.1882, "step": 4030 }, { "epoch": 1.01, "learning_rate": 1.677969071787618e-05, "loss": 1.2392, "step": 4035 }, { "epoch": 1.01, "learning_rate": 1.676898117871088e-05, "loss": 1.2242, "step": 4040 }, { "epoch": 1.01, "learning_rate": 1.6758257291240655e-05, "loss": 1.2111, "step": 4045 }, { "epoch": 1.01, "learning_rate": 1.6747519078197076e-05, "loss": 1.2497, "step": 4050 }, { "epoch": 1.01, "learning_rate": 1.6736766562342083e-05, "loss": 1.1996, "step": 4055 }, { "epoch": 1.02, "learning_rate": 1.6725999766467943e-05, "loss": 1.2409, "step": 4060 }, { "epoch": 1.02, "learning_rate": 1.6715218713397174e-05, "loss": 1.2051, "step": 4065 }, { "epoch": 1.02, "learning_rate": 1.6704423425982537e-05, "loss": 1.2459, "step": 4070 }, { "epoch": 1.02, "learning_rate": 1.669361392710695e-05, "loss": 1.2263, "step": 4075 }, { "epoch": 1.02, "learning_rate": 1.668279023968346e-05, "loss": 1.2515, "step": 4080 }, { "epoch": 1.02, "learning_rate": 1.667195238665519e-05, "loss": 1.2285, "step": 4085 }, { "epoch": 1.02, "learning_rate": 1.6661100390995296e-05, "loss": 1.2183, "step": 4090 }, { "epoch": 1.02, "learning_rate": 1.66502342757069e-05, "loss": 1.2646, "step": 4095 }, { "epoch": 1.03, "learning_rate": 1.6639354063823058e-05, "loss": 1.257, "step": 4100 }, { "epoch": 1.03, "learning_rate": 1.662845977840671e-05, "loss": 1.2069, "step": 4105 }, { "epoch": 1.03, "learning_rate": 1.6617551442550633e-05, "loss": 1.2097, "step": 4110 }, { "epoch": 1.03, "learning_rate": 1.6606629079377376e-05, "loss": 1.1913, "step": 4115 }, { "epoch": 1.03, "learning_rate": 1.6595692712039225e-05, "loss": 1.2387, "step": 4120 }, { "epoch": 1.03, "learning_rate": 1.6584742363718152e-05, "loss": 1.1946, "step": 4125 }, { "epoch": 1.03, "learning_rate": 1.6573778057625773e-05, "loss": 1.232, "step": 4130 }, { "epoch": 1.03, "learning_rate": 1.656279981700327e-05, "loss": 1.2039, "step": 4135 }, { "epoch": 1.04, "learning_rate": 1.655180766512138e-05, "loss": 1.2182, "step": 4140 }, { "epoch": 1.04, "learning_rate": 1.6540801625280323e-05, "loss": 1.2328, "step": 4145 }, { "epoch": 1.04, "learning_rate": 1.6529781720809758e-05, "loss": 1.24, "step": 4150 }, { "epoch": 1.04, "learning_rate": 1.651874797506873e-05, "loss": 1.2276, "step": 4155 }, { "epoch": 1.04, "learning_rate": 1.650770041144563e-05, "loss": 1.2216, "step": 4160 }, { "epoch": 1.04, "learning_rate": 1.6496639053358126e-05, "loss": 1.1855, "step": 4165 }, { "epoch": 1.04, "learning_rate": 1.6485563924253142e-05, "loss": 1.2309, "step": 4170 }, { "epoch": 1.04, "learning_rate": 1.6474475047606783e-05, "loss": 1.2219, "step": 4175 }, { "epoch": 1.05, "learning_rate": 1.6463372446924296e-05, "loss": 1.2408, "step": 4180 }, { "epoch": 1.05, "learning_rate": 1.6452256145740023e-05, "loss": 1.23, "step": 4185 }, { "epoch": 1.05, "learning_rate": 1.644112616761734e-05, "loss": 1.2043, "step": 4190 }, { "epoch": 1.05, "learning_rate": 1.6429982536148628e-05, "loss": 1.2391, "step": 4195 }, { "epoch": 1.05, "learning_rate": 1.641882527495519e-05, "loss": 1.226, "step": 4200 }, { "epoch": 1.05, "learning_rate": 1.6407654407687233e-05, "loss": 1.2169, "step": 4205 }, { "epoch": 1.05, "learning_rate": 1.6396469958023808e-05, "loss": 1.2213, "step": 4210 }, { "epoch": 1.05, "learning_rate": 1.6385271949672742e-05, "loss": 1.2106, "step": 4215 }, { "epoch": 1.06, "learning_rate": 1.6374060406370613e-05, "loss": 1.2246, "step": 4220 }, { "epoch": 1.06, "learning_rate": 1.636283535188269e-05, "loss": 1.1903, "step": 4225 }, { "epoch": 1.06, "learning_rate": 1.6351596810002883e-05, "loss": 1.2411, "step": 4230 }, { "epoch": 1.06, "learning_rate": 1.6340344804553683e-05, "loss": 1.2332, "step": 4235 }, { "epoch": 1.06, "learning_rate": 1.6329079359386124e-05, "loss": 1.2059, "step": 4240 }, { "epoch": 1.06, "learning_rate": 1.631780049837973e-05, "loss": 1.2029, "step": 4245 }, { "epoch": 1.06, "learning_rate": 1.6306508245442463e-05, "loss": 1.2039, "step": 4250 }, { "epoch": 1.06, "learning_rate": 1.629520262451067e-05, "loss": 1.2595, "step": 4255 }, { "epoch": 1.07, "learning_rate": 1.6283883659549037e-05, "loss": 1.203, "step": 4260 }, { "epoch": 1.07, "learning_rate": 1.6272551374550532e-05, "loss": 1.249, "step": 4265 }, { "epoch": 1.07, "learning_rate": 1.626120579353636e-05, "loss": 1.2497, "step": 4270 }, { "epoch": 1.07, "learning_rate": 1.6249846940555905e-05, "loss": 1.2565, "step": 4275 }, { "epoch": 1.07, "learning_rate": 1.6238474839686698e-05, "loss": 1.2345, "step": 4280 }, { "epoch": 1.07, "learning_rate": 1.622708951503433e-05, "loss": 1.2302, "step": 4285 }, { "epoch": 1.07, "learning_rate": 1.6215690990732443e-05, "loss": 1.2337, "step": 4290 }, { "epoch": 1.07, "learning_rate": 1.6204279290942647e-05, "loss": 1.2421, "step": 4295 }, { "epoch": 1.08, "learning_rate": 1.6192854439854482e-05, "loss": 1.2182, "step": 4300 }, { "epoch": 1.08, "learning_rate": 1.6181416461685365e-05, "loss": 1.2188, "step": 4305 }, { "epoch": 1.08, "learning_rate": 1.6169965380680547e-05, "loss": 1.2217, "step": 4310 }, { "epoch": 1.08, "learning_rate": 1.6158501221113035e-05, "loss": 1.2273, "step": 4315 }, { "epoch": 1.08, "learning_rate": 1.614702400728358e-05, "loss": 1.2354, "step": 4320 }, { "epoch": 1.08, "learning_rate": 1.6135533763520586e-05, "loss": 1.2593, "step": 4325 }, { "epoch": 1.08, "learning_rate": 1.612403051418009e-05, "loss": 1.2172, "step": 4330 }, { "epoch": 1.08, "learning_rate": 1.6112514283645693e-05, "loss": 1.2272, "step": 4335 }, { "epoch": 1.09, "learning_rate": 1.6100985096328506e-05, "loss": 1.2285, "step": 4340 }, { "epoch": 1.09, "learning_rate": 1.6089442976667112e-05, "loss": 1.2523, "step": 4345 }, { "epoch": 1.09, "learning_rate": 1.6077887949127507e-05, "loss": 1.2613, "step": 4350 }, { "epoch": 1.09, "learning_rate": 1.6066320038203046e-05, "loss": 1.2031, "step": 4355 }, { "epoch": 1.09, "learning_rate": 1.605473926841439e-05, "loss": 1.1893, "step": 4360 }, { "epoch": 1.09, "learning_rate": 1.6043145664309464e-05, "loss": 1.2492, "step": 4365 }, { "epoch": 1.09, "learning_rate": 1.603153925046339e-05, "loss": 1.2569, "step": 4370 }, { "epoch": 1.09, "learning_rate": 1.601992005147845e-05, "loss": 1.2015, "step": 4375 }, { "epoch": 1.1, "learning_rate": 1.6008288091984025e-05, "loss": 1.2357, "step": 4380 }, { "epoch": 1.1, "learning_rate": 1.599664339663654e-05, "loss": 1.2452, "step": 4385 }, { "epoch": 1.1, "learning_rate": 1.598498599011942e-05, "loss": 1.2215, "step": 4390 }, { "epoch": 1.1, "learning_rate": 1.5973315897143043e-05, "loss": 1.2122, "step": 4395 }, { "epoch": 1.1, "learning_rate": 1.596163314244466e-05, "loss": 1.206, "step": 4400 }, { "epoch": 1.1, "learning_rate": 1.594993775078837e-05, "loss": 1.2532, "step": 4405 }, { "epoch": 1.1, "learning_rate": 1.593822974696507e-05, "loss": 1.1909, "step": 4410 }, { "epoch": 1.1, "learning_rate": 1.592650915579237e-05, "loss": 1.2096, "step": 4415 }, { "epoch": 1.11, "learning_rate": 1.591477600211458e-05, "loss": 1.2232, "step": 4420 }, { "epoch": 1.11, "learning_rate": 1.5903030310802628e-05, "loss": 1.2029, "step": 4425 }, { "epoch": 1.11, "learning_rate": 1.589127210675402e-05, "loss": 1.2363, "step": 4430 }, { "epoch": 1.11, "learning_rate": 1.5879501414892793e-05, "loss": 1.222, "step": 4435 }, { "epoch": 1.11, "learning_rate": 1.5867718260169446e-05, "loss": 1.207, "step": 4440 }, { "epoch": 1.11, "learning_rate": 1.585592266756089e-05, "loss": 1.2229, "step": 4445 }, { "epoch": 1.11, "learning_rate": 1.5844114662070423e-05, "loss": 1.2777, "step": 4450 }, { "epoch": 1.11, "learning_rate": 1.5832294268727634e-05, "loss": 1.2245, "step": 4455 }, { "epoch": 1.12, "learning_rate": 1.5820461512588377e-05, "loss": 1.2276, "step": 4460 }, { "epoch": 1.12, "learning_rate": 1.5808616418734712e-05, "loss": 1.2381, "step": 4465 }, { "epoch": 1.12, "learning_rate": 1.579675901227485e-05, "loss": 1.2531, "step": 4470 }, { "epoch": 1.12, "learning_rate": 1.5784889318343112e-05, "loss": 1.229, "step": 4475 }, { "epoch": 1.12, "learning_rate": 1.5773007362099848e-05, "loss": 1.209, "step": 4480 }, { "epoch": 1.12, "learning_rate": 1.576111316873141e-05, "loss": 1.2339, "step": 4485 }, { "epoch": 1.12, "learning_rate": 1.5749206763450082e-05, "loss": 1.1845, "step": 4490 }, { "epoch": 1.12, "learning_rate": 1.5737288171494048e-05, "loss": 1.2151, "step": 4495 }, { "epoch": 1.13, "learning_rate": 1.572535741812731e-05, "loss": 1.2058, "step": 4500 }, { "epoch": 1.13, "learning_rate": 1.571341452863966e-05, "loss": 1.2399, "step": 4505 }, { "epoch": 1.13, "learning_rate": 1.57014595283466e-05, "loss": 1.2112, "step": 4510 }, { "epoch": 1.13, "learning_rate": 1.5689492442589322e-05, "loss": 1.2145, "step": 4515 }, { "epoch": 1.13, "learning_rate": 1.5677513296734624e-05, "loss": 1.206, "step": 4520 }, { "epoch": 1.13, "learning_rate": 1.5665522116174866e-05, "loss": 1.2098, "step": 4525 }, { "epoch": 1.13, "learning_rate": 1.5653518926327928e-05, "loss": 1.2128, "step": 4530 }, { "epoch": 1.13, "learning_rate": 1.564150375263714e-05, "loss": 1.1942, "step": 4535 }, { "epoch": 1.14, "learning_rate": 1.5629476620571233e-05, "loss": 1.2329, "step": 4540 }, { "epoch": 1.14, "learning_rate": 1.561743755562429e-05, "loss": 1.2024, "step": 4545 }, { "epoch": 1.14, "learning_rate": 1.560538658331569e-05, "loss": 1.2234, "step": 4550 }, { "epoch": 1.14, "learning_rate": 1.5593323729190042e-05, "loss": 1.2099, "step": 4555 }, { "epoch": 1.14, "learning_rate": 1.5581249018817155e-05, "loss": 1.2281, "step": 4560 }, { "epoch": 1.14, "learning_rate": 1.5569162477791956e-05, "loss": 1.2161, "step": 4565 }, { "epoch": 1.14, "learning_rate": 1.5557064131734462e-05, "loss": 1.2158, "step": 4570 }, { "epoch": 1.14, "learning_rate": 1.5544954006289706e-05, "loss": 1.2113, "step": 4575 }, { "epoch": 1.15, "learning_rate": 1.5532832127127694e-05, "loss": 1.1916, "step": 4580 }, { "epoch": 1.15, "learning_rate": 1.552069851994334e-05, "loss": 1.2174, "step": 4585 }, { "epoch": 1.15, "learning_rate": 1.550855321045643e-05, "loss": 1.2032, "step": 4590 }, { "epoch": 1.15, "learning_rate": 1.549639622441154e-05, "loss": 1.2163, "step": 4595 }, { "epoch": 1.15, "learning_rate": 1.5484227587578008e-05, "loss": 1.2457, "step": 4600 }, { "epoch": 1.15, "learning_rate": 1.5472047325749863e-05, "loss": 1.2303, "step": 4605 }, { "epoch": 1.15, "learning_rate": 1.545985546474578e-05, "loss": 1.2415, "step": 4610 }, { "epoch": 1.15, "learning_rate": 1.5447652030409018e-05, "loss": 1.2214, "step": 4615 }, { "epoch": 1.16, "learning_rate": 1.543543704860737e-05, "loss": 1.2212, "step": 4620 }, { "epoch": 1.16, "learning_rate": 1.5423210545233108e-05, "loss": 1.1879, "step": 4625 }, { "epoch": 1.16, "learning_rate": 1.5410972546202917e-05, "loss": 1.2234, "step": 4630 }, { "epoch": 1.16, "learning_rate": 1.539872307745786e-05, "loss": 1.2271, "step": 4635 }, { "epoch": 1.16, "learning_rate": 1.538646216496331e-05, "loss": 1.2285, "step": 4640 }, { "epoch": 1.16, "learning_rate": 1.5374189834708898e-05, "loss": 1.2271, "step": 4645 }, { "epoch": 1.16, "learning_rate": 1.5361906112708446e-05, "loss": 1.2783, "step": 4650 }, { "epoch": 1.16, "learning_rate": 1.5349611024999943e-05, "loss": 1.2404, "step": 4655 }, { "epoch": 1.17, "learning_rate": 1.533730459764546e-05, "loss": 1.2233, "step": 4660 }, { "epoch": 1.17, "learning_rate": 1.5324986856731093e-05, "loss": 1.2045, "step": 4665 }, { "epoch": 1.17, "learning_rate": 1.5312657828366946e-05, "loss": 1.2288, "step": 4670 }, { "epoch": 1.17, "learning_rate": 1.5300317538687025e-05, "loss": 1.2156, "step": 4675 }, { "epoch": 1.17, "learning_rate": 1.528796601384922e-05, "loss": 1.2247, "step": 4680 }, { "epoch": 1.17, "learning_rate": 1.527560328003523e-05, "loss": 1.2191, "step": 4685 }, { "epoch": 1.17, "learning_rate": 1.5263229363450517e-05, "loss": 1.2468, "step": 4690 }, { "epoch": 1.17, "learning_rate": 1.5250844290324248e-05, "loss": 1.1891, "step": 4695 }, { "epoch": 1.18, "learning_rate": 1.5238448086909237e-05, "loss": 1.2177, "step": 4700 }, { "epoch": 1.18, "learning_rate": 1.5226040779481889e-05, "loss": 1.2813, "step": 4705 }, { "epoch": 1.18, "learning_rate": 1.5213622394342156e-05, "loss": 1.2293, "step": 4710 }, { "epoch": 1.18, "learning_rate": 1.5201192957813453e-05, "loss": 1.2091, "step": 4715 }, { "epoch": 1.18, "learning_rate": 1.5188752496242641e-05, "loss": 1.2736, "step": 4720 }, { "epoch": 1.18, "learning_rate": 1.5176301035999937e-05, "loss": 1.2212, "step": 4725 }, { "epoch": 1.18, "learning_rate": 1.516383860347888e-05, "loss": 1.2077, "step": 4730 }, { "epoch": 1.18, "learning_rate": 1.5151365225096261e-05, "loss": 1.2189, "step": 4735 }, { "epoch": 1.19, "learning_rate": 1.513888092729208e-05, "loss": 1.2533, "step": 4740 }, { "epoch": 1.19, "learning_rate": 1.5126385736529477e-05, "loss": 1.1779, "step": 4745 }, { "epoch": 1.19, "learning_rate": 1.5113879679294683e-05, "loss": 1.225, "step": 4750 }, { "epoch": 1.19, "learning_rate": 1.5101362782096967e-05, "loss": 1.2024, "step": 4755 }, { "epoch": 1.19, "learning_rate": 1.508883507146857e-05, "loss": 1.2652, "step": 4760 }, { "epoch": 1.19, "learning_rate": 1.5076296573964659e-05, "loss": 1.2315, "step": 4765 }, { "epoch": 1.19, "learning_rate": 1.5063747316163263e-05, "loss": 1.2266, "step": 4770 }, { "epoch": 1.19, "learning_rate": 1.5051187324665222e-05, "loss": 1.2578, "step": 4775 }, { "epoch": 1.2, "learning_rate": 1.5038616626094124e-05, "loss": 1.2205, "step": 4780 }, { "epoch": 1.2, "learning_rate": 1.502603524709626e-05, "loss": 1.202, "step": 4785 }, { "epoch": 1.2, "learning_rate": 1.5013443214340556e-05, "loss": 1.2369, "step": 4790 }, { "epoch": 1.2, "learning_rate": 1.5000840554518518e-05, "loss": 1.1992, "step": 4795 }, { "epoch": 1.2, "learning_rate": 1.4988227294344183e-05, "loss": 1.2554, "step": 4800 }, { "epoch": 1.2, "learning_rate": 1.4975603460554063e-05, "loss": 1.2423, "step": 4805 }, { "epoch": 1.2, "learning_rate": 1.4962969079907068e-05, "loss": 1.2212, "step": 4810 }, { "epoch": 1.2, "learning_rate": 1.4950324179184479e-05, "loss": 1.2174, "step": 4815 }, { "epoch": 1.21, "learning_rate": 1.4937668785189867e-05, "loss": 1.2352, "step": 4820 }, { "epoch": 1.21, "learning_rate": 1.4925002924749053e-05, "loss": 1.2354, "step": 4825 }, { "epoch": 1.21, "learning_rate": 1.4912326624710032e-05, "loss": 1.222, "step": 4830 }, { "epoch": 1.21, "learning_rate": 1.4899639911942948e-05, "loss": 1.2001, "step": 4835 }, { "epoch": 1.21, "learning_rate": 1.4886942813339992e-05, "loss": 1.2176, "step": 4840 }, { "epoch": 1.21, "learning_rate": 1.4874235355815395e-05, "loss": 1.2204, "step": 4845 }, { "epoch": 1.21, "learning_rate": 1.4861517566305329e-05, "loss": 1.2438, "step": 4850 }, { "epoch": 1.21, "learning_rate": 1.4848789471767869e-05, "loss": 1.2045, "step": 4855 }, { "epoch": 1.22, "learning_rate": 1.4836051099182938e-05, "loss": 1.1969, "step": 4860 }, { "epoch": 1.22, "learning_rate": 1.4823302475552247e-05, "loss": 1.2097, "step": 4865 }, { "epoch": 1.22, "learning_rate": 1.4810543627899235e-05, "loss": 1.1715, "step": 4870 }, { "epoch": 1.22, "learning_rate": 1.4797774583269005e-05, "loss": 1.2288, "step": 4875 }, { "epoch": 1.22, "learning_rate": 1.4784995368728283e-05, "loss": 1.2144, "step": 4880 }, { "epoch": 1.22, "learning_rate": 1.4772206011365355e-05, "loss": 1.2237, "step": 4885 }, { "epoch": 1.22, "learning_rate": 1.4759406538289995e-05, "loss": 1.2186, "step": 4890 }, { "epoch": 1.22, "learning_rate": 1.4746596976633436e-05, "loss": 1.2173, "step": 4895 }, { "epoch": 1.23, "learning_rate": 1.4733777353548279e-05, "loss": 1.241, "step": 4900 }, { "epoch": 1.23, "learning_rate": 1.4720947696208463e-05, "loss": 1.2278, "step": 4905 }, { "epoch": 1.23, "learning_rate": 1.4708108031809192e-05, "loss": 1.21, "step": 4910 }, { "epoch": 1.23, "learning_rate": 1.4695258387566886e-05, "loss": 1.2362, "step": 4915 }, { "epoch": 1.23, "learning_rate": 1.4682398790719115e-05, "loss": 1.1997, "step": 4920 }, { "epoch": 1.23, "learning_rate": 1.4669529268524549e-05, "loss": 1.2138, "step": 4925 }, { "epoch": 1.23, "learning_rate": 1.4656649848262895e-05, "loss": 1.1897, "step": 4930 }, { "epoch": 1.23, "learning_rate": 1.4643760557234845e-05, "loss": 1.2381, "step": 4935 }, { "epoch": 1.24, "learning_rate": 1.4630861422762009e-05, "loss": 1.2225, "step": 4940 }, { "epoch": 1.24, "learning_rate": 1.4617952472186863e-05, "loss": 1.1967, "step": 4945 }, { "epoch": 1.24, "learning_rate": 1.4605033732872693e-05, "loss": 1.2425, "step": 4950 }, { "epoch": 1.24, "learning_rate": 1.4592105232203533e-05, "loss": 1.2147, "step": 4955 }, { "epoch": 1.24, "learning_rate": 1.4579166997584109e-05, "loss": 1.273, "step": 4960 }, { "epoch": 1.24, "learning_rate": 1.4566219056439777e-05, "loss": 1.1922, "step": 4965 }, { "epoch": 1.24, "learning_rate": 1.4553261436216472e-05, "loss": 1.2044, "step": 4970 }, { "epoch": 1.24, "learning_rate": 1.4540294164380649e-05, "loss": 1.2722, "step": 4975 }, { "epoch": 1.25, "learning_rate": 1.4527317268419207e-05, "loss": 1.1729, "step": 4980 }, { "epoch": 1.25, "learning_rate": 1.4514330775839465e-05, "loss": 1.2233, "step": 4985 }, { "epoch": 1.25, "learning_rate": 1.4501334714169073e-05, "loss": 1.1908, "step": 4990 }, { "epoch": 1.25, "learning_rate": 1.448832911095596e-05, "loss": 1.251, "step": 4995 }, { "epoch": 1.25, "learning_rate": 1.4475313993768292e-05, "loss": 1.2623, "step": 5000 }, { "epoch": 1.25, "learning_rate": 1.4462289390194398e-05, "loss": 1.2188, "step": 5005 }, { "epoch": 1.25, "learning_rate": 1.4449255327842709e-05, "loss": 1.2513, "step": 5010 }, { "epoch": 1.25, "learning_rate": 1.4436211834341716e-05, "loss": 1.2429, "step": 5015 }, { "epoch": 1.26, "learning_rate": 1.442315893733989e-05, "loss": 1.1916, "step": 5020 }, { "epoch": 1.26, "learning_rate": 1.4410096664505647e-05, "loss": 1.2405, "step": 5025 }, { "epoch": 1.26, "learning_rate": 1.4397025043527266e-05, "loss": 1.2079, "step": 5030 }, { "epoch": 1.26, "learning_rate": 1.4383944102112854e-05, "loss": 1.227, "step": 5035 }, { "epoch": 1.26, "learning_rate": 1.437085386799026e-05, "loss": 1.1969, "step": 5040 }, { "epoch": 1.26, "learning_rate": 1.4357754368907039e-05, "loss": 1.2221, "step": 5045 }, { "epoch": 1.26, "learning_rate": 1.4344645632630387e-05, "loss": 1.1977, "step": 5050 }, { "epoch": 1.26, "learning_rate": 1.4331527686947073e-05, "loss": 1.2608, "step": 5055 }, { "epoch": 1.27, "learning_rate": 1.43184005596634e-05, "loss": 1.2542, "step": 5060 }, { "epoch": 1.27, "learning_rate": 1.4305264278605113e-05, "loss": 1.241, "step": 5065 }, { "epoch": 1.27, "learning_rate": 1.4292118871617381e-05, "loss": 1.2254, "step": 5070 }, { "epoch": 1.27, "learning_rate": 1.4278964366564707e-05, "loss": 1.2052, "step": 5075 }, { "epoch": 1.27, "learning_rate": 1.426580079133088e-05, "loss": 1.26, "step": 5080 }, { "epoch": 1.27, "learning_rate": 1.4252628173818914e-05, "loss": 1.2339, "step": 5085 }, { "epoch": 1.27, "learning_rate": 1.4239446541950996e-05, "loss": 1.2372, "step": 5090 }, { "epoch": 1.27, "learning_rate": 1.4226255923668417e-05, "loss": 1.187, "step": 5095 }, { "epoch": 1.28, "learning_rate": 1.4213056346931514e-05, "loss": 1.2271, "step": 5100 }, { "epoch": 1.28, "learning_rate": 1.4199847839719618e-05, "loss": 1.2257, "step": 5105 }, { "epoch": 1.28, "learning_rate": 1.418663043003099e-05, "loss": 1.2262, "step": 5110 }, { "epoch": 1.28, "learning_rate": 1.4173404145882755e-05, "loss": 1.1777, "step": 5115 }, { "epoch": 1.28, "learning_rate": 1.4160169015310856e-05, "loss": 1.2194, "step": 5120 }, { "epoch": 1.28, "learning_rate": 1.4146925066369988e-05, "loss": 1.2252, "step": 5125 }, { "epoch": 1.28, "learning_rate": 1.4133672327133536e-05, "loss": 1.1949, "step": 5130 }, { "epoch": 1.28, "learning_rate": 1.4120410825693518e-05, "loss": 1.2164, "step": 5135 }, { "epoch": 1.29, "learning_rate": 1.410714059016052e-05, "loss": 1.216, "step": 5140 }, { "epoch": 1.29, "learning_rate": 1.4093861648663656e-05, "loss": 1.2079, "step": 5145 }, { "epoch": 1.29, "learning_rate": 1.4080574029350484e-05, "loss": 1.2599, "step": 5150 }, { "epoch": 1.29, "learning_rate": 1.4067277760386957e-05, "loss": 1.2358, "step": 5155 }, { "epoch": 1.29, "learning_rate": 1.4053972869957363e-05, "loss": 1.249, "step": 5160 }, { "epoch": 1.29, "learning_rate": 1.4040659386264263e-05, "loss": 1.212, "step": 5165 }, { "epoch": 1.29, "learning_rate": 1.4027337337528443e-05, "loss": 1.2186, "step": 5170 }, { "epoch": 1.29, "learning_rate": 1.4014006751988839e-05, "loss": 1.1575, "step": 5175 }, { "epoch": 1.3, "learning_rate": 1.4000667657902472e-05, "loss": 1.1963, "step": 5180 }, { "epoch": 1.3, "learning_rate": 1.3987320083544413e-05, "loss": 1.1951, "step": 5185 }, { "epoch": 1.3, "learning_rate": 1.3973964057207706e-05, "loss": 1.2081, "step": 5190 }, { "epoch": 1.3, "learning_rate": 1.39605996072033e-05, "loss": 1.2217, "step": 5195 }, { "epoch": 1.3, "learning_rate": 1.3947226761860022e-05, "loss": 1.2291, "step": 5200 }, { "epoch": 1.3, "learning_rate": 1.393384554952447e-05, "loss": 1.1975, "step": 5205 }, { "epoch": 1.3, "learning_rate": 1.392045599856099e-05, "loss": 1.2727, "step": 5210 }, { "epoch": 1.3, "learning_rate": 1.3907058137351608e-05, "loss": 1.207, "step": 5215 }, { "epoch": 1.31, "learning_rate": 1.3893651994295954e-05, "loss": 1.2296, "step": 5220 }, { "epoch": 1.31, "learning_rate": 1.3880237597811222e-05, "loss": 1.2036, "step": 5225 }, { "epoch": 1.31, "learning_rate": 1.3866814976332092e-05, "loss": 1.2079, "step": 5230 }, { "epoch": 1.31, "learning_rate": 1.385338415831069e-05, "loss": 1.1969, "step": 5235 }, { "epoch": 1.31, "learning_rate": 1.3839945172216509e-05, "loss": 1.1877, "step": 5240 }, { "epoch": 1.31, "learning_rate": 1.3826498046536356e-05, "loss": 1.2163, "step": 5245 }, { "epoch": 1.31, "learning_rate": 1.3813042809774295e-05, "loss": 1.242, "step": 5250 }, { "epoch": 1.31, "learning_rate": 1.379957949045158e-05, "loss": 1.2321, "step": 5255 }, { "epoch": 1.32, "learning_rate": 1.3786108117106598e-05, "loss": 1.2263, "step": 5260 }, { "epoch": 1.32, "learning_rate": 1.3772628718294811e-05, "loss": 1.2162, "step": 5265 }, { "epoch": 1.32, "learning_rate": 1.3759141322588694e-05, "loss": 1.2573, "step": 5270 }, { "epoch": 1.32, "learning_rate": 1.3745645958577663e-05, "loss": 1.2381, "step": 5275 }, { "epoch": 1.32, "learning_rate": 1.3732142654868033e-05, "loss": 1.2582, "step": 5280 }, { "epoch": 1.32, "learning_rate": 1.371863144008295e-05, "loss": 1.2339, "step": 5285 }, { "epoch": 1.32, "learning_rate": 1.3705112342862328e-05, "loss": 1.2246, "step": 5290 }, { "epoch": 1.32, "learning_rate": 1.3691585391862785e-05, "loss": 1.2144, "step": 5295 }, { "epoch": 1.33, "learning_rate": 1.3678050615757593e-05, "loss": 1.1777, "step": 5300 }, { "epoch": 1.33, "learning_rate": 1.3664508043236602e-05, "loss": 1.2515, "step": 5305 }, { "epoch": 1.33, "learning_rate": 1.36509577030062e-05, "loss": 1.2068, "step": 5310 }, { "epoch": 1.33, "learning_rate": 1.3637399623789233e-05, "loss": 1.2418, "step": 5315 }, { "epoch": 1.33, "learning_rate": 1.3623833834324951e-05, "loss": 1.2198, "step": 5320 }, { "epoch": 1.33, "learning_rate": 1.3610260363368952e-05, "loss": 1.2455, "step": 5325 }, { "epoch": 1.33, "learning_rate": 1.3596679239693113e-05, "loss": 1.2436, "step": 5330 }, { "epoch": 1.33, "learning_rate": 1.3583090492085529e-05, "loss": 1.2343, "step": 5335 }, { "epoch": 1.34, "learning_rate": 1.356949414935047e-05, "loss": 1.2457, "step": 5340 }, { "epoch": 1.34, "learning_rate": 1.3555890240308286e-05, "loss": 1.207, "step": 5345 }, { "epoch": 1.34, "learning_rate": 1.354227879379538e-05, "loss": 1.213, "step": 5350 }, { "epoch": 1.34, "learning_rate": 1.3528659838664124e-05, "loss": 1.2492, "step": 5355 }, { "epoch": 1.34, "learning_rate": 1.3515033403782814e-05, "loss": 1.2114, "step": 5360 }, { "epoch": 1.34, "learning_rate": 1.3501399518035594e-05, "loss": 1.2154, "step": 5365 }, { "epoch": 1.34, "learning_rate": 1.34877582103224e-05, "loss": 1.1993, "step": 5370 }, { "epoch": 1.34, "learning_rate": 1.3474109509558912e-05, "loss": 1.1956, "step": 5375 }, { "epoch": 1.35, "learning_rate": 1.3460453444676467e-05, "loss": 1.2101, "step": 5380 }, { "epoch": 1.35, "learning_rate": 1.3446790044622025e-05, "loss": 1.207, "step": 5385 }, { "epoch": 1.35, "learning_rate": 1.343311933835808e-05, "loss": 1.2279, "step": 5390 }, { "epoch": 1.35, "learning_rate": 1.3419441354862626e-05, "loss": 1.2299, "step": 5395 }, { "epoch": 1.35, "learning_rate": 1.340575612312908e-05, "loss": 1.1914, "step": 5400 }, { "epoch": 1.35, "learning_rate": 1.3392063672166213e-05, "loss": 1.2218, "step": 5405 }, { "epoch": 1.35, "learning_rate": 1.3378364030998113e-05, "loss": 1.1935, "step": 5410 }, { "epoch": 1.35, "learning_rate": 1.33646572286641e-05, "loss": 1.223, "step": 5415 }, { "epoch": 1.36, "learning_rate": 1.3350943294218679e-05, "loss": 1.2459, "step": 5420 }, { "epoch": 1.36, "learning_rate": 1.3337222256731468e-05, "loss": 1.2441, "step": 5425 }, { "epoch": 1.36, "learning_rate": 1.3323494145287144e-05, "loss": 1.208, "step": 5430 }, { "epoch": 1.36, "learning_rate": 1.3309758988985379e-05, "loss": 1.2196, "step": 5435 }, { "epoch": 1.36, "learning_rate": 1.329601681694078e-05, "loss": 1.2233, "step": 5440 }, { "epoch": 1.36, "learning_rate": 1.3282267658282815e-05, "loss": 1.2242, "step": 5445 }, { "epoch": 1.36, "learning_rate": 1.3268511542155778e-05, "loss": 1.2225, "step": 5450 }, { "epoch": 1.36, "learning_rate": 1.3254748497718701e-05, "loss": 1.2317, "step": 5455 }, { "epoch": 1.37, "learning_rate": 1.3240978554145302e-05, "loss": 1.2092, "step": 5460 }, { "epoch": 1.37, "learning_rate": 1.3227201740623925e-05, "loss": 1.2069, "step": 5465 }, { "epoch": 1.37, "learning_rate": 1.3213418086357474e-05, "loss": 1.252, "step": 5470 }, { "epoch": 1.37, "learning_rate": 1.3199627620563358e-05, "loss": 1.2364, "step": 5475 }, { "epoch": 1.37, "learning_rate": 1.3185830372473423e-05, "loss": 1.2343, "step": 5480 }, { "epoch": 1.37, "learning_rate": 1.3172026371333889e-05, "loss": 1.2046, "step": 5485 }, { "epoch": 1.37, "learning_rate": 1.3158215646405287e-05, "loss": 1.2236, "step": 5490 }, { "epoch": 1.37, "learning_rate": 1.3144398226962416e-05, "loss": 1.2193, "step": 5495 }, { "epoch": 1.38, "learning_rate": 1.3130574142294245e-05, "loss": 1.233, "step": 5500 }, { "epoch": 1.38, "learning_rate": 1.311674342170389e-05, "loss": 1.2641, "step": 5505 }, { "epoch": 1.38, "learning_rate": 1.3102906094508519e-05, "loss": 1.2022, "step": 5510 }, { "epoch": 1.38, "learning_rate": 1.308906219003931e-05, "loss": 1.189, "step": 5515 }, { "epoch": 1.38, "learning_rate": 1.3075211737641385e-05, "loss": 1.2419, "step": 5520 }, { "epoch": 1.38, "learning_rate": 1.306135476667374e-05, "loss": 1.1992, "step": 5525 }, { "epoch": 1.38, "learning_rate": 1.3047491306509196e-05, "loss": 1.1746, "step": 5530 }, { "epoch": 1.38, "learning_rate": 1.3033621386534323e-05, "loss": 1.2289, "step": 5535 }, { "epoch": 1.39, "learning_rate": 1.3019745036149387e-05, "loss": 1.2617, "step": 5540 }, { "epoch": 1.39, "learning_rate": 1.300586228476828e-05, "loss": 1.1717, "step": 5545 }, { "epoch": 1.39, "learning_rate": 1.2991973161818472e-05, "loss": 1.2374, "step": 5550 }, { "epoch": 1.39, "learning_rate": 1.297807769674093e-05, "loss": 1.2168, "step": 5555 }, { "epoch": 1.39, "learning_rate": 1.2964175918990067e-05, "loss": 1.2176, "step": 5560 }, { "epoch": 1.39, "learning_rate": 1.2950267858033676e-05, "loss": 1.2388, "step": 5565 }, { "epoch": 1.39, "learning_rate": 1.2936353543352876e-05, "loss": 1.2146, "step": 5570 }, { "epoch": 1.39, "learning_rate": 1.292243300444203e-05, "loss": 1.2222, "step": 5575 }, { "epoch": 1.4, "learning_rate": 1.2908506270808703e-05, "loss": 1.2044, "step": 5580 }, { "epoch": 1.4, "learning_rate": 1.2894573371973587e-05, "loss": 1.2207, "step": 5585 }, { "epoch": 1.4, "learning_rate": 1.2880634337470448e-05, "loss": 1.1965, "step": 5590 }, { "epoch": 1.4, "learning_rate": 1.2866689196846052e-05, "loss": 1.2253, "step": 5595 }, { "epoch": 1.4, "learning_rate": 1.285273797966011e-05, "loss": 1.2192, "step": 5600 }, { "epoch": 1.4, "learning_rate": 1.2838780715485215e-05, "loss": 1.207, "step": 5605 }, { "epoch": 1.4, "learning_rate": 1.2824817433906774e-05, "loss": 1.1713, "step": 5610 }, { "epoch": 1.4, "learning_rate": 1.2810848164522957e-05, "loss": 1.2314, "step": 5615 }, { "epoch": 1.41, "learning_rate": 1.279687293694462e-05, "loss": 1.1936, "step": 5620 }, { "epoch": 1.41, "learning_rate": 1.2782891780795248e-05, "loss": 1.2353, "step": 5625 }, { "epoch": 1.41, "learning_rate": 1.2768904725710895e-05, "loss": 1.2662, "step": 5630 }, { "epoch": 1.41, "learning_rate": 1.275491180134012e-05, "loss": 1.2333, "step": 5635 }, { "epoch": 1.41, "learning_rate": 1.274091303734392e-05, "loss": 1.23, "step": 5640 }, { "epoch": 1.41, "learning_rate": 1.272690846339568e-05, "loss": 1.1954, "step": 5645 }, { "epoch": 1.41, "learning_rate": 1.2712898109181084e-05, "loss": 1.2318, "step": 5650 }, { "epoch": 1.41, "learning_rate": 1.2698882004398083e-05, "loss": 1.2214, "step": 5655 }, { "epoch": 1.42, "learning_rate": 1.268486017875681e-05, "loss": 1.2273, "step": 5660 }, { "epoch": 1.42, "learning_rate": 1.2670832661979528e-05, "loss": 1.2117, "step": 5665 }, { "epoch": 1.42, "learning_rate": 1.2656799483800558e-05, "loss": 1.2456, "step": 5670 }, { "epoch": 1.42, "learning_rate": 1.2642760673966234e-05, "loss": 1.2046, "step": 5675 }, { "epoch": 1.42, "learning_rate": 1.2628716262234813e-05, "loss": 1.2422, "step": 5680 }, { "epoch": 1.42, "learning_rate": 1.2614666278376433e-05, "loss": 1.2634, "step": 5685 }, { "epoch": 1.42, "learning_rate": 1.2600610752173046e-05, "loss": 1.2307, "step": 5690 }, { "epoch": 1.42, "learning_rate": 1.2586549713418348e-05, "loss": 1.2411, "step": 5695 }, { "epoch": 1.43, "learning_rate": 1.2572483191917719e-05, "loss": 1.1869, "step": 5700 }, { "epoch": 1.43, "learning_rate": 1.2558411217488167e-05, "loss": 1.2391, "step": 5705 }, { "epoch": 1.43, "learning_rate": 1.2544333819958252e-05, "loss": 1.2358, "step": 5710 }, { "epoch": 1.43, "learning_rate": 1.2530251029168032e-05, "loss": 1.2369, "step": 5715 }, { "epoch": 1.43, "learning_rate": 1.2516162874969002e-05, "loss": 1.2369, "step": 5720 }, { "epoch": 1.43, "learning_rate": 1.2502069387224012e-05, "loss": 1.2182, "step": 5725 }, { "epoch": 1.43, "learning_rate": 1.2487970595807237e-05, "loss": 1.2308, "step": 5730 }, { "epoch": 1.43, "learning_rate": 1.2473866530604077e-05, "loss": 1.1934, "step": 5735 }, { "epoch": 1.44, "learning_rate": 1.2459757221511125e-05, "loss": 1.2198, "step": 5740 }, { "epoch": 1.44, "learning_rate": 1.2445642698436075e-05, "loss": 1.2143, "step": 5745 }, { "epoch": 1.44, "learning_rate": 1.2431522991297683e-05, "loss": 1.2195, "step": 5750 }, { "epoch": 1.44, "learning_rate": 1.2417398130025694e-05, "loss": 1.2306, "step": 5755 }, { "epoch": 1.44, "learning_rate": 1.2403268144560773e-05, "loss": 1.1986, "step": 5760 }, { "epoch": 1.44, "learning_rate": 1.2389133064854455e-05, "loss": 1.1946, "step": 5765 }, { "epoch": 1.44, "learning_rate": 1.2374992920869062e-05, "loss": 1.1941, "step": 5770 }, { "epoch": 1.44, "learning_rate": 1.2360847742577658e-05, "loss": 1.2039, "step": 5775 }, { "epoch": 1.45, "learning_rate": 1.2346697559963974e-05, "loss": 1.2154, "step": 5780 }, { "epoch": 1.45, "learning_rate": 1.2332542403022363e-05, "loss": 1.2106, "step": 5785 }, { "epoch": 1.45, "learning_rate": 1.23183823017577e-05, "loss": 1.2241, "step": 5790 }, { "epoch": 1.45, "learning_rate": 1.2304217286185351e-05, "loss": 1.1919, "step": 5795 }, { "epoch": 1.45, "learning_rate": 1.2290047386331108e-05, "loss": 1.2541, "step": 5800 }, { "epoch": 1.45, "learning_rate": 1.2275872632231098e-05, "loss": 1.2289, "step": 5805 }, { "epoch": 1.45, "learning_rate": 1.2261693053931757e-05, "loss": 1.2423, "step": 5810 }, { "epoch": 1.45, "learning_rate": 1.2247508681489732e-05, "loss": 1.2335, "step": 5815 }, { "epoch": 1.46, "learning_rate": 1.2233319544971836e-05, "loss": 1.2344, "step": 5820 }, { "epoch": 1.46, "learning_rate": 1.2219125674454986e-05, "loss": 1.239, "step": 5825 }, { "epoch": 1.46, "learning_rate": 1.2204927100026125e-05, "loss": 1.1935, "step": 5830 }, { "epoch": 1.46, "learning_rate": 1.2190723851782176e-05, "loss": 1.213, "step": 5835 }, { "epoch": 1.46, "learning_rate": 1.217651595982996e-05, "loss": 1.211, "step": 5840 }, { "epoch": 1.46, "learning_rate": 1.216230345428615e-05, "loss": 1.2376, "step": 5845 }, { "epoch": 1.46, "learning_rate": 1.2148086365277197e-05, "loss": 1.2199, "step": 5850 }, { "epoch": 1.46, "learning_rate": 1.2133864722939257e-05, "loss": 1.2444, "step": 5855 }, { "epoch": 1.47, "learning_rate": 1.2119638557418155e-05, "loss": 1.2311, "step": 5860 }, { "epoch": 1.47, "learning_rate": 1.2105407898869283e-05, "loss": 1.2136, "step": 5865 }, { "epoch": 1.47, "learning_rate": 1.2091172777457583e-05, "loss": 1.2379, "step": 5870 }, { "epoch": 1.47, "learning_rate": 1.2076933223357433e-05, "loss": 1.2155, "step": 5875 }, { "epoch": 1.47, "learning_rate": 1.2062689266752622e-05, "loss": 1.2554, "step": 5880 }, { "epoch": 1.47, "learning_rate": 1.2048440937836264e-05, "loss": 1.239, "step": 5885 }, { "epoch": 1.47, "learning_rate": 1.2034188266810736e-05, "loss": 1.1865, "step": 5890 }, { "epoch": 1.47, "learning_rate": 1.2019931283887635e-05, "loss": 1.2066, "step": 5895 }, { "epoch": 1.48, "learning_rate": 1.200567001928769e-05, "loss": 1.208, "step": 5900 }, { "epoch": 1.48, "learning_rate": 1.19914045032407e-05, "loss": 1.2554, "step": 5905 }, { "epoch": 1.48, "learning_rate": 1.1977134765985481e-05, "loss": 1.2192, "step": 5910 }, { "epoch": 1.48, "learning_rate": 1.1962860837769794e-05, "loss": 1.2169, "step": 5915 }, { "epoch": 1.48, "learning_rate": 1.194858274885029e-05, "loss": 1.2111, "step": 5920 }, { "epoch": 1.48, "learning_rate": 1.193430052949244e-05, "loss": 1.2258, "step": 5925 }, { "epoch": 1.48, "learning_rate": 1.1920014209970456e-05, "loss": 1.1916, "step": 5930 }, { "epoch": 1.48, "learning_rate": 1.1905723820567257e-05, "loss": 1.1985, "step": 5935 }, { "epoch": 1.49, "learning_rate": 1.189142939157438e-05, "loss": 1.1977, "step": 5940 }, { "epoch": 1.49, "learning_rate": 1.1877130953291932e-05, "loss": 1.2283, "step": 5945 }, { "epoch": 1.49, "learning_rate": 1.1862828536028511e-05, "loss": 1.227, "step": 5950 }, { "epoch": 1.49, "learning_rate": 1.1848522170101155e-05, "loss": 1.2106, "step": 5955 }, { "epoch": 1.49, "learning_rate": 1.1834211885835267e-05, "loss": 1.2327, "step": 5960 }, { "epoch": 1.49, "learning_rate": 1.1819897713564562e-05, "loss": 1.2006, "step": 5965 }, { "epoch": 1.49, "learning_rate": 1.180557968363099e-05, "loss": 1.2416, "step": 5970 }, { "epoch": 1.49, "learning_rate": 1.1791257826384682e-05, "loss": 1.2201, "step": 5975 }, { "epoch": 1.5, "learning_rate": 1.1776932172183882e-05, "loss": 1.2061, "step": 5980 }, { "epoch": 1.5, "learning_rate": 1.1762602751394878e-05, "loss": 1.1958, "step": 5985 }, { "epoch": 1.5, "learning_rate": 1.174826959439195e-05, "loss": 1.2265, "step": 5990 }, { "epoch": 1.5, "learning_rate": 1.1733932731557292e-05, "loss": 1.2274, "step": 5995 }, { "epoch": 1.5, "learning_rate": 1.171959219328095e-05, "loss": 1.219, "step": 6000 }, { "epoch": 1.5, "learning_rate": 1.1705248009960773e-05, "loss": 1.2315, "step": 6005 }, { "epoch": 1.5, "learning_rate": 1.1690900212002323e-05, "loss": 1.2397, "step": 6010 }, { "epoch": 1.5, "learning_rate": 1.1676548829818831e-05, "loss": 1.2238, "step": 6015 }, { "epoch": 1.51, "learning_rate": 1.1662193893831129e-05, "loss": 1.2065, "step": 6020 }, { "epoch": 1.51, "learning_rate": 1.1647835434467569e-05, "loss": 1.2349, "step": 6025 }, { "epoch": 1.51, "learning_rate": 1.1633473482163982e-05, "loss": 1.216, "step": 6030 }, { "epoch": 1.51, "learning_rate": 1.1619108067363608e-05, "loss": 1.1983, "step": 6035 }, { "epoch": 1.51, "learning_rate": 1.1604739220517012e-05, "loss": 1.2072, "step": 6040 }, { "epoch": 1.51, "learning_rate": 1.1590366972082046e-05, "loss": 1.2475, "step": 6045 }, { "epoch": 1.51, "learning_rate": 1.1575991352523765e-05, "loss": 1.1955, "step": 6050 }, { "epoch": 1.51, "learning_rate": 1.1561612392314374e-05, "loss": 1.2333, "step": 6055 }, { "epoch": 1.52, "learning_rate": 1.1547230121933153e-05, "loss": 1.1844, "step": 6060 }, { "epoch": 1.52, "learning_rate": 1.1532844571866415e-05, "loss": 1.218, "step": 6065 }, { "epoch": 1.52, "learning_rate": 1.1518455772607401e-05, "loss": 1.2199, "step": 6070 }, { "epoch": 1.52, "learning_rate": 1.150406375465626e-05, "loss": 1.2084, "step": 6075 }, { "epoch": 1.52, "learning_rate": 1.1489668548519955e-05, "loss": 1.222, "step": 6080 }, { "epoch": 1.52, "learning_rate": 1.14752701847122e-05, "loss": 1.2178, "step": 6085 }, { "epoch": 1.52, "learning_rate": 1.146086869375342e-05, "loss": 1.2633, "step": 6090 }, { "epoch": 1.52, "learning_rate": 1.1446464106170651e-05, "loss": 1.2645, "step": 6095 }, { "epoch": 1.53, "learning_rate": 1.1432056452497504e-05, "loss": 1.2213, "step": 6100 }, { "epoch": 1.53, "learning_rate": 1.1417645763274088e-05, "loss": 1.1927, "step": 6105 }, { "epoch": 1.53, "learning_rate": 1.140323206904694e-05, "loss": 1.2114, "step": 6110 }, { "epoch": 1.53, "learning_rate": 1.1388815400368972e-05, "loss": 1.2664, "step": 6115 }, { "epoch": 1.53, "learning_rate": 1.1374395787799402e-05, "loss": 1.229, "step": 6120 }, { "epoch": 1.53, "learning_rate": 1.1359973261903682e-05, "loss": 1.2111, "step": 6125 }, { "epoch": 1.53, "learning_rate": 1.1345547853253449e-05, "loss": 1.2466, "step": 6130 }, { "epoch": 1.53, "learning_rate": 1.1331119592426445e-05, "loss": 1.271, "step": 6135 }, { "epoch": 1.54, "learning_rate": 1.1316688510006452e-05, "loss": 1.1852, "step": 6140 }, { "epoch": 1.54, "learning_rate": 1.1302254636583245e-05, "loss": 1.1882, "step": 6145 }, { "epoch": 1.54, "learning_rate": 1.1287818002752508e-05, "loss": 1.2324, "step": 6150 }, { "epoch": 1.54, "learning_rate": 1.1273378639115777e-05, "loss": 1.1959, "step": 6155 }, { "epoch": 1.54, "learning_rate": 1.1258936576280375e-05, "loss": 1.2014, "step": 6160 }, { "epoch": 1.54, "learning_rate": 1.1244491844859346e-05, "loss": 1.2045, "step": 6165 }, { "epoch": 1.54, "learning_rate": 1.1230044475471389e-05, "loss": 1.1678, "step": 6170 }, { "epoch": 1.55, "learning_rate": 1.12155944987408e-05, "loss": 1.1895, "step": 6175 }, { "epoch": 1.55, "learning_rate": 1.1201141945297398e-05, "loss": 1.1773, "step": 6180 }, { "epoch": 1.55, "learning_rate": 1.1186686845776466e-05, "loss": 1.2291, "step": 6185 }, { "epoch": 1.55, "learning_rate": 1.117222923081868e-05, "loss": 1.2193, "step": 6190 }, { "epoch": 1.55, "learning_rate": 1.1157769131070046e-05, "loss": 1.2039, "step": 6195 }, { "epoch": 1.55, "learning_rate": 1.1143306577181849e-05, "loss": 1.2415, "step": 6200 }, { "epoch": 1.55, "learning_rate": 1.1128841599810567e-05, "loss": 1.2295, "step": 6205 }, { "epoch": 1.55, "learning_rate": 1.1114374229617817e-05, "loss": 1.188, "step": 6210 }, { "epoch": 1.56, "learning_rate": 1.1099904497270285e-05, "loss": 1.2046, "step": 6215 }, { "epoch": 1.56, "learning_rate": 1.1085432433439666e-05, "loss": 1.1906, "step": 6220 }, { "epoch": 1.56, "learning_rate": 1.10709580688026e-05, "loss": 1.2161, "step": 6225 }, { "epoch": 1.56, "learning_rate": 1.1056481434040605e-05, "loss": 1.2378, "step": 6230 }, { "epoch": 1.56, "learning_rate": 1.1042002559840003e-05, "loss": 1.2359, "step": 6235 }, { "epoch": 1.56, "learning_rate": 1.1027521476891872e-05, "loss": 1.2179, "step": 6240 }, { "epoch": 1.56, "learning_rate": 1.1013038215891964e-05, "loss": 1.2546, "step": 6245 }, { "epoch": 1.56, "learning_rate": 1.0998552807540657e-05, "loss": 1.2371, "step": 6250 }, { "epoch": 1.57, "learning_rate": 1.0984065282542869e-05, "loss": 1.2453, "step": 6255 }, { "epoch": 1.57, "learning_rate": 1.0969575671608015e-05, "loss": 1.1817, "step": 6260 }, { "epoch": 1.57, "learning_rate": 1.0955084005449928e-05, "loss": 1.1847, "step": 6265 }, { "epoch": 1.57, "learning_rate": 1.0940590314786797e-05, "loss": 1.213, "step": 6270 }, { "epoch": 1.57, "learning_rate": 1.09260946303411e-05, "loss": 1.2452, "step": 6275 }, { "epoch": 1.57, "learning_rate": 1.0911596982839547e-05, "loss": 1.236, "step": 6280 }, { "epoch": 1.57, "learning_rate": 1.0897097403013006e-05, "loss": 1.2235, "step": 6285 }, { "epoch": 1.57, "learning_rate": 1.0882595921596439e-05, "loss": 1.1953, "step": 6290 }, { "epoch": 1.58, "learning_rate": 1.086809256932884e-05, "loss": 1.2134, "step": 6295 }, { "epoch": 1.58, "learning_rate": 1.0853587376953173e-05, "loss": 1.2195, "step": 6300 }, { "epoch": 1.58, "learning_rate": 1.08390803752163e-05, "loss": 1.2299, "step": 6305 }, { "epoch": 1.58, "learning_rate": 1.0824571594868912e-05, "loss": 1.2616, "step": 6310 }, { "epoch": 1.58, "learning_rate": 1.0810061066665476e-05, "loss": 1.2405, "step": 6315 }, { "epoch": 1.58, "learning_rate": 1.0795548821364168e-05, "loss": 1.2252, "step": 6320 }, { "epoch": 1.58, "learning_rate": 1.0781034889726796e-05, "loss": 1.2212, "step": 6325 }, { "epoch": 1.58, "learning_rate": 1.0766519302518747e-05, "loss": 1.2065, "step": 6330 }, { "epoch": 1.59, "learning_rate": 1.0752002090508911e-05, "loss": 1.2464, "step": 6335 }, { "epoch": 1.59, "learning_rate": 1.0737483284469634e-05, "loss": 1.228, "step": 6340 }, { "epoch": 1.59, "learning_rate": 1.0722962915176634e-05, "loss": 1.2133, "step": 6345 }, { "epoch": 1.59, "learning_rate": 1.070844101340894e-05, "loss": 1.2146, "step": 6350 }, { "epoch": 1.59, "learning_rate": 1.069391760994883e-05, "loss": 1.1744, "step": 6355 }, { "epoch": 1.59, "learning_rate": 1.0679392735581771e-05, "loss": 1.2225, "step": 6360 }, { "epoch": 1.59, "learning_rate": 1.0664866421096338e-05, "loss": 1.199, "step": 6365 }, { "epoch": 1.59, "learning_rate": 1.0650338697284177e-05, "loss": 1.1755, "step": 6370 }, { "epoch": 1.6, "learning_rate": 1.0635809594939898e-05, "loss": 1.269, "step": 6375 }, { "epoch": 1.6, "learning_rate": 1.0621279144861047e-05, "loss": 1.1627, "step": 6380 }, { "epoch": 1.6, "learning_rate": 1.0606747377848028e-05, "loss": 1.2012, "step": 6385 }, { "epoch": 1.6, "learning_rate": 1.0592214324704027e-05, "loss": 1.2128, "step": 6390 }, { "epoch": 1.6, "learning_rate": 1.0577680016234966e-05, "loss": 1.2012, "step": 6395 }, { "epoch": 1.6, "learning_rate": 1.0563144483249421e-05, "loss": 1.1896, "step": 6400 }, { "epoch": 1.6, "learning_rate": 1.0548607756558572e-05, "loss": 1.1946, "step": 6405 }, { "epoch": 1.6, "learning_rate": 1.0534069866976113e-05, "loss": 1.1808, "step": 6410 }, { "epoch": 1.61, "learning_rate": 1.0519530845318224e-05, "loss": 1.2583, "step": 6415 }, { "epoch": 1.61, "learning_rate": 1.050499072240347e-05, "loss": 1.237, "step": 6420 }, { "epoch": 1.61, "learning_rate": 1.0490449529052755e-05, "loss": 1.1941, "step": 6425 }, { "epoch": 1.61, "learning_rate": 1.0475907296089252e-05, "loss": 1.2222, "step": 6430 }, { "epoch": 1.61, "learning_rate": 1.0461364054338339e-05, "loss": 1.2064, "step": 6435 }, { "epoch": 1.61, "learning_rate": 1.0446819834627526e-05, "loss": 1.2029, "step": 6440 }, { "epoch": 1.61, "learning_rate": 1.0432274667786409e-05, "loss": 1.2509, "step": 6445 }, { "epoch": 1.61, "learning_rate": 1.0417728584646574e-05, "loss": 1.2021, "step": 6450 }, { "epoch": 1.62, "learning_rate": 1.0403181616041564e-05, "loss": 1.2267, "step": 6455 }, { "epoch": 1.62, "learning_rate": 1.0388633792806792e-05, "loss": 1.197, "step": 6460 }, { "epoch": 1.62, "learning_rate": 1.0374085145779486e-05, "loss": 1.2131, "step": 6465 }, { "epoch": 1.62, "learning_rate": 1.035953570579862e-05, "loss": 1.2406, "step": 6470 }, { "epoch": 1.62, "learning_rate": 1.0344985503704841e-05, "loss": 1.2095, "step": 6475 }, { "epoch": 1.62, "learning_rate": 1.0330434570340423e-05, "loss": 1.2096, "step": 6480 }, { "epoch": 1.62, "learning_rate": 1.0315882936549181e-05, "loss": 1.2179, "step": 6485 }, { "epoch": 1.62, "learning_rate": 1.0301330633176425e-05, "loss": 1.2373, "step": 6490 }, { "epoch": 1.63, "learning_rate": 1.028677769106887e-05, "loss": 1.2, "step": 6495 }, { "epoch": 1.63, "learning_rate": 1.0272224141074596e-05, "loss": 1.2208, "step": 6500 }, { "epoch": 1.63, "learning_rate": 1.0257670014042969e-05, "loss": 1.2557, "step": 6505 }, { "epoch": 1.63, "learning_rate": 1.0243115340824577e-05, "loss": 1.2055, "step": 6510 }, { "epoch": 1.63, "learning_rate": 1.0228560152271167e-05, "loss": 1.2549, "step": 6515 }, { "epoch": 1.63, "learning_rate": 1.0214004479235578e-05, "loss": 1.2094, "step": 6520 }, { "epoch": 1.63, "learning_rate": 1.0199448352571673e-05, "loss": 1.2063, "step": 6525 }, { "epoch": 1.63, "learning_rate": 1.0184891803134277e-05, "loss": 1.2328, "step": 6530 }, { "epoch": 1.64, "learning_rate": 1.0170334861779123e-05, "loss": 1.2041, "step": 6535 }, { "epoch": 1.64, "learning_rate": 1.0155777559362754e-05, "loss": 1.1998, "step": 6540 }, { "epoch": 1.64, "learning_rate": 1.0141219926742496e-05, "loss": 1.2356, "step": 6545 }, { "epoch": 1.64, "learning_rate": 1.0126661994776365e-05, "loss": 1.2147, "step": 6550 }, { "epoch": 1.64, "learning_rate": 1.0112103794323018e-05, "loss": 1.2511, "step": 6555 }, { "epoch": 1.64, "learning_rate": 1.0097545356241676e-05, "loss": 1.1976, "step": 6560 }, { "epoch": 1.64, "learning_rate": 1.0082986711392064e-05, "loss": 1.232, "step": 6565 }, { "epoch": 1.64, "learning_rate": 1.0068427890634352e-05, "loss": 1.2417, "step": 6570 }, { "epoch": 1.65, "learning_rate": 1.005386892482907e-05, "loss": 1.1936, "step": 6575 }, { "epoch": 1.65, "learning_rate": 1.003930984483707e-05, "loss": 1.1897, "step": 6580 }, { "epoch": 1.65, "learning_rate": 1.0024750681519442e-05, "loss": 1.2021, "step": 6585 }, { "epoch": 1.65, "learning_rate": 1.0010191465737437e-05, "loss": 1.2204, "step": 6590 }, { "epoch": 1.65, "learning_rate": 9.995632228352445e-06, "loss": 1.2018, "step": 6595 }, { "epoch": 1.65, "learning_rate": 9.981073000225873e-06, "loss": 1.2192, "step": 6600 }, { "epoch": 1.65, "learning_rate": 9.966513812219135e-06, "loss": 1.2067, "step": 6605 }, { "epoch": 1.65, "learning_rate": 9.95195469519354e-06, "loss": 1.2274, "step": 6610 }, { "epoch": 1.66, "learning_rate": 9.937395680010256e-06, "loss": 1.209, "step": 6615 }, { "epoch": 1.66, "learning_rate": 9.922836797530234e-06, "loss": 1.2376, "step": 6620 }, { "epoch": 1.66, "learning_rate": 9.908278078614139e-06, "loss": 1.2361, "step": 6625 }, { "epoch": 1.66, "learning_rate": 9.893719554122298e-06, "loss": 1.2174, "step": 6630 }, { "epoch": 1.66, "learning_rate": 9.879161254914615e-06, "loss": 1.2324, "step": 6635 }, { "epoch": 1.66, "learning_rate": 9.864603211850526e-06, "loss": 1.2119, "step": 6640 }, { "epoch": 1.66, "learning_rate": 9.85004545578892e-06, "loss": 1.215, "step": 6645 }, { "epoch": 1.66, "learning_rate": 9.835488017588078e-06, "loss": 1.215, "step": 6650 }, { "epoch": 1.67, "learning_rate": 9.820930928105603e-06, "loss": 1.2227, "step": 6655 }, { "epoch": 1.67, "learning_rate": 9.80637421819837e-06, "loss": 1.2197, "step": 6660 }, { "epoch": 1.67, "learning_rate": 9.791817918722438e-06, "loss": 1.2096, "step": 6665 }, { "epoch": 1.67, "learning_rate": 9.777262060533003e-06, "loss": 1.1778, "step": 6670 }, { "epoch": 1.67, "learning_rate": 9.762706674484322e-06, "loss": 1.2402, "step": 6675 }, { "epoch": 1.67, "learning_rate": 9.748151791429651e-06, "loss": 1.2206, "step": 6680 }, { "epoch": 1.67, "learning_rate": 9.733597442221182e-06, "loss": 1.2189, "step": 6685 }, { "epoch": 1.67, "learning_rate": 9.719043657709973e-06, "loss": 1.2213, "step": 6690 }, { "epoch": 1.68, "learning_rate": 9.70449046874589e-06, "loss": 1.2494, "step": 6695 }, { "epoch": 1.68, "learning_rate": 9.689937906177527e-06, "loss": 1.2475, "step": 6700 }, { "epoch": 1.68, "learning_rate": 9.675386000852165e-06, "loss": 1.2393, "step": 6705 }, { "epoch": 1.68, "learning_rate": 9.660834783615674e-06, "loss": 1.2185, "step": 6710 }, { "epoch": 1.68, "learning_rate": 9.646284285312475e-06, "loss": 1.2218, "step": 6715 }, { "epoch": 1.68, "learning_rate": 9.631734536785476e-06, "loss": 1.2027, "step": 6720 }, { "epoch": 1.68, "learning_rate": 9.617185568875971e-06, "loss": 1.2105, "step": 6725 }, { "epoch": 1.68, "learning_rate": 9.60263741242362e-06, "loss": 1.207, "step": 6730 }, { "epoch": 1.69, "learning_rate": 9.588090098266354e-06, "loss": 1.2266, "step": 6735 }, { "epoch": 1.69, "learning_rate": 9.573543657240318e-06, "loss": 1.2331, "step": 6740 }, { "epoch": 1.69, "learning_rate": 9.558998120179812e-06, "loss": 1.2476, "step": 6745 }, { "epoch": 1.69, "learning_rate": 9.544453517917214e-06, "loss": 1.2013, "step": 6750 }, { "epoch": 1.69, "learning_rate": 9.529909881282922e-06, "loss": 1.1971, "step": 6755 }, { "epoch": 1.69, "learning_rate": 9.515367241105288e-06, "loss": 1.2325, "step": 6760 }, { "epoch": 1.69, "learning_rate": 9.500825628210551e-06, "loss": 1.2354, "step": 6765 }, { "epoch": 1.69, "learning_rate": 9.486285073422774e-06, "loss": 1.2108, "step": 6770 }, { "epoch": 1.7, "learning_rate": 9.471745607563778e-06, "loss": 1.2133, "step": 6775 }, { "epoch": 1.7, "learning_rate": 9.457207261453073e-06, "loss": 1.175, "step": 6780 }, { "epoch": 1.7, "learning_rate": 9.442670065907794e-06, "loss": 1.2023, "step": 6785 }, { "epoch": 1.7, "learning_rate": 9.428134051742644e-06, "loss": 1.1766, "step": 6790 }, { "epoch": 1.7, "learning_rate": 9.413599249769814e-06, "loss": 1.236, "step": 6795 }, { "epoch": 1.7, "learning_rate": 9.39906569079893e-06, "loss": 1.1866, "step": 6800 }, { "epoch": 1.7, "learning_rate": 9.38453340563698e-06, "loss": 1.2113, "step": 6805 }, { "epoch": 1.7, "learning_rate": 9.370002425088257e-06, "loss": 1.2214, "step": 6810 }, { "epoch": 1.71, "learning_rate": 9.355472779954283e-06, "loss": 1.193, "step": 6815 }, { "epoch": 1.71, "learning_rate": 9.340944501033754e-06, "loss": 1.191, "step": 6820 }, { "epoch": 1.71, "learning_rate": 9.326417619122464e-06, "loss": 1.2215, "step": 6825 }, { "epoch": 1.71, "learning_rate": 9.311892165013253e-06, "loss": 1.2137, "step": 6830 }, { "epoch": 1.71, "learning_rate": 9.297368169495932e-06, "loss": 1.2625, "step": 6835 }, { "epoch": 1.71, "learning_rate": 9.282845663357219e-06, "loss": 1.2161, "step": 6840 }, { "epoch": 1.71, "learning_rate": 9.268324677380674e-06, "loss": 1.2401, "step": 6845 }, { "epoch": 1.71, "learning_rate": 9.253805242346633e-06, "loss": 1.2762, "step": 6850 }, { "epoch": 1.72, "learning_rate": 9.23928738903215e-06, "loss": 1.1943, "step": 6855 }, { "epoch": 1.72, "learning_rate": 9.224771148210927e-06, "loss": 1.2189, "step": 6860 }, { "epoch": 1.72, "learning_rate": 9.210256550653238e-06, "loss": 1.2134, "step": 6865 }, { "epoch": 1.72, "learning_rate": 9.195743627125888e-06, "loss": 1.2158, "step": 6870 }, { "epoch": 1.72, "learning_rate": 9.181232408392118e-06, "loss": 1.2073, "step": 6875 }, { "epoch": 1.72, "learning_rate": 9.166722925211562e-06, "loss": 1.2205, "step": 6880 }, { "epoch": 1.72, "learning_rate": 9.152215208340187e-06, "loss": 1.2409, "step": 6885 }, { "epoch": 1.72, "learning_rate": 9.137709288530196e-06, "loss": 1.2135, "step": 6890 }, { "epoch": 1.73, "learning_rate": 9.123205196529997e-06, "loss": 1.2516, "step": 6895 }, { "epoch": 1.73, "learning_rate": 9.108702963084113e-06, "loss": 1.2057, "step": 6900 }, { "epoch": 1.73, "learning_rate": 9.094202618933138e-06, "loss": 1.2367, "step": 6905 }, { "epoch": 1.73, "learning_rate": 9.079704194813656e-06, "loss": 1.2335, "step": 6910 }, { "epoch": 1.73, "learning_rate": 9.06520772145818e-06, "loss": 1.2093, "step": 6915 }, { "epoch": 1.73, "learning_rate": 9.050713229595087e-06, "loss": 1.2066, "step": 6920 }, { "epoch": 1.73, "learning_rate": 9.036220749948558e-06, "loss": 1.1984, "step": 6925 }, { "epoch": 1.73, "learning_rate": 9.021730313238506e-06, "loss": 1.2476, "step": 6930 }, { "epoch": 1.74, "learning_rate": 9.007241950180511e-06, "loss": 1.2264, "step": 6935 }, { "epoch": 1.74, "learning_rate": 8.992755691485767e-06, "loss": 1.2225, "step": 6940 }, { "epoch": 1.74, "learning_rate": 8.978271567860997e-06, "loss": 1.2026, "step": 6945 }, { "epoch": 1.74, "learning_rate": 8.963789610008406e-06, "loss": 1.2227, "step": 6950 }, { "epoch": 1.74, "learning_rate": 8.949309848625598e-06, "loss": 1.2298, "step": 6955 }, { "epoch": 1.74, "learning_rate": 8.934832314405537e-06, "loss": 1.2141, "step": 6960 }, { "epoch": 1.74, "learning_rate": 8.920357038036447e-06, "loss": 1.2548, "step": 6965 }, { "epoch": 1.74, "learning_rate": 8.90588405020178e-06, "loss": 1.1976, "step": 6970 }, { "epoch": 1.75, "learning_rate": 8.891413381580131e-06, "loss": 1.2296, "step": 6975 }, { "epoch": 1.75, "learning_rate": 8.876945062845182e-06, "loss": 1.1986, "step": 6980 }, { "epoch": 1.75, "learning_rate": 8.862479124665634e-06, "loss": 1.2166, "step": 6985 }, { "epoch": 1.75, "learning_rate": 8.848015597705133e-06, "loss": 1.2079, "step": 6990 }, { "epoch": 1.75, "learning_rate": 8.83355451262223e-06, "loss": 1.2095, "step": 6995 }, { "epoch": 1.75, "learning_rate": 8.819095900070286e-06, "loss": 1.1953, "step": 7000 }, { "epoch": 1.75, "learning_rate": 8.804639790697432e-06, "loss": 1.1983, "step": 7005 }, { "epoch": 1.75, "learning_rate": 8.79018621514648e-06, "loss": 1.2014, "step": 7010 }, { "epoch": 1.76, "learning_rate": 8.77573520405488e-06, "loss": 1.2335, "step": 7015 }, { "epoch": 1.76, "learning_rate": 8.761286788054643e-06, "loss": 1.2347, "step": 7020 }, { "epoch": 1.76, "learning_rate": 8.746840997772286e-06, "loss": 1.1809, "step": 7025 }, { "epoch": 1.76, "learning_rate": 8.73239786382875e-06, "loss": 1.2574, "step": 7030 }, { "epoch": 1.76, "learning_rate": 8.71795741683935e-06, "loss": 1.2364, "step": 7035 }, { "epoch": 1.76, "learning_rate": 8.703519687413704e-06, "loss": 1.2484, "step": 7040 }, { "epoch": 1.76, "learning_rate": 8.68908470615567e-06, "loss": 1.2306, "step": 7045 }, { "epoch": 1.76, "learning_rate": 8.67465250366329e-06, "loss": 1.2037, "step": 7050 }, { "epoch": 1.77, "learning_rate": 8.6602231105287e-06, "loss": 1.239, "step": 7055 }, { "epoch": 1.77, "learning_rate": 8.645796557338088e-06, "loss": 1.2142, "step": 7060 }, { "epoch": 1.77, "learning_rate": 8.631372874671624e-06, "loss": 1.2164, "step": 7065 }, { "epoch": 1.77, "learning_rate": 8.616952093103393e-06, "loss": 1.214, "step": 7070 }, { "epoch": 1.77, "learning_rate": 8.60253424320133e-06, "loss": 1.2376, "step": 7075 }, { "epoch": 1.77, "learning_rate": 8.588119355527148e-06, "loss": 1.1896, "step": 7080 }, { "epoch": 1.77, "learning_rate": 8.573707460636296e-06, "loss": 1.2244, "step": 7085 }, { "epoch": 1.77, "learning_rate": 8.559298589077866e-06, "loss": 1.2009, "step": 7090 }, { "epoch": 1.78, "learning_rate": 8.54489277139455e-06, "loss": 1.2108, "step": 7095 }, { "epoch": 1.78, "learning_rate": 8.53049003812256e-06, "loss": 1.2592, "step": 7100 }, { "epoch": 1.78, "learning_rate": 8.516090419791569e-06, "loss": 1.2217, "step": 7105 }, { "epoch": 1.78, "learning_rate": 8.501693946924662e-06, "loss": 1.2478, "step": 7110 }, { "epoch": 1.78, "learning_rate": 8.487300650038238e-06, "loss": 1.2053, "step": 7115 }, { "epoch": 1.78, "learning_rate": 8.472910559641975e-06, "loss": 1.1916, "step": 7120 }, { "epoch": 1.78, "learning_rate": 8.45852370623875e-06, "loss": 1.2131, "step": 7125 }, { "epoch": 1.78, "learning_rate": 8.444140120324575e-06, "loss": 1.2377, "step": 7130 }, { "epoch": 1.79, "learning_rate": 8.429759832388545e-06, "loss": 1.2079, "step": 7135 }, { "epoch": 1.79, "learning_rate": 8.415382872912758e-06, "loss": 1.2054, "step": 7140 }, { "epoch": 1.79, "learning_rate": 8.401009272372256e-06, "loss": 1.2156, "step": 7145 }, { "epoch": 1.79, "learning_rate": 8.386639061234967e-06, "loss": 1.23, "step": 7150 }, { "epoch": 1.79, "learning_rate": 8.372272269961626e-06, "loss": 1.1898, "step": 7155 }, { "epoch": 1.79, "learning_rate": 8.35790892900572e-06, "loss": 1.2433, "step": 7160 }, { "epoch": 1.79, "learning_rate": 8.343549068813437e-06, "loss": 1.2312, "step": 7165 }, { "epoch": 1.79, "learning_rate": 8.329192719823569e-06, "loss": 1.2501, "step": 7170 }, { "epoch": 1.8, "learning_rate": 8.314839912467468e-06, "loss": 1.1845, "step": 7175 }, { "epoch": 1.8, "learning_rate": 8.300490677168986e-06, "loss": 1.246, "step": 7180 }, { "epoch": 1.8, "learning_rate": 8.2861450443444e-06, "loss": 1.1947, "step": 7185 }, { "epoch": 1.8, "learning_rate": 8.27180304440235e-06, "loss": 1.1964, "step": 7190 }, { "epoch": 1.8, "learning_rate": 8.257464707743778e-06, "loss": 1.1999, "step": 7195 }, { "epoch": 1.8, "learning_rate": 8.243130064761852e-06, "loss": 1.2569, "step": 7200 }, { "epoch": 1.8, "learning_rate": 8.228799145841922e-06, "loss": 1.1947, "step": 7205 }, { "epoch": 1.8, "learning_rate": 8.214471981361436e-06, "loss": 1.2225, "step": 7210 }, { "epoch": 1.81, "learning_rate": 8.200148601689887e-06, "loss": 1.1941, "step": 7215 }, { "epoch": 1.81, "learning_rate": 8.185829037188751e-06, "loss": 1.2217, "step": 7220 }, { "epoch": 1.81, "learning_rate": 8.171513318211403e-06, "loss": 1.244, "step": 7225 }, { "epoch": 1.81, "learning_rate": 8.15720147510308e-06, "loss": 1.2224, "step": 7230 }, { "epoch": 1.81, "learning_rate": 8.142893538200796e-06, "loss": 1.2538, "step": 7235 }, { "epoch": 1.81, "learning_rate": 8.128589537833289e-06, "loss": 1.1802, "step": 7240 }, { "epoch": 1.81, "learning_rate": 8.114289504320946e-06, "loss": 1.1909, "step": 7245 }, { "epoch": 1.81, "learning_rate": 8.099993467975752e-06, "loss": 1.2534, "step": 7250 }, { "epoch": 1.82, "learning_rate": 8.085701459101216e-06, "loss": 1.1968, "step": 7255 }, { "epoch": 1.82, "learning_rate": 8.071413507992312e-06, "loss": 1.2454, "step": 7260 }, { "epoch": 1.82, "learning_rate": 8.057129644935411e-06, "loss": 1.2311, "step": 7265 }, { "epoch": 1.82, "learning_rate": 8.04284990020822e-06, "loss": 1.1969, "step": 7270 }, { "epoch": 1.82, "learning_rate": 8.028574304079716e-06, "loss": 1.2484, "step": 7275 }, { "epoch": 1.82, "learning_rate": 8.014302886810078e-06, "loss": 1.2111, "step": 7280 }, { "epoch": 1.82, "learning_rate": 8.000035678650638e-06, "loss": 1.2063, "step": 7285 }, { "epoch": 1.82, "learning_rate": 7.985772709843789e-06, "loss": 1.2054, "step": 7290 }, { "epoch": 1.83, "learning_rate": 7.971514010622953e-06, "loss": 1.2012, "step": 7295 }, { "epoch": 1.83, "learning_rate": 7.957259611212495e-06, "loss": 1.2119, "step": 7300 }, { "epoch": 1.83, "learning_rate": 7.943009541827667e-06, "loss": 1.2307, "step": 7305 }, { "epoch": 1.83, "learning_rate": 7.928763832674541e-06, "loss": 1.2307, "step": 7310 }, { "epoch": 1.83, "learning_rate": 7.91452251394995e-06, "loss": 1.2409, "step": 7315 }, { "epoch": 1.83, "learning_rate": 7.900285615841415e-06, "loss": 1.1773, "step": 7320 }, { "epoch": 1.83, "learning_rate": 7.886053168527085e-06, "loss": 1.2124, "step": 7325 }, { "epoch": 1.83, "learning_rate": 7.871825202175695e-06, "loss": 1.2255, "step": 7330 }, { "epoch": 1.84, "learning_rate": 7.857601746946455e-06, "loss": 1.218, "step": 7335 }, { "epoch": 1.84, "learning_rate": 7.843382832989023e-06, "loss": 1.2028, "step": 7340 }, { "epoch": 1.84, "learning_rate": 7.829168490443435e-06, "loss": 1.2325, "step": 7345 }, { "epoch": 1.84, "learning_rate": 7.814958749440034e-06, "loss": 1.2723, "step": 7350 }, { "epoch": 1.84, "learning_rate": 7.800753640099408e-06, "loss": 1.1804, "step": 7355 }, { "epoch": 1.84, "learning_rate": 7.786553192532326e-06, "loss": 1.2025, "step": 7360 }, { "epoch": 1.84, "learning_rate": 7.772357436839678e-06, "loss": 1.2345, "step": 7365 }, { "epoch": 1.84, "learning_rate": 7.758166403112409e-06, "loss": 1.1899, "step": 7370 }, { "epoch": 1.85, "learning_rate": 7.743980121431449e-06, "loss": 1.2206, "step": 7375 }, { "epoch": 1.85, "learning_rate": 7.729798621867662e-06, "loss": 1.1957, "step": 7380 }, { "epoch": 1.85, "learning_rate": 7.715621934481776e-06, "loss": 1.2046, "step": 7385 }, { "epoch": 1.85, "learning_rate": 7.701450089324312e-06, "loss": 1.1939, "step": 7390 }, { "epoch": 1.85, "learning_rate": 7.687283116435531e-06, "loss": 1.1851, "step": 7395 }, { "epoch": 1.85, "learning_rate": 7.673121045845367e-06, "loss": 1.2045, "step": 7400 }, { "epoch": 1.85, "learning_rate": 7.65896390757336e-06, "loss": 1.1872, "step": 7405 }, { "epoch": 1.85, "learning_rate": 7.644811731628591e-06, "loss": 1.2437, "step": 7410 }, { "epoch": 1.86, "learning_rate": 7.630664548009634e-06, "loss": 1.2318, "step": 7415 }, { "epoch": 1.86, "learning_rate": 7.616522386704469e-06, "loss": 1.2532, "step": 7420 }, { "epoch": 1.86, "learning_rate": 7.602385277690437e-06, "loss": 1.2191, "step": 7425 }, { "epoch": 1.86, "learning_rate": 7.5882532509341675e-06, "loss": 1.1982, "step": 7430 }, { "epoch": 1.86, "learning_rate": 7.574126336391514e-06, "loss": 1.2181, "step": 7435 }, { "epoch": 1.86, "learning_rate": 7.560004564007502e-06, "loss": 1.1675, "step": 7440 }, { "epoch": 1.86, "learning_rate": 7.545887963716248e-06, "loss": 1.2327, "step": 7445 }, { "epoch": 1.86, "learning_rate": 7.531776565440914e-06, "loss": 1.2258, "step": 7450 }, { "epoch": 1.87, "learning_rate": 7.517670399093622e-06, "loss": 1.2485, "step": 7455 }, { "epoch": 1.87, "learning_rate": 7.503569494575417e-06, "loss": 1.211, "step": 7460 }, { "epoch": 1.87, "learning_rate": 7.489473881776183e-06, "loss": 1.2558, "step": 7465 }, { "epoch": 1.87, "learning_rate": 7.475383590574592e-06, "loss": 1.2082, "step": 7470 }, { "epoch": 1.87, "learning_rate": 7.461298650838029e-06, "loss": 1.2079, "step": 7475 }, { "epoch": 1.87, "learning_rate": 7.4472190924225465e-06, "loss": 1.2095, "step": 7480 }, { "epoch": 1.87, "learning_rate": 7.433144945172777e-06, "loss": 1.2118, "step": 7485 }, { "epoch": 1.87, "learning_rate": 7.41907623892189e-06, "loss": 1.1997, "step": 7490 }, { "epoch": 1.88, "learning_rate": 7.405013003491518e-06, "loss": 1.2502, "step": 7495 }, { "epoch": 1.88, "learning_rate": 7.3909552686917066e-06, "loss": 1.2312, "step": 7500 }, { "epoch": 1.88, "learning_rate": 7.37690306432083e-06, "loss": 1.2444, "step": 7505 }, { "epoch": 1.88, "learning_rate": 7.362856420165548e-06, "loss": 1.2709, "step": 7510 }, { "epoch": 1.88, "learning_rate": 7.348815366000726e-06, "loss": 1.2138, "step": 7515 }, { "epoch": 1.88, "learning_rate": 7.334779931589384e-06, "loss": 1.2227, "step": 7520 }, { "epoch": 1.88, "learning_rate": 7.320750146682638e-06, "loss": 1.2503, "step": 7525 }, { "epoch": 1.88, "learning_rate": 7.306726041019613e-06, "loss": 1.2025, "step": 7530 }, { "epoch": 1.89, "learning_rate": 7.292707644327406e-06, "loss": 1.2066, "step": 7535 }, { "epoch": 1.89, "learning_rate": 7.278694986321011e-06, "loss": 1.2295, "step": 7540 }, { "epoch": 1.89, "learning_rate": 7.264688096703256e-06, "loss": 1.2188, "step": 7545 }, { "epoch": 1.89, "learning_rate": 7.250687005164743e-06, "loss": 1.2088, "step": 7550 }, { "epoch": 1.89, "learning_rate": 7.236691741383783e-06, "loss": 1.2059, "step": 7555 }, { "epoch": 1.89, "learning_rate": 7.222702335026337e-06, "loss": 1.1928, "step": 7560 }, { "epoch": 1.89, "learning_rate": 7.208718815745945e-06, "loss": 1.2098, "step": 7565 }, { "epoch": 1.89, "learning_rate": 7.194741213183672e-06, "loss": 1.1742, "step": 7570 }, { "epoch": 1.9, "learning_rate": 7.1807695569680325e-06, "loss": 1.2633, "step": 7575 }, { "epoch": 1.9, "learning_rate": 7.1668038767149515e-06, "loss": 1.2647, "step": 7580 }, { "epoch": 1.9, "learning_rate": 7.152844202027673e-06, "loss": 1.2423, "step": 7585 }, { "epoch": 1.9, "learning_rate": 7.138890562496721e-06, "loss": 1.2204, "step": 7590 }, { "epoch": 1.9, "learning_rate": 7.12494298769982e-06, "loss": 1.2282, "step": 7595 }, { "epoch": 1.9, "learning_rate": 7.111001507201839e-06, "loss": 1.2067, "step": 7600 }, { "epoch": 1.9, "learning_rate": 7.097066150554729e-06, "loss": 1.1841, "step": 7605 }, { "epoch": 1.9, "learning_rate": 7.083136947297471e-06, "loss": 1.2236, "step": 7610 }, { "epoch": 1.91, "learning_rate": 7.069213926955989e-06, "loss": 1.2448, "step": 7615 }, { "epoch": 1.91, "learning_rate": 7.055297119043105e-06, "loss": 1.258, "step": 7620 }, { "epoch": 1.91, "learning_rate": 7.041386553058474e-06, "loss": 1.1773, "step": 7625 }, { "epoch": 1.91, "learning_rate": 7.027482258488516e-06, "loss": 1.2039, "step": 7630 }, { "epoch": 1.91, "learning_rate": 7.013584264806366e-06, "loss": 1.227, "step": 7635 }, { "epoch": 1.91, "learning_rate": 6.999692601471795e-06, "loss": 1.2584, "step": 7640 }, { "epoch": 1.91, "learning_rate": 6.985807297931155e-06, "loss": 1.1941, "step": 7645 }, { "epoch": 1.91, "learning_rate": 6.971928383617321e-06, "loss": 1.2296, "step": 7650 }, { "epoch": 1.92, "learning_rate": 6.9580558879496265e-06, "loss": 1.1942, "step": 7655 }, { "epoch": 1.92, "learning_rate": 6.944189840333792e-06, "loss": 1.2055, "step": 7660 }, { "epoch": 1.92, "learning_rate": 6.930330270161878e-06, "loss": 1.232, "step": 7665 }, { "epoch": 1.92, "learning_rate": 6.91647720681221e-06, "loss": 1.2081, "step": 7670 }, { "epoch": 1.92, "learning_rate": 6.902630679649322e-06, "loss": 1.1874, "step": 7675 }, { "epoch": 1.92, "learning_rate": 6.888790718023892e-06, "loss": 1.2082, "step": 7680 }, { "epoch": 1.92, "learning_rate": 6.874957351272684e-06, "loss": 1.2164, "step": 7685 }, { "epoch": 1.92, "learning_rate": 6.861130608718478e-06, "loss": 1.2173, "step": 7690 }, { "epoch": 1.93, "learning_rate": 6.847310519670018e-06, "loss": 1.219, "step": 7695 }, { "epoch": 1.93, "learning_rate": 6.833497113421938e-06, "loss": 1.244, "step": 7700 }, { "epoch": 1.93, "learning_rate": 6.819690419254714e-06, "loss": 1.2206, "step": 7705 }, { "epoch": 1.93, "learning_rate": 6.805890466434588e-06, "loss": 1.1958, "step": 7710 }, { "epoch": 1.93, "learning_rate": 6.792097284213515e-06, "loss": 1.1836, "step": 7715 }, { "epoch": 1.93, "learning_rate": 6.778310901829099e-06, "loss": 1.1819, "step": 7720 }, { "epoch": 1.93, "learning_rate": 6.764531348504531e-06, "loss": 1.2263, "step": 7725 }, { "epoch": 1.93, "learning_rate": 6.750758653448524e-06, "loss": 1.2045, "step": 7730 }, { "epoch": 1.94, "learning_rate": 6.73699284585525e-06, "loss": 1.2179, "step": 7735 }, { "epoch": 1.94, "learning_rate": 6.723233954904289e-06, "loss": 1.218, "step": 7740 }, { "epoch": 1.94, "learning_rate": 6.709482009760555e-06, "loss": 1.2006, "step": 7745 }, { "epoch": 1.94, "learning_rate": 6.695737039574241e-06, "loss": 1.1896, "step": 7750 }, { "epoch": 1.94, "learning_rate": 6.681999073480756e-06, "loss": 1.1864, "step": 7755 }, { "epoch": 1.94, "learning_rate": 6.668268140600659e-06, "loss": 1.2327, "step": 7760 }, { "epoch": 1.94, "learning_rate": 6.6545442700396e-06, "loss": 1.2142, "step": 7765 }, { "epoch": 1.94, "learning_rate": 6.64082749088826e-06, "loss": 1.1816, "step": 7770 }, { "epoch": 1.95, "learning_rate": 6.627117832222297e-06, "loss": 1.2387, "step": 7775 }, { "epoch": 1.95, "learning_rate": 6.613415323102262e-06, "loss": 1.1965, "step": 7780 }, { "epoch": 1.95, "learning_rate": 6.599719992573559e-06, "loss": 1.2224, "step": 7785 }, { "epoch": 1.95, "learning_rate": 6.586031869666371e-06, "loss": 1.239, "step": 7790 }, { "epoch": 1.95, "learning_rate": 6.572350983395608e-06, "loss": 1.2422, "step": 7795 }, { "epoch": 1.95, "learning_rate": 6.5586773627608366e-06, "loss": 1.21, "step": 7800 }, { "epoch": 1.95, "learning_rate": 6.545011036746226e-06, "loss": 1.204, "step": 7805 }, { "epoch": 1.95, "learning_rate": 6.531352034320475e-06, "loss": 1.2112, "step": 7810 }, { "epoch": 1.96, "learning_rate": 6.517700384436767e-06, "loss": 1.2238, "step": 7815 }, { "epoch": 1.96, "learning_rate": 6.504056116032698e-06, "loss": 1.222, "step": 7820 }, { "epoch": 1.96, "learning_rate": 6.490419258030212e-06, "loss": 1.2107, "step": 7825 }, { "epoch": 1.96, "learning_rate": 6.476789839335551e-06, "loss": 1.2359, "step": 7830 }, { "epoch": 1.96, "learning_rate": 6.46316788883919e-06, "loss": 1.1889, "step": 7835 }, { "epoch": 1.96, "learning_rate": 6.449553435415768e-06, "loss": 1.2102, "step": 7840 }, { "epoch": 1.96, "learning_rate": 6.4359465079240315e-06, "loss": 1.1855, "step": 7845 }, { "epoch": 1.96, "learning_rate": 6.422347135206779e-06, "loss": 1.2239, "step": 7850 }, { "epoch": 1.97, "learning_rate": 6.4087553460907926e-06, "loss": 1.2145, "step": 7855 }, { "epoch": 1.97, "learning_rate": 6.395171169386778e-06, "loss": 1.24, "step": 7860 }, { "epoch": 1.97, "learning_rate": 6.381594633889306e-06, "loss": 1.183, "step": 7865 }, { "epoch": 1.97, "learning_rate": 6.368025768376754e-06, "loss": 1.2077, "step": 7870 }, { "epoch": 1.97, "learning_rate": 6.354464601611233e-06, "loss": 1.2278, "step": 7875 }, { "epoch": 1.97, "learning_rate": 6.340911162338546e-06, "loss": 1.2254, "step": 7880 }, { "epoch": 1.97, "learning_rate": 6.3273654792880975e-06, "loss": 1.2084, "step": 7885 }, { "epoch": 1.97, "learning_rate": 6.3138275811728765e-06, "loss": 1.1913, "step": 7890 }, { "epoch": 1.98, "learning_rate": 6.3002974966893525e-06, "loss": 1.2382, "step": 7895 }, { "epoch": 1.98, "learning_rate": 6.286775254517433e-06, "loss": 1.2593, "step": 7900 }, { "epoch": 1.98, "learning_rate": 6.273260883320409e-06, "loss": 1.2394, "step": 7905 }, { "epoch": 1.98, "learning_rate": 6.2597544117448804e-06, "loss": 1.1949, "step": 7910 }, { "epoch": 1.98, "learning_rate": 6.24625586842071e-06, "loss": 1.211, "step": 7915 }, { "epoch": 1.98, "learning_rate": 6.232765281960947e-06, "loss": 1.2107, "step": 7920 }, { "epoch": 1.98, "learning_rate": 6.219282680961782e-06, "loss": 1.2215, "step": 7925 }, { "epoch": 1.98, "learning_rate": 6.205808094002469e-06, "loss": 1.2093, "step": 7930 }, { "epoch": 1.99, "learning_rate": 6.192341549645283e-06, "loss": 1.2273, "step": 7935 }, { "epoch": 1.99, "learning_rate": 6.178883076435447e-06, "loss": 1.1949, "step": 7940 }, { "epoch": 1.99, "learning_rate": 6.165432702901079e-06, "loss": 1.2372, "step": 7945 }, { "epoch": 1.99, "learning_rate": 6.151990457553125e-06, "loss": 1.2076, "step": 7950 }, { "epoch": 1.99, "learning_rate": 6.1385563688853e-06, "loss": 1.1843, "step": 7955 }, { "epoch": 1.99, "learning_rate": 6.125130465374034e-06, "loss": 1.1921, "step": 7960 }, { "epoch": 1.99, "learning_rate": 6.111712775478402e-06, "loss": 1.2052, "step": 7965 }, { "epoch": 1.99, "learning_rate": 6.098303327640075e-06, "loss": 1.2081, "step": 7970 }, { "epoch": 2.0, "learning_rate": 6.084902150283243e-06, "loss": 1.2327, "step": 7975 }, { "epoch": 2.0, "learning_rate": 6.071509271814573e-06, "loss": 1.221, "step": 7980 }, { "epoch": 2.0, "learning_rate": 6.058124720623137e-06, "loss": 1.217, "step": 7985 }, { "epoch": 2.0, "learning_rate": 6.044748525080359e-06, "loss": 1.1658, "step": 7990 }, { "epoch": 2.0, "eval_loss": 1.2050257921218872, "eval_runtime": 1767.2536, "eval_samples_per_second": 16.018, "eval_steps_per_second": 1.002, "step": 7993 }, { "epoch": 2.0, "learning_rate": 6.031380713539949e-06, "loss": 1.1973, "step": 7995 }, { "epoch": 2.0, "learning_rate": 6.018021314337847e-06, "loss": 1.2369, "step": 8000 }, { "epoch": 2.0, "learning_rate": 6.004670355792159e-06, "loss": 1.2411, "step": 8005 }, { "epoch": 2.0, "learning_rate": 5.9913278662031005e-06, "loss": 1.237, "step": 8010 }, { "epoch": 2.01, "learning_rate": 5.977993873852935e-06, "loss": 1.2037, "step": 8015 }, { "epoch": 2.01, "learning_rate": 5.964668407005913e-06, "loss": 1.192, "step": 8020 }, { "epoch": 2.01, "learning_rate": 5.951351493908215e-06, "loss": 1.2215, "step": 8025 }, { "epoch": 2.01, "learning_rate": 5.938043162787891e-06, "loss": 1.207, "step": 8030 }, { "epoch": 2.01, "learning_rate": 5.9247434418547966e-06, "loss": 1.2416, "step": 8035 }, { "epoch": 2.01, "learning_rate": 5.911452359300541e-06, "loss": 1.1997, "step": 8040 }, { "epoch": 2.01, "learning_rate": 5.898169943298415e-06, "loss": 1.2367, "step": 8045 }, { "epoch": 2.01, "learning_rate": 5.884896222003343e-06, "loss": 1.2301, "step": 8050 }, { "epoch": 2.02, "learning_rate": 5.8716312235518234e-06, "loss": 1.1844, "step": 8055 }, { "epoch": 2.02, "learning_rate": 5.858374976061863e-06, "loss": 1.2153, "step": 8060 }, { "epoch": 2.02, "learning_rate": 5.845127507632908e-06, "loss": 1.1875, "step": 8065 }, { "epoch": 2.02, "learning_rate": 5.831888846345809e-06, "loss": 1.2038, "step": 8070 }, { "epoch": 2.02, "learning_rate": 5.8186590202627495e-06, "loss": 1.2676, "step": 8075 }, { "epoch": 2.02, "learning_rate": 5.805438057427166e-06, "loss": 1.234, "step": 8080 }, { "epoch": 2.02, "learning_rate": 5.792225985863728e-06, "loss": 1.2064, "step": 8085 }, { "epoch": 2.02, "learning_rate": 5.7790228335782476e-06, "loss": 1.232, "step": 8090 }, { "epoch": 2.03, "learning_rate": 5.765828628557632e-06, "loss": 1.2244, "step": 8095 }, { "epoch": 2.03, "learning_rate": 5.7526433987698275e-06, "loss": 1.1987, "step": 8100 }, { "epoch": 2.03, "learning_rate": 5.739467172163744e-06, "loss": 1.2027, "step": 8105 }, { "epoch": 2.03, "learning_rate": 5.726299976669225e-06, "loss": 1.2025, "step": 8110 }, { "epoch": 2.03, "learning_rate": 5.713141840196956e-06, "loss": 1.2287, "step": 8115 }, { "epoch": 2.03, "learning_rate": 5.699992790638429e-06, "loss": 1.2299, "step": 8120 }, { "epoch": 2.03, "learning_rate": 5.686852855865862e-06, "loss": 1.2052, "step": 8125 }, { "epoch": 2.03, "learning_rate": 5.673722063732163e-06, "loss": 1.2104, "step": 8130 }, { "epoch": 2.04, "learning_rate": 5.660600442070858e-06, "loss": 1.2042, "step": 8135 }, { "epoch": 2.04, "learning_rate": 5.647488018696034e-06, "loss": 1.2049, "step": 8140 }, { "epoch": 2.04, "learning_rate": 5.634384821402281e-06, "loss": 1.2724, "step": 8145 }, { "epoch": 2.04, "learning_rate": 5.621290877964629e-06, "loss": 1.1794, "step": 8150 }, { "epoch": 2.04, "learning_rate": 5.608206216138495e-06, "loss": 1.2058, "step": 8155 }, { "epoch": 2.04, "learning_rate": 5.595130863659618e-06, "loss": 1.2017, "step": 8160 }, { "epoch": 2.04, "learning_rate": 5.58206484824402e-06, "loss": 1.2138, "step": 8165 }, { "epoch": 2.04, "learning_rate": 5.569008197587904e-06, "loss": 1.2152, "step": 8170 }, { "epoch": 2.05, "learning_rate": 5.5559609393676425e-06, "loss": 1.2691, "step": 8175 }, { "epoch": 2.05, "learning_rate": 5.542923101239692e-06, "loss": 1.2725, "step": 8180 }, { "epoch": 2.05, "learning_rate": 5.529894710840543e-06, "loss": 1.2035, "step": 8185 }, { "epoch": 2.05, "learning_rate": 5.516875795786658e-06, "loss": 1.2294, "step": 8190 }, { "epoch": 2.05, "learning_rate": 5.503866383674414e-06, "loss": 1.2047, "step": 8195 }, { "epoch": 2.05, "learning_rate": 5.490866502080046e-06, "loss": 1.1984, "step": 8200 }, { "epoch": 2.05, "learning_rate": 5.477876178559588e-06, "loss": 1.2166, "step": 8205 }, { "epoch": 2.05, "learning_rate": 5.46489544064881e-06, "loss": 1.232, "step": 8210 }, { "epoch": 2.06, "learning_rate": 5.451924315863166e-06, "loss": 1.2387, "step": 8215 }, { "epoch": 2.06, "learning_rate": 5.438962831697732e-06, "loss": 1.2492, "step": 8220 }, { "epoch": 2.06, "learning_rate": 5.426011015627151e-06, "loss": 1.2694, "step": 8225 }, { "epoch": 2.06, "learning_rate": 5.413068895105567e-06, "loss": 1.2267, "step": 8230 }, { "epoch": 2.06, "learning_rate": 5.400136497566577e-06, "loss": 1.2197, "step": 8235 }, { "epoch": 2.06, "learning_rate": 5.3872138504231666e-06, "loss": 1.2598, "step": 8240 }, { "epoch": 2.06, "learning_rate": 5.374300981067653e-06, "loss": 1.2198, "step": 8245 }, { "epoch": 2.06, "learning_rate": 5.361397916871629e-06, "loss": 1.1885, "step": 8250 }, { "epoch": 2.07, "learning_rate": 5.3485046851859005e-06, "loss": 1.1842, "step": 8255 }, { "epoch": 2.07, "learning_rate": 5.3356213133404335e-06, "loss": 1.2445, "step": 8260 }, { "epoch": 2.07, "learning_rate": 5.322747828644295e-06, "loss": 1.2136, "step": 8265 }, { "epoch": 2.07, "learning_rate": 5.309884258385587e-06, "loss": 1.2474, "step": 8270 }, { "epoch": 2.07, "learning_rate": 5.297030629831399e-06, "loss": 1.2136, "step": 8275 }, { "epoch": 2.07, "learning_rate": 5.284186970227758e-06, "loss": 1.2059, "step": 8280 }, { "epoch": 2.07, "learning_rate": 5.271353306799546e-06, "loss": 1.2208, "step": 8285 }, { "epoch": 2.07, "learning_rate": 5.2585296667504606e-06, "loss": 1.2031, "step": 8290 }, { "epoch": 2.08, "learning_rate": 5.245716077262952e-06, "loss": 1.2189, "step": 8295 }, { "epoch": 2.08, "learning_rate": 5.232912565498167e-06, "loss": 1.2265, "step": 8300 }, { "epoch": 2.08, "learning_rate": 5.220119158595891e-06, "loss": 1.2429, "step": 8305 }, { "epoch": 2.08, "learning_rate": 5.207335883674491e-06, "loss": 1.2086, "step": 8310 }, { "epoch": 2.08, "learning_rate": 5.194562767830851e-06, "loss": 1.2087, "step": 8315 }, { "epoch": 2.08, "learning_rate": 5.181799838140326e-06, "loss": 1.1724, "step": 8320 }, { "epoch": 2.08, "learning_rate": 5.1690471216566785e-06, "loss": 1.2105, "step": 8325 }, { "epoch": 2.08, "learning_rate": 5.156304645412017e-06, "loss": 1.1959, "step": 8330 }, { "epoch": 2.09, "learning_rate": 5.143572436416757e-06, "loss": 1.2722, "step": 8335 }, { "epoch": 2.09, "learning_rate": 5.1308505216595395e-06, "loss": 1.2169, "step": 8340 }, { "epoch": 2.09, "learning_rate": 5.1181389281071835e-06, "loss": 1.2304, "step": 8345 }, { "epoch": 2.09, "learning_rate": 5.105437682704634e-06, "loss": 1.2211, "step": 8350 }, { "epoch": 2.09, "learning_rate": 5.0927468123749065e-06, "loss": 1.2292, "step": 8355 }, { "epoch": 2.09, "learning_rate": 5.080066344019008e-06, "loss": 1.2176, "step": 8360 }, { "epoch": 2.09, "learning_rate": 5.06739630451591e-06, "loss": 1.2302, "step": 8365 }, { "epoch": 2.09, "learning_rate": 5.054736720722475e-06, "loss": 1.201, "step": 8370 }, { "epoch": 2.1, "learning_rate": 5.0420876194734e-06, "loss": 1.2063, "step": 8375 }, { "epoch": 2.1, "learning_rate": 5.029449027581166e-06, "loss": 1.2418, "step": 8380 }, { "epoch": 2.1, "learning_rate": 5.016820971835967e-06, "loss": 1.1819, "step": 8385 }, { "epoch": 2.1, "learning_rate": 5.004203479005682e-06, "loss": 1.2046, "step": 8390 }, { "epoch": 2.1, "learning_rate": 4.991596575835783e-06, "loss": 1.2099, "step": 8395 }, { "epoch": 2.1, "learning_rate": 4.979000289049305e-06, "loss": 1.2033, "step": 8400 }, { "epoch": 2.1, "learning_rate": 4.966414645346767e-06, "loss": 1.1854, "step": 8405 }, { "epoch": 2.1, "learning_rate": 4.95383967140614e-06, "loss": 1.1783, "step": 8410 }, { "epoch": 2.11, "learning_rate": 4.941275393882771e-06, "loss": 1.2084, "step": 8415 }, { "epoch": 2.11, "learning_rate": 4.9287218394093414e-06, "loss": 1.2028, "step": 8420 }, { "epoch": 2.11, "learning_rate": 4.916179034595794e-06, "loss": 1.225, "step": 8425 }, { "epoch": 2.11, "learning_rate": 4.90364700602929e-06, "loss": 1.2407, "step": 8430 }, { "epoch": 2.11, "learning_rate": 4.891125780274148e-06, "loss": 1.2101, "step": 8435 }, { "epoch": 2.11, "learning_rate": 4.878615383871781e-06, "loss": 1.1914, "step": 8440 }, { "epoch": 2.11, "learning_rate": 4.866115843340666e-06, "loss": 1.2449, "step": 8445 }, { "epoch": 2.11, "learning_rate": 4.853627185176245e-06, "loss": 1.2558, "step": 8450 }, { "epoch": 2.12, "learning_rate": 4.841149435850905e-06, "loss": 1.2216, "step": 8455 }, { "epoch": 2.12, "learning_rate": 4.828682621813907e-06, "loss": 1.271, "step": 8460 }, { "epoch": 2.12, "learning_rate": 4.816226769491335e-06, "loss": 1.2217, "step": 8465 }, { "epoch": 2.12, "learning_rate": 4.8037819052860316e-06, "loss": 1.2342, "step": 8470 }, { "epoch": 2.12, "learning_rate": 4.791348055577554e-06, "loss": 1.2224, "step": 8475 }, { "epoch": 2.12, "learning_rate": 4.778925246722107e-06, "loss": 1.1803, "step": 8480 }, { "epoch": 2.12, "learning_rate": 4.766513505052495e-06, "loss": 1.2514, "step": 8485 }, { "epoch": 2.12, "learning_rate": 4.7541128568780614e-06, "loss": 1.193, "step": 8490 }, { "epoch": 2.13, "learning_rate": 4.741723328484636e-06, "loss": 1.2148, "step": 8495 }, { "epoch": 2.13, "learning_rate": 4.729344946134476e-06, "loss": 1.2302, "step": 8500 }, { "epoch": 2.13, "learning_rate": 4.716977736066213e-06, "loss": 1.2506, "step": 8505 }, { "epoch": 2.13, "learning_rate": 4.704621724494797e-06, "loss": 1.1951, "step": 8510 }, { "epoch": 2.13, "learning_rate": 4.6922769376114405e-06, "loss": 1.1902, "step": 8515 }, { "epoch": 2.13, "learning_rate": 4.679943401583562e-06, "loss": 1.22, "step": 8520 }, { "epoch": 2.13, "learning_rate": 4.6676211425547336e-06, "loss": 1.231, "step": 8525 }, { "epoch": 2.13, "learning_rate": 4.655310186644618e-06, "loss": 1.1996, "step": 8530 }, { "epoch": 2.14, "learning_rate": 4.643010559948926e-06, "loss": 1.2326, "step": 8535 }, { "epoch": 2.14, "learning_rate": 4.630722288539347e-06, "loss": 1.2094, "step": 8540 }, { "epoch": 2.14, "learning_rate": 4.618445398463509e-06, "loss": 1.214, "step": 8545 }, { "epoch": 2.14, "learning_rate": 4.606179915744897e-06, "loss": 1.2193, "step": 8550 }, { "epoch": 2.14, "learning_rate": 4.593925866382839e-06, "loss": 1.2169, "step": 8555 }, { "epoch": 2.14, "learning_rate": 4.5816832763524136e-06, "loss": 1.2187, "step": 8560 }, { "epoch": 2.14, "learning_rate": 4.569452171604411e-06, "loss": 1.2177, "step": 8565 }, { "epoch": 2.14, "learning_rate": 4.5572325780652745e-06, "loss": 1.2289, "step": 8570 }, { "epoch": 2.15, "learning_rate": 4.545024521637053e-06, "loss": 1.192, "step": 8575 }, { "epoch": 2.15, "learning_rate": 4.532828028197332e-06, "loss": 1.2054, "step": 8580 }, { "epoch": 2.15, "learning_rate": 4.520643123599195e-06, "loss": 1.2002, "step": 8585 }, { "epoch": 2.15, "learning_rate": 4.508469833671155e-06, "loss": 1.1993, "step": 8590 }, { "epoch": 2.15, "learning_rate": 4.496308184217103e-06, "loss": 1.222, "step": 8595 }, { "epoch": 2.15, "learning_rate": 4.484158201016262e-06, "loss": 1.1833, "step": 8600 }, { "epoch": 2.15, "learning_rate": 4.472019909823121e-06, "loss": 1.2175, "step": 8605 }, { "epoch": 2.15, "learning_rate": 4.459893336367384e-06, "loss": 1.2301, "step": 8610 }, { "epoch": 2.16, "learning_rate": 4.44777850635393e-06, "loss": 1.2223, "step": 8615 }, { "epoch": 2.16, "learning_rate": 4.4356754454627285e-06, "loss": 1.1817, "step": 8620 }, { "epoch": 2.16, "learning_rate": 4.423584179348809e-06, "loss": 1.2103, "step": 8625 }, { "epoch": 2.16, "learning_rate": 4.411504733642199e-06, "loss": 1.2031, "step": 8630 }, { "epoch": 2.16, "learning_rate": 4.399437133947874e-06, "loss": 1.2075, "step": 8635 }, { "epoch": 2.16, "learning_rate": 4.387381405845688e-06, "loss": 1.2254, "step": 8640 }, { "epoch": 2.16, "learning_rate": 4.375337574890341e-06, "loss": 1.2297, "step": 8645 }, { "epoch": 2.16, "learning_rate": 4.363305666611314e-06, "loss": 1.1802, "step": 8650 }, { "epoch": 2.17, "learning_rate": 4.351285706512809e-06, "loss": 1.1914, "step": 8655 }, { "epoch": 2.17, "learning_rate": 4.339277720073708e-06, "loss": 1.1976, "step": 8660 }, { "epoch": 2.17, "learning_rate": 4.3272817327475035e-06, "loss": 1.2498, "step": 8665 }, { "epoch": 2.17, "learning_rate": 4.315297769962267e-06, "loss": 1.2083, "step": 8670 }, { "epoch": 2.17, "learning_rate": 4.303325857120572e-06, "loss": 1.2009, "step": 8675 }, { "epoch": 2.17, "learning_rate": 4.291366019599453e-06, "loss": 1.2251, "step": 8680 }, { "epoch": 2.17, "learning_rate": 4.279418282750338e-06, "loss": 1.2069, "step": 8685 }, { "epoch": 2.17, "learning_rate": 4.2674826718990185e-06, "loss": 1.2303, "step": 8690 }, { "epoch": 2.18, "learning_rate": 4.255559212345577e-06, "loss": 1.2315, "step": 8695 }, { "epoch": 2.18, "learning_rate": 4.243647929364339e-06, "loss": 1.2073, "step": 8700 }, { "epoch": 2.18, "learning_rate": 4.23174884820382e-06, "loss": 1.1902, "step": 8705 }, { "epoch": 2.18, "learning_rate": 4.2198619940866684e-06, "loss": 1.2214, "step": 8710 }, { "epoch": 2.18, "learning_rate": 4.207987392209617e-06, "loss": 1.2318, "step": 8715 }, { "epoch": 2.18, "learning_rate": 4.1961250677434255e-06, "loss": 1.2135, "step": 8720 }, { "epoch": 2.18, "learning_rate": 4.184275045832838e-06, "loss": 1.2209, "step": 8725 }, { "epoch": 2.18, "learning_rate": 4.172437351596506e-06, "loss": 1.2058, "step": 8730 }, { "epoch": 2.19, "learning_rate": 4.1606120101269564e-06, "loss": 1.2334, "step": 8735 }, { "epoch": 2.19, "learning_rate": 4.1487990464905355e-06, "loss": 1.2196, "step": 8740 }, { "epoch": 2.19, "learning_rate": 4.13699848572735e-06, "loss": 1.2323, "step": 8745 }, { "epoch": 2.19, "learning_rate": 4.125210352851211e-06, "loss": 1.1873, "step": 8750 }, { "epoch": 2.19, "learning_rate": 4.113434672849593e-06, "loss": 1.183, "step": 8755 }, { "epoch": 2.19, "learning_rate": 4.101671470683572e-06, "loss": 1.226, "step": 8760 }, { "epoch": 2.19, "learning_rate": 4.08992077128777e-06, "loss": 1.2046, "step": 8765 }, { "epoch": 2.19, "learning_rate": 4.078182599570314e-06, "loss": 1.2376, "step": 8770 }, { "epoch": 2.2, "learning_rate": 4.0664569804127695e-06, "loss": 1.211, "step": 8775 }, { "epoch": 2.2, "learning_rate": 4.054743938670099e-06, "loss": 1.1992, "step": 8780 }, { "epoch": 2.2, "learning_rate": 4.043043499170601e-06, "loss": 1.2412, "step": 8785 }, { "epoch": 2.2, "learning_rate": 4.031355686715864e-06, "loss": 1.261, "step": 8790 }, { "epoch": 2.2, "learning_rate": 4.019680526080706e-06, "loss": 1.2046, "step": 8795 }, { "epoch": 2.2, "learning_rate": 4.008018042013131e-06, "loss": 1.2093, "step": 8800 }, { "epoch": 2.2, "learning_rate": 3.996368259234274e-06, "loss": 1.2304, "step": 8805 }, { "epoch": 2.2, "learning_rate": 3.984731202438339e-06, "loss": 1.1864, "step": 8810 }, { "epoch": 2.21, "learning_rate": 3.973106896292563e-06, "loss": 1.2262, "step": 8815 }, { "epoch": 2.21, "learning_rate": 3.9614953654371504e-06, "loss": 1.1909, "step": 8820 }, { "epoch": 2.21, "learning_rate": 3.949896634485227e-06, "loss": 1.2134, "step": 8825 }, { "epoch": 2.21, "learning_rate": 3.938310728022789e-06, "loss": 1.2012, "step": 8830 }, { "epoch": 2.21, "learning_rate": 3.926737670608641e-06, "loss": 1.2139, "step": 8835 }, { "epoch": 2.21, "learning_rate": 3.915177486774361e-06, "loss": 1.2137, "step": 8840 }, { "epoch": 2.21, "learning_rate": 3.903630201024231e-06, "loss": 1.1894, "step": 8845 }, { "epoch": 2.21, "learning_rate": 3.892095837835196e-06, "loss": 1.2037, "step": 8850 }, { "epoch": 2.22, "learning_rate": 3.880574421656809e-06, "loss": 1.252, "step": 8855 }, { "epoch": 2.22, "learning_rate": 3.869065976911177e-06, "loss": 1.2189, "step": 8860 }, { "epoch": 2.22, "learning_rate": 3.8575705279929145e-06, "loss": 1.2569, "step": 8865 }, { "epoch": 2.22, "learning_rate": 3.846088099269085e-06, "loss": 1.1871, "step": 8870 }, { "epoch": 2.22, "learning_rate": 3.834618715079161e-06, "loss": 1.2234, "step": 8875 }, { "epoch": 2.22, "learning_rate": 3.823162399734949e-06, "loss": 1.203, "step": 8880 }, { "epoch": 2.22, "learning_rate": 3.811719177520562e-06, "loss": 1.2347, "step": 8885 }, { "epoch": 2.22, "learning_rate": 3.800289072692368e-06, "loss": 1.1871, "step": 8890 }, { "epoch": 2.23, "learning_rate": 3.7888721094789173e-06, "loss": 1.2199, "step": 8895 }, { "epoch": 2.23, "learning_rate": 3.7774683120809065e-06, "loss": 1.2063, "step": 8900 }, { "epoch": 2.23, "learning_rate": 3.766077704671128e-06, "loss": 1.1848, "step": 8905 }, { "epoch": 2.23, "learning_rate": 3.7547003113944135e-06, "loss": 1.2611, "step": 8910 }, { "epoch": 2.23, "learning_rate": 3.743336156367582e-06, "loss": 1.2395, "step": 8915 }, { "epoch": 2.23, "learning_rate": 3.7319852636793975e-06, "loss": 1.2035, "step": 8920 }, { "epoch": 2.23, "learning_rate": 3.7206476573905016e-06, "loss": 1.2635, "step": 8925 }, { "epoch": 2.23, "learning_rate": 3.70932336153338e-06, "loss": 1.1831, "step": 8930 }, { "epoch": 2.24, "learning_rate": 3.698012400112303e-06, "loss": 1.2047, "step": 8935 }, { "epoch": 2.24, "learning_rate": 3.6867147971032724e-06, "loss": 1.2393, "step": 8940 }, { "epoch": 2.24, "learning_rate": 3.6754305764539834e-06, "loss": 1.2527, "step": 8945 }, { "epoch": 2.24, "learning_rate": 3.664159762083754e-06, "loss": 1.2002, "step": 8950 }, { "epoch": 2.24, "learning_rate": 3.6529023778834895e-06, "loss": 1.1983, "step": 8955 }, { "epoch": 2.24, "learning_rate": 3.6416584477156246e-06, "loss": 1.2087, "step": 8960 }, { "epoch": 2.24, "learning_rate": 3.630427995414081e-06, "loss": 1.1631, "step": 8965 }, { "epoch": 2.24, "learning_rate": 3.619211044784199e-06, "loss": 1.2334, "step": 8970 }, { "epoch": 2.25, "learning_rate": 3.6080076196027116e-06, "loss": 1.149, "step": 8975 }, { "epoch": 2.25, "learning_rate": 3.5968177436176777e-06, "loss": 1.2464, "step": 8980 }, { "epoch": 2.25, "learning_rate": 3.585641440548434e-06, "loss": 1.1957, "step": 8985 }, { "epoch": 2.25, "learning_rate": 3.574478734085549e-06, "loss": 1.261, "step": 8990 }, { "epoch": 2.25, "learning_rate": 3.5633296478907686e-06, "loss": 1.2058, "step": 8995 }, { "epoch": 2.25, "learning_rate": 3.5521942055969648e-06, "loss": 1.2164, "step": 9000 }, { "epoch": 2.25, "learning_rate": 3.541072430808099e-06, "loss": 1.239, "step": 9005 }, { "epoch": 2.25, "learning_rate": 3.529964347099153e-06, "loss": 1.2236, "step": 9010 }, { "epoch": 2.26, "learning_rate": 3.518869978016083e-06, "loss": 1.2343, "step": 9015 }, { "epoch": 2.26, "learning_rate": 3.507789347075783e-06, "loss": 1.1855, "step": 9020 }, { "epoch": 2.26, "learning_rate": 3.4967224777660215e-06, "loss": 1.2521, "step": 9025 }, { "epoch": 2.26, "learning_rate": 3.4856693935453988e-06, "loss": 1.2196, "step": 9030 }, { "epoch": 2.26, "learning_rate": 3.4746301178432938e-06, "loss": 1.2168, "step": 9035 }, { "epoch": 2.26, "learning_rate": 3.463604674059814e-06, "loss": 1.1994, "step": 9040 }, { "epoch": 2.26, "learning_rate": 3.4525930855657473e-06, "loss": 1.2275, "step": 9045 }, { "epoch": 2.26, "learning_rate": 3.4415953757025165e-06, "loss": 1.2335, "step": 9050 }, { "epoch": 2.27, "learning_rate": 3.4306115677821193e-06, "loss": 1.2327, "step": 9055 }, { "epoch": 2.27, "learning_rate": 3.4196416850870896e-06, "loss": 1.2128, "step": 9060 }, { "epoch": 2.27, "learning_rate": 3.408685750870443e-06, "loss": 1.2041, "step": 9065 }, { "epoch": 2.27, "learning_rate": 3.3977437883556265e-06, "loss": 1.2605, "step": 9070 }, { "epoch": 2.27, "learning_rate": 3.3868158207364733e-06, "loss": 1.2046, "step": 9075 }, { "epoch": 2.27, "learning_rate": 3.375901871177151e-06, "loss": 1.2447, "step": 9080 }, { "epoch": 2.27, "learning_rate": 3.365001962812111e-06, "loss": 1.2092, "step": 9085 }, { "epoch": 2.27, "learning_rate": 3.3541161187460446e-06, "loss": 1.1973, "step": 9090 }, { "epoch": 2.28, "learning_rate": 3.343244362053828e-06, "loss": 1.226, "step": 9095 }, { "epoch": 2.28, "learning_rate": 3.332386715780478e-06, "loss": 1.2219, "step": 9100 }, { "epoch": 2.28, "learning_rate": 3.321543202941101e-06, "loss": 1.1747, "step": 9105 }, { "epoch": 2.28, "learning_rate": 3.310713846520842e-06, "loss": 1.2143, "step": 9110 }, { "epoch": 2.28, "learning_rate": 3.2998986694748425e-06, "loss": 1.1766, "step": 9115 }, { "epoch": 2.28, "learning_rate": 3.289097694728186e-06, "loss": 1.1693, "step": 9120 }, { "epoch": 2.28, "learning_rate": 3.278310945175851e-06, "loss": 1.2758, "step": 9125 }, { "epoch": 2.28, "learning_rate": 3.267538443682662e-06, "loss": 1.2059, "step": 9130 }, { "epoch": 2.29, "learning_rate": 3.2567802130832417e-06, "loss": 1.2264, "step": 9135 }, { "epoch": 2.29, "learning_rate": 3.2460362761819653e-06, "loss": 1.2191, "step": 9140 }, { "epoch": 2.29, "learning_rate": 3.2353066557529067e-06, "loss": 1.2094, "step": 9145 }, { "epoch": 2.29, "learning_rate": 3.2245913745397928e-06, "loss": 1.2659, "step": 9150 }, { "epoch": 2.29, "learning_rate": 3.213890455255961e-06, "loss": 1.226, "step": 9155 }, { "epoch": 2.29, "learning_rate": 3.2032039205842947e-06, "loss": 1.2143, "step": 9160 }, { "epoch": 2.29, "learning_rate": 3.1925317931771904e-06, "loss": 1.2169, "step": 9165 }, { "epoch": 2.29, "learning_rate": 3.1818740956565155e-06, "loss": 1.2212, "step": 9170 }, { "epoch": 2.3, "learning_rate": 3.171230850613537e-06, "loss": 1.1913, "step": 9175 }, { "epoch": 2.3, "learning_rate": 3.1606020806088932e-06, "loss": 1.2151, "step": 9180 }, { "epoch": 2.3, "learning_rate": 3.1499878081725365e-06, "loss": 1.2442, "step": 9185 }, { "epoch": 2.3, "learning_rate": 3.1393880558036906e-06, "loss": 1.2328, "step": 9190 }, { "epoch": 2.3, "learning_rate": 3.1288028459707998e-06, "loss": 1.2415, "step": 9195 }, { "epoch": 2.3, "learning_rate": 3.118232201111487e-06, "loss": 1.2127, "step": 9200 }, { "epoch": 2.3, "learning_rate": 3.10767614363249e-06, "loss": 1.1994, "step": 9205 }, { "epoch": 2.3, "learning_rate": 3.097134695909636e-06, "loss": 1.2027, "step": 9210 }, { "epoch": 2.31, "learning_rate": 3.0866078802877807e-06, "loss": 1.2078, "step": 9215 }, { "epoch": 2.31, "learning_rate": 3.0760957190807607e-06, "loss": 1.2312, "step": 9220 }, { "epoch": 2.31, "learning_rate": 3.0655982345713585e-06, "loss": 1.21, "step": 9225 }, { "epoch": 2.31, "learning_rate": 3.0551154490112365e-06, "loss": 1.2609, "step": 9230 }, { "epoch": 2.31, "learning_rate": 3.0446473846209056e-06, "loss": 1.2088, "step": 9235 }, { "epoch": 2.31, "learning_rate": 3.034194063589666e-06, "loss": 1.2429, "step": 9240 }, { "epoch": 2.31, "learning_rate": 3.0237555080755754e-06, "loss": 1.2184, "step": 9245 }, { "epoch": 2.31, "learning_rate": 3.013331740205381e-06, "loss": 1.2315, "step": 9250 }, { "epoch": 2.32, "learning_rate": 3.0029227820744922e-06, "loss": 1.1825, "step": 9255 }, { "epoch": 2.32, "learning_rate": 2.992528655746926e-06, "loss": 1.2397, "step": 9260 }, { "epoch": 2.32, "learning_rate": 2.9821493832552583e-06, "loss": 1.2014, "step": 9265 }, { "epoch": 2.32, "learning_rate": 2.9717849866005777e-06, "loss": 1.1883, "step": 9270 }, { "epoch": 2.32, "learning_rate": 2.961435487752442e-06, "loss": 1.2073, "step": 9275 }, { "epoch": 2.32, "learning_rate": 2.951100908648834e-06, "loss": 1.2274, "step": 9280 }, { "epoch": 2.32, "learning_rate": 2.940781271196105e-06, "loss": 1.2366, "step": 9285 }, { "epoch": 2.32, "learning_rate": 2.9304765972689375e-06, "loss": 1.193, "step": 9290 }, { "epoch": 2.33, "learning_rate": 2.9201869087102896e-06, "loss": 1.2522, "step": 9295 }, { "epoch": 2.33, "learning_rate": 2.9099122273313617e-06, "loss": 1.2074, "step": 9300 }, { "epoch": 2.33, "learning_rate": 2.899652574911542e-06, "loss": 1.206, "step": 9305 }, { "epoch": 2.33, "learning_rate": 2.8894079731983593e-06, "loss": 1.2534, "step": 9310 }, { "epoch": 2.33, "learning_rate": 2.879178443907441e-06, "loss": 1.2365, "step": 9315 }, { "epoch": 2.33, "learning_rate": 2.8689640087224648e-06, "loss": 1.2307, "step": 9320 }, { "epoch": 2.33, "learning_rate": 2.858764689295113e-06, "loss": 1.191, "step": 9325 }, { "epoch": 2.33, "learning_rate": 2.8485805072450223e-06, "loss": 1.2098, "step": 9330 }, { "epoch": 2.34, "learning_rate": 2.8384114841597597e-06, "loss": 1.2243, "step": 9335 }, { "epoch": 2.34, "learning_rate": 2.828257641594735e-06, "loss": 1.2041, "step": 9340 }, { "epoch": 2.34, "learning_rate": 2.818119001073195e-06, "loss": 1.2091, "step": 9345 }, { "epoch": 2.34, "learning_rate": 2.8079955840861595e-06, "loss": 1.2222, "step": 9350 }, { "epoch": 2.34, "learning_rate": 2.7978874120923783e-06, "loss": 1.231, "step": 9355 }, { "epoch": 2.34, "learning_rate": 2.7877945065182876e-06, "loss": 1.2523, "step": 9360 }, { "epoch": 2.34, "learning_rate": 2.7777168887579585e-06, "loss": 1.2295, "step": 9365 }, { "epoch": 2.34, "learning_rate": 2.767654580173064e-06, "loss": 1.1988, "step": 9370 }, { "epoch": 2.35, "learning_rate": 2.7576076020928165e-06, "loss": 1.2063, "step": 9375 }, { "epoch": 2.35, "learning_rate": 2.747575975813942e-06, "loss": 1.204, "step": 9380 }, { "epoch": 2.35, "learning_rate": 2.7375597226006167e-06, "loss": 1.229, "step": 9385 }, { "epoch": 2.35, "learning_rate": 2.7275588636844364e-06, "loss": 1.1868, "step": 9390 }, { "epoch": 2.35, "learning_rate": 2.7175734202643613e-06, "loss": 1.2437, "step": 9395 }, { "epoch": 2.35, "learning_rate": 2.7076034135066766e-06, "loss": 1.2115, "step": 9400 }, { "epoch": 2.35, "learning_rate": 2.6976488645449484e-06, "loss": 1.2075, "step": 9405 }, { "epoch": 2.35, "learning_rate": 2.6877097944799723e-06, "loss": 1.22, "step": 9410 }, { "epoch": 2.36, "learning_rate": 2.677786224379736e-06, "loss": 1.2074, "step": 9415 }, { "epoch": 2.36, "learning_rate": 2.6678781752793727e-06, "loss": 1.194, "step": 9420 }, { "epoch": 2.36, "learning_rate": 2.657985668181111e-06, "loss": 1.2172, "step": 9425 }, { "epoch": 2.36, "learning_rate": 2.64810872405424e-06, "loss": 1.2064, "step": 9430 }, { "epoch": 2.36, "learning_rate": 2.638247363835057e-06, "loss": 1.1969, "step": 9435 }, { "epoch": 2.36, "learning_rate": 2.6284016084268292e-06, "loss": 1.1903, "step": 9440 }, { "epoch": 2.36, "learning_rate": 2.618571478699735e-06, "loss": 1.2002, "step": 9445 }, { "epoch": 2.36, "learning_rate": 2.6087569954908466e-06, "loss": 1.217, "step": 9450 }, { "epoch": 2.37, "learning_rate": 2.5989581796040607e-06, "loss": 1.2318, "step": 9455 }, { "epoch": 2.37, "learning_rate": 2.5891750518100654e-06, "loss": 1.2126, "step": 9460 }, { "epoch": 2.37, "learning_rate": 2.579407632846296e-06, "loss": 1.2123, "step": 9465 }, { "epoch": 2.37, "learning_rate": 2.5696559434168856e-06, "loss": 1.1972, "step": 9470 }, { "epoch": 2.37, "learning_rate": 2.5599200041926297e-06, "loss": 1.2758, "step": 9475 }, { "epoch": 2.37, "learning_rate": 2.550199835810936e-06, "loss": 1.2491, "step": 9480 }, { "epoch": 2.37, "learning_rate": 2.5404954588757845e-06, "loss": 1.2397, "step": 9485 }, { "epoch": 2.37, "learning_rate": 2.5308068939576757e-06, "loss": 1.2063, "step": 9490 }, { "epoch": 2.38, "learning_rate": 2.521134161593599e-06, "loss": 1.2171, "step": 9495 }, { "epoch": 2.38, "learning_rate": 2.5114772822869794e-06, "loss": 1.2104, "step": 9500 }, { "epoch": 2.38, "learning_rate": 2.5018362765076453e-06, "loss": 1.2102, "step": 9505 }, { "epoch": 2.38, "learning_rate": 2.4922111646917713e-06, "loss": 1.2592, "step": 9510 }, { "epoch": 2.38, "learning_rate": 2.4826019672418412e-06, "loss": 1.2143, "step": 9515 }, { "epoch": 2.38, "learning_rate": 2.4730087045266073e-06, "loss": 1.1828, "step": 9520 }, { "epoch": 2.38, "learning_rate": 2.4634313968810442e-06, "loss": 1.2249, "step": 9525 }, { "epoch": 2.38, "learning_rate": 2.453870064606307e-06, "loss": 1.2174, "step": 9530 }, { "epoch": 2.39, "learning_rate": 2.4443247279696834e-06, "loss": 1.2342, "step": 9535 }, { "epoch": 2.39, "learning_rate": 2.43479540720456e-06, "loss": 1.265, "step": 9540 }, { "epoch": 2.39, "learning_rate": 2.425282122510373e-06, "loss": 1.2275, "step": 9545 }, { "epoch": 2.39, "learning_rate": 2.415784894052565e-06, "loss": 1.2162, "step": 9550 }, { "epoch": 2.39, "learning_rate": 2.4063037419625433e-06, "loss": 1.2479, "step": 9555 }, { "epoch": 2.39, "learning_rate": 2.3968386863376447e-06, "loss": 1.2282, "step": 9560 }, { "epoch": 2.39, "learning_rate": 2.38738974724108e-06, "loss": 1.2183, "step": 9565 }, { "epoch": 2.39, "learning_rate": 2.377956944701897e-06, "loss": 1.2153, "step": 9570 }, { "epoch": 2.4, "learning_rate": 2.3685402987149442e-06, "loss": 1.2297, "step": 9575 }, { "epoch": 2.4, "learning_rate": 2.359139829240812e-06, "loss": 1.2231, "step": 9580 }, { "epoch": 2.4, "learning_rate": 2.3497555562058126e-06, "loss": 1.2562, "step": 9585 }, { "epoch": 2.4, "learning_rate": 2.3403874995019214e-06, "loss": 1.2295, "step": 9590 }, { "epoch": 2.4, "learning_rate": 2.3310356789867407e-06, "loss": 1.2468, "step": 9595 }, { "epoch": 2.4, "learning_rate": 2.3217001144834563e-06, "loss": 1.2223, "step": 9600 }, { "epoch": 2.4, "learning_rate": 2.3123808257807944e-06, "loss": 1.1785, "step": 9605 }, { "epoch": 2.4, "learning_rate": 2.3030778326329827e-06, "loss": 1.2384, "step": 9610 }, { "epoch": 2.41, "learning_rate": 2.29379115475971e-06, "loss": 1.1963, "step": 9615 }, { "epoch": 2.41, "learning_rate": 2.2845208118460814e-06, "loss": 1.2299, "step": 9620 }, { "epoch": 2.41, "learning_rate": 2.2752668235425658e-06, "loss": 1.2557, "step": 9625 }, { "epoch": 2.41, "learning_rate": 2.266029209464976e-06, "loss": 1.2076, "step": 9630 }, { "epoch": 2.41, "learning_rate": 2.256807989194413e-06, "loss": 1.2046, "step": 9635 }, { "epoch": 2.41, "learning_rate": 2.247603182277228e-06, "loss": 1.2128, "step": 9640 }, { "epoch": 2.41, "learning_rate": 2.23841480822498e-06, "loss": 1.1842, "step": 9645 }, { "epoch": 2.41, "learning_rate": 2.2292428865143943e-06, "loss": 1.2373, "step": 9650 }, { "epoch": 2.42, "learning_rate": 2.2200874365873236e-06, "loss": 1.2159, "step": 9655 }, { "epoch": 2.42, "learning_rate": 2.210948477850704e-06, "loss": 1.2106, "step": 9660 }, { "epoch": 2.42, "learning_rate": 2.201826029676516e-06, "loss": 1.2168, "step": 9665 }, { "epoch": 2.42, "learning_rate": 2.1927201114017405e-06, "loss": 1.2371, "step": 9670 }, { "epoch": 2.42, "learning_rate": 2.183630742328322e-06, "loss": 1.2264, "step": 9675 }, { "epoch": 2.42, "learning_rate": 2.174557941723123e-06, "loss": 1.2353, "step": 9680 }, { "epoch": 2.42, "learning_rate": 2.1655017288178893e-06, "loss": 1.186, "step": 9685 }, { "epoch": 2.42, "learning_rate": 2.1564621228092007e-06, "loss": 1.2357, "step": 9690 }, { "epoch": 2.43, "learning_rate": 2.1474391428584394e-06, "loss": 1.204, "step": 9695 }, { "epoch": 2.43, "learning_rate": 2.138432808091744e-06, "loss": 1.2238, "step": 9700 }, { "epoch": 2.43, "learning_rate": 2.129443137599967e-06, "loss": 1.2238, "step": 9705 }, { "epoch": 2.43, "learning_rate": 2.1204701504386405e-06, "loss": 1.1905, "step": 9710 }, { "epoch": 2.43, "learning_rate": 2.1115138656279333e-06, "loss": 1.2211, "step": 9715 }, { "epoch": 2.43, "learning_rate": 2.1025743021526067e-06, "loss": 1.2318, "step": 9720 }, { "epoch": 2.43, "learning_rate": 2.093651478961982e-06, "loss": 1.1849, "step": 9725 }, { "epoch": 2.43, "learning_rate": 2.084745414969892e-06, "loss": 1.2501, "step": 9730 }, { "epoch": 2.44, "learning_rate": 2.0758561290546454e-06, "loss": 1.21, "step": 9735 }, { "epoch": 2.44, "learning_rate": 2.0669836400589893e-06, "loss": 1.1897, "step": 9740 }, { "epoch": 2.44, "learning_rate": 2.058127966790061e-06, "loss": 1.2738, "step": 9745 }, { "epoch": 2.44, "learning_rate": 2.0492891280193593e-06, "loss": 1.2078, "step": 9750 }, { "epoch": 2.44, "learning_rate": 2.0404671424826925e-06, "loss": 1.2065, "step": 9755 }, { "epoch": 2.44, "learning_rate": 2.03166202888015e-06, "loss": 1.238, "step": 9760 }, { "epoch": 2.44, "learning_rate": 2.022873805876057e-06, "loss": 1.254, "step": 9765 }, { "epoch": 2.44, "learning_rate": 2.0141024920989284e-06, "loss": 1.2354, "step": 9770 }, { "epoch": 2.45, "learning_rate": 2.0053481061414447e-06, "loss": 1.2049, "step": 9775 }, { "epoch": 2.45, "learning_rate": 1.996610666560399e-06, "loss": 1.2613, "step": 9780 }, { "epoch": 2.45, "learning_rate": 1.9878901918766704e-06, "loss": 1.1905, "step": 9785 }, { "epoch": 2.45, "learning_rate": 1.9791867005751685e-06, "loss": 1.1925, "step": 9790 }, { "epoch": 2.45, "learning_rate": 1.9705002111048077e-06, "loss": 1.2053, "step": 9795 }, { "epoch": 2.45, "learning_rate": 1.9618307418784634e-06, "loss": 1.2141, "step": 9800 }, { "epoch": 2.45, "learning_rate": 1.9531783112729297e-06, "loss": 1.2084, "step": 9805 }, { "epoch": 2.45, "learning_rate": 1.9445429376288893e-06, "loss": 1.2062, "step": 9810 }, { "epoch": 2.46, "learning_rate": 1.9359246392508603e-06, "loss": 1.2057, "step": 9815 }, { "epoch": 2.46, "learning_rate": 1.9273234344071745e-06, "loss": 1.2078, "step": 9820 }, { "epoch": 2.46, "learning_rate": 1.918739341329927e-06, "loss": 1.2008, "step": 9825 }, { "epoch": 2.46, "learning_rate": 1.9101723782149404e-06, "loss": 1.2473, "step": 9830 }, { "epoch": 2.46, "learning_rate": 1.9016225632217256e-06, "loss": 1.2009, "step": 9835 }, { "epoch": 2.46, "learning_rate": 1.8930899144734505e-06, "loss": 1.2285, "step": 9840 }, { "epoch": 2.46, "learning_rate": 1.8845744500568897e-06, "loss": 1.1935, "step": 9845 }, { "epoch": 2.46, "learning_rate": 1.876076188022392e-06, "loss": 1.2001, "step": 9850 }, { "epoch": 2.47, "learning_rate": 1.867595146383846e-06, "loss": 1.19, "step": 9855 }, { "epoch": 2.47, "learning_rate": 1.8591313431186308e-06, "loss": 1.1877, "step": 9860 }, { "epoch": 2.47, "learning_rate": 1.8506847961675923e-06, "loss": 1.1735, "step": 9865 }, { "epoch": 2.47, "learning_rate": 1.8422555234349936e-06, "loss": 1.1835, "step": 9870 }, { "epoch": 2.47, "learning_rate": 1.833843542788486e-06, "loss": 1.1944, "step": 9875 }, { "epoch": 2.47, "learning_rate": 1.8254488720590612e-06, "loss": 1.2579, "step": 9880 }, { "epoch": 2.47, "learning_rate": 1.8170715290410223e-06, "loss": 1.2102, "step": 9885 }, { "epoch": 2.47, "learning_rate": 1.8087115314919378e-06, "loss": 1.2393, "step": 9890 }, { "epoch": 2.48, "learning_rate": 1.8003688971326194e-06, "loss": 1.2573, "step": 9895 }, { "epoch": 2.48, "learning_rate": 1.7920436436470667e-06, "loss": 1.2286, "step": 9900 }, { "epoch": 2.48, "learning_rate": 1.7837357886824292e-06, "loss": 1.2201, "step": 9905 }, { "epoch": 2.48, "learning_rate": 1.7754453498489899e-06, "loss": 1.213, "step": 9910 }, { "epoch": 2.48, "learning_rate": 1.7671723447201083e-06, "loss": 1.2055, "step": 9915 }, { "epoch": 2.48, "learning_rate": 1.758916790832189e-06, "loss": 1.2042, "step": 9920 }, { "epoch": 2.48, "learning_rate": 1.7506787056846463e-06, "loss": 1.2216, "step": 9925 }, { "epoch": 2.48, "learning_rate": 1.742458106739867e-06, "loss": 1.2149, "step": 9930 }, { "epoch": 2.49, "learning_rate": 1.7342550114231692e-06, "loss": 1.1829, "step": 9935 }, { "epoch": 2.49, "learning_rate": 1.7260694371227705e-06, "loss": 1.2064, "step": 9940 }, { "epoch": 2.49, "learning_rate": 1.7179014011897487e-06, "loss": 1.2334, "step": 9945 }, { "epoch": 2.49, "learning_rate": 1.7097509209380036e-06, "loss": 1.1933, "step": 9950 }, { "epoch": 2.49, "learning_rate": 1.701618013644224e-06, "loss": 1.2437, "step": 9955 }, { "epoch": 2.49, "learning_rate": 1.6935026965478474e-06, "loss": 1.2209, "step": 9960 }, { "epoch": 2.49, "learning_rate": 1.685404986851027e-06, "loss": 1.1936, "step": 9965 }, { "epoch": 2.49, "learning_rate": 1.6773249017185923e-06, "loss": 1.2388, "step": 9970 }, { "epoch": 2.5, "learning_rate": 1.6692624582780136e-06, "loss": 1.1641, "step": 9975 }, { "epoch": 2.5, "learning_rate": 1.6612176736193652e-06, "loss": 1.2671, "step": 9980 }, { "epoch": 2.5, "learning_rate": 1.6531905647952928e-06, "loss": 1.1723, "step": 9985 }, { "epoch": 2.5, "learning_rate": 1.645181148820969e-06, "loss": 1.226, "step": 9990 }, { "epoch": 2.5, "learning_rate": 1.6371894426740686e-06, "loss": 1.2264, "step": 9995 }, { "epoch": 2.5, "learning_rate": 1.6292154632947233e-06, "loss": 1.1796, "step": 10000 }, { "epoch": 2.5, "learning_rate": 1.6212592275854887e-06, "loss": 1.2353, "step": 10005 }, { "epoch": 2.5, "learning_rate": 1.6133207524113115e-06, "loss": 1.2272, "step": 10010 }, { "epoch": 2.51, "learning_rate": 1.6054000545994885e-06, "loss": 1.2238, "step": 10015 }, { "epoch": 2.51, "learning_rate": 1.5974971509396342e-06, "loss": 1.1947, "step": 10020 }, { "epoch": 2.51, "learning_rate": 1.589612058183646e-06, "loss": 1.2184, "step": 10025 }, { "epoch": 2.51, "learning_rate": 1.5817447930456652e-06, "loss": 1.2132, "step": 10030 }, { "epoch": 2.51, "learning_rate": 1.5738953722020466e-06, "loss": 1.2221, "step": 10035 }, { "epoch": 2.51, "learning_rate": 1.5660638122913164e-06, "loss": 1.1844, "step": 10040 }, { "epoch": 2.51, "learning_rate": 1.5582501299141461e-06, "loss": 1.2017, "step": 10045 }, { "epoch": 2.51, "learning_rate": 1.550454341633304e-06, "loss": 1.2389, "step": 10050 }, { "epoch": 2.52, "learning_rate": 1.5426764639736313e-06, "loss": 1.2208, "step": 10055 }, { "epoch": 2.52, "learning_rate": 1.5349165134220113e-06, "loss": 1.2246, "step": 10060 }, { "epoch": 2.52, "learning_rate": 1.527174506427317e-06, "loss": 1.1771, "step": 10065 }, { "epoch": 2.52, "learning_rate": 1.5194504594003901e-06, "loss": 1.1896, "step": 10070 }, { "epoch": 2.52, "learning_rate": 1.5117443887140026e-06, "loss": 1.1773, "step": 10075 }, { "epoch": 2.52, "learning_rate": 1.5040563107028206e-06, "loss": 1.2304, "step": 10080 }, { "epoch": 2.52, "learning_rate": 1.4963862416633722e-06, "loss": 1.2427, "step": 10085 }, { "epoch": 2.52, "learning_rate": 1.4887341978540137e-06, "loss": 1.2021, "step": 10090 }, { "epoch": 2.53, "learning_rate": 1.4811001954948846e-06, "loss": 1.2598, "step": 10095 }, { "epoch": 2.53, "learning_rate": 1.4734842507678904e-06, "loss": 1.234, "step": 10100 }, { "epoch": 2.53, "learning_rate": 1.4658863798166567e-06, "loss": 1.1867, "step": 10105 }, { "epoch": 2.53, "learning_rate": 1.458306598746495e-06, "loss": 1.1668, "step": 10110 }, { "epoch": 2.53, "learning_rate": 1.4507449236243797e-06, "loss": 1.1675, "step": 10115 }, { "epoch": 2.53, "learning_rate": 1.4432013704788973e-06, "loss": 1.2377, "step": 10120 }, { "epoch": 2.53, "learning_rate": 1.435675955300223e-06, "loss": 1.2091, "step": 10125 }, { "epoch": 2.53, "learning_rate": 1.4281686940400874e-06, "loss": 1.2209, "step": 10130 }, { "epoch": 2.54, "learning_rate": 1.4206796026117385e-06, "loss": 1.2059, "step": 10135 }, { "epoch": 2.54, "learning_rate": 1.4132086968899062e-06, "loss": 1.2403, "step": 10140 }, { "epoch": 2.54, "learning_rate": 1.4057559927107767e-06, "loss": 1.2127, "step": 10145 }, { "epoch": 2.54, "learning_rate": 1.3983215058719512e-06, "loss": 1.2014, "step": 10150 }, { "epoch": 2.54, "learning_rate": 1.3909052521324174e-06, "loss": 1.1967, "step": 10155 }, { "epoch": 2.54, "learning_rate": 1.3835072472125122e-06, "loss": 1.2321, "step": 10160 }, { "epoch": 2.54, "learning_rate": 1.3761275067938917e-06, "loss": 1.1786, "step": 10165 }, { "epoch": 2.54, "learning_rate": 1.368766046519493e-06, "loss": 1.2327, "step": 10170 }, { "epoch": 2.55, "learning_rate": 1.3614228819935128e-06, "loss": 1.2408, "step": 10175 }, { "epoch": 2.55, "learning_rate": 1.3540980287813599e-06, "loss": 1.2247, "step": 10180 }, { "epoch": 2.55, "learning_rate": 1.3467915024096246e-06, "loss": 1.1726, "step": 10185 }, { "epoch": 2.55, "learning_rate": 1.3395033183660555e-06, "loss": 1.2186, "step": 10190 }, { "epoch": 2.55, "learning_rate": 1.3322334920995205e-06, "loss": 1.1854, "step": 10195 }, { "epoch": 2.55, "learning_rate": 1.324982039019972e-06, "loss": 1.2254, "step": 10200 }, { "epoch": 2.55, "learning_rate": 1.3177489744984184e-06, "loss": 1.2258, "step": 10205 }, { "epoch": 2.55, "learning_rate": 1.3105343138668868e-06, "loss": 1.1875, "step": 10210 }, { "epoch": 2.56, "learning_rate": 1.3033380724183976e-06, "loss": 1.2249, "step": 10215 }, { "epoch": 2.56, "learning_rate": 1.2961602654069228e-06, "loss": 1.2178, "step": 10220 }, { "epoch": 2.56, "learning_rate": 1.289000908047363e-06, "loss": 1.2393, "step": 10225 }, { "epoch": 2.56, "learning_rate": 1.2818600155155092e-06, "loss": 1.2067, "step": 10230 }, { "epoch": 2.56, "learning_rate": 1.2747376029480108e-06, "loss": 1.2474, "step": 10235 }, { "epoch": 2.56, "learning_rate": 1.2676336854423465e-06, "loss": 1.1985, "step": 10240 }, { "epoch": 2.56, "learning_rate": 1.2605482780567913e-06, "loss": 1.2184, "step": 10245 }, { "epoch": 2.56, "learning_rate": 1.253481395810382e-06, "loss": 1.2052, "step": 10250 }, { "epoch": 2.57, "learning_rate": 1.2464330536828906e-06, "loss": 1.2274, "step": 10255 }, { "epoch": 2.57, "learning_rate": 1.239403266614785e-06, "loss": 1.2094, "step": 10260 }, { "epoch": 2.57, "learning_rate": 1.2323920495072051e-06, "loss": 1.2251, "step": 10265 }, { "epoch": 2.57, "learning_rate": 1.2253994172219276e-06, "loss": 1.2164, "step": 10270 }, { "epoch": 2.57, "learning_rate": 1.218425384581332e-06, "loss": 1.2063, "step": 10275 }, { "epoch": 2.57, "learning_rate": 1.211469966368376e-06, "loss": 1.2151, "step": 10280 }, { "epoch": 2.57, "learning_rate": 1.2045331773265556e-06, "loss": 1.1862, "step": 10285 }, { "epoch": 2.57, "learning_rate": 1.1976150321598822e-06, "loss": 1.2264, "step": 10290 }, { "epoch": 2.58, "learning_rate": 1.1907155455328435e-06, "loss": 1.2054, "step": 10295 }, { "epoch": 2.58, "learning_rate": 1.1838347320703814e-06, "loss": 1.2331, "step": 10300 }, { "epoch": 2.58, "learning_rate": 1.1769726063578514e-06, "loss": 1.232, "step": 10305 }, { "epoch": 2.58, "learning_rate": 1.1701291829409988e-06, "loss": 1.2172, "step": 10310 }, { "epoch": 2.58, "learning_rate": 1.1633044763259238e-06, "loss": 1.227, "step": 10315 }, { "epoch": 2.58, "learning_rate": 1.1564985009790552e-06, "loss": 1.2142, "step": 10320 }, { "epoch": 2.58, "learning_rate": 1.149711271327114e-06, "loss": 1.2109, "step": 10325 }, { "epoch": 2.58, "learning_rate": 1.1429428017570887e-06, "loss": 1.2032, "step": 10330 }, { "epoch": 2.59, "learning_rate": 1.1361931066161936e-06, "loss": 1.2189, "step": 10335 }, { "epoch": 2.59, "learning_rate": 1.1294622002118593e-06, "loss": 1.2197, "step": 10340 }, { "epoch": 2.59, "learning_rate": 1.122750096811681e-06, "loss": 1.2441, "step": 10345 }, { "epoch": 2.59, "learning_rate": 1.1160568106433978e-06, "loss": 1.2144, "step": 10350 }, { "epoch": 2.59, "learning_rate": 1.1093823558948647e-06, "loss": 1.2352, "step": 10355 }, { "epoch": 2.59, "learning_rate": 1.1027267467140156e-06, "loss": 1.2138, "step": 10360 }, { "epoch": 2.59, "learning_rate": 1.0960899972088413e-06, "loss": 1.1846, "step": 10365 }, { "epoch": 2.59, "learning_rate": 1.0894721214473514e-06, "loss": 1.2255, "step": 10370 }, { "epoch": 2.6, "learning_rate": 1.0828731334575537e-06, "loss": 1.2272, "step": 10375 }, { "epoch": 2.6, "learning_rate": 1.0762930472274125e-06, "loss": 1.2052, "step": 10380 }, { "epoch": 2.6, "learning_rate": 1.0697318767048315e-06, "loss": 1.2144, "step": 10385 }, { "epoch": 2.6, "learning_rate": 1.0631896357976124e-06, "loss": 1.1947, "step": 10390 }, { "epoch": 2.6, "learning_rate": 1.056666338373442e-06, "loss": 1.191, "step": 10395 }, { "epoch": 2.6, "learning_rate": 1.0501619982598432e-06, "loss": 1.1864, "step": 10400 }, { "epoch": 2.6, "learning_rate": 1.043676629244157e-06, "loss": 1.1756, "step": 10405 }, { "epoch": 2.6, "learning_rate": 1.037210245073511e-06, "loss": 1.1998, "step": 10410 }, { "epoch": 2.61, "learning_rate": 1.0307628594547925e-06, "loss": 1.1891, "step": 10415 }, { "epoch": 2.61, "learning_rate": 1.0243344860546178e-06, "loss": 1.2361, "step": 10420 }, { "epoch": 2.61, "learning_rate": 1.017925138499295e-06, "loss": 1.2227, "step": 10425 }, { "epoch": 2.61, "learning_rate": 1.0115348303748128e-06, "loss": 1.1929, "step": 10430 }, { "epoch": 2.61, "learning_rate": 1.0051635752267952e-06, "loss": 1.236, "step": 10435 }, { "epoch": 2.61, "learning_rate": 9.988113865604843e-07, "loss": 1.2159, "step": 10440 }, { "epoch": 2.61, "learning_rate": 9.924782778406994e-07, "loss": 1.2412, "step": 10445 }, { "epoch": 2.61, "learning_rate": 9.861642624918256e-07, "loss": 1.2359, "step": 10450 }, { "epoch": 2.62, "learning_rate": 9.798693538977688e-07, "loss": 1.2342, "step": 10455 }, { "epoch": 2.62, "learning_rate": 9.735935654019346e-07, "loss": 1.2695, "step": 10460 }, { "epoch": 2.62, "learning_rate": 9.673369103072029e-07, "loss": 1.2182, "step": 10465 }, { "epoch": 2.62, "learning_rate": 9.610994018758901e-07, "loss": 1.2155, "step": 10470 }, { "epoch": 2.62, "learning_rate": 9.548810533297325e-07, "loss": 1.2453, "step": 10475 }, { "epoch": 2.62, "learning_rate": 9.486818778498519e-07, "loss": 1.2421, "step": 10480 }, { "epoch": 2.62, "learning_rate": 9.425018885767278e-07, "loss": 1.2887, "step": 10485 }, { "epoch": 2.62, "learning_rate": 9.363410986101695e-07, "loss": 1.2046, "step": 10490 }, { "epoch": 2.63, "learning_rate": 9.301995210092918e-07, "loss": 1.2219, "step": 10495 }, { "epoch": 2.63, "learning_rate": 9.240771687924821e-07, "loss": 1.2279, "step": 10500 }, { "epoch": 2.63, "learning_rate": 9.179740549373762e-07, "loss": 1.1949, "step": 10505 }, { "epoch": 2.63, "learning_rate": 9.118901923808365e-07, "loss": 1.2096, "step": 10510 }, { "epoch": 2.63, "learning_rate": 9.058255940189053e-07, "loss": 1.2004, "step": 10515 }, { "epoch": 2.63, "learning_rate": 8.997802727068006e-07, "loss": 1.2125, "step": 10520 }, { "epoch": 2.63, "learning_rate": 8.93754241258874e-07, "loss": 1.2135, "step": 10525 }, { "epoch": 2.63, "learning_rate": 8.877475124485901e-07, "loss": 1.2048, "step": 10530 }, { "epoch": 2.64, "learning_rate": 8.817600990084974e-07, "loss": 1.1951, "step": 10535 }, { "epoch": 2.64, "learning_rate": 8.75792013630199e-07, "loss": 1.1725, "step": 10540 }, { "epoch": 2.64, "learning_rate": 8.69843268964331e-07, "loss": 1.2434, "step": 10545 }, { "epoch": 2.64, "learning_rate": 8.639138776205302e-07, "loss": 1.2134, "step": 10550 }, { "epoch": 2.64, "learning_rate": 8.580038521674128e-07, "loss": 1.2303, "step": 10555 }, { "epoch": 2.64, "learning_rate": 8.521132051325432e-07, "loss": 1.203, "step": 10560 }, { "epoch": 2.64, "learning_rate": 8.462419490024065e-07, "loss": 1.2283, "step": 10565 }, { "epoch": 2.64, "learning_rate": 8.403900962223899e-07, "loss": 1.1832, "step": 10570 }, { "epoch": 2.65, "learning_rate": 8.345576591967463e-07, "loss": 1.2153, "step": 10575 }, { "epoch": 2.65, "learning_rate": 8.287446502885766e-07, "loss": 1.2195, "step": 10580 }, { "epoch": 2.65, "learning_rate": 8.229510818197961e-07, "loss": 1.2467, "step": 10585 }, { "epoch": 2.65, "learning_rate": 8.17176966071116e-07, "loss": 1.1875, "step": 10590 }, { "epoch": 2.65, "learning_rate": 8.114223152820078e-07, "loss": 1.1866, "step": 10595 }, { "epoch": 2.65, "learning_rate": 8.056871416506884e-07, "loss": 1.2309, "step": 10600 }, { "epoch": 2.65, "learning_rate": 7.999714573340855e-07, "loss": 1.1898, "step": 10605 }, { "epoch": 2.65, "learning_rate": 7.942752744478166e-07, "loss": 1.2014, "step": 10610 }, { "epoch": 2.66, "learning_rate": 7.885986050661576e-07, "loss": 1.2189, "step": 10615 }, { "epoch": 2.66, "learning_rate": 7.829414612220287e-07, "loss": 1.2177, "step": 10620 }, { "epoch": 2.66, "learning_rate": 7.773038549069567e-07, "loss": 1.2421, "step": 10625 }, { "epoch": 2.66, "learning_rate": 7.716857980710535e-07, "loss": 1.2286, "step": 10630 }, { "epoch": 2.66, "learning_rate": 7.660873026229953e-07, "loss": 1.2329, "step": 10635 }, { "epoch": 2.66, "learning_rate": 7.605083804299906e-07, "loss": 1.2221, "step": 10640 }, { "epoch": 2.66, "learning_rate": 7.549490433177609e-07, "loss": 1.2194, "step": 10645 }, { "epoch": 2.66, "learning_rate": 7.494093030705108e-07, "loss": 1.206, "step": 10650 }, { "epoch": 2.67, "learning_rate": 7.438891714309071e-07, "loss": 1.1964, "step": 10655 }, { "epoch": 2.67, "learning_rate": 7.383886601000479e-07, "loss": 1.1936, "step": 10660 }, { "epoch": 2.67, "learning_rate": 7.329077807374463e-07, "loss": 1.2165, "step": 10665 }, { "epoch": 2.67, "learning_rate": 7.274465449609969e-07, "loss": 1.2078, "step": 10670 }, { "epoch": 2.67, "learning_rate": 7.22004964346964e-07, "loss": 1.2431, "step": 10675 }, { "epoch": 2.67, "learning_rate": 7.165830504299387e-07, "loss": 1.228, "step": 10680 }, { "epoch": 2.67, "learning_rate": 7.111808147028299e-07, "loss": 1.2466, "step": 10685 }, { "epoch": 2.67, "learning_rate": 7.05798268616833e-07, "loss": 1.2406, "step": 10690 }, { "epoch": 2.68, "learning_rate": 7.004354235814059e-07, "loss": 1.2192, "step": 10695 }, { "epoch": 2.68, "learning_rate": 6.950922909642488e-07, "loss": 1.2238, "step": 10700 }, { "epoch": 2.68, "learning_rate": 6.897688820912729e-07, "loss": 1.1632, "step": 10705 }, { "epoch": 2.68, "learning_rate": 6.84465208246583e-07, "loss": 1.2128, "step": 10710 }, { "epoch": 2.68, "learning_rate": 6.79181280672454e-07, "loss": 1.2231, "step": 10715 }, { "epoch": 2.68, "learning_rate": 6.739171105692987e-07, "loss": 1.2338, "step": 10720 }, { "epoch": 2.68, "learning_rate": 6.686727090956535e-07, "loss": 1.2654, "step": 10725 }, { "epoch": 2.68, "learning_rate": 6.634480873681526e-07, "loss": 1.2201, "step": 10730 }, { "epoch": 2.69, "learning_rate": 6.582432564614993e-07, "loss": 1.1924, "step": 10735 }, { "epoch": 2.69, "learning_rate": 6.530582274084463e-07, "loss": 1.2118, "step": 10740 }, { "epoch": 2.69, "learning_rate": 6.47893011199775e-07, "loss": 1.1781, "step": 10745 }, { "epoch": 2.69, "learning_rate": 6.42747618784263e-07, "loss": 1.2255, "step": 10750 }, { "epoch": 2.69, "learning_rate": 6.376220610686734e-07, "loss": 1.2056, "step": 10755 }, { "epoch": 2.69, "learning_rate": 6.325163489177244e-07, "loss": 1.2519, "step": 10760 }, { "epoch": 2.69, "learning_rate": 6.274304931540643e-07, "loss": 1.23, "step": 10765 }, { "epoch": 2.69, "learning_rate": 6.223645045582549e-07, "loss": 1.2003, "step": 10770 }, { "epoch": 2.7, "learning_rate": 6.173183938687421e-07, "loss": 1.2812, "step": 10775 }, { "epoch": 2.7, "learning_rate": 6.122921717818375e-07, "loss": 1.2228, "step": 10780 }, { "epoch": 2.7, "learning_rate": 6.072858489516975e-07, "loss": 1.2133, "step": 10785 }, { "epoch": 2.7, "learning_rate": 6.022994359902957e-07, "loss": 1.2284, "step": 10790 }, { "epoch": 2.7, "learning_rate": 5.97332943467398e-07, "loss": 1.2501, "step": 10795 }, { "epoch": 2.7, "learning_rate": 5.923863819105513e-07, "loss": 1.2071, "step": 10800 }, { "epoch": 2.7, "learning_rate": 5.874597618050526e-07, "loss": 1.1823, "step": 10805 }, { "epoch": 2.7, "learning_rate": 5.825530935939261e-07, "loss": 1.2333, "step": 10810 }, { "epoch": 2.71, "learning_rate": 5.776663876779076e-07, "loss": 1.209, "step": 10815 }, { "epoch": 2.71, "learning_rate": 5.727996544154147e-07, "loss": 1.2094, "step": 10820 }, { "epoch": 2.71, "learning_rate": 5.679529041225318e-07, "loss": 1.2096, "step": 10825 }, { "epoch": 2.71, "learning_rate": 5.631261470729832e-07, "loss": 1.1935, "step": 10830 }, { "epoch": 2.71, "learning_rate": 5.583193934981146e-07, "loss": 1.2344, "step": 10835 }, { "epoch": 2.71, "learning_rate": 5.53532653586869e-07, "loss": 1.1948, "step": 10840 }, { "epoch": 2.71, "learning_rate": 5.487659374857668e-07, "loss": 1.2356, "step": 10845 }, { "epoch": 2.71, "learning_rate": 5.440192552988833e-07, "loss": 1.1916, "step": 10850 }, { "epoch": 2.72, "learning_rate": 5.392926170878277e-07, "loss": 1.2353, "step": 10855 }, { "epoch": 2.72, "learning_rate": 5.345860328717222e-07, "loss": 1.2342, "step": 10860 }, { "epoch": 2.72, "learning_rate": 5.298995126271789e-07, "loss": 1.1901, "step": 10865 }, { "epoch": 2.72, "learning_rate": 5.252330662882809e-07, "loss": 1.2272, "step": 10870 }, { "epoch": 2.72, "learning_rate": 5.205867037465606e-07, "loss": 1.1985, "step": 10875 }, { "epoch": 2.72, "learning_rate": 5.159604348509784e-07, "loss": 1.225, "step": 10880 }, { "epoch": 2.72, "learning_rate": 5.113542694079021e-07, "loss": 1.2324, "step": 10885 }, { "epoch": 2.72, "learning_rate": 5.06768217181085e-07, "loss": 1.2297, "step": 10890 }, { "epoch": 2.73, "learning_rate": 5.022022878916466e-07, "loss": 1.2081, "step": 10895 }, { "epoch": 2.73, "learning_rate": 4.976564912180526e-07, "loss": 1.2007, "step": 10900 }, { "epoch": 2.73, "learning_rate": 4.931308367960919e-07, "loss": 1.2192, "step": 10905 }, { "epoch": 2.73, "learning_rate": 4.886253342188574e-07, "loss": 1.2043, "step": 10910 }, { "epoch": 2.73, "learning_rate": 4.841399930367264e-07, "loss": 1.2311, "step": 10915 }, { "epoch": 2.73, "learning_rate": 4.796748227573411e-07, "loss": 1.2235, "step": 10920 }, { "epoch": 2.73, "learning_rate": 4.7522983284558513e-07, "loss": 1.2152, "step": 10925 }, { "epoch": 2.73, "learning_rate": 4.7080503272356693e-07, "loss": 1.1812, "step": 10930 }, { "epoch": 2.74, "learning_rate": 4.6640043177059725e-07, "loss": 1.217, "step": 10935 }, { "epoch": 2.74, "learning_rate": 4.620160393231732e-07, "loss": 1.2326, "step": 10940 }, { "epoch": 2.74, "learning_rate": 4.5765186467494837e-07, "loss": 1.1826, "step": 10945 }, { "epoch": 2.74, "learning_rate": 4.533079170767274e-07, "loss": 1.2128, "step": 10950 }, { "epoch": 2.74, "learning_rate": 4.489842057364391e-07, "loss": 1.212, "step": 10955 }, { "epoch": 2.74, "learning_rate": 4.4468073981911555e-07, "loss": 1.2254, "step": 10960 }, { "epoch": 2.74, "learning_rate": 4.40397528446872e-07, "loss": 1.265, "step": 10965 }, { "epoch": 2.74, "learning_rate": 4.3613458069889257e-07, "loss": 1.2472, "step": 10970 }, { "epoch": 2.75, "learning_rate": 4.3189190561140904e-07, "loss": 1.2362, "step": 10975 }, { "epoch": 2.75, "learning_rate": 4.276695121776786e-07, "loss": 1.1828, "step": 10980 }, { "epoch": 2.75, "learning_rate": 4.234674093479696e-07, "loss": 1.2195, "step": 10985 }, { "epoch": 2.75, "learning_rate": 4.192856060295358e-07, "loss": 1.1925, "step": 10990 }, { "epoch": 2.75, "learning_rate": 4.1512411108660775e-07, "loss": 1.1981, "step": 10995 }, { "epoch": 2.75, "learning_rate": 4.1098293334036255e-07, "loss": 1.216, "step": 11000 }, { "epoch": 2.75, "learning_rate": 4.068620815689139e-07, "loss": 1.1915, "step": 11005 }, { "epoch": 2.75, "learning_rate": 4.027615645072902e-07, "loss": 1.2144, "step": 11010 }, { "epoch": 2.76, "learning_rate": 3.986813908474152e-07, "loss": 1.1742, "step": 11015 }, { "epoch": 2.76, "learning_rate": 3.946215692380906e-07, "loss": 1.1993, "step": 11020 }, { "epoch": 2.76, "learning_rate": 3.90582108284977e-07, "loss": 1.2322, "step": 11025 }, { "epoch": 2.76, "learning_rate": 3.865630165505785e-07, "loss": 1.2331, "step": 11030 }, { "epoch": 2.76, "learning_rate": 3.8256430255421805e-07, "loss": 1.225, "step": 11035 }, { "epoch": 2.76, "learning_rate": 3.7858597477202777e-07, "loss": 1.2249, "step": 11040 }, { "epoch": 2.76, "learning_rate": 3.746280416369241e-07, "loss": 1.1942, "step": 11045 }, { "epoch": 2.76, "learning_rate": 3.7069051153859394e-07, "loss": 1.2156, "step": 11050 }, { "epoch": 2.77, "learning_rate": 3.6677339282347624e-07, "loss": 1.1983, "step": 11055 }, { "epoch": 2.77, "learning_rate": 3.628766937947414e-07, "loss": 1.2137, "step": 11060 }, { "epoch": 2.77, "learning_rate": 3.5900042271227897e-07, "loss": 1.2108, "step": 11065 }, { "epoch": 2.77, "learning_rate": 3.5514458779267514e-07, "loss": 1.2288, "step": 11070 }, { "epoch": 2.77, "learning_rate": 3.5130919720919865e-07, "loss": 1.233, "step": 11075 }, { "epoch": 2.77, "learning_rate": 3.474942590917774e-07, "loss": 1.2047, "step": 11080 }, { "epoch": 2.77, "learning_rate": 3.436997815269927e-07, "loss": 1.1715, "step": 11085 }, { "epoch": 2.77, "learning_rate": 3.399257725580518e-07, "loss": 1.2027, "step": 11090 }, { "epoch": 2.78, "learning_rate": 3.3617224018477335e-07, "loss": 1.2088, "step": 11095 }, { "epoch": 2.78, "learning_rate": 3.3243919236357503e-07, "loss": 1.2144, "step": 11100 }, { "epoch": 2.78, "learning_rate": 3.287266370074493e-07, "loss": 1.2316, "step": 11105 }, { "epoch": 2.78, "learning_rate": 3.2503458198595237e-07, "loss": 1.2022, "step": 11110 }, { "epoch": 2.78, "learning_rate": 3.2136303512518396e-07, "loss": 1.2393, "step": 11115 }, { "epoch": 2.78, "learning_rate": 3.177120042077786e-07, "loss": 1.1972, "step": 11120 }, { "epoch": 2.78, "learning_rate": 3.140814969728734e-07, "loss": 1.202, "step": 11125 }, { "epoch": 2.78, "learning_rate": 3.104715211161069e-07, "loss": 1.1677, "step": 11130 }, { "epoch": 2.79, "learning_rate": 3.0688208428959696e-07, "loss": 1.2491, "step": 11135 }, { "epoch": 2.79, "learning_rate": 3.0331319410192297e-07, "loss": 1.1845, "step": 11140 }, { "epoch": 2.79, "learning_rate": 2.997648581181112e-07, "loss": 1.2318, "step": 11145 }, { "epoch": 2.79, "learning_rate": 2.9623708385961956e-07, "loss": 1.2688, "step": 11150 }, { "epoch": 2.79, "learning_rate": 2.927298788043209e-07, "loss": 1.2294, "step": 11155 }, { "epoch": 2.79, "learning_rate": 2.892432503864884e-07, "loss": 1.2123, "step": 11160 }, { "epoch": 2.79, "learning_rate": 2.857772059967767e-07, "loss": 1.235, "step": 11165 }, { "epoch": 2.79, "learning_rate": 2.8233175298221005e-07, "loss": 1.2096, "step": 11170 }, { "epoch": 2.8, "learning_rate": 2.789068986461618e-07, "loss": 1.1731, "step": 11175 }, { "epoch": 2.8, "learning_rate": 2.755026502483449e-07, "loss": 1.2422, "step": 11180 }, { "epoch": 2.8, "learning_rate": 2.721190150047925e-07, "loss": 1.2165, "step": 11185 }, { "epoch": 2.8, "learning_rate": 2.687560000878453e-07, "loss": 1.2478, "step": 11190 }, { "epoch": 2.8, "learning_rate": 2.6541361262613307e-07, "loss": 1.2024, "step": 11195 }, { "epoch": 2.8, "learning_rate": 2.6209185970455963e-07, "loss": 1.2733, "step": 11200 }, { "epoch": 2.8, "learning_rate": 2.5879074836429375e-07, "loss": 1.2276, "step": 11205 }, { "epoch": 2.8, "learning_rate": 2.5551028560274803e-07, "loss": 1.1597, "step": 11210 }, { "epoch": 2.81, "learning_rate": 2.5225047837356354e-07, "loss": 1.2047, "step": 11215 }, { "epoch": 2.81, "learning_rate": 2.490113335866018e-07, "loss": 1.23, "step": 11220 }, { "epoch": 2.81, "learning_rate": 2.4579285810792054e-07, "loss": 1.1806, "step": 11225 }, { "epoch": 2.81, "learning_rate": 2.425950587597714e-07, "loss": 1.2063, "step": 11230 }, { "epoch": 2.81, "learning_rate": 2.3941794232057334e-07, "loss": 1.2058, "step": 11235 }, { "epoch": 2.81, "learning_rate": 2.3626151552490485e-07, "loss": 1.1915, "step": 11240 }, { "epoch": 2.81, "learning_rate": 2.3312578506348828e-07, "loss": 1.2629, "step": 11245 }, { "epoch": 2.81, "learning_rate": 2.3001075758317448e-07, "loss": 1.1962, "step": 11250 }, { "epoch": 2.82, "learning_rate": 2.269164396869339e-07, "loss": 1.2024, "step": 11255 }, { "epoch": 2.82, "learning_rate": 2.238428379338342e-07, "loss": 1.2191, "step": 11260 }, { "epoch": 2.82, "learning_rate": 2.2078995883903276e-07, "loss": 1.2477, "step": 11265 }, { "epoch": 2.82, "learning_rate": 2.1775780887376086e-07, "loss": 1.2188, "step": 11270 }, { "epoch": 2.82, "learning_rate": 2.147463944653072e-07, "loss": 1.2305, "step": 11275 }, { "epoch": 2.82, "learning_rate": 2.1175572199701233e-07, "loss": 1.227, "step": 11280 }, { "epoch": 2.82, "learning_rate": 2.0878579780824525e-07, "loss": 1.1789, "step": 11285 }, { "epoch": 2.82, "learning_rate": 2.0583662819439686e-07, "loss": 1.1595, "step": 11290 }, { "epoch": 2.83, "learning_rate": 2.0290821940686323e-07, "loss": 1.1932, "step": 11295 }, { "epoch": 2.83, "learning_rate": 2.000005776530345e-07, "loss": 1.197, "step": 11300 }, { "epoch": 2.83, "learning_rate": 1.9711370909627935e-07, "loss": 1.1964, "step": 11305 }, { "epoch": 2.83, "learning_rate": 1.94247619855934e-07, "loss": 1.1978, "step": 11310 }, { "epoch": 2.83, "learning_rate": 1.9140231600728866e-07, "loss": 1.1771, "step": 11315 }, { "epoch": 2.83, "learning_rate": 1.885778035815722e-07, "loss": 1.182, "step": 11320 }, { "epoch": 2.83, "learning_rate": 1.8577408856594536e-07, "loss": 1.2523, "step": 11325 }, { "epoch": 2.83, "learning_rate": 1.82991176903482e-07, "loss": 1.2297, "step": 11330 }, { "epoch": 2.84, "learning_rate": 1.8022907449316007e-07, "loss": 1.2305, "step": 11335 }, { "epoch": 2.84, "learning_rate": 1.7748778718984394e-07, "loss": 1.2029, "step": 11340 }, { "epoch": 2.84, "learning_rate": 1.7476732080428215e-07, "loss": 1.2182, "step": 11345 }, { "epoch": 2.84, "learning_rate": 1.7206768110308524e-07, "loss": 1.2272, "step": 11350 }, { "epoch": 2.84, "learning_rate": 1.6938887380871683e-07, "loss": 1.2174, "step": 11355 }, { "epoch": 2.84, "learning_rate": 1.667309045994825e-07, "loss": 1.2409, "step": 11360 }, { "epoch": 2.84, "learning_rate": 1.6409377910951763e-07, "loss": 1.2352, "step": 11365 }, { "epoch": 2.84, "learning_rate": 1.6147750292877296e-07, "loss": 1.2629, "step": 11370 }, { "epoch": 2.85, "learning_rate": 1.588820816030079e-07, "loss": 1.177, "step": 11375 }, { "epoch": 2.85, "learning_rate": 1.5630752063377274e-07, "loss": 1.2109, "step": 11380 }, { "epoch": 2.85, "learning_rate": 1.5375382547840102e-07, "loss": 1.259, "step": 11385 }, { "epoch": 2.85, "learning_rate": 1.5122100154999597e-07, "loss": 1.1901, "step": 11390 }, { "epoch": 2.85, "learning_rate": 1.4870905421741967e-07, "loss": 1.235, "step": 11395 }, { "epoch": 2.85, "learning_rate": 1.4621798880528505e-07, "loss": 1.2135, "step": 11400 }, { "epoch": 2.85, "learning_rate": 1.4374781059393495e-07, "loss": 1.2716, "step": 11405 }, { "epoch": 2.85, "learning_rate": 1.4129852481944317e-07, "loss": 1.1912, "step": 11410 }, { "epoch": 2.86, "learning_rate": 1.388701366735956e-07, "loss": 1.2086, "step": 11415 }, { "epoch": 2.86, "learning_rate": 1.3646265130387916e-07, "loss": 1.2066, "step": 11420 }, { "epoch": 2.86, "learning_rate": 1.3407607381347609e-07, "loss": 1.1874, "step": 11425 }, { "epoch": 2.86, "learning_rate": 1.3171040926124757e-07, "loss": 1.2009, "step": 11430 }, { "epoch": 2.86, "learning_rate": 1.2936566266172568e-07, "loss": 1.204, "step": 11435 }, { "epoch": 2.86, "learning_rate": 1.270418389851047e-07, "loss": 1.2233, "step": 11440 }, { "epoch": 2.86, "learning_rate": 1.2473894315722434e-07, "loss": 1.2269, "step": 11445 }, { "epoch": 2.86, "learning_rate": 1.2245698005956762e-07, "loss": 1.2131, "step": 11450 }, { "epoch": 2.87, "learning_rate": 1.2019595452924193e-07, "loss": 1.2056, "step": 11455 }, { "epoch": 2.87, "learning_rate": 1.1795587135897568e-07, "loss": 1.2114, "step": 11460 }, { "epoch": 2.87, "learning_rate": 1.1573673529710506e-07, "loss": 1.2214, "step": 11465 }, { "epoch": 2.87, "learning_rate": 1.1353855104756395e-07, "loss": 1.2326, "step": 11470 }, { "epoch": 2.87, "learning_rate": 1.1136132326987403e-07, "loss": 1.2284, "step": 11475 }, { "epoch": 2.87, "learning_rate": 1.0920505657913694e-07, "loss": 1.2452, "step": 11480 }, { "epoch": 2.87, "learning_rate": 1.0706975554601983e-07, "loss": 1.2177, "step": 11485 }, { "epoch": 2.87, "learning_rate": 1.0495542469675213e-07, "loss": 1.2432, "step": 11490 }, { "epoch": 2.88, "learning_rate": 1.0286206851310987e-07, "loss": 1.2204, "step": 11495 }, { "epoch": 2.88, "learning_rate": 1.0078969143241024e-07, "loss": 1.2253, "step": 11500 }, { "epoch": 2.88, "learning_rate": 9.873829784749933e-08, "loss": 1.202, "step": 11505 }, { "epoch": 2.88, "learning_rate": 9.670789210674547e-08, "loss": 1.2132, "step": 11510 }, { "epoch": 2.88, "learning_rate": 9.469847851402924e-08, "loss": 1.2229, "step": 11515 }, { "epoch": 2.88, "learning_rate": 9.27100613287324e-08, "loss": 1.2361, "step": 11520 }, { "epoch": 2.88, "learning_rate": 9.074264476573003e-08, "loss": 1.2348, "step": 11525 }, { "epoch": 2.88, "learning_rate": 8.879623299538398e-08, "loss": 1.2372, "step": 11530 }, { "epoch": 2.89, "learning_rate": 8.68708301435306e-08, "loss": 1.1973, "step": 11535 }, { "epoch": 2.89, "learning_rate": 8.496644029147184e-08, "loss": 1.1942, "step": 11540 }, { "epoch": 2.89, "learning_rate": 8.308306747597306e-08, "loss": 1.2046, "step": 11545 }, { "epoch": 2.89, "learning_rate": 8.122071568924305e-08, "loss": 1.2065, "step": 11550 }, { "epoch": 2.89, "learning_rate": 7.937938887893626e-08, "loss": 1.2092, "step": 11555 }, { "epoch": 2.89, "learning_rate": 7.75590909481394e-08, "loss": 1.2493, "step": 11560 }, { "epoch": 2.89, "learning_rate": 7.575982575536267e-08, "loss": 1.2344, "step": 11565 }, { "epoch": 2.89, "learning_rate": 7.398159711453634e-08, "loss": 1.2059, "step": 11570 }, { "epoch": 2.9, "learning_rate": 7.222440879499415e-08, "loss": 1.2026, "step": 11575 }, { "epoch": 2.9, "learning_rate": 7.048826452147329e-08, "loss": 1.2069, "step": 11580 }, { "epoch": 2.9, "learning_rate": 6.877316797410549e-08, "loss": 1.1986, "step": 11585 }, { "epoch": 2.9, "learning_rate": 6.707912278840267e-08, "loss": 1.2067, "step": 11590 }, { "epoch": 2.9, "learning_rate": 6.540613255525796e-08, "loss": 1.2362, "step": 11595 }, { "epoch": 2.9, "learning_rate": 6.37542008209302e-08, "loss": 1.1966, "step": 11600 }, { "epoch": 2.9, "learning_rate": 6.212333108704505e-08, "loss": 1.208, "step": 11605 }, { "epoch": 2.9, "learning_rate": 6.051352681057831e-08, "loss": 1.2273, "step": 11610 }, { "epoch": 2.91, "learning_rate": 5.8924791403854876e-08, "loss": 1.2254, "step": 11615 }, { "epoch": 2.91, "learning_rate": 5.735712823453976e-08, "loss": 1.1853, "step": 11620 }, { "epoch": 2.91, "learning_rate": 5.5810540625632625e-08, "loss": 1.2131, "step": 11625 }, { "epoch": 2.91, "learning_rate": 5.428503185545442e-08, "loss": 1.273, "step": 11630 }, { "epoch": 2.91, "learning_rate": 5.27806051576496e-08, "loss": 1.2154, "step": 11635 }, { "epoch": 2.91, "learning_rate": 5.129726372117061e-08, "loss": 1.222, "step": 11640 }, { "epoch": 2.91, "learning_rate": 4.9835010690278965e-08, "loss": 1.2266, "step": 11645 }, { "epoch": 2.91, "learning_rate": 4.839384916453194e-08, "loss": 1.2142, "step": 11650 }, { "epoch": 2.92, "learning_rate": 4.6973782198780346e-08, "loss": 1.2123, "step": 11655 }, { "epoch": 2.92, "learning_rate": 4.5574812803160786e-08, "loss": 1.2002, "step": 11660 }, { "epoch": 2.92, "learning_rate": 4.419694394308782e-08, "loss": 1.2489, "step": 11665 }, { "epoch": 2.92, "learning_rate": 4.28401785392496e-08, "loss": 1.2534, "step": 11670 }, { "epoch": 2.92, "learning_rate": 4.1504519467601146e-08, "loss": 1.2275, "step": 11675 }, { "epoch": 2.92, "learning_rate": 4.018996955935772e-08, "loss": 1.234, "step": 11680 }, { "epoch": 2.92, "learning_rate": 3.889653160098816e-08, "loss": 1.2255, "step": 11685 }, { "epoch": 2.92, "learning_rate": 3.762420833421265e-08, "loss": 1.212, "step": 11690 }, { "epoch": 2.93, "learning_rate": 3.6373002455992734e-08, "loss": 1.2093, "step": 11695 }, { "epoch": 2.93, "learning_rate": 3.514291661852687e-08, "loss": 1.2092, "step": 11700 }, { "epoch": 2.93, "learning_rate": 3.3933953429244884e-08, "loss": 1.1918, "step": 11705 }, { "epoch": 2.93, "learning_rate": 3.274611545080353e-08, "loss": 1.2321, "step": 11710 }, { "epoch": 2.93, "learning_rate": 3.1579405201079826e-08, "loss": 1.2152, "step": 11715 }, { "epoch": 2.93, "learning_rate": 3.043382515316551e-08, "loss": 1.189, "step": 11720 }, { "epoch": 2.93, "learning_rate": 2.9309377735362578e-08, "loss": 1.2432, "step": 11725 }, { "epoch": 2.93, "learning_rate": 2.8206065331179978e-08, "loss": 1.2246, "step": 11730 }, { "epoch": 2.94, "learning_rate": 2.7123890279322495e-08, "loss": 1.2078, "step": 11735 }, { "epoch": 2.94, "learning_rate": 2.6062854873691866e-08, "loss": 1.2295, "step": 11740 }, { "epoch": 2.94, "learning_rate": 2.5022961363381227e-08, "loss": 1.2207, "step": 11745 }, { "epoch": 2.94, "learning_rate": 2.4004211952666223e-08, "loss": 1.2077, "step": 11750 }, { "epoch": 2.94, "learning_rate": 2.3006608801006136e-08, "loss": 1.2256, "step": 11755 }, { "epoch": 2.94, "learning_rate": 2.203015402303166e-08, "loss": 1.2325, "step": 11760 }, { "epoch": 2.94, "learning_rate": 2.1074849688550446e-08, "loss": 1.1954, "step": 11765 }, { "epoch": 2.94, "learning_rate": 2.0140697822533807e-08, "loss": 1.2344, "step": 11770 }, { "epoch": 2.95, "learning_rate": 1.922770040511557e-08, "loss": 1.1843, "step": 11775 }, { "epoch": 2.95, "learning_rate": 1.833585937158988e-08, "loss": 1.1996, "step": 11780 }, { "epoch": 2.95, "learning_rate": 1.7465176612405656e-08, "loss": 1.2203, "step": 11785 }, { "epoch": 2.95, "learning_rate": 1.6615653973159895e-08, "loss": 1.2383, "step": 11790 }, { "epoch": 2.95, "learning_rate": 1.5787293254598822e-08, "loss": 1.2293, "step": 11795 }, { "epoch": 2.95, "learning_rate": 1.4980096212608985e-08, "loss": 1.2038, "step": 11800 }, { "epoch": 2.95, "learning_rate": 1.4194064558219478e-08, "loss": 1.2103, "step": 11805 }, { "epoch": 2.95, "learning_rate": 1.3429199957590844e-08, "loss": 1.2211, "step": 11810 }, { "epoch": 2.96, "learning_rate": 1.2685504032019514e-08, "loss": 1.1878, "step": 11815 }, { "epoch": 2.96, "learning_rate": 1.1962978357925593e-08, "loss": 1.1948, "step": 11820 }, { "epoch": 2.96, "learning_rate": 1.1261624466858411e-08, "loss": 1.2422, "step": 11825 }, { "epoch": 2.96, "learning_rate": 1.058144384548765e-08, "loss": 1.2327, "step": 11830 }, { "epoch": 2.96, "learning_rate": 9.922437935601104e-09, "loss": 1.1815, "step": 11835 }, { "epoch": 2.96, "learning_rate": 9.28460813410359e-09, "loss": 1.2487, "step": 11840 }, { "epoch": 2.96, "learning_rate": 8.667955793011384e-09, "loss": 1.1774, "step": 11845 }, { "epoch": 2.96, "learning_rate": 8.072482219452227e-09, "loss": 1.2423, "step": 11850 }, { "epoch": 2.97, "learning_rate": 7.498188675658658e-09, "loss": 1.2548, "step": 11855 }, { "epoch": 2.97, "learning_rate": 6.945076378969129e-09, "loss": 1.165, "step": 11860 }, { "epoch": 2.97, "learning_rate": 6.413146501824674e-09, "loss": 1.2387, "step": 11865 }, { "epoch": 2.97, "learning_rate": 5.902400171762246e-09, "loss": 1.1689, "step": 11870 }, { "epoch": 2.97, "learning_rate": 5.412838471420267e-09, "loss": 1.195, "step": 11875 }, { "epoch": 2.97, "learning_rate": 4.944462438528641e-09, "loss": 1.2161, "step": 11880 }, { "epoch": 2.97, "learning_rate": 4.497273065910968e-09, "loss": 1.2272, "step": 11885 }, { "epoch": 2.97, "learning_rate": 4.071271301480107e-09, "loss": 1.2012, "step": 11890 }, { "epoch": 2.98, "learning_rate": 3.6664580482392852e-09, "loss": 1.2359, "step": 11895 }, { "epoch": 2.98, "learning_rate": 3.282834164275439e-09, "loss": 1.2254, "step": 11900 }, { "epoch": 2.98, "learning_rate": 2.9204004627625404e-09, "loss": 1.2317, "step": 11905 }, { "epoch": 2.98, "learning_rate": 2.5791577119560484e-09, "loss": 1.2238, "step": 11910 }, { "epoch": 2.98, "learning_rate": 2.2591066351929093e-09, "loss": 1.2234, "step": 11915 }, { "epoch": 2.98, "learning_rate": 1.9602479108904448e-09, "loss": 1.1968, "step": 11920 }, { "epoch": 2.98, "learning_rate": 1.6825821725430236e-09, "loss": 1.2227, "step": 11925 }, { "epoch": 2.98, "learning_rate": 1.4261100087231694e-09, "loss": 1.1756, "step": 11930 }, { "epoch": 2.99, "learning_rate": 1.1908319630782316e-09, "loss": 1.1996, "step": 11935 }, { "epoch": 2.99, "learning_rate": 9.76748534330385e-10, "loss": 1.2148, "step": 11940 }, { "epoch": 2.99, "learning_rate": 7.838601762755194e-10, "loss": 1.2056, "step": 11945 }, { "epoch": 2.99, "learning_rate": 6.121672977821292e-10, "loss": 1.2297, "step": 11950 }, { "epoch": 2.99, "learning_rate": 4.616702627890934e-10, "loss": 1.2005, "step": 11955 }, { "epoch": 2.99, "learning_rate": 3.3236939030789615e-10, "loss": 1.2425, "step": 11960 }, { "epoch": 2.99, "learning_rate": 2.242649544192954e-10, "loss": 1.2216, "step": 11965 }, { "epoch": 2.99, "learning_rate": 1.3735718427332346e-10, "loss": 1.208, "step": 11970 }, { "epoch": 3.0, "learning_rate": 7.164626409039699e-11, "loss": 1.2005, "step": 11975 }, { "epoch": 3.0, "learning_rate": 2.7132333157986466e-11, "loss": 1.2235, "step": 11980 }, { "epoch": 3.0, "learning_rate": 3.815485832836529e-12, "loss": 1.2092, "step": 11985 }, { "epoch": 3.0, "eval_loss": 1.2043143510818481, "eval_runtime": 1766.7542, "eval_samples_per_second": 16.022, "eval_steps_per_second": 1.002, "step": 11988 }, { "epoch": 3.0, "step": 11988, "total_flos": 7988601421824000.0, "train_loss": 1.2296489907933905, "train_runtime": 137473.834, "train_samples_per_second": 5.582, "train_steps_per_second": 0.087 } ], "logging_steps": 5, "max_steps": 11988, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 7988601421824000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }