{ "best_metric": null, "best_model_checkpoint": null, "epoch": 45.0, "global_step": 6300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.00047619047619047614, "loss": 0.4551, "step": 10 }, { "epoch": 0.14, "learning_rate": 0.0009523809523809523, "loss": 0.4547, "step": 20 }, { "epoch": 0.21, "learning_rate": 0.0014285714285714284, "loss": 0.4544, "step": 30 }, { "epoch": 0.29, "learning_rate": 0.0019047619047619045, "loss": 0.4511, "step": 40 }, { "epoch": 0.36, "learning_rate": 0.0023809523809523807, "loss": 0.4473, "step": 50 }, { "epoch": 0.43, "learning_rate": 0.0028571428571428567, "loss": 0.4409, "step": 60 }, { "epoch": 0.5, "learning_rate": 0.003333333333333333, "loss": 0.4332, "step": 70 }, { "epoch": 0.57, "learning_rate": 0.003809523809523809, "loss": 0.4166, "step": 80 }, { "epoch": 0.64, "learning_rate": 0.004285714285714285, "loss": 0.391, "step": 90 }, { "epoch": 0.71, "learning_rate": 0.0047619047619047615, "loss": 0.3292, "step": 100 }, { "epoch": 0.79, "learning_rate": 0.005238095238095238, "loss": 0.2205, "step": 110 }, { "epoch": 0.86, "learning_rate": 0.005714285714285713, "loss": 0.1464, "step": 120 }, { "epoch": 0.93, "learning_rate": 0.00619047619047619, "loss": 0.1104, "step": 130 }, { "epoch": 1.0, "learning_rate": 0.006666666666666666, "loss": 0.0839, "step": 140 }, { "epoch": 1.07, "learning_rate": 0.007142857142857142, "loss": 0.0637, "step": 150 }, { "epoch": 1.14, "learning_rate": 0.007619047619047618, "loss": 0.0519, "step": 160 }, { "epoch": 1.21, "learning_rate": 0.008095238095238095, "loss": 0.0478, "step": 170 }, { "epoch": 1.29, "learning_rate": 0.00857142857142857, "loss": 0.045, "step": 180 }, { "epoch": 1.36, "learning_rate": 0.009047619047619047, "loss": 0.0432, "step": 190 }, { "epoch": 1.43, "learning_rate": 0.009523809523809523, "loss": 0.0399, "step": 200 }, { "epoch": 1.5, "learning_rate": 0.009999999999999998, "loss": 0.0373, "step": 210 }, { "epoch": 1.57, "learning_rate": 0.010476190476190476, "loss": 0.038, "step": 220 }, { "epoch": 1.64, "learning_rate": 0.010952380952380951, "loss": 0.0349, "step": 230 }, { "epoch": 1.71, "learning_rate": 0.011428571428571427, "loss": 0.0335, "step": 240 }, { "epoch": 1.79, "learning_rate": 0.011904761904761904, "loss": 0.0344, "step": 250 }, { "epoch": 1.86, "learning_rate": 0.01238095238095238, "loss": 0.0298, "step": 260 }, { "epoch": 1.93, "learning_rate": 0.012857142857142855, "loss": 0.0312, "step": 270 }, { "epoch": 2.0, "learning_rate": 0.013333333333333332, "loss": 0.0287, "step": 280 }, { "epoch": 2.07, "learning_rate": 0.013809523809523808, "loss": 0.0275, "step": 290 }, { "epoch": 2.14, "learning_rate": 0.014285714285714284, "loss": 0.0275, "step": 300 }, { "epoch": 2.21, "learning_rate": 0.01476190476190476, "loss": 0.0258, "step": 310 }, { "epoch": 2.29, "learning_rate": 0.015238095238095236, "loss": 0.0259, "step": 320 }, { "epoch": 2.36, "learning_rate": 0.015714285714285715, "loss": 0.0255, "step": 330 }, { "epoch": 2.43, "learning_rate": 0.01619047619047619, "loss": 0.0242, "step": 340 }, { "epoch": 2.5, "learning_rate": 0.016666666666666666, "loss": 0.0262, "step": 350 }, { "epoch": 2.57, "learning_rate": 0.01714285714285714, "loss": 0.0254, "step": 360 }, { "epoch": 2.64, "learning_rate": 0.017619047619047618, "loss": 0.0232, "step": 370 }, { "epoch": 2.71, "learning_rate": 0.018095238095238095, "loss": 0.0241, "step": 380 }, { "epoch": 2.79, "learning_rate": 0.018571428571428572, "loss": 0.0227, "step": 390 }, { "epoch": 2.86, "learning_rate": 0.019047619047619046, "loss": 0.0242, "step": 400 }, { "epoch": 2.93, "learning_rate": 0.019523809523809523, "loss": 0.0236, "step": 410 }, { "epoch": 3.0, "learning_rate": 0.019999999999999997, "loss": 0.023, "step": 420 }, { "epoch": 3.07, "learning_rate": 0.020476190476190478, "loss": 0.0213, "step": 430 }, { "epoch": 3.14, "learning_rate": 0.02095238095238095, "loss": 0.0214, "step": 440 }, { "epoch": 3.21, "learning_rate": 0.02142857142857143, "loss": 0.0219, "step": 450 }, { "epoch": 3.29, "learning_rate": 0.021904761904761903, "loss": 0.0206, "step": 460 }, { "epoch": 3.36, "learning_rate": 0.02238095238095238, "loss": 0.0214, "step": 470 }, { "epoch": 3.43, "learning_rate": 0.022857142857142854, "loss": 0.0213, "step": 480 }, { "epoch": 3.5, "learning_rate": 0.023333333333333334, "loss": 0.0212, "step": 490 }, { "epoch": 3.57, "learning_rate": 0.023809523809523808, "loss": 0.0209, "step": 500 }, { "epoch": 3.64, "learning_rate": 0.024285714285714285, "loss": 0.0198, "step": 510 }, { "epoch": 3.71, "learning_rate": 0.02476190476190476, "loss": 0.0206, "step": 520 }, { "epoch": 3.79, "learning_rate": 0.025238095238095237, "loss": 0.0195, "step": 530 }, { "epoch": 3.86, "learning_rate": 0.02571428571428571, "loss": 0.0223, "step": 540 }, { "epoch": 3.93, "learning_rate": 0.02619047619047619, "loss": 0.0207, "step": 550 }, { "epoch": 4.0, "learning_rate": 0.026666666666666665, "loss": 0.0197, "step": 560 }, { "epoch": 4.07, "learning_rate": 0.027142857142857142, "loss": 0.0209, "step": 570 }, { "epoch": 4.14, "learning_rate": 0.027619047619047616, "loss": 0.0188, "step": 580 }, { "epoch": 4.21, "learning_rate": 0.028095238095238093, "loss": 0.0181, "step": 590 }, { "epoch": 4.29, "learning_rate": 0.028571428571428567, "loss": 0.0184, "step": 600 }, { "epoch": 4.36, "learning_rate": 0.029047619047619048, "loss": 0.0178, "step": 610 }, { "epoch": 4.43, "learning_rate": 0.02952380952380952, "loss": 0.0177, "step": 620 }, { "epoch": 4.5, "learning_rate": 0.03, "loss": 0.0169, "step": 630 }, { "epoch": 4.57, "learning_rate": 0.029947089947089944, "loss": 0.0169, "step": 640 }, { "epoch": 4.64, "learning_rate": 0.029894179894179893, "loss": 0.017, "step": 650 }, { "epoch": 4.71, "learning_rate": 0.02984126984126984, "loss": 0.0162, "step": 660 }, { "epoch": 4.79, "learning_rate": 0.029788359788359788, "loss": 0.0159, "step": 670 }, { "epoch": 4.86, "learning_rate": 0.029735449735449733, "loss": 0.0143, "step": 680 }, { "epoch": 4.93, "learning_rate": 0.029682539682539682, "loss": 0.0127, "step": 690 }, { "epoch": 5.0, "learning_rate": 0.029629629629629627, "loss": 0.0135, "step": 700 }, { "epoch": 5.07, "learning_rate": 0.029576719576719576, "loss": 0.0137, "step": 710 }, { "epoch": 5.14, "learning_rate": 0.02952380952380952, "loss": 0.0124, "step": 720 }, { "epoch": 5.21, "learning_rate": 0.02947089947089947, "loss": 0.0125, "step": 730 }, { "epoch": 5.29, "learning_rate": 0.029417989417989416, "loss": 0.0127, "step": 740 }, { "epoch": 5.36, "learning_rate": 0.029365079365079365, "loss": 0.0118, "step": 750 }, { "epoch": 5.43, "learning_rate": 0.02931216931216931, "loss": 0.013, "step": 760 }, { "epoch": 5.5, "learning_rate": 0.02925925925925926, "loss": 0.0116, "step": 770 }, { "epoch": 5.57, "learning_rate": 0.029206349206349205, "loss": 0.0111, "step": 780 }, { "epoch": 5.64, "learning_rate": 0.029153439153439153, "loss": 0.0103, "step": 790 }, { "epoch": 5.71, "learning_rate": 0.0291005291005291, "loss": 0.011, "step": 800 }, { "epoch": 5.79, "learning_rate": 0.029047619047619048, "loss": 0.0102, "step": 810 }, { "epoch": 5.86, "learning_rate": 0.028994708994708993, "loss": 0.0097, "step": 820 }, { "epoch": 5.93, "learning_rate": 0.028941798941798942, "loss": 0.0094, "step": 830 }, { "epoch": 6.0, "learning_rate": 0.028888888888888888, "loss": 0.0102, "step": 840 }, { "epoch": 6.07, "learning_rate": 0.028835978835978836, "loss": 0.0091, "step": 850 }, { "epoch": 6.14, "learning_rate": 0.028783068783068782, "loss": 0.0086, "step": 860 }, { "epoch": 6.21, "learning_rate": 0.02873015873015873, "loss": 0.0096, "step": 870 }, { "epoch": 6.29, "learning_rate": 0.028677248677248676, "loss": 0.0091, "step": 880 }, { "epoch": 6.36, "learning_rate": 0.02862433862433862, "loss": 0.0076, "step": 890 }, { "epoch": 6.43, "learning_rate": 0.028571428571428567, "loss": 0.009, "step": 900 }, { "epoch": 6.5, "learning_rate": 0.028518518518518516, "loss": 0.0081, "step": 910 }, { "epoch": 6.57, "learning_rate": 0.02846560846560846, "loss": 0.0076, "step": 920 }, { "epoch": 6.64, "learning_rate": 0.02841269841269841, "loss": 0.0078, "step": 930 }, { "epoch": 6.71, "learning_rate": 0.028359788359788356, "loss": 0.0081, "step": 940 }, { "epoch": 6.79, "learning_rate": 0.028306878306878305, "loss": 0.0076, "step": 950 }, { "epoch": 6.86, "learning_rate": 0.02825396825396825, "loss": 0.0071, "step": 960 }, { "epoch": 6.93, "learning_rate": 0.0282010582010582, "loss": 0.0072, "step": 970 }, { "epoch": 7.0, "learning_rate": 0.028148148148148144, "loss": 0.0068, "step": 980 }, { "epoch": 7.07, "learning_rate": 0.028095238095238093, "loss": 0.0071, "step": 990 }, { "epoch": 7.14, "learning_rate": 0.02804232804232804, "loss": 0.0071, "step": 1000 }, { "epoch": 7.21, "learning_rate": 0.027989417989417988, "loss": 0.0066, "step": 1010 }, { "epoch": 7.29, "learning_rate": 0.027936507936507933, "loss": 0.0065, "step": 1020 }, { "epoch": 7.36, "learning_rate": 0.027883597883597882, "loss": 0.0067, "step": 1030 }, { "epoch": 7.43, "learning_rate": 0.027830687830687827, "loss": 0.0061, "step": 1040 }, { "epoch": 7.5, "learning_rate": 0.027777777777777776, "loss": 0.0063, "step": 1050 }, { "epoch": 7.57, "learning_rate": 0.02772486772486772, "loss": 0.0062, "step": 1060 }, { "epoch": 7.64, "learning_rate": 0.02767195767195767, "loss": 0.0061, "step": 1070 }, { "epoch": 7.71, "learning_rate": 0.027619047619047616, "loss": 0.0064, "step": 1080 }, { "epoch": 7.79, "learning_rate": 0.027566137566137565, "loss": 0.0069, "step": 1090 }, { "epoch": 7.86, "learning_rate": 0.02751322751322751, "loss": 0.0066, "step": 1100 }, { "epoch": 7.93, "learning_rate": 0.02746031746031746, "loss": 0.0064, "step": 1110 }, { "epoch": 8.0, "learning_rate": 0.027407407407407405, "loss": 0.0058, "step": 1120 }, { "epoch": 8.07, "learning_rate": 0.027354497354497354, "loss": 0.0057, "step": 1130 }, { "epoch": 8.14, "learning_rate": 0.0273015873015873, "loss": 0.006, "step": 1140 }, { "epoch": 8.21, "learning_rate": 0.027248677248677248, "loss": 0.0059, "step": 1150 }, { "epoch": 8.29, "learning_rate": 0.027195767195767193, "loss": 0.0056, "step": 1160 }, { "epoch": 8.36, "learning_rate": 0.027142857142857142, "loss": 0.0061, "step": 1170 }, { "epoch": 8.43, "learning_rate": 0.027089947089947088, "loss": 0.0062, "step": 1180 }, { "epoch": 8.5, "learning_rate": 0.027037037037037037, "loss": 0.0059, "step": 1190 }, { "epoch": 8.57, "learning_rate": 0.026984126984126982, "loss": 0.0053, "step": 1200 }, { "epoch": 8.64, "learning_rate": 0.02693121693121693, "loss": 0.0058, "step": 1210 }, { "epoch": 8.71, "learning_rate": 0.026878306878306876, "loss": 0.005, "step": 1220 }, { "epoch": 8.79, "learning_rate": 0.026825396825396825, "loss": 0.0049, "step": 1230 }, { "epoch": 8.86, "learning_rate": 0.02677248677248677, "loss": 0.0053, "step": 1240 }, { "epoch": 8.93, "learning_rate": 0.02671957671957672, "loss": 0.0045, "step": 1250 }, { "epoch": 9.0, "learning_rate": 0.026666666666666665, "loss": 0.0048, "step": 1260 }, { "epoch": 9.07, "learning_rate": 0.026613756613756614, "loss": 0.0046, "step": 1270 }, { "epoch": 9.14, "learning_rate": 0.02656084656084656, "loss": 0.0051, "step": 1280 }, { "epoch": 9.21, "learning_rate": 0.026507936507936508, "loss": 0.005, "step": 1290 }, { "epoch": 9.29, "learning_rate": 0.026455026455026454, "loss": 0.0046, "step": 1300 }, { "epoch": 9.36, "learning_rate": 0.026402116402116402, "loss": 0.0047, "step": 1310 }, { "epoch": 9.43, "learning_rate": 0.026349206349206348, "loss": 0.005, "step": 1320 }, { "epoch": 9.5, "learning_rate": 0.026296296296296297, "loss": 0.0045, "step": 1330 }, { "epoch": 9.57, "learning_rate": 0.026243386243386242, "loss": 0.0048, "step": 1340 }, { "epoch": 9.64, "learning_rate": 0.02619047619047619, "loss": 0.004, "step": 1350 }, { "epoch": 9.71, "learning_rate": 0.026137566137566137, "loss": 0.0039, "step": 1360 }, { "epoch": 9.79, "learning_rate": 0.026084656084656085, "loss": 0.0037, "step": 1370 }, { "epoch": 9.86, "learning_rate": 0.02603174603174603, "loss": 0.004, "step": 1380 }, { "epoch": 9.93, "learning_rate": 0.02597883597883598, "loss": 0.0036, "step": 1390 }, { "epoch": 10.0, "learning_rate": 0.025925925925925925, "loss": 0.0041, "step": 1400 }, { "epoch": 10.07, "learning_rate": 0.02587301587301587, "loss": 0.0034, "step": 1410 }, { "epoch": 10.14, "learning_rate": 0.025820105820105816, "loss": 0.0029, "step": 1420 }, { "epoch": 10.21, "learning_rate": 0.025767195767195765, "loss": 0.0037, "step": 1430 }, { "epoch": 10.29, "learning_rate": 0.02571428571428571, "loss": 0.0032, "step": 1440 }, { "epoch": 10.36, "learning_rate": 0.02566137566137566, "loss": 0.0028, "step": 1450 }, { "epoch": 10.43, "learning_rate": 0.025608465608465605, "loss": 0.0031, "step": 1460 }, { "epoch": 10.5, "learning_rate": 0.025555555555555554, "loss": 0.0031, "step": 1470 }, { "epoch": 10.57, "learning_rate": 0.0255026455026455, "loss": 0.0034, "step": 1480 }, { "epoch": 10.64, "learning_rate": 0.025449735449735448, "loss": 0.003, "step": 1490 }, { "epoch": 10.71, "learning_rate": 0.025396825396825393, "loss": 0.0033, "step": 1500 }, { "epoch": 10.79, "learning_rate": 0.025343915343915342, "loss": 0.003, "step": 1510 }, { "epoch": 10.86, "learning_rate": 0.025291005291005288, "loss": 0.0029, "step": 1520 }, { "epoch": 10.93, "learning_rate": 0.025238095238095237, "loss": 0.0031, "step": 1530 }, { "epoch": 11.0, "learning_rate": 0.025185185185185182, "loss": 0.0027, "step": 1540 }, { "epoch": 11.07, "learning_rate": 0.02513227513227513, "loss": 0.003, "step": 1550 }, { "epoch": 11.14, "learning_rate": 0.025079365079365076, "loss": 0.0027, "step": 1560 }, { "epoch": 11.21, "learning_rate": 0.025026455026455025, "loss": 0.0025, "step": 1570 }, { "epoch": 11.29, "learning_rate": 0.02497354497354497, "loss": 0.0027, "step": 1580 }, { "epoch": 11.36, "learning_rate": 0.02492063492063492, "loss": 0.0026, "step": 1590 }, { "epoch": 11.43, "learning_rate": 0.024867724867724865, "loss": 0.0024, "step": 1600 }, { "epoch": 11.5, "learning_rate": 0.024814814814814814, "loss": 0.0028, "step": 1610 }, { "epoch": 11.57, "learning_rate": 0.02476190476190476, "loss": 0.0027, "step": 1620 }, { "epoch": 11.64, "learning_rate": 0.024708994708994708, "loss": 0.0026, "step": 1630 }, { "epoch": 11.71, "learning_rate": 0.024656084656084654, "loss": 0.0024, "step": 1640 }, { "epoch": 11.79, "learning_rate": 0.024603174603174603, "loss": 0.0028, "step": 1650 }, { "epoch": 11.86, "learning_rate": 0.024550264550264548, "loss": 0.0023, "step": 1660 }, { "epoch": 11.93, "learning_rate": 0.024497354497354497, "loss": 0.002, "step": 1670 }, { "epoch": 12.0, "learning_rate": 0.024444444444444442, "loss": 0.0023, "step": 1680 }, { "epoch": 12.07, "learning_rate": 0.02439153439153439, "loss": 0.0023, "step": 1690 }, { "epoch": 12.14, "learning_rate": 0.024338624338624337, "loss": 0.0024, "step": 1700 }, { "epoch": 12.21, "learning_rate": 0.024285714285714285, "loss": 0.0022, "step": 1710 }, { "epoch": 12.29, "learning_rate": 0.02423280423280423, "loss": 0.002, "step": 1720 }, { "epoch": 12.36, "learning_rate": 0.02417989417989418, "loss": 0.0022, "step": 1730 }, { "epoch": 12.43, "learning_rate": 0.024126984126984125, "loss": 0.0022, "step": 1740 }, { "epoch": 12.5, "learning_rate": 0.024074074074074074, "loss": 0.0022, "step": 1750 }, { "epoch": 12.57, "learning_rate": 0.02402116402116402, "loss": 0.0025, "step": 1760 }, { "epoch": 12.64, "learning_rate": 0.02396825396825397, "loss": 0.0021, "step": 1770 }, { "epoch": 12.71, "learning_rate": 0.023915343915343914, "loss": 0.0022, "step": 1780 }, { "epoch": 12.79, "learning_rate": 0.023862433862433863, "loss": 0.0019, "step": 1790 }, { "epoch": 12.86, "learning_rate": 0.023809523809523808, "loss": 0.0023, "step": 1800 }, { "epoch": 12.93, "learning_rate": 0.023756613756613757, "loss": 0.002, "step": 1810 }, { "epoch": 13.0, "learning_rate": 0.023703703703703703, "loss": 0.0018, "step": 1820 }, { "epoch": 13.07, "learning_rate": 0.02365079365079365, "loss": 0.002, "step": 1830 }, { "epoch": 13.14, "learning_rate": 0.023597883597883597, "loss": 0.0021, "step": 1840 }, { "epoch": 13.21, "learning_rate": 0.023544973544973546, "loss": 0.002, "step": 1850 }, { "epoch": 13.29, "learning_rate": 0.02349206349206349, "loss": 0.0018, "step": 1860 }, { "epoch": 13.36, "learning_rate": 0.02343915343915344, "loss": 0.0018, "step": 1870 }, { "epoch": 13.43, "learning_rate": 0.023386243386243386, "loss": 0.002, "step": 1880 }, { "epoch": 13.5, "learning_rate": 0.023333333333333334, "loss": 0.0017, "step": 1890 }, { "epoch": 13.57, "learning_rate": 0.02328042328042328, "loss": 0.0018, "step": 1900 }, { "epoch": 13.64, "learning_rate": 0.02322751322751323, "loss": 0.002, "step": 1910 }, { "epoch": 13.71, "learning_rate": 0.023174603174603174, "loss": 0.0019, "step": 1920 }, { "epoch": 13.79, "learning_rate": 0.02312169312169312, "loss": 0.0019, "step": 1930 }, { "epoch": 13.86, "learning_rate": 0.023068783068783065, "loss": 0.002, "step": 1940 }, { "epoch": 13.93, "learning_rate": 0.023015873015873014, "loss": 0.0017, "step": 1950 }, { "epoch": 14.0, "learning_rate": 0.02296296296296296, "loss": 0.0015, "step": 1960 }, { "epoch": 14.07, "learning_rate": 0.022910052910052908, "loss": 0.0017, "step": 1970 }, { "epoch": 14.14, "learning_rate": 0.022857142857142854, "loss": 0.0018, "step": 1980 }, { "epoch": 14.21, "learning_rate": 0.022804232804232803, "loss": 0.0019, "step": 1990 }, { "epoch": 14.29, "learning_rate": 0.022751322751322748, "loss": 0.0019, "step": 2000 }, { "epoch": 14.36, "learning_rate": 0.022698412698412697, "loss": 0.0018, "step": 2010 }, { "epoch": 14.43, "learning_rate": 0.022645502645502642, "loss": 0.0018, "step": 2020 }, { "epoch": 14.5, "learning_rate": 0.02259259259259259, "loss": 0.0018, "step": 2030 }, { "epoch": 14.57, "learning_rate": 0.022539682539682537, "loss": 0.0017, "step": 2040 }, { "epoch": 14.64, "learning_rate": 0.022486772486772486, "loss": 0.0019, "step": 2050 }, { "epoch": 14.71, "learning_rate": 0.02243386243386243, "loss": 0.0018, "step": 2060 }, { "epoch": 14.79, "learning_rate": 0.02238095238095238, "loss": 0.0015, "step": 2070 }, { "epoch": 14.86, "learning_rate": 0.022328042328042325, "loss": 0.0018, "step": 2080 }, { "epoch": 14.93, "learning_rate": 0.022275132275132274, "loss": 0.0017, "step": 2090 }, { "epoch": 15.0, "learning_rate": 0.02222222222222222, "loss": 0.0015, "step": 2100 }, { "epoch": 15.07, "learning_rate": 0.02216931216931217, "loss": 0.0016, "step": 2110 }, { "epoch": 15.14, "learning_rate": 0.022116402116402114, "loss": 0.0016, "step": 2120 }, { "epoch": 15.21, "learning_rate": 0.022063492063492063, "loss": 0.0016, "step": 2130 }, { "epoch": 15.29, "learning_rate": 0.022010582010582008, "loss": 0.0016, "step": 2140 }, { "epoch": 15.36, "learning_rate": 0.021957671957671957, "loss": 0.0017, "step": 2150 }, { "epoch": 15.43, "learning_rate": 0.021904761904761903, "loss": 0.0021, "step": 2160 }, { "epoch": 15.5, "learning_rate": 0.02185185185185185, "loss": 0.0018, "step": 2170 }, { "epoch": 15.57, "learning_rate": 0.021798941798941797, "loss": 0.0017, "step": 2180 }, { "epoch": 15.64, "learning_rate": 0.021746031746031746, "loss": 0.0018, "step": 2190 }, { "epoch": 15.71, "learning_rate": 0.02169312169312169, "loss": 0.0017, "step": 2200 }, { "epoch": 15.79, "learning_rate": 0.02164021164021164, "loss": 0.0015, "step": 2210 }, { "epoch": 15.86, "learning_rate": 0.021587301587301586, "loss": 0.0016, "step": 2220 }, { "epoch": 15.93, "learning_rate": 0.021534391534391534, "loss": 0.0016, "step": 2230 }, { "epoch": 16.0, "learning_rate": 0.02148148148148148, "loss": 0.0015, "step": 2240 }, { "epoch": 16.07, "learning_rate": 0.02142857142857143, "loss": 0.0016, "step": 2250 }, { "epoch": 16.14, "learning_rate": 0.021375661375661374, "loss": 0.0016, "step": 2260 }, { "epoch": 16.21, "learning_rate": 0.021322751322751323, "loss": 0.0015, "step": 2270 }, { "epoch": 16.29, "learning_rate": 0.02126984126984127, "loss": 0.0017, "step": 2280 }, { "epoch": 16.36, "learning_rate": 0.021216931216931217, "loss": 0.0017, "step": 2290 }, { "epoch": 16.43, "learning_rate": 0.021164021164021163, "loss": 0.0015, "step": 2300 }, { "epoch": 16.5, "learning_rate": 0.021111111111111112, "loss": 0.0014, "step": 2310 }, { "epoch": 16.57, "learning_rate": 0.021058201058201057, "loss": 0.0016, "step": 2320 }, { "epoch": 16.64, "learning_rate": 0.021005291005291006, "loss": 0.0015, "step": 2330 }, { "epoch": 16.71, "learning_rate": 0.02095238095238095, "loss": 0.0015, "step": 2340 }, { "epoch": 16.79, "learning_rate": 0.0208994708994709, "loss": 0.0017, "step": 2350 }, { "epoch": 16.86, "learning_rate": 0.020846560846560846, "loss": 0.0015, "step": 2360 }, { "epoch": 16.93, "learning_rate": 0.020793650793650795, "loss": 0.0015, "step": 2370 }, { "epoch": 17.0, "learning_rate": 0.02074074074074074, "loss": 0.0016, "step": 2380 }, { "epoch": 17.07, "learning_rate": 0.02068783068783069, "loss": 0.0015, "step": 2390 }, { "epoch": 17.14, "learning_rate": 0.020634920634920634, "loss": 0.0015, "step": 2400 }, { "epoch": 17.21, "learning_rate": 0.020582010582010583, "loss": 0.0017, "step": 2410 }, { "epoch": 17.29, "learning_rate": 0.02052910052910053, "loss": 0.0017, "step": 2420 }, { "epoch": 17.36, "learning_rate": 0.020476190476190478, "loss": 0.0016, "step": 2430 }, { "epoch": 17.43, "learning_rate": 0.020423280423280423, "loss": 0.0016, "step": 2440 }, { "epoch": 17.5, "learning_rate": 0.02037037037037037, "loss": 0.0016, "step": 2450 }, { "epoch": 17.57, "learning_rate": 0.020317460317460314, "loss": 0.0014, "step": 2460 }, { "epoch": 17.64, "learning_rate": 0.020264550264550263, "loss": 0.0014, "step": 2470 }, { "epoch": 17.71, "learning_rate": 0.02021164021164021, "loss": 0.0015, "step": 2480 }, { "epoch": 17.79, "learning_rate": 0.020158730158730157, "loss": 0.0015, "step": 2490 }, { "epoch": 17.86, "learning_rate": 0.020105820105820103, "loss": 0.0015, "step": 2500 }, { "epoch": 17.93, "learning_rate": 0.02005291005291005, "loss": 0.0018, "step": 2510 }, { "epoch": 18.0, "learning_rate": 0.019999999999999997, "loss": 0.0014, "step": 2520 }, { "epoch": 18.07, "learning_rate": 0.019947089947089946, "loss": 0.0016, "step": 2530 }, { "epoch": 18.14, "learning_rate": 0.01989417989417989, "loss": 0.0015, "step": 2540 }, { "epoch": 18.21, "learning_rate": 0.01984126984126984, "loss": 0.0015, "step": 2550 }, { "epoch": 18.29, "learning_rate": 0.019788359788359786, "loss": 0.0015, "step": 2560 }, { "epoch": 18.36, "learning_rate": 0.019735449735449734, "loss": 0.0015, "step": 2570 }, { "epoch": 18.43, "learning_rate": 0.01968253968253968, "loss": 0.0014, "step": 2580 }, { "epoch": 18.5, "learning_rate": 0.01962962962962963, "loss": 0.0014, "step": 2590 }, { "epoch": 18.57, "learning_rate": 0.019576719576719574, "loss": 0.0015, "step": 2600 }, { "epoch": 18.64, "learning_rate": 0.019523809523809523, "loss": 0.0017, "step": 2610 }, { "epoch": 18.71, "learning_rate": 0.01947089947089947, "loss": 0.0015, "step": 2620 }, { "epoch": 18.79, "learning_rate": 0.019417989417989417, "loss": 0.0014, "step": 2630 }, { "epoch": 18.86, "learning_rate": 0.019365079365079363, "loss": 0.0015, "step": 2640 }, { "epoch": 18.93, "learning_rate": 0.019312169312169312, "loss": 0.0014, "step": 2650 }, { "epoch": 19.0, "learning_rate": 0.019259259259259257, "loss": 0.0014, "step": 2660 }, { "epoch": 19.07, "learning_rate": 0.019206349206349206, "loss": 0.0013, "step": 2670 }, { "epoch": 19.14, "learning_rate": 0.01915343915343915, "loss": 0.0014, "step": 2680 }, { "epoch": 19.21, "learning_rate": 0.0191005291005291, "loss": 0.0015, "step": 2690 }, { "epoch": 19.29, "learning_rate": 0.019047619047619046, "loss": 0.0014, "step": 2700 }, { "epoch": 19.36, "learning_rate": 0.018994708994708995, "loss": 0.0015, "step": 2710 }, { "epoch": 19.43, "learning_rate": 0.01894179894179894, "loss": 0.0016, "step": 2720 }, { "epoch": 19.5, "learning_rate": 0.01888888888888889, "loss": 0.0015, "step": 2730 }, { "epoch": 19.57, "learning_rate": 0.018835978835978835, "loss": 0.0014, "step": 2740 }, { "epoch": 19.64, "learning_rate": 0.018783068783068783, "loss": 0.0014, "step": 2750 }, { "epoch": 19.71, "learning_rate": 0.01873015873015873, "loss": 0.0015, "step": 2760 }, { "epoch": 19.79, "learning_rate": 0.018677248677248678, "loss": 0.0014, "step": 2770 }, { "epoch": 19.86, "learning_rate": 0.018624338624338623, "loss": 0.0016, "step": 2780 }, { "epoch": 19.93, "learning_rate": 0.018571428571428572, "loss": 0.0014, "step": 2790 }, { "epoch": 20.0, "learning_rate": 0.018518518518518517, "loss": 0.0014, "step": 2800 }, { "epoch": 20.07, "learning_rate": 0.018465608465608466, "loss": 0.0014, "step": 2810 }, { "epoch": 20.14, "learning_rate": 0.018412698412698412, "loss": 0.0013, "step": 2820 }, { "epoch": 20.21, "learning_rate": 0.01835978835978836, "loss": 0.0015, "step": 2830 }, { "epoch": 20.29, "learning_rate": 0.018306878306878306, "loss": 0.0014, "step": 2840 }, { "epoch": 20.36, "learning_rate": 0.018253968253968255, "loss": 0.0016, "step": 2850 }, { "epoch": 20.43, "learning_rate": 0.0182010582010582, "loss": 0.0014, "step": 2860 }, { "epoch": 20.5, "learning_rate": 0.01814814814814815, "loss": 0.0015, "step": 2870 }, { "epoch": 20.57, "learning_rate": 0.018095238095238095, "loss": 0.0015, "step": 2880 }, { "epoch": 20.64, "learning_rate": 0.018042328042328044, "loss": 0.0014, "step": 2890 }, { "epoch": 20.71, "learning_rate": 0.01798941798941799, "loss": 0.0014, "step": 2900 }, { "epoch": 20.79, "learning_rate": 0.017936507936507938, "loss": 0.0014, "step": 2910 }, { "epoch": 20.86, "learning_rate": 0.017883597883597883, "loss": 0.0014, "step": 2920 }, { "epoch": 20.93, "learning_rate": 0.017830687830687832, "loss": 0.0016, "step": 2930 }, { "epoch": 21.0, "learning_rate": 0.017777777777777778, "loss": 0.0014, "step": 2940 }, { "epoch": 21.07, "learning_rate": 0.017724867724867723, "loss": 0.0015, "step": 2950 }, { "epoch": 21.14, "learning_rate": 0.01767195767195767, "loss": 0.0014, "step": 2960 }, { "epoch": 21.21, "learning_rate": 0.017619047619047618, "loss": 0.0014, "step": 2970 }, { "epoch": 21.29, "learning_rate": 0.017566137566137563, "loss": 0.0012, "step": 2980 }, { "epoch": 21.36, "learning_rate": 0.017513227513227512, "loss": 0.0014, "step": 2990 }, { "epoch": 21.43, "learning_rate": 0.017460317460317457, "loss": 0.0014, "step": 3000 }, { "epoch": 21.5, "learning_rate": 0.017407407407407406, "loss": 0.0013, "step": 3010 }, { "epoch": 21.57, "learning_rate": 0.01735449735449735, "loss": 0.0014, "step": 3020 }, { "epoch": 21.64, "learning_rate": 0.0173015873015873, "loss": 0.0014, "step": 3030 }, { "epoch": 21.71, "learning_rate": 0.017248677248677246, "loss": 0.0013, "step": 3040 }, { "epoch": 21.79, "learning_rate": 0.017195767195767195, "loss": 0.0013, "step": 3050 }, { "epoch": 21.86, "learning_rate": 0.01714285714285714, "loss": 0.0013, "step": 3060 }, { "epoch": 21.93, "learning_rate": 0.01708994708994709, "loss": 0.0013, "step": 3070 }, { "epoch": 22.0, "learning_rate": 0.017037037037037035, "loss": 0.0014, "step": 3080 }, { "epoch": 22.07, "learning_rate": 0.016984126984126983, "loss": 0.0012, "step": 3090 }, { "epoch": 22.14, "learning_rate": 0.01693121693121693, "loss": 0.0013, "step": 3100 }, { "epoch": 22.21, "learning_rate": 0.016878306878306878, "loss": 0.0014, "step": 3110 }, { "epoch": 22.29, "learning_rate": 0.016825396825396823, "loss": 0.0013, "step": 3120 }, { "epoch": 22.36, "learning_rate": 0.016772486772486772, "loss": 0.0013, "step": 3130 }, { "epoch": 22.43, "learning_rate": 0.016719576719576718, "loss": 0.0013, "step": 3140 }, { "epoch": 22.5, "learning_rate": 0.016666666666666666, "loss": 0.0013, "step": 3150 }, { "epoch": 22.57, "learning_rate": 0.016613756613756612, "loss": 0.0014, "step": 3160 }, { "epoch": 22.64, "learning_rate": 0.01656084656084656, "loss": 0.0012, "step": 3170 }, { "epoch": 22.71, "learning_rate": 0.016507936507936506, "loss": 0.0014, "step": 3180 }, { "epoch": 22.79, "learning_rate": 0.016455026455026455, "loss": 0.0013, "step": 3190 }, { "epoch": 22.86, "learning_rate": 0.0164021164021164, "loss": 0.0014, "step": 3200 }, { "epoch": 22.93, "learning_rate": 0.01634920634920635, "loss": 0.0013, "step": 3210 }, { "epoch": 23.0, "learning_rate": 0.016296296296296295, "loss": 0.0013, "step": 3220 }, { "epoch": 23.07, "learning_rate": 0.016243386243386244, "loss": 0.0012, "step": 3230 }, { "epoch": 23.14, "learning_rate": 0.01619047619047619, "loss": 0.0014, "step": 3240 }, { "epoch": 23.21, "learning_rate": 0.016137566137566138, "loss": 0.0015, "step": 3250 }, { "epoch": 23.29, "learning_rate": 0.016084656084656083, "loss": 0.0012, "step": 3260 }, { "epoch": 23.36, "learning_rate": 0.016031746031746032, "loss": 0.0012, "step": 3270 }, { "epoch": 23.43, "learning_rate": 0.015978835978835978, "loss": 0.0013, "step": 3280 }, { "epoch": 23.5, "learning_rate": 0.015925925925925927, "loss": 0.0014, "step": 3290 }, { "epoch": 23.57, "learning_rate": 0.015873015873015872, "loss": 0.0013, "step": 3300 }, { "epoch": 23.64, "learning_rate": 0.01582010582010582, "loss": 0.0013, "step": 3310 }, { "epoch": 23.71, "learning_rate": 0.015767195767195766, "loss": 0.0013, "step": 3320 }, { "epoch": 23.79, "learning_rate": 0.015714285714285715, "loss": 0.0015, "step": 3330 }, { "epoch": 23.86, "learning_rate": 0.01566137566137566, "loss": 0.0013, "step": 3340 }, { "epoch": 23.93, "learning_rate": 0.015608465608465608, "loss": 0.0012, "step": 3350 }, { "epoch": 24.0, "learning_rate": 0.015555555555555553, "loss": 0.0014, "step": 3360 }, { "epoch": 24.07, "learning_rate": 0.015502645502645502, "loss": 0.0013, "step": 3370 }, { "epoch": 24.14, "learning_rate": 0.015449735449735448, "loss": 0.0013, "step": 3380 }, { "epoch": 24.21, "learning_rate": 0.015396825396825397, "loss": 0.0013, "step": 3390 }, { "epoch": 24.29, "learning_rate": 0.015343915343915342, "loss": 0.0013, "step": 3400 }, { "epoch": 24.36, "learning_rate": 0.015291005291005291, "loss": 0.0013, "step": 3410 }, { "epoch": 24.43, "learning_rate": 0.015238095238095236, "loss": 0.0013, "step": 3420 }, { "epoch": 24.5, "learning_rate": 0.015185185185185185, "loss": 0.0013, "step": 3430 }, { "epoch": 24.57, "learning_rate": 0.01513227513227513, "loss": 0.0013, "step": 3440 }, { "epoch": 24.64, "learning_rate": 0.01507936507936508, "loss": 0.0013, "step": 3450 }, { "epoch": 24.71, "learning_rate": 0.015026455026455025, "loss": 0.0014, "step": 3460 }, { "epoch": 24.79, "learning_rate": 0.014973544973544972, "loss": 0.0014, "step": 3470 }, { "epoch": 24.86, "learning_rate": 0.01492063492063492, "loss": 0.0013, "step": 3480 }, { "epoch": 24.93, "learning_rate": 0.014867724867724866, "loss": 0.0012, "step": 3490 }, { "epoch": 25.0, "learning_rate": 0.014814814814814814, "loss": 0.0012, "step": 3500 }, { "epoch": 25.07, "learning_rate": 0.01476190476190476, "loss": 0.0012, "step": 3510 }, { "epoch": 25.14, "learning_rate": 0.014708994708994708, "loss": 0.0014, "step": 3520 }, { "epoch": 25.21, "learning_rate": 0.014656084656084655, "loss": 0.0013, "step": 3530 }, { "epoch": 25.29, "learning_rate": 0.014603174603174602, "loss": 0.0014, "step": 3540 }, { "epoch": 25.36, "learning_rate": 0.01455026455026455, "loss": 0.0014, "step": 3550 }, { "epoch": 25.43, "learning_rate": 0.014497354497354497, "loss": 0.0013, "step": 3560 }, { "epoch": 25.5, "learning_rate": 0.014444444444444444, "loss": 0.0014, "step": 3570 }, { "epoch": 25.57, "learning_rate": 0.014391534391534391, "loss": 0.0013, "step": 3580 }, { "epoch": 25.64, "learning_rate": 0.014338624338624338, "loss": 0.0014, "step": 3590 }, { "epoch": 25.71, "learning_rate": 0.014285714285714284, "loss": 0.0013, "step": 3600 }, { "epoch": 25.79, "learning_rate": 0.01423280423280423, "loss": 0.0012, "step": 3610 }, { "epoch": 25.86, "learning_rate": 0.014179894179894178, "loss": 0.0013, "step": 3620 }, { "epoch": 25.93, "learning_rate": 0.014126984126984125, "loss": 0.0012, "step": 3630 }, { "epoch": 26.0, "learning_rate": 0.014074074074074072, "loss": 0.0013, "step": 3640 }, { "epoch": 26.07, "learning_rate": 0.01402116402116402, "loss": 0.0013, "step": 3650 }, { "epoch": 26.14, "learning_rate": 0.013968253968253967, "loss": 0.0012, "step": 3660 }, { "epoch": 26.21, "learning_rate": 0.013915343915343914, "loss": 0.0013, "step": 3670 }, { "epoch": 26.29, "learning_rate": 0.01386243386243386, "loss": 0.0012, "step": 3680 }, { "epoch": 26.36, "learning_rate": 0.013809523809523808, "loss": 0.0014, "step": 3690 }, { "epoch": 26.43, "learning_rate": 0.013756613756613755, "loss": 0.0013, "step": 3700 }, { "epoch": 26.5, "learning_rate": 0.013703703703703702, "loss": 0.0013, "step": 3710 }, { "epoch": 26.57, "learning_rate": 0.01365079365079365, "loss": 0.0012, "step": 3720 }, { "epoch": 26.64, "learning_rate": 0.013597883597883597, "loss": 0.0014, "step": 3730 }, { "epoch": 26.71, "learning_rate": 0.013544973544973544, "loss": 0.0013, "step": 3740 }, { "epoch": 26.79, "learning_rate": 0.013492063492063491, "loss": 0.0013, "step": 3750 }, { "epoch": 26.86, "learning_rate": 0.013439153439153438, "loss": 0.0013, "step": 3760 }, { "epoch": 26.93, "learning_rate": 0.013386243386243385, "loss": 0.0013, "step": 3770 }, { "epoch": 27.0, "learning_rate": 0.013333333333333332, "loss": 0.0013, "step": 3780 }, { "epoch": 27.07, "learning_rate": 0.01328042328042328, "loss": 0.0012, "step": 3790 }, { "epoch": 27.14, "learning_rate": 0.013227513227513227, "loss": 0.0012, "step": 3800 }, { "epoch": 27.21, "learning_rate": 0.013174603174603174, "loss": 0.0013, "step": 3810 }, { "epoch": 27.29, "learning_rate": 0.013121693121693121, "loss": 0.0013, "step": 3820 }, { "epoch": 27.36, "learning_rate": 0.013068783068783068, "loss": 0.0013, "step": 3830 }, { "epoch": 27.43, "learning_rate": 0.013015873015873015, "loss": 0.0012, "step": 3840 }, { "epoch": 27.5, "learning_rate": 0.012962962962962963, "loss": 0.0012, "step": 3850 }, { "epoch": 27.57, "learning_rate": 0.012910052910052908, "loss": 0.0012, "step": 3860 }, { "epoch": 27.64, "learning_rate": 0.012857142857142855, "loss": 0.0012, "step": 3870 }, { "epoch": 27.71, "learning_rate": 0.012804232804232802, "loss": 0.0013, "step": 3880 }, { "epoch": 27.79, "learning_rate": 0.01275132275132275, "loss": 0.0016, "step": 3890 }, { "epoch": 27.86, "learning_rate": 0.012698412698412697, "loss": 0.0012, "step": 3900 }, { "epoch": 27.93, "learning_rate": 0.012645502645502644, "loss": 0.0013, "step": 3910 }, { "epoch": 28.0, "learning_rate": 0.012592592592592591, "loss": 0.0013, "step": 3920 }, { "epoch": 28.07, "learning_rate": 0.012539682539682538, "loss": 0.0013, "step": 3930 }, { "epoch": 28.14, "learning_rate": 0.012486772486772485, "loss": 0.0012, "step": 3940 }, { "epoch": 28.21, "learning_rate": 0.012433862433862432, "loss": 0.0012, "step": 3950 }, { "epoch": 28.29, "learning_rate": 0.01238095238095238, "loss": 0.0012, "step": 3960 }, { "epoch": 28.36, "learning_rate": 0.012328042328042327, "loss": 0.0013, "step": 3970 }, { "epoch": 28.43, "learning_rate": 0.012275132275132274, "loss": 0.0012, "step": 3980 }, { "epoch": 28.5, "learning_rate": 0.012222222222222221, "loss": 0.0013, "step": 3990 }, { "epoch": 28.57, "learning_rate": 0.012169312169312168, "loss": 0.0012, "step": 4000 }, { "epoch": 28.64, "learning_rate": 0.012116402116402115, "loss": 0.0012, "step": 4010 }, { "epoch": 28.71, "learning_rate": 0.012063492063492063, "loss": 0.0013, "step": 4020 }, { "epoch": 28.79, "learning_rate": 0.01201058201058201, "loss": 0.0011, "step": 4030 }, { "epoch": 28.86, "learning_rate": 0.011957671957671957, "loss": 0.0013, "step": 4040 }, { "epoch": 28.93, "learning_rate": 0.011904761904761904, "loss": 0.0013, "step": 4050 }, { "epoch": 29.0, "learning_rate": 0.011851851851851851, "loss": 0.0012, "step": 4060 }, { "epoch": 29.07, "learning_rate": 0.011798941798941798, "loss": 0.0011, "step": 4070 }, { "epoch": 29.14, "learning_rate": 0.011746031746031746, "loss": 0.0013, "step": 4080 }, { "epoch": 29.21, "learning_rate": 0.011693121693121693, "loss": 0.0012, "step": 4090 }, { "epoch": 29.29, "learning_rate": 0.01164021164021164, "loss": 0.0014, "step": 4100 }, { "epoch": 29.36, "learning_rate": 0.011587301587301587, "loss": 0.0012, "step": 4110 }, { "epoch": 29.43, "learning_rate": 0.011534391534391533, "loss": 0.0013, "step": 4120 }, { "epoch": 29.5, "learning_rate": 0.01148148148148148, "loss": 0.0012, "step": 4130 }, { "epoch": 29.57, "learning_rate": 0.011428571428571427, "loss": 0.0012, "step": 4140 }, { "epoch": 29.64, "learning_rate": 0.011375661375661374, "loss": 0.0013, "step": 4150 }, { "epoch": 29.71, "learning_rate": 0.011322751322751321, "loss": 0.0012, "step": 4160 }, { "epoch": 29.79, "learning_rate": 0.011269841269841268, "loss": 0.0013, "step": 4170 }, { "epoch": 29.86, "learning_rate": 0.011216931216931215, "loss": 0.0013, "step": 4180 }, { "epoch": 29.93, "learning_rate": 0.011164021164021163, "loss": 0.0013, "step": 4190 }, { "epoch": 30.0, "learning_rate": 0.01111111111111111, "loss": 0.0012, "step": 4200 }, { "epoch": 30.07, "learning_rate": 0.011058201058201057, "loss": 0.0013, "step": 4210 }, { "epoch": 30.14, "learning_rate": 0.011005291005291004, "loss": 0.0012, "step": 4220 }, { "epoch": 30.21, "learning_rate": 0.010952380952380951, "loss": 0.0012, "step": 4230 }, { "epoch": 30.29, "learning_rate": 0.010899470899470898, "loss": 0.0012, "step": 4240 }, { "epoch": 30.36, "learning_rate": 0.010846560846560846, "loss": 0.0012, "step": 4250 }, { "epoch": 30.43, "learning_rate": 0.010793650793650793, "loss": 0.0012, "step": 4260 }, { "epoch": 30.5, "learning_rate": 0.01074074074074074, "loss": 0.0013, "step": 4270 }, { "epoch": 30.57, "learning_rate": 0.010687830687830687, "loss": 0.0012, "step": 4280 }, { "epoch": 30.64, "learning_rate": 0.010634920634920634, "loss": 0.0013, "step": 4290 }, { "epoch": 30.71, "learning_rate": 0.010582010582010581, "loss": 0.0012, "step": 4300 }, { "epoch": 30.79, "learning_rate": 0.010529100529100529, "loss": 0.0012, "step": 4310 }, { "epoch": 30.86, "learning_rate": 0.010476190476190476, "loss": 0.0012, "step": 4320 }, { "epoch": 30.93, "learning_rate": 0.010423280423280423, "loss": 0.0012, "step": 4330 }, { "epoch": 31.0, "learning_rate": 0.01037037037037037, "loss": 0.0013, "step": 4340 }, { "epoch": 31.07, "learning_rate": 0.010317460317460317, "loss": 0.0012, "step": 4350 }, { "epoch": 31.14, "learning_rate": 0.010264550264550264, "loss": 0.0011, "step": 4360 }, { "epoch": 31.21, "learning_rate": 0.010211640211640212, "loss": 0.0012, "step": 4370 }, { "epoch": 31.29, "learning_rate": 0.010158730158730157, "loss": 0.0013, "step": 4380 }, { "epoch": 31.36, "learning_rate": 0.010105820105820104, "loss": 0.0011, "step": 4390 }, { "epoch": 31.43, "learning_rate": 0.010052910052910051, "loss": 0.0012, "step": 4400 }, { "epoch": 31.5, "learning_rate": 0.009999999999999998, "loss": 0.0013, "step": 4410 }, { "epoch": 31.57, "learning_rate": 0.009947089947089946, "loss": 0.0012, "step": 4420 }, { "epoch": 31.64, "learning_rate": 0.009894179894179893, "loss": 0.0013, "step": 4430 }, { "epoch": 31.71, "learning_rate": 0.00984126984126984, "loss": 0.0012, "step": 4440 }, { "epoch": 31.79, "learning_rate": 0.009788359788359787, "loss": 0.0012, "step": 4450 }, { "epoch": 31.86, "learning_rate": 0.009735449735449734, "loss": 0.0012, "step": 4460 }, { "epoch": 31.93, "learning_rate": 0.009682539682539681, "loss": 0.0013, "step": 4470 }, { "epoch": 32.0, "learning_rate": 0.009629629629629629, "loss": 0.0011, "step": 4480 }, { "epoch": 32.07, "learning_rate": 0.009576719576719576, "loss": 0.0012, "step": 4490 }, { "epoch": 32.14, "learning_rate": 0.009523809523809523, "loss": 0.0013, "step": 4500 }, { "epoch": 32.21, "learning_rate": 0.00947089947089947, "loss": 0.0012, "step": 4510 }, { "epoch": 32.29, "learning_rate": 0.009417989417989417, "loss": 0.0012, "step": 4520 }, { "epoch": 32.36, "learning_rate": 0.009365079365079364, "loss": 0.0013, "step": 4530 }, { "epoch": 32.43, "learning_rate": 0.009312169312169312, "loss": 0.0013, "step": 4540 }, { "epoch": 32.5, "learning_rate": 0.009259259259259259, "loss": 0.0012, "step": 4550 }, { "epoch": 32.57, "learning_rate": 0.009206349206349206, "loss": 0.0011, "step": 4560 }, { "epoch": 32.64, "learning_rate": 0.009153439153439153, "loss": 0.0012, "step": 4570 }, { "epoch": 32.71, "learning_rate": 0.0091005291005291, "loss": 0.0012, "step": 4580 }, { "epoch": 32.79, "learning_rate": 0.009047619047619047, "loss": 0.0013, "step": 4590 }, { "epoch": 32.86, "learning_rate": 0.008994708994708995, "loss": 0.0012, "step": 4600 }, { "epoch": 32.93, "learning_rate": 0.008941798941798942, "loss": 0.0014, "step": 4610 }, { "epoch": 33.0, "learning_rate": 0.008888888888888889, "loss": 0.0012, "step": 4620 }, { "epoch": 33.07, "learning_rate": 0.008835978835978834, "loss": 0.0012, "step": 4630 }, { "epoch": 33.14, "learning_rate": 0.008783068783068781, "loss": 0.0013, "step": 4640 }, { "epoch": 33.21, "learning_rate": 0.008730158730158729, "loss": 0.0012, "step": 4650 }, { "epoch": 33.29, "learning_rate": 0.008677248677248676, "loss": 0.0012, "step": 4660 }, { "epoch": 33.36, "learning_rate": 0.008624338624338623, "loss": 0.0012, "step": 4670 }, { "epoch": 33.43, "learning_rate": 0.00857142857142857, "loss": 0.0012, "step": 4680 }, { "epoch": 33.5, "learning_rate": 0.008518518518518517, "loss": 0.0012, "step": 4690 }, { "epoch": 33.57, "learning_rate": 0.008465608465608464, "loss": 0.0012, "step": 4700 }, { "epoch": 33.64, "learning_rate": 0.008412698412698412, "loss": 0.0012, "step": 4710 }, { "epoch": 33.71, "learning_rate": 0.008359788359788359, "loss": 0.0012, "step": 4720 }, { "epoch": 33.79, "learning_rate": 0.008306878306878306, "loss": 0.0012, "step": 4730 }, { "epoch": 33.86, "learning_rate": 0.008253968253968253, "loss": 0.0012, "step": 4740 }, { "epoch": 33.93, "learning_rate": 0.0082010582010582, "loss": 0.0012, "step": 4750 }, { "epoch": 34.0, "learning_rate": 0.008148148148148147, "loss": 0.0012, "step": 4760 }, { "epoch": 34.07, "learning_rate": 0.008095238095238095, "loss": 0.0011, "step": 4770 }, { "epoch": 34.14, "learning_rate": 0.008042328042328042, "loss": 0.0013, "step": 4780 }, { "epoch": 34.21, "learning_rate": 0.007989417989417989, "loss": 0.0011, "step": 4790 }, { "epoch": 34.29, "learning_rate": 0.007936507936507936, "loss": 0.0011, "step": 4800 }, { "epoch": 34.36, "learning_rate": 0.007883597883597883, "loss": 0.0012, "step": 4810 }, { "epoch": 34.43, "learning_rate": 0.00783068783068783, "loss": 0.0012, "step": 4820 }, { "epoch": 34.5, "learning_rate": 0.007777777777777777, "loss": 0.0011, "step": 4830 }, { "epoch": 34.57, "learning_rate": 0.007724867724867724, "loss": 0.0011, "step": 4840 }, { "epoch": 34.64, "learning_rate": 0.007671957671957671, "loss": 0.0011, "step": 4850 }, { "epoch": 34.71, "learning_rate": 0.007619047619047618, "loss": 0.0012, "step": 4860 }, { "epoch": 34.79, "learning_rate": 0.007566137566137565, "loss": 0.0012, "step": 4870 }, { "epoch": 34.86, "learning_rate": 0.0075132275132275125, "loss": 0.0012, "step": 4880 }, { "epoch": 34.93, "learning_rate": 0.00746031746031746, "loss": 0.0011, "step": 4890 }, { "epoch": 35.0, "learning_rate": 0.007407407407407407, "loss": 0.0012, "step": 4900 }, { "epoch": 35.07, "learning_rate": 0.007354497354497354, "loss": 0.0012, "step": 4910 }, { "epoch": 35.14, "learning_rate": 0.007301587301587301, "loss": 0.0012, "step": 4920 }, { "epoch": 35.21, "learning_rate": 0.007248677248677248, "loss": 0.0012, "step": 4930 }, { "epoch": 35.29, "learning_rate": 0.0071957671957671955, "loss": 0.0011, "step": 4940 }, { "epoch": 35.36, "learning_rate": 0.007142857142857142, "loss": 0.0012, "step": 4950 }, { "epoch": 35.43, "learning_rate": 0.007089947089947089, "loss": 0.0012, "step": 4960 }, { "epoch": 35.5, "learning_rate": 0.007037037037037036, "loss": 0.0013, "step": 4970 }, { "epoch": 35.57, "learning_rate": 0.006984126984126983, "loss": 0.0012, "step": 4980 }, { "epoch": 35.64, "learning_rate": 0.00693121693121693, "loss": 0.0012, "step": 4990 }, { "epoch": 35.71, "learning_rate": 0.006878306878306878, "loss": 0.0011, "step": 5000 }, { "epoch": 35.79, "learning_rate": 0.006825396825396825, "loss": 0.0012, "step": 5010 }, { "epoch": 35.86, "learning_rate": 0.006772486772486772, "loss": 0.0012, "step": 5020 }, { "epoch": 35.93, "learning_rate": 0.006719576719576719, "loss": 0.0012, "step": 5030 }, { "epoch": 36.0, "learning_rate": 0.006666666666666666, "loss": 0.0011, "step": 5040 }, { "epoch": 36.07, "learning_rate": 0.006613756613756613, "loss": 0.0012, "step": 5050 }, { "epoch": 36.14, "learning_rate": 0.0065608465608465606, "loss": 0.0011, "step": 5060 }, { "epoch": 36.21, "learning_rate": 0.006507936507936508, "loss": 0.0012, "step": 5070 }, { "epoch": 36.29, "learning_rate": 0.006455026455026454, "loss": 0.0013, "step": 5080 }, { "epoch": 36.36, "learning_rate": 0.006402116402116401, "loss": 0.0011, "step": 5090 }, { "epoch": 36.43, "learning_rate": 0.006349206349206348, "loss": 0.0012, "step": 5100 }, { "epoch": 36.5, "learning_rate": 0.0062962962962962955, "loss": 0.0011, "step": 5110 }, { "epoch": 36.57, "learning_rate": 0.006243386243386243, "loss": 0.0012, "step": 5120 }, { "epoch": 36.64, "learning_rate": 0.00619047619047619, "loss": 0.0011, "step": 5130 }, { "epoch": 36.71, "learning_rate": 0.006137566137566137, "loss": 0.0011, "step": 5140 }, { "epoch": 36.79, "learning_rate": 0.006084656084656084, "loss": 0.0011, "step": 5150 }, { "epoch": 36.86, "learning_rate": 0.006031746031746031, "loss": 0.0011, "step": 5160 }, { "epoch": 36.93, "learning_rate": 0.0059788359788359785, "loss": 0.0012, "step": 5170 }, { "epoch": 37.0, "learning_rate": 0.005925925925925926, "loss": 0.0011, "step": 5180 }, { "epoch": 37.07, "learning_rate": 0.005873015873015873, "loss": 0.0012, "step": 5190 }, { "epoch": 37.14, "learning_rate": 0.00582010582010582, "loss": 0.0012, "step": 5200 }, { "epoch": 37.21, "learning_rate": 0.005767195767195766, "loss": 0.0011, "step": 5210 }, { "epoch": 37.29, "learning_rate": 0.005714285714285713, "loss": 0.0011, "step": 5220 }, { "epoch": 37.36, "learning_rate": 0.005661375661375661, "loss": 0.0012, "step": 5230 }, { "epoch": 37.43, "learning_rate": 0.005608465608465608, "loss": 0.0012, "step": 5240 }, { "epoch": 37.5, "learning_rate": 0.005555555555555555, "loss": 0.0012, "step": 5250 }, { "epoch": 37.57, "learning_rate": 0.005502645502645502, "loss": 0.0011, "step": 5260 }, { "epoch": 37.64, "learning_rate": 0.005449735449735449, "loss": 0.0012, "step": 5270 }, { "epoch": 37.71, "learning_rate": 0.005396825396825396, "loss": 0.0011, "step": 5280 }, { "epoch": 37.79, "learning_rate": 0.0053439153439153435, "loss": 0.0012, "step": 5290 }, { "epoch": 37.86, "learning_rate": 0.005291005291005291, "loss": 0.0011, "step": 5300 }, { "epoch": 37.93, "learning_rate": 0.005238095238095238, "loss": 0.0012, "step": 5310 }, { "epoch": 38.0, "learning_rate": 0.005185185185185185, "loss": 0.0012, "step": 5320 }, { "epoch": 38.07, "learning_rate": 0.005132275132275132, "loss": 0.0013, "step": 5330 }, { "epoch": 38.14, "learning_rate": 0.0050793650793650785, "loss": 0.0011, "step": 5340 }, { "epoch": 38.21, "learning_rate": 0.005026455026455026, "loss": 0.0012, "step": 5350 }, { "epoch": 38.29, "learning_rate": 0.004973544973544973, "loss": 0.0012, "step": 5360 }, { "epoch": 38.36, "learning_rate": 0.00492063492063492, "loss": 0.0011, "step": 5370 }, { "epoch": 38.43, "learning_rate": 0.004867724867724867, "loss": 0.0011, "step": 5380 }, { "epoch": 38.5, "learning_rate": 0.004814814814814814, "loss": 0.0011, "step": 5390 }, { "epoch": 38.57, "learning_rate": 0.0047619047619047615, "loss": 0.0012, "step": 5400 }, { "epoch": 38.64, "learning_rate": 0.004708994708994709, "loss": 0.0011, "step": 5410 }, { "epoch": 38.71, "learning_rate": 0.004656084656084656, "loss": 0.0011, "step": 5420 }, { "epoch": 38.79, "learning_rate": 0.004603174603174603, "loss": 0.0011, "step": 5430 }, { "epoch": 38.86, "learning_rate": 0.00455026455026455, "loss": 0.0011, "step": 5440 }, { "epoch": 38.93, "learning_rate": 0.004497354497354497, "loss": 0.0011, "step": 5450 }, { "epoch": 39.0, "learning_rate": 0.0044444444444444444, "loss": 0.0012, "step": 5460 }, { "epoch": 39.07, "learning_rate": 0.004391534391534391, "loss": 0.0011, "step": 5470 }, { "epoch": 39.14, "learning_rate": 0.004338624338624338, "loss": 0.0012, "step": 5480 }, { "epoch": 39.21, "learning_rate": 0.004285714285714285, "loss": 0.0011, "step": 5490 }, { "epoch": 39.29, "learning_rate": 0.004232804232804232, "loss": 0.0012, "step": 5500 }, { "epoch": 39.36, "learning_rate": 0.004179894179894179, "loss": 0.0011, "step": 5510 }, { "epoch": 39.43, "learning_rate": 0.0041269841269841265, "loss": 0.0011, "step": 5520 }, { "epoch": 39.5, "learning_rate": 0.004074074074074074, "loss": 0.0011, "step": 5530 }, { "epoch": 39.57, "learning_rate": 0.004021164021164021, "loss": 0.0012, "step": 5540 }, { "epoch": 39.64, "learning_rate": 0.003968253968253968, "loss": 0.0011, "step": 5550 }, { "epoch": 39.71, "learning_rate": 0.003915343915343915, "loss": 0.0012, "step": 5560 }, { "epoch": 39.79, "learning_rate": 0.003862433862433862, "loss": 0.0012, "step": 5570 }, { "epoch": 39.86, "learning_rate": 0.003809523809523809, "loss": 0.0012, "step": 5580 }, { "epoch": 39.93, "learning_rate": 0.0037566137566137562, "loss": 0.0012, "step": 5590 }, { "epoch": 40.0, "learning_rate": 0.0037037037037037034, "loss": 0.0013, "step": 5600 }, { "epoch": 40.07, "learning_rate": 0.0036507936507936506, "loss": 0.0012, "step": 5610 }, { "epoch": 40.14, "learning_rate": 0.0035978835978835977, "loss": 0.0011, "step": 5620 }, { "epoch": 40.21, "learning_rate": 0.0035449735449735445, "loss": 0.0012, "step": 5630 }, { "epoch": 40.29, "learning_rate": 0.0034920634920634916, "loss": 0.0013, "step": 5640 }, { "epoch": 40.36, "learning_rate": 0.003439153439153439, "loss": 0.0012, "step": 5650 }, { "epoch": 40.43, "learning_rate": 0.003386243386243386, "loss": 0.0012, "step": 5660 }, { "epoch": 40.5, "learning_rate": 0.003333333333333333, "loss": 0.0012, "step": 5670 }, { "epoch": 40.57, "learning_rate": 0.0032804232804232803, "loss": 0.0011, "step": 5680 }, { "epoch": 40.64, "learning_rate": 0.003227513227513227, "loss": 0.0012, "step": 5690 }, { "epoch": 40.71, "learning_rate": 0.003174603174603174, "loss": 0.0012, "step": 5700 }, { "epoch": 40.79, "learning_rate": 0.0031216931216931213, "loss": 0.0011, "step": 5710 }, { "epoch": 40.86, "learning_rate": 0.0030687830687830685, "loss": 0.0012, "step": 5720 }, { "epoch": 40.93, "learning_rate": 0.0030158730158730157, "loss": 0.0011, "step": 5730 }, { "epoch": 41.0, "learning_rate": 0.002962962962962963, "loss": 0.001, "step": 5740 }, { "epoch": 41.07, "learning_rate": 0.00291005291005291, "loss": 0.0012, "step": 5750 }, { "epoch": 41.14, "learning_rate": 0.0028571428571428567, "loss": 0.0011, "step": 5760 }, { "epoch": 41.21, "learning_rate": 0.002804232804232804, "loss": 0.0012, "step": 5770 }, { "epoch": 41.29, "learning_rate": 0.002751322751322751, "loss": 0.0011, "step": 5780 }, { "epoch": 41.36, "learning_rate": 0.002698412698412698, "loss": 0.001, "step": 5790 }, { "epoch": 41.43, "learning_rate": 0.0026455026455026454, "loss": 0.0012, "step": 5800 }, { "epoch": 41.5, "learning_rate": 0.0025925925925925925, "loss": 0.0011, "step": 5810 }, { "epoch": 41.57, "learning_rate": 0.0025396825396825392, "loss": 0.001, "step": 5820 }, { "epoch": 41.64, "learning_rate": 0.0024867724867724864, "loss": 0.0012, "step": 5830 }, { "epoch": 41.71, "learning_rate": 0.0024338624338624336, "loss": 0.0012, "step": 5840 }, { "epoch": 41.79, "learning_rate": 0.0023809523809523807, "loss": 0.0011, "step": 5850 }, { "epoch": 41.86, "learning_rate": 0.002328042328042328, "loss": 0.0012, "step": 5860 }, { "epoch": 41.93, "learning_rate": 0.002275132275132275, "loss": 0.0011, "step": 5870 }, { "epoch": 42.0, "learning_rate": 0.0022222222222222222, "loss": 0.0011, "step": 5880 }, { "epoch": 42.07, "learning_rate": 0.002169312169312169, "loss": 0.0012, "step": 5890 }, { "epoch": 42.14, "learning_rate": 0.002116402116402116, "loss": 0.001, "step": 5900 }, { "epoch": 42.21, "learning_rate": 0.0020634920634920633, "loss": 0.0011, "step": 5910 }, { "epoch": 42.29, "learning_rate": 0.0020105820105820104, "loss": 0.0012, "step": 5920 }, { "epoch": 42.36, "learning_rate": 0.0019576719576719576, "loss": 0.0012, "step": 5930 }, { "epoch": 42.43, "learning_rate": 0.0019047619047619045, "loss": 0.0011, "step": 5940 }, { "epoch": 42.5, "learning_rate": 0.0018518518518518517, "loss": 0.0011, "step": 5950 }, { "epoch": 42.57, "learning_rate": 0.0017989417989417989, "loss": 0.0012, "step": 5960 }, { "epoch": 42.64, "learning_rate": 0.0017460317460317458, "loss": 0.0012, "step": 5970 }, { "epoch": 42.71, "learning_rate": 0.001693121693121693, "loss": 0.0011, "step": 5980 }, { "epoch": 42.79, "learning_rate": 0.0016402116402116401, "loss": 0.0012, "step": 5990 }, { "epoch": 42.86, "learning_rate": 0.001587301587301587, "loss": 0.0011, "step": 6000 }, { "epoch": 42.93, "learning_rate": 0.0015343915343915342, "loss": 0.0011, "step": 6010 }, { "epoch": 43.0, "learning_rate": 0.0014814814814814814, "loss": 0.0011, "step": 6020 }, { "epoch": 43.07, "learning_rate": 0.0014285714285714284, "loss": 0.0011, "step": 6030 }, { "epoch": 43.14, "learning_rate": 0.0013756613756613755, "loss": 0.0012, "step": 6040 }, { "epoch": 43.21, "learning_rate": 0.0013227513227513227, "loss": 0.0012, "step": 6050 }, { "epoch": 43.29, "learning_rate": 0.0012698412698412696, "loss": 0.0013, "step": 6060 }, { "epoch": 43.36, "learning_rate": 0.0012169312169312168, "loss": 0.0012, "step": 6070 }, { "epoch": 43.43, "learning_rate": 0.001164021164021164, "loss": 0.0011, "step": 6080 }, { "epoch": 43.5, "learning_rate": 0.0011111111111111111, "loss": 0.0011, "step": 6090 }, { "epoch": 43.57, "learning_rate": 0.001058201058201058, "loss": 0.0011, "step": 6100 }, { "epoch": 43.64, "learning_rate": 0.0010052910052910052, "loss": 0.0012, "step": 6110 }, { "epoch": 43.71, "learning_rate": 0.0009523809523809523, "loss": 0.0011, "step": 6120 }, { "epoch": 43.79, "learning_rate": 0.0008994708994708994, "loss": 0.0011, "step": 6130 }, { "epoch": 43.86, "learning_rate": 0.0008465608465608465, "loss": 0.0011, "step": 6140 }, { "epoch": 43.93, "learning_rate": 0.0007936507936507935, "loss": 0.0011, "step": 6150 }, { "epoch": 44.0, "learning_rate": 0.0007407407407407407, "loss": 0.0011, "step": 6160 }, { "epoch": 44.07, "learning_rate": 0.0006878306878306878, "loss": 0.0011, "step": 6170 }, { "epoch": 44.14, "learning_rate": 0.0006349206349206348, "loss": 0.0011, "step": 6180 }, { "epoch": 44.21, "learning_rate": 0.000582010582010582, "loss": 0.0012, "step": 6190 }, { "epoch": 44.29, "learning_rate": 0.000529100529100529, "loss": 0.0011, "step": 6200 }, { "epoch": 44.36, "learning_rate": 0.00047619047619047614, "loss": 0.0011, "step": 6210 }, { "epoch": 44.43, "learning_rate": 0.00042328042328042324, "loss": 0.0012, "step": 6220 }, { "epoch": 44.5, "learning_rate": 0.00037037037037037035, "loss": 0.0011, "step": 6230 }, { "epoch": 44.57, "learning_rate": 0.0003174603174603174, "loss": 0.0012, "step": 6240 }, { "epoch": 44.64, "learning_rate": 0.0002645502645502645, "loss": 0.0011, "step": 6250 }, { "epoch": 44.71, "learning_rate": 0.00021164021164021162, "loss": 0.0012, "step": 6260 }, { "epoch": 44.79, "learning_rate": 0.0001587301587301587, "loss": 0.0012, "step": 6270 }, { "epoch": 44.86, "learning_rate": 0.00010582010582010581, "loss": 0.0011, "step": 6280 }, { "epoch": 44.93, "learning_rate": 5.2910052910052905e-05, "loss": 0.0011, "step": 6290 }, { "epoch": 45.0, "learning_rate": 0.0, "loss": 0.0012, "step": 6300 } ], "max_steps": 6300, "num_train_epochs": 45, "total_flos": 1.8837542182050202e+18, "trial_name": null, "trial_params": null }