{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.26666666666666666, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00019920000000000002, "loss": 0.672, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.0001984, "loss": 0.6133, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.0001976, "loss": 0.585, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.0001968, "loss": 0.5738, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.000196, "loss": 0.6044, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.0001952, "loss": 0.5787, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.0001944, "loss": 0.6115, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.00019360000000000002, "loss": 0.5353, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.0001928, "loss": 0.5436, "step": 90 }, { "epoch": 0.01, "learning_rate": 0.000192, "loss": 0.5947, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.0001912, "loss": 0.5773, "step": 110 }, { "epoch": 0.01, "learning_rate": 0.0001904, "loss": 0.544, "step": 120 }, { "epoch": 0.01, "learning_rate": 0.0001896, "loss": 0.568, "step": 130 }, { "epoch": 0.01, "learning_rate": 0.0001888, "loss": 0.5503, "step": 140 }, { "epoch": 0.02, "learning_rate": 0.000188, "loss": 0.5963, "step": 150 }, { "epoch": 0.02, "learning_rate": 0.00018720000000000002, "loss": 0.4925, "step": 160 }, { "epoch": 0.02, "learning_rate": 0.00018640000000000003, "loss": 0.5489, "step": 170 }, { "epoch": 0.02, "learning_rate": 0.0001856, "loss": 0.5321, "step": 180 }, { "epoch": 0.02, "learning_rate": 0.00018480000000000002, "loss": 0.6061, "step": 190 }, { "epoch": 0.02, "learning_rate": 0.00018400000000000003, "loss": 0.5699, "step": 200 }, { "epoch": 0.02, "learning_rate": 0.0001832, "loss": 0.5725, "step": 210 }, { "epoch": 0.02, "learning_rate": 0.00018240000000000002, "loss": 0.5132, "step": 220 }, { "epoch": 0.02, "learning_rate": 0.00018160000000000002, "loss": 0.5558, "step": 230 }, { "epoch": 0.03, "learning_rate": 0.0001808, "loss": 0.5192, "step": 240 }, { "epoch": 0.03, "learning_rate": 0.00018, "loss": 0.5916, "step": 250 }, { "epoch": 0.03, "learning_rate": 0.00017920000000000002, "loss": 0.5719, "step": 260 }, { "epoch": 0.03, "learning_rate": 0.0001784, "loss": 0.6012, "step": 270 }, { "epoch": 0.03, "learning_rate": 0.0001776, "loss": 0.6292, "step": 280 }, { "epoch": 0.03, "learning_rate": 0.00017680000000000001, "loss": 0.5424, "step": 290 }, { "epoch": 0.03, "learning_rate": 0.00017600000000000002, "loss": 0.5623, "step": 300 }, { "epoch": 0.03, "learning_rate": 0.0001752, "loss": 0.5139, "step": 310 }, { "epoch": 0.03, "learning_rate": 0.0001744, "loss": 0.5409, "step": 320 }, { "epoch": 0.04, "learning_rate": 0.00017360000000000002, "loss": 0.5562, "step": 330 }, { "epoch": 0.04, "learning_rate": 0.0001728, "loss": 0.5624, "step": 340 }, { "epoch": 0.04, "learning_rate": 0.000172, "loss": 0.5449, "step": 350 }, { "epoch": 0.04, "learning_rate": 0.00017120000000000001, "loss": 0.5555, "step": 360 }, { "epoch": 0.04, "learning_rate": 0.0001704, "loss": 0.5622, "step": 370 }, { "epoch": 0.04, "learning_rate": 0.0001696, "loss": 0.5714, "step": 380 }, { "epoch": 0.04, "learning_rate": 0.0001688, "loss": 0.5434, "step": 390 }, { "epoch": 0.04, "learning_rate": 0.000168, "loss": 0.5492, "step": 400 }, { "epoch": 0.04, "learning_rate": 0.0001672, "loss": 0.536, "step": 410 }, { "epoch": 0.04, "learning_rate": 0.0001664, "loss": 0.5307, "step": 420 }, { "epoch": 0.05, "learning_rate": 0.0001656, "loss": 0.5957, "step": 430 }, { "epoch": 0.05, "learning_rate": 0.0001648, "loss": 0.5935, "step": 440 }, { "epoch": 0.05, "learning_rate": 0.000164, "loss": 0.5643, "step": 450 }, { "epoch": 0.05, "learning_rate": 0.0001632, "loss": 0.5483, "step": 460 }, { "epoch": 0.05, "learning_rate": 0.00016240000000000002, "loss": 0.5415, "step": 470 }, { "epoch": 0.05, "learning_rate": 0.00016160000000000002, "loss": 0.5383, "step": 480 }, { "epoch": 0.05, "learning_rate": 0.0001608, "loss": 0.5444, "step": 490 }, { "epoch": 0.05, "learning_rate": 0.00016, "loss": 0.592, "step": 500 }, { "epoch": 0.05, "learning_rate": 0.00015920000000000002, "loss": 0.5295, "step": 510 }, { "epoch": 0.06, "learning_rate": 0.00015840000000000003, "loss": 0.5505, "step": 520 }, { "epoch": 0.06, "learning_rate": 0.0001576, "loss": 0.5954, "step": 530 }, { "epoch": 0.06, "learning_rate": 0.00015680000000000002, "loss": 0.5609, "step": 540 }, { "epoch": 0.06, "learning_rate": 0.00015600000000000002, "loss": 0.5614, "step": 550 }, { "epoch": 0.06, "learning_rate": 0.0001552, "loss": 0.5902, "step": 560 }, { "epoch": 0.06, "learning_rate": 0.0001544, "loss": 0.539, "step": 570 }, { "epoch": 0.06, "learning_rate": 0.00015360000000000002, "loss": 0.5821, "step": 580 }, { "epoch": 0.06, "learning_rate": 0.0001528, "loss": 0.5347, "step": 590 }, { "epoch": 0.06, "learning_rate": 0.000152, "loss": 0.5322, "step": 600 }, { "epoch": 0.07, "learning_rate": 0.00015120000000000002, "loss": 0.6044, "step": 610 }, { "epoch": 0.07, "learning_rate": 0.0001504, "loss": 0.5941, "step": 620 }, { "epoch": 0.07, "learning_rate": 0.0001496, "loss": 0.5473, "step": 630 }, { "epoch": 0.07, "learning_rate": 0.0001488, "loss": 0.5833, "step": 640 }, { "epoch": 0.07, "learning_rate": 0.000148, "loss": 0.5338, "step": 650 }, { "epoch": 0.07, "learning_rate": 0.0001472, "loss": 0.5687, "step": 660 }, { "epoch": 0.07, "learning_rate": 0.0001464, "loss": 0.5591, "step": 670 }, { "epoch": 0.07, "learning_rate": 0.00014560000000000002, "loss": 0.5691, "step": 680 }, { "epoch": 0.07, "learning_rate": 0.0001448, "loss": 0.5452, "step": 690 }, { "epoch": 0.07, "learning_rate": 0.000144, "loss": 0.5752, "step": 700 }, { "epoch": 0.08, "learning_rate": 0.0001432, "loss": 0.6147, "step": 710 }, { "epoch": 0.08, "learning_rate": 0.0001424, "loss": 0.5441, "step": 720 }, { "epoch": 0.08, "learning_rate": 0.0001416, "loss": 0.4872, "step": 730 }, { "epoch": 0.08, "learning_rate": 0.0001408, "loss": 0.5158, "step": 740 }, { "epoch": 0.08, "learning_rate": 0.00014, "loss": 0.5917, "step": 750 }, { "epoch": 0.08, "learning_rate": 0.0001392, "loss": 0.5357, "step": 760 }, { "epoch": 0.08, "learning_rate": 0.0001384, "loss": 0.5594, "step": 770 }, { "epoch": 0.08, "learning_rate": 0.00013759999999999998, "loss": 0.5654, "step": 780 }, { "epoch": 0.08, "learning_rate": 0.00013680000000000002, "loss": 0.5621, "step": 790 }, { "epoch": 0.09, "learning_rate": 0.00013600000000000003, "loss": 0.5776, "step": 800 }, { "epoch": 0.09, "learning_rate": 0.0001352, "loss": 0.5439, "step": 810 }, { "epoch": 0.09, "learning_rate": 0.00013440000000000001, "loss": 0.5433, "step": 820 }, { "epoch": 0.09, "learning_rate": 0.00013360000000000002, "loss": 0.5765, "step": 830 }, { "epoch": 0.09, "learning_rate": 0.0001328, "loss": 0.5169, "step": 840 }, { "epoch": 0.09, "learning_rate": 0.000132, "loss": 0.5308, "step": 850 }, { "epoch": 0.09, "learning_rate": 0.00013120000000000002, "loss": 0.4969, "step": 860 }, { "epoch": 0.09, "learning_rate": 0.0001304, "loss": 0.5865, "step": 870 }, { "epoch": 0.09, "learning_rate": 0.0001296, "loss": 0.5639, "step": 880 }, { "epoch": 0.09, "learning_rate": 0.00012880000000000001, "loss": 0.5635, "step": 890 }, { "epoch": 0.1, "learning_rate": 0.00012800000000000002, "loss": 0.5438, "step": 900 }, { "epoch": 0.1, "learning_rate": 0.0001272, "loss": 0.5678, "step": 910 }, { "epoch": 0.1, "learning_rate": 0.0001264, "loss": 0.538, "step": 920 }, { "epoch": 0.1, "learning_rate": 0.00012560000000000002, "loss": 0.5491, "step": 930 }, { "epoch": 0.1, "learning_rate": 0.0001248, "loss": 0.5836, "step": 940 }, { "epoch": 0.1, "learning_rate": 0.000124, "loss": 0.4828, "step": 950 }, { "epoch": 0.1, "learning_rate": 0.0001232, "loss": 0.5376, "step": 960 }, { "epoch": 0.1, "learning_rate": 0.0001224, "loss": 0.5001, "step": 970 }, { "epoch": 0.1, "learning_rate": 0.0001216, "loss": 0.5483, "step": 980 }, { "epoch": 0.11, "learning_rate": 0.0001208, "loss": 0.5528, "step": 990 }, { "epoch": 0.11, "learning_rate": 0.00012, "loss": 0.5651, "step": 1000 }, { "epoch": 0.11, "learning_rate": 0.0001192, "loss": 0.5435, "step": 1010 }, { "epoch": 0.11, "learning_rate": 0.0001184, "loss": 0.588, "step": 1020 }, { "epoch": 0.11, "learning_rate": 0.0001176, "loss": 0.5459, "step": 1030 }, { "epoch": 0.11, "learning_rate": 0.00011679999999999999, "loss": 0.5923, "step": 1040 }, { "epoch": 0.11, "learning_rate": 0.000116, "loss": 0.5071, "step": 1050 }, { "epoch": 0.11, "learning_rate": 0.0001152, "loss": 0.5964, "step": 1060 }, { "epoch": 0.11, "learning_rate": 0.0001144, "loss": 0.5785, "step": 1070 }, { "epoch": 0.12, "learning_rate": 0.0001136, "loss": 0.5776, "step": 1080 }, { "epoch": 0.12, "learning_rate": 0.00011279999999999999, "loss": 0.5412, "step": 1090 }, { "epoch": 0.12, "learning_rate": 0.00011200000000000001, "loss": 0.5943, "step": 1100 }, { "epoch": 0.12, "learning_rate": 0.00011120000000000002, "loss": 0.4882, "step": 1110 }, { "epoch": 0.12, "learning_rate": 0.00011040000000000001, "loss": 0.523, "step": 1120 }, { "epoch": 0.12, "learning_rate": 0.00010960000000000001, "loss": 0.5669, "step": 1130 }, { "epoch": 0.12, "learning_rate": 0.00010880000000000002, "loss": 0.5745, "step": 1140 }, { "epoch": 0.12, "learning_rate": 0.00010800000000000001, "loss": 0.5419, "step": 1150 }, { "epoch": 0.12, "learning_rate": 0.00010720000000000002, "loss": 0.5433, "step": 1160 }, { "epoch": 0.12, "learning_rate": 0.00010640000000000001, "loss": 0.6327, "step": 1170 }, { "epoch": 0.13, "learning_rate": 0.0001056, "loss": 0.5044, "step": 1180 }, { "epoch": 0.13, "learning_rate": 0.00010480000000000001, "loss": 0.5386, "step": 1190 }, { "epoch": 0.13, "learning_rate": 0.00010400000000000001, "loss": 0.5734, "step": 1200 }, { "epoch": 0.13, "learning_rate": 0.0001032, "loss": 0.5193, "step": 1210 }, { "epoch": 0.13, "learning_rate": 0.00010240000000000001, "loss": 0.5335, "step": 1220 }, { "epoch": 0.13, "learning_rate": 0.0001016, "loss": 0.5776, "step": 1230 }, { "epoch": 0.13, "learning_rate": 0.00010080000000000001, "loss": 0.5472, "step": 1240 }, { "epoch": 0.13, "learning_rate": 0.0001, "loss": 0.5383, "step": 1250 }, { "epoch": 0.13, "learning_rate": 9.92e-05, "loss": 0.5626, "step": 1260 }, { "epoch": 0.14, "learning_rate": 9.84e-05, "loss": 0.5664, "step": 1270 }, { "epoch": 0.14, "learning_rate": 9.76e-05, "loss": 0.5044, "step": 1280 }, { "epoch": 0.14, "learning_rate": 9.680000000000001e-05, "loss": 0.5203, "step": 1290 }, { "epoch": 0.14, "learning_rate": 9.6e-05, "loss": 0.549, "step": 1300 }, { "epoch": 0.14, "learning_rate": 9.52e-05, "loss": 0.4875, "step": 1310 }, { "epoch": 0.14, "learning_rate": 9.44e-05, "loss": 0.552, "step": 1320 }, { "epoch": 0.14, "learning_rate": 9.360000000000001e-05, "loss": 0.5711, "step": 1330 }, { "epoch": 0.14, "learning_rate": 9.28e-05, "loss": 0.5239, "step": 1340 }, { "epoch": 0.14, "learning_rate": 9.200000000000001e-05, "loss": 0.5446, "step": 1350 }, { "epoch": 0.15, "learning_rate": 9.120000000000001e-05, "loss": 0.5091, "step": 1360 }, { "epoch": 0.15, "learning_rate": 9.04e-05, "loss": 0.5244, "step": 1370 }, { "epoch": 0.15, "learning_rate": 8.960000000000001e-05, "loss": 0.5609, "step": 1380 }, { "epoch": 0.15, "learning_rate": 8.88e-05, "loss": 0.5794, "step": 1390 }, { "epoch": 0.15, "learning_rate": 8.800000000000001e-05, "loss": 0.5486, "step": 1400 }, { "epoch": 0.15, "learning_rate": 8.72e-05, "loss": 0.5047, "step": 1410 }, { "epoch": 0.15, "learning_rate": 8.64e-05, "loss": 0.541, "step": 1420 }, { "epoch": 0.15, "learning_rate": 8.560000000000001e-05, "loss": 0.509, "step": 1430 }, { "epoch": 0.15, "learning_rate": 8.48e-05, "loss": 0.4901, "step": 1440 }, { "epoch": 0.15, "learning_rate": 8.4e-05, "loss": 0.5326, "step": 1450 }, { "epoch": 0.16, "learning_rate": 8.32e-05, "loss": 0.5754, "step": 1460 }, { "epoch": 0.16, "learning_rate": 8.24e-05, "loss": 0.5574, "step": 1470 }, { "epoch": 0.16, "learning_rate": 8.16e-05, "loss": 0.5752, "step": 1480 }, { "epoch": 0.16, "learning_rate": 8.080000000000001e-05, "loss": 0.5834, "step": 1490 }, { "epoch": 0.16, "learning_rate": 8e-05, "loss": 0.5168, "step": 1500 }, { "epoch": 0.16, "learning_rate": 7.920000000000001e-05, "loss": 0.5751, "step": 1510 }, { "epoch": 0.16, "learning_rate": 7.840000000000001e-05, "loss": 0.6164, "step": 1520 }, { "epoch": 0.16, "learning_rate": 7.76e-05, "loss": 0.542, "step": 1530 }, { "epoch": 0.16, "learning_rate": 7.680000000000001e-05, "loss": 0.5558, "step": 1540 }, { "epoch": 0.17, "learning_rate": 7.6e-05, "loss": 0.524, "step": 1550 }, { "epoch": 0.17, "learning_rate": 7.52e-05, "loss": 0.6232, "step": 1560 }, { "epoch": 0.17, "learning_rate": 7.44e-05, "loss": 0.4692, "step": 1570 }, { "epoch": 0.17, "learning_rate": 7.36e-05, "loss": 0.5604, "step": 1580 }, { "epoch": 0.17, "learning_rate": 7.280000000000001e-05, "loss": 0.5242, "step": 1590 }, { "epoch": 0.17, "learning_rate": 7.2e-05, "loss": 0.5455, "step": 1600 }, { "epoch": 0.17, "learning_rate": 7.12e-05, "loss": 0.5362, "step": 1610 }, { "epoch": 0.17, "learning_rate": 7.04e-05, "loss": 0.5257, "step": 1620 }, { "epoch": 0.17, "learning_rate": 6.96e-05, "loss": 0.5601, "step": 1630 }, { "epoch": 0.17, "learning_rate": 6.879999999999999e-05, "loss": 0.5484, "step": 1640 }, { "epoch": 0.18, "learning_rate": 6.800000000000001e-05, "loss": 0.5787, "step": 1650 }, { "epoch": 0.18, "learning_rate": 6.720000000000001e-05, "loss": 0.6185, "step": 1660 }, { "epoch": 0.18, "learning_rate": 6.64e-05, "loss": 0.5453, "step": 1670 }, { "epoch": 0.18, "learning_rate": 6.560000000000001e-05, "loss": 0.5297, "step": 1680 }, { "epoch": 0.18, "learning_rate": 6.48e-05, "loss": 0.5538, "step": 1690 }, { "epoch": 0.18, "learning_rate": 6.400000000000001e-05, "loss": 0.5651, "step": 1700 }, { "epoch": 0.18, "learning_rate": 6.32e-05, "loss": 0.5764, "step": 1710 }, { "epoch": 0.18, "learning_rate": 6.24e-05, "loss": 0.5414, "step": 1720 }, { "epoch": 0.18, "learning_rate": 6.16e-05, "loss": 0.5264, "step": 1730 }, { "epoch": 0.19, "learning_rate": 6.08e-05, "loss": 0.5472, "step": 1740 }, { "epoch": 0.19, "learning_rate": 6e-05, "loss": 0.5876, "step": 1750 }, { "epoch": 0.19, "learning_rate": 5.92e-05, "loss": 0.5346, "step": 1760 }, { "epoch": 0.19, "learning_rate": 5.8399999999999997e-05, "loss": 0.5856, "step": 1770 }, { "epoch": 0.19, "learning_rate": 5.76e-05, "loss": 0.592, "step": 1780 }, { "epoch": 0.19, "learning_rate": 5.68e-05, "loss": 0.535, "step": 1790 }, { "epoch": 0.19, "learning_rate": 5.6000000000000006e-05, "loss": 0.5427, "step": 1800 }, { "epoch": 0.19, "learning_rate": 5.520000000000001e-05, "loss": 0.5296, "step": 1810 }, { "epoch": 0.19, "learning_rate": 5.440000000000001e-05, "loss": 0.5307, "step": 1820 }, { "epoch": 0.2, "learning_rate": 5.360000000000001e-05, "loss": 0.5943, "step": 1830 }, { "epoch": 0.2, "learning_rate": 5.28e-05, "loss": 0.5458, "step": 1840 }, { "epoch": 0.2, "learning_rate": 5.2000000000000004e-05, "loss": 0.5331, "step": 1850 }, { "epoch": 0.2, "learning_rate": 5.1200000000000004e-05, "loss": 0.5342, "step": 1860 }, { "epoch": 0.2, "learning_rate": 5.0400000000000005e-05, "loss": 0.6056, "step": 1870 }, { "epoch": 0.2, "learning_rate": 4.96e-05, "loss": 0.5618, "step": 1880 }, { "epoch": 0.2, "learning_rate": 4.88e-05, "loss": 0.5471, "step": 1890 }, { "epoch": 0.2, "learning_rate": 4.8e-05, "loss": 0.5799, "step": 1900 }, { "epoch": 0.2, "learning_rate": 4.72e-05, "loss": 0.5605, "step": 1910 }, { "epoch": 0.2, "learning_rate": 4.64e-05, "loss": 0.5294, "step": 1920 }, { "epoch": 0.21, "learning_rate": 4.5600000000000004e-05, "loss": 0.469, "step": 1930 }, { "epoch": 0.21, "learning_rate": 4.4800000000000005e-05, "loss": 0.5056, "step": 1940 }, { "epoch": 0.21, "learning_rate": 4.4000000000000006e-05, "loss": 0.5084, "step": 1950 }, { "epoch": 0.21, "learning_rate": 4.32e-05, "loss": 0.4769, "step": 1960 }, { "epoch": 0.21, "learning_rate": 4.24e-05, "loss": 0.5651, "step": 1970 }, { "epoch": 0.21, "learning_rate": 4.16e-05, "loss": 0.5117, "step": 1980 }, { "epoch": 0.21, "learning_rate": 4.08e-05, "loss": 0.5165, "step": 1990 }, { "epoch": 0.21, "learning_rate": 4e-05, "loss": 0.5167, "step": 2000 }, { "epoch": 0.21, "learning_rate": 3.9200000000000004e-05, "loss": 0.5029, "step": 2010 }, { "epoch": 0.22, "learning_rate": 3.8400000000000005e-05, "loss": 0.5706, "step": 2020 }, { "epoch": 0.22, "learning_rate": 3.76e-05, "loss": 0.5751, "step": 2030 }, { "epoch": 0.22, "learning_rate": 3.68e-05, "loss": 0.6197, "step": 2040 }, { "epoch": 0.22, "learning_rate": 3.6e-05, "loss": 0.5016, "step": 2050 }, { "epoch": 0.22, "learning_rate": 3.52e-05, "loss": 0.5219, "step": 2060 }, { "epoch": 0.22, "learning_rate": 3.4399999999999996e-05, "loss": 0.5708, "step": 2070 }, { "epoch": 0.22, "learning_rate": 3.3600000000000004e-05, "loss": 0.5139, "step": 2080 }, { "epoch": 0.22, "learning_rate": 3.2800000000000004e-05, "loss": 0.5225, "step": 2090 }, { "epoch": 0.22, "learning_rate": 3.2000000000000005e-05, "loss": 0.5606, "step": 2100 }, { "epoch": 0.23, "learning_rate": 3.12e-05, "loss": 0.5264, "step": 2110 }, { "epoch": 0.23, "learning_rate": 3.04e-05, "loss": 0.5016, "step": 2120 }, { "epoch": 0.23, "learning_rate": 2.96e-05, "loss": 0.5609, "step": 2130 }, { "epoch": 0.23, "learning_rate": 2.88e-05, "loss": 0.5286, "step": 2140 }, { "epoch": 0.23, "learning_rate": 2.8000000000000003e-05, "loss": 0.5327, "step": 2150 }, { "epoch": 0.23, "learning_rate": 2.7200000000000004e-05, "loss": 0.5381, "step": 2160 }, { "epoch": 0.23, "learning_rate": 2.64e-05, "loss": 0.5476, "step": 2170 }, { "epoch": 0.23, "learning_rate": 2.5600000000000002e-05, "loss": 0.5325, "step": 2180 }, { "epoch": 0.23, "learning_rate": 2.48e-05, "loss": 0.571, "step": 2190 }, { "epoch": 0.23, "learning_rate": 2.4e-05, "loss": 0.5726, "step": 2200 }, { "epoch": 0.24, "learning_rate": 2.32e-05, "loss": 0.5485, "step": 2210 }, { "epoch": 0.24, "learning_rate": 2.2400000000000002e-05, "loss": 0.5847, "step": 2220 }, { "epoch": 0.24, "learning_rate": 2.16e-05, "loss": 0.5154, "step": 2230 }, { "epoch": 0.24, "learning_rate": 2.08e-05, "loss": 0.6066, "step": 2240 }, { "epoch": 0.24, "learning_rate": 2e-05, "loss": 0.5618, "step": 2250 }, { "epoch": 0.24, "learning_rate": 1.9200000000000003e-05, "loss": 0.5221, "step": 2260 }, { "epoch": 0.24, "learning_rate": 1.84e-05, "loss": 0.5052, "step": 2270 }, { "epoch": 0.24, "learning_rate": 1.76e-05, "loss": 0.5675, "step": 2280 }, { "epoch": 0.24, "learning_rate": 1.6800000000000002e-05, "loss": 0.5574, "step": 2290 }, { "epoch": 0.25, "learning_rate": 1.6000000000000003e-05, "loss": 0.5942, "step": 2300 }, { "epoch": 0.25, "learning_rate": 1.52e-05, "loss": 0.6035, "step": 2310 }, { "epoch": 0.25, "learning_rate": 1.44e-05, "loss": 0.5454, "step": 2320 }, { "epoch": 0.25, "learning_rate": 1.3600000000000002e-05, "loss": 0.5231, "step": 2330 }, { "epoch": 0.25, "learning_rate": 1.2800000000000001e-05, "loss": 0.5558, "step": 2340 }, { "epoch": 0.25, "learning_rate": 1.2e-05, "loss": 0.5233, "step": 2350 }, { "epoch": 0.25, "learning_rate": 1.1200000000000001e-05, "loss": 0.4485, "step": 2360 }, { "epoch": 0.25, "learning_rate": 1.04e-05, "loss": 0.5184, "step": 2370 }, { "epoch": 0.25, "learning_rate": 9.600000000000001e-06, "loss": 0.5298, "step": 2380 }, { "epoch": 0.25, "learning_rate": 8.8e-06, "loss": 0.5317, "step": 2390 }, { "epoch": 0.26, "learning_rate": 8.000000000000001e-06, "loss": 0.5399, "step": 2400 }, { "epoch": 0.26, "learning_rate": 7.2e-06, "loss": 0.5639, "step": 2410 }, { "epoch": 0.26, "learning_rate": 6.4000000000000006e-06, "loss": 0.5616, "step": 2420 }, { "epoch": 0.26, "learning_rate": 5.600000000000001e-06, "loss": 0.5761, "step": 2430 }, { "epoch": 0.26, "learning_rate": 4.800000000000001e-06, "loss": 0.5912, "step": 2440 }, { "epoch": 0.26, "learning_rate": 4.000000000000001e-06, "loss": 0.601, "step": 2450 }, { "epoch": 0.26, "learning_rate": 3.2000000000000003e-06, "loss": 0.5664, "step": 2460 }, { "epoch": 0.26, "learning_rate": 2.4000000000000003e-06, "loss": 0.6286, "step": 2470 }, { "epoch": 0.26, "learning_rate": 1.6000000000000001e-06, "loss": 0.4989, "step": 2480 }, { "epoch": 0.27, "learning_rate": 8.000000000000001e-07, "loss": 0.5339, "step": 2490 }, { "epoch": 0.27, "learning_rate": 0.0, "loss": 0.525, "step": 2500 } ], "logging_steps": 10, "max_steps": 2500, "num_train_epochs": 1, "save_steps": 500, "total_flos": 4.243363257237504e+17, "trial_name": null, "trial_params": null }