{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.3535169785169785, "eval_steps": 500, "global_step": 500000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.2499999999999995e-06, "loss": 7.4049, "step": 500 }, { "epoch": 0.0, "learning_rate": 1.2499999999999999e-05, "loss": 4.7791, "step": 1000 }, { "epoch": 0.0, "learning_rate": 1.875e-05, "loss": 3.8115, "step": 1500 }, { "epoch": 0.01, "learning_rate": 2.4999999999999998e-05, "loss": 3.3298, "step": 2000 }, { "epoch": 0.01, "learning_rate": 3.125e-05, "loss": 3.0365, "step": 2500 }, { "epoch": 0.01, "learning_rate": 3.75e-05, "loss": 2.8445, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.375e-05, "loss": 2.7101, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.9999999999999996e-05, "loss": 2.6043, "step": 4000 }, { "epoch": 0.01, "learning_rate": 5.625e-05, "loss": 2.5286, "step": 4500 }, { "epoch": 0.01, "learning_rate": 6.25e-05, "loss": 2.467, "step": 5000 }, { "epoch": 0.01, "learning_rate": 6.874999999999999e-05, "loss": 2.4192, "step": 5500 }, { "epoch": 0.02, "learning_rate": 7.5e-05, "loss": 2.3785, "step": 6000 }, { "epoch": 0.02, "learning_rate": 8.124999999999998e-05, "loss": 2.3456, "step": 6500 }, { "epoch": 0.02, "learning_rate": 8.75e-05, "loss": 2.3183, "step": 7000 }, { "epoch": 0.02, "learning_rate": 9.374999999999999e-05, "loss": 2.3005, "step": 7500 }, { "epoch": 0.02, "learning_rate": 9.999999999999999e-05, "loss": 2.2808, "step": 8000 }, { "epoch": 0.02, "learning_rate": 0.00010625, "loss": 2.2722, "step": 8500 }, { "epoch": 0.02, "learning_rate": 0.0001125, "loss": 2.2577, "step": 9000 }, { "epoch": 0.03, "learning_rate": 0.00011874999999999999, "loss": 2.2479, "step": 9500 }, { "epoch": 0.03, "learning_rate": 0.000125, "loss": 2.2452, "step": 10000 }, { "epoch": 0.03, "learning_rate": 0.00013125, "loss": 2.2419, "step": 10500 }, { "epoch": 0.03, "learning_rate": 0.00013749999999999998, "loss": 2.2345, "step": 11000 }, { "epoch": 0.03, "learning_rate": 0.00014375, "loss": 2.2276, "step": 11500 }, { "epoch": 0.03, "learning_rate": 0.00015, "loss": 2.2285, "step": 12000 }, { "epoch": 0.03, "learning_rate": 0.00015625, "loss": 2.2178, "step": 12500 }, { "epoch": 0.04, "learning_rate": 0.00016249999999999997, "loss": 2.2214, "step": 13000 }, { "epoch": 0.04, "learning_rate": 0.00016874999999999998, "loss": 2.2269, "step": 13500 }, { "epoch": 0.04, "learning_rate": 0.000175, "loss": 2.2322, "step": 14000 }, { "epoch": 0.04, "learning_rate": 0.00018124999999999996, "loss": 2.2292, "step": 14500 }, { "epoch": 0.04, "learning_rate": 0.00018749999999999998, "loss": 2.2274, "step": 15000 }, { "epoch": 0.04, "learning_rate": 0.00019375, "loss": 2.232, "step": 15500 }, { "epoch": 0.04, "learning_rate": 0.00019999999999999998, "loss": 2.2316, "step": 16000 }, { "epoch": 0.04, "learning_rate": 0.00020624999999999997, "loss": 2.238, "step": 16500 }, { "epoch": 0.05, "learning_rate": 0.0002125, "loss": 2.2426, "step": 17000 }, { "epoch": 0.05, "learning_rate": 0.00021874999999999998, "loss": 2.2408, "step": 17500 }, { "epoch": 0.05, "learning_rate": 0.000225, "loss": 2.2456, "step": 18000 }, { "epoch": 0.05, "learning_rate": 0.00023124999999999998, "loss": 2.2473, "step": 18500 }, { "epoch": 0.05, "learning_rate": 0.00023749999999999997, "loss": 2.2549, "step": 19000 }, { "epoch": 0.05, "learning_rate": 0.00024375, "loss": 2.2542, "step": 19500 }, { "epoch": 0.05, "learning_rate": 0.00025, "loss": 2.2604, "step": 20000 }, { "epoch": 0.06, "learning_rate": 0.00025624999999999997, "loss": 2.2626, "step": 20500 }, { "epoch": 0.06, "learning_rate": 0.0002625, "loss": 2.2694, "step": 21000 }, { "epoch": 0.06, "learning_rate": 0.00026875, "loss": 2.2724, "step": 21500 }, { "epoch": 0.06, "learning_rate": 0.00027499999999999996, "loss": 2.2722, "step": 22000 }, { "epoch": 0.06, "learning_rate": 0.00028125, "loss": 2.2807, "step": 22500 }, { "epoch": 0.06, "learning_rate": 0.0002875, "loss": 2.2846, "step": 23000 }, { "epoch": 0.06, "learning_rate": 0.00029374999999999996, "loss": 2.292, "step": 23500 }, { "epoch": 0.06, "learning_rate": 0.0003, "loss": 2.297, "step": 24000 }, { "epoch": 0.07, "learning_rate": 0.0002996848739495798, "loss": 2.3038, "step": 24500 }, { "epoch": 0.07, "learning_rate": 0.00029936974789915966, "loss": 2.2998, "step": 25000 }, { "epoch": 0.07, "learning_rate": 0.0002990546218487395, "loss": 2.2977, "step": 25500 }, { "epoch": 0.07, "learning_rate": 0.0002987394957983193, "loss": 2.2925, "step": 26000 }, { "epoch": 0.07, "learning_rate": 0.0002984243697478991, "loss": 2.2943, "step": 26500 }, { "epoch": 0.07, "learning_rate": 0.000298109243697479, "loss": 2.2899, "step": 27000 }, { "epoch": 0.07, "learning_rate": 0.0002977941176470588, "loss": 2.2899, "step": 27500 }, { "epoch": 0.08, "learning_rate": 0.0002974789915966386, "loss": 2.2806, "step": 28000 }, { "epoch": 0.08, "learning_rate": 0.0002971638655462185, "loss": 2.2823, "step": 28500 }, { "epoch": 0.08, "learning_rate": 0.0002968487394957983, "loss": 2.2795, "step": 29000 }, { "epoch": 0.08, "learning_rate": 0.00029653361344537813, "loss": 2.2736, "step": 29500 }, { "epoch": 0.08, "learning_rate": 0.00029621848739495795, "loss": 2.2704, "step": 30000 }, { "epoch": 0.08, "learning_rate": 0.0002959033613445378, "loss": 2.269, "step": 30500 }, { "epoch": 0.08, "learning_rate": 0.00029558823529411763, "loss": 2.2601, "step": 31000 }, { "epoch": 0.09, "learning_rate": 0.00029527310924369745, "loss": 2.2674, "step": 31500 }, { "epoch": 0.09, "learning_rate": 0.00029495798319327727, "loss": 2.2575, "step": 32000 }, { "epoch": 0.09, "learning_rate": 0.0002946428571428571, "loss": 2.2583, "step": 32500 }, { "epoch": 0.09, "learning_rate": 0.00029432773109243696, "loss": 2.2494, "step": 33000 }, { "epoch": 0.09, "learning_rate": 0.0002940126050420168, "loss": 2.2412, "step": 33500 }, { "epoch": 0.09, "learning_rate": 0.0002936974789915966, "loss": 2.2453, "step": 34000 }, { "epoch": 0.09, "learning_rate": 0.0002933823529411764, "loss": 2.2401, "step": 34500 }, { "epoch": 0.09, "learning_rate": 0.0002930672268907563, "loss": 2.2375, "step": 35000 }, { "epoch": 0.1, "learning_rate": 0.0002927521008403361, "loss": 2.2302, "step": 35500 }, { "epoch": 0.1, "learning_rate": 0.0002924369747899159, "loss": 2.2275, "step": 36000 }, { "epoch": 0.1, "learning_rate": 0.0002921218487394958, "loss": 2.223, "step": 36500 }, { "epoch": 0.1, "learning_rate": 0.0002918067226890756, "loss": 2.2252, "step": 37000 }, { "epoch": 0.1, "learning_rate": 0.0002914915966386554, "loss": 2.2238, "step": 37500 }, { "epoch": 0.1, "learning_rate": 0.00029117647058823524, "loss": 2.2206, "step": 38000 }, { "epoch": 0.1, "learning_rate": 0.0002908613445378151, "loss": 2.2162, "step": 38500 }, { "epoch": 0.11, "learning_rate": 0.00029054621848739493, "loss": 2.2141, "step": 39000 }, { "epoch": 0.11, "learning_rate": 0.00029023109243697475, "loss": 2.2215, "step": 39500 }, { "epoch": 0.11, "learning_rate": 0.0002899159663865546, "loss": 2.2107, "step": 40000 }, { "epoch": 0.11, "learning_rate": 0.00028960084033613444, "loss": 2.2125, "step": 40500 }, { "epoch": 0.11, "learning_rate": 0.00028928571428571425, "loss": 2.2086, "step": 41000 }, { "epoch": 0.11, "learning_rate": 0.00028897058823529407, "loss": 2.2097, "step": 41500 }, { "epoch": 0.11, "learning_rate": 0.00028865546218487394, "loss": 2.1996, "step": 42000 }, { "epoch": 0.12, "learning_rate": 0.00028834033613445376, "loss": 2.1972, "step": 42500 }, { "epoch": 0.12, "learning_rate": 0.0002880252100840336, "loss": 2.1906, "step": 43000 }, { "epoch": 0.12, "learning_rate": 0.00028771008403361345, "loss": 2.2015, "step": 43500 }, { "epoch": 0.12, "learning_rate": 0.00028739495798319327, "loss": 2.1927, "step": 44000 }, { "epoch": 0.12, "learning_rate": 0.0002870798319327731, "loss": 2.1889, "step": 44500 }, { "epoch": 0.12, "learning_rate": 0.00028676470588235296, "loss": 2.1904, "step": 45000 }, { "epoch": 0.12, "learning_rate": 0.0002864495798319328, "loss": 2.1809, "step": 45500 }, { "epoch": 0.12, "learning_rate": 0.0002861344537815126, "loss": 2.1777, "step": 46000 }, { "epoch": 0.13, "learning_rate": 0.0002858193277310924, "loss": 2.1772, "step": 46500 }, { "epoch": 0.13, "learning_rate": 0.0002855042016806722, "loss": 2.1714, "step": 47000 }, { "epoch": 0.13, "learning_rate": 0.00028518907563025204, "loss": 2.171, "step": 47500 }, { "epoch": 0.13, "learning_rate": 0.0002848739495798319, "loss": 2.1814, "step": 48000 }, { "epoch": 0.13, "learning_rate": 0.00028455882352941173, "loss": 2.1713, "step": 48500 }, { "epoch": 0.13, "learning_rate": 0.00028424369747899155, "loss": 2.168, "step": 49000 }, { "epoch": 0.13, "learning_rate": 0.00028392857142857137, "loss": 2.1635, "step": 49500 }, { "epoch": 0.14, "learning_rate": 0.00028361344537815124, "loss": 2.1689, "step": 50000 }, { "epoch": 0.14, "learning_rate": 0.00028329831932773106, "loss": 2.1614, "step": 50500 }, { "epoch": 0.14, "learning_rate": 0.0002829831932773109, "loss": 2.1594, "step": 51000 }, { "epoch": 0.14, "learning_rate": 0.00028266806722689075, "loss": 2.1585, "step": 51500 }, { "epoch": 0.14, "learning_rate": 0.00028235294117647056, "loss": 2.1639, "step": 52000 }, { "epoch": 0.14, "learning_rate": 0.0002820378151260504, "loss": 2.1574, "step": 52500 }, { "epoch": 0.14, "learning_rate": 0.00028172268907563025, "loss": 2.1512, "step": 53000 }, { "epoch": 0.14, "learning_rate": 0.00028140756302521007, "loss": 2.1571, "step": 53500 }, { "epoch": 0.15, "learning_rate": 0.0002810924369747899, "loss": 2.1513, "step": 54000 }, { "epoch": 0.15, "learning_rate": 0.0002807773109243697, "loss": 2.1503, "step": 54500 }, { "epoch": 0.15, "learning_rate": 0.0002804621848739496, "loss": 2.1462, "step": 55000 }, { "epoch": 0.15, "learning_rate": 0.0002801470588235294, "loss": 2.1449, "step": 55500 }, { "epoch": 0.15, "learning_rate": 0.0002798319327731092, "loss": 2.1383, "step": 56000 }, { "epoch": 0.15, "learning_rate": 0.0002795168067226891, "loss": 2.1431, "step": 56500 }, { "epoch": 0.15, "learning_rate": 0.0002792016806722689, "loss": 2.1349, "step": 57000 }, { "epoch": 0.16, "learning_rate": 0.0002788865546218487, "loss": 2.1423, "step": 57500 }, { "epoch": 0.16, "learning_rate": 0.00027857142857142854, "loss": 2.1375, "step": 58000 }, { "epoch": 0.16, "learning_rate": 0.0002782563025210084, "loss": 2.1261, "step": 58500 }, { "epoch": 0.16, "learning_rate": 0.0002779411764705882, "loss": 2.1277, "step": 59000 }, { "epoch": 0.16, "learning_rate": 0.00027762605042016804, "loss": 2.1204, "step": 59500 }, { "epoch": 0.16, "learning_rate": 0.0002773109243697479, "loss": 2.1277, "step": 60000 }, { "epoch": 0.16, "learning_rate": 0.00027699579831932773, "loss": 2.1246, "step": 60500 }, { "epoch": 0.17, "learning_rate": 0.00027668067226890755, "loss": 2.1207, "step": 61000 }, { "epoch": 0.17, "learning_rate": 0.00027636554621848737, "loss": 2.1161, "step": 61500 }, { "epoch": 0.17, "learning_rate": 0.0002760504201680672, "loss": 2.1198, "step": 62000 }, { "epoch": 0.17, "learning_rate": 0.000275735294117647, "loss": 2.1122, "step": 62500 }, { "epoch": 0.17, "learning_rate": 0.00027542016806722687, "loss": 2.1113, "step": 63000 }, { "epoch": 0.17, "learning_rate": 0.0002751050420168067, "loss": 2.1164, "step": 63500 }, { "epoch": 0.17, "learning_rate": 0.0002747899159663865, "loss": 2.1096, "step": 64000 }, { "epoch": 0.17, "learning_rate": 0.0002744747899159664, "loss": 2.106, "step": 64500 }, { "epoch": 0.18, "learning_rate": 0.0002741596638655462, "loss": 2.1015, "step": 65000 }, { "epoch": 0.18, "learning_rate": 0.000273844537815126, "loss": 2.1041, "step": 65500 }, { "epoch": 0.18, "learning_rate": 0.00027352941176470583, "loss": 2.0991, "step": 66000 }, { "epoch": 0.18, "learning_rate": 0.0002732142857142857, "loss": 2.0956, "step": 66500 }, { "epoch": 0.18, "learning_rate": 0.0002728991596638655, "loss": 2.0945, "step": 67000 }, { "epoch": 0.18, "learning_rate": 0.00027258403361344534, "loss": 2.0958, "step": 67500 }, { "epoch": 0.18, "learning_rate": 0.0002722689075630252, "loss": 2.1115, "step": 68000 }, { "epoch": 0.19, "learning_rate": 0.000271953781512605, "loss": 2.1205, "step": 68500 }, { "epoch": 0.19, "learning_rate": 0.00027163865546218484, "loss": 2.1098, "step": 69000 }, { "epoch": 0.19, "learning_rate": 0.00027132352941176466, "loss": 2.1046, "step": 69500 }, { "epoch": 0.19, "learning_rate": 0.00027100840336134453, "loss": 2.1013, "step": 70000 }, { "epoch": 0.19, "learning_rate": 0.00027069327731092435, "loss": 2.0989, "step": 70500 }, { "epoch": 0.19, "learning_rate": 0.00027037815126050417, "loss": 2.0927, "step": 71000 }, { "epoch": 0.19, "learning_rate": 0.00027006302521008404, "loss": 2.0827, "step": 71500 }, { "epoch": 0.19, "learning_rate": 0.00026974789915966386, "loss": 2.0851, "step": 72000 }, { "epoch": 0.2, "learning_rate": 0.0002694327731092437, "loss": 2.0841, "step": 72500 }, { "epoch": 0.2, "learning_rate": 0.0002691176470588235, "loss": 2.0816, "step": 73000 }, { "epoch": 0.2, "learning_rate": 0.00026880252100840336, "loss": 2.0761, "step": 73500 }, { "epoch": 0.2, "learning_rate": 0.0002684873949579832, "loss": 2.0745, "step": 74000 }, { "epoch": 0.2, "learning_rate": 0.000268172268907563, "loss": 2.0762, "step": 74500 }, { "epoch": 0.2, "learning_rate": 0.00026785714285714287, "loss": 2.0704, "step": 75000 }, { "epoch": 0.2, "learning_rate": 0.0002675420168067227, "loss": 2.0737, "step": 75500 }, { "epoch": 0.21, "learning_rate": 0.0002672268907563025, "loss": 2.0687, "step": 76000 }, { "epoch": 0.21, "learning_rate": 0.0002669117647058823, "loss": 2.0743, "step": 76500 }, { "epoch": 0.21, "learning_rate": 0.00026659663865546214, "loss": 2.0758, "step": 77000 }, { "epoch": 0.21, "learning_rate": 0.00026628151260504196, "loss": 2.0677, "step": 77500 }, { "epoch": 0.21, "learning_rate": 0.00026596638655462183, "loss": 2.0643, "step": 78000 }, { "epoch": 0.21, "learning_rate": 0.00026565126050420165, "loss": 2.0588, "step": 78500 }, { "epoch": 0.21, "learning_rate": 0.00026533613445378146, "loss": 2.0676, "step": 79000 }, { "epoch": 0.22, "learning_rate": 0.00026502100840336134, "loss": 2.065, "step": 79500 }, { "epoch": 0.22, "learning_rate": 0.00026470588235294115, "loss": 2.064, "step": 80000 }, { "epoch": 0.22, "learning_rate": 0.00026439075630252097, "loss": 2.0579, "step": 80500 }, { "epoch": 0.22, "learning_rate": 0.0002640756302521008, "loss": 2.0614, "step": 81000 }, { "epoch": 0.22, "learning_rate": 0.00026376050420168066, "loss": 2.0664, "step": 81500 }, { "epoch": 0.22, "learning_rate": 0.0002634453781512605, "loss": 2.0648, "step": 82000 }, { "epoch": 0.22, "learning_rate": 0.0002631302521008403, "loss": 2.0564, "step": 82500 }, { "epoch": 0.22, "learning_rate": 0.00026281512605042017, "loss": 2.0569, "step": 83000 }, { "epoch": 0.23, "learning_rate": 0.0002625, "loss": 2.0517, "step": 83500 }, { "epoch": 0.23, "learning_rate": 0.0002621848739495798, "loss": 2.0453, "step": 84000 }, { "epoch": 0.23, "learning_rate": 0.0002618697478991596, "loss": 2.0447, "step": 84500 }, { "epoch": 0.23, "learning_rate": 0.0002615546218487395, "loss": 2.0431, "step": 85000 }, { "epoch": 0.23, "learning_rate": 0.0002612394957983193, "loss": 2.0414, "step": 85500 }, { "epoch": 0.23, "learning_rate": 0.0002609243697478991, "loss": 2.0381, "step": 86000 }, { "epoch": 0.23, "learning_rate": 0.000260609243697479, "loss": 2.0392, "step": 86500 }, { "epoch": 0.24, "learning_rate": 0.0002602941176470588, "loss": 2.0317, "step": 87000 }, { "epoch": 0.24, "learning_rate": 0.00025997899159663863, "loss": 2.0338, "step": 87500 }, { "epoch": 0.24, "learning_rate": 0.00025966386554621845, "loss": 2.0348, "step": 88000 }, { "epoch": 0.24, "learning_rate": 0.0002593487394957983, "loss": 2.0349, "step": 88500 }, { "epoch": 0.24, "learning_rate": 0.00025903361344537814, "loss": 2.0295, "step": 89000 }, { "epoch": 0.24, "learning_rate": 0.00025871848739495796, "loss": 2.0253, "step": 89500 }, { "epoch": 0.24, "learning_rate": 0.00025840336134453783, "loss": 2.0272, "step": 90000 }, { "epoch": 0.24, "learning_rate": 0.00025808823529411764, "loss": 2.0273, "step": 90500 }, { "epoch": 0.25, "learning_rate": 0.00025777310924369746, "loss": 2.0295, "step": 91000 }, { "epoch": 0.25, "learning_rate": 0.0002574579831932773, "loss": 2.0284, "step": 91500 }, { "epoch": 0.25, "learning_rate": 0.0002571428571428571, "loss": 2.0335, "step": 92000 }, { "epoch": 0.25, "learning_rate": 0.0002568277310924369, "loss": 2.032, "step": 92500 }, { "epoch": 0.25, "learning_rate": 0.0002565126050420168, "loss": 2.0256, "step": 93000 }, { "epoch": 0.25, "learning_rate": 0.0002561974789915966, "loss": 2.0278, "step": 93500 }, { "epoch": 0.25, "learning_rate": 0.0002558823529411764, "loss": 2.0226, "step": 94000 }, { "epoch": 0.26, "learning_rate": 0.0002555672268907563, "loss": 2.0186, "step": 94500 }, { "epoch": 0.26, "learning_rate": 0.0002552521008403361, "loss": 2.0111, "step": 95000 }, { "epoch": 0.26, "learning_rate": 0.00025493697478991593, "loss": 2.0139, "step": 95500 }, { "epoch": 0.26, "learning_rate": 0.00025462184873949575, "loss": 2.0092, "step": 96000 }, { "epoch": 0.26, "learning_rate": 0.0002543067226890756, "loss": 2.0139, "step": 96500 }, { "epoch": 0.26, "learning_rate": 0.00025399159663865543, "loss": 2.0017, "step": 97000 }, { "epoch": 0.26, "learning_rate": 0.00025367647058823525, "loss": 2.0061, "step": 97500 }, { "epoch": 0.27, "learning_rate": 0.0002533613445378151, "loss": 2.0049, "step": 98000 }, { "epoch": 0.27, "learning_rate": 0.00025304621848739494, "loss": 2.0054, "step": 98500 }, { "epoch": 0.27, "learning_rate": 0.00025273109243697476, "loss": 2.0045, "step": 99000 }, { "epoch": 0.27, "learning_rate": 0.0002524159663865546, "loss": 2.0043, "step": 99500 }, { "epoch": 0.27, "learning_rate": 0.00025210084033613445, "loss": 2.0007, "step": 100000 }, { "epoch": 0.27, "learning_rate": 0.00025178571428571426, "loss": 2.0047, "step": 100500 }, { "epoch": 0.27, "learning_rate": 0.0002514705882352941, "loss": 2.0061, "step": 101000 }, { "epoch": 0.27, "learning_rate": 0.00025115546218487395, "loss": 2.0006, "step": 101500 }, { "epoch": 0.28, "learning_rate": 0.00025084033613445377, "loss": 1.9995, "step": 102000 }, { "epoch": 0.28, "learning_rate": 0.0002505252100840336, "loss": 1.9997, "step": 102500 }, { "epoch": 0.28, "learning_rate": 0.00025021008403361346, "loss": 1.9942, "step": 103000 }, { "epoch": 0.28, "learning_rate": 0.0002498949579831933, "loss": 1.9951, "step": 103500 }, { "epoch": 0.28, "learning_rate": 0.0002495798319327731, "loss": 1.9872, "step": 104000 }, { "epoch": 0.28, "learning_rate": 0.0002492647058823529, "loss": 1.9908, "step": 104500 }, { "epoch": 0.28, "learning_rate": 0.0002489495798319328, "loss": 1.9933, "step": 105000 }, { "epoch": 0.29, "learning_rate": 0.0002486344537815126, "loss": 1.9884, "step": 105500 }, { "epoch": 0.29, "learning_rate": 0.0002483193277310924, "loss": 1.9885, "step": 106000 }, { "epoch": 0.29, "learning_rate": 0.00024800420168067224, "loss": 1.9881, "step": 106500 }, { "epoch": 0.29, "learning_rate": 0.00024768907563025205, "loss": 1.9842, "step": 107000 }, { "epoch": 0.29, "learning_rate": 0.00024737394957983187, "loss": 1.9816, "step": 107500 }, { "epoch": 0.29, "learning_rate": 0.00024705882352941174, "loss": 1.9772, "step": 108000 }, { "epoch": 0.29, "learning_rate": 0.00024674369747899156, "loss": 1.9766, "step": 108500 }, { "epoch": 0.3, "learning_rate": 0.0002464285714285714, "loss": 1.9795, "step": 109000 }, { "epoch": 0.3, "learning_rate": 0.00024611344537815125, "loss": 1.9754, "step": 109500 }, { "epoch": 0.3, "learning_rate": 0.00024579831932773107, "loss": 1.9751, "step": 110000 }, { "epoch": 0.3, "learning_rate": 0.0002454831932773109, "loss": 1.9757, "step": 110500 }, { "epoch": 0.3, "learning_rate": 0.00024516806722689076, "loss": 1.9755, "step": 111000 }, { "epoch": 0.3, "learning_rate": 0.0002448529411764706, "loss": 1.9753, "step": 111500 }, { "epoch": 0.3, "learning_rate": 0.0002445378151260504, "loss": 1.9657, "step": 112000 }, { "epoch": 0.3, "learning_rate": 0.0002442226890756302, "loss": 1.9663, "step": 112500 }, { "epoch": 0.31, "learning_rate": 0.00024390756302521005, "loss": 1.9682, "step": 113000 }, { "epoch": 0.31, "learning_rate": 0.0002435924369747899, "loss": 1.971, "step": 113500 }, { "epoch": 0.31, "learning_rate": 0.00024327731092436971, "loss": 1.9687, "step": 114000 }, { "epoch": 0.31, "learning_rate": 0.00024296218487394956, "loss": 1.9826, "step": 114500 }, { "epoch": 0.31, "learning_rate": 0.0002426470588235294, "loss": 1.9643, "step": 115000 }, { "epoch": 0.31, "learning_rate": 0.00024233193277310922, "loss": 1.9708, "step": 115500 }, { "epoch": 0.31, "learning_rate": 0.00024201680672268907, "loss": 1.9628, "step": 116000 }, { "epoch": 0.32, "learning_rate": 0.00024170168067226888, "loss": 1.9633, "step": 116500 }, { "epoch": 0.32, "learning_rate": 0.00024138655462184873, "loss": 1.9607, "step": 117000 }, { "epoch": 0.32, "learning_rate": 0.00024107142857142857, "loss": 1.9574, "step": 117500 }, { "epoch": 0.32, "learning_rate": 0.0002407563025210084, "loss": 1.9611, "step": 118000 }, { "epoch": 0.32, "learning_rate": 0.00024044117647058823, "loss": 1.9575, "step": 118500 }, { "epoch": 0.32, "learning_rate": 0.00024012605042016805, "loss": 1.9538, "step": 119000 }, { "epoch": 0.32, "learning_rate": 0.0002398109243697479, "loss": 1.9573, "step": 119500 }, { "epoch": 0.32, "learning_rate": 0.00023949579831932771, "loss": 1.9601, "step": 120000 }, { "epoch": 0.33, "learning_rate": 0.00023918067226890756, "loss": 1.9539, "step": 120500 }, { "epoch": 0.33, "learning_rate": 0.00023886554621848735, "loss": 1.9532, "step": 121000 }, { "epoch": 0.33, "learning_rate": 0.0002385504201680672, "loss": 1.951, "step": 121500 }, { "epoch": 0.33, "learning_rate": 0.000238235294117647, "loss": 1.9554, "step": 122000 }, { "epoch": 0.33, "learning_rate": 0.00023792016806722686, "loss": 1.9472, "step": 122500 }, { "epoch": 0.33, "learning_rate": 0.0002376050420168067, "loss": 1.9462, "step": 123000 }, { "epoch": 0.33, "learning_rate": 0.00023728991596638652, "loss": 1.9524, "step": 123500 }, { "epoch": 0.34, "learning_rate": 0.00023697478991596636, "loss": 1.9525, "step": 124000 }, { "epoch": 0.34, "learning_rate": 0.00023665966386554618, "loss": 1.9444, "step": 124500 }, { "epoch": 0.34, "learning_rate": 0.00023634453781512602, "loss": 1.9404, "step": 125000 }, { "epoch": 0.34, "learning_rate": 0.00023602941176470587, "loss": 1.9431, "step": 125500 }, { "epoch": 0.34, "learning_rate": 0.00023571428571428569, "loss": 1.943, "step": 126000 }, { "epoch": 0.34, "learning_rate": 0.00023539915966386553, "loss": 1.9404, "step": 126500 }, { "epoch": 0.34, "learning_rate": 0.00023508403361344535, "loss": 1.9447, "step": 127000 }, { "epoch": 0.35, "learning_rate": 0.0002347689075630252, "loss": 1.9425, "step": 127500 }, { "epoch": 0.35, "learning_rate": 0.000234453781512605, "loss": 1.9376, "step": 128000 }, { "epoch": 0.35, "learning_rate": 0.00023413865546218485, "loss": 1.9374, "step": 128500 }, { "epoch": 0.35, "learning_rate": 0.0002338235294117647, "loss": 1.9373, "step": 129000 }, { "epoch": 0.35, "learning_rate": 0.00023350840336134452, "loss": 1.9364, "step": 129500 }, { "epoch": 0.35, "learning_rate": 0.00023319327731092436, "loss": 1.9344, "step": 130000 }, { "epoch": 0.35, "learning_rate": 0.00023287815126050418, "loss": 1.9299, "step": 130500 }, { "epoch": 0.35, "learning_rate": 0.00023256302521008402, "loss": 1.9243, "step": 131000 }, { "epoch": 0.36, "learning_rate": 0.00023224789915966384, "loss": 1.93, "step": 131500 }, { "epoch": 0.36, "learning_rate": 0.00023193277310924368, "loss": 1.925, "step": 132000 }, { "epoch": 0.36, "learning_rate": 0.00023161764705882353, "loss": 1.9256, "step": 132500 }, { "epoch": 0.36, "learning_rate": 0.00023130252100840335, "loss": 1.9334, "step": 133000 }, { "epoch": 0.36, "learning_rate": 0.0002309873949579832, "loss": 1.9266, "step": 133500 }, { "epoch": 0.36, "learning_rate": 0.000230672268907563, "loss": 1.9338, "step": 134000 }, { "epoch": 0.36, "learning_rate": 0.00023035714285714285, "loss": 1.9258, "step": 134500 }, { "epoch": 0.37, "learning_rate": 0.00023004201680672267, "loss": 1.9253, "step": 135000 }, { "epoch": 0.37, "learning_rate": 0.00022972689075630252, "loss": 1.9259, "step": 135500 }, { "epoch": 0.37, "learning_rate": 0.0002294117647058823, "loss": 1.9509, "step": 136000 }, { "epoch": 0.37, "learning_rate": 0.00022909663865546215, "loss": 1.9366, "step": 136500 }, { "epoch": 0.37, "learning_rate": 0.000228781512605042, "loss": 1.9323, "step": 137000 }, { "epoch": 0.37, "learning_rate": 0.0002284663865546218, "loss": 1.9298, "step": 137500 }, { "epoch": 0.37, "learning_rate": 0.00022815126050420166, "loss": 1.9611, "step": 138000 }, { "epoch": 0.37, "learning_rate": 0.00022783613445378147, "loss": 1.9599, "step": 138500 }, { "epoch": 0.38, "learning_rate": 0.00022752100840336132, "loss": 1.9537, "step": 139000 }, { "epoch": 0.38, "learning_rate": 0.00022720588235294114, "loss": 1.9308, "step": 139500 }, { "epoch": 0.38, "learning_rate": 0.00022689075630252098, "loss": 1.9208, "step": 140000 }, { "epoch": 0.38, "learning_rate": 0.00022657563025210083, "loss": 1.9233, "step": 140500 }, { "epoch": 0.38, "learning_rate": 0.00022626050420168064, "loss": 1.9233, "step": 141000 }, { "epoch": 0.38, "learning_rate": 0.0002259453781512605, "loss": 1.927, "step": 141500 }, { "epoch": 0.38, "learning_rate": 0.0002256302521008403, "loss": 1.9331, "step": 142000 }, { "epoch": 0.39, "learning_rate": 0.00022531512605042015, "loss": 1.9358, "step": 142500 }, { "epoch": 0.39, "learning_rate": 0.000225, "loss": 1.9415, "step": 143000 }, { "epoch": 0.39, "learning_rate": 0.0002246848739495798, "loss": 1.9427, "step": 143500 }, { "epoch": 0.39, "learning_rate": 0.00022436974789915966, "loss": 1.9216, "step": 144000 }, { "epoch": 0.39, "learning_rate": 0.00022405462184873947, "loss": 1.9196, "step": 144500 }, { "epoch": 0.39, "learning_rate": 0.00022373949579831932, "loss": 1.9109, "step": 145000 }, { "epoch": 0.39, "learning_rate": 0.00022342436974789914, "loss": 1.9166, "step": 145500 }, { "epoch": 0.4, "learning_rate": 0.00022310924369747898, "loss": 1.9231, "step": 146000 }, { "epoch": 0.4, "learning_rate": 0.00022279411764705882, "loss": 1.9091, "step": 146500 }, { "epoch": 0.4, "learning_rate": 0.00022247899159663864, "loss": 1.9096, "step": 147000 }, { "epoch": 0.4, "learning_rate": 0.00022216386554621849, "loss": 1.9107, "step": 147500 }, { "epoch": 0.4, "learning_rate": 0.0002218487394957983, "loss": 1.908, "step": 148000 }, { "epoch": 0.4, "learning_rate": 0.00022153361344537815, "loss": 1.9052, "step": 148500 }, { "epoch": 0.4, "learning_rate": 0.00022121848739495797, "loss": 1.9058, "step": 149000 }, { "epoch": 0.4, "learning_rate": 0.0002209033613445378, "loss": 1.9043, "step": 149500 }, { "epoch": 0.41, "learning_rate": 0.00022058823529411765, "loss": 1.9049, "step": 150000 }, { "epoch": 0.41, "learning_rate": 0.00022027310924369745, "loss": 1.9028, "step": 150500 }, { "epoch": 0.41, "learning_rate": 0.0002199579831932773, "loss": 1.9034, "step": 151000 }, { "epoch": 0.41, "learning_rate": 0.0002196428571428571, "loss": 1.8976, "step": 151500 }, { "epoch": 0.41, "learning_rate": 0.00021932773109243695, "loss": 1.8957, "step": 152000 }, { "epoch": 0.41, "learning_rate": 0.00021901260504201677, "loss": 1.895, "step": 152500 }, { "epoch": 0.41, "learning_rate": 0.00021869747899159661, "loss": 1.8944, "step": 153000 }, { "epoch": 0.42, "learning_rate": 0.00021838235294117643, "loss": 1.8904, "step": 153500 }, { "epoch": 0.42, "learning_rate": 0.00021806722689075628, "loss": 1.8939, "step": 154000 }, { "epoch": 0.42, "learning_rate": 0.00021775210084033612, "loss": 1.8921, "step": 154500 }, { "epoch": 0.42, "learning_rate": 0.00021743697478991594, "loss": 1.8873, "step": 155000 }, { "epoch": 0.42, "learning_rate": 0.00021712184873949578, "loss": 1.8907, "step": 155500 }, { "epoch": 0.42, "learning_rate": 0.0002168067226890756, "loss": 1.8898, "step": 156000 }, { "epoch": 0.42, "learning_rate": 0.00021649159663865544, "loss": 1.8826, "step": 156500 }, { "epoch": 0.43, "learning_rate": 0.00021617647058823526, "loss": 1.8896, "step": 157000 }, { "epoch": 0.43, "learning_rate": 0.0002158613445378151, "loss": 1.8836, "step": 157500 }, { "epoch": 0.43, "learning_rate": 0.00021554621848739495, "loss": 1.9006, "step": 158000 }, { "epoch": 0.43, "learning_rate": 0.00021523109243697477, "loss": 1.8949, "step": 158500 }, { "epoch": 0.43, "learning_rate": 0.0002149159663865546, "loss": 1.8931, "step": 159000 }, { "epoch": 0.43, "learning_rate": 0.00021460084033613443, "loss": 1.8886, "step": 159500 }, { "epoch": 0.43, "learning_rate": 0.00021428571428571427, "loss": 1.892, "step": 160000 }, { "epoch": 0.43, "learning_rate": 0.0002139705882352941, "loss": 1.8935, "step": 160500 }, { "epoch": 0.44, "learning_rate": 0.00021365546218487394, "loss": 1.8977, "step": 161000 }, { "epoch": 0.44, "learning_rate": 0.00021334033613445378, "loss": 1.8891, "step": 161500 }, { "epoch": 0.44, "learning_rate": 0.0002130252100840336, "loss": 1.8846, "step": 162000 }, { "epoch": 0.44, "learning_rate": 0.00021271008403361344, "loss": 1.8816, "step": 162500 }, { "epoch": 0.44, "learning_rate": 0.00021239495798319326, "loss": 1.8791, "step": 163000 }, { "epoch": 0.44, "learning_rate": 0.0002120798319327731, "loss": 1.9027, "step": 163500 }, { "epoch": 0.44, "learning_rate": 0.00021176470588235295, "loss": 1.8873, "step": 164000 }, { "epoch": 0.45, "learning_rate": 0.00021144957983193277, "loss": 1.8795, "step": 164500 }, { "epoch": 0.45, "learning_rate": 0.0002111344537815126, "loss": 1.8775, "step": 165000 }, { "epoch": 0.45, "learning_rate": 0.0002108193277310924, "loss": 1.8799, "step": 165500 }, { "epoch": 0.45, "learning_rate": 0.00021050420168067225, "loss": 1.8771, "step": 166000 }, { "epoch": 0.45, "learning_rate": 0.00021018907563025206, "loss": 1.8797, "step": 166500 }, { "epoch": 0.45, "learning_rate": 0.0002098739495798319, "loss": 1.8759, "step": 167000 }, { "epoch": 0.45, "learning_rate": 0.00020955882352941173, "loss": 1.8742, "step": 167500 }, { "epoch": 0.45, "learning_rate": 0.00020924369747899157, "loss": 1.8691, "step": 168000 }, { "epoch": 0.46, "learning_rate": 0.0002089285714285714, "loss": 1.8702, "step": 168500 }, { "epoch": 0.46, "learning_rate": 0.00020861344537815123, "loss": 1.8689, "step": 169000 }, { "epoch": 0.46, "learning_rate": 0.00020829831932773108, "loss": 1.8701, "step": 169500 }, { "epoch": 0.46, "learning_rate": 0.0002079831932773109, "loss": 1.8695, "step": 170000 }, { "epoch": 0.46, "learning_rate": 0.00020766806722689074, "loss": 1.8706, "step": 170500 }, { "epoch": 0.46, "learning_rate": 0.00020735294117647056, "loss": 1.8935, "step": 171000 }, { "epoch": 0.46, "learning_rate": 0.0002070378151260504, "loss": 1.9075, "step": 171500 }, { "epoch": 0.47, "learning_rate": 0.00020672268907563025, "loss": 1.8929, "step": 172000 }, { "epoch": 0.47, "learning_rate": 0.00020640756302521006, "loss": 1.8795, "step": 172500 }, { "epoch": 0.47, "learning_rate": 0.0002060924369747899, "loss": 1.8845, "step": 173000 }, { "epoch": 0.47, "learning_rate": 0.00020577731092436973, "loss": 1.8762, "step": 173500 }, { "epoch": 0.47, "learning_rate": 0.00020546218487394957, "loss": 1.8745, "step": 174000 }, { "epoch": 0.47, "learning_rate": 0.0002051470588235294, "loss": 1.8751, "step": 174500 }, { "epoch": 0.47, "learning_rate": 0.00020483193277310923, "loss": 1.8746, "step": 175000 }, { "epoch": 0.48, "learning_rate": 0.00020451680672268908, "loss": 1.8614, "step": 175500 }, { "epoch": 0.48, "learning_rate": 0.0002042016806722689, "loss": 1.8634, "step": 176000 }, { "epoch": 0.48, "learning_rate": 0.00020388655462184874, "loss": 1.8711, "step": 176500 }, { "epoch": 0.48, "learning_rate": 0.00020357142857142856, "loss": 1.8648, "step": 177000 }, { "epoch": 0.48, "learning_rate": 0.0002032563025210084, "loss": 1.8615, "step": 177500 }, { "epoch": 0.48, "learning_rate": 0.00020294117647058822, "loss": 1.8661, "step": 178000 }, { "epoch": 0.48, "learning_rate": 0.00020262605042016806, "loss": 1.8616, "step": 178500 }, { "epoch": 0.48, "learning_rate": 0.0002023109243697479, "loss": 1.8601, "step": 179000 }, { "epoch": 0.49, "learning_rate": 0.00020199579831932772, "loss": 1.865, "step": 179500 }, { "epoch": 0.49, "learning_rate": 0.00020168067226890757, "loss": 1.8664, "step": 180000 }, { "epoch": 0.49, "learning_rate": 0.00020136554621848736, "loss": 1.8562, "step": 180500 }, { "epoch": 0.49, "learning_rate": 0.0002010504201680672, "loss": 1.8589, "step": 181000 }, { "epoch": 0.49, "learning_rate": 0.00020073529411764702, "loss": 1.8583, "step": 181500 }, { "epoch": 0.49, "learning_rate": 0.00020042016806722687, "loss": 1.8623, "step": 182000 }, { "epoch": 0.49, "learning_rate": 0.00020010504201680668, "loss": 1.8561, "step": 182500 }, { "epoch": 0.5, "learning_rate": 0.00019978991596638653, "loss": 1.8537, "step": 183000 }, { "epoch": 0.5, "learning_rate": 0.00019947478991596637, "loss": 1.8622, "step": 183500 }, { "epoch": 0.5, "learning_rate": 0.0001991596638655462, "loss": 1.8568, "step": 184000 }, { "epoch": 0.5, "learning_rate": 0.00019884453781512603, "loss": 1.8543, "step": 184500 }, { "epoch": 0.5, "learning_rate": 0.00019852941176470585, "loss": 1.8528, "step": 185000 }, { "epoch": 0.5, "learning_rate": 0.0001982142857142857, "loss": 1.8509, "step": 185500 }, { "epoch": 0.5, "learning_rate": 0.0001978991596638655, "loss": 1.8489, "step": 186000 }, { "epoch": 0.5, "learning_rate": 0.00019758403361344536, "loss": 1.8483, "step": 186500 }, { "epoch": 0.51, "learning_rate": 0.0001972689075630252, "loss": 1.8487, "step": 187000 }, { "epoch": 0.51, "learning_rate": 0.00019695378151260502, "loss": 1.8416, "step": 187500 }, { "epoch": 0.51, "learning_rate": 0.00019663865546218486, "loss": 1.8497, "step": 188000 }, { "epoch": 0.51, "learning_rate": 0.00019632352941176468, "loss": 1.849, "step": 188500 }, { "epoch": 0.51, "learning_rate": 0.00019600840336134453, "loss": 1.8466, "step": 189000 }, { "epoch": 0.51, "learning_rate": 0.00019569327731092434, "loss": 1.8456, "step": 189500 }, { "epoch": 0.51, "learning_rate": 0.0001953781512605042, "loss": 1.8453, "step": 190000 }, { "epoch": 0.52, "learning_rate": 0.00019506302521008403, "loss": 1.8418, "step": 190500 }, { "epoch": 0.52, "learning_rate": 0.00019474789915966385, "loss": 1.8429, "step": 191000 }, { "epoch": 0.52, "learning_rate": 0.0001944327731092437, "loss": 1.8453, "step": 191500 }, { "epoch": 0.52, "learning_rate": 0.0001941176470588235, "loss": 1.8458, "step": 192000 }, { "epoch": 0.52, "learning_rate": 0.00019380252100840336, "loss": 1.842, "step": 192500 }, { "epoch": 0.52, "learning_rate": 0.0001934873949579832, "loss": 1.8381, "step": 193000 }, { "epoch": 0.52, "learning_rate": 0.00019317226890756302, "loss": 1.8374, "step": 193500 }, { "epoch": 0.53, "learning_rate": 0.00019285714285714286, "loss": 1.8359, "step": 194000 }, { "epoch": 0.53, "learning_rate": 0.00019254201680672268, "loss": 1.8336, "step": 194500 }, { "epoch": 0.53, "learning_rate": 0.00019222689075630253, "loss": 1.8365, "step": 195000 }, { "epoch": 0.53, "learning_rate": 0.00019191176470588232, "loss": 1.8391, "step": 195500 }, { "epoch": 0.53, "learning_rate": 0.00019159663865546216, "loss": 1.8431, "step": 196000 }, { "epoch": 0.53, "learning_rate": 0.00019128151260504198, "loss": 1.8361, "step": 196500 }, { "epoch": 0.53, "learning_rate": 0.00019096638655462182, "loss": 1.8583, "step": 197000 }, { "epoch": 0.53, "learning_rate": 0.00019065126050420164, "loss": 1.8656, "step": 197500 }, { "epoch": 0.54, "learning_rate": 0.00019033613445378148, "loss": 1.8508, "step": 198000 }, { "epoch": 0.54, "learning_rate": 0.00019002100840336133, "loss": 1.8533, "step": 198500 }, { "epoch": 0.54, "learning_rate": 0.00018970588235294115, "loss": 1.8416, "step": 199000 }, { "epoch": 0.54, "learning_rate": 0.000189390756302521, "loss": 1.8503, "step": 199500 }, { "epoch": 0.54, "learning_rate": 0.0001890756302521008, "loss": 1.844, "step": 200000 }, { "epoch": 0.54, "learning_rate": 0.00018876050420168065, "loss": 1.8389, "step": 200500 }, { "epoch": 0.54, "learning_rate": 0.0001884453781512605, "loss": 1.836, "step": 201000 }, { "epoch": 0.55, "learning_rate": 0.00018813025210084031, "loss": 1.8354, "step": 201500 }, { "epoch": 0.55, "learning_rate": 0.00018781512605042016, "loss": 1.833, "step": 202000 }, { "epoch": 0.55, "learning_rate": 0.00018749999999999998, "loss": 1.8308, "step": 202500 }, { "epoch": 0.55, "learning_rate": 0.00018718487394957982, "loss": 1.8314, "step": 203000 }, { "epoch": 0.55, "learning_rate": 0.00018686974789915964, "loss": 1.83, "step": 203500 }, { "epoch": 0.55, "learning_rate": 0.00018655462184873948, "loss": 1.8318, "step": 204000 }, { "epoch": 0.55, "learning_rate": 0.00018623949579831933, "loss": 1.8313, "step": 204500 }, { "epoch": 0.55, "learning_rate": 0.00018592436974789915, "loss": 1.8234, "step": 205000 }, { "epoch": 0.56, "learning_rate": 0.000185609243697479, "loss": 1.8286, "step": 205500 }, { "epoch": 0.56, "learning_rate": 0.0001852941176470588, "loss": 1.8246, "step": 206000 }, { "epoch": 0.56, "learning_rate": 0.00018497899159663865, "loss": 1.8213, "step": 206500 }, { "epoch": 0.56, "learning_rate": 0.00018466386554621847, "loss": 1.8237, "step": 207000 }, { "epoch": 0.56, "learning_rate": 0.00018434873949579831, "loss": 1.8247, "step": 207500 }, { "epoch": 0.56, "learning_rate": 0.00018403361344537816, "loss": 1.8215, "step": 208000 }, { "epoch": 0.56, "learning_rate": 0.00018371848739495798, "loss": 1.8174, "step": 208500 }, { "epoch": 0.57, "learning_rate": 0.00018340336134453782, "loss": 1.8215, "step": 209000 }, { "epoch": 0.57, "learning_rate": 0.00018308823529411764, "loss": 1.8168, "step": 209500 }, { "epoch": 0.57, "learning_rate": 0.00018277310924369746, "loss": 1.8186, "step": 210000 }, { "epoch": 0.57, "learning_rate": 0.00018245798319327727, "loss": 1.8229, "step": 210500 }, { "epoch": 0.57, "learning_rate": 0.00018214285714285712, "loss": 1.8165, "step": 211000 }, { "epoch": 0.57, "learning_rate": 0.00018182773109243693, "loss": 1.8173, "step": 211500 }, { "epoch": 0.57, "learning_rate": 0.00018151260504201678, "loss": 1.8115, "step": 212000 }, { "epoch": 0.58, "learning_rate": 0.00018119747899159662, "loss": 1.8157, "step": 212500 }, { "epoch": 0.58, "learning_rate": 0.00018088235294117644, "loss": 1.813, "step": 213000 }, { "epoch": 0.58, "learning_rate": 0.00018056722689075629, "loss": 1.8126, "step": 213500 }, { "epoch": 0.58, "learning_rate": 0.0001802521008403361, "loss": 1.808, "step": 214000 }, { "epoch": 0.58, "learning_rate": 0.00017993697478991595, "loss": 1.8116, "step": 214500 }, { "epoch": 0.58, "learning_rate": 0.00017962184873949577, "loss": 1.8165, "step": 215000 }, { "epoch": 0.58, "learning_rate": 0.0001793067226890756, "loss": 1.807, "step": 215500 }, { "epoch": 0.58, "learning_rate": 0.00017899159663865545, "loss": 1.8051, "step": 216000 }, { "epoch": 0.59, "learning_rate": 0.00017867647058823527, "loss": 1.8081, "step": 216500 }, { "epoch": 0.59, "learning_rate": 0.00017836134453781512, "loss": 1.8084, "step": 217000 }, { "epoch": 0.59, "learning_rate": 0.00017804621848739493, "loss": 1.811, "step": 217500 }, { "epoch": 0.59, "learning_rate": 0.00017773109243697478, "loss": 1.8068, "step": 218000 }, { "epoch": 0.59, "learning_rate": 0.0001774159663865546, "loss": 1.805, "step": 218500 }, { "epoch": 0.59, "learning_rate": 0.00017710084033613444, "loss": 1.8042, "step": 219000 }, { "epoch": 0.59, "learning_rate": 0.00017678571428571428, "loss": 1.8004, "step": 219500 }, { "epoch": 0.6, "learning_rate": 0.0001764705882352941, "loss": 1.8028, "step": 220000 }, { "epoch": 0.6, "learning_rate": 0.00017615546218487395, "loss": 1.8111, "step": 220500 }, { "epoch": 0.6, "learning_rate": 0.00017584033613445376, "loss": 1.8076, "step": 221000 }, { "epoch": 0.6, "learning_rate": 0.0001755252100840336, "loss": 1.8017, "step": 221500 }, { "epoch": 0.6, "learning_rate": 0.00017521008403361345, "loss": 1.802, "step": 222000 }, { "epoch": 0.6, "learning_rate": 0.00017489495798319327, "loss": 1.803, "step": 222500 }, { "epoch": 0.6, "learning_rate": 0.00017457983193277312, "loss": 1.8023, "step": 223000 }, { "epoch": 0.61, "learning_rate": 0.00017426470588235293, "loss": 1.7978, "step": 223500 }, { "epoch": 0.61, "learning_rate": 0.00017394957983193278, "loss": 1.7959, "step": 224000 }, { "epoch": 0.61, "learning_rate": 0.0001736344537815126, "loss": 1.7971, "step": 224500 }, { "epoch": 0.61, "learning_rate": 0.0001733193277310924, "loss": 1.801, "step": 225000 }, { "epoch": 0.61, "learning_rate": 0.00017300420168067223, "loss": 1.7987, "step": 225500 }, { "epoch": 0.61, "learning_rate": 0.00017268907563025207, "loss": 1.7975, "step": 226000 }, { "epoch": 0.61, "learning_rate": 0.0001723739495798319, "loss": 1.7997, "step": 226500 }, { "epoch": 0.61, "learning_rate": 0.00017205882352941174, "loss": 1.7993, "step": 227000 }, { "epoch": 0.62, "learning_rate": 0.00017174369747899158, "loss": 1.8021, "step": 227500 }, { "epoch": 0.62, "learning_rate": 0.0001714285714285714, "loss": 1.795, "step": 228000 }, { "epoch": 0.62, "learning_rate": 0.00017111344537815124, "loss": 1.7957, "step": 228500 }, { "epoch": 0.62, "learning_rate": 0.00017079831932773106, "loss": 1.7967, "step": 229000 }, { "epoch": 0.62, "learning_rate": 0.0001704831932773109, "loss": 1.7951, "step": 229500 }, { "epoch": 0.62, "learning_rate": 0.00017016806722689075, "loss": 1.7953, "step": 230000 }, { "epoch": 0.62, "learning_rate": 0.00016985294117647057, "loss": 1.7938, "step": 230500 }, { "epoch": 0.63, "learning_rate": 0.0001695378151260504, "loss": 1.7925, "step": 231000 }, { "epoch": 0.63, "learning_rate": 0.00016922268907563023, "loss": 1.7944, "step": 231500 }, { "epoch": 0.63, "learning_rate": 0.00016890756302521007, "loss": 1.7897, "step": 232000 }, { "epoch": 0.63, "learning_rate": 0.0001685924369747899, "loss": 1.7929, "step": 232500 }, { "epoch": 0.63, "learning_rate": 0.00016827731092436974, "loss": 1.7916, "step": 233000 }, { "epoch": 0.63, "learning_rate": 0.00016796218487394958, "loss": 1.7885, "step": 233500 }, { "epoch": 0.63, "learning_rate": 0.0001676470588235294, "loss": 1.7971, "step": 234000 }, { "epoch": 0.63, "learning_rate": 0.00016733193277310924, "loss": 1.7871, "step": 234500 }, { "epoch": 0.64, "learning_rate": 0.00016701680672268906, "loss": 1.7862, "step": 235000 }, { "epoch": 0.64, "learning_rate": 0.0001667016806722689, "loss": 1.7858, "step": 235500 }, { "epoch": 0.64, "learning_rate": 0.00016638655462184872, "loss": 1.7859, "step": 236000 }, { "epoch": 0.64, "learning_rate": 0.00016607142857142857, "loss": 1.7864, "step": 236500 }, { "epoch": 0.64, "learning_rate": 0.0001657563025210084, "loss": 1.7869, "step": 237000 }, { "epoch": 0.64, "learning_rate": 0.00016544117647058823, "loss": 1.7816, "step": 237500 }, { "epoch": 0.64, "learning_rate": 0.00016512605042016807, "loss": 1.7865, "step": 238000 }, { "epoch": 0.65, "learning_rate": 0.0001648109243697479, "loss": 1.7867, "step": 238500 }, { "epoch": 0.65, "learning_rate": 0.00016449579831932773, "loss": 1.7868, "step": 239000 }, { "epoch": 0.65, "learning_rate": 0.00016418067226890755, "loss": 1.7795, "step": 239500 }, { "epoch": 0.65, "learning_rate": 0.00016386554621848737, "loss": 1.7833, "step": 240000 }, { "epoch": 0.65, "learning_rate": 0.00016355042016806719, "loss": 1.7885, "step": 240500 }, { "epoch": 0.65, "learning_rate": 0.00016323529411764703, "loss": 1.7886, "step": 241000 }, { "epoch": 0.65, "learning_rate": 0.00016292016806722688, "loss": 1.7865, "step": 241500 }, { "epoch": 0.66, "learning_rate": 0.0001626050420168067, "loss": 1.787, "step": 242000 }, { "epoch": 0.66, "learning_rate": 0.00016228991596638654, "loss": 1.7791, "step": 242500 }, { "epoch": 0.66, "learning_rate": 0.00016197478991596635, "loss": 1.7786, "step": 243000 }, { "epoch": 0.66, "learning_rate": 0.0001616596638655462, "loss": 1.7819, "step": 243500 }, { "epoch": 0.66, "learning_rate": 0.00016134453781512602, "loss": 1.7766, "step": 244000 }, { "epoch": 0.66, "learning_rate": 0.00016102941176470586, "loss": 1.7766, "step": 244500 }, { "epoch": 0.66, "learning_rate": 0.0001607142857142857, "loss": 1.7809, "step": 245000 }, { "epoch": 0.66, "learning_rate": 0.00016039915966386552, "loss": 1.7708, "step": 245500 }, { "epoch": 0.67, "learning_rate": 0.00016008403361344537, "loss": 1.7713, "step": 246000 }, { "epoch": 0.67, "learning_rate": 0.00015976890756302519, "loss": 1.7773, "step": 246500 }, { "epoch": 0.67, "learning_rate": 0.00015945378151260503, "loss": 1.7746, "step": 247000 }, { "epoch": 0.67, "learning_rate": 0.00015913865546218485, "loss": 1.7722, "step": 247500 }, { "epoch": 0.67, "learning_rate": 0.0001588235294117647, "loss": 1.7707, "step": 248000 }, { "epoch": 0.67, "learning_rate": 0.00015850840336134454, "loss": 1.7785, "step": 248500 }, { "epoch": 0.67, "learning_rate": 0.00015819327731092435, "loss": 1.7745, "step": 249000 }, { "epoch": 0.68, "learning_rate": 0.0001578781512605042, "loss": 1.7746, "step": 249500 }, { "epoch": 0.68, "learning_rate": 0.00015756302521008402, "loss": 1.7706, "step": 250000 }, { "epoch": 0.68, "learning_rate": 0.00015724789915966386, "loss": 1.7689, "step": 250500 }, { "epoch": 0.68, "learning_rate": 0.0001569327731092437, "loss": 1.7702, "step": 251000 }, { "epoch": 0.68, "learning_rate": 0.00015661764705882352, "loss": 1.7716, "step": 251500 }, { "epoch": 0.68, "learning_rate": 0.00015630252100840337, "loss": 1.7709, "step": 252000 }, { "epoch": 0.68, "learning_rate": 0.00015598739495798318, "loss": 1.7689, "step": 252500 }, { "epoch": 0.68, "learning_rate": 0.00015567226890756303, "loss": 1.7709, "step": 253000 }, { "epoch": 0.69, "learning_rate": 0.00015535714285714285, "loss": 1.7666, "step": 253500 }, { "epoch": 0.69, "learning_rate": 0.0001550420168067227, "loss": 1.768, "step": 254000 }, { "epoch": 0.69, "learning_rate": 0.00015472689075630254, "loss": 1.7646, "step": 254500 }, { "epoch": 0.69, "learning_rate": 0.00015441176470588233, "loss": 1.7624, "step": 255000 }, { "epoch": 0.69, "learning_rate": 0.00015409663865546214, "loss": 1.7639, "step": 255500 }, { "epoch": 0.69, "learning_rate": 0.000153781512605042, "loss": 1.7684, "step": 256000 }, { "epoch": 0.69, "learning_rate": 0.00015346638655462183, "loss": 1.7621, "step": 256500 }, { "epoch": 0.7, "learning_rate": 0.00015315126050420165, "loss": 1.7632, "step": 257000 }, { "epoch": 0.7, "learning_rate": 0.0001528361344537815, "loss": 1.7617, "step": 257500 }, { "epoch": 0.7, "learning_rate": 0.0001525210084033613, "loss": 1.7616, "step": 258000 }, { "epoch": 0.7, "learning_rate": 0.00015220588235294116, "loss": 1.7607, "step": 258500 }, { "epoch": 0.7, "learning_rate": 0.000151890756302521, "loss": 1.7652, "step": 259000 }, { "epoch": 0.7, "learning_rate": 0.00015157563025210082, "loss": 1.7628, "step": 259500 }, { "epoch": 0.7, "learning_rate": 0.00015126050420168066, "loss": 1.7605, "step": 260000 }, { "epoch": 0.71, "learning_rate": 0.00015094537815126048, "loss": 1.7624, "step": 260500 }, { "epoch": 0.71, "learning_rate": 0.00015063025210084032, "loss": 1.7642, "step": 261000 }, { "epoch": 0.71, "learning_rate": 0.00015031512605042014, "loss": 1.7577, "step": 261500 }, { "epoch": 0.71, "learning_rate": 0.00015, "loss": 1.7586, "step": 262000 }, { "epoch": 0.71, "learning_rate": 0.00014968487394957983, "loss": 1.7582, "step": 262500 }, { "epoch": 0.71, "learning_rate": 0.00014936974789915965, "loss": 1.7578, "step": 263000 }, { "epoch": 0.71, "learning_rate": 0.0001490546218487395, "loss": 1.7638, "step": 263500 }, { "epoch": 0.71, "learning_rate": 0.0001487394957983193, "loss": 1.7529, "step": 264000 }, { "epoch": 0.72, "learning_rate": 0.00014842436974789916, "loss": 1.7571, "step": 264500 }, { "epoch": 0.72, "learning_rate": 0.00014810924369747897, "loss": 1.7603, "step": 265000 }, { "epoch": 0.72, "learning_rate": 0.00014779411764705882, "loss": 1.7539, "step": 265500 }, { "epoch": 0.72, "learning_rate": 0.00014747899159663863, "loss": 1.7537, "step": 266000 }, { "epoch": 0.72, "learning_rate": 0.00014716386554621848, "loss": 1.7532, "step": 266500 }, { "epoch": 0.72, "learning_rate": 0.0001468487394957983, "loss": 1.7529, "step": 267000 }, { "epoch": 0.72, "learning_rate": 0.00014653361344537814, "loss": 1.7504, "step": 267500 }, { "epoch": 0.73, "learning_rate": 0.00014621848739495796, "loss": 1.7513, "step": 268000 }, { "epoch": 0.73, "learning_rate": 0.0001459033613445378, "loss": 1.7492, "step": 268500 }, { "epoch": 0.73, "learning_rate": 0.00014558823529411762, "loss": 1.7476, "step": 269000 }, { "epoch": 0.73, "learning_rate": 0.00014527310924369747, "loss": 1.7557, "step": 269500 }, { "epoch": 0.73, "learning_rate": 0.0001449579831932773, "loss": 1.7506, "step": 270000 }, { "epoch": 0.73, "learning_rate": 0.00014464285714285713, "loss": 1.7497, "step": 270500 }, { "epoch": 0.73, "learning_rate": 0.00014432773109243697, "loss": 1.7499, "step": 271000 }, { "epoch": 0.73, "learning_rate": 0.0001440126050420168, "loss": 1.7459, "step": 271500 }, { "epoch": 0.74, "learning_rate": 0.00014369747899159663, "loss": 1.7445, "step": 272000 }, { "epoch": 0.74, "learning_rate": 0.00014338235294117648, "loss": 1.7445, "step": 272500 }, { "epoch": 0.74, "learning_rate": 0.0001430672268907563, "loss": 1.739, "step": 273000 }, { "epoch": 0.74, "learning_rate": 0.0001427521008403361, "loss": 1.7409, "step": 273500 }, { "epoch": 0.74, "learning_rate": 0.00014243697478991596, "loss": 1.7429, "step": 274000 }, { "epoch": 0.74, "learning_rate": 0.00014212184873949578, "loss": 1.741, "step": 274500 }, { "epoch": 0.74, "learning_rate": 0.00014180672268907562, "loss": 1.7431, "step": 275000 }, { "epoch": 0.75, "learning_rate": 0.00014149159663865544, "loss": 1.745, "step": 275500 }, { "epoch": 0.75, "learning_rate": 0.00014117647058823528, "loss": 1.7412, "step": 276000 }, { "epoch": 0.75, "learning_rate": 0.00014086134453781513, "loss": 1.7372, "step": 276500 }, { "epoch": 0.75, "learning_rate": 0.00014054621848739494, "loss": 1.7369, "step": 277000 }, { "epoch": 0.75, "learning_rate": 0.0001402310924369748, "loss": 1.7373, "step": 277500 }, { "epoch": 0.75, "learning_rate": 0.0001399159663865546, "loss": 1.7404, "step": 278000 }, { "epoch": 0.75, "learning_rate": 0.00013960084033613445, "loss": 1.7424, "step": 278500 }, { "epoch": 0.76, "learning_rate": 0.00013928571428571427, "loss": 1.7359, "step": 279000 }, { "epoch": 0.76, "learning_rate": 0.0001389705882352941, "loss": 1.7376, "step": 279500 }, { "epoch": 0.76, "learning_rate": 0.00013865546218487396, "loss": 1.7362, "step": 280000 }, { "epoch": 0.76, "learning_rate": 0.00013834033613445377, "loss": 1.7405, "step": 280500 }, { "epoch": 0.76, "learning_rate": 0.0001380252100840336, "loss": 1.7375, "step": 281000 }, { "epoch": 0.76, "learning_rate": 0.00013771008403361344, "loss": 1.7355, "step": 281500 }, { "epoch": 0.76, "learning_rate": 0.00013739495798319325, "loss": 1.733, "step": 282000 }, { "epoch": 0.76, "learning_rate": 0.0001370798319327731, "loss": 1.7353, "step": 282500 }, { "epoch": 0.77, "learning_rate": 0.00013676470588235292, "loss": 1.7307, "step": 283000 }, { "epoch": 0.77, "learning_rate": 0.00013644957983193276, "loss": 1.7362, "step": 283500 }, { "epoch": 0.77, "learning_rate": 0.0001361344537815126, "loss": 1.7338, "step": 284000 }, { "epoch": 0.77, "learning_rate": 0.00013581932773109242, "loss": 1.732, "step": 284500 }, { "epoch": 0.77, "learning_rate": 0.00013550420168067227, "loss": 1.7284, "step": 285000 }, { "epoch": 0.77, "learning_rate": 0.00013518907563025208, "loss": 1.7307, "step": 285500 }, { "epoch": 0.77, "learning_rate": 0.00013487394957983193, "loss": 1.7303, "step": 286000 }, { "epoch": 0.78, "learning_rate": 0.00013455882352941175, "loss": 1.7277, "step": 286500 }, { "epoch": 0.78, "learning_rate": 0.0001342436974789916, "loss": 1.7332, "step": 287000 }, { "epoch": 0.78, "learning_rate": 0.00013392857142857144, "loss": 1.7255, "step": 287500 }, { "epoch": 0.78, "learning_rate": 0.00013361344537815125, "loss": 1.7242, "step": 288000 }, { "epoch": 0.78, "learning_rate": 0.00013329831932773107, "loss": 1.7248, "step": 288500 }, { "epoch": 0.78, "learning_rate": 0.00013298319327731091, "loss": 1.731, "step": 289000 }, { "epoch": 0.78, "learning_rate": 0.00013266806722689073, "loss": 1.7324, "step": 289500 }, { "epoch": 0.79, "learning_rate": 0.00013235294117647058, "loss": 1.7314, "step": 290000 }, { "epoch": 0.79, "learning_rate": 0.0001320378151260504, "loss": 1.7325, "step": 290500 }, { "epoch": 0.79, "learning_rate": 0.00013172268907563024, "loss": 1.7259, "step": 291000 }, { "epoch": 0.79, "learning_rate": 0.00013140756302521008, "loss": 1.7271, "step": 291500 }, { "epoch": 0.79, "learning_rate": 0.0001310924369747899, "loss": 1.7257, "step": 292000 }, { "epoch": 0.79, "learning_rate": 0.00013077731092436975, "loss": 1.7267, "step": 292500 }, { "epoch": 0.79, "learning_rate": 0.00013046218487394956, "loss": 1.7262, "step": 293000 }, { "epoch": 0.79, "learning_rate": 0.0001301470588235294, "loss": 1.7245, "step": 293500 }, { "epoch": 0.8, "learning_rate": 0.00012983193277310922, "loss": 1.7252, "step": 294000 }, { "epoch": 0.8, "learning_rate": 0.00012951680672268907, "loss": 1.7217, "step": 294500 }, { "epoch": 0.8, "learning_rate": 0.00012920168067226891, "loss": 1.7193, "step": 295000 }, { "epoch": 0.8, "learning_rate": 0.00012888655462184873, "loss": 1.7184, "step": 295500 }, { "epoch": 0.8, "learning_rate": 0.00012857142857142855, "loss": 1.7205, "step": 296000 }, { "epoch": 0.8, "learning_rate": 0.0001282563025210084, "loss": 1.7195, "step": 296500 }, { "epoch": 0.8, "learning_rate": 0.0001279411764705882, "loss": 1.7195, "step": 297000 }, { "epoch": 0.81, "learning_rate": 0.00012762605042016806, "loss": 1.7126, "step": 297500 }, { "epoch": 0.81, "learning_rate": 0.00012731092436974787, "loss": 1.7158, "step": 298000 }, { "epoch": 0.81, "learning_rate": 0.00012699579831932772, "loss": 1.7137, "step": 298500 }, { "epoch": 0.81, "learning_rate": 0.00012668067226890756, "loss": 1.7154, "step": 299000 }, { "epoch": 0.81, "learning_rate": 0.00012636554621848738, "loss": 1.7192, "step": 299500 }, { "epoch": 0.81, "learning_rate": 0.00012605042016806722, "loss": 1.7155, "step": 300000 }, { "epoch": 0.81, "learning_rate": 0.00012573529411764704, "loss": 1.7106, "step": 300500 }, { "epoch": 0.81, "learning_rate": 0.00012542016806722689, "loss": 1.7127, "step": 301000 }, { "epoch": 0.82, "learning_rate": 0.00012510504201680673, "loss": 1.7103, "step": 301500 }, { "epoch": 0.82, "learning_rate": 0.00012478991596638655, "loss": 1.7125, "step": 302000 }, { "epoch": 0.82, "learning_rate": 0.0001244747899159664, "loss": 1.7141, "step": 302500 }, { "epoch": 0.82, "learning_rate": 0.0001241596638655462, "loss": 1.7122, "step": 303000 }, { "epoch": 0.82, "learning_rate": 0.00012384453781512603, "loss": 1.7121, "step": 303500 }, { "epoch": 0.82, "learning_rate": 0.00012352941176470587, "loss": 1.7106, "step": 304000 }, { "epoch": 0.82, "learning_rate": 0.0001232142857142857, "loss": 1.708, "step": 304500 }, { "epoch": 0.83, "learning_rate": 0.00012289915966386553, "loss": 1.7086, "step": 305000 }, { "epoch": 0.83, "learning_rate": 0.00012258403361344538, "loss": 1.7096, "step": 305500 }, { "epoch": 0.83, "learning_rate": 0.0001222689075630252, "loss": 1.7094, "step": 306000 }, { "epoch": 0.83, "learning_rate": 0.00012195378151260503, "loss": 1.7103, "step": 306500 }, { "epoch": 0.83, "learning_rate": 0.00012163865546218486, "loss": 1.7038, "step": 307000 }, { "epoch": 0.83, "learning_rate": 0.0001213235294117647, "loss": 1.707, "step": 307500 }, { "epoch": 0.83, "learning_rate": 0.00012100840336134453, "loss": 1.7094, "step": 308000 }, { "epoch": 0.84, "learning_rate": 0.00012069327731092436, "loss": 1.7078, "step": 308500 }, { "epoch": 0.84, "learning_rate": 0.0001203781512605042, "loss": 1.7043, "step": 309000 }, { "epoch": 0.84, "learning_rate": 0.00012006302521008403, "loss": 1.7079, "step": 309500 }, { "epoch": 0.84, "learning_rate": 0.00011974789915966386, "loss": 1.7025, "step": 310000 }, { "epoch": 0.84, "learning_rate": 0.00011943277310924367, "loss": 1.7065, "step": 310500 }, { "epoch": 0.84, "learning_rate": 0.0001191176470588235, "loss": 1.7047, "step": 311000 }, { "epoch": 0.84, "learning_rate": 0.00011880252100840335, "loss": 1.6999, "step": 311500 }, { "epoch": 0.84, "learning_rate": 0.00011848739495798318, "loss": 1.7018, "step": 312000 }, { "epoch": 0.85, "learning_rate": 0.00011817226890756301, "loss": 1.7037, "step": 312500 }, { "epoch": 0.85, "learning_rate": 0.00011785714285714284, "loss": 1.6972, "step": 313000 }, { "epoch": 0.85, "learning_rate": 0.00011754201680672267, "loss": 1.6998, "step": 313500 }, { "epoch": 0.85, "learning_rate": 0.0001172268907563025, "loss": 1.7012, "step": 314000 }, { "epoch": 0.85, "learning_rate": 0.00011691176470588235, "loss": 1.6978, "step": 314500 }, { "epoch": 0.85, "learning_rate": 0.00011659663865546218, "loss": 1.7013, "step": 315000 }, { "epoch": 0.85, "learning_rate": 0.00011628151260504201, "loss": 1.6982, "step": 315500 }, { "epoch": 0.86, "learning_rate": 0.00011596638655462184, "loss": 1.699, "step": 316000 }, { "epoch": 0.86, "learning_rate": 0.00011565126050420167, "loss": 1.6956, "step": 316500 }, { "epoch": 0.86, "learning_rate": 0.0001153361344537815, "loss": 1.7124, "step": 317000 }, { "epoch": 0.86, "learning_rate": 0.00011502100840336134, "loss": 1.7101, "step": 317500 }, { "epoch": 0.86, "learning_rate": 0.00011470588235294115, "loss": 1.7024, "step": 318000 }, { "epoch": 0.86, "learning_rate": 0.000114390756302521, "loss": 1.7011, "step": 318500 }, { "epoch": 0.86, "learning_rate": 0.00011407563025210083, "loss": 1.7009, "step": 319000 }, { "epoch": 0.86, "learning_rate": 0.00011376050420168066, "loss": 1.6989, "step": 319500 }, { "epoch": 0.87, "learning_rate": 0.00011344537815126049, "loss": 1.7002, "step": 320000 }, { "epoch": 0.87, "learning_rate": 0.00011313025210084032, "loss": 1.6989, "step": 320500 }, { "epoch": 0.87, "learning_rate": 0.00011281512605042015, "loss": 1.6977, "step": 321000 }, { "epoch": 0.87, "learning_rate": 0.0001125, "loss": 1.6991, "step": 321500 }, { "epoch": 0.87, "learning_rate": 0.00011218487394957983, "loss": 1.6965, "step": 322000 }, { "epoch": 0.87, "learning_rate": 0.00011186974789915966, "loss": 1.6934, "step": 322500 }, { "epoch": 0.87, "learning_rate": 0.00011155462184873949, "loss": 1.6943, "step": 323000 }, { "epoch": 0.88, "learning_rate": 0.00011123949579831932, "loss": 1.6894, "step": 323500 }, { "epoch": 0.88, "learning_rate": 0.00011092436974789915, "loss": 1.6906, "step": 324000 }, { "epoch": 0.88, "learning_rate": 0.00011060924369747898, "loss": 1.7128, "step": 324500 }, { "epoch": 0.88, "learning_rate": 0.00011029411764705883, "loss": 1.6963, "step": 325000 }, { "epoch": 0.88, "learning_rate": 0.00010997899159663864, "loss": 1.7206, "step": 325500 }, { "epoch": 0.88, "learning_rate": 0.00010966386554621848, "loss": 1.7105, "step": 326000 }, { "epoch": 0.88, "learning_rate": 0.00010934873949579831, "loss": 1.6997, "step": 326500 }, { "epoch": 0.89, "learning_rate": 0.00010903361344537814, "loss": 1.7016, "step": 327000 }, { "epoch": 0.89, "learning_rate": 0.00010871848739495797, "loss": 1.7026, "step": 327500 }, { "epoch": 0.89, "learning_rate": 0.0001084033613445378, "loss": 1.6942, "step": 328000 }, { "epoch": 0.89, "learning_rate": 0.00010808823529411763, "loss": 1.6964, "step": 328500 }, { "epoch": 0.89, "learning_rate": 0.00010777310924369748, "loss": 1.696, "step": 329000 }, { "epoch": 0.89, "learning_rate": 0.0001074579831932773, "loss": 1.694, "step": 329500 }, { "epoch": 0.89, "learning_rate": 0.00010714285714285714, "loss": 1.6878, "step": 330000 }, { "epoch": 0.89, "learning_rate": 0.00010682773109243697, "loss": 1.6921, "step": 330500 }, { "epoch": 0.9, "learning_rate": 0.0001065126050420168, "loss": 1.6871, "step": 331000 }, { "epoch": 0.9, "learning_rate": 0.00010619747899159663, "loss": 1.6846, "step": 331500 }, { "epoch": 0.9, "learning_rate": 0.00010588235294117647, "loss": 1.6895, "step": 332000 }, { "epoch": 0.9, "learning_rate": 0.0001055672268907563, "loss": 1.6855, "step": 332500 }, { "epoch": 0.9, "learning_rate": 0.00010525210084033612, "loss": 1.6807, "step": 333000 }, { "epoch": 0.9, "learning_rate": 0.00010493697478991595, "loss": 1.6864, "step": 333500 }, { "epoch": 0.9, "learning_rate": 0.00010462184873949579, "loss": 1.681, "step": 334000 }, { "epoch": 0.91, "learning_rate": 0.00010430672268907562, "loss": 1.6913, "step": 334500 }, { "epoch": 0.91, "learning_rate": 0.00010399159663865545, "loss": 1.6789, "step": 335000 }, { "epoch": 0.91, "learning_rate": 0.00010367647058823528, "loss": 1.6939, "step": 335500 }, { "epoch": 0.91, "learning_rate": 0.00010336134453781512, "loss": 1.7741, "step": 336000 }, { "epoch": 0.91, "learning_rate": 0.00010304621848739495, "loss": 1.7619, "step": 336500 }, { "epoch": 0.91, "learning_rate": 0.00010273109243697478, "loss": 1.7091, "step": 337000 }, { "epoch": 0.91, "learning_rate": 0.00010241596638655462, "loss": 1.7116, "step": 337500 }, { "epoch": 0.91, "learning_rate": 0.00010210084033613445, "loss": 1.7243, "step": 338000 }, { "epoch": 0.92, "learning_rate": 0.00010178571428571428, "loss": 1.7255, "step": 338500 }, { "epoch": 0.92, "learning_rate": 0.00010147058823529411, "loss": 1.715, "step": 339000 }, { "epoch": 0.92, "learning_rate": 0.00010115546218487395, "loss": 1.6924, "step": 339500 }, { "epoch": 0.92, "learning_rate": 0.00010084033613445378, "loss": 1.6824, "step": 340000 }, { "epoch": 0.92, "learning_rate": 0.0001005252100840336, "loss": 1.6885, "step": 340500 }, { "epoch": 0.92, "learning_rate": 0.00010021008403361343, "loss": 1.6848, "step": 341000 }, { "epoch": 0.92, "learning_rate": 9.989495798319326e-05, "loss": 1.6831, "step": 341500 }, { "epoch": 0.93, "learning_rate": 9.95798319327731e-05, "loss": 1.6836, "step": 342000 }, { "epoch": 0.93, "learning_rate": 9.926470588235293e-05, "loss": 1.6765, "step": 342500 }, { "epoch": 0.93, "learning_rate": 9.894957983193276e-05, "loss": 1.6775, "step": 343000 }, { "epoch": 0.93, "learning_rate": 9.86344537815126e-05, "loss": 1.6854, "step": 343500 }, { "epoch": 0.93, "learning_rate": 9.831932773109243e-05, "loss": 1.6851, "step": 344000 }, { "epoch": 0.93, "learning_rate": 9.800420168067226e-05, "loss": 1.6761, "step": 344500 }, { "epoch": 0.93, "learning_rate": 9.76890756302521e-05, "loss": 1.6693, "step": 345000 }, { "epoch": 0.94, "learning_rate": 9.737394957983193e-05, "loss": 1.6734, "step": 345500 }, { "epoch": 0.94, "learning_rate": 9.705882352941176e-05, "loss": 1.6774, "step": 346000 }, { "epoch": 0.94, "learning_rate": 9.67436974789916e-05, "loss": 1.6749, "step": 346500 }, { "epoch": 0.94, "learning_rate": 9.642857142857143e-05, "loss": 1.6789, "step": 347000 }, { "epoch": 0.94, "learning_rate": 9.611344537815126e-05, "loss": 1.6758, "step": 347500 }, { "epoch": 0.94, "learning_rate": 9.579831932773108e-05, "loss": 1.6753, "step": 348000 }, { "epoch": 0.94, "learning_rate": 9.548319327731091e-05, "loss": 1.6737, "step": 348500 }, { "epoch": 0.94, "learning_rate": 9.516806722689074e-05, "loss": 1.6723, "step": 349000 }, { "epoch": 0.95, "learning_rate": 9.485294117647057e-05, "loss": 1.6752, "step": 349500 }, { "epoch": 0.95, "learning_rate": 9.45378151260504e-05, "loss": 1.6706, "step": 350000 }, { "epoch": 0.95, "learning_rate": 9.422268907563025e-05, "loss": 1.669, "step": 350500 }, { "epoch": 0.95, "learning_rate": 9.390756302521008e-05, "loss": 1.6694, "step": 351000 }, { "epoch": 0.95, "learning_rate": 9.359243697478991e-05, "loss": 1.6677, "step": 351500 }, { "epoch": 0.95, "learning_rate": 9.327731092436974e-05, "loss": 1.6709, "step": 352000 }, { "epoch": 0.95, "learning_rate": 9.296218487394957e-05, "loss": 1.6645, "step": 352500 }, { "epoch": 0.96, "learning_rate": 9.26470588235294e-05, "loss": 1.6648, "step": 353000 }, { "epoch": 0.96, "learning_rate": 9.233193277310923e-05, "loss": 1.6717, "step": 353500 }, { "epoch": 0.96, "learning_rate": 9.201680672268908e-05, "loss": 1.6614, "step": 354000 }, { "epoch": 0.96, "learning_rate": 9.170168067226891e-05, "loss": 1.664, "step": 354500 }, { "epoch": 0.96, "learning_rate": 9.138655462184873e-05, "loss": 1.6681, "step": 355000 }, { "epoch": 0.96, "learning_rate": 9.107142857142856e-05, "loss": 1.6683, "step": 355500 }, { "epoch": 0.96, "learning_rate": 9.075630252100839e-05, "loss": 1.6639, "step": 356000 }, { "epoch": 0.97, "learning_rate": 9.044117647058822e-05, "loss": 1.6637, "step": 356500 }, { "epoch": 0.97, "learning_rate": 9.012605042016805e-05, "loss": 1.6576, "step": 357000 }, { "epoch": 0.97, "learning_rate": 8.981092436974788e-05, "loss": 1.6616, "step": 357500 }, { "epoch": 0.97, "learning_rate": 8.949579831932773e-05, "loss": 1.6604, "step": 358000 }, { "epoch": 0.97, "learning_rate": 8.918067226890756e-05, "loss": 1.6611, "step": 358500 }, { "epoch": 0.97, "learning_rate": 8.886554621848739e-05, "loss": 1.6597, "step": 359000 }, { "epoch": 0.97, "learning_rate": 8.855042016806722e-05, "loss": 1.6613, "step": 359500 }, { "epoch": 0.97, "learning_rate": 8.823529411764705e-05, "loss": 1.6588, "step": 360000 }, { "epoch": 0.98, "learning_rate": 8.792016806722688e-05, "loss": 1.6573, "step": 360500 }, { "epoch": 0.98, "learning_rate": 8.760504201680673e-05, "loss": 1.6587, "step": 361000 }, { "epoch": 0.98, "learning_rate": 8.728991596638656e-05, "loss": 1.6581, "step": 361500 }, { "epoch": 0.98, "learning_rate": 8.697478991596639e-05, "loss": 1.6531, "step": 362000 }, { "epoch": 0.98, "learning_rate": 8.66596638655462e-05, "loss": 1.6542, "step": 362500 }, { "epoch": 0.98, "learning_rate": 8.634453781512604e-05, "loss": 1.6545, "step": 363000 }, { "epoch": 0.98, "learning_rate": 8.602941176470587e-05, "loss": 1.6519, "step": 363500 }, { "epoch": 0.99, "learning_rate": 8.57142857142857e-05, "loss": 1.6557, "step": 364000 }, { "epoch": 0.99, "learning_rate": 8.539915966386553e-05, "loss": 1.6518, "step": 364500 }, { "epoch": 0.99, "learning_rate": 8.508403361344537e-05, "loss": 1.6531, "step": 365000 }, { "epoch": 0.99, "learning_rate": 8.47689075630252e-05, "loss": 1.6481, "step": 365500 }, { "epoch": 0.99, "learning_rate": 8.445378151260504e-05, "loss": 1.6475, "step": 366000 }, { "epoch": 0.99, "learning_rate": 8.413865546218487e-05, "loss": 1.6491, "step": 366500 }, { "epoch": 0.99, "learning_rate": 8.38235294117647e-05, "loss": 1.6556, "step": 367000 }, { "epoch": 0.99, "learning_rate": 8.350840336134453e-05, "loss": 1.6472, "step": 367500 }, { "epoch": 1.0, "learning_rate": 8.319327731092436e-05, "loss": 1.6477, "step": 368000 }, { "epoch": 1.0, "learning_rate": 8.28781512605042e-05, "loss": 1.6496, "step": 368500 }, { "epoch": 1.0, "learning_rate": 8.256302521008404e-05, "loss": 1.6479, "step": 369000 }, { "epoch": 1.0, "learning_rate": 8.224789915966387e-05, "loss": 1.6492, "step": 369500 }, { "epoch": 1.0, "learning_rate": 8.193277310924368e-05, "loss": 1.6443, "step": 370000 }, { "epoch": 1.0, "learning_rate": 8.161764705882352e-05, "loss": 1.6443, "step": 370500 }, { "epoch": 1.0, "learning_rate": 8.130252100840335e-05, "loss": 1.6462, "step": 371000 }, { "epoch": 1.01, "learning_rate": 8.098739495798318e-05, "loss": 1.6454, "step": 371500 }, { "epoch": 1.01, "learning_rate": 8.067226890756301e-05, "loss": 1.6416, "step": 372000 }, { "epoch": 1.01, "learning_rate": 8.035714285714285e-05, "loss": 1.6433, "step": 372500 }, { "epoch": 1.01, "learning_rate": 8.004201680672268e-05, "loss": 1.6447, "step": 373000 }, { "epoch": 1.01, "learning_rate": 7.972689075630251e-05, "loss": 1.6454, "step": 373500 }, { "epoch": 1.01, "learning_rate": 7.941176470588235e-05, "loss": 1.6402, "step": 374000 }, { "epoch": 1.01, "learning_rate": 7.909663865546218e-05, "loss": 1.642, "step": 374500 }, { "epoch": 1.02, "learning_rate": 7.878151260504201e-05, "loss": 1.6401, "step": 375000 }, { "epoch": 1.02, "learning_rate": 7.846638655462185e-05, "loss": 1.6446, "step": 375500 }, { "epoch": 1.02, "learning_rate": 7.815126050420168e-05, "loss": 1.6374, "step": 376000 }, { "epoch": 1.02, "learning_rate": 7.783613445378151e-05, "loss": 1.6425, "step": 376500 }, { "epoch": 1.02, "learning_rate": 7.752100840336135e-05, "loss": 1.6418, "step": 377000 }, { "epoch": 1.02, "learning_rate": 7.720588235294116e-05, "loss": 1.6407, "step": 377500 }, { "epoch": 1.02, "learning_rate": 7.6890756302521e-05, "loss": 1.6373, "step": 378000 }, { "epoch": 1.02, "learning_rate": 7.657563025210082e-05, "loss": 1.6397, "step": 378500 }, { "epoch": 1.03, "learning_rate": 7.626050420168066e-05, "loss": 1.6408, "step": 379000 }, { "epoch": 1.03, "learning_rate": 7.59453781512605e-05, "loss": 1.6407, "step": 379500 }, { "epoch": 1.03, "learning_rate": 7.563025210084033e-05, "loss": 1.6397, "step": 380000 }, { "epoch": 1.03, "learning_rate": 7.531512605042016e-05, "loss": 1.6406, "step": 380500 }, { "epoch": 1.03, "learning_rate": 7.5e-05, "loss": 1.6399, "step": 381000 }, { "epoch": 1.03, "learning_rate": 7.468487394957982e-05, "loss": 1.6403, "step": 381500 }, { "epoch": 1.03, "learning_rate": 7.436974789915966e-05, "loss": 1.6394, "step": 382000 }, { "epoch": 1.04, "learning_rate": 7.405462184873949e-05, "loss": 1.6377, "step": 382500 }, { "epoch": 1.04, "learning_rate": 7.373949579831932e-05, "loss": 1.6365, "step": 383000 }, { "epoch": 1.04, "learning_rate": 7.342436974789915e-05, "loss": 1.6329, "step": 383500 }, { "epoch": 1.04, "learning_rate": 7.310924369747898e-05, "loss": 1.6361, "step": 384000 }, { "epoch": 1.04, "learning_rate": 7.279411764705881e-05, "loss": 1.6325, "step": 384500 }, { "epoch": 1.04, "learning_rate": 7.247899159663865e-05, "loss": 1.6347, "step": 385000 }, { "epoch": 1.04, "learning_rate": 7.216386554621849e-05, "loss": 1.6364, "step": 385500 }, { "epoch": 1.04, "learning_rate": 7.184873949579832e-05, "loss": 1.6293, "step": 386000 }, { "epoch": 1.05, "learning_rate": 7.153361344537815e-05, "loss": 1.6306, "step": 386500 }, { "epoch": 1.05, "learning_rate": 7.121848739495798e-05, "loss": 1.6308, "step": 387000 }, { "epoch": 1.05, "learning_rate": 7.090336134453781e-05, "loss": 1.6315, "step": 387500 }, { "epoch": 1.05, "learning_rate": 7.058823529411764e-05, "loss": 1.6326, "step": 388000 }, { "epoch": 1.05, "learning_rate": 7.027310924369747e-05, "loss": 1.6296, "step": 388500 }, { "epoch": 1.05, "learning_rate": 6.99579831932773e-05, "loss": 1.6332, "step": 389000 }, { "epoch": 1.05, "learning_rate": 6.964285714285713e-05, "loss": 1.6337, "step": 389500 }, { "epoch": 1.06, "learning_rate": 6.932773109243698e-05, "loss": 1.6279, "step": 390000 }, { "epoch": 1.06, "learning_rate": 6.90126050420168e-05, "loss": 1.6296, "step": 390500 }, { "epoch": 1.06, "learning_rate": 6.869747899159663e-05, "loss": 1.6244, "step": 391000 }, { "epoch": 1.06, "learning_rate": 6.838235294117646e-05, "loss": 1.6323, "step": 391500 }, { "epoch": 1.06, "learning_rate": 6.80672268907563e-05, "loss": 1.63, "step": 392000 }, { "epoch": 1.06, "learning_rate": 6.775210084033613e-05, "loss": 1.6253, "step": 392500 }, { "epoch": 1.06, "learning_rate": 6.743697478991596e-05, "loss": 1.623, "step": 393000 }, { "epoch": 1.07, "learning_rate": 6.71218487394958e-05, "loss": 1.6291, "step": 393500 }, { "epoch": 1.07, "learning_rate": 6.680672268907563e-05, "loss": 1.6264, "step": 394000 }, { "epoch": 1.07, "learning_rate": 6.649159663865546e-05, "loss": 1.6278, "step": 394500 }, { "epoch": 1.07, "learning_rate": 6.617647058823529e-05, "loss": 1.6274, "step": 395000 }, { "epoch": 1.07, "learning_rate": 6.586134453781512e-05, "loss": 1.6254, "step": 395500 }, { "epoch": 1.07, "learning_rate": 6.554621848739495e-05, "loss": 1.6237, "step": 396000 }, { "epoch": 1.07, "learning_rate": 6.523109243697478e-05, "loss": 1.6232, "step": 396500 }, { "epoch": 1.07, "learning_rate": 6.491596638655461e-05, "loss": 1.6204, "step": 397000 }, { "epoch": 1.08, "learning_rate": 6.460084033613446e-05, "loss": 1.6243, "step": 397500 }, { "epoch": 1.08, "learning_rate": 6.428571428571427e-05, "loss": 1.6234, "step": 398000 }, { "epoch": 1.08, "learning_rate": 6.39705882352941e-05, "loss": 1.6237, "step": 398500 }, { "epoch": 1.08, "learning_rate": 6.365546218487394e-05, "loss": 1.6239, "step": 399000 }, { "epoch": 1.08, "learning_rate": 6.334033613445378e-05, "loss": 1.6215, "step": 399500 }, { "epoch": 1.08, "learning_rate": 6.302521008403361e-05, "loss": 1.6229, "step": 400000 }, { "epoch": 1.08, "learning_rate": 6.271008403361344e-05, "loss": 1.6197, "step": 400500 }, { "epoch": 1.09, "learning_rate": 6.239495798319327e-05, "loss": 1.6183, "step": 401000 }, { "epoch": 1.09, "learning_rate": 6.20798319327731e-05, "loss": 1.6206, "step": 401500 }, { "epoch": 1.09, "learning_rate": 6.176470588235294e-05, "loss": 1.6182, "step": 402000 }, { "epoch": 1.09, "learning_rate": 6.144957983193277e-05, "loss": 1.621, "step": 402500 }, { "epoch": 1.09, "learning_rate": 6.11344537815126e-05, "loss": 1.6204, "step": 403000 }, { "epoch": 1.09, "learning_rate": 6.081932773109243e-05, "loss": 1.6213, "step": 403500 }, { "epoch": 1.09, "learning_rate": 6.0504201680672267e-05, "loss": 1.6187, "step": 404000 }, { "epoch": 1.09, "learning_rate": 6.01890756302521e-05, "loss": 1.617, "step": 404500 }, { "epoch": 1.1, "learning_rate": 5.987394957983193e-05, "loss": 1.6145, "step": 405000 }, { "epoch": 1.1, "learning_rate": 5.955882352941175e-05, "loss": 1.6146, "step": 405500 }, { "epoch": 1.1, "learning_rate": 5.924369747899159e-05, "loss": 1.615, "step": 406000 }, { "epoch": 1.1, "learning_rate": 5.892857142857142e-05, "loss": 1.6138, "step": 406500 }, { "epoch": 1.1, "learning_rate": 5.861344537815125e-05, "loss": 1.6158, "step": 407000 }, { "epoch": 1.1, "learning_rate": 5.829831932773109e-05, "loss": 1.6149, "step": 407500 }, { "epoch": 1.1, "learning_rate": 5.798319327731092e-05, "loss": 1.6198, "step": 408000 }, { "epoch": 1.11, "learning_rate": 5.766806722689075e-05, "loss": 1.6117, "step": 408500 }, { "epoch": 1.11, "learning_rate": 5.7352941176470576e-05, "loss": 1.6141, "step": 409000 }, { "epoch": 1.11, "learning_rate": 5.7037815126050414e-05, "loss": 1.6113, "step": 409500 }, { "epoch": 1.11, "learning_rate": 5.6722689075630245e-05, "loss": 1.6126, "step": 410000 }, { "epoch": 1.11, "learning_rate": 5.6407563025210076e-05, "loss": 1.612, "step": 410500 }, { "epoch": 1.11, "learning_rate": 5.6092436974789914e-05, "loss": 1.6067, "step": 411000 }, { "epoch": 1.11, "learning_rate": 5.5777310924369745e-05, "loss": 1.6128, "step": 411500 }, { "epoch": 1.12, "learning_rate": 5.5462184873949576e-05, "loss": 1.6148, "step": 412000 }, { "epoch": 1.12, "learning_rate": 5.5147058823529414e-05, "loss": 1.61, "step": 412500 }, { "epoch": 1.12, "learning_rate": 5.483193277310924e-05, "loss": 1.6094, "step": 413000 }, { "epoch": 1.12, "learning_rate": 5.451680672268907e-05, "loss": 1.6108, "step": 413500 }, { "epoch": 1.12, "learning_rate": 5.42016806722689e-05, "loss": 1.6029, "step": 414000 }, { "epoch": 1.12, "learning_rate": 5.388655462184874e-05, "loss": 1.608, "step": 414500 }, { "epoch": 1.12, "learning_rate": 5.357142857142857e-05, "loss": 1.6046, "step": 415000 }, { "epoch": 1.12, "learning_rate": 5.32563025210084e-05, "loss": 1.6096, "step": 415500 }, { "epoch": 1.13, "learning_rate": 5.294117647058824e-05, "loss": 1.6056, "step": 416000 }, { "epoch": 1.13, "learning_rate": 5.262605042016806e-05, "loss": 1.6055, "step": 416500 }, { "epoch": 1.13, "learning_rate": 5.231092436974789e-05, "loss": 1.6019, "step": 417000 }, { "epoch": 1.13, "learning_rate": 5.1995798319327724e-05, "loss": 1.603, "step": 417500 }, { "epoch": 1.13, "learning_rate": 5.168067226890756e-05, "loss": 1.6007, "step": 418000 }, { "epoch": 1.13, "learning_rate": 5.136554621848739e-05, "loss": 1.6038, "step": 418500 }, { "epoch": 1.13, "learning_rate": 5.105042016806722e-05, "loss": 1.6017, "step": 419000 }, { "epoch": 1.14, "learning_rate": 5.0735294117647054e-05, "loss": 1.6051, "step": 419500 }, { "epoch": 1.14, "learning_rate": 5.042016806722689e-05, "loss": 1.5992, "step": 420000 }, { "epoch": 1.14, "learning_rate": 5.0105042016806716e-05, "loss": 1.6022, "step": 420500 }, { "epoch": 1.14, "learning_rate": 4.978991596638655e-05, "loss": 1.6038, "step": 421000 }, { "epoch": 1.14, "learning_rate": 4.947478991596638e-05, "loss": 1.6019, "step": 421500 }, { "epoch": 1.14, "learning_rate": 4.9159663865546216e-05, "loss": 1.6006, "step": 422000 }, { "epoch": 1.14, "learning_rate": 4.884453781512605e-05, "loss": 1.6046, "step": 422500 }, { "epoch": 1.15, "learning_rate": 4.852941176470588e-05, "loss": 1.6049, "step": 423000 }, { "epoch": 1.15, "learning_rate": 4.8214285714285716e-05, "loss": 1.6004, "step": 423500 }, { "epoch": 1.15, "learning_rate": 4.789915966386554e-05, "loss": 1.6016, "step": 424000 }, { "epoch": 1.15, "learning_rate": 4.758403361344537e-05, "loss": 1.6024, "step": 424500 }, { "epoch": 1.15, "learning_rate": 4.72689075630252e-05, "loss": 1.5994, "step": 425000 }, { "epoch": 1.15, "learning_rate": 4.695378151260504e-05, "loss": 1.5989, "step": 425500 }, { "epoch": 1.15, "learning_rate": 4.663865546218487e-05, "loss": 1.599, "step": 426000 }, { "epoch": 1.15, "learning_rate": 4.63235294117647e-05, "loss": 1.5968, "step": 426500 }, { "epoch": 1.16, "learning_rate": 4.600840336134454e-05, "loss": 1.5968, "step": 427000 }, { "epoch": 1.16, "learning_rate": 4.5693277310924364e-05, "loss": 1.5981, "step": 427500 }, { "epoch": 1.16, "learning_rate": 4.5378151260504195e-05, "loss": 1.5961, "step": 428000 }, { "epoch": 1.16, "learning_rate": 4.5063025210084026e-05, "loss": 1.5967, "step": 428500 }, { "epoch": 1.16, "learning_rate": 4.4747899159663864e-05, "loss": 1.5963, "step": 429000 }, { "epoch": 1.16, "learning_rate": 4.4432773109243695e-05, "loss": 1.5937, "step": 429500 }, { "epoch": 1.16, "learning_rate": 4.4117647058823526e-05, "loss": 1.5963, "step": 430000 }, { "epoch": 1.17, "learning_rate": 4.380252100840336e-05, "loss": 1.5961, "step": 430500 }, { "epoch": 1.17, "learning_rate": 4.3487394957983194e-05, "loss": 1.5955, "step": 431000 }, { "epoch": 1.17, "learning_rate": 4.317226890756302e-05, "loss": 1.5905, "step": 431500 }, { "epoch": 1.17, "learning_rate": 4.285714285714285e-05, "loss": 1.5956, "step": 432000 }, { "epoch": 1.17, "learning_rate": 4.254201680672269e-05, "loss": 1.5938, "step": 432500 }, { "epoch": 1.17, "learning_rate": 4.222689075630252e-05, "loss": 1.5939, "step": 433000 }, { "epoch": 1.17, "learning_rate": 4.191176470588235e-05, "loss": 1.5919, "step": 433500 }, { "epoch": 1.17, "learning_rate": 4.159663865546218e-05, "loss": 1.5905, "step": 434000 }, { "epoch": 1.18, "learning_rate": 4.128151260504202e-05, "loss": 1.589, "step": 434500 }, { "epoch": 1.18, "learning_rate": 4.096638655462184e-05, "loss": 1.592, "step": 435000 }, { "epoch": 1.18, "learning_rate": 4.065126050420167e-05, "loss": 1.594, "step": 435500 }, { "epoch": 1.18, "learning_rate": 4.0336134453781504e-05, "loss": 1.5908, "step": 436000 }, { "epoch": 1.18, "learning_rate": 4.002100840336134e-05, "loss": 1.5876, "step": 436500 }, { "epoch": 1.18, "learning_rate": 3.970588235294117e-05, "loss": 1.5899, "step": 437000 }, { "epoch": 1.18, "learning_rate": 3.9390756302521004e-05, "loss": 1.5912, "step": 437500 }, { "epoch": 1.19, "learning_rate": 3.907563025210084e-05, "loss": 1.5885, "step": 438000 }, { "epoch": 1.19, "learning_rate": 3.876050420168067e-05, "loss": 1.5914, "step": 438500 }, { "epoch": 1.19, "learning_rate": 3.84453781512605e-05, "loss": 1.5905, "step": 439000 }, { "epoch": 1.19, "learning_rate": 3.813025210084033e-05, "loss": 1.5885, "step": 439500 }, { "epoch": 1.19, "learning_rate": 3.7815126050420166e-05, "loss": 1.5861, "step": 440000 }, { "epoch": 1.19, "learning_rate": 3.75e-05, "loss": 1.5877, "step": 440500 }, { "epoch": 1.19, "learning_rate": 3.718487394957983e-05, "loss": 1.5846, "step": 441000 }, { "epoch": 1.2, "learning_rate": 3.686974789915966e-05, "loss": 1.5875, "step": 441500 }, { "epoch": 1.2, "learning_rate": 3.655462184873949e-05, "loss": 1.5854, "step": 442000 }, { "epoch": 1.2, "learning_rate": 3.623949579831933e-05, "loss": 1.5824, "step": 442500 }, { "epoch": 1.2, "learning_rate": 3.592436974789916e-05, "loss": 1.5847, "step": 443000 }, { "epoch": 1.2, "learning_rate": 3.560924369747899e-05, "loss": 1.5848, "step": 443500 }, { "epoch": 1.2, "learning_rate": 3.529411764705882e-05, "loss": 1.5862, "step": 444000 }, { "epoch": 1.2, "learning_rate": 3.497899159663865e-05, "loss": 1.583, "step": 444500 }, { "epoch": 1.2, "learning_rate": 3.466386554621849e-05, "loss": 1.5854, "step": 445000 }, { "epoch": 1.21, "learning_rate": 3.4348739495798313e-05, "loss": 1.584, "step": 445500 }, { "epoch": 1.21, "learning_rate": 3.403361344537815e-05, "loss": 1.5825, "step": 446000 }, { "epoch": 1.21, "learning_rate": 3.371848739495798e-05, "loss": 1.5825, "step": 446500 }, { "epoch": 1.21, "learning_rate": 3.340336134453781e-05, "loss": 1.58, "step": 447000 }, { "epoch": 1.21, "learning_rate": 3.3088235294117644e-05, "loss": 1.5808, "step": 447500 }, { "epoch": 1.21, "learning_rate": 3.2773109243697475e-05, "loss": 1.5785, "step": 448000 }, { "epoch": 1.21, "learning_rate": 3.2457983193277306e-05, "loss": 1.5825, "step": 448500 }, { "epoch": 1.22, "learning_rate": 3.214285714285714e-05, "loss": 1.5783, "step": 449000 }, { "epoch": 1.22, "learning_rate": 3.182773109243697e-05, "loss": 1.5762, "step": 449500 }, { "epoch": 1.22, "learning_rate": 3.1512605042016806e-05, "loss": 1.5771, "step": 450000 }, { "epoch": 1.22, "learning_rate": 3.119747899159664e-05, "loss": 1.5822, "step": 450500 }, { "epoch": 1.22, "learning_rate": 3.088235294117647e-05, "loss": 1.5834, "step": 451000 }, { "epoch": 1.22, "learning_rate": 3.05672268907563e-05, "loss": 1.5763, "step": 451500 }, { "epoch": 1.22, "learning_rate": 3.0252100840336133e-05, "loss": 1.5771, "step": 452000 }, { "epoch": 1.22, "learning_rate": 2.9936974789915964e-05, "loss": 1.5771, "step": 452500 }, { "epoch": 1.23, "learning_rate": 2.9621848739495795e-05, "loss": 1.5819, "step": 453000 }, { "epoch": 1.23, "learning_rate": 2.9306722689075626e-05, "loss": 1.5786, "step": 453500 }, { "epoch": 1.23, "learning_rate": 2.899159663865546e-05, "loss": 1.579, "step": 454000 }, { "epoch": 1.23, "learning_rate": 2.8676470588235288e-05, "loss": 1.5774, "step": 454500 }, { "epoch": 1.23, "learning_rate": 2.8361344537815123e-05, "loss": 1.5733, "step": 455000 }, { "epoch": 1.23, "learning_rate": 2.8046218487394957e-05, "loss": 1.5743, "step": 455500 }, { "epoch": 1.23, "learning_rate": 2.7731092436974788e-05, "loss": 1.5715, "step": 456000 }, { "epoch": 1.24, "learning_rate": 2.741596638655462e-05, "loss": 1.5767, "step": 456500 }, { "epoch": 1.24, "learning_rate": 2.710084033613445e-05, "loss": 1.5753, "step": 457000 }, { "epoch": 1.24, "learning_rate": 2.6785714285714284e-05, "loss": 1.5726, "step": 457500 }, { "epoch": 1.24, "learning_rate": 2.647058823529412e-05, "loss": 1.5729, "step": 458000 }, { "epoch": 1.24, "learning_rate": 2.6155462184873946e-05, "loss": 1.5735, "step": 458500 }, { "epoch": 1.24, "learning_rate": 2.584033613445378e-05, "loss": 1.5719, "step": 459000 }, { "epoch": 1.24, "learning_rate": 2.552521008403361e-05, "loss": 1.5673, "step": 459500 }, { "epoch": 1.25, "learning_rate": 2.5210084033613446e-05, "loss": 1.5746, "step": 460000 }, { "epoch": 1.25, "learning_rate": 2.4894957983193274e-05, "loss": 1.5715, "step": 460500 }, { "epoch": 1.25, "learning_rate": 2.4579831932773108e-05, "loss": 1.5698, "step": 461000 }, { "epoch": 1.25, "learning_rate": 2.426470588235294e-05, "loss": 1.569, "step": 461500 }, { "epoch": 1.25, "learning_rate": 2.394957983193277e-05, "loss": 1.5693, "step": 462000 }, { "epoch": 1.25, "learning_rate": 2.36344537815126e-05, "loss": 1.5718, "step": 462500 }, { "epoch": 1.25, "learning_rate": 2.3319327731092435e-05, "loss": 1.5704, "step": 463000 }, { "epoch": 1.25, "learning_rate": 2.300420168067227e-05, "loss": 1.566, "step": 463500 }, { "epoch": 1.26, "learning_rate": 2.2689075630252097e-05, "loss": 1.5702, "step": 464000 }, { "epoch": 1.26, "learning_rate": 2.2373949579831932e-05, "loss": 1.572, "step": 464500 }, { "epoch": 1.26, "learning_rate": 2.2058823529411763e-05, "loss": 1.5689, "step": 465000 }, { "epoch": 1.26, "learning_rate": 2.1743697478991597e-05, "loss": 1.5692, "step": 465500 }, { "epoch": 1.26, "learning_rate": 2.1428571428571425e-05, "loss": 1.5679, "step": 466000 }, { "epoch": 1.26, "learning_rate": 2.111344537815126e-05, "loss": 1.5645, "step": 466500 }, { "epoch": 1.26, "learning_rate": 2.079831932773109e-05, "loss": 1.5667, "step": 467000 }, { "epoch": 1.27, "learning_rate": 2.048319327731092e-05, "loss": 1.5659, "step": 467500 }, { "epoch": 1.27, "learning_rate": 2.0168067226890752e-05, "loss": 1.5628, "step": 468000 }, { "epoch": 1.27, "learning_rate": 1.9852941176470586e-05, "loss": 1.5622, "step": 468500 }, { "epoch": 1.27, "learning_rate": 1.953781512605042e-05, "loss": 1.5674, "step": 469000 }, { "epoch": 1.27, "learning_rate": 1.922268907563025e-05, "loss": 1.5645, "step": 469500 }, { "epoch": 1.27, "learning_rate": 1.8907563025210083e-05, "loss": 1.5647, "step": 470000 }, { "epoch": 1.27, "learning_rate": 1.8592436974789914e-05, "loss": 1.5641, "step": 470500 }, { "epoch": 1.28, "learning_rate": 1.8277310924369745e-05, "loss": 1.5656, "step": 471000 }, { "epoch": 1.28, "learning_rate": 1.796218487394958e-05, "loss": 1.5635, "step": 471500 }, { "epoch": 1.28, "learning_rate": 1.764705882352941e-05, "loss": 1.5612, "step": 472000 }, { "epoch": 1.28, "learning_rate": 1.7331932773109245e-05, "loss": 1.5619, "step": 472500 }, { "epoch": 1.28, "learning_rate": 1.7016806722689076e-05, "loss": 1.5643, "step": 473000 }, { "epoch": 1.28, "learning_rate": 1.6701680672268907e-05, "loss": 1.5607, "step": 473500 }, { "epoch": 1.28, "learning_rate": 1.6386554621848738e-05, "loss": 1.5642, "step": 474000 }, { "epoch": 1.28, "learning_rate": 1.607142857142857e-05, "loss": 1.5603, "step": 474500 }, { "epoch": 1.29, "learning_rate": 1.5756302521008403e-05, "loss": 1.5591, "step": 475000 }, { "epoch": 1.29, "learning_rate": 1.5441176470588234e-05, "loss": 1.5632, "step": 475500 }, { "epoch": 1.29, "learning_rate": 1.5126050420168067e-05, "loss": 1.5606, "step": 476000 }, { "epoch": 1.29, "learning_rate": 1.4810924369747898e-05, "loss": 1.5598, "step": 476500 }, { "epoch": 1.29, "learning_rate": 1.449579831932773e-05, "loss": 1.5616, "step": 477000 }, { "epoch": 1.29, "learning_rate": 1.4180672268907561e-05, "loss": 1.5609, "step": 477500 }, { "epoch": 1.29, "learning_rate": 1.3865546218487394e-05, "loss": 1.56, "step": 478000 }, { "epoch": 1.3, "learning_rate": 1.3550420168067225e-05, "loss": 1.5609, "step": 478500 }, { "epoch": 1.3, "learning_rate": 1.323529411764706e-05, "loss": 1.5603, "step": 479000 }, { "epoch": 1.3, "learning_rate": 1.292016806722689e-05, "loss": 1.5612, "step": 479500 }, { "epoch": 1.3, "learning_rate": 1.2605042016806723e-05, "loss": 1.5655, "step": 480000 }, { "epoch": 1.3, "learning_rate": 1.2289915966386554e-05, "loss": 1.5588, "step": 480500 }, { "epoch": 1.3, "learning_rate": 1.1974789915966385e-05, "loss": 1.561, "step": 481000 }, { "epoch": 1.3, "learning_rate": 1.1659663865546218e-05, "loss": 1.5585, "step": 481500 }, { "epoch": 1.3, "learning_rate": 1.1344537815126049e-05, "loss": 1.5569, "step": 482000 }, { "epoch": 1.31, "learning_rate": 1.1029411764705881e-05, "loss": 1.5576, "step": 482500 }, { "epoch": 1.31, "learning_rate": 1.0714285714285712e-05, "loss": 1.5551, "step": 483000 }, { "epoch": 1.31, "learning_rate": 1.0399159663865545e-05, "loss": 1.5576, "step": 483500 }, { "epoch": 1.31, "learning_rate": 1.0084033613445376e-05, "loss": 1.558, "step": 484000 }, { "epoch": 1.31, "learning_rate": 9.76890756302521e-06, "loss": 1.5595, "step": 484500 }, { "epoch": 1.31, "learning_rate": 9.453781512605041e-06, "loss": 1.5606, "step": 485000 }, { "epoch": 1.31, "learning_rate": 9.138655462184872e-06, "loss": 1.5629, "step": 485500 }, { "epoch": 1.32, "learning_rate": 8.823529411764705e-06, "loss": 1.5605, "step": 486000 }, { "epoch": 1.32, "learning_rate": 8.508403361344538e-06, "loss": 1.5582, "step": 486500 }, { "epoch": 1.32, "learning_rate": 8.193277310924369e-06, "loss": 1.5558, "step": 487000 }, { "epoch": 1.32, "learning_rate": 7.878151260504201e-06, "loss": 1.5562, "step": 487500 }, { "epoch": 1.32, "learning_rate": 7.563025210084033e-06, "loss": 1.559, "step": 488000 }, { "epoch": 1.32, "learning_rate": 7.247899159663865e-06, "loss": 1.5608, "step": 488500 }, { "epoch": 1.32, "learning_rate": 6.932773109243697e-06, "loss": 1.5595, "step": 489000 }, { "epoch": 1.33, "learning_rate": 6.61764705882353e-06, "loss": 1.5569, "step": 489500 }, { "epoch": 1.33, "learning_rate": 6.3025210084033615e-06, "loss": 1.5545, "step": 490000 }, { "epoch": 1.33, "learning_rate": 5.9873949579831925e-06, "loss": 1.557, "step": 490500 }, { "epoch": 1.33, "learning_rate": 5.672268907563024e-06, "loss": 1.5559, "step": 491000 }, { "epoch": 1.33, "learning_rate": 5.357142857142856e-06, "loss": 1.5577, "step": 491500 }, { "epoch": 1.33, "learning_rate": 5.042016806722688e-06, "loss": 1.5555, "step": 492000 }, { "epoch": 1.33, "learning_rate": 4.726890756302521e-06, "loss": 1.5554, "step": 492500 }, { "epoch": 1.33, "learning_rate": 4.4117647058823526e-06, "loss": 1.5552, "step": 493000 }, { "epoch": 1.34, "learning_rate": 4.096638655462184e-06, "loss": 1.5573, "step": 493500 }, { "epoch": 1.34, "learning_rate": 3.7815126050420167e-06, "loss": 1.5555, "step": 494000 }, { "epoch": 1.34, "learning_rate": 3.4663865546218485e-06, "loss": 1.5551, "step": 494500 }, { "epoch": 1.34, "learning_rate": 3.1512605042016808e-06, "loss": 1.5499, "step": 495000 }, { "epoch": 1.34, "learning_rate": 2.836134453781512e-06, "loss": 1.5534, "step": 495500 }, { "epoch": 1.34, "learning_rate": 2.521008403361344e-06, "loss": 1.5509, "step": 496000 }, { "epoch": 1.34, "learning_rate": 2.2058823529411763e-06, "loss": 1.5551, "step": 496500 }, { "epoch": 1.35, "learning_rate": 1.8907563025210083e-06, "loss": 1.5567, "step": 497000 }, { "epoch": 1.35, "learning_rate": 1.5756302521008404e-06, "loss": 1.5546, "step": 497500 }, { "epoch": 1.35, "learning_rate": 1.260504201680672e-06, "loss": 1.5567, "step": 498000 }, { "epoch": 1.35, "learning_rate": 9.453781512605042e-07, "loss": 1.5546, "step": 498500 }, { "epoch": 1.35, "learning_rate": 6.30252100840336e-07, "loss": 1.5575, "step": 499000 }, { "epoch": 1.35, "learning_rate": 3.15126050420168e-07, "loss": 1.5552, "step": 499500 }, { "epoch": 1.35, "learning_rate": 0.0, "loss": 1.552, "step": 500000 }, { "epoch": 1.35, "step": 500000, "total_flos": 8.422691657052488e+18, "train_loss": 1.8320032868652343, "train_runtime": 99252.1755, "train_samples_per_second": 1289.644, "train_steps_per_second": 5.038 } ], "logging_steps": 500, "max_steps": 500000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10000, "total_flos": 8.422691657052488e+18, "train_batch_size": 256, "trial_name": null, "trial_params": null }