{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.0, "global_step": 94325, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.5e-05, "loss": 0.7132, "step": 50 }, { "epoch": 0.03, "learning_rate": 5e-05, "loss": 0.2354, "step": 100 }, { "epoch": 0.04, "learning_rate": 4.9973467763332446e-05, "loss": 0.2126, "step": 150 }, { "epoch": 0.05, "learning_rate": 4.9946935526664904e-05, "loss": 0.2106, "step": 200 }, { "epoch": 0.07, "learning_rate": 4.992040328999735e-05, "loss": 0.1826, "step": 250 }, { "epoch": 0.08, "learning_rate": 4.98938710533298e-05, "loss": 0.2157, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.986733881666225e-05, "loss": 0.2319, "step": 350 }, { "epoch": 0.11, "learning_rate": 4.98408065799947e-05, "loss": 0.1856, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.9814274343327143e-05, "loss": 0.2086, "step": 450 }, { "epoch": 0.13, "learning_rate": 4.9787742106659594e-05, "loss": 0.1875, "step": 500 }, { "epoch": 0.15, "learning_rate": 4.9761209869992045e-05, "loss": 0.177, "step": 550 }, { "epoch": 0.16, "learning_rate": 4.973467763332449e-05, "loss": 0.1842, "step": 600 }, { "epoch": 0.17, "learning_rate": 4.970814539665694e-05, "loss": 0.1874, "step": 650 }, { "epoch": 0.19, "learning_rate": 4.968161315998939e-05, "loss": 0.1717, "step": 700 }, { "epoch": 0.2, "learning_rate": 4.965508092332184e-05, "loss": 0.2115, "step": 750 }, { "epoch": 0.21, "learning_rate": 4.9628548686654285e-05, "loss": 0.2041, "step": 800 }, { "epoch": 0.23, "learning_rate": 4.9602016449986735e-05, "loss": 0.182, "step": 850 }, { "epoch": 0.24, "learning_rate": 4.9575484213319186e-05, "loss": 0.1523, "step": 900 }, { "epoch": 0.25, "learning_rate": 4.9548951976651637e-05, "loss": 0.187, "step": 950 }, { "epoch": 0.27, "learning_rate": 4.952241973998408e-05, "loss": 0.1861, "step": 1000 }, { "epoch": 0.28, "learning_rate": 4.949588750331653e-05, "loss": 0.1605, "step": 1050 }, { "epoch": 0.29, "learning_rate": 4.946935526664898e-05, "loss": 0.1499, "step": 1100 }, { "epoch": 0.3, "learning_rate": 4.9442823029981426e-05, "loss": 0.1522, "step": 1150 }, { "epoch": 0.32, "learning_rate": 4.9416290793313876e-05, "loss": 0.1781, "step": 1200 }, { "epoch": 0.33, "learning_rate": 4.938975855664633e-05, "loss": 0.1425, "step": 1250 }, { "epoch": 0.34, "learning_rate": 4.936322631997878e-05, "loss": 0.174, "step": 1300 }, { "epoch": 0.36, "learning_rate": 4.933669408331122e-05, "loss": 0.159, "step": 1350 }, { "epoch": 0.37, "learning_rate": 4.931016184664368e-05, "loss": 0.1535, "step": 1400 }, { "epoch": 0.38, "learning_rate": 4.928362960997612e-05, "loss": 0.1948, "step": 1450 }, { "epoch": 0.4, "learning_rate": 4.9257097373308574e-05, "loss": 0.151, "step": 1500 }, { "epoch": 0.41, "learning_rate": 4.9230565136641024e-05, "loss": 0.1701, "step": 1550 }, { "epoch": 0.42, "learning_rate": 4.920403289997347e-05, "loss": 0.1532, "step": 1600 }, { "epoch": 0.44, "learning_rate": 4.917750066330592e-05, "loss": 0.1433, "step": 1650 }, { "epoch": 0.45, "learning_rate": 4.915096842663837e-05, "loss": 0.1477, "step": 1700 }, { "epoch": 0.46, "learning_rate": 4.912443618997082e-05, "loss": 0.1447, "step": 1750 }, { "epoch": 0.48, "learning_rate": 4.9097903953303264e-05, "loss": 0.151, "step": 1800 }, { "epoch": 0.49, "learning_rate": 4.9071371716635715e-05, "loss": 0.1442, "step": 1850 }, { "epoch": 0.5, "learning_rate": 4.9044839479968165e-05, "loss": 0.1383, "step": 1900 }, { "epoch": 0.52, "learning_rate": 4.9018307243300616e-05, "loss": 0.1607, "step": 1950 }, { "epoch": 0.53, "learning_rate": 4.899177500663306e-05, "loss": 0.165, "step": 2000 }, { "epoch": 0.54, "learning_rate": 4.896524276996551e-05, "loss": 0.1567, "step": 2050 }, { "epoch": 0.56, "learning_rate": 4.893871053329796e-05, "loss": 0.1516, "step": 2100 }, { "epoch": 0.57, "learning_rate": 4.8912178296630405e-05, "loss": 0.1639, "step": 2150 }, { "epoch": 0.58, "learning_rate": 4.8885646059962856e-05, "loss": 0.1298, "step": 2200 }, { "epoch": 0.6, "learning_rate": 4.8859113823295306e-05, "loss": 0.1314, "step": 2250 }, { "epoch": 0.61, "learning_rate": 4.883258158662776e-05, "loss": 0.1592, "step": 2300 }, { "epoch": 0.62, "learning_rate": 4.88060493499602e-05, "loss": 0.1413, "step": 2350 }, { "epoch": 0.64, "learning_rate": 4.877951711329266e-05, "loss": 0.1323, "step": 2400 }, { "epoch": 0.65, "learning_rate": 4.87529848766251e-05, "loss": 0.1621, "step": 2450 }, { "epoch": 0.66, "learning_rate": 4.8726452639957546e-05, "loss": 0.1509, "step": 2500 }, { "epoch": 0.68, "learning_rate": 4.8699920403290004e-05, "loss": 0.1353, "step": 2550 }, { "epoch": 0.69, "learning_rate": 4.867338816662245e-05, "loss": 0.1499, "step": 2600 }, { "epoch": 0.7, "learning_rate": 4.86468559299549e-05, "loss": 0.1172, "step": 2650 }, { "epoch": 0.72, "learning_rate": 4.862032369328734e-05, "loss": 0.1352, "step": 2700 }, { "epoch": 0.73, "learning_rate": 4.85937914566198e-05, "loss": 0.135, "step": 2750 }, { "epoch": 0.74, "learning_rate": 4.856725921995224e-05, "loss": 0.1241, "step": 2800 }, { "epoch": 0.76, "learning_rate": 4.8540726983284694e-05, "loss": 0.1402, "step": 2850 }, { "epoch": 0.77, "learning_rate": 4.8514194746617145e-05, "loss": 0.1352, "step": 2900 }, { "epoch": 0.78, "learning_rate": 4.848766250994959e-05, "loss": 0.1463, "step": 2950 }, { "epoch": 0.8, "learning_rate": 4.846113027328204e-05, "loss": 0.1456, "step": 3000 }, { "epoch": 0.81, "learning_rate": 4.843459803661449e-05, "loss": 0.138, "step": 3050 }, { "epoch": 0.82, "learning_rate": 4.840806579994694e-05, "loss": 0.1452, "step": 3100 }, { "epoch": 0.83, "learning_rate": 4.8381533563279384e-05, "loss": 0.1214, "step": 3150 }, { "epoch": 0.85, "learning_rate": 4.8355001326611835e-05, "loss": 0.1265, "step": 3200 }, { "epoch": 0.86, "learning_rate": 4.8328469089944286e-05, "loss": 0.1382, "step": 3250 }, { "epoch": 0.87, "learning_rate": 4.8301936853276736e-05, "loss": 0.1143, "step": 3300 }, { "epoch": 0.89, "learning_rate": 4.827540461660918e-05, "loss": 0.1462, "step": 3350 }, { "epoch": 0.9, "learning_rate": 4.824887237994163e-05, "loss": 0.1118, "step": 3400 }, { "epoch": 0.91, "learning_rate": 4.822234014327408e-05, "loss": 0.1114, "step": 3450 }, { "epoch": 0.93, "learning_rate": 4.8195807906606525e-05, "loss": 0.1194, "step": 3500 }, { "epoch": 0.94, "learning_rate": 4.8169275669938976e-05, "loss": 0.1281, "step": 3550 }, { "epoch": 0.95, "learning_rate": 4.814274343327143e-05, "loss": 0.1142, "step": 3600 }, { "epoch": 0.97, "learning_rate": 4.811621119660388e-05, "loss": 0.1157, "step": 3650 }, { "epoch": 0.98, "learning_rate": 4.808967895993632e-05, "loss": 0.1176, "step": 3700 }, { "epoch": 0.99, "learning_rate": 4.806314672326878e-05, "loss": 0.1163, "step": 3750 }, { "epoch": 1.01, "learning_rate": 4.803661448660122e-05, "loss": 0.0801, "step": 3800 }, { "epoch": 1.02, "learning_rate": 4.801008224993367e-05, "loss": 0.0941, "step": 3850 }, { "epoch": 1.03, "learning_rate": 4.7983550013266124e-05, "loss": 0.0872, "step": 3900 }, { "epoch": 1.05, "learning_rate": 4.795701777659857e-05, "loss": 0.0808, "step": 3950 }, { "epoch": 1.06, "learning_rate": 4.793048553993102e-05, "loss": 0.0807, "step": 4000 }, { "epoch": 1.07, "learning_rate": 4.790395330326346e-05, "loss": 0.0817, "step": 4050 }, { "epoch": 1.09, "learning_rate": 4.787742106659592e-05, "loss": 0.0794, "step": 4100 }, { "epoch": 1.1, "learning_rate": 4.7850888829928364e-05, "loss": 0.0756, "step": 4150 }, { "epoch": 1.11, "learning_rate": 4.7824356593260814e-05, "loss": 0.0741, "step": 4200 }, { "epoch": 1.13, "learning_rate": 4.7797824356593265e-05, "loss": 0.0889, "step": 4250 }, { "epoch": 1.14, "learning_rate": 4.7771292119925716e-05, "loss": 0.0984, "step": 4300 }, { "epoch": 1.15, "learning_rate": 4.774475988325816e-05, "loss": 0.0863, "step": 4350 }, { "epoch": 1.17, "learning_rate": 4.771822764659061e-05, "loss": 0.0666, "step": 4400 }, { "epoch": 1.18, "learning_rate": 4.769169540992306e-05, "loss": 0.0782, "step": 4450 }, { "epoch": 1.19, "learning_rate": 4.7665163173255505e-05, "loss": 0.0726, "step": 4500 }, { "epoch": 1.21, "learning_rate": 4.7638630936587956e-05, "loss": 0.0681, "step": 4550 }, { "epoch": 1.22, "learning_rate": 4.7612098699920406e-05, "loss": 0.0919, "step": 4600 }, { "epoch": 1.23, "learning_rate": 4.758556646325286e-05, "loss": 0.0793, "step": 4650 }, { "epoch": 1.25, "learning_rate": 4.75590342265853e-05, "loss": 0.0651, "step": 4700 }, { "epoch": 1.26, "learning_rate": 4.753250198991776e-05, "loss": 0.0801, "step": 4750 }, { "epoch": 1.27, "learning_rate": 4.75059697532502e-05, "loss": 0.0895, "step": 4800 }, { "epoch": 1.29, "learning_rate": 4.7479437516582646e-05, "loss": 0.0759, "step": 4850 }, { "epoch": 1.3, "learning_rate": 4.74529052799151e-05, "loss": 0.0824, "step": 4900 }, { "epoch": 1.31, "learning_rate": 4.742637304324755e-05, "loss": 0.0837, "step": 4950 }, { "epoch": 1.33, "learning_rate": 4.739984080658e-05, "loss": 0.0697, "step": 5000 }, { "epoch": 1.34, "learning_rate": 4.737330856991244e-05, "loss": 0.0673, "step": 5050 }, { "epoch": 1.35, "learning_rate": 4.73467763332449e-05, "loss": 0.0659, "step": 5100 }, { "epoch": 1.36, "learning_rate": 4.732024409657734e-05, "loss": 0.0615, "step": 5150 }, { "epoch": 1.38, "learning_rate": 4.7293711859909794e-05, "loss": 0.0695, "step": 5200 }, { "epoch": 1.39, "learning_rate": 4.7267179623242244e-05, "loss": 0.0734, "step": 5250 }, { "epoch": 1.4, "learning_rate": 4.724064738657469e-05, "loss": 0.0594, "step": 5300 }, { "epoch": 1.42, "learning_rate": 4.721411514990714e-05, "loss": 0.0555, "step": 5350 }, { "epoch": 1.43, "learning_rate": 4.718758291323959e-05, "loss": 0.067, "step": 5400 }, { "epoch": 1.44, "learning_rate": 4.716105067657204e-05, "loss": 0.0777, "step": 5450 }, { "epoch": 1.46, "learning_rate": 4.7134518439904484e-05, "loss": 0.0758, "step": 5500 }, { "epoch": 1.47, "learning_rate": 4.7107986203236935e-05, "loss": 0.0797, "step": 5550 }, { "epoch": 1.48, "learning_rate": 4.7081453966569386e-05, "loss": 0.0721, "step": 5600 }, { "epoch": 1.5, "learning_rate": 4.7054921729901836e-05, "loss": 0.0688, "step": 5650 }, { "epoch": 1.51, "learning_rate": 4.702838949323428e-05, "loss": 0.0672, "step": 5700 }, { "epoch": 1.52, "learning_rate": 4.700185725656673e-05, "loss": 0.0937, "step": 5750 }, { "epoch": 1.54, "learning_rate": 4.697532501989918e-05, "loss": 0.0645, "step": 5800 }, { "epoch": 1.55, "learning_rate": 4.6948792783231625e-05, "loss": 0.0617, "step": 5850 }, { "epoch": 1.56, "learning_rate": 4.6922260546564076e-05, "loss": 0.066, "step": 5900 }, { "epoch": 1.58, "learning_rate": 4.689572830989653e-05, "loss": 0.0692, "step": 5950 }, { "epoch": 1.59, "learning_rate": 4.686919607322898e-05, "loss": 0.0451, "step": 6000 }, { "epoch": 1.6, "learning_rate": 4.684266383656142e-05, "loss": 0.0428, "step": 6050 }, { "epoch": 1.62, "learning_rate": 4.681613159989388e-05, "loss": 0.0469, "step": 6100 }, { "epoch": 1.63, "learning_rate": 4.678959936322632e-05, "loss": 0.0662, "step": 6150 }, { "epoch": 1.64, "learning_rate": 4.676306712655877e-05, "loss": 0.0629, "step": 6200 }, { "epoch": 1.66, "learning_rate": 4.673653488989122e-05, "loss": 0.0519, "step": 6250 }, { "epoch": 1.67, "learning_rate": 4.671000265322367e-05, "loss": 0.0619, "step": 6300 }, { "epoch": 1.68, "learning_rate": 4.668347041655612e-05, "loss": 0.0537, "step": 6350 }, { "epoch": 1.7, "learning_rate": 4.665693817988856e-05, "loss": 0.0473, "step": 6400 }, { "epoch": 1.71, "learning_rate": 4.663040594322102e-05, "loss": 0.0469, "step": 6450 }, { "epoch": 1.72, "learning_rate": 4.6603873706553464e-05, "loss": 0.0578, "step": 6500 }, { "epoch": 1.74, "learning_rate": 4.6577341469885914e-05, "loss": 0.0493, "step": 6550 }, { "epoch": 1.75, "learning_rate": 4.6550809233218365e-05, "loss": 0.0557, "step": 6600 }, { "epoch": 1.76, "learning_rate": 4.6524276996550816e-05, "loss": 0.0391, "step": 6650 }, { "epoch": 1.78, "learning_rate": 4.649774475988326e-05, "loss": 0.0474, "step": 6700 }, { "epoch": 1.79, "learning_rate": 4.647121252321571e-05, "loss": 0.0612, "step": 6750 }, { "epoch": 1.8, "learning_rate": 4.644468028654816e-05, "loss": 0.0555, "step": 6800 }, { "epoch": 1.82, "learning_rate": 4.6418148049880605e-05, "loss": 0.0548, "step": 6850 }, { "epoch": 1.83, "learning_rate": 4.6391615813213055e-05, "loss": 0.0653, "step": 6900 }, { "epoch": 1.84, "learning_rate": 4.6365083576545506e-05, "loss": 0.0487, "step": 6950 }, { "epoch": 1.86, "learning_rate": 4.633855133987796e-05, "loss": 0.0538, "step": 7000 }, { "epoch": 1.87, "learning_rate": 4.63120191032104e-05, "loss": 0.0461, "step": 7050 }, { "epoch": 1.88, "learning_rate": 4.628548686654285e-05, "loss": 0.0478, "step": 7100 }, { "epoch": 1.9, "learning_rate": 4.62589546298753e-05, "loss": 0.0598, "step": 7150 }, { "epoch": 1.91, "learning_rate": 4.6232422393207746e-05, "loss": 0.0505, "step": 7200 }, { "epoch": 1.92, "learning_rate": 4.6205890156540196e-05, "loss": 0.0457, "step": 7250 }, { "epoch": 1.93, "learning_rate": 4.617935791987265e-05, "loss": 0.0428, "step": 7300 }, { "epoch": 1.95, "learning_rate": 4.61528256832051e-05, "loss": 0.0377, "step": 7350 }, { "epoch": 1.96, "learning_rate": 4.612629344653754e-05, "loss": 0.0535, "step": 7400 }, { "epoch": 1.97, "learning_rate": 4.609976120987e-05, "loss": 0.0543, "step": 7450 }, { "epoch": 1.99, "learning_rate": 4.607322897320244e-05, "loss": 0.0303, "step": 7500 }, { "epoch": 2.0, "learning_rate": 4.6046696736534894e-05, "loss": 0.0405, "step": 7550 }, { "epoch": 2.01, "learning_rate": 4.602016449986734e-05, "loss": 0.0222, "step": 7600 }, { "epoch": 2.03, "learning_rate": 4.599363226319979e-05, "loss": 0.0254, "step": 7650 }, { "epoch": 2.04, "learning_rate": 4.596710002653224e-05, "loss": 0.0207, "step": 7700 }, { "epoch": 2.05, "learning_rate": 4.594056778986468e-05, "loss": 0.0226, "step": 7750 }, { "epoch": 2.07, "learning_rate": 4.591403555319714e-05, "loss": 0.0259, "step": 7800 }, { "epoch": 2.08, "learning_rate": 4.5887503316529584e-05, "loss": 0.0246, "step": 7850 }, { "epoch": 2.09, "learning_rate": 4.5860971079862035e-05, "loss": 0.0199, "step": 7900 }, { "epoch": 2.11, "learning_rate": 4.5834438843194485e-05, "loss": 0.0295, "step": 7950 }, { "epoch": 2.12, "learning_rate": 4.5807906606526936e-05, "loss": 0.0288, "step": 8000 }, { "epoch": 2.13, "learning_rate": 4.578137436985938e-05, "loss": 0.0219, "step": 8050 }, { "epoch": 2.15, "learning_rate": 4.575484213319183e-05, "loss": 0.023, "step": 8100 }, { "epoch": 2.16, "learning_rate": 4.572830989652428e-05, "loss": 0.0227, "step": 8150 }, { "epoch": 2.17, "learning_rate": 4.5701777659856725e-05, "loss": 0.0317, "step": 8200 }, { "epoch": 2.19, "learning_rate": 4.5675245423189176e-05, "loss": 0.0222, "step": 8250 }, { "epoch": 2.2, "learning_rate": 4.5648713186521626e-05, "loss": 0.0264, "step": 8300 }, { "epoch": 2.21, "learning_rate": 4.562218094985408e-05, "loss": 0.0267, "step": 8350 }, { "epoch": 2.23, "learning_rate": 4.559564871318652e-05, "loss": 0.0277, "step": 8400 }, { "epoch": 2.24, "learning_rate": 4.556911647651897e-05, "loss": 0.0247, "step": 8450 }, { "epoch": 2.25, "learning_rate": 4.554258423985142e-05, "loss": 0.028, "step": 8500 }, { "epoch": 2.27, "learning_rate": 4.551605200318387e-05, "loss": 0.0328, "step": 8550 }, { "epoch": 2.28, "learning_rate": 4.548951976651632e-05, "loss": 0.0248, "step": 8600 }, { "epoch": 2.29, "learning_rate": 4.546298752984877e-05, "loss": 0.0237, "step": 8650 }, { "epoch": 2.31, "learning_rate": 4.543645529318122e-05, "loss": 0.0235, "step": 8700 }, { "epoch": 2.32, "learning_rate": 4.540992305651366e-05, "loss": 0.0273, "step": 8750 }, { "epoch": 2.33, "learning_rate": 4.538339081984612e-05, "loss": 0.0197, "step": 8800 }, { "epoch": 2.35, "learning_rate": 4.5356858583178563e-05, "loss": 0.0269, "step": 8850 }, { "epoch": 2.36, "learning_rate": 4.5330326346511014e-05, "loss": 0.0255, "step": 8900 }, { "epoch": 2.37, "learning_rate": 4.5303794109843465e-05, "loss": 0.0235, "step": 8950 }, { "epoch": 2.39, "learning_rate": 4.5277261873175915e-05, "loss": 0.0229, "step": 9000 }, { "epoch": 2.4, "learning_rate": 4.525072963650836e-05, "loss": 0.0197, "step": 9050 }, { "epoch": 2.41, "learning_rate": 4.52241973998408e-05, "loss": 0.022, "step": 9100 }, { "epoch": 2.43, "learning_rate": 4.519766516317326e-05, "loss": 0.0336, "step": 9150 }, { "epoch": 2.44, "learning_rate": 4.5171132926505705e-05, "loss": 0.0226, "step": 9200 }, { "epoch": 2.45, "learning_rate": 4.5144600689838155e-05, "loss": 0.0244, "step": 9250 }, { "epoch": 2.46, "learning_rate": 4.5118068453170606e-05, "loss": 0.0231, "step": 9300 }, { "epoch": 2.48, "learning_rate": 4.5091536216503057e-05, "loss": 0.0252, "step": 9350 }, { "epoch": 2.49, "learning_rate": 4.50650039798355e-05, "loss": 0.0267, "step": 9400 }, { "epoch": 2.5, "learning_rate": 4.503847174316795e-05, "loss": 0.0235, "step": 9450 }, { "epoch": 2.52, "learning_rate": 4.50119395065004e-05, "loss": 0.0249, "step": 9500 }, { "epoch": 2.53, "learning_rate": 4.4985407269832846e-05, "loss": 0.0264, "step": 9550 }, { "epoch": 2.54, "learning_rate": 4.4958875033165296e-05, "loss": 0.0328, "step": 9600 }, { "epoch": 2.56, "learning_rate": 4.493234279649775e-05, "loss": 0.0283, "step": 9650 }, { "epoch": 2.57, "learning_rate": 4.49058105598302e-05, "loss": 0.0223, "step": 9700 }, { "epoch": 2.58, "learning_rate": 4.487927832316264e-05, "loss": 0.0264, "step": 9750 }, { "epoch": 2.6, "learning_rate": 4.485274608649509e-05, "loss": 0.0292, "step": 9800 }, { "epoch": 2.61, "learning_rate": 4.482621384982754e-05, "loss": 0.0227, "step": 9850 }, { "epoch": 2.62, "learning_rate": 4.4799681613159993e-05, "loss": 0.0235, "step": 9900 }, { "epoch": 2.64, "learning_rate": 4.477314937649244e-05, "loss": 0.0231, "step": 9950 }, { "epoch": 2.65, "learning_rate": 4.474661713982489e-05, "loss": 0.0224, "step": 10000 }, { "epoch": 2.66, "learning_rate": 4.472008490315734e-05, "loss": 0.0222, "step": 10050 }, { "epoch": 2.68, "learning_rate": 4.469355266648978e-05, "loss": 0.0289, "step": 10100 }, { "epoch": 2.69, "learning_rate": 4.466702042982224e-05, "loss": 0.0307, "step": 10150 }, { "epoch": 2.7, "learning_rate": 4.4640488193154684e-05, "loss": 0.0247, "step": 10200 }, { "epoch": 2.72, "learning_rate": 4.4613955956487135e-05, "loss": 0.0238, "step": 10250 }, { "epoch": 2.73, "learning_rate": 4.4587423719819585e-05, "loss": 0.0268, "step": 10300 }, { "epoch": 2.74, "learning_rate": 4.4560891483152036e-05, "loss": 0.0284, "step": 10350 }, { "epoch": 2.76, "learning_rate": 4.453435924648448e-05, "loss": 0.0252, "step": 10400 }, { "epoch": 2.77, "learning_rate": 4.4507827009816924e-05, "loss": 0.0252, "step": 10450 }, { "epoch": 2.78, "learning_rate": 4.448129477314938e-05, "loss": 0.0273, "step": 10500 }, { "epoch": 2.8, "learning_rate": 4.4454762536481825e-05, "loss": 0.0229, "step": 10550 }, { "epoch": 2.81, "learning_rate": 4.4428230299814276e-05, "loss": 0.0239, "step": 10600 }, { "epoch": 2.82, "learning_rate": 4.4401698063146726e-05, "loss": 0.0274, "step": 10650 }, { "epoch": 2.84, "learning_rate": 4.437516582647918e-05, "loss": 0.0227, "step": 10700 }, { "epoch": 2.85, "learning_rate": 4.434863358981162e-05, "loss": 0.0294, "step": 10750 }, { "epoch": 2.86, "learning_rate": 4.432210135314407e-05, "loss": 0.0267, "step": 10800 }, { "epoch": 2.88, "learning_rate": 4.429556911647652e-05, "loss": 0.0313, "step": 10850 }, { "epoch": 2.89, "learning_rate": 4.426903687980897e-05, "loss": 0.0264, "step": 10900 }, { "epoch": 2.9, "learning_rate": 4.424250464314142e-05, "loss": 0.0213, "step": 10950 }, { "epoch": 2.92, "learning_rate": 4.421597240647387e-05, "loss": 0.0252, "step": 11000 }, { "epoch": 2.93, "learning_rate": 4.418944016980632e-05, "loss": 0.0257, "step": 11050 }, { "epoch": 2.94, "learning_rate": 4.416290793313876e-05, "loss": 0.0233, "step": 11100 }, { "epoch": 2.96, "learning_rate": 4.413637569647122e-05, "loss": 0.031, "step": 11150 }, { "epoch": 2.97, "learning_rate": 4.410984345980366e-05, "loss": 0.0233, "step": 11200 }, { "epoch": 2.98, "learning_rate": 4.4083311223136114e-05, "loss": 0.0224, "step": 11250 }, { "epoch": 2.99, "learning_rate": 4.405677898646856e-05, "loss": 0.0287, "step": 11300 }, { "epoch": 3.01, "learning_rate": 4.4030246749801015e-05, "loss": 0.0179, "step": 11350 }, { "epoch": 3.02, "learning_rate": 4.400371451313346e-05, "loss": 0.0141, "step": 11400 }, { "epoch": 3.03, "learning_rate": 4.39771822764659e-05, "loss": 0.0133, "step": 11450 }, { "epoch": 3.05, "learning_rate": 4.395065003979836e-05, "loss": 0.015, "step": 11500 }, { "epoch": 3.06, "learning_rate": 4.3924117803130804e-05, "loss": 0.015, "step": 11550 }, { "epoch": 3.07, "learning_rate": 4.3897585566463255e-05, "loss": 0.015, "step": 11600 }, { "epoch": 3.09, "learning_rate": 4.3871053329795706e-05, "loss": 0.0178, "step": 11650 }, { "epoch": 3.1, "learning_rate": 4.3844521093128156e-05, "loss": 0.0141, "step": 11700 }, { "epoch": 3.11, "learning_rate": 4.38179888564606e-05, "loss": 0.014, "step": 11750 }, { "epoch": 3.13, "learning_rate": 4.379145661979305e-05, "loss": 0.0142, "step": 11800 }, { "epoch": 3.14, "learning_rate": 4.37649243831255e-05, "loss": 0.015, "step": 11850 }, { "epoch": 3.15, "learning_rate": 4.3738392146457945e-05, "loss": 0.0163, "step": 11900 }, { "epoch": 3.17, "learning_rate": 4.3711859909790396e-05, "loss": 0.0145, "step": 11950 }, { "epoch": 3.18, "learning_rate": 4.368532767312285e-05, "loss": 0.0139, "step": 12000 }, { "epoch": 3.19, "learning_rate": 4.36587954364553e-05, "loss": 0.0157, "step": 12050 }, { "epoch": 3.21, "learning_rate": 4.363226319978774e-05, "loss": 0.0185, "step": 12100 }, { "epoch": 3.22, "learning_rate": 4.360573096312019e-05, "loss": 0.015, "step": 12150 }, { "epoch": 3.23, "learning_rate": 4.357919872645264e-05, "loss": 0.0188, "step": 12200 }, { "epoch": 3.25, "learning_rate": 4.355266648978509e-05, "loss": 0.0157, "step": 12250 }, { "epoch": 3.26, "learning_rate": 4.352613425311754e-05, "loss": 0.0149, "step": 12300 }, { "epoch": 3.27, "learning_rate": 4.349960201644999e-05, "loss": 0.0154, "step": 12350 }, { "epoch": 3.29, "learning_rate": 4.347306977978244e-05, "loss": 0.0158, "step": 12400 }, { "epoch": 3.3, "learning_rate": 4.344653754311488e-05, "loss": 0.0153, "step": 12450 }, { "epoch": 3.31, "learning_rate": 4.342000530644734e-05, "loss": 0.0169, "step": 12500 }, { "epoch": 3.33, "learning_rate": 4.3393473069779784e-05, "loss": 0.0165, "step": 12550 }, { "epoch": 3.34, "learning_rate": 4.3366940833112234e-05, "loss": 0.0157, "step": 12600 }, { "epoch": 3.35, "learning_rate": 4.334040859644468e-05, "loss": 0.0169, "step": 12650 }, { "epoch": 3.37, "learning_rate": 4.3313876359777136e-05, "loss": 0.0167, "step": 12700 }, { "epoch": 3.38, "learning_rate": 4.328734412310958e-05, "loss": 0.0163, "step": 12750 }, { "epoch": 3.39, "learning_rate": 4.3260811886442023e-05, "loss": 0.0157, "step": 12800 }, { "epoch": 3.41, "learning_rate": 4.323427964977448e-05, "loss": 0.0165, "step": 12850 }, { "epoch": 3.42, "learning_rate": 4.3207747413106925e-05, "loss": 0.0166, "step": 12900 }, { "epoch": 3.43, "learning_rate": 4.3181215176439375e-05, "loss": 0.0172, "step": 12950 }, { "epoch": 3.45, "learning_rate": 4.3154682939771826e-05, "loss": 0.0183, "step": 13000 }, { "epoch": 3.46, "learning_rate": 4.312815070310428e-05, "loss": 0.0164, "step": 13050 }, { "epoch": 3.47, "learning_rate": 4.310161846643672e-05, "loss": 0.018, "step": 13100 }, { "epoch": 3.49, "learning_rate": 4.307508622976917e-05, "loss": 0.0185, "step": 13150 }, { "epoch": 3.5, "learning_rate": 4.304855399310162e-05, "loss": 0.0148, "step": 13200 }, { "epoch": 3.51, "learning_rate": 4.302202175643407e-05, "loss": 0.0179, "step": 13250 }, { "epoch": 3.53, "learning_rate": 4.2995489519766517e-05, "loss": 0.0216, "step": 13300 }, { "epoch": 3.54, "learning_rate": 4.296895728309897e-05, "loss": 0.0141, "step": 13350 }, { "epoch": 3.55, "learning_rate": 4.294242504643142e-05, "loss": 0.0202, "step": 13400 }, { "epoch": 3.56, "learning_rate": 4.291589280976386e-05, "loss": 0.0179, "step": 13450 }, { "epoch": 3.58, "learning_rate": 4.288936057309631e-05, "loss": 0.0166, "step": 13500 }, { "epoch": 3.59, "learning_rate": 4.286282833642876e-05, "loss": 0.0175, "step": 13550 }, { "epoch": 3.6, "learning_rate": 4.2836296099761214e-05, "loss": 0.0149, "step": 13600 }, { "epoch": 3.62, "learning_rate": 4.280976386309366e-05, "loss": 0.0188, "step": 13650 }, { "epoch": 3.63, "learning_rate": 4.2783231626426115e-05, "loss": 0.0167, "step": 13700 }, { "epoch": 3.64, "learning_rate": 4.275669938975856e-05, "loss": 0.015, "step": 13750 }, { "epoch": 3.66, "learning_rate": 4.2730167153091e-05, "loss": 0.0153, "step": 13800 }, { "epoch": 3.67, "learning_rate": 4.270363491642346e-05, "loss": 0.0174, "step": 13850 }, { "epoch": 3.68, "learning_rate": 4.2677102679755904e-05, "loss": 0.016, "step": 13900 }, { "epoch": 3.7, "learning_rate": 4.2650570443088355e-05, "loss": 0.0151, "step": 13950 }, { "epoch": 3.71, "learning_rate": 4.2624038206420806e-05, "loss": 0.0168, "step": 14000 }, { "epoch": 3.72, "learning_rate": 4.2597505969753256e-05, "loss": 0.0173, "step": 14050 }, { "epoch": 3.74, "learning_rate": 4.25709737330857e-05, "loss": 0.0191, "step": 14100 }, { "epoch": 3.75, "learning_rate": 4.254444149641815e-05, "loss": 0.0179, "step": 14150 }, { "epoch": 3.76, "learning_rate": 4.25179092597506e-05, "loss": 0.0186, "step": 14200 }, { "epoch": 3.78, "learning_rate": 4.2491377023083045e-05, "loss": 0.018, "step": 14250 }, { "epoch": 3.79, "learning_rate": 4.2464844786415496e-05, "loss": 0.0172, "step": 14300 }, { "epoch": 3.8, "learning_rate": 4.2438312549747947e-05, "loss": 0.017, "step": 14350 }, { "epoch": 3.82, "learning_rate": 4.24117803130804e-05, "loss": 0.0166, "step": 14400 }, { "epoch": 3.83, "learning_rate": 4.238524807641284e-05, "loss": 0.0154, "step": 14450 }, { "epoch": 3.84, "learning_rate": 4.235871583974529e-05, "loss": 0.0195, "step": 14500 }, { "epoch": 3.86, "learning_rate": 4.233218360307774e-05, "loss": 0.0188, "step": 14550 }, { "epoch": 3.87, "learning_rate": 4.230565136641019e-05, "loss": 0.0192, "step": 14600 }, { "epoch": 3.88, "learning_rate": 4.227911912974264e-05, "loss": 0.0166, "step": 14650 }, { "epoch": 3.9, "learning_rate": 4.225258689307509e-05, "loss": 0.0156, "step": 14700 }, { "epoch": 3.91, "learning_rate": 4.222605465640754e-05, "loss": 0.0178, "step": 14750 }, { "epoch": 3.92, "learning_rate": 4.219952241973998e-05, "loss": 0.0173, "step": 14800 }, { "epoch": 3.94, "learning_rate": 4.217299018307243e-05, "loss": 0.019, "step": 14850 }, { "epoch": 3.95, "learning_rate": 4.2146457946404884e-05, "loss": 0.0171, "step": 14900 }, { "epoch": 3.96, "learning_rate": 4.2119925709737334e-05, "loss": 0.0174, "step": 14950 }, { "epoch": 3.98, "learning_rate": 4.209339347306978e-05, "loss": 0.0195, "step": 15000 }, { "epoch": 3.99, "learning_rate": 4.2066861236402236e-05, "loss": 0.0177, "step": 15050 }, { "epoch": 4.0, "learning_rate": 4.204032899973468e-05, "loss": 0.0176, "step": 15100 }, { "epoch": 4.02, "learning_rate": 4.201379676306712e-05, "loss": 0.0122, "step": 15150 }, { "epoch": 4.03, "learning_rate": 4.198726452639958e-05, "loss": 0.0143, "step": 15200 }, { "epoch": 4.04, "learning_rate": 4.1960732289732025e-05, "loss": 0.0126, "step": 15250 }, { "epoch": 4.06, "learning_rate": 4.1934200053064475e-05, "loss": 0.012, "step": 15300 }, { "epoch": 4.07, "learning_rate": 4.1907667816396926e-05, "loss": 0.0128, "step": 15350 }, { "epoch": 4.08, "learning_rate": 4.188113557972938e-05, "loss": 0.0134, "step": 15400 }, { "epoch": 4.09, "learning_rate": 4.185460334306182e-05, "loss": 0.0117, "step": 15450 }, { "epoch": 4.11, "learning_rate": 4.182807110639427e-05, "loss": 0.0108, "step": 15500 }, { "epoch": 4.12, "learning_rate": 4.180153886972672e-05, "loss": 0.0127, "step": 15550 }, { "epoch": 4.13, "learning_rate": 4.177500663305917e-05, "loss": 0.0115, "step": 15600 }, { "epoch": 4.15, "learning_rate": 4.1748474396391616e-05, "loss": 0.0111, "step": 15650 }, { "epoch": 4.16, "learning_rate": 4.172194215972407e-05, "loss": 0.0113, "step": 15700 }, { "epoch": 4.17, "learning_rate": 4.169540992305652e-05, "loss": 0.0125, "step": 15750 }, { "epoch": 4.19, "learning_rate": 4.166887768638896e-05, "loss": 0.0121, "step": 15800 }, { "epoch": 4.2, "learning_rate": 4.164234544972141e-05, "loss": 0.0123, "step": 15850 }, { "epoch": 4.21, "learning_rate": 4.161581321305386e-05, "loss": 0.0123, "step": 15900 }, { "epoch": 4.23, "learning_rate": 4.1589280976386314e-05, "loss": 0.0124, "step": 15950 }, { "epoch": 4.24, "learning_rate": 4.156274873971876e-05, "loss": 0.0116, "step": 16000 }, { "epoch": 4.25, "learning_rate": 4.1536216503051215e-05, "loss": 0.0131, "step": 16050 }, { "epoch": 4.27, "learning_rate": 4.150968426638366e-05, "loss": 0.0138, "step": 16100 }, { "epoch": 4.28, "learning_rate": 4.14831520297161e-05, "loss": 0.0123, "step": 16150 }, { "epoch": 4.29, "learning_rate": 4.145661979304855e-05, "loss": 0.0131, "step": 16200 }, { "epoch": 4.31, "learning_rate": 4.1430087556381004e-05, "loss": 0.0136, "step": 16250 }, { "epoch": 4.32, "learning_rate": 4.1403555319713455e-05, "loss": 0.0136, "step": 16300 }, { "epoch": 4.33, "learning_rate": 4.13770230830459e-05, "loss": 0.012, "step": 16350 }, { "epoch": 4.35, "learning_rate": 4.1350490846378356e-05, "loss": 0.0117, "step": 16400 }, { "epoch": 4.36, "learning_rate": 4.13239586097108e-05, "loss": 0.0127, "step": 16450 }, { "epoch": 4.37, "learning_rate": 4.129742637304325e-05, "loss": 0.0133, "step": 16500 }, { "epoch": 4.39, "learning_rate": 4.12708941363757e-05, "loss": 0.0147, "step": 16550 }, { "epoch": 4.4, "learning_rate": 4.1244361899708145e-05, "loss": 0.0145, "step": 16600 }, { "epoch": 4.41, "learning_rate": 4.1217829663040596e-05, "loss": 0.0134, "step": 16650 }, { "epoch": 4.43, "learning_rate": 4.1191297426373046e-05, "loss": 0.0135, "step": 16700 }, { "epoch": 4.44, "learning_rate": 4.11647651897055e-05, "loss": 0.0131, "step": 16750 }, { "epoch": 4.45, "learning_rate": 4.113823295303794e-05, "loss": 0.0138, "step": 16800 }, { "epoch": 4.47, "learning_rate": 4.111170071637039e-05, "loss": 0.0135, "step": 16850 }, { "epoch": 4.48, "learning_rate": 4.108516847970284e-05, "loss": 0.0133, "step": 16900 }, { "epoch": 4.49, "learning_rate": 4.105863624303529e-05, "loss": 0.0133, "step": 16950 }, { "epoch": 4.51, "learning_rate": 4.103210400636774e-05, "loss": 0.0148, "step": 17000 }, { "epoch": 4.52, "learning_rate": 4.100557176970019e-05, "loss": 0.0146, "step": 17050 }, { "epoch": 4.53, "learning_rate": 4.097903953303264e-05, "loss": 0.0146, "step": 17100 }, { "epoch": 4.55, "learning_rate": 4.095250729636508e-05, "loss": 0.0143, "step": 17150 }, { "epoch": 4.56, "learning_rate": 4.092597505969753e-05, "loss": 0.0136, "step": 17200 }, { "epoch": 4.57, "learning_rate": 4.0899442823029983e-05, "loss": 0.014, "step": 17250 }, { "epoch": 4.59, "learning_rate": 4.0872910586362434e-05, "loss": 0.0135, "step": 17300 }, { "epoch": 4.6, "learning_rate": 4.084637834969488e-05, "loss": 0.0138, "step": 17350 }, { "epoch": 4.61, "learning_rate": 4.0819846113027335e-05, "loss": 0.0143, "step": 17400 }, { "epoch": 4.62, "learning_rate": 4.079331387635978e-05, "loss": 0.0138, "step": 17450 }, { "epoch": 4.64, "learning_rate": 4.076678163969222e-05, "loss": 0.0141, "step": 17500 }, { "epoch": 4.65, "learning_rate": 4.074024940302468e-05, "loss": 0.0127, "step": 17550 }, { "epoch": 4.66, "learning_rate": 4.0713717166357124e-05, "loss": 0.012, "step": 17600 }, { "epoch": 4.68, "learning_rate": 4.0687184929689575e-05, "loss": 0.0143, "step": 17650 }, { "epoch": 4.69, "learning_rate": 4.066065269302202e-05, "loss": 0.0142, "step": 17700 }, { "epoch": 4.7, "learning_rate": 4.0634120456354476e-05, "loss": 0.0151, "step": 17750 }, { "epoch": 4.72, "learning_rate": 4.060758821968692e-05, "loss": 0.0132, "step": 17800 }, { "epoch": 4.73, "learning_rate": 4.058105598301937e-05, "loss": 0.013, "step": 17850 }, { "epoch": 4.74, "learning_rate": 4.055452374635182e-05, "loss": 0.0145, "step": 17900 }, { "epoch": 4.76, "learning_rate": 4.052799150968427e-05, "loss": 0.0146, "step": 17950 }, { "epoch": 4.77, "learning_rate": 4.0501459273016716e-05, "loss": 0.0163, "step": 18000 }, { "epoch": 4.78, "learning_rate": 4.047492703634917e-05, "loss": 0.0144, "step": 18050 }, { "epoch": 4.8, "learning_rate": 4.044839479968162e-05, "loss": 0.0152, "step": 18100 }, { "epoch": 4.81, "learning_rate": 4.042186256301406e-05, "loss": 0.0163, "step": 18150 }, { "epoch": 4.82, "learning_rate": 4.039533032634651e-05, "loss": 0.0162, "step": 18200 }, { "epoch": 4.84, "learning_rate": 4.036879808967896e-05, "loss": 0.0163, "step": 18250 }, { "epoch": 4.85, "learning_rate": 4.0342265853011413e-05, "loss": 0.0153, "step": 18300 }, { "epoch": 4.86, "learning_rate": 4.031573361634386e-05, "loss": 0.0126, "step": 18350 }, { "epoch": 4.88, "learning_rate": 4.028920137967631e-05, "loss": 0.0139, "step": 18400 }, { "epoch": 4.89, "learning_rate": 4.026266914300876e-05, "loss": 0.0134, "step": 18450 }, { "epoch": 4.9, "learning_rate": 4.02361369063412e-05, "loss": 0.0136, "step": 18500 }, { "epoch": 4.92, "learning_rate": 4.020960466967365e-05, "loss": 0.0133, "step": 18550 }, { "epoch": 4.93, "learning_rate": 4.0183072433006104e-05, "loss": 0.0132, "step": 18600 }, { "epoch": 4.94, "learning_rate": 4.0156540196338555e-05, "loss": 0.013, "step": 18650 }, { "epoch": 4.96, "learning_rate": 4.0130007959671e-05, "loss": 0.0129, "step": 18700 }, { "epoch": 4.97, "learning_rate": 4.0103475723003456e-05, "loss": 0.0134, "step": 18750 }, { "epoch": 4.98, "learning_rate": 4.00769434863359e-05, "loss": 0.0139, "step": 18800 }, { "epoch": 5.0, "learning_rate": 4.005041124966835e-05, "loss": 0.0154, "step": 18850 }, { "epoch": 5.01, "learning_rate": 4.00238790130008e-05, "loss": 0.0131, "step": 18900 }, { "epoch": 5.02, "learning_rate": 3.9997346776333245e-05, "loss": 0.0112, "step": 18950 }, { "epoch": 5.04, "learning_rate": 3.9970814539665696e-05, "loss": 0.0109, "step": 19000 }, { "epoch": 5.05, "learning_rate": 3.994428230299814e-05, "loss": 0.0101, "step": 19050 }, { "epoch": 5.06, "learning_rate": 3.99177500663306e-05, "loss": 0.0105, "step": 19100 }, { "epoch": 5.08, "learning_rate": 3.989121782966304e-05, "loss": 0.0101, "step": 19150 }, { "epoch": 5.09, "learning_rate": 3.986468559299549e-05, "loss": 0.0104, "step": 19200 }, { "epoch": 5.1, "learning_rate": 3.983815335632794e-05, "loss": 0.0096, "step": 19250 }, { "epoch": 5.12, "learning_rate": 3.981162111966039e-05, "loss": 0.0108, "step": 19300 }, { "epoch": 5.13, "learning_rate": 3.978508888299284e-05, "loss": 0.0107, "step": 19350 }, { "epoch": 5.14, "learning_rate": 3.975855664632529e-05, "loss": 0.0104, "step": 19400 }, { "epoch": 5.16, "learning_rate": 3.973202440965774e-05, "loss": 0.0112, "step": 19450 }, { "epoch": 5.17, "learning_rate": 3.970549217299018e-05, "loss": 0.0113, "step": 19500 }, { "epoch": 5.18, "learning_rate": 3.967895993632263e-05, "loss": 0.0112, "step": 19550 }, { "epoch": 5.19, "learning_rate": 3.965242769965508e-05, "loss": 0.0091, "step": 19600 }, { "epoch": 5.21, "learning_rate": 3.9625895462987534e-05, "loss": 0.01, "step": 19650 }, { "epoch": 5.22, "learning_rate": 3.959936322631998e-05, "loss": 0.0099, "step": 19700 }, { "epoch": 5.23, "learning_rate": 3.9572830989652435e-05, "loss": 0.0113, "step": 19750 }, { "epoch": 5.25, "learning_rate": 3.954629875298488e-05, "loss": 0.0119, "step": 19800 }, { "epoch": 5.26, "learning_rate": 3.951976651631732e-05, "loss": 0.0097, "step": 19850 }, { "epoch": 5.27, "learning_rate": 3.9493234279649774e-05, "loss": 0.0115, "step": 19900 }, { "epoch": 5.29, "learning_rate": 3.9466702042982224e-05, "loss": 0.0108, "step": 19950 }, { "epoch": 5.3, "learning_rate": 3.9440169806314675e-05, "loss": 0.0112, "step": 20000 }, { "epoch": 5.31, "learning_rate": 3.941363756964712e-05, "loss": 0.0124, "step": 20050 }, { "epoch": 5.33, "learning_rate": 3.9387105332979576e-05, "loss": 0.0105, "step": 20100 }, { "epoch": 5.34, "learning_rate": 3.936057309631202e-05, "loss": 0.0117, "step": 20150 }, { "epoch": 5.35, "learning_rate": 3.933404085964447e-05, "loss": 0.011, "step": 20200 }, { "epoch": 5.37, "learning_rate": 3.930750862297692e-05, "loss": 0.011, "step": 20250 }, { "epoch": 5.38, "learning_rate": 3.928097638630937e-05, "loss": 0.012, "step": 20300 }, { "epoch": 5.39, "learning_rate": 3.9254444149641816e-05, "loss": 0.0126, "step": 20350 }, { "epoch": 5.41, "learning_rate": 3.922791191297427e-05, "loss": 0.0102, "step": 20400 }, { "epoch": 5.42, "learning_rate": 3.920137967630672e-05, "loss": 0.0109, "step": 20450 }, { "epoch": 5.43, "learning_rate": 3.917484743963916e-05, "loss": 0.0113, "step": 20500 }, { "epoch": 5.45, "learning_rate": 3.914831520297161e-05, "loss": 0.0118, "step": 20550 }, { "epoch": 5.46, "learning_rate": 3.912178296630406e-05, "loss": 0.0111, "step": 20600 }, { "epoch": 5.47, "learning_rate": 3.909525072963651e-05, "loss": 0.0121, "step": 20650 }, { "epoch": 5.49, "learning_rate": 3.906871849296896e-05, "loss": 0.0115, "step": 20700 }, { "epoch": 5.5, "learning_rate": 3.904218625630141e-05, "loss": 0.0124, "step": 20750 }, { "epoch": 5.51, "learning_rate": 3.901565401963386e-05, "loss": 0.0122, "step": 20800 }, { "epoch": 5.53, "learning_rate": 3.89891217829663e-05, "loss": 0.0116, "step": 20850 }, { "epoch": 5.54, "learning_rate": 3.896258954629875e-05, "loss": 0.0106, "step": 20900 }, { "epoch": 5.55, "learning_rate": 3.8936057309631204e-05, "loss": 0.0132, "step": 20950 }, { "epoch": 5.57, "learning_rate": 3.8909525072963654e-05, "loss": 0.0128, "step": 21000 }, { "epoch": 5.58, "learning_rate": 3.88829928362961e-05, "loss": 0.0105, "step": 21050 }, { "epoch": 5.59, "learning_rate": 3.8856460599628556e-05, "loss": 0.0119, "step": 21100 }, { "epoch": 5.61, "learning_rate": 3.8829928362961e-05, "loss": 0.0115, "step": 21150 }, { "epoch": 5.62, "learning_rate": 3.880339612629345e-05, "loss": 0.0115, "step": 21200 }, { "epoch": 5.63, "learning_rate": 3.8776863889625894e-05, "loss": 0.0108, "step": 21250 }, { "epoch": 5.65, "learning_rate": 3.8750331652958345e-05, "loss": 0.011, "step": 21300 }, { "epoch": 5.66, "learning_rate": 3.8723799416290795e-05, "loss": 0.0118, "step": 21350 }, { "epoch": 5.67, "learning_rate": 3.869726717962324e-05, "loss": 0.0124, "step": 21400 }, { "epoch": 5.69, "learning_rate": 3.86707349429557e-05, "loss": 0.0108, "step": 21450 }, { "epoch": 5.7, "learning_rate": 3.864420270628814e-05, "loss": 0.0111, "step": 21500 }, { "epoch": 5.71, "learning_rate": 3.861767046962059e-05, "loss": 0.0108, "step": 21550 }, { "epoch": 5.72, "learning_rate": 3.859113823295304e-05, "loss": 0.012, "step": 21600 }, { "epoch": 5.74, "learning_rate": 3.856460599628549e-05, "loss": 0.0109, "step": 21650 }, { "epoch": 5.75, "learning_rate": 3.8538073759617937e-05, "loss": 0.0125, "step": 21700 }, { "epoch": 5.76, "learning_rate": 3.851154152295039e-05, "loss": 0.0122, "step": 21750 }, { "epoch": 5.78, "learning_rate": 3.848500928628284e-05, "loss": 0.0116, "step": 21800 }, { "epoch": 5.79, "learning_rate": 3.845847704961528e-05, "loss": 0.0121, "step": 21850 }, { "epoch": 5.8, "learning_rate": 3.843194481294773e-05, "loss": 0.0112, "step": 21900 }, { "epoch": 5.82, "learning_rate": 3.840541257628018e-05, "loss": 0.0115, "step": 21950 }, { "epoch": 5.83, "learning_rate": 3.8378880339612634e-05, "loss": 0.0139, "step": 22000 }, { "epoch": 5.84, "learning_rate": 3.835234810294508e-05, "loss": 0.0119, "step": 22050 }, { "epoch": 5.86, "learning_rate": 3.832581586627753e-05, "loss": 0.0105, "step": 22100 }, { "epoch": 5.87, "learning_rate": 3.829928362960998e-05, "loss": 0.0102, "step": 22150 }, { "epoch": 5.88, "learning_rate": 3.827275139294242e-05, "loss": 0.0107, "step": 22200 }, { "epoch": 5.9, "learning_rate": 3.8246219156274873e-05, "loss": 0.0129, "step": 22250 }, { "epoch": 5.91, "learning_rate": 3.8219686919607324e-05, "loss": 0.013, "step": 22300 }, { "epoch": 5.92, "learning_rate": 3.8193154682939775e-05, "loss": 0.0118, "step": 22350 }, { "epoch": 5.94, "learning_rate": 3.816662244627222e-05, "loss": 0.0126, "step": 22400 }, { "epoch": 5.95, "learning_rate": 3.8140090209604676e-05, "loss": 0.0121, "step": 22450 }, { "epoch": 5.96, "learning_rate": 3.811355797293712e-05, "loss": 0.0113, "step": 22500 }, { "epoch": 5.98, "learning_rate": 3.808702573626957e-05, "loss": 0.0117, "step": 22550 }, { "epoch": 5.99, "learning_rate": 3.8060493499602015e-05, "loss": 0.011, "step": 22600 }, { "epoch": 6.0, "learning_rate": 3.803396126293447e-05, "loss": 0.0125, "step": 22650 }, { "epoch": 6.02, "learning_rate": 3.8007429026266916e-05, "loss": 0.0094, "step": 22700 }, { "epoch": 6.03, "learning_rate": 3.798089678959936e-05, "loss": 0.0086, "step": 22750 }, { "epoch": 6.04, "learning_rate": 3.795436455293182e-05, "loss": 0.0093, "step": 22800 }, { "epoch": 6.06, "learning_rate": 3.792783231626426e-05, "loss": 0.009, "step": 22850 }, { "epoch": 6.07, "learning_rate": 3.790130007959671e-05, "loss": 0.0091, "step": 22900 }, { "epoch": 6.08, "learning_rate": 3.787476784292916e-05, "loss": 0.0111, "step": 22950 }, { "epoch": 6.1, "learning_rate": 3.784823560626161e-05, "loss": 0.0098, "step": 23000 }, { "epoch": 6.11, "learning_rate": 3.782170336959406e-05, "loss": 0.0089, "step": 23050 }, { "epoch": 6.12, "learning_rate": 3.779517113292651e-05, "loss": 0.0092, "step": 23100 }, { "epoch": 6.14, "learning_rate": 3.776863889625896e-05, "loss": 0.0107, "step": 23150 }, { "epoch": 6.15, "learning_rate": 3.77421066595914e-05, "loss": 0.0085, "step": 23200 }, { "epoch": 6.16, "learning_rate": 3.771557442292385e-05, "loss": 0.0091, "step": 23250 }, { "epoch": 6.18, "learning_rate": 3.7689042186256304e-05, "loss": 0.0092, "step": 23300 }, { "epoch": 6.19, "learning_rate": 3.7662509949588754e-05, "loss": 0.0091, "step": 23350 }, { "epoch": 6.2, "learning_rate": 3.76359777129212e-05, "loss": 0.0096, "step": 23400 }, { "epoch": 6.22, "learning_rate": 3.760944547625365e-05, "loss": 0.0097, "step": 23450 }, { "epoch": 6.23, "learning_rate": 3.75829132395861e-05, "loss": 0.0089, "step": 23500 }, { "epoch": 6.24, "learning_rate": 3.755638100291855e-05, "loss": 0.0083, "step": 23550 }, { "epoch": 6.25, "learning_rate": 3.7529848766250994e-05, "loss": 0.0106, "step": 23600 }, { "epoch": 6.27, "learning_rate": 3.7503316529583445e-05, "loss": 0.0098, "step": 23650 }, { "epoch": 6.28, "learning_rate": 3.7476784292915895e-05, "loss": 0.0107, "step": 23700 }, { "epoch": 6.29, "learning_rate": 3.745025205624834e-05, "loss": 0.0104, "step": 23750 }, { "epoch": 6.31, "learning_rate": 3.7423719819580797e-05, "loss": 0.0098, "step": 23800 }, { "epoch": 6.32, "learning_rate": 3.739718758291324e-05, "loss": 0.0103, "step": 23850 }, { "epoch": 6.33, "learning_rate": 3.737065534624569e-05, "loss": 0.0097, "step": 23900 }, { "epoch": 6.35, "learning_rate": 3.734412310957814e-05, "loss": 0.0103, "step": 23950 }, { "epoch": 6.36, "learning_rate": 3.731759087291059e-05, "loss": 0.0099, "step": 24000 }, { "epoch": 6.37, "learning_rate": 3.7291058636243036e-05, "loss": 0.0102, "step": 24050 }, { "epoch": 6.39, "learning_rate": 3.726452639957548e-05, "loss": 0.01, "step": 24100 }, { "epoch": 6.4, "learning_rate": 3.723799416290794e-05, "loss": 0.0113, "step": 24150 }, { "epoch": 6.41, "learning_rate": 3.721146192624038e-05, "loss": 0.009, "step": 24200 }, { "epoch": 6.43, "learning_rate": 3.718492968957283e-05, "loss": 0.0103, "step": 24250 }, { "epoch": 6.44, "learning_rate": 3.715839745290528e-05, "loss": 0.0095, "step": 24300 }, { "epoch": 6.45, "learning_rate": 3.7131865216237734e-05, "loss": 0.0116, "step": 24350 }, { "epoch": 6.47, "learning_rate": 3.710533297957018e-05, "loss": 0.0113, "step": 24400 }, { "epoch": 6.48, "learning_rate": 3.707880074290263e-05, "loss": 0.0114, "step": 24450 }, { "epoch": 6.49, "learning_rate": 3.705226850623508e-05, "loss": 0.0106, "step": 24500 }, { "epoch": 6.51, "learning_rate": 3.702573626956752e-05, "loss": 0.0101, "step": 24550 }, { "epoch": 6.52, "learning_rate": 3.699920403289997e-05, "loss": 0.0085, "step": 24600 }, { "epoch": 6.53, "learning_rate": 3.6972671796232424e-05, "loss": 0.0095, "step": 24650 }, { "epoch": 6.55, "learning_rate": 3.6946139559564875e-05, "loss": 0.0112, "step": 24700 }, { "epoch": 6.56, "learning_rate": 3.691960732289732e-05, "loss": 0.0102, "step": 24750 }, { "epoch": 6.57, "learning_rate": 3.689307508622977e-05, "loss": 0.0104, "step": 24800 }, { "epoch": 6.59, "learning_rate": 3.686654284956222e-05, "loss": 0.0102, "step": 24850 }, { "epoch": 6.6, "learning_rate": 3.684001061289467e-05, "loss": 0.0103, "step": 24900 }, { "epoch": 6.61, "learning_rate": 3.6813478376227114e-05, "loss": 0.0096, "step": 24950 }, { "epoch": 6.63, "learning_rate": 3.678694613955957e-05, "loss": 0.0097, "step": 25000 }, { "epoch": 6.64, "learning_rate": 3.6760413902892016e-05, "loss": 0.0112, "step": 25050 }, { "epoch": 6.65, "learning_rate": 3.673388166622446e-05, "loss": 0.0099, "step": 25100 }, { "epoch": 6.67, "learning_rate": 3.670734942955692e-05, "loss": 0.0089, "step": 25150 }, { "epoch": 6.68, "learning_rate": 3.668081719288936e-05, "loss": 0.0111, "step": 25200 }, { "epoch": 6.69, "learning_rate": 3.665428495622181e-05, "loss": 0.012, "step": 25250 }, { "epoch": 6.71, "learning_rate": 3.662775271955426e-05, "loss": 0.0108, "step": 25300 }, { "epoch": 6.72, "learning_rate": 3.660122048288671e-05, "loss": 0.0097, "step": 25350 }, { "epoch": 6.73, "learning_rate": 3.657468824621916e-05, "loss": 0.0101, "step": 25400 }, { "epoch": 6.75, "learning_rate": 3.654815600955161e-05, "loss": 0.0105, "step": 25450 }, { "epoch": 6.76, "learning_rate": 3.652162377288406e-05, "loss": 0.0101, "step": 25500 }, { "epoch": 6.77, "learning_rate": 3.64950915362165e-05, "loss": 0.0097, "step": 25550 }, { "epoch": 6.79, "learning_rate": 3.646855929954895e-05, "loss": 0.0105, "step": 25600 }, { "epoch": 6.8, "learning_rate": 3.64420270628814e-05, "loss": 0.0107, "step": 25650 }, { "epoch": 6.81, "learning_rate": 3.6415494826213854e-05, "loss": 0.0104, "step": 25700 }, { "epoch": 6.82, "learning_rate": 3.63889625895463e-05, "loss": 0.0099, "step": 25750 }, { "epoch": 6.84, "learning_rate": 3.636243035287875e-05, "loss": 0.0106, "step": 25800 }, { "epoch": 6.85, "learning_rate": 3.63358981162112e-05, "loss": 0.0092, "step": 25850 }, { "epoch": 6.86, "learning_rate": 3.630936587954365e-05, "loss": 0.0096, "step": 25900 }, { "epoch": 6.88, "learning_rate": 3.6282833642876094e-05, "loss": 0.0104, "step": 25950 }, { "epoch": 6.89, "learning_rate": 3.6256301406208544e-05, "loss": 0.011, "step": 26000 }, { "epoch": 6.9, "learning_rate": 3.6229769169540995e-05, "loss": 0.0102, "step": 26050 }, { "epoch": 6.92, "learning_rate": 3.620323693287344e-05, "loss": 0.0115, "step": 26100 }, { "epoch": 6.93, "learning_rate": 3.6176704696205896e-05, "loss": 0.0119, "step": 26150 }, { "epoch": 6.94, "learning_rate": 3.615017245953834e-05, "loss": 0.0108, "step": 26200 }, { "epoch": 6.96, "learning_rate": 3.612364022287079e-05, "loss": 0.0109, "step": 26250 }, { "epoch": 6.97, "learning_rate": 3.6097107986203235e-05, "loss": 0.0104, "step": 26300 }, { "epoch": 6.98, "learning_rate": 3.607057574953569e-05, "loss": 0.0107, "step": 26350 }, { "epoch": 7.0, "learning_rate": 3.6044043512868136e-05, "loss": 0.0099, "step": 26400 }, { "epoch": 7.01, "learning_rate": 3.601751127620058e-05, "loss": 0.0095, "step": 26450 }, { "epoch": 7.02, "learning_rate": 3.599097903953304e-05, "loss": 0.0104, "step": 26500 }, { "epoch": 7.04, "learning_rate": 3.596444680286548e-05, "loss": 0.0082, "step": 26550 }, { "epoch": 7.05, "learning_rate": 3.593791456619793e-05, "loss": 0.0084, "step": 26600 }, { "epoch": 7.06, "learning_rate": 3.591138232953038e-05, "loss": 0.0078, "step": 26650 }, { "epoch": 7.08, "learning_rate": 3.5884850092862833e-05, "loss": 0.0088, "step": 26700 }, { "epoch": 7.09, "learning_rate": 3.585831785619528e-05, "loss": 0.0082, "step": 26750 }, { "epoch": 7.1, "learning_rate": 3.583178561952773e-05, "loss": 0.0086, "step": 26800 }, { "epoch": 7.12, "learning_rate": 3.580525338286018e-05, "loss": 0.0086, "step": 26850 }, { "epoch": 7.13, "learning_rate": 3.577872114619262e-05, "loss": 0.0088, "step": 26900 }, { "epoch": 7.14, "learning_rate": 3.575218890952507e-05, "loss": 0.0081, "step": 26950 }, { "epoch": 7.16, "learning_rate": 3.5725656672857524e-05, "loss": 0.0079, "step": 27000 }, { "epoch": 7.17, "learning_rate": 3.5699124436189974e-05, "loss": 0.0082, "step": 27050 }, { "epoch": 7.18, "learning_rate": 3.567259219952242e-05, "loss": 0.0082, "step": 27100 }, { "epoch": 7.2, "learning_rate": 3.564605996285487e-05, "loss": 0.008, "step": 27150 }, { "epoch": 7.21, "learning_rate": 3.561952772618732e-05, "loss": 0.0076, "step": 27200 }, { "epoch": 7.22, "learning_rate": 3.559299548951977e-05, "loss": 0.0083, "step": 27250 }, { "epoch": 7.24, "learning_rate": 3.5566463252852214e-05, "loss": 0.0089, "step": 27300 }, { "epoch": 7.25, "learning_rate": 3.553993101618467e-05, "loss": 0.0088, "step": 27350 }, { "epoch": 7.26, "learning_rate": 3.5513398779517116e-05, "loss": 0.0091, "step": 27400 }, { "epoch": 7.28, "learning_rate": 3.548686654284956e-05, "loss": 0.0079, "step": 27450 }, { "epoch": 7.29, "learning_rate": 3.546033430618202e-05, "loss": 0.0084, "step": 27500 }, { "epoch": 7.3, "learning_rate": 3.543380206951446e-05, "loss": 0.0089, "step": 27550 }, { "epoch": 7.32, "learning_rate": 3.540726983284691e-05, "loss": 0.0087, "step": 27600 }, { "epoch": 7.33, "learning_rate": 3.5380737596179355e-05, "loss": 0.0094, "step": 27650 }, { "epoch": 7.34, "learning_rate": 3.535420535951181e-05, "loss": 0.0092, "step": 27700 }, { "epoch": 7.35, "learning_rate": 3.532767312284426e-05, "loss": 0.0087, "step": 27750 }, { "epoch": 7.37, "learning_rate": 3.530114088617671e-05, "loss": 0.0087, "step": 27800 }, { "epoch": 7.38, "learning_rate": 3.527460864950916e-05, "loss": 0.0087, "step": 27850 }, { "epoch": 7.39, "learning_rate": 3.52480764128416e-05, "loss": 0.0085, "step": 27900 }, { "epoch": 7.41, "learning_rate": 3.522154417617405e-05, "loss": 0.0096, "step": 27950 }, { "epoch": 7.42, "learning_rate": 3.51950119395065e-05, "loss": 0.0081, "step": 28000 }, { "epoch": 7.43, "learning_rate": 3.5168479702838954e-05, "loss": 0.0089, "step": 28050 }, { "epoch": 7.45, "learning_rate": 3.51419474661714e-05, "loss": 0.0084, "step": 28100 }, { "epoch": 7.46, "learning_rate": 3.511541522950385e-05, "loss": 0.0094, "step": 28150 }, { "epoch": 7.47, "learning_rate": 3.50888829928363e-05, "loss": 0.0093, "step": 28200 }, { "epoch": 7.49, "learning_rate": 3.506235075616875e-05, "loss": 0.0096, "step": 28250 }, { "epoch": 7.5, "learning_rate": 3.5035818519501194e-05, "loss": 0.0092, "step": 28300 }, { "epoch": 7.51, "learning_rate": 3.5009286282833644e-05, "loss": 0.0085, "step": 28350 }, { "epoch": 7.53, "learning_rate": 3.4982754046166095e-05, "loss": 0.0096, "step": 28400 }, { "epoch": 7.54, "learning_rate": 3.495622180949854e-05, "loss": 0.0099, "step": 28450 }, { "epoch": 7.55, "learning_rate": 3.492968957283099e-05, "loss": 0.0086, "step": 28500 }, { "epoch": 7.57, "learning_rate": 3.490315733616344e-05, "loss": 0.009, "step": 28550 }, { "epoch": 7.58, "learning_rate": 3.487662509949589e-05, "loss": 0.0092, "step": 28600 }, { "epoch": 7.59, "learning_rate": 3.4850092862828335e-05, "loss": 0.0088, "step": 28650 }, { "epoch": 7.61, "learning_rate": 3.482356062616079e-05, "loss": 0.009, "step": 28700 }, { "epoch": 7.62, "learning_rate": 3.4797028389493236e-05, "loss": 0.0103, "step": 28750 }, { "epoch": 7.63, "learning_rate": 3.477049615282568e-05, "loss": 0.0099, "step": 28800 }, { "epoch": 7.65, "learning_rate": 3.474396391615814e-05, "loss": 0.0089, "step": 28850 }, { "epoch": 7.66, "learning_rate": 3.471743167949058e-05, "loss": 0.0098, "step": 28900 }, { "epoch": 7.67, "learning_rate": 3.469089944282303e-05, "loss": 0.0102, "step": 28950 }, { "epoch": 7.69, "learning_rate": 3.4664367206155476e-05, "loss": 0.0095, "step": 29000 }, { "epoch": 7.7, "learning_rate": 3.463783496948793e-05, "loss": 0.0092, "step": 29050 }, { "epoch": 7.71, "learning_rate": 3.461130273282038e-05, "loss": 0.0103, "step": 29100 }, { "epoch": 7.73, "learning_rate": 3.458477049615283e-05, "loss": 0.0097, "step": 29150 }, { "epoch": 7.74, "learning_rate": 3.455823825948528e-05, "loss": 0.0096, "step": 29200 }, { "epoch": 7.75, "learning_rate": 3.453170602281772e-05, "loss": 0.0097, "step": 29250 }, { "epoch": 7.77, "learning_rate": 3.450517378615017e-05, "loss": 0.0094, "step": 29300 }, { "epoch": 7.78, "learning_rate": 3.4478641549482624e-05, "loss": 0.009, "step": 29350 }, { "epoch": 7.79, "learning_rate": 3.4452109312815074e-05, "loss": 0.01, "step": 29400 }, { "epoch": 7.81, "learning_rate": 3.442557707614752e-05, "loss": 0.0102, "step": 29450 }, { "epoch": 7.82, "learning_rate": 3.439904483947997e-05, "loss": 0.009, "step": 29500 }, { "epoch": 7.83, "learning_rate": 3.437251260281242e-05, "loss": 0.0086, "step": 29550 }, { "epoch": 7.85, "learning_rate": 3.434598036614487e-05, "loss": 0.009, "step": 29600 }, { "epoch": 7.86, "learning_rate": 3.4319448129477314e-05, "loss": 0.0096, "step": 29650 }, { "epoch": 7.87, "learning_rate": 3.429291589280977e-05, "loss": 0.0102, "step": 29700 }, { "epoch": 7.88, "learning_rate": 3.4266383656142215e-05, "loss": 0.0095, "step": 29750 }, { "epoch": 7.9, "learning_rate": 3.423985141947466e-05, "loss": 0.0092, "step": 29800 }, { "epoch": 7.91, "learning_rate": 3.421331918280711e-05, "loss": 0.0095, "step": 29850 }, { "epoch": 7.92, "learning_rate": 3.418678694613956e-05, "loss": 0.0084, "step": 29900 }, { "epoch": 7.94, "learning_rate": 3.416025470947201e-05, "loss": 0.0093, "step": 29950 }, { "epoch": 7.95, "learning_rate": 3.4133722472804455e-05, "loss": 0.0091, "step": 30000 }, { "epoch": 7.96, "learning_rate": 3.410719023613691e-05, "loss": 0.0089, "step": 30050 }, { "epoch": 7.98, "learning_rate": 3.4080657999469356e-05, "loss": 0.0088, "step": 30100 }, { "epoch": 7.99, "learning_rate": 3.405412576280181e-05, "loss": 0.0095, "step": 30150 }, { "epoch": 8.0, "learning_rate": 3.402759352613426e-05, "loss": 0.0093, "step": 30200 }, { "epoch": 8.02, "learning_rate": 3.40010612894667e-05, "loss": 0.0083, "step": 30250 }, { "epoch": 8.03, "learning_rate": 3.397452905279915e-05, "loss": 0.0077, "step": 30300 }, { "epoch": 8.04, "learning_rate": 3.39479968161316e-05, "loss": 0.0073, "step": 30350 }, { "epoch": 8.06, "learning_rate": 3.3921464579464054e-05, "loss": 0.0074, "step": 30400 }, { "epoch": 8.07, "learning_rate": 3.38949323427965e-05, "loss": 0.0075, "step": 30450 }, { "epoch": 8.08, "learning_rate": 3.386840010612895e-05, "loss": 0.0076, "step": 30500 }, { "epoch": 8.1, "learning_rate": 3.38418678694614e-05, "loss": 0.0068, "step": 30550 }, { "epoch": 8.11, "learning_rate": 3.381533563279385e-05, "loss": 0.0068, "step": 30600 }, { "epoch": 8.12, "learning_rate": 3.3788803396126293e-05, "loss": 0.0074, "step": 30650 }, { "epoch": 8.14, "learning_rate": 3.3762271159458744e-05, "loss": 0.0077, "step": 30700 }, { "epoch": 8.15, "learning_rate": 3.3735738922791195e-05, "loss": 0.0075, "step": 30750 }, { "epoch": 8.16, "learning_rate": 3.370920668612364e-05, "loss": 0.0085, "step": 30800 }, { "epoch": 8.18, "learning_rate": 3.368267444945609e-05, "loss": 0.0086, "step": 30850 }, { "epoch": 8.19, "learning_rate": 3.365614221278854e-05, "loss": 0.0088, "step": 30900 }, { "epoch": 8.2, "learning_rate": 3.362960997612099e-05, "loss": 0.008, "step": 30950 }, { "epoch": 8.22, "learning_rate": 3.3603077739453435e-05, "loss": 0.0082, "step": 31000 }, { "epoch": 8.23, "learning_rate": 3.357654550278589e-05, "loss": 0.0088, "step": 31050 }, { "epoch": 8.24, "learning_rate": 3.3550013266118336e-05, "loss": 0.0075, "step": 31100 }, { "epoch": 8.26, "learning_rate": 3.352348102945078e-05, "loss": 0.0086, "step": 31150 }, { "epoch": 8.27, "learning_rate": 3.349694879278323e-05, "loss": 0.0081, "step": 31200 }, { "epoch": 8.28, "learning_rate": 3.347041655611568e-05, "loss": 0.0082, "step": 31250 }, { "epoch": 8.3, "learning_rate": 3.344388431944813e-05, "loss": 0.0082, "step": 31300 }, { "epoch": 8.31, "learning_rate": 3.3417352082780576e-05, "loss": 0.0089, "step": 31350 }, { "epoch": 8.32, "learning_rate": 3.339081984611303e-05, "loss": 0.0095, "step": 31400 }, { "epoch": 8.34, "learning_rate": 3.336428760944548e-05, "loss": 0.0079, "step": 31450 }, { "epoch": 8.35, "learning_rate": 3.333775537277793e-05, "loss": 0.0079, "step": 31500 }, { "epoch": 8.36, "learning_rate": 3.331122313611038e-05, "loss": 0.0077, "step": 31550 }, { "epoch": 8.38, "learning_rate": 3.328469089944282e-05, "loss": 0.0086, "step": 31600 }, { "epoch": 8.39, "learning_rate": 3.325815866277527e-05, "loss": 0.0081, "step": 31650 }, { "epoch": 8.4, "learning_rate": 3.3231626426107723e-05, "loss": 0.0082, "step": 31700 }, { "epoch": 8.42, "learning_rate": 3.3205094189440174e-05, "loss": 0.0079, "step": 31750 }, { "epoch": 8.43, "learning_rate": 3.317856195277262e-05, "loss": 0.0079, "step": 31800 }, { "epoch": 8.44, "learning_rate": 3.315202971610507e-05, "loss": 0.0079, "step": 31850 }, { "epoch": 8.45, "learning_rate": 3.312549747943752e-05, "loss": 0.0086, "step": 31900 }, { "epoch": 8.47, "learning_rate": 3.309896524276997e-05, "loss": 0.0083, "step": 31950 }, { "epoch": 8.48, "learning_rate": 3.3072433006102414e-05, "loss": 0.0082, "step": 32000 }, { "epoch": 8.49, "learning_rate": 3.3045900769434865e-05, "loss": 0.0095, "step": 32050 }, { "epoch": 8.51, "learning_rate": 3.3019368532767315e-05, "loss": 0.0078, "step": 32100 }, { "epoch": 8.52, "learning_rate": 3.299283629609976e-05, "loss": 0.009, "step": 32150 }, { "epoch": 8.53, "learning_rate": 3.296630405943221e-05, "loss": 0.008, "step": 32200 }, { "epoch": 8.55, "learning_rate": 3.293977182276466e-05, "loss": 0.008, "step": 32250 }, { "epoch": 8.56, "learning_rate": 3.291323958609711e-05, "loss": 0.0085, "step": 32300 }, { "epoch": 8.57, "learning_rate": 3.2886707349429555e-05, "loss": 0.0085, "step": 32350 }, { "epoch": 8.59, "learning_rate": 3.286017511276201e-05, "loss": 0.0079, "step": 32400 }, { "epoch": 8.6, "learning_rate": 3.2833642876094456e-05, "loss": 0.0085, "step": 32450 }, { "epoch": 8.61, "learning_rate": 3.280711063942691e-05, "loss": 0.0087, "step": 32500 }, { "epoch": 8.63, "learning_rate": 3.278057840275936e-05, "loss": 0.0087, "step": 32550 }, { "epoch": 8.64, "learning_rate": 3.27540461660918e-05, "loss": 0.0094, "step": 32600 }, { "epoch": 8.65, "learning_rate": 3.272751392942425e-05, "loss": 0.0083, "step": 32650 }, { "epoch": 8.67, "learning_rate": 3.2700981692756696e-05, "loss": 0.0091, "step": 32700 }, { "epoch": 8.68, "learning_rate": 3.2674449456089154e-05, "loss": 0.0089, "step": 32750 }, { "epoch": 8.69, "learning_rate": 3.26479172194216e-05, "loss": 0.0085, "step": 32800 }, { "epoch": 8.71, "learning_rate": 3.262138498275405e-05, "loss": 0.0078, "step": 32850 }, { "epoch": 8.72, "learning_rate": 3.25948527460865e-05, "loss": 0.0085, "step": 32900 }, { "epoch": 8.73, "learning_rate": 3.256832050941895e-05, "loss": 0.0096, "step": 32950 }, { "epoch": 8.75, "learning_rate": 3.254178827275139e-05, "loss": 0.0089, "step": 33000 }, { "epoch": 8.76, "learning_rate": 3.2515256036083844e-05, "loss": 0.008, "step": 33050 }, { "epoch": 8.77, "learning_rate": 3.2488723799416295e-05, "loss": 0.0084, "step": 33100 }, { "epoch": 8.79, "learning_rate": 3.246219156274874e-05, "loss": 0.0094, "step": 33150 }, { "epoch": 8.8, "learning_rate": 3.243565932608119e-05, "loss": 0.0089, "step": 33200 }, { "epoch": 8.81, "learning_rate": 3.240912708941364e-05, "loss": 0.0084, "step": 33250 }, { "epoch": 8.83, "learning_rate": 3.238259485274609e-05, "loss": 0.0092, "step": 33300 }, { "epoch": 8.84, "learning_rate": 3.2356062616078534e-05, "loss": 0.0083, "step": 33350 }, { "epoch": 8.85, "learning_rate": 3.2329530379410985e-05, "loss": 0.0089, "step": 33400 }, { "epoch": 8.87, "learning_rate": 3.2302998142743436e-05, "loss": 0.009, "step": 33450 }, { "epoch": 8.88, "learning_rate": 3.227646590607588e-05, "loss": 0.0084, "step": 33500 }, { "epoch": 8.89, "learning_rate": 3.224993366940833e-05, "loss": 0.0084, "step": 33550 }, { "epoch": 8.91, "learning_rate": 3.222340143274078e-05, "loss": 0.0083, "step": 33600 }, { "epoch": 8.92, "learning_rate": 3.219686919607323e-05, "loss": 0.0085, "step": 33650 }, { "epoch": 8.93, "learning_rate": 3.2170336959405675e-05, "loss": 0.0086, "step": 33700 }, { "epoch": 8.95, "learning_rate": 3.214380472273813e-05, "loss": 0.0085, "step": 33750 }, { "epoch": 8.96, "learning_rate": 3.211727248607058e-05, "loss": 0.0079, "step": 33800 }, { "epoch": 8.97, "learning_rate": 3.209074024940303e-05, "loss": 0.0082, "step": 33850 }, { "epoch": 8.98, "learning_rate": 3.206420801273548e-05, "loss": 0.008, "step": 33900 }, { "epoch": 9.0, "learning_rate": 3.203767577606792e-05, "loss": 0.0088, "step": 33950 }, { "epoch": 9.01, "learning_rate": 3.201114353940037e-05, "loss": 0.0073, "step": 34000 }, { "epoch": 9.02, "learning_rate": 3.1984611302732817e-05, "loss": 0.0069, "step": 34050 }, { "epoch": 9.04, "learning_rate": 3.1958079066065274e-05, "loss": 0.0069, "step": 34100 }, { "epoch": 9.05, "learning_rate": 3.193154682939772e-05, "loss": 0.0071, "step": 34150 }, { "epoch": 9.06, "learning_rate": 3.190501459273017e-05, "loss": 0.0066, "step": 34200 }, { "epoch": 9.08, "learning_rate": 3.187848235606262e-05, "loss": 0.0073, "step": 34250 }, { "epoch": 9.09, "learning_rate": 3.185195011939507e-05, "loss": 0.0076, "step": 34300 }, { "epoch": 9.1, "learning_rate": 3.1825417882727514e-05, "loss": 0.0078, "step": 34350 }, { "epoch": 9.12, "learning_rate": 3.1798885646059964e-05, "loss": 0.0072, "step": 34400 }, { "epoch": 9.13, "learning_rate": 3.1772353409392415e-05, "loss": 0.0066, "step": 34450 }, { "epoch": 9.14, "learning_rate": 3.174582117272486e-05, "loss": 0.008, "step": 34500 }, { "epoch": 9.16, "learning_rate": 3.171928893605731e-05, "loss": 0.007, "step": 34550 }, { "epoch": 9.17, "learning_rate": 3.169275669938976e-05, "loss": 0.0072, "step": 34600 }, { "epoch": 9.18, "learning_rate": 3.166622446272221e-05, "loss": 0.0077, "step": 34650 }, { "epoch": 9.2, "learning_rate": 3.1639692226054655e-05, "loss": 0.007, "step": 34700 }, { "epoch": 9.21, "learning_rate": 3.1613159989387105e-05, "loss": 0.0071, "step": 34750 }, { "epoch": 9.22, "learning_rate": 3.1586627752719556e-05, "loss": 0.0074, "step": 34800 }, { "epoch": 9.24, "learning_rate": 3.156009551605201e-05, "loss": 0.0067, "step": 34850 }, { "epoch": 9.25, "learning_rate": 3.153356327938445e-05, "loss": 0.0071, "step": 34900 }, { "epoch": 9.26, "learning_rate": 3.15070310427169e-05, "loss": 0.007, "step": 34950 }, { "epoch": 9.28, "learning_rate": 3.148049880604935e-05, "loss": 0.0084, "step": 35000 }, { "epoch": 9.29, "learning_rate": 3.1453966569381796e-05, "loss": 0.008, "step": 35050 }, { "epoch": 9.3, "learning_rate": 3.142743433271425e-05, "loss": 0.0074, "step": 35100 }, { "epoch": 9.32, "learning_rate": 3.14009020960467e-05, "loss": 0.0074, "step": 35150 }, { "epoch": 9.33, "learning_rate": 3.137436985937915e-05, "loss": 0.008, "step": 35200 }, { "epoch": 9.34, "learning_rate": 3.13478376227116e-05, "loss": 0.0082, "step": 35250 }, { "epoch": 9.36, "learning_rate": 3.132130538604405e-05, "loss": 0.0073, "step": 35300 }, { "epoch": 9.37, "learning_rate": 3.129477314937649e-05, "loss": 0.0074, "step": 35350 }, { "epoch": 9.38, "learning_rate": 3.1268240912708944e-05, "loss": 0.0067, "step": 35400 }, { "epoch": 9.4, "learning_rate": 3.1241708676041394e-05, "loss": 0.0074, "step": 35450 }, { "epoch": 9.41, "learning_rate": 3.121517643937384e-05, "loss": 0.0072, "step": 35500 }, { "epoch": 9.42, "learning_rate": 3.118864420270629e-05, "loss": 0.0072, "step": 35550 }, { "epoch": 9.44, "learning_rate": 3.116211196603874e-05, "loss": 0.0088, "step": 35600 }, { "epoch": 9.45, "learning_rate": 3.113557972937119e-05, "loss": 0.0076, "step": 35650 }, { "epoch": 9.46, "learning_rate": 3.1109047492703634e-05, "loss": 0.0087, "step": 35700 }, { "epoch": 9.48, "learning_rate": 3.1082515256036085e-05, "loss": 0.0083, "step": 35750 }, { "epoch": 9.49, "learning_rate": 3.1055983019368536e-05, "loss": 0.0075, "step": 35800 }, { "epoch": 9.5, "learning_rate": 3.102945078270098e-05, "loss": 0.0084, "step": 35850 }, { "epoch": 9.51, "learning_rate": 3.100291854603343e-05, "loss": 0.0082, "step": 35900 }, { "epoch": 9.53, "learning_rate": 3.097638630936588e-05, "loss": 0.0078, "step": 35950 }, { "epoch": 9.54, "learning_rate": 3.094985407269833e-05, "loss": 0.0088, "step": 36000 }, { "epoch": 9.55, "learning_rate": 3.0923321836030775e-05, "loss": 0.0085, "step": 36050 }, { "epoch": 9.57, "learning_rate": 3.089678959936323e-05, "loss": 0.0071, "step": 36100 }, { "epoch": 9.58, "learning_rate": 3.087025736269568e-05, "loss": 0.0079, "step": 36150 }, { "epoch": 9.59, "learning_rate": 3.084372512602813e-05, "loss": 0.0082, "step": 36200 }, { "epoch": 9.61, "learning_rate": 3.081719288936057e-05, "loss": 0.0076, "step": 36250 }, { "epoch": 9.62, "learning_rate": 3.079066065269302e-05, "loss": 0.0075, "step": 36300 }, { "epoch": 9.63, "learning_rate": 3.076412841602547e-05, "loss": 0.0078, "step": 36350 }, { "epoch": 9.65, "learning_rate": 3.0737596179357916e-05, "loss": 0.0078, "step": 36400 }, { "epoch": 9.66, "learning_rate": 3.0711063942690374e-05, "loss": 0.0073, "step": 36450 }, { "epoch": 9.67, "learning_rate": 3.068453170602282e-05, "loss": 0.0076, "step": 36500 }, { "epoch": 9.69, "learning_rate": 3.065799946935527e-05, "loss": 0.0073, "step": 36550 }, { "epoch": 9.7, "learning_rate": 3.063146723268772e-05, "loss": 0.0087, "step": 36600 }, { "epoch": 9.71, "learning_rate": 3.060493499602017e-05, "loss": 0.0078, "step": 36650 }, { "epoch": 9.73, "learning_rate": 3.0578402759352614e-05, "loss": 0.0076, "step": 36700 }, { "epoch": 9.74, "learning_rate": 3.0551870522685064e-05, "loss": 0.0078, "step": 36750 }, { "epoch": 9.75, "learning_rate": 3.0525338286017515e-05, "loss": 0.0074, "step": 36800 }, { "epoch": 9.77, "learning_rate": 3.049880604934996e-05, "loss": 0.0073, "step": 36850 }, { "epoch": 9.78, "learning_rate": 3.0472273812682413e-05, "loss": 0.0082, "step": 36900 }, { "epoch": 9.79, "learning_rate": 3.0445741576014857e-05, "loss": 0.008, "step": 36950 }, { "epoch": 9.81, "learning_rate": 3.041920933934731e-05, "loss": 0.0081, "step": 37000 }, { "epoch": 9.82, "learning_rate": 3.0392677102679755e-05, "loss": 0.008, "step": 37050 }, { "epoch": 9.83, "learning_rate": 3.036614486601221e-05, "loss": 0.0087, "step": 37100 }, { "epoch": 9.85, "learning_rate": 3.0339612629344656e-05, "loss": 0.0088, "step": 37150 }, { "epoch": 9.86, "learning_rate": 3.0313080392677107e-05, "loss": 0.008, "step": 37200 }, { "epoch": 9.87, "learning_rate": 3.0286548156009554e-05, "loss": 0.008, "step": 37250 }, { "epoch": 9.89, "learning_rate": 3.0260015919342e-05, "loss": 0.0082, "step": 37300 }, { "epoch": 9.9, "learning_rate": 3.0233483682674452e-05, "loss": 0.0082, "step": 37350 }, { "epoch": 9.91, "learning_rate": 3.02069514460069e-05, "loss": 0.0096, "step": 37400 }, { "epoch": 9.93, "learning_rate": 3.018041920933935e-05, "loss": 0.0083, "step": 37450 }, { "epoch": 9.94, "learning_rate": 3.0153886972671797e-05, "loss": 0.0081, "step": 37500 }, { "epoch": 9.95, "learning_rate": 3.0127354736004248e-05, "loss": 0.0103, "step": 37550 }, { "epoch": 9.97, "learning_rate": 3.0100822499336695e-05, "loss": 0.0084, "step": 37600 }, { "epoch": 9.98, "learning_rate": 3.0074290262669146e-05, "loss": 0.0084, "step": 37650 }, { "epoch": 9.99, "learning_rate": 3.0047758026001593e-05, "loss": 0.0073, "step": 37700 }, { "epoch": 10.01, "learning_rate": 3.002122578933404e-05, "loss": 0.0073, "step": 37750 }, { "epoch": 10.02, "learning_rate": 2.999469355266649e-05, "loss": 0.0065, "step": 37800 }, { "epoch": 10.03, "learning_rate": 2.9968161315998938e-05, "loss": 0.006, "step": 37850 }, { "epoch": 10.05, "learning_rate": 2.994162907933139e-05, "loss": 0.0065, "step": 37900 }, { "epoch": 10.06, "learning_rate": 2.9915096842663836e-05, "loss": 0.0071, "step": 37950 }, { "epoch": 10.07, "learning_rate": 2.988856460599629e-05, "loss": 0.0064, "step": 38000 }, { "epoch": 10.08, "learning_rate": 2.9862032369328734e-05, "loss": 0.0062, "step": 38050 }, { "epoch": 10.1, "learning_rate": 2.9835500132661188e-05, "loss": 0.0063, "step": 38100 }, { "epoch": 10.11, "learning_rate": 2.9808967895993632e-05, "loss": 0.0065, "step": 38150 }, { "epoch": 10.12, "learning_rate": 2.978243565932608e-05, "loss": 0.0072, "step": 38200 }, { "epoch": 10.14, "learning_rate": 2.9755903422658533e-05, "loss": 0.0066, "step": 38250 }, { "epoch": 10.15, "learning_rate": 2.9729371185990977e-05, "loss": 0.007, "step": 38300 }, { "epoch": 10.16, "learning_rate": 2.970283894932343e-05, "loss": 0.0067, "step": 38350 }, { "epoch": 10.18, "learning_rate": 2.967630671265588e-05, "loss": 0.0061, "step": 38400 }, { "epoch": 10.19, "learning_rate": 2.964977447598833e-05, "loss": 0.0068, "step": 38450 }, { "epoch": 10.2, "learning_rate": 2.9623242239320776e-05, "loss": 0.0069, "step": 38500 }, { "epoch": 10.22, "learning_rate": 2.9596710002653227e-05, "loss": 0.0069, "step": 38550 }, { "epoch": 10.23, "learning_rate": 2.9570177765985674e-05, "loss": 0.0073, "step": 38600 }, { "epoch": 10.24, "learning_rate": 2.954364552931812e-05, "loss": 0.0064, "step": 38650 }, { "epoch": 10.26, "learning_rate": 2.9517113292650572e-05, "loss": 0.0068, "step": 38700 }, { "epoch": 10.27, "learning_rate": 2.949058105598302e-05, "loss": 0.0066, "step": 38750 }, { "epoch": 10.28, "learning_rate": 2.946404881931547e-05, "loss": 0.0065, "step": 38800 }, { "epoch": 10.3, "learning_rate": 2.9437516582647918e-05, "loss": 0.0064, "step": 38850 }, { "epoch": 10.31, "learning_rate": 2.9410984345980368e-05, "loss": 0.0062, "step": 38900 }, { "epoch": 10.32, "learning_rate": 2.9384452109312815e-05, "loss": 0.0074, "step": 38950 }, { "epoch": 10.34, "learning_rate": 2.9357919872645266e-05, "loss": 0.0072, "step": 39000 }, { "epoch": 10.35, "learning_rate": 2.9331387635977713e-05, "loss": 0.007, "step": 39050 }, { "epoch": 10.36, "learning_rate": 2.9304855399310167e-05, "loss": 0.0067, "step": 39100 }, { "epoch": 10.38, "learning_rate": 2.927832316264261e-05, "loss": 0.0079, "step": 39150 }, { "epoch": 10.39, "learning_rate": 2.925179092597506e-05, "loss": 0.0067, "step": 39200 }, { "epoch": 10.4, "learning_rate": 2.922525868930751e-05, "loss": 0.0069, "step": 39250 }, { "epoch": 10.42, "learning_rate": 2.9198726452639957e-05, "loss": 0.0073, "step": 39300 }, { "epoch": 10.43, "learning_rate": 2.917219421597241e-05, "loss": 0.0067, "step": 39350 }, { "epoch": 10.44, "learning_rate": 2.9145661979304855e-05, "loss": 0.007, "step": 39400 }, { "epoch": 10.46, "learning_rate": 2.911912974263731e-05, "loss": 0.0069, "step": 39450 }, { "epoch": 10.47, "learning_rate": 2.9092597505969756e-05, "loss": 0.0076, "step": 39500 }, { "epoch": 10.48, "learning_rate": 2.9066065269302206e-05, "loss": 0.0069, "step": 39550 }, { "epoch": 10.5, "learning_rate": 2.9039533032634654e-05, "loss": 0.0074, "step": 39600 }, { "epoch": 10.51, "learning_rate": 2.9013000795967098e-05, "loss": 0.0077, "step": 39650 }, { "epoch": 10.52, "learning_rate": 2.8986468559299552e-05, "loss": 0.0075, "step": 39700 }, { "epoch": 10.54, "learning_rate": 2.8959936322632e-05, "loss": 0.0074, "step": 39750 }, { "epoch": 10.55, "learning_rate": 2.893340408596445e-05, "loss": 0.0076, "step": 39800 }, { "epoch": 10.56, "learning_rate": 2.8906871849296897e-05, "loss": 0.0076, "step": 39850 }, { "epoch": 10.58, "learning_rate": 2.8880339612629348e-05, "loss": 0.0069, "step": 39900 }, { "epoch": 10.59, "learning_rate": 2.8853807375961795e-05, "loss": 0.0075, "step": 39950 }, { "epoch": 10.6, "learning_rate": 2.8827275139294246e-05, "loss": 0.0086, "step": 40000 }, { "epoch": 10.61, "learning_rate": 2.8800742902626693e-05, "loss": 0.0064, "step": 40050 }, { "epoch": 10.63, "learning_rate": 2.877421066595914e-05, "loss": 0.0074, "step": 40100 }, { "epoch": 10.64, "learning_rate": 2.874767842929159e-05, "loss": 0.0084, "step": 40150 }, { "epoch": 10.65, "learning_rate": 2.8721146192624038e-05, "loss": 0.0074, "step": 40200 }, { "epoch": 10.67, "learning_rate": 2.869461395595649e-05, "loss": 0.0073, "step": 40250 }, { "epoch": 10.68, "learning_rate": 2.8668081719288936e-05, "loss": 0.0073, "step": 40300 }, { "epoch": 10.69, "learning_rate": 2.8641549482621387e-05, "loss": 0.0074, "step": 40350 }, { "epoch": 10.71, "learning_rate": 2.8615017245953834e-05, "loss": 0.0078, "step": 40400 }, { "epoch": 10.72, "learning_rate": 2.8588485009286288e-05, "loss": 0.0074, "step": 40450 }, { "epoch": 10.73, "learning_rate": 2.8561952772618732e-05, "loss": 0.0073, "step": 40500 }, { "epoch": 10.75, "learning_rate": 2.853542053595118e-05, "loss": 0.007, "step": 40550 }, { "epoch": 10.76, "learning_rate": 2.8508888299283633e-05, "loss": 0.0068, "step": 40600 }, { "epoch": 10.77, "learning_rate": 2.8482356062616077e-05, "loss": 0.0076, "step": 40650 }, { "epoch": 10.79, "learning_rate": 2.845582382594853e-05, "loss": 0.0073, "step": 40700 }, { "epoch": 10.8, "learning_rate": 2.8429291589280975e-05, "loss": 0.0065, "step": 40750 }, { "epoch": 10.81, "learning_rate": 2.840275935261343e-05, "loss": 0.0069, "step": 40800 }, { "epoch": 10.83, "learning_rate": 2.8376227115945876e-05, "loss": 0.0074, "step": 40850 }, { "epoch": 10.84, "learning_rate": 2.8349694879278327e-05, "loss": 0.0079, "step": 40900 }, { "epoch": 10.85, "learning_rate": 2.8323162642610774e-05, "loss": 0.0062, "step": 40950 }, { "epoch": 10.87, "learning_rate": 2.8296630405943218e-05, "loss": 0.0078, "step": 41000 }, { "epoch": 10.88, "learning_rate": 2.8270098169275672e-05, "loss": 0.0073, "step": 41050 }, { "epoch": 10.89, "learning_rate": 2.824356593260812e-05, "loss": 0.0078, "step": 41100 }, { "epoch": 10.91, "learning_rate": 2.821703369594057e-05, "loss": 0.0077, "step": 41150 }, { "epoch": 10.92, "learning_rate": 2.8190501459273017e-05, "loss": 0.0077, "step": 41200 }, { "epoch": 10.93, "learning_rate": 2.8163969222605468e-05, "loss": 0.0072, "step": 41250 }, { "epoch": 10.95, "learning_rate": 2.8137436985937915e-05, "loss": 0.0073, "step": 41300 }, { "epoch": 10.96, "learning_rate": 2.8110904749270366e-05, "loss": 0.0071, "step": 41350 }, { "epoch": 10.97, "learning_rate": 2.8084372512602813e-05, "loss": 0.0078, "step": 41400 }, { "epoch": 10.99, "learning_rate": 2.8057840275935264e-05, "loss": 0.0078, "step": 41450 }, { "epoch": 11.0, "learning_rate": 2.803130803926771e-05, "loss": 0.007, "step": 41500 }, { "epoch": 11.01, "learning_rate": 2.800477580260016e-05, "loss": 0.0061, "step": 41550 }, { "epoch": 11.03, "learning_rate": 2.797824356593261e-05, "loss": 0.0063, "step": 41600 }, { "epoch": 11.04, "learning_rate": 2.7951711329265056e-05, "loss": 0.007, "step": 41650 }, { "epoch": 11.05, "learning_rate": 2.7925179092597507e-05, "loss": 0.0064, "step": 41700 }, { "epoch": 11.07, "learning_rate": 2.7898646855929954e-05, "loss": 0.0058, "step": 41750 }, { "epoch": 11.08, "learning_rate": 2.787211461926241e-05, "loss": 0.0063, "step": 41800 }, { "epoch": 11.09, "learning_rate": 2.7845582382594852e-05, "loss": 0.0068, "step": 41850 }, { "epoch": 11.11, "learning_rate": 2.7819050145927306e-05, "loss": 0.0063, "step": 41900 }, { "epoch": 11.12, "learning_rate": 2.7792517909259754e-05, "loss": 0.0063, "step": 41950 }, { "epoch": 11.13, "learning_rate": 2.7765985672592197e-05, "loss": 0.0067, "step": 42000 }, { "epoch": 11.14, "learning_rate": 2.773945343592465e-05, "loss": 0.0068, "step": 42050 }, { "epoch": 11.16, "learning_rate": 2.7712921199257095e-05, "loss": 0.0062, "step": 42100 }, { "epoch": 11.17, "learning_rate": 2.768638896258955e-05, "loss": 0.0067, "step": 42150 }, { "epoch": 11.18, "learning_rate": 2.7659856725921997e-05, "loss": 0.0063, "step": 42200 }, { "epoch": 11.2, "learning_rate": 2.7633324489254447e-05, "loss": 0.0072, "step": 42250 }, { "epoch": 11.21, "learning_rate": 2.7606792252586895e-05, "loss": 0.0061, "step": 42300 }, { "epoch": 11.22, "learning_rate": 2.7580260015919345e-05, "loss": 0.0063, "step": 42350 }, { "epoch": 11.24, "learning_rate": 2.7553727779251793e-05, "loss": 0.0062, "step": 42400 }, { "epoch": 11.25, "learning_rate": 2.752719554258424e-05, "loss": 0.0065, "step": 42450 }, { "epoch": 11.26, "learning_rate": 2.750066330591669e-05, "loss": 0.0064, "step": 42500 }, { "epoch": 11.28, "learning_rate": 2.7474131069249138e-05, "loss": 0.0067, "step": 42550 }, { "epoch": 11.29, "learning_rate": 2.744759883258159e-05, "loss": 0.0061, "step": 42600 }, { "epoch": 11.3, "learning_rate": 2.7421066595914036e-05, "loss": 0.0064, "step": 42650 }, { "epoch": 11.32, "learning_rate": 2.7394534359246486e-05, "loss": 0.0072, "step": 42700 }, { "epoch": 11.33, "learning_rate": 2.7368002122578934e-05, "loss": 0.007, "step": 42750 }, { "epoch": 11.34, "learning_rate": 2.7341469885911384e-05, "loss": 0.0066, "step": 42800 }, { "epoch": 11.36, "learning_rate": 2.731493764924383e-05, "loss": 0.006, "step": 42850 }, { "epoch": 11.37, "learning_rate": 2.728840541257628e-05, "loss": 0.0067, "step": 42900 }, { "epoch": 11.38, "learning_rate": 2.726187317590873e-05, "loss": 0.007, "step": 42950 }, { "epoch": 11.4, "learning_rate": 2.7235340939241177e-05, "loss": 0.0064, "step": 43000 }, { "epoch": 11.41, "learning_rate": 2.720880870257363e-05, "loss": 0.0063, "step": 43050 }, { "epoch": 11.42, "learning_rate": 2.7182276465906075e-05, "loss": 0.008, "step": 43100 }, { "epoch": 11.44, "learning_rate": 2.715574422923853e-05, "loss": 0.0073, "step": 43150 }, { "epoch": 11.45, "learning_rate": 2.7129211992570973e-05, "loss": 0.0071, "step": 43200 }, { "epoch": 11.46, "learning_rate": 2.7102679755903427e-05, "loss": 0.0066, "step": 43250 }, { "epoch": 11.48, "learning_rate": 2.7076147519235874e-05, "loss": 0.0064, "step": 43300 }, { "epoch": 11.49, "learning_rate": 2.7049615282568318e-05, "loss": 0.0066, "step": 43350 }, { "epoch": 11.5, "learning_rate": 2.7023083045900772e-05, "loss": 0.0071, "step": 43400 }, { "epoch": 11.52, "learning_rate": 2.6996550809233216e-05, "loss": 0.0062, "step": 43450 }, { "epoch": 11.53, "learning_rate": 2.697001857256567e-05, "loss": 0.0066, "step": 43500 }, { "epoch": 11.54, "learning_rate": 2.6943486335898117e-05, "loss": 0.0068, "step": 43550 }, { "epoch": 11.56, "learning_rate": 2.6916954099230568e-05, "loss": 0.0061, "step": 43600 }, { "epoch": 11.57, "learning_rate": 2.6890421862563015e-05, "loss": 0.0068, "step": 43650 }, { "epoch": 11.58, "learning_rate": 2.6863889625895466e-05, "loss": 0.0077, "step": 43700 }, { "epoch": 11.6, "learning_rate": 2.6837357389227913e-05, "loss": 0.0071, "step": 43750 }, { "epoch": 11.61, "learning_rate": 2.6810825152560364e-05, "loss": 0.0066, "step": 43800 }, { "epoch": 11.62, "learning_rate": 2.678429291589281e-05, "loss": 0.0069, "step": 43850 }, { "epoch": 11.64, "learning_rate": 2.6757760679225258e-05, "loss": 0.0067, "step": 43900 }, { "epoch": 11.65, "learning_rate": 2.673122844255771e-05, "loss": 0.007, "step": 43950 }, { "epoch": 11.66, "learning_rate": 2.6704696205890156e-05, "loss": 0.0068, "step": 44000 }, { "epoch": 11.68, "learning_rate": 2.6678163969222607e-05, "loss": 0.0069, "step": 44050 }, { "epoch": 11.69, "learning_rate": 2.6651631732555054e-05, "loss": 0.0069, "step": 44100 }, { "epoch": 11.7, "learning_rate": 2.6625099495887508e-05, "loss": 0.0065, "step": 44150 }, { "epoch": 11.71, "learning_rate": 2.6598567259219952e-05, "loss": 0.0068, "step": 44200 }, { "epoch": 11.73, "learning_rate": 2.6572035022552406e-05, "loss": 0.0067, "step": 44250 }, { "epoch": 11.74, "learning_rate": 2.654550278588485e-05, "loss": 0.0074, "step": 44300 }, { "epoch": 11.75, "learning_rate": 2.6518970549217297e-05, "loss": 0.0071, "step": 44350 }, { "epoch": 11.77, "learning_rate": 2.649243831254975e-05, "loss": 0.0071, "step": 44400 }, { "epoch": 11.78, "learning_rate": 2.6465906075882195e-05, "loss": 0.0075, "step": 44450 }, { "epoch": 11.79, "learning_rate": 2.643937383921465e-05, "loss": 0.0069, "step": 44500 }, { "epoch": 11.81, "learning_rate": 2.6412841602547093e-05, "loss": 0.0073, "step": 44550 }, { "epoch": 11.82, "learning_rate": 2.6386309365879547e-05, "loss": 0.0065, "step": 44600 }, { "epoch": 11.83, "learning_rate": 2.6359777129211995e-05, "loss": 0.0077, "step": 44650 }, { "epoch": 11.85, "learning_rate": 2.6333244892544445e-05, "loss": 0.0072, "step": 44700 }, { "epoch": 11.86, "learning_rate": 2.6306712655876892e-05, "loss": 0.0069, "step": 44750 }, { "epoch": 11.87, "learning_rate": 2.628018041920934e-05, "loss": 0.0069, "step": 44800 }, { "epoch": 11.89, "learning_rate": 2.625364818254179e-05, "loss": 0.0068, "step": 44850 }, { "epoch": 11.9, "learning_rate": 2.6227115945874238e-05, "loss": 0.0073, "step": 44900 }, { "epoch": 11.91, "learning_rate": 2.620058370920669e-05, "loss": 0.0067, "step": 44950 }, { "epoch": 11.93, "learning_rate": 2.6174051472539136e-05, "loss": 0.0062, "step": 45000 }, { "epoch": 11.94, "learning_rate": 2.6147519235871586e-05, "loss": 0.0073, "step": 45050 }, { "epoch": 11.95, "learning_rate": 2.6120986999204034e-05, "loss": 0.0071, "step": 45100 }, { "epoch": 11.97, "learning_rate": 2.6094454762536484e-05, "loss": 0.0071, "step": 45150 }, { "epoch": 11.98, "learning_rate": 2.606792252586893e-05, "loss": 0.0072, "step": 45200 }, { "epoch": 11.99, "learning_rate": 2.604139028920138e-05, "loss": 0.007, "step": 45250 }, { "epoch": 12.01, "learning_rate": 2.601485805253383e-05, "loss": 0.0061, "step": 45300 }, { "epoch": 12.02, "learning_rate": 2.5988325815866277e-05, "loss": 0.0054, "step": 45350 }, { "epoch": 12.03, "learning_rate": 2.5961793579198727e-05, "loss": 0.0056, "step": 45400 }, { "epoch": 12.05, "learning_rate": 2.5935261342531175e-05, "loss": 0.0055, "step": 45450 }, { "epoch": 12.06, "learning_rate": 2.590872910586363e-05, "loss": 0.006, "step": 45500 }, { "epoch": 12.07, "learning_rate": 2.5882196869196073e-05, "loss": 0.0058, "step": 45550 }, { "epoch": 12.09, "learning_rate": 2.5855664632528527e-05, "loss": 0.0057, "step": 45600 }, { "epoch": 12.1, "learning_rate": 2.582913239586097e-05, "loss": 0.0068, "step": 45650 }, { "epoch": 12.11, "learning_rate": 2.5802600159193418e-05, "loss": 0.0058, "step": 45700 }, { "epoch": 12.13, "learning_rate": 2.5776067922525872e-05, "loss": 0.0062, "step": 45750 }, { "epoch": 12.14, "learning_rate": 2.5749535685858316e-05, "loss": 0.0064, "step": 45800 }, { "epoch": 12.15, "learning_rate": 2.572300344919077e-05, "loss": 0.006, "step": 45850 }, { "epoch": 12.17, "learning_rate": 2.5696471212523217e-05, "loss": 0.0066, "step": 45900 }, { "epoch": 12.18, "learning_rate": 2.5669938975855668e-05, "loss": 0.0063, "step": 45950 }, { "epoch": 12.19, "learning_rate": 2.5643406739188115e-05, "loss": 0.0059, "step": 46000 }, { "epoch": 12.21, "learning_rate": 2.5616874502520566e-05, "loss": 0.0059, "step": 46050 }, { "epoch": 12.22, "learning_rate": 2.5590342265853013e-05, "loss": 0.0061, "step": 46100 }, { "epoch": 12.23, "learning_rate": 2.5563810029185464e-05, "loss": 0.0065, "step": 46150 }, { "epoch": 12.24, "learning_rate": 2.553727779251791e-05, "loss": 0.0062, "step": 46200 }, { "epoch": 12.26, "learning_rate": 2.5510745555850358e-05, "loss": 0.0056, "step": 46250 }, { "epoch": 12.27, "learning_rate": 2.548421331918281e-05, "loss": 0.0064, "step": 46300 }, { "epoch": 12.28, "learning_rate": 2.5457681082515256e-05, "loss": 0.006, "step": 46350 }, { "epoch": 12.3, "learning_rate": 2.5431148845847707e-05, "loss": 0.006, "step": 46400 }, { "epoch": 12.31, "learning_rate": 2.5404616609180154e-05, "loss": 0.0062, "step": 46450 }, { "epoch": 12.32, "learning_rate": 2.5378084372512605e-05, "loss": 0.006, "step": 46500 }, { "epoch": 12.34, "learning_rate": 2.5351552135845052e-05, "loss": 0.0068, "step": 46550 }, { "epoch": 12.35, "learning_rate": 2.5325019899177506e-05, "loss": 0.0062, "step": 46600 }, { "epoch": 12.36, "learning_rate": 2.529848766250995e-05, "loss": 0.006, "step": 46650 }, { "epoch": 12.38, "learning_rate": 2.5271955425842397e-05, "loss": 0.0067, "step": 46700 }, { "epoch": 12.39, "learning_rate": 2.5245423189174848e-05, "loss": 0.0062, "step": 46750 }, { "epoch": 12.4, "learning_rate": 2.5218890952507295e-05, "loss": 0.006, "step": 46800 }, { "epoch": 12.42, "learning_rate": 2.519235871583975e-05, "loss": 0.007, "step": 46850 }, { "epoch": 12.43, "learning_rate": 2.5165826479172193e-05, "loss": 0.0065, "step": 46900 }, { "epoch": 12.44, "learning_rate": 2.5139294242504647e-05, "loss": 0.0068, "step": 46950 }, { "epoch": 12.46, "learning_rate": 2.5112762005837094e-05, "loss": 0.0072, "step": 47000 }, { "epoch": 12.47, "learning_rate": 2.5086229769169545e-05, "loss": 0.0066, "step": 47050 }, { "epoch": 12.48, "learning_rate": 2.5059697532501992e-05, "loss": 0.0065, "step": 47100 }, { "epoch": 12.5, "learning_rate": 2.5033165295834436e-05, "loss": 0.0065, "step": 47150 }, { "epoch": 12.51, "learning_rate": 2.500663305916689e-05, "loss": 0.0063, "step": 47200 }, { "epoch": 12.52, "learning_rate": 2.4980100822499338e-05, "loss": 0.0068, "step": 47250 }, { "epoch": 12.54, "learning_rate": 2.4953568585831788e-05, "loss": 0.0067, "step": 47300 }, { "epoch": 12.55, "learning_rate": 2.4927036349164235e-05, "loss": 0.0062, "step": 47350 }, { "epoch": 12.56, "learning_rate": 2.4900504112496683e-05, "loss": 0.0069, "step": 47400 }, { "epoch": 12.58, "learning_rate": 2.4873971875829133e-05, "loss": 0.0065, "step": 47450 }, { "epoch": 12.59, "learning_rate": 2.484743963916158e-05, "loss": 0.0067, "step": 47500 }, { "epoch": 12.6, "learning_rate": 2.482090740249403e-05, "loss": 0.0062, "step": 47550 }, { "epoch": 12.62, "learning_rate": 2.4794375165826482e-05, "loss": 0.006, "step": 47600 }, { "epoch": 12.63, "learning_rate": 2.476784292915893e-05, "loss": 0.006, "step": 47650 }, { "epoch": 12.64, "learning_rate": 2.474131069249138e-05, "loss": 0.0063, "step": 47700 }, { "epoch": 12.66, "learning_rate": 2.4714778455823827e-05, "loss": 0.0065, "step": 47750 }, { "epoch": 12.67, "learning_rate": 2.4688246219156274e-05, "loss": 0.0061, "step": 47800 }, { "epoch": 12.68, "learning_rate": 2.4661713982488725e-05, "loss": 0.0064, "step": 47850 }, { "epoch": 12.7, "learning_rate": 2.4635181745821172e-05, "loss": 0.0067, "step": 47900 }, { "epoch": 12.71, "learning_rate": 2.4608649509153623e-05, "loss": 0.0065, "step": 47950 }, { "epoch": 12.72, "learning_rate": 2.458211727248607e-05, "loss": 0.007, "step": 48000 }, { "epoch": 12.74, "learning_rate": 2.455558503581852e-05, "loss": 0.0068, "step": 48050 }, { "epoch": 12.75, "learning_rate": 2.452905279915097e-05, "loss": 0.0067, "step": 48100 }, { "epoch": 12.76, "learning_rate": 2.450252056248342e-05, "loss": 0.0062, "step": 48150 }, { "epoch": 12.77, "learning_rate": 2.447598832581587e-05, "loss": 0.0065, "step": 48200 }, { "epoch": 12.79, "learning_rate": 2.4449456089148317e-05, "loss": 0.0067, "step": 48250 }, { "epoch": 12.8, "learning_rate": 2.4422923852480764e-05, "loss": 0.0063, "step": 48300 }, { "epoch": 12.81, "learning_rate": 2.4396391615813215e-05, "loss": 0.0068, "step": 48350 }, { "epoch": 12.83, "learning_rate": 2.4369859379145662e-05, "loss": 0.0067, "step": 48400 }, { "epoch": 12.84, "learning_rate": 2.4343327142478113e-05, "loss": 0.0067, "step": 48450 }, { "epoch": 12.85, "learning_rate": 2.431679490581056e-05, "loss": 0.0066, "step": 48500 }, { "epoch": 12.87, "learning_rate": 2.429026266914301e-05, "loss": 0.0064, "step": 48550 }, { "epoch": 12.88, "learning_rate": 2.4263730432475458e-05, "loss": 0.0065, "step": 48600 }, { "epoch": 12.89, "learning_rate": 2.423719819580791e-05, "loss": 0.0061, "step": 48650 }, { "epoch": 12.91, "learning_rate": 2.421066595914036e-05, "loss": 0.0065, "step": 48700 }, { "epoch": 12.92, "learning_rate": 2.4184133722472803e-05, "loss": 0.0065, "step": 48750 }, { "epoch": 12.93, "learning_rate": 2.4157601485805254e-05, "loss": 0.0066, "step": 48800 }, { "epoch": 12.95, "learning_rate": 2.41310692491377e-05, "loss": 0.0066, "step": 48850 }, { "epoch": 12.96, "learning_rate": 2.4104537012470152e-05, "loss": 0.0068, "step": 48900 }, { "epoch": 12.97, "learning_rate": 2.4078004775802602e-05, "loss": 0.0066, "step": 48950 }, { "epoch": 12.99, "learning_rate": 2.405147253913505e-05, "loss": 0.0067, "step": 49000 }, { "epoch": 13.0, "learning_rate": 2.40249403024675e-05, "loss": 0.0063, "step": 49050 }, { "epoch": 13.01, "learning_rate": 2.3998408065799948e-05, "loss": 0.0058, "step": 49100 }, { "epoch": 13.03, "learning_rate": 2.39718758291324e-05, "loss": 0.0053, "step": 49150 }, { "epoch": 13.04, "learning_rate": 2.3945343592464846e-05, "loss": 0.0056, "step": 49200 }, { "epoch": 13.05, "learning_rate": 2.3918811355797293e-05, "loss": 0.0056, "step": 49250 }, { "epoch": 13.07, "learning_rate": 2.3892279119129744e-05, "loss": 0.0056, "step": 49300 }, { "epoch": 13.08, "learning_rate": 2.386574688246219e-05, "loss": 0.0058, "step": 49350 }, { "epoch": 13.09, "learning_rate": 2.383921464579464e-05, "loss": 0.0059, "step": 49400 }, { "epoch": 13.11, "learning_rate": 2.3812682409127092e-05, "loss": 0.006, "step": 49450 }, { "epoch": 13.12, "learning_rate": 2.378615017245954e-05, "loss": 0.0057, "step": 49500 }, { "epoch": 13.13, "learning_rate": 2.375961793579199e-05, "loss": 0.0059, "step": 49550 }, { "epoch": 13.15, "learning_rate": 2.3733085699124437e-05, "loss": 0.0054, "step": 49600 }, { "epoch": 13.16, "learning_rate": 2.3706553462456888e-05, "loss": 0.0055, "step": 49650 }, { "epoch": 13.17, "learning_rate": 2.3680021225789335e-05, "loss": 0.0057, "step": 49700 }, { "epoch": 13.19, "learning_rate": 2.3653488989121783e-05, "loss": 0.0056, "step": 49750 }, { "epoch": 13.2, "learning_rate": 2.3626956752454233e-05, "loss": 0.006, "step": 49800 }, { "epoch": 13.21, "learning_rate": 2.360042451578668e-05, "loss": 0.0059, "step": 49850 }, { "epoch": 13.23, "learning_rate": 2.357389227911913e-05, "loss": 0.0058, "step": 49900 }, { "epoch": 13.24, "learning_rate": 2.354736004245158e-05, "loss": 0.0056, "step": 49950 }, { "epoch": 13.25, "learning_rate": 2.352082780578403e-05, "loss": 0.0064, "step": 50000 }, { "epoch": 13.27, "learning_rate": 2.349429556911648e-05, "loss": 0.0063, "step": 50050 }, { "epoch": 13.28, "learning_rate": 2.3467763332448927e-05, "loss": 0.0064, "step": 50100 }, { "epoch": 13.29, "learning_rate": 2.3441231095781374e-05, "loss": 0.006, "step": 50150 }, { "epoch": 13.31, "learning_rate": 2.3414698859113825e-05, "loss": 0.0059, "step": 50200 }, { "epoch": 13.32, "learning_rate": 2.3388166622446272e-05, "loss": 0.0057, "step": 50250 }, { "epoch": 13.33, "learning_rate": 2.3361634385778723e-05, "loss": 0.006, "step": 50300 }, { "epoch": 13.34, "learning_rate": 2.333510214911117e-05, "loss": 0.006, "step": 50350 }, { "epoch": 13.36, "learning_rate": 2.330856991244362e-05, "loss": 0.0057, "step": 50400 }, { "epoch": 13.37, "learning_rate": 2.3282037675776068e-05, "loss": 0.006, "step": 50450 }, { "epoch": 13.38, "learning_rate": 2.325550543910852e-05, "loss": 0.0062, "step": 50500 }, { "epoch": 13.4, "learning_rate": 2.322897320244097e-05, "loss": 0.0056, "step": 50550 }, { "epoch": 13.41, "learning_rate": 2.3202440965773417e-05, "loss": 0.0062, "step": 50600 }, { "epoch": 13.42, "learning_rate": 2.3175908729105864e-05, "loss": 0.0056, "step": 50650 }, { "epoch": 13.44, "learning_rate": 2.314937649243831e-05, "loss": 0.0058, "step": 50700 }, { "epoch": 13.45, "learning_rate": 2.3122844255770762e-05, "loss": 0.0063, "step": 50750 }, { "epoch": 13.46, "learning_rate": 2.3096312019103213e-05, "loss": 0.0062, "step": 50800 }, { "epoch": 13.48, "learning_rate": 2.306977978243566e-05, "loss": 0.0062, "step": 50850 }, { "epoch": 13.49, "learning_rate": 2.304324754576811e-05, "loss": 0.0062, "step": 50900 }, { "epoch": 13.5, "learning_rate": 2.3016715309100558e-05, "loss": 0.006, "step": 50950 }, { "epoch": 13.52, "learning_rate": 2.299018307243301e-05, "loss": 0.0063, "step": 51000 }, { "epoch": 13.53, "learning_rate": 2.2963650835765456e-05, "loss": 0.0061, "step": 51050 }, { "epoch": 13.54, "learning_rate": 2.2937118599097903e-05, "loss": 0.006, "step": 51100 }, { "epoch": 13.56, "learning_rate": 2.2910586362430354e-05, "loss": 0.0066, "step": 51150 }, { "epoch": 13.57, "learning_rate": 2.28840541257628e-05, "loss": 0.0068, "step": 51200 }, { "epoch": 13.58, "learning_rate": 2.285752188909525e-05, "loss": 0.006, "step": 51250 }, { "epoch": 13.6, "learning_rate": 2.2830989652427702e-05, "loss": 0.0061, "step": 51300 }, { "epoch": 13.61, "learning_rate": 2.280445741576015e-05, "loss": 0.0062, "step": 51350 }, { "epoch": 13.62, "learning_rate": 2.27779251790926e-05, "loss": 0.0058, "step": 51400 }, { "epoch": 13.64, "learning_rate": 2.2751392942425047e-05, "loss": 0.0061, "step": 51450 }, { "epoch": 13.65, "learning_rate": 2.2724860705757498e-05, "loss": 0.0063, "step": 51500 }, { "epoch": 13.66, "learning_rate": 2.2698328469089945e-05, "loss": 0.0064, "step": 51550 }, { "epoch": 13.68, "learning_rate": 2.2671796232422393e-05, "loss": 0.0063, "step": 51600 }, { "epoch": 13.69, "learning_rate": 2.2645263995754843e-05, "loss": 0.0062, "step": 51650 }, { "epoch": 13.7, "learning_rate": 2.261873175908729e-05, "loss": 0.006, "step": 51700 }, { "epoch": 13.72, "learning_rate": 2.259219952241974e-05, "loss": 0.0059, "step": 51750 }, { "epoch": 13.73, "learning_rate": 2.256566728575219e-05, "loss": 0.0063, "step": 51800 }, { "epoch": 13.74, "learning_rate": 2.253913504908464e-05, "loss": 0.0066, "step": 51850 }, { "epoch": 13.76, "learning_rate": 2.251260281241709e-05, "loss": 0.0065, "step": 51900 }, { "epoch": 13.77, "learning_rate": 2.2486070575749537e-05, "loss": 0.0059, "step": 51950 }, { "epoch": 13.78, "learning_rate": 2.2459538339081988e-05, "loss": 0.0067, "step": 52000 }, { "epoch": 13.8, "learning_rate": 2.2433006102414432e-05, "loss": 0.0068, "step": 52050 }, { "epoch": 13.81, "learning_rate": 2.2406473865746882e-05, "loss": 0.0062, "step": 52100 }, { "epoch": 13.82, "learning_rate": 2.2379941629079333e-05, "loss": 0.0058, "step": 52150 }, { "epoch": 13.84, "learning_rate": 2.235340939241178e-05, "loss": 0.0062, "step": 52200 }, { "epoch": 13.85, "learning_rate": 2.232687715574423e-05, "loss": 0.006, "step": 52250 }, { "epoch": 13.86, "learning_rate": 2.2300344919076678e-05, "loss": 0.0062, "step": 52300 }, { "epoch": 13.87, "learning_rate": 2.227381268240913e-05, "loss": 0.0062, "step": 52350 }, { "epoch": 13.89, "learning_rate": 2.2247280445741576e-05, "loss": 0.0063, "step": 52400 }, { "epoch": 13.9, "learning_rate": 2.2220748209074027e-05, "loss": 0.006, "step": 52450 }, { "epoch": 13.91, "learning_rate": 2.2194215972406474e-05, "loss": 0.006, "step": 52500 }, { "epoch": 13.93, "learning_rate": 2.216768373573892e-05, "loss": 0.006, "step": 52550 }, { "epoch": 13.94, "learning_rate": 2.2141151499071372e-05, "loss": 0.0062, "step": 52600 }, { "epoch": 13.95, "learning_rate": 2.2114619262403823e-05, "loss": 0.0059, "step": 52650 }, { "epoch": 13.97, "learning_rate": 2.208808702573627e-05, "loss": 0.0062, "step": 52700 }, { "epoch": 13.98, "learning_rate": 2.206155478906872e-05, "loss": 0.0062, "step": 52750 }, { "epoch": 13.99, "learning_rate": 2.2035022552401168e-05, "loss": 0.0062, "step": 52800 }, { "epoch": 14.01, "learning_rate": 2.200849031573362e-05, "loss": 0.0053, "step": 52850 }, { "epoch": 14.02, "learning_rate": 2.1981958079066066e-05, "loss": 0.0051, "step": 52900 }, { "epoch": 14.03, "learning_rate": 2.1955425842398517e-05, "loss": 0.0053, "step": 52950 }, { "epoch": 14.05, "learning_rate": 2.1928893605730964e-05, "loss": 0.0052, "step": 53000 }, { "epoch": 14.06, "learning_rate": 2.190236136906341e-05, "loss": 0.0053, "step": 53050 }, { "epoch": 14.07, "learning_rate": 2.1875829132395862e-05, "loss": 0.0052, "step": 53100 }, { "epoch": 14.09, "learning_rate": 2.184929689572831e-05, "loss": 0.0059, "step": 53150 }, { "epoch": 14.1, "learning_rate": 2.182276465906076e-05, "loss": 0.0051, "step": 53200 }, { "epoch": 14.11, "learning_rate": 2.179623242239321e-05, "loss": 0.0052, "step": 53250 }, { "epoch": 14.13, "learning_rate": 2.1769700185725658e-05, "loss": 0.0051, "step": 53300 }, { "epoch": 14.14, "learning_rate": 2.1743167949058108e-05, "loss": 0.0052, "step": 53350 }, { "epoch": 14.15, "learning_rate": 2.1716635712390556e-05, "loss": 0.0054, "step": 53400 }, { "epoch": 14.17, "learning_rate": 2.1690103475723003e-05, "loss": 0.0053, "step": 53450 }, { "epoch": 14.18, "learning_rate": 2.1663571239055454e-05, "loss": 0.0055, "step": 53500 }, { "epoch": 14.19, "learning_rate": 2.16370390023879e-05, "loss": 0.0052, "step": 53550 }, { "epoch": 14.21, "learning_rate": 2.161050676572035e-05, "loss": 0.0056, "step": 53600 }, { "epoch": 14.22, "learning_rate": 2.15839745290528e-05, "loss": 0.0057, "step": 53650 }, { "epoch": 14.23, "learning_rate": 2.155744229238525e-05, "loss": 0.0057, "step": 53700 }, { "epoch": 14.25, "learning_rate": 2.15309100557177e-05, "loss": 0.0057, "step": 53750 }, { "epoch": 14.26, "learning_rate": 2.1504377819050147e-05, "loss": 0.0057, "step": 53800 }, { "epoch": 14.27, "learning_rate": 2.1477845582382598e-05, "loss": 0.0056, "step": 53850 }, { "epoch": 14.29, "learning_rate": 2.1451313345715045e-05, "loss": 0.0057, "step": 53900 }, { "epoch": 14.3, "learning_rate": 2.1424781109047493e-05, "loss": 0.0057, "step": 53950 }, { "epoch": 14.31, "learning_rate": 2.1398248872379943e-05, "loss": 0.0057, "step": 54000 }, { "epoch": 14.33, "learning_rate": 2.137171663571239e-05, "loss": 0.0056, "step": 54050 }, { "epoch": 14.34, "learning_rate": 2.134518439904484e-05, "loss": 0.0059, "step": 54100 }, { "epoch": 14.35, "learning_rate": 2.131865216237729e-05, "loss": 0.0059, "step": 54150 }, { "epoch": 14.37, "learning_rate": 2.129211992570974e-05, "loss": 0.0061, "step": 54200 }, { "epoch": 14.38, "learning_rate": 2.1265587689042186e-05, "loss": 0.0056, "step": 54250 }, { "epoch": 14.39, "learning_rate": 2.1239055452374637e-05, "loss": 0.0056, "step": 54300 }, { "epoch": 14.4, "learning_rate": 2.1212523215707088e-05, "loss": 0.0055, "step": 54350 }, { "epoch": 14.42, "learning_rate": 2.118599097903953e-05, "loss": 0.0062, "step": 54400 }, { "epoch": 14.43, "learning_rate": 2.1159458742371982e-05, "loss": 0.0057, "step": 54450 }, { "epoch": 14.44, "learning_rate": 2.1132926505704433e-05, "loss": 0.0068, "step": 54500 }, { "epoch": 14.46, "learning_rate": 2.110639426903688e-05, "loss": 0.0059, "step": 54550 }, { "epoch": 14.47, "learning_rate": 2.107986203236933e-05, "loss": 0.006, "step": 54600 }, { "epoch": 14.48, "learning_rate": 2.1053329795701778e-05, "loss": 0.0058, "step": 54650 }, { "epoch": 14.5, "learning_rate": 2.102679755903423e-05, "loss": 0.006, "step": 54700 }, { "epoch": 14.51, "learning_rate": 2.1000265322366676e-05, "loss": 0.006, "step": 54750 }, { "epoch": 14.52, "learning_rate": 2.0973733085699127e-05, "loss": 0.0056, "step": 54800 }, { "epoch": 14.54, "learning_rate": 2.0947200849031574e-05, "loss": 0.0057, "step": 54850 }, { "epoch": 14.55, "learning_rate": 2.092066861236402e-05, "loss": 0.0061, "step": 54900 }, { "epoch": 14.56, "learning_rate": 2.0894136375696472e-05, "loss": 0.006, "step": 54950 }, { "epoch": 14.58, "learning_rate": 2.086760413902892e-05, "loss": 0.0065, "step": 55000 }, { "epoch": 14.59, "learning_rate": 2.084107190236137e-05, "loss": 0.0057, "step": 55050 }, { "epoch": 14.6, "learning_rate": 2.081453966569382e-05, "loss": 0.0066, "step": 55100 }, { "epoch": 14.62, "learning_rate": 2.0788007429026268e-05, "loss": 0.0057, "step": 55150 }, { "epoch": 14.63, "learning_rate": 2.076147519235872e-05, "loss": 0.006, "step": 55200 }, { "epoch": 14.64, "learning_rate": 2.0734942955691166e-05, "loss": 0.0059, "step": 55250 }, { "epoch": 14.66, "learning_rate": 2.0708410719023616e-05, "loss": 0.006, "step": 55300 }, { "epoch": 14.67, "learning_rate": 2.0681878482356064e-05, "loss": 0.0058, "step": 55350 }, { "epoch": 14.68, "learning_rate": 2.065534624568851e-05, "loss": 0.0057, "step": 55400 }, { "epoch": 14.7, "learning_rate": 2.062881400902096e-05, "loss": 0.0056, "step": 55450 }, { "epoch": 14.71, "learning_rate": 2.060228177235341e-05, "loss": 0.0055, "step": 55500 }, { "epoch": 14.72, "learning_rate": 2.057574953568586e-05, "loss": 0.0062, "step": 55550 }, { "epoch": 14.74, "learning_rate": 2.054921729901831e-05, "loss": 0.0057, "step": 55600 }, { "epoch": 14.75, "learning_rate": 2.0522685062350757e-05, "loss": 0.0064, "step": 55650 }, { "epoch": 14.76, "learning_rate": 2.0496152825683208e-05, "loss": 0.0063, "step": 55700 }, { "epoch": 14.78, "learning_rate": 2.0469620589015655e-05, "loss": 0.0062, "step": 55750 }, { "epoch": 14.79, "learning_rate": 2.0443088352348103e-05, "loss": 0.006, "step": 55800 }, { "epoch": 14.8, "learning_rate": 2.0416556115680553e-05, "loss": 0.0059, "step": 55850 }, { "epoch": 14.82, "learning_rate": 2.0390023879013e-05, "loss": 0.006, "step": 55900 }, { "epoch": 14.83, "learning_rate": 2.036349164234545e-05, "loss": 0.0057, "step": 55950 }, { "epoch": 14.84, "learning_rate": 2.03369594056779e-05, "loss": 0.006, "step": 56000 }, { "epoch": 14.86, "learning_rate": 2.031042716901035e-05, "loss": 0.0062, "step": 56050 }, { "epoch": 14.87, "learning_rate": 2.0283894932342796e-05, "loss": 0.0061, "step": 56100 }, { "epoch": 14.88, "learning_rate": 2.0257362695675247e-05, "loss": 0.0056, "step": 56150 }, { "epoch": 14.9, "learning_rate": 2.0230830459007698e-05, "loss": 0.0059, "step": 56200 }, { "epoch": 14.91, "learning_rate": 2.0204298222340145e-05, "loss": 0.0058, "step": 56250 }, { "epoch": 14.92, "learning_rate": 2.0177765985672592e-05, "loss": 0.0061, "step": 56300 }, { "epoch": 14.94, "learning_rate": 2.015123374900504e-05, "loss": 0.006, "step": 56350 }, { "epoch": 14.95, "learning_rate": 2.012470151233749e-05, "loss": 0.0063, "step": 56400 }, { "epoch": 14.96, "learning_rate": 2.009816927566994e-05, "loss": 0.0062, "step": 56450 }, { "epoch": 14.97, "learning_rate": 2.0071637039002388e-05, "loss": 0.0059, "step": 56500 }, { "epoch": 14.99, "learning_rate": 2.004510480233484e-05, "loss": 0.006, "step": 56550 }, { "epoch": 15.0, "learning_rate": 2.0018572565667286e-05, "loss": 0.0063, "step": 56600 }, { "epoch": 15.01, "learning_rate": 1.9992040328999737e-05, "loss": 0.0059, "step": 56650 }, { "epoch": 15.03, "learning_rate": 1.9965508092332184e-05, "loss": 0.0049, "step": 56700 }, { "epoch": 15.04, "learning_rate": 1.993897585566463e-05, "loss": 0.0053, "step": 56750 }, { "epoch": 15.05, "learning_rate": 1.9912443618997082e-05, "loss": 0.0048, "step": 56800 }, { "epoch": 15.07, "learning_rate": 1.988591138232953e-05, "loss": 0.0053, "step": 56850 }, { "epoch": 15.08, "learning_rate": 1.985937914566198e-05, "loss": 0.0051, "step": 56900 }, { "epoch": 15.09, "learning_rate": 1.983284690899443e-05, "loss": 0.0058, "step": 56950 }, { "epoch": 15.11, "learning_rate": 1.9806314672326878e-05, "loss": 0.0052, "step": 57000 }, { "epoch": 15.12, "learning_rate": 1.977978243565933e-05, "loss": 0.0049, "step": 57050 }, { "epoch": 15.13, "learning_rate": 1.9753250198991776e-05, "loss": 0.005, "step": 57100 }, { "epoch": 15.15, "learning_rate": 1.9726717962324227e-05, "loss": 0.0051, "step": 57150 }, { "epoch": 15.16, "learning_rate": 1.9700185725656674e-05, "loss": 0.0051, "step": 57200 }, { "epoch": 15.17, "learning_rate": 1.967365348898912e-05, "loss": 0.0054, "step": 57250 }, { "epoch": 15.19, "learning_rate": 1.9647121252321572e-05, "loss": 0.0052, "step": 57300 }, { "epoch": 15.2, "learning_rate": 1.962058901565402e-05, "loss": 0.0053, "step": 57350 }, { "epoch": 15.21, "learning_rate": 1.959405677898647e-05, "loss": 0.0054, "step": 57400 }, { "epoch": 15.23, "learning_rate": 1.9567524542318917e-05, "loss": 0.0052, "step": 57450 }, { "epoch": 15.24, "learning_rate": 1.9540992305651368e-05, "loss": 0.0052, "step": 57500 }, { "epoch": 15.25, "learning_rate": 1.9514460068983818e-05, "loss": 0.0057, "step": 57550 }, { "epoch": 15.27, "learning_rate": 1.9487927832316266e-05, "loss": 0.0064, "step": 57600 }, { "epoch": 15.28, "learning_rate": 1.9461395595648716e-05, "loss": 0.0054, "step": 57650 }, { "epoch": 15.29, "learning_rate": 1.9434863358981163e-05, "loss": 0.0051, "step": 57700 }, { "epoch": 15.31, "learning_rate": 1.940833112231361e-05, "loss": 0.0055, "step": 57750 }, { "epoch": 15.32, "learning_rate": 1.938179888564606e-05, "loss": 0.0051, "step": 57800 }, { "epoch": 15.33, "learning_rate": 1.935526664897851e-05, "loss": 0.0052, "step": 57850 }, { "epoch": 15.35, "learning_rate": 1.932873441231096e-05, "loss": 0.0052, "step": 57900 }, { "epoch": 15.36, "learning_rate": 1.9302202175643407e-05, "loss": 0.0056, "step": 57950 }, { "epoch": 15.37, "learning_rate": 1.9275669938975857e-05, "loss": 0.0055, "step": 58000 }, { "epoch": 15.39, "learning_rate": 1.9249137702308308e-05, "loss": 0.0056, "step": 58050 }, { "epoch": 15.4, "learning_rate": 1.9222605465640755e-05, "loss": 0.0058, "step": 58100 }, { "epoch": 15.41, "learning_rate": 1.9196073228973203e-05, "loss": 0.0052, "step": 58150 }, { "epoch": 15.43, "learning_rate": 1.916954099230565e-05, "loss": 0.0053, "step": 58200 }, { "epoch": 15.44, "learning_rate": 1.91430087556381e-05, "loss": 0.0055, "step": 58250 }, { "epoch": 15.45, "learning_rate": 1.911647651897055e-05, "loss": 0.0053, "step": 58300 }, { "epoch": 15.47, "learning_rate": 1.9089944282303e-05, "loss": 0.0053, "step": 58350 }, { "epoch": 15.48, "learning_rate": 1.906341204563545e-05, "loss": 0.0056, "step": 58400 }, { "epoch": 15.49, "learning_rate": 1.9036879808967896e-05, "loss": 0.0057, "step": 58450 }, { "epoch": 15.5, "learning_rate": 1.9010347572300347e-05, "loss": 0.0056, "step": 58500 }, { "epoch": 15.52, "learning_rate": 1.8983815335632794e-05, "loss": 0.0057, "step": 58550 }, { "epoch": 15.53, "learning_rate": 1.8957283098965245e-05, "loss": 0.0058, "step": 58600 }, { "epoch": 15.54, "learning_rate": 1.8930750862297692e-05, "loss": 0.0057, "step": 58650 }, { "epoch": 15.56, "learning_rate": 1.890421862563014e-05, "loss": 0.0055, "step": 58700 }, { "epoch": 15.57, "learning_rate": 1.887768638896259e-05, "loss": 0.0062, "step": 58750 }, { "epoch": 15.58, "learning_rate": 1.885115415229504e-05, "loss": 0.0054, "step": 58800 }, { "epoch": 15.6, "learning_rate": 1.8824621915627488e-05, "loss": 0.0061, "step": 58850 }, { "epoch": 15.61, "learning_rate": 1.879808967895994e-05, "loss": 0.0056, "step": 58900 }, { "epoch": 15.62, "learning_rate": 1.8771557442292386e-05, "loss": 0.0053, "step": 58950 }, { "epoch": 15.64, "learning_rate": 1.8745025205624837e-05, "loss": 0.0057, "step": 59000 }, { "epoch": 15.65, "learning_rate": 1.8718492968957284e-05, "loss": 0.0054, "step": 59050 }, { "epoch": 15.66, "learning_rate": 1.869196073228973e-05, "loss": 0.0059, "step": 59100 }, { "epoch": 15.68, "learning_rate": 1.8665428495622182e-05, "loss": 0.0059, "step": 59150 }, { "epoch": 15.69, "learning_rate": 1.863889625895463e-05, "loss": 0.0057, "step": 59200 }, { "epoch": 15.7, "learning_rate": 1.861236402228708e-05, "loss": 0.0058, "step": 59250 }, { "epoch": 15.72, "learning_rate": 1.8585831785619527e-05, "loss": 0.0054, "step": 59300 }, { "epoch": 15.73, "learning_rate": 1.8559299548951978e-05, "loss": 0.0056, "step": 59350 }, { "epoch": 15.74, "learning_rate": 1.853276731228443e-05, "loss": 0.0056, "step": 59400 }, { "epoch": 15.76, "learning_rate": 1.8506235075616876e-05, "loss": 0.0058, "step": 59450 }, { "epoch": 15.77, "learning_rate": 1.8479702838949326e-05, "loss": 0.0056, "step": 59500 }, { "epoch": 15.78, "learning_rate": 1.845317060228177e-05, "loss": 0.0055, "step": 59550 }, { "epoch": 15.8, "learning_rate": 1.842663836561422e-05, "loss": 0.0057, "step": 59600 }, { "epoch": 15.81, "learning_rate": 1.840010612894667e-05, "loss": 0.0054, "step": 59650 }, { "epoch": 15.82, "learning_rate": 1.837357389227912e-05, "loss": 0.0055, "step": 59700 }, { "epoch": 15.84, "learning_rate": 1.834704165561157e-05, "loss": 0.0054, "step": 59750 }, { "epoch": 15.85, "learning_rate": 1.8320509418944017e-05, "loss": 0.0051, "step": 59800 }, { "epoch": 15.86, "learning_rate": 1.8293977182276467e-05, "loss": 0.0059, "step": 59850 }, { "epoch": 15.88, "learning_rate": 1.8267444945608915e-05, "loss": 0.0061, "step": 59900 }, { "epoch": 15.89, "learning_rate": 1.8240912708941365e-05, "loss": 0.0055, "step": 59950 }, { "epoch": 15.9, "learning_rate": 1.8214380472273816e-05, "loss": 0.0057, "step": 60000 }, { "epoch": 15.92, "learning_rate": 1.818784823560626e-05, "loss": 0.0056, "step": 60050 }, { "epoch": 15.93, "learning_rate": 1.816131599893871e-05, "loss": 0.0061, "step": 60100 }, { "epoch": 15.94, "learning_rate": 1.813478376227116e-05, "loss": 0.006, "step": 60150 }, { "epoch": 15.96, "learning_rate": 1.810825152560361e-05, "loss": 0.0058, "step": 60200 }, { "epoch": 15.97, "learning_rate": 1.808171928893606e-05, "loss": 0.0059, "step": 60250 }, { "epoch": 15.98, "learning_rate": 1.8055187052268506e-05, "loss": 0.0054, "step": 60300 }, { "epoch": 16.0, "learning_rate": 1.8028654815600957e-05, "loss": 0.0059, "step": 60350 }, { "epoch": 16.01, "learning_rate": 1.8002122578933404e-05, "loss": 0.0054, "step": 60400 }, { "epoch": 16.02, "learning_rate": 1.7975590342265855e-05, "loss": 0.0048, "step": 60450 }, { "epoch": 16.03, "learning_rate": 1.7949058105598302e-05, "loss": 0.0051, "step": 60500 }, { "epoch": 16.05, "learning_rate": 1.792252586893075e-05, "loss": 0.0051, "step": 60550 }, { "epoch": 16.06, "learning_rate": 1.78959936322632e-05, "loss": 0.0052, "step": 60600 }, { "epoch": 16.07, "learning_rate": 1.7869461395595648e-05, "loss": 0.0049, "step": 60650 }, { "epoch": 16.09, "learning_rate": 1.7842929158928098e-05, "loss": 0.0051, "step": 60700 }, { "epoch": 16.1, "learning_rate": 1.781639692226055e-05, "loss": 0.0049, "step": 60750 }, { "epoch": 16.11, "learning_rate": 1.7789864685592996e-05, "loss": 0.0053, "step": 60800 }, { "epoch": 16.13, "learning_rate": 1.7763332448925447e-05, "loss": 0.0058, "step": 60850 }, { "epoch": 16.14, "learning_rate": 1.7736800212257894e-05, "loss": 0.0051, "step": 60900 }, { "epoch": 16.15, "learning_rate": 1.7710267975590345e-05, "loss": 0.0049, "step": 60950 }, { "epoch": 16.17, "learning_rate": 1.7683735738922792e-05, "loss": 0.0048, "step": 61000 }, { "epoch": 16.18, "learning_rate": 1.765720350225524e-05, "loss": 0.0049, "step": 61050 }, { "epoch": 16.19, "learning_rate": 1.763067126558769e-05, "loss": 0.005, "step": 61100 }, { "epoch": 16.21, "learning_rate": 1.7604139028920137e-05, "loss": 0.0051, "step": 61150 }, { "epoch": 16.22, "learning_rate": 1.7577606792252588e-05, "loss": 0.005, "step": 61200 }, { "epoch": 16.23, "learning_rate": 1.755107455558504e-05, "loss": 0.005, "step": 61250 }, { "epoch": 16.25, "learning_rate": 1.7524542318917486e-05, "loss": 0.0052, "step": 61300 }, { "epoch": 16.26, "learning_rate": 1.7498010082249937e-05, "loss": 0.005, "step": 61350 }, { "epoch": 16.27, "learning_rate": 1.7471477845582384e-05, "loss": 0.0055, "step": 61400 }, { "epoch": 16.29, "learning_rate": 1.744494560891483e-05, "loss": 0.0051, "step": 61450 }, { "epoch": 16.3, "learning_rate": 1.7418413372247282e-05, "loss": 0.0057, "step": 61500 }, { "epoch": 16.31, "learning_rate": 1.739188113557973e-05, "loss": 0.0053, "step": 61550 }, { "epoch": 16.33, "learning_rate": 1.736534889891218e-05, "loss": 0.0053, "step": 61600 }, { "epoch": 16.34, "learning_rate": 1.7338816662244627e-05, "loss": 0.0052, "step": 61650 }, { "epoch": 16.35, "learning_rate": 1.7312284425577078e-05, "loss": 0.0052, "step": 61700 }, { "epoch": 16.37, "learning_rate": 1.7285752188909525e-05, "loss": 0.005, "step": 61750 }, { "epoch": 16.38, "learning_rate": 1.7259219952241976e-05, "loss": 0.0052, "step": 61800 }, { "epoch": 16.39, "learning_rate": 1.7232687715574426e-05, "loss": 0.0054, "step": 61850 }, { "epoch": 16.41, "learning_rate": 1.720615547890687e-05, "loss": 0.0061, "step": 61900 }, { "epoch": 16.42, "learning_rate": 1.717962324223932e-05, "loss": 0.0051, "step": 61950 }, { "epoch": 16.43, "learning_rate": 1.715309100557177e-05, "loss": 0.0056, "step": 62000 }, { "epoch": 16.45, "learning_rate": 1.712655876890422e-05, "loss": 0.0051, "step": 62050 }, { "epoch": 16.46, "learning_rate": 1.710002653223667e-05, "loss": 0.0055, "step": 62100 }, { "epoch": 16.47, "learning_rate": 1.7073494295569117e-05, "loss": 0.0052, "step": 62150 }, { "epoch": 16.49, "learning_rate": 1.7046962058901567e-05, "loss": 0.0053, "step": 62200 }, { "epoch": 16.5, "learning_rate": 1.7020429822234015e-05, "loss": 0.0052, "step": 62250 }, { "epoch": 16.51, "learning_rate": 1.6993897585566465e-05, "loss": 0.0051, "step": 62300 }, { "epoch": 16.53, "learning_rate": 1.6967365348898916e-05, "loss": 0.0057, "step": 62350 }, { "epoch": 16.54, "learning_rate": 1.694083311223136e-05, "loss": 0.0055, "step": 62400 }, { "epoch": 16.55, "learning_rate": 1.691430087556381e-05, "loss": 0.0051, "step": 62450 }, { "epoch": 16.57, "learning_rate": 1.6887768638896258e-05, "loss": 0.0054, "step": 62500 }, { "epoch": 16.58, "learning_rate": 1.686123640222871e-05, "loss": 0.0052, "step": 62550 }, { "epoch": 16.59, "learning_rate": 1.683470416556116e-05, "loss": 0.0053, "step": 62600 }, { "epoch": 16.6, "learning_rate": 1.6808171928893606e-05, "loss": 0.0055, "step": 62650 }, { "epoch": 16.62, "learning_rate": 1.6781639692226057e-05, "loss": 0.0054, "step": 62700 }, { "epoch": 16.63, "learning_rate": 1.6755107455558504e-05, "loss": 0.0056, "step": 62750 }, { "epoch": 16.64, "learning_rate": 1.6728575218890955e-05, "loss": 0.0054, "step": 62800 }, { "epoch": 16.66, "learning_rate": 1.6702042982223402e-05, "loss": 0.0053, "step": 62850 }, { "epoch": 16.67, "learning_rate": 1.667551074555585e-05, "loss": 0.006, "step": 62900 }, { "epoch": 16.68, "learning_rate": 1.66489785088883e-05, "loss": 0.0055, "step": 62950 }, { "epoch": 16.7, "learning_rate": 1.6622446272220747e-05, "loss": 0.0053, "step": 63000 }, { "epoch": 16.71, "learning_rate": 1.6595914035553198e-05, "loss": 0.0053, "step": 63050 }, { "epoch": 16.72, "learning_rate": 1.6569381798885645e-05, "loss": 0.0055, "step": 63100 }, { "epoch": 16.74, "learning_rate": 1.6542849562218096e-05, "loss": 0.0055, "step": 63150 }, { "epoch": 16.75, "learning_rate": 1.6516317325550547e-05, "loss": 0.0053, "step": 63200 }, { "epoch": 16.76, "learning_rate": 1.6489785088882994e-05, "loss": 0.0054, "step": 63250 }, { "epoch": 16.78, "learning_rate": 1.6463252852215445e-05, "loss": 0.0054, "step": 63300 }, { "epoch": 16.79, "learning_rate": 1.6436720615547892e-05, "loss": 0.0055, "step": 63350 }, { "epoch": 16.8, "learning_rate": 1.641018837888034e-05, "loss": 0.0052, "step": 63400 }, { "epoch": 16.82, "learning_rate": 1.638365614221279e-05, "loss": 0.0055, "step": 63450 }, { "epoch": 16.83, "learning_rate": 1.6357123905545237e-05, "loss": 0.0051, "step": 63500 }, { "epoch": 16.84, "learning_rate": 1.6330591668877688e-05, "loss": 0.0053, "step": 63550 }, { "epoch": 16.86, "learning_rate": 1.6304059432210135e-05, "loss": 0.0054, "step": 63600 }, { "epoch": 16.87, "learning_rate": 1.6277527195542586e-05, "loss": 0.0051, "step": 63650 }, { "epoch": 16.88, "learning_rate": 1.6250994958875036e-05, "loss": 0.005, "step": 63700 }, { "epoch": 16.9, "learning_rate": 1.6224462722207484e-05, "loss": 0.0053, "step": 63750 }, { "epoch": 16.91, "learning_rate": 1.619793048553993e-05, "loss": 0.0055, "step": 63800 }, { "epoch": 16.92, "learning_rate": 1.6171398248872378e-05, "loss": 0.0054, "step": 63850 }, { "epoch": 16.94, "learning_rate": 1.614486601220483e-05, "loss": 0.0054, "step": 63900 }, { "epoch": 16.95, "learning_rate": 1.611833377553728e-05, "loss": 0.0055, "step": 63950 }, { "epoch": 16.96, "learning_rate": 1.6091801538869727e-05, "loss": 0.0052, "step": 64000 }, { "epoch": 16.98, "learning_rate": 1.6065269302202177e-05, "loss": 0.0055, "step": 64050 }, { "epoch": 16.99, "learning_rate": 1.6038737065534625e-05, "loss": 0.0056, "step": 64100 }, { "epoch": 17.0, "learning_rate": 1.6012204828867075e-05, "loss": 0.0052, "step": 64150 }, { "epoch": 17.02, "learning_rate": 1.5985672592199523e-05, "loss": 0.0048, "step": 64200 }, { "epoch": 17.03, "learning_rate": 1.595914035553197e-05, "loss": 0.0045, "step": 64250 }, { "epoch": 17.04, "learning_rate": 1.593260811886442e-05, "loss": 0.0047, "step": 64300 }, { "epoch": 17.06, "learning_rate": 1.5906075882196868e-05, "loss": 0.0049, "step": 64350 }, { "epoch": 17.07, "learning_rate": 1.587954364552932e-05, "loss": 0.0046, "step": 64400 }, { "epoch": 17.08, "learning_rate": 1.585301140886177e-05, "loss": 0.0053, "step": 64450 }, { "epoch": 17.1, "learning_rate": 1.5826479172194216e-05, "loss": 0.0048, "step": 64500 }, { "epoch": 17.11, "learning_rate": 1.5799946935526667e-05, "loss": 0.005, "step": 64550 }, { "epoch": 17.12, "learning_rate": 1.5773414698859114e-05, "loss": 0.0047, "step": 64600 }, { "epoch": 17.13, "learning_rate": 1.5746882462191565e-05, "loss": 0.0047, "step": 64650 }, { "epoch": 17.15, "learning_rate": 1.5720350225524012e-05, "loss": 0.0049, "step": 64700 }, { "epoch": 17.16, "learning_rate": 1.569381798885646e-05, "loss": 0.0049, "step": 64750 }, { "epoch": 17.17, "learning_rate": 1.566728575218891e-05, "loss": 0.0052, "step": 64800 }, { "epoch": 17.19, "learning_rate": 1.5640753515521358e-05, "loss": 0.0049, "step": 64850 }, { "epoch": 17.2, "learning_rate": 1.5614221278853808e-05, "loss": 0.0052, "step": 64900 }, { "epoch": 17.21, "learning_rate": 1.5587689042186255e-05, "loss": 0.0049, "step": 64950 }, { "epoch": 17.23, "learning_rate": 1.5561156805518706e-05, "loss": 0.0047, "step": 65000 }, { "epoch": 17.24, "learning_rate": 1.5534624568851157e-05, "loss": 0.005, "step": 65050 }, { "epoch": 17.25, "learning_rate": 1.5508092332183604e-05, "loss": 0.0052, "step": 65100 }, { "epoch": 17.27, "learning_rate": 1.5481560095516055e-05, "loss": 0.0052, "step": 65150 }, { "epoch": 17.28, "learning_rate": 1.5455027858848502e-05, "loss": 0.0051, "step": 65200 }, { "epoch": 17.29, "learning_rate": 1.542849562218095e-05, "loss": 0.005, "step": 65250 }, { "epoch": 17.31, "learning_rate": 1.54019633855134e-05, "loss": 0.005, "step": 65300 }, { "epoch": 17.32, "learning_rate": 1.5375431148845847e-05, "loss": 0.0053, "step": 65350 }, { "epoch": 17.33, "learning_rate": 1.5348898912178298e-05, "loss": 0.005, "step": 65400 }, { "epoch": 17.35, "learning_rate": 1.5322366675510745e-05, "loss": 0.0053, "step": 65450 }, { "epoch": 17.36, "learning_rate": 1.5295834438843196e-05, "loss": 0.0052, "step": 65500 }, { "epoch": 17.37, "learning_rate": 1.5269302202175646e-05, "loss": 0.0055, "step": 65550 }, { "epoch": 17.39, "learning_rate": 1.5242769965508094e-05, "loss": 0.0054, "step": 65600 }, { "epoch": 17.4, "learning_rate": 1.5216237728840543e-05, "loss": 0.0054, "step": 65650 }, { "epoch": 17.41, "learning_rate": 1.518970549217299e-05, "loss": 0.0052, "step": 65700 }, { "epoch": 17.43, "learning_rate": 1.5163173255505439e-05, "loss": 0.0053, "step": 65750 }, { "epoch": 17.44, "learning_rate": 1.5136641018837888e-05, "loss": 0.0051, "step": 65800 }, { "epoch": 17.45, "learning_rate": 1.5110108782170337e-05, "loss": 0.0053, "step": 65850 }, { "epoch": 17.47, "learning_rate": 1.5083576545502786e-05, "loss": 0.005, "step": 65900 }, { "epoch": 17.48, "learning_rate": 1.5057044308835237e-05, "loss": 0.0048, "step": 65950 }, { "epoch": 17.49, "learning_rate": 1.5030512072167686e-05, "loss": 0.005, "step": 66000 }, { "epoch": 17.51, "learning_rate": 1.5003979835500134e-05, "loss": 0.0054, "step": 66050 }, { "epoch": 17.52, "learning_rate": 1.4977447598832583e-05, "loss": 0.0052, "step": 66100 }, { "epoch": 17.53, "learning_rate": 1.495091536216503e-05, "loss": 0.0057, "step": 66150 }, { "epoch": 17.55, "learning_rate": 1.492438312549748e-05, "loss": 0.0051, "step": 66200 }, { "epoch": 17.56, "learning_rate": 1.4897850888829929e-05, "loss": 0.0054, "step": 66250 }, { "epoch": 17.57, "learning_rate": 1.4871318652162378e-05, "loss": 0.0057, "step": 66300 }, { "epoch": 17.59, "learning_rate": 1.4844786415494827e-05, "loss": 0.0053, "step": 66350 }, { "epoch": 17.6, "learning_rate": 1.4818254178827276e-05, "loss": 0.0049, "step": 66400 }, { "epoch": 17.61, "learning_rate": 1.4791721942159725e-05, "loss": 0.0051, "step": 66450 }, { "epoch": 17.63, "learning_rate": 1.4765189705492175e-05, "loss": 0.0054, "step": 66500 }, { "epoch": 17.64, "learning_rate": 1.4738657468824624e-05, "loss": 0.0049, "step": 66550 }, { "epoch": 17.65, "learning_rate": 1.471212523215707e-05, "loss": 0.0051, "step": 66600 }, { "epoch": 17.66, "learning_rate": 1.4685592995489519e-05, "loss": 0.0053, "step": 66650 }, { "epoch": 17.68, "learning_rate": 1.465906075882197e-05, "loss": 0.0051, "step": 66700 }, { "epoch": 17.69, "learning_rate": 1.4632528522154418e-05, "loss": 0.005, "step": 66750 }, { "epoch": 17.7, "learning_rate": 1.4605996285486867e-05, "loss": 0.0054, "step": 66800 }, { "epoch": 17.72, "learning_rate": 1.4579464048819316e-05, "loss": 0.005, "step": 66850 }, { "epoch": 17.73, "learning_rate": 1.4552931812151765e-05, "loss": 0.0056, "step": 66900 }, { "epoch": 17.74, "learning_rate": 1.4526399575484214e-05, "loss": 0.0051, "step": 66950 }, { "epoch": 17.76, "learning_rate": 1.4499867338816663e-05, "loss": 0.0053, "step": 67000 }, { "epoch": 17.77, "learning_rate": 1.4473335102149114e-05, "loss": 0.0052, "step": 67050 }, { "epoch": 17.78, "learning_rate": 1.444680286548156e-05, "loss": 0.0055, "step": 67100 }, { "epoch": 17.8, "learning_rate": 1.4420270628814008e-05, "loss": 0.0051, "step": 67150 }, { "epoch": 17.81, "learning_rate": 1.4393738392146457e-05, "loss": 0.0056, "step": 67200 }, { "epoch": 17.82, "learning_rate": 1.4367206155478908e-05, "loss": 0.006, "step": 67250 }, { "epoch": 17.84, "learning_rate": 1.4340673918811357e-05, "loss": 0.0056, "step": 67300 }, { "epoch": 17.85, "learning_rate": 1.4314141682143806e-05, "loss": 0.0054, "step": 67350 }, { "epoch": 17.86, "learning_rate": 1.4287609445476255e-05, "loss": 0.0052, "step": 67400 }, { "epoch": 17.88, "learning_rate": 1.4261077208808704e-05, "loss": 0.006, "step": 67450 }, { "epoch": 17.89, "learning_rate": 1.4234544972141153e-05, "loss": 0.0052, "step": 67500 }, { "epoch": 17.9, "learning_rate": 1.42080127354736e-05, "loss": 0.0054, "step": 67550 }, { "epoch": 17.92, "learning_rate": 1.4181480498806049e-05, "loss": 0.0052, "step": 67600 }, { "epoch": 17.93, "learning_rate": 1.4154948262138498e-05, "loss": 0.0052, "step": 67650 }, { "epoch": 17.94, "learning_rate": 1.4128416025470947e-05, "loss": 0.0053, "step": 67700 }, { "epoch": 17.96, "learning_rate": 1.4101883788803396e-05, "loss": 0.0052, "step": 67750 }, { "epoch": 17.97, "learning_rate": 1.4075351552135847e-05, "loss": 0.0052, "step": 67800 }, { "epoch": 17.98, "learning_rate": 1.4048819315468296e-05, "loss": 0.0052, "step": 67850 }, { "epoch": 18.0, "learning_rate": 1.4022287078800745e-05, "loss": 0.005, "step": 67900 }, { "epoch": 18.01, "learning_rate": 1.3995754842133194e-05, "loss": 0.005, "step": 67950 }, { "epoch": 18.02, "learning_rate": 1.3969222605465643e-05, "loss": 0.0045, "step": 68000 }, { "epoch": 18.04, "learning_rate": 1.394269036879809e-05, "loss": 0.0045, "step": 68050 }, { "epoch": 18.05, "learning_rate": 1.3916158132130539e-05, "loss": 0.0049, "step": 68100 }, { "epoch": 18.06, "learning_rate": 1.3889625895462988e-05, "loss": 0.0047, "step": 68150 }, { "epoch": 18.08, "learning_rate": 1.3863093658795437e-05, "loss": 0.0052, "step": 68200 }, { "epoch": 18.09, "learning_rate": 1.3836561422127886e-05, "loss": 0.005, "step": 68250 }, { "epoch": 18.1, "learning_rate": 1.3810029185460335e-05, "loss": 0.0049, "step": 68300 }, { "epoch": 18.12, "learning_rate": 1.3783496948792785e-05, "loss": 0.0048, "step": 68350 }, { "epoch": 18.13, "learning_rate": 1.3756964712125234e-05, "loss": 0.0049, "step": 68400 }, { "epoch": 18.14, "learning_rate": 1.3730432475457683e-05, "loss": 0.0047, "step": 68450 }, { "epoch": 18.16, "learning_rate": 1.3703900238790129e-05, "loss": 0.005, "step": 68500 }, { "epoch": 18.17, "learning_rate": 1.3677368002122578e-05, "loss": 0.0047, "step": 68550 }, { "epoch": 18.18, "learning_rate": 1.3650835765455028e-05, "loss": 0.0047, "step": 68600 }, { "epoch": 18.2, "learning_rate": 1.3624303528787477e-05, "loss": 0.005, "step": 68650 }, { "epoch": 18.21, "learning_rate": 1.3597771292119926e-05, "loss": 0.0048, "step": 68700 }, { "epoch": 18.22, "learning_rate": 1.3571239055452375e-05, "loss": 0.0051, "step": 68750 }, { "epoch": 18.23, "learning_rate": 1.3544706818784824e-05, "loss": 0.0048, "step": 68800 }, { "epoch": 18.25, "learning_rate": 1.3518174582117273e-05, "loss": 0.0048, "step": 68850 }, { "epoch": 18.26, "learning_rate": 1.3491642345449724e-05, "loss": 0.0047, "step": 68900 }, { "epoch": 18.27, "learning_rate": 1.346511010878217e-05, "loss": 0.0049, "step": 68950 }, { "epoch": 18.29, "learning_rate": 1.3438577872114619e-05, "loss": 0.005, "step": 69000 }, { "epoch": 18.3, "learning_rate": 1.3412045635447068e-05, "loss": 0.0049, "step": 69050 }, { "epoch": 18.31, "learning_rate": 1.3385513398779516e-05, "loss": 0.0049, "step": 69100 }, { "epoch": 18.33, "learning_rate": 1.3358981162111967e-05, "loss": 0.0049, "step": 69150 }, { "epoch": 18.34, "learning_rate": 1.3332448925444416e-05, "loss": 0.0049, "step": 69200 }, { "epoch": 18.35, "learning_rate": 1.3305916688776865e-05, "loss": 0.0048, "step": 69250 }, { "epoch": 18.37, "learning_rate": 1.3279384452109314e-05, "loss": 0.0051, "step": 69300 }, { "epoch": 18.38, "learning_rate": 1.3252852215441763e-05, "loss": 0.0048, "step": 69350 }, { "epoch": 18.39, "learning_rate": 1.3226319978774212e-05, "loss": 0.0049, "step": 69400 }, { "epoch": 18.41, "learning_rate": 1.319978774210666e-05, "loss": 0.0047, "step": 69450 }, { "epoch": 18.42, "learning_rate": 1.3173255505439108e-05, "loss": 0.0053, "step": 69500 }, { "epoch": 18.43, "learning_rate": 1.3146723268771557e-05, "loss": 0.0051, "step": 69550 }, { "epoch": 18.45, "learning_rate": 1.3120191032104006e-05, "loss": 0.0051, "step": 69600 }, { "epoch": 18.46, "learning_rate": 1.3093658795436455e-05, "loss": 0.0052, "step": 69650 }, { "epoch": 18.47, "learning_rate": 1.3067126558768906e-05, "loss": 0.005, "step": 69700 }, { "epoch": 18.49, "learning_rate": 1.3040594322101355e-05, "loss": 0.0049, "step": 69750 }, { "epoch": 18.5, "learning_rate": 1.3014062085433804e-05, "loss": 0.0053, "step": 69800 }, { "epoch": 18.51, "learning_rate": 1.2987529848766253e-05, "loss": 0.0051, "step": 69850 }, { "epoch": 18.53, "learning_rate": 1.29609976120987e-05, "loss": 0.0048, "step": 69900 }, { "epoch": 18.54, "learning_rate": 1.2934465375431149e-05, "loss": 0.0051, "step": 69950 }, { "epoch": 18.55, "learning_rate": 1.2907933138763598e-05, "loss": 0.0049, "step": 70000 }, { "epoch": 18.57, "learning_rate": 1.2881400902096047e-05, "loss": 0.0049, "step": 70050 }, { "epoch": 18.58, "learning_rate": 1.2854868665428496e-05, "loss": 0.0051, "step": 70100 }, { "epoch": 18.59, "learning_rate": 1.2828336428760945e-05, "loss": 0.0049, "step": 70150 }, { "epoch": 18.61, "learning_rate": 1.2801804192093394e-05, "loss": 0.0049, "step": 70200 }, { "epoch": 18.62, "learning_rate": 1.2775271955425844e-05, "loss": 0.0049, "step": 70250 }, { "epoch": 18.63, "learning_rate": 1.2748739718758293e-05, "loss": 0.0051, "step": 70300 }, { "epoch": 18.65, "learning_rate": 1.2722207482090742e-05, "loss": 0.0049, "step": 70350 }, { "epoch": 18.66, "learning_rate": 1.2695675245423188e-05, "loss": 0.005, "step": 70400 }, { "epoch": 18.67, "learning_rate": 1.2669143008755639e-05, "loss": 0.0052, "step": 70450 }, { "epoch": 18.69, "learning_rate": 1.2642610772088088e-05, "loss": 0.0051, "step": 70500 }, { "epoch": 18.7, "learning_rate": 1.2616078535420537e-05, "loss": 0.005, "step": 70550 }, { "epoch": 18.71, "learning_rate": 1.2589546298752986e-05, "loss": 0.0051, "step": 70600 }, { "epoch": 18.73, "learning_rate": 1.2563014062085435e-05, "loss": 0.0049, "step": 70650 }, { "epoch": 18.74, "learning_rate": 1.2536481825417883e-05, "loss": 0.005, "step": 70700 }, { "epoch": 18.75, "learning_rate": 1.2509949588750332e-05, "loss": 0.0049, "step": 70750 }, { "epoch": 18.76, "learning_rate": 1.2483417352082781e-05, "loss": 0.0051, "step": 70800 }, { "epoch": 18.78, "learning_rate": 1.245688511541523e-05, "loss": 0.0049, "step": 70850 }, { "epoch": 18.79, "learning_rate": 1.243035287874768e-05, "loss": 0.005, "step": 70900 }, { "epoch": 18.8, "learning_rate": 1.2403820642080127e-05, "loss": 0.0049, "step": 70950 }, { "epoch": 18.82, "learning_rate": 1.2377288405412577e-05, "loss": 0.0049, "step": 71000 }, { "epoch": 18.83, "learning_rate": 1.2350756168745026e-05, "loss": 0.0052, "step": 71050 }, { "epoch": 18.84, "learning_rate": 1.2324223932077475e-05, "loss": 0.0049, "step": 71100 }, { "epoch": 18.86, "learning_rate": 1.2297691695409924e-05, "loss": 0.0052, "step": 71150 }, { "epoch": 18.87, "learning_rate": 1.2271159458742371e-05, "loss": 0.0051, "step": 71200 }, { "epoch": 18.88, "learning_rate": 1.224462722207482e-05, "loss": 0.005, "step": 71250 }, { "epoch": 18.9, "learning_rate": 1.2218094985407271e-05, "loss": 0.0049, "step": 71300 }, { "epoch": 18.91, "learning_rate": 1.219156274873972e-05, "loss": 0.0052, "step": 71350 }, { "epoch": 18.92, "learning_rate": 1.2165030512072169e-05, "loss": 0.0051, "step": 71400 }, { "epoch": 18.94, "learning_rate": 1.2138498275404616e-05, "loss": 0.0051, "step": 71450 }, { "epoch": 18.95, "learning_rate": 1.2111966038737065e-05, "loss": 0.0051, "step": 71500 }, { "epoch": 18.96, "learning_rate": 1.2085433802069516e-05, "loss": 0.0051, "step": 71550 }, { "epoch": 18.98, "learning_rate": 1.2058901565401965e-05, "loss": 0.0049, "step": 71600 }, { "epoch": 18.99, "learning_rate": 1.2032369328734412e-05, "loss": 0.0049, "step": 71650 }, { "epoch": 19.0, "learning_rate": 1.2005837092066861e-05, "loss": 0.0049, "step": 71700 }, { "epoch": 19.02, "learning_rate": 1.197930485539931e-05, "loss": 0.0045, "step": 71750 }, { "epoch": 19.03, "learning_rate": 1.1952772618731759e-05, "loss": 0.0044, "step": 71800 }, { "epoch": 19.04, "learning_rate": 1.192624038206421e-05, "loss": 0.0044, "step": 71850 }, { "epoch": 19.06, "learning_rate": 1.1899708145396657e-05, "loss": 0.0044, "step": 71900 }, { "epoch": 19.07, "learning_rate": 1.1873175908729106e-05, "loss": 0.0048, "step": 71950 }, { "epoch": 19.08, "learning_rate": 1.1846643672061555e-05, "loss": 0.0046, "step": 72000 }, { "epoch": 19.1, "learning_rate": 1.1820111435394004e-05, "loss": 0.0047, "step": 72050 }, { "epoch": 19.11, "learning_rate": 1.1793579198726455e-05, "loss": 0.0046, "step": 72100 }, { "epoch": 19.12, "learning_rate": 1.1767046962058902e-05, "loss": 0.0048, "step": 72150 }, { "epoch": 19.14, "learning_rate": 1.174051472539135e-05, "loss": 0.005, "step": 72200 }, { "epoch": 19.15, "learning_rate": 1.17139824887238e-05, "loss": 0.0051, "step": 72250 }, { "epoch": 19.16, "learning_rate": 1.1687450252056249e-05, "loss": 0.0047, "step": 72300 }, { "epoch": 19.18, "learning_rate": 1.1660918015388698e-05, "loss": 0.0048, "step": 72350 }, { "epoch": 19.19, "learning_rate": 1.1634385778721147e-05, "loss": 0.0046, "step": 72400 }, { "epoch": 19.2, "learning_rate": 1.1607853542053596e-05, "loss": 0.0047, "step": 72450 }, { "epoch": 19.22, "learning_rate": 1.1581321305386045e-05, "loss": 0.0045, "step": 72500 }, { "epoch": 19.23, "learning_rate": 1.1554789068718494e-05, "loss": 0.0047, "step": 72550 }, { "epoch": 19.24, "learning_rate": 1.1528256832050943e-05, "loss": 0.0047, "step": 72600 }, { "epoch": 19.26, "learning_rate": 1.1501724595383392e-05, "loss": 0.0048, "step": 72650 }, { "epoch": 19.27, "learning_rate": 1.147519235871584e-05, "loss": 0.0047, "step": 72700 }, { "epoch": 19.28, "learning_rate": 1.144866012204829e-05, "loss": 0.0048, "step": 72750 }, { "epoch": 19.29, "learning_rate": 1.1422127885380738e-05, "loss": 0.0047, "step": 72800 }, { "epoch": 19.31, "learning_rate": 1.1395595648713186e-05, "loss": 0.0048, "step": 72850 }, { "epoch": 19.32, "learning_rate": 1.1369063412045636e-05, "loss": 0.0047, "step": 72900 }, { "epoch": 19.33, "learning_rate": 1.1342531175378085e-05, "loss": 0.0049, "step": 72950 }, { "epoch": 19.35, "learning_rate": 1.1315998938710534e-05, "loss": 0.0049, "step": 73000 }, { "epoch": 19.36, "learning_rate": 1.1289466702042983e-05, "loss": 0.0047, "step": 73050 }, { "epoch": 19.37, "learning_rate": 1.126293446537543e-05, "loss": 0.0051, "step": 73100 }, { "epoch": 19.39, "learning_rate": 1.1236402228707881e-05, "loss": 0.0049, "step": 73150 }, { "epoch": 19.4, "learning_rate": 1.120986999204033e-05, "loss": 0.005, "step": 73200 }, { "epoch": 19.41, "learning_rate": 1.118333775537278e-05, "loss": 0.0049, "step": 73250 }, { "epoch": 19.43, "learning_rate": 1.1156805518705226e-05, "loss": 0.0049, "step": 73300 }, { "epoch": 19.44, "learning_rate": 1.1130273282037675e-05, "loss": 0.0049, "step": 73350 }, { "epoch": 19.45, "learning_rate": 1.1103741045370124e-05, "loss": 0.0048, "step": 73400 }, { "epoch": 19.47, "learning_rate": 1.1077208808702575e-05, "loss": 0.0048, "step": 73450 }, { "epoch": 19.48, "learning_rate": 1.1050676572035024e-05, "loss": 0.0048, "step": 73500 }, { "epoch": 19.49, "learning_rate": 1.1024144335367471e-05, "loss": 0.0047, "step": 73550 }, { "epoch": 19.51, "learning_rate": 1.099761209869992e-05, "loss": 0.0048, "step": 73600 }, { "epoch": 19.52, "learning_rate": 1.097107986203237e-05, "loss": 0.0049, "step": 73650 }, { "epoch": 19.53, "learning_rate": 1.094454762536482e-05, "loss": 0.0049, "step": 73700 }, { "epoch": 19.55, "learning_rate": 1.0918015388697269e-05, "loss": 0.0049, "step": 73750 }, { "epoch": 19.56, "learning_rate": 1.0891483152029716e-05, "loss": 0.0049, "step": 73800 }, { "epoch": 19.57, "learning_rate": 1.0864950915362165e-05, "loss": 0.0049, "step": 73850 }, { "epoch": 19.59, "learning_rate": 1.0838418678694614e-05, "loss": 0.0049, "step": 73900 }, { "epoch": 19.6, "learning_rate": 1.0811886442027063e-05, "loss": 0.0048, "step": 73950 }, { "epoch": 19.61, "learning_rate": 1.0785354205359512e-05, "loss": 0.0051, "step": 74000 }, { "epoch": 19.63, "learning_rate": 1.0758821968691961e-05, "loss": 0.0048, "step": 74050 }, { "epoch": 19.64, "learning_rate": 1.073228973202441e-05, "loss": 0.0049, "step": 74100 }, { "epoch": 19.65, "learning_rate": 1.0705757495356859e-05, "loss": 0.0049, "step": 74150 }, { "epoch": 19.67, "learning_rate": 1.0679225258689308e-05, "loss": 0.0049, "step": 74200 }, { "epoch": 19.68, "learning_rate": 1.0652693022021757e-05, "loss": 0.0048, "step": 74250 }, { "epoch": 19.69, "learning_rate": 1.0626160785354206e-05, "loss": 0.0049, "step": 74300 }, { "epoch": 19.71, "learning_rate": 1.0599628548686655e-05, "loss": 0.0052, "step": 74350 }, { "epoch": 19.72, "learning_rate": 1.0573096312019104e-05, "loss": 0.005, "step": 74400 }, { "epoch": 19.73, "learning_rate": 1.0546564075351553e-05, "loss": 0.005, "step": 74450 }, { "epoch": 19.75, "learning_rate": 1.0520031838684002e-05, "loss": 0.0048, "step": 74500 }, { "epoch": 19.76, "learning_rate": 1.049349960201645e-05, "loss": 0.0049, "step": 74550 }, { "epoch": 19.77, "learning_rate": 1.04669673653489e-05, "loss": 0.005, "step": 74600 }, { "epoch": 19.79, "learning_rate": 1.0440435128681349e-05, "loss": 0.005, "step": 74650 }, { "epoch": 19.8, "learning_rate": 1.0413902892013798e-05, "loss": 0.0051, "step": 74700 }, { "epoch": 19.81, "learning_rate": 1.0387370655346247e-05, "loss": 0.0048, "step": 74750 }, { "epoch": 19.83, "learning_rate": 1.0360838418678696e-05, "loss": 0.0052, "step": 74800 }, { "epoch": 19.84, "learning_rate": 1.0334306182011144e-05, "loss": 0.0054, "step": 74850 }, { "epoch": 19.85, "learning_rate": 1.0307773945343593e-05, "loss": 0.0049, "step": 74900 }, { "epoch": 19.86, "learning_rate": 1.028124170867604e-05, "loss": 0.0048, "step": 74950 }, { "epoch": 19.88, "learning_rate": 1.025470947200849e-05, "loss": 0.0049, "step": 75000 }, { "epoch": 19.89, "learning_rate": 1.022817723534094e-05, "loss": 0.0049, "step": 75050 }, { "epoch": 19.9, "learning_rate": 1.020164499867339e-05, "loss": 0.0049, "step": 75100 }, { "epoch": 19.92, "learning_rate": 1.0175112762005838e-05, "loss": 0.0051, "step": 75150 }, { "epoch": 19.93, "learning_rate": 1.0148580525338286e-05, "loss": 0.0051, "step": 75200 }, { "epoch": 19.94, "learning_rate": 1.0122048288670735e-05, "loss": 0.005, "step": 75250 }, { "epoch": 19.96, "learning_rate": 1.0095516052003185e-05, "loss": 0.0048, "step": 75300 }, { "epoch": 19.97, "learning_rate": 1.0068983815335634e-05, "loss": 0.0051, "step": 75350 }, { "epoch": 19.98, "learning_rate": 1.0042451578668083e-05, "loss": 0.0052, "step": 75400 }, { "epoch": 20.0, "learning_rate": 1.001591934200053e-05, "loss": 0.005, "step": 75450 }, { "epoch": 20.01, "learning_rate": 9.98938710533298e-06, "loss": 0.0044, "step": 75500 }, { "epoch": 20.02, "learning_rate": 9.962854868665428e-06, "loss": 0.0045, "step": 75550 }, { "epoch": 20.04, "learning_rate": 9.936322631997879e-06, "loss": 0.0044, "step": 75600 }, { "epoch": 20.05, "learning_rate": 9.909790395330326e-06, "loss": 0.0046, "step": 75650 }, { "epoch": 20.06, "learning_rate": 9.883258158662775e-06, "loss": 0.0046, "step": 75700 }, { "epoch": 20.08, "learning_rate": 9.856725921995224e-06, "loss": 0.0049, "step": 75750 }, { "epoch": 20.09, "learning_rate": 9.830193685327673e-06, "loss": 0.0046, "step": 75800 }, { "epoch": 20.1, "learning_rate": 9.803661448660124e-06, "loss": 0.0045, "step": 75850 }, { "epoch": 20.12, "learning_rate": 9.777129211992571e-06, "loss": 0.0046, "step": 75900 }, { "epoch": 20.13, "learning_rate": 9.75059697532502e-06, "loss": 0.0046, "step": 75950 }, { "epoch": 20.14, "learning_rate": 9.724064738657469e-06, "loss": 0.0045, "step": 76000 }, { "epoch": 20.16, "learning_rate": 9.697532501989918e-06, "loss": 0.0046, "step": 76050 }, { "epoch": 20.17, "learning_rate": 9.671000265322367e-06, "loss": 0.0045, "step": 76100 }, { "epoch": 20.18, "learning_rate": 9.644468028654816e-06, "loss": 0.0045, "step": 76150 }, { "epoch": 20.2, "learning_rate": 9.617935791987265e-06, "loss": 0.0046, "step": 76200 }, { "epoch": 20.21, "learning_rate": 9.591403555319714e-06, "loss": 0.0048, "step": 76250 }, { "epoch": 20.22, "learning_rate": 9.564871318652163e-06, "loss": 0.0045, "step": 76300 }, { "epoch": 20.24, "learning_rate": 9.538339081984612e-06, "loss": 0.0047, "step": 76350 }, { "epoch": 20.25, "learning_rate": 9.51180684531706e-06, "loss": 0.0045, "step": 76400 }, { "epoch": 20.26, "learning_rate": 9.48527460864951e-06, "loss": 0.0046, "step": 76450 }, { "epoch": 20.28, "learning_rate": 9.458742371981959e-06, "loss": 0.0047, "step": 76500 }, { "epoch": 20.29, "learning_rate": 9.432210135314408e-06, "loss": 0.0046, "step": 76550 }, { "epoch": 20.3, "learning_rate": 9.405677898646855e-06, "loss": 0.0047, "step": 76600 }, { "epoch": 20.32, "learning_rate": 9.379145661979306e-06, "loss": 0.0045, "step": 76650 }, { "epoch": 20.33, "learning_rate": 9.352613425311755e-06, "loss": 0.0047, "step": 76700 }, { "epoch": 20.34, "learning_rate": 9.326081188644204e-06, "loss": 0.0046, "step": 76750 }, { "epoch": 20.36, "learning_rate": 9.299548951976653e-06, "loss": 0.0048, "step": 76800 }, { "epoch": 20.37, "learning_rate": 9.2730167153091e-06, "loss": 0.0046, "step": 76850 }, { "epoch": 20.38, "learning_rate": 9.24648447864155e-06, "loss": 0.0046, "step": 76900 }, { "epoch": 20.39, "learning_rate": 9.219952241974e-06, "loss": 0.0046, "step": 76950 }, { "epoch": 20.41, "learning_rate": 9.193420005306448e-06, "loss": 0.0049, "step": 77000 }, { "epoch": 20.42, "learning_rate": 9.166887768638897e-06, "loss": 0.0047, "step": 77050 }, { "epoch": 20.43, "learning_rate": 9.140355531971345e-06, "loss": 0.0047, "step": 77100 }, { "epoch": 20.45, "learning_rate": 9.113823295303794e-06, "loss": 0.0046, "step": 77150 }, { "epoch": 20.46, "learning_rate": 9.087291058636244e-06, "loss": 0.0047, "step": 77200 }, { "epoch": 20.47, "learning_rate": 9.060758821968693e-06, "loss": 0.0048, "step": 77250 }, { "epoch": 20.49, "learning_rate": 9.03422658530114e-06, "loss": 0.0047, "step": 77300 }, { "epoch": 20.5, "learning_rate": 9.00769434863359e-06, "loss": 0.0047, "step": 77350 }, { "epoch": 20.51, "learning_rate": 8.981162111966039e-06, "loss": 0.0049, "step": 77400 }, { "epoch": 20.53, "learning_rate": 8.95462987529849e-06, "loss": 0.0046, "step": 77450 }, { "epoch": 20.54, "learning_rate": 8.928097638630938e-06, "loss": 0.0048, "step": 77500 }, { "epoch": 20.55, "learning_rate": 8.901565401963385e-06, "loss": 0.0046, "step": 77550 }, { "epoch": 20.57, "learning_rate": 8.875033165295834e-06, "loss": 0.0047, "step": 77600 }, { "epoch": 20.58, "learning_rate": 8.848500928628283e-06, "loss": 0.0048, "step": 77650 }, { "epoch": 20.59, "learning_rate": 8.821968691960732e-06, "loss": 0.0048, "step": 77700 }, { "epoch": 20.61, "learning_rate": 8.795436455293183e-06, "loss": 0.0047, "step": 77750 }, { "epoch": 20.62, "learning_rate": 8.76890421862563e-06, "loss": 0.0047, "step": 77800 }, { "epoch": 20.63, "learning_rate": 8.74237198195808e-06, "loss": 0.0047, "step": 77850 }, { "epoch": 20.65, "learning_rate": 8.715839745290528e-06, "loss": 0.0048, "step": 77900 }, { "epoch": 20.66, "learning_rate": 8.689307508622977e-06, "loss": 0.0048, "step": 77950 }, { "epoch": 20.67, "learning_rate": 8.662775271955426e-06, "loss": 0.0047, "step": 78000 }, { "epoch": 20.69, "learning_rate": 8.636243035287875e-06, "loss": 0.0048, "step": 78050 }, { "epoch": 20.7, "learning_rate": 8.609710798620324e-06, "loss": 0.0047, "step": 78100 }, { "epoch": 20.71, "learning_rate": 8.583178561952773e-06, "loss": 0.0049, "step": 78150 }, { "epoch": 20.73, "learning_rate": 8.556646325285222e-06, "loss": 0.0048, "step": 78200 }, { "epoch": 20.74, "learning_rate": 8.530114088617671e-06, "loss": 0.0048, "step": 78250 }, { "epoch": 20.75, "learning_rate": 8.50358185195012e-06, "loss": 0.0048, "step": 78300 }, { "epoch": 20.77, "learning_rate": 8.477049615282569e-06, "loss": 0.0047, "step": 78350 }, { "epoch": 20.78, "learning_rate": 8.450517378615018e-06, "loss": 0.0047, "step": 78400 }, { "epoch": 20.79, "learning_rate": 8.423985141947467e-06, "loss": 0.0049, "step": 78450 }, { "epoch": 20.81, "learning_rate": 8.397452905279916e-06, "loss": 0.005, "step": 78500 }, { "epoch": 20.82, "learning_rate": 8.370920668612365e-06, "loss": 0.0047, "step": 78550 }, { "epoch": 20.83, "learning_rate": 8.344388431944814e-06, "loss": 0.0048, "step": 78600 }, { "epoch": 20.85, "learning_rate": 8.317856195277263e-06, "loss": 0.0049, "step": 78650 }, { "epoch": 20.86, "learning_rate": 8.29132395860971e-06, "loss": 0.0053, "step": 78700 }, { "epoch": 20.87, "learning_rate": 8.264791721942159e-06, "loss": 0.0046, "step": 78750 }, { "epoch": 20.89, "learning_rate": 8.23825948527461e-06, "loss": 0.0048, "step": 78800 }, { "epoch": 20.9, "learning_rate": 8.211727248607059e-06, "loss": 0.0048, "step": 78850 }, { "epoch": 20.91, "learning_rate": 8.185195011939508e-06, "loss": 0.0048, "step": 78900 }, { "epoch": 20.92, "learning_rate": 8.158662775271955e-06, "loss": 0.0047, "step": 78950 }, { "epoch": 20.94, "learning_rate": 8.132130538604404e-06, "loss": 0.005, "step": 79000 }, { "epoch": 20.95, "learning_rate": 8.105598301936854e-06, "loss": 0.0052, "step": 79050 }, { "epoch": 20.96, "learning_rate": 8.079066065269303e-06, "loss": 0.0049, "step": 79100 }, { "epoch": 20.98, "learning_rate": 8.052533828601752e-06, "loss": 0.0049, "step": 79150 }, { "epoch": 20.99, "learning_rate": 8.0260015919342e-06, "loss": 0.0049, "step": 79200 }, { "epoch": 21.0, "learning_rate": 7.999469355266649e-06, "loss": 0.0046, "step": 79250 }, { "epoch": 21.02, "learning_rate": 7.972937118599098e-06, "loss": 0.0044, "step": 79300 }, { "epoch": 21.03, "learning_rate": 7.946404881931548e-06, "loss": 0.0044, "step": 79350 }, { "epoch": 21.04, "learning_rate": 7.919872645263997e-06, "loss": 0.0043, "step": 79400 }, { "epoch": 21.06, "learning_rate": 7.893340408596445e-06, "loss": 0.0044, "step": 79450 }, { "epoch": 21.07, "learning_rate": 7.866808171928893e-06, "loss": 0.0043, "step": 79500 }, { "epoch": 21.08, "learning_rate": 7.840275935261342e-06, "loss": 0.0043, "step": 79550 }, { "epoch": 21.1, "learning_rate": 7.813743698593793e-06, "loss": 0.0044, "step": 79600 }, { "epoch": 21.11, "learning_rate": 7.78721146192624e-06, "loss": 0.0045, "step": 79650 }, { "epoch": 21.12, "learning_rate": 7.76067922525869e-06, "loss": 0.0044, "step": 79700 }, { "epoch": 21.14, "learning_rate": 7.734146988591138e-06, "loss": 0.0046, "step": 79750 }, { "epoch": 21.15, "learning_rate": 7.707614751923587e-06, "loss": 0.0046, "step": 79800 }, { "epoch": 21.16, "learning_rate": 7.681082515256036e-06, "loss": 0.0045, "step": 79850 }, { "epoch": 21.18, "learning_rate": 7.654550278588485e-06, "loss": 0.0046, "step": 79900 }, { "epoch": 21.19, "learning_rate": 7.628018041920934e-06, "loss": 0.0048, "step": 79950 }, { "epoch": 21.2, "learning_rate": 7.601485805253383e-06, "loss": 0.0045, "step": 80000 }, { "epoch": 21.22, "learning_rate": 7.574953568585832e-06, "loss": 0.0045, "step": 80050 }, { "epoch": 21.23, "learning_rate": 7.548421331918282e-06, "loss": 0.0046, "step": 80100 }, { "epoch": 21.24, "learning_rate": 7.521889095250729e-06, "loss": 0.0048, "step": 80150 }, { "epoch": 21.26, "learning_rate": 7.495356858583179e-06, "loss": 0.0046, "step": 80200 }, { "epoch": 21.27, "learning_rate": 7.468824621915628e-06, "loss": 0.0046, "step": 80250 }, { "epoch": 21.28, "learning_rate": 7.442292385248077e-06, "loss": 0.0045, "step": 80300 }, { "epoch": 21.3, "learning_rate": 7.415760148580525e-06, "loss": 0.0045, "step": 80350 }, { "epoch": 21.31, "learning_rate": 7.389227911912974e-06, "loss": 0.0045, "step": 80400 }, { "epoch": 21.32, "learning_rate": 7.362695675245423e-06, "loss": 0.0045, "step": 80450 }, { "epoch": 21.34, "learning_rate": 7.336163438577873e-06, "loss": 0.0046, "step": 80500 }, { "epoch": 21.35, "learning_rate": 7.309631201910322e-06, "loss": 0.0045, "step": 80550 }, { "epoch": 21.36, "learning_rate": 7.28309896524277e-06, "loss": 0.0046, "step": 80600 }, { "epoch": 21.38, "learning_rate": 7.256566728575219e-06, "loss": 0.0046, "step": 80650 }, { "epoch": 21.39, "learning_rate": 7.230034491907668e-06, "loss": 0.0047, "step": 80700 }, { "epoch": 21.4, "learning_rate": 7.203502255240118e-06, "loss": 0.0046, "step": 80750 }, { "epoch": 21.42, "learning_rate": 7.176970018572567e-06, "loss": 0.0046, "step": 80800 }, { "epoch": 21.43, "learning_rate": 7.150437781905015e-06, "loss": 0.0047, "step": 80850 }, { "epoch": 21.44, "learning_rate": 7.123905545237464e-06, "loss": 0.0045, "step": 80900 }, { "epoch": 21.46, "learning_rate": 7.097373308569913e-06, "loss": 0.0046, "step": 80950 }, { "epoch": 21.47, "learning_rate": 7.070841071902362e-06, "loss": 0.0047, "step": 81000 }, { "epoch": 21.48, "learning_rate": 7.04430883523481e-06, "loss": 0.0046, "step": 81050 }, { "epoch": 21.49, "learning_rate": 7.017776598567259e-06, "loss": 0.0046, "step": 81100 }, { "epoch": 21.51, "learning_rate": 6.991244361899709e-06, "loss": 0.0048, "step": 81150 }, { "epoch": 21.52, "learning_rate": 6.964712125232158e-06, "loss": 0.0046, "step": 81200 }, { "epoch": 21.53, "learning_rate": 6.9381798885646066e-06, "loss": 0.0047, "step": 81250 }, { "epoch": 21.55, "learning_rate": 6.911647651897055e-06, "loss": 0.0046, "step": 81300 }, { "epoch": 21.56, "learning_rate": 6.885115415229504e-06, "loss": 0.0047, "step": 81350 }, { "epoch": 21.57, "learning_rate": 6.8585831785619535e-06, "loss": 0.0045, "step": 81400 }, { "epoch": 21.59, "learning_rate": 6.8320509418944024e-06, "loss": 0.0046, "step": 81450 }, { "epoch": 21.6, "learning_rate": 6.805518705226851e-06, "loss": 0.0049, "step": 81500 }, { "epoch": 21.61, "learning_rate": 6.7789864685592995e-06, "loss": 0.0047, "step": 81550 }, { "epoch": 21.63, "learning_rate": 6.7524542318917485e-06, "loss": 0.0047, "step": 81600 }, { "epoch": 21.64, "learning_rate": 6.7259219952241975e-06, "loss": 0.0047, "step": 81650 }, { "epoch": 21.65, "learning_rate": 6.699389758556647e-06, "loss": 0.0047, "step": 81700 }, { "epoch": 21.67, "learning_rate": 6.672857521889096e-06, "loss": 0.0049, "step": 81750 }, { "epoch": 21.68, "learning_rate": 6.646325285221544e-06, "loss": 0.0048, "step": 81800 }, { "epoch": 21.69, "learning_rate": 6.619793048553993e-06, "loss": 0.0047, "step": 81850 }, { "epoch": 21.71, "learning_rate": 6.593260811886442e-06, "loss": 0.0048, "step": 81900 }, { "epoch": 21.72, "learning_rate": 6.566728575218892e-06, "loss": 0.0047, "step": 81950 }, { "epoch": 21.73, "learning_rate": 6.540196338551339e-06, "loss": 0.0046, "step": 82000 }, { "epoch": 21.75, "learning_rate": 6.513664101883789e-06, "loss": 0.0046, "step": 82050 }, { "epoch": 21.76, "learning_rate": 6.487131865216238e-06, "loss": 0.0046, "step": 82100 }, { "epoch": 21.77, "learning_rate": 6.460599628548687e-06, "loss": 0.0046, "step": 82150 }, { "epoch": 21.79, "learning_rate": 6.434067391881136e-06, "loss": 0.0047, "step": 82200 }, { "epoch": 21.8, "learning_rate": 6.407535155213584e-06, "loss": 0.0047, "step": 82250 }, { "epoch": 21.81, "learning_rate": 6.381002918546033e-06, "loss": 0.0046, "step": 82300 }, { "epoch": 21.83, "learning_rate": 6.354470681878483e-06, "loss": 0.0047, "step": 82350 }, { "epoch": 21.84, "learning_rate": 6.327938445210932e-06, "loss": 0.0046, "step": 82400 }, { "epoch": 21.85, "learning_rate": 6.301406208543381e-06, "loss": 0.0047, "step": 82450 }, { "epoch": 21.87, "learning_rate": 6.274873971875829e-06, "loss": 0.0047, "step": 82500 }, { "epoch": 21.88, "learning_rate": 6.248341735208278e-06, "loss": 0.0048, "step": 82550 }, { "epoch": 21.89, "learning_rate": 6.221809498540727e-06, "loss": 0.0047, "step": 82600 }, { "epoch": 21.91, "learning_rate": 6.195277261873176e-06, "loss": 0.0048, "step": 82650 }, { "epoch": 21.92, "learning_rate": 6.168745025205625e-06, "loss": 0.0046, "step": 82700 }, { "epoch": 21.93, "learning_rate": 6.142212788538075e-06, "loss": 0.0047, "step": 82750 }, { "epoch": 21.95, "learning_rate": 6.115680551870523e-06, "loss": 0.0047, "step": 82800 }, { "epoch": 21.96, "learning_rate": 6.089148315202972e-06, "loss": 0.0046, "step": 82850 }, { "epoch": 21.97, "learning_rate": 6.062616078535421e-06, "loss": 0.0048, "step": 82900 }, { "epoch": 21.99, "learning_rate": 6.03608384186787e-06, "loss": 0.0046, "step": 82950 }, { "epoch": 22.0, "learning_rate": 6.009551605200319e-06, "loss": 0.0047, "step": 83000 }, { "epoch": 22.01, "learning_rate": 5.983019368532768e-06, "loss": 0.0042, "step": 83050 }, { "epoch": 22.02, "learning_rate": 5.956487131865217e-06, "loss": 0.0043, "step": 83100 }, { "epoch": 22.04, "learning_rate": 5.929954895197666e-06, "loss": 0.0044, "step": 83150 }, { "epoch": 22.05, "learning_rate": 5.903422658530115e-06, "loss": 0.0042, "step": 83200 }, { "epoch": 22.06, "learning_rate": 5.876890421862563e-06, "loss": 0.0044, "step": 83250 }, { "epoch": 22.08, "learning_rate": 5.8503581851950126e-06, "loss": 0.0042, "step": 83300 }, { "epoch": 22.09, "learning_rate": 5.823825948527461e-06, "loss": 0.0043, "step": 83350 }, { "epoch": 22.1, "learning_rate": 5.79729371185991e-06, "loss": 0.0043, "step": 83400 }, { "epoch": 22.12, "learning_rate": 5.7707614751923595e-06, "loss": 0.0044, "step": 83450 }, { "epoch": 22.13, "learning_rate": 5.744229238524808e-06, "loss": 0.0044, "step": 83500 }, { "epoch": 22.14, "learning_rate": 5.717697001857257e-06, "loss": 0.0043, "step": 83550 }, { "epoch": 22.16, "learning_rate": 5.6911647651897055e-06, "loss": 0.0043, "step": 83600 }, { "epoch": 22.17, "learning_rate": 5.6646325285221545e-06, "loss": 0.0044, "step": 83650 }, { "epoch": 22.18, "learning_rate": 5.6381002918546035e-06, "loss": 0.0044, "step": 83700 }, { "epoch": 22.2, "learning_rate": 5.6115680551870524e-06, "loss": 0.0043, "step": 83750 }, { "epoch": 22.21, "learning_rate": 5.585035818519501e-06, "loss": 0.0044, "step": 83800 }, { "epoch": 22.22, "learning_rate": 5.55850358185195e-06, "loss": 0.0044, "step": 83850 }, { "epoch": 22.24, "learning_rate": 5.531971345184399e-06, "loss": 0.0044, "step": 83900 }, { "epoch": 22.25, "learning_rate": 5.505439108516848e-06, "loss": 0.0044, "step": 83950 }, { "epoch": 22.26, "learning_rate": 5.478906871849297e-06, "loss": 0.0044, "step": 84000 }, { "epoch": 22.28, "learning_rate": 5.452374635181745e-06, "loss": 0.0045, "step": 84050 }, { "epoch": 22.29, "learning_rate": 5.425842398514195e-06, "loss": 0.0044, "step": 84100 }, { "epoch": 22.3, "learning_rate": 5.399310161846644e-06, "loss": 0.0045, "step": 84150 }, { "epoch": 22.32, "learning_rate": 5.372777925179092e-06, "loss": 0.0044, "step": 84200 }, { "epoch": 22.33, "learning_rate": 5.346245688511542e-06, "loss": 0.0045, "step": 84250 }, { "epoch": 22.34, "learning_rate": 5.31971345184399e-06, "loss": 0.0044, "step": 84300 }, { "epoch": 22.36, "learning_rate": 5.29318121517644e-06, "loss": 0.0045, "step": 84350 }, { "epoch": 22.37, "learning_rate": 5.266648978508888e-06, "loss": 0.0045, "step": 84400 }, { "epoch": 22.38, "learning_rate": 5.240116741841337e-06, "loss": 0.0045, "step": 84450 }, { "epoch": 22.4, "learning_rate": 5.213584505173787e-06, "loss": 0.0045, "step": 84500 }, { "epoch": 22.41, "learning_rate": 5.187052268506235e-06, "loss": 0.0045, "step": 84550 }, { "epoch": 22.42, "learning_rate": 5.160520031838684e-06, "loss": 0.0045, "step": 84600 }, { "epoch": 22.44, "learning_rate": 5.133987795171133e-06, "loss": 0.0045, "step": 84650 }, { "epoch": 22.45, "learning_rate": 5.107455558503582e-06, "loss": 0.0046, "step": 84700 }, { "epoch": 22.46, "learning_rate": 5.080923321836031e-06, "loss": 0.0046, "step": 84750 }, { "epoch": 22.48, "learning_rate": 5.05439108516848e-06, "loss": 0.0045, "step": 84800 }, { "epoch": 22.49, "learning_rate": 5.027858848500929e-06, "loss": 0.0045, "step": 84850 }, { "epoch": 22.5, "learning_rate": 5.001326611833378e-06, "loss": 0.0044, "step": 84900 }, { "epoch": 22.52, "learning_rate": 4.974794375165827e-06, "loss": 0.0045, "step": 84950 }, { "epoch": 22.53, "learning_rate": 4.948262138498275e-06, "loss": 0.0046, "step": 85000 }, { "epoch": 22.54, "learning_rate": 4.921729901830725e-06, "loss": 0.0046, "step": 85050 }, { "epoch": 22.55, "learning_rate": 4.895197665163174e-06, "loss": 0.0046, "step": 85100 }, { "epoch": 22.57, "learning_rate": 4.868665428495623e-06, "loss": 0.0045, "step": 85150 }, { "epoch": 22.58, "learning_rate": 4.842133191828072e-06, "loss": 0.0046, "step": 85200 }, { "epoch": 22.59, "learning_rate": 4.81560095516052e-06, "loss": 0.0046, "step": 85250 }, { "epoch": 22.61, "learning_rate": 4.78906871849297e-06, "loss": 0.0045, "step": 85300 }, { "epoch": 22.62, "learning_rate": 4.762536481825418e-06, "loss": 0.0045, "step": 85350 }, { "epoch": 22.63, "learning_rate": 4.736004245157867e-06, "loss": 0.0046, "step": 85400 }, { "epoch": 22.65, "learning_rate": 4.7094720084903165e-06, "loss": 0.0046, "step": 85450 }, { "epoch": 22.66, "learning_rate": 4.682939771822765e-06, "loss": 0.0046, "step": 85500 }, { "epoch": 22.67, "learning_rate": 4.656407535155214e-06, "loss": 0.0045, "step": 85550 }, { "epoch": 22.69, "learning_rate": 4.629875298487663e-06, "loss": 0.0046, "step": 85600 }, { "epoch": 22.7, "learning_rate": 4.6033430618201116e-06, "loss": 0.0046, "step": 85650 }, { "epoch": 22.71, "learning_rate": 4.5768108251525605e-06, "loss": 0.0045, "step": 85700 }, { "epoch": 22.73, "learning_rate": 4.5502785884850095e-06, "loss": 0.0046, "step": 85750 }, { "epoch": 22.74, "learning_rate": 4.5237463518174585e-06, "loss": 0.0047, "step": 85800 }, { "epoch": 22.75, "learning_rate": 4.4972141151499074e-06, "loss": 0.0046, "step": 85850 }, { "epoch": 22.77, "learning_rate": 4.470681878482356e-06, "loss": 0.0047, "step": 85900 }, { "epoch": 22.78, "learning_rate": 4.444149641814805e-06, "loss": 0.0046, "step": 85950 }, { "epoch": 22.79, "learning_rate": 4.417617405147254e-06, "loss": 0.0047, "step": 86000 }, { "epoch": 22.81, "learning_rate": 4.3910851684797025e-06, "loss": 0.0046, "step": 86050 }, { "epoch": 22.82, "learning_rate": 4.364552931812152e-06, "loss": 0.0046, "step": 86100 }, { "epoch": 22.83, "learning_rate": 4.338020695144601e-06, "loss": 0.0047, "step": 86150 }, { "epoch": 22.85, "learning_rate": 4.311488458477049e-06, "loss": 0.0047, "step": 86200 }, { "epoch": 22.86, "learning_rate": 4.284956221809499e-06, "loss": 0.0046, "step": 86250 }, { "epoch": 22.87, "learning_rate": 4.258423985141947e-06, "loss": 0.0046, "step": 86300 }, { "epoch": 22.89, "learning_rate": 4.231891748474396e-06, "loss": 0.0046, "step": 86350 }, { "epoch": 22.9, "learning_rate": 4.205359511806845e-06, "loss": 0.0046, "step": 86400 }, { "epoch": 22.91, "learning_rate": 4.178827275139294e-06, "loss": 0.0047, "step": 86450 }, { "epoch": 22.93, "learning_rate": 4.152295038471744e-06, "loss": 0.0046, "step": 86500 }, { "epoch": 22.94, "learning_rate": 4.125762801804192e-06, "loss": 0.0046, "step": 86550 }, { "epoch": 22.95, "learning_rate": 4.099230565136641e-06, "loss": 0.0047, "step": 86600 }, { "epoch": 22.97, "learning_rate": 4.07269832846909e-06, "loss": 0.0046, "step": 86650 }, { "epoch": 22.98, "learning_rate": 4.046166091801539e-06, "loss": 0.0046, "step": 86700 }, { "epoch": 22.99, "learning_rate": 4.019633855133988e-06, "loss": 0.0046, "step": 86750 }, { "epoch": 23.01, "learning_rate": 3.993101618466437e-06, "loss": 0.0044, "step": 86800 }, { "epoch": 23.02, "learning_rate": 3.966569381798886e-06, "loss": 0.0042, "step": 86850 }, { "epoch": 23.03, "learning_rate": 3.940037145131335e-06, "loss": 0.0042, "step": 86900 }, { "epoch": 23.05, "learning_rate": 3.913504908463784e-06, "loss": 0.0043, "step": 86950 }, { "epoch": 23.06, "learning_rate": 3.886972671796232e-06, "loss": 0.0043, "step": 87000 }, { "epoch": 23.07, "learning_rate": 3.860440435128682e-06, "loss": 0.0043, "step": 87050 }, { "epoch": 23.09, "learning_rate": 3.83390819846113e-06, "loss": 0.0042, "step": 87100 }, { "epoch": 23.1, "learning_rate": 3.8073759617935793e-06, "loss": 0.0043, "step": 87150 }, { "epoch": 23.11, "learning_rate": 3.7808437251260287e-06, "loss": 0.0043, "step": 87200 }, { "epoch": 23.12, "learning_rate": 3.7543114884584773e-06, "loss": 0.0043, "step": 87250 }, { "epoch": 23.14, "learning_rate": 3.7277792517909263e-06, "loss": 0.0042, "step": 87300 }, { "epoch": 23.15, "learning_rate": 3.701247015123375e-06, "loss": 0.0043, "step": 87350 }, { "epoch": 23.16, "learning_rate": 3.674714778455824e-06, "loss": 0.0043, "step": 87400 }, { "epoch": 23.18, "learning_rate": 3.648182541788273e-06, "loss": 0.0043, "step": 87450 }, { "epoch": 23.19, "learning_rate": 3.6216503051207217e-06, "loss": 0.0044, "step": 87500 }, { "epoch": 23.2, "learning_rate": 3.595118068453171e-06, "loss": 0.0043, "step": 87550 }, { "epoch": 23.22, "learning_rate": 3.5685858317856196e-06, "loss": 0.0043, "step": 87600 }, { "epoch": 23.23, "learning_rate": 3.5420535951180686e-06, "loss": 0.0042, "step": 87650 }, { "epoch": 23.24, "learning_rate": 3.515521358450517e-06, "loss": 0.0044, "step": 87700 }, { "epoch": 23.26, "learning_rate": 3.4889891217829665e-06, "loss": 0.0044, "step": 87750 }, { "epoch": 23.27, "learning_rate": 3.4624568851154155e-06, "loss": 0.0043, "step": 87800 }, { "epoch": 23.28, "learning_rate": 3.435924648447864e-06, "loss": 0.0044, "step": 87850 }, { "epoch": 23.3, "learning_rate": 3.4093924117803135e-06, "loss": 0.0043, "step": 87900 }, { "epoch": 23.31, "learning_rate": 3.382860175112762e-06, "loss": 0.0044, "step": 87950 }, { "epoch": 23.32, "learning_rate": 3.3563279384452114e-06, "loss": 0.0044, "step": 88000 }, { "epoch": 23.34, "learning_rate": 3.32979570177766e-06, "loss": 0.0044, "step": 88050 }, { "epoch": 23.35, "learning_rate": 3.303263465110109e-06, "loss": 0.0043, "step": 88100 }, { "epoch": 23.36, "learning_rate": 3.2767312284425583e-06, "loss": 0.0043, "step": 88150 }, { "epoch": 23.38, "learning_rate": 3.250198991775007e-06, "loss": 0.0043, "step": 88200 }, { "epoch": 23.39, "learning_rate": 3.223666755107456e-06, "loss": 0.0044, "step": 88250 }, { "epoch": 23.4, "learning_rate": 3.1971345184399044e-06, "loss": 0.0045, "step": 88300 }, { "epoch": 23.42, "learning_rate": 3.1706022817723537e-06, "loss": 0.0044, "step": 88350 }, { "epoch": 23.43, "learning_rate": 3.1440700451048023e-06, "loss": 0.0044, "step": 88400 }, { "epoch": 23.44, "learning_rate": 3.1175378084372513e-06, "loss": 0.0045, "step": 88450 }, { "epoch": 23.46, "learning_rate": 3.0910055717697002e-06, "loss": 0.0044, "step": 88500 }, { "epoch": 23.47, "learning_rate": 3.064473335102149e-06, "loss": 0.0044, "step": 88550 }, { "epoch": 23.48, "learning_rate": 3.037941098434598e-06, "loss": 0.0044, "step": 88600 }, { "epoch": 23.5, "learning_rate": 3.011408861767047e-06, "loss": 0.0044, "step": 88650 }, { "epoch": 23.51, "learning_rate": 2.984876625099496e-06, "loss": 0.0044, "step": 88700 }, { "epoch": 23.52, "learning_rate": 2.958344388431945e-06, "loss": 0.0045, "step": 88750 }, { "epoch": 23.54, "learning_rate": 2.931812151764394e-06, "loss": 0.0045, "step": 88800 }, { "epoch": 23.55, "learning_rate": 2.9052799150968426e-06, "loss": 0.0044, "step": 88850 }, { "epoch": 23.56, "learning_rate": 2.8787476784292916e-06, "loss": 0.0044, "step": 88900 }, { "epoch": 23.58, "learning_rate": 2.8522154417617405e-06, "loss": 0.0045, "step": 88950 }, { "epoch": 23.59, "learning_rate": 2.8256832050941895e-06, "loss": 0.0045, "step": 89000 }, { "epoch": 23.6, "learning_rate": 2.7991509684266385e-06, "loss": 0.0044, "step": 89050 }, { "epoch": 23.62, "learning_rate": 2.7726187317590874e-06, "loss": 0.0045, "step": 89100 }, { "epoch": 23.63, "learning_rate": 2.7460864950915364e-06, "loss": 0.0044, "step": 89150 }, { "epoch": 23.64, "learning_rate": 2.7195542584239854e-06, "loss": 0.0044, "step": 89200 }, { "epoch": 23.65, "learning_rate": 2.693022021756434e-06, "loss": 0.0044, "step": 89250 }, { "epoch": 23.67, "learning_rate": 2.6664897850888833e-06, "loss": 0.0045, "step": 89300 }, { "epoch": 23.68, "learning_rate": 2.6399575484213323e-06, "loss": 0.0044, "step": 89350 }, { "epoch": 23.69, "learning_rate": 2.613425311753781e-06, "loss": 0.0045, "step": 89400 }, { "epoch": 23.71, "learning_rate": 2.58689307508623e-06, "loss": 0.0045, "step": 89450 }, { "epoch": 23.72, "learning_rate": 2.5603608384186788e-06, "loss": 0.0044, "step": 89500 }, { "epoch": 23.73, "learning_rate": 2.5338286017511277e-06, "loss": 0.0045, "step": 89550 }, { "epoch": 23.75, "learning_rate": 2.5072963650835767e-06, "loss": 0.0044, "step": 89600 }, { "epoch": 23.76, "learning_rate": 2.4807641284160257e-06, "loss": 0.0044, "step": 89650 }, { "epoch": 23.77, "learning_rate": 2.4542318917484746e-06, "loss": 0.0045, "step": 89700 }, { "epoch": 23.79, "learning_rate": 2.4276996550809236e-06, "loss": 0.0044, "step": 89750 }, { "epoch": 23.8, "learning_rate": 2.401167418413372e-06, "loss": 0.0045, "step": 89800 }, { "epoch": 23.81, "learning_rate": 2.374635181745821e-06, "loss": 0.0045, "step": 89850 }, { "epoch": 23.83, "learning_rate": 2.34810294507827e-06, "loss": 0.0045, "step": 89900 }, { "epoch": 23.84, "learning_rate": 2.321570708410719e-06, "loss": 0.0045, "step": 89950 }, { "epoch": 23.85, "learning_rate": 2.295038471743168e-06, "loss": 0.0044, "step": 90000 }, { "epoch": 23.87, "learning_rate": 2.268506235075617e-06, "loss": 0.0045, "step": 90050 }, { "epoch": 23.88, "learning_rate": 2.241973998408066e-06, "loss": 0.0045, "step": 90100 }, { "epoch": 23.89, "learning_rate": 2.215441761740515e-06, "loss": 0.0045, "step": 90150 }, { "epoch": 23.91, "learning_rate": 2.1889095250729635e-06, "loss": 0.0046, "step": 90200 }, { "epoch": 23.92, "learning_rate": 2.1623772884054124e-06, "loss": 0.0045, "step": 90250 }, { "epoch": 23.93, "learning_rate": 2.135845051737862e-06, "loss": 0.0044, "step": 90300 }, { "epoch": 23.95, "learning_rate": 2.109312815070311e-06, "loss": 0.0044, "step": 90350 }, { "epoch": 23.96, "learning_rate": 2.0827805784027593e-06, "loss": 0.0045, "step": 90400 }, { "epoch": 23.97, "learning_rate": 2.0562483417352083e-06, "loss": 0.0045, "step": 90450 }, { "epoch": 23.99, "learning_rate": 2.0297161050676573e-06, "loss": 0.0044, "step": 90500 }, { "epoch": 24.0, "learning_rate": 2.0031838684001063e-06, "loss": 0.0045, "step": 90550 }, { "epoch": 24.01, "learning_rate": 1.976651631732555e-06, "loss": 0.0041, "step": 90600 }, { "epoch": 24.03, "learning_rate": 1.950119395065004e-06, "loss": 0.0042, "step": 90650 }, { "epoch": 24.04, "learning_rate": 1.923587158397453e-06, "loss": 0.0042, "step": 90700 }, { "epoch": 24.05, "learning_rate": 1.897054921729902e-06, "loss": 0.0042, "step": 90750 }, { "epoch": 24.07, "learning_rate": 1.8705226850623509e-06, "loss": 0.0042, "step": 90800 }, { "epoch": 24.08, "learning_rate": 1.8439904483947996e-06, "loss": 0.0042, "step": 90850 }, { "epoch": 24.09, "learning_rate": 1.8174582117272486e-06, "loss": 0.0041, "step": 90900 }, { "epoch": 24.11, "learning_rate": 1.7909259750596974e-06, "loss": 0.0042, "step": 90950 }, { "epoch": 24.12, "learning_rate": 1.7643937383921468e-06, "loss": 0.0042, "step": 91000 }, { "epoch": 24.13, "learning_rate": 1.7378615017245955e-06, "loss": 0.0042, "step": 91050 }, { "epoch": 24.15, "learning_rate": 1.7113292650570445e-06, "loss": 0.0041, "step": 91100 }, { "epoch": 24.16, "learning_rate": 1.6847970283894932e-06, "loss": 0.0042, "step": 91150 }, { "epoch": 24.17, "learning_rate": 1.6582647917219422e-06, "loss": 0.0042, "step": 91200 }, { "epoch": 24.18, "learning_rate": 1.631732555054391e-06, "loss": 0.0042, "step": 91250 }, { "epoch": 24.2, "learning_rate": 1.60520031838684e-06, "loss": 0.0042, "step": 91300 }, { "epoch": 24.21, "learning_rate": 1.5786680817192891e-06, "loss": 0.0042, "step": 91350 }, { "epoch": 24.22, "learning_rate": 1.552135845051738e-06, "loss": 0.0042, "step": 91400 }, { "epoch": 24.24, "learning_rate": 1.5256036083841868e-06, "loss": 0.0042, "step": 91450 }, { "epoch": 24.25, "learning_rate": 1.4990713717166358e-06, "loss": 0.0042, "step": 91500 }, { "epoch": 24.26, "learning_rate": 1.4725391350490846e-06, "loss": 0.0041, "step": 91550 }, { "epoch": 24.28, "learning_rate": 1.4460068983815337e-06, "loss": 0.0042, "step": 91600 }, { "epoch": 24.29, "learning_rate": 1.4194746617139825e-06, "loss": 0.0042, "step": 91650 }, { "epoch": 24.3, "learning_rate": 1.3929424250464315e-06, "loss": 0.0042, "step": 91700 }, { "epoch": 24.32, "learning_rate": 1.3664101883788804e-06, "loss": 0.0043, "step": 91750 }, { "epoch": 24.33, "learning_rate": 1.3398779517113294e-06, "loss": 0.0042, "step": 91800 }, { "epoch": 24.34, "learning_rate": 1.3133457150437782e-06, "loss": 0.0042, "step": 91850 }, { "epoch": 24.36, "learning_rate": 1.2868134783762273e-06, "loss": 0.0042, "step": 91900 }, { "epoch": 24.37, "learning_rate": 1.260281241708676e-06, "loss": 0.0042, "step": 91950 }, { "epoch": 24.38, "learning_rate": 1.233749005041125e-06, "loss": 0.0042, "step": 92000 }, { "epoch": 24.4, "learning_rate": 1.2072167683735738e-06, "loss": 0.0042, "step": 92050 }, { "epoch": 24.41, "learning_rate": 1.180684531706023e-06, "loss": 0.0042, "step": 92100 }, { "epoch": 24.42, "learning_rate": 1.1541522950384718e-06, "loss": 0.0042, "step": 92150 }, { "epoch": 24.44, "learning_rate": 1.1276200583709207e-06, "loss": 0.0042, "step": 92200 }, { "epoch": 24.45, "learning_rate": 1.1010878217033697e-06, "loss": 0.0042, "step": 92250 }, { "epoch": 24.46, "learning_rate": 1.0745555850358187e-06, "loss": 0.0042, "step": 92300 }, { "epoch": 24.48, "learning_rate": 1.0480233483682674e-06, "loss": 0.0042, "step": 92350 }, { "epoch": 24.49, "learning_rate": 1.0214911117007164e-06, "loss": 0.0042, "step": 92400 }, { "epoch": 24.5, "learning_rate": 9.949588750331654e-07, "loss": 0.0043, "step": 92450 }, { "epoch": 24.52, "learning_rate": 9.684266383656143e-07, "loss": 0.0043, "step": 92500 }, { "epoch": 24.53, "learning_rate": 9.418944016980631e-07, "loss": 0.0042, "step": 92550 }, { "epoch": 24.54, "learning_rate": 9.153621650305122e-07, "loss": 0.0042, "step": 92600 }, { "epoch": 24.56, "learning_rate": 8.88829928362961e-07, "loss": 0.0042, "step": 92650 }, { "epoch": 24.57, "learning_rate": 8.622976916954099e-07, "loss": 0.0042, "step": 92700 }, { "epoch": 24.58, "learning_rate": 8.35765455027859e-07, "loss": 0.0042, "step": 92750 }, { "epoch": 24.6, "learning_rate": 8.092332183603078e-07, "loss": 0.0042, "step": 92800 }, { "epoch": 24.61, "learning_rate": 7.827009816927567e-07, "loss": 0.0042, "step": 92850 }, { "epoch": 24.62, "learning_rate": 7.561687450252057e-07, "loss": 0.0042, "step": 92900 }, { "epoch": 24.64, "learning_rate": 7.296365083576546e-07, "loss": 0.0043, "step": 92950 }, { "epoch": 24.65, "learning_rate": 7.031042716901035e-07, "loss": 0.0043, "step": 93000 }, { "epoch": 24.66, "learning_rate": 6.765720350225525e-07, "loss": 0.0043, "step": 93050 }, { "epoch": 24.68, "learning_rate": 6.500397983550013e-07, "loss": 0.0043, "step": 93100 }, { "epoch": 24.69, "learning_rate": 6.235075616874503e-07, "loss": 0.0043, "step": 93150 }, { "epoch": 24.7, "learning_rate": 5.969753250198993e-07, "loss": 0.0042, "step": 93200 }, { "epoch": 24.72, "learning_rate": 5.704430883523481e-07, "loss": 0.0042, "step": 93250 }, { "epoch": 24.73, "learning_rate": 5.439108516847971e-07, "loss": 0.0043, "step": 93300 }, { "epoch": 24.74, "learning_rate": 5.17378615017246e-07, "loss": 0.0042, "step": 93350 }, { "epoch": 24.75, "learning_rate": 4.908463783496949e-07, "loss": 0.0043, "step": 93400 }, { "epoch": 24.77, "learning_rate": 4.6431414168214384e-07, "loss": 0.0043, "step": 93450 }, { "epoch": 24.78, "learning_rate": 4.3778190501459276e-07, "loss": 0.0042, "step": 93500 }, { "epoch": 24.79, "learning_rate": 4.112496683470417e-07, "loss": 0.0043, "step": 93550 }, { "epoch": 24.81, "learning_rate": 3.847174316794906e-07, "loss": 0.0043, "step": 93600 }, { "epoch": 24.82, "learning_rate": 3.581851950119395e-07, "loss": 0.0042, "step": 93650 }, { "epoch": 24.83, "learning_rate": 3.316529583443884e-07, "loss": 0.0043, "step": 93700 }, { "epoch": 24.85, "learning_rate": 3.0512072167683734e-07, "loss": 0.0042, "step": 93750 }, { "epoch": 24.86, "learning_rate": 2.7858848500928625e-07, "loss": 0.0042, "step": 93800 }, { "epoch": 24.87, "learning_rate": 2.520562483417352e-07, "loss": 0.0042, "step": 93850 }, { "epoch": 24.89, "learning_rate": 2.2552401167418414e-07, "loss": 0.0042, "step": 93900 }, { "epoch": 24.9, "learning_rate": 1.9899177500663305e-07, "loss": 0.0042, "step": 93950 }, { "epoch": 24.91, "learning_rate": 1.72459538339082e-07, "loss": 0.0042, "step": 94000 }, { "epoch": 24.93, "learning_rate": 1.459273016715309e-07, "loss": 0.0042, "step": 94050 }, { "epoch": 24.94, "learning_rate": 1.1939506500397983e-07, "loss": 0.0042, "step": 94100 }, { "epoch": 24.95, "learning_rate": 9.286282833642876e-08, "loss": 0.0043, "step": 94150 }, { "epoch": 24.97, "learning_rate": 6.633059166887768e-08, "loss": 0.0042, "step": 94200 }, { "epoch": 24.98, "learning_rate": 3.979835500132661e-08, "loss": 0.0042, "step": 94250 }, { "epoch": 24.99, "learning_rate": 1.3266118333775537e-08, "loss": 0.0043, "step": 94300 } ], "max_steps": 94325, "num_train_epochs": 25, "total_flos": 1.055269699190784e+18, "trial_name": null, "trial_params": null }