{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 16900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 4.985207100591716e-05, "loss": 2.2222, "step": 50 }, { "epoch": 0.12, "learning_rate": 4.970414201183432e-05, "loss": 2.043, "step": 100 }, { "epoch": 0.18, "learning_rate": 4.955621301775148e-05, "loss": 1.9649, "step": 150 }, { "epoch": 0.24, "learning_rate": 4.9408284023668644e-05, "loss": 1.9144, "step": 200 }, { "epoch": 0.3, "learning_rate": 4.92603550295858e-05, "loss": 1.8759, "step": 250 }, { "epoch": 0.36, "learning_rate": 4.9112426035502965e-05, "loss": 1.8501, "step": 300 }, { "epoch": 0.41, "learning_rate": 4.896449704142012e-05, "loss": 1.8282, "step": 350 }, { "epoch": 0.47, "learning_rate": 4.881656804733728e-05, "loss": 1.8055, "step": 400 }, { "epoch": 0.53, "learning_rate": 4.866863905325444e-05, "loss": 1.7912, "step": 450 }, { "epoch": 0.59, "learning_rate": 4.85207100591716e-05, "loss": 1.7801, "step": 500 }, { "epoch": 0.65, "learning_rate": 4.8372781065088756e-05, "loss": 1.7691, "step": 550 }, { "epoch": 0.71, "learning_rate": 4.822485207100592e-05, "loss": 1.753, "step": 600 }, { "epoch": 0.77, "learning_rate": 4.8076923076923084e-05, "loss": 1.7455, "step": 650 }, { "epoch": 0.83, "learning_rate": 4.792899408284024e-05, "loss": 1.7326, "step": 700 }, { "epoch": 0.89, "learning_rate": 4.77810650887574e-05, "loss": 1.7271, "step": 750 }, { "epoch": 0.95, "learning_rate": 4.7633136094674555e-05, "loss": 1.7172, "step": 800 }, { "epoch": 1.01, "learning_rate": 4.748520710059172e-05, "loss": 1.7097, "step": 850 }, { "epoch": 1.07, "learning_rate": 4.7337278106508875e-05, "loss": 1.698, "step": 900 }, { "epoch": 1.12, "learning_rate": 4.718934911242604e-05, "loss": 1.6881, "step": 950 }, { "epoch": 1.18, "learning_rate": 4.7041420118343196e-05, "loss": 1.6812, "step": 1000 }, { "epoch": 1.24, "learning_rate": 4.689349112426036e-05, "loss": 1.6711, "step": 1050 }, { "epoch": 1.3, "learning_rate": 4.674556213017752e-05, "loss": 1.6749, "step": 1100 }, { "epoch": 1.36, "learning_rate": 4.6597633136094674e-05, "loss": 1.6684, "step": 1150 }, { "epoch": 1.42, "learning_rate": 4.644970414201184e-05, "loss": 1.6656, "step": 1200 }, { "epoch": 1.48, "learning_rate": 4.6301775147928994e-05, "loss": 1.6604, "step": 1250 }, { "epoch": 1.54, "learning_rate": 4.615384615384616e-05, "loss": 1.6578, "step": 1300 }, { "epoch": 1.6, "learning_rate": 4.6005917159763315e-05, "loss": 1.6527, "step": 1350 }, { "epoch": 1.66, "learning_rate": 4.585798816568048e-05, "loss": 1.6502, "step": 1400 }, { "epoch": 1.72, "learning_rate": 4.5710059171597636e-05, "loss": 1.6485, "step": 1450 }, { "epoch": 1.78, "learning_rate": 4.556213017751479e-05, "loss": 1.6418, "step": 1500 }, { "epoch": 1.83, "learning_rate": 4.5414201183431957e-05, "loss": 1.6397, "step": 1550 }, { "epoch": 1.89, "learning_rate": 4.5266272189349114e-05, "loss": 1.6301, "step": 1600 }, { "epoch": 1.95, "learning_rate": 4.511834319526627e-05, "loss": 1.6273, "step": 1650 }, { "epoch": 2.01, "learning_rate": 4.4970414201183434e-05, "loss": 1.6227, "step": 1700 }, { "epoch": 2.07, "learning_rate": 4.48224852071006e-05, "loss": 1.6148, "step": 1750 }, { "epoch": 2.13, "learning_rate": 4.4674556213017755e-05, "loss": 1.6132, "step": 1800 }, { "epoch": 2.19, "learning_rate": 4.452662721893491e-05, "loss": 1.6094, "step": 1850 }, { "epoch": 2.25, "learning_rate": 4.437869822485207e-05, "loss": 1.605, "step": 1900 }, { "epoch": 2.31, "learning_rate": 4.423076923076923e-05, "loss": 1.6058, "step": 1950 }, { "epoch": 2.37, "learning_rate": 4.408284023668639e-05, "loss": 1.6, "step": 2000 }, { "epoch": 2.43, "learning_rate": 4.393491124260355e-05, "loss": 1.597, "step": 2050 }, { "epoch": 2.49, "learning_rate": 4.378698224852072e-05, "loss": 1.5985, "step": 2100 }, { "epoch": 2.54, "learning_rate": 4.3639053254437874e-05, "loss": 1.5941, "step": 2150 }, { "epoch": 2.6, "learning_rate": 4.349112426035503e-05, "loss": 1.5953, "step": 2200 }, { "epoch": 2.66, "learning_rate": 4.334319526627219e-05, "loss": 1.5922, "step": 2250 }, { "epoch": 2.72, "learning_rate": 4.319526627218935e-05, "loss": 1.5908, "step": 2300 }, { "epoch": 2.78, "learning_rate": 4.304733727810651e-05, "loss": 1.5827, "step": 2350 }, { "epoch": 2.84, "learning_rate": 4.289940828402367e-05, "loss": 1.585, "step": 2400 }, { "epoch": 2.9, "learning_rate": 4.275147928994083e-05, "loss": 1.5853, "step": 2450 }, { "epoch": 2.96, "learning_rate": 4.260355029585799e-05, "loss": 1.5837, "step": 2500 }, { "epoch": 3.02, "learning_rate": 4.245562130177515e-05, "loss": 1.5704, "step": 2550 }, { "epoch": 3.08, "learning_rate": 4.230769230769231e-05, "loss": 1.5664, "step": 2600 }, { "epoch": 3.14, "learning_rate": 4.215976331360947e-05, "loss": 1.5649, "step": 2650 }, { "epoch": 3.2, "learning_rate": 4.201183431952663e-05, "loss": 1.5639, "step": 2700 }, { "epoch": 3.25, "learning_rate": 4.1863905325443785e-05, "loss": 1.5623, "step": 2750 }, { "epoch": 3.31, "learning_rate": 4.171597633136095e-05, "loss": 1.5616, "step": 2800 }, { "epoch": 3.37, "learning_rate": 4.156804733727811e-05, "loss": 1.5603, "step": 2850 }, { "epoch": 3.43, "learning_rate": 4.142011834319527e-05, "loss": 1.56, "step": 2900 }, { "epoch": 3.49, "learning_rate": 4.1272189349112426e-05, "loss": 1.5552, "step": 2950 }, { "epoch": 3.55, "learning_rate": 4.112426035502959e-05, "loss": 1.5563, "step": 3000 }, { "epoch": 3.61, "learning_rate": 4.097633136094675e-05, "loss": 1.5531, "step": 3050 }, { "epoch": 3.67, "learning_rate": 4.0828402366863904e-05, "loss": 1.5525, "step": 3100 }, { "epoch": 3.73, "learning_rate": 4.068047337278107e-05, "loss": 1.5487, "step": 3150 }, { "epoch": 3.79, "learning_rate": 4.053254437869823e-05, "loss": 1.557, "step": 3200 }, { "epoch": 3.85, "learning_rate": 4.038461538461539e-05, "loss": 1.5508, "step": 3250 }, { "epoch": 3.91, "learning_rate": 4.0236686390532545e-05, "loss": 1.5467, "step": 3300 }, { "epoch": 3.96, "learning_rate": 4.00887573964497e-05, "loss": 1.546, "step": 3350 }, { "epoch": 4.02, "learning_rate": 3.9940828402366866e-05, "loss": 1.5368, "step": 3400 }, { "epoch": 4.08, "learning_rate": 3.979289940828402e-05, "loss": 1.5326, "step": 3450 }, { "epoch": 4.14, "learning_rate": 3.964497041420119e-05, "loss": 1.5352, "step": 3500 }, { "epoch": 4.2, "learning_rate": 3.9497041420118344e-05, "loss": 1.5311, "step": 3550 }, { "epoch": 4.26, "learning_rate": 3.934911242603551e-05, "loss": 1.5299, "step": 3600 }, { "epoch": 4.32, "learning_rate": 3.9201183431952664e-05, "loss": 1.5301, "step": 3650 }, { "epoch": 4.38, "learning_rate": 3.905325443786982e-05, "loss": 1.5334, "step": 3700 }, { "epoch": 4.44, "learning_rate": 3.8905325443786985e-05, "loss": 1.5256, "step": 3750 }, { "epoch": 4.5, "learning_rate": 3.875739644970414e-05, "loss": 1.5246, "step": 3800 }, { "epoch": 4.56, "learning_rate": 3.86094674556213e-05, "loss": 1.5305, "step": 3850 }, { "epoch": 4.62, "learning_rate": 3.846153846153846e-05, "loss": 1.5234, "step": 3900 }, { "epoch": 4.67, "learning_rate": 3.8313609467455627e-05, "loss": 1.5262, "step": 3950 }, { "epoch": 4.73, "learning_rate": 3.8165680473372784e-05, "loss": 1.5267, "step": 4000 }, { "epoch": 4.79, "learning_rate": 3.801775147928994e-05, "loss": 1.5209, "step": 4050 }, { "epoch": 4.85, "learning_rate": 3.7869822485207104e-05, "loss": 1.5196, "step": 4100 }, { "epoch": 4.91, "learning_rate": 3.772189349112426e-05, "loss": 1.5222, "step": 4150 }, { "epoch": 4.97, "learning_rate": 3.757396449704142e-05, "loss": 1.5196, "step": 4200 }, { "epoch": 5.03, "learning_rate": 3.742603550295858e-05, "loss": 1.5127, "step": 4250 }, { "epoch": 5.09, "learning_rate": 3.7278106508875746e-05, "loss": 1.5052, "step": 4300 }, { "epoch": 5.15, "learning_rate": 3.71301775147929e-05, "loss": 1.5066, "step": 4350 }, { "epoch": 5.21, "learning_rate": 3.698224852071006e-05, "loss": 1.5059, "step": 4400 }, { "epoch": 5.27, "learning_rate": 3.6834319526627223e-05, "loss": 1.5123, "step": 4450 }, { "epoch": 5.33, "learning_rate": 3.668639053254438e-05, "loss": 1.5058, "step": 4500 }, { "epoch": 5.38, "learning_rate": 3.653846153846154e-05, "loss": 1.5025, "step": 4550 }, { "epoch": 5.44, "learning_rate": 3.63905325443787e-05, "loss": 1.5006, "step": 4600 }, { "epoch": 5.5, "learning_rate": 3.6242603550295865e-05, "loss": 1.5022, "step": 4650 }, { "epoch": 5.56, "learning_rate": 3.609467455621302e-05, "loss": 1.5061, "step": 4700 }, { "epoch": 5.62, "learning_rate": 3.594674556213018e-05, "loss": 1.5057, "step": 4750 }, { "epoch": 5.68, "learning_rate": 3.5798816568047336e-05, "loss": 1.5022, "step": 4800 }, { "epoch": 5.74, "learning_rate": 3.56508875739645e-05, "loss": 1.5022, "step": 4850 }, { "epoch": 5.8, "learning_rate": 3.5502958579881656e-05, "loss": 1.5007, "step": 4900 }, { "epoch": 5.86, "learning_rate": 3.5355029585798813e-05, "loss": 1.4965, "step": 4950 }, { "epoch": 5.92, "learning_rate": 3.520710059171598e-05, "loss": 1.5002, "step": 5000 }, { "epoch": 5.98, "learning_rate": 3.505917159763314e-05, "loss": 1.4991, "step": 5050 }, { "epoch": 6.04, "learning_rate": 3.49112426035503e-05, "loss": 1.4943, "step": 5100 }, { "epoch": 6.09, "learning_rate": 3.4763313609467455e-05, "loss": 1.486, "step": 5150 }, { "epoch": 6.15, "learning_rate": 3.461538461538462e-05, "loss": 1.486, "step": 5200 }, { "epoch": 6.21, "learning_rate": 3.4467455621301776e-05, "loss": 1.4871, "step": 5250 }, { "epoch": 6.27, "learning_rate": 3.431952662721893e-05, "loss": 1.4844, "step": 5300 }, { "epoch": 6.33, "learning_rate": 3.4171597633136096e-05, "loss": 1.4845, "step": 5350 }, { "epoch": 6.39, "learning_rate": 3.402366863905326e-05, "loss": 1.4859, "step": 5400 }, { "epoch": 6.45, "learning_rate": 3.387573964497042e-05, "loss": 1.4872, "step": 5450 }, { "epoch": 6.51, "learning_rate": 3.3727810650887574e-05, "loss": 1.4816, "step": 5500 }, { "epoch": 6.57, "learning_rate": 3.357988165680474e-05, "loss": 1.4868, "step": 5550 }, { "epoch": 6.63, "learning_rate": 3.3431952662721895e-05, "loss": 1.4816, "step": 5600 }, { "epoch": 6.69, "learning_rate": 3.328402366863905e-05, "loss": 1.4838, "step": 5650 }, { "epoch": 6.75, "learning_rate": 3.3136094674556215e-05, "loss": 1.4808, "step": 5700 }, { "epoch": 6.8, "learning_rate": 3.298816568047338e-05, "loss": 1.4821, "step": 5750 }, { "epoch": 6.86, "learning_rate": 3.2840236686390536e-05, "loss": 1.4828, "step": 5800 }, { "epoch": 6.92, "learning_rate": 3.269230769230769e-05, "loss": 1.4768, "step": 5850 }, { "epoch": 6.98, "learning_rate": 3.254437869822485e-05, "loss": 1.4822, "step": 5900 }, { "epoch": 7.04, "learning_rate": 3.2396449704142014e-05, "loss": 1.4681, "step": 5950 }, { "epoch": 7.1, "learning_rate": 3.224852071005917e-05, "loss": 1.4655, "step": 6000 }, { "epoch": 7.16, "learning_rate": 3.210059171597633e-05, "loss": 1.4679, "step": 6050 }, { "epoch": 7.22, "learning_rate": 3.195266272189349e-05, "loss": 1.4687, "step": 6100 }, { "epoch": 7.28, "learning_rate": 3.1804733727810655e-05, "loss": 1.4695, "step": 6150 }, { "epoch": 7.34, "learning_rate": 3.165680473372781e-05, "loss": 1.4684, "step": 6200 }, { "epoch": 7.4, "learning_rate": 3.150887573964497e-05, "loss": 1.4689, "step": 6250 }, { "epoch": 7.46, "learning_rate": 3.136094674556213e-05, "loss": 1.4684, "step": 6300 }, { "epoch": 7.51, "learning_rate": 3.121301775147929e-05, "loss": 1.4662, "step": 6350 }, { "epoch": 7.57, "learning_rate": 3.106508875739645e-05, "loss": 1.4654, "step": 6400 }, { "epoch": 7.63, "learning_rate": 3.091715976331361e-05, "loss": 1.4662, "step": 6450 }, { "epoch": 7.69, "learning_rate": 3.0769230769230774e-05, "loss": 1.4669, "step": 6500 }, { "epoch": 7.75, "learning_rate": 3.062130177514793e-05, "loss": 1.4671, "step": 6550 }, { "epoch": 7.81, "learning_rate": 3.047337278106509e-05, "loss": 1.4647, "step": 6600 }, { "epoch": 7.87, "learning_rate": 3.032544378698225e-05, "loss": 1.4669, "step": 6650 }, { "epoch": 7.93, "learning_rate": 3.017751479289941e-05, "loss": 1.4637, "step": 6700 }, { "epoch": 7.99, "learning_rate": 3.0029585798816566e-05, "loss": 1.4706, "step": 6750 }, { "epoch": 8.05, "learning_rate": 2.9881656804733733e-05, "loss": 1.4526, "step": 6800 }, { "epoch": 8.11, "learning_rate": 2.973372781065089e-05, "loss": 1.4544, "step": 6850 }, { "epoch": 8.17, "learning_rate": 2.958579881656805e-05, "loss": 1.4539, "step": 6900 }, { "epoch": 8.22, "learning_rate": 2.9437869822485207e-05, "loss": 1.4521, "step": 6950 }, { "epoch": 8.28, "learning_rate": 2.9289940828402368e-05, "loss": 1.4562, "step": 7000 }, { "epoch": 8.34, "learning_rate": 2.9142011834319528e-05, "loss": 1.4511, "step": 7050 }, { "epoch": 8.4, "learning_rate": 2.8994082840236685e-05, "loss": 1.4522, "step": 7100 }, { "epoch": 8.46, "learning_rate": 2.8846153846153845e-05, "loss": 1.4518, "step": 7150 }, { "epoch": 8.52, "learning_rate": 2.869822485207101e-05, "loss": 1.4501, "step": 7200 }, { "epoch": 8.58, "learning_rate": 2.855029585798817e-05, "loss": 1.4577, "step": 7250 }, { "epoch": 8.64, "learning_rate": 2.8402366863905327e-05, "loss": 1.4475, "step": 7300 }, { "epoch": 8.7, "learning_rate": 2.8254437869822487e-05, "loss": 1.4528, "step": 7350 }, { "epoch": 8.76, "learning_rate": 2.8106508875739644e-05, "loss": 1.4495, "step": 7400 }, { "epoch": 8.82, "learning_rate": 2.7958579881656804e-05, "loss": 1.4552, "step": 7450 }, { "epoch": 8.88, "learning_rate": 2.7810650887573965e-05, "loss": 1.4544, "step": 7500 }, { "epoch": 8.93, "learning_rate": 2.766272189349113e-05, "loss": 1.4512, "step": 7550 }, { "epoch": 8.99, "learning_rate": 2.751479289940829e-05, "loss": 1.4531, "step": 7600 }, { "epoch": 9.05, "learning_rate": 2.7366863905325446e-05, "loss": 1.4462, "step": 7650 }, { "epoch": 9.11, "learning_rate": 2.7218934911242606e-05, "loss": 1.4399, "step": 7700 }, { "epoch": 9.17, "learning_rate": 2.7071005917159763e-05, "loss": 1.4394, "step": 7750 }, { "epoch": 9.23, "learning_rate": 2.6923076923076923e-05, "loss": 1.4366, "step": 7800 }, { "epoch": 9.29, "learning_rate": 2.6775147928994084e-05, "loss": 1.4416, "step": 7850 }, { "epoch": 9.35, "learning_rate": 2.6627218934911247e-05, "loss": 1.4405, "step": 7900 }, { "epoch": 9.41, "learning_rate": 2.6479289940828404e-05, "loss": 1.4387, "step": 7950 }, { "epoch": 9.47, "learning_rate": 2.6331360946745565e-05, "loss": 1.4418, "step": 8000 }, { "epoch": 9.53, "learning_rate": 2.6183431952662725e-05, "loss": 1.4351, "step": 8050 }, { "epoch": 9.59, "learning_rate": 2.6035502958579882e-05, "loss": 1.4399, "step": 8100 }, { "epoch": 9.64, "learning_rate": 2.5887573964497042e-05, "loss": 1.4389, "step": 8150 }, { "epoch": 9.7, "learning_rate": 2.57396449704142e-05, "loss": 1.4382, "step": 8200 }, { "epoch": 9.76, "learning_rate": 2.559171597633136e-05, "loss": 1.4391, "step": 8250 }, { "epoch": 9.82, "learning_rate": 2.5443786982248524e-05, "loss": 1.4429, "step": 8300 }, { "epoch": 9.88, "learning_rate": 2.5295857988165684e-05, "loss": 1.4393, "step": 8350 }, { "epoch": 9.94, "learning_rate": 2.514792899408284e-05, "loss": 1.4407, "step": 8400 }, { "epoch": 10.0, "learning_rate": 2.5e-05, "loss": 1.4418, "step": 8450 }, { "epoch": 10.06, "learning_rate": 2.485207100591716e-05, "loss": 1.4246, "step": 8500 }, { "epoch": 10.12, "learning_rate": 2.4704142011834322e-05, "loss": 1.4319, "step": 8550 }, { "epoch": 10.18, "learning_rate": 2.4556213017751482e-05, "loss": 1.4263, "step": 8600 }, { "epoch": 10.24, "learning_rate": 2.440828402366864e-05, "loss": 1.4334, "step": 8650 }, { "epoch": 10.3, "learning_rate": 2.42603550295858e-05, "loss": 1.4287, "step": 8700 }, { "epoch": 10.36, "learning_rate": 2.411242603550296e-05, "loss": 1.4289, "step": 8750 }, { "epoch": 10.41, "learning_rate": 2.396449704142012e-05, "loss": 1.4285, "step": 8800 }, { "epoch": 10.47, "learning_rate": 2.3816568047337277e-05, "loss": 1.4286, "step": 8850 }, { "epoch": 10.53, "learning_rate": 2.3668639053254438e-05, "loss": 1.4273, "step": 8900 }, { "epoch": 10.59, "learning_rate": 2.3520710059171598e-05, "loss": 1.4291, "step": 8950 }, { "epoch": 10.65, "learning_rate": 2.337278106508876e-05, "loss": 1.4268, "step": 9000 }, { "epoch": 10.71, "learning_rate": 2.322485207100592e-05, "loss": 1.4302, "step": 9050 }, { "epoch": 10.77, "learning_rate": 2.307692307692308e-05, "loss": 1.4313, "step": 9100 }, { "epoch": 10.83, "learning_rate": 2.292899408284024e-05, "loss": 1.4274, "step": 9150 }, { "epoch": 10.89, "learning_rate": 2.2781065088757396e-05, "loss": 1.4286, "step": 9200 }, { "epoch": 10.95, "learning_rate": 2.2633136094674557e-05, "loss": 1.4278, "step": 9250 }, { "epoch": 11.01, "learning_rate": 2.2485207100591717e-05, "loss": 1.4265, "step": 9300 }, { "epoch": 11.07, "learning_rate": 2.2337278106508877e-05, "loss": 1.4156, "step": 9350 }, { "epoch": 11.12, "learning_rate": 2.2189349112426034e-05, "loss": 1.4174, "step": 9400 }, { "epoch": 11.18, "learning_rate": 2.2041420118343195e-05, "loss": 1.4161, "step": 9450 }, { "epoch": 11.24, "learning_rate": 2.189349112426036e-05, "loss": 1.4216, "step": 9500 }, { "epoch": 11.3, "learning_rate": 2.1745562130177516e-05, "loss": 1.4167, "step": 9550 }, { "epoch": 11.36, "learning_rate": 2.1597633136094676e-05, "loss": 1.4188, "step": 9600 }, { "epoch": 11.42, "learning_rate": 2.1449704142011836e-05, "loss": 1.4242, "step": 9650 }, { "epoch": 11.48, "learning_rate": 2.1301775147928997e-05, "loss": 1.4177, "step": 9700 }, { "epoch": 11.54, "learning_rate": 2.1153846153846154e-05, "loss": 1.4188, "step": 9750 }, { "epoch": 11.6, "learning_rate": 2.1005917159763314e-05, "loss": 1.4202, "step": 9800 }, { "epoch": 11.66, "learning_rate": 2.0857988165680474e-05, "loss": 1.4209, "step": 9850 }, { "epoch": 11.72, "learning_rate": 2.0710059171597635e-05, "loss": 1.4155, "step": 9900 }, { "epoch": 11.78, "learning_rate": 2.0562130177514795e-05, "loss": 1.4214, "step": 9950 }, { "epoch": 11.83, "learning_rate": 2.0414201183431952e-05, "loss": 1.4201, "step": 10000 }, { "epoch": 11.89, "learning_rate": 2.0266272189349116e-05, "loss": 1.4175, "step": 10050 }, { "epoch": 11.95, "learning_rate": 2.0118343195266273e-05, "loss": 1.4171, "step": 10100 }, { "epoch": 12.01, "learning_rate": 1.9970414201183433e-05, "loss": 1.4188, "step": 10150 }, { "epoch": 12.07, "learning_rate": 1.9822485207100593e-05, "loss": 1.4123, "step": 10200 }, { "epoch": 12.13, "learning_rate": 1.9674556213017754e-05, "loss": 1.4079, "step": 10250 }, { "epoch": 12.19, "learning_rate": 1.952662721893491e-05, "loss": 1.4057, "step": 10300 }, { "epoch": 12.25, "learning_rate": 1.937869822485207e-05, "loss": 1.4053, "step": 10350 }, { "epoch": 12.31, "learning_rate": 1.923076923076923e-05, "loss": 1.41, "step": 10400 }, { "epoch": 12.37, "learning_rate": 1.9082840236686392e-05, "loss": 1.4113, "step": 10450 }, { "epoch": 12.43, "learning_rate": 1.8934911242603552e-05, "loss": 1.4073, "step": 10500 }, { "epoch": 12.49, "learning_rate": 1.878698224852071e-05, "loss": 1.416, "step": 10550 }, { "epoch": 12.54, "learning_rate": 1.8639053254437873e-05, "loss": 1.4113, "step": 10600 }, { "epoch": 12.6, "learning_rate": 1.849112426035503e-05, "loss": 1.4095, "step": 10650 }, { "epoch": 12.66, "learning_rate": 1.834319526627219e-05, "loss": 1.4056, "step": 10700 }, { "epoch": 12.72, "learning_rate": 1.819526627218935e-05, "loss": 1.4081, "step": 10750 }, { "epoch": 12.78, "learning_rate": 1.804733727810651e-05, "loss": 1.4123, "step": 10800 }, { "epoch": 12.84, "learning_rate": 1.7899408284023668e-05, "loss": 1.4106, "step": 10850 }, { "epoch": 12.9, "learning_rate": 1.7751479289940828e-05, "loss": 1.4123, "step": 10900 }, { "epoch": 12.96, "learning_rate": 1.760355029585799e-05, "loss": 1.4109, "step": 10950 }, { "epoch": 13.02, "learning_rate": 1.745562130177515e-05, "loss": 1.4044, "step": 11000 }, { "epoch": 13.08, "learning_rate": 1.730769230769231e-05, "loss": 1.4006, "step": 11050 }, { "epoch": 13.14, "learning_rate": 1.7159763313609466e-05, "loss": 1.4004, "step": 11100 }, { "epoch": 13.2, "learning_rate": 1.701183431952663e-05, "loss": 1.3985, "step": 11150 }, { "epoch": 13.25, "learning_rate": 1.6863905325443787e-05, "loss": 1.4041, "step": 11200 }, { "epoch": 13.31, "learning_rate": 1.6715976331360947e-05, "loss": 1.4045, "step": 11250 }, { "epoch": 13.37, "learning_rate": 1.6568047337278108e-05, "loss": 1.4003, "step": 11300 }, { "epoch": 13.43, "learning_rate": 1.6420118343195268e-05, "loss": 1.3994, "step": 11350 }, { "epoch": 13.49, "learning_rate": 1.6272189349112425e-05, "loss": 1.403, "step": 11400 }, { "epoch": 13.55, "learning_rate": 1.6124260355029585e-05, "loss": 1.4027, "step": 11450 }, { "epoch": 13.61, "learning_rate": 1.5976331360946746e-05, "loss": 1.4023, "step": 11500 }, { "epoch": 13.67, "learning_rate": 1.5828402366863906e-05, "loss": 1.4011, "step": 11550 }, { "epoch": 13.73, "learning_rate": 1.5680473372781066e-05, "loss": 1.3999, "step": 11600 }, { "epoch": 13.79, "learning_rate": 1.5532544378698223e-05, "loss": 1.4028, "step": 11650 }, { "epoch": 13.85, "learning_rate": 1.5384615384615387e-05, "loss": 1.404, "step": 11700 }, { "epoch": 13.91, "learning_rate": 1.5236686390532546e-05, "loss": 1.4037, "step": 11750 }, { "epoch": 13.96, "learning_rate": 1.5088757396449705e-05, "loss": 1.4025, "step": 11800 }, { "epoch": 14.02, "learning_rate": 1.4940828402366867e-05, "loss": 1.4015, "step": 11850 }, { "epoch": 14.08, "learning_rate": 1.4792899408284025e-05, "loss": 1.3906, "step": 11900 }, { "epoch": 14.14, "learning_rate": 1.4644970414201184e-05, "loss": 1.3972, "step": 11950 }, { "epoch": 14.2, "learning_rate": 1.4497041420118343e-05, "loss": 1.3938, "step": 12000 }, { "epoch": 14.26, "learning_rate": 1.4349112426035505e-05, "loss": 1.3925, "step": 12050 }, { "epoch": 14.32, "learning_rate": 1.4201183431952663e-05, "loss": 1.3954, "step": 12100 }, { "epoch": 14.38, "learning_rate": 1.4053254437869822e-05, "loss": 1.3917, "step": 12150 }, { "epoch": 14.44, "learning_rate": 1.3905325443786982e-05, "loss": 1.3955, "step": 12200 }, { "epoch": 14.5, "learning_rate": 1.3757396449704144e-05, "loss": 1.3981, "step": 12250 }, { "epoch": 14.56, "learning_rate": 1.3609467455621303e-05, "loss": 1.3969, "step": 12300 }, { "epoch": 14.62, "learning_rate": 1.3461538461538462e-05, "loss": 1.3936, "step": 12350 }, { "epoch": 14.67, "learning_rate": 1.3313609467455624e-05, "loss": 1.3959, "step": 12400 }, { "epoch": 14.73, "learning_rate": 1.3165680473372782e-05, "loss": 1.3921, "step": 12450 }, { "epoch": 14.79, "learning_rate": 1.3017751479289941e-05, "loss": 1.396, "step": 12500 }, { "epoch": 14.85, "learning_rate": 1.28698224852071e-05, "loss": 1.4021, "step": 12550 }, { "epoch": 14.91, "learning_rate": 1.2721893491124262e-05, "loss": 1.3903, "step": 12600 }, { "epoch": 14.97, "learning_rate": 1.257396449704142e-05, "loss": 1.3984, "step": 12650 }, { "epoch": 15.03, "learning_rate": 1.242603550295858e-05, "loss": 1.3901, "step": 12700 }, { "epoch": 15.09, "learning_rate": 1.2278106508875741e-05, "loss": 1.3892, "step": 12750 }, { "epoch": 15.15, "learning_rate": 1.21301775147929e-05, "loss": 1.3915, "step": 12800 }, { "epoch": 15.21, "learning_rate": 1.198224852071006e-05, "loss": 1.3855, "step": 12850 }, { "epoch": 15.27, "learning_rate": 1.1834319526627219e-05, "loss": 1.3946, "step": 12900 }, { "epoch": 15.33, "learning_rate": 1.168639053254438e-05, "loss": 1.3925, "step": 12950 }, { "epoch": 15.38, "learning_rate": 1.153846153846154e-05, "loss": 1.3904, "step": 13000 }, { "epoch": 15.44, "learning_rate": 1.1390532544378698e-05, "loss": 1.3887, "step": 13050 }, { "epoch": 15.5, "learning_rate": 1.1242603550295859e-05, "loss": 1.3902, "step": 13100 }, { "epoch": 15.56, "learning_rate": 1.1094674556213017e-05, "loss": 1.3915, "step": 13150 }, { "epoch": 15.62, "learning_rate": 1.094674556213018e-05, "loss": 1.3895, "step": 13200 }, { "epoch": 15.68, "learning_rate": 1.0798816568047338e-05, "loss": 1.3852, "step": 13250 }, { "epoch": 15.74, "learning_rate": 1.0650887573964498e-05, "loss": 1.3863, "step": 13300 }, { "epoch": 15.8, "learning_rate": 1.0502958579881657e-05, "loss": 1.3872, "step": 13350 }, { "epoch": 15.86, "learning_rate": 1.0355029585798817e-05, "loss": 1.3875, "step": 13400 }, { "epoch": 15.92, "learning_rate": 1.0207100591715976e-05, "loss": 1.3919, "step": 13450 }, { "epoch": 15.98, "learning_rate": 1.0059171597633136e-05, "loss": 1.3883, "step": 13500 }, { "epoch": 16.04, "learning_rate": 9.911242603550297e-06, "loss": 1.3864, "step": 13550 }, { "epoch": 16.09, "learning_rate": 9.763313609467455e-06, "loss": 1.3801, "step": 13600 }, { "epoch": 16.15, "learning_rate": 9.615384615384616e-06, "loss": 1.3839, "step": 13650 }, { "epoch": 16.21, "learning_rate": 9.467455621301776e-06, "loss": 1.3796, "step": 13700 }, { "epoch": 16.27, "learning_rate": 9.319526627218936e-06, "loss": 1.3852, "step": 13750 }, { "epoch": 16.33, "learning_rate": 9.171597633136095e-06, "loss": 1.384, "step": 13800 }, { "epoch": 16.39, "learning_rate": 9.023668639053255e-06, "loss": 1.3887, "step": 13850 }, { "epoch": 16.45, "learning_rate": 8.875739644970414e-06, "loss": 1.3861, "step": 13900 }, { "epoch": 16.51, "learning_rate": 8.727810650887574e-06, "loss": 1.3819, "step": 13950 }, { "epoch": 16.57, "learning_rate": 8.579881656804733e-06, "loss": 1.3845, "step": 14000 }, { "epoch": 16.63, "learning_rate": 8.431952662721893e-06, "loss": 1.3875, "step": 14050 }, { "epoch": 16.69, "learning_rate": 8.284023668639054e-06, "loss": 1.3842, "step": 14100 }, { "epoch": 16.75, "learning_rate": 8.136094674556213e-06, "loss": 1.3803, "step": 14150 }, { "epoch": 16.8, "learning_rate": 7.988165680473373e-06, "loss": 1.3822, "step": 14200 }, { "epoch": 16.86, "learning_rate": 7.840236686390533e-06, "loss": 1.3879, "step": 14250 }, { "epoch": 16.92, "learning_rate": 7.692307692307694e-06, "loss": 1.3829, "step": 14300 }, { "epoch": 16.98, "learning_rate": 7.544378698224852e-06, "loss": 1.3861, "step": 14350 }, { "epoch": 17.04, "learning_rate": 7.396449704142013e-06, "loss": 1.3802, "step": 14400 }, { "epoch": 17.1, "learning_rate": 7.248520710059171e-06, "loss": 1.3734, "step": 14450 }, { "epoch": 17.16, "learning_rate": 7.100591715976332e-06, "loss": 1.3826, "step": 14500 }, { "epoch": 17.22, "learning_rate": 6.952662721893491e-06, "loss": 1.3858, "step": 14550 }, { "epoch": 17.28, "learning_rate": 6.8047337278106515e-06, "loss": 1.3781, "step": 14600 }, { "epoch": 17.34, "learning_rate": 6.656804733727812e-06, "loss": 1.3831, "step": 14650 }, { "epoch": 17.4, "learning_rate": 6.5088757396449705e-06, "loss": 1.3755, "step": 14700 }, { "epoch": 17.46, "learning_rate": 6.360946745562131e-06, "loss": 1.3841, "step": 14750 }, { "epoch": 17.51, "learning_rate": 6.21301775147929e-06, "loss": 1.3796, "step": 14800 }, { "epoch": 17.57, "learning_rate": 6.06508875739645e-06, "loss": 1.3799, "step": 14850 }, { "epoch": 17.63, "learning_rate": 5.917159763313609e-06, "loss": 1.3814, "step": 14900 }, { "epoch": 17.69, "learning_rate": 5.76923076923077e-06, "loss": 1.3783, "step": 14950 }, { "epoch": 17.75, "learning_rate": 5.621301775147929e-06, "loss": 1.378, "step": 15000 }, { "epoch": 17.81, "learning_rate": 5.47337278106509e-06, "loss": 1.379, "step": 15050 }, { "epoch": 17.87, "learning_rate": 5.325443786982249e-06, "loss": 1.3806, "step": 15100 }, { "epoch": 17.93, "learning_rate": 5.177514792899409e-06, "loss": 1.3806, "step": 15150 }, { "epoch": 17.99, "learning_rate": 5.029585798816568e-06, "loss": 1.3809, "step": 15200 }, { "epoch": 18.05, "learning_rate": 4.881656804733728e-06, "loss": 1.3748, "step": 15250 }, { "epoch": 18.11, "learning_rate": 4.733727810650888e-06, "loss": 1.38, "step": 15300 }, { "epoch": 18.17, "learning_rate": 4.5857988165680475e-06, "loss": 1.3729, "step": 15350 }, { "epoch": 18.22, "learning_rate": 4.437869822485207e-06, "loss": 1.3798, "step": 15400 }, { "epoch": 18.28, "learning_rate": 4.2899408284023666e-06, "loss": 1.3779, "step": 15450 }, { "epoch": 18.34, "learning_rate": 4.142011834319527e-06, "loss": 1.377, "step": 15500 }, { "epoch": 18.4, "learning_rate": 3.9940828402366864e-06, "loss": 1.3722, "step": 15550 }, { "epoch": 18.46, "learning_rate": 3.846153846153847e-06, "loss": 1.377, "step": 15600 }, { "epoch": 18.52, "learning_rate": 3.6982248520710063e-06, "loss": 1.3774, "step": 15650 }, { "epoch": 18.58, "learning_rate": 3.550295857988166e-06, "loss": 1.3803, "step": 15700 }, { "epoch": 18.64, "learning_rate": 3.4023668639053257e-06, "loss": 1.3754, "step": 15750 }, { "epoch": 18.7, "learning_rate": 3.2544378698224853e-06, "loss": 1.3825, "step": 15800 }, { "epoch": 18.76, "learning_rate": 3.106508875739645e-06, "loss": 1.3737, "step": 15850 }, { "epoch": 18.82, "learning_rate": 2.9585798816568047e-06, "loss": 1.3765, "step": 15900 }, { "epoch": 18.88, "learning_rate": 2.8106508875739646e-06, "loss": 1.3763, "step": 15950 }, { "epoch": 18.93, "learning_rate": 2.6627218934911246e-06, "loss": 1.3733, "step": 16000 }, { "epoch": 18.99, "learning_rate": 2.514792899408284e-06, "loss": 1.3763, "step": 16050 }, { "epoch": 19.05, "learning_rate": 2.366863905325444e-06, "loss": 1.3739, "step": 16100 }, { "epoch": 19.11, "learning_rate": 2.2189349112426035e-06, "loss": 1.3749, "step": 16150 }, { "epoch": 19.17, "learning_rate": 2.0710059171597635e-06, "loss": 1.3695, "step": 16200 }, { "epoch": 19.23, "learning_rate": 1.9230769230769234e-06, "loss": 1.3725, "step": 16250 }, { "epoch": 19.29, "learning_rate": 1.775147928994083e-06, "loss": 1.373, "step": 16300 }, { "epoch": 19.35, "learning_rate": 1.6272189349112426e-06, "loss": 1.3751, "step": 16350 }, { "epoch": 19.41, "learning_rate": 1.4792899408284024e-06, "loss": 1.375, "step": 16400 }, { "epoch": 19.47, "learning_rate": 1.3313609467455623e-06, "loss": 1.3767, "step": 16450 }, { "epoch": 19.53, "learning_rate": 1.183431952662722e-06, "loss": 1.369, "step": 16500 }, { "epoch": 19.59, "learning_rate": 1.0355029585798817e-06, "loss": 1.3773, "step": 16550 }, { "epoch": 19.64, "learning_rate": 8.875739644970415e-07, "loss": 1.3749, "step": 16600 }, { "epoch": 19.7, "learning_rate": 7.396449704142012e-07, "loss": 1.3725, "step": 16650 }, { "epoch": 19.76, "learning_rate": 5.91715976331361e-07, "loss": 1.374, "step": 16700 }, { "epoch": 19.82, "learning_rate": 4.4378698224852073e-07, "loss": 1.3735, "step": 16750 }, { "epoch": 19.88, "learning_rate": 2.958579881656805e-07, "loss": 1.3759, "step": 16800 }, { "epoch": 19.94, "learning_rate": 1.4792899408284025e-07, "loss": 1.3747, "step": 16850 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 1.3793, "step": 16900 }, { "epoch": 20.0, "step": 16900, "total_flos": 6.27801669500928e+16, "train_loss": 1.4740209875163242, "train_runtime": 8294.5138, "train_samples_per_second": 260.678, "train_steps_per_second": 2.037 } ], "max_steps": 16900, "num_train_epochs": 20, "total_flos": 6.27801669500928e+16, "trial_name": null, "trial_params": null }