{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 184555, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.9865351792148686e-05, "loss": 8.822, "step": 500 }, { "epoch": 0.03, "learning_rate": 4.9729890818455207e-05, "loss": 8.316, "step": 1000 }, { "epoch": 0.04, "learning_rate": 4.959442984476173e-05, "loss": 8.2016, "step": 1500 }, { "epoch": 0.05, "learning_rate": 4.945896887106825e-05, "loss": 8.095, "step": 2000 }, { "epoch": 0.07, "learning_rate": 4.9323507897374774e-05, "loss": 7.977, "step": 2500 }, { "epoch": 0.08, "learning_rate": 4.9188046923681294e-05, "loss": 7.9151, "step": 3000 }, { "epoch": 0.09, "learning_rate": 4.9052585949987814e-05, "loss": 7.8304, "step": 3500 }, { "epoch": 0.11, "learning_rate": 4.8917124976294334e-05, "loss": 7.7661, "step": 4000 }, { "epoch": 0.12, "learning_rate": 4.8781664002600854e-05, "loss": 7.6552, "step": 4500 }, { "epoch": 0.14, "learning_rate": 4.8646203028907374e-05, "loss": 7.5958, "step": 5000 }, { "epoch": 0.15, "learning_rate": 4.8510742055213895e-05, "loss": 7.5298, "step": 5500 }, { "epoch": 0.16, "learning_rate": 4.837528108152042e-05, "loss": 7.4355, "step": 6000 }, { "epoch": 0.18, "learning_rate": 4.824036195172171e-05, "loss": 7.3976, "step": 6500 }, { "epoch": 0.19, "learning_rate": 4.810490097802823e-05, "loss": 7.3092, "step": 7000 }, { "epoch": 0.2, "learning_rate": 4.796971092628214e-05, "loss": 7.2767, "step": 7500 }, { "epoch": 0.22, "learning_rate": 4.783424995258866e-05, "loss": 7.2137, "step": 8000 }, { "epoch": 0.23, "learning_rate": 4.769878897889518e-05, "loss": 7.1265, "step": 8500 }, { "epoch": 0.24, "learning_rate": 4.75633280052017e-05, "loss": 7.0906, "step": 9000 }, { "epoch": 0.26, "learning_rate": 4.7427867031508226e-05, "loss": 7.0238, "step": 9500 }, { "epoch": 0.27, "learning_rate": 4.7292406057814747e-05, "loss": 6.9584, "step": 10000 }, { "epoch": 0.28, "learning_rate": 4.715694508412127e-05, "loss": 6.9311, "step": 10500 }, { "epoch": 0.3, "learning_rate": 4.702148411042779e-05, "loss": 6.8991, "step": 11000 }, { "epoch": 0.31, "learning_rate": 4.688602313673431e-05, "loss": 6.8189, "step": 11500 }, { "epoch": 0.33, "learning_rate": 4.675083308498821e-05, "loss": 6.7935, "step": 12000 }, { "epoch": 0.34, "learning_rate": 4.661537211129473e-05, "loss": 6.7746, "step": 12500 }, { "epoch": 0.35, "learning_rate": 4.647991113760126e-05, "loss": 6.6974, "step": 13000 }, { "epoch": 0.37, "learning_rate": 4.634445016390778e-05, "loss": 6.7058, "step": 13500 }, { "epoch": 0.38, "learning_rate": 4.62089891902143e-05, "loss": 6.658, "step": 14000 }, { "epoch": 0.39, "learning_rate": 4.607352821652082e-05, "loss": 6.6636, "step": 14500 }, { "epoch": 0.41, "learning_rate": 4.593806724282734e-05, "loss": 6.6163, "step": 15000 }, { "epoch": 0.42, "learning_rate": 4.580260626913386e-05, "loss": 6.5785, "step": 15500 }, { "epoch": 0.43, "learning_rate": 4.566714529544038e-05, "loss": 6.5564, "step": 16000 }, { "epoch": 0.45, "learning_rate": 4.5531955243694293e-05, "loss": 6.5223, "step": 16500 }, { "epoch": 0.46, "learning_rate": 4.5396494270000814e-05, "loss": 6.5068, "step": 17000 }, { "epoch": 0.47, "learning_rate": 4.5261033296307334e-05, "loss": 6.4513, "step": 17500 }, { "epoch": 0.49, "learning_rate": 4.5125572322613854e-05, "loss": 6.4563, "step": 18000 }, { "epoch": 0.5, "learning_rate": 4.499011134892038e-05, "loss": 6.4462, "step": 18500 }, { "epoch": 0.51, "learning_rate": 4.485519221912167e-05, "loss": 6.4118, "step": 19000 }, { "epoch": 0.53, "learning_rate": 4.471973124542819e-05, "loss": 6.3876, "step": 19500 }, { "epoch": 0.54, "learning_rate": 4.458427027173471e-05, "loss": 6.3519, "step": 20000 }, { "epoch": 0.56, "learning_rate": 4.444880929804124e-05, "loss": 6.3444, "step": 20500 }, { "epoch": 0.57, "learning_rate": 4.431334832434776e-05, "loss": 6.323, "step": 21000 }, { "epoch": 0.58, "learning_rate": 4.417788735065428e-05, "loss": 6.3065, "step": 21500 }, { "epoch": 0.6, "learning_rate": 4.40424263769608e-05, "loss": 6.2821, "step": 22000 }, { "epoch": 0.61, "learning_rate": 4.390696540326732e-05, "loss": 6.2683, "step": 22500 }, { "epoch": 0.62, "learning_rate": 4.377150442957384e-05, "loss": 6.2759, "step": 23000 }, { "epoch": 0.64, "learning_rate": 4.3636314377827746e-05, "loss": 6.2454, "step": 23500 }, { "epoch": 0.65, "learning_rate": 4.350085340413427e-05, "loss": 6.195, "step": 24000 }, { "epoch": 0.66, "learning_rate": 4.336539243044079e-05, "loss": 6.1899, "step": 24500 }, { "epoch": 0.68, "learning_rate": 4.322993145674731e-05, "loss": 6.1767, "step": 25000 }, { "epoch": 0.69, "learning_rate": 4.3094470483053833e-05, "loss": 6.1696, "step": 25500 }, { "epoch": 0.7, "learning_rate": 4.295928043130774e-05, "loss": 6.1583, "step": 26000 }, { "epoch": 0.72, "learning_rate": 4.282381945761426e-05, "loss": 6.1395, "step": 26500 }, { "epoch": 0.73, "learning_rate": 4.268835848392078e-05, "loss": 6.0983, "step": 27000 }, { "epoch": 0.75, "learning_rate": 4.2552897510227306e-05, "loss": 6.0714, "step": 27500 }, { "epoch": 0.76, "learning_rate": 4.241743653653383e-05, "loss": 6.0803, "step": 28000 }, { "epoch": 0.77, "learning_rate": 4.228197556284035e-05, "loss": 6.0694, "step": 28500 }, { "epoch": 0.79, "learning_rate": 4.214678551109425e-05, "loss": 6.0678, "step": 29000 }, { "epoch": 0.8, "learning_rate": 4.201132453740077e-05, "loss": 6.0277, "step": 29500 }, { "epoch": 0.81, "learning_rate": 4.187586356370729e-05, "loss": 6.0379, "step": 30000 }, { "epoch": 0.83, "learning_rate": 4.174040259001381e-05, "loss": 5.9916, "step": 30500 }, { "epoch": 0.84, "learning_rate": 4.160521253826773e-05, "loss": 6.0088, "step": 31000 }, { "epoch": 0.85, "learning_rate": 4.146975156457425e-05, "loss": 5.9999, "step": 31500 }, { "epoch": 0.87, "learning_rate": 4.133429059088077e-05, "loss": 6.0088, "step": 32000 }, { "epoch": 0.88, "learning_rate": 4.119882961718729e-05, "loss": 5.962, "step": 32500 }, { "epoch": 0.89, "learning_rate": 4.106336864349381e-05, "loss": 5.9289, "step": 33000 }, { "epoch": 0.91, "learning_rate": 4.092790766980033e-05, "loss": 5.9553, "step": 33500 }, { "epoch": 0.92, "learning_rate": 4.079271761805424e-05, "loss": 5.9288, "step": 34000 }, { "epoch": 0.93, "learning_rate": 4.065752756630815e-05, "loss": 5.9114, "step": 34500 }, { "epoch": 0.95, "learning_rate": 4.052206659261467e-05, "loss": 5.9116, "step": 35000 }, { "epoch": 0.96, "learning_rate": 4.038687654086858e-05, "loss": 5.9009, "step": 35500 }, { "epoch": 0.98, "learning_rate": 4.02514155671751e-05, "loss": 5.8766, "step": 36000 }, { "epoch": 0.99, "learning_rate": 4.011595459348162e-05, "loss": 5.8671, "step": 36500 }, { "epoch": 1.0, "learning_rate": 3.998049361978814e-05, "loss": 5.8468, "step": 37000 }, { "epoch": 1.02, "learning_rate": 3.984503264609466e-05, "loss": 5.8158, "step": 37500 }, { "epoch": 1.03, "learning_rate": 3.9709571672401185e-05, "loss": 5.8173, "step": 38000 }, { "epoch": 1.04, "learning_rate": 3.9574110698707705e-05, "loss": 5.8186, "step": 38500 }, { "epoch": 1.06, "learning_rate": 3.9438649725014225e-05, "loss": 5.7901, "step": 39000 }, { "epoch": 1.07, "learning_rate": 3.9303188751320746e-05, "loss": 5.7937, "step": 39500 }, { "epoch": 1.08, "learning_rate": 3.9167727777627266e-05, "loss": 5.7851, "step": 40000 }, { "epoch": 1.1, "learning_rate": 3.9032266803933786e-05, "loss": 5.7665, "step": 40500 }, { "epoch": 1.11, "learning_rate": 3.8896805830240306e-05, "loss": 5.7412, "step": 41000 }, { "epoch": 1.12, "learning_rate": 3.876134485654683e-05, "loss": 5.7565, "step": 41500 }, { "epoch": 1.14, "learning_rate": 3.862615480480074e-05, "loss": 5.7464, "step": 42000 }, { "epoch": 1.15, "learning_rate": 3.849069383110726e-05, "loss": 5.754, "step": 42500 }, { "epoch": 1.16, "learning_rate": 3.835523285741378e-05, "loss": 5.7239, "step": 43000 }, { "epoch": 1.18, "learning_rate": 3.82197718837203e-05, "loss": 5.7001, "step": 43500 }, { "epoch": 1.19, "learning_rate": 3.808431091002682e-05, "loss": 5.7204, "step": 44000 }, { "epoch": 1.21, "learning_rate": 3.794912085828073e-05, "loss": 5.7096, "step": 44500 }, { "epoch": 1.22, "learning_rate": 3.781365988458725e-05, "loss": 5.7, "step": 45000 }, { "epoch": 1.23, "learning_rate": 3.767819891089377e-05, "loss": 5.6793, "step": 45500 }, { "epoch": 1.25, "learning_rate": 3.754273793720029e-05, "loss": 5.6423, "step": 46000 }, { "epoch": 1.26, "learning_rate": 3.740727696350681e-05, "loss": 5.6658, "step": 46500 }, { "epoch": 1.27, "learning_rate": 3.7272086911760725e-05, "loss": 5.6413, "step": 47000 }, { "epoch": 1.29, "learning_rate": 3.7136625938067245e-05, "loss": 5.6256, "step": 47500 }, { "epoch": 1.3, "learning_rate": 3.7001164964373765e-05, "loss": 5.6421, "step": 48000 }, { "epoch": 1.31, "learning_rate": 3.686570399068029e-05, "loss": 5.6292, "step": 48500 }, { "epoch": 1.33, "learning_rate": 3.67305139389342e-05, "loss": 5.6188, "step": 49000 }, { "epoch": 1.34, "learning_rate": 3.659505296524072e-05, "loss": 5.6004, "step": 49500 }, { "epoch": 1.35, "learning_rate": 3.645959199154724e-05, "loss": 5.6285, "step": 50000 }, { "epoch": 1.37, "learning_rate": 3.632413101785376e-05, "loss": 5.5817, "step": 50500 }, { "epoch": 1.38, "learning_rate": 3.618867004416028e-05, "loss": 5.6021, "step": 51000 }, { "epoch": 1.4, "learning_rate": 3.60532090704668e-05, "loss": 5.5992, "step": 51500 }, { "epoch": 1.41, "learning_rate": 3.5917748096773326e-05, "loss": 5.5771, "step": 52000 }, { "epoch": 1.42, "learning_rate": 3.578255804502723e-05, "loss": 5.5678, "step": 52500 }, { "epoch": 1.44, "learning_rate": 3.564709707133375e-05, "loss": 5.5662, "step": 53000 }, { "epoch": 1.45, "learning_rate": 3.551163609764027e-05, "loss": 5.5427, "step": 53500 }, { "epoch": 1.46, "learning_rate": 3.537617512394679e-05, "loss": 5.5281, "step": 54000 }, { "epoch": 1.48, "learning_rate": 3.52409850722007e-05, "loss": 5.5256, "step": 54500 }, { "epoch": 1.49, "learning_rate": 3.510552409850722e-05, "loss": 5.52, "step": 55000 }, { "epoch": 1.5, "learning_rate": 3.4970063124813745e-05, "loss": 5.5423, "step": 55500 }, { "epoch": 1.52, "learning_rate": 3.4834602151120265e-05, "loss": 5.5106, "step": 56000 }, { "epoch": 1.53, "learning_rate": 3.4699141177426785e-05, "loss": 5.5255, "step": 56500 }, { "epoch": 1.54, "learning_rate": 3.4563680203733305e-05, "loss": 5.5081, "step": 57000 }, { "epoch": 1.56, "learning_rate": 3.4428219230039826e-05, "loss": 5.4839, "step": 57500 }, { "epoch": 1.57, "learning_rate": 3.4292758256346346e-05, "loss": 5.4764, "step": 58000 }, { "epoch": 1.58, "learning_rate": 3.4157839126547644e-05, "loss": 5.478, "step": 58500 }, { "epoch": 1.6, "learning_rate": 3.4022378152854164e-05, "loss": 5.4977, "step": 59000 }, { "epoch": 1.61, "learning_rate": 3.3886917179160684e-05, "loss": 5.4547, "step": 59500 }, { "epoch": 1.63, "learning_rate": 3.3751456205467204e-05, "loss": 5.4656, "step": 60000 }, { "epoch": 1.64, "learning_rate": 3.361599523177373e-05, "loss": 5.4723, "step": 60500 }, { "epoch": 1.65, "learning_rate": 3.348080518002764e-05, "loss": 5.4581, "step": 61000 }, { "epoch": 1.67, "learning_rate": 3.334534420633416e-05, "loss": 5.4571, "step": 61500 }, { "epoch": 1.68, "learning_rate": 3.320988323264068e-05, "loss": 5.4361, "step": 62000 }, { "epoch": 1.69, "learning_rate": 3.3074693180894583e-05, "loss": 5.4347, "step": 62500 }, { "epoch": 1.71, "learning_rate": 3.293923220720111e-05, "loss": 5.4383, "step": 63000 }, { "epoch": 1.72, "learning_rate": 3.280377123350763e-05, "loss": 5.413, "step": 63500 }, { "epoch": 1.73, "learning_rate": 3.266831025981415e-05, "loss": 5.4095, "step": 64000 }, { "epoch": 1.75, "learning_rate": 3.253284928612067e-05, "loss": 5.408, "step": 64500 }, { "epoch": 1.76, "learning_rate": 3.2397659234374577e-05, "loss": 5.3968, "step": 65000 }, { "epoch": 1.77, "learning_rate": 3.22621982606811e-05, "loss": 5.3889, "step": 65500 }, { "epoch": 1.79, "learning_rate": 3.212673728698762e-05, "loss": 5.3855, "step": 66000 }, { "epoch": 1.8, "learning_rate": 3.1991276313294144e-05, "loss": 5.3925, "step": 66500 }, { "epoch": 1.82, "learning_rate": 3.1855815339600664e-05, "loss": 5.3731, "step": 67000 }, { "epoch": 1.83, "learning_rate": 3.1720354365907184e-05, "loss": 5.3555, "step": 67500 }, { "epoch": 1.84, "learning_rate": 3.1584893392213704e-05, "loss": 5.3598, "step": 68000 }, { "epoch": 1.86, "learning_rate": 3.1449432418520224e-05, "loss": 5.3567, "step": 68500 }, { "epoch": 1.87, "learning_rate": 3.1313971444826745e-05, "loss": 5.3744, "step": 69000 }, { "epoch": 1.88, "learning_rate": 3.117878139308065e-05, "loss": 5.3452, "step": 69500 }, { "epoch": 1.9, "learning_rate": 3.104332041938718e-05, "loss": 5.35, "step": 70000 }, { "epoch": 1.91, "learning_rate": 3.09078594456937e-05, "loss": 5.3457, "step": 70500 }, { "epoch": 1.92, "learning_rate": 3.077239847200022e-05, "loss": 5.3314, "step": 71000 }, { "epoch": 1.94, "learning_rate": 3.063693749830674e-05, "loss": 5.3572, "step": 71500 }, { "epoch": 1.95, "learning_rate": 3.0501747446560647e-05, "loss": 5.3013, "step": 72000 }, { "epoch": 1.96, "learning_rate": 3.0366286472867167e-05, "loss": 5.3205, "step": 72500 }, { "epoch": 1.98, "learning_rate": 3.0230825499173687e-05, "loss": 5.3093, "step": 73000 }, { "epoch": 1.99, "learning_rate": 3.0095364525480214e-05, "loss": 5.3443, "step": 73500 }, { "epoch": 2.0, "learning_rate": 2.9959903551786734e-05, "loss": 5.3265, "step": 74000 }, { "epoch": 2.02, "learning_rate": 2.982471350004064e-05, "loss": 5.2934, "step": 74500 }, { "epoch": 2.03, "learning_rate": 2.968952344829455e-05, "loss": 5.2899, "step": 75000 }, { "epoch": 2.05, "learning_rate": 2.955406247460107e-05, "loss": 5.2758, "step": 75500 }, { "epoch": 2.06, "learning_rate": 2.941860150090759e-05, "loss": 5.2977, "step": 76000 }, { "epoch": 2.07, "learning_rate": 2.928314052721411e-05, "loss": 5.2775, "step": 76500 }, { "epoch": 2.09, "learning_rate": 2.9147679553520633e-05, "loss": 5.2554, "step": 77000 }, { "epoch": 2.1, "learning_rate": 2.9012218579827153e-05, "loss": 5.2733, "step": 77500 }, { "epoch": 2.11, "learning_rate": 2.8876757606133674e-05, "loss": 5.2688, "step": 78000 }, { "epoch": 2.13, "learning_rate": 2.8741296632440197e-05, "loss": 5.2627, "step": 78500 }, { "epoch": 2.14, "learning_rate": 2.8605835658746717e-05, "loss": 5.2376, "step": 79000 }, { "epoch": 2.15, "learning_rate": 2.8470645607000623e-05, "loss": 5.2662, "step": 79500 }, { "epoch": 2.17, "learning_rate": 2.8335184633307143e-05, "loss": 5.2381, "step": 80000 }, { "epoch": 2.18, "learning_rate": 2.8199994581561053e-05, "loss": 5.2358, "step": 80500 }, { "epoch": 2.19, "learning_rate": 2.8064533607867576e-05, "loss": 5.2339, "step": 81000 }, { "epoch": 2.21, "learning_rate": 2.7929072634174096e-05, "loss": 5.2222, "step": 81500 }, { "epoch": 2.22, "learning_rate": 2.7793611660480616e-05, "loss": 5.2319, "step": 82000 }, { "epoch": 2.24, "learning_rate": 2.7658150686787136e-05, "loss": 5.2433, "step": 82500 }, { "epoch": 2.25, "learning_rate": 2.7522689713093657e-05, "loss": 5.2092, "step": 83000 }, { "epoch": 2.26, "learning_rate": 2.7387228739400177e-05, "loss": 5.2139, "step": 83500 }, { "epoch": 2.28, "learning_rate": 2.7251767765706704e-05, "loss": 5.1814, "step": 84000 }, { "epoch": 2.29, "learning_rate": 2.7116306792013224e-05, "loss": 5.211, "step": 84500 }, { "epoch": 2.3, "learning_rate": 2.6981116740267133e-05, "loss": 5.1822, "step": 85000 }, { "epoch": 2.32, "learning_rate": 2.6845655766573653e-05, "loss": 5.1847, "step": 85500 }, { "epoch": 2.33, "learning_rate": 2.6710194792880173e-05, "loss": 5.1873, "step": 86000 }, { "epoch": 2.34, "learning_rate": 2.6574733819186694e-05, "loss": 5.2079, "step": 86500 }, { "epoch": 2.36, "learning_rate": 2.6439272845493214e-05, "loss": 5.1783, "step": 87000 }, { "epoch": 2.37, "learning_rate": 2.630408279374712e-05, "loss": 5.1551, "step": 87500 }, { "epoch": 2.38, "learning_rate": 2.6168621820053646e-05, "loss": 5.1518, "step": 88000 }, { "epoch": 2.4, "learning_rate": 2.6033160846360167e-05, "loss": 5.1565, "step": 88500 }, { "epoch": 2.41, "learning_rate": 2.5897699872666687e-05, "loss": 5.17, "step": 89000 }, { "epoch": 2.42, "learning_rate": 2.5762509820920593e-05, "loss": 5.153, "step": 89500 }, { "epoch": 2.44, "learning_rate": 2.5627048847227113e-05, "loss": 5.1538, "step": 90000 }, { "epoch": 2.45, "learning_rate": 2.5491587873533633e-05, "loss": 5.1384, "step": 90500 }, { "epoch": 2.47, "learning_rate": 2.5356126899840153e-05, "loss": 5.1408, "step": 91000 }, { "epoch": 2.48, "learning_rate": 2.522066592614668e-05, "loss": 5.1577, "step": 91500 }, { "epoch": 2.49, "learning_rate": 2.50852049524532e-05, "loss": 5.1285, "step": 92000 }, { "epoch": 2.51, "learning_rate": 2.494974397875972e-05, "loss": 5.1188, "step": 92500 }, { "epoch": 2.52, "learning_rate": 2.481428300506624e-05, "loss": 5.1353, "step": 93000 }, { "epoch": 2.53, "learning_rate": 2.467882203137276e-05, "loss": 5.1382, "step": 93500 }, { "epoch": 2.55, "learning_rate": 2.4543631979626673e-05, "loss": 5.1367, "step": 94000 }, { "epoch": 2.56, "learning_rate": 2.4408171005933193e-05, "loss": 5.1425, "step": 94500 }, { "epoch": 2.57, "learning_rate": 2.4272710032239713e-05, "loss": 5.1325, "step": 95000 }, { "epoch": 2.59, "learning_rate": 2.4137249058546234e-05, "loss": 5.1209, "step": 95500 }, { "epoch": 2.6, "learning_rate": 2.4001788084852757e-05, "loss": 5.1236, "step": 96000 }, { "epoch": 2.61, "learning_rate": 2.3866327111159277e-05, "loss": 5.0968, "step": 96500 }, { "epoch": 2.63, "learning_rate": 2.3731137059413183e-05, "loss": 5.1113, "step": 97000 }, { "epoch": 2.64, "learning_rate": 2.3595676085719707e-05, "loss": 5.0963, "step": 97500 }, { "epoch": 2.66, "learning_rate": 2.3460215112026227e-05, "loss": 5.1008, "step": 98000 }, { "epoch": 2.67, "learning_rate": 2.3324754138332747e-05, "loss": 5.0947, "step": 98500 }, { "epoch": 2.68, "learning_rate": 2.318929316463927e-05, "loss": 5.0981, "step": 99000 }, { "epoch": 2.7, "learning_rate": 2.3054103112893176e-05, "loss": 5.0859, "step": 99500 }, { "epoch": 2.71, "learning_rate": 2.2918913061147085e-05, "loss": 5.0686, "step": 100000 }, { "epoch": 2.72, "learning_rate": 2.2783452087453606e-05, "loss": 5.0733, "step": 100500 }, { "epoch": 2.74, "learning_rate": 2.264799111376013e-05, "loss": 5.1095, "step": 101000 }, { "epoch": 2.75, "learning_rate": 2.251253014006665e-05, "loss": 5.0707, "step": 101500 }, { "epoch": 2.76, "learning_rate": 2.237706916637317e-05, "loss": 5.0737, "step": 102000 }, { "epoch": 2.78, "learning_rate": 2.224160819267969e-05, "loss": 5.0509, "step": 102500 }, { "epoch": 2.79, "learning_rate": 2.2106147218986213e-05, "loss": 5.0901, "step": 103000 }, { "epoch": 2.8, "learning_rate": 2.1970686245292733e-05, "loss": 5.0635, "step": 103500 }, { "epoch": 2.82, "learning_rate": 2.1835225271599253e-05, "loss": 5.0665, "step": 104000 }, { "epoch": 2.83, "learning_rate": 2.1700035219853163e-05, "loss": 5.0423, "step": 104500 }, { "epoch": 2.84, "learning_rate": 2.1564574246159683e-05, "loss": 5.0465, "step": 105000 }, { "epoch": 2.86, "learning_rate": 2.1429113272466203e-05, "loss": 5.0422, "step": 105500 }, { "epoch": 2.87, "learning_rate": 2.1293923220720112e-05, "loss": 5.0851, "step": 106000 }, { "epoch": 2.89, "learning_rate": 2.1158462247026632e-05, "loss": 5.0242, "step": 106500 }, { "epoch": 2.9, "learning_rate": 2.1023001273333152e-05, "loss": 5.0386, "step": 107000 }, { "epoch": 2.91, "learning_rate": 2.0887540299639673e-05, "loss": 5.0455, "step": 107500 }, { "epoch": 2.93, "learning_rate": 2.0752079325946196e-05, "loss": 5.0002, "step": 108000 }, { "epoch": 2.94, "learning_rate": 2.0616618352252716e-05, "loss": 5.043, "step": 108500 }, { "epoch": 2.95, "learning_rate": 2.0481157378559236e-05, "loss": 5.012, "step": 109000 }, { "epoch": 2.97, "learning_rate": 2.0345696404865757e-05, "loss": 5.0406, "step": 109500 }, { "epoch": 2.98, "learning_rate": 2.021050635311967e-05, "loss": 5.0514, "step": 110000 }, { "epoch": 2.99, "learning_rate": 2.007504537942619e-05, "loss": 5.015, "step": 110500 }, { "epoch": 3.01, "learning_rate": 1.993958440573271e-05, "loss": 5.0416, "step": 111000 }, { "epoch": 3.02, "learning_rate": 1.9804123432039233e-05, "loss": 4.9965, "step": 111500 }, { "epoch": 3.03, "learning_rate": 1.9668662458345753e-05, "loss": 4.998, "step": 112000 }, { "epoch": 3.05, "learning_rate": 1.9533201484652273e-05, "loss": 4.9926, "step": 112500 }, { "epoch": 3.06, "learning_rate": 1.9397740510958793e-05, "loss": 4.9909, "step": 113000 }, { "epoch": 3.07, "learning_rate": 1.9262550459212703e-05, "loss": 5.006, "step": 113500 }, { "epoch": 3.09, "learning_rate": 1.9127089485519223e-05, "loss": 4.9905, "step": 114000 }, { "epoch": 3.1, "learning_rate": 1.8991628511825743e-05, "loss": 4.9792, "step": 114500 }, { "epoch": 3.12, "learning_rate": 1.8856167538132266e-05, "loss": 5.0, "step": 115000 }, { "epoch": 3.13, "learning_rate": 1.8720706564438787e-05, "loss": 4.9839, "step": 115500 }, { "epoch": 3.14, "learning_rate": 1.8585516512692692e-05, "loss": 4.9721, "step": 116000 }, { "epoch": 3.16, "learning_rate": 1.8450055538999213e-05, "loss": 4.9973, "step": 116500 }, { "epoch": 3.17, "learning_rate": 1.8314594565305736e-05, "loss": 4.9879, "step": 117000 }, { "epoch": 3.18, "learning_rate": 1.8179133591612256e-05, "loss": 4.9664, "step": 117500 }, { "epoch": 3.2, "learning_rate": 1.8043672617918776e-05, "loss": 4.994, "step": 118000 }, { "epoch": 3.21, "learning_rate": 1.7908482566172686e-05, "loss": 4.9679, "step": 118500 }, { "epoch": 3.22, "learning_rate": 1.777302159247921e-05, "loss": 4.9841, "step": 119000 }, { "epoch": 3.24, "learning_rate": 1.763756061878573e-05, "loss": 4.9488, "step": 119500 }, { "epoch": 3.25, "learning_rate": 1.750209964509225e-05, "loss": 4.9523, "step": 120000 }, { "epoch": 3.26, "learning_rate": 1.7366638671398773e-05, "loss": 4.9748, "step": 120500 }, { "epoch": 3.28, "learning_rate": 1.7231177697705293e-05, "loss": 4.9513, "step": 121000 }, { "epoch": 3.29, "learning_rate": 1.7095716724011813e-05, "loss": 4.9561, "step": 121500 }, { "epoch": 3.31, "learning_rate": 1.696052667226572e-05, "loss": 4.9517, "step": 122000 }, { "epoch": 3.32, "learning_rate": 1.6825065698572243e-05, "loss": 4.9385, "step": 122500 }, { "epoch": 3.33, "learning_rate": 1.6689604724878763e-05, "loss": 4.9755, "step": 123000 }, { "epoch": 3.35, "learning_rate": 1.6554143751185283e-05, "loss": 4.9507, "step": 123500 }, { "epoch": 3.36, "learning_rate": 1.6418682777491807e-05, "loss": 4.9272, "step": 124000 }, { "epoch": 3.37, "learning_rate": 1.6283492725745712e-05, "loss": 4.936, "step": 124500 }, { "epoch": 3.39, "learning_rate": 1.6148031752052233e-05, "loss": 4.9498, "step": 125000 }, { "epoch": 3.4, "learning_rate": 1.6012570778358756e-05, "loss": 4.9539, "step": 125500 }, { "epoch": 3.41, "learning_rate": 1.5877109804665276e-05, "loss": 4.962, "step": 126000 }, { "epoch": 3.43, "learning_rate": 1.5741919752919185e-05, "loss": 4.9617, "step": 126500 }, { "epoch": 3.44, "learning_rate": 1.560672970117309e-05, "loss": 4.9512, "step": 127000 }, { "epoch": 3.45, "learning_rate": 1.547126872747961e-05, "loss": 4.9527, "step": 127500 }, { "epoch": 3.47, "learning_rate": 1.5335807753786135e-05, "loss": 4.9305, "step": 128000 }, { "epoch": 3.48, "learning_rate": 1.5200346780092655e-05, "loss": 4.9259, "step": 128500 }, { "epoch": 3.49, "learning_rate": 1.5064885806399175e-05, "loss": 4.9345, "step": 129000 }, { "epoch": 3.51, "learning_rate": 1.4929424832705699e-05, "loss": 4.9137, "step": 129500 }, { "epoch": 3.52, "learning_rate": 1.4793963859012219e-05, "loss": 4.9374, "step": 130000 }, { "epoch": 3.54, "learning_rate": 1.4658502885318739e-05, "loss": 4.9258, "step": 130500 }, { "epoch": 3.55, "learning_rate": 1.4523041911625263e-05, "loss": 4.9272, "step": 131000 }, { "epoch": 3.56, "learning_rate": 1.4387580937931783e-05, "loss": 4.92, "step": 131500 }, { "epoch": 3.58, "learning_rate": 1.425239088618569e-05, "loss": 4.9077, "step": 132000 }, { "epoch": 3.59, "learning_rate": 1.411692991249221e-05, "loss": 4.9334, "step": 132500 }, { "epoch": 3.6, "learning_rate": 1.3981468938798734e-05, "loss": 4.9178, "step": 133000 }, { "epoch": 3.62, "learning_rate": 1.3846007965105254e-05, "loss": 4.8956, "step": 133500 }, { "epoch": 3.63, "learning_rate": 1.3710546991411774e-05, "loss": 4.9255, "step": 134000 }, { "epoch": 3.64, "learning_rate": 1.3575086017718298e-05, "loss": 4.9, "step": 134500 }, { "epoch": 3.66, "learning_rate": 1.3439625044024818e-05, "loss": 4.9317, "step": 135000 }, { "epoch": 3.67, "learning_rate": 1.3304164070331338e-05, "loss": 4.9136, "step": 135500 }, { "epoch": 3.68, "learning_rate": 1.3168974018585246e-05, "loss": 4.8968, "step": 136000 }, { "epoch": 3.7, "learning_rate": 1.3033783966839155e-05, "loss": 4.8791, "step": 136500 }, { "epoch": 3.71, "learning_rate": 1.2898322993145675e-05, "loss": 4.8937, "step": 137000 }, { "epoch": 3.73, "learning_rate": 1.2762862019452195e-05, "loss": 4.8803, "step": 137500 }, { "epoch": 3.74, "learning_rate": 1.2627401045758719e-05, "loss": 4.8928, "step": 138000 }, { "epoch": 3.75, "learning_rate": 1.2491940072065239e-05, "loss": 4.8757, "step": 138500 }, { "epoch": 3.77, "learning_rate": 1.2356750020319146e-05, "loss": 4.8957, "step": 139000 }, { "epoch": 3.78, "learning_rate": 1.2221289046625668e-05, "loss": 4.8828, "step": 139500 }, { "epoch": 3.79, "learning_rate": 1.2085828072932188e-05, "loss": 4.8819, "step": 140000 }, { "epoch": 3.81, "learning_rate": 1.195036709923871e-05, "loss": 4.8843, "step": 140500 }, { "epoch": 3.82, "learning_rate": 1.181490612554523e-05, "loss": 4.8891, "step": 141000 }, { "epoch": 3.83, "learning_rate": 1.1679445151851752e-05, "loss": 4.8904, "step": 141500 }, { "epoch": 3.85, "learning_rate": 1.1543984178158274e-05, "loss": 4.8785, "step": 142000 }, { "epoch": 3.86, "learning_rate": 1.1408523204464794e-05, "loss": 4.8821, "step": 142500 }, { "epoch": 3.87, "learning_rate": 1.1273604074666089e-05, "loss": 4.8758, "step": 143000 }, { "epoch": 3.89, "learning_rate": 1.1138143100972611e-05, "loss": 4.876, "step": 143500 }, { "epoch": 3.9, "learning_rate": 1.1002682127279133e-05, "loss": 4.8651, "step": 144000 }, { "epoch": 3.91, "learning_rate": 1.0867221153585653e-05, "loss": 4.8934, "step": 144500 }, { "epoch": 3.93, "learning_rate": 1.0731760179892173e-05, "loss": 4.8698, "step": 145000 }, { "epoch": 3.94, "learning_rate": 1.0596299206198695e-05, "loss": 4.8707, "step": 145500 }, { "epoch": 3.96, "learning_rate": 1.0461109154452602e-05, "loss": 4.8499, "step": 146000 }, { "epoch": 3.97, "learning_rate": 1.0325648180759123e-05, "loss": 4.8797, "step": 146500 }, { "epoch": 3.98, "learning_rate": 1.0190187207065644e-05, "loss": 4.8525, "step": 147000 }, { "epoch": 4.0, "learning_rate": 1.0054726233372166e-05, "loss": 4.8662, "step": 147500 }, { "epoch": 4.01, "learning_rate": 9.919265259678686e-06, "loss": 4.8338, "step": 148000 }, { "epoch": 4.02, "learning_rate": 9.784075207932596e-06, "loss": 4.8457, "step": 148500 }, { "epoch": 4.04, "learning_rate": 9.648614234239116e-06, "loss": 4.8422, "step": 149000 }, { "epoch": 4.05, "learning_rate": 9.513153260545638e-06, "loss": 4.8263, "step": 149500 }, { "epoch": 4.06, "learning_rate": 9.377692286852158e-06, "loss": 4.851, "step": 150000 }, { "epoch": 4.08, "learning_rate": 9.24223131315868e-06, "loss": 4.8555, "step": 150500 }, { "epoch": 4.09, "learning_rate": 9.107041261412587e-06, "loss": 4.8442, "step": 151000 }, { "epoch": 4.1, "learning_rate": 8.971580287719109e-06, "loss": 4.8482, "step": 151500 }, { "epoch": 4.12, "learning_rate": 8.83611931402563e-06, "loss": 4.8402, "step": 152000 }, { "epoch": 4.13, "learning_rate": 8.700929262279538e-06, "loss": 4.8554, "step": 152500 }, { "epoch": 4.15, "learning_rate": 8.56546828858606e-06, "loss": 4.8254, "step": 153000 }, { "epoch": 4.16, "learning_rate": 8.43000731489258e-06, "loss": 4.8518, "step": 153500 }, { "epoch": 4.17, "learning_rate": 8.2945463411991e-06, "loss": 4.8341, "step": 154000 }, { "epoch": 4.19, "learning_rate": 8.159085367505622e-06, "loss": 4.8232, "step": 154500 }, { "epoch": 4.2, "learning_rate": 8.023624393812142e-06, "loss": 4.8318, "step": 155000 }, { "epoch": 4.21, "learning_rate": 7.888163420118664e-06, "loss": 4.8271, "step": 155500 }, { "epoch": 4.23, "learning_rate": 7.752702446425184e-06, "loss": 4.8277, "step": 156000 }, { "epoch": 4.24, "learning_rate": 7.617241472731706e-06, "loss": 4.8374, "step": 156500 }, { "epoch": 4.25, "learning_rate": 7.481780499038226e-06, "loss": 4.8639, "step": 157000 }, { "epoch": 4.27, "learning_rate": 7.346319525344748e-06, "loss": 4.867, "step": 157500 }, { "epoch": 4.28, "learning_rate": 7.21085855165127e-06, "loss": 4.8409, "step": 158000 }, { "epoch": 4.29, "learning_rate": 7.0756684999051776e-06, "loss": 4.8484, "step": 158500 }, { "epoch": 4.31, "learning_rate": 6.940207526211699e-06, "loss": 4.8246, "step": 159000 }, { "epoch": 4.32, "learning_rate": 6.8047465525182196e-06, "loss": 4.838, "step": 159500 }, { "epoch": 4.33, "learning_rate": 6.669285578824741e-06, "loss": 4.8192, "step": 160000 }, { "epoch": 4.35, "learning_rate": 6.533824605131262e-06, "loss": 4.8319, "step": 160500 }, { "epoch": 4.36, "learning_rate": 6.39863455338517e-06, "loss": 4.8169, "step": 161000 }, { "epoch": 4.38, "learning_rate": 6.26317357969169e-06, "loss": 4.8317, "step": 161500 }, { "epoch": 4.39, "learning_rate": 6.127712605998212e-06, "loss": 4.836, "step": 162000 }, { "epoch": 4.4, "learning_rate": 5.992251632304733e-06, "loss": 4.8141, "step": 162500 }, { "epoch": 4.42, "learning_rate": 5.857061580558641e-06, "loss": 4.8147, "step": 163000 }, { "epoch": 4.43, "learning_rate": 5.721600606865162e-06, "loss": 4.8071, "step": 163500 }, { "epoch": 4.44, "learning_rate": 5.586139633171684e-06, "loss": 4.8112, "step": 164000 }, { "epoch": 4.46, "learning_rate": 5.450678659478205e-06, "loss": 4.8153, "step": 164500 }, { "epoch": 4.47, "learning_rate": 5.315488607732113e-06, "loss": 4.8249, "step": 165000 }, { "epoch": 4.48, "learning_rate": 5.180027634038634e-06, "loss": 4.8046, "step": 165500 }, { "epoch": 4.5, "learning_rate": 5.044566660345155e-06, "loss": 4.8319, "step": 166000 }, { "epoch": 4.51, "learning_rate": 4.909105686651676e-06, "loss": 4.8174, "step": 166500 }, { "epoch": 4.52, "learning_rate": 4.773644712958197e-06, "loss": 4.8236, "step": 167000 }, { "epoch": 4.54, "learning_rate": 4.6381837392647184e-06, "loss": 4.8157, "step": 167500 }, { "epoch": 4.55, "learning_rate": 4.502993687518626e-06, "loss": 4.8317, "step": 168000 }, { "epoch": 4.57, "learning_rate": 4.367532713825148e-06, "loss": 4.7945, "step": 168500 }, { "epoch": 4.58, "learning_rate": 4.232071740131669e-06, "loss": 4.8353, "step": 169000 }, { "epoch": 4.59, "learning_rate": 4.096610766438189e-06, "loss": 4.8057, "step": 169500 }, { "epoch": 4.61, "learning_rate": 3.96114979274471e-06, "loss": 4.8076, "step": 170000 }, { "epoch": 4.62, "learning_rate": 3.825688819051231e-06, "loss": 4.7903, "step": 170500 }, { "epoch": 4.63, "learning_rate": 3.6902278453577528e-06, "loss": 4.8109, "step": 171000 }, { "epoch": 4.65, "learning_rate": 3.5550377936116603e-06, "loss": 4.8055, "step": 171500 }, { "epoch": 4.66, "learning_rate": 3.419576819918182e-06, "loss": 4.8029, "step": 172000 }, { "epoch": 4.67, "learning_rate": 3.2841158462247027e-06, "loss": 4.8136, "step": 172500 }, { "epoch": 4.69, "learning_rate": 3.1486548725312237e-06, "loss": 4.8165, "step": 173000 }, { "epoch": 4.7, "learning_rate": 3.013193898837745e-06, "loss": 4.8255, "step": 173500 }, { "epoch": 4.71, "learning_rate": 2.877732925144266e-06, "loss": 4.7768, "step": 174000 }, { "epoch": 4.73, "learning_rate": 2.7422719514507875e-06, "loss": 4.7727, "step": 174500 }, { "epoch": 4.74, "learning_rate": 2.607081899704695e-06, "loss": 4.7951, "step": 175000 }, { "epoch": 4.75, "learning_rate": 2.471620926011216e-06, "loss": 4.7991, "step": 175500 }, { "epoch": 4.77, "learning_rate": 2.3361599523177375e-06, "loss": 4.8037, "step": 176000 }, { "epoch": 4.78, "learning_rate": 2.2006989786242585e-06, "loss": 4.7915, "step": 176500 }, { "epoch": 4.8, "learning_rate": 2.0652380049307795e-06, "loss": 4.7938, "step": 177000 }, { "epoch": 4.81, "learning_rate": 1.9297770312373005e-06, "loss": 4.7778, "step": 177500 }, { "epoch": 4.82, "learning_rate": 1.7943160575438219e-06, "loss": 4.7994, "step": 178000 }, { "epoch": 4.84, "learning_rate": 1.6588550838503429e-06, "loss": 4.7939, "step": 178500 }, { "epoch": 4.85, "learning_rate": 1.5236650321042508e-06, "loss": 4.7956, "step": 179000 }, { "epoch": 4.86, "learning_rate": 1.388474980358159e-06, "loss": 4.8032, "step": 179500 }, { "epoch": 4.88, "learning_rate": 1.25301400666468e-06, "loss": 4.7941, "step": 180000 }, { "epoch": 4.89, "learning_rate": 1.1175530329712012e-06, "loss": 4.7933, "step": 180500 }, { "epoch": 4.9, "learning_rate": 9.820920592777222e-07, "loss": 4.7929, "step": 181000 }, { "epoch": 4.92, "learning_rate": 8.466310855842432e-07, "loss": 4.8078, "step": 181500 }, { "epoch": 4.93, "learning_rate": 7.111701118907644e-07, "loss": 4.7951, "step": 182000 }, { "epoch": 4.94, "learning_rate": 5.757091381972854e-07, "loss": 4.8039, "step": 182500 }, { "epoch": 4.96, "learning_rate": 4.4024816450380646e-07, "loss": 4.7995, "step": 183000 }, { "epoch": 4.97, "learning_rate": 3.0505811275771453e-07, "loss": 4.8061, "step": 183500 }, { "epoch": 4.98, "learning_rate": 1.695971390642356e-07, "loss": 4.7984, "step": 184000 }, { "epoch": 5.0, "learning_rate": 3.413616537075669e-08, "loss": 4.7822, "step": 184500 }, { "epoch": 5.0, "step": 184555, "total_flos": 4.869967130385408e+17, "train_loss": 5.414583057241204, "train_runtime": 31930.2738, "train_samples_per_second": 57.799, "train_steps_per_second": 5.78 } ], "max_steps": 184555, "num_train_epochs": 5, "total_flos": 4.869967130385408e+17, "trial_name": null, "trial_params": null }