{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 232677, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.989255491518285e-05, "loss": 3.2391, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.97851098303657e-05, "loss": 2.3298, "step": 1000 }, { "epoch": 0.02, "learning_rate": 4.967766474554855e-05, "loss": 2.0813, "step": 1500 }, { "epoch": 0.03, "learning_rate": 4.9570219660731405e-05, "loss": 1.9156, "step": 2000 }, { "epoch": 0.03, "learning_rate": 4.946277457591425e-05, "loss": 1.8057, "step": 2500 }, { "epoch": 0.04, "learning_rate": 4.93553294910971e-05, "loss": 1.7528, "step": 3000 }, { "epoch": 0.05, "learning_rate": 4.924788440627995e-05, "loss": 1.6696, "step": 3500 }, { "epoch": 0.05, "learning_rate": 4.91404393214628e-05, "loss": 1.6381, "step": 4000 }, { "epoch": 0.06, "learning_rate": 4.903299423664565e-05, "loss": 1.5909, "step": 4500 }, { "epoch": 0.06, "learning_rate": 4.8925549151828506e-05, "loss": 1.5526, "step": 5000 }, { "epoch": 0.07, "learning_rate": 4.8818104067011355e-05, "loss": 1.5083, "step": 5500 }, { "epoch": 0.08, "learning_rate": 4.8710658982194204e-05, "loss": 1.4702, "step": 6000 }, { "epoch": 0.08, "learning_rate": 4.860321389737705e-05, "loss": 1.4743, "step": 6500 }, { "epoch": 0.09, "learning_rate": 4.84957688125599e-05, "loss": 1.458, "step": 7000 }, { "epoch": 0.1, "learning_rate": 4.838832372774275e-05, "loss": 1.4248, "step": 7500 }, { "epoch": 0.1, "learning_rate": 4.82808786429256e-05, "loss": 1.422, "step": 8000 }, { "epoch": 0.11, "learning_rate": 4.8173433558108456e-05, "loss": 1.3869, "step": 8500 }, { "epoch": 0.12, "learning_rate": 4.80659884732913e-05, "loss": 1.3768, "step": 9000 }, { "epoch": 0.12, "learning_rate": 4.7958543388474154e-05, "loss": 1.3556, "step": 9500 }, { "epoch": 0.13, "learning_rate": 4.7851098303657e-05, "loss": 1.3373, "step": 10000 }, { "epoch": 0.14, "learning_rate": 4.774365321883985e-05, "loss": 1.3237, "step": 10500 }, { "epoch": 0.14, "learning_rate": 4.76362081340227e-05, "loss": 1.3081, "step": 11000 }, { "epoch": 0.15, "learning_rate": 4.752876304920556e-05, "loss": 1.3192, "step": 11500 }, { "epoch": 0.15, "learning_rate": 4.74213179643884e-05, "loss": 1.2975, "step": 12000 }, { "epoch": 0.16, "learning_rate": 4.7313872879571255e-05, "loss": 1.2887, "step": 12500 }, { "epoch": 0.17, "learning_rate": 4.7206427794754104e-05, "loss": 1.2858, "step": 13000 }, { "epoch": 0.17, "learning_rate": 4.709898270993695e-05, "loss": 1.2903, "step": 13500 }, { "epoch": 0.18, "learning_rate": 4.69915376251198e-05, "loss": 1.267, "step": 14000 }, { "epoch": 0.19, "learning_rate": 4.688409254030265e-05, "loss": 1.249, "step": 14500 }, { "epoch": 0.19, "learning_rate": 4.677664745548551e-05, "loss": 1.2643, "step": 15000 }, { "epoch": 0.2, "learning_rate": 4.666920237066835e-05, "loss": 1.2351, "step": 15500 }, { "epoch": 0.21, "learning_rate": 4.6561757285851205e-05, "loss": 1.2437, "step": 16000 }, { "epoch": 0.21, "learning_rate": 4.6454312201034054e-05, "loss": 1.243, "step": 16500 }, { "epoch": 0.22, "learning_rate": 4.63468671162169e-05, "loss": 1.2196, "step": 17000 }, { "epoch": 0.23, "learning_rate": 4.623942203139975e-05, "loss": 1.2167, "step": 17500 }, { "epoch": 0.23, "learning_rate": 4.613197694658261e-05, "loss": 1.2147, "step": 18000 }, { "epoch": 0.24, "learning_rate": 4.602453186176545e-05, "loss": 1.2036, "step": 18500 }, { "epoch": 0.24, "learning_rate": 4.5917086776948306e-05, "loss": 1.2154, "step": 19000 }, { "epoch": 0.25, "learning_rate": 4.5809641692131155e-05, "loss": 1.1951, "step": 19500 }, { "epoch": 0.26, "learning_rate": 4.5702196607314004e-05, "loss": 1.1882, "step": 20000 }, { "epoch": 0.26, "learning_rate": 4.559475152249685e-05, "loss": 1.1959, "step": 20500 }, { "epoch": 0.27, "learning_rate": 4.548730643767971e-05, "loss": 1.1728, "step": 21000 }, { "epoch": 0.28, "learning_rate": 4.537986135286255e-05, "loss": 1.1758, "step": 21500 }, { "epoch": 0.28, "learning_rate": 4.527241626804541e-05, "loss": 1.1651, "step": 22000 }, { "epoch": 0.29, "learning_rate": 4.5164971183228256e-05, "loss": 1.158, "step": 22500 }, { "epoch": 0.3, "learning_rate": 4.5057526098411105e-05, "loss": 1.1669, "step": 23000 }, { "epoch": 0.3, "learning_rate": 4.4950081013593954e-05, "loss": 1.1594, "step": 23500 }, { "epoch": 0.31, "learning_rate": 4.48426359287768e-05, "loss": 1.1544, "step": 24000 }, { "epoch": 0.32, "learning_rate": 4.473519084395966e-05, "loss": 1.1542, "step": 24500 }, { "epoch": 0.32, "learning_rate": 4.46277457591425e-05, "loss": 1.1352, "step": 25000 }, { "epoch": 0.33, "learning_rate": 4.4520300674325357e-05, "loss": 1.137, "step": 25500 }, { "epoch": 0.34, "learning_rate": 4.4412855589508206e-05, "loss": 1.1335, "step": 26000 }, { "epoch": 0.34, "learning_rate": 4.4305410504691055e-05, "loss": 1.1436, "step": 26500 }, { "epoch": 0.35, "learning_rate": 4.4197965419873904e-05, "loss": 1.1098, "step": 27000 }, { "epoch": 0.35, "learning_rate": 4.409052033505676e-05, "loss": 1.1164, "step": 27500 }, { "epoch": 0.36, "learning_rate": 4.39830752502396e-05, "loss": 1.1358, "step": 28000 }, { "epoch": 0.37, "learning_rate": 4.387563016542246e-05, "loss": 1.1144, "step": 28500 }, { "epoch": 0.37, "learning_rate": 4.3768185080605306e-05, "loss": 1.1095, "step": 29000 }, { "epoch": 0.38, "learning_rate": 4.3660739995788155e-05, "loss": 1.1016, "step": 29500 }, { "epoch": 0.39, "learning_rate": 4.3553294910971004e-05, "loss": 1.1123, "step": 30000 }, { "epoch": 0.39, "learning_rate": 4.3445849826153853e-05, "loss": 1.0927, "step": 30500 }, { "epoch": 0.4, "learning_rate": 4.33384047413367e-05, "loss": 1.0979, "step": 31000 }, { "epoch": 0.41, "learning_rate": 4.323095965651955e-05, "loss": 1.0993, "step": 31500 }, { "epoch": 0.41, "learning_rate": 4.312351457170241e-05, "loss": 1.1017, "step": 32000 }, { "epoch": 0.42, "learning_rate": 4.3016069486885256e-05, "loss": 1.0919, "step": 32500 }, { "epoch": 0.43, "learning_rate": 4.2908624402068105e-05, "loss": 1.1008, "step": 33000 }, { "epoch": 0.43, "learning_rate": 4.2801179317250954e-05, "loss": 1.0825, "step": 33500 }, { "epoch": 0.44, "learning_rate": 4.269373423243381e-05, "loss": 1.0868, "step": 34000 }, { "epoch": 0.44, "learning_rate": 4.258628914761665e-05, "loss": 1.0704, "step": 34500 }, { "epoch": 0.45, "learning_rate": 4.247884406279951e-05, "loss": 1.0678, "step": 35000 }, { "epoch": 0.46, "learning_rate": 4.237139897798236e-05, "loss": 1.0851, "step": 35500 }, { "epoch": 0.46, "learning_rate": 4.2263953893165206e-05, "loss": 1.0685, "step": 36000 }, { "epoch": 0.47, "learning_rate": 4.2156508808348055e-05, "loss": 1.0732, "step": 36500 }, { "epoch": 0.48, "learning_rate": 4.2049063723530904e-05, "loss": 1.0755, "step": 37000 }, { "epoch": 0.48, "learning_rate": 4.194161863871375e-05, "loss": 1.0804, "step": 37500 }, { "epoch": 0.49, "learning_rate": 4.18341735538966e-05, "loss": 1.0588, "step": 38000 }, { "epoch": 0.5, "learning_rate": 4.172672846907946e-05, "loss": 1.066, "step": 38500 }, { "epoch": 0.5, "learning_rate": 4.16192833842623e-05, "loss": 1.0702, "step": 39000 }, { "epoch": 0.51, "learning_rate": 4.1511838299445156e-05, "loss": 1.0492, "step": 39500 }, { "epoch": 0.52, "learning_rate": 4.1404393214628005e-05, "loss": 1.0646, "step": 40000 }, { "epoch": 0.52, "learning_rate": 4.1296948129810854e-05, "loss": 1.0547, "step": 40500 }, { "epoch": 0.53, "learning_rate": 4.11895030449937e-05, "loss": 1.0512, "step": 41000 }, { "epoch": 0.54, "learning_rate": 4.108205796017656e-05, "loss": 1.0438, "step": 41500 }, { "epoch": 0.54, "learning_rate": 4.097461287535941e-05, "loss": 1.0411, "step": 42000 }, { "epoch": 0.55, "learning_rate": 4.086716779054226e-05, "loss": 1.0473, "step": 42500 }, { "epoch": 0.55, "learning_rate": 4.0759722705725106e-05, "loss": 1.0401, "step": 43000 }, { "epoch": 0.56, "learning_rate": 4.0652277620907955e-05, "loss": 1.0447, "step": 43500 }, { "epoch": 0.57, "learning_rate": 4.0544832536090804e-05, "loss": 1.0416, "step": 44000 }, { "epoch": 0.57, "learning_rate": 4.043738745127365e-05, "loss": 1.0266, "step": 44500 }, { "epoch": 0.58, "learning_rate": 4.032994236645651e-05, "loss": 1.0307, "step": 45000 }, { "epoch": 0.59, "learning_rate": 4.022249728163935e-05, "loss": 1.0371, "step": 45500 }, { "epoch": 0.59, "learning_rate": 4.011505219682221e-05, "loss": 1.0353, "step": 46000 }, { "epoch": 0.6, "learning_rate": 4.0007607112005056e-05, "loss": 1.0193, "step": 46500 }, { "epoch": 0.61, "learning_rate": 3.9900162027187905e-05, "loss": 1.0268, "step": 47000 }, { "epoch": 0.61, "learning_rate": 3.9792716942370754e-05, "loss": 1.0419, "step": 47500 }, { "epoch": 0.62, "learning_rate": 3.968527185755361e-05, "loss": 1.0197, "step": 48000 }, { "epoch": 0.63, "learning_rate": 3.957782677273645e-05, "loss": 1.0071, "step": 48500 }, { "epoch": 0.63, "learning_rate": 3.947038168791931e-05, "loss": 1.0169, "step": 49000 }, { "epoch": 0.64, "learning_rate": 3.936293660310216e-05, "loss": 1.003, "step": 49500 }, { "epoch": 0.64, "learning_rate": 3.9255491518285006e-05, "loss": 1.0132, "step": 50000 }, { "epoch": 0.65, "learning_rate": 3.9148046433467855e-05, "loss": 1.0233, "step": 50500 }, { "epoch": 0.66, "learning_rate": 3.9040601348650704e-05, "loss": 1.0143, "step": 51000 }, { "epoch": 0.66, "learning_rate": 3.893315626383356e-05, "loss": 0.9989, "step": 51500 }, { "epoch": 0.67, "learning_rate": 3.88257111790164e-05, "loss": 1.0168, "step": 52000 }, { "epoch": 0.68, "learning_rate": 3.871826609419926e-05, "loss": 1.0186, "step": 52500 }, { "epoch": 0.68, "learning_rate": 3.861082100938211e-05, "loss": 1.0083, "step": 53000 }, { "epoch": 0.69, "learning_rate": 3.8503375924564956e-05, "loss": 1.0098, "step": 53500 }, { "epoch": 0.7, "learning_rate": 3.8395930839747805e-05, "loss": 1.0015, "step": 54000 }, { "epoch": 0.7, "learning_rate": 3.828848575493066e-05, "loss": 0.9971, "step": 54500 }, { "epoch": 0.71, "learning_rate": 3.81810406701135e-05, "loss": 0.9934, "step": 55000 }, { "epoch": 0.72, "learning_rate": 3.807359558529636e-05, "loss": 0.9964, "step": 55500 }, { "epoch": 0.72, "learning_rate": 3.796615050047921e-05, "loss": 0.9898, "step": 56000 }, { "epoch": 0.73, "learning_rate": 3.785870541566206e-05, "loss": 1.0149, "step": 56500 }, { "epoch": 0.73, "learning_rate": 3.7751260330844906e-05, "loss": 0.9934, "step": 57000 }, { "epoch": 0.74, "learning_rate": 3.764381524602776e-05, "loss": 1.0015, "step": 57500 }, { "epoch": 0.75, "learning_rate": 3.7536370161210604e-05, "loss": 0.9857, "step": 58000 }, { "epoch": 0.75, "learning_rate": 3.742892507639346e-05, "loss": 0.9861, "step": 58500 }, { "epoch": 0.76, "learning_rate": 3.732147999157631e-05, "loss": 0.9826, "step": 59000 }, { "epoch": 0.77, "learning_rate": 3.721403490675916e-05, "loss": 0.9734, "step": 59500 }, { "epoch": 0.77, "learning_rate": 3.7106589821942007e-05, "loss": 0.9709, "step": 60000 }, { "epoch": 0.78, "learning_rate": 3.6999144737124856e-05, "loss": 0.9862, "step": 60500 }, { "epoch": 0.79, "learning_rate": 3.689169965230771e-05, "loss": 0.9882, "step": 61000 }, { "epoch": 0.79, "learning_rate": 3.6784254567490554e-05, "loss": 0.9921, "step": 61500 }, { "epoch": 0.8, "learning_rate": 3.667680948267341e-05, "loss": 0.9556, "step": 62000 }, { "epoch": 0.81, "learning_rate": 3.656936439785626e-05, "loss": 0.9703, "step": 62500 }, { "epoch": 0.81, "learning_rate": 3.646191931303911e-05, "loss": 0.9659, "step": 63000 }, { "epoch": 0.82, "learning_rate": 3.6354474228221956e-05, "loss": 0.9847, "step": 63500 }, { "epoch": 0.83, "learning_rate": 3.624702914340481e-05, "loss": 0.9644, "step": 64000 }, { "epoch": 0.83, "learning_rate": 3.6139584058587654e-05, "loss": 0.9698, "step": 64500 }, { "epoch": 0.84, "learning_rate": 3.603213897377051e-05, "loss": 0.9689, "step": 65000 }, { "epoch": 0.84, "learning_rate": 3.592469388895336e-05, "loss": 0.9743, "step": 65500 }, { "epoch": 0.85, "learning_rate": 3.581724880413621e-05, "loss": 0.9633, "step": 66000 }, { "epoch": 0.86, "learning_rate": 3.570980371931906e-05, "loss": 0.9677, "step": 66500 }, { "epoch": 0.86, "learning_rate": 3.5602358634501906e-05, "loss": 0.9823, "step": 67000 }, { "epoch": 0.87, "learning_rate": 3.5494913549684755e-05, "loss": 0.9523, "step": 67500 }, { "epoch": 0.88, "learning_rate": 3.5387468464867604e-05, "loss": 0.9553, "step": 68000 }, { "epoch": 0.88, "learning_rate": 3.528002338005046e-05, "loss": 0.9615, "step": 68500 }, { "epoch": 0.89, "learning_rate": 3.517257829523331e-05, "loss": 0.9701, "step": 69000 }, { "epoch": 0.9, "learning_rate": 3.506513321041616e-05, "loss": 0.9646, "step": 69500 }, { "epoch": 0.9, "learning_rate": 3.495768812559901e-05, "loss": 0.9583, "step": 70000 }, { "epoch": 0.91, "learning_rate": 3.485024304078186e-05, "loss": 0.9459, "step": 70500 }, { "epoch": 0.92, "learning_rate": 3.4742797955964705e-05, "loss": 0.952, "step": 71000 }, { "epoch": 0.92, "learning_rate": 3.463535287114756e-05, "loss": 0.9446, "step": 71500 }, { "epoch": 0.93, "learning_rate": 3.452790778633041e-05, "loss": 0.9434, "step": 72000 }, { "epoch": 0.93, "learning_rate": 3.442046270151326e-05, "loss": 0.9471, "step": 72500 }, { "epoch": 0.94, "learning_rate": 3.431301761669611e-05, "loss": 0.9501, "step": 73000 }, { "epoch": 0.95, "learning_rate": 3.420557253187896e-05, "loss": 0.9423, "step": 73500 }, { "epoch": 0.95, "learning_rate": 3.4098127447061806e-05, "loss": 0.9397, "step": 74000 }, { "epoch": 0.96, "learning_rate": 3.3990682362244655e-05, "loss": 0.9541, "step": 74500 }, { "epoch": 0.97, "learning_rate": 3.388323727742751e-05, "loss": 0.9535, "step": 75000 }, { "epoch": 0.97, "learning_rate": 3.377579219261035e-05, "loss": 0.9489, "step": 75500 }, { "epoch": 0.98, "learning_rate": 3.366834710779321e-05, "loss": 0.9429, "step": 76000 }, { "epoch": 0.99, "learning_rate": 3.356090202297606e-05, "loss": 0.9441, "step": 76500 }, { "epoch": 0.99, "learning_rate": 3.3453456938158914e-05, "loss": 0.9293, "step": 77000 }, { "epoch": 1.0, "learning_rate": 3.3346011853341756e-05, "loss": 0.9415, "step": 77500 }, { "epoch": 1.01, "learning_rate": 3.323856676852461e-05, "loss": 0.8832, "step": 78000 }, { "epoch": 1.01, "learning_rate": 3.313112168370746e-05, "loss": 0.8734, "step": 78500 }, { "epoch": 1.02, "learning_rate": 3.302367659889031e-05, "loss": 0.8732, "step": 79000 }, { "epoch": 1.03, "learning_rate": 3.291623151407316e-05, "loss": 0.865, "step": 79500 }, { "epoch": 1.03, "learning_rate": 3.280878642925601e-05, "loss": 0.8666, "step": 80000 }, { "epoch": 1.04, "learning_rate": 3.270134134443886e-05, "loss": 0.8559, "step": 80500 }, { "epoch": 1.04, "learning_rate": 3.2593896259621706e-05, "loss": 0.8607, "step": 81000 }, { "epoch": 1.05, "learning_rate": 3.248645117480456e-05, "loss": 0.8558, "step": 81500 }, { "epoch": 1.06, "learning_rate": 3.2379006089987404e-05, "loss": 0.8648, "step": 82000 }, { "epoch": 1.06, "learning_rate": 3.227156100517026e-05, "loss": 0.8691, "step": 82500 }, { "epoch": 1.07, "learning_rate": 3.216411592035311e-05, "loss": 0.8528, "step": 83000 }, { "epoch": 1.08, "learning_rate": 3.205667083553596e-05, "loss": 0.8692, "step": 83500 }, { "epoch": 1.08, "learning_rate": 3.194922575071881e-05, "loss": 0.8598, "step": 84000 }, { "epoch": 1.09, "learning_rate": 3.184178066590166e-05, "loss": 0.8698, "step": 84500 }, { "epoch": 1.1, "learning_rate": 3.1734335581084505e-05, "loss": 0.8725, "step": 85000 }, { "epoch": 1.1, "learning_rate": 3.162689049626736e-05, "loss": 0.8714, "step": 85500 }, { "epoch": 1.11, "learning_rate": 3.151944541145021e-05, "loss": 0.8551, "step": 86000 }, { "epoch": 1.12, "learning_rate": 3.141200032663306e-05, "loss": 0.8664, "step": 86500 }, { "epoch": 1.12, "learning_rate": 3.130455524181591e-05, "loss": 0.8624, "step": 87000 }, { "epoch": 1.13, "learning_rate": 3.119711015699876e-05, "loss": 0.8608, "step": 87500 }, { "epoch": 1.13, "learning_rate": 3.108966507218161e-05, "loss": 0.8631, "step": 88000 }, { "epoch": 1.14, "learning_rate": 3.0982219987364455e-05, "loss": 0.8638, "step": 88500 }, { "epoch": 1.15, "learning_rate": 3.087477490254731e-05, "loss": 0.8635, "step": 89000 }, { "epoch": 1.15, "learning_rate": 3.076732981773016e-05, "loss": 0.8537, "step": 89500 }, { "epoch": 1.16, "learning_rate": 3.065988473291301e-05, "loss": 0.8547, "step": 90000 }, { "epoch": 1.17, "learning_rate": 3.055243964809586e-05, "loss": 0.8602, "step": 90500 }, { "epoch": 1.17, "learning_rate": 3.044499456327871e-05, "loss": 0.8549, "step": 91000 }, { "epoch": 1.18, "learning_rate": 3.033754947846156e-05, "loss": 0.8652, "step": 91500 }, { "epoch": 1.19, "learning_rate": 3.0230104393644408e-05, "loss": 0.8453, "step": 92000 }, { "epoch": 1.19, "learning_rate": 3.012265930882726e-05, "loss": 0.8538, "step": 92500 }, { "epoch": 1.2, "learning_rate": 3.0015214224010106e-05, "loss": 0.866, "step": 93000 }, { "epoch": 1.21, "learning_rate": 2.990776913919296e-05, "loss": 0.8625, "step": 93500 }, { "epoch": 1.21, "learning_rate": 2.980032405437581e-05, "loss": 0.8586, "step": 94000 }, { "epoch": 1.22, "learning_rate": 2.9692878969558657e-05, "loss": 0.8501, "step": 94500 }, { "epoch": 1.22, "learning_rate": 2.958543388474151e-05, "loss": 0.8488, "step": 95000 }, { "epoch": 1.23, "learning_rate": 2.947798879992436e-05, "loss": 0.8551, "step": 95500 }, { "epoch": 1.24, "learning_rate": 2.9370543715107214e-05, "loss": 0.8468, "step": 96000 }, { "epoch": 1.24, "learning_rate": 2.926309863029006e-05, "loss": 0.8487, "step": 96500 }, { "epoch": 1.25, "learning_rate": 2.9155653545472912e-05, "loss": 0.8565, "step": 97000 }, { "epoch": 1.26, "learning_rate": 2.9048208460655764e-05, "loss": 0.8473, "step": 97500 }, { "epoch": 1.26, "learning_rate": 2.894076337583861e-05, "loss": 0.8492, "step": 98000 }, { "epoch": 1.27, "learning_rate": 2.8833318291021462e-05, "loss": 0.8577, "step": 98500 }, { "epoch": 1.28, "learning_rate": 2.872587320620431e-05, "loss": 0.8402, "step": 99000 }, { "epoch": 1.28, "learning_rate": 2.8618428121387157e-05, "loss": 0.8467, "step": 99500 }, { "epoch": 1.29, "learning_rate": 2.851098303657001e-05, "loss": 0.8384, "step": 100000 }, { "epoch": 1.3, "learning_rate": 2.840353795175286e-05, "loss": 0.8418, "step": 100500 }, { "epoch": 1.3, "learning_rate": 2.8296092866935707e-05, "loss": 0.8563, "step": 101000 }, { "epoch": 1.31, "learning_rate": 2.818864778211856e-05, "loss": 0.8453, "step": 101500 }, { "epoch": 1.32, "learning_rate": 2.8081202697301412e-05, "loss": 0.8435, "step": 102000 }, { "epoch": 1.32, "learning_rate": 2.7973757612484258e-05, "loss": 0.8489, "step": 102500 }, { "epoch": 1.33, "learning_rate": 2.786631252766711e-05, "loss": 0.8349, "step": 103000 }, { "epoch": 1.33, "learning_rate": 2.7758867442849963e-05, "loss": 0.8381, "step": 103500 }, { "epoch": 1.34, "learning_rate": 2.7651422358032808e-05, "loss": 0.8511, "step": 104000 }, { "epoch": 1.35, "learning_rate": 2.754397727321566e-05, "loss": 0.8422, "step": 104500 }, { "epoch": 1.35, "learning_rate": 2.7436532188398513e-05, "loss": 0.8487, "step": 105000 }, { "epoch": 1.36, "learning_rate": 2.7329087103581362e-05, "loss": 0.8302, "step": 105500 }, { "epoch": 1.37, "learning_rate": 2.722164201876421e-05, "loss": 0.836, "step": 106000 }, { "epoch": 1.37, "learning_rate": 2.711419693394706e-05, "loss": 0.8278, "step": 106500 }, { "epoch": 1.38, "learning_rate": 2.7006751849129912e-05, "loss": 0.8312, "step": 107000 }, { "epoch": 1.39, "learning_rate": 2.6899306764312758e-05, "loss": 0.8364, "step": 107500 }, { "epoch": 1.39, "learning_rate": 2.679186167949561e-05, "loss": 0.8439, "step": 108000 }, { "epoch": 1.4, "learning_rate": 2.6684416594678463e-05, "loss": 0.8234, "step": 108500 }, { "epoch": 1.41, "learning_rate": 2.657697150986131e-05, "loss": 0.8242, "step": 109000 }, { "epoch": 1.41, "learning_rate": 2.646952642504416e-05, "loss": 0.8394, "step": 109500 }, { "epoch": 1.42, "learning_rate": 2.6362081340227013e-05, "loss": 0.8351, "step": 110000 }, { "epoch": 1.42, "learning_rate": 2.625463625540986e-05, "loss": 0.8398, "step": 110500 }, { "epoch": 1.43, "learning_rate": 2.614719117059271e-05, "loss": 0.8326, "step": 111000 }, { "epoch": 1.44, "learning_rate": 2.6039746085775564e-05, "loss": 0.8381, "step": 111500 }, { "epoch": 1.44, "learning_rate": 2.593230100095841e-05, "loss": 0.8296, "step": 112000 }, { "epoch": 1.45, "learning_rate": 2.5824855916141262e-05, "loss": 0.8226, "step": 112500 }, { "epoch": 1.46, "learning_rate": 2.571741083132411e-05, "loss": 0.834, "step": 113000 }, { "epoch": 1.46, "learning_rate": 2.5609965746506963e-05, "loss": 0.8319, "step": 113500 }, { "epoch": 1.47, "learning_rate": 2.550252066168981e-05, "loss": 0.8286, "step": 114000 }, { "epoch": 1.48, "learning_rate": 2.539507557687266e-05, "loss": 0.8247, "step": 114500 }, { "epoch": 1.48, "learning_rate": 2.5287630492055514e-05, "loss": 0.8342, "step": 115000 }, { "epoch": 1.49, "learning_rate": 2.518018540723836e-05, "loss": 0.8276, "step": 115500 }, { "epoch": 1.5, "learning_rate": 2.5072740322421212e-05, "loss": 0.8181, "step": 116000 }, { "epoch": 1.5, "learning_rate": 2.496529523760406e-05, "loss": 0.816, "step": 116500 }, { "epoch": 1.51, "learning_rate": 2.4857850152786913e-05, "loss": 0.8375, "step": 117000 }, { "epoch": 1.51, "learning_rate": 2.4750405067969762e-05, "loss": 0.833, "step": 117500 }, { "epoch": 1.52, "learning_rate": 2.464295998315261e-05, "loss": 0.8227, "step": 118000 }, { "epoch": 1.53, "learning_rate": 2.4535514898335464e-05, "loss": 0.8335, "step": 118500 }, { "epoch": 1.53, "learning_rate": 2.4428069813518313e-05, "loss": 0.8168, "step": 119000 }, { "epoch": 1.54, "learning_rate": 2.432062472870116e-05, "loss": 0.8209, "step": 119500 }, { "epoch": 1.55, "learning_rate": 2.4213179643884014e-05, "loss": 0.8333, "step": 120000 }, { "epoch": 1.55, "learning_rate": 2.4105734559066863e-05, "loss": 0.8267, "step": 120500 }, { "epoch": 1.56, "learning_rate": 2.3998289474249712e-05, "loss": 0.8126, "step": 121000 }, { "epoch": 1.57, "learning_rate": 2.389084438943256e-05, "loss": 0.8239, "step": 121500 }, { "epoch": 1.57, "learning_rate": 2.378339930461541e-05, "loss": 0.8274, "step": 122000 }, { "epoch": 1.58, "learning_rate": 2.3675954219798263e-05, "loss": 0.8232, "step": 122500 }, { "epoch": 1.59, "learning_rate": 2.356850913498111e-05, "loss": 0.8266, "step": 123000 }, { "epoch": 1.59, "learning_rate": 2.346106405016396e-05, "loss": 0.8149, "step": 123500 }, { "epoch": 1.6, "learning_rate": 2.3353618965346813e-05, "loss": 0.8182, "step": 124000 }, { "epoch": 1.61, "learning_rate": 2.3246173880529662e-05, "loss": 0.8294, "step": 124500 }, { "epoch": 1.61, "learning_rate": 2.3138728795712514e-05, "loss": 0.8279, "step": 125000 }, { "epoch": 1.62, "learning_rate": 2.3031283710895363e-05, "loss": 0.8178, "step": 125500 }, { "epoch": 1.62, "learning_rate": 2.2923838626078212e-05, "loss": 0.8168, "step": 126000 }, { "epoch": 1.63, "learning_rate": 2.2816393541261065e-05, "loss": 0.8225, "step": 126500 }, { "epoch": 1.64, "learning_rate": 2.2708948456443914e-05, "loss": 0.8176, "step": 127000 }, { "epoch": 1.64, "learning_rate": 2.2601503371626763e-05, "loss": 0.8137, "step": 127500 }, { "epoch": 1.65, "learning_rate": 2.2494058286809612e-05, "loss": 0.8166, "step": 128000 }, { "epoch": 1.66, "learning_rate": 2.238661320199246e-05, "loss": 0.8308, "step": 128500 }, { "epoch": 1.66, "learning_rate": 2.227916811717531e-05, "loss": 0.8174, "step": 129000 }, { "epoch": 1.67, "learning_rate": 2.2171723032358162e-05, "loss": 0.8245, "step": 129500 }, { "epoch": 1.68, "learning_rate": 2.206427794754101e-05, "loss": 0.8278, "step": 130000 }, { "epoch": 1.68, "learning_rate": 2.1956832862723864e-05, "loss": 0.8151, "step": 130500 }, { "epoch": 1.69, "learning_rate": 2.1849387777906713e-05, "loss": 0.8036, "step": 131000 }, { "epoch": 1.7, "learning_rate": 2.1741942693089562e-05, "loss": 0.8306, "step": 131500 }, { "epoch": 1.7, "learning_rate": 2.1634497608272414e-05, "loss": 0.8021, "step": 132000 }, { "epoch": 1.71, "learning_rate": 2.1527052523455263e-05, "loss": 0.8143, "step": 132500 }, { "epoch": 1.71, "learning_rate": 2.1419607438638112e-05, "loss": 0.8141, "step": 133000 }, { "epoch": 1.72, "learning_rate": 2.1312162353820965e-05, "loss": 0.7968, "step": 133500 }, { "epoch": 1.73, "learning_rate": 2.1204717269003814e-05, "loss": 0.816, "step": 134000 }, { "epoch": 1.73, "learning_rate": 2.1097272184186666e-05, "loss": 0.8094, "step": 134500 }, { "epoch": 1.74, "learning_rate": 2.0989827099369515e-05, "loss": 0.8155, "step": 135000 }, { "epoch": 1.75, "learning_rate": 2.0882382014552364e-05, "loss": 0.8152, "step": 135500 }, { "epoch": 1.75, "learning_rate": 2.0774936929735213e-05, "loss": 0.8069, "step": 136000 }, { "epoch": 1.76, "learning_rate": 2.0667491844918062e-05, "loss": 0.8018, "step": 136500 }, { "epoch": 1.77, "learning_rate": 2.056004676010091e-05, "loss": 0.8077, "step": 137000 }, { "epoch": 1.77, "learning_rate": 2.0452601675283764e-05, "loss": 0.8133, "step": 137500 }, { "epoch": 1.78, "learning_rate": 2.0345156590466613e-05, "loss": 0.8058, "step": 138000 }, { "epoch": 1.79, "learning_rate": 2.0237711505649465e-05, "loss": 0.7893, "step": 138500 }, { "epoch": 1.79, "learning_rate": 2.0130266420832314e-05, "loss": 0.8099, "step": 139000 }, { "epoch": 1.8, "learning_rate": 2.0022821336015163e-05, "loss": 0.8123, "step": 139500 }, { "epoch": 1.81, "learning_rate": 1.9915376251198015e-05, "loss": 0.8003, "step": 140000 }, { "epoch": 1.81, "learning_rate": 1.9807931166380864e-05, "loss": 0.8107, "step": 140500 }, { "epoch": 1.82, "learning_rate": 1.9700486081563713e-05, "loss": 0.8041, "step": 141000 }, { "epoch": 1.82, "learning_rate": 1.9593040996746566e-05, "loss": 0.8021, "step": 141500 }, { "epoch": 1.83, "learning_rate": 1.9485595911929415e-05, "loss": 0.8062, "step": 142000 }, { "epoch": 1.84, "learning_rate": 1.9378150827112264e-05, "loss": 0.7965, "step": 142500 }, { "epoch": 1.84, "learning_rate": 1.9270705742295113e-05, "loss": 0.8046, "step": 143000 }, { "epoch": 1.85, "learning_rate": 1.9163260657477962e-05, "loss": 0.7925, "step": 143500 }, { "epoch": 1.86, "learning_rate": 1.9055815572660814e-05, "loss": 0.8049, "step": 144000 }, { "epoch": 1.86, "learning_rate": 1.8948370487843663e-05, "loss": 0.8026, "step": 144500 }, { "epoch": 1.87, "learning_rate": 1.8840925403026512e-05, "loss": 0.7817, "step": 145000 }, { "epoch": 1.88, "learning_rate": 1.8733480318209365e-05, "loss": 0.8083, "step": 145500 }, { "epoch": 1.88, "learning_rate": 1.8626035233392214e-05, "loss": 0.8003, "step": 146000 }, { "epoch": 1.89, "learning_rate": 1.8518590148575063e-05, "loss": 0.7988, "step": 146500 }, { "epoch": 1.9, "learning_rate": 1.8411145063757915e-05, "loss": 0.7907, "step": 147000 }, { "epoch": 1.9, "learning_rate": 1.8303699978940764e-05, "loss": 0.7941, "step": 147500 }, { "epoch": 1.91, "learning_rate": 1.8196254894123617e-05, "loss": 0.8028, "step": 148000 }, { "epoch": 1.91, "learning_rate": 1.8088809809306466e-05, "loss": 0.7745, "step": 148500 }, { "epoch": 1.92, "learning_rate": 1.7981364724489315e-05, "loss": 0.7932, "step": 149000 }, { "epoch": 1.93, "learning_rate": 1.7873919639672164e-05, "loss": 0.7963, "step": 149500 }, { "epoch": 1.93, "learning_rate": 1.7766474554855013e-05, "loss": 0.7902, "step": 150000 }, { "epoch": 1.94, "learning_rate": 1.7659029470037862e-05, "loss": 0.7906, "step": 150500 }, { "epoch": 1.95, "learning_rate": 1.7551584385220714e-05, "loss": 0.8034, "step": 151000 }, { "epoch": 1.95, "learning_rate": 1.7444139300403563e-05, "loss": 0.7946, "step": 151500 }, { "epoch": 1.96, "learning_rate": 1.7336694215586416e-05, "loss": 0.795, "step": 152000 }, { "epoch": 1.97, "learning_rate": 1.7229249130769265e-05, "loss": 0.7967, "step": 152500 }, { "epoch": 1.97, "learning_rate": 1.7121804045952114e-05, "loss": 0.7862, "step": 153000 }, { "epoch": 1.98, "learning_rate": 1.7014358961134966e-05, "loss": 0.794, "step": 153500 }, { "epoch": 1.99, "learning_rate": 1.6906913876317815e-05, "loss": 0.7892, "step": 154000 }, { "epoch": 1.99, "learning_rate": 1.6799468791500664e-05, "loss": 0.7941, "step": 154500 }, { "epoch": 2.0, "learning_rate": 1.6692023706683516e-05, "loss": 0.7881, "step": 155000 }, { "epoch": 2.0, "learning_rate": 1.6584578621866365e-05, "loss": 0.7288, "step": 155500 }, { "epoch": 2.01, "learning_rate": 1.6477133537049214e-05, "loss": 0.7028, "step": 156000 }, { "epoch": 2.02, "learning_rate": 1.6369688452232067e-05, "loss": 0.7056, "step": 156500 }, { "epoch": 2.02, "learning_rate": 1.6262243367414916e-05, "loss": 0.7078, "step": 157000 }, { "epoch": 2.03, "learning_rate": 1.6154798282597765e-05, "loss": 0.7096, "step": 157500 }, { "epoch": 2.04, "learning_rate": 1.6047353197780614e-05, "loss": 0.7061, "step": 158000 }, { "epoch": 2.04, "learning_rate": 1.5939908112963463e-05, "loss": 0.7079, "step": 158500 }, { "epoch": 2.05, "learning_rate": 1.5832463028146315e-05, "loss": 0.7145, "step": 159000 }, { "epoch": 2.06, "learning_rate": 1.5725017943329164e-05, "loss": 0.7152, "step": 159500 }, { "epoch": 2.06, "learning_rate": 1.5617572858512013e-05, "loss": 0.6995, "step": 160000 }, { "epoch": 2.07, "learning_rate": 1.5510127773694866e-05, "loss": 0.7079, "step": 160500 }, { "epoch": 2.08, "learning_rate": 1.5402682688877715e-05, "loss": 0.7106, "step": 161000 }, { "epoch": 2.08, "learning_rate": 1.5295237604060567e-05, "loss": 0.713, "step": 161500 }, { "epoch": 2.09, "learning_rate": 1.5187792519243416e-05, "loss": 0.6932, "step": 162000 }, { "epoch": 2.1, "learning_rate": 1.5080347434426265e-05, "loss": 0.7131, "step": 162500 }, { "epoch": 2.1, "learning_rate": 1.4972902349609116e-05, "loss": 0.7082, "step": 163000 }, { "epoch": 2.11, "learning_rate": 1.4865457264791965e-05, "loss": 0.6989, "step": 163500 }, { "epoch": 2.11, "learning_rate": 1.4758012179974814e-05, "loss": 0.707, "step": 164000 }, { "epoch": 2.12, "learning_rate": 1.4650567095157666e-05, "loss": 0.7028, "step": 164500 }, { "epoch": 2.13, "learning_rate": 1.4543122010340515e-05, "loss": 0.7023, "step": 165000 }, { "epoch": 2.13, "learning_rate": 1.4435676925523368e-05, "loss": 0.6984, "step": 165500 }, { "epoch": 2.14, "learning_rate": 1.4328231840706217e-05, "loss": 0.7091, "step": 166000 }, { "epoch": 2.15, "learning_rate": 1.4220786755889066e-05, "loss": 0.7133, "step": 166500 }, { "epoch": 2.15, "learning_rate": 1.4113341671071917e-05, "loss": 0.7201, "step": 167000 }, { "epoch": 2.16, "learning_rate": 1.4005896586254766e-05, "loss": 0.7, "step": 167500 }, { "epoch": 2.17, "learning_rate": 1.3898451501437615e-05, "loss": 0.7137, "step": 168000 }, { "epoch": 2.17, "learning_rate": 1.3791006416620467e-05, "loss": 0.7232, "step": 168500 }, { "epoch": 2.18, "learning_rate": 1.3683561331803316e-05, "loss": 0.7078, "step": 169000 }, { "epoch": 2.19, "learning_rate": 1.3576116246986165e-05, "loss": 0.7078, "step": 169500 }, { "epoch": 2.19, "learning_rate": 1.3468671162169016e-05, "loss": 0.7044, "step": 170000 }, { "epoch": 2.2, "learning_rate": 1.3361226077351865e-05, "loss": 0.6973, "step": 170500 }, { "epoch": 2.2, "learning_rate": 1.3253780992534717e-05, "loss": 0.7106, "step": 171000 }, { "epoch": 2.21, "learning_rate": 1.3146335907717566e-05, "loss": 0.7065, "step": 171500 }, { "epoch": 2.22, "learning_rate": 1.3038890822900415e-05, "loss": 0.7173, "step": 172000 }, { "epoch": 2.22, "learning_rate": 1.2931445738083268e-05, "loss": 0.7041, "step": 172500 }, { "epoch": 2.23, "learning_rate": 1.2824000653266117e-05, "loss": 0.7009, "step": 173000 }, { "epoch": 2.24, "learning_rate": 1.2716555568448966e-05, "loss": 0.7006, "step": 173500 }, { "epoch": 2.24, "learning_rate": 1.2609110483631816e-05, "loss": 0.7024, "step": 174000 }, { "epoch": 2.25, "learning_rate": 1.2501665398814665e-05, "loss": 0.7104, "step": 174500 }, { "epoch": 2.26, "learning_rate": 1.2394220313997516e-05, "loss": 0.699, "step": 175000 }, { "epoch": 2.26, "learning_rate": 1.2286775229180367e-05, "loss": 0.6915, "step": 175500 }, { "epoch": 2.27, "learning_rate": 1.2179330144363218e-05, "loss": 0.7046, "step": 176000 }, { "epoch": 2.28, "learning_rate": 1.2071885059546067e-05, "loss": 0.708, "step": 176500 }, { "epoch": 2.28, "learning_rate": 1.1964439974728917e-05, "loss": 0.7075, "step": 177000 }, { "epoch": 2.29, "learning_rate": 1.1856994889911766e-05, "loss": 0.6994, "step": 177500 }, { "epoch": 2.3, "learning_rate": 1.1749549805094617e-05, "loss": 0.7024, "step": 178000 }, { "epoch": 2.3, "learning_rate": 1.1642104720277466e-05, "loss": 0.6975, "step": 178500 }, { "epoch": 2.31, "learning_rate": 1.1534659635460317e-05, "loss": 0.6854, "step": 179000 }, { "epoch": 2.31, "learning_rate": 1.1427214550643167e-05, "loss": 0.699, "step": 179500 }, { "epoch": 2.32, "learning_rate": 1.1319769465826018e-05, "loss": 0.6951, "step": 180000 }, { "epoch": 2.33, "learning_rate": 1.1212324381008867e-05, "loss": 0.7039, "step": 180500 }, { "epoch": 2.33, "learning_rate": 1.1104879296191716e-05, "loss": 0.706, "step": 181000 }, { "epoch": 2.34, "learning_rate": 1.0997434211374567e-05, "loss": 0.6979, "step": 181500 }, { "epoch": 2.35, "learning_rate": 1.0889989126557416e-05, "loss": 0.6983, "step": 182000 }, { "epoch": 2.35, "learning_rate": 1.0782544041740267e-05, "loss": 0.7025, "step": 182500 }, { "epoch": 2.36, "learning_rate": 1.0675098956923117e-05, "loss": 0.6939, "step": 183000 }, { "epoch": 2.37, "learning_rate": 1.0567653872105968e-05, "loss": 0.6969, "step": 183500 }, { "epoch": 2.37, "learning_rate": 1.0460208787288817e-05, "loss": 0.7007, "step": 184000 }, { "epoch": 2.38, "learning_rate": 1.0352763702471668e-05, "loss": 0.7034, "step": 184500 }, { "epoch": 2.39, "learning_rate": 1.0245318617654517e-05, "loss": 0.6955, "step": 185000 }, { "epoch": 2.39, "learning_rate": 1.0137873532837368e-05, "loss": 0.6951, "step": 185500 }, { "epoch": 2.4, "learning_rate": 1.0030428448020217e-05, "loss": 0.6972, "step": 186000 }, { "epoch": 2.4, "learning_rate": 9.922983363203067e-06, "loss": 0.6884, "step": 186500 }, { "epoch": 2.41, "learning_rate": 9.815538278385918e-06, "loss": 0.7004, "step": 187000 }, { "epoch": 2.42, "learning_rate": 9.708093193568769e-06, "loss": 0.6932, "step": 187500 }, { "epoch": 2.42, "learning_rate": 9.600648108751618e-06, "loss": 0.6962, "step": 188000 }, { "epoch": 2.43, "learning_rate": 9.493203023934467e-06, "loss": 0.7033, "step": 188500 }, { "epoch": 2.44, "learning_rate": 9.385757939117317e-06, "loss": 0.7001, "step": 189000 }, { "epoch": 2.44, "learning_rate": 9.278312854300168e-06, "loss": 0.7074, "step": 189500 }, { "epoch": 2.45, "learning_rate": 9.170867769483017e-06, "loss": 0.7017, "step": 190000 }, { "epoch": 2.46, "learning_rate": 9.063422684665868e-06, "loss": 0.6907, "step": 190500 }, { "epoch": 2.46, "learning_rate": 8.955977599848719e-06, "loss": 0.6968, "step": 191000 }, { "epoch": 2.47, "learning_rate": 8.848532515031568e-06, "loss": 0.7041, "step": 191500 }, { "epoch": 2.48, "learning_rate": 8.741087430214417e-06, "loss": 0.6923, "step": 192000 }, { "epoch": 2.48, "learning_rate": 8.633642345397267e-06, "loss": 0.7016, "step": 192500 }, { "epoch": 2.49, "learning_rate": 8.526197260580118e-06, "loss": 0.6897, "step": 193000 }, { "epoch": 2.49, "learning_rate": 8.418752175762967e-06, "loss": 0.6916, "step": 193500 }, { "epoch": 2.5, "learning_rate": 8.311307090945818e-06, "loss": 0.6907, "step": 194000 }, { "epoch": 2.51, "learning_rate": 8.203862006128668e-06, "loss": 0.6901, "step": 194500 }, { "epoch": 2.51, "learning_rate": 8.09641692131152e-06, "loss": 0.7002, "step": 195000 }, { "epoch": 2.52, "learning_rate": 7.988971836494368e-06, "loss": 0.6927, "step": 195500 }, { "epoch": 2.53, "learning_rate": 7.881526751677217e-06, "loss": 0.6878, "step": 196000 }, { "epoch": 2.53, "learning_rate": 7.774081666860068e-06, "loss": 0.6867, "step": 196500 }, { "epoch": 2.54, "learning_rate": 7.666636582042919e-06, "loss": 0.6925, "step": 197000 }, { "epoch": 2.55, "learning_rate": 7.559191497225768e-06, "loss": 0.6991, "step": 197500 }, { "epoch": 2.55, "learning_rate": 7.451746412408618e-06, "loss": 0.6938, "step": 198000 }, { "epoch": 2.56, "learning_rate": 7.344301327591468e-06, "loss": 0.689, "step": 198500 }, { "epoch": 2.57, "learning_rate": 7.236856242774319e-06, "loss": 0.6954, "step": 199000 }, { "epoch": 2.57, "learning_rate": 7.129411157957168e-06, "loss": 0.6908, "step": 199500 }, { "epoch": 2.58, "learning_rate": 7.021966073140019e-06, "loss": 0.6783, "step": 200000 }, { "epoch": 2.59, "learning_rate": 6.9145209883228686e-06, "loss": 0.6891, "step": 200500 }, { "epoch": 2.59, "learning_rate": 6.807075903505719e-06, "loss": 0.6822, "step": 201000 }, { "epoch": 2.6, "learning_rate": 6.699630818688568e-06, "loss": 0.6902, "step": 201500 }, { "epoch": 2.6, "learning_rate": 6.592185733871418e-06, "loss": 0.6853, "step": 202000 }, { "epoch": 2.61, "learning_rate": 6.484740649054269e-06, "loss": 0.6843, "step": 202500 }, { "epoch": 2.62, "learning_rate": 6.37729556423712e-06, "loss": 0.6936, "step": 203000 }, { "epoch": 2.62, "learning_rate": 6.269850479419969e-06, "loss": 0.6855, "step": 203500 }, { "epoch": 2.63, "learning_rate": 6.1624053946028185e-06, "loss": 0.6771, "step": 204000 }, { "epoch": 2.64, "learning_rate": 6.054960309785669e-06, "loss": 0.6872, "step": 204500 }, { "epoch": 2.64, "learning_rate": 5.947515224968519e-06, "loss": 0.6814, "step": 205000 }, { "epoch": 2.65, "learning_rate": 5.840070140151369e-06, "loss": 0.6865, "step": 205500 }, { "epoch": 2.66, "learning_rate": 5.732625055334219e-06, "loss": 0.6823, "step": 206000 }, { "epoch": 2.66, "learning_rate": 5.625179970517069e-06, "loss": 0.6812, "step": 206500 }, { "epoch": 2.67, "learning_rate": 5.517734885699919e-06, "loss": 0.6904, "step": 207000 }, { "epoch": 2.68, "learning_rate": 5.410289800882768e-06, "loss": 0.6907, "step": 207500 }, { "epoch": 2.68, "learning_rate": 5.302844716065619e-06, "loss": 0.6775, "step": 208000 }, { "epoch": 2.69, "learning_rate": 5.195399631248469e-06, "loss": 0.6833, "step": 208500 }, { "epoch": 2.69, "learning_rate": 5.08795454643132e-06, "loss": 0.686, "step": 209000 }, { "epoch": 2.7, "learning_rate": 4.980509461614169e-06, "loss": 0.695, "step": 209500 }, { "epoch": 2.71, "learning_rate": 4.873064376797019e-06, "loss": 0.6924, "step": 210000 }, { "epoch": 2.71, "learning_rate": 4.765619291979869e-06, "loss": 0.679, "step": 210500 }, { "epoch": 2.72, "learning_rate": 4.658174207162719e-06, "loss": 0.6912, "step": 211000 }, { "epoch": 2.73, "learning_rate": 4.550729122345569e-06, "loss": 0.6843, "step": 211500 }, { "epoch": 2.73, "learning_rate": 4.44328403752842e-06, "loss": 0.6927, "step": 212000 }, { "epoch": 2.74, "learning_rate": 4.3358389527112696e-06, "loss": 0.6769, "step": 212500 }, { "epoch": 2.75, "learning_rate": 4.2283938678941194e-06, "loss": 0.6852, "step": 213000 }, { "epoch": 2.75, "learning_rate": 4.120948783076969e-06, "loss": 0.6781, "step": 213500 }, { "epoch": 2.76, "learning_rate": 4.01350369825982e-06, "loss": 0.6879, "step": 214000 }, { "epoch": 2.77, "learning_rate": 3.90605861344267e-06, "loss": 0.6882, "step": 214500 }, { "epoch": 2.77, "learning_rate": 3.7986135286255197e-06, "loss": 0.6814, "step": 215000 }, { "epoch": 2.78, "learning_rate": 3.6911684438083696e-06, "loss": 0.6787, "step": 215500 }, { "epoch": 2.78, "learning_rate": 3.58372335899122e-06, "loss": 0.6731, "step": 216000 }, { "epoch": 2.79, "learning_rate": 3.4762782741740698e-06, "loss": 0.6841, "step": 216500 }, { "epoch": 2.8, "learning_rate": 3.36883318935692e-06, "loss": 0.6683, "step": 217000 }, { "epoch": 2.8, "learning_rate": 3.26138810453977e-06, "loss": 0.6916, "step": 217500 }, { "epoch": 2.81, "learning_rate": 3.15394301972262e-06, "loss": 0.6767, "step": 218000 }, { "epoch": 2.82, "learning_rate": 3.04649793490547e-06, "loss": 0.6771, "step": 218500 }, { "epoch": 2.82, "learning_rate": 2.93905285008832e-06, "loss": 0.6844, "step": 219000 }, { "epoch": 2.83, "learning_rate": 2.8316077652711702e-06, "loss": 0.6677, "step": 219500 }, { "epoch": 2.84, "learning_rate": 2.72416268045402e-06, "loss": 0.685, "step": 220000 }, { "epoch": 2.84, "learning_rate": 2.6167175956368704e-06, "loss": 0.6773, "step": 220500 }, { "epoch": 2.85, "learning_rate": 2.5092725108197202e-06, "loss": 0.6843, "step": 221000 }, { "epoch": 2.86, "learning_rate": 2.40182742600257e-06, "loss": 0.6797, "step": 221500 }, { "epoch": 2.86, "learning_rate": 2.2943823411854204e-06, "loss": 0.6746, "step": 222000 }, { "epoch": 2.87, "learning_rate": 2.1869372563682703e-06, "loss": 0.6686, "step": 222500 }, { "epoch": 2.88, "learning_rate": 2.0794921715511205e-06, "loss": 0.6869, "step": 223000 }, { "epoch": 2.88, "learning_rate": 1.9720470867339704e-06, "loss": 0.6729, "step": 223500 }, { "epoch": 2.89, "learning_rate": 1.8646020019168205e-06, "loss": 0.6892, "step": 224000 }, { "epoch": 2.89, "learning_rate": 1.7571569170996706e-06, "loss": 0.6691, "step": 224500 }, { "epoch": 2.9, "learning_rate": 1.6497118322825206e-06, "loss": 0.672, "step": 225000 }, { "epoch": 2.91, "learning_rate": 1.5422667474653705e-06, "loss": 0.6801, "step": 225500 }, { "epoch": 2.91, "learning_rate": 1.4348216626482206e-06, "loss": 0.6827, "step": 226000 }, { "epoch": 2.92, "learning_rate": 1.3273765778310707e-06, "loss": 0.6748, "step": 226500 }, { "epoch": 2.93, "learning_rate": 1.2199314930139207e-06, "loss": 0.6651, "step": 227000 }, { "epoch": 2.93, "learning_rate": 1.1124864081967708e-06, "loss": 0.684, "step": 227500 }, { "epoch": 2.94, "learning_rate": 1.0050413233796209e-06, "loss": 0.6717, "step": 228000 }, { "epoch": 2.95, "learning_rate": 8.975962385624709e-07, "loss": 0.6818, "step": 228500 }, { "epoch": 2.95, "learning_rate": 7.901511537453207e-07, "loss": 0.6847, "step": 229000 }, { "epoch": 2.96, "learning_rate": 6.827060689281709e-07, "loss": 0.6779, "step": 229500 }, { "epoch": 2.97, "learning_rate": 5.752609841110209e-07, "loss": 0.6776, "step": 230000 }, { "epoch": 2.97, "learning_rate": 4.678158992938709e-07, "loss": 0.6637, "step": 230500 }, { "epoch": 2.98, "learning_rate": 3.60370814476721e-07, "loss": 0.674, "step": 231000 }, { "epoch": 2.98, "learning_rate": 2.52925729659571e-07, "loss": 0.6786, "step": 231500 }, { "epoch": 2.99, "learning_rate": 1.4548064484242105e-07, "loss": 0.6726, "step": 232000 }, { "epoch": 3.0, "learning_rate": 3.8035560025271085e-08, "loss": 0.6806, "step": 232500 }, { "epoch": 3.0, "step": 232677, "total_flos": 2.518844791101358e+17, "train_loss": 0.8888699503483147, "train_runtime": 65841.5776, "train_samples_per_second": 35.339, "train_steps_per_second": 3.534 } ], "max_steps": 232677, "num_train_epochs": 3, "total_flos": 2.518844791101358e+17, "trial_name": null, "trial_params": null }