{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 65700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.998599695585997e-05, "loss": 3.8582, "step": 25 }, { "epoch": 0.02, "learning_rate": 3.997138508371385e-05, "loss": 3.284, "step": 50 }, { "epoch": 0.02, "learning_rate": 3.995616438356165e-05, "loss": 3.1009, "step": 75 }, { "epoch": 0.03, "learning_rate": 3.994094368340944e-05, "loss": 3.1079, "step": 100 }, { "epoch": 0.04, "learning_rate": 3.9925722983257234e-05, "loss": 3.1098, "step": 125 }, { "epoch": 0.05, "learning_rate": 3.991050228310503e-05, "loss": 3.1987, "step": 150 }, { "epoch": 0.05, "learning_rate": 3.989528158295282e-05, "loss": 2.9982, "step": 175 }, { "epoch": 0.06, "learning_rate": 3.9880060882800616e-05, "loss": 2.9164, "step": 200 }, { "epoch": 0.07, "learning_rate": 3.9864840182648406e-05, "loss": 2.9484, "step": 225 }, { "epoch": 0.08, "learning_rate": 3.9849619482496195e-05, "loss": 2.8611, "step": 250 }, { "epoch": 0.08, "learning_rate": 3.983439878234399e-05, "loss": 2.9438, "step": 275 }, { "epoch": 0.09, "learning_rate": 3.981917808219178e-05, "loss": 3.0905, "step": 300 }, { "epoch": 0.1, "learning_rate": 3.980395738203958e-05, "loss": 3.0313, "step": 325 }, { "epoch": 0.11, "learning_rate": 3.978873668188737e-05, "loss": 2.9305, "step": 350 }, { "epoch": 0.11, "learning_rate": 3.977351598173516e-05, "loss": 2.9258, "step": 375 }, { "epoch": 0.12, "learning_rate": 3.975829528158296e-05, "loss": 2.9008, "step": 400 }, { "epoch": 0.13, "learning_rate": 3.974307458143075e-05, "loss": 2.7581, "step": 425 }, { "epoch": 0.14, "learning_rate": 3.9727853881278545e-05, "loss": 2.8324, "step": 450 }, { "epoch": 0.14, "learning_rate": 3.9712633181126334e-05, "loss": 2.6921, "step": 475 }, { "epoch": 0.15, "learning_rate": 3.9697412480974123e-05, "loss": 2.8431, "step": 500 }, { "epoch": 0.16, "learning_rate": 3.968219178082192e-05, "loss": 2.7828, "step": 525 }, { "epoch": 0.17, "learning_rate": 3.9666971080669716e-05, "loss": 2.8666, "step": 550 }, { "epoch": 0.18, "learning_rate": 3.9651750380517505e-05, "loss": 2.8547, "step": 575 }, { "epoch": 0.18, "learning_rate": 3.9637138508371385e-05, "loss": 2.6909, "step": 600 }, { "epoch": 0.19, "learning_rate": 3.962191780821918e-05, "loss": 2.7679, "step": 625 }, { "epoch": 0.2, "learning_rate": 3.960669710806698e-05, "loss": 2.7284, "step": 650 }, { "epoch": 0.21, "learning_rate": 3.959147640791477e-05, "loss": 2.902, "step": 675 }, { "epoch": 0.21, "learning_rate": 3.957625570776256e-05, "loss": 2.8097, "step": 700 }, { "epoch": 0.22, "learning_rate": 3.956103500761035e-05, "loss": 2.7846, "step": 725 }, { "epoch": 0.23, "learning_rate": 3.954581430745815e-05, "loss": 2.7917, "step": 750 }, { "epoch": 0.24, "learning_rate": 3.953059360730594e-05, "loss": 2.7978, "step": 775 }, { "epoch": 0.24, "learning_rate": 3.951537290715373e-05, "loss": 2.7402, "step": 800 }, { "epoch": 0.25, "learning_rate": 3.9500152207001524e-05, "loss": 2.7104, "step": 825 }, { "epoch": 0.26, "learning_rate": 3.948493150684932e-05, "loss": 2.7742, "step": 850 }, { "epoch": 0.27, "learning_rate": 3.946971080669711e-05, "loss": 2.6598, "step": 875 }, { "epoch": 0.27, "learning_rate": 3.9454490106544906e-05, "loss": 2.6493, "step": 900 }, { "epoch": 0.28, "learning_rate": 3.9439269406392695e-05, "loss": 2.6935, "step": 925 }, { "epoch": 0.29, "learning_rate": 3.942404870624049e-05, "loss": 2.6664, "step": 950 }, { "epoch": 0.3, "learning_rate": 3.940882800608828e-05, "loss": 2.7125, "step": 975 }, { "epoch": 0.3, "learning_rate": 3.939360730593608e-05, "loss": 2.77, "step": 1000 }, { "epoch": 0.31, "learning_rate": 3.9378386605783867e-05, "loss": 2.7562, "step": 1025 }, { "epoch": 0.32, "learning_rate": 3.936316590563166e-05, "loss": 2.699, "step": 1050 }, { "epoch": 0.33, "learning_rate": 3.934794520547946e-05, "loss": 2.8319, "step": 1075 }, { "epoch": 0.33, "learning_rate": 3.933272450532725e-05, "loss": 2.6888, "step": 1100 }, { "epoch": 0.34, "learning_rate": 3.9317503805175045e-05, "loss": 2.7843, "step": 1125 }, { "epoch": 0.35, "learning_rate": 3.9302283105022834e-05, "loss": 2.6738, "step": 1150 }, { "epoch": 0.36, "learning_rate": 3.9287062404870624e-05, "loss": 2.7412, "step": 1175 }, { "epoch": 0.37, "learning_rate": 3.927184170471842e-05, "loss": 2.6732, "step": 1200 }, { "epoch": 0.37, "learning_rate": 3.925662100456621e-05, "loss": 2.7467, "step": 1225 }, { "epoch": 0.38, "learning_rate": 3.9241400304414005e-05, "loss": 2.7866, "step": 1250 }, { "epoch": 0.39, "learning_rate": 3.92261796042618e-05, "loss": 2.8271, "step": 1275 }, { "epoch": 0.4, "learning_rate": 3.921095890410959e-05, "loss": 2.6714, "step": 1300 }, { "epoch": 0.4, "learning_rate": 3.919573820395739e-05, "loss": 2.6909, "step": 1325 }, { "epoch": 0.41, "learning_rate": 3.918051750380518e-05, "loss": 2.7683, "step": 1350 }, { "epoch": 0.42, "learning_rate": 3.916529680365297e-05, "loss": 2.7829, "step": 1375 }, { "epoch": 0.43, "learning_rate": 3.915007610350076e-05, "loss": 2.6544, "step": 1400 }, { "epoch": 0.43, "learning_rate": 3.913485540334855e-05, "loss": 2.6491, "step": 1425 }, { "epoch": 0.44, "learning_rate": 3.9119634703196355e-05, "loss": 2.7998, "step": 1450 }, { "epoch": 0.45, "learning_rate": 3.9104414003044144e-05, "loss": 2.6747, "step": 1475 }, { "epoch": 0.46, "learning_rate": 3.9089193302891934e-05, "loss": 2.8246, "step": 1500 }, { "epoch": 0.46, "learning_rate": 3.907397260273973e-05, "loss": 2.6864, "step": 1525 }, { "epoch": 0.47, "learning_rate": 3.905875190258752e-05, "loss": 2.7112, "step": 1550 }, { "epoch": 0.48, "learning_rate": 3.9043531202435316e-05, "loss": 2.641, "step": 1575 }, { "epoch": 0.49, "learning_rate": 3.9028310502283105e-05, "loss": 2.7254, "step": 1600 }, { "epoch": 0.49, "learning_rate": 3.90130898021309e-05, "loss": 2.6282, "step": 1625 }, { "epoch": 0.5, "learning_rate": 3.89978691019787e-05, "loss": 2.6655, "step": 1650 }, { "epoch": 0.51, "learning_rate": 3.898264840182649e-05, "loss": 2.646, "step": 1675 }, { "epoch": 0.52, "learning_rate": 3.896742770167428e-05, "loss": 2.6284, "step": 1700 }, { "epoch": 0.53, "learning_rate": 3.895220700152207e-05, "loss": 2.6256, "step": 1725 }, { "epoch": 0.53, "learning_rate": 3.893698630136987e-05, "loss": 2.5699, "step": 1750 }, { "epoch": 0.54, "learning_rate": 3.892176560121766e-05, "loss": 2.6774, "step": 1775 }, { "epoch": 0.55, "learning_rate": 3.890654490106545e-05, "loss": 2.7368, "step": 1800 }, { "epoch": 0.56, "learning_rate": 3.8891324200913244e-05, "loss": 2.6614, "step": 1825 }, { "epoch": 0.56, "learning_rate": 3.887610350076104e-05, "loss": 2.6495, "step": 1850 }, { "epoch": 0.57, "learning_rate": 3.886088280060883e-05, "loss": 2.6243, "step": 1875 }, { "epoch": 0.58, "learning_rate": 3.8845662100456626e-05, "loss": 2.5812, "step": 1900 }, { "epoch": 0.59, "learning_rate": 3.8830441400304416e-05, "loss": 2.6791, "step": 1925 }, { "epoch": 0.59, "learning_rate": 3.881522070015221e-05, "loss": 2.5731, "step": 1950 }, { "epoch": 0.6, "learning_rate": 3.88e-05, "loss": 2.5387, "step": 1975 }, { "epoch": 0.61, "learning_rate": 3.87847792998478e-05, "loss": 2.6743, "step": 2000 }, { "epoch": 0.62, "learning_rate": 3.876955859969559e-05, "loss": 2.5973, "step": 2025 }, { "epoch": 0.62, "learning_rate": 3.875433789954338e-05, "loss": 2.6038, "step": 2050 }, { "epoch": 0.63, "learning_rate": 3.873911719939118e-05, "loss": 2.7553, "step": 2075 }, { "epoch": 0.64, "learning_rate": 3.872389649923897e-05, "loss": 2.7192, "step": 2100 }, { "epoch": 0.65, "learning_rate": 3.870867579908676e-05, "loss": 2.6565, "step": 2125 }, { "epoch": 0.65, "learning_rate": 3.8693455098934554e-05, "loss": 2.5753, "step": 2150 }, { "epoch": 0.66, "learning_rate": 3.8678234398782344e-05, "loss": 2.501, "step": 2175 }, { "epoch": 0.67, "learning_rate": 3.866301369863014e-05, "loss": 2.72, "step": 2200 }, { "epoch": 0.68, "learning_rate": 3.864779299847793e-05, "loss": 2.5979, "step": 2225 }, { "epoch": 0.68, "learning_rate": 3.8632572298325726e-05, "loss": 2.587, "step": 2250 }, { "epoch": 0.69, "learning_rate": 3.861735159817352e-05, "loss": 2.7236, "step": 2275 }, { "epoch": 0.7, "learning_rate": 3.860213089802131e-05, "loss": 2.6234, "step": 2300 }, { "epoch": 0.71, "learning_rate": 3.858691019786911e-05, "loss": 2.652, "step": 2325 }, { "epoch": 0.72, "learning_rate": 3.85716894977169e-05, "loss": 2.5885, "step": 2350 }, { "epoch": 0.72, "learning_rate": 3.8556468797564693e-05, "loss": 2.6084, "step": 2375 }, { "epoch": 0.73, "learning_rate": 3.854124809741248e-05, "loss": 2.693, "step": 2400 }, { "epoch": 0.74, "learning_rate": 3.852602739726027e-05, "loss": 2.6778, "step": 2425 }, { "epoch": 0.75, "learning_rate": 3.851080669710807e-05, "loss": 2.591, "step": 2450 }, { "epoch": 0.75, "learning_rate": 3.8495585996955865e-05, "loss": 2.5932, "step": 2475 }, { "epoch": 0.76, "learning_rate": 3.8480365296803654e-05, "loss": 2.5999, "step": 2500 }, { "epoch": 0.77, "learning_rate": 3.846514459665145e-05, "loss": 2.5795, "step": 2525 }, { "epoch": 0.78, "learning_rate": 3.844992389649924e-05, "loss": 2.543, "step": 2550 }, { "epoch": 0.78, "learning_rate": 3.8434703196347036e-05, "loss": 2.5534, "step": 2575 }, { "epoch": 0.79, "learning_rate": 3.8419482496194826e-05, "loss": 2.6031, "step": 2600 }, { "epoch": 0.8, "learning_rate": 3.840426179604262e-05, "loss": 2.6382, "step": 2625 }, { "epoch": 0.81, "learning_rate": 3.838904109589042e-05, "loss": 2.5381, "step": 2650 }, { "epoch": 0.81, "learning_rate": 3.837382039573821e-05, "loss": 2.4354, "step": 2675 }, { "epoch": 0.82, "learning_rate": 3.8358599695586004e-05, "loss": 2.6449, "step": 2700 }, { "epoch": 0.83, "learning_rate": 3.834337899543379e-05, "loss": 2.5669, "step": 2725 }, { "epoch": 0.84, "learning_rate": 3.832815829528158e-05, "loss": 2.6498, "step": 2750 }, { "epoch": 0.84, "learning_rate": 3.831293759512938e-05, "loss": 2.6078, "step": 2775 }, { "epoch": 0.85, "learning_rate": 3.829771689497717e-05, "loss": 2.6951, "step": 2800 }, { "epoch": 0.86, "learning_rate": 3.8282496194824965e-05, "loss": 2.5454, "step": 2825 }, { "epoch": 0.87, "learning_rate": 3.826727549467276e-05, "loss": 2.6792, "step": 2850 }, { "epoch": 0.88, "learning_rate": 3.825205479452055e-05, "loss": 2.5385, "step": 2875 }, { "epoch": 0.88, "learning_rate": 3.8236834094368346e-05, "loss": 2.502, "step": 2900 }, { "epoch": 0.89, "learning_rate": 3.8221613394216136e-05, "loss": 2.5359, "step": 2925 }, { "epoch": 0.9, "learning_rate": 3.820639269406393e-05, "loss": 2.5942, "step": 2950 }, { "epoch": 0.91, "learning_rate": 3.819117199391172e-05, "loss": 2.5167, "step": 2975 }, { "epoch": 0.91, "learning_rate": 3.817595129375952e-05, "loss": 2.4514, "step": 3000 }, { "epoch": 0.92, "learning_rate": 3.816073059360731e-05, "loss": 2.6363, "step": 3025 }, { "epoch": 0.93, "learning_rate": 3.8145509893455103e-05, "loss": 2.39, "step": 3050 }, { "epoch": 0.94, "learning_rate": 3.813028919330289e-05, "loss": 2.5625, "step": 3075 }, { "epoch": 0.94, "learning_rate": 3.811506849315069e-05, "loss": 2.6096, "step": 3100 }, { "epoch": 0.95, "learning_rate": 3.809984779299848e-05, "loss": 2.7414, "step": 3125 }, { "epoch": 0.96, "learning_rate": 3.8084627092846275e-05, "loss": 2.6735, "step": 3150 }, { "epoch": 0.97, "learning_rate": 3.8069406392694064e-05, "loss": 2.6245, "step": 3175 }, { "epoch": 0.97, "learning_rate": 3.805418569254186e-05, "loss": 2.5353, "step": 3200 }, { "epoch": 0.98, "learning_rate": 3.803896499238965e-05, "loss": 2.4748, "step": 3225 }, { "epoch": 0.99, "learning_rate": 3.8023744292237446e-05, "loss": 2.6444, "step": 3250 }, { "epoch": 1.0, "learning_rate": 3.800852359208524e-05, "loss": 2.6216, "step": 3275 }, { "epoch": 1.0, "learning_rate": 3.799330289193303e-05, "loss": 2.6074, "step": 3300 }, { "epoch": 1.01, "learning_rate": 3.797808219178083e-05, "loss": 2.5025, "step": 3325 }, { "epoch": 1.02, "learning_rate": 3.796286149162862e-05, "loss": 2.62, "step": 3350 }, { "epoch": 1.03, "learning_rate": 3.794764079147641e-05, "loss": 2.521, "step": 3375 }, { "epoch": 1.04, "learning_rate": 3.79324200913242e-05, "loss": 2.4202, "step": 3400 }, { "epoch": 1.04, "learning_rate": 3.791719939117199e-05, "loss": 2.5935, "step": 3425 }, { "epoch": 1.05, "learning_rate": 3.790197869101979e-05, "loss": 2.5516, "step": 3450 }, { "epoch": 1.06, "learning_rate": 3.7886757990867585e-05, "loss": 2.5356, "step": 3475 }, { "epoch": 1.07, "learning_rate": 3.7871537290715375e-05, "loss": 2.6087, "step": 3500 }, { "epoch": 1.07, "learning_rate": 3.785631659056317e-05, "loss": 2.4974, "step": 3525 }, { "epoch": 1.08, "learning_rate": 3.784109589041096e-05, "loss": 2.4709, "step": 3550 }, { "epoch": 1.09, "learning_rate": 3.7825875190258757e-05, "loss": 2.5459, "step": 3575 }, { "epoch": 1.1, "learning_rate": 3.7810654490106546e-05, "loss": 2.5447, "step": 3600 }, { "epoch": 1.1, "learning_rate": 3.779543378995434e-05, "loss": 2.5014, "step": 3625 }, { "epoch": 1.11, "learning_rate": 3.778021308980214e-05, "loss": 2.56, "step": 3650 }, { "epoch": 1.12, "learning_rate": 3.776499238964993e-05, "loss": 2.5625, "step": 3675 }, { "epoch": 1.13, "learning_rate": 3.774977168949772e-05, "loss": 2.5072, "step": 3700 }, { "epoch": 1.13, "learning_rate": 3.7734550989345514e-05, "loss": 2.5928, "step": 3725 }, { "epoch": 1.14, "learning_rate": 3.77193302891933e-05, "loss": 2.4591, "step": 3750 }, { "epoch": 1.15, "learning_rate": 3.77041095890411e-05, "loss": 2.3873, "step": 3775 }, { "epoch": 1.16, "learning_rate": 3.768888888888889e-05, "loss": 2.468, "step": 3800 }, { "epoch": 1.16, "learning_rate": 3.7673668188736685e-05, "loss": 2.5052, "step": 3825 }, { "epoch": 1.17, "learning_rate": 3.765844748858448e-05, "loss": 2.5028, "step": 3850 }, { "epoch": 1.18, "learning_rate": 3.764322678843227e-05, "loss": 2.516, "step": 3875 }, { "epoch": 1.19, "learning_rate": 3.762800608828007e-05, "loss": 2.514, "step": 3900 }, { "epoch": 1.19, "learning_rate": 3.7612785388127856e-05, "loss": 2.5943, "step": 3925 }, { "epoch": 1.2, "learning_rate": 3.759756468797565e-05, "loss": 2.4263, "step": 3950 }, { "epoch": 1.21, "learning_rate": 3.758234398782344e-05, "loss": 2.3991, "step": 3975 }, { "epoch": 1.22, "learning_rate": 3.756712328767123e-05, "loss": 2.3838, "step": 4000 }, { "epoch": 1.23, "learning_rate": 3.755190258751903e-05, "loss": 2.4018, "step": 4025 }, { "epoch": 1.23, "learning_rate": 3.7536681887366824e-05, "loss": 2.4734, "step": 4050 }, { "epoch": 1.24, "learning_rate": 3.752146118721461e-05, "loss": 2.4167, "step": 4075 }, { "epoch": 1.25, "learning_rate": 3.750624048706241e-05, "loss": 2.3863, "step": 4100 }, { "epoch": 1.26, "learning_rate": 3.74910197869102e-05, "loss": 2.5197, "step": 4125 }, { "epoch": 1.26, "learning_rate": 3.7475799086757995e-05, "loss": 2.4539, "step": 4150 }, { "epoch": 1.27, "learning_rate": 3.7460578386605785e-05, "loss": 2.4153, "step": 4175 }, { "epoch": 1.28, "learning_rate": 3.744535768645358e-05, "loss": 2.5164, "step": 4200 }, { "epoch": 1.29, "learning_rate": 3.743013698630137e-05, "loss": 2.4194, "step": 4225 }, { "epoch": 1.29, "learning_rate": 3.7414916286149167e-05, "loss": 2.4861, "step": 4250 }, { "epoch": 1.3, "learning_rate": 3.739969558599696e-05, "loss": 2.4588, "step": 4275 }, { "epoch": 1.31, "learning_rate": 3.738447488584475e-05, "loss": 2.5212, "step": 4300 }, { "epoch": 1.32, "learning_rate": 3.736925418569254e-05, "loss": 2.3907, "step": 4325 }, { "epoch": 1.32, "learning_rate": 3.735403348554034e-05, "loss": 2.4816, "step": 4350 }, { "epoch": 1.33, "learning_rate": 3.733881278538813e-05, "loss": 2.5161, "step": 4375 }, { "epoch": 1.34, "learning_rate": 3.7323592085235924e-05, "loss": 2.5058, "step": 4400 }, { "epoch": 1.35, "learning_rate": 3.730837138508371e-05, "loss": 2.4269, "step": 4425 }, { "epoch": 1.35, "learning_rate": 3.729315068493151e-05, "loss": 2.5356, "step": 4450 }, { "epoch": 1.36, "learning_rate": 3.7277929984779306e-05, "loss": 2.509, "step": 4475 }, { "epoch": 1.37, "learning_rate": 3.7262709284627095e-05, "loss": 2.397, "step": 4500 }, { "epoch": 1.38, "learning_rate": 3.724748858447489e-05, "loss": 2.5031, "step": 4525 }, { "epoch": 1.39, "learning_rate": 3.723226788432268e-05, "loss": 2.4124, "step": 4550 }, { "epoch": 1.39, "learning_rate": 3.721704718417048e-05, "loss": 2.5102, "step": 4575 }, { "epoch": 1.4, "learning_rate": 3.7201826484018266e-05, "loss": 2.486, "step": 4600 }, { "epoch": 1.41, "learning_rate": 3.7186605783866056e-05, "loss": 2.407, "step": 4625 }, { "epoch": 1.42, "learning_rate": 3.717138508371386e-05, "loss": 2.464, "step": 4650 }, { "epoch": 1.42, "learning_rate": 3.715616438356165e-05, "loss": 2.5452, "step": 4675 }, { "epoch": 1.43, "learning_rate": 3.714094368340944e-05, "loss": 2.4468, "step": 4700 }, { "epoch": 1.44, "learning_rate": 3.7125722983257234e-05, "loss": 2.4068, "step": 4725 }, { "epoch": 1.45, "learning_rate": 3.711050228310502e-05, "loss": 2.4681, "step": 4750 }, { "epoch": 1.45, "learning_rate": 3.709528158295282e-05, "loss": 2.3566, "step": 4775 }, { "epoch": 1.46, "learning_rate": 3.70806697108067e-05, "loss": 2.4723, "step": 4800 }, { "epoch": 1.47, "learning_rate": 3.7065449010654495e-05, "loss": 2.4191, "step": 4825 }, { "epoch": 1.48, "learning_rate": 3.7050228310502285e-05, "loss": 2.5294, "step": 4850 }, { "epoch": 1.48, "learning_rate": 3.703500761035008e-05, "loss": 2.7049, "step": 4875 }, { "epoch": 1.49, "learning_rate": 3.701978691019787e-05, "loss": 2.5428, "step": 4900 }, { "epoch": 1.5, "learning_rate": 3.700456621004567e-05, "loss": 2.4997, "step": 4925 }, { "epoch": 1.51, "learning_rate": 3.6989345509893456e-05, "loss": 2.3452, "step": 4950 }, { "epoch": 1.51, "learning_rate": 3.697412480974125e-05, "loss": 2.5259, "step": 4975 }, { "epoch": 1.52, "learning_rate": 3.695890410958904e-05, "loss": 2.3965, "step": 5000 }, { "epoch": 1.53, "learning_rate": 3.694368340943684e-05, "loss": 2.501, "step": 5025 }, { "epoch": 1.54, "learning_rate": 3.692846270928463e-05, "loss": 2.4793, "step": 5050 }, { "epoch": 1.54, "learning_rate": 3.6913242009132424e-05, "loss": 2.5491, "step": 5075 }, { "epoch": 1.55, "learning_rate": 3.689802130898021e-05, "loss": 2.4435, "step": 5100 }, { "epoch": 1.56, "learning_rate": 3.688280060882801e-05, "loss": 2.4705, "step": 5125 }, { "epoch": 1.57, "learning_rate": 3.6867579908675806e-05, "loss": 2.475, "step": 5150 }, { "epoch": 1.58, "learning_rate": 3.6852359208523595e-05, "loss": 2.5521, "step": 5175 }, { "epoch": 1.58, "learning_rate": 3.683713850837139e-05, "loss": 2.5303, "step": 5200 }, { "epoch": 1.59, "learning_rate": 3.682191780821918e-05, "loss": 2.4951, "step": 5225 }, { "epoch": 1.6, "learning_rate": 3.680669710806697e-05, "loss": 2.4559, "step": 5250 }, { "epoch": 1.61, "learning_rate": 3.6791476407914766e-05, "loss": 2.5772, "step": 5275 }, { "epoch": 1.61, "learning_rate": 3.6776255707762556e-05, "loss": 2.4943, "step": 5300 }, { "epoch": 1.62, "learning_rate": 3.676164383561644e-05, "loss": 2.4351, "step": 5325 }, { "epoch": 1.63, "learning_rate": 3.674642313546423e-05, "loss": 2.5454, "step": 5350 }, { "epoch": 1.64, "learning_rate": 3.673120243531203e-05, "loss": 2.492, "step": 5375 }, { "epoch": 1.64, "learning_rate": 3.6715981735159824e-05, "loss": 2.5228, "step": 5400 }, { "epoch": 1.65, "learning_rate": 3.6700761035007614e-05, "loss": 2.4511, "step": 5425 }, { "epoch": 1.66, "learning_rate": 3.668554033485541e-05, "loss": 2.4755, "step": 5450 }, { "epoch": 1.67, "learning_rate": 3.66703196347032e-05, "loss": 2.4568, "step": 5475 }, { "epoch": 1.67, "learning_rate": 3.6655098934550995e-05, "loss": 2.4765, "step": 5500 }, { "epoch": 1.68, "learning_rate": 3.6639878234398785e-05, "loss": 2.4977, "step": 5525 }, { "epoch": 1.69, "learning_rate": 3.6624657534246574e-05, "loss": 2.5427, "step": 5550 }, { "epoch": 1.7, "learning_rate": 3.660943683409437e-05, "loss": 2.6176, "step": 5575 }, { "epoch": 1.7, "learning_rate": 3.659421613394217e-05, "loss": 2.5213, "step": 5600 }, { "epoch": 1.71, "learning_rate": 3.6578995433789956e-05, "loss": 2.5476, "step": 5625 }, { "epoch": 1.72, "learning_rate": 3.656377473363775e-05, "loss": 2.4277, "step": 5650 }, { "epoch": 1.73, "learning_rate": 3.654855403348554e-05, "loss": 2.5481, "step": 5675 }, { "epoch": 1.74, "learning_rate": 3.653333333333334e-05, "loss": 2.3107, "step": 5700 }, { "epoch": 1.74, "learning_rate": 3.651811263318113e-05, "loss": 2.4638, "step": 5725 }, { "epoch": 1.75, "learning_rate": 3.6502891933028924e-05, "loss": 2.4764, "step": 5750 }, { "epoch": 1.76, "learning_rate": 3.648767123287671e-05, "loss": 2.5169, "step": 5775 }, { "epoch": 1.77, "learning_rate": 3.647245053272451e-05, "loss": 2.5419, "step": 5800 }, { "epoch": 1.77, "learning_rate": 3.6457229832572306e-05, "loss": 2.469, "step": 5825 }, { "epoch": 1.78, "learning_rate": 3.6442009132420095e-05, "loss": 2.5355, "step": 5850 }, { "epoch": 1.79, "learning_rate": 3.6426788432267885e-05, "loss": 2.4811, "step": 5875 }, { "epoch": 1.8, "learning_rate": 3.641156773211568e-05, "loss": 2.3855, "step": 5900 }, { "epoch": 1.8, "learning_rate": 3.639634703196347e-05, "loss": 2.5192, "step": 5925 }, { "epoch": 1.81, "learning_rate": 3.6381126331811267e-05, "loss": 2.5408, "step": 5950 }, { "epoch": 1.82, "learning_rate": 3.6365905631659056e-05, "loss": 2.4392, "step": 5975 }, { "epoch": 1.83, "learning_rate": 3.635068493150685e-05, "loss": 2.33, "step": 6000 }, { "epoch": 1.83, "learning_rate": 3.633546423135465e-05, "loss": 2.3823, "step": 6025 }, { "epoch": 1.84, "learning_rate": 3.632024353120244e-05, "loss": 2.5043, "step": 6050 }, { "epoch": 1.85, "learning_rate": 3.6305022831050234e-05, "loss": 2.5216, "step": 6075 }, { "epoch": 1.86, "learning_rate": 3.6289802130898024e-05, "loss": 2.5068, "step": 6100 }, { "epoch": 1.86, "learning_rate": 3.627458143074582e-05, "loss": 2.4264, "step": 6125 }, { "epoch": 1.87, "learning_rate": 3.625936073059361e-05, "loss": 2.3439, "step": 6150 }, { "epoch": 1.88, "learning_rate": 3.62441400304414e-05, "loss": 2.467, "step": 6175 }, { "epoch": 1.89, "learning_rate": 3.62289193302892e-05, "loss": 2.4853, "step": 6200 }, { "epoch": 1.89, "learning_rate": 3.621369863013699e-05, "loss": 2.4064, "step": 6225 }, { "epoch": 1.9, "learning_rate": 3.619847792998478e-05, "loss": 2.4467, "step": 6250 }, { "epoch": 1.91, "learning_rate": 3.618325722983258e-05, "loss": 2.4435, "step": 6275 }, { "epoch": 1.92, "learning_rate": 3.6168036529680366e-05, "loss": 2.4629, "step": 6300 }, { "epoch": 1.93, "learning_rate": 3.615281582952816e-05, "loss": 2.5577, "step": 6325 }, { "epoch": 1.93, "learning_rate": 3.613759512937595e-05, "loss": 2.5897, "step": 6350 }, { "epoch": 1.94, "learning_rate": 3.612237442922375e-05, "loss": 2.4605, "step": 6375 }, { "epoch": 1.95, "learning_rate": 3.6107153729071544e-05, "loss": 2.4198, "step": 6400 }, { "epoch": 1.96, "learning_rate": 3.6091933028919334e-05, "loss": 2.5914, "step": 6425 }, { "epoch": 1.96, "learning_rate": 3.607671232876713e-05, "loss": 2.4329, "step": 6450 }, { "epoch": 1.97, "learning_rate": 3.606149162861492e-05, "loss": 2.3896, "step": 6475 }, { "epoch": 1.98, "learning_rate": 3.604627092846271e-05, "loss": 2.4753, "step": 6500 }, { "epoch": 1.99, "learning_rate": 3.6031050228310505e-05, "loss": 2.4698, "step": 6525 }, { "epoch": 1.99, "learning_rate": 3.6015829528158295e-05, "loss": 2.4075, "step": 6550 }, { "epoch": 2.0, "learning_rate": 3.600060882800609e-05, "loss": 2.4441, "step": 6575 }, { "epoch": 2.01, "learning_rate": 3.598538812785389e-05, "loss": 2.3363, "step": 6600 }, { "epoch": 2.02, "learning_rate": 3.597016742770168e-05, "loss": 2.3688, "step": 6625 }, { "epoch": 2.02, "learning_rate": 3.595494672754947e-05, "loss": 2.362, "step": 6650 }, { "epoch": 2.03, "learning_rate": 3.593972602739726e-05, "loss": 2.4022, "step": 6675 }, { "epoch": 2.04, "learning_rate": 3.592450532724506e-05, "loss": 2.295, "step": 6700 }, { "epoch": 2.05, "learning_rate": 3.590928462709285e-05, "loss": 2.4906, "step": 6725 }, { "epoch": 2.05, "learning_rate": 3.5894063926940644e-05, "loss": 2.3863, "step": 6750 }, { "epoch": 2.06, "learning_rate": 3.5878843226788434e-05, "loss": 2.4918, "step": 6775 }, { "epoch": 2.07, "learning_rate": 3.586362252663623e-05, "loss": 2.3376, "step": 6800 }, { "epoch": 2.08, "learning_rate": 3.5848401826484026e-05, "loss": 2.4073, "step": 6825 }, { "epoch": 2.09, "learning_rate": 3.5833181126331816e-05, "loss": 2.4083, "step": 6850 }, { "epoch": 2.09, "learning_rate": 3.5817960426179605e-05, "loss": 2.3808, "step": 6875 }, { "epoch": 2.1, "learning_rate": 3.58027397260274e-05, "loss": 2.389, "step": 6900 }, { "epoch": 2.11, "learning_rate": 3.578751902587519e-05, "loss": 2.4459, "step": 6925 }, { "epoch": 2.12, "learning_rate": 3.577229832572299e-05, "loss": 2.3535, "step": 6950 }, { "epoch": 2.12, "learning_rate": 3.5757077625570776e-05, "loss": 2.3977, "step": 6975 }, { "epoch": 2.13, "learning_rate": 3.574185692541857e-05, "loss": 2.4717, "step": 7000 }, { "epoch": 2.14, "learning_rate": 3.572663622526637e-05, "loss": 2.4242, "step": 7025 }, { "epoch": 2.15, "learning_rate": 3.571141552511416e-05, "loss": 2.5278, "step": 7050 }, { "epoch": 2.15, "learning_rate": 3.5696194824961955e-05, "loss": 2.3942, "step": 7075 }, { "epoch": 2.16, "learning_rate": 3.5680974124809744e-05, "loss": 2.4924, "step": 7100 }, { "epoch": 2.17, "learning_rate": 3.5665753424657533e-05, "loss": 2.4798, "step": 7125 }, { "epoch": 2.18, "learning_rate": 3.565053272450533e-05, "loss": 2.4777, "step": 7150 }, { "epoch": 2.18, "learning_rate": 3.563531202435312e-05, "loss": 2.4038, "step": 7175 }, { "epoch": 2.19, "learning_rate": 3.5620091324200915e-05, "loss": 2.4612, "step": 7200 }, { "epoch": 2.2, "learning_rate": 3.560487062404871e-05, "loss": 2.4008, "step": 7225 }, { "epoch": 2.21, "learning_rate": 3.55896499238965e-05, "loss": 2.4106, "step": 7250 }, { "epoch": 2.21, "learning_rate": 3.55744292237443e-05, "loss": 2.3472, "step": 7275 }, { "epoch": 2.22, "learning_rate": 3.555920852359209e-05, "loss": 2.3121, "step": 7300 }, { "epoch": 2.23, "learning_rate": 3.554398782343988e-05, "loss": 2.3585, "step": 7325 }, { "epoch": 2.24, "learning_rate": 3.552876712328767e-05, "loss": 2.421, "step": 7350 }, { "epoch": 2.25, "learning_rate": 3.551354642313547e-05, "loss": 2.3866, "step": 7375 }, { "epoch": 2.25, "learning_rate": 3.5498325722983265e-05, "loss": 2.3897, "step": 7400 }, { "epoch": 2.26, "learning_rate": 3.5483105022831054e-05, "loss": 2.3006, "step": 7425 }, { "epoch": 2.27, "learning_rate": 3.546788432267885e-05, "loss": 2.4119, "step": 7450 }, { "epoch": 2.28, "learning_rate": 3.545266362252664e-05, "loss": 2.3586, "step": 7475 }, { "epoch": 2.28, "learning_rate": 3.543744292237443e-05, "loss": 2.4986, "step": 7500 }, { "epoch": 2.29, "learning_rate": 3.5422222222222226e-05, "loss": 2.401, "step": 7525 }, { "epoch": 2.3, "learning_rate": 3.5407001522070015e-05, "loss": 2.4442, "step": 7550 }, { "epoch": 2.31, "learning_rate": 3.539178082191781e-05, "loss": 2.4115, "step": 7575 }, { "epoch": 2.31, "learning_rate": 3.537656012176561e-05, "loss": 2.3774, "step": 7600 }, { "epoch": 2.32, "learning_rate": 3.53613394216134e-05, "loss": 2.4354, "step": 7625 }, { "epoch": 2.33, "learning_rate": 3.534611872146119e-05, "loss": 2.4084, "step": 7650 }, { "epoch": 2.34, "learning_rate": 3.533089802130898e-05, "loss": 2.3302, "step": 7675 }, { "epoch": 2.34, "learning_rate": 3.531567732115678e-05, "loss": 2.4794, "step": 7700 }, { "epoch": 2.35, "learning_rate": 3.530045662100457e-05, "loss": 2.5226, "step": 7725 }, { "epoch": 2.36, "learning_rate": 3.528523592085236e-05, "loss": 2.4108, "step": 7750 }, { "epoch": 2.37, "learning_rate": 3.5270015220700154e-05, "loss": 2.3983, "step": 7775 }, { "epoch": 2.37, "learning_rate": 3.525479452054795e-05, "loss": 2.355, "step": 7800 }, { "epoch": 2.38, "learning_rate": 3.523957382039574e-05, "loss": 2.4036, "step": 7825 }, { "epoch": 2.39, "learning_rate": 3.5224353120243536e-05, "loss": 2.3806, "step": 7850 }, { "epoch": 2.4, "learning_rate": 3.5209132420091325e-05, "loss": 2.397, "step": 7875 }, { "epoch": 2.4, "learning_rate": 3.519391171993912e-05, "loss": 2.4013, "step": 7900 }, { "epoch": 2.41, "learning_rate": 3.517869101978691e-05, "loss": 2.3221, "step": 7925 }, { "epoch": 2.42, "learning_rate": 3.516347031963471e-05, "loss": 2.4978, "step": 7950 }, { "epoch": 2.43, "learning_rate": 3.51482496194825e-05, "loss": 2.3336, "step": 7975 }, { "epoch": 2.44, "learning_rate": 3.513302891933029e-05, "loss": 2.3407, "step": 8000 }, { "epoch": 2.44, "learning_rate": 3.511780821917809e-05, "loss": 2.3885, "step": 8025 }, { "epoch": 2.45, "learning_rate": 3.510258751902588e-05, "loss": 2.3823, "step": 8050 }, { "epoch": 2.46, "learning_rate": 3.5087366818873675e-05, "loss": 2.3793, "step": 8075 }, { "epoch": 2.47, "learning_rate": 3.5072146118721464e-05, "loss": 2.5151, "step": 8100 }, { "epoch": 2.47, "learning_rate": 3.5056925418569254e-05, "loss": 2.4551, "step": 8125 }, { "epoch": 2.48, "learning_rate": 3.504170471841705e-05, "loss": 2.2755, "step": 8150 }, { "epoch": 2.49, "learning_rate": 3.502648401826484e-05, "loss": 2.3501, "step": 8175 }, { "epoch": 2.5, "learning_rate": 3.5011263318112636e-05, "loss": 2.1867, "step": 8200 }, { "epoch": 2.5, "learning_rate": 3.499604261796043e-05, "loss": 2.4121, "step": 8225 }, { "epoch": 2.51, "learning_rate": 3.498082191780822e-05, "loss": 2.3982, "step": 8250 }, { "epoch": 2.52, "learning_rate": 3.496560121765602e-05, "loss": 2.3759, "step": 8275 }, { "epoch": 2.53, "learning_rate": 3.495038051750381e-05, "loss": 2.392, "step": 8300 }, { "epoch": 2.53, "learning_rate": 3.49351598173516e-05, "loss": 2.3885, "step": 8325 }, { "epoch": 2.54, "learning_rate": 3.491993911719939e-05, "loss": 2.385, "step": 8350 }, { "epoch": 2.55, "learning_rate": 3.490471841704718e-05, "loss": 2.3473, "step": 8375 }, { "epoch": 2.56, "learning_rate": 3.4889497716894985e-05, "loss": 2.3964, "step": 8400 }, { "epoch": 2.56, "learning_rate": 3.4874277016742775e-05, "loss": 2.4064, "step": 8425 }, { "epoch": 2.57, "learning_rate": 3.4859056316590564e-05, "loss": 2.418, "step": 8450 }, { "epoch": 2.58, "learning_rate": 3.484383561643836e-05, "loss": 2.3923, "step": 8475 }, { "epoch": 2.59, "learning_rate": 3.482861491628615e-05, "loss": 2.3131, "step": 8500 }, { "epoch": 2.6, "learning_rate": 3.4813394216133946e-05, "loss": 2.28, "step": 8525 }, { "epoch": 2.6, "learning_rate": 3.4798173515981735e-05, "loss": 2.2658, "step": 8550 }, { "epoch": 2.61, "learning_rate": 3.478295281582953e-05, "loss": 2.3469, "step": 8575 }, { "epoch": 2.62, "learning_rate": 3.476773211567733e-05, "loss": 2.3742, "step": 8600 }, { "epoch": 2.63, "learning_rate": 3.475251141552512e-05, "loss": 2.2876, "step": 8625 }, { "epoch": 2.63, "learning_rate": 3.4737290715372914e-05, "loss": 2.3574, "step": 8650 }, { "epoch": 2.64, "learning_rate": 3.47220700152207e-05, "loss": 2.3034, "step": 8675 }, { "epoch": 2.65, "learning_rate": 3.47068493150685e-05, "loss": 2.4737, "step": 8700 }, { "epoch": 2.66, "learning_rate": 3.469162861491629e-05, "loss": 2.4021, "step": 8725 }, { "epoch": 2.66, "learning_rate": 3.467640791476408e-05, "loss": 2.3528, "step": 8750 }, { "epoch": 2.67, "learning_rate": 3.4661187214611874e-05, "loss": 2.3844, "step": 8775 }, { "epoch": 2.68, "learning_rate": 3.464596651445967e-05, "loss": 2.3548, "step": 8800 }, { "epoch": 2.69, "learning_rate": 3.463074581430746e-05, "loss": 2.3506, "step": 8825 }, { "epoch": 2.69, "learning_rate": 3.4615525114155256e-05, "loss": 2.2949, "step": 8850 }, { "epoch": 2.7, "learning_rate": 3.4600304414003046e-05, "loss": 2.4181, "step": 8875 }, { "epoch": 2.71, "learning_rate": 3.458508371385084e-05, "loss": 2.3763, "step": 8900 }, { "epoch": 2.72, "learning_rate": 3.456986301369863e-05, "loss": 2.3273, "step": 8925 }, { "epoch": 2.72, "learning_rate": 3.455464231354643e-05, "loss": 2.2915, "step": 8950 }, { "epoch": 2.73, "learning_rate": 3.453942161339422e-05, "loss": 2.2817, "step": 8975 }, { "epoch": 2.74, "learning_rate": 3.452420091324201e-05, "loss": 2.4281, "step": 9000 }, { "epoch": 2.75, "learning_rate": 3.450898021308981e-05, "loss": 2.3455, "step": 9025 }, { "epoch": 2.75, "learning_rate": 3.44937595129376e-05, "loss": 2.3443, "step": 9050 }, { "epoch": 2.76, "learning_rate": 3.447853881278539e-05, "loss": 2.3764, "step": 9075 }, { "epoch": 2.77, "learning_rate": 3.4463318112633185e-05, "loss": 2.3281, "step": 9100 }, { "epoch": 2.78, "learning_rate": 3.4448097412480974e-05, "loss": 2.3462, "step": 9125 }, { "epoch": 2.79, "learning_rate": 3.443287671232877e-05, "loss": 2.3027, "step": 9150 }, { "epoch": 2.79, "learning_rate": 3.441765601217656e-05, "loss": 2.3474, "step": 9175 }, { "epoch": 2.8, "learning_rate": 3.4402435312024356e-05, "loss": 2.4758, "step": 9200 }, { "epoch": 2.81, "learning_rate": 3.438721461187215e-05, "loss": 2.4488, "step": 9225 }, { "epoch": 2.82, "learning_rate": 3.437199391171994e-05, "loss": 2.2771, "step": 9250 }, { "epoch": 2.82, "learning_rate": 3.435677321156774e-05, "loss": 2.3559, "step": 9275 }, { "epoch": 2.83, "learning_rate": 3.434155251141553e-05, "loss": 2.4645, "step": 9300 }, { "epoch": 2.84, "learning_rate": 3.4326331811263324e-05, "loss": 2.3265, "step": 9325 }, { "epoch": 2.85, "learning_rate": 3.431111111111111e-05, "loss": 2.3063, "step": 9350 }, { "epoch": 2.85, "learning_rate": 3.42958904109589e-05, "loss": 2.3427, "step": 9375 }, { "epoch": 2.86, "learning_rate": 3.42806697108067e-05, "loss": 2.372, "step": 9400 }, { "epoch": 2.87, "learning_rate": 3.4265449010654495e-05, "loss": 2.4457, "step": 9425 }, { "epoch": 2.88, "learning_rate": 3.4250228310502284e-05, "loss": 2.4012, "step": 9450 }, { "epoch": 2.88, "learning_rate": 3.423500761035008e-05, "loss": 2.4501, "step": 9475 }, { "epoch": 2.89, "learning_rate": 3.421978691019787e-05, "loss": 2.4032, "step": 9500 }, { "epoch": 2.9, "learning_rate": 3.4204566210045666e-05, "loss": 2.2623, "step": 9525 }, { "epoch": 2.91, "learning_rate": 3.4189345509893456e-05, "loss": 2.3914, "step": 9550 }, { "epoch": 2.91, "learning_rate": 3.417412480974125e-05, "loss": 2.2566, "step": 9575 }, { "epoch": 2.92, "learning_rate": 3.415890410958904e-05, "loss": 2.3409, "step": 9600 }, { "epoch": 2.93, "learning_rate": 3.414368340943684e-05, "loss": 2.4972, "step": 9625 }, { "epoch": 2.94, "learning_rate": 3.4128462709284634e-05, "loss": 2.4752, "step": 9650 }, { "epoch": 2.95, "learning_rate": 3.4113242009132423e-05, "loss": 2.3794, "step": 9675 }, { "epoch": 2.95, "learning_rate": 3.40986301369863e-05, "loss": 2.3816, "step": 9700 }, { "epoch": 2.96, "learning_rate": 3.40834094368341e-05, "loss": 2.3872, "step": 9725 }, { "epoch": 2.97, "learning_rate": 3.406818873668189e-05, "loss": 2.3142, "step": 9750 }, { "epoch": 2.98, "learning_rate": 3.4052968036529685e-05, "loss": 2.3174, "step": 9775 }, { "epoch": 2.98, "learning_rate": 3.4037747336377474e-05, "loss": 2.2765, "step": 9800 }, { "epoch": 2.99, "learning_rate": 3.402252663622527e-05, "loss": 2.3761, "step": 9825 }, { "epoch": 3.0, "learning_rate": 3.400730593607306e-05, "loss": 2.2079, "step": 9850 }, { "epoch": 3.01, "learning_rate": 3.3992085235920856e-05, "loss": 2.4002, "step": 9875 }, { "epoch": 3.01, "learning_rate": 3.397686453576865e-05, "loss": 2.3128, "step": 9900 }, { "epoch": 3.02, "learning_rate": 3.396164383561644e-05, "loss": 2.3319, "step": 9925 }, { "epoch": 3.03, "learning_rate": 3.394642313546424e-05, "loss": 2.3102, "step": 9950 }, { "epoch": 3.04, "learning_rate": 3.393120243531203e-05, "loss": 2.3264, "step": 9975 }, { "epoch": 3.04, "learning_rate": 3.391598173515982e-05, "loss": 2.285, "step": 10000 }, { "epoch": 3.05, "learning_rate": 3.390076103500761e-05, "loss": 2.3575, "step": 10025 }, { "epoch": 3.06, "learning_rate": 3.38855403348554e-05, "loss": 2.3238, "step": 10050 }, { "epoch": 3.07, "learning_rate": 3.38703196347032e-05, "loss": 2.3473, "step": 10075 }, { "epoch": 3.07, "learning_rate": 3.3855098934550995e-05, "loss": 2.3622, "step": 10100 }, { "epoch": 3.08, "learning_rate": 3.3839878234398785e-05, "loss": 2.3262, "step": 10125 }, { "epoch": 3.09, "learning_rate": 3.382465753424658e-05, "loss": 2.2765, "step": 10150 }, { "epoch": 3.1, "learning_rate": 3.380943683409437e-05, "loss": 2.3726, "step": 10175 }, { "epoch": 3.11, "learning_rate": 3.3794216133942167e-05, "loss": 2.3951, "step": 10200 }, { "epoch": 3.11, "learning_rate": 3.3778995433789956e-05, "loss": 2.2942, "step": 10225 }, { "epoch": 3.12, "learning_rate": 3.3763774733637745e-05, "loss": 2.2152, "step": 10250 }, { "epoch": 3.13, "learning_rate": 3.374855403348554e-05, "loss": 2.2103, "step": 10275 }, { "epoch": 3.14, "learning_rate": 3.373333333333334e-05, "loss": 2.3017, "step": 10300 }, { "epoch": 3.14, "learning_rate": 3.371811263318113e-05, "loss": 2.34, "step": 10325 }, { "epoch": 3.15, "learning_rate": 3.3702891933028924e-05, "loss": 2.411, "step": 10350 }, { "epoch": 3.16, "learning_rate": 3.368767123287671e-05, "loss": 2.3578, "step": 10375 }, { "epoch": 3.17, "learning_rate": 3.367245053272451e-05, "loss": 2.3459, "step": 10400 }, { "epoch": 3.17, "learning_rate": 3.36572298325723e-05, "loss": 2.37, "step": 10425 }, { "epoch": 3.18, "learning_rate": 3.3642009132420095e-05, "loss": 2.2468, "step": 10450 }, { "epoch": 3.19, "learning_rate": 3.3626788432267884e-05, "loss": 2.347, "step": 10475 }, { "epoch": 3.2, "learning_rate": 3.361156773211568e-05, "loss": 2.2949, "step": 10500 }, { "epoch": 3.2, "learning_rate": 3.359634703196348e-05, "loss": 2.2398, "step": 10525 }, { "epoch": 3.21, "learning_rate": 3.3581126331811266e-05, "loss": 2.3733, "step": 10550 }, { "epoch": 3.22, "learning_rate": 3.356590563165906e-05, "loss": 2.3245, "step": 10575 }, { "epoch": 3.23, "learning_rate": 3.355068493150685e-05, "loss": 2.3472, "step": 10600 }, { "epoch": 3.23, "learning_rate": 3.353546423135464e-05, "loss": 2.2422, "step": 10625 }, { "epoch": 3.24, "learning_rate": 3.352024353120244e-05, "loss": 2.2771, "step": 10650 }, { "epoch": 3.25, "learning_rate": 3.350502283105023e-05, "loss": 2.3022, "step": 10675 }, { "epoch": 3.26, "learning_rate": 3.348980213089802e-05, "loss": 2.363, "step": 10700 }, { "epoch": 3.26, "learning_rate": 3.347458143074582e-05, "loss": 2.3812, "step": 10725 }, { "epoch": 3.27, "learning_rate": 3.345936073059361e-05, "loss": 2.3246, "step": 10750 }, { "epoch": 3.28, "learning_rate": 3.3444140030441405e-05, "loss": 2.2674, "step": 10775 }, { "epoch": 3.29, "learning_rate": 3.3428919330289195e-05, "loss": 2.2517, "step": 10800 }, { "epoch": 3.3, "learning_rate": 3.341369863013699e-05, "loss": 2.3662, "step": 10825 }, { "epoch": 3.3, "learning_rate": 3.339847792998478e-05, "loss": 2.3391, "step": 10850 }, { "epoch": 3.31, "learning_rate": 3.338325722983257e-05, "loss": 2.3473, "step": 10875 }, { "epoch": 3.32, "learning_rate": 3.336803652968037e-05, "loss": 2.34, "step": 10900 }, { "epoch": 3.33, "learning_rate": 3.335281582952816e-05, "loss": 2.3697, "step": 10925 }, { "epoch": 3.33, "learning_rate": 3.333759512937595e-05, "loss": 2.3801, "step": 10950 }, { "epoch": 3.34, "learning_rate": 3.332237442922375e-05, "loss": 2.2578, "step": 10975 }, { "epoch": 3.35, "learning_rate": 3.330715372907154e-05, "loss": 2.287, "step": 11000 }, { "epoch": 3.36, "learning_rate": 3.3291933028919334e-05, "loss": 2.3065, "step": 11025 }, { "epoch": 3.36, "learning_rate": 3.327671232876712e-05, "loss": 2.3242, "step": 11050 }, { "epoch": 3.37, "learning_rate": 3.326149162861492e-05, "loss": 2.381, "step": 11075 }, { "epoch": 3.38, "learning_rate": 3.3246270928462716e-05, "loss": 2.3044, "step": 11100 }, { "epoch": 3.39, "learning_rate": 3.3231050228310505e-05, "loss": 2.3529, "step": 11125 }, { "epoch": 3.39, "learning_rate": 3.32158295281583e-05, "loss": 2.3205, "step": 11150 }, { "epoch": 3.4, "learning_rate": 3.320060882800609e-05, "loss": 2.2909, "step": 11175 }, { "epoch": 3.41, "learning_rate": 3.318538812785389e-05, "loss": 2.2733, "step": 11200 }, { "epoch": 3.42, "learning_rate": 3.3170167427701676e-05, "loss": 2.3524, "step": 11225 }, { "epoch": 3.42, "learning_rate": 3.3154946727549466e-05, "loss": 2.4237, "step": 11250 }, { "epoch": 3.43, "learning_rate": 3.313972602739726e-05, "loss": 2.3506, "step": 11275 }, { "epoch": 3.44, "learning_rate": 3.312450532724506e-05, "loss": 2.239, "step": 11300 }, { "epoch": 3.45, "learning_rate": 3.310928462709285e-05, "loss": 2.2446, "step": 11325 }, { "epoch": 3.46, "learning_rate": 3.3094063926940644e-05, "loss": 2.4092, "step": 11350 }, { "epoch": 3.46, "learning_rate": 3.307884322678843e-05, "loss": 2.2258, "step": 11375 }, { "epoch": 3.47, "learning_rate": 3.306362252663623e-05, "loss": 2.3767, "step": 11400 }, { "epoch": 3.48, "learning_rate": 3.304840182648402e-05, "loss": 2.3656, "step": 11425 }, { "epoch": 3.49, "learning_rate": 3.3033181126331815e-05, "loss": 2.3565, "step": 11450 }, { "epoch": 3.49, "learning_rate": 3.3017960426179605e-05, "loss": 2.3451, "step": 11475 }, { "epoch": 3.5, "learning_rate": 3.30027397260274e-05, "loss": 2.3693, "step": 11500 }, { "epoch": 3.51, "learning_rate": 3.29875190258752e-05, "loss": 2.2994, "step": 11525 }, { "epoch": 3.52, "learning_rate": 3.297229832572299e-05, "loss": 2.3294, "step": 11550 }, { "epoch": 3.52, "learning_rate": 3.2957077625570776e-05, "loss": 2.3281, "step": 11575 }, { "epoch": 3.53, "learning_rate": 3.294185692541857e-05, "loss": 2.2991, "step": 11600 }, { "epoch": 3.54, "learning_rate": 3.292663622526636e-05, "loss": 2.4062, "step": 11625 }, { "epoch": 3.55, "learning_rate": 3.291141552511416e-05, "loss": 2.2678, "step": 11650 }, { "epoch": 3.55, "learning_rate": 3.289619482496195e-05, "loss": 2.3024, "step": 11675 }, { "epoch": 3.56, "learning_rate": 3.2880974124809744e-05, "loss": 2.2003, "step": 11700 }, { "epoch": 3.57, "learning_rate": 3.286575342465754e-05, "loss": 2.3133, "step": 11725 }, { "epoch": 3.58, "learning_rate": 3.285053272450533e-05, "loss": 2.347, "step": 11750 }, { "epoch": 3.58, "learning_rate": 3.2835312024353126e-05, "loss": 2.2283, "step": 11775 }, { "epoch": 3.59, "learning_rate": 3.2820091324200915e-05, "loss": 2.293, "step": 11800 }, { "epoch": 3.6, "learning_rate": 3.280487062404871e-05, "loss": 2.3133, "step": 11825 }, { "epoch": 3.61, "learning_rate": 3.27896499238965e-05, "loss": 2.306, "step": 11850 }, { "epoch": 3.61, "learning_rate": 3.277442922374429e-05, "loss": 2.3111, "step": 11875 }, { "epoch": 3.62, "learning_rate": 3.275920852359209e-05, "loss": 2.2691, "step": 11900 }, { "epoch": 3.63, "learning_rate": 3.274398782343988e-05, "loss": 2.4566, "step": 11925 }, { "epoch": 3.64, "learning_rate": 3.272876712328767e-05, "loss": 2.3288, "step": 11950 }, { "epoch": 3.65, "learning_rate": 3.271354642313547e-05, "loss": 2.2988, "step": 11975 }, { "epoch": 3.65, "learning_rate": 3.269832572298326e-05, "loss": 2.2849, "step": 12000 }, { "epoch": 3.66, "learning_rate": 3.2683105022831054e-05, "loss": 2.2855, "step": 12025 }, { "epoch": 3.67, "learning_rate": 3.2667884322678843e-05, "loss": 2.3684, "step": 12050 }, { "epoch": 3.68, "learning_rate": 3.265266362252664e-05, "loss": 2.2872, "step": 12075 }, { "epoch": 3.68, "learning_rate": 3.2637442922374436e-05, "loss": 2.2523, "step": 12100 }, { "epoch": 3.69, "learning_rate": 3.2622222222222225e-05, "loss": 2.1726, "step": 12125 }, { "epoch": 3.7, "learning_rate": 3.260700152207002e-05, "loss": 2.4131, "step": 12150 }, { "epoch": 3.71, "learning_rate": 3.259178082191781e-05, "loss": 2.1719, "step": 12175 }, { "epoch": 3.71, "learning_rate": 3.25765601217656e-05, "loss": 2.3665, "step": 12200 }, { "epoch": 3.72, "learning_rate": 3.25613394216134e-05, "loss": 2.3097, "step": 12225 }, { "epoch": 3.73, "learning_rate": 3.2546118721461186e-05, "loss": 2.3808, "step": 12250 }, { "epoch": 3.74, "learning_rate": 3.253089802130898e-05, "loss": 2.3309, "step": 12275 }, { "epoch": 3.74, "learning_rate": 3.251567732115678e-05, "loss": 2.2463, "step": 12300 }, { "epoch": 3.75, "learning_rate": 3.250045662100457e-05, "loss": 2.2689, "step": 12325 }, { "epoch": 3.76, "learning_rate": 3.2485235920852364e-05, "loss": 2.301, "step": 12350 }, { "epoch": 3.77, "learning_rate": 3.2470015220700154e-05, "loss": 2.2508, "step": 12375 }, { "epoch": 3.77, "learning_rate": 3.245479452054795e-05, "loss": 2.2932, "step": 12400 }, { "epoch": 3.78, "learning_rate": 3.243957382039574e-05, "loss": 2.2948, "step": 12425 }, { "epoch": 3.79, "learning_rate": 3.2424353120243536e-05, "loss": 2.3646, "step": 12450 }, { "epoch": 3.8, "learning_rate": 3.2409132420091325e-05, "loss": 2.279, "step": 12475 }, { "epoch": 3.81, "learning_rate": 3.239391171993912e-05, "loss": 2.2907, "step": 12500 }, { "epoch": 3.81, "learning_rate": 3.2379299847793e-05, "loss": 2.2119, "step": 12525 }, { "epoch": 3.82, "learning_rate": 3.236407914764079e-05, "loss": 2.24, "step": 12550 }, { "epoch": 3.83, "learning_rate": 3.2348858447488587e-05, "loss": 2.3259, "step": 12575 }, { "epoch": 3.84, "learning_rate": 3.233363774733638e-05, "loss": 2.2823, "step": 12600 }, { "epoch": 3.84, "learning_rate": 3.231841704718417e-05, "loss": 2.3152, "step": 12625 }, { "epoch": 3.85, "learning_rate": 3.230319634703197e-05, "loss": 2.2235, "step": 12650 }, { "epoch": 3.86, "learning_rate": 3.228797564687976e-05, "loss": 2.2921, "step": 12675 }, { "epoch": 3.87, "learning_rate": 3.2272754946727554e-05, "loss": 2.2987, "step": 12700 }, { "epoch": 3.87, "learning_rate": 3.2257534246575344e-05, "loss": 2.2525, "step": 12725 }, { "epoch": 3.88, "learning_rate": 3.224231354642313e-05, "loss": 2.31, "step": 12750 }, { "epoch": 3.89, "learning_rate": 3.2227092846270936e-05, "loss": 2.2679, "step": 12775 }, { "epoch": 3.9, "learning_rate": 3.2211872146118725e-05, "loss": 2.3119, "step": 12800 }, { "epoch": 3.9, "learning_rate": 3.2196651445966515e-05, "loss": 2.2647, "step": 12825 }, { "epoch": 3.91, "learning_rate": 3.218143074581431e-05, "loss": 2.2724, "step": 12850 }, { "epoch": 3.92, "learning_rate": 3.21662100456621e-05, "loss": 2.1599, "step": 12875 }, { "epoch": 3.93, "learning_rate": 3.21509893455099e-05, "loss": 2.307, "step": 12900 }, { "epoch": 3.93, "learning_rate": 3.2135768645357686e-05, "loss": 2.282, "step": 12925 }, { "epoch": 3.94, "learning_rate": 3.212054794520548e-05, "loss": 2.2976, "step": 12950 }, { "epoch": 3.95, "learning_rate": 3.210532724505328e-05, "loss": 2.2577, "step": 12975 }, { "epoch": 3.96, "learning_rate": 3.209010654490107e-05, "loss": 2.3745, "step": 13000 }, { "epoch": 3.96, "learning_rate": 3.2074885844748864e-05, "loss": 2.3284, "step": 13025 }, { "epoch": 3.97, "learning_rate": 3.2059665144596654e-05, "loss": 2.2928, "step": 13050 }, { "epoch": 3.98, "learning_rate": 3.204444444444445e-05, "loss": 2.3184, "step": 13075 }, { "epoch": 3.99, "learning_rate": 3.202922374429224e-05, "loss": 2.377, "step": 13100 }, { "epoch": 4.0, "learning_rate": 3.201400304414003e-05, "loss": 2.2414, "step": 13125 }, { "epoch": 4.0, "learning_rate": 3.1998782343987825e-05, "loss": 2.2791, "step": 13150 }, { "epoch": 4.01, "learning_rate": 3.198356164383562e-05, "loss": 2.2645, "step": 13175 }, { "epoch": 4.02, "learning_rate": 3.196834094368341e-05, "loss": 2.2783, "step": 13200 }, { "epoch": 4.03, "learning_rate": 3.195312024353121e-05, "loss": 2.2995, "step": 13225 }, { "epoch": 4.03, "learning_rate": 3.1937899543378997e-05, "loss": 2.3162, "step": 13250 }, { "epoch": 4.04, "learning_rate": 3.192267884322679e-05, "loss": 2.273, "step": 13275 }, { "epoch": 4.05, "learning_rate": 3.190745814307458e-05, "loss": 2.1421, "step": 13300 }, { "epoch": 4.06, "learning_rate": 3.189223744292238e-05, "loss": 2.2324, "step": 13325 }, { "epoch": 4.06, "learning_rate": 3.187701674277017e-05, "loss": 2.148, "step": 13350 }, { "epoch": 4.07, "learning_rate": 3.1861796042617964e-05, "loss": 2.2015, "step": 13375 }, { "epoch": 4.08, "learning_rate": 3.184657534246576e-05, "loss": 2.2374, "step": 13400 }, { "epoch": 4.09, "learning_rate": 3.183135464231355e-05, "loss": 2.172, "step": 13425 }, { "epoch": 4.09, "learning_rate": 3.181613394216134e-05, "loss": 2.2591, "step": 13450 }, { "epoch": 4.1, "learning_rate": 3.1800913242009136e-05, "loss": 2.3048, "step": 13475 }, { "epoch": 4.11, "learning_rate": 3.1785692541856925e-05, "loss": 2.2699, "step": 13500 }, { "epoch": 4.12, "learning_rate": 3.177047184170472e-05, "loss": 2.2023, "step": 13525 }, { "epoch": 4.12, "learning_rate": 3.175525114155251e-05, "loss": 2.2619, "step": 13550 }, { "epoch": 4.13, "learning_rate": 3.174003044140031e-05, "loss": 2.2799, "step": 13575 }, { "epoch": 4.14, "learning_rate": 3.17248097412481e-05, "loss": 2.2879, "step": 13600 }, { "epoch": 4.15, "learning_rate": 3.170958904109589e-05, "loss": 2.2305, "step": 13625 }, { "epoch": 4.16, "learning_rate": 3.169436834094369e-05, "loss": 2.2701, "step": 13650 }, { "epoch": 4.16, "learning_rate": 3.167914764079148e-05, "loss": 2.1813, "step": 13675 }, { "epoch": 4.17, "learning_rate": 3.1663926940639274e-05, "loss": 2.239, "step": 13700 }, { "epoch": 4.18, "learning_rate": 3.1648706240487064e-05, "loss": 2.1826, "step": 13725 }, { "epoch": 4.19, "learning_rate": 3.1633485540334853e-05, "loss": 2.2252, "step": 13750 }, { "epoch": 4.19, "learning_rate": 3.1618264840182656e-05, "loss": 2.1614, "step": 13775 }, { "epoch": 4.2, "learning_rate": 3.1603044140030446e-05, "loss": 2.2616, "step": 13800 }, { "epoch": 4.21, "learning_rate": 3.1587823439878235e-05, "loss": 2.174, "step": 13825 }, { "epoch": 4.22, "learning_rate": 3.157260273972603e-05, "loss": 2.176, "step": 13850 }, { "epoch": 4.22, "learning_rate": 3.155738203957382e-05, "loss": 2.3032, "step": 13875 }, { "epoch": 4.23, "learning_rate": 3.154216133942162e-05, "loss": 2.3267, "step": 13900 }, { "epoch": 4.24, "learning_rate": 3.152694063926941e-05, "loss": 2.2344, "step": 13925 }, { "epoch": 4.25, "learning_rate": 3.15117199391172e-05, "loss": 2.2329, "step": 13950 }, { "epoch": 4.25, "learning_rate": 3.1496499238965e-05, "loss": 2.2776, "step": 13975 }, { "epoch": 4.26, "learning_rate": 3.148127853881279e-05, "loss": 2.2616, "step": 14000 }, { "epoch": 4.27, "learning_rate": 3.1466057838660585e-05, "loss": 2.2726, "step": 14025 }, { "epoch": 4.28, "learning_rate": 3.1450837138508374e-05, "loss": 2.2973, "step": 14050 }, { "epoch": 4.28, "learning_rate": 3.1435616438356164e-05, "loss": 2.4038, "step": 14075 }, { "epoch": 4.29, "learning_rate": 3.142039573820396e-05, "loss": 2.1422, "step": 14100 }, { "epoch": 4.3, "learning_rate": 3.140517503805175e-05, "loss": 2.2385, "step": 14125 }, { "epoch": 4.31, "learning_rate": 3.1389954337899546e-05, "loss": 2.3466, "step": 14150 }, { "epoch": 4.32, "learning_rate": 3.137473363774734e-05, "loss": 2.3624, "step": 14175 }, { "epoch": 4.32, "learning_rate": 3.135951293759513e-05, "loss": 2.1896, "step": 14200 }, { "epoch": 4.33, "learning_rate": 3.134429223744293e-05, "loss": 2.2879, "step": 14225 }, { "epoch": 4.34, "learning_rate": 3.132907153729072e-05, "loss": 2.2648, "step": 14250 }, { "epoch": 4.35, "learning_rate": 3.131385083713851e-05, "loss": 2.1587, "step": 14275 }, { "epoch": 4.35, "learning_rate": 3.12986301369863e-05, "loss": 2.2501, "step": 14300 }, { "epoch": 4.36, "learning_rate": 3.12834094368341e-05, "loss": 2.3226, "step": 14325 }, { "epoch": 4.37, "learning_rate": 3.126818873668189e-05, "loss": 2.2835, "step": 14350 }, { "epoch": 4.38, "learning_rate": 3.1252968036529685e-05, "loss": 2.1606, "step": 14375 }, { "epoch": 4.38, "learning_rate": 3.123774733637748e-05, "loss": 2.3472, "step": 14400 }, { "epoch": 4.39, "learning_rate": 3.122252663622527e-05, "loss": 2.3695, "step": 14425 }, { "epoch": 4.4, "learning_rate": 3.120730593607306e-05, "loss": 2.3861, "step": 14450 }, { "epoch": 4.41, "learning_rate": 3.1192085235920856e-05, "loss": 2.169, "step": 14475 }, { "epoch": 4.41, "learning_rate": 3.1176864535768645e-05, "loss": 2.2452, "step": 14500 }, { "epoch": 4.42, "learning_rate": 3.116164383561644e-05, "loss": 2.3446, "step": 14525 }, { "epoch": 4.43, "learning_rate": 3.114642313546423e-05, "loss": 2.265, "step": 14550 }, { "epoch": 4.44, "learning_rate": 3.113120243531203e-05, "loss": 2.3011, "step": 14575 }, { "epoch": 4.44, "learning_rate": 3.1115981735159824e-05, "loss": 2.1554, "step": 14600 }, { "epoch": 4.45, "learning_rate": 3.110076103500761e-05, "loss": 2.3185, "step": 14625 }, { "epoch": 4.46, "learning_rate": 3.108554033485541e-05, "loss": 2.1786, "step": 14650 }, { "epoch": 4.47, "learning_rate": 3.10703196347032e-05, "loss": 2.3914, "step": 14675 }, { "epoch": 4.47, "learning_rate": 3.105509893455099e-05, "loss": 2.2981, "step": 14700 }, { "epoch": 4.48, "learning_rate": 3.1039878234398784e-05, "loss": 2.1446, "step": 14725 }, { "epoch": 4.49, "learning_rate": 3.1024657534246574e-05, "loss": 2.2635, "step": 14750 }, { "epoch": 4.5, "learning_rate": 3.100943683409437e-05, "loss": 2.3167, "step": 14775 }, { "epoch": 4.51, "learning_rate": 3.0994216133942166e-05, "loss": 2.161, "step": 14800 }, { "epoch": 4.51, "learning_rate": 3.0978995433789956e-05, "loss": 2.2632, "step": 14825 }, { "epoch": 4.52, "learning_rate": 3.096377473363775e-05, "loss": 2.2193, "step": 14850 }, { "epoch": 4.53, "learning_rate": 3.094855403348554e-05, "loss": 2.2079, "step": 14875 }, { "epoch": 4.54, "learning_rate": 3.093333333333334e-05, "loss": 2.2739, "step": 14900 }, { "epoch": 4.54, "learning_rate": 3.091811263318113e-05, "loss": 2.2497, "step": 14925 }, { "epoch": 4.55, "learning_rate": 3.090289193302892e-05, "loss": 2.3314, "step": 14950 }, { "epoch": 4.56, "learning_rate": 3.088767123287672e-05, "loss": 2.14, "step": 14975 }, { "epoch": 4.57, "learning_rate": 3.087245053272451e-05, "loss": 2.2944, "step": 15000 }, { "epoch": 4.57, "learning_rate": 3.0857229832572305e-05, "loss": 2.2942, "step": 15025 }, { "epoch": 4.58, "learning_rate": 3.0842009132420095e-05, "loss": 2.2938, "step": 15050 }, { "epoch": 4.59, "learning_rate": 3.0826788432267884e-05, "loss": 2.2439, "step": 15075 }, { "epoch": 4.6, "learning_rate": 3.081156773211568e-05, "loss": 2.3085, "step": 15100 }, { "epoch": 4.6, "learning_rate": 3.079634703196347e-05, "loss": 2.2249, "step": 15125 }, { "epoch": 4.61, "learning_rate": 3.0781126331811266e-05, "loss": 2.2429, "step": 15150 }, { "epoch": 4.62, "learning_rate": 3.076590563165906e-05, "loss": 2.2207, "step": 15175 }, { "epoch": 4.63, "learning_rate": 3.075068493150685e-05, "loss": 2.2028, "step": 15200 }, { "epoch": 4.63, "learning_rate": 3.073546423135465e-05, "loss": 2.1323, "step": 15225 }, { "epoch": 4.64, "learning_rate": 3.072024353120244e-05, "loss": 2.2831, "step": 15250 }, { "epoch": 4.65, "learning_rate": 3.0705022831050234e-05, "loss": 2.2963, "step": 15275 }, { "epoch": 4.66, "learning_rate": 3.068980213089802e-05, "loss": 2.29, "step": 15300 }, { "epoch": 4.67, "learning_rate": 3.067458143074581e-05, "loss": 2.2412, "step": 15325 }, { "epoch": 4.67, "learning_rate": 3.065936073059361e-05, "loss": 2.3399, "step": 15350 }, { "epoch": 4.68, "learning_rate": 3.0644140030441405e-05, "loss": 2.1743, "step": 15375 }, { "epoch": 4.69, "learning_rate": 3.0628919330289194e-05, "loss": 2.2421, "step": 15400 }, { "epoch": 4.7, "learning_rate": 3.061369863013699e-05, "loss": 2.337, "step": 15425 }, { "epoch": 4.7, "learning_rate": 3.059847792998478e-05, "loss": 2.3268, "step": 15450 }, { "epoch": 4.71, "learning_rate": 3.0583257229832576e-05, "loss": 2.2685, "step": 15475 }, { "epoch": 4.72, "learning_rate": 3.0568036529680366e-05, "loss": 2.2797, "step": 15500 }, { "epoch": 4.73, "learning_rate": 3.055281582952816e-05, "loss": 2.27, "step": 15525 }, { "epoch": 4.73, "learning_rate": 3.053759512937595e-05, "loss": 2.2245, "step": 15550 }, { "epoch": 4.74, "learning_rate": 3.052237442922375e-05, "loss": 2.25, "step": 15575 }, { "epoch": 4.75, "learning_rate": 3.050715372907154e-05, "loss": 2.1833, "step": 15600 }, { "epoch": 4.76, "learning_rate": 3.0491933028919333e-05, "loss": 2.2166, "step": 15625 }, { "epoch": 4.76, "learning_rate": 3.0476712328767126e-05, "loss": 2.3001, "step": 15650 }, { "epoch": 4.77, "learning_rate": 3.046149162861492e-05, "loss": 2.2848, "step": 15675 }, { "epoch": 4.78, "learning_rate": 3.0446270928462712e-05, "loss": 2.1487, "step": 15700 }, { "epoch": 4.79, "learning_rate": 3.0431050228310505e-05, "loss": 2.1833, "step": 15725 }, { "epoch": 4.79, "learning_rate": 3.0415829528158298e-05, "loss": 2.1392, "step": 15750 }, { "epoch": 4.8, "learning_rate": 3.0400608828006094e-05, "loss": 2.2441, "step": 15775 }, { "epoch": 4.81, "learning_rate": 3.0385388127853887e-05, "loss": 2.2952, "step": 15800 }, { "epoch": 4.82, "learning_rate": 3.037016742770168e-05, "loss": 2.1949, "step": 15825 }, { "epoch": 4.82, "learning_rate": 3.035494672754947e-05, "loss": 2.2957, "step": 15850 }, { "epoch": 4.83, "learning_rate": 3.0339726027397262e-05, "loss": 2.1833, "step": 15875 }, { "epoch": 4.84, "learning_rate": 3.0324505327245055e-05, "loss": 2.2777, "step": 15900 }, { "epoch": 4.85, "learning_rate": 3.0309284627092847e-05, "loss": 2.3056, "step": 15925 }, { "epoch": 4.86, "learning_rate": 3.029406392694064e-05, "loss": 2.2749, "step": 15950 }, { "epoch": 4.86, "learning_rate": 3.0278843226788436e-05, "loss": 2.3683, "step": 15975 }, { "epoch": 4.87, "learning_rate": 3.026362252663623e-05, "loss": 2.3215, "step": 16000 }, { "epoch": 4.88, "learning_rate": 3.0248401826484022e-05, "loss": 2.1942, "step": 16025 }, { "epoch": 4.89, "learning_rate": 3.0233181126331815e-05, "loss": 2.17, "step": 16050 }, { "epoch": 4.89, "learning_rate": 3.0217960426179608e-05, "loss": 2.2755, "step": 16075 }, { "epoch": 4.9, "learning_rate": 3.02027397260274e-05, "loss": 2.2263, "step": 16100 }, { "epoch": 4.91, "learning_rate": 3.018751902587519e-05, "loss": 2.2477, "step": 16125 }, { "epoch": 4.92, "learning_rate": 3.0172298325722983e-05, "loss": 2.3542, "step": 16150 }, { "epoch": 4.92, "learning_rate": 3.0157077625570783e-05, "loss": 2.2229, "step": 16175 }, { "epoch": 4.93, "learning_rate": 3.0141856925418572e-05, "loss": 2.1218, "step": 16200 }, { "epoch": 4.94, "learning_rate": 3.0126636225266365e-05, "loss": 2.2416, "step": 16225 }, { "epoch": 4.95, "learning_rate": 3.0112024353120244e-05, "loss": 2.2834, "step": 16250 }, { "epoch": 4.95, "learning_rate": 3.009680365296804e-05, "loss": 2.2788, "step": 16275 }, { "epoch": 4.96, "learning_rate": 3.0081582952815833e-05, "loss": 2.1984, "step": 16300 }, { "epoch": 4.97, "learning_rate": 3.0066362252663626e-05, "loss": 2.2963, "step": 16325 }, { "epoch": 4.98, "learning_rate": 3.005114155251142e-05, "loss": 2.2688, "step": 16350 }, { "epoch": 4.98, "learning_rate": 3.0035920852359212e-05, "loss": 2.2477, "step": 16375 }, { "epoch": 4.99, "learning_rate": 3.0020700152207e-05, "loss": 2.2984, "step": 16400 }, { "epoch": 5.0, "learning_rate": 3.0005479452054794e-05, "loss": 2.1656, "step": 16425 }, { "epoch": 5.01, "learning_rate": 2.9990258751902587e-05, "loss": 2.2591, "step": 16450 }, { "epoch": 5.02, "learning_rate": 2.9975038051750383e-05, "loss": 2.129, "step": 16475 }, { "epoch": 5.02, "learning_rate": 2.9959817351598176e-05, "loss": 2.2228, "step": 16500 }, { "epoch": 5.03, "learning_rate": 2.994459665144597e-05, "loss": 2.2753, "step": 16525 }, { "epoch": 5.04, "learning_rate": 2.9929375951293762e-05, "loss": 2.1261, "step": 16550 }, { "epoch": 5.05, "learning_rate": 2.9914155251141555e-05, "loss": 2.194, "step": 16575 }, { "epoch": 5.05, "learning_rate": 2.9898934550989348e-05, "loss": 2.2243, "step": 16600 }, { "epoch": 5.06, "learning_rate": 2.988371385083714e-05, "loss": 2.1936, "step": 16625 }, { "epoch": 5.07, "learning_rate": 2.9868493150684933e-05, "loss": 2.1148, "step": 16650 }, { "epoch": 5.08, "learning_rate": 2.985327245053273e-05, "loss": 2.215, "step": 16675 }, { "epoch": 5.08, "learning_rate": 2.9838051750380522e-05, "loss": 2.192, "step": 16700 }, { "epoch": 5.09, "learning_rate": 2.9822831050228315e-05, "loss": 2.1374, "step": 16725 }, { "epoch": 5.1, "learning_rate": 2.9807610350076105e-05, "loss": 2.202, "step": 16750 }, { "epoch": 5.11, "learning_rate": 2.9792389649923897e-05, "loss": 2.3239, "step": 16775 }, { "epoch": 5.11, "learning_rate": 2.977716894977169e-05, "loss": 2.2086, "step": 16800 }, { "epoch": 5.12, "learning_rate": 2.9761948249619483e-05, "loss": 2.2127, "step": 16825 }, { "epoch": 5.13, "learning_rate": 2.9746727549467276e-05, "loss": 2.1751, "step": 16850 }, { "epoch": 5.14, "learning_rate": 2.9731506849315072e-05, "loss": 2.2261, "step": 16875 }, { "epoch": 5.14, "learning_rate": 2.9716286149162865e-05, "loss": 2.3385, "step": 16900 }, { "epoch": 5.15, "learning_rate": 2.9701065449010658e-05, "loss": 2.2147, "step": 16925 }, { "epoch": 5.16, "learning_rate": 2.968584474885845e-05, "loss": 2.2131, "step": 16950 }, { "epoch": 5.17, "learning_rate": 2.9670624048706244e-05, "loss": 2.1689, "step": 16975 }, { "epoch": 5.18, "learning_rate": 2.9655403348554036e-05, "loss": 2.1745, "step": 17000 }, { "epoch": 5.18, "learning_rate": 2.9640182648401826e-05, "loss": 2.1453, "step": 17025 }, { "epoch": 5.19, "learning_rate": 2.962496194824962e-05, "loss": 2.2432, "step": 17050 }, { "epoch": 5.2, "learning_rate": 2.9609741248097418e-05, "loss": 2.2764, "step": 17075 }, { "epoch": 5.21, "learning_rate": 2.9594520547945208e-05, "loss": 2.2671, "step": 17100 }, { "epoch": 5.21, "learning_rate": 2.9579299847793e-05, "loss": 2.2056, "step": 17125 }, { "epoch": 5.22, "learning_rate": 2.9564079147640793e-05, "loss": 2.2265, "step": 17150 }, { "epoch": 5.23, "learning_rate": 2.9549467275494676e-05, "loss": 2.2748, "step": 17175 }, { "epoch": 5.24, "learning_rate": 2.953424657534247e-05, "loss": 2.1414, "step": 17200 }, { "epoch": 5.24, "learning_rate": 2.9519025875190262e-05, "loss": 2.218, "step": 17225 }, { "epoch": 5.25, "learning_rate": 2.9503805175038055e-05, "loss": 2.2895, "step": 17250 }, { "epoch": 5.26, "learning_rate": 2.9488584474885848e-05, "loss": 2.2456, "step": 17275 }, { "epoch": 5.27, "learning_rate": 2.9473363774733637e-05, "loss": 2.1579, "step": 17300 }, { "epoch": 5.27, "learning_rate": 2.945814307458143e-05, "loss": 2.1798, "step": 17325 }, { "epoch": 5.28, "learning_rate": 2.944292237442923e-05, "loss": 2.2155, "step": 17350 }, { "epoch": 5.29, "learning_rate": 2.942770167427702e-05, "loss": 2.2606, "step": 17375 }, { "epoch": 5.3, "learning_rate": 2.9412480974124812e-05, "loss": 2.1556, "step": 17400 }, { "epoch": 5.3, "learning_rate": 2.9397260273972605e-05, "loss": 2.2074, "step": 17425 }, { "epoch": 5.31, "learning_rate": 2.9382039573820398e-05, "loss": 2.2237, "step": 17450 }, { "epoch": 5.32, "learning_rate": 2.936681887366819e-05, "loss": 2.1979, "step": 17475 }, { "epoch": 5.33, "learning_rate": 2.9351598173515983e-05, "loss": 2.2213, "step": 17500 }, { "epoch": 5.33, "learning_rate": 2.9336377473363776e-05, "loss": 2.2229, "step": 17525 }, { "epoch": 5.34, "learning_rate": 2.9321156773211572e-05, "loss": 2.2467, "step": 17550 }, { "epoch": 5.35, "learning_rate": 2.9305936073059365e-05, "loss": 2.3175, "step": 17575 }, { "epoch": 5.36, "learning_rate": 2.9290715372907158e-05, "loss": 2.105, "step": 17600 }, { "epoch": 5.37, "learning_rate": 2.927549467275495e-05, "loss": 2.2018, "step": 17625 }, { "epoch": 5.37, "learning_rate": 2.926027397260274e-05, "loss": 2.0804, "step": 17650 }, { "epoch": 5.38, "learning_rate": 2.9245053272450533e-05, "loss": 2.2365, "step": 17675 }, { "epoch": 5.39, "learning_rate": 2.9229832572298326e-05, "loss": 2.1185, "step": 17700 }, { "epoch": 5.4, "learning_rate": 2.921461187214612e-05, "loss": 2.1344, "step": 17725 }, { "epoch": 5.4, "learning_rate": 2.9199391171993915e-05, "loss": 2.2028, "step": 17750 }, { "epoch": 5.41, "learning_rate": 2.9184170471841708e-05, "loss": 2.2562, "step": 17775 }, { "epoch": 5.42, "learning_rate": 2.91689497716895e-05, "loss": 2.2123, "step": 17800 }, { "epoch": 5.43, "learning_rate": 2.9153729071537294e-05, "loss": 2.2453, "step": 17825 }, { "epoch": 5.43, "learning_rate": 2.9138508371385086e-05, "loss": 2.2053, "step": 17850 }, { "epoch": 5.44, "learning_rate": 2.912328767123288e-05, "loss": 2.1562, "step": 17875 }, { "epoch": 5.45, "learning_rate": 2.9108066971080672e-05, "loss": 2.0943, "step": 17900 }, { "epoch": 5.46, "learning_rate": 2.909284627092846e-05, "loss": 2.1485, "step": 17925 }, { "epoch": 5.46, "learning_rate": 2.907762557077626e-05, "loss": 2.2019, "step": 17950 }, { "epoch": 5.47, "learning_rate": 2.9062404870624054e-05, "loss": 2.1521, "step": 17975 }, { "epoch": 5.48, "learning_rate": 2.9047184170471843e-05, "loss": 2.2207, "step": 18000 }, { "epoch": 5.49, "learning_rate": 2.9031963470319636e-05, "loss": 2.1452, "step": 18025 }, { "epoch": 5.49, "learning_rate": 2.901674277016743e-05, "loss": 2.1539, "step": 18050 }, { "epoch": 5.5, "learning_rate": 2.9001522070015222e-05, "loss": 2.1437, "step": 18075 }, { "epoch": 5.51, "learning_rate": 2.8986301369863015e-05, "loss": 2.1719, "step": 18100 }, { "epoch": 5.52, "learning_rate": 2.8971080669710808e-05, "loss": 2.1572, "step": 18125 }, { "epoch": 5.53, "learning_rate": 2.8955859969558604e-05, "loss": 2.2169, "step": 18150 }, { "epoch": 5.53, "learning_rate": 2.8940639269406397e-05, "loss": 2.2541, "step": 18175 }, { "epoch": 5.54, "learning_rate": 2.892541856925419e-05, "loss": 2.2111, "step": 18200 }, { "epoch": 5.55, "learning_rate": 2.8910197869101982e-05, "loss": 2.2755, "step": 18225 }, { "epoch": 5.56, "learning_rate": 2.8894977168949775e-05, "loss": 2.131, "step": 18250 }, { "epoch": 5.56, "learning_rate": 2.8879756468797565e-05, "loss": 2.1832, "step": 18275 }, { "epoch": 5.57, "learning_rate": 2.8864535768645357e-05, "loss": 2.2637, "step": 18300 }, { "epoch": 5.58, "learning_rate": 2.884931506849315e-05, "loss": 2.3163, "step": 18325 }, { "epoch": 5.59, "learning_rate": 2.8834094368340947e-05, "loss": 2.2512, "step": 18350 }, { "epoch": 5.59, "learning_rate": 2.881887366818874e-05, "loss": 2.2196, "step": 18375 }, { "epoch": 5.6, "learning_rate": 2.8803652968036532e-05, "loss": 2.1694, "step": 18400 }, { "epoch": 5.61, "learning_rate": 2.8788432267884325e-05, "loss": 2.1062, "step": 18425 }, { "epoch": 5.62, "learning_rate": 2.8773211567732118e-05, "loss": 2.2445, "step": 18450 }, { "epoch": 5.62, "learning_rate": 2.875799086757991e-05, "loss": 2.2507, "step": 18475 }, { "epoch": 5.63, "learning_rate": 2.8742770167427704e-05, "loss": 2.2063, "step": 18500 }, { "epoch": 5.64, "learning_rate": 2.8727549467275496e-05, "loss": 2.3017, "step": 18525 }, { "epoch": 5.65, "learning_rate": 2.8712328767123293e-05, "loss": 2.1666, "step": 18550 }, { "epoch": 5.65, "learning_rate": 2.8697108066971085e-05, "loss": 2.0839, "step": 18575 }, { "epoch": 5.66, "learning_rate": 2.868188736681888e-05, "loss": 2.2795, "step": 18600 }, { "epoch": 5.67, "learning_rate": 2.8666666666666668e-05, "loss": 2.2939, "step": 18625 }, { "epoch": 5.68, "learning_rate": 2.865144596651446e-05, "loss": 2.2818, "step": 18650 }, { "epoch": 5.68, "learning_rate": 2.8636225266362253e-05, "loss": 2.2006, "step": 18675 }, { "epoch": 5.69, "learning_rate": 2.8621004566210046e-05, "loss": 2.055, "step": 18700 }, { "epoch": 5.7, "learning_rate": 2.860578386605784e-05, "loss": 2.217, "step": 18725 }, { "epoch": 5.71, "learning_rate": 2.8590563165905635e-05, "loss": 2.2419, "step": 18750 }, { "epoch": 5.72, "learning_rate": 2.8575342465753428e-05, "loss": 2.2466, "step": 18775 }, { "epoch": 5.72, "learning_rate": 2.856012176560122e-05, "loss": 2.2655, "step": 18800 }, { "epoch": 5.73, "learning_rate": 2.8544901065449014e-05, "loss": 2.1704, "step": 18825 }, { "epoch": 5.74, "learning_rate": 2.8529680365296807e-05, "loss": 2.2642, "step": 18850 }, { "epoch": 5.75, "learning_rate": 2.85144596651446e-05, "loss": 2.1253, "step": 18875 }, { "epoch": 5.75, "learning_rate": 2.849923896499239e-05, "loss": 2.1265, "step": 18900 }, { "epoch": 5.76, "learning_rate": 2.8484018264840182e-05, "loss": 2.2404, "step": 18925 }, { "epoch": 5.77, "learning_rate": 2.846879756468798e-05, "loss": 2.2131, "step": 18950 }, { "epoch": 5.78, "learning_rate": 2.845357686453577e-05, "loss": 2.2115, "step": 18975 }, { "epoch": 5.78, "learning_rate": 2.8438356164383564e-05, "loss": 2.1654, "step": 19000 }, { "epoch": 5.79, "learning_rate": 2.8423135464231357e-05, "loss": 2.1543, "step": 19025 }, { "epoch": 5.8, "learning_rate": 2.840791476407915e-05, "loss": 2.1774, "step": 19050 }, { "epoch": 5.81, "learning_rate": 2.8392694063926942e-05, "loss": 2.348, "step": 19075 }, { "epoch": 5.81, "learning_rate": 2.8377473363774735e-05, "loss": 2.1481, "step": 19100 }, { "epoch": 5.82, "learning_rate": 2.8362252663622528e-05, "loss": 2.2803, "step": 19125 }, { "epoch": 5.83, "learning_rate": 2.8347031963470324e-05, "loss": 2.1804, "step": 19150 }, { "epoch": 5.84, "learning_rate": 2.8331811263318117e-05, "loss": 2.1929, "step": 19175 }, { "epoch": 5.84, "learning_rate": 2.831659056316591e-05, "loss": 2.1959, "step": 19200 }, { "epoch": 5.85, "learning_rate": 2.8301369863013703e-05, "loss": 2.2637, "step": 19225 }, { "epoch": 5.86, "learning_rate": 2.8286149162861492e-05, "loss": 2.2302, "step": 19250 }, { "epoch": 5.87, "learning_rate": 2.8270928462709285e-05, "loss": 2.2437, "step": 19275 }, { "epoch": 5.88, "learning_rate": 2.8255707762557078e-05, "loss": 2.2423, "step": 19300 }, { "epoch": 5.88, "learning_rate": 2.824048706240487e-05, "loss": 2.2753, "step": 19325 }, { "epoch": 5.89, "learning_rate": 2.8225266362252667e-05, "loss": 2.1957, "step": 19350 }, { "epoch": 5.9, "learning_rate": 2.821004566210046e-05, "loss": 2.2732, "step": 19375 }, { "epoch": 5.91, "learning_rate": 2.8194824961948253e-05, "loss": 2.2094, "step": 19400 }, { "epoch": 5.91, "learning_rate": 2.8179604261796045e-05, "loss": 2.106, "step": 19425 }, { "epoch": 5.92, "learning_rate": 2.8164383561643838e-05, "loss": 2.2468, "step": 19450 }, { "epoch": 5.93, "learning_rate": 2.814916286149163e-05, "loss": 2.1399, "step": 19475 }, { "epoch": 5.94, "learning_rate": 2.8133942161339424e-05, "loss": 2.2894, "step": 19500 }, { "epoch": 5.94, "learning_rate": 2.8118721461187213e-05, "loss": 2.2403, "step": 19525 }, { "epoch": 5.95, "learning_rate": 2.8103500761035013e-05, "loss": 2.1764, "step": 19550 }, { "epoch": 5.96, "learning_rate": 2.8088280060882806e-05, "loss": 2.2491, "step": 19575 }, { "epoch": 5.97, "learning_rate": 2.8073059360730595e-05, "loss": 2.2154, "step": 19600 }, { "epoch": 5.97, "learning_rate": 2.8057838660578388e-05, "loss": 2.1643, "step": 19625 }, { "epoch": 5.98, "learning_rate": 2.804261796042618e-05, "loss": 2.1644, "step": 19650 }, { "epoch": 5.99, "learning_rate": 2.8027397260273974e-05, "loss": 2.1413, "step": 19675 }, { "epoch": 6.0, "learning_rate": 2.8012176560121767e-05, "loss": 2.1563, "step": 19700 }, { "epoch": 6.0, "learning_rate": 2.799695585996956e-05, "loss": 2.2266, "step": 19725 }, { "epoch": 6.01, "learning_rate": 2.7981735159817356e-05, "loss": 2.1458, "step": 19750 }, { "epoch": 6.02, "learning_rate": 2.796651445966515e-05, "loss": 2.1858, "step": 19775 }, { "epoch": 6.03, "learning_rate": 2.795129375951294e-05, "loss": 2.1965, "step": 19800 }, { "epoch": 6.04, "learning_rate": 2.7936073059360734e-05, "loss": 2.1685, "step": 19825 }, { "epoch": 6.04, "learning_rate": 2.7920852359208527e-05, "loss": 2.1751, "step": 19850 }, { "epoch": 6.05, "learning_rate": 2.7905631659056317e-05, "loss": 2.0926, "step": 19875 }, { "epoch": 6.06, "learning_rate": 2.789041095890411e-05, "loss": 2.2655, "step": 19900 }, { "epoch": 6.07, "learning_rate": 2.7875190258751902e-05, "loss": 2.1804, "step": 19925 }, { "epoch": 6.07, "learning_rate": 2.78599695585997e-05, "loss": 2.1363, "step": 19950 }, { "epoch": 6.08, "learning_rate": 2.784474885844749e-05, "loss": 2.2423, "step": 19975 }, { "epoch": 6.09, "learning_rate": 2.7829528158295284e-05, "loss": 2.0739, "step": 20000 }, { "epoch": 6.1, "learning_rate": 2.7814307458143077e-05, "loss": 2.2207, "step": 20025 }, { "epoch": 6.1, "learning_rate": 2.779908675799087e-05, "loss": 2.1476, "step": 20050 }, { "epoch": 6.11, "learning_rate": 2.7783866057838663e-05, "loss": 2.3169, "step": 20075 }, { "epoch": 6.12, "learning_rate": 2.7768645357686455e-05, "loss": 2.1508, "step": 20100 }, { "epoch": 6.13, "learning_rate": 2.775342465753425e-05, "loss": 2.1237, "step": 20125 }, { "epoch": 6.13, "learning_rate": 2.7738203957382045e-05, "loss": 2.2374, "step": 20150 }, { "epoch": 6.14, "learning_rate": 2.7722983257229837e-05, "loss": 2.2625, "step": 20175 }, { "epoch": 6.15, "learning_rate": 2.770776255707763e-05, "loss": 2.1413, "step": 20200 }, { "epoch": 6.16, "learning_rate": 2.769254185692542e-05, "loss": 2.2093, "step": 20225 }, { "epoch": 6.16, "learning_rate": 2.7677321156773213e-05, "loss": 2.1778, "step": 20250 }, { "epoch": 6.17, "learning_rate": 2.7662100456621005e-05, "loss": 2.1181, "step": 20275 }, { "epoch": 6.18, "learning_rate": 2.7646879756468798e-05, "loss": 2.2382, "step": 20300 }, { "epoch": 6.19, "learning_rate": 2.763165905631659e-05, "loss": 2.2121, "step": 20325 }, { "epoch": 6.19, "learning_rate": 2.7616438356164387e-05, "loss": 2.1685, "step": 20350 }, { "epoch": 6.2, "learning_rate": 2.760121765601218e-05, "loss": 2.1297, "step": 20375 }, { "epoch": 6.21, "learning_rate": 2.7585996955859973e-05, "loss": 2.0985, "step": 20400 }, { "epoch": 6.22, "learning_rate": 2.7570776255707766e-05, "loss": 2.2109, "step": 20425 }, { "epoch": 6.23, "learning_rate": 2.755555555555556e-05, "loss": 2.1285, "step": 20450 }, { "epoch": 6.23, "learning_rate": 2.754033485540335e-05, "loss": 2.1805, "step": 20475 }, { "epoch": 6.24, "learning_rate": 2.752511415525114e-05, "loss": 2.2301, "step": 20500 }, { "epoch": 6.25, "learning_rate": 2.7509893455098934e-05, "loss": 2.0494, "step": 20525 }, { "epoch": 6.26, "learning_rate": 2.7494672754946733e-05, "loss": 2.2462, "step": 20550 }, { "epoch": 6.26, "learning_rate": 2.7479452054794523e-05, "loss": 2.174, "step": 20575 }, { "epoch": 6.27, "learning_rate": 2.7464231354642316e-05, "loss": 2.2102, "step": 20600 }, { "epoch": 6.28, "learning_rate": 2.744901065449011e-05, "loss": 2.1673, "step": 20625 }, { "epoch": 6.29, "learning_rate": 2.74337899543379e-05, "loss": 2.1535, "step": 20650 }, { "epoch": 6.29, "learning_rate": 2.7418569254185694e-05, "loss": 2.1412, "step": 20675 }, { "epoch": 6.3, "learning_rate": 2.7403348554033487e-05, "loss": 2.1825, "step": 20700 }, { "epoch": 6.31, "learning_rate": 2.738812785388128e-05, "loss": 2.1337, "step": 20725 }, { "epoch": 6.32, "learning_rate": 2.7372907153729076e-05, "loss": 2.1748, "step": 20750 }, { "epoch": 6.32, "learning_rate": 2.735768645357687e-05, "loss": 2.2003, "step": 20775 }, { "epoch": 6.33, "learning_rate": 2.7342465753424662e-05, "loss": 2.1943, "step": 20800 }, { "epoch": 6.34, "learning_rate": 2.7327245053272455e-05, "loss": 2.188, "step": 20825 }, { "epoch": 6.35, "learning_rate": 2.7312024353120244e-05, "loss": 2.1538, "step": 20850 }, { "epoch": 6.35, "learning_rate": 2.7296803652968037e-05, "loss": 2.1763, "step": 20875 }, { "epoch": 6.36, "learning_rate": 2.728158295281583e-05, "loss": 2.202, "step": 20900 }, { "epoch": 6.37, "learning_rate": 2.7266362252663623e-05, "loss": 2.0576, "step": 20925 }, { "epoch": 6.38, "learning_rate": 2.725114155251142e-05, "loss": 2.0509, "step": 20950 }, { "epoch": 6.39, "learning_rate": 2.723592085235921e-05, "loss": 2.1629, "step": 20975 }, { "epoch": 6.39, "learning_rate": 2.7220700152207005e-05, "loss": 2.2119, "step": 21000 }, { "epoch": 6.4, "learning_rate": 2.7205479452054797e-05, "loss": 2.1668, "step": 21025 }, { "epoch": 6.41, "learning_rate": 2.719025875190259e-05, "loss": 2.1514, "step": 21050 }, { "epoch": 6.42, "learning_rate": 2.7175038051750383e-05, "loss": 2.1272, "step": 21075 }, { "epoch": 6.42, "learning_rate": 2.7159817351598176e-05, "loss": 2.1029, "step": 21100 }, { "epoch": 6.43, "learning_rate": 2.7144596651445965e-05, "loss": 2.112, "step": 21125 }, { "epoch": 6.44, "learning_rate": 2.7129375951293765e-05, "loss": 2.1119, "step": 21150 }, { "epoch": 6.45, "learning_rate": 2.7114155251141558e-05, "loss": 2.0902, "step": 21175 }, { "epoch": 6.45, "learning_rate": 2.7098934550989347e-05, "loss": 2.1778, "step": 21200 }, { "epoch": 6.46, "learning_rate": 2.708371385083714e-05, "loss": 2.0972, "step": 21225 }, { "epoch": 6.47, "learning_rate": 2.7068493150684933e-05, "loss": 2.1768, "step": 21250 }, { "epoch": 6.48, "learning_rate": 2.7053272450532726e-05, "loss": 2.178, "step": 21275 }, { "epoch": 6.48, "learning_rate": 2.703805175038052e-05, "loss": 2.1795, "step": 21300 }, { "epoch": 6.49, "learning_rate": 2.702283105022831e-05, "loss": 2.0706, "step": 21325 }, { "epoch": 6.5, "learning_rate": 2.7007610350076108e-05, "loss": 2.241, "step": 21350 }, { "epoch": 6.51, "learning_rate": 2.69923896499239e-05, "loss": 2.102, "step": 21375 }, { "epoch": 6.51, "learning_rate": 2.6977168949771693e-05, "loss": 2.2229, "step": 21400 }, { "epoch": 6.52, "learning_rate": 2.6961948249619486e-05, "loss": 2.083, "step": 21425 }, { "epoch": 6.53, "learning_rate": 2.694672754946728e-05, "loss": 2.0917, "step": 21450 }, { "epoch": 6.54, "learning_rate": 2.693150684931507e-05, "loss": 2.256, "step": 21475 }, { "epoch": 6.54, "learning_rate": 2.691628614916286e-05, "loss": 2.1306, "step": 21500 }, { "epoch": 6.55, "learning_rate": 2.6901065449010654e-05, "loss": 2.1752, "step": 21525 }, { "epoch": 6.56, "learning_rate": 2.688584474885845e-05, "loss": 2.1803, "step": 21550 }, { "epoch": 6.57, "learning_rate": 2.6870624048706243e-05, "loss": 2.1966, "step": 21575 }, { "epoch": 6.58, "learning_rate": 2.6855403348554036e-05, "loss": 2.193, "step": 21600 }, { "epoch": 6.58, "learning_rate": 2.684018264840183e-05, "loss": 2.1957, "step": 21625 }, { "epoch": 6.59, "learning_rate": 2.6824961948249622e-05, "loss": 2.2211, "step": 21650 }, { "epoch": 6.6, "learning_rate": 2.6809741248097415e-05, "loss": 2.1052, "step": 21675 }, { "epoch": 6.61, "learning_rate": 2.6794520547945207e-05, "loss": 2.1593, "step": 21700 }, { "epoch": 6.61, "learning_rate": 2.6779299847793e-05, "loss": 2.0678, "step": 21725 }, { "epoch": 6.62, "learning_rate": 2.6764079147640796e-05, "loss": 2.2496, "step": 21750 }, { "epoch": 6.63, "learning_rate": 2.674885844748859e-05, "loss": 2.1239, "step": 21775 }, { "epoch": 6.64, "learning_rate": 2.6733637747336382e-05, "loss": 2.1516, "step": 21800 }, { "epoch": 6.64, "learning_rate": 2.671841704718417e-05, "loss": 2.0752, "step": 21825 }, { "epoch": 6.65, "learning_rate": 2.6703196347031964e-05, "loss": 2.2453, "step": 21850 }, { "epoch": 6.66, "learning_rate": 2.6687975646879757e-05, "loss": 2.3072, "step": 21875 }, { "epoch": 6.67, "learning_rate": 2.667275494672755e-05, "loss": 2.1156, "step": 21900 }, { "epoch": 6.67, "learning_rate": 2.6657534246575343e-05, "loss": 2.1962, "step": 21925 }, { "epoch": 6.68, "learning_rate": 2.664231354642314e-05, "loss": 2.0963, "step": 21950 }, { "epoch": 6.69, "learning_rate": 2.6627092846270932e-05, "loss": 2.191, "step": 21975 }, { "epoch": 6.7, "learning_rate": 2.6611872146118725e-05, "loss": 2.2095, "step": 22000 }, { "epoch": 6.7, "learning_rate": 2.6596651445966518e-05, "loss": 2.1171, "step": 22025 }, { "epoch": 6.71, "learning_rate": 2.658143074581431e-05, "loss": 2.0649, "step": 22050 }, { "epoch": 6.72, "learning_rate": 2.6566210045662103e-05, "loss": 2.1147, "step": 22075 }, { "epoch": 6.73, "learning_rate": 2.6550989345509893e-05, "loss": 2.2051, "step": 22100 }, { "epoch": 6.74, "learning_rate": 2.6535768645357686e-05, "loss": 2.2622, "step": 22125 }, { "epoch": 6.74, "learning_rate": 2.6520547945205485e-05, "loss": 2.1057, "step": 22150 }, { "epoch": 6.75, "learning_rate": 2.6505327245053275e-05, "loss": 2.2136, "step": 22175 }, { "epoch": 6.76, "learning_rate": 2.6490106544901068e-05, "loss": 2.1146, "step": 22200 }, { "epoch": 6.77, "learning_rate": 2.647488584474886e-05, "loss": 2.1073, "step": 22225 }, { "epoch": 6.77, "learning_rate": 2.6459665144596653e-05, "loss": 2.2008, "step": 22250 }, { "epoch": 6.78, "learning_rate": 2.6444444444444446e-05, "loss": 2.1423, "step": 22275 }, { "epoch": 6.79, "learning_rate": 2.642922374429224e-05, "loss": 2.1471, "step": 22300 }, { "epoch": 6.8, "learning_rate": 2.6414003044140032e-05, "loss": 2.2099, "step": 22325 }, { "epoch": 6.8, "learning_rate": 2.6398782343987828e-05, "loss": 2.1029, "step": 22350 }, { "epoch": 6.81, "learning_rate": 2.638356164383562e-05, "loss": 2.1722, "step": 22375 }, { "epoch": 6.82, "learning_rate": 2.6368340943683414e-05, "loss": 2.2043, "step": 22400 }, { "epoch": 6.83, "learning_rate": 2.6353120243531207e-05, "loss": 2.144, "step": 22425 }, { "epoch": 6.83, "learning_rate": 2.6337899543378996e-05, "loss": 2.1838, "step": 22450 }, { "epoch": 6.84, "learning_rate": 2.632267884322679e-05, "loss": 2.2831, "step": 22475 }, { "epoch": 6.85, "learning_rate": 2.630745814307458e-05, "loss": 2.1283, "step": 22500 }, { "epoch": 6.86, "learning_rate": 2.6292237442922375e-05, "loss": 2.1583, "step": 22525 }, { "epoch": 6.86, "learning_rate": 2.627701674277017e-05, "loss": 2.1836, "step": 22550 }, { "epoch": 6.87, "learning_rate": 2.6261796042617964e-05, "loss": 2.1956, "step": 22575 }, { "epoch": 6.88, "learning_rate": 2.6246575342465756e-05, "loss": 2.1679, "step": 22600 }, { "epoch": 6.89, "learning_rate": 2.623135464231355e-05, "loss": 2.1463, "step": 22625 }, { "epoch": 6.89, "learning_rate": 2.6216133942161342e-05, "loss": 2.2765, "step": 22650 }, { "epoch": 6.9, "learning_rate": 2.6200913242009135e-05, "loss": 2.094, "step": 22675 }, { "epoch": 6.91, "learning_rate": 2.6185692541856928e-05, "loss": 2.0926, "step": 22700 }, { "epoch": 6.92, "learning_rate": 2.6170471841704717e-05, "loss": 2.0845, "step": 22725 }, { "epoch": 6.93, "learning_rate": 2.6155251141552517e-05, "loss": 2.2262, "step": 22750 }, { "epoch": 6.93, "learning_rate": 2.614003044140031e-05, "loss": 2.1352, "step": 22775 }, { "epoch": 6.94, "learning_rate": 2.61248097412481e-05, "loss": 2.1186, "step": 22800 }, { "epoch": 6.95, "learning_rate": 2.6109589041095892e-05, "loss": 2.1048, "step": 22825 }, { "epoch": 6.96, "learning_rate": 2.6094368340943685e-05, "loss": 2.1266, "step": 22850 }, { "epoch": 6.96, "learning_rate": 2.6079147640791478e-05, "loss": 2.2109, "step": 22875 }, { "epoch": 6.97, "learning_rate": 2.606392694063927e-05, "loss": 2.1962, "step": 22900 }, { "epoch": 6.98, "learning_rate": 2.6048706240487063e-05, "loss": 2.2317, "step": 22925 }, { "epoch": 6.99, "learning_rate": 2.603348554033486e-05, "loss": 2.1549, "step": 22950 }, { "epoch": 6.99, "learning_rate": 2.6018264840182652e-05, "loss": 2.2, "step": 22975 }, { "epoch": 7.0, "learning_rate": 2.6003044140030445e-05, "loss": 2.1342, "step": 23000 }, { "epoch": 7.01, "learning_rate": 2.5987823439878238e-05, "loss": 2.0854, "step": 23025 }, { "epoch": 7.02, "learning_rate": 2.597260273972603e-05, "loss": 2.0935, "step": 23050 }, { "epoch": 7.02, "learning_rate": 2.595738203957382e-05, "loss": 2.0792, "step": 23075 }, { "epoch": 7.03, "learning_rate": 2.5942161339421613e-05, "loss": 2.1502, "step": 23100 }, { "epoch": 7.04, "learning_rate": 2.5926940639269406e-05, "loss": 2.1767, "step": 23125 }, { "epoch": 7.05, "learning_rate": 2.5911719939117202e-05, "loss": 2.2009, "step": 23150 }, { "epoch": 7.05, "learning_rate": 2.5896499238964995e-05, "loss": 2.1147, "step": 23175 }, { "epoch": 7.06, "learning_rate": 2.5881278538812788e-05, "loss": 2.1359, "step": 23200 }, { "epoch": 7.07, "learning_rate": 2.586605783866058e-05, "loss": 2.0548, "step": 23225 }, { "epoch": 7.08, "learning_rate": 2.5850837138508374e-05, "loss": 2.0974, "step": 23250 }, { "epoch": 7.09, "learning_rate": 2.5835616438356166e-05, "loss": 2.0914, "step": 23275 }, { "epoch": 7.09, "learning_rate": 2.582039573820396e-05, "loss": 2.0525, "step": 23300 }, { "epoch": 7.1, "learning_rate": 2.5805175038051752e-05, "loss": 2.1788, "step": 23325 }, { "epoch": 7.11, "learning_rate": 2.578995433789955e-05, "loss": 2.0718, "step": 23350 }, { "epoch": 7.12, "learning_rate": 2.577473363774734e-05, "loss": 2.191, "step": 23375 }, { "epoch": 7.12, "learning_rate": 2.5759512937595134e-05, "loss": 2.0822, "step": 23400 }, { "epoch": 7.13, "learning_rate": 2.5744292237442924e-05, "loss": 2.1245, "step": 23425 }, { "epoch": 7.14, "learning_rate": 2.5729071537290716e-05, "loss": 2.1894, "step": 23450 }, { "epoch": 7.15, "learning_rate": 2.571385083713851e-05, "loss": 2.1632, "step": 23475 }, { "epoch": 7.15, "learning_rate": 2.5698630136986302e-05, "loss": 2.2474, "step": 23500 }, { "epoch": 7.16, "learning_rate": 2.5683409436834095e-05, "loss": 2.1482, "step": 23525 }, { "epoch": 7.17, "learning_rate": 2.566818873668189e-05, "loss": 2.164, "step": 23550 }, { "epoch": 7.18, "learning_rate": 2.5652968036529684e-05, "loss": 2.0679, "step": 23575 }, { "epoch": 7.18, "learning_rate": 2.5637747336377477e-05, "loss": 2.0875, "step": 23600 }, { "epoch": 7.19, "learning_rate": 2.562252663622527e-05, "loss": 2.1351, "step": 23625 }, { "epoch": 7.2, "learning_rate": 2.5607305936073062e-05, "loss": 2.137, "step": 23650 }, { "epoch": 7.21, "learning_rate": 2.5592085235920855e-05, "loss": 2.1051, "step": 23675 }, { "epoch": 7.21, "learning_rate": 2.5576864535768645e-05, "loss": 2.0808, "step": 23700 }, { "epoch": 7.22, "learning_rate": 2.5561643835616438e-05, "loss": 2.1877, "step": 23725 }, { "epoch": 7.23, "learning_rate": 2.5546423135464237e-05, "loss": 2.16, "step": 23750 }, { "epoch": 7.24, "learning_rate": 2.5531202435312027e-05, "loss": 2.1346, "step": 23775 }, { "epoch": 7.25, "learning_rate": 2.551598173515982e-05, "loss": 2.0598, "step": 23800 }, { "epoch": 7.25, "learning_rate": 2.5500761035007612e-05, "loss": 2.1666, "step": 23825 }, { "epoch": 7.26, "learning_rate": 2.5485540334855405e-05, "loss": 2.1896, "step": 23850 }, { "epoch": 7.27, "learning_rate": 2.5470319634703198e-05, "loss": 2.1093, "step": 23875 }, { "epoch": 7.28, "learning_rate": 2.545509893455099e-05, "loss": 2.0752, "step": 23900 }, { "epoch": 7.28, "learning_rate": 2.5439878234398784e-05, "loss": 2.1268, "step": 23925 }, { "epoch": 7.29, "learning_rate": 2.542465753424658e-05, "loss": 2.1263, "step": 23950 }, { "epoch": 7.3, "learning_rate": 2.5409436834094373e-05, "loss": 2.1407, "step": 23975 }, { "epoch": 7.31, "learning_rate": 2.539482496194825e-05, "loss": 2.1309, "step": 24000 }, { "epoch": 7.31, "learning_rate": 2.5379604261796042e-05, "loss": 2.093, "step": 24025 }, { "epoch": 7.32, "learning_rate": 2.5364383561643838e-05, "loss": 2.1654, "step": 24050 }, { "epoch": 7.33, "learning_rate": 2.534916286149163e-05, "loss": 2.1689, "step": 24075 }, { "epoch": 7.34, "learning_rate": 2.5333942161339424e-05, "loss": 2.1654, "step": 24100 }, { "epoch": 7.34, "learning_rate": 2.5318721461187216e-05, "loss": 2.2091, "step": 24125 }, { "epoch": 7.35, "learning_rate": 2.53041095890411e-05, "loss": 2.0029, "step": 24150 }, { "epoch": 7.36, "learning_rate": 2.5288888888888892e-05, "loss": 2.1621, "step": 24175 }, { "epoch": 7.37, "learning_rate": 2.5273668188736685e-05, "loss": 2.1024, "step": 24200 }, { "epoch": 7.37, "learning_rate": 2.5258447488584478e-05, "loss": 2.0642, "step": 24225 }, { "epoch": 7.38, "learning_rate": 2.5243226788432267e-05, "loss": 2.1373, "step": 24250 }, { "epoch": 7.39, "learning_rate": 2.522800608828006e-05, "loss": 2.1484, "step": 24275 }, { "epoch": 7.4, "learning_rate": 2.5212785388127853e-05, "loss": 2.1251, "step": 24300 }, { "epoch": 7.4, "learning_rate": 2.519756468797565e-05, "loss": 2.1474, "step": 24325 }, { "epoch": 7.41, "learning_rate": 2.5182343987823442e-05, "loss": 2.173, "step": 24350 }, { "epoch": 7.42, "learning_rate": 2.5167123287671235e-05, "loss": 2.0536, "step": 24375 }, { "epoch": 7.43, "learning_rate": 2.5151902587519028e-05, "loss": 2.1659, "step": 24400 }, { "epoch": 7.44, "learning_rate": 2.513668188736682e-05, "loss": 2.1325, "step": 24425 }, { "epoch": 7.44, "learning_rate": 2.5121461187214613e-05, "loss": 2.1678, "step": 24450 }, { "epoch": 7.45, "learning_rate": 2.5106240487062406e-05, "loss": 2.1031, "step": 24475 }, { "epoch": 7.46, "learning_rate": 2.50910197869102e-05, "loss": 2.1283, "step": 24500 }, { "epoch": 7.47, "learning_rate": 2.5075799086757995e-05, "loss": 2.0485, "step": 24525 }, { "epoch": 7.47, "learning_rate": 2.5060578386605788e-05, "loss": 2.1137, "step": 24550 }, { "epoch": 7.48, "learning_rate": 2.504535768645358e-05, "loss": 2.043, "step": 24575 }, { "epoch": 7.49, "learning_rate": 2.503013698630137e-05, "loss": 2.128, "step": 24600 }, { "epoch": 7.5, "learning_rate": 2.5014916286149163e-05, "loss": 2.1445, "step": 24625 }, { "epoch": 7.5, "learning_rate": 2.4999695585996956e-05, "loss": 2.1807, "step": 24650 }, { "epoch": 7.51, "learning_rate": 2.498447488584475e-05, "loss": 2.1145, "step": 24675 }, { "epoch": 7.52, "learning_rate": 2.4969254185692542e-05, "loss": 2.1025, "step": 24700 }, { "epoch": 7.53, "learning_rate": 2.4954033485540338e-05, "loss": 2.2375, "step": 24725 }, { "epoch": 7.53, "learning_rate": 2.493881278538813e-05, "loss": 2.0273, "step": 24750 }, { "epoch": 7.54, "learning_rate": 2.4923592085235924e-05, "loss": 2.202, "step": 24775 }, { "epoch": 7.55, "learning_rate": 2.4908371385083717e-05, "loss": 2.0888, "step": 24800 }, { "epoch": 7.56, "learning_rate": 2.48937595129376e-05, "loss": 2.1503, "step": 24825 }, { "epoch": 7.56, "learning_rate": 2.4878538812785392e-05, "loss": 1.9954, "step": 24850 }, { "epoch": 7.57, "learning_rate": 2.4863318112633185e-05, "loss": 2.1307, "step": 24875 }, { "epoch": 7.58, "learning_rate": 2.4848097412480975e-05, "loss": 2.0709, "step": 24900 }, { "epoch": 7.59, "learning_rate": 2.4832876712328767e-05, "loss": 2.0874, "step": 24925 }, { "epoch": 7.6, "learning_rate": 2.481765601217656e-05, "loss": 2.0786, "step": 24950 }, { "epoch": 7.6, "learning_rate": 2.4802435312024353e-05, "loss": 2.1518, "step": 24975 }, { "epoch": 7.61, "learning_rate": 2.478721461187215e-05, "loss": 2.1586, "step": 25000 }, { "epoch": 7.62, "learning_rate": 2.4771993911719942e-05, "loss": 2.0567, "step": 25025 }, { "epoch": 7.63, "learning_rate": 2.4756773211567735e-05, "loss": 2.1463, "step": 25050 }, { "epoch": 7.63, "learning_rate": 2.4741552511415528e-05, "loss": 2.1937, "step": 25075 }, { "epoch": 7.64, "learning_rate": 2.472633181126332e-05, "loss": 2.0548, "step": 25100 }, { "epoch": 7.65, "learning_rate": 2.4711111111111114e-05, "loss": 2.2335, "step": 25125 }, { "epoch": 7.66, "learning_rate": 2.4695890410958906e-05, "loss": 2.0733, "step": 25150 }, { "epoch": 7.66, "learning_rate": 2.4680669710806696e-05, "loss": 2.057, "step": 25175 }, { "epoch": 7.67, "learning_rate": 2.4665449010654495e-05, "loss": 2.0907, "step": 25200 }, { "epoch": 7.68, "learning_rate": 2.465083713850837e-05, "loss": 2.124, "step": 25225 }, { "epoch": 7.69, "learning_rate": 2.4635616438356164e-05, "loss": 2.1252, "step": 25250 }, { "epoch": 7.69, "learning_rate": 2.462039573820396e-05, "loss": 2.135, "step": 25275 }, { "epoch": 7.7, "learning_rate": 2.4605175038051753e-05, "loss": 2.0997, "step": 25300 }, { "epoch": 7.71, "learning_rate": 2.4589954337899546e-05, "loss": 2.1866, "step": 25325 }, { "epoch": 7.72, "learning_rate": 2.457473363774734e-05, "loss": 2.1311, "step": 25350 }, { "epoch": 7.72, "learning_rate": 2.4559512937595132e-05, "loss": 2.1598, "step": 25375 }, { "epoch": 7.73, "learning_rate": 2.4544292237442925e-05, "loss": 1.9998, "step": 25400 }, { "epoch": 7.74, "learning_rate": 2.4529071537290718e-05, "loss": 2.1786, "step": 25425 }, { "epoch": 7.75, "learning_rate": 2.4513850837138507e-05, "loss": 2.1348, "step": 25450 }, { "epoch": 7.75, "learning_rate": 2.4498630136986307e-05, "loss": 2.1772, "step": 25475 }, { "epoch": 7.76, "learning_rate": 2.44834094368341e-05, "loss": 2.1718, "step": 25500 }, { "epoch": 7.77, "learning_rate": 2.446818873668189e-05, "loss": 2.0588, "step": 25525 }, { "epoch": 7.78, "learning_rate": 2.4452968036529682e-05, "loss": 2.1257, "step": 25550 }, { "epoch": 7.79, "learning_rate": 2.4437747336377475e-05, "loss": 2.0751, "step": 25575 }, { "epoch": 7.79, "learning_rate": 2.4422526636225268e-05, "loss": 2.2192, "step": 25600 }, { "epoch": 7.8, "learning_rate": 2.440730593607306e-05, "loss": 2.125, "step": 25625 }, { "epoch": 7.81, "learning_rate": 2.4392085235920853e-05, "loss": 2.0945, "step": 25650 }, { "epoch": 7.82, "learning_rate": 2.437686453576865e-05, "loss": 2.2376, "step": 25675 }, { "epoch": 7.82, "learning_rate": 2.4361643835616442e-05, "loss": 2.1176, "step": 25700 }, { "epoch": 7.83, "learning_rate": 2.4346423135464235e-05, "loss": 2.0887, "step": 25725 }, { "epoch": 7.84, "learning_rate": 2.4331202435312028e-05, "loss": 2.1796, "step": 25750 }, { "epoch": 7.85, "learning_rate": 2.431598173515982e-05, "loss": 2.1368, "step": 25775 }, { "epoch": 7.85, "learning_rate": 2.430076103500761e-05, "loss": 2.2211, "step": 25800 }, { "epoch": 7.86, "learning_rate": 2.4285540334855403e-05, "loss": 2.147, "step": 25825 }, { "epoch": 7.87, "learning_rate": 2.4270319634703196e-05, "loss": 2.2212, "step": 25850 }, { "epoch": 7.88, "learning_rate": 2.4255098934550992e-05, "loss": 2.0163, "step": 25875 }, { "epoch": 7.88, "learning_rate": 2.4239878234398785e-05, "loss": 2.1679, "step": 25900 }, { "epoch": 7.89, "learning_rate": 2.4224657534246578e-05, "loss": 2.1631, "step": 25925 }, { "epoch": 7.9, "learning_rate": 2.420943683409437e-05, "loss": 2.2289, "step": 25950 }, { "epoch": 7.91, "learning_rate": 2.4194216133942164e-05, "loss": 2.1687, "step": 25975 }, { "epoch": 7.91, "learning_rate": 2.4178995433789956e-05, "loss": 2.1701, "step": 26000 }, { "epoch": 7.92, "learning_rate": 2.416377473363775e-05, "loss": 2.1613, "step": 26025 }, { "epoch": 7.93, "learning_rate": 2.4148554033485542e-05, "loss": 2.0793, "step": 26050 }, { "epoch": 7.94, "learning_rate": 2.413333333333334e-05, "loss": 2.2479, "step": 26075 }, { "epoch": 7.95, "learning_rate": 2.411811263318113e-05, "loss": 2.1886, "step": 26100 }, { "epoch": 7.95, "learning_rate": 2.4102891933028924e-05, "loss": 2.2579, "step": 26125 }, { "epoch": 7.96, "learning_rate": 2.4087671232876713e-05, "loss": 2.1238, "step": 26150 }, { "epoch": 7.97, "learning_rate": 2.4072450532724506e-05, "loss": 2.121, "step": 26175 }, { "epoch": 7.98, "learning_rate": 2.40572298325723e-05, "loss": 2.0184, "step": 26200 }, { "epoch": 7.98, "learning_rate": 2.4042009132420092e-05, "loss": 2.0781, "step": 26225 }, { "epoch": 7.99, "learning_rate": 2.4026788432267885e-05, "loss": 2.1665, "step": 26250 }, { "epoch": 8.0, "learning_rate": 2.401156773211568e-05, "loss": 2.078, "step": 26275 }, { "epoch": 8.01, "learning_rate": 2.3996347031963474e-05, "loss": 2.0936, "step": 26300 }, { "epoch": 8.01, "learning_rate": 2.3981126331811267e-05, "loss": 2.1457, "step": 26325 }, { "epoch": 8.02, "learning_rate": 2.396590563165906e-05, "loss": 2.218, "step": 26350 }, { "epoch": 8.03, "learning_rate": 2.3950684931506852e-05, "loss": 2.1591, "step": 26375 }, { "epoch": 8.04, "learning_rate": 2.3935464231354645e-05, "loss": 2.0221, "step": 26400 }, { "epoch": 8.04, "learning_rate": 2.3920243531202435e-05, "loss": 2.0926, "step": 26425 }, { "epoch": 8.05, "learning_rate": 2.3905022831050228e-05, "loss": 2.0845, "step": 26450 }, { "epoch": 8.06, "learning_rate": 2.3889802130898027e-05, "loss": 2.1087, "step": 26475 }, { "epoch": 8.07, "learning_rate": 2.3874581430745817e-05, "loss": 2.1333, "step": 26500 }, { "epoch": 8.07, "learning_rate": 2.385936073059361e-05, "loss": 2.0067, "step": 26525 }, { "epoch": 8.08, "learning_rate": 2.3844140030441402e-05, "loss": 2.1004, "step": 26550 }, { "epoch": 8.09, "learning_rate": 2.3828919330289195e-05, "loss": 2.1132, "step": 26575 }, { "epoch": 8.1, "learning_rate": 2.3813698630136988e-05, "loss": 2.1223, "step": 26600 }, { "epoch": 8.11, "learning_rate": 2.379847792998478e-05, "loss": 2.054, "step": 26625 }, { "epoch": 8.11, "learning_rate": 2.3783257229832574e-05, "loss": 2.0263, "step": 26650 }, { "epoch": 8.12, "learning_rate": 2.376803652968037e-05, "loss": 2.102, "step": 26675 }, { "epoch": 8.13, "learning_rate": 2.3752815829528163e-05, "loss": 2.0161, "step": 26700 }, { "epoch": 8.14, "learning_rate": 2.3737595129375956e-05, "loss": 2.1884, "step": 26725 }, { "epoch": 8.14, "learning_rate": 2.372237442922375e-05, "loss": 2.0326, "step": 26750 }, { "epoch": 8.15, "learning_rate": 2.3707153729071538e-05, "loss": 2.0053, "step": 26775 }, { "epoch": 8.16, "learning_rate": 2.369193302891933e-05, "loss": 2.101, "step": 26800 }, { "epoch": 8.17, "learning_rate": 2.3676712328767124e-05, "loss": 2.1225, "step": 26825 }, { "epoch": 8.17, "learning_rate": 2.3661491628614916e-05, "loss": 2.165, "step": 26850 }, { "epoch": 8.18, "learning_rate": 2.3646270928462713e-05, "loss": 2.0627, "step": 26875 }, { "epoch": 8.19, "learning_rate": 2.3631050228310505e-05, "loss": 2.1096, "step": 26900 }, { "epoch": 8.2, "learning_rate": 2.3615829528158298e-05, "loss": 2.1333, "step": 26925 }, { "epoch": 8.2, "learning_rate": 2.360060882800609e-05, "loss": 2.0718, "step": 26950 }, { "epoch": 8.21, "learning_rate": 2.3585388127853884e-05, "loss": 2.0667, "step": 26975 }, { "epoch": 8.22, "learning_rate": 2.3570167427701677e-05, "loss": 2.0372, "step": 27000 }, { "epoch": 8.23, "learning_rate": 2.355494672754947e-05, "loss": 2.1661, "step": 27025 }, { "epoch": 8.23, "learning_rate": 2.353972602739726e-05, "loss": 2.1163, "step": 27050 }, { "epoch": 8.24, "learning_rate": 2.3524505327245052e-05, "loss": 2.0357, "step": 27075 }, { "epoch": 8.25, "learning_rate": 2.350928462709285e-05, "loss": 2.0945, "step": 27100 }, { "epoch": 8.26, "learning_rate": 2.349406392694064e-05, "loss": 2.2122, "step": 27125 }, { "epoch": 8.26, "learning_rate": 2.3478843226788434e-05, "loss": 2.2343, "step": 27150 }, { "epoch": 8.27, "learning_rate": 2.3463622526636227e-05, "loss": 2.083, "step": 27175 }, { "epoch": 8.28, "learning_rate": 2.344840182648402e-05, "loss": 2.1022, "step": 27200 }, { "epoch": 8.29, "learning_rate": 2.3433181126331812e-05, "loss": 2.0508, "step": 27225 }, { "epoch": 8.3, "learning_rate": 2.3417960426179605e-05, "loss": 2.0839, "step": 27250 }, { "epoch": 8.3, "learning_rate": 2.3402739726027398e-05, "loss": 2.0814, "step": 27275 }, { "epoch": 8.31, "learning_rate": 2.3387519025875194e-05, "loss": 2.0378, "step": 27300 }, { "epoch": 8.32, "learning_rate": 2.3372298325722987e-05, "loss": 2.0934, "step": 27325 }, { "epoch": 8.33, "learning_rate": 2.335707762557078e-05, "loss": 2.0811, "step": 27350 }, { "epoch": 8.33, "learning_rate": 2.3341856925418573e-05, "loss": 2.0548, "step": 27375 }, { "epoch": 8.34, "learning_rate": 2.3326636225266362e-05, "loss": 2.0106, "step": 27400 }, { "epoch": 8.35, "learning_rate": 2.3311415525114155e-05, "loss": 2.0514, "step": 27425 }, { "epoch": 8.36, "learning_rate": 2.3296194824961948e-05, "loss": 2.0765, "step": 27450 }, { "epoch": 8.36, "learning_rate": 2.328097412480974e-05, "loss": 2.2477, "step": 27475 }, { "epoch": 8.37, "learning_rate": 2.3265753424657537e-05, "loss": 2.0045, "step": 27500 }, { "epoch": 8.38, "learning_rate": 2.325053272450533e-05, "loss": 2.1184, "step": 27525 }, { "epoch": 8.39, "learning_rate": 2.3235312024353123e-05, "loss": 2.143, "step": 27550 }, { "epoch": 8.39, "learning_rate": 2.3220091324200915e-05, "loss": 2.0317, "step": 27575 }, { "epoch": 8.4, "learning_rate": 2.320487062404871e-05, "loss": 2.0431, "step": 27600 }, { "epoch": 8.41, "learning_rate": 2.31896499238965e-05, "loss": 2.0331, "step": 27625 }, { "epoch": 8.42, "learning_rate": 2.3174429223744294e-05, "loss": 1.9947, "step": 27650 }, { "epoch": 8.42, "learning_rate": 2.3159208523592083e-05, "loss": 2.143, "step": 27675 }, { "epoch": 8.43, "learning_rate": 2.3143987823439883e-05, "loss": 2.2113, "step": 27700 }, { "epoch": 8.44, "learning_rate": 2.3128767123287676e-05, "loss": 2.0961, "step": 27725 }, { "epoch": 8.45, "learning_rate": 2.3113546423135465e-05, "loss": 2.0829, "step": 27750 }, { "epoch": 8.46, "learning_rate": 2.3098325722983258e-05, "loss": 2.1045, "step": 27775 }, { "epoch": 8.46, "learning_rate": 2.308310502283105e-05, "loss": 2.2277, "step": 27800 }, { "epoch": 8.47, "learning_rate": 2.3067884322678844e-05, "loss": 1.9936, "step": 27825 }, { "epoch": 8.48, "learning_rate": 2.3052663622526637e-05, "loss": 2.1255, "step": 27850 }, { "epoch": 8.49, "learning_rate": 2.303744292237443e-05, "loss": 2.1072, "step": 27875 }, { "epoch": 8.49, "learning_rate": 2.3022222222222226e-05, "loss": 2.0673, "step": 27900 }, { "epoch": 8.5, "learning_rate": 2.300700152207002e-05, "loss": 2.1319, "step": 27925 }, { "epoch": 8.51, "learning_rate": 2.299178082191781e-05, "loss": 2.1209, "step": 27950 }, { "epoch": 8.52, "learning_rate": 2.2977168949771694e-05, "loss": 2.1048, "step": 27975 }, { "epoch": 8.52, "learning_rate": 2.2961948249619487e-05, "loss": 2.0485, "step": 28000 }, { "epoch": 8.53, "learning_rate": 2.2946727549467277e-05, "loss": 2.0914, "step": 28025 }, { "epoch": 8.54, "learning_rate": 2.293150684931507e-05, "loss": 2.0312, "step": 28050 }, { "epoch": 8.55, "learning_rate": 2.2916286149162862e-05, "loss": 2.1114, "step": 28075 }, { "epoch": 8.55, "learning_rate": 2.2901065449010655e-05, "loss": 2.0711, "step": 28100 }, { "epoch": 8.56, "learning_rate": 2.2885844748858448e-05, "loss": 2.182, "step": 28125 }, { "epoch": 8.57, "learning_rate": 2.287062404870624e-05, "loss": 2.1511, "step": 28150 }, { "epoch": 8.58, "learning_rate": 2.2855403348554037e-05, "loss": 2.0635, "step": 28175 }, { "epoch": 8.58, "learning_rate": 2.284018264840183e-05, "loss": 2.0999, "step": 28200 }, { "epoch": 8.59, "learning_rate": 2.2824961948249623e-05, "loss": 2.0751, "step": 28225 }, { "epoch": 8.6, "learning_rate": 2.2809741248097416e-05, "loss": 2.0463, "step": 28250 }, { "epoch": 8.61, "learning_rate": 2.279452054794521e-05, "loss": 2.0961, "step": 28275 }, { "epoch": 8.61, "learning_rate": 2.2779299847792998e-05, "loss": 2.1299, "step": 28300 }, { "epoch": 8.62, "learning_rate": 2.276407914764079e-05, "loss": 2.0544, "step": 28325 }, { "epoch": 8.63, "learning_rate": 2.2748858447488584e-05, "loss": 2.0825, "step": 28350 }, { "epoch": 8.64, "learning_rate": 2.273363774733638e-05, "loss": 2.1132, "step": 28375 }, { "epoch": 8.65, "learning_rate": 2.2718417047184173e-05, "loss": 2.1715, "step": 28400 }, { "epoch": 8.65, "learning_rate": 2.2703196347031965e-05, "loss": 2.0798, "step": 28425 }, { "epoch": 8.66, "learning_rate": 2.268797564687976e-05, "loss": 2.0872, "step": 28450 }, { "epoch": 8.67, "learning_rate": 2.267275494672755e-05, "loss": 2.0075, "step": 28475 }, { "epoch": 8.68, "learning_rate": 2.2657534246575344e-05, "loss": 2.029, "step": 28500 }, { "epoch": 8.68, "learning_rate": 2.2642313546423137e-05, "loss": 2.1095, "step": 28525 }, { "epoch": 8.69, "learning_rate": 2.262709284627093e-05, "loss": 2.144, "step": 28550 }, { "epoch": 8.7, "learning_rate": 2.2611872146118726e-05, "loss": 2.1777, "step": 28575 }, { "epoch": 8.71, "learning_rate": 2.259665144596652e-05, "loss": 2.1214, "step": 28600 }, { "epoch": 8.71, "learning_rate": 2.258143074581431e-05, "loss": 2.1533, "step": 28625 }, { "epoch": 8.72, "learning_rate": 2.25662100456621e-05, "loss": 2.2198, "step": 28650 }, { "epoch": 8.73, "learning_rate": 2.2550989345509894e-05, "loss": 2.1757, "step": 28675 }, { "epoch": 8.74, "learning_rate": 2.2535768645357687e-05, "loss": 1.9718, "step": 28700 }, { "epoch": 8.74, "learning_rate": 2.252054794520548e-05, "loss": 2.06, "step": 28725 }, { "epoch": 8.75, "learning_rate": 2.2505327245053272e-05, "loss": 2.113, "step": 28750 }, { "epoch": 8.76, "learning_rate": 2.249010654490107e-05, "loss": 1.9986, "step": 28775 }, { "epoch": 8.77, "learning_rate": 2.247488584474886e-05, "loss": 2.0891, "step": 28800 }, { "epoch": 8.77, "learning_rate": 2.2459665144596654e-05, "loss": 2.1643, "step": 28825 }, { "epoch": 8.78, "learning_rate": 2.2444444444444447e-05, "loss": 2.1331, "step": 28850 }, { "epoch": 8.79, "learning_rate": 2.242922374429224e-05, "loss": 2.1478, "step": 28875 }, { "epoch": 8.8, "learning_rate": 2.2414003044140033e-05, "loss": 2.1363, "step": 28900 }, { "epoch": 8.81, "learning_rate": 2.2398782343987822e-05, "loss": 2.092, "step": 28925 }, { "epoch": 8.81, "learning_rate": 2.2383561643835615e-05, "loss": 2.1163, "step": 28950 }, { "epoch": 8.82, "learning_rate": 2.2368340943683415e-05, "loss": 2.0726, "step": 28975 }, { "epoch": 8.83, "learning_rate": 2.2353120243531204e-05, "loss": 2.1177, "step": 29000 }, { "epoch": 8.84, "learning_rate": 2.2337899543378997e-05, "loss": 2.1109, "step": 29025 }, { "epoch": 8.84, "learning_rate": 2.232267884322679e-05, "loss": 1.9997, "step": 29050 }, { "epoch": 8.85, "learning_rate": 2.2307458143074583e-05, "loss": 2.1331, "step": 29075 }, { "epoch": 8.86, "learning_rate": 2.2292237442922376e-05, "loss": 2.1436, "step": 29100 }, { "epoch": 8.87, "learning_rate": 2.227701674277017e-05, "loss": 1.897, "step": 29125 }, { "epoch": 8.87, "learning_rate": 2.226179604261796e-05, "loss": 2.1544, "step": 29150 }, { "epoch": 8.88, "learning_rate": 2.2246575342465757e-05, "loss": 2.0765, "step": 29175 }, { "epoch": 8.89, "learning_rate": 2.223135464231355e-05, "loss": 2.0012, "step": 29200 }, { "epoch": 8.9, "learning_rate": 2.2216133942161343e-05, "loss": 2.1186, "step": 29225 }, { "epoch": 8.9, "learning_rate": 2.2200913242009136e-05, "loss": 2.0887, "step": 29250 }, { "epoch": 8.91, "learning_rate": 2.2185692541856925e-05, "loss": 2.109, "step": 29275 }, { "epoch": 8.92, "learning_rate": 2.2170471841704718e-05, "loss": 2.119, "step": 29300 }, { "epoch": 8.93, "learning_rate": 2.215525114155251e-05, "loss": 2.0025, "step": 29325 }, { "epoch": 8.93, "learning_rate": 2.2140030441400304e-05, "loss": 2.0606, "step": 29350 }, { "epoch": 8.94, "learning_rate": 2.21248097412481e-05, "loss": 2.1352, "step": 29375 }, { "epoch": 8.95, "learning_rate": 2.2109589041095893e-05, "loss": 2.1215, "step": 29400 }, { "epoch": 8.96, "learning_rate": 2.2094368340943686e-05, "loss": 2.0059, "step": 29425 }, { "epoch": 8.96, "learning_rate": 2.207914764079148e-05, "loss": 2.1267, "step": 29450 }, { "epoch": 8.97, "learning_rate": 2.206392694063927e-05, "loss": 2.1622, "step": 29475 }, { "epoch": 8.98, "learning_rate": 2.2048706240487064e-05, "loss": 2.1535, "step": 29500 }, { "epoch": 8.99, "learning_rate": 2.2033485540334857e-05, "loss": 2.0931, "step": 29525 }, { "epoch": 9.0, "learning_rate": 2.2018264840182647e-05, "loss": 2.0341, "step": 29550 }, { "epoch": 9.0, "learning_rate": 2.2003044140030446e-05, "loss": 2.0635, "step": 29575 }, { "epoch": 9.01, "learning_rate": 2.198782343987824e-05, "loss": 2.1759, "step": 29600 }, { "epoch": 9.02, "learning_rate": 2.197260273972603e-05, "loss": 1.9937, "step": 29625 }, { "epoch": 9.03, "learning_rate": 2.195738203957382e-05, "loss": 1.9897, "step": 29650 }, { "epoch": 9.03, "learning_rate": 2.1942161339421614e-05, "loss": 2.0304, "step": 29675 }, { "epoch": 9.04, "learning_rate": 2.1926940639269407e-05, "loss": 2.0477, "step": 29700 }, { "epoch": 9.05, "learning_rate": 2.19117199391172e-05, "loss": 1.9738, "step": 29725 }, { "epoch": 9.06, "learning_rate": 2.1896499238964993e-05, "loss": 1.9111, "step": 29750 }, { "epoch": 9.06, "learning_rate": 2.188127853881279e-05, "loss": 1.9978, "step": 29775 }, { "epoch": 9.07, "learning_rate": 2.1866057838660582e-05, "loss": 2.134, "step": 29800 }, { "epoch": 9.08, "learning_rate": 2.1850837138508375e-05, "loss": 2.059, "step": 29825 }, { "epoch": 9.09, "learning_rate": 2.1835616438356168e-05, "loss": 2.0287, "step": 29850 }, { "epoch": 9.09, "learning_rate": 2.182039573820396e-05, "loss": 2.0847, "step": 29875 }, { "epoch": 9.1, "learning_rate": 2.180517503805175e-05, "loss": 1.9959, "step": 29900 }, { "epoch": 9.11, "learning_rate": 2.1789954337899543e-05, "loss": 2.0193, "step": 29925 }, { "epoch": 9.12, "learning_rate": 2.1774733637747335e-05, "loss": 2.0612, "step": 29950 }, { "epoch": 9.12, "learning_rate": 2.1759512937595132e-05, "loss": 2.0862, "step": 29975 }, { "epoch": 9.13, "learning_rate": 2.1744292237442925e-05, "loss": 2.0891, "step": 30000 }, { "epoch": 9.14, "learning_rate": 2.1729071537290717e-05, "loss": 2.0906, "step": 30025 }, { "epoch": 9.15, "learning_rate": 2.171385083713851e-05, "loss": 1.9966, "step": 30050 }, { "epoch": 9.16, "learning_rate": 2.1698630136986303e-05, "loss": 2.0964, "step": 30075 }, { "epoch": 9.16, "learning_rate": 2.1683409436834096e-05, "loss": 2.0682, "step": 30100 }, { "epoch": 9.17, "learning_rate": 2.166818873668189e-05, "loss": 2.0812, "step": 30125 }, { "epoch": 9.18, "learning_rate": 2.165296803652968e-05, "loss": 2.0623, "step": 30150 }, { "epoch": 9.19, "learning_rate": 2.1637747336377478e-05, "loss": 2.1535, "step": 30175 }, { "epoch": 9.19, "learning_rate": 2.162252663622527e-05, "loss": 2.1764, "step": 30200 }, { "epoch": 9.2, "learning_rate": 2.1607305936073064e-05, "loss": 1.9868, "step": 30225 }, { "epoch": 9.21, "learning_rate": 2.1592085235920853e-05, "loss": 2.0232, "step": 30250 }, { "epoch": 9.22, "learning_rate": 2.1576864535768646e-05, "loss": 2.102, "step": 30275 }, { "epoch": 9.22, "learning_rate": 2.156164383561644e-05, "loss": 2.0973, "step": 30300 }, { "epoch": 9.23, "learning_rate": 2.154642313546423e-05, "loss": 2.0408, "step": 30325 }, { "epoch": 9.24, "learning_rate": 2.1531202435312024e-05, "loss": 2.1228, "step": 30350 }, { "epoch": 9.25, "learning_rate": 2.151598173515982e-05, "loss": 2.0236, "step": 30375 }, { "epoch": 9.25, "learning_rate": 2.1500761035007613e-05, "loss": 2.0122, "step": 30400 }, { "epoch": 9.26, "learning_rate": 2.1485540334855406e-05, "loss": 2.0017, "step": 30425 }, { "epoch": 9.27, "learning_rate": 2.14703196347032e-05, "loss": 2.0903, "step": 30450 }, { "epoch": 9.28, "learning_rate": 2.1455098934550992e-05, "loss": 2.0511, "step": 30475 }, { "epoch": 9.28, "learning_rate": 2.1439878234398785e-05, "loss": 2.0603, "step": 30500 }, { "epoch": 9.29, "learning_rate": 2.1424657534246574e-05, "loss": 2.0815, "step": 30525 }, { "epoch": 9.3, "learning_rate": 2.1409436834094367e-05, "loss": 2.0313, "step": 30550 }, { "epoch": 9.31, "learning_rate": 2.1394216133942167e-05, "loss": 1.9726, "step": 30575 }, { "epoch": 9.32, "learning_rate": 2.1378995433789956e-05, "loss": 2.0626, "step": 30600 }, { "epoch": 9.32, "learning_rate": 2.136377473363775e-05, "loss": 2.1151, "step": 30625 }, { "epoch": 9.33, "learning_rate": 2.1348554033485542e-05, "loss": 2.0992, "step": 30650 }, { "epoch": 9.34, "learning_rate": 2.1333333333333335e-05, "loss": 2.0547, "step": 30675 }, { "epoch": 9.35, "learning_rate": 2.1318112633181127e-05, "loss": 2.0273, "step": 30700 }, { "epoch": 9.35, "learning_rate": 2.130289193302892e-05, "loss": 2.0666, "step": 30725 }, { "epoch": 9.36, "learning_rate": 2.1287671232876713e-05, "loss": 2.1278, "step": 30750 }, { "epoch": 9.37, "learning_rate": 2.127245053272451e-05, "loss": 2.08, "step": 30775 }, { "epoch": 9.38, "learning_rate": 2.1257229832572302e-05, "loss": 2.1754, "step": 30800 }, { "epoch": 9.38, "learning_rate": 2.1242009132420095e-05, "loss": 2.0204, "step": 30825 }, { "epoch": 9.39, "learning_rate": 2.1226788432267888e-05, "loss": 2.0768, "step": 30850 }, { "epoch": 9.4, "learning_rate": 2.1211567732115677e-05, "loss": 2.1745, "step": 30875 }, { "epoch": 9.41, "learning_rate": 2.119634703196347e-05, "loss": 2.0624, "step": 30900 }, { "epoch": 9.41, "learning_rate": 2.1181126331811263e-05, "loss": 2.0544, "step": 30925 }, { "epoch": 9.42, "learning_rate": 2.1165905631659056e-05, "loss": 1.9643, "step": 30950 }, { "epoch": 9.43, "learning_rate": 2.1150684931506852e-05, "loss": 2.0692, "step": 30975 }, { "epoch": 9.44, "learning_rate": 2.1135464231354645e-05, "loss": 2.0349, "step": 31000 }, { "epoch": 9.44, "learning_rate": 2.1120243531202438e-05, "loss": 2.0648, "step": 31025 }, { "epoch": 9.45, "learning_rate": 2.110502283105023e-05, "loss": 2.0094, "step": 31050 }, { "epoch": 9.46, "learning_rate": 2.1089802130898023e-05, "loss": 2.118, "step": 31075 }, { "epoch": 9.47, "learning_rate": 2.1074581430745816e-05, "loss": 2.0851, "step": 31100 }, { "epoch": 9.47, "learning_rate": 2.105936073059361e-05, "loss": 2.0231, "step": 31125 }, { "epoch": 9.48, "learning_rate": 2.10441400304414e-05, "loss": 2.1099, "step": 31150 }, { "epoch": 9.49, "learning_rate": 2.1028919330289198e-05, "loss": 2.1271, "step": 31175 }, { "epoch": 9.5, "learning_rate": 2.101369863013699e-05, "loss": 2.0688, "step": 31200 }, { "epoch": 9.51, "learning_rate": 2.099847792998478e-05, "loss": 2.0647, "step": 31225 }, { "epoch": 9.51, "learning_rate": 2.0983257229832573e-05, "loss": 2.1555, "step": 31250 }, { "epoch": 9.52, "learning_rate": 2.0968036529680366e-05, "loss": 2.0468, "step": 31275 }, { "epoch": 9.53, "learning_rate": 2.095281582952816e-05, "loss": 2.1865, "step": 31300 }, { "epoch": 9.54, "learning_rate": 2.0937595129375952e-05, "loss": 2.1422, "step": 31325 }, { "epoch": 9.54, "learning_rate": 2.0922374429223745e-05, "loss": 2.1395, "step": 31350 }, { "epoch": 9.55, "learning_rate": 2.090715372907154e-05, "loss": 2.09, "step": 31375 }, { "epoch": 9.56, "learning_rate": 2.0891933028919334e-05, "loss": 2.0477, "step": 31400 }, { "epoch": 9.57, "learning_rate": 2.0876712328767127e-05, "loss": 2.0753, "step": 31425 }, { "epoch": 9.57, "learning_rate": 2.086149162861492e-05, "loss": 2.0948, "step": 31450 }, { "epoch": 9.58, "learning_rate": 2.0846270928462712e-05, "loss": 2.0595, "step": 31475 }, { "epoch": 9.59, "learning_rate": 2.0831050228310502e-05, "loss": 2.01, "step": 31500 }, { "epoch": 9.6, "learning_rate": 2.0815829528158295e-05, "loss": 2.1305, "step": 31525 }, { "epoch": 9.6, "learning_rate": 2.0800608828006087e-05, "loss": 2.1159, "step": 31550 }, { "epoch": 9.61, "learning_rate": 2.0785388127853884e-05, "loss": 2.0244, "step": 31575 }, { "epoch": 9.62, "learning_rate": 2.0770167427701676e-05, "loss": 1.9978, "step": 31600 }, { "epoch": 9.63, "learning_rate": 2.075494672754947e-05, "loss": 2.0019, "step": 31625 }, { "epoch": 9.63, "learning_rate": 2.0739726027397262e-05, "loss": 2.1559, "step": 31650 }, { "epoch": 9.64, "learning_rate": 2.0724505327245055e-05, "loss": 2.0498, "step": 31675 }, { "epoch": 9.65, "learning_rate": 2.0709284627092848e-05, "loss": 2.0949, "step": 31700 }, { "epoch": 9.66, "learning_rate": 2.069406392694064e-05, "loss": 2.0069, "step": 31725 }, { "epoch": 9.67, "learning_rate": 2.0678843226788434e-05, "loss": 2.1079, "step": 31750 }, { "epoch": 9.67, "learning_rate": 2.066362252663623e-05, "loss": 2.0984, "step": 31775 }, { "epoch": 9.68, "learning_rate": 2.0648401826484023e-05, "loss": 2.0458, "step": 31800 }, { "epoch": 9.69, "learning_rate": 2.0633181126331815e-05, "loss": 2.0529, "step": 31825 }, { "epoch": 9.7, "learning_rate": 2.0617960426179605e-05, "loss": 2.1335, "step": 31850 }, { "epoch": 9.7, "learning_rate": 2.0602739726027398e-05, "loss": 2.0454, "step": 31875 }, { "epoch": 9.71, "learning_rate": 2.058751902587519e-05, "loss": 2.0767, "step": 31900 }, { "epoch": 9.72, "learning_rate": 2.0572298325722983e-05, "loss": 2.0955, "step": 31925 }, { "epoch": 9.73, "learning_rate": 2.0557077625570776e-05, "loss": 2.0321, "step": 31950 }, { "epoch": 9.73, "learning_rate": 2.0541856925418572e-05, "loss": 1.9845, "step": 31975 }, { "epoch": 9.74, "learning_rate": 2.0526636225266365e-05, "loss": 1.9996, "step": 32000 }, { "epoch": 9.75, "learning_rate": 2.0511415525114158e-05, "loss": 2.1175, "step": 32025 }, { "epoch": 9.76, "learning_rate": 2.049619482496195e-05, "loss": 2.1869, "step": 32050 }, { "epoch": 9.76, "learning_rate": 2.0480974124809744e-05, "loss": 2.0169, "step": 32075 }, { "epoch": 9.77, "learning_rate": 2.0465753424657537e-05, "loss": 2.0489, "step": 32100 }, { "epoch": 9.78, "learning_rate": 2.0450532724505326e-05, "loss": 2.1809, "step": 32125 }, { "epoch": 9.79, "learning_rate": 2.043531202435312e-05, "loss": 2.1068, "step": 32150 }, { "epoch": 9.79, "learning_rate": 2.042009132420092e-05, "loss": 2.1649, "step": 32175 }, { "epoch": 9.8, "learning_rate": 2.0404870624048708e-05, "loss": 2.1867, "step": 32200 }, { "epoch": 9.81, "learning_rate": 2.03896499238965e-05, "loss": 2.1665, "step": 32225 }, { "epoch": 9.82, "learning_rate": 2.0374429223744294e-05, "loss": 2.1239, "step": 32250 }, { "epoch": 9.82, "learning_rate": 2.0359208523592087e-05, "loss": 2.0871, "step": 32275 }, { "epoch": 9.83, "learning_rate": 2.034398782343988e-05, "loss": 2.0232, "step": 32300 }, { "epoch": 9.84, "learning_rate": 2.0328767123287672e-05, "loss": 2.028, "step": 32325 }, { "epoch": 9.85, "learning_rate": 2.0313546423135465e-05, "loss": 2.0638, "step": 32350 }, { "epoch": 9.86, "learning_rate": 2.029832572298326e-05, "loss": 2.0474, "step": 32375 }, { "epoch": 9.86, "learning_rate": 2.0283105022831054e-05, "loss": 2.0768, "step": 32400 }, { "epoch": 9.87, "learning_rate": 2.0267884322678847e-05, "loss": 2.0103, "step": 32425 }, { "epoch": 9.88, "learning_rate": 2.025266362252664e-05, "loss": 2.0039, "step": 32450 }, { "epoch": 9.89, "learning_rate": 2.023744292237443e-05, "loss": 2.0592, "step": 32475 }, { "epoch": 9.89, "learning_rate": 2.0222222222222222e-05, "loss": 1.9841, "step": 32500 }, { "epoch": 9.9, "learning_rate": 2.0207001522070015e-05, "loss": 2.0784, "step": 32525 }, { "epoch": 9.91, "learning_rate": 2.0191780821917808e-05, "loss": 2.1935, "step": 32550 }, { "epoch": 9.92, "learning_rate": 2.0176560121765604e-05, "loss": 2.0357, "step": 32575 }, { "epoch": 9.92, "learning_rate": 2.0161339421613397e-05, "loss": 2.1061, "step": 32600 }, { "epoch": 9.93, "learning_rate": 2.014611872146119e-05, "loss": 2.1107, "step": 32625 }, { "epoch": 9.94, "learning_rate": 2.0130898021308983e-05, "loss": 2.1319, "step": 32650 }, { "epoch": 9.95, "learning_rate": 2.0115677321156775e-05, "loss": 2.059, "step": 32675 }, { "epoch": 9.95, "learning_rate": 2.0100456621004568e-05, "loss": 2.1054, "step": 32700 }, { "epoch": 9.96, "learning_rate": 2.008523592085236e-05, "loss": 2.1039, "step": 32725 }, { "epoch": 9.97, "learning_rate": 2.007001522070015e-05, "loss": 2.0244, "step": 32750 }, { "epoch": 9.98, "learning_rate": 2.005479452054795e-05, "loss": 2.0507, "step": 32775 }, { "epoch": 9.98, "learning_rate": 2.0039573820395743e-05, "loss": 2.0643, "step": 32800 }, { "epoch": 9.99, "learning_rate": 2.0024353120243532e-05, "loss": 2.0597, "step": 32825 }, { "epoch": 10.0, "learning_rate": 2.0009132420091325e-05, "loss": 2.0781, "step": 32850 }, { "epoch": 10.01, "learning_rate": 1.9993911719939118e-05, "loss": 2.039, "step": 32875 }, { "epoch": 10.02, "learning_rate": 1.997869101978691e-05, "loss": 2.0825, "step": 32900 }, { "epoch": 10.02, "learning_rate": 1.9963470319634707e-05, "loss": 2.0215, "step": 32925 }, { "epoch": 10.03, "learning_rate": 1.9948249619482497e-05, "loss": 1.9872, "step": 32950 }, { "epoch": 10.04, "learning_rate": 1.993302891933029e-05, "loss": 1.9938, "step": 32975 }, { "epoch": 10.05, "learning_rate": 1.9917808219178082e-05, "loss": 2.0155, "step": 33000 }, { "epoch": 10.05, "learning_rate": 1.990258751902588e-05, "loss": 1.9574, "step": 33025 }, { "epoch": 10.06, "learning_rate": 1.988736681887367e-05, "loss": 1.9415, "step": 33050 }, { "epoch": 10.07, "learning_rate": 1.9872146118721464e-05, "loss": 2.1311, "step": 33075 }, { "epoch": 10.08, "learning_rate": 1.9856925418569254e-05, "loss": 2.0027, "step": 33100 }, { "epoch": 10.08, "learning_rate": 1.984170471841705e-05, "loss": 2.0285, "step": 33125 }, { "epoch": 10.09, "learning_rate": 1.9826484018264843e-05, "loss": 2.0183, "step": 33150 }, { "epoch": 10.1, "learning_rate": 1.9811263318112636e-05, "loss": 2.0641, "step": 33175 }, { "epoch": 10.11, "learning_rate": 1.979604261796043e-05, "loss": 2.0903, "step": 33200 }, { "epoch": 10.11, "learning_rate": 1.978082191780822e-05, "loss": 1.9673, "step": 33225 }, { "epoch": 10.12, "learning_rate": 1.9765601217656014e-05, "loss": 2.0883, "step": 33250 }, { "epoch": 10.13, "learning_rate": 1.9750380517503807e-05, "loss": 2.0343, "step": 33275 }, { "epoch": 10.14, "learning_rate": 1.97351598173516e-05, "loss": 2.0388, "step": 33300 }, { "epoch": 10.14, "learning_rate": 1.9719939117199393e-05, "loss": 1.9403, "step": 33325 }, { "epoch": 10.15, "learning_rate": 1.9704718417047185e-05, "loss": 2.1293, "step": 33350 }, { "epoch": 10.16, "learning_rate": 1.9689497716894978e-05, "loss": 1.9008, "step": 33375 }, { "epoch": 10.17, "learning_rate": 1.967427701674277e-05, "loss": 2.0903, "step": 33400 }, { "epoch": 10.18, "learning_rate": 1.9659056316590567e-05, "loss": 2.1492, "step": 33425 }, { "epoch": 10.18, "learning_rate": 1.9643835616438357e-05, "loss": 2.0181, "step": 33450 }, { "epoch": 10.19, "learning_rate": 1.962861491628615e-05, "loss": 2.1131, "step": 33475 }, { "epoch": 10.2, "learning_rate": 1.9613394216133942e-05, "loss": 1.9928, "step": 33500 }, { "epoch": 10.21, "learning_rate": 1.959817351598174e-05, "loss": 2.091, "step": 33525 }, { "epoch": 10.21, "learning_rate": 1.958295281582953e-05, "loss": 2.0378, "step": 33550 }, { "epoch": 10.22, "learning_rate": 1.956773211567732e-05, "loss": 2.0189, "step": 33575 }, { "epoch": 10.23, "learning_rate": 1.9552511415525114e-05, "loss": 2.1016, "step": 33600 }, { "epoch": 10.24, "learning_rate": 1.953729071537291e-05, "loss": 2.0928, "step": 33625 }, { "epoch": 10.24, "learning_rate": 1.9522070015220703e-05, "loss": 2.1194, "step": 33650 }, { "epoch": 10.25, "learning_rate": 1.9506849315068496e-05, "loss": 2.0628, "step": 33675 }, { "epoch": 10.26, "learning_rate": 1.949162861491629e-05, "loss": 2.0574, "step": 33700 }, { "epoch": 10.27, "learning_rate": 1.947640791476408e-05, "loss": 1.9877, "step": 33725 }, { "epoch": 10.27, "learning_rate": 1.9461187214611874e-05, "loss": 1.959, "step": 33750 }, { "epoch": 10.28, "learning_rate": 1.9445966514459667e-05, "loss": 2.0333, "step": 33775 }, { "epoch": 10.29, "learning_rate": 1.943074581430746e-05, "loss": 2.0887, "step": 33800 }, { "epoch": 10.3, "learning_rate": 1.9415525114155253e-05, "loss": 2.0249, "step": 33825 }, { "epoch": 10.3, "learning_rate": 1.9400304414003046e-05, "loss": 2.0648, "step": 33850 }, { "epoch": 10.31, "learning_rate": 1.938508371385084e-05, "loss": 2.1138, "step": 33875 }, { "epoch": 10.32, "learning_rate": 1.936986301369863e-05, "loss": 2.0822, "step": 33900 }, { "epoch": 10.33, "learning_rate": 1.9354642313546424e-05, "loss": 2.1693, "step": 33925 }, { "epoch": 10.33, "learning_rate": 1.9339421613394217e-05, "loss": 2.0125, "step": 33950 }, { "epoch": 10.34, "learning_rate": 1.932420091324201e-05, "loss": 2.0564, "step": 33975 }, { "epoch": 10.35, "learning_rate": 1.9308980213089803e-05, "loss": 2.1638, "step": 34000 }, { "epoch": 10.36, "learning_rate": 1.92937595129376e-05, "loss": 2.0393, "step": 34025 }, { "epoch": 10.37, "learning_rate": 1.9278538812785392e-05, "loss": 2.0248, "step": 34050 }, { "epoch": 10.37, "learning_rate": 1.926331811263318e-05, "loss": 1.9676, "step": 34075 }, { "epoch": 10.38, "learning_rate": 1.9248097412480974e-05, "loss": 2.0428, "step": 34100 }, { "epoch": 10.39, "learning_rate": 1.923287671232877e-05, "loss": 2.0268, "step": 34125 }, { "epoch": 10.4, "learning_rate": 1.9217656012176563e-05, "loss": 2.1396, "step": 34150 }, { "epoch": 10.4, "learning_rate": 1.9202435312024356e-05, "loss": 2.0436, "step": 34175 }, { "epoch": 10.41, "learning_rate": 1.9187214611872145e-05, "loss": 2.1294, "step": 34200 }, { "epoch": 10.42, "learning_rate": 1.917199391171994e-05, "loss": 2.0051, "step": 34225 }, { "epoch": 10.43, "learning_rate": 1.9156773211567734e-05, "loss": 2.0874, "step": 34250 }, { "epoch": 10.43, "learning_rate": 1.9141552511415527e-05, "loss": 2.0328, "step": 34275 }, { "epoch": 10.44, "learning_rate": 1.912633181126332e-05, "loss": 2.0851, "step": 34300 }, { "epoch": 10.45, "learning_rate": 1.9111111111111113e-05, "loss": 2.051, "step": 34325 }, { "epoch": 10.46, "learning_rate": 1.9095890410958906e-05, "loss": 2.1141, "step": 34350 }, { "epoch": 10.46, "learning_rate": 1.90806697108067e-05, "loss": 2.0351, "step": 34375 }, { "epoch": 10.47, "learning_rate": 1.906544901065449e-05, "loss": 1.9976, "step": 34400 }, { "epoch": 10.48, "learning_rate": 1.9050228310502284e-05, "loss": 2.0309, "step": 34425 }, { "epoch": 10.49, "learning_rate": 1.9035007610350077e-05, "loss": 2.0315, "step": 34450 }, { "epoch": 10.49, "learning_rate": 1.901978691019787e-05, "loss": 2.1068, "step": 34475 }, { "epoch": 10.5, "learning_rate": 1.9004566210045663e-05, "loss": 1.9746, "step": 34500 }, { "epoch": 10.51, "learning_rate": 1.898934550989346e-05, "loss": 2.0455, "step": 34525 }, { "epoch": 10.52, "learning_rate": 1.897412480974125e-05, "loss": 2.1544, "step": 34550 }, { "epoch": 10.53, "learning_rate": 1.895890410958904e-05, "loss": 2.0103, "step": 34575 }, { "epoch": 10.53, "learning_rate": 1.8943683409436834e-05, "loss": 2.0323, "step": 34600 }, { "epoch": 10.54, "learning_rate": 1.892846270928463e-05, "loss": 2.138, "step": 34625 }, { "epoch": 10.55, "learning_rate": 1.8913242009132423e-05, "loss": 2.0743, "step": 34650 }, { "epoch": 10.56, "learning_rate": 1.8898021308980216e-05, "loss": 2.097, "step": 34675 }, { "epoch": 10.56, "learning_rate": 1.8882800608828006e-05, "loss": 2.1001, "step": 34700 }, { "epoch": 10.57, "learning_rate": 1.8867579908675802e-05, "loss": 2.1197, "step": 34725 }, { "epoch": 10.58, "learning_rate": 1.8852359208523595e-05, "loss": 2.0936, "step": 34750 }, { "epoch": 10.59, "learning_rate": 1.8837138508371387e-05, "loss": 2.0947, "step": 34775 }, { "epoch": 10.59, "learning_rate": 1.882191780821918e-05, "loss": 1.9545, "step": 34800 }, { "epoch": 10.6, "learning_rate": 1.8806697108066973e-05, "loss": 1.9313, "step": 34825 }, { "epoch": 10.61, "learning_rate": 1.8791476407914766e-05, "loss": 1.9543, "step": 34850 }, { "epoch": 10.62, "learning_rate": 1.877625570776256e-05, "loss": 1.8876, "step": 34875 }, { "epoch": 10.62, "learning_rate": 1.876103500761035e-05, "loss": 2.0014, "step": 34900 }, { "epoch": 10.63, "learning_rate": 1.8745814307458145e-05, "loss": 2.0244, "step": 34925 }, { "epoch": 10.64, "learning_rate": 1.8730593607305937e-05, "loss": 2.033, "step": 34950 }, { "epoch": 10.65, "learning_rate": 1.871537290715373e-05, "loss": 2.0249, "step": 34975 }, { "epoch": 10.65, "learning_rate": 1.8700761035007613e-05, "loss": 2.052, "step": 35000 }, { "epoch": 10.66, "learning_rate": 1.8685540334855406e-05, "loss": 1.9771, "step": 35025 }, { "epoch": 10.67, "learning_rate": 1.86703196347032e-05, "loss": 2.036, "step": 35050 }, { "epoch": 10.68, "learning_rate": 1.865509893455099e-05, "loss": 2.0739, "step": 35075 }, { "epoch": 10.68, "learning_rate": 1.8639878234398784e-05, "loss": 1.9556, "step": 35100 }, { "epoch": 10.69, "learning_rate": 1.8624657534246577e-05, "loss": 2.0215, "step": 35125 }, { "epoch": 10.7, "learning_rate": 1.860943683409437e-05, "loss": 1.9275, "step": 35150 }, { "epoch": 10.71, "learning_rate": 1.8594216133942163e-05, "loss": 2.0273, "step": 35175 }, { "epoch": 10.72, "learning_rate": 1.8578995433789956e-05, "loss": 2.0919, "step": 35200 }, { "epoch": 10.72, "learning_rate": 1.856377473363775e-05, "loss": 2.0538, "step": 35225 }, { "epoch": 10.73, "learning_rate": 1.854855403348554e-05, "loss": 2.0061, "step": 35250 }, { "epoch": 10.74, "learning_rate": 1.8533333333333334e-05, "loss": 2.0814, "step": 35275 }, { "epoch": 10.75, "learning_rate": 1.851811263318113e-05, "loss": 2.1047, "step": 35300 }, { "epoch": 10.75, "learning_rate": 1.850289193302892e-05, "loss": 1.9969, "step": 35325 }, { "epoch": 10.76, "learning_rate": 1.8487671232876713e-05, "loss": 2.0814, "step": 35350 }, { "epoch": 10.77, "learning_rate": 1.8472450532724506e-05, "loss": 1.9949, "step": 35375 }, { "epoch": 10.78, "learning_rate": 1.8457229832572302e-05, "loss": 2.037, "step": 35400 }, { "epoch": 10.78, "learning_rate": 1.8442009132420095e-05, "loss": 2.0312, "step": 35425 }, { "epoch": 10.79, "learning_rate": 1.8426788432267884e-05, "loss": 1.987, "step": 35450 }, { "epoch": 10.8, "learning_rate": 1.8411567732115677e-05, "loss": 2.1242, "step": 35475 }, { "epoch": 10.81, "learning_rate": 1.839634703196347e-05, "loss": 2.0262, "step": 35500 }, { "epoch": 10.81, "learning_rate": 1.8381126331811266e-05, "loss": 2.0393, "step": 35525 }, { "epoch": 10.82, "learning_rate": 1.836590563165906e-05, "loss": 2.0485, "step": 35550 }, { "epoch": 10.83, "learning_rate": 1.8350684931506852e-05, "loss": 2.0249, "step": 35575 }, { "epoch": 10.84, "learning_rate": 1.833546423135464e-05, "loss": 1.9595, "step": 35600 }, { "epoch": 10.84, "learning_rate": 1.8320243531202437e-05, "loss": 2.0546, "step": 35625 }, { "epoch": 10.85, "learning_rate": 1.830502283105023e-05, "loss": 2.0052, "step": 35650 }, { "epoch": 10.86, "learning_rate": 1.8289802130898023e-05, "loss": 2.0484, "step": 35675 }, { "epoch": 10.87, "learning_rate": 1.8274581430745816e-05, "loss": 2.0976, "step": 35700 }, { "epoch": 10.88, "learning_rate": 1.825936073059361e-05, "loss": 2.0822, "step": 35725 }, { "epoch": 10.88, "learning_rate": 1.82441400304414e-05, "loss": 2.0432, "step": 35750 }, { "epoch": 10.89, "learning_rate": 1.8228919330289195e-05, "loss": 1.9908, "step": 35775 }, { "epoch": 10.9, "learning_rate": 1.8213698630136987e-05, "loss": 2.1414, "step": 35800 }, { "epoch": 10.91, "learning_rate": 1.819847792998478e-05, "loss": 1.9891, "step": 35825 }, { "epoch": 10.91, "learning_rate": 1.8183257229832573e-05, "loss": 2.054, "step": 35850 }, { "epoch": 10.92, "learning_rate": 1.8168036529680366e-05, "loss": 2.1285, "step": 35875 }, { "epoch": 10.93, "learning_rate": 1.815281582952816e-05, "loss": 2.0289, "step": 35900 }, { "epoch": 10.94, "learning_rate": 1.8137595129375955e-05, "loss": 2.0027, "step": 35925 }, { "epoch": 10.94, "learning_rate": 1.8122374429223744e-05, "loss": 2.0613, "step": 35950 }, { "epoch": 10.95, "learning_rate": 1.8107153729071537e-05, "loss": 2.0867, "step": 35975 }, { "epoch": 10.96, "learning_rate": 1.809193302891933e-05, "loss": 2.0042, "step": 36000 }, { "epoch": 10.97, "learning_rate": 1.8076712328767126e-05, "loss": 1.9788, "step": 36025 }, { "epoch": 10.97, "learning_rate": 1.806149162861492e-05, "loss": 2.1272, "step": 36050 }, { "epoch": 10.98, "learning_rate": 1.804627092846271e-05, "loss": 2.1209, "step": 36075 }, { "epoch": 10.99, "learning_rate": 1.80310502283105e-05, "loss": 2.0478, "step": 36100 }, { "epoch": 11.0, "learning_rate": 1.8015829528158298e-05, "loss": 2.0026, "step": 36125 }, { "epoch": 11.0, "learning_rate": 1.800060882800609e-05, "loss": 1.9632, "step": 36150 }, { "epoch": 11.01, "learning_rate": 1.7985388127853883e-05, "loss": 1.9789, "step": 36175 }, { "epoch": 11.02, "learning_rate": 1.7970167427701676e-05, "loss": 2.006, "step": 36200 }, { "epoch": 11.03, "learning_rate": 1.795494672754947e-05, "loss": 1.9146, "step": 36225 }, { "epoch": 11.04, "learning_rate": 1.7939726027397262e-05, "loss": 2.0145, "step": 36250 }, { "epoch": 11.04, "learning_rate": 1.7924505327245055e-05, "loss": 1.9779, "step": 36275 }, { "epoch": 11.05, "learning_rate": 1.7909284627092848e-05, "loss": 2.0325, "step": 36300 }, { "epoch": 11.06, "learning_rate": 1.789406392694064e-05, "loss": 2.1302, "step": 36325 }, { "epoch": 11.07, "learning_rate": 1.7878843226788433e-05, "loss": 2.035, "step": 36350 }, { "epoch": 11.07, "learning_rate": 1.7863622526636226e-05, "loss": 2.0235, "step": 36375 }, { "epoch": 11.08, "learning_rate": 1.784840182648402e-05, "loss": 2.0755, "step": 36400 }, { "epoch": 11.09, "learning_rate": 1.7833181126331812e-05, "loss": 2.1273, "step": 36425 }, { "epoch": 11.1, "learning_rate": 1.7817960426179605e-05, "loss": 1.8809, "step": 36450 }, { "epoch": 11.1, "learning_rate": 1.7802739726027397e-05, "loss": 2.0696, "step": 36475 }, { "epoch": 11.11, "learning_rate": 1.778751902587519e-05, "loss": 2.0482, "step": 36500 }, { "epoch": 11.12, "learning_rate": 1.7772298325722986e-05, "loss": 1.9742, "step": 36525 }, { "epoch": 11.13, "learning_rate": 1.775707762557078e-05, "loss": 2.0024, "step": 36550 }, { "epoch": 11.13, "learning_rate": 1.774185692541857e-05, "loss": 2.13, "step": 36575 }, { "epoch": 11.14, "learning_rate": 1.772663622526636e-05, "loss": 2.055, "step": 36600 }, { "epoch": 11.15, "learning_rate": 1.7711415525114158e-05, "loss": 2.0081, "step": 36625 }, { "epoch": 11.16, "learning_rate": 1.769619482496195e-05, "loss": 2.083, "step": 36650 }, { "epoch": 11.16, "learning_rate": 1.7680974124809744e-05, "loss": 1.9954, "step": 36675 }, { "epoch": 11.17, "learning_rate": 1.7665753424657533e-05, "loss": 1.9231, "step": 36700 }, { "epoch": 11.18, "learning_rate": 1.765053272450533e-05, "loss": 2.0633, "step": 36725 }, { "epoch": 11.19, "learning_rate": 1.7635312024353122e-05, "loss": 2.0269, "step": 36750 }, { "epoch": 11.19, "learning_rate": 1.7620091324200915e-05, "loss": 2.0619, "step": 36775 }, { "epoch": 11.2, "learning_rate": 1.7604870624048708e-05, "loss": 2.1641, "step": 36800 }, { "epoch": 11.21, "learning_rate": 1.75896499238965e-05, "loss": 2.0819, "step": 36825 }, { "epoch": 11.22, "learning_rate": 1.7574429223744293e-05, "loss": 2.0132, "step": 36850 }, { "epoch": 11.23, "learning_rate": 1.7559208523592086e-05, "loss": 2.0833, "step": 36875 }, { "epoch": 11.23, "learning_rate": 1.754398782343988e-05, "loss": 1.9813, "step": 36900 }, { "epoch": 11.24, "learning_rate": 1.7528767123287672e-05, "loss": 1.9838, "step": 36925 }, { "epoch": 11.25, "learning_rate": 1.7513546423135465e-05, "loss": 1.9377, "step": 36950 }, { "epoch": 11.26, "learning_rate": 1.7498325722983258e-05, "loss": 2.0033, "step": 36975 }, { "epoch": 11.26, "learning_rate": 1.748310502283105e-05, "loss": 2.0323, "step": 37000 }, { "epoch": 11.27, "learning_rate": 1.7467884322678847e-05, "loss": 1.8743, "step": 37025 }, { "epoch": 11.28, "learning_rate": 1.7452663622526636e-05, "loss": 2.021, "step": 37050 }, { "epoch": 11.29, "learning_rate": 1.743744292237443e-05, "loss": 1.995, "step": 37075 }, { "epoch": 11.29, "learning_rate": 1.7422222222222222e-05, "loss": 2.0027, "step": 37100 }, { "epoch": 11.3, "learning_rate": 1.7407001522070018e-05, "loss": 1.9754, "step": 37125 }, { "epoch": 11.31, "learning_rate": 1.739178082191781e-05, "loss": 2.042, "step": 37150 }, { "epoch": 11.32, "learning_rate": 1.7376560121765604e-05, "loss": 1.9945, "step": 37175 }, { "epoch": 11.32, "learning_rate": 1.7361339421613393e-05, "loss": 1.9659, "step": 37200 }, { "epoch": 11.33, "learning_rate": 1.734611872146119e-05, "loss": 2.083, "step": 37225 }, { "epoch": 11.34, "learning_rate": 1.7330898021308982e-05, "loss": 1.9871, "step": 37250 }, { "epoch": 11.35, "learning_rate": 1.7315677321156775e-05, "loss": 1.9931, "step": 37275 }, { "epoch": 11.35, "learning_rate": 1.7300456621004568e-05, "loss": 2.0554, "step": 37300 }, { "epoch": 11.36, "learning_rate": 1.728523592085236e-05, "loss": 2.118, "step": 37325 }, { "epoch": 11.37, "learning_rate": 1.7270015220700154e-05, "loss": 2.0515, "step": 37350 }, { "epoch": 11.38, "learning_rate": 1.7254794520547946e-05, "loss": 2.0505, "step": 37375 }, { "epoch": 11.39, "learning_rate": 1.723957382039574e-05, "loss": 2.1095, "step": 37400 }, { "epoch": 11.39, "learning_rate": 1.7224353120243532e-05, "loss": 2.0882, "step": 37425 }, { "epoch": 11.4, "learning_rate": 1.7209132420091325e-05, "loss": 1.9593, "step": 37450 }, { "epoch": 11.41, "learning_rate": 1.7193911719939118e-05, "loss": 2.0295, "step": 37475 }, { "epoch": 11.42, "learning_rate": 1.717869101978691e-05, "loss": 2.0128, "step": 37500 }, { "epoch": 11.42, "learning_rate": 1.7163470319634707e-05, "loss": 2.0363, "step": 37525 }, { "epoch": 11.43, "learning_rate": 1.7148249619482496e-05, "loss": 2.0494, "step": 37550 }, { "epoch": 11.44, "learning_rate": 1.713302891933029e-05, "loss": 2.0278, "step": 37575 }, { "epoch": 11.45, "learning_rate": 1.7117808219178082e-05, "loss": 1.9859, "step": 37600 }, { "epoch": 11.45, "learning_rate": 1.7102587519025878e-05, "loss": 2.0568, "step": 37625 }, { "epoch": 11.46, "learning_rate": 1.708736681887367e-05, "loss": 1.9878, "step": 37650 }, { "epoch": 11.47, "learning_rate": 1.707214611872146e-05, "loss": 2.0664, "step": 37675 }, { "epoch": 11.48, "learning_rate": 1.7056925418569253e-05, "loss": 1.9954, "step": 37700 }, { "epoch": 11.48, "learning_rate": 1.704170471841705e-05, "loss": 1.9442, "step": 37725 }, { "epoch": 11.49, "learning_rate": 1.7026484018264842e-05, "loss": 1.9689, "step": 37750 }, { "epoch": 11.5, "learning_rate": 1.7011263318112635e-05, "loss": 1.9743, "step": 37775 }, { "epoch": 11.51, "learning_rate": 1.6996042617960428e-05, "loss": 2.0726, "step": 37800 }, { "epoch": 11.51, "learning_rate": 1.698082191780822e-05, "loss": 1.9607, "step": 37825 }, { "epoch": 11.52, "learning_rate": 1.6965601217656014e-05, "loss": 2.0583, "step": 37850 }, { "epoch": 11.53, "learning_rate": 1.6950380517503807e-05, "loss": 2.0146, "step": 37875 }, { "epoch": 11.54, "learning_rate": 1.69351598173516e-05, "loss": 2.01, "step": 37900 }, { "epoch": 11.54, "learning_rate": 1.6919939117199392e-05, "loss": 2.0699, "step": 37925 }, { "epoch": 11.55, "learning_rate": 1.6904718417047185e-05, "loss": 2.0734, "step": 37950 }, { "epoch": 11.56, "learning_rate": 1.6889497716894978e-05, "loss": 2.0048, "step": 37975 }, { "epoch": 11.57, "learning_rate": 1.687427701674277e-05, "loss": 1.9044, "step": 38000 }, { "epoch": 11.58, "learning_rate": 1.6859056316590564e-05, "loss": 2.0226, "step": 38025 }, { "epoch": 11.58, "learning_rate": 1.6843835616438357e-05, "loss": 1.9519, "step": 38050 }, { "epoch": 11.59, "learning_rate": 1.682861491628615e-05, "loss": 2.03, "step": 38075 }, { "epoch": 11.6, "learning_rate": 1.6813394216133942e-05, "loss": 2.0279, "step": 38100 }, { "epoch": 11.61, "learning_rate": 1.679817351598174e-05, "loss": 1.9242, "step": 38125 }, { "epoch": 11.61, "learning_rate": 1.678295281582953e-05, "loss": 2.0287, "step": 38150 }, { "epoch": 11.62, "learning_rate": 1.676773211567732e-05, "loss": 1.9837, "step": 38175 }, { "epoch": 11.63, "learning_rate": 1.6752511415525114e-05, "loss": 2.1662, "step": 38200 }, { "epoch": 11.64, "learning_rate": 1.673729071537291e-05, "loss": 2.0756, "step": 38225 }, { "epoch": 11.64, "learning_rate": 1.6722070015220703e-05, "loss": 2.0991, "step": 38250 }, { "epoch": 11.65, "learning_rate": 1.6706849315068495e-05, "loss": 2.0585, "step": 38275 }, { "epoch": 11.66, "learning_rate": 1.6691628614916285e-05, "loss": 2.0222, "step": 38300 }, { "epoch": 11.67, "learning_rate": 1.667640791476408e-05, "loss": 2.1262, "step": 38325 }, { "epoch": 11.67, "learning_rate": 1.6661187214611874e-05, "loss": 2.0557, "step": 38350 }, { "epoch": 11.68, "learning_rate": 1.6645966514459667e-05, "loss": 2.0179, "step": 38375 }, { "epoch": 11.69, "learning_rate": 1.663074581430746e-05, "loss": 2.0048, "step": 38400 }, { "epoch": 11.7, "learning_rate": 1.6615525114155252e-05, "loss": 1.9947, "step": 38425 }, { "epoch": 11.7, "learning_rate": 1.6600304414003045e-05, "loss": 1.9683, "step": 38450 }, { "epoch": 11.71, "learning_rate": 1.6585083713850838e-05, "loss": 1.9387, "step": 38475 }, { "epoch": 11.72, "learning_rate": 1.656986301369863e-05, "loss": 2.0681, "step": 38500 }, { "epoch": 11.73, "learning_rate": 1.6554642313546424e-05, "loss": 2.0681, "step": 38525 }, { "epoch": 11.74, "learning_rate": 1.6539421613394217e-05, "loss": 2.0351, "step": 38550 }, { "epoch": 11.74, "learning_rate": 1.652420091324201e-05, "loss": 2.0481, "step": 38575 }, { "epoch": 11.75, "learning_rate": 1.6508980213089802e-05, "loss": 1.9093, "step": 38600 }, { "epoch": 11.76, "learning_rate": 1.64937595129376e-05, "loss": 1.9843, "step": 38625 }, { "epoch": 11.77, "learning_rate": 1.6478538812785388e-05, "loss": 1.958, "step": 38650 }, { "epoch": 11.77, "learning_rate": 1.646331811263318e-05, "loss": 2.0893, "step": 38675 }, { "epoch": 11.78, "learning_rate": 1.6448097412480974e-05, "loss": 1.9919, "step": 38700 }, { "epoch": 11.79, "learning_rate": 1.643287671232877e-05, "loss": 1.906, "step": 38725 }, { "epoch": 11.8, "learning_rate": 1.6417656012176563e-05, "loss": 2.062, "step": 38750 }, { "epoch": 11.8, "learning_rate": 1.6402435312024356e-05, "loss": 2.1367, "step": 38775 }, { "epoch": 11.81, "learning_rate": 1.6387214611872145e-05, "loss": 2.022, "step": 38800 }, { "epoch": 11.82, "learning_rate": 1.637199391171994e-05, "loss": 1.9896, "step": 38825 }, { "epoch": 11.83, "learning_rate": 1.6356773211567734e-05, "loss": 1.9913, "step": 38850 }, { "epoch": 11.83, "learning_rate": 1.6341552511415527e-05, "loss": 2.0554, "step": 38875 }, { "epoch": 11.84, "learning_rate": 1.632633181126332e-05, "loss": 2.0311, "step": 38900 }, { "epoch": 11.85, "learning_rate": 1.6311111111111113e-05, "loss": 2.0395, "step": 38925 }, { "epoch": 11.86, "learning_rate": 1.6295890410958906e-05, "loss": 2.0641, "step": 38950 }, { "epoch": 11.86, "learning_rate": 1.62806697108067e-05, "loss": 1.9859, "step": 38975 }, { "epoch": 11.87, "learning_rate": 1.626544901065449e-05, "loss": 1.9978, "step": 39000 }, { "epoch": 11.88, "learning_rate": 1.6250228310502284e-05, "loss": 2.042, "step": 39025 }, { "epoch": 11.89, "learning_rate": 1.6235007610350077e-05, "loss": 1.9892, "step": 39050 }, { "epoch": 11.89, "learning_rate": 1.621978691019787e-05, "loss": 2.0111, "step": 39075 }, { "epoch": 11.9, "learning_rate": 1.6204566210045663e-05, "loss": 2.0407, "step": 39100 }, { "epoch": 11.91, "learning_rate": 1.618934550989346e-05, "loss": 2.0049, "step": 39125 }, { "epoch": 11.92, "learning_rate": 1.6174124809741248e-05, "loss": 1.9931, "step": 39150 }, { "epoch": 11.93, "learning_rate": 1.615890410958904e-05, "loss": 2.0479, "step": 39175 }, { "epoch": 11.93, "learning_rate": 1.6143683409436834e-05, "loss": 2.1207, "step": 39200 }, { "epoch": 11.94, "learning_rate": 1.612846270928463e-05, "loss": 2.0321, "step": 39225 }, { "epoch": 11.95, "learning_rate": 1.6113242009132423e-05, "loss": 2.0861, "step": 39250 }, { "epoch": 11.96, "learning_rate": 1.6098021308980212e-05, "loss": 2.0689, "step": 39275 }, { "epoch": 11.96, "learning_rate": 1.6082800608828005e-05, "loss": 2.0498, "step": 39300 }, { "epoch": 11.97, "learning_rate": 1.60675799086758e-05, "loss": 2.0761, "step": 39325 }, { "epoch": 11.98, "learning_rate": 1.6052359208523594e-05, "loss": 1.9528, "step": 39350 }, { "epoch": 11.99, "learning_rate": 1.6037138508371387e-05, "loss": 2.0626, "step": 39375 }, { "epoch": 11.99, "learning_rate": 1.602191780821918e-05, "loss": 2.0268, "step": 39400 }, { "epoch": 12.0, "learning_rate": 1.6006697108066973e-05, "loss": 1.9873, "step": 39425 }, { "epoch": 12.01, "learning_rate": 1.5991476407914766e-05, "loss": 1.9849, "step": 39450 }, { "epoch": 12.02, "learning_rate": 1.597625570776256e-05, "loss": 1.9636, "step": 39475 }, { "epoch": 12.02, "learning_rate": 1.596103500761035e-05, "loss": 2.0264, "step": 39500 }, { "epoch": 12.03, "learning_rate": 1.5945814307458144e-05, "loss": 2.0667, "step": 39525 }, { "epoch": 12.04, "learning_rate": 1.5930593607305937e-05, "loss": 2.0723, "step": 39550 }, { "epoch": 12.05, "learning_rate": 1.591537290715373e-05, "loss": 1.923, "step": 39575 }, { "epoch": 12.05, "learning_rate": 1.5900152207001523e-05, "loss": 2.0104, "step": 39600 }, { "epoch": 12.06, "learning_rate": 1.5884931506849316e-05, "loss": 1.9761, "step": 39625 }, { "epoch": 12.07, "learning_rate": 1.586971080669711e-05, "loss": 1.9907, "step": 39650 }, { "epoch": 12.08, "learning_rate": 1.58544901065449e-05, "loss": 1.988, "step": 39675 }, { "epoch": 12.09, "learning_rate": 1.5839878234398784e-05, "loss": 1.8632, "step": 39700 }, { "epoch": 12.09, "learning_rate": 1.5824657534246577e-05, "loss": 2.0041, "step": 39725 }, { "epoch": 12.1, "learning_rate": 1.580943683409437e-05, "loss": 1.9654, "step": 39750 }, { "epoch": 12.11, "learning_rate": 1.5794216133942163e-05, "loss": 2.0408, "step": 39775 }, { "epoch": 12.12, "learning_rate": 1.5778995433789956e-05, "loss": 1.991, "step": 39800 }, { "epoch": 12.12, "learning_rate": 1.576377473363775e-05, "loss": 2.0496, "step": 39825 }, { "epoch": 12.13, "learning_rate": 1.574855403348554e-05, "loss": 1.9504, "step": 39850 }, { "epoch": 12.14, "learning_rate": 1.5733333333333334e-05, "loss": 1.982, "step": 39875 }, { "epoch": 12.15, "learning_rate": 1.5718112633181127e-05, "loss": 1.9645, "step": 39900 }, { "epoch": 12.15, "learning_rate": 1.570289193302892e-05, "loss": 1.9775, "step": 39925 }, { "epoch": 12.16, "learning_rate": 1.5687671232876713e-05, "loss": 2.0309, "step": 39950 }, { "epoch": 12.17, "learning_rate": 1.5672450532724505e-05, "loss": 1.94, "step": 39975 }, { "epoch": 12.18, "learning_rate": 1.56572298325723e-05, "loss": 1.9667, "step": 40000 }, { "epoch": 12.18, "learning_rate": 1.5642009132420094e-05, "loss": 1.9854, "step": 40025 }, { "epoch": 12.19, "learning_rate": 1.5626788432267884e-05, "loss": 2.031, "step": 40050 }, { "epoch": 12.2, "learning_rate": 1.5611567732115677e-05, "loss": 2.0135, "step": 40075 }, { "epoch": 12.21, "learning_rate": 1.5596347031963473e-05, "loss": 2.0855, "step": 40100 }, { "epoch": 12.21, "learning_rate": 1.5581126331811266e-05, "loss": 1.8975, "step": 40125 }, { "epoch": 12.22, "learning_rate": 1.556590563165906e-05, "loss": 2.0706, "step": 40150 }, { "epoch": 12.23, "learning_rate": 1.5550684931506848e-05, "loss": 2.043, "step": 40175 }, { "epoch": 12.24, "learning_rate": 1.5535464231354644e-05, "loss": 1.9185, "step": 40200 }, { "epoch": 12.25, "learning_rate": 1.5520243531202437e-05, "loss": 2.03, "step": 40225 }, { "epoch": 12.25, "learning_rate": 1.5505631659056317e-05, "loss": 2.0385, "step": 40250 }, { "epoch": 12.26, "learning_rate": 1.5490410958904113e-05, "loss": 2.0019, "step": 40275 }, { "epoch": 12.27, "learning_rate": 1.5475190258751906e-05, "loss": 1.9028, "step": 40300 }, { "epoch": 12.28, "learning_rate": 1.5459969558599695e-05, "loss": 2.0015, "step": 40325 }, { "epoch": 12.28, "learning_rate": 1.5444748858447488e-05, "loss": 1.94, "step": 40350 }, { "epoch": 12.29, "learning_rate": 1.5429528158295284e-05, "loss": 2.0226, "step": 40375 }, { "epoch": 12.3, "learning_rate": 1.5414307458143077e-05, "loss": 2.0047, "step": 40400 }, { "epoch": 12.31, "learning_rate": 1.539908675799087e-05, "loss": 2.0061, "step": 40425 }, { "epoch": 12.31, "learning_rate": 1.5383866057838663e-05, "loss": 2.0525, "step": 40450 }, { "epoch": 12.32, "learning_rate": 1.5368645357686456e-05, "loss": 1.9416, "step": 40475 }, { "epoch": 12.33, "learning_rate": 1.535342465753425e-05, "loss": 1.8657, "step": 40500 }, { "epoch": 12.34, "learning_rate": 1.533820395738204e-05, "loss": 1.9785, "step": 40525 }, { "epoch": 12.34, "learning_rate": 1.5322983257229834e-05, "loss": 1.9967, "step": 40550 }, { "epoch": 12.35, "learning_rate": 1.5307762557077627e-05, "loss": 2.1364, "step": 40575 }, { "epoch": 12.36, "learning_rate": 1.529254185692542e-05, "loss": 2.0049, "step": 40600 }, { "epoch": 12.37, "learning_rate": 1.5277321156773213e-05, "loss": 1.9425, "step": 40625 }, { "epoch": 12.37, "learning_rate": 1.5262100456621006e-05, "loss": 1.9966, "step": 40650 }, { "epoch": 12.38, "learning_rate": 1.52468797564688e-05, "loss": 2.0006, "step": 40675 }, { "epoch": 12.39, "learning_rate": 1.5231659056316593e-05, "loss": 2.0976, "step": 40700 }, { "epoch": 12.4, "learning_rate": 1.5216438356164384e-05, "loss": 2.0786, "step": 40725 }, { "epoch": 12.4, "learning_rate": 1.5201217656012177e-05, "loss": 2.013, "step": 40750 }, { "epoch": 12.41, "learning_rate": 1.5185996955859971e-05, "loss": 2.027, "step": 40775 }, { "epoch": 12.42, "learning_rate": 1.5170776255707764e-05, "loss": 2.0121, "step": 40800 }, { "epoch": 12.43, "learning_rate": 1.5155555555555557e-05, "loss": 2.0365, "step": 40825 }, { "epoch": 12.44, "learning_rate": 1.514033485540335e-05, "loss": 1.9879, "step": 40850 }, { "epoch": 12.44, "learning_rate": 1.5125114155251144e-05, "loss": 1.9805, "step": 40875 }, { "epoch": 12.45, "learning_rate": 1.5109893455098936e-05, "loss": 1.9679, "step": 40900 }, { "epoch": 12.46, "learning_rate": 1.5094672754946728e-05, "loss": 2.0051, "step": 40925 }, { "epoch": 12.47, "learning_rate": 1.5079452054794521e-05, "loss": 1.9057, "step": 40950 }, { "epoch": 12.47, "learning_rate": 1.5064231354642316e-05, "loss": 2.0421, "step": 40975 }, { "epoch": 12.48, "learning_rate": 1.5049010654490109e-05, "loss": 1.9747, "step": 41000 }, { "epoch": 12.49, "learning_rate": 1.5033789954337901e-05, "loss": 1.9291, "step": 41025 }, { "epoch": 12.5, "learning_rate": 1.5018569254185693e-05, "loss": 1.9702, "step": 41050 }, { "epoch": 12.5, "learning_rate": 1.5003348554033487e-05, "loss": 1.9764, "step": 41075 }, { "epoch": 12.51, "learning_rate": 1.498812785388128e-05, "loss": 2.0485, "step": 41100 }, { "epoch": 12.52, "learning_rate": 1.4972907153729073e-05, "loss": 2.0413, "step": 41125 }, { "epoch": 12.53, "learning_rate": 1.4957686453576866e-05, "loss": 1.9439, "step": 41150 }, { "epoch": 12.53, "learning_rate": 1.494246575342466e-05, "loss": 1.9991, "step": 41175 }, { "epoch": 12.54, "learning_rate": 1.4927245053272451e-05, "loss": 1.9862, "step": 41200 }, { "epoch": 12.55, "learning_rate": 1.4912024353120244e-05, "loss": 2.0129, "step": 41225 }, { "epoch": 12.56, "learning_rate": 1.4896803652968037e-05, "loss": 2.0102, "step": 41250 }, { "epoch": 12.56, "learning_rate": 1.4881582952815832e-05, "loss": 1.9498, "step": 41275 }, { "epoch": 12.57, "learning_rate": 1.4866362252663624e-05, "loss": 2.0143, "step": 41300 }, { "epoch": 12.58, "learning_rate": 1.4851141552511417e-05, "loss": 2.1354, "step": 41325 }, { "epoch": 12.59, "learning_rate": 1.4835920852359208e-05, "loss": 1.9088, "step": 41350 }, { "epoch": 12.6, "learning_rate": 1.4820700152207003e-05, "loss": 1.9757, "step": 41375 }, { "epoch": 12.6, "learning_rate": 1.4805479452054796e-05, "loss": 2.0642, "step": 41400 }, { "epoch": 12.61, "learning_rate": 1.4790258751902589e-05, "loss": 1.9588, "step": 41425 }, { "epoch": 12.62, "learning_rate": 1.4775038051750381e-05, "loss": 2.0616, "step": 41450 }, { "epoch": 12.63, "learning_rate": 1.4759817351598176e-05, "loss": 2.0629, "step": 41475 }, { "epoch": 12.63, "learning_rate": 1.4744596651445969e-05, "loss": 2.0032, "step": 41500 }, { "epoch": 12.64, "learning_rate": 1.472937595129376e-05, "loss": 2.0258, "step": 41525 }, { "epoch": 12.65, "learning_rate": 1.4714155251141553e-05, "loss": 2.0051, "step": 41550 }, { "epoch": 12.66, "learning_rate": 1.4698934550989347e-05, "loss": 1.9466, "step": 41575 }, { "epoch": 12.66, "learning_rate": 1.468371385083714e-05, "loss": 1.9509, "step": 41600 }, { "epoch": 12.67, "learning_rate": 1.4668493150684933e-05, "loss": 1.9443, "step": 41625 }, { "epoch": 12.68, "learning_rate": 1.4653272450532726e-05, "loss": 2.09, "step": 41650 }, { "epoch": 12.69, "learning_rate": 1.463805175038052e-05, "loss": 1.9646, "step": 41675 }, { "epoch": 12.69, "learning_rate": 1.4622831050228312e-05, "loss": 1.9612, "step": 41700 }, { "epoch": 12.7, "learning_rate": 1.4607610350076104e-05, "loss": 2.0115, "step": 41725 }, { "epoch": 12.71, "learning_rate": 1.4592389649923897e-05, "loss": 1.9786, "step": 41750 }, { "epoch": 12.72, "learning_rate": 1.4577168949771692e-05, "loss": 1.9919, "step": 41775 }, { "epoch": 12.72, "learning_rate": 1.4561948249619485e-05, "loss": 1.9885, "step": 41800 }, { "epoch": 12.73, "learning_rate": 1.4546727549467277e-05, "loss": 2.0261, "step": 41825 }, { "epoch": 12.74, "learning_rate": 1.4531506849315069e-05, "loss": 2.0076, "step": 41850 }, { "epoch": 12.75, "learning_rate": 1.4516286149162863e-05, "loss": 2.1131, "step": 41875 }, { "epoch": 12.75, "learning_rate": 1.4501065449010656e-05, "loss": 1.9584, "step": 41900 }, { "epoch": 12.76, "learning_rate": 1.4485844748858449e-05, "loss": 2.0659, "step": 41925 }, { "epoch": 12.77, "learning_rate": 1.4470624048706242e-05, "loss": 1.9934, "step": 41950 }, { "epoch": 12.78, "learning_rate": 1.4455403348554036e-05, "loss": 2.0764, "step": 41975 }, { "epoch": 12.79, "learning_rate": 1.4440182648401827e-05, "loss": 1.9509, "step": 42000 }, { "epoch": 12.79, "learning_rate": 1.442496194824962e-05, "loss": 1.952, "step": 42025 }, { "epoch": 12.8, "learning_rate": 1.4409741248097413e-05, "loss": 1.9733, "step": 42050 }, { "epoch": 12.81, "learning_rate": 1.4394520547945208e-05, "loss": 2.055, "step": 42075 }, { "epoch": 12.82, "learning_rate": 1.4379299847793e-05, "loss": 2.0173, "step": 42100 }, { "epoch": 12.82, "learning_rate": 1.4364079147640793e-05, "loss": 2.0792, "step": 42125 }, { "epoch": 12.83, "learning_rate": 1.4348858447488584e-05, "loss": 2.0051, "step": 42150 }, { "epoch": 12.84, "learning_rate": 1.4334246575342467e-05, "loss": 2.1026, "step": 42175 }, { "epoch": 12.85, "learning_rate": 1.431902587519026e-05, "loss": 1.9983, "step": 42200 }, { "epoch": 12.85, "learning_rate": 1.4303805175038053e-05, "loss": 2.0111, "step": 42225 }, { "epoch": 12.86, "learning_rate": 1.4288584474885844e-05, "loss": 1.9746, "step": 42250 }, { "epoch": 12.87, "learning_rate": 1.427336377473364e-05, "loss": 1.9585, "step": 42275 }, { "epoch": 12.88, "learning_rate": 1.4258143074581431e-05, "loss": 1.9328, "step": 42300 }, { "epoch": 12.88, "learning_rate": 1.4242922374429224e-05, "loss": 2.0076, "step": 42325 }, { "epoch": 12.89, "learning_rate": 1.4227701674277017e-05, "loss": 1.9502, "step": 42350 }, { "epoch": 12.9, "learning_rate": 1.4212480974124812e-05, "loss": 2.0057, "step": 42375 }, { "epoch": 12.91, "learning_rate": 1.4197260273972605e-05, "loss": 2.0274, "step": 42400 }, { "epoch": 12.91, "learning_rate": 1.4182039573820396e-05, "loss": 2.0714, "step": 42425 }, { "epoch": 12.92, "learning_rate": 1.4166818873668189e-05, "loss": 2.0538, "step": 42450 }, { "epoch": 12.93, "learning_rate": 1.4151598173515983e-05, "loss": 2.0079, "step": 42475 }, { "epoch": 12.94, "learning_rate": 1.4136377473363776e-05, "loss": 2.012, "step": 42500 }, { "epoch": 12.95, "learning_rate": 1.4121156773211569e-05, "loss": 2.0648, "step": 42525 }, { "epoch": 12.95, "learning_rate": 1.4105936073059362e-05, "loss": 1.9938, "step": 42550 }, { "epoch": 12.96, "learning_rate": 1.4090715372907156e-05, "loss": 2.0337, "step": 42575 }, { "epoch": 12.97, "learning_rate": 1.4075494672754947e-05, "loss": 1.9989, "step": 42600 }, { "epoch": 12.98, "learning_rate": 1.406027397260274e-05, "loss": 2.0122, "step": 42625 }, { "epoch": 12.98, "learning_rate": 1.4045053272450533e-05, "loss": 2.0362, "step": 42650 }, { "epoch": 12.99, "learning_rate": 1.4029832572298327e-05, "loss": 2.0769, "step": 42675 }, { "epoch": 13.0, "learning_rate": 1.401461187214612e-05, "loss": 1.9409, "step": 42700 }, { "epoch": 13.01, "learning_rate": 1.3999391171993913e-05, "loss": 2.0088, "step": 42725 }, { "epoch": 13.01, "learning_rate": 1.3984170471841704e-05, "loss": 1.9523, "step": 42750 }, { "epoch": 13.02, "learning_rate": 1.3968949771689499e-05, "loss": 2.0154, "step": 42775 }, { "epoch": 13.03, "learning_rate": 1.3953729071537292e-05, "loss": 1.8923, "step": 42800 }, { "epoch": 13.04, "learning_rate": 1.3938508371385084e-05, "loss": 1.9403, "step": 42825 }, { "epoch": 13.04, "learning_rate": 1.3923287671232877e-05, "loss": 1.9734, "step": 42850 }, { "epoch": 13.05, "learning_rate": 1.3908066971080672e-05, "loss": 1.9603, "step": 42875 }, { "epoch": 13.06, "learning_rate": 1.3892846270928465e-05, "loss": 2.0133, "step": 42900 }, { "epoch": 13.07, "learning_rate": 1.3877625570776256e-05, "loss": 1.9425, "step": 42925 }, { "epoch": 13.07, "learning_rate": 1.3862404870624049e-05, "loss": 2.0419, "step": 42950 }, { "epoch": 13.08, "learning_rate": 1.3847184170471843e-05, "loss": 2.0259, "step": 42975 }, { "epoch": 13.09, "learning_rate": 1.3831963470319636e-05, "loss": 2.0865, "step": 43000 }, { "epoch": 13.1, "learning_rate": 1.3816742770167429e-05, "loss": 1.939, "step": 43025 }, { "epoch": 13.11, "learning_rate": 1.380152207001522e-05, "loss": 1.9798, "step": 43050 }, { "epoch": 13.11, "learning_rate": 1.3786301369863016e-05, "loss": 1.9552, "step": 43075 }, { "epoch": 13.12, "learning_rate": 1.3771080669710807e-05, "loss": 2.015, "step": 43100 }, { "epoch": 13.13, "learning_rate": 1.37558599695586e-05, "loss": 2.0089, "step": 43125 }, { "epoch": 13.14, "learning_rate": 1.3740639269406393e-05, "loss": 2.0603, "step": 43150 }, { "epoch": 13.14, "learning_rate": 1.3725418569254188e-05, "loss": 2.0267, "step": 43175 }, { "epoch": 13.15, "learning_rate": 1.371019786910198e-05, "loss": 1.9773, "step": 43200 }, { "epoch": 13.16, "learning_rate": 1.3694977168949772e-05, "loss": 2.0409, "step": 43225 }, { "epoch": 13.17, "learning_rate": 1.3679756468797564e-05, "loss": 2.0045, "step": 43250 }, { "epoch": 13.17, "learning_rate": 1.3664535768645359e-05, "loss": 1.9808, "step": 43275 }, { "epoch": 13.18, "learning_rate": 1.3649315068493152e-05, "loss": 1.9854, "step": 43300 }, { "epoch": 13.19, "learning_rate": 1.3634094368340945e-05, "loss": 2.0598, "step": 43325 }, { "epoch": 13.2, "learning_rate": 1.3618873668188738e-05, "loss": 1.9223, "step": 43350 }, { "epoch": 13.2, "learning_rate": 1.3603652968036532e-05, "loss": 1.917, "step": 43375 }, { "epoch": 13.21, "learning_rate": 1.3588432267884323e-05, "loss": 2.0412, "step": 43400 }, { "epoch": 13.22, "learning_rate": 1.3573211567732116e-05, "loss": 1.9624, "step": 43425 }, { "epoch": 13.23, "learning_rate": 1.3557990867579909e-05, "loss": 2.0322, "step": 43450 }, { "epoch": 13.23, "learning_rate": 1.3543378995433792e-05, "loss": 1.9836, "step": 43475 }, { "epoch": 13.24, "learning_rate": 1.3528158295281583e-05, "loss": 2.0068, "step": 43500 }, { "epoch": 13.25, "learning_rate": 1.3512937595129376e-05, "loss": 2.0092, "step": 43525 }, { "epoch": 13.26, "learning_rate": 1.349771689497717e-05, "loss": 2.0502, "step": 43550 }, { "epoch": 13.26, "learning_rate": 1.3482496194824963e-05, "loss": 1.9953, "step": 43575 }, { "epoch": 13.27, "learning_rate": 1.3467275494672756e-05, "loss": 1.9734, "step": 43600 }, { "epoch": 13.28, "learning_rate": 1.3452054794520549e-05, "loss": 1.9861, "step": 43625 }, { "epoch": 13.29, "learning_rate": 1.3436834094368343e-05, "loss": 1.9144, "step": 43650 }, { "epoch": 13.3, "learning_rate": 1.3421613394216134e-05, "loss": 1.8742, "step": 43675 }, { "epoch": 13.3, "learning_rate": 1.3406392694063927e-05, "loss": 2.1363, "step": 43700 }, { "epoch": 13.31, "learning_rate": 1.339117199391172e-05, "loss": 1.9555, "step": 43725 }, { "epoch": 13.32, "learning_rate": 1.3375951293759515e-05, "loss": 1.8268, "step": 43750 }, { "epoch": 13.33, "learning_rate": 1.3360730593607308e-05, "loss": 1.9572, "step": 43775 }, { "epoch": 13.33, "learning_rate": 1.33455098934551e-05, "loss": 2.0172, "step": 43800 }, { "epoch": 13.34, "learning_rate": 1.3330289193302892e-05, "loss": 1.9264, "step": 43825 }, { "epoch": 13.35, "learning_rate": 1.3315068493150686e-05, "loss": 1.9078, "step": 43850 }, { "epoch": 13.36, "learning_rate": 1.3299847792998479e-05, "loss": 1.9868, "step": 43875 }, { "epoch": 13.36, "learning_rate": 1.3284627092846272e-05, "loss": 2.0821, "step": 43900 }, { "epoch": 13.37, "learning_rate": 1.3269406392694065e-05, "loss": 2.0153, "step": 43925 }, { "epoch": 13.38, "learning_rate": 1.3254185692541859e-05, "loss": 1.9711, "step": 43950 }, { "epoch": 13.39, "learning_rate": 1.3238964992389652e-05, "loss": 1.9948, "step": 43975 }, { "epoch": 13.39, "learning_rate": 1.3223744292237443e-05, "loss": 2.0175, "step": 44000 }, { "epoch": 13.4, "learning_rate": 1.3208523592085236e-05, "loss": 2.0479, "step": 44025 }, { "epoch": 13.41, "learning_rate": 1.319330289193303e-05, "loss": 1.97, "step": 44050 }, { "epoch": 13.42, "learning_rate": 1.3178082191780823e-05, "loss": 2.1167, "step": 44075 }, { "epoch": 13.42, "learning_rate": 1.3162861491628616e-05, "loss": 1.9078, "step": 44100 }, { "epoch": 13.43, "learning_rate": 1.3147640791476409e-05, "loss": 1.9969, "step": 44125 }, { "epoch": 13.44, "learning_rate": 1.3132420091324204e-05, "loss": 2.076, "step": 44150 }, { "epoch": 13.45, "learning_rate": 1.3117199391171995e-05, "loss": 2.0344, "step": 44175 }, { "epoch": 13.46, "learning_rate": 1.3101978691019788e-05, "loss": 1.9706, "step": 44200 }, { "epoch": 13.46, "learning_rate": 1.308675799086758e-05, "loss": 1.9537, "step": 44225 }, { "epoch": 13.47, "learning_rate": 1.3071537290715375e-05, "loss": 1.9367, "step": 44250 }, { "epoch": 13.48, "learning_rate": 1.3056316590563168e-05, "loss": 1.9292, "step": 44275 }, { "epoch": 13.49, "learning_rate": 1.3041095890410959e-05, "loss": 1.9613, "step": 44300 }, { "epoch": 13.49, "learning_rate": 1.3025875190258752e-05, "loss": 1.9972, "step": 44325 }, { "epoch": 13.5, "learning_rate": 1.3010654490106546e-05, "loss": 2.0244, "step": 44350 }, { "epoch": 13.51, "learning_rate": 1.2995433789954339e-05, "loss": 1.8689, "step": 44375 }, { "epoch": 13.52, "learning_rate": 1.2980213089802132e-05, "loss": 2.0505, "step": 44400 }, { "epoch": 13.52, "learning_rate": 1.2964992389649925e-05, "loss": 1.9141, "step": 44425 }, { "epoch": 13.53, "learning_rate": 1.294977168949772e-05, "loss": 1.9494, "step": 44450 }, { "epoch": 13.54, "learning_rate": 1.293455098934551e-05, "loss": 2.0679, "step": 44475 }, { "epoch": 13.55, "learning_rate": 1.2919330289193303e-05, "loss": 2.0277, "step": 44500 }, { "epoch": 13.55, "learning_rate": 1.2904109589041096e-05, "loss": 1.979, "step": 44525 }, { "epoch": 13.56, "learning_rate": 1.288888888888889e-05, "loss": 1.9098, "step": 44550 }, { "epoch": 13.57, "learning_rate": 1.2873668188736683e-05, "loss": 1.944, "step": 44575 }, { "epoch": 13.58, "learning_rate": 1.2858447488584476e-05, "loss": 2.0591, "step": 44600 }, { "epoch": 13.58, "learning_rate": 1.2843226788432267e-05, "loss": 2.0085, "step": 44625 }, { "epoch": 13.59, "learning_rate": 1.2828006088280062e-05, "loss": 1.9236, "step": 44650 }, { "epoch": 13.6, "learning_rate": 1.2812785388127855e-05, "loss": 1.9142, "step": 44675 }, { "epoch": 13.61, "learning_rate": 1.2797564687975648e-05, "loss": 1.9177, "step": 44700 }, { "epoch": 13.61, "learning_rate": 1.278234398782344e-05, "loss": 1.9706, "step": 44725 }, { "epoch": 13.62, "learning_rate": 1.2767123287671235e-05, "loss": 1.8988, "step": 44750 }, { "epoch": 13.63, "learning_rate": 1.2751902587519028e-05, "loss": 1.9589, "step": 44775 }, { "epoch": 13.64, "learning_rate": 1.2736681887366819e-05, "loss": 2.0487, "step": 44800 }, { "epoch": 13.65, "learning_rate": 1.2721461187214612e-05, "loss": 2.066, "step": 44825 }, { "epoch": 13.65, "learning_rate": 1.2706240487062406e-05, "loss": 2.0137, "step": 44850 }, { "epoch": 13.66, "learning_rate": 1.26910197869102e-05, "loss": 2.0245, "step": 44875 }, { "epoch": 13.67, "learning_rate": 1.2675799086757992e-05, "loss": 2.0623, "step": 44900 }, { "epoch": 13.68, "learning_rate": 1.2660578386605783e-05, "loss": 1.9067, "step": 44925 }, { "epoch": 13.68, "learning_rate": 1.264535768645358e-05, "loss": 2.0114, "step": 44950 }, { "epoch": 13.69, "learning_rate": 1.263013698630137e-05, "loss": 1.9816, "step": 44975 }, { "epoch": 13.7, "learning_rate": 1.2614916286149163e-05, "loss": 2.0646, "step": 45000 }, { "epoch": 13.71, "learning_rate": 1.2599695585996956e-05, "loss": 1.9261, "step": 45025 }, { "epoch": 13.71, "learning_rate": 1.258447488584475e-05, "loss": 2.0998, "step": 45050 }, { "epoch": 13.72, "learning_rate": 1.2569254185692544e-05, "loss": 2.0812, "step": 45075 }, { "epoch": 13.73, "learning_rate": 1.2554033485540335e-05, "loss": 1.9178, "step": 45100 }, { "epoch": 13.74, "learning_rate": 1.2538812785388128e-05, "loss": 1.9155, "step": 45125 }, { "epoch": 13.74, "learning_rate": 1.2523592085235922e-05, "loss": 1.9267, "step": 45150 }, { "epoch": 13.75, "learning_rate": 1.2508371385083715e-05, "loss": 2.039, "step": 45175 }, { "epoch": 13.76, "learning_rate": 1.2493150684931508e-05, "loss": 2.0541, "step": 45200 }, { "epoch": 13.77, "learning_rate": 1.24779299847793e-05, "loss": 1.9726, "step": 45225 }, { "epoch": 13.77, "learning_rate": 1.2462709284627095e-05, "loss": 1.9593, "step": 45250 }, { "epoch": 13.78, "learning_rate": 1.2447488584474886e-05, "loss": 1.9423, "step": 45275 }, { "epoch": 13.79, "learning_rate": 1.243226788432268e-05, "loss": 1.9559, "step": 45300 }, { "epoch": 13.8, "learning_rate": 1.2417047184170472e-05, "loss": 1.9699, "step": 45325 }, { "epoch": 13.81, "learning_rate": 1.2401826484018267e-05, "loss": 1.9827, "step": 45350 }, { "epoch": 13.81, "learning_rate": 1.238660578386606e-05, "loss": 1.9052, "step": 45375 }, { "epoch": 13.82, "learning_rate": 1.2371385083713852e-05, "loss": 1.962, "step": 45400 }, { "epoch": 13.83, "learning_rate": 1.2356164383561643e-05, "loss": 1.9576, "step": 45425 }, { "epoch": 13.84, "learning_rate": 1.2340943683409438e-05, "loss": 2.0912, "step": 45450 }, { "epoch": 13.84, "learning_rate": 1.232572298325723e-05, "loss": 1.9244, "step": 45475 }, { "epoch": 13.85, "learning_rate": 1.2310502283105024e-05, "loss": 1.9651, "step": 45500 }, { "epoch": 13.86, "learning_rate": 1.2295281582952816e-05, "loss": 2.1149, "step": 45525 }, { "epoch": 13.87, "learning_rate": 1.2280060882800611e-05, "loss": 1.9963, "step": 45550 }, { "epoch": 13.87, "learning_rate": 1.2264840182648404e-05, "loss": 1.9967, "step": 45575 }, { "epoch": 13.88, "learning_rate": 1.2249619482496195e-05, "loss": 1.9299, "step": 45600 }, { "epoch": 13.89, "learning_rate": 1.2234398782343988e-05, "loss": 1.936, "step": 45625 }, { "epoch": 13.9, "learning_rate": 1.2219178082191782e-05, "loss": 2.0121, "step": 45650 }, { "epoch": 13.9, "learning_rate": 1.2203957382039575e-05, "loss": 1.9676, "step": 45675 }, { "epoch": 13.91, "learning_rate": 1.2188736681887368e-05, "loss": 1.9345, "step": 45700 }, { "epoch": 13.92, "learning_rate": 1.217351598173516e-05, "loss": 1.9886, "step": 45725 }, { "epoch": 13.93, "learning_rate": 1.2158295281582955e-05, "loss": 1.9737, "step": 45750 }, { "epoch": 13.93, "learning_rate": 1.2143074581430747e-05, "loss": 1.9502, "step": 45775 }, { "epoch": 13.94, "learning_rate": 1.212785388127854e-05, "loss": 1.925, "step": 45800 }, { "epoch": 13.95, "learning_rate": 1.2112633181126332e-05, "loss": 2.0506, "step": 45825 }, { "epoch": 13.96, "learning_rate": 1.2097412480974127e-05, "loss": 1.9704, "step": 45850 }, { "epoch": 13.96, "learning_rate": 1.208219178082192e-05, "loss": 1.9452, "step": 45875 }, { "epoch": 13.97, "learning_rate": 1.206697108066971e-05, "loss": 2.022, "step": 45900 }, { "epoch": 13.98, "learning_rate": 1.2051750380517504e-05, "loss": 1.9387, "step": 45925 }, { "epoch": 13.99, "learning_rate": 1.2036529680365298e-05, "loss": 1.9663, "step": 45950 }, { "epoch": 14.0, "learning_rate": 1.2021308980213091e-05, "loss": 2.0126, "step": 45975 }, { "epoch": 14.0, "learning_rate": 1.2006088280060884e-05, "loss": 2.0184, "step": 46000 }, { "epoch": 14.01, "learning_rate": 1.1990867579908677e-05, "loss": 1.893, "step": 46025 }, { "epoch": 14.02, "learning_rate": 1.1975646879756471e-05, "loss": 2.0362, "step": 46050 }, { "epoch": 14.03, "learning_rate": 1.1960426179604262e-05, "loss": 1.9939, "step": 46075 }, { "epoch": 14.03, "learning_rate": 1.1945205479452055e-05, "loss": 2.0096, "step": 46100 }, { "epoch": 14.04, "learning_rate": 1.1929984779299848e-05, "loss": 2.0146, "step": 46125 }, { "epoch": 14.05, "learning_rate": 1.1914764079147643e-05, "loss": 1.9037, "step": 46150 }, { "epoch": 14.06, "learning_rate": 1.1899543378995435e-05, "loss": 2.0156, "step": 46175 }, { "epoch": 14.06, "learning_rate": 1.1884322678843228e-05, "loss": 1.9033, "step": 46200 }, { "epoch": 14.07, "learning_rate": 1.186910197869102e-05, "loss": 2.0248, "step": 46225 }, { "epoch": 14.08, "learning_rate": 1.1853881278538814e-05, "loss": 2.0205, "step": 46250 }, { "epoch": 14.09, "learning_rate": 1.1838660578386607e-05, "loss": 1.991, "step": 46275 }, { "epoch": 14.09, "learning_rate": 1.18234398782344e-05, "loss": 1.888, "step": 46300 }, { "epoch": 14.1, "learning_rate": 1.1808219178082192e-05, "loss": 1.8724, "step": 46325 }, { "epoch": 14.11, "learning_rate": 1.1792998477929987e-05, "loss": 1.8538, "step": 46350 }, { "epoch": 14.12, "learning_rate": 1.177777777777778e-05, "loss": 1.9678, "step": 46375 }, { "epoch": 14.12, "learning_rate": 1.1762557077625571e-05, "loss": 1.9731, "step": 46400 }, { "epoch": 14.13, "learning_rate": 1.1747336377473364e-05, "loss": 1.9184, "step": 46425 }, { "epoch": 14.14, "learning_rate": 1.1732115677321158e-05, "loss": 2.0107, "step": 46450 }, { "epoch": 14.15, "learning_rate": 1.1716894977168951e-05, "loss": 1.8769, "step": 46475 }, { "epoch": 14.16, "learning_rate": 1.1701674277016744e-05, "loss": 1.9563, "step": 46500 }, { "epoch": 14.16, "learning_rate": 1.1686453576864535e-05, "loss": 1.9436, "step": 46525 }, { "epoch": 14.17, "learning_rate": 1.1671232876712331e-05, "loss": 1.943, "step": 46550 }, { "epoch": 14.18, "learning_rate": 1.1656012176560123e-05, "loss": 1.9551, "step": 46575 }, { "epoch": 14.19, "learning_rate": 1.1640791476407915e-05, "loss": 2.0068, "step": 46600 }, { "epoch": 14.19, "learning_rate": 1.1625570776255708e-05, "loss": 2.0303, "step": 46625 }, { "epoch": 14.2, "learning_rate": 1.1610350076103503e-05, "loss": 1.9128, "step": 46650 }, { "epoch": 14.21, "learning_rate": 1.1595129375951296e-05, "loss": 2.0005, "step": 46675 }, { "epoch": 14.22, "learning_rate": 1.1579908675799087e-05, "loss": 1.9755, "step": 46700 }, { "epoch": 14.22, "learning_rate": 1.156468797564688e-05, "loss": 1.9389, "step": 46725 }, { "epoch": 14.23, "learning_rate": 1.1549467275494674e-05, "loss": 1.9299, "step": 46750 }, { "epoch": 14.24, "learning_rate": 1.1534246575342467e-05, "loss": 2.0026, "step": 46775 }, { "epoch": 14.25, "learning_rate": 1.151902587519026e-05, "loss": 1.9436, "step": 46800 }, { "epoch": 14.25, "learning_rate": 1.1503805175038053e-05, "loss": 1.929, "step": 46825 }, { "epoch": 14.26, "learning_rate": 1.1488584474885847e-05, "loss": 2.0119, "step": 46850 }, { "epoch": 14.27, "learning_rate": 1.1473363774733638e-05, "loss": 1.9605, "step": 46875 }, { "epoch": 14.28, "learning_rate": 1.1458143074581431e-05, "loss": 1.9182, "step": 46900 }, { "epoch": 14.28, "learning_rate": 1.1442922374429224e-05, "loss": 2.0203, "step": 46925 }, { "epoch": 14.29, "learning_rate": 1.1427701674277019e-05, "loss": 1.9565, "step": 46950 }, { "epoch": 14.3, "learning_rate": 1.1412480974124811e-05, "loss": 2.0476, "step": 46975 }, { "epoch": 14.31, "learning_rate": 1.1397260273972604e-05, "loss": 1.9167, "step": 47000 }, { "epoch": 14.32, "learning_rate": 1.1382039573820395e-05, "loss": 1.8304, "step": 47025 }, { "epoch": 14.32, "learning_rate": 1.136681887366819e-05, "loss": 2.0416, "step": 47050 }, { "epoch": 14.33, "learning_rate": 1.1351598173515983e-05, "loss": 2.0166, "step": 47075 }, { "epoch": 14.34, "learning_rate": 1.1336377473363776e-05, "loss": 1.9663, "step": 47100 }, { "epoch": 14.35, "learning_rate": 1.1321156773211568e-05, "loss": 2.0136, "step": 47125 }, { "epoch": 14.35, "learning_rate": 1.1305936073059363e-05, "loss": 1.936, "step": 47150 }, { "epoch": 14.36, "learning_rate": 1.1290715372907156e-05, "loss": 1.9158, "step": 47175 }, { "epoch": 14.37, "learning_rate": 1.1275494672754947e-05, "loss": 1.9207, "step": 47200 }, { "epoch": 14.38, "learning_rate": 1.126027397260274e-05, "loss": 2.0042, "step": 47225 }, { "epoch": 14.38, "learning_rate": 1.1245053272450534e-05, "loss": 2.0697, "step": 47250 }, { "epoch": 14.39, "learning_rate": 1.1229832572298327e-05, "loss": 1.9097, "step": 47275 }, { "epoch": 14.4, "learning_rate": 1.121461187214612e-05, "loss": 1.9998, "step": 47300 }, { "epoch": 14.41, "learning_rate": 1.1199391171993911e-05, "loss": 1.9464, "step": 47325 }, { "epoch": 14.41, "learning_rate": 1.1184170471841707e-05, "loss": 1.9361, "step": 47350 }, { "epoch": 14.42, "learning_rate": 1.1168949771689499e-05, "loss": 2.0586, "step": 47375 }, { "epoch": 14.43, "learning_rate": 1.1153729071537291e-05, "loss": 1.9005, "step": 47400 }, { "epoch": 14.44, "learning_rate": 1.1138508371385084e-05, "loss": 1.969, "step": 47425 }, { "epoch": 14.44, "learning_rate": 1.1123287671232879e-05, "loss": 2.015, "step": 47450 }, { "epoch": 14.45, "learning_rate": 1.1108066971080672e-05, "loss": 2.03, "step": 47475 }, { "epoch": 14.46, "learning_rate": 1.1092846270928463e-05, "loss": 1.9863, "step": 47500 }, { "epoch": 14.47, "learning_rate": 1.1077625570776256e-05, "loss": 1.9524, "step": 47525 }, { "epoch": 14.47, "learning_rate": 1.106240487062405e-05, "loss": 1.9094, "step": 47550 }, { "epoch": 14.48, "learning_rate": 1.1047184170471843e-05, "loss": 1.9733, "step": 47575 }, { "epoch": 14.49, "learning_rate": 1.1031963470319636e-05, "loss": 1.9335, "step": 47600 }, { "epoch": 14.5, "learning_rate": 1.1016742770167429e-05, "loss": 1.9351, "step": 47625 }, { "epoch": 14.51, "learning_rate": 1.1001522070015223e-05, "loss": 1.8879, "step": 47650 }, { "epoch": 14.51, "learning_rate": 1.0986301369863014e-05, "loss": 1.956, "step": 47675 }, { "epoch": 14.52, "learning_rate": 1.0971080669710807e-05, "loss": 1.9061, "step": 47700 }, { "epoch": 14.53, "learning_rate": 1.09558599695586e-05, "loss": 2.0037, "step": 47725 }, { "epoch": 14.54, "learning_rate": 1.0940639269406395e-05, "loss": 2.0163, "step": 47750 }, { "epoch": 14.54, "learning_rate": 1.0925418569254187e-05, "loss": 2.1089, "step": 47775 }, { "epoch": 14.55, "learning_rate": 1.091019786910198e-05, "loss": 1.9014, "step": 47800 }, { "epoch": 14.56, "learning_rate": 1.0894977168949771e-05, "loss": 2.033, "step": 47825 }, { "epoch": 14.57, "learning_rate": 1.0879756468797566e-05, "loss": 1.9819, "step": 47850 }, { "epoch": 14.57, "learning_rate": 1.0864535768645359e-05, "loss": 1.9444, "step": 47875 }, { "epoch": 14.58, "learning_rate": 1.0849315068493152e-05, "loss": 1.9933, "step": 47900 }, { "epoch": 14.59, "learning_rate": 1.0834094368340944e-05, "loss": 2.0139, "step": 47925 }, { "epoch": 14.6, "learning_rate": 1.0818873668188739e-05, "loss": 2.0485, "step": 47950 }, { "epoch": 14.6, "learning_rate": 1.0803652968036532e-05, "loss": 1.9591, "step": 47975 }, { "epoch": 14.61, "learning_rate": 1.0788432267884323e-05, "loss": 1.954, "step": 48000 }, { "epoch": 14.62, "learning_rate": 1.0773211567732116e-05, "loss": 1.9854, "step": 48025 }, { "epoch": 14.63, "learning_rate": 1.075799086757991e-05, "loss": 1.8831, "step": 48050 }, { "epoch": 14.63, "learning_rate": 1.0742770167427703e-05, "loss": 2.074, "step": 48075 }, { "epoch": 14.64, "learning_rate": 1.0727549467275496e-05, "loss": 1.9053, "step": 48100 }, { "epoch": 14.65, "learning_rate": 1.0712328767123287e-05, "loss": 2.0317, "step": 48125 }, { "epoch": 14.66, "learning_rate": 1.0697108066971083e-05, "loss": 1.9007, "step": 48150 }, { "epoch": 14.67, "learning_rate": 1.0681887366818874e-05, "loss": 1.9001, "step": 48175 }, { "epoch": 14.67, "learning_rate": 1.0666666666666667e-05, "loss": 1.936, "step": 48200 }, { "epoch": 14.68, "learning_rate": 1.065144596651446e-05, "loss": 1.9031, "step": 48225 }, { "epoch": 14.69, "learning_rate": 1.0636225266362255e-05, "loss": 2.0159, "step": 48250 }, { "epoch": 14.7, "learning_rate": 1.0621004566210048e-05, "loss": 1.9913, "step": 48275 }, { "epoch": 14.7, "learning_rate": 1.0605783866057839e-05, "loss": 2.0991, "step": 48300 }, { "epoch": 14.71, "learning_rate": 1.0590563165905632e-05, "loss": 2.0389, "step": 48325 }, { "epoch": 14.72, "learning_rate": 1.0575342465753426e-05, "loss": 1.9573, "step": 48350 }, { "epoch": 14.73, "learning_rate": 1.0560121765601219e-05, "loss": 1.9287, "step": 48375 }, { "epoch": 14.73, "learning_rate": 1.0544901065449012e-05, "loss": 1.9629, "step": 48400 }, { "epoch": 14.74, "learning_rate": 1.0529680365296805e-05, "loss": 2.0366, "step": 48425 }, { "epoch": 14.75, "learning_rate": 1.0514459665144599e-05, "loss": 2.0515, "step": 48450 }, { "epoch": 14.76, "learning_rate": 1.049923896499239e-05, "loss": 1.949, "step": 48475 }, { "epoch": 14.76, "learning_rate": 1.0484018264840183e-05, "loss": 2.0248, "step": 48500 }, { "epoch": 14.77, "learning_rate": 1.0468797564687976e-05, "loss": 1.9764, "step": 48525 }, { "epoch": 14.78, "learning_rate": 1.045357686453577e-05, "loss": 1.8845, "step": 48550 }, { "epoch": 14.79, "learning_rate": 1.0438356164383563e-05, "loss": 2.0516, "step": 48575 }, { "epoch": 14.79, "learning_rate": 1.0423135464231356e-05, "loss": 1.9194, "step": 48600 }, { "epoch": 14.8, "learning_rate": 1.0407914764079147e-05, "loss": 2.0027, "step": 48625 }, { "epoch": 14.81, "learning_rate": 1.0392694063926942e-05, "loss": 1.9999, "step": 48650 }, { "epoch": 14.82, "learning_rate": 1.0377473363774735e-05, "loss": 1.9374, "step": 48675 }, { "epoch": 14.82, "learning_rate": 1.0362252663622528e-05, "loss": 2.0, "step": 48700 }, { "epoch": 14.83, "learning_rate": 1.034703196347032e-05, "loss": 2.0698, "step": 48725 }, { "epoch": 14.84, "learning_rate": 1.0331811263318115e-05, "loss": 1.9073, "step": 48750 }, { "epoch": 14.85, "learning_rate": 1.0316590563165908e-05, "loss": 1.879, "step": 48775 }, { "epoch": 14.86, "learning_rate": 1.0301369863013699e-05, "loss": 2.0149, "step": 48800 }, { "epoch": 14.86, "learning_rate": 1.0286149162861492e-05, "loss": 1.9765, "step": 48825 }, { "epoch": 14.87, "learning_rate": 1.0270928462709286e-05, "loss": 2.0193, "step": 48850 }, { "epoch": 14.88, "learning_rate": 1.0255707762557079e-05, "loss": 2.0319, "step": 48875 }, { "epoch": 14.89, "learning_rate": 1.0240487062404872e-05, "loss": 2.0198, "step": 48900 }, { "epoch": 14.89, "learning_rate": 1.0225266362252663e-05, "loss": 1.9011, "step": 48925 }, { "epoch": 14.9, "learning_rate": 1.021004566210046e-05, "loss": 1.943, "step": 48950 }, { "epoch": 14.91, "learning_rate": 1.019482496194825e-05, "loss": 1.9647, "step": 48975 }, { "epoch": 14.92, "learning_rate": 1.0179604261796043e-05, "loss": 1.9477, "step": 49000 }, { "epoch": 14.92, "learning_rate": 1.0164383561643836e-05, "loss": 1.9735, "step": 49025 }, { "epoch": 14.93, "learning_rate": 1.014916286149163e-05, "loss": 1.9634, "step": 49050 }, { "epoch": 14.94, "learning_rate": 1.0133942161339423e-05, "loss": 2.0279, "step": 49075 }, { "epoch": 14.95, "learning_rate": 1.0118721461187215e-05, "loss": 1.9043, "step": 49100 }, { "epoch": 14.95, "learning_rate": 1.0103500761035007e-05, "loss": 1.9345, "step": 49125 }, { "epoch": 14.96, "learning_rate": 1.0088280060882802e-05, "loss": 1.9, "step": 49150 }, { "epoch": 14.97, "learning_rate": 1.0073059360730595e-05, "loss": 1.9883, "step": 49175 }, { "epoch": 14.98, "learning_rate": 1.0057838660578388e-05, "loss": 1.9939, "step": 49200 }, { "epoch": 14.98, "learning_rate": 1.004261796042618e-05, "loss": 1.8871, "step": 49225 }, { "epoch": 14.99, "learning_rate": 1.0027397260273975e-05, "loss": 2.009, "step": 49250 }, { "epoch": 15.0, "learning_rate": 1.0012176560121766e-05, "loss": 2.0039, "step": 49275 }, { "epoch": 15.01, "learning_rate": 9.996955859969559e-06, "loss": 2.0019, "step": 49300 }, { "epoch": 15.02, "learning_rate": 9.981735159817354e-06, "loss": 1.9455, "step": 49325 }, { "epoch": 15.02, "learning_rate": 9.966514459665145e-06, "loss": 1.9116, "step": 49350 }, { "epoch": 15.03, "learning_rate": 9.95129375951294e-06, "loss": 1.9623, "step": 49375 }, { "epoch": 15.04, "learning_rate": 9.936073059360732e-06, "loss": 2.0, "step": 49400 }, { "epoch": 15.05, "learning_rate": 9.920852359208525e-06, "loss": 1.9419, "step": 49425 }, { "epoch": 15.05, "learning_rate": 9.905631659056318e-06, "loss": 1.9811, "step": 49450 }, { "epoch": 15.06, "learning_rate": 9.89041095890411e-06, "loss": 1.9864, "step": 49475 }, { "epoch": 15.07, "learning_rate": 9.875190258751903e-06, "loss": 1.8739, "step": 49500 }, { "epoch": 15.08, "learning_rate": 9.859969558599696e-06, "loss": 2.0509, "step": 49525 }, { "epoch": 15.08, "learning_rate": 9.844748858447489e-06, "loss": 2.0082, "step": 49550 }, { "epoch": 15.09, "learning_rate": 9.829528158295284e-06, "loss": 1.961, "step": 49575 }, { "epoch": 15.1, "learning_rate": 9.814307458143075e-06, "loss": 1.9278, "step": 49600 }, { "epoch": 15.11, "learning_rate": 9.79908675799087e-06, "loss": 1.9934, "step": 49625 }, { "epoch": 15.11, "learning_rate": 9.78386605783866e-06, "loss": 1.8641, "step": 49650 }, { "epoch": 15.12, "learning_rate": 9.768645357686455e-06, "loss": 1.9328, "step": 49675 }, { "epoch": 15.13, "learning_rate": 9.753424657534248e-06, "loss": 1.991, "step": 49700 }, { "epoch": 15.14, "learning_rate": 9.73820395738204e-06, "loss": 1.9905, "step": 49725 }, { "epoch": 15.14, "learning_rate": 9.722983257229834e-06, "loss": 2.0744, "step": 49750 }, { "epoch": 15.15, "learning_rate": 9.707762557077626e-06, "loss": 1.8815, "step": 49775 }, { "epoch": 15.16, "learning_rate": 9.69254185692542e-06, "loss": 1.8839, "step": 49800 }, { "epoch": 15.17, "learning_rate": 9.677321156773212e-06, "loss": 1.9512, "step": 49825 }, { "epoch": 15.18, "learning_rate": 9.662100456621005e-06, "loss": 1.9877, "step": 49850 }, { "epoch": 15.18, "learning_rate": 9.6468797564688e-06, "loss": 2.0149, "step": 49875 }, { "epoch": 15.19, "learning_rate": 9.63165905631659e-06, "loss": 1.9746, "step": 49900 }, { "epoch": 15.2, "learning_rate": 9.616438356164385e-06, "loss": 1.9746, "step": 49925 }, { "epoch": 15.21, "learning_rate": 9.601217656012178e-06, "loss": 1.9566, "step": 49950 }, { "epoch": 15.21, "learning_rate": 9.58599695585997e-06, "loss": 1.889, "step": 49975 }, { "epoch": 15.22, "learning_rate": 9.570776255707764e-06, "loss": 1.978, "step": 50000 }, { "epoch": 15.23, "learning_rate": 9.555555555555556e-06, "loss": 1.9767, "step": 50025 }, { "epoch": 15.24, "learning_rate": 9.54033485540335e-06, "loss": 1.9758, "step": 50050 }, { "epoch": 15.24, "learning_rate": 9.525114155251142e-06, "loss": 2.0169, "step": 50075 }, { "epoch": 15.25, "learning_rate": 9.509893455098935e-06, "loss": 1.9795, "step": 50100 }, { "epoch": 15.26, "learning_rate": 9.49467275494673e-06, "loss": 1.9105, "step": 50125 }, { "epoch": 15.27, "learning_rate": 9.47945205479452e-06, "loss": 1.9358, "step": 50150 }, { "epoch": 15.27, "learning_rate": 9.464231354642315e-06, "loss": 1.9197, "step": 50175 }, { "epoch": 15.28, "learning_rate": 9.449010654490108e-06, "loss": 1.9679, "step": 50200 }, { "epoch": 15.29, "learning_rate": 9.433789954337901e-06, "loss": 2.0478, "step": 50225 }, { "epoch": 15.3, "learning_rate": 9.418569254185694e-06, "loss": 1.9561, "step": 50250 }, { "epoch": 15.3, "learning_rate": 9.403348554033487e-06, "loss": 2.0571, "step": 50275 }, { "epoch": 15.31, "learning_rate": 9.38812785388128e-06, "loss": 1.8852, "step": 50300 }, { "epoch": 15.32, "learning_rate": 9.372907153729072e-06, "loss": 1.8964, "step": 50325 }, { "epoch": 15.33, "learning_rate": 9.357686453576865e-06, "loss": 1.9007, "step": 50350 }, { "epoch": 15.33, "learning_rate": 9.342465753424658e-06, "loss": 1.9047, "step": 50375 }, { "epoch": 15.34, "learning_rate": 9.32724505327245e-06, "loss": 1.8826, "step": 50400 }, { "epoch": 15.35, "learning_rate": 9.312024353120245e-06, "loss": 2.0846, "step": 50425 }, { "epoch": 15.36, "learning_rate": 9.296803652968036e-06, "loss": 1.8743, "step": 50450 }, { "epoch": 15.37, "learning_rate": 9.281582952815831e-06, "loss": 1.8567, "step": 50475 }, { "epoch": 15.37, "learning_rate": 9.266362252663624e-06, "loss": 1.9336, "step": 50500 }, { "epoch": 15.38, "learning_rate": 9.251141552511417e-06, "loss": 1.9018, "step": 50525 }, { "epoch": 15.39, "learning_rate": 9.23592085235921e-06, "loss": 1.9833, "step": 50550 }, { "epoch": 15.4, "learning_rate": 9.220700152207002e-06, "loss": 1.9243, "step": 50575 }, { "epoch": 15.4, "learning_rate": 9.205479452054795e-06, "loss": 1.999, "step": 50600 }, { "epoch": 15.41, "learning_rate": 9.190258751902588e-06, "loss": 2.07, "step": 50625 }, { "epoch": 15.42, "learning_rate": 9.17564687975647e-06, "loss": 2.0052, "step": 50650 }, { "epoch": 15.43, "learning_rate": 9.160426179604262e-06, "loss": 1.9705, "step": 50675 }, { "epoch": 15.43, "learning_rate": 9.145205479452055e-06, "loss": 1.9387, "step": 50700 }, { "epoch": 15.44, "learning_rate": 9.12998477929985e-06, "loss": 2.0054, "step": 50725 }, { "epoch": 15.45, "learning_rate": 9.11476407914764e-06, "loss": 1.947, "step": 50750 }, { "epoch": 15.46, "learning_rate": 9.099543378995435e-06, "loss": 1.9877, "step": 50775 }, { "epoch": 15.46, "learning_rate": 9.084322678843226e-06, "loss": 1.9343, "step": 50800 }, { "epoch": 15.47, "learning_rate": 9.06910197869102e-06, "loss": 1.977, "step": 50825 }, { "epoch": 15.48, "learning_rate": 9.053881278538814e-06, "loss": 2.0358, "step": 50850 }, { "epoch": 15.49, "learning_rate": 9.038660578386606e-06, "loss": 1.9384, "step": 50875 }, { "epoch": 15.49, "learning_rate": 9.0234398782344e-06, "loss": 1.9109, "step": 50900 }, { "epoch": 15.5, "learning_rate": 9.008219178082192e-06, "loss": 1.9731, "step": 50925 }, { "epoch": 15.51, "learning_rate": 8.992998477929985e-06, "loss": 1.9435, "step": 50950 }, { "epoch": 15.52, "learning_rate": 8.977777777777778e-06, "loss": 1.8879, "step": 50975 }, { "epoch": 15.53, "learning_rate": 8.96255707762557e-06, "loss": 1.9415, "step": 51000 }, { "epoch": 15.53, "learning_rate": 8.947336377473365e-06, "loss": 1.9765, "step": 51025 }, { "epoch": 15.54, "learning_rate": 8.932115677321156e-06, "loss": 1.9545, "step": 51050 }, { "epoch": 15.55, "learning_rate": 8.916894977168951e-06, "loss": 1.8787, "step": 51075 }, { "epoch": 15.56, "learning_rate": 8.901674277016744e-06, "loss": 1.9561, "step": 51100 }, { "epoch": 15.56, "learning_rate": 8.886453576864537e-06, "loss": 2.0651, "step": 51125 }, { "epoch": 15.57, "learning_rate": 8.87123287671233e-06, "loss": 1.8878, "step": 51150 }, { "epoch": 15.58, "learning_rate": 8.856012176560122e-06, "loss": 1.9929, "step": 51175 }, { "epoch": 15.59, "learning_rate": 8.840791476407915e-06, "loss": 1.9261, "step": 51200 }, { "epoch": 15.59, "learning_rate": 8.825570776255708e-06, "loss": 2.0192, "step": 51225 }, { "epoch": 15.6, "learning_rate": 8.8103500761035e-06, "loss": 1.9563, "step": 51250 }, { "epoch": 15.61, "learning_rate": 8.795129375951295e-06, "loss": 1.9667, "step": 51275 }, { "epoch": 15.62, "learning_rate": 8.779908675799086e-06, "loss": 1.9906, "step": 51300 }, { "epoch": 15.62, "learning_rate": 8.764687975646881e-06, "loss": 1.9746, "step": 51325 }, { "epoch": 15.63, "learning_rate": 8.749467275494674e-06, "loss": 1.9885, "step": 51350 }, { "epoch": 15.64, "learning_rate": 8.734246575342467e-06, "loss": 1.9304, "step": 51375 }, { "epoch": 15.65, "learning_rate": 8.71902587519026e-06, "loss": 1.9789, "step": 51400 }, { "epoch": 15.65, "learning_rate": 8.703805175038052e-06, "loss": 1.9598, "step": 51425 }, { "epoch": 15.66, "learning_rate": 8.688584474885845e-06, "loss": 1.9788, "step": 51450 }, { "epoch": 15.67, "learning_rate": 8.673363774733638e-06, "loss": 1.9393, "step": 51475 }, { "epoch": 15.68, "learning_rate": 8.658143074581431e-06, "loss": 1.933, "step": 51500 }, { "epoch": 15.68, "learning_rate": 8.642922374429224e-06, "loss": 1.9745, "step": 51525 }, { "epoch": 15.69, "learning_rate": 8.627701674277017e-06, "loss": 1.9297, "step": 51550 }, { "epoch": 15.7, "learning_rate": 8.612480974124811e-06, "loss": 1.8979, "step": 51575 }, { "epoch": 15.71, "learning_rate": 8.597260273972602e-06, "loss": 1.9345, "step": 51600 }, { "epoch": 15.72, "learning_rate": 8.582039573820397e-06, "loss": 1.9887, "step": 51625 }, { "epoch": 15.72, "learning_rate": 8.56681887366819e-06, "loss": 2.0398, "step": 51650 }, { "epoch": 15.73, "learning_rate": 8.551598173515982e-06, "loss": 1.9723, "step": 51675 }, { "epoch": 15.74, "learning_rate": 8.536377473363775e-06, "loss": 1.9411, "step": 51700 }, { "epoch": 15.75, "learning_rate": 8.521156773211568e-06, "loss": 1.91, "step": 51725 }, { "epoch": 15.75, "learning_rate": 8.505936073059361e-06, "loss": 1.9958, "step": 51750 }, { "epoch": 15.76, "learning_rate": 8.490715372907154e-06, "loss": 1.9466, "step": 51775 }, { "epoch": 15.77, "learning_rate": 8.475494672754947e-06, "loss": 1.8703, "step": 51800 }, { "epoch": 15.78, "learning_rate": 8.460273972602741e-06, "loss": 1.9106, "step": 51825 }, { "epoch": 15.78, "learning_rate": 8.445053272450532e-06, "loss": 1.9092, "step": 51850 }, { "epoch": 15.79, "learning_rate": 8.429832572298327e-06, "loss": 1.9051, "step": 51875 }, { "epoch": 15.8, "learning_rate": 8.41461187214612e-06, "loss": 1.9716, "step": 51900 }, { "epoch": 15.81, "learning_rate": 8.399391171993913e-06, "loss": 1.8972, "step": 51925 }, { "epoch": 15.81, "learning_rate": 8.384170471841705e-06, "loss": 2.0439, "step": 51950 }, { "epoch": 15.82, "learning_rate": 8.368949771689498e-06, "loss": 1.9814, "step": 51975 }, { "epoch": 15.83, "learning_rate": 8.353729071537291e-06, "loss": 1.9067, "step": 52000 }, { "epoch": 15.84, "learning_rate": 8.338508371385084e-06, "loss": 1.9205, "step": 52025 }, { "epoch": 15.84, "learning_rate": 8.323287671232877e-06, "loss": 1.9346, "step": 52050 }, { "epoch": 15.85, "learning_rate": 8.308066971080671e-06, "loss": 1.9421, "step": 52075 }, { "epoch": 15.86, "learning_rate": 8.292846270928462e-06, "loss": 1.8881, "step": 52100 }, { "epoch": 15.87, "learning_rate": 8.277625570776257e-06, "loss": 2.0199, "step": 52125 }, { "epoch": 15.88, "learning_rate": 8.26240487062405e-06, "loss": 1.8355, "step": 52150 }, { "epoch": 15.88, "learning_rate": 8.247184170471843e-06, "loss": 1.9857, "step": 52175 }, { "epoch": 15.89, "learning_rate": 8.231963470319635e-06, "loss": 1.9821, "step": 52200 }, { "epoch": 15.9, "learning_rate": 8.216742770167428e-06, "loss": 1.9458, "step": 52225 }, { "epoch": 15.91, "learning_rate": 8.201522070015221e-06, "loss": 1.9139, "step": 52250 }, { "epoch": 15.91, "learning_rate": 8.186301369863014e-06, "loss": 1.9435, "step": 52275 }, { "epoch": 15.92, "learning_rate": 8.171080669710807e-06, "loss": 2.0494, "step": 52300 }, { "epoch": 15.93, "learning_rate": 8.1558599695586e-06, "loss": 2.0148, "step": 52325 }, { "epoch": 15.94, "learning_rate": 8.140639269406393e-06, "loss": 1.9855, "step": 52350 }, { "epoch": 15.94, "learning_rate": 8.125418569254187e-06, "loss": 1.8551, "step": 52375 }, { "epoch": 15.95, "learning_rate": 8.110197869101978e-06, "loss": 2.0901, "step": 52400 }, { "epoch": 15.96, "learning_rate": 8.094977168949773e-06, "loss": 1.9605, "step": 52425 }, { "epoch": 15.97, "learning_rate": 8.079756468797566e-06, "loss": 2.009, "step": 52450 }, { "epoch": 15.97, "learning_rate": 8.064535768645358e-06, "loss": 1.9351, "step": 52475 }, { "epoch": 15.98, "learning_rate": 8.049315068493151e-06, "loss": 1.9546, "step": 52500 }, { "epoch": 15.99, "learning_rate": 8.034094368340944e-06, "loss": 1.9336, "step": 52525 }, { "epoch": 16.0, "learning_rate": 8.018873668188737e-06, "loss": 1.9764, "step": 52550 }, { "epoch": 16.0, "learning_rate": 8.00365296803653e-06, "loss": 1.9384, "step": 52575 }, { "epoch": 16.01, "learning_rate": 7.988432267884323e-06, "loss": 1.8792, "step": 52600 }, { "epoch": 16.02, "learning_rate": 7.973211567732117e-06, "loss": 1.8954, "step": 52625 }, { "epoch": 16.03, "learning_rate": 7.957990867579908e-06, "loss": 1.887, "step": 52650 }, { "epoch": 16.04, "learning_rate": 7.942770167427703e-06, "loss": 1.8549, "step": 52675 }, { "epoch": 16.04, "learning_rate": 7.927549467275496e-06, "loss": 2.0322, "step": 52700 }, { "epoch": 16.05, "learning_rate": 7.912328767123288e-06, "loss": 1.931, "step": 52725 }, { "epoch": 16.06, "learning_rate": 7.897108066971081e-06, "loss": 1.9956, "step": 52750 }, { "epoch": 16.07, "learning_rate": 7.881887366818874e-06, "loss": 1.9696, "step": 52775 }, { "epoch": 16.07, "learning_rate": 7.866666666666667e-06, "loss": 1.9958, "step": 52800 }, { "epoch": 16.08, "learning_rate": 7.85144596651446e-06, "loss": 1.9175, "step": 52825 }, { "epoch": 16.09, "learning_rate": 7.836225266362253e-06, "loss": 1.8408, "step": 52850 }, { "epoch": 16.1, "learning_rate": 7.821004566210047e-06, "loss": 1.8441, "step": 52875 }, { "epoch": 16.1, "learning_rate": 7.805783866057838e-06, "loss": 1.9698, "step": 52900 }, { "epoch": 16.11, "learning_rate": 7.790563165905633e-06, "loss": 1.8653, "step": 52925 }, { "epoch": 16.12, "learning_rate": 7.775342465753424e-06, "loss": 2.0227, "step": 52950 }, { "epoch": 16.13, "learning_rate": 7.760121765601219e-06, "loss": 1.9209, "step": 52975 }, { "epoch": 16.13, "learning_rate": 7.744901065449011e-06, "loss": 1.9751, "step": 53000 }, { "epoch": 16.14, "learning_rate": 7.729680365296804e-06, "loss": 1.9758, "step": 53025 }, { "epoch": 16.15, "learning_rate": 7.714459665144597e-06, "loss": 2.0452, "step": 53050 }, { "epoch": 16.16, "learning_rate": 7.69923896499239e-06, "loss": 1.9115, "step": 53075 }, { "epoch": 16.16, "learning_rate": 7.684018264840183e-06, "loss": 1.961, "step": 53100 }, { "epoch": 16.17, "learning_rate": 7.668797564687976e-06, "loss": 1.9015, "step": 53125 }, { "epoch": 16.18, "learning_rate": 7.653576864535768e-06, "loss": 1.9459, "step": 53150 }, { "epoch": 16.19, "learning_rate": 7.638356164383563e-06, "loss": 1.9937, "step": 53175 }, { "epoch": 16.19, "learning_rate": 7.623135464231355e-06, "loss": 1.9711, "step": 53200 }, { "epoch": 16.2, "learning_rate": 7.607914764079149e-06, "loss": 1.9129, "step": 53225 }, { "epoch": 16.21, "learning_rate": 7.592694063926941e-06, "loss": 1.9291, "step": 53250 }, { "epoch": 16.22, "learning_rate": 7.577473363774734e-06, "loss": 2.0245, "step": 53275 }, { "epoch": 16.23, "learning_rate": 7.562252663622527e-06, "loss": 1.9558, "step": 53300 }, { "epoch": 16.23, "learning_rate": 7.54703196347032e-06, "loss": 1.9083, "step": 53325 }, { "epoch": 16.24, "learning_rate": 7.531811263318113e-06, "loss": 1.9875, "step": 53350 }, { "epoch": 16.25, "learning_rate": 7.5165905631659066e-06, "loss": 1.9424, "step": 53375 }, { "epoch": 16.26, "learning_rate": 7.5013698630136986e-06, "loss": 1.9449, "step": 53400 }, { "epoch": 16.26, "learning_rate": 7.486149162861492e-06, "loss": 1.8812, "step": 53425 }, { "epoch": 16.27, "learning_rate": 7.470928462709285e-06, "loss": 1.9874, "step": 53450 }, { "epoch": 16.28, "learning_rate": 7.455707762557079e-06, "loss": 2.0413, "step": 53475 }, { "epoch": 16.29, "learning_rate": 7.440487062404871e-06, "loss": 1.9994, "step": 53500 }, { "epoch": 16.29, "learning_rate": 7.4252663622526645e-06, "loss": 1.9185, "step": 53525 }, { "epoch": 16.3, "learning_rate": 7.4100456621004565e-06, "loss": 1.8973, "step": 53550 }, { "epoch": 16.31, "learning_rate": 7.39482496194825e-06, "loss": 1.9159, "step": 53575 }, { "epoch": 16.32, "learning_rate": 7.379604261796043e-06, "loss": 1.9243, "step": 53600 }, { "epoch": 16.32, "learning_rate": 7.364383561643837e-06, "loss": 1.9837, "step": 53625 }, { "epoch": 16.33, "learning_rate": 7.349162861491629e-06, "loss": 1.9124, "step": 53650 }, { "epoch": 16.34, "learning_rate": 7.333942161339422e-06, "loss": 1.9815, "step": 53675 }, { "epoch": 16.35, "learning_rate": 7.318721461187215e-06, "loss": 1.911, "step": 53700 }, { "epoch": 16.35, "learning_rate": 7.303500761035008e-06, "loss": 1.934, "step": 53725 }, { "epoch": 16.36, "learning_rate": 7.288280060882801e-06, "loss": 1.9157, "step": 53750 }, { "epoch": 16.37, "learning_rate": 7.2730593607305946e-06, "loss": 1.9445, "step": 53775 }, { "epoch": 16.38, "learning_rate": 7.2578386605783865e-06, "loss": 1.9126, "step": 53800 }, { "epoch": 16.39, "learning_rate": 7.24261796042618e-06, "loss": 1.9318, "step": 53825 }, { "epoch": 16.39, "learning_rate": 7.227397260273973e-06, "loss": 1.9675, "step": 53850 }, { "epoch": 16.4, "learning_rate": 7.212176560121767e-06, "loss": 2.0658, "step": 53875 }, { "epoch": 16.41, "learning_rate": 7.196955859969559e-06, "loss": 1.9834, "step": 53900 }, { "epoch": 16.42, "learning_rate": 7.1817351598173524e-06, "loss": 1.9381, "step": 53925 }, { "epoch": 16.42, "learning_rate": 7.1665144596651444e-06, "loss": 1.9349, "step": 53950 }, { "epoch": 16.43, "learning_rate": 7.151293759512938e-06, "loss": 1.981, "step": 53975 }, { "epoch": 16.44, "learning_rate": 7.136073059360731e-06, "loss": 1.9228, "step": 54000 }, { "epoch": 16.45, "learning_rate": 7.120852359208525e-06, "loss": 1.9982, "step": 54025 }, { "epoch": 16.45, "learning_rate": 7.105631659056317e-06, "loss": 1.9066, "step": 54050 }, { "epoch": 16.46, "learning_rate": 7.09041095890411e-06, "loss": 1.8969, "step": 54075 }, { "epoch": 16.47, "learning_rate": 7.075190258751903e-06, "loss": 1.9243, "step": 54100 }, { "epoch": 16.48, "learning_rate": 7.059969558599696e-06, "loss": 2.0025, "step": 54125 }, { "epoch": 16.48, "learning_rate": 7.044748858447489e-06, "loss": 1.9632, "step": 54150 }, { "epoch": 16.49, "learning_rate": 7.0295281582952825e-06, "loss": 1.9642, "step": 54175 }, { "epoch": 16.5, "learning_rate": 7.0143074581430745e-06, "loss": 1.8765, "step": 54200 }, { "epoch": 16.51, "learning_rate": 6.999086757990868e-06, "loss": 1.8405, "step": 54225 }, { "epoch": 16.51, "learning_rate": 6.983866057838661e-06, "loss": 1.9473, "step": 54250 }, { "epoch": 16.52, "learning_rate": 6.968645357686455e-06, "loss": 1.9706, "step": 54275 }, { "epoch": 16.53, "learning_rate": 6.953424657534247e-06, "loss": 1.9083, "step": 54300 }, { "epoch": 16.54, "learning_rate": 6.93820395738204e-06, "loss": 1.9204, "step": 54325 }, { "epoch": 16.54, "learning_rate": 6.922983257229832e-06, "loss": 1.9436, "step": 54350 }, { "epoch": 16.55, "learning_rate": 6.907762557077626e-06, "loss": 2.0207, "step": 54375 }, { "epoch": 16.56, "learning_rate": 6.892541856925419e-06, "loss": 1.961, "step": 54400 }, { "epoch": 16.57, "learning_rate": 6.877321156773213e-06, "loss": 1.8648, "step": 54425 }, { "epoch": 16.58, "learning_rate": 6.862100456621005e-06, "loss": 1.9194, "step": 54450 }, { "epoch": 16.58, "learning_rate": 6.846879756468798e-06, "loss": 1.9261, "step": 54475 }, { "epoch": 16.59, "learning_rate": 6.831659056316591e-06, "loss": 2.0225, "step": 54500 }, { "epoch": 16.6, "learning_rate": 6.816438356164384e-06, "loss": 1.9711, "step": 54525 }, { "epoch": 16.61, "learning_rate": 6.801217656012177e-06, "loss": 2.0096, "step": 54550 }, { "epoch": 16.61, "learning_rate": 6.7859969558599705e-06, "loss": 1.9443, "step": 54575 }, { "epoch": 16.62, "learning_rate": 6.7707762557077625e-06, "loss": 1.944, "step": 54600 }, { "epoch": 16.63, "learning_rate": 6.755555555555556e-06, "loss": 2.0549, "step": 54625 }, { "epoch": 16.64, "learning_rate": 6.740334855403349e-06, "loss": 1.9064, "step": 54650 }, { "epoch": 16.64, "learning_rate": 6.72572298325723e-06, "loss": 1.9491, "step": 54675 }, { "epoch": 16.65, "learning_rate": 6.710502283105024e-06, "loss": 1.9886, "step": 54700 }, { "epoch": 16.66, "learning_rate": 6.695281582952816e-06, "loss": 1.9576, "step": 54725 }, { "epoch": 16.67, "learning_rate": 6.68006088280061e-06, "loss": 1.921, "step": 54750 }, { "epoch": 16.67, "learning_rate": 6.6648401826484024e-06, "loss": 1.9427, "step": 54775 }, { "epoch": 16.68, "learning_rate": 6.649619482496196e-06, "loss": 1.9386, "step": 54800 }, { "epoch": 16.69, "learning_rate": 6.634398782343988e-06, "loss": 1.9491, "step": 54825 }, { "epoch": 16.7, "learning_rate": 6.619178082191782e-06, "loss": 1.9421, "step": 54850 }, { "epoch": 16.7, "learning_rate": 6.603957382039574e-06, "loss": 1.9423, "step": 54875 }, { "epoch": 16.71, "learning_rate": 6.5887366818873675e-06, "loss": 1.9382, "step": 54900 }, { "epoch": 16.72, "learning_rate": 6.57351598173516e-06, "loss": 1.9891, "step": 54925 }, { "epoch": 16.73, "learning_rate": 6.558295281582954e-06, "loss": 1.881, "step": 54950 }, { "epoch": 16.74, "learning_rate": 6.543074581430746e-06, "loss": 1.9859, "step": 54975 }, { "epoch": 16.74, "learning_rate": 6.52785388127854e-06, "loss": 1.8991, "step": 55000 }, { "epoch": 16.75, "learning_rate": 6.5126331811263325e-06, "loss": 1.9316, "step": 55025 }, { "epoch": 16.76, "learning_rate": 6.497412480974125e-06, "loss": 1.9139, "step": 55050 }, { "epoch": 16.77, "learning_rate": 6.482191780821918e-06, "loss": 1.8429, "step": 55075 }, { "epoch": 16.77, "learning_rate": 6.466971080669712e-06, "loss": 1.9872, "step": 55100 }, { "epoch": 16.78, "learning_rate": 6.451750380517504e-06, "loss": 1.8164, "step": 55125 }, { "epoch": 16.79, "learning_rate": 6.4365296803652976e-06, "loss": 1.9674, "step": 55150 }, { "epoch": 16.8, "learning_rate": 6.42130898021309e-06, "loss": 1.9245, "step": 55175 }, { "epoch": 16.8, "learning_rate": 6.406088280060884e-06, "loss": 1.9864, "step": 55200 }, { "epoch": 16.81, "learning_rate": 6.390867579908676e-06, "loss": 1.9186, "step": 55225 }, { "epoch": 16.82, "learning_rate": 6.37564687975647e-06, "loss": 1.9924, "step": 55250 }, { "epoch": 16.83, "learning_rate": 6.360426179604262e-06, "loss": 1.9816, "step": 55275 }, { "epoch": 16.83, "learning_rate": 6.3452054794520555e-06, "loss": 1.9403, "step": 55300 }, { "epoch": 16.84, "learning_rate": 6.3305936073059375e-06, "loss": 1.9687, "step": 55325 }, { "epoch": 16.85, "learning_rate": 6.3153729071537295e-06, "loss": 1.9345, "step": 55350 }, { "epoch": 16.86, "learning_rate": 6.300152207001523e-06, "loss": 2.0326, "step": 55375 }, { "epoch": 16.86, "learning_rate": 6.284931506849315e-06, "loss": 1.9578, "step": 55400 }, { "epoch": 16.87, "learning_rate": 6.269710806697109e-06, "loss": 1.8891, "step": 55425 }, { "epoch": 16.88, "learning_rate": 6.254490106544902e-06, "loss": 1.9388, "step": 55450 }, { "epoch": 16.89, "learning_rate": 6.239269406392695e-06, "loss": 1.8948, "step": 55475 }, { "epoch": 16.89, "learning_rate": 6.224048706240487e-06, "loss": 2.0185, "step": 55500 }, { "epoch": 16.9, "learning_rate": 6.208828006088281e-06, "loss": 2.0193, "step": 55525 }, { "epoch": 16.91, "learning_rate": 6.193607305936074e-06, "loss": 1.9999, "step": 55550 }, { "epoch": 16.92, "learning_rate": 6.178386605783867e-06, "loss": 1.9439, "step": 55575 }, { "epoch": 16.93, "learning_rate": 6.16316590563166e-06, "loss": 2.025, "step": 55600 }, { "epoch": 16.93, "learning_rate": 6.147945205479453e-06, "loss": 1.9343, "step": 55625 }, { "epoch": 16.94, "learning_rate": 6.132724505327245e-06, "loss": 1.982, "step": 55650 }, { "epoch": 16.95, "learning_rate": 6.117503805175039e-06, "loss": 1.9344, "step": 55675 }, { "epoch": 16.96, "learning_rate": 6.102283105022832e-06, "loss": 1.8833, "step": 55700 }, { "epoch": 16.96, "learning_rate": 6.087062404870625e-06, "loss": 1.9786, "step": 55725 }, { "epoch": 16.97, "learning_rate": 6.0718417047184175e-06, "loss": 1.8689, "step": 55750 }, { "epoch": 16.98, "learning_rate": 6.056621004566211e-06, "loss": 1.9735, "step": 55775 }, { "epoch": 16.99, "learning_rate": 6.041400304414003e-06, "loss": 1.9081, "step": 55800 }, { "epoch": 16.99, "learning_rate": 6.026179604261797e-06, "loss": 1.9836, "step": 55825 }, { "epoch": 17.0, "learning_rate": 6.01095890410959e-06, "loss": 1.9072, "step": 55850 }, { "epoch": 17.01, "learning_rate": 5.995738203957383e-06, "loss": 1.9328, "step": 55875 }, { "epoch": 17.02, "learning_rate": 5.980517503805175e-06, "loss": 1.8841, "step": 55900 }, { "epoch": 17.02, "learning_rate": 5.965296803652969e-06, "loss": 1.8813, "step": 55925 }, { "epoch": 17.03, "learning_rate": 5.950076103500762e-06, "loss": 1.8905, "step": 55950 }, { "epoch": 17.04, "learning_rate": 5.934855403348555e-06, "loss": 1.9598, "step": 55975 }, { "epoch": 17.05, "learning_rate": 5.9196347031963476e-06, "loss": 1.9439, "step": 56000 }, { "epoch": 17.05, "learning_rate": 5.904414003044141e-06, "loss": 1.8998, "step": 56025 }, { "epoch": 17.06, "learning_rate": 5.889193302891933e-06, "loss": 1.8348, "step": 56050 }, { "epoch": 17.07, "learning_rate": 5.873972602739727e-06, "loss": 1.8576, "step": 56075 }, { "epoch": 17.08, "learning_rate": 5.85875190258752e-06, "loss": 1.9066, "step": 56100 }, { "epoch": 17.09, "learning_rate": 5.843531202435313e-06, "loss": 1.9717, "step": 56125 }, { "epoch": 17.09, "learning_rate": 5.8283105022831055e-06, "loss": 1.9157, "step": 56150 }, { "epoch": 17.1, "learning_rate": 5.813089802130899e-06, "loss": 1.9095, "step": 56175 }, { "epoch": 17.11, "learning_rate": 5.797869101978691e-06, "loss": 1.9038, "step": 56200 }, { "epoch": 17.12, "learning_rate": 5.782648401826485e-06, "loss": 1.9096, "step": 56225 }, { "epoch": 17.12, "learning_rate": 5.767427701674278e-06, "loss": 1.9724, "step": 56250 }, { "epoch": 17.13, "learning_rate": 5.752207001522071e-06, "loss": 1.9575, "step": 56275 }, { "epoch": 17.14, "learning_rate": 5.736986301369863e-06, "loss": 1.9643, "step": 56300 }, { "epoch": 17.15, "learning_rate": 5.721765601217657e-06, "loss": 1.9695, "step": 56325 }, { "epoch": 17.15, "learning_rate": 5.70654490106545e-06, "loss": 1.9695, "step": 56350 }, { "epoch": 17.16, "learning_rate": 5.691324200913243e-06, "loss": 1.8842, "step": 56375 }, { "epoch": 17.17, "learning_rate": 5.6761035007610355e-06, "loss": 1.9068, "step": 56400 }, { "epoch": 17.18, "learning_rate": 5.660882800608829e-06, "loss": 1.8677, "step": 56425 }, { "epoch": 17.18, "learning_rate": 5.645662100456621e-06, "loss": 1.8634, "step": 56450 }, { "epoch": 17.19, "learning_rate": 5.630441400304415e-06, "loss": 1.9824, "step": 56475 }, { "epoch": 17.2, "learning_rate": 5.615220700152208e-06, "loss": 1.9314, "step": 56500 }, { "epoch": 17.21, "learning_rate": 5.600000000000001e-06, "loss": 1.9477, "step": 56525 }, { "epoch": 17.21, "learning_rate": 5.5847792998477934e-06, "loss": 1.9666, "step": 56550 }, { "epoch": 17.22, "learning_rate": 5.569558599695587e-06, "loss": 1.8937, "step": 56575 }, { "epoch": 17.23, "learning_rate": 5.554337899543379e-06, "loss": 1.9245, "step": 56600 }, { "epoch": 17.24, "learning_rate": 5.539117199391173e-06, "loss": 1.864, "step": 56625 }, { "epoch": 17.25, "learning_rate": 5.523896499238966e-06, "loss": 1.9335, "step": 56650 }, { "epoch": 17.25, "learning_rate": 5.508675799086759e-06, "loss": 1.9818, "step": 56675 }, { "epoch": 17.26, "learning_rate": 5.493455098934551e-06, "loss": 1.8967, "step": 56700 }, { "epoch": 17.27, "learning_rate": 5.478234398782345e-06, "loss": 1.8585, "step": 56725 }, { "epoch": 17.28, "learning_rate": 5.463013698630137e-06, "loss": 1.9773, "step": 56750 }, { "epoch": 17.28, "learning_rate": 5.447792998477931e-06, "loss": 1.9371, "step": 56775 }, { "epoch": 17.29, "learning_rate": 5.4325722983257235e-06, "loss": 1.9483, "step": 56800 }, { "epoch": 17.3, "learning_rate": 5.417351598173517e-06, "loss": 1.8756, "step": 56825 }, { "epoch": 17.31, "learning_rate": 5.402130898021309e-06, "loss": 1.7767, "step": 56850 }, { "epoch": 17.31, "learning_rate": 5.386910197869103e-06, "loss": 1.952, "step": 56875 }, { "epoch": 17.32, "learning_rate": 5.371689497716896e-06, "loss": 1.9177, "step": 56900 }, { "epoch": 17.33, "learning_rate": 5.3564687975646886e-06, "loss": 1.9437, "step": 56925 }, { "epoch": 17.34, "learning_rate": 5.341248097412481e-06, "loss": 1.9826, "step": 56950 }, { "epoch": 17.34, "learning_rate": 5.326027397260275e-06, "loss": 1.944, "step": 56975 }, { "epoch": 17.35, "learning_rate": 5.310806697108067e-06, "loss": 1.9131, "step": 57000 }, { "epoch": 17.36, "learning_rate": 5.295585996955861e-06, "loss": 1.917, "step": 57025 }, { "epoch": 17.37, "learning_rate": 5.280365296803654e-06, "loss": 1.9166, "step": 57050 }, { "epoch": 17.37, "learning_rate": 5.265144596651447e-06, "loss": 1.8458, "step": 57075 }, { "epoch": 17.38, "learning_rate": 5.249923896499239e-06, "loss": 1.9775, "step": 57100 }, { "epoch": 17.39, "learning_rate": 5.234703196347033e-06, "loss": 1.9948, "step": 57125 }, { "epoch": 17.4, "learning_rate": 5.219482496194825e-06, "loss": 1.9398, "step": 57150 }, { "epoch": 17.4, "learning_rate": 5.204261796042619e-06, "loss": 1.9874, "step": 57175 }, { "epoch": 17.41, "learning_rate": 5.1890410958904115e-06, "loss": 1.9431, "step": 57200 }, { "epoch": 17.42, "learning_rate": 5.1738203957382035e-06, "loss": 1.9826, "step": 57225 }, { "epoch": 17.43, "learning_rate": 5.158599695585997e-06, "loss": 1.8329, "step": 57250 }, { "epoch": 17.44, "learning_rate": 5.14337899543379e-06, "loss": 1.9459, "step": 57275 }, { "epoch": 17.44, "learning_rate": 5.128158295281584e-06, "loss": 1.8475, "step": 57300 }, { "epoch": 17.45, "learning_rate": 5.112937595129376e-06, "loss": 1.9633, "step": 57325 }, { "epoch": 17.46, "learning_rate": 5.097716894977169e-06, "loss": 1.879, "step": 57350 }, { "epoch": 17.47, "learning_rate": 5.082496194824962e-06, "loss": 1.9553, "step": 57375 }, { "epoch": 17.47, "learning_rate": 5.067275494672755e-06, "loss": 1.9861, "step": 57400 }, { "epoch": 17.48, "learning_rate": 5.052054794520548e-06, "loss": 1.9381, "step": 57425 }, { "epoch": 17.49, "learning_rate": 5.036834094368342e-06, "loss": 1.8685, "step": 57450 }, { "epoch": 17.5, "learning_rate": 5.021613394216134e-06, "loss": 1.8962, "step": 57475 }, { "epoch": 17.5, "learning_rate": 5.006392694063927e-06, "loss": 1.879, "step": 57500 }, { "epoch": 17.51, "learning_rate": 4.99117199391172e-06, "loss": 2.0326, "step": 57525 }, { "epoch": 17.52, "learning_rate": 4.975951293759513e-06, "loss": 1.9831, "step": 57550 }, { "epoch": 17.53, "learning_rate": 4.960730593607307e-06, "loss": 2.0538, "step": 57575 }, { "epoch": 17.53, "learning_rate": 4.9455098934550995e-06, "loss": 1.9686, "step": 57600 }, { "epoch": 17.54, "learning_rate": 4.930289193302892e-06, "loss": 1.9546, "step": 57625 }, { "epoch": 17.55, "learning_rate": 4.915068493150685e-06, "loss": 2.0175, "step": 57650 }, { "epoch": 17.56, "learning_rate": 4.899847792998478e-06, "loss": 1.9396, "step": 57675 }, { "epoch": 17.56, "learning_rate": 4.884627092846272e-06, "loss": 1.8997, "step": 57700 }, { "epoch": 17.57, "learning_rate": 4.8694063926940645e-06, "loss": 1.9828, "step": 57725 }, { "epoch": 17.58, "learning_rate": 4.854185692541857e-06, "loss": 2.0147, "step": 57750 }, { "epoch": 17.59, "learning_rate": 4.83896499238965e-06, "loss": 2.0053, "step": 57775 }, { "epoch": 17.6, "learning_rate": 4.823744292237443e-06, "loss": 1.9466, "step": 57800 }, { "epoch": 17.6, "learning_rate": 4.808523592085237e-06, "loss": 2.0544, "step": 57825 }, { "epoch": 17.61, "learning_rate": 4.793911719939118e-06, "loss": 2.0409, "step": 57850 }, { "epoch": 17.62, "learning_rate": 4.778691019786911e-06, "loss": 1.9398, "step": 57875 }, { "epoch": 17.63, "learning_rate": 4.763470319634704e-06, "loss": 1.961, "step": 57900 }, { "epoch": 17.63, "learning_rate": 4.7482496194824965e-06, "loss": 1.9101, "step": 57925 }, { "epoch": 17.64, "learning_rate": 4.73302891933029e-06, "loss": 1.9418, "step": 57950 }, { "epoch": 17.65, "learning_rate": 4.717808219178083e-06, "loss": 2.0025, "step": 57975 }, { "epoch": 17.66, "learning_rate": 4.702587519025876e-06, "loss": 1.8889, "step": 58000 }, { "epoch": 17.66, "learning_rate": 4.687366818873669e-06, "loss": 1.9201, "step": 58025 }, { "epoch": 17.67, "learning_rate": 4.6721461187214615e-06, "loss": 1.8512, "step": 58050 }, { "epoch": 17.68, "learning_rate": 4.656925418569254e-06, "loss": 1.9532, "step": 58075 }, { "epoch": 17.69, "learning_rate": 4.641704718417048e-06, "loss": 1.9218, "step": 58100 }, { "epoch": 17.69, "learning_rate": 4.626484018264841e-06, "loss": 1.8991, "step": 58125 }, { "epoch": 17.7, "learning_rate": 4.611263318112634e-06, "loss": 1.8098, "step": 58150 }, { "epoch": 17.71, "learning_rate": 4.5960426179604265e-06, "loss": 1.8689, "step": 58175 }, { "epoch": 17.72, "learning_rate": 4.580821917808219e-06, "loss": 1.9573, "step": 58200 }, { "epoch": 17.72, "learning_rate": 4.565601217656013e-06, "loss": 1.9308, "step": 58225 }, { "epoch": 17.73, "learning_rate": 4.550380517503806e-06, "loss": 1.9812, "step": 58250 }, { "epoch": 17.74, "learning_rate": 4.535159817351599e-06, "loss": 1.9183, "step": 58275 }, { "epoch": 17.75, "learning_rate": 4.519939117199392e-06, "loss": 1.9293, "step": 58300 }, { "epoch": 17.75, "learning_rate": 4.5047184170471844e-06, "loss": 1.8619, "step": 58325 }, { "epoch": 17.76, "learning_rate": 4.489497716894978e-06, "loss": 1.9365, "step": 58350 }, { "epoch": 17.77, "learning_rate": 4.474277016742771e-06, "loss": 1.961, "step": 58375 }, { "epoch": 17.78, "learning_rate": 4.459056316590564e-06, "loss": 1.9869, "step": 58400 }, { "epoch": 17.79, "learning_rate": 4.443835616438357e-06, "loss": 1.9984, "step": 58425 }, { "epoch": 17.79, "learning_rate": 4.4286149162861495e-06, "loss": 1.9806, "step": 58450 }, { "epoch": 17.8, "learning_rate": 4.413394216133942e-06, "loss": 1.9724, "step": 58475 }, { "epoch": 17.81, "learning_rate": 4.398173515981736e-06, "loss": 1.9639, "step": 58500 }, { "epoch": 17.82, "learning_rate": 4.382952815829529e-06, "loss": 2.0249, "step": 58525 }, { "epoch": 17.82, "learning_rate": 4.367732115677322e-06, "loss": 1.9433, "step": 58550 }, { "epoch": 17.83, "learning_rate": 4.3525114155251145e-06, "loss": 1.9502, "step": 58575 }, { "epoch": 17.84, "learning_rate": 4.337290715372907e-06, "loss": 1.996, "step": 58600 }, { "epoch": 17.85, "learning_rate": 4.322070015220701e-06, "loss": 1.9632, "step": 58625 }, { "epoch": 17.85, "learning_rate": 4.306849315068494e-06, "loss": 1.9467, "step": 58650 }, { "epoch": 17.86, "learning_rate": 4.291628614916287e-06, "loss": 1.915, "step": 58675 }, { "epoch": 17.87, "learning_rate": 4.2764079147640796e-06, "loss": 1.9257, "step": 58700 }, { "epoch": 17.88, "learning_rate": 4.261187214611872e-06, "loss": 1.9646, "step": 58725 }, { "epoch": 17.88, "learning_rate": 4.245966514459665e-06, "loss": 1.9155, "step": 58750 }, { "epoch": 17.89, "learning_rate": 4.230745814307459e-06, "loss": 1.8798, "step": 58775 }, { "epoch": 17.9, "learning_rate": 4.215525114155252e-06, "loss": 1.9897, "step": 58800 }, { "epoch": 17.91, "learning_rate": 4.200304414003045e-06, "loss": 1.9645, "step": 58825 }, { "epoch": 17.91, "learning_rate": 4.1850837138508375e-06, "loss": 1.9438, "step": 58850 }, { "epoch": 17.92, "learning_rate": 4.16986301369863e-06, "loss": 1.986, "step": 58875 }, { "epoch": 17.93, "learning_rate": 4.154642313546424e-06, "loss": 1.9541, "step": 58900 }, { "epoch": 17.94, "learning_rate": 4.139421613394217e-06, "loss": 1.9109, "step": 58925 }, { "epoch": 17.95, "learning_rate": 4.12420091324201e-06, "loss": 1.9129, "step": 58950 }, { "epoch": 17.95, "learning_rate": 4.1089802130898025e-06, "loss": 1.9034, "step": 58975 }, { "epoch": 17.96, "learning_rate": 4.093759512937595e-06, "loss": 2.0131, "step": 59000 }, { "epoch": 17.97, "learning_rate": 4.078538812785389e-06, "loss": 1.8497, "step": 59025 }, { "epoch": 17.98, "learning_rate": 4.063318112633182e-06, "loss": 1.9809, "step": 59050 }, { "epoch": 17.98, "learning_rate": 4.048097412480975e-06, "loss": 1.9217, "step": 59075 }, { "epoch": 17.99, "learning_rate": 4.0328767123287676e-06, "loss": 1.8929, "step": 59100 }, { "epoch": 18.0, "learning_rate": 4.01765601217656e-06, "loss": 1.9193, "step": 59125 }, { "epoch": 18.01, "learning_rate": 4.002435312024353e-06, "loss": 1.9315, "step": 59150 }, { "epoch": 18.01, "learning_rate": 3.987214611872147e-06, "loss": 1.8903, "step": 59175 }, { "epoch": 18.02, "learning_rate": 3.97199391171994e-06, "loss": 1.9688, "step": 59200 }, { "epoch": 18.03, "learning_rate": 3.956773211567733e-06, "loss": 1.9036, "step": 59225 }, { "epoch": 18.04, "learning_rate": 3.9415525114155254e-06, "loss": 1.9615, "step": 59250 }, { "epoch": 18.04, "learning_rate": 3.926331811263318e-06, "loss": 1.8522, "step": 59275 }, { "epoch": 18.05, "learning_rate": 3.911111111111112e-06, "loss": 1.9418, "step": 59300 }, { "epoch": 18.06, "learning_rate": 3.895890410958905e-06, "loss": 1.8732, "step": 59325 }, { "epoch": 18.07, "learning_rate": 3.880669710806698e-06, "loss": 1.9244, "step": 59350 }, { "epoch": 18.07, "learning_rate": 3.8654490106544905e-06, "loss": 1.9065, "step": 59375 }, { "epoch": 18.08, "learning_rate": 3.850228310502283e-06, "loss": 1.9452, "step": 59400 }, { "epoch": 18.09, "learning_rate": 3.835007610350077e-06, "loss": 1.9752, "step": 59425 }, { "epoch": 18.1, "learning_rate": 3.81978691019787e-06, "loss": 1.9594, "step": 59450 }, { "epoch": 18.11, "learning_rate": 3.8045662100456627e-06, "loss": 1.9121, "step": 59475 }, { "epoch": 18.11, "learning_rate": 3.7893455098934555e-06, "loss": 1.975, "step": 59500 }, { "epoch": 18.12, "learning_rate": 3.774124809741249e-06, "loss": 1.8229, "step": 59525 }, { "epoch": 18.13, "learning_rate": 3.7589041095890416e-06, "loss": 1.9183, "step": 59550 }, { "epoch": 18.14, "learning_rate": 3.7436834094368345e-06, "loss": 1.824, "step": 59575 }, { "epoch": 18.14, "learning_rate": 3.7284627092846277e-06, "loss": 1.9862, "step": 59600 }, { "epoch": 18.15, "learning_rate": 3.7132420091324206e-06, "loss": 1.9525, "step": 59625 }, { "epoch": 18.16, "learning_rate": 3.6980213089802134e-06, "loss": 1.9417, "step": 59650 }, { "epoch": 18.17, "learning_rate": 3.6828006088280067e-06, "loss": 1.9509, "step": 59675 }, { "epoch": 18.17, "learning_rate": 3.6675799086757995e-06, "loss": 1.9492, "step": 59700 }, { "epoch": 18.18, "learning_rate": 3.652359208523593e-06, "loss": 1.947, "step": 59725 }, { "epoch": 18.19, "learning_rate": 3.6371385083713856e-06, "loss": 1.8975, "step": 59750 }, { "epoch": 18.2, "learning_rate": 3.6219178082191785e-06, "loss": 1.8844, "step": 59775 }, { "epoch": 18.2, "learning_rate": 3.6066971080669717e-06, "loss": 1.9304, "step": 59800 }, { "epoch": 18.21, "learning_rate": 3.5914764079147646e-06, "loss": 2.0124, "step": 59825 }, { "epoch": 18.22, "learning_rate": 3.5762557077625574e-06, "loss": 1.876, "step": 59850 }, { "epoch": 18.23, "learning_rate": 3.5610350076103507e-06, "loss": 1.893, "step": 59875 }, { "epoch": 18.23, "learning_rate": 3.5458143074581435e-06, "loss": 1.9703, "step": 59900 }, { "epoch": 18.24, "learning_rate": 3.5305936073059368e-06, "loss": 1.9334, "step": 59925 }, { "epoch": 18.25, "learning_rate": 3.5153729071537296e-06, "loss": 1.9834, "step": 59950 }, { "epoch": 18.26, "learning_rate": 3.5001522070015225e-06, "loss": 1.868, "step": 59975 }, { "epoch": 18.26, "learning_rate": 3.4849315068493157e-06, "loss": 1.9806, "step": 60000 }, { "epoch": 18.27, "learning_rate": 3.4697108066971086e-06, "loss": 1.9378, "step": 60025 }, { "epoch": 18.28, "learning_rate": 3.4544901065449014e-06, "loss": 1.8521, "step": 60050 }, { "epoch": 18.29, "learning_rate": 3.4392694063926947e-06, "loss": 1.9511, "step": 60075 }, { "epoch": 18.3, "learning_rate": 3.4240487062404875e-06, "loss": 1.8839, "step": 60100 }, { "epoch": 18.3, "learning_rate": 3.4088280060882808e-06, "loss": 1.954, "step": 60125 }, { "epoch": 18.31, "learning_rate": 3.3936073059360736e-06, "loss": 1.9526, "step": 60150 }, { "epoch": 18.32, "learning_rate": 3.3783866057838664e-06, "loss": 1.8578, "step": 60175 }, { "epoch": 18.33, "learning_rate": 3.3631659056316597e-06, "loss": 1.9272, "step": 60200 }, { "epoch": 18.33, "learning_rate": 3.3479452054794526e-06, "loss": 1.8606, "step": 60225 }, { "epoch": 18.34, "learning_rate": 3.3327245053272454e-06, "loss": 1.8964, "step": 60250 }, { "epoch": 18.35, "learning_rate": 3.3175038051750387e-06, "loss": 1.9205, "step": 60275 }, { "epoch": 18.36, "learning_rate": 3.3022831050228315e-06, "loss": 1.8762, "step": 60300 }, { "epoch": 18.36, "learning_rate": 3.2870624048706248e-06, "loss": 1.93, "step": 60325 }, { "epoch": 18.37, "learning_rate": 3.2718417047184176e-06, "loss": 2.0007, "step": 60350 }, { "epoch": 18.38, "learning_rate": 3.2566210045662104e-06, "loss": 1.935, "step": 60375 }, { "epoch": 18.39, "learning_rate": 3.2414003044140037e-06, "loss": 1.9675, "step": 60400 }, { "epoch": 18.39, "learning_rate": 3.2261796042617965e-06, "loss": 2.0031, "step": 60425 }, { "epoch": 18.4, "learning_rate": 3.2109589041095894e-06, "loss": 1.9624, "step": 60450 }, { "epoch": 18.41, "learning_rate": 3.1957382039573826e-06, "loss": 1.9126, "step": 60475 }, { "epoch": 18.42, "learning_rate": 3.1805175038051755e-06, "loss": 1.9268, "step": 60500 }, { "epoch": 18.42, "learning_rate": 3.1652968036529687e-06, "loss": 1.8126, "step": 60525 }, { "epoch": 18.43, "learning_rate": 3.1500761035007616e-06, "loss": 1.9809, "step": 60550 }, { "epoch": 18.44, "learning_rate": 3.1348554033485544e-06, "loss": 1.8864, "step": 60575 }, { "epoch": 18.45, "learning_rate": 3.1196347031963477e-06, "loss": 1.9743, "step": 60600 }, { "epoch": 18.46, "learning_rate": 3.1044140030441405e-06, "loss": 2.0185, "step": 60625 }, { "epoch": 18.46, "learning_rate": 3.0891933028919334e-06, "loss": 1.9321, "step": 60650 }, { "epoch": 18.47, "learning_rate": 3.0739726027397266e-06, "loss": 1.9233, "step": 60675 }, { "epoch": 18.48, "learning_rate": 3.0587519025875195e-06, "loss": 2.0277, "step": 60700 }, { "epoch": 18.49, "learning_rate": 3.0435312024353123e-06, "loss": 1.98, "step": 60725 }, { "epoch": 18.49, "learning_rate": 3.0283105022831056e-06, "loss": 1.9194, "step": 60750 }, { "epoch": 18.5, "learning_rate": 3.0130898021308984e-06, "loss": 1.9477, "step": 60775 }, { "epoch": 18.51, "learning_rate": 2.9978691019786917e-06, "loss": 1.881, "step": 60800 }, { "epoch": 18.52, "learning_rate": 2.9826484018264845e-06, "loss": 1.9989, "step": 60825 }, { "epoch": 18.52, "learning_rate": 2.9680365296803653e-06, "loss": 1.941, "step": 60850 }, { "epoch": 18.53, "learning_rate": 2.9528158295281586e-06, "loss": 1.9003, "step": 60875 }, { "epoch": 18.54, "learning_rate": 2.9375951293759514e-06, "loss": 1.8749, "step": 60900 }, { "epoch": 18.55, "learning_rate": 2.9223744292237442e-06, "loss": 1.8517, "step": 60925 }, { "epoch": 18.55, "learning_rate": 2.9071537290715375e-06, "loss": 1.9296, "step": 60950 }, { "epoch": 18.56, "learning_rate": 2.8919330289193303e-06, "loss": 1.8825, "step": 60975 }, { "epoch": 18.57, "learning_rate": 2.876712328767123e-06, "loss": 1.9291, "step": 61000 }, { "epoch": 18.58, "learning_rate": 2.8614916286149164e-06, "loss": 1.9779, "step": 61025 }, { "epoch": 18.58, "learning_rate": 2.8462709284627093e-06, "loss": 1.9288, "step": 61050 }, { "epoch": 18.59, "learning_rate": 2.8310502283105025e-06, "loss": 1.9261, "step": 61075 }, { "epoch": 18.6, "learning_rate": 2.8158295281582954e-06, "loss": 1.939, "step": 61100 }, { "epoch": 18.61, "learning_rate": 2.8006088280060882e-06, "loss": 2.0491, "step": 61125 }, { "epoch": 18.61, "learning_rate": 2.7853881278538815e-06, "loss": 1.9455, "step": 61150 }, { "epoch": 18.62, "learning_rate": 2.7701674277016743e-06, "loss": 1.913, "step": 61175 }, { "epoch": 18.63, "learning_rate": 2.754946727549467e-06, "loss": 1.9188, "step": 61200 }, { "epoch": 18.64, "learning_rate": 2.7397260273972604e-06, "loss": 1.9576, "step": 61225 }, { "epoch": 18.65, "learning_rate": 2.7245053272450533e-06, "loss": 1.8853, "step": 61250 }, { "epoch": 18.65, "learning_rate": 2.7092846270928465e-06, "loss": 1.9414, "step": 61275 }, { "epoch": 18.66, "learning_rate": 2.6940639269406394e-06, "loss": 1.8567, "step": 61300 }, { "epoch": 18.67, "learning_rate": 2.6788432267884322e-06, "loss": 1.9412, "step": 61325 }, { "epoch": 18.68, "learning_rate": 2.6636225266362255e-06, "loss": 1.9092, "step": 61350 }, { "epoch": 18.68, "learning_rate": 2.6484018264840183e-06, "loss": 1.9909, "step": 61375 }, { "epoch": 18.69, "learning_rate": 2.633181126331811e-06, "loss": 1.9695, "step": 61400 }, { "epoch": 18.7, "learning_rate": 2.6179604261796044e-06, "loss": 1.9589, "step": 61425 }, { "epoch": 18.71, "learning_rate": 2.6027397260273973e-06, "loss": 1.9191, "step": 61450 }, { "epoch": 18.71, "learning_rate": 2.5875190258751905e-06, "loss": 1.893, "step": 61475 }, { "epoch": 18.72, "learning_rate": 2.5722983257229834e-06, "loss": 1.9381, "step": 61500 }, { "epoch": 18.73, "learning_rate": 2.557077625570776e-06, "loss": 1.9433, "step": 61525 }, { "epoch": 18.74, "learning_rate": 2.5418569254185695e-06, "loss": 1.8897, "step": 61550 }, { "epoch": 18.74, "learning_rate": 2.5266362252663623e-06, "loss": 1.977, "step": 61575 }, { "epoch": 18.75, "learning_rate": 2.511415525114155e-06, "loss": 1.8949, "step": 61600 }, { "epoch": 18.76, "learning_rate": 2.4961948249619484e-06, "loss": 1.9551, "step": 61625 }, { "epoch": 18.77, "learning_rate": 2.4809741248097413e-06, "loss": 1.8667, "step": 61650 }, { "epoch": 18.77, "learning_rate": 2.4657534246575345e-06, "loss": 1.8624, "step": 61675 }, { "epoch": 18.78, "learning_rate": 2.4505327245053274e-06, "loss": 1.9746, "step": 61700 }, { "epoch": 18.79, "learning_rate": 2.43531202435312e-06, "loss": 1.9408, "step": 61725 }, { "epoch": 18.8, "learning_rate": 2.4200913242009135e-06, "loss": 1.9824, "step": 61750 }, { "epoch": 18.81, "learning_rate": 2.4048706240487063e-06, "loss": 1.8111, "step": 61775 }, { "epoch": 18.81, "learning_rate": 2.389649923896499e-06, "loss": 2.0112, "step": 61800 }, { "epoch": 18.82, "learning_rate": 2.3744292237442924e-06, "loss": 1.8705, "step": 61825 }, { "epoch": 18.83, "learning_rate": 2.3592085235920852e-06, "loss": 1.9911, "step": 61850 }, { "epoch": 18.84, "learning_rate": 2.343987823439878e-06, "loss": 1.9958, "step": 61875 }, { "epoch": 18.84, "learning_rate": 2.3287671232876713e-06, "loss": 1.9827, "step": 61900 }, { "epoch": 18.85, "learning_rate": 2.313546423135464e-06, "loss": 1.8441, "step": 61925 }, { "epoch": 18.86, "learning_rate": 2.2983257229832575e-06, "loss": 1.9146, "step": 61950 }, { "epoch": 18.87, "learning_rate": 2.2831050228310503e-06, "loss": 1.9028, "step": 61975 }, { "epoch": 18.87, "learning_rate": 2.267884322678843e-06, "loss": 1.9987, "step": 62000 }, { "epoch": 18.88, "learning_rate": 2.2526636225266364e-06, "loss": 1.8399, "step": 62025 }, { "epoch": 18.89, "learning_rate": 2.2374429223744292e-06, "loss": 1.9116, "step": 62050 }, { "epoch": 18.9, "learning_rate": 2.222222222222222e-06, "loss": 1.9387, "step": 62075 }, { "epoch": 18.9, "learning_rate": 2.2070015220700153e-06, "loss": 1.9983, "step": 62100 }, { "epoch": 18.91, "learning_rate": 2.191780821917808e-06, "loss": 1.9453, "step": 62125 }, { "epoch": 18.92, "learning_rate": 2.1765601217656014e-06, "loss": 1.952, "step": 62150 }, { "epoch": 18.93, "learning_rate": 2.1613394216133943e-06, "loss": 1.9109, "step": 62175 }, { "epoch": 18.93, "learning_rate": 2.146118721461187e-06, "loss": 1.8966, "step": 62200 }, { "epoch": 18.94, "learning_rate": 2.1308980213089804e-06, "loss": 1.9123, "step": 62225 }, { "epoch": 18.95, "learning_rate": 2.1156773211567732e-06, "loss": 1.8517, "step": 62250 }, { "epoch": 18.96, "learning_rate": 2.100456621004566e-06, "loss": 1.9353, "step": 62275 }, { "epoch": 18.96, "learning_rate": 2.0852359208523593e-06, "loss": 1.8875, "step": 62300 }, { "epoch": 18.97, "learning_rate": 2.070015220700152e-06, "loss": 1.9581, "step": 62325 }, { "epoch": 18.98, "learning_rate": 2.0547945205479454e-06, "loss": 1.925, "step": 62350 }, { "epoch": 18.99, "learning_rate": 2.0395738203957383e-06, "loss": 1.9363, "step": 62375 }, { "epoch": 19.0, "learning_rate": 2.024353120243531e-06, "loss": 1.9514, "step": 62400 }, { "epoch": 19.0, "learning_rate": 2.0091324200913244e-06, "loss": 1.8421, "step": 62425 }, { "epoch": 19.01, "learning_rate": 1.9939117199391172e-06, "loss": 2.0036, "step": 62450 }, { "epoch": 19.02, "learning_rate": 1.97869101978691e-06, "loss": 1.8492, "step": 62475 }, { "epoch": 19.03, "learning_rate": 1.9634703196347033e-06, "loss": 1.8921, "step": 62500 }, { "epoch": 19.03, "learning_rate": 1.948249619482496e-06, "loss": 1.9476, "step": 62525 }, { "epoch": 19.04, "learning_rate": 1.9330289193302894e-06, "loss": 1.8792, "step": 62550 }, { "epoch": 19.05, "learning_rate": 1.9178082191780823e-06, "loss": 1.8571, "step": 62575 }, { "epoch": 19.06, "learning_rate": 1.9025875190258753e-06, "loss": 1.9357, "step": 62600 }, { "epoch": 19.06, "learning_rate": 1.8873668188736682e-06, "loss": 1.8947, "step": 62625 }, { "epoch": 19.07, "learning_rate": 1.8721461187214612e-06, "loss": 1.8989, "step": 62650 }, { "epoch": 19.08, "learning_rate": 1.8569254185692543e-06, "loss": 1.997, "step": 62675 }, { "epoch": 19.09, "learning_rate": 1.8417047184170473e-06, "loss": 1.9384, "step": 62700 }, { "epoch": 19.09, "learning_rate": 1.8264840182648401e-06, "loss": 1.8858, "step": 62725 }, { "epoch": 19.1, "learning_rate": 1.8112633181126332e-06, "loss": 1.8676, "step": 62750 }, { "epoch": 19.11, "learning_rate": 1.7960426179604263e-06, "loss": 1.9975, "step": 62775 }, { "epoch": 19.12, "learning_rate": 1.7808219178082193e-06, "loss": 1.9733, "step": 62800 }, { "epoch": 19.12, "learning_rate": 1.7656012176560121e-06, "loss": 1.9497, "step": 62825 }, { "epoch": 19.13, "learning_rate": 1.7503805175038052e-06, "loss": 1.8918, "step": 62850 }, { "epoch": 19.14, "learning_rate": 1.7351598173515982e-06, "loss": 1.9333, "step": 62875 }, { "epoch": 19.15, "learning_rate": 1.7199391171993913e-06, "loss": 1.9743, "step": 62900 }, { "epoch": 19.16, "learning_rate": 1.7047184170471841e-06, "loss": 1.8864, "step": 62925 }, { "epoch": 19.16, "learning_rate": 1.6894977168949772e-06, "loss": 1.9328, "step": 62950 }, { "epoch": 19.17, "learning_rate": 1.6742770167427702e-06, "loss": 1.9335, "step": 62975 }, { "epoch": 19.18, "learning_rate": 1.6596651445966516e-06, "loss": 1.9752, "step": 63000 }, { "epoch": 19.19, "learning_rate": 1.6444444444444447e-06, "loss": 1.9317, "step": 63025 }, { "epoch": 19.19, "learning_rate": 1.6292237442922375e-06, "loss": 1.9205, "step": 63050 }, { "epoch": 19.2, "learning_rate": 1.6140030441400306e-06, "loss": 1.9616, "step": 63075 }, { "epoch": 19.21, "learning_rate": 1.5987823439878236e-06, "loss": 2.047, "step": 63100 }, { "epoch": 19.22, "learning_rate": 1.5835616438356167e-06, "loss": 1.8741, "step": 63125 }, { "epoch": 19.22, "learning_rate": 1.5683409436834095e-06, "loss": 1.9207, "step": 63150 }, { "epoch": 19.23, "learning_rate": 1.5531202435312026e-06, "loss": 1.8698, "step": 63175 }, { "epoch": 19.24, "learning_rate": 1.5378995433789956e-06, "loss": 2.013, "step": 63200 }, { "epoch": 19.25, "learning_rate": 1.5226788432267887e-06, "loss": 1.8767, "step": 63225 }, { "epoch": 19.25, "learning_rate": 1.5074581430745815e-06, "loss": 1.9414, "step": 63250 }, { "epoch": 19.26, "learning_rate": 1.4922374429223746e-06, "loss": 2.025, "step": 63275 }, { "epoch": 19.27, "learning_rate": 1.4770167427701676e-06, "loss": 1.9642, "step": 63300 }, { "epoch": 19.28, "learning_rate": 1.4617960426179607e-06, "loss": 1.9146, "step": 63325 }, { "epoch": 19.28, "learning_rate": 1.4465753424657535e-06, "loss": 1.8681, "step": 63350 }, { "epoch": 19.29, "learning_rate": 1.4313546423135466e-06, "loss": 1.8663, "step": 63375 }, { "epoch": 19.3, "learning_rate": 1.4161339421613396e-06, "loss": 1.9674, "step": 63400 }, { "epoch": 19.31, "learning_rate": 1.4009132420091327e-06, "loss": 1.9506, "step": 63425 }, { "epoch": 19.32, "learning_rate": 1.3856925418569255e-06, "loss": 1.9306, "step": 63450 }, { "epoch": 19.32, "learning_rate": 1.3704718417047186e-06, "loss": 1.9052, "step": 63475 }, { "epoch": 19.33, "learning_rate": 1.3552511415525116e-06, "loss": 1.9244, "step": 63500 }, { "epoch": 19.34, "learning_rate": 1.3400304414003047e-06, "loss": 1.9635, "step": 63525 }, { "epoch": 19.35, "learning_rate": 1.3248097412480975e-06, "loss": 1.9551, "step": 63550 }, { "epoch": 19.35, "learning_rate": 1.3095890410958906e-06, "loss": 1.9475, "step": 63575 }, { "epoch": 19.36, "learning_rate": 1.2943683409436836e-06, "loss": 1.9417, "step": 63600 }, { "epoch": 19.37, "learning_rate": 1.2791476407914767e-06, "loss": 1.9187, "step": 63625 }, { "epoch": 19.38, "learning_rate": 1.2639269406392695e-06, "loss": 2.0175, "step": 63650 }, { "epoch": 19.38, "learning_rate": 1.2487062404870626e-06, "loss": 1.9486, "step": 63675 }, { "epoch": 19.39, "learning_rate": 1.2334855403348556e-06, "loss": 1.8935, "step": 63700 }, { "epoch": 19.4, "learning_rate": 1.2182648401826487e-06, "loss": 1.9383, "step": 63725 }, { "epoch": 19.41, "learning_rate": 1.2030441400304415e-06, "loss": 1.9645, "step": 63750 }, { "epoch": 19.41, "learning_rate": 1.1878234398782346e-06, "loss": 1.9119, "step": 63775 }, { "epoch": 19.42, "learning_rate": 1.1726027397260276e-06, "loss": 1.8621, "step": 63800 }, { "epoch": 19.43, "learning_rate": 1.1573820395738207e-06, "loss": 1.7967, "step": 63825 }, { "epoch": 19.44, "learning_rate": 1.1421613394216135e-06, "loss": 1.9894, "step": 63850 }, { "epoch": 19.44, "learning_rate": 1.1269406392694063e-06, "loss": 1.8729, "step": 63875 }, { "epoch": 19.45, "learning_rate": 1.1117199391171994e-06, "loss": 1.9467, "step": 63900 }, { "epoch": 19.46, "learning_rate": 1.0964992389649924e-06, "loss": 1.9585, "step": 63925 }, { "epoch": 19.47, "learning_rate": 1.0812785388127855e-06, "loss": 1.9316, "step": 63950 }, { "epoch": 19.47, "learning_rate": 1.0660578386605783e-06, "loss": 1.8996, "step": 63975 }, { "epoch": 19.48, "learning_rate": 1.0508371385083714e-06, "loss": 1.8568, "step": 64000 }, { "epoch": 19.49, "learning_rate": 1.0356164383561644e-06, "loss": 1.924, "step": 64025 }, { "epoch": 19.5, "learning_rate": 1.0203957382039575e-06, "loss": 1.8947, "step": 64050 }, { "epoch": 19.51, "learning_rate": 1.0051750380517503e-06, "loss": 2.0517, "step": 64075 }, { "epoch": 19.51, "learning_rate": 9.899543378995434e-07, "loss": 2.0314, "step": 64100 }, { "epoch": 19.52, "learning_rate": 9.747336377473364e-07, "loss": 1.8407, "step": 64125 }, { "epoch": 19.53, "learning_rate": 9.595129375951295e-07, "loss": 2.0457, "step": 64150 }, { "epoch": 19.54, "learning_rate": 9.442922374429224e-07, "loss": 1.9252, "step": 64175 }, { "epoch": 19.54, "learning_rate": 9.290715372907154e-07, "loss": 1.9663, "step": 64200 }, { "epoch": 19.55, "learning_rate": 9.138508371385084e-07, "loss": 2.0035, "step": 64225 }, { "epoch": 19.56, "learning_rate": 8.986301369863014e-07, "loss": 1.8369, "step": 64250 }, { "epoch": 19.57, "learning_rate": 8.834094368340944e-07, "loss": 1.89, "step": 64275 }, { "epoch": 19.57, "learning_rate": 8.681887366818874e-07, "loss": 1.813, "step": 64300 }, { "epoch": 19.58, "learning_rate": 8.529680365296804e-07, "loss": 1.9395, "step": 64325 }, { "epoch": 19.59, "learning_rate": 8.377473363774734e-07, "loss": 1.9558, "step": 64350 }, { "epoch": 19.6, "learning_rate": 8.225266362252664e-07, "loss": 1.9236, "step": 64375 }, { "epoch": 19.6, "learning_rate": 8.073059360730594e-07, "loss": 1.8632, "step": 64400 }, { "epoch": 19.61, "learning_rate": 7.920852359208524e-07, "loss": 1.8458, "step": 64425 }, { "epoch": 19.62, "learning_rate": 7.768645357686454e-07, "loss": 1.8997, "step": 64450 }, { "epoch": 19.63, "learning_rate": 7.616438356164384e-07, "loss": 1.9626, "step": 64475 }, { "epoch": 19.63, "learning_rate": 7.464231354642314e-07, "loss": 1.8967, "step": 64500 }, { "epoch": 19.64, "learning_rate": 7.312024353120244e-07, "loss": 1.961, "step": 64525 }, { "epoch": 19.65, "learning_rate": 7.159817351598174e-07, "loss": 1.8946, "step": 64550 }, { "epoch": 19.66, "learning_rate": 7.007610350076104e-07, "loss": 1.9476, "step": 64575 }, { "epoch": 19.67, "learning_rate": 6.855403348554034e-07, "loss": 1.9353, "step": 64600 }, { "epoch": 19.67, "learning_rate": 6.703196347031964e-07, "loss": 1.9945, "step": 64625 }, { "epoch": 19.68, "learning_rate": 6.550989345509894e-07, "loss": 1.9124, "step": 64650 }, { "epoch": 19.69, "learning_rate": 6.398782343987824e-07, "loss": 2.0121, "step": 64675 }, { "epoch": 19.7, "learning_rate": 6.246575342465755e-07, "loss": 1.9495, "step": 64700 }, { "epoch": 19.7, "learning_rate": 6.094368340943684e-07, "loss": 1.9243, "step": 64725 }, { "epoch": 19.71, "learning_rate": 5.942161339421615e-07, "loss": 1.9793, "step": 64750 }, { "epoch": 19.72, "learning_rate": 5.789954337899544e-07, "loss": 1.8846, "step": 64775 }, { "epoch": 19.73, "learning_rate": 5.637747336377475e-07, "loss": 1.8872, "step": 64800 }, { "epoch": 19.73, "learning_rate": 5.485540334855404e-07, "loss": 1.9736, "step": 64825 }, { "epoch": 19.74, "learning_rate": 5.333333333333335e-07, "loss": 1.8818, "step": 64850 }, { "epoch": 19.75, "learning_rate": 5.181126331811263e-07, "loss": 1.9747, "step": 64875 }, { "epoch": 19.76, "learning_rate": 5.028919330289193e-07, "loss": 1.9198, "step": 64900 }, { "epoch": 19.76, "learning_rate": 4.876712328767123e-07, "loss": 1.8793, "step": 64925 }, { "epoch": 19.77, "learning_rate": 4.7245053272450534e-07, "loss": 1.9494, "step": 64950 }, { "epoch": 19.78, "learning_rate": 4.5722983257229834e-07, "loss": 1.9567, "step": 64975 }, { "epoch": 19.79, "learning_rate": 4.4200913242009134e-07, "loss": 1.8885, "step": 65000 }, { "epoch": 19.79, "learning_rate": 4.2678843226788434e-07, "loss": 1.946, "step": 65025 }, { "epoch": 19.8, "learning_rate": 4.1156773211567733e-07, "loss": 1.9554, "step": 65050 }, { "epoch": 19.81, "learning_rate": 3.9634703196347033e-07, "loss": 1.8447, "step": 65075 }, { "epoch": 19.82, "learning_rate": 3.8112633181126333e-07, "loss": 1.9562, "step": 65100 }, { "epoch": 19.82, "learning_rate": 3.6590563165905633e-07, "loss": 1.9531, "step": 65125 }, { "epoch": 19.83, "learning_rate": 3.5068493150684933e-07, "loss": 1.9695, "step": 65150 }, { "epoch": 19.84, "learning_rate": 3.3546423135464233e-07, "loss": 1.9478, "step": 65175 }, { "epoch": 19.85, "learning_rate": 3.2024353120243533e-07, "loss": 2.0431, "step": 65200 }, { "epoch": 19.86, "learning_rate": 3.050228310502283e-07, "loss": 1.8231, "step": 65225 }, { "epoch": 19.86, "learning_rate": 2.898021308980213e-07, "loss": 1.8677, "step": 65250 }, { "epoch": 19.87, "learning_rate": 2.745814307458143e-07, "loss": 2.0174, "step": 65275 }, { "epoch": 19.88, "learning_rate": 2.593607305936073e-07, "loss": 1.9269, "step": 65300 }, { "epoch": 19.89, "learning_rate": 2.441400304414003e-07, "loss": 1.8943, "step": 65325 }, { "epoch": 19.89, "learning_rate": 2.2891933028919335e-07, "loss": 1.9661, "step": 65350 }, { "epoch": 19.9, "learning_rate": 2.1369863013698635e-07, "loss": 1.9616, "step": 65375 }, { "epoch": 19.91, "learning_rate": 1.984779299847793e-07, "loss": 1.8594, "step": 65400 }, { "epoch": 19.92, "learning_rate": 1.832572298325723e-07, "loss": 1.8842, "step": 65425 }, { "epoch": 19.92, "learning_rate": 1.680365296803653e-07, "loss": 2.032, "step": 65450 }, { "epoch": 19.93, "learning_rate": 1.5281582952815831e-07, "loss": 1.8597, "step": 65475 }, { "epoch": 19.94, "learning_rate": 1.375951293759513e-07, "loss": 1.9084, "step": 65500 }, { "epoch": 19.95, "learning_rate": 1.223744292237443e-07, "loss": 1.8235, "step": 65525 }, { "epoch": 19.95, "learning_rate": 1.0715372907153731e-07, "loss": 1.8212, "step": 65550 }, { "epoch": 19.96, "learning_rate": 9.193302891933031e-08, "loss": 1.8892, "step": 65575 }, { "epoch": 19.97, "learning_rate": 7.67123287671233e-08, "loss": 1.9391, "step": 65600 }, { "epoch": 19.98, "learning_rate": 6.149162861491629e-08, "loss": 1.922, "step": 65625 }, { "epoch": 19.98, "learning_rate": 4.687975646879757e-08, "loss": 1.8743, "step": 65650 }, { "epoch": 19.99, "learning_rate": 3.1659056316590565e-08, "loss": 1.8869, "step": 65675 }, { "epoch": 20.0, "learning_rate": 1.6438356164383564e-08, "loss": 2.0741, "step": 65700 }, { "epoch": 20.0, "step": 65700, "total_flos": 1.138795714927657e+18, "train_loss": 2.126255558368096, "train_runtime": 26711.3051, "train_samples_per_second": 14.755, "train_steps_per_second": 2.46 } ], "max_steps": 65700, "num_train_epochs": 20, "total_flos": 1.138795714927657e+18, "trial_name": null, "trial_params": null }