{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 212490, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.9882347404583746e-05, "loss": 2.9231, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.9764694809167496e-05, "loss": 2.1627, "step": 1000 }, { "epoch": 0.02, "learning_rate": 4.964704221375124e-05, "loss": 1.8699, "step": 1500 }, { "epoch": 0.03, "learning_rate": 4.9529389618334984e-05, "loss": 1.7267, "step": 2000 }, { "epoch": 0.04, "learning_rate": 4.941173702291873e-05, "loss": 1.6276, "step": 2500 }, { "epoch": 0.04, "learning_rate": 4.929408442750247e-05, "loss": 1.5576, "step": 3000 }, { "epoch": 0.05, "learning_rate": 4.9176431832086214e-05, "loss": 1.5107, "step": 3500 }, { "epoch": 0.06, "learning_rate": 4.905877923666996e-05, "loss": 1.4644, "step": 4000 }, { "epoch": 0.06, "learning_rate": 4.894112664125371e-05, "loss": 1.4061, "step": 4500 }, { "epoch": 0.07, "learning_rate": 4.882347404583745e-05, "loss": 1.3747, "step": 5000 }, { "epoch": 0.08, "learning_rate": 4.8705821450421196e-05, "loss": 1.3601, "step": 5500 }, { "epoch": 0.08, "learning_rate": 4.8588168855004946e-05, "loss": 1.3315, "step": 6000 }, { "epoch": 0.09, "learning_rate": 4.847051625958869e-05, "loss": 1.2922, "step": 6500 }, { "epoch": 0.1, "learning_rate": 4.835286366417243e-05, "loss": 1.2699, "step": 7000 }, { "epoch": 0.11, "learning_rate": 4.8235211068756184e-05, "loss": 1.2293, "step": 7500 }, { "epoch": 0.11, "learning_rate": 4.811755847333993e-05, "loss": 1.227, "step": 8000 }, { "epoch": 0.12, "learning_rate": 4.799990587792367e-05, "loss": 1.2126, "step": 8500 }, { "epoch": 0.13, "learning_rate": 4.7882253282507415e-05, "loss": 1.1798, "step": 9000 }, { "epoch": 0.13, "learning_rate": 4.776460068709116e-05, "loss": 1.1901, "step": 9500 }, { "epoch": 0.14, "learning_rate": 4.76469480916749e-05, "loss": 1.1668, "step": 10000 }, { "epoch": 0.15, "learning_rate": 4.752929549625865e-05, "loss": 1.1431, "step": 10500 }, { "epoch": 0.16, "learning_rate": 4.7411642900842396e-05, "loss": 1.1699, "step": 11000 }, { "epoch": 0.16, "learning_rate": 4.729399030542614e-05, "loss": 1.1271, "step": 11500 }, { "epoch": 0.17, "learning_rate": 4.717633771000988e-05, "loss": 1.1251, "step": 12000 }, { "epoch": 0.18, "learning_rate": 4.7058685114593633e-05, "loss": 1.0963, "step": 12500 }, { "epoch": 0.18, "learning_rate": 4.694103251917738e-05, "loss": 1.1141, "step": 13000 }, { "epoch": 0.19, "learning_rate": 4.682337992376112e-05, "loss": 1.0974, "step": 13500 }, { "epoch": 0.2, "learning_rate": 4.6705727328344864e-05, "loss": 1.0829, "step": 14000 }, { "epoch": 0.2, "learning_rate": 4.658807473292861e-05, "loss": 1.0893, "step": 14500 }, { "epoch": 0.21, "learning_rate": 4.647042213751235e-05, "loss": 1.0835, "step": 15000 }, { "epoch": 0.22, "learning_rate": 4.63527695420961e-05, "loss": 1.0583, "step": 15500 }, { "epoch": 0.23, "learning_rate": 4.6235116946679846e-05, "loss": 1.0708, "step": 16000 }, { "epoch": 0.23, "learning_rate": 4.611746435126359e-05, "loss": 1.0695, "step": 16500 }, { "epoch": 0.24, "learning_rate": 4.599981175584734e-05, "loss": 1.0373, "step": 17000 }, { "epoch": 0.25, "learning_rate": 4.588215916043108e-05, "loss": 1.02, "step": 17500 }, { "epoch": 0.25, "learning_rate": 4.576450656501483e-05, "loss": 1.0366, "step": 18000 }, { "epoch": 0.26, "learning_rate": 4.564685396959858e-05, "loss": 1.0232, "step": 18500 }, { "epoch": 0.27, "learning_rate": 4.552920137418232e-05, "loss": 0.9946, "step": 19000 }, { "epoch": 0.28, "learning_rate": 4.541154877876606e-05, "loss": 0.9895, "step": 19500 }, { "epoch": 0.28, "learning_rate": 4.52938961833498e-05, "loss": 1.0122, "step": 20000 }, { "epoch": 0.29, "learning_rate": 4.517624358793355e-05, "loss": 1.0048, "step": 20500 }, { "epoch": 0.3, "learning_rate": 4.5058590992517295e-05, "loss": 0.9837, "step": 21000 }, { "epoch": 0.3, "learning_rate": 4.494093839710104e-05, "loss": 0.9891, "step": 21500 }, { "epoch": 0.31, "learning_rate": 4.482328580168479e-05, "loss": 0.9838, "step": 22000 }, { "epoch": 0.32, "learning_rate": 4.470563320626853e-05, "loss": 0.9881, "step": 22500 }, { "epoch": 0.32, "learning_rate": 4.4587980610852277e-05, "loss": 0.9387, "step": 23000 }, { "epoch": 0.33, "learning_rate": 4.447032801543603e-05, "loss": 0.9741, "step": 23500 }, { "epoch": 0.34, "learning_rate": 4.435267542001977e-05, "loss": 0.9668, "step": 24000 }, { "epoch": 0.35, "learning_rate": 4.4235022824603514e-05, "loss": 0.9816, "step": 24500 }, { "epoch": 0.35, "learning_rate": 4.411737022918726e-05, "loss": 0.9479, "step": 25000 }, { "epoch": 0.36, "learning_rate": 4.3999717633771e-05, "loss": 0.9568, "step": 25500 }, { "epoch": 0.37, "learning_rate": 4.3882065038354745e-05, "loss": 0.9476, "step": 26000 }, { "epoch": 0.37, "learning_rate": 4.3764412442938495e-05, "loss": 0.9533, "step": 26500 }, { "epoch": 0.38, "learning_rate": 4.364675984752224e-05, "loss": 0.9282, "step": 27000 }, { "epoch": 0.39, "learning_rate": 4.352910725210598e-05, "loss": 0.9454, "step": 27500 }, { "epoch": 0.4, "learning_rate": 4.3411454656689726e-05, "loss": 0.9418, "step": 28000 }, { "epoch": 0.4, "learning_rate": 4.329380206127348e-05, "loss": 0.9214, "step": 28500 }, { "epoch": 0.41, "learning_rate": 4.317614946585722e-05, "loss": 0.9193, "step": 29000 }, { "epoch": 0.42, "learning_rate": 4.3058496870440964e-05, "loss": 0.8905, "step": 29500 }, { "epoch": 0.42, "learning_rate": 4.294084427502471e-05, "loss": 0.9192, "step": 30000 }, { "epoch": 0.43, "learning_rate": 4.282319167960845e-05, "loss": 0.9106, "step": 30500 }, { "epoch": 0.44, "learning_rate": 4.2705539084192195e-05, "loss": 0.8986, "step": 31000 }, { "epoch": 0.44, "learning_rate": 4.2587886488775945e-05, "loss": 0.9049, "step": 31500 }, { "epoch": 0.45, "learning_rate": 4.247023389335969e-05, "loss": 0.8943, "step": 32000 }, { "epoch": 0.46, "learning_rate": 4.235258129794343e-05, "loss": 0.8912, "step": 32500 }, { "epoch": 0.47, "learning_rate": 4.223492870252718e-05, "loss": 0.8812, "step": 33000 }, { "epoch": 0.47, "learning_rate": 4.2117276107110926e-05, "loss": 0.8942, "step": 33500 }, { "epoch": 0.48, "learning_rate": 4.199962351169467e-05, "loss": 0.8956, "step": 34000 }, { "epoch": 0.49, "learning_rate": 4.1881970916278414e-05, "loss": 0.8793, "step": 34500 }, { "epoch": 0.49, "learning_rate": 4.1764318320862164e-05, "loss": 0.8793, "step": 35000 }, { "epoch": 0.5, "learning_rate": 4.164666572544591e-05, "loss": 0.8847, "step": 35500 }, { "epoch": 0.51, "learning_rate": 4.152901313002965e-05, "loss": 0.886, "step": 36000 }, { "epoch": 0.52, "learning_rate": 4.1411360534613395e-05, "loss": 0.8677, "step": 36500 }, { "epoch": 0.52, "learning_rate": 4.129370793919714e-05, "loss": 0.8751, "step": 37000 }, { "epoch": 0.53, "learning_rate": 4.117605534378088e-05, "loss": 0.8672, "step": 37500 }, { "epoch": 0.54, "learning_rate": 4.105840274836463e-05, "loss": 0.8774, "step": 38000 }, { "epoch": 0.54, "learning_rate": 4.0940750152948376e-05, "loss": 0.8735, "step": 38500 }, { "epoch": 0.55, "learning_rate": 4.082309755753212e-05, "loss": 0.8639, "step": 39000 }, { "epoch": 0.56, "learning_rate": 4.070544496211587e-05, "loss": 0.8609, "step": 39500 }, { "epoch": 0.56, "learning_rate": 4.0587792366699614e-05, "loss": 0.8669, "step": 40000 }, { "epoch": 0.57, "learning_rate": 4.047013977128336e-05, "loss": 0.8662, "step": 40500 }, { "epoch": 0.58, "learning_rate": 4.03524871758671e-05, "loss": 0.8522, "step": 41000 }, { "epoch": 0.59, "learning_rate": 4.0234834580450845e-05, "loss": 0.8609, "step": 41500 }, { "epoch": 0.59, "learning_rate": 4.011718198503459e-05, "loss": 0.8459, "step": 42000 }, { "epoch": 0.6, "learning_rate": 3.999952938961833e-05, "loss": 0.8384, "step": 42500 }, { "epoch": 0.61, "learning_rate": 3.988187679420208e-05, "loss": 0.8335, "step": 43000 }, { "epoch": 0.61, "learning_rate": 3.9764224198785826e-05, "loss": 0.8567, "step": 43500 }, { "epoch": 0.62, "learning_rate": 3.964657160336957e-05, "loss": 0.8567, "step": 44000 }, { "epoch": 0.63, "learning_rate": 3.952891900795332e-05, "loss": 0.8256, "step": 44500 }, { "epoch": 0.64, "learning_rate": 3.9411266412537063e-05, "loss": 0.8289, "step": 45000 }, { "epoch": 0.64, "learning_rate": 3.929361381712081e-05, "loss": 0.8406, "step": 45500 }, { "epoch": 0.65, "learning_rate": 3.917596122170456e-05, "loss": 0.8277, "step": 46000 }, { "epoch": 0.66, "learning_rate": 3.90583086262883e-05, "loss": 0.807, "step": 46500 }, { "epoch": 0.66, "learning_rate": 3.8940656030872045e-05, "loss": 0.8062, "step": 47000 }, { "epoch": 0.67, "learning_rate": 3.882300343545579e-05, "loss": 0.825, "step": 47500 }, { "epoch": 0.68, "learning_rate": 3.870535084003953e-05, "loss": 0.8083, "step": 48000 }, { "epoch": 0.68, "learning_rate": 3.8587698244623275e-05, "loss": 0.8222, "step": 48500 }, { "epoch": 0.69, "learning_rate": 3.8470045649207026e-05, "loss": 0.8262, "step": 49000 }, { "epoch": 0.7, "learning_rate": 3.835239305379077e-05, "loss": 0.8231, "step": 49500 }, { "epoch": 0.71, "learning_rate": 3.823474045837451e-05, "loss": 0.8154, "step": 50000 }, { "epoch": 0.71, "learning_rate": 3.811708786295826e-05, "loss": 0.8021, "step": 50500 }, { "epoch": 0.72, "learning_rate": 3.799943526754201e-05, "loss": 0.8163, "step": 51000 }, { "epoch": 0.73, "learning_rate": 3.788178267212575e-05, "loss": 0.8021, "step": 51500 }, { "epoch": 0.73, "learning_rate": 3.7764130076709494e-05, "loss": 0.7975, "step": 52000 }, { "epoch": 0.74, "learning_rate": 3.764647748129324e-05, "loss": 0.7899, "step": 52500 }, { "epoch": 0.75, "learning_rate": 3.752882488587698e-05, "loss": 0.81, "step": 53000 }, { "epoch": 0.76, "learning_rate": 3.7411172290460725e-05, "loss": 0.8023, "step": 53500 }, { "epoch": 0.76, "learning_rate": 3.7293519695044476e-05, "loss": 0.7946, "step": 54000 }, { "epoch": 0.77, "learning_rate": 3.717586709962822e-05, "loss": 0.8181, "step": 54500 }, { "epoch": 0.78, "learning_rate": 3.705821450421196e-05, "loss": 0.8046, "step": 55000 }, { "epoch": 0.78, "learning_rate": 3.694056190879571e-05, "loss": 0.809, "step": 55500 }, { "epoch": 0.79, "learning_rate": 3.682290931337946e-05, "loss": 0.8072, "step": 56000 }, { "epoch": 0.8, "learning_rate": 3.67052567179632e-05, "loss": 0.7868, "step": 56500 }, { "epoch": 0.8, "learning_rate": 3.6587604122546944e-05, "loss": 0.7887, "step": 57000 }, { "epoch": 0.81, "learning_rate": 3.6469951527130694e-05, "loss": 0.7829, "step": 57500 }, { "epoch": 0.82, "learning_rate": 3.635229893171443e-05, "loss": 0.7685, "step": 58000 }, { "epoch": 0.83, "learning_rate": 3.6234646336298175e-05, "loss": 0.7888, "step": 58500 }, { "epoch": 0.83, "learning_rate": 3.6116993740881925e-05, "loss": 0.791, "step": 59000 }, { "epoch": 0.84, "learning_rate": 3.599934114546567e-05, "loss": 0.7961, "step": 59500 }, { "epoch": 0.85, "learning_rate": 3.588168855004941e-05, "loss": 0.7733, "step": 60000 }, { "epoch": 0.85, "learning_rate": 3.576403595463316e-05, "loss": 0.7765, "step": 60500 }, { "epoch": 0.86, "learning_rate": 3.5646383359216907e-05, "loss": 0.7793, "step": 61000 }, { "epoch": 0.87, "learning_rate": 3.552873076380065e-05, "loss": 0.7985, "step": 61500 }, { "epoch": 0.88, "learning_rate": 3.54110781683844e-05, "loss": 0.7671, "step": 62000 }, { "epoch": 0.88, "learning_rate": 3.5293425572968144e-05, "loss": 0.779, "step": 62500 }, { "epoch": 0.89, "learning_rate": 3.517577297755189e-05, "loss": 0.7888, "step": 63000 }, { "epoch": 0.9, "learning_rate": 3.505812038213563e-05, "loss": 0.7731, "step": 63500 }, { "epoch": 0.9, "learning_rate": 3.4940467786719375e-05, "loss": 0.783, "step": 64000 }, { "epoch": 0.91, "learning_rate": 3.482281519130312e-05, "loss": 0.7639, "step": 64500 }, { "epoch": 0.92, "learning_rate": 3.470516259588686e-05, "loss": 0.7659, "step": 65000 }, { "epoch": 0.92, "learning_rate": 3.458751000047061e-05, "loss": 0.7667, "step": 65500 }, { "epoch": 0.93, "learning_rate": 3.4469857405054356e-05, "loss": 0.7564, "step": 66000 }, { "epoch": 0.94, "learning_rate": 3.43522048096381e-05, "loss": 0.7697, "step": 66500 }, { "epoch": 0.95, "learning_rate": 3.423455221422185e-05, "loss": 0.765, "step": 67000 }, { "epoch": 0.95, "learning_rate": 3.4116899618805594e-05, "loss": 0.7538, "step": 67500 }, { "epoch": 0.96, "learning_rate": 3.399924702338934e-05, "loss": 0.7685, "step": 68000 }, { "epoch": 0.97, "learning_rate": 3.388159442797309e-05, "loss": 0.7635, "step": 68500 }, { "epoch": 0.97, "learning_rate": 3.3763941832556825e-05, "loss": 0.7547, "step": 69000 }, { "epoch": 0.98, "learning_rate": 3.364628923714057e-05, "loss": 0.7571, "step": 69500 }, { "epoch": 0.99, "learning_rate": 3.352863664172432e-05, "loss": 0.7492, "step": 70000 }, { "epoch": 1.0, "learning_rate": 3.341098404630806e-05, "loss": 0.7469, "step": 70500 }, { "epoch": 1.0, "learning_rate": 3.3293331450891806e-05, "loss": 0.7685, "step": 71000 }, { "epoch": 1.01, "learning_rate": 3.3175678855475556e-05, "loss": 0.7542, "step": 71500 }, { "epoch": 1.02, "learning_rate": 3.30580262600593e-05, "loss": 0.7503, "step": 72000 }, { "epoch": 1.02, "learning_rate": 3.2940373664643044e-05, "loss": 0.7487, "step": 72500 }, { "epoch": 1.03, "learning_rate": 3.282272106922679e-05, "loss": 0.7496, "step": 73000 }, { "epoch": 1.04, "learning_rate": 3.270506847381054e-05, "loss": 0.7461, "step": 73500 }, { "epoch": 1.04, "learning_rate": 3.258741587839428e-05, "loss": 0.7283, "step": 74000 }, { "epoch": 1.05, "learning_rate": 3.2469763282978025e-05, "loss": 0.7378, "step": 74500 }, { "epoch": 1.06, "learning_rate": 3.235211068756177e-05, "loss": 0.731, "step": 75000 }, { "epoch": 1.07, "learning_rate": 3.223445809214551e-05, "loss": 0.7312, "step": 75500 }, { "epoch": 1.07, "learning_rate": 3.2116805496729256e-05, "loss": 0.7316, "step": 76000 }, { "epoch": 1.08, "learning_rate": 3.1999152901313006e-05, "loss": 0.7235, "step": 76500 }, { "epoch": 1.09, "learning_rate": 3.188150030589675e-05, "loss": 0.736, "step": 77000 }, { "epoch": 1.09, "learning_rate": 3.176384771048049e-05, "loss": 0.7389, "step": 77500 }, { "epoch": 1.1, "learning_rate": 3.1646195115064244e-05, "loss": 0.7232, "step": 78000 }, { "epoch": 1.11, "learning_rate": 3.152854251964799e-05, "loss": 0.744, "step": 78500 }, { "epoch": 1.12, "learning_rate": 3.141088992423173e-05, "loss": 0.734, "step": 79000 }, { "epoch": 1.12, "learning_rate": 3.1293237328815475e-05, "loss": 0.7253, "step": 79500 }, { "epoch": 1.13, "learning_rate": 3.117558473339922e-05, "loss": 0.7222, "step": 80000 }, { "epoch": 1.14, "learning_rate": 3.105793213798296e-05, "loss": 0.7347, "step": 80500 }, { "epoch": 1.14, "learning_rate": 3.0940279542566705e-05, "loss": 0.7022, "step": 81000 }, { "epoch": 1.15, "learning_rate": 3.0822626947150456e-05, "loss": 0.7324, "step": 81500 }, { "epoch": 1.16, "learning_rate": 3.07049743517342e-05, "loss": 0.7156, "step": 82000 }, { "epoch": 1.16, "learning_rate": 3.058732175631794e-05, "loss": 0.7259, "step": 82500 }, { "epoch": 1.17, "learning_rate": 3.0469669160901693e-05, "loss": 0.7161, "step": 83000 }, { "epoch": 1.18, "learning_rate": 3.0352016565485437e-05, "loss": 0.7265, "step": 83500 }, { "epoch": 1.19, "learning_rate": 3.023436397006918e-05, "loss": 0.729, "step": 84000 }, { "epoch": 1.19, "learning_rate": 3.0116711374652928e-05, "loss": 0.7185, "step": 84500 }, { "epoch": 1.2, "learning_rate": 2.999905877923667e-05, "loss": 0.7164, "step": 85000 }, { "epoch": 1.21, "learning_rate": 2.9881406183820415e-05, "loss": 0.6989, "step": 85500 }, { "epoch": 1.21, "learning_rate": 2.9763753588404165e-05, "loss": 0.7204, "step": 86000 }, { "epoch": 1.22, "learning_rate": 2.964610099298791e-05, "loss": 0.7228, "step": 86500 }, { "epoch": 1.23, "learning_rate": 2.952844839757165e-05, "loss": 0.7076, "step": 87000 }, { "epoch": 1.24, "learning_rate": 2.9410795802155393e-05, "loss": 0.7198, "step": 87500 }, { "epoch": 1.24, "learning_rate": 2.9293143206739143e-05, "loss": 0.7035, "step": 88000 }, { "epoch": 1.25, "learning_rate": 2.9175490611322887e-05, "loss": 0.707, "step": 88500 }, { "epoch": 1.26, "learning_rate": 2.905783801590663e-05, "loss": 0.7194, "step": 89000 }, { "epoch": 1.26, "learning_rate": 2.8940185420490377e-05, "loss": 0.6997, "step": 89500 }, { "epoch": 1.27, "learning_rate": 2.882253282507412e-05, "loss": 0.714, "step": 90000 }, { "epoch": 1.28, "learning_rate": 2.8704880229657865e-05, "loss": 0.6986, "step": 90500 }, { "epoch": 1.28, "learning_rate": 2.8587227634241615e-05, "loss": 0.6982, "step": 91000 }, { "epoch": 1.29, "learning_rate": 2.846957503882536e-05, "loss": 0.6838, "step": 91500 }, { "epoch": 1.3, "learning_rate": 2.8351922443409102e-05, "loss": 0.711, "step": 92000 }, { "epoch": 1.31, "learning_rate": 2.823426984799285e-05, "loss": 0.7111, "step": 92500 }, { "epoch": 1.31, "learning_rate": 2.8116617252576593e-05, "loss": 0.7183, "step": 93000 }, { "epoch": 1.32, "learning_rate": 2.7998964657160337e-05, "loss": 0.7133, "step": 93500 }, { "epoch": 1.33, "learning_rate": 2.7881312061744087e-05, "loss": 0.717, "step": 94000 }, { "epoch": 1.33, "learning_rate": 2.776365946632783e-05, "loss": 0.7153, "step": 94500 }, { "epoch": 1.34, "learning_rate": 2.7646006870911574e-05, "loss": 0.7085, "step": 95000 }, { "epoch": 1.35, "learning_rate": 2.7528354275495318e-05, "loss": 0.7139, "step": 95500 }, { "epoch": 1.36, "learning_rate": 2.7410701680079065e-05, "loss": 0.6945, "step": 96000 }, { "epoch": 1.36, "learning_rate": 2.729304908466281e-05, "loss": 0.7022, "step": 96500 }, { "epoch": 1.37, "learning_rate": 2.7175396489246552e-05, "loss": 0.689, "step": 97000 }, { "epoch": 1.38, "learning_rate": 2.7057743893830302e-05, "loss": 0.6878, "step": 97500 }, { "epoch": 1.38, "learning_rate": 2.6940091298414043e-05, "loss": 0.6872, "step": 98000 }, { "epoch": 1.39, "learning_rate": 2.6822438702997786e-05, "loss": 0.6927, "step": 98500 }, { "epoch": 1.4, "learning_rate": 2.6704786107581537e-05, "loss": 0.6944, "step": 99000 }, { "epoch": 1.4, "learning_rate": 2.658713351216528e-05, "loss": 0.6956, "step": 99500 }, { "epoch": 1.41, "learning_rate": 2.6469480916749024e-05, "loss": 0.6888, "step": 100000 }, { "epoch": 1.42, "learning_rate": 2.635182832133277e-05, "loss": 0.6881, "step": 100500 }, { "epoch": 1.43, "learning_rate": 2.6234175725916514e-05, "loss": 0.6932, "step": 101000 }, { "epoch": 1.43, "learning_rate": 2.6116523130500258e-05, "loss": 0.691, "step": 101500 }, { "epoch": 1.44, "learning_rate": 2.599887053508401e-05, "loss": 0.6786, "step": 102000 }, { "epoch": 1.45, "learning_rate": 2.5881217939667752e-05, "loss": 0.6939, "step": 102500 }, { "epoch": 1.45, "learning_rate": 2.5763565344251496e-05, "loss": 0.6899, "step": 103000 }, { "epoch": 1.46, "learning_rate": 2.564591274883524e-05, "loss": 0.694, "step": 103500 }, { "epoch": 1.47, "learning_rate": 2.5528260153418986e-05, "loss": 0.6734, "step": 104000 }, { "epoch": 1.48, "learning_rate": 2.541060755800273e-05, "loss": 0.6901, "step": 104500 }, { "epoch": 1.48, "learning_rate": 2.5292954962586474e-05, "loss": 0.6726, "step": 105000 }, { "epoch": 1.49, "learning_rate": 2.5175302367170224e-05, "loss": 0.6855, "step": 105500 }, { "epoch": 1.5, "learning_rate": 2.5057649771753968e-05, "loss": 0.6883, "step": 106000 }, { "epoch": 1.5, "learning_rate": 2.493999717633771e-05, "loss": 0.6785, "step": 106500 }, { "epoch": 1.51, "learning_rate": 2.4822344580921455e-05, "loss": 0.6753, "step": 107000 }, { "epoch": 1.52, "learning_rate": 2.4704691985505202e-05, "loss": 0.6728, "step": 107500 }, { "epoch": 1.52, "learning_rate": 2.458703939008895e-05, "loss": 0.6804, "step": 108000 }, { "epoch": 1.53, "learning_rate": 2.4469386794672692e-05, "loss": 0.6792, "step": 108500 }, { "epoch": 1.54, "learning_rate": 2.4351734199256436e-05, "loss": 0.6722, "step": 109000 }, { "epoch": 1.55, "learning_rate": 2.423408160384018e-05, "loss": 0.6629, "step": 109500 }, { "epoch": 1.55, "learning_rate": 2.4116429008423927e-05, "loss": 0.664, "step": 110000 }, { "epoch": 1.56, "learning_rate": 2.3998776413007674e-05, "loss": 0.6612, "step": 110500 }, { "epoch": 1.57, "learning_rate": 2.3881123817591417e-05, "loss": 0.6662, "step": 111000 }, { "epoch": 1.57, "learning_rate": 2.3763471222175164e-05, "loss": 0.6791, "step": 111500 }, { "epoch": 1.58, "learning_rate": 2.3645818626758908e-05, "loss": 0.6658, "step": 112000 }, { "epoch": 1.59, "learning_rate": 2.352816603134265e-05, "loss": 0.6676, "step": 112500 }, { "epoch": 1.6, "learning_rate": 2.34105134359264e-05, "loss": 0.6798, "step": 113000 }, { "epoch": 1.6, "learning_rate": 2.3292860840510142e-05, "loss": 0.6786, "step": 113500 }, { "epoch": 1.61, "learning_rate": 2.317520824509389e-05, "loss": 0.661, "step": 114000 }, { "epoch": 1.62, "learning_rate": 2.3057555649677633e-05, "loss": 0.6662, "step": 114500 }, { "epoch": 1.62, "learning_rate": 2.2939903054261376e-05, "loss": 0.6649, "step": 115000 }, { "epoch": 1.63, "learning_rate": 2.2822250458845123e-05, "loss": 0.657, "step": 115500 }, { "epoch": 1.64, "learning_rate": 2.2704597863428867e-05, "loss": 0.6664, "step": 116000 }, { "epoch": 1.64, "learning_rate": 2.2586945268012614e-05, "loss": 0.6765, "step": 116500 }, { "epoch": 1.65, "learning_rate": 2.2469292672596358e-05, "loss": 0.664, "step": 117000 }, { "epoch": 1.66, "learning_rate": 2.23516400771801e-05, "loss": 0.6664, "step": 117500 }, { "epoch": 1.67, "learning_rate": 2.2233987481763848e-05, "loss": 0.6532, "step": 118000 }, { "epoch": 1.67, "learning_rate": 2.2116334886347595e-05, "loss": 0.6687, "step": 118500 }, { "epoch": 1.68, "learning_rate": 2.199868229093134e-05, "loss": 0.6621, "step": 119000 }, { "epoch": 1.69, "learning_rate": 2.1881029695515086e-05, "loss": 0.6671, "step": 119500 }, { "epoch": 1.69, "learning_rate": 2.176337710009883e-05, "loss": 0.6619, "step": 120000 }, { "epoch": 1.7, "learning_rate": 2.1645724504682573e-05, "loss": 0.6549, "step": 120500 }, { "epoch": 1.71, "learning_rate": 2.152807190926632e-05, "loss": 0.6668, "step": 121000 }, { "epoch": 1.72, "learning_rate": 2.1410419313850064e-05, "loss": 0.6439, "step": 121500 }, { "epoch": 1.72, "learning_rate": 2.129276671843381e-05, "loss": 0.6554, "step": 122000 }, { "epoch": 1.73, "learning_rate": 2.1175114123017554e-05, "loss": 0.6395, "step": 122500 }, { "epoch": 1.74, "learning_rate": 2.1057461527601298e-05, "loss": 0.6482, "step": 123000 }, { "epoch": 1.74, "learning_rate": 2.0939808932185045e-05, "loss": 0.6607, "step": 123500 }, { "epoch": 1.75, "learning_rate": 2.082215633676879e-05, "loss": 0.6508, "step": 124000 }, { "epoch": 1.76, "learning_rate": 2.0704503741352536e-05, "loss": 0.6438, "step": 124500 }, { "epoch": 1.76, "learning_rate": 2.0586851145936283e-05, "loss": 0.658, "step": 125000 }, { "epoch": 1.77, "learning_rate": 2.0469198550520026e-05, "loss": 0.6409, "step": 125500 }, { "epoch": 1.78, "learning_rate": 2.035154595510377e-05, "loss": 0.6439, "step": 126000 }, { "epoch": 1.79, "learning_rate": 2.0233893359687517e-05, "loss": 0.6674, "step": 126500 }, { "epoch": 1.79, "learning_rate": 2.011624076427126e-05, "loss": 0.66, "step": 127000 }, { "epoch": 1.8, "learning_rate": 1.9998588168855007e-05, "loss": 0.6576, "step": 127500 }, { "epoch": 1.81, "learning_rate": 1.988093557343875e-05, "loss": 0.6445, "step": 128000 }, { "epoch": 1.81, "learning_rate": 1.9763282978022495e-05, "loss": 0.6542, "step": 128500 }, { "epoch": 1.82, "learning_rate": 1.964563038260624e-05, "loss": 0.6412, "step": 129000 }, { "epoch": 1.83, "learning_rate": 1.9527977787189985e-05, "loss": 0.6496, "step": 129500 }, { "epoch": 1.84, "learning_rate": 1.9410325191773732e-05, "loss": 0.6423, "step": 130000 }, { "epoch": 1.84, "learning_rate": 1.929267259635748e-05, "loss": 0.6437, "step": 130500 }, { "epoch": 1.85, "learning_rate": 1.917502000094122e-05, "loss": 0.6467, "step": 131000 }, { "epoch": 1.86, "learning_rate": 1.9057367405524967e-05, "loss": 0.6481, "step": 131500 }, { "epoch": 1.86, "learning_rate": 1.893971481010871e-05, "loss": 0.6442, "step": 132000 }, { "epoch": 1.87, "learning_rate": 1.8822062214692457e-05, "loss": 0.6345, "step": 132500 }, { "epoch": 1.88, "learning_rate": 1.8704409619276204e-05, "loss": 0.642, "step": 133000 }, { "epoch": 1.88, "learning_rate": 1.8586757023859948e-05, "loss": 0.6373, "step": 133500 }, { "epoch": 1.89, "learning_rate": 1.846910442844369e-05, "loss": 0.6461, "step": 134000 }, { "epoch": 1.9, "learning_rate": 1.835145183302744e-05, "loss": 0.6429, "step": 134500 }, { "epoch": 1.91, "learning_rate": 1.8233799237611182e-05, "loss": 0.6389, "step": 135000 }, { "epoch": 1.91, "learning_rate": 1.811614664219493e-05, "loss": 0.6395, "step": 135500 }, { "epoch": 1.92, "learning_rate": 1.7998494046778673e-05, "loss": 0.6502, "step": 136000 }, { "epoch": 1.93, "learning_rate": 1.7880841451362416e-05, "loss": 0.6417, "step": 136500 }, { "epoch": 1.93, "learning_rate": 1.7763188855946163e-05, "loss": 0.6376, "step": 137000 }, { "epoch": 1.94, "learning_rate": 1.7645536260529907e-05, "loss": 0.6368, "step": 137500 }, { "epoch": 1.95, "learning_rate": 1.7527883665113654e-05, "loss": 0.638, "step": 138000 }, { "epoch": 1.96, "learning_rate": 1.74102310696974e-05, "loss": 0.6508, "step": 138500 }, { "epoch": 1.96, "learning_rate": 1.7292578474281145e-05, "loss": 0.6311, "step": 139000 }, { "epoch": 1.97, "learning_rate": 1.7174925878864888e-05, "loss": 0.641, "step": 139500 }, { "epoch": 1.98, "learning_rate": 1.7057273283448632e-05, "loss": 0.638, "step": 140000 }, { "epoch": 1.98, "learning_rate": 1.693962068803238e-05, "loss": 0.6176, "step": 140500 }, { "epoch": 1.99, "learning_rate": 1.6821968092616126e-05, "loss": 0.6346, "step": 141000 }, { "epoch": 2.0, "learning_rate": 1.670431549719987e-05, "loss": 0.6348, "step": 141500 }, { "epoch": 2.0, "learning_rate": 1.6586662901783613e-05, "loss": 0.6454, "step": 142000 }, { "epoch": 2.01, "learning_rate": 1.6469010306367357e-05, "loss": 0.6237, "step": 142500 }, { "epoch": 2.02, "learning_rate": 1.6351357710951104e-05, "loss": 0.622, "step": 143000 }, { "epoch": 2.03, "learning_rate": 1.623370511553485e-05, "loss": 0.6112, "step": 143500 }, { "epoch": 2.03, "learning_rate": 1.6116052520118594e-05, "loss": 0.6288, "step": 144000 }, { "epoch": 2.04, "learning_rate": 1.599839992470234e-05, "loss": 0.641, "step": 144500 }, { "epoch": 2.05, "learning_rate": 1.5880747329286085e-05, "loss": 0.6298, "step": 145000 }, { "epoch": 2.05, "learning_rate": 1.576309473386983e-05, "loss": 0.6144, "step": 145500 }, { "epoch": 2.06, "learning_rate": 1.5645442138453575e-05, "loss": 0.6261, "step": 146000 }, { "epoch": 2.07, "learning_rate": 1.552778954303732e-05, "loss": 0.6195, "step": 146500 }, { "epoch": 2.08, "learning_rate": 1.5410136947621066e-05, "loss": 0.6249, "step": 147000 }, { "epoch": 2.08, "learning_rate": 1.529248435220481e-05, "loss": 0.6395, "step": 147500 }, { "epoch": 2.09, "learning_rate": 1.5174831756788555e-05, "loss": 0.6273, "step": 148000 }, { "epoch": 2.1, "learning_rate": 1.50571791613723e-05, "loss": 0.6279, "step": 148500 }, { "epoch": 2.1, "learning_rate": 1.4939526565956047e-05, "loss": 0.6157, "step": 149000 }, { "epoch": 2.11, "learning_rate": 1.4821873970539791e-05, "loss": 0.6238, "step": 149500 }, { "epoch": 2.12, "learning_rate": 1.4704221375123536e-05, "loss": 0.6222, "step": 150000 }, { "epoch": 2.12, "learning_rate": 1.458656877970728e-05, "loss": 0.6292, "step": 150500 }, { "epoch": 2.13, "learning_rate": 1.4468916184291025e-05, "loss": 0.6159, "step": 151000 }, { "epoch": 2.14, "learning_rate": 1.4351263588874772e-05, "loss": 0.6197, "step": 151500 }, { "epoch": 2.15, "learning_rate": 1.4233610993458516e-05, "loss": 0.6247, "step": 152000 }, { "epoch": 2.15, "learning_rate": 1.4115958398042261e-05, "loss": 0.6152, "step": 152500 }, { "epoch": 2.16, "learning_rate": 1.3998305802626008e-05, "loss": 0.6279, "step": 153000 }, { "epoch": 2.17, "learning_rate": 1.3880653207209752e-05, "loss": 0.6107, "step": 153500 }, { "epoch": 2.17, "learning_rate": 1.3763000611793497e-05, "loss": 0.62, "step": 154000 }, { "epoch": 2.18, "learning_rate": 1.364534801637724e-05, "loss": 0.6009, "step": 154500 }, { "epoch": 2.19, "learning_rate": 1.3527695420960988e-05, "loss": 0.6217, "step": 155000 }, { "epoch": 2.2, "learning_rate": 1.3410042825544733e-05, "loss": 0.613, "step": 155500 }, { "epoch": 2.2, "learning_rate": 1.3292390230128477e-05, "loss": 0.6257, "step": 156000 }, { "epoch": 2.21, "learning_rate": 1.3174737634712222e-05, "loss": 0.6257, "step": 156500 }, { "epoch": 2.22, "learning_rate": 1.3057085039295969e-05, "loss": 0.6184, "step": 157000 }, { "epoch": 2.22, "learning_rate": 1.2939432443879713e-05, "loss": 0.6118, "step": 157500 }, { "epoch": 2.23, "learning_rate": 1.2821779848463458e-05, "loss": 0.5928, "step": 158000 }, { "epoch": 2.24, "learning_rate": 1.2704127253047201e-05, "loss": 0.6092, "step": 158500 }, { "epoch": 2.24, "learning_rate": 1.2586474657630948e-05, "loss": 0.6055, "step": 159000 }, { "epoch": 2.25, "learning_rate": 1.2468822062214692e-05, "loss": 0.6102, "step": 159500 }, { "epoch": 2.26, "learning_rate": 1.2351169466798439e-05, "loss": 0.6194, "step": 160000 }, { "epoch": 2.27, "learning_rate": 1.2233516871382183e-05, "loss": 0.6108, "step": 160500 }, { "epoch": 2.27, "learning_rate": 1.2115864275965928e-05, "loss": 0.6091, "step": 161000 }, { "epoch": 2.28, "learning_rate": 1.1998211680549673e-05, "loss": 0.6019, "step": 161500 }, { "epoch": 2.29, "learning_rate": 1.1880559085133419e-05, "loss": 0.6176, "step": 162000 }, { "epoch": 2.29, "learning_rate": 1.1762906489717164e-05, "loss": 0.6149, "step": 162500 }, { "epoch": 2.3, "learning_rate": 1.164525389430091e-05, "loss": 0.5845, "step": 163000 }, { "epoch": 2.31, "learning_rate": 1.1527601298884653e-05, "loss": 0.6121, "step": 163500 }, { "epoch": 2.32, "learning_rate": 1.14099487034684e-05, "loss": 0.6092, "step": 164000 }, { "epoch": 2.32, "learning_rate": 1.1292296108052145e-05, "loss": 0.606, "step": 164500 }, { "epoch": 2.33, "learning_rate": 1.1174643512635889e-05, "loss": 0.6011, "step": 165000 }, { "epoch": 2.34, "learning_rate": 1.1056990917219634e-05, "loss": 0.6015, "step": 165500 }, { "epoch": 2.34, "learning_rate": 1.093933832180338e-05, "loss": 0.6132, "step": 166000 }, { "epoch": 2.35, "learning_rate": 1.0821685726387125e-05, "loss": 0.6025, "step": 166500 }, { "epoch": 2.36, "learning_rate": 1.070403313097087e-05, "loss": 0.6099, "step": 167000 }, { "epoch": 2.36, "learning_rate": 1.0586380535554614e-05, "loss": 0.6172, "step": 167500 }, { "epoch": 2.37, "learning_rate": 1.046872794013836e-05, "loss": 0.6132, "step": 168000 }, { "epoch": 2.38, "learning_rate": 1.0351075344722106e-05, "loss": 0.5979, "step": 168500 }, { "epoch": 2.39, "learning_rate": 1.023342274930585e-05, "loss": 0.6012, "step": 169000 }, { "epoch": 2.39, "learning_rate": 1.0115770153889595e-05, "loss": 0.5921, "step": 169500 }, { "epoch": 2.4, "learning_rate": 9.998117558473342e-06, "loss": 0.6039, "step": 170000 }, { "epoch": 2.41, "learning_rate": 9.880464963057086e-06, "loss": 0.6064, "step": 170500 }, { "epoch": 2.41, "learning_rate": 9.76281236764083e-06, "loss": 0.6084, "step": 171000 }, { "epoch": 2.42, "learning_rate": 9.645159772224576e-06, "loss": 0.5933, "step": 171500 }, { "epoch": 2.43, "learning_rate": 9.52750717680832e-06, "loss": 0.6008, "step": 172000 }, { "epoch": 2.44, "learning_rate": 9.409854581392067e-06, "loss": 0.6048, "step": 172500 }, { "epoch": 2.44, "learning_rate": 9.29220198597581e-06, "loss": 0.5933, "step": 173000 }, { "epoch": 2.45, "learning_rate": 9.174549390559556e-06, "loss": 0.5908, "step": 173500 }, { "epoch": 2.46, "learning_rate": 9.056896795143301e-06, "loss": 0.5921, "step": 174000 }, { "epoch": 2.46, "learning_rate": 8.939244199727046e-06, "loss": 0.6012, "step": 174500 }, { "epoch": 2.47, "learning_rate": 8.821591604310792e-06, "loss": 0.5988, "step": 175000 }, { "epoch": 2.48, "learning_rate": 8.703939008894537e-06, "loss": 0.6019, "step": 175500 }, { "epoch": 2.48, "learning_rate": 8.58628641347828e-06, "loss": 0.5915, "step": 176000 }, { "epoch": 2.49, "learning_rate": 8.468633818062028e-06, "loss": 0.6104, "step": 176500 }, { "epoch": 2.5, "learning_rate": 8.350981222645773e-06, "loss": 0.6093, "step": 177000 }, { "epoch": 2.51, "learning_rate": 8.233328627229516e-06, "loss": 0.6084, "step": 177500 }, { "epoch": 2.51, "learning_rate": 8.115676031813262e-06, "loss": 0.5989, "step": 178000 }, { "epoch": 2.52, "learning_rate": 7.998023436397007e-06, "loss": 0.6088, "step": 178500 }, { "epoch": 2.53, "learning_rate": 7.880370840980752e-06, "loss": 0.5889, "step": 179000 }, { "epoch": 2.53, "learning_rate": 7.762718245564498e-06, "loss": 0.5819, "step": 179500 }, { "epoch": 2.54, "learning_rate": 7.645065650148241e-06, "loss": 0.5901, "step": 180000 }, { "epoch": 2.55, "learning_rate": 7.527413054731988e-06, "loss": 0.58, "step": 180500 }, { "epoch": 2.56, "learning_rate": 7.409760459315733e-06, "loss": 0.5963, "step": 181000 }, { "epoch": 2.56, "learning_rate": 7.292107863899478e-06, "loss": 0.604, "step": 181500 }, { "epoch": 2.57, "learning_rate": 7.174455268483223e-06, "loss": 0.5984, "step": 182000 }, { "epoch": 2.58, "learning_rate": 7.056802673066969e-06, "loss": 0.591, "step": 182500 }, { "epoch": 2.58, "learning_rate": 6.939150077650713e-06, "loss": 0.5932, "step": 183000 }, { "epoch": 2.59, "learning_rate": 6.8214974822344585e-06, "loss": 0.5996, "step": 183500 }, { "epoch": 2.6, "learning_rate": 6.703844886818203e-06, "loss": 0.5987, "step": 184000 }, { "epoch": 2.6, "learning_rate": 6.586192291401949e-06, "loss": 0.5859, "step": 184500 }, { "epoch": 2.61, "learning_rate": 6.468539695985694e-06, "loss": 0.594, "step": 185000 }, { "epoch": 2.62, "learning_rate": 6.350887100569439e-06, "loss": 0.5836, "step": 185500 }, { "epoch": 2.63, "learning_rate": 6.233234505153184e-06, "loss": 0.5789, "step": 186000 }, { "epoch": 2.63, "learning_rate": 6.115581909736929e-06, "loss": 0.595, "step": 186500 }, { "epoch": 2.64, "learning_rate": 5.997929314320674e-06, "loss": 0.5817, "step": 187000 }, { "epoch": 2.65, "learning_rate": 5.880276718904419e-06, "loss": 0.5926, "step": 187500 }, { "epoch": 2.65, "learning_rate": 5.762624123488165e-06, "loss": 0.6029, "step": 188000 }, { "epoch": 2.66, "learning_rate": 5.644971528071909e-06, "loss": 0.5888, "step": 188500 }, { "epoch": 2.67, "learning_rate": 5.527318932655655e-06, "loss": 0.5843, "step": 189000 }, { "epoch": 2.68, "learning_rate": 5.4096663372394e-06, "loss": 0.5794, "step": 189500 }, { "epoch": 2.68, "learning_rate": 5.292013741823145e-06, "loss": 0.5829, "step": 190000 }, { "epoch": 2.69, "learning_rate": 5.1743611464068895e-06, "loss": 0.5903, "step": 190500 }, { "epoch": 2.7, "learning_rate": 5.056708550990636e-06, "loss": 0.5742, "step": 191000 }, { "epoch": 2.7, "learning_rate": 4.93905595557438e-06, "loss": 0.5948, "step": 191500 }, { "epoch": 2.71, "learning_rate": 4.821403360158125e-06, "loss": 0.5807, "step": 192000 }, { "epoch": 2.72, "learning_rate": 4.703750764741871e-06, "loss": 0.5824, "step": 192500 }, { "epoch": 2.72, "learning_rate": 4.586098169325616e-06, "loss": 0.5818, "step": 193000 }, { "epoch": 2.73, "learning_rate": 4.4684455739093605e-06, "loss": 0.5871, "step": 193500 }, { "epoch": 2.74, "learning_rate": 4.350792978493106e-06, "loss": 0.5814, "step": 194000 }, { "epoch": 2.75, "learning_rate": 4.233140383076851e-06, "loss": 0.5851, "step": 194500 }, { "epoch": 2.75, "learning_rate": 4.1154877876605964e-06, "loss": 0.5794, "step": 195000 }, { "epoch": 2.76, "learning_rate": 3.997835192244341e-06, "loss": 0.5914, "step": 195500 }, { "epoch": 2.77, "learning_rate": 3.880182596828086e-06, "loss": 0.5792, "step": 196000 }, { "epoch": 2.77, "learning_rate": 3.762530001411831e-06, "loss": 0.5941, "step": 196500 }, { "epoch": 2.78, "learning_rate": 3.644877405995577e-06, "loss": 0.5673, "step": 197000 }, { "epoch": 2.79, "learning_rate": 3.5272248105793217e-06, "loss": 0.5818, "step": 197500 }, { "epoch": 2.8, "learning_rate": 3.409572215163067e-06, "loss": 0.5828, "step": 198000 }, { "epoch": 2.8, "learning_rate": 3.291919619746812e-06, "loss": 0.5798, "step": 198500 }, { "epoch": 2.81, "learning_rate": 3.1742670243305572e-06, "loss": 0.5714, "step": 199000 }, { "epoch": 2.82, "learning_rate": 3.056614428914302e-06, "loss": 0.5866, "step": 199500 }, { "epoch": 2.82, "learning_rate": 2.938961833498047e-06, "loss": 0.578, "step": 200000 }, { "epoch": 2.83, "learning_rate": 2.8213092380817923e-06, "loss": 0.5924, "step": 200500 }, { "epoch": 2.84, "learning_rate": 2.703656642665537e-06, "loss": 0.5854, "step": 201000 }, { "epoch": 2.84, "learning_rate": 2.5860040472492825e-06, "loss": 0.5865, "step": 201500 }, { "epoch": 2.85, "learning_rate": 2.4683514518330274e-06, "loss": 0.5933, "step": 202000 }, { "epoch": 2.86, "learning_rate": 2.3506988564167727e-06, "loss": 0.5748, "step": 202500 }, { "epoch": 2.87, "learning_rate": 2.233046261000518e-06, "loss": 0.5913, "step": 203000 }, { "epoch": 2.87, "learning_rate": 2.115393665584263e-06, "loss": 0.5811, "step": 203500 }, { "epoch": 2.88, "learning_rate": 1.997741070168008e-06, "loss": 0.5835, "step": 204000 }, { "epoch": 2.89, "learning_rate": 1.880088474751753e-06, "loss": 0.5836, "step": 204500 }, { "epoch": 2.89, "learning_rate": 1.7624358793354984e-06, "loss": 0.5737, "step": 205000 }, { "epoch": 2.9, "learning_rate": 1.6447832839192435e-06, "loss": 0.5813, "step": 205500 }, { "epoch": 2.91, "learning_rate": 1.5271306885029886e-06, "loss": 0.5837, "step": 206000 }, { "epoch": 2.92, "learning_rate": 1.4094780930867337e-06, "loss": 0.5816, "step": 206500 }, { "epoch": 2.92, "learning_rate": 1.2918254976704786e-06, "loss": 0.5871, "step": 207000 }, { "epoch": 2.93, "learning_rate": 1.1741729022542237e-06, "loss": 0.5657, "step": 207500 }, { "epoch": 2.94, "learning_rate": 1.0565203068379688e-06, "loss": 0.584, "step": 208000 }, { "epoch": 2.94, "learning_rate": 9.38867711421714e-07, "loss": 0.5672, "step": 208500 }, { "epoch": 2.95, "learning_rate": 8.212151160054591e-07, "loss": 0.5814, "step": 209000 }, { "epoch": 2.96, "learning_rate": 7.035625205892043e-07, "loss": 0.579, "step": 209500 }, { "epoch": 2.96, "learning_rate": 5.859099251729494e-07, "loss": 0.5969, "step": 210000 }, { "epoch": 2.97, "learning_rate": 4.6825732975669443e-07, "loss": 0.5796, "step": 210500 }, { "epoch": 2.98, "learning_rate": 3.506047343404396e-07, "loss": 0.5699, "step": 211000 }, { "epoch": 2.99, "learning_rate": 2.3295213892418468e-07, "loss": 0.5749, "step": 211500 }, { "epoch": 2.99, "learning_rate": 1.152995435079298e-07, "loss": 0.5678, "step": 212000 }, { "epoch": 3.0, "step": 212490, "total_flos": 4.485693256017408e+17, "train_loss": 0.7508337134063662, "train_runtime": 150510.9458, "train_samples_per_second": 11.294, "train_steps_per_second": 1.412 } ], "logging_steps": 500, "max_steps": 212490, "num_train_epochs": 3, "save_steps": 100000, "total_flos": 4.485693256017408e+17, "trial_name": null, "trial_params": null }