{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.796905222437138, "eval_steps": 200, "global_step": 6200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": "7.6923e-07", "loss": 0.7841, "slid_loss": 0.7841, "step": 1, "time": 108.93 }, { "epoch": 0.0, "learning_rate": "1.5385e-06", "loss": 0.8131, "slid_loss": 0.7986, "step": 2, "time": 77.59 }, { "epoch": 0.0, "learning_rate": "2.3077e-06", "loss": 0.7777, "slid_loss": 0.7916, "step": 3, "time": 76.68 }, { "epoch": 0.0, "learning_rate": "3.0769e-06", "loss": 0.7826, "slid_loss": 0.7894, "step": 4, "time": 73.4 }, { "epoch": 0.0, "learning_rate": "3.8462e-06", "loss": 0.7987, "slid_loss": 0.7912, "step": 5, "time": 71.53 }, { "epoch": 0.0, "learning_rate": "4.6154e-06", "loss": 0.7985, "slid_loss": 0.7924, "step": 6, "time": 72.52 }, { "epoch": 0.01, "learning_rate": "5.3846e-06", "loss": 0.7822, "slid_loss": 0.791, "step": 7, "time": 72.19 }, { "epoch": 0.01, "learning_rate": "6.1538e-06", "loss": 0.8001, "slid_loss": 0.7921, "step": 8, "time": 74.39 }, { "epoch": 0.01, "learning_rate": "6.9231e-06", "loss": 0.7945, "slid_loss": 0.7924, "step": 9, "time": 73.98 }, { "epoch": 0.01, "learning_rate": "7.6923e-06", "loss": 0.7119, "slid_loss": 0.7843, "step": 10, "time": 72.85 }, { "epoch": 0.01, "learning_rate": "8.4615e-06", "loss": 0.7862, "slid_loss": 0.7845, "step": 11, "time": 72.31 }, { "epoch": 0.01, "learning_rate": "9.2308e-06", "loss": 0.7474, "slid_loss": 0.7814, "step": 12, "time": 71.6 }, { "epoch": 0.01, "learning_rate": "1.0000e-05", "loss": 0.7358, "slid_loss": 0.7779, "step": 13, "time": 71.81 }, { "epoch": 0.01, "learning_rate": "1.0769e-05", "loss": 0.7687, "slid_loss": 0.7772, "step": 14, "time": 71.71 }, { "epoch": 0.01, "learning_rate": "1.1538e-05", "loss": 0.7255, "slid_loss": 0.7738, "step": 15, "time": 70.81 }, { "epoch": 0.01, "learning_rate": "1.2308e-05", "loss": 0.7561, "slid_loss": 0.7727, "step": 16, "time": 71.95 }, { "epoch": 0.01, "learning_rate": "1.3077e-05", "loss": 0.7209, "slid_loss": 0.7696, "step": 17, "time": 72.49 }, { "epoch": 0.01, "learning_rate": "1.3846e-05", "loss": 0.738, "slid_loss": 0.7679, "step": 18, "time": 71.59 }, { "epoch": 0.01, "learning_rate": "1.4615e-05", "loss": 0.7865, "slid_loss": 0.7689, "step": 19, "time": 71.55 }, { "epoch": 0.02, "learning_rate": "1.5385e-05", "loss": 0.7757, "slid_loss": 0.7692, "step": 20, "time": 72.26 }, { "epoch": 0.02, "learning_rate": "1.6154e-05", "loss": 0.7554, "slid_loss": 0.7686, "step": 21, "time": 72.31 }, { "epoch": 0.02, "learning_rate": "1.6923e-05", "loss": 0.7639, "slid_loss": 0.7683, "step": 22, "time": 71.61 }, { "epoch": 0.02, "learning_rate": "1.7692e-05", "loss": 0.7626, "slid_loss": 0.7681, "step": 23, "time": 71.07 }, { "epoch": 0.02, "learning_rate": "1.8462e-05", "loss": 0.6992, "slid_loss": 0.7652, "step": 24, "time": 72.52 }, { "epoch": 0.02, "learning_rate": "1.9231e-05", "loss": 0.7118, "slid_loss": 0.7631, "step": 25, "time": 72.07 }, { "epoch": 0.02, "learning_rate": "2.0000e-05", "loss": 0.7478, "slid_loss": 0.7625, "step": 26, "time": 71.49 }, { "epoch": 0.02, "learning_rate": "2.0769e-05", "loss": 0.7408, "slid_loss": 0.7617, "step": 27, "time": 72.52 }, { "epoch": 0.02, "learning_rate": "2.1538e-05", "loss": 0.7141, "slid_loss": 0.76, "step": 28, "time": 73.44 }, { "epoch": 0.02, "learning_rate": "2.2308e-05", "loss": 0.7183, "slid_loss": 0.7586, "step": 29, "time": 73.16 }, { "epoch": 0.02, "learning_rate": "2.3077e-05", "loss": 0.7388, "slid_loss": 0.7579, "step": 30, "time": 72.58 }, { "epoch": 0.02, "learning_rate": "2.3846e-05", "loss": 0.7215, "slid_loss": 0.7567, "step": 31, "time": 73.16 }, { "epoch": 0.02, "learning_rate": "2.4615e-05", "loss": 0.6927, "slid_loss": 0.7547, "step": 32, "time": 72.52 }, { "epoch": 0.03, "learning_rate": "2.5385e-05", "loss": 0.6983, "slid_loss": 0.753, "step": 33, "time": 72.24 }, { "epoch": 0.03, "learning_rate": "2.6154e-05", "loss": 0.6971, "slid_loss": 0.7514, "step": 34, "time": 73.28 }, { "epoch": 0.03, "learning_rate": "2.6923e-05", "loss": 0.7034, "slid_loss": 0.75, "step": 35, "time": 70.94 }, { "epoch": 0.03, "learning_rate": "2.7692e-05", "loss": 0.6918, "slid_loss": 0.7484, "step": 36, "time": 71.17 }, { "epoch": 0.03, "learning_rate": "2.8462e-05", "loss": 0.6982, "slid_loss": 0.747, "step": 37, "time": 71.72 }, { "epoch": 0.03, "learning_rate": "2.9231e-05", "loss": 0.7093, "slid_loss": 0.746, "step": 38, "time": 72.42 }, { "epoch": 0.03, "learning_rate": "3.0000e-05", "loss": 0.7104, "slid_loss": 0.7451, "step": 39, "time": 71.27 }, { "epoch": 0.03, "learning_rate": "3.0769e-05", "loss": 0.7086, "slid_loss": 0.7442, "step": 40, "time": 71.91 }, { "epoch": 0.03, "learning_rate": "3.1538e-05", "loss": 0.6981, "slid_loss": 0.7431, "step": 41, "time": 71.47 }, { "epoch": 0.03, "learning_rate": "3.2308e-05", "loss": 0.6619, "slid_loss": 0.7411, "step": 42, "time": 72.01 }, { "epoch": 0.03, "learning_rate": "3.3077e-05", "loss": 0.6968, "slid_loss": 0.7401, "step": 43, "time": 73.44 }, { "epoch": 0.03, "learning_rate": "3.3846e-05", "loss": 0.6541, "slid_loss": 0.7382, "step": 44, "time": 73.43 }, { "epoch": 0.03, "learning_rate": "3.4615e-05", "loss": 0.6637, "slid_loss": 0.7365, "step": 45, "time": 73.0 }, { "epoch": 0.04, "learning_rate": "3.5385e-05", "loss": 0.7043, "slid_loss": 0.7358, "step": 46, "time": 72.2 }, { "epoch": 0.04, "learning_rate": "3.6154e-05", "loss": 0.6472, "slid_loss": 0.7339, "step": 47, "time": 71.55 }, { "epoch": 0.04, "learning_rate": "3.6923e-05", "loss": 0.7024, "slid_loss": 0.7333, "step": 48, "time": 71.28 }, { "epoch": 0.04, "learning_rate": "3.7692e-05", "loss": 0.6799, "slid_loss": 0.7322, "step": 49, "time": 70.89 }, { "epoch": 0.04, "learning_rate": "3.8462e-05", "loss": 0.7049, "slid_loss": 0.7316, "step": 50, "time": 72.45 }, { "epoch": 0.04, "learning_rate": "3.9231e-05", "loss": 0.7001, "slid_loss": 0.731, "step": 51, "time": 72.07 }, { "epoch": 0.04, "learning_rate": "4.0000e-05", "loss": 0.6934, "slid_loss": 0.7303, "step": 52, "time": 71.54 }, { "epoch": 0.04, "learning_rate": "4.0769e-05", "loss": 0.6624, "slid_loss": 0.729, "step": 53, "time": 73.08 }, { "epoch": 0.04, "learning_rate": "4.1538e-05", "loss": 0.6621, "slid_loss": 0.7278, "step": 54, "time": 73.57 }, { "epoch": 0.04, "learning_rate": "4.2308e-05", "loss": 0.6901, "slid_loss": 0.7271, "step": 55, "time": 71.96 }, { "epoch": 0.04, "learning_rate": "4.3077e-05", "loss": 0.6935, "slid_loss": 0.7265, "step": 56, "time": 72.44 }, { "epoch": 0.04, "learning_rate": "4.3846e-05", "loss": 0.6394, "slid_loss": 0.725, "step": 57, "time": 71.58 }, { "epoch": 0.04, "learning_rate": "4.4615e-05", "loss": 0.7069, "slid_loss": 0.7246, "step": 58, "time": 71.85 }, { "epoch": 0.05, "learning_rate": "4.5385e-05", "loss": 0.715, "slid_loss": 0.7245, "step": 59, "time": 72.33 }, { "epoch": 0.05, "learning_rate": "4.6154e-05", "loss": 0.7086, "slid_loss": 0.7242, "step": 60, "time": 71.28 }, { "epoch": 0.05, "learning_rate": "4.6923e-05", "loss": 0.6842, "slid_loss": 0.7236, "step": 61, "time": 72.17 }, { "epoch": 0.05, "learning_rate": "4.7692e-05", "loss": 0.6778, "slid_loss": 0.7228, "step": 62, "time": 71.51 }, { "epoch": 0.05, "learning_rate": "4.8462e-05", "loss": 0.6516, "slid_loss": 0.7217, "step": 63, "time": 71.79 }, { "epoch": 0.05, "learning_rate": "4.9231e-05", "loss": 0.6387, "slid_loss": 0.7204, "step": 64, "time": 74.12 }, { "epoch": 0.05, "learning_rate": "5.0000e-05", "loss": 0.712, "slid_loss": 0.7203, "step": 65, "time": 72.34 }, { "epoch": 0.05, "learning_rate": "5.0000e-05", "loss": 0.6804, "slid_loss": 0.7197, "step": 66, "time": 72.75 }, { "epoch": 0.05, "learning_rate": "5.0000e-05", "loss": 0.6647, "slid_loss": 0.7188, "step": 67, "time": 71.08 }, { "epoch": 0.05, "learning_rate": "5.0000e-05", "loss": 0.6684, "slid_loss": 0.7181, "step": 68, "time": 71.5 }, { "epoch": 0.05, "learning_rate": "5.0000e-05", "loss": 0.6692, "slid_loss": 0.7174, "step": 69, "time": 71.51 }, { "epoch": 0.05, "learning_rate": "5.0000e-05", "loss": 0.678, "slid_loss": 0.7168, "step": 70, "time": 72.35 }, { "epoch": 0.05, "learning_rate": "5.0000e-05", "loss": 0.687, "slid_loss": 0.7164, "step": 71, "time": 72.2 }, { "epoch": 0.06, "learning_rate": "5.0000e-05", "loss": 0.7304, "slid_loss": 0.7166, "step": 72, "time": 70.81 }, { "epoch": 0.06, "learning_rate": "5.0000e-05", "loss": 0.7034, "slid_loss": 0.7164, "step": 73, "time": 71.57 }, { "epoch": 0.06, "learning_rate": "5.0000e-05", "loss": 0.7148, "slid_loss": 0.7164, "step": 74, "time": 73.09 }, { "epoch": 0.06, "learning_rate": "5.0000e-05", "loss": 0.6618, "slid_loss": 0.7157, "step": 75, "time": 71.78 }, { "epoch": 0.06, "learning_rate": "5.0000e-05", "loss": 0.7047, "slid_loss": 0.7155, "step": 76, "time": 71.46 }, { "epoch": 0.06, "learning_rate": "5.0000e-05", "loss": 0.7039, "slid_loss": 0.7154, "step": 77, "time": 72.02 }, { "epoch": 0.06, "learning_rate": "5.0000e-05", "loss": 0.6776, "slid_loss": 0.7149, "step": 78, "time": 72.37 }, { "epoch": 0.06, "learning_rate": "5.0000e-05", "loss": 0.6354, "slid_loss": 0.7139, "step": 79, "time": 71.57 }, { "epoch": 0.06, "learning_rate": "4.9999e-05", "loss": 0.6936, "slid_loss": 0.7136, "step": 80, "time": 71.58 }, { "epoch": 0.06, "learning_rate": "4.9999e-05", "loss": 0.6395, "slid_loss": 0.7127, "step": 81, "time": 71.97 }, { "epoch": 0.06, "learning_rate": "4.9999e-05", "loss": 0.6787, "slid_loss": 0.7123, "step": 82, "time": 71.99 }, { "epoch": 0.06, "learning_rate": "4.9999e-05", "loss": 0.7436, "slid_loss": 0.7127, "step": 83, "time": 71.84 }, { "epoch": 0.06, "learning_rate": "4.9999e-05", "loss": 0.7111, "slid_loss": 0.7127, "step": 84, "time": 73.51 }, { "epoch": 0.07, "learning_rate": "4.9999e-05", "loss": 0.6959, "slid_loss": 0.7125, "step": 85, "time": 72.86 }, { "epoch": 0.07, "learning_rate": "4.9999e-05", "loss": 0.6783, "slid_loss": 0.7121, "step": 86, "time": 73.51 }, { "epoch": 0.07, "learning_rate": "4.9999e-05", "loss": 0.6807, "slid_loss": 0.7117, "step": 87, "time": 71.98 }, { "epoch": 0.07, "learning_rate": "4.9999e-05", "loss": 0.6577, "slid_loss": 0.7111, "step": 88, "time": 71.47 }, { "epoch": 0.07, "learning_rate": "4.9999e-05", "loss": 0.6497, "slid_loss": 0.7104, "step": 89, "time": 72.71 }, { "epoch": 0.07, "learning_rate": "4.9998e-05", "loss": 0.6767, "slid_loss": 0.71, "step": 90, "time": 71.45 }, { "epoch": 0.07, "learning_rate": "4.9998e-05", "loss": 0.6573, "slid_loss": 0.7094, "step": 91, "time": 72.46 }, { "epoch": 0.07, "learning_rate": "4.9998e-05", "loss": 0.6612, "slid_loss": 0.7089, "step": 92, "time": 71.54 }, { "epoch": 0.07, "learning_rate": "4.9998e-05", "loss": 0.6541, "slid_loss": 0.7083, "step": 93, "time": 71.41 }, { "epoch": 0.07, "learning_rate": "4.9998e-05", "loss": 0.6713, "slid_loss": 0.7079, "step": 94, "time": 73.16 }, { "epoch": 0.07, "learning_rate": "4.9998e-05", "loss": 0.6527, "slid_loss": 0.7074, "step": 95, "time": 71.18 }, { "epoch": 0.07, "learning_rate": "4.9998e-05", "loss": 0.6661, "slid_loss": 0.7069, "step": 96, "time": 72.71 }, { "epoch": 0.08, "learning_rate": "4.9998e-05", "loss": 0.6599, "slid_loss": 0.7064, "step": 97, "time": 73.16 }, { "epoch": 0.08, "learning_rate": "4.9997e-05", "loss": 0.6534, "slid_loss": 0.7059, "step": 98, "time": 72.92 }, { "epoch": 0.08, "learning_rate": "4.9997e-05", "loss": 0.6543, "slid_loss": 0.7054, "step": 99, "time": 73.05 }, { "epoch": 0.08, "learning_rate": "4.9997e-05", "loss": 0.6546, "slid_loss": 0.7049, "step": 100, "time": 72.41 }, { "epoch": 0.08, "learning_rate": "4.9997e-05", "loss": 0.6723, "slid_loss": 0.7038, "step": 101, "time": 71.37 }, { "epoch": 0.08, "learning_rate": "4.9997e-05", "loss": 0.7297, "slid_loss": 0.7029, "step": 102, "time": 71.17 }, { "epoch": 0.08, "learning_rate": "4.9997e-05", "loss": 0.6734, "slid_loss": 0.7019, "step": 103, "time": 71.13 }, { "epoch": 0.08, "learning_rate": "4.9996e-05", "loss": 0.6733, "slid_loss": 0.7008, "step": 104, "time": 71.79 }, { "epoch": 0.08, "learning_rate": "4.9996e-05", "loss": 0.696, "slid_loss": 0.6998, "step": 105, "time": 72.86 }, { "epoch": 0.08, "learning_rate": "4.9996e-05", "loss": 0.6511, "slid_loss": 0.6983, "step": 106, "time": 71.67 }, { "epoch": 0.08, "learning_rate": "4.9996e-05", "loss": 0.6689, "slid_loss": 0.6972, "step": 107, "time": 72.96 }, { "epoch": 0.08, "learning_rate": "4.9996e-05", "loss": 0.6608, "slid_loss": 0.6958, "step": 108, "time": 72.13 }, { "epoch": 0.08, "learning_rate": "4.9995e-05", "loss": 0.6825, "slid_loss": 0.6946, "step": 109, "time": 71.51 }, { "epoch": 0.09, "learning_rate": "4.9995e-05", "loss": 0.6931, "slid_loss": 0.6944, "step": 110, "time": 72.05 }, { "epoch": 0.09, "learning_rate": "4.9995e-05", "loss": 0.6531, "slid_loss": 0.6931, "step": 111, "time": 72.0 }, { "epoch": 0.09, "learning_rate": "4.9995e-05", "loss": 0.6947, "slid_loss": 0.6926, "step": 112, "time": 72.53 }, { "epoch": 0.09, "learning_rate": "4.9994e-05", "loss": 0.7007, "slid_loss": 0.6922, "step": 113, "time": 72.37 }, { "epoch": 0.09, "learning_rate": "4.9994e-05", "loss": 0.654, "slid_loss": 0.6911, "step": 114, "time": 70.74 }, { "epoch": 0.09, "learning_rate": "4.9994e-05", "loss": 0.6841, "slid_loss": 0.6907, "step": 115, "time": 71.37 }, { "epoch": 0.09, "learning_rate": "4.9994e-05", "loss": 0.6849, "slid_loss": 0.69, "step": 116, "time": 71.82 }, { "epoch": 0.09, "learning_rate": "4.9993e-05", "loss": 0.6588, "slid_loss": 0.6893, "step": 117, "time": 71.94 }, { "epoch": 0.09, "learning_rate": "4.9993e-05", "loss": 0.7154, "slid_loss": 0.6891, "step": 118, "time": 72.46 }, { "epoch": 0.09, "learning_rate": "4.9993e-05", "loss": 0.6487, "slid_loss": 0.6877, "step": 119, "time": 72.24 }, { "epoch": 0.09, "learning_rate": "4.9993e-05", "loss": 0.6697, "slid_loss": 0.6867, "step": 120, "time": 72.58 }, { "epoch": 0.09, "learning_rate": "4.9992e-05", "loss": 0.6296, "slid_loss": 0.6854, "step": 121, "time": 72.28 }, { "epoch": 0.09, "learning_rate": "4.9992e-05", "loss": 0.6896, "slid_loss": 0.6847, "step": 122, "time": 72.73 }, { "epoch": 0.1, "learning_rate": "4.9992e-05", "loss": 0.6758, "slid_loss": 0.6838, "step": 123, "time": 71.41 }, { "epoch": 0.1, "learning_rate": "4.9992e-05", "loss": 0.6826, "slid_loss": 0.6836, "step": 124, "time": 383.45 }, { "epoch": 0.1, "learning_rate": "4.9991e-05", "loss": 0.6924, "slid_loss": 0.6835, "step": 125, "time": 72.2 }, { "epoch": 0.1, "learning_rate": "4.9991e-05", "loss": 0.7072, "slid_loss": 0.683, "step": 126, "time": 72.65 }, { "epoch": 0.1, "learning_rate": "4.9991e-05", "loss": 0.6797, "slid_loss": 0.6824, "step": 127, "time": 72.92 }, { "epoch": 0.1, "learning_rate": "4.9990e-05", "loss": 0.7312, "slid_loss": 0.6826, "step": 128, "time": 71.88 }, { "epoch": 0.1, "learning_rate": "4.9990e-05", "loss": 0.6636, "slid_loss": 0.6821, "step": 129, "time": 73.55 }, { "epoch": 0.1, "learning_rate": "4.9990e-05", "loss": 0.6878, "slid_loss": 0.6816, "step": 130, "time": 70.98 }, { "epoch": 0.1, "learning_rate": "4.9989e-05", "loss": 0.6858, "slid_loss": 0.6812, "step": 131, "time": 72.32 }, { "epoch": 0.1, "learning_rate": "4.9989e-05", "loss": 0.6644, "slid_loss": 0.6809, "step": 132, "time": 71.92 }, { "epoch": 0.1, "learning_rate": "4.9989e-05", "loss": 0.6758, "slid_loss": 0.6807, "step": 133, "time": 72.24 }, { "epoch": 0.1, "learning_rate": "4.9989e-05", "loss": 0.6877, "slid_loss": 0.6806, "step": 134, "time": 119.04 }, { "epoch": 0.1, "learning_rate": "4.9988e-05", "loss": 0.6734, "slid_loss": 0.6803, "step": 135, "time": 71.11 }, { "epoch": 0.11, "learning_rate": "4.9988e-05", "loss": 0.7042, "slid_loss": 0.6804, "step": 136, "time": 71.61 }, { "epoch": 0.11, "learning_rate": "4.9987e-05", "loss": 0.68, "slid_loss": 0.6802, "step": 137, "time": 71.96 }, { "epoch": 0.11, "learning_rate": "4.9987e-05", "loss": 0.7157, "slid_loss": 0.6803, "step": 138, "time": 71.77 }, { "epoch": 0.11, "learning_rate": "4.9987e-05", "loss": 0.6759, "slid_loss": 0.68, "step": 139, "time": 73.01 }, { "epoch": 0.11, "learning_rate": "4.9986e-05", "loss": 0.7322, "slid_loss": 0.6802, "step": 140, "time": 72.89 }, { "epoch": 0.11, "learning_rate": "4.9986e-05", "loss": 0.6707, "slid_loss": 0.6799, "step": 141, "time": 119.41 }, { "epoch": 0.11, "learning_rate": "4.9986e-05", "loss": 0.6845, "slid_loss": 0.6801, "step": 142, "time": 1424.28 }, { "epoch": 0.11, "learning_rate": "4.9985e-05", "loss": 0.6824, "slid_loss": 0.68, "step": 143, "time": 77.55 }, { "epoch": 0.11, "learning_rate": "4.9985e-05", "loss": 0.7347, "slid_loss": 0.6808, "step": 144, "time": 177.86 }, { "epoch": 0.11, "learning_rate": "4.9985e-05", "loss": 0.6985, "slid_loss": 0.6812, "step": 145, "time": 71.58 }, { "epoch": 0.11, "learning_rate": "4.9984e-05", "loss": 0.6784, "slid_loss": 0.6809, "step": 146, "time": 71.1 }, { "epoch": 0.11, "learning_rate": "4.9984e-05", "loss": 0.677, "slid_loss": 0.6812, "step": 147, "time": 72.28 }, { "epoch": 0.11, "learning_rate": "4.9983e-05", "loss": 0.6982, "slid_loss": 0.6811, "step": 148, "time": 184.42 }, { "epoch": 0.12, "learning_rate": "4.9983e-05", "loss": 0.6965, "slid_loss": 0.6813, "step": 149, "time": 120.48 }, { "epoch": 0.12, "learning_rate": "4.9983e-05", "loss": 0.6779, "slid_loss": 0.681, "step": 150, "time": 70.39 }, { "epoch": 0.12, "learning_rate": "4.9982e-05", "loss": 0.7089, "slid_loss": 0.6811, "step": 151, "time": 72.85 }, { "epoch": 0.12, "learning_rate": "4.9982e-05", "loss": 0.7195, "slid_loss": 0.6814, "step": 152, "time": 72.85 }, { "epoch": 0.12, "learning_rate": "4.9981e-05", "loss": 0.7186, "slid_loss": 0.682, "step": 153, "time": 72.61 }, { "epoch": 0.12, "learning_rate": "4.9981e-05", "loss": 0.6949, "slid_loss": 0.6823, "step": 154, "time": 72.51 }, { "epoch": 0.12, "learning_rate": "4.9980e-05", "loss": 0.6888, "slid_loss": 0.6823, "step": 155, "time": 73.05 }, { "epoch": 0.12, "learning_rate": "4.9980e-05", "loss": 0.6877, "slid_loss": 0.6822, "step": 156, "time": 114.32 }, { "epoch": 0.12, "learning_rate": "4.9980e-05", "loss": 0.7019, "slid_loss": 0.6828, "step": 157, "time": 179.94 }, { "epoch": 0.12, "learning_rate": "4.9979e-05", "loss": 0.725, "slid_loss": 0.683, "step": 158, "time": 204.75 }, { "epoch": 0.12, "learning_rate": "4.9979e-05", "loss": 0.7208, "slid_loss": 0.6831, "step": 159, "time": 212.92 }, { "epoch": 0.12, "learning_rate": "4.9978e-05", "loss": 0.6908, "slid_loss": 0.6829, "step": 160, "time": 112.89 }, { "epoch": 0.12, "learning_rate": "4.9978e-05", "loss": 0.6809, "slid_loss": 0.6829, "step": 161, "time": 73.27 }, { "epoch": 0.13, "learning_rate": "4.9977e-05", "loss": 0.6812, "slid_loss": 0.6829, "step": 162, "time": 70.77 }, { "epoch": 0.13, "learning_rate": "4.9977e-05", "loss": 0.667, "slid_loss": 0.6831, "step": 163, "time": 72.35 }, { "epoch": 0.13, "learning_rate": "4.9976e-05", "loss": 0.6758, "slid_loss": 0.6834, "step": 164, "time": 70.72 }, { "epoch": 0.13, "learning_rate": "4.9976e-05", "loss": 0.7063, "slid_loss": 0.6834, "step": 165, "time": 71.85 }, { "epoch": 0.13, "learning_rate": "4.9975e-05", "loss": 0.6666, "slid_loss": 0.6832, "step": 166, "time": 71.78 }, { "epoch": 0.13, "learning_rate": "4.9975e-05", "loss": 0.6398, "slid_loss": 0.683, "step": 167, "time": 71.93 }, { "epoch": 0.13, "learning_rate": "4.9974e-05", "loss": 0.6575, "slid_loss": 0.6829, "step": 168, "time": 71.28 }, { "epoch": 0.13, "learning_rate": "4.9974e-05", "loss": 0.691, "slid_loss": 0.6831, "step": 169, "time": 72.42 }, { "epoch": 0.13, "learning_rate": "4.9973e-05", "loss": 0.6562, "slid_loss": 0.6829, "step": 170, "time": 71.78 }, { "epoch": 0.13, "learning_rate": "4.9973e-05", "loss": 0.6568, "slid_loss": 0.6826, "step": 171, "time": 72.12 }, { "epoch": 0.13, "learning_rate": "4.9972e-05", "loss": 0.7019, "slid_loss": 0.6823, "step": 172, "time": 71.49 }, { "epoch": 0.13, "learning_rate": "4.9972e-05", "loss": 0.6809, "slid_loss": 0.6821, "step": 173, "time": 73.84 }, { "epoch": 0.13, "learning_rate": "4.9971e-05", "loss": 0.6885, "slid_loss": 0.6818, "step": 174, "time": 72.26 }, { "epoch": 0.14, "learning_rate": "4.9971e-05", "loss": 0.7787, "slid_loss": 0.683, "step": 175, "time": 72.12 }, { "epoch": 0.14, "learning_rate": "4.9970e-05", "loss": 0.689, "slid_loss": 0.6828, "step": 176, "time": 71.75 }, { "epoch": 0.14, "learning_rate": "4.9970e-05", "loss": 0.69, "slid_loss": 0.6827, "step": 177, "time": 72.71 }, { "epoch": 0.14, "learning_rate": "4.9969e-05", "loss": 0.6444, "slid_loss": 0.6823, "step": 178, "time": 72.1 }, { "epoch": 0.14, "learning_rate": "4.9969e-05", "loss": 0.6897, "slid_loss": 0.6829, "step": 179, "time": 72.16 }, { "epoch": 0.14, "learning_rate": "4.9968e-05", "loss": 0.6717, "slid_loss": 0.6827, "step": 180, "time": 71.21 }, { "epoch": 0.14, "learning_rate": "4.9968e-05", "loss": 0.6855, "slid_loss": 0.6831, "step": 181, "time": 70.96 }, { "epoch": 0.14, "learning_rate": "4.9967e-05", "loss": 0.7076, "slid_loss": 0.6834, "step": 182, "time": 71.11 }, { "epoch": 0.14, "learning_rate": "4.9966e-05", "loss": 0.6366, "slid_loss": 0.6823, "step": 183, "time": 72.54 }, { "epoch": 0.14, "learning_rate": "4.9966e-05", "loss": 0.6622, "slid_loss": 0.6818, "step": 184, "time": 72.36 }, { "epoch": 0.14, "learning_rate": "4.9965e-05", "loss": 0.6423, "slid_loss": 0.6813, "step": 185, "time": 71.53 }, { "epoch": 0.14, "learning_rate": "4.9965e-05", "loss": 0.6792, "slid_loss": 0.6813, "step": 186, "time": 72.12 }, { "epoch": 0.14, "learning_rate": "4.9964e-05", "loss": 0.7105, "slid_loss": 0.6816, "step": 187, "time": 71.02 }, { "epoch": 0.15, "learning_rate": "4.9963e-05", "loss": 0.6824, "slid_loss": 0.6819, "step": 188, "time": 71.76 }, { "epoch": 0.15, "learning_rate": "4.9963e-05", "loss": 0.7083, "slid_loss": 0.6825, "step": 189, "time": 72.73 }, { "epoch": 0.15, "learning_rate": "4.9962e-05", "loss": 0.6875, "slid_loss": 0.6826, "step": 190, "time": 71.87 }, { "epoch": 0.15, "learning_rate": "4.9962e-05", "loss": 0.6711, "slid_loss": 0.6827, "step": 191, "time": 71.46 }, { "epoch": 0.15, "learning_rate": "4.9961e-05", "loss": 0.6755, "slid_loss": 0.6828, "step": 192, "time": 72.96 }, { "epoch": 0.15, "learning_rate": "4.9960e-05", "loss": 0.6727, "slid_loss": 0.683, "step": 193, "time": 72.48 }, { "epoch": 0.15, "learning_rate": "4.9960e-05", "loss": 0.6584, "slid_loss": 0.6829, "step": 194, "time": 71.76 }, { "epoch": 0.15, "learning_rate": "4.9959e-05", "loss": 0.6619, "slid_loss": 0.683, "step": 195, "time": 71.51 }, { "epoch": 0.15, "learning_rate": "4.9959e-05", "loss": 0.6587, "slid_loss": 0.6829, "step": 196, "time": 72.84 }, { "epoch": 0.15, "learning_rate": "4.9958e-05", "loss": 0.6766, "slid_loss": 0.6831, "step": 197, "time": 72.59 }, { "epoch": 0.15, "learning_rate": "4.9957e-05", "loss": 0.6763, "slid_loss": 0.6833, "step": 198, "time": 70.88 }, { "epoch": 0.15, "learning_rate": "4.9957e-05", "loss": 0.6335, "slid_loss": 0.6831, "step": 199, "time": 71.95 }, { "epoch": 0.15, "learning_rate": "4.9956e-05", "loss": 0.6793, "slid_loss": 0.6834, "step": 200, "time": 71.66 }, { "epoch": 0.16, "learning_rate": "4.9955e-05", "loss": 0.6732, "slid_loss": 0.6834, "step": 201, "time": 859.09 }, { "epoch": 0.16, "learning_rate": "4.9955e-05", "loss": 0.6389, "slid_loss": 0.6825, "step": 202, "time": 71.93 }, { "epoch": 0.16, "learning_rate": "4.9954e-05", "loss": 0.7189, "slid_loss": 0.6829, "step": 203, "time": 71.4 }, { "epoch": 0.16, "learning_rate": "4.9953e-05", "loss": 0.6941, "slid_loss": 0.6831, "step": 204, "time": 72.58 }, { "epoch": 0.16, "learning_rate": "4.9953e-05", "loss": 0.6699, "slid_loss": 0.6829, "step": 205, "time": 72.64 }, { "epoch": 0.16, "learning_rate": "4.9952e-05", "loss": 0.7295, "slid_loss": 0.6836, "step": 206, "time": 71.86 }, { "epoch": 0.16, "learning_rate": "4.9951e-05", "loss": 0.6794, "slid_loss": 0.6837, "step": 207, "time": 70.2 }, { "epoch": 0.16, "learning_rate": "4.9951e-05", "loss": 0.7234, "slid_loss": 0.6844, "step": 208, "time": 72.08 }, { "epoch": 0.16, "learning_rate": "4.9950e-05", "loss": 0.6701, "slid_loss": 0.6842, "step": 209, "time": 72.14 }, { "epoch": 0.16, "learning_rate": "4.9949e-05", "loss": 0.7953, "slid_loss": 0.6853, "step": 210, "time": 73.17 }, { "epoch": 0.16, "learning_rate": "4.9949e-05", "loss": 0.6506, "slid_loss": 0.6852, "step": 211, "time": 74.2 }, { "epoch": 0.16, "learning_rate": "4.9948e-05", "loss": 0.6877, "slid_loss": 0.6852, "step": 212, "time": 72.05 }, { "epoch": 0.16, "learning_rate": "4.9947e-05", "loss": 0.678, "slid_loss": 0.6849, "step": 213, "time": 71.4 }, { "epoch": 0.17, "learning_rate": "4.9946e-05", "loss": 0.678, "slid_loss": 0.6852, "step": 214, "time": 73.07 }, { "epoch": 0.17, "learning_rate": "4.9946e-05", "loss": 0.6928, "slid_loss": 0.6853, "step": 215, "time": 72.81 }, { "epoch": 0.17, "learning_rate": "4.9945e-05", "loss": 0.6999, "slid_loss": 0.6854, "step": 216, "time": 71.72 }, { "epoch": 0.17, "learning_rate": "4.9944e-05", "loss": 0.7055, "slid_loss": 0.6859, "step": 217, "time": 72.67 }, { "epoch": 0.17, "learning_rate": "4.9944e-05", "loss": 0.6718, "slid_loss": 0.6855, "step": 218, "time": 71.33 }, { "epoch": 0.17, "learning_rate": "4.9943e-05", "loss": 0.6485, "slid_loss": 0.6855, "step": 219, "time": 72.75 }, { "epoch": 0.17, "learning_rate": "4.9942e-05", "loss": 0.687, "slid_loss": 0.6856, "step": 220, "time": 70.7 }, { "epoch": 0.17, "learning_rate": "4.9941e-05", "loss": 0.6585, "slid_loss": 0.6859, "step": 221, "time": 70.36 }, { "epoch": 0.17, "learning_rate": "4.9941e-05", "loss": 0.7179, "slid_loss": 0.6862, "step": 222, "time": 71.3 }, { "epoch": 0.17, "learning_rate": "4.9940e-05", "loss": 0.6444, "slid_loss": 0.6859, "step": 223, "time": 71.93 }, { "epoch": 0.17, "learning_rate": "4.9939e-05", "loss": 0.6609, "slid_loss": 0.6857, "step": 224, "time": 71.45 }, { "epoch": 0.17, "learning_rate": "4.9938e-05", "loss": 0.6586, "slid_loss": 0.6853, "step": 225, "time": 70.87 }, { "epoch": 0.17, "learning_rate": "4.9937e-05", "loss": 0.6639, "slid_loss": 0.6849, "step": 226, "time": 72.06 }, { "epoch": 0.18, "learning_rate": "4.9937e-05", "loss": 0.6524, "slid_loss": 0.6846, "step": 227, "time": 71.21 }, { "epoch": 0.18, "learning_rate": "4.9936e-05", "loss": 0.6651, "slid_loss": 0.684, "step": 228, "time": 72.88 }, { "epoch": 0.18, "learning_rate": "4.9935e-05", "loss": 0.6922, "slid_loss": 0.6842, "step": 229, "time": 72.92 }, { "epoch": 0.18, "learning_rate": "4.9934e-05", "loss": 0.6438, "slid_loss": 0.6838, "step": 230, "time": 72.74 }, { "epoch": 0.18, "learning_rate": "4.9934e-05", "loss": 0.6642, "slid_loss": 0.6836, "step": 231, "time": 71.66 }, { "epoch": 0.18, "learning_rate": "4.9933e-05", "loss": 0.6642, "slid_loss": 0.6836, "step": 232, "time": 73.64 }, { "epoch": 0.18, "learning_rate": "4.9932e-05", "loss": 0.7178, "slid_loss": 0.684, "step": 233, "time": 72.64 }, { "epoch": 0.18, "learning_rate": "4.9931e-05", "loss": 0.6491, "slid_loss": 0.6836, "step": 234, "time": 71.75 }, { "epoch": 0.18, "learning_rate": "4.9930e-05", "loss": 0.7023, "slid_loss": 0.6839, "step": 235, "time": 72.59 }, { "epoch": 0.18, "learning_rate": "4.9929e-05", "loss": 0.6846, "slid_loss": 0.6837, "step": 236, "time": 72.48 }, { "epoch": 0.18, "learning_rate": "4.9929e-05", "loss": 0.6694, "slid_loss": 0.6836, "step": 237, "time": 71.82 }, { "epoch": 0.18, "learning_rate": "4.9928e-05", "loss": 0.6734, "slid_loss": 0.6832, "step": 238, "time": 71.99 }, { "epoch": 0.18, "learning_rate": "4.9927e-05", "loss": 0.6777, "slid_loss": 0.6832, "step": 239, "time": 71.65 }, { "epoch": 0.19, "learning_rate": "4.9926e-05", "loss": 0.6549, "slid_loss": 0.6824, "step": 240, "time": 71.35 }, { "epoch": 0.19, "learning_rate": "4.9925e-05", "loss": 0.6463, "slid_loss": 0.6822, "step": 241, "time": 70.86 }, { "epoch": 0.19, "learning_rate": "4.9924e-05", "loss": 0.676, "slid_loss": 0.6821, "step": 242, "time": 74.34 }, { "epoch": 0.19, "learning_rate": "4.9924e-05", "loss": 0.6676, "slid_loss": 0.682, "step": 243, "time": 71.02 }, { "epoch": 0.19, "learning_rate": "4.9923e-05", "loss": 0.6452, "slid_loss": 0.6811, "step": 244, "time": 73.09 }, { "epoch": 0.19, "learning_rate": "4.9922e-05", "loss": 0.6825, "slid_loss": 0.6809, "step": 245, "time": 72.07 }, { "epoch": 0.19, "learning_rate": "4.9921e-05", "loss": 0.6591, "slid_loss": 0.6807, "step": 246, "time": 72.09 }, { "epoch": 0.19, "learning_rate": "4.9920e-05", "loss": 0.7058, "slid_loss": 0.681, "step": 247, "time": 72.29 }, { "epoch": 0.19, "learning_rate": "4.9919e-05", "loss": 0.6804, "slid_loss": 0.6808, "step": 248, "time": 71.1 }, { "epoch": 0.19, "learning_rate": "4.9918e-05", "loss": 0.6791, "slid_loss": 0.6806, "step": 249, "time": 71.48 }, { "epoch": 0.19, "learning_rate": "4.9917e-05", "loss": 0.6883, "slid_loss": 0.6807, "step": 250, "time": 70.49 }, { "epoch": 0.19, "learning_rate": "4.9917e-05", "loss": 0.7081, "slid_loss": 0.6807, "step": 251, "time": 72.35 }, { "epoch": 0.19, "learning_rate": "4.9916e-05", "loss": 0.65, "slid_loss": 0.68, "step": 252, "time": 72.12 }, { "epoch": 0.2, "learning_rate": "4.9915e-05", "loss": 0.667, "slid_loss": 0.6795, "step": 253, "time": 71.95 }, { "epoch": 0.2, "learning_rate": "4.9914e-05", "loss": 0.6623, "slid_loss": 0.6792, "step": 254, "time": 72.88 }, { "epoch": 0.2, "learning_rate": "4.9913e-05", "loss": 0.6611, "slid_loss": 0.6789, "step": 255, "time": 73.31 }, { "epoch": 0.2, "learning_rate": "4.9912e-05", "loss": 0.6594, "slid_loss": 0.6786, "step": 256, "time": 72.22 }, { "epoch": 0.2, "learning_rate": "4.9911e-05", "loss": 0.6518, "slid_loss": 0.6781, "step": 257, "time": 91.89 }, { "epoch": 0.2, "learning_rate": "4.9910e-05", "loss": 0.6444, "slid_loss": 0.6773, "step": 258, "time": 72.41 }, { "epoch": 0.2, "learning_rate": "4.9909e-05", "loss": 0.6527, "slid_loss": 0.6767, "step": 259, "time": 71.2 }, { "epoch": 0.2, "learning_rate": "4.9908e-05", "loss": 0.6353, "slid_loss": 0.6761, "step": 260, "time": 72.19 }, { "epoch": 0.2, "learning_rate": "4.9907e-05", "loss": 0.6724, "slid_loss": 0.676, "step": 261, "time": 72.0 }, { "epoch": 0.2, "learning_rate": "4.9906e-05", "loss": 0.6408, "slid_loss": 0.6756, "step": 262, "time": 72.42 }, { "epoch": 0.2, "learning_rate": "4.9905e-05", "loss": 0.7001, "slid_loss": 0.6759, "step": 263, "time": 72.29 }, { "epoch": 0.2, "learning_rate": "4.9905e-05", "loss": 0.6839, "slid_loss": 0.676, "step": 264, "time": 72.56 }, { "epoch": 0.21, "learning_rate": "4.9904e-05", "loss": 0.7039, "slid_loss": 0.676, "step": 265, "time": 71.52 }, { "epoch": 0.21, "learning_rate": "4.9903e-05", "loss": 0.6692, "slid_loss": 0.676, "step": 266, "time": 71.76 }, { "epoch": 0.21, "learning_rate": "4.9902e-05", "loss": 0.6525, "slid_loss": 0.6761, "step": 267, "time": 71.94 }, { "epoch": 0.21, "learning_rate": "4.9901e-05", "loss": 0.64, "slid_loss": 0.676, "step": 268, "time": 70.52 }, { "epoch": 0.21, "learning_rate": "4.9900e-05", "loss": 0.6774, "slid_loss": 0.6758, "step": 269, "time": 72.28 }, { "epoch": 0.21, "learning_rate": "4.9899e-05", "loss": 0.6533, "slid_loss": 0.6758, "step": 270, "time": 70.93 }, { "epoch": 0.21, "learning_rate": "4.9898e-05", "loss": 0.6506, "slid_loss": 0.6757, "step": 271, "time": 73.03 }, { "epoch": 0.21, "learning_rate": "4.9897e-05", "loss": 0.653, "slid_loss": 0.6753, "step": 272, "time": 72.17 }, { "epoch": 0.21, "learning_rate": "4.9896e-05", "loss": 0.6952, "slid_loss": 0.6754, "step": 273, "time": 71.37 }, { "epoch": 0.21, "learning_rate": "4.9895e-05", "loss": 0.6734, "slid_loss": 0.6753, "step": 274, "time": 72.04 }, { "epoch": 0.21, "learning_rate": "4.9894e-05", "loss": 0.6524, "slid_loss": 0.674, "step": 275, "time": 73.32 }, { "epoch": 0.21, "learning_rate": "4.9893e-05", "loss": 0.6535, "slid_loss": 0.6736, "step": 276, "time": 70.96 }, { "epoch": 0.21, "learning_rate": "4.9892e-05", "loss": 0.646, "slid_loss": 0.6732, "step": 277, "time": 72.92 }, { "epoch": 0.22, "learning_rate": "4.9891e-05", "loss": 0.6528, "slid_loss": 0.6733, "step": 278, "time": 71.04 }, { "epoch": 0.22, "learning_rate": "4.9890e-05", "loss": 0.6889, "slid_loss": 0.6733, "step": 279, "time": 72.18 }, { "epoch": 0.22, "learning_rate": "4.9889e-05", "loss": 0.6631, "slid_loss": 0.6732, "step": 280, "time": 70.64 }, { "epoch": 0.22, "learning_rate": "4.9888e-05", "loss": 0.6945, "slid_loss": 0.6733, "step": 281, "time": 71.75 }, { "epoch": 0.22, "learning_rate": "4.9886e-05", "loss": 0.6563, "slid_loss": 0.6728, "step": 282, "time": 72.05 }, { "epoch": 0.22, "learning_rate": "4.9885e-05", "loss": 0.6894, "slid_loss": 0.6733, "step": 283, "time": 71.99 }, { "epoch": 0.22, "learning_rate": "4.9884e-05", "loss": 0.6924, "slid_loss": 0.6736, "step": 284, "time": 72.17 }, { "epoch": 0.22, "learning_rate": "4.9883e-05", "loss": 0.6701, "slid_loss": 0.6739, "step": 285, "time": 70.73 }, { "epoch": 0.22, "learning_rate": "4.9882e-05", "loss": 0.7118, "slid_loss": 0.6742, "step": 286, "time": 72.14 }, { "epoch": 0.22, "learning_rate": "4.9881e-05", "loss": 0.7176, "slid_loss": 0.6743, "step": 287, "time": 71.87 }, { "epoch": 0.22, "learning_rate": "4.9880e-05", "loss": 0.6552, "slid_loss": 0.674, "step": 288, "time": 70.68 }, { "epoch": 0.22, "learning_rate": "4.9879e-05", "loss": 0.6393, "slid_loss": 0.6733, "step": 289, "time": 71.73 }, { "epoch": 0.22, "learning_rate": "4.9878e-05", "loss": 0.6865, "slid_loss": 0.6733, "step": 290, "time": 72.97 }, { "epoch": 0.23, "learning_rate": "4.9877e-05", "loss": 0.6534, "slid_loss": 0.6731, "step": 291, "time": 71.67 }, { "epoch": 0.23, "learning_rate": "4.9876e-05", "loss": 0.6831, "slid_loss": 0.6732, "step": 292, "time": 70.8 }, { "epoch": 0.23, "learning_rate": "4.9875e-05", "loss": 0.6849, "slid_loss": 0.6733, "step": 293, "time": 74.27 }, { "epoch": 0.23, "learning_rate": "4.9874e-05", "loss": 0.6975, "slid_loss": 0.6737, "step": 294, "time": 70.17 }, { "epoch": 0.23, "learning_rate": "4.9872e-05", "loss": 0.7017, "slid_loss": 0.6741, "step": 295, "time": 70.07 }, { "epoch": 0.23, "learning_rate": "4.9871e-05", "loss": 0.6298, "slid_loss": 0.6738, "step": 296, "time": 70.5 }, { "epoch": 0.23, "learning_rate": "4.9870e-05", "loss": 0.6558, "slid_loss": 0.6736, "step": 297, "time": 71.48 }, { "epoch": 0.23, "learning_rate": "4.9869e-05", "loss": 0.6725, "slid_loss": 0.6736, "step": 298, "time": 71.62 }, { "epoch": 0.23, "learning_rate": "4.9868e-05", "loss": 0.6669, "slid_loss": 0.6739, "step": 299, "time": 71.97 }, { "epoch": 0.23, "learning_rate": "4.9867e-05", "loss": 0.6556, "slid_loss": 0.6737, "step": 300, "time": 71.55 }, { "epoch": 0.23, "learning_rate": "4.9866e-05", "loss": 0.6414, "slid_loss": 0.6733, "step": 301, "time": 71.18 }, { "epoch": 0.23, "learning_rate": "4.9865e-05", "loss": 0.6999, "slid_loss": 0.674, "step": 302, "time": 71.25 }, { "epoch": 0.23, "learning_rate": "4.9863e-05", "loss": 0.6708, "slid_loss": 0.6735, "step": 303, "time": 71.24 }, { "epoch": 0.24, "learning_rate": "4.9862e-05", "loss": 0.6734, "slid_loss": 0.6733, "step": 304, "time": 71.56 }, { "epoch": 0.24, "learning_rate": "4.9861e-05", "loss": 0.6261, "slid_loss": 0.6728, "step": 305, "time": 71.89 }, { "epoch": 0.24, "learning_rate": "4.9860e-05", "loss": 0.6741, "slid_loss": 0.6723, "step": 306, "time": 71.68 }, { "epoch": 0.24, "learning_rate": "4.9859e-05", "loss": 0.6687, "slid_loss": 0.6722, "step": 307, "time": 72.45 }, { "epoch": 0.24, "learning_rate": "4.9858e-05", "loss": 0.6762, "slid_loss": 0.6717, "step": 308, "time": 72.28 }, { "epoch": 0.24, "learning_rate": "4.9856e-05", "loss": 0.7272, "slid_loss": 0.6723, "step": 309, "time": 71.66 }, { "epoch": 0.24, "learning_rate": "4.9855e-05", "loss": 0.6592, "slid_loss": 0.6709, "step": 310, "time": 73.36 }, { "epoch": 0.24, "learning_rate": "4.9854e-05", "loss": 0.6902, "slid_loss": 0.6713, "step": 311, "time": 72.0 }, { "epoch": 0.24, "learning_rate": "4.9853e-05", "loss": 0.6356, "slid_loss": 0.6708, "step": 312, "time": 71.19 }, { "epoch": 0.24, "learning_rate": "4.9852e-05", "loss": 0.6999, "slid_loss": 0.671, "step": 313, "time": 89.86 }, { "epoch": 0.24, "learning_rate": "4.9851e-05", "loss": 0.6571, "slid_loss": 0.6708, "step": 314, "time": 85.26 }, { "epoch": 0.24, "learning_rate": "4.9849e-05", "loss": 0.6682, "slid_loss": 0.6705, "step": 315, "time": 141.97 }, { "epoch": 0.24, "learning_rate": "4.9848e-05", "loss": 0.6611, "slid_loss": 0.6702, "step": 316, "time": 159.58 }, { "epoch": 0.25, "learning_rate": "4.9847e-05", "loss": 0.6687, "slid_loss": 0.6698, "step": 317, "time": 223.28 }, { "epoch": 0.25, "learning_rate": "4.9846e-05", "loss": 0.6796, "slid_loss": 0.6699, "step": 318, "time": 176.48 }, { "epoch": 0.25, "learning_rate": "4.9845e-05", "loss": 0.679, "slid_loss": 0.6702, "step": 319, "time": 133.74 }, { "epoch": 0.25, "learning_rate": "4.9843e-05", "loss": 0.6593, "slid_loss": 0.6699, "step": 320, "time": 84.45 }, { "epoch": 0.25, "learning_rate": "4.9842e-05", "loss": 0.6739, "slid_loss": 0.6701, "step": 321, "time": 71.24 }, { "epoch": 0.25, "learning_rate": "4.9841e-05", "loss": 0.6345, "slid_loss": 0.6692, "step": 322, "time": 75.75 }, { "epoch": 0.25, "learning_rate": "4.9840e-05", "loss": 0.6799, "slid_loss": 0.6696, "step": 323, "time": 71.08 }, { "epoch": 0.25, "learning_rate": "4.9838e-05", "loss": 0.6914, "slid_loss": 0.6699, "step": 324, "time": 72.05 }, { "epoch": 0.25, "learning_rate": "4.9837e-05", "loss": 0.7048, "slid_loss": 0.6703, "step": 325, "time": 71.38 }, { "epoch": 0.25, "learning_rate": "4.9836e-05", "loss": 0.664, "slid_loss": 0.6703, "step": 326, "time": 72.17 }, { "epoch": 0.25, "learning_rate": "4.9835e-05", "loss": 0.6604, "slid_loss": 0.6704, "step": 327, "time": 72.2 }, { "epoch": 0.25, "learning_rate": "4.9833e-05", "loss": 0.678, "slid_loss": 0.6706, "step": 328, "time": 71.39 }, { "epoch": 0.25, "learning_rate": "4.9832e-05", "loss": 0.6748, "slid_loss": 0.6704, "step": 329, "time": 71.56 }, { "epoch": 0.26, "learning_rate": "4.9831e-05", "loss": 0.6752, "slid_loss": 0.6707, "step": 330, "time": 72.88 }, { "epoch": 0.26, "learning_rate": "4.9829e-05", "loss": 0.6812, "slid_loss": 0.6709, "step": 331, "time": 71.77 }, { "epoch": 0.26, "learning_rate": "4.9828e-05", "loss": 0.6659, "slid_loss": 0.6709, "step": 332, "time": 73.24 }, { "epoch": 0.26, "learning_rate": "4.9827e-05", "loss": 0.6799, "slid_loss": 0.6705, "step": 333, "time": 72.21 }, { "epoch": 0.26, "learning_rate": "4.9826e-05", "loss": 0.656, "slid_loss": 0.6706, "step": 334, "time": 72.45 }, { "epoch": 0.26, "learning_rate": "4.9824e-05", "loss": 0.6914, "slid_loss": 0.6705, "step": 335, "time": 70.85 }, { "epoch": 0.26, "learning_rate": "4.9823e-05", "loss": 0.6669, "slid_loss": 0.6703, "step": 336, "time": 70.71 }, { "epoch": 0.26, "learning_rate": "4.9822e-05", "loss": 0.6567, "slid_loss": 0.6702, "step": 337, "time": 71.37 }, { "epoch": 0.26, "learning_rate": "4.9820e-05", "loss": 0.6577, "slid_loss": 0.67, "step": 338, "time": 71.94 }, { "epoch": 0.26, "learning_rate": "4.9819e-05", "loss": 0.6665, "slid_loss": 0.6699, "step": 339, "time": 70.82 }, { "epoch": 0.26, "learning_rate": "4.9818e-05", "loss": 0.725, "slid_loss": 0.6706, "step": 340, "time": 72.15 }, { "epoch": 0.26, "learning_rate": "4.9816e-05", "loss": 0.6593, "slid_loss": 0.6707, "step": 341, "time": 71.42 }, { "epoch": 0.26, "learning_rate": "4.9815e-05", "loss": 0.6378, "slid_loss": 0.6703, "step": 342, "time": 73.08 }, { "epoch": 0.27, "learning_rate": "4.9814e-05", "loss": 0.666, "slid_loss": 0.6703, "step": 343, "time": 71.34 }, { "epoch": 0.27, "learning_rate": "4.9812e-05", "loss": 0.6956, "slid_loss": 0.6708, "step": 344, "time": 71.92 }, { "epoch": 0.27, "learning_rate": "4.9811e-05", "loss": 0.6769, "slid_loss": 0.6708, "step": 345, "time": 72.28 }, { "epoch": 0.27, "learning_rate": "4.9810e-05", "loss": 0.6626, "slid_loss": 0.6708, "step": 346, "time": 72.34 }, { "epoch": 0.27, "learning_rate": "4.9808e-05", "loss": 0.6656, "slid_loss": 0.6704, "step": 347, "time": 70.11 }, { "epoch": 0.27, "learning_rate": "4.9807e-05", "loss": 0.6417, "slid_loss": 0.67, "step": 348, "time": 71.84 }, { "epoch": 0.27, "learning_rate": "4.9806e-05", "loss": 0.6564, "slid_loss": 0.6698, "step": 349, "time": 72.44 }, { "epoch": 0.27, "learning_rate": "4.9804e-05", "loss": 0.6606, "slid_loss": 0.6695, "step": 350, "time": 71.5 }, { "epoch": 0.27, "learning_rate": "4.9803e-05", "loss": 0.6834, "slid_loss": 0.6693, "step": 351, "time": 73.07 }, { "epoch": 0.27, "learning_rate": "4.9802e-05", "loss": 0.6663, "slid_loss": 0.6694, "step": 352, "time": 72.02 }, { "epoch": 0.27, "learning_rate": "4.9800e-05", "loss": 0.6851, "slid_loss": 0.6696, "step": 353, "time": 71.53 }, { "epoch": 0.27, "learning_rate": "4.9799e-05", "loss": 0.6785, "slid_loss": 0.6698, "step": 354, "time": 70.9 }, { "epoch": 0.27, "learning_rate": "4.9797e-05", "loss": 0.6525, "slid_loss": 0.6697, "step": 355, "time": 72.39 }, { "epoch": 0.28, "learning_rate": "4.9796e-05", "loss": 0.6739, "slid_loss": 0.6698, "step": 356, "time": 70.32 }, { "epoch": 0.28, "learning_rate": "4.9795e-05", "loss": 0.7136, "slid_loss": 0.6704, "step": 357, "time": 72.34 }, { "epoch": 0.28, "learning_rate": "4.9793e-05", "loss": 0.7039, "slid_loss": 0.671, "step": 358, "time": 72.6 }, { "epoch": 0.28, "learning_rate": "4.9792e-05", "loss": 0.6623, "slid_loss": 0.6711, "step": 359, "time": 71.13 }, { "epoch": 0.28, "learning_rate": "4.9790e-05", "loss": 0.6803, "slid_loss": 0.6716, "step": 360, "time": 70.68 }, { "epoch": 0.28, "learning_rate": "4.9789e-05", "loss": 0.6847, "slid_loss": 0.6717, "step": 361, "time": 70.85 }, { "epoch": 0.28, "learning_rate": "4.9787e-05", "loss": 0.7164, "slid_loss": 0.6725, "step": 362, "time": 72.74 }, { "epoch": 0.28, "learning_rate": "4.9786e-05", "loss": 0.6881, "slid_loss": 0.6723, "step": 363, "time": 72.37 }, { "epoch": 0.28, "learning_rate": "4.9785e-05", "loss": 0.664, "slid_loss": 0.6721, "step": 364, "time": 72.88 }, { "epoch": 0.28, "learning_rate": "4.9783e-05", "loss": 0.6767, "slid_loss": 0.6719, "step": 365, "time": 71.98 }, { "epoch": 0.28, "learning_rate": "4.9782e-05", "loss": 0.6842, "slid_loss": 0.672, "step": 366, "time": 72.32 }, { "epoch": 0.28, "learning_rate": "4.9780e-05", "loss": 0.693, "slid_loss": 0.6724, "step": 367, "time": 72.01 }, { "epoch": 0.28, "learning_rate": "4.9779e-05", "loss": 0.6563, "slid_loss": 0.6726, "step": 368, "time": 71.44 }, { "epoch": 0.29, "learning_rate": "4.9777e-05", "loss": 0.685, "slid_loss": 0.6727, "step": 369, "time": 72.08 }, { "epoch": 0.29, "learning_rate": "4.9776e-05", "loss": 0.6334, "slid_loss": 0.6725, "step": 370, "time": 70.39 }, { "epoch": 0.29, "learning_rate": "4.9774e-05", "loss": 0.6772, "slid_loss": 0.6727, "step": 371, "time": 72.13 }, { "epoch": 0.29, "learning_rate": "4.9773e-05", "loss": 0.6786, "slid_loss": 0.673, "step": 372, "time": 72.76 }, { "epoch": 0.29, "learning_rate": "4.9771e-05", "loss": 0.6745, "slid_loss": 0.6728, "step": 373, "time": 71.08 }, { "epoch": 0.29, "learning_rate": "4.9770e-05", "loss": 0.6693, "slid_loss": 0.6727, "step": 374, "time": 72.19 }, { "epoch": 0.29, "learning_rate": "4.9769e-05", "loss": 0.6413, "slid_loss": 0.6726, "step": 375, "time": 71.89 }, { "epoch": 0.29, "learning_rate": "4.9767e-05", "loss": 0.6157, "slid_loss": 0.6723, "step": 376, "time": 72.64 }, { "epoch": 0.29, "learning_rate": "4.9766e-05", "loss": 0.7226, "slid_loss": 0.673, "step": 377, "time": 72.18 }, { "epoch": 0.29, "learning_rate": "4.9764e-05", "loss": 0.6772, "slid_loss": 0.6733, "step": 378, "time": 70.81 }, { "epoch": 0.29, "learning_rate": "4.9763e-05", "loss": 0.6851, "slid_loss": 0.6732, "step": 379, "time": 71.67 }, { "epoch": 0.29, "learning_rate": "4.9761e-05", "loss": 0.6952, "slid_loss": 0.6735, "step": 380, "time": 71.64 }, { "epoch": 0.29, "learning_rate": "4.9759e-05", "loss": 0.6341, "slid_loss": 0.6729, "step": 381, "time": 72.42 }, { "epoch": 0.3, "learning_rate": "4.9758e-05", "loss": 0.6834, "slid_loss": 0.6732, "step": 382, "time": 70.98 }, { "epoch": 0.3, "learning_rate": "4.9756e-05", "loss": 0.6761, "slid_loss": 0.6731, "step": 383, "time": 72.08 }, { "epoch": 0.3, "learning_rate": "4.9755e-05", "loss": 0.6659, "slid_loss": 0.6728, "step": 384, "time": 71.5 }, { "epoch": 0.3, "learning_rate": "4.9753e-05", "loss": 0.7084, "slid_loss": 0.6732, "step": 385, "time": 73.42 }, { "epoch": 0.3, "learning_rate": "4.9752e-05", "loss": 0.6695, "slid_loss": 0.6728, "step": 386, "time": 73.27 }, { "epoch": 0.3, "learning_rate": "4.9750e-05", "loss": 0.7271, "slid_loss": 0.6729, "step": 387, "time": 71.27 }, { "epoch": 0.3, "learning_rate": "4.9749e-05", "loss": 0.6696, "slid_loss": 0.673, "step": 388, "time": 71.35 }, { "epoch": 0.3, "learning_rate": "4.9747e-05", "loss": 0.6511, "slid_loss": 0.6731, "step": 389, "time": 72.7 }, { "epoch": 0.3, "learning_rate": "4.9746e-05", "loss": 0.6817, "slid_loss": 0.6731, "step": 390, "time": 71.33 }, { "epoch": 0.3, "learning_rate": "4.9744e-05", "loss": 0.6424, "slid_loss": 0.673, "step": 391, "time": 72.51 }, { "epoch": 0.3, "learning_rate": "4.9742e-05", "loss": 0.6801, "slid_loss": 0.6729, "step": 392, "time": 72.05 }, { "epoch": 0.3, "learning_rate": "4.9741e-05", "loss": 0.6703, "slid_loss": 0.6728, "step": 393, "time": 72.45 }, { "epoch": 0.3, "learning_rate": "4.9739e-05", "loss": 0.6296, "slid_loss": 0.6721, "step": 394, "time": 71.39 }, { "epoch": 0.31, "learning_rate": "4.9738e-05", "loss": 0.6698, "slid_loss": 0.6718, "step": 395, "time": 70.58 }, { "epoch": 0.31, "learning_rate": "4.9736e-05", "loss": 0.6979, "slid_loss": 0.6725, "step": 396, "time": 70.87 }, { "epoch": 0.31, "learning_rate": "4.9735e-05", "loss": 0.671, "slid_loss": 0.6726, "step": 397, "time": 73.5 }, { "epoch": 0.31, "learning_rate": "4.9733e-05", "loss": 0.6656, "slid_loss": 0.6726, "step": 398, "time": 72.49 }, { "epoch": 0.31, "learning_rate": "4.9731e-05", "loss": 0.6856, "slid_loss": 0.6728, "step": 399, "time": 71.61 }, { "epoch": 0.31, "learning_rate": "4.9730e-05", "loss": 0.6499, "slid_loss": 0.6727, "step": 400, "time": 71.96 }, { "epoch": 0.31, "learning_rate": "4.9728e-05", "loss": 0.6941, "slid_loss": 0.6732, "step": 401, "time": 770.18 }, { "epoch": 0.31, "learning_rate": "4.9727e-05", "loss": 0.641, "slid_loss": 0.6726, "step": 402, "time": 71.23 }, { "epoch": 0.31, "learning_rate": "4.9725e-05", "loss": 0.6625, "slid_loss": 0.6726, "step": 403, "time": 71.67 }, { "epoch": 0.31, "learning_rate": "4.9723e-05", "loss": 0.6984, "slid_loss": 0.6728, "step": 404, "time": 72.37 }, { "epoch": 0.31, "learning_rate": "4.9722e-05", "loss": 0.6879, "slid_loss": 0.6734, "step": 405, "time": 72.32 }, { "epoch": 0.31, "learning_rate": "4.9720e-05", "loss": 0.6753, "slid_loss": 0.6734, "step": 406, "time": 71.51 }, { "epoch": 0.31, "learning_rate": "4.9718e-05", "loss": 0.7062, "slid_loss": 0.6738, "step": 407, "time": 71.34 }, { "epoch": 0.32, "learning_rate": "4.9717e-05", "loss": 0.6587, "slid_loss": 0.6736, "step": 408, "time": 71.67 }, { "epoch": 0.32, "learning_rate": "4.9715e-05", "loss": 0.6732, "slid_loss": 0.6731, "step": 409, "time": 73.47 }, { "epoch": 0.32, "learning_rate": "4.9713e-05", "loss": 0.6623, "slid_loss": 0.6731, "step": 410, "time": 71.3 }, { "epoch": 0.32, "learning_rate": "4.9712e-05", "loss": 0.6759, "slid_loss": 0.673, "step": 411, "time": 72.1 }, { "epoch": 0.32, "learning_rate": "4.9710e-05", "loss": 0.6623, "slid_loss": 0.6732, "step": 412, "time": 71.92 }, { "epoch": 0.32, "learning_rate": "4.9708e-05", "loss": 0.6415, "slid_loss": 0.6727, "step": 413, "time": 71.37 }, { "epoch": 0.32, "learning_rate": "4.9707e-05", "loss": 0.6225, "slid_loss": 0.6723, "step": 414, "time": 72.94 }, { "epoch": 0.32, "learning_rate": "4.9705e-05", "loss": 0.6652, "slid_loss": 0.6723, "step": 415, "time": 71.85 }, { "epoch": 0.32, "learning_rate": "4.9703e-05", "loss": 0.7014, "slid_loss": 0.6727, "step": 416, "time": 71.66 }, { "epoch": 0.32, "learning_rate": "4.9702e-05", "loss": 0.663, "slid_loss": 0.6726, "step": 417, "time": 71.22 }, { "epoch": 0.32, "learning_rate": "4.9700e-05", "loss": 0.6513, "slid_loss": 0.6723, "step": 418, "time": 73.0 }, { "epoch": 0.32, "learning_rate": "4.9698e-05", "loss": 0.6499, "slid_loss": 0.6721, "step": 419, "time": 72.33 }, { "epoch": 0.32, "learning_rate": "4.9697e-05", "loss": 0.654, "slid_loss": 0.672, "step": 420, "time": 71.58 }, { "epoch": 0.33, "learning_rate": "4.9695e-05", "loss": 0.639, "slid_loss": 0.6717, "step": 421, "time": 73.82 }, { "epoch": 0.33, "learning_rate": "4.9693e-05", "loss": 0.6725, "slid_loss": 0.672, "step": 422, "time": 70.83 }, { "epoch": 0.33, "learning_rate": "4.9691e-05", "loss": 0.6928, "slid_loss": 0.6722, "step": 423, "time": 71.08 }, { "epoch": 0.33, "learning_rate": "4.9690e-05", "loss": 0.6565, "slid_loss": 0.6718, "step": 424, "time": 71.48 }, { "epoch": 0.33, "learning_rate": "4.9688e-05", "loss": 0.6879, "slid_loss": 0.6716, "step": 425, "time": 70.62 }, { "epoch": 0.33, "learning_rate": "4.9686e-05", "loss": 0.6774, "slid_loss": 0.6718, "step": 426, "time": 74.24 }, { "epoch": 0.33, "learning_rate": "4.9685e-05", "loss": 0.6437, "slid_loss": 0.6716, "step": 427, "time": 73.14 }, { "epoch": 0.33, "learning_rate": "4.9683e-05", "loss": 0.6658, "slid_loss": 0.6715, "step": 428, "time": 71.51 }, { "epoch": 0.33, "learning_rate": "4.9681e-05", "loss": 0.6864, "slid_loss": 0.6716, "step": 429, "time": 72.44 }, { "epoch": 0.33, "learning_rate": "4.9679e-05", "loss": 0.6612, "slid_loss": 0.6715, "step": 430, "time": 72.46 }, { "epoch": 0.33, "learning_rate": "4.9678e-05", "loss": 0.6701, "slid_loss": 0.6714, "step": 431, "time": 72.35 }, { "epoch": 0.33, "learning_rate": "4.9676e-05", "loss": 0.6535, "slid_loss": 0.6712, "step": 432, "time": 73.45 }, { "epoch": 0.34, "learning_rate": "4.9674e-05", "loss": 0.6789, "slid_loss": 0.6712, "step": 433, "time": 71.64 }, { "epoch": 0.34, "learning_rate": "4.9672e-05", "loss": 0.6693, "slid_loss": 0.6714, "step": 434, "time": 72.31 }, { "epoch": 0.34, "learning_rate": "4.9671e-05", "loss": 0.6515, "slid_loss": 0.671, "step": 435, "time": 71.3 }, { "epoch": 0.34, "learning_rate": "4.9669e-05", "loss": 0.6445, "slid_loss": 0.6707, "step": 436, "time": 71.17 }, { "epoch": 0.34, "learning_rate": "4.9667e-05", "loss": 0.6819, "slid_loss": 0.671, "step": 437, "time": 72.1 }, { "epoch": 0.34, "learning_rate": "4.9665e-05", "loss": 0.621, "slid_loss": 0.6706, "step": 438, "time": 72.03 }, { "epoch": 0.34, "learning_rate": "4.9663e-05", "loss": 0.6718, "slid_loss": 0.6707, "step": 439, "time": 72.0 }, { "epoch": 0.34, "learning_rate": "4.9662e-05", "loss": 0.7298, "slid_loss": 0.6707, "step": 440, "time": 71.98 }, { "epoch": 0.34, "learning_rate": "4.9660e-05", "loss": 0.6699, "slid_loss": 0.6708, "step": 441, "time": 71.37 }, { "epoch": 0.34, "learning_rate": "4.9658e-05", "loss": 0.6831, "slid_loss": 0.6713, "step": 442, "time": 72.71 }, { "epoch": 0.34, "learning_rate": "4.9656e-05", "loss": 0.6781, "slid_loss": 0.6714, "step": 443, "time": 71.22 }, { "epoch": 0.34, "learning_rate": "4.9654e-05", "loss": 0.6553, "slid_loss": 0.671, "step": 444, "time": 71.3 }, { "epoch": 0.34, "learning_rate": "4.9653e-05", "loss": 0.6581, "slid_loss": 0.6708, "step": 445, "time": 72.28 }, { "epoch": 0.35, "learning_rate": "4.9651e-05", "loss": 0.6709, "slid_loss": 0.6709, "step": 446, "time": 73.11 }, { "epoch": 0.35, "learning_rate": "4.9649e-05", "loss": 0.6783, "slid_loss": 0.671, "step": 447, "time": 72.78 }, { "epoch": 0.35, "learning_rate": "4.9647e-05", "loss": 0.6487, "slid_loss": 0.6711, "step": 448, "time": 72.39 }, { "epoch": 0.35, "learning_rate": "4.9645e-05", "loss": 0.6423, "slid_loss": 0.6709, "step": 449, "time": 71.94 }, { "epoch": 0.35, "learning_rate": "4.9643e-05", "loss": 0.6454, "slid_loss": 0.6708, "step": 450, "time": 71.0 }, { "epoch": 0.35, "learning_rate": "4.9641e-05", "loss": 0.6568, "slid_loss": 0.6705, "step": 451, "time": 70.96 }, { "epoch": 0.35, "learning_rate": "4.9640e-05", "loss": 0.6456, "slid_loss": 0.6703, "step": 452, "time": 70.75 }, { "epoch": 0.35, "learning_rate": "4.9638e-05", "loss": 0.6511, "slid_loss": 0.67, "step": 453, "time": 74.26 }, { "epoch": 0.35, "learning_rate": "4.9636e-05", "loss": 0.6564, "slid_loss": 0.6698, "step": 454, "time": 71.23 }, { "epoch": 0.35, "learning_rate": "4.9634e-05", "loss": 0.6446, "slid_loss": 0.6697, "step": 455, "time": 72.06 }, { "epoch": 0.35, "learning_rate": "4.9632e-05", "loss": 0.6537, "slid_loss": 0.6695, "step": 456, "time": 71.12 }, { "epoch": 0.35, "learning_rate": "4.9630e-05", "loss": 0.6852, "slid_loss": 0.6692, "step": 457, "time": 71.68 }, { "epoch": 0.35, "learning_rate": "4.9628e-05", "loss": 0.6562, "slid_loss": 0.6687, "step": 458, "time": 72.77 }, { "epoch": 0.36, "learning_rate": "4.9627e-05", "loss": 0.65, "slid_loss": 0.6686, "step": 459, "time": 71.19 }, { "epoch": 0.36, "learning_rate": "4.9625e-05", "loss": 0.6902, "slid_loss": 0.6687, "step": 460, "time": 72.42 }, { "epoch": 0.36, "learning_rate": "4.9623e-05", "loss": 0.7171, "slid_loss": 0.669, "step": 461, "time": 72.99 }, { "epoch": 0.36, "learning_rate": "4.9621e-05", "loss": 0.6652, "slid_loss": 0.6685, "step": 462, "time": 71.29 }, { "epoch": 0.36, "learning_rate": "4.9619e-05", "loss": 0.6526, "slid_loss": 0.6681, "step": 463, "time": 70.45 }, { "epoch": 0.36, "learning_rate": "4.9617e-05", "loss": 0.6997, "slid_loss": 0.6685, "step": 464, "time": 71.19 }, { "epoch": 0.36, "learning_rate": "4.9615e-05", "loss": 0.6698, "slid_loss": 0.6684, "step": 465, "time": 71.97 }, { "epoch": 0.36, "learning_rate": "4.9613e-05", "loss": 0.6765, "slid_loss": 0.6684, "step": 466, "time": 71.94 }, { "epoch": 0.36, "learning_rate": "4.9611e-05", "loss": 0.6211, "slid_loss": 0.6676, "step": 467, "time": 71.98 }, { "epoch": 0.36, "learning_rate": "4.9609e-05", "loss": 0.6572, "slid_loss": 0.6676, "step": 468, "time": 73.43 }, { "epoch": 0.36, "learning_rate": "4.9607e-05", "loss": 0.6529, "slid_loss": 0.6673, "step": 469, "time": 72.63 }, { "epoch": 0.36, "learning_rate": "4.9605e-05", "loss": 0.6957, "slid_loss": 0.668, "step": 470, "time": 71.49 }, { "epoch": 0.36, "learning_rate": "4.9604e-05", "loss": 0.6425, "slid_loss": 0.6676, "step": 471, "time": 72.38 }, { "epoch": 0.37, "learning_rate": "4.9602e-05", "loss": 0.681, "slid_loss": 0.6676, "step": 472, "time": 89.44 }, { "epoch": 0.37, "learning_rate": "4.9600e-05", "loss": 0.6847, "slid_loss": 0.6677, "step": 473, "time": 130.82 }, { "epoch": 0.37, "learning_rate": "4.9598e-05", "loss": 0.6631, "slid_loss": 0.6677, "step": 474, "time": 136.84 }, { "epoch": 0.37, "learning_rate": "4.9596e-05", "loss": 0.6892, "slid_loss": 0.6681, "step": 475, "time": 168.24 }, { "epoch": 0.37, "learning_rate": "4.9594e-05", "loss": 0.6549, "slid_loss": 0.6685, "step": 476, "time": 187.97 }, { "epoch": 0.37, "learning_rate": "4.9592e-05", "loss": 0.6982, "slid_loss": 0.6683, "step": 477, "time": 177.75 }, { "epoch": 0.37, "learning_rate": "4.9590e-05", "loss": 0.692, "slid_loss": 0.6684, "step": 478, "time": 150.88 }, { "epoch": 0.37, "learning_rate": "4.9588e-05", "loss": 0.6941, "slid_loss": 0.6685, "step": 479, "time": 95.84 }, { "epoch": 0.37, "learning_rate": "4.9586e-05", "loss": 0.679, "slid_loss": 0.6684, "step": 480, "time": 71.75 }, { "epoch": 0.37, "learning_rate": "4.9584e-05", "loss": 0.6663, "slid_loss": 0.6687, "step": 481, "time": 70.58 }, { "epoch": 0.37, "learning_rate": "4.9582e-05", "loss": 0.673, "slid_loss": 0.6686, "step": 482, "time": 72.15 }, { "epoch": 0.37, "learning_rate": "4.9580e-05", "loss": 0.7347, "slid_loss": 0.6692, "step": 483, "time": 70.62 }, { "epoch": 0.37, "learning_rate": "4.9578e-05", "loss": 0.6756, "slid_loss": 0.6693, "step": 484, "time": 71.47 }, { "epoch": 0.38, "learning_rate": "4.9576e-05", "loss": 0.6291, "slid_loss": 0.6685, "step": 485, "time": 72.1 }, { "epoch": 0.38, "learning_rate": "4.9574e-05", "loss": 0.7007, "slid_loss": 0.6688, "step": 486, "time": 70.99 }, { "epoch": 0.38, "learning_rate": "4.9572e-05", "loss": 0.6788, "slid_loss": 0.6683, "step": 487, "time": 72.38 }, { "epoch": 0.38, "learning_rate": "4.9570e-05", "loss": 0.6609, "slid_loss": 0.6682, "step": 488, "time": 71.77 }, { "epoch": 0.38, "learning_rate": "4.9568e-05", "loss": 0.6628, "slid_loss": 0.6683, "step": 489, "time": 71.97 }, { "epoch": 0.38, "learning_rate": "4.9566e-05", "loss": 0.6685, "slid_loss": 0.6682, "step": 490, "time": 72.29 }, { "epoch": 0.38, "learning_rate": "4.9564e-05", "loss": 0.6808, "slid_loss": 0.6686, "step": 491, "time": 72.21 }, { "epoch": 0.38, "learning_rate": "4.9562e-05", "loss": 0.686, "slid_loss": 0.6687, "step": 492, "time": 71.54 }, { "epoch": 0.38, "learning_rate": "4.9560e-05", "loss": 0.6674, "slid_loss": 0.6686, "step": 493, "time": 71.58 }, { "epoch": 0.38, "learning_rate": "4.9557e-05", "loss": 0.6789, "slid_loss": 0.6691, "step": 494, "time": 71.22 }, { "epoch": 0.38, "learning_rate": "4.9555e-05", "loss": 0.6631, "slid_loss": 0.6691, "step": 495, "time": 71.68 }, { "epoch": 0.38, "learning_rate": "4.9553e-05", "loss": 0.6387, "slid_loss": 0.6685, "step": 496, "time": 71.65 }, { "epoch": 0.38, "learning_rate": "4.9551e-05", "loss": 0.6383, "slid_loss": 0.6681, "step": 497, "time": 71.68 }, { "epoch": 0.39, "learning_rate": "4.9549e-05", "loss": 0.6872, "slid_loss": 0.6683, "step": 498, "time": 72.32 }, { "epoch": 0.39, "learning_rate": "4.9547e-05", "loss": 0.6586, "slid_loss": 0.6681, "step": 499, "time": 72.29 }, { "epoch": 0.39, "learning_rate": "4.9545e-05", "loss": 0.6673, "slid_loss": 0.6683, "step": 500, "time": 73.52 }, { "epoch": 0.39, "learning_rate": "4.9543e-05", "loss": 0.6682, "slid_loss": 0.668, "step": 501, "time": 71.52 }, { "epoch": 0.39, "learning_rate": "4.9541e-05", "loss": 0.6784, "slid_loss": 0.6684, "step": 502, "time": 71.69 }, { "epoch": 0.39, "learning_rate": "4.9539e-05", "loss": 0.6976, "slid_loss": 0.6687, "step": 503, "time": 71.57 }, { "epoch": 0.39, "learning_rate": "4.9537e-05", "loss": 0.6656, "slid_loss": 0.6684, "step": 504, "time": 69.67 }, { "epoch": 0.39, "learning_rate": "4.9535e-05", "loss": 0.6297, "slid_loss": 0.6678, "step": 505, "time": 71.01 }, { "epoch": 0.39, "learning_rate": "4.9532e-05", "loss": 0.6782, "slid_loss": 0.6678, "step": 506, "time": 72.6 }, { "epoch": 0.39, "learning_rate": "4.9530e-05", "loss": 0.6718, "slid_loss": 0.6675, "step": 507, "time": 71.35 }, { "epoch": 0.39, "learning_rate": "4.9528e-05", "loss": 0.6551, "slid_loss": 0.6675, "step": 508, "time": 72.67 }, { "epoch": 0.39, "learning_rate": "4.9526e-05", "loss": 0.6579, "slid_loss": 0.6673, "step": 509, "time": 71.42 }, { "epoch": 0.39, "learning_rate": "4.9524e-05", "loss": 0.6745, "slid_loss": 0.6674, "step": 510, "time": 73.32 }, { "epoch": 0.4, "learning_rate": "4.9522e-05", "loss": 0.6589, "slid_loss": 0.6673, "step": 511, "time": 72.07 }, { "epoch": 0.4, "learning_rate": "4.9520e-05", "loss": 0.6734, "slid_loss": 0.6674, "step": 512, "time": 71.95 }, { "epoch": 0.4, "learning_rate": "4.9518e-05", "loss": 0.6547, "slid_loss": 0.6675, "step": 513, "time": 71.07 }, { "epoch": 0.4, "learning_rate": "4.9515e-05", "loss": 0.7002, "slid_loss": 0.6683, "step": 514, "time": 70.94 }, { "epoch": 0.4, "learning_rate": "4.9513e-05", "loss": 0.6445, "slid_loss": 0.6681, "step": 515, "time": 72.66 }, { "epoch": 0.4, "learning_rate": "4.9511e-05", "loss": 0.6863, "slid_loss": 0.6679, "step": 516, "time": 71.51 }, { "epoch": 0.4, "learning_rate": "4.9509e-05", "loss": 0.6224, "slid_loss": 0.6675, "step": 517, "time": 71.55 }, { "epoch": 0.4, "learning_rate": "4.9507e-05", "loss": 0.62, "slid_loss": 0.6672, "step": 518, "time": 72.29 }, { "epoch": 0.4, "learning_rate": "4.9505e-05", "loss": 0.6779, "slid_loss": 0.6675, "step": 519, "time": 72.17 }, { "epoch": 0.4, "learning_rate": "4.9502e-05", "loss": 0.6479, "slid_loss": 0.6674, "step": 520, "time": 71.52 }, { "epoch": 0.4, "learning_rate": "4.9500e-05", "loss": 0.656, "slid_loss": 0.6676, "step": 521, "time": 71.57 }, { "epoch": 0.4, "learning_rate": "4.9498e-05", "loss": 0.6502, "slid_loss": 0.6674, "step": 522, "time": 72.09 }, { "epoch": 0.4, "learning_rate": "4.9496e-05", "loss": 0.663, "slid_loss": 0.6671, "step": 523, "time": 69.97 }, { "epoch": 0.41, "learning_rate": "4.9494e-05", "loss": 0.6506, "slid_loss": 0.667, "step": 524, "time": 72.35 }, { "epoch": 0.41, "learning_rate": "4.9492e-05", "loss": 0.6801, "slid_loss": 0.6669, "step": 525, "time": 73.0 }, { "epoch": 0.41, "learning_rate": "4.9489e-05", "loss": 0.6836, "slid_loss": 0.667, "step": 526, "time": 70.51 }, { "epoch": 0.41, "learning_rate": "4.9487e-05", "loss": 0.6516, "slid_loss": 0.6671, "step": 527, "time": 71.41 }, { "epoch": 0.41, "learning_rate": "4.9485e-05", "loss": 0.6493, "slid_loss": 0.6669, "step": 528, "time": 72.08 }, { "epoch": 0.41, "learning_rate": "4.9483e-05", "loss": 0.6736, "slid_loss": 0.6668, "step": 529, "time": 71.74 }, { "epoch": 0.41, "learning_rate": "4.9480e-05", "loss": 0.6678, "slid_loss": 0.6668, "step": 530, "time": 72.23 }, { "epoch": 0.41, "learning_rate": "4.9478e-05", "loss": 0.6478, "slid_loss": 0.6666, "step": 531, "time": 72.92 }, { "epoch": 0.41, "learning_rate": "4.9476e-05", "loss": 0.6458, "slid_loss": 0.6665, "step": 532, "time": 72.13 }, { "epoch": 0.41, "learning_rate": "4.9474e-05", "loss": 0.6311, "slid_loss": 0.6661, "step": 533, "time": 71.66 }, { "epoch": 0.41, "learning_rate": "4.9472e-05", "loss": 0.654, "slid_loss": 0.6659, "step": 534, "time": 70.56 }, { "epoch": 0.41, "learning_rate": "4.9469e-05", "loss": 0.6665, "slid_loss": 0.6661, "step": 535, "time": 70.98 }, { "epoch": 0.41, "learning_rate": "4.9467e-05", "loss": 0.6613, "slid_loss": 0.6662, "step": 536, "time": 72.41 }, { "epoch": 0.42, "learning_rate": "4.9465e-05", "loss": 0.7169, "slid_loss": 0.6666, "step": 537, "time": 71.27 }, { "epoch": 0.42, "learning_rate": "4.9462e-05", "loss": 0.6847, "slid_loss": 0.6672, "step": 538, "time": 74.73 }, { "epoch": 0.42, "learning_rate": "4.9460e-05", "loss": 0.7224, "slid_loss": 0.6677, "step": 539, "time": 71.35 }, { "epoch": 0.42, "learning_rate": "4.9458e-05", "loss": 0.7346, "slid_loss": 0.6678, "step": 540, "time": 72.17 }, { "epoch": 0.42, "learning_rate": "4.9456e-05", "loss": 0.7103, "slid_loss": 0.6682, "step": 541, "time": 71.63 }, { "epoch": 0.42, "learning_rate": "4.9453e-05", "loss": 0.6939, "slid_loss": 0.6683, "step": 542, "time": 71.65 }, { "epoch": 0.42, "learning_rate": "4.9451e-05", "loss": 0.681, "slid_loss": 0.6683, "step": 543, "time": 70.78 }, { "epoch": 0.42, "learning_rate": "4.9449e-05", "loss": 0.6785, "slid_loss": 0.6685, "step": 544, "time": 72.6 }, { "epoch": 0.42, "learning_rate": "4.9447e-05", "loss": 0.6881, "slid_loss": 0.6688, "step": 545, "time": 71.81 }, { "epoch": 0.42, "learning_rate": "4.9444e-05", "loss": 0.6503, "slid_loss": 0.6686, "step": 546, "time": 72.15 }, { "epoch": 0.42, "learning_rate": "4.9442e-05", "loss": 0.6437, "slid_loss": 0.6683, "step": 547, "time": 73.15 }, { "epoch": 0.42, "learning_rate": "4.9440e-05", "loss": 0.6859, "slid_loss": 0.6687, "step": 548, "time": 72.06 }, { "epoch": 0.42, "learning_rate": "4.9437e-05", "loss": 0.683, "slid_loss": 0.6691, "step": 549, "time": 71.88 }, { "epoch": 0.43, "learning_rate": "4.9435e-05", "loss": 0.6667, "slid_loss": 0.6693, "step": 550, "time": 71.11 }, { "epoch": 0.43, "learning_rate": "4.9433e-05", "loss": 0.6429, "slid_loss": 0.6691, "step": 551, "time": 72.03 }, { "epoch": 0.43, "learning_rate": "4.9430e-05", "loss": 0.6654, "slid_loss": 0.6693, "step": 552, "time": 71.05 }, { "epoch": 0.43, "learning_rate": "4.9428e-05", "loss": 0.6925, "slid_loss": 0.6698, "step": 553, "time": 71.24 }, { "epoch": 0.43, "learning_rate": "4.9426e-05", "loss": 0.654, "slid_loss": 0.6697, "step": 554, "time": 71.79 }, { "epoch": 0.43, "learning_rate": "4.9423e-05", "loss": 0.6586, "slid_loss": 0.6699, "step": 555, "time": 71.13 }, { "epoch": 0.43, "learning_rate": "4.9421e-05", "loss": 0.6648, "slid_loss": 0.67, "step": 556, "time": 73.78 }, { "epoch": 0.43, "learning_rate": "4.9419e-05", "loss": 0.6726, "slid_loss": 0.6699, "step": 557, "time": 72.08 }, { "epoch": 0.43, "learning_rate": "4.9416e-05", "loss": 0.6517, "slid_loss": 0.6698, "step": 558, "time": 71.41 }, { "epoch": 0.43, "learning_rate": "4.9414e-05", "loss": 0.6616, "slid_loss": 0.6699, "step": 559, "time": 70.53 }, { "epoch": 0.43, "learning_rate": "4.9412e-05", "loss": 0.6933, "slid_loss": 0.67, "step": 560, "time": 71.83 }, { "epoch": 0.43, "learning_rate": "4.9409e-05", "loss": 0.6645, "slid_loss": 0.6694, "step": 561, "time": 72.82 }, { "epoch": 0.43, "learning_rate": "4.9407e-05", "loss": 0.7007, "slid_loss": 0.6698, "step": 562, "time": 72.8 }, { "epoch": 0.44, "learning_rate": "4.9404e-05", "loss": 0.6847, "slid_loss": 0.6701, "step": 563, "time": 71.31 }, { "epoch": 0.44, "learning_rate": "4.9402e-05", "loss": 0.6452, "slid_loss": 0.6696, "step": 564, "time": 73.37 }, { "epoch": 0.44, "learning_rate": "4.9400e-05", "loss": 0.6706, "slid_loss": 0.6696, "step": 565, "time": 70.96 }, { "epoch": 0.44, "learning_rate": "4.9397e-05", "loss": 0.659, "slid_loss": 0.6694, "step": 566, "time": 71.57 }, { "epoch": 0.44, "learning_rate": "4.9395e-05", "loss": 0.6484, "slid_loss": 0.6697, "step": 567, "time": 71.72 }, { "epoch": 0.44, "learning_rate": "4.9393e-05", "loss": 0.6453, "slid_loss": 0.6696, "step": 568, "time": 71.43 }, { "epoch": 0.44, "learning_rate": "4.9390e-05", "loss": 0.6485, "slid_loss": 0.6695, "step": 569, "time": 71.54 }, { "epoch": 0.44, "learning_rate": "4.9388e-05", "loss": 0.6305, "slid_loss": 0.6689, "step": 570, "time": 71.45 }, { "epoch": 0.44, "learning_rate": "4.9385e-05", "loss": 0.6471, "slid_loss": 0.6689, "step": 571, "time": 72.59 }, { "epoch": 0.44, "learning_rate": "4.9383e-05", "loss": 0.6854, "slid_loss": 0.6689, "step": 572, "time": 74.32 }, { "epoch": 0.44, "learning_rate": "4.9380e-05", "loss": 0.6519, "slid_loss": 0.6686, "step": 573, "time": 72.96 }, { "epoch": 0.44, "learning_rate": "4.9378e-05", "loss": 0.6458, "slid_loss": 0.6684, "step": 574, "time": 72.79 }, { "epoch": 0.44, "learning_rate": "4.9376e-05", "loss": 0.6731, "slid_loss": 0.6683, "step": 575, "time": 71.87 }, { "epoch": 0.45, "learning_rate": "4.9373e-05", "loss": 0.6414, "slid_loss": 0.6681, "step": 576, "time": 73.26 }, { "epoch": 0.45, "learning_rate": "4.9371e-05", "loss": 0.6807, "slid_loss": 0.668, "step": 577, "time": 71.29 }, { "epoch": 0.45, "learning_rate": "4.9368e-05", "loss": 0.6638, "slid_loss": 0.6677, "step": 578, "time": 74.43 }, { "epoch": 0.45, "learning_rate": "4.9366e-05", "loss": 0.6945, "slid_loss": 0.6677, "step": 579, "time": 72.27 }, { "epoch": 0.45, "learning_rate": "4.9363e-05", "loss": 0.6977, "slid_loss": 0.6679, "step": 580, "time": 71.99 }, { "epoch": 0.45, "learning_rate": "4.9361e-05", "loss": 0.6658, "slid_loss": 0.6679, "step": 581, "time": 71.03 }, { "epoch": 0.45, "learning_rate": "4.9358e-05", "loss": 0.6938, "slid_loss": 0.6681, "step": 582, "time": 72.43 }, { "epoch": 0.45, "learning_rate": "4.9356e-05", "loss": 0.6741, "slid_loss": 0.6675, "step": 583, "time": 72.28 }, { "epoch": 0.45, "learning_rate": "4.9353e-05", "loss": 0.7281, "slid_loss": 0.668, "step": 584, "time": 71.24 }, { "epoch": 0.45, "learning_rate": "4.9351e-05", "loss": 0.6505, "slid_loss": 0.6682, "step": 585, "time": 71.25 }, { "epoch": 0.45, "learning_rate": "4.9348e-05", "loss": 0.6773, "slid_loss": 0.668, "step": 586, "time": 72.0 }, { "epoch": 0.45, "learning_rate": "4.9346e-05", "loss": 0.7066, "slid_loss": 0.6683, "step": 587, "time": 72.76 }, { "epoch": 0.45, "learning_rate": "4.9344e-05", "loss": 0.6498, "slid_loss": 0.6682, "step": 588, "time": 71.93 }, { "epoch": 0.46, "learning_rate": "4.9341e-05", "loss": 0.7342, "slid_loss": 0.6689, "step": 589, "time": 72.17 }, { "epoch": 0.46, "learning_rate": "4.9339e-05", "loss": 0.6953, "slid_loss": 0.6691, "step": 590, "time": 72.96 }, { "epoch": 0.46, "learning_rate": "4.9336e-05", "loss": 0.6784, "slid_loss": 0.6691, "step": 591, "time": 73.48 }, { "epoch": 0.46, "learning_rate": "4.9333e-05", "loss": 0.7109, "slid_loss": 0.6694, "step": 592, "time": 72.91 }, { "epoch": 0.46, "learning_rate": "4.9331e-05", "loss": 0.689, "slid_loss": 0.6696, "step": 593, "time": 70.31 }, { "epoch": 0.46, "learning_rate": "4.9328e-05", "loss": 0.7073, "slid_loss": 0.6699, "step": 594, "time": 71.4 }, { "epoch": 0.46, "learning_rate": "4.9326e-05", "loss": 0.7491, "slid_loss": 0.6707, "step": 595, "time": 71.51 }, { "epoch": 0.46, "learning_rate": "4.9323e-05", "loss": 0.7242, "slid_loss": 0.6716, "step": 596, "time": 71.53 }, { "epoch": 0.46, "learning_rate": "4.9321e-05", "loss": 0.7009, "slid_loss": 0.6722, "step": 597, "time": 71.51 }, { "epoch": 0.46, "learning_rate": "4.9318e-05", "loss": 0.7696, "slid_loss": 0.673, "step": 598, "time": 70.03 }, { "epoch": 0.46, "learning_rate": "4.9316e-05", "loss": 0.7755, "slid_loss": 0.6742, "step": 599, "time": 72.47 }, { "epoch": 0.46, "learning_rate": "4.9313e-05", "loss": 0.7794, "slid_loss": 0.6753, "step": 600, "time": 71.05 }, { "epoch": 0.46, "learning_rate": "4.9311e-05", "loss": 0.8094, "slid_loss": 0.6767, "step": 601, "time": 768.37 }, { "epoch": 0.47, "learning_rate": "4.9308e-05", "loss": 0.8094, "slid_loss": 0.678, "step": 602, "time": 71.46 }, { "epoch": 0.47, "learning_rate": "4.9306e-05", "loss": 0.8345, "slid_loss": 0.6794, "step": 603, "time": 72.65 }, { "epoch": 0.47, "learning_rate": "4.9303e-05", "loss": 0.7848, "slid_loss": 0.6806, "step": 604, "time": 72.03 }, { "epoch": 0.47, "learning_rate": "4.9300e-05", "loss": 0.7983, "slid_loss": 0.6823, "step": 605, "time": 72.14 }, { "epoch": 0.47, "learning_rate": "4.9298e-05", "loss": 0.8265, "slid_loss": 0.6838, "step": 606, "time": 72.27 }, { "epoch": 0.47, "learning_rate": "4.9295e-05", "loss": 0.7747, "slid_loss": 0.6848, "step": 607, "time": 72.01 }, { "epoch": 0.47, "learning_rate": "4.9293e-05", "loss": 0.7667, "slid_loss": 0.6859, "step": 608, "time": 71.87 }, { "epoch": 0.47, "learning_rate": "4.9290e-05", "loss": 0.7946, "slid_loss": 0.6873, "step": 609, "time": 71.84 }, { "epoch": 0.47, "learning_rate": "4.9287e-05", "loss": 0.7893, "slid_loss": 0.6884, "step": 610, "time": 72.61 }, { "epoch": 0.47, "learning_rate": "4.9285e-05", "loss": 0.7867, "slid_loss": 0.6897, "step": 611, "time": 70.9 }, { "epoch": 0.47, "learning_rate": "4.9282e-05", "loss": 0.7981, "slid_loss": 0.691, "step": 612, "time": 72.45 }, { "epoch": 0.47, "learning_rate": "4.9280e-05", "loss": 0.8427, "slid_loss": 0.6928, "step": 613, "time": 71.4 }, { "epoch": 0.48, "learning_rate": "4.9277e-05", "loss": 0.8141, "slid_loss": 0.694, "step": 614, "time": 71.19 }, { "epoch": 0.48, "learning_rate": "4.9274e-05", "loss": 0.7854, "slid_loss": 0.6954, "step": 615, "time": 70.92 }, { "epoch": 0.48, "learning_rate": "4.9272e-05", "loss": 0.7835, "slid_loss": 0.6964, "step": 616, "time": 72.04 }, { "epoch": 0.48, "learning_rate": "4.9269e-05", "loss": 0.8189, "slid_loss": 0.6983, "step": 617, "time": 73.02 }, { "epoch": 0.48, "learning_rate": "4.9267e-05", "loss": 0.7468, "slid_loss": 0.6996, "step": 618, "time": 71.63 }, { "epoch": 0.48, "learning_rate": "4.9264e-05", "loss": 0.7907, "slid_loss": 0.7007, "step": 619, "time": 71.31 }, { "epoch": 0.48, "learning_rate": "4.9261e-05", "loss": 0.7728, "slid_loss": 0.702, "step": 620, "time": 72.39 }, { "epoch": 0.48, "learning_rate": "4.9259e-05", "loss": 0.7932, "slid_loss": 0.7033, "step": 621, "time": 72.46 }, { "epoch": 0.48, "learning_rate": "4.9256e-05", "loss": 0.8048, "slid_loss": 0.7049, "step": 622, "time": 70.8 }, { "epoch": 0.48, "learning_rate": "4.9253e-05", "loss": 0.7887, "slid_loss": 0.7061, "step": 623, "time": 72.62 }, { "epoch": 0.48, "learning_rate": "4.9251e-05", "loss": 0.7833, "slid_loss": 0.7075, "step": 624, "time": 74.0 }, { "epoch": 0.48, "learning_rate": "4.9248e-05", "loss": 0.8234, "slid_loss": 0.7089, "step": 625, "time": 71.98 }, { "epoch": 0.48, "learning_rate": "4.9245e-05", "loss": 0.8107, "slid_loss": 0.7102, "step": 626, "time": 70.87 }, { "epoch": 0.49, "learning_rate": "4.9243e-05", "loss": 0.7774, "slid_loss": 0.7114, "step": 627, "time": 70.69 }, { "epoch": 0.49, "learning_rate": "4.9240e-05", "loss": 0.8027, "slid_loss": 0.713, "step": 628, "time": 71.19 }, { "epoch": 0.49, "learning_rate": "4.9237e-05", "loss": 0.7696, "slid_loss": 0.7139, "step": 629, "time": 70.69 }, { "epoch": 0.49, "learning_rate": "4.9235e-05", "loss": 0.7856, "slid_loss": 0.7151, "step": 630, "time": 73.04 }, { "epoch": 0.49, "learning_rate": "4.9232e-05", "loss": 0.7778, "slid_loss": 0.7164, "step": 631, "time": 82.89 }, { "epoch": 0.49, "learning_rate": "4.9229e-05", "loss": 0.7838, "slid_loss": 0.7178, "step": 632, "time": 157.58 }, { "epoch": 0.49, "learning_rate": "4.9226e-05", "loss": 0.7739, "slid_loss": 0.7192, "step": 633, "time": 159.23 }, { "epoch": 0.49, "learning_rate": "4.9224e-05", "loss": 0.7996, "slid_loss": 0.7207, "step": 634, "time": 157.83 }, { "epoch": 0.49, "learning_rate": "4.9221e-05", "loss": 0.7532, "slid_loss": 0.7215, "step": 635, "time": 189.08 }, { "epoch": 0.49, "learning_rate": "4.9218e-05", "loss": 0.8291, "slid_loss": 0.7232, "step": 636, "time": 194.55 }, { "epoch": 0.49, "learning_rate": "4.9216e-05", "loss": 0.7844, "slid_loss": 0.7239, "step": 637, "time": 146.64 }, { "epoch": 0.49, "learning_rate": "4.9213e-05", "loss": 0.7859, "slid_loss": 0.7249, "step": 638, "time": 107.11 }, { "epoch": 0.49, "learning_rate": "4.9210e-05", "loss": 0.8083, "slid_loss": 0.7258, "step": 639, "time": 85.22 }, { "epoch": 0.5, "learning_rate": "4.9207e-05", "loss": 0.822, "slid_loss": 0.7266, "step": 640, "time": 71.23 }, { "epoch": 0.5, "learning_rate": "4.9205e-05", "loss": 0.8182, "slid_loss": 0.7277, "step": 641, "time": 72.02 }, { "epoch": 0.5, "learning_rate": "4.9202e-05", "loss": 0.7608, "slid_loss": 0.7284, "step": 642, "time": 72.16 }, { "epoch": 0.5, "learning_rate": "4.9199e-05", "loss": 0.7797, "slid_loss": 0.7294, "step": 643, "time": 71.89 }, { "epoch": 0.5, "learning_rate": "4.9196e-05", "loss": 0.7567, "slid_loss": 0.7301, "step": 644, "time": 73.32 }, { "epoch": 0.5, "learning_rate": "4.9194e-05", "loss": 0.8264, "slid_loss": 0.7315, "step": 645, "time": 71.42 }, { "epoch": 0.5, "learning_rate": "4.9191e-05", "loss": 0.7952, "slid_loss": 0.733, "step": 646, "time": 72.3 }, { "epoch": 0.5, "learning_rate": "4.9188e-05", "loss": 0.7771, "slid_loss": 0.7343, "step": 647, "time": 72.29 }, { "epoch": 0.5, "learning_rate": "4.9185e-05", "loss": 0.7643, "slid_loss": 0.7351, "step": 648, "time": 74.68 }, { "epoch": 0.5, "learning_rate": "4.9183e-05", "loss": 0.7531, "slid_loss": 0.7358, "step": 649, "time": 69.48 }, { "epoch": 0.5, "learning_rate": "4.9180e-05", "loss": 0.8165, "slid_loss": 0.7373, "step": 650, "time": 70.86 }, { "epoch": 0.5, "learning_rate": "4.9177e-05", "loss": 0.7908, "slid_loss": 0.7388, "step": 651, "time": 70.47 }, { "epoch": 0.5, "learning_rate": "4.9174e-05", "loss": 0.7972, "slid_loss": 0.7401, "step": 652, "time": 71.12 }, { "epoch": 0.51, "learning_rate": "4.9171e-05", "loss": 0.8167, "slid_loss": 0.7413, "step": 653, "time": 70.88 }, { "epoch": 0.51, "learning_rate": "4.9169e-05", "loss": 0.8113, "slid_loss": 0.7429, "step": 654, "time": 71.26 }, { "epoch": 0.51, "learning_rate": "4.9166e-05", "loss": 0.7983, "slid_loss": 0.7443, "step": 655, "time": 72.56 }, { "epoch": 0.51, "learning_rate": "4.9163e-05", "loss": 0.8053, "slid_loss": 0.7457, "step": 656, "time": 72.01 }, { "epoch": 0.51, "learning_rate": "4.9160e-05", "loss": 0.7999, "slid_loss": 0.747, "step": 657, "time": 71.35 }, { "epoch": 0.51, "learning_rate": "4.9157e-05", "loss": 0.7292, "slid_loss": 0.7478, "step": 658, "time": 73.45 }, { "epoch": 0.51, "learning_rate": "4.9155e-05", "loss": 0.8035, "slid_loss": 0.7492, "step": 659, "time": 70.95 }, { "epoch": 0.51, "learning_rate": "4.9152e-05", "loss": 0.7786, "slid_loss": 0.75, "step": 660, "time": 73.43 }, { "epoch": 0.51, "learning_rate": "4.9149e-05", "loss": 0.8086, "slid_loss": 0.7515, "step": 661, "time": 71.59 }, { "epoch": 0.51, "learning_rate": "4.9146e-05", "loss": 0.803, "slid_loss": 0.7525, "step": 662, "time": 70.98 }, { "epoch": 0.51, "learning_rate": "4.9143e-05", "loss": 0.8064, "slid_loss": 0.7537, "step": 663, "time": 70.84 }, { "epoch": 0.51, "learning_rate": "4.9140e-05", "loss": 0.7988, "slid_loss": 0.7553, "step": 664, "time": 72.18 }, { "epoch": 0.51, "learning_rate": "4.9137e-05", "loss": 0.8198, "slid_loss": 0.7567, "step": 665, "time": 70.66 }, { "epoch": 0.52, "learning_rate": "4.9135e-05", "loss": 0.7652, "slid_loss": 0.7578, "step": 666, "time": 72.2 }, { "epoch": 0.52, "learning_rate": "4.9132e-05", "loss": 0.8427, "slid_loss": 0.7597, "step": 667, "time": 71.67 }, { "epoch": 0.52, "learning_rate": "4.9129e-05", "loss": 0.8279, "slid_loss": 0.7616, "step": 668, "time": 72.8 }, { "epoch": 0.52, "learning_rate": "4.9126e-05", "loss": 0.7983, "slid_loss": 0.7631, "step": 669, "time": 71.6 }, { "epoch": 0.52, "learning_rate": "4.9123e-05", "loss": 0.7799, "slid_loss": 0.7646, "step": 670, "time": 72.14 }, { "epoch": 0.52, "learning_rate": "4.9120e-05", "loss": 0.7862, "slid_loss": 0.766, "step": 671, "time": 71.78 }, { "epoch": 0.52, "learning_rate": "4.9117e-05", "loss": 0.8457, "slid_loss": 0.7676, "step": 672, "time": 72.48 }, { "epoch": 0.52, "learning_rate": "4.9114e-05", "loss": 0.7886, "slid_loss": 0.7689, "step": 673, "time": 70.75 }, { "epoch": 0.52, "learning_rate": "4.9112e-05", "loss": 0.8187, "slid_loss": 0.7707, "step": 674, "time": 72.38 }, { "epoch": 0.52, "learning_rate": "4.9109e-05", "loss": 0.7845, "slid_loss": 0.7718, "step": 675, "time": 71.74 }, { "epoch": 0.52, "learning_rate": "4.9106e-05", "loss": 0.7528, "slid_loss": 0.7729, "step": 676, "time": 71.82 }, { "epoch": 0.52, "learning_rate": "4.9103e-05", "loss": 0.7696, "slid_loss": 0.7738, "step": 677, "time": 72.84 }, { "epoch": 0.52, "learning_rate": "4.9100e-05", "loss": 0.8153, "slid_loss": 0.7753, "step": 678, "time": 72.47 }, { "epoch": 0.53, "learning_rate": "4.9097e-05", "loss": 0.7937, "slid_loss": 0.7763, "step": 679, "time": 71.35 }, { "epoch": 0.53, "learning_rate": "4.9094e-05", "loss": 0.7483, "slid_loss": 0.7768, "step": 680, "time": 73.21 }, { "epoch": 0.53, "learning_rate": "4.9091e-05", "loss": 0.8135, "slid_loss": 0.7783, "step": 681, "time": 73.13 }, { "epoch": 0.53, "learning_rate": "4.9088e-05", "loss": 0.7862, "slid_loss": 0.7792, "step": 682, "time": 71.9 }, { "epoch": 0.53, "learning_rate": "4.9085e-05", "loss": 0.7805, "slid_loss": 0.7803, "step": 683, "time": 73.06 }, { "epoch": 0.53, "learning_rate": "4.9082e-05", "loss": 0.7742, "slid_loss": 0.7807, "step": 684, "time": 70.83 }, { "epoch": 0.53, "learning_rate": "4.9079e-05", "loss": 0.7954, "slid_loss": 0.7822, "step": 685, "time": 72.15 }, { "epoch": 0.53, "learning_rate": "4.9077e-05", "loss": 0.8089, "slid_loss": 0.7835, "step": 686, "time": 71.27 }, { "epoch": 0.53, "learning_rate": "4.9074e-05", "loss": 0.7886, "slid_loss": 0.7843, "step": 687, "time": 73.07 }, { "epoch": 0.53, "learning_rate": "4.9071e-05", "loss": 0.752, "slid_loss": 0.7853, "step": 688, "time": 71.21 }, { "epoch": 0.53, "learning_rate": "4.9068e-05", "loss": 0.7769, "slid_loss": 0.7857, "step": 689, "time": 70.88 }, { "epoch": 0.53, "learning_rate": "4.9065e-05", "loss": 0.7738, "slid_loss": 0.7865, "step": 690, "time": 72.0 }, { "epoch": 0.53, "learning_rate": "4.9062e-05", "loss": 0.792, "slid_loss": 0.7877, "step": 691, "time": 71.4 }, { "epoch": 0.54, "learning_rate": "4.9059e-05", "loss": 0.8049, "slid_loss": 0.7886, "step": 692, "time": 72.11 }, { "epoch": 0.54, "learning_rate": "4.9056e-05", "loss": 0.7584, "slid_loss": 0.7893, "step": 693, "time": 71.35 }, { "epoch": 0.54, "learning_rate": "4.9053e-05", "loss": 0.792, "slid_loss": 0.7901, "step": 694, "time": 70.96 }, { "epoch": 0.54, "learning_rate": "4.9050e-05", "loss": 0.7671, "slid_loss": 0.7903, "step": 695, "time": 70.21 }, { "epoch": 0.54, "learning_rate": "4.9047e-05", "loss": 0.7462, "slid_loss": 0.7905, "step": 696, "time": 71.4 }, { "epoch": 0.54, "learning_rate": "4.9044e-05", "loss": 0.7968, "slid_loss": 0.7915, "step": 697, "time": 74.49 }, { "epoch": 0.54, "learning_rate": "4.9041e-05", "loss": 0.7553, "slid_loss": 0.7914, "step": 698, "time": 71.97 }, { "epoch": 0.54, "learning_rate": "4.9038e-05", "loss": 0.8202, "slid_loss": 0.7918, "step": 699, "time": 70.62 }, { "epoch": 0.54, "learning_rate": "4.9035e-05", "loss": 0.7651, "slid_loss": 0.7917, "step": 700, "time": 72.1 }, { "epoch": 0.54, "learning_rate": "4.9032e-05", "loss": 0.7581, "slid_loss": 0.7912, "step": 701, "time": 72.92 }, { "epoch": 0.54, "learning_rate": "4.9029e-05", "loss": 0.7551, "slid_loss": 0.7906, "step": 702, "time": 70.49 }, { "epoch": 0.54, "learning_rate": "4.9026e-05", "loss": 0.7456, "slid_loss": 0.7897, "step": 703, "time": 71.94 }, { "epoch": 0.54, "learning_rate": "4.9023e-05", "loss": 0.8093, "slid_loss": 0.79, "step": 704, "time": 71.05 }, { "epoch": 0.55, "learning_rate": "4.9020e-05", "loss": 0.7963, "slid_loss": 0.7899, "step": 705, "time": 72.0 }, { "epoch": 0.55, "learning_rate": "4.9017e-05", "loss": 0.7846, "slid_loss": 0.7895, "step": 706, "time": 71.26 }, { "epoch": 0.55, "learning_rate": "4.9014e-05", "loss": 0.7976, "slid_loss": 0.7898, "step": 707, "time": 72.03 }, { "epoch": 0.55, "learning_rate": "4.9010e-05", "loss": 0.7798, "slid_loss": 0.7899, "step": 708, "time": 70.93 }, { "epoch": 0.55, "learning_rate": "4.9007e-05", "loss": 0.764, "slid_loss": 0.7896, "step": 709, "time": 72.37 }, { "epoch": 0.55, "learning_rate": "4.9004e-05", "loss": 0.7823, "slid_loss": 0.7895, "step": 710, "time": 72.26 }, { "epoch": 0.55, "learning_rate": "4.9001e-05", "loss": 0.8224, "slid_loss": 0.7899, "step": 711, "time": 71.7 }, { "epoch": 0.55, "learning_rate": "4.8998e-05", "loss": 0.7806, "slid_loss": 0.7897, "step": 712, "time": 72.43 }, { "epoch": 0.55, "learning_rate": "4.8995e-05", "loss": 0.7936, "slid_loss": 0.7892, "step": 713, "time": 71.18 }, { "epoch": 0.55, "learning_rate": "4.8992e-05", "loss": 0.7845, "slid_loss": 0.7889, "step": 714, "time": 72.46 }, { "epoch": 0.55, "learning_rate": "4.8989e-05", "loss": 0.7644, "slid_loss": 0.7887, "step": 715, "time": 71.1 }, { "epoch": 0.55, "learning_rate": "4.8986e-05", "loss": 0.7891, "slid_loss": 0.7888, "step": 716, "time": 71.16 }, { "epoch": 0.55, "learning_rate": "4.8983e-05", "loss": 0.7846, "slid_loss": 0.7884, "step": 717, "time": 71.21 }, { "epoch": 0.56, "learning_rate": "4.8980e-05", "loss": 0.7976, "slid_loss": 0.7889, "step": 718, "time": 71.0 }, { "epoch": 0.56, "learning_rate": "4.8977e-05", "loss": 0.7858, "slid_loss": 0.7889, "step": 719, "time": 70.62 }, { "epoch": 0.56, "learning_rate": "4.8974e-05", "loss": 0.8026, "slid_loss": 0.7892, "step": 720, "time": 71.49 }, { "epoch": 0.56, "learning_rate": "4.8970e-05", "loss": 0.7931, "slid_loss": 0.7892, "step": 721, "time": 71.65 }, { "epoch": 0.56, "learning_rate": "4.8967e-05", "loss": 0.7801, "slid_loss": 0.7889, "step": 722, "time": 73.07 }, { "epoch": 0.56, "learning_rate": "4.8964e-05", "loss": 0.7801, "slid_loss": 0.7888, "step": 723, "time": 71.04 }, { "epoch": 0.56, "learning_rate": "4.8961e-05", "loss": 0.7841, "slid_loss": 0.7888, "step": 724, "time": 73.18 }, { "epoch": 0.56, "learning_rate": "4.8958e-05", "loss": 0.7715, "slid_loss": 0.7883, "step": 725, "time": 71.71 }, { "epoch": 0.56, "learning_rate": "4.8955e-05", "loss": 0.7863, "slid_loss": 0.7881, "step": 726, "time": 70.96 }, { "epoch": 0.56, "learning_rate": "4.8952e-05", "loss": 0.7978, "slid_loss": 0.7883, "step": 727, "time": 71.1 }, { "epoch": 0.56, "learning_rate": "4.8949e-05", "loss": 0.792, "slid_loss": 0.7882, "step": 728, "time": 71.1 }, { "epoch": 0.56, "learning_rate": "4.8945e-05", "loss": 0.8003, "slid_loss": 0.7885, "step": 729, "time": 71.59 }, { "epoch": 0.56, "learning_rate": "4.8942e-05", "loss": 0.7981, "slid_loss": 0.7886, "step": 730, "time": 72.22 }, { "epoch": 0.57, "learning_rate": "4.8939e-05", "loss": 0.7786, "slid_loss": 0.7886, "step": 731, "time": 71.0 }, { "epoch": 0.57, "learning_rate": "4.8936e-05", "loss": 0.7834, "slid_loss": 0.7886, "step": 732, "time": 73.23 }, { "epoch": 0.57, "learning_rate": "4.8933e-05", "loss": 0.7827, "slid_loss": 0.7887, "step": 733, "time": 73.77 }, { "epoch": 0.57, "learning_rate": "4.8930e-05", "loss": 0.7677, "slid_loss": 0.7884, "step": 734, "time": 72.08 }, { "epoch": 0.57, "learning_rate": "4.8926e-05", "loss": 0.7673, "slid_loss": 0.7885, "step": 735, "time": 72.5 }, { "epoch": 0.57, "learning_rate": "4.8923e-05", "loss": 0.7545, "slid_loss": 0.7878, "step": 736, "time": 71.33 }, { "epoch": 0.57, "learning_rate": "4.8920e-05", "loss": 0.8457, "slid_loss": 0.7884, "step": 737, "time": 71.64 }, { "epoch": 0.57, "learning_rate": "4.8917e-05", "loss": 0.7762, "slid_loss": 0.7883, "step": 738, "time": 71.48 }, { "epoch": 0.57, "learning_rate": "4.8914e-05", "loss": 0.819, "slid_loss": 0.7884, "step": 739, "time": 71.51 }, { "epoch": 0.57, "learning_rate": "4.8910e-05", "loss": 0.7402, "slid_loss": 0.7876, "step": 740, "time": 70.88 }, { "epoch": 0.57, "learning_rate": "4.8907e-05", "loss": 0.7848, "slid_loss": 0.7872, "step": 741, "time": 71.01 }, { "epoch": 0.57, "learning_rate": "4.8904e-05", "loss": 0.7519, "slid_loss": 0.7872, "step": 742, "time": 72.77 }, { "epoch": 0.57, "learning_rate": "4.8901e-05", "loss": 0.8008, "slid_loss": 0.7874, "step": 743, "time": 71.36 }, { "epoch": 0.58, "learning_rate": "4.8898e-05", "loss": 0.8273, "slid_loss": 0.7881, "step": 744, "time": 71.07 }, { "epoch": 0.58, "learning_rate": "4.8894e-05", "loss": 0.7789, "slid_loss": 0.7876, "step": 745, "time": 71.96 }, { "epoch": 0.58, "learning_rate": "4.8891e-05", "loss": 0.8031, "slid_loss": 0.7877, "step": 746, "time": 74.01 }, { "epoch": 0.58, "learning_rate": "4.8888e-05", "loss": 0.7791, "slid_loss": 0.7877, "step": 747, "time": 71.14 }, { "epoch": 0.58, "learning_rate": "4.8885e-05", "loss": 0.7934, "slid_loss": 0.788, "step": 748, "time": 71.99 }, { "epoch": 0.58, "learning_rate": "4.8881e-05", "loss": 0.8022, "slid_loss": 0.7885, "step": 749, "time": 70.07 }, { "epoch": 0.58, "learning_rate": "4.8878e-05", "loss": 0.7837, "slid_loss": 0.7881, "step": 750, "time": 71.09 }, { "epoch": 0.58, "learning_rate": "4.8875e-05", "loss": 0.7985, "slid_loss": 0.7882, "step": 751, "time": 71.99 }, { "epoch": 0.58, "learning_rate": "4.8872e-05", "loss": 0.843, "slid_loss": 0.7887, "step": 752, "time": 71.78 }, { "epoch": 0.58, "learning_rate": "4.8868e-05", "loss": 0.7614, "slid_loss": 0.7881, "step": 753, "time": 72.09 }, { "epoch": 0.58, "learning_rate": "4.8865e-05", "loss": 0.7681, "slid_loss": 0.7877, "step": 754, "time": 73.09 }, { "epoch": 0.58, "learning_rate": "4.8862e-05", "loss": 0.7675, "slid_loss": 0.7874, "step": 755, "time": 71.93 }, { "epoch": 0.58, "learning_rate": "4.8859e-05", "loss": 0.7688, "slid_loss": 0.787, "step": 756, "time": 70.7 }, { "epoch": 0.59, "learning_rate": "4.8855e-05", "loss": 0.7734, "slid_loss": 0.7868, "step": 757, "time": 72.03 }, { "epoch": 0.59, "learning_rate": "4.8852e-05", "loss": 0.7992, "slid_loss": 0.7875, "step": 758, "time": 73.43 }, { "epoch": 0.59, "learning_rate": "4.8849e-05", "loss": 0.8017, "slid_loss": 0.7874, "step": 759, "time": 72.71 }, { "epoch": 0.59, "learning_rate": "4.8846e-05", "loss": 0.7802, "slid_loss": 0.7875, "step": 760, "time": 72.45 }, { "epoch": 0.59, "learning_rate": "4.8842e-05", "loss": 0.7625, "slid_loss": 0.787, "step": 761, "time": 71.37 }, { "epoch": 0.59, "learning_rate": "4.8839e-05", "loss": 0.7898, "slid_loss": 0.7869, "step": 762, "time": 71.54 }, { "epoch": 0.59, "learning_rate": "4.8836e-05", "loss": 0.7824, "slid_loss": 0.7866, "step": 763, "time": 70.76 }, { "epoch": 0.59, "learning_rate": "4.8832e-05", "loss": 0.7924, "slid_loss": 0.7866, "step": 764, "time": 70.98 }, { "epoch": 0.59, "learning_rate": "4.8829e-05", "loss": 0.7729, "slid_loss": 0.7861, "step": 765, "time": 191.78 }, { "epoch": 0.59, "learning_rate": "4.8826e-05", "loss": 0.7441, "slid_loss": 0.7859, "step": 766, "time": 73.19 }, { "epoch": 0.59, "learning_rate": "4.8822e-05", "loss": 0.7754, "slid_loss": 0.7852, "step": 767, "time": 72.03 }, { "epoch": 0.59, "learning_rate": "4.8819e-05", "loss": 0.7956, "slid_loss": 0.7849, "step": 768, "time": 71.17 }, { "epoch": 0.59, "learning_rate": "4.8816e-05", "loss": 0.7952, "slid_loss": 0.7848, "step": 769, "time": 92.02 }, { "epoch": 0.6, "learning_rate": "4.8812e-05", "loss": 0.7918, "slid_loss": 0.785, "step": 770, "time": 72.87 }, { "epoch": 0.6, "learning_rate": "4.8809e-05", "loss": 0.7563, "slid_loss": 0.7847, "step": 771, "time": 71.5 }, { "epoch": 0.6, "learning_rate": "4.8806e-05", "loss": 0.7653, "slid_loss": 0.7839, "step": 772, "time": 72.63 }, { "epoch": 0.6, "learning_rate": "4.8802e-05", "loss": 0.7542, "slid_loss": 0.7835, "step": 773, "time": 72.48 }, { "epoch": 0.6, "learning_rate": "4.8799e-05", "loss": 0.7828, "slid_loss": 0.7832, "step": 774, "time": 71.18 }, { "epoch": 0.6, "learning_rate": "4.8796e-05", "loss": 0.7283, "slid_loss": 0.7826, "step": 775, "time": 71.79 }, { "epoch": 0.6, "learning_rate": "4.8792e-05", "loss": 0.7738, "slid_loss": 0.7828, "step": 776, "time": 70.83 }, { "epoch": 0.6, "learning_rate": "4.8789e-05", "loss": 0.7615, "slid_loss": 0.7827, "step": 777, "time": 71.73 }, { "epoch": 0.6, "learning_rate": "4.8786e-05", "loss": 0.8156, "slid_loss": 0.7827, "step": 778, "time": 72.23 }, { "epoch": 0.6, "learning_rate": "4.8782e-05", "loss": 0.7542, "slid_loss": 0.7823, "step": 779, "time": 71.83 }, { "epoch": 0.6, "learning_rate": "4.8779e-05", "loss": 0.7627, "slid_loss": 0.7825, "step": 780, "time": 72.27 }, { "epoch": 0.6, "learning_rate": "4.8775e-05", "loss": 0.7557, "slid_loss": 0.7819, "step": 781, "time": 72.36 }, { "epoch": 0.61, "learning_rate": "4.8772e-05", "loss": 0.7753, "slid_loss": 0.7818, "step": 782, "time": 71.43 }, { "epoch": 0.61, "learning_rate": "4.8769e-05", "loss": 0.7827, "slid_loss": 0.7818, "step": 783, "time": 71.1 }, { "epoch": 0.61, "learning_rate": "4.8765e-05", "loss": 0.7482, "slid_loss": 0.7816, "step": 784, "time": 71.87 }, { "epoch": 0.61, "learning_rate": "4.8762e-05", "loss": 0.7699, "slid_loss": 0.7813, "step": 785, "time": 71.71 }, { "epoch": 0.61, "learning_rate": "4.8759e-05", "loss": 0.7794, "slid_loss": 0.781, "step": 786, "time": 71.51 }, { "epoch": 0.61, "learning_rate": "4.8755e-05", "loss": 0.7926, "slid_loss": 0.781, "step": 787, "time": 71.21 }, { "epoch": 0.61, "learning_rate": "4.8752e-05", "loss": 0.816, "slid_loss": 0.7817, "step": 788, "time": 70.23 }, { "epoch": 0.61, "learning_rate": "4.8748e-05", "loss": 0.8053, "slid_loss": 0.782, "step": 789, "time": 95.89 }, { "epoch": 0.61, "learning_rate": "4.8745e-05", "loss": 0.7515, "slid_loss": 0.7817, "step": 790, "time": 72.44 }, { "epoch": 0.61, "learning_rate": "4.8741e-05", "loss": 0.7589, "slid_loss": 0.7814, "step": 791, "time": 125.7 }, { "epoch": 0.61, "learning_rate": "4.8738e-05", "loss": 0.7676, "slid_loss": 0.781, "step": 792, "time": 169.92 }, { "epoch": 0.61, "learning_rate": "4.8735e-05", "loss": 0.8035, "slid_loss": 0.7815, "step": 793, "time": 164.43 }, { "epoch": 0.61, "learning_rate": "4.8731e-05", "loss": 0.755, "slid_loss": 0.7811, "step": 794, "time": 195.46 }, { "epoch": 0.62, "learning_rate": "4.8728e-05", "loss": 0.7484, "slid_loss": 0.7809, "step": 795, "time": 187.26 }, { "epoch": 0.62, "learning_rate": "4.8724e-05", "loss": 0.8223, "slid_loss": 0.7817, "step": 796, "time": 163.66 }, { "epoch": 0.62, "learning_rate": "4.8721e-05", "loss": 0.7675, "slid_loss": 0.7814, "step": 797, "time": 134.41 }, { "epoch": 0.62, "learning_rate": "4.8717e-05", "loss": 0.7782, "slid_loss": 0.7816, "step": 798, "time": 95.5 }, { "epoch": 0.62, "learning_rate": "4.8714e-05", "loss": 0.7607, "slid_loss": 0.781, "step": 799, "time": 73.16 }, { "epoch": 0.62, "learning_rate": "4.8710e-05", "loss": 0.764, "slid_loss": 0.781, "step": 800, "time": 83.52 }, { "epoch": 0.62, "learning_rate": "4.8707e-05", "loss": 0.7913, "slid_loss": 0.7814, "step": 801, "time": 877.39 }, { "epoch": 0.62, "learning_rate": "4.8703e-05", "loss": 0.7822, "slid_loss": 0.7816, "step": 802, "time": 71.48 }, { "epoch": 0.62, "learning_rate": "4.8700e-05", "loss": 0.7692, "slid_loss": 0.7819, "step": 803, "time": 74.01 }, { "epoch": 0.62, "learning_rate": "4.8696e-05", "loss": 0.766, "slid_loss": 0.7814, "step": 804, "time": 70.85 }, { "epoch": 0.62, "learning_rate": "4.8693e-05", "loss": 0.7842, "slid_loss": 0.7813, "step": 805, "time": 71.29 }, { "epoch": 0.62, "learning_rate": "4.8689e-05", "loss": 0.8008, "slid_loss": 0.7815, "step": 806, "time": 72.34 }, { "epoch": 0.62, "learning_rate": "4.8686e-05", "loss": 0.7503, "slid_loss": 0.781, "step": 807, "time": 70.6 }, { "epoch": 0.63, "learning_rate": "4.8682e-05", "loss": 0.7852, "slid_loss": 0.7811, "step": 808, "time": 70.84 }, { "epoch": 0.63, "learning_rate": "4.8679e-05", "loss": 0.7869, "slid_loss": 0.7813, "step": 809, "time": 72.17 }, { "epoch": 0.63, "learning_rate": "4.8675e-05", "loss": 0.7776, "slid_loss": 0.7812, "step": 810, "time": 71.1 }, { "epoch": 0.63, "learning_rate": "4.8672e-05", "loss": 0.7828, "slid_loss": 0.7808, "step": 811, "time": 71.93 }, { "epoch": 0.63, "learning_rate": "4.8668e-05", "loss": 0.7912, "slid_loss": 0.781, "step": 812, "time": 73.74 }, { "epoch": 0.63, "learning_rate": "4.8665e-05", "loss": 0.7681, "slid_loss": 0.7807, "step": 813, "time": 72.24 }, { "epoch": 0.63, "learning_rate": "4.8661e-05", "loss": 0.7804, "slid_loss": 0.7807, "step": 814, "time": 72.32 }, { "epoch": 0.63, "learning_rate": "4.8658e-05", "loss": 0.7769, "slid_loss": 0.7808, "step": 815, "time": 72.13 }, { "epoch": 0.63, "learning_rate": "4.8654e-05", "loss": 0.8341, "slid_loss": 0.7812, "step": 816, "time": 72.3 }, { "epoch": 0.63, "learning_rate": "4.8651e-05", "loss": 0.8261, "slid_loss": 0.7816, "step": 817, "time": 70.66 }, { "epoch": 0.63, "learning_rate": "4.8647e-05", "loss": 0.8167, "slid_loss": 0.7818, "step": 818, "time": 71.53 }, { "epoch": 0.63, "learning_rate": "4.8644e-05", "loss": 0.7613, "slid_loss": 0.7816, "step": 819, "time": 72.13 }, { "epoch": 0.63, "learning_rate": "4.8640e-05", "loss": 0.8095, "slid_loss": 0.7817, "step": 820, "time": 72.54 }, { "epoch": 0.64, "learning_rate": "4.8636e-05", "loss": 0.782, "slid_loss": 0.7816, "step": 821, "time": 71.25 }, { "epoch": 0.64, "learning_rate": "4.8633e-05", "loss": 0.7744, "slid_loss": 0.7815, "step": 822, "time": 72.17 }, { "epoch": 0.64, "learning_rate": "4.8629e-05", "loss": 0.8127, "slid_loss": 0.7818, "step": 823, "time": 71.13 }, { "epoch": 0.64, "learning_rate": "4.8626e-05", "loss": 0.7887, "slid_loss": 0.7819, "step": 824, "time": 72.62 }, { "epoch": 0.64, "learning_rate": "4.8622e-05", "loss": 0.7614, "slid_loss": 0.7818, "step": 825, "time": 70.59 }, { "epoch": 0.64, "learning_rate": "4.8619e-05", "loss": 0.7766, "slid_loss": 0.7817, "step": 826, "time": 71.74 }, { "epoch": 0.64, "learning_rate": "4.8615e-05", "loss": 0.7773, "slid_loss": 0.7815, "step": 827, "time": 74.18 }, { "epoch": 0.64, "learning_rate": "4.8611e-05", "loss": 0.7588, "slid_loss": 0.7811, "step": 828, "time": 72.84 }, { "epoch": 0.64, "learning_rate": "4.8608e-05", "loss": 0.7879, "slid_loss": 0.781, "step": 829, "time": 72.73 }, { "epoch": 0.64, "learning_rate": "4.8604e-05", "loss": 0.8008, "slid_loss": 0.781, "step": 830, "time": 73.33 }, { "epoch": 0.64, "learning_rate": "4.8601e-05", "loss": 0.7627, "slid_loss": 0.7809, "step": 831, "time": 71.48 }, { "epoch": 0.64, "learning_rate": "4.8597e-05", "loss": 0.7906, "slid_loss": 0.7809, "step": 832, "time": 70.73 }, { "epoch": 0.64, "learning_rate": "4.8593e-05", "loss": 0.8046, "slid_loss": 0.7812, "step": 833, "time": 70.62 }, { "epoch": 0.65, "learning_rate": "4.8590e-05", "loss": 0.7941, "slid_loss": 0.7814, "step": 834, "time": 72.87 }, { "epoch": 0.65, "learning_rate": "4.8586e-05", "loss": 0.7444, "slid_loss": 0.7812, "step": 835, "time": 70.65 }, { "epoch": 0.65, "learning_rate": "4.8582e-05", "loss": 0.7612, "slid_loss": 0.7813, "step": 836, "time": 72.44 }, { "epoch": 0.65, "learning_rate": "4.8579e-05", "loss": 0.7672, "slid_loss": 0.7805, "step": 837, "time": 72.64 }, { "epoch": 0.65, "learning_rate": "4.8575e-05", "loss": 0.7733, "slid_loss": 0.7805, "step": 838, "time": 73.0 }, { "epoch": 0.65, "learning_rate": "4.8572e-05", "loss": 0.784, "slid_loss": 0.7801, "step": 839, "time": 71.62 }, { "epoch": 0.65, "learning_rate": "4.8568e-05", "loss": 0.7618, "slid_loss": 0.7803, "step": 840, "time": 73.05 }, { "epoch": 0.65, "learning_rate": "4.8564e-05", "loss": 0.7871, "slid_loss": 0.7803, "step": 841, "time": 71.68 }, { "epoch": 0.65, "learning_rate": "4.8561e-05", "loss": 0.8231, "slid_loss": 0.7811, "step": 842, "time": 72.12 }, { "epoch": 0.65, "learning_rate": "4.8557e-05", "loss": 0.75, "slid_loss": 0.7805, "step": 843, "time": 71.91 }, { "epoch": 0.65, "learning_rate": "4.8553e-05", "loss": 0.7787, "slid_loss": 0.7801, "step": 844, "time": 71.2 }, { "epoch": 0.65, "learning_rate": "4.8550e-05", "loss": 0.7673, "slid_loss": 0.7799, "step": 845, "time": 73.49 }, { "epoch": 0.65, "learning_rate": "4.8546e-05", "loss": 0.7141, "slid_loss": 0.7791, "step": 846, "time": 71.72 }, { "epoch": 0.66, "learning_rate": "4.8542e-05", "loss": 0.7756, "slid_loss": 0.779, "step": 847, "time": 72.3 }, { "epoch": 0.66, "learning_rate": "4.8539e-05", "loss": 0.8125, "slid_loss": 0.7792, "step": 848, "time": 72.53 }, { "epoch": 0.66, "learning_rate": "4.8535e-05", "loss": 0.7982, "slid_loss": 0.7792, "step": 849, "time": 71.58 }, { "epoch": 0.66, "learning_rate": "4.8531e-05", "loss": 0.787, "slid_loss": 0.7792, "step": 850, "time": 72.37 }, { "epoch": 0.66, "learning_rate": "4.8527e-05", "loss": 0.7518, "slid_loss": 0.7787, "step": 851, "time": 70.96 }, { "epoch": 0.66, "learning_rate": "4.8524e-05", "loss": 0.7877, "slid_loss": 0.7782, "step": 852, "time": 72.05 }, { "epoch": 0.66, "learning_rate": "4.8520e-05", "loss": 0.7602, "slid_loss": 0.7782, "step": 853, "time": 72.11 }, { "epoch": 0.66, "learning_rate": "4.8516e-05", "loss": 0.7796, "slid_loss": 0.7783, "step": 854, "time": 70.12 }, { "epoch": 0.66, "learning_rate": "4.8513e-05", "loss": 0.7982, "slid_loss": 0.7786, "step": 855, "time": 71.2 }, { "epoch": 0.66, "learning_rate": "4.8509e-05", "loss": 0.7702, "slid_loss": 0.7786, "step": 856, "time": 72.5 }, { "epoch": 0.66, "learning_rate": "4.8505e-05", "loss": 0.773, "slid_loss": 0.7786, "step": 857, "time": 71.65 }, { "epoch": 0.66, "learning_rate": "4.8501e-05", "loss": 0.793, "slid_loss": 0.7785, "step": 858, "time": 70.64 }, { "epoch": 0.66, "learning_rate": "4.8498e-05", "loss": 0.7659, "slid_loss": 0.7782, "step": 859, "time": 71.84 }, { "epoch": 0.67, "learning_rate": "4.8494e-05", "loss": 0.7812, "slid_loss": 0.7782, "step": 860, "time": 71.57 }, { "epoch": 0.67, "learning_rate": "4.8490e-05", "loss": 0.8058, "slid_loss": 0.7786, "step": 861, "time": 71.82 }, { "epoch": 0.67, "learning_rate": "4.8487e-05", "loss": 0.7852, "slid_loss": 0.7786, "step": 862, "time": 71.98 }, { "epoch": 0.67, "learning_rate": "4.8483e-05", "loss": 0.7477, "slid_loss": 0.7782, "step": 863, "time": 72.6 }, { "epoch": 0.67, "learning_rate": "4.8479e-05", "loss": 0.7745, "slid_loss": 0.7781, "step": 864, "time": 71.92 }, { "epoch": 0.67, "learning_rate": "4.8475e-05", "loss": 0.7925, "slid_loss": 0.7783, "step": 865, "time": 70.98 }, { "epoch": 0.67, "learning_rate": "4.8471e-05", "loss": 0.7708, "slid_loss": 0.7785, "step": 866, "time": 71.95 }, { "epoch": 0.67, "learning_rate": "4.8468e-05", "loss": 0.7698, "slid_loss": 0.7785, "step": 867, "time": 71.65 }, { "epoch": 0.67, "learning_rate": "4.8464e-05", "loss": 0.7491, "slid_loss": 0.778, "step": 868, "time": 72.23 }, { "epoch": 0.67, "learning_rate": "4.8460e-05", "loss": 0.7544, "slid_loss": 0.7776, "step": 869, "time": 71.62 }, { "epoch": 0.67, "learning_rate": "4.8456e-05", "loss": 0.7676, "slid_loss": 0.7773, "step": 870, "time": 70.91 }, { "epoch": 0.67, "learning_rate": "4.8453e-05", "loss": 0.758, "slid_loss": 0.7774, "step": 871, "time": 71.22 }, { "epoch": 0.67, "learning_rate": "4.8449e-05", "loss": 0.7493, "slid_loss": 0.7772, "step": 872, "time": 71.29 }, { "epoch": 0.68, "learning_rate": "4.8445e-05", "loss": 0.7794, "slid_loss": 0.7775, "step": 873, "time": 72.53 }, { "epoch": 0.68, "learning_rate": "4.8441e-05", "loss": 0.7899, "slid_loss": 0.7775, "step": 874, "time": 75.33 }, { "epoch": 0.68, "learning_rate": "4.8437e-05", "loss": 0.7715, "slid_loss": 0.778, "step": 875, "time": 74.17 }, { "epoch": 0.68, "learning_rate": "4.8434e-05", "loss": 0.7747, "slid_loss": 0.778, "step": 876, "time": 70.88 }, { "epoch": 0.68, "learning_rate": "4.8430e-05", "loss": 0.7894, "slid_loss": 0.7782, "step": 877, "time": 72.53 }, { "epoch": 0.68, "learning_rate": "4.8426e-05", "loss": 0.7824, "slid_loss": 0.7779, "step": 878, "time": 71.84 }, { "epoch": 0.68, "learning_rate": "4.8422e-05", "loss": 0.7659, "slid_loss": 0.778, "step": 879, "time": 71.85 }, { "epoch": 0.68, "learning_rate": "4.8418e-05", "loss": 0.7547, "slid_loss": 0.778, "step": 880, "time": 71.84 }, { "epoch": 0.68, "learning_rate": "4.8414e-05", "loss": 0.846, "slid_loss": 0.7789, "step": 881, "time": 70.6 }, { "epoch": 0.68, "learning_rate": "4.8411e-05", "loss": 0.7859, "slid_loss": 0.779, "step": 882, "time": 72.06 }, { "epoch": 0.68, "learning_rate": "4.8407e-05", "loss": 0.7915, "slid_loss": 0.7791, "step": 883, "time": 71.38 }, { "epoch": 0.68, "learning_rate": "4.8403e-05", "loss": 0.7693, "slid_loss": 0.7793, "step": 884, "time": 72.34 }, { "epoch": 0.68, "learning_rate": "4.8399e-05", "loss": 0.741, "slid_loss": 0.779, "step": 885, "time": 71.84 }, { "epoch": 0.69, "learning_rate": "4.8395e-05", "loss": 0.7878, "slid_loss": 0.7791, "step": 886, "time": 70.8 }, { "epoch": 0.69, "learning_rate": "4.8391e-05", "loss": 0.7543, "slid_loss": 0.7787, "step": 887, "time": 72.46 }, { "epoch": 0.69, "learning_rate": "4.8388e-05", "loss": 0.7711, "slid_loss": 0.7782, "step": 888, "time": 70.76 }, { "epoch": 0.69, "learning_rate": "4.8384e-05", "loss": 0.7449, "slid_loss": 0.7776, "step": 889, "time": 70.23 }, { "epoch": 0.69, "learning_rate": "4.8380e-05", "loss": 0.7817, "slid_loss": 0.7779, "step": 890, "time": 72.75 }, { "epoch": 0.69, "learning_rate": "4.8376e-05", "loss": 0.7751, "slid_loss": 0.7781, "step": 891, "time": 71.42 }, { "epoch": 0.69, "learning_rate": "4.8372e-05", "loss": 0.7807, "slid_loss": 0.7782, "step": 892, "time": 71.79 }, { "epoch": 0.69, "learning_rate": "4.8368e-05", "loss": 0.8036, "slid_loss": 0.7782, "step": 893, "time": 71.08 }, { "epoch": 0.69, "learning_rate": "4.8364e-05", "loss": 0.7629, "slid_loss": 0.7783, "step": 894, "time": 70.58 }, { "epoch": 0.69, "learning_rate": "4.8360e-05", "loss": 0.749, "slid_loss": 0.7783, "step": 895, "time": 70.82 }, { "epoch": 0.69, "learning_rate": "4.8356e-05", "loss": 0.7861, "slid_loss": 0.7779, "step": 896, "time": 72.21 }, { "epoch": 0.69, "learning_rate": "4.8353e-05", "loss": 0.772, "slid_loss": 0.778, "step": 897, "time": 70.57 }, { "epoch": 0.69, "learning_rate": "4.8349e-05", "loss": 0.773, "slid_loss": 0.7779, "step": 898, "time": 69.78 }, { "epoch": 0.7, "learning_rate": "4.8345e-05", "loss": 0.7211, "slid_loss": 0.7775, "step": 899, "time": 70.84 }, { "epoch": 0.7, "learning_rate": "4.8341e-05", "loss": 0.7543, "slid_loss": 0.7774, "step": 900, "time": 71.4 }, { "epoch": 0.7, "learning_rate": "4.8337e-05", "loss": 0.7393, "slid_loss": 0.7769, "step": 901, "time": 71.33 }, { "epoch": 0.7, "learning_rate": "4.8333e-05", "loss": 0.7674, "slid_loss": 0.7768, "step": 902, "time": 72.81 }, { "epoch": 0.7, "learning_rate": "4.8329e-05", "loss": 0.8066, "slid_loss": 0.7771, "step": 903, "time": 71.91 }, { "epoch": 0.7, "learning_rate": "4.8325e-05", "loss": 0.7862, "slid_loss": 0.7773, "step": 904, "time": 71.8 }, { "epoch": 0.7, "learning_rate": "4.8321e-05", "loss": 0.796, "slid_loss": 0.7775, "step": 905, "time": 71.38 }, { "epoch": 0.7, "learning_rate": "4.8317e-05", "loss": 0.7658, "slid_loss": 0.7771, "step": 906, "time": 72.45 }, { "epoch": 0.7, "learning_rate": "4.8313e-05", "loss": 0.8267, "slid_loss": 0.7779, "step": 907, "time": 72.69 }, { "epoch": 0.7, "learning_rate": "4.8309e-05", "loss": 0.7946, "slid_loss": 0.778, "step": 908, "time": 71.63 }, { "epoch": 0.7, "learning_rate": "4.8305e-05", "loss": 0.7694, "slid_loss": 0.7778, "step": 909, "time": 72.5 }, { "epoch": 0.7, "learning_rate": "4.8301e-05", "loss": 0.7598, "slid_loss": 0.7776, "step": 910, "time": 71.91 }, { "epoch": 0.7, "learning_rate": "4.8297e-05", "loss": 0.7948, "slid_loss": 0.7777, "step": 911, "time": 71.39 }, { "epoch": 0.71, "learning_rate": "4.8293e-05", "loss": 0.7877, "slid_loss": 0.7777, "step": 912, "time": 70.93 }, { "epoch": 0.71, "learning_rate": "4.8290e-05", "loss": 0.7618, "slid_loss": 0.7776, "step": 913, "time": 72.94 }, { "epoch": 0.71, "learning_rate": "4.8286e-05", "loss": 0.7489, "slid_loss": 0.7773, "step": 914, "time": 71.45 }, { "epoch": 0.71, "learning_rate": "4.8282e-05", "loss": 0.7815, "slid_loss": 0.7774, "step": 915, "time": 72.5 }, { "epoch": 0.71, "learning_rate": "4.8278e-05", "loss": 0.7642, "slid_loss": 0.7767, "step": 916, "time": 70.91 }, { "epoch": 0.71, "learning_rate": "4.8274e-05", "loss": 0.7696, "slid_loss": 0.7761, "step": 917, "time": 70.55 }, { "epoch": 0.71, "learning_rate": "4.8270e-05", "loss": 0.7763, "slid_loss": 0.7757, "step": 918, "time": 71.69 }, { "epoch": 0.71, "learning_rate": "4.8266e-05", "loss": 0.7633, "slid_loss": 0.7757, "step": 919, "time": 73.3 }, { "epoch": 0.71, "learning_rate": "4.8262e-05", "loss": 0.7668, "slid_loss": 0.7753, "step": 920, "time": 73.48 }, { "epoch": 0.71, "learning_rate": "4.8258e-05", "loss": 0.7679, "slid_loss": 0.7752, "step": 921, "time": 69.55 }, { "epoch": 0.71, "learning_rate": "4.8254e-05", "loss": 0.7982, "slid_loss": 0.7754, "step": 922, "time": 72.23 }, { "epoch": 0.71, "learning_rate": "4.8250e-05", "loss": 0.77, "slid_loss": 0.775, "step": 923, "time": 71.76 }, { "epoch": 0.71, "learning_rate": "4.8246e-05", "loss": 0.8204, "slid_loss": 0.7753, "step": 924, "time": 71.05 }, { "epoch": 0.72, "learning_rate": "4.8241e-05", "loss": 0.7995, "slid_loss": 0.7757, "step": 925, "time": 72.27 }, { "epoch": 0.72, "learning_rate": "4.8237e-05", "loss": 0.7758, "slid_loss": 0.7757, "step": 926, "time": 70.97 }, { "epoch": 0.72, "learning_rate": "4.8233e-05", "loss": 0.7775, "slid_loss": 0.7757, "step": 927, "time": 72.07 }, { "epoch": 0.72, "learning_rate": "4.8229e-05", "loss": 0.7867, "slid_loss": 0.7759, "step": 928, "time": 71.98 }, { "epoch": 0.72, "learning_rate": "4.8225e-05", "loss": 0.7608, "slid_loss": 0.7757, "step": 929, "time": 72.76 }, { "epoch": 0.72, "learning_rate": "4.8221e-05", "loss": 0.7864, "slid_loss": 0.7755, "step": 930, "time": 71.43 }, { "epoch": 0.72, "learning_rate": "4.8217e-05", "loss": 0.7664, "slid_loss": 0.7756, "step": 931, "time": 71.98 }, { "epoch": 0.72, "learning_rate": "4.8213e-05", "loss": 0.7681, "slid_loss": 0.7753, "step": 932, "time": 72.51 }, { "epoch": 0.72, "learning_rate": "4.8209e-05", "loss": 0.7845, "slid_loss": 0.7751, "step": 933, "time": 72.46 }, { "epoch": 0.72, "learning_rate": "4.8205e-05", "loss": 0.7588, "slid_loss": 0.7748, "step": 934, "time": 70.84 }, { "epoch": 0.72, "learning_rate": "4.8201e-05", "loss": 0.7709, "slid_loss": 0.775, "step": 935, "time": 71.78 }, { "epoch": 0.72, "learning_rate": "4.8197e-05", "loss": 0.7765, "slid_loss": 0.7752, "step": 936, "time": 72.04 }, { "epoch": 0.72, "learning_rate": "4.8193e-05", "loss": 0.7656, "slid_loss": 0.7752, "step": 937, "time": 71.51 }, { "epoch": 0.73, "learning_rate": "4.8189e-05", "loss": 0.7897, "slid_loss": 0.7753, "step": 938, "time": 73.3 }, { "epoch": 0.73, "learning_rate": "4.8185e-05", "loss": 0.7681, "slid_loss": 0.7752, "step": 939, "time": 71.91 }, { "epoch": 0.73, "learning_rate": "4.8181e-05", "loss": 0.7524, "slid_loss": 0.7751, "step": 940, "time": 72.71 }, { "epoch": 0.73, "learning_rate": "4.8176e-05", "loss": 0.8141, "slid_loss": 0.7754, "step": 941, "time": 71.26 }, { "epoch": 0.73, "learning_rate": "4.8172e-05", "loss": 0.7515, "slid_loss": 0.7746, "step": 942, "time": 71.64 }, { "epoch": 0.73, "learning_rate": "4.8168e-05", "loss": 0.7451, "slid_loss": 0.7746, "step": 943, "time": 71.81 }, { "epoch": 0.73, "learning_rate": "4.8164e-05", "loss": 0.7874, "slid_loss": 0.7747, "step": 944, "time": 70.95 }, { "epoch": 0.73, "learning_rate": "4.8160e-05", "loss": 0.7505, "slid_loss": 0.7745, "step": 945, "time": 73.53 }, { "epoch": 0.73, "learning_rate": "4.8156e-05", "loss": 0.7778, "slid_loss": 0.7752, "step": 946, "time": 72.55 }, { "epoch": 0.73, "learning_rate": "4.8152e-05", "loss": 0.7685, "slid_loss": 0.7751, "step": 947, "time": 71.64 }, { "epoch": 0.73, "learning_rate": "4.8148e-05", "loss": 0.7768, "slid_loss": 0.7747, "step": 948, "time": 84.15 }, { "epoch": 0.73, "learning_rate": "4.8144e-05", "loss": 0.7592, "slid_loss": 0.7743, "step": 949, "time": 106.78 }, { "epoch": 0.74, "learning_rate": "4.8139e-05", "loss": 0.7943, "slid_loss": 0.7744, "step": 950, "time": 133.2 }, { "epoch": 0.74, "learning_rate": "4.8135e-05", "loss": 0.7515, "slid_loss": 0.7744, "step": 951, "time": 191.31 }, { "epoch": 0.74, "learning_rate": "4.8131e-05", "loss": 0.8087, "slid_loss": 0.7746, "step": 952, "time": 154.97 }, { "epoch": 0.74, "learning_rate": "4.8127e-05", "loss": 0.7504, "slid_loss": 0.7745, "step": 953, "time": 212.65 }, { "epoch": 0.74, "learning_rate": "4.8123e-05", "loss": 0.7806, "slid_loss": 0.7745, "step": 954, "time": 207.87 }, { "epoch": 0.74, "learning_rate": "4.8119e-05", "loss": 0.7601, "slid_loss": 0.7741, "step": 955, "time": 154.13 }, { "epoch": 0.74, "learning_rate": "4.8114e-05", "loss": 0.796, "slid_loss": 0.7744, "step": 956, "time": 150.13 }, { "epoch": 0.74, "learning_rate": "4.8110e-05", "loss": 0.7816, "slid_loss": 0.7745, "step": 957, "time": 97.79 }, { "epoch": 0.74, "learning_rate": "4.8106e-05", "loss": 0.7521, "slid_loss": 0.7741, "step": 958, "time": 71.41 }, { "epoch": 0.74, "learning_rate": "4.8102e-05", "loss": 0.7774, "slid_loss": 0.7742, "step": 959, "time": 84.47 }, { "epoch": 0.74, "learning_rate": "4.8098e-05", "loss": 0.7834, "slid_loss": 0.7742, "step": 960, "time": 71.55 }, { "epoch": 0.74, "learning_rate": "4.8094e-05", "loss": 0.7474, "slid_loss": 0.7736, "step": 961, "time": 70.89 }, { "epoch": 0.74, "learning_rate": "4.8089e-05", "loss": 0.7681, "slid_loss": 0.7735, "step": 962, "time": 72.1 }, { "epoch": 0.75, "learning_rate": "4.8085e-05", "loss": 0.774, "slid_loss": 0.7737, "step": 963, "time": 71.49 }, { "epoch": 0.75, "learning_rate": "4.8081e-05", "loss": 0.7579, "slid_loss": 0.7736, "step": 964, "time": 72.04 }, { "epoch": 0.75, "learning_rate": "4.8077e-05", "loss": 0.7616, "slid_loss": 0.7732, "step": 965, "time": 72.4 }, { "epoch": 0.75, "learning_rate": "4.8073e-05", "loss": 0.7766, "slid_loss": 0.7733, "step": 966, "time": 75.8 }, { "epoch": 0.75, "learning_rate": "4.8068e-05", "loss": 0.7937, "slid_loss": 0.7735, "step": 967, "time": 71.03 }, { "epoch": 0.75, "learning_rate": "4.8064e-05", "loss": 0.7741, "slid_loss": 0.7738, "step": 968, "time": 70.55 }, { "epoch": 0.75, "learning_rate": "4.8060e-05", "loss": 0.7972, "slid_loss": 0.7742, "step": 969, "time": 70.85 }, { "epoch": 0.75, "learning_rate": "4.8056e-05", "loss": 0.7376, "slid_loss": 0.7739, "step": 970, "time": 70.8 }, { "epoch": 0.75, "learning_rate": "4.8052e-05", "loss": 0.7718, "slid_loss": 0.7741, "step": 971, "time": 72.83 }, { "epoch": 0.75, "learning_rate": "4.8047e-05", "loss": 0.7904, "slid_loss": 0.7745, "step": 972, "time": 70.96 }, { "epoch": 0.75, "learning_rate": "4.8043e-05", "loss": 0.8268, "slid_loss": 0.7749, "step": 973, "time": 70.6 }, { "epoch": 0.75, "learning_rate": "4.8039e-05", "loss": 0.7701, "slid_loss": 0.7747, "step": 974, "time": 71.83 }, { "epoch": 0.75, "learning_rate": "4.8035e-05", "loss": 0.7763, "slid_loss": 0.7748, "step": 975, "time": 72.55 }, { "epoch": 0.76, "learning_rate": "4.8030e-05", "loss": 0.712, "slid_loss": 0.7742, "step": 976, "time": 73.31 }, { "epoch": 0.76, "learning_rate": "4.8026e-05", "loss": 0.7778, "slid_loss": 0.7741, "step": 977, "time": 72.11 }, { "epoch": 0.76, "learning_rate": "4.8022e-05", "loss": 0.7551, "slid_loss": 0.7738, "step": 978, "time": 71.19 }, { "epoch": 0.76, "learning_rate": "4.8018e-05", "loss": 0.7835, "slid_loss": 0.774, "step": 979, "time": 71.05 }, { "epoch": 0.76, "learning_rate": "4.8013e-05", "loss": 0.7967, "slid_loss": 0.7744, "step": 980, "time": 70.71 }, { "epoch": 0.76, "learning_rate": "4.8009e-05", "loss": 0.8031, "slid_loss": 0.7739, "step": 981, "time": 71.36 }, { "epoch": 0.76, "learning_rate": "4.8005e-05", "loss": 0.7471, "slid_loss": 0.7736, "step": 982, "time": 71.62 }, { "epoch": 0.76, "learning_rate": "4.8000e-05", "loss": 0.7434, "slid_loss": 0.7731, "step": 983, "time": 71.54 }, { "epoch": 0.76, "learning_rate": "4.7996e-05", "loss": 0.8033, "slid_loss": 0.7734, "step": 984, "time": 71.65 }, { "epoch": 0.76, "learning_rate": "4.7992e-05", "loss": 0.77, "slid_loss": 0.7737, "step": 985, "time": 72.37 }, { "epoch": 0.76, "learning_rate": "4.7988e-05", "loss": 0.7775, "slid_loss": 0.7736, "step": 986, "time": 71.43 }, { "epoch": 0.76, "learning_rate": "4.7983e-05", "loss": 0.7802, "slid_loss": 0.7739, "step": 987, "time": 70.69 }, { "epoch": 0.76, "learning_rate": "4.7979e-05", "loss": 0.7979, "slid_loss": 0.7741, "step": 988, "time": 70.28 }, { "epoch": 0.77, "learning_rate": "4.7975e-05", "loss": 0.7801, "slid_loss": 0.7745, "step": 989, "time": 72.22 }, { "epoch": 0.77, "learning_rate": "4.7970e-05", "loss": 0.7733, "slid_loss": 0.7744, "step": 990, "time": 72.79 }, { "epoch": 0.77, "learning_rate": "4.7966e-05", "loss": 0.7244, "slid_loss": 0.7739, "step": 991, "time": 72.42 }, { "epoch": 0.77, "learning_rate": "4.7962e-05", "loss": 0.761, "slid_loss": 0.7737, "step": 992, "time": 71.3 }, { "epoch": 0.77, "learning_rate": "4.7957e-05", "loss": 0.7897, "slid_loss": 0.7736, "step": 993, "time": 70.14 }, { "epoch": 0.77, "learning_rate": "4.7953e-05", "loss": 0.8107, "slid_loss": 0.774, "step": 994, "time": 70.22 }, { "epoch": 0.77, "learning_rate": "4.7949e-05", "loss": 0.7448, "slid_loss": 0.774, "step": 995, "time": 70.44 }, { "epoch": 0.77, "learning_rate": "4.7944e-05", "loss": 0.7537, "slid_loss": 0.7737, "step": 996, "time": 72.9 }, { "epoch": 0.77, "learning_rate": "4.7940e-05", "loss": 0.7866, "slid_loss": 0.7738, "step": 997, "time": 72.04 }, { "epoch": 0.77, "learning_rate": "4.7936e-05", "loss": 0.7519, "slid_loss": 0.7736, "step": 998, "time": 72.23 }, { "epoch": 0.77, "learning_rate": "4.7931e-05", "loss": 0.7863, "slid_loss": 0.7743, "step": 999, "time": 73.33 }, { "epoch": 0.77, "learning_rate": "4.7927e-05", "loss": 0.8071, "slid_loss": 0.7748, "step": 1000, "time": 71.51 }, { "epoch": 0.77, "learning_rate": "4.7923e-05", "loss": 0.7892, "slid_loss": 0.7753, "step": 1001, "time": 757.44 }, { "epoch": 0.78, "learning_rate": "4.7918e-05", "loss": 0.7167, "slid_loss": 0.7748, "step": 1002, "time": 206.52 }, { "epoch": 0.78, "learning_rate": "4.7914e-05", "loss": 0.7534, "slid_loss": 0.7742, "step": 1003, "time": 71.61 }, { "epoch": 0.78, "learning_rate": "4.7910e-05", "loss": 0.7872, "slid_loss": 0.7743, "step": 1004, "time": 72.63 }, { "epoch": 0.78, "learning_rate": "4.7905e-05", "loss": 0.7673, "slid_loss": 0.774, "step": 1005, "time": 71.27 }, { "epoch": 0.78, "learning_rate": "4.7901e-05", "loss": 0.7917, "slid_loss": 0.7742, "step": 1006, "time": 72.37 }, { "epoch": 0.78, "learning_rate": "4.7896e-05", "loss": 0.7458, "slid_loss": 0.7734, "step": 1007, "time": 72.97 }, { "epoch": 0.78, "learning_rate": "4.7892e-05", "loss": 0.7833, "slid_loss": 0.7733, "step": 1008, "time": 72.16 }, { "epoch": 0.78, "learning_rate": "4.7888e-05", "loss": 0.7899, "slid_loss": 0.7735, "step": 1009, "time": 72.28 }, { "epoch": 0.78, "learning_rate": "4.7883e-05", "loss": 0.7827, "slid_loss": 0.7737, "step": 1010, "time": 72.23 }, { "epoch": 0.78, "learning_rate": "4.7879e-05", "loss": 0.7494, "slid_loss": 0.7733, "step": 1011, "time": 71.86 }, { "epoch": 0.78, "learning_rate": "4.7874e-05", "loss": 0.7763, "slid_loss": 0.7732, "step": 1012, "time": 72.2 }, { "epoch": 0.78, "learning_rate": "4.7870e-05", "loss": 0.7607, "slid_loss": 0.7732, "step": 1013, "time": 71.85 }, { "epoch": 0.78, "learning_rate": "4.7866e-05", "loss": 0.7827, "slid_loss": 0.7735, "step": 1014, "time": 72.06 }, { "epoch": 0.79, "learning_rate": "4.7861e-05", "loss": 0.783, "slid_loss": 0.7735, "step": 1015, "time": 70.7 }, { "epoch": 0.79, "learning_rate": "4.7857e-05", "loss": 0.7922, "slid_loss": 0.7738, "step": 1016, "time": 72.77 }, { "epoch": 0.79, "learning_rate": "4.7852e-05", "loss": 0.8031, "slid_loss": 0.7741, "step": 1017, "time": 72.38 }, { "epoch": 0.79, "learning_rate": "4.7848e-05", "loss": 0.7367, "slid_loss": 0.7737, "step": 1018, "time": 71.74 }, { "epoch": 0.79, "learning_rate": "4.7843e-05", "loss": 0.7484, "slid_loss": 0.7736, "step": 1019, "time": 73.5 }, { "epoch": 0.79, "learning_rate": "4.7839e-05", "loss": 0.7538, "slid_loss": 0.7735, "step": 1020, "time": 73.62 }, { "epoch": 0.79, "learning_rate": "4.7835e-05", "loss": 0.7711, "slid_loss": 0.7735, "step": 1021, "time": 70.42 }, { "epoch": 0.79, "learning_rate": "4.7830e-05", "loss": 0.7607, "slid_loss": 0.7731, "step": 1022, "time": 71.73 }, { "epoch": 0.79, "learning_rate": "4.7826e-05", "loss": 0.7701, "slid_loss": 0.7731, "step": 1023, "time": 72.77 }, { "epoch": 0.79, "learning_rate": "4.7821e-05", "loss": 0.7944, "slid_loss": 0.7729, "step": 1024, "time": 72.0 }, { "epoch": 0.79, "learning_rate": "4.7817e-05", "loss": 0.7536, "slid_loss": 0.7724, "step": 1025, "time": 71.95 }, { "epoch": 0.79, "learning_rate": "4.7812e-05", "loss": 0.7644, "slid_loss": 0.7723, "step": 1026, "time": 71.57 }, { "epoch": 0.79, "learning_rate": "4.7808e-05", "loss": 0.7432, "slid_loss": 0.7719, "step": 1027, "time": 70.84 }, { "epoch": 0.8, "learning_rate": "4.7803e-05", "loss": 0.8083, "slid_loss": 0.7722, "step": 1028, "time": 70.76 }, { "epoch": 0.8, "learning_rate": "4.7799e-05", "loss": 0.7875, "slid_loss": 0.7724, "step": 1029, "time": 71.91 }, { "epoch": 0.8, "learning_rate": "4.7794e-05", "loss": 0.7722, "slid_loss": 0.7723, "step": 1030, "time": 71.81 }, { "epoch": 0.8, "learning_rate": "4.7790e-05", "loss": 0.7658, "slid_loss": 0.7723, "step": 1031, "time": 72.17 }, { "epoch": 0.8, "learning_rate": "4.7785e-05", "loss": 0.7706, "slid_loss": 0.7723, "step": 1032, "time": 72.05 }, { "epoch": 0.8, "learning_rate": "4.7781e-05", "loss": 0.8046, "slid_loss": 0.7725, "step": 1033, "time": 71.04 }, { "epoch": 0.8, "learning_rate": "4.7776e-05", "loss": 0.8337, "slid_loss": 0.7732, "step": 1034, "time": 70.45 }, { "epoch": 0.8, "learning_rate": "4.7772e-05", "loss": 0.7791, "slid_loss": 0.7733, "step": 1035, "time": 71.54 }, { "epoch": 0.8, "learning_rate": "4.7767e-05", "loss": 0.7724, "slid_loss": 0.7733, "step": 1036, "time": 70.96 }, { "epoch": 0.8, "learning_rate": "4.7763e-05", "loss": 0.7533, "slid_loss": 0.7732, "step": 1037, "time": 72.3 }, { "epoch": 0.8, "learning_rate": "4.7758e-05", "loss": 0.7908, "slid_loss": 0.7732, "step": 1038, "time": 71.2 }, { "epoch": 0.8, "learning_rate": "4.7754e-05", "loss": 0.7773, "slid_loss": 0.7733, "step": 1039, "time": 72.36 }, { "epoch": 0.8, "learning_rate": "4.7749e-05", "loss": 0.7371, "slid_loss": 0.7731, "step": 1040, "time": 70.28 }, { "epoch": 0.81, "learning_rate": "4.7745e-05", "loss": 0.7039, "slid_loss": 0.772, "step": 1041, "time": 73.28 }, { "epoch": 0.81, "learning_rate": "4.7740e-05", "loss": 0.7261, "slid_loss": 0.7718, "step": 1042, "time": 70.85 }, { "epoch": 0.81, "learning_rate": "4.7736e-05", "loss": 0.7906, "slid_loss": 0.7722, "step": 1043, "time": 70.43 }, { "epoch": 0.81, "learning_rate": "4.7731e-05", "loss": 0.7873, "slid_loss": 0.7722, "step": 1044, "time": 71.08 }, { "epoch": 0.81, "learning_rate": "4.7727e-05", "loss": 0.7471, "slid_loss": 0.7722, "step": 1045, "time": 73.66 }, { "epoch": 0.81, "learning_rate": "4.7722e-05", "loss": 0.7978, "slid_loss": 0.7724, "step": 1046, "time": 71.47 }, { "epoch": 0.81, "learning_rate": "4.7718e-05", "loss": 0.7699, "slid_loss": 0.7724, "step": 1047, "time": 71.73 }, { "epoch": 0.81, "learning_rate": "4.7713e-05", "loss": 0.7552, "slid_loss": 0.7722, "step": 1048, "time": 71.11 }, { "epoch": 0.81, "learning_rate": "4.7708e-05", "loss": 0.8036, "slid_loss": 0.7726, "step": 1049, "time": 72.98 }, { "epoch": 0.81, "learning_rate": "4.7704e-05", "loss": 0.742, "slid_loss": 0.7721, "step": 1050, "time": 71.42 }, { "epoch": 0.81, "learning_rate": "4.7699e-05", "loss": 0.7814, "slid_loss": 0.7724, "step": 1051, "time": 71.72 }, { "epoch": 0.81, "learning_rate": "4.7695e-05", "loss": 0.8226, "slid_loss": 0.7725, "step": 1052, "time": 73.56 }, { "epoch": 0.81, "learning_rate": "4.7690e-05", "loss": 0.7813, "slid_loss": 0.7728, "step": 1053, "time": 72.67 }, { "epoch": 0.82, "learning_rate": "4.7686e-05", "loss": 0.7723, "slid_loss": 0.7728, "step": 1054, "time": 72.59 }, { "epoch": 0.82, "learning_rate": "4.7681e-05", "loss": 0.7703, "slid_loss": 0.7729, "step": 1055, "time": 72.35 }, { "epoch": 0.82, "learning_rate": "4.7676e-05", "loss": 0.7673, "slid_loss": 0.7726, "step": 1056, "time": 71.16 }, { "epoch": 0.82, "learning_rate": "4.7672e-05", "loss": 0.7553, "slid_loss": 0.7723, "step": 1057, "time": 70.87 }, { "epoch": 0.82, "learning_rate": "4.7667e-05", "loss": 0.7615, "slid_loss": 0.7724, "step": 1058, "time": 72.15 }, { "epoch": 0.82, "learning_rate": "4.7663e-05", "loss": 0.788, "slid_loss": 0.7725, "step": 1059, "time": 71.88 }, { "epoch": 0.82, "learning_rate": "4.7658e-05", "loss": 0.7796, "slid_loss": 0.7725, "step": 1060, "time": 70.63 }, { "epoch": 0.82, "learning_rate": "4.7653e-05", "loss": 0.729, "slid_loss": 0.7723, "step": 1061, "time": 72.6 }, { "epoch": 0.82, "learning_rate": "4.7649e-05", "loss": 0.7938, "slid_loss": 0.7725, "step": 1062, "time": 72.35 }, { "epoch": 0.82, "learning_rate": "4.7644e-05", "loss": 0.7425, "slid_loss": 0.7722, "step": 1063, "time": 72.94 }, { "epoch": 0.82, "learning_rate": "4.7639e-05", "loss": 0.7876, "slid_loss": 0.7725, "step": 1064, "time": 70.91 }, { "epoch": 0.82, "learning_rate": "4.7635e-05", "loss": 0.7649, "slid_loss": 0.7726, "step": 1065, "time": 73.21 }, { "epoch": 0.82, "learning_rate": "4.7630e-05", "loss": 0.7477, "slid_loss": 0.7723, "step": 1066, "time": 72.31 }, { "epoch": 0.83, "learning_rate": "4.7626e-05", "loss": 0.7221, "slid_loss": 0.7716, "step": 1067, "time": 71.12 }, { "epoch": 0.83, "learning_rate": "4.7621e-05", "loss": 0.787, "slid_loss": 0.7717, "step": 1068, "time": 71.17 }, { "epoch": 0.83, "learning_rate": "4.7616e-05", "loss": 0.8054, "slid_loss": 0.7718, "step": 1069, "time": 70.94 }, { "epoch": 0.83, "learning_rate": "4.7612e-05", "loss": 0.7917, "slid_loss": 0.7723, "step": 1070, "time": 71.12 }, { "epoch": 0.83, "learning_rate": "4.7607e-05", "loss": 0.7669, "slid_loss": 0.7723, "step": 1071, "time": 71.71 }, { "epoch": 0.83, "learning_rate": "4.7602e-05", "loss": 0.7838, "slid_loss": 0.7722, "step": 1072, "time": 72.03 }, { "epoch": 0.83, "learning_rate": "4.7598e-05", "loss": 0.7424, "slid_loss": 0.7713, "step": 1073, "time": 72.85 }, { "epoch": 0.83, "learning_rate": "4.7593e-05", "loss": 0.7673, "slid_loss": 0.7713, "step": 1074, "time": 71.64 }, { "epoch": 0.83, "learning_rate": "4.7588e-05", "loss": 0.7788, "slid_loss": 0.7713, "step": 1075, "time": 70.42 }, { "epoch": 0.83, "learning_rate": "4.7584e-05", "loss": 0.7791, "slid_loss": 0.772, "step": 1076, "time": 71.0 }, { "epoch": 0.83, "learning_rate": "4.7579e-05", "loss": 0.7378, "slid_loss": 0.7716, "step": 1077, "time": 72.0 }, { "epoch": 0.83, "learning_rate": "4.7574e-05", "loss": 0.7722, "slid_loss": 0.7718, "step": 1078, "time": 72.3 }, { "epoch": 0.83, "learning_rate": "4.7570e-05", "loss": 0.7577, "slid_loss": 0.7715, "step": 1079, "time": 71.91 }, { "epoch": 0.84, "learning_rate": "4.7565e-05", "loss": 0.7496, "slid_loss": 0.7711, "step": 1080, "time": 72.69 }, { "epoch": 0.84, "learning_rate": "4.7560e-05", "loss": 0.7943, "slid_loss": 0.771, "step": 1081, "time": 70.36 }, { "epoch": 0.84, "learning_rate": "4.7555e-05", "loss": 0.771, "slid_loss": 0.7712, "step": 1082, "time": 72.21 }, { "epoch": 0.84, "learning_rate": "4.7551e-05", "loss": 0.7678, "slid_loss": 0.7715, "step": 1083, "time": 73.26 }, { "epoch": 0.84, "learning_rate": "4.7546e-05", "loss": 0.7474, "slid_loss": 0.7709, "step": 1084, "time": 71.34 }, { "epoch": 0.84, "learning_rate": "4.7541e-05", "loss": 0.7616, "slid_loss": 0.7708, "step": 1085, "time": 71.14 }, { "epoch": 0.84, "learning_rate": "4.7537e-05", "loss": 0.77, "slid_loss": 0.7707, "step": 1086, "time": 71.38 }, { "epoch": 0.84, "learning_rate": "4.7532e-05", "loss": 0.7294, "slid_loss": 0.7702, "step": 1087, "time": 72.18 }, { "epoch": 0.84, "learning_rate": "4.7527e-05", "loss": 0.7827, "slid_loss": 0.7701, "step": 1088, "time": 71.74 }, { "epoch": 0.84, "learning_rate": "4.7522e-05", "loss": 0.7394, "slid_loss": 0.7697, "step": 1089, "time": 71.29 }, { "epoch": 0.84, "learning_rate": "4.7518e-05", "loss": 0.7737, "slid_loss": 0.7697, "step": 1090, "time": 71.54 }, { "epoch": 0.84, "learning_rate": "4.7513e-05", "loss": 0.7409, "slid_loss": 0.7698, "step": 1091, "time": 71.35 }, { "epoch": 0.84, "learning_rate": "4.7508e-05", "loss": 0.8021, "slid_loss": 0.7702, "step": 1092, "time": 70.9 }, { "epoch": 0.85, "learning_rate": "4.7503e-05", "loss": 0.787, "slid_loss": 0.7702, "step": 1093, "time": 71.53 }, { "epoch": 0.85, "learning_rate": "4.7499e-05", "loss": 0.7772, "slid_loss": 0.7699, "step": 1094, "time": 72.39 }, { "epoch": 0.85, "learning_rate": "4.7494e-05", "loss": 0.7657, "slid_loss": 0.7701, "step": 1095, "time": 70.57 }, { "epoch": 0.85, "learning_rate": "4.7489e-05", "loss": 0.7866, "slid_loss": 0.7704, "step": 1096, "time": 71.12 }, { "epoch": 0.85, "learning_rate": "4.7484e-05", "loss": 0.7779, "slid_loss": 0.7703, "step": 1097, "time": 70.9 }, { "epoch": 0.85, "learning_rate": "4.7480e-05", "loss": 0.7947, "slid_loss": 0.7708, "step": 1098, "time": 70.82 }, { "epoch": 0.85, "learning_rate": "4.7475e-05", "loss": 0.7581, "slid_loss": 0.7705, "step": 1099, "time": 69.7 }, { "epoch": 0.85, "learning_rate": "4.7470e-05", "loss": 0.8027, "slid_loss": 0.7704, "step": 1100, "time": 72.15 }, { "epoch": 0.85, "learning_rate": "4.7465e-05", "loss": 0.7737, "slid_loss": 0.7703, "step": 1101, "time": 71.23 }, { "epoch": 0.85, "learning_rate": "4.7460e-05", "loss": 0.7287, "slid_loss": 0.7704, "step": 1102, "time": 71.53 }, { "epoch": 0.85, "learning_rate": "4.7456e-05", "loss": 0.8056, "slid_loss": 0.7709, "step": 1103, "time": 71.8 }, { "epoch": 0.85, "learning_rate": "4.7451e-05", "loss": 0.7604, "slid_loss": 0.7707, "step": 1104, "time": 71.42 }, { "epoch": 0.85, "learning_rate": "4.7446e-05", "loss": 0.7645, "slid_loss": 0.7706, "step": 1105, "time": 71.84 }, { "epoch": 0.86, "learning_rate": "4.7441e-05", "loss": 0.7901, "slid_loss": 0.7706, "step": 1106, "time": 73.2 }, { "epoch": 0.86, "learning_rate": "4.7436e-05", "loss": 0.7642, "slid_loss": 0.7708, "step": 1107, "time": 98.68 }, { "epoch": 0.86, "learning_rate": "4.7432e-05", "loss": 0.7681, "slid_loss": 0.7706, "step": 1108, "time": 114.65 }, { "epoch": 0.86, "learning_rate": "4.7427e-05", "loss": 0.7856, "slid_loss": 0.7706, "step": 1109, "time": 125.75 }, { "epoch": 0.86, "learning_rate": "4.7422e-05", "loss": 0.7562, "slid_loss": 0.7703, "step": 1110, "time": 169.38 }, { "epoch": 0.86, "learning_rate": "4.7417e-05", "loss": 0.7307, "slid_loss": 0.7702, "step": 1111, "time": 174.54 }, { "epoch": 0.86, "learning_rate": "4.7412e-05", "loss": 0.7667, "slid_loss": 0.7701, "step": 1112, "time": 170.13 }, { "epoch": 0.86, "learning_rate": "4.7407e-05", "loss": 0.7867, "slid_loss": 0.7703, "step": 1113, "time": 166.95 }, { "epoch": 0.86, "learning_rate": "4.7403e-05", "loss": 0.7939, "slid_loss": 0.7704, "step": 1114, "time": 176.99 }, { "epoch": 0.86, "learning_rate": "4.7398e-05", "loss": 0.8105, "slid_loss": 0.7707, "step": 1115, "time": 139.0 }, { "epoch": 0.86, "learning_rate": "4.7393e-05", "loss": 0.7662, "slid_loss": 0.7704, "step": 1116, "time": 110.38 }, { "epoch": 0.86, "learning_rate": "4.7388e-05", "loss": 0.7576, "slid_loss": 0.77, "step": 1117, "time": 82.58 }, { "epoch": 0.86, "learning_rate": "4.7383e-05", "loss": 0.7809, "slid_loss": 0.7704, "step": 1118, "time": 83.35 }, { "epoch": 0.87, "learning_rate": "4.7378e-05", "loss": 0.7898, "slid_loss": 0.7708, "step": 1119, "time": 72.54 }, { "epoch": 0.87, "learning_rate": "4.7373e-05", "loss": 0.7524, "slid_loss": 0.7708, "step": 1120, "time": 71.15 }, { "epoch": 0.87, "learning_rate": "4.7369e-05", "loss": 0.7524, "slid_loss": 0.7706, "step": 1121, "time": 71.62 }, { "epoch": 0.87, "learning_rate": "4.7364e-05", "loss": 0.7326, "slid_loss": 0.7704, "step": 1122, "time": 72.63 }, { "epoch": 0.87, "learning_rate": "4.7359e-05", "loss": 0.7767, "slid_loss": 0.7704, "step": 1123, "time": 71.11 }, { "epoch": 0.87, "learning_rate": "4.7354e-05", "loss": 0.777, "slid_loss": 0.7703, "step": 1124, "time": 72.42 }, { "epoch": 0.87, "learning_rate": "4.7349e-05", "loss": 0.7588, "slid_loss": 0.7703, "step": 1125, "time": 72.91 }, { "epoch": 0.87, "learning_rate": "4.7344e-05", "loss": 0.7413, "slid_loss": 0.7701, "step": 1126, "time": 70.0 }, { "epoch": 0.87, "learning_rate": "4.7339e-05", "loss": 0.7815, "slid_loss": 0.7705, "step": 1127, "time": 72.47 }, { "epoch": 0.87, "learning_rate": "4.7334e-05", "loss": 0.739, "slid_loss": 0.7698, "step": 1128, "time": 72.47 }, { "epoch": 0.87, "learning_rate": "4.7330e-05", "loss": 0.7212, "slid_loss": 0.7691, "step": 1129, "time": 71.43 }, { "epoch": 0.87, "learning_rate": "4.7325e-05", "loss": 0.761, "slid_loss": 0.769, "step": 1130, "time": 71.17 }, { "epoch": 0.88, "learning_rate": "4.7320e-05", "loss": 0.7724, "slid_loss": 0.7691, "step": 1131, "time": 70.41 }, { "epoch": 0.88, "learning_rate": "4.7315e-05", "loss": 0.7348, "slid_loss": 0.7687, "step": 1132, "time": 72.12 }, { "epoch": 0.88, "learning_rate": "4.7310e-05", "loss": 0.7612, "slid_loss": 0.7683, "step": 1133, "time": 71.82 }, { "epoch": 0.88, "learning_rate": "4.7305e-05", "loss": 0.7676, "slid_loss": 0.7676, "step": 1134, "time": 72.03 }, { "epoch": 0.88, "learning_rate": "4.7300e-05", "loss": 0.7475, "slid_loss": 0.7673, "step": 1135, "time": 72.28 }, { "epoch": 0.88, "learning_rate": "4.7295e-05", "loss": 0.7764, "slid_loss": 0.7673, "step": 1136, "time": 70.48 }, { "epoch": 0.88, "learning_rate": "4.7290e-05", "loss": 0.7868, "slid_loss": 0.7677, "step": 1137, "time": 71.17 }, { "epoch": 0.88, "learning_rate": "4.7285e-05", "loss": 0.7273, "slid_loss": 0.767, "step": 1138, "time": 72.38 }, { "epoch": 0.88, "learning_rate": "4.7280e-05", "loss": 0.7175, "slid_loss": 0.7664, "step": 1139, "time": 71.01 }, { "epoch": 0.88, "learning_rate": "4.7275e-05", "loss": 0.7623, "slid_loss": 0.7667, "step": 1140, "time": 71.0 }, { "epoch": 0.88, "learning_rate": "4.7270e-05", "loss": 0.7617, "slid_loss": 0.7673, "step": 1141, "time": 71.32 }, { "epoch": 0.88, "learning_rate": "4.7265e-05", "loss": 0.7671, "slid_loss": 0.7677, "step": 1142, "time": 70.56 }, { "epoch": 0.88, "learning_rate": "4.7260e-05", "loss": 0.7938, "slid_loss": 0.7677, "step": 1143, "time": 71.98 }, { "epoch": 0.89, "learning_rate": "4.7255e-05", "loss": 0.7635, "slid_loss": 0.7675, "step": 1144, "time": 71.57 }, { "epoch": 0.89, "learning_rate": "4.7250e-05", "loss": 0.7894, "slid_loss": 0.7679, "step": 1145, "time": 70.98 }, { "epoch": 0.89, "learning_rate": "4.7246e-05", "loss": 0.7323, "slid_loss": 0.7672, "step": 1146, "time": 71.97 }, { "epoch": 0.89, "learning_rate": "4.7241e-05", "loss": 0.7962, "slid_loss": 0.7675, "step": 1147, "time": 69.74 }, { "epoch": 0.89, "learning_rate": "4.7236e-05", "loss": 0.7542, "slid_loss": 0.7675, "step": 1148, "time": 70.69 }, { "epoch": 0.89, "learning_rate": "4.7231e-05", "loss": 0.7106, "slid_loss": 0.7666, "step": 1149, "time": 71.57 }, { "epoch": 0.89, "learning_rate": "4.7226e-05", "loss": 0.769, "slid_loss": 0.7668, "step": 1150, "time": 71.03 }, { "epoch": 0.89, "learning_rate": "4.7221e-05", "loss": 0.7584, "slid_loss": 0.7666, "step": 1151, "time": 72.37 }, { "epoch": 0.89, "learning_rate": "4.7216e-05", "loss": 0.7576, "slid_loss": 0.7659, "step": 1152, "time": 71.98 }, { "epoch": 0.89, "learning_rate": "4.7211e-05", "loss": 0.7785, "slid_loss": 0.7659, "step": 1153, "time": 70.36 }, { "epoch": 0.89, "learning_rate": "4.7206e-05", "loss": 0.7925, "slid_loss": 0.7661, "step": 1154, "time": 70.51 }, { "epoch": 0.89, "learning_rate": "4.7201e-05", "loss": 0.7552, "slid_loss": 0.766, "step": 1155, "time": 72.19 }, { "epoch": 0.89, "learning_rate": "4.7196e-05", "loss": 0.7303, "slid_loss": 0.7656, "step": 1156, "time": 71.99 }, { "epoch": 0.9, "learning_rate": "4.7191e-05", "loss": 0.7686, "slid_loss": 0.7657, "step": 1157, "time": 71.96 }, { "epoch": 0.9, "learning_rate": "4.7186e-05", "loss": 0.7294, "slid_loss": 0.7654, "step": 1158, "time": 71.96 }, { "epoch": 0.9, "learning_rate": "4.7180e-05", "loss": 0.7745, "slid_loss": 0.7653, "step": 1159, "time": 71.74 }, { "epoch": 0.9, "learning_rate": "4.7175e-05", "loss": 0.7582, "slid_loss": 0.7651, "step": 1160, "time": 71.59 }, { "epoch": 0.9, "learning_rate": "4.7170e-05", "loss": 0.761, "slid_loss": 0.7654, "step": 1161, "time": 71.31 }, { "epoch": 0.9, "learning_rate": "4.7165e-05", "loss": 0.7752, "slid_loss": 0.7652, "step": 1162, "time": 72.23 }, { "epoch": 0.9, "learning_rate": "4.7160e-05", "loss": 0.8099, "slid_loss": 0.7659, "step": 1163, "time": 71.73 }, { "epoch": 0.9, "learning_rate": "4.7155e-05", "loss": 0.7458, "slid_loss": 0.7655, "step": 1164, "time": 71.32 }, { "epoch": 0.9, "learning_rate": "4.7150e-05", "loss": 0.7668, "slid_loss": 0.7655, "step": 1165, "time": 72.95 }, { "epoch": 0.9, "learning_rate": "4.7145e-05", "loss": 0.7752, "slid_loss": 0.7657, "step": 1166, "time": 72.39 }, { "epoch": 0.9, "learning_rate": "4.7140e-05", "loss": 0.776, "slid_loss": 0.7663, "step": 1167, "time": 72.02 }, { "epoch": 0.9, "learning_rate": "4.7135e-05", "loss": 0.7514, "slid_loss": 0.7659, "step": 1168, "time": 71.09 }, { "epoch": 0.9, "learning_rate": "4.7130e-05", "loss": 0.7619, "slid_loss": 0.7655, "step": 1169, "time": 73.02 }, { "epoch": 0.91, "learning_rate": "4.7125e-05", "loss": 0.7466, "slid_loss": 0.765, "step": 1170, "time": 71.62 }, { "epoch": 0.91, "learning_rate": "4.7120e-05", "loss": 0.7917, "slid_loss": 0.7653, "step": 1171, "time": 71.89 }, { "epoch": 0.91, "learning_rate": "4.7115e-05", "loss": 0.76, "slid_loss": 0.7651, "step": 1172, "time": 72.3 }, { "epoch": 0.91, "learning_rate": "4.7110e-05", "loss": 0.7032, "slid_loss": 0.7647, "step": 1173, "time": 70.44 }, { "epoch": 0.91, "learning_rate": "4.7105e-05", "loss": 0.7642, "slid_loss": 0.7646, "step": 1174, "time": 72.2 }, { "epoch": 0.91, "learning_rate": "4.7099e-05", "loss": 0.7675, "slid_loss": 0.7645, "step": 1175, "time": 72.05 }, { "epoch": 0.91, "learning_rate": "4.7094e-05", "loss": 0.7895, "slid_loss": 0.7646, "step": 1176, "time": 71.53 }, { "epoch": 0.91, "learning_rate": "4.7089e-05", "loss": 0.7545, "slid_loss": 0.7648, "step": 1177, "time": 71.09 }, { "epoch": 0.91, "learning_rate": "4.7084e-05", "loss": 0.7947, "slid_loss": 0.765, "step": 1178, "time": 72.89 }, { "epoch": 0.91, "learning_rate": "4.7079e-05", "loss": 0.7771, "slid_loss": 0.7652, "step": 1179, "time": 72.44 }, { "epoch": 0.91, "learning_rate": "4.7074e-05", "loss": 0.747, "slid_loss": 0.7652, "step": 1180, "time": 70.98 }, { "epoch": 0.91, "learning_rate": "4.7069e-05", "loss": 0.7559, "slid_loss": 0.7648, "step": 1181, "time": 71.95 }, { "epoch": 0.91, "learning_rate": "4.7064e-05", "loss": 0.7629, "slid_loss": 0.7647, "step": 1182, "time": 72.57 }, { "epoch": 0.92, "learning_rate": "4.7059e-05", "loss": 0.7573, "slid_loss": 0.7646, "step": 1183, "time": 70.38 }, { "epoch": 0.92, "learning_rate": "4.7053e-05", "loss": 0.7768, "slid_loss": 0.7649, "step": 1184, "time": 72.33 }, { "epoch": 0.92, "learning_rate": "4.7048e-05", "loss": 0.7752, "slid_loss": 0.765, "step": 1185, "time": 72.34 }, { "epoch": 0.92, "learning_rate": "4.7043e-05", "loss": 0.7666, "slid_loss": 0.765, "step": 1186, "time": 71.34 }, { "epoch": 0.92, "learning_rate": "4.7038e-05", "loss": 0.7161, "slid_loss": 0.7649, "step": 1187, "time": 70.96 }, { "epoch": 0.92, "learning_rate": "4.7033e-05", "loss": 0.7579, "slid_loss": 0.7646, "step": 1188, "time": 72.15 }, { "epoch": 0.92, "learning_rate": "4.7028e-05", "loss": 0.7104, "slid_loss": 0.7643, "step": 1189, "time": 125.17 }, { "epoch": 0.92, "learning_rate": "4.7023e-05", "loss": 0.7457, "slid_loss": 0.7641, "step": 1190, "time": 69.95 }, { "epoch": 0.92, "learning_rate": "4.7017e-05", "loss": 0.7795, "slid_loss": 0.7644, "step": 1191, "time": 122.14 }, { "epoch": 0.92, "learning_rate": "4.7012e-05", "loss": 0.7893, "slid_loss": 0.7643, "step": 1192, "time": 71.7 }, { "epoch": 0.92, "learning_rate": "4.7007e-05", "loss": 0.7898, "slid_loss": 0.7643, "step": 1193, "time": 71.53 }, { "epoch": 0.92, "learning_rate": "4.7002e-05", "loss": 0.7527, "slid_loss": 0.7641, "step": 1194, "time": 72.36 }, { "epoch": 0.92, "learning_rate": "4.6997e-05", "loss": 0.7278, "slid_loss": 0.7637, "step": 1195, "time": 71.1 }, { "epoch": 0.93, "learning_rate": "4.6992e-05", "loss": 0.7881, "slid_loss": 0.7637, "step": 1196, "time": 70.99 }, { "epoch": 0.93, "learning_rate": "4.6986e-05", "loss": 0.7938, "slid_loss": 0.7639, "step": 1197, "time": 71.26 }, { "epoch": 0.93, "learning_rate": "4.6981e-05", "loss": 0.7894, "slid_loss": 0.7638, "step": 1198, "time": 73.14 }, { "epoch": 0.93, "learning_rate": "4.6976e-05", "loss": 0.7725, "slid_loss": 0.764, "step": 1199, "time": 72.38 }, { "epoch": 0.93, "learning_rate": "4.6971e-05", "loss": 0.7503, "slid_loss": 0.7635, "step": 1200, "time": 71.03 }, { "epoch": 0.93, "learning_rate": "4.6966e-05", "loss": 0.7515, "slid_loss": 0.7632, "step": 1201, "time": 844.5 }, { "epoch": 0.93, "learning_rate": "4.6960e-05", "loss": 0.7797, "slid_loss": 0.7638, "step": 1202, "time": 71.58 }, { "epoch": 0.93, "learning_rate": "4.6955e-05", "loss": 0.7463, "slid_loss": 0.7632, "step": 1203, "time": 71.78 }, { "epoch": 0.93, "learning_rate": "4.6950e-05", "loss": 0.7586, "slid_loss": 0.7631, "step": 1204, "time": 72.52 }, { "epoch": 0.93, "learning_rate": "4.6945e-05", "loss": 0.7816, "slid_loss": 0.7633, "step": 1205, "time": 71.0 }, { "epoch": 0.93, "learning_rate": "4.6940e-05", "loss": 0.7354, "slid_loss": 0.7628, "step": 1206, "time": 72.24 }, { "epoch": 0.93, "learning_rate": "4.6934e-05", "loss": 0.7144, "slid_loss": 0.7623, "step": 1207, "time": 71.51 }, { "epoch": 0.93, "learning_rate": "4.6929e-05", "loss": 0.7602, "slid_loss": 0.7622, "step": 1208, "time": 70.6 }, { "epoch": 0.94, "learning_rate": "4.6924e-05", "loss": 0.7406, "slid_loss": 0.7617, "step": 1209, "time": 72.68 }, { "epoch": 0.94, "learning_rate": "4.6919e-05", "loss": 0.7926, "slid_loss": 0.7621, "step": 1210, "time": 72.52 }, { "epoch": 0.94, "learning_rate": "4.6913e-05", "loss": 0.7614, "slid_loss": 0.7624, "step": 1211, "time": 71.73 }, { "epoch": 0.94, "learning_rate": "4.6908e-05", "loss": 0.7344, "slid_loss": 0.7621, "step": 1212, "time": 71.25 }, { "epoch": 0.94, "learning_rate": "4.6903e-05", "loss": 0.7582, "slid_loss": 0.7618, "step": 1213, "time": 71.46 }, { "epoch": 0.94, "learning_rate": "4.6898e-05", "loss": 0.7331, "slid_loss": 0.7612, "step": 1214, "time": 71.72 }, { "epoch": 0.94, "learning_rate": "4.6892e-05", "loss": 0.7736, "slid_loss": 0.7608, "step": 1215, "time": 71.8 }, { "epoch": 0.94, "learning_rate": "4.6887e-05", "loss": 0.7648, "slid_loss": 0.7608, "step": 1216, "time": 70.69 }, { "epoch": 0.94, "learning_rate": "4.6882e-05", "loss": 0.7404, "slid_loss": 0.7606, "step": 1217, "time": 69.62 }, { "epoch": 0.94, "learning_rate": "4.6877e-05", "loss": 0.744, "slid_loss": 0.7603, "step": 1218, "time": 72.16 }, { "epoch": 0.94, "learning_rate": "4.6871e-05", "loss": 0.7657, "slid_loss": 0.76, "step": 1219, "time": 71.49 }, { "epoch": 0.94, "learning_rate": "4.6866e-05", "loss": 0.8002, "slid_loss": 0.7605, "step": 1220, "time": 72.17 }, { "epoch": 0.94, "learning_rate": "4.6861e-05", "loss": 0.7298, "slid_loss": 0.7603, "step": 1221, "time": 70.51 }, { "epoch": 0.95, "learning_rate": "4.6855e-05", "loss": 0.7611, "slid_loss": 0.7606, "step": 1222, "time": 70.75 }, { "epoch": 0.95, "learning_rate": "4.6850e-05", "loss": 0.7722, "slid_loss": 0.7605, "step": 1223, "time": 72.32 }, { "epoch": 0.95, "learning_rate": "4.6845e-05", "loss": 0.7831, "slid_loss": 0.7606, "step": 1224, "time": 71.88 }, { "epoch": 0.95, "learning_rate": "4.6840e-05", "loss": 0.779, "slid_loss": 0.7608, "step": 1225, "time": 71.36 }, { "epoch": 0.95, "learning_rate": "4.6834e-05", "loss": 0.7465, "slid_loss": 0.7608, "step": 1226, "time": 71.26 }, { "epoch": 0.95, "learning_rate": "4.6829e-05", "loss": 0.7666, "slid_loss": 0.7607, "step": 1227, "time": 71.7 }, { "epoch": 0.95, "learning_rate": "4.6824e-05", "loss": 0.7885, "slid_loss": 0.7612, "step": 1228, "time": 71.6 }, { "epoch": 0.95, "learning_rate": "4.6818e-05", "loss": 0.7627, "slid_loss": 0.7616, "step": 1229, "time": 72.06 }, { "epoch": 0.95, "learning_rate": "4.6813e-05", "loss": 0.7717, "slid_loss": 0.7617, "step": 1230, "time": 71.36 }, { "epoch": 0.95, "learning_rate": "4.6808e-05", "loss": 0.7558, "slid_loss": 0.7615, "step": 1231, "time": 72.39 }, { "epoch": 0.95, "learning_rate": "4.6802e-05", "loss": 0.7237, "slid_loss": 0.7614, "step": 1232, "time": 72.95 }, { "epoch": 0.95, "learning_rate": "4.6797e-05", "loss": 0.7815, "slid_loss": 0.7616, "step": 1233, "time": 70.04 }, { "epoch": 0.95, "learning_rate": "4.6792e-05", "loss": 0.7301, "slid_loss": 0.7613, "step": 1234, "time": 71.86 }, { "epoch": 0.96, "learning_rate": "4.6786e-05", "loss": 0.7722, "slid_loss": 0.7615, "step": 1235, "time": 72.61 }, { "epoch": 0.96, "learning_rate": "4.6781e-05", "loss": 0.7663, "slid_loss": 0.7614, "step": 1236, "time": 72.84 }, { "epoch": 0.96, "learning_rate": "4.6776e-05", "loss": 0.7858, "slid_loss": 0.7614, "step": 1237, "time": 71.15 }, { "epoch": 0.96, "learning_rate": "4.6770e-05", "loss": 0.7325, "slid_loss": 0.7614, "step": 1238, "time": 71.35 }, { "epoch": 0.96, "learning_rate": "4.6765e-05", "loss": 0.7559, "slid_loss": 0.7618, "step": 1239, "time": 71.43 }, { "epoch": 0.96, "learning_rate": "4.6760e-05", "loss": 0.7362, "slid_loss": 0.7616, "step": 1240, "time": 70.87 }, { "epoch": 0.96, "learning_rate": "4.6754e-05", "loss": 0.7807, "slid_loss": 0.7618, "step": 1241, "time": 70.68 }, { "epoch": 0.96, "learning_rate": "4.6749e-05", "loss": 0.7641, "slid_loss": 0.7617, "step": 1242, "time": 73.06 }, { "epoch": 0.96, "learning_rate": "4.6743e-05", "loss": 0.7416, "slid_loss": 0.7612, "step": 1243, "time": 72.13 }, { "epoch": 0.96, "learning_rate": "4.6738e-05", "loss": 0.7717, "slid_loss": 0.7613, "step": 1244, "time": 71.08 }, { "epoch": 0.96, "learning_rate": "4.6733e-05", "loss": 0.7921, "slid_loss": 0.7613, "step": 1245, "time": 72.6 }, { "epoch": 0.96, "learning_rate": "4.6727e-05", "loss": 0.746, "slid_loss": 0.7614, "step": 1246, "time": 71.62 }, { "epoch": 0.96, "learning_rate": "4.6722e-05", "loss": 0.7971, "slid_loss": 0.7615, "step": 1247, "time": 73.5 }, { "epoch": 0.97, "learning_rate": "4.6717e-05", "loss": 0.7215, "slid_loss": 0.7611, "step": 1248, "time": 71.65 }, { "epoch": 0.97, "learning_rate": "4.6711e-05", "loss": 0.7747, "slid_loss": 0.7618, "step": 1249, "time": 72.58 }, { "epoch": 0.97, "learning_rate": "4.6706e-05", "loss": 0.748, "slid_loss": 0.7616, "step": 1250, "time": 71.76 }, { "epoch": 0.97, "learning_rate": "4.6700e-05", "loss": 0.7738, "slid_loss": 0.7617, "step": 1251, "time": 71.79 }, { "epoch": 0.97, "learning_rate": "4.6695e-05", "loss": 0.7845, "slid_loss": 0.762, "step": 1252, "time": 70.26 }, { "epoch": 0.97, "learning_rate": "4.6690e-05", "loss": 0.8052, "slid_loss": 0.7623, "step": 1253, "time": 71.36 }, { "epoch": 0.97, "learning_rate": "4.6684e-05", "loss": 0.7554, "slid_loss": 0.7619, "step": 1254, "time": 73.27 }, { "epoch": 0.97, "learning_rate": "4.6679e-05", "loss": 0.7455, "slid_loss": 0.7618, "step": 1255, "time": 71.85 }, { "epoch": 0.97, "learning_rate": "4.6673e-05", "loss": 0.7642, "slid_loss": 0.7621, "step": 1256, "time": 70.96 }, { "epoch": 0.97, "learning_rate": "4.6668e-05", "loss": 0.781, "slid_loss": 0.7622, "step": 1257, "time": 70.35 }, { "epoch": 0.97, "learning_rate": "4.6662e-05", "loss": 0.7605, "slid_loss": 0.7626, "step": 1258, "time": 72.56 }, { "epoch": 0.97, "learning_rate": "4.6657e-05", "loss": 0.7859, "slid_loss": 0.7627, "step": 1259, "time": 71.48 }, { "epoch": 0.97, "learning_rate": "4.6652e-05", "loss": 0.7333, "slid_loss": 0.7624, "step": 1260, "time": 71.68 }, { "epoch": 0.98, "learning_rate": "4.6646e-05", "loss": 0.7887, "slid_loss": 0.7627, "step": 1261, "time": 70.46 }, { "epoch": 0.98, "learning_rate": "4.6641e-05", "loss": 0.7664, "slid_loss": 0.7626, "step": 1262, "time": 71.54 }, { "epoch": 0.98, "learning_rate": "4.6635e-05", "loss": 0.7719, "slid_loss": 0.7622, "step": 1263, "time": 70.84 }, { "epoch": 0.98, "learning_rate": "4.6630e-05", "loss": 0.7973, "slid_loss": 0.7627, "step": 1264, "time": 73.38 }, { "epoch": 0.98, "learning_rate": "4.6624e-05", "loss": 0.7926, "slid_loss": 0.763, "step": 1265, "time": 97.0 }, { "epoch": 0.98, "learning_rate": "4.6619e-05", "loss": 0.763, "slid_loss": 0.7629, "step": 1266, "time": 96.14 }, { "epoch": 0.98, "learning_rate": "4.6613e-05", "loss": 0.7685, "slid_loss": 0.7628, "step": 1267, "time": 130.06 }, { "epoch": 0.98, "learning_rate": "4.6608e-05", "loss": 0.7295, "slid_loss": 0.7626, "step": 1268, "time": 133.09 }, { "epoch": 0.98, "learning_rate": "4.6602e-05", "loss": 0.7781, "slid_loss": 0.7628, "step": 1269, "time": 153.16 }, { "epoch": 0.98, "learning_rate": "4.6597e-05", "loss": 0.7687, "slid_loss": 0.763, "step": 1270, "time": 143.45 }, { "epoch": 0.98, "learning_rate": "4.6591e-05", "loss": 0.7588, "slid_loss": 0.7626, "step": 1271, "time": 166.98 }, { "epoch": 0.98, "learning_rate": "4.6586e-05", "loss": 0.7874, "slid_loss": 0.7629, "step": 1272, "time": 141.96 }, { "epoch": 0.98, "learning_rate": "4.6580e-05", "loss": 0.7363, "slid_loss": 0.7632, "step": 1273, "time": 158.0 }, { "epoch": 0.99, "learning_rate": "4.6575e-05", "loss": 0.7472, "slid_loss": 0.7631, "step": 1274, "time": 142.45 }, { "epoch": 0.99, "learning_rate": "4.6569e-05", "loss": 0.7651, "slid_loss": 0.7631, "step": 1275, "time": 108.59 }, { "epoch": 0.99, "learning_rate": "4.6564e-05", "loss": 0.6979, "slid_loss": 0.7621, "step": 1276, "time": 83.99 }, { "epoch": 0.99, "learning_rate": "4.6558e-05", "loss": 0.7624, "slid_loss": 0.7622, "step": 1277, "time": 82.8 }, { "epoch": 0.99, "learning_rate": "4.6553e-05", "loss": 0.7654, "slid_loss": 0.7619, "step": 1278, "time": 71.35 }, { "epoch": 0.99, "learning_rate": "4.6547e-05", "loss": 0.7501, "slid_loss": 0.7617, "step": 1279, "time": 71.72 }, { "epoch": 0.99, "learning_rate": "4.6542e-05", "loss": 0.7414, "slid_loss": 0.7616, "step": 1280, "time": 70.83 }, { "epoch": 0.99, "learning_rate": "4.6536e-05", "loss": 0.7653, "slid_loss": 0.7617, "step": 1281, "time": 71.83 }, { "epoch": 0.99, "learning_rate": "4.6531e-05", "loss": 0.7549, "slid_loss": 0.7616, "step": 1282, "time": 72.13 }, { "epoch": 0.99, "learning_rate": "4.6525e-05", "loss": 0.7783, "slid_loss": 0.7618, "step": 1283, "time": 70.92 }, { "epoch": 0.99, "learning_rate": "4.6520e-05", "loss": 0.7777, "slid_loss": 0.7618, "step": 1284, "time": 71.25 }, { "epoch": 0.99, "learning_rate": "4.6514e-05", "loss": 0.7293, "slid_loss": 0.7614, "step": 1285, "time": 72.16 }, { "epoch": 0.99, "learning_rate": "4.6509e-05", "loss": 0.7437, "slid_loss": 0.7611, "step": 1286, "time": 71.48 }, { "epoch": 1.0, "learning_rate": "4.6503e-05", "loss": 0.7641, "slid_loss": 0.7616, "step": 1287, "time": 71.54 }, { "epoch": 1.0, "learning_rate": "4.6498e-05", "loss": 0.7371, "slid_loss": 0.7614, "step": 1288, "time": 72.72 }, { "epoch": 1.0, "learning_rate": "4.6492e-05", "loss": 0.7378, "slid_loss": 0.7617, "step": 1289, "time": 71.0 }, { "epoch": 1.0, "learning_rate": "4.6486e-05", "loss": 0.7477, "slid_loss": 0.7617, "step": 1290, "time": 72.17 }, { "epoch": 1.0, "learning_rate": "4.6481e-05", "loss": 0.7689, "slid_loss": 0.7616, "step": 1291, "time": 72.16 }, { "epoch": 1.0, "learning_rate": "4.6475e-05", "loss": 0.7525, "slid_loss": 0.7612, "step": 1292, "time": 71.32 }, { "epoch": 1.0, "learning_rate": "4.6470e-05", "loss": 0.6507, "slid_loss": 0.7598, "step": 1293, "time": 131.39 }, { "epoch": 1.0, "learning_rate": "4.6464e-05", "loss": 0.7571, "slid_loss": 0.7599, "step": 1294, "time": 71.38 }, { "epoch": 1.0, "learning_rate": "4.6459e-05", "loss": 0.759, "slid_loss": 0.7602, "step": 1295, "time": 71.74 }, { "epoch": 1.0, "learning_rate": "4.6453e-05", "loss": 0.778, "slid_loss": 0.7601, "step": 1296, "time": 71.24 }, { "epoch": 1.0, "learning_rate": "4.6447e-05", "loss": 0.7893, "slid_loss": 0.7601, "step": 1297, "time": 71.58 }, { "epoch": 1.0, "learning_rate": "4.6442e-05", "loss": 0.7511, "slid_loss": 0.7597, "step": 1298, "time": 72.88 }, { "epoch": 1.01, "learning_rate": "4.6436e-05", "loss": 0.7554, "slid_loss": 0.7595, "step": 1299, "time": 72.98 }, { "epoch": 1.01, "learning_rate": "4.6431e-05", "loss": 0.7572, "slid_loss": 0.7596, "step": 1300, "time": 71.35 }, { "epoch": 1.01, "learning_rate": "4.6425e-05", "loss": 0.7937, "slid_loss": 0.76, "step": 1301, "time": 71.34 }, { "epoch": 1.01, "learning_rate": "4.6419e-05", "loss": 0.7987, "slid_loss": 0.7602, "step": 1302, "time": 70.87 }, { "epoch": 1.01, "learning_rate": "4.6414e-05", "loss": 0.7289, "slid_loss": 0.76, "step": 1303, "time": 71.37 }, { "epoch": 1.01, "learning_rate": "4.6408e-05", "loss": 0.7893, "slid_loss": 0.7603, "step": 1304, "time": 71.02 }, { "epoch": 1.01, "learning_rate": "4.6403e-05", "loss": 0.7569, "slid_loss": 0.7601, "step": 1305, "time": 71.52 }, { "epoch": 1.01, "learning_rate": "4.6397e-05", "loss": 0.7908, "slid_loss": 0.7606, "step": 1306, "time": 71.11 }, { "epoch": 1.01, "learning_rate": "4.6391e-05", "loss": 0.789, "slid_loss": 0.7614, "step": 1307, "time": 71.44 }, { "epoch": 1.01, "learning_rate": "4.6386e-05", "loss": 0.7672, "slid_loss": 0.7614, "step": 1308, "time": 71.92 }, { "epoch": 1.01, "learning_rate": "4.6380e-05", "loss": 0.7856, "slid_loss": 0.7619, "step": 1309, "time": 90.48 }, { "epoch": 1.01, "learning_rate": "4.6374e-05", "loss": 0.7435, "slid_loss": 0.7614, "step": 1310, "time": 75.22 }, { "epoch": 1.01, "learning_rate": "4.6369e-05", "loss": 0.7697, "slid_loss": 0.7615, "step": 1311, "time": 71.69 }, { "epoch": 1.02, "learning_rate": "4.6363e-05", "loss": 0.7504, "slid_loss": 0.7616, "step": 1312, "time": 71.75 }, { "epoch": 1.02, "learning_rate": "4.6357e-05", "loss": 0.7742, "slid_loss": 0.7618, "step": 1313, "time": 70.74 }, { "epoch": 1.02, "learning_rate": "4.6352e-05", "loss": 0.7055, "slid_loss": 0.7615, "step": 1314, "time": 72.81 }, { "epoch": 1.02, "learning_rate": "4.6346e-05", "loss": 0.7873, "slid_loss": 0.7617, "step": 1315, "time": 71.97 }, { "epoch": 1.02, "learning_rate": "4.6341e-05", "loss": 0.7702, "slid_loss": 0.7617, "step": 1316, "time": 72.95 }, { "epoch": 1.02, "learning_rate": "4.6335e-05", "loss": 0.7865, "slid_loss": 0.7622, "step": 1317, "time": 71.33 }, { "epoch": 1.02, "learning_rate": "4.6329e-05", "loss": 0.7722, "slid_loss": 0.7625, "step": 1318, "time": 71.09 }, { "epoch": 1.02, "learning_rate": "4.6323e-05", "loss": 0.7351, "slid_loss": 0.7621, "step": 1319, "time": 70.87 }, { "epoch": 1.02, "learning_rate": "4.6318e-05", "loss": 0.7608, "slid_loss": 0.7618, "step": 1320, "time": 72.06 }, { "epoch": 1.02, "learning_rate": "4.6312e-05", "loss": 0.7434, "slid_loss": 0.7619, "step": 1321, "time": 71.41 }, { "epoch": 1.02, "learning_rate": "4.6306e-05", "loss": 0.7695, "slid_loss": 0.762, "step": 1322, "time": 71.46 }, { "epoch": 1.02, "learning_rate": "4.6301e-05", "loss": 0.7458, "slid_loss": 0.7617, "step": 1323, "time": 72.57 }, { "epoch": 1.02, "learning_rate": "4.6295e-05", "loss": 0.7557, "slid_loss": 0.7614, "step": 1324, "time": 71.52 }, { "epoch": 1.03, "learning_rate": "4.6289e-05", "loss": 0.7451, "slid_loss": 0.7611, "step": 1325, "time": 71.25 }, { "epoch": 1.03, "learning_rate": "4.6284e-05", "loss": 0.74, "slid_loss": 0.761, "step": 1326, "time": 71.36 }, { "epoch": 1.03, "learning_rate": "4.6278e-05", "loss": 0.7361, "slid_loss": 0.7607, "step": 1327, "time": 72.92 }, { "epoch": 1.03, "learning_rate": "4.6272e-05", "loss": 0.7795, "slid_loss": 0.7606, "step": 1328, "time": 71.68 }, { "epoch": 1.03, "learning_rate": "4.6267e-05", "loss": 0.752, "slid_loss": 0.7605, "step": 1329, "time": 71.54 }, { "epoch": 1.03, "learning_rate": "4.6261e-05", "loss": 0.7633, "slid_loss": 0.7604, "step": 1330, "time": 71.52 }, { "epoch": 1.03, "learning_rate": "4.6255e-05", "loss": 0.7302, "slid_loss": 0.7602, "step": 1331, "time": 71.83 }, { "epoch": 1.03, "learning_rate": "4.6249e-05", "loss": 0.7661, "slid_loss": 0.7606, "step": 1332, "time": 71.99 }, { "epoch": 1.03, "learning_rate": "4.6244e-05", "loss": 0.7344, "slid_loss": 0.7601, "step": 1333, "time": 71.2 }, { "epoch": 1.03, "learning_rate": "4.6238e-05", "loss": 0.7497, "slid_loss": 0.7603, "step": 1334, "time": 72.16 }, { "epoch": 1.03, "learning_rate": "4.6232e-05", "loss": 0.7324, "slid_loss": 0.7599, "step": 1335, "time": 73.54 }, { "epoch": 1.03, "learning_rate": "4.6226e-05", "loss": 0.7612, "slid_loss": 0.7599, "step": 1336, "time": 71.85 }, { "epoch": 1.03, "learning_rate": "4.6221e-05", "loss": 0.7483, "slid_loss": 0.7595, "step": 1337, "time": 71.44 }, { "epoch": 1.04, "learning_rate": "4.6215e-05", "loss": 0.7779, "slid_loss": 0.76, "step": 1338, "time": 72.78 }, { "epoch": 1.04, "learning_rate": "4.6209e-05", "loss": 0.7546, "slid_loss": 0.76, "step": 1339, "time": 72.58 }, { "epoch": 1.04, "learning_rate": "4.6203e-05", "loss": 0.7297, "slid_loss": 0.7599, "step": 1340, "time": 72.03 }, { "epoch": 1.04, "learning_rate": "4.6198e-05", "loss": 0.7592, "slid_loss": 0.7597, "step": 1341, "time": 70.52 }, { "epoch": 1.04, "learning_rate": "4.6192e-05", "loss": 0.7337, "slid_loss": 0.7594, "step": 1342, "time": 71.88 }, { "epoch": 1.04, "learning_rate": "4.6186e-05", "loss": 0.7246, "slid_loss": 0.7592, "step": 1343, "time": 71.19 }, { "epoch": 1.04, "learning_rate": "4.6180e-05", "loss": 0.7687, "slid_loss": 0.7592, "step": 1344, "time": 71.76 }, { "epoch": 1.04, "learning_rate": "4.6175e-05", "loss": 0.7568, "slid_loss": 0.7588, "step": 1345, "time": 71.91 }, { "epoch": 1.04, "learning_rate": "4.6169e-05", "loss": 0.7376, "slid_loss": 0.7587, "step": 1346, "time": 71.47 }, { "epoch": 1.04, "learning_rate": "4.6163e-05", "loss": 0.7484, "slid_loss": 0.7582, "step": 1347, "time": 72.94 }, { "epoch": 1.04, "learning_rate": "4.6157e-05", "loss": 0.723, "slid_loss": 0.7583, "step": 1348, "time": 71.51 }, { "epoch": 1.04, "learning_rate": "4.6151e-05", "loss": 0.7552, "slid_loss": 0.7581, "step": 1349, "time": 71.92 }, { "epoch": 1.04, "learning_rate": "4.6146e-05", "loss": 0.7499, "slid_loss": 0.7581, "step": 1350, "time": 71.99 }, { "epoch": 1.05, "learning_rate": "4.6140e-05", "loss": 0.7577, "slid_loss": 0.7579, "step": 1351, "time": 72.4 }, { "epoch": 1.05, "learning_rate": "4.6134e-05", "loss": 0.8127, "slid_loss": 0.7582, "step": 1352, "time": 71.27 }, { "epoch": 1.05, "learning_rate": "4.6128e-05", "loss": 0.7694, "slid_loss": 0.7578, "step": 1353, "time": 71.51 }, { "epoch": 1.05, "learning_rate": "4.6122e-05", "loss": 0.7282, "slid_loss": 0.7576, "step": 1354, "time": 72.3 }, { "epoch": 1.05, "learning_rate": "4.6117e-05", "loss": 0.7381, "slid_loss": 0.7575, "step": 1355, "time": 72.08 }, { "epoch": 1.05, "learning_rate": "4.6111e-05", "loss": 0.7725, "slid_loss": 0.7576, "step": 1356, "time": 71.32 }, { "epoch": 1.05, "learning_rate": "4.6105e-05", "loss": 0.7515, "slid_loss": 0.7573, "step": 1357, "time": 72.17 }, { "epoch": 1.05, "learning_rate": "4.6099e-05", "loss": 0.744, "slid_loss": 0.7571, "step": 1358, "time": 72.78 }, { "epoch": 1.05, "learning_rate": "4.6093e-05", "loss": 0.7615, "slid_loss": 0.7569, "step": 1359, "time": 71.49 }, { "epoch": 1.05, "learning_rate": "4.6087e-05", "loss": 0.7909, "slid_loss": 0.7575, "step": 1360, "time": 72.31 }, { "epoch": 1.05, "learning_rate": "4.6082e-05", "loss": 0.7904, "slid_loss": 0.7575, "step": 1361, "time": 71.53 }, { "epoch": 1.05, "learning_rate": "4.6076e-05", "loss": 0.708, "slid_loss": 0.7569, "step": 1362, "time": 71.99 }, { "epoch": 1.05, "learning_rate": "4.6070e-05", "loss": 0.7691, "slid_loss": 0.7569, "step": 1363, "time": 71.18 }, { "epoch": 1.06, "learning_rate": "4.6064e-05", "loss": 0.7138, "slid_loss": 0.756, "step": 1364, "time": 70.57 }, { "epoch": 1.06, "learning_rate": "4.6058e-05", "loss": 0.7752, "slid_loss": 0.7559, "step": 1365, "time": 72.2 }, { "epoch": 1.06, "learning_rate": "4.6052e-05", "loss": 0.7546, "slid_loss": 0.7558, "step": 1366, "time": 71.96 }, { "epoch": 1.06, "learning_rate": "4.6046e-05", "loss": 0.7673, "slid_loss": 0.7558, "step": 1367, "time": 72.24 }, { "epoch": 1.06, "learning_rate": "4.6041e-05", "loss": 0.7604, "slid_loss": 0.7561, "step": 1368, "time": 72.42 }, { "epoch": 1.06, "learning_rate": "4.6035e-05", "loss": 0.7796, "slid_loss": 0.7561, "step": 1369, "time": 71.51 }, { "epoch": 1.06, "learning_rate": "4.6029e-05", "loss": 0.7463, "slid_loss": 0.7559, "step": 1370, "time": 70.68 }, { "epoch": 1.06, "learning_rate": "4.6023e-05", "loss": 0.7588, "slid_loss": 0.7559, "step": 1371, "time": 72.13 }, { "epoch": 1.06, "learning_rate": "4.6017e-05", "loss": 0.7407, "slid_loss": 0.7554, "step": 1372, "time": 72.3 }, { "epoch": 1.06, "learning_rate": "4.6011e-05", "loss": 0.7443, "slid_loss": 0.7555, "step": 1373, "time": 72.25 }, { "epoch": 1.06, "learning_rate": "4.6005e-05", "loss": 0.7247, "slid_loss": 0.7552, "step": 1374, "time": 72.69 }, { "epoch": 1.06, "learning_rate": "4.5999e-05", "loss": 0.783, "slid_loss": 0.7554, "step": 1375, "time": 70.48 }, { "epoch": 1.06, "learning_rate": "4.5994e-05", "loss": 0.7545, "slid_loss": 0.756, "step": 1376, "time": 71.57 }, { "epoch": 1.07, "learning_rate": "4.5988e-05", "loss": 0.7504, "slid_loss": 0.7559, "step": 1377, "time": 72.39 }, { "epoch": 1.07, "learning_rate": "4.5982e-05", "loss": 0.7767, "slid_loss": 0.756, "step": 1378, "time": 71.32 }, { "epoch": 1.07, "learning_rate": "4.5976e-05", "loss": 0.7557, "slid_loss": 0.756, "step": 1379, "time": 72.15 }, { "epoch": 1.07, "learning_rate": "4.5970e-05", "loss": 0.7301, "slid_loss": 0.7559, "step": 1380, "time": 71.86 }, { "epoch": 1.07, "learning_rate": "4.5964e-05", "loss": 0.736, "slid_loss": 0.7556, "step": 1381, "time": 70.81 }, { "epoch": 1.07, "learning_rate": "4.5958e-05", "loss": 0.7486, "slid_loss": 0.7556, "step": 1382, "time": 70.71 }, { "epoch": 1.07, "learning_rate": "4.5952e-05", "loss": 0.7573, "slid_loss": 0.7554, "step": 1383, "time": 72.82 }, { "epoch": 1.07, "learning_rate": "4.5946e-05", "loss": 0.7831, "slid_loss": 0.7554, "step": 1384, "time": 73.01 }, { "epoch": 1.07, "learning_rate": "4.5940e-05", "loss": 0.744, "slid_loss": 0.7556, "step": 1385, "time": 71.26 }, { "epoch": 1.07, "learning_rate": "4.5934e-05", "loss": 0.7508, "slid_loss": 0.7556, "step": 1386, "time": 70.79 }, { "epoch": 1.07, "learning_rate": "4.5928e-05", "loss": 0.7461, "slid_loss": 0.7555, "step": 1387, "time": 72.41 }, { "epoch": 1.07, "learning_rate": "4.5922e-05", "loss": 0.751, "slid_loss": 0.7556, "step": 1388, "time": 71.1 }, { "epoch": 1.07, "learning_rate": "4.5917e-05", "loss": 0.7337, "slid_loss": 0.7555, "step": 1389, "time": 72.47 }, { "epoch": 1.08, "learning_rate": "4.5911e-05", "loss": 0.7352, "slid_loss": 0.7554, "step": 1390, "time": 71.57 }, { "epoch": 1.08, "learning_rate": "4.5905e-05", "loss": 0.7838, "slid_loss": 0.7556, "step": 1391, "time": 72.49 }, { "epoch": 1.08, "learning_rate": "4.5899e-05", "loss": 0.7774, "slid_loss": 0.7558, "step": 1392, "time": 71.64 }, { "epoch": 1.08, "learning_rate": "4.5893e-05", "loss": 0.7632, "slid_loss": 0.7569, "step": 1393, "time": 73.59 }, { "epoch": 1.08, "learning_rate": "4.5887e-05", "loss": 0.7506, "slid_loss": 0.7569, "step": 1394, "time": 71.55 }, { "epoch": 1.08, "learning_rate": "4.5881e-05", "loss": 0.7364, "slid_loss": 0.7567, "step": 1395, "time": 72.46 }, { "epoch": 1.08, "learning_rate": "4.5875e-05", "loss": 0.7377, "slid_loss": 0.7563, "step": 1396, "time": 72.17 }, { "epoch": 1.08, "learning_rate": "4.5869e-05", "loss": 0.7628, "slid_loss": 0.756, "step": 1397, "time": 71.53 }, { "epoch": 1.08, "learning_rate": "4.5863e-05", "loss": 0.7453, "slid_loss": 0.7559, "step": 1398, "time": 71.78 }, { "epoch": 1.08, "learning_rate": "4.5857e-05", "loss": 0.7588, "slid_loss": 0.756, "step": 1399, "time": 70.51 }, { "epoch": 1.08, "learning_rate": "4.5851e-05", "loss": 0.7377, "slid_loss": 0.7558, "step": 1400, "time": 72.05 }, { "epoch": 1.08, "learning_rate": "4.5845e-05", "loss": 0.7435, "slid_loss": 0.7553, "step": 1401, "time": 843.17 }, { "epoch": 1.08, "learning_rate": "4.5839e-05", "loss": 0.7363, "slid_loss": 0.7546, "step": 1402, "time": 70.59 }, { "epoch": 1.09, "learning_rate": "4.5833e-05", "loss": 0.7491, "slid_loss": 0.7548, "step": 1403, "time": 72.4 }, { "epoch": 1.09, "learning_rate": "4.5827e-05", "loss": 0.7549, "slid_loss": 0.7545, "step": 1404, "time": 71.71 }, { "epoch": 1.09, "learning_rate": "4.5821e-05", "loss": 0.7528, "slid_loss": 0.7545, "step": 1405, "time": 71.39 }, { "epoch": 1.09, "learning_rate": "4.5815e-05", "loss": 0.7893, "slid_loss": 0.7544, "step": 1406, "time": 72.87 }, { "epoch": 1.09, "learning_rate": "4.5809e-05", "loss": 0.7552, "slid_loss": 0.7541, "step": 1407, "time": 72.65 }, { "epoch": 1.09, "learning_rate": "4.5803e-05", "loss": 0.7507, "slid_loss": 0.7539, "step": 1408, "time": 72.03 }, { "epoch": 1.09, "learning_rate": "4.5797e-05", "loss": 0.6905, "slid_loss": 0.753, "step": 1409, "time": 70.19 }, { "epoch": 1.09, "learning_rate": "4.5791e-05", "loss": 0.7274, "slid_loss": 0.7528, "step": 1410, "time": 72.14 }, { "epoch": 1.09, "learning_rate": "4.5785e-05", "loss": 0.7471, "slid_loss": 0.7526, "step": 1411, "time": 71.05 }, { "epoch": 1.09, "learning_rate": "4.5779e-05", "loss": 0.7677, "slid_loss": 0.7528, "step": 1412, "time": 70.55 }, { "epoch": 1.09, "learning_rate": "4.5773e-05", "loss": 0.74, "slid_loss": 0.7524, "step": 1413, "time": 72.03 }, { "epoch": 1.09, "learning_rate": "4.5767e-05", "loss": 0.7621, "slid_loss": 0.753, "step": 1414, "time": 72.15 }, { "epoch": 1.09, "learning_rate": "4.5761e-05", "loss": 0.736, "slid_loss": 0.7525, "step": 1415, "time": 71.31 }, { "epoch": 1.1, "learning_rate": "4.5755e-05", "loss": 0.7348, "slid_loss": 0.7521, "step": 1416, "time": 72.53 }, { "epoch": 1.1, "learning_rate": "4.5748e-05", "loss": 0.7419, "slid_loss": 0.7517, "step": 1417, "time": 73.5 }, { "epoch": 1.1, "learning_rate": "4.5742e-05", "loss": 0.7285, "slid_loss": 0.7513, "step": 1418, "time": 72.38 }, { "epoch": 1.1, "learning_rate": "4.5736e-05", "loss": 0.7641, "slid_loss": 0.7515, "step": 1419, "time": 71.38 }, { "epoch": 1.1, "learning_rate": "4.5730e-05", "loss": 0.7075, "slid_loss": 0.751, "step": 1420, "time": 72.54 }, { "epoch": 1.1, "learning_rate": "4.5724e-05", "loss": 0.7757, "slid_loss": 0.7513, "step": 1421, "time": 72.15 }, { "epoch": 1.1, "learning_rate": "4.5718e-05", "loss": 0.7406, "slid_loss": 0.751, "step": 1422, "time": 71.06 }, { "epoch": 1.1, "learning_rate": "4.5712e-05", "loss": 0.7606, "slid_loss": 0.7512, "step": 1423, "time": 88.08 }, { "epoch": 1.1, "learning_rate": "4.5706e-05", "loss": 0.7527, "slid_loss": 0.7512, "step": 1424, "time": 116.43 }, { "epoch": 1.1, "learning_rate": "4.5700e-05", "loss": 0.7466, "slid_loss": 0.7512, "step": 1425, "time": 121.79 }, { "epoch": 1.1, "learning_rate": "4.5694e-05", "loss": 0.7195, "slid_loss": 0.751, "step": 1426, "time": 153.86 }, { "epoch": 1.1, "learning_rate": "4.5688e-05", "loss": 0.7503, "slid_loss": 0.7511, "step": 1427, "time": 153.89 }, { "epoch": 1.1, "learning_rate": "4.5682e-05", "loss": 0.7256, "slid_loss": 0.7506, "step": 1428, "time": 155.62 }, { "epoch": 1.11, "learning_rate": "4.5675e-05", "loss": 0.7295, "slid_loss": 0.7503, "step": 1429, "time": 162.44 }, { "epoch": 1.11, "learning_rate": "4.5669e-05", "loss": 0.752, "slid_loss": 0.7502, "step": 1430, "time": 197.15 }, { "epoch": 1.11, "learning_rate": "4.5663e-05", "loss": 0.7536, "slid_loss": 0.7505, "step": 1431, "time": 187.06 }, { "epoch": 1.11, "learning_rate": "4.5657e-05", "loss": 0.759, "slid_loss": 0.7504, "step": 1432, "time": 159.31 }, { "epoch": 1.11, "learning_rate": "4.5651e-05", "loss": 0.7865, "slid_loss": 0.7509, "step": 1433, "time": 178.58 }, { "epoch": 1.11, "learning_rate": "4.5645e-05", "loss": 0.8042, "slid_loss": 0.7515, "step": 1434, "time": 134.77 }, { "epoch": 1.11, "learning_rate": "4.5639e-05", "loss": 0.7304, "slid_loss": 0.7514, "step": 1435, "time": 129.61 }, { "epoch": 1.11, "learning_rate": "4.5633e-05", "loss": 0.7302, "slid_loss": 0.7511, "step": 1436, "time": 72.53 }, { "epoch": 1.11, "learning_rate": "4.5627e-05", "loss": 0.7645, "slid_loss": 0.7513, "step": 1437, "time": 70.59 }, { "epoch": 1.11, "learning_rate": "4.5620e-05", "loss": 0.753, "slid_loss": 0.751, "step": 1438, "time": 71.26 }, { "epoch": 1.11, "learning_rate": "4.5614e-05", "loss": 0.7702, "slid_loss": 0.7512, "step": 1439, "time": 70.07 }, { "epoch": 1.11, "learning_rate": "4.5608e-05", "loss": 0.7695, "slid_loss": 0.7516, "step": 1440, "time": 71.34 }, { "epoch": 1.11, "learning_rate": "4.5602e-05", "loss": 0.7247, "slid_loss": 0.7513, "step": 1441, "time": 71.73 }, { "epoch": 1.12, "learning_rate": "4.5596e-05", "loss": 0.7612, "slid_loss": 0.7515, "step": 1442, "time": 72.79 }, { "epoch": 1.12, "learning_rate": "4.5590e-05", "loss": 0.7584, "slid_loss": 0.7519, "step": 1443, "time": 72.08 }, { "epoch": 1.12, "learning_rate": "4.5584e-05", "loss": 0.7571, "slid_loss": 0.7518, "step": 1444, "time": 72.05 }, { "epoch": 1.12, "learning_rate": "4.5577e-05", "loss": 0.7816, "slid_loss": 0.752, "step": 1445, "time": 70.94 }, { "epoch": 1.12, "learning_rate": "4.5571e-05", "loss": 0.7334, "slid_loss": 0.752, "step": 1446, "time": 72.32 }, { "epoch": 1.12, "learning_rate": "4.5565e-05", "loss": 0.7861, "slid_loss": 0.7523, "step": 1447, "time": 72.69 }, { "epoch": 1.12, "learning_rate": "4.5559e-05", "loss": 0.7485, "slid_loss": 0.7526, "step": 1448, "time": 71.81 }, { "epoch": 1.12, "learning_rate": "4.5553e-05", "loss": 0.7429, "slid_loss": 0.7525, "step": 1449, "time": 72.98 }, { "epoch": 1.12, "learning_rate": "4.5547e-05", "loss": 0.7495, "slid_loss": 0.7525, "step": 1450, "time": 71.47 }, { "epoch": 1.12, "learning_rate": "4.5540e-05", "loss": 0.7681, "slid_loss": 0.7526, "step": 1451, "time": 71.76 }, { "epoch": 1.12, "learning_rate": "4.5534e-05", "loss": 0.7823, "slid_loss": 0.7523, "step": 1452, "time": 73.35 }, { "epoch": 1.12, "learning_rate": "4.5528e-05", "loss": 0.7484, "slid_loss": 0.752, "step": 1453, "time": 71.55 }, { "epoch": 1.12, "learning_rate": "4.5522e-05", "loss": 0.7391, "slid_loss": 0.7522, "step": 1454, "time": 72.35 }, { "epoch": 1.13, "learning_rate": "4.5516e-05", "loss": 0.7518, "slid_loss": 0.7523, "step": 1455, "time": 71.48 }, { "epoch": 1.13, "learning_rate": "4.5509e-05", "loss": 0.7733, "slid_loss": 0.7523, "step": 1456, "time": 71.55 }, { "epoch": 1.13, "learning_rate": "4.5503e-05", "loss": 0.7342, "slid_loss": 0.7521, "step": 1457, "time": 72.28 }, { "epoch": 1.13, "learning_rate": "4.5497e-05", "loss": 0.7941, "slid_loss": 0.7526, "step": 1458, "time": 71.79 }, { "epoch": 1.13, "learning_rate": "4.5491e-05", "loss": 0.7472, "slid_loss": 0.7525, "step": 1459, "time": 71.71 }, { "epoch": 1.13, "learning_rate": "4.5485e-05", "loss": 0.7229, "slid_loss": 0.7518, "step": 1460, "time": 71.54 }, { "epoch": 1.13, "learning_rate": "4.5478e-05", "loss": 0.7518, "slid_loss": 0.7514, "step": 1461, "time": 71.96 }, { "epoch": 1.13, "learning_rate": "4.5472e-05", "loss": 0.7516, "slid_loss": 0.7519, "step": 1462, "time": 72.52 }, { "epoch": 1.13, "learning_rate": "4.5466e-05", "loss": 0.7431, "slid_loss": 0.7516, "step": 1463, "time": 72.21 }, { "epoch": 1.13, "learning_rate": "4.5460e-05", "loss": 0.7698, "slid_loss": 0.7522, "step": 1464, "time": 70.94 }, { "epoch": 1.13, "learning_rate": "4.5453e-05", "loss": 0.755, "slid_loss": 0.752, "step": 1465, "time": 70.9 }, { "epoch": 1.13, "learning_rate": "4.5447e-05", "loss": 0.7307, "slid_loss": 0.7517, "step": 1466, "time": 70.54 }, { "epoch": 1.14, "learning_rate": "4.5441e-05", "loss": 0.7419, "slid_loss": 0.7515, "step": 1467, "time": 70.45 }, { "epoch": 1.14, "learning_rate": "4.5435e-05", "loss": 0.7842, "slid_loss": 0.7517, "step": 1468, "time": 72.0 }, { "epoch": 1.14, "learning_rate": "4.5428e-05", "loss": 0.7756, "slid_loss": 0.7517, "step": 1469, "time": 72.47 }, { "epoch": 1.14, "learning_rate": "4.5422e-05", "loss": 0.7135, "slid_loss": 0.7513, "step": 1470, "time": 72.67 }, { "epoch": 1.14, "learning_rate": "4.5416e-05", "loss": 0.7439, "slid_loss": 0.7512, "step": 1471, "time": 71.0 }, { "epoch": 1.14, "learning_rate": "4.5410e-05", "loss": 0.7509, "slid_loss": 0.7513, "step": 1472, "time": 71.25 }, { "epoch": 1.14, "learning_rate": "4.5403e-05", "loss": 0.7397, "slid_loss": 0.7512, "step": 1473, "time": 72.84 }, { "epoch": 1.14, "learning_rate": "4.5397e-05", "loss": 0.7645, "slid_loss": 0.7516, "step": 1474, "time": 70.69 }, { "epoch": 1.14, "learning_rate": "4.5391e-05", "loss": 0.7428, "slid_loss": 0.7512, "step": 1475, "time": 72.03 }, { "epoch": 1.14, "learning_rate": "4.5385e-05", "loss": 0.7835, "slid_loss": 0.7515, "step": 1476, "time": 71.82 }, { "epoch": 1.14, "learning_rate": "4.5378e-05", "loss": 0.713, "slid_loss": 0.7512, "step": 1477, "time": 71.08 }, { "epoch": 1.14, "learning_rate": "4.5372e-05", "loss": 0.7662, "slid_loss": 0.7511, "step": 1478, "time": 72.57 }, { "epoch": 1.14, "learning_rate": "4.5366e-05", "loss": 0.7604, "slid_loss": 0.7511, "step": 1479, "time": 72.09 }, { "epoch": 1.15, "learning_rate": "4.5359e-05", "loss": 0.7607, "slid_loss": 0.7514, "step": 1480, "time": 70.92 }, { "epoch": 1.15, "learning_rate": "4.5353e-05", "loss": 0.7233, "slid_loss": 0.7513, "step": 1481, "time": 72.44 }, { "epoch": 1.15, "learning_rate": "4.5347e-05", "loss": 0.7454, "slid_loss": 0.7512, "step": 1482, "time": 71.3 }, { "epoch": 1.15, "learning_rate": "4.5341e-05", "loss": 0.7397, "slid_loss": 0.7511, "step": 1483, "time": 71.92 }, { "epoch": 1.15, "learning_rate": "4.5334e-05", "loss": 0.7635, "slid_loss": 0.7509, "step": 1484, "time": 70.85 }, { "epoch": 1.15, "learning_rate": "4.5328e-05", "loss": 0.7485, "slid_loss": 0.7509, "step": 1485, "time": 72.0 }, { "epoch": 1.15, "learning_rate": "4.5322e-05", "loss": 0.7552, "slid_loss": 0.751, "step": 1486, "time": 71.62 }, { "epoch": 1.15, "learning_rate": "4.5315e-05", "loss": 0.7443, "slid_loss": 0.7509, "step": 1487, "time": 72.61 }, { "epoch": 1.15, "learning_rate": "4.5309e-05", "loss": 0.7423, "slid_loss": 0.7509, "step": 1488, "time": 72.47 }, { "epoch": 1.15, "learning_rate": "4.5303e-05", "loss": 0.7465, "slid_loss": 0.751, "step": 1489, "time": 71.16 }, { "epoch": 1.15, "learning_rate": "4.5296e-05", "loss": 0.7541, "slid_loss": 0.7512, "step": 1490, "time": 72.26 }, { "epoch": 1.15, "learning_rate": "4.5290e-05", "loss": 0.7449, "slid_loss": 0.7508, "step": 1491, "time": 70.13 }, { "epoch": 1.15, "learning_rate": "4.5284e-05", "loss": 0.7547, "slid_loss": 0.7506, "step": 1492, "time": 71.73 }, { "epoch": 1.16, "learning_rate": "4.5277e-05", "loss": 0.742, "slid_loss": 0.7503, "step": 1493, "time": 72.0 }, { "epoch": 1.16, "learning_rate": "4.5271e-05", "loss": 0.764, "slid_loss": 0.7505, "step": 1494, "time": 71.47 }, { "epoch": 1.16, "learning_rate": "4.5265e-05", "loss": 0.7517, "slid_loss": 0.7506, "step": 1495, "time": 70.09 }, { "epoch": 1.16, "learning_rate": "4.5258e-05", "loss": 0.7199, "slid_loss": 0.7505, "step": 1496, "time": 71.76 }, { "epoch": 1.16, "learning_rate": "4.5252e-05", "loss": 0.7066, "slid_loss": 0.7499, "step": 1497, "time": 71.65 }, { "epoch": 1.16, "learning_rate": "4.5246e-05", "loss": 0.7764, "slid_loss": 0.7502, "step": 1498, "time": 72.13 }, { "epoch": 1.16, "learning_rate": "4.5239e-05", "loss": 0.7646, "slid_loss": 0.7503, "step": 1499, "time": 72.24 }, { "epoch": 1.16, "learning_rate": "4.5233e-05", "loss": 0.7554, "slid_loss": 0.7504, "step": 1500, "time": 71.21 }, { "epoch": 1.16, "learning_rate": "4.5226e-05", "loss": 0.7539, "slid_loss": 0.7505, "step": 1501, "time": 71.74 }, { "epoch": 1.16, "learning_rate": "4.5220e-05", "loss": 0.7567, "slid_loss": 0.7507, "step": 1502, "time": 71.67 }, { "epoch": 1.16, "learning_rate": "4.5214e-05", "loss": 0.7464, "slid_loss": 0.7507, "step": 1503, "time": 71.86 }, { "epoch": 1.16, "learning_rate": "4.5207e-05", "loss": 0.7489, "slid_loss": 0.7507, "step": 1504, "time": 71.19 }, { "epoch": 1.16, "learning_rate": "4.5201e-05", "loss": 0.7671, "slid_loss": 0.7508, "step": 1505, "time": 70.87 }, { "epoch": 1.17, "learning_rate": "4.5195e-05", "loss": 0.7327, "slid_loss": 0.7502, "step": 1506, "time": 72.23 }, { "epoch": 1.17, "learning_rate": "4.5188e-05", "loss": 0.728, "slid_loss": 0.75, "step": 1507, "time": 73.13 }, { "epoch": 1.17, "learning_rate": "4.5182e-05", "loss": 0.7667, "slid_loss": 0.7501, "step": 1508, "time": 72.56 }, { "epoch": 1.17, "learning_rate": "4.5175e-05", "loss": 0.758, "slid_loss": 0.7508, "step": 1509, "time": 70.77 }, { "epoch": 1.17, "learning_rate": "4.5169e-05", "loss": 0.7218, "slid_loss": 0.7507, "step": 1510, "time": 72.66 }, { "epoch": 1.17, "learning_rate": "4.5163e-05", "loss": 0.7959, "slid_loss": 0.7512, "step": 1511, "time": 72.27 }, { "epoch": 1.17, "learning_rate": "4.5156e-05", "loss": 0.7285, "slid_loss": 0.7508, "step": 1512, "time": 71.67 }, { "epoch": 1.17, "learning_rate": "4.5150e-05", "loss": 0.7692, "slid_loss": 0.7511, "step": 1513, "time": 71.16 }, { "epoch": 1.17, "learning_rate": "4.5143e-05", "loss": 0.7454, "slid_loss": 0.751, "step": 1514, "time": 70.77 }, { "epoch": 1.17, "learning_rate": "4.5137e-05", "loss": 0.751, "slid_loss": 0.7511, "step": 1515, "time": 70.75 }, { "epoch": 1.17, "learning_rate": "4.5130e-05", "loss": 0.765, "slid_loss": 0.7514, "step": 1516, "time": 71.41 }, { "epoch": 1.17, "learning_rate": "4.5124e-05", "loss": 0.7612, "slid_loss": 0.7516, "step": 1517, "time": 71.37 }, { "epoch": 1.17, "learning_rate": "4.5118e-05", "loss": 0.7423, "slid_loss": 0.7518, "step": 1518, "time": 70.81 }, { "epoch": 1.18, "learning_rate": "4.5111e-05", "loss": 0.7081, "slid_loss": 0.7512, "step": 1519, "time": 72.92 }, { "epoch": 1.18, "learning_rate": "4.5105e-05", "loss": 0.7449, "slid_loss": 0.7516, "step": 1520, "time": 70.97 }, { "epoch": 1.18, "learning_rate": "4.5098e-05", "loss": 0.7948, "slid_loss": 0.7518, "step": 1521, "time": 71.72 }, { "epoch": 1.18, "learning_rate": "4.5092e-05", "loss": 0.7652, "slid_loss": 0.752, "step": 1522, "time": 70.9 }, { "epoch": 1.18, "learning_rate": "4.5085e-05", "loss": 0.7374, "slid_loss": 0.7518, "step": 1523, "time": 71.58 }, { "epoch": 1.18, "learning_rate": "4.5079e-05", "loss": 0.7525, "slid_loss": 0.7518, "step": 1524, "time": 71.22 }, { "epoch": 1.18, "learning_rate": "4.5073e-05", "loss": 0.7423, "slid_loss": 0.7517, "step": 1525, "time": 72.61 }, { "epoch": 1.18, "learning_rate": "4.5066e-05", "loss": 0.759, "slid_loss": 0.7521, "step": 1526, "time": 71.57 }, { "epoch": 1.18, "learning_rate": "4.5060e-05", "loss": 0.7485, "slid_loss": 0.7521, "step": 1527, "time": 71.59 }, { "epoch": 1.18, "learning_rate": "4.5053e-05", "loss": 0.73, "slid_loss": 0.7521, "step": 1528, "time": 74.06 }, { "epoch": 1.18, "learning_rate": "4.5047e-05", "loss": 0.7701, "slid_loss": 0.7526, "step": 1529, "time": 72.67 }, { "epoch": 1.18, "learning_rate": "4.5040e-05", "loss": 0.6869, "slid_loss": 0.7519, "step": 1530, "time": 72.44 }, { "epoch": 1.18, "learning_rate": "4.5034e-05", "loss": 0.7528, "slid_loss": 0.7519, "step": 1531, "time": 71.69 }, { "epoch": 1.19, "learning_rate": "4.5027e-05", "loss": 0.7393, "slid_loss": 0.7517, "step": 1532, "time": 70.21 }, { "epoch": 1.19, "learning_rate": "4.5021e-05", "loss": 0.7317, "slid_loss": 0.7512, "step": 1533, "time": 71.34 }, { "epoch": 1.19, "learning_rate": "4.5014e-05", "loss": 0.7183, "slid_loss": 0.7503, "step": 1534, "time": 70.56 }, { "epoch": 1.19, "learning_rate": "4.5008e-05", "loss": 0.7321, "slid_loss": 0.7503, "step": 1535, "time": 73.44 }, { "epoch": 1.19, "learning_rate": "4.5001e-05", "loss": 0.7156, "slid_loss": 0.7502, "step": 1536, "time": 71.29 }, { "epoch": 1.19, "learning_rate": "4.4995e-05", "loss": 0.7526, "slid_loss": 0.75, "step": 1537, "time": 71.69 }, { "epoch": 1.19, "learning_rate": "4.4988e-05", "loss": 0.7822, "slid_loss": 0.7503, "step": 1538, "time": 72.64 }, { "epoch": 1.19, "learning_rate": "4.4982e-05", "loss": 0.7009, "slid_loss": 0.7496, "step": 1539, "time": 71.9 }, { "epoch": 1.19, "learning_rate": "4.4975e-05", "loss": 0.7261, "slid_loss": 0.7492, "step": 1540, "time": 71.14 }, { "epoch": 1.19, "learning_rate": "4.4969e-05", "loss": 0.7355, "slid_loss": 0.7493, "step": 1541, "time": 71.98 }, { "epoch": 1.19, "learning_rate": "4.4962e-05", "loss": 0.7553, "slid_loss": 0.7493, "step": 1542, "time": 71.77 }, { "epoch": 1.19, "learning_rate": "4.4956e-05", "loss": 0.8101, "slid_loss": 0.7498, "step": 1543, "time": 72.78 }, { "epoch": 1.19, "learning_rate": "4.4949e-05", "loss": 0.7549, "slid_loss": 0.7498, "step": 1544, "time": 71.7 }, { "epoch": 1.2, "learning_rate": "4.4943e-05", "loss": 0.7062, "slid_loss": 0.749, "step": 1545, "time": 72.33 }, { "epoch": 1.2, "learning_rate": "4.4936e-05", "loss": 0.722, "slid_loss": 0.7489, "step": 1546, "time": 71.07 }, { "epoch": 1.2, "learning_rate": "4.4930e-05", "loss": 0.7054, "slid_loss": 0.7481, "step": 1547, "time": 71.36 }, { "epoch": 1.2, "learning_rate": "4.4923e-05", "loss": 0.7599, "slid_loss": 0.7482, "step": 1548, "time": 72.63 }, { "epoch": 1.2, "learning_rate": "4.4916e-05", "loss": 0.7684, "slid_loss": 0.7484, "step": 1549, "time": 72.26 }, { "epoch": 1.2, "learning_rate": "4.4910e-05", "loss": 0.7885, "slid_loss": 0.7488, "step": 1550, "time": 71.05 }, { "epoch": 1.2, "learning_rate": "4.4903e-05", "loss": 0.7432, "slid_loss": 0.7486, "step": 1551, "time": 72.31 }, { "epoch": 1.2, "learning_rate": "4.4897e-05", "loss": 0.7597, "slid_loss": 0.7484, "step": 1552, "time": 71.28 }, { "epoch": 1.2, "learning_rate": "4.4890e-05", "loss": 0.7483, "slid_loss": 0.7484, "step": 1553, "time": 71.73 }, { "epoch": 1.2, "learning_rate": "4.4884e-05", "loss": 0.7489, "slid_loss": 0.7485, "step": 1554, "time": 70.98 }, { "epoch": 1.2, "learning_rate": "4.4877e-05", "loss": 0.7549, "slid_loss": 0.7485, "step": 1555, "time": 70.84 }, { "epoch": 1.2, "learning_rate": "4.4871e-05", "loss": 0.8029, "slid_loss": 0.7488, "step": 1556, "time": 71.78 }, { "epoch": 1.2, "learning_rate": "4.4864e-05", "loss": 0.756, "slid_loss": 0.749, "step": 1557, "time": 73.36 }, { "epoch": 1.21, "learning_rate": "4.4857e-05", "loss": 0.7613, "slid_loss": 0.7487, "step": 1558, "time": 71.5 }, { "epoch": 1.21, "learning_rate": "4.4851e-05", "loss": 0.7719, "slid_loss": 0.7489, "step": 1559, "time": 71.29 }, { "epoch": 1.21, "learning_rate": "4.4844e-05", "loss": 0.7558, "slid_loss": 0.7493, "step": 1560, "time": 72.33 }, { "epoch": 1.21, "learning_rate": "4.4838e-05", "loss": 0.7319, "slid_loss": 0.7491, "step": 1561, "time": 72.54 }, { "epoch": 1.21, "learning_rate": "4.4831e-05", "loss": 0.7545, "slid_loss": 0.7491, "step": 1562, "time": 71.92 }, { "epoch": 1.21, "learning_rate": "4.4824e-05", "loss": 0.7261, "slid_loss": 0.7489, "step": 1563, "time": 73.3 }, { "epoch": 1.21, "learning_rate": "4.4818e-05", "loss": 0.7287, "slid_loss": 0.7485, "step": 1564, "time": 71.85 }, { "epoch": 1.21, "learning_rate": "4.4811e-05", "loss": 0.7321, "slid_loss": 0.7483, "step": 1565, "time": 72.28 }, { "epoch": 1.21, "learning_rate": "4.4805e-05", "loss": 0.7194, "slid_loss": 0.7482, "step": 1566, "time": 72.47 }, { "epoch": 1.21, "learning_rate": "4.4798e-05", "loss": 0.7609, "slid_loss": 0.7483, "step": 1567, "time": 72.25 }, { "epoch": 1.21, "learning_rate": "4.4791e-05", "loss": 0.7486, "slid_loss": 0.748, "step": 1568, "time": 71.17 }, { "epoch": 1.21, "learning_rate": "4.4785e-05", "loss": 0.7461, "slid_loss": 0.7477, "step": 1569, "time": 72.62 }, { "epoch": 1.21, "learning_rate": "4.4778e-05", "loss": 0.7225, "slid_loss": 0.7478, "step": 1570, "time": 72.23 }, { "epoch": 1.22, "learning_rate": "4.4772e-05", "loss": 0.7581, "slid_loss": 0.7479, "step": 1571, "time": 72.48 }, { "epoch": 1.22, "learning_rate": "4.4765e-05", "loss": 0.7399, "slid_loss": 0.7478, "step": 1572, "time": 72.8 }, { "epoch": 1.22, "learning_rate": "4.4758e-05", "loss": 0.7352, "slid_loss": 0.7478, "step": 1573, "time": 72.16 }, { "epoch": 1.22, "learning_rate": "4.4752e-05", "loss": 0.738, "slid_loss": 0.7475, "step": 1574, "time": 71.32 }, { "epoch": 1.22, "learning_rate": "4.4745e-05", "loss": 0.7949, "slid_loss": 0.748, "step": 1575, "time": 71.93 }, { "epoch": 1.22, "learning_rate": "4.4738e-05", "loss": 0.7257, "slid_loss": 0.7475, "step": 1576, "time": 72.07 }, { "epoch": 1.22, "learning_rate": "4.4732e-05", "loss": 0.7614, "slid_loss": 0.7479, "step": 1577, "time": 71.44 }, { "epoch": 1.22, "learning_rate": "4.4725e-05", "loss": 0.6952, "slid_loss": 0.7472, "step": 1578, "time": 72.63 }, { "epoch": 1.22, "learning_rate": "4.4719e-05", "loss": 0.7601, "slid_loss": 0.7472, "step": 1579, "time": 70.9 }, { "epoch": 1.22, "learning_rate": "4.4712e-05", "loss": 0.7546, "slid_loss": 0.7472, "step": 1580, "time": 71.7 }, { "epoch": 1.22, "learning_rate": "4.4705e-05", "loss": 0.7674, "slid_loss": 0.7476, "step": 1581, "time": 71.15 }, { "epoch": 1.22, "learning_rate": "4.4699e-05", "loss": 0.6962, "slid_loss": 0.7471, "step": 1582, "time": 83.62 }, { "epoch": 1.22, "learning_rate": "4.4692e-05", "loss": 0.7619, "slid_loss": 0.7473, "step": 1583, "time": 110.4 }, { "epoch": 1.23, "learning_rate": "4.4685e-05", "loss": 0.7611, "slid_loss": 0.7473, "step": 1584, "time": 138.27 }, { "epoch": 1.23, "learning_rate": "4.4679e-05", "loss": 0.7317, "slid_loss": 0.7471, "step": 1585, "time": 123.6 }, { "epoch": 1.23, "learning_rate": "4.4672e-05", "loss": 0.7422, "slid_loss": 0.747, "step": 1586, "time": 126.55 }, { "epoch": 1.23, "learning_rate": "4.4665e-05", "loss": 0.7512, "slid_loss": 0.7471, "step": 1587, "time": 149.81 }, { "epoch": 1.23, "learning_rate": "4.4658e-05", "loss": 0.7776, "slid_loss": 0.7474, "step": 1588, "time": 135.98 }, { "epoch": 1.23, "learning_rate": "4.4652e-05", "loss": 0.7827, "slid_loss": 0.7478, "step": 1589, "time": 191.04 }, { "epoch": 1.23, "learning_rate": "4.4645e-05", "loss": 0.7914, "slid_loss": 0.7482, "step": 1590, "time": 176.71 }, { "epoch": 1.23, "learning_rate": "4.4638e-05", "loss": 0.7594, "slid_loss": 0.7483, "step": 1591, "time": 164.34 }, { "epoch": 1.23, "learning_rate": "4.4632e-05", "loss": 0.7309, "slid_loss": 0.7481, "step": 1592, "time": 134.71 }, { "epoch": 1.23, "learning_rate": "4.4625e-05", "loss": 0.7971, "slid_loss": 0.7486, "step": 1593, "time": 121.18 }, { "epoch": 1.23, "learning_rate": "4.4618e-05", "loss": 0.7201, "slid_loss": 0.7482, "step": 1594, "time": 97.62 }, { "epoch": 1.23, "learning_rate": "4.4612e-05", "loss": 0.7191, "slid_loss": 0.7479, "step": 1595, "time": 95.9 }, { "epoch": 1.23, "learning_rate": "4.4605e-05", "loss": 0.7915, "slid_loss": 0.7486, "step": 1596, "time": 71.3 }, { "epoch": 1.24, "learning_rate": "4.4598e-05", "loss": 0.7934, "slid_loss": 0.7494, "step": 1597, "time": 72.91 }, { "epoch": 1.24, "learning_rate": "4.4591e-05", "loss": 0.7434, "slid_loss": 0.7491, "step": 1598, "time": 71.95 }, { "epoch": 1.24, "learning_rate": "4.4585e-05", "loss": 0.7604, "slid_loss": 0.7491, "step": 1599, "time": 70.86 }, { "epoch": 1.24, "learning_rate": "4.4578e-05", "loss": 0.7354, "slid_loss": 0.7489, "step": 1600, "time": 70.94 }, { "epoch": 1.24, "learning_rate": "4.4571e-05", "loss": 0.7153, "slid_loss": 0.7485, "step": 1601, "time": 852.06 }, { "epoch": 1.24, "learning_rate": "4.4565e-05", "loss": 0.7474, "slid_loss": 0.7484, "step": 1602, "time": 71.74 }, { "epoch": 1.24, "learning_rate": "4.4558e-05", "loss": 0.7333, "slid_loss": 0.7483, "step": 1603, "time": 72.42 }, { "epoch": 1.24, "learning_rate": "4.4551e-05", "loss": 0.7551, "slid_loss": 0.7483, "step": 1604, "time": 71.75 }, { "epoch": 1.24, "learning_rate": "4.4544e-05", "loss": 0.7271, "slid_loss": 0.7479, "step": 1605, "time": 72.26 }, { "epoch": 1.24, "learning_rate": "4.4538e-05", "loss": 0.7576, "slid_loss": 0.7482, "step": 1606, "time": 73.07 }, { "epoch": 1.24, "learning_rate": "4.4531e-05", "loss": 0.7549, "slid_loss": 0.7484, "step": 1607, "time": 71.76 }, { "epoch": 1.24, "learning_rate": "4.4524e-05", "loss": 0.7263, "slid_loss": 0.748, "step": 1608, "time": 71.07 }, { "epoch": 1.24, "learning_rate": "4.4517e-05", "loss": 0.7589, "slid_loss": 0.748, "step": 1609, "time": 71.86 }, { "epoch": 1.25, "learning_rate": "4.4511e-05", "loss": 0.7249, "slid_loss": 0.7481, "step": 1610, "time": 74.59 }, { "epoch": 1.25, "learning_rate": "4.4504e-05", "loss": 0.7608, "slid_loss": 0.7477, "step": 1611, "time": 71.41 }, { "epoch": 1.25, "learning_rate": "4.4497e-05", "loss": 0.7464, "slid_loss": 0.7479, "step": 1612, "time": 71.51 }, { "epoch": 1.25, "learning_rate": "4.4490e-05", "loss": 0.7379, "slid_loss": 0.7476, "step": 1613, "time": 70.9 }, { "epoch": 1.25, "learning_rate": "4.4484e-05", "loss": 0.7143, "slid_loss": 0.7473, "step": 1614, "time": 71.93 }, { "epoch": 1.25, "learning_rate": "4.4477e-05", "loss": 0.7432, "slid_loss": 0.7472, "step": 1615, "time": 72.51 }, { "epoch": 1.25, "learning_rate": "4.4470e-05", "loss": 0.7578, "slid_loss": 0.7471, "step": 1616, "time": 71.49 }, { "epoch": 1.25, "learning_rate": "4.4463e-05", "loss": 0.7344, "slid_loss": 0.7469, "step": 1617, "time": 72.94 }, { "epoch": 1.25, "learning_rate": "4.4456e-05", "loss": 0.7958, "slid_loss": 0.7474, "step": 1618, "time": 70.64 }, { "epoch": 1.25, "learning_rate": "4.4450e-05", "loss": 0.7206, "slid_loss": 0.7475, "step": 1619, "time": 70.52 }, { "epoch": 1.25, "learning_rate": "4.4443e-05", "loss": 0.7569, "slid_loss": 0.7476, "step": 1620, "time": 72.47 }, { "epoch": 1.25, "learning_rate": "4.4436e-05", "loss": 0.726, "slid_loss": 0.7469, "step": 1621, "time": 70.93 }, { "epoch": 1.25, "learning_rate": "4.4429e-05", "loss": 0.773, "slid_loss": 0.747, "step": 1622, "time": 71.72 }, { "epoch": 1.26, "learning_rate": "4.4422e-05", "loss": 0.7233, "slid_loss": 0.7469, "step": 1623, "time": 71.01 }, { "epoch": 1.26, "learning_rate": "4.4416e-05", "loss": 0.7488, "slid_loss": 0.7468, "step": 1624, "time": 74.02 }, { "epoch": 1.26, "learning_rate": "4.4409e-05", "loss": 0.7538, "slid_loss": 0.747, "step": 1625, "time": 72.7 }, { "epoch": 1.26, "learning_rate": "4.4402e-05", "loss": 0.7301, "slid_loss": 0.7467, "step": 1626, "time": 70.96 }, { "epoch": 1.26, "learning_rate": "4.4395e-05", "loss": 0.7332, "slid_loss": 0.7465, "step": 1627, "time": 71.62 }, { "epoch": 1.26, "learning_rate": "4.4388e-05", "loss": 0.7517, "slid_loss": 0.7467, "step": 1628, "time": 72.53 }, { "epoch": 1.26, "learning_rate": "4.4381e-05", "loss": 0.7276, "slid_loss": 0.7463, "step": 1629, "time": 71.87 }, { "epoch": 1.26, "learning_rate": "4.4375e-05", "loss": 0.7654, "slid_loss": 0.7471, "step": 1630, "time": 70.41 }, { "epoch": 1.26, "learning_rate": "4.4368e-05", "loss": 0.7541, "slid_loss": 0.7471, "step": 1631, "time": 71.83 }, { "epoch": 1.26, "learning_rate": "4.4361e-05", "loss": 0.7698, "slid_loss": 0.7474, "step": 1632, "time": 71.77 }, { "epoch": 1.26, "learning_rate": "4.4354e-05", "loss": 0.7014, "slid_loss": 0.7471, "step": 1633, "time": 72.83 }, { "epoch": 1.26, "learning_rate": "4.4347e-05", "loss": 0.7523, "slid_loss": 0.7475, "step": 1634, "time": 72.19 }, { "epoch": 1.26, "learning_rate": "4.4340e-05", "loss": 0.6939, "slid_loss": 0.7471, "step": 1635, "time": 72.49 }, { "epoch": 1.27, "learning_rate": "4.4334e-05", "loss": 0.7339, "slid_loss": 0.7473, "step": 1636, "time": 71.27 }, { "epoch": 1.27, "learning_rate": "4.4327e-05", "loss": 0.74, "slid_loss": 0.7471, "step": 1637, "time": 71.57 }, { "epoch": 1.27, "learning_rate": "4.4320e-05", "loss": 0.7489, "slid_loss": 0.7468, "step": 1638, "time": 71.64 }, { "epoch": 1.27, "learning_rate": "4.4313e-05", "loss": 0.7118, "slid_loss": 0.7469, "step": 1639, "time": 70.52 }, { "epoch": 1.27, "learning_rate": "4.4306e-05", "loss": 0.7167, "slid_loss": 0.7468, "step": 1640, "time": 72.2 }, { "epoch": 1.27, "learning_rate": "4.4299e-05", "loss": 0.7732, "slid_loss": 0.7472, "step": 1641, "time": 72.36 }, { "epoch": 1.27, "learning_rate": "4.4292e-05", "loss": 0.7756, "slid_loss": 0.7474, "step": 1642, "time": 71.37 }, { "epoch": 1.27, "learning_rate": "4.4286e-05", "loss": 0.7141, "slid_loss": 0.7464, "step": 1643, "time": 70.61 }, { "epoch": 1.27, "learning_rate": "4.4279e-05", "loss": 0.7731, "slid_loss": 0.7466, "step": 1644, "time": 71.28 }, { "epoch": 1.27, "learning_rate": "4.4272e-05", "loss": 0.7382, "slid_loss": 0.7469, "step": 1645, "time": 71.68 }, { "epoch": 1.27, "learning_rate": "4.4265e-05", "loss": 0.7429, "slid_loss": 0.7471, "step": 1646, "time": 71.91 }, { "epoch": 1.27, "learning_rate": "4.4258e-05", "loss": 0.7408, "slid_loss": 0.7475, "step": 1647, "time": 71.93 }, { "epoch": 1.28, "learning_rate": "4.4251e-05", "loss": 0.7205, "slid_loss": 0.7471, "step": 1648, "time": 71.87 }, { "epoch": 1.28, "learning_rate": "4.4244e-05", "loss": 0.7673, "slid_loss": 0.7471, "step": 1649, "time": 72.21 }, { "epoch": 1.28, "learning_rate": "4.4237e-05", "loss": 0.7155, "slid_loss": 0.7464, "step": 1650, "time": 72.97 }, { "epoch": 1.28, "learning_rate": "4.4230e-05", "loss": 0.7609, "slid_loss": 0.7465, "step": 1651, "time": 72.43 }, { "epoch": 1.28, "learning_rate": "4.4224e-05", "loss": 0.7389, "slid_loss": 0.7463, "step": 1652, "time": 71.29 }, { "epoch": 1.28, "learning_rate": "4.4217e-05", "loss": 0.7428, "slid_loss": 0.7463, "step": 1653, "time": 71.45 }, { "epoch": 1.28, "learning_rate": "4.4210e-05", "loss": 0.7355, "slid_loss": 0.7461, "step": 1654, "time": 70.97 }, { "epoch": 1.28, "learning_rate": "4.4203e-05", "loss": 0.768, "slid_loss": 0.7463, "step": 1655, "time": 70.6 }, { "epoch": 1.28, "learning_rate": "4.4196e-05", "loss": 0.7555, "slid_loss": 0.7458, "step": 1656, "time": 71.18 }, { "epoch": 1.28, "learning_rate": "4.4189e-05", "loss": 0.729, "slid_loss": 0.7455, "step": 1657, "time": 71.29 }, { "epoch": 1.28, "learning_rate": "4.4182e-05", "loss": 0.7651, "slid_loss": 0.7456, "step": 1658, "time": 71.53 }, { "epoch": 1.28, "learning_rate": "4.4175e-05", "loss": 0.7401, "slid_loss": 0.7452, "step": 1659, "time": 71.51 }, { "epoch": 1.28, "learning_rate": "4.4168e-05", "loss": 0.7455, "slid_loss": 0.7451, "step": 1660, "time": 70.62 }, { "epoch": 1.29, "learning_rate": "4.4161e-05", "loss": 0.7734, "slid_loss": 0.7456, "step": 1661, "time": 71.15 }, { "epoch": 1.29, "learning_rate": "4.4154e-05", "loss": 0.7585, "slid_loss": 0.7456, "step": 1662, "time": 72.49 }, { "epoch": 1.29, "learning_rate": "4.4147e-05", "loss": 0.6963, "slid_loss": 0.7453, "step": 1663, "time": 70.93 }, { "epoch": 1.29, "learning_rate": "4.4140e-05", "loss": 0.7456, "slid_loss": 0.7455, "step": 1664, "time": 71.66 }, { "epoch": 1.29, "learning_rate": "4.4133e-05", "loss": 0.7175, "slid_loss": 0.7453, "step": 1665, "time": 71.53 }, { "epoch": 1.29, "learning_rate": "4.4126e-05", "loss": 0.7735, "slid_loss": 0.7459, "step": 1666, "time": 71.81 }, { "epoch": 1.29, "learning_rate": "4.4120e-05", "loss": 0.7456, "slid_loss": 0.7457, "step": 1667, "time": 70.79 }, { "epoch": 1.29, "learning_rate": "4.4113e-05", "loss": 0.7641, "slid_loss": 0.7459, "step": 1668, "time": 71.82 }, { "epoch": 1.29, "learning_rate": "4.4106e-05", "loss": 0.7624, "slid_loss": 0.746, "step": 1669, "time": 71.34 }, { "epoch": 1.29, "learning_rate": "4.4099e-05", "loss": 0.7272, "slid_loss": 0.7461, "step": 1670, "time": 71.16 }, { "epoch": 1.29, "learning_rate": "4.4092e-05", "loss": 0.754, "slid_loss": 0.746, "step": 1671, "time": 70.88 }, { "epoch": 1.29, "learning_rate": "4.4085e-05", "loss": 0.7254, "slid_loss": 0.7459, "step": 1672, "time": 72.12 }, { "epoch": 1.29, "learning_rate": "4.4078e-05", "loss": 0.7781, "slid_loss": 0.7463, "step": 1673, "time": 71.43 }, { "epoch": 1.3, "learning_rate": "4.4071e-05", "loss": 0.7301, "slid_loss": 0.7462, "step": 1674, "time": 71.76 }, { "epoch": 1.3, "learning_rate": "4.4064e-05", "loss": 0.707, "slid_loss": 0.7454, "step": 1675, "time": 71.96 }, { "epoch": 1.3, "learning_rate": "4.4057e-05", "loss": 0.7346, "slid_loss": 0.7455, "step": 1676, "time": 71.85 }, { "epoch": 1.3, "learning_rate": "4.4050e-05", "loss": 0.7421, "slid_loss": 0.7453, "step": 1677, "time": 71.04 }, { "epoch": 1.3, "learning_rate": "4.4043e-05", "loss": 0.7412, "slid_loss": 0.7457, "step": 1678, "time": 70.92 }, { "epoch": 1.3, "learning_rate": "4.4036e-05", "loss": 0.7418, "slid_loss": 0.7455, "step": 1679, "time": 71.55 }, { "epoch": 1.3, "learning_rate": "4.4029e-05", "loss": 0.7621, "slid_loss": 0.7456, "step": 1680, "time": 71.94 }, { "epoch": 1.3, "learning_rate": "4.4022e-05", "loss": 0.7369, "slid_loss": 0.7453, "step": 1681, "time": 72.48 }, { "epoch": 1.3, "learning_rate": "4.4015e-05", "loss": 0.759, "slid_loss": 0.7459, "step": 1682, "time": 72.01 }, { "epoch": 1.3, "learning_rate": "4.4008e-05", "loss": 0.7576, "slid_loss": 0.7459, "step": 1683, "time": 71.05 }, { "epoch": 1.3, "learning_rate": "4.4001e-05", "loss": 0.7355, "slid_loss": 0.7456, "step": 1684, "time": 72.36 }, { "epoch": 1.3, "learning_rate": "4.3994e-05", "loss": 0.7221, "slid_loss": 0.7455, "step": 1685, "time": 72.47 }, { "epoch": 1.3, "learning_rate": "4.3987e-05", "loss": 0.7198, "slid_loss": 0.7453, "step": 1686, "time": 71.68 }, { "epoch": 1.31, "learning_rate": "4.3980e-05", "loss": 0.7655, "slid_loss": 0.7455, "step": 1687, "time": 72.24 }, { "epoch": 1.31, "learning_rate": "4.3973e-05", "loss": 0.7255, "slid_loss": 0.7449, "step": 1688, "time": 72.45 }, { "epoch": 1.31, "learning_rate": "4.3966e-05", "loss": 0.739, "slid_loss": 0.7445, "step": 1689, "time": 72.29 }, { "epoch": 1.31, "learning_rate": "4.3959e-05", "loss": 0.7287, "slid_loss": 0.7439, "step": 1690, "time": 72.78 }, { "epoch": 1.31, "learning_rate": "4.3952e-05", "loss": 0.7395, "slid_loss": 0.7437, "step": 1691, "time": 71.92 }, { "epoch": 1.31, "learning_rate": "4.3945e-05", "loss": 0.7623, "slid_loss": 0.744, "step": 1692, "time": 71.24 }, { "epoch": 1.31, "learning_rate": "4.3937e-05", "loss": 0.7154, "slid_loss": 0.7432, "step": 1693, "time": 71.99 }, { "epoch": 1.31, "learning_rate": "4.3930e-05", "loss": 0.7378, "slid_loss": 0.7434, "step": 1694, "time": 71.51 }, { "epoch": 1.31, "learning_rate": "4.3923e-05", "loss": 0.7281, "slid_loss": 0.7434, "step": 1695, "time": 72.57 }, { "epoch": 1.31, "learning_rate": "4.3916e-05", "loss": 0.715, "slid_loss": 0.7427, "step": 1696, "time": 110.06 }, { "epoch": 1.31, "learning_rate": "4.3909e-05", "loss": 0.718, "slid_loss": 0.7419, "step": 1697, "time": 73.3 }, { "epoch": 1.31, "learning_rate": "4.3902e-05", "loss": 0.735, "slid_loss": 0.7418, "step": 1698, "time": 72.76 }, { "epoch": 1.31, "learning_rate": "4.3895e-05", "loss": 0.7676, "slid_loss": 0.7419, "step": 1699, "time": 72.71 }, { "epoch": 1.32, "learning_rate": "4.3888e-05", "loss": 0.7308, "slid_loss": 0.7419, "step": 1700, "time": 72.93 }, { "epoch": 1.32, "learning_rate": "4.3881e-05", "loss": 0.7386, "slid_loss": 0.7421, "step": 1701, "time": 71.71 }, { "epoch": 1.32, "learning_rate": "4.3874e-05", "loss": 0.6721, "slid_loss": 0.7413, "step": 1702, "time": 72.26 }, { "epoch": 1.32, "learning_rate": "4.3867e-05", "loss": 0.7692, "slid_loss": 0.7417, "step": 1703, "time": 71.81 }, { "epoch": 1.32, "learning_rate": "4.3860e-05", "loss": 0.7335, "slid_loss": 0.7415, "step": 1704, "time": 72.21 }, { "epoch": 1.32, "learning_rate": "4.3853e-05", "loss": 0.764, "slid_loss": 0.7419, "step": 1705, "time": 73.24 }, { "epoch": 1.32, "learning_rate": "4.3846e-05", "loss": 0.748, "slid_loss": 0.7418, "step": 1706, "time": 71.88 }, { "epoch": 1.32, "learning_rate": "4.3838e-05", "loss": 0.7264, "slid_loss": 0.7415, "step": 1707, "time": 71.97 }, { "epoch": 1.32, "learning_rate": "4.3831e-05", "loss": 0.7283, "slid_loss": 0.7415, "step": 1708, "time": 71.83 }, { "epoch": 1.32, "learning_rate": "4.3824e-05", "loss": 0.7562, "slid_loss": 0.7415, "step": 1709, "time": 71.71 }, { "epoch": 1.32, "learning_rate": "4.3817e-05", "loss": 0.7517, "slid_loss": 0.7417, "step": 1710, "time": 71.78 }, { "epoch": 1.32, "learning_rate": "4.3810e-05", "loss": 0.7132, "slid_loss": 0.7413, "step": 1711, "time": 70.56 }, { "epoch": 1.32, "learning_rate": "4.3803e-05", "loss": 0.7613, "slid_loss": 0.7414, "step": 1712, "time": 72.56 }, { "epoch": 1.33, "learning_rate": "4.3796e-05", "loss": 0.7338, "slid_loss": 0.7414, "step": 1713, "time": 70.82 }, { "epoch": 1.33, "learning_rate": "4.3789e-05", "loss": 0.7793, "slid_loss": 0.742, "step": 1714, "time": 72.58 }, { "epoch": 1.33, "learning_rate": "4.3782e-05", "loss": 0.7369, "slid_loss": 0.742, "step": 1715, "time": 71.54 }, { "epoch": 1.33, "learning_rate": "4.3775e-05", "loss": 0.7216, "slid_loss": 0.7416, "step": 1716, "time": 70.86 }, { "epoch": 1.33, "learning_rate": "4.3767e-05", "loss": 0.7573, "slid_loss": 0.7418, "step": 1717, "time": 71.79 }, { "epoch": 1.33, "learning_rate": "4.3760e-05", "loss": 0.7395, "slid_loss": 0.7413, "step": 1718, "time": 72.42 }, { "epoch": 1.33, "learning_rate": "4.3753e-05", "loss": 0.7353, "slid_loss": 0.7414, "step": 1719, "time": 73.17 }, { "epoch": 1.33, "learning_rate": "4.3746e-05", "loss": 0.7531, "slid_loss": 0.7414, "step": 1720, "time": 71.68 }, { "epoch": 1.33, "learning_rate": "4.3739e-05", "loss": 0.7486, "slid_loss": 0.7416, "step": 1721, "time": 71.47 }, { "epoch": 1.33, "learning_rate": "4.3732e-05", "loss": 0.7444, "slid_loss": 0.7413, "step": 1722, "time": 70.79 }, { "epoch": 1.33, "learning_rate": "4.3725e-05", "loss": 0.7869, "slid_loss": 0.742, "step": 1723, "time": 71.65 }, { "epoch": 1.33, "learning_rate": "4.3717e-05", "loss": 0.6867, "slid_loss": 0.7413, "step": 1724, "time": 71.93 }, { "epoch": 1.33, "learning_rate": "4.3710e-05", "loss": 0.7323, "slid_loss": 0.7411, "step": 1725, "time": 71.51 }, { "epoch": 1.34, "learning_rate": "4.3703e-05", "loss": 0.7391, "slid_loss": 0.7412, "step": 1726, "time": 71.51 }, { "epoch": 1.34, "learning_rate": "4.3696e-05", "loss": 0.738, "slid_loss": 0.7413, "step": 1727, "time": 72.13 }, { "epoch": 1.34, "learning_rate": "4.3689e-05", "loss": 0.7228, "slid_loss": 0.741, "step": 1728, "time": 70.61 }, { "epoch": 1.34, "learning_rate": "4.3682e-05", "loss": 0.7251, "slid_loss": 0.7409, "step": 1729, "time": 72.48 }, { "epoch": 1.34, "learning_rate": "4.3674e-05", "loss": 0.75, "slid_loss": 0.7408, "step": 1730, "time": 71.06 }, { "epoch": 1.34, "learning_rate": "4.3667e-05", "loss": 0.7372, "slid_loss": 0.7406, "step": 1731, "time": 70.96 }, { "epoch": 1.34, "learning_rate": "4.3660e-05", "loss": 0.7244, "slid_loss": 0.7402, "step": 1732, "time": 72.64 }, { "epoch": 1.34, "learning_rate": "4.3653e-05", "loss": 0.759, "slid_loss": 0.7407, "step": 1733, "time": 71.47 }, { "epoch": 1.34, "learning_rate": "4.3646e-05", "loss": 0.7702, "slid_loss": 0.7409, "step": 1734, "time": 72.08 }, { "epoch": 1.34, "learning_rate": "4.3639e-05", "loss": 0.7461, "slid_loss": 0.7414, "step": 1735, "time": 71.55 }, { "epoch": 1.34, "learning_rate": "4.3631e-05", "loss": 0.7788, "slid_loss": 0.7419, "step": 1736, "time": 72.25 }, { "epoch": 1.34, "learning_rate": "4.3624e-05", "loss": 0.7379, "slid_loss": 0.7419, "step": 1737, "time": 72.97 }, { "epoch": 1.34, "learning_rate": "4.3617e-05", "loss": 0.7356, "slid_loss": 0.7417, "step": 1738, "time": 72.37 }, { "epoch": 1.35, "learning_rate": "4.3610e-05", "loss": 0.7699, "slid_loss": 0.7423, "step": 1739, "time": 71.18 }, { "epoch": 1.35, "learning_rate": "4.3603e-05", "loss": 0.723, "slid_loss": 0.7424, "step": 1740, "time": 71.63 }, { "epoch": 1.35, "learning_rate": "4.3595e-05", "loss": 0.711, "slid_loss": 0.7418, "step": 1741, "time": 95.81 }, { "epoch": 1.35, "learning_rate": "4.3588e-05", "loss": 0.7481, "slid_loss": 0.7415, "step": 1742, "time": 83.04 }, { "epoch": 1.35, "learning_rate": "4.3581e-05", "loss": 0.7612, "slid_loss": 0.742, "step": 1743, "time": 124.05 }, { "epoch": 1.35, "learning_rate": "4.3574e-05", "loss": 0.7636, "slid_loss": 0.7419, "step": 1744, "time": 122.13 }, { "epoch": 1.35, "learning_rate": "4.3567e-05", "loss": 0.7496, "slid_loss": 0.742, "step": 1745, "time": 138.98 }, { "epoch": 1.35, "learning_rate": "4.3559e-05", "loss": 0.7929, "slid_loss": 0.7425, "step": 1746, "time": 175.35 }, { "epoch": 1.35, "learning_rate": "4.3552e-05", "loss": 0.7689, "slid_loss": 0.7428, "step": 1747, "time": 162.97 }, { "epoch": 1.35, "learning_rate": "4.3545e-05", "loss": 0.752, "slid_loss": 0.7431, "step": 1748, "time": 175.85 }, { "epoch": 1.35, "learning_rate": "4.3538e-05", "loss": 0.7566, "slid_loss": 0.743, "step": 1749, "time": 170.32 }, { "epoch": 1.35, "learning_rate": "4.3530e-05", "loss": 0.726, "slid_loss": 0.7431, "step": 1750, "time": 135.95 }, { "epoch": 1.35, "learning_rate": "4.3523e-05", "loss": 0.7283, "slid_loss": 0.7427, "step": 1751, "time": 151.47 }, { "epoch": 1.36, "learning_rate": "4.3516e-05", "loss": 0.7731, "slid_loss": 0.7431, "step": 1752, "time": 131.39 }, { "epoch": 1.36, "learning_rate": "4.3509e-05", "loss": 0.7468, "slid_loss": 0.7431, "step": 1753, "time": 97.93 }, { "epoch": 1.36, "learning_rate": "4.3501e-05", "loss": 0.741, "slid_loss": 0.7432, "step": 1754, "time": 95.24 }, { "epoch": 1.36, "learning_rate": "4.3494e-05", "loss": 0.7459, "slid_loss": 0.743, "step": 1755, "time": 89.54 }, { "epoch": 1.36, "learning_rate": "4.3487e-05", "loss": 0.7231, "slid_loss": 0.7426, "step": 1756, "time": 73.31 }, { "epoch": 1.36, "learning_rate": "4.3480e-05", "loss": 0.7678, "slid_loss": 0.743, "step": 1757, "time": 72.49 }, { "epoch": 1.36, "learning_rate": "4.3472e-05", "loss": 0.7179, "slid_loss": 0.7425, "step": 1758, "time": 70.78 }, { "epoch": 1.36, "learning_rate": "4.3465e-05", "loss": 0.7107, "slid_loss": 0.7423, "step": 1759, "time": 72.31 }, { "epoch": 1.36, "learning_rate": "4.3458e-05", "loss": 0.7377, "slid_loss": 0.7422, "step": 1760, "time": 71.24 }, { "epoch": 1.36, "learning_rate": "4.3451e-05", "loss": 0.7094, "slid_loss": 0.7415, "step": 1761, "time": 71.91 }, { "epoch": 1.36, "learning_rate": "4.3443e-05", "loss": 0.7443, "slid_loss": 0.7414, "step": 1762, "time": 70.7 }, { "epoch": 1.36, "learning_rate": "4.3436e-05", "loss": 0.7489, "slid_loss": 0.7419, "step": 1763, "time": 74.24 }, { "epoch": 1.36, "learning_rate": "4.3429e-05", "loss": 0.7213, "slid_loss": 0.7417, "step": 1764, "time": 71.81 }, { "epoch": 1.37, "learning_rate": "4.3422e-05", "loss": 0.7273, "slid_loss": 0.7418, "step": 1765, "time": 71.12 }, { "epoch": 1.37, "learning_rate": "4.3414e-05", "loss": 0.755, "slid_loss": 0.7416, "step": 1766, "time": 71.94 }, { "epoch": 1.37, "learning_rate": "4.3407e-05", "loss": 0.732, "slid_loss": 0.7415, "step": 1767, "time": 70.89 }, { "epoch": 1.37, "learning_rate": "4.3400e-05", "loss": 0.7548, "slid_loss": 0.7414, "step": 1768, "time": 71.36 }, { "epoch": 1.37, "learning_rate": "4.3392e-05", "loss": 0.7766, "slid_loss": 0.7415, "step": 1769, "time": 71.96 }, { "epoch": 1.37, "learning_rate": "4.3385e-05", "loss": 0.7595, "slid_loss": 0.7418, "step": 1770, "time": 72.07 }, { "epoch": 1.37, "learning_rate": "4.3378e-05", "loss": 0.7623, "slid_loss": 0.7419, "step": 1771, "time": 73.26 }, { "epoch": 1.37, "learning_rate": "4.3370e-05", "loss": 0.7433, "slid_loss": 0.7421, "step": 1772, "time": 70.69 }, { "epoch": 1.37, "learning_rate": "4.3363e-05", "loss": 0.7304, "slid_loss": 0.7416, "step": 1773, "time": 71.99 }, { "epoch": 1.37, "learning_rate": "4.3356e-05", "loss": 0.7437, "slid_loss": 0.7417, "step": 1774, "time": 70.83 }, { "epoch": 1.37, "learning_rate": "4.3349e-05", "loss": 0.7249, "slid_loss": 0.7419, "step": 1775, "time": 70.62 }, { "epoch": 1.37, "learning_rate": "4.3341e-05", "loss": 0.7398, "slid_loss": 0.742, "step": 1776, "time": 71.38 }, { "epoch": 1.37, "learning_rate": "4.3334e-05", "loss": 0.7324, "slid_loss": 0.7419, "step": 1777, "time": 73.51 }, { "epoch": 1.38, "learning_rate": "4.3327e-05", "loss": 0.7268, "slid_loss": 0.7417, "step": 1778, "time": 71.13 }, { "epoch": 1.38, "learning_rate": "4.3319e-05", "loss": 0.7544, "slid_loss": 0.7419, "step": 1779, "time": 72.39 }, { "epoch": 1.38, "learning_rate": "4.3312e-05", "loss": 0.7788, "slid_loss": 0.742, "step": 1780, "time": 71.86 }, { "epoch": 1.38, "learning_rate": "4.3305e-05", "loss": 0.7538, "slid_loss": 0.7422, "step": 1781, "time": 72.56 }, { "epoch": 1.38, "learning_rate": "4.3297e-05", "loss": 0.767, "slid_loss": 0.7423, "step": 1782, "time": 69.8 }, { "epoch": 1.38, "learning_rate": "4.3290e-05", "loss": 0.743, "slid_loss": 0.7421, "step": 1783, "time": 70.87 }, { "epoch": 1.38, "learning_rate": "4.3283e-05", "loss": 0.7112, "slid_loss": 0.7419, "step": 1784, "time": 72.86 }, { "epoch": 1.38, "learning_rate": "4.3275e-05", "loss": 0.7629, "slid_loss": 0.7423, "step": 1785, "time": 70.18 }, { "epoch": 1.38, "learning_rate": "4.3268e-05", "loss": 0.7355, "slid_loss": 0.7425, "step": 1786, "time": 71.73 }, { "epoch": 1.38, "learning_rate": "4.3260e-05", "loss": 0.7638, "slid_loss": 0.7424, "step": 1787, "time": 70.93 }, { "epoch": 1.38, "learning_rate": "4.3253e-05", "loss": 0.7402, "slid_loss": 0.7426, "step": 1788, "time": 71.95 }, { "epoch": 1.38, "learning_rate": "4.3246e-05", "loss": 0.7163, "slid_loss": 0.7424, "step": 1789, "time": 71.29 }, { "epoch": 1.38, "learning_rate": "4.3238e-05", "loss": 0.7232, "slid_loss": 0.7423, "step": 1790, "time": 71.09 }, { "epoch": 1.39, "learning_rate": "4.3231e-05", "loss": 0.7472, "slid_loss": 0.7424, "step": 1791, "time": 72.7 }, { "epoch": 1.39, "learning_rate": "4.3224e-05", "loss": 0.7285, "slid_loss": 0.742, "step": 1792, "time": 71.04 }, { "epoch": 1.39, "learning_rate": "4.3216e-05", "loss": 0.7016, "slid_loss": 0.7419, "step": 1793, "time": 73.91 }, { "epoch": 1.39, "learning_rate": "4.3209e-05", "loss": 0.7314, "slid_loss": 0.7418, "step": 1794, "time": 72.77 }, { "epoch": 1.39, "learning_rate": "4.3202e-05", "loss": 0.7875, "slid_loss": 0.7424, "step": 1795, "time": 72.76 }, { "epoch": 1.39, "learning_rate": "4.3194e-05", "loss": 0.7469, "slid_loss": 0.7427, "step": 1796, "time": 72.06 }, { "epoch": 1.39, "learning_rate": "4.3187e-05", "loss": 0.7727, "slid_loss": 0.7433, "step": 1797, "time": 71.22 }, { "epoch": 1.39, "learning_rate": "4.3179e-05", "loss": 0.7629, "slid_loss": 0.7436, "step": 1798, "time": 71.77 }, { "epoch": 1.39, "learning_rate": "4.3172e-05", "loss": 0.7216, "slid_loss": 0.7431, "step": 1799, "time": 71.45 }, { "epoch": 1.39, "learning_rate": "4.3165e-05", "loss": 0.7164, "slid_loss": 0.743, "step": 1800, "time": 73.39 }, { "epoch": 1.39, "learning_rate": "4.3157e-05", "loss": 0.7554, "slid_loss": 0.7431, "step": 1801, "time": 752.76 }, { "epoch": 1.39, "learning_rate": "4.3150e-05", "loss": 0.7331, "slid_loss": 0.7438, "step": 1802, "time": 72.44 }, { "epoch": 1.39, "learning_rate": "4.3142e-05", "loss": 0.7269, "slid_loss": 0.7433, "step": 1803, "time": 71.64 }, { "epoch": 1.4, "learning_rate": "4.3135e-05", "loss": 0.7398, "slid_loss": 0.7434, "step": 1804, "time": 72.14 }, { "epoch": 1.4, "learning_rate": "4.3128e-05", "loss": 0.7377, "slid_loss": 0.7431, "step": 1805, "time": 71.97 }, { "epoch": 1.4, "learning_rate": "4.3120e-05", "loss": 0.74, "slid_loss": 0.743, "step": 1806, "time": 71.9 }, { "epoch": 1.4, "learning_rate": "4.3113e-05", "loss": 0.7294, "slid_loss": 0.7431, "step": 1807, "time": 71.64 }, { "epoch": 1.4, "learning_rate": "4.3105e-05", "loss": 0.7695, "slid_loss": 0.7435, "step": 1808, "time": 72.77 }, { "epoch": 1.4, "learning_rate": "4.3098e-05", "loss": 0.7444, "slid_loss": 0.7434, "step": 1809, "time": 71.21 }, { "epoch": 1.4, "learning_rate": "4.3090e-05", "loss": 0.7096, "slid_loss": 0.7429, "step": 1810, "time": 71.79 }, { "epoch": 1.4, "learning_rate": "4.3083e-05", "loss": 0.7315, "slid_loss": 0.7431, "step": 1811, "time": 71.22 }, { "epoch": 1.4, "learning_rate": "4.3076e-05", "loss": 0.7507, "slid_loss": 0.743, "step": 1812, "time": 71.86 }, { "epoch": 1.4, "learning_rate": "4.3068e-05", "loss": 0.7302, "slid_loss": 0.743, "step": 1813, "time": 71.53 }, { "epoch": 1.4, "learning_rate": "4.3061e-05", "loss": 0.7586, "slid_loss": 0.7428, "step": 1814, "time": 72.67 }, { "epoch": 1.4, "learning_rate": "4.3053e-05", "loss": 0.7149, "slid_loss": 0.7426, "step": 1815, "time": 72.07 }, { "epoch": 1.41, "learning_rate": "4.3046e-05", "loss": 0.7171, "slid_loss": 0.7425, "step": 1816, "time": 73.07 }, { "epoch": 1.41, "learning_rate": "4.3038e-05", "loss": 0.721, "slid_loss": 0.7422, "step": 1817, "time": 71.34 }, { "epoch": 1.41, "learning_rate": "4.3031e-05", "loss": 0.7279, "slid_loss": 0.742, "step": 1818, "time": 71.21 }, { "epoch": 1.41, "learning_rate": "4.3023e-05", "loss": 0.7406, "slid_loss": 0.7421, "step": 1819, "time": 73.51 }, { "epoch": 1.41, "learning_rate": "4.3016e-05", "loss": 0.721, "slid_loss": 0.7418, "step": 1820, "time": 71.52 }, { "epoch": 1.41, "learning_rate": "4.3009e-05", "loss": 0.7032, "slid_loss": 0.7413, "step": 1821, "time": 71.28 }, { "epoch": 1.41, "learning_rate": "4.3001e-05", "loss": 0.7415, "slid_loss": 0.7413, "step": 1822, "time": 71.56 }, { "epoch": 1.41, "learning_rate": "4.2994e-05", "loss": 0.6952, "slid_loss": 0.7404, "step": 1823, "time": 71.83 }, { "epoch": 1.41, "learning_rate": "4.2986e-05", "loss": 0.7515, "slid_loss": 0.741, "step": 1824, "time": 72.58 }, { "epoch": 1.41, "learning_rate": "4.2979e-05", "loss": 0.7067, "slid_loss": 0.7408, "step": 1825, "time": 72.13 }, { "epoch": 1.41, "learning_rate": "4.2971e-05", "loss": 0.7369, "slid_loss": 0.7407, "step": 1826, "time": 70.97 }, { "epoch": 1.41, "learning_rate": "4.2964e-05", "loss": 0.7239, "slid_loss": 0.7406, "step": 1827, "time": 71.97 }, { "epoch": 1.41, "learning_rate": "4.2956e-05", "loss": 0.751, "slid_loss": 0.7409, "step": 1828, "time": 71.5 }, { "epoch": 1.42, "learning_rate": "4.2949e-05", "loss": 0.7908, "slid_loss": 0.7415, "step": 1829, "time": 73.5 }, { "epoch": 1.42, "learning_rate": "4.2941e-05", "loss": 0.7595, "slid_loss": 0.7416, "step": 1830, "time": 70.88 }, { "epoch": 1.42, "learning_rate": "4.2934e-05", "loss": 0.7372, "slid_loss": 0.7416, "step": 1831, "time": 71.7 }, { "epoch": 1.42, "learning_rate": "4.2926e-05", "loss": 0.7467, "slid_loss": 0.7419, "step": 1832, "time": 73.63 }, { "epoch": 1.42, "learning_rate": "4.2919e-05", "loss": 0.751, "slid_loss": 0.7418, "step": 1833, "time": 69.87 }, { "epoch": 1.42, "learning_rate": "4.2911e-05", "loss": 0.7414, "slid_loss": 0.7415, "step": 1834, "time": 70.21 }, { "epoch": 1.42, "learning_rate": "4.2904e-05", "loss": 0.7416, "slid_loss": 0.7414, "step": 1835, "time": 71.47 }, { "epoch": 1.42, "learning_rate": "4.2896e-05", "loss": 0.76, "slid_loss": 0.7413, "step": 1836, "time": 76.01 }, { "epoch": 1.42, "learning_rate": "4.2889e-05", "loss": 0.75, "slid_loss": 0.7414, "step": 1837, "time": 72.57 }, { "epoch": 1.42, "learning_rate": "4.2881e-05", "loss": 0.7292, "slid_loss": 0.7413, "step": 1838, "time": 72.38 }, { "epoch": 1.42, "learning_rate": "4.2874e-05", "loss": 0.7492, "slid_loss": 0.7411, "step": 1839, "time": 72.01 }, { "epoch": 1.42, "learning_rate": "4.2866e-05", "loss": 0.7492, "slid_loss": 0.7414, "step": 1840, "time": 70.92 }, { "epoch": 1.42, "learning_rate": "4.2859e-05", "loss": 0.7248, "slid_loss": 0.7415, "step": 1841, "time": 71.45 }, { "epoch": 1.43, "learning_rate": "4.2851e-05", "loss": 0.7516, "slid_loss": 0.7415, "step": 1842, "time": 71.61 }, { "epoch": 1.43, "learning_rate": "4.2844e-05", "loss": 0.7111, "slid_loss": 0.741, "step": 1843, "time": 72.21 }, { "epoch": 1.43, "learning_rate": "4.2836e-05", "loss": 0.7468, "slid_loss": 0.7409, "step": 1844, "time": 71.71 }, { "epoch": 1.43, "learning_rate": "4.2829e-05", "loss": 0.7197, "slid_loss": 0.7406, "step": 1845, "time": 71.09 }, { "epoch": 1.43, "learning_rate": "4.2821e-05", "loss": 0.7206, "slid_loss": 0.7399, "step": 1846, "time": 72.85 }, { "epoch": 1.43, "learning_rate": "4.2813e-05", "loss": 0.7113, "slid_loss": 0.7393, "step": 1847, "time": 71.22 }, { "epoch": 1.43, "learning_rate": "4.2806e-05", "loss": 0.7414, "slid_loss": 0.7392, "step": 1848, "time": 71.22 }, { "epoch": 1.43, "learning_rate": "4.2798e-05", "loss": 0.7085, "slid_loss": 0.7387, "step": 1849, "time": 73.12 }, { "epoch": 1.43, "learning_rate": "4.2791e-05", "loss": 0.7115, "slid_loss": 0.7385, "step": 1850, "time": 71.12 }, { "epoch": 1.43, "learning_rate": "4.2783e-05", "loss": 0.7311, "slid_loss": 0.7386, "step": 1851, "time": 71.02 }, { "epoch": 1.43, "learning_rate": "4.2776e-05", "loss": 0.7247, "slid_loss": 0.7381, "step": 1852, "time": 71.37 }, { "epoch": 1.43, "learning_rate": "4.2768e-05", "loss": 0.739, "slid_loss": 0.738, "step": 1853, "time": 72.83 }, { "epoch": 1.43, "learning_rate": "4.2761e-05", "loss": 0.7215, "slid_loss": 0.7378, "step": 1854, "time": 71.49 }, { "epoch": 1.44, "learning_rate": "4.2753e-05", "loss": 0.7101, "slid_loss": 0.7375, "step": 1855, "time": 71.53 }, { "epoch": 1.44, "learning_rate": "4.2745e-05", "loss": 0.7497, "slid_loss": 0.7377, "step": 1856, "time": 71.14 }, { "epoch": 1.44, "learning_rate": "4.2738e-05", "loss": 0.725, "slid_loss": 0.7373, "step": 1857, "time": 73.56 }, { "epoch": 1.44, "learning_rate": "4.2730e-05", "loss": 0.7231, "slid_loss": 0.7373, "step": 1858, "time": 72.21 }, { "epoch": 1.44, "learning_rate": "4.2723e-05", "loss": 0.7498, "slid_loss": 0.7377, "step": 1859, "time": 72.5 }, { "epoch": 1.44, "learning_rate": "4.2715e-05", "loss": 0.7545, "slid_loss": 0.7379, "step": 1860, "time": 72.16 }, { "epoch": 1.44, "learning_rate": "4.2708e-05", "loss": 0.6982, "slid_loss": 0.7378, "step": 1861, "time": 70.79 }, { "epoch": 1.44, "learning_rate": "4.2700e-05", "loss": 0.7317, "slid_loss": 0.7377, "step": 1862, "time": 71.97 }, { "epoch": 1.44, "learning_rate": "4.2692e-05", "loss": 0.7298, "slid_loss": 0.7375, "step": 1863, "time": 72.0 }, { "epoch": 1.44, "learning_rate": "4.2685e-05", "loss": 0.7413, "slid_loss": 0.7377, "step": 1864, "time": 71.27 }, { "epoch": 1.44, "learning_rate": "4.2677e-05", "loss": 0.7617, "slid_loss": 0.738, "step": 1865, "time": 71.44 }, { "epoch": 1.44, "learning_rate": "4.2670e-05", "loss": 0.7585, "slid_loss": 0.7381, "step": 1866, "time": 72.44 }, { "epoch": 1.44, "learning_rate": "4.2662e-05", "loss": 0.7261, "slid_loss": 0.738, "step": 1867, "time": 72.24 }, { "epoch": 1.45, "learning_rate": "4.2654e-05", "loss": 0.757, "slid_loss": 0.738, "step": 1868, "time": 72.38 }, { "epoch": 1.45, "learning_rate": "4.2647e-05", "loss": 0.6958, "slid_loss": 0.7372, "step": 1869, "time": 72.44 }, { "epoch": 1.45, "learning_rate": "4.2639e-05", "loss": 0.7586, "slid_loss": 0.7372, "step": 1870, "time": 71.63 }, { "epoch": 1.45, "learning_rate": "4.2632e-05", "loss": 0.7354, "slid_loss": 0.7369, "step": 1871, "time": 73.74 }, { "epoch": 1.45, "learning_rate": "4.2624e-05", "loss": 0.7715, "slid_loss": 0.7372, "step": 1872, "time": 72.54 }, { "epoch": 1.45, "learning_rate": "4.2616e-05", "loss": 0.7479, "slid_loss": 0.7374, "step": 1873, "time": 71.75 }, { "epoch": 1.45, "learning_rate": "4.2609e-05", "loss": 0.7394, "slid_loss": 0.7373, "step": 1874, "time": 72.4 }, { "epoch": 1.45, "learning_rate": "4.2601e-05", "loss": 0.7278, "slid_loss": 0.7374, "step": 1875, "time": 71.53 }, { "epoch": 1.45, "learning_rate": "4.2593e-05", "loss": 0.7397, "slid_loss": 0.7374, "step": 1876, "time": 70.5 }, { "epoch": 1.45, "learning_rate": "4.2586e-05", "loss": 0.736, "slid_loss": 0.7374, "step": 1877, "time": 73.45 }, { "epoch": 1.45, "learning_rate": "4.2578e-05", "loss": 0.6966, "slid_loss": 0.7371, "step": 1878, "time": 72.4 }, { "epoch": 1.45, "learning_rate": "4.2570e-05", "loss": 0.7337, "slid_loss": 0.7369, "step": 1879, "time": 71.6 }, { "epoch": 1.45, "learning_rate": "4.2563e-05", "loss": 0.7433, "slid_loss": 0.7365, "step": 1880, "time": 71.22 }, { "epoch": 1.46, "learning_rate": "4.2555e-05", "loss": 0.7113, "slid_loss": 0.7361, "step": 1881, "time": 72.93 }, { "epoch": 1.46, "learning_rate": "4.2548e-05", "loss": 0.7446, "slid_loss": 0.7359, "step": 1882, "time": 70.18 }, { "epoch": 1.46, "learning_rate": "4.2540e-05", "loss": 0.7346, "slid_loss": 0.7358, "step": 1883, "time": 70.74 }, { "epoch": 1.46, "learning_rate": "4.2532e-05", "loss": 0.708, "slid_loss": 0.7358, "step": 1884, "time": 72.14 }, { "epoch": 1.46, "learning_rate": "4.2525e-05", "loss": 0.7335, "slid_loss": 0.7355, "step": 1885, "time": 70.36 }, { "epoch": 1.46, "learning_rate": "4.2517e-05", "loss": 0.7453, "slid_loss": 0.7356, "step": 1886, "time": 72.53 }, { "epoch": 1.46, "learning_rate": "4.2509e-05", "loss": 0.7459, "slid_loss": 0.7354, "step": 1887, "time": 72.19 }, { "epoch": 1.46, "learning_rate": "4.2502e-05", "loss": 0.7622, "slid_loss": 0.7356, "step": 1888, "time": 71.28 }, { "epoch": 1.46, "learning_rate": "4.2494e-05", "loss": 0.7194, "slid_loss": 0.7357, "step": 1889, "time": 71.95 }, { "epoch": 1.46, "learning_rate": "4.2486e-05", "loss": 0.6867, "slid_loss": 0.7353, "step": 1890, "time": 71.07 }, { "epoch": 1.46, "learning_rate": "4.2479e-05", "loss": 0.7273, "slid_loss": 0.7351, "step": 1891, "time": 72.05 }, { "epoch": 1.46, "learning_rate": "4.2471e-05", "loss": 0.7493, "slid_loss": 0.7353, "step": 1892, "time": 72.17 }, { "epoch": 1.46, "learning_rate": "4.2463e-05", "loss": 0.7511, "slid_loss": 0.7358, "step": 1893, "time": 72.36 }, { "epoch": 1.47, "learning_rate": "4.2456e-05", "loss": 0.8039, "slid_loss": 0.7365, "step": 1894, "time": 72.23 }, { "epoch": 1.47, "learning_rate": "4.2448e-05", "loss": 0.7462, "slid_loss": 0.7361, "step": 1895, "time": 70.82 }, { "epoch": 1.47, "learning_rate": "4.2440e-05", "loss": 0.693, "slid_loss": 0.7356, "step": 1896, "time": 71.77 }, { "epoch": 1.47, "learning_rate": "4.2432e-05", "loss": 0.7442, "slid_loss": 0.7353, "step": 1897, "time": 70.81 }, { "epoch": 1.47, "learning_rate": "4.2425e-05", "loss": 0.7518, "slid_loss": 0.7352, "step": 1898, "time": 70.58 }, { "epoch": 1.47, "learning_rate": "4.2417e-05", "loss": 0.7697, "slid_loss": 0.7357, "step": 1899, "time": 71.56 }, { "epoch": 1.47, "learning_rate": "4.2409e-05", "loss": 0.7292, "slid_loss": 0.7358, "step": 1900, "time": 85.43 }, { "epoch": 1.47, "learning_rate": "4.2402e-05", "loss": 0.7, "slid_loss": 0.7352, "step": 1901, "time": 133.69 }, { "epoch": 1.47, "learning_rate": "4.2394e-05", "loss": 0.7094, "slid_loss": 0.735, "step": 1902, "time": 112.0 }, { "epoch": 1.47, "learning_rate": "4.2386e-05", "loss": 0.7478, "slid_loss": 0.7352, "step": 1903, "time": 122.55 }, { "epoch": 1.47, "learning_rate": "4.2379e-05", "loss": 0.7603, "slid_loss": 0.7354, "step": 1904, "time": 149.02 }, { "epoch": 1.47, "learning_rate": "4.2371e-05", "loss": 0.7358, "slid_loss": 0.7354, "step": 1905, "time": 163.47 }, { "epoch": 1.47, "learning_rate": "4.2363e-05", "loss": 0.7168, "slid_loss": 0.7352, "step": 1906, "time": 163.29 }, { "epoch": 1.48, "learning_rate": "4.2355e-05", "loss": 0.7483, "slid_loss": 0.7353, "step": 1907, "time": 165.24 }, { "epoch": 1.48, "learning_rate": "4.2348e-05", "loss": 0.7327, "slid_loss": 0.735, "step": 1908, "time": 148.91 }, { "epoch": 1.48, "learning_rate": "4.2340e-05", "loss": 0.733, "slid_loss": 0.7349, "step": 1909, "time": 139.68 }, { "epoch": 1.48, "learning_rate": "4.2332e-05", "loss": 0.7099, "slid_loss": 0.7349, "step": 1910, "time": 147.73 }, { "epoch": 1.48, "learning_rate": "4.2324e-05", "loss": 0.7512, "slid_loss": 0.7351, "step": 1911, "time": 127.81 }, { "epoch": 1.48, "learning_rate": "4.2317e-05", "loss": 0.7715, "slid_loss": 0.7353, "step": 1912, "time": 109.02 }, { "epoch": 1.48, "learning_rate": "4.2309e-05", "loss": 0.7408, "slid_loss": 0.7354, "step": 1913, "time": 73.45 }, { "epoch": 1.48, "learning_rate": "4.2301e-05", "loss": 0.7483, "slid_loss": 0.7353, "step": 1914, "time": 83.29 }, { "epoch": 1.48, "learning_rate": "4.2293e-05", "loss": 0.7228, "slid_loss": 0.7354, "step": 1915, "time": 95.45 }, { "epoch": 1.48, "learning_rate": "4.2286e-05", "loss": 0.7206, "slid_loss": 0.7354, "step": 1916, "time": 72.2 }, { "epoch": 1.48, "learning_rate": "4.2278e-05", "loss": 0.7239, "slid_loss": 0.7354, "step": 1917, "time": 72.25 }, { "epoch": 1.48, "learning_rate": "4.2270e-05", "loss": 0.7444, "slid_loss": 0.7356, "step": 1918, "time": 71.72 }, { "epoch": 1.48, "learning_rate": "4.2262e-05", "loss": 0.7213, "slid_loss": 0.7354, "step": 1919, "time": 70.9 }, { "epoch": 1.49, "learning_rate": "4.2255e-05", "loss": 0.6742, "slid_loss": 0.7349, "step": 1920, "time": 72.42 }, { "epoch": 1.49, "learning_rate": "4.2247e-05", "loss": 0.7121, "slid_loss": 0.735, "step": 1921, "time": 73.34 }, { "epoch": 1.49, "learning_rate": "4.2239e-05", "loss": 0.7037, "slid_loss": 0.7346, "step": 1922, "time": 72.74 }, { "epoch": 1.49, "learning_rate": "4.2231e-05", "loss": 0.7666, "slid_loss": 0.7353, "step": 1923, "time": 73.23 }, { "epoch": 1.49, "learning_rate": "4.2224e-05", "loss": 0.7233, "slid_loss": 0.7351, "step": 1924, "time": 71.44 }, { "epoch": 1.49, "learning_rate": "4.2216e-05", "loss": 0.7225, "slid_loss": 0.7352, "step": 1925, "time": 72.66 }, { "epoch": 1.49, "learning_rate": "4.2208e-05", "loss": 0.7543, "slid_loss": 0.7354, "step": 1926, "time": 73.17 }, { "epoch": 1.49, "learning_rate": "4.2200e-05", "loss": 0.7277, "slid_loss": 0.7354, "step": 1927, "time": 72.96 }, { "epoch": 1.49, "learning_rate": "4.2192e-05", "loss": 0.7437, "slid_loss": 0.7354, "step": 1928, "time": 70.81 }, { "epoch": 1.49, "learning_rate": "4.2185e-05", "loss": 0.7205, "slid_loss": 0.7347, "step": 1929, "time": 73.37 }, { "epoch": 1.49, "learning_rate": "4.2177e-05", "loss": 0.7322, "slid_loss": 0.7344, "step": 1930, "time": 71.59 }, { "epoch": 1.49, "learning_rate": "4.2169e-05", "loss": 0.7061, "slid_loss": 0.7341, "step": 1931, "time": 73.23 }, { "epoch": 1.49, "learning_rate": "4.2161e-05", "loss": 0.7152, "slid_loss": 0.7338, "step": 1932, "time": 73.0 }, { "epoch": 1.5, "learning_rate": "4.2153e-05", "loss": 0.7057, "slid_loss": 0.7333, "step": 1933, "time": 71.66 }, { "epoch": 1.5, "learning_rate": "4.2146e-05", "loss": 0.718, "slid_loss": 0.7331, "step": 1934, "time": 73.52 }, { "epoch": 1.5, "learning_rate": "4.2138e-05", "loss": 0.7282, "slid_loss": 0.7329, "step": 1935, "time": 71.57 }, { "epoch": 1.5, "learning_rate": "4.2130e-05", "loss": 0.7569, "slid_loss": 0.7329, "step": 1936, "time": 71.58 }, { "epoch": 1.5, "learning_rate": "4.2122e-05", "loss": 0.7448, "slid_loss": 0.7329, "step": 1937, "time": 71.42 }, { "epoch": 1.5, "learning_rate": "4.2114e-05", "loss": 0.7126, "slid_loss": 0.7327, "step": 1938, "time": 71.91 }, { "epoch": 1.5, "learning_rate": "4.2107e-05", "loss": 0.7784, "slid_loss": 0.733, "step": 1939, "time": 71.35 }, { "epoch": 1.5, "learning_rate": "4.2099e-05", "loss": 0.7284, "slid_loss": 0.7328, "step": 1940, "time": 72.65 }, { "epoch": 1.5, "learning_rate": "4.2091e-05", "loss": 0.7335, "slid_loss": 0.7329, "step": 1941, "time": 71.67 }, { "epoch": 1.5, "learning_rate": "4.2083e-05", "loss": 0.7687, "slid_loss": 0.733, "step": 1942, "time": 72.84 }, { "epoch": 1.5, "learning_rate": "4.2075e-05", "loss": 0.7306, "slid_loss": 0.7332, "step": 1943, "time": 69.97 }, { "epoch": 1.5, "learning_rate": "4.2067e-05", "loss": 0.711, "slid_loss": 0.7329, "step": 1944, "time": 71.67 }, { "epoch": 1.5, "learning_rate": "4.2060e-05", "loss": 0.7275, "slid_loss": 0.7329, "step": 1945, "time": 71.11 }, { "epoch": 1.51, "learning_rate": "4.2052e-05", "loss": 0.7454, "slid_loss": 0.7332, "step": 1946, "time": 72.8 }, { "epoch": 1.51, "learning_rate": "4.2044e-05", "loss": 0.7773, "slid_loss": 0.7339, "step": 1947, "time": 71.15 }, { "epoch": 1.51, "learning_rate": "4.2036e-05", "loss": 0.7354, "slid_loss": 0.7338, "step": 1948, "time": 69.52 }, { "epoch": 1.51, "learning_rate": "4.2028e-05", "loss": 0.7082, "slid_loss": 0.7338, "step": 1949, "time": 72.43 }, { "epoch": 1.51, "learning_rate": "4.2020e-05", "loss": 0.7035, "slid_loss": 0.7337, "step": 1950, "time": 71.15 }, { "epoch": 1.51, "learning_rate": "4.2013e-05", "loss": 0.713, "slid_loss": 0.7335, "step": 1951, "time": 72.21 }, { "epoch": 1.51, "learning_rate": "4.2005e-05", "loss": 0.7049, "slid_loss": 0.7333, "step": 1952, "time": 70.99 }, { "epoch": 1.51, "learning_rate": "4.1997e-05", "loss": 0.706, "slid_loss": 0.733, "step": 1953, "time": 71.25 }, { "epoch": 1.51, "learning_rate": "4.1989e-05", "loss": 0.7216, "slid_loss": 0.733, "step": 1954, "time": 71.27 }, { "epoch": 1.51, "learning_rate": "4.1981e-05", "loss": 0.7279, "slid_loss": 0.7332, "step": 1955, "time": 71.43 }, { "epoch": 1.51, "learning_rate": "4.1973e-05", "loss": 0.7423, "slid_loss": 0.7331, "step": 1956, "time": 71.18 }, { "epoch": 1.51, "learning_rate": "4.1965e-05", "loss": 0.7995, "slid_loss": 0.7339, "step": 1957, "time": 70.99 }, { "epoch": 1.51, "learning_rate": "4.1957e-05", "loss": 0.7197, "slid_loss": 0.7338, "step": 1958, "time": 70.62 }, { "epoch": 1.52, "learning_rate": "4.1950e-05", "loss": 0.7381, "slid_loss": 0.7337, "step": 1959, "time": 71.55 }, { "epoch": 1.52, "learning_rate": "4.1942e-05", "loss": 0.7397, "slid_loss": 0.7336, "step": 1960, "time": 71.05 }, { "epoch": 1.52, "learning_rate": "4.1934e-05", "loss": 0.6987, "slid_loss": 0.7336, "step": 1961, "time": 70.59 }, { "epoch": 1.52, "learning_rate": "4.1926e-05", "loss": 0.7214, "slid_loss": 0.7335, "step": 1962, "time": 72.16 }, { "epoch": 1.52, "learning_rate": "4.1918e-05", "loss": 0.7797, "slid_loss": 0.734, "step": 1963, "time": 70.92 }, { "epoch": 1.52, "learning_rate": "4.1910e-05", "loss": 0.7293, "slid_loss": 0.7338, "step": 1964, "time": 71.92 }, { "epoch": 1.52, "learning_rate": "4.1902e-05", "loss": 0.7163, "slid_loss": 0.7334, "step": 1965, "time": 71.77 }, { "epoch": 1.52, "learning_rate": "4.1894e-05", "loss": 0.7656, "slid_loss": 0.7335, "step": 1966, "time": 72.0 }, { "epoch": 1.52, "learning_rate": "4.1886e-05", "loss": 0.7315, "slid_loss": 0.7335, "step": 1967, "time": 72.46 }, { "epoch": 1.52, "learning_rate": "4.1879e-05", "loss": 0.7509, "slid_loss": 0.7334, "step": 1968, "time": 72.03 }, { "epoch": 1.52, "learning_rate": "4.1871e-05", "loss": 0.7172, "slid_loss": 0.7337, "step": 1969, "time": 70.55 }, { "epoch": 1.52, "learning_rate": "4.1863e-05", "loss": 0.7161, "slid_loss": 0.7332, "step": 1970, "time": 72.48 }, { "epoch": 1.52, "learning_rate": "4.1855e-05", "loss": 0.7473, "slid_loss": 0.7334, "step": 1971, "time": 71.83 }, { "epoch": 1.53, "learning_rate": "4.1847e-05", "loss": 0.7314, "slid_loss": 0.733, "step": 1972, "time": 70.87 }, { "epoch": 1.53, "learning_rate": "4.1839e-05", "loss": 0.7661, "slid_loss": 0.7331, "step": 1973, "time": 71.66 }, { "epoch": 1.53, "learning_rate": "4.1831e-05", "loss": 0.7021, "slid_loss": 0.7328, "step": 1974, "time": 71.77 }, { "epoch": 1.53, "learning_rate": "4.1823e-05", "loss": 0.7185, "slid_loss": 0.7327, "step": 1975, "time": 73.75 }, { "epoch": 1.53, "learning_rate": "4.1815e-05", "loss": 0.7166, "slid_loss": 0.7324, "step": 1976, "time": 72.41 }, { "epoch": 1.53, "learning_rate": "4.1807e-05", "loss": 0.7467, "slid_loss": 0.7325, "step": 1977, "time": 70.91 }, { "epoch": 1.53, "learning_rate": "4.1799e-05", "loss": 0.7148, "slid_loss": 0.7327, "step": 1978, "time": 70.41 }, { "epoch": 1.53, "learning_rate": "4.1791e-05", "loss": 0.7278, "slid_loss": 0.7327, "step": 1979, "time": 71.79 }, { "epoch": 1.53, "learning_rate": "4.1784e-05", "loss": 0.7342, "slid_loss": 0.7326, "step": 1980, "time": 70.49 }, { "epoch": 1.53, "learning_rate": "4.1776e-05", "loss": 0.6947, "slid_loss": 0.7324, "step": 1981, "time": 72.23 }, { "epoch": 1.53, "learning_rate": "4.1768e-05", "loss": 0.7328, "slid_loss": 0.7323, "step": 1982, "time": 71.77 }, { "epoch": 1.53, "learning_rate": "4.1760e-05", "loss": 0.7343, "slid_loss": 0.7323, "step": 1983, "time": 71.25 }, { "epoch": 1.54, "learning_rate": "4.1752e-05", "loss": 0.757, "slid_loss": 0.7328, "step": 1984, "time": 71.47 }, { "epoch": 1.54, "learning_rate": "4.1744e-05", "loss": 0.7326, "slid_loss": 0.7328, "step": 1985, "time": 72.15 }, { "epoch": 1.54, "learning_rate": "4.1736e-05", "loss": 0.7064, "slid_loss": 0.7324, "step": 1986, "time": 71.13 }, { "epoch": 1.54, "learning_rate": "4.1728e-05", "loss": 0.6965, "slid_loss": 0.7319, "step": 1987, "time": 71.79 }, { "epoch": 1.54, "learning_rate": "4.1720e-05", "loss": 0.7347, "slid_loss": 0.7316, "step": 1988, "time": 72.59 }, { "epoch": 1.54, "learning_rate": "4.1712e-05", "loss": 0.7462, "slid_loss": 0.7319, "step": 1989, "time": 72.68 }, { "epoch": 1.54, "learning_rate": "4.1704e-05", "loss": 0.7265, "slid_loss": 0.7323, "step": 1990, "time": 72.12 }, { "epoch": 1.54, "learning_rate": "4.1696e-05", "loss": 0.7588, "slid_loss": 0.7326, "step": 1991, "time": 71.5 }, { "epoch": 1.54, "learning_rate": "4.1688e-05", "loss": 0.6896, "slid_loss": 0.732, "step": 1992, "time": 72.54 }, { "epoch": 1.54, "learning_rate": "4.1680e-05", "loss": 0.7198, "slid_loss": 0.7317, "step": 1993, "time": 72.18 }, { "epoch": 1.54, "learning_rate": "4.1672e-05", "loss": 0.7024, "slid_loss": 0.7307, "step": 1994, "time": 71.13 }, { "epoch": 1.54, "learning_rate": "4.1664e-05", "loss": 0.7303, "slid_loss": 0.7305, "step": 1995, "time": 71.37 }, { "epoch": 1.54, "learning_rate": "4.1656e-05", "loss": 0.7644, "slid_loss": 0.7312, "step": 1996, "time": 72.48 }, { "epoch": 1.55, "learning_rate": "4.1648e-05", "loss": 0.7178, "slid_loss": 0.731, "step": 1997, "time": 72.77 }, { "epoch": 1.55, "learning_rate": "4.1640e-05", "loss": 0.7199, "slid_loss": 0.7306, "step": 1998, "time": 70.54 }, { "epoch": 1.55, "learning_rate": "4.1632e-05", "loss": 0.7333, "slid_loss": 0.7303, "step": 1999, "time": 72.81 }, { "epoch": 1.55, "learning_rate": "4.1624e-05", "loss": 0.7406, "slid_loss": 0.7304, "step": 2000, "time": 71.68 }, { "epoch": 1.55, "learning_rate": "4.1616e-05", "loss": 0.733, "slid_loss": 0.7307, "step": 2001, "time": 834.74 }, { "epoch": 1.55, "learning_rate": "4.1608e-05", "loss": 0.7396, "slid_loss": 0.731, "step": 2002, "time": 71.71 }, { "epoch": 1.55, "learning_rate": "4.1600e-05", "loss": 0.7247, "slid_loss": 0.7308, "step": 2003, "time": 71.21 }, { "epoch": 1.55, "learning_rate": "4.1592e-05", "loss": 0.7063, "slid_loss": 0.7302, "step": 2004, "time": 70.87 }, { "epoch": 1.55, "learning_rate": "4.1584e-05", "loss": 0.7174, "slid_loss": 0.7301, "step": 2005, "time": 72.06 }, { "epoch": 1.55, "learning_rate": "4.1576e-05", "loss": 0.7433, "slid_loss": 0.7303, "step": 2006, "time": 71.08 }, { "epoch": 1.55, "learning_rate": "4.1568e-05", "loss": 0.7038, "slid_loss": 0.7299, "step": 2007, "time": 72.26 }, { "epoch": 1.55, "learning_rate": "4.1560e-05", "loss": 0.7073, "slid_loss": 0.7296, "step": 2008, "time": 71.49 }, { "epoch": 1.55, "learning_rate": "4.1552e-05", "loss": 0.7408, "slid_loss": 0.7297, "step": 2009, "time": 71.9 }, { "epoch": 1.56, "learning_rate": "4.1544e-05", "loss": 0.7545, "slid_loss": 0.7302, "step": 2010, "time": 70.76 }, { "epoch": 1.56, "learning_rate": "4.1536e-05", "loss": 0.747, "slid_loss": 0.7301, "step": 2011, "time": 71.42 }, { "epoch": 1.56, "learning_rate": "4.1528e-05", "loss": 0.7487, "slid_loss": 0.7299, "step": 2012, "time": 71.21 }, { "epoch": 1.56, "learning_rate": "4.1520e-05", "loss": 0.75, "slid_loss": 0.73, "step": 2013, "time": 70.71 }, { "epoch": 1.56, "learning_rate": "4.1512e-05", "loss": 0.7185, "slid_loss": 0.7297, "step": 2014, "time": 71.53 }, { "epoch": 1.56, "learning_rate": "4.1504e-05", "loss": 0.7327, "slid_loss": 0.7298, "step": 2015, "time": 73.27 }, { "epoch": 1.56, "learning_rate": "4.1496e-05", "loss": 0.7274, "slid_loss": 0.7298, "step": 2016, "time": 72.98 }, { "epoch": 1.56, "learning_rate": "4.1488e-05", "loss": 0.7037, "slid_loss": 0.7296, "step": 2017, "time": 71.54 }, { "epoch": 1.56, "learning_rate": "4.1480e-05", "loss": 0.7226, "slid_loss": 0.7294, "step": 2018, "time": 70.72 }, { "epoch": 1.56, "learning_rate": "4.1472e-05", "loss": 0.7142, "slid_loss": 0.7294, "step": 2019, "time": 71.39 }, { "epoch": 1.56, "learning_rate": "4.1464e-05", "loss": 0.7451, "slid_loss": 0.7301, "step": 2020, "time": 70.52 }, { "epoch": 1.56, "learning_rate": "4.1456e-05", "loss": 0.7324, "slid_loss": 0.7303, "step": 2021, "time": 72.71 }, { "epoch": 1.56, "learning_rate": "4.1448e-05", "loss": 0.7209, "slid_loss": 0.7304, "step": 2022, "time": 71.54 }, { "epoch": 1.57, "learning_rate": "4.1440e-05", "loss": 0.6995, "slid_loss": 0.7298, "step": 2023, "time": 71.9 }, { "epoch": 1.57, "learning_rate": "4.1431e-05", "loss": 0.7193, "slid_loss": 0.7297, "step": 2024, "time": 72.15 }, { "epoch": 1.57, "learning_rate": "4.1423e-05", "loss": 0.721, "slid_loss": 0.7297, "step": 2025, "time": 72.15 }, { "epoch": 1.57, "learning_rate": "4.1415e-05", "loss": 0.7336, "slid_loss": 0.7295, "step": 2026, "time": 70.56 }, { "epoch": 1.57, "learning_rate": "4.1407e-05", "loss": 0.7487, "slid_loss": 0.7297, "step": 2027, "time": 70.82 }, { "epoch": 1.57, "learning_rate": "4.1399e-05", "loss": 0.6919, "slid_loss": 0.7292, "step": 2028, "time": 72.61 }, { "epoch": 1.57, "learning_rate": "4.1391e-05", "loss": 0.7244, "slid_loss": 0.7292, "step": 2029, "time": 70.24 }, { "epoch": 1.57, "learning_rate": "4.1383e-05", "loss": 0.7279, "slid_loss": 0.7292, "step": 2030, "time": 70.47 }, { "epoch": 1.57, "learning_rate": "4.1375e-05", "loss": 0.7368, "slid_loss": 0.7295, "step": 2031, "time": 71.33 }, { "epoch": 1.57, "learning_rate": "4.1367e-05", "loss": 0.7109, "slid_loss": 0.7295, "step": 2032, "time": 71.12 }, { "epoch": 1.57, "learning_rate": "4.1359e-05", "loss": 0.749, "slid_loss": 0.7299, "step": 2033, "time": 72.19 }, { "epoch": 1.57, "learning_rate": "4.1351e-05", "loss": 0.7414, "slid_loss": 0.7301, "step": 2034, "time": 70.08 }, { "epoch": 1.57, "learning_rate": "4.1343e-05", "loss": 0.7018, "slid_loss": 0.7299, "step": 2035, "time": 72.86 }, { "epoch": 1.58, "learning_rate": "4.1335e-05", "loss": 0.7237, "slid_loss": 0.7295, "step": 2036, "time": 70.43 }, { "epoch": 1.58, "learning_rate": "4.1326e-05", "loss": 0.7408, "slid_loss": 0.7295, "step": 2037, "time": 71.55 }, { "epoch": 1.58, "learning_rate": "4.1318e-05", "loss": 0.6776, "slid_loss": 0.7291, "step": 2038, "time": 71.72 }, { "epoch": 1.58, "learning_rate": "4.1310e-05", "loss": 0.6841, "slid_loss": 0.7282, "step": 2039, "time": 72.0 }, { "epoch": 1.58, "learning_rate": "4.1302e-05", "loss": 0.7017, "slid_loss": 0.7279, "step": 2040, "time": 72.59 }, { "epoch": 1.58, "learning_rate": "4.1294e-05", "loss": 0.7103, "slid_loss": 0.7277, "step": 2041, "time": 74.87 }, { "epoch": 1.58, "learning_rate": "4.1286e-05", "loss": 0.7454, "slid_loss": 0.7275, "step": 2042, "time": 71.99 }, { "epoch": 1.58, "learning_rate": "4.1278e-05", "loss": 0.7302, "slid_loss": 0.7275, "step": 2043, "time": 71.91 }, { "epoch": 1.58, "learning_rate": "4.1270e-05", "loss": 0.6992, "slid_loss": 0.7273, "step": 2044, "time": 71.66 }, { "epoch": 1.58, "learning_rate": "4.1262e-05", "loss": 0.7569, "slid_loss": 0.7276, "step": 2045, "time": 71.41 }, { "epoch": 1.58, "learning_rate": "4.1253e-05", "loss": 0.7305, "slid_loss": 0.7275, "step": 2046, "time": 70.56 }, { "epoch": 1.58, "learning_rate": "4.1245e-05", "loss": 0.7587, "slid_loss": 0.7273, "step": 2047, "time": 71.15 }, { "epoch": 1.58, "learning_rate": "4.1237e-05", "loss": 0.7239, "slid_loss": 0.7272, "step": 2048, "time": 71.21 }, { "epoch": 1.59, "learning_rate": "4.1229e-05", "loss": 0.7043, "slid_loss": 0.7271, "step": 2049, "time": 72.39 }, { "epoch": 1.59, "learning_rate": "4.1221e-05", "loss": 0.7571, "slid_loss": 0.7277, "step": 2050, "time": 70.44 }, { "epoch": 1.59, "learning_rate": "4.1213e-05", "loss": 0.711, "slid_loss": 0.7277, "step": 2051, "time": 71.59 }, { "epoch": 1.59, "learning_rate": "4.1205e-05", "loss": 0.7086, "slid_loss": 0.7277, "step": 2052, "time": 71.54 }, { "epoch": 1.59, "learning_rate": "4.1197e-05", "loss": 0.7707, "slid_loss": 0.7283, "step": 2053, "time": 72.11 }, { "epoch": 1.59, "learning_rate": "4.1188e-05", "loss": 0.7141, "slid_loss": 0.7283, "step": 2054, "time": 94.29 }, { "epoch": 1.59, "learning_rate": "4.1180e-05", "loss": 0.7146, "slid_loss": 0.7281, "step": 2055, "time": 73.5 }, { "epoch": 1.59, "learning_rate": "4.1172e-05", "loss": 0.7043, "slid_loss": 0.7278, "step": 2056, "time": 71.4 }, { "epoch": 1.59, "learning_rate": "4.1164e-05", "loss": 0.7195, "slid_loss": 0.727, "step": 2057, "time": 72.22 }, { "epoch": 1.59, "learning_rate": "4.1156e-05", "loss": 0.6946, "slid_loss": 0.7267, "step": 2058, "time": 84.33 }, { "epoch": 1.59, "learning_rate": "4.1148e-05", "loss": 0.7355, "slid_loss": 0.7267, "step": 2059, "time": 102.23 }, { "epoch": 1.59, "learning_rate": "4.1139e-05", "loss": 0.7613, "slid_loss": 0.7269, "step": 2060, "time": 83.55 }, { "epoch": 1.59, "learning_rate": "4.1131e-05", "loss": 0.739, "slid_loss": 0.7273, "step": 2061, "time": 131.85 }, { "epoch": 1.6, "learning_rate": "4.1123e-05", "loss": 0.7233, "slid_loss": 0.7273, "step": 2062, "time": 124.5 }, { "epoch": 1.6, "learning_rate": "4.1115e-05", "loss": 0.7121, "slid_loss": 0.7266, "step": 2063, "time": 137.12 }, { "epoch": 1.6, "learning_rate": "4.1107e-05", "loss": 0.7045, "slid_loss": 0.7264, "step": 2064, "time": 141.73 }, { "epoch": 1.6, "learning_rate": "4.1099e-05", "loss": 0.7527, "slid_loss": 0.7268, "step": 2065, "time": 162.84 }, { "epoch": 1.6, "learning_rate": "4.1090e-05", "loss": 0.7106, "slid_loss": 0.7262, "step": 2066, "time": 180.39 }, { "epoch": 1.6, "learning_rate": "4.1082e-05", "loss": 0.7051, "slid_loss": 0.7259, "step": 2067, "time": 161.13 }, { "epoch": 1.6, "learning_rate": "4.1074e-05", "loss": 0.7143, "slid_loss": 0.7256, "step": 2068, "time": 121.89 }, { "epoch": 1.6, "learning_rate": "4.1066e-05", "loss": 0.7092, "slid_loss": 0.7255, "step": 2069, "time": 133.49 }, { "epoch": 1.6, "learning_rate": "4.1058e-05", "loss": 0.7335, "slid_loss": 0.7257, "step": 2070, "time": 161.61 }, { "epoch": 1.6, "learning_rate": "4.1050e-05", "loss": 0.7343, "slid_loss": 0.7255, "step": 2071, "time": 123.61 }, { "epoch": 1.6, "learning_rate": "4.1041e-05", "loss": 0.7217, "slid_loss": 0.7254, "step": 2072, "time": 70.91 }, { "epoch": 1.6, "learning_rate": "4.1033e-05", "loss": 0.7313, "slid_loss": 0.7251, "step": 2073, "time": 108.07 }, { "epoch": 1.6, "learning_rate": "4.1025e-05", "loss": 0.7272, "slid_loss": 0.7253, "step": 2074, "time": 73.35 }, { "epoch": 1.61, "learning_rate": "4.1017e-05", "loss": 0.7158, "slid_loss": 0.7253, "step": 2075, "time": 70.58 }, { "epoch": 1.61, "learning_rate": "4.1009e-05", "loss": 0.7198, "slid_loss": 0.7254, "step": 2076, "time": 72.04 }, { "epoch": 1.61, "learning_rate": "4.1000e-05", "loss": 0.723, "slid_loss": 0.7251, "step": 2077, "time": 71.61 }, { "epoch": 1.61, "learning_rate": "4.0992e-05", "loss": 0.7101, "slid_loss": 0.7251, "step": 2078, "time": 72.48 }, { "epoch": 1.61, "learning_rate": "4.0984e-05", "loss": 0.7031, "slid_loss": 0.7248, "step": 2079, "time": 71.55 }, { "epoch": 1.61, "learning_rate": "4.0976e-05", "loss": 0.7499, "slid_loss": 0.725, "step": 2080, "time": 72.19 }, { "epoch": 1.61, "learning_rate": "4.0968e-05", "loss": 0.7263, "slid_loss": 0.7253, "step": 2081, "time": 71.93 }, { "epoch": 1.61, "learning_rate": "4.0959e-05", "loss": 0.716, "slid_loss": 0.7251, "step": 2082, "time": 72.13 }, { "epoch": 1.61, "learning_rate": "4.0951e-05", "loss": 0.692, "slid_loss": 0.7247, "step": 2083, "time": 72.68 }, { "epoch": 1.61, "learning_rate": "4.0943e-05", "loss": 0.7103, "slid_loss": 0.7242, "step": 2084, "time": 71.19 }, { "epoch": 1.61, "learning_rate": "4.0935e-05", "loss": 0.7153, "slid_loss": 0.7241, "step": 2085, "time": 72.1 }, { "epoch": 1.61, "learning_rate": "4.0926e-05", "loss": 0.7195, "slid_loss": 0.7242, "step": 2086, "time": 72.62 }, { "epoch": 1.61, "learning_rate": "4.0918e-05", "loss": 0.7256, "slid_loss": 0.7245, "step": 2087, "time": 72.09 }, { "epoch": 1.62, "learning_rate": "4.0910e-05", "loss": 0.7173, "slid_loss": 0.7243, "step": 2088, "time": 72.97 }, { "epoch": 1.62, "learning_rate": "4.0902e-05", "loss": 0.6763, "slid_loss": 0.7236, "step": 2089, "time": 71.37 }, { "epoch": 1.62, "learning_rate": "4.0893e-05", "loss": 0.7123, "slid_loss": 0.7235, "step": 2090, "time": 72.12 }, { "epoch": 1.62, "learning_rate": "4.0885e-05", "loss": 0.7199, "slid_loss": 0.7231, "step": 2091, "time": 72.67 }, { "epoch": 1.62, "learning_rate": "4.0877e-05", "loss": 0.7027, "slid_loss": 0.7232, "step": 2092, "time": 72.05 }, { "epoch": 1.62, "learning_rate": "4.0869e-05", "loss": 0.7142, "slid_loss": 0.7232, "step": 2093, "time": 71.36 }, { "epoch": 1.62, "learning_rate": "4.0860e-05", "loss": 0.6883, "slid_loss": 0.723, "step": 2094, "time": 71.87 }, { "epoch": 1.62, "learning_rate": "4.0852e-05", "loss": 0.7351, "slid_loss": 0.7231, "step": 2095, "time": 71.65 }, { "epoch": 1.62, "learning_rate": "4.0844e-05", "loss": 0.7071, "slid_loss": 0.7225, "step": 2096, "time": 73.37 }, { "epoch": 1.62, "learning_rate": "4.0836e-05", "loss": 0.7111, "slid_loss": 0.7224, "step": 2097, "time": 71.93 }, { "epoch": 1.62, "learning_rate": "4.0827e-05", "loss": 0.7241, "slid_loss": 0.7225, "step": 2098, "time": 70.89 }, { "epoch": 1.62, "learning_rate": "4.0819e-05", "loss": 0.7754, "slid_loss": 0.7229, "step": 2099, "time": 71.82 }, { "epoch": 1.62, "learning_rate": "4.0811e-05", "loss": 0.7616, "slid_loss": 0.7231, "step": 2100, "time": 70.12 }, { "epoch": 1.63, "learning_rate": "4.0803e-05", "loss": 0.7845, "slid_loss": 0.7236, "step": 2101, "time": 71.82 }, { "epoch": 1.63, "learning_rate": "4.0794e-05", "loss": 0.7421, "slid_loss": 0.7236, "step": 2102, "time": 72.04 }, { "epoch": 1.63, "learning_rate": "4.0786e-05", "loss": 0.6966, "slid_loss": 0.7234, "step": 2103, "time": 72.15 }, { "epoch": 1.63, "learning_rate": "4.0778e-05", "loss": 0.7159, "slid_loss": 0.7235, "step": 2104, "time": 70.38 }, { "epoch": 1.63, "learning_rate": "4.0770e-05", "loss": 0.7364, "slid_loss": 0.7236, "step": 2105, "time": 71.39 }, { "epoch": 1.63, "learning_rate": "4.0761e-05", "loss": 0.7494, "slid_loss": 0.7237, "step": 2106, "time": 72.17 }, { "epoch": 1.63, "learning_rate": "4.0753e-05", "loss": 0.7489, "slid_loss": 0.7242, "step": 2107, "time": 72.0 }, { "epoch": 1.63, "learning_rate": "4.0745e-05", "loss": 0.6918, "slid_loss": 0.724, "step": 2108, "time": 73.28 }, { "epoch": 1.63, "learning_rate": "4.0736e-05", "loss": 0.714, "slid_loss": 0.7237, "step": 2109, "time": 70.93 }, { "epoch": 1.63, "learning_rate": "4.0728e-05", "loss": 0.6872, "slid_loss": 0.7231, "step": 2110, "time": 71.21 }, { "epoch": 1.63, "learning_rate": "4.0720e-05", "loss": 0.6895, "slid_loss": 0.7225, "step": 2111, "time": 71.72 }, { "epoch": 1.63, "learning_rate": "4.0712e-05", "loss": 0.7402, "slid_loss": 0.7224, "step": 2112, "time": 72.46 }, { "epoch": 1.63, "learning_rate": "4.0703e-05", "loss": 0.7615, "slid_loss": 0.7225, "step": 2113, "time": 71.61 }, { "epoch": 1.64, "learning_rate": "4.0695e-05", "loss": 0.7306, "slid_loss": 0.7226, "step": 2114, "time": 71.73 }, { "epoch": 1.64, "learning_rate": "4.0687e-05", "loss": 0.7295, "slid_loss": 0.7226, "step": 2115, "time": 73.69 }, { "epoch": 1.64, "learning_rate": "4.0678e-05", "loss": 0.7391, "slid_loss": 0.7227, "step": 2116, "time": 71.62 }, { "epoch": 1.64, "learning_rate": "4.0670e-05", "loss": 0.7161, "slid_loss": 0.7228, "step": 2117, "time": 72.53 }, { "epoch": 1.64, "learning_rate": "4.0662e-05", "loss": 0.6923, "slid_loss": 0.7225, "step": 2118, "time": 71.32 }, { "epoch": 1.64, "learning_rate": "4.0653e-05", "loss": 0.7392, "slid_loss": 0.7228, "step": 2119, "time": 73.71 }, { "epoch": 1.64, "learning_rate": "4.0645e-05", "loss": 0.6982, "slid_loss": 0.7223, "step": 2120, "time": 71.49 }, { "epoch": 1.64, "learning_rate": "4.0637e-05", "loss": 0.6958, "slid_loss": 0.722, "step": 2121, "time": 72.14 }, { "epoch": 1.64, "learning_rate": "4.0628e-05", "loss": 0.7302, "slid_loss": 0.722, "step": 2122, "time": 70.96 }, { "epoch": 1.64, "learning_rate": "4.0620e-05", "loss": 0.7008, "slid_loss": 0.7221, "step": 2123, "time": 71.46 }, { "epoch": 1.64, "learning_rate": "4.0612e-05", "loss": 0.7079, "slid_loss": 0.7219, "step": 2124, "time": 72.5 }, { "epoch": 1.64, "learning_rate": "4.0603e-05", "loss": 0.7273, "slid_loss": 0.722, "step": 2125, "time": 71.38 }, { "epoch": 1.64, "learning_rate": "4.0595e-05", "loss": 0.7305, "slid_loss": 0.722, "step": 2126, "time": 72.18 }, { "epoch": 1.65, "learning_rate": "4.0587e-05", "loss": 0.6971, "slid_loss": 0.7215, "step": 2127, "time": 72.78 }, { "epoch": 1.65, "learning_rate": "4.0578e-05", "loss": 0.7131, "slid_loss": 0.7217, "step": 2128, "time": 70.89 }, { "epoch": 1.65, "learning_rate": "4.0570e-05", "loss": 0.7362, "slid_loss": 0.7218, "step": 2129, "time": 73.32 }, { "epoch": 1.65, "learning_rate": "4.0562e-05", "loss": 0.7268, "slid_loss": 0.7218, "step": 2130, "time": 71.69 }, { "epoch": 1.65, "learning_rate": "4.0553e-05", "loss": 0.7015, "slid_loss": 0.7214, "step": 2131, "time": 73.06 }, { "epoch": 1.65, "learning_rate": "4.0545e-05", "loss": 0.7376, "slid_loss": 0.7217, "step": 2132, "time": 72.35 }, { "epoch": 1.65, "learning_rate": "4.0537e-05", "loss": 0.7647, "slid_loss": 0.7219, "step": 2133, "time": 71.32 }, { "epoch": 1.65, "learning_rate": "4.0528e-05", "loss": 0.7279, "slid_loss": 0.7217, "step": 2134, "time": 71.33 }, { "epoch": 1.65, "learning_rate": "4.0520e-05", "loss": 0.7334, "slid_loss": 0.722, "step": 2135, "time": 72.17 }, { "epoch": 1.65, "learning_rate": "4.0512e-05", "loss": 0.7194, "slid_loss": 0.722, "step": 2136, "time": 71.52 }, { "epoch": 1.65, "learning_rate": "4.0503e-05", "loss": 0.7414, "slid_loss": 0.722, "step": 2137, "time": 70.48 }, { "epoch": 1.65, "learning_rate": "4.0495e-05", "loss": 0.7063, "slid_loss": 0.7223, "step": 2138, "time": 70.66 }, { "epoch": 1.65, "learning_rate": "4.0487e-05", "loss": 0.7119, "slid_loss": 0.7226, "step": 2139, "time": 72.76 }, { "epoch": 1.66, "learning_rate": "4.0478e-05", "loss": 0.7553, "slid_loss": 0.7231, "step": 2140, "time": 72.75 }, { "epoch": 1.66, "learning_rate": "4.0470e-05", "loss": 0.7463, "slid_loss": 0.7235, "step": 2141, "time": 70.9 }, { "epoch": 1.66, "learning_rate": "4.0461e-05", "loss": 0.7207, "slid_loss": 0.7232, "step": 2142, "time": 71.61 }, { "epoch": 1.66, "learning_rate": "4.0453e-05", "loss": 0.6961, "slid_loss": 0.7229, "step": 2143, "time": 71.46 }, { "epoch": 1.66, "learning_rate": "4.0445e-05", "loss": 0.6915, "slid_loss": 0.7228, "step": 2144, "time": 72.87 }, { "epoch": 1.66, "learning_rate": "4.0436e-05", "loss": 0.7206, "slid_loss": 0.7224, "step": 2145, "time": 71.14 }, { "epoch": 1.66, "learning_rate": "4.0428e-05", "loss": 0.7159, "slid_loss": 0.7223, "step": 2146, "time": 71.87 }, { "epoch": 1.66, "learning_rate": "4.0420e-05", "loss": 0.7075, "slid_loss": 0.7218, "step": 2147, "time": 71.83 }, { "epoch": 1.66, "learning_rate": "4.0411e-05", "loss": 0.7386, "slid_loss": 0.7219, "step": 2148, "time": 72.5 }, { "epoch": 1.66, "learning_rate": "4.0403e-05", "loss": 0.7141, "slid_loss": 0.722, "step": 2149, "time": 70.74 }, { "epoch": 1.66, "learning_rate": "4.0394e-05", "loss": 0.682, "slid_loss": 0.7213, "step": 2150, "time": 71.51 }, { "epoch": 1.66, "learning_rate": "4.0386e-05", "loss": 0.7435, "slid_loss": 0.7216, "step": 2151, "time": 71.82 }, { "epoch": 1.66, "learning_rate": "4.0378e-05", "loss": 0.7437, "slid_loss": 0.7219, "step": 2152, "time": 74.09 }, { "epoch": 1.67, "learning_rate": "4.0369e-05", "loss": 0.7236, "slid_loss": 0.7215, "step": 2153, "time": 71.36 }, { "epoch": 1.67, "learning_rate": "4.0361e-05", "loss": 0.749, "slid_loss": 0.7218, "step": 2154, "time": 71.09 }, { "epoch": 1.67, "learning_rate": "4.0352e-05", "loss": 0.7057, "slid_loss": 0.7217, "step": 2155, "time": 71.32 }, { "epoch": 1.67, "learning_rate": "4.0344e-05", "loss": 0.7227, "slid_loss": 0.7219, "step": 2156, "time": 71.91 }, { "epoch": 1.67, "learning_rate": "4.0336e-05", "loss": 0.7351, "slid_loss": 0.7221, "step": 2157, "time": 71.61 }, { "epoch": 1.67, "learning_rate": "4.0327e-05", "loss": 0.7759, "slid_loss": 0.7229, "step": 2158, "time": 71.16 }, { "epoch": 1.67, "learning_rate": "4.0319e-05", "loss": 0.7568, "slid_loss": 0.7231, "step": 2159, "time": 71.58 }, { "epoch": 1.67, "learning_rate": "4.0310e-05", "loss": 0.7168, "slid_loss": 0.7227, "step": 2160, "time": 71.77 }, { "epoch": 1.67, "learning_rate": "4.0302e-05", "loss": 0.7199, "slid_loss": 0.7225, "step": 2161, "time": 71.7 }, { "epoch": 1.67, "learning_rate": "4.0294e-05", "loss": 0.6971, "slid_loss": 0.7222, "step": 2162, "time": 72.04 }, { "epoch": 1.67, "learning_rate": "4.0285e-05", "loss": 0.7011, "slid_loss": 0.7221, "step": 2163, "time": 72.58 }, { "epoch": 1.67, "learning_rate": "4.0277e-05", "loss": 0.7218, "slid_loss": 0.7223, "step": 2164, "time": 72.14 }, { "epoch": 1.68, "learning_rate": "4.0268e-05", "loss": 0.7117, "slid_loss": 0.7218, "step": 2165, "time": 71.74 }, { "epoch": 1.68, "learning_rate": "4.0260e-05", "loss": 0.6849, "slid_loss": 0.7216, "step": 2166, "time": 71.55 }, { "epoch": 1.68, "learning_rate": "4.0251e-05", "loss": 0.7051, "slid_loss": 0.7216, "step": 2167, "time": 72.32 }, { "epoch": 1.68, "learning_rate": "4.0243e-05", "loss": 0.7019, "slid_loss": 0.7215, "step": 2168, "time": 71.28 }, { "epoch": 1.68, "learning_rate": "4.0234e-05", "loss": 0.7317, "slid_loss": 0.7217, "step": 2169, "time": 72.2 }, { "epoch": 1.68, "learning_rate": "4.0226e-05", "loss": 0.683, "slid_loss": 0.7212, "step": 2170, "time": 71.76 }, { "epoch": 1.68, "learning_rate": "4.0218e-05", "loss": 0.7331, "slid_loss": 0.7212, "step": 2171, "time": 72.16 }, { "epoch": 1.68, "learning_rate": "4.0209e-05", "loss": 0.7516, "slid_loss": 0.7215, "step": 2172, "time": 70.63 }, { "epoch": 1.68, "learning_rate": "4.0201e-05", "loss": 0.7069, "slid_loss": 0.7212, "step": 2173, "time": 71.98 }, { "epoch": 1.68, "learning_rate": "4.0192e-05", "loss": 0.7641, "slid_loss": 0.7216, "step": 2174, "time": 71.89 }, { "epoch": 1.68, "learning_rate": "4.0184e-05", "loss": 0.7362, "slid_loss": 0.7218, "step": 2175, "time": 72.18 }, { "epoch": 1.68, "learning_rate": "4.0175e-05", "loss": 0.7322, "slid_loss": 0.7219, "step": 2176, "time": 70.14 }, { "epoch": 1.68, "learning_rate": "4.0167e-05", "loss": 0.6928, "slid_loss": 0.7216, "step": 2177, "time": 71.41 }, { "epoch": 1.69, "learning_rate": "4.0158e-05", "loss": 0.6948, "slid_loss": 0.7215, "step": 2178, "time": 70.92 }, { "epoch": 1.69, "learning_rate": "4.0150e-05", "loss": 0.7482, "slid_loss": 0.7219, "step": 2179, "time": 71.93 }, { "epoch": 1.69, "learning_rate": "4.0141e-05", "loss": 0.7743, "slid_loss": 0.7222, "step": 2180, "time": 71.48 }, { "epoch": 1.69, "learning_rate": "4.0133e-05", "loss": 0.7159, "slid_loss": 0.7221, "step": 2181, "time": 71.5 }, { "epoch": 1.69, "learning_rate": "4.0125e-05", "loss": 0.6849, "slid_loss": 0.7218, "step": 2182, "time": 71.7 }, { "epoch": 1.69, "learning_rate": "4.0116e-05", "loss": 0.7124, "slid_loss": 0.722, "step": 2183, "time": 70.94 }, { "epoch": 1.69, "learning_rate": "4.0108e-05", "loss": 0.6989, "slid_loss": 0.7218, "step": 2184, "time": 71.55 }, { "epoch": 1.69, "learning_rate": "4.0099e-05", "loss": 0.7403, "slid_loss": 0.7221, "step": 2185, "time": 69.71 }, { "epoch": 1.69, "learning_rate": "4.0091e-05", "loss": 0.7132, "slid_loss": 0.722, "step": 2186, "time": 72.25 }, { "epoch": 1.69, "learning_rate": "4.0082e-05", "loss": 0.7507, "slid_loss": 0.7223, "step": 2187, "time": 72.03 }, { "epoch": 1.69, "learning_rate": "4.0074e-05", "loss": 0.7198, "slid_loss": 0.7223, "step": 2188, "time": 72.25 }, { "epoch": 1.69, "learning_rate": "4.0065e-05", "loss": 0.702, "slid_loss": 0.7226, "step": 2189, "time": 71.25 }, { "epoch": 1.69, "learning_rate": "4.0057e-05", "loss": 0.7148, "slid_loss": 0.7226, "step": 2190, "time": 73.67 }, { "epoch": 1.7, "learning_rate": "4.0048e-05", "loss": 0.7427, "slid_loss": 0.7228, "step": 2191, "time": 72.32 }, { "epoch": 1.7, "learning_rate": "4.0040e-05", "loss": 0.7184, "slid_loss": 0.723, "step": 2192, "time": 71.1 }, { "epoch": 1.7, "learning_rate": "4.0031e-05", "loss": 0.702, "slid_loss": 0.7229, "step": 2193, "time": 73.85 }, { "epoch": 1.7, "learning_rate": "4.0023e-05", "loss": 0.6989, "slid_loss": 0.723, "step": 2194, "time": 71.72 }, { "epoch": 1.7, "learning_rate": "4.0014e-05", "loss": 0.7217, "slid_loss": 0.7228, "step": 2195, "time": 71.22 }, { "epoch": 1.7, "learning_rate": "4.0006e-05", "loss": 0.7236, "slid_loss": 0.723, "step": 2196, "time": 71.82 }, { "epoch": 1.7, "learning_rate": "3.9997e-05", "loss": 0.721, "slid_loss": 0.7231, "step": 2197, "time": 73.06 }, { "epoch": 1.7, "learning_rate": "3.9989e-05", "loss": 0.6771, "slid_loss": 0.7226, "step": 2198, "time": 70.87 }, { "epoch": 1.7, "learning_rate": "3.9980e-05", "loss": 0.721, "slid_loss": 0.7221, "step": 2199, "time": 72.5 }, { "epoch": 1.7, "learning_rate": "3.9972e-05", "loss": 0.7091, "slid_loss": 0.7216, "step": 2200, "time": 70.58 }, { "epoch": 1.7, "learning_rate": "3.9963e-05", "loss": 0.7174, "slid_loss": 0.7209, "step": 2201, "time": 838.17 }, { "epoch": 1.7, "learning_rate": "3.9955e-05", "loss": 0.7352, "slid_loss": 0.7208, "step": 2202, "time": 73.99 }, { "epoch": 1.7, "learning_rate": "3.9946e-05", "loss": 0.7368, "slid_loss": 0.7212, "step": 2203, "time": 72.71 }, { "epoch": 1.71, "learning_rate": "3.9938e-05", "loss": 0.7221, "slid_loss": 0.7213, "step": 2204, "time": 71.28 }, { "epoch": 1.71, "learning_rate": "3.9929e-05", "loss": 0.7646, "slid_loss": 0.7216, "step": 2205, "time": 71.87 }, { "epoch": 1.71, "learning_rate": "3.9920e-05", "loss": 0.7404, "slid_loss": 0.7215, "step": 2206, "time": 70.59 }, { "epoch": 1.71, "learning_rate": "3.9912e-05", "loss": 0.7003, "slid_loss": 0.721, "step": 2207, "time": 72.12 }, { "epoch": 1.71, "learning_rate": "3.9903e-05", "loss": 0.6821, "slid_loss": 0.7209, "step": 2208, "time": 72.01 }, { "epoch": 1.71, "learning_rate": "3.9895e-05", "loss": 0.7245, "slid_loss": 0.721, "step": 2209, "time": 71.35 }, { "epoch": 1.71, "learning_rate": "3.9886e-05", "loss": 0.7219, "slid_loss": 0.7213, "step": 2210, "time": 73.23 }, { "epoch": 1.71, "learning_rate": "3.9878e-05", "loss": 0.6882, "slid_loss": 0.7213, "step": 2211, "time": 71.34 }, { "epoch": 1.71, "learning_rate": "3.9869e-05", "loss": 0.7352, "slid_loss": 0.7213, "step": 2212, "time": 72.42 }, { "epoch": 1.71, "learning_rate": "3.9861e-05", "loss": 0.712, "slid_loss": 0.7208, "step": 2213, "time": 72.63 }, { "epoch": 1.71, "learning_rate": "3.9852e-05", "loss": 0.6962, "slid_loss": 0.7204, "step": 2214, "time": 71.33 }, { "epoch": 1.71, "learning_rate": "3.9844e-05", "loss": 0.7432, "slid_loss": 0.7206, "step": 2215, "time": 70.64 }, { "epoch": 1.71, "learning_rate": "3.9835e-05", "loss": 0.7506, "slid_loss": 0.7207, "step": 2216, "time": 72.48 }, { "epoch": 1.72, "learning_rate": "3.9826e-05", "loss": 0.7212, "slid_loss": 0.7207, "step": 2217, "time": 121.16 }, { "epoch": 1.72, "learning_rate": "3.9818e-05", "loss": 0.7543, "slid_loss": 0.7214, "step": 2218, "time": 71.09 }, { "epoch": 1.72, "learning_rate": "3.9809e-05", "loss": 0.7202, "slid_loss": 0.7212, "step": 2219, "time": 98.75 }, { "epoch": 1.72, "learning_rate": "3.9801e-05", "loss": 0.7516, "slid_loss": 0.7217, "step": 2220, "time": 163.53 }, { "epoch": 1.72, "learning_rate": "3.9792e-05", "loss": 0.7622, "slid_loss": 0.7224, "step": 2221, "time": 160.99 }, { "epoch": 1.72, "learning_rate": "3.9784e-05", "loss": 0.7194, "slid_loss": 0.7223, "step": 2222, "time": 159.0 }, { "epoch": 1.72, "learning_rate": "3.9775e-05", "loss": 0.7203, "slid_loss": 0.7225, "step": 2223, "time": 133.34 }, { "epoch": 1.72, "learning_rate": "3.9767e-05", "loss": 0.6915, "slid_loss": 0.7223, "step": 2224, "time": 150.29 }, { "epoch": 1.72, "learning_rate": "3.9758e-05", "loss": 0.724, "slid_loss": 0.7223, "step": 2225, "time": 173.24 }, { "epoch": 1.72, "learning_rate": "3.9749e-05", "loss": 0.7248, "slid_loss": 0.7222, "step": 2226, "time": 173.78 }, { "epoch": 1.72, "learning_rate": "3.9741e-05", "loss": 0.6967, "slid_loss": 0.7222, "step": 2227, "time": 122.98 }, { "epoch": 1.72, "learning_rate": "3.9732e-05", "loss": 0.719, "slid_loss": 0.7223, "step": 2228, "time": 155.88 }, { "epoch": 1.72, "learning_rate": "3.9724e-05", "loss": 0.7127, "slid_loss": 0.722, "step": 2229, "time": 110.94 }, { "epoch": 1.73, "learning_rate": "3.9715e-05", "loss": 0.7391, "slid_loss": 0.7221, "step": 2230, "time": 135.64 }, { "epoch": 1.73, "learning_rate": "3.9706e-05", "loss": 0.7168, "slid_loss": 0.7223, "step": 2231, "time": 96.47 }, { "epoch": 1.73, "learning_rate": "3.9698e-05", "loss": 0.7218, "slid_loss": 0.7221, "step": 2232, "time": 82.92 }, { "epoch": 1.73, "learning_rate": "3.9689e-05", "loss": 0.715, "slid_loss": 0.7216, "step": 2233, "time": 83.0 }, { "epoch": 1.73, "learning_rate": "3.9681e-05", "loss": 0.6892, "slid_loss": 0.7213, "step": 2234, "time": 71.56 }, { "epoch": 1.73, "learning_rate": "3.9672e-05", "loss": 0.7102, "slid_loss": 0.721, "step": 2235, "time": 73.43 }, { "epoch": 1.73, "learning_rate": "3.9663e-05", "loss": 0.7027, "slid_loss": 0.7209, "step": 2236, "time": 72.81 }, { "epoch": 1.73, "learning_rate": "3.9655e-05", "loss": 0.74, "slid_loss": 0.7208, "step": 2237, "time": 70.88 }, { "epoch": 1.73, "learning_rate": "3.9646e-05", "loss": 0.7164, "slid_loss": 0.7209, "step": 2238, "time": 70.81 }, { "epoch": 1.73, "learning_rate": "3.9638e-05", "loss": 0.7327, "slid_loss": 0.7211, "step": 2239, "time": 73.79 }, { "epoch": 1.73, "learning_rate": "3.9629e-05", "loss": 0.7431, "slid_loss": 0.721, "step": 2240, "time": 70.82 }, { "epoch": 1.73, "learning_rate": "3.9620e-05", "loss": 0.7185, "slid_loss": 0.7207, "step": 2241, "time": 72.18 }, { "epoch": 1.73, "learning_rate": "3.9612e-05", "loss": 0.7169, "slid_loss": 0.7207, "step": 2242, "time": 73.6 }, { "epoch": 1.74, "learning_rate": "3.9603e-05", "loss": 0.7301, "slid_loss": 0.7211, "step": 2243, "time": 71.58 }, { "epoch": 1.74, "learning_rate": "3.9595e-05", "loss": 0.6999, "slid_loss": 0.7211, "step": 2244, "time": 72.3 }, { "epoch": 1.74, "learning_rate": "3.9586e-05", "loss": 0.6847, "slid_loss": 0.7208, "step": 2245, "time": 70.59 }, { "epoch": 1.74, "learning_rate": "3.9577e-05", "loss": 0.7458, "slid_loss": 0.7211, "step": 2246, "time": 70.9 }, { "epoch": 1.74, "learning_rate": "3.9569e-05", "loss": 0.6909, "slid_loss": 0.7209, "step": 2247, "time": 71.97 }, { "epoch": 1.74, "learning_rate": "3.9560e-05", "loss": 0.7595, "slid_loss": 0.7211, "step": 2248, "time": 71.17 }, { "epoch": 1.74, "learning_rate": "3.9551e-05", "loss": 0.7387, "slid_loss": 0.7214, "step": 2249, "time": 70.86 }, { "epoch": 1.74, "learning_rate": "3.9543e-05", "loss": 0.7132, "slid_loss": 0.7217, "step": 2250, "time": 71.4 }, { "epoch": 1.74, "learning_rate": "3.9534e-05", "loss": 0.697, "slid_loss": 0.7212, "step": 2251, "time": 71.18 }, { "epoch": 1.74, "learning_rate": "3.9526e-05", "loss": 0.7607, "slid_loss": 0.7214, "step": 2252, "time": 73.21 }, { "epoch": 1.74, "learning_rate": "3.9517e-05", "loss": 0.7188, "slid_loss": 0.7213, "step": 2253, "time": 70.27 }, { "epoch": 1.74, "learning_rate": "3.9508e-05", "loss": 0.6954, "slid_loss": 0.7208, "step": 2254, "time": 70.93 }, { "epoch": 1.74, "learning_rate": "3.9500e-05", "loss": 0.7222, "slid_loss": 0.721, "step": 2255, "time": 71.71 }, { "epoch": 1.75, "learning_rate": "3.9491e-05", "loss": 0.7513, "slid_loss": 0.7212, "step": 2256, "time": 73.48 }, { "epoch": 1.75, "learning_rate": "3.9482e-05", "loss": 0.7219, "slid_loss": 0.7211, "step": 2257, "time": 71.45 }, { "epoch": 1.75, "learning_rate": "3.9474e-05", "loss": 0.7469, "slid_loss": 0.7208, "step": 2258, "time": 70.62 }, { "epoch": 1.75, "learning_rate": "3.9465e-05", "loss": 0.7251, "slid_loss": 0.7205, "step": 2259, "time": 71.87 }, { "epoch": 1.75, "learning_rate": "3.9456e-05", "loss": 0.7164, "slid_loss": 0.7205, "step": 2260, "time": 71.72 }, { "epoch": 1.75, "learning_rate": "3.9448e-05", "loss": 0.7024, "slid_loss": 0.7203, "step": 2261, "time": 72.45 }, { "epoch": 1.75, "learning_rate": "3.9439e-05", "loss": 0.7189, "slid_loss": 0.7205, "step": 2262, "time": 71.6 }, { "epoch": 1.75, "learning_rate": "3.9430e-05", "loss": 0.7578, "slid_loss": 0.7211, "step": 2263, "time": 70.15 }, { "epoch": 1.75, "learning_rate": "3.9422e-05", "loss": 0.7221, "slid_loss": 0.7211, "step": 2264, "time": 71.95 }, { "epoch": 1.75, "learning_rate": "3.9413e-05", "loss": 0.7202, "slid_loss": 0.7212, "step": 2265, "time": 71.17 }, { "epoch": 1.75, "learning_rate": "3.9404e-05", "loss": 0.7263, "slid_loss": 0.7216, "step": 2266, "time": 71.39 }, { "epoch": 1.75, "learning_rate": "3.9396e-05", "loss": 0.6912, "slid_loss": 0.7215, "step": 2267, "time": 71.87 }, { "epoch": 1.75, "learning_rate": "3.9387e-05", "loss": 0.7293, "slid_loss": 0.7218, "step": 2268, "time": 72.78 }, { "epoch": 1.76, "learning_rate": "3.9378e-05", "loss": 0.7298, "slid_loss": 0.7217, "step": 2269, "time": 72.1 }, { "epoch": 1.76, "learning_rate": "3.9370e-05", "loss": 0.7192, "slid_loss": 0.7221, "step": 2270, "time": 71.49 }, { "epoch": 1.76, "learning_rate": "3.9361e-05", "loss": 0.7501, "slid_loss": 0.7223, "step": 2271, "time": 71.17 }, { "epoch": 1.76, "learning_rate": "3.9352e-05", "loss": 0.6885, "slid_loss": 0.7216, "step": 2272, "time": 72.59 }, { "epoch": 1.76, "learning_rate": "3.9344e-05", "loss": 0.7097, "slid_loss": 0.7217, "step": 2273, "time": 71.4 }, { "epoch": 1.76, "learning_rate": "3.9335e-05", "loss": 0.7235, "slid_loss": 0.7213, "step": 2274, "time": 72.07 }, { "epoch": 1.76, "learning_rate": "3.9326e-05", "loss": 0.7466, "slid_loss": 0.7214, "step": 2275, "time": 72.88 }, { "epoch": 1.76, "learning_rate": "3.9318e-05", "loss": 0.6927, "slid_loss": 0.721, "step": 2276, "time": 71.89 }, { "epoch": 1.76, "learning_rate": "3.9309e-05", "loss": 0.7005, "slid_loss": 0.721, "step": 2277, "time": 73.59 }, { "epoch": 1.76, "learning_rate": "3.9300e-05", "loss": 0.6758, "slid_loss": 0.7209, "step": 2278, "time": 71.49 }, { "epoch": 1.76, "learning_rate": "3.9291e-05", "loss": 0.7374, "slid_loss": 0.7207, "step": 2279, "time": 70.13 }, { "epoch": 1.76, "learning_rate": "3.9283e-05", "loss": 0.7261, "slid_loss": 0.7203, "step": 2280, "time": 70.62 }, { "epoch": 1.76, "learning_rate": "3.9274e-05", "loss": 0.7136, "slid_loss": 0.7202, "step": 2281, "time": 73.47 }, { "epoch": 1.77, "learning_rate": "3.9265e-05", "loss": 0.716, "slid_loss": 0.7205, "step": 2282, "time": 72.78 }, { "epoch": 1.77, "learning_rate": "3.9257e-05", "loss": 0.7211, "slid_loss": 0.7206, "step": 2283, "time": 71.6 }, { "epoch": 1.77, "learning_rate": "3.9248e-05", "loss": 0.6866, "slid_loss": 0.7205, "step": 2284, "time": 71.44 }, { "epoch": 1.77, "learning_rate": "3.9239e-05", "loss": 0.6618, "slid_loss": 0.7197, "step": 2285, "time": 71.22 }, { "epoch": 1.77, "learning_rate": "3.9230e-05", "loss": 0.72, "slid_loss": 0.7198, "step": 2286, "time": 72.13 }, { "epoch": 1.77, "learning_rate": "3.9222e-05", "loss": 0.6891, "slid_loss": 0.7192, "step": 2287, "time": 70.7 }, { "epoch": 1.77, "learning_rate": "3.9213e-05", "loss": 0.739, "slid_loss": 0.7194, "step": 2288, "time": 71.38 }, { "epoch": 1.77, "learning_rate": "3.9204e-05", "loss": 0.7429, "slid_loss": 0.7198, "step": 2289, "time": 71.66 }, { "epoch": 1.77, "learning_rate": "3.9196e-05", "loss": 0.7515, "slid_loss": 0.7201, "step": 2290, "time": 72.44 }, { "epoch": 1.77, "learning_rate": "3.9187e-05", "loss": 0.7287, "slid_loss": 0.72, "step": 2291, "time": 71.43 }, { "epoch": 1.77, "learning_rate": "3.9178e-05", "loss": 0.7259, "slid_loss": 0.7201, "step": 2292, "time": 71.47 }, { "epoch": 1.77, "learning_rate": "3.9169e-05", "loss": 0.7254, "slid_loss": 0.7203, "step": 2293, "time": 72.05 }, { "epoch": 1.77, "learning_rate": "3.9161e-05", "loss": 0.6933, "slid_loss": 0.7203, "step": 2294, "time": 71.27 }, { "epoch": 1.78, "learning_rate": "3.9152e-05", "loss": 0.696, "slid_loss": 0.72, "step": 2295, "time": 72.72 }, { "epoch": 1.78, "learning_rate": "3.9143e-05", "loss": 0.7213, "slid_loss": 0.72, "step": 2296, "time": 70.38 }, { "epoch": 1.78, "learning_rate": "3.9134e-05", "loss": 0.7688, "slid_loss": 0.7205, "step": 2297, "time": 71.65 }, { "epoch": 1.78, "learning_rate": "3.9126e-05", "loss": 0.7, "slid_loss": 0.7207, "step": 2298, "time": 70.88 }, { "epoch": 1.78, "learning_rate": "3.9117e-05", "loss": 0.7561, "slid_loss": 0.721, "step": 2299, "time": 71.77 }, { "epoch": 1.78, "learning_rate": "3.9108e-05", "loss": 0.7436, "slid_loss": 0.7214, "step": 2300, "time": 70.37 }, { "epoch": 1.78, "learning_rate": "3.9100e-05", "loss": 0.7248, "slid_loss": 0.7215, "step": 2301, "time": 70.77 }, { "epoch": 1.78, "learning_rate": "3.9091e-05", "loss": 0.7052, "slid_loss": 0.7212, "step": 2302, "time": 72.01 }, { "epoch": 1.78, "learning_rate": "3.9082e-05", "loss": 0.7353, "slid_loss": 0.7211, "step": 2303, "time": 73.1 }, { "epoch": 1.78, "learning_rate": "3.9073e-05", "loss": 0.7088, "slid_loss": 0.721, "step": 2304, "time": 71.61 }, { "epoch": 1.78, "learning_rate": "3.9064e-05", "loss": 0.6991, "slid_loss": 0.7204, "step": 2305, "time": 71.64 }, { "epoch": 1.78, "learning_rate": "3.9056e-05", "loss": 0.6939, "slid_loss": 0.7199, "step": 2306, "time": 73.49 }, { "epoch": 1.78, "learning_rate": "3.9047e-05", "loss": 0.6867, "slid_loss": 0.7198, "step": 2307, "time": 71.69 }, { "epoch": 1.79, "learning_rate": "3.9038e-05", "loss": 0.71, "slid_loss": 0.72, "step": 2308, "time": 71.5 }, { "epoch": 1.79, "learning_rate": "3.9029e-05", "loss": 0.7335, "slid_loss": 0.7201, "step": 2309, "time": 73.33 }, { "epoch": 1.79, "learning_rate": "3.9021e-05", "loss": 0.7225, "slid_loss": 0.7201, "step": 2310, "time": 70.67 }, { "epoch": 1.79, "learning_rate": "3.9012e-05", "loss": 0.7409, "slid_loss": 0.7207, "step": 2311, "time": 71.7 }, { "epoch": 1.79, "learning_rate": "3.9003e-05", "loss": 0.7063, "slid_loss": 0.7204, "step": 2312, "time": 71.88 }, { "epoch": 1.79, "learning_rate": "3.8994e-05", "loss": 0.7011, "slid_loss": 0.7203, "step": 2313, "time": 71.36 }, { "epoch": 1.79, "learning_rate": "3.8986e-05", "loss": 0.6926, "slid_loss": 0.7202, "step": 2314, "time": 72.12 }, { "epoch": 1.79, "learning_rate": "3.8977e-05", "loss": 0.741, "slid_loss": 0.7202, "step": 2315, "time": 72.47 }, { "epoch": 1.79, "learning_rate": "3.8968e-05", "loss": 0.7222, "slid_loss": 0.7199, "step": 2316, "time": 70.99 }, { "epoch": 1.79, "learning_rate": "3.8959e-05", "loss": 0.7052, "slid_loss": 0.7198, "step": 2317, "time": 71.62 }, { "epoch": 1.79, "learning_rate": "3.8950e-05", "loss": 0.7085, "slid_loss": 0.7193, "step": 2318, "time": 71.53 }, { "epoch": 1.79, "learning_rate": "3.8942e-05", "loss": 0.6949, "slid_loss": 0.719, "step": 2319, "time": 71.02 }, { "epoch": 1.79, "learning_rate": "3.8933e-05", "loss": 0.719, "slid_loss": 0.7187, "step": 2320, "time": 71.81 }, { "epoch": 1.8, "learning_rate": "3.8924e-05", "loss": 0.7248, "slid_loss": 0.7183, "step": 2321, "time": 71.23 }, { "epoch": 1.8, "learning_rate": "3.8915e-05", "loss": 0.706, "slid_loss": 0.7182, "step": 2322, "time": 71.31 }, { "epoch": 1.8, "learning_rate": "3.8906e-05", "loss": 0.7077, "slid_loss": 0.7181, "step": 2323, "time": 71.51 }, { "epoch": 1.8, "learning_rate": "3.8898e-05", "loss": 0.7088, "slid_loss": 0.7183, "step": 2324, "time": 71.24 }, { "epoch": 1.8, "learning_rate": "3.8889e-05", "loss": 0.6896, "slid_loss": 0.7179, "step": 2325, "time": 71.6 }, { "epoch": 1.8, "learning_rate": "3.8880e-05", "loss": 0.6998, "slid_loss": 0.7177, "step": 2326, "time": 73.1 }, { "epoch": 1.8, "learning_rate": "3.8871e-05", "loss": 0.7035, "slid_loss": 0.7177, "step": 2327, "time": 71.66 }, { "epoch": 1.8, "learning_rate": "3.8862e-05", "loss": 0.7248, "slid_loss": 0.7178, "step": 2328, "time": 71.76 }, { "epoch": 1.8, "learning_rate": "3.8854e-05", "loss": 0.7542, "slid_loss": 0.7182, "step": 2329, "time": 71.21 }, { "epoch": 1.8, "learning_rate": "3.8845e-05", "loss": 0.7441, "slid_loss": 0.7183, "step": 2330, "time": 71.53 }, { "epoch": 1.8, "learning_rate": "3.8836e-05", "loss": 0.69, "slid_loss": 0.718, "step": 2331, "time": 70.73 }, { "epoch": 1.8, "learning_rate": "3.8827e-05", "loss": 0.7248, "slid_loss": 0.718, "step": 2332, "time": 71.65 }, { "epoch": 1.81, "learning_rate": "3.8818e-05", "loss": 0.7271, "slid_loss": 0.7181, "step": 2333, "time": 71.92 }, { "epoch": 1.81, "learning_rate": "3.8810e-05", "loss": 0.7256, "slid_loss": 0.7185, "step": 2334, "time": 71.12 }, { "epoch": 1.81, "learning_rate": "3.8801e-05", "loss": 0.7427, "slid_loss": 0.7188, "step": 2335, "time": 70.81 }, { "epoch": 1.81, "learning_rate": "3.8792e-05", "loss": 0.721, "slid_loss": 0.719, "step": 2336, "time": 71.17 }, { "epoch": 1.81, "learning_rate": "3.8783e-05", "loss": 0.7116, "slid_loss": 0.7187, "step": 2337, "time": 71.36 }, { "epoch": 1.81, "learning_rate": "3.8774e-05", "loss": 0.7008, "slid_loss": 0.7186, "step": 2338, "time": 71.71 }, { "epoch": 1.81, "learning_rate": "3.8765e-05", "loss": 0.6977, "slid_loss": 0.7182, "step": 2339, "time": 71.62 }, { "epoch": 1.81, "learning_rate": "3.8757e-05", "loss": 0.7163, "slid_loss": 0.718, "step": 2340, "time": 71.49 }, { "epoch": 1.81, "learning_rate": "3.8748e-05", "loss": 0.7119, "slid_loss": 0.7179, "step": 2341, "time": 72.24 }, { "epoch": 1.81, "learning_rate": "3.8739e-05", "loss": 0.7605, "slid_loss": 0.7183, "step": 2342, "time": 71.86 }, { "epoch": 1.81, "learning_rate": "3.8730e-05", "loss": 0.6872, "slid_loss": 0.7179, "step": 2343, "time": 73.48 }, { "epoch": 1.81, "learning_rate": "3.8721e-05", "loss": 0.7333, "slid_loss": 0.7182, "step": 2344, "time": 71.92 }, { "epoch": 1.81, "learning_rate": "3.8712e-05", "loss": 0.7433, "slid_loss": 0.7188, "step": 2345, "time": 70.73 }, { "epoch": 1.82, "learning_rate": "3.8704e-05", "loss": 0.7588, "slid_loss": 0.7189, "step": 2346, "time": 72.09 }, { "epoch": 1.82, "learning_rate": "3.8695e-05", "loss": 0.6839, "slid_loss": 0.7189, "step": 2347, "time": 71.19 }, { "epoch": 1.82, "learning_rate": "3.8686e-05", "loss": 0.7055, "slid_loss": 0.7183, "step": 2348, "time": 71.13 }, { "epoch": 1.82, "learning_rate": "3.8677e-05", "loss": 0.7207, "slid_loss": 0.7182, "step": 2349, "time": 70.88 }, { "epoch": 1.82, "learning_rate": "3.8668e-05", "loss": 0.7369, "slid_loss": 0.7184, "step": 2350, "time": 71.69 }, { "epoch": 1.82, "learning_rate": "3.8659e-05", "loss": 0.7054, "slid_loss": 0.7185, "step": 2351, "time": 71.98 }, { "epoch": 1.82, "learning_rate": "3.8650e-05", "loss": 0.6916, "slid_loss": 0.7178, "step": 2352, "time": 72.16 }, { "epoch": 1.82, "learning_rate": "3.8642e-05", "loss": 0.7047, "slid_loss": 0.7176, "step": 2353, "time": 72.83 }, { "epoch": 1.82, "learning_rate": "3.8633e-05", "loss": 0.7213, "slid_loss": 0.7179, "step": 2354, "time": 70.09 }, { "epoch": 1.82, "learning_rate": "3.8624e-05", "loss": 0.7318, "slid_loss": 0.718, "step": 2355, "time": 71.49 }, { "epoch": 1.82, "learning_rate": "3.8615e-05", "loss": 0.7252, "slid_loss": 0.7177, "step": 2356, "time": 71.94 }, { "epoch": 1.82, "learning_rate": "3.8606e-05", "loss": 0.709, "slid_loss": 0.7176, "step": 2357, "time": 70.93 }, { "epoch": 1.82, "learning_rate": "3.8597e-05", "loss": 0.7198, "slid_loss": 0.7173, "step": 2358, "time": 71.44 }, { "epoch": 1.83, "learning_rate": "3.8588e-05", "loss": 0.6965, "slid_loss": 0.7171, "step": 2359, "time": 72.68 }, { "epoch": 1.83, "learning_rate": "3.8580e-05", "loss": 0.7241, "slid_loss": 0.7171, "step": 2360, "time": 71.13 }, { "epoch": 1.83, "learning_rate": "3.8571e-05", "loss": 0.699, "slid_loss": 0.7171, "step": 2361, "time": 71.87 }, { "epoch": 1.83, "learning_rate": "3.8562e-05", "loss": 0.7622, "slid_loss": 0.7175, "step": 2362, "time": 70.41 }, { "epoch": 1.83, "learning_rate": "3.8553e-05", "loss": 0.722, "slid_loss": 0.7172, "step": 2363, "time": 72.75 }, { "epoch": 1.83, "learning_rate": "3.8544e-05", "loss": 0.7436, "slid_loss": 0.7174, "step": 2364, "time": 71.92 }, { "epoch": 1.83, "learning_rate": "3.8535e-05", "loss": 0.6823, "slid_loss": 0.717, "step": 2365, "time": 70.78 }, { "epoch": 1.83, "learning_rate": "3.8526e-05", "loss": 0.7328, "slid_loss": 0.7171, "step": 2366, "time": 71.25 }, { "epoch": 1.83, "learning_rate": "3.8517e-05", "loss": 0.6985, "slid_loss": 0.7171, "step": 2367, "time": 72.16 }, { "epoch": 1.83, "learning_rate": "3.8508e-05", "loss": 0.67, "slid_loss": 0.7166, "step": 2368, "time": 70.85 }, { "epoch": 1.83, "learning_rate": "3.8500e-05", "loss": 0.7272, "slid_loss": 0.7165, "step": 2369, "time": 73.03 }, { "epoch": 1.83, "learning_rate": "3.8491e-05", "loss": 0.7296, "slid_loss": 0.7166, "step": 2370, "time": 70.53 }, { "epoch": 1.83, "learning_rate": "3.8482e-05", "loss": 0.7042, "slid_loss": 0.7162, "step": 2371, "time": 73.16 }, { "epoch": 1.84, "learning_rate": "3.8473e-05", "loss": 0.7182, "slid_loss": 0.7165, "step": 2372, "time": 71.22 }, { "epoch": 1.84, "learning_rate": "3.8464e-05", "loss": 0.6955, "slid_loss": 0.7163, "step": 2373, "time": 72.34 }, { "epoch": 1.84, "learning_rate": "3.8455e-05", "loss": 0.6882, "slid_loss": 0.716, "step": 2374, "time": 72.11 }, { "epoch": 1.84, "learning_rate": "3.8446e-05", "loss": 0.7199, "slid_loss": 0.7157, "step": 2375, "time": 71.55 }, { "epoch": 1.84, "learning_rate": "3.8437e-05", "loss": 0.7057, "slid_loss": 0.7158, "step": 2376, "time": 96.84 }, { "epoch": 1.84, "learning_rate": "3.8428e-05", "loss": 0.6945, "slid_loss": 0.7158, "step": 2377, "time": 109.36 }, { "epoch": 1.84, "learning_rate": "3.8419e-05", "loss": 0.7407, "slid_loss": 0.7164, "step": 2378, "time": 85.06 }, { "epoch": 1.84, "learning_rate": "3.8411e-05", "loss": 0.7346, "slid_loss": 0.7164, "step": 2379, "time": 111.13 }, { "epoch": 1.84, "learning_rate": "3.8402e-05", "loss": 0.6989, "slid_loss": 0.7161, "step": 2380, "time": 163.46 }, { "epoch": 1.84, "learning_rate": "3.8393e-05", "loss": 0.7241, "slid_loss": 0.7162, "step": 2381, "time": 137.36 }, { "epoch": 1.84, "learning_rate": "3.8384e-05", "loss": 0.7462, "slid_loss": 0.7165, "step": 2382, "time": 149.55 }, { "epoch": 1.84, "learning_rate": "3.8375e-05", "loss": 0.7185, "slid_loss": 0.7165, "step": 2383, "time": 189.36 }, { "epoch": 1.84, "learning_rate": "3.8366e-05", "loss": 0.6845, "slid_loss": 0.7165, "step": 2384, "time": 162.47 }, { "epoch": 1.85, "learning_rate": "3.8357e-05", "loss": 0.7097, "slid_loss": 0.717, "step": 2385, "time": 147.96 }, { "epoch": 1.85, "learning_rate": "3.8348e-05", "loss": 0.727, "slid_loss": 0.717, "step": 2386, "time": 159.31 }, { "epoch": 1.85, "learning_rate": "3.8339e-05", "loss": 0.7046, "slid_loss": 0.7172, "step": 2387, "time": 122.47 }, { "epoch": 1.85, "learning_rate": "3.8330e-05", "loss": 0.719, "slid_loss": 0.717, "step": 2388, "time": 154.11 }, { "epoch": 1.85, "learning_rate": "3.8321e-05", "loss": 0.7171, "slid_loss": 0.7167, "step": 2389, "time": 122.72 }, { "epoch": 1.85, "learning_rate": "3.8312e-05", "loss": 0.7043, "slid_loss": 0.7163, "step": 2390, "time": 83.57 }, { "epoch": 1.85, "learning_rate": "3.8303e-05", "loss": 0.7406, "slid_loss": 0.7164, "step": 2391, "time": 98.87 }, { "epoch": 1.85, "learning_rate": "3.8294e-05", "loss": 0.7108, "slid_loss": 0.7162, "step": 2392, "time": 97.07 }, { "epoch": 1.85, "learning_rate": "3.8286e-05", "loss": 0.7247, "slid_loss": 0.7162, "step": 2393, "time": 71.63 }, { "epoch": 1.85, "learning_rate": "3.8277e-05", "loss": 0.6727, "slid_loss": 0.716, "step": 2394, "time": 72.27 }, { "epoch": 1.85, "learning_rate": "3.8268e-05", "loss": 0.7072, "slid_loss": 0.7161, "step": 2395, "time": 71.54 }, { "epoch": 1.85, "learning_rate": "3.8259e-05", "loss": 0.7295, "slid_loss": 0.7162, "step": 2396, "time": 74.27 }, { "epoch": 1.85, "learning_rate": "3.8250e-05", "loss": 0.7355, "slid_loss": 0.7159, "step": 2397, "time": 71.53 }, { "epoch": 1.86, "learning_rate": "3.8241e-05", "loss": 0.7268, "slid_loss": 0.7161, "step": 2398, "time": 71.72 }, { "epoch": 1.86, "learning_rate": "3.8232e-05", "loss": 0.7015, "slid_loss": 0.7156, "step": 2399, "time": 72.86 }, { "epoch": 1.86, "learning_rate": "3.8223e-05", "loss": 0.7526, "slid_loss": 0.7157, "step": 2400, "time": 71.44 }, { "epoch": 1.86, "learning_rate": "3.8214e-05", "loss": 0.7176, "slid_loss": 0.7156, "step": 2401, "time": 756.34 }, { "epoch": 1.86, "learning_rate": "3.8205e-05", "loss": 0.6973, "slid_loss": 0.7155, "step": 2402, "time": 71.59 }, { "epoch": 1.86, "learning_rate": "3.8196e-05", "loss": 0.7104, "slid_loss": 0.7153, "step": 2403, "time": 71.32 }, { "epoch": 1.86, "learning_rate": "3.8187e-05", "loss": 0.6989, "slid_loss": 0.7152, "step": 2404, "time": 71.2 }, { "epoch": 1.86, "learning_rate": "3.8178e-05", "loss": 0.7322, "slid_loss": 0.7155, "step": 2405, "time": 71.29 }, { "epoch": 1.86, "learning_rate": "3.8169e-05", "loss": 0.691, "slid_loss": 0.7155, "step": 2406, "time": 74.11 }, { "epoch": 1.86, "learning_rate": "3.8160e-05", "loss": 0.7249, "slid_loss": 0.7159, "step": 2407, "time": 70.89 }, { "epoch": 1.86, "learning_rate": "3.8151e-05", "loss": 0.6699, "slid_loss": 0.7155, "step": 2408, "time": 71.43 }, { "epoch": 1.86, "learning_rate": "3.8142e-05", "loss": 0.7477, "slid_loss": 0.7156, "step": 2409, "time": 70.32 }, { "epoch": 1.86, "learning_rate": "3.8133e-05", "loss": 0.7271, "slid_loss": 0.7157, "step": 2410, "time": 72.23 }, { "epoch": 1.87, "learning_rate": "3.8124e-05", "loss": 0.7291, "slid_loss": 0.7155, "step": 2411, "time": 71.94 }, { "epoch": 1.87, "learning_rate": "3.8115e-05", "loss": 0.7194, "slid_loss": 0.7157, "step": 2412, "time": 71.58 }, { "epoch": 1.87, "learning_rate": "3.8106e-05", "loss": 0.6977, "slid_loss": 0.7156, "step": 2413, "time": 70.45 }, { "epoch": 1.87, "learning_rate": "3.8097e-05", "loss": 0.6956, "slid_loss": 0.7157, "step": 2414, "time": 72.79 }, { "epoch": 1.87, "learning_rate": "3.8088e-05", "loss": 0.7289, "slid_loss": 0.7155, "step": 2415, "time": 72.67 }, { "epoch": 1.87, "learning_rate": "3.8079e-05", "loss": 0.7077, "slid_loss": 0.7154, "step": 2416, "time": 71.27 }, { "epoch": 1.87, "learning_rate": "3.8070e-05", "loss": 0.7104, "slid_loss": 0.7154, "step": 2417, "time": 71.67 }, { "epoch": 1.87, "learning_rate": "3.8061e-05", "loss": 0.6881, "slid_loss": 0.7152, "step": 2418, "time": 72.12 }, { "epoch": 1.87, "learning_rate": "3.8052e-05", "loss": 0.7112, "slid_loss": 0.7154, "step": 2419, "time": 72.31 }, { "epoch": 1.87, "learning_rate": "3.8043e-05", "loss": 0.7272, "slid_loss": 0.7155, "step": 2420, "time": 70.84 }, { "epoch": 1.87, "learning_rate": "3.8034e-05", "loss": 0.7164, "slid_loss": 0.7154, "step": 2421, "time": 71.14 }, { "epoch": 1.87, "learning_rate": "3.8025e-05", "loss": 0.6835, "slid_loss": 0.7152, "step": 2422, "time": 72.15 }, { "epoch": 1.87, "learning_rate": "3.8016e-05", "loss": 0.6868, "slid_loss": 0.715, "step": 2423, "time": 72.05 }, { "epoch": 1.88, "learning_rate": "3.8007e-05", "loss": 0.7499, "slid_loss": 0.7154, "step": 2424, "time": 71.97 }, { "epoch": 1.88, "learning_rate": "3.7998e-05", "loss": 0.7094, "slid_loss": 0.7156, "step": 2425, "time": 71.27 }, { "epoch": 1.88, "learning_rate": "3.7989e-05", "loss": 0.7248, "slid_loss": 0.7158, "step": 2426, "time": 72.39 }, { "epoch": 1.88, "learning_rate": "3.7980e-05", "loss": 0.7555, "slid_loss": 0.7163, "step": 2427, "time": 71.5 }, { "epoch": 1.88, "learning_rate": "3.7971e-05", "loss": 0.7132, "slid_loss": 0.7162, "step": 2428, "time": 72.96 }, { "epoch": 1.88, "learning_rate": "3.7962e-05", "loss": 0.7212, "slid_loss": 0.7159, "step": 2429, "time": 71.27 }, { "epoch": 1.88, "learning_rate": "3.7953e-05", "loss": 0.755, "slid_loss": 0.716, "step": 2430, "time": 72.03 }, { "epoch": 1.88, "learning_rate": "3.7944e-05", "loss": 0.7165, "slid_loss": 0.7163, "step": 2431, "time": 73.49 }, { "epoch": 1.88, "learning_rate": "3.7935e-05", "loss": 0.645, "slid_loss": 0.7155, "step": 2432, "time": 71.03 }, { "epoch": 1.88, "learning_rate": "3.7926e-05", "loss": 0.696, "slid_loss": 0.7152, "step": 2433, "time": 72.01 }, { "epoch": 1.88, "learning_rate": "3.7917e-05", "loss": 0.6881, "slid_loss": 0.7148, "step": 2434, "time": 73.35 }, { "epoch": 1.88, "learning_rate": "3.7908e-05", "loss": 0.7211, "slid_loss": 0.7146, "step": 2435, "time": 70.68 }, { "epoch": 1.88, "learning_rate": "3.7899e-05", "loss": 0.6666, "slid_loss": 0.714, "step": 2436, "time": 72.35 }, { "epoch": 1.89, "learning_rate": "3.7890e-05", "loss": 0.712, "slid_loss": 0.714, "step": 2437, "time": 71.54 }, { "epoch": 1.89, "learning_rate": "3.7881e-05", "loss": 0.7011, "slid_loss": 0.714, "step": 2438, "time": 71.37 }, { "epoch": 1.89, "learning_rate": "3.7872e-05", "loss": 0.6869, "slid_loss": 0.7139, "step": 2439, "time": 71.62 }, { "epoch": 1.89, "learning_rate": "3.7863e-05", "loss": 0.7003, "slid_loss": 0.7138, "step": 2440, "time": 71.53 }, { "epoch": 1.89, "learning_rate": "3.7854e-05", "loss": 0.6977, "slid_loss": 0.7136, "step": 2441, "time": 72.64 }, { "epoch": 1.89, "learning_rate": "3.7845e-05", "loss": 0.6874, "slid_loss": 0.7129, "step": 2442, "time": 71.11 }, { "epoch": 1.89, "learning_rate": "3.7836e-05", "loss": 0.7361, "slid_loss": 0.7134, "step": 2443, "time": 71.16 }, { "epoch": 1.89, "learning_rate": "3.7827e-05", "loss": 0.7246, "slid_loss": 0.7133, "step": 2444, "time": 71.48 }, { "epoch": 1.89, "learning_rate": "3.7818e-05", "loss": 0.705, "slid_loss": 0.7129, "step": 2445, "time": 72.42 }, { "epoch": 1.89, "learning_rate": "3.7809e-05", "loss": 0.7043, "slid_loss": 0.7124, "step": 2446, "time": 70.14 }, { "epoch": 1.89, "learning_rate": "3.7800e-05", "loss": 0.6957, "slid_loss": 0.7125, "step": 2447, "time": 71.22 }, { "epoch": 1.89, "learning_rate": "3.7791e-05", "loss": 0.7445, "slid_loss": 0.7129, "step": 2448, "time": 71.76 }, { "epoch": 1.89, "learning_rate": "3.7782e-05", "loss": 0.71, "slid_loss": 0.7128, "step": 2449, "time": 72.15 }, { "epoch": 1.9, "learning_rate": "3.7773e-05", "loss": 0.7313, "slid_loss": 0.7127, "step": 2450, "time": 71.41 }, { "epoch": 1.9, "learning_rate": "3.7764e-05", "loss": 0.6837, "slid_loss": 0.7125, "step": 2451, "time": 72.96 }, { "epoch": 1.9, "learning_rate": "3.7755e-05", "loss": 0.7284, "slid_loss": 0.7129, "step": 2452, "time": 71.89 }, { "epoch": 1.9, "learning_rate": "3.7745e-05", "loss": 0.7312, "slid_loss": 0.7131, "step": 2453, "time": 71.28 }, { "epoch": 1.9, "learning_rate": "3.7736e-05", "loss": 0.7094, "slid_loss": 0.713, "step": 2454, "time": 71.1 }, { "epoch": 1.9, "learning_rate": "3.7727e-05", "loss": 0.7211, "slid_loss": 0.7129, "step": 2455, "time": 72.62 }, { "epoch": 1.9, "learning_rate": "3.7718e-05", "loss": 0.7537, "slid_loss": 0.7132, "step": 2456, "time": 72.27 }, { "epoch": 1.9, "learning_rate": "3.7709e-05", "loss": 0.713, "slid_loss": 0.7132, "step": 2457, "time": 72.86 }, { "epoch": 1.9, "learning_rate": "3.7700e-05", "loss": 0.6949, "slid_loss": 0.713, "step": 2458, "time": 71.81 }, { "epoch": 1.9, "learning_rate": "3.7691e-05", "loss": 0.7288, "slid_loss": 0.7133, "step": 2459, "time": 70.52 }, { "epoch": 1.9, "learning_rate": "3.7682e-05", "loss": 0.7151, "slid_loss": 0.7132, "step": 2460, "time": 70.79 }, { "epoch": 1.9, "learning_rate": "3.7673e-05", "loss": 0.6982, "slid_loss": 0.7132, "step": 2461, "time": 72.71 }, { "epoch": 1.9, "learning_rate": "3.7664e-05", "loss": 0.7309, "slid_loss": 0.7129, "step": 2462, "time": 71.79 }, { "epoch": 1.91, "learning_rate": "3.7655e-05", "loss": 0.7071, "slid_loss": 0.7128, "step": 2463, "time": 71.88 }, { "epoch": 1.91, "learning_rate": "3.7646e-05", "loss": 0.6887, "slid_loss": 0.7122, "step": 2464, "time": 72.07 }, { "epoch": 1.91, "learning_rate": "3.7637e-05", "loss": 0.6764, "slid_loss": 0.7121, "step": 2465, "time": 72.39 }, { "epoch": 1.91, "learning_rate": "3.7628e-05", "loss": 0.7002, "slid_loss": 0.7118, "step": 2466, "time": 70.8 }, { "epoch": 1.91, "learning_rate": "3.7618e-05", "loss": 0.7482, "slid_loss": 0.7123, "step": 2467, "time": 70.55 }, { "epoch": 1.91, "learning_rate": "3.7609e-05", "loss": 0.7071, "slid_loss": 0.7127, "step": 2468, "time": 71.46 }, { "epoch": 1.91, "learning_rate": "3.7600e-05", "loss": 0.7069, "slid_loss": 0.7125, "step": 2469, "time": 72.12 }, { "epoch": 1.91, "learning_rate": "3.7591e-05", "loss": 0.7089, "slid_loss": 0.7123, "step": 2470, "time": 69.95 }, { "epoch": 1.91, "learning_rate": "3.7582e-05", "loss": 0.6957, "slid_loss": 0.7122, "step": 2471, "time": 72.89 }, { "epoch": 1.91, "learning_rate": "3.7573e-05", "loss": 0.6936, "slid_loss": 0.7119, "step": 2472, "time": 72.07 }, { "epoch": 1.91, "learning_rate": "3.7564e-05", "loss": 0.7039, "slid_loss": 0.712, "step": 2473, "time": 72.4 }, { "epoch": 1.91, "learning_rate": "3.7555e-05", "loss": 0.7185, "slid_loss": 0.7123, "step": 2474, "time": 70.57 }, { "epoch": 1.91, "learning_rate": "3.7546e-05", "loss": 0.7218, "slid_loss": 0.7124, "step": 2475, "time": 71.97 }, { "epoch": 1.92, "learning_rate": "3.7537e-05", "loss": 0.6875, "slid_loss": 0.7122, "step": 2476, "time": 73.18 }, { "epoch": 1.92, "learning_rate": "3.7528e-05", "loss": 0.6891, "slid_loss": 0.7121, "step": 2477, "time": 71.12 }, { "epoch": 1.92, "learning_rate": "3.7518e-05", "loss": 0.7125, "slid_loss": 0.7118, "step": 2478, "time": 71.76 }, { "epoch": 1.92, "learning_rate": "3.7509e-05", "loss": 0.7315, "slid_loss": 0.7118, "step": 2479, "time": 72.09 }, { "epoch": 1.92, "learning_rate": "3.7500e-05", "loss": 0.7084, "slid_loss": 0.7119, "step": 2480, "time": 70.81 }, { "epoch": 1.92, "learning_rate": "3.7491e-05", "loss": 0.7714, "slid_loss": 0.7124, "step": 2481, "time": 71.19 }, { "epoch": 1.92, "learning_rate": "3.7482e-05", "loss": 0.6832, "slid_loss": 0.7117, "step": 2482, "time": 73.84 }, { "epoch": 1.92, "learning_rate": "3.7473e-05", "loss": 0.6858, "slid_loss": 0.7114, "step": 2483, "time": 72.19 }, { "epoch": 1.92, "learning_rate": "3.7464e-05", "loss": 0.7177, "slid_loss": 0.7117, "step": 2484, "time": 72.65 }, { "epoch": 1.92, "learning_rate": "3.7455e-05", "loss": 0.6742, "slid_loss": 0.7114, "step": 2485, "time": 71.45 }, { "epoch": 1.92, "learning_rate": "3.7446e-05", "loss": 0.7071, "slid_loss": 0.7112, "step": 2486, "time": 71.19 }, { "epoch": 1.92, "learning_rate": "3.7436e-05", "loss": 0.7248, "slid_loss": 0.7114, "step": 2487, "time": 71.31 }, { "epoch": 1.92, "learning_rate": "3.7427e-05", "loss": 0.705, "slid_loss": 0.7113, "step": 2488, "time": 71.62 }, { "epoch": 1.93, "learning_rate": "3.7418e-05", "loss": 0.7038, "slid_loss": 0.7111, "step": 2489, "time": 71.32 }, { "epoch": 1.93, "learning_rate": "3.7409e-05", "loss": 0.7051, "slid_loss": 0.7111, "step": 2490, "time": 71.96 }, { "epoch": 1.93, "learning_rate": "3.7400e-05", "loss": 0.694, "slid_loss": 0.7107, "step": 2491, "time": 71.04 }, { "epoch": 1.93, "learning_rate": "3.7391e-05", "loss": 0.713, "slid_loss": 0.7107, "step": 2492, "time": 72.83 }, { "epoch": 1.93, "learning_rate": "3.7382e-05", "loss": 0.7314, "slid_loss": 0.7108, "step": 2493, "time": 71.6 }, { "epoch": 1.93, "learning_rate": "3.7373e-05", "loss": 0.6693, "slid_loss": 0.7107, "step": 2494, "time": 72.85 }, { "epoch": 1.93, "learning_rate": "3.7363e-05", "loss": 0.6933, "slid_loss": 0.7106, "step": 2495, "time": 72.01 }, { "epoch": 1.93, "learning_rate": "3.7354e-05", "loss": 0.6883, "slid_loss": 0.7102, "step": 2496, "time": 71.34 }, { "epoch": 1.93, "learning_rate": "3.7345e-05", "loss": 0.7111, "slid_loss": 0.7099, "step": 2497, "time": 71.47 }, { "epoch": 1.93, "learning_rate": "3.7336e-05", "loss": 0.7155, "slid_loss": 0.7098, "step": 2498, "time": 71.74 }, { "epoch": 1.93, "learning_rate": "3.7327e-05", "loss": 0.7075, "slid_loss": 0.7099, "step": 2499, "time": 71.36 }, { "epoch": 1.93, "learning_rate": "3.7318e-05", "loss": 0.6955, "slid_loss": 0.7093, "step": 2500, "time": 72.15 }, { "epoch": 1.94, "learning_rate": "3.7309e-05", "loss": 0.7058, "slid_loss": 0.7092, "step": 2501, "time": 72.4 }, { "epoch": 1.94, "learning_rate": "3.7299e-05", "loss": 0.7174, "slid_loss": 0.7094, "step": 2502, "time": 71.26 }, { "epoch": 1.94, "learning_rate": "3.7290e-05", "loss": 0.7024, "slid_loss": 0.7093, "step": 2503, "time": 71.46 }, { "epoch": 1.94, "learning_rate": "3.7281e-05", "loss": 0.755, "slid_loss": 0.7099, "step": 2504, "time": 71.9 }, { "epoch": 1.94, "learning_rate": "3.7272e-05", "loss": 0.7004, "slid_loss": 0.7095, "step": 2505, "time": 71.6 }, { "epoch": 1.94, "learning_rate": "3.7263e-05", "loss": 0.6825, "slid_loss": 0.7095, "step": 2506, "time": 71.15 }, { "epoch": 1.94, "learning_rate": "3.7254e-05", "loss": 0.7114, "slid_loss": 0.7093, "step": 2507, "time": 70.57 }, { "epoch": 1.94, "learning_rate": "3.7244e-05", "loss": 0.7355, "slid_loss": 0.71, "step": 2508, "time": 70.97 }, { "epoch": 1.94, "learning_rate": "3.7235e-05", "loss": 0.7057, "slid_loss": 0.7096, "step": 2509, "time": 72.12 }, { "epoch": 1.94, "learning_rate": "3.7226e-05", "loss": 0.7138, "slid_loss": 0.7094, "step": 2510, "time": 73.25 }, { "epoch": 1.94, "learning_rate": "3.7217e-05", "loss": 0.7149, "slid_loss": 0.7093, "step": 2511, "time": 70.84 }, { "epoch": 1.94, "learning_rate": "3.7208e-05", "loss": 0.6942, "slid_loss": 0.709, "step": 2512, "time": 71.66 }, { "epoch": 1.94, "learning_rate": "3.7199e-05", "loss": 0.6855, "slid_loss": 0.7089, "step": 2513, "time": 72.22 }, { "epoch": 1.95, "learning_rate": "3.7190e-05", "loss": 0.6935, "slid_loss": 0.7089, "step": 2514, "time": 71.6 }, { "epoch": 1.95, "learning_rate": "3.7180e-05", "loss": 0.6879, "slid_loss": 0.7085, "step": 2515, "time": 72.16 }, { "epoch": 1.95, "learning_rate": "3.7171e-05", "loss": 0.727, "slid_loss": 0.7087, "step": 2516, "time": 72.0 }, { "epoch": 1.95, "learning_rate": "3.7162e-05", "loss": 0.7031, "slid_loss": 0.7086, "step": 2517, "time": 70.36 }, { "epoch": 1.95, "learning_rate": "3.7153e-05", "loss": 0.6922, "slid_loss": 0.7086, "step": 2518, "time": 72.49 }, { "epoch": 1.95, "learning_rate": "3.7144e-05", "loss": 0.6725, "slid_loss": 0.7083, "step": 2519, "time": 71.24 }, { "epoch": 1.95, "learning_rate": "3.7134e-05", "loss": 0.7352, "slid_loss": 0.7083, "step": 2520, "time": 71.42 }, { "epoch": 1.95, "learning_rate": "3.7125e-05", "loss": 0.6989, "slid_loss": 0.7082, "step": 2521, "time": 71.83 }, { "epoch": 1.95, "learning_rate": "3.7116e-05", "loss": 0.684, "slid_loss": 0.7082, "step": 2522, "time": 72.64 }, { "epoch": 1.95, "learning_rate": "3.7107e-05", "loss": 0.6888, "slid_loss": 0.7082, "step": 2523, "time": 72.03 }, { "epoch": 1.95, "learning_rate": "3.7098e-05", "loss": 0.6755, "slid_loss": 0.7074, "step": 2524, "time": 73.28 }, { "epoch": 1.95, "learning_rate": "3.7089e-05", "loss": 0.7127, "slid_loss": 0.7075, "step": 2525, "time": 72.46 }, { "epoch": 1.95, "learning_rate": "3.7079e-05", "loss": 0.7117, "slid_loss": 0.7073, "step": 2526, "time": 71.86 }, { "epoch": 1.96, "learning_rate": "3.7070e-05", "loss": 0.7271, "slid_loss": 0.7071, "step": 2527, "time": 70.58 }, { "epoch": 1.96, "learning_rate": "3.7061e-05", "loss": 0.7186, "slid_loss": 0.7071, "step": 2528, "time": 71.49 }, { "epoch": 1.96, "learning_rate": "3.7052e-05", "loss": 0.6919, "slid_loss": 0.7068, "step": 2529, "time": 71.84 }, { "epoch": 1.96, "learning_rate": "3.7043e-05", "loss": 0.6655, "slid_loss": 0.7059, "step": 2530, "time": 72.82 }, { "epoch": 1.96, "learning_rate": "3.7033e-05", "loss": 0.6698, "slid_loss": 0.7055, "step": 2531, "time": 71.35 }, { "epoch": 1.96, "learning_rate": "3.7024e-05", "loss": 0.6932, "slid_loss": 0.7059, "step": 2532, "time": 71.73 }, { "epoch": 1.96, "learning_rate": "3.7015e-05", "loss": 0.725, "slid_loss": 0.7062, "step": 2533, "time": 72.13 }, { "epoch": 1.96, "learning_rate": "3.7006e-05", "loss": 0.7112, "slid_loss": 0.7065, "step": 2534, "time": 85.95 }, { "epoch": 1.96, "learning_rate": "3.6997e-05", "loss": 0.7158, "slid_loss": 0.7064, "step": 2535, "time": 85.33 }, { "epoch": 1.96, "learning_rate": "3.6987e-05", "loss": 0.751, "slid_loss": 0.7073, "step": 2536, "time": 109.18 }, { "epoch": 1.96, "learning_rate": "3.6978e-05", "loss": 0.7601, "slid_loss": 0.7077, "step": 2537, "time": 99.17 }, { "epoch": 1.96, "learning_rate": "3.6969e-05", "loss": 0.6646, "slid_loss": 0.7074, "step": 2538, "time": 109.87 }, { "epoch": 1.96, "learning_rate": "3.6960e-05", "loss": 0.7318, "slid_loss": 0.7078, "step": 2539, "time": 108.79 }, { "epoch": 1.97, "learning_rate": "3.6951e-05", "loss": 0.6864, "slid_loss": 0.7077, "step": 2540, "time": 138.76 }, { "epoch": 1.97, "learning_rate": "3.6941e-05", "loss": 0.7211, "slid_loss": 0.7079, "step": 2541, "time": 165.69 }, { "epoch": 1.97, "learning_rate": "3.6932e-05", "loss": 0.6779, "slid_loss": 0.7078, "step": 2542, "time": 165.55 }, { "epoch": 1.97, "learning_rate": "3.6923e-05", "loss": 0.74, "slid_loss": 0.7079, "step": 2543, "time": 160.21 }, { "epoch": 1.97, "learning_rate": "3.6914e-05", "loss": 0.7385, "slid_loss": 0.708, "step": 2544, "time": 157.11 }, { "epoch": 1.97, "learning_rate": "3.6904e-05", "loss": 0.7514, "slid_loss": 0.7085, "step": 2545, "time": 137.61 }, { "epoch": 1.97, "learning_rate": "3.6895e-05", "loss": 0.6896, "slid_loss": 0.7083, "step": 2546, "time": 121.94 }, { "epoch": 1.97, "learning_rate": "3.6886e-05", "loss": 0.6975, "slid_loss": 0.7083, "step": 2547, "time": 133.51 }, { "epoch": 1.97, "learning_rate": "3.6877e-05", "loss": 0.7276, "slid_loss": 0.7082, "step": 2548, "time": 122.67 }, { "epoch": 1.97, "learning_rate": "3.6868e-05", "loss": 0.6805, "slid_loss": 0.7079, "step": 2549, "time": 109.05 }, { "epoch": 1.97, "learning_rate": "3.6858e-05", "loss": 0.7056, "slid_loss": 0.7076, "step": 2550, "time": 82.21 }, { "epoch": 1.97, "learning_rate": "3.6849e-05", "loss": 0.7582, "slid_loss": 0.7084, "step": 2551, "time": 107.74 }, { "epoch": 1.97, "learning_rate": "3.6840e-05", "loss": 0.6723, "slid_loss": 0.7078, "step": 2552, "time": 71.73 }, { "epoch": 1.98, "learning_rate": "3.6831e-05", "loss": 0.7447, "slid_loss": 0.7079, "step": 2553, "time": 71.28 }, { "epoch": 1.98, "learning_rate": "3.6821e-05", "loss": 0.7283, "slid_loss": 0.7081, "step": 2554, "time": 70.62 }, { "epoch": 1.98, "learning_rate": "3.6812e-05", "loss": 0.7298, "slid_loss": 0.7082, "step": 2555, "time": 71.1 }, { "epoch": 1.98, "learning_rate": "3.6803e-05", "loss": 0.7156, "slid_loss": 0.7078, "step": 2556, "time": 71.8 }, { "epoch": 1.98, "learning_rate": "3.6794e-05", "loss": 0.7303, "slid_loss": 0.708, "step": 2557, "time": 71.01 }, { "epoch": 1.98, "learning_rate": "3.6784e-05", "loss": 0.6941, "slid_loss": 0.708, "step": 2558, "time": 72.11 }, { "epoch": 1.98, "learning_rate": "3.6775e-05", "loss": 0.713, "slid_loss": 0.7078, "step": 2559, "time": 72.76 }, { "epoch": 1.98, "learning_rate": "3.6766e-05", "loss": 0.7315, "slid_loss": 0.708, "step": 2560, "time": 70.99 }, { "epoch": 1.98, "learning_rate": "3.6757e-05", "loss": 0.7377, "slid_loss": 0.7084, "step": 2561, "time": 71.42 }, { "epoch": 1.98, "learning_rate": "3.6747e-05", "loss": 0.6913, "slid_loss": 0.708, "step": 2562, "time": 70.7 }, { "epoch": 1.98, "learning_rate": "3.6738e-05", "loss": 0.7027, "slid_loss": 0.7079, "step": 2563, "time": 72.21 }, { "epoch": 1.98, "learning_rate": "3.6729e-05", "loss": 0.7379, "slid_loss": 0.7084, "step": 2564, "time": 71.45 }, { "epoch": 1.98, "learning_rate": "3.6720e-05", "loss": 0.7436, "slid_loss": 0.7091, "step": 2565, "time": 71.61 }, { "epoch": 1.99, "learning_rate": "3.6710e-05", "loss": 0.7286, "slid_loss": 0.7094, "step": 2566, "time": 73.21 }, { "epoch": 1.99, "learning_rate": "3.6701e-05", "loss": 0.7471, "slid_loss": 0.7094, "step": 2567, "time": 71.53 }, { "epoch": 1.99, "learning_rate": "3.6692e-05", "loss": 0.729, "slid_loss": 0.7096, "step": 2568, "time": 71.25 }, { "epoch": 1.99, "learning_rate": "3.6683e-05", "loss": 0.7333, "slid_loss": 0.7099, "step": 2569, "time": 71.61 }, { "epoch": 1.99, "learning_rate": "3.6673e-05", "loss": 0.6782, "slid_loss": 0.7096, "step": 2570, "time": 71.77 }, { "epoch": 1.99, "learning_rate": "3.6664e-05", "loss": 0.7531, "slid_loss": 0.7101, "step": 2571, "time": 70.71 }, { "epoch": 1.99, "learning_rate": "3.6655e-05", "loss": 0.7255, "slid_loss": 0.7105, "step": 2572, "time": 71.28 }, { "epoch": 1.99, "learning_rate": "3.6646e-05", "loss": 0.6743, "slid_loss": 0.7102, "step": 2573, "time": 71.37 }, { "epoch": 1.99, "learning_rate": "3.6636e-05", "loss": 0.7261, "slid_loss": 0.7102, "step": 2574, "time": 71.77 }, { "epoch": 1.99, "learning_rate": "3.6627e-05", "loss": 0.7048, "slid_loss": 0.7101, "step": 2575, "time": 71.4 }, { "epoch": 1.99, "learning_rate": "3.6618e-05", "loss": 0.7112, "slid_loss": 0.7103, "step": 2576, "time": 70.77 }, { "epoch": 1.99, "learning_rate": "3.6609e-05", "loss": 0.7036, "slid_loss": 0.7104, "step": 2577, "time": 72.37 }, { "epoch": 1.99, "learning_rate": "3.6599e-05", "loss": 0.6738, "slid_loss": 0.7101, "step": 2578, "time": 71.43 }, { "epoch": 2.0, "learning_rate": "3.6590e-05", "loss": 0.6949, "slid_loss": 0.7097, "step": 2579, "time": 73.17 }, { "epoch": 2.0, "learning_rate": "3.6581e-05", "loss": 0.6841, "slid_loss": 0.7094, "step": 2580, "time": 71.55 }, { "epoch": 2.0, "learning_rate": "3.6571e-05", "loss": 0.7111, "slid_loss": 0.7088, "step": 2581, "time": 71.52 }, { "epoch": 2.0, "learning_rate": "3.6562e-05", "loss": 0.7181, "slid_loss": 0.7092, "step": 2582, "time": 71.83 }, { "epoch": 2.0, "learning_rate": "3.6553e-05", "loss": 0.6811, "slid_loss": 0.7091, "step": 2583, "time": 72.63 }, { "epoch": 2.0, "learning_rate": "3.6544e-05", "loss": 0.6707, "slid_loss": 0.7087, "step": 2584, "time": 71.72 }, { "epoch": 2.0, "learning_rate": "3.6534e-05", "loss": 0.68, "slid_loss": 0.7087, "step": 2585, "time": 74.93 }, { "epoch": 2.0, "learning_rate": "3.6525e-05", "loss": 0.5569, "slid_loss": 0.7072, "step": 2586, "time": 96.9 }, { "epoch": 2.0, "learning_rate": "3.6516e-05", "loss": 0.7062, "slid_loss": 0.707, "step": 2587, "time": 72.09 }, { "epoch": 2.0, "learning_rate": "3.6506e-05", "loss": 0.7042, "slid_loss": 0.707, "step": 2588, "time": 71.83 }, { "epoch": 2.0, "learning_rate": "3.6497e-05", "loss": 0.7157, "slid_loss": 0.7072, "step": 2589, "time": 72.59 }, { "epoch": 2.0, "learning_rate": "3.6488e-05", "loss": 0.6836, "slid_loss": 0.7069, "step": 2590, "time": 72.18 }, { "epoch": 2.0, "learning_rate": "3.6479e-05", "loss": 0.6986, "slid_loss": 0.707, "step": 2591, "time": 73.78 }, { "epoch": 2.01, "learning_rate": "3.6469e-05", "loss": 0.6949, "slid_loss": 0.7068, "step": 2592, "time": 71.33 }, { "epoch": 2.01, "learning_rate": "3.6460e-05", "loss": 0.7066, "slid_loss": 0.7066, "step": 2593, "time": 72.17 }, { "epoch": 2.01, "learning_rate": "3.6451e-05", "loss": 0.6975, "slid_loss": 0.7068, "step": 2594, "time": 72.01 }, { "epoch": 2.01, "learning_rate": "3.6441e-05", "loss": 0.7323, "slid_loss": 0.7072, "step": 2595, "time": 72.11 }, { "epoch": 2.01, "learning_rate": "3.6432e-05", "loss": 0.7251, "slid_loss": 0.7076, "step": 2596, "time": 71.15 }, { "epoch": 2.01, "learning_rate": "3.6423e-05", "loss": 0.6913, "slid_loss": 0.7074, "step": 2597, "time": 72.33 }, { "epoch": 2.01, "learning_rate": "3.6413e-05", "loss": 0.6945, "slid_loss": 0.7072, "step": 2598, "time": 73.34 }, { "epoch": 2.01, "learning_rate": "3.6404e-05", "loss": 0.708, "slid_loss": 0.7072, "step": 2599, "time": 71.82 }, { "epoch": 2.01, "learning_rate": "3.6395e-05", "loss": 0.6869, "slid_loss": 0.7071, "step": 2600, "time": 71.74 }, { "epoch": 2.01, "learning_rate": "3.6386e-05", "loss": 0.7014, "slid_loss": 0.7071, "step": 2601, "time": 787.94 }, { "epoch": 2.01, "learning_rate": "3.6376e-05", "loss": 0.6962, "slid_loss": 0.7069, "step": 2602, "time": 72.4 }, { "epoch": 2.01, "learning_rate": "3.6367e-05", "loss": 0.7, "slid_loss": 0.7068, "step": 2603, "time": 72.35 }, { "epoch": 2.01, "learning_rate": "3.6358e-05", "loss": 0.7283, "slid_loss": 0.7066, "step": 2604, "time": 71.56 }, { "epoch": 2.02, "learning_rate": "3.6348e-05", "loss": 0.7162, "slid_loss": 0.7067, "step": 2605, "time": 71.7 }, { "epoch": 2.02, "learning_rate": "3.6339e-05", "loss": 0.7175, "slid_loss": 0.7071, "step": 2606, "time": 71.35 }, { "epoch": 2.02, "learning_rate": "3.6330e-05", "loss": 0.6782, "slid_loss": 0.7067, "step": 2607, "time": 71.76 }, { "epoch": 2.02, "learning_rate": "3.6320e-05", "loss": 0.7189, "slid_loss": 0.7066, "step": 2608, "time": 73.17 }, { "epoch": 2.02, "learning_rate": "3.6311e-05", "loss": 0.6818, "slid_loss": 0.7063, "step": 2609, "time": 72.21 }, { "epoch": 2.02, "learning_rate": "3.6302e-05", "loss": 0.712, "slid_loss": 0.7063, "step": 2610, "time": 71.72 }, { "epoch": 2.02, "learning_rate": "3.6292e-05", "loss": 0.7256, "slid_loss": 0.7064, "step": 2611, "time": 71.11 }, { "epoch": 2.02, "learning_rate": "3.6283e-05", "loss": 0.7472, "slid_loss": 0.707, "step": 2612, "time": 71.11 }, { "epoch": 2.02, "learning_rate": "3.6274e-05", "loss": 0.7088, "slid_loss": 0.7072, "step": 2613, "time": 71.45 }, { "epoch": 2.02, "learning_rate": "3.6264e-05", "loss": 0.7366, "slid_loss": 0.7076, "step": 2614, "time": 73.4 }, { "epoch": 2.02, "learning_rate": "3.6255e-05", "loss": 0.6947, "slid_loss": 0.7077, "step": 2615, "time": 72.11 }, { "epoch": 2.02, "learning_rate": "3.6246e-05", "loss": 0.7264, "slid_loss": 0.7077, "step": 2616, "time": 72.53 }, { "epoch": 2.02, "learning_rate": "3.6236e-05", "loss": 0.6922, "slid_loss": 0.7076, "step": 2617, "time": 74.28 }, { "epoch": 2.03, "learning_rate": "3.6227e-05", "loss": 0.6939, "slid_loss": 0.7076, "step": 2618, "time": 72.01 }, { "epoch": 2.03, "learning_rate": "3.6218e-05", "loss": 0.6982, "slid_loss": 0.7078, "step": 2619, "time": 71.61 }, { "epoch": 2.03, "learning_rate": "3.6208e-05", "loss": 0.701, "slid_loss": 0.7075, "step": 2620, "time": 72.64 }, { "epoch": 2.03, "learning_rate": "3.6199e-05", "loss": 0.7077, "slid_loss": 0.7076, "step": 2621, "time": 72.04 }, { "epoch": 2.03, "learning_rate": "3.6190e-05", "loss": 0.6767, "slid_loss": 0.7075, "step": 2622, "time": 72.95 }, { "epoch": 2.03, "learning_rate": "3.6180e-05", "loss": 0.6902, "slid_loss": 0.7075, "step": 2623, "time": 71.84 }, { "epoch": 2.03, "learning_rate": "3.6171e-05", "loss": 0.6751, "slid_loss": 0.7075, "step": 2624, "time": 71.06 }, { "epoch": 2.03, "learning_rate": "3.6162e-05", "loss": 0.704, "slid_loss": 0.7074, "step": 2625, "time": 71.75 }, { "epoch": 2.03, "learning_rate": "3.6152e-05", "loss": 0.6913, "slid_loss": 0.7072, "step": 2626, "time": 71.46 }, { "epoch": 2.03, "learning_rate": "3.6143e-05", "loss": 0.723, "slid_loss": 0.7072, "step": 2627, "time": 70.78 }, { "epoch": 2.03, "learning_rate": "3.6134e-05", "loss": 0.7225, "slid_loss": 0.7072, "step": 2628, "time": 72.45 }, { "epoch": 2.03, "learning_rate": "3.6124e-05", "loss": 0.7429, "slid_loss": 0.7077, "step": 2629, "time": 73.09 }, { "epoch": 2.03, "learning_rate": "3.6115e-05", "loss": 0.6854, "slid_loss": 0.7079, "step": 2630, "time": 71.68 }, { "epoch": 2.04, "learning_rate": "3.6106e-05", "loss": 0.6929, "slid_loss": 0.7082, "step": 2631, "time": 71.77 }, { "epoch": 2.04, "learning_rate": "3.6096e-05", "loss": 0.7015, "slid_loss": 0.7083, "step": 2632, "time": 70.86 }, { "epoch": 2.04, "learning_rate": "3.6087e-05", "loss": 0.6922, "slid_loss": 0.7079, "step": 2633, "time": 71.81 }, { "epoch": 2.04, "learning_rate": "3.6077e-05", "loss": 0.7101, "slid_loss": 0.7079, "step": 2634, "time": 71.52 }, { "epoch": 2.04, "learning_rate": "3.6068e-05", "loss": 0.7169, "slid_loss": 0.7079, "step": 2635, "time": 72.66 }, { "epoch": 2.04, "learning_rate": "3.6059e-05", "loss": 0.736, "slid_loss": 0.7078, "step": 2636, "time": 70.77 }, { "epoch": 2.04, "learning_rate": "3.6049e-05", "loss": 0.7213, "slid_loss": 0.7074, "step": 2637, "time": 72.21 }, { "epoch": 2.04, "learning_rate": "3.6040e-05", "loss": 0.7036, "slid_loss": 0.7078, "step": 2638, "time": 71.35 }, { "epoch": 2.04, "learning_rate": "3.6031e-05", "loss": 0.703, "slid_loss": 0.7075, "step": 2639, "time": 72.43 }, { "epoch": 2.04, "learning_rate": "3.6021e-05", "loss": 0.6759, "slid_loss": 0.7074, "step": 2640, "time": 70.95 }, { "epoch": 2.04, "learning_rate": "3.6012e-05", "loss": 0.7106, "slid_loss": 0.7073, "step": 2641, "time": 72.02 }, { "epoch": 2.04, "learning_rate": "3.6003e-05", "loss": 0.7068, "slid_loss": 0.7076, "step": 2642, "time": 70.77 }, { "epoch": 2.04, "learning_rate": "3.5993e-05", "loss": 0.6977, "slid_loss": 0.7071, "step": 2643, "time": 71.04 }, { "epoch": 2.05, "learning_rate": "3.5984e-05", "loss": 0.7289, "slid_loss": 0.7071, "step": 2644, "time": 72.49 }, { "epoch": 2.05, "learning_rate": "3.5974e-05", "loss": 0.7156, "slid_loss": 0.7067, "step": 2645, "time": 71.31 }, { "epoch": 2.05, "learning_rate": "3.5965e-05", "loss": 0.6625, "slid_loss": 0.7064, "step": 2646, "time": 72.26 }, { "epoch": 2.05, "learning_rate": "3.5956e-05", "loss": 0.6878, "slid_loss": 0.7063, "step": 2647, "time": 70.97 }, { "epoch": 2.05, "learning_rate": "3.5946e-05", "loss": 0.6952, "slid_loss": 0.706, "step": 2648, "time": 71.97 }, { "epoch": 2.05, "learning_rate": "3.5937e-05", "loss": 0.676, "slid_loss": 0.706, "step": 2649, "time": 73.11 }, { "epoch": 2.05, "learning_rate": "3.5928e-05", "loss": 0.686, "slid_loss": 0.7058, "step": 2650, "time": 71.9 }, { "epoch": 2.05, "learning_rate": "3.5918e-05", "loss": 0.7201, "slid_loss": 0.7054, "step": 2651, "time": 70.62 }, { "epoch": 2.05, "learning_rate": "3.5909e-05", "loss": 0.7241, "slid_loss": 0.7059, "step": 2652, "time": 71.5 }, { "epoch": 2.05, "learning_rate": "3.5899e-05", "loss": 0.7012, "slid_loss": 0.7055, "step": 2653, "time": 73.51 }, { "epoch": 2.05, "learning_rate": "3.5890e-05", "loss": 0.6899, "slid_loss": 0.7051, "step": 2654, "time": 71.66 }, { "epoch": 2.05, "learning_rate": "3.5881e-05", "loss": 0.6993, "slid_loss": 0.7048, "step": 2655, "time": 73.05 }, { "epoch": 2.05, "learning_rate": "3.5871e-05", "loss": 0.7018, "slid_loss": 0.7046, "step": 2656, "time": 71.32 }, { "epoch": 2.06, "learning_rate": "3.5862e-05", "loss": 0.67, "slid_loss": 0.704, "step": 2657, "time": 72.71 }, { "epoch": 2.06, "learning_rate": "3.5852e-05", "loss": 0.7071, "slid_loss": 0.7042, "step": 2658, "time": 71.42 }, { "epoch": 2.06, "learning_rate": "3.5843e-05", "loss": 0.7093, "slid_loss": 0.7041, "step": 2659, "time": 71.38 }, { "epoch": 2.06, "learning_rate": "3.5834e-05", "loss": 0.6536, "slid_loss": 0.7033, "step": 2660, "time": 71.1 }, { "epoch": 2.06, "learning_rate": "3.5824e-05", "loss": 0.7324, "slid_loss": 0.7033, "step": 2661, "time": 70.96 }, { "epoch": 2.06, "learning_rate": "3.5815e-05", "loss": 0.7028, "slid_loss": 0.7034, "step": 2662, "time": 72.22 }, { "epoch": 2.06, "learning_rate": "3.5805e-05", "loss": 0.7183, "slid_loss": 0.7036, "step": 2663, "time": 70.98 }, { "epoch": 2.06, "learning_rate": "3.5796e-05", "loss": 0.703, "slid_loss": 0.7032, "step": 2664, "time": 70.67 }, { "epoch": 2.06, "learning_rate": "3.5787e-05", "loss": 0.7165, "slid_loss": 0.7029, "step": 2665, "time": 71.81 }, { "epoch": 2.06, "learning_rate": "3.5777e-05", "loss": 0.7073, "slid_loss": 0.7027, "step": 2666, "time": 73.77 }, { "epoch": 2.06, "learning_rate": "3.5768e-05", "loss": 0.6926, "slid_loss": 0.7022, "step": 2667, "time": 72.91 }, { "epoch": 2.06, "learning_rate": "3.5758e-05", "loss": 0.6947, "slid_loss": 0.7018, "step": 2668, "time": 73.56 }, { "epoch": 2.06, "learning_rate": "3.5749e-05", "loss": 0.7051, "slid_loss": 0.7016, "step": 2669, "time": 71.19 }, { "epoch": 2.07, "learning_rate": "3.5740e-05", "loss": 0.7082, "slid_loss": 0.7019, "step": 2670, "time": 71.1 }, { "epoch": 2.07, "learning_rate": "3.5730e-05", "loss": 0.7114, "slid_loss": 0.7014, "step": 2671, "time": 72.35 }, { "epoch": 2.07, "learning_rate": "3.5721e-05", "loss": 0.6944, "slid_loss": 0.7011, "step": 2672, "time": 71.07 }, { "epoch": 2.07, "learning_rate": "3.5711e-05", "loss": 0.6958, "slid_loss": 0.7013, "step": 2673, "time": 71.64 }, { "epoch": 2.07, "learning_rate": "3.5702e-05", "loss": 0.666, "slid_loss": 0.7007, "step": 2674, "time": 72.03 }, { "epoch": 2.07, "learning_rate": "3.5692e-05", "loss": 0.72, "slid_loss": 0.7009, "step": 2675, "time": 70.89 }, { "epoch": 2.07, "learning_rate": "3.5683e-05", "loss": 0.7441, "slid_loss": 0.7012, "step": 2676, "time": 72.56 }, { "epoch": 2.07, "learning_rate": "3.5674e-05", "loss": 0.7018, "slid_loss": 0.7012, "step": 2677, "time": 71.85 }, { "epoch": 2.07, "learning_rate": "3.5664e-05", "loss": 0.686, "slid_loss": 0.7013, "step": 2678, "time": 70.97 }, { "epoch": 2.07, "learning_rate": "3.5655e-05", "loss": 0.7048, "slid_loss": 0.7014, "step": 2679, "time": 72.04 }, { "epoch": 2.07, "learning_rate": "3.5645e-05", "loss": 0.6931, "slid_loss": 0.7015, "step": 2680, "time": 72.1 }, { "epoch": 2.07, "learning_rate": "3.5636e-05", "loss": 0.6973, "slid_loss": 0.7014, "step": 2681, "time": 73.14 }, { "epoch": 2.08, "learning_rate": "3.5627e-05", "loss": 0.6598, "slid_loss": 0.7008, "step": 2682, "time": 70.87 }, { "epoch": 2.08, "learning_rate": "3.5617e-05", "loss": 0.7733, "slid_loss": 0.7017, "step": 2683, "time": 71.25 }, { "epoch": 2.08, "learning_rate": "3.5608e-05", "loss": 0.6829, "slid_loss": 0.7018, "step": 2684, "time": 72.57 }, { "epoch": 2.08, "learning_rate": "3.5598e-05", "loss": 0.6963, "slid_loss": 0.702, "step": 2685, "time": 71.96 }, { "epoch": 2.08, "learning_rate": "3.5589e-05", "loss": 0.6577, "slid_loss": 0.703, "step": 2686, "time": 72.01 }, { "epoch": 2.08, "learning_rate": "3.5579e-05", "loss": 0.6278, "slid_loss": 0.7022, "step": 2687, "time": 72.83 }, { "epoch": 2.08, "learning_rate": "3.5570e-05", "loss": 0.7195, "slid_loss": 0.7024, "step": 2688, "time": 71.84 }, { "epoch": 2.08, "learning_rate": "3.5560e-05", "loss": 0.7391, "slid_loss": 0.7026, "step": 2689, "time": 70.84 }, { "epoch": 2.08, "learning_rate": "3.5551e-05", "loss": 0.7175, "slid_loss": 0.703, "step": 2690, "time": 72.93 }, { "epoch": 2.08, "learning_rate": "3.5542e-05", "loss": 0.6718, "slid_loss": 0.7027, "step": 2691, "time": 72.13 }, { "epoch": 2.08, "learning_rate": "3.5532e-05", "loss": 0.7237, "slid_loss": 0.703, "step": 2692, "time": 92.35 }, { "epoch": 2.08, "learning_rate": "3.5523e-05", "loss": 0.666, "slid_loss": 0.7026, "step": 2693, "time": 73.48 }, { "epoch": 2.08, "learning_rate": "3.5513e-05", "loss": 0.6767, "slid_loss": 0.7024, "step": 2694, "time": 118.77 }, { "epoch": 2.09, "learning_rate": "3.5504e-05", "loss": 0.6926, "slid_loss": 0.702, "step": 2695, "time": 88.91 }, { "epoch": 2.09, "learning_rate": "3.5494e-05", "loss": 0.6688, "slid_loss": 0.7014, "step": 2696, "time": 84.96 }, { "epoch": 2.09, "learning_rate": "3.5485e-05", "loss": 0.7055, "slid_loss": 0.7015, "step": 2697, "time": 123.96 }, { "epoch": 2.09, "learning_rate": "3.5475e-05", "loss": 0.6852, "slid_loss": 0.7014, "step": 2698, "time": 100.89 }, { "epoch": 2.09, "learning_rate": "3.5466e-05", "loss": 0.6782, "slid_loss": 0.7012, "step": 2699, "time": 150.66 }, { "epoch": 2.09, "learning_rate": "3.5457e-05", "loss": 0.7059, "slid_loss": 0.7013, "step": 2700, "time": 192.98 }, { "epoch": 2.09, "learning_rate": "3.5447e-05", "loss": 0.6987, "slid_loss": 0.7013, "step": 2701, "time": 177.27 }, { "epoch": 2.09, "learning_rate": "3.5438e-05", "loss": 0.6931, "slid_loss": 0.7013, "step": 2702, "time": 153.54 }, { "epoch": 2.09, "learning_rate": "3.5428e-05", "loss": 0.7083, "slid_loss": 0.7014, "step": 2703, "time": 143.44 }, { "epoch": 2.09, "learning_rate": "3.5419e-05", "loss": 0.7128, "slid_loss": 0.7012, "step": 2704, "time": 143.09 }, { "epoch": 2.09, "learning_rate": "3.5409e-05", "loss": 0.7165, "slid_loss": 0.7012, "step": 2705, "time": 133.89 }, { "epoch": 2.09, "learning_rate": "3.5400e-05", "loss": 0.6835, "slid_loss": 0.7009, "step": 2706, "time": 127.08 }, { "epoch": 2.09, "learning_rate": "3.5390e-05", "loss": 0.6787, "slid_loss": 0.7009, "step": 2707, "time": 160.41 }, { "epoch": 2.1, "learning_rate": "3.5381e-05", "loss": 0.6937, "slid_loss": 0.7006, "step": 2708, "time": 127.87 }, { "epoch": 2.1, "learning_rate": "3.5371e-05", "loss": 0.7099, "slid_loss": 0.7009, "step": 2709, "time": 98.99 }, { "epoch": 2.1, "learning_rate": "3.5362e-05", "loss": 0.7169, "slid_loss": 0.701, "step": 2710, "time": 84.84 }, { "epoch": 2.1, "learning_rate": "3.5353e-05", "loss": 0.724, "slid_loss": 0.7009, "step": 2711, "time": 83.22 }, { "epoch": 2.1, "learning_rate": "3.5343e-05", "loss": 0.6918, "slid_loss": 0.7004, "step": 2712, "time": 70.37 }, { "epoch": 2.1, "learning_rate": "3.5334e-05", "loss": 0.7184, "slid_loss": 0.7005, "step": 2713, "time": 70.92 }, { "epoch": 2.1, "learning_rate": "3.5324e-05", "loss": 0.721, "slid_loss": 0.7003, "step": 2714, "time": 72.04 }, { "epoch": 2.1, "learning_rate": "3.5315e-05", "loss": 0.7105, "slid_loss": 0.7005, "step": 2715, "time": 70.77 }, { "epoch": 2.1, "learning_rate": "3.5305e-05", "loss": 0.679, "slid_loss": 0.7, "step": 2716, "time": 72.71 }, { "epoch": 2.1, "learning_rate": "3.5296e-05", "loss": 0.711, "slid_loss": 0.7002, "step": 2717, "time": 71.91 }, { "epoch": 2.1, "learning_rate": "3.5286e-05", "loss": 0.6931, "slid_loss": 0.7002, "step": 2718, "time": 70.85 }, { "epoch": 2.1, "learning_rate": "3.5277e-05", "loss": 0.6756, "slid_loss": 0.7, "step": 2719, "time": 71.82 }, { "epoch": 2.1, "learning_rate": "3.5267e-05", "loss": 0.6988, "slid_loss": 0.6999, "step": 2720, "time": 71.73 }, { "epoch": 2.11, "learning_rate": "3.5258e-05", "loss": 0.7014, "slid_loss": 0.6999, "step": 2721, "time": 70.73 }, { "epoch": 2.11, "learning_rate": "3.5248e-05", "loss": 0.6918, "slid_loss": 0.7, "step": 2722, "time": 72.15 }, { "epoch": 2.11, "learning_rate": "3.5239e-05", "loss": 0.6876, "slid_loss": 0.7, "step": 2723, "time": 71.72 }, { "epoch": 2.11, "learning_rate": "3.5229e-05", "loss": 0.6736, "slid_loss": 0.7, "step": 2724, "time": 72.34 }, { "epoch": 2.11, "learning_rate": "3.5220e-05", "loss": 0.7099, "slid_loss": 0.7001, "step": 2725, "time": 72.61 }, { "epoch": 2.11, "learning_rate": "3.5210e-05", "loss": 0.6919, "slid_loss": 0.7001, "step": 2726, "time": 71.06 }, { "epoch": 2.11, "learning_rate": "3.5201e-05", "loss": 0.7148, "slid_loss": 0.7, "step": 2727, "time": 72.88 }, { "epoch": 2.11, "learning_rate": "3.5191e-05", "loss": 0.6975, "slid_loss": 0.6997, "step": 2728, "time": 71.57 }, { "epoch": 2.11, "learning_rate": "3.5182e-05", "loss": 0.67, "slid_loss": 0.699, "step": 2729, "time": 71.03 }, { "epoch": 2.11, "learning_rate": "3.5172e-05", "loss": 0.7452, "slid_loss": 0.6996, "step": 2730, "time": 70.96 }, { "epoch": 2.11, "learning_rate": "3.5163e-05", "loss": 0.6978, "slid_loss": 0.6996, "step": 2731, "time": 70.89 }, { "epoch": 2.11, "learning_rate": "3.5153e-05", "loss": 0.7352, "slid_loss": 0.7, "step": 2732, "time": 71.9 }, { "epoch": 2.11, "learning_rate": "3.5144e-05", "loss": 0.7027, "slid_loss": 0.7001, "step": 2733, "time": 71.66 }, { "epoch": 2.12, "learning_rate": "3.5134e-05", "loss": 0.7015, "slid_loss": 0.7, "step": 2734, "time": 71.66 }, { "epoch": 2.12, "learning_rate": "3.5125e-05", "loss": 0.7164, "slid_loss": 0.7, "step": 2735, "time": 71.78 }, { "epoch": 2.12, "learning_rate": "3.5115e-05", "loss": 0.7087, "slid_loss": 0.6997, "step": 2736, "time": 71.3 }, { "epoch": 2.12, "learning_rate": "3.5106e-05", "loss": 0.6765, "slid_loss": 0.6993, "step": 2737, "time": 71.24 }, { "epoch": 2.12, "learning_rate": "3.5096e-05", "loss": 0.7166, "slid_loss": 0.6994, "step": 2738, "time": 71.62 }, { "epoch": 2.12, "learning_rate": "3.5087e-05", "loss": 0.6907, "slid_loss": 0.6993, "step": 2739, "time": 70.22 }, { "epoch": 2.12, "learning_rate": "3.5077e-05", "loss": 0.7024, "slid_loss": 0.6995, "step": 2740, "time": 71.94 }, { "epoch": 2.12, "learning_rate": "3.5068e-05", "loss": 0.6783, "slid_loss": 0.6992, "step": 2741, "time": 71.97 }, { "epoch": 2.12, "learning_rate": "3.5058e-05", "loss": 0.7384, "slid_loss": 0.6995, "step": 2742, "time": 71.24 }, { "epoch": 2.12, "learning_rate": "3.5049e-05", "loss": 0.726, "slid_loss": 0.6998, "step": 2743, "time": 71.75 }, { "epoch": 2.12, "learning_rate": "3.5039e-05", "loss": 0.6579, "slid_loss": 0.6991, "step": 2744, "time": 72.09 }, { "epoch": 2.12, "learning_rate": "3.5030e-05", "loss": 0.6906, "slid_loss": 0.6989, "step": 2745, "time": 73.29 }, { "epoch": 2.12, "learning_rate": "3.5020e-05", "loss": 0.7132, "slid_loss": 0.6994, "step": 2746, "time": 71.95 }, { "epoch": 2.13, "learning_rate": "3.5011e-05", "loss": 0.7287, "slid_loss": 0.6998, "step": 2747, "time": 72.43 }, { "epoch": 2.13, "learning_rate": "3.5001e-05", "loss": 0.645, "slid_loss": 0.6993, "step": 2748, "time": 72.8 }, { "epoch": 2.13, "learning_rate": "3.4992e-05", "loss": 0.7232, "slid_loss": 0.6998, "step": 2749, "time": 71.28 }, { "epoch": 2.13, "learning_rate": "3.4982e-05", "loss": 0.7078, "slid_loss": 0.7, "step": 2750, "time": 71.79 }, { "epoch": 2.13, "learning_rate": "3.4973e-05", "loss": 0.7082, "slid_loss": 0.6998, "step": 2751, "time": 71.31 }, { "epoch": 2.13, "learning_rate": "3.4963e-05", "loss": 0.6854, "slid_loss": 0.6995, "step": 2752, "time": 71.84 }, { "epoch": 2.13, "learning_rate": "3.4954e-05", "loss": 0.7019, "slid_loss": 0.6995, "step": 2753, "time": 73.29 }, { "epoch": 2.13, "learning_rate": "3.4944e-05", "loss": 0.6873, "slid_loss": 0.6994, "step": 2754, "time": 71.69 }, { "epoch": 2.13, "learning_rate": "3.4935e-05", "loss": 0.7281, "slid_loss": 0.6997, "step": 2755, "time": 72.11 }, { "epoch": 2.13, "learning_rate": "3.4925e-05", "loss": 0.7002, "slid_loss": 0.6997, "step": 2756, "time": 72.1 }, { "epoch": 2.13, "learning_rate": "3.4916e-05", "loss": 0.656, "slid_loss": 0.6996, "step": 2757, "time": 71.46 }, { "epoch": 2.13, "learning_rate": "3.4906e-05", "loss": 0.697, "slid_loss": 0.6995, "step": 2758, "time": 73.56 }, { "epoch": 2.13, "learning_rate": "3.4897e-05", "loss": 0.6926, "slid_loss": 0.6993, "step": 2759, "time": 72.65 }, { "epoch": 2.14, "learning_rate": "3.4887e-05", "loss": 0.7106, "slid_loss": 0.6999, "step": 2760, "time": 71.31 }, { "epoch": 2.14, "learning_rate": "3.4878e-05", "loss": 0.7125, "slid_loss": 0.6997, "step": 2761, "time": 71.6 }, { "epoch": 2.14, "learning_rate": "3.4868e-05", "loss": 0.7113, "slid_loss": 0.6998, "step": 2762, "time": 70.93 }, { "epoch": 2.14, "learning_rate": "3.4859e-05", "loss": 0.6719, "slid_loss": 0.6993, "step": 2763, "time": 70.59 }, { "epoch": 2.14, "learning_rate": "3.4849e-05", "loss": 0.68, "slid_loss": 0.6991, "step": 2764, "time": 71.83 }, { "epoch": 2.14, "learning_rate": "3.4839e-05", "loss": 0.7028, "slid_loss": 0.6989, "step": 2765, "time": 72.75 }, { "epoch": 2.14, "learning_rate": "3.4830e-05", "loss": 0.7091, "slid_loss": 0.699, "step": 2766, "time": 71.22 }, { "epoch": 2.14, "learning_rate": "3.4820e-05", "loss": 0.7046, "slid_loss": 0.6991, "step": 2767, "time": 70.63 }, { "epoch": 2.14, "learning_rate": "3.4811e-05", "loss": 0.7197, "slid_loss": 0.6993, "step": 2768, "time": 72.97 }, { "epoch": 2.14, "learning_rate": "3.4801e-05", "loss": 0.6929, "slid_loss": 0.6992, "step": 2769, "time": 71.35 }, { "epoch": 2.14, "learning_rate": "3.4792e-05", "loss": 0.706, "slid_loss": 0.6992, "step": 2770, "time": 72.01 }, { "epoch": 2.14, "learning_rate": "3.4782e-05", "loss": 0.7488, "slid_loss": 0.6996, "step": 2771, "time": 72.34 }, { "epoch": 2.14, "learning_rate": "3.4773e-05", "loss": 0.6689, "slid_loss": 0.6993, "step": 2772, "time": 71.99 }, { "epoch": 2.15, "learning_rate": "3.4763e-05", "loss": 0.7058, "slid_loss": 0.6994, "step": 2773, "time": 70.7 }, { "epoch": 2.15, "learning_rate": "3.4754e-05", "loss": 0.6937, "slid_loss": 0.6997, "step": 2774, "time": 72.17 }, { "epoch": 2.15, "learning_rate": "3.4744e-05", "loss": 0.6982, "slid_loss": 0.6995, "step": 2775, "time": 72.56 }, { "epoch": 2.15, "learning_rate": "3.4735e-05", "loss": 0.7033, "slid_loss": 0.6991, "step": 2776, "time": 71.17 }, { "epoch": 2.15, "learning_rate": "3.4725e-05", "loss": 0.6854, "slid_loss": 0.6989, "step": 2777, "time": 71.48 }, { "epoch": 2.15, "learning_rate": "3.4715e-05", "loss": 0.7082, "slid_loss": 0.6991, "step": 2778, "time": 71.77 }, { "epoch": 2.15, "learning_rate": "3.4706e-05", "loss": 0.698, "slid_loss": 0.699, "step": 2779, "time": 71.06 }, { "epoch": 2.15, "learning_rate": "3.4696e-05", "loss": 0.7095, "slid_loss": 0.6992, "step": 2780, "time": 70.4 }, { "epoch": 2.15, "learning_rate": "3.4687e-05", "loss": 0.6982, "slid_loss": 0.6992, "step": 2781, "time": 70.98 }, { "epoch": 2.15, "learning_rate": "3.4677e-05", "loss": 0.7135, "slid_loss": 0.6997, "step": 2782, "time": 72.0 }, { "epoch": 2.15, "learning_rate": "3.4668e-05", "loss": 0.7144, "slid_loss": 0.6992, "step": 2783, "time": 70.9 }, { "epoch": 2.15, "learning_rate": "3.4658e-05", "loss": 0.6964, "slid_loss": 0.6993, "step": 2784, "time": 71.13 }, { "epoch": 2.15, "learning_rate": "3.4649e-05", "loss": 0.7114, "slid_loss": 0.6994, "step": 2785, "time": 71.6 }, { "epoch": 2.16, "learning_rate": "3.4639e-05", "loss": 0.6634, "slid_loss": 0.6995, "step": 2786, "time": 72.03 }, { "epoch": 2.16, "learning_rate": "3.4629e-05", "loss": 0.6755, "slid_loss": 0.7, "step": 2787, "time": 71.99 }, { "epoch": 2.16, "learning_rate": "3.4620e-05", "loss": 0.7169, "slid_loss": 0.7, "step": 2788, "time": 73.39 }, { "epoch": 2.16, "learning_rate": "3.4610e-05", "loss": 0.7004, "slid_loss": 0.6996, "step": 2789, "time": 71.99 }, { "epoch": 2.16, "learning_rate": "3.4601e-05", "loss": 0.6718, "slid_loss": 0.6991, "step": 2790, "time": 71.6 }, { "epoch": 2.16, "learning_rate": "3.4591e-05", "loss": 0.6807, "slid_loss": 0.6992, "step": 2791, "time": 74.41 }, { "epoch": 2.16, "learning_rate": "3.4582e-05", "loss": 0.7313, "slid_loss": 0.6993, "step": 2792, "time": 71.83 }, { "epoch": 2.16, "learning_rate": "3.4572e-05", "loss": 0.7269, "slid_loss": 0.6999, "step": 2793, "time": 71.04 }, { "epoch": 2.16, "learning_rate": "3.4563e-05", "loss": 0.7102, "slid_loss": 0.7002, "step": 2794, "time": 73.12 }, { "epoch": 2.16, "learning_rate": "3.4553e-05", "loss": 0.6792, "slid_loss": 0.7001, "step": 2795, "time": 71.62 }, { "epoch": 2.16, "learning_rate": "3.4543e-05", "loss": 0.6947, "slid_loss": 0.7003, "step": 2796, "time": 70.97 }, { "epoch": 2.16, "learning_rate": "3.4534e-05", "loss": 0.6946, "slid_loss": 0.7002, "step": 2797, "time": 72.96 }, { "epoch": 2.16, "learning_rate": "3.4524e-05", "loss": 0.7025, "slid_loss": 0.7004, "step": 2798, "time": 71.68 }, { "epoch": 2.17, "learning_rate": "3.4515e-05", "loss": 0.7149, "slid_loss": 0.7008, "step": 2799, "time": 72.08 }, { "epoch": 2.17, "learning_rate": "3.4505e-05", "loss": 0.723, "slid_loss": 0.7009, "step": 2800, "time": 70.78 }, { "epoch": 2.17, "learning_rate": "3.4496e-05", "loss": 0.7369, "slid_loss": 0.7013, "step": 2801, "time": 751.23 }, { "epoch": 2.17, "learning_rate": "3.4486e-05", "loss": 0.6872, "slid_loss": 0.7013, "step": 2802, "time": 70.77 }, { "epoch": 2.17, "learning_rate": "3.4476e-05", "loss": 0.6883, "slid_loss": 0.7011, "step": 2803, "time": 71.37 }, { "epoch": 2.17, "learning_rate": "3.4467e-05", "loss": 0.6739, "slid_loss": 0.7007, "step": 2804, "time": 71.96 }, { "epoch": 2.17, "learning_rate": "3.4457e-05", "loss": 0.6921, "slid_loss": 0.7004, "step": 2805, "time": 72.86 }, { "epoch": 2.17, "learning_rate": "3.4448e-05", "loss": 0.7298, "slid_loss": 0.7009, "step": 2806, "time": 71.81 }, { "epoch": 2.17, "learning_rate": "3.4438e-05", "loss": 0.67, "slid_loss": 0.7008, "step": 2807, "time": 71.63 }, { "epoch": 2.17, "learning_rate": "3.4429e-05", "loss": 0.6885, "slid_loss": 0.7008, "step": 2808, "time": 70.73 }, { "epoch": 2.17, "learning_rate": "3.4419e-05", "loss": 0.6673, "slid_loss": 0.7003, "step": 2809, "time": 72.05 }, { "epoch": 2.17, "learning_rate": "3.4409e-05", "loss": 0.7444, "slid_loss": 0.7006, "step": 2810, "time": 71.25 }, { "epoch": 2.17, "learning_rate": "3.4400e-05", "loss": 0.6988, "slid_loss": 0.7004, "step": 2811, "time": 71.52 }, { "epoch": 2.18, "learning_rate": "3.4390e-05", "loss": 0.6983, "slid_loss": 0.7004, "step": 2812, "time": 72.23 }, { "epoch": 2.18, "learning_rate": "3.4381e-05", "loss": 0.65, "slid_loss": 0.6997, "step": 2813, "time": 70.2 }, { "epoch": 2.18, "learning_rate": "3.4371e-05", "loss": 0.694, "slid_loss": 0.6995, "step": 2814, "time": 72.15 }, { "epoch": 2.18, "learning_rate": "3.4361e-05", "loss": 0.7134, "slid_loss": 0.6995, "step": 2815, "time": 72.07 }, { "epoch": 2.18, "learning_rate": "3.4352e-05", "loss": 0.7195, "slid_loss": 0.6999, "step": 2816, "time": 71.82 }, { "epoch": 2.18, "learning_rate": "3.4342e-05", "loss": 0.6726, "slid_loss": 0.6995, "step": 2817, "time": 71.2 }, { "epoch": 2.18, "learning_rate": "3.4333e-05", "loss": 0.7061, "slid_loss": 0.6996, "step": 2818, "time": 71.41 }, { "epoch": 2.18, "learning_rate": "3.4323e-05", "loss": 0.7597, "slid_loss": 0.7005, "step": 2819, "time": 72.8 }, { "epoch": 2.18, "learning_rate": "3.4313e-05", "loss": 0.703, "slid_loss": 0.7005, "step": 2820, "time": 70.87 }, { "epoch": 2.18, "learning_rate": "3.4304e-05", "loss": 0.7087, "slid_loss": 0.7006, "step": 2821, "time": 70.87 }, { "epoch": 2.18, "learning_rate": "3.4294e-05", "loss": 0.7071, "slid_loss": 0.7008, "step": 2822, "time": 70.97 }, { "epoch": 2.18, "learning_rate": "3.4285e-05", "loss": 0.7478, "slid_loss": 0.7014, "step": 2823, "time": 72.63 }, { "epoch": 2.18, "learning_rate": "3.4275e-05", "loss": 0.6689, "slid_loss": 0.7013, "step": 2824, "time": 72.14 }, { "epoch": 2.19, "learning_rate": "3.4265e-05", "loss": 0.7314, "slid_loss": 0.7015, "step": 2825, "time": 72.28 }, { "epoch": 2.19, "learning_rate": "3.4256e-05", "loss": 0.6685, "slid_loss": 0.7013, "step": 2826, "time": 72.28 }, { "epoch": 2.19, "learning_rate": "3.4246e-05", "loss": 0.7041, "slid_loss": 0.7012, "step": 2827, "time": 70.93 }, { "epoch": 2.19, "learning_rate": "3.4237e-05", "loss": 0.6617, "slid_loss": 0.7008, "step": 2828, "time": 72.19 }, { "epoch": 2.19, "learning_rate": "3.4227e-05", "loss": 0.7228, "slid_loss": 0.7014, "step": 2829, "time": 71.73 }, { "epoch": 2.19, "learning_rate": "3.4217e-05", "loss": 0.6837, "slid_loss": 0.7007, "step": 2830, "time": 72.5 }, { "epoch": 2.19, "learning_rate": "3.4208e-05", "loss": 0.6679, "slid_loss": 0.7004, "step": 2831, "time": 73.12 }, { "epoch": 2.19, "learning_rate": "3.4198e-05", "loss": 0.6812, "slid_loss": 0.6999, "step": 2832, "time": 72.08 }, { "epoch": 2.19, "learning_rate": "3.4189e-05", "loss": 0.6605, "slid_loss": 0.6995, "step": 2833, "time": 71.23 }, { "epoch": 2.19, "learning_rate": "3.4179e-05", "loss": 0.693, "slid_loss": 0.6994, "step": 2834, "time": 73.28 }, { "epoch": 2.19, "learning_rate": "3.4169e-05", "loss": 0.6947, "slid_loss": 0.6992, "step": 2835, "time": 71.62 }, { "epoch": 2.19, "learning_rate": "3.4160e-05", "loss": 0.69, "slid_loss": 0.699, "step": 2836, "time": 72.01 }, { "epoch": 2.19, "learning_rate": "3.4150e-05", "loss": 0.7256, "slid_loss": 0.6995, "step": 2837, "time": 70.93 }, { "epoch": 2.2, "learning_rate": "3.4141e-05", "loss": 0.7032, "slid_loss": 0.6993, "step": 2838, "time": 70.13 }, { "epoch": 2.2, "learning_rate": "3.4131e-05", "loss": 0.6905, "slid_loss": 0.6993, "step": 2839, "time": 71.37 }, { "epoch": 2.2, "learning_rate": "3.4121e-05", "loss": 0.7376, "slid_loss": 0.6997, "step": 2840, "time": 71.97 }, { "epoch": 2.2, "learning_rate": "3.4112e-05", "loss": 0.6777, "slid_loss": 0.6997, "step": 2841, "time": 70.65 }, { "epoch": 2.2, "learning_rate": "3.4102e-05", "loss": 0.6752, "slid_loss": 0.6991, "step": 2842, "time": 72.48 }, { "epoch": 2.2, "learning_rate": "3.4093e-05", "loss": 0.7061, "slid_loss": 0.6989, "step": 2843, "time": 71.81 }, { "epoch": 2.2, "learning_rate": "3.4083e-05", "loss": 0.6825, "slid_loss": 0.6991, "step": 2844, "time": 72.56 }, { "epoch": 2.2, "learning_rate": "3.4073e-05", "loss": 0.7064, "slid_loss": 0.6993, "step": 2845, "time": 70.97 }, { "epoch": 2.2, "learning_rate": "3.4064e-05", "loss": 0.6966, "slid_loss": 0.6991, "step": 2846, "time": 73.47 }, { "epoch": 2.2, "learning_rate": "3.4054e-05", "loss": 0.7193, "slid_loss": 0.699, "step": 2847, "time": 72.66 }, { "epoch": 2.2, "learning_rate": "3.4044e-05", "loss": 0.6703, "slid_loss": 0.6993, "step": 2848, "time": 71.69 }, { "epoch": 2.2, "learning_rate": "3.4035e-05", "loss": 0.6877, "slid_loss": 0.6989, "step": 2849, "time": 71.45 }, { "epoch": 2.21, "learning_rate": "3.4025e-05", "loss": 0.6618, "slid_loss": 0.6984, "step": 2850, "time": 71.22 }, { "epoch": 2.21, "learning_rate": "3.4016e-05", "loss": 0.6343, "slid_loss": 0.6977, "step": 2851, "time": 90.27 }, { "epoch": 2.21, "learning_rate": "3.4006e-05", "loss": 0.7384, "slid_loss": 0.6982, "step": 2852, "time": 98.06 }, { "epoch": 2.21, "learning_rate": "3.3996e-05", "loss": 0.7027, "slid_loss": 0.6982, "step": 2853, "time": 72.58 }, { "epoch": 2.21, "learning_rate": "3.3987e-05", "loss": 0.6481, "slid_loss": 0.6978, "step": 2854, "time": 99.38 }, { "epoch": 2.21, "learning_rate": "3.3977e-05", "loss": 0.6963, "slid_loss": 0.6975, "step": 2855, "time": 125.47 }, { "epoch": 2.21, "learning_rate": "3.3967e-05", "loss": 0.7096, "slid_loss": 0.6976, "step": 2856, "time": 120.1 }, { "epoch": 2.21, "learning_rate": "3.3958e-05", "loss": 0.681, "slid_loss": 0.6979, "step": 2857, "time": 139.67 }, { "epoch": 2.21, "learning_rate": "3.3948e-05", "loss": 0.7181, "slid_loss": 0.6981, "step": 2858, "time": 137.75 }, { "epoch": 2.21, "learning_rate": "3.3939e-05", "loss": 0.6873, "slid_loss": 0.698, "step": 2859, "time": 138.0 }, { "epoch": 2.21, "learning_rate": "3.3929e-05", "loss": 0.671, "slid_loss": 0.6976, "step": 2860, "time": 170.56 }, { "epoch": 2.21, "learning_rate": "3.3919e-05", "loss": 0.713, "slid_loss": 0.6976, "step": 2861, "time": 130.07 }, { "epoch": 2.21, "learning_rate": "3.3910e-05", "loss": 0.7005, "slid_loss": 0.6975, "step": 2862, "time": 118.9 }, { "epoch": 2.22, "learning_rate": "3.3900e-05", "loss": 0.6767, "slid_loss": 0.6976, "step": 2863, "time": 144.36 }, { "epoch": 2.22, "learning_rate": "3.3890e-05", "loss": 0.7005, "slid_loss": 0.6978, "step": 2864, "time": 133.27 }, { "epoch": 2.22, "learning_rate": "3.3881e-05", "loss": 0.6899, "slid_loss": 0.6977, "step": 2865, "time": 145.59 }, { "epoch": 2.22, "learning_rate": "3.3871e-05", "loss": 0.7047, "slid_loss": 0.6976, "step": 2866, "time": 131.7 }, { "epoch": 2.22, "learning_rate": "3.3861e-05", "loss": 0.6833, "slid_loss": 0.6974, "step": 2867, "time": 97.97 }, { "epoch": 2.22, "learning_rate": "3.3852e-05", "loss": 0.6679, "slid_loss": 0.6969, "step": 2868, "time": 111.14 }, { "epoch": 2.22, "learning_rate": "3.3842e-05", "loss": 0.6999, "slid_loss": 0.697, "step": 2869, "time": 71.27 }, { "epoch": 2.22, "learning_rate": "3.3833e-05", "loss": 0.6837, "slid_loss": 0.6967, "step": 2870, "time": 93.3 }, { "epoch": 2.22, "learning_rate": "3.3823e-05", "loss": 0.723, "slid_loss": 0.6965, "step": 2871, "time": 83.57 }, { "epoch": 2.22, "learning_rate": "3.3813e-05", "loss": 0.6787, "slid_loss": 0.6966, "step": 2872, "time": 72.28 }, { "epoch": 2.22, "learning_rate": "3.3804e-05", "loss": 0.674, "slid_loss": 0.6963, "step": 2873, "time": 70.62 }, { "epoch": 2.22, "learning_rate": "3.3794e-05", "loss": 0.6625, "slid_loss": 0.6959, "step": 2874, "time": 71.26 }, { "epoch": 2.22, "learning_rate": "3.3784e-05", "loss": 0.68, "slid_loss": 0.6958, "step": 2875, "time": 70.02 }, { "epoch": 2.23, "learning_rate": "3.3775e-05", "loss": 0.6988, "slid_loss": 0.6957, "step": 2876, "time": 72.13 }, { "epoch": 2.23, "learning_rate": "3.3765e-05", "loss": 0.7049, "slid_loss": 0.6959, "step": 2877, "time": 72.77 }, { "epoch": 2.23, "learning_rate": "3.3755e-05", "loss": 0.6713, "slid_loss": 0.6955, "step": 2878, "time": 71.82 }, { "epoch": 2.23, "learning_rate": "3.3746e-05", "loss": 0.7066, "slid_loss": 0.6956, "step": 2879, "time": 70.98 }, { "epoch": 2.23, "learning_rate": "3.3736e-05", "loss": 0.7105, "slid_loss": 0.6956, "step": 2880, "time": 73.07 }, { "epoch": 2.23, "learning_rate": "3.3726e-05", "loss": 0.6956, "slid_loss": 0.6956, "step": 2881, "time": 71.99 }, { "epoch": 2.23, "learning_rate": "3.3717e-05", "loss": 0.7017, "slid_loss": 0.6955, "step": 2882, "time": 72.15 }, { "epoch": 2.23, "learning_rate": "3.3707e-05", "loss": 0.6942, "slid_loss": 0.6953, "step": 2883, "time": 71.87 }, { "epoch": 2.23, "learning_rate": "3.3697e-05", "loss": 0.6901, "slid_loss": 0.6952, "step": 2884, "time": 72.23 }, { "epoch": 2.23, "learning_rate": "3.3688e-05", "loss": 0.7136, "slid_loss": 0.6952, "step": 2885, "time": 71.72 }, { "epoch": 2.23, "learning_rate": "3.3678e-05", "loss": 0.6823, "slid_loss": 0.6954, "step": 2886, "time": 71.44 }, { "epoch": 2.23, "learning_rate": "3.3668e-05", "loss": 0.6996, "slid_loss": 0.6957, "step": 2887, "time": 72.08 }, { "epoch": 2.23, "learning_rate": "3.3659e-05", "loss": 0.6762, "slid_loss": 0.6953, "step": 2888, "time": 71.63 }, { "epoch": 2.24, "learning_rate": "3.3649e-05", "loss": 0.6892, "slid_loss": 0.6952, "step": 2889, "time": 70.82 }, { "epoch": 2.24, "learning_rate": "3.3639e-05", "loss": 0.742, "slid_loss": 0.6959, "step": 2890, "time": 71.65 }, { "epoch": 2.24, "learning_rate": "3.3630e-05", "loss": 0.6907, "slid_loss": 0.696, "step": 2891, "time": 70.84 }, { "epoch": 2.24, "learning_rate": "3.3620e-05", "loss": 0.6841, "slid_loss": 0.6955, "step": 2892, "time": 72.28 }, { "epoch": 2.24, "learning_rate": "3.3610e-05", "loss": 0.7097, "slid_loss": 0.6953, "step": 2893, "time": 72.44 }, { "epoch": 2.24, "learning_rate": "3.3601e-05", "loss": 0.7074, "slid_loss": 0.6953, "step": 2894, "time": 71.66 }, { "epoch": 2.24, "learning_rate": "3.3591e-05", "loss": 0.7561, "slid_loss": 0.6961, "step": 2895, "time": 71.63 }, { "epoch": 2.24, "learning_rate": "3.3581e-05", "loss": 0.6782, "slid_loss": 0.6959, "step": 2896, "time": 73.91 }, { "epoch": 2.24, "learning_rate": "3.3572e-05", "loss": 0.6909, "slid_loss": 0.6959, "step": 2897, "time": 73.56 }, { "epoch": 2.24, "learning_rate": "3.3562e-05", "loss": 0.6812, "slid_loss": 0.6956, "step": 2898, "time": 70.42 }, { "epoch": 2.24, "learning_rate": "3.3552e-05", "loss": 0.7006, "slid_loss": 0.6955, "step": 2899, "time": 70.99 }, { "epoch": 2.24, "learning_rate": "3.3543e-05", "loss": 0.6745, "slid_loss": 0.695, "step": 2900, "time": 72.79 }, { "epoch": 2.24, "learning_rate": "3.3533e-05", "loss": 0.7331, "slid_loss": 0.695, "step": 2901, "time": 71.5 }, { "epoch": 2.25, "learning_rate": "3.3523e-05", "loss": 0.7089, "slid_loss": 0.6952, "step": 2902, "time": 72.55 }, { "epoch": 2.25, "learning_rate": "3.3514e-05", "loss": 0.6881, "slid_loss": 0.6952, "step": 2903, "time": 71.29 }, { "epoch": 2.25, "learning_rate": "3.3504e-05", "loss": 0.7126, "slid_loss": 0.6956, "step": 2904, "time": 71.64 }, { "epoch": 2.25, "learning_rate": "3.3494e-05", "loss": 0.6679, "slid_loss": 0.6953, "step": 2905, "time": 71.87 }, { "epoch": 2.25, "learning_rate": "3.3485e-05", "loss": 0.6834, "slid_loss": 0.6949, "step": 2906, "time": 70.95 }, { "epoch": 2.25, "learning_rate": "3.3475e-05", "loss": 0.6841, "slid_loss": 0.695, "step": 2907, "time": 70.91 }, { "epoch": 2.25, "learning_rate": "3.3465e-05", "loss": 0.6701, "slid_loss": 0.6948, "step": 2908, "time": 71.25 }, { "epoch": 2.25, "learning_rate": "3.3456e-05", "loss": 0.6834, "slid_loss": 0.695, "step": 2909, "time": 73.03 }, { "epoch": 2.25, "learning_rate": "3.3446e-05", "loss": 0.675, "slid_loss": 0.6943, "step": 2910, "time": 72.08 }, { "epoch": 2.25, "learning_rate": "3.3436e-05", "loss": 0.7271, "slid_loss": 0.6946, "step": 2911, "time": 71.63 }, { "epoch": 2.25, "learning_rate": "3.3427e-05", "loss": 0.7249, "slid_loss": 0.6948, "step": 2912, "time": 71.68 }, { "epoch": 2.25, "learning_rate": "3.3417e-05", "loss": 0.6762, "slid_loss": 0.6951, "step": 2913, "time": 71.32 }, { "epoch": 2.25, "learning_rate": "3.3407e-05", "loss": 0.7152, "slid_loss": 0.6953, "step": 2914, "time": 71.67 }, { "epoch": 2.26, "learning_rate": "3.3398e-05", "loss": 0.7065, "slid_loss": 0.6953, "step": 2915, "time": 72.54 }, { "epoch": 2.26, "learning_rate": "3.3388e-05", "loss": 0.7046, "slid_loss": 0.6951, "step": 2916, "time": 72.82 }, { "epoch": 2.26, "learning_rate": "3.3378e-05", "loss": 0.727, "slid_loss": 0.6956, "step": 2917, "time": 72.18 }, { "epoch": 2.26, "learning_rate": "3.3369e-05", "loss": 0.6874, "slid_loss": 0.6955, "step": 2918, "time": 71.71 }, { "epoch": 2.26, "learning_rate": "3.3359e-05", "loss": 0.6952, "slid_loss": 0.6948, "step": 2919, "time": 71.27 }, { "epoch": 2.26, "learning_rate": "3.3349e-05", "loss": 0.729, "slid_loss": 0.6951, "step": 2920, "time": 70.95 }, { "epoch": 2.26, "learning_rate": "3.3340e-05", "loss": 0.6572, "slid_loss": 0.6946, "step": 2921, "time": 72.98 }, { "epoch": 2.26, "learning_rate": "3.3330e-05", "loss": 0.6728, "slid_loss": 0.6942, "step": 2922, "time": 70.89 }, { "epoch": 2.26, "learning_rate": "3.3320e-05", "loss": 0.7098, "slid_loss": 0.6938, "step": 2923, "time": 73.28 }, { "epoch": 2.26, "learning_rate": "3.3311e-05", "loss": 0.672, "slid_loss": 0.6939, "step": 2924, "time": 73.11 }, { "epoch": 2.26, "learning_rate": "3.3301e-05", "loss": 0.7244, "slid_loss": 0.6938, "step": 2925, "time": 72.97 }, { "epoch": 2.26, "learning_rate": "3.3291e-05", "loss": 0.6901, "slid_loss": 0.694, "step": 2926, "time": 71.7 }, { "epoch": 2.26, "learning_rate": "3.3281e-05", "loss": 0.6688, "slid_loss": 0.6937, "step": 2927, "time": 73.87 }, { "epoch": 2.27, "learning_rate": "3.3272e-05", "loss": 0.6836, "slid_loss": 0.6939, "step": 2928, "time": 72.13 }, { "epoch": 2.27, "learning_rate": "3.3262e-05", "loss": 0.6917, "slid_loss": 0.6936, "step": 2929, "time": 71.09 }, { "epoch": 2.27, "learning_rate": "3.3252e-05", "loss": 0.699, "slid_loss": 0.6937, "step": 2930, "time": 70.96 }, { "epoch": 2.27, "learning_rate": "3.3243e-05", "loss": 0.6985, "slid_loss": 0.694, "step": 2931, "time": 72.81 }, { "epoch": 2.27, "learning_rate": "3.3233e-05", "loss": 0.6762, "slid_loss": 0.694, "step": 2932, "time": 71.69 }, { "epoch": 2.27, "learning_rate": "3.3223e-05", "loss": 0.6794, "slid_loss": 0.6942, "step": 2933, "time": 72.53 }, { "epoch": 2.27, "learning_rate": "3.3214e-05", "loss": 0.6793, "slid_loss": 0.694, "step": 2934, "time": 71.46 }, { "epoch": 2.27, "learning_rate": "3.3204e-05", "loss": 0.6819, "slid_loss": 0.6939, "step": 2935, "time": 71.51 }, { "epoch": 2.27, "learning_rate": "3.3194e-05", "loss": 0.673, "slid_loss": 0.6937, "step": 2936, "time": 72.09 }, { "epoch": 2.27, "learning_rate": "3.3184e-05", "loss": 0.7541, "slid_loss": 0.694, "step": 2937, "time": 72.2 }, { "epoch": 2.27, "learning_rate": "3.3175e-05", "loss": 0.6941, "slid_loss": 0.6939, "step": 2938, "time": 72.09 }, { "epoch": 2.27, "learning_rate": "3.3165e-05", "loss": 0.7248, "slid_loss": 0.6943, "step": 2939, "time": 71.33 }, { "epoch": 2.27, "learning_rate": "3.3155e-05", "loss": 0.7249, "slid_loss": 0.6941, "step": 2940, "time": 71.2 }, { "epoch": 2.28, "learning_rate": "3.3146e-05", "loss": 0.6731, "slid_loss": 0.6941, "step": 2941, "time": 72.04 }, { "epoch": 2.28, "learning_rate": "3.3136e-05", "loss": 0.7193, "slid_loss": 0.6945, "step": 2942, "time": 71.24 }, { "epoch": 2.28, "learning_rate": "3.3126e-05", "loss": 0.7039, "slid_loss": 0.6945, "step": 2943, "time": 71.38 }, { "epoch": 2.28, "learning_rate": "3.3117e-05", "loss": 0.7055, "slid_loss": 0.6947, "step": 2944, "time": 71.55 }, { "epoch": 2.28, "learning_rate": "3.3107e-05", "loss": 0.7315, "slid_loss": 0.695, "step": 2945, "time": 72.93 }, { "epoch": 2.28, "learning_rate": "3.3097e-05", "loss": 0.6875, "slid_loss": 0.6949, "step": 2946, "time": 73.08 }, { "epoch": 2.28, "learning_rate": "3.3087e-05", "loss": 0.6993, "slid_loss": 0.6947, "step": 2947, "time": 70.66 }, { "epoch": 2.28, "learning_rate": "3.3078e-05", "loss": 0.6588, "slid_loss": 0.6946, "step": 2948, "time": 73.68 }, { "epoch": 2.28, "learning_rate": "3.3068e-05", "loss": 0.7197, "slid_loss": 0.6949, "step": 2949, "time": 70.47 }, { "epoch": 2.28, "learning_rate": "3.3058e-05", "loss": 0.6977, "slid_loss": 0.6953, "step": 2950, "time": 70.67 }, { "epoch": 2.28, "learning_rate": "3.3049e-05", "loss": 0.7343, "slid_loss": 0.6963, "step": 2951, "time": 71.12 }, { "epoch": 2.28, "learning_rate": "3.3039e-05", "loss": 0.7169, "slid_loss": 0.6961, "step": 2952, "time": 71.35 }, { "epoch": 2.28, "learning_rate": "3.3029e-05", "loss": 0.7132, "slid_loss": 0.6962, "step": 2953, "time": 70.99 }, { "epoch": 2.29, "learning_rate": "3.3019e-05", "loss": 0.6848, "slid_loss": 0.6965, "step": 2954, "time": 71.95 }, { "epoch": 2.29, "learning_rate": "3.3010e-05", "loss": 0.725, "slid_loss": 0.6968, "step": 2955, "time": 71.58 }, { "epoch": 2.29, "learning_rate": "3.3000e-05", "loss": 0.6896, "slid_loss": 0.6966, "step": 2956, "time": 75.52 }, { "epoch": 2.29, "learning_rate": "3.2990e-05", "loss": 0.7342, "slid_loss": 0.6971, "step": 2957, "time": 70.69 }, { "epoch": 2.29, "learning_rate": "3.2981e-05", "loss": 0.6771, "slid_loss": 0.6967, "step": 2958, "time": 70.63 }, { "epoch": 2.29, "learning_rate": "3.2971e-05", "loss": 0.7248, "slid_loss": 0.6971, "step": 2959, "time": 71.96 }, { "epoch": 2.29, "learning_rate": "3.2961e-05", "loss": 0.6598, "slid_loss": 0.697, "step": 2960, "time": 73.47 }, { "epoch": 2.29, "learning_rate": "3.2951e-05", "loss": 0.7131, "slid_loss": 0.697, "step": 2961, "time": 70.4 }, { "epoch": 2.29, "learning_rate": "3.2942e-05", "loss": 0.6904, "slid_loss": 0.6969, "step": 2962, "time": 71.56 }, { "epoch": 2.29, "learning_rate": "3.2932e-05", "loss": 0.6911, "slid_loss": 0.697, "step": 2963, "time": 70.45 }, { "epoch": 2.29, "learning_rate": "3.2922e-05", "loss": 0.7189, "slid_loss": 0.6972, "step": 2964, "time": 71.63 }, { "epoch": 2.29, "learning_rate": "3.2913e-05", "loss": 0.6944, "slid_loss": 0.6973, "step": 2965, "time": 70.71 }, { "epoch": 2.29, "learning_rate": "3.2903e-05", "loss": 0.6607, "slid_loss": 0.6968, "step": 2966, "time": 73.01 }, { "epoch": 2.3, "learning_rate": "3.2893e-05", "loss": 0.7065, "slid_loss": 0.6971, "step": 2967, "time": 70.84 }, { "epoch": 2.3, "learning_rate": "3.2883e-05", "loss": 0.6793, "slid_loss": 0.6972, "step": 2968, "time": 71.07 }, { "epoch": 2.3, "learning_rate": "3.2874e-05", "loss": 0.6886, "slid_loss": 0.6971, "step": 2969, "time": 71.86 }, { "epoch": 2.3, "learning_rate": "3.2864e-05", "loss": 0.6917, "slid_loss": 0.6971, "step": 2970, "time": 71.61 }, { "epoch": 2.3, "learning_rate": "3.2854e-05", "loss": 0.708, "slid_loss": 0.697, "step": 2971, "time": 71.25 }, { "epoch": 2.3, "learning_rate": "3.2845e-05", "loss": 0.6857, "slid_loss": 0.6971, "step": 2972, "time": 71.65 }, { "epoch": 2.3, "learning_rate": "3.2835e-05", "loss": 0.6788, "slid_loss": 0.6971, "step": 2973, "time": 71.57 }, { "epoch": 2.3, "learning_rate": "3.2825e-05", "loss": 0.6614, "slid_loss": 0.6971, "step": 2974, "time": 73.25 }, { "epoch": 2.3, "learning_rate": "3.2815e-05", "loss": 0.711, "slid_loss": 0.6974, "step": 2975, "time": 71.18 }, { "epoch": 2.3, "learning_rate": "3.2806e-05", "loss": 0.6692, "slid_loss": 0.6971, "step": 2976, "time": 71.76 }, { "epoch": 2.3, "learning_rate": "3.2796e-05", "loss": 0.6606, "slid_loss": 0.6967, "step": 2977, "time": 71.23 }, { "epoch": 2.3, "learning_rate": "3.2786e-05", "loss": 0.6998, "slid_loss": 0.697, "step": 2978, "time": 70.68 }, { "epoch": 2.3, "learning_rate": "3.2776e-05", "loss": 0.7218, "slid_loss": 0.6971, "step": 2979, "time": 71.38 }, { "epoch": 2.31, "learning_rate": "3.2767e-05", "loss": 0.6912, "slid_loss": 0.6969, "step": 2980, "time": 70.89 }, { "epoch": 2.31, "learning_rate": "3.2757e-05", "loss": 0.664, "slid_loss": 0.6966, "step": 2981, "time": 71.24 }, { "epoch": 2.31, "learning_rate": "3.2747e-05", "loss": 0.7024, "slid_loss": 0.6966, "step": 2982, "time": 71.08 }, { "epoch": 2.31, "learning_rate": "3.2738e-05", "loss": 0.7142, "slid_loss": 0.6968, "step": 2983, "time": 71.01 }, { "epoch": 2.31, "learning_rate": "3.2728e-05", "loss": 0.6934, "slid_loss": 0.6968, "step": 2984, "time": 71.38 }, { "epoch": 2.31, "learning_rate": "3.2718e-05", "loss": 0.721, "slid_loss": 0.6969, "step": 2985, "time": 70.34 }, { "epoch": 2.31, "learning_rate": "3.2708e-05", "loss": 0.7093, "slid_loss": 0.6972, "step": 2986, "time": 72.52 }, { "epoch": 2.31, "learning_rate": "3.2699e-05", "loss": 0.6871, "slid_loss": 0.6971, "step": 2987, "time": 72.81 }, { "epoch": 2.31, "learning_rate": "3.2689e-05", "loss": 0.6528, "slid_loss": 0.6968, "step": 2988, "time": 72.48 }, { "epoch": 2.31, "learning_rate": "3.2679e-05", "loss": 0.7061, "slid_loss": 0.697, "step": 2989, "time": 73.32 }, { "epoch": 2.31, "learning_rate": "3.2669e-05", "loss": 0.7047, "slid_loss": 0.6966, "step": 2990, "time": 72.54 }, { "epoch": 2.31, "learning_rate": "3.2660e-05", "loss": 0.6918, "slid_loss": 0.6966, "step": 2991, "time": 71.62 }, { "epoch": 2.31, "learning_rate": "3.2650e-05", "loss": 0.7041, "slid_loss": 0.6968, "step": 2992, "time": 71.75 }, { "epoch": 2.32, "learning_rate": "3.2640e-05", "loss": 0.6723, "slid_loss": 0.6965, "step": 2993, "time": 71.87 }, { "epoch": 2.32, "learning_rate": "3.2630e-05", "loss": 0.6958, "slid_loss": 0.6963, "step": 2994, "time": 72.02 }, { "epoch": 2.32, "learning_rate": "3.2621e-05", "loss": 0.6925, "slid_loss": 0.6957, "step": 2995, "time": 71.59 }, { "epoch": 2.32, "learning_rate": "3.2611e-05", "loss": 0.706, "slid_loss": 0.696, "step": 2996, "time": 73.28 }, { "epoch": 2.32, "learning_rate": "3.2601e-05", "loss": 0.6866, "slid_loss": 0.6959, "step": 2997, "time": 71.91 }, { "epoch": 2.32, "learning_rate": "3.2591e-05", "loss": 0.6727, "slid_loss": 0.6959, "step": 2998, "time": 71.85 }, { "epoch": 2.32, "learning_rate": "3.2582e-05", "loss": 0.7039, "slid_loss": 0.6959, "step": 2999, "time": 72.75 }, { "epoch": 2.32, "learning_rate": "3.2572e-05", "loss": 0.7089, "slid_loss": 0.6962, "step": 3000, "time": 72.47 }, { "epoch": 2.32, "learning_rate": "3.2562e-05", "loss": 0.6985, "slid_loss": 0.6959, "step": 3001, "time": 839.95 }, { "epoch": 2.32, "learning_rate": "3.2552e-05", "loss": 0.7226, "slid_loss": 0.696, "step": 3002, "time": 71.42 }, { "epoch": 2.32, "learning_rate": "3.2543e-05", "loss": 0.6729, "slid_loss": 0.6959, "step": 3003, "time": 70.54 }, { "epoch": 2.32, "learning_rate": "3.2533e-05", "loss": 0.6715, "slid_loss": 0.6955, "step": 3004, "time": 71.28 }, { "epoch": 2.32, "learning_rate": "3.2523e-05", "loss": 0.6972, "slid_loss": 0.6958, "step": 3005, "time": 72.41 }, { "epoch": 2.33, "learning_rate": "3.2513e-05", "loss": 0.6646, "slid_loss": 0.6956, "step": 3006, "time": 71.3 }, { "epoch": 2.33, "learning_rate": "3.2504e-05", "loss": 0.6921, "slid_loss": 0.6956, "step": 3007, "time": 70.79 }, { "epoch": 2.33, "learning_rate": "3.2494e-05", "loss": 0.7009, "slid_loss": 0.696, "step": 3008, "time": 71.92 }, { "epoch": 2.33, "learning_rate": "3.2484e-05", "loss": 0.6718, "slid_loss": 0.6958, "step": 3009, "time": 87.97 }, { "epoch": 2.33, "learning_rate": "3.2474e-05", "loss": 0.6849, "slid_loss": 0.6959, "step": 3010, "time": 85.95 }, { "epoch": 2.33, "learning_rate": "3.2465e-05", "loss": 0.6981, "slid_loss": 0.6956, "step": 3011, "time": 71.26 }, { "epoch": 2.33, "learning_rate": "3.2455e-05", "loss": 0.7173, "slid_loss": 0.6956, "step": 3012, "time": 84.55 }, { "epoch": 2.33, "learning_rate": "3.2445e-05", "loss": 0.6951, "slid_loss": 0.6958, "step": 3013, "time": 100.71 }, { "epoch": 2.33, "learning_rate": "3.2435e-05", "loss": 0.6843, "slid_loss": 0.6955, "step": 3014, "time": 110.54 }, { "epoch": 2.33, "learning_rate": "3.2426e-05", "loss": 0.6945, "slid_loss": 0.6953, "step": 3015, "time": 135.46 }, { "epoch": 2.33, "learning_rate": "3.2416e-05", "loss": 0.6807, "slid_loss": 0.6951, "step": 3016, "time": 149.13 }, { "epoch": 2.33, "learning_rate": "3.2406e-05", "loss": 0.6987, "slid_loss": 0.6948, "step": 3017, "time": 153.24 }, { "epoch": 2.34, "learning_rate": "3.2396e-05", "loss": 0.698, "slid_loss": 0.6949, "step": 3018, "time": 174.37 }, { "epoch": 2.34, "learning_rate": "3.2387e-05", "loss": 0.6641, "slid_loss": 0.6946, "step": 3019, "time": 165.27 }, { "epoch": 2.34, "learning_rate": "3.2377e-05", "loss": 0.7034, "slid_loss": 0.6943, "step": 3020, "time": 162.32 }, { "epoch": 2.34, "learning_rate": "3.2367e-05", "loss": 0.6749, "slid_loss": 0.6945, "step": 3021, "time": 119.92 }, { "epoch": 2.34, "learning_rate": "3.2357e-05", "loss": 0.6952, "slid_loss": 0.6948, "step": 3022, "time": 135.23 }, { "epoch": 2.34, "learning_rate": "3.2348e-05", "loss": 0.6908, "slid_loss": 0.6946, "step": 3023, "time": 128.68 }, { "epoch": 2.34, "learning_rate": "3.2338e-05", "loss": 0.7276, "slid_loss": 0.6951, "step": 3024, "time": 130.55 }, { "epoch": 2.34, "learning_rate": "3.2328e-05", "loss": 0.7099, "slid_loss": 0.695, "step": 3025, "time": 121.88 }, { "epoch": 2.34, "learning_rate": "3.2318e-05", "loss": 0.6828, "slid_loss": 0.6949, "step": 3026, "time": 93.6 }, { "epoch": 2.34, "learning_rate": "3.2309e-05", "loss": 0.6612, "slid_loss": 0.6948, "step": 3027, "time": 106.06 }, { "epoch": 2.34, "learning_rate": "3.2299e-05", "loss": 0.6642, "slid_loss": 0.6946, "step": 3028, "time": 83.06 }, { "epoch": 2.34, "learning_rate": "3.2289e-05", "loss": 0.6776, "slid_loss": 0.6945, "step": 3029, "time": 82.96 }, { "epoch": 2.34, "learning_rate": "3.2279e-05", "loss": 0.7099, "slid_loss": 0.6946, "step": 3030, "time": 84.65 }, { "epoch": 2.35, "learning_rate": "3.2270e-05", "loss": 0.7005, "slid_loss": 0.6946, "step": 3031, "time": 70.68 }, { "epoch": 2.35, "learning_rate": "3.2260e-05", "loss": 0.6976, "slid_loss": 0.6948, "step": 3032, "time": 71.09 }, { "epoch": 2.35, "learning_rate": "3.2250e-05", "loss": 0.6978, "slid_loss": 0.695, "step": 3033, "time": 71.72 }, { "epoch": 2.35, "learning_rate": "3.2240e-05", "loss": 0.6902, "slid_loss": 0.6951, "step": 3034, "time": 71.78 }, { "epoch": 2.35, "learning_rate": "3.2231e-05", "loss": 0.6625, "slid_loss": 0.6949, "step": 3035, "time": 70.8 }, { "epoch": 2.35, "learning_rate": "3.2221e-05", "loss": 0.6879, "slid_loss": 0.6951, "step": 3036, "time": 71.49 }, { "epoch": 2.35, "learning_rate": "3.2211e-05", "loss": 0.6778, "slid_loss": 0.6943, "step": 3037, "time": 72.47 }, { "epoch": 2.35, "learning_rate": "3.2201e-05", "loss": 0.6943, "slid_loss": 0.6943, "step": 3038, "time": 72.04 }, { "epoch": 2.35, "learning_rate": "3.2192e-05", "loss": 0.6827, "slid_loss": 0.6939, "step": 3039, "time": 71.51 }, { "epoch": 2.35, "learning_rate": "3.2182e-05", "loss": 0.6417, "slid_loss": 0.6931, "step": 3040, "time": 71.9 }, { "epoch": 2.35, "learning_rate": "3.2172e-05", "loss": 0.6914, "slid_loss": 0.6932, "step": 3041, "time": 72.02 }, { "epoch": 2.35, "learning_rate": "3.2162e-05", "loss": 0.6945, "slid_loss": 0.693, "step": 3042, "time": 71.13 }, { "epoch": 2.35, "learning_rate": "3.2152e-05", "loss": 0.7091, "slid_loss": 0.693, "step": 3043, "time": 72.5 }, { "epoch": 2.36, "learning_rate": "3.2143e-05", "loss": 0.6849, "slid_loss": 0.6928, "step": 3044, "time": 73.8 }, { "epoch": 2.36, "learning_rate": "3.2133e-05", "loss": 0.6279, "slid_loss": 0.6918, "step": 3045, "time": 70.93 }, { "epoch": 2.36, "learning_rate": "3.2123e-05", "loss": 0.6552, "slid_loss": 0.6915, "step": 3046, "time": 73.55 }, { "epoch": 2.36, "learning_rate": "3.2113e-05", "loss": 0.6699, "slid_loss": 0.6912, "step": 3047, "time": 71.43 }, { "epoch": 2.36, "learning_rate": "3.2104e-05", "loss": 0.6924, "slid_loss": 0.6915, "step": 3048, "time": 74.44 }, { "epoch": 2.36, "learning_rate": "3.2094e-05", "loss": 0.703, "slid_loss": 0.6914, "step": 3049, "time": 72.29 }, { "epoch": 2.36, "learning_rate": "3.2084e-05", "loss": 0.6384, "slid_loss": 0.6908, "step": 3050, "time": 71.28 }, { "epoch": 2.36, "learning_rate": "3.2074e-05", "loss": 0.6729, "slid_loss": 0.6902, "step": 3051, "time": 70.72 }, { "epoch": 2.36, "learning_rate": "3.2065e-05", "loss": 0.6948, "slid_loss": 0.6899, "step": 3052, "time": 71.88 }, { "epoch": 2.36, "learning_rate": "3.2055e-05", "loss": 0.7082, "slid_loss": 0.6899, "step": 3053, "time": 71.26 }, { "epoch": 2.36, "learning_rate": "3.2045e-05", "loss": 0.7035, "slid_loss": 0.6901, "step": 3054, "time": 73.24 }, { "epoch": 2.36, "learning_rate": "3.2035e-05", "loss": 0.7037, "slid_loss": 0.6899, "step": 3055, "time": 71.83 }, { "epoch": 2.36, "learning_rate": "3.2025e-05", "loss": 0.7083, "slid_loss": 0.69, "step": 3056, "time": 71.42 }, { "epoch": 2.37, "learning_rate": "3.2016e-05", "loss": 0.7062, "slid_loss": 0.6898, "step": 3057, "time": 72.9 }, { "epoch": 2.37, "learning_rate": "3.2006e-05", "loss": 0.6536, "slid_loss": 0.6895, "step": 3058, "time": 70.71 }, { "epoch": 2.37, "learning_rate": "3.1996e-05", "loss": 0.7262, "slid_loss": 0.6895, "step": 3059, "time": 70.96 }, { "epoch": 2.37, "learning_rate": "3.1986e-05", "loss": 0.7037, "slid_loss": 0.69, "step": 3060, "time": 72.92 }, { "epoch": 2.37, "learning_rate": "3.1977e-05", "loss": 0.6998, "slid_loss": 0.6898, "step": 3061, "time": 71.73 }, { "epoch": 2.37, "learning_rate": "3.1967e-05", "loss": 0.6705, "slid_loss": 0.6896, "step": 3062, "time": 72.67 }, { "epoch": 2.37, "learning_rate": "3.1957e-05", "loss": 0.6698, "slid_loss": 0.6894, "step": 3063, "time": 71.87 }, { "epoch": 2.37, "learning_rate": "3.1947e-05", "loss": 0.6746, "slid_loss": 0.689, "step": 3064, "time": 72.55 }, { "epoch": 2.37, "learning_rate": "3.1937e-05", "loss": 0.6942, "slid_loss": 0.689, "step": 3065, "time": 71.4 }, { "epoch": 2.37, "learning_rate": "3.1928e-05", "loss": 0.6855, "slid_loss": 0.6892, "step": 3066, "time": 70.37 }, { "epoch": 2.37, "learning_rate": "3.1918e-05", "loss": 0.6817, "slid_loss": 0.689, "step": 3067, "time": 72.42 }, { "epoch": 2.37, "learning_rate": "3.1908e-05", "loss": 0.6547, "slid_loss": 0.6887, "step": 3068, "time": 70.76 }, { "epoch": 2.37, "learning_rate": "3.1898e-05", "loss": 0.6987, "slid_loss": 0.6888, "step": 3069, "time": 70.58 }, { "epoch": 2.38, "learning_rate": "3.1889e-05", "loss": 0.7152, "slid_loss": 0.6891, "step": 3070, "time": 72.35 }, { "epoch": 2.38, "learning_rate": "3.1879e-05", "loss": 0.6838, "slid_loss": 0.6888, "step": 3071, "time": 71.79 }, { "epoch": 2.38, "learning_rate": "3.1869e-05", "loss": 0.7201, "slid_loss": 0.6892, "step": 3072, "time": 73.0 }, { "epoch": 2.38, "learning_rate": "3.1859e-05", "loss": 0.6935, "slid_loss": 0.6893, "step": 3073, "time": 72.32 }, { "epoch": 2.38, "learning_rate": "3.1849e-05", "loss": 0.6956, "slid_loss": 0.6897, "step": 3074, "time": 72.56 }, { "epoch": 2.38, "learning_rate": "3.1840e-05", "loss": 0.6643, "slid_loss": 0.6892, "step": 3075, "time": 72.25 }, { "epoch": 2.38, "learning_rate": "3.1830e-05", "loss": 0.6789, "slid_loss": 0.6893, "step": 3076, "time": 72.11 }, { "epoch": 2.38, "learning_rate": "3.1820e-05", "loss": 0.6796, "slid_loss": 0.6895, "step": 3077, "time": 71.26 }, { "epoch": 2.38, "learning_rate": "3.1810e-05", "loss": 0.6657, "slid_loss": 0.6891, "step": 3078, "time": 71.63 }, { "epoch": 2.38, "learning_rate": "3.1800e-05", "loss": 0.7028, "slid_loss": 0.689, "step": 3079, "time": 70.71 }, { "epoch": 2.38, "learning_rate": "3.1791e-05", "loss": 0.6314, "slid_loss": 0.6884, "step": 3080, "time": 71.44 }, { "epoch": 2.38, "learning_rate": "3.1781e-05", "loss": 0.6823, "slid_loss": 0.6885, "step": 3081, "time": 72.35 }, { "epoch": 2.38, "learning_rate": "3.1771e-05", "loss": 0.6894, "slid_loss": 0.6884, "step": 3082, "time": 71.23 }, { "epoch": 2.39, "learning_rate": "3.1761e-05", "loss": 0.67, "slid_loss": 0.688, "step": 3083, "time": 70.67 }, { "epoch": 2.39, "learning_rate": "3.1752e-05", "loss": 0.6499, "slid_loss": 0.6875, "step": 3084, "time": 71.14 }, { "epoch": 2.39, "learning_rate": "3.1742e-05", "loss": 0.6235, "slid_loss": 0.6866, "step": 3085, "time": 70.85 }, { "epoch": 2.39, "learning_rate": "3.1732e-05", "loss": 0.6551, "slid_loss": 0.686, "step": 3086, "time": 72.15 }, { "epoch": 2.39, "learning_rate": "3.1722e-05", "loss": 0.7064, "slid_loss": 0.6862, "step": 3087, "time": 71.28 }, { "epoch": 2.39, "learning_rate": "3.1712e-05", "loss": 0.6698, "slid_loss": 0.6864, "step": 3088, "time": 71.22 }, { "epoch": 2.39, "learning_rate": "3.1703e-05", "loss": 0.6952, "slid_loss": 0.6863, "step": 3089, "time": 73.2 }, { "epoch": 2.39, "learning_rate": "3.1693e-05", "loss": 0.7149, "slid_loss": 0.6864, "step": 3090, "time": 71.15 }, { "epoch": 2.39, "learning_rate": "3.1683e-05", "loss": 0.7062, "slid_loss": 0.6865, "step": 3091, "time": 71.2 }, { "epoch": 2.39, "learning_rate": "3.1673e-05", "loss": 0.6971, "slid_loss": 0.6864, "step": 3092, "time": 71.22 }, { "epoch": 2.39, "learning_rate": "3.1663e-05", "loss": 0.7082, "slid_loss": 0.6868, "step": 3093, "time": 71.29 }, { "epoch": 2.39, "learning_rate": "3.1654e-05", "loss": 0.7035, "slid_loss": 0.6869, "step": 3094, "time": 71.33 }, { "epoch": 2.39, "learning_rate": "3.1644e-05", "loss": 0.6815, "slid_loss": 0.6868, "step": 3095, "time": 71.8 }, { "epoch": 2.4, "learning_rate": "3.1634e-05", "loss": 0.6653, "slid_loss": 0.6864, "step": 3096, "time": 71.64 }, { "epoch": 2.4, "learning_rate": "3.1624e-05", "loss": 0.6569, "slid_loss": 0.6861, "step": 3097, "time": 72.74 }, { "epoch": 2.4, "learning_rate": "3.1614e-05", "loss": 0.6594, "slid_loss": 0.6859, "step": 3098, "time": 72.73 }, { "epoch": 2.4, "learning_rate": "3.1605e-05", "loss": 0.7643, "slid_loss": 0.6865, "step": 3099, "time": 73.24 }, { "epoch": 2.4, "learning_rate": "3.1595e-05", "loss": 0.7191, "slid_loss": 0.6866, "step": 3100, "time": 70.88 }, { "epoch": 2.4, "learning_rate": "3.1585e-05", "loss": 0.7039, "slid_loss": 0.6867, "step": 3101, "time": 71.69 }, { "epoch": 2.4, "learning_rate": "3.1575e-05", "loss": 0.6921, "slid_loss": 0.6864, "step": 3102, "time": 72.33 }, { "epoch": 2.4, "learning_rate": "3.1566e-05", "loss": 0.6782, "slid_loss": 0.6864, "step": 3103, "time": 72.35 }, { "epoch": 2.4, "learning_rate": "3.1556e-05", "loss": 0.6694, "slid_loss": 0.6864, "step": 3104, "time": 70.62 }, { "epoch": 2.4, "learning_rate": "3.1546e-05", "loss": 0.7021, "slid_loss": 0.6865, "step": 3105, "time": 71.54 }, { "epoch": 2.4, "learning_rate": "3.1536e-05", "loss": 0.647, "slid_loss": 0.6863, "step": 3106, "time": 71.43 }, { "epoch": 2.4, "learning_rate": "3.1526e-05", "loss": 0.6653, "slid_loss": 0.686, "step": 3107, "time": 72.38 }, { "epoch": 2.4, "learning_rate": "3.1517e-05", "loss": 0.6806, "slid_loss": 0.6858, "step": 3108, "time": 71.63 }, { "epoch": 2.41, "learning_rate": "3.1507e-05", "loss": 0.6587, "slid_loss": 0.6857, "step": 3109, "time": 70.49 }, { "epoch": 2.41, "learning_rate": "3.1497e-05", "loss": 0.6981, "slid_loss": 0.6858, "step": 3110, "time": 70.65 }, { "epoch": 2.41, "learning_rate": "3.1487e-05", "loss": 0.6425, "slid_loss": 0.6853, "step": 3111, "time": 71.07 }, { "epoch": 2.41, "learning_rate": "3.1477e-05", "loss": 0.7071, "slid_loss": 0.6852, "step": 3112, "time": 70.88 }, { "epoch": 2.41, "learning_rate": "3.1468e-05", "loss": 0.7163, "slid_loss": 0.6854, "step": 3113, "time": 71.26 }, { "epoch": 2.41, "learning_rate": "3.1458e-05", "loss": 0.6798, "slid_loss": 0.6853, "step": 3114, "time": 70.96 }, { "epoch": 2.41, "learning_rate": "3.1448e-05", "loss": 0.6999, "slid_loss": 0.6854, "step": 3115, "time": 71.89 }, { "epoch": 2.41, "learning_rate": "3.1438e-05", "loss": 0.6902, "slid_loss": 0.6855, "step": 3116, "time": 71.66 }, { "epoch": 2.41, "learning_rate": "3.1428e-05", "loss": 0.6813, "slid_loss": 0.6853, "step": 3117, "time": 72.11 }, { "epoch": 2.41, "learning_rate": "3.1419e-05", "loss": 0.6777, "slid_loss": 0.6851, "step": 3118, "time": 72.25 }, { "epoch": 2.41, "learning_rate": "3.1409e-05", "loss": 0.6979, "slid_loss": 0.6854, "step": 3119, "time": 71.94 }, { "epoch": 2.41, "learning_rate": "3.1399e-05", "loss": 0.6497, "slid_loss": 0.6849, "step": 3120, "time": 71.41 }, { "epoch": 2.41, "learning_rate": "3.1389e-05", "loss": 0.6804, "slid_loss": 0.685, "step": 3121, "time": 71.05 }, { "epoch": 2.42, "learning_rate": "3.1379e-05", "loss": 0.7125, "slid_loss": 0.6851, "step": 3122, "time": 73.16 }, { "epoch": 2.42, "learning_rate": "3.1370e-05", "loss": 0.7056, "slid_loss": 0.6853, "step": 3123, "time": 72.45 }, { "epoch": 2.42, "learning_rate": "3.1360e-05", "loss": 0.6574, "slid_loss": 0.6846, "step": 3124, "time": 72.0 }, { "epoch": 2.42, "learning_rate": "3.1350e-05", "loss": 0.6865, "slid_loss": 0.6843, "step": 3125, "time": 73.81 }, { "epoch": 2.42, "learning_rate": "3.1340e-05", "loss": 0.7107, "slid_loss": 0.6846, "step": 3126, "time": 71.54 }, { "epoch": 2.42, "learning_rate": "3.1330e-05", "loss": 0.7016, "slid_loss": 0.685, "step": 3127, "time": 72.52 }, { "epoch": 2.42, "learning_rate": "3.1321e-05", "loss": 0.7002, "slid_loss": 0.6854, "step": 3128, "time": 73.52 }, { "epoch": 2.42, "learning_rate": "3.1311e-05", "loss": 0.6928, "slid_loss": 0.6855, "step": 3129, "time": 70.53 }, { "epoch": 2.42, "learning_rate": "3.1301e-05", "loss": 0.6946, "slid_loss": 0.6854, "step": 3130, "time": 70.87 }, { "epoch": 2.42, "learning_rate": "3.1291e-05", "loss": 0.6638, "slid_loss": 0.685, "step": 3131, "time": 70.3 }, { "epoch": 2.42, "learning_rate": "3.1281e-05", "loss": 0.6724, "slid_loss": 0.6848, "step": 3132, "time": 70.87 }, { "epoch": 2.42, "learning_rate": "3.1271e-05", "loss": 0.7107, "slid_loss": 0.6849, "step": 3133, "time": 70.45 }, { "epoch": 2.42, "learning_rate": "3.1262e-05", "loss": 0.6661, "slid_loss": 0.6847, "step": 3134, "time": 71.79 }, { "epoch": 2.43, "learning_rate": "3.1252e-05", "loss": 0.652, "slid_loss": 0.6846, "step": 3135, "time": 72.02 }, { "epoch": 2.43, "learning_rate": "3.1242e-05", "loss": 0.6605, "slid_loss": 0.6843, "step": 3136, "time": 71.91 }, { "epoch": 2.43, "learning_rate": "3.1232e-05", "loss": 0.6596, "slid_loss": 0.6841, "step": 3137, "time": 70.97 }, { "epoch": 2.43, "learning_rate": "3.1222e-05", "loss": 0.7109, "slid_loss": 0.6843, "step": 3138, "time": 71.76 }, { "epoch": 2.43, "learning_rate": "3.1213e-05", "loss": 0.7004, "slid_loss": 0.6844, "step": 3139, "time": 72.46 }, { "epoch": 2.43, "learning_rate": "3.1203e-05", "loss": 0.6638, "slid_loss": 0.6847, "step": 3140, "time": 71.37 }, { "epoch": 2.43, "learning_rate": "3.1193e-05", "loss": 0.681, "slid_loss": 0.6846, "step": 3141, "time": 71.16 }, { "epoch": 2.43, "learning_rate": "3.1183e-05", "loss": 0.6973, "slid_loss": 0.6846, "step": 3142, "time": 72.63 }, { "epoch": 2.43, "learning_rate": "3.1173e-05", "loss": 0.7061, "slid_loss": 0.6846, "step": 3143, "time": 70.35 }, { "epoch": 2.43, "learning_rate": "3.1164e-05", "loss": 0.6859, "slid_loss": 0.6846, "step": 3144, "time": 72.96 }, { "epoch": 2.43, "learning_rate": "3.1154e-05", "loss": 0.6886, "slid_loss": 0.6852, "step": 3145, "time": 71.48 }, { "epoch": 2.43, "learning_rate": "3.1144e-05", "loss": 0.6963, "slid_loss": 0.6856, "step": 3146, "time": 72.51 }, { "epoch": 2.43, "learning_rate": "3.1134e-05", "loss": 0.6665, "slid_loss": 0.6856, "step": 3147, "time": 72.66 }, { "epoch": 2.44, "learning_rate": "3.1124e-05", "loss": 0.7035, "slid_loss": 0.6857, "step": 3148, "time": 71.6 }, { "epoch": 2.44, "learning_rate": "3.1115e-05", "loss": 0.6858, "slid_loss": 0.6855, "step": 3149, "time": 71.34 }, { "epoch": 2.44, "learning_rate": "3.1105e-05", "loss": 0.6602, "slid_loss": 0.6857, "step": 3150, "time": 70.83 }, { "epoch": 2.44, "learning_rate": "3.1095e-05", "loss": 0.6591, "slid_loss": 0.6856, "step": 3151, "time": 72.69 }, { "epoch": 2.44, "learning_rate": "3.1085e-05", "loss": 0.6942, "slid_loss": 0.6856, "step": 3152, "time": 72.7 }, { "epoch": 2.44, "learning_rate": "3.1075e-05", "loss": 0.6725, "slid_loss": 0.6852, "step": 3153, "time": 72.07 }, { "epoch": 2.44, "learning_rate": "3.1066e-05", "loss": 0.6852, "slid_loss": 0.685, "step": 3154, "time": 72.91 }, { "epoch": 2.44, "learning_rate": "3.1056e-05", "loss": 0.6602, "slid_loss": 0.6846, "step": 3155, "time": 70.56 }, { "epoch": 2.44, "learning_rate": "3.1046e-05", "loss": 0.7012, "slid_loss": 0.6845, "step": 3156, "time": 71.02 }, { "epoch": 2.44, "learning_rate": "3.1036e-05", "loss": 0.7212, "slid_loss": 0.6847, "step": 3157, "time": 72.06 }, { "epoch": 2.44, "learning_rate": "3.1026e-05", "loss": 0.6636, "slid_loss": 0.6848, "step": 3158, "time": 70.85 }, { "epoch": 2.44, "learning_rate": "3.1016e-05", "loss": 0.6716, "slid_loss": 0.6842, "step": 3159, "time": 71.17 }, { "epoch": 2.44, "learning_rate": "3.1007e-05", "loss": 0.656, "slid_loss": 0.6837, "step": 3160, "time": 71.55 }, { "epoch": 2.45, "learning_rate": "3.0997e-05", "loss": 0.6728, "slid_loss": 0.6835, "step": 3161, "time": 72.7 }, { "epoch": 2.45, "learning_rate": "3.0987e-05", "loss": 0.6674, "slid_loss": 0.6834, "step": 3162, "time": 71.09 }, { "epoch": 2.45, "learning_rate": "3.0977e-05", "loss": 0.6522, "slid_loss": 0.6833, "step": 3163, "time": 71.3 }, { "epoch": 2.45, "learning_rate": "3.0967e-05", "loss": 0.6851, "slid_loss": 0.6834, "step": 3164, "time": 71.19 }, { "epoch": 2.45, "learning_rate": "3.0958e-05", "loss": 0.6594, "slid_loss": 0.683, "step": 3165, "time": 72.86 }, { "epoch": 2.45, "learning_rate": "3.0948e-05", "loss": 0.6749, "slid_loss": 0.6829, "step": 3166, "time": 71.81 }, { "epoch": 2.45, "learning_rate": "3.0938e-05", "loss": 0.6919, "slid_loss": 0.683, "step": 3167, "time": 72.22 }, { "epoch": 2.45, "learning_rate": "3.0928e-05", "loss": 0.7227, "slid_loss": 0.6837, "step": 3168, "time": 102.86 }, { "epoch": 2.45, "learning_rate": "3.0918e-05", "loss": 0.6848, "slid_loss": 0.6836, "step": 3169, "time": 71.06 }, { "epoch": 2.45, "learning_rate": "3.0909e-05", "loss": 0.6669, "slid_loss": 0.6831, "step": 3170, "time": 85.21 }, { "epoch": 2.45, "learning_rate": "3.0899e-05", "loss": 0.6936, "slid_loss": 0.6832, "step": 3171, "time": 83.63 }, { "epoch": 2.45, "learning_rate": "3.0889e-05", "loss": 0.6949, "slid_loss": 0.6829, "step": 3172, "time": 89.78 }, { "epoch": 2.45, "learning_rate": "3.0879e-05", "loss": 0.6908, "slid_loss": 0.6829, "step": 3173, "time": 98.19 }, { "epoch": 2.46, "learning_rate": "3.0869e-05", "loss": 0.6498, "slid_loss": 0.6824, "step": 3174, "time": 121.31 }, { "epoch": 2.46, "learning_rate": "3.0859e-05", "loss": 0.7114, "slid_loss": 0.6829, "step": 3175, "time": 125.39 }, { "epoch": 2.46, "learning_rate": "3.0850e-05", "loss": 0.6961, "slid_loss": 0.6831, "step": 3176, "time": 135.3 }, { "epoch": 2.46, "learning_rate": "3.0840e-05", "loss": 0.6994, "slid_loss": 0.6833, "step": 3177, "time": 146.28 }, { "epoch": 2.46, "learning_rate": "3.0830e-05", "loss": 0.6757, "slid_loss": 0.6834, "step": 3178, "time": 171.04 }, { "epoch": 2.46, "learning_rate": "3.0820e-05", "loss": 0.6732, "slid_loss": 0.6831, "step": 3179, "time": 133.51 }, { "epoch": 2.46, "learning_rate": "3.0810e-05", "loss": 0.688, "slid_loss": 0.6837, "step": 3180, "time": 118.96 }, { "epoch": 2.46, "learning_rate": "3.0801e-05", "loss": 0.7397, "slid_loss": 0.6842, "step": 3181, "time": 116.69 }, { "epoch": 2.46, "learning_rate": "3.0791e-05", "loss": 0.6436, "slid_loss": 0.6838, "step": 3182, "time": 141.27 }, { "epoch": 2.46, "learning_rate": "3.0781e-05", "loss": 0.7006, "slid_loss": 0.6841, "step": 3183, "time": 111.34 }, { "epoch": 2.46, "learning_rate": "3.0771e-05", "loss": 0.6715, "slid_loss": 0.6843, "step": 3184, "time": 135.75 }, { "epoch": 2.46, "learning_rate": "3.0761e-05", "loss": 0.6897, "slid_loss": 0.685, "step": 3185, "time": 106.48 }, { "epoch": 2.46, "learning_rate": "3.0751e-05", "loss": 0.6866, "slid_loss": 0.6853, "step": 3186, "time": 83.27 }, { "epoch": 2.47, "learning_rate": "3.0742e-05", "loss": 0.6645, "slid_loss": 0.6848, "step": 3187, "time": 92.46 }, { "epoch": 2.47, "learning_rate": "3.0732e-05", "loss": 0.7014, "slid_loss": 0.6852, "step": 3188, "time": 83.17 }, { "epoch": 2.47, "learning_rate": "3.0722e-05", "loss": 0.6931, "slid_loss": 0.6851, "step": 3189, "time": 84.01 }, { "epoch": 2.47, "learning_rate": "3.0712e-05", "loss": 0.6684, "slid_loss": 0.6847, "step": 3190, "time": 70.81 }, { "epoch": 2.47, "learning_rate": "3.0702e-05", "loss": 0.6542, "slid_loss": 0.6842, "step": 3191, "time": 71.73 }, { "epoch": 2.47, "learning_rate": "3.0693e-05", "loss": 0.6634, "slid_loss": 0.6838, "step": 3192, "time": 72.39 }, { "epoch": 2.47, "learning_rate": "3.0683e-05", "loss": 0.6852, "slid_loss": 0.6836, "step": 3193, "time": 71.63 }, { "epoch": 2.47, "learning_rate": "3.0673e-05", "loss": 0.7084, "slid_loss": 0.6836, "step": 3194, "time": 72.33 }, { "epoch": 2.47, "learning_rate": "3.0663e-05", "loss": 0.674, "slid_loss": 0.6836, "step": 3195, "time": 72.19 }, { "epoch": 2.47, "learning_rate": "3.0653e-05", "loss": 0.6848, "slid_loss": 0.6838, "step": 3196, "time": 72.37 }, { "epoch": 2.47, "learning_rate": "3.0643e-05", "loss": 0.6738, "slid_loss": 0.6839, "step": 3197, "time": 71.72 }, { "epoch": 2.47, "learning_rate": "3.0634e-05", "loss": 0.6936, "slid_loss": 0.6843, "step": 3198, "time": 71.48 }, { "epoch": 2.48, "learning_rate": "3.0624e-05", "loss": 0.6562, "slid_loss": 0.6832, "step": 3199, "time": 71.4 }, { "epoch": 2.48, "learning_rate": "3.0614e-05", "loss": 0.699, "slid_loss": 0.683, "step": 3200, "time": 71.63 }, { "epoch": 2.48, "learning_rate": "3.0604e-05", "loss": 0.6953, "slid_loss": 0.6829, "step": 3201, "time": 773.47 }, { "epoch": 2.48, "learning_rate": "3.0594e-05", "loss": 0.7022, "slid_loss": 0.683, "step": 3202, "time": 71.02 }, { "epoch": 2.48, "learning_rate": "3.0585e-05", "loss": 0.7228, "slid_loss": 0.6835, "step": 3203, "time": 70.85 }, { "epoch": 2.48, "learning_rate": "3.0575e-05", "loss": 0.6478, "slid_loss": 0.6832, "step": 3204, "time": 72.01 }, { "epoch": 2.48, "learning_rate": "3.0565e-05", "loss": 0.7072, "slid_loss": 0.6833, "step": 3205, "time": 71.81 }, { "epoch": 2.48, "learning_rate": "3.0555e-05", "loss": 0.6646, "slid_loss": 0.6835, "step": 3206, "time": 71.43 }, { "epoch": 2.48, "learning_rate": "3.0545e-05", "loss": 0.7456, "slid_loss": 0.6843, "step": 3207, "time": 70.76 }, { "epoch": 2.48, "learning_rate": "3.0535e-05", "loss": 0.6742, "slid_loss": 0.6842, "step": 3208, "time": 71.62 }, { "epoch": 2.48, "learning_rate": "3.0526e-05", "loss": 0.6622, "slid_loss": 0.6842, "step": 3209, "time": 72.25 }, { "epoch": 2.48, "learning_rate": "3.0516e-05", "loss": 0.6722, "slid_loss": 0.684, "step": 3210, "time": 70.79 }, { "epoch": 2.48, "learning_rate": "3.0506e-05", "loss": 0.6802, "slid_loss": 0.6844, "step": 3211, "time": 71.67 }, { "epoch": 2.49, "learning_rate": "3.0496e-05", "loss": 0.7078, "slid_loss": 0.6844, "step": 3212, "time": 71.08 }, { "epoch": 2.49, "learning_rate": "3.0486e-05", "loss": 0.6928, "slid_loss": 0.6841, "step": 3213, "time": 71.8 }, { "epoch": 2.49, "learning_rate": "3.0476e-05", "loss": 0.7406, "slid_loss": 0.6847, "step": 3214, "time": 74.48 }, { "epoch": 2.49, "learning_rate": "3.0467e-05", "loss": 0.7008, "slid_loss": 0.6847, "step": 3215, "time": 71.77 }, { "epoch": 2.49, "learning_rate": "3.0457e-05", "loss": 0.6613, "slid_loss": 0.6845, "step": 3216, "time": 72.45 }, { "epoch": 2.49, "learning_rate": "3.0447e-05", "loss": 0.6977, "slid_loss": 0.6846, "step": 3217, "time": 72.46 }, { "epoch": 2.49, "learning_rate": "3.0437e-05", "loss": 0.6572, "slid_loss": 0.6844, "step": 3218, "time": 70.85 }, { "epoch": 2.49, "learning_rate": "3.0427e-05", "loss": 0.6917, "slid_loss": 0.6844, "step": 3219, "time": 73.78 }, { "epoch": 2.49, "learning_rate": "3.0418e-05", "loss": 0.723, "slid_loss": 0.6851, "step": 3220, "time": 72.8 }, { "epoch": 2.49, "learning_rate": "3.0408e-05", "loss": 0.6978, "slid_loss": 0.6853, "step": 3221, "time": 72.12 }, { "epoch": 2.49, "learning_rate": "3.0398e-05", "loss": 0.6831, "slid_loss": 0.685, "step": 3222, "time": 72.44 }, { "epoch": 2.49, "learning_rate": "3.0388e-05", "loss": 0.6789, "slid_loss": 0.6847, "step": 3223, "time": 70.72 }, { "epoch": 2.49, "learning_rate": "3.0378e-05", "loss": 0.7163, "slid_loss": 0.6853, "step": 3224, "time": 72.68 }, { "epoch": 2.5, "learning_rate": "3.0368e-05", "loss": 0.6497, "slid_loss": 0.6849, "step": 3225, "time": 71.34 }, { "epoch": 2.5, "learning_rate": "3.0359e-05", "loss": 0.671, "slid_loss": 0.6845, "step": 3226, "time": 70.72 }, { "epoch": 2.5, "learning_rate": "3.0349e-05", "loss": 0.7217, "slid_loss": 0.6847, "step": 3227, "time": 71.58 }, { "epoch": 2.5, "learning_rate": "3.0339e-05", "loss": 0.6458, "slid_loss": 0.6842, "step": 3228, "time": 72.19 }, { "epoch": 2.5, "learning_rate": "3.0329e-05", "loss": 0.7284, "slid_loss": 0.6845, "step": 3229, "time": 71.01 }, { "epoch": 2.5, "learning_rate": "3.0319e-05", "loss": 0.6734, "slid_loss": 0.6843, "step": 3230, "time": 72.04 }, { "epoch": 2.5, "learning_rate": "3.0309e-05", "loss": 0.688, "slid_loss": 0.6846, "step": 3231, "time": 71.87 }, { "epoch": 2.5, "learning_rate": "3.0300e-05", "loss": 0.6644, "slid_loss": 0.6845, "step": 3232, "time": 70.83 }, { "epoch": 2.5, "learning_rate": "3.0290e-05", "loss": 0.702, "slid_loss": 0.6844, "step": 3233, "time": 71.8 }, { "epoch": 2.5, "learning_rate": "3.0280e-05", "loss": 0.6843, "slid_loss": 0.6846, "step": 3234, "time": 70.82 }, { "epoch": 2.5, "learning_rate": "3.0270e-05", "loss": 0.6912, "slid_loss": 0.685, "step": 3235, "time": 72.23 }, { "epoch": 2.5, "learning_rate": "3.0260e-05", "loss": 0.6973, "slid_loss": 0.6853, "step": 3236, "time": 71.0 }, { "epoch": 2.5, "learning_rate": "3.0251e-05", "loss": 0.6441, "slid_loss": 0.6852, "step": 3237, "time": 71.58 }, { "epoch": 2.51, "learning_rate": "3.0241e-05", "loss": 0.6453, "slid_loss": 0.6845, "step": 3238, "time": 70.32 }, { "epoch": 2.51, "learning_rate": "3.0231e-05", "loss": 0.6418, "slid_loss": 0.6839, "step": 3239, "time": 71.65 }, { "epoch": 2.51, "learning_rate": "3.0221e-05", "loss": 0.6795, "slid_loss": 0.6841, "step": 3240, "time": 72.87 }, { "epoch": 2.51, "learning_rate": "3.0211e-05", "loss": 0.704, "slid_loss": 0.6843, "step": 3241, "time": 72.22 }, { "epoch": 2.51, "learning_rate": "3.0201e-05", "loss": 0.7063, "slid_loss": 0.6844, "step": 3242, "time": 71.87 }, { "epoch": 2.51, "learning_rate": "3.0192e-05", "loss": 0.6686, "slid_loss": 0.684, "step": 3243, "time": 70.91 }, { "epoch": 2.51, "learning_rate": "3.0182e-05", "loss": 0.6804, "slid_loss": 0.684, "step": 3244, "time": 71.74 }, { "epoch": 2.51, "learning_rate": "3.0172e-05", "loss": 0.6965, "slid_loss": 0.6841, "step": 3245, "time": 71.6 }, { "epoch": 2.51, "learning_rate": "3.0162e-05", "loss": 0.677, "slid_loss": 0.6839, "step": 3246, "time": 71.61 }, { "epoch": 2.51, "learning_rate": "3.0152e-05", "loss": 0.6842, "slid_loss": 0.684, "step": 3247, "time": 72.68 }, { "epoch": 2.51, "learning_rate": "3.0142e-05", "loss": 0.7134, "slid_loss": 0.6841, "step": 3248, "time": 72.2 }, { "epoch": 2.51, "learning_rate": "3.0133e-05", "loss": 0.6605, "slid_loss": 0.6839, "step": 3249, "time": 72.59 }, { "epoch": 2.51, "learning_rate": "3.0123e-05", "loss": 0.6745, "slid_loss": 0.684, "step": 3250, "time": 71.69 }, { "epoch": 2.52, "learning_rate": "3.0113e-05", "loss": 0.6803, "slid_loss": 0.6842, "step": 3251, "time": 70.83 }, { "epoch": 2.52, "learning_rate": "3.0103e-05", "loss": 0.7078, "slid_loss": 0.6844, "step": 3252, "time": 70.59 }, { "epoch": 2.52, "learning_rate": "3.0093e-05", "loss": 0.7034, "slid_loss": 0.6847, "step": 3253, "time": 70.84 }, { "epoch": 2.52, "learning_rate": "3.0084e-05", "loss": 0.6904, "slid_loss": 0.6847, "step": 3254, "time": 71.79 }, { "epoch": 2.52, "learning_rate": "3.0074e-05", "loss": 0.6816, "slid_loss": 0.685, "step": 3255, "time": 71.04 }, { "epoch": 2.52, "learning_rate": "3.0064e-05", "loss": 0.6767, "slid_loss": 0.6847, "step": 3256, "time": 72.67 }, { "epoch": 2.52, "learning_rate": "3.0054e-05", "loss": 0.7107, "slid_loss": 0.6846, "step": 3257, "time": 73.59 }, { "epoch": 2.52, "learning_rate": "3.0044e-05", "loss": 0.6365, "slid_loss": 0.6843, "step": 3258, "time": 70.8 }, { "epoch": 2.52, "learning_rate": "3.0034e-05", "loss": 0.651, "slid_loss": 0.6841, "step": 3259, "time": 72.3 }, { "epoch": 2.52, "learning_rate": "3.0025e-05", "loss": 0.6811, "slid_loss": 0.6844, "step": 3260, "time": 72.95 }, { "epoch": 2.52, "learning_rate": "3.0015e-05", "loss": 0.7094, "slid_loss": 0.6848, "step": 3261, "time": 71.52 }, { "epoch": 2.52, "learning_rate": "3.0005e-05", "loss": 0.6525, "slid_loss": 0.6846, "step": 3262, "time": 71.66 }, { "epoch": 2.52, "learning_rate": "2.9995e-05", "loss": 0.6849, "slid_loss": 0.6849, "step": 3263, "time": 72.38 }, { "epoch": 2.53, "learning_rate": "2.9985e-05", "loss": 0.7028, "slid_loss": 0.6851, "step": 3264, "time": 71.9 }, { "epoch": 2.53, "learning_rate": "2.9975e-05", "loss": 0.6786, "slid_loss": 0.6853, "step": 3265, "time": 70.64 }, { "epoch": 2.53, "learning_rate": "2.9966e-05", "loss": 0.6979, "slid_loss": 0.6855, "step": 3266, "time": 71.16 }, { "epoch": 2.53, "learning_rate": "2.9956e-05", "loss": 0.6614, "slid_loss": 0.6852, "step": 3267, "time": 72.41 }, { "epoch": 2.53, "learning_rate": "2.9946e-05", "loss": 0.6995, "slid_loss": 0.685, "step": 3268, "time": 72.29 }, { "epoch": 2.53, "learning_rate": "2.9936e-05", "loss": 0.6704, "slid_loss": 0.6848, "step": 3269, "time": 72.37 }, { "epoch": 2.53, "learning_rate": "2.9926e-05", "loss": 0.7236, "slid_loss": 0.6854, "step": 3270, "time": 70.86 }, { "epoch": 2.53, "learning_rate": "2.9916e-05", "loss": 0.6679, "slid_loss": 0.6852, "step": 3271, "time": 72.31 }, { "epoch": 2.53, "learning_rate": "2.9907e-05", "loss": 0.6542, "slid_loss": 0.6847, "step": 3272, "time": 73.22 }, { "epoch": 2.53, "learning_rate": "2.9897e-05", "loss": 0.7081, "slid_loss": 0.6849, "step": 3273, "time": 71.33 }, { "epoch": 2.53, "learning_rate": "2.9887e-05", "loss": 0.6746, "slid_loss": 0.6852, "step": 3274, "time": 70.52 }, { "epoch": 2.53, "learning_rate": "2.9877e-05", "loss": 0.6787, "slid_loss": 0.6848, "step": 3275, "time": 72.35 }, { "epoch": 2.53, "learning_rate": "2.9867e-05", "loss": 0.6559, "slid_loss": 0.6844, "step": 3276, "time": 71.59 }, { "epoch": 2.54, "learning_rate": "2.9858e-05", "loss": 0.7011, "slid_loss": 0.6845, "step": 3277, "time": 72.08 }, { "epoch": 2.54, "learning_rate": "2.9848e-05", "loss": 0.6777, "slid_loss": 0.6845, "step": 3278, "time": 71.54 }, { "epoch": 2.54, "learning_rate": "2.9838e-05", "loss": 0.6529, "slid_loss": 0.6843, "step": 3279, "time": 71.68 }, { "epoch": 2.54, "learning_rate": "2.9828e-05", "loss": 0.6937, "slid_loss": 0.6843, "step": 3280, "time": 72.74 }, { "epoch": 2.54, "learning_rate": "2.9818e-05", "loss": 0.6757, "slid_loss": 0.6837, "step": 3281, "time": 71.35 }, { "epoch": 2.54, "learning_rate": "2.9808e-05", "loss": 0.7039, "slid_loss": 0.6843, "step": 3282, "time": 72.1 }, { "epoch": 2.54, "learning_rate": "2.9799e-05", "loss": 0.7035, "slid_loss": 0.6843, "step": 3283, "time": 71.31 }, { "epoch": 2.54, "learning_rate": "2.9789e-05", "loss": 0.6807, "slid_loss": 0.6844, "step": 3284, "time": 71.06 }, { "epoch": 2.54, "learning_rate": "2.9779e-05", "loss": 0.7052, "slid_loss": 0.6846, "step": 3285, "time": 72.11 }, { "epoch": 2.54, "learning_rate": "2.9769e-05", "loss": 0.665, "slid_loss": 0.6844, "step": 3286, "time": 71.63 }, { "epoch": 2.54, "learning_rate": "2.9759e-05", "loss": 0.6715, "slid_loss": 0.6844, "step": 3287, "time": 71.99 }, { "epoch": 2.54, "learning_rate": "2.9749e-05", "loss": 0.678, "slid_loss": 0.6842, "step": 3288, "time": 71.95 }, { "epoch": 2.54, "learning_rate": "2.9740e-05", "loss": 0.7108, "slid_loss": 0.6844, "step": 3289, "time": 71.56 }, { "epoch": 2.55, "learning_rate": "2.9730e-05", "loss": 0.7133, "slid_loss": 0.6848, "step": 3290, "time": 71.09 }, { "epoch": 2.55, "learning_rate": "2.9720e-05", "loss": 0.7386, "slid_loss": 0.6857, "step": 3291, "time": 71.38 }, { "epoch": 2.55, "learning_rate": "2.9710e-05", "loss": 0.6868, "slid_loss": 0.6859, "step": 3292, "time": 70.89 }, { "epoch": 2.55, "learning_rate": "2.9700e-05", "loss": 0.6779, "slid_loss": 0.6858, "step": 3293, "time": 71.42 }, { "epoch": 2.55, "learning_rate": "2.9691e-05", "loss": 0.6829, "slid_loss": 0.6856, "step": 3294, "time": 71.98 }, { "epoch": 2.55, "learning_rate": "2.9681e-05", "loss": 0.6733, "slid_loss": 0.6856, "step": 3295, "time": 72.79 }, { "epoch": 2.55, "learning_rate": "2.9671e-05", "loss": 0.6935, "slid_loss": 0.6856, "step": 3296, "time": 71.19 }, { "epoch": 2.55, "learning_rate": "2.9661e-05", "loss": 0.6959, "slid_loss": 0.6859, "step": 3297, "time": 71.57 }, { "epoch": 2.55, "learning_rate": "2.9651e-05", "loss": 0.6844, "slid_loss": 0.6858, "step": 3298, "time": 71.21 }, { "epoch": 2.55, "learning_rate": "2.9641e-05", "loss": 0.7118, "slid_loss": 0.6863, "step": 3299, "time": 71.11 }, { "epoch": 2.55, "learning_rate": "2.9632e-05", "loss": 0.6569, "slid_loss": 0.6859, "step": 3300, "time": 71.25 }, { "epoch": 2.55, "learning_rate": "2.9622e-05", "loss": 0.6746, "slid_loss": 0.6857, "step": 3301, "time": 72.19 }, { "epoch": 2.55, "learning_rate": "2.9612e-05", "loss": 0.6468, "slid_loss": 0.6851, "step": 3302, "time": 72.0 }, { "epoch": 2.56, "learning_rate": "2.9602e-05", "loss": 0.6599, "slid_loss": 0.6845, "step": 3303, "time": 71.11 }, { "epoch": 2.56, "learning_rate": "2.9592e-05", "loss": 0.6521, "slid_loss": 0.6846, "step": 3304, "time": 72.42 }, { "epoch": 2.56, "learning_rate": "2.9582e-05", "loss": 0.6449, "slid_loss": 0.6839, "step": 3305, "time": 71.49 }, { "epoch": 2.56, "learning_rate": "2.9573e-05", "loss": 0.6768, "slid_loss": 0.6841, "step": 3306, "time": 72.65 }, { "epoch": 2.56, "learning_rate": "2.9563e-05", "loss": 0.6899, "slid_loss": 0.6835, "step": 3307, "time": 72.02 }, { "epoch": 2.56, "learning_rate": "2.9553e-05", "loss": 0.7149, "slid_loss": 0.6839, "step": 3308, "time": 71.87 }, { "epoch": 2.56, "learning_rate": "2.9543e-05", "loss": 0.6557, "slid_loss": 0.6838, "step": 3309, "time": 71.92 }, { "epoch": 2.56, "learning_rate": "2.9533e-05", "loss": 0.6852, "slid_loss": 0.684, "step": 3310, "time": 71.53 }, { "epoch": 2.56, "learning_rate": "2.9524e-05", "loss": 0.7012, "slid_loss": 0.6842, "step": 3311, "time": 71.47 }, { "epoch": 2.56, "learning_rate": "2.9514e-05", "loss": 0.6814, "slid_loss": 0.6839, "step": 3312, "time": 71.55 }, { "epoch": 2.56, "learning_rate": "2.9504e-05", "loss": 0.6901, "slid_loss": 0.6839, "step": 3313, "time": 71.28 }, { "epoch": 2.56, "learning_rate": "2.9494e-05", "loss": 0.691, "slid_loss": 0.6834, "step": 3314, "time": 72.74 }, { "epoch": 2.56, "learning_rate": "2.9484e-05", "loss": 0.6679, "slid_loss": 0.6831, "step": 3315, "time": 70.81 }, { "epoch": 2.57, "learning_rate": "2.9474e-05", "loss": 0.6831, "slid_loss": 0.6833, "step": 3316, "time": 71.38 }, { "epoch": 2.57, "learning_rate": "2.9465e-05", "loss": 0.6672, "slid_loss": 0.683, "step": 3317, "time": 92.62 }, { "epoch": 2.57, "learning_rate": "2.9455e-05", "loss": 0.6704, "slid_loss": 0.6831, "step": 3318, "time": 71.4 }, { "epoch": 2.57, "learning_rate": "2.9445e-05", "loss": 0.6829, "slid_loss": 0.683, "step": 3319, "time": 71.56 }, { "epoch": 2.57, "learning_rate": "2.9435e-05", "loss": 0.657, "slid_loss": 0.6824, "step": 3320, "time": 71.5 }, { "epoch": 2.57, "learning_rate": "2.9425e-05", "loss": 0.6497, "slid_loss": 0.6819, "step": 3321, "time": 71.68 }, { "epoch": 2.57, "learning_rate": "2.9415e-05", "loss": 0.6961, "slid_loss": 0.682, "step": 3322, "time": 71.81 }, { "epoch": 2.57, "learning_rate": "2.9406e-05", "loss": 0.6626, "slid_loss": 0.6819, "step": 3323, "time": 71.65 }, { "epoch": 2.57, "learning_rate": "2.9396e-05", "loss": 0.7344, "slid_loss": 0.682, "step": 3324, "time": 71.1 }, { "epoch": 2.57, "learning_rate": "2.9386e-05", "loss": 0.6415, "slid_loss": 0.682, "step": 3325, "time": 98.03 }, { "epoch": 2.57, "learning_rate": "2.9376e-05", "loss": 0.7189, "slid_loss": 0.6824, "step": 3326, "time": 70.18 }, { "epoch": 2.57, "learning_rate": "2.9366e-05", "loss": 0.6842, "slid_loss": 0.6821, "step": 3327, "time": 71.69 }, { "epoch": 2.57, "learning_rate": "2.9357e-05", "loss": 0.7025, "slid_loss": 0.6826, "step": 3328, "time": 83.8 }, { "epoch": 2.58, "learning_rate": "2.9347e-05", "loss": 0.7054, "slid_loss": 0.6824, "step": 3329, "time": 70.84 }, { "epoch": 2.58, "learning_rate": "2.9337e-05", "loss": 0.6456, "slid_loss": 0.6821, "step": 3330, "time": 71.58 }, { "epoch": 2.58, "learning_rate": "2.9327e-05", "loss": 0.6855, "slid_loss": 0.6821, "step": 3331, "time": 86.37 }, { "epoch": 2.58, "learning_rate": "2.9317e-05", "loss": 0.6633, "slid_loss": 0.6821, "step": 3332, "time": 110.63 }, { "epoch": 2.58, "learning_rate": "2.9307e-05", "loss": 0.6916, "slid_loss": 0.682, "step": 3333, "time": 126.93 }, { "epoch": 2.58, "learning_rate": "2.9298e-05", "loss": 0.651, "slid_loss": 0.6816, "step": 3334, "time": 121.34 }, { "epoch": 2.58, "learning_rate": "2.9288e-05", "loss": 0.6707, "slid_loss": 0.6814, "step": 3335, "time": 148.03 }, { "epoch": 2.58, "learning_rate": "2.9278e-05", "loss": 0.7247, "slid_loss": 0.6817, "step": 3336, "time": 145.81 }, { "epoch": 2.58, "learning_rate": "2.9268e-05", "loss": 0.6764, "slid_loss": 0.682, "step": 3337, "time": 159.67 }, { "epoch": 2.58, "learning_rate": "2.9258e-05", "loss": 0.7066, "slid_loss": 0.6827, "step": 3338, "time": 121.77 }, { "epoch": 2.58, "learning_rate": "2.9249e-05", "loss": 0.7214, "slid_loss": 0.6834, "step": 3339, "time": 116.81 }, { "epoch": 2.58, "learning_rate": "2.9239e-05", "loss": 0.6742, "slid_loss": 0.6834, "step": 3340, "time": 132.21 }, { "epoch": 2.58, "learning_rate": "2.9229e-05", "loss": 0.7132, "slid_loss": 0.6835, "step": 3341, "time": 118.33 }, { "epoch": 2.59, "learning_rate": "2.9219e-05", "loss": 0.6685, "slid_loss": 0.6831, "step": 3342, "time": 149.0 }, { "epoch": 2.59, "learning_rate": "2.9209e-05", "loss": 0.6803, "slid_loss": 0.6832, "step": 3343, "time": 130.61 }, { "epoch": 2.59, "learning_rate": "2.9199e-05", "loss": 0.6914, "slid_loss": 0.6833, "step": 3344, "time": 109.45 }, { "epoch": 2.59, "learning_rate": "2.9190e-05", "loss": 0.6971, "slid_loss": 0.6833, "step": 3345, "time": 83.08 }, { "epoch": 2.59, "learning_rate": "2.9180e-05", "loss": 0.6317, "slid_loss": 0.6829, "step": 3346, "time": 85.01 }, { "epoch": 2.59, "learning_rate": "2.9170e-05", "loss": 0.7015, "slid_loss": 0.6831, "step": 3347, "time": 92.71 }, { "epoch": 2.59, "learning_rate": "2.9160e-05", "loss": 0.6874, "slid_loss": 0.6828, "step": 3348, "time": 70.6 }, { "epoch": 2.59, "learning_rate": "2.9150e-05", "loss": 0.6928, "slid_loss": 0.6831, "step": 3349, "time": 72.0 }, { "epoch": 2.59, "learning_rate": "2.9141e-05", "loss": 0.6743, "slid_loss": 0.6831, "step": 3350, "time": 71.82 }, { "epoch": 2.59, "learning_rate": "2.9131e-05", "loss": 0.6576, "slid_loss": 0.6829, "step": 3351, "time": 72.15 }, { "epoch": 2.59, "learning_rate": "2.9121e-05", "loss": 0.6699, "slid_loss": 0.6825, "step": 3352, "time": 71.78 }, { "epoch": 2.59, "learning_rate": "2.9111e-05", "loss": 0.7595, "slid_loss": 0.6831, "step": 3353, "time": 72.24 }, { "epoch": 2.59, "learning_rate": "2.9101e-05", "loss": 0.7132, "slid_loss": 0.6833, "step": 3354, "time": 74.28 }, { "epoch": 2.6, "learning_rate": "2.9091e-05", "loss": 0.652, "slid_loss": 0.683, "step": 3355, "time": 73.06 }, { "epoch": 2.6, "learning_rate": "2.9082e-05", "loss": 0.6979, "slid_loss": 0.6832, "step": 3356, "time": 72.68 }, { "epoch": 2.6, "learning_rate": "2.9072e-05", "loss": 0.6892, "slid_loss": 0.683, "step": 3357, "time": 72.25 }, { "epoch": 2.6, "learning_rate": "2.9062e-05", "loss": 0.6752, "slid_loss": 0.6834, "step": 3358, "time": 72.77 }, { "epoch": 2.6, "learning_rate": "2.9052e-05", "loss": 0.671, "slid_loss": 0.6836, "step": 3359, "time": 72.21 }, { "epoch": 2.6, "learning_rate": "2.9042e-05", "loss": 0.6892, "slid_loss": 0.6837, "step": 3360, "time": 71.37 }, { "epoch": 2.6, "learning_rate": "2.9033e-05", "loss": 0.6506, "slid_loss": 0.6831, "step": 3361, "time": 70.84 }, { "epoch": 2.6, "learning_rate": "2.9023e-05", "loss": 0.6752, "slid_loss": 0.6833, "step": 3362, "time": 72.59 }, { "epoch": 2.6, "learning_rate": "2.9013e-05", "loss": 0.6355, "slid_loss": 0.6828, "step": 3363, "time": 72.2 }, { "epoch": 2.6, "learning_rate": "2.9003e-05", "loss": 0.6885, "slid_loss": 0.6827, "step": 3364, "time": 72.09 }, { "epoch": 2.6, "learning_rate": "2.8993e-05", "loss": 0.6678, "slid_loss": 0.6826, "step": 3365, "time": 71.61 }, { "epoch": 2.6, "learning_rate": "2.8984e-05", "loss": 0.7073, "slid_loss": 0.6827, "step": 3366, "time": 71.99 }, { "epoch": 2.61, "learning_rate": "2.8974e-05", "loss": 0.717, "slid_loss": 0.6832, "step": 3367, "time": 70.06 }, { "epoch": 2.61, "learning_rate": "2.8964e-05", "loss": 0.6662, "slid_loss": 0.6829, "step": 3368, "time": 70.77 }, { "epoch": 2.61, "learning_rate": "2.8954e-05", "loss": 0.6888, "slid_loss": 0.6831, "step": 3369, "time": 70.76 }, { "epoch": 2.61, "learning_rate": "2.8944e-05", "loss": 0.7134, "slid_loss": 0.683, "step": 3370, "time": 71.95 }, { "epoch": 2.61, "learning_rate": "2.8934e-05", "loss": 0.6614, "slid_loss": 0.6829, "step": 3371, "time": 71.61 }, { "epoch": 2.61, "learning_rate": "2.8925e-05", "loss": 0.6865, "slid_loss": 0.6832, "step": 3372, "time": 70.9 }, { "epoch": 2.61, "learning_rate": "2.8915e-05", "loss": 0.6709, "slid_loss": 0.6829, "step": 3373, "time": 71.49 }, { "epoch": 2.61, "learning_rate": "2.8905e-05", "loss": 0.6636, "slid_loss": 0.6827, "step": 3374, "time": 71.64 }, { "epoch": 2.61, "learning_rate": "2.8895e-05", "loss": 0.6466, "slid_loss": 0.6824, "step": 3375, "time": 72.51 }, { "epoch": 2.61, "learning_rate": "2.8885e-05", "loss": 0.7034, "slid_loss": 0.6829, "step": 3376, "time": 71.5 }, { "epoch": 2.61, "learning_rate": "2.8876e-05", "loss": 0.7193, "slid_loss": 0.6831, "step": 3377, "time": 70.8 }, { "epoch": 2.61, "learning_rate": "2.8866e-05", "loss": 0.6702, "slid_loss": 0.683, "step": 3378, "time": 72.58 }, { "epoch": 2.61, "learning_rate": "2.8856e-05", "loss": 0.6563, "slid_loss": 0.683, "step": 3379, "time": 73.6 }, { "epoch": 2.62, "learning_rate": "2.8846e-05", "loss": 0.6685, "slid_loss": 0.6828, "step": 3380, "time": 71.09 }, { "epoch": 2.62, "learning_rate": "2.8836e-05", "loss": 0.6875, "slid_loss": 0.6829, "step": 3381, "time": 71.91 }, { "epoch": 2.62, "learning_rate": "2.8827e-05", "loss": 0.6567, "slid_loss": 0.6824, "step": 3382, "time": 72.66 }, { "epoch": 2.62, "learning_rate": "2.8817e-05", "loss": 0.6926, "slid_loss": 0.6823, "step": 3383, "time": 71.08 }, { "epoch": 2.62, "learning_rate": "2.8807e-05", "loss": 0.6702, "slid_loss": 0.6822, "step": 3384, "time": 71.35 }, { "epoch": 2.62, "learning_rate": "2.8797e-05", "loss": 0.7004, "slid_loss": 0.6822, "step": 3385, "time": 71.48 }, { "epoch": 2.62, "learning_rate": "2.8787e-05", "loss": 0.6728, "slid_loss": 0.6822, "step": 3386, "time": 72.69 }, { "epoch": 2.62, "learning_rate": "2.8778e-05", "loss": 0.7077, "slid_loss": 0.6826, "step": 3387, "time": 72.35 }, { "epoch": 2.62, "learning_rate": "2.8768e-05", "loss": 0.6663, "slid_loss": 0.6825, "step": 3388, "time": 71.73 }, { "epoch": 2.62, "learning_rate": "2.8758e-05", "loss": 0.6871, "slid_loss": 0.6823, "step": 3389, "time": 72.41 }, { "epoch": 2.62, "learning_rate": "2.8748e-05", "loss": 0.6516, "slid_loss": 0.6816, "step": 3390, "time": 71.65 }, { "epoch": 2.62, "learning_rate": "2.8738e-05", "loss": 0.7009, "slid_loss": 0.6813, "step": 3391, "time": 73.41 }, { "epoch": 2.62, "learning_rate": "2.8729e-05", "loss": 0.6878, "slid_loss": 0.6813, "step": 3392, "time": 71.06 }, { "epoch": 2.63, "learning_rate": "2.8719e-05", "loss": 0.6972, "slid_loss": 0.6815, "step": 3393, "time": 71.93 }, { "epoch": 2.63, "learning_rate": "2.8709e-05", "loss": 0.6863, "slid_loss": 0.6815, "step": 3394, "time": 72.01 }, { "epoch": 2.63, "learning_rate": "2.8699e-05", "loss": 0.6618, "slid_loss": 0.6814, "step": 3395, "time": 72.66 }, { "epoch": 2.63, "learning_rate": "2.8689e-05", "loss": 0.7134, "slid_loss": 0.6816, "step": 3396, "time": 71.21 }, { "epoch": 2.63, "learning_rate": "2.8679e-05", "loss": 0.7033, "slid_loss": 0.6817, "step": 3397, "time": 72.5 }, { "epoch": 2.63, "learning_rate": "2.8670e-05", "loss": 0.7321, "slid_loss": 0.6821, "step": 3398, "time": 72.1 }, { "epoch": 2.63, "learning_rate": "2.8660e-05", "loss": 0.6721, "slid_loss": 0.6817, "step": 3399, "time": 71.71 }, { "epoch": 2.63, "learning_rate": "2.8650e-05", "loss": 0.6824, "slid_loss": 0.682, "step": 3400, "time": 71.08 }, { "epoch": 2.63, "learning_rate": "2.8640e-05", "loss": 0.6755, "slid_loss": 0.682, "step": 3401, "time": 853.15 }, { "epoch": 2.63, "learning_rate": "2.8630e-05", "loss": 0.6622, "slid_loss": 0.6822, "step": 3402, "time": 71.92 }, { "epoch": 2.63, "learning_rate": "2.8621e-05", "loss": 0.7103, "slid_loss": 0.6827, "step": 3403, "time": 72.4 }, { "epoch": 2.63, "learning_rate": "2.8611e-05", "loss": 0.6773, "slid_loss": 0.6829, "step": 3404, "time": 73.33 }, { "epoch": 2.63, "learning_rate": "2.8601e-05", "loss": 0.6448, "slid_loss": 0.6829, "step": 3405, "time": 73.06 }, { "epoch": 2.64, "learning_rate": "2.8591e-05", "loss": 0.6808, "slid_loss": 0.6829, "step": 3406, "time": 71.23 }, { "epoch": 2.64, "learning_rate": "2.8581e-05", "loss": 0.6351, "slid_loss": 0.6824, "step": 3407, "time": 70.84 }, { "epoch": 2.64, "learning_rate": "2.8572e-05", "loss": 0.6746, "slid_loss": 0.682, "step": 3408, "time": 73.48 }, { "epoch": 2.64, "learning_rate": "2.8562e-05", "loss": 0.6777, "slid_loss": 0.6822, "step": 3409, "time": 72.66 }, { "epoch": 2.64, "learning_rate": "2.8552e-05", "loss": 0.6441, "slid_loss": 0.6818, "step": 3410, "time": 73.15 }, { "epoch": 2.64, "learning_rate": "2.8542e-05", "loss": 0.6563, "slid_loss": 0.6814, "step": 3411, "time": 72.49 }, { "epoch": 2.64, "learning_rate": "2.8532e-05", "loss": 0.7339, "slid_loss": 0.6819, "step": 3412, "time": 71.43 }, { "epoch": 2.64, "learning_rate": "2.8523e-05", "loss": 0.6527, "slid_loss": 0.6815, "step": 3413, "time": 71.88 }, { "epoch": 2.64, "learning_rate": "2.8513e-05", "loss": 0.6597, "slid_loss": 0.6812, "step": 3414, "time": 72.57 }, { "epoch": 2.64, "learning_rate": "2.8503e-05", "loss": 0.6884, "slid_loss": 0.6814, "step": 3415, "time": 72.25 }, { "epoch": 2.64, "learning_rate": "2.8493e-05", "loss": 0.6796, "slid_loss": 0.6814, "step": 3416, "time": 72.05 }, { "epoch": 2.64, "learning_rate": "2.8483e-05", "loss": 0.6656, "slid_loss": 0.6813, "step": 3417, "time": 71.15 }, { "epoch": 2.64, "learning_rate": "2.8474e-05", "loss": 0.6961, "slid_loss": 0.6816, "step": 3418, "time": 71.75 }, { "epoch": 2.65, "learning_rate": "2.8464e-05", "loss": 0.6854, "slid_loss": 0.6816, "step": 3419, "time": 71.06 }, { "epoch": 2.65, "learning_rate": "2.8454e-05", "loss": 0.6482, "slid_loss": 0.6815, "step": 3420, "time": 72.31 }, { "epoch": 2.65, "learning_rate": "2.8444e-05", "loss": 0.6468, "slid_loss": 0.6815, "step": 3421, "time": 71.22 }, { "epoch": 2.65, "learning_rate": "2.8434e-05", "loss": 0.7092, "slid_loss": 0.6816, "step": 3422, "time": 150.44 }, { "epoch": 2.65, "learning_rate": "2.8425e-05", "loss": 0.7323, "slid_loss": 0.6823, "step": 3423, "time": 71.93 }, { "epoch": 2.65, "learning_rate": "2.8415e-05", "loss": 0.6847, "slid_loss": 0.6818, "step": 3424, "time": 70.74 }, { "epoch": 2.65, "learning_rate": "2.8405e-05", "loss": 0.6601, "slid_loss": 0.682, "step": 3425, "time": 72.67 }, { "epoch": 2.65, "learning_rate": "2.8395e-05", "loss": 0.7103, "slid_loss": 0.6819, "step": 3426, "time": 71.79 }, { "epoch": 2.65, "learning_rate": "2.8386e-05", "loss": 0.7026, "slid_loss": 0.6821, "step": 3427, "time": 72.98 }, { "epoch": 2.65, "learning_rate": "2.8376e-05", "loss": 0.666, "slid_loss": 0.6818, "step": 3428, "time": 72.39 }, { "epoch": 2.65, "learning_rate": "2.8366e-05", "loss": 0.6848, "slid_loss": 0.6816, "step": 3429, "time": 71.98 }, { "epoch": 2.65, "learning_rate": "2.8356e-05", "loss": 0.6539, "slid_loss": 0.6816, "step": 3430, "time": 71.88 }, { "epoch": 2.65, "learning_rate": "2.8346e-05", "loss": 0.6775, "slid_loss": 0.6816, "step": 3431, "time": 72.55 }, { "epoch": 2.66, "learning_rate": "2.8337e-05", "loss": 0.7082, "slid_loss": 0.682, "step": 3432, "time": 71.2 }, { "epoch": 2.66, "learning_rate": "2.8327e-05", "loss": 0.6744, "slid_loss": 0.6818, "step": 3433, "time": 71.81 }, { "epoch": 2.66, "learning_rate": "2.8317e-05", "loss": 0.6718, "slid_loss": 0.682, "step": 3434, "time": 71.3 }, { "epoch": 2.66, "learning_rate": "2.8307e-05", "loss": 0.6404, "slid_loss": 0.6817, "step": 3435, "time": 71.66 }, { "epoch": 2.66, "learning_rate": "2.8297e-05", "loss": 0.7003, "slid_loss": 0.6815, "step": 3436, "time": 72.04 }, { "epoch": 2.66, "learning_rate": "2.8288e-05", "loss": 0.6782, "slid_loss": 0.6815, "step": 3437, "time": 71.9 }, { "epoch": 2.66, "learning_rate": "2.8278e-05", "loss": 0.6949, "slid_loss": 0.6814, "step": 3438, "time": 71.45 }, { "epoch": 2.66, "learning_rate": "2.8268e-05", "loss": 0.6536, "slid_loss": 0.6807, "step": 3439, "time": 72.38 }, { "epoch": 2.66, "learning_rate": "2.8258e-05", "loss": 0.702, "slid_loss": 0.681, "step": 3440, "time": 70.85 }, { "epoch": 2.66, "learning_rate": "2.8248e-05", "loss": 0.6571, "slid_loss": 0.6804, "step": 3441, "time": 71.73 }, { "epoch": 2.66, "learning_rate": "2.8239e-05", "loss": 0.6629, "slid_loss": 0.6804, "step": 3442, "time": 71.06 }, { "epoch": 2.66, "learning_rate": "2.8229e-05", "loss": 0.6676, "slid_loss": 0.6803, "step": 3443, "time": 71.79 }, { "epoch": 2.66, "learning_rate": "2.8219e-05", "loss": 0.6638, "slid_loss": 0.68, "step": 3444, "time": 71.86 }, { "epoch": 2.67, "learning_rate": "2.8209e-05", "loss": 0.6602, "slid_loss": 0.6796, "step": 3445, "time": 71.93 }, { "epoch": 2.67, "learning_rate": "2.8200e-05", "loss": 0.6932, "slid_loss": 0.6802, "step": 3446, "time": 71.55 }, { "epoch": 2.67, "learning_rate": "2.8190e-05", "loss": 0.6824, "slid_loss": 0.68, "step": 3447, "time": 70.46 }, { "epoch": 2.67, "learning_rate": "2.8180e-05", "loss": 0.6994, "slid_loss": 0.6802, "step": 3448, "time": 71.58 }, { "epoch": 2.67, "learning_rate": "2.8170e-05", "loss": 0.6695, "slid_loss": 0.6799, "step": 3449, "time": 71.72 }, { "epoch": 2.67, "learning_rate": "2.8160e-05", "loss": 0.6678, "slid_loss": 0.6799, "step": 3450, "time": 72.55 }, { "epoch": 2.67, "learning_rate": "2.8151e-05", "loss": 0.6591, "slid_loss": 0.6799, "step": 3451, "time": 71.58 }, { "epoch": 2.67, "learning_rate": "2.8141e-05", "loss": 0.6613, "slid_loss": 0.6798, "step": 3452, "time": 72.47 }, { "epoch": 2.67, "learning_rate": "2.8131e-05", "loss": 0.66, "slid_loss": 0.6788, "step": 3453, "time": 70.24 }, { "epoch": 2.67, "learning_rate": "2.8121e-05", "loss": 0.6817, "slid_loss": 0.6785, "step": 3454, "time": 72.29 }, { "epoch": 2.67, "learning_rate": "2.8111e-05", "loss": 0.7139, "slid_loss": 0.6791, "step": 3455, "time": 72.26 }, { "epoch": 2.67, "learning_rate": "2.8102e-05", "loss": 0.6784, "slid_loss": 0.6789, "step": 3456, "time": 71.52 }, { "epoch": 2.67, "learning_rate": "2.8092e-05", "loss": 0.6942, "slid_loss": 0.679, "step": 3457, "time": 71.82 }, { "epoch": 2.68, "learning_rate": "2.8082e-05", "loss": 0.6449, "slid_loss": 0.6787, "step": 3458, "time": 71.04 }, { "epoch": 2.68, "learning_rate": "2.8072e-05", "loss": 0.6697, "slid_loss": 0.6786, "step": 3459, "time": 71.22 }, { "epoch": 2.68, "learning_rate": "2.8063e-05", "loss": 0.6827, "slid_loss": 0.6786, "step": 3460, "time": 71.1 }, { "epoch": 2.68, "learning_rate": "2.8053e-05", "loss": 0.6523, "slid_loss": 0.6786, "step": 3461, "time": 70.74 }, { "epoch": 2.68, "learning_rate": "2.8043e-05", "loss": 0.6398, "slid_loss": 0.6782, "step": 3462, "time": 72.54 }, { "epoch": 2.68, "learning_rate": "2.8033e-05", "loss": 0.6996, "slid_loss": 0.6789, "step": 3463, "time": 71.23 }, { "epoch": 2.68, "learning_rate": "2.8023e-05", "loss": 0.6731, "slid_loss": 0.6787, "step": 3464, "time": 72.79 }, { "epoch": 2.68, "learning_rate": "2.8014e-05", "loss": 0.6782, "slid_loss": 0.6788, "step": 3465, "time": 71.34 }, { "epoch": 2.68, "learning_rate": "2.8004e-05", "loss": 0.6858, "slid_loss": 0.6786, "step": 3466, "time": 72.29 }, { "epoch": 2.68, "learning_rate": "2.7994e-05", "loss": 0.6848, "slid_loss": 0.6783, "step": 3467, "time": 71.55 }, { "epoch": 2.68, "learning_rate": "2.7984e-05", "loss": 0.6467, "slid_loss": 0.6781, "step": 3468, "time": 72.9 }, { "epoch": 2.68, "learning_rate": "2.7975e-05", "loss": 0.6676, "slid_loss": 0.6779, "step": 3469, "time": 71.86 }, { "epoch": 2.68, "learning_rate": "2.7965e-05", "loss": 0.6971, "slid_loss": 0.6777, "step": 3470, "time": 71.95 }, { "epoch": 2.69, "learning_rate": "2.7955e-05", "loss": 0.6704, "slid_loss": 0.6778, "step": 3471, "time": 70.99 }, { "epoch": 2.69, "learning_rate": "2.7945e-05", "loss": 0.6952, "slid_loss": 0.6779, "step": 3472, "time": 71.14 }, { "epoch": 2.69, "learning_rate": "2.7935e-05", "loss": 0.6669, "slid_loss": 0.6779, "step": 3473, "time": 70.36 }, { "epoch": 2.69, "learning_rate": "2.7926e-05", "loss": 0.6842, "slid_loss": 0.6781, "step": 3474, "time": 71.21 }, { "epoch": 2.69, "learning_rate": "2.7916e-05", "loss": 0.6743, "slid_loss": 0.6783, "step": 3475, "time": 71.38 }, { "epoch": 2.69, "learning_rate": "2.7906e-05", "loss": 0.6646, "slid_loss": 0.678, "step": 3476, "time": 74.9 }, { "epoch": 2.69, "learning_rate": "2.7896e-05", "loss": 0.6552, "slid_loss": 0.6773, "step": 3477, "time": 72.07 }, { "epoch": 2.69, "learning_rate": "2.7887e-05", "loss": 0.6537, "slid_loss": 0.6771, "step": 3478, "time": 72.03 }, { "epoch": 2.69, "learning_rate": "2.7877e-05", "loss": 0.7032, "slid_loss": 0.6776, "step": 3479, "time": 72.37 }, { "epoch": 2.69, "learning_rate": "2.7867e-05", "loss": 0.6639, "slid_loss": 0.6776, "step": 3480, "time": 84.11 }, { "epoch": 2.69, "learning_rate": "2.7857e-05", "loss": 0.6616, "slid_loss": 0.6773, "step": 3481, "time": 202.44 }, { "epoch": 2.69, "learning_rate": "2.7848e-05", "loss": 0.6805, "slid_loss": 0.6776, "step": 3482, "time": 71.97 }, { "epoch": 2.69, "learning_rate": "2.7838e-05", "loss": 0.6771, "slid_loss": 0.6774, "step": 3483, "time": 84.9 }, { "epoch": 2.7, "learning_rate": "2.7828e-05", "loss": 0.6629, "slid_loss": 0.6773, "step": 3484, "time": 85.42 }, { "epoch": 2.7, "learning_rate": "2.7818e-05", "loss": 0.6779, "slid_loss": 0.6771, "step": 3485, "time": 73.54 }, { "epoch": 2.7, "learning_rate": "2.7808e-05", "loss": 0.6575, "slid_loss": 0.6769, "step": 3486, "time": 71.22 }, { "epoch": 2.7, "learning_rate": "2.7799e-05", "loss": 0.6855, "slid_loss": 0.6767, "step": 3487, "time": 84.48 }, { "epoch": 2.7, "learning_rate": "2.7789e-05", "loss": 0.6763, "slid_loss": 0.6768, "step": 3488, "time": 71.41 }, { "epoch": 2.7, "learning_rate": "2.7779e-05", "loss": 0.6771, "slid_loss": 0.6767, "step": 3489, "time": 84.79 }, { "epoch": 2.7, "learning_rate": "2.7769e-05", "loss": 0.6742, "slid_loss": 0.677, "step": 3490, "time": 99.01 }, { "epoch": 2.7, "learning_rate": "2.7760e-05", "loss": 0.738, "slid_loss": 0.6773, "step": 3491, "time": 109.02 }, { "epoch": 2.7, "learning_rate": "2.7750e-05", "loss": 0.6876, "slid_loss": 0.6773, "step": 3492, "time": 107.58 }, { "epoch": 2.7, "learning_rate": "2.7740e-05", "loss": 0.6652, "slid_loss": 0.677, "step": 3493, "time": 147.34 }, { "epoch": 2.7, "learning_rate": "2.7730e-05", "loss": 0.6563, "slid_loss": 0.6767, "step": 3494, "time": 144.45 }, { "epoch": 2.7, "learning_rate": "2.7721e-05", "loss": 0.7053, "slid_loss": 0.6771, "step": 3495, "time": 150.07 }, { "epoch": 2.7, "learning_rate": "2.7711e-05", "loss": 0.6702, "slid_loss": 0.6767, "step": 3496, "time": 144.83 }, { "epoch": 2.71, "learning_rate": "2.7701e-05", "loss": 0.662, "slid_loss": 0.6763, "step": 3497, "time": 142.63 }, { "epoch": 2.71, "learning_rate": "2.7691e-05", "loss": 0.6585, "slid_loss": 0.6756, "step": 3498, "time": 110.11 }, { "epoch": 2.71, "learning_rate": "2.7682e-05", "loss": 0.682, "slid_loss": 0.6757, "step": 3499, "time": 156.82 }, { "epoch": 2.71, "learning_rate": "2.7672e-05", "loss": 0.6437, "slid_loss": 0.6753, "step": 3500, "time": 111.24 }, { "epoch": 2.71, "learning_rate": "2.7662e-05", "loss": 0.6779, "slid_loss": 0.6753, "step": 3501, "time": 108.34 }, { "epoch": 2.71, "learning_rate": "2.7652e-05", "loss": 0.6439, "slid_loss": 0.6751, "step": 3502, "time": 152.69 }, { "epoch": 2.71, "learning_rate": "2.7643e-05", "loss": 0.6823, "slid_loss": 0.6748, "step": 3503, "time": 121.39 }, { "epoch": 2.71, "learning_rate": "2.7633e-05", "loss": 0.6655, "slid_loss": 0.6747, "step": 3504, "time": 70.91 }, { "epoch": 2.71, "learning_rate": "2.7623e-05", "loss": 0.663, "slid_loss": 0.6749, "step": 3505, "time": 94.6 }, { "epoch": 2.71, "learning_rate": "2.7613e-05", "loss": 0.6819, "slid_loss": 0.6749, "step": 3506, "time": 80.57 }, { "epoch": 2.71, "learning_rate": "2.7604e-05", "loss": 0.6867, "slid_loss": 0.6754, "step": 3507, "time": 71.16 }, { "epoch": 2.71, "learning_rate": "2.7594e-05", "loss": 0.6622, "slid_loss": 0.6753, "step": 3508, "time": 84.01 }, { "epoch": 2.71, "learning_rate": "2.7584e-05", "loss": 0.7258, "slid_loss": 0.6758, "step": 3509, "time": 71.22 }, { "epoch": 2.72, "learning_rate": "2.7574e-05", "loss": 0.6372, "slid_loss": 0.6757, "step": 3510, "time": 70.96 }, { "epoch": 2.72, "learning_rate": "2.7565e-05", "loss": 0.6569, "slid_loss": 0.6757, "step": 3511, "time": 72.74 }, { "epoch": 2.72, "learning_rate": "2.7555e-05", "loss": 0.6895, "slid_loss": 0.6753, "step": 3512, "time": 72.01 }, { "epoch": 2.72, "learning_rate": "2.7545e-05", "loss": 0.6971, "slid_loss": 0.6757, "step": 3513, "time": 72.44 }, { "epoch": 2.72, "learning_rate": "2.7535e-05", "loss": 0.6671, "slid_loss": 0.6758, "step": 3514, "time": 71.67 }, { "epoch": 2.72, "learning_rate": "2.7526e-05", "loss": 0.7211, "slid_loss": 0.6761, "step": 3515, "time": 71.95 }, { "epoch": 2.72, "learning_rate": "2.7516e-05", "loss": 0.7032, "slid_loss": 0.6763, "step": 3516, "time": 71.65 }, { "epoch": 2.72, "learning_rate": "2.7506e-05", "loss": 0.6886, "slid_loss": 0.6766, "step": 3517, "time": 71.43 }, { "epoch": 2.72, "learning_rate": "2.7496e-05", "loss": 0.6426, "slid_loss": 0.676, "step": 3518, "time": 71.23 }, { "epoch": 2.72, "learning_rate": "2.7487e-05", "loss": 0.6869, "slid_loss": 0.6761, "step": 3519, "time": 70.73 }, { "epoch": 2.72, "learning_rate": "2.7477e-05", "loss": 0.679, "slid_loss": 0.6764, "step": 3520, "time": 70.7 }, { "epoch": 2.72, "learning_rate": "2.7467e-05", "loss": 0.6947, "slid_loss": 0.6768, "step": 3521, "time": 71.41 }, { "epoch": 2.72, "learning_rate": "2.7457e-05", "loss": 0.6273, "slid_loss": 0.676, "step": 3522, "time": 72.79 }, { "epoch": 2.73, "learning_rate": "2.7448e-05", "loss": 0.6744, "slid_loss": 0.6754, "step": 3523, "time": 71.31 }, { "epoch": 2.73, "learning_rate": "2.7438e-05", "loss": 0.6225, "slid_loss": 0.6748, "step": 3524, "time": 70.95 }, { "epoch": 2.73, "learning_rate": "2.7428e-05", "loss": 0.6555, "slid_loss": 0.6748, "step": 3525, "time": 71.44 }, { "epoch": 2.73, "learning_rate": "2.7418e-05", "loss": 0.6826, "slid_loss": 0.6745, "step": 3526, "time": 71.57 }, { "epoch": 2.73, "learning_rate": "2.7409e-05", "loss": 0.6972, "slid_loss": 0.6744, "step": 3527, "time": 72.19 }, { "epoch": 2.73, "learning_rate": "2.7399e-05", "loss": 0.6848, "slid_loss": 0.6746, "step": 3528, "time": 91.69 }, { "epoch": 2.73, "learning_rate": "2.7389e-05", "loss": 0.6669, "slid_loss": 0.6745, "step": 3529, "time": 72.1 }, { "epoch": 2.73, "learning_rate": "2.7379e-05", "loss": 0.6742, "slid_loss": 0.6747, "step": 3530, "time": 71.05 }, { "epoch": 2.73, "learning_rate": "2.7370e-05", "loss": 0.6862, "slid_loss": 0.6747, "step": 3531, "time": 71.77 }, { "epoch": 2.73, "learning_rate": "2.7360e-05", "loss": 0.6918, "slid_loss": 0.6746, "step": 3532, "time": 679.07 }, { "epoch": 2.73, "learning_rate": "2.7350e-05", "loss": 0.6678, "slid_loss": 0.6745, "step": 3533, "time": 641.48 }, { "epoch": 2.73, "learning_rate": "2.7340e-05", "loss": 0.6672, "slid_loss": 0.6745, "step": 3534, "time": 274.28 }, { "epoch": 2.74, "learning_rate": "2.7331e-05", "loss": 0.6629, "slid_loss": 0.6747, "step": 3535, "time": 167.38 }, { "epoch": 2.74, "learning_rate": "2.7321e-05", "loss": 0.7009, "slid_loss": 0.6747, "step": 3536, "time": 71.93 }, { "epoch": 2.74, "learning_rate": "2.7311e-05", "loss": 0.6534, "slid_loss": 0.6745, "step": 3537, "time": 74.18 }, { "epoch": 2.74, "learning_rate": "2.7301e-05", "loss": 0.6425, "slid_loss": 0.6739, "step": 3538, "time": 71.58 }, { "epoch": 2.74, "learning_rate": "2.7292e-05", "loss": 0.6316, "slid_loss": 0.6737, "step": 3539, "time": 72.73 }, { "epoch": 2.74, "learning_rate": "2.7282e-05", "loss": 0.6683, "slid_loss": 0.6734, "step": 3540, "time": 207.59 }, { "epoch": 2.74, "learning_rate": "2.7272e-05", "loss": 0.6974, "slid_loss": 0.6738, "step": 3541, "time": 72.25 }, { "epoch": 2.74, "learning_rate": "2.7262e-05", "loss": 0.6553, "slid_loss": 0.6737, "step": 3542, "time": 70.95 }, { "epoch": 2.74, "learning_rate": "2.7253e-05", "loss": 0.6907, "slid_loss": 0.6739, "step": 3543, "time": 71.27 }, { "epoch": 2.74, "learning_rate": "2.7243e-05", "loss": 0.6149, "slid_loss": 0.6734, "step": 3544, "time": 71.47 }, { "epoch": 2.74, "learning_rate": "2.7233e-05", "loss": 0.6509, "slid_loss": 0.6733, "step": 3545, "time": 71.56 }, { "epoch": 2.74, "learning_rate": "2.7224e-05", "loss": 0.6835, "slid_loss": 0.6732, "step": 3546, "time": 71.29 }, { "epoch": 2.74, "learning_rate": "2.7214e-05", "loss": 0.6902, "slid_loss": 0.6733, "step": 3547, "time": 71.84 }, { "epoch": 2.75, "learning_rate": "2.7204e-05", "loss": 0.641, "slid_loss": 0.6727, "step": 3548, "time": 308.94 }, { "epoch": 2.75, "learning_rate": "2.7194e-05", "loss": 0.6795, "slid_loss": 0.6728, "step": 3549, "time": 72.03 }, { "epoch": 2.75, "learning_rate": "2.7185e-05", "loss": 0.6641, "slid_loss": 0.6728, "step": 3550, "time": 71.35 }, { "epoch": 2.75, "learning_rate": "2.7175e-05", "loss": 0.6556, "slid_loss": 0.6728, "step": 3551, "time": 71.93 }, { "epoch": 2.75, "learning_rate": "2.7165e-05", "loss": 0.6618, "slid_loss": 0.6728, "step": 3552, "time": 83.41 }, { "epoch": 2.75, "learning_rate": "2.7155e-05", "loss": 0.7021, "slid_loss": 0.6732, "step": 3553, "time": 71.7 }, { "epoch": 2.75, "learning_rate": "2.7146e-05", "loss": 0.6565, "slid_loss": 0.6729, "step": 3554, "time": 71.82 }, { "epoch": 2.75, "learning_rate": "2.7136e-05", "loss": 0.654, "slid_loss": 0.6723, "step": 3555, "time": 74.79 }, { "epoch": 2.75, "learning_rate": "2.7126e-05", "loss": 0.6795, "slid_loss": 0.6724, "step": 3556, "time": 70.27 }, { "epoch": 2.75, "learning_rate": "2.7117e-05", "loss": 0.6922, "slid_loss": 0.6723, "step": 3557, "time": 167.78 }, { "epoch": 2.75, "learning_rate": "2.7107e-05", "loss": 0.6241, "slid_loss": 0.6721, "step": 3558, "time": 71.7 }, { "epoch": 2.75, "learning_rate": "2.7097e-05", "loss": 0.6409, "slid_loss": 0.6718, "step": 3559, "time": 71.42 }, { "epoch": 2.75, "learning_rate": "2.7087e-05", "loss": 0.6774, "slid_loss": 0.6718, "step": 3560, "time": 71.18 }, { "epoch": 2.76, "learning_rate": "2.7078e-05", "loss": 0.6851, "slid_loss": 0.6721, "step": 3561, "time": 166.61 }, { "epoch": 2.76, "learning_rate": "2.7068e-05", "loss": 0.6542, "slid_loss": 0.6723, "step": 3562, "time": 133.3 }, { "epoch": 2.76, "learning_rate": "2.7058e-05", "loss": 0.6653, "slid_loss": 0.6719, "step": 3563, "time": 71.71 }, { "epoch": 2.76, "learning_rate": "2.7049e-05", "loss": 0.6336, "slid_loss": 0.6715, "step": 3564, "time": 185.18 }, { "epoch": 2.76, "learning_rate": "2.7039e-05", "loss": 0.6976, "slid_loss": 0.6717, "step": 3565, "time": 71.58 }, { "epoch": 2.76, "learning_rate": "2.7029e-05", "loss": 0.6776, "slid_loss": 0.6716, "step": 3566, "time": 72.09 }, { "epoch": 2.76, "learning_rate": "2.7019e-05", "loss": 0.6526, "slid_loss": 0.6713, "step": 3567, "time": 72.12 }, { "epoch": 2.76, "learning_rate": "2.7010e-05", "loss": 0.6707, "slid_loss": 0.6715, "step": 3568, "time": 70.31 }, { "epoch": 2.76, "learning_rate": "2.7000e-05", "loss": 0.6645, "slid_loss": 0.6715, "step": 3569, "time": 71.08 }, { "epoch": 2.76, "learning_rate": "2.6990e-05", "loss": 0.7002, "slid_loss": 0.6715, "step": 3570, "time": 71.11 }, { "epoch": 2.76, "learning_rate": "2.6981e-05", "loss": 0.6337, "slid_loss": 0.6712, "step": 3571, "time": 72.26 }, { "epoch": 2.76, "learning_rate": "2.6971e-05", "loss": 0.6678, "slid_loss": 0.6709, "step": 3572, "time": 197.67 }, { "epoch": 2.76, "learning_rate": "2.6961e-05", "loss": 0.7048, "slid_loss": 0.6713, "step": 3573, "time": 71.04 }, { "epoch": 2.77, "learning_rate": "2.6951e-05", "loss": 0.7139, "slid_loss": 0.6716, "step": 3574, "time": 71.99 }, { "epoch": 2.77, "learning_rate": "2.6942e-05", "loss": 0.6617, "slid_loss": 0.6715, "step": 3575, "time": 71.79 }, { "epoch": 2.77, "learning_rate": "2.6932e-05", "loss": 0.6521, "slid_loss": 0.6713, "step": 3576, "time": 73.21 }, { "epoch": 2.77, "learning_rate": "2.6922e-05", "loss": 0.7229, "slid_loss": 0.672, "step": 3577, "time": 71.02 }, { "epoch": 2.77, "learning_rate": "2.6913e-05", "loss": 0.6802, "slid_loss": 0.6723, "step": 3578, "time": 73.19 }, { "epoch": 2.77, "learning_rate": "2.6903e-05", "loss": 0.6587, "slid_loss": 0.6718, "step": 3579, "time": 71.31 }, { "epoch": 2.77, "learning_rate": "2.6893e-05", "loss": 0.6894, "slid_loss": 0.6721, "step": 3580, "time": 71.5 }, { "epoch": 2.77, "learning_rate": "2.6883e-05", "loss": 0.6601, "slid_loss": 0.6721, "step": 3581, "time": 71.09 }, { "epoch": 2.77, "learning_rate": "2.6874e-05", "loss": 0.6746, "slid_loss": 0.672, "step": 3582, "time": 72.06 }, { "epoch": 2.77, "learning_rate": "2.6864e-05", "loss": 0.6584, "slid_loss": 0.6718, "step": 3583, "time": 70.73 }, { "epoch": 2.77, "learning_rate": "2.6854e-05", "loss": 0.6472, "slid_loss": 0.6717, "step": 3584, "time": 71.14 }, { "epoch": 2.77, "learning_rate": "2.6845e-05", "loss": 0.6861, "slid_loss": 0.6718, "step": 3585, "time": 71.99 }, { "epoch": 2.77, "learning_rate": "2.6835e-05", "loss": 0.6518, "slid_loss": 0.6717, "step": 3586, "time": 71.27 }, { "epoch": 2.78, "learning_rate": "2.6825e-05", "loss": 0.6767, "slid_loss": 0.6716, "step": 3587, "time": 71.14 }, { "epoch": 2.78, "learning_rate": "2.6816e-05", "loss": 0.6722, "slid_loss": 0.6716, "step": 3588, "time": 71.96 }, { "epoch": 2.78, "learning_rate": "2.6806e-05", "loss": 0.6405, "slid_loss": 0.6712, "step": 3589, "time": 72.21 }, { "epoch": 2.78, "learning_rate": "2.6796e-05", "loss": 0.6312, "slid_loss": 0.6708, "step": 3590, "time": 72.44 }, { "epoch": 2.78, "learning_rate": "2.6786e-05", "loss": 0.6455, "slid_loss": 0.6698, "step": 3591, "time": 70.89 }, { "epoch": 2.78, "learning_rate": "2.6777e-05", "loss": 0.6698, "slid_loss": 0.6697, "step": 3592, "time": 71.0 }, { "epoch": 2.78, "learning_rate": "2.6767e-05", "loss": 0.6945, "slid_loss": 0.67, "step": 3593, "time": 74.28 }, { "epoch": 2.78, "learning_rate": "2.6757e-05", "loss": 0.6698, "slid_loss": 0.6701, "step": 3594, "time": 71.37 }, { "epoch": 2.78, "learning_rate": "2.6748e-05", "loss": 0.7156, "slid_loss": 0.6702, "step": 3595, "time": 71.62 }, { "epoch": 2.78, "learning_rate": "2.6738e-05", "loss": 0.7098, "slid_loss": 0.6706, "step": 3596, "time": 71.61 }, { "epoch": 2.78, "learning_rate": "2.6728e-05", "loss": 0.6999, "slid_loss": 0.671, "step": 3597, "time": 70.56 }, { "epoch": 2.78, "learning_rate": "2.6719e-05", "loss": 0.6582, "slid_loss": 0.671, "step": 3598, "time": 71.68 }, { "epoch": 2.78, "learning_rate": "2.6709e-05", "loss": 0.6605, "slid_loss": 0.6708, "step": 3599, "time": 71.93 }, { "epoch": 2.79, "learning_rate": "2.6699e-05", "loss": 0.6831, "slid_loss": 0.6711, "step": 3600, "time": 71.14 }, { "epoch": 2.79, "learning_rate": "2.6689e-05", "loss": 0.6595, "slid_loss": 0.671, "step": 3601, "time": 785.4 }, { "epoch": 2.79, "learning_rate": "2.6680e-05", "loss": 0.6637, "slid_loss": 0.6712, "step": 3602, "time": 72.02 }, { "epoch": 2.79, "learning_rate": "2.6670e-05", "loss": 0.6526, "slid_loss": 0.6709, "step": 3603, "time": 72.13 }, { "epoch": 2.79, "learning_rate": "2.6660e-05", "loss": 0.6354, "slid_loss": 0.6706, "step": 3604, "time": 71.87 }, { "epoch": 2.79, "learning_rate": "2.6651e-05", "loss": 0.676, "slid_loss": 0.6707, "step": 3605, "time": 70.55 }, { "epoch": 2.79, "learning_rate": "2.6641e-05", "loss": 0.6877, "slid_loss": 0.6708, "step": 3606, "time": 69.98 }, { "epoch": 2.79, "learning_rate": "2.6631e-05", "loss": 0.662, "slid_loss": 0.6705, "step": 3607, "time": 73.03 }, { "epoch": 2.79, "learning_rate": "2.6622e-05", "loss": 0.673, "slid_loss": 0.6706, "step": 3608, "time": 71.06 }, { "epoch": 2.79, "learning_rate": "2.6612e-05", "loss": 0.6626, "slid_loss": 0.67, "step": 3609, "time": 70.93 }, { "epoch": 2.79, "learning_rate": "2.6602e-05", "loss": 0.6903, "slid_loss": 0.6705, "step": 3610, "time": 72.93 }, { "epoch": 2.79, "learning_rate": "2.6593e-05", "loss": 0.6936, "slid_loss": 0.6709, "step": 3611, "time": 72.98 }, { "epoch": 2.79, "learning_rate": "2.6583e-05", "loss": 0.6998, "slid_loss": 0.671, "step": 3612, "time": 75.22 }, { "epoch": 2.8, "learning_rate": "2.6573e-05", "loss": 0.6967, "slid_loss": 0.671, "step": 3613, "time": 71.75 }, { "epoch": 2.8, "learning_rate": "2.6564e-05", "loss": 0.6424, "slid_loss": 0.6707, "step": 3614, "time": 72.83 }, { "epoch": 2.8, "learning_rate": "2.6554e-05", "loss": 0.6392, "slid_loss": 0.6699, "step": 3615, "time": 72.14 }, { "epoch": 2.8, "learning_rate": "2.6544e-05", "loss": 0.6598, "slid_loss": 0.6695, "step": 3616, "time": 71.91 }, { "epoch": 2.8, "learning_rate": "2.6535e-05", "loss": 0.6463, "slid_loss": 0.6691, "step": 3617, "time": 72.66 }, { "epoch": 2.8, "learning_rate": "2.6525e-05", "loss": 0.6673, "slid_loss": 0.6693, "step": 3618, "time": 71.37 }, { "epoch": 2.8, "learning_rate": "2.6515e-05", "loss": 0.68, "slid_loss": 0.6692, "step": 3619, "time": 71.74 }, { "epoch": 2.8, "learning_rate": "2.6506e-05", "loss": 0.6622, "slid_loss": 0.6691, "step": 3620, "time": 74.37 }, { "epoch": 2.8, "learning_rate": "2.6496e-05", "loss": 0.6838, "slid_loss": 0.669, "step": 3621, "time": 70.91 }, { "epoch": 2.8, "learning_rate": "2.6486e-05", "loss": 0.67, "slid_loss": 0.6694, "step": 3622, "time": 71.52 }, { "epoch": 2.8, "learning_rate": "2.6477e-05", "loss": 0.6355, "slid_loss": 0.669, "step": 3623, "time": 71.69 }, { "epoch": 2.8, "learning_rate": "2.6467e-05", "loss": 0.6875, "slid_loss": 0.6696, "step": 3624, "time": 71.93 }, { "epoch": 2.8, "learning_rate": "2.6457e-05", "loss": 0.6917, "slid_loss": 0.67, "step": 3625, "time": 72.33 }, { "epoch": 2.81, "learning_rate": "2.6448e-05", "loss": 0.6928, "slid_loss": 0.6701, "step": 3626, "time": 71.4 }, { "epoch": 2.81, "learning_rate": "2.6438e-05", "loss": 0.6541, "slid_loss": 0.6697, "step": 3627, "time": 71.85 }, { "epoch": 2.81, "learning_rate": "2.6428e-05", "loss": 0.667, "slid_loss": 0.6695, "step": 3628, "time": 71.47 }, { "epoch": 2.81, "learning_rate": "2.6419e-05", "loss": 0.6146, "slid_loss": 0.669, "step": 3629, "time": 71.27 }, { "epoch": 2.81, "learning_rate": "2.6409e-05", "loss": 0.6724, "slid_loss": 0.669, "step": 3630, "time": 71.58 }, { "epoch": 2.81, "learning_rate": "2.6399e-05", "loss": 0.6802, "slid_loss": 0.6689, "step": 3631, "time": 73.12 }, { "epoch": 2.81, "learning_rate": "2.6390e-05", "loss": 0.6516, "slid_loss": 0.6685, "step": 3632, "time": 72.38 }, { "epoch": 2.81, "learning_rate": "2.6380e-05", "loss": 0.6432, "slid_loss": 0.6682, "step": 3633, "time": 71.99 }, { "epoch": 2.81, "learning_rate": "2.6370e-05", "loss": 0.6667, "slid_loss": 0.6682, "step": 3634, "time": 71.55 }, { "epoch": 2.81, "learning_rate": "2.6361e-05", "loss": 0.6524, "slid_loss": 0.6681, "step": 3635, "time": 70.68 }, { "epoch": 2.81, "learning_rate": "2.6351e-05", "loss": 0.6794, "slid_loss": 0.6679, "step": 3636, "time": 71.58 }, { "epoch": 2.81, "learning_rate": "2.6341e-05", "loss": 0.6627, "slid_loss": 0.668, "step": 3637, "time": 71.99 }, { "epoch": 2.81, "learning_rate": "2.6332e-05", "loss": 0.6916, "slid_loss": 0.6685, "step": 3638, "time": 71.34 }, { "epoch": 2.82, "learning_rate": "2.6322e-05", "loss": 0.6423, "slid_loss": 0.6686, "step": 3639, "time": 72.74 }, { "epoch": 2.82, "learning_rate": "2.6312e-05", "loss": 0.6577, "slid_loss": 0.6685, "step": 3640, "time": 72.38 }, { "epoch": 2.82, "learning_rate": "2.6303e-05", "loss": 0.6895, "slid_loss": 0.6684, "step": 3641, "time": 71.28 }, { "epoch": 2.82, "learning_rate": "2.6293e-05", "loss": 0.6665, "slid_loss": 0.6685, "step": 3642, "time": 72.25 }, { "epoch": 2.82, "learning_rate": "2.6283e-05", "loss": 0.6813, "slid_loss": 0.6685, "step": 3643, "time": 96.54 }, { "epoch": 2.82, "learning_rate": "2.6274e-05", "loss": 0.667, "slid_loss": 0.669, "step": 3644, "time": 84.25 }, { "epoch": 2.82, "learning_rate": "2.6264e-05", "loss": 0.6392, "slid_loss": 0.6689, "step": 3645, "time": 71.38 }, { "epoch": 2.82, "learning_rate": "2.6254e-05", "loss": 0.6716, "slid_loss": 0.6687, "step": 3646, "time": 71.11 }, { "epoch": 2.82, "learning_rate": "2.6245e-05", "loss": 0.6783, "slid_loss": 0.6686, "step": 3647, "time": 83.17 }, { "epoch": 2.82, "learning_rate": "2.6235e-05", "loss": 0.6798, "slid_loss": 0.669, "step": 3648, "time": 96.3 }, { "epoch": 2.82, "learning_rate": "2.6225e-05", "loss": 0.6477, "slid_loss": 0.6687, "step": 3649, "time": 95.09 }, { "epoch": 2.82, "learning_rate": "2.6216e-05", "loss": 0.6646, "slid_loss": 0.6687, "step": 3650, "time": 85.23 }, { "epoch": 2.82, "learning_rate": "2.6206e-05", "loss": 0.6006, "slid_loss": 0.6681, "step": 3651, "time": 120.96 }, { "epoch": 2.83, "learning_rate": "2.6196e-05", "loss": 0.67, "slid_loss": 0.6682, "step": 3652, "time": 96.41 }, { "epoch": 2.83, "learning_rate": "2.6187e-05", "loss": 0.6843, "slid_loss": 0.668, "step": 3653, "time": 143.42 }, { "epoch": 2.83, "learning_rate": "2.6177e-05", "loss": 0.6645, "slid_loss": 0.6681, "step": 3654, "time": 147.78 }, { "epoch": 2.83, "learning_rate": "2.6167e-05", "loss": 0.6771, "slid_loss": 0.6684, "step": 3655, "time": 144.4 }, { "epoch": 2.83, "learning_rate": "2.6158e-05", "loss": 0.6365, "slid_loss": 0.6679, "step": 3656, "time": 169.29 }, { "epoch": 2.83, "learning_rate": "2.6148e-05", "loss": 0.6972, "slid_loss": 0.668, "step": 3657, "time": 167.89 }, { "epoch": 2.83, "learning_rate": "2.6139e-05", "loss": 0.6677, "slid_loss": 0.6684, "step": 3658, "time": 109.68 }, { "epoch": 2.83, "learning_rate": "2.6129e-05", "loss": 0.6557, "slid_loss": 0.6686, "step": 3659, "time": 126.84 }, { "epoch": 2.83, "learning_rate": "2.6119e-05", "loss": 0.6857, "slid_loss": 0.6686, "step": 3660, "time": 135.51 }, { "epoch": 2.83, "learning_rate": "2.6110e-05", "loss": 0.6697, "slid_loss": 0.6685, "step": 3661, "time": 118.64 }, { "epoch": 2.83, "learning_rate": "2.6100e-05", "loss": 0.6351, "slid_loss": 0.6683, "step": 3662, "time": 95.69 }, { "epoch": 2.83, "learning_rate": "2.6090e-05", "loss": 0.6732, "slid_loss": 0.6684, "step": 3663, "time": 94.06 }, { "epoch": 2.83, "learning_rate": "2.6081e-05", "loss": 0.6467, "slid_loss": 0.6685, "step": 3664, "time": 106.2 }, { "epoch": 2.84, "learning_rate": "2.6071e-05", "loss": 0.697, "slid_loss": 0.6685, "step": 3665, "time": 70.13 }, { "epoch": 2.84, "learning_rate": "2.6061e-05", "loss": 0.64, "slid_loss": 0.6681, "step": 3666, "time": 71.38 }, { "epoch": 2.84, "learning_rate": "2.6052e-05", "loss": 0.6902, "slid_loss": 0.6685, "step": 3667, "time": 82.91 }, { "epoch": 2.84, "learning_rate": "2.6042e-05", "loss": 0.6345, "slid_loss": 0.6681, "step": 3668, "time": 73.46 }, { "epoch": 2.84, "learning_rate": "2.6033e-05", "loss": 0.6812, "slid_loss": 0.6683, "step": 3669, "time": 70.78 }, { "epoch": 2.84, "learning_rate": "2.6023e-05", "loss": 0.6562, "slid_loss": 0.6679, "step": 3670, "time": 69.96 }, { "epoch": 2.84, "learning_rate": "2.6013e-05", "loss": 0.629, "slid_loss": 0.6678, "step": 3671, "time": 72.85 }, { "epoch": 2.84, "learning_rate": "2.6004e-05", "loss": 0.6347, "slid_loss": 0.6675, "step": 3672, "time": 71.43 }, { "epoch": 2.84, "learning_rate": "2.5994e-05", "loss": 0.6904, "slid_loss": 0.6673, "step": 3673, "time": 71.5 }, { "epoch": 2.84, "learning_rate": "2.5984e-05", "loss": 0.633, "slid_loss": 0.6665, "step": 3674, "time": 72.77 }, { "epoch": 2.84, "learning_rate": "2.5975e-05", "loss": 0.6695, "slid_loss": 0.6666, "step": 3675, "time": 71.87 }, { "epoch": 2.84, "learning_rate": "2.5965e-05", "loss": 0.655, "slid_loss": 0.6666, "step": 3676, "time": 72.13 }, { "epoch": 2.84, "learning_rate": "2.5956e-05", "loss": 0.6666, "slid_loss": 0.6661, "step": 3677, "time": 71.55 }, { "epoch": 2.85, "learning_rate": "2.5946e-05", "loss": 0.6409, "slid_loss": 0.6657, "step": 3678, "time": 72.88 }, { "epoch": 2.85, "learning_rate": "2.5936e-05", "loss": 0.6478, "slid_loss": 0.6656, "step": 3679, "time": 71.03 }, { "epoch": 2.85, "learning_rate": "2.5927e-05", "loss": 0.6951, "slid_loss": 0.6656, "step": 3680, "time": 70.63 }, { "epoch": 2.85, "learning_rate": "2.5917e-05", "loss": 0.6517, "slid_loss": 0.6655, "step": 3681, "time": 72.14 }, { "epoch": 2.85, "learning_rate": "2.5907e-05", "loss": 0.6649, "slid_loss": 0.6655, "step": 3682, "time": 72.11 }, { "epoch": 2.85, "learning_rate": "2.5898e-05", "loss": 0.7, "slid_loss": 0.6659, "step": 3683, "time": 71.7 }, { "epoch": 2.85, "learning_rate": "2.5888e-05", "loss": 0.6718, "slid_loss": 0.6661, "step": 3684, "time": 71.27 }, { "epoch": 2.85, "learning_rate": "2.5879e-05", "loss": 0.6619, "slid_loss": 0.6659, "step": 3685, "time": 72.7 }, { "epoch": 2.85, "learning_rate": "2.5869e-05", "loss": 0.6757, "slid_loss": 0.6661, "step": 3686, "time": 71.15 }, { "epoch": 2.85, "learning_rate": "2.5859e-05", "loss": 0.6739, "slid_loss": 0.6661, "step": 3687, "time": 69.92 }, { "epoch": 2.85, "learning_rate": "2.5850e-05", "loss": 0.6528, "slid_loss": 0.6659, "step": 3688, "time": 72.55 }, { "epoch": 2.85, "learning_rate": "2.5840e-05", "loss": 0.6646, "slid_loss": 0.6661, "step": 3689, "time": 72.8 }, { "epoch": 2.85, "learning_rate": "2.5831e-05", "loss": 0.6737, "slid_loss": 0.6666, "step": 3690, "time": 72.61 }, { "epoch": 2.86, "learning_rate": "2.5821e-05", "loss": 0.6599, "slid_loss": 0.6667, "step": 3691, "time": 72.44 }, { "epoch": 2.86, "learning_rate": "2.5811e-05", "loss": 0.7049, "slid_loss": 0.667, "step": 3692, "time": 71.96 }, { "epoch": 2.86, "learning_rate": "2.5802e-05", "loss": 0.6642, "slid_loss": 0.6667, "step": 3693, "time": 72.21 }, { "epoch": 2.86, "learning_rate": "2.5792e-05", "loss": 0.6584, "slid_loss": 0.6666, "step": 3694, "time": 72.59 }, { "epoch": 2.86, "learning_rate": "2.5783e-05", "loss": 0.6631, "slid_loss": 0.6661, "step": 3695, "time": 72.8 }, { "epoch": 2.86, "learning_rate": "2.5773e-05", "loss": 0.6502, "slid_loss": 0.6655, "step": 3696, "time": 71.66 }, { "epoch": 2.86, "learning_rate": "2.5763e-05", "loss": 0.6875, "slid_loss": 0.6654, "step": 3697, "time": 71.01 }, { "epoch": 2.86, "learning_rate": "2.5754e-05", "loss": 0.7003, "slid_loss": 0.6658, "step": 3698, "time": 71.44 }, { "epoch": 2.86, "learning_rate": "2.5744e-05", "loss": 0.6903, "slid_loss": 0.6661, "step": 3699, "time": 72.53 }, { "epoch": 2.86, "learning_rate": "2.5735e-05", "loss": 0.6352, "slid_loss": 0.6656, "step": 3700, "time": 72.54 }, { "epoch": 2.86, "learning_rate": "2.5725e-05", "loss": 0.6573, "slid_loss": 0.6656, "step": 3701, "time": 69.63 }, { "epoch": 2.86, "learning_rate": "2.5715e-05", "loss": 0.6293, "slid_loss": 0.6653, "step": 3702, "time": 72.41 }, { "epoch": 2.86, "learning_rate": "2.5706e-05", "loss": 0.6767, "slid_loss": 0.6655, "step": 3703, "time": 72.0 }, { "epoch": 2.87, "learning_rate": "2.5696e-05", "loss": 0.6553, "slid_loss": 0.6657, "step": 3704, "time": 71.66 }, { "epoch": 2.87, "learning_rate": "2.5687e-05", "loss": 0.6566, "slid_loss": 0.6655, "step": 3705, "time": 71.02 }, { "epoch": 2.87, "learning_rate": "2.5677e-05", "loss": 0.624, "slid_loss": 0.6649, "step": 3706, "time": 71.73 }, { "epoch": 2.87, "learning_rate": "2.5667e-05", "loss": 0.7001, "slid_loss": 0.6653, "step": 3707, "time": 71.29 }, { "epoch": 2.87, "learning_rate": "2.5658e-05", "loss": 0.6657, "slid_loss": 0.6652, "step": 3708, "time": 71.86 }, { "epoch": 2.87, "learning_rate": "2.5648e-05", "loss": 0.6574, "slid_loss": 0.6651, "step": 3709, "time": 71.27 }, { "epoch": 2.87, "learning_rate": "2.5639e-05", "loss": 0.6368, "slid_loss": 0.6646, "step": 3710, "time": 71.41 }, { "epoch": 2.87, "learning_rate": "2.5629e-05", "loss": 0.6709, "slid_loss": 0.6644, "step": 3711, "time": 71.45 }, { "epoch": 2.87, "learning_rate": "2.5619e-05", "loss": 0.646, "slid_loss": 0.6638, "step": 3712, "time": 71.62 }, { "epoch": 2.87, "learning_rate": "2.5610e-05", "loss": 0.6516, "slid_loss": 0.6634, "step": 3713, "time": 73.01 }, { "epoch": 2.87, "learning_rate": "2.5600e-05", "loss": 0.6586, "slid_loss": 0.6635, "step": 3714, "time": 71.17 }, { "epoch": 2.87, "learning_rate": "2.5591e-05", "loss": 0.6446, "slid_loss": 0.6636, "step": 3715, "time": 72.17 }, { "epoch": 2.88, "learning_rate": "2.5581e-05", "loss": 0.6448, "slid_loss": 0.6634, "step": 3716, "time": 73.2 }, { "epoch": 2.88, "learning_rate": "2.5571e-05", "loss": 0.6608, "slid_loss": 0.6636, "step": 3717, "time": 71.34 }, { "epoch": 2.88, "learning_rate": "2.5562e-05", "loss": 0.6637, "slid_loss": 0.6635, "step": 3718, "time": 71.74 }, { "epoch": 2.88, "learning_rate": "2.5552e-05", "loss": 0.6591, "slid_loss": 0.6633, "step": 3719, "time": 71.52 }, { "epoch": 2.88, "learning_rate": "2.5543e-05", "loss": 0.6678, "slid_loss": 0.6634, "step": 3720, "time": 70.87 }, { "epoch": 2.88, "learning_rate": "2.5533e-05", "loss": 0.7007, "slid_loss": 0.6636, "step": 3721, "time": 71.18 }, { "epoch": 2.88, "learning_rate": "2.5524e-05", "loss": 0.6381, "slid_loss": 0.6632, "step": 3722, "time": 71.7 }, { "epoch": 2.88, "learning_rate": "2.5514e-05", "loss": 0.6817, "slid_loss": 0.6637, "step": 3723, "time": 72.2 }, { "epoch": 2.88, "learning_rate": "2.5504e-05", "loss": 0.6533, "slid_loss": 0.6634, "step": 3724, "time": 70.97 }, { "epoch": 2.88, "learning_rate": "2.5495e-05", "loss": 0.6418, "slid_loss": 0.6629, "step": 3725, "time": 71.44 }, { "epoch": 2.88, "learning_rate": "2.5485e-05", "loss": 0.6826, "slid_loss": 0.6628, "step": 3726, "time": 73.48 }, { "epoch": 2.88, "learning_rate": "2.5476e-05", "loss": 0.6955, "slid_loss": 0.6632, "step": 3727, "time": 71.47 }, { "epoch": 2.88, "learning_rate": "2.5466e-05", "loss": 0.6604, "slid_loss": 0.6631, "step": 3728, "time": 72.1 }, { "epoch": 2.89, "learning_rate": "2.5457e-05", "loss": 0.6993, "slid_loss": 0.664, "step": 3729, "time": 70.42 }, { "epoch": 2.89, "learning_rate": "2.5447e-05", "loss": 0.649, "slid_loss": 0.6637, "step": 3730, "time": 71.49 }, { "epoch": 2.89, "learning_rate": "2.5437e-05", "loss": 0.7033, "slid_loss": 0.664, "step": 3731, "time": 72.55 }, { "epoch": 2.89, "learning_rate": "2.5428e-05", "loss": 0.648, "slid_loss": 0.6639, "step": 3732, "time": 72.29 }, { "epoch": 2.89, "learning_rate": "2.5418e-05", "loss": 0.6637, "slid_loss": 0.6641, "step": 3733, "time": 71.44 }, { "epoch": 2.89, "learning_rate": "2.5409e-05", "loss": 0.6731, "slid_loss": 0.6642, "step": 3734, "time": 70.97 }, { "epoch": 2.89, "learning_rate": "2.5399e-05", "loss": 0.6721, "slid_loss": 0.6644, "step": 3735, "time": 72.08 }, { "epoch": 2.89, "learning_rate": "2.5390e-05", "loss": 0.6353, "slid_loss": 0.6639, "step": 3736, "time": 72.89 }, { "epoch": 2.89, "learning_rate": "2.5380e-05", "loss": 0.6634, "slid_loss": 0.664, "step": 3737, "time": 72.38 }, { "epoch": 2.89, "learning_rate": "2.5371e-05", "loss": 0.686, "slid_loss": 0.6639, "step": 3738, "time": 72.17 }, { "epoch": 2.89, "learning_rate": "2.5361e-05", "loss": 0.6552, "slid_loss": 0.664, "step": 3739, "time": 70.82 }, { "epoch": 2.89, "learning_rate": "2.5351e-05", "loss": 0.6728, "slid_loss": 0.6642, "step": 3740, "time": 69.8 }, { "epoch": 2.89, "learning_rate": "2.5342e-05", "loss": 0.6362, "slid_loss": 0.6636, "step": 3741, "time": 71.24 }, { "epoch": 2.9, "learning_rate": "2.5332e-05", "loss": 0.6529, "slid_loss": 0.6635, "step": 3742, "time": 71.06 }, { "epoch": 2.9, "learning_rate": "2.5323e-05", "loss": 0.675, "slid_loss": 0.6634, "step": 3743, "time": 70.71 }, { "epoch": 2.9, "learning_rate": "2.5313e-05", "loss": 0.6321, "slid_loss": 0.6631, "step": 3744, "time": 71.25 }, { "epoch": 2.9, "learning_rate": "2.5304e-05", "loss": 0.6155, "slid_loss": 0.6629, "step": 3745, "time": 71.64 }, { "epoch": 2.9, "learning_rate": "2.5294e-05", "loss": 0.6761, "slid_loss": 0.6629, "step": 3746, "time": 72.93 }, { "epoch": 2.9, "learning_rate": "2.5285e-05", "loss": 0.704, "slid_loss": 0.6632, "step": 3747, "time": 71.09 }, { "epoch": 2.9, "learning_rate": "2.5275e-05", "loss": 0.6346, "slid_loss": 0.6627, "step": 3748, "time": 72.09 }, { "epoch": 2.9, "learning_rate": "2.5265e-05", "loss": 0.6444, "slid_loss": 0.6627, "step": 3749, "time": 73.8 }, { "epoch": 2.9, "learning_rate": "2.5256e-05", "loss": 0.6391, "slid_loss": 0.6624, "step": 3750, "time": 70.75 }, { "epoch": 2.9, "learning_rate": "2.5246e-05", "loss": 0.6729, "slid_loss": 0.6631, "step": 3751, "time": 71.77 }, { "epoch": 2.9, "learning_rate": "2.5237e-05", "loss": 0.6379, "slid_loss": 0.6628, "step": 3752, "time": 72.64 }, { "epoch": 2.9, "learning_rate": "2.5227e-05", "loss": 0.6473, "slid_loss": 0.6624, "step": 3753, "time": 71.67 }, { "epoch": 2.9, "learning_rate": "2.5218e-05", "loss": 0.6348, "slid_loss": 0.6622, "step": 3754, "time": 72.59 }, { "epoch": 2.91, "learning_rate": "2.5208e-05", "loss": 0.6795, "slid_loss": 0.6622, "step": 3755, "time": 71.94 }, { "epoch": 2.91, "learning_rate": "2.5199e-05", "loss": 0.6758, "slid_loss": 0.6626, "step": 3756, "time": 71.7 }, { "epoch": 2.91, "learning_rate": "2.5189e-05", "loss": 0.6566, "slid_loss": 0.6622, "step": 3757, "time": 70.88 }, { "epoch": 2.91, "learning_rate": "2.5180e-05", "loss": 0.6553, "slid_loss": 0.662, "step": 3758, "time": 72.77 }, { "epoch": 2.91, "learning_rate": "2.5170e-05", "loss": 0.6723, "slid_loss": 0.6622, "step": 3759, "time": 69.99 }, { "epoch": 2.91, "learning_rate": "2.5161e-05", "loss": 0.6524, "slid_loss": 0.6619, "step": 3760, "time": 74.06 }, { "epoch": 2.91, "learning_rate": "2.5151e-05", "loss": 0.6832, "slid_loss": 0.662, "step": 3761, "time": 72.32 }, { "epoch": 2.91, "learning_rate": "2.5141e-05", "loss": 0.6627, "slid_loss": 0.6623, "step": 3762, "time": 71.79 }, { "epoch": 2.91, "learning_rate": "2.5132e-05", "loss": 0.6619, "slid_loss": 0.6622, "step": 3763, "time": 72.28 }, { "epoch": 2.91, "learning_rate": "2.5122e-05", "loss": 0.6863, "slid_loss": 0.6626, "step": 3764, "time": 70.97 }, { "epoch": 2.91, "learning_rate": "2.5113e-05", "loss": 0.669, "slid_loss": 0.6623, "step": 3765, "time": 71.52 }, { "epoch": 2.91, "learning_rate": "2.5103e-05", "loss": 0.661, "slid_loss": 0.6625, "step": 3766, "time": 70.76 }, { "epoch": 2.91, "learning_rate": "2.5094e-05", "loss": 0.652, "slid_loss": 0.6621, "step": 3767, "time": 70.57 }, { "epoch": 2.92, "learning_rate": "2.5084e-05", "loss": 0.7035, "slid_loss": 0.6628, "step": 3768, "time": 71.88 }, { "epoch": 2.92, "learning_rate": "2.5075e-05", "loss": 0.6507, "slid_loss": 0.6625, "step": 3769, "time": 73.45 }, { "epoch": 2.92, "learning_rate": "2.5065e-05", "loss": 0.6448, "slid_loss": 0.6624, "step": 3770, "time": 72.74 }, { "epoch": 2.92, "learning_rate": "2.5056e-05", "loss": 0.6666, "slid_loss": 0.6628, "step": 3771, "time": 71.81 }, { "epoch": 2.92, "learning_rate": "2.5046e-05", "loss": 0.6896, "slid_loss": 0.6633, "step": 3772, "time": 71.27 }, { "epoch": 2.92, "learning_rate": "2.5037e-05", "loss": 0.6536, "slid_loss": 0.6629, "step": 3773, "time": 71.51 }, { "epoch": 2.92, "learning_rate": "2.5027e-05", "loss": 0.6512, "slid_loss": 0.6631, "step": 3774, "time": 70.73 }, { "epoch": 2.92, "learning_rate": "2.5018e-05", "loss": 0.6847, "slid_loss": 0.6633, "step": 3775, "time": 70.75 }, { "epoch": 2.92, "learning_rate": "2.5008e-05", "loss": 0.6667, "slid_loss": 0.6634, "step": 3776, "time": 72.23 }, { "epoch": 2.92, "learning_rate": "2.4999e-05", "loss": 0.6953, "slid_loss": 0.6637, "step": 3777, "time": 71.71 }, { "epoch": 2.92, "learning_rate": "2.4989e-05", "loss": 0.6667, "slid_loss": 0.6639, "step": 3778, "time": 72.39 }, { "epoch": 2.92, "learning_rate": "2.4980e-05", "loss": 0.632, "slid_loss": 0.6638, "step": 3779, "time": 72.3 }, { "epoch": 2.92, "learning_rate": "2.4970e-05", "loss": 0.645, "slid_loss": 0.6633, "step": 3780, "time": 72.63 }, { "epoch": 2.93, "learning_rate": "2.4961e-05", "loss": 0.6735, "slid_loss": 0.6635, "step": 3781, "time": 71.99 }, { "epoch": 2.93, "learning_rate": "2.4951e-05", "loss": 0.6313, "slid_loss": 0.6632, "step": 3782, "time": 70.89 }, { "epoch": 2.93, "learning_rate": "2.4942e-05", "loss": 0.6631, "slid_loss": 0.6628, "step": 3783, "time": 71.1 }, { "epoch": 2.93, "learning_rate": "2.4932e-05", "loss": 0.662, "slid_loss": 0.6627, "step": 3784, "time": 71.62 }, { "epoch": 2.93, "learning_rate": "2.4923e-05", "loss": 0.6121, "slid_loss": 0.6622, "step": 3785, "time": 73.64 }, { "epoch": 2.93, "learning_rate": "2.4913e-05", "loss": 0.6747, "slid_loss": 0.6622, "step": 3786, "time": 72.68 }, { "epoch": 2.93, "learning_rate": "2.4904e-05", "loss": 0.6572, "slid_loss": 0.662, "step": 3787, "time": 72.6 }, { "epoch": 2.93, "learning_rate": "2.4894e-05", "loss": 0.6615, "slid_loss": 0.6621, "step": 3788, "time": 71.17 }, { "epoch": 2.93, "learning_rate": "2.4885e-05", "loss": 0.6903, "slid_loss": 0.6624, "step": 3789, "time": 73.65 }, { "epoch": 2.93, "learning_rate": "2.4875e-05", "loss": 0.6282, "slid_loss": 0.6619, "step": 3790, "time": 71.12 }, { "epoch": 2.93, "learning_rate": "2.4866e-05", "loss": 0.6901, "slid_loss": 0.6622, "step": 3791, "time": 70.81 }, { "epoch": 2.93, "learning_rate": "2.4856e-05", "loss": 0.6886, "slid_loss": 0.6621, "step": 3792, "time": 72.94 }, { "epoch": 2.93, "learning_rate": "2.4847e-05", "loss": 0.666, "slid_loss": 0.6621, "step": 3793, "time": 71.8 }, { "epoch": 2.94, "learning_rate": "2.4837e-05", "loss": 0.6516, "slid_loss": 0.662, "step": 3794, "time": 71.99 }, { "epoch": 2.94, "learning_rate": "2.4828e-05", "loss": 0.65, "slid_loss": 0.6619, "step": 3795, "time": 71.74 }, { "epoch": 2.94, "learning_rate": "2.4818e-05", "loss": 0.7072, "slid_loss": 0.6624, "step": 3796, "time": 72.08 }, { "epoch": 2.94, "learning_rate": "2.4809e-05", "loss": 0.665, "slid_loss": 0.6622, "step": 3797, "time": 71.28 }, { "epoch": 2.94, "learning_rate": "2.4799e-05", "loss": 0.6573, "slid_loss": 0.6618, "step": 3798, "time": 72.59 }, { "epoch": 2.94, "learning_rate": "2.4790e-05", "loss": 0.6494, "slid_loss": 0.6614, "step": 3799, "time": 70.88 }, { "epoch": 2.94, "learning_rate": "2.4780e-05", "loss": 0.6588, "slid_loss": 0.6616, "step": 3800, "time": 70.6 }, { "epoch": 2.94, "learning_rate": "2.4771e-05", "loss": 0.6948, "slid_loss": 0.662, "step": 3801, "time": 756.72 }, { "epoch": 2.94, "learning_rate": "2.4761e-05", "loss": 0.6936, "slid_loss": 0.6626, "step": 3802, "time": 86.22 }, { "epoch": 2.94, "learning_rate": "2.4752e-05", "loss": 0.6404, "slid_loss": 0.6623, "step": 3803, "time": 83.75 }, { "epoch": 2.94, "learning_rate": "2.4742e-05", "loss": 0.6429, "slid_loss": 0.6621, "step": 3804, "time": 71.65 }, { "epoch": 2.94, "learning_rate": "2.4733e-05", "loss": 0.6766, "slid_loss": 0.6623, "step": 3805, "time": 72.37 }, { "epoch": 2.94, "learning_rate": "2.4723e-05", "loss": 0.6509, "slid_loss": 0.6626, "step": 3806, "time": 83.71 }, { "epoch": 2.95, "learning_rate": "2.4714e-05", "loss": 0.6979, "slid_loss": 0.6626, "step": 3807, "time": 98.18 }, { "epoch": 2.95, "learning_rate": "2.4704e-05", "loss": 0.6737, "slid_loss": 0.6627, "step": 3808, "time": 83.82 }, { "epoch": 2.95, "learning_rate": "2.4695e-05", "loss": 0.6689, "slid_loss": 0.6628, "step": 3809, "time": 98.47 }, { "epoch": 2.95, "learning_rate": "2.4685e-05", "loss": 0.676, "slid_loss": 0.6632, "step": 3810, "time": 118.92 }, { "epoch": 2.95, "learning_rate": "2.4676e-05", "loss": 0.662, "slid_loss": 0.6631, "step": 3811, "time": 124.86 }, { "epoch": 2.95, "learning_rate": "2.4666e-05", "loss": 0.705, "slid_loss": 0.6637, "step": 3812, "time": 134.1 }, { "epoch": 2.95, "learning_rate": "2.4657e-05", "loss": 0.646, "slid_loss": 0.6636, "step": 3813, "time": 145.62 }, { "epoch": 2.95, "learning_rate": "2.4647e-05", "loss": 0.6838, "slid_loss": 0.6639, "step": 3814, "time": 133.46 }, { "epoch": 2.95, "learning_rate": "2.4638e-05", "loss": 0.6654, "slid_loss": 0.6641, "step": 3815, "time": 147.45 }, { "epoch": 2.95, "learning_rate": "2.4629e-05", "loss": 0.6278, "slid_loss": 0.6639, "step": 3816, "time": 154.58 }, { "epoch": 2.95, "learning_rate": "2.4619e-05", "loss": 0.6787, "slid_loss": 0.6641, "step": 3817, "time": 130.53 }, { "epoch": 2.95, "learning_rate": "2.4610e-05", "loss": 0.6354, "slid_loss": 0.6638, "step": 3818, "time": 138.86 }, { "epoch": 2.95, "learning_rate": "2.4600e-05", "loss": 0.7121, "slid_loss": 0.6643, "step": 3819, "time": 151.4 }, { "epoch": 2.96, "learning_rate": "2.4591e-05", "loss": 0.6171, "slid_loss": 0.6638, "step": 3820, "time": 107.88 }, { "epoch": 2.96, "learning_rate": "2.4581e-05", "loss": 0.6348, "slid_loss": 0.6632, "step": 3821, "time": 95.3 }, { "epoch": 2.96, "learning_rate": "2.4572e-05", "loss": 0.626, "slid_loss": 0.6631, "step": 3822, "time": 95.81 }, { "epoch": 2.96, "learning_rate": "2.4562e-05", "loss": 0.6541, "slid_loss": 0.6628, "step": 3823, "time": 96.7 }, { "epoch": 2.96, "learning_rate": "2.4553e-05", "loss": 0.6692, "slid_loss": 0.6629, "step": 3824, "time": 83.04 }, { "epoch": 2.96, "learning_rate": "2.4543e-05", "loss": 0.6586, "slid_loss": 0.6631, "step": 3825, "time": 71.71 }, { "epoch": 2.96, "learning_rate": "2.4534e-05", "loss": 0.6629, "slid_loss": 0.6629, "step": 3826, "time": 73.49 }, { "epoch": 2.96, "learning_rate": "2.4525e-05", "loss": 0.6561, "slid_loss": 0.6625, "step": 3827, "time": 84.33 }, { "epoch": 2.96, "learning_rate": "2.4515e-05", "loss": 0.698, "slid_loss": 0.6629, "step": 3828, "time": 71.99 }, { "epoch": 2.96, "learning_rate": "2.4506e-05", "loss": 0.6555, "slid_loss": 0.6625, "step": 3829, "time": 71.96 }, { "epoch": 2.96, "learning_rate": "2.4496e-05", "loss": 0.659, "slid_loss": 0.6626, "step": 3830, "time": 71.24 }, { "epoch": 2.96, "learning_rate": "2.4487e-05", "loss": 0.6662, "slid_loss": 0.6622, "step": 3831, "time": 70.99 }, { "epoch": 2.96, "learning_rate": "2.4477e-05", "loss": 0.6696, "slid_loss": 0.6624, "step": 3832, "time": 72.62 }, { "epoch": 2.97, "learning_rate": "2.4468e-05", "loss": 0.6831, "slid_loss": 0.6626, "step": 3833, "time": 70.98 }, { "epoch": 2.97, "learning_rate": "2.4458e-05", "loss": 0.6965, "slid_loss": 0.6628, "step": 3834, "time": 73.07 }, { "epoch": 2.97, "learning_rate": "2.4449e-05", "loss": 0.6504, "slid_loss": 0.6626, "step": 3835, "time": 72.25 }, { "epoch": 2.97, "learning_rate": "2.4440e-05", "loss": 0.6478, "slid_loss": 0.6627, "step": 3836, "time": 72.12 }, { "epoch": 2.97, "learning_rate": "2.4430e-05", "loss": 0.6642, "slid_loss": 0.6627, "step": 3837, "time": 71.81 }, { "epoch": 2.97, "learning_rate": "2.4421e-05", "loss": 0.6521, "slid_loss": 0.6624, "step": 3838, "time": 71.6 }, { "epoch": 2.97, "learning_rate": "2.4411e-05", "loss": 0.6397, "slid_loss": 0.6622, "step": 3839, "time": 73.26 }, { "epoch": 2.97, "learning_rate": "2.4402e-05", "loss": 0.6635, "slid_loss": 0.6622, "step": 3840, "time": 71.08 }, { "epoch": 2.97, "learning_rate": "2.4392e-05", "loss": 0.648, "slid_loss": 0.6623, "step": 3841, "time": 73.14 }, { "epoch": 2.97, "learning_rate": "2.4383e-05", "loss": 0.6692, "slid_loss": 0.6624, "step": 3842, "time": 71.8 }, { "epoch": 2.97, "learning_rate": "2.4373e-05", "loss": 0.6475, "slid_loss": 0.6622, "step": 3843, "time": 71.54 }, { "epoch": 2.97, "learning_rate": "2.4364e-05", "loss": 0.6646, "slid_loss": 0.6625, "step": 3844, "time": 71.76 }, { "epoch": 2.97, "learning_rate": "2.4355e-05", "loss": 0.6882, "slid_loss": 0.6632, "step": 3845, "time": 72.2 }, { "epoch": 2.98, "learning_rate": "2.4345e-05", "loss": 0.6745, "slid_loss": 0.6632, "step": 3846, "time": 71.06 }, { "epoch": 2.98, "learning_rate": "2.4336e-05", "loss": 0.672, "slid_loss": 0.6629, "step": 3847, "time": 73.93 }, { "epoch": 2.98, "learning_rate": "2.4326e-05", "loss": 0.6803, "slid_loss": 0.6633, "step": 3848, "time": 71.59 }, { "epoch": 2.98, "learning_rate": "2.4317e-05", "loss": 0.6687, "slid_loss": 0.6636, "step": 3849, "time": 70.96 }, { "epoch": 2.98, "learning_rate": "2.4308e-05", "loss": 0.6388, "slid_loss": 0.6636, "step": 3850, "time": 71.04 }, { "epoch": 2.98, "learning_rate": "2.4298e-05", "loss": 0.6482, "slid_loss": 0.6633, "step": 3851, "time": 71.48 }, { "epoch": 2.98, "learning_rate": "2.4289e-05", "loss": 0.6847, "slid_loss": 0.6638, "step": 3852, "time": 72.0 }, { "epoch": 2.98, "learning_rate": "2.4279e-05", "loss": 0.6958, "slid_loss": 0.6643, "step": 3853, "time": 71.43 }, { "epoch": 2.98, "learning_rate": "2.4270e-05", "loss": 0.6707, "slid_loss": 0.6646, "step": 3854, "time": 72.48 }, { "epoch": 2.98, "learning_rate": "2.4260e-05", "loss": 0.6749, "slid_loss": 0.6646, "step": 3855, "time": 72.05 }, { "epoch": 2.98, "learning_rate": "2.4251e-05", "loss": 0.6967, "slid_loss": 0.6648, "step": 3856, "time": 71.1 }, { "epoch": 2.98, "learning_rate": "2.4242e-05", "loss": 0.6722, "slid_loss": 0.665, "step": 3857, "time": 71.91 }, { "epoch": 2.98, "learning_rate": "2.4232e-05", "loss": 0.6877, "slid_loss": 0.6653, "step": 3858, "time": 73.16 }, { "epoch": 2.99, "learning_rate": "2.4223e-05", "loss": 0.6703, "slid_loss": 0.6653, "step": 3859, "time": 71.45 }, { "epoch": 2.99, "learning_rate": "2.4213e-05", "loss": 0.6098, "slid_loss": 0.6648, "step": 3860, "time": 71.96 }, { "epoch": 2.99, "learning_rate": "2.4204e-05", "loss": 0.6571, "slid_loss": 0.6646, "step": 3861, "time": 71.85 }, { "epoch": 2.99, "learning_rate": "2.4195e-05", "loss": 0.6639, "slid_loss": 0.6646, "step": 3862, "time": 71.77 }, { "epoch": 2.99, "learning_rate": "2.4185e-05", "loss": 0.6571, "slid_loss": 0.6645, "step": 3863, "time": 71.42 }, { "epoch": 2.99, "learning_rate": "2.4176e-05", "loss": 0.6621, "slid_loss": 0.6643, "step": 3864, "time": 71.34 }, { "epoch": 2.99, "learning_rate": "2.4166e-05", "loss": 0.662, "slid_loss": 0.6642, "step": 3865, "time": 71.3 }, { "epoch": 2.99, "learning_rate": "2.4157e-05", "loss": 0.6711, "slid_loss": 0.6643, "step": 3866, "time": 71.62 }, { "epoch": 2.99, "learning_rate": "2.4148e-05", "loss": 0.6539, "slid_loss": 0.6643, "step": 3867, "time": 71.52 }, { "epoch": 2.99, "learning_rate": "2.4138e-05", "loss": 0.6104, "slid_loss": 0.6634, "step": 3868, "time": 72.66 }, { "epoch": 2.99, "learning_rate": "2.4129e-05", "loss": 0.6408, "slid_loss": 0.6633, "step": 3869, "time": 72.08 }, { "epoch": 2.99, "learning_rate": "2.4119e-05", "loss": 0.6537, "slid_loss": 0.6634, "step": 3870, "time": 71.46 }, { "epoch": 2.99, "learning_rate": "2.4110e-05", "loss": 0.6475, "slid_loss": 0.6632, "step": 3871, "time": 73.0 }, { "epoch": 3.0, "learning_rate": "2.4101e-05", "loss": 0.6823, "slid_loss": 0.6631, "step": 3872, "time": 71.8 }, { "epoch": 3.0, "learning_rate": "2.4091e-05", "loss": 0.628, "slid_loss": 0.6629, "step": 3873, "time": 70.71 }, { "epoch": 3.0, "learning_rate": "2.4082e-05", "loss": 0.6622, "slid_loss": 0.663, "step": 3874, "time": 72.04 }, { "epoch": 3.0, "learning_rate": "2.4072e-05", "loss": 0.6912, "slid_loss": 0.6631, "step": 3875, "time": 72.69 }, { "epoch": 3.0, "learning_rate": "2.4063e-05", "loss": 0.6389, "slid_loss": 0.6628, "step": 3876, "time": 72.09 }, { "epoch": 3.0, "learning_rate": "2.4054e-05", "loss": 0.6288, "slid_loss": 0.6621, "step": 3877, "time": 71.91 }, { "epoch": 3.0, "learning_rate": "2.4044e-05", "loss": 0.7237, "slid_loss": 0.6627, "step": 3878, "time": 110.71 }, { "epoch": 3.0, "learning_rate": "2.4035e-05", "loss": 0.6392, "slid_loss": 0.6628, "step": 3879, "time": 71.13 }, { "epoch": 3.0, "learning_rate": "2.4026e-05", "loss": 0.6378, "slid_loss": 0.6627, "step": 3880, "time": 72.27 }, { "epoch": 3.0, "learning_rate": "2.4016e-05", "loss": 0.6441, "slid_loss": 0.6624, "step": 3881, "time": 70.79 }, { "epoch": 3.0, "learning_rate": "2.4007e-05", "loss": 0.6458, "slid_loss": 0.6625, "step": 3882, "time": 70.99 }, { "epoch": 3.0, "learning_rate": "2.3997e-05", "loss": 0.6729, "slid_loss": 0.6626, "step": 3883, "time": 71.24 }, { "epoch": 3.01, "learning_rate": "2.3988e-05", "loss": 0.6626, "slid_loss": 0.6626, "step": 3884, "time": 72.29 }, { "epoch": 3.01, "learning_rate": "2.3979e-05", "loss": 0.6462, "slid_loss": 0.663, "step": 3885, "time": 72.07 }, { "epoch": 3.01, "learning_rate": "2.3969e-05", "loss": 0.6358, "slid_loss": 0.6626, "step": 3886, "time": 71.37 }, { "epoch": 3.01, "learning_rate": "2.3960e-05", "loss": 0.6647, "slid_loss": 0.6627, "step": 3887, "time": 72.25 }, { "epoch": 3.01, "learning_rate": "2.3951e-05", "loss": 0.6649, "slid_loss": 0.6627, "step": 3888, "time": 71.03 }, { "epoch": 3.01, "learning_rate": "2.3941e-05", "loss": 0.6599, "slid_loss": 0.6624, "step": 3889, "time": 71.6 }, { "epoch": 3.01, "learning_rate": "2.3932e-05", "loss": 0.6852, "slid_loss": 0.663, "step": 3890, "time": 71.45 }, { "epoch": 3.01, "learning_rate": "2.3923e-05", "loss": 0.6831, "slid_loss": 0.6629, "step": 3891, "time": 72.92 }, { "epoch": 3.01, "learning_rate": "2.3913e-05", "loss": 0.6595, "slid_loss": 0.6626, "step": 3892, "time": 71.57 }, { "epoch": 3.01, "learning_rate": "2.3904e-05", "loss": 0.6604, "slid_loss": 0.6626, "step": 3893, "time": 71.37 }, { "epoch": 3.01, "learning_rate": "2.3894e-05", "loss": 0.6736, "slid_loss": 0.6628, "step": 3894, "time": 71.19 }, { "epoch": 3.01, "learning_rate": "2.3885e-05", "loss": 0.6437, "slid_loss": 0.6627, "step": 3895, "time": 72.03 }, { "epoch": 3.01, "learning_rate": "2.3876e-05", "loss": 0.6998, "slid_loss": 0.6626, "step": 3896, "time": 71.72 }, { "epoch": 3.02, "learning_rate": "2.3866e-05", "loss": 0.6507, "slid_loss": 0.6625, "step": 3897, "time": 71.75 }, { "epoch": 3.02, "learning_rate": "2.3857e-05", "loss": 0.6415, "slid_loss": 0.6623, "step": 3898, "time": 72.22 }, { "epoch": 3.02, "learning_rate": "2.3848e-05", "loss": 0.6687, "slid_loss": 0.6625, "step": 3899, "time": 72.01 }, { "epoch": 3.02, "learning_rate": "2.3838e-05", "loss": 0.6352, "slid_loss": 0.6623, "step": 3900, "time": 72.49 }, { "epoch": 3.02, "learning_rate": "2.3829e-05", "loss": 0.6637, "slid_loss": 0.662, "step": 3901, "time": 70.62 }, { "epoch": 3.02, "learning_rate": "2.3820e-05", "loss": 0.6369, "slid_loss": 0.6614, "step": 3902, "time": 71.42 }, { "epoch": 3.02, "learning_rate": "2.3810e-05", "loss": 0.6558, "slid_loss": 0.6616, "step": 3903, "time": 71.64 }, { "epoch": 3.02, "learning_rate": "2.3801e-05", "loss": 0.664, "slid_loss": 0.6618, "step": 3904, "time": 72.3 }, { "epoch": 3.02, "learning_rate": "2.3792e-05", "loss": 0.6527, "slid_loss": 0.6615, "step": 3905, "time": 71.11 }, { "epoch": 3.02, "learning_rate": "2.3782e-05", "loss": 0.6677, "slid_loss": 0.6617, "step": 3906, "time": 71.89 }, { "epoch": 3.02, "learning_rate": "2.3773e-05", "loss": 0.6721, "slid_loss": 0.6615, "step": 3907, "time": 72.58 }, { "epoch": 3.02, "learning_rate": "2.3764e-05", "loss": 0.6247, "slid_loss": 0.661, "step": 3908, "time": 71.58 }, { "epoch": 3.02, "learning_rate": "2.3754e-05", "loss": 0.6495, "slid_loss": 0.6608, "step": 3909, "time": 72.24 }, { "epoch": 3.03, "learning_rate": "2.3745e-05", "loss": 0.637, "slid_loss": 0.6604, "step": 3910, "time": 72.67 }, { "epoch": 3.03, "learning_rate": "2.3736e-05", "loss": 0.6353, "slid_loss": 0.6601, "step": 3911, "time": 71.26 }, { "epoch": 3.03, "learning_rate": "2.3726e-05", "loss": 0.6575, "slid_loss": 0.6596, "step": 3912, "time": 71.72 }, { "epoch": 3.03, "learning_rate": "2.3717e-05", "loss": 0.674, "slid_loss": 0.6599, "step": 3913, "time": 71.16 }, { "epoch": 3.03, "learning_rate": "2.3708e-05", "loss": 0.6705, "slid_loss": 0.6598, "step": 3914, "time": 71.57 }, { "epoch": 3.03, "learning_rate": "2.3698e-05", "loss": 0.6555, "slid_loss": 0.6597, "step": 3915, "time": 71.36 }, { "epoch": 3.03, "learning_rate": "2.3689e-05", "loss": 0.6964, "slid_loss": 0.6604, "step": 3916, "time": 71.2 }, { "epoch": 3.03, "learning_rate": "2.3680e-05", "loss": 0.6514, "slid_loss": 0.6601, "step": 3917, "time": 72.04 }, { "epoch": 3.03, "learning_rate": "2.3670e-05", "loss": 0.683, "slid_loss": 0.6606, "step": 3918, "time": 72.26 }, { "epoch": 3.03, "learning_rate": "2.3661e-05", "loss": 0.6643, "slid_loss": 0.6601, "step": 3919, "time": 70.89 }, { "epoch": 3.03, "learning_rate": "2.3652e-05", "loss": 0.6667, "slid_loss": 0.6606, "step": 3920, "time": 70.97 }, { "epoch": 3.03, "learning_rate": "2.3642e-05", "loss": 0.6651, "slid_loss": 0.6609, "step": 3921, "time": 72.04 }, { "epoch": 3.03, "learning_rate": "2.3633e-05", "loss": 0.6766, "slid_loss": 0.6614, "step": 3922, "time": 70.68 }, { "epoch": 3.04, "learning_rate": "2.3624e-05", "loss": 0.6455, "slid_loss": 0.6613, "step": 3923, "time": 73.87 }, { "epoch": 3.04, "learning_rate": "2.3614e-05", "loss": 0.6767, "slid_loss": 0.6614, "step": 3924, "time": 72.58 }, { "epoch": 3.04, "learning_rate": "2.3605e-05", "loss": 0.6349, "slid_loss": 0.6612, "step": 3925, "time": 71.19 }, { "epoch": 3.04, "learning_rate": "2.3596e-05", "loss": 0.6431, "slid_loss": 0.661, "step": 3926, "time": 72.71 }, { "epoch": 3.04, "learning_rate": "2.3587e-05", "loss": 0.6594, "slid_loss": 0.661, "step": 3927, "time": 70.74 }, { "epoch": 3.04, "learning_rate": "2.3577e-05", "loss": 0.6398, "slid_loss": 0.6604, "step": 3928, "time": 71.87 }, { "epoch": 3.04, "learning_rate": "2.3568e-05", "loss": 0.6452, "slid_loss": 0.6603, "step": 3929, "time": 71.24 }, { "epoch": 3.04, "learning_rate": "2.3559e-05", "loss": 0.6601, "slid_loss": 0.6603, "step": 3930, "time": 72.02 }, { "epoch": 3.04, "learning_rate": "2.3549e-05", "loss": 0.6243, "slid_loss": 0.6599, "step": 3931, "time": 72.09 }, { "epoch": 3.04, "learning_rate": "2.3540e-05", "loss": 0.7123, "slid_loss": 0.6603, "step": 3932, "time": 70.67 }, { "epoch": 3.04, "learning_rate": "2.3531e-05", "loss": 0.6384, "slid_loss": 0.6599, "step": 3933, "time": 71.32 }, { "epoch": 3.04, "learning_rate": "2.3521e-05", "loss": 0.6323, "slid_loss": 0.6592, "step": 3934, "time": 72.14 }, { "epoch": 3.04, "learning_rate": "2.3512e-05", "loss": 0.6445, "slid_loss": 0.6592, "step": 3935, "time": 70.1 }, { "epoch": 3.05, "learning_rate": "2.3503e-05", "loss": 0.6983, "slid_loss": 0.6597, "step": 3936, "time": 71.44 }, { "epoch": 3.05, "learning_rate": "2.3494e-05", "loss": 0.702, "slid_loss": 0.6601, "step": 3937, "time": 70.73 }, { "epoch": 3.05, "learning_rate": "2.3484e-05", "loss": 0.6518, "slid_loss": 0.6601, "step": 3938, "time": 71.69 }, { "epoch": 3.05, "learning_rate": "2.3475e-05", "loss": 0.6889, "slid_loss": 0.6605, "step": 3939, "time": 71.12 }, { "epoch": 3.05, "learning_rate": "2.3466e-05", "loss": 0.654, "slid_loss": 0.6604, "step": 3940, "time": 72.05 }, { "epoch": 3.05, "learning_rate": "2.3456e-05", "loss": 0.6566, "slid_loss": 0.6605, "step": 3941, "time": 71.57 }, { "epoch": 3.05, "learning_rate": "2.3447e-05", "loss": 0.6766, "slid_loss": 0.6606, "step": 3942, "time": 70.53 }, { "epoch": 3.05, "learning_rate": "2.3438e-05", "loss": 0.6687, "slid_loss": 0.6608, "step": 3943, "time": 72.29 }, { "epoch": 3.05, "learning_rate": "2.3429e-05", "loss": 0.6771, "slid_loss": 0.6609, "step": 3944, "time": 71.7 }, { "epoch": 3.05, "learning_rate": "2.3419e-05", "loss": 0.6544, "slid_loss": 0.6606, "step": 3945, "time": 70.99 }, { "epoch": 3.05, "learning_rate": "2.3410e-05", "loss": 0.6743, "slid_loss": 0.6606, "step": 3946, "time": 71.72 }, { "epoch": 3.05, "learning_rate": "2.3401e-05", "loss": 0.6288, "slid_loss": 0.6602, "step": 3947, "time": 72.36 }, { "epoch": 3.05, "learning_rate": "2.3391e-05", "loss": 0.6786, "slid_loss": 0.6602, "step": 3948, "time": 70.87 }, { "epoch": 3.06, "learning_rate": "2.3382e-05", "loss": 0.6646, "slid_loss": 0.6601, "step": 3949, "time": 72.35 }, { "epoch": 3.06, "learning_rate": "2.3373e-05", "loss": 0.64, "slid_loss": 0.6601, "step": 3950, "time": 72.42 }, { "epoch": 3.06, "learning_rate": "2.3364e-05", "loss": 0.6266, "slid_loss": 0.6599, "step": 3951, "time": 70.5 }, { "epoch": 3.06, "learning_rate": "2.3354e-05", "loss": 0.6712, "slid_loss": 0.6598, "step": 3952, "time": 71.1 }, { "epoch": 3.06, "learning_rate": "2.3345e-05", "loss": 0.6772, "slid_loss": 0.6596, "step": 3953, "time": 71.88 }, { "epoch": 3.06, "learning_rate": "2.3336e-05", "loss": 0.6846, "slid_loss": 0.6597, "step": 3954, "time": 71.94 }, { "epoch": 3.06, "learning_rate": "2.3327e-05", "loss": 0.662, "slid_loss": 0.6596, "step": 3955, "time": 71.85 }, { "epoch": 3.06, "learning_rate": "2.3317e-05", "loss": 0.6265, "slid_loss": 0.6589, "step": 3956, "time": 72.91 }, { "epoch": 3.06, "learning_rate": "2.3308e-05", "loss": 0.6768, "slid_loss": 0.6589, "step": 3957, "time": 72.92 }, { "epoch": 3.06, "learning_rate": "2.3299e-05", "loss": 0.6407, "slid_loss": 0.6585, "step": 3958, "time": 71.3 }, { "epoch": 3.06, "learning_rate": "2.3290e-05", "loss": 0.674, "slid_loss": 0.6585, "step": 3959, "time": 71.98 }, { "epoch": 3.06, "learning_rate": "2.3280e-05", "loss": 0.6626, "slid_loss": 0.659, "step": 3960, "time": 95.41 }, { "epoch": 3.06, "learning_rate": "2.3271e-05", "loss": 0.6536, "slid_loss": 0.659, "step": 3961, "time": 89.79 }, { "epoch": 3.07, "learning_rate": "2.3262e-05", "loss": 0.6274, "slid_loss": 0.6586, "step": 3962, "time": 87.19 }, { "epoch": 3.07, "learning_rate": "2.3253e-05", "loss": 0.6626, "slid_loss": 0.6587, "step": 3963, "time": 72.61 }, { "epoch": 3.07, "learning_rate": "2.3243e-05", "loss": 0.6826, "slid_loss": 0.6589, "step": 3964, "time": 71.11 }, { "epoch": 3.07, "learning_rate": "2.3234e-05", "loss": 0.6572, "slid_loss": 0.6589, "step": 3965, "time": 117.43 }, { "epoch": 3.07, "learning_rate": "2.3225e-05", "loss": 0.6932, "slid_loss": 0.6591, "step": 3966, "time": 119.57 }, { "epoch": 3.07, "learning_rate": "2.3216e-05", "loss": 0.6463, "slid_loss": 0.659, "step": 3967, "time": 86.22 }, { "epoch": 3.07, "learning_rate": "2.3206e-05", "loss": 0.6787, "slid_loss": 0.6597, "step": 3968, "time": 100.64 }, { "epoch": 3.07, "learning_rate": "2.3197e-05", "loss": 0.6733, "slid_loss": 0.66, "step": 3969, "time": 108.07 }, { "epoch": 3.07, "learning_rate": "2.3188e-05", "loss": 0.6791, "slid_loss": 0.6603, "step": 3970, "time": 104.69 }, { "epoch": 3.07, "learning_rate": "2.3179e-05", "loss": 0.6484, "slid_loss": 0.6603, "step": 3971, "time": 167.09 }, { "epoch": 3.07, "learning_rate": "2.3169e-05", "loss": 0.659, "slid_loss": 0.66, "step": 3972, "time": 197.34 }, { "epoch": 3.07, "learning_rate": "2.3160e-05", "loss": 0.6328, "slid_loss": 0.6601, "step": 3973, "time": 146.71 }, { "epoch": 3.07, "learning_rate": "2.3151e-05", "loss": 0.6507, "slid_loss": 0.66, "step": 3974, "time": 162.87 }, { "epoch": 3.08, "learning_rate": "2.3142e-05", "loss": 0.6581, "slid_loss": 0.6596, "step": 3975, "time": 150.78 }, { "epoch": 3.08, "learning_rate": "2.3132e-05", "loss": 0.6435, "slid_loss": 0.6597, "step": 3976, "time": 162.78 }, { "epoch": 3.08, "learning_rate": "2.3123e-05", "loss": 0.6491, "slid_loss": 0.6599, "step": 3977, "time": 182.59 }, { "epoch": 3.08, "learning_rate": "2.3114e-05", "loss": 0.6515, "slid_loss": 0.6592, "step": 3978, "time": 86.46 }, { "epoch": 3.08, "learning_rate": "2.3105e-05", "loss": 0.6445, "slid_loss": 0.6592, "step": 3979, "time": 149.68 }, { "epoch": 3.08, "learning_rate": "2.3096e-05", "loss": 0.6549, "slid_loss": 0.6594, "step": 3980, "time": 114.48 }, { "epoch": 3.08, "learning_rate": "2.3086e-05", "loss": 0.6591, "slid_loss": 0.6595, "step": 3981, "time": 101.52 }, { "epoch": 3.08, "learning_rate": "2.3077e-05", "loss": 0.6557, "slid_loss": 0.6596, "step": 3982, "time": 103.04 }, { "epoch": 3.08, "learning_rate": "2.3068e-05", "loss": 0.6794, "slid_loss": 0.6597, "step": 3983, "time": 85.02 }, { "epoch": 3.08, "learning_rate": "2.3059e-05", "loss": 0.6719, "slid_loss": 0.6598, "step": 3984, "time": 72.35 }, { "epoch": 3.08, "learning_rate": "2.3049e-05", "loss": 0.6826, "slid_loss": 0.6602, "step": 3985, "time": 71.61 }, { "epoch": 3.08, "learning_rate": "2.3040e-05", "loss": 0.652, "slid_loss": 0.6603, "step": 3986, "time": 86.81 }, { "epoch": 3.08, "learning_rate": "2.3031e-05", "loss": 0.6584, "slid_loss": 0.6603, "step": 3987, "time": 71.45 }, { "epoch": 3.09, "learning_rate": "2.3022e-05", "loss": 0.6712, "slid_loss": 0.6603, "step": 3988, "time": 72.04 }, { "epoch": 3.09, "learning_rate": "2.3013e-05", "loss": 0.6328, "slid_loss": 0.66, "step": 3989, "time": 71.35 }, { "epoch": 3.09, "learning_rate": "2.3003e-05", "loss": 0.6686, "slid_loss": 0.6599, "step": 3990, "time": 72.71 }, { "epoch": 3.09, "learning_rate": "2.2994e-05", "loss": 0.6755, "slid_loss": 0.6598, "step": 3991, "time": 71.54 }, { "epoch": 3.09, "learning_rate": "2.2985e-05", "loss": 0.6484, "slid_loss": 0.6597, "step": 3992, "time": 73.22 }, { "epoch": 3.09, "learning_rate": "2.2976e-05", "loss": 0.6647, "slid_loss": 0.6597, "step": 3993, "time": 71.82 }, { "epoch": 3.09, "learning_rate": "2.2967e-05", "loss": 0.6666, "slid_loss": 0.6597, "step": 3994, "time": 72.06 }, { "epoch": 3.09, "learning_rate": "2.2957e-05", "loss": 0.6694, "slid_loss": 0.6599, "step": 3995, "time": 71.9 }, { "epoch": 3.09, "learning_rate": "2.2948e-05", "loss": 0.6752, "slid_loss": 0.6597, "step": 3996, "time": 72.56 }, { "epoch": 3.09, "learning_rate": "2.2939e-05", "loss": 0.6221, "slid_loss": 0.6594, "step": 3997, "time": 72.89 }, { "epoch": 3.09, "learning_rate": "2.2930e-05", "loss": 0.623, "slid_loss": 0.6592, "step": 3998, "time": 73.92 }, { "epoch": 3.09, "learning_rate": "2.2921e-05", "loss": 0.6885, "slid_loss": 0.6594, "step": 3999, "time": 71.85 }, { "epoch": 3.09, "learning_rate": "2.2911e-05", "loss": 0.6775, "slid_loss": 0.6598, "step": 4000, "time": 72.37 }, { "epoch": 3.1, "learning_rate": "2.2902e-05", "loss": 0.6278, "slid_loss": 0.6595, "step": 4001, "time": 839.26 }, { "epoch": 3.1, "learning_rate": "2.2893e-05", "loss": 0.6645, "slid_loss": 0.6597, "step": 4002, "time": 72.16 }, { "epoch": 3.1, "learning_rate": "2.2884e-05", "loss": 0.6277, "slid_loss": 0.6595, "step": 4003, "time": 72.0 }, { "epoch": 3.1, "learning_rate": "2.2875e-05", "loss": 0.6854, "slid_loss": 0.6597, "step": 4004, "time": 71.78 }, { "epoch": 3.1, "learning_rate": "2.2866e-05", "loss": 0.6615, "slid_loss": 0.6598, "step": 4005, "time": 71.72 }, { "epoch": 3.1, "learning_rate": "2.2856e-05", "loss": 0.6575, "slid_loss": 0.6597, "step": 4006, "time": 71.71 }, { "epoch": 3.1, "learning_rate": "2.2847e-05", "loss": 0.6537, "slid_loss": 0.6595, "step": 4007, "time": 71.53 }, { "epoch": 3.1, "learning_rate": "2.2838e-05", "loss": 0.6603, "slid_loss": 0.6598, "step": 4008, "time": 71.6 }, { "epoch": 3.1, "learning_rate": "2.2829e-05", "loss": 0.6293, "slid_loss": 0.6596, "step": 4009, "time": 70.99 }, { "epoch": 3.1, "learning_rate": "2.2820e-05", "loss": 0.6672, "slid_loss": 0.6599, "step": 4010, "time": 71.93 }, { "epoch": 3.1, "learning_rate": "2.2810e-05", "loss": 0.6753, "slid_loss": 0.6603, "step": 4011, "time": 72.55 }, { "epoch": 3.1, "learning_rate": "2.2801e-05", "loss": 0.6891, "slid_loss": 0.6607, "step": 4012, "time": 72.29 }, { "epoch": 3.1, "learning_rate": "2.2792e-05", "loss": 0.6348, "slid_loss": 0.6603, "step": 4013, "time": 70.55 }, { "epoch": 3.11, "learning_rate": "2.2783e-05", "loss": 0.6419, "slid_loss": 0.66, "step": 4014, "time": 71.67 }, { "epoch": 3.11, "learning_rate": "2.2774e-05", "loss": 0.6682, "slid_loss": 0.6601, "step": 4015, "time": 72.44 }, { "epoch": 3.11, "learning_rate": "2.2765e-05", "loss": 0.6663, "slid_loss": 0.6598, "step": 4016, "time": 70.65 }, { "epoch": 3.11, "learning_rate": "2.2756e-05", "loss": 0.6658, "slid_loss": 0.6599, "step": 4017, "time": 72.45 }, { "epoch": 3.11, "learning_rate": "2.2746e-05", "loss": 0.6943, "slid_loss": 0.6601, "step": 4018, "time": 70.92 }, { "epoch": 3.11, "learning_rate": "2.2737e-05", "loss": 0.6471, "slid_loss": 0.6599, "step": 4019, "time": 72.79 }, { "epoch": 3.11, "learning_rate": "2.2728e-05", "loss": 0.6717, "slid_loss": 0.6599, "step": 4020, "time": 71.27 }, { "epoch": 3.11, "learning_rate": "2.2719e-05", "loss": 0.6788, "slid_loss": 0.6601, "step": 4021, "time": 72.27 }, { "epoch": 3.11, "learning_rate": "2.2710e-05", "loss": 0.6316, "slid_loss": 0.6596, "step": 4022, "time": 71.94 }, { "epoch": 3.11, "learning_rate": "2.2701e-05", "loss": 0.7111, "slid_loss": 0.6603, "step": 4023, "time": 71.01 }, { "epoch": 3.11, "learning_rate": "2.2691e-05", "loss": 0.6747, "slid_loss": 0.6603, "step": 4024, "time": 71.03 }, { "epoch": 3.11, "learning_rate": "2.2682e-05", "loss": 0.6313, "slid_loss": 0.6602, "step": 4025, "time": 71.52 }, { "epoch": 3.11, "learning_rate": "2.2673e-05", "loss": 0.6417, "slid_loss": 0.6602, "step": 4026, "time": 71.2 }, { "epoch": 3.12, "learning_rate": "2.2664e-05", "loss": 0.6504, "slid_loss": 0.6601, "step": 4027, "time": 73.24 }, { "epoch": 3.12, "learning_rate": "2.2655e-05", "loss": 0.6541, "slid_loss": 0.6603, "step": 4028, "time": 71.31 }, { "epoch": 3.12, "learning_rate": "2.2646e-05", "loss": 0.6321, "slid_loss": 0.6601, "step": 4029, "time": 72.33 }, { "epoch": 3.12, "learning_rate": "2.2637e-05", "loss": 0.6821, "slid_loss": 0.6604, "step": 4030, "time": 72.49 }, { "epoch": 3.12, "learning_rate": "2.2627e-05", "loss": 0.6121, "slid_loss": 0.6602, "step": 4031, "time": 72.07 }, { "epoch": 3.12, "learning_rate": "2.2618e-05", "loss": 0.651, "slid_loss": 0.6596, "step": 4032, "time": 72.78 }, { "epoch": 3.12, "learning_rate": "2.2609e-05", "loss": 0.6497, "slid_loss": 0.6597, "step": 4033, "time": 72.36 }, { "epoch": 3.12, "learning_rate": "2.2600e-05", "loss": 0.6325, "slid_loss": 0.6597, "step": 4034, "time": 72.91 }, { "epoch": 3.12, "learning_rate": "2.2591e-05", "loss": 0.6798, "slid_loss": 0.6601, "step": 4035, "time": 71.01 }, { "epoch": 3.12, "learning_rate": "2.2582e-05", "loss": 0.6691, "slid_loss": 0.6598, "step": 4036, "time": 72.61 }, { "epoch": 3.12, "learning_rate": "2.2573e-05", "loss": 0.6579, "slid_loss": 0.6594, "step": 4037, "time": 71.51 }, { "epoch": 3.12, "learning_rate": "2.2564e-05", "loss": 0.6998, "slid_loss": 0.6598, "step": 4038, "time": 74.32 }, { "epoch": 3.12, "learning_rate": "2.2554e-05", "loss": 0.6308, "slid_loss": 0.6592, "step": 4039, "time": 72.23 }, { "epoch": 3.13, "learning_rate": "2.2545e-05", "loss": 0.6809, "slid_loss": 0.6595, "step": 4040, "time": 71.26 }, { "epoch": 3.13, "learning_rate": "2.2536e-05", "loss": 0.6513, "slid_loss": 0.6595, "step": 4041, "time": 72.64 }, { "epoch": 3.13, "learning_rate": "2.2527e-05", "loss": 0.6772, "slid_loss": 0.6595, "step": 4042, "time": 70.34 }, { "epoch": 3.13, "learning_rate": "2.2518e-05", "loss": 0.6526, "slid_loss": 0.6593, "step": 4043, "time": 72.24 }, { "epoch": 3.13, "learning_rate": "2.2509e-05", "loss": 0.6272, "slid_loss": 0.6588, "step": 4044, "time": 71.9 }, { "epoch": 3.13, "learning_rate": "2.2500e-05", "loss": 0.6397, "slid_loss": 0.6587, "step": 4045, "time": 72.88 }, { "epoch": 3.13, "learning_rate": "2.2491e-05", "loss": 0.6528, "slid_loss": 0.6585, "step": 4046, "time": 72.33 }, { "epoch": 3.13, "learning_rate": "2.2482e-05", "loss": 0.6788, "slid_loss": 0.659, "step": 4047, "time": 71.77 }, { "epoch": 3.13, "learning_rate": "2.2472e-05", "loss": 0.6589, "slid_loss": 0.6588, "step": 4048, "time": 70.87 }, { "epoch": 3.13, "learning_rate": "2.2463e-05", "loss": 0.6528, "slid_loss": 0.6586, "step": 4049, "time": 72.23 }, { "epoch": 3.13, "learning_rate": "2.2454e-05", "loss": 0.6569, "slid_loss": 0.6588, "step": 4050, "time": 71.8 }, { "epoch": 3.13, "learning_rate": "2.2445e-05", "loss": 0.6508, "slid_loss": 0.659, "step": 4051, "time": 70.19 }, { "epoch": 3.14, "learning_rate": "2.2436e-05", "loss": 0.6762, "slid_loss": 0.6591, "step": 4052, "time": 72.82 }, { "epoch": 3.14, "learning_rate": "2.2427e-05", "loss": 0.6718, "slid_loss": 0.659, "step": 4053, "time": 71.73 }, { "epoch": 3.14, "learning_rate": "2.2418e-05", "loss": 0.6478, "slid_loss": 0.6587, "step": 4054, "time": 71.6 }, { "epoch": 3.14, "learning_rate": "2.2409e-05", "loss": 0.7039, "slid_loss": 0.6591, "step": 4055, "time": 71.45 }, { "epoch": 3.14, "learning_rate": "2.2400e-05", "loss": 0.7031, "slid_loss": 0.6599, "step": 4056, "time": 71.2 }, { "epoch": 3.14, "learning_rate": "2.2391e-05", "loss": 0.6632, "slid_loss": 0.6597, "step": 4057, "time": 70.64 }, { "epoch": 3.14, "learning_rate": "2.2382e-05", "loss": 0.6585, "slid_loss": 0.6599, "step": 4058, "time": 70.65 }, { "epoch": 3.14, "learning_rate": "2.2372e-05", "loss": 0.6559, "slid_loss": 0.6597, "step": 4059, "time": 71.73 }, { "epoch": 3.14, "learning_rate": "2.2363e-05", "loss": 0.6538, "slid_loss": 0.6596, "step": 4060, "time": 71.78 }, { "epoch": 3.14, "learning_rate": "2.2354e-05", "loss": 0.6587, "slid_loss": 0.6597, "step": 4061, "time": 71.8 }, { "epoch": 3.14, "learning_rate": "2.2345e-05", "loss": 0.6622, "slid_loss": 0.66, "step": 4062, "time": 72.03 }, { "epoch": 3.14, "learning_rate": "2.2336e-05", "loss": 0.6752, "slid_loss": 0.6602, "step": 4063, "time": 72.78 }, { "epoch": 3.14, "learning_rate": "2.2327e-05", "loss": 0.6734, "slid_loss": 0.6601, "step": 4064, "time": 71.73 }, { "epoch": 3.15, "learning_rate": "2.2318e-05", "loss": 0.666, "slid_loss": 0.6602, "step": 4065, "time": 71.88 }, { "epoch": 3.15, "learning_rate": "2.2309e-05", "loss": 0.6699, "slid_loss": 0.6599, "step": 4066, "time": 71.62 }, { "epoch": 3.15, "learning_rate": "2.2300e-05", "loss": 0.6115, "slid_loss": 0.6596, "step": 4067, "time": 71.65 }, { "epoch": 3.15, "learning_rate": "2.2291e-05", "loss": 0.6552, "slid_loss": 0.6593, "step": 4068, "time": 71.72 }, { "epoch": 3.15, "learning_rate": "2.2282e-05", "loss": 0.6405, "slid_loss": 0.659, "step": 4069, "time": 71.41 }, { "epoch": 3.15, "learning_rate": "2.2273e-05", "loss": 0.6292, "slid_loss": 0.6585, "step": 4070, "time": 72.52 }, { "epoch": 3.15, "learning_rate": "2.2264e-05", "loss": 0.6391, "slid_loss": 0.6584, "step": 4071, "time": 72.69 }, { "epoch": 3.15, "learning_rate": "2.2255e-05", "loss": 0.6255, "slid_loss": 0.6581, "step": 4072, "time": 70.99 }, { "epoch": 3.15, "learning_rate": "2.2245e-05", "loss": 0.6574, "slid_loss": 0.6583, "step": 4073, "time": 72.02 }, { "epoch": 3.15, "learning_rate": "2.2236e-05", "loss": 0.6613, "slid_loss": 0.6584, "step": 4074, "time": 73.15 }, { "epoch": 3.15, "learning_rate": "2.2227e-05", "loss": 0.667, "slid_loss": 0.6585, "step": 4075, "time": 71.64 }, { "epoch": 3.15, "learning_rate": "2.2218e-05", "loss": 0.6714, "slid_loss": 0.6588, "step": 4076, "time": 71.92 }, { "epoch": 3.15, "learning_rate": "2.2209e-05", "loss": 0.6482, "slid_loss": 0.6588, "step": 4077, "time": 70.67 }, { "epoch": 3.16, "learning_rate": "2.2200e-05", "loss": 0.6754, "slid_loss": 0.659, "step": 4078, "time": 72.89 }, { "epoch": 3.16, "learning_rate": "2.2191e-05", "loss": 0.6723, "slid_loss": 0.6593, "step": 4079, "time": 70.76 }, { "epoch": 3.16, "learning_rate": "2.2182e-05", "loss": 0.6438, "slid_loss": 0.6592, "step": 4080, "time": 70.85 }, { "epoch": 3.16, "learning_rate": "2.2173e-05", "loss": 0.651, "slid_loss": 0.6591, "step": 4081, "time": 70.8 }, { "epoch": 3.16, "learning_rate": "2.2164e-05", "loss": 0.6453, "slid_loss": 0.659, "step": 4082, "time": 72.1 }, { "epoch": 3.16, "learning_rate": "2.2155e-05", "loss": 0.6361, "slid_loss": 0.6586, "step": 4083, "time": 71.33 }, { "epoch": 3.16, "learning_rate": "2.2146e-05", "loss": 0.6542, "slid_loss": 0.6584, "step": 4084, "time": 72.52 }, { "epoch": 3.16, "learning_rate": "2.2137e-05", "loss": 0.6422, "slid_loss": 0.658, "step": 4085, "time": 72.64 }, { "epoch": 3.16, "learning_rate": "2.2128e-05", "loss": 0.6307, "slid_loss": 0.6578, "step": 4086, "time": 70.72 }, { "epoch": 3.16, "learning_rate": "2.2119e-05", "loss": 0.6704, "slid_loss": 0.6579, "step": 4087, "time": 71.63 }, { "epoch": 3.16, "learning_rate": "2.2110e-05", "loss": 0.7064, "slid_loss": 0.6583, "step": 4088, "time": 71.46 }, { "epoch": 3.16, "learning_rate": "2.2101e-05", "loss": 0.6211, "slid_loss": 0.6581, "step": 4089, "time": 71.56 }, { "epoch": 3.16, "learning_rate": "2.2092e-05", "loss": 0.6424, "slid_loss": 0.6579, "step": 4090, "time": 70.83 }, { "epoch": 3.17, "learning_rate": "2.2083e-05", "loss": 0.6861, "slid_loss": 0.658, "step": 4091, "time": 73.13 }, { "epoch": 3.17, "learning_rate": "2.2074e-05", "loss": 0.6697, "slid_loss": 0.6582, "step": 4092, "time": 71.1 }, { "epoch": 3.17, "learning_rate": "2.2065e-05", "loss": 0.6636, "slid_loss": 0.6582, "step": 4093, "time": 71.24 }, { "epoch": 3.17, "learning_rate": "2.2056e-05", "loss": 0.7033, "slid_loss": 0.6586, "step": 4094, "time": 70.78 }, { "epoch": 3.17, "learning_rate": "2.2047e-05", "loss": 0.675, "slid_loss": 0.6586, "step": 4095, "time": 72.01 }, { "epoch": 3.17, "learning_rate": "2.2038e-05", "loss": 0.6467, "slid_loss": 0.6583, "step": 4096, "time": 71.02 }, { "epoch": 3.17, "learning_rate": "2.2029e-05", "loss": 0.6915, "slid_loss": 0.659, "step": 4097, "time": 72.03 }, { "epoch": 3.17, "learning_rate": "2.2020e-05", "loss": 0.6297, "slid_loss": 0.6591, "step": 4098, "time": 71.47 }, { "epoch": 3.17, "learning_rate": "2.2011e-05", "loss": 0.6524, "slid_loss": 0.6587, "step": 4099, "time": 70.77 }, { "epoch": 3.17, "learning_rate": "2.2002e-05", "loss": 0.6353, "slid_loss": 0.6583, "step": 4100, "time": 70.03 }, { "epoch": 3.17, "learning_rate": "2.1993e-05", "loss": 0.6652, "slid_loss": 0.6587, "step": 4101, "time": 71.99 }, { "epoch": 3.17, "learning_rate": "2.1984e-05", "loss": 0.6517, "slid_loss": 0.6585, "step": 4102, "time": 71.34 }, { "epoch": 3.17, "learning_rate": "2.1975e-05", "loss": 0.6469, "slid_loss": 0.6587, "step": 4103, "time": 72.22 }, { "epoch": 3.18, "learning_rate": "2.1966e-05", "loss": 0.6553, "slid_loss": 0.6584, "step": 4104, "time": 72.61 }, { "epoch": 3.18, "learning_rate": "2.1957e-05", "loss": 0.6736, "slid_loss": 0.6586, "step": 4105, "time": 70.58 }, { "epoch": 3.18, "learning_rate": "2.1948e-05", "loss": 0.6773, "slid_loss": 0.6588, "step": 4106, "time": 71.09 }, { "epoch": 3.18, "learning_rate": "2.1939e-05", "loss": 0.6107, "slid_loss": 0.6583, "step": 4107, "time": 70.83 }, { "epoch": 3.18, "learning_rate": "2.1930e-05", "loss": 0.6562, "slid_loss": 0.6583, "step": 4108, "time": 71.93 }, { "epoch": 3.18, "learning_rate": "2.1921e-05", "loss": 0.6348, "slid_loss": 0.6583, "step": 4109, "time": 71.47 }, { "epoch": 3.18, "learning_rate": "2.1912e-05", "loss": 0.6341, "slid_loss": 0.658, "step": 4110, "time": 72.59 }, { "epoch": 3.18, "learning_rate": "2.1903e-05", "loss": 0.6602, "slid_loss": 0.6579, "step": 4111, "time": 70.09 }, { "epoch": 3.18, "learning_rate": "2.1894e-05", "loss": 0.6615, "slid_loss": 0.6576, "step": 4112, "time": 72.62 }, { "epoch": 3.18, "learning_rate": "2.1885e-05", "loss": 0.7038, "slid_loss": 0.6583, "step": 4113, "time": 71.22 }, { "epoch": 3.18, "learning_rate": "2.1876e-05", "loss": 0.6403, "slid_loss": 0.6583, "step": 4114, "time": 72.67 }, { "epoch": 3.18, "learning_rate": "2.1867e-05", "loss": 0.6719, "slid_loss": 0.6583, "step": 4115, "time": 71.82 }, { "epoch": 3.18, "learning_rate": "2.1858e-05", "loss": 0.6631, "slid_loss": 0.6583, "step": 4116, "time": 72.65 }, { "epoch": 3.19, "learning_rate": "2.1849e-05", "loss": 0.6876, "slid_loss": 0.6585, "step": 4117, "time": 89.12 }, { "epoch": 3.19, "learning_rate": "2.1840e-05", "loss": 0.6279, "slid_loss": 0.6578, "step": 4118, "time": 71.42 }, { "epoch": 3.19, "learning_rate": "2.1831e-05", "loss": 0.6799, "slid_loss": 0.6581, "step": 4119, "time": 83.05 }, { "epoch": 3.19, "learning_rate": "2.1822e-05", "loss": 0.6576, "slid_loss": 0.658, "step": 4120, "time": 85.91 }, { "epoch": 3.19, "learning_rate": "2.1813e-05", "loss": 0.6951, "slid_loss": 0.6582, "step": 4121, "time": 70.23 }, { "epoch": 3.19, "learning_rate": "2.1804e-05", "loss": 0.6706, "slid_loss": 0.6586, "step": 4122, "time": 71.98 }, { "epoch": 3.19, "learning_rate": "2.1795e-05", "loss": 0.6457, "slid_loss": 0.6579, "step": 4123, "time": 71.66 }, { "epoch": 3.19, "learning_rate": "2.1786e-05", "loss": 0.6777, "slid_loss": 0.6579, "step": 4124, "time": 97.05 }, { "epoch": 3.19, "learning_rate": "2.1777e-05", "loss": 0.6614, "slid_loss": 0.6582, "step": 4125, "time": 98.29 }, { "epoch": 3.19, "learning_rate": "2.1768e-05", "loss": 0.6477, "slid_loss": 0.6583, "step": 4126, "time": 96.61 }, { "epoch": 3.19, "learning_rate": "2.1759e-05", "loss": 0.6537, "slid_loss": 0.6583, "step": 4127, "time": 98.32 }, { "epoch": 3.19, "learning_rate": "2.1750e-05", "loss": 0.6158, "slid_loss": 0.6579, "step": 4128, "time": 122.28 }, { "epoch": 3.19, "learning_rate": "2.1741e-05", "loss": 0.6778, "slid_loss": 0.6584, "step": 4129, "time": 112.34 }, { "epoch": 3.2, "learning_rate": "2.1732e-05", "loss": 0.6697, "slid_loss": 0.6583, "step": 4130, "time": 168.93 }, { "epoch": 3.2, "learning_rate": "2.1723e-05", "loss": 0.6491, "slid_loss": 0.6586, "step": 4131, "time": 149.12 }, { "epoch": 3.2, "learning_rate": "2.1714e-05", "loss": 0.6449, "slid_loss": 0.6586, "step": 4132, "time": 148.92 }, { "epoch": 3.2, "learning_rate": "2.1706e-05", "loss": 0.6764, "slid_loss": 0.6589, "step": 4133, "time": 178.8 }, { "epoch": 3.2, "learning_rate": "2.1697e-05", "loss": 0.667, "slid_loss": 0.6592, "step": 4134, "time": 122.21 }, { "epoch": 3.2, "learning_rate": "2.1688e-05", "loss": 0.6323, "slid_loss": 0.6587, "step": 4135, "time": 132.27 }, { "epoch": 3.2, "learning_rate": "2.1679e-05", "loss": 0.6717, "slid_loss": 0.6587, "step": 4136, "time": 143.18 }, { "epoch": 3.2, "learning_rate": "2.1670e-05", "loss": 0.6565, "slid_loss": 0.6587, "step": 4137, "time": 151.56 }, { "epoch": 3.2, "learning_rate": "2.1661e-05", "loss": 0.6404, "slid_loss": 0.6581, "step": 4138, "time": 84.42 }, { "epoch": 3.2, "learning_rate": "2.1652e-05", "loss": 0.6431, "slid_loss": 0.6583, "step": 4139, "time": 84.59 }, { "epoch": 3.2, "learning_rate": "2.1643e-05", "loss": 0.6666, "slid_loss": 0.6581, "step": 4140, "time": 107.81 }, { "epoch": 3.2, "learning_rate": "2.1634e-05", "loss": 0.6677, "slid_loss": 0.6583, "step": 4141, "time": 97.1 }, { "epoch": 3.2, "learning_rate": "2.1625e-05", "loss": 0.6953, "slid_loss": 0.6585, "step": 4142, "time": 94.9 }, { "epoch": 3.21, "learning_rate": "2.1616e-05", "loss": 0.6922, "slid_loss": 0.6589, "step": 4143, "time": 71.89 }, { "epoch": 3.21, "learning_rate": "2.1607e-05", "loss": 0.6323, "slid_loss": 0.6589, "step": 4144, "time": 71.63 }, { "epoch": 3.21, "learning_rate": "2.1598e-05", "loss": 0.6593, "slid_loss": 0.6591, "step": 4145, "time": 71.95 }, { "epoch": 3.21, "learning_rate": "2.1589e-05", "loss": 0.6478, "slid_loss": 0.6591, "step": 4146, "time": 83.91 }, { "epoch": 3.21, "learning_rate": "2.1581e-05", "loss": 0.6453, "slid_loss": 0.6587, "step": 4147, "time": 72.41 }, { "epoch": 3.21, "learning_rate": "2.1572e-05", "loss": 0.6039, "slid_loss": 0.6582, "step": 4148, "time": 72.57 }, { "epoch": 3.21, "learning_rate": "2.1563e-05", "loss": 0.6444, "slid_loss": 0.6581, "step": 4149, "time": 72.23 }, { "epoch": 3.21, "learning_rate": "2.1554e-05", "loss": 0.684, "slid_loss": 0.6584, "step": 4150, "time": 72.02 }, { "epoch": 3.21, "learning_rate": "2.1545e-05", "loss": 0.6821, "slid_loss": 0.6587, "step": 4151, "time": 70.81 }, { "epoch": 3.21, "learning_rate": "2.1536e-05", "loss": 0.6281, "slid_loss": 0.6582, "step": 4152, "time": 72.49 }, { "epoch": 3.21, "learning_rate": "2.1527e-05", "loss": 0.6671, "slid_loss": 0.6581, "step": 4153, "time": 71.6 }, { "epoch": 3.21, "learning_rate": "2.1518e-05", "loss": 0.6562, "slid_loss": 0.6582, "step": 4154, "time": 71.17 }, { "epoch": 3.21, "learning_rate": "2.1509e-05", "loss": 0.6673, "slid_loss": 0.6579, "step": 4155, "time": 70.82 }, { "epoch": 3.22, "learning_rate": "2.1500e-05", "loss": 0.6329, "slid_loss": 0.6572, "step": 4156, "time": 72.4 }, { "epoch": 3.22, "learning_rate": "2.1492e-05", "loss": 0.6296, "slid_loss": 0.6568, "step": 4157, "time": 72.79 }, { "epoch": 3.22, "learning_rate": "2.1483e-05", "loss": 0.6099, "slid_loss": 0.6563, "step": 4158, "time": 72.25 }, { "epoch": 3.22, "learning_rate": "2.1474e-05", "loss": 0.6639, "slid_loss": 0.6564, "step": 4159, "time": 71.57 }, { "epoch": 3.22, "learning_rate": "2.1465e-05", "loss": 0.641, "slid_loss": 0.6563, "step": 4160, "time": 72.96 }, { "epoch": 3.22, "learning_rate": "2.1456e-05", "loss": 0.6564, "slid_loss": 0.6563, "step": 4161, "time": 71.27 }, { "epoch": 3.22, "learning_rate": "2.1447e-05", "loss": 0.6177, "slid_loss": 0.6558, "step": 4162, "time": 71.13 }, { "epoch": 3.22, "learning_rate": "2.1438e-05", "loss": 0.6644, "slid_loss": 0.6557, "step": 4163, "time": 71.42 }, { "epoch": 3.22, "learning_rate": "2.1429e-05", "loss": 0.6742, "slid_loss": 0.6557, "step": 4164, "time": 71.2 }, { "epoch": 3.22, "learning_rate": "2.1420e-05", "loss": 0.6439, "slid_loss": 0.6555, "step": 4165, "time": 71.66 }, { "epoch": 3.22, "learning_rate": "2.1412e-05", "loss": 0.6644, "slid_loss": 0.6555, "step": 4166, "time": 71.38 }, { "epoch": 3.22, "learning_rate": "2.1403e-05", "loss": 0.6501, "slid_loss": 0.6558, "step": 4167, "time": 71.24 }, { "epoch": 3.22, "learning_rate": "2.1394e-05", "loss": 0.6387, "slid_loss": 0.6557, "step": 4168, "time": 72.11 }, { "epoch": 3.23, "learning_rate": "2.1385e-05", "loss": 0.6334, "slid_loss": 0.6556, "step": 4169, "time": 71.63 }, { "epoch": 3.23, "learning_rate": "2.1376e-05", "loss": 0.6782, "slid_loss": 0.6561, "step": 4170, "time": 71.97 }, { "epoch": 3.23, "learning_rate": "2.1367e-05", "loss": 0.6606, "slid_loss": 0.6563, "step": 4171, "time": 71.62 }, { "epoch": 3.23, "learning_rate": "2.1358e-05", "loss": 0.6433, "slid_loss": 0.6565, "step": 4172, "time": 73.61 }, { "epoch": 3.23, "learning_rate": "2.1350e-05", "loss": 0.6443, "slid_loss": 0.6564, "step": 4173, "time": 72.02 }, { "epoch": 3.23, "learning_rate": "2.1341e-05", "loss": 0.5997, "slid_loss": 0.6557, "step": 4174, "time": 70.37 }, { "epoch": 3.23, "learning_rate": "2.1332e-05", "loss": 0.7139, "slid_loss": 0.6562, "step": 4175, "time": 71.7 }, { "epoch": 3.23, "learning_rate": "2.1323e-05", "loss": 0.6669, "slid_loss": 0.6562, "step": 4176, "time": 71.71 }, { "epoch": 3.23, "learning_rate": "2.1314e-05", "loss": 0.6926, "slid_loss": 0.6566, "step": 4177, "time": 71.51 }, { "epoch": 3.23, "learning_rate": "2.1305e-05", "loss": 0.6837, "slid_loss": 0.6567, "step": 4178, "time": 71.62 }, { "epoch": 3.23, "learning_rate": "2.1296e-05", "loss": 0.641, "slid_loss": 0.6564, "step": 4179, "time": 70.88 }, { "epoch": 3.23, "learning_rate": "2.1288e-05", "loss": 0.6633, "slid_loss": 0.6566, "step": 4180, "time": 72.5 }, { "epoch": 3.23, "learning_rate": "2.1279e-05", "loss": 0.627, "slid_loss": 0.6563, "step": 4181, "time": 73.66 }, { "epoch": 3.24, "learning_rate": "2.1270e-05", "loss": 0.6138, "slid_loss": 0.656, "step": 4182, "time": 70.73 }, { "epoch": 3.24, "learning_rate": "2.1261e-05", "loss": 0.6452, "slid_loss": 0.6561, "step": 4183, "time": 73.07 }, { "epoch": 3.24, "learning_rate": "2.1252e-05", "loss": 0.6164, "slid_loss": 0.6557, "step": 4184, "time": 72.44 }, { "epoch": 3.24, "learning_rate": "2.1243e-05", "loss": 0.6389, "slid_loss": 0.6557, "step": 4185, "time": 71.74 }, { "epoch": 3.24, "learning_rate": "2.1235e-05", "loss": 0.6846, "slid_loss": 0.6562, "step": 4186, "time": 72.26 }, { "epoch": 3.24, "learning_rate": "2.1226e-05", "loss": 0.6623, "slid_loss": 0.6562, "step": 4187, "time": 70.98 }, { "epoch": 3.24, "learning_rate": "2.1217e-05", "loss": 0.6361, "slid_loss": 0.6555, "step": 4188, "time": 72.68 }, { "epoch": 3.24, "learning_rate": "2.1208e-05", "loss": 0.6685, "slid_loss": 0.6559, "step": 4189, "time": 70.69 }, { "epoch": 3.24, "learning_rate": "2.1199e-05", "loss": 0.6116, "slid_loss": 0.6556, "step": 4190, "time": 73.29 }, { "epoch": 3.24, "learning_rate": "2.1190e-05", "loss": 0.672, "slid_loss": 0.6555, "step": 4191, "time": 69.93 }, { "epoch": 3.24, "learning_rate": "2.1182e-05", "loss": 0.6755, "slid_loss": 0.6555, "step": 4192, "time": 71.12 }, { "epoch": 3.24, "learning_rate": "2.1173e-05", "loss": 0.67, "slid_loss": 0.6556, "step": 4193, "time": 71.23 }, { "epoch": 3.24, "learning_rate": "2.1164e-05", "loss": 0.6585, "slid_loss": 0.6552, "step": 4194, "time": 70.21 }, { "epoch": 3.25, "learning_rate": "2.1155e-05", "loss": 0.6516, "slid_loss": 0.6549, "step": 4195, "time": 71.45 }, { "epoch": 3.25, "learning_rate": "2.1146e-05", "loss": 0.5957, "slid_loss": 0.6544, "step": 4196, "time": 70.82 }, { "epoch": 3.25, "learning_rate": "2.1138e-05", "loss": 0.6382, "slid_loss": 0.6539, "step": 4197, "time": 71.44 }, { "epoch": 3.25, "learning_rate": "2.1129e-05", "loss": 0.6612, "slid_loss": 0.6542, "step": 4198, "time": 71.5 }, { "epoch": 3.25, "learning_rate": "2.1120e-05", "loss": 0.6261, "slid_loss": 0.6539, "step": 4199, "time": 71.07 }, { "epoch": 3.25, "learning_rate": "2.1111e-05", "loss": 0.6529, "slid_loss": 0.6541, "step": 4200, "time": 72.06 }, { "epoch": 3.25, "learning_rate": "2.1102e-05", "loss": 0.6583, "slid_loss": 0.654, "step": 4201, "time": 755.39 }, { "epoch": 3.25, "learning_rate": "2.1094e-05", "loss": 0.6497, "slid_loss": 0.654, "step": 4202, "time": 70.56 }, { "epoch": 3.25, "learning_rate": "2.1085e-05", "loss": 0.6595, "slid_loss": 0.6541, "step": 4203, "time": 71.67 }, { "epoch": 3.25, "learning_rate": "2.1076e-05", "loss": 0.6719, "slid_loss": 0.6543, "step": 4204, "time": 71.66 }, { "epoch": 3.25, "learning_rate": "2.1067e-05", "loss": 0.668, "slid_loss": 0.6542, "step": 4205, "time": 70.8 }, { "epoch": 3.25, "learning_rate": "2.1058e-05", "loss": 0.6434, "slid_loss": 0.6539, "step": 4206, "time": 71.61 }, { "epoch": 3.25, "learning_rate": "2.1050e-05", "loss": 0.621, "slid_loss": 0.654, "step": 4207, "time": 72.41 }, { "epoch": 3.26, "learning_rate": "2.1041e-05", "loss": 0.6687, "slid_loss": 0.6541, "step": 4208, "time": 71.0 }, { "epoch": 3.26, "learning_rate": "2.1032e-05", "loss": 0.6476, "slid_loss": 0.6543, "step": 4209, "time": 73.36 }, { "epoch": 3.26, "learning_rate": "2.1023e-05", "loss": 0.6654, "slid_loss": 0.6546, "step": 4210, "time": 71.09 }, { "epoch": 3.26, "learning_rate": "2.1014e-05", "loss": 0.6202, "slid_loss": 0.6542, "step": 4211, "time": 72.4 }, { "epoch": 3.26, "learning_rate": "2.1006e-05", "loss": 0.652, "slid_loss": 0.6541, "step": 4212, "time": 71.47 }, { "epoch": 3.26, "learning_rate": "2.0997e-05", "loss": 0.6404, "slid_loss": 0.6534, "step": 4213, "time": 73.06 }, { "epoch": 3.26, "learning_rate": "2.0988e-05", "loss": 0.6373, "slid_loss": 0.6534, "step": 4214, "time": 73.15 }, { "epoch": 3.26, "learning_rate": "2.0979e-05", "loss": 0.6719, "slid_loss": 0.6534, "step": 4215, "time": 71.83 }, { "epoch": 3.26, "learning_rate": "2.0971e-05", "loss": 0.6302, "slid_loss": 0.6531, "step": 4216, "time": 72.04 }, { "epoch": 3.26, "learning_rate": "2.0962e-05", "loss": 0.6278, "slid_loss": 0.6525, "step": 4217, "time": 71.69 }, { "epoch": 3.26, "learning_rate": "2.0953e-05", "loss": 0.6784, "slid_loss": 0.653, "step": 4218, "time": 70.97 }, { "epoch": 3.26, "learning_rate": "2.0944e-05", "loss": 0.655, "slid_loss": 0.6527, "step": 4219, "time": 71.82 }, { "epoch": 3.26, "learning_rate": "2.0936e-05", "loss": 0.6616, "slid_loss": 0.6528, "step": 4220, "time": 71.36 }, { "epoch": 3.27, "learning_rate": "2.0927e-05", "loss": 0.7227, "slid_loss": 0.6531, "step": 4221, "time": 71.45 }, { "epoch": 3.27, "learning_rate": "2.0918e-05", "loss": 0.6437, "slid_loss": 0.6528, "step": 4222, "time": 72.01 }, { "epoch": 3.27, "learning_rate": "2.0909e-05", "loss": 0.6793, "slid_loss": 0.6531, "step": 4223, "time": 72.06 }, { "epoch": 3.27, "learning_rate": "2.0900e-05", "loss": 0.6692, "slid_loss": 0.653, "step": 4224, "time": 71.93 }, { "epoch": 3.27, "learning_rate": "2.0892e-05", "loss": 0.6401, "slid_loss": 0.6528, "step": 4225, "time": 71.31 }, { "epoch": 3.27, "learning_rate": "2.0883e-05", "loss": 0.6246, "slid_loss": 0.6526, "step": 4226, "time": 71.8 }, { "epoch": 3.27, "learning_rate": "2.0874e-05", "loss": 0.6495, "slid_loss": 0.6526, "step": 4227, "time": 71.61 }, { "epoch": 3.27, "learning_rate": "2.0866e-05", "loss": 0.6533, "slid_loss": 0.6529, "step": 4228, "time": 71.18 }, { "epoch": 3.27, "learning_rate": "2.0857e-05", "loss": 0.6608, "slid_loss": 0.6528, "step": 4229, "time": 71.62 }, { "epoch": 3.27, "learning_rate": "2.0848e-05", "loss": 0.6331, "slid_loss": 0.6524, "step": 4230, "time": 71.46 }, { "epoch": 3.27, "learning_rate": "2.0839e-05", "loss": 0.6545, "slid_loss": 0.6525, "step": 4231, "time": 71.88 }, { "epoch": 3.27, "learning_rate": "2.0831e-05", "loss": 0.6608, "slid_loss": 0.6526, "step": 4232, "time": 71.37 }, { "epoch": 3.28, "learning_rate": "2.0822e-05", "loss": 0.6402, "slid_loss": 0.6522, "step": 4233, "time": 72.46 }, { "epoch": 3.28, "learning_rate": "2.0813e-05", "loss": 0.633, "slid_loss": 0.6519, "step": 4234, "time": 71.51 }, { "epoch": 3.28, "learning_rate": "2.0804e-05", "loss": 0.6054, "slid_loss": 0.6516, "step": 4235, "time": 72.22 }, { "epoch": 3.28, "learning_rate": "2.0796e-05", "loss": 0.633, "slid_loss": 0.6513, "step": 4236, "time": 72.6 }, { "epoch": 3.28, "learning_rate": "2.0787e-05", "loss": 0.6332, "slid_loss": 0.651, "step": 4237, "time": 71.35 }, { "epoch": 3.28, "learning_rate": "2.0778e-05", "loss": 0.6288, "slid_loss": 0.6509, "step": 4238, "time": 71.23 }, { "epoch": 3.28, "learning_rate": "2.0770e-05", "loss": 0.6715, "slid_loss": 0.6512, "step": 4239, "time": 72.93 }, { "epoch": 3.28, "learning_rate": "2.0761e-05", "loss": 0.6188, "slid_loss": 0.6507, "step": 4240, "time": 70.75 }, { "epoch": 3.28, "learning_rate": "2.0752e-05", "loss": 0.6415, "slid_loss": 0.6504, "step": 4241, "time": 72.28 }, { "epoch": 3.28, "learning_rate": "2.0743e-05", "loss": 0.6706, "slid_loss": 0.6502, "step": 4242, "time": 70.93 }, { "epoch": 3.28, "learning_rate": "2.0735e-05", "loss": 0.637, "slid_loss": 0.6496, "step": 4243, "time": 71.7 }, { "epoch": 3.28, "learning_rate": "2.0726e-05", "loss": 0.6458, "slid_loss": 0.6498, "step": 4244, "time": 71.67 }, { "epoch": 3.28, "learning_rate": "2.0717e-05", "loss": 0.6639, "slid_loss": 0.6498, "step": 4245, "time": 71.23 }, { "epoch": 3.29, "learning_rate": "2.0709e-05", "loss": 0.6574, "slid_loss": 0.6499, "step": 4246, "time": 72.0 }, { "epoch": 3.29, "learning_rate": "2.0700e-05", "loss": 0.6567, "slid_loss": 0.65, "step": 4247, "time": 71.44 }, { "epoch": 3.29, "learning_rate": "2.0691e-05", "loss": 0.6466, "slid_loss": 0.6505, "step": 4248, "time": 72.24 }, { "epoch": 3.29, "learning_rate": "2.0682e-05", "loss": 0.6528, "slid_loss": 0.6505, "step": 4249, "time": 72.08 }, { "epoch": 3.29, "learning_rate": "2.0674e-05", "loss": 0.7003, "slid_loss": 0.6507, "step": 4250, "time": 71.41 }, { "epoch": 3.29, "learning_rate": "2.0665e-05", "loss": 0.652, "slid_loss": 0.6504, "step": 4251, "time": 70.83 }, { "epoch": 3.29, "learning_rate": "2.0656e-05", "loss": 0.672, "slid_loss": 0.6508, "step": 4252, "time": 71.68 }, { "epoch": 3.29, "learning_rate": "2.0648e-05", "loss": 0.7035, "slid_loss": 0.6512, "step": 4253, "time": 71.58 }, { "epoch": 3.29, "learning_rate": "2.0639e-05", "loss": 0.6592, "slid_loss": 0.6512, "step": 4254, "time": 71.91 }, { "epoch": 3.29, "learning_rate": "2.0630e-05", "loss": 0.6713, "slid_loss": 0.6513, "step": 4255, "time": 72.98 }, { "epoch": 3.29, "learning_rate": "2.0622e-05", "loss": 0.6488, "slid_loss": 0.6514, "step": 4256, "time": 72.51 }, { "epoch": 3.29, "learning_rate": "2.0613e-05", "loss": 0.6471, "slid_loss": 0.6516, "step": 4257, "time": 72.24 }, { "epoch": 3.29, "learning_rate": "2.0604e-05", "loss": 0.5893, "slid_loss": 0.6514, "step": 4258, "time": 70.73 }, { "epoch": 3.3, "learning_rate": "2.0596e-05", "loss": 0.6466, "slid_loss": 0.6512, "step": 4259, "time": 72.53 }, { "epoch": 3.3, "learning_rate": "2.0587e-05", "loss": 0.6217, "slid_loss": 0.651, "step": 4260, "time": 71.4 }, { "epoch": 3.3, "learning_rate": "2.0578e-05", "loss": 0.6504, "slid_loss": 0.651, "step": 4261, "time": 72.22 }, { "epoch": 3.3, "learning_rate": "2.0570e-05", "loss": 0.6369, "slid_loss": 0.6512, "step": 4262, "time": 70.38 }, { "epoch": 3.3, "learning_rate": "2.0561e-05", "loss": 0.6282, "slid_loss": 0.6508, "step": 4263, "time": 71.09 }, { "epoch": 3.3, "learning_rate": "2.0552e-05", "loss": 0.6875, "slid_loss": 0.6509, "step": 4264, "time": 70.98 }, { "epoch": 3.3, "learning_rate": "2.0544e-05", "loss": 0.6275, "slid_loss": 0.6508, "step": 4265, "time": 73.17 }, { "epoch": 3.3, "learning_rate": "2.0535e-05", "loss": 0.6778, "slid_loss": 0.6509, "step": 4266, "time": 70.67 }, { "epoch": 3.3, "learning_rate": "2.0526e-05", "loss": 0.667, "slid_loss": 0.6511, "step": 4267, "time": 71.8 }, { "epoch": 3.3, "learning_rate": "2.0518e-05", "loss": 0.6336, "slid_loss": 0.651, "step": 4268, "time": 72.16 }, { "epoch": 3.3, "learning_rate": "2.0509e-05", "loss": 0.6248, "slid_loss": 0.6509, "step": 4269, "time": 71.77 }, { "epoch": 3.3, "learning_rate": "2.0500e-05", "loss": 0.6751, "slid_loss": 0.6509, "step": 4270, "time": 71.67 }, { "epoch": 3.3, "learning_rate": "2.0492e-05", "loss": 0.6234, "slid_loss": 0.6505, "step": 4271, "time": 70.63 }, { "epoch": 3.31, "learning_rate": "2.0483e-05", "loss": 0.6162, "slid_loss": 0.6503, "step": 4272, "time": 72.03 }, { "epoch": 3.31, "learning_rate": "2.0474e-05", "loss": 0.6301, "slid_loss": 0.6501, "step": 4273, "time": 72.25 }, { "epoch": 3.31, "learning_rate": "2.0466e-05", "loss": 0.6294, "slid_loss": 0.6504, "step": 4274, "time": 71.29 }, { "epoch": 3.31, "learning_rate": "2.0457e-05", "loss": 0.6611, "slid_loss": 0.6499, "step": 4275, "time": 72.57 }, { "epoch": 3.31, "learning_rate": "2.0449e-05", "loss": 0.6526, "slid_loss": 0.6498, "step": 4276, "time": 91.24 }, { "epoch": 3.31, "learning_rate": "2.0440e-05", "loss": 0.6226, "slid_loss": 0.6491, "step": 4277, "time": 71.98 }, { "epoch": 3.31, "learning_rate": "2.0431e-05", "loss": 0.6766, "slid_loss": 0.649, "step": 4278, "time": 72.26 }, { "epoch": 3.31, "learning_rate": "2.0423e-05", "loss": 0.6732, "slid_loss": 0.6493, "step": 4279, "time": 83.52 }, { "epoch": 3.31, "learning_rate": "2.0414e-05", "loss": 0.6396, "slid_loss": 0.6491, "step": 4280, "time": 83.92 }, { "epoch": 3.31, "learning_rate": "2.0405e-05", "loss": 0.6195, "slid_loss": 0.649, "step": 4281, "time": 70.31 }, { "epoch": 3.31, "learning_rate": "2.0397e-05", "loss": 0.6215, "slid_loss": 0.6491, "step": 4282, "time": 83.92 }, { "epoch": 3.31, "learning_rate": "2.0388e-05", "loss": 0.6719, "slid_loss": 0.6493, "step": 4283, "time": 97.36 }, { "epoch": 3.31, "learning_rate": "2.0380e-05", "loss": 0.6533, "slid_loss": 0.6497, "step": 4284, "time": 101.43 }, { "epoch": 3.32, "learning_rate": "2.0371e-05", "loss": 0.6263, "slid_loss": 0.6496, "step": 4285, "time": 87.58 }, { "epoch": 3.32, "learning_rate": "2.0362e-05", "loss": 0.6597, "slid_loss": 0.6493, "step": 4286, "time": 112.76 }, { "epoch": 3.32, "learning_rate": "2.0354e-05", "loss": 0.6875, "slid_loss": 0.6496, "step": 4287, "time": 107.24 }, { "epoch": 3.32, "learning_rate": "2.0345e-05", "loss": 0.6383, "slid_loss": 0.6496, "step": 4288, "time": 135.66 }, { "epoch": 3.32, "learning_rate": "2.0337e-05", "loss": 0.6207, "slid_loss": 0.6491, "step": 4289, "time": 159.91 }, { "epoch": 3.32, "learning_rate": "2.0328e-05", "loss": 0.643, "slid_loss": 0.6494, "step": 4290, "time": 148.88 }, { "epoch": 3.32, "learning_rate": "2.0319e-05", "loss": 0.6657, "slid_loss": 0.6494, "step": 4291, "time": 164.05 }, { "epoch": 3.32, "learning_rate": "2.0311e-05", "loss": 0.6407, "slid_loss": 0.649, "step": 4292, "time": 136.29 }, { "epoch": 3.32, "learning_rate": "2.0302e-05", "loss": 0.642, "slid_loss": 0.6488, "step": 4293, "time": 127.13 }, { "epoch": 3.32, "learning_rate": "2.0294e-05", "loss": 0.6212, "slid_loss": 0.6484, "step": 4294, "time": 110.82 }, { "epoch": 3.32, "learning_rate": "2.0285e-05", "loss": 0.6584, "slid_loss": 0.6484, "step": 4295, "time": 151.32 }, { "epoch": 3.32, "learning_rate": "2.0276e-05", "loss": 0.6347, "slid_loss": 0.6488, "step": 4296, "time": 134.56 }, { "epoch": 3.32, "learning_rate": "2.0268e-05", "loss": 0.6548, "slid_loss": 0.649, "step": 4297, "time": 108.71 }, { "epoch": 3.33, "learning_rate": "2.0259e-05", "loss": 0.6599, "slid_loss": 0.649, "step": 4298, "time": 94.15 }, { "epoch": 3.33, "learning_rate": "2.0251e-05", "loss": 0.6372, "slid_loss": 0.6491, "step": 4299, "time": 98.48 }, { "epoch": 3.33, "learning_rate": "2.0242e-05", "loss": 0.6508, "slid_loss": 0.6491, "step": 4300, "time": 98.43 }, { "epoch": 3.33, "learning_rate": "2.0233e-05", "loss": 0.6733, "slid_loss": 0.6492, "step": 4301, "time": 96.24 }, { "epoch": 3.33, "learning_rate": "2.0225e-05", "loss": 0.6101, "slid_loss": 0.6488, "step": 4302, "time": 70.52 }, { "epoch": 3.33, "learning_rate": "2.0216e-05", "loss": 0.6521, "slid_loss": 0.6488, "step": 4303, "time": 72.42 }, { "epoch": 3.33, "learning_rate": "2.0208e-05", "loss": 0.659, "slid_loss": 0.6486, "step": 4304, "time": 70.16 }, { "epoch": 3.33, "learning_rate": "2.0199e-05", "loss": 0.6504, "slid_loss": 0.6485, "step": 4305, "time": 84.47 }, { "epoch": 3.33, "learning_rate": "2.0191e-05", "loss": 0.6348, "slid_loss": 0.6484, "step": 4306, "time": 72.27 }, { "epoch": 3.33, "learning_rate": "2.0182e-05", "loss": 0.66, "slid_loss": 0.6488, "step": 4307, "time": 70.58 }, { "epoch": 3.33, "learning_rate": "2.0174e-05", "loss": 0.6064, "slid_loss": 0.6481, "step": 4308, "time": 72.68 }, { "epoch": 3.33, "learning_rate": "2.0165e-05", "loss": 0.6757, "slid_loss": 0.6484, "step": 4309, "time": 71.9 }, { "epoch": 3.33, "learning_rate": "2.0156e-05", "loss": 0.6406, "slid_loss": 0.6482, "step": 4310, "time": 72.56 }, { "epoch": 3.34, "learning_rate": "2.0148e-05", "loss": 0.6826, "slid_loss": 0.6488, "step": 4311, "time": 71.03 }, { "epoch": 3.34, "learning_rate": "2.0139e-05", "loss": 0.649, "slid_loss": 0.6488, "step": 4312, "time": 71.9 }, { "epoch": 3.34, "learning_rate": "2.0131e-05", "loss": 0.6406, "slid_loss": 0.6488, "step": 4313, "time": 71.53 }, { "epoch": 3.34, "learning_rate": "2.0122e-05", "loss": 0.6463, "slid_loss": 0.6489, "step": 4314, "time": 71.18 }, { "epoch": 3.34, "learning_rate": "2.0114e-05", "loss": 0.6788, "slid_loss": 0.6489, "step": 4315, "time": 72.82 }, { "epoch": 3.34, "learning_rate": "2.0105e-05", "loss": 0.6389, "slid_loss": 0.649, "step": 4316, "time": 70.62 }, { "epoch": 3.34, "learning_rate": "2.0097e-05", "loss": 0.6615, "slid_loss": 0.6494, "step": 4317, "time": 73.38 }, { "epoch": 3.34, "learning_rate": "2.0088e-05", "loss": 0.6465, "slid_loss": 0.649, "step": 4318, "time": 70.59 }, { "epoch": 3.34, "learning_rate": "2.0080e-05", "loss": 0.6454, "slid_loss": 0.6489, "step": 4319, "time": 71.63 }, { "epoch": 3.34, "learning_rate": "2.0071e-05", "loss": 0.6397, "slid_loss": 0.6487, "step": 4320, "time": 71.84 }, { "epoch": 3.34, "learning_rate": "2.0062e-05", "loss": 0.65, "slid_loss": 0.648, "step": 4321, "time": 72.69 }, { "epoch": 3.34, "learning_rate": "2.0054e-05", "loss": 0.64, "slid_loss": 0.648, "step": 4322, "time": 73.95 }, { "epoch": 3.34, "learning_rate": "2.0045e-05", "loss": 0.6678, "slid_loss": 0.6478, "step": 4323, "time": 72.17 }, { "epoch": 3.35, "learning_rate": "2.0037e-05", "loss": 0.6554, "slid_loss": 0.6477, "step": 4324, "time": 71.28 }, { "epoch": 3.35, "learning_rate": "2.0028e-05", "loss": 0.6471, "slid_loss": 0.6478, "step": 4325, "time": 70.78 }, { "epoch": 3.35, "learning_rate": "2.0020e-05", "loss": 0.6083, "slid_loss": 0.6476, "step": 4326, "time": 72.82 }, { "epoch": 3.35, "learning_rate": "2.0011e-05", "loss": 0.6811, "slid_loss": 0.6479, "step": 4327, "time": 71.31 }, { "epoch": 3.35, "learning_rate": "2.0003e-05", "loss": 0.6561, "slid_loss": 0.648, "step": 4328, "time": 71.35 }, { "epoch": 3.35, "learning_rate": "1.9994e-05", "loss": 0.6221, "slid_loss": 0.6476, "step": 4329, "time": 72.78 }, { "epoch": 3.35, "learning_rate": "1.9986e-05", "loss": 0.6487, "slid_loss": 0.6477, "step": 4330, "time": 72.08 }, { "epoch": 3.35, "learning_rate": "1.9977e-05", "loss": 0.6458, "slid_loss": 0.6476, "step": 4331, "time": 71.63 }, { "epoch": 3.35, "learning_rate": "1.9969e-05", "loss": 0.6176, "slid_loss": 0.6472, "step": 4332, "time": 70.6 }, { "epoch": 3.35, "learning_rate": "1.9960e-05", "loss": 0.6491, "slid_loss": 0.6473, "step": 4333, "time": 72.44 }, { "epoch": 3.35, "learning_rate": "1.9952e-05", "loss": 0.6358, "slid_loss": 0.6473, "step": 4334, "time": 71.24 }, { "epoch": 3.35, "learning_rate": "1.9943e-05", "loss": 0.6534, "slid_loss": 0.6478, "step": 4335, "time": 71.45 }, { "epoch": 3.35, "learning_rate": "1.9935e-05", "loss": 0.7015, "slid_loss": 0.6485, "step": 4336, "time": 71.52 }, { "epoch": 3.36, "learning_rate": "1.9926e-05", "loss": 0.6448, "slid_loss": 0.6486, "step": 4337, "time": 72.21 }, { "epoch": 3.36, "learning_rate": "1.9918e-05", "loss": 0.6644, "slid_loss": 0.649, "step": 4338, "time": 71.03 }, { "epoch": 3.36, "learning_rate": "1.9909e-05", "loss": 0.6212, "slid_loss": 0.6485, "step": 4339, "time": 72.26 }, { "epoch": 3.36, "learning_rate": "1.9901e-05", "loss": 0.6434, "slid_loss": 0.6487, "step": 4340, "time": 71.73 }, { "epoch": 3.36, "learning_rate": "1.9892e-05", "loss": 0.6479, "slid_loss": 0.6488, "step": 4341, "time": 71.55 }, { "epoch": 3.36, "learning_rate": "1.9884e-05", "loss": 0.6415, "slid_loss": 0.6485, "step": 4342, "time": 70.93 }, { "epoch": 3.36, "learning_rate": "1.9875e-05", "loss": 0.6569, "slid_loss": 0.6487, "step": 4343, "time": 70.77 }, { "epoch": 3.36, "learning_rate": "1.9867e-05", "loss": 0.6637, "slid_loss": 0.6489, "step": 4344, "time": 72.27 }, { "epoch": 3.36, "learning_rate": "1.9859e-05", "loss": 0.6597, "slid_loss": 0.6488, "step": 4345, "time": 72.06 }, { "epoch": 3.36, "learning_rate": "1.9850e-05", "loss": 0.6012, "slid_loss": 0.6482, "step": 4346, "time": 71.73 }, { "epoch": 3.36, "learning_rate": "1.9842e-05", "loss": 0.6621, "slid_loss": 0.6483, "step": 4347, "time": 71.14 }, { "epoch": 3.36, "learning_rate": "1.9833e-05", "loss": 0.6423, "slid_loss": 0.6483, "step": 4348, "time": 70.85 }, { "epoch": 3.36, "learning_rate": "1.9825e-05", "loss": 0.6268, "slid_loss": 0.648, "step": 4349, "time": 70.98 }, { "epoch": 3.37, "learning_rate": "1.9816e-05", "loss": 0.6257, "slid_loss": 0.6473, "step": 4350, "time": 71.43 }, { "epoch": 3.37, "learning_rate": "1.9808e-05", "loss": 0.634, "slid_loss": 0.6471, "step": 4351, "time": 72.06 }, { "epoch": 3.37, "learning_rate": "1.9799e-05", "loss": 0.6374, "slid_loss": 0.6467, "step": 4352, "time": 70.32 }, { "epoch": 3.37, "learning_rate": "1.9791e-05", "loss": 0.6439, "slid_loss": 0.6461, "step": 4353, "time": 75.71 }, { "epoch": 3.37, "learning_rate": "1.9782e-05", "loss": 0.6345, "slid_loss": 0.6459, "step": 4354, "time": 72.13 }, { "epoch": 3.37, "learning_rate": "1.9774e-05", "loss": 0.6146, "slid_loss": 0.6453, "step": 4355, "time": 72.76 }, { "epoch": 3.37, "learning_rate": "1.9766e-05", "loss": 0.6626, "slid_loss": 0.6455, "step": 4356, "time": 72.03 }, { "epoch": 3.37, "learning_rate": "1.9757e-05", "loss": 0.6657, "slid_loss": 0.6456, "step": 4357, "time": 72.4 }, { "epoch": 3.37, "learning_rate": "1.9749e-05", "loss": 0.6712, "slid_loss": 0.6465, "step": 4358, "time": 72.38 }, { "epoch": 3.37, "learning_rate": "1.9740e-05", "loss": 0.6548, "slid_loss": 0.6465, "step": 4359, "time": 72.07 }, { "epoch": 3.37, "learning_rate": "1.9732e-05", "loss": 0.6318, "slid_loss": 0.6466, "step": 4360, "time": 70.21 }, { "epoch": 3.37, "learning_rate": "1.9723e-05", "loss": 0.6255, "slid_loss": 0.6464, "step": 4361, "time": 73.58 }, { "epoch": 3.37, "learning_rate": "1.9715e-05", "loss": 0.6424, "slid_loss": 0.6464, "step": 4362, "time": 71.55 }, { "epoch": 3.38, "learning_rate": "1.9706e-05", "loss": 0.648, "slid_loss": 0.6466, "step": 4363, "time": 72.86 }, { "epoch": 3.38, "learning_rate": "1.9698e-05", "loss": 0.6474, "slid_loss": 0.6462, "step": 4364, "time": 71.68 }, { "epoch": 3.38, "learning_rate": "1.9690e-05", "loss": 0.6301, "slid_loss": 0.6463, "step": 4365, "time": 73.53 }, { "epoch": 3.38, "learning_rate": "1.9681e-05", "loss": 0.6547, "slid_loss": 0.646, "step": 4366, "time": 70.96 }, { "epoch": 3.38, "learning_rate": "1.9673e-05", "loss": 0.6856, "slid_loss": 0.6462, "step": 4367, "time": 70.15 }, { "epoch": 3.38, "learning_rate": "1.9664e-05", "loss": 0.6593, "slid_loss": 0.6465, "step": 4368, "time": 72.9 }, { "epoch": 3.38, "learning_rate": "1.9656e-05", "loss": 0.6878, "slid_loss": 0.6471, "step": 4369, "time": 70.83 }, { "epoch": 3.38, "learning_rate": "1.9648e-05", "loss": 0.6431, "slid_loss": 0.6468, "step": 4370, "time": 71.59 }, { "epoch": 3.38, "learning_rate": "1.9639e-05", "loss": 0.6422, "slid_loss": 0.647, "step": 4371, "time": 71.84 }, { "epoch": 3.38, "learning_rate": "1.9631e-05", "loss": 0.6369, "slid_loss": 0.6472, "step": 4372, "time": 71.97 }, { "epoch": 3.38, "learning_rate": "1.9622e-05", "loss": 0.6236, "slid_loss": 0.6471, "step": 4373, "time": 73.17 }, { "epoch": 3.38, "learning_rate": "1.9614e-05", "loss": 0.6317, "slid_loss": 0.6471, "step": 4374, "time": 72.74 }, { "epoch": 3.38, "learning_rate": "1.9606e-05", "loss": 0.6201, "slid_loss": 0.6467, "step": 4375, "time": 72.14 }, { "epoch": 3.39, "learning_rate": "1.9597e-05", "loss": 0.66, "slid_loss": 0.6468, "step": 4376, "time": 71.45 }, { "epoch": 3.39, "learning_rate": "1.9589e-05", "loss": 0.601, "slid_loss": 0.6466, "step": 4377, "time": 71.88 }, { "epoch": 3.39, "learning_rate": "1.9580e-05", "loss": 0.6418, "slid_loss": 0.6462, "step": 4378, "time": 71.86 }, { "epoch": 3.39, "learning_rate": "1.9572e-05", "loss": 0.6215, "slid_loss": 0.6457, "step": 4379, "time": 71.11 }, { "epoch": 3.39, "learning_rate": "1.9564e-05", "loss": 0.6214, "slid_loss": 0.6455, "step": 4380, "time": 72.02 }, { "epoch": 3.39, "learning_rate": "1.9555e-05", "loss": 0.6286, "slid_loss": 0.6456, "step": 4381, "time": 71.87 }, { "epoch": 3.39, "learning_rate": "1.9547e-05", "loss": 0.6441, "slid_loss": 0.6459, "step": 4382, "time": 71.75 }, { "epoch": 3.39, "learning_rate": "1.9539e-05", "loss": 0.646, "slid_loss": 0.6456, "step": 4383, "time": 71.46 }, { "epoch": 3.39, "learning_rate": "1.9530e-05", "loss": 0.6292, "slid_loss": 0.6454, "step": 4384, "time": 73.6 }, { "epoch": 3.39, "learning_rate": "1.9522e-05", "loss": 0.6282, "slid_loss": 0.6454, "step": 4385, "time": 72.45 }, { "epoch": 3.39, "learning_rate": "1.9513e-05", "loss": 0.6437, "slid_loss": 0.6452, "step": 4386, "time": 72.26 }, { "epoch": 3.39, "learning_rate": "1.9505e-05", "loss": 0.6799, "slid_loss": 0.6451, "step": 4387, "time": 70.65 }, { "epoch": 3.39, "learning_rate": "1.9497e-05", "loss": 0.6383, "slid_loss": 0.6451, "step": 4388, "time": 71.93 }, { "epoch": 3.4, "learning_rate": "1.9488e-05", "loss": 0.6365, "slid_loss": 0.6453, "step": 4389, "time": 70.59 }, { "epoch": 3.4, "learning_rate": "1.9480e-05", "loss": 0.6712, "slid_loss": 0.6456, "step": 4390, "time": 71.29 }, { "epoch": 3.4, "learning_rate": "1.9472e-05", "loss": 0.6521, "slid_loss": 0.6454, "step": 4391, "time": 70.58 }, { "epoch": 3.4, "learning_rate": "1.9463e-05", "loss": 0.6439, "slid_loss": 0.6455, "step": 4392, "time": 70.58 }, { "epoch": 3.4, "learning_rate": "1.9455e-05", "loss": 0.5971, "slid_loss": 0.645, "step": 4393, "time": 71.32 }, { "epoch": 3.4, "learning_rate": "1.9447e-05", "loss": 0.6405, "slid_loss": 0.6452, "step": 4394, "time": 72.18 }, { "epoch": 3.4, "learning_rate": "1.9438e-05", "loss": 0.6242, "slid_loss": 0.6449, "step": 4395, "time": 72.26 }, { "epoch": 3.4, "learning_rate": "1.9430e-05", "loss": 0.6293, "slid_loss": 0.6448, "step": 4396, "time": 71.81 }, { "epoch": 3.4, "learning_rate": "1.9422e-05", "loss": 0.6441, "slid_loss": 0.6447, "step": 4397, "time": 71.38 }, { "epoch": 3.4, "learning_rate": "1.9413e-05", "loss": 0.6307, "slid_loss": 0.6444, "step": 4398, "time": 71.34 }, { "epoch": 3.4, "learning_rate": "1.9405e-05", "loss": 0.6696, "slid_loss": 0.6448, "step": 4399, "time": 71.29 }, { "epoch": 3.4, "learning_rate": "1.9397e-05", "loss": 0.6358, "slid_loss": 0.6446, "step": 4400, "time": 72.94 }, { "epoch": 3.41, "learning_rate": "1.9388e-05", "loss": 0.6178, "slid_loss": 0.644, "step": 4401, "time": 760.47 }, { "epoch": 3.41, "learning_rate": "1.9380e-05", "loss": 0.6528, "slid_loss": 0.6445, "step": 4402, "time": 71.84 }, { "epoch": 3.41, "learning_rate": "1.9372e-05", "loss": 0.6483, "slid_loss": 0.6444, "step": 4403, "time": 70.22 }, { "epoch": 3.41, "learning_rate": "1.9363e-05", "loss": 0.6539, "slid_loss": 0.6444, "step": 4404, "time": 71.67 }, { "epoch": 3.41, "learning_rate": "1.9355e-05", "loss": 0.6336, "slid_loss": 0.6442, "step": 4405, "time": 70.78 }, { "epoch": 3.41, "learning_rate": "1.9347e-05", "loss": 0.6672, "slid_loss": 0.6445, "step": 4406, "time": 71.58 }, { "epoch": 3.41, "learning_rate": "1.9338e-05", "loss": 0.6772, "slid_loss": 0.6447, "step": 4407, "time": 71.78 }, { "epoch": 3.41, "learning_rate": "1.9330e-05", "loss": 0.6477, "slid_loss": 0.6451, "step": 4408, "time": 71.4 }, { "epoch": 3.41, "learning_rate": "1.9322e-05", "loss": 0.6232, "slid_loss": 0.6446, "step": 4409, "time": 71.93 }, { "epoch": 3.41, "learning_rate": "1.9313e-05", "loss": 0.6343, "slid_loss": 0.6445, "step": 4410, "time": 71.56 }, { "epoch": 3.41, "learning_rate": "1.9305e-05", "loss": 0.652, "slid_loss": 0.6442, "step": 4411, "time": 70.93 }, { "epoch": 3.41, "learning_rate": "1.9297e-05", "loss": 0.6484, "slid_loss": 0.6442, "step": 4412, "time": 71.17 }, { "epoch": 3.41, "learning_rate": "1.9288e-05", "loss": 0.668, "slid_loss": 0.6445, "step": 4413, "time": 72.22 }, { "epoch": 3.42, "learning_rate": "1.9280e-05", "loss": 0.6435, "slid_loss": 0.6445, "step": 4414, "time": 70.86 }, { "epoch": 3.42, "learning_rate": "1.9272e-05", "loss": 0.6583, "slid_loss": 0.6443, "step": 4415, "time": 72.35 }, { "epoch": 3.42, "learning_rate": "1.9264e-05", "loss": 0.6595, "slid_loss": 0.6445, "step": 4416, "time": 72.26 }, { "epoch": 3.42, "learning_rate": "1.9255e-05", "loss": 0.655, "slid_loss": 0.6444, "step": 4417, "time": 71.87 }, { "epoch": 3.42, "learning_rate": "1.9247e-05", "loss": 0.6235, "slid_loss": 0.6442, "step": 4418, "time": 70.78 }, { "epoch": 3.42, "learning_rate": "1.9239e-05", "loss": 0.6507, "slid_loss": 0.6442, "step": 4419, "time": 72.69 }, { "epoch": 3.42, "learning_rate": "1.9230e-05", "loss": 0.6722, "slid_loss": 0.6446, "step": 4420, "time": 70.9 }, { "epoch": 3.42, "learning_rate": "1.9222e-05", "loss": 0.6232, "slid_loss": 0.6443, "step": 4421, "time": 72.24 }, { "epoch": 3.42, "learning_rate": "1.9214e-05", "loss": 0.6536, "slid_loss": 0.6444, "step": 4422, "time": 71.63 }, { "epoch": 3.42, "learning_rate": "1.9206e-05", "loss": 0.648, "slid_loss": 0.6442, "step": 4423, "time": 72.04 }, { "epoch": 3.42, "learning_rate": "1.9197e-05", "loss": 0.6627, "slid_loss": 0.6443, "step": 4424, "time": 71.25 }, { "epoch": 3.42, "learning_rate": "1.9189e-05", "loss": 0.6004, "slid_loss": 0.6438, "step": 4425, "time": 72.91 }, { "epoch": 3.42, "learning_rate": "1.9181e-05", "loss": 0.6393, "slid_loss": 0.6441, "step": 4426, "time": 71.26 }, { "epoch": 3.43, "learning_rate": "1.9173e-05", "loss": 0.6242, "slid_loss": 0.6436, "step": 4427, "time": 70.9 }, { "epoch": 3.43, "learning_rate": "1.9164e-05", "loss": 0.6028, "slid_loss": 0.643, "step": 4428, "time": 72.1 }, { "epoch": 3.43, "learning_rate": "1.9156e-05", "loss": 0.6307, "slid_loss": 0.6431, "step": 4429, "time": 71.65 }, { "epoch": 3.43, "learning_rate": "1.9148e-05", "loss": 0.6551, "slid_loss": 0.6432, "step": 4430, "time": 71.23 }, { "epoch": 3.43, "learning_rate": "1.9140e-05", "loss": 0.6745, "slid_loss": 0.6435, "step": 4431, "time": 71.55 }, { "epoch": 3.43, "learning_rate": "1.9131e-05", "loss": 0.638, "slid_loss": 0.6437, "step": 4432, "time": 71.24 }, { "epoch": 3.43, "learning_rate": "1.9123e-05", "loss": 0.6616, "slid_loss": 0.6438, "step": 4433, "time": 70.78 }, { "epoch": 3.43, "learning_rate": "1.9115e-05", "loss": 0.6244, "slid_loss": 0.6437, "step": 4434, "time": 72.9 }, { "epoch": 3.43, "learning_rate": "1.9107e-05", "loss": 0.6296, "slid_loss": 0.6435, "step": 4435, "time": 87.42 }, { "epoch": 3.43, "learning_rate": "1.9098e-05", "loss": 0.6664, "slid_loss": 0.6431, "step": 4436, "time": 70.76 }, { "epoch": 3.43, "learning_rate": "1.9090e-05", "loss": 0.6574, "slid_loss": 0.6432, "step": 4437, "time": 72.03 }, { "epoch": 3.43, "learning_rate": "1.9082e-05", "loss": 0.6645, "slid_loss": 0.6432, "step": 4438, "time": 72.03 }, { "epoch": 3.43, "learning_rate": "1.9074e-05", "loss": 0.6739, "slid_loss": 0.6438, "step": 4439, "time": 71.54 }, { "epoch": 3.44, "learning_rate": "1.9065e-05", "loss": 0.6294, "slid_loss": 0.6436, "step": 4440, "time": 100.16 }, { "epoch": 3.44, "learning_rate": "1.9057e-05", "loss": 0.6184, "slid_loss": 0.6433, "step": 4441, "time": 70.22 }, { "epoch": 3.44, "learning_rate": "1.9049e-05", "loss": 0.6404, "slid_loss": 0.6433, "step": 4442, "time": 94.49 }, { "epoch": 3.44, "learning_rate": "1.9041e-05", "loss": 0.6358, "slid_loss": 0.6431, "step": 4443, "time": 98.49 }, { "epoch": 3.44, "learning_rate": "1.9032e-05", "loss": 0.6478, "slid_loss": 0.6429, "step": 4444, "time": 110.23 }, { "epoch": 3.44, "learning_rate": "1.9024e-05", "loss": 0.5936, "slid_loss": 0.6423, "step": 4445, "time": 85.02 }, { "epoch": 3.44, "learning_rate": "1.9016e-05", "loss": 0.654, "slid_loss": 0.6428, "step": 4446, "time": 139.1 }, { "epoch": 3.44, "learning_rate": "1.9008e-05", "loss": 0.6755, "slid_loss": 0.6429, "step": 4447, "time": 111.26 }, { "epoch": 3.44, "learning_rate": "1.9000e-05", "loss": 0.6549, "slid_loss": 0.6431, "step": 4448, "time": 159.9 }, { "epoch": 3.44, "learning_rate": "1.8991e-05", "loss": 0.6703, "slid_loss": 0.6435, "step": 4449, "time": 134.26 }, { "epoch": 3.44, "learning_rate": "1.8983e-05", "loss": 0.6646, "slid_loss": 0.6439, "step": 4450, "time": 162.04 }, { "epoch": 3.44, "learning_rate": "1.8975e-05", "loss": 0.617, "slid_loss": 0.6437, "step": 4451, "time": 146.35 }, { "epoch": 3.44, "learning_rate": "1.8967e-05", "loss": 0.6614, "slid_loss": 0.644, "step": 4452, "time": 140.11 }, { "epoch": 3.45, "learning_rate": "1.8959e-05", "loss": 0.6418, "slid_loss": 0.6439, "step": 4453, "time": 94.8 }, { "epoch": 3.45, "learning_rate": "1.8950e-05", "loss": 0.6918, "slid_loss": 0.6445, "step": 4454, "time": 160.3 }, { "epoch": 3.45, "learning_rate": "1.8942e-05", "loss": 0.6052, "slid_loss": 0.6444, "step": 4455, "time": 137.48 }, { "epoch": 3.45, "learning_rate": "1.8934e-05", "loss": 0.634, "slid_loss": 0.6441, "step": 4456, "time": 132.34 }, { "epoch": 3.45, "learning_rate": "1.8926e-05", "loss": 0.6525, "slid_loss": 0.644, "step": 4457, "time": 108.02 }, { "epoch": 3.45, "learning_rate": "1.8918e-05", "loss": 0.6516, "slid_loss": 0.6438, "step": 4458, "time": 99.06 }, { "epoch": 3.45, "learning_rate": "1.8910e-05", "loss": 0.6146, "slid_loss": 0.6434, "step": 4459, "time": 94.76 }, { "epoch": 3.45, "learning_rate": "1.8901e-05", "loss": 0.6273, "slid_loss": 0.6434, "step": 4460, "time": 72.08 }, { "epoch": 3.45, "learning_rate": "1.8893e-05", "loss": 0.642, "slid_loss": 0.6435, "step": 4461, "time": 85.07 }, { "epoch": 3.45, "learning_rate": "1.8885e-05", "loss": 0.6609, "slid_loss": 0.6437, "step": 4462, "time": 82.71 }, { "epoch": 3.45, "learning_rate": "1.8877e-05", "loss": 0.7181, "slid_loss": 0.6444, "step": 4463, "time": 72.14 }, { "epoch": 3.45, "learning_rate": "1.8869e-05", "loss": 0.6357, "slid_loss": 0.6443, "step": 4464, "time": 83.12 }, { "epoch": 3.45, "learning_rate": "1.8861e-05", "loss": 0.6469, "slid_loss": 0.6445, "step": 4465, "time": 70.76 }, { "epoch": 3.46, "learning_rate": "1.8852e-05", "loss": 0.6723, "slid_loss": 0.6446, "step": 4466, "time": 72.37 }, { "epoch": 3.46, "learning_rate": "1.8844e-05", "loss": 0.6373, "slid_loss": 0.6442, "step": 4467, "time": 71.43 }, { "epoch": 3.46, "learning_rate": "1.8836e-05", "loss": 0.6579, "slid_loss": 0.6441, "step": 4468, "time": 71.54 }, { "epoch": 3.46, "learning_rate": "1.8828e-05", "loss": 0.6448, "slid_loss": 0.6437, "step": 4469, "time": 72.15 }, { "epoch": 3.46, "learning_rate": "1.8820e-05", "loss": 0.6074, "slid_loss": 0.6434, "step": 4470, "time": 74.11 }, { "epoch": 3.46, "learning_rate": "1.8812e-05", "loss": 0.6649, "slid_loss": 0.6436, "step": 4471, "time": 71.75 }, { "epoch": 3.46, "learning_rate": "1.8803e-05", "loss": 0.659, "slid_loss": 0.6438, "step": 4472, "time": 71.19 }, { "epoch": 3.46, "learning_rate": "1.8795e-05", "loss": 0.6489, "slid_loss": 0.6441, "step": 4473, "time": 71.67 }, { "epoch": 3.46, "learning_rate": "1.8787e-05", "loss": 0.656, "slid_loss": 0.6443, "step": 4474, "time": 70.52 }, { "epoch": 3.46, "learning_rate": "1.8779e-05", "loss": 0.6411, "slid_loss": 0.6445, "step": 4475, "time": 72.99 }, { "epoch": 3.46, "learning_rate": "1.8771e-05", "loss": 0.6606, "slid_loss": 0.6445, "step": 4476, "time": 73.46 }, { "epoch": 3.46, "learning_rate": "1.8763e-05", "loss": 0.6142, "slid_loss": 0.6447, "step": 4477, "time": 71.06 }, { "epoch": 3.46, "learning_rate": "1.8755e-05", "loss": 0.6894, "slid_loss": 0.6451, "step": 4478, "time": 72.19 }, { "epoch": 3.47, "learning_rate": "1.8747e-05", "loss": 0.6564, "slid_loss": 0.6455, "step": 4479, "time": 71.36 }, { "epoch": 3.47, "learning_rate": "1.8738e-05", "loss": 0.6546, "slid_loss": 0.6458, "step": 4480, "time": 71.45 }, { "epoch": 3.47, "learning_rate": "1.8730e-05", "loss": 0.6541, "slid_loss": 0.6461, "step": 4481, "time": 70.8 }, { "epoch": 3.47, "learning_rate": "1.8722e-05", "loss": 0.6793, "slid_loss": 0.6464, "step": 4482, "time": 72.79 }, { "epoch": 3.47, "learning_rate": "1.8714e-05", "loss": 0.6343, "slid_loss": 0.6463, "step": 4483, "time": 71.15 }, { "epoch": 3.47, "learning_rate": "1.8706e-05", "loss": 0.6246, "slid_loss": 0.6463, "step": 4484, "time": 72.42 }, { "epoch": 3.47, "learning_rate": "1.8698e-05", "loss": 0.6644, "slid_loss": 0.6466, "step": 4485, "time": 72.15 }, { "epoch": 3.47, "learning_rate": "1.8690e-05", "loss": 0.6536, "slid_loss": 0.6467, "step": 4486, "time": 70.79 }, { "epoch": 3.47, "learning_rate": "1.8682e-05", "loss": 0.6473, "slid_loss": 0.6464, "step": 4487, "time": 71.75 }, { "epoch": 3.47, "learning_rate": "1.8674e-05", "loss": 0.6593, "slid_loss": 0.6466, "step": 4488, "time": 71.29 }, { "epoch": 3.47, "learning_rate": "1.8665e-05", "loss": 0.6202, "slid_loss": 0.6464, "step": 4489, "time": 70.46 }, { "epoch": 3.47, "learning_rate": "1.8657e-05", "loss": 0.6561, "slid_loss": 0.6463, "step": 4490, "time": 70.48 }, { "epoch": 3.47, "learning_rate": "1.8649e-05", "loss": 0.589, "slid_loss": 0.6457, "step": 4491, "time": 72.54 }, { "epoch": 3.48, "learning_rate": "1.8641e-05", "loss": 0.6352, "slid_loss": 0.6456, "step": 4492, "time": 71.32 }, { "epoch": 3.48, "learning_rate": "1.8633e-05", "loss": 0.6484, "slid_loss": 0.6461, "step": 4493, "time": 72.57 }, { "epoch": 3.48, "learning_rate": "1.8625e-05", "loss": 0.6477, "slid_loss": 0.6462, "step": 4494, "time": 72.84 }, { "epoch": 3.48, "learning_rate": "1.8617e-05", "loss": 0.6409, "slid_loss": 0.6463, "step": 4495, "time": 72.9 }, { "epoch": 3.48, "learning_rate": "1.8609e-05", "loss": 0.6446, "slid_loss": 0.6465, "step": 4496, "time": 72.43 }, { "epoch": 3.48, "learning_rate": "1.8601e-05", "loss": 0.6193, "slid_loss": 0.6462, "step": 4497, "time": 72.46 }, { "epoch": 3.48, "learning_rate": "1.8593e-05", "loss": 0.623, "slid_loss": 0.6461, "step": 4498, "time": 72.24 }, { "epoch": 3.48, "learning_rate": "1.8585e-05", "loss": 0.6461, "slid_loss": 0.6459, "step": 4499, "time": 72.71 }, { "epoch": 3.48, "learning_rate": "1.8577e-05", "loss": 0.637, "slid_loss": 0.6459, "step": 4500, "time": 71.83 }, { "epoch": 3.48, "learning_rate": "1.8569e-05", "loss": 0.6389, "slid_loss": 0.6461, "step": 4501, "time": 72.73 }, { "epoch": 3.48, "learning_rate": "1.8560e-05", "loss": 0.6358, "slid_loss": 0.646, "step": 4502, "time": 72.44 }, { "epoch": 3.48, "learning_rate": "1.8552e-05", "loss": 0.6431, "slid_loss": 0.6459, "step": 4503, "time": 71.9 }, { "epoch": 3.48, "learning_rate": "1.8544e-05", "loss": 0.6396, "slid_loss": 0.6458, "step": 4504, "time": 71.25 }, { "epoch": 3.49, "learning_rate": "1.8536e-05", "loss": 0.6261, "slid_loss": 0.6457, "step": 4505, "time": 73.35 }, { "epoch": 3.49, "learning_rate": "1.8528e-05", "loss": 0.654, "slid_loss": 0.6456, "step": 4506, "time": 72.48 }, { "epoch": 3.49, "learning_rate": "1.8520e-05", "loss": 0.6197, "slid_loss": 0.645, "step": 4507, "time": 70.54 }, { "epoch": 3.49, "learning_rate": "1.8512e-05", "loss": 0.667, "slid_loss": 0.6452, "step": 4508, "time": 70.86 }, { "epoch": 3.49, "learning_rate": "1.8504e-05", "loss": 0.6126, "slid_loss": 0.6451, "step": 4509, "time": 71.18 }, { "epoch": 3.49, "learning_rate": "1.8496e-05", "loss": 0.6666, "slid_loss": 0.6454, "step": 4510, "time": 72.68 }, { "epoch": 3.49, "learning_rate": "1.8488e-05", "loss": 0.6183, "slid_loss": 0.6451, "step": 4511, "time": 71.48 }, { "epoch": 3.49, "learning_rate": "1.8480e-05", "loss": 0.6323, "slid_loss": 0.6449, "step": 4512, "time": 72.53 }, { "epoch": 3.49, "learning_rate": "1.8472e-05", "loss": 0.6348, "slid_loss": 0.6446, "step": 4513, "time": 71.51 }, { "epoch": 3.49, "learning_rate": "1.8464e-05", "loss": 0.6595, "slid_loss": 0.6447, "step": 4514, "time": 72.26 }, { "epoch": 3.49, "learning_rate": "1.8456e-05", "loss": 0.6489, "slid_loss": 0.6446, "step": 4515, "time": 71.22 }, { "epoch": 3.49, "learning_rate": "1.8448e-05", "loss": 0.6138, "slid_loss": 0.6442, "step": 4516, "time": 71.27 }, { "epoch": 3.49, "learning_rate": "1.8440e-05", "loss": 0.6485, "slid_loss": 0.6441, "step": 4517, "time": 70.9 }, { "epoch": 3.5, "learning_rate": "1.8432e-05", "loss": 0.6339, "slid_loss": 0.6442, "step": 4518, "time": 72.28 }, { "epoch": 3.5, "learning_rate": "1.8424e-05", "loss": 0.6295, "slid_loss": 0.644, "step": 4519, "time": 71.17 }, { "epoch": 3.5, "learning_rate": "1.8416e-05", "loss": 0.6399, "slid_loss": 0.6437, "step": 4520, "time": 72.45 }, { "epoch": 3.5, "learning_rate": "1.8408e-05", "loss": 0.6467, "slid_loss": 0.6439, "step": 4521, "time": 71.35 }, { "epoch": 3.5, "learning_rate": "1.8400e-05", "loss": 0.6307, "slid_loss": 0.6437, "step": 4522, "time": 70.08 }, { "epoch": 3.5, "learning_rate": "1.8392e-05", "loss": 0.6464, "slid_loss": 0.6437, "step": 4523, "time": 71.42 }, { "epoch": 3.5, "learning_rate": "1.8384e-05", "loss": 0.6339, "slid_loss": 0.6434, "step": 4524, "time": 72.45 }, { "epoch": 3.5, "learning_rate": "1.8376e-05", "loss": 0.6462, "slid_loss": 0.6438, "step": 4525, "time": 71.73 }, { "epoch": 3.5, "learning_rate": "1.8368e-05", "loss": 0.6548, "slid_loss": 0.644, "step": 4526, "time": 71.13 }, { "epoch": 3.5, "learning_rate": "1.8360e-05", "loss": 0.6413, "slid_loss": 0.6442, "step": 4527, "time": 71.8 }, { "epoch": 3.5, "learning_rate": "1.8352e-05", "loss": 0.6684, "slid_loss": 0.6448, "step": 4528, "time": 71.72 }, { "epoch": 3.5, "learning_rate": "1.8344e-05", "loss": 0.6438, "slid_loss": 0.6449, "step": 4529, "time": 71.43 }, { "epoch": 3.5, "learning_rate": "1.8336e-05", "loss": 0.6441, "slid_loss": 0.6448, "step": 4530, "time": 72.02 }, { "epoch": 3.51, "learning_rate": "1.8328e-05", "loss": 0.6703, "slid_loss": 0.6448, "step": 4531, "time": 70.8 }, { "epoch": 3.51, "learning_rate": "1.8320e-05", "loss": 0.6507, "slid_loss": 0.6449, "step": 4532, "time": 71.0 }, { "epoch": 3.51, "learning_rate": "1.8312e-05", "loss": 0.6498, "slid_loss": 0.6448, "step": 4533, "time": 70.89 }, { "epoch": 3.51, "learning_rate": "1.8304e-05", "loss": 0.6412, "slid_loss": 0.645, "step": 4534, "time": 71.37 }, { "epoch": 3.51, "learning_rate": "1.8296e-05", "loss": 0.6637, "slid_loss": 0.6453, "step": 4535, "time": 72.19 }, { "epoch": 3.51, "learning_rate": "1.8288e-05", "loss": 0.6492, "slid_loss": 0.6451, "step": 4536, "time": 71.05 }, { "epoch": 3.51, "learning_rate": "1.8280e-05", "loss": 0.6495, "slid_loss": 0.6451, "step": 4537, "time": 71.22 }, { "epoch": 3.51, "learning_rate": "1.8272e-05", "loss": 0.6538, "slid_loss": 0.645, "step": 4538, "time": 71.82 }, { "epoch": 3.51, "learning_rate": "1.8264e-05", "loss": 0.6349, "slid_loss": 0.6446, "step": 4539, "time": 72.48 }, { "epoch": 3.51, "learning_rate": "1.8256e-05", "loss": 0.6383, "slid_loss": 0.6447, "step": 4540, "time": 72.24 }, { "epoch": 3.51, "learning_rate": "1.8248e-05", "loss": 0.6814, "slid_loss": 0.6453, "step": 4541, "time": 70.89 }, { "epoch": 3.51, "learning_rate": "1.8240e-05", "loss": 0.6294, "slid_loss": 0.6452, "step": 4542, "time": 71.76 }, { "epoch": 3.51, "learning_rate": "1.8232e-05", "loss": 0.6633, "slid_loss": 0.6454, "step": 4543, "time": 73.36 }, { "epoch": 3.52, "learning_rate": "1.8224e-05", "loss": 0.5967, "slid_loss": 0.6449, "step": 4544, "time": 72.08 }, { "epoch": 3.52, "learning_rate": "1.8216e-05", "loss": 0.6174, "slid_loss": 0.6452, "step": 4545, "time": 71.93 }, { "epoch": 3.52, "learning_rate": "1.8209e-05", "loss": 0.6295, "slid_loss": 0.6449, "step": 4546, "time": 71.8 }, { "epoch": 3.52, "learning_rate": "1.8201e-05", "loss": 0.6613, "slid_loss": 0.6448, "step": 4547, "time": 71.11 }, { "epoch": 3.52, "learning_rate": "1.8193e-05", "loss": 0.6331, "slid_loss": 0.6446, "step": 4548, "time": 72.26 }, { "epoch": 3.52, "learning_rate": "1.8185e-05", "loss": 0.6503, "slid_loss": 0.6444, "step": 4549, "time": 71.67 }, { "epoch": 3.52, "learning_rate": "1.8177e-05", "loss": 0.6484, "slid_loss": 0.6442, "step": 4550, "time": 72.04 }, { "epoch": 3.52, "learning_rate": "1.8169e-05", "loss": 0.6607, "slid_loss": 0.6446, "step": 4551, "time": 72.6 }, { "epoch": 3.52, "learning_rate": "1.8161e-05", "loss": 0.5985, "slid_loss": 0.644, "step": 4552, "time": 70.55 }, { "epoch": 3.52, "learning_rate": "1.8153e-05", "loss": 0.6229, "slid_loss": 0.6438, "step": 4553, "time": 71.83 }, { "epoch": 3.52, "learning_rate": "1.8145e-05", "loss": 0.6498, "slid_loss": 0.6434, "step": 4554, "time": 71.62 }, { "epoch": 3.52, "learning_rate": "1.8137e-05", "loss": 0.6728, "slid_loss": 0.6441, "step": 4555, "time": 72.15 }, { "epoch": 3.52, "learning_rate": "1.8129e-05", "loss": 0.6745, "slid_loss": 0.6445, "step": 4556, "time": 71.37 }, { "epoch": 3.53, "learning_rate": "1.8121e-05", "loss": 0.636, "slid_loss": 0.6443, "step": 4557, "time": 72.7 }, { "epoch": 3.53, "learning_rate": "1.8114e-05", "loss": 0.6485, "slid_loss": 0.6443, "step": 4558, "time": 72.02 }, { "epoch": 3.53, "learning_rate": "1.8106e-05", "loss": 0.6503, "slid_loss": 0.6447, "step": 4559, "time": 72.47 }, { "epoch": 3.53, "learning_rate": "1.8098e-05", "loss": 0.6396, "slid_loss": 0.6448, "step": 4560, "time": 70.48 }, { "epoch": 3.53, "learning_rate": "1.8090e-05", "loss": 0.6391, "slid_loss": 0.6447, "step": 4561, "time": 72.41 }, { "epoch": 3.53, "learning_rate": "1.8082e-05", "loss": 0.6472, "slid_loss": 0.6446, "step": 4562, "time": 71.28 }, { "epoch": 3.53, "learning_rate": "1.8074e-05", "loss": 0.6639, "slid_loss": 0.6441, "step": 4563, "time": 71.25 }, { "epoch": 3.53, "learning_rate": "1.8066e-05", "loss": 0.6327, "slid_loss": 0.644, "step": 4564, "time": 71.58 }, { "epoch": 3.53, "learning_rate": "1.8058e-05", "loss": 0.6538, "slid_loss": 0.6441, "step": 4565, "time": 71.69 }, { "epoch": 3.53, "learning_rate": "1.8050e-05", "loss": 0.5957, "slid_loss": 0.6433, "step": 4566, "time": 72.51 }, { "epoch": 3.53, "learning_rate": "1.8043e-05", "loss": 0.636, "slid_loss": 0.6433, "step": 4567, "time": 71.23 }, { "epoch": 3.53, "learning_rate": "1.8035e-05", "loss": 0.6822, "slid_loss": 0.6436, "step": 4568, "time": 71.65 }, { "epoch": 3.54, "learning_rate": "1.8027e-05", "loss": 0.6609, "slid_loss": 0.6437, "step": 4569, "time": 71.07 }, { "epoch": 3.54, "learning_rate": "1.8019e-05", "loss": 0.6014, "slid_loss": 0.6437, "step": 4570, "time": 73.01 }, { "epoch": 3.54, "learning_rate": "1.8011e-05", "loss": 0.6262, "slid_loss": 0.6433, "step": 4571, "time": 72.74 }, { "epoch": 3.54, "learning_rate": "1.8003e-05", "loss": 0.6342, "slid_loss": 0.643, "step": 4572, "time": 71.67 }, { "epoch": 3.54, "learning_rate": "1.7995e-05", "loss": 0.625, "slid_loss": 0.6428, "step": 4573, "time": 72.63 }, { "epoch": 3.54, "learning_rate": "1.7987e-05", "loss": 0.6015, "slid_loss": 0.6422, "step": 4574, "time": 72.85 }, { "epoch": 3.54, "learning_rate": "1.7980e-05", "loss": 0.6463, "slid_loss": 0.6423, "step": 4575, "time": 71.19 }, { "epoch": 3.54, "learning_rate": "1.7972e-05", "loss": 0.6736, "slid_loss": 0.6424, "step": 4576, "time": 70.49 }, { "epoch": 3.54, "learning_rate": "1.7964e-05", "loss": 0.6403, "slid_loss": 0.6427, "step": 4577, "time": 70.56 }, { "epoch": 3.54, "learning_rate": "1.7956e-05", "loss": 0.6054, "slid_loss": 0.6419, "step": 4578, "time": 72.96 }, { "epoch": 3.54, "learning_rate": "1.7948e-05", "loss": 0.6275, "slid_loss": 0.6416, "step": 4579, "time": 72.37 }, { "epoch": 3.54, "learning_rate": "1.7940e-05", "loss": 0.633, "slid_loss": 0.6413, "step": 4580, "time": 72.26 }, { "epoch": 3.54, "learning_rate": "1.7933e-05", "loss": 0.6145, "slid_loss": 0.641, "step": 4581, "time": 72.05 }, { "epoch": 3.55, "learning_rate": "1.7925e-05", "loss": 0.6306, "slid_loss": 0.6405, "step": 4582, "time": 71.79 }, { "epoch": 3.55, "learning_rate": "1.7917e-05", "loss": 0.6396, "slid_loss": 0.6405, "step": 4583, "time": 70.83 }, { "epoch": 3.55, "learning_rate": "1.7909e-05", "loss": 0.6234, "slid_loss": 0.6405, "step": 4584, "time": 72.62 }, { "epoch": 3.55, "learning_rate": "1.7901e-05", "loss": 0.6673, "slid_loss": 0.6405, "step": 4585, "time": 71.5 }, { "epoch": 3.55, "learning_rate": "1.7893e-05", "loss": 0.6388, "slid_loss": 0.6404, "step": 4586, "time": 73.03 }, { "epoch": 3.55, "learning_rate": "1.7886e-05", "loss": 0.5965, "slid_loss": 0.6399, "step": 4587, "time": 71.8 }, { "epoch": 3.55, "learning_rate": "1.7878e-05", "loss": 0.6537, "slid_loss": 0.6398, "step": 4588, "time": 70.62 }, { "epoch": 3.55, "learning_rate": "1.7870e-05", "loss": 0.6491, "slid_loss": 0.6401, "step": 4589, "time": 74.03 }, { "epoch": 3.55, "learning_rate": "1.7862e-05", "loss": 0.6725, "slid_loss": 0.6403, "step": 4590, "time": 70.09 }, { "epoch": 3.55, "learning_rate": "1.7854e-05", "loss": 0.6311, "slid_loss": 0.6407, "step": 4591, "time": 71.77 }, { "epoch": 3.55, "learning_rate": "1.7847e-05", "loss": 0.6513, "slid_loss": 0.6409, "step": 4592, "time": 72.14 }, { "epoch": 3.55, "learning_rate": "1.7839e-05", "loss": 0.6467, "slid_loss": 0.6408, "step": 4593, "time": 70.81 }, { "epoch": 3.55, "learning_rate": "1.7831e-05", "loss": 0.6166, "slid_loss": 0.6405, "step": 4594, "time": 85.11 }, { "epoch": 3.56, "learning_rate": "1.7823e-05", "loss": 0.6657, "slid_loss": 0.6408, "step": 4595, "time": 73.09 }, { "epoch": 3.56, "learning_rate": "1.7815e-05", "loss": 0.6171, "slid_loss": 0.6405, "step": 4596, "time": 71.98 }, { "epoch": 3.56, "learning_rate": "1.7808e-05", "loss": 0.6462, "slid_loss": 0.6408, "step": 4597, "time": 70.76 }, { "epoch": 3.56, "learning_rate": "1.7800e-05", "loss": 0.6611, "slid_loss": 0.6412, "step": 4598, "time": 70.65 }, { "epoch": 3.56, "learning_rate": "1.7792e-05", "loss": 0.6196, "slid_loss": 0.6409, "step": 4599, "time": 97.7 }, { "epoch": 3.56, "learning_rate": "1.7784e-05", "loss": 0.6179, "slid_loss": 0.6407, "step": 4600, "time": 85.03 }, { "epoch": 3.56, "learning_rate": "1.7776e-05", "loss": 0.6385, "slid_loss": 0.6407, "step": 4601, "time": 849.52 }, { "epoch": 3.56, "learning_rate": "1.7769e-05", "loss": 0.6487, "slid_loss": 0.6408, "step": 4602, "time": 126.16 }, { "epoch": 3.56, "learning_rate": "1.7761e-05", "loss": 0.6327, "slid_loss": 0.6407, "step": 4603, "time": 87.23 }, { "epoch": 3.56, "learning_rate": "1.7753e-05", "loss": 0.6122, "slid_loss": 0.6404, "step": 4604, "time": 106.86 }, { "epoch": 3.56, "learning_rate": "1.7745e-05", "loss": 0.677, "slid_loss": 0.6409, "step": 4605, "time": 110.58 }, { "epoch": 3.56, "learning_rate": "1.7738e-05", "loss": 0.6315, "slid_loss": 0.6407, "step": 4606, "time": 150.04 }, { "epoch": 3.56, "learning_rate": "1.7730e-05", "loss": 0.6404, "slid_loss": 0.6409, "step": 4607, "time": 122.32 }, { "epoch": 3.57, "learning_rate": "1.7722e-05", "loss": 0.5984, "slid_loss": 0.6402, "step": 4608, "time": 172.15 }, { "epoch": 3.57, "learning_rate": "1.7714e-05", "loss": 0.6065, "slid_loss": 0.6402, "step": 4609, "time": 166.0 }, { "epoch": 3.57, "learning_rate": "1.7707e-05", "loss": 0.6208, "slid_loss": 0.6397, "step": 4610, "time": 136.2 }, { "epoch": 3.57, "learning_rate": "1.7699e-05", "loss": 0.6032, "slid_loss": 0.6396, "step": 4611, "time": 135.83 }, { "epoch": 3.57, "learning_rate": "1.7691e-05", "loss": 0.6619, "slid_loss": 0.6399, "step": 4612, "time": 132.37 }, { "epoch": 3.57, "learning_rate": "1.7683e-05", "loss": 0.6233, "slid_loss": 0.6398, "step": 4613, "time": 95.99 }, { "epoch": 3.57, "learning_rate": "1.7676e-05", "loss": 0.6088, "slid_loss": 0.6393, "step": 4614, "time": 147.4 }, { "epoch": 3.57, "learning_rate": "1.7668e-05", "loss": 0.6572, "slid_loss": 0.6393, "step": 4615, "time": 113.11 }, { "epoch": 3.57, "learning_rate": "1.7660e-05", "loss": 0.6131, "slid_loss": 0.6393, "step": 4616, "time": 122.24 }, { "epoch": 3.57, "learning_rate": "1.7652e-05", "loss": 0.6673, "slid_loss": 0.6395, "step": 4617, "time": 121.55 }, { "epoch": 3.57, "learning_rate": "1.7645e-05", "loss": 0.6457, "slid_loss": 0.6396, "step": 4618, "time": 96.02 }, { "epoch": 3.57, "learning_rate": "1.7637e-05", "loss": 0.6545, "slid_loss": 0.6399, "step": 4619, "time": 83.75 }, { "epoch": 3.57, "learning_rate": "1.7629e-05", "loss": 0.6374, "slid_loss": 0.6399, "step": 4620, "time": 84.44 }, { "epoch": 3.58, "learning_rate": "1.7621e-05", "loss": 0.6468, "slid_loss": 0.6399, "step": 4621, "time": 81.75 }, { "epoch": 3.58, "learning_rate": "1.7614e-05", "loss": 0.6209, "slid_loss": 0.6398, "step": 4622, "time": 82.94 }, { "epoch": 3.58, "learning_rate": "1.7606e-05", "loss": 0.6615, "slid_loss": 0.6399, "step": 4623, "time": 71.81 }, { "epoch": 3.58, "learning_rate": "1.7598e-05", "loss": 0.6229, "slid_loss": 0.6398, "step": 4624, "time": 72.25 }, { "epoch": 3.58, "learning_rate": "1.7591e-05", "loss": 0.6361, "slid_loss": 0.6397, "step": 4625, "time": 72.86 }, { "epoch": 3.58, "learning_rate": "1.7583e-05", "loss": 0.6103, "slid_loss": 0.6393, "step": 4626, "time": 72.46 }, { "epoch": 3.58, "learning_rate": "1.7575e-05", "loss": 0.6016, "slid_loss": 0.6389, "step": 4627, "time": 72.18 }, { "epoch": 3.58, "learning_rate": "1.7568e-05", "loss": 0.607, "slid_loss": 0.6382, "step": 4628, "time": 70.2 }, { "epoch": 3.58, "learning_rate": "1.7560e-05", "loss": 0.6621, "slid_loss": 0.6384, "step": 4629, "time": 70.66 }, { "epoch": 3.58, "learning_rate": "1.7552e-05", "loss": 0.6363, "slid_loss": 0.6384, "step": 4630, "time": 72.62 }, { "epoch": 3.58, "learning_rate": "1.7544e-05", "loss": 0.6151, "slid_loss": 0.6378, "step": 4631, "time": 71.06 }, { "epoch": 3.58, "learning_rate": "1.7537e-05", "loss": 0.6028, "slid_loss": 0.6373, "step": 4632, "time": 70.44 }, { "epoch": 3.58, "learning_rate": "1.7529e-05", "loss": 0.654, "slid_loss": 0.6374, "step": 4633, "time": 72.02 }, { "epoch": 3.59, "learning_rate": "1.7521e-05", "loss": 0.6902, "slid_loss": 0.6379, "step": 4634, "time": 72.56 }, { "epoch": 3.59, "learning_rate": "1.7514e-05", "loss": 0.6282, "slid_loss": 0.6375, "step": 4635, "time": 73.0 }, { "epoch": 3.59, "learning_rate": "1.7506e-05", "loss": 0.6773, "slid_loss": 0.6378, "step": 4636, "time": 71.75 }, { "epoch": 3.59, "learning_rate": "1.7498e-05", "loss": 0.6372, "slid_loss": 0.6377, "step": 4637, "time": 70.94 }, { "epoch": 3.59, "learning_rate": "1.7491e-05", "loss": 0.6399, "slid_loss": 0.6375, "step": 4638, "time": 71.15 }, { "epoch": 3.59, "learning_rate": "1.7483e-05", "loss": 0.6109, "slid_loss": 0.6373, "step": 4639, "time": 73.06 }, { "epoch": 3.59, "learning_rate": "1.7475e-05", "loss": 0.6318, "slid_loss": 0.6372, "step": 4640, "time": 72.22 }, { "epoch": 3.59, "learning_rate": "1.7468e-05", "loss": 0.632, "slid_loss": 0.6367, "step": 4641, "time": 71.48 }, { "epoch": 3.59, "learning_rate": "1.7460e-05", "loss": 0.6617, "slid_loss": 0.637, "step": 4642, "time": 70.22 }, { "epoch": 3.59, "learning_rate": "1.7452e-05", "loss": 0.6422, "slid_loss": 0.6368, "step": 4643, "time": 71.82 }, { "epoch": 3.59, "learning_rate": "1.7445e-05", "loss": 0.658, "slid_loss": 0.6374, "step": 4644, "time": 70.8 }, { "epoch": 3.59, "learning_rate": "1.7437e-05", "loss": 0.6742, "slid_loss": 0.638, "step": 4645, "time": 74.57 }, { "epoch": 3.59, "learning_rate": "1.7430e-05", "loss": 0.6381, "slid_loss": 0.6381, "step": 4646, "time": 72.03 }, { "epoch": 3.6, "learning_rate": "1.7422e-05", "loss": 0.6439, "slid_loss": 0.6379, "step": 4647, "time": 71.49 }, { "epoch": 3.6, "learning_rate": "1.7414e-05", "loss": 0.646, "slid_loss": 0.6381, "step": 4648, "time": 72.75 }, { "epoch": 3.6, "learning_rate": "1.7407e-05", "loss": 0.6176, "slid_loss": 0.6377, "step": 4649, "time": 71.97 }, { "epoch": 3.6, "learning_rate": "1.7399e-05", "loss": 0.6327, "slid_loss": 0.6376, "step": 4650, "time": 72.61 }, { "epoch": 3.6, "learning_rate": "1.7391e-05", "loss": 0.6113, "slid_loss": 0.6371, "step": 4651, "time": 71.1 }, { "epoch": 3.6, "learning_rate": "1.7384e-05", "loss": 0.6436, "slid_loss": 0.6375, "step": 4652, "time": 71.61 }, { "epoch": 3.6, "learning_rate": "1.7376e-05", "loss": 0.6306, "slid_loss": 0.6376, "step": 4653, "time": 73.71 }, { "epoch": 3.6, "learning_rate": "1.7368e-05", "loss": 0.6685, "slid_loss": 0.6378, "step": 4654, "time": 71.6 }, { "epoch": 3.6, "learning_rate": "1.7361e-05", "loss": 0.6331, "slid_loss": 0.6374, "step": 4655, "time": 72.17 }, { "epoch": 3.6, "learning_rate": "1.7353e-05", "loss": 0.6099, "slid_loss": 0.6367, "step": 4656, "time": 72.65 }, { "epoch": 3.6, "learning_rate": "1.7346e-05", "loss": 0.6485, "slid_loss": 0.6369, "step": 4657, "time": 71.85 }, { "epoch": 3.6, "learning_rate": "1.7338e-05", "loss": 0.6192, "slid_loss": 0.6366, "step": 4658, "time": 70.99 }, { "epoch": 3.6, "learning_rate": "1.7330e-05", "loss": 0.7054, "slid_loss": 0.6371, "step": 4659, "time": 72.17 }, { "epoch": 3.61, "learning_rate": "1.7323e-05", "loss": 0.6418, "slid_loss": 0.6372, "step": 4660, "time": 71.45 }, { "epoch": 3.61, "learning_rate": "1.7315e-05", "loss": 0.6581, "slid_loss": 0.6373, "step": 4661, "time": 71.6 }, { "epoch": 3.61, "learning_rate": "1.7308e-05", "loss": 0.6524, "slid_loss": 0.6374, "step": 4662, "time": 70.79 }, { "epoch": 3.61, "learning_rate": "1.7300e-05", "loss": 0.6266, "slid_loss": 0.637, "step": 4663, "time": 70.83 }, { "epoch": 3.61, "learning_rate": "1.7292e-05", "loss": 0.6401, "slid_loss": 0.6371, "step": 4664, "time": 71.51 }, { "epoch": 3.61, "learning_rate": "1.7285e-05", "loss": 0.6475, "slid_loss": 0.637, "step": 4665, "time": 72.45 }, { "epoch": 3.61, "learning_rate": "1.7277e-05", "loss": 0.6487, "slid_loss": 0.6376, "step": 4666, "time": 71.25 }, { "epoch": 3.61, "learning_rate": "1.7270e-05", "loss": 0.6505, "slid_loss": 0.6377, "step": 4667, "time": 73.11 }, { "epoch": 3.61, "learning_rate": "1.7262e-05", "loss": 0.6562, "slid_loss": 0.6374, "step": 4668, "time": 72.66 }, { "epoch": 3.61, "learning_rate": "1.7255e-05", "loss": 0.6393, "slid_loss": 0.6372, "step": 4669, "time": 71.6 }, { "epoch": 3.61, "learning_rate": "1.7247e-05", "loss": 0.6503, "slid_loss": 0.6377, "step": 4670, "time": 70.96 }, { "epoch": 3.61, "learning_rate": "1.7239e-05", "loss": 0.6255, "slid_loss": 0.6377, "step": 4671, "time": 73.79 }, { "epoch": 3.61, "learning_rate": "1.7232e-05", "loss": 0.6586, "slid_loss": 0.638, "step": 4672, "time": 70.3 }, { "epoch": 3.62, "learning_rate": "1.7224e-05", "loss": 0.6401, "slid_loss": 0.6381, "step": 4673, "time": 72.2 }, { "epoch": 3.62, "learning_rate": "1.7217e-05", "loss": 0.6386, "slid_loss": 0.6385, "step": 4674, "time": 71.46 }, { "epoch": 3.62, "learning_rate": "1.7209e-05", "loss": 0.6235, "slid_loss": 0.6383, "step": 4675, "time": 72.47 }, { "epoch": 3.62, "learning_rate": "1.7202e-05", "loss": 0.6233, "slid_loss": 0.6377, "step": 4676, "time": 72.05 }, { "epoch": 3.62, "learning_rate": "1.7194e-05", "loss": 0.623, "slid_loss": 0.6376, "step": 4677, "time": 72.42 }, { "epoch": 3.62, "learning_rate": "1.7187e-05", "loss": 0.6446, "slid_loss": 0.638, "step": 4678, "time": 71.54 }, { "epoch": 3.62, "learning_rate": "1.7179e-05", "loss": 0.6031, "slid_loss": 0.6377, "step": 4679, "time": 72.33 }, { "epoch": 3.62, "learning_rate": "1.7171e-05", "loss": 0.6418, "slid_loss": 0.6378, "step": 4680, "time": 71.56 }, { "epoch": 3.62, "learning_rate": "1.7164e-05", "loss": 0.6401, "slid_loss": 0.6381, "step": 4681, "time": 71.97 }, { "epoch": 3.62, "learning_rate": "1.7156e-05", "loss": 0.6349, "slid_loss": 0.6381, "step": 4682, "time": 72.3 }, { "epoch": 3.62, "learning_rate": "1.7149e-05", "loss": 0.6213, "slid_loss": 0.6379, "step": 4683, "time": 71.92 }, { "epoch": 3.62, "learning_rate": "1.7141e-05", "loss": 0.6267, "slid_loss": 0.638, "step": 4684, "time": 71.5 }, { "epoch": 3.62, "learning_rate": "1.7134e-05", "loss": 0.6412, "slid_loss": 0.6377, "step": 4685, "time": 71.92 }, { "epoch": 3.63, "learning_rate": "1.7126e-05", "loss": 0.6531, "slid_loss": 0.6378, "step": 4686, "time": 72.95 }, { "epoch": 3.63, "learning_rate": "1.7119e-05", "loss": 0.6226, "slid_loss": 0.6381, "step": 4687, "time": 70.47 }, { "epoch": 3.63, "learning_rate": "1.7111e-05", "loss": 0.6363, "slid_loss": 0.6379, "step": 4688, "time": 72.98 }, { "epoch": 3.63, "learning_rate": "1.7104e-05", "loss": 0.6361, "slid_loss": 0.6378, "step": 4689, "time": 70.81 }, { "epoch": 3.63, "learning_rate": "1.7096e-05", "loss": 0.6524, "slid_loss": 0.6376, "step": 4690, "time": 72.17 }, { "epoch": 3.63, "learning_rate": "1.7089e-05", "loss": 0.6372, "slid_loss": 0.6377, "step": 4691, "time": 71.51 }, { "epoch": 3.63, "learning_rate": "1.7081e-05", "loss": 0.6579, "slid_loss": 0.6377, "step": 4692, "time": 72.24 }, { "epoch": 3.63, "learning_rate": "1.7074e-05", "loss": 0.638, "slid_loss": 0.6376, "step": 4693, "time": 71.73 }, { "epoch": 3.63, "learning_rate": "1.7066e-05", "loss": 0.6262, "slid_loss": 0.6377, "step": 4694, "time": 71.82 }, { "epoch": 3.63, "learning_rate": "1.7059e-05", "loss": 0.5994, "slid_loss": 0.6371, "step": 4695, "time": 71.28 }, { "epoch": 3.63, "learning_rate": "1.7051e-05", "loss": 0.6263, "slid_loss": 0.6372, "step": 4696, "time": 71.86 }, { "epoch": 3.63, "learning_rate": "1.7044e-05", "loss": 0.6227, "slid_loss": 0.6369, "step": 4697, "time": 72.64 }, { "epoch": 3.63, "learning_rate": "1.7036e-05", "loss": 0.6385, "slid_loss": 0.6367, "step": 4698, "time": 70.7 }, { "epoch": 3.64, "learning_rate": "1.7029e-05", "loss": 0.6666, "slid_loss": 0.6372, "step": 4699, "time": 71.33 }, { "epoch": 3.64, "learning_rate": "1.7021e-05", "loss": 0.6556, "slid_loss": 0.6376, "step": 4700, "time": 71.87 }, { "epoch": 3.64, "learning_rate": "1.7014e-05", "loss": 0.6536, "slid_loss": 0.6377, "step": 4701, "time": 71.99 }, { "epoch": 3.64, "learning_rate": "1.7006e-05", "loss": 0.6147, "slid_loss": 0.6374, "step": 4702, "time": 71.59 }, { "epoch": 3.64, "learning_rate": "1.6999e-05", "loss": 0.6246, "slid_loss": 0.6373, "step": 4703, "time": 70.73 }, { "epoch": 3.64, "learning_rate": "1.6991e-05", "loss": 0.6004, "slid_loss": 0.6372, "step": 4704, "time": 71.37 }, { "epoch": 3.64, "learning_rate": "1.6984e-05", "loss": 0.6439, "slid_loss": 0.6368, "step": 4705, "time": 70.59 }, { "epoch": 3.64, "learning_rate": "1.6977e-05", "loss": 0.607, "slid_loss": 0.6366, "step": 4706, "time": 71.87 }, { "epoch": 3.64, "learning_rate": "1.6969e-05", "loss": 0.642, "slid_loss": 0.6366, "step": 4707, "time": 71.64 }, { "epoch": 3.64, "learning_rate": "1.6962e-05", "loss": 0.6287, "slid_loss": 0.6369, "step": 4708, "time": 72.24 }, { "epoch": 3.64, "learning_rate": "1.6954e-05", "loss": 0.6651, "slid_loss": 0.6375, "step": 4709, "time": 71.12 }, { "epoch": 3.64, "learning_rate": "1.6947e-05", "loss": 0.6411, "slid_loss": 0.6377, "step": 4710, "time": 73.07 }, { "epoch": 3.64, "learning_rate": "1.6939e-05", "loss": 0.6629, "slid_loss": 0.6383, "step": 4711, "time": 73.02 }, { "epoch": 3.65, "learning_rate": "1.6932e-05", "loss": 0.6346, "slid_loss": 0.638, "step": 4712, "time": 72.53 }, { "epoch": 3.65, "learning_rate": "1.6924e-05", "loss": 0.6118, "slid_loss": 0.6379, "step": 4713, "time": 71.5 }, { "epoch": 3.65, "learning_rate": "1.6917e-05", "loss": 0.6291, "slid_loss": 0.6381, "step": 4714, "time": 70.34 }, { "epoch": 3.65, "learning_rate": "1.6910e-05", "loss": 0.6326, "slid_loss": 0.6379, "step": 4715, "time": 71.69 }, { "epoch": 3.65, "learning_rate": "1.6902e-05", "loss": 0.6005, "slid_loss": 0.6377, "step": 4716, "time": 72.23 }, { "epoch": 3.65, "learning_rate": "1.6895e-05", "loss": 0.6384, "slid_loss": 0.6374, "step": 4717, "time": 70.88 }, { "epoch": 3.65, "learning_rate": "1.6887e-05", "loss": 0.6443, "slid_loss": 0.6374, "step": 4718, "time": 72.15 }, { "epoch": 3.65, "learning_rate": "1.6880e-05", "loss": 0.6035, "slid_loss": 0.6369, "step": 4719, "time": 71.37 }, { "epoch": 3.65, "learning_rate": "1.6872e-05", "loss": 0.6244, "slid_loss": 0.6368, "step": 4720, "time": 70.76 }, { "epoch": 3.65, "learning_rate": "1.6865e-05", "loss": 0.612, "slid_loss": 0.6364, "step": 4721, "time": 72.17 }, { "epoch": 3.65, "learning_rate": "1.6858e-05", "loss": 0.6253, "slid_loss": 0.6365, "step": 4722, "time": 70.0 }, { "epoch": 3.65, "learning_rate": "1.6850e-05", "loss": 0.6426, "slid_loss": 0.6363, "step": 4723, "time": 71.91 }, { "epoch": 3.65, "learning_rate": "1.6843e-05", "loss": 0.6401, "slid_loss": 0.6365, "step": 4724, "time": 71.79 }, { "epoch": 3.66, "learning_rate": "1.6835e-05", "loss": 0.648, "slid_loss": 0.6366, "step": 4725, "time": 73.65 }, { "epoch": 3.66, "learning_rate": "1.6828e-05", "loss": 0.6297, "slid_loss": 0.6368, "step": 4726, "time": 72.72 }, { "epoch": 3.66, "learning_rate": "1.6821e-05", "loss": 0.6355, "slid_loss": 0.6371, "step": 4727, "time": 72.7 }, { "epoch": 3.66, "learning_rate": "1.6813e-05", "loss": 0.6141, "slid_loss": 0.6372, "step": 4728, "time": 71.88 }, { "epoch": 3.66, "learning_rate": "1.6806e-05", "loss": 0.6127, "slid_loss": 0.6367, "step": 4729, "time": 71.16 }, { "epoch": 3.66, "learning_rate": "1.6798e-05", "loss": 0.6286, "slid_loss": 0.6366, "step": 4730, "time": 73.26 }, { "epoch": 3.66, "learning_rate": "1.6791e-05", "loss": 0.6425, "slid_loss": 0.6369, "step": 4731, "time": 72.84 }, { "epoch": 3.66, "learning_rate": "1.6784e-05", "loss": 0.6454, "slid_loss": 0.6373, "step": 4732, "time": 71.02 }, { "epoch": 3.66, "learning_rate": "1.6776e-05", "loss": 0.6578, "slid_loss": 0.6374, "step": 4733, "time": 71.24 }, { "epoch": 3.66, "learning_rate": "1.6769e-05", "loss": 0.629, "slid_loss": 0.6367, "step": 4734, "time": 71.03 }, { "epoch": 3.66, "learning_rate": "1.6762e-05", "loss": 0.6238, "slid_loss": 0.6367, "step": 4735, "time": 71.1 }, { "epoch": 3.66, "learning_rate": "1.6754e-05", "loss": 0.6568, "slid_loss": 0.6365, "step": 4736, "time": 71.97 }, { "epoch": 3.66, "learning_rate": "1.6747e-05", "loss": 0.6179, "slid_loss": 0.6363, "step": 4737, "time": 70.21 }, { "epoch": 3.67, "learning_rate": "1.6740e-05", "loss": 0.6581, "slid_loss": 0.6365, "step": 4738, "time": 71.79 }, { "epoch": 3.67, "learning_rate": "1.6732e-05", "loss": 0.6483, "slid_loss": 0.6369, "step": 4739, "time": 71.64 }, { "epoch": 3.67, "learning_rate": "1.6725e-05", "loss": 0.5876, "slid_loss": 0.6364, "step": 4740, "time": 71.17 }, { "epoch": 3.67, "learning_rate": "1.6717e-05", "loss": 0.6924, "slid_loss": 0.637, "step": 4741, "time": 72.52 }, { "epoch": 3.67, "learning_rate": "1.6710e-05", "loss": 0.675, "slid_loss": 0.6372, "step": 4742, "time": 71.44 }, { "epoch": 3.67, "learning_rate": "1.6703e-05", "loss": 0.6268, "slid_loss": 0.637, "step": 4743, "time": 71.22 }, { "epoch": 3.67, "learning_rate": "1.6695e-05", "loss": 0.6084, "slid_loss": 0.6365, "step": 4744, "time": 70.28 }, { "epoch": 3.67, "learning_rate": "1.6688e-05", "loss": 0.6375, "slid_loss": 0.6361, "step": 4745, "time": 71.99 }, { "epoch": 3.67, "learning_rate": "1.6681e-05", "loss": 0.6605, "slid_loss": 0.6364, "step": 4746, "time": 73.39 }, { "epoch": 3.67, "learning_rate": "1.6673e-05", "loss": 0.6975, "slid_loss": 0.6369, "step": 4747, "time": 70.85 }, { "epoch": 3.67, "learning_rate": "1.6666e-05", "loss": 0.6405, "slid_loss": 0.6368, "step": 4748, "time": 71.41 }, { "epoch": 3.67, "learning_rate": "1.6659e-05", "loss": 0.6004, "slid_loss": 0.6367, "step": 4749, "time": 72.66 }, { "epoch": 3.68, "learning_rate": "1.6651e-05", "loss": 0.5942, "slid_loss": 0.6363, "step": 4750, "time": 70.83 }, { "epoch": 3.68, "learning_rate": "1.6644e-05", "loss": 0.6307, "slid_loss": 0.6365, "step": 4751, "time": 70.5 }, { "epoch": 3.68, "learning_rate": "1.6637e-05", "loss": 0.6556, "slid_loss": 0.6366, "step": 4752, "time": 71.03 }, { "epoch": 3.68, "learning_rate": "1.6630e-05", "loss": 0.601, "slid_loss": 0.6363, "step": 4753, "time": 85.03 }, { "epoch": 3.68, "learning_rate": "1.6622e-05", "loss": 0.6465, "slid_loss": 0.6361, "step": 4754, "time": 73.21 }, { "epoch": 3.68, "learning_rate": "1.6615e-05", "loss": 0.6479, "slid_loss": 0.6362, "step": 4755, "time": 71.7 }, { "epoch": 3.68, "learning_rate": "1.6608e-05", "loss": 0.6284, "slid_loss": 0.6364, "step": 4756, "time": 71.56 }, { "epoch": 3.68, "learning_rate": "1.6600e-05", "loss": 0.6607, "slid_loss": 0.6365, "step": 4757, "time": 72.82 }, { "epoch": 3.68, "learning_rate": "1.6593e-05", "loss": 0.6463, "slid_loss": 0.6368, "step": 4758, "time": 98.57 }, { "epoch": 3.68, "learning_rate": "1.6586e-05", "loss": 0.6263, "slid_loss": 0.636, "step": 4759, "time": 71.1 }, { "epoch": 3.68, "learning_rate": "1.6578e-05", "loss": 0.6315, "slid_loss": 0.6359, "step": 4760, "time": 106.74 }, { "epoch": 3.68, "learning_rate": "1.6571e-05", "loss": 0.6409, "slid_loss": 0.6357, "step": 4761, "time": 82.21 }, { "epoch": 3.68, "learning_rate": "1.6564e-05", "loss": 0.6734, "slid_loss": 0.636, "step": 4762, "time": 113.47 }, { "epoch": 3.69, "learning_rate": "1.6557e-05", "loss": 0.6503, "slid_loss": 0.6362, "step": 4763, "time": 95.21 }, { "epoch": 3.69, "learning_rate": "1.6549e-05", "loss": 0.6219, "slid_loss": 0.636, "step": 4764, "time": 122.54 }, { "epoch": 3.69, "learning_rate": "1.6542e-05", "loss": 0.6275, "slid_loss": 0.6358, "step": 4765, "time": 124.77 }, { "epoch": 3.69, "learning_rate": "1.6535e-05", "loss": 0.6378, "slid_loss": 0.6357, "step": 4766, "time": 139.95 }, { "epoch": 3.69, "learning_rate": "1.6528e-05", "loss": 0.6433, "slid_loss": 0.6356, "step": 4767, "time": 159.69 }, { "epoch": 3.69, "learning_rate": "1.6520e-05", "loss": 0.6409, "slid_loss": 0.6355, "step": 4768, "time": 122.38 }, { "epoch": 3.69, "learning_rate": "1.6513e-05", "loss": 0.6678, "slid_loss": 0.6358, "step": 4769, "time": 170.08 }, { "epoch": 3.69, "learning_rate": "1.6506e-05", "loss": 0.6382, "slid_loss": 0.6356, "step": 4770, "time": 146.76 }, { "epoch": 3.69, "learning_rate": "1.6499e-05", "loss": 0.639, "slid_loss": 0.6358, "step": 4771, "time": 159.63 }, { "epoch": 3.69, "learning_rate": "1.6491e-05", "loss": 0.637, "slid_loss": 0.6356, "step": 4772, "time": 110.64 }, { "epoch": 3.69, "learning_rate": "1.6484e-05", "loss": 0.6436, "slid_loss": 0.6356, "step": 4773, "time": 144.71 }, { "epoch": 3.69, "learning_rate": "1.6477e-05", "loss": 0.6467, "slid_loss": 0.6357, "step": 4774, "time": 110.77 }, { "epoch": 3.69, "learning_rate": "1.6470e-05", "loss": 0.6318, "slid_loss": 0.6358, "step": 4775, "time": 108.54 }, { "epoch": 3.7, "learning_rate": "1.6462e-05", "loss": 0.6184, "slid_loss": 0.6357, "step": 4776, "time": 108.22 }, { "epoch": 3.7, "learning_rate": "1.6455e-05", "loss": 0.6142, "slid_loss": 0.6356, "step": 4777, "time": 99.94 }, { "epoch": 3.7, "learning_rate": "1.6448e-05", "loss": 0.638, "slid_loss": 0.6356, "step": 4778, "time": 84.66 }, { "epoch": 3.7, "learning_rate": "1.6441e-05", "loss": 0.6745, "slid_loss": 0.6363, "step": 4779, "time": 96.1 }, { "epoch": 3.7, "learning_rate": "1.6433e-05", "loss": 0.6433, "slid_loss": 0.6363, "step": 4780, "time": 72.48 }, { "epoch": 3.7, "learning_rate": "1.6426e-05", "loss": 0.6385, "slid_loss": 0.6363, "step": 4781, "time": 72.39 }, { "epoch": 3.7, "learning_rate": "1.6419e-05", "loss": 0.6411, "slid_loss": 0.6363, "step": 4782, "time": 83.18 }, { "epoch": 3.7, "learning_rate": "1.6412e-05", "loss": 0.6005, "slid_loss": 0.6361, "step": 4783, "time": 70.51 }, { "epoch": 3.7, "learning_rate": "1.6405e-05", "loss": 0.6321, "slid_loss": 0.6362, "step": 4784, "time": 73.2 }, { "epoch": 3.7, "learning_rate": "1.6397e-05", "loss": 0.6282, "slid_loss": 0.636, "step": 4785, "time": 72.14 }, { "epoch": 3.7, "learning_rate": "1.6390e-05", "loss": 0.6607, "slid_loss": 0.6361, "step": 4786, "time": 72.69 }, { "epoch": 3.7, "learning_rate": "1.6383e-05", "loss": 0.5743, "slid_loss": 0.6356, "step": 4787, "time": 71.1 }, { "epoch": 3.7, "learning_rate": "1.6376e-05", "loss": 0.6467, "slid_loss": 0.6357, "step": 4788, "time": 72.27 }, { "epoch": 3.71, "learning_rate": "1.6369e-05", "loss": 0.5719, "slid_loss": 0.6351, "step": 4789, "time": 72.4 }, { "epoch": 3.71, "learning_rate": "1.6361e-05", "loss": 0.6033, "slid_loss": 0.6346, "step": 4790, "time": 72.58 }, { "epoch": 3.71, "learning_rate": "1.6354e-05", "loss": 0.6665, "slid_loss": 0.6349, "step": 4791, "time": 71.01 }, { "epoch": 3.71, "learning_rate": "1.6347e-05", "loss": 0.646, "slid_loss": 0.6348, "step": 4792, "time": 71.29 }, { "epoch": 3.71, "learning_rate": "1.6340e-05", "loss": 0.578, "slid_loss": 0.6342, "step": 4793, "time": 72.43 }, { "epoch": 3.71, "learning_rate": "1.6333e-05", "loss": 0.6204, "slid_loss": 0.6341, "step": 4794, "time": 72.38 }, { "epoch": 3.71, "learning_rate": "1.6326e-05", "loss": 0.6643, "slid_loss": 0.6348, "step": 4795, "time": 70.53 }, { "epoch": 3.71, "learning_rate": "1.6318e-05", "loss": 0.6502, "slid_loss": 0.635, "step": 4796, "time": 72.03 }, { "epoch": 3.71, "learning_rate": "1.6311e-05", "loss": 0.6032, "slid_loss": 0.6348, "step": 4797, "time": 72.45 }, { "epoch": 3.71, "learning_rate": "1.6304e-05", "loss": 0.6142, "slid_loss": 0.6346, "step": 4798, "time": 72.1 }, { "epoch": 3.71, "learning_rate": "1.6297e-05", "loss": 0.6347, "slid_loss": 0.6343, "step": 4799, "time": 73.22 }, { "epoch": 3.71, "learning_rate": "1.6290e-05", "loss": 0.5959, "slid_loss": 0.6337, "step": 4800, "time": 72.92 }, { "epoch": 3.71, "learning_rate": "1.6283e-05", "loss": 0.5985, "slid_loss": 0.6331, "step": 4801, "time": 760.58 }, { "epoch": 3.72, "learning_rate": "1.6275e-05", "loss": 0.6534, "slid_loss": 0.6335, "step": 4802, "time": 72.75 }, { "epoch": 3.72, "learning_rate": "1.6268e-05", "loss": 0.6519, "slid_loss": 0.6338, "step": 4803, "time": 71.58 }, { "epoch": 3.72, "learning_rate": "1.6261e-05", "loss": 0.5969, "slid_loss": 0.6337, "step": 4804, "time": 70.9 }, { "epoch": 3.72, "learning_rate": "1.6254e-05", "loss": 0.6245, "slid_loss": 0.6335, "step": 4805, "time": 71.78 }, { "epoch": 3.72, "learning_rate": "1.6247e-05", "loss": 0.6266, "slid_loss": 0.6337, "step": 4806, "time": 72.6 }, { "epoch": 3.72, "learning_rate": "1.6240e-05", "loss": 0.6294, "slid_loss": 0.6336, "step": 4807, "time": 71.16 }, { "epoch": 3.72, "learning_rate": "1.6233e-05", "loss": 0.6446, "slid_loss": 0.6338, "step": 4808, "time": 72.5 }, { "epoch": 3.72, "learning_rate": "1.6225e-05", "loss": 0.5972, "slid_loss": 0.6331, "step": 4809, "time": 71.41 }, { "epoch": 3.72, "learning_rate": "1.6218e-05", "loss": 0.6331, "slid_loss": 0.633, "step": 4810, "time": 71.81 }, { "epoch": 3.72, "learning_rate": "1.6211e-05", "loss": 0.5745, "slid_loss": 0.6321, "step": 4811, "time": 71.66 }, { "epoch": 3.72, "learning_rate": "1.6204e-05", "loss": 0.6162, "slid_loss": 0.6319, "step": 4812, "time": 71.29 }, { "epoch": 3.72, "learning_rate": "1.6197e-05", "loss": 0.5836, "slid_loss": 0.6317, "step": 4813, "time": 71.85 }, { "epoch": 3.72, "learning_rate": "1.6190e-05", "loss": 0.6358, "slid_loss": 0.6317, "step": 4814, "time": 70.1 }, { "epoch": 3.73, "learning_rate": "1.6183e-05", "loss": 0.6231, "slid_loss": 0.6316, "step": 4815, "time": 72.85 }, { "epoch": 3.73, "learning_rate": "1.6176e-05", "loss": 0.6607, "slid_loss": 0.6322, "step": 4816, "time": 71.2 }, { "epoch": 3.73, "learning_rate": "1.6169e-05", "loss": 0.6602, "slid_loss": 0.6325, "step": 4817, "time": 71.79 }, { "epoch": 3.73, "learning_rate": "1.6162e-05", "loss": 0.639, "slid_loss": 0.6324, "step": 4818, "time": 72.09 }, { "epoch": 3.73, "learning_rate": "1.6154e-05", "loss": 0.6403, "slid_loss": 0.6328, "step": 4819, "time": 70.97 }, { "epoch": 3.73, "learning_rate": "1.6147e-05", "loss": 0.6624, "slid_loss": 0.6332, "step": 4820, "time": 72.45 }, { "epoch": 3.73, "learning_rate": "1.6140e-05", "loss": 0.5873, "slid_loss": 0.6329, "step": 4821, "time": 72.01 }, { "epoch": 3.73, "learning_rate": "1.6133e-05", "loss": 0.6296, "slid_loss": 0.6329, "step": 4822, "time": 71.17 }, { "epoch": 3.73, "learning_rate": "1.6126e-05", "loss": 0.6368, "slid_loss": 0.6329, "step": 4823, "time": 71.8 }, { "epoch": 3.73, "learning_rate": "1.6119e-05", "loss": 0.6288, "slid_loss": 0.6328, "step": 4824, "time": 71.19 }, { "epoch": 3.73, "learning_rate": "1.6112e-05", "loss": 0.6399, "slid_loss": 0.6327, "step": 4825, "time": 71.62 }, { "epoch": 3.73, "learning_rate": "1.6105e-05", "loss": 0.6238, "slid_loss": 0.6326, "step": 4826, "time": 71.58 }, { "epoch": 3.73, "learning_rate": "1.6098e-05", "loss": 0.6323, "slid_loss": 0.6326, "step": 4827, "time": 71.95 }, { "epoch": 3.74, "learning_rate": "1.6091e-05", "loss": 0.6246, "slid_loss": 0.6327, "step": 4828, "time": 72.42 }, { "epoch": 3.74, "learning_rate": "1.6084e-05", "loss": 0.651, "slid_loss": 0.6331, "step": 4829, "time": 70.94 }, { "epoch": 3.74, "learning_rate": "1.6077e-05", "loss": 0.6342, "slid_loss": 0.6331, "step": 4830, "time": 71.09 }, { "epoch": 3.74, "learning_rate": "1.6070e-05", "loss": 0.6401, "slid_loss": 0.6331, "step": 4831, "time": 71.46 }, { "epoch": 3.74, "learning_rate": "1.6063e-05", "loss": 0.6193, "slid_loss": 0.6329, "step": 4832, "time": 71.45 }, { "epoch": 3.74, "learning_rate": "1.6055e-05", "loss": 0.6302, "slid_loss": 0.6326, "step": 4833, "time": 72.25 }, { "epoch": 3.74, "learning_rate": "1.6048e-05", "loss": 0.6551, "slid_loss": 0.6328, "step": 4834, "time": 71.35 }, { "epoch": 3.74, "learning_rate": "1.6041e-05", "loss": 0.5829, "slid_loss": 0.6324, "step": 4835, "time": 70.93 }, { "epoch": 3.74, "learning_rate": "1.6034e-05", "loss": 0.6474, "slid_loss": 0.6323, "step": 4836, "time": 71.73 }, { "epoch": 3.74, "learning_rate": "1.6027e-05", "loss": 0.6295, "slid_loss": 0.6325, "step": 4837, "time": 72.17 }, { "epoch": 3.74, "learning_rate": "1.6020e-05", "loss": 0.6279, "slid_loss": 0.6322, "step": 4838, "time": 72.17 }, { "epoch": 3.74, "learning_rate": "1.6013e-05", "loss": 0.6135, "slid_loss": 0.6318, "step": 4839, "time": 72.45 }, { "epoch": 3.74, "learning_rate": "1.6006e-05", "loss": 0.6187, "slid_loss": 0.6321, "step": 4840, "time": 71.89 }, { "epoch": 3.75, "learning_rate": "1.5999e-05", "loss": 0.6436, "slid_loss": 0.6316, "step": 4841, "time": 71.96 }, { "epoch": 3.75, "learning_rate": "1.5992e-05", "loss": 0.6414, "slid_loss": 0.6313, "step": 4842, "time": 70.56 }, { "epoch": 3.75, "learning_rate": "1.5985e-05", "loss": 0.6281, "slid_loss": 0.6313, "step": 4843, "time": 71.82 }, { "epoch": 3.75, "learning_rate": "1.5978e-05", "loss": 0.6273, "slid_loss": 0.6315, "step": 4844, "time": 72.15 }, { "epoch": 3.75, "learning_rate": "1.5971e-05", "loss": 0.6181, "slid_loss": 0.6313, "step": 4845, "time": 71.82 }, { "epoch": 3.75, "learning_rate": "1.5964e-05", "loss": 0.6336, "slid_loss": 0.631, "step": 4846, "time": 72.06 }, { "epoch": 3.75, "learning_rate": "1.5957e-05", "loss": 0.623, "slid_loss": 0.6303, "step": 4847, "time": 71.15 }, { "epoch": 3.75, "learning_rate": "1.5950e-05", "loss": 0.6635, "slid_loss": 0.6305, "step": 4848, "time": 72.17 }, { "epoch": 3.75, "learning_rate": "1.5943e-05", "loss": 0.6457, "slid_loss": 0.631, "step": 4849, "time": 70.8 }, { "epoch": 3.75, "learning_rate": "1.5936e-05", "loss": 0.6343, "slid_loss": 0.6314, "step": 4850, "time": 70.54 }, { "epoch": 3.75, "learning_rate": "1.5929e-05", "loss": 0.6282, "slid_loss": 0.6314, "step": 4851, "time": 71.64 }, { "epoch": 3.75, "learning_rate": "1.5922e-05", "loss": 0.6218, "slid_loss": 0.631, "step": 4852, "time": 73.24 }, { "epoch": 3.75, "learning_rate": "1.5915e-05", "loss": 0.6423, "slid_loss": 0.6314, "step": 4853, "time": 71.01 }, { "epoch": 3.76, "learning_rate": "1.5908e-05", "loss": 0.6334, "slid_loss": 0.6313, "step": 4854, "time": 73.02 }, { "epoch": 3.76, "learning_rate": "1.5901e-05", "loss": 0.6292, "slid_loss": 0.6311, "step": 4855, "time": 72.19 }, { "epoch": 3.76, "learning_rate": "1.5894e-05", "loss": 0.6449, "slid_loss": 0.6313, "step": 4856, "time": 72.08 }, { "epoch": 3.76, "learning_rate": "1.5887e-05", "loss": 0.6287, "slid_loss": 0.631, "step": 4857, "time": 71.95 }, { "epoch": 3.76, "learning_rate": "1.5880e-05", "loss": 0.6123, "slid_loss": 0.6306, "step": 4858, "time": 72.11 }, { "epoch": 3.76, "learning_rate": "1.5874e-05", "loss": 0.6267, "slid_loss": 0.6306, "step": 4859, "time": 72.03 }, { "epoch": 3.76, "learning_rate": "1.5867e-05", "loss": 0.635, "slid_loss": 0.6307, "step": 4860, "time": 71.13 }, { "epoch": 3.76, "learning_rate": "1.5860e-05", "loss": 0.5951, "slid_loss": 0.6302, "step": 4861, "time": 70.87 }, { "epoch": 3.76, "learning_rate": "1.5853e-05", "loss": 0.6327, "slid_loss": 0.6298, "step": 4862, "time": 71.56 }, { "epoch": 3.76, "learning_rate": "1.5846e-05", "loss": 0.6282, "slid_loss": 0.6296, "step": 4863, "time": 72.31 }, { "epoch": 3.76, "learning_rate": "1.5839e-05", "loss": 0.6244, "slid_loss": 0.6296, "step": 4864, "time": 71.16 }, { "epoch": 3.76, "learning_rate": "1.5832e-05", "loss": 0.6129, "slid_loss": 0.6294, "step": 4865, "time": 71.48 }, { "epoch": 3.76, "learning_rate": "1.5825e-05", "loss": 0.6432, "slid_loss": 0.6295, "step": 4866, "time": 71.86 }, { "epoch": 3.77, "learning_rate": "1.5818e-05", "loss": 0.6397, "slid_loss": 0.6295, "step": 4867, "time": 71.89 }, { "epoch": 3.77, "learning_rate": "1.5811e-05", "loss": 0.6224, "slid_loss": 0.6293, "step": 4868, "time": 71.91 }, { "epoch": 3.77, "learning_rate": "1.5804e-05", "loss": 0.6314, "slid_loss": 0.6289, "step": 4869, "time": 71.68 }, { "epoch": 3.77, "learning_rate": "1.5797e-05", "loss": 0.6279, "slid_loss": 0.6288, "step": 4870, "time": 71.43 }, { "epoch": 3.77, "learning_rate": "1.5790e-05", "loss": 0.6226, "slid_loss": 0.6286, "step": 4871, "time": 70.06 }, { "epoch": 3.77, "learning_rate": "1.5783e-05", "loss": 0.6314, "slid_loss": 0.6286, "step": 4872, "time": 71.94 }, { "epoch": 3.77, "learning_rate": "1.5776e-05", "loss": 0.6012, "slid_loss": 0.6282, "step": 4873, "time": 72.04 }, { "epoch": 3.77, "learning_rate": "1.5770e-05", "loss": 0.6289, "slid_loss": 0.628, "step": 4874, "time": 71.81 }, { "epoch": 3.77, "learning_rate": "1.5763e-05", "loss": 0.6418, "slid_loss": 0.6281, "step": 4875, "time": 71.02 }, { "epoch": 3.77, "learning_rate": "1.5756e-05", "loss": 0.6231, "slid_loss": 0.6281, "step": 4876, "time": 72.1 }, { "epoch": 3.77, "learning_rate": "1.5749e-05", "loss": 0.6333, "slid_loss": 0.6283, "step": 4877, "time": 71.97 }, { "epoch": 3.77, "learning_rate": "1.5742e-05", "loss": 0.6326, "slid_loss": 0.6283, "step": 4878, "time": 71.68 }, { "epoch": 3.77, "learning_rate": "1.5735e-05", "loss": 0.6436, "slid_loss": 0.628, "step": 4879, "time": 73.25 }, { "epoch": 3.78, "learning_rate": "1.5728e-05", "loss": 0.5881, "slid_loss": 0.6274, "step": 4880, "time": 71.42 }, { "epoch": 3.78, "learning_rate": "1.5721e-05", "loss": 0.6329, "slid_loss": 0.6274, "step": 4881, "time": 72.16 }, { "epoch": 3.78, "learning_rate": "1.5714e-05", "loss": 0.6136, "slid_loss": 0.6271, "step": 4882, "time": 70.93 }, { "epoch": 3.78, "learning_rate": "1.5708e-05", "loss": 0.6019, "slid_loss": 0.6271, "step": 4883, "time": 71.49 }, { "epoch": 3.78, "learning_rate": "1.5701e-05", "loss": 0.6297, "slid_loss": 0.6271, "step": 4884, "time": 71.35 }, { "epoch": 3.78, "learning_rate": "1.5694e-05", "loss": 0.6015, "slid_loss": 0.6268, "step": 4885, "time": 72.47 }, { "epoch": 3.78, "learning_rate": "1.5687e-05", "loss": 0.5963, "slid_loss": 0.6262, "step": 4886, "time": 72.85 }, { "epoch": 3.78, "learning_rate": "1.5680e-05", "loss": 0.6579, "slid_loss": 0.627, "step": 4887, "time": 70.48 }, { "epoch": 3.78, "learning_rate": "1.5673e-05", "loss": 0.6717, "slid_loss": 0.6272, "step": 4888, "time": 70.67 }, { "epoch": 3.78, "learning_rate": "1.5666e-05", "loss": 0.633, "slid_loss": 0.6279, "step": 4889, "time": 71.95 }, { "epoch": 3.78, "learning_rate": "1.5660e-05", "loss": 0.6533, "slid_loss": 0.6284, "step": 4890, "time": 72.12 }, { "epoch": 3.78, "learning_rate": "1.5653e-05", "loss": 0.5987, "slid_loss": 0.6277, "step": 4891, "time": 71.07 }, { "epoch": 3.78, "learning_rate": "1.5646e-05", "loss": 0.6386, "slid_loss": 0.6276, "step": 4892, "time": 71.44 }, { "epoch": 3.79, "learning_rate": "1.5639e-05", "loss": 0.6193, "slid_loss": 0.628, "step": 4893, "time": 72.49 }, { "epoch": 3.79, "learning_rate": "1.5632e-05", "loss": 0.5991, "slid_loss": 0.6278, "step": 4894, "time": 71.8 }, { "epoch": 3.79, "learning_rate": "1.5625e-05", "loss": 0.6332, "slid_loss": 0.6275, "step": 4895, "time": 71.97 }, { "epoch": 3.79, "learning_rate": "1.5619e-05", "loss": 0.6257, "slid_loss": 0.6272, "step": 4896, "time": 71.79 }, { "epoch": 3.79, "learning_rate": "1.5612e-05", "loss": 0.6607, "slid_loss": 0.6278, "step": 4897, "time": 72.45 }, { "epoch": 3.79, "learning_rate": "1.5605e-05", "loss": 0.652, "slid_loss": 0.6282, "step": 4898, "time": 72.21 }, { "epoch": 3.79, "learning_rate": "1.5598e-05", "loss": 0.6388, "slid_loss": 0.6282, "step": 4899, "time": 70.91 }, { "epoch": 3.79, "learning_rate": "1.5591e-05", "loss": 0.611, "slid_loss": 0.6284, "step": 4900, "time": 71.02 }, { "epoch": 3.79, "learning_rate": "1.5584e-05", "loss": 0.6295, "slid_loss": 0.6287, "step": 4901, "time": 71.44 }, { "epoch": 3.79, "learning_rate": "1.5578e-05", "loss": 0.6181, "slid_loss": 0.6283, "step": 4902, "time": 72.21 }, { "epoch": 3.79, "learning_rate": "1.5571e-05", "loss": 0.6091, "slid_loss": 0.6279, "step": 4903, "time": 72.8 }, { "epoch": 3.79, "learning_rate": "1.5564e-05", "loss": 0.6035, "slid_loss": 0.628, "step": 4904, "time": 71.49 }, { "epoch": 3.79, "learning_rate": "1.5557e-05", "loss": 0.634, "slid_loss": 0.6281, "step": 4905, "time": 71.38 }, { "epoch": 3.8, "learning_rate": "1.5550e-05", "loss": 0.6605, "slid_loss": 0.6284, "step": 4906, "time": 73.12 }, { "epoch": 3.8, "learning_rate": "1.5544e-05", "loss": 0.6191, "slid_loss": 0.6283, "step": 4907, "time": 71.48 }, { "epoch": 3.8, "learning_rate": "1.5537e-05", "loss": 0.6389, "slid_loss": 0.6283, "step": 4908, "time": 71.35 }, { "epoch": 3.8, "learning_rate": "1.5530e-05", "loss": 0.6374, "slid_loss": 0.6287, "step": 4909, "time": 73.05 }, { "epoch": 3.8, "learning_rate": "1.5523e-05", "loss": 0.5931, "slid_loss": 0.6283, "step": 4910, "time": 72.81 }, { "epoch": 3.8, "learning_rate": "1.5516e-05", "loss": 0.5952, "slid_loss": 0.6285, "step": 4911, "time": 84.84 }, { "epoch": 3.8, "learning_rate": "1.5510e-05", "loss": 0.6142, "slid_loss": 0.6285, "step": 4912, "time": 71.11 }, { "epoch": 3.8, "learning_rate": "1.5503e-05", "loss": 0.658, "slid_loss": 0.6292, "step": 4913, "time": 71.86 }, { "epoch": 3.8, "learning_rate": "1.5496e-05", "loss": 0.6577, "slid_loss": 0.6294, "step": 4914, "time": 72.36 }, { "epoch": 3.8, "learning_rate": "1.5489e-05", "loss": 0.6489, "slid_loss": 0.6297, "step": 4915, "time": 84.55 }, { "epoch": 3.8, "learning_rate": "1.5483e-05", "loss": 0.6393, "slid_loss": 0.6295, "step": 4916, "time": 71.84 }, { "epoch": 3.8, "learning_rate": "1.5476e-05", "loss": 0.6138, "slid_loss": 0.629, "step": 4917, "time": 82.51 }, { "epoch": 3.81, "learning_rate": "1.5469e-05", "loss": 0.5806, "slid_loss": 0.6284, "step": 4918, "time": 96.09 }, { "epoch": 3.81, "learning_rate": "1.5462e-05", "loss": 0.6088, "slid_loss": 0.6281, "step": 4919, "time": 96.68 }, { "epoch": 3.81, "learning_rate": "1.5456e-05", "loss": 0.5998, "slid_loss": 0.6275, "step": 4920, "time": 96.29 }, { "epoch": 3.81, "learning_rate": "1.5449e-05", "loss": 0.6425, "slid_loss": 0.628, "step": 4921, "time": 85.22 }, { "epoch": 3.81, "learning_rate": "1.5442e-05", "loss": 0.6412, "slid_loss": 0.6281, "step": 4922, "time": 111.45 }, { "epoch": 3.81, "learning_rate": "1.5435e-05", "loss": 0.6014, "slid_loss": 0.6278, "step": 4923, "time": 98.18 }, { "epoch": 3.81, "learning_rate": "1.5429e-05", "loss": 0.6194, "slid_loss": 0.6277, "step": 4924, "time": 147.64 }, { "epoch": 3.81, "learning_rate": "1.5422e-05", "loss": 0.6545, "slid_loss": 0.6278, "step": 4925, "time": 158.29 }, { "epoch": 3.81, "learning_rate": "1.5415e-05", "loss": 0.6466, "slid_loss": 0.6281, "step": 4926, "time": 136.51 }, { "epoch": 3.81, "learning_rate": "1.5409e-05", "loss": 0.6098, "slid_loss": 0.6278, "step": 4927, "time": 173.15 }, { "epoch": 3.81, "learning_rate": "1.5402e-05", "loss": 0.6232, "slid_loss": 0.6278, "step": 4928, "time": 149.14 }, { "epoch": 3.81, "learning_rate": "1.5395e-05", "loss": 0.6411, "slid_loss": 0.6277, "step": 4929, "time": 134.49 }, { "epoch": 3.81, "learning_rate": "1.5388e-05", "loss": 0.6317, "slid_loss": 0.6277, "step": 4930, "time": 119.43 }, { "epoch": 3.82, "learning_rate": "1.5382e-05", "loss": 0.6123, "slid_loss": 0.6274, "step": 4931, "time": 134.28 }, { "epoch": 3.82, "learning_rate": "1.5375e-05", "loss": 0.6164, "slid_loss": 0.6274, "step": 4932, "time": 134.76 }, { "epoch": 3.82, "learning_rate": "1.5368e-05", "loss": 0.6505, "slid_loss": 0.6276, "step": 4933, "time": 124.09 }, { "epoch": 3.82, "learning_rate": "1.5362e-05", "loss": 0.6304, "slid_loss": 0.6273, "step": 4934, "time": 108.96 }, { "epoch": 3.82, "learning_rate": "1.5355e-05", "loss": 0.6167, "slid_loss": 0.6277, "step": 4935, "time": 84.7 }, { "epoch": 3.82, "learning_rate": "1.5348e-05", "loss": 0.6092, "slid_loss": 0.6273, "step": 4936, "time": 82.55 }, { "epoch": 3.82, "learning_rate": "1.5342e-05", "loss": 0.6392, "slid_loss": 0.6274, "step": 4937, "time": 109.63 }, { "epoch": 3.82, "learning_rate": "1.5335e-05", "loss": 0.6076, "slid_loss": 0.6272, "step": 4938, "time": 95.43 }, { "epoch": 3.82, "learning_rate": "1.5328e-05", "loss": 0.6373, "slid_loss": 0.6274, "step": 4939, "time": 72.52 }, { "epoch": 3.82, "learning_rate": "1.5321e-05", "loss": 0.6049, "slid_loss": 0.6273, "step": 4940, "time": 71.52 }, { "epoch": 3.82, "learning_rate": "1.5315e-05", "loss": 0.682, "slid_loss": 0.6277, "step": 4941, "time": 83.3 }, { "epoch": 3.82, "learning_rate": "1.5308e-05", "loss": 0.5997, "slid_loss": 0.6273, "step": 4942, "time": 73.48 }, { "epoch": 3.82, "learning_rate": "1.5301e-05", "loss": 0.6251, "slid_loss": 0.6272, "step": 4943, "time": 72.83 }, { "epoch": 3.83, "learning_rate": "1.5295e-05", "loss": 0.5966, "slid_loss": 0.6269, "step": 4944, "time": 71.03 }, { "epoch": 3.83, "learning_rate": "1.5288e-05", "loss": 0.6024, "slid_loss": 0.6268, "step": 4945, "time": 71.55 }, { "epoch": 3.83, "learning_rate": "1.5281e-05", "loss": 0.588, "slid_loss": 0.6263, "step": 4946, "time": 72.39 }, { "epoch": 3.83, "learning_rate": "1.5275e-05", "loss": 0.6261, "slid_loss": 0.6263, "step": 4947, "time": 71.69 }, { "epoch": 3.83, "learning_rate": "1.5268e-05", "loss": 0.6092, "slid_loss": 0.6258, "step": 4948, "time": 70.75 }, { "epoch": 3.83, "learning_rate": "1.5262e-05", "loss": 0.6325, "slid_loss": 0.6257, "step": 4949, "time": 71.33 }, { "epoch": 3.83, "learning_rate": "1.5255e-05", "loss": 0.6461, "slid_loss": 0.6258, "step": 4950, "time": 72.75 }, { "epoch": 3.83, "learning_rate": "1.5248e-05", "loss": 0.5921, "slid_loss": 0.6254, "step": 4951, "time": 71.68 }, { "epoch": 3.83, "learning_rate": "1.5242e-05", "loss": 0.6009, "slid_loss": 0.6252, "step": 4952, "time": 71.43 }, { "epoch": 3.83, "learning_rate": "1.5235e-05", "loss": 0.6482, "slid_loss": 0.6253, "step": 4953, "time": 70.53 }, { "epoch": 3.83, "learning_rate": "1.5228e-05", "loss": 0.6185, "slid_loss": 0.6251, "step": 4954, "time": 71.54 }, { "epoch": 3.83, "learning_rate": "1.5222e-05", "loss": 0.6092, "slid_loss": 0.6249, "step": 4955, "time": 71.94 }, { "epoch": 3.83, "learning_rate": "1.5215e-05", "loss": 0.6115, "slid_loss": 0.6246, "step": 4956, "time": 71.62 }, { "epoch": 3.84, "learning_rate": "1.5209e-05", "loss": 0.6095, "slid_loss": 0.6244, "step": 4957, "time": 71.53 }, { "epoch": 3.84, "learning_rate": "1.5202e-05", "loss": 0.6351, "slid_loss": 0.6246, "step": 4958, "time": 72.23 }, { "epoch": 3.84, "learning_rate": "1.5195e-05", "loss": 0.6578, "slid_loss": 0.6249, "step": 4959, "time": 72.48 }, { "epoch": 3.84, "learning_rate": "1.5189e-05", "loss": 0.6031, "slid_loss": 0.6246, "step": 4960, "time": 71.43 }, { "epoch": 3.84, "learning_rate": "1.5182e-05", "loss": 0.6171, "slid_loss": 0.6248, "step": 4961, "time": 72.55 }, { "epoch": 3.84, "learning_rate": "1.5176e-05", "loss": 0.6248, "slid_loss": 0.6248, "step": 4962, "time": 71.16 }, { "epoch": 3.84, "learning_rate": "1.5169e-05", "loss": 0.5999, "slid_loss": 0.6245, "step": 4963, "time": 72.44 }, { "epoch": 3.84, "learning_rate": "1.5162e-05", "loss": 0.6508, "slid_loss": 0.6247, "step": 4964, "time": 71.32 }, { "epoch": 3.84, "learning_rate": "1.5156e-05", "loss": 0.6338, "slid_loss": 0.625, "step": 4965, "time": 71.21 }, { "epoch": 3.84, "learning_rate": "1.5149e-05", "loss": 0.6603, "slid_loss": 0.6251, "step": 4966, "time": 73.81 }, { "epoch": 3.84, "learning_rate": "1.5143e-05", "loss": 0.6167, "slid_loss": 0.6249, "step": 4967, "time": 71.79 }, { "epoch": 3.84, "learning_rate": "1.5136e-05", "loss": 0.6218, "slid_loss": 0.6249, "step": 4968, "time": 71.82 }, { "epoch": 3.84, "learning_rate": "1.5129e-05", "loss": 0.6011, "slid_loss": 0.6246, "step": 4969, "time": 71.41 }, { "epoch": 3.85, "learning_rate": "1.5123e-05", "loss": 0.6186, "slid_loss": 0.6245, "step": 4970, "time": 71.49 }, { "epoch": 3.85, "learning_rate": "1.5116e-05", "loss": 0.6495, "slid_loss": 0.6248, "step": 4971, "time": 70.55 }, { "epoch": 3.85, "learning_rate": "1.5110e-05", "loss": 0.6411, "slid_loss": 0.6249, "step": 4972, "time": 71.18 }, { "epoch": 3.85, "learning_rate": "1.5103e-05", "loss": 0.628, "slid_loss": 0.6251, "step": 4973, "time": 71.35 }, { "epoch": 3.85, "learning_rate": "1.5097e-05", "loss": 0.6368, "slid_loss": 0.6252, "step": 4974, "time": 70.91 }, { "epoch": 3.85, "learning_rate": "1.5090e-05", "loss": 0.6149, "slid_loss": 0.6249, "step": 4975, "time": 72.45 }, { "epoch": 3.85, "learning_rate": "1.5084e-05", "loss": 0.5865, "slid_loss": 0.6246, "step": 4976, "time": 71.3 }, { "epoch": 3.85, "learning_rate": "1.5077e-05", "loss": 0.6192, "slid_loss": 0.6244, "step": 4977, "time": 72.15 }, { "epoch": 3.85, "learning_rate": "1.5070e-05", "loss": 0.5879, "slid_loss": 0.624, "step": 4978, "time": 71.09 }, { "epoch": 3.85, "learning_rate": "1.5064e-05", "loss": 0.6145, "slid_loss": 0.6237, "step": 4979, "time": 71.65 }, { "epoch": 3.85, "learning_rate": "1.5057e-05", "loss": 0.6265, "slid_loss": 0.6241, "step": 4980, "time": 72.05 }, { "epoch": 3.85, "learning_rate": "1.5051e-05", "loss": 0.6646, "slid_loss": 0.6244, "step": 4981, "time": 71.54 }, { "epoch": 3.85, "learning_rate": "1.5044e-05", "loss": 0.6145, "slid_loss": 0.6244, "step": 4982, "time": 72.69 }, { "epoch": 3.86, "learning_rate": "1.5038e-05", "loss": 0.6773, "slid_loss": 0.6252, "step": 4983, "time": 72.45 }, { "epoch": 3.86, "learning_rate": "1.5031e-05", "loss": 0.6477, "slid_loss": 0.6253, "step": 4984, "time": 71.31 }, { "epoch": 3.86, "learning_rate": "1.5025e-05", "loss": 0.5949, "slid_loss": 0.6253, "step": 4985, "time": 71.87 }, { "epoch": 3.86, "learning_rate": "1.5018e-05", "loss": 0.6361, "slid_loss": 0.6257, "step": 4986, "time": 72.49 }, { "epoch": 3.86, "learning_rate": "1.5012e-05", "loss": 0.6482, "slid_loss": 0.6256, "step": 4987, "time": 72.55 }, { "epoch": 3.86, "learning_rate": "1.5005e-05", "loss": 0.644, "slid_loss": 0.6253, "step": 4988, "time": 71.62 }, { "epoch": 3.86, "learning_rate": "1.4999e-05", "loss": 0.6236, "slid_loss": 0.6252, "step": 4989, "time": 72.33 }, { "epoch": 3.86, "learning_rate": "1.4992e-05", "loss": 0.627, "slid_loss": 0.6249, "step": 4990, "time": 72.46 }, { "epoch": 3.86, "learning_rate": "1.4986e-05", "loss": 0.6278, "slid_loss": 0.6252, "step": 4991, "time": 72.37 }, { "epoch": 3.86, "learning_rate": "1.4979e-05", "loss": 0.6392, "slid_loss": 0.6252, "step": 4992, "time": 70.87 }, { "epoch": 3.86, "learning_rate": "1.4973e-05", "loss": 0.6104, "slid_loss": 0.6251, "step": 4993, "time": 71.49 }, { "epoch": 3.86, "learning_rate": "1.4966e-05", "loss": 0.6412, "slid_loss": 0.6256, "step": 4994, "time": 71.82 }, { "epoch": 3.86, "learning_rate": "1.4960e-05", "loss": 0.6548, "slid_loss": 0.6258, "step": 4995, "time": 72.62 }, { "epoch": 3.87, "learning_rate": "1.4953e-05", "loss": 0.6063, "slid_loss": 0.6256, "step": 4996, "time": 71.21 }, { "epoch": 3.87, "learning_rate": "1.4947e-05", "loss": 0.6334, "slid_loss": 0.6253, "step": 4997, "time": 71.34 }, { "epoch": 3.87, "learning_rate": "1.4940e-05", "loss": 0.6571, "slid_loss": 0.6254, "step": 4998, "time": 71.98 }, { "epoch": 3.87, "learning_rate": "1.4934e-05", "loss": 0.6547, "slid_loss": 0.6255, "step": 4999, "time": 70.0 }, { "epoch": 3.87, "learning_rate": "1.4927e-05", "loss": 0.6211, "slid_loss": 0.6256, "step": 5000, "time": 71.5 }, { "epoch": 3.87, "learning_rate": "1.4921e-05", "loss": 0.6595, "slid_loss": 0.6259, "step": 5001, "time": 844.15 }, { "epoch": 3.87, "learning_rate": "1.4915e-05", "loss": 0.6219, "slid_loss": 0.626, "step": 5002, "time": 72.34 }, { "epoch": 3.87, "learning_rate": "1.4908e-05", "loss": 0.6071, "slid_loss": 0.6259, "step": 5003, "time": 74.72 }, { "epoch": 3.87, "learning_rate": "1.4902e-05", "loss": 0.642, "slid_loss": 0.6263, "step": 5004, "time": 71.44 }, { "epoch": 3.87, "learning_rate": "1.4895e-05", "loss": 0.6199, "slid_loss": 0.6262, "step": 5005, "time": 71.03 }, { "epoch": 3.87, "learning_rate": "1.4889e-05", "loss": 0.6707, "slid_loss": 0.6263, "step": 5006, "time": 72.64 }, { "epoch": 3.87, "learning_rate": "1.4882e-05", "loss": 0.6257, "slid_loss": 0.6264, "step": 5007, "time": 71.46 }, { "epoch": 3.87, "learning_rate": "1.4876e-05", "loss": 0.5855, "slid_loss": 0.6258, "step": 5008, "time": 72.03 }, { "epoch": 3.88, "learning_rate": "1.4870e-05", "loss": 0.6496, "slid_loss": 0.6259, "step": 5009, "time": 70.49 }, { "epoch": 3.88, "learning_rate": "1.4863e-05", "loss": 0.6415, "slid_loss": 0.6264, "step": 5010, "time": 71.43 }, { "epoch": 3.88, "learning_rate": "1.4857e-05", "loss": 0.6313, "slid_loss": 0.6268, "step": 5011, "time": 71.17 }, { "epoch": 3.88, "learning_rate": "1.4850e-05", "loss": 0.5929, "slid_loss": 0.6266, "step": 5012, "time": 71.42 }, { "epoch": 3.88, "learning_rate": "1.4844e-05", "loss": 0.6241, "slid_loss": 0.6262, "step": 5013, "time": 71.49 }, { "epoch": 3.88, "learning_rate": "1.4837e-05", "loss": 0.6184, "slid_loss": 0.6258, "step": 5014, "time": 72.39 }, { "epoch": 3.88, "learning_rate": "1.4831e-05", "loss": 0.6441, "slid_loss": 0.6258, "step": 5015, "time": 71.52 }, { "epoch": 3.88, "learning_rate": "1.4825e-05", "loss": 0.6461, "slid_loss": 0.6259, "step": 5016, "time": 70.99 }, { "epoch": 3.88, "learning_rate": "1.4818e-05", "loss": 0.6582, "slid_loss": 0.6263, "step": 5017, "time": 72.46 }, { "epoch": 3.88, "learning_rate": "1.4812e-05", "loss": 0.6353, "slid_loss": 0.6269, "step": 5018, "time": 71.56 }, { "epoch": 3.88, "learning_rate": "1.4805e-05", "loss": 0.6506, "slid_loss": 0.6273, "step": 5019, "time": 71.92 }, { "epoch": 3.88, "learning_rate": "1.4799e-05", "loss": 0.6366, "slid_loss": 0.6276, "step": 5020, "time": 72.1 }, { "epoch": 3.88, "learning_rate": "1.4793e-05", "loss": 0.6293, "slid_loss": 0.6275, "step": 5021, "time": 71.56 }, { "epoch": 3.89, "learning_rate": "1.4786e-05", "loss": 0.617, "slid_loss": 0.6273, "step": 5022, "time": 70.43 }, { "epoch": 3.89, "learning_rate": "1.4780e-05", "loss": 0.6336, "slid_loss": 0.6276, "step": 5023, "time": 72.06 }, { "epoch": 3.89, "learning_rate": "1.4774e-05", "loss": 0.6227, "slid_loss": 0.6276, "step": 5024, "time": 71.2 }, { "epoch": 3.89, "learning_rate": "1.4767e-05", "loss": 0.6257, "slid_loss": 0.6273, "step": 5025, "time": 73.49 }, { "epoch": 3.89, "learning_rate": "1.4761e-05", "loss": 0.6382, "slid_loss": 0.6272, "step": 5026, "time": 71.23 }, { "epoch": 3.89, "learning_rate": "1.4754e-05", "loss": 0.704, "slid_loss": 0.6282, "step": 5027, "time": 71.37 }, { "epoch": 3.89, "learning_rate": "1.4748e-05", "loss": 0.6485, "slid_loss": 0.6284, "step": 5028, "time": 71.94 }, { "epoch": 3.89, "learning_rate": "1.4742e-05", "loss": 0.6155, "slid_loss": 0.6282, "step": 5029, "time": 71.03 }, { "epoch": 3.89, "learning_rate": "1.4735e-05", "loss": 0.6293, "slid_loss": 0.6282, "step": 5030, "time": 70.66 }, { "epoch": 3.89, "learning_rate": "1.4729e-05", "loss": 0.6366, "slid_loss": 0.6284, "step": 5031, "time": 70.88 }, { "epoch": 3.89, "learning_rate": "1.4723e-05", "loss": 0.6116, "slid_loss": 0.6284, "step": 5032, "time": 72.6 }, { "epoch": 3.89, "learning_rate": "1.4716e-05", "loss": 0.6359, "slid_loss": 0.6282, "step": 5033, "time": 72.13 }, { "epoch": 3.89, "learning_rate": "1.4710e-05", "loss": 0.6064, "slid_loss": 0.628, "step": 5034, "time": 71.28 }, { "epoch": 3.9, "learning_rate": "1.4704e-05", "loss": 0.6043, "slid_loss": 0.6279, "step": 5035, "time": 71.6 }, { "epoch": 3.9, "learning_rate": "1.4697e-05", "loss": 0.619, "slid_loss": 0.6279, "step": 5036, "time": 71.84 }, { "epoch": 3.9, "learning_rate": "1.4691e-05", "loss": 0.6298, "slid_loss": 0.6279, "step": 5037, "time": 71.93 }, { "epoch": 3.9, "learning_rate": "1.4685e-05", "loss": 0.6158, "slid_loss": 0.6279, "step": 5038, "time": 71.23 }, { "epoch": 3.9, "learning_rate": "1.4678e-05", "loss": 0.6296, "slid_loss": 0.6279, "step": 5039, "time": 71.88 }, { "epoch": 3.9, "learning_rate": "1.4672e-05", "loss": 0.6299, "slid_loss": 0.6281, "step": 5040, "time": 71.33 }, { "epoch": 3.9, "learning_rate": "1.4666e-05", "loss": 0.651, "slid_loss": 0.6278, "step": 5041, "time": 71.11 }, { "epoch": 3.9, "learning_rate": "1.4659e-05", "loss": 0.6576, "slid_loss": 0.6284, "step": 5042, "time": 72.04 }, { "epoch": 3.9, "learning_rate": "1.4653e-05", "loss": 0.6054, "slid_loss": 0.6282, "step": 5043, "time": 71.91 }, { "epoch": 3.9, "learning_rate": "1.4647e-05", "loss": 0.6248, "slid_loss": 0.6285, "step": 5044, "time": 71.88 }, { "epoch": 3.9, "learning_rate": "1.4641e-05", "loss": 0.6143, "slid_loss": 0.6286, "step": 5045, "time": 71.91 }, { "epoch": 3.9, "learning_rate": "1.4634e-05", "loss": 0.5917, "slid_loss": 0.6286, "step": 5046, "time": 71.81 }, { "epoch": 3.9, "learning_rate": "1.4628e-05", "loss": 0.5903, "slid_loss": 0.6283, "step": 5047, "time": 73.15 }, { "epoch": 3.91, "learning_rate": "1.4622e-05", "loss": 0.5913, "slid_loss": 0.6281, "step": 5048, "time": 71.33 }, { "epoch": 3.91, "learning_rate": "1.4615e-05", "loss": 0.6078, "slid_loss": 0.6278, "step": 5049, "time": 71.97 }, { "epoch": 3.91, "learning_rate": "1.4609e-05", "loss": 0.6127, "slid_loss": 0.6275, "step": 5050, "time": 71.29 }, { "epoch": 3.91, "learning_rate": "1.4603e-05", "loss": 0.6303, "slid_loss": 0.6279, "step": 5051, "time": 72.85 }, { "epoch": 3.91, "learning_rate": "1.4597e-05", "loss": 0.6441, "slid_loss": 0.6283, "step": 5052, "time": 70.57 }, { "epoch": 3.91, "learning_rate": "1.4590e-05", "loss": 0.6331, "slid_loss": 0.6282, "step": 5053, "time": 72.51 }, { "epoch": 3.91, "learning_rate": "1.4584e-05", "loss": 0.6287, "slid_loss": 0.6283, "step": 5054, "time": 72.37 }, { "epoch": 3.91, "learning_rate": "1.4578e-05", "loss": 0.5805, "slid_loss": 0.628, "step": 5055, "time": 73.0 }, { "epoch": 3.91, "learning_rate": "1.4572e-05", "loss": 0.6292, "slid_loss": 0.6282, "step": 5056, "time": 71.74 }, { "epoch": 3.91, "learning_rate": "1.4565e-05", "loss": 0.6347, "slid_loss": 0.6284, "step": 5057, "time": 71.77 }, { "epoch": 3.91, "learning_rate": "1.4559e-05", "loss": 0.65, "slid_loss": 0.6286, "step": 5058, "time": 72.11 }, { "epoch": 3.91, "learning_rate": "1.4553e-05", "loss": 0.6315, "slid_loss": 0.6283, "step": 5059, "time": 71.54 }, { "epoch": 3.91, "learning_rate": "1.4547e-05", "loss": 0.6028, "slid_loss": 0.6283, "step": 5060, "time": 72.38 }, { "epoch": 3.92, "learning_rate": "1.4540e-05", "loss": 0.652, "slid_loss": 0.6286, "step": 5061, "time": 73.54 }, { "epoch": 3.92, "learning_rate": "1.4534e-05", "loss": 0.6117, "slid_loss": 0.6285, "step": 5062, "time": 71.28 }, { "epoch": 3.92, "learning_rate": "1.4528e-05", "loss": 0.6812, "slid_loss": 0.6293, "step": 5063, "time": 71.11 }, { "epoch": 3.92, "learning_rate": "1.4522e-05", "loss": 0.6429, "slid_loss": 0.6292, "step": 5064, "time": 71.6 }, { "epoch": 3.92, "learning_rate": "1.4515e-05", "loss": 0.6361, "slid_loss": 0.6293, "step": 5065, "time": 71.64 }, { "epoch": 3.92, "learning_rate": "1.4509e-05", "loss": 0.6331, "slid_loss": 0.629, "step": 5066, "time": 71.11 }, { "epoch": 3.92, "learning_rate": "1.4503e-05", "loss": 0.6395, "slid_loss": 0.6292, "step": 5067, "time": 73.0 }, { "epoch": 3.92, "learning_rate": "1.4497e-05", "loss": 0.6679, "slid_loss": 0.6297, "step": 5068, "time": 71.6 }, { "epoch": 3.92, "learning_rate": "1.4491e-05", "loss": 0.65, "slid_loss": 0.6302, "step": 5069, "time": 70.64 }, { "epoch": 3.92, "learning_rate": "1.4484e-05", "loss": 0.6319, "slid_loss": 0.6303, "step": 5070, "time": 70.89 }, { "epoch": 3.92, "learning_rate": "1.4478e-05", "loss": 0.6236, "slid_loss": 0.63, "step": 5071, "time": 84.25 }, { "epoch": 3.92, "learning_rate": "1.4472e-05", "loss": 0.631, "slid_loss": 0.6299, "step": 5072, "time": 71.28 }, { "epoch": 3.92, "learning_rate": "1.4466e-05", "loss": 0.6227, "slid_loss": 0.6299, "step": 5073, "time": 72.17 }, { "epoch": 3.93, "learning_rate": "1.4460e-05", "loss": 0.6032, "slid_loss": 0.6296, "step": 5074, "time": 84.96 }, { "epoch": 3.93, "learning_rate": "1.4453e-05", "loss": 0.6553, "slid_loss": 0.63, "step": 5075, "time": 71.7 }, { "epoch": 3.93, "learning_rate": "1.4447e-05", "loss": 0.6172, "slid_loss": 0.6303, "step": 5076, "time": 85.78 }, { "epoch": 3.93, "learning_rate": "1.4441e-05", "loss": 0.6547, "slid_loss": 0.6306, "step": 5077, "time": 85.44 }, { "epoch": 3.93, "learning_rate": "1.4435e-05", "loss": 0.6088, "slid_loss": 0.6308, "step": 5078, "time": 97.01 }, { "epoch": 3.93, "learning_rate": "1.4429e-05", "loss": 0.6173, "slid_loss": 0.6309, "step": 5079, "time": 97.45 }, { "epoch": 3.93, "learning_rate": "1.4423e-05", "loss": 0.6424, "slid_loss": 0.631, "step": 5080, "time": 108.97 }, { "epoch": 3.93, "learning_rate": "1.4416e-05", "loss": 0.6165, "slid_loss": 0.6305, "step": 5081, "time": 110.97 }, { "epoch": 3.93, "learning_rate": "1.4410e-05", "loss": 0.6223, "slid_loss": 0.6306, "step": 5082, "time": 122.89 }, { "epoch": 3.93, "learning_rate": "1.4404e-05", "loss": 0.6635, "slid_loss": 0.6305, "step": 5083, "time": 121.28 }, { "epoch": 3.93, "learning_rate": "1.4398e-05", "loss": 0.6208, "slid_loss": 0.6302, "step": 5084, "time": 147.02 }, { "epoch": 3.93, "learning_rate": "1.4392e-05", "loss": 0.6479, "slid_loss": 0.6307, "step": 5085, "time": 134.92 }, { "epoch": 3.94, "learning_rate": "1.4386e-05", "loss": 0.6304, "slid_loss": 0.6307, "step": 5086, "time": 131.07 }, { "epoch": 3.94, "learning_rate": "1.4380e-05", "loss": 0.6191, "slid_loss": 0.6304, "step": 5087, "time": 138.06 }, { "epoch": 3.94, "learning_rate": "1.4373e-05", "loss": 0.6702, "slid_loss": 0.6306, "step": 5088, "time": 169.4 }, { "epoch": 3.94, "learning_rate": "1.4367e-05", "loss": 0.6345, "slid_loss": 0.6308, "step": 5089, "time": 107.41 }, { "epoch": 3.94, "learning_rate": "1.4361e-05", "loss": 0.5816, "slid_loss": 0.6303, "step": 5090, "time": 159.6 }, { "epoch": 3.94, "learning_rate": "1.4355e-05", "loss": 0.6166, "slid_loss": 0.6302, "step": 5091, "time": 137.08 }, { "epoch": 3.94, "learning_rate": "1.4349e-05", "loss": 0.6186, "slid_loss": 0.63, "step": 5092, "time": 134.2 }, { "epoch": 3.94, "learning_rate": "1.4343e-05", "loss": 0.5948, "slid_loss": 0.6298, "step": 5093, "time": 96.94 }, { "epoch": 3.94, "learning_rate": "1.4337e-05", "loss": 0.6383, "slid_loss": 0.6298, "step": 5094, "time": 99.62 }, { "epoch": 3.94, "learning_rate": "1.4331e-05", "loss": 0.6112, "slid_loss": 0.6294, "step": 5095, "time": 97.54 }, { "epoch": 3.94, "learning_rate": "1.4325e-05", "loss": 0.6027, "slid_loss": 0.6293, "step": 5096, "time": 95.02 }, { "epoch": 3.94, "learning_rate": "1.4318e-05", "loss": 0.5747, "slid_loss": 0.6287, "step": 5097, "time": 95.85 }, { "epoch": 3.94, "learning_rate": "1.4312e-05", "loss": 0.5683, "slid_loss": 0.6279, "step": 5098, "time": 72.62 }, { "epoch": 3.95, "learning_rate": "1.4306e-05", "loss": 0.5823, "slid_loss": 0.6271, "step": 5099, "time": 71.73 }, { "epoch": 3.95, "learning_rate": "1.4300e-05", "loss": 0.6368, "slid_loss": 0.6273, "step": 5100, "time": 72.74 }, { "epoch": 3.95, "learning_rate": "1.4294e-05", "loss": 0.6068, "slid_loss": 0.6268, "step": 5101, "time": 72.09 }, { "epoch": 3.95, "learning_rate": "1.4288e-05", "loss": 0.6535, "slid_loss": 0.6271, "step": 5102, "time": 83.82 }, { "epoch": 3.95, "learning_rate": "1.4282e-05", "loss": 0.6139, "slid_loss": 0.6271, "step": 5103, "time": 71.65 }, { "epoch": 3.95, "learning_rate": "1.4276e-05", "loss": 0.6136, "slid_loss": 0.6269, "step": 5104, "time": 72.8 }, { "epoch": 3.95, "learning_rate": "1.4270e-05", "loss": 0.6231, "slid_loss": 0.6269, "step": 5105, "time": 72.97 }, { "epoch": 3.95, "learning_rate": "1.4264e-05", "loss": 0.6265, "slid_loss": 0.6265, "step": 5106, "time": 71.28 }, { "epoch": 3.95, "learning_rate": "1.4258e-05", "loss": 0.6424, "slid_loss": 0.6266, "step": 5107, "time": 73.15 }, { "epoch": 3.95, "learning_rate": "1.4252e-05", "loss": 0.6121, "slid_loss": 0.6269, "step": 5108, "time": 72.23 }, { "epoch": 3.95, "learning_rate": "1.4245e-05", "loss": 0.6098, "slid_loss": 0.6265, "step": 5109, "time": 71.71 }, { "epoch": 3.95, "learning_rate": "1.4239e-05", "loss": 0.632, "slid_loss": 0.6264, "step": 5110, "time": 71.72 }, { "epoch": 3.95, "learning_rate": "1.4233e-05", "loss": 0.643, "slid_loss": 0.6265, "step": 5111, "time": 71.3 }, { "epoch": 3.96, "learning_rate": "1.4227e-05", "loss": 0.6246, "slid_loss": 0.6268, "step": 5112, "time": 73.28 }, { "epoch": 3.96, "learning_rate": "1.4221e-05", "loss": 0.592, "slid_loss": 0.6265, "step": 5113, "time": 71.58 }, { "epoch": 3.96, "learning_rate": "1.4215e-05", "loss": 0.6566, "slid_loss": 0.6269, "step": 5114, "time": 71.68 }, { "epoch": 3.96, "learning_rate": "1.4209e-05", "loss": 0.6329, "slid_loss": 0.6268, "step": 5115, "time": 72.12 }, { "epoch": 3.96, "learning_rate": "1.4203e-05", "loss": 0.6305, "slid_loss": 0.6266, "step": 5116, "time": 70.73 }, { "epoch": 3.96, "learning_rate": "1.4197e-05", "loss": 0.6177, "slid_loss": 0.6262, "step": 5117, "time": 70.79 }, { "epoch": 3.96, "learning_rate": "1.4191e-05", "loss": 0.6, "slid_loss": 0.6259, "step": 5118, "time": 70.92 }, { "epoch": 3.96, "learning_rate": "1.4185e-05", "loss": 0.6023, "slid_loss": 0.6254, "step": 5119, "time": 71.97 }, { "epoch": 3.96, "learning_rate": "1.4179e-05", "loss": 0.6332, "slid_loss": 0.6253, "step": 5120, "time": 71.48 }, { "epoch": 3.96, "learning_rate": "1.4173e-05", "loss": 0.619, "slid_loss": 0.6252, "step": 5121, "time": 72.38 }, { "epoch": 3.96, "learning_rate": "1.4167e-05", "loss": 0.6107, "slid_loss": 0.6252, "step": 5122, "time": 72.11 }, { "epoch": 3.96, "learning_rate": "1.4161e-05", "loss": 0.6243, "slid_loss": 0.6251, "step": 5123, "time": 71.43 }, { "epoch": 3.96, "learning_rate": "1.4155e-05", "loss": 0.6131, "slid_loss": 0.625, "step": 5124, "time": 72.69 }, { "epoch": 3.97, "learning_rate": "1.4149e-05", "loss": 0.6154, "slid_loss": 0.6249, "step": 5125, "time": 71.42 }, { "epoch": 3.97, "learning_rate": "1.4143e-05", "loss": 0.6125, "slid_loss": 0.6246, "step": 5126, "time": 71.94 }, { "epoch": 3.97, "learning_rate": "1.4137e-05", "loss": 0.6083, "slid_loss": 0.6237, "step": 5127, "time": 71.33 }, { "epoch": 3.97, "learning_rate": "1.4131e-05", "loss": 0.6266, "slid_loss": 0.6235, "step": 5128, "time": 71.21 }, { "epoch": 3.97, "learning_rate": "1.4125e-05", "loss": 0.5599, "slid_loss": 0.6229, "step": 5129, "time": 71.34 }, { "epoch": 3.97, "learning_rate": "1.4119e-05", "loss": 0.6118, "slid_loss": 0.6227, "step": 5130, "time": 72.95 }, { "epoch": 3.97, "learning_rate": "1.4113e-05", "loss": 0.6223, "slid_loss": 0.6226, "step": 5131, "time": 70.76 }, { "epoch": 3.97, "learning_rate": "1.4107e-05", "loss": 0.6461, "slid_loss": 0.6229, "step": 5132, "time": 71.05 }, { "epoch": 3.97, "learning_rate": "1.4101e-05", "loss": 0.6017, "slid_loss": 0.6226, "step": 5133, "time": 72.32 }, { "epoch": 3.97, "learning_rate": "1.4095e-05", "loss": 0.5932, "slid_loss": 0.6225, "step": 5134, "time": 72.01 }, { "epoch": 3.97, "learning_rate": "1.4089e-05", "loss": 0.6458, "slid_loss": 0.6229, "step": 5135, "time": 71.22 }, { "epoch": 3.97, "learning_rate": "1.4083e-05", "loss": 0.634, "slid_loss": 0.623, "step": 5136, "time": 70.25 }, { "epoch": 3.97, "learning_rate": "1.4078e-05", "loss": 0.6471, "slid_loss": 0.6232, "step": 5137, "time": 71.55 }, { "epoch": 3.98, "learning_rate": "1.4072e-05", "loss": 0.6259, "slid_loss": 0.6233, "step": 5138, "time": 71.55 }, { "epoch": 3.98, "learning_rate": "1.4066e-05", "loss": 0.6387, "slid_loss": 0.6234, "step": 5139, "time": 71.7 }, { "epoch": 3.98, "learning_rate": "1.4060e-05", "loss": 0.5824, "slid_loss": 0.6229, "step": 5140, "time": 73.27 }, { "epoch": 3.98, "learning_rate": "1.4054e-05", "loss": 0.6252, "slid_loss": 0.6227, "step": 5141, "time": 72.64 }, { "epoch": 3.98, "learning_rate": "1.4048e-05", "loss": 0.6257, "slid_loss": 0.6223, "step": 5142, "time": 71.82 }, { "epoch": 3.98, "learning_rate": "1.4042e-05", "loss": 0.6288, "slid_loss": 0.6226, "step": 5143, "time": 72.01 }, { "epoch": 3.98, "learning_rate": "1.4036e-05", "loss": 0.6552, "slid_loss": 0.6229, "step": 5144, "time": 71.53 }, { "epoch": 3.98, "learning_rate": "1.4030e-05", "loss": 0.5983, "slid_loss": 0.6227, "step": 5145, "time": 72.89 }, { "epoch": 3.98, "learning_rate": "1.4024e-05", "loss": 0.6508, "slid_loss": 0.6233, "step": 5146, "time": 71.04 }, { "epoch": 3.98, "learning_rate": "1.4018e-05", "loss": 0.6463, "slid_loss": 0.6239, "step": 5147, "time": 71.35 }, { "epoch": 3.98, "learning_rate": "1.4012e-05", "loss": 0.6174, "slid_loss": 0.6241, "step": 5148, "time": 72.38 }, { "epoch": 3.98, "learning_rate": "1.4006e-05", "loss": 0.6693, "slid_loss": 0.6247, "step": 5149, "time": 72.07 }, { "epoch": 3.98, "learning_rate": "1.4001e-05", "loss": 0.5812, "slid_loss": 0.6244, "step": 5150, "time": 71.63 }, { "epoch": 3.99, "learning_rate": "1.3995e-05", "loss": 0.6364, "slid_loss": 0.6245, "step": 5151, "time": 72.47 }, { "epoch": 3.99, "learning_rate": "1.3989e-05", "loss": 0.6677, "slid_loss": 0.6247, "step": 5152, "time": 71.41 }, { "epoch": 3.99, "learning_rate": "1.3983e-05", "loss": 0.6514, "slid_loss": 0.6249, "step": 5153, "time": 71.04 }, { "epoch": 3.99, "learning_rate": "1.3977e-05", "loss": 0.6285, "slid_loss": 0.6249, "step": 5154, "time": 71.72 }, { "epoch": 3.99, "learning_rate": "1.3971e-05", "loss": 0.5806, "slid_loss": 0.6249, "step": 5155, "time": 71.99 }, { "epoch": 3.99, "learning_rate": "1.3965e-05", "loss": 0.6163, "slid_loss": 0.6248, "step": 5156, "time": 71.73 }, { "epoch": 3.99, "learning_rate": "1.3959e-05", "loss": 0.5614, "slid_loss": 0.624, "step": 5157, "time": 71.95 }, { "epoch": 3.99, "learning_rate": "1.3954e-05", "loss": 0.6208, "slid_loss": 0.6237, "step": 5158, "time": 71.99 }, { "epoch": 3.99, "learning_rate": "1.3948e-05", "loss": 0.6256, "slid_loss": 0.6237, "step": 5159, "time": 72.08 }, { "epoch": 3.99, "learning_rate": "1.3942e-05", "loss": 0.6079, "slid_loss": 0.6237, "step": 5160, "time": 71.83 }, { "epoch": 3.99, "learning_rate": "1.3936e-05", "loss": 0.6393, "slid_loss": 0.6236, "step": 5161, "time": 74.32 }, { "epoch": 3.99, "learning_rate": "1.3930e-05", "loss": 0.6245, "slid_loss": 0.6237, "step": 5162, "time": 72.13 }, { "epoch": 3.99, "learning_rate": "1.3924e-05", "loss": 0.6012, "slid_loss": 0.6229, "step": 5163, "time": 70.8 }, { "epoch": 4.0, "learning_rate": "1.3918e-05", "loss": 0.5903, "slid_loss": 0.6224, "step": 5164, "time": 71.67 }, { "epoch": 4.0, "learning_rate": "1.3913e-05", "loss": 0.6124, "slid_loss": 0.6222, "step": 5165, "time": 73.1 }, { "epoch": 4.0, "learning_rate": "1.3907e-05", "loss": 0.6102, "slid_loss": 0.6219, "step": 5166, "time": 72.03 }, { "epoch": 4.0, "learning_rate": "1.3901e-05", "loss": 0.5775, "slid_loss": 0.6213, "step": 5167, "time": 70.98 }, { "epoch": 4.0, "learning_rate": "1.3895e-05", "loss": 0.6646, "slid_loss": 0.6213, "step": 5168, "time": 71.07 }, { "epoch": 4.0, "learning_rate": "1.3889e-05", "loss": 0.5934, "slid_loss": 0.6207, "step": 5169, "time": 72.71 }, { "epoch": 4.0, "learning_rate": "1.3883e-05", "loss": 0.641, "slid_loss": 0.6208, "step": 5170, "time": 74.71 }, { "epoch": 4.0, "learning_rate": "1.3878e-05", "loss": 0.6587, "slid_loss": 0.6212, "step": 5171, "time": 108.83 }, { "epoch": 4.0, "learning_rate": "1.3872e-05", "loss": 0.6069, "slid_loss": 0.6209, "step": 5172, "time": 71.84 }, { "epoch": 4.0, "learning_rate": "1.3866e-05", "loss": 0.599, "slid_loss": 0.6207, "step": 5173, "time": 71.49 }, { "epoch": 4.0, "learning_rate": "1.3860e-05", "loss": 0.6318, "slid_loss": 0.621, "step": 5174, "time": 70.37 }, { "epoch": 4.0, "learning_rate": "1.3854e-05", "loss": 0.594, "slid_loss": 0.6204, "step": 5175, "time": 71.66 }, { "epoch": 4.0, "learning_rate": "1.3849e-05", "loss": 0.6144, "slid_loss": 0.6203, "step": 5176, "time": 71.59 }, { "epoch": 4.01, "learning_rate": "1.3843e-05", "loss": 0.6346, "slid_loss": 0.6201, "step": 5177, "time": 71.37 }, { "epoch": 4.01, "learning_rate": "1.3837e-05", "loss": 0.6179, "slid_loss": 0.6202, "step": 5178, "time": 71.81 }, { "epoch": 4.01, "learning_rate": "1.3831e-05", "loss": 0.6231, "slid_loss": 0.6203, "step": 5179, "time": 71.72 }, { "epoch": 4.01, "learning_rate": "1.3825e-05", "loss": 0.617, "slid_loss": 0.62, "step": 5180, "time": 72.02 }, { "epoch": 4.01, "learning_rate": "1.3820e-05", "loss": 0.6334, "slid_loss": 0.6202, "step": 5181, "time": 73.09 }, { "epoch": 4.01, "learning_rate": "1.3814e-05", "loss": 0.6544, "slid_loss": 0.6205, "step": 5182, "time": 71.4 }, { "epoch": 4.01, "learning_rate": "1.3808e-05", "loss": 0.6295, "slid_loss": 0.6202, "step": 5183, "time": 70.6 }, { "epoch": 4.01, "learning_rate": "1.3802e-05", "loss": 0.6193, "slid_loss": 0.6202, "step": 5184, "time": 70.29 }, { "epoch": 4.01, "learning_rate": "1.3797e-05", "loss": 0.5951, "slid_loss": 0.6196, "step": 5185, "time": 71.81 }, { "epoch": 4.01, "learning_rate": "1.3791e-05", "loss": 0.6436, "slid_loss": 0.6198, "step": 5186, "time": 71.19 }, { "epoch": 4.01, "learning_rate": "1.3785e-05", "loss": 0.6491, "slid_loss": 0.6201, "step": 5187, "time": 70.63 }, { "epoch": 4.01, "learning_rate": "1.3779e-05", "loss": 0.6256, "slid_loss": 0.6196, "step": 5188, "time": 72.7 }, { "epoch": 4.01, "learning_rate": "1.3774e-05", "loss": 0.5908, "slid_loss": 0.6192, "step": 5189, "time": 72.15 }, { "epoch": 4.02, "learning_rate": "1.3768e-05", "loss": 0.6042, "slid_loss": 0.6194, "step": 5190, "time": 70.51 }, { "epoch": 4.02, "learning_rate": "1.3762e-05", "loss": 0.6283, "slid_loss": 0.6195, "step": 5191, "time": 72.86 }, { "epoch": 4.02, "learning_rate": "1.3756e-05", "loss": 0.6004, "slid_loss": 0.6193, "step": 5192, "time": 71.45 }, { "epoch": 4.02, "learning_rate": "1.3751e-05", "loss": 0.5866, "slid_loss": 0.6193, "step": 5193, "time": 71.04 }, { "epoch": 4.02, "learning_rate": "1.3745e-05", "loss": 0.5798, "slid_loss": 0.6187, "step": 5194, "time": 72.39 }, { "epoch": 4.02, "learning_rate": "1.3739e-05", "loss": 0.6671, "slid_loss": 0.6192, "step": 5195, "time": 71.67 }, { "epoch": 4.02, "learning_rate": "1.3733e-05", "loss": 0.6573, "slid_loss": 0.6198, "step": 5196, "time": 72.27 }, { "epoch": 4.02, "learning_rate": "1.3728e-05", "loss": 0.6209, "slid_loss": 0.6202, "step": 5197, "time": 71.15 }, { "epoch": 4.02, "learning_rate": "1.3722e-05", "loss": 0.6419, "slid_loss": 0.621, "step": 5198, "time": 71.7 }, { "epoch": 4.02, "learning_rate": "1.3716e-05", "loss": 0.5973, "slid_loss": 0.6211, "step": 5199, "time": 72.18 }, { "epoch": 4.02, "learning_rate": "1.3711e-05", "loss": 0.6344, "slid_loss": 0.6211, "step": 5200, "time": 73.74 }, { "epoch": 4.02, "learning_rate": "1.3705e-05", "loss": 0.6448, "slid_loss": 0.6215, "step": 5201, "time": 759.73 }, { "epoch": 4.02, "learning_rate": "1.3699e-05", "loss": 0.6311, "slid_loss": 0.6213, "step": 5202, "time": 71.85 }, { "epoch": 4.03, "learning_rate": "1.3694e-05", "loss": 0.5974, "slid_loss": 0.6211, "step": 5203, "time": 72.71 }, { "epoch": 4.03, "learning_rate": "1.3688e-05", "loss": 0.6486, "slid_loss": 0.6214, "step": 5204, "time": 71.19 }, { "epoch": 4.03, "learning_rate": "1.3682e-05", "loss": 0.6394, "slid_loss": 0.6216, "step": 5205, "time": 72.91 }, { "epoch": 4.03, "learning_rate": "1.3677e-05", "loss": 0.6412, "slid_loss": 0.6218, "step": 5206, "time": 72.45 }, { "epoch": 4.03, "learning_rate": "1.3671e-05", "loss": 0.6545, "slid_loss": 0.6219, "step": 5207, "time": 71.33 }, { "epoch": 4.03, "learning_rate": "1.3665e-05", "loss": 0.6048, "slid_loss": 0.6218, "step": 5208, "time": 72.87 }, { "epoch": 4.03, "learning_rate": "1.3659e-05", "loss": 0.6092, "slid_loss": 0.6218, "step": 5209, "time": 72.0 }, { "epoch": 4.03, "learning_rate": "1.3654e-05", "loss": 0.6272, "slid_loss": 0.6218, "step": 5210, "time": 71.63 }, { "epoch": 4.03, "learning_rate": "1.3648e-05", "loss": 0.6336, "slid_loss": 0.6217, "step": 5211, "time": 72.18 }, { "epoch": 4.03, "learning_rate": "1.3643e-05", "loss": 0.6047, "slid_loss": 0.6215, "step": 5212, "time": 71.8 }, { "epoch": 4.03, "learning_rate": "1.3637e-05", "loss": 0.6317, "slid_loss": 0.6219, "step": 5213, "time": 72.16 }, { "epoch": 4.03, "learning_rate": "1.3631e-05", "loss": 0.6545, "slid_loss": 0.6218, "step": 5214, "time": 72.58 }, { "epoch": 4.03, "learning_rate": "1.3626e-05", "loss": 0.654, "slid_loss": 0.6221, "step": 5215, "time": 72.23 }, { "epoch": 4.04, "learning_rate": "1.3620e-05", "loss": 0.648, "slid_loss": 0.6222, "step": 5216, "time": 72.13 }, { "epoch": 4.04, "learning_rate": "1.3614e-05", "loss": 0.6431, "slid_loss": 0.6225, "step": 5217, "time": 71.65 }, { "epoch": 4.04, "learning_rate": "1.3609e-05", "loss": 0.6489, "slid_loss": 0.623, "step": 5218, "time": 71.96 }, { "epoch": 4.04, "learning_rate": "1.3603e-05", "loss": 0.6082, "slid_loss": 0.623, "step": 5219, "time": 70.76 }, { "epoch": 4.04, "learning_rate": "1.3597e-05", "loss": 0.649, "slid_loss": 0.6232, "step": 5220, "time": 71.64 }, { "epoch": 4.04, "learning_rate": "1.3592e-05", "loss": 0.6476, "slid_loss": 0.6235, "step": 5221, "time": 70.47 }, { "epoch": 4.04, "learning_rate": "1.3586e-05", "loss": 0.6051, "slid_loss": 0.6234, "step": 5222, "time": 71.66 }, { "epoch": 4.04, "learning_rate": "1.3581e-05", "loss": 0.6256, "slid_loss": 0.6234, "step": 5223, "time": 71.58 }, { "epoch": 4.04, "learning_rate": "1.3575e-05", "loss": 0.5844, "slid_loss": 0.6231, "step": 5224, "time": 71.35 }, { "epoch": 4.04, "learning_rate": "1.3569e-05", "loss": 0.6203, "slid_loss": 0.6232, "step": 5225, "time": 71.41 }, { "epoch": 4.04, "learning_rate": "1.3564e-05", "loss": 0.6322, "slid_loss": 0.6234, "step": 5226, "time": 70.15 }, { "epoch": 4.04, "learning_rate": "1.3558e-05", "loss": 0.6389, "slid_loss": 0.6237, "step": 5227, "time": 71.13 }, { "epoch": 4.04, "learning_rate": "1.3553e-05", "loss": 0.6135, "slid_loss": 0.6236, "step": 5228, "time": 95.18 }, { "epoch": 4.05, "learning_rate": "1.3547e-05", "loss": 0.6155, "slid_loss": 0.6241, "step": 5229, "time": 71.95 }, { "epoch": 4.05, "learning_rate": "1.3541e-05", "loss": 0.6091, "slid_loss": 0.6241, "step": 5230, "time": 70.48 }, { "epoch": 4.05, "learning_rate": "1.3536e-05", "loss": 0.6021, "slid_loss": 0.6239, "step": 5231, "time": 71.62 }, { "epoch": 4.05, "learning_rate": "1.3530e-05", "loss": 0.6557, "slid_loss": 0.624, "step": 5232, "time": 70.76 }, { "epoch": 4.05, "learning_rate": "1.3525e-05", "loss": 0.6855, "slid_loss": 0.6248, "step": 5233, "time": 85.62 }, { "epoch": 4.05, "learning_rate": "1.3519e-05", "loss": 0.6368, "slid_loss": 0.6253, "step": 5234, "time": 70.71 }, { "epoch": 4.05, "learning_rate": "1.3514e-05", "loss": 0.6403, "slid_loss": 0.6252, "step": 5235, "time": 85.64 }, { "epoch": 4.05, "learning_rate": "1.3508e-05", "loss": 0.6002, "slid_loss": 0.6249, "step": 5236, "time": 101.79 }, { "epoch": 4.05, "learning_rate": "1.3502e-05", "loss": 0.614, "slid_loss": 0.6245, "step": 5237, "time": 88.03 }, { "epoch": 4.05, "learning_rate": "1.3497e-05", "loss": 0.6338, "slid_loss": 0.6246, "step": 5238, "time": 103.8 }, { "epoch": 4.05, "learning_rate": "1.3491e-05", "loss": 0.5944, "slid_loss": 0.6242, "step": 5239, "time": 118.32 }, { "epoch": 4.05, "learning_rate": "1.3486e-05", "loss": 0.6334, "slid_loss": 0.6247, "step": 5240, "time": 103.28 }, { "epoch": 4.05, "learning_rate": "1.3480e-05", "loss": 0.6153, "slid_loss": 0.6246, "step": 5241, "time": 161.38 }, { "epoch": 4.06, "learning_rate": "1.3475e-05", "loss": 0.6142, "slid_loss": 0.6245, "step": 5242, "time": 155.53 }, { "epoch": 4.06, "learning_rate": "1.3469e-05", "loss": 0.6515, "slid_loss": 0.6247, "step": 5243, "time": 165.91 }, { "epoch": 4.06, "learning_rate": "1.3464e-05", "loss": 0.5961, "slid_loss": 0.6241, "step": 5244, "time": 153.92 }, { "epoch": 4.06, "learning_rate": "1.3458e-05", "loss": 0.6214, "slid_loss": 0.6243, "step": 5245, "time": 157.76 }, { "epoch": 4.06, "learning_rate": "1.3453e-05", "loss": 0.6113, "slid_loss": 0.6239, "step": 5246, "time": 150.43 }, { "epoch": 4.06, "learning_rate": "1.3447e-05", "loss": 0.615, "slid_loss": 0.6236, "step": 5247, "time": 128.0 }, { "epoch": 4.06, "learning_rate": "1.3442e-05", "loss": 0.5963, "slid_loss": 0.6234, "step": 5248, "time": 154.08 }, { "epoch": 4.06, "learning_rate": "1.3436e-05", "loss": 0.6025, "slid_loss": 0.6227, "step": 5249, "time": 123.48 }, { "epoch": 4.06, "learning_rate": "1.3431e-05", "loss": 0.599, "slid_loss": 0.6229, "step": 5250, "time": 145.42 }, { "epoch": 4.06, "learning_rate": "1.3425e-05", "loss": 0.6089, "slid_loss": 0.6226, "step": 5251, "time": 136.53 }, { "epoch": 4.06, "learning_rate": "1.3420e-05", "loss": 0.5885, "slid_loss": 0.6219, "step": 5252, "time": 128.31 }, { "epoch": 4.06, "learning_rate": "1.3414e-05", "loss": 0.616, "slid_loss": 0.6215, "step": 5253, "time": 114.21 }, { "epoch": 4.06, "learning_rate": "1.3409e-05", "loss": 0.6039, "slid_loss": 0.6213, "step": 5254, "time": 104.99 }, { "epoch": 4.07, "learning_rate": "1.3403e-05", "loss": 0.6318, "slid_loss": 0.6218, "step": 5255, "time": 127.42 }, { "epoch": 4.07, "learning_rate": "1.3398e-05", "loss": 0.6299, "slid_loss": 0.6219, "step": 5256, "time": 71.26 }, { "epoch": 4.07, "learning_rate": "1.3392e-05", "loss": 0.607, "slid_loss": 0.6224, "step": 5257, "time": 72.26 }, { "epoch": 4.07, "learning_rate": "1.3387e-05", "loss": 0.606, "slid_loss": 0.6222, "step": 5258, "time": 71.72 }, { "epoch": 4.07, "learning_rate": "1.3381e-05", "loss": 0.6359, "slid_loss": 0.6223, "step": 5259, "time": 220.64 }, { "epoch": 4.07, "learning_rate": "1.3376e-05", "loss": 0.653, "slid_loss": 0.6228, "step": 5260, "time": 72.11 }, { "epoch": 4.07, "learning_rate": "1.3370e-05", "loss": 0.5888, "slid_loss": 0.6223, "step": 5261, "time": 73.67 }, { "epoch": 4.07, "learning_rate": "1.3365e-05", "loss": 0.5782, "slid_loss": 0.6218, "step": 5262, "time": 73.74 }, { "epoch": 4.07, "learning_rate": "1.3359e-05", "loss": 0.6341, "slid_loss": 0.6221, "step": 5263, "time": 70.62 }, { "epoch": 4.07, "learning_rate": "1.3354e-05", "loss": 0.6645, "slid_loss": 0.6229, "step": 5264, "time": 71.44 }, { "epoch": 4.07, "learning_rate": "1.3348e-05", "loss": 0.6204, "slid_loss": 0.623, "step": 5265, "time": 70.4 }, { "epoch": 4.07, "learning_rate": "1.3343e-05", "loss": 0.6367, "slid_loss": 0.6232, "step": 5266, "time": 72.16 }, { "epoch": 4.08, "learning_rate": "1.3338e-05", "loss": 0.6187, "slid_loss": 0.6236, "step": 5267, "time": 71.31 }, { "epoch": 4.08, "learning_rate": "1.3332e-05", "loss": 0.6299, "slid_loss": 0.6233, "step": 5268, "time": 71.77 }, { "epoch": 4.08, "learning_rate": "1.3327e-05", "loss": 0.6395, "slid_loss": 0.6237, "step": 5269, "time": 70.9 }, { "epoch": 4.08, "learning_rate": "1.3321e-05", "loss": 0.6031, "slid_loss": 0.6234, "step": 5270, "time": 71.35 }, { "epoch": 4.08, "learning_rate": "1.3316e-05", "loss": 0.6325, "slid_loss": 0.6231, "step": 5271, "time": 72.18 }, { "epoch": 4.08, "learning_rate": "1.3310e-05", "loss": 0.6073, "slid_loss": 0.6231, "step": 5272, "time": 70.66 }, { "epoch": 4.08, "learning_rate": "1.3305e-05", "loss": 0.6259, "slid_loss": 0.6234, "step": 5273, "time": 72.27 }, { "epoch": 4.08, "learning_rate": "1.3300e-05", "loss": 0.6628, "slid_loss": 0.6237, "step": 5274, "time": 73.19 }, { "epoch": 4.08, "learning_rate": "1.3294e-05", "loss": 0.6278, "slid_loss": 0.624, "step": 5275, "time": 70.7 }, { "epoch": 4.08, "learning_rate": "1.3289e-05", "loss": 0.6237, "slid_loss": 0.6241, "step": 5276, "time": 70.7 }, { "epoch": 4.08, "learning_rate": "1.3283e-05", "loss": 0.5992, "slid_loss": 0.6238, "step": 5277, "time": 72.23 }, { "epoch": 4.08, "learning_rate": "1.3278e-05", "loss": 0.6669, "slid_loss": 0.6243, "step": 5278, "time": 71.71 }, { "epoch": 4.08, "learning_rate": "1.3273e-05", "loss": 0.6167, "slid_loss": 0.6242, "step": 5279, "time": 71.73 }, { "epoch": 4.09, "learning_rate": "1.3267e-05", "loss": 0.5822, "slid_loss": 0.6238, "step": 5280, "time": 72.23 }, { "epoch": 4.09, "learning_rate": "1.3262e-05", "loss": 0.6619, "slid_loss": 0.6241, "step": 5281, "time": 72.03 }, { "epoch": 4.09, "learning_rate": "1.3257e-05", "loss": 0.6217, "slid_loss": 0.6238, "step": 5282, "time": 71.47 }, { "epoch": 4.09, "learning_rate": "1.3251e-05", "loss": 0.6026, "slid_loss": 0.6235, "step": 5283, "time": 70.77 }, { "epoch": 4.09, "learning_rate": "1.3246e-05", "loss": 0.6353, "slid_loss": 0.6237, "step": 5284, "time": 71.95 }, { "epoch": 4.09, "learning_rate": "1.3240e-05", "loss": 0.6112, "slid_loss": 0.6239, "step": 5285, "time": 71.26 }, { "epoch": 4.09, "learning_rate": "1.3235e-05", "loss": 0.6097, "slid_loss": 0.6235, "step": 5286, "time": 71.64 }, { "epoch": 4.09, "learning_rate": "1.3230e-05", "loss": 0.6525, "slid_loss": 0.6235, "step": 5287, "time": 70.88 }, { "epoch": 4.09, "learning_rate": "1.3224e-05", "loss": 0.6255, "slid_loss": 0.6235, "step": 5288, "time": 72.47 }, { "epoch": 4.09, "learning_rate": "1.3219e-05", "loss": 0.5696, "slid_loss": 0.6233, "step": 5289, "time": 72.9 }, { "epoch": 4.09, "learning_rate": "1.3214e-05", "loss": 0.6861, "slid_loss": 0.6242, "step": 5290, "time": 70.32 }, { "epoch": 4.09, "learning_rate": "1.3208e-05", "loss": 0.6427, "slid_loss": 0.6243, "step": 5291, "time": 72.0 }, { "epoch": 4.09, "learning_rate": "1.3203e-05", "loss": 0.6089, "slid_loss": 0.6244, "step": 5292, "time": 70.67 }, { "epoch": 4.1, "learning_rate": "1.3198e-05", "loss": 0.6049, "slid_loss": 0.6246, "step": 5293, "time": 71.02 }, { "epoch": 4.1, "learning_rate": "1.3192e-05", "loss": 0.5809, "slid_loss": 0.6246, "step": 5294, "time": 72.1 }, { "epoch": 4.1, "learning_rate": "1.3187e-05", "loss": 0.653, "slid_loss": 0.6244, "step": 5295, "time": 72.21 }, { "epoch": 4.1, "learning_rate": "1.3182e-05", "loss": 0.6363, "slid_loss": 0.6242, "step": 5296, "time": 71.37 }, { "epoch": 4.1, "learning_rate": "1.3176e-05", "loss": 0.5923, "slid_loss": 0.6239, "step": 5297, "time": 72.0 }, { "epoch": 4.1, "learning_rate": "1.3171e-05", "loss": 0.6016, "slid_loss": 0.6235, "step": 5298, "time": 71.64 }, { "epoch": 4.1, "learning_rate": "1.3166e-05", "loss": 0.6036, "slid_loss": 0.6236, "step": 5299, "time": 71.05 }, { "epoch": 4.1, "learning_rate": "1.3160e-05", "loss": 0.636, "slid_loss": 0.6236, "step": 5300, "time": 70.8 }, { "epoch": 4.1, "learning_rate": "1.3155e-05", "loss": 0.6339, "slid_loss": 0.6235, "step": 5301, "time": 71.37 }, { "epoch": 4.1, "learning_rate": "1.3150e-05", "loss": 0.6413, "slid_loss": 0.6236, "step": 5302, "time": 71.53 }, { "epoch": 4.1, "learning_rate": "1.3145e-05", "loss": 0.6355, "slid_loss": 0.624, "step": 5303, "time": 71.96 }, { "epoch": 4.1, "learning_rate": "1.3139e-05", "loss": 0.6418, "slid_loss": 0.6239, "step": 5304, "time": 71.8 }, { "epoch": 4.1, "learning_rate": "1.3134e-05", "loss": 0.6529, "slid_loss": 0.6241, "step": 5305, "time": 70.63 }, { "epoch": 4.11, "learning_rate": "1.3129e-05", "loss": 0.63, "slid_loss": 0.6239, "step": 5306, "time": 71.18 }, { "epoch": 4.11, "learning_rate": "1.3123e-05", "loss": 0.5904, "slid_loss": 0.6233, "step": 5307, "time": 71.43 }, { "epoch": 4.11, "learning_rate": "1.3118e-05", "loss": 0.6028, "slid_loss": 0.6233, "step": 5308, "time": 71.68 }, { "epoch": 4.11, "learning_rate": "1.3113e-05", "loss": 0.5793, "slid_loss": 0.623, "step": 5309, "time": 71.57 }, { "epoch": 4.11, "learning_rate": "1.3108e-05", "loss": 0.6348, "slid_loss": 0.6231, "step": 5310, "time": 72.13 }, { "epoch": 4.11, "learning_rate": "1.3102e-05", "loss": 0.6234, "slid_loss": 0.623, "step": 5311, "time": 72.95 }, { "epoch": 4.11, "learning_rate": "1.3097e-05", "loss": 0.6296, "slid_loss": 0.6232, "step": 5312, "time": 72.15 }, { "epoch": 4.11, "learning_rate": "1.3092e-05", "loss": 0.6491, "slid_loss": 0.6234, "step": 5313, "time": 71.64 }, { "epoch": 4.11, "learning_rate": "1.3087e-05", "loss": 0.6147, "slid_loss": 0.623, "step": 5314, "time": 70.39 }, { "epoch": 4.11, "learning_rate": "1.3081e-05", "loss": 0.6251, "slid_loss": 0.6227, "step": 5315, "time": 70.68 }, { "epoch": 4.11, "learning_rate": "1.3076e-05", "loss": 0.5973, "slid_loss": 0.6222, "step": 5316, "time": 71.65 }, { "epoch": 4.11, "learning_rate": "1.3071e-05", "loss": 0.6248, "slid_loss": 0.622, "step": 5317, "time": 71.24 }, { "epoch": 4.11, "learning_rate": "1.3066e-05", "loss": 0.5995, "slid_loss": 0.6215, "step": 5318, "time": 71.26 }, { "epoch": 4.12, "learning_rate": "1.3060e-05", "loss": 0.628, "slid_loss": 0.6217, "step": 5319, "time": 71.29 }, { "epoch": 4.12, "learning_rate": "1.3055e-05", "loss": 0.608, "slid_loss": 0.6213, "step": 5320, "time": 71.98 }, { "epoch": 4.12, "learning_rate": "1.3050e-05", "loss": 0.6404, "slid_loss": 0.6212, "step": 5321, "time": 71.4 }, { "epoch": 4.12, "learning_rate": "1.3045e-05", "loss": 0.6658, "slid_loss": 0.6218, "step": 5322, "time": 71.46 }, { "epoch": 4.12, "learning_rate": "1.3040e-05", "loss": 0.6005, "slid_loss": 0.6216, "step": 5323, "time": 70.95 }, { "epoch": 4.12, "learning_rate": "1.3034e-05", "loss": 0.6214, "slid_loss": 0.6219, "step": 5324, "time": 72.08 }, { "epoch": 4.12, "learning_rate": "1.3029e-05", "loss": 0.6283, "slid_loss": 0.622, "step": 5325, "time": 71.09 }, { "epoch": 4.12, "learning_rate": "1.3024e-05", "loss": 0.6122, "slid_loss": 0.6218, "step": 5326, "time": 71.11 }, { "epoch": 4.12, "learning_rate": "1.3019e-05", "loss": 0.6553, "slid_loss": 0.622, "step": 5327, "time": 70.77 }, { "epoch": 4.12, "learning_rate": "1.3014e-05", "loss": 0.6291, "slid_loss": 0.6221, "step": 5328, "time": 73.35 }, { "epoch": 4.12, "learning_rate": "1.3008e-05", "loss": 0.589, "slid_loss": 0.6219, "step": 5329, "time": 72.71 }, { "epoch": 4.12, "learning_rate": "1.3003e-05", "loss": 0.5876, "slid_loss": 0.6217, "step": 5330, "time": 71.58 }, { "epoch": 4.12, "learning_rate": "1.2998e-05", "loss": 0.6452, "slid_loss": 0.6221, "step": 5331, "time": 73.14 }, { "epoch": 4.13, "learning_rate": "1.2993e-05", "loss": 0.6425, "slid_loss": 0.622, "step": 5332, "time": 71.45 }, { "epoch": 4.13, "learning_rate": "1.2988e-05", "loss": 0.6235, "slid_loss": 0.6213, "step": 5333, "time": 72.78 }, { "epoch": 4.13, "learning_rate": "1.2983e-05", "loss": 0.6391, "slid_loss": 0.6214, "step": 5334, "time": 72.31 }, { "epoch": 4.13, "learning_rate": "1.2977e-05", "loss": 0.629, "slid_loss": 0.6213, "step": 5335, "time": 71.99 }, { "epoch": 4.13, "learning_rate": "1.2972e-05", "loss": 0.6258, "slid_loss": 0.6215, "step": 5336, "time": 73.27 }, { "epoch": 4.13, "learning_rate": "1.2967e-05", "loss": 0.5943, "slid_loss": 0.6213, "step": 5337, "time": 72.91 }, { "epoch": 4.13, "learning_rate": "1.2962e-05", "loss": 0.5924, "slid_loss": 0.6209, "step": 5338, "time": 71.4 }, { "epoch": 4.13, "learning_rate": "1.2957e-05", "loss": 0.6207, "slid_loss": 0.6212, "step": 5339, "time": 71.48 }, { "epoch": 4.13, "learning_rate": "1.2952e-05", "loss": 0.6142, "slid_loss": 0.621, "step": 5340, "time": 72.3 }, { "epoch": 4.13, "learning_rate": "1.2947e-05", "loss": 0.5939, "slid_loss": 0.6208, "step": 5341, "time": 72.12 }, { "epoch": 4.13, "learning_rate": "1.2941e-05", "loss": 0.6124, "slid_loss": 0.6207, "step": 5342, "time": 72.62 }, { "epoch": 4.13, "learning_rate": "1.2936e-05", "loss": 0.6473, "slid_loss": 0.6207, "step": 5343, "time": 71.52 }, { "epoch": 4.13, "learning_rate": "1.2931e-05", "loss": 0.6466, "slid_loss": 0.6212, "step": 5344, "time": 71.07 }, { "epoch": 4.14, "learning_rate": "1.2926e-05", "loss": 0.6064, "slid_loss": 0.6211, "step": 5345, "time": 72.73 }, { "epoch": 4.14, "learning_rate": "1.2921e-05", "loss": 0.6116, "slid_loss": 0.6211, "step": 5346, "time": 71.66 }, { "epoch": 4.14, "learning_rate": "1.2916e-05", "loss": 0.6008, "slid_loss": 0.6209, "step": 5347, "time": 72.12 }, { "epoch": 4.14, "learning_rate": "1.2911e-05", "loss": 0.6278, "slid_loss": 0.6212, "step": 5348, "time": 73.25 }, { "epoch": 4.14, "learning_rate": "1.2906e-05", "loss": 0.6034, "slid_loss": 0.6212, "step": 5349, "time": 72.71 }, { "epoch": 4.14, "learning_rate": "1.2901e-05", "loss": 0.6288, "slid_loss": 0.6215, "step": 5350, "time": 72.78 }, { "epoch": 4.14, "learning_rate": "1.2895e-05", "loss": 0.6376, "slid_loss": 0.6218, "step": 5351, "time": 70.66 }, { "epoch": 4.14, "learning_rate": "1.2890e-05", "loss": 0.5872, "slid_loss": 0.6218, "step": 5352, "time": 72.23 }, { "epoch": 4.14, "learning_rate": "1.2885e-05", "loss": 0.6394, "slid_loss": 0.622, "step": 5353, "time": 70.55 }, { "epoch": 4.14, "learning_rate": "1.2880e-05", "loss": 0.6074, "slid_loss": 0.6221, "step": 5354, "time": 73.2 }, { "epoch": 4.14, "learning_rate": "1.2875e-05", "loss": 0.5842, "slid_loss": 0.6216, "step": 5355, "time": 71.36 }, { "epoch": 4.14, "learning_rate": "1.2870e-05", "loss": 0.6351, "slid_loss": 0.6217, "step": 5356, "time": 72.71 }, { "epoch": 4.14, "learning_rate": "1.2865e-05", "loss": 0.6053, "slid_loss": 0.6216, "step": 5357, "time": 71.54 }, { "epoch": 4.15, "learning_rate": "1.2860e-05", "loss": 0.6043, "slid_loss": 0.6216, "step": 5358, "time": 71.34 }, { "epoch": 4.15, "learning_rate": "1.2855e-05", "loss": 0.6033, "slid_loss": 0.6213, "step": 5359, "time": 69.77 }, { "epoch": 4.15, "learning_rate": "1.2850e-05", "loss": 0.637, "slid_loss": 0.6211, "step": 5360, "time": 72.51 }, { "epoch": 4.15, "learning_rate": "1.2845e-05", "loss": 0.6297, "slid_loss": 0.6215, "step": 5361, "time": 70.91 }, { "epoch": 4.15, "learning_rate": "1.2840e-05", "loss": 0.6272, "slid_loss": 0.622, "step": 5362, "time": 71.84 }, { "epoch": 4.15, "learning_rate": "1.2835e-05", "loss": 0.5891, "slid_loss": 0.6216, "step": 5363, "time": 70.09 }, { "epoch": 4.15, "learning_rate": "1.2830e-05", "loss": 0.6072, "slid_loss": 0.621, "step": 5364, "time": 70.79 }, { "epoch": 4.15, "learning_rate": "1.2825e-05", "loss": 0.6, "slid_loss": 0.6208, "step": 5365, "time": 73.57 }, { "epoch": 4.15, "learning_rate": "1.2820e-05", "loss": 0.6308, "slid_loss": 0.6207, "step": 5366, "time": 71.99 }, { "epoch": 4.15, "learning_rate": "1.2814e-05", "loss": 0.6098, "slid_loss": 0.6207, "step": 5367, "time": 71.44 }, { "epoch": 4.15, "learning_rate": "1.2809e-05", "loss": 0.6055, "slid_loss": 0.6204, "step": 5368, "time": 72.81 }, { "epoch": 4.15, "learning_rate": "1.2804e-05", "loss": 0.5804, "slid_loss": 0.6198, "step": 5369, "time": 71.36 }, { "epoch": 4.15, "learning_rate": "1.2799e-05", "loss": 0.6372, "slid_loss": 0.6202, "step": 5370, "time": 71.3 }, { "epoch": 4.16, "learning_rate": "1.2794e-05", "loss": 0.5991, "slid_loss": 0.6198, "step": 5371, "time": 71.1 }, { "epoch": 4.16, "learning_rate": "1.2789e-05", "loss": 0.6123, "slid_loss": 0.6199, "step": 5372, "time": 71.79 }, { "epoch": 4.16, "learning_rate": "1.2784e-05", "loss": 0.6307, "slid_loss": 0.6199, "step": 5373, "time": 71.73 }, { "epoch": 4.16, "learning_rate": "1.2779e-05", "loss": 0.6528, "slid_loss": 0.6198, "step": 5374, "time": 71.28 }, { "epoch": 4.16, "learning_rate": "1.2774e-05", "loss": 0.5998, "slid_loss": 0.6195, "step": 5375, "time": 73.62 }, { "epoch": 4.16, "learning_rate": "1.2769e-05", "loss": 0.6129, "slid_loss": 0.6194, "step": 5376, "time": 71.21 }, { "epoch": 4.16, "learning_rate": "1.2764e-05", "loss": 0.6252, "slid_loss": 0.6197, "step": 5377, "time": 71.29 }, { "epoch": 4.16, "learning_rate": "1.2759e-05", "loss": 0.6268, "slid_loss": 0.6193, "step": 5378, "time": 71.86 }, { "epoch": 4.16, "learning_rate": "1.2754e-05", "loss": 0.6478, "slid_loss": 0.6196, "step": 5379, "time": 71.29 }, { "epoch": 4.16, "learning_rate": "1.2750e-05", "loss": 0.598, "slid_loss": 0.6198, "step": 5380, "time": 71.22 }, { "epoch": 4.16, "learning_rate": "1.2745e-05", "loss": 0.5937, "slid_loss": 0.6191, "step": 5381, "time": 72.07 }, { "epoch": 4.16, "learning_rate": "1.2740e-05", "loss": 0.6248, "slid_loss": 0.6191, "step": 5382, "time": 71.96 }, { "epoch": 4.16, "learning_rate": "1.2735e-05", "loss": 0.6405, "slid_loss": 0.6195, "step": 5383, "time": 71.07 }, { "epoch": 4.17, "learning_rate": "1.2730e-05", "loss": 0.6216, "slid_loss": 0.6194, "step": 5384, "time": 71.34 }, { "epoch": 4.17, "learning_rate": "1.2725e-05", "loss": 0.6322, "slid_loss": 0.6196, "step": 5385, "time": 72.38 }, { "epoch": 4.17, "learning_rate": "1.2720e-05", "loss": 0.6215, "slid_loss": 0.6197, "step": 5386, "time": 91.3 }, { "epoch": 4.17, "learning_rate": "1.2715e-05", "loss": 0.6373, "slid_loss": 0.6195, "step": 5387, "time": 73.08 }, { "epoch": 4.17, "learning_rate": "1.2710e-05", "loss": 0.6364, "slid_loss": 0.6196, "step": 5388, "time": 73.34 }, { "epoch": 4.17, "learning_rate": "1.2705e-05", "loss": 0.6394, "slid_loss": 0.6203, "step": 5389, "time": 71.99 }, { "epoch": 4.17, "learning_rate": "1.2700e-05", "loss": 0.6409, "slid_loss": 0.6199, "step": 5390, "time": 71.7 }, { "epoch": 4.17, "learning_rate": "1.2695e-05", "loss": 0.6202, "slid_loss": 0.6197, "step": 5391, "time": 71.43 }, { "epoch": 4.17, "learning_rate": "1.2690e-05", "loss": 0.6126, "slid_loss": 0.6197, "step": 5392, "time": 70.8 }, { "epoch": 4.17, "learning_rate": "1.2685e-05", "loss": 0.6223, "slid_loss": 0.6199, "step": 5393, "time": 71.52 }, { "epoch": 4.17, "learning_rate": "1.2680e-05", "loss": 0.6536, "slid_loss": 0.6206, "step": 5394, "time": 85.15 }, { "epoch": 4.17, "learning_rate": "1.2675e-05", "loss": 0.5792, "slid_loss": 0.6199, "step": 5395, "time": 71.95 }, { "epoch": 4.17, "learning_rate": "1.2670e-05", "loss": 0.6133, "slid_loss": 0.6196, "step": 5396, "time": 134.57 }, { "epoch": 4.18, "learning_rate": "1.2666e-05", "loss": 0.6083, "slid_loss": 0.6198, "step": 5397, "time": 71.68 }, { "epoch": 4.18, "learning_rate": "1.2661e-05", "loss": 0.589, "slid_loss": 0.6197, "step": 5398, "time": 83.12 }, { "epoch": 4.18, "learning_rate": "1.2656e-05", "loss": 0.6086, "slid_loss": 0.6197, "step": 5399, "time": 98.88 }, { "epoch": 4.18, "learning_rate": "1.2651e-05", "loss": 0.6068, "slid_loss": 0.6194, "step": 5400, "time": 141.37 }, { "epoch": 4.18, "learning_rate": "1.2646e-05", "loss": 0.6068, "slid_loss": 0.6192, "step": 5401, "time": 816.54 }, { "epoch": 4.18, "learning_rate": "1.2641e-05", "loss": 0.5736, "slid_loss": 0.6185, "step": 5402, "time": 140.62 }, { "epoch": 4.18, "learning_rate": "1.2636e-05", "loss": 0.6039, "slid_loss": 0.6182, "step": 5403, "time": 168.56 }, { "epoch": 4.18, "learning_rate": "1.2631e-05", "loss": 0.628, "slid_loss": 0.618, "step": 5404, "time": 131.65 }, { "epoch": 4.18, "learning_rate": "1.2627e-05", "loss": 0.6363, "slid_loss": 0.6179, "step": 5405, "time": 138.19 }, { "epoch": 4.18, "learning_rate": "1.2622e-05", "loss": 0.6051, "slid_loss": 0.6176, "step": 5406, "time": 161.41 }, { "epoch": 4.18, "learning_rate": "1.2617e-05", "loss": 0.6547, "slid_loss": 0.6183, "step": 5407, "time": 119.02 }, { "epoch": 4.18, "learning_rate": "1.2612e-05", "loss": 0.6072, "slid_loss": 0.6183, "step": 5408, "time": 156.1 }, { "epoch": 4.18, "learning_rate": "1.2607e-05", "loss": 0.5833, "slid_loss": 0.6183, "step": 5409, "time": 121.17 }, { "epoch": 4.19, "learning_rate": "1.2602e-05", "loss": 0.6194, "slid_loss": 0.6182, "step": 5410, "time": 125.38 }, { "epoch": 4.19, "learning_rate": "1.2597e-05", "loss": 0.6194, "slid_loss": 0.6181, "step": 5411, "time": 120.71 }, { "epoch": 4.19, "learning_rate": "1.2593e-05", "loss": 0.6321, "slid_loss": 0.6182, "step": 5412, "time": 96.16 }, { "epoch": 4.19, "learning_rate": "1.2588e-05", "loss": 0.62, "slid_loss": 0.6179, "step": 5413, "time": 101.34 }, { "epoch": 4.19, "learning_rate": "1.2583e-05", "loss": 0.6208, "slid_loss": 0.6179, "step": 5414, "time": 96.8 }, { "epoch": 4.19, "learning_rate": "1.2578e-05", "loss": 0.6071, "slid_loss": 0.6178, "step": 5415, "time": 83.44 }, { "epoch": 4.19, "learning_rate": "1.2573e-05", "loss": 0.6317, "slid_loss": 0.6181, "step": 5416, "time": 71.69 }, { "epoch": 4.19, "learning_rate": "1.2568e-05", "loss": 0.6513, "slid_loss": 0.6184, "step": 5417, "time": 71.19 }, { "epoch": 4.19, "learning_rate": "1.2564e-05", "loss": 0.5774, "slid_loss": 0.6181, "step": 5418, "time": 83.14 }, { "epoch": 4.19, "learning_rate": "1.2559e-05", "loss": 0.6148, "slid_loss": 0.618, "step": 5419, "time": 87.26 }, { "epoch": 4.19, "learning_rate": "1.2554e-05", "loss": 0.6186, "slid_loss": 0.6181, "step": 5420, "time": 72.75 }, { "epoch": 4.19, "learning_rate": "1.2549e-05", "loss": 0.6149, "slid_loss": 0.6179, "step": 5421, "time": 70.86 }, { "epoch": 4.19, "learning_rate": "1.2544e-05", "loss": 0.6425, "slid_loss": 0.6176, "step": 5422, "time": 76.44 }, { "epoch": 4.2, "learning_rate": "1.2540e-05", "loss": 0.6034, "slid_loss": 0.6177, "step": 5423, "time": 71.57 }, { "epoch": 4.2, "learning_rate": "1.2535e-05", "loss": 0.593, "slid_loss": 0.6174, "step": 5424, "time": 72.26 }, { "epoch": 4.2, "learning_rate": "1.2530e-05", "loss": 0.6187, "slid_loss": 0.6173, "step": 5425, "time": 71.76 }, { "epoch": 4.2, "learning_rate": "1.2525e-05", "loss": 0.6248, "slid_loss": 0.6174, "step": 5426, "time": 71.46 }, { "epoch": 4.2, "learning_rate": "1.2520e-05", "loss": 0.6061, "slid_loss": 0.6169, "step": 5427, "time": 72.91 }, { "epoch": 4.2, "learning_rate": "1.2516e-05", "loss": 0.6194, "slid_loss": 0.6168, "step": 5428, "time": 70.64 }, { "epoch": 4.2, "learning_rate": "1.2511e-05", "loss": 0.6603, "slid_loss": 0.6175, "step": 5429, "time": 70.48 }, { "epoch": 4.2, "learning_rate": "1.2506e-05", "loss": 0.6314, "slid_loss": 0.618, "step": 5430, "time": 71.49 }, { "epoch": 4.2, "learning_rate": "1.2501e-05", "loss": 0.5659, "slid_loss": 0.6172, "step": 5431, "time": 72.35 }, { "epoch": 4.2, "learning_rate": "1.2497e-05", "loss": 0.6157, "slid_loss": 0.6169, "step": 5432, "time": 74.15 }, { "epoch": 4.2, "learning_rate": "1.2492e-05", "loss": 0.633, "slid_loss": 0.617, "step": 5433, "time": 72.96 }, { "epoch": 4.2, "learning_rate": "1.2487e-05", "loss": 0.6597, "slid_loss": 0.6172, "step": 5434, "time": 71.96 }, { "epoch": 4.21, "learning_rate": "1.2482e-05", "loss": 0.6135, "slid_loss": 0.6171, "step": 5435, "time": 71.15 }, { "epoch": 4.21, "learning_rate": "1.2478e-05", "loss": 0.6321, "slid_loss": 0.6171, "step": 5436, "time": 71.63 }, { "epoch": 4.21, "learning_rate": "1.2473e-05", "loss": 0.6, "slid_loss": 0.6172, "step": 5437, "time": 72.03 }, { "epoch": 4.21, "learning_rate": "1.2468e-05", "loss": 0.599, "slid_loss": 0.6172, "step": 5438, "time": 71.46 }, { "epoch": 4.21, "learning_rate": "1.2463e-05", "loss": 0.6364, "slid_loss": 0.6174, "step": 5439, "time": 70.35 }, { "epoch": 4.21, "learning_rate": "1.2459e-05", "loss": 0.6135, "slid_loss": 0.6174, "step": 5440, "time": 71.88 }, { "epoch": 4.21, "learning_rate": "1.2454e-05", "loss": 0.6154, "slid_loss": 0.6176, "step": 5441, "time": 71.57 }, { "epoch": 4.21, "learning_rate": "1.2449e-05", "loss": 0.5695, "slid_loss": 0.6172, "step": 5442, "time": 71.29 }, { "epoch": 4.21, "learning_rate": "1.2445e-05", "loss": 0.6374, "slid_loss": 0.6171, "step": 5443, "time": 71.25 }, { "epoch": 4.21, "learning_rate": "1.2440e-05", "loss": 0.6678, "slid_loss": 0.6173, "step": 5444, "time": 70.47 }, { "epoch": 4.21, "learning_rate": "1.2435e-05", "loss": 0.6364, "slid_loss": 0.6176, "step": 5445, "time": 71.54 }, { "epoch": 4.21, "learning_rate": "1.2430e-05", "loss": 0.6395, "slid_loss": 0.6179, "step": 5446, "time": 70.81 }, { "epoch": 4.21, "learning_rate": "1.2426e-05", "loss": 0.6186, "slid_loss": 0.618, "step": 5447, "time": 71.83 }, { "epoch": 4.22, "learning_rate": "1.2421e-05", "loss": 0.6363, "slid_loss": 0.6181, "step": 5448, "time": 71.29 }, { "epoch": 4.22, "learning_rate": "1.2416e-05", "loss": 0.5914, "slid_loss": 0.618, "step": 5449, "time": 71.77 }, { "epoch": 4.22, "learning_rate": "1.2412e-05", "loss": 0.6348, "slid_loss": 0.6181, "step": 5450, "time": 71.47 }, { "epoch": 4.22, "learning_rate": "1.2407e-05", "loss": 0.6621, "slid_loss": 0.6183, "step": 5451, "time": 72.97 }, { "epoch": 4.22, "learning_rate": "1.2402e-05", "loss": 0.6314, "slid_loss": 0.6188, "step": 5452, "time": 72.7 }, { "epoch": 4.22, "learning_rate": "1.2398e-05", "loss": 0.5871, "slid_loss": 0.6182, "step": 5453, "time": 71.6 }, { "epoch": 4.22, "learning_rate": "1.2393e-05", "loss": 0.6339, "slid_loss": 0.6185, "step": 5454, "time": 73.16 }, { "epoch": 4.22, "learning_rate": "1.2388e-05", "loss": 0.6317, "slid_loss": 0.619, "step": 5455, "time": 72.59 }, { "epoch": 4.22, "learning_rate": "1.2384e-05", "loss": 0.6099, "slid_loss": 0.6187, "step": 5456, "time": 72.59 }, { "epoch": 4.22, "learning_rate": "1.2379e-05", "loss": 0.5871, "slid_loss": 0.6185, "step": 5457, "time": 72.89 }, { "epoch": 4.22, "learning_rate": "1.2374e-05", "loss": 0.6429, "slid_loss": 0.6189, "step": 5458, "time": 70.93 }, { "epoch": 4.22, "learning_rate": "1.2370e-05", "loss": 0.6289, "slid_loss": 0.6192, "step": 5459, "time": 72.9 }, { "epoch": 4.22, "learning_rate": "1.2365e-05", "loss": 0.6059, "slid_loss": 0.6189, "step": 5460, "time": 71.83 }, { "epoch": 4.23, "learning_rate": "1.2361e-05", "loss": 0.5812, "slid_loss": 0.6184, "step": 5461, "time": 71.79 }, { "epoch": 4.23, "learning_rate": "1.2356e-05", "loss": 0.5937, "slid_loss": 0.6181, "step": 5462, "time": 72.21 }, { "epoch": 4.23, "learning_rate": "1.2351e-05", "loss": 0.638, "slid_loss": 0.6185, "step": 5463, "time": 72.04 }, { "epoch": 4.23, "learning_rate": "1.2347e-05", "loss": 0.6032, "slid_loss": 0.6185, "step": 5464, "time": 72.52 }, { "epoch": 4.23, "learning_rate": "1.2342e-05", "loss": 0.609, "slid_loss": 0.6186, "step": 5465, "time": 70.88 }, { "epoch": 4.23, "learning_rate": "1.2337e-05", "loss": 0.6295, "slid_loss": 0.6186, "step": 5466, "time": 71.29 }, { "epoch": 4.23, "learning_rate": "1.2333e-05", "loss": 0.6144, "slid_loss": 0.6186, "step": 5467, "time": 71.79 }, { "epoch": 4.23, "learning_rate": "1.2328e-05", "loss": 0.6288, "slid_loss": 0.6189, "step": 5468, "time": 71.36 }, { "epoch": 4.23, "learning_rate": "1.2324e-05", "loss": 0.6056, "slid_loss": 0.6191, "step": 5469, "time": 72.23 }, { "epoch": 4.23, "learning_rate": "1.2319e-05", "loss": 0.6153, "slid_loss": 0.6189, "step": 5470, "time": 70.43 }, { "epoch": 4.23, "learning_rate": "1.2314e-05", "loss": 0.6135, "slid_loss": 0.619, "step": 5471, "time": 73.29 }, { "epoch": 4.23, "learning_rate": "1.2310e-05", "loss": 0.6136, "slid_loss": 0.6191, "step": 5472, "time": 71.87 }, { "epoch": 4.23, "learning_rate": "1.2305e-05", "loss": 0.6018, "slid_loss": 0.6188, "step": 5473, "time": 71.89 }, { "epoch": 4.24, "learning_rate": "1.2301e-05", "loss": 0.6078, "slid_loss": 0.6183, "step": 5474, "time": 71.05 }, { "epoch": 4.24, "learning_rate": "1.2296e-05", "loss": 0.65, "slid_loss": 0.6188, "step": 5475, "time": 69.95 }, { "epoch": 4.24, "learning_rate": "1.2292e-05", "loss": 0.6439, "slid_loss": 0.6191, "step": 5476, "time": 70.85 }, { "epoch": 4.24, "learning_rate": "1.2287e-05", "loss": 0.625, "slid_loss": 0.6191, "step": 5477, "time": 73.04 }, { "epoch": 4.24, "learning_rate": "1.2282e-05", "loss": 0.6137, "slid_loss": 0.619, "step": 5478, "time": 71.01 }, { "epoch": 4.24, "learning_rate": "1.2278e-05", "loss": 0.6407, "slid_loss": 0.6189, "step": 5479, "time": 71.48 }, { "epoch": 4.24, "learning_rate": "1.2273e-05", "loss": 0.6058, "slid_loss": 0.619, "step": 5480, "time": 71.27 }, { "epoch": 4.24, "learning_rate": "1.2269e-05", "loss": 0.6258, "slid_loss": 0.6193, "step": 5481, "time": 70.41 }, { "epoch": 4.24, "learning_rate": "1.2264e-05", "loss": 0.6457, "slid_loss": 0.6195, "step": 5482, "time": 73.41 }, { "epoch": 4.24, "learning_rate": "1.2260e-05", "loss": 0.5827, "slid_loss": 0.619, "step": 5483, "time": 71.62 }, { "epoch": 4.24, "learning_rate": "1.2255e-05", "loss": 0.6084, "slid_loss": 0.6188, "step": 5484, "time": 71.87 }, { "epoch": 4.24, "learning_rate": "1.2251e-05", "loss": 0.6099, "slid_loss": 0.6186, "step": 5485, "time": 71.39 }, { "epoch": 4.24, "learning_rate": "1.2246e-05", "loss": 0.6158, "slid_loss": 0.6185, "step": 5486, "time": 72.82 }, { "epoch": 4.25, "learning_rate": "1.2242e-05", "loss": 0.5892, "slid_loss": 0.6181, "step": 5487, "time": 70.88 }, { "epoch": 4.25, "learning_rate": "1.2237e-05", "loss": 0.6239, "slid_loss": 0.6179, "step": 5488, "time": 71.4 }, { "epoch": 4.25, "learning_rate": "1.2233e-05", "loss": 0.6109, "slid_loss": 0.6176, "step": 5489, "time": 71.04 }, { "epoch": 4.25, "learning_rate": "1.2228e-05", "loss": 0.5897, "slid_loss": 0.6171, "step": 5490, "time": 72.39 }, { "epoch": 4.25, "learning_rate": "1.2224e-05", "loss": 0.6274, "slid_loss": 0.6172, "step": 5491, "time": 70.85 }, { "epoch": 4.25, "learning_rate": "1.2219e-05", "loss": 0.6325, "slid_loss": 0.6174, "step": 5492, "time": 71.39 }, { "epoch": 4.25, "learning_rate": "1.2215e-05", "loss": 0.6233, "slid_loss": 0.6174, "step": 5493, "time": 71.5 }, { "epoch": 4.25, "learning_rate": "1.2210e-05", "loss": 0.6494, "slid_loss": 0.6174, "step": 5494, "time": 120.88 }, { "epoch": 4.25, "learning_rate": "1.2206e-05", "loss": 0.6334, "slid_loss": 0.6179, "step": 5495, "time": 71.5 }, { "epoch": 4.25, "learning_rate": "1.2201e-05", "loss": 0.5932, "slid_loss": 0.6177, "step": 5496, "time": 71.27 }, { "epoch": 4.25, "learning_rate": "1.2197e-05", "loss": 0.6221, "slid_loss": 0.6179, "step": 5497, "time": 71.74 }, { "epoch": 4.25, "learning_rate": "1.2192e-05", "loss": 0.6181, "slid_loss": 0.6181, "step": 5498, "time": 70.03 }, { "epoch": 4.25, "learning_rate": "1.2188e-05", "loss": 0.6025, "slid_loss": 0.6181, "step": 5499, "time": 71.0 }, { "epoch": 4.26, "learning_rate": "1.2183e-05", "loss": 0.6004, "slid_loss": 0.618, "step": 5500, "time": 70.76 }, { "epoch": 4.26, "learning_rate": "1.2179e-05", "loss": 0.6207, "slid_loss": 0.6182, "step": 5501, "time": 71.53 }, { "epoch": 4.26, "learning_rate": "1.2174e-05", "loss": 0.5977, "slid_loss": 0.6184, "step": 5502, "time": 71.33 }, { "epoch": 4.26, "learning_rate": "1.2170e-05", "loss": 0.6167, "slid_loss": 0.6185, "step": 5503, "time": 72.12 }, { "epoch": 4.26, "learning_rate": "1.2165e-05", "loss": 0.6294, "slid_loss": 0.6185, "step": 5504, "time": 71.43 }, { "epoch": 4.26, "learning_rate": "1.2161e-05", "loss": 0.615, "slid_loss": 0.6183, "step": 5505, "time": 72.99 }, { "epoch": 4.26, "learning_rate": "1.2157e-05", "loss": 0.6146, "slid_loss": 0.6184, "step": 5506, "time": 72.25 }, { "epoch": 4.26, "learning_rate": "1.2152e-05", "loss": 0.6115, "slid_loss": 0.618, "step": 5507, "time": 72.26 }, { "epoch": 4.26, "learning_rate": "1.2148e-05", "loss": 0.6024, "slid_loss": 0.6179, "step": 5508, "time": 71.1 }, { "epoch": 4.26, "learning_rate": "1.2143e-05", "loss": 0.626, "slid_loss": 0.6184, "step": 5509, "time": 71.54 }, { "epoch": 4.26, "learning_rate": "1.2139e-05", "loss": 0.6321, "slid_loss": 0.6185, "step": 5510, "time": 71.81 }, { "epoch": 4.26, "learning_rate": "1.2134e-05", "loss": 0.6226, "slid_loss": 0.6185, "step": 5511, "time": 71.66 }, { "epoch": 4.26, "learning_rate": "1.2130e-05", "loss": 0.6254, "slid_loss": 0.6185, "step": 5512, "time": 71.78 }, { "epoch": 4.27, "learning_rate": "1.2126e-05", "loss": 0.6148, "slid_loss": 0.6184, "step": 5513, "time": 72.63 }, { "epoch": 4.27, "learning_rate": "1.2121e-05", "loss": 0.6163, "slid_loss": 0.6184, "step": 5514, "time": 72.44 }, { "epoch": 4.27, "learning_rate": "1.2117e-05", "loss": 0.5902, "slid_loss": 0.6182, "step": 5515, "time": 71.67 }, { "epoch": 4.27, "learning_rate": "1.2112e-05", "loss": 0.6231, "slid_loss": 0.6181, "step": 5516, "time": 70.95 }, { "epoch": 4.27, "learning_rate": "1.2108e-05", "loss": 0.5879, "slid_loss": 0.6175, "step": 5517, "time": 71.15 }, { "epoch": 4.27, "learning_rate": "1.2104e-05", "loss": 0.6534, "slid_loss": 0.6182, "step": 5518, "time": 72.26 }, { "epoch": 4.27, "learning_rate": "1.2099e-05", "loss": 0.6215, "slid_loss": 0.6183, "step": 5519, "time": 73.33 }, { "epoch": 4.27, "learning_rate": "1.2095e-05", "loss": 0.5949, "slid_loss": 0.6181, "step": 5520, "time": 71.81 }, { "epoch": 4.27, "learning_rate": "1.2090e-05", "loss": 0.6178, "slid_loss": 0.6181, "step": 5521, "time": 72.56 }, { "epoch": 4.27, "learning_rate": "1.2086e-05", "loss": 0.6395, "slid_loss": 0.6181, "step": 5522, "time": 71.2 }, { "epoch": 4.27, "learning_rate": "1.2082e-05", "loss": 0.6176, "slid_loss": 0.6182, "step": 5523, "time": 72.55 }, { "epoch": 4.27, "learning_rate": "1.2077e-05", "loss": 0.5798, "slid_loss": 0.6181, "step": 5524, "time": 73.08 }, { "epoch": 4.27, "learning_rate": "1.2073e-05", "loss": 0.6096, "slid_loss": 0.618, "step": 5525, "time": 71.1 }, { "epoch": 4.28, "learning_rate": "1.2069e-05", "loss": 0.583, "slid_loss": 0.6176, "step": 5526, "time": 72.98 }, { "epoch": 4.28, "learning_rate": "1.2064e-05", "loss": 0.6166, "slid_loss": 0.6177, "step": 5527, "time": 71.84 }, { "epoch": 4.28, "learning_rate": "1.2060e-05", "loss": 0.579, "slid_loss": 0.6173, "step": 5528, "time": 71.43 }, { "epoch": 4.28, "learning_rate": "1.2056e-05", "loss": 0.6336, "slid_loss": 0.617, "step": 5529, "time": 71.41 }, { "epoch": 4.28, "learning_rate": "1.2051e-05", "loss": 0.6036, "slid_loss": 0.6167, "step": 5530, "time": 72.24 }, { "epoch": 4.28, "learning_rate": "1.2047e-05", "loss": 0.6154, "slid_loss": 0.6172, "step": 5531, "time": 72.18 }, { "epoch": 4.28, "learning_rate": "1.2043e-05", "loss": 0.6489, "slid_loss": 0.6175, "step": 5532, "time": 72.54 }, { "epoch": 4.28, "learning_rate": "1.2038e-05", "loss": 0.5917, "slid_loss": 0.6171, "step": 5533, "time": 71.5 }, { "epoch": 4.28, "learning_rate": "1.2034e-05", "loss": 0.5992, "slid_loss": 0.6165, "step": 5534, "time": 70.51 }, { "epoch": 4.28, "learning_rate": "1.2030e-05", "loss": 0.5964, "slid_loss": 0.6164, "step": 5535, "time": 72.53 }, { "epoch": 4.28, "learning_rate": "1.2025e-05", "loss": 0.6364, "slid_loss": 0.6164, "step": 5536, "time": 72.02 }, { "epoch": 4.28, "learning_rate": "1.2021e-05", "loss": 0.6146, "slid_loss": 0.6165, "step": 5537, "time": 70.64 }, { "epoch": 4.28, "learning_rate": "1.2017e-05", "loss": 0.6167, "slid_loss": 0.6167, "step": 5538, "time": 72.05 }, { "epoch": 4.29, "learning_rate": "1.2012e-05", "loss": 0.6121, "slid_loss": 0.6165, "step": 5539, "time": 71.42 }, { "epoch": 4.29, "learning_rate": "1.2008e-05", "loss": 0.6357, "slid_loss": 0.6167, "step": 5540, "time": 71.37 }, { "epoch": 4.29, "learning_rate": "1.2004e-05", "loss": 0.6114, "slid_loss": 0.6167, "step": 5541, "time": 71.46 }, { "epoch": 4.29, "learning_rate": "1.2000e-05", "loss": 0.6, "slid_loss": 0.617, "step": 5542, "time": 70.93 }, { "epoch": 4.29, "learning_rate": "1.1995e-05", "loss": 0.6352, "slid_loss": 0.6169, "step": 5543, "time": 71.2 }, { "epoch": 4.29, "learning_rate": "1.1991e-05", "loss": 0.5907, "slid_loss": 0.6162, "step": 5544, "time": 90.82 }, { "epoch": 4.29, "learning_rate": "1.1987e-05", "loss": 0.5879, "slid_loss": 0.6157, "step": 5545, "time": 70.79 }, { "epoch": 4.29, "learning_rate": "1.1982e-05", "loss": 0.5769, "slid_loss": 0.6151, "step": 5546, "time": 71.95 }, { "epoch": 4.29, "learning_rate": "1.1978e-05", "loss": 0.5836, "slid_loss": 0.6147, "step": 5547, "time": 72.32 }, { "epoch": 4.29, "learning_rate": "1.1974e-05", "loss": 0.6124, "slid_loss": 0.6145, "step": 5548, "time": 70.7 }, { "epoch": 4.29, "learning_rate": "1.1970e-05", "loss": 0.6123, "slid_loss": 0.6147, "step": 5549, "time": 72.46 }, { "epoch": 4.29, "learning_rate": "1.1965e-05", "loss": 0.649, "slid_loss": 0.6148, "step": 5550, "time": 70.24 }, { "epoch": 4.29, "learning_rate": "1.1961e-05", "loss": 0.6281, "slid_loss": 0.6145, "step": 5551, "time": 73.37 }, { "epoch": 4.3, "learning_rate": "1.1957e-05", "loss": 0.6145, "slid_loss": 0.6143, "step": 5552, "time": 70.99 }, { "epoch": 4.3, "learning_rate": "1.1953e-05", "loss": 0.5875, "slid_loss": 0.6143, "step": 5553, "time": 97.08 }, { "epoch": 4.3, "learning_rate": "1.1948e-05", "loss": 0.6119, "slid_loss": 0.6141, "step": 5554, "time": 95.81 }, { "epoch": 4.3, "learning_rate": "1.1944e-05", "loss": 0.5948, "slid_loss": 0.6137, "step": 5555, "time": 96.86 }, { "epoch": 4.3, "learning_rate": "1.1940e-05", "loss": 0.6164, "slid_loss": 0.6138, "step": 5556, "time": 98.88 }, { "epoch": 4.3, "learning_rate": "1.1936e-05", "loss": 0.6119, "slid_loss": 0.614, "step": 5557, "time": 83.04 }, { "epoch": 4.3, "learning_rate": "1.1932e-05", "loss": 0.6008, "slid_loss": 0.6136, "step": 5558, "time": 99.56 }, { "epoch": 4.3, "learning_rate": "1.1927e-05", "loss": 0.6238, "slid_loss": 0.6136, "step": 5559, "time": 110.55 }, { "epoch": 4.3, "learning_rate": "1.1923e-05", "loss": 0.655, "slid_loss": 0.6141, "step": 5560, "time": 164.25 }, { "epoch": 4.3, "learning_rate": "1.1919e-05", "loss": 0.5793, "slid_loss": 0.614, "step": 5561, "time": 154.21 }, { "epoch": 4.3, "learning_rate": "1.1915e-05", "loss": 0.6101, "slid_loss": 0.6142, "step": 5562, "time": 166.0 }, { "epoch": 4.3, "learning_rate": "1.1911e-05", "loss": 0.6138, "slid_loss": 0.614, "step": 5563, "time": 155.11 }, { "epoch": 4.3, "learning_rate": "1.1906e-05", "loss": 0.5807, "slid_loss": 0.6137, "step": 5564, "time": 146.81 }, { "epoch": 4.31, "learning_rate": "1.1902e-05", "loss": 0.6157, "slid_loss": 0.6138, "step": 5565, "time": 132.07 }, { "epoch": 4.31, "learning_rate": "1.1898e-05", "loss": 0.6177, "slid_loss": 0.6137, "step": 5566, "time": 129.92 }, { "epoch": 4.31, "learning_rate": "1.1894e-05", "loss": 0.6248, "slid_loss": 0.6138, "step": 5567, "time": 140.08 }, { "epoch": 4.31, "learning_rate": "1.1890e-05", "loss": 0.5972, "slid_loss": 0.6135, "step": 5568, "time": 107.53 }, { "epoch": 4.31, "learning_rate": "1.1886e-05", "loss": 0.6227, "slid_loss": 0.6136, "step": 5569, "time": 123.25 }, { "epoch": 4.31, "learning_rate": "1.1881e-05", "loss": 0.6306, "slid_loss": 0.6138, "step": 5570, "time": 121.92 }, { "epoch": 4.31, "learning_rate": "1.1877e-05", "loss": 0.613, "slid_loss": 0.6138, "step": 5571, "time": 97.76 }, { "epoch": 4.31, "learning_rate": "1.1873e-05", "loss": 0.6273, "slid_loss": 0.6139, "step": 5572, "time": 109.48 }, { "epoch": 4.31, "learning_rate": "1.1869e-05", "loss": 0.6266, "slid_loss": 0.6142, "step": 5573, "time": 99.48 }, { "epoch": 4.31, "learning_rate": "1.1865e-05", "loss": 0.6034, "slid_loss": 0.6141, "step": 5574, "time": 82.77 }, { "epoch": 4.31, "learning_rate": "1.1861e-05", "loss": 0.5813, "slid_loss": 0.6134, "step": 5575, "time": 83.83 }, { "epoch": 4.31, "learning_rate": "1.1856e-05", "loss": 0.5919, "slid_loss": 0.6129, "step": 5576, "time": 73.04 }, { "epoch": 4.31, "learning_rate": "1.1852e-05", "loss": 0.646, "slid_loss": 0.6131, "step": 5577, "time": 72.01 }, { "epoch": 4.32, "learning_rate": "1.1848e-05", "loss": 0.6055, "slid_loss": 0.6131, "step": 5578, "time": 81.79 }, { "epoch": 4.32, "learning_rate": "1.1844e-05", "loss": 0.5845, "slid_loss": 0.6125, "step": 5579, "time": 73.65 }, { "epoch": 4.32, "learning_rate": "1.1840e-05", "loss": 0.5856, "slid_loss": 0.6123, "step": 5580, "time": 70.61 }, { "epoch": 4.32, "learning_rate": "1.1836e-05", "loss": 0.6624, "slid_loss": 0.6127, "step": 5581, "time": 70.54 }, { "epoch": 4.32, "learning_rate": "1.1832e-05", "loss": 0.5921, "slid_loss": 0.6121, "step": 5582, "time": 74.54 }, { "epoch": 4.32, "learning_rate": "1.1828e-05", "loss": 0.6021, "slid_loss": 0.6123, "step": 5583, "time": 72.96 }, { "epoch": 4.32, "learning_rate": "1.1824e-05", "loss": 0.6319, "slid_loss": 0.6126, "step": 5584, "time": 72.82 }, { "epoch": 4.32, "learning_rate": "1.1819e-05", "loss": 0.6229, "slid_loss": 0.6127, "step": 5585, "time": 73.88 }, { "epoch": 4.32, "learning_rate": "1.1815e-05", "loss": 0.6255, "slid_loss": 0.6128, "step": 5586, "time": 71.16 }, { "epoch": 4.32, "learning_rate": "1.1811e-05", "loss": 0.6229, "slid_loss": 0.6131, "step": 5587, "time": 71.39 }, { "epoch": 4.32, "learning_rate": "1.1807e-05", "loss": 0.6152, "slid_loss": 0.613, "step": 5588, "time": 72.97 }, { "epoch": 4.32, "learning_rate": "1.1803e-05", "loss": 0.5769, "slid_loss": 0.6127, "step": 5589, "time": 73.23 }, { "epoch": 4.32, "learning_rate": "1.1799e-05", "loss": 0.5878, "slid_loss": 0.6127, "step": 5590, "time": 73.16 }, { "epoch": 4.33, "learning_rate": "1.1795e-05", "loss": 0.5978, "slid_loss": 0.6124, "step": 5591, "time": 72.64 }, { "epoch": 4.33, "learning_rate": "1.1791e-05", "loss": 0.6106, "slid_loss": 0.6122, "step": 5592, "time": 70.55 }, { "epoch": 4.33, "learning_rate": "1.1787e-05", "loss": 0.613, "slid_loss": 0.6121, "step": 5593, "time": 71.58 }, { "epoch": 4.33, "learning_rate": "1.1783e-05", "loss": 0.622, "slid_loss": 0.6118, "step": 5594, "time": 70.02 }, { "epoch": 4.33, "learning_rate": "1.1779e-05", "loss": 0.6012, "slid_loss": 0.6115, "step": 5595, "time": 71.07 }, { "epoch": 4.33, "learning_rate": "1.1775e-05", "loss": 0.6182, "slid_loss": 0.6117, "step": 5596, "time": 72.74 }, { "epoch": 4.33, "learning_rate": "1.1771e-05", "loss": 0.5846, "slid_loss": 0.6113, "step": 5597, "time": 71.09 }, { "epoch": 4.33, "learning_rate": "1.1767e-05", "loss": 0.6271, "slid_loss": 0.6114, "step": 5598, "time": 73.88 }, { "epoch": 4.33, "learning_rate": "1.1763e-05", "loss": 0.623, "slid_loss": 0.6116, "step": 5599, "time": 72.78 }, { "epoch": 4.33, "learning_rate": "1.1759e-05", "loss": 0.6369, "slid_loss": 0.612, "step": 5600, "time": 71.23 }, { "epoch": 4.33, "learning_rate": "1.1754e-05", "loss": 0.6159, "slid_loss": 0.6119, "step": 5601, "time": 754.54 }, { "epoch": 4.33, "learning_rate": "1.1750e-05", "loss": 0.6422, "slid_loss": 0.6124, "step": 5602, "time": 72.94 }, { "epoch": 4.34, "learning_rate": "1.1746e-05", "loss": 0.6488, "slid_loss": 0.6127, "step": 5603, "time": 71.8 }, { "epoch": 4.34, "learning_rate": "1.1742e-05", "loss": 0.614, "slid_loss": 0.6126, "step": 5604, "time": 71.39 }, { "epoch": 4.34, "learning_rate": "1.1738e-05", "loss": 0.5858, "slid_loss": 0.6123, "step": 5605, "time": 72.31 }, { "epoch": 4.34, "learning_rate": "1.1734e-05", "loss": 0.6338, "slid_loss": 0.6125, "step": 5606, "time": 70.9 }, { "epoch": 4.34, "learning_rate": "1.1730e-05", "loss": 0.578, "slid_loss": 0.6121, "step": 5607, "time": 70.98 }, { "epoch": 4.34, "learning_rate": "1.1726e-05", "loss": 0.6258, "slid_loss": 0.6124, "step": 5608, "time": 72.62 }, { "epoch": 4.34, "learning_rate": "1.1722e-05", "loss": 0.5974, "slid_loss": 0.6121, "step": 5609, "time": 71.09 }, { "epoch": 4.34, "learning_rate": "1.1718e-05", "loss": 0.6584, "slid_loss": 0.6123, "step": 5610, "time": 71.91 }, { "epoch": 4.34, "learning_rate": "1.1714e-05", "loss": 0.6497, "slid_loss": 0.6126, "step": 5611, "time": 71.09 }, { "epoch": 4.34, "learning_rate": "1.1710e-05", "loss": 0.6023, "slid_loss": 0.6124, "step": 5612, "time": 72.44 }, { "epoch": 4.34, "learning_rate": "1.1707e-05", "loss": 0.5983, "slid_loss": 0.6122, "step": 5613, "time": 72.58 }, { "epoch": 4.34, "learning_rate": "1.1703e-05", "loss": 0.6112, "slid_loss": 0.6122, "step": 5614, "time": 72.24 }, { "epoch": 4.34, "learning_rate": "1.1699e-05", "loss": 0.6496, "slid_loss": 0.6127, "step": 5615, "time": 71.01 }, { "epoch": 4.35, "learning_rate": "1.1695e-05", "loss": 0.5919, "slid_loss": 0.6124, "step": 5616, "time": 72.96 }, { "epoch": 4.35, "learning_rate": "1.1691e-05", "loss": 0.5716, "slid_loss": 0.6123, "step": 5617, "time": 72.1 }, { "epoch": 4.35, "learning_rate": "1.1687e-05", "loss": 0.5921, "slid_loss": 0.6117, "step": 5618, "time": 70.76 }, { "epoch": 4.35, "learning_rate": "1.1683e-05", "loss": 0.6099, "slid_loss": 0.6115, "step": 5619, "time": 73.02 }, { "epoch": 4.35, "learning_rate": "1.1679e-05", "loss": 0.6331, "slid_loss": 0.6119, "step": 5620, "time": 71.05 }, { "epoch": 4.35, "learning_rate": "1.1675e-05", "loss": 0.608, "slid_loss": 0.6118, "step": 5621, "time": 72.49 }, { "epoch": 4.35, "learning_rate": "1.1671e-05", "loss": 0.6488, "slid_loss": 0.6119, "step": 5622, "time": 71.11 }, { "epoch": 4.35, "learning_rate": "1.1667e-05", "loss": 0.6104, "slid_loss": 0.6119, "step": 5623, "time": 71.59 }, { "epoch": 4.35, "learning_rate": "1.1663e-05", "loss": 0.6006, "slid_loss": 0.6121, "step": 5624, "time": 71.03 }, { "epoch": 4.35, "learning_rate": "1.1659e-05", "loss": 0.6116, "slid_loss": 0.6121, "step": 5625, "time": 70.51 }, { "epoch": 4.35, "learning_rate": "1.1655e-05", "loss": 0.574, "slid_loss": 0.612, "step": 5626, "time": 71.32 }, { "epoch": 4.35, "learning_rate": "1.1651e-05", "loss": 0.6216, "slid_loss": 0.612, "step": 5627, "time": 72.9 }, { "epoch": 4.35, "learning_rate": "1.1647e-05", "loss": 0.6072, "slid_loss": 0.6123, "step": 5628, "time": 71.14 }, { "epoch": 4.36, "learning_rate": "1.1644e-05", "loss": 0.6024, "slid_loss": 0.612, "step": 5629, "time": 70.84 }, { "epoch": 4.36, "learning_rate": "1.1640e-05", "loss": 0.6197, "slid_loss": 0.6122, "step": 5630, "time": 72.07 }, { "epoch": 4.36, "learning_rate": "1.1636e-05", "loss": 0.5885, "slid_loss": 0.6119, "step": 5631, "time": 71.58 }, { "epoch": 4.36, "learning_rate": "1.1632e-05", "loss": 0.6544, "slid_loss": 0.612, "step": 5632, "time": 71.22 }, { "epoch": 4.36, "learning_rate": "1.1628e-05", "loss": 0.6116, "slid_loss": 0.6122, "step": 5633, "time": 70.84 }, { "epoch": 4.36, "learning_rate": "1.1624e-05", "loss": 0.584, "slid_loss": 0.612, "step": 5634, "time": 70.56 }, { "epoch": 4.36, "learning_rate": "1.1620e-05", "loss": 0.6352, "slid_loss": 0.6124, "step": 5635, "time": 72.84 }, { "epoch": 4.36, "learning_rate": "1.1616e-05", "loss": 0.6129, "slid_loss": 0.6122, "step": 5636, "time": 71.35 }, { "epoch": 4.36, "learning_rate": "1.1612e-05", "loss": 0.5818, "slid_loss": 0.6118, "step": 5637, "time": 71.99 }, { "epoch": 4.36, "learning_rate": "1.1609e-05", "loss": 0.6112, "slid_loss": 0.6118, "step": 5638, "time": 72.13 }, { "epoch": 4.36, "learning_rate": "1.1605e-05", "loss": 0.6303, "slid_loss": 0.612, "step": 5639, "time": 72.96 }, { "epoch": 4.36, "learning_rate": "1.1601e-05", "loss": 0.587, "slid_loss": 0.6115, "step": 5640, "time": 72.54 }, { "epoch": 4.36, "learning_rate": "1.1597e-05", "loss": 0.6283, "slid_loss": 0.6116, "step": 5641, "time": 72.67 }, { "epoch": 4.37, "learning_rate": "1.1593e-05", "loss": 0.6649, "slid_loss": 0.6123, "step": 5642, "time": 71.33 }, { "epoch": 4.37, "learning_rate": "1.1589e-05", "loss": 0.6135, "slid_loss": 0.6121, "step": 5643, "time": 71.5 }, { "epoch": 4.37, "learning_rate": "1.1586e-05", "loss": 0.6133, "slid_loss": 0.6123, "step": 5644, "time": 71.89 }, { "epoch": 4.37, "learning_rate": "1.1582e-05", "loss": 0.5707, "slid_loss": 0.6121, "step": 5645, "time": 72.5 }, { "epoch": 4.37, "learning_rate": "1.1578e-05", "loss": 0.6179, "slid_loss": 0.6125, "step": 5646, "time": 73.09 }, { "epoch": 4.37, "learning_rate": "1.1574e-05", "loss": 0.5887, "slid_loss": 0.6126, "step": 5647, "time": 72.33 }, { "epoch": 4.37, "learning_rate": "1.1570e-05", "loss": 0.6311, "slid_loss": 0.6128, "step": 5648, "time": 71.82 }, { "epoch": 4.37, "learning_rate": "1.1566e-05", "loss": 0.6072, "slid_loss": 0.6127, "step": 5649, "time": 72.56 }, { "epoch": 4.37, "learning_rate": "1.1563e-05", "loss": 0.6067, "slid_loss": 0.6123, "step": 5650, "time": 72.31 }, { "epoch": 4.37, "learning_rate": "1.1559e-05", "loss": 0.5911, "slid_loss": 0.6119, "step": 5651, "time": 71.61 }, { "epoch": 4.37, "learning_rate": "1.1555e-05", "loss": 0.5994, "slid_loss": 0.6118, "step": 5652, "time": 72.15 }, { "epoch": 4.37, "learning_rate": "1.1551e-05", "loss": 0.6245, "slid_loss": 0.6121, "step": 5653, "time": 71.47 }, { "epoch": 4.37, "learning_rate": "1.1547e-05", "loss": 0.6246, "slid_loss": 0.6123, "step": 5654, "time": 71.51 }, { "epoch": 4.38, "learning_rate": "1.1544e-05", "loss": 0.5926, "slid_loss": 0.6123, "step": 5655, "time": 72.26 }, { "epoch": 4.38, "learning_rate": "1.1540e-05", "loss": 0.6135, "slid_loss": 0.6122, "step": 5656, "time": 72.36 }, { "epoch": 4.38, "learning_rate": "1.1536e-05", "loss": 0.5834, "slid_loss": 0.6119, "step": 5657, "time": 73.01 }, { "epoch": 4.38, "learning_rate": "1.1532e-05", "loss": 0.6216, "slid_loss": 0.6121, "step": 5658, "time": 71.91 }, { "epoch": 4.38, "learning_rate": "1.1529e-05", "loss": 0.5942, "slid_loss": 0.6118, "step": 5659, "time": 72.21 }, { "epoch": 4.38, "learning_rate": "1.1525e-05", "loss": 0.6401, "slid_loss": 0.6117, "step": 5660, "time": 73.28 }, { "epoch": 4.38, "learning_rate": "1.1521e-05", "loss": 0.6407, "slid_loss": 0.6123, "step": 5661, "time": 71.46 }, { "epoch": 4.38, "learning_rate": "1.1517e-05", "loss": 0.5865, "slid_loss": 0.6121, "step": 5662, "time": 72.4 }, { "epoch": 4.38, "learning_rate": "1.1513e-05", "loss": 0.6313, "slid_loss": 0.6123, "step": 5663, "time": 71.53 }, { "epoch": 4.38, "learning_rate": "1.1510e-05", "loss": 0.5885, "slid_loss": 0.6123, "step": 5664, "time": 72.17 }, { "epoch": 4.38, "learning_rate": "1.1506e-05", "loss": 0.6404, "slid_loss": 0.6126, "step": 5665, "time": 72.2 }, { "epoch": 4.38, "learning_rate": "1.1502e-05", "loss": 0.6305, "slid_loss": 0.6127, "step": 5666, "time": 70.63 }, { "epoch": 4.38, "learning_rate": "1.1499e-05", "loss": 0.6095, "slid_loss": 0.6126, "step": 5667, "time": 72.53 }, { "epoch": 4.39, "learning_rate": "1.1495e-05", "loss": 0.6006, "slid_loss": 0.6126, "step": 5668, "time": 71.05 }, { "epoch": 4.39, "learning_rate": "1.1491e-05", "loss": 0.62, "slid_loss": 0.6126, "step": 5669, "time": 71.74 }, { "epoch": 4.39, "learning_rate": "1.1487e-05", "loss": 0.5703, "slid_loss": 0.612, "step": 5670, "time": 71.2 }, { "epoch": 4.39, "learning_rate": "1.1484e-05", "loss": 0.6048, "slid_loss": 0.6119, "step": 5671, "time": 73.11 }, { "epoch": 4.39, "learning_rate": "1.1480e-05", "loss": 0.6183, "slid_loss": 0.6118, "step": 5672, "time": 72.01 }, { "epoch": 4.39, "learning_rate": "1.1476e-05", "loss": 0.5754, "slid_loss": 0.6113, "step": 5673, "time": 71.25 }, { "epoch": 4.39, "learning_rate": "1.1473e-05", "loss": 0.5851, "slid_loss": 0.6111, "step": 5674, "time": 71.57 }, { "epoch": 4.39, "learning_rate": "1.1469e-05", "loss": 0.6147, "slid_loss": 0.6114, "step": 5675, "time": 72.78 }, { "epoch": 4.39, "learning_rate": "1.1465e-05", "loss": 0.6006, "slid_loss": 0.6115, "step": 5676, "time": 73.16 }, { "epoch": 4.39, "learning_rate": "1.1461e-05", "loss": 0.6112, "slid_loss": 0.6112, "step": 5677, "time": 71.97 }, { "epoch": 4.39, "learning_rate": "1.1458e-05", "loss": 0.602, "slid_loss": 0.6111, "step": 5678, "time": 71.78 }, { "epoch": 4.39, "learning_rate": "1.1454e-05", "loss": 0.604, "slid_loss": 0.6113, "step": 5679, "time": 71.22 }, { "epoch": 4.39, "learning_rate": "1.1450e-05", "loss": 0.6085, "slid_loss": 0.6116, "step": 5680, "time": 71.58 }, { "epoch": 4.4, "learning_rate": "1.1447e-05", "loss": 0.5741, "slid_loss": 0.6107, "step": 5681, "time": 71.25 }, { "epoch": 4.4, "learning_rate": "1.1443e-05", "loss": 0.6085, "slid_loss": 0.6108, "step": 5682, "time": 70.25 }, { "epoch": 4.4, "learning_rate": "1.1439e-05", "loss": 0.5983, "slid_loss": 0.6108, "step": 5683, "time": 70.97 }, { "epoch": 4.4, "learning_rate": "1.1436e-05", "loss": 0.6316, "slid_loss": 0.6108, "step": 5684, "time": 71.22 }, { "epoch": 4.4, "learning_rate": "1.1432e-05", "loss": 0.6327, "slid_loss": 0.6109, "step": 5685, "time": 72.05 }, { "epoch": 4.4, "learning_rate": "1.1428e-05", "loss": 0.5823, "slid_loss": 0.6105, "step": 5686, "time": 72.33 }, { "epoch": 4.4, "learning_rate": "1.1425e-05", "loss": 0.6328, "slid_loss": 0.6106, "step": 5687, "time": 71.27 }, { "epoch": 4.4, "learning_rate": "1.1421e-05", "loss": 0.6008, "slid_loss": 0.6104, "step": 5688, "time": 71.43 }, { "epoch": 4.4, "learning_rate": "1.1418e-05", "loss": 0.5771, "slid_loss": 0.6104, "step": 5689, "time": 71.95 }, { "epoch": 4.4, "learning_rate": "1.1414e-05", "loss": 0.5924, "slid_loss": 0.6105, "step": 5690, "time": 71.7 }, { "epoch": 4.4, "learning_rate": "1.1410e-05", "loss": 0.5838, "slid_loss": 0.6103, "step": 5691, "time": 71.79 }, { "epoch": 4.4, "learning_rate": "1.1407e-05", "loss": 0.5997, "slid_loss": 0.6102, "step": 5692, "time": 71.91 }, { "epoch": 4.4, "learning_rate": "1.1403e-05", "loss": 0.6105, "slid_loss": 0.6102, "step": 5693, "time": 71.42 }, { "epoch": 4.41, "learning_rate": "1.1399e-05", "loss": 0.5911, "slid_loss": 0.6099, "step": 5694, "time": 71.67 }, { "epoch": 4.41, "learning_rate": "1.1396e-05", "loss": 0.6404, "slid_loss": 0.6103, "step": 5695, "time": 71.07 }, { "epoch": 4.41, "learning_rate": "1.1392e-05", "loss": 0.6292, "slid_loss": 0.6104, "step": 5696, "time": 72.03 }, { "epoch": 4.41, "learning_rate": "1.1389e-05", "loss": 0.628, "slid_loss": 0.6108, "step": 5697, "time": 71.07 }, { "epoch": 4.41, "learning_rate": "1.1385e-05", "loss": 0.601, "slid_loss": 0.6106, "step": 5698, "time": 71.89 }, { "epoch": 4.41, "learning_rate": "1.1381e-05", "loss": 0.6045, "slid_loss": 0.6104, "step": 5699, "time": 71.05 }, { "epoch": 4.41, "learning_rate": "1.1378e-05", "loss": 0.6023, "slid_loss": 0.61, "step": 5700, "time": 71.13 }, { "epoch": 4.41, "learning_rate": "1.1374e-05", "loss": 0.608, "slid_loss": 0.6099, "step": 5701, "time": 72.13 }, { "epoch": 4.41, "learning_rate": "1.1371e-05", "loss": 0.5865, "slid_loss": 0.6094, "step": 5702, "time": 71.43 }, { "epoch": 4.41, "learning_rate": "1.1367e-05", "loss": 0.6391, "slid_loss": 0.6093, "step": 5703, "time": 86.34 }, { "epoch": 4.41, "learning_rate": "1.1364e-05", "loss": 0.631, "slid_loss": 0.6095, "step": 5704, "time": 71.49 }, { "epoch": 4.41, "learning_rate": "1.1360e-05", "loss": 0.5944, "slid_loss": 0.6095, "step": 5705, "time": 72.72 }, { "epoch": 4.41, "learning_rate": "1.1356e-05", "loss": 0.592, "slid_loss": 0.6091, "step": 5706, "time": 71.41 }, { "epoch": 4.42, "learning_rate": "1.1353e-05", "loss": 0.6579, "slid_loss": 0.6099, "step": 5707, "time": 72.37 }, { "epoch": 4.42, "learning_rate": "1.1349e-05", "loss": 0.6215, "slid_loss": 0.6099, "step": 5708, "time": 73.04 }, { "epoch": 4.42, "learning_rate": "1.1346e-05", "loss": 0.6245, "slid_loss": 0.6102, "step": 5709, "time": 71.64 }, { "epoch": 4.42, "learning_rate": "1.1342e-05", "loss": 0.6151, "slid_loss": 0.6097, "step": 5710, "time": 71.76 }, { "epoch": 4.42, "learning_rate": "1.1339e-05", "loss": 0.6265, "slid_loss": 0.6095, "step": 5711, "time": 118.24 }, { "epoch": 4.42, "learning_rate": "1.1335e-05", "loss": 0.6268, "slid_loss": 0.6097, "step": 5712, "time": 84.22 }, { "epoch": 4.42, "learning_rate": "1.1332e-05", "loss": 0.6092, "slid_loss": 0.6098, "step": 5713, "time": 110.09 }, { "epoch": 4.42, "learning_rate": "1.1328e-05", "loss": 0.6056, "slid_loss": 0.6098, "step": 5714, "time": 83.27 }, { "epoch": 4.42, "learning_rate": "1.1325e-05", "loss": 0.6452, "slid_loss": 0.6097, "step": 5715, "time": 71.25 }, { "epoch": 4.42, "learning_rate": "1.1321e-05", "loss": 0.6017, "slid_loss": 0.6098, "step": 5716, "time": 110.33 }, { "epoch": 4.42, "learning_rate": "1.1318e-05", "loss": 0.5863, "slid_loss": 0.61, "step": 5717, "time": 84.03 }, { "epoch": 4.42, "learning_rate": "1.1314e-05", "loss": 0.5877, "slid_loss": 0.6099, "step": 5718, "time": 111.12 }, { "epoch": 4.42, "learning_rate": "1.1311e-05", "loss": 0.6189, "slid_loss": 0.61, "step": 5719, "time": 150.74 }, { "epoch": 4.43, "learning_rate": "1.1307e-05", "loss": 0.6162, "slid_loss": 0.6099, "step": 5720, "time": 136.28 }, { "epoch": 4.43, "learning_rate": "1.1304e-05", "loss": 0.6459, "slid_loss": 0.6102, "step": 5721, "time": 133.96 }, { "epoch": 4.43, "learning_rate": "1.1300e-05", "loss": 0.6326, "slid_loss": 0.6101, "step": 5722, "time": 143.07 }, { "epoch": 4.43, "learning_rate": "1.1297e-05", "loss": 0.6246, "slid_loss": 0.6102, "step": 5723, "time": 146.92 }, { "epoch": 4.43, "learning_rate": "1.1293e-05", "loss": 0.5808, "slid_loss": 0.61, "step": 5724, "time": 95.65 }, { "epoch": 4.43, "learning_rate": "1.1290e-05", "loss": 0.5994, "slid_loss": 0.6099, "step": 5725, "time": 142.99 }, { "epoch": 4.43, "learning_rate": "1.1286e-05", "loss": 0.6085, "slid_loss": 0.6103, "step": 5726, "time": 83.08 }, { "epoch": 4.43, "learning_rate": "1.1283e-05", "loss": 0.6301, "slid_loss": 0.6103, "step": 5727, "time": 142.71 }, { "epoch": 4.43, "learning_rate": "1.1279e-05", "loss": 0.6164, "slid_loss": 0.6104, "step": 5728, "time": 122.32 }, { "epoch": 4.43, "learning_rate": "1.1276e-05", "loss": 0.6399, "slid_loss": 0.6108, "step": 5729, "time": 110.75 }, { "epoch": 4.43, "learning_rate": "1.1272e-05", "loss": 0.5876, "slid_loss": 0.6105, "step": 5730, "time": 109.25 }, { "epoch": 4.43, "learning_rate": "1.1269e-05", "loss": 0.6123, "slid_loss": 0.6107, "step": 5731, "time": 107.67 }, { "epoch": 4.43, "learning_rate": "1.1265e-05", "loss": 0.5886, "slid_loss": 0.6101, "step": 5732, "time": 107.18 }, { "epoch": 4.44, "learning_rate": "1.1262e-05", "loss": 0.5908, "slid_loss": 0.6099, "step": 5733, "time": 96.67 }, { "epoch": 4.44, "learning_rate": "1.1259e-05", "loss": 0.5816, "slid_loss": 0.6098, "step": 5734, "time": 71.26 }, { "epoch": 4.44, "learning_rate": "1.1255e-05", "loss": 0.64, "slid_loss": 0.6099, "step": 5735, "time": 70.57 }, { "epoch": 4.44, "learning_rate": "1.1252e-05", "loss": 0.6371, "slid_loss": 0.6101, "step": 5736, "time": 71.77 }, { "epoch": 4.44, "learning_rate": "1.1248e-05", "loss": 0.6127, "slid_loss": 0.6104, "step": 5737, "time": 82.78 }, { "epoch": 4.44, "learning_rate": "1.1245e-05", "loss": 0.6086, "slid_loss": 0.6104, "step": 5738, "time": 71.85 }, { "epoch": 4.44, "learning_rate": "1.1241e-05", "loss": 0.5917, "slid_loss": 0.61, "step": 5739, "time": 73.12 }, { "epoch": 4.44, "learning_rate": "1.1238e-05", "loss": 0.612, "slid_loss": 0.6103, "step": 5740, "time": 71.47 }, { "epoch": 4.44, "learning_rate": "1.1235e-05", "loss": 0.6152, "slid_loss": 0.6101, "step": 5741, "time": 72.23 }, { "epoch": 4.44, "learning_rate": "1.1231e-05", "loss": 0.6204, "slid_loss": 0.6097, "step": 5742, "time": 71.68 }, { "epoch": 4.44, "learning_rate": "1.1228e-05", "loss": 0.6352, "slid_loss": 0.6099, "step": 5743, "time": 71.39 }, { "epoch": 4.44, "learning_rate": "1.1225e-05", "loss": 0.615, "slid_loss": 0.6099, "step": 5744, "time": 72.18 }, { "epoch": 4.44, "learning_rate": "1.1221e-05", "loss": 0.6249, "slid_loss": 0.6105, "step": 5745, "time": 71.47 }, { "epoch": 4.45, "learning_rate": "1.1218e-05", "loss": 0.6128, "slid_loss": 0.6104, "step": 5746, "time": 72.19 }, { "epoch": 4.45, "learning_rate": "1.1214e-05", "loss": 0.6291, "slid_loss": 0.6108, "step": 5747, "time": 72.35 }, { "epoch": 4.45, "learning_rate": "1.1211e-05", "loss": 0.5887, "slid_loss": 0.6104, "step": 5748, "time": 71.4 }, { "epoch": 4.45, "learning_rate": "1.1208e-05", "loss": 0.6243, "slid_loss": 0.6106, "step": 5749, "time": 72.02 }, { "epoch": 4.45, "learning_rate": "1.1204e-05", "loss": 0.5994, "slid_loss": 0.6105, "step": 5750, "time": 70.7 }, { "epoch": 4.45, "learning_rate": "1.1201e-05", "loss": 0.605, "slid_loss": 0.6106, "step": 5751, "time": 71.64 }, { "epoch": 4.45, "learning_rate": "1.1198e-05", "loss": 0.6063, "slid_loss": 0.6107, "step": 5752, "time": 71.66 }, { "epoch": 4.45, "learning_rate": "1.1194e-05", "loss": 0.5774, "slid_loss": 0.6102, "step": 5753, "time": 71.31 }, { "epoch": 4.45, "learning_rate": "1.1191e-05", "loss": 0.651, "slid_loss": 0.6105, "step": 5754, "time": 72.26 }, { "epoch": 4.45, "learning_rate": "1.1188e-05", "loss": 0.6323, "slid_loss": 0.6109, "step": 5755, "time": 72.18 }, { "epoch": 4.45, "learning_rate": "1.1184e-05", "loss": 0.5801, "slid_loss": 0.6106, "step": 5756, "time": 70.74 }, { "epoch": 4.45, "learning_rate": "1.1181e-05", "loss": 0.6138, "slid_loss": 0.6109, "step": 5757, "time": 71.55 }, { "epoch": 4.45, "learning_rate": "1.1178e-05", "loss": 0.5967, "slid_loss": 0.6106, "step": 5758, "time": 72.55 }, { "epoch": 4.46, "learning_rate": "1.1174e-05", "loss": 0.5796, "slid_loss": 0.6105, "step": 5759, "time": 70.58 }, { "epoch": 4.46, "learning_rate": "1.1171e-05", "loss": 0.6283, "slid_loss": 0.6103, "step": 5760, "time": 70.76 }, { "epoch": 4.46, "learning_rate": "1.1168e-05", "loss": 0.5868, "slid_loss": 0.6098, "step": 5761, "time": 71.45 }, { "epoch": 4.46, "learning_rate": "1.1164e-05", "loss": 0.5798, "slid_loss": 0.6097, "step": 5762, "time": 72.62 }, { "epoch": 4.46, "learning_rate": "1.1161e-05", "loss": 0.603, "slid_loss": 0.6095, "step": 5763, "time": 72.24 }, { "epoch": 4.46, "learning_rate": "1.1158e-05", "loss": 0.6047, "slid_loss": 0.6096, "step": 5764, "time": 71.87 }, { "epoch": 4.46, "learning_rate": "1.1154e-05", "loss": 0.5911, "slid_loss": 0.6091, "step": 5765, "time": 71.18 }, { "epoch": 4.46, "learning_rate": "1.1151e-05", "loss": 0.5718, "slid_loss": 0.6085, "step": 5766, "time": 71.27 }, { "epoch": 4.46, "learning_rate": "1.1148e-05", "loss": 0.6026, "slid_loss": 0.6085, "step": 5767, "time": 70.62 }, { "epoch": 4.46, "learning_rate": "1.1145e-05", "loss": 0.6072, "slid_loss": 0.6085, "step": 5768, "time": 71.1 }, { "epoch": 4.46, "learning_rate": "1.1141e-05", "loss": 0.5939, "slid_loss": 0.6083, "step": 5769, "time": 71.5 }, { "epoch": 4.46, "learning_rate": "1.1138e-05", "loss": 0.5959, "slid_loss": 0.6085, "step": 5770, "time": 71.31 }, { "epoch": 4.46, "learning_rate": "1.1135e-05", "loss": 0.6288, "slid_loss": 0.6088, "step": 5771, "time": 72.07 }, { "epoch": 4.47, "learning_rate": "1.1132e-05", "loss": 0.5963, "slid_loss": 0.6086, "step": 5772, "time": 72.45 }, { "epoch": 4.47, "learning_rate": "1.1128e-05", "loss": 0.6155, "slid_loss": 0.609, "step": 5773, "time": 71.89 }, { "epoch": 4.47, "learning_rate": "1.1125e-05", "loss": 0.595, "slid_loss": 0.6091, "step": 5774, "time": 72.0 }, { "epoch": 4.47, "learning_rate": "1.1122e-05", "loss": 0.6329, "slid_loss": 0.6092, "step": 5775, "time": 71.8 }, { "epoch": 4.47, "learning_rate": "1.1119e-05", "loss": 0.5901, "slid_loss": 0.6091, "step": 5776, "time": 70.7 }, { "epoch": 4.47, "learning_rate": "1.1115e-05", "loss": 0.6184, "slid_loss": 0.6092, "step": 5777, "time": 71.18 }, { "epoch": 4.47, "learning_rate": "1.1112e-05", "loss": 0.595, "slid_loss": 0.6091, "step": 5778, "time": 71.85 }, { "epoch": 4.47, "learning_rate": "1.1109e-05", "loss": 0.655, "slid_loss": 0.6096, "step": 5779, "time": 71.32 }, { "epoch": 4.47, "learning_rate": "1.1106e-05", "loss": 0.5969, "slid_loss": 0.6095, "step": 5780, "time": 71.44 }, { "epoch": 4.47, "learning_rate": "1.1102e-05", "loss": 0.6149, "slid_loss": 0.6099, "step": 5781, "time": 71.65 }, { "epoch": 4.47, "learning_rate": "1.1099e-05", "loss": 0.6352, "slid_loss": 0.6102, "step": 5782, "time": 72.33 }, { "epoch": 4.47, "learning_rate": "1.1096e-05", "loss": 0.5998, "slid_loss": 0.6102, "step": 5783, "time": 71.75 }, { "epoch": 4.48, "learning_rate": "1.1093e-05", "loss": 0.5854, "slid_loss": 0.6098, "step": 5784, "time": 72.76 }, { "epoch": 4.48, "learning_rate": "1.1090e-05", "loss": 0.5902, "slid_loss": 0.6093, "step": 5785, "time": 72.6 }, { "epoch": 4.48, "learning_rate": "1.1086e-05", "loss": 0.6355, "slid_loss": 0.6099, "step": 5786, "time": 72.96 }, { "epoch": 4.48, "learning_rate": "1.1083e-05", "loss": 0.6142, "slid_loss": 0.6097, "step": 5787, "time": 71.36 }, { "epoch": 4.48, "learning_rate": "1.1080e-05", "loss": 0.643, "slid_loss": 0.6101, "step": 5788, "time": 72.83 }, { "epoch": 4.48, "learning_rate": "1.1077e-05", "loss": 0.6305, "slid_loss": 0.6106, "step": 5789, "time": 73.56 }, { "epoch": 4.48, "learning_rate": "1.1074e-05", "loss": 0.5975, "slid_loss": 0.6107, "step": 5790, "time": 70.51 }, { "epoch": 4.48, "learning_rate": "1.1070e-05", "loss": 0.608, "slid_loss": 0.6109, "step": 5791, "time": 71.64 }, { "epoch": 4.48, "learning_rate": "1.1067e-05", "loss": 0.6372, "slid_loss": 0.6113, "step": 5792, "time": 72.84 }, { "epoch": 4.48, "learning_rate": "1.1064e-05", "loss": 0.6188, "slid_loss": 0.6114, "step": 5793, "time": 70.92 }, { "epoch": 4.48, "learning_rate": "1.1061e-05", "loss": 0.5884, "slid_loss": 0.6114, "step": 5794, "time": 70.66 }, { "epoch": 4.48, "learning_rate": "1.1058e-05", "loss": 0.635, "slid_loss": 0.6113, "step": 5795, "time": 71.5 }, { "epoch": 4.48, "learning_rate": "1.1055e-05", "loss": 0.6025, "slid_loss": 0.611, "step": 5796, "time": 71.5 }, { "epoch": 4.49, "learning_rate": "1.1051e-05", "loss": 0.6103, "slid_loss": 0.6109, "step": 5797, "time": 73.0 }, { "epoch": 4.49, "learning_rate": "1.1048e-05", "loss": 0.5885, "slid_loss": 0.6107, "step": 5798, "time": 71.75 }, { "epoch": 4.49, "learning_rate": "1.1045e-05", "loss": 0.615, "slid_loss": 0.6108, "step": 5799, "time": 71.76 }, { "epoch": 4.49, "learning_rate": "1.1042e-05", "loss": 0.5767, "slid_loss": 0.6106, "step": 5800, "time": 71.57 }, { "epoch": 4.49, "learning_rate": "1.1039e-05", "loss": 0.6323, "slid_loss": 0.6108, "step": 5801, "time": 848.56 }, { "epoch": 4.49, "learning_rate": "1.1036e-05", "loss": 0.6067, "slid_loss": 0.611, "step": 5802, "time": 72.79 }, { "epoch": 4.49, "learning_rate": "1.1033e-05", "loss": 0.5994, "slid_loss": 0.6106, "step": 5803, "time": 71.37 }, { "epoch": 4.49, "learning_rate": "1.1030e-05", "loss": 0.6149, "slid_loss": 0.6105, "step": 5804, "time": 72.81 }, { "epoch": 4.49, "learning_rate": "1.1026e-05", "loss": 0.6241, "slid_loss": 0.6108, "step": 5805, "time": 72.45 }, { "epoch": 4.49, "learning_rate": "1.1023e-05", "loss": 0.5891, "slid_loss": 0.6107, "step": 5806, "time": 72.6 }, { "epoch": 4.49, "learning_rate": "1.1020e-05", "loss": 0.6225, "slid_loss": 0.6104, "step": 5807, "time": 72.68 }, { "epoch": 4.49, "learning_rate": "1.1017e-05", "loss": 0.5879, "slid_loss": 0.61, "step": 5808, "time": 71.33 }, { "epoch": 4.49, "learning_rate": "1.1014e-05", "loss": 0.5926, "slid_loss": 0.6097, "step": 5809, "time": 72.44 }, { "epoch": 4.5, "learning_rate": "1.1011e-05", "loss": 0.6374, "slid_loss": 0.61, "step": 5810, "time": 70.4 }, { "epoch": 4.5, "learning_rate": "1.1008e-05", "loss": 0.6048, "slid_loss": 0.6097, "step": 5811, "time": 71.84 }, { "epoch": 4.5, "learning_rate": "1.1005e-05", "loss": 0.5939, "slid_loss": 0.6094, "step": 5812, "time": 72.55 }, { "epoch": 4.5, "learning_rate": "1.1002e-05", "loss": 0.5862, "slid_loss": 0.6092, "step": 5813, "time": 73.61 }, { "epoch": 4.5, "learning_rate": "1.0999e-05", "loss": 0.6228, "slid_loss": 0.6093, "step": 5814, "time": 71.63 }, { "epoch": 4.5, "learning_rate": "1.0996e-05", "loss": 0.6018, "slid_loss": 0.6089, "step": 5815, "time": 72.48 }, { "epoch": 4.5, "learning_rate": "1.0993e-05", "loss": 0.5782, "slid_loss": 0.6087, "step": 5816, "time": 72.12 }, { "epoch": 4.5, "learning_rate": "1.0990e-05", "loss": 0.6115, "slid_loss": 0.6089, "step": 5817, "time": 71.94 }, { "epoch": 4.5, "learning_rate": "1.0986e-05", "loss": 0.6111, "slid_loss": 0.6092, "step": 5818, "time": 72.2 }, { "epoch": 4.5, "learning_rate": "1.0983e-05", "loss": 0.5902, "slid_loss": 0.6089, "step": 5819, "time": 72.38 }, { "epoch": 4.5, "learning_rate": "1.0980e-05", "loss": 0.5931, "slid_loss": 0.6086, "step": 5820, "time": 71.23 }, { "epoch": 4.5, "learning_rate": "1.0977e-05", "loss": 0.602, "slid_loss": 0.6082, "step": 5821, "time": 71.64 }, { "epoch": 4.5, "learning_rate": "1.0974e-05", "loss": 0.5905, "slid_loss": 0.6078, "step": 5822, "time": 71.66 }, { "epoch": 4.51, "learning_rate": "1.0971e-05", "loss": 0.5961, "slid_loss": 0.6075, "step": 5823, "time": 71.66 }, { "epoch": 4.51, "learning_rate": "1.0968e-05", "loss": 0.5829, "slid_loss": 0.6075, "step": 5824, "time": 71.97 }, { "epoch": 4.51, "learning_rate": "1.0965e-05", "loss": 0.6334, "slid_loss": 0.6079, "step": 5825, "time": 73.04 }, { "epoch": 4.51, "learning_rate": "1.0962e-05", "loss": 0.5782, "slid_loss": 0.6076, "step": 5826, "time": 72.86 }, { "epoch": 4.51, "learning_rate": "1.0959e-05", "loss": 0.6277, "slid_loss": 0.6075, "step": 5827, "time": 70.41 }, { "epoch": 4.51, "learning_rate": "1.0956e-05", "loss": 0.62, "slid_loss": 0.6076, "step": 5828, "time": 71.53 }, { "epoch": 4.51, "learning_rate": "1.0953e-05", "loss": 0.605, "slid_loss": 0.6072, "step": 5829, "time": 71.43 }, { "epoch": 4.51, "learning_rate": "1.0950e-05", "loss": 0.6443, "slid_loss": 0.6078, "step": 5830, "time": 70.2 }, { "epoch": 4.51, "learning_rate": "1.0947e-05", "loss": 0.5803, "slid_loss": 0.6075, "step": 5831, "time": 72.79 }, { "epoch": 4.51, "learning_rate": "1.0944e-05", "loss": 0.6017, "slid_loss": 0.6076, "step": 5832, "time": 71.14 }, { "epoch": 4.51, "learning_rate": "1.0941e-05", "loss": 0.6393, "slid_loss": 0.6081, "step": 5833, "time": 71.32 }, { "epoch": 4.51, "learning_rate": "1.0938e-05", "loss": 0.6109, "slid_loss": 0.6084, "step": 5834, "time": 71.97 }, { "epoch": 4.51, "learning_rate": "1.0935e-05", "loss": 0.6298, "slid_loss": 0.6083, "step": 5835, "time": 71.83 }, { "epoch": 4.52, "learning_rate": "1.0932e-05", "loss": 0.6027, "slid_loss": 0.6079, "step": 5836, "time": 72.4 }, { "epoch": 4.52, "learning_rate": "1.0929e-05", "loss": 0.5672, "slid_loss": 0.6075, "step": 5837, "time": 71.86 }, { "epoch": 4.52, "learning_rate": "1.0926e-05", "loss": 0.62, "slid_loss": 0.6076, "step": 5838, "time": 72.01 }, { "epoch": 4.52, "learning_rate": "1.0923e-05", "loss": 0.6116, "slid_loss": 0.6078, "step": 5839, "time": 71.59 }, { "epoch": 4.52, "learning_rate": "1.0921e-05", "loss": 0.6127, "slid_loss": 0.6078, "step": 5840, "time": 71.36 }, { "epoch": 4.52, "learning_rate": "1.0918e-05", "loss": 0.6108, "slid_loss": 0.6078, "step": 5841, "time": 71.46 }, { "epoch": 4.52, "learning_rate": "1.0915e-05", "loss": 0.6011, "slid_loss": 0.6076, "step": 5842, "time": 70.69 }, { "epoch": 4.52, "learning_rate": "1.0912e-05", "loss": 0.6099, "slid_loss": 0.6073, "step": 5843, "time": 70.7 }, { "epoch": 4.52, "learning_rate": "1.0909e-05", "loss": 0.5942, "slid_loss": 0.6071, "step": 5844, "time": 71.99 }, { "epoch": 4.52, "learning_rate": "1.0906e-05", "loss": 0.6195, "slid_loss": 0.607, "step": 5845, "time": 71.39 }, { "epoch": 4.52, "learning_rate": "1.0903e-05", "loss": 0.637, "slid_loss": 0.6073, "step": 5846, "time": 72.2 }, { "epoch": 4.52, "learning_rate": "1.0900e-05", "loss": 0.5946, "slid_loss": 0.6069, "step": 5847, "time": 71.9 }, { "epoch": 4.52, "learning_rate": "1.0897e-05", "loss": 0.6045, "slid_loss": 0.6071, "step": 5848, "time": 71.57 }, { "epoch": 4.53, "learning_rate": "1.0894e-05", "loss": 0.6389, "slid_loss": 0.6072, "step": 5849, "time": 71.38 }, { "epoch": 4.53, "learning_rate": "1.0891e-05", "loss": 0.6154, "slid_loss": 0.6074, "step": 5850, "time": 71.85 }, { "epoch": 4.53, "learning_rate": "1.0888e-05", "loss": 0.6412, "slid_loss": 0.6078, "step": 5851, "time": 72.11 }, { "epoch": 4.53, "learning_rate": "1.0886e-05", "loss": 0.5804, "slid_loss": 0.6075, "step": 5852, "time": 70.79 }, { "epoch": 4.53, "learning_rate": "1.0883e-05", "loss": 0.5885, "slid_loss": 0.6076, "step": 5853, "time": 72.43 }, { "epoch": 4.53, "learning_rate": "1.0880e-05", "loss": 0.6189, "slid_loss": 0.6073, "step": 5854, "time": 73.23 }, { "epoch": 4.53, "learning_rate": "1.0877e-05", "loss": 0.5892, "slid_loss": 0.6069, "step": 5855, "time": 70.82 }, { "epoch": 4.53, "learning_rate": "1.0874e-05", "loss": 0.5667, "slid_loss": 0.6067, "step": 5856, "time": 72.15 }, { "epoch": 4.53, "learning_rate": "1.0871e-05", "loss": 0.6292, "slid_loss": 0.6069, "step": 5857, "time": 72.1 }, { "epoch": 4.53, "learning_rate": "1.0868e-05", "loss": 0.6312, "slid_loss": 0.6072, "step": 5858, "time": 71.36 }, { "epoch": 4.53, "learning_rate": "1.0865e-05", "loss": 0.6504, "slid_loss": 0.6079, "step": 5859, "time": 72.96 }, { "epoch": 4.53, "learning_rate": "1.0863e-05", "loss": 0.6072, "slid_loss": 0.6077, "step": 5860, "time": 72.29 }, { "epoch": 4.53, "learning_rate": "1.0860e-05", "loss": 0.5766, "slid_loss": 0.6076, "step": 5861, "time": 71.43 }, { "epoch": 4.54, "learning_rate": "1.0857e-05", "loss": 0.6287, "slid_loss": 0.6081, "step": 5862, "time": 72.56 }, { "epoch": 4.54, "learning_rate": "1.0854e-05", "loss": 0.6164, "slid_loss": 0.6082, "step": 5863, "time": 83.51 }, { "epoch": 4.54, "learning_rate": "1.0851e-05", "loss": 0.6105, "slid_loss": 0.6083, "step": 5864, "time": 71.85 }, { "epoch": 4.54, "learning_rate": "1.0848e-05", "loss": 0.6305, "slid_loss": 0.6087, "step": 5865, "time": 71.92 }, { "epoch": 4.54, "learning_rate": "1.0845e-05", "loss": 0.6112, "slid_loss": 0.6091, "step": 5866, "time": 72.78 }, { "epoch": 4.54, "learning_rate": "1.0843e-05", "loss": 0.6336, "slid_loss": 0.6094, "step": 5867, "time": 90.7 }, { "epoch": 4.54, "learning_rate": "1.0840e-05", "loss": 0.6398, "slid_loss": 0.6097, "step": 5868, "time": 71.79 }, { "epoch": 4.54, "learning_rate": "1.0837e-05", "loss": 0.6172, "slid_loss": 0.61, "step": 5869, "time": 72.17 }, { "epoch": 4.54, "learning_rate": "1.0834e-05", "loss": 0.6378, "slid_loss": 0.6104, "step": 5870, "time": 85.13 }, { "epoch": 4.54, "learning_rate": "1.0831e-05", "loss": 0.6046, "slid_loss": 0.6101, "step": 5871, "time": 97.1 }, { "epoch": 4.54, "learning_rate": "1.0829e-05", "loss": 0.6056, "slid_loss": 0.6102, "step": 5872, "time": 82.94 }, { "epoch": 4.54, "learning_rate": "1.0826e-05", "loss": 0.5942, "slid_loss": 0.61, "step": 5873, "time": 98.09 }, { "epoch": 4.54, "learning_rate": "1.0823e-05", "loss": 0.6157, "slid_loss": 0.6102, "step": 5874, "time": 86.01 }, { "epoch": 4.55, "learning_rate": "1.0820e-05", "loss": 0.5777, "slid_loss": 0.6097, "step": 5875, "time": 84.36 }, { "epoch": 4.55, "learning_rate": "1.0817e-05", "loss": 0.6163, "slid_loss": 0.6099, "step": 5876, "time": 70.58 }, { "epoch": 4.55, "learning_rate": "1.0815e-05", "loss": 0.6359, "slid_loss": 0.6101, "step": 5877, "time": 122.58 }, { "epoch": 4.55, "learning_rate": "1.0812e-05", "loss": 0.6063, "slid_loss": 0.6102, "step": 5878, "time": 152.88 }, { "epoch": 4.55, "learning_rate": "1.0809e-05", "loss": 0.6151, "slid_loss": 0.6098, "step": 5879, "time": 136.58 }, { "epoch": 4.55, "learning_rate": "1.0806e-05", "loss": 0.5912, "slid_loss": 0.6098, "step": 5880, "time": 152.88 }, { "epoch": 4.55, "learning_rate": "1.0804e-05", "loss": 0.6242, "slid_loss": 0.6099, "step": 5881, "time": 133.1 }, { "epoch": 4.55, "learning_rate": "1.0801e-05", "loss": 0.5608, "slid_loss": 0.6091, "step": 5882, "time": 135.93 }, { "epoch": 4.55, "learning_rate": "1.0798e-05", "loss": 0.6063, "slid_loss": 0.6092, "step": 5883, "time": 121.2 }, { "epoch": 4.55, "learning_rate": "1.0795e-05", "loss": 0.5886, "slid_loss": 0.6092, "step": 5884, "time": 136.16 }, { "epoch": 4.55, "learning_rate": "1.0793e-05", "loss": 0.6145, "slid_loss": 0.6095, "step": 5885, "time": 123.75 }, { "epoch": 4.55, "learning_rate": "1.0790e-05", "loss": 0.6036, "slid_loss": 0.6091, "step": 5886, "time": 133.03 }, { "epoch": 4.55, "learning_rate": "1.0787e-05", "loss": 0.6081, "slid_loss": 0.6091, "step": 5887, "time": 119.82 }, { "epoch": 4.56, "learning_rate": "1.0784e-05", "loss": 0.5678, "slid_loss": 0.6083, "step": 5888, "time": 120.06 }, { "epoch": 4.56, "learning_rate": "1.0782e-05", "loss": 0.6027, "slid_loss": 0.608, "step": 5889, "time": 109.53 }, { "epoch": 4.56, "learning_rate": "1.0779e-05", "loss": 0.6253, "slid_loss": 0.6083, "step": 5890, "time": 108.33 }, { "epoch": 4.56, "learning_rate": "1.0776e-05", "loss": 0.6022, "slid_loss": 0.6083, "step": 5891, "time": 108.13 }, { "epoch": 4.56, "learning_rate": "1.0774e-05", "loss": 0.5867, "slid_loss": 0.6078, "step": 5892, "time": 86.69 }, { "epoch": 4.56, "learning_rate": "1.0771e-05", "loss": 0.6109, "slid_loss": 0.6077, "step": 5893, "time": 82.67 }, { "epoch": 4.56, "learning_rate": "1.0768e-05", "loss": 0.6569, "slid_loss": 0.6084, "step": 5894, "time": 71.98 }, { "epoch": 4.56, "learning_rate": "1.0765e-05", "loss": 0.6245, "slid_loss": 0.6083, "step": 5895, "time": 72.04 }, { "epoch": 4.56, "learning_rate": "1.0763e-05", "loss": 0.6371, "slid_loss": 0.6086, "step": 5896, "time": 71.7 }, { "epoch": 4.56, "learning_rate": "1.0760e-05", "loss": 0.578, "slid_loss": 0.6083, "step": 5897, "time": 83.73 }, { "epoch": 4.56, "learning_rate": "1.0757e-05", "loss": 0.6099, "slid_loss": 0.6085, "step": 5898, "time": 72.16 }, { "epoch": 4.56, "learning_rate": "1.0755e-05", "loss": 0.5935, "slid_loss": 0.6083, "step": 5899, "time": 70.36 }, { "epoch": 4.56, "learning_rate": "1.0752e-05", "loss": 0.5799, "slid_loss": 0.6083, "step": 5900, "time": 72.53 }, { "epoch": 4.57, "learning_rate": "1.0749e-05", "loss": 0.6252, "slid_loss": 0.6082, "step": 5901, "time": 70.91 }, { "epoch": 4.57, "learning_rate": "1.0747e-05", "loss": 0.6381, "slid_loss": 0.6086, "step": 5902, "time": 70.58 }, { "epoch": 4.57, "learning_rate": "1.0744e-05", "loss": 0.6108, "slid_loss": 0.6087, "step": 5903, "time": 71.86 }, { "epoch": 4.57, "learning_rate": "1.0741e-05", "loss": 0.6393, "slid_loss": 0.6089, "step": 5904, "time": 71.88 }, { "epoch": 4.57, "learning_rate": "1.0739e-05", "loss": 0.6111, "slid_loss": 0.6088, "step": 5905, "time": 71.21 }, { "epoch": 4.57, "learning_rate": "1.0736e-05", "loss": 0.6055, "slid_loss": 0.609, "step": 5906, "time": 71.18 }, { "epoch": 4.57, "learning_rate": "1.0733e-05", "loss": 0.5867, "slid_loss": 0.6086, "step": 5907, "time": 71.5 }, { "epoch": 4.57, "learning_rate": "1.0731e-05", "loss": 0.5894, "slid_loss": 0.6086, "step": 5908, "time": 71.22 }, { "epoch": 4.57, "learning_rate": "1.0728e-05", "loss": 0.593, "slid_loss": 0.6086, "step": 5909, "time": 72.12 }, { "epoch": 4.57, "learning_rate": "1.0726e-05", "loss": 0.641, "slid_loss": 0.6086, "step": 5910, "time": 71.31 }, { "epoch": 4.57, "learning_rate": "1.0723e-05", "loss": 0.6093, "slid_loss": 0.6087, "step": 5911, "time": 70.43 }, { "epoch": 4.57, "learning_rate": "1.0720e-05", "loss": 0.5999, "slid_loss": 0.6088, "step": 5912, "time": 72.14 }, { "epoch": 4.57, "learning_rate": "1.0718e-05", "loss": 0.6068, "slid_loss": 0.609, "step": 5913, "time": 71.66 }, { "epoch": 4.58, "learning_rate": "1.0715e-05", "loss": 0.6383, "slid_loss": 0.6091, "step": 5914, "time": 70.89 }, { "epoch": 4.58, "learning_rate": "1.0713e-05", "loss": 0.5713, "slid_loss": 0.6088, "step": 5915, "time": 72.01 }, { "epoch": 4.58, "learning_rate": "1.0710e-05", "loss": 0.6194, "slid_loss": 0.6092, "step": 5916, "time": 71.82 }, { "epoch": 4.58, "learning_rate": "1.0707e-05", "loss": 0.5947, "slid_loss": 0.6091, "step": 5917, "time": 71.04 }, { "epoch": 4.58, "learning_rate": "1.0705e-05", "loss": 0.6023, "slid_loss": 0.609, "step": 5918, "time": 72.44 }, { "epoch": 4.58, "learning_rate": "1.0702e-05", "loss": 0.5929, "slid_loss": 0.609, "step": 5919, "time": 71.29 }, { "epoch": 4.58, "learning_rate": "1.0700e-05", "loss": 0.6217, "slid_loss": 0.6093, "step": 5920, "time": 71.19 }, { "epoch": 4.58, "learning_rate": "1.0697e-05", "loss": 0.6472, "slid_loss": 0.6097, "step": 5921, "time": 72.07 }, { "epoch": 4.58, "learning_rate": "1.0694e-05", "loss": 0.6091, "slid_loss": 0.6099, "step": 5922, "time": 71.4 }, { "epoch": 4.58, "learning_rate": "1.0692e-05", "loss": 0.5832, "slid_loss": 0.6098, "step": 5923, "time": 72.99 }, { "epoch": 4.58, "learning_rate": "1.0689e-05", "loss": 0.6217, "slid_loss": 0.6102, "step": 5924, "time": 71.17 }, { "epoch": 4.58, "learning_rate": "1.0687e-05", "loss": 0.6011, "slid_loss": 0.6099, "step": 5925, "time": 71.91 }, { "epoch": 4.58, "learning_rate": "1.0684e-05", "loss": 0.5985, "slid_loss": 0.6101, "step": 5926, "time": 72.82 }, { "epoch": 4.59, "learning_rate": "1.0682e-05", "loss": 0.5837, "slid_loss": 0.6096, "step": 5927, "time": 71.06 }, { "epoch": 4.59, "learning_rate": "1.0679e-05", "loss": 0.6232, "slid_loss": 0.6096, "step": 5928, "time": 70.54 }, { "epoch": 4.59, "learning_rate": "1.0677e-05", "loss": 0.6171, "slid_loss": 0.6098, "step": 5929, "time": 72.31 }, { "epoch": 4.59, "learning_rate": "1.0674e-05", "loss": 0.6106, "slid_loss": 0.6094, "step": 5930, "time": 72.21 }, { "epoch": 4.59, "learning_rate": "1.0672e-05", "loss": 0.6049, "slid_loss": 0.6097, "step": 5931, "time": 72.51 }, { "epoch": 4.59, "learning_rate": "1.0669e-05", "loss": 0.6473, "slid_loss": 0.6101, "step": 5932, "time": 71.01 }, { "epoch": 4.59, "learning_rate": "1.0667e-05", "loss": 0.5694, "slid_loss": 0.6094, "step": 5933, "time": 73.65 }, { "epoch": 4.59, "learning_rate": "1.0664e-05", "loss": 0.6164, "slid_loss": 0.6095, "step": 5934, "time": 73.05 }, { "epoch": 4.59, "learning_rate": "1.0661e-05", "loss": 0.6491, "slid_loss": 0.6097, "step": 5935, "time": 70.94 }, { "epoch": 4.59, "learning_rate": "1.0659e-05", "loss": 0.593, "slid_loss": 0.6096, "step": 5936, "time": 72.19 }, { "epoch": 4.59, "learning_rate": "1.0656e-05", "loss": 0.6107, "slid_loss": 0.61, "step": 5937, "time": 70.88 }, { "epoch": 4.59, "learning_rate": "1.0654e-05", "loss": 0.6338, "slid_loss": 0.6102, "step": 5938, "time": 71.7 }, { "epoch": 4.59, "learning_rate": "1.0652e-05", "loss": 0.5933, "slid_loss": 0.61, "step": 5939, "time": 71.94 }, { "epoch": 4.6, "learning_rate": "1.0649e-05", "loss": 0.6334, "slid_loss": 0.6102, "step": 5940, "time": 71.18 }, { "epoch": 4.6, "learning_rate": "1.0647e-05", "loss": 0.5735, "slid_loss": 0.6098, "step": 5941, "time": 70.9 }, { "epoch": 4.6, "learning_rate": "1.0644e-05", "loss": 0.6185, "slid_loss": 0.61, "step": 5942, "time": 71.97 }, { "epoch": 4.6, "learning_rate": "1.0642e-05", "loss": 0.6289, "slid_loss": 0.6102, "step": 5943, "time": 71.61 }, { "epoch": 4.6, "learning_rate": "1.0639e-05", "loss": 0.5821, "slid_loss": 0.6101, "step": 5944, "time": 72.16 }, { "epoch": 4.6, "learning_rate": "1.0637e-05", "loss": 0.5519, "slid_loss": 0.6094, "step": 5945, "time": 70.0 }, { "epoch": 4.6, "learning_rate": "1.0634e-05", "loss": 0.6158, "slid_loss": 0.6092, "step": 5946, "time": 70.81 }, { "epoch": 4.6, "learning_rate": "1.0632e-05", "loss": 0.6425, "slid_loss": 0.6096, "step": 5947, "time": 70.95 }, { "epoch": 4.6, "learning_rate": "1.0629e-05", "loss": 0.6124, "slid_loss": 0.6097, "step": 5948, "time": 71.18 }, { "epoch": 4.6, "learning_rate": "1.0627e-05", "loss": 0.6199, "slid_loss": 0.6095, "step": 5949, "time": 71.68 }, { "epoch": 4.6, "learning_rate": "1.0624e-05", "loss": 0.6023, "slid_loss": 0.6094, "step": 5950, "time": 72.71 }, { "epoch": 4.6, "learning_rate": "1.0622e-05", "loss": 0.5823, "slid_loss": 0.6088, "step": 5951, "time": 72.76 }, { "epoch": 4.61, "learning_rate": "1.0620e-05", "loss": 0.6289, "slid_loss": 0.6093, "step": 5952, "time": 71.29 }, { "epoch": 4.61, "learning_rate": "1.0617e-05", "loss": 0.585, "slid_loss": 0.6093, "step": 5953, "time": 72.71 }, { "epoch": 4.61, "learning_rate": "1.0615e-05", "loss": 0.6174, "slid_loss": 0.6092, "step": 5954, "time": 71.88 }, { "epoch": 4.61, "learning_rate": "1.0612e-05", "loss": 0.6167, "slid_loss": 0.6095, "step": 5955, "time": 70.04 }, { "epoch": 4.61, "learning_rate": "1.0610e-05", "loss": 0.5975, "slid_loss": 0.6098, "step": 5956, "time": 72.55 }, { "epoch": 4.61, "learning_rate": "1.0607e-05", "loss": 0.6204, "slid_loss": 0.6097, "step": 5957, "time": 71.86 }, { "epoch": 4.61, "learning_rate": "1.0605e-05", "loss": 0.6108, "slid_loss": 0.6095, "step": 5958, "time": 72.34 }, { "epoch": 4.61, "learning_rate": "1.0603e-05", "loss": 0.5785, "slid_loss": 0.6088, "step": 5959, "time": 71.98 }, { "epoch": 4.61, "learning_rate": "1.0600e-05", "loss": 0.5919, "slid_loss": 0.6087, "step": 5960, "time": 70.55 }, { "epoch": 4.61, "learning_rate": "1.0598e-05", "loss": 0.5897, "slid_loss": 0.6088, "step": 5961, "time": 71.46 }, { "epoch": 4.61, "learning_rate": "1.0596e-05", "loss": 0.5857, "slid_loss": 0.6084, "step": 5962, "time": 72.24 }, { "epoch": 4.61, "learning_rate": "1.0593e-05", "loss": 0.5885, "slid_loss": 0.6081, "step": 5963, "time": 72.3 }, { "epoch": 4.61, "learning_rate": "1.0591e-05", "loss": 0.6193, "slid_loss": 0.6082, "step": 5964, "time": 71.68 }, { "epoch": 4.62, "learning_rate": "1.0588e-05", "loss": 0.6316, "slid_loss": 0.6082, "step": 5965, "time": 72.55 }, { "epoch": 4.62, "learning_rate": "1.0586e-05", "loss": 0.596, "slid_loss": 0.608, "step": 5966, "time": 71.59 }, { "epoch": 4.62, "learning_rate": "1.0584e-05", "loss": 0.6164, "slid_loss": 0.6079, "step": 5967, "time": 72.47 }, { "epoch": 4.62, "learning_rate": "1.0581e-05", "loss": 0.5849, "slid_loss": 0.6073, "step": 5968, "time": 71.84 }, { "epoch": 4.62, "learning_rate": "1.0579e-05", "loss": 0.6461, "slid_loss": 0.6076, "step": 5969, "time": 71.05 }, { "epoch": 4.62, "learning_rate": "1.0577e-05", "loss": 0.6163, "slid_loss": 0.6074, "step": 5970, "time": 69.94 }, { "epoch": 4.62, "learning_rate": "1.0574e-05", "loss": 0.6236, "slid_loss": 0.6076, "step": 5971, "time": 74.06 }, { "epoch": 4.62, "learning_rate": "1.0572e-05", "loss": 0.5823, "slid_loss": 0.6073, "step": 5972, "time": 72.35 }, { "epoch": 4.62, "learning_rate": "1.0570e-05", "loss": 0.6093, "slid_loss": 0.6075, "step": 5973, "time": 72.35 }, { "epoch": 4.62, "learning_rate": "1.0567e-05", "loss": 0.6102, "slid_loss": 0.6074, "step": 5974, "time": 71.74 }, { "epoch": 4.62, "learning_rate": "1.0565e-05", "loss": 0.616, "slid_loss": 0.6078, "step": 5975, "time": 71.03 }, { "epoch": 4.62, "learning_rate": "1.0563e-05", "loss": 0.6213, "slid_loss": 0.6079, "step": 5976, "time": 71.42 }, { "epoch": 4.62, "learning_rate": "1.0560e-05", "loss": 0.5717, "slid_loss": 0.6072, "step": 5977, "time": 69.94 }, { "epoch": 4.63, "learning_rate": "1.0558e-05", "loss": 0.5836, "slid_loss": 0.607, "step": 5978, "time": 73.09 }, { "epoch": 4.63, "learning_rate": "1.0556e-05", "loss": 0.583, "slid_loss": 0.6067, "step": 5979, "time": 71.81 }, { "epoch": 4.63, "learning_rate": "1.0553e-05", "loss": 0.6218, "slid_loss": 0.607, "step": 5980, "time": 72.56 }, { "epoch": 4.63, "learning_rate": "1.0551e-05", "loss": 0.5841, "slid_loss": 0.6066, "step": 5981, "time": 71.21 }, { "epoch": 4.63, "learning_rate": "1.0549e-05", "loss": 0.6021, "slid_loss": 0.607, "step": 5982, "time": 71.48 }, { "epoch": 4.63, "learning_rate": "1.0547e-05", "loss": 0.6178, "slid_loss": 0.6071, "step": 5983, "time": 72.72 }, { "epoch": 4.63, "learning_rate": "1.0544e-05", "loss": 0.6104, "slid_loss": 0.6073, "step": 5984, "time": 71.32 }, { "epoch": 4.63, "learning_rate": "1.0542e-05", "loss": 0.6258, "slid_loss": 0.6075, "step": 5985, "time": 71.49 }, { "epoch": 4.63, "learning_rate": "1.0540e-05", "loss": 0.6102, "slid_loss": 0.6075, "step": 5986, "time": 71.48 }, { "epoch": 4.63, "learning_rate": "1.0538e-05", "loss": 0.5894, "slid_loss": 0.6073, "step": 5987, "time": 71.36 }, { "epoch": 4.63, "learning_rate": "1.0535e-05", "loss": 0.603, "slid_loss": 0.6077, "step": 5988, "time": 71.47 }, { "epoch": 4.63, "learning_rate": "1.0533e-05", "loss": 0.5873, "slid_loss": 0.6075, "step": 5989, "time": 73.11 }, { "epoch": 4.63, "learning_rate": "1.0531e-05", "loss": 0.6034, "slid_loss": 0.6073, "step": 5990, "time": 70.77 }, { "epoch": 4.64, "learning_rate": "1.0528e-05", "loss": 0.583, "slid_loss": 0.6071, "step": 5991, "time": 72.75 }, { "epoch": 4.64, "learning_rate": "1.0526e-05", "loss": 0.6154, "slid_loss": 0.6074, "step": 5992, "time": 72.52 }, { "epoch": 4.64, "learning_rate": "1.0524e-05", "loss": 0.6329, "slid_loss": 0.6076, "step": 5993, "time": 72.15 }, { "epoch": 4.64, "learning_rate": "1.0522e-05", "loss": 0.6103, "slid_loss": 0.6072, "step": 5994, "time": 71.38 }, { "epoch": 4.64, "learning_rate": "1.0520e-05", "loss": 0.6271, "slid_loss": 0.6072, "step": 5995, "time": 72.97 }, { "epoch": 4.64, "learning_rate": "1.0517e-05", "loss": 0.5993, "slid_loss": 0.6068, "step": 5996, "time": 73.38 }, { "epoch": 4.64, "learning_rate": "1.0515e-05", "loss": 0.6116, "slid_loss": 0.6071, "step": 5997, "time": 72.13 }, { "epoch": 4.64, "learning_rate": "1.0513e-05", "loss": 0.6011, "slid_loss": 0.6071, "step": 5998, "time": 72.1 }, { "epoch": 4.64, "learning_rate": "1.0511e-05", "loss": 0.5925, "slid_loss": 0.607, "step": 5999, "time": 71.71 }, { "epoch": 4.64, "learning_rate": "1.0508e-05", "loss": 0.5736, "slid_loss": 0.607, "step": 6000, "time": 72.04 }, { "epoch": 4.64, "learning_rate": "1.0506e-05", "loss": 0.6162, "slid_loss": 0.6069, "step": 6001, "time": 848.42 }, { "epoch": 4.64, "learning_rate": "1.0504e-05", "loss": 0.5787, "slid_loss": 0.6063, "step": 6002, "time": 71.23 }, { "epoch": 4.64, "learning_rate": "1.0502e-05", "loss": 0.6145, "slid_loss": 0.6063, "step": 6003, "time": 72.12 }, { "epoch": 4.65, "learning_rate": "1.0500e-05", "loss": 0.601, "slid_loss": 0.606, "step": 6004, "time": 72.07 }, { "epoch": 4.65, "learning_rate": "1.0498e-05", "loss": 0.5877, "slid_loss": 0.6057, "step": 6005, "time": 71.91 }, { "epoch": 4.65, "learning_rate": "1.0495e-05", "loss": 0.6133, "slid_loss": 0.6058, "step": 6006, "time": 72.97 }, { "epoch": 4.65, "learning_rate": "1.0493e-05", "loss": 0.6135, "slid_loss": 0.6061, "step": 6007, "time": 71.46 }, { "epoch": 4.65, "learning_rate": "1.0491e-05", "loss": 0.5811, "slid_loss": 0.606, "step": 6008, "time": 71.55 }, { "epoch": 4.65, "learning_rate": "1.0489e-05", "loss": 0.6129, "slid_loss": 0.6062, "step": 6009, "time": 71.16 }, { "epoch": 4.65, "learning_rate": "1.0487e-05", "loss": 0.5913, "slid_loss": 0.6057, "step": 6010, "time": 72.78 }, { "epoch": 4.65, "learning_rate": "1.0485e-05", "loss": 0.5978, "slid_loss": 0.6056, "step": 6011, "time": 72.9 }, { "epoch": 4.65, "learning_rate": "1.0482e-05", "loss": 0.6272, "slid_loss": 0.6058, "step": 6012, "time": 70.29 }, { "epoch": 4.65, "learning_rate": "1.0480e-05", "loss": 0.581, "slid_loss": 0.6056, "step": 6013, "time": 72.15 }, { "epoch": 4.65, "learning_rate": "1.0478e-05", "loss": 0.5973, "slid_loss": 0.6052, "step": 6014, "time": 71.25 }, { "epoch": 4.65, "learning_rate": "1.0476e-05", "loss": 0.6251, "slid_loss": 0.6057, "step": 6015, "time": 72.09 }, { "epoch": 4.65, "learning_rate": "1.0474e-05", "loss": 0.6696, "slid_loss": 0.6062, "step": 6016, "time": 71.05 }, { "epoch": 4.66, "learning_rate": "1.0472e-05", "loss": 0.6001, "slid_loss": 0.6063, "step": 6017, "time": 71.73 }, { "epoch": 4.66, "learning_rate": "1.0470e-05", "loss": 0.6318, "slid_loss": 0.6066, "step": 6018, "time": 74.25 }, { "epoch": 4.66, "learning_rate": "1.0468e-05", "loss": 0.5819, "slid_loss": 0.6065, "step": 6019, "time": 72.54 }, { "epoch": 4.66, "learning_rate": "1.0465e-05", "loss": 0.5524, "slid_loss": 0.6058, "step": 6020, "time": 71.16 }, { "epoch": 4.66, "learning_rate": "1.0463e-05", "loss": 0.6015, "slid_loss": 0.6053, "step": 6021, "time": 71.67 }, { "epoch": 4.66, "learning_rate": "1.0461e-05", "loss": 0.5889, "slid_loss": 0.6051, "step": 6022, "time": 83.24 }, { "epoch": 4.66, "learning_rate": "1.0459e-05", "loss": 0.6021, "slid_loss": 0.6053, "step": 6023, "time": 71.2 }, { "epoch": 4.66, "learning_rate": "1.0457e-05", "loss": 0.5921, "slid_loss": 0.605, "step": 6024, "time": 72.87 }, { "epoch": 4.66, "learning_rate": "1.0455e-05", "loss": 0.5822, "slid_loss": 0.6048, "step": 6025, "time": 71.98 }, { "epoch": 4.66, "learning_rate": "1.0453e-05", "loss": 0.613, "slid_loss": 0.6049, "step": 6026, "time": 75.59 }, { "epoch": 4.66, "learning_rate": "1.0451e-05", "loss": 0.6038, "slid_loss": 0.6051, "step": 6027, "time": 71.94 }, { "epoch": 4.66, "learning_rate": "1.0449e-05", "loss": 0.5859, "slid_loss": 0.6048, "step": 6028, "time": 73.4 }, { "epoch": 4.66, "learning_rate": "1.0447e-05", "loss": 0.6044, "slid_loss": 0.6047, "step": 6029, "time": 71.57 }, { "epoch": 4.67, "learning_rate": "1.0445e-05", "loss": 0.6343, "slid_loss": 0.6049, "step": 6030, "time": 111.2 }, { "epoch": 4.67, "learning_rate": "1.0443e-05", "loss": 0.5932, "slid_loss": 0.6048, "step": 6031, "time": 96.82 }, { "epoch": 4.67, "learning_rate": "1.0440e-05", "loss": 0.6364, "slid_loss": 0.6047, "step": 6032, "time": 96.25 }, { "epoch": 4.67, "learning_rate": "1.0438e-05", "loss": 0.6006, "slid_loss": 0.605, "step": 6033, "time": 71.65 }, { "epoch": 4.67, "learning_rate": "1.0436e-05", "loss": 0.5818, "slid_loss": 0.6046, "step": 6034, "time": 83.98 }, { "epoch": 4.67, "learning_rate": "1.0434e-05", "loss": 0.6228, "slid_loss": 0.6044, "step": 6035, "time": 96.24 }, { "epoch": 4.67, "learning_rate": "1.0432e-05", "loss": 0.5786, "slid_loss": 0.6042, "step": 6036, "time": 146.17 }, { "epoch": 4.67, "learning_rate": "1.0430e-05", "loss": 0.5888, "slid_loss": 0.604, "step": 6037, "time": 126.52 }, { "epoch": 4.67, "learning_rate": "1.0428e-05", "loss": 0.6279, "slid_loss": 0.6039, "step": 6038, "time": 129.73 }, { "epoch": 4.67, "learning_rate": "1.0426e-05", "loss": 0.6126, "slid_loss": 0.6041, "step": 6039, "time": 120.76 }, { "epoch": 4.67, "learning_rate": "1.0424e-05", "loss": 0.6021, "slid_loss": 0.6038, "step": 6040, "time": 159.7 }, { "epoch": 4.67, "learning_rate": "1.0422e-05", "loss": 0.5737, "slid_loss": 0.6038, "step": 6041, "time": 148.34 }, { "epoch": 4.67, "learning_rate": "1.0420e-05", "loss": 0.59, "slid_loss": 0.6035, "step": 6042, "time": 107.86 }, { "epoch": 4.68, "learning_rate": "1.0418e-05", "loss": 0.5888, "slid_loss": 0.6031, "step": 6043, "time": 131.63 }, { "epoch": 4.68, "learning_rate": "1.0416e-05", "loss": 0.6382, "slid_loss": 0.6037, "step": 6044, "time": 122.01 }, { "epoch": 4.68, "learning_rate": "1.0414e-05", "loss": 0.5787, "slid_loss": 0.604, "step": 6045, "time": 131.42 }, { "epoch": 4.68, "learning_rate": "1.0412e-05", "loss": 0.6061, "slid_loss": 0.6039, "step": 6046, "time": 122.89 }, { "epoch": 4.68, "learning_rate": "1.0410e-05", "loss": 0.6127, "slid_loss": 0.6036, "step": 6047, "time": 106.45 }, { "epoch": 4.68, "learning_rate": "1.0408e-05", "loss": 0.6131, "slid_loss": 0.6036, "step": 6048, "time": 124.56 }, { "epoch": 4.68, "learning_rate": "1.0406e-05", "loss": 0.5964, "slid_loss": 0.6033, "step": 6049, "time": 96.1 }, { "epoch": 4.68, "learning_rate": "1.0404e-05", "loss": 0.6082, "slid_loss": 0.6034, "step": 6050, "time": 97.31 }, { "epoch": 4.68, "learning_rate": "1.0402e-05", "loss": 0.5708, "slid_loss": 0.6033, "step": 6051, "time": 72.37 }, { "epoch": 4.68, "learning_rate": "1.0400e-05", "loss": 0.6295, "slid_loss": 0.6033, "step": 6052, "time": 85.03 }, { "epoch": 4.68, "learning_rate": "1.0398e-05", "loss": 0.6058, "slid_loss": 0.6035, "step": 6053, "time": 72.66 }, { "epoch": 4.68, "learning_rate": "1.0396e-05", "loss": 0.6023, "slid_loss": 0.6034, "step": 6054, "time": 72.64 }, { "epoch": 4.68, "learning_rate": "1.0395e-05", "loss": 0.6265, "slid_loss": 0.6034, "step": 6055, "time": 72.45 }, { "epoch": 4.69, "learning_rate": "1.0393e-05", "loss": 0.5684, "slid_loss": 0.6032, "step": 6056, "time": 72.34 }, { "epoch": 4.69, "learning_rate": "1.0391e-05", "loss": 0.5596, "slid_loss": 0.6026, "step": 6057, "time": 83.85 }, { "epoch": 4.69, "learning_rate": "1.0389e-05", "loss": 0.6147, "slid_loss": 0.6026, "step": 6058, "time": 72.93 }, { "epoch": 4.69, "learning_rate": "1.0387e-05", "loss": 0.5734, "slid_loss": 0.6025, "step": 6059, "time": 71.77 }, { "epoch": 4.69, "learning_rate": "1.0385e-05", "loss": 0.5979, "slid_loss": 0.6026, "step": 6060, "time": 71.42 }, { "epoch": 4.69, "learning_rate": "1.0383e-05", "loss": 0.597, "slid_loss": 0.6027, "step": 6061, "time": 71.64 }, { "epoch": 4.69, "learning_rate": "1.0381e-05", "loss": 0.619, "slid_loss": 0.603, "step": 6062, "time": 71.28 }, { "epoch": 4.69, "learning_rate": "1.0379e-05", "loss": 0.5848, "slid_loss": 0.603, "step": 6063, "time": 71.86 }, { "epoch": 4.69, "learning_rate": "1.0377e-05", "loss": 0.583, "slid_loss": 0.6026, "step": 6064, "time": 71.24 }, { "epoch": 4.69, "learning_rate": "1.0375e-05", "loss": 0.5868, "slid_loss": 0.6022, "step": 6065, "time": 72.27 }, { "epoch": 4.69, "learning_rate": "1.0373e-05", "loss": 0.6398, "slid_loss": 0.6026, "step": 6066, "time": 71.61 }, { "epoch": 4.69, "learning_rate": "1.0372e-05", "loss": 0.6002, "slid_loss": 0.6024, "step": 6067, "time": 71.64 }, { "epoch": 4.69, "learning_rate": "1.0370e-05", "loss": 0.6217, "slid_loss": 0.6028, "step": 6068, "time": 70.34 }, { "epoch": 4.7, "learning_rate": "1.0368e-05", "loss": 0.6179, "slid_loss": 0.6025, "step": 6069, "time": 71.56 }, { "epoch": 4.7, "learning_rate": "1.0366e-05", "loss": 0.6465, "slid_loss": 0.6028, "step": 6070, "time": 72.4 }, { "epoch": 4.7, "learning_rate": "1.0364e-05", "loss": 0.5772, "slid_loss": 0.6024, "step": 6071, "time": 71.17 }, { "epoch": 4.7, "learning_rate": "1.0362e-05", "loss": 0.6034, "slid_loss": 0.6026, "step": 6072, "time": 71.4 }, { "epoch": 4.7, "learning_rate": "1.0360e-05", "loss": 0.617, "slid_loss": 0.6026, "step": 6073, "time": 72.68 }, { "epoch": 4.7, "learning_rate": "1.0359e-05", "loss": 0.5829, "slid_loss": 0.6024, "step": 6074, "time": 71.81 }, { "epoch": 4.7, "learning_rate": "1.0357e-05", "loss": 0.6024, "slid_loss": 0.6022, "step": 6075, "time": 71.84 }, { "epoch": 4.7, "learning_rate": "1.0355e-05", "loss": 0.5647, "slid_loss": 0.6017, "step": 6076, "time": 71.68 }, { "epoch": 4.7, "learning_rate": "1.0353e-05", "loss": 0.6115, "slid_loss": 0.6021, "step": 6077, "time": 71.66 }, { "epoch": 4.7, "learning_rate": "1.0351e-05", "loss": 0.5989, "slid_loss": 0.6022, "step": 6078, "time": 72.81 }, { "epoch": 4.7, "learning_rate": "1.0349e-05", "loss": 0.638, "slid_loss": 0.6028, "step": 6079, "time": 70.97 }, { "epoch": 4.7, "learning_rate": "1.0347e-05", "loss": 0.6334, "slid_loss": 0.6029, "step": 6080, "time": 72.32 }, { "epoch": 4.7, "learning_rate": "1.0346e-05", "loss": 0.6228, "slid_loss": 0.6033, "step": 6081, "time": 70.85 }, { "epoch": 4.71, "learning_rate": "1.0344e-05", "loss": 0.566, "slid_loss": 0.6029, "step": 6082, "time": 71.91 }, { "epoch": 4.71, "learning_rate": "1.0342e-05", "loss": 0.5777, "slid_loss": 0.6025, "step": 6083, "time": 71.8 }, { "epoch": 4.71, "learning_rate": "1.0340e-05", "loss": 0.6401, "slid_loss": 0.6028, "step": 6084, "time": 70.74 }, { "epoch": 4.71, "learning_rate": "1.0338e-05", "loss": 0.5783, "slid_loss": 0.6023, "step": 6085, "time": 72.47 }, { "epoch": 4.71, "learning_rate": "1.0337e-05", "loss": 0.5698, "slid_loss": 0.6019, "step": 6086, "time": 71.99 }, { "epoch": 4.71, "learning_rate": "1.0335e-05", "loss": 0.5979, "slid_loss": 0.602, "step": 6087, "time": 72.69 }, { "epoch": 4.71, "learning_rate": "1.0333e-05", "loss": 0.5713, "slid_loss": 0.6017, "step": 6088, "time": 73.01 }, { "epoch": 4.71, "learning_rate": "1.0331e-05", "loss": 0.6258, "slid_loss": 0.6021, "step": 6089, "time": 71.32 }, { "epoch": 4.71, "learning_rate": "1.0329e-05", "loss": 0.6428, "slid_loss": 0.6025, "step": 6090, "time": 71.29 }, { "epoch": 4.71, "learning_rate": "1.0328e-05", "loss": 0.5991, "slid_loss": 0.6026, "step": 6091, "time": 71.31 }, { "epoch": 4.71, "learning_rate": "1.0326e-05", "loss": 0.5767, "slid_loss": 0.6022, "step": 6092, "time": 71.76 }, { "epoch": 4.71, "learning_rate": "1.0324e-05", "loss": 0.579, "slid_loss": 0.6017, "step": 6093, "time": 72.5 }, { "epoch": 4.71, "learning_rate": "1.0322e-05", "loss": 0.6048, "slid_loss": 0.6017, "step": 6094, "time": 69.79 }, { "epoch": 4.72, "learning_rate": "1.0321e-05", "loss": 0.6096, "slid_loss": 0.6015, "step": 6095, "time": 72.61 }, { "epoch": 4.72, "learning_rate": "1.0319e-05", "loss": 0.5881, "slid_loss": 0.6014, "step": 6096, "time": 71.53 }, { "epoch": 4.72, "learning_rate": "1.0317e-05", "loss": 0.5965, "slid_loss": 0.6012, "step": 6097, "time": 72.69 }, { "epoch": 4.72, "learning_rate": "1.0315e-05", "loss": 0.6086, "slid_loss": 0.6013, "step": 6098, "time": 71.28 }, { "epoch": 4.72, "learning_rate": "1.0314e-05", "loss": 0.5542, "slid_loss": 0.6009, "step": 6099, "time": 71.28 }, { "epoch": 4.72, "learning_rate": "1.0312e-05", "loss": 0.5936, "slid_loss": 0.6011, "step": 6100, "time": 71.42 }, { "epoch": 4.72, "learning_rate": "1.0310e-05", "loss": 0.5931, "slid_loss": 0.6009, "step": 6101, "time": 70.69 }, { "epoch": 4.72, "learning_rate": "1.0309e-05", "loss": 0.5906, "slid_loss": 0.601, "step": 6102, "time": 71.35 }, { "epoch": 4.72, "learning_rate": "1.0307e-05", "loss": 0.6163, "slid_loss": 0.601, "step": 6103, "time": 71.59 }, { "epoch": 4.72, "learning_rate": "1.0305e-05", "loss": 0.5908, "slid_loss": 0.6009, "step": 6104, "time": 71.34 }, { "epoch": 4.72, "learning_rate": "1.0303e-05", "loss": 0.6185, "slid_loss": 0.6012, "step": 6105, "time": 72.19 }, { "epoch": 4.72, "learning_rate": "1.0302e-05", "loss": 0.5685, "slid_loss": 0.6008, "step": 6106, "time": 72.7 }, { "epoch": 4.72, "learning_rate": "1.0300e-05", "loss": 0.6002, "slid_loss": 0.6006, "step": 6107, "time": 72.44 }, { "epoch": 4.73, "learning_rate": "1.0298e-05", "loss": 0.6009, "slid_loss": 0.6008, "step": 6108, "time": 71.49 }, { "epoch": 4.73, "learning_rate": "1.0297e-05", "loss": 0.5756, "slid_loss": 0.6005, "step": 6109, "time": 70.73 }, { "epoch": 4.73, "learning_rate": "1.0295e-05", "loss": 0.607, "slid_loss": 0.6006, "step": 6110, "time": 70.35 }, { "epoch": 4.73, "learning_rate": "1.0293e-05", "loss": 0.5902, "slid_loss": 0.6005, "step": 6111, "time": 71.38 }, { "epoch": 4.73, "learning_rate": "1.0292e-05", "loss": 0.6101, "slid_loss": 0.6004, "step": 6112, "time": 71.82 }, { "epoch": 4.73, "learning_rate": "1.0290e-05", "loss": 0.6002, "slid_loss": 0.6006, "step": 6113, "time": 73.0 }, { "epoch": 4.73, "learning_rate": "1.0288e-05", "loss": 0.6286, "slid_loss": 0.6009, "step": 6114, "time": 70.65 }, { "epoch": 4.73, "learning_rate": "1.0287e-05", "loss": 0.6188, "slid_loss": 0.6008, "step": 6115, "time": 72.17 }, { "epoch": 4.73, "learning_rate": "1.0285e-05", "loss": 0.6242, "slid_loss": 0.6004, "step": 6116, "time": 71.77 }, { "epoch": 4.73, "learning_rate": "1.0283e-05", "loss": 0.6253, "slid_loss": 0.6006, "step": 6117, "time": 71.61 }, { "epoch": 4.73, "learning_rate": "1.0282e-05", "loss": 0.5648, "slid_loss": 0.5999, "step": 6118, "time": 73.44 }, { "epoch": 4.73, "learning_rate": "1.0280e-05", "loss": 0.6102, "slid_loss": 0.6002, "step": 6119, "time": 72.06 }, { "epoch": 4.74, "learning_rate": "1.0278e-05", "loss": 0.6052, "slid_loss": 0.6008, "step": 6120, "time": 72.26 }, { "epoch": 4.74, "learning_rate": "1.0277e-05", "loss": 0.5998, "slid_loss": 0.6007, "step": 6121, "time": 71.12 }, { "epoch": 4.74, "learning_rate": "1.0275e-05", "loss": 0.615, "slid_loss": 0.601, "step": 6122, "time": 71.27 }, { "epoch": 4.74, "learning_rate": "1.0273e-05", "loss": 0.5963, "slid_loss": 0.6009, "step": 6123, "time": 71.06 }, { "epoch": 4.74, "learning_rate": "1.0272e-05", "loss": 0.6229, "slid_loss": 0.6013, "step": 6124, "time": 70.92 }, { "epoch": 4.74, "learning_rate": "1.0270e-05", "loss": 0.6007, "slid_loss": 0.6014, "step": 6125, "time": 71.46 }, { "epoch": 4.74, "learning_rate": "1.0269e-05", "loss": 0.5885, "slid_loss": 0.6012, "step": 6126, "time": 72.03 }, { "epoch": 4.74, "learning_rate": "1.0267e-05", "loss": 0.5997, "slid_loss": 0.6012, "step": 6127, "time": 71.84 }, { "epoch": 4.74, "learning_rate": "1.0265e-05", "loss": 0.6049, "slid_loss": 0.6013, "step": 6128, "time": 72.22 }, { "epoch": 4.74, "learning_rate": "1.0264e-05", "loss": 0.6455, "slid_loss": 0.6018, "step": 6129, "time": 71.83 }, { "epoch": 4.74, "learning_rate": "1.0262e-05", "loss": 0.6254, "slid_loss": 0.6017, "step": 6130, "time": 72.79 }, { "epoch": 4.74, "learning_rate": "1.0261e-05", "loss": 0.6113, "slid_loss": 0.6018, "step": 6131, "time": 71.99 }, { "epoch": 4.74, "learning_rate": "1.0259e-05", "loss": 0.6185, "slid_loss": 0.6017, "step": 6132, "time": 70.66 }, { "epoch": 4.75, "learning_rate": "1.0258e-05", "loss": 0.5781, "slid_loss": 0.6014, "step": 6133, "time": 71.33 }, { "epoch": 4.75, "learning_rate": "1.0256e-05", "loss": 0.6335, "slid_loss": 0.602, "step": 6134, "time": 72.04 }, { "epoch": 4.75, "learning_rate": "1.0254e-05", "loss": 0.5916, "slid_loss": 0.6016, "step": 6135, "time": 71.3 }, { "epoch": 4.75, "learning_rate": "1.0253e-05", "loss": 0.6242, "slid_loss": 0.6021, "step": 6136, "time": 70.63 }, { "epoch": 4.75, "learning_rate": "1.0251e-05", "loss": 0.5983, "slid_loss": 0.6022, "step": 6137, "time": 71.22 }, { "epoch": 4.75, "learning_rate": "1.0250e-05", "loss": 0.6329, "slid_loss": 0.6022, "step": 6138, "time": 72.01 }, { "epoch": 4.75, "learning_rate": "1.0248e-05", "loss": 0.6378, "slid_loss": 0.6025, "step": 6139, "time": 72.48 }, { "epoch": 4.75, "learning_rate": "1.0247e-05", "loss": 0.5753, "slid_loss": 0.6022, "step": 6140, "time": 72.83 }, { "epoch": 4.75, "learning_rate": "1.0245e-05", "loss": 0.6262, "slid_loss": 0.6028, "step": 6141, "time": 72.05 }, { "epoch": 4.75, "learning_rate": "1.0244e-05", "loss": 0.5631, "slid_loss": 0.6025, "step": 6142, "time": 71.72 }, { "epoch": 4.75, "learning_rate": "1.0242e-05", "loss": 0.5878, "slid_loss": 0.6025, "step": 6143, "time": 71.52 }, { "epoch": 4.75, "learning_rate": "1.0241e-05", "loss": 0.5892, "slid_loss": 0.602, "step": 6144, "time": 71.76 }, { "epoch": 4.75, "learning_rate": "1.0239e-05", "loss": 0.6061, "slid_loss": 0.6023, "step": 6145, "time": 72.27 }, { "epoch": 4.76, "learning_rate": "1.0237e-05", "loss": 0.5855, "slid_loss": 0.6021, "step": 6146, "time": 73.03 }, { "epoch": 4.76, "learning_rate": "1.0236e-05", "loss": 0.5883, "slid_loss": 0.6018, "step": 6147, "time": 71.21 }, { "epoch": 4.76, "learning_rate": "1.0234e-05", "loss": 0.5941, "slid_loss": 0.6016, "step": 6148, "time": 70.73 }, { "epoch": 4.76, "learning_rate": "1.0233e-05", "loss": 0.5912, "slid_loss": 0.6016, "step": 6149, "time": 70.62 }, { "epoch": 4.76, "learning_rate": "1.0231e-05", "loss": 0.5894, "slid_loss": 0.6014, "step": 6150, "time": 71.76 }, { "epoch": 4.76, "learning_rate": "1.0230e-05", "loss": 0.626, "slid_loss": 0.6019, "step": 6151, "time": 71.51 }, { "epoch": 4.76, "learning_rate": "1.0229e-05", "loss": 0.5937, "slid_loss": 0.6016, "step": 6152, "time": 71.87 }, { "epoch": 4.76, "learning_rate": "1.0227e-05", "loss": 0.5935, "slid_loss": 0.6015, "step": 6153, "time": 71.99 }, { "epoch": 4.76, "learning_rate": "1.0226e-05", "loss": 0.5719, "slid_loss": 0.6011, "step": 6154, "time": 70.97 }, { "epoch": 4.76, "learning_rate": "1.0224e-05", "loss": 0.6044, "slid_loss": 0.6009, "step": 6155, "time": 71.26 }, { "epoch": 4.76, "learning_rate": "1.0223e-05", "loss": 0.6028, "slid_loss": 0.6013, "step": 6156, "time": 70.92 }, { "epoch": 4.76, "learning_rate": "1.0221e-05", "loss": 0.6086, "slid_loss": 0.6018, "step": 6157, "time": 71.97 }, { "epoch": 4.76, "learning_rate": "1.0220e-05", "loss": 0.6049, "slid_loss": 0.6017, "step": 6158, "time": 72.96 }, { "epoch": 4.77, "learning_rate": "1.0218e-05", "loss": 0.6047, "slid_loss": 0.602, "step": 6159, "time": 72.01 }, { "epoch": 4.77, "learning_rate": "1.0217e-05", "loss": 0.5809, "slid_loss": 0.6018, "step": 6160, "time": 70.9 }, { "epoch": 4.77, "learning_rate": "1.0215e-05", "loss": 0.5958, "slid_loss": 0.6018, "step": 6161, "time": 72.42 }, { "epoch": 4.77, "learning_rate": "1.0214e-05", "loss": 0.6001, "slid_loss": 0.6016, "step": 6162, "time": 72.14 }, { "epoch": 4.77, "learning_rate": "1.0213e-05", "loss": 0.6281, "slid_loss": 0.602, "step": 6163, "time": 73.07 }, { "epoch": 4.77, "learning_rate": "1.0211e-05", "loss": 0.6267, "slid_loss": 0.6025, "step": 6164, "time": 72.23 }, { "epoch": 4.77, "learning_rate": "1.0210e-05", "loss": 0.6146, "slid_loss": 0.6028, "step": 6165, "time": 72.3 }, { "epoch": 4.77, "learning_rate": "1.0208e-05", "loss": 0.6206, "slid_loss": 0.6026, "step": 6166, "time": 70.74 }, { "epoch": 4.77, "learning_rate": "1.0207e-05", "loss": 0.6035, "slid_loss": 0.6026, "step": 6167, "time": 71.68 }, { "epoch": 4.77, "learning_rate": "1.0205e-05", "loss": 0.596, "slid_loss": 0.6023, "step": 6168, "time": 73.6 }, { "epoch": 4.77, "learning_rate": "1.0204e-05", "loss": 0.5934, "slid_loss": 0.6021, "step": 6169, "time": 72.28 }, { "epoch": 4.77, "learning_rate": "1.0203e-05", "loss": 0.5986, "slid_loss": 0.6016, "step": 6170, "time": 71.66 }, { "epoch": 4.77, "learning_rate": "1.0201e-05", "loss": 0.599, "slid_loss": 0.6018, "step": 6171, "time": 71.11 }, { "epoch": 4.78, "learning_rate": "1.0200e-05", "loss": 0.5802, "slid_loss": 0.6016, "step": 6172, "time": 74.55 }, { "epoch": 4.78, "learning_rate": "1.0198e-05", "loss": 0.5997, "slid_loss": 0.6014, "step": 6173, "time": 71.43 }, { "epoch": 4.78, "learning_rate": "1.0197e-05", "loss": 0.6064, "slid_loss": 0.6017, "step": 6174, "time": 71.32 }, { "epoch": 4.78, "learning_rate": "1.0196e-05", "loss": 0.5885, "slid_loss": 0.6015, "step": 6175, "time": 70.92 }, { "epoch": 4.78, "learning_rate": "1.0194e-05", "loss": 0.5731, "slid_loss": 0.6016, "step": 6176, "time": 72.6 }, { "epoch": 4.78, "learning_rate": "1.0193e-05", "loss": 0.6127, "slid_loss": 0.6016, "step": 6177, "time": 74.04 }, { "epoch": 4.78, "learning_rate": "1.0192e-05", "loss": 0.5882, "slid_loss": 0.6015, "step": 6178, "time": 71.74 }, { "epoch": 4.78, "learning_rate": "1.0190e-05", "loss": 0.6093, "slid_loss": 0.6012, "step": 6179, "time": 71.7 }, { "epoch": 4.78, "learning_rate": "1.0189e-05", "loss": 0.6098, "slid_loss": 0.601, "step": 6180, "time": 70.71 }, { "epoch": 4.78, "learning_rate": "1.0188e-05", "loss": 0.605, "slid_loss": 0.6008, "step": 6181, "time": 88.0 }, { "epoch": 4.78, "learning_rate": "1.0186e-05", "loss": 0.5955, "slid_loss": 0.6011, "step": 6182, "time": 72.43 }, { "epoch": 4.78, "learning_rate": "1.0185e-05", "loss": 0.5706, "slid_loss": 0.601, "step": 6183, "time": 73.54 }, { "epoch": 4.78, "learning_rate": "1.0184e-05", "loss": 0.6073, "slid_loss": 0.6007, "step": 6184, "time": 71.39 }, { "epoch": 4.79, "learning_rate": "1.0182e-05", "loss": 0.627, "slid_loss": 0.6012, "step": 6185, "time": 71.54 }, { "epoch": 4.79, "learning_rate": "1.0181e-05", "loss": 0.6433, "slid_loss": 0.6019, "step": 6186, "time": 73.79 }, { "epoch": 4.79, "learning_rate": "1.0180e-05", "loss": 0.5878, "slid_loss": 0.6018, "step": 6187, "time": 71.39 }, { "epoch": 4.79, "learning_rate": "1.0178e-05", "loss": 0.5967, "slid_loss": 0.6021, "step": 6188, "time": 70.5 }, { "epoch": 4.79, "learning_rate": "1.0177e-05", "loss": 0.6122, "slid_loss": 0.6019, "step": 6189, "time": 83.26 }, { "epoch": 4.79, "learning_rate": "1.0176e-05", "loss": 0.6265, "slid_loss": 0.6018, "step": 6190, "time": 96.16 }, { "epoch": 4.79, "learning_rate": "1.0174e-05", "loss": 0.6025, "slid_loss": 0.6018, "step": 6191, "time": 84.13 }, { "epoch": 4.79, "learning_rate": "1.0173e-05", "loss": 0.5736, "slid_loss": 0.6018, "step": 6192, "time": 122.1 }, { "epoch": 4.79, "learning_rate": "1.0172e-05", "loss": 0.6411, "slid_loss": 0.6024, "step": 6193, "time": 95.31 }, { "epoch": 4.79, "learning_rate": "1.0171e-05", "loss": 0.5988, "slid_loss": 0.6023, "step": 6194, "time": 99.01 }, { "epoch": 4.79, "learning_rate": "1.0169e-05", "loss": 0.5876, "slid_loss": 0.6021, "step": 6195, "time": 132.44 }, { "epoch": 4.79, "learning_rate": "1.0168e-05", "loss": 0.626, "slid_loss": 0.6025, "step": 6196, "time": 136.7 }, { "epoch": 4.79, "learning_rate": "1.0167e-05", "loss": 0.6168, "slid_loss": 0.6027, "step": 6197, "time": 138.87 }, { "epoch": 4.8, "learning_rate": "1.0165e-05", "loss": 0.6144, "slid_loss": 0.6028, "step": 6198, "time": 110.97 }, { "epoch": 4.8, "learning_rate": "1.0164e-05", "loss": 0.6087, "slid_loss": 0.6033, "step": 6199, "time": 118.8 }, { "epoch": 4.8, "learning_rate": "1.0163e-05", "loss": 0.6342, "slid_loss": 0.6037, "step": 6200, "time": 136.51 } ], "logging_steps": 1.0, "max_steps": 6460, "num_train_epochs": 5, "save_steps": 5000, "total_flos": 0.0, "trial_name": null, "trial_params": null }