{ "best_metric": 0.8553540706634521, "best_model_checkpoint": "/data/kabanda/result_en_fr/checkpoint-70000", "epoch": 3.464660463274599, "global_step": 70000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.9876262126311625e-05, "loss": 5.2926, "step": 500 }, { "epoch": 0.02, "eval_bleu": 4.1544, "eval_gen_len": 19.7843, "eval_loss": 4.638627529144287, "eval_runtime": 1532.3805, "eval_samples_per_second": 4.661, "eval_steps_per_second": 0.389, "step": 500 }, { "epoch": 0.05, "learning_rate": 4.975252425262325e-05, "loss": 4.2117, "step": 1000 }, { "epoch": 0.05, "eval_bleu": 5.0944, "eval_gen_len": 25.8076, "eval_loss": 3.982415199279785, "eval_runtime": 2306.4168, "eval_samples_per_second": 3.097, "eval_steps_per_second": 0.258, "step": 1000 }, { "epoch": 0.07, "learning_rate": 4.962878637893486e-05, "loss": 3.6754, "step": 1500 }, { "epoch": 0.07, "eval_bleu": 9.7913, "eval_gen_len": 18.5118, "eval_loss": 3.528294801712036, "eval_runtime": 1473.4816, "eval_samples_per_second": 4.848, "eval_steps_per_second": 0.404, "step": 1500 }, { "epoch": 0.1, "learning_rate": 4.9505048505246485e-05, "loss": 3.2658, "step": 2000 }, { "epoch": 0.1, "eval_bleu": 11.9905, "eval_gen_len": 18.3529, "eval_loss": 3.1733100414276123, "eval_runtime": 1495.811, "eval_samples_per_second": 4.775, "eval_steps_per_second": 0.398, "step": 2000 }, { "epoch": 0.12, "learning_rate": 4.938131063155811e-05, "loss": 2.9666, "step": 2500 }, { "epoch": 0.12, "eval_bleu": 14.6761, "eval_gen_len": 17.3686, "eval_loss": 2.8982934951782227, "eval_runtime": 1369.1732, "eval_samples_per_second": 5.217, "eval_steps_per_second": 0.435, "step": 2500 }, { "epoch": 0.15, "learning_rate": 4.925757275786973e-05, "loss": 2.7349, "step": 3000 }, { "epoch": 0.15, "eval_bleu": 16.3199, "eval_gen_len": 17.9567, "eval_loss": 2.693359613418579, "eval_runtime": 1446.9239, "eval_samples_per_second": 4.937, "eval_steps_per_second": 0.412, "step": 3000 }, { "epoch": 0.17, "learning_rate": 4.913383488418135e-05, "loss": 2.5024, "step": 3500 }, { "epoch": 0.17, "eval_bleu": 18.1705, "eval_gen_len": 18.0659, "eval_loss": 2.521655321121216, "eval_runtime": 1477.4963, "eval_samples_per_second": 4.835, "eval_steps_per_second": 0.403, "step": 3500 }, { "epoch": 0.2, "learning_rate": 4.9010097010492975e-05, "loss": 2.3656, "step": 4000 }, { "epoch": 0.2, "eval_bleu": 19.9036, "eval_gen_len": 17.349, "eval_loss": 2.388259172439575, "eval_runtime": 1363.4598, "eval_samples_per_second": 5.239, "eval_steps_per_second": 0.437, "step": 4000 }, { "epoch": 0.22, "learning_rate": 4.888635913680459e-05, "loss": 2.2392, "step": 4500 }, { "epoch": 0.22, "eval_bleu": 21.2018, "eval_gen_len": 17.771, "eval_loss": 2.2573838233947754, "eval_runtime": 1439.0867, "eval_samples_per_second": 4.964, "eval_steps_per_second": 0.414, "step": 4500 }, { "epoch": 0.25, "learning_rate": 4.876262126311621e-05, "loss": 2.0961, "step": 5000 }, { "epoch": 0.25, "eval_bleu": 22.5424, "eval_gen_len": 17.0724, "eval_loss": 2.15091609954834, "eval_runtime": 1354.3029, "eval_samples_per_second": 5.274, "eval_steps_per_second": 0.44, "step": 5000 }, { "epoch": 0.27, "learning_rate": 4.8638883389427836e-05, "loss": 1.9984, "step": 5500 }, { "epoch": 0.27, "eval_bleu": 22.8122, "eval_gen_len": 18.099, "eval_loss": 2.0877768993377686, "eval_runtime": 1398.4571, "eval_samples_per_second": 5.108, "eval_steps_per_second": 0.426, "step": 5500 }, { "epoch": 0.3, "learning_rate": 4.851514551573946e-05, "loss": 1.9071, "step": 6000 }, { "epoch": 0.3, "eval_bleu": 25.4086, "eval_gen_len": 17.1431, "eval_loss": 1.975221872329712, "eval_runtime": 1364.3455, "eval_samples_per_second": 5.235, "eval_steps_per_second": 0.437, "step": 6000 }, { "epoch": 0.32, "learning_rate": 4.839140764205108e-05, "loss": 1.8419, "step": 6500 }, { "epoch": 0.32, "eval_bleu": 26.1858, "eval_gen_len": 17.5429, "eval_loss": 1.896142601966858, "eval_runtime": 1384.4266, "eval_samples_per_second": 5.16, "eval_steps_per_second": 0.431, "step": 6500 }, { "epoch": 0.35, "learning_rate": 4.82676697683627e-05, "loss": 1.7696, "step": 7000 }, { "epoch": 0.35, "eval_bleu": 26.7992, "eval_gen_len": 17.5017, "eval_loss": 1.835010051727295, "eval_runtime": 1380.8355, "eval_samples_per_second": 5.173, "eval_steps_per_second": 0.432, "step": 7000 }, { "epoch": 0.37, "learning_rate": 4.8143931894674325e-05, "loss": 1.709, "step": 7500 }, { "epoch": 0.37, "eval_bleu": 28.566, "eval_gen_len": 17.306, "eval_loss": 1.754841923713684, "eval_runtime": 1381.6803, "eval_samples_per_second": 5.17, "eval_steps_per_second": 0.431, "step": 7500 }, { "epoch": 0.4, "learning_rate": 4.802019402098595e-05, "loss": 1.6455, "step": 8000 }, { "epoch": 0.4, "eval_bleu": 28.9268, "eval_gen_len": 17.2583, "eval_loss": 1.7277344465255737, "eval_runtime": 1383.5791, "eval_samples_per_second": 5.163, "eval_steps_per_second": 0.431, "step": 8000 }, { "epoch": 0.42, "learning_rate": 4.789645614729757e-05, "loss": 1.5882, "step": 8500 }, { "epoch": 0.42, "eval_bleu": 29.6493, "eval_gen_len": 17.3461, "eval_loss": 1.6740621328353882, "eval_runtime": 1382.0901, "eval_samples_per_second": 5.168, "eval_steps_per_second": 0.431, "step": 8500 }, { "epoch": 0.45, "learning_rate": 4.777271827360919e-05, "loss": 1.5327, "step": 9000 }, { "epoch": 0.45, "eval_bleu": 30.4256, "eval_gen_len": 17.1523, "eval_loss": 1.6332658529281616, "eval_runtime": 1358.5029, "eval_samples_per_second": 5.258, "eval_steps_per_second": 0.439, "step": 9000 }, { "epoch": 0.47, "learning_rate": 4.7648980399920815e-05, "loss": 1.4922, "step": 9500 }, { "epoch": 0.47, "eval_bleu": 31.3386, "eval_gen_len": 17.1382, "eval_loss": 1.5828430652618408, "eval_runtime": 1370.5696, "eval_samples_per_second": 5.212, "eval_steps_per_second": 0.435, "step": 9500 }, { "epoch": 0.49, "learning_rate": 4.752524252623243e-05, "loss": 1.4468, "step": 10000 }, { "epoch": 0.49, "eval_bleu": 32.089, "eval_gen_len": 17.0442, "eval_loss": 1.5548750162124634, "eval_runtime": 1350.7323, "eval_samples_per_second": 5.288, "eval_steps_per_second": 0.441, "step": 10000 }, { "epoch": 0.52, "learning_rate": 4.740150465254405e-05, "loss": 1.4125, "step": 10500 }, { "epoch": 0.52, "eval_bleu": 32.4293, "eval_gen_len": 16.9427, "eval_loss": 1.52163827419281, "eval_runtime": 1355.6836, "eval_samples_per_second": 5.269, "eval_steps_per_second": 0.44, "step": 10500 }, { "epoch": 0.54, "learning_rate": 4.7277766778855676e-05, "loss": 1.411, "step": 11000 }, { "epoch": 0.54, "eval_bleu": 33.0258, "eval_gen_len": 16.7907, "eval_loss": 1.4887491464614868, "eval_runtime": 1353.9121, "eval_samples_per_second": 5.276, "eval_steps_per_second": 0.44, "step": 11000 }, { "epoch": 0.57, "learning_rate": 4.71540289051673e-05, "loss": 1.3452, "step": 11500 }, { "epoch": 0.57, "eval_bleu": 33.4866, "eval_gen_len": 16.9954, "eval_loss": 1.4632903337478638, "eval_runtime": 1372.2724, "eval_samples_per_second": 5.205, "eval_steps_per_second": 0.434, "step": 11500 }, { "epoch": 0.59, "learning_rate": 4.703029103147892e-05, "loss": 1.3275, "step": 12000 }, { "epoch": 0.59, "eval_bleu": 34.0579, "eval_gen_len": 16.8965, "eval_loss": 1.4243967533111572, "eval_runtime": 1354.7775, "eval_samples_per_second": 5.272, "eval_steps_per_second": 0.44, "step": 12000 }, { "epoch": 0.62, "learning_rate": 4.6906553157790536e-05, "loss": 1.3073, "step": 12500 }, { "epoch": 0.62, "eval_bleu": 34.2442, "eval_gen_len": 16.8726, "eval_loss": 1.405735969543457, "eval_runtime": 1344.8184, "eval_samples_per_second": 5.311, "eval_steps_per_second": 0.443, "step": 12500 }, { "epoch": 0.64, "learning_rate": 4.678281528410216e-05, "loss": 1.283, "step": 13000 }, { "epoch": 0.64, "eval_bleu": 35.3879, "eval_gen_len": 17.1285, "eval_loss": 1.3807507753372192, "eval_runtime": 1352.5706, "eval_samples_per_second": 5.281, "eval_steps_per_second": 0.441, "step": 13000 }, { "epoch": 0.67, "learning_rate": 4.665907741041378e-05, "loss": 1.2526, "step": 13500 }, { "epoch": 0.67, "eval_bleu": 35.286, "eval_gen_len": 17.0997, "eval_loss": 1.3556008338928223, "eval_runtime": 1360.8433, "eval_samples_per_second": 5.249, "eval_steps_per_second": 0.438, "step": 13500 }, { "epoch": 0.69, "learning_rate": 4.65353395367254e-05, "loss": 1.2355, "step": 14000 }, { "epoch": 0.69, "eval_bleu": 35.5153, "eval_gen_len": 16.7456, "eval_loss": 1.3322172164916992, "eval_runtime": 1342.5622, "eval_samples_per_second": 5.32, "eval_steps_per_second": 0.444, "step": 14000 }, { "epoch": 0.72, "learning_rate": 4.6411601663037026e-05, "loss": 1.2072, "step": 14500 }, { "epoch": 0.72, "eval_bleu": 35.9173, "eval_gen_len": 16.9327, "eval_loss": 1.3275119066238403, "eval_runtime": 1338.7699, "eval_samples_per_second": 5.335, "eval_steps_per_second": 0.445, "step": 14500 }, { "epoch": 0.74, "learning_rate": 4.628786378934865e-05, "loss": 1.195, "step": 15000 }, { "epoch": 0.74, "eval_bleu": 35.7681, "eval_gen_len": 16.8524, "eval_loss": 1.3128494024276733, "eval_runtime": 1345.0353, "eval_samples_per_second": 5.311, "eval_steps_per_second": 0.443, "step": 15000 }, { "epoch": 0.77, "learning_rate": 4.6164125915660264e-05, "loss": 1.1689, "step": 15500 }, { "epoch": 0.77, "eval_bleu": 36.7489, "eval_gen_len": 16.8296, "eval_loss": 1.2844336032867432, "eval_runtime": 1344.8016, "eval_samples_per_second": 5.312, "eval_steps_per_second": 0.443, "step": 15500 }, { "epoch": 0.79, "learning_rate": 4.6040388041971886e-05, "loss": 1.1573, "step": 16000 }, { "epoch": 0.79, "eval_bleu": 37.2278, "eval_gen_len": 16.8921, "eval_loss": 1.2794346809387207, "eval_runtime": 1341.0354, "eval_samples_per_second": 5.326, "eval_steps_per_second": 0.444, "step": 16000 }, { "epoch": 0.82, "learning_rate": 4.591665016828351e-05, "loss": 1.1307, "step": 16500 }, { "epoch": 0.82, "eval_bleu": 36.748, "eval_gen_len": 16.5867, "eval_loss": 1.2566970586776733, "eval_runtime": 1321.9093, "eval_samples_per_second": 5.404, "eval_steps_per_second": 0.451, "step": 16500 }, { "epoch": 0.84, "learning_rate": 4.579291229459513e-05, "loss": 1.1222, "step": 17000 }, { "epoch": 0.84, "eval_bleu": 37.2793, "eval_gen_len": 16.838, "eval_loss": 1.2408839464187622, "eval_runtime": 1339.7522, "eval_samples_per_second": 5.332, "eval_steps_per_second": 0.445, "step": 17000 }, { "epoch": 0.87, "learning_rate": 4.5669174420906754e-05, "loss": 1.1129, "step": 17500 }, { "epoch": 0.87, "eval_bleu": 37.5801, "eval_gen_len": 16.6506, "eval_loss": 1.2285243272781372, "eval_runtime": 1332.5485, "eval_samples_per_second": 5.36, "eval_steps_per_second": 0.447, "step": 17500 }, { "epoch": 0.89, "learning_rate": 4.5545436547218376e-05, "loss": 1.11, "step": 18000 }, { "epoch": 0.89, "eval_bleu": 37.3726, "eval_gen_len": 17.1085, "eval_loss": 1.2697083950042725, "eval_runtime": 1412.2634, "eval_samples_per_second": 5.058, "eval_steps_per_second": 0.422, "step": 18000 }, { "epoch": 0.92, "learning_rate": 4.542169867352999e-05, "loss": 1.0779, "step": 18500 }, { "epoch": 0.92, "eval_bleu": 38.4744, "eval_gen_len": 16.9744, "eval_loss": 1.2056454420089722, "eval_runtime": 1349.6935, "eval_samples_per_second": 5.292, "eval_steps_per_second": 0.442, "step": 18500 }, { "epoch": 0.94, "learning_rate": 4.5297960799841614e-05, "loss": 1.0651, "step": 19000 }, { "epoch": 0.94, "eval_bleu": 38.3271, "eval_gen_len": 17.006, "eval_loss": 1.1996458768844604, "eval_runtime": 1369.5468, "eval_samples_per_second": 5.216, "eval_steps_per_second": 0.435, "step": 19000 }, { "epoch": 0.97, "learning_rate": 4.5174222926153237e-05, "loss": 1.0647, "step": 19500 }, { "epoch": 0.97, "eval_bleu": 38.6809, "eval_gen_len": 16.7178, "eval_loss": 1.1885197162628174, "eval_runtime": 1332.8156, "eval_samples_per_second": 5.359, "eval_steps_per_second": 0.447, "step": 19500 }, { "epoch": 0.99, "learning_rate": 4.505048505246486e-05, "loss": 1.0463, "step": 20000 }, { "epoch": 0.99, "eval_bleu": 39.0708, "eval_gen_len": 16.8376, "eval_loss": 1.1725914478302002, "eval_runtime": 1343.3869, "eval_samples_per_second": 5.317, "eval_steps_per_second": 0.444, "step": 20000 }, { "epoch": 1.01, "learning_rate": 4.492674717877648e-05, "loss": 0.9704, "step": 20500 }, { "epoch": 1.01, "eval_bleu": 39.6273, "eval_gen_len": 16.8159, "eval_loss": 1.158150315284729, "eval_runtime": 1348.8721, "eval_samples_per_second": 5.296, "eval_steps_per_second": 0.442, "step": 20500 }, { "epoch": 1.04, "learning_rate": 4.4803009305088104e-05, "loss": 0.8936, "step": 21000 }, { "epoch": 1.04, "eval_bleu": 39.3436, "eval_gen_len": 16.6532, "eval_loss": 1.1593575477600098, "eval_runtime": 1336.9289, "eval_samples_per_second": 5.343, "eval_steps_per_second": 0.446, "step": 21000 }, { "epoch": 1.06, "learning_rate": 4.4679271431399726e-05, "loss": 0.8839, "step": 21500 }, { "epoch": 1.06, "eval_bleu": 39.8191, "eval_gen_len": 16.4936, "eval_loss": 1.1491360664367676, "eval_runtime": 1334.0963, "eval_samples_per_second": 5.354, "eval_steps_per_second": 0.447, "step": 21500 }, { "epoch": 1.09, "learning_rate": 4.455553355771135e-05, "loss": 0.8823, "step": 22000 }, { "epoch": 1.09, "eval_bleu": 40.01, "eval_gen_len": 16.6578, "eval_loss": 1.14137601852417, "eval_runtime": 1346.5689, "eval_samples_per_second": 5.305, "eval_steps_per_second": 0.443, "step": 22000 }, { "epoch": 1.11, "learning_rate": 4.443179568402297e-05, "loss": 0.8898, "step": 22500 }, { "epoch": 1.11, "eval_bleu": 39.7192, "eval_gen_len": 16.8047, "eval_loss": 1.125238299369812, "eval_runtime": 1349.9067, "eval_samples_per_second": 5.291, "eval_steps_per_second": 0.442, "step": 22500 }, { "epoch": 1.14, "learning_rate": 4.4308057810334594e-05, "loss": 0.8783, "step": 23000 }, { "epoch": 1.14, "eval_bleu": 39.9004, "eval_gen_len": 16.7007, "eval_loss": 1.1242141723632812, "eval_runtime": 1331.8043, "eval_samples_per_second": 5.363, "eval_steps_per_second": 0.448, "step": 23000 }, { "epoch": 1.16, "learning_rate": 4.418431993664621e-05, "loss": 0.869, "step": 23500 }, { "epoch": 1.16, "eval_bleu": 40.6549, "eval_gen_len": 16.8279, "eval_loss": 1.1088123321533203, "eval_runtime": 1337.7469, "eval_samples_per_second": 5.34, "eval_steps_per_second": 0.446, "step": 23500 }, { "epoch": 1.19, "learning_rate": 4.406058206295783e-05, "loss": 0.8602, "step": 24000 }, { "epoch": 1.19, "eval_bleu": 40.9968, "eval_gen_len": 16.7487, "eval_loss": 1.100970983505249, "eval_runtime": 1347.265, "eval_samples_per_second": 5.302, "eval_steps_per_second": 0.442, "step": 24000 }, { "epoch": 1.21, "learning_rate": 4.3936844189269454e-05, "loss": 0.8617, "step": 24500 }, { "epoch": 1.21, "eval_bleu": 40.6003, "eval_gen_len": 16.544, "eval_loss": 1.1008094549179077, "eval_runtime": 1323.7915, "eval_samples_per_second": 5.396, "eval_steps_per_second": 0.45, "step": 24500 }, { "epoch": 1.24, "learning_rate": 4.3813106315581076e-05, "loss": 0.8571, "step": 25000 }, { "epoch": 1.24, "eval_bleu": 41.2081, "eval_gen_len": 16.8625, "eval_loss": 1.0926491022109985, "eval_runtime": 1338.7092, "eval_samples_per_second": 5.336, "eval_steps_per_second": 0.445, "step": 25000 }, { "epoch": 1.26, "learning_rate": 4.36893684418927e-05, "loss": 0.845, "step": 25500 }, { "epoch": 1.26, "eval_bleu": 41.0718, "eval_gen_len": 16.6553, "eval_loss": 1.088665246963501, "eval_runtime": 1330.6533, "eval_samples_per_second": 5.368, "eval_steps_per_second": 0.448, "step": 25500 }, { "epoch": 1.29, "learning_rate": 4.356563056820432e-05, "loss": 0.8533, "step": 26000 }, { "epoch": 1.29, "eval_bleu": 41.2884, "eval_gen_len": 16.7981, "eval_loss": 1.0817835330963135, "eval_runtime": 1339.6246, "eval_samples_per_second": 5.332, "eval_steps_per_second": 0.445, "step": 26000 }, { "epoch": 1.31, "learning_rate": 4.344189269451594e-05, "loss": 0.8486, "step": 26500 }, { "epoch": 1.31, "eval_bleu": 40.9701, "eval_gen_len": 16.9712, "eval_loss": 1.0806611776351929, "eval_runtime": 1354.0651, "eval_samples_per_second": 5.275, "eval_steps_per_second": 0.44, "step": 26500 }, { "epoch": 1.34, "learning_rate": 4.331815482082756e-05, "loss": 0.8441, "step": 27000 }, { "epoch": 1.34, "eval_bleu": 41.3608, "eval_gen_len": 16.6277, "eval_loss": 1.0752187967300415, "eval_runtime": 1330.2452, "eval_samples_per_second": 5.37, "eval_steps_per_second": 0.448, "step": 27000 }, { "epoch": 1.36, "learning_rate": 4.319441694713918e-05, "loss": 0.8531, "step": 27500 }, { "epoch": 1.36, "eval_bleu": 41.5215, "eval_gen_len": 16.8085, "eval_loss": 1.066485047340393, "eval_runtime": 1334.1554, "eval_samples_per_second": 5.354, "eval_steps_per_second": 0.447, "step": 27500 }, { "epoch": 1.39, "learning_rate": 4.3070679073450804e-05, "loss": 0.8371, "step": 28000 }, { "epoch": 1.39, "eval_bleu": 41.9045, "eval_gen_len": 16.736, "eval_loss": 1.0553510189056396, "eval_runtime": 1333.8696, "eval_samples_per_second": 5.355, "eval_steps_per_second": 0.447, "step": 28000 }, { "epoch": 1.41, "learning_rate": 4.294694119976243e-05, "loss": 0.8171, "step": 28500 }, { "epoch": 1.41, "eval_bleu": 42.0877, "eval_gen_len": 16.7124, "eval_loss": 1.0496976375579834, "eval_runtime": 1332.4047, "eval_samples_per_second": 5.361, "eval_steps_per_second": 0.447, "step": 28500 }, { "epoch": 1.44, "learning_rate": 4.282320332607405e-05, "loss": 0.8359, "step": 29000 }, { "epoch": 1.44, "eval_bleu": 42.1081, "eval_gen_len": 16.6317, "eval_loss": 1.0442363023757935, "eval_runtime": 1329.2047, "eval_samples_per_second": 5.374, "eval_steps_per_second": 0.448, "step": 29000 }, { "epoch": 1.46, "learning_rate": 4.2699465452385665e-05, "loss": 0.8097, "step": 29500 }, { "epoch": 1.46, "eval_bleu": 42.3305, "eval_gen_len": 16.5241, "eval_loss": 1.0467731952667236, "eval_runtime": 1325.978, "eval_samples_per_second": 5.387, "eval_steps_per_second": 0.449, "step": 29500 }, { "epoch": 1.48, "learning_rate": 4.257572757869729e-05, "loss": 0.8151, "step": 30000 }, { "epoch": 1.48, "eval_bleu": 42.208, "eval_gen_len": 16.5507, "eval_loss": 1.0358787775039673, "eval_runtime": 1325.8985, "eval_samples_per_second": 5.387, "eval_steps_per_second": 0.45, "step": 30000 }, { "epoch": 1.51, "learning_rate": 4.245198970500891e-05, "loss": 0.8008, "step": 30500 }, { "epoch": 1.51, "eval_bleu": 43.0639, "eval_gen_len": 16.7362, "eval_loss": 1.0226542949676514, "eval_runtime": 1337.8285, "eval_samples_per_second": 5.339, "eval_steps_per_second": 0.445, "step": 30500 }, { "epoch": 1.53, "learning_rate": 4.232825183132053e-05, "loss": 0.8105, "step": 31000 }, { "epoch": 1.53, "eval_bleu": 42.7617, "eval_gen_len": 16.4645, "eval_loss": 1.016375184059143, "eval_runtime": 1327.3901, "eval_samples_per_second": 5.381, "eval_steps_per_second": 0.449, "step": 31000 }, { "epoch": 1.56, "learning_rate": 4.2204513957632154e-05, "loss": 0.8108, "step": 31500 }, { "epoch": 1.56, "eval_bleu": 42.9124, "eval_gen_len": 16.8116, "eval_loss": 1.0157625675201416, "eval_runtime": 1343.0655, "eval_samples_per_second": 5.318, "eval_steps_per_second": 0.444, "step": 31500 }, { "epoch": 1.58, "learning_rate": 4.208077608394378e-05, "loss": 0.8052, "step": 32000 }, { "epoch": 1.58, "eval_bleu": 42.8819, "eval_gen_len": 16.6151, "eval_loss": 1.0091865062713623, "eval_runtime": 1330.5695, "eval_samples_per_second": 5.368, "eval_steps_per_second": 0.448, "step": 32000 }, { "epoch": 1.61, "learning_rate": 4.195703821025539e-05, "loss": 0.8024, "step": 32500 }, { "epoch": 1.61, "eval_bleu": 43.0788, "eval_gen_len": 16.7483, "eval_loss": 1.0076141357421875, "eval_runtime": 1332.4446, "eval_samples_per_second": 5.361, "eval_steps_per_second": 0.447, "step": 32500 }, { "epoch": 1.63, "learning_rate": 4.1833300336567015e-05, "loss": 0.7971, "step": 33000 }, { "epoch": 1.63, "eval_bleu": 43.6296, "eval_gen_len": 16.7942, "eval_loss": 0.9935155510902405, "eval_runtime": 1351.7288, "eval_samples_per_second": 5.284, "eval_steps_per_second": 0.441, "step": 33000 }, { "epoch": 1.66, "learning_rate": 4.170956246287864e-05, "loss": 0.8006, "step": 33500 }, { "epoch": 1.66, "eval_bleu": 43.9739, "eval_gen_len": 16.7378, "eval_loss": 0.991138219833374, "eval_runtime": 1346.9798, "eval_samples_per_second": 5.303, "eval_steps_per_second": 0.442, "step": 33500 }, { "epoch": 1.68, "learning_rate": 4.158582458919026e-05, "loss": 0.7855, "step": 34000 }, { "epoch": 1.68, "eval_bleu": 43.5366, "eval_gen_len": 16.5919, "eval_loss": 0.991809070110321, "eval_runtime": 1337.0859, "eval_samples_per_second": 5.342, "eval_steps_per_second": 0.446, "step": 34000 }, { "epoch": 1.71, "learning_rate": 4.146208671550188e-05, "loss": 0.7786, "step": 34500 }, { "epoch": 1.71, "eval_bleu": 43.8212, "eval_gen_len": 16.5705, "eval_loss": 0.9883202314376831, "eval_runtime": 1322.7907, "eval_samples_per_second": 5.4, "eval_steps_per_second": 0.451, "step": 34500 }, { "epoch": 1.73, "learning_rate": 4.1338348841813505e-05, "loss": 0.7877, "step": 35000 }, { "epoch": 1.73, "eval_bleu": 43.7342, "eval_gen_len": 16.7165, "eval_loss": 0.9775007963180542, "eval_runtime": 1338.9365, "eval_samples_per_second": 5.335, "eval_steps_per_second": 0.445, "step": 35000 }, { "epoch": 1.76, "learning_rate": 4.121461096812513e-05, "loss": 0.7832, "step": 35500 }, { "epoch": 1.76, "eval_bleu": 43.8512, "eval_gen_len": 16.712, "eval_loss": 0.9759693145751953, "eval_runtime": 1341.6352, "eval_samples_per_second": 5.324, "eval_steps_per_second": 0.444, "step": 35500 }, { "epoch": 1.78, "learning_rate": 4.109087309443675e-05, "loss": 0.7812, "step": 36000 }, { "epoch": 1.78, "eval_bleu": 44.606, "eval_gen_len": 16.7719, "eval_loss": 0.9725255370140076, "eval_runtime": 1344.5964, "eval_samples_per_second": 5.312, "eval_steps_per_second": 0.443, "step": 36000 }, { "epoch": 1.81, "learning_rate": 4.096713522074837e-05, "loss": 0.766, "step": 36500 }, { "epoch": 1.81, "eval_bleu": 44.1586, "eval_gen_len": 16.568, "eval_loss": 0.9698735475540161, "eval_runtime": 1331.8223, "eval_samples_per_second": 5.363, "eval_steps_per_second": 0.448, "step": 36500 }, { "epoch": 1.83, "learning_rate": 4.0843397347059994e-05, "loss": 0.7688, "step": 37000 }, { "epoch": 1.83, "eval_bleu": 44.5813, "eval_gen_len": 16.7396, "eval_loss": 0.9606902599334717, "eval_runtime": 1334.582, "eval_samples_per_second": 5.352, "eval_steps_per_second": 0.447, "step": 37000 }, { "epoch": 1.86, "learning_rate": 4.071965947337161e-05, "loss": 0.7727, "step": 37500 }, { "epoch": 1.86, "eval_bleu": 44.1615, "eval_gen_len": 16.707, "eval_loss": 0.962368369102478, "eval_runtime": 1341.1556, "eval_samples_per_second": 5.326, "eval_steps_per_second": 0.444, "step": 37500 }, { "epoch": 1.88, "learning_rate": 4.059592159968323e-05, "loss": 0.7645, "step": 38000 }, { "epoch": 1.88, "eval_bleu": 44.1652, "eval_gen_len": 16.5927, "eval_loss": 0.9592034220695496, "eval_runtime": 1336.4184, "eval_samples_per_second": 5.345, "eval_steps_per_second": 0.446, "step": 38000 }, { "epoch": 1.91, "learning_rate": 4.0472183725994855e-05, "loss": 0.7638, "step": 38500 }, { "epoch": 1.91, "eval_bleu": 44.4663, "eval_gen_len": 16.6448, "eval_loss": 0.9551029205322266, "eval_runtime": 1335.4927, "eval_samples_per_second": 5.349, "eval_steps_per_second": 0.446, "step": 38500 }, { "epoch": 1.93, "learning_rate": 4.034844585230648e-05, "loss": 0.7544, "step": 39000 }, { "epoch": 1.93, "eval_bleu": 44.3807, "eval_gen_len": 16.7161, "eval_loss": 0.9564537405967712, "eval_runtime": 1335.4847, "eval_samples_per_second": 5.349, "eval_steps_per_second": 0.446, "step": 39000 }, { "epoch": 1.96, "learning_rate": 4.02247079786181e-05, "loss": 0.7513, "step": 39500 }, { "epoch": 1.96, "eval_bleu": 44.9465, "eval_gen_len": 16.6132, "eval_loss": 0.9481050372123718, "eval_runtime": 1327.2386, "eval_samples_per_second": 5.382, "eval_steps_per_second": 0.449, "step": 39500 }, { "epoch": 1.98, "learning_rate": 4.010097010492972e-05, "loss": 0.7563, "step": 40000 }, { "epoch": 1.98, "eval_bleu": 45.5396, "eval_gen_len": 16.8183, "eval_loss": 0.939414381980896, "eval_runtime": 1341.9443, "eval_samples_per_second": 5.323, "eval_steps_per_second": 0.444, "step": 40000 }, { "epoch": 2.0, "learning_rate": 3.997723223124134e-05, "loss": 0.7206, "step": 40500 }, { "epoch": 2.0, "eval_bleu": 44.6231, "eval_gen_len": 16.5586, "eval_loss": 0.9512418508529663, "eval_runtime": 1333.952, "eval_samples_per_second": 5.355, "eval_steps_per_second": 0.447, "step": 40500 }, { "epoch": 2.03, "learning_rate": 3.985349435755296e-05, "loss": 0.5864, "step": 41000 }, { "epoch": 2.03, "eval_bleu": 45.4858, "eval_gen_len": 16.784, "eval_loss": 0.9353885650634766, "eval_runtime": 1336.3848, "eval_samples_per_second": 5.345, "eval_steps_per_second": 0.446, "step": 41000 }, { "epoch": 2.05, "learning_rate": 3.972975648386458e-05, "loss": 0.5916, "step": 41500 }, { "epoch": 2.05, "eval_bleu": 45.1704, "eval_gen_len": 16.6795, "eval_loss": 0.9400736689567566, "eval_runtime": 1337.6062, "eval_samples_per_second": 5.34, "eval_steps_per_second": 0.446, "step": 41500 }, { "epoch": 2.08, "learning_rate": 3.9606018610176205e-05, "loss": 0.5907, "step": 42000 }, { "epoch": 2.08, "eval_bleu": 45.0619, "eval_gen_len": 16.6905, "eval_loss": 0.9472643733024597, "eval_runtime": 1332.9894, "eval_samples_per_second": 5.359, "eval_steps_per_second": 0.447, "step": 42000 }, { "epoch": 2.1, "learning_rate": 3.948228073648783e-05, "loss": 0.584, "step": 42500 }, { "epoch": 2.1, "eval_bleu": 45.072, "eval_gen_len": 16.7739, "eval_loss": 0.943854808807373, "eval_runtime": 1336.6571, "eval_samples_per_second": 5.344, "eval_steps_per_second": 0.446, "step": 42500 }, { "epoch": 2.13, "learning_rate": 3.935854286279945e-05, "loss": 0.5979, "step": 43000 }, { "epoch": 2.13, "eval_bleu": 45.1267, "eval_gen_len": 16.9086, "eval_loss": 0.9410138726234436, "eval_runtime": 1338.7702, "eval_samples_per_second": 5.335, "eval_steps_per_second": 0.445, "step": 43000 }, { "epoch": 2.15, "learning_rate": 3.9234804989111066e-05, "loss": 0.5929, "step": 43500 }, { "epoch": 2.15, "eval_bleu": 45.3207, "eval_gen_len": 16.6486, "eval_loss": 0.9384570121765137, "eval_runtime": 1333.0456, "eval_samples_per_second": 5.358, "eval_steps_per_second": 0.447, "step": 43500 }, { "epoch": 2.18, "learning_rate": 3.911106711542269e-05, "loss": 0.588, "step": 44000 }, { "epoch": 2.18, "eval_bleu": 45.3945, "eval_gen_len": 16.7284, "eval_loss": 0.9332572817802429, "eval_runtime": 1330.0518, "eval_samples_per_second": 5.37, "eval_steps_per_second": 0.448, "step": 44000 }, { "epoch": 2.2, "learning_rate": 3.898732924173431e-05, "loss": 0.594, "step": 44500 }, { "epoch": 2.2, "eval_bleu": 45.3986, "eval_gen_len": 16.6814, "eval_loss": 0.9292554259300232, "eval_runtime": 1334.5426, "eval_samples_per_second": 5.352, "eval_steps_per_second": 0.447, "step": 44500 }, { "epoch": 2.23, "learning_rate": 3.886359136804593e-05, "loss": 0.5983, "step": 45000 }, { "epoch": 2.23, "eval_bleu": 45.6357, "eval_gen_len": 16.617, "eval_loss": 0.920947253704071, "eval_runtime": 1332.827, "eval_samples_per_second": 5.359, "eval_steps_per_second": 0.447, "step": 45000 }, { "epoch": 2.25, "learning_rate": 3.8739853494357555e-05, "loss": 0.6063, "step": 45500 }, { "epoch": 2.25, "eval_bleu": 45.8697, "eval_gen_len": 16.7532, "eval_loss": 0.9304359555244446, "eval_runtime": 1340.1007, "eval_samples_per_second": 5.33, "eval_steps_per_second": 0.445, "step": 45500 }, { "epoch": 2.28, "learning_rate": 3.861611562066917e-05, "loss": 0.6004, "step": 46000 }, { "epoch": 2.28, "eval_bleu": 45.8707, "eval_gen_len": 16.6399, "eval_loss": 0.9174883961677551, "eval_runtime": 1333.508, "eval_samples_per_second": 5.357, "eval_steps_per_second": 0.447, "step": 46000 }, { "epoch": 2.3, "learning_rate": 3.8492377746980794e-05, "loss": 0.5951, "step": 46500 }, { "epoch": 2.3, "eval_bleu": 45.0087, "eval_gen_len": 16.5349, "eval_loss": 0.9316369295120239, "eval_runtime": 1331.8005, "eval_samples_per_second": 5.363, "eval_steps_per_second": 0.448, "step": 46500 }, { "epoch": 2.33, "learning_rate": 3.8368639873292416e-05, "loss": 0.5908, "step": 47000 }, { "epoch": 2.33, "eval_bleu": 45.485, "eval_gen_len": 16.5745, "eval_loss": 0.9216477870941162, "eval_runtime": 1328.0985, "eval_samples_per_second": 5.378, "eval_steps_per_second": 0.449, "step": 47000 }, { "epoch": 2.35, "learning_rate": 3.824490199960404e-05, "loss": 0.5847, "step": 47500 }, { "epoch": 2.35, "eval_bleu": 45.9099, "eval_gen_len": 16.6137, "eval_loss": 0.9153968691825867, "eval_runtime": 1331.7962, "eval_samples_per_second": 5.363, "eval_steps_per_second": 0.448, "step": 47500 }, { "epoch": 2.38, "learning_rate": 3.812116412591566e-05, "loss": 0.5897, "step": 48000 }, { "epoch": 2.38, "eval_bleu": 46.0344, "eval_gen_len": 16.5282, "eval_loss": 0.9067392945289612, "eval_runtime": 1320.4869, "eval_samples_per_second": 5.409, "eval_steps_per_second": 0.451, "step": 48000 }, { "epoch": 2.4, "learning_rate": 3.799742625222728e-05, "loss": 0.5951, "step": 48500 }, { "epoch": 2.4, "eval_bleu": 45.9988, "eval_gen_len": 16.6501, "eval_loss": 0.9071055054664612, "eval_runtime": 1333.3527, "eval_samples_per_second": 5.357, "eval_steps_per_second": 0.447, "step": 48500 }, { "epoch": 2.43, "learning_rate": 3.7873688378538906e-05, "loss": 0.5781, "step": 49000 }, { "epoch": 2.43, "eval_bleu": 46.0018, "eval_gen_len": 16.7336, "eval_loss": 0.9073439240455627, "eval_runtime": 1334.0382, "eval_samples_per_second": 5.354, "eval_steps_per_second": 0.447, "step": 49000 }, { "epoch": 2.45, "learning_rate": 3.774995050485053e-05, "loss": 0.5934, "step": 49500 }, { "epoch": 2.45, "eval_bleu": 46.5706, "eval_gen_len": 16.8471, "eval_loss": 0.9027701616287231, "eval_runtime": 1336.5927, "eval_samples_per_second": 5.344, "eval_steps_per_second": 0.446, "step": 49500 }, { "epoch": 2.47, "learning_rate": 3.762621263116215e-05, "loss": 0.5883, "step": 50000 }, { "epoch": 2.47, "eval_bleu": 46.6303, "eval_gen_len": 16.8614, "eval_loss": 0.9047964215278625, "eval_runtime": 1340.8517, "eval_samples_per_second": 5.327, "eval_steps_per_second": 0.444, "step": 50000 }, { "epoch": 2.5, "learning_rate": 3.750247475747377e-05, "loss": 0.5964, "step": 50500 }, { "epoch": 2.5, "eval_bleu": 46.087, "eval_gen_len": 16.6417, "eval_loss": 0.8980411887168884, "eval_runtime": 1332.5501, "eval_samples_per_second": 5.36, "eval_steps_per_second": 0.447, "step": 50500 }, { "epoch": 2.52, "learning_rate": 3.7378736883785395e-05, "loss": 0.5924, "step": 51000 }, { "epoch": 2.52, "eval_bleu": 47.0224, "eval_gen_len": 16.6821, "eval_loss": 0.9017878770828247, "eval_runtime": 1333.0812, "eval_samples_per_second": 5.358, "eval_steps_per_second": 0.447, "step": 51000 }, { "epoch": 2.55, "learning_rate": 3.725499901009701e-05, "loss": 0.5866, "step": 51500 }, { "epoch": 2.55, "eval_bleu": 46.244, "eval_gen_len": 16.7493, "eval_loss": 0.8992940187454224, "eval_runtime": 1343.2842, "eval_samples_per_second": 5.318, "eval_steps_per_second": 0.444, "step": 51500 }, { "epoch": 2.57, "learning_rate": 3.7131261136408633e-05, "loss": 0.5764, "step": 52000 }, { "epoch": 2.57, "eval_bleu": 46.3888, "eval_gen_len": 16.6049, "eval_loss": 0.9019588828086853, "eval_runtime": 1338.9531, "eval_samples_per_second": 5.335, "eval_steps_per_second": 0.445, "step": 52000 }, { "epoch": 2.6, "learning_rate": 3.7007523262720256e-05, "loss": 0.5865, "step": 52500 }, { "epoch": 2.6, "eval_bleu": 46.8308, "eval_gen_len": 16.7173, "eval_loss": 0.8952421545982361, "eval_runtime": 1352.2564, "eval_samples_per_second": 5.282, "eval_steps_per_second": 0.441, "step": 52500 }, { "epoch": 2.62, "learning_rate": 3.688378538903188e-05, "loss": 0.6016, "step": 53000 }, { "epoch": 2.62, "eval_bleu": 47.0928, "eval_gen_len": 16.7008, "eval_loss": 0.8945097923278809, "eval_runtime": 1354.2335, "eval_samples_per_second": 5.275, "eval_steps_per_second": 0.44, "step": 53000 }, { "epoch": 2.65, "learning_rate": 3.67600475153435e-05, "loss": 0.5902, "step": 53500 }, { "epoch": 2.65, "eval_bleu": 46.6548, "eval_gen_len": 16.7439, "eval_loss": 0.8932007551193237, "eval_runtime": 1382.9624, "eval_samples_per_second": 5.165, "eval_steps_per_second": 0.431, "step": 53500 }, { "epoch": 2.67, "learning_rate": 3.663630964165512e-05, "loss": 0.587, "step": 54000 }, { "epoch": 2.67, "eval_bleu": 46.9113, "eval_gen_len": 16.6783, "eval_loss": 0.8879196047782898, "eval_runtime": 1386.1138, "eval_samples_per_second": 5.153, "eval_steps_per_second": 0.43, "step": 54000 }, { "epoch": 2.7, "learning_rate": 3.651257176796674e-05, "loss": 0.5968, "step": 54500 }, { "epoch": 2.7, "eval_bleu": 47.0957, "eval_gen_len": 16.5933, "eval_loss": 0.8864062428474426, "eval_runtime": 1383.0254, "eval_samples_per_second": 5.165, "eval_steps_per_second": 0.431, "step": 54500 }, { "epoch": 2.72, "learning_rate": 3.638883389427836e-05, "loss": 0.5853, "step": 55000 }, { "epoch": 2.72, "eval_bleu": 47.1709, "eval_gen_len": 16.6479, "eval_loss": 0.8836300373077393, "eval_runtime": 1390.1185, "eval_samples_per_second": 5.138, "eval_steps_per_second": 0.429, "step": 55000 }, { "epoch": 2.75, "learning_rate": 3.6265096020589984e-05, "loss": 0.5943, "step": 55500 }, { "epoch": 2.75, "eval_bleu": 47.184, "eval_gen_len": 16.6594, "eval_loss": 0.8828004598617554, "eval_runtime": 1374.4327, "eval_samples_per_second": 5.197, "eval_steps_per_second": 0.434, "step": 55500 }, { "epoch": 2.77, "learning_rate": 3.6141358146901606e-05, "loss": 0.5891, "step": 56000 }, { "epoch": 2.77, "eval_bleu": 46.7804, "eval_gen_len": 16.7312, "eval_loss": 0.8724841475486755, "eval_runtime": 1340.042, "eval_samples_per_second": 5.33, "eval_steps_per_second": 0.445, "step": 56000 }, { "epoch": 2.8, "learning_rate": 3.601762027321323e-05, "loss": 0.585, "step": 56500 }, { "epoch": 2.8, "eval_bleu": 46.8152, "eval_gen_len": 16.6263, "eval_loss": 0.8797315955162048, "eval_runtime": 1333.5752, "eval_samples_per_second": 5.356, "eval_steps_per_second": 0.447, "step": 56500 }, { "epoch": 2.82, "learning_rate": 3.5893882399524844e-05, "loss": 0.5889, "step": 57000 }, { "epoch": 2.82, "eval_bleu": 47.3674, "eval_gen_len": 16.7459, "eval_loss": 0.8741356134414673, "eval_runtime": 1336.3135, "eval_samples_per_second": 5.345, "eval_steps_per_second": 0.446, "step": 57000 }, { "epoch": 2.85, "learning_rate": 3.577014452583647e-05, "loss": 0.5841, "step": 57500 }, { "epoch": 2.85, "eval_bleu": 46.9029, "eval_gen_len": 16.742, "eval_loss": 0.875153124332428, "eval_runtime": 1335.6996, "eval_samples_per_second": 5.348, "eval_steps_per_second": 0.446, "step": 57500 }, { "epoch": 2.87, "learning_rate": 3.564640665214809e-05, "loss": 0.5786, "step": 58000 }, { "epoch": 2.87, "eval_bleu": 47.4711, "eval_gen_len": 16.6132, "eval_loss": 0.8668653964996338, "eval_runtime": 1333.4019, "eval_samples_per_second": 5.357, "eval_steps_per_second": 0.447, "step": 58000 }, { "epoch": 2.9, "learning_rate": 3.552266877845971e-05, "loss": 0.5784, "step": 58500 }, { "epoch": 2.9, "eval_bleu": 47.3049, "eval_gen_len": 16.6745, "eval_loss": 0.8657991290092468, "eval_runtime": 1339.6294, "eval_samples_per_second": 5.332, "eval_steps_per_second": 0.445, "step": 58500 }, { "epoch": 2.92, "learning_rate": 3.5398930904771334e-05, "loss": 0.5807, "step": 59000 }, { "epoch": 2.92, "eval_bleu": 47.2634, "eval_gen_len": 16.6962, "eval_loss": 0.8614685535430908, "eval_runtime": 1334.3541, "eval_samples_per_second": 5.353, "eval_steps_per_second": 0.447, "step": 59000 }, { "epoch": 2.94, "learning_rate": 3.5275193031082956e-05, "loss": 0.5856, "step": 59500 }, { "epoch": 2.94, "eval_bleu": 47.8743, "eval_gen_len": 16.7721, "eval_loss": 0.8591042757034302, "eval_runtime": 1335.7672, "eval_samples_per_second": 5.347, "eval_steps_per_second": 0.446, "step": 59500 }, { "epoch": 2.97, "learning_rate": 3.515145515739457e-05, "loss": 0.58, "step": 60000 }, { "epoch": 2.97, "eval_bleu": 47.6119, "eval_gen_len": 16.6251, "eval_loss": 0.8595740795135498, "eval_runtime": 1332.2154, "eval_samples_per_second": 5.362, "eval_steps_per_second": 0.447, "step": 60000 }, { "epoch": 2.99, "learning_rate": 3.5027717283706194e-05, "loss": 0.5802, "step": 60500 }, { "epoch": 2.99, "eval_bleu": 47.703, "eval_gen_len": 16.6114, "eval_loss": 0.8570582270622253, "eval_runtime": 1325.8364, "eval_samples_per_second": 5.388, "eval_steps_per_second": 0.45, "step": 60500 }, { "epoch": 3.02, "learning_rate": 3.490397941001782e-05, "loss": 0.4491, "step": 61000 }, { "epoch": 3.02, "eval_bleu": 47.8822, "eval_gen_len": 16.732, "eval_loss": 0.859774649143219, "eval_runtime": 1336.5318, "eval_samples_per_second": 5.344, "eval_steps_per_second": 0.446, "step": 61000 }, { "epoch": 3.04, "learning_rate": 3.478024153632944e-05, "loss": 0.4305, "step": 61500 }, { "epoch": 3.04, "eval_bleu": 47.6096, "eval_gen_len": 16.5932, "eval_loss": 0.8659536242485046, "eval_runtime": 1328.147, "eval_samples_per_second": 5.378, "eval_steps_per_second": 0.449, "step": 61500 }, { "epoch": 3.07, "learning_rate": 3.465650366264107e-05, "loss": 0.4268, "step": 62000 }, { "epoch": 3.07, "eval_bleu": 48.0044, "eval_gen_len": 16.7596, "eval_loss": 0.8622444868087769, "eval_runtime": 1332.1214, "eval_samples_per_second": 5.362, "eval_steps_per_second": 0.447, "step": 62000 }, { "epoch": 3.09, "learning_rate": 3.4532765788952684e-05, "loss": 0.4274, "step": 62500 }, { "epoch": 3.09, "eval_bleu": 47.8272, "eval_gen_len": 16.6352, "eval_loss": 0.8669659495353699, "eval_runtime": 1330.5387, "eval_samples_per_second": 5.369, "eval_steps_per_second": 0.448, "step": 62500 }, { "epoch": 3.12, "learning_rate": 3.4409027915264307e-05, "loss": 0.4383, "step": 63000 }, { "epoch": 3.12, "eval_bleu": 47.8411, "eval_gen_len": 16.6331, "eval_loss": 0.8617204427719116, "eval_runtime": 1339.6282, "eval_samples_per_second": 5.332, "eval_steps_per_second": 0.445, "step": 63000 }, { "epoch": 3.14, "learning_rate": 3.428529004157593e-05, "loss": 0.4339, "step": 63500 }, { "epoch": 3.14, "eval_bleu": 48.232, "eval_gen_len": 16.839, "eval_loss": 0.8655442595481873, "eval_runtime": 1339.0945, "eval_samples_per_second": 5.334, "eval_steps_per_second": 0.445, "step": 63500 }, { "epoch": 3.17, "learning_rate": 3.416155216788755e-05, "loss": 0.4361, "step": 64000 }, { "epoch": 3.17, "eval_bleu": 47.978, "eval_gen_len": 16.7057, "eval_loss": 0.8637909889221191, "eval_runtime": 1337.5265, "eval_samples_per_second": 5.34, "eval_steps_per_second": 0.446, "step": 64000 }, { "epoch": 3.19, "learning_rate": 3.4037814294199174e-05, "loss": 0.4373, "step": 64500 }, { "epoch": 3.19, "eval_bleu": 48.0663, "eval_gen_len": 16.652, "eval_loss": 0.8616685271263123, "eval_runtime": 1344.0854, "eval_samples_per_second": 5.314, "eval_steps_per_second": 0.443, "step": 64500 }, { "epoch": 3.22, "learning_rate": 3.3914076420510796e-05, "loss": 0.43, "step": 65000 }, { "epoch": 3.22, "eval_bleu": 47.8077, "eval_gen_len": 16.7621, "eval_loss": 0.8689031004905701, "eval_runtime": 1338.3656, "eval_samples_per_second": 5.337, "eval_steps_per_second": 0.445, "step": 65000 }, { "epoch": 3.24, "learning_rate": 3.379033854682241e-05, "loss": 0.4325, "step": 65500 }, { "epoch": 3.24, "eval_bleu": 47.9434, "eval_gen_len": 16.6489, "eval_loss": 0.870697021484375, "eval_runtime": 1329.6325, "eval_samples_per_second": 5.372, "eval_steps_per_second": 0.448, "step": 65500 }, { "epoch": 3.27, "learning_rate": 3.3666600673134034e-05, "loss": 0.4409, "step": 66000 }, { "epoch": 3.27, "eval_bleu": 48.2346, "eval_gen_len": 16.6951, "eval_loss": 0.871823787689209, "eval_runtime": 1334.2329, "eval_samples_per_second": 5.354, "eval_steps_per_second": 0.447, "step": 66000 }, { "epoch": 3.29, "learning_rate": 3.354286279944566e-05, "loss": 0.4407, "step": 66500 }, { "epoch": 3.29, "eval_bleu": 48.0356, "eval_gen_len": 16.7549, "eval_loss": 0.8691708445549011, "eval_runtime": 1345.0681, "eval_samples_per_second": 5.311, "eval_steps_per_second": 0.443, "step": 66500 }, { "epoch": 3.32, "learning_rate": 3.341912492575728e-05, "loss": 0.444, "step": 67000 }, { "epoch": 3.32, "eval_bleu": 48.295, "eval_gen_len": 16.6175, "eval_loss": 0.8652631044387817, "eval_runtime": 1333.9567, "eval_samples_per_second": 5.355, "eval_steps_per_second": 0.447, "step": 67000 }, { "epoch": 3.34, "learning_rate": 3.32953870520689e-05, "loss": 0.4403, "step": 67500 }, { "epoch": 3.34, "eval_bleu": 48.9108, "eval_gen_len": 16.7733, "eval_loss": 0.8616347312927246, "eval_runtime": 1343.5713, "eval_samples_per_second": 5.316, "eval_steps_per_second": 0.444, "step": 67500 }, { "epoch": 3.37, "learning_rate": 3.317164917838052e-05, "loss": 0.4383, "step": 68000 }, { "epoch": 3.37, "eval_bleu": 48.7693, "eval_gen_len": 16.8501, "eval_loss": 0.8552740812301636, "eval_runtime": 1342.1211, "eval_samples_per_second": 5.322, "eval_steps_per_second": 0.444, "step": 68000 }, { "epoch": 3.39, "learning_rate": 3.304791130469214e-05, "loss": 0.4354, "step": 68500 }, { "epoch": 3.39, "eval_bleu": 48.6325, "eval_gen_len": 16.7572, "eval_loss": 0.8553012013435364, "eval_runtime": 1337.7018, "eval_samples_per_second": 5.34, "eval_steps_per_second": 0.446, "step": 68500 }, { "epoch": 3.42, "learning_rate": 3.292417343100376e-05, "loss": 0.4316, "step": 69000 }, { "epoch": 3.42, "eval_bleu": 48.7044, "eval_gen_len": 16.7796, "eval_loss": 0.8564686179161072, "eval_runtime": 1336.7625, "eval_samples_per_second": 5.344, "eval_steps_per_second": 0.446, "step": 69000 }, { "epoch": 3.44, "learning_rate": 3.2800435557315385e-05, "loss": 0.4465, "step": 69500 }, { "epoch": 3.44, "eval_bleu": 48.5715, "eval_gen_len": 16.6865, "eval_loss": 0.8632897138595581, "eval_runtime": 1336.3648, "eval_samples_per_second": 5.345, "eval_steps_per_second": 0.446, "step": 69500 }, { "epoch": 3.46, "learning_rate": 3.267669768362701e-05, "loss": 0.4412, "step": 70000 }, { "epoch": 3.46, "eval_bleu": 48.18, "eval_gen_len": 16.6885, "eval_loss": 0.8553540706634521, "eval_runtime": 1331.2155, "eval_samples_per_second": 5.366, "eval_steps_per_second": 0.448, "step": 70000 } ], "max_steps": 202040, "num_train_epochs": 10, "total_flos": 2.2754599026819072e+17, "trial_name": null, "trial_params": null }