{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9696311523096615, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.980607376953807e-05, "loss": 0.2765, "step": 500 }, { "epoch": 0.02, "eval_bleu": 31.7549, "eval_chrf++": 58.7253, "eval_gen_len": 22.1982, "eval_loss": 1.1465204954147339, "eval_runtime": 2452.853, "eval_samples_per_second": 6.832, "eval_spbleu": 45.4525, "eval_steps_per_second": 0.683, "eval_ter": 56.1665, "step": 500 }, { "epoch": 0.04, "learning_rate": 4.9612147539076136e-05, "loss": 0.2494, "step": 1000 }, { "epoch": 0.04, "eval_bleu": 30.6155, "eval_chrf++": 58.4159, "eval_gen_len": 22.5912, "eval_loss": 1.1881319284439087, "eval_runtime": 2483.4163, "eval_samples_per_second": 6.748, "eval_spbleu": 43.8898, "eval_steps_per_second": 0.675, "eval_ter": 57.8655, "step": 1000 }, { "epoch": 0.06, "learning_rate": 4.94182213086142e-05, "loss": 0.2576, "step": 1500 }, { "epoch": 0.06, "eval_bleu": 29.789, "eval_chrf++": 57.4443, "eval_gen_len": 22.689, "eval_loss": 1.1570631265640259, "eval_runtime": 2516.9829, "eval_samples_per_second": 6.658, "eval_spbleu": 43.1911, "eval_steps_per_second": 0.666, "eval_ter": 59.1299, "step": 1500 }, { "epoch": 0.08, "learning_rate": 4.922429507815227e-05, "loss": 0.2624, "step": 2000 }, { "epoch": 0.08, "eval_bleu": 31.0288, "eval_chrf++": 58.4502, "eval_gen_len": 22.467, "eval_loss": 1.1633208990097046, "eval_runtime": 2478.0629, "eval_samples_per_second": 6.763, "eval_spbleu": 44.5894, "eval_steps_per_second": 0.676, "eval_ter": 57.6651, "step": 2000 }, { "epoch": 0.1, "learning_rate": 4.9030368847690336e-05, "loss": 0.2284, "step": 2500 }, { "epoch": 0.1, "eval_bleu": 31.2885, "eval_chrf++": 58.0527, "eval_gen_len": 22.7715, "eval_loss": 1.1723754405975342, "eval_runtime": 2538.3187, "eval_samples_per_second": 6.602, "eval_spbleu": 44.5546, "eval_steps_per_second": 0.66, "eval_ter": 57.8341, "step": 2500 }, { "epoch": 0.12, "learning_rate": 4.88364426172284e-05, "loss": 0.2314, "step": 3000 }, { "epoch": 0.12, "eval_bleu": 30.7414, "eval_chrf++": 58.1161, "eval_gen_len": 22.5233, "eval_loss": 1.1770201921463013, "eval_runtime": 2476.3422, "eval_samples_per_second": 6.767, "eval_spbleu": 44.334, "eval_steps_per_second": 0.677, "eval_ter": 58.228, "step": 3000 }, { "epoch": 0.14, "learning_rate": 4.8642516386766476e-05, "loss": 0.2294, "step": 3500 }, { "epoch": 0.14, "eval_bleu": 31.7781, "eval_chrf++": 58.9642, "eval_gen_len": 22.4762, "eval_loss": 1.2094552516937256, "eval_runtime": 2449.7863, "eval_samples_per_second": 6.841, "eval_spbleu": 45.3044, "eval_steps_per_second": 0.684, "eval_ter": 56.8016, "step": 3500 }, { "epoch": 0.16, "learning_rate": 4.844859015630454e-05, "loss": 0.2457, "step": 4000 }, { "epoch": 0.16, "eval_bleu": 31.0111, "eval_chrf++": 58.203, "eval_gen_len": 22.8377, "eval_loss": 1.1406781673431396, "eval_runtime": 2532.244, "eval_samples_per_second": 6.618, "eval_spbleu": 44.1543, "eval_steps_per_second": 0.662, "eval_ter": 57.7781, "step": 4000 }, { "epoch": 0.17, "learning_rate": 4.825466392584262e-05, "loss": 0.5335, "step": 4500 }, { "epoch": 0.17, "eval_bleu": 31.2577, "eval_chrf++": 58.4688, "eval_gen_len": 22.6196, "eval_loss": 1.0519380569458008, "eval_runtime": 2534.3052, "eval_samples_per_second": 6.612, "eval_spbleu": 44.8062, "eval_steps_per_second": 0.661, "eval_ter": 57.333, "step": 4500 }, { "epoch": 0.19, "learning_rate": 4.8060737695380683e-05, "loss": 0.5162, "step": 5000 }, { "epoch": 0.19, "eval_bleu": 32.2483, "eval_chrf++": 59.2532, "eval_gen_len": 22.461, "eval_loss": 1.0528730154037476, "eval_runtime": 2496.711, "eval_samples_per_second": 6.712, "eval_spbleu": 45.5671, "eval_steps_per_second": 0.671, "eval_ter": 56.2061, "step": 5000 }, { "epoch": 0.21, "learning_rate": 4.786681146491875e-05, "loss": 0.5135, "step": 5500 }, { "epoch": 0.21, "eval_bleu": 32.273, "eval_chrf++": 59.7056, "eval_gen_len": 22.429, "eval_loss": 1.0336111783981323, "eval_runtime": 2482.8365, "eval_samples_per_second": 6.75, "eval_spbleu": 46.0401, "eval_steps_per_second": 0.675, "eval_ter": 56.4706, "step": 5500 }, { "epoch": 0.23, "learning_rate": 4.767288523445682e-05, "loss": 0.5227, "step": 6000 }, { "epoch": 0.23, "eval_bleu": 32.9242, "eval_chrf++": 59.8998, "eval_gen_len": 22.6151, "eval_loss": 1.0207685232162476, "eval_runtime": 2489.8807, "eval_samples_per_second": 6.73, "eval_spbleu": 46.3519, "eval_steps_per_second": 0.673, "eval_ter": 55.7539, "step": 6000 }, { "epoch": 0.25, "learning_rate": 4.7478959003994884e-05, "loss": 0.5181, "step": 6500 }, { "epoch": 0.25, "eval_bleu": 33.276, "eval_chrf++": 60.0419, "eval_gen_len": 22.3479, "eval_loss": 1.0149798393249512, "eval_runtime": 2483.325, "eval_samples_per_second": 6.748, "eval_spbleu": 46.8525, "eval_steps_per_second": 0.675, "eval_ter": 55.268, "step": 6500 }, { "epoch": 0.27, "learning_rate": 4.728503277353295e-05, "loss": 0.4933, "step": 7000 }, { "epoch": 0.27, "eval_bleu": 33.8673, "eval_chrf++": 60.2337, "eval_gen_len": 22.3286, "eval_loss": 1.013890027999878, "eval_runtime": 2447.8332, "eval_samples_per_second": 6.846, "eval_spbleu": 47.5698, "eval_steps_per_second": 0.685, "eval_ter": 54.9242, "step": 7000 }, { "epoch": 0.29, "learning_rate": 4.709110654307102e-05, "loss": 0.5043, "step": 7500 }, { "epoch": 0.29, "eval_bleu": 33.4038, "eval_chrf++": 60.1428, "eval_gen_len": 22.5602, "eval_loss": 1.0105745792388916, "eval_runtime": 2487.4813, "eval_samples_per_second": 6.737, "eval_spbleu": 47.2035, "eval_steps_per_second": 0.674, "eval_ter": 55.3892, "step": 7500 }, { "epoch": 0.31, "learning_rate": 4.6897180312609084e-05, "loss": 0.5013, "step": 8000 }, { "epoch": 0.31, "eval_bleu": 34.3818, "eval_chrf++": 60.7806, "eval_gen_len": 22.365, "eval_loss": 1.0027358531951904, "eval_runtime": 2471.8949, "eval_samples_per_second": 6.779, "eval_spbleu": 47.8553, "eval_steps_per_second": 0.678, "eval_ter": 54.3031, "step": 8000 }, { "epoch": 0.33, "learning_rate": 4.670325408214715e-05, "loss": 0.4898, "step": 8500 }, { "epoch": 0.33, "eval_bleu": 34.5081, "eval_chrf++": 60.6054, "eval_gen_len": 22.1483, "eval_loss": 0.9998334646224976, "eval_runtime": 2464.0531, "eval_samples_per_second": 6.801, "eval_spbleu": 48.0651, "eval_steps_per_second": 0.68, "eval_ter": 53.7379, "step": 8500 }, { "epoch": 0.35, "learning_rate": 4.650932785168522e-05, "loss": 0.5011, "step": 9000 }, { "epoch": 0.35, "eval_bleu": 33.9543, "eval_chrf++": 60.3165, "eval_gen_len": 22.2106, "eval_loss": 0.9939271211624146, "eval_runtime": 2455.6708, "eval_samples_per_second": 6.824, "eval_spbleu": 47.6339, "eval_steps_per_second": 0.683, "eval_ter": 54.3159, "step": 9000 }, { "epoch": 0.37, "learning_rate": 4.6315401621223284e-05, "loss": 0.5014, "step": 9500 }, { "epoch": 0.37, "eval_bleu": 35.12, "eval_chrf++": 61.0072, "eval_gen_len": 22.2949, "eval_loss": 0.9882155060768127, "eval_runtime": 2466.3803, "eval_samples_per_second": 6.795, "eval_spbleu": 48.5338, "eval_steps_per_second": 0.68, "eval_ter": 53.6108, "step": 9500 }, { "epoch": 0.39, "learning_rate": 4.612147539076135e-05, "loss": 0.4861, "step": 10000 }, { "epoch": 0.39, "eval_bleu": 34.2002, "eval_chrf++": 60.9191, "eval_gen_len": 22.472, "eval_loss": 0.9832409620285034, "eval_runtime": 2528.4848, "eval_samples_per_second": 6.628, "eval_spbleu": 47.7142, "eval_steps_per_second": 0.663, "eval_ter": 54.9125, "step": 10000 }, { "epoch": 0.41, "learning_rate": 4.5927549160299424e-05, "loss": 0.4777, "step": 10500 }, { "epoch": 0.41, "eval_bleu": 35.0653, "eval_chrf++": 61.3647, "eval_gen_len": 22.1853, "eval_loss": 0.9780123829841614, "eval_runtime": 2478.2202, "eval_samples_per_second": 6.762, "eval_spbleu": 48.6202, "eval_steps_per_second": 0.676, "eval_ter": 53.7728, "step": 10500 }, { "epoch": 0.43, "learning_rate": 4.573362292983749e-05, "loss": 0.4882, "step": 11000 }, { "epoch": 0.43, "eval_bleu": 34.4099, "eval_chrf++": 60.8297, "eval_gen_len": 22.4734, "eval_loss": 0.9755488038063049, "eval_runtime": 2481.0601, "eval_samples_per_second": 6.754, "eval_spbleu": 48.1346, "eval_steps_per_second": 0.676, "eval_ter": 54.8636, "step": 11000 }, { "epoch": 0.45, "learning_rate": 4.5539696699375565e-05, "loss": 0.489, "step": 11500 }, { "epoch": 0.45, "eval_bleu": 35.3644, "eval_chrf++": 61.3208, "eval_gen_len": 22.2351, "eval_loss": 0.9737293720245361, "eval_runtime": 2438.4039, "eval_samples_per_second": 6.873, "eval_spbleu": 49.0035, "eval_steps_per_second": 0.687, "eval_ter": 53.0538, "step": 11500 }, { "epoch": 0.47, "learning_rate": 4.534577046891363e-05, "loss": 0.4778, "step": 12000 }, { "epoch": 0.47, "eval_bleu": 35.2536, "eval_chrf++": 61.1847, "eval_gen_len": 22.3455, "eval_loss": 0.9639460444450378, "eval_runtime": 2456.4058, "eval_samples_per_second": 6.822, "eval_spbleu": 48.8273, "eval_steps_per_second": 0.682, "eval_ter": 53.7647, "step": 12000 }, { "epoch": 0.48, "learning_rate": 4.51518442384517e-05, "loss": 0.4659, "step": 12500 }, { "epoch": 0.48, "eval_bleu": 35.0392, "eval_chrf++": 61.2274, "eval_gen_len": 22.6852, "eval_loss": 0.9616146683692932, "eval_runtime": 2525.3804, "eval_samples_per_second": 6.636, "eval_spbleu": 48.3755, "eval_steps_per_second": 0.664, "eval_ter": 54.1807, "step": 12500 }, { "epoch": 0.5, "learning_rate": 4.4957918007989765e-05, "loss": 0.4882, "step": 13000 }, { "epoch": 0.5, "eval_bleu": 35.2876, "eval_chrf++": 61.5126, "eval_gen_len": 22.4171, "eval_loss": 0.9623438715934753, "eval_runtime": 2448.0204, "eval_samples_per_second": 6.846, "eval_spbleu": 48.9762, "eval_steps_per_second": 0.685, "eval_ter": 53.5118, "step": 13000 }, { "epoch": 0.52, "learning_rate": 4.476399177752783e-05, "loss": 0.4757, "step": 13500 }, { "epoch": 0.52, "eval_bleu": 36.1928, "eval_chrf++": 61.7736, "eval_gen_len": 22.2772, "eval_loss": 0.9600822925567627, "eval_runtime": 2458.1658, "eval_samples_per_second": 6.817, "eval_spbleu": 49.4709, "eval_steps_per_second": 0.682, "eval_ter": 52.9163, "step": 13500 }, { "epoch": 0.54, "learning_rate": 4.45700655470659e-05, "loss": 0.4532, "step": 14000 }, { "epoch": 0.54, "eval_bleu": 35.3757, "eval_chrf++": 61.4145, "eval_gen_len": 22.4656, "eval_loss": 0.9569535255432129, "eval_runtime": 2509.366, "eval_samples_per_second": 6.678, "eval_spbleu": 48.8161, "eval_steps_per_second": 0.668, "eval_ter": 53.6143, "step": 14000 }, { "epoch": 0.56, "learning_rate": 4.4376139316603965e-05, "loss": 0.4624, "step": 14500 }, { "epoch": 0.56, "eval_bleu": 35.5809, "eval_chrf++": 61.5561, "eval_gen_len": 22.6749, "eval_loss": 0.9505798816680908, "eval_runtime": 2539.8539, "eval_samples_per_second": 6.598, "eval_spbleu": 48.7951, "eval_steps_per_second": 0.66, "eval_ter": 53.478, "step": 14500 }, { "epoch": 0.58, "learning_rate": 4.418221308614203e-05, "loss": 0.4731, "step": 15000 }, { "epoch": 0.58, "eval_bleu": 36.0873, "eval_chrf++": 61.7264, "eval_gen_len": 22.5004, "eval_loss": 0.9501732587814331, "eval_runtime": 2492.0241, "eval_samples_per_second": 6.725, "eval_spbleu": 49.4647, "eval_steps_per_second": 0.673, "eval_ter": 52.7939, "step": 15000 }, { "epoch": 0.6, "learning_rate": 4.39882868556801e-05, "loss": 0.4794, "step": 15500 }, { "epoch": 0.6, "eval_bleu": 36.1453, "eval_chrf++": 61.9504, "eval_gen_len": 22.3361, "eval_loss": 0.9433434009552002, "eval_runtime": 2532.9385, "eval_samples_per_second": 6.616, "eval_spbleu": 49.3902, "eval_steps_per_second": 0.662, "eval_ter": 52.9862, "step": 15500 }, { "epoch": 0.62, "learning_rate": 4.3794360625218165e-05, "loss": 0.4616, "step": 16000 }, { "epoch": 0.62, "eval_bleu": 36.4514, "eval_chrf++": 62.2395, "eval_gen_len": 22.4107, "eval_loss": 0.9410406351089478, "eval_runtime": 2518.4187, "eval_samples_per_second": 6.654, "eval_spbleu": 49.7739, "eval_steps_per_second": 0.665, "eval_ter": 52.5317, "step": 16000 }, { "epoch": 0.64, "learning_rate": 4.360043439475623e-05, "loss": 0.4768, "step": 16500 }, { "epoch": 0.64, "eval_bleu": 36.8462, "eval_chrf++": 62.2425, "eval_gen_len": 22.348, "eval_loss": 0.9391294717788696, "eval_runtime": 2464.2252, "eval_samples_per_second": 6.801, "eval_spbleu": 50.2231, "eval_steps_per_second": 0.68, "eval_ter": 52.1938, "step": 16500 }, { "epoch": 0.66, "learning_rate": 4.3406508164294306e-05, "loss": 0.4482, "step": 17000 }, { "epoch": 0.66, "eval_bleu": 36.9137, "eval_chrf++": 62.5127, "eval_gen_len": 22.5448, "eval_loss": 0.9357725381851196, "eval_runtime": 2549.3781, "eval_samples_per_second": 6.573, "eval_spbleu": 49.9168, "eval_steps_per_second": 0.657, "eval_ter": 52.3604, "step": 17000 }, { "epoch": 0.68, "learning_rate": 4.321258193383237e-05, "loss": 0.4648, "step": 17500 }, { "epoch": 0.68, "eval_bleu": 37.1733, "eval_chrf++": 62.7256, "eval_gen_len": 22.4406, "eval_loss": 0.935612678527832, "eval_runtime": 2499.4089, "eval_samples_per_second": 6.705, "eval_spbleu": 50.417, "eval_steps_per_second": 0.671, "eval_ter": 51.7731, "step": 17500 }, { "epoch": 0.7, "learning_rate": 4.301865570337044e-05, "loss": 0.4642, "step": 18000 }, { "epoch": 0.7, "eval_bleu": 37.1839, "eval_chrf++": 62.648, "eval_gen_len": 22.4244, "eval_loss": 0.9331343770027161, "eval_runtime": 2493.1006, "eval_samples_per_second": 6.722, "eval_spbleu": 50.1902, "eval_steps_per_second": 0.672, "eval_ter": 51.702, "step": 18000 }, { "epoch": 0.72, "learning_rate": 4.2824729472908506e-05, "loss": 0.4691, "step": 18500 }, { "epoch": 0.72, "eval_bleu": 37.1915, "eval_chrf++": 62.7489, "eval_gen_len": 22.4675, "eval_loss": 0.9320312142372131, "eval_runtime": 2469.7829, "eval_samples_per_second": 6.785, "eval_spbleu": 50.4819, "eval_steps_per_second": 0.679, "eval_ter": 51.5528, "step": 18500 }, { "epoch": 0.74, "learning_rate": 4.263080324244658e-05, "loss": 0.4676, "step": 19000 }, { "epoch": 0.74, "eval_bleu": 36.4381, "eval_chrf++": 62.2257, "eval_gen_len": 22.7873, "eval_loss": 0.9254695177078247, "eval_runtime": 2555.4331, "eval_samples_per_second": 6.558, "eval_spbleu": 49.2678, "eval_steps_per_second": 0.656, "eval_ter": 52.7193, "step": 19000 }, { "epoch": 0.76, "learning_rate": 4.2436877011984646e-05, "loss": 0.4515, "step": 19500 }, { "epoch": 0.76, "eval_bleu": 37.2335, "eval_chrf++": 62.7349, "eval_gen_len": 22.3555, "eval_loss": 0.9193410277366638, "eval_runtime": 2451.9368, "eval_samples_per_second": 6.835, "eval_spbleu": 50.8378, "eval_steps_per_second": 0.684, "eval_ter": 51.103, "step": 19500 }, { "epoch": 0.78, "learning_rate": 4.224295078152271e-05, "loss": 0.4605, "step": 20000 }, { "epoch": 0.78, "eval_bleu": 37.615, "eval_chrf++": 62.9994, "eval_gen_len": 22.4271, "eval_loss": 0.920886218547821, "eval_runtime": 2460.502, "eval_samples_per_second": 6.811, "eval_spbleu": 50.9187, "eval_steps_per_second": 0.681, "eval_ter": 51.1974, "step": 20000 }, { "epoch": 0.8, "learning_rate": 4.204902455106078e-05, "loss": 0.462, "step": 20500 }, { "epoch": 0.8, "eval_bleu": 37.4618, "eval_chrf++": 62.7306, "eval_gen_len": 22.3868, "eval_loss": 0.9150309562683105, "eval_runtime": 2485.0566, "eval_samples_per_second": 6.744, "eval_spbleu": 50.7521, "eval_steps_per_second": 0.674, "eval_ter": 51.4503, "step": 20500 }, { "epoch": 0.81, "learning_rate": 4.1855098320598846e-05, "loss": 0.4584, "step": 21000 }, { "epoch": 0.81, "eval_bleu": 37.6302, "eval_chrf++": 62.7543, "eval_gen_len": 22.2999, "eval_loss": 0.9146909713745117, "eval_runtime": 2475.6351, "eval_samples_per_second": 6.769, "eval_spbleu": 50.9152, "eval_steps_per_second": 0.677, "eval_ter": 51.2347, "step": 21000 }, { "epoch": 0.83, "learning_rate": 4.166117209013691e-05, "loss": 0.4511, "step": 21500 }, { "epoch": 0.83, "eval_bleu": 37.4586, "eval_chrf++": 62.7716, "eval_gen_len": 22.491, "eval_loss": 0.9128248691558838, "eval_runtime": 2475.0464, "eval_samples_per_second": 6.771, "eval_spbleu": 50.7685, "eval_steps_per_second": 0.677, "eval_ter": 51.1974, "step": 21500 }, { "epoch": 0.85, "learning_rate": 4.146724585967498e-05, "loss": 0.4463, "step": 22000 }, { "epoch": 0.85, "eval_bleu": 37.352, "eval_chrf++": 62.9395, "eval_gen_len": 22.5926, "eval_loss": 0.9129999876022339, "eval_runtime": 2497.6416, "eval_samples_per_second": 6.71, "eval_spbleu": 50.4575, "eval_steps_per_second": 0.671, "eval_ter": 51.5074, "step": 22000 }, { "epoch": 0.87, "learning_rate": 4.1273319629213047e-05, "loss": 0.4442, "step": 22500 }, { "epoch": 0.87, "eval_bleu": 37.2191, "eval_chrf++": 62.6919, "eval_gen_len": 22.3296, "eval_loss": 0.913128137588501, "eval_runtime": 2479.804, "eval_samples_per_second": 6.758, "eval_spbleu": 50.5573, "eval_steps_per_second": 0.676, "eval_ter": 51.8057, "step": 22500 }, { "epoch": 0.89, "learning_rate": 4.107939339875111e-05, "loss": 0.4398, "step": 23000 }, { "epoch": 0.89, "eval_bleu": 37.4159, "eval_chrf++": 62.7847, "eval_gen_len": 22.3621, "eval_loss": 0.9087494611740112, "eval_runtime": 2492.9389, "eval_samples_per_second": 6.722, "eval_spbleu": 50.7329, "eval_steps_per_second": 0.672, "eval_ter": 51.1589, "step": 23000 }, { "epoch": 0.91, "learning_rate": 4.088546716828918e-05, "loss": 0.4601, "step": 23500 }, { "epoch": 0.91, "eval_bleu": 37.6768, "eval_chrf++": 63.0053, "eval_gen_len": 22.2636, "eval_loss": 0.9055464267730713, "eval_runtime": 2494.326, "eval_samples_per_second": 6.718, "eval_spbleu": 50.7562, "eval_steps_per_second": 0.672, "eval_ter": 51.2394, "step": 23500 }, { "epoch": 0.93, "learning_rate": 4.0691540937827254e-05, "loss": 0.4436, "step": 24000 }, { "epoch": 0.93, "eval_bleu": 37.7386, "eval_chrf++": 63.1409, "eval_gen_len": 22.6223, "eval_loss": 0.9039002656936646, "eval_runtime": 2526.9249, "eval_samples_per_second": 6.632, "eval_spbleu": 50.7251, "eval_steps_per_second": 0.663, "eval_ter": 51.3804, "step": 24000 }, { "epoch": 0.95, "learning_rate": 4.049761470736532e-05, "loss": 0.4654, "step": 24500 }, { "epoch": 0.95, "eval_bleu": 38.0304, "eval_chrf++": 63.2108, "eval_gen_len": 22.343, "eval_loss": 0.9021787047386169, "eval_runtime": 2462.9496, "eval_samples_per_second": 6.804, "eval_spbleu": 51.3142, "eval_steps_per_second": 0.68, "eval_ter": 50.4096, "step": 24500 }, { "epoch": 0.97, "learning_rate": 4.030368847690339e-05, "loss": 0.4485, "step": 25000 }, { "epoch": 0.97, "eval_bleu": 38.1296, "eval_chrf++": 63.3962, "eval_gen_len": 22.518, "eval_loss": 0.9031027555465698, "eval_runtime": 2500.9301, "eval_samples_per_second": 6.701, "eval_spbleu": 51.1976, "eval_steps_per_second": 0.67, "eval_ter": 51.0377, "step": 25000 } ], "max_steps": 128915, "num_train_epochs": 5, "total_flos": 2.127904100057088e+17, "trial_name": null, "trial_params": null }