{ "best_metric": null, "best_model_checkpoint": null, "epoch": 41.666666666666664, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 31.6092, "eval_gen_len": 18.7891, "eval_loss": 0.4555904269218445, "eval_meteor": 0.5392, "eval_runtime": 9.3254, "eval_samples_per_second": 15.763, "eval_steps_per_second": 0.536, "step": 36 }, { "epoch": 2.0, "eval_bleu": 39.2195, "eval_gen_len": 18.9592, "eval_loss": 0.2636318802833557, "eval_meteor": 0.596, "eval_runtime": 4.5819, "eval_samples_per_second": 32.083, "eval_steps_per_second": 1.091, "step": 72 }, { "epoch": 3.0, "eval_bleu": 41.6894, "eval_gen_len": 18.9456, "eval_loss": 0.19347849488258362, "eval_meteor": 0.6148, "eval_runtime": 4.5897, "eval_samples_per_second": 32.028, "eval_steps_per_second": 1.089, "step": 108 }, { "epoch": 4.0, "eval_bleu": 42.741, "eval_gen_len": 18.9524, "eval_loss": 0.14780716598033905, "eval_meteor": 0.629, "eval_runtime": 4.5508, "eval_samples_per_second": 32.302, "eval_steps_per_second": 1.099, "step": 144 }, { "epoch": 5.0, "eval_bleu": 44.9412, "eval_gen_len": 18.9592, "eval_loss": 0.10975220054388046, "eval_meteor": 0.6429, "eval_runtime": 4.5865, "eval_samples_per_second": 32.05, "eval_steps_per_second": 1.09, "step": 180 }, { "epoch": 6.0, "eval_bleu": 44.7737, "eval_gen_len": 18.9592, "eval_loss": 0.0995095744729042, "eval_meteor": 0.6438, "eval_runtime": 4.5237, "eval_samples_per_second": 32.495, "eval_steps_per_second": 1.105, "step": 216 }, { "epoch": 7.0, "eval_bleu": 46.3121, "eval_gen_len": 18.966, "eval_loss": 0.07827065885066986, "eval_meteor": 0.6584, "eval_runtime": 4.5425, "eval_samples_per_second": 32.361, "eval_steps_per_second": 1.101, "step": 252 }, { "epoch": 8.0, "eval_bleu": 46.0999, "eval_gen_len": 18.966, "eval_loss": 0.06610731780529022, "eval_meteor": 0.6517, "eval_runtime": 4.517, "eval_samples_per_second": 32.544, "eval_steps_per_second": 1.107, "step": 288 }, { "epoch": 9.0, "eval_bleu": 46.8293, "eval_gen_len": 18.9592, "eval_loss": 0.05300338938832283, "eval_meteor": 0.6635, "eval_runtime": 4.5646, "eval_samples_per_second": 32.204, "eval_steps_per_second": 1.095, "step": 324 }, { "epoch": 10.0, "eval_bleu": 46.8475, "eval_gen_len": 18.9592, "eval_loss": 0.04656795412302017, "eval_meteor": 0.6619, "eval_runtime": 4.5539, "eval_samples_per_second": 32.28, "eval_steps_per_second": 1.098, "step": 360 }, { "epoch": 11.0, "eval_bleu": 47.1376, "eval_gen_len": 18.966, "eval_loss": 0.038304273039102554, "eval_meteor": 0.6641, "eval_runtime": 4.5164, "eval_samples_per_second": 32.548, "eval_steps_per_second": 1.107, "step": 396 }, { "epoch": 12.0, "eval_bleu": 47.323, "eval_gen_len": 18.9796, "eval_loss": 0.03775802627205849, "eval_meteor": 0.6633, "eval_runtime": 4.5165, "eval_samples_per_second": 32.547, "eval_steps_per_second": 1.107, "step": 432 }, { "epoch": 13.0, "eval_bleu": 47.3289, "eval_gen_len": 18.9524, "eval_loss": 0.03001909889280796, "eval_meteor": 0.666, "eval_runtime": 4.5251, "eval_samples_per_second": 32.486, "eval_steps_per_second": 1.105, "step": 468 }, { "epoch": 13.89, "learning_rate": 1.4444444444444446e-05, "loss": 0.3225, "step": 500 }, { "epoch": 14.0, "eval_bleu": 47.5584, "eval_gen_len": 18.9796, "eval_loss": 0.025304924696683884, "eval_meteor": 0.6656, "eval_runtime": 4.5255, "eval_samples_per_second": 32.483, "eval_steps_per_second": 1.105, "step": 504 }, { "epoch": 15.0, "eval_bleu": 47.811, "eval_gen_len": 18.9796, "eval_loss": 0.020229607820510864, "eval_meteor": 0.667, "eval_runtime": 4.5539, "eval_samples_per_second": 32.28, "eval_steps_per_second": 1.098, "step": 540 }, { "epoch": 16.0, "eval_bleu": 47.4249, "eval_gen_len": 18.9524, "eval_loss": 0.01784553937613964, "eval_meteor": 0.6666, "eval_runtime": 4.5361, "eval_samples_per_second": 32.407, "eval_steps_per_second": 1.102, "step": 576 }, { "epoch": 17.0, "eval_bleu": 47.8092, "eval_gen_len": 18.9796, "eval_loss": 0.014756113290786743, "eval_meteor": 0.6668, "eval_runtime": 4.546, "eval_samples_per_second": 32.336, "eval_steps_per_second": 1.1, "step": 612 }, { "epoch": 18.0, "eval_bleu": 47.9094, "eval_gen_len": 18.966, "eval_loss": 0.012134283781051636, "eval_meteor": 0.6684, "eval_runtime": 4.5836, "eval_samples_per_second": 32.071, "eval_steps_per_second": 1.091, "step": 648 }, { "epoch": 19.0, "eval_bleu": 47.9966, "eval_gen_len": 18.9796, "eval_loss": 0.011289956048130989, "eval_meteor": 0.6676, "eval_runtime": 4.5433, "eval_samples_per_second": 32.355, "eval_steps_per_second": 1.101, "step": 684 }, { "epoch": 20.0, "eval_bleu": 47.7647, "eval_gen_len": 18.966, "eval_loss": 0.009249957278370857, "eval_meteor": 0.6692, "eval_runtime": 4.5767, "eval_samples_per_second": 32.119, "eval_steps_per_second": 1.092, "step": 720 }, { "epoch": 21.0, "eval_bleu": 47.7044, "eval_gen_len": 18.966, "eval_loss": 0.010435191914439201, "eval_meteor": 0.6681, "eval_runtime": 4.5541, "eval_samples_per_second": 32.279, "eval_steps_per_second": 1.098, "step": 756 }, { "epoch": 22.0, "eval_bleu": 47.8202, "eval_gen_len": 18.966, "eval_loss": 0.0071021514013409615, "eval_meteor": 0.6691, "eval_runtime": 4.5533, "eval_samples_per_second": 32.284, "eval_steps_per_second": 1.098, "step": 792 }, { "epoch": 23.0, "eval_bleu": 47.7776, "eval_gen_len": 18.966, "eval_loss": 0.006448396481573582, "eval_meteor": 0.6691, "eval_runtime": 4.5473, "eval_samples_per_second": 32.327, "eval_steps_per_second": 1.1, "step": 828 }, { "epoch": 24.0, "eval_bleu": 47.7776, "eval_gen_len": 18.966, "eval_loss": 0.005185275804251432, "eval_meteor": 0.6691, "eval_runtime": 4.5157, "eval_samples_per_second": 32.553, "eval_steps_per_second": 1.107, "step": 864 }, { "epoch": 25.0, "eval_bleu": 47.8964, "eval_gen_len": 18.966, "eval_loss": 0.00483354926109314, "eval_meteor": 0.669, "eval_runtime": 4.5248, "eval_samples_per_second": 32.488, "eval_steps_per_second": 1.105, "step": 900 }, { "epoch": 26.0, "eval_bleu": 47.8964, "eval_gen_len": 18.966, "eval_loss": 0.007188287563621998, "eval_meteor": 0.6689, "eval_runtime": 4.5497, "eval_samples_per_second": 32.309, "eval_steps_per_second": 1.099, "step": 936 }, { "epoch": 27.0, "eval_bleu": 47.7776, "eval_gen_len": 18.966, "eval_loss": 0.0029953974299132824, "eval_meteor": 0.6691, "eval_runtime": 4.5837, "eval_samples_per_second": 32.07, "eval_steps_per_second": 1.091, "step": 972 }, { "epoch": 27.78, "learning_rate": 8.888888888888888e-06, "loss": 0.0406, "step": 1000 }, { "epoch": 28.0, "eval_bleu": 47.7776, "eval_gen_len": 18.966, "eval_loss": 0.003659368259832263, "eval_meteor": 0.6691, "eval_runtime": 4.5366, "eval_samples_per_second": 32.403, "eval_steps_per_second": 1.102, "step": 1008 }, { "epoch": 29.0, "eval_bleu": 47.8202, "eval_gen_len": 18.966, "eval_loss": 0.002260121051222086, "eval_meteor": 0.6691, "eval_runtime": 4.5255, "eval_samples_per_second": 32.483, "eval_steps_per_second": 1.105, "step": 1044 }, { "epoch": 30.0, "eval_bleu": 47.8768, "eval_gen_len": 18.966, "eval_loss": 0.004143106751143932, "eval_meteor": 0.6693, "eval_runtime": 4.5546, "eval_samples_per_second": 32.275, "eval_steps_per_second": 1.098, "step": 1080 }, { "epoch": 31.0, "eval_bleu": 47.8964, "eval_gen_len": 18.966, "eval_loss": 0.003928070422261953, "eval_meteor": 0.669, "eval_runtime": 4.5798, "eval_samples_per_second": 32.097, "eval_steps_per_second": 1.092, "step": 1116 }, { "epoch": 32.0, "eval_bleu": 47.8964, "eval_gen_len": 18.966, "eval_loss": 0.0019372537499293685, "eval_meteor": 0.669, "eval_runtime": 4.5224, "eval_samples_per_second": 32.505, "eval_steps_per_second": 1.106, "step": 1152 }, { "epoch": 33.0, "eval_bleu": 47.8964, "eval_gen_len": 18.966, "eval_loss": 0.0019416833529248834, "eval_meteor": 0.669, "eval_runtime": 4.5629, "eval_samples_per_second": 32.216, "eval_steps_per_second": 1.096, "step": 1188 }, { "epoch": 34.0, "eval_bleu": 47.82, "eval_gen_len": 18.966, "eval_loss": 0.0017969176406040788, "eval_meteor": 0.669, "eval_runtime": 4.5036, "eval_samples_per_second": 32.64, "eval_steps_per_second": 1.11, "step": 1224 }, { "epoch": 35.0, "eval_bleu": 48.0042, "eval_gen_len": 18.966, "eval_loss": 0.0013237865641713142, "eval_meteor": 0.6701, "eval_runtime": 4.545, "eval_samples_per_second": 32.343, "eval_steps_per_second": 1.1, "step": 1260 }, { "epoch": 36.0, "eval_bleu": 47.8964, "eval_gen_len": 18.966, "eval_loss": 0.001009355066344142, "eval_meteor": 0.669, "eval_runtime": 4.5054, "eval_samples_per_second": 32.628, "eval_steps_per_second": 1.11, "step": 1296 }, { "epoch": 37.0, "eval_bleu": 47.8964, "eval_gen_len": 18.966, "eval_loss": 0.0010085658868774772, "eval_meteor": 0.669, "eval_runtime": 4.512, "eval_samples_per_second": 32.58, "eval_steps_per_second": 1.108, "step": 1332 }, { "epoch": 38.0, "eval_bleu": 47.9279, "eval_gen_len": 18.966, "eval_loss": 0.0019252120982855558, "eval_meteor": 0.67, "eval_runtime": 4.5663, "eval_samples_per_second": 32.192, "eval_steps_per_second": 1.095, "step": 1368 }, { "epoch": 39.0, "eval_bleu": 48.0042, "eval_gen_len": 18.966, "eval_loss": 0.0006166099337860942, "eval_meteor": 0.6701, "eval_runtime": 4.5135, "eval_samples_per_second": 32.569, "eval_steps_per_second": 1.108, "step": 1404 }, { "epoch": 40.0, "eval_bleu": 48.0042, "eval_gen_len": 18.966, "eval_loss": 0.0006395149976015091, "eval_meteor": 0.6701, "eval_runtime": 4.5938, "eval_samples_per_second": 31.999, "eval_steps_per_second": 1.088, "step": 1440 }, { "epoch": 41.0, "eval_bleu": 48.0042, "eval_gen_len": 18.966, "eval_loss": 0.0003768605238292366, "eval_meteor": 0.6701, "eval_runtime": 4.5485, "eval_samples_per_second": 32.318, "eval_steps_per_second": 1.099, "step": 1476 }, { "epoch": 41.67, "learning_rate": 3.3333333333333333e-06, "loss": 0.0166, "step": 1500 } ], "logging_steps": 500, "max_steps": 1800, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 5983929469071360.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }