{ "best_metric": null, "best_model_checkpoint": null, "epoch": 13.88888888888889, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 32.3172, "eval_gen_len": 18.7687, "eval_loss": 0.4578506052494049, "eval_meteor": 0.5435, "eval_runtime": 9.7073, "eval_samples_per_second": 15.143, "eval_steps_per_second": 0.515, "step": 36 }, { "epoch": 2.0, "eval_bleu": 39.1064, "eval_gen_len": 18.8776, "eval_loss": 0.2628231346607208, "eval_meteor": 0.6002, "eval_runtime": 4.5901, "eval_samples_per_second": 32.026, "eval_steps_per_second": 1.089, "step": 72 }, { "epoch": 3.0, "eval_bleu": 41.6145, "eval_gen_len": 18.9524, "eval_loss": 0.1903754472732544, "eval_meteor": 0.6203, "eval_runtime": 4.7069, "eval_samples_per_second": 31.231, "eval_steps_per_second": 1.062, "step": 108 }, { "epoch": 4.0, "eval_bleu": 42.3964, "eval_gen_len": 18.9592, "eval_loss": 0.15086665749549866, "eval_meteor": 0.6265, "eval_runtime": 4.8165, "eval_samples_per_second": 30.52, "eval_steps_per_second": 1.038, "step": 144 }, { "epoch": 5.0, "eval_bleu": 44.1755, "eval_gen_len": 18.9524, "eval_loss": 0.12055599689483643, "eval_meteor": 0.6429, "eval_runtime": 4.7165, "eval_samples_per_second": 31.167, "eval_steps_per_second": 1.06, "step": 180 }, { "epoch": 6.0, "eval_bleu": 44.9309, "eval_gen_len": 18.966, "eval_loss": 0.09637363255023956, "eval_meteor": 0.6459, "eval_runtime": 4.8039, "eval_samples_per_second": 30.6, "eval_steps_per_second": 1.041, "step": 216 }, { "epoch": 7.0, "eval_bleu": 45.7689, "eval_gen_len": 18.966, "eval_loss": 0.08098377287387848, "eval_meteor": 0.653, "eval_runtime": 4.7281, "eval_samples_per_second": 31.091, "eval_steps_per_second": 1.058, "step": 252 }, { "epoch": 8.0, "eval_bleu": 45.9332, "eval_gen_len": 18.966, "eval_loss": 0.06647928804159164, "eval_meteor": 0.6519, "eval_runtime": 4.8185, "eval_samples_per_second": 30.507, "eval_steps_per_second": 1.038, "step": 288 }, { "epoch": 9.0, "eval_bleu": 46.4058, "eval_gen_len": 18.966, "eval_loss": 0.05435887724161148, "eval_meteor": 0.6531, "eval_runtime": 4.7173, "eval_samples_per_second": 31.162, "eval_steps_per_second": 1.06, "step": 324 }, { "epoch": 10.0, "eval_bleu": 46.8506, "eval_gen_len": 18.966, "eval_loss": 0.04958011582493782, "eval_meteor": 0.6616, "eval_runtime": 4.7859, "eval_samples_per_second": 30.716, "eval_steps_per_second": 1.045, "step": 360 }, { "epoch": 11.0, "eval_bleu": 46.9661, "eval_gen_len": 18.9524, "eval_loss": 0.03974379226565361, "eval_meteor": 0.6633, "eval_runtime": 4.8008, "eval_samples_per_second": 30.62, "eval_steps_per_second": 1.041, "step": 396 }, { "epoch": 12.0, "eval_bleu": 47.1602, "eval_gen_len": 18.9524, "eval_loss": 0.03513666242361069, "eval_meteor": 0.6674, "eval_runtime": 4.7634, "eval_samples_per_second": 30.861, "eval_steps_per_second": 1.05, "step": 432 }, { "epoch": 13.0, "eval_bleu": 47.3154, "eval_gen_len": 18.966, "eval_loss": 0.03375198319554329, "eval_meteor": 0.6668, "eval_runtime": 4.8164, "eval_samples_per_second": 30.52, "eval_steps_per_second": 1.038, "step": 468 }, { "epoch": 13.89, "learning_rate": 8.888888888888888e-06, "loss": 0.327, "step": 500 } ], "logging_steps": 500, "max_steps": 900, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "total_flos": 2008343123066880.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }