{ "best_metric": 1.1539206504821777, "best_model_checkpoint": "./jako_mbartLarge_6p_run1/checkpoint-4000", "epoch": 3.8396928245740343, "eval_steps": 1000, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 4.951290793960059e-05, "loss": 1.8861, "step": 500 }, { "epoch": 0.48, "learning_rate": 4.829517778860205e-05, "loss": 1.4641, "step": 1000 }, { "epoch": 0.48, "eval_bleu": 21.6162, "eval_gen_len": 19.4434, "eval_loss": 1.3276299238204956, "eval_runtime": 299.0357, "eval_samples_per_second": 13.931, "eval_steps_per_second": 0.873, "step": 1000 }, { "epoch": 0.72, "learning_rate": 4.707744763760351e-05, "loss": 1.3282, "step": 1500 }, { "epoch": 0.96, "learning_rate": 4.585971748660497e-05, "loss": 1.2615, "step": 2000 }, { "epoch": 0.96, "eval_bleu": 24.346, "eval_gen_len": 19.4734, "eval_loss": 1.186624526977539, "eval_runtime": 297.0522, "eval_samples_per_second": 14.024, "eval_steps_per_second": 0.879, "step": 2000 }, { "epoch": 1.2, "learning_rate": 4.4641987335606436e-05, "loss": 1.0805, "step": 2500 }, { "epoch": 1.44, "learning_rate": 4.342425718460789e-05, "loss": 0.9103, "step": 3000 }, { "epoch": 1.44, "eval_bleu": 25.4249, "eval_gen_len": 19.0086, "eval_loss": 1.1637648344039917, "eval_runtime": 293.4921, "eval_samples_per_second": 14.195, "eval_steps_per_second": 0.889, "step": 3000 }, { "epoch": 1.68, "learning_rate": 4.2206527033609356e-05, "loss": 0.8534, "step": 3500 }, { "epoch": 1.92, "learning_rate": 4.0988796882610817e-05, "loss": 0.8285, "step": 4000 }, { "epoch": 1.92, "eval_bleu": 26.2658, "eval_gen_len": 19.3961, "eval_loss": 1.1539206504821777, "eval_runtime": 298.2089, "eval_samples_per_second": 13.97, "eval_steps_per_second": 0.875, "step": 4000 }, { "epoch": 2.16, "learning_rate": 3.977106673161228e-05, "loss": 0.7521, "step": 4500 }, { "epoch": 2.4, "learning_rate": 3.855333658061374e-05, "loss": 0.5977, "step": 5000 }, { "epoch": 2.4, "eval_bleu": 25.5651, "eval_gen_len": 19.6248, "eval_loss": 1.1977771520614624, "eval_runtime": 299.5483, "eval_samples_per_second": 13.908, "eval_steps_per_second": 0.871, "step": 5000 }, { "epoch": 2.64, "learning_rate": 3.73356064296152e-05, "loss": 0.5686, "step": 5500 }, { "epoch": 2.88, "learning_rate": 3.611787627861666e-05, "loss": 0.5423, "step": 6000 }, { "epoch": 2.88, "eval_bleu": 26.8441, "eval_gen_len": 19.1349, "eval_loss": 1.1830259561538696, "eval_runtime": 285.8007, "eval_samples_per_second": 14.577, "eval_steps_per_second": 0.913, "step": 6000 }, { "epoch": 3.12, "learning_rate": 3.4900146127618125e-05, "loss": 0.5099, "step": 6500 }, { "epoch": 3.36, "learning_rate": 3.368241597661958e-05, "loss": 0.3816, "step": 7000 }, { "epoch": 3.36, "eval_bleu": 26.1301, "eval_gen_len": 19.1207, "eval_loss": 1.266960620880127, "eval_runtime": 292.1624, "eval_samples_per_second": 14.259, "eval_steps_per_second": 0.893, "step": 7000 }, { "epoch": 3.6, "learning_rate": 3.2464685825621045e-05, "loss": 0.3637, "step": 7500 }, { "epoch": 3.84, "learning_rate": 3.1246955674622506e-05, "loss": 0.3412, "step": 8000 }, { "epoch": 3.84, "eval_bleu": 26.7783, "eval_gen_len": 19.2417, "eval_loss": 1.2869776487350464, "eval_runtime": 291.2259, "eval_samples_per_second": 14.305, "eval_steps_per_second": 0.896, "step": 8000 }, { "epoch": 3.84, "step": 8000, "total_flos": 2.7749663225623347e+17, "train_loss": 0.8543571968078614, "train_runtime": 11541.5643, "train_samples_per_second": 28.881, "train_steps_per_second": 1.805 } ], "logging_steps": 500, "max_steps": 20830, "num_train_epochs": 10, "save_steps": 1000, "total_flos": 2.7749663225623347e+17, "trial_name": null, "trial_params": null }