{ "best_metric": 35.2849, "best_model_checkpoint": "large_ox-wn_cod_15ep_eap/checkpoint-38360", "epoch": 15.0, "global_step": 41100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.666666666666667e-05, "loss": 2.1769, "step": 2740 }, { "epoch": 1.0, "eval_gen_len": 11.342940924045202, "eval_loss": 1.905047059059143, "eval_rouge1": 28.7222, "eval_rouge2": 9.1873, "eval_rougeL": 26.6888, "eval_rougeLsum": 26.6937, "eval_runtime": 86.0596, "eval_samples_per_second": 162.469, "eval_steps_per_second": 1.278, "step": 2740 }, { "epoch": 2.0, "learning_rate": 4.3333333333333334e-05, "loss": 1.9408, "step": 5480 }, { "epoch": 2.0, "eval_gen_len": 11.416464025175225, "eval_loss": 1.8151417970657349, "eval_rouge1": 29.8799, "eval_rouge2": 10.2327, "eval_rougeL": 27.7947, "eval_rougeLsum": 27.8044, "eval_runtime": 98.7019, "eval_samples_per_second": 141.659, "eval_steps_per_second": 1.114, "step": 5480 }, { "epoch": 3.0, "learning_rate": 4e-05, "loss": 1.8124, "step": 8220 }, { "epoch": 3.0, "eval_gen_len": 11.531039908453726, "eval_loss": 1.7607892751693726, "eval_rouge1": 30.9845, "eval_rouge2": 10.9982, "eval_rougeL": 28.8059, "eval_rougeLsum": 28.8131, "eval_runtime": 96.5011, "eval_samples_per_second": 144.889, "eval_steps_per_second": 1.14, "step": 8220 }, { "epoch": 4.0, "learning_rate": 3.6666666666666666e-05, "loss": 1.7118, "step": 10960 }, { "epoch": 4.0, "eval_gen_len": 11.703690459161779, "eval_loss": 1.7228699922561646, "eval_rouge1": 31.6943, "eval_rouge2": 11.7412, "eval_rougeL": 29.4967, "eval_rougeLsum": 29.5319, "eval_runtime": 87.7321, "eval_samples_per_second": 159.372, "eval_steps_per_second": 1.254, "step": 10960 }, { "epoch": 5.0, "learning_rate": 3.3333333333333335e-05, "loss": 1.6286, "step": 13700 }, { "epoch": 5.0, "eval_gen_len": 11.77835788871406, "eval_loss": 1.6936795711517334, "eval_rouge1": 32.5839, "eval_rouge2": 12.2431, "eval_rougeL": 30.1799, "eval_rougeLsum": 30.206, "eval_runtime": 84.5028, "eval_samples_per_second": 165.462, "eval_steps_per_second": 1.302, "step": 13700 }, { "epoch": 6.0, "learning_rate": 3e-05, "loss": 1.5597, "step": 16440 }, { "epoch": 6.0, "eval_gen_len": 11.597410956944643, "eval_loss": 1.674757480621338, "eval_rouge1": 32.9915, "eval_rouge2": 12.8514, "eval_rougeL": 30.7016, "eval_rougeLsum": 30.7145, "eval_runtime": 87.802, "eval_samples_per_second": 159.245, "eval_steps_per_second": 1.253, "step": 16440 }, { "epoch": 7.0, "learning_rate": 2.6666666666666667e-05, "loss": 1.4982, "step": 19180 }, { "epoch": 7.0, "eval_gen_len": 11.358031755113718, "eval_loss": 1.6578471660614014, "eval_rouge1": 33.2157, "eval_rouge2": 13.1389, "eval_rougeL": 30.9428, "eval_rougeLsum": 30.9519, "eval_runtime": 89.406, "eval_samples_per_second": 156.388, "eval_steps_per_second": 1.23, "step": 19180 }, { "epoch": 8.0, "learning_rate": 2.3333333333333336e-05, "loss": 1.4468, "step": 21920 }, { "epoch": 8.0, "eval_gen_len": 11.572378772707767, "eval_loss": 1.6473166942596436, "eval_rouge1": 33.6146, "eval_rouge2": 13.5922, "eval_rougeL": 31.3001, "eval_rougeLsum": 31.3235, "eval_runtime": 98.6248, "eval_samples_per_second": 141.77, "eval_steps_per_second": 1.115, "step": 21920 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 1.4022, "step": 24660 }, { "epoch": 9.0, "eval_gen_len": 11.738878558146189, "eval_loss": 1.6383947134017944, "eval_rouge1": 34.1711, "eval_rouge2": 14.1117, "eval_rougeL": 31.7951, "eval_rougeLsum": 31.8066, "eval_runtime": 89.5372, "eval_samples_per_second": 156.159, "eval_steps_per_second": 1.229, "step": 24660 }, { "epoch": 10.0, "learning_rate": 1.6666666666666667e-05, "loss": 1.364, "step": 27400 }, { "epoch": 10.0, "eval_gen_len": 11.665856100700902, "eval_loss": 1.6336920261383057, "eval_rouge1": 34.5489, "eval_rouge2": 14.5012, "eval_rougeL": 32.1329, "eval_rougeLsum": 32.1446, "eval_runtime": 103.7766, "eval_samples_per_second": 134.732, "eval_steps_per_second": 1.06, "step": 27400 }, { "epoch": 11.0, "learning_rate": 1.3333333333333333e-05, "loss": 1.3321, "step": 30140 }, { "epoch": 11.0, "eval_gen_len": 11.800314690316121, "eval_loss": 1.6291483640670776, "eval_rouge1": 34.7133, "eval_rouge2": 14.7297, "eval_rougeL": 32.3042, "eval_rougeLsum": 32.314, "eval_runtime": 91.3961, "eval_samples_per_second": 152.982, "eval_steps_per_second": 1.204, "step": 30140 }, { "epoch": 12.0, "learning_rate": 1e-05, "loss": 1.3054, "step": 32880 }, { "epoch": 12.0, "eval_gen_len": 11.761908167644114, "eval_loss": 1.6267131567001343, "eval_rouge1": 34.9411, "eval_rouge2": 15.0282, "eval_rougeL": 32.5335, "eval_rougeLsum": 32.5451, "eval_runtime": 98.5092, "eval_samples_per_second": 141.936, "eval_steps_per_second": 1.117, "step": 32880 }, { "epoch": 13.0, "learning_rate": 6.666666666666667e-06, "loss": 1.2845, "step": 35620 }, { "epoch": 13.0, "eval_gen_len": 11.831712201401801, "eval_loss": 1.626239538192749, "eval_rouge1": 35.1648, "eval_rouge2": 15.2154, "eval_rougeL": 32.7387, "eval_rougeLsum": 32.742, "eval_runtime": 85.528, "eval_samples_per_second": 163.479, "eval_steps_per_second": 1.286, "step": 35620 }, { "epoch": 14.0, "learning_rate": 3.3333333333333333e-06, "loss": 1.2699, "step": 38360 }, { "epoch": 14.0, "eval_gen_len": 11.816764411386067, "eval_loss": 1.6257190704345703, "eval_rouge1": 35.2849, "eval_rouge2": 15.3109, "eval_rougeL": 32.8508, "eval_rougeLsum": 32.853, "eval_runtime": 84.6116, "eval_samples_per_second": 165.249, "eval_steps_per_second": 1.3, "step": 38360 }, { "epoch": 15.0, "learning_rate": 0.0, "loss": 1.2595, "step": 41100 }, { "epoch": 15.0, "eval_gen_len": 11.797096266628522, "eval_loss": 1.6273423433303833, "eval_rouge1": 35.2224, "eval_rouge2": 15.2781, "eval_rougeL": 32.7718, "eval_rougeLsum": 32.7826, "eval_runtime": 95.1523, "eval_samples_per_second": 146.943, "eval_steps_per_second": 1.156, "step": 41100 }, { "epoch": 15.0, "step": 41100, "total_flos": 9.049973435337277e+17, "train_loss": 1.5328590292826185, "train_runtime": 18417.6233, "train_samples_per_second": 142.797, "train_steps_per_second": 2.232 } ], "max_steps": 41100, "num_train_epochs": 15, "total_flos": 9.049973435337277e+17, "trial_name": null, "trial_params": null }