{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.776824034334766, "eval_steps": 500, "global_step": 720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 0.0001, "loss": 3.6024, "step": 36 }, { "epoch": 0.99, "eval_bleu": 4.4454, "eval_bp": 0.6832, "eval_counts_1": 5645, "eval_counts_2": 1343, "eval_counts_3": 424, "eval_counts_4": 109, "eval_exact_match": 0.0005, "eval_f1": 0.2236, "eval_gen_len": 11.6338, "eval_loss": 2.468198776245117, "eval_precisions_1": 36.6844, "eval_precisions_2": 10.1866, "eval_precisions_3": 3.8616, "eval_precisions_4": 1.242, "eval_ref_len": 21250, "eval_rouge1": 0.2285, "eval_rouge2": 0.0824, "eval_rougeL": 0.2192, "eval_rougeLsum": 0.2188, "eval_runtime": 813.9917, "eval_samples_per_second": 2.708, "eval_steps_per_second": 0.677, "eval_sys_len": 15388, "eval_totals_1": 15388, "eval_totals_2": 13184, "eval_totals_3": 10980, "eval_totals_4": 8776, "step": 36 }, { "epoch": 1.98, "learning_rate": 0.0001, "loss": 2.9671, "step": 72 }, { "epoch": 1.98, "eval_bleu": 5.7163, "eval_bp": 0.7259, "eval_counts_1": 5988, "eval_counts_2": 1562, "eval_counts_3": 569, "eval_counts_4": 179, "eval_exact_match": 0.0018, "eval_f1": 0.2401, "eval_gen_len": 12.314, "eval_loss": 2.244511842727661, "eval_precisions_1": 37.2064, "eval_precisions_2": 11.2455, "eval_precisions_3": 4.8691, "eval_precisions_4": 1.8878, "eval_ref_len": 21250, "eval_rouge1": 0.2465, "eval_rouge2": 0.0971, "eval_rougeL": 0.2371, "eval_rougeLsum": 0.2371, "eval_runtime": 802.4783, "eval_samples_per_second": 2.746, "eval_steps_per_second": 0.687, "eval_sys_len": 16094, "eval_totals_1": 16094, "eval_totals_2": 13890, "eval_totals_3": 11686, "eval_totals_4": 9482, "step": 72 }, { "epoch": 2.99, "learning_rate": 0.0001, "loss": 2.6324, "step": 109 }, { "epoch": 2.99, "eval_bleu": 6.9028, "eval_bp": 0.7887, "eval_counts_1": 6539, "eval_counts_2": 1846, "eval_counts_3": 702, "eval_counts_4": 240, "eval_exact_match": 0.0027, "eval_f1": 0.2663, "eval_gen_len": 13.2319, "eval_loss": 2.122749090194702, "eval_precisions_1": 38.0772, "eval_precisions_2": 12.3322, "eval_precisions_3": 5.4994, "eval_precisions_4": 2.2725, "eval_ref_len": 21250, "eval_rouge1": 0.2729, "eval_rouge2": 0.1154, "eval_rougeL": 0.2601, "eval_rougeLsum": 0.2604, "eval_runtime": 822.9261, "eval_samples_per_second": 2.678, "eval_steps_per_second": 0.67, "eval_sys_len": 17173, "eval_totals_1": 17173, "eval_totals_2": 14969, "eval_totals_3": 12765, "eval_totals_4": 10561, "step": 109 }, { "epoch": 3.98, "learning_rate": 0.0001, "loss": 2.5557, "step": 145 }, { "epoch": 3.98, "eval_bleu": 7.3331, "eval_bp": 0.7179, "eval_counts_1": 6491, "eval_counts_2": 1923, "eval_counts_3": 752, "eval_counts_4": 275, "eval_exact_match": 0.0059, "eval_f1": 0.2729, "eval_gen_len": 12.0962, "eval_loss": 2.035691022872925, "eval_precisions_1": 40.6679, "eval_precisions_2": 13.9783, "eval_precisions_3": 6.5091, "eval_precisions_4": 2.9415, "eval_ref_len": 21250, "eval_rouge1": 0.2783, "eval_rouge2": 0.1214, "eval_rougeL": 0.2676, "eval_rougeLsum": 0.2678, "eval_runtime": 786.7967, "eval_samples_per_second": 2.801, "eval_steps_per_second": 0.7, "eval_sys_len": 15961, "eval_totals_1": 15961, "eval_totals_2": 13757, "eval_totals_3": 11553, "eval_totals_4": 9349, "step": 145 }, { "epoch": 5.0, "learning_rate": 0.0001, "loss": 2.3785, "step": 182 }, { "epoch": 5.0, "eval_bleu": 8.2007, "eval_bp": 0.7463, "eval_counts_1": 6808, "eval_counts_2": 2113, "eval_counts_3": 855, "eval_counts_4": 328, "eval_exact_match": 0.0064, "eval_f1": 0.2892, "eval_gen_len": 12.6819, "eval_loss": 1.9824347496032715, "eval_precisions_1": 41.4137, "eval_precisions_2": 14.8437, "eval_precisions_3": 7.1066, "eval_precisions_4": 3.3377, "eval_ref_len": 21250, "eval_rouge1": 0.2948, "eval_rouge2": 0.1326, "eval_rougeL": 0.2825, "eval_rougeLsum": 0.2825, "eval_runtime": 806.3535, "eval_samples_per_second": 2.733, "eval_steps_per_second": 0.683, "eval_sys_len": 16439, "eval_totals_1": 16439, "eval_totals_2": 14235, "eval_totals_3": 12031, "eval_totals_4": 9827, "step": 182 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 2.3396, "step": 218 }, { "epoch": 5.99, "eval_bleu": 8.639, "eval_bp": 0.7702, "eval_counts_1": 7033, "eval_counts_2": 2194, "eval_counts_3": 886, "eval_counts_4": 364, "eval_exact_match": 0.0086, "eval_f1": 0.3, "eval_gen_len": 13.0254, "eval_loss": 1.9448895454406738, "eval_precisions_1": 41.7364, "eval_precisions_2": 14.9792, "eval_precisions_3": 7.1205, "eval_precisions_4": 3.555, "eval_ref_len": 21250, "eval_rouge1": 0.3044, "eval_rouge2": 0.1373, "eval_rougeL": 0.292, "eval_rougeLsum": 0.2922, "eval_runtime": 473.2306, "eval_samples_per_second": 4.657, "eval_steps_per_second": 1.164, "eval_sys_len": 16851, "eval_totals_1": 16851, "eval_totals_2": 14647, "eval_totals_3": 12443, "eval_totals_4": 10239, "step": 218 }, { "epoch": 6.98, "learning_rate": 0.0001, "loss": 2.2557, "step": 254 }, { "epoch": 6.98, "eval_bleu": 9.049, "eval_bp": 0.7515, "eval_counts_1": 7167, "eval_counts_2": 2285, "eval_counts_3": 939, "eval_counts_4": 389, "eval_exact_match": 0.0095, "eval_f1": 0.3119, "eval_gen_len": 12.7119, "eval_loss": 1.8937886953353882, "eval_precisions_1": 43.3602, "eval_precisions_2": 15.9511, "eval_precisions_3": 7.7469, "eval_precisions_4": 3.9226, "eval_ref_len": 21250, "eval_rouge1": 0.3166, "eval_rouge2": 0.1428, "eval_rougeL": 0.3043, "eval_rougeLsum": 0.3046, "eval_runtime": 453.3958, "eval_samples_per_second": 4.861, "eval_steps_per_second": 1.215, "eval_sys_len": 16529, "eval_totals_1": 16529, "eval_totals_2": 14325, "eval_totals_3": 12121, "eval_totals_4": 9917, "step": 254 }, { "epoch": 7.99, "learning_rate": 0.0001, "loss": 2.1168, "step": 291 }, { "epoch": 7.99, "eval_bleu": 9.6447, "eval_bp": 0.7708, "eval_counts_1": 7347, "eval_counts_2": 2425, "eval_counts_3": 1021, "eval_counts_4": 425, "eval_exact_match": 0.0104, "eval_f1": 0.3211, "eval_gen_len": 12.9374, "eval_loss": 1.857459306716919, "eval_precisions_1": 43.5765, "eval_precisions_2": 16.5461, "eval_precisions_3": 8.1995, "eval_precisions_4": 4.1472, "eval_ref_len": 21250, "eval_rouge1": 0.3258, "eval_rouge2": 0.1505, "eval_rougeL": 0.3137, "eval_rougeLsum": 0.3142, "eval_runtime": 457.8255, "eval_samples_per_second": 4.814, "eval_steps_per_second": 1.204, "eval_sys_len": 16860, "eval_totals_1": 16860, "eval_totals_2": 14656, "eval_totals_3": 12452, "eval_totals_4": 10248, "step": 291 }, { "epoch": 8.98, "learning_rate": 0.0001, "loss": 2.1105, "step": 327 }, { "epoch": 8.98, "eval_bleu": 9.9436, "eval_bp": 0.7807, "eval_counts_1": 7460, "eval_counts_2": 2461, "eval_counts_3": 1061, "eval_counts_4": 449, "eval_exact_match": 0.0095, "eval_f1": 0.3267, "eval_gen_len": 13.1828, "eval_loss": 1.8283559083938599, "eval_precisions_1": 43.7948, "eval_precisions_2": 16.5947, "eval_precisions_3": 8.4033, "eval_precisions_4": 4.3082, "eval_ref_len": 21250, "eval_rouge1": 0.3317, "eval_rouge2": 0.1521, "eval_rougeL": 0.3187, "eval_rougeLsum": 0.3191, "eval_runtime": 464.6, "eval_samples_per_second": 4.744, "eval_steps_per_second": 1.186, "eval_sys_len": 17034, "eval_totals_1": 17034, "eval_totals_2": 14830, "eval_totals_3": 12626, "eval_totals_4": 10422, "step": 327 }, { "epoch": 10.0, "learning_rate": 0.0001, "loss": 1.9913, "step": 364 }, { "epoch": 10.0, "eval_bleu": 10.3601, "eval_bp": 0.7791, "eval_counts_1": 7547, "eval_counts_2": 2537, "eval_counts_3": 1105, "eval_counts_4": 487, "eval_exact_match": 0.0113, "eval_f1": 0.3316, "eval_gen_len": 13.0358, "eval_loss": 1.8056522607803345, "eval_precisions_1": 44.3811, "eval_precisions_2": 17.1407, "eval_precisions_3": 8.7719, "eval_precisions_4": 4.6858, "eval_ref_len": 21250, "eval_rouge1": 0.335, "eval_rouge2": 0.1566, "eval_rougeL": 0.323, "eval_rougeLsum": 0.3233, "eval_runtime": 492.674, "eval_samples_per_second": 4.474, "eval_steps_per_second": 1.118, "eval_sys_len": 17005, "eval_totals_1": 17005, "eval_totals_2": 14801, "eval_totals_3": 12597, "eval_totals_4": 10393, "step": 364 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 1.9943, "step": 400 }, { "epoch": 10.99, "eval_bleu": 10.5378, "eval_bp": 0.7697, "eval_counts_1": 7629, "eval_counts_2": 2574, "eval_counts_3": 1131, "eval_counts_4": 496, "eval_exact_match": 0.0113, "eval_f1": 0.3385, "eval_gen_len": 13.0154, "eval_loss": 1.7973003387451172, "eval_precisions_1": 45.2975, "eval_precisions_2": 17.5844, "eval_precisions_3": 9.096, "eval_precisions_4": 4.8485, "eval_ref_len": 21250, "eval_rouge1": 0.343, "eval_rouge2": 0.1594, "eval_rougeL": 0.3296, "eval_rougeLsum": 0.33, "eval_runtime": 454.7448, "eval_samples_per_second": 4.847, "eval_steps_per_second": 1.212, "eval_sys_len": 16842, "eval_totals_1": 16842, "eval_totals_2": 14638, "eval_totals_3": 12434, "eval_totals_4": 10230, "step": 400 }, { "epoch": 11.98, "learning_rate": 0.0001, "loss": 1.941, "step": 436 }, { "epoch": 11.98, "eval_bleu": 10.8273, "eval_bp": 0.7848, "eval_counts_1": 7681, "eval_counts_2": 2606, "eval_counts_3": 1164, "eval_counts_4": 528, "eval_exact_match": 0.0132, "eval_f1": 0.3385, "eval_gen_len": 13.1361, "eval_loss": 1.777303695678711, "eval_precisions_1": 44.905, "eval_precisions_2": 17.4888, "eval_precisions_3": 9.1675, "eval_precisions_4": 5.0319, "eval_ref_len": 21250, "eval_rouge1": 0.3421, "eval_rouge2": 0.1607, "eval_rougeL": 0.3295, "eval_rougeLsum": 0.3294, "eval_runtime": 458.5033, "eval_samples_per_second": 4.807, "eval_steps_per_second": 1.202, "eval_sys_len": 17105, "eval_totals_1": 17105, "eval_totals_2": 14901, "eval_totals_3": 12697, "eval_totals_4": 10493, "step": 436 }, { "epoch": 12.99, "learning_rate": 0.0001, "loss": 1.8453, "step": 473 }, { "epoch": 12.99, "eval_bleu": 11.2687, "eval_bp": 0.7972, "eval_counts_1": 7817, "eval_counts_2": 2700, "eval_counts_3": 1224, "eval_counts_4": 560, "eval_exact_match": 0.0127, "eval_f1": 0.3447, "eval_gen_len": 13.5018, "eval_loss": 1.7595148086547852, "eval_precisions_1": 45.1224, "eval_precisions_2": 17.8571, "eval_precisions_3": 9.4766, "eval_precisions_4": 5.2278, "eval_ref_len": 21250, "eval_rouge1": 0.3492, "eval_rouge2": 0.1662, "eval_rougeL": 0.3367, "eval_rougeLsum": 0.3367, "eval_runtime": 465.5444, "eval_samples_per_second": 4.734, "eval_steps_per_second": 1.184, "eval_sys_len": 17324, "eval_totals_1": 17324, "eval_totals_2": 15120, "eval_totals_3": 12916, "eval_totals_4": 10712, "step": 473 }, { "epoch": 13.98, "learning_rate": 0.0001, "loss": 1.85, "step": 509 }, { "epoch": 13.98, "eval_bleu": 10.9825, "eval_bp": 0.8025, "eval_counts_1": 7792, "eval_counts_2": 2642, "eval_counts_3": 1182, "eval_counts_4": 537, "eval_exact_match": 0.0127, "eval_f1": 0.3416, "eval_gen_len": 13.5395, "eval_loss": 1.7414402961730957, "eval_precisions_1": 44.7379, "eval_precisions_2": 17.3667, "eval_precisions_3": 9.086, "eval_precisions_4": 4.9699, "eval_ref_len": 21250, "eval_rouge1": 0.3458, "eval_rouge2": 0.1632, "eval_rougeL": 0.3322, "eval_rougeLsum": 0.3322, "eval_runtime": 468.8552, "eval_samples_per_second": 4.701, "eval_steps_per_second": 1.175, "eval_sys_len": 17417, "eval_totals_1": 17417, "eval_totals_2": 15213, "eval_totals_3": 13009, "eval_totals_4": 10805, "step": 509 }, { "epoch": 15.0, "learning_rate": 0.0001, "loss": 1.7588, "step": 546 }, { "epoch": 15.0, "eval_bleu": 11.3189, "eval_bp": 0.7939, "eval_counts_1": 7827, "eval_counts_2": 2702, "eval_counts_3": 1223, "eval_counts_4": 569, "eval_exact_match": 0.015, "eval_f1": 0.3446, "eval_gen_len": 13.3026, "eval_loss": 1.7346255779266357, "eval_precisions_1": 45.3345, "eval_precisions_2": 17.9404, "eval_precisions_3": 9.5123, "eval_precisions_4": 5.3412, "eval_ref_len": 21250, "eval_rouge1": 0.3487, "eval_rouge2": 0.1661, "eval_rougeL": 0.3355, "eval_rougeLsum": 0.3354, "eval_runtime": 464.8491, "eval_samples_per_second": 4.741, "eval_steps_per_second": 1.185, "eval_sys_len": 17265, "eval_totals_1": 17265, "eval_totals_2": 15061, "eval_totals_3": 12857, "eval_totals_4": 10653, "step": 546 }, { "epoch": 15.99, "learning_rate": 0.0001, "loss": 1.7663, "step": 582 }, { "epoch": 15.99, "eval_bleu": 11.5245, "eval_bp": 0.8032, "eval_counts_1": 7946, "eval_counts_2": 2757, "eval_counts_3": 1245, "eval_counts_4": 581, "eval_exact_match": 0.0154, "eval_f1": 0.3501, "eval_gen_len": 13.4515, "eval_loss": 1.7190728187561035, "eval_precisions_1": 45.5855, "eval_precisions_2": 18.106, "eval_precisions_3": 9.56, "eval_precisions_4": 5.3702, "eval_ref_len": 21250, "eval_rouge1": 0.3544, "eval_rouge2": 0.1695, "eval_rougeL": 0.3418, "eval_rougeLsum": 0.3416, "eval_runtime": 465.8123, "eval_samples_per_second": 4.732, "eval_steps_per_second": 1.183, "eval_sys_len": 17431, "eval_totals_1": 17431, "eval_totals_2": 15227, "eval_totals_3": 13023, "eval_totals_4": 10819, "step": 582 }, { "epoch": 16.98, "learning_rate": 0.0001, "loss": 1.7317, "step": 618 }, { "epoch": 16.98, "eval_bleu": 12.0845, "eval_bp": 0.8212, "eval_counts_1": 8068, "eval_counts_2": 2844, "eval_counts_3": 1325, "eval_counts_4": 633, "eval_exact_match": 0.0163, "eval_f1": 0.3527, "eval_gen_len": 13.77, "eval_loss": 1.7133468389511108, "eval_precisions_1": 45.4484, "eval_precisions_2": 18.2917, "eval_precisions_3": 9.9296, "eval_precisions_4": 5.6822, "eval_ref_len": 21250, "eval_rouge1": 0.3575, "eval_rouge2": 0.1746, "eval_rougeL": 0.3445, "eval_rougeLsum": 0.3447, "eval_runtime": 458.8154, "eval_samples_per_second": 4.804, "eval_steps_per_second": 1.201, "eval_sys_len": 17752, "eval_totals_1": 17752, "eval_totals_2": 15548, "eval_totals_3": 13344, "eval_totals_4": 11140, "step": 618 }, { "epoch": 17.99, "learning_rate": 0.0001, "loss": 1.6421, "step": 655 }, { "epoch": 17.99, "eval_bleu": 11.877, "eval_bp": 0.8091, "eval_counts_1": 8003, "eval_counts_2": 2823, "eval_counts_3": 1301, "eval_counts_4": 609, "eval_exact_match": 0.015, "eval_f1": 0.353, "eval_gen_len": 13.4669, "eval_loss": 1.719835877418518, "eval_precisions_1": 45.6401, "eval_precisions_2": 18.4137, "eval_precisions_3": 9.9109, "eval_precisions_4": 5.5754, "eval_ref_len": 21250, "eval_rouge1": 0.3576, "eval_rouge2": 0.1737, "eval_rougeL": 0.3447, "eval_rougeLsum": 0.3448, "eval_runtime": 467.8501, "eval_samples_per_second": 4.711, "eval_steps_per_second": 1.178, "eval_sys_len": 17535, "eval_totals_1": 17535, "eval_totals_2": 15331, "eval_totals_3": 13127, "eval_totals_4": 10923, "step": 655 }, { "epoch": 18.98, "learning_rate": 0.0001, "loss": 1.6543, "step": 691 }, { "epoch": 18.98, "eval_bleu": 11.8679, "eval_bp": 0.824, "eval_counts_1": 8031, "eval_counts_2": 2817, "eval_counts_3": 1294, "eval_counts_4": 612, "eval_exact_match": 0.015, "eval_f1": 0.351, "eval_gen_len": 13.8648, "eval_loss": 1.715085506439209, "eval_precisions_1": 45.1104, "eval_precisions_2": 18.0588, "eval_precisions_3": 9.6603, "eval_precisions_4": 5.4687, "eval_ref_len": 21250, "eval_rouge1": 0.3567, "eval_rouge2": 0.1734, "eval_rougeL": 0.3435, "eval_rougeLsum": 0.3431, "eval_runtime": 748.2265, "eval_samples_per_second": 2.946, "eval_steps_per_second": 0.736, "eval_sys_len": 17803, "eval_totals_1": 17803, "eval_totals_2": 15599, "eval_totals_3": 13395, "eval_totals_4": 11191, "step": 691 }, { "epoch": 19.78, "learning_rate": 0.0001, "loss": 1.5702, "step": 720 }, { "epoch": 19.78, "eval_bleu": 12.1229, "eval_bp": 0.7945, "eval_counts_1": 7996, "eval_counts_2": 2850, "eval_counts_3": 1330, "eval_counts_4": 639, "eval_exact_match": 0.0168, "eval_f1": 0.3569, "eval_gen_len": 13.3367, "eval_loss": 1.7079344987869263, "eval_precisions_1": 46.2865, "eval_precisions_2": 18.9105, "eval_precisions_3": 10.3365, "eval_precisions_4": 5.9927, "eval_ref_len": 21250, "eval_rouge1": 0.3618, "eval_rouge2": 0.1769, "eval_rougeL": 0.3485, "eval_rougeLsum": 0.348, "eval_runtime": 880.8231, "eval_samples_per_second": 2.502, "eval_steps_per_second": 0.626, "eval_sys_len": 17275, "eval_totals_1": 17275, "eval_totals_2": 15071, "eval_totals_3": 12867, "eval_totals_4": 10663, "step": 720 }, { "epoch": 19.78, "step": 720, "total_flos": 2.52283256045568e+17, "train_loss": 2.1398978657192655, "train_runtime": 23260.8504, "train_samples_per_second": 8.008, "train_steps_per_second": 0.031 } ], "logging_steps": 500, "max_steps": 720, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.52283256045568e+17, "trial_name": null, "trial_params": null }