{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.789564097058193, "eval_steps": 500, "global_step": 720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 0.0001, "loss": 8.8727, "step": 36 }, { "epoch": 0.99, "eval_bleu": 0.0, "eval_bp": 0.0002, "eval_counts_1": 2198, "eval_counts_2": 0, "eval_counts_3": 0, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_gen_len": 2.0, "eval_loss": 6.380987644195557, "eval_precisions_1": 99.7278, "eval_precisions_2": 0.0, "eval_precisions_3": 0.0, "eval_precisions_4": 0.0, "eval_ref_len": 21250, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 575.1236, "eval_samples_per_second": 3.832, "eval_steps_per_second": 1.916, "eval_sys_len": 2204, "eval_totals_1": 2204, "eval_totals_2": 0, "eval_totals_3": 0, "eval_totals_4": 0, "step": 36 }, { "epoch": 1.98, "learning_rate": 0.0001, "loss": 6.0165, "step": 72 }, { "epoch": 1.98, "eval_bleu": 0.0851, "eval_bp": 1.0, "eval_counts_1": 3587, "eval_counts_2": 137, "eval_counts_3": 0, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.073, "eval_gen_len": 15.0091, "eval_loss": 5.386419773101807, "eval_precisions_1": 16.3342, "eval_precisions_2": 0.6935, "eval_precisions_3": 0.0028, "eval_precisions_4": 0.0016, "eval_ref_len": 21250, "eval_rouge1": 0.0702, "eval_rouge2": 0.0079, "eval_rougeL": 0.07, "eval_rougeLsum": 0.07, "eval_runtime": 2466.4687, "eval_samples_per_second": 0.894, "eval_steps_per_second": 0.447, "eval_sys_len": 21960, "eval_totals_1": 21960, "eval_totals_2": 19756, "eval_totals_3": 17552, "eval_totals_4": 15348, "step": 72 }, { "epoch": 3.0, "learning_rate": 0.0001, "loss": 5.1537, "step": 109 }, { "epoch": 3.0, "eval_bleu": 0.13, "eval_bp": 0.6246, "eval_counts_1": 3601, "eval_counts_2": 145, "eval_counts_3": 1, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.0926, "eval_gen_len": 9.5309, "eval_loss": 4.961660861968994, "eval_precisions_1": 24.9221, "eval_precisions_2": 1.1842, "eval_precisions_3": 0.01, "eval_precisions_4": 0.0064, "eval_ref_len": 21250, "eval_rouge1": 0.0882, "eval_rouge2": 0.0107, "eval_rougeL": 0.0877, "eval_rougeLsum": 0.0876, "eval_runtime": 2425.7875, "eval_samples_per_second": 0.909, "eval_steps_per_second": 0.454, "eval_sys_len": 14449, "eval_totals_1": 14449, "eval_totals_2": 12245, "eval_totals_3": 10041, "eval_totals_4": 7837, "step": 109 }, { "epoch": 3.99, "learning_rate": 0.0001, "loss": 4.863, "step": 145 }, { "epoch": 3.99, "eval_bleu": 0.1468, "eval_bp": 1.0, "eval_counts_1": 4590, "eval_counts_2": 229, "eval_counts_3": 19, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.0836, "eval_gen_len": 29.4528, "eval_loss": 4.553112030029297, "eval_precisions_1": 11.0141, "eval_precisions_2": 0.5802, "eval_precisions_3": 0.051, "eval_precisions_4": 0.0014, "eval_ref_len": 21250, "eval_rouge1": 0.0811, "eval_rouge2": 0.0081, "eval_rougeL": 0.0768, "eval_rougeLsum": 0.0767, "eval_runtime": 4425.6343, "eval_samples_per_second": 0.498, "eval_steps_per_second": 0.249, "eval_sys_len": 41674, "eval_totals_1": 41674, "eval_totals_2": 39470, "eval_totals_3": 37266, "eval_totals_4": 35062, "step": 145 }, { "epoch": 4.97, "learning_rate": 0.0001, "loss": 4.5201, "step": 181 }, { "epoch": 4.97, "eval_bleu": 0.2845, "eval_bp": 0.7265, "eval_counts_1": 3643, "eval_counts_2": 169, "eval_counts_3": 19, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.0907, "eval_gen_len": 12.5077, "eval_loss": 4.201998233795166, "eval_precisions_1": 22.6217, "eval_precisions_2": 1.2158, "eval_precisions_3": 0.1624, "eval_precisions_4": 0.0053, "eval_ref_len": 21250, "eval_rouge1": 0.0865, "eval_rouge2": 0.0115, "eval_rougeL": 0.0856, "eval_rougeLsum": 0.0855, "eval_runtime": 2637.7264, "eval_samples_per_second": 0.836, "eval_steps_per_second": 0.418, "eval_sys_len": 16104, "eval_totals_1": 16104, "eval_totals_2": 13900, "eval_totals_3": 11696, "eval_totals_4": 9492, "step": 181 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 4.1347, "step": 218 }, { "epoch": 5.99, "eval_bleu": 0.2878, "eval_bp": 0.7671, "eval_counts_1": 3670, "eval_counts_2": 167, "eval_counts_3": 20, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.0917, "eval_gen_len": 13.1656, "eval_loss": 3.9352548122406006, "eval_precisions_1": 21.8504, "eval_precisions_2": 1.1445, "eval_precisions_3": 0.1614, "eval_precisions_4": 0.0049, "eval_ref_len": 21250, "eval_rouge1": 0.087, "eval_rouge2": 0.0114, "eval_rougeL": 0.0859, "eval_rougeLsum": 0.0858, "eval_runtime": 3199.7795, "eval_samples_per_second": 0.689, "eval_steps_per_second": 0.344, "eval_sys_len": 16796, "eval_totals_1": 16796, "eval_totals_2": 14592, "eval_totals_3": 12388, "eval_totals_4": 10184, "step": 218 }, { "epoch": 6.98, "learning_rate": 0.0001, "loss": 4.012, "step": 254 }, { "epoch": 6.98, "eval_bleu": 0.4139, "eval_bp": 0.7546, "eval_counts_1": 3780, "eval_counts_2": 198, "eval_counts_3": 35, "eval_counts_4": 1, "eval_exact_match": 0.0, "eval_f1": 0.0968, "eval_gen_len": 12.2931, "eval_loss": 3.759270191192627, "eval_precisions_1": 22.7958, "eval_precisions_2": 1.3771, "eval_precisions_3": 0.2875, "eval_precisions_4": 0.01, "eval_ref_len": 21250, "eval_rouge1": 0.0916, "eval_rouge2": 0.0128, "eval_rougeL": 0.0903, "eval_rougeLsum": 0.0902, "eval_runtime": 2449.2398, "eval_samples_per_second": 0.9, "eval_steps_per_second": 0.45, "eval_sys_len": 16582, "eval_totals_1": 16582, "eval_totals_2": 14378, "eval_totals_3": 12174, "eval_totals_4": 9970, "step": 254 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 3.7048, "step": 291 }, { "epoch": 8.0, "eval_bleu": 0.5493, "eval_bp": 0.7297, "eval_counts_1": 3668, "eval_counts_2": 205, "eval_counts_3": 36, "eval_counts_4": 3, "eval_exact_match": 0.0, "eval_f1": 0.0923, "eval_gen_len": 11.7568, "eval_loss": 3.603360414505005, "eval_precisions_1": 22.7008, "eval_precisions_2": 1.4691, "eval_precisions_3": 0.3064, "eval_precisions_4": 0.0314, "eval_ref_len": 21250, "eval_rouge1": 0.0882, "eval_rouge2": 0.0134, "eval_rougeL": 0.0873, "eval_rougeLsum": 0.0872, "eval_runtime": 2487.6786, "eval_samples_per_second": 0.886, "eval_steps_per_second": 0.443, "eval_sys_len": 16158, "eval_totals_1": 16158, "eval_totals_2": 13954, "eval_totals_3": 11750, "eval_totals_4": 9546, "step": 291 }, { "epoch": 8.99, "learning_rate": 0.0001, "loss": 3.6284, "step": 327 }, { "epoch": 8.99, "eval_bleu": 1.8083, "eval_bp": 0.8048, "eval_counts_1": 4070, "eval_counts_2": 527, "eval_counts_3": 160, "eval_counts_4": 28, "eval_exact_match": 0.0, "eval_f1": 0.1152, "eval_gen_len": 9.7777, "eval_loss": 3.4566922187805176, "eval_precisions_1": 23.3118, "eval_precisions_2": 3.4546, "eval_precisions_3": 1.226, "eval_precisions_4": 0.2581, "eval_ref_len": 21250, "eval_rouge1": 0.1109, "eval_rouge2": 0.0281, "eval_rougeL": 0.1083, "eval_rougeLsum": 0.1082, "eval_runtime": 2473.1224, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.446, "eval_sys_len": 17459, "eval_totals_1": 17459, "eval_totals_2": 15255, "eval_totals_3": 13051, "eval_totals_4": 10847, "step": 327 }, { "epoch": 9.98, "learning_rate": 0.0001, "loss": 3.4605, "step": 363 }, { "epoch": 9.98, "eval_bleu": 1.6972, "eval_bp": 0.8793, "eval_counts_1": 4325, "eval_counts_2": 512, "eval_counts_3": 128, "eval_counts_4": 27, "eval_exact_match": 0.0, "eval_f1": 0.1254, "eval_gen_len": 12.6729, "eval_loss": 3.339012622833252, "eval_precisions_1": 22.9699, "eval_precisions_2": 3.0797, "eval_precisions_3": 0.8876, "eval_precisions_4": 0.221, "eval_ref_len": 21250, "eval_rouge1": 0.1206, "eval_rouge2": 0.0288, "eval_rougeL": 0.1168, "eval_rougeLsum": 0.1167, "eval_runtime": 2821.508, "eval_samples_per_second": 0.781, "eval_steps_per_second": 0.391, "eval_sys_len": 18829, "eval_totals_1": 18829, "eval_totals_2": 16625, "eval_totals_3": 14421, "eval_totals_4": 12217, "step": 363 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 3.2267, "step": 400 }, { "epoch": 10.99, "eval_bleu": 2.5735, "eval_bp": 0.8779, "eval_counts_1": 4498, "eval_counts_2": 774, "eval_counts_3": 237, "eval_counts_4": 49, "eval_exact_match": 0.0005, "eval_f1": 0.1381, "eval_gen_len": 11.5009, "eval_loss": 3.199504852294922, "eval_precisions_1": 23.923, "eval_precisions_2": 4.6632, "eval_precisions_3": 1.6465, "eval_precisions_4": 0.402, "eval_ref_len": 21250, "eval_rouge1": 0.1348, "eval_rouge2": 0.0405, "eval_rougeL": 0.132, "eval_rougeLsum": 0.1319, "eval_runtime": 2727.7858, "eval_samples_per_second": 0.808, "eval_steps_per_second": 0.404, "eval_sys_len": 18802, "eval_totals_1": 18802, "eval_totals_2": 16598, "eval_totals_3": 14394, "eval_totals_4": 12190, "step": 400 }, { "epoch": 11.98, "learning_rate": 0.0001, "loss": 3.1761, "step": 436 }, { "epoch": 11.98, "eval_bleu": 2.7554, "eval_bp": 0.7767, "eval_counts_1": 4578, "eval_counts_2": 866, "eval_counts_3": 260, "eval_counts_4": 50, "eval_exact_match": 0.0005, "eval_f1": 0.1492, "eval_gen_len": 10.5172, "eval_loss": 3.116502523422241, "eval_precisions_1": 26.9882, "eval_precisions_2": 5.8676, "eval_precisions_3": 2.0709, "eval_precisions_4": 0.483, "eval_ref_len": 21250, "eval_rouge1": 0.1454, "eval_rouge2": 0.0464, "eval_rougeL": 0.1426, "eval_rougeLsum": 0.1427, "eval_runtime": 2404.4354, "eval_samples_per_second": 0.917, "eval_steps_per_second": 0.458, "eval_sys_len": 16963, "eval_totals_1": 16963, "eval_totals_2": 14759, "eval_totals_3": 12555, "eval_totals_4": 10351, "step": 436 }, { "epoch": 12.97, "learning_rate": 0.0001, "loss": 3.0323, "step": 472 }, { "epoch": 12.97, "eval_bleu": 3.2318, "eval_bp": 0.839, "eval_counts_1": 5019, "eval_counts_2": 1048, "eval_counts_3": 319, "eval_counts_4": 59, "eval_exact_match": 0.0009, "eval_f1": 0.1729, "eval_gen_len": 12.8294, "eval_loss": 3.0074305534362793, "eval_precisions_1": 27.7646, "eval_precisions_2": 6.6024, "eval_precisions_3": 2.3337, "eval_precisions_4": 0.5146, "eval_ref_len": 21250, "eval_rouge1": 0.1691, "eval_rouge2": 0.0557, "eval_rougeL": 0.1648, "eval_rougeLsum": 0.1647, "eval_runtime": 1416.83, "eval_samples_per_second": 1.556, "eval_steps_per_second": 0.778, "eval_sys_len": 18077, "eval_totals_1": 18077, "eval_totals_2": 15873, "eval_totals_3": 13669, "eval_totals_4": 11465, "step": 472 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 2.8223, "step": 509 }, { "epoch": 13.99, "eval_bleu": 3.7161, "eval_bp": 0.783, "eval_counts_1": 5257, "eval_counts_2": 1120, "eval_counts_3": 341, "eval_counts_4": 85, "eval_exact_match": 0.0018, "eval_f1": 0.1929, "eval_gen_len": 12.6824, "eval_loss": 2.891127109527588, "eval_precisions_1": 30.7895, "eval_precisions_2": 7.5319, "eval_precisions_3": 2.6922, "eval_precisions_4": 0.8125, "eval_ref_len": 21250, "eval_rouge1": 0.189, "eval_rouge2": 0.0635, "eval_rougeL": 0.1841, "eval_rougeLsum": 0.184, "eval_runtime": 2341.3383, "eval_samples_per_second": 0.941, "eval_steps_per_second": 0.471, "eval_sys_len": 17074, "eval_totals_1": 17074, "eval_totals_2": 14870, "eval_totals_3": 12666, "eval_totals_4": 10462, "step": 509 }, { "epoch": 14.98, "learning_rate": 0.0001, "loss": 2.7732, "step": 545 }, { "epoch": 14.98, "eval_bleu": 4.3667, "eval_bp": 0.8229, "eval_counts_1": 5616, "eval_counts_2": 1271, "eval_counts_3": 407, "eval_counts_4": 113, "eval_exact_match": 0.0045, "eval_f1": 0.217, "eval_gen_len": 13.0944, "eval_loss": 2.8103041648864746, "eval_precisions_1": 31.5789, "eval_precisions_2": 8.1579, "eval_precisions_3": 3.0428, "eval_precisions_4": 1.0115, "eval_ref_len": 21250, "eval_rouge1": 0.2122, "eval_rouge2": 0.0731, "eval_rougeL": 0.2063, "eval_rougeLsum": 0.2061, "eval_runtime": 2877.1799, "eval_samples_per_second": 0.766, "eval_steps_per_second": 0.383, "eval_sys_len": 17784, "eval_totals_1": 17784, "eval_totals_2": 15580, "eval_totals_3": 13376, "eval_totals_4": 11172, "step": 545 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 2.58, "step": 582 }, { "epoch": 16.0, "eval_bleu": 5.357, "eval_bp": 0.8782, "eval_counts_1": 5959, "eval_counts_2": 1461, "eval_counts_3": 510, "eval_counts_4": 171, "eval_exact_match": 0.0064, "eval_f1": 0.2316, "eval_gen_len": 13.9174, "eval_loss": 2.718313694000244, "eval_precisions_1": 31.6833, "eval_precisions_2": 8.7991, "eval_precisions_3": 3.5417, "eval_precisions_4": 1.4021, "eval_ref_len": 21250, "eval_rouge1": 0.2286, "eval_rouge2": 0.0822, "eval_rougeL": 0.2214, "eval_rougeLsum": 0.2212, "eval_runtime": 2978.2825, "eval_samples_per_second": 0.74, "eval_steps_per_second": 0.37, "eval_sys_len": 18808, "eval_totals_1": 18808, "eval_totals_2": 16604, "eval_totals_3": 14400, "eval_totals_4": 12196, "step": 582 }, { "epoch": 16.99, "learning_rate": 0.0001, "loss": 2.5368, "step": 618 }, { "epoch": 16.99, "eval_bleu": 5.8686, "eval_bp": 0.7744, "eval_counts_1": 5935, "eval_counts_2": 1543, "eval_counts_3": 576, "eval_counts_4": 201, "eval_exact_match": 0.0059, "eval_f1": 0.2377, "eval_gen_len": 12.3185, "eval_loss": 2.6629750728607178, "eval_precisions_1": 35.0706, "eval_precisions_2": 10.483, "eval_precisions_3": 4.6025, "eval_precisions_4": 1.9494, "eval_ref_len": 21250, "eval_rouge1": 0.2365, "eval_rouge2": 0.089, "eval_rougeL": 0.2309, "eval_rougeLsum": 0.2307, "eval_runtime": 2677.1671, "eval_samples_per_second": 0.823, "eval_steps_per_second": 0.412, "eval_sys_len": 16923, "eval_totals_1": 16923, "eval_totals_2": 14719, "eval_totals_3": 12515, "eval_totals_4": 10311, "step": 618 }, { "epoch": 17.98, "learning_rate": 0.0001, "loss": 2.4325, "step": 654 }, { "epoch": 17.98, "eval_bleu": 6.8664, "eval_bp": 0.8277, "eval_counts_1": 6305, "eval_counts_2": 1756, "eval_counts_3": 685, "eval_counts_4": 265, "eval_exact_match": 0.0059, "eval_f1": 0.2537, "eval_gen_len": 13.1688, "eval_loss": 2.579846143722534, "eval_precisions_1": 35.2826, "eval_precisions_2": 11.209, "eval_precisions_3": 5.0884, "eval_precisions_4": 2.3539, "eval_ref_len": 21250, "eval_rouge1": 0.2518, "eval_rouge2": 0.0982, "eval_rougeL": 0.2452, "eval_rougeLsum": 0.2452, "eval_runtime": 2086.3742, "eval_samples_per_second": 1.056, "eval_steps_per_second": 0.528, "eval_sys_len": 17870, "eval_totals_1": 17870, "eval_totals_2": 15666, "eval_totals_3": 13462, "eval_totals_4": 11258, "step": 654 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 2.2632, "step": 691 }, { "epoch": 18.99, "eval_bleu": 7.5129, "eval_bp": 0.823, "eval_counts_1": 6577, "eval_counts_2": 1888, "eval_counts_3": 762, "eval_counts_4": 304, "eval_exact_match": 0.0086, "eval_f1": 0.2702, "eval_gen_len": 13.2373, "eval_loss": 2.515482187271118, "eval_precisions_1": 36.9806, "eval_precisions_2": 12.1173, "eval_precisions_3": 5.6963, "eval_precisions_4": 2.7208, "eval_ref_len": 21250, "eval_rouge1": 0.2689, "eval_rouge2": 0.1102, "eval_rougeL": 0.261, "eval_rougeLsum": 0.2611, "eval_runtime": 2869.1423, "eval_samples_per_second": 0.768, "eval_steps_per_second": 0.384, "eval_sys_len": 17785, "eval_totals_1": 17785, "eval_totals_2": 15581, "eval_totals_3": 13377, "eval_totals_4": 11173, "step": 691 }, { "epoch": 19.79, "learning_rate": 0.0001, "loss": 2.2026, "step": 720 }, { "epoch": 19.79, "eval_bleu": 7.1987, "eval_bp": 0.8159, "eval_counts_1": 6644, "eval_counts_2": 1853, "eval_counts_3": 720, "eval_counts_4": 273, "eval_exact_match": 0.0073, "eval_f1": 0.2742, "eval_gen_len": 13.6343, "eval_loss": 2.499704122543335, "eval_precisions_1": 37.626, "eval_precisions_2": 11.9904, "eval_precisions_3": 5.434, "eval_precisions_4": 2.4715, "eval_ref_len": 21250, "eval_rouge1": 0.2717, "eval_rouge2": 0.1097, "eval_rougeL": 0.2628, "eval_rougeLsum": 0.2625, "eval_runtime": 2871.4974, "eval_samples_per_second": 0.768, "eval_steps_per_second": 0.384, "eval_sys_len": 17658, "eval_totals_1": 17658, "eval_totals_2": 15454, "eval_totals_3": 13250, "eval_totals_4": 11046, "step": 720 }, { "epoch": 19.79, "step": 720, "total_flos": 8.496574887886848e+17, "train_loss": 3.782369862662421, "train_runtime": 109409.7996, "train_samples_per_second": 1.703, "train_steps_per_second": 0.007 } ], "logging_steps": 500, "max_steps": 720, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8.496574887886848e+17, "trial_name": null, "trial_params": null }