{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.789564097058193, "eval_steps": 500, "global_step": 1440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 0.0001, "loss": 7.5882, "step": 72 }, { "epoch": 0.99, "eval_bleu": 0.0872, "eval_bp": 0.6461, "eval_counts_1": 3993, "eval_counts_2": 105, "eval_counts_3": 0, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.1155, "eval_gen_len": 9.7105, "eval_loss": 5.682333469390869, "eval_precisions_1": 26.998, "eval_precisions_2": 0.8343, "eval_precisions_3": 0.0048, "eval_precisions_4": 0.0031, "eval_ref_len": 21250, "eval_rouge1": 0.1101, "eval_rouge2": 0.0077, "eval_rougeL": 0.1078, "eval_rougeLsum": 0.1076, "eval_runtime": 1951.3051, "eval_samples_per_second": 1.13, "eval_steps_per_second": 0.565, "eval_sys_len": 14790, "eval_totals_1": 14790, "eval_totals_2": 12586, "eval_totals_3": 10382, "eval_totals_4": 8178, "step": 72 }, { "epoch": 1.99, "learning_rate": 0.0001, "loss": 5.2903, "step": 145 }, { "epoch": 1.99, "eval_bleu": 0.351, "eval_bp": 0.8828, "eval_counts_1": 3827, "eval_counts_2": 229, "eval_counts_3": 32, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.0964, "eval_gen_len": 16.7005, "eval_loss": 4.872079372406006, "eval_precisions_1": 20.2551, "eval_precisions_2": 1.3721, "eval_precisions_3": 0.2209, "eval_precisions_4": 0.0041, "eval_ref_len": 21250, "eval_rouge1": 0.0924, "eval_rouge2": 0.015, "eval_rougeL": 0.091, "eval_rougeLsum": 0.0909, "eval_runtime": 3438.1674, "eval_samples_per_second": 0.641, "eval_steps_per_second": 0.321, "eval_sys_len": 18894, "eval_totals_1": 18894, "eval_totals_2": 16690, "eval_totals_3": 14486, "eval_totals_4": 12282, "step": 145 }, { "epoch": 3.0, "learning_rate": 0.0001, "loss": 4.6636, "step": 218 }, { "epoch": 3.0, "eval_bleu": 0.2933, "eval_bp": 0.6758, "eval_counts_1": 3638, "eval_counts_2": 174, "eval_counts_3": 21, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.0925, "eval_gen_len": 8.9197, "eval_loss": 4.280586242675781, "eval_precisions_1": 23.8276, "eval_precisions_2": 1.3319, "eval_precisions_3": 0.1934, "eval_precisions_4": 0.0058, "eval_ref_len": 21250, "eval_rouge1": 0.0884, "eval_rouge2": 0.012, "eval_rougeL": 0.0876, "eval_rougeLsum": 0.0874, "eval_runtime": 2326.5895, "eval_samples_per_second": 0.947, "eval_steps_per_second": 0.474, "eval_sys_len": 15268, "eval_totals_1": 15268, "eval_totals_2": 13064, "eval_totals_3": 10860, "eval_totals_4": 8656, "step": 218 }, { "epoch": 4.0, "learning_rate": 0.0001, "loss": 4.2229, "step": 291 }, { "epoch": 4.0, "eval_bleu": 0.2288, "eval_bp": 1.0, "eval_counts_1": 4274, "eval_counts_2": 240, "eval_counts_3": 24, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.1023, "eval_gen_len": 24.7015, "eval_loss": 3.9210410118103027, "eval_precisions_1": 14.583, "eval_precisions_2": 0.8855, "eval_precisions_3": 0.0964, "eval_precisions_4": 0.0022, "eval_ref_len": 21250, "eval_rouge1": 0.0894, "eval_rouge2": 0.0109, "eval_rougeL": 0.0849, "eval_rougeLsum": 0.0849, "eval_runtime": 2975.0462, "eval_samples_per_second": 0.741, "eval_steps_per_second": 0.37, "eval_sys_len": 29308, "eval_totals_1": 29308, "eval_totals_2": 27104, "eval_totals_3": 24900, "eval_totals_4": 22696, "step": 291 }, { "epoch": 4.99, "learning_rate": 0.0001, "loss": 3.9434, "step": 363 }, { "epoch": 4.99, "eval_bleu": 0.4204, "eval_bp": 0.7465, "eval_counts_1": 3652, "eval_counts_2": 218, "eval_counts_3": 35, "eval_counts_4": 1, "eval_exact_match": 0.0, "eval_f1": 0.0898, "eval_gen_len": 12.3049, "eval_loss": 3.690653085708618, "eval_precisions_1": 22.2114, "eval_precisions_2": 1.5311, "eval_precisions_3": 0.2908, "eval_precisions_4": 0.0102, "eval_ref_len": 21250, "eval_rouge1": 0.0856, "eval_rouge2": 0.0141, "eval_rougeL": 0.0843, "eval_rougeLsum": 0.0842, "eval_runtime": 3036.8902, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.363, "eval_sys_len": 16442, "eval_totals_1": 16442, "eval_totals_2": 14238, "eval_totals_3": 12034, "eval_totals_4": 9830, "step": 363 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 3.6152, "step": 436 }, { "epoch": 5.99, "eval_bleu": 1.0505, "eval_bp": 0.968, "eval_counts_1": 4103, "eval_counts_2": 341, "eval_counts_3": 77, "eval_counts_4": 11, "eval_exact_match": 0.0, "eval_f1": 0.112, "eval_gen_len": 14.3607, "eval_loss": 3.460298538208008, "eval_precisions_1": 19.9359, "eval_precisions_2": 1.8556, "eval_precisions_3": 0.4761, "eval_precisions_4": 0.0787, "eval_ref_len": 21250, "eval_rouge1": 0.107, "eval_rouge2": 0.019, "eval_rougeL": 0.1023, "eval_rougeLsum": 0.1024, "eval_runtime": 3225.717, "eval_samples_per_second": 0.683, "eval_steps_per_second": 0.342, "eval_sys_len": 20581, "eval_totals_1": 20581, "eval_totals_2": 18377, "eval_totals_3": 16173, "eval_totals_4": 13969, "step": 436 }, { "epoch": 7.0, "learning_rate": 0.0001, "loss": 3.3814, "step": 509 }, { "epoch": 7.0, "eval_bleu": 2.3489, "eval_bp": 0.8218, "eval_counts_1": 4342, "eval_counts_2": 675, "eval_counts_3": 218, "eval_counts_4": 43, "eval_exact_match": 0.0005, "eval_f1": 0.1308, "eval_gen_len": 10.2418, "eval_loss": 3.2883455753326416, "eval_precisions_1": 24.4441, "eval_precisions_2": 4.3383, "eval_precisions_3": 1.6323, "eval_precisions_4": 0.3856, "eval_ref_len": 21250, "eval_rouge1": 0.1264, "eval_rouge2": 0.0353, "eval_rougeL": 0.1234, "eval_rougeLsum": 0.1234, "eval_runtime": 2402.3288, "eval_samples_per_second": 0.917, "eval_steps_per_second": 0.459, "eval_sys_len": 17763, "eval_totals_1": 17763, "eval_totals_2": 15559, "eval_totals_3": 13355, "eval_totals_4": 11151, "step": 509 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 3.1711, "step": 582 }, { "epoch": 8.0, "eval_bleu": 2.6207, "eval_bp": 0.9273, "eval_counts_1": 4820, "eval_counts_2": 856, "eval_counts_3": 246, "eval_counts_4": 44, "eval_exact_match": 0.0005, "eval_f1": 0.1547, "eval_gen_len": 14.3249, "eval_loss": 3.0987935066223145, "eval_precisions_1": 24.3939, "eval_precisions_2": 4.8761, "eval_precisions_3": 1.6025, "eval_precisions_4": 0.3347, "eval_ref_len": 21250, "eval_rouge1": 0.1503, "eval_rouge2": 0.0465, "eval_rougeL": 0.1455, "eval_rougeLsum": 0.1457, "eval_runtime": 2969.3248, "eval_samples_per_second": 0.742, "eval_steps_per_second": 0.371, "eval_sys_len": 19759, "eval_totals_1": 19759, "eval_totals_2": 17555, "eval_totals_3": 15351, "eval_totals_4": 13147, "step": 582 }, { "epoch": 8.99, "learning_rate": 0.0001, "loss": 3.0147, "step": 654 }, { "epoch": 8.99, "eval_bleu": 3.4764, "eval_bp": 0.8739, "eval_counts_1": 5167, "eval_counts_2": 1066, "eval_counts_3": 321, "eval_counts_4": 76, "eval_exact_match": 0.0018, "eval_f1": 0.1816, "eval_gen_len": 14.3067, "eval_loss": 2.9539589881896973, "eval_precisions_1": 27.5941, "eval_precisions_2": 6.4524, "eval_precisions_3": 2.2421, "eval_precisions_4": 0.6274, "eval_ref_len": 21250, "eval_rouge1": 0.1773, "eval_rouge2": 0.0588, "eval_rougeL": 0.1721, "eval_rougeLsum": 0.1721, "eval_runtime": 2825.1201, "eval_samples_per_second": 0.78, "eval_steps_per_second": 0.39, "eval_sys_len": 18725, "eval_totals_1": 18725, "eval_totals_2": 16521, "eval_totals_3": 14317, "eval_totals_4": 12113, "step": 654 }, { "epoch": 9.99, "learning_rate": 0.0001, "loss": 2.7829, "step": 727 }, { "epoch": 9.99, "eval_bleu": 4.5099, "eval_bp": 0.7974, "eval_counts_1": 5625, "eval_counts_2": 1267, "eval_counts_3": 420, "eval_counts_4": 124, "eval_exact_match": 0.0045, "eval_f1": 0.2159, "eval_gen_len": 12.9741, "eval_loss": 2.8288302421569824, "eval_precisions_1": 32.4638, "eval_precisions_2": 8.378, "eval_precisions_3": 3.251, "eval_precisions_4": 1.1573, "eval_ref_len": 21250, "eval_rouge1": 0.2127, "eval_rouge2": 0.0741, "eval_rougeL": 0.2067, "eval_rougeLsum": 0.2065, "eval_runtime": 2709.6941, "eval_samples_per_second": 0.813, "eval_steps_per_second": 0.407, "eval_sys_len": 17327, "eval_totals_1": 17327, "eval_totals_2": 15123, "eval_totals_3": 12919, "eval_totals_4": 10715, "step": 727 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 2.6093, "step": 800 }, { "epoch": 10.99, "eval_bleu": 5.5051, "eval_bp": 0.8685, "eval_counts_1": 6005, "eval_counts_2": 1469, "eval_counts_3": 528, "eval_counts_4": 181, "eval_exact_match": 0.0064, "eval_f1": 0.231, "eval_gen_len": 14.4791, "eval_loss": 2.7177300453186035, "eval_precisions_1": 32.2416, "eval_precisions_2": 8.9459, "eval_precisions_3": 3.7139, "eval_precisions_4": 1.5067, "eval_ref_len": 21250, "eval_rouge1": 0.229, "eval_rouge2": 0.0827, "eval_rougeL": 0.2215, "eval_rougeLsum": 0.2213, "eval_runtime": 1457.0803, "eval_samples_per_second": 1.513, "eval_steps_per_second": 0.756, "eval_sys_len": 18625, "eval_totals_1": 18625, "eval_totals_2": 16421, "eval_totals_3": 14217, "eval_totals_4": 12013, "step": 800 }, { "epoch": 12.0, "learning_rate": 0.0001, "loss": 2.453, "step": 873 }, { "epoch": 12.0, "eval_bleu": 6.6865, "eval_bp": 0.8515, "eval_counts_1": 6396, "eval_counts_2": 1744, "eval_counts_3": 664, "eval_counts_4": 246, "eval_exact_match": 0.0059, "eval_f1": 0.2565, "eval_gen_len": 13.7142, "eval_loss": 2.591360330581665, "eval_precisions_1": 34.9375, "eval_precisions_2": 10.8303, "eval_precisions_3": 4.7773, "eval_precisions_4": 2.1035, "eval_ref_len": 21250, "eval_rouge1": 0.2553, "eval_rouge2": 0.0998, "eval_rougeL": 0.2479, "eval_rougeLsum": 0.2478, "eval_runtime": 1377.6536, "eval_samples_per_second": 1.6, "eval_steps_per_second": 0.8, "eval_sys_len": 18307, "eval_totals_1": 18307, "eval_totals_2": 16103, "eval_totals_3": 13899, "eval_totals_4": 11695, "step": 873 }, { "epoch": 12.99, "learning_rate": 0.0001, "loss": 2.3329, "step": 945 }, { "epoch": 12.99, "eval_bleu": 7.383, "eval_bp": 0.8592, "eval_counts_1": 6673, "eval_counts_2": 1888, "eval_counts_3": 741, "eval_counts_4": 291, "eval_exact_match": 0.0091, "eval_f1": 0.2749, "eval_gen_len": 14.1751, "eval_loss": 2.499257802963257, "eval_precisions_1": 36.1661, "eval_precisions_2": 11.6206, "eval_precisions_3": 5.2767, "eval_precisions_4": 2.458, "eval_ref_len": 21250, "eval_rouge1": 0.2747, "eval_rouge2": 0.1114, "eval_rougeL": 0.2652, "eval_rougeLsum": 0.2652, "eval_runtime": 1427.0765, "eval_samples_per_second": 1.544, "eval_steps_per_second": 0.772, "eval_sys_len": 18451, "eval_totals_1": 18451, "eval_totals_2": 16247, "eval_totals_3": 14043, "eval_totals_4": 11839, "step": 945 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 2.1663, "step": 1018 }, { "epoch": 13.99, "eval_bleu": 8.1343, "eval_bp": 0.8635, "eval_counts_1": 6953, "eval_counts_2": 2052, "eval_counts_3": 834, "eval_counts_4": 337, "eval_exact_match": 0.0082, "eval_f1": 0.2889, "eval_gen_len": 14.6783, "eval_loss": 2.4196276664733887, "eval_precisions_1": 37.5209, "eval_precisions_2": 12.5681, "eval_precisions_3": 5.9053, "eval_precisions_4": 2.8274, "eval_ref_len": 21250, "eval_rouge1": 0.2886, "eval_rouge2": 0.1215, "eval_rougeL": 0.2773, "eval_rougeLsum": 0.277, "eval_runtime": 1443.0194, "eval_samples_per_second": 1.527, "eval_steps_per_second": 0.764, "eval_sys_len": 18531, "eval_totals_1": 18531, "eval_totals_2": 16327, "eval_totals_3": 14123, "eval_totals_4": 11919, "step": 1018 }, { "epoch": 14.99, "learning_rate": 0.0001, "loss": 2.0422, "step": 1091 }, { "epoch": 14.99, "eval_bleu": 8.4322, "eval_bp": 0.8339, "eval_counts_1": 6968, "eval_counts_2": 2089, "eval_counts_3": 862, "eval_counts_4": 365, "eval_exact_match": 0.0113, "eval_f1": 0.2951, "eval_gen_len": 13.6987, "eval_loss": 2.3703055381774902, "eval_precisions_1": 38.7456, "eval_precisions_2": 13.2383, "eval_precisions_3": 6.3494, "eval_precisions_4": 3.2096, "eval_ref_len": 21250, "eval_rouge1": 0.2961, "eval_rouge2": 0.1268, "eval_rougeL": 0.2858, "eval_rougeLsum": 0.2857, "eval_runtime": 1381.8523, "eval_samples_per_second": 1.595, "eval_steps_per_second": 0.797, "eval_sys_len": 17984, "eval_totals_1": 17984, "eval_totals_2": 15780, "eval_totals_3": 13576, "eval_totals_4": 11372, "step": 1091 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 1.9245, "step": 1164 }, { "epoch": 16.0, "eval_bleu": 9.5973, "eval_bp": 0.8892, "eval_counts_1": 7500, "eval_counts_2": 2353, "eval_counts_3": 999, "eval_counts_4": 446, "eval_exact_match": 0.0132, "eval_f1": 0.314, "eval_gen_len": 14.77, "eval_loss": 2.3217406272888184, "eval_precisions_1": 39.4384, "eval_precisions_2": 13.9951, "eval_precisions_3": 6.8383, "eval_precisions_4": 3.5953, "eval_ref_len": 21250, "eval_rouge1": 0.3149, "eval_rouge2": 0.1407, "eval_rougeL": 0.3017, "eval_rougeLsum": 0.3017, "eval_runtime": 1430.5793, "eval_samples_per_second": 1.541, "eval_steps_per_second": 0.77, "eval_sys_len": 19017, "eval_totals_1": 19017, "eval_totals_2": 16813, "eval_totals_3": 14609, "eval_totals_4": 12405, "step": 1164 }, { "epoch": 17.0, "learning_rate": 0.0001, "loss": 1.8216, "step": 1237 }, { "epoch": 17.0, "eval_bleu": 9.9557, "eval_bp": 0.8467, "eval_counts_1": 7444, "eval_counts_2": 2357, "eval_counts_3": 1044, "eval_counts_4": 488, "eval_exact_match": 0.0132, "eval_f1": 0.3181, "eval_gen_len": 13.8031, "eval_loss": 2.27047061920166, "eval_precisions_1": 40.8584, "eval_precisions_2": 14.7175, "eval_precisions_3": 7.5592, "eval_precisions_4": 4.2044, "eval_ref_len": 21250, "eval_rouge1": 0.3201, "eval_rouge2": 0.1437, "eval_rougeL": 0.3081, "eval_rougeLsum": 0.3077, "eval_runtime": 1357.6078, "eval_samples_per_second": 1.623, "eval_steps_per_second": 0.812, "eval_sys_len": 18219, "eval_totals_1": 18219, "eval_totals_2": 16015, "eval_totals_3": 13811, "eval_totals_4": 11607, "step": 1237 }, { "epoch": 17.99, "learning_rate": 0.0001, "loss": 1.7503, "step": 1309 }, { "epoch": 17.99, "eval_bleu": 10.4354, "eval_bp": 0.8498, "eval_counts_1": 7571, "eval_counts_2": 2487, "eval_counts_3": 1114, "eval_counts_4": 515, "eval_exact_match": 0.0145, "eval_f1": 0.3265, "eval_gen_len": 13.9106, "eval_loss": 2.238603353500366, "eval_precisions_1": 41.4282, "eval_precisions_2": 15.4751, "eval_precisions_3": 8.0335, "eval_precisions_4": 4.4157, "eval_ref_len": 21250, "eval_rouge1": 0.3289, "eval_rouge2": 0.1512, "eval_rougeL": 0.3153, "eval_rougeLsum": 0.3151, "eval_runtime": 1353.0462, "eval_samples_per_second": 1.629, "eval_steps_per_second": 0.814, "eval_sys_len": 18275, "eval_totals_1": 18275, "eval_totals_2": 16071, "eval_totals_3": 13867, "eval_totals_4": 11663, "step": 1309 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 1.6342, "step": 1382 }, { "epoch": 18.99, "eval_bleu": 10.7447, "eval_bp": 0.8418, "eval_counts_1": 7697, "eval_counts_2": 2536, "eval_counts_3": 1155, "eval_counts_4": 537, "eval_exact_match": 0.0177, "eval_f1": 0.3313, "eval_gen_len": 13.8494, "eval_loss": 2.2182679176330566, "eval_precisions_1": 42.4568, "eval_precisions_2": 15.9246, "eval_precisions_3": 8.4178, "eval_precisions_4": 4.6627, "eval_ref_len": 21250, "eval_rouge1": 0.3342, "eval_rouge2": 0.1559, "eval_rougeL": 0.3224, "eval_rougeLsum": 0.3222, "eval_runtime": 1333.2607, "eval_samples_per_second": 1.653, "eval_steps_per_second": 0.827, "eval_sys_len": 18129, "eval_totals_1": 18129, "eval_totals_2": 15925, "eval_totals_3": 13721, "eval_totals_4": 11517, "step": 1382 }, { "epoch": 19.79, "learning_rate": 0.0001, "loss": 1.5474, "step": 1440 }, { "epoch": 19.79, "eval_bleu": 11.1066, "eval_bp": 0.8786, "eval_counts_1": 7879, "eval_counts_2": 2632, "eval_counts_3": 1187, "eval_counts_4": 570, "eval_exact_match": 0.0177, "eval_f1": 0.3375, "eval_gen_len": 14.5136, "eval_loss": 2.1956045627593994, "eval_precisions_1": 41.8762, "eval_precisions_2": 15.8449, "eval_precisions_3": 8.2391, "eval_precisions_4": 4.671, "eval_ref_len": 21250, "eval_rouge1": 0.3398, "eval_rouge2": 0.1607, "eval_rougeL": 0.326, "eval_rougeLsum": 0.326, "eval_runtime": 1394.5803, "eval_samples_per_second": 1.58, "eval_steps_per_second": 0.79, "eval_sys_len": 18815, "eval_totals_1": 18815, "eval_totals_2": 16611, "eval_totals_3": 14407, "eval_totals_4": 12203, "step": 1440 }, { "epoch": 19.79, "step": 1440, "total_flos": 8.496574887886848e+17, "train_loss": 3.111723126305474, "train_runtime": 93678.1212, "train_samples_per_second": 1.989, "train_steps_per_second": 0.015 } ], "logging_steps": 500, "max_steps": 1440, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8.496574887886848e+17, "trial_name": null, "trial_params": null }