{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.926991625509984, "eval_steps": 500, "global_step": 2900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.0001, "loss": 6.5987, "step": 145 }, { "epoch": 1.0, "eval_bleu": 0.1374, "eval_bp": 1.0, "eval_counts_1": 3804, "eval_counts_2": 134, "eval_counts_3": 2, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.0814, "eval_gen_len": 16.2899, "eval_loss": 5.069606304168701, "eval_precisions_1": 16.6019, "eval_precisions_2": 0.6471, "eval_precisions_3": 0.0108, "eval_precisions_4": 0.0031, "eval_ref_len": 21250, "eval_rouge1": 0.0783, "eval_rouge2": 0.007, "eval_rougeL": 0.0769, "eval_rougeLsum": 0.0768, "eval_runtime": 2008.1612, "eval_samples_per_second": 1.098, "eval_steps_per_second": 0.549, "eval_sys_len": 22913, "eval_totals_1": 22913, "eval_totals_2": 20709, "eval_totals_3": 18505, "eval_totals_4": 16301, "step": 145 }, { "epoch": 2.0, "learning_rate": 0.0001, "loss": 4.7443, "step": 291 }, { "epoch": 2.0, "eval_bleu": 0.303, "eval_bp": 0.7996, "eval_counts_1": 4022, "eval_counts_2": 188, "eval_counts_3": 20, "eval_counts_4": 0, "eval_exact_match": 0.0, "eval_f1": 0.1073, "eval_gen_len": 12.9038, "eval_loss": 4.227029323577881, "eval_precisions_1": 23.1602, "eval_precisions_2": 1.2399, "eval_precisions_3": 0.1543, "eval_precisions_4": 0.0046, "eval_ref_len": 21250, "eval_rouge1": 0.1028, "eval_rouge2": 0.012, "eval_rougeL": 0.0991, "eval_rougeLsum": 0.099, "eval_runtime": 2942.0368, "eval_samples_per_second": 0.749, "eval_steps_per_second": 0.375, "eval_sys_len": 17366, "eval_totals_1": 17366, "eval_totals_2": 15162, "eval_totals_3": 12958, "eval_totals_4": 10754, "step": 291 }, { "epoch": 3.0, "learning_rate": 0.0001, "loss": 4.1412, "step": 436 }, { "epoch": 3.0, "eval_bleu": 0.4488, "eval_bp": 0.7507, "eval_counts_1": 3723, "eval_counts_2": 187, "eval_counts_3": 26, "eval_counts_4": 2, "eval_exact_match": 0.0, "eval_f1": 0.0938, "eval_gen_len": 12.4769, "eval_loss": 3.7837560176849365, "eval_precisions_1": 22.5431, "eval_precisions_2": 1.3067, "eval_precisions_3": 0.2148, "eval_precisions_4": 0.0202, "eval_ref_len": 21250, "eval_rouge1": 0.0899, "eval_rouge2": 0.0124, "eval_rougeL": 0.0886, "eval_rougeLsum": 0.0884, "eval_runtime": 2963.118, "eval_samples_per_second": 0.744, "eval_steps_per_second": 0.372, "eval_sys_len": 16515, "eval_totals_1": 16515, "eval_totals_2": 14311, "eval_totals_3": 12107, "eval_totals_4": 9903, "step": 436 }, { "epoch": 4.0, "learning_rate": 0.0001, "loss": 3.6791, "step": 582 }, { "epoch": 4.0, "eval_bleu": 1.6623, "eval_bp": 1.0, "eval_counts_1": 4576, "eval_counts_2": 549, "eval_counts_3": 134, "eval_counts_4": 26, "eval_exact_match": 0.0, "eval_f1": 0.1323, "eval_gen_len": 14.5676, "eval_loss": 3.4246089458465576, "eval_precisions_1": 20.9227, "eval_precisions_2": 2.7915, "eval_precisions_3": 0.7673, "eval_precisions_4": 0.1704, "eval_ref_len": 21250, "eval_rouge1": 0.1259, "eval_rouge2": 0.0296, "eval_rougeL": 0.1204, "eval_rougeLsum": 0.1201, "eval_runtime": 3118.2455, "eval_samples_per_second": 0.707, "eval_steps_per_second": 0.353, "eval_sys_len": 21871, "eval_totals_1": 21871, "eval_totals_2": 19667, "eval_totals_3": 17463, "eval_totals_4": 15259, "step": 582 }, { "epoch": 5.0, "learning_rate": 0.0001, "loss": 3.3523, "step": 727 }, { "epoch": 5.0, "eval_bleu": 2.4472, "eval_bp": 0.9085, "eval_counts_1": 4900, "eval_counts_2": 796, "eval_counts_3": 210, "eval_counts_4": 41, "eval_exact_match": 0.0005, "eval_f1": 0.1585, "eval_gen_len": 14.3943, "eval_loss": 3.172255277633667, "eval_precisions_1": 25.2721, "eval_precisions_2": 4.6319, "eval_precisions_3": 1.4018, "eval_precisions_4": 0.3209, "eval_ref_len": 21250, "eval_rouge1": 0.1542, "eval_rouge2": 0.0449, "eval_rougeL": 0.1486, "eval_rougeLsum": 0.1484, "eval_runtime": 3087.9672, "eval_samples_per_second": 0.714, "eval_steps_per_second": 0.357, "eval_sys_len": 19389, "eval_totals_1": 19389, "eval_totals_2": 17185, "eval_totals_3": 14981, "eval_totals_4": 12777, "step": 727 }, { "epoch": 6.0, "learning_rate": 0.0001, "loss": 3.0161, "step": 873 }, { "epoch": 6.0, "eval_bleu": 4.1987, "eval_bp": 0.8907, "eval_counts_1": 5633, "eval_counts_2": 1182, "eval_counts_3": 390, "eval_counts_4": 111, "eval_exact_match": 0.0045, "eval_f1": 0.2074, "eval_gen_len": 14.5789, "eval_loss": 2.926840305328369, "eval_precisions_1": 29.5773, "eval_precisions_2": 7.0186, "eval_precisions_3": 2.6645, "eval_precisions_4": 0.8928, "eval_ref_len": 21250, "eval_rouge1": 0.204, "eval_rouge2": 0.069, "eval_rougeL": 0.196, "eval_rougeLsum": 0.1961, "eval_runtime": 3093.3528, "eval_samples_per_second": 0.712, "eval_steps_per_second": 0.356, "eval_sys_len": 19045, "eval_totals_1": 19045, "eval_totals_2": 16841, "eval_totals_3": 14637, "eval_totals_4": 12433, "step": 873 }, { "epoch": 7.0, "learning_rate": 0.0001, "loss": 2.7639, "step": 1018 }, { "epoch": 7.0, "eval_bleu": 5.3362, "eval_bp": 0.8306, "eval_counts_1": 6100, "eval_counts_2": 1461, "eval_counts_3": 499, "eval_counts_4": 165, "eval_exact_match": 0.0073, "eval_f1": 0.2431, "eval_gen_len": 13.8553, "eval_loss": 2.760089635848999, "eval_precisions_1": 34.0326, "eval_precisions_2": 9.2939, "eval_precisions_3": 3.6919, "eval_precisions_4": 1.4586, "eval_ref_len": 21250, "eval_rouge1": 0.2409, "eval_rouge2": 0.0885, "eval_rougeL": 0.2332, "eval_rougeLsum": 0.2331, "eval_runtime": 2991.0063, "eval_samples_per_second": 0.737, "eval_steps_per_second": 0.368, "eval_sys_len": 17924, "eval_totals_1": 17924, "eval_totals_2": 15720, "eval_totals_3": 13516, "eval_totals_4": 11312, "step": 1018 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 2.5036, "step": 1164 }, { "epoch": 8.0, "eval_bleu": 7.0633, "eval_bp": 0.9483, "eval_counts_1": 6765, "eval_counts_2": 1845, "eval_counts_3": 701, "eval_counts_4": 273, "eval_exact_match": 0.0059, "eval_f1": 0.2689, "eval_gen_len": 15.7232, "eval_loss": 2.572913885116577, "eval_precisions_1": 33.525, "eval_precisions_2": 10.2643, "eval_precisions_3": 4.4449, "eval_precisions_4": 2.0122, "eval_ref_len": 21250, "eval_rouge1": 0.2682, "eval_rouge2": 0.1079, "eval_rougeL": 0.2589, "eval_rougeLsum": 0.259, "eval_runtime": 3343.9439, "eval_samples_per_second": 0.659, "eval_steps_per_second": 0.33, "eval_sys_len": 20179, "eval_totals_1": 20179, "eval_totals_2": 17975, "eval_totals_3": 15771, "eval_totals_4": 13567, "step": 1164 }, { "epoch": 8.99, "learning_rate": 0.0001, "loss": 2.307, "step": 1309 }, { "epoch": 8.99, "eval_bleu": 8.1681, "eval_bp": 0.8911, "eval_counts_1": 7018, "eval_counts_2": 2047, "eval_counts_3": 826, "eval_counts_4": 348, "eval_exact_match": 0.0095, "eval_f1": 0.2907, "eval_gen_len": 14.8076, "eval_loss": 2.4636850357055664, "eval_precisions_1": 36.8322, "eval_precisions_2": 12.1484, "eval_precisions_3": 5.6398, "eval_precisions_4": 2.797, "eval_ref_len": 21250, "eval_rouge1": 0.2907, "eval_rouge2": 0.1218, "eval_rougeL": 0.2799, "eval_rougeLsum": 0.2798, "eval_runtime": 3082.8011, "eval_samples_per_second": 0.715, "eval_steps_per_second": 0.357, "eval_sys_len": 19054, "eval_totals_1": 19054, "eval_totals_2": 16850, "eval_totals_3": 14646, "eval_totals_4": 12442, "step": 1309 }, { "epoch": 10.0, "learning_rate": 0.0001, "loss": 2.1012, "step": 1455 }, { "epoch": 10.0, "eval_bleu": 8.6921, "eval_bp": 0.8604, "eval_counts_1": 7147, "eval_counts_2": 2127, "eval_counts_3": 883, "eval_counts_4": 389, "eval_exact_match": 0.0118, "eval_f1": 0.3008, "eval_gen_len": 14.2736, "eval_loss": 2.361370325088501, "eval_precisions_1": 38.6889, "eval_precisions_2": 13.0739, "eval_precisions_3": 6.278, "eval_precisions_4": 3.2797, "eval_ref_len": 21250, "eval_rouge1": 0.3003, "eval_rouge2": 0.1275, "eval_rougeL": 0.289, "eval_rougeLsum": 0.2888, "eval_runtime": 2980.6044, "eval_samples_per_second": 0.739, "eval_steps_per_second": 0.37, "eval_sys_len": 18473, "eval_totals_1": 18473, "eval_totals_2": 16269, "eval_totals_3": 14065, "eval_totals_4": 11861, "step": 1455 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 1.9538, "step": 1600 }, { "epoch": 10.99, "eval_bleu": 9.67, "eval_bp": 0.8632, "eval_counts_1": 7481, "eval_counts_2": 2339, "eval_counts_3": 997, "eval_counts_4": 459, "eval_exact_match": 0.0127, "eval_f1": 0.3167, "eval_gen_len": 14.3757, "eval_loss": 2.297987461090088, "eval_precisions_1": 40.3854, "eval_precisions_2": 14.3321, "eval_precisions_3": 7.0629, "eval_precisions_4": 3.8533, "eval_ref_len": 21250, "eval_rouge1": 0.3192, "eval_rouge2": 0.1423, "eval_rougeL": 0.3064, "eval_rougeLsum": 0.3068, "eval_runtime": 1745.8738, "eval_samples_per_second": 1.262, "eval_steps_per_second": 0.631, "eval_sys_len": 18524, "eval_totals_1": 18524, "eval_totals_2": 16320, "eval_totals_3": 14116, "eval_totals_4": 11912, "step": 1600 }, { "epoch": 12.0, "learning_rate": 0.0001, "loss": 1.7909, "step": 1746 }, { "epoch": 12.0, "eval_bleu": 10.724, "eval_bp": 0.8804, "eval_counts_1": 7675, "eval_counts_2": 2546, "eval_counts_3": 1144, "eval_counts_4": 546, "eval_exact_match": 0.015, "eval_f1": 0.3279, "eval_gen_len": 14.583, "eval_loss": 2.2389414310455322, "eval_precisions_1": 40.7183, "eval_precisions_2": 15.2959, "eval_precisions_3": 7.9219, "eval_precisions_4": 4.4619, "eval_ref_len": 21250, "eval_rouge1": 0.3299, "eval_rouge2": 0.1528, "eval_rougeL": 0.3174, "eval_rougeLsum": 0.3175, "eval_runtime": 1768.3367, "eval_samples_per_second": 1.246, "eval_steps_per_second": 0.623, "eval_sys_len": 18849, "eval_totals_1": 18849, "eval_totals_2": 16645, "eval_totals_3": 14441, "eval_totals_4": 12237, "step": 1746 }, { "epoch": 12.99, "learning_rate": 0.0001, "loss": 1.6691, "step": 1891 }, { "epoch": 12.99, "eval_bleu": 11.1241, "eval_bp": 0.8695, "eval_counts_1": 7858, "eval_counts_2": 2635, "eval_counts_3": 1179, "eval_counts_4": 576, "eval_exact_match": 0.0163, "eval_f1": 0.3395, "eval_gen_len": 14.3848, "eval_loss": 2.181286096572876, "eval_precisions_1": 42.1499, "eval_precisions_2": 16.029, "eval_precisions_3": 8.2824, "eval_precisions_4": 4.7876, "eval_ref_len": 21250, "eval_rouge1": 0.344, "eval_rouge2": 0.1626, "eval_rougeL": 0.33, "eval_rougeLsum": 0.33, "eval_runtime": 1475.7204, "eval_samples_per_second": 1.494, "eval_steps_per_second": 0.747, "eval_sys_len": 18643, "eval_totals_1": 18643, "eval_totals_2": 16439, "eval_totals_3": 14235, "eval_totals_4": 12031, "step": 1891 }, { "epoch": 14.0, "learning_rate": 0.0001, "loss": 1.5361, "step": 2037 }, { "epoch": 14.0, "eval_bleu": 11.5803, "eval_bp": 0.8754, "eval_counts_1": 8016, "eval_counts_2": 2729, "eval_counts_3": 1249, "eval_counts_4": 606, "eval_exact_match": 0.0163, "eval_f1": 0.3462, "eval_gen_len": 14.564, "eval_loss": 2.15460205078125, "eval_precisions_1": 42.7429, "eval_precisions_2": 16.4894, "eval_precisions_3": 8.7063, "eval_precisions_4": 4.9909, "eval_ref_len": 21250, "eval_rouge1": 0.3494, "eval_rouge2": 0.1664, "eval_rougeL": 0.3349, "eval_rougeLsum": 0.3351, "eval_runtime": 2521.9472, "eval_samples_per_second": 0.874, "eval_steps_per_second": 0.437, "eval_sys_len": 18754, "eval_totals_1": 18754, "eval_totals_2": 16550, "eval_totals_3": 14346, "eval_totals_4": 12142, "step": 2037 }, { "epoch": 14.99, "learning_rate": 0.0001, "loss": 1.4365, "step": 2182 }, { "epoch": 14.99, "eval_bleu": 12.1055, "eval_bp": 0.856, "eval_counts_1": 8112, "eval_counts_2": 2839, "eval_counts_3": 1316, "eval_counts_4": 647, "eval_exact_match": 0.02, "eval_f1": 0.3538, "eval_gen_len": 14.1656, "eval_loss": 2.1357789039611816, "eval_precisions_1": 44.1109, "eval_precisions_2": 17.5398, "eval_precisions_3": 9.4121, "eval_precisions_4": 5.4933, "eval_ref_len": 21250, "eval_rouge1": 0.3581, "eval_rouge2": 0.1761, "eval_rougeL": 0.3448, "eval_rougeLsum": 0.3448, "eval_runtime": 2133.8974, "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.516, "eval_sys_len": 18390, "eval_totals_1": 18390, "eval_totals_2": 16186, "eval_totals_3": 13982, "eval_totals_4": 11778, "step": 2182 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 1.3263, "step": 2328 }, { "epoch": 16.0, "eval_bleu": 12.9765, "eval_bp": 0.8827, "eval_counts_1": 8381, "eval_counts_2": 2990, "eval_counts_3": 1430, "eval_counts_4": 731, "eval_exact_match": 0.0209, "eval_f1": 0.363, "eval_gen_len": 14.5445, "eval_loss": 2.1189985275268555, "eval_precisions_1": 44.3627, "eval_precisions_2": 17.9171, "eval_precisions_3": 9.873, "eval_precisions_4": 5.9528, "eval_ref_len": 21250, "eval_rouge1": 0.3681, "eval_rouge2": 0.1831, "eval_rougeL": 0.3532, "eval_rougeLsum": 0.3534, "eval_runtime": 1849.5796, "eval_samples_per_second": 1.192, "eval_steps_per_second": 0.596, "eval_sys_len": 18892, "eval_totals_1": 18892, "eval_totals_2": 16688, "eval_totals_3": 14484, "eval_totals_4": 12280, "step": 2328 }, { "epoch": 17.0, "learning_rate": 0.0001, "loss": 1.2329, "step": 2474 }, { "epoch": 17.0, "eval_bleu": 13.5903, "eval_bp": 0.8678, "eval_counts_1": 8449, "eval_counts_2": 3101, "eval_counts_3": 1520, "eval_counts_4": 786, "eval_exact_match": 0.0227, "eval_f1": 0.3692, "eval_gen_len": 14.1779, "eval_loss": 2.1201868057250977, "eval_precisions_1": 45.3954, "eval_precisions_2": 18.8993, "eval_precisions_3": 10.7012, "eval_precisions_4": 6.55, "eval_ref_len": 21250, "eval_rouge1": 0.3743, "eval_rouge2": 0.1901, "eval_rougeL": 0.3603, "eval_rougeLsum": 0.3603, "eval_runtime": 1363.814, "eval_samples_per_second": 1.616, "eval_steps_per_second": 0.808, "eval_sys_len": 18612, "eval_totals_1": 18612, "eval_totals_2": 16408, "eval_totals_3": 14204, "eval_totals_4": 12000, "step": 2474 }, { "epoch": 18.0, "learning_rate": 0.0001, "loss": 1.1557, "step": 2619 }, { "epoch": 18.0, "eval_bleu": 13.8388, "eval_bp": 0.8325, "eval_counts_1": 8406, "eval_counts_2": 3154, "eval_counts_3": 1558, "eval_counts_4": 804, "eval_exact_match": 0.0277, "eval_f1": 0.371, "eval_gen_len": 13.677, "eval_loss": 2.1282455921173096, "eval_precisions_1": 46.8092, "eval_precisions_2": 20.0203, "eval_precisions_3": 11.4982, "eval_precisions_4": 7.0862, "eval_ref_len": 21250, "eval_rouge1": 0.3761, "eval_rouge2": 0.194, "eval_rougeL": 0.3633, "eval_rougeLsum": 0.3636, "eval_runtime": 1323.8829, "eval_samples_per_second": 1.665, "eval_steps_per_second": 0.832, "eval_sys_len": 17958, "eval_totals_1": 17958, "eval_totals_2": 15754, "eval_totals_3": 13550, "eval_totals_4": 11346, "step": 2619 }, { "epoch": 19.0, "learning_rate": 0.0001, "loss": 1.0658, "step": 2765 }, { "epoch": 19.0, "eval_bleu": 14.2084, "eval_bp": 0.886, "eval_counts_1": 8614, "eval_counts_2": 3241, "eval_counts_3": 1610, "eval_counts_4": 839, "eval_exact_match": 0.0272, "eval_f1": 0.3749, "eval_gen_len": 14.3816, "eval_loss": 2.123244524002075, "eval_precisions_1": 45.4445, "eval_precisions_2": 19.3481, "eval_precisions_3": 11.0676, "eval_precisions_4": 6.7974, "eval_ref_len": 21250, "eval_rouge1": 0.3803, "eval_rouge2": 0.196, "eval_rougeL": 0.3654, "eval_rougeLsum": 0.3656, "eval_runtime": 1378.8855, "eval_samples_per_second": 1.598, "eval_steps_per_second": 0.799, "eval_sys_len": 18955, "eval_totals_1": 18955, "eval_totals_2": 16751, "eval_totals_3": 14547, "eval_totals_4": 12343, "step": 2765 }, { "epoch": 19.93, "learning_rate": 0.0001, "loss": 0.9944, "step": 2900 }, { "epoch": 19.93, "eval_bleu": 14.3883, "eval_bp": 0.8806, "eval_counts_1": 8658, "eval_counts_2": 3273, "eval_counts_3": 1625, "eval_counts_4": 859, "eval_exact_match": 0.0268, "eval_f1": 0.3775, "eval_gen_len": 14.2881, "eval_loss": 2.1203458309173584, "eval_precisions_1": 45.9237, "eval_precisions_2": 19.6588, "eval_precisions_3": 11.2496, "eval_precisions_4": 7.0174, "eval_ref_len": 21250, "eval_rouge1": 0.3833, "eval_rouge2": 0.1977, "eval_rougeL": 0.369, "eval_rougeLsum": 0.3691, "eval_runtime": 1364.695, "eval_samples_per_second": 1.615, "eval_steps_per_second": 0.808, "eval_sys_len": 18853, "eval_totals_1": 18853, "eval_totals_2": 16649, "eval_totals_3": 14445, "eval_totals_4": 12241, "step": 2900 }, { "epoch": 19.93, "step": 2900, "total_flos": 8.55557888016384e+17, "train_loss": 2.472949571280644, "train_runtime": 103540.1577, "train_samples_per_second": 1.799, "train_steps_per_second": 0.028 } ], "logging_steps": 500, "max_steps": 2900, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8.55557888016384e+17, "trial_name": null, "trial_params": null }