|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.926991625509984, |
|
"eval_steps": 500, |
|
"global_step": 2900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001, |
|
"loss": 6.5987, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.1374, |
|
"eval_bp": 1.0, |
|
"eval_counts_1": 3804, |
|
"eval_counts_2": 134, |
|
"eval_counts_3": 2, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0814, |
|
"eval_gen_len": 16.2899, |
|
"eval_loss": 5.069606304168701, |
|
"eval_precisions_1": 16.6019, |
|
"eval_precisions_2": 0.6471, |
|
"eval_precisions_3": 0.0108, |
|
"eval_precisions_4": 0.0031, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0783, |
|
"eval_rouge2": 0.007, |
|
"eval_rougeL": 0.0769, |
|
"eval_rougeLsum": 0.0768, |
|
"eval_runtime": 2008.1612, |
|
"eval_samples_per_second": 1.098, |
|
"eval_steps_per_second": 0.549, |
|
"eval_sys_len": 22913, |
|
"eval_totals_1": 22913, |
|
"eval_totals_2": 20709, |
|
"eval_totals_3": 18505, |
|
"eval_totals_4": 16301, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001, |
|
"loss": 4.7443, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.303, |
|
"eval_bp": 0.7996, |
|
"eval_counts_1": 4022, |
|
"eval_counts_2": 188, |
|
"eval_counts_3": 20, |
|
"eval_counts_4": 0, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.1073, |
|
"eval_gen_len": 12.9038, |
|
"eval_loss": 4.227029323577881, |
|
"eval_precisions_1": 23.1602, |
|
"eval_precisions_2": 1.2399, |
|
"eval_precisions_3": 0.1543, |
|
"eval_precisions_4": 0.0046, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1028, |
|
"eval_rouge2": 0.012, |
|
"eval_rougeL": 0.0991, |
|
"eval_rougeLsum": 0.099, |
|
"eval_runtime": 2942.0368, |
|
"eval_samples_per_second": 0.749, |
|
"eval_steps_per_second": 0.375, |
|
"eval_sys_len": 17366, |
|
"eval_totals_1": 17366, |
|
"eval_totals_2": 15162, |
|
"eval_totals_3": 12958, |
|
"eval_totals_4": 10754, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0001, |
|
"loss": 4.1412, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.4488, |
|
"eval_bp": 0.7507, |
|
"eval_counts_1": 3723, |
|
"eval_counts_2": 187, |
|
"eval_counts_3": 26, |
|
"eval_counts_4": 2, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0938, |
|
"eval_gen_len": 12.4769, |
|
"eval_loss": 3.7837560176849365, |
|
"eval_precisions_1": 22.5431, |
|
"eval_precisions_2": 1.3067, |
|
"eval_precisions_3": 0.2148, |
|
"eval_precisions_4": 0.0202, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0899, |
|
"eval_rouge2": 0.0124, |
|
"eval_rougeL": 0.0886, |
|
"eval_rougeLsum": 0.0884, |
|
"eval_runtime": 2963.118, |
|
"eval_samples_per_second": 0.744, |
|
"eval_steps_per_second": 0.372, |
|
"eval_sys_len": 16515, |
|
"eval_totals_1": 16515, |
|
"eval_totals_2": 14311, |
|
"eval_totals_3": 12107, |
|
"eval_totals_4": 9903, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0001, |
|
"loss": 3.6791, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 1.6623, |
|
"eval_bp": 1.0, |
|
"eval_counts_1": 4576, |
|
"eval_counts_2": 549, |
|
"eval_counts_3": 134, |
|
"eval_counts_4": 26, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.1323, |
|
"eval_gen_len": 14.5676, |
|
"eval_loss": 3.4246089458465576, |
|
"eval_precisions_1": 20.9227, |
|
"eval_precisions_2": 2.7915, |
|
"eval_precisions_3": 0.7673, |
|
"eval_precisions_4": 0.1704, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1259, |
|
"eval_rouge2": 0.0296, |
|
"eval_rougeL": 0.1204, |
|
"eval_rougeLsum": 0.1201, |
|
"eval_runtime": 3118.2455, |
|
"eval_samples_per_second": 0.707, |
|
"eval_steps_per_second": 0.353, |
|
"eval_sys_len": 21871, |
|
"eval_totals_1": 21871, |
|
"eval_totals_2": 19667, |
|
"eval_totals_3": 17463, |
|
"eval_totals_4": 15259, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0001, |
|
"loss": 3.3523, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 2.4472, |
|
"eval_bp": 0.9085, |
|
"eval_counts_1": 4900, |
|
"eval_counts_2": 796, |
|
"eval_counts_3": 210, |
|
"eval_counts_4": 41, |
|
"eval_exact_match": 0.0005, |
|
"eval_f1": 0.1585, |
|
"eval_gen_len": 14.3943, |
|
"eval_loss": 3.172255277633667, |
|
"eval_precisions_1": 25.2721, |
|
"eval_precisions_2": 4.6319, |
|
"eval_precisions_3": 1.4018, |
|
"eval_precisions_4": 0.3209, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1542, |
|
"eval_rouge2": 0.0449, |
|
"eval_rougeL": 0.1486, |
|
"eval_rougeLsum": 0.1484, |
|
"eval_runtime": 3087.9672, |
|
"eval_samples_per_second": 0.714, |
|
"eval_steps_per_second": 0.357, |
|
"eval_sys_len": 19389, |
|
"eval_totals_1": 19389, |
|
"eval_totals_2": 17185, |
|
"eval_totals_3": 14981, |
|
"eval_totals_4": 12777, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0001, |
|
"loss": 3.0161, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 4.1987, |
|
"eval_bp": 0.8907, |
|
"eval_counts_1": 5633, |
|
"eval_counts_2": 1182, |
|
"eval_counts_3": 390, |
|
"eval_counts_4": 111, |
|
"eval_exact_match": 0.0045, |
|
"eval_f1": 0.2074, |
|
"eval_gen_len": 14.5789, |
|
"eval_loss": 2.926840305328369, |
|
"eval_precisions_1": 29.5773, |
|
"eval_precisions_2": 7.0186, |
|
"eval_precisions_3": 2.6645, |
|
"eval_precisions_4": 0.8928, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.204, |
|
"eval_rouge2": 0.069, |
|
"eval_rougeL": 0.196, |
|
"eval_rougeLsum": 0.1961, |
|
"eval_runtime": 3093.3528, |
|
"eval_samples_per_second": 0.712, |
|
"eval_steps_per_second": 0.356, |
|
"eval_sys_len": 19045, |
|
"eval_totals_1": 19045, |
|
"eval_totals_2": 16841, |
|
"eval_totals_3": 14637, |
|
"eval_totals_4": 12433, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.7639, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 5.3362, |
|
"eval_bp": 0.8306, |
|
"eval_counts_1": 6100, |
|
"eval_counts_2": 1461, |
|
"eval_counts_3": 499, |
|
"eval_counts_4": 165, |
|
"eval_exact_match": 0.0073, |
|
"eval_f1": 0.2431, |
|
"eval_gen_len": 13.8553, |
|
"eval_loss": 2.760089635848999, |
|
"eval_precisions_1": 34.0326, |
|
"eval_precisions_2": 9.2939, |
|
"eval_precisions_3": 3.6919, |
|
"eval_precisions_4": 1.4586, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2409, |
|
"eval_rouge2": 0.0885, |
|
"eval_rougeL": 0.2332, |
|
"eval_rougeLsum": 0.2331, |
|
"eval_runtime": 2991.0063, |
|
"eval_samples_per_second": 0.737, |
|
"eval_steps_per_second": 0.368, |
|
"eval_sys_len": 17924, |
|
"eval_totals_1": 17924, |
|
"eval_totals_2": 15720, |
|
"eval_totals_3": 13516, |
|
"eval_totals_4": 11312, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.5036, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 7.0633, |
|
"eval_bp": 0.9483, |
|
"eval_counts_1": 6765, |
|
"eval_counts_2": 1845, |
|
"eval_counts_3": 701, |
|
"eval_counts_4": 273, |
|
"eval_exact_match": 0.0059, |
|
"eval_f1": 0.2689, |
|
"eval_gen_len": 15.7232, |
|
"eval_loss": 2.572913885116577, |
|
"eval_precisions_1": 33.525, |
|
"eval_precisions_2": 10.2643, |
|
"eval_precisions_3": 4.4449, |
|
"eval_precisions_4": 2.0122, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2682, |
|
"eval_rouge2": 0.1079, |
|
"eval_rougeL": 0.2589, |
|
"eval_rougeLsum": 0.259, |
|
"eval_runtime": 3343.9439, |
|
"eval_samples_per_second": 0.659, |
|
"eval_steps_per_second": 0.33, |
|
"eval_sys_len": 20179, |
|
"eval_totals_1": 20179, |
|
"eval_totals_2": 17975, |
|
"eval_totals_3": 15771, |
|
"eval_totals_4": 13567, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.307, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_bleu": 8.1681, |
|
"eval_bp": 0.8911, |
|
"eval_counts_1": 7018, |
|
"eval_counts_2": 2047, |
|
"eval_counts_3": 826, |
|
"eval_counts_4": 348, |
|
"eval_exact_match": 0.0095, |
|
"eval_f1": 0.2907, |
|
"eval_gen_len": 14.8076, |
|
"eval_loss": 2.4636850357055664, |
|
"eval_precisions_1": 36.8322, |
|
"eval_precisions_2": 12.1484, |
|
"eval_precisions_3": 5.6398, |
|
"eval_precisions_4": 2.797, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2907, |
|
"eval_rouge2": 0.1218, |
|
"eval_rougeL": 0.2799, |
|
"eval_rougeLsum": 0.2798, |
|
"eval_runtime": 3082.8011, |
|
"eval_samples_per_second": 0.715, |
|
"eval_steps_per_second": 0.357, |
|
"eval_sys_len": 19054, |
|
"eval_totals_1": 19054, |
|
"eval_totals_2": 16850, |
|
"eval_totals_3": 14646, |
|
"eval_totals_4": 12442, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1012, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 8.6921, |
|
"eval_bp": 0.8604, |
|
"eval_counts_1": 7147, |
|
"eval_counts_2": 2127, |
|
"eval_counts_3": 883, |
|
"eval_counts_4": 389, |
|
"eval_exact_match": 0.0118, |
|
"eval_f1": 0.3008, |
|
"eval_gen_len": 14.2736, |
|
"eval_loss": 2.361370325088501, |
|
"eval_precisions_1": 38.6889, |
|
"eval_precisions_2": 13.0739, |
|
"eval_precisions_3": 6.278, |
|
"eval_precisions_4": 3.2797, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3003, |
|
"eval_rouge2": 0.1275, |
|
"eval_rougeL": 0.289, |
|
"eval_rougeLsum": 0.2888, |
|
"eval_runtime": 2980.6044, |
|
"eval_samples_per_second": 0.739, |
|
"eval_steps_per_second": 0.37, |
|
"eval_sys_len": 18473, |
|
"eval_totals_1": 18473, |
|
"eval_totals_2": 16269, |
|
"eval_totals_3": 14065, |
|
"eval_totals_4": 11861, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9538, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 9.67, |
|
"eval_bp": 0.8632, |
|
"eval_counts_1": 7481, |
|
"eval_counts_2": 2339, |
|
"eval_counts_3": 997, |
|
"eval_counts_4": 459, |
|
"eval_exact_match": 0.0127, |
|
"eval_f1": 0.3167, |
|
"eval_gen_len": 14.3757, |
|
"eval_loss": 2.297987461090088, |
|
"eval_precisions_1": 40.3854, |
|
"eval_precisions_2": 14.3321, |
|
"eval_precisions_3": 7.0629, |
|
"eval_precisions_4": 3.8533, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3192, |
|
"eval_rouge2": 0.1423, |
|
"eval_rougeL": 0.3064, |
|
"eval_rougeLsum": 0.3068, |
|
"eval_runtime": 1745.8738, |
|
"eval_samples_per_second": 1.262, |
|
"eval_steps_per_second": 0.631, |
|
"eval_sys_len": 18524, |
|
"eval_totals_1": 18524, |
|
"eval_totals_2": 16320, |
|
"eval_totals_3": 14116, |
|
"eval_totals_4": 11912, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7909, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 10.724, |
|
"eval_bp": 0.8804, |
|
"eval_counts_1": 7675, |
|
"eval_counts_2": 2546, |
|
"eval_counts_3": 1144, |
|
"eval_counts_4": 546, |
|
"eval_exact_match": 0.015, |
|
"eval_f1": 0.3279, |
|
"eval_gen_len": 14.583, |
|
"eval_loss": 2.2389414310455322, |
|
"eval_precisions_1": 40.7183, |
|
"eval_precisions_2": 15.2959, |
|
"eval_precisions_3": 7.9219, |
|
"eval_precisions_4": 4.4619, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3299, |
|
"eval_rouge2": 0.1528, |
|
"eval_rougeL": 0.3174, |
|
"eval_rougeLsum": 0.3175, |
|
"eval_runtime": 1768.3367, |
|
"eval_samples_per_second": 1.246, |
|
"eval_steps_per_second": 0.623, |
|
"eval_sys_len": 18849, |
|
"eval_totals_1": 18849, |
|
"eval_totals_2": 16645, |
|
"eval_totals_3": 14441, |
|
"eval_totals_4": 12237, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6691, |
|
"step": 1891 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_bleu": 11.1241, |
|
"eval_bp": 0.8695, |
|
"eval_counts_1": 7858, |
|
"eval_counts_2": 2635, |
|
"eval_counts_3": 1179, |
|
"eval_counts_4": 576, |
|
"eval_exact_match": 0.0163, |
|
"eval_f1": 0.3395, |
|
"eval_gen_len": 14.3848, |
|
"eval_loss": 2.181286096572876, |
|
"eval_precisions_1": 42.1499, |
|
"eval_precisions_2": 16.029, |
|
"eval_precisions_3": 8.2824, |
|
"eval_precisions_4": 4.7876, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.344, |
|
"eval_rouge2": 0.1626, |
|
"eval_rougeL": 0.33, |
|
"eval_rougeLsum": 0.33, |
|
"eval_runtime": 1475.7204, |
|
"eval_samples_per_second": 1.494, |
|
"eval_steps_per_second": 0.747, |
|
"eval_sys_len": 18643, |
|
"eval_totals_1": 18643, |
|
"eval_totals_2": 16439, |
|
"eval_totals_3": 14235, |
|
"eval_totals_4": 12031, |
|
"step": 1891 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5361, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 11.5803, |
|
"eval_bp": 0.8754, |
|
"eval_counts_1": 8016, |
|
"eval_counts_2": 2729, |
|
"eval_counts_3": 1249, |
|
"eval_counts_4": 606, |
|
"eval_exact_match": 0.0163, |
|
"eval_f1": 0.3462, |
|
"eval_gen_len": 14.564, |
|
"eval_loss": 2.15460205078125, |
|
"eval_precisions_1": 42.7429, |
|
"eval_precisions_2": 16.4894, |
|
"eval_precisions_3": 8.7063, |
|
"eval_precisions_4": 4.9909, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3494, |
|
"eval_rouge2": 0.1664, |
|
"eval_rougeL": 0.3349, |
|
"eval_rougeLsum": 0.3351, |
|
"eval_runtime": 2521.9472, |
|
"eval_samples_per_second": 0.874, |
|
"eval_steps_per_second": 0.437, |
|
"eval_sys_len": 18754, |
|
"eval_totals_1": 18754, |
|
"eval_totals_2": 16550, |
|
"eval_totals_3": 14346, |
|
"eval_totals_4": 12142, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4365, |
|
"step": 2182 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_bleu": 12.1055, |
|
"eval_bp": 0.856, |
|
"eval_counts_1": 8112, |
|
"eval_counts_2": 2839, |
|
"eval_counts_3": 1316, |
|
"eval_counts_4": 647, |
|
"eval_exact_match": 0.02, |
|
"eval_f1": 0.3538, |
|
"eval_gen_len": 14.1656, |
|
"eval_loss": 2.1357789039611816, |
|
"eval_precisions_1": 44.1109, |
|
"eval_precisions_2": 17.5398, |
|
"eval_precisions_3": 9.4121, |
|
"eval_precisions_4": 5.4933, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3581, |
|
"eval_rouge2": 0.1761, |
|
"eval_rougeL": 0.3448, |
|
"eval_rougeLsum": 0.3448, |
|
"eval_runtime": 2133.8974, |
|
"eval_samples_per_second": 1.033, |
|
"eval_steps_per_second": 0.516, |
|
"eval_sys_len": 18390, |
|
"eval_totals_1": 18390, |
|
"eval_totals_2": 16186, |
|
"eval_totals_3": 13982, |
|
"eval_totals_4": 11778, |
|
"step": 2182 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3263, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 12.9765, |
|
"eval_bp": 0.8827, |
|
"eval_counts_1": 8381, |
|
"eval_counts_2": 2990, |
|
"eval_counts_3": 1430, |
|
"eval_counts_4": 731, |
|
"eval_exact_match": 0.0209, |
|
"eval_f1": 0.363, |
|
"eval_gen_len": 14.5445, |
|
"eval_loss": 2.1189985275268555, |
|
"eval_precisions_1": 44.3627, |
|
"eval_precisions_2": 17.9171, |
|
"eval_precisions_3": 9.873, |
|
"eval_precisions_4": 5.9528, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3681, |
|
"eval_rouge2": 0.1831, |
|
"eval_rougeL": 0.3532, |
|
"eval_rougeLsum": 0.3534, |
|
"eval_runtime": 1849.5796, |
|
"eval_samples_per_second": 1.192, |
|
"eval_steps_per_second": 0.596, |
|
"eval_sys_len": 18892, |
|
"eval_totals_1": 18892, |
|
"eval_totals_2": 16688, |
|
"eval_totals_3": 14484, |
|
"eval_totals_4": 12280, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2329, |
|
"step": 2474 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 13.5903, |
|
"eval_bp": 0.8678, |
|
"eval_counts_1": 8449, |
|
"eval_counts_2": 3101, |
|
"eval_counts_3": 1520, |
|
"eval_counts_4": 786, |
|
"eval_exact_match": 0.0227, |
|
"eval_f1": 0.3692, |
|
"eval_gen_len": 14.1779, |
|
"eval_loss": 2.1201868057250977, |
|
"eval_precisions_1": 45.3954, |
|
"eval_precisions_2": 18.8993, |
|
"eval_precisions_3": 10.7012, |
|
"eval_precisions_4": 6.55, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3743, |
|
"eval_rouge2": 0.1901, |
|
"eval_rougeL": 0.3603, |
|
"eval_rougeLsum": 0.3603, |
|
"eval_runtime": 1363.814, |
|
"eval_samples_per_second": 1.616, |
|
"eval_steps_per_second": 0.808, |
|
"eval_sys_len": 18612, |
|
"eval_totals_1": 18612, |
|
"eval_totals_2": 16408, |
|
"eval_totals_3": 14204, |
|
"eval_totals_4": 12000, |
|
"step": 2474 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1557, |
|
"step": 2619 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 13.8388, |
|
"eval_bp": 0.8325, |
|
"eval_counts_1": 8406, |
|
"eval_counts_2": 3154, |
|
"eval_counts_3": 1558, |
|
"eval_counts_4": 804, |
|
"eval_exact_match": 0.0277, |
|
"eval_f1": 0.371, |
|
"eval_gen_len": 13.677, |
|
"eval_loss": 2.1282455921173096, |
|
"eval_precisions_1": 46.8092, |
|
"eval_precisions_2": 20.0203, |
|
"eval_precisions_3": 11.4982, |
|
"eval_precisions_4": 7.0862, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3761, |
|
"eval_rouge2": 0.194, |
|
"eval_rougeL": 0.3633, |
|
"eval_rougeLsum": 0.3636, |
|
"eval_runtime": 1323.8829, |
|
"eval_samples_per_second": 1.665, |
|
"eval_steps_per_second": 0.832, |
|
"eval_sys_len": 17958, |
|
"eval_totals_1": 17958, |
|
"eval_totals_2": 15754, |
|
"eval_totals_3": 13550, |
|
"eval_totals_4": 11346, |
|
"step": 2619 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0658, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 14.2084, |
|
"eval_bp": 0.886, |
|
"eval_counts_1": 8614, |
|
"eval_counts_2": 3241, |
|
"eval_counts_3": 1610, |
|
"eval_counts_4": 839, |
|
"eval_exact_match": 0.0272, |
|
"eval_f1": 0.3749, |
|
"eval_gen_len": 14.3816, |
|
"eval_loss": 2.123244524002075, |
|
"eval_precisions_1": 45.4445, |
|
"eval_precisions_2": 19.3481, |
|
"eval_precisions_3": 11.0676, |
|
"eval_precisions_4": 6.7974, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3803, |
|
"eval_rouge2": 0.196, |
|
"eval_rougeL": 0.3654, |
|
"eval_rougeLsum": 0.3656, |
|
"eval_runtime": 1378.8855, |
|
"eval_samples_per_second": 1.598, |
|
"eval_steps_per_second": 0.799, |
|
"eval_sys_len": 18955, |
|
"eval_totals_1": 18955, |
|
"eval_totals_2": 16751, |
|
"eval_totals_3": 14547, |
|
"eval_totals_4": 12343, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 19.93, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9944, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 19.93, |
|
"eval_bleu": 14.3883, |
|
"eval_bp": 0.8806, |
|
"eval_counts_1": 8658, |
|
"eval_counts_2": 3273, |
|
"eval_counts_3": 1625, |
|
"eval_counts_4": 859, |
|
"eval_exact_match": 0.0268, |
|
"eval_f1": 0.3775, |
|
"eval_gen_len": 14.2881, |
|
"eval_loss": 2.1203458309173584, |
|
"eval_precisions_1": 45.9237, |
|
"eval_precisions_2": 19.6588, |
|
"eval_precisions_3": 11.2496, |
|
"eval_precisions_4": 7.0174, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3833, |
|
"eval_rouge2": 0.1977, |
|
"eval_rougeL": 0.369, |
|
"eval_rougeLsum": 0.3691, |
|
"eval_runtime": 1364.695, |
|
"eval_samples_per_second": 1.615, |
|
"eval_steps_per_second": 0.808, |
|
"eval_sys_len": 18853, |
|
"eval_totals_1": 18853, |
|
"eval_totals_2": 16649, |
|
"eval_totals_3": 14445, |
|
"eval_totals_4": 12241, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 19.93, |
|
"step": 2900, |
|
"total_flos": 8.55557888016384e+17, |
|
"train_loss": 2.472949571280644, |
|
"train_runtime": 103540.1577, |
|
"train_samples_per_second": 1.799, |
|
"train_steps_per_second": 0.028 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2900, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8.55557888016384e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|