|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.914163090128756, |
|
"eval_steps": 500, |
|
"global_step": 2900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001, |
|
"loss": 3.1671, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 5.9441, |
|
"eval_bp": 0.7156, |
|
"eval_counts_1": 6177, |
|
"eval_counts_2": 1669, |
|
"eval_counts_3": 604, |
|
"eval_counts_4": 179, |
|
"eval_exact_match": 0.0023, |
|
"eval_f1": 0.2528, |
|
"eval_gen_len": 12.0218, |
|
"eval_loss": 2.190216541290283, |
|
"eval_precisions_1": 38.7954, |
|
"eval_precisions_2": 12.1665, |
|
"eval_precisions_3": 5.2458, |
|
"eval_precisions_4": 1.9227, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2595, |
|
"eval_rouge2": 0.1035, |
|
"eval_rougeL": 0.2491, |
|
"eval_rougeLsum": 0.2492, |
|
"eval_runtime": 793.0147, |
|
"eval_samples_per_second": 2.779, |
|
"eval_steps_per_second": 0.695, |
|
"eval_sys_len": 15922, |
|
"eval_totals_1": 15922, |
|
"eval_totals_2": 13718, |
|
"eval_totals_3": 11514, |
|
"eval_totals_4": 9310, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.5597, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 7.7787, |
|
"eval_bp": 0.7556, |
|
"eval_counts_1": 6785, |
|
"eval_counts_2": 2044, |
|
"eval_counts_3": 804, |
|
"eval_counts_4": 293, |
|
"eval_exact_match": 0.0064, |
|
"eval_f1": 0.2864, |
|
"eval_gen_len": 12.6084, |
|
"eval_loss": 2.016404151916504, |
|
"eval_precisions_1": 40.876, |
|
"eval_precisions_2": 14.1994, |
|
"eval_precisions_3": 6.595, |
|
"eval_precisions_4": 2.9338, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2931, |
|
"eval_rouge2": 0.1291, |
|
"eval_rougeL": 0.2817, |
|
"eval_rougeLsum": 0.2818, |
|
"eval_runtime": 817.9822, |
|
"eval_samples_per_second": 2.694, |
|
"eval_steps_per_second": 0.674, |
|
"eval_sys_len": 16599, |
|
"eval_totals_1": 16599, |
|
"eval_totals_2": 14395, |
|
"eval_totals_3": 12191, |
|
"eval_totals_4": 9987, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3464, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_bleu": 9.2407, |
|
"eval_bp": 0.7935, |
|
"eval_counts_1": 7251, |
|
"eval_counts_2": 2326, |
|
"eval_counts_3": 969, |
|
"eval_counts_4": 400, |
|
"eval_exact_match": 0.0073, |
|
"eval_f1": 0.3114, |
|
"eval_gen_len": 13.2296, |
|
"eval_loss": 1.9138075113296509, |
|
"eval_precisions_1": 42.0129, |
|
"eval_precisions_2": 15.45, |
|
"eval_precisions_3": 7.5403, |
|
"eval_precisions_4": 3.7569, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3162, |
|
"eval_rouge2": 0.1456, |
|
"eval_rougeL": 0.3031, |
|
"eval_rougeLsum": 0.3031, |
|
"eval_runtime": 765.0466, |
|
"eval_samples_per_second": 2.881, |
|
"eval_steps_per_second": 0.72, |
|
"eval_sys_len": 17259, |
|
"eval_totals_1": 17259, |
|
"eval_totals_2": 15055, |
|
"eval_totals_3": 12851, |
|
"eval_totals_4": 10647, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1679, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 9.6363, |
|
"eval_bp": 0.7795, |
|
"eval_counts_1": 7382, |
|
"eval_counts_2": 2393, |
|
"eval_counts_3": 1006, |
|
"eval_counts_4": 434, |
|
"eval_exact_match": 0.0109, |
|
"eval_f1": 0.3226, |
|
"eval_gen_len": 13.1207, |
|
"eval_loss": 1.8524010181427002, |
|
"eval_precisions_1": 43.3903, |
|
"eval_precisions_2": 16.1591, |
|
"eval_precisions_3": 7.981, |
|
"eval_precisions_4": 4.1727, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3272, |
|
"eval_rouge2": 0.1504, |
|
"eval_rougeL": 0.3147, |
|
"eval_rougeLsum": 0.3149, |
|
"eval_runtime": 882.4242, |
|
"eval_samples_per_second": 2.498, |
|
"eval_steps_per_second": 0.624, |
|
"eval_sys_len": 17013, |
|
"eval_totals_1": 17013, |
|
"eval_totals_2": 14809, |
|
"eval_totals_3": 12605, |
|
"eval_totals_4": 10401, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.0454, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 10.3812, |
|
"eval_bp": 0.7665, |
|
"eval_counts_1": 7581, |
|
"eval_counts_2": 2555, |
|
"eval_counts_3": 1111, |
|
"eval_counts_4": 482, |
|
"eval_exact_match": 0.0132, |
|
"eval_f1": 0.3357, |
|
"eval_gen_len": 12.9782, |
|
"eval_loss": 1.7996737957000732, |
|
"eval_precisions_1": 45.1599, |
|
"eval_precisions_2": 17.5204, |
|
"eval_precisions_3": 8.9749, |
|
"eval_precisions_4": 4.7371, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3401, |
|
"eval_rouge2": 0.1606, |
|
"eval_rougeL": 0.3278, |
|
"eval_rougeLsum": 0.3279, |
|
"eval_runtime": 519.8377, |
|
"eval_samples_per_second": 4.24, |
|
"eval_steps_per_second": 1.06, |
|
"eval_sys_len": 16787, |
|
"eval_totals_1": 16787, |
|
"eval_totals_2": 14583, |
|
"eval_totals_3": 12379, |
|
"eval_totals_4": 10175, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.9502, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 10.7668, |
|
"eval_bp": 0.7992, |
|
"eval_counts_1": 7759, |
|
"eval_counts_2": 2618, |
|
"eval_counts_3": 1162, |
|
"eval_counts_4": 511, |
|
"eval_exact_match": 0.0127, |
|
"eval_f1": 0.3406, |
|
"eval_gen_len": 13.4841, |
|
"eval_loss": 1.7696163654327393, |
|
"eval_precisions_1": 44.6973, |
|
"eval_precisions_2": 17.2748, |
|
"eval_precisions_3": 8.9723, |
|
"eval_precisions_4": 4.7548, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3452, |
|
"eval_rouge2": 0.1631, |
|
"eval_rougeL": 0.3321, |
|
"eval_rougeLsum": 0.3319, |
|
"eval_runtime": 542.6731, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 1.015, |
|
"eval_sys_len": 17359, |
|
"eval_totals_1": 17359, |
|
"eval_totals_2": 15155, |
|
"eval_totals_3": 12951, |
|
"eval_totals_4": 10747, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8414, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 11.3408, |
|
"eval_bp": 0.7721, |
|
"eval_counts_1": 7791, |
|
"eval_counts_2": 2693, |
|
"eval_counts_3": 1236, |
|
"eval_counts_4": 570, |
|
"eval_exact_match": 0.015, |
|
"eval_f1": 0.347, |
|
"eval_gen_len": 13.0563, |
|
"eval_loss": 1.7471755743026733, |
|
"eval_precisions_1": 46.147, |
|
"eval_precisions_2": 18.3459, |
|
"eval_precisions_3": 9.9078, |
|
"eval_precisions_4": 5.5496, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3513, |
|
"eval_rouge2": 0.1679, |
|
"eval_rougeL": 0.3391, |
|
"eval_rougeLsum": 0.3391, |
|
"eval_runtime": 455.2485, |
|
"eval_samples_per_second": 4.841, |
|
"eval_steps_per_second": 1.21, |
|
"eval_sys_len": 16883, |
|
"eval_totals_1": 16883, |
|
"eval_totals_2": 14679, |
|
"eval_totals_3": 12475, |
|
"eval_totals_4": 10271, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7614, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 11.8447, |
|
"eval_bp": 0.8198, |
|
"eval_counts_1": 8024, |
|
"eval_counts_2": 2799, |
|
"eval_counts_3": 1296, |
|
"eval_counts_4": 610, |
|
"eval_exact_match": 0.0145, |
|
"eval_f1": 0.352, |
|
"eval_gen_len": 13.515, |
|
"eval_loss": 1.7203415632247925, |
|
"eval_precisions_1": 45.2643, |
|
"eval_precisions_2": 18.0313, |
|
"eval_precisions_3": 9.7305, |
|
"eval_precisions_4": 5.4881, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3565, |
|
"eval_rouge2": 0.1711, |
|
"eval_rougeL": 0.3422, |
|
"eval_rougeLsum": 0.3423, |
|
"eval_runtime": 457.6091, |
|
"eval_samples_per_second": 4.816, |
|
"eval_steps_per_second": 1.204, |
|
"eval_sys_len": 17727, |
|
"eval_totals_1": 17727, |
|
"eval_totals_2": 15523, |
|
"eval_totals_3": 13319, |
|
"eval_totals_4": 11115, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6997, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 11.9689, |
|
"eval_bp": 0.8027, |
|
"eval_counts_1": 8046, |
|
"eval_counts_2": 2835, |
|
"eval_counts_3": 1314, |
|
"eval_counts_4": 615, |
|
"eval_exact_match": 0.0168, |
|
"eval_f1": 0.3568, |
|
"eval_gen_len": 13.4306, |
|
"eval_loss": 1.7166661024093628, |
|
"eval_precisions_1": 46.183, |
|
"eval_precisions_2": 18.6293, |
|
"eval_precisions_3": 10.0968, |
|
"eval_precisions_4": 5.6892, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3613, |
|
"eval_rouge2": 0.1746, |
|
"eval_rougeL": 0.3466, |
|
"eval_rougeLsum": 0.3466, |
|
"eval_runtime": 543.9804, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 1.013, |
|
"eval_sys_len": 17422, |
|
"eval_totals_1": 17422, |
|
"eval_totals_2": 15218, |
|
"eval_totals_3": 13014, |
|
"eval_totals_4": 10810, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6159, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 12.5678, |
|
"eval_bp": 0.8182, |
|
"eval_counts_1": 8087, |
|
"eval_counts_2": 2928, |
|
"eval_counts_3": 1395, |
|
"eval_counts_4": 681, |
|
"eval_exact_match": 0.0181, |
|
"eval_f1": 0.3564, |
|
"eval_gen_len": 13.5268, |
|
"eval_loss": 1.689180612564087, |
|
"eval_precisions_1": 45.6944, |
|
"eval_precisions_2": 18.8976, |
|
"eval_precisions_3": 10.4966, |
|
"eval_precisions_4": 6.1429, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3612, |
|
"eval_rouge2": 0.1795, |
|
"eval_rougeL": 0.3485, |
|
"eval_rougeLsum": 0.3482, |
|
"eval_runtime": 661.754, |
|
"eval_samples_per_second": 3.331, |
|
"eval_steps_per_second": 0.833, |
|
"eval_sys_len": 17698, |
|
"eval_totals_1": 17698, |
|
"eval_totals_2": 15494, |
|
"eval_totals_3": 13290, |
|
"eval_totals_4": 11086, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5681, |
|
"step": 1601 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 12.497, |
|
"eval_bp": 0.813, |
|
"eval_counts_1": 8154, |
|
"eval_counts_2": 2933, |
|
"eval_counts_3": 1383, |
|
"eval_counts_4": 664, |
|
"eval_exact_match": 0.0168, |
|
"eval_f1": 0.3605, |
|
"eval_gen_len": 13.6044, |
|
"eval_loss": 1.6923038959503174, |
|
"eval_precisions_1": 46.3164, |
|
"eval_precisions_2": 19.0442, |
|
"eval_precisions_3": 10.4797, |
|
"eval_precisions_4": 6.0402, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3654, |
|
"eval_rouge2": 0.1789, |
|
"eval_rougeL": 0.3506, |
|
"eval_rougeLsum": 0.3505, |
|
"eval_runtime": 528.2815, |
|
"eval_samples_per_second": 4.172, |
|
"eval_steps_per_second": 1.043, |
|
"eval_sys_len": 17605, |
|
"eval_totals_1": 17605, |
|
"eval_totals_2": 15401, |
|
"eval_totals_3": 13197, |
|
"eval_totals_4": 10993, |
|
"step": 1601 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4987, |
|
"step": 1747 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 12.8959, |
|
"eval_bp": 0.8169, |
|
"eval_counts_1": 8295, |
|
"eval_counts_2": 3011, |
|
"eval_counts_3": 1432, |
|
"eval_counts_4": 697, |
|
"eval_exact_match": 0.0181, |
|
"eval_f1": 0.3675, |
|
"eval_gen_len": 13.6134, |
|
"eval_loss": 1.6824951171875, |
|
"eval_precisions_1": 46.928, |
|
"eval_precisions_2": 19.461, |
|
"eval_precisions_3": 10.7929, |
|
"eval_precisions_4": 6.2997, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3734, |
|
"eval_rouge2": 0.1846, |
|
"eval_rougeL": 0.3576, |
|
"eval_rougeLsum": 0.3577, |
|
"eval_runtime": 636.4551, |
|
"eval_samples_per_second": 3.463, |
|
"eval_steps_per_second": 0.866, |
|
"eval_sys_len": 17676, |
|
"eval_totals_1": 17676, |
|
"eval_totals_2": 15472, |
|
"eval_totals_3": 13268, |
|
"eval_totals_4": 11064, |
|
"step": 1747 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4461, |
|
"step": 1893 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 12.8688, |
|
"eval_bp": 0.8139, |
|
"eval_counts_1": 8246, |
|
"eval_counts_2": 3005, |
|
"eval_counts_3": 1424, |
|
"eval_counts_4": 700, |
|
"eval_exact_match": 0.0191, |
|
"eval_f1": 0.3658, |
|
"eval_gen_len": 13.5812, |
|
"eval_loss": 1.6783509254455566, |
|
"eval_precisions_1": 46.7964, |
|
"eval_precisions_2": 19.4915, |
|
"eval_precisions_3": 10.7773, |
|
"eval_precisions_4": 6.3584, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3725, |
|
"eval_rouge2": 0.1857, |
|
"eval_rougeL": 0.358, |
|
"eval_rougeLsum": 0.3576, |
|
"eval_runtime": 521.7174, |
|
"eval_samples_per_second": 4.225, |
|
"eval_steps_per_second": 1.056, |
|
"eval_sys_len": 17621, |
|
"eval_totals_1": 17621, |
|
"eval_totals_2": 15417, |
|
"eval_totals_3": 13213, |
|
"eval_totals_4": 11009, |
|
"step": 1893 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4002, |
|
"step": 2038 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_bleu": 13.4526, |
|
"eval_bp": 0.8329, |
|
"eval_counts_1": 8457, |
|
"eval_counts_2": 3130, |
|
"eval_counts_3": 1504, |
|
"eval_counts_4": 745, |
|
"eval_exact_match": 0.02, |
|
"eval_f1": 0.3727, |
|
"eval_gen_len": 13.9179, |
|
"eval_loss": 1.6725177764892578, |
|
"eval_precisions_1": 47.0749, |
|
"eval_precisions_2": 19.8591, |
|
"eval_precisions_3": 11.0939, |
|
"eval_precisions_4": 6.5621, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3797, |
|
"eval_rouge2": 0.1915, |
|
"eval_rougeL": 0.3637, |
|
"eval_rougeLsum": 0.3634, |
|
"eval_runtime": 592.5507, |
|
"eval_samples_per_second": 3.72, |
|
"eval_steps_per_second": 0.93, |
|
"eval_sys_len": 17965, |
|
"eval_totals_1": 17965, |
|
"eval_totals_2": 15761, |
|
"eval_totals_3": 13557, |
|
"eval_totals_4": 11353, |
|
"step": 2038 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3391, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 13.211, |
|
"eval_bp": 0.8283, |
|
"eval_counts_1": 8443, |
|
"eval_counts_2": 3091, |
|
"eval_counts_3": 1468, |
|
"eval_counts_4": 719, |
|
"eval_exact_match": 0.0204, |
|
"eval_f1": 0.3737, |
|
"eval_gen_len": 13.9133, |
|
"eval_loss": 1.6783130168914795, |
|
"eval_precisions_1": 47.2177, |
|
"eval_precisions_2": 19.7168, |
|
"eval_precisions_3": 10.8959, |
|
"eval_precisions_4": 6.3803, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3804, |
|
"eval_rouge2": 0.1901, |
|
"eval_rougeL": 0.3634, |
|
"eval_rougeLsum": 0.363, |
|
"eval_runtime": 547.4964, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 1.006, |
|
"eval_sys_len": 17881, |
|
"eval_totals_1": 17881, |
|
"eval_totals_2": 15677, |
|
"eval_totals_3": 13473, |
|
"eval_totals_4": 11269, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2921, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 13.4907, |
|
"eval_bp": 0.8373, |
|
"eval_counts_1": 8457, |
|
"eval_counts_2": 3147, |
|
"eval_counts_3": 1511, |
|
"eval_counts_4": 747, |
|
"eval_exact_match": 0.0195, |
|
"eval_f1": 0.3716, |
|
"eval_gen_len": 13.9882, |
|
"eval_loss": 1.6737552881240845, |
|
"eval_precisions_1": 46.8662, |
|
"eval_precisions_2": 19.8662, |
|
"eval_precisions_3": 11.0801, |
|
"eval_precisions_4": 6.5337, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3782, |
|
"eval_rouge2": 0.1902, |
|
"eval_rougeL": 0.3624, |
|
"eval_rougeLsum": 0.3624, |
|
"eval_runtime": 652.072, |
|
"eval_samples_per_second": 3.38, |
|
"eval_steps_per_second": 0.845, |
|
"eval_sys_len": 18045, |
|
"eval_totals_1": 18045, |
|
"eval_totals_2": 15841, |
|
"eval_totals_3": 13637, |
|
"eval_totals_4": 11433, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2572, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 13.8581, |
|
"eval_bp": 0.8267, |
|
"eval_counts_1": 8473, |
|
"eval_counts_2": 3219, |
|
"eval_counts_3": 1561, |
|
"eval_counts_4": 783, |
|
"eval_exact_match": 0.02, |
|
"eval_f1": 0.3753, |
|
"eval_gen_len": 13.7618, |
|
"eval_loss": 1.676971435546875, |
|
"eval_precisions_1": 47.4598, |
|
"eval_precisions_2": 20.57, |
|
"eval_precisions_3": 11.6103, |
|
"eval_precisions_4": 6.9656, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3821, |
|
"eval_rouge2": 0.1948, |
|
"eval_rougeL": 0.3669, |
|
"eval_rougeLsum": 0.3665, |
|
"eval_runtime": 452.0799, |
|
"eval_samples_per_second": 4.875, |
|
"eval_steps_per_second": 1.219, |
|
"eval_sys_len": 17853, |
|
"eval_totals_1": 17853, |
|
"eval_totals_2": 15649, |
|
"eval_totals_3": 13445, |
|
"eval_totals_4": 11241, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.199, |
|
"step": 2621 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 13.7496, |
|
"eval_bp": 0.8326, |
|
"eval_counts_1": 8484, |
|
"eval_counts_2": 3190, |
|
"eval_counts_3": 1551, |
|
"eval_counts_4": 771, |
|
"eval_exact_match": 0.0186, |
|
"eval_f1": 0.3745, |
|
"eval_gen_len": 13.8798, |
|
"eval_loss": 1.6934301853179932, |
|
"eval_precisions_1": 47.2409, |
|
"eval_precisions_2": 20.2475, |
|
"eval_precisions_3": 11.4456, |
|
"eval_precisions_4": 6.7947, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3812, |
|
"eval_rouge2": 0.1922, |
|
"eval_rougeL": 0.3657, |
|
"eval_rougeLsum": 0.3658, |
|
"eval_runtime": 869.0302, |
|
"eval_samples_per_second": 2.536, |
|
"eval_steps_per_second": 0.634, |
|
"eval_sys_len": 17959, |
|
"eval_totals_1": 17959, |
|
"eval_totals_2": 15755, |
|
"eval_totals_3": 13551, |
|
"eval_totals_4": 11347, |
|
"step": 2621 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1668, |
|
"step": 2766 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_bleu": 13.7379, |
|
"eval_bp": 0.8395, |
|
"eval_counts_1": 8504, |
|
"eval_counts_2": 3179, |
|
"eval_counts_3": 1541, |
|
"eval_counts_4": 776, |
|
"eval_exact_match": 0.0204, |
|
"eval_f1": 0.376, |
|
"eval_gen_len": 13.9256, |
|
"eval_loss": 1.6926020383834839, |
|
"eval_precisions_1": 47.0198, |
|
"eval_precisions_2": 20.0164, |
|
"eval_precisions_3": 11.2663, |
|
"eval_precisions_4": 6.7631, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3828, |
|
"eval_rouge2": 0.1939, |
|
"eval_rougeL": 0.3665, |
|
"eval_rougeLsum": 0.3665, |
|
"eval_runtime": 580.7372, |
|
"eval_samples_per_second": 3.795, |
|
"eval_steps_per_second": 0.949, |
|
"eval_sys_len": 18086, |
|
"eval_totals_1": 18086, |
|
"eval_totals_2": 15882, |
|
"eval_totals_3": 13678, |
|
"eval_totals_4": 11474, |
|
"step": 2766 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1164, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"eval_bleu": 14.1906, |
|
"eval_bp": 0.8529, |
|
"eval_counts_1": 8625, |
|
"eval_counts_2": 3250, |
|
"eval_counts_3": 1609, |
|
"eval_counts_4": 820, |
|
"eval_exact_match": 0.0204, |
|
"eval_f1": 0.3803, |
|
"eval_gen_len": 14.069, |
|
"eval_loss": 1.7026218175888062, |
|
"eval_precisions_1": 47.0463, |
|
"eval_precisions_2": 20.15, |
|
"eval_precisions_3": 11.5548, |
|
"eval_precisions_4": 6.996, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3874, |
|
"eval_rouge2": 0.1964, |
|
"eval_rougeL": 0.3716, |
|
"eval_rougeLsum": 0.3715, |
|
"eval_runtime": 462.8982, |
|
"eval_samples_per_second": 4.761, |
|
"eval_steps_per_second": 1.19, |
|
"eval_sys_len": 18333, |
|
"eval_totals_1": 18333, |
|
"eval_totals_2": 16129, |
|
"eval_totals_3": 13925, |
|
"eval_totals_4": 11721, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"step": 2900, |
|
"total_flos": 2.54036307345408e+17, |
|
"train_loss": 1.724, |
|
"train_runtime": 25476.0, |
|
"train_samples_per_second": 7.312, |
|
"train_steps_per_second": 0.114 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2900, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 2.54036307345408e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|