|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.789564097058193, |
|
"eval_steps": 500, |
|
"global_step": 720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001, |
|
"loss": 5.932, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 3.7906, |
|
"eval_bp": 1.0, |
|
"eval_counts_1": 5614, |
|
"eval_counts_2": 1426, |
|
"eval_counts_3": 527, |
|
"eval_counts_4": 204, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.2127, |
|
"eval_gen_len": 11.4306, |
|
"eval_loss": 2.450988292694092, |
|
"eval_precisions_1": 19.4694, |
|
"eval_precisions_2": 5.3547, |
|
"eval_precisions_3": 2.1574, |
|
"eval_precisions_4": 0.918, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.1946, |
|
"eval_rouge2": 0.0763, |
|
"eval_rougeL": 0.1843, |
|
"eval_rougeLsum": 0.1843, |
|
"eval_runtime": 1846.0551, |
|
"eval_samples_per_second": 1.194, |
|
"eval_steps_per_second": 1.194, |
|
"eval_sys_len": 28835, |
|
"eval_totals_1": 28835, |
|
"eval_totals_2": 26631, |
|
"eval_totals_3": 24427, |
|
"eval_totals_4": 22223, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3089, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_bleu": 11.3254, |
|
"eval_bp": 0.7904, |
|
"eval_counts_1": 7578, |
|
"eval_counts_2": 2696, |
|
"eval_counts_3": 1244, |
|
"eval_counts_4": 580, |
|
"eval_exact_match": 0.01, |
|
"eval_f1": 0.3246, |
|
"eval_gen_len": 12.6583, |
|
"eval_loss": 1.3963948488235474, |
|
"eval_precisions_1": 44.0505, |
|
"eval_precisions_2": 17.9745, |
|
"eval_precisions_3": 9.7225, |
|
"eval_precisions_4": 5.4763, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3312, |
|
"eval_rouge2": 0.1655, |
|
"eval_rougeL": 0.316, |
|
"eval_rougeLsum": 0.3162, |
|
"eval_runtime": 1912.8668, |
|
"eval_samples_per_second": 1.152, |
|
"eval_steps_per_second": 1.152, |
|
"eval_sys_len": 17203, |
|
"eval_totals_1": 17203, |
|
"eval_totals_2": 14999, |
|
"eval_totals_3": 12795, |
|
"eval_totals_4": 10591, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6778, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 13.128, |
|
"eval_bp": 0.7826, |
|
"eval_counts_1": 7961, |
|
"eval_counts_2": 3020, |
|
"eval_counts_3": 1480, |
|
"eval_counts_4": 747, |
|
"eval_exact_match": 0.0195, |
|
"eval_f1": 0.3517, |
|
"eval_gen_len": 12.4682, |
|
"eval_loss": 1.2659858465194702, |
|
"eval_precisions_1": 46.6456, |
|
"eval_precisions_2": 20.3189, |
|
"eval_precisions_3": 11.6913, |
|
"eval_precisions_4": 7.1449, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3608, |
|
"eval_rouge2": 0.1881, |
|
"eval_rougeL": 0.3456, |
|
"eval_rougeLsum": 0.3454, |
|
"eval_runtime": 1864.6637, |
|
"eval_samples_per_second": 1.182, |
|
"eval_steps_per_second": 1.182, |
|
"eval_sys_len": 17067, |
|
"eval_totals_1": 17067, |
|
"eval_totals_2": 14863, |
|
"eval_totals_3": 12659, |
|
"eval_totals_4": 10455, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5383, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_bleu": 13.625, |
|
"eval_bp": 0.7612, |
|
"eval_counts_1": 7948, |
|
"eval_counts_2": 3121, |
|
"eval_counts_3": 1558, |
|
"eval_counts_4": 796, |
|
"eval_exact_match": 0.024, |
|
"eval_f1": 0.3554, |
|
"eval_gen_len": 12.221, |
|
"eval_loss": 1.2212449312210083, |
|
"eval_precisions_1": 47.6099, |
|
"eval_precisions_2": 21.539, |
|
"eval_precisions_3": 12.6811, |
|
"eval_precisions_4": 7.8953, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3663, |
|
"eval_rouge2": 0.1989, |
|
"eval_rougeL": 0.3523, |
|
"eval_rougeLsum": 0.352, |
|
"eval_runtime": 1862.6167, |
|
"eval_samples_per_second": 1.183, |
|
"eval_steps_per_second": 1.183, |
|
"eval_sys_len": 16694, |
|
"eval_totals_1": 16694, |
|
"eval_totals_2": 14490, |
|
"eval_totals_3": 12286, |
|
"eval_totals_4": 10082, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0001, |
|
"loss": 1.423, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_bleu": 15.7567, |
|
"eval_bp": 0.8219, |
|
"eval_counts_1": 8746, |
|
"eval_counts_2": 3590, |
|
"eval_counts_3": 1840, |
|
"eval_counts_4": 963, |
|
"eval_exact_match": 0.0304, |
|
"eval_f1": 0.3941, |
|
"eval_gen_len": 13.0277, |
|
"eval_loss": 1.1706066131591797, |
|
"eval_precisions_1": 49.2316, |
|
"eval_precisions_2": 23.0705, |
|
"eval_precisions_3": 13.7755, |
|
"eval_precisions_4": 8.6344, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4033, |
|
"eval_rouge2": 0.2224, |
|
"eval_rougeL": 0.3876, |
|
"eval_rougeLsum": 0.3874, |
|
"eval_runtime": 1923.5708, |
|
"eval_samples_per_second": 1.146, |
|
"eval_steps_per_second": 1.146, |
|
"eval_sys_len": 17765, |
|
"eval_totals_1": 17765, |
|
"eval_totals_2": 15561, |
|
"eval_totals_3": 13357, |
|
"eval_totals_4": 11153, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2861, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 16.123, |
|
"eval_bp": 0.8018, |
|
"eval_counts_1": 8885, |
|
"eval_counts_2": 3646, |
|
"eval_counts_3": 1864, |
|
"eval_counts_4": 1005, |
|
"eval_exact_match": 0.0331, |
|
"eval_f1": 0.4092, |
|
"eval_gen_len": 12.9142, |
|
"eval_loss": 1.1327459812164307, |
|
"eval_precisions_1": 51.0456, |
|
"eval_precisions_2": 23.9837, |
|
"eval_precisions_3": 14.3407, |
|
"eval_precisions_4": 9.3107, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4181, |
|
"eval_rouge2": 0.2295, |
|
"eval_rougeL": 0.4022, |
|
"eval_rougeLsum": 0.402, |
|
"eval_runtime": 2271.0986, |
|
"eval_samples_per_second": 0.97, |
|
"eval_steps_per_second": 0.97, |
|
"eval_sys_len": 17406, |
|
"eval_totals_1": 17406, |
|
"eval_totals_2": 15202, |
|
"eval_totals_3": 12998, |
|
"eval_totals_4": 10794, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2372, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_bleu": 17.0334, |
|
"eval_bp": 0.7964, |
|
"eval_counts_1": 9122, |
|
"eval_counts_2": 3824, |
|
"eval_counts_3": 1997, |
|
"eval_counts_4": 1084, |
|
"eval_exact_match": 0.0358, |
|
"eval_f1": 0.4236, |
|
"eval_gen_len": 12.8412, |
|
"eval_loss": 1.1248232126235962, |
|
"eval_precisions_1": 52.6979, |
|
"eval_precisions_2": 25.3144, |
|
"eval_precisions_3": 15.4782, |
|
"eval_precisions_4": 10.1327, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4313, |
|
"eval_rouge2": 0.239, |
|
"eval_rougeL": 0.4175, |
|
"eval_rougeLsum": 0.4172, |
|
"eval_runtime": 2069.7482, |
|
"eval_samples_per_second": 1.065, |
|
"eval_steps_per_second": 1.065, |
|
"eval_sys_len": 17310, |
|
"eval_totals_1": 17310, |
|
"eval_totals_2": 15106, |
|
"eval_totals_3": 12902, |
|
"eval_totals_4": 10698, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1307, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 18.0474, |
|
"eval_bp": 0.8389, |
|
"eval_counts_1": 9423, |
|
"eval_counts_2": 4019, |
|
"eval_counts_3": 2136, |
|
"eval_counts_4": 1190, |
|
"eval_exact_match": 0.0404, |
|
"eval_f1": 0.4327, |
|
"eval_gen_len": 13.4138, |
|
"eval_loss": 1.0998262166976929, |
|
"eval_precisions_1": 52.1357, |
|
"eval_precisions_2": 25.3245, |
|
"eval_precisions_3": 15.63, |
|
"eval_precisions_4": 10.3821, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.441, |
|
"eval_rouge2": 0.249, |
|
"eval_rougeL": 0.4255, |
|
"eval_rougeLsum": 0.4252, |
|
"eval_runtime": 2305.9504, |
|
"eval_samples_per_second": 0.956, |
|
"eval_steps_per_second": 0.956, |
|
"eval_sys_len": 18074, |
|
"eval_totals_1": 18074, |
|
"eval_totals_2": 15870, |
|
"eval_totals_3": 13666, |
|
"eval_totals_4": 11462, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0982, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_bleu": 18.0367, |
|
"eval_bp": 0.8427, |
|
"eval_counts_1": 9450, |
|
"eval_counts_2": 4003, |
|
"eval_counts_3": 2147, |
|
"eval_counts_4": 1184, |
|
"eval_exact_match": 0.0426, |
|
"eval_f1": 0.4344, |
|
"eval_gen_len": 13.4465, |
|
"eval_loss": 1.1051570177078247, |
|
"eval_precisions_1": 52.0805, |
|
"eval_precisions_2": 25.1113, |
|
"eval_precisions_3": 15.6293, |
|
"eval_precisions_4": 10.2662, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4427, |
|
"eval_rouge2": 0.2492, |
|
"eval_rougeL": 0.4266, |
|
"eval_rougeLsum": 0.4261, |
|
"eval_runtime": 2511.4033, |
|
"eval_samples_per_second": 0.878, |
|
"eval_steps_per_second": 0.878, |
|
"eval_sys_len": 18145, |
|
"eval_totals_1": 18145, |
|
"eval_totals_2": 15941, |
|
"eval_totals_3": 13737, |
|
"eval_totals_4": 11533, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0449, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_bleu": 18.0793, |
|
"eval_bp": 0.8385, |
|
"eval_counts_1": 9471, |
|
"eval_counts_2": 4036, |
|
"eval_counts_3": 2149, |
|
"eval_counts_4": 1180, |
|
"eval_exact_match": 0.0404, |
|
"eval_f1": 0.4341, |
|
"eval_gen_len": 13.333, |
|
"eval_loss": 1.0995820760726929, |
|
"eval_precisions_1": 52.4215, |
|
"eval_precisions_2": 25.4429, |
|
"eval_precisions_3": 15.7332, |
|
"eval_precisions_4": 10.3012, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4422, |
|
"eval_rouge2": 0.2477, |
|
"eval_rougeL": 0.4261, |
|
"eval_rougeLsum": 0.4257, |
|
"eval_runtime": 1973.5312, |
|
"eval_samples_per_second": 1.117, |
|
"eval_steps_per_second": 1.117, |
|
"eval_sys_len": 18067, |
|
"eval_totals_1": 18067, |
|
"eval_totals_2": 15863, |
|
"eval_totals_3": 13659, |
|
"eval_totals_4": 11455, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9686, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 18.6914, |
|
"eval_bp": 0.8339, |
|
"eval_counts_1": 9612, |
|
"eval_counts_2": 4165, |
|
"eval_counts_3": 2240, |
|
"eval_counts_4": 1233, |
|
"eval_exact_match": 0.0449, |
|
"eval_f1": 0.4458, |
|
"eval_gen_len": 13.3534, |
|
"eval_loss": 1.1012390851974487, |
|
"eval_precisions_1": 53.4505, |
|
"eval_precisions_2": 26.3958, |
|
"eval_precisions_3": 16.5009, |
|
"eval_precisions_4": 10.8434, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4534, |
|
"eval_rouge2": 0.2591, |
|
"eval_rougeL": 0.4381, |
|
"eval_rougeLsum": 0.4378, |
|
"eval_runtime": 2028.4468, |
|
"eval_samples_per_second": 1.087, |
|
"eval_steps_per_second": 1.087, |
|
"eval_sys_len": 17983, |
|
"eval_totals_1": 17983, |
|
"eval_totals_2": 15779, |
|
"eval_totals_3": 13575, |
|
"eval_totals_4": 11371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9465, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_bleu": 18.6863, |
|
"eval_bp": 0.8466, |
|
"eval_counts_1": 9670, |
|
"eval_counts_2": 4154, |
|
"eval_counts_3": 2229, |
|
"eval_counts_4": 1239, |
|
"eval_exact_match": 0.0445, |
|
"eval_f1": 0.4452, |
|
"eval_gen_len": 13.5912, |
|
"eval_loss": 1.1026833057403564, |
|
"eval_precisions_1": 53.0823, |
|
"eval_precisions_2": 25.9414, |
|
"eval_precisions_3": 16.1416, |
|
"eval_precisions_4": 10.6764, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4531, |
|
"eval_rouge2": 0.258, |
|
"eval_rougeL": 0.4377, |
|
"eval_rougeLsum": 0.4374, |
|
"eval_runtime": 4413.1713, |
|
"eval_samples_per_second": 0.499, |
|
"eval_steps_per_second": 0.499, |
|
"eval_sys_len": 18217, |
|
"eval_totals_1": 18217, |
|
"eval_totals_2": 16013, |
|
"eval_totals_3": 13809, |
|
"eval_totals_4": 11605, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9025, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 12.97, |
|
"eval_bleu": 18.7344, |
|
"eval_bp": 0.839, |
|
"eval_counts_1": 9627, |
|
"eval_counts_2": 4155, |
|
"eval_counts_3": 2241, |
|
"eval_counts_4": 1247, |
|
"eval_exact_match": 0.0436, |
|
"eval_f1": 0.4452, |
|
"eval_gen_len": 13.5259, |
|
"eval_loss": 1.11244535446167, |
|
"eval_precisions_1": 53.2585, |
|
"eval_precisions_2": 26.1782, |
|
"eval_precisions_3": 16.396, |
|
"eval_precisions_4": 10.8775, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4531, |
|
"eval_rouge2": 0.2583, |
|
"eval_rougeL": 0.4386, |
|
"eval_rougeLsum": 0.4382, |
|
"eval_runtime": 3852.502, |
|
"eval_samples_per_second": 0.572, |
|
"eval_steps_per_second": 0.572, |
|
"eval_sys_len": 18076, |
|
"eval_totals_1": 18076, |
|
"eval_totals_2": 15872, |
|
"eval_totals_3": 13668, |
|
"eval_totals_4": 11464, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8402, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_bleu": 18.3062, |
|
"eval_bp": 0.7981, |
|
"eval_counts_1": 9425, |
|
"eval_counts_2": 4071, |
|
"eval_counts_3": 2176, |
|
"eval_counts_4": 1207, |
|
"eval_exact_match": 0.0445, |
|
"eval_f1": 0.4417, |
|
"eval_gen_len": 12.9129, |
|
"eval_loss": 1.139233946800232, |
|
"eval_precisions_1": 54.3572, |
|
"eval_precisions_2": 26.8979, |
|
"eval_precisions_3": 16.8278, |
|
"eval_precisions_4": 11.252, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4495, |
|
"eval_rouge2": 0.2568, |
|
"eval_rougeL": 0.4365, |
|
"eval_rougeLsum": 0.4358, |
|
"eval_runtime": 3679.9149, |
|
"eval_samples_per_second": 0.599, |
|
"eval_steps_per_second": 0.599, |
|
"eval_sys_len": 17339, |
|
"eval_totals_1": 17339, |
|
"eval_totals_2": 15135, |
|
"eval_totals_3": 12931, |
|
"eval_totals_4": 10727, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8282, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_bleu": 19.2695, |
|
"eval_bp": 0.87, |
|
"eval_counts_1": 9803, |
|
"eval_counts_2": 4274, |
|
"eval_counts_3": 2316, |
|
"eval_counts_4": 1305, |
|
"eval_exact_match": 0.0463, |
|
"eval_f1": 0.4496, |
|
"eval_gen_len": 14.0104, |
|
"eval_loss": 1.1227205991744995, |
|
"eval_precisions_1": 52.5574, |
|
"eval_precisions_2": 25.9849, |
|
"eval_precisions_3": 16.2595, |
|
"eval_precisions_4": 10.8389, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4573, |
|
"eval_rouge2": 0.2627, |
|
"eval_rougeL": 0.4418, |
|
"eval_rougeLsum": 0.4414, |
|
"eval_runtime": 3897.0455, |
|
"eval_samples_per_second": 0.566, |
|
"eval_steps_per_second": 0.566, |
|
"eval_sys_len": 18652, |
|
"eval_totals_1": 18652, |
|
"eval_totals_2": 16448, |
|
"eval_totals_3": 14244, |
|
"eval_totals_4": 12040, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7694, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 19.1704, |
|
"eval_bp": 0.8501, |
|
"eval_counts_1": 9740, |
|
"eval_counts_2": 4240, |
|
"eval_counts_3": 2299, |
|
"eval_counts_4": 1296, |
|
"eval_exact_match": 0.0476, |
|
"eval_f1": 0.4492, |
|
"eval_gen_len": 13.6475, |
|
"eval_loss": 1.139431357383728, |
|
"eval_precisions_1": 53.2794, |
|
"eval_precisions_2": 26.3731, |
|
"eval_precisions_3": 16.5718, |
|
"eval_precisions_4": 11.1064, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4572, |
|
"eval_rouge2": 0.2629, |
|
"eval_rougeL": 0.4411, |
|
"eval_rougeLsum": 0.4412, |
|
"eval_runtime": 3879.8331, |
|
"eval_samples_per_second": 0.568, |
|
"eval_steps_per_second": 0.568, |
|
"eval_sys_len": 18281, |
|
"eval_totals_1": 18281, |
|
"eval_totals_2": 16077, |
|
"eval_totals_3": 13873, |
|
"eval_totals_4": 11669, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7589, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_bleu": 18.5906, |
|
"eval_bp": 0.8572, |
|
"eval_counts_1": 9663, |
|
"eval_counts_2": 4140, |
|
"eval_counts_3": 2214, |
|
"eval_counts_4": 1232, |
|
"eval_exact_match": 0.044, |
|
"eval_f1": 0.4432, |
|
"eval_gen_len": 13.7926, |
|
"eval_loss": 1.1496515274047852, |
|
"eval_precisions_1": 52.4821, |
|
"eval_precisions_2": 25.5429, |
|
"eval_precisions_3": 15.8098, |
|
"eval_precisions_4": 10.4407, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4515, |
|
"eval_rouge2": 0.2561, |
|
"eval_rougeL": 0.4359, |
|
"eval_rougeLsum": 0.4358, |
|
"eval_runtime": 3896.3752, |
|
"eval_samples_per_second": 0.566, |
|
"eval_steps_per_second": 0.566, |
|
"eval_sys_len": 18412, |
|
"eval_totals_1": 18412, |
|
"eval_totals_2": 16208, |
|
"eval_totals_3": 14004, |
|
"eval_totals_4": 11800, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.724, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"eval_bleu": 19.2167, |
|
"eval_bp": 0.8566, |
|
"eval_counts_1": 9743, |
|
"eval_counts_2": 4246, |
|
"eval_counts_3": 2316, |
|
"eval_counts_4": 1300, |
|
"eval_exact_match": 0.0472, |
|
"eval_f1": 0.4474, |
|
"eval_gen_len": 13.7214, |
|
"eval_loss": 1.1680002212524414, |
|
"eval_precisions_1": 52.9453, |
|
"eval_precisions_2": 26.2131, |
|
"eval_precisions_3": 16.5499, |
|
"eval_precisions_4": 11.0263, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4562, |
|
"eval_rouge2": 0.2625, |
|
"eval_rougeL": 0.4408, |
|
"eval_rougeLsum": 0.441, |
|
"eval_runtime": 3957.7799, |
|
"eval_samples_per_second": 0.557, |
|
"eval_steps_per_second": 0.557, |
|
"eval_sys_len": 18402, |
|
"eval_totals_1": 18402, |
|
"eval_totals_2": 16198, |
|
"eval_totals_3": 13994, |
|
"eval_totals_4": 11790, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6755, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_bleu": 19.4647, |
|
"eval_bp": 0.8496, |
|
"eval_counts_1": 9722, |
|
"eval_counts_2": 4266, |
|
"eval_counts_3": 2351, |
|
"eval_counts_4": 1341, |
|
"eval_exact_match": 0.0495, |
|
"eval_f1": 0.4469, |
|
"eval_gen_len": 13.6071, |
|
"eval_loss": 1.1873786449432373, |
|
"eval_precisions_1": 53.2071, |
|
"eval_precisions_2": 26.5497, |
|
"eval_precisions_3": 16.9576, |
|
"eval_precisions_4": 11.5009, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4559, |
|
"eval_rouge2": 0.2639, |
|
"eval_rougeL": 0.4417, |
|
"eval_rougeLsum": 0.4413, |
|
"eval_runtime": 3873.6961, |
|
"eval_samples_per_second": 0.569, |
|
"eval_steps_per_second": 0.569, |
|
"eval_sys_len": 18272, |
|
"eval_totals_1": 18272, |
|
"eval_totals_2": 16068, |
|
"eval_totals_3": 13864, |
|
"eval_totals_4": 11660, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 0.0001, |
|
"loss": 0.657, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"eval_bleu": 19.8248, |
|
"eval_bp": 0.8822, |
|
"eval_counts_1": 9920, |
|
"eval_counts_2": 4361, |
|
"eval_counts_3": 2402, |
|
"eval_counts_4": 1373, |
|
"eval_exact_match": 0.0467, |
|
"eval_f1": 0.4508, |
|
"eval_gen_len": 14.2001, |
|
"eval_loss": 1.1845453977584839, |
|
"eval_precisions_1": 52.5312, |
|
"eval_precisions_2": 26.1451, |
|
"eval_precisions_3": 16.593, |
|
"eval_precisions_4": 11.1881, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4594, |
|
"eval_rouge2": 0.2647, |
|
"eval_rougeL": 0.4423, |
|
"eval_rougeLsum": 0.4421, |
|
"eval_runtime": 3958.6673, |
|
"eval_samples_per_second": 0.557, |
|
"eval_steps_per_second": 0.557, |
|
"eval_sys_len": 18884, |
|
"eval_totals_1": 18884, |
|
"eval_totals_2": 16680, |
|
"eval_totals_3": 14476, |
|
"eval_totals_4": 12272, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"step": 720, |
|
"total_flos": 1.102412878184448e+18, |
|
"train_loss": 1.3412119759453667, |
|
"train_runtime": 128063.7495, |
|
"train_samples_per_second": 1.455, |
|
"train_steps_per_second": 0.006 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 720, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.102412878184448e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|