{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.789564097058193, "eval_steps": 500, "global_step": 720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 0.0001, "loss": 5.932, "step": 36 }, { "epoch": 0.99, "eval_bleu": 3.7906, "eval_bp": 1.0, "eval_counts_1": 5614, "eval_counts_2": 1426, "eval_counts_3": 527, "eval_counts_4": 204, "eval_exact_match": 0.0, "eval_f1": 0.2127, "eval_gen_len": 11.4306, "eval_loss": 2.450988292694092, "eval_precisions_1": 19.4694, "eval_precisions_2": 5.3547, "eval_precisions_3": 2.1574, "eval_precisions_4": 0.918, "eval_ref_len": 21250, "eval_rouge1": 0.1946, "eval_rouge2": 0.0763, "eval_rougeL": 0.1843, "eval_rougeLsum": 0.1843, "eval_runtime": 1846.0551, "eval_samples_per_second": 1.194, "eval_steps_per_second": 1.194, "eval_sys_len": 28835, "eval_totals_1": 28835, "eval_totals_2": 26631, "eval_totals_3": 24427, "eval_totals_4": 22223, "step": 36 }, { "epoch": 1.98, "learning_rate": 0.0001, "loss": 2.3089, "step": 72 }, { "epoch": 1.98, "eval_bleu": 11.3254, "eval_bp": 0.7904, "eval_counts_1": 7578, "eval_counts_2": 2696, "eval_counts_3": 1244, "eval_counts_4": 580, "eval_exact_match": 0.01, "eval_f1": 0.3246, "eval_gen_len": 12.6583, "eval_loss": 1.3963948488235474, "eval_precisions_1": 44.0505, "eval_precisions_2": 17.9745, "eval_precisions_3": 9.7225, "eval_precisions_4": 5.4763, "eval_ref_len": 21250, "eval_rouge1": 0.3312, "eval_rouge2": 0.1655, "eval_rougeL": 0.316, "eval_rougeLsum": 0.3162, "eval_runtime": 1912.8668, "eval_samples_per_second": 1.152, "eval_steps_per_second": 1.152, "eval_sys_len": 17203, "eval_totals_1": 17203, "eval_totals_2": 14999, "eval_totals_3": 12795, "eval_totals_4": 10591, "step": 72 }, { "epoch": 3.0, "learning_rate": 0.0001, "loss": 1.6778, "step": 109 }, { "epoch": 3.0, "eval_bleu": 13.128, "eval_bp": 0.7826, "eval_counts_1": 7961, "eval_counts_2": 3020, "eval_counts_3": 1480, "eval_counts_4": 747, "eval_exact_match": 0.0195, "eval_f1": 0.3517, "eval_gen_len": 12.4682, "eval_loss": 1.2659858465194702, "eval_precisions_1": 46.6456, "eval_precisions_2": 20.3189, "eval_precisions_3": 11.6913, "eval_precisions_4": 7.1449, "eval_ref_len": 21250, "eval_rouge1": 0.3608, "eval_rouge2": 0.1881, "eval_rougeL": 0.3456, "eval_rougeLsum": 0.3454, "eval_runtime": 1864.6637, "eval_samples_per_second": 1.182, "eval_steps_per_second": 1.182, "eval_sys_len": 17067, "eval_totals_1": 17067, "eval_totals_2": 14863, "eval_totals_3": 12659, "eval_totals_4": 10455, "step": 109 }, { "epoch": 3.99, "learning_rate": 0.0001, "loss": 1.5383, "step": 145 }, { "epoch": 3.99, "eval_bleu": 13.625, "eval_bp": 0.7612, "eval_counts_1": 7948, "eval_counts_2": 3121, "eval_counts_3": 1558, "eval_counts_4": 796, "eval_exact_match": 0.024, "eval_f1": 0.3554, "eval_gen_len": 12.221, "eval_loss": 1.2212449312210083, "eval_precisions_1": 47.6099, "eval_precisions_2": 21.539, "eval_precisions_3": 12.6811, "eval_precisions_4": 7.8953, "eval_ref_len": 21250, "eval_rouge1": 0.3663, "eval_rouge2": 0.1989, "eval_rougeL": 0.3523, "eval_rougeLsum": 0.352, "eval_runtime": 1862.6167, "eval_samples_per_second": 1.183, "eval_steps_per_second": 1.183, "eval_sys_len": 16694, "eval_totals_1": 16694, "eval_totals_2": 14490, "eval_totals_3": 12286, "eval_totals_4": 10082, "step": 145 }, { "epoch": 4.97, "learning_rate": 0.0001, "loss": 1.423, "step": 181 }, { "epoch": 4.97, "eval_bleu": 15.7567, "eval_bp": 0.8219, "eval_counts_1": 8746, "eval_counts_2": 3590, "eval_counts_3": 1840, "eval_counts_4": 963, "eval_exact_match": 0.0304, "eval_f1": 0.3941, "eval_gen_len": 13.0277, "eval_loss": 1.1706066131591797, "eval_precisions_1": 49.2316, "eval_precisions_2": 23.0705, "eval_precisions_3": 13.7755, "eval_precisions_4": 8.6344, "eval_ref_len": 21250, "eval_rouge1": 0.4033, "eval_rouge2": 0.2224, "eval_rougeL": 0.3876, "eval_rougeLsum": 0.3874, "eval_runtime": 1923.5708, "eval_samples_per_second": 1.146, "eval_steps_per_second": 1.146, "eval_sys_len": 17765, "eval_totals_1": 17765, "eval_totals_2": 15561, "eval_totals_3": 13357, "eval_totals_4": 11153, "step": 181 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 1.2861, "step": 218 }, { "epoch": 5.99, "eval_bleu": 16.123, "eval_bp": 0.8018, "eval_counts_1": 8885, "eval_counts_2": 3646, "eval_counts_3": 1864, "eval_counts_4": 1005, "eval_exact_match": 0.0331, "eval_f1": 0.4092, "eval_gen_len": 12.9142, "eval_loss": 1.1327459812164307, "eval_precisions_1": 51.0456, "eval_precisions_2": 23.9837, "eval_precisions_3": 14.3407, "eval_precisions_4": 9.3107, "eval_ref_len": 21250, "eval_rouge1": 0.4181, "eval_rouge2": 0.2295, "eval_rougeL": 0.4022, "eval_rougeLsum": 0.402, "eval_runtime": 2271.0986, "eval_samples_per_second": 0.97, "eval_steps_per_second": 0.97, "eval_sys_len": 17406, "eval_totals_1": 17406, "eval_totals_2": 15202, "eval_totals_3": 12998, "eval_totals_4": 10794, "step": 218 }, { "epoch": 6.98, "learning_rate": 0.0001, "loss": 1.2372, "step": 254 }, { "epoch": 6.98, "eval_bleu": 17.0334, "eval_bp": 0.7964, "eval_counts_1": 9122, "eval_counts_2": 3824, "eval_counts_3": 1997, "eval_counts_4": 1084, "eval_exact_match": 0.0358, "eval_f1": 0.4236, "eval_gen_len": 12.8412, "eval_loss": 1.1248232126235962, "eval_precisions_1": 52.6979, "eval_precisions_2": 25.3144, "eval_precisions_3": 15.4782, "eval_precisions_4": 10.1327, "eval_ref_len": 21250, "eval_rouge1": 0.4313, "eval_rouge2": 0.239, "eval_rougeL": 0.4175, "eval_rougeLsum": 0.4172, "eval_runtime": 2069.7482, "eval_samples_per_second": 1.065, "eval_steps_per_second": 1.065, "eval_sys_len": 17310, "eval_totals_1": 17310, "eval_totals_2": 15106, "eval_totals_3": 12902, "eval_totals_4": 10698, "step": 254 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 1.1307, "step": 291 }, { "epoch": 8.0, "eval_bleu": 18.0474, "eval_bp": 0.8389, "eval_counts_1": 9423, "eval_counts_2": 4019, "eval_counts_3": 2136, "eval_counts_4": 1190, "eval_exact_match": 0.0404, "eval_f1": 0.4327, "eval_gen_len": 13.4138, "eval_loss": 1.0998262166976929, "eval_precisions_1": 52.1357, "eval_precisions_2": 25.3245, "eval_precisions_3": 15.63, "eval_precisions_4": 10.3821, "eval_ref_len": 21250, "eval_rouge1": 0.441, "eval_rouge2": 0.249, "eval_rougeL": 0.4255, "eval_rougeLsum": 0.4252, "eval_runtime": 2305.9504, "eval_samples_per_second": 0.956, "eval_steps_per_second": 0.956, "eval_sys_len": 18074, "eval_totals_1": 18074, "eval_totals_2": 15870, "eval_totals_3": 13666, "eval_totals_4": 11462, "step": 291 }, { "epoch": 8.99, "learning_rate": 0.0001, "loss": 1.0982, "step": 327 }, { "epoch": 8.99, "eval_bleu": 18.0367, "eval_bp": 0.8427, "eval_counts_1": 9450, "eval_counts_2": 4003, "eval_counts_3": 2147, "eval_counts_4": 1184, "eval_exact_match": 0.0426, "eval_f1": 0.4344, "eval_gen_len": 13.4465, "eval_loss": 1.1051570177078247, "eval_precisions_1": 52.0805, "eval_precisions_2": 25.1113, "eval_precisions_3": 15.6293, "eval_precisions_4": 10.2662, "eval_ref_len": 21250, "eval_rouge1": 0.4427, "eval_rouge2": 0.2492, "eval_rougeL": 0.4266, "eval_rougeLsum": 0.4261, "eval_runtime": 2511.4033, "eval_samples_per_second": 0.878, "eval_steps_per_second": 0.878, "eval_sys_len": 18145, "eval_totals_1": 18145, "eval_totals_2": 15941, "eval_totals_3": 13737, "eval_totals_4": 11533, "step": 327 }, { "epoch": 9.98, "learning_rate": 0.0001, "loss": 1.0449, "step": 363 }, { "epoch": 9.98, "eval_bleu": 18.0793, "eval_bp": 0.8385, "eval_counts_1": 9471, "eval_counts_2": 4036, "eval_counts_3": 2149, "eval_counts_4": 1180, "eval_exact_match": 0.0404, "eval_f1": 0.4341, "eval_gen_len": 13.333, "eval_loss": 1.0995820760726929, "eval_precisions_1": 52.4215, "eval_precisions_2": 25.4429, "eval_precisions_3": 15.7332, "eval_precisions_4": 10.3012, "eval_ref_len": 21250, "eval_rouge1": 0.4422, "eval_rouge2": 0.2477, "eval_rougeL": 0.4261, "eval_rougeLsum": 0.4257, "eval_runtime": 1973.5312, "eval_samples_per_second": 1.117, "eval_steps_per_second": 1.117, "eval_sys_len": 18067, "eval_totals_1": 18067, "eval_totals_2": 15863, "eval_totals_3": 13659, "eval_totals_4": 11455, "step": 363 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 0.9686, "step": 400 }, { "epoch": 10.99, "eval_bleu": 18.6914, "eval_bp": 0.8339, "eval_counts_1": 9612, "eval_counts_2": 4165, "eval_counts_3": 2240, "eval_counts_4": 1233, "eval_exact_match": 0.0449, "eval_f1": 0.4458, "eval_gen_len": 13.3534, "eval_loss": 1.1012390851974487, "eval_precisions_1": 53.4505, "eval_precisions_2": 26.3958, "eval_precisions_3": 16.5009, "eval_precisions_4": 10.8434, "eval_ref_len": 21250, "eval_rouge1": 0.4534, "eval_rouge2": 0.2591, "eval_rougeL": 0.4381, "eval_rougeLsum": 0.4378, "eval_runtime": 2028.4468, "eval_samples_per_second": 1.087, "eval_steps_per_second": 1.087, "eval_sys_len": 17983, "eval_totals_1": 17983, "eval_totals_2": 15779, "eval_totals_3": 13575, "eval_totals_4": 11371, "step": 400 }, { "epoch": 11.98, "learning_rate": 0.0001, "loss": 0.9465, "step": 436 }, { "epoch": 11.98, "eval_bleu": 18.6863, "eval_bp": 0.8466, "eval_counts_1": 9670, "eval_counts_2": 4154, "eval_counts_3": 2229, "eval_counts_4": 1239, "eval_exact_match": 0.0445, "eval_f1": 0.4452, "eval_gen_len": 13.5912, "eval_loss": 1.1026833057403564, "eval_precisions_1": 53.0823, "eval_precisions_2": 25.9414, "eval_precisions_3": 16.1416, "eval_precisions_4": 10.6764, "eval_ref_len": 21250, "eval_rouge1": 0.4531, "eval_rouge2": 0.258, "eval_rougeL": 0.4377, "eval_rougeLsum": 0.4374, "eval_runtime": 4413.1713, "eval_samples_per_second": 0.499, "eval_steps_per_second": 0.499, "eval_sys_len": 18217, "eval_totals_1": 18217, "eval_totals_2": 16013, "eval_totals_3": 13809, "eval_totals_4": 11605, "step": 436 }, { "epoch": 12.97, "learning_rate": 0.0001, "loss": 0.9025, "step": 472 }, { "epoch": 12.97, "eval_bleu": 18.7344, "eval_bp": 0.839, "eval_counts_1": 9627, "eval_counts_2": 4155, "eval_counts_3": 2241, "eval_counts_4": 1247, "eval_exact_match": 0.0436, "eval_f1": 0.4452, "eval_gen_len": 13.5259, "eval_loss": 1.11244535446167, "eval_precisions_1": 53.2585, "eval_precisions_2": 26.1782, "eval_precisions_3": 16.396, "eval_precisions_4": 10.8775, "eval_ref_len": 21250, "eval_rouge1": 0.4531, "eval_rouge2": 0.2583, "eval_rougeL": 0.4386, "eval_rougeLsum": 0.4382, "eval_runtime": 3852.502, "eval_samples_per_second": 0.572, "eval_steps_per_second": 0.572, "eval_sys_len": 18076, "eval_totals_1": 18076, "eval_totals_2": 15872, "eval_totals_3": 13668, "eval_totals_4": 11464, "step": 472 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 0.8402, "step": 509 }, { "epoch": 13.99, "eval_bleu": 18.3062, "eval_bp": 0.7981, "eval_counts_1": 9425, "eval_counts_2": 4071, "eval_counts_3": 2176, "eval_counts_4": 1207, "eval_exact_match": 0.0445, "eval_f1": 0.4417, "eval_gen_len": 12.9129, "eval_loss": 1.139233946800232, "eval_precisions_1": 54.3572, "eval_precisions_2": 26.8979, "eval_precisions_3": 16.8278, "eval_precisions_4": 11.252, "eval_ref_len": 21250, "eval_rouge1": 0.4495, "eval_rouge2": 0.2568, "eval_rougeL": 0.4365, "eval_rougeLsum": 0.4358, "eval_runtime": 3679.9149, "eval_samples_per_second": 0.599, "eval_steps_per_second": 0.599, "eval_sys_len": 17339, "eval_totals_1": 17339, "eval_totals_2": 15135, "eval_totals_3": 12931, "eval_totals_4": 10727, "step": 509 }, { "epoch": 14.98, "learning_rate": 0.0001, "loss": 0.8282, "step": 545 }, { "epoch": 14.98, "eval_bleu": 19.2695, "eval_bp": 0.87, "eval_counts_1": 9803, "eval_counts_2": 4274, "eval_counts_3": 2316, "eval_counts_4": 1305, "eval_exact_match": 0.0463, "eval_f1": 0.4496, "eval_gen_len": 14.0104, "eval_loss": 1.1227205991744995, "eval_precisions_1": 52.5574, "eval_precisions_2": 25.9849, "eval_precisions_3": 16.2595, "eval_precisions_4": 10.8389, "eval_ref_len": 21250, "eval_rouge1": 0.4573, "eval_rouge2": 0.2627, "eval_rougeL": 0.4418, "eval_rougeLsum": 0.4414, "eval_runtime": 3897.0455, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.566, "eval_sys_len": 18652, "eval_totals_1": 18652, "eval_totals_2": 16448, "eval_totals_3": 14244, "eval_totals_4": 12040, "step": 545 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 0.7694, "step": 582 }, { "epoch": 16.0, "eval_bleu": 19.1704, "eval_bp": 0.8501, "eval_counts_1": 9740, "eval_counts_2": 4240, "eval_counts_3": 2299, "eval_counts_4": 1296, "eval_exact_match": 0.0476, "eval_f1": 0.4492, "eval_gen_len": 13.6475, "eval_loss": 1.139431357383728, "eval_precisions_1": 53.2794, "eval_precisions_2": 26.3731, "eval_precisions_3": 16.5718, "eval_precisions_4": 11.1064, "eval_ref_len": 21250, "eval_rouge1": 0.4572, "eval_rouge2": 0.2629, "eval_rougeL": 0.4411, "eval_rougeLsum": 0.4412, "eval_runtime": 3879.8331, "eval_samples_per_second": 0.568, "eval_steps_per_second": 0.568, "eval_sys_len": 18281, "eval_totals_1": 18281, "eval_totals_2": 16077, "eval_totals_3": 13873, "eval_totals_4": 11669, "step": 582 }, { "epoch": 16.99, "learning_rate": 0.0001, "loss": 0.7589, "step": 618 }, { "epoch": 16.99, "eval_bleu": 18.5906, "eval_bp": 0.8572, "eval_counts_1": 9663, "eval_counts_2": 4140, "eval_counts_3": 2214, "eval_counts_4": 1232, "eval_exact_match": 0.044, "eval_f1": 0.4432, "eval_gen_len": 13.7926, "eval_loss": 1.1496515274047852, "eval_precisions_1": 52.4821, "eval_precisions_2": 25.5429, "eval_precisions_3": 15.8098, "eval_precisions_4": 10.4407, "eval_ref_len": 21250, "eval_rouge1": 0.4515, "eval_rouge2": 0.2561, "eval_rougeL": 0.4359, "eval_rougeLsum": 0.4358, "eval_runtime": 3896.3752, "eval_samples_per_second": 0.566, "eval_steps_per_second": 0.566, "eval_sys_len": 18412, "eval_totals_1": 18412, "eval_totals_2": 16208, "eval_totals_3": 14004, "eval_totals_4": 11800, "step": 618 }, { "epoch": 17.98, "learning_rate": 0.0001, "loss": 0.724, "step": 654 }, { "epoch": 17.98, "eval_bleu": 19.2167, "eval_bp": 0.8566, "eval_counts_1": 9743, "eval_counts_2": 4246, "eval_counts_3": 2316, "eval_counts_4": 1300, "eval_exact_match": 0.0472, "eval_f1": 0.4474, "eval_gen_len": 13.7214, "eval_loss": 1.1680002212524414, "eval_precisions_1": 52.9453, "eval_precisions_2": 26.2131, "eval_precisions_3": 16.5499, "eval_precisions_4": 11.0263, "eval_ref_len": 21250, "eval_rouge1": 0.4562, "eval_rouge2": 0.2625, "eval_rougeL": 0.4408, "eval_rougeLsum": 0.441, "eval_runtime": 3957.7799, "eval_samples_per_second": 0.557, "eval_steps_per_second": 0.557, "eval_sys_len": 18402, "eval_totals_1": 18402, "eval_totals_2": 16198, "eval_totals_3": 13994, "eval_totals_4": 11790, "step": 654 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 0.6755, "step": 691 }, { "epoch": 18.99, "eval_bleu": 19.4647, "eval_bp": 0.8496, "eval_counts_1": 9722, "eval_counts_2": 4266, "eval_counts_3": 2351, "eval_counts_4": 1341, "eval_exact_match": 0.0495, "eval_f1": 0.4469, "eval_gen_len": 13.6071, "eval_loss": 1.1873786449432373, "eval_precisions_1": 53.2071, "eval_precisions_2": 26.5497, "eval_precisions_3": 16.9576, "eval_precisions_4": 11.5009, "eval_ref_len": 21250, "eval_rouge1": 0.4559, "eval_rouge2": 0.2639, "eval_rougeL": 0.4417, "eval_rougeLsum": 0.4413, "eval_runtime": 3873.6961, "eval_samples_per_second": 0.569, "eval_steps_per_second": 0.569, "eval_sys_len": 18272, "eval_totals_1": 18272, "eval_totals_2": 16068, "eval_totals_3": 13864, "eval_totals_4": 11660, "step": 691 }, { "epoch": 19.79, "learning_rate": 0.0001, "loss": 0.657, "step": 720 }, { "epoch": 19.79, "eval_bleu": 19.8248, "eval_bp": 0.8822, "eval_counts_1": 9920, "eval_counts_2": 4361, "eval_counts_3": 2402, "eval_counts_4": 1373, "eval_exact_match": 0.0467, "eval_f1": 0.4508, "eval_gen_len": 14.2001, "eval_loss": 1.1845453977584839, "eval_precisions_1": 52.5312, "eval_precisions_2": 26.1451, "eval_precisions_3": 16.593, "eval_precisions_4": 11.1881, "eval_ref_len": 21250, "eval_rouge1": 0.4594, "eval_rouge2": 0.2647, "eval_rougeL": 0.4423, "eval_rougeLsum": 0.4421, "eval_runtime": 3958.6673, "eval_samples_per_second": 0.557, "eval_steps_per_second": 0.557, "eval_sys_len": 18884, "eval_totals_1": 18884, "eval_totals_2": 16680, "eval_totals_3": 14476, "eval_totals_4": 12272, "step": 720 }, { "epoch": 19.79, "step": 720, "total_flos": 1.102412878184448e+18, "train_loss": 1.3412119759453667, "train_runtime": 128063.7495, "train_samples_per_second": 1.455, "train_steps_per_second": 0.006 } ], "logging_steps": 500, "max_steps": 720, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.102412878184448e+18, "trial_name": null, "trial_params": null }