{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.776824034334766, "eval_steps": 500, "global_step": 1440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 0.0001, "loss": 3.458, "step": 72 }, { "epoch": 0.99, "eval_bleu": 4.6384, "eval_bp": 0.6642, "eval_counts_1": 5618, "eval_counts_2": 1383, "eval_counts_3": 463, "eval_counts_4": 116, "eval_exact_match": 0.0005, "eval_f1": 0.2226, "eval_gen_len": 11.3013, "eval_loss": 2.3696436882019043, "eval_precisions_1": 37.2546, "eval_precisions_2": 10.7409, "eval_precisions_3": 4.3385, "eval_precisions_4": 1.3699, "eval_ref_len": 21250, "eval_rouge1": 0.2266, "eval_rouge2": 0.0841, "eval_rougeL": 0.2197, "eval_rougeLsum": 0.2196, "eval_runtime": 433.2426, "eval_samples_per_second": 5.087, "eval_steps_per_second": 1.272, "eval_sys_len": 15080, "eval_totals_1": 15080, "eval_totals_2": 12876, "eval_totals_3": 10672, "eval_totals_4": 8468, "step": 72 }, { "epoch": 1.99, "learning_rate": 0.0001, "loss": 2.7548, "step": 145 }, { "epoch": 1.99, "eval_bleu": 6.9183, "eval_bp": 0.728, "eval_counts_1": 6361, "eval_counts_2": 1807, "eval_counts_3": 700, "eval_counts_4": 254, "eval_exact_match": 0.0036, "eval_f1": 0.2635, "eval_gen_len": 12.206, "eval_loss": 2.1310036182403564, "eval_precisions_1": 39.4358, "eval_precisions_2": 12.9757, "eval_precisions_3": 5.9717, "eval_precisions_4": 2.6686, "eval_ref_len": 21250, "eval_rouge1": 0.2706, "eval_rouge2": 0.1122, "eval_rougeL": 0.2596, "eval_rougeLsum": 0.2596, "eval_runtime": 445.5518, "eval_samples_per_second": 4.947, "eval_steps_per_second": 1.237, "eval_sys_len": 16130, "eval_totals_1": 16130, "eval_totals_2": 13926, "eval_totals_3": 11722, "eval_totals_4": 9518, "step": 145 }, { "epoch": 2.99, "learning_rate": 0.0001, "loss": 2.5084, "step": 218 }, { "epoch": 2.99, "eval_bleu": 7.616, "eval_bp": 0.7714, "eval_counts_1": 6758, "eval_counts_2": 2001, "eval_counts_3": 780, "eval_counts_4": 285, "eval_exact_match": 0.0045, "eval_f1": 0.2832, "eval_gen_len": 12.8825, "eval_loss": 2.0244088172912598, "eval_precisions_1": 40.0569, "eval_precisions_2": 13.6429, "eval_precisions_3": 6.2585, "eval_precisions_4": 2.778, "eval_ref_len": 21250, "eval_rouge1": 0.2888, "eval_rouge2": 0.1258, "eval_rougeL": 0.2766, "eval_rougeLsum": 0.2767, "eval_runtime": 693.3228, "eval_samples_per_second": 3.179, "eval_steps_per_second": 0.795, "eval_sys_len": 16871, "eval_totals_1": 16871, "eval_totals_2": 14667, "eval_totals_3": 12463, "eval_totals_4": 10259, "step": 218 }, { "epoch": 4.0, "learning_rate": 0.0001, "loss": 2.3562, "step": 291 }, { "epoch": 4.0, "eval_bleu": 8.6611, "eval_bp": 0.7671, "eval_counts_1": 7011, "eval_counts_2": 2193, "eval_counts_3": 908, "eval_counts_4": 360, "eval_exact_match": 0.0077, "eval_f1": 0.2978, "eval_gen_len": 12.9142, "eval_loss": 1.950147032737732, "eval_precisions_1": 41.7421, "eval_precisions_2": 15.0288, "eval_precisions_3": 7.3297, "eval_precisions_4": 3.535, "eval_ref_len": 21250, "eval_rouge1": 0.303, "eval_rouge2": 0.1375, "eval_rougeL": 0.2892, "eval_rougeLsum": 0.2894, "eval_runtime": 807.954, "eval_samples_per_second": 2.728, "eval_steps_per_second": 0.682, "eval_sys_len": 16796, "eval_totals_1": 16796, "eval_totals_2": 14592, "eval_totals_3": 12388, "eval_totals_4": 10184, "step": 291 }, { "epoch": 5.0, "learning_rate": 0.0001, "loss": 2.2383, "step": 364 }, { "epoch": 5.0, "eval_bleu": 9.6159, "eval_bp": 0.762, "eval_counts_1": 7245, "eval_counts_2": 2386, "eval_counts_3": 1015, "eval_counts_4": 435, "eval_exact_match": 0.0113, "eval_f1": 0.3155, "eval_gen_len": 12.8417, "eval_loss": 1.8873705863952637, "eval_precisions_1": 43.3625, "eval_precisions_2": 16.4506, "eval_precisions_3": 8.252, "eval_precisions_4": 4.3086, "eval_ref_len": 21250, "eval_rouge1": 0.3198, "eval_rouge2": 0.1498, "eval_rougeL": 0.3077, "eval_rougeLsum": 0.3079, "eval_runtime": 789.9213, "eval_samples_per_second": 2.79, "eval_steps_per_second": 0.698, "eval_sys_len": 16708, "eval_totals_1": 16708, "eval_totals_2": 14504, "eval_totals_3": 12300, "eval_totals_4": 10096, "step": 364 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 2.1576, "step": 436 }, { "epoch": 5.99, "eval_bleu": 9.5745, "eval_bp": 0.7796, "eval_counts_1": 7378, "eval_counts_2": 2382, "eval_counts_3": 997, "eval_counts_4": 429, "eval_exact_match": 0.0109, "eval_f1": 0.3215, "eval_gen_len": 13.2187, "eval_loss": 1.859336018562317, "eval_precisions_1": 43.3643, "eval_precisions_2": 16.0837, "eval_precisions_3": 7.9089, "eval_precisions_4": 4.1242, "eval_ref_len": 21250, "eval_rouge1": 0.326, "eval_rouge2": 0.1497, "eval_rougeL": 0.3132, "eval_rougeLsum": 0.3132, "eval_runtime": 806.6141, "eval_samples_per_second": 2.732, "eval_steps_per_second": 0.683, "eval_sys_len": 17014, "eval_totals_1": 17014, "eval_totals_2": 14810, "eval_totals_3": 12606, "eval_totals_4": 10402, "step": 436 }, { "epoch": 6.99, "learning_rate": 0.0001, "loss": 2.0356, "step": 509 }, { "epoch": 6.99, "eval_bleu": 10.3053, "eval_bp": 0.7787, "eval_counts_1": 7570, "eval_counts_2": 2520, "eval_counts_3": 1097, "eval_counts_4": 482, "eval_exact_match": 0.0123, "eval_f1": 0.3339, "eval_gen_len": 13.0368, "eval_loss": 1.8132530450820923, "eval_precisions_1": 44.532, "eval_precisions_2": 17.0328, "eval_precisions_3": 8.7126, "eval_precisions_4": 4.6404, "eval_ref_len": 21250, "eval_rouge1": 0.3384, "eval_rouge2": 0.158, "eval_rougeL": 0.3258, "eval_rougeLsum": 0.3257, "eval_runtime": 454.2622, "eval_samples_per_second": 4.852, "eval_steps_per_second": 1.213, "eval_sys_len": 16999, "eval_totals_1": 16999, "eval_totals_2": 14795, "eval_totals_3": 12591, "eval_totals_4": 10387, "step": 509 }, { "epoch": 7.99, "learning_rate": 0.0001, "loss": 1.9575, "step": 582 }, { "epoch": 7.99, "eval_bleu": 10.993, "eval_bp": 0.8003, "eval_counts_1": 7764, "eval_counts_2": 2637, "eval_counts_3": 1175, "eval_counts_4": 545, "eval_exact_match": 0.0136, "eval_f1": 0.3407, "eval_gen_len": 13.4719, "eval_loss": 1.7855687141418457, "eval_precisions_1": 44.6746, "eval_precisions_2": 17.3773, "eval_precisions_3": 9.0587, "eval_precisions_4": 5.0618, "eval_ref_len": 21250, "eval_rouge1": 0.345, "eval_rouge2": 0.1625, "eval_rougeL": 0.3322, "eval_rougeLsum": 0.3324, "eval_runtime": 470.7972, "eval_samples_per_second": 4.681, "eval_steps_per_second": 1.17, "eval_sys_len": 17379, "eval_totals_1": 17379, "eval_totals_2": 15175, "eval_totals_3": 12971, "eval_totals_4": 10767, "step": 582 }, { "epoch": 9.0, "learning_rate": 0.0001, "loss": 1.8889, "step": 655 }, { "epoch": 9.0, "eval_bleu": 10.9637, "eval_bp": 0.7846, "eval_counts_1": 7766, "eval_counts_2": 2644, "eval_counts_3": 1184, "eval_counts_4": 532, "eval_exact_match": 0.0123, "eval_f1": 0.3438, "eval_gen_len": 13.2164, "eval_loss": 1.7666170597076416, "eval_precisions_1": 45.4099, "eval_precisions_2": 17.7473, "eval_precisions_3": 9.3272, "eval_precisions_4": 5.0715, "eval_ref_len": 21250, "eval_rouge1": 0.3487, "eval_rouge2": 0.1636, "eval_rougeL": 0.3348, "eval_rougeLsum": 0.335, "eval_runtime": 461.5052, "eval_samples_per_second": 4.776, "eval_steps_per_second": 1.194, "eval_sys_len": 17102, "eval_totals_1": 17102, "eval_totals_2": 14898, "eval_totals_3": 12694, "eval_totals_4": 10490, "step": 655 }, { "epoch": 10.0, "learning_rate": 0.0001, "loss": 1.8201, "step": 728 }, { "epoch": 10.0, "eval_bleu": 11.3891, "eval_bp": 0.7877, "eval_counts_1": 7737, "eval_counts_2": 2680, "eval_counts_3": 1238, "eval_counts_4": 587, "eval_exact_match": 0.0163, "eval_f1": 0.3406, "eval_gen_len": 13.1388, "eval_loss": 1.7414668798446655, "eval_precisions_1": 45.0979, "eval_precisions_2": 17.924, "eval_precisions_3": 9.7113, "eval_precisions_4": 5.5671, "eval_ref_len": 21250, "eval_rouge1": 0.3453, "eval_rouge2": 0.1666, "eval_rougeL": 0.3332, "eval_rougeLsum": 0.3333, "eval_runtime": 457.4345, "eval_samples_per_second": 4.818, "eval_steps_per_second": 1.205, "eval_sys_len": 17156, "eval_totals_1": 17156, "eval_totals_2": 14952, "eval_totals_3": 12748, "eval_totals_4": 10544, "step": 728 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 1.7882, "step": 800 }, { "epoch": 10.99, "eval_bleu": 11.4047, "eval_bp": 0.7995, "eval_counts_1": 7859, "eval_counts_2": 2722, "eval_counts_3": 1241, "eval_counts_4": 572, "eval_exact_match": 0.0145, "eval_f1": 0.3473, "eval_gen_len": 13.4052, "eval_loss": 1.7331299781799316, "eval_precisions_1": 45.2603, "eval_precisions_2": 17.9551, "eval_precisions_3": 9.5786, "eval_precisions_4": 5.3199, "eval_ref_len": 21250, "eval_rouge1": 0.3524, "eval_rouge2": 0.1673, "eval_rougeL": 0.3387, "eval_rougeLsum": 0.3385, "eval_runtime": 470.5412, "eval_samples_per_second": 4.684, "eval_steps_per_second": 1.171, "eval_sys_len": 17364, "eval_totals_1": 17364, "eval_totals_2": 15160, "eval_totals_3": 12956, "eval_totals_4": 10752, "step": 800 }, { "epoch": 11.99, "learning_rate": 0.0001, "loss": 1.7095, "step": 873 }, { "epoch": 11.99, "eval_bleu": 11.8807, "eval_bp": 0.8053, "eval_counts_1": 7968, "eval_counts_2": 2783, "eval_counts_3": 1292, "eval_counts_4": 625, "eval_exact_match": 0.0154, "eval_f1": 0.3495, "eval_gen_len": 13.4437, "eval_loss": 1.7193998098373413, "eval_precisions_1": 45.6175, "eval_precisions_2": 18.2336, "eval_precisions_3": 9.8936, "eval_precisions_4": 5.7577, "eval_ref_len": 21250, "eval_rouge1": 0.3547, "eval_rouge2": 0.1708, "eval_rougeL": 0.3418, "eval_rougeLsum": 0.3414, "eval_runtime": 472.913, "eval_samples_per_second": 4.66, "eval_steps_per_second": 1.165, "eval_sys_len": 17467, "eval_totals_1": 17467, "eval_totals_2": 15263, "eval_totals_3": 13059, "eval_totals_4": 10855, "step": 873 }, { "epoch": 12.99, "learning_rate": 0.0001, "loss": 1.6619, "step": 946 }, { "epoch": 12.99, "eval_bleu": 11.7968, "eval_bp": 0.8034, "eval_counts_1": 8011, "eval_counts_2": 2796, "eval_counts_3": 1286, "eval_counts_4": 604, "eval_exact_match": 0.0154, "eval_f1": 0.3526, "eval_gen_len": 13.4964, "eval_loss": 1.703238606452942, "eval_precisions_1": 45.9531, "eval_precisions_2": 18.3597, "eval_precisions_3": 9.8733, "eval_precisions_4": 5.5817, "eval_ref_len": 21250, "eval_rouge1": 0.3584, "eval_rouge2": 0.1736, "eval_rougeL": 0.3454, "eval_rougeLsum": 0.3454, "eval_runtime": 460.9308, "eval_samples_per_second": 4.782, "eval_steps_per_second": 1.195, "eval_sys_len": 17433, "eval_totals_1": 17433, "eval_totals_2": 15229, "eval_totals_3": 13025, "eval_totals_4": 10821, "step": 946 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 1.6103, "step": 1019 }, { "epoch": 13.99, "eval_bleu": 12.235, "eval_bp": 0.8163, "eval_counts_1": 8154, "eval_counts_2": 2891, "eval_counts_3": 1347, "eval_counts_4": 636, "eval_exact_match": 0.015, "eval_f1": 0.3602, "eval_gen_len": 13.7223, "eval_loss": 1.7027523517608643, "eval_precisions_1": 46.1591, "eval_precisions_2": 18.6987, "eval_precisions_3": 10.1607, "eval_precisions_4": 5.7541, "eval_ref_len": 21250, "eval_rouge1": 0.3659, "eval_rouge2": 0.1795, "eval_rougeL": 0.3509, "eval_rougeLsum": 0.3508, "eval_runtime": 461.3951, "eval_samples_per_second": 4.777, "eval_steps_per_second": 1.194, "eval_sys_len": 17665, "eval_totals_1": 17665, "eval_totals_2": 15461, "eval_totals_3": 13257, "eval_totals_4": 11053, "step": 1019 }, { "epoch": 15.0, "learning_rate": 0.0001, "loss": 1.565, "step": 1092 }, { "epoch": 15.0, "eval_bleu": 12.4116, "eval_bp": 0.8088, "eval_counts_1": 8135, "eval_counts_2": 2897, "eval_counts_3": 1362, "eval_counts_4": 665, "eval_exact_match": 0.02, "eval_f1": 0.3603, "eval_gen_len": 13.6107, "eval_loss": 1.6954691410064697, "eval_precisions_1": 46.4062, "eval_precisions_2": 18.9025, "eval_precisions_3": 10.3795, "eval_precisions_4": 6.0909, "eval_ref_len": 21250, "eval_rouge1": 0.3668, "eval_rouge2": 0.1808, "eval_rougeL": 0.3518, "eval_rougeLsum": 0.3516, "eval_runtime": 457.9806, "eval_samples_per_second": 4.812, "eval_steps_per_second": 1.203, "eval_sys_len": 17530, "eval_totals_1": 17530, "eval_totals_2": 15326, "eval_totals_3": 13122, "eval_totals_4": 10918, "step": 1092 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 1.522, "step": 1165 }, { "epoch": 16.0, "eval_bleu": 12.8008, "eval_bp": 0.8318, "eval_counts_1": 8271, "eval_counts_2": 2982, "eval_counts_3": 1414, "eval_counts_4": 697, "eval_exact_match": 0.0191, "eval_f1": 0.3632, "eval_gen_len": 13.9192, "eval_loss": 1.6792546510696411, "eval_precisions_1": 46.0883, "eval_precisions_2": 18.943, "eval_precisions_3": 10.4447, "eval_precisions_4": 6.1496, "eval_ref_len": 21250, "eval_rouge1": 0.3695, "eval_rouge2": 0.1828, "eval_rougeL": 0.354, "eval_rougeLsum": 0.354, "eval_runtime": 476.6232, "eval_samples_per_second": 4.624, "eval_steps_per_second": 1.156, "eval_sys_len": 17946, "eval_totals_1": 17946, "eval_totals_2": 15742, "eval_totals_3": 13538, "eval_totals_4": 11334, "step": 1165 }, { "epoch": 16.99, "learning_rate": 0.0001, "loss": 1.5022, "step": 1237 }, { "epoch": 16.99, "eval_bleu": 12.6672, "eval_bp": 0.8077, "eval_counts_1": 8244, "eval_counts_2": 2967, "eval_counts_3": 1392, "eval_counts_4": 680, "eval_exact_match": 0.0191, "eval_f1": 0.366, "eval_gen_len": 13.6243, "eval_loss": 1.684873104095459, "eval_precisions_1": 47.0817, "eval_precisions_2": 19.3846, "eval_precisions_3": 10.6243, "eval_precisions_4": 6.2397, "eval_ref_len": 21250, "eval_rouge1": 0.3728, "eval_rouge2": 0.184, "eval_rougeL": 0.3569, "eval_rougeLsum": 0.3569, "eval_runtime": 453.2, "eval_samples_per_second": 4.863, "eval_steps_per_second": 1.216, "eval_sys_len": 17510, "eval_totals_1": 17510, "eval_totals_2": 15306, "eval_totals_3": 13102, "eval_totals_4": 10898, "step": 1237 }, { "epoch": 17.99, "learning_rate": 0.0001, "loss": 1.4359, "step": 1310 }, { "epoch": 17.99, "eval_bleu": 13.0683, "eval_bp": 0.8278, "eval_counts_1": 8328, "eval_counts_2": 3050, "eval_counts_3": 1448, "eval_counts_4": 717, "eval_exact_match": 0.0181, "eval_f1": 0.3671, "eval_gen_len": 13.7255, "eval_loss": 1.686221718788147, "eval_precisions_1": 46.5954, "eval_precisions_2": 19.4652, "eval_precisions_3": 10.7538, "eval_precisions_4": 6.3671, "eval_ref_len": 21250, "eval_rouge1": 0.3742, "eval_rouge2": 0.1866, "eval_rougeL": 0.3582, "eval_rougeLsum": 0.3583, "eval_runtime": 451.92, "eval_samples_per_second": 4.877, "eval_steps_per_second": 1.219, "eval_sys_len": 17873, "eval_totals_1": 17873, "eval_totals_2": 15669, "eval_totals_3": 13465, "eval_totals_4": 11261, "step": 1310 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 1.3994, "step": 1383 }, { "epoch": 18.99, "eval_bleu": 12.8728, "eval_bp": 0.8152, "eval_counts_1": 8272, "eval_counts_2": 2998, "eval_counts_3": 1417, "eval_counts_4": 704, "eval_exact_match": 0.0213, "eval_f1": 0.3673, "eval_gen_len": 13.6956, "eval_loss": 1.6775314807891846, "eval_precisions_1": 46.8801, "eval_precisions_2": 19.4158, "eval_precisions_3": 10.7048, "eval_precisions_4": 6.3809, "eval_ref_len": 21250, "eval_rouge1": 0.3739, "eval_rouge2": 0.1866, "eval_rougeL": 0.3583, "eval_rougeLsum": 0.3581, "eval_runtime": 818.5079, "eval_samples_per_second": 2.693, "eval_steps_per_second": 0.673, "eval_sys_len": 17645, "eval_totals_1": 17645, "eval_totals_2": 15441, "eval_totals_3": 13237, "eval_totals_4": 11033, "step": 1383 }, { "epoch": 19.78, "learning_rate": 0.0001, "loss": 1.3609, "step": 1440 }, { "epoch": 19.78, "eval_bleu": 13.1569, "eval_bp": 0.8251, "eval_counts_1": 8347, "eval_counts_2": 3062, "eval_counts_3": 1465, "eval_counts_4": 723, "eval_exact_match": 0.0204, "eval_f1": 0.3692, "eval_gen_len": 13.7328, "eval_loss": 1.688394546508789, "eval_precisions_1": 46.8327, "eval_precisions_2": 19.6043, "eval_precisions_3": 10.9206, "eval_precisions_4": 6.449, "eval_ref_len": 21250, "eval_rouge1": 0.3761, "eval_rouge2": 0.1886, "eval_rougeL": 0.3601, "eval_rougeLsum": 0.3596, "eval_runtime": 834.1703, "eval_samples_per_second": 2.642, "eval_steps_per_second": 0.661, "eval_sys_len": 17823, "eval_totals_1": 17823, "eval_totals_2": 15619, "eval_totals_3": 13415, "eval_totals_4": 11211, "step": 1440 }, { "epoch": 19.78, "step": 1440, "total_flos": 2.52283256045568e+17, "train_loss": 1.9421327537960476, "train_runtime": 22435.6962, "train_samples_per_second": 8.303, "train_steps_per_second": 0.064 } ], "logging_steps": 500, "max_steps": 1440, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.52283256045568e+17, "trial_name": null, "trial_params": null }