{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.78531558608845, "eval_steps": 500, "global_step": 720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 0.0001, "loss": 8.9608, "step": 36 }, { "epoch": 0.99, "eval_bleu": 0.2352, "eval_bp": 0.828, "eval_counts_1": 2306, "eval_counts_2": 50, "eval_counts_3": 12, "eval_counts_4": 2, "eval_exact_match": 0.0, "eval_f1": 0.0092, "eval_gen_len": 3.1969, "eval_loss": 2.8882896900177, "eval_precisions_1": 12.9, "eval_precisions_2": 0.319, "eval_precisions_3": 0.0891, "eval_precisions_4": 0.0178, "eval_ref_len": 21250, "eval_rouge1": 0.0081, "eval_rouge2": 0.0022, "eval_rougeL": 0.0078, "eval_rougeLsum": 0.0078, "eval_runtime": 386.3015, "eval_samples_per_second": 5.705, "eval_steps_per_second": 1.426, "eval_sys_len": 17876, "eval_totals_1": 17876, "eval_totals_2": 15672, "eval_totals_3": 13468, "eval_totals_4": 11264, "step": 36 }, { "epoch": 1.98, "learning_rate": 0.0001, "loss": 3.2364, "step": 72 }, { "epoch": 1.98, "eval_bleu": 6.7083, "eval_bp": 0.9954, "eval_counts_1": 6125, "eval_counts_2": 1727, "eval_counts_3": 687, "eval_counts_4": 277, "eval_exact_match": 0.0018, "eval_f1": 0.2514, "eval_gen_len": 11.8072, "eval_loss": 1.9241770505905151, "eval_precisions_1": 28.9571, "eval_precisions_2": 9.1144, "eval_precisions_3": 4.103, "eval_precisions_4": 1.9051, "eval_ref_len": 21250, "eval_rouge1": 0.2457, "eval_rouge2": 0.1026, "eval_rougeL": 0.2345, "eval_rougeLsum": 0.2346, "eval_runtime": 440.0537, "eval_samples_per_second": 5.008, "eval_steps_per_second": 1.252, "eval_sys_len": 21152, "eval_totals_1": 21152, "eval_totals_2": 18948, "eval_totals_3": 16744, "eval_totals_4": 14540, "step": 72 }, { "epoch": 3.0, "learning_rate": 0.0001, "loss": 2.4963, "step": 109 }, { "epoch": 3.0, "eval_bleu": 9.1493, "eval_bp": 0.752, "eval_counts_1": 6903, "eval_counts_2": 2271, "eval_counts_3": 975, "eval_counts_4": 409, "eval_exact_match": 0.01, "eval_f1": 0.2909, "eval_gen_len": 12.176, "eval_loss": 1.6558014154434204, "eval_precisions_1": 41.7428, "eval_precisions_2": 15.8446, "eval_precisions_3": 8.0386, "eval_precisions_4": 4.1209, "eval_ref_len": 21250, "eval_rouge1": 0.2966, "eval_rouge2": 0.1415, "eval_rougeL": 0.2854, "eval_rougeLsum": 0.2852, "eval_runtime": 434.1741, "eval_samples_per_second": 5.076, "eval_steps_per_second": 1.269, "eval_sys_len": 16537, "eval_totals_1": 16537, "eval_totals_2": 14333, "eval_totals_3": 12129, "eval_totals_4": 9925, "step": 109 }, { "epoch": 3.98, "learning_rate": 0.0001, "loss": 2.2314, "step": 145 }, { "epoch": 3.98, "eval_bleu": 10.187, "eval_bp": 0.7573, "eval_counts_1": 7160, "eval_counts_2": 2440, "eval_counts_3": 1098, "eval_counts_4": 501, "eval_exact_match": 0.0136, "eval_f1": 0.3069, "eval_gen_len": 12.157, "eval_loss": 1.5771422386169434, "eval_precisions_1": 43.0625, "eval_precisions_2": 16.9174, "eval_precisions_3": 8.986, "eval_precisions_4": 5.0025, "eval_ref_len": 21250, "eval_rouge1": 0.314, "eval_rouge2": 0.1535, "eval_rougeL": 0.3028, "eval_rougeLsum": 0.3028, "eval_runtime": 436.5308, "eval_samples_per_second": 5.049, "eval_steps_per_second": 1.262, "eval_sys_len": 16627, "eval_totals_1": 16627, "eval_totals_2": 14423, "eval_totals_3": 12219, "eval_totals_4": 10015, "step": 145 }, { "epoch": 4.97, "learning_rate": 0.0001, "loss": 2.0578, "step": 181 }, { "epoch": 4.97, "eval_bleu": 11.0621, "eval_bp": 0.7961, "eval_counts_1": 7447, "eval_counts_2": 2625, "eval_counts_3": 1214, "eval_counts_4": 566, "eval_exact_match": 0.0163, "eval_f1": 0.32, "eval_gen_len": 12.5585, "eval_loss": 1.5346813201904297, "eval_precisions_1": 43.0338, "eval_precisions_2": 17.383, "eval_precisions_3": 9.413, "eval_precisions_4": 5.2932, "eval_ref_len": 21250, "eval_rouge1": 0.3286, "eval_rouge2": 0.1628, "eval_rougeL": 0.3146, "eval_rougeLsum": 0.3146, "eval_runtime": 444.2911, "eval_samples_per_second": 4.961, "eval_steps_per_second": 1.24, "eval_sys_len": 17305, "eval_totals_1": 17305, "eval_totals_2": 15101, "eval_totals_3": 12897, "eval_totals_4": 10693, "step": 181 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 1.8928, "step": 218 }, { "epoch": 5.99, "eval_bleu": 11.4063, "eval_bp": 0.7556, "eval_counts_1": 7396, "eval_counts_2": 2659, "eval_counts_3": 1257, "eval_counts_4": 611, "eval_exact_match": 0.0177, "eval_f1": 0.3234, "eval_gen_len": 12.1692, "eval_loss": 1.512817144393921, "eval_precisions_1": 44.5596, "eval_precisions_2": 18.473, "eval_precisions_3": 10.3117, "eval_precisions_4": 6.1186, "eval_ref_len": 21250, "eval_rouge1": 0.3326, "eval_rouge2": 0.1684, "eval_rougeL": 0.3198, "eval_rougeLsum": 0.3198, "eval_runtime": 441.07, "eval_samples_per_second": 4.997, "eval_steps_per_second": 1.249, "eval_sys_len": 16598, "eval_totals_1": 16598, "eval_totals_2": 14394, "eval_totals_3": 12190, "eval_totals_4": 9986, "step": 218 }, { "epoch": 6.98, "learning_rate": 0.0001, "loss": 1.8573, "step": 254 }, { "epoch": 6.98, "eval_bleu": 11.8292, "eval_bp": 0.7631, "eval_counts_1": 7531, "eval_counts_2": 2758, "eval_counts_3": 1313, "eval_counts_4": 641, "eval_exact_match": 0.0163, "eval_f1": 0.327, "eval_gen_len": 12.3035, "eval_loss": 1.4735780954360962, "eval_precisions_1": 45.0203, "eval_precisions_2": 18.9893, "eval_precisions_3": 10.6575, "eval_precisions_4": 6.3365, "eval_ref_len": 21250, "eval_rouge1": 0.3349, "eval_rouge2": 0.1717, "eval_rougeL": 0.3216, "eval_rougeLsum": 0.3216, "eval_runtime": 442.6304, "eval_samples_per_second": 4.979, "eval_steps_per_second": 1.245, "eval_sys_len": 16728, "eval_totals_1": 16728, "eval_totals_2": 14524, "eval_totals_3": 12320, "eval_totals_4": 10116, "step": 254 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 1.7361, "step": 291 }, { "epoch": 8.0, "eval_bleu": 12.2208, "eval_bp": 0.7747, "eval_counts_1": 7658, "eval_counts_2": 2849, "eval_counts_3": 1368, "eval_counts_4": 668, "eval_exact_match": 0.0181, "eval_f1": 0.3334, "eval_gen_len": 12.4628, "eval_loss": 1.4544174671173096, "eval_precisions_1": 45.2387, "eval_precisions_2": 19.3494, "eval_precisions_3": 10.9265, "eval_precisions_4": 6.4754, "eval_ref_len": 21250, "eval_rouge1": 0.3414, "eval_rouge2": 0.1762, "eval_rougeL": 0.3283, "eval_rougeLsum": 0.3284, "eval_runtime": 442.3648, "eval_samples_per_second": 4.982, "eval_steps_per_second": 1.246, "eval_sys_len": 16928, "eval_totals_1": 16928, "eval_totals_2": 14724, "eval_totals_3": 12520, "eval_totals_4": 10316, "step": 291 }, { "epoch": 8.99, "learning_rate": 0.0001, "loss": 1.7162, "step": 327 }, { "epoch": 8.99, "eval_bleu": 12.4536, "eval_bp": 0.767, "eval_counts_1": 7703, "eval_counts_2": 2891, "eval_counts_3": 1390, "eval_counts_4": 694, "eval_exact_match": 0.0159, "eval_f1": 0.3374, "eval_gen_len": 12.4174, "eval_loss": 1.4459445476531982, "eval_precisions_1": 45.8648, "eval_precisions_2": 19.8136, "eval_precisions_3": 11.2214, "eval_precisions_4": 6.8153, "eval_ref_len": 21250, "eval_rouge1": 0.3454, "eval_rouge2": 0.1785, "eval_rougeL": 0.3325, "eval_rougeLsum": 0.3323, "eval_runtime": 436.4836, "eval_samples_per_second": 5.049, "eval_steps_per_second": 1.262, "eval_sys_len": 16795, "eval_totals_1": 16795, "eval_totals_2": 14591, "eval_totals_3": 12387, "eval_totals_4": 10183, "step": 327 }, { "epoch": 9.98, "learning_rate": 0.0001, "loss": 1.6589, "step": 363 }, { "epoch": 9.98, "eval_bleu": 12.8553, "eval_bp": 0.8002, "eval_counts_1": 7889, "eval_counts_2": 2983, "eval_counts_3": 1449, "eval_counts_4": 719, "eval_exact_match": 0.0172, "eval_f1": 0.3435, "eval_gen_len": 12.7101, "eval_loss": 1.438312292098999, "eval_precisions_1": 45.4017, "eval_precisions_2": 19.6612, "eval_precisions_3": 11.1737, "eval_precisions_4": 6.6797, "eval_ref_len": 21250, "eval_rouge1": 0.3519, "eval_rouge2": 0.1816, "eval_rougeL": 0.3375, "eval_rougeLsum": 0.3372, "eval_runtime": 449.3427, "eval_samples_per_second": 4.905, "eval_steps_per_second": 1.226, "eval_sys_len": 17376, "eval_totals_1": 17376, "eval_totals_2": 15172, "eval_totals_3": 12968, "eval_totals_4": 10764, "step": 363 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 1.5571, "step": 400 }, { "epoch": 10.99, "eval_bleu": 12.9671, "eval_bp": 0.7894, "eval_counts_1": 7889, "eval_counts_2": 2994, "eval_counts_3": 1457, "eval_counts_4": 736, "eval_exact_match": 0.02, "eval_f1": 0.3457, "eval_gen_len": 12.6466, "eval_loss": 1.4213731288909912, "eval_precisions_1": 45.9063, "eval_precisions_2": 19.9853, "eval_precisions_3": 11.4033, "eval_precisions_4": 6.9611, "eval_ref_len": 21250, "eval_rouge1": 0.3529, "eval_rouge2": 0.1845, "eval_rougeL": 0.3392, "eval_rougeLsum": 0.3393, "eval_runtime": 440.5687, "eval_samples_per_second": 5.003, "eval_steps_per_second": 1.251, "eval_sys_len": 17185, "eval_totals_1": 17185, "eval_totals_2": 14981, "eval_totals_3": 12777, "eval_totals_4": 10573, "step": 400 }, { "epoch": 11.98, "learning_rate": 0.0001, "loss": 1.5502, "step": 436 }, { "epoch": 11.98, "eval_bleu": 13.0741, "eval_bp": 0.7712, "eval_counts_1": 7930, "eval_counts_2": 3008, "eval_counts_3": 1477, "eval_counts_4": 741, "eval_exact_match": 0.0213, "eval_f1": 0.3541, "eval_gen_len": 12.4483, "eval_loss": 1.4135174751281738, "eval_precisions_1": 47.0121, "eval_precisions_2": 20.5128, "eval_precisions_3": 11.8539, "eval_precisions_4": 7.225, "eval_ref_len": 21250, "eval_rouge1": 0.3619, "eval_rouge2": 0.189, "eval_rougeL": 0.3492, "eval_rougeLsum": 0.3491, "eval_runtime": 443.1145, "eval_samples_per_second": 4.974, "eval_steps_per_second": 1.243, "eval_sys_len": 16868, "eval_totals_1": 16868, "eval_totals_2": 14664, "eval_totals_3": 12460, "eval_totals_4": 10256, "step": 436 }, { "epoch": 13.0, "learning_rate": 0.0001, "loss": 1.4564, "step": 473 }, { "epoch": 13.0, "eval_bleu": 14.1014, "eval_bp": 0.8309, "eval_counts_1": 8268, "eval_counts_2": 3200, "eval_counts_3": 1616, "eval_counts_4": 837, "eval_exact_match": 0.0218, "eval_f1": 0.3647, "eval_gen_len": 13.2441, "eval_loss": 1.3942722082138062, "eval_precisions_1": 46.1152, "eval_precisions_2": 20.3498, "eval_precisions_3": 11.9518, "eval_precisions_4": 7.396, "eval_ref_len": 21250, "eval_rouge1": 0.3729, "eval_rouge2": 0.1974, "eval_rougeL": 0.3578, "eval_rougeLsum": 0.3576, "eval_runtime": 460.2282, "eval_samples_per_second": 4.789, "eval_steps_per_second": 1.197, "eval_sys_len": 17929, "eval_totals_1": 17929, "eval_totals_2": 15725, "eval_totals_3": 13521, "eval_totals_4": 11317, "step": 473 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 1.4522, "step": 509 }, { "epoch": 13.99, "eval_bleu": 13.7526, "eval_bp": 0.7667, "eval_counts_1": 8047, "eval_counts_2": 3130, "eval_counts_3": 1564, "eval_counts_4": 811, "eval_exact_match": 0.0227, "eval_f1": 0.3627, "eval_gen_len": 12.515, "eval_loss": 1.3952871561050415, "eval_precisions_1": 47.9302, "eval_precisions_2": 21.4604, "eval_precisions_3": 12.6323, "eval_precisions_4": 7.9689, "eval_ref_len": 21250, "eval_rouge1": 0.3712, "eval_rouge2": 0.197, "eval_rougeL": 0.3582, "eval_rougeLsum": 0.3581, "eval_runtime": 437.5396, "eval_samples_per_second": 5.037, "eval_steps_per_second": 1.259, "eval_sys_len": 16789, "eval_totals_1": 16789, "eval_totals_2": 14585, "eval_totals_3": 12381, "eval_totals_4": 10177, "step": 509 }, { "epoch": 14.98, "learning_rate": 0.0001, "loss": 1.407, "step": 545 }, { "epoch": 14.98, "eval_bleu": 14.7315, "eval_bp": 0.8306, "eval_counts_1": 8498, "eval_counts_2": 3358, "eval_counts_3": 1703, "eval_counts_4": 877, "eval_exact_match": 0.0213, "eval_f1": 0.3772, "eval_gen_len": 13.2849, "eval_loss": 1.3759350776672363, "eval_precisions_1": 47.4139, "eval_precisions_2": 21.3627, "eval_precisions_3": 12.6008, "eval_precisions_4": 7.7535, "eval_ref_len": 21250, "eval_rouge1": 0.3856, "eval_rouge2": 0.2063, "eval_rougeL": 0.3709, "eval_rougeLsum": 0.3706, "eval_runtime": 453.6157, "eval_samples_per_second": 4.859, "eval_steps_per_second": 1.215, "eval_sys_len": 17923, "eval_totals_1": 17923, "eval_totals_2": 15719, "eval_totals_3": 13515, "eval_totals_4": 11311, "step": 545 }, { "epoch": 15.99, "learning_rate": 0.0001, "loss": 1.3294, "step": 582 }, { "epoch": 15.99, "eval_bleu": 14.868, "eval_bp": 0.8044, "eval_counts_1": 8481, "eval_counts_2": 3407, "eval_counts_3": 1721, "eval_counts_4": 883, "eval_exact_match": 0.024, "eval_f1": 0.3822, "eval_gen_len": 12.9142, "eval_loss": 1.3775662183761597, "eval_precisions_1": 48.5989, "eval_precisions_2": 22.3454, "eval_precisions_3": 13.1948, "eval_precisions_4": 8.1465, "eval_ref_len": 21250, "eval_rouge1": 0.3907, "eval_rouge2": 0.211, "eval_rougeL": 0.3766, "eval_rougeLsum": 0.3766, "eval_runtime": 448.6685, "eval_samples_per_second": 4.912, "eval_steps_per_second": 1.228, "eval_sys_len": 17451, "eval_totals_1": 17451, "eval_totals_2": 15247, "eval_totals_3": 13043, "eval_totals_4": 10839, "step": 582 }, { "epoch": 16.98, "learning_rate": 0.0001, "loss": 1.3294, "step": 618 }, { "epoch": 16.98, "eval_bleu": 15.2312, "eval_bp": 0.835, "eval_counts_1": 8633, "eval_counts_2": 3464, "eval_counts_3": 1767, "eval_counts_4": 923, "eval_exact_match": 0.0263, "eval_f1": 0.3868, "eval_gen_len": 13.3103, "eval_loss": 1.380259394645691, "eval_precisions_1": 47.9505, "eval_precisions_2": 21.9241, "eval_precisions_3": 12.9965, "eval_precisions_4": 8.1022, "eval_ref_len": 21250, "eval_rouge1": 0.3946, "eval_rouge2": 0.2133, "eval_rougeL": 0.3801, "eval_rougeLsum": 0.3798, "eval_runtime": 456.612, "eval_samples_per_second": 4.827, "eval_steps_per_second": 1.207, "eval_sys_len": 18004, "eval_totals_1": 18004, "eval_totals_2": 15800, "eval_totals_3": 13596, "eval_totals_4": 11392, "step": 618 }, { "epoch": 18.0, "learning_rate": 0.0001, "loss": 1.2605, "step": 655 }, { "epoch": 18.0, "eval_bleu": 14.779, "eval_bp": 0.8255, "eval_counts_1": 8560, "eval_counts_2": 3376, "eval_counts_3": 1695, "eval_counts_4": 880, "eval_exact_match": 0.0231, "eval_f1": 0.3846, "eval_gen_len": 13.1665, "eval_loss": 1.3709588050842285, "eval_precisions_1": 48.009, "eval_precisions_2": 21.605, "eval_precisions_3": 12.6285, "eval_precisions_4": 7.8445, "eval_ref_len": 21250, "eval_rouge1": 0.3922, "eval_rouge2": 0.2092, "eval_rougeL": 0.3778, "eval_rougeLsum": 0.3775, "eval_runtime": 456.164, "eval_samples_per_second": 4.832, "eval_steps_per_second": 1.208, "eval_sys_len": 17830, "eval_totals_1": 17830, "eval_totals_2": 15626, "eval_totals_3": 13422, "eval_totals_4": 11218, "step": 655 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 1.2667, "step": 691 }, { "epoch": 18.99, "eval_bleu": 15.0008, "eval_bp": 0.8257, "eval_counts_1": 8664, "eval_counts_2": 3455, "eval_counts_3": 1733, "eval_counts_4": 882, "eval_exact_match": 0.0227, "eval_f1": 0.3906, "eval_gen_len": 13.2232, "eval_loss": 1.3694192171096802, "eval_precisions_1": 48.5814, "eval_precisions_2": 22.1049, "eval_precisions_3": 12.9078, "eval_precisions_4": 7.8596, "eval_ref_len": 21250, "eval_rouge1": 0.3987, "eval_rouge2": 0.2138, "eval_rougeL": 0.3853, "eval_rougeLsum": 0.3851, "eval_runtime": 454.2362, "eval_samples_per_second": 4.852, "eval_steps_per_second": 1.213, "eval_sys_len": 17834, "eval_totals_1": 17834, "eval_totals_2": 15630, "eval_totals_3": 13426, "eval_totals_4": 11222, "step": 691 }, { "epoch": 19.79, "learning_rate": 0.0001, "loss": 1.2074, "step": 720 }, { "epoch": 19.79, "eval_bleu": 15.0442, "eval_bp": 0.8369, "eval_counts_1": 8770, "eval_counts_2": 3465, "eval_counts_3": 1737, "eval_counts_4": 880, "eval_exact_match": 0.0227, "eval_f1": 0.3941, "eval_gen_len": 13.4424, "eval_loss": 1.365785837173462, "eval_precisions_1": 48.6169, "eval_precisions_2": 21.8819, "eval_precisions_3": 12.743, "eval_precisions_4": 7.7011, "eval_ref_len": 21250, "eval_rouge1": 0.4025, "eval_rouge2": 0.215, "eval_rougeL": 0.3883, "eval_rougeLsum": 0.3879, "eval_runtime": 459.1457, "eval_samples_per_second": 4.8, "eval_steps_per_second": 1.2, "eval_sys_len": 18039, "eval_totals_1": 18039, "eval_totals_2": 15835, "eval_totals_3": 13631, "eval_totals_4": 11427, "step": 720 }, { "epoch": 19.79, "step": 720, "total_flos": 4.419252384883016e+17, "train_loss": 2.0875697082943385, "train_runtime": 23544.6757, "train_samples_per_second": 7.912, "train_steps_per_second": 0.031 } ], "logging_steps": 500, "max_steps": 720, "num_train_epochs": 20, "save_steps": 500, "total_flos": 4.419252384883016e+17, "trial_name": null, "trial_params": null }