{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.78531558608845, "eval_steps": 500, "global_step": 1440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 0.0001, "loss": 6.6905, "step": 72 }, { "epoch": 0.99, "eval_bleu": 3.7816, "eval_bp": 1.0, "eval_counts_1": 5515, "eval_counts_2": 1394, "eval_counts_3": 522, "eval_counts_4": 191, "eval_exact_match": 0.0, "eval_f1": 0.2106, "eval_gen_len": 11.2786, "eval_loss": 2.097219705581665, "eval_precisions_1": 19.5762, "eval_precisions_2": 5.3681, "eval_precisions_3": 2.1966, "eval_precisions_4": 0.8859, "eval_ref_len": 21250, "eval_rouge1": 0.1942, "eval_rouge2": 0.0761, "eval_rougeL": 0.1837, "eval_rougeLsum": 0.1841, "eval_runtime": 456.7865, "eval_samples_per_second": 4.825, "eval_steps_per_second": 1.206, "eval_sys_len": 28172, "eval_totals_1": 28172, "eval_totals_2": 25968, "eval_totals_3": 23764, "eval_totals_4": 21560, "step": 72 }, { "epoch": 1.99, "learning_rate": 0.0001, "loss": 2.4978, "step": 145 }, { "epoch": 1.99, "eval_bleu": 9.6021, "eval_bp": 0.7524, "eval_counts_1": 7079, "eval_counts_2": 2339, "eval_counts_3": 1027, "eval_counts_4": 446, "eval_exact_match": 0.01, "eval_f1": 0.3032, "eval_gen_len": 12.0159, "eval_loss": 1.6211049556732178, "eval_precisions_1": 42.7889, "eval_precisions_2": 16.311, "eval_precisions_3": 8.4624, "eval_precisions_4": 4.4905, "eval_ref_len": 21250, "eval_rouge1": 0.3097, "eval_rouge2": 0.1455, "eval_rougeL": 0.2971, "eval_rougeLsum": 0.2969, "eval_runtime": 435.2772, "eval_samples_per_second": 5.063, "eval_steps_per_second": 1.266, "eval_sys_len": 16544, "eval_totals_1": 16544, "eval_totals_2": 14340, "eval_totals_3": 12136, "eval_totals_4": 9932, "step": 145 }, { "epoch": 3.0, "learning_rate": 0.0001, "loss": 2.1021, "step": 218 }, { "epoch": 3.0, "eval_bleu": 11.162, "eval_bp": 0.7908, "eval_counts_1": 7507, "eval_counts_2": 2637, "eval_counts_3": 1222, "eval_counts_4": 575, "eval_exact_match": 0.0141, "eval_f1": 0.3228, "eval_gen_len": 12.6375, "eval_loss": 1.5342339277267456, "eval_precisions_1": 43.6175, "eval_precisions_2": 17.5718, "eval_precisions_3": 9.5446, "eval_precisions_4": 5.425, "eval_ref_len": 21250, "eval_rouge1": 0.3304, "eval_rouge2": 0.1642, "eval_rougeL": 0.3172, "eval_rougeLsum": 0.3171, "eval_runtime": 446.8682, "eval_samples_per_second": 4.932, "eval_steps_per_second": 1.233, "eval_sys_len": 17211, "eval_totals_1": 17211, "eval_totals_2": 15007, "eval_totals_3": 12803, "eval_totals_4": 10599, "step": 218 }, { "epoch": 4.0, "learning_rate": 0.0001, "loss": 1.9208, "step": 291 }, { "epoch": 4.0, "eval_bleu": 11.7136, "eval_bp": 0.7714, "eval_counts_1": 7599, "eval_counts_2": 2755, "eval_counts_3": 1296, "eval_counts_4": 620, "eval_exact_match": 0.015, "eval_f1": 0.33, "eval_gen_len": 12.3938, "eval_loss": 1.4861969947814941, "eval_precisions_1": 45.0418, "eval_precisions_2": 18.7837, "eval_precisions_3": 10.3988, "eval_precisions_4": 6.0435, "eval_ref_len": 21250, "eval_rouge1": 0.3377, "eval_rouge2": 0.1721, "eval_rougeL": 0.3232, "eval_rougeLsum": 0.3229, "eval_runtime": 440.9926, "eval_samples_per_second": 4.998, "eval_steps_per_second": 1.249, "eval_sys_len": 16871, "eval_totals_1": 16871, "eval_totals_2": 14667, "eval_totals_3": 12463, "eval_totals_4": 10259, "step": 291 }, { "epoch": 4.99, "learning_rate": 0.0001, "loss": 1.8135, "step": 363 }, { "epoch": 4.99, "eval_bleu": 12.6402, "eval_bp": 0.7893, "eval_counts_1": 7831, "eval_counts_2": 2955, "eval_counts_3": 1424, "eval_counts_4": 694, "eval_exact_match": 0.0177, "eval_f1": 0.3417, "eval_gen_len": 12.6366, "eval_loss": 1.4626398086547852, "eval_precisions_1": 45.5715, "eval_precisions_2": 19.7263, "eval_precisions_3": 11.1459, "eval_precisions_4": 6.5645, "eval_ref_len": 21250, "eval_rouge1": 0.3497, "eval_rouge2": 0.1837, "eval_rougeL": 0.3358, "eval_rougeLsum": 0.3354, "eval_runtime": 448.9344, "eval_samples_per_second": 4.909, "eval_steps_per_second": 1.227, "eval_sys_len": 17184, "eval_totals_1": 17184, "eval_totals_2": 14980, "eval_totals_3": 12776, "eval_totals_4": 10572, "step": 363 }, { "epoch": 5.99, "learning_rate": 0.0001, "loss": 1.6907, "step": 436 }, { "epoch": 5.99, "eval_bleu": 13.0722, "eval_bp": 0.7735, "eval_counts_1": 7872, "eval_counts_2": 3023, "eval_counts_3": 1482, "eval_counts_4": 740, "eval_exact_match": 0.0177, "eval_f1": 0.3483, "eval_gen_len": 12.564, "eval_loss": 1.439197301864624, "eval_precisions_1": 46.5606, "eval_precisions_2": 20.5604, "eval_precisions_3": 11.8569, "eval_precisions_4": 7.188, "eval_ref_len": 21250, "eval_rouge1": 0.3566, "eval_rouge2": 0.1896, "eval_rougeL": 0.3432, "eval_rougeLsum": 0.343, "eval_runtime": 718.6776, "eval_samples_per_second": 3.067, "eval_steps_per_second": 0.767, "eval_sys_len": 16907, "eval_totals_1": 16907, "eval_totals_2": 14703, "eval_totals_3": 12499, "eval_totals_4": 10295, "step": 436 }, { "epoch": 6.99, "learning_rate": 0.0001, "loss": 1.6159, "step": 509 }, { "epoch": 6.99, "eval_bleu": 13.5053, "eval_bp": 0.7797, "eval_counts_1": 7981, "eval_counts_2": 3128, "eval_counts_3": 1542, "eval_counts_4": 773, "eval_exact_match": 0.0191, "eval_f1": 0.3543, "eval_gen_len": 12.5749, "eval_loss": 1.4288065433502197, "eval_precisions_1": 46.9029, "eval_precisions_2": 21.118, "eval_precisions_3": 12.2303, "eval_precisions_4": 7.4298, "eval_ref_len": 21250, "eval_rouge1": 0.363, "eval_rouge2": 0.1952, "eval_rougeL": 0.3504, "eval_rougeLsum": 0.3502, "eval_runtime": 709.1881, "eval_samples_per_second": 3.108, "eval_steps_per_second": 0.777, "eval_sys_len": 17016, "eval_totals_1": 17016, "eval_totals_2": 14812, "eval_totals_3": 12608, "eval_totals_4": 10404, "step": 509 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 1.556, "step": 582 }, { "epoch": 8.0, "eval_bleu": 13.2095, "eval_bp": 0.797, "eval_counts_1": 8014, "eval_counts_2": 3046, "eval_counts_3": 1496, "eval_counts_4": 748, "eval_exact_match": 0.0222, "eval_f1": 0.355, "eval_gen_len": 12.7641, "eval_loss": 1.4131838083267212, "eval_precisions_1": 46.2702, "eval_precisions_2": 20.1508, "eval_precisions_3": 11.5861, "eval_precisions_4": 6.9854, "eval_ref_len": 21250, "eval_rouge1": 0.3632, "eval_rouge2": 0.1903, "eval_rougeL": 0.3489, "eval_rougeLsum": 0.3491, "eval_runtime": 736.4055, "eval_samples_per_second": 2.993, "eval_steps_per_second": 0.748, "eval_sys_len": 17320, "eval_totals_1": 17320, "eval_totals_2": 15116, "eval_totals_3": 12912, "eval_totals_4": 10708, "step": 582 }, { "epoch": 9.0, "learning_rate": 0.0001, "loss": 1.4951, "step": 655 }, { "epoch": 9.0, "eval_bleu": 14.1831, "eval_bp": 0.789, "eval_counts_1": 8342, "eval_counts_2": 3271, "eval_counts_3": 1622, "eval_counts_4": 819, "eval_exact_match": 0.0218, "eval_f1": 0.3769, "eval_gen_len": 12.7654, "eval_loss": 1.3926042318344116, "eval_precisions_1": 48.5621, "eval_precisions_2": 21.8445, "eval_precisions_3": 12.7016, "eval_precisions_4": 7.7513, "eval_ref_len": 21250, "eval_rouge1": 0.3843, "eval_rouge2": 0.2059, "eval_rougeL": 0.3704, "eval_rougeLsum": 0.3704, "eval_runtime": 695.8554, "eval_samples_per_second": 3.167, "eval_steps_per_second": 0.792, "eval_sys_len": 17178, "eval_totals_1": 17178, "eval_totals_2": 14974, "eval_totals_3": 12770, "eval_totals_4": 10566, "step": 655 }, { "epoch": 9.99, "learning_rate": 0.0001, "loss": 1.4522, "step": 727 }, { "epoch": 9.99, "eval_bleu": 15.0442, "eval_bp": 0.8187, "eval_counts_1": 8639, "eval_counts_2": 3449, "eval_counts_3": 1740, "eval_counts_4": 891, "eval_exact_match": 0.024, "eval_f1": 0.3895, "eval_gen_len": 13.1016, "eval_loss": 1.3769304752349854, "eval_precisions_1": 48.7859, "eval_precisions_2": 22.2459, "eval_precisions_3": 13.0827, "eval_precisions_4": 8.0299, "eval_ref_len": 21250, "eval_rouge1": 0.3972, "eval_rouge2": 0.2129, "eval_rougeL": 0.3821, "eval_rougeLsum": 0.3823, "eval_runtime": 733.5109, "eval_samples_per_second": 3.005, "eval_steps_per_second": 0.751, "eval_sys_len": 17708, "eval_totals_1": 17708, "eval_totals_2": 15504, "eval_totals_3": 13300, "eval_totals_4": 11096, "step": 727 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 1.3663, "step": 800 }, { "epoch": 10.99, "eval_bleu": 15.2622, "eval_bp": 0.8168, "eval_counts_1": 8736, "eval_counts_2": 3468, "eval_counts_3": 1747, "eval_counts_4": 924, "eval_exact_match": 0.0245, "eval_f1": 0.3946, "eval_gen_len": 13.0399, "eval_loss": 1.3676577806472778, "eval_precisions_1": 49.4285, "eval_precisions_2": 22.4176, "eval_precisions_3": 13.169, "eval_precisions_4": 8.3529, "eval_ref_len": 21250, "eval_rouge1": 0.4027, "eval_rouge2": 0.215, "eval_rougeL": 0.3871, "eval_rougeLsum": 0.387, "eval_runtime": 746.3261, "eval_samples_per_second": 2.953, "eval_steps_per_second": 0.738, "eval_sys_len": 17674, "eval_totals_1": 17674, "eval_totals_2": 15470, "eval_totals_3": 13266, "eval_totals_4": 11062, "step": 800 }, { "epoch": 11.99, "learning_rate": 0.0001, "loss": 1.3122, "step": 873 }, { "epoch": 11.99, "eval_bleu": 15.3943, "eval_bp": 0.8308, "eval_counts_1": 8833, "eval_counts_2": 3533, "eval_counts_3": 1780, "eval_counts_4": 915, "eval_exact_match": 0.0222, "eval_f1": 0.3975, "eval_gen_len": 13.3494, "eval_loss": 1.352068305015564, "eval_precisions_1": 49.272, "eval_precisions_2": 22.4703, "eval_precisions_3": 13.1667, "eval_precisions_4": 8.0866, "eval_ref_len": 21250, "eval_rouge1": 0.4055, "eval_rouge2": 0.219, "eval_rougeL": 0.3915, "eval_rougeLsum": 0.3915, "eval_runtime": 815.025, "eval_samples_per_second": 2.704, "eval_steps_per_second": 0.676, "eval_sys_len": 17927, "eval_totals_1": 17927, "eval_totals_2": 15723, "eval_totals_3": 13519, "eval_totals_4": 11315, "step": 873 }, { "epoch": 13.0, "learning_rate": 0.0001, "loss": 1.2641, "step": 946 }, { "epoch": 13.0, "eval_bleu": 16.1011, "eval_bp": 0.848, "eval_counts_1": 9048, "eval_counts_2": 3668, "eval_counts_3": 1864, "eval_counts_4": 989, "eval_exact_match": 0.0268, "eval_f1": 0.408, "eval_gen_len": 13.5508, "eval_loss": 1.3493599891662598, "eval_precisions_1": 49.5998, "eval_precisions_2": 22.8707, "eval_precisions_3": 13.474, "eval_precisions_4": 8.5039, "eval_ref_len": 21250, "eval_rouge1": 0.4165, "eval_rouge2": 0.2265, "eval_rougeL": 0.4011, "eval_rougeLsum": 0.401, "eval_runtime": 726.7867, "eval_samples_per_second": 3.033, "eval_steps_per_second": 0.758, "eval_sys_len": 18242, "eval_totals_1": 18242, "eval_totals_2": 16038, "eval_totals_3": 13834, "eval_totals_4": 11630, "step": 946 }, { "epoch": 13.99, "learning_rate": 0.0001, "loss": 1.2359, "step": 1018 }, { "epoch": 13.99, "eval_bleu": 16.3595, "eval_bp": 0.8402, "eval_counts_1": 9075, "eval_counts_2": 3709, "eval_counts_3": 1907, "eval_counts_4": 1013, "eval_exact_match": 0.0259, "eval_f1": 0.4113, "eval_gen_len": 13.5681, "eval_loss": 1.3488041162490845, "eval_precisions_1": 50.1437, "eval_precisions_2": 23.3359, "eval_precisions_3": 13.9299, "eval_precisions_4": 8.8194, "eval_ref_len": 21250, "eval_rouge1": 0.4195, "eval_rouge2": 0.2298, "eval_rougeL": 0.4041, "eval_rougeLsum": 0.4038, "eval_runtime": 701.8557, "eval_samples_per_second": 3.14, "eval_steps_per_second": 0.785, "eval_sys_len": 18098, "eval_totals_1": 18098, "eval_totals_2": 15894, "eval_totals_3": 13690, "eval_totals_4": 11486, "step": 1018 }, { "epoch": 14.99, "learning_rate": 0.0001, "loss": 1.1754, "step": 1091 }, { "epoch": 14.99, "eval_bleu": 16.7083, "eval_bp": 0.8547, "eval_counts_1": 9182, "eval_counts_2": 3777, "eval_counts_3": 1957, "eval_counts_4": 1048, "eval_exact_match": 0.0268, "eval_f1": 0.4145, "eval_gen_len": 13.6534, "eval_loss": 1.3482075929641724, "eval_precisions_1": 49.9946, "eval_precisions_2": 23.3696, "eval_precisions_3": 14.0206, "eval_precisions_4": 8.9161, "eval_ref_len": 21250, "eval_rouge1": 0.4227, "eval_rouge2": 0.2314, "eval_rougeL": 0.406, "eval_rougeLsum": 0.4058, "eval_runtime": 469.6435, "eval_samples_per_second": 4.693, "eval_steps_per_second": 1.173, "eval_sys_len": 18366, "eval_totals_1": 18366, "eval_totals_2": 16162, "eval_totals_3": 13958, "eval_totals_4": 11754, "step": 1091 }, { "epoch": 15.99, "learning_rate": 0.0001, "loss": 1.1367, "step": 1164 }, { "epoch": 15.99, "eval_bleu": 16.5803, "eval_bp": 0.8517, "eval_counts_1": 9164, "eval_counts_2": 3761, "eval_counts_3": 1935, "eval_counts_4": 1033, "eval_exact_match": 0.0245, "eval_f1": 0.4147, "eval_gen_len": 13.6152, "eval_loss": 1.3501369953155518, "eval_precisions_1": 50.0492, "eval_precisions_2": 23.3515, "eval_precisions_3": 13.9189, "eval_precisions_4": 8.8306, "eval_ref_len": 21250, "eval_rouge1": 0.4225, "eval_rouge2": 0.2316, "eval_rougeL": 0.4078, "eval_rougeLsum": 0.4079, "eval_runtime": 480.2308, "eval_samples_per_second": 4.589, "eval_steps_per_second": 1.147, "eval_sys_len": 18310, "eval_totals_1": 18310, "eval_totals_2": 16106, "eval_totals_3": 13902, "eval_totals_4": 11698, "step": 1164 }, { "epoch": 17.0, "learning_rate": 0.0001, "loss": 1.096, "step": 1237 }, { "epoch": 17.0, "eval_bleu": 16.5513, "eval_bp": 0.8499, "eval_counts_1": 9126, "eval_counts_2": 3712, "eval_counts_3": 1922, "eval_counts_4": 1050, "eval_exact_match": 0.0295, "eval_f1": 0.4141, "eval_gen_len": 13.6325, "eval_loss": 1.358604907989502, "eval_precisions_1": 49.9316, "eval_precisions_2": 23.0946, "eval_precisions_3": 13.8582, "eval_precisions_4": 9.0013, "eval_ref_len": 21250, "eval_rouge1": 0.4217, "eval_rouge2": 0.2304, "eval_rougeL": 0.4066, "eval_rougeLsum": 0.4066, "eval_runtime": 465.7019, "eval_samples_per_second": 4.733, "eval_steps_per_second": 1.183, "eval_sys_len": 18277, "eval_totals_1": 18277, "eval_totals_2": 16073, "eval_totals_3": 13869, "eval_totals_4": 11665, "step": 1237 }, { "epoch": 18.0, "learning_rate": 0.0001, "loss": 1.0571, "step": 1310 }, { "epoch": 18.0, "eval_bleu": 16.4708, "eval_bp": 0.8446, "eval_counts_1": 9087, "eval_counts_2": 3707, "eval_counts_3": 1923, "eval_counts_4": 1033, "eval_exact_match": 0.029, "eval_f1": 0.4116, "eval_gen_len": 13.5172, "eval_loss": 1.3658462762832642, "eval_precisions_1": 49.9862, "eval_precisions_2": 23.205, "eval_precisions_3": 13.9641, "eval_precisions_4": 8.9306, "eval_ref_len": 21250, "eval_rouge1": 0.4196, "eval_rouge2": 0.2301, "eval_rougeL": 0.4049, "eval_rougeLsum": 0.4049, "eval_runtime": 463.8447, "eval_samples_per_second": 4.752, "eval_steps_per_second": 1.188, "eval_sys_len": 18179, "eval_totals_1": 18179, "eval_totals_2": 15975, "eval_totals_3": 13771, "eval_totals_4": 11567, "step": 1310 }, { "epoch": 18.99, "learning_rate": 0.0001, "loss": 1.036, "step": 1382 }, { "epoch": 18.99, "eval_bleu": 16.8386, "eval_bp": 0.8528, "eval_counts_1": 9206, "eval_counts_2": 3806, "eval_counts_3": 1976, "eval_counts_4": 1059, "eval_exact_match": 0.0309, "eval_f1": 0.4174, "eval_gen_len": 13.7205, "eval_loss": 1.367233395576477, "eval_precisions_1": 50.2182, "eval_precisions_2": 23.5987, "eval_precisions_3": 14.1913, "eval_precisions_4": 9.0358, "eval_ref_len": 21250, "eval_rouge1": 0.4254, "eval_rouge2": 0.2348, "eval_rougeL": 0.4106, "eval_rougeLsum": 0.4107, "eval_runtime": 489.8628, "eval_samples_per_second": 4.499, "eval_steps_per_second": 1.125, "eval_sys_len": 18332, "eval_totals_1": 18332, "eval_totals_2": 16128, "eval_totals_3": 13924, "eval_totals_4": 11720, "step": 1382 }, { "epoch": 19.79, "learning_rate": 0.0001, "loss": 0.9785, "step": 1440 }, { "epoch": 19.79, "eval_bleu": 16.8234, "eval_bp": 0.8438, "eval_counts_1": 9180, "eval_counts_2": 3796, "eval_counts_3": 1973, "eval_counts_4": 1059, "eval_exact_match": 0.0327, "eval_f1": 0.4172, "eval_gen_len": 13.5113, "eval_loss": 1.381914496421814, "eval_precisions_1": 50.5395, "eval_precisions_2": 23.7845, "eval_precisions_3": 14.3428, "eval_precisions_4": 9.1672, "eval_ref_len": 21250, "eval_rouge1": 0.4254, "eval_rouge2": 0.2344, "eval_rougeL": 0.4116, "eval_rougeLsum": 0.4117, "eval_runtime": 465.8344, "eval_samples_per_second": 4.731, "eval_steps_per_second": 1.183, "eval_sys_len": 18164, "eval_totals_1": 18164, "eval_totals_2": 15960, "eval_totals_3": 13756, "eval_totals_4": 11552, "step": 1440 }, { "epoch": 19.79, "step": 1440, "total_flos": 4.419252384883016e+17, "train_loss": 1.7299000342686972, "train_runtime": 27815.7883, "train_samples_per_second": 6.697, "train_steps_per_second": 0.052 } ], "logging_steps": 500, "max_steps": 1440, "num_train_epochs": 20, "save_steps": 500, "total_flos": 4.419252384883016e+17, "trial_name": null, "trial_params": null }