{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.934292462958986, "eval_steps": 500, "global_step": 2900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.0001, "loss": 2.732, "step": 145 }, { "epoch": 1.0, "eval_bleu": 12.4473, "eval_bp": 0.7805, "eval_counts_1": 7779, "eval_counts_2": 2893, "eval_counts_3": 1393, "eval_counts_4": 685, "eval_exact_match": 0.0168, "eval_f1": 0.3393, "eval_gen_len": 12.2523, "eval_loss": 1.2989141941070557, "eval_precisions_1": 45.6809, "eval_precisions_2": 19.5143, "eval_precisions_3": 11.0372, "eval_precisions_4": 6.5758, "eval_ref_len": 21250, "eval_rouge1": 0.3487, "eval_rouge2": 0.1796, "eval_rougeL": 0.3329, "eval_rougeLsum": 0.3327, "eval_runtime": 2048.1193, "eval_samples_per_second": 1.076, "eval_steps_per_second": 1.076, "eval_sys_len": 17029, "eval_totals_1": 17029, "eval_totals_2": 14825, "eval_totals_3": 12621, "eval_totals_4": 10417, "step": 145 }, { "epoch": 2.0, "learning_rate": 0.0001, "loss": 1.5514, "step": 291 }, { "epoch": 2.0, "eval_bleu": 14.7663, "eval_bp": 0.7871, "eval_counts_1": 8297, "eval_counts_2": 3336, "eval_counts_3": 1711, "eval_counts_4": 899, "eval_exact_match": 0.025, "eval_f1": 0.3743, "eval_gen_len": 12.441, "eval_loss": 1.20997154712677, "eval_precisions_1": 48.3931, "eval_precisions_2": 22.3278, "eval_precisions_3": 13.4333, "eval_precisions_4": 8.5351, "eval_ref_len": 21250, "eval_rouge1": 0.3839, "eval_rouge2": 0.2089, "eval_rougeL": 0.3688, "eval_rougeLsum": 0.369, "eval_runtime": 2570.171, "eval_samples_per_second": 0.858, "eval_steps_per_second": 0.858, "eval_sys_len": 17145, "eval_totals_1": 17145, "eval_totals_2": 14941, "eval_totals_3": 12737, "eval_totals_4": 10533, "step": 291 }, { "epoch": 3.0, "learning_rate": 0.0001, "loss": 1.3546, "step": 435 }, { "epoch": 3.0, "eval_bleu": 16.3903, "eval_bp": 0.7798, "eval_counts_1": 8930, "eval_counts_2": 3713, "eval_counts_3": 1905, "eval_counts_4": 1022, "eval_exact_match": 0.034, "eval_f1": 0.4155, "eval_gen_len": 12.6021, "eval_loss": 1.142831563949585, "eval_precisions_1": 52.4739, "eval_precisions_2": 25.0641, "eval_precisions_3": 15.1071, "eval_precisions_4": 9.8213, "eval_ref_len": 21250, "eval_rouge1": 0.4225, "eval_rouge2": 0.2345, "eval_rougeL": 0.4075, "eval_rougeLsum": 0.4074, "eval_runtime": 2923.7087, "eval_samples_per_second": 0.754, "eval_steps_per_second": 0.754, "eval_sys_len": 17018, "eval_totals_1": 17018, "eval_totals_2": 14814, "eval_totals_3": 12610, "eval_totals_4": 10406, "step": 435 }, { "epoch": 4.0, "learning_rate": 0.0001, "loss": 1.1969, "step": 581 }, { "epoch": 4.0, "eval_bleu": 17.8161, "eval_bp": 0.8441, "eval_counts_1": 9456, "eval_counts_2": 3994, "eval_counts_3": 2096, "eval_counts_4": 1157, "eval_exact_match": 0.0386, "eval_f1": 0.4334, "eval_gen_len": 13.4061, "eval_loss": 1.1113450527191162, "eval_precisions_1": 52.039, "eval_precisions_2": 25.0141, "eval_precisions_3": 15.2292, "eval_precisions_4": 10.0095, "eval_ref_len": 21250, "eval_rouge1": 0.4409, "eval_rouge2": 0.246, "eval_rougeL": 0.4251, "eval_rougeLsum": 0.4251, "eval_runtime": 2741.9646, "eval_samples_per_second": 0.804, "eval_steps_per_second": 0.804, "eval_sys_len": 18171, "eval_totals_1": 18171, "eval_totals_2": 15967, "eval_totals_3": 13763, "eval_totals_4": 11559, "step": 581 }, { "epoch": 5.0, "learning_rate": 0.0001, "loss": 1.0876, "step": 726 }, { "epoch": 5.0, "eval_bleu": 18.6911, "eval_bp": 0.8446, "eval_counts_1": 9606, "eval_counts_2": 4162, "eval_counts_3": 2233, "eval_counts_4": 1243, "eval_exact_match": 0.0377, "eval_f1": 0.443, "eval_gen_len": 13.5599, "eval_loss": 1.1031831502914429, "eval_precisions_1": 52.8412, "eval_precisions_2": 26.0532, "eval_precisions_3": 16.2152, "eval_precisions_4": 10.7461, "eval_ref_len": 21250, "eval_rouge1": 0.4504, "eval_rouge2": 0.2571, "eval_rougeL": 0.4356, "eval_rougeLsum": 0.4357, "eval_runtime": 3812.6899, "eval_samples_per_second": 0.578, "eval_steps_per_second": 0.578, "eval_sys_len": 18179, "eval_totals_1": 18179, "eval_totals_2": 15975, "eval_totals_3": 13771, "eval_totals_4": 11567, "step": 726 }, { "epoch": 6.0, "learning_rate": 0.0001, "loss": 0.9881, "step": 872 }, { "epoch": 6.0, "eval_bleu": 18.7071, "eval_bp": 0.8481, "eval_counts_1": 9608, "eval_counts_2": 4167, "eval_counts_3": 2235, "eval_counts_4": 1246, "eval_exact_match": 0.044, "eval_f1": 0.4429, "eval_gen_len": 13.6978, "eval_loss": 1.1118519306182861, "eval_precisions_1": 52.661, "eval_precisions_2": 25.9772, "eval_precisions_3": 16.1523, "eval_precisions_4": 10.7109, "eval_ref_len": 21250, "eval_rouge1": 0.4505, "eval_rouge2": 0.2567, "eval_rougeL": 0.4348, "eval_rougeLsum": 0.4349, "eval_runtime": 2020.0708, "eval_samples_per_second": 1.091, "eval_steps_per_second": 1.091, "eval_sys_len": 18245, "eval_totals_1": 18245, "eval_totals_2": 16041, "eval_totals_3": 13837, "eval_totals_4": 11633, "step": 872 }, { "epoch": 7.0, "learning_rate": 0.0001, "loss": 0.9142, "step": 1017 }, { "epoch": 7.0, "eval_bleu": 19.3053, "eval_bp": 0.8506, "eval_counts_1": 9757, "eval_counts_2": 4285, "eval_counts_3": 2311, "eval_counts_4": 1310, "eval_exact_match": 0.0495, "eval_f1": 0.451, "eval_gen_len": 13.5826, "eval_loss": 1.1105936765670776, "eval_precisions_1": 53.3432, "eval_precisions_2": 26.6364, "eval_precisions_3": 16.6463, "eval_precisions_4": 11.2167, "eval_ref_len": 21250, "eval_rouge1": 0.4587, "eval_rouge2": 0.2641, "eval_rougeL": 0.4427, "eval_rougeLsum": 0.443, "eval_runtime": 1991.0459, "eval_samples_per_second": 1.107, "eval_steps_per_second": 1.107, "eval_sys_len": 18291, "eval_totals_1": 18291, "eval_totals_2": 16087, "eval_totals_3": 13883, "eval_totals_4": 11679, "step": 1017 }, { "epoch": 8.0, "learning_rate": 0.0001, "loss": 0.8323, "step": 1163 }, { "epoch": 8.0, "eval_bleu": 19.4102, "eval_bp": 0.8507, "eval_counts_1": 9757, "eval_counts_2": 4300, "eval_counts_3": 2341, "eval_counts_4": 1317, "eval_exact_match": 0.0472, "eval_f1": 0.4513, "eval_gen_len": 13.6239, "eval_loss": 1.1327157020568848, "eval_precisions_1": 53.3373, "eval_precisions_2": 26.7263, "eval_precisions_3": 16.8599, "eval_precisions_4": 11.2747, "eval_ref_len": 21250, "eval_rouge1": 0.4587, "eval_rouge2": 0.2662, "eval_rougeL": 0.4429, "eval_rougeLsum": 0.4426, "eval_runtime": 1972.0648, "eval_samples_per_second": 1.118, "eval_steps_per_second": 1.118, "eval_sys_len": 18293, "eval_totals_1": 18293, "eval_totals_2": 16089, "eval_totals_3": 13885, "eval_totals_4": 11681, "step": 1163 }, { "epoch": 9.0, "learning_rate": 0.0001, "loss": 0.7742, "step": 1308 }, { "epoch": 9.0, "eval_bleu": 19.3574, "eval_bp": 0.8497, "eval_counts_1": 9757, "eval_counts_2": 4273, "eval_counts_3": 2324, "eval_counts_4": 1320, "eval_exact_match": 0.049, "eval_f1": 0.451, "eval_gen_len": 13.5944, "eval_loss": 1.1574428081512451, "eval_precisions_1": 53.3957, "eval_precisions_2": 26.5916, "eval_precisions_3": 16.7616, "eval_precisions_4": 11.3198, "eval_ref_len": 21250, "eval_rouge1": 0.4585, "eval_rouge2": 0.2653, "eval_rougeL": 0.4431, "eval_rougeLsum": 0.443, "eval_runtime": 1991.8737, "eval_samples_per_second": 1.106, "eval_steps_per_second": 1.106, "eval_sys_len": 18273, "eval_totals_1": 18273, "eval_totals_2": 16069, "eval_totals_3": 13865, "eval_totals_4": 11661, "step": 1308 }, { "epoch": 10.0, "learning_rate": 0.0001, "loss": 0.7101, "step": 1454 }, { "epoch": 10.0, "eval_bleu": 20.1003, "eval_bp": 0.8694, "eval_counts_1": 9861, "eval_counts_2": 4403, "eval_counts_3": 2438, "eval_counts_4": 1416, "eval_exact_match": 0.0531, "eval_f1": 0.4525, "eval_gen_len": 13.9133, "eval_loss": 1.167409896850586, "eval_precisions_1": 52.8995, "eval_precisions_2": 26.7871, "eval_precisions_3": 17.1292, "eval_precisions_4": 11.7716, "eval_ref_len": 21250, "eval_rouge1": 0.4594, "eval_rouge2": 0.2689, "eval_rougeL": 0.444, "eval_rougeLsum": 0.4435, "eval_runtime": 2025.3437, "eval_samples_per_second": 1.088, "eval_steps_per_second": 1.088, "eval_sys_len": 18641, "eval_totals_1": 18641, "eval_totals_2": 16437, "eval_totals_3": 14233, "eval_totals_4": 12029, "step": 1454 }, { "epoch": 10.99, "learning_rate": 0.0001, "loss": 0.6642, "step": 1599 }, { "epoch": 10.99, "eval_bleu": 19.655, "eval_bp": 0.8558, "eval_counts_1": 9868, "eval_counts_2": 4380, "eval_counts_3": 2358, "eval_counts_4": 1337, "eval_exact_match": 0.0476, "eval_f1": 0.4551, "eval_gen_len": 13.9142, "eval_loss": 1.1888612508773804, "eval_precisions_1": 53.6713, "eval_precisions_2": 27.0671, "eval_precisions_3": 16.8694, "eval_precisions_4": 11.3555, "eval_ref_len": 21250, "eval_rouge1": 0.4622, "eval_rouge2": 0.2694, "eval_rougeL": 0.4469, "eval_rougeLsum": 0.4466, "eval_runtime": 2020.9205, "eval_samples_per_second": 1.091, "eval_steps_per_second": 1.091, "eval_sys_len": 18386, "eval_totals_1": 18386, "eval_totals_2": 16182, "eval_totals_3": 13978, "eval_totals_4": 11774, "step": 1599 }, { "epoch": 12.0, "learning_rate": 0.0001, "loss": 0.6067, "step": 1745 }, { "epoch": 12.0, "eval_bleu": 19.9169, "eval_bp": 0.8828, "eval_counts_1": 9872, "eval_counts_2": 4384, "eval_counts_3": 2408, "eval_counts_4": 1395, "eval_exact_match": 0.0472, "eval_f1": 0.4489, "eval_gen_len": 14.2482, "eval_loss": 1.2207266092300415, "eval_precisions_1": 52.2494, "eval_precisions_2": 26.2672, "eval_precisions_3": 16.6229, "eval_precisions_4": 11.3581, "eval_ref_len": 21250, "eval_rouge1": 0.4569, "eval_rouge2": 0.2667, "eval_rougeL": 0.441, "eval_rougeLsum": 0.4408, "eval_runtime": 2047.2616, "eval_samples_per_second": 1.077, "eval_steps_per_second": 1.077, "eval_sys_len": 18894, "eval_totals_1": 18894, "eval_totals_2": 16690, "eval_totals_3": 14486, "eval_totals_4": 12282, "step": 1745 }, { "epoch": 12.99, "learning_rate": 0.0001, "loss": 0.5684, "step": 1890 }, { "epoch": 12.99, "eval_bleu": 19.5451, "eval_bp": 0.8831, "eval_counts_1": 9870, "eval_counts_2": 4356, "eval_counts_3": 2360, "eval_counts_4": 1329, "eval_exact_match": 0.0485, "eval_f1": 0.4506, "eval_gen_len": 14.2432, "eval_loss": 1.2586854696273804, "eval_precisions_1": 52.2195, "eval_precisions_2": 26.0885, "eval_precisions_3": 16.2837, "eval_precisions_4": 10.8145, "eval_ref_len": 21250, "eval_rouge1": 0.4581, "eval_rouge2": 0.2651, "eval_rougeL": 0.4414, "eval_rougeLsum": 0.4409, "eval_runtime": 2126.3316, "eval_samples_per_second": 1.037, "eval_steps_per_second": 1.037, "eval_sys_len": 18901, "eval_totals_1": 18901, "eval_totals_2": 16697, "eval_totals_3": 14493, "eval_totals_4": 12289, "step": 1890 }, { "epoch": 14.0, "learning_rate": 0.0001, "loss": 0.5288, "step": 2036 }, { "epoch": 14.0, "eval_bleu": 19.6648, "eval_bp": 0.8547, "eval_counts_1": 9815, "eval_counts_2": 4360, "eval_counts_3": 2389, "eval_counts_4": 1335, "eval_exact_match": 0.0454, "eval_f1": 0.4504, "eval_gen_len": 13.7432, "eval_loss": 1.2803738117218018, "eval_precisions_1": 53.4382, "eval_precisions_2": 26.9752, "eval_precisions_3": 17.1144, "eval_precisions_4": 11.3569, "eval_ref_len": 21250, "eval_rouge1": 0.4592, "eval_rouge2": 0.2671, "eval_rougeL": 0.4443, "eval_rougeLsum": 0.4436, "eval_runtime": 3989.4275, "eval_samples_per_second": 0.552, "eval_steps_per_second": 0.552, "eval_sys_len": 18367, "eval_totals_1": 18367, "eval_totals_2": 16163, "eval_totals_3": 13959, "eval_totals_4": 11755, "step": 2036 }, { "epoch": 14.99, "learning_rate": 0.0001, "loss": 0.4902, "step": 2181 }, { "epoch": 14.99, "eval_bleu": 19.8138, "eval_bp": 0.8766, "eval_counts_1": 9886, "eval_counts_2": 4407, "eval_counts_3": 2398, "eval_counts_4": 1359, "eval_exact_match": 0.0495, "eval_f1": 0.451, "eval_gen_len": 14.1225, "eval_loss": 1.321104884147644, "eval_precisions_1": 52.6495, "eval_precisions_2": 26.5914, "eval_precisions_3": 16.6887, "eval_precisions_4": 11.1714, "eval_ref_len": 21250, "eval_rouge1": 0.4582, "eval_rouge2": 0.2674, "eval_rougeL": 0.4426, "eval_rougeLsum": 0.4421, "eval_runtime": 2190.6068, "eval_samples_per_second": 1.006, "eval_steps_per_second": 1.006, "eval_sys_len": 18777, "eval_totals_1": 18777, "eval_totals_2": 16573, "eval_totals_3": 14369, "eval_totals_4": 12165, "step": 2181 }, { "epoch": 16.0, "learning_rate": 0.0001, "loss": 0.4498, "step": 2327 }, { "epoch": 16.0, "eval_bleu": 20.0703, "eval_bp": 0.909, "eval_counts_1": 10008, "eval_counts_2": 4477, "eval_counts_3": 2456, "eval_counts_4": 1381, "eval_exact_match": 0.0476, "eval_f1": 0.4491, "eval_gen_len": 14.3725, "eval_loss": 1.3620938062667847, "eval_precisions_1": 51.5903, "eval_precisions_2": 26.0366, "eval_precisions_3": 16.3832, "eval_precisions_4": 10.8, "eval_ref_len": 21250, "eval_rouge1": 0.4569, "eval_rouge2": 0.2679, "eval_rougeL": 0.4415, "eval_rougeLsum": 0.4412, "eval_runtime": 4080.8757, "eval_samples_per_second": 0.54, "eval_steps_per_second": 0.54, "eval_sys_len": 19399, "eval_totals_1": 19399, "eval_totals_2": 17195, "eval_totals_3": 14991, "eval_totals_4": 12787, "step": 2327 }, { "epoch": 16.99, "learning_rate": 0.0001, "loss": 0.4216, "step": 2472 }, { "epoch": 16.99, "eval_bleu": 20.1319, "eval_bp": 0.8948, "eval_counts_1": 10016, "eval_counts_2": 4483, "eval_counts_3": 2455, "eval_counts_4": 1385, "eval_exact_match": 0.0481, "eval_f1": 0.4531, "eval_gen_len": 14.3008, "eval_loss": 1.3966974020004272, "eval_precisions_1": 52.3712, "eval_precisions_2": 26.4937, "eval_precisions_3": 16.6814, "eval_precisions_4": 11.0685, "eval_ref_len": 21250, "eval_rouge1": 0.4615, "eval_rouge2": 0.2705, "eval_rougeL": 0.4457, "eval_rougeLsum": 0.4451, "eval_runtime": 3311.0939, "eval_samples_per_second": 0.666, "eval_steps_per_second": 0.666, "eval_sys_len": 19125, "eval_totals_1": 19125, "eval_totals_2": 16921, "eval_totals_3": 14717, "eval_totals_4": 12513, "step": 2472 }, { "epoch": 18.0, "learning_rate": 0.0001, "loss": 0.3829, "step": 2618 }, { "epoch": 18.0, "eval_bleu": 19.8508, "eval_bp": 0.9123, "eval_counts_1": 9976, "eval_counts_2": 4407, "eval_counts_3": 2412, "eval_counts_4": 1374, "eval_exact_match": 0.0476, "eval_f1": 0.4479, "eval_gen_len": 14.7046, "eval_loss": 1.4460452795028687, "eval_precisions_1": 51.2536, "eval_precisions_2": 25.533, "eval_precisions_3": 16.0202, "eval_precisions_4": 10.6909, "eval_ref_len": 21250, "eval_rouge1": 0.4556, "eval_rouge2": 0.2627, "eval_rougeL": 0.4387, "eval_rougeLsum": 0.4385, "eval_runtime": 3748.4463, "eval_samples_per_second": 0.588, "eval_steps_per_second": 0.588, "eval_sys_len": 19464, "eval_totals_1": 19464, "eval_totals_2": 17260, "eval_totals_3": 15056, "eval_totals_4": 12852, "step": 2618 }, { "epoch": 19.0, "learning_rate": 0.0001, "loss": 0.3551, "step": 2764 }, { "epoch": 19.0, "eval_bleu": 20.0572, "eval_bp": 0.8952, "eval_counts_1": 10010, "eval_counts_2": 4451, "eval_counts_3": 2438, "eval_counts_4": 1385, "eval_exact_match": 0.0463, "eval_f1": 0.4523, "eval_gen_len": 14.3807, "eval_loss": 1.4725110530853271, "eval_precisions_1": 52.3235, "eval_precisions_2": 26.2953, "eval_precisions_3": 16.5591, "eval_precisions_4": 11.0632, "eval_ref_len": 21250, "eval_rouge1": 0.4606, "eval_rouge2": 0.2672, "eval_rougeL": 0.4438, "eval_rougeLsum": 0.4434, "eval_runtime": 2215.2029, "eval_samples_per_second": 0.995, "eval_steps_per_second": 0.995, "eval_sys_len": 19131, "eval_totals_1": 19131, "eval_totals_2": 16927, "eval_totals_3": 14723, "eval_totals_4": 12519, "step": 2764 }, { "epoch": 19.93, "learning_rate": 0.0001, "loss": 0.3301, "step": 2900 }, { "epoch": 19.93, "eval_bleu": 19.8047, "eval_bp": 0.8816, "eval_counts_1": 9858, "eval_counts_2": 4378, "eval_counts_3": 2406, "eval_counts_4": 1368, "eval_exact_match": 0.0495, "eval_f1": 0.4483, "eval_gen_len": 14.2795, "eval_loss": 1.5030488967895508, "eval_precisions_1": 52.2361, "eval_precisions_2": 26.2659, "eval_precisions_3": 16.6344, "eval_precisions_4": 11.1582, "eval_ref_len": 21250, "eval_rouge1": 0.4569, "eval_rouge2": 0.2644, "eval_rougeL": 0.4412, "eval_rougeLsum": 0.4405, "eval_runtime": 2181.7432, "eval_samples_per_second": 1.01, "eval_steps_per_second": 1.01, "eval_sys_len": 18872, "eval_totals_1": 18872, "eval_totals_2": 16668, "eval_totals_3": 14464, "eval_totals_4": 12260, "step": 2900 }, { "epoch": 19.93, "step": 2900, "total_flos": 1.1100924470624256e+18, "train_loss": 0.6333936349276839, "train_runtime": 110329.1002, "train_samples_per_second": 1.688, "train_steps_per_second": 0.026 } ], "logging_steps": 500, "max_steps": 2900, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.1100924470624256e+18, "trial_name": null, "trial_params": null }