{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 397, "global_step": 9536, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "eval_gen_len": 83.5593, "eval_loss": 1.6528608798980713, "eval_rouge1": 50.4957, "eval_rouge2": 32.5323, "eval_rougeL": 40.7567, "eval_rougeLsum": 40.5733, "eval_runtime": 41.984, "eval_samples_per_second": 1.405, "eval_steps_per_second": 0.715, "step": 397 }, { "epoch": 0.21, "grad_norm": 8.36017894744873, "learning_rate": 1.9404362416107384e-05, "loss": 2.7418, "step": 500 }, { "epoch": 0.33, "eval_gen_len": 83.3729, "eval_loss": 1.3848459720611572, "eval_rouge1": 49.9993, "eval_rouge2": 31.5422, "eval_rougeL": 40.7043, "eval_rougeLsum": 40.6632, "eval_runtime": 41.848, "eval_samples_per_second": 1.41, "eval_steps_per_second": 0.717, "step": 794 }, { "epoch": 0.42, "grad_norm": 10.013521194458008, "learning_rate": 1.880512943432407e-05, "loss": 1.6117, "step": 1000 }, { "epoch": 0.5, "eval_gen_len": 84.8983, "eval_loss": 1.3274192810058594, "eval_rouge1": 50.0655, "eval_rouge2": 31.4638, "eval_rougeL": 40.2184, "eval_rougeLsum": 39.9987, "eval_runtime": 42.1731, "eval_samples_per_second": 1.399, "eval_steps_per_second": 0.711, "step": 1191 }, { "epoch": 0.63, "grad_norm": 9.272841453552246, "learning_rate": 1.820589645254075e-05, "loss": 1.4861, "step": 1500 }, { "epoch": 0.67, "eval_gen_len": 87.1864, "eval_loss": 1.3262691497802734, "eval_rouge1": 51.2154, "eval_rouge2": 33.6289, "eval_rougeL": 41.9642, "eval_rougeLsum": 41.7649, "eval_runtime": 43.0575, "eval_samples_per_second": 1.37, "eval_steps_per_second": 0.697, "step": 1588 }, { "epoch": 0.83, "eval_gen_len": 85.9661, "eval_loss": 1.2881355285644531, "eval_rouge1": 52.2072, "eval_rouge2": 34.2681, "eval_rougeL": 42.7582, "eval_rougeLsum": 42.5683, "eval_runtime": 42.4338, "eval_samples_per_second": 1.39, "eval_steps_per_second": 0.707, "step": 1985 }, { "epoch": 0.84, "grad_norm": 10.048806190490723, "learning_rate": 1.7609060402684567e-05, "loss": 1.495, "step": 2000 }, { "epoch": 1.0, "eval_gen_len": 80.1864, "eval_loss": 1.2640005350112915, "eval_rouge1": 52.1344, "eval_rouge2": 34.3518, "eval_rougeL": 42.9145, "eval_rougeLsum": 42.7837, "eval_runtime": 40.643, "eval_samples_per_second": 1.452, "eval_steps_per_second": 0.738, "step": 2382 }, { "epoch": 1.05, "grad_norm": 9.145220756530762, "learning_rate": 1.7009827420901247e-05, "loss": 1.4292, "step": 2500 }, { "epoch": 1.17, "eval_gen_len": 83.5593, "eval_loss": 1.2814366817474365, "eval_rouge1": 51.9388, "eval_rouge2": 33.6073, "eval_rougeL": 41.9771, "eval_rougeLsum": 41.8638, "eval_runtime": 41.9785, "eval_samples_per_second": 1.405, "eval_steps_per_second": 0.715, "step": 2779 }, { "epoch": 1.26, "grad_norm": 9.626166343688965, "learning_rate": 1.641059443911793e-05, "loss": 1.2572, "step": 3000 }, { "epoch": 1.33, "eval_gen_len": 81.7458, "eval_loss": 1.3041572570800781, "eval_rouge1": 52.685, "eval_rouge2": 34.8664, "eval_rougeL": 43.247, "eval_rougeLsum": 43.2174, "eval_runtime": 40.4041, "eval_samples_per_second": 1.46, "eval_steps_per_second": 0.742, "step": 3176 }, { "epoch": 1.47, "grad_norm": 8.85732364654541, "learning_rate": 1.5811361457334612e-05, "loss": 1.2858, "step": 3500 }, { "epoch": 1.5, "eval_gen_len": 83.4915, "eval_loss": 1.250982403755188, "eval_rouge1": 53.1395, "eval_rouge2": 35.0366, "eval_rougeL": 44.0336, "eval_rougeLsum": 43.8277, "eval_runtime": 41.066, "eval_samples_per_second": 1.437, "eval_steps_per_second": 0.731, "step": 3573 }, { "epoch": 1.67, "eval_gen_len": 85.7797, "eval_loss": 1.2450958490371704, "eval_rouge1": 53.2435, "eval_rouge2": 34.0265, "eval_rougeL": 43.1606, "eval_rougeLsum": 42.9125, "eval_runtime": 42.4863, "eval_samples_per_second": 1.389, "eval_steps_per_second": 0.706, "step": 3970 }, { "epoch": 1.68, "grad_norm": 9.060718536376953, "learning_rate": 1.5212128475551296e-05, "loss": 1.2632, "step": 4000 }, { "epoch": 1.83, "eval_gen_len": 84.0678, "eval_loss": 1.2505569458007812, "eval_rouge1": 52.9033, "eval_rouge2": 34.6637, "eval_rougeL": 43.0146, "eval_rougeLsum": 42.8985, "eval_runtime": 42.1089, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.712, "step": 4367 }, { "epoch": 1.89, "grad_norm": 7.555502414703369, "learning_rate": 1.4612895493767978e-05, "loss": 1.2367, "step": 4500 }, { "epoch": 2.0, "eval_gen_len": 82.322, "eval_loss": 1.2485252618789673, "eval_rouge1": 50.1387, "eval_rouge2": 31.1201, "eval_rougeL": 40.0786, "eval_rougeLsum": 40.1657, "eval_runtime": 41.189, "eval_samples_per_second": 1.432, "eval_steps_per_second": 0.728, "step": 4764 }, { "epoch": 2.1, "grad_norm": 7.1890788078308105, "learning_rate": 1.401366251198466e-05, "loss": 1.1512, "step": 5000 }, { "epoch": 2.16, "eval_gen_len": 82.9322, "eval_loss": 1.261144757270813, "eval_rouge1": 52.7072, "eval_rouge2": 34.6442, "eval_rougeL": 43.2377, "eval_rougeLsum": 43.1384, "eval_runtime": 41.2591, "eval_samples_per_second": 1.43, "eval_steps_per_second": 0.727, "step": 5161 }, { "epoch": 2.31, "grad_norm": 7.86561918258667, "learning_rate": 1.341562799616491e-05, "loss": 1.0728, "step": 5500 }, { "epoch": 2.33, "eval_gen_len": 86.4237, "eval_loss": 1.2699768543243408, "eval_rouge1": 52.3383, "eval_rouge2": 34.7756, "eval_rougeL": 42.9406, "eval_rougeLsum": 42.7658, "eval_runtime": 42.4715, "eval_samples_per_second": 1.389, "eval_steps_per_second": 0.706, "step": 5558 }, { "epoch": 2.5, "eval_gen_len": 84.3051, "eval_loss": 1.2631828784942627, "eval_rouge1": 52.8233, "eval_rouge2": 35.1768, "eval_rougeL": 43.8642, "eval_rougeLsum": 43.7259, "eval_runtime": 41.727, "eval_samples_per_second": 1.414, "eval_steps_per_second": 0.719, "step": 5955 }, { "epoch": 2.52, "grad_norm": 7.868692398071289, "learning_rate": 1.2816395014381592e-05, "loss": 1.0826, "step": 6000 }, { "epoch": 2.66, "eval_gen_len": 82.8644, "eval_loss": 1.2638760805130005, "eval_rouge1": 53.9367, "eval_rouge2": 36.2676, "eval_rougeL": 44.9414, "eval_rougeLsum": 44.7603, "eval_runtime": 42.7574, "eval_samples_per_second": 1.38, "eval_steps_per_second": 0.702, "step": 6352 }, { "epoch": 2.73, "grad_norm": 7.449892997741699, "learning_rate": 1.2217162032598275e-05, "loss": 1.0921, "step": 6500 }, { "epoch": 2.83, "eval_gen_len": 82.4237, "eval_loss": 1.2491506338119507, "eval_rouge1": 52.8146, "eval_rouge2": 34.6392, "eval_rougeL": 43.5323, "eval_rougeLsum": 43.4647, "eval_runtime": 42.5398, "eval_samples_per_second": 1.387, "eval_steps_per_second": 0.705, "step": 6749 }, { "epoch": 2.94, "grad_norm": 7.139917850494385, "learning_rate": 1.1617929050814957e-05, "loss": 1.1129, "step": 7000 }, { "epoch": 3.0, "eval_gen_len": 83.1356, "eval_loss": 1.2625495195388794, "eval_rouge1": 53.6493, "eval_rouge2": 35.0396, "eval_rougeL": 43.501, "eval_rougeLsum": 43.4039, "eval_runtime": 43.1051, "eval_samples_per_second": 1.369, "eval_steps_per_second": 0.696, "step": 7146 }, { "epoch": 3.15, "grad_norm": 5.8409600257873535, "learning_rate": 1.1018696069031641e-05, "loss": 0.9783, "step": 7500 }, { "epoch": 3.16, "eval_gen_len": 84.7797, "eval_loss": 1.293487787246704, "eval_rouge1": 53.245, "eval_rouge2": 35.655, "eval_rougeL": 44.4306, "eval_rougeLsum": 44.482, "eval_runtime": 41.7791, "eval_samples_per_second": 1.412, "eval_steps_per_second": 0.718, "step": 7543 }, { "epoch": 3.33, "eval_gen_len": 84.1186, "eval_loss": 1.266953706741333, "eval_rouge1": 52.146, "eval_rouge2": 33.0632, "eval_rougeL": 41.4382, "eval_rougeLsum": 41.5159, "eval_runtime": 41.1238, "eval_samples_per_second": 1.435, "eval_steps_per_second": 0.73, "step": 7940 }, { "epoch": 3.36, "grad_norm": 8.737879753112793, "learning_rate": 1.0419463087248323e-05, "loss": 0.9771, "step": 8000 }, { "epoch": 3.5, "eval_gen_len": 82.8475, "eval_loss": 1.275550127029419, "eval_rouge1": 51.7108, "eval_rouge2": 33.5352, "eval_rougeL": 42.4153, "eval_rougeLsum": 42.4572, "eval_runtime": 41.088, "eval_samples_per_second": 1.436, "eval_steps_per_second": 0.73, "step": 8337 }, { "epoch": 3.57, "grad_norm": 8.45171070098877, "learning_rate": 9.820230105465006e-06, "loss": 0.9841, "step": 8500 }, { "epoch": 3.66, "eval_gen_len": 84.322, "eval_loss": 1.260237455368042, "eval_rouge1": 53.2394, "eval_rouge2": 34.9695, "eval_rougeL": 43.2182, "eval_rougeLsum": 43.1333, "eval_runtime": 41.5567, "eval_samples_per_second": 1.42, "eval_steps_per_second": 0.722, "step": 8734 }, { "epoch": 3.78, "grad_norm": 47.454078674316406, "learning_rate": 9.220997123681688e-06, "loss": 0.9643, "step": 9000 }, { "epoch": 3.83, "eval_gen_len": 81.4915, "eval_loss": 1.27409827709198, "eval_rouge1": 53.5588, "eval_rouge2": 36.0425, "eval_rougeL": 44.2044, "eval_rougeLsum": 44.2287, "eval_runtime": 40.5566, "eval_samples_per_second": 1.455, "eval_steps_per_second": 0.74, "step": 9131 }, { "epoch": 3.98, "grad_norm": 7.34140157699585, "learning_rate": 8.62176414189837e-06, "loss": 0.9439, "step": 9500 }, { "epoch": 4.0, "eval_gen_len": 86.1864, "eval_loss": 1.2641756534576416, "eval_rouge1": 53.7305, "eval_rouge2": 35.3844, "eval_rougeL": 43.8211, "eval_rougeLsum": 43.7597, "eval_runtime": 42.1877, "eval_samples_per_second": 1.399, "eval_steps_per_second": 0.711, "step": 9528 } ], "logging_steps": 500, "max_steps": 16688, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 1192, "total_flos": 2.071244574793728e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }