{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.992800575953924, "eval_steps": 500, "global_step": 11104, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.48, "learning_rate": 1.9399615754082615e-05, "loss": 2.0304, "step": 500 }, { "epoch": 0.96, "learning_rate": 1.879923150816523e-05, "loss": 1.781, "step": 1000 }, { "epoch": 2.0, "eval_f1": 0.9088, "eval_gen_len": 26.88909090909091, "eval_loss": 1.579687476158142, "eval_precision": 0.908, "eval_recall": 0.91, "eval_rouge1": 0.4708, "eval_rouge2": 0.2219, "eval_rougeL": 0.3892, "eval_rougeLsum": 0.389, "eval_runtime": 1186.1406, "eval_samples_per_second": 4.637, "eval_steps_per_second": 0.29, "step": 1388 }, { "epoch": 2.16, "learning_rate": 1.729827089337176e-05, "loss": 1.7026, "step": 1500 }, { "epoch": 2.88, "learning_rate": 1.6397694524495677e-05, "loss": 1.6618, "step": 2000 }, { "epoch": 3.0, "eval_f1": 0.91, "eval_gen_len": 26.728181818181817, "eval_loss": 1.5411016941070557, "eval_precision": 0.9094, "eval_recall": 0.9111, "eval_rouge1": 0.4776, "eval_rouge2": 0.2303, "eval_rougeL": 0.3977, "eval_rougeLsum": 0.3973, "eval_runtime": 1083.838, "eval_samples_per_second": 5.075, "eval_steps_per_second": 0.317, "step": 2083 }, { "epoch": 3.6, "learning_rate": 1.5497118155619597e-05, "loss": 1.626, "step": 2500 }, { "epoch": 4.0, "eval_f1": 0.911, "eval_gen_len": 26.759636363636364, "eval_loss": 1.5170917510986328, "eval_precision": 0.9102, "eval_recall": 0.9121, "eval_rouge1": 0.4834, "eval_rouge2": 0.2345, "eval_rougeL": 0.402, "eval_rougeLsum": 0.402, "eval_runtime": 1053.82, "eval_samples_per_second": 5.219, "eval_steps_per_second": 0.326, "step": 2776 }, { "epoch": 4.32, "learning_rate": 1.4596541786743516e-05, "loss": 1.5918, "step": 3000 }, { "epoch": 5.0, "eval_f1": 0.9112, "eval_gen_len": 26.647636363636362, "eval_loss": 1.500138521194458, "eval_precision": 0.9106, "eval_recall": 0.9122, "eval_rouge1": 0.4853, "eval_rouge2": 0.2365, "eval_rougeL": 0.4045, "eval_rougeLsum": 0.4045, "eval_runtime": 1079.0919, "eval_samples_per_second": 5.097, "eval_steps_per_second": 0.319, "step": 3471 }, { "epoch": 5.04, "learning_rate": 1.3695965417867436e-05, "loss": 1.5798, "step": 3500 }, { "epoch": 5.76, "learning_rate": 1.2795389048991355e-05, "loss": 1.5586, "step": 4000 }, { "epoch": 6.0, "eval_f1": 0.9116, "eval_gen_len": 26.777818181818184, "eval_loss": 1.4880452156066895, "eval_precision": 0.9108, "eval_recall": 0.9127, "eval_rouge1": 0.4875, "eval_rouge2": 0.2373, "eval_rougeL": 0.4063, "eval_rougeLsum": 0.4063, "eval_runtime": 1027.5441, "eval_samples_per_second": 5.353, "eval_steps_per_second": 0.335, "step": 4164 }, { "epoch": 6.48, "learning_rate": 1.1894812680115276e-05, "loss": 1.5375, "step": 4500 }, { "epoch": 7.0, "eval_f1": 0.912, "eval_gen_len": 26.39909090909091, "eval_loss": 1.4768402576446533, "eval_precision": 0.9116, "eval_recall": 0.9128, "eval_rouge1": 0.4898, "eval_rouge2": 0.24, "eval_rougeL": 0.4083, "eval_rougeLsum": 0.4083, "eval_runtime": 922.1893, "eval_samples_per_second": 5.964, "eval_steps_per_second": 0.373, "step": 4858 }, { "epoch": 7.2, "learning_rate": 1.0994236311239194e-05, "loss": 1.5228, "step": 5000 }, { "epoch": 7.92, "learning_rate": 1.0093659942363115e-05, "loss": 1.5146, "step": 5500 }, { "epoch": 8.0, "eval_f1": 0.9126, "eval_gen_len": 26.156, "eval_loss": 1.4685654640197754, "eval_precision": 0.9123, "eval_recall": 0.9133, "eval_rouge1": 0.4907, "eval_rouge2": 0.241, "eval_rougeL": 0.4088, "eval_rougeLsum": 0.4089, "eval_runtime": 865.3485, "eval_samples_per_second": 6.356, "eval_steps_per_second": 0.398, "step": 5553 }, { "epoch": 8.64, "learning_rate": 9.193083573487034e-06, "loss": 1.5006, "step": 6000 }, { "epoch": 9.0, "eval_f1": 0.9127, "eval_gen_len": 26.26290909090909, "eval_loss": 1.4636152982711792, "eval_precision": 0.9122, "eval_recall": 0.9135, "eval_rouge1": 0.4914, "eval_rouge2": 0.2419, "eval_rougeL": 0.4097, "eval_rougeLsum": 0.4099, "eval_runtime": 874.612, "eval_samples_per_second": 6.289, "eval_steps_per_second": 0.393, "step": 6247 }, { "epoch": 9.36, "learning_rate": 8.29250720461095e-06, "loss": 1.49, "step": 6500 }, { "epoch": 10.0, "eval_f1": 0.9127, "eval_gen_len": 26.027272727272727, "eval_loss": 1.4580360651016235, "eval_precision": 0.9125, "eval_recall": 0.9133, "eval_rouge1": 0.4911, "eval_rouge2": 0.2429, "eval_rougeL": 0.4109, "eval_rougeLsum": 0.411, "eval_runtime": 855.8845, "eval_samples_per_second": 6.426, "eval_steps_per_second": 0.402, "step": 6942 }, { "epoch": 10.08, "learning_rate": 7.391930835734871e-06, "loss": 1.485, "step": 7000 }, { "epoch": 10.8, "learning_rate": 6.491354466858791e-06, "loss": 1.4749, "step": 7500 }, { "epoch": 11.0, "eval_f1": 0.9131, "eval_gen_len": 26.230363636363638, "eval_loss": 1.4546109437942505, "eval_precision": 0.9127, "eval_recall": 0.9138, "eval_rouge1": 0.4932, "eval_rouge2": 0.244, "eval_rougeL": 0.4121, "eval_rougeLsum": 0.4123, "eval_runtime": 871.4205, "eval_samples_per_second": 6.312, "eval_steps_per_second": 0.395, "step": 7636 }, { "epoch": 11.52, "learning_rate": 5.590778097982709e-06, "loss": 1.4661, "step": 8000 }, { "epoch": 12.0, "eval_f1": 0.9132, "eval_gen_len": 25.87781818181818, "eval_loss": 1.4514495134353638, "eval_precision": 0.9133, "eval_recall": 0.9136, "eval_rouge1": 0.4937, "eval_rouge2": 0.2448, "eval_rougeL": 0.4126, "eval_rougeLsum": 0.4127, "eval_runtime": 867.3574, "eval_samples_per_second": 6.341, "eval_steps_per_second": 0.397, "step": 8331 }, { "epoch": 12.24, "learning_rate": 4.690201729106629e-06, "loss": 1.4626, "step": 8500 }, { "epoch": 12.96, "learning_rate": 3.7896253602305477e-06, "loss": 1.4575, "step": 9000 }, { "epoch": 13.0, "eval_f1": 0.9133, "eval_gen_len": 26.11509090909091, "eval_loss": 1.4499082565307617, "eval_precision": 0.913, "eval_recall": 0.914, "eval_rouge1": 0.4947, "eval_rouge2": 0.2453, "eval_rougeL": 0.4139, "eval_rougeLsum": 0.414, "eval_runtime": 860.9844, "eval_samples_per_second": 6.388, "eval_steps_per_second": 0.4, "step": 9025 }, { "epoch": 13.68, "learning_rate": 2.8890489913544673e-06, "loss": 1.4511, "step": 9500 }, { "epoch": 14.0, "eval_f1": 0.9133, "eval_gen_len": 26.028727272727274, "eval_loss": 1.44780433177948, "eval_precision": 0.9131, "eval_recall": 0.9138, "eval_rouge1": 0.4939, "eval_rouge2": 0.2451, "eval_rougeL": 0.4133, "eval_rougeLsum": 0.4134, "eval_runtime": 862.0827, "eval_samples_per_second": 6.38, "eval_steps_per_second": 0.399, "step": 9720 }, { "epoch": 14.4, "learning_rate": 1.988472622478386e-06, "loss": 1.4519, "step": 10000 }, { "epoch": 15.0, "eval_f1": 0.9133, "eval_gen_len": 25.907818181818183, "eval_loss": 1.4471020698547363, "eval_precision": 0.9132, "eval_recall": 0.9137, "eval_rouge1": 0.4938, "eval_rouge2": 0.2451, "eval_rougeL": 0.4134, "eval_rougeLsum": 0.4134, "eval_runtime": 855.2673, "eval_samples_per_second": 6.431, "eval_steps_per_second": 0.402, "step": 10414 }, { "epoch": 15.12, "learning_rate": 1.0878962536023055e-06, "loss": 1.4475, "step": 10500 }, { "epoch": 15.84, "learning_rate": 1.8731988472622478e-07, "loss": 1.4439, "step": 11000 }, { "epoch": 15.99, "eval_f1": 0.9134, "eval_gen_len": 25.96290909090909, "eval_loss": 1.4468724727630615, "eval_precision": 0.9133, "eval_recall": 0.9138, "eval_rouge1": 0.4939, "eval_rouge2": 0.2453, "eval_rougeL": 0.4133, "eval_rougeLsum": 0.4134, "eval_runtime": 864.4194, "eval_samples_per_second": 6.363, "eval_steps_per_second": 0.398, "step": 11104 }, { "epoch": 15.99, "step": 11104, "total_flos": 2.2405705733792072e+18, "train_loss": 0.8767006197992594, "train_runtime": 37037.5595, "train_samples_per_second": 43.199, "train_steps_per_second": 0.3 } ], "logging_steps": 500, "max_steps": 11104, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "total_flos": 2.2405705733792072e+18, "train_batch_size": 24, "trial_name": null, "trial_params": null }