{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.99640028797696, "eval_steps": 500, "global_step": 13880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.48, "learning_rate": 1.9399615754082615e-05, "loss": 2.0304, "step": 500 }, { "epoch": 0.96, "learning_rate": 1.879923150816523e-05, "loss": 1.781, "step": 1000 }, { "epoch": 2.0, "eval_f1": 0.9088, "eval_gen_len": 26.88909090909091, "eval_loss": 1.579687476158142, "eval_precision": 0.908, "eval_recall": 0.91, "eval_rouge1": 0.4708, "eval_rouge2": 0.2219, "eval_rougeL": 0.3892, "eval_rougeLsum": 0.389, "eval_runtime": 1186.1406, "eval_samples_per_second": 4.637, "eval_steps_per_second": 0.29, "step": 1388 }, { "epoch": 2.16, "learning_rate": 1.729827089337176e-05, "loss": 1.7026, "step": 1500 }, { "epoch": 2.88, "learning_rate": 1.6397694524495677e-05, "loss": 1.6618, "step": 2000 }, { "epoch": 3.0, "eval_f1": 0.91, "eval_gen_len": 26.728181818181817, "eval_loss": 1.5411016941070557, "eval_precision": 0.9094, "eval_recall": 0.9111, "eval_rouge1": 0.4776, "eval_rouge2": 0.2303, "eval_rougeL": 0.3977, "eval_rougeLsum": 0.3973, "eval_runtime": 1083.838, "eval_samples_per_second": 5.075, "eval_steps_per_second": 0.317, "step": 2083 }, { "epoch": 3.6, "learning_rate": 1.5497118155619597e-05, "loss": 1.626, "step": 2500 }, { "epoch": 4.0, "eval_f1": 0.911, "eval_gen_len": 26.759636363636364, "eval_loss": 1.5170917510986328, "eval_precision": 0.9102, "eval_recall": 0.9121, "eval_rouge1": 0.4834, "eval_rouge2": 0.2345, "eval_rougeL": 0.402, "eval_rougeLsum": 0.402, "eval_runtime": 1053.82, "eval_samples_per_second": 5.219, "eval_steps_per_second": 0.326, "step": 2776 }, { "epoch": 4.32, "learning_rate": 1.4596541786743516e-05, "loss": 1.5918, "step": 3000 }, { "epoch": 5.0, "eval_f1": 0.9112, "eval_gen_len": 26.647636363636362, "eval_loss": 1.500138521194458, "eval_precision": 0.9106, "eval_recall": 0.9122, "eval_rouge1": 0.4853, "eval_rouge2": 0.2365, "eval_rougeL": 0.4045, "eval_rougeLsum": 0.4045, "eval_runtime": 1079.0919, "eval_samples_per_second": 5.097, "eval_steps_per_second": 0.319, "step": 3471 }, { "epoch": 5.04, "learning_rate": 1.3695965417867436e-05, "loss": 1.5798, "step": 3500 }, { "epoch": 5.76, "learning_rate": 1.2795389048991355e-05, "loss": 1.5586, "step": 4000 }, { "epoch": 6.0, "eval_f1": 0.9116, "eval_gen_len": 26.777818181818184, "eval_loss": 1.4880452156066895, "eval_precision": 0.9108, "eval_recall": 0.9127, "eval_rouge1": 0.4875, "eval_rouge2": 0.2373, "eval_rougeL": 0.4063, "eval_rougeLsum": 0.4063, "eval_runtime": 1027.5441, "eval_samples_per_second": 5.353, "eval_steps_per_second": 0.335, "step": 4164 }, { "epoch": 6.48, "learning_rate": 1.1894812680115276e-05, "loss": 1.5375, "step": 4500 }, { "epoch": 7.0, "eval_f1": 0.912, "eval_gen_len": 26.39909090909091, "eval_loss": 1.4768402576446533, "eval_precision": 0.9116, "eval_recall": 0.9128, "eval_rouge1": 0.4898, "eval_rouge2": 0.24, "eval_rougeL": 0.4083, "eval_rougeLsum": 0.4083, "eval_runtime": 922.1893, "eval_samples_per_second": 5.964, "eval_steps_per_second": 0.373, "step": 4858 }, { "epoch": 7.2, "learning_rate": 1.0994236311239194e-05, "loss": 1.5228, "step": 5000 }, { "epoch": 7.92, "learning_rate": 1.0093659942363115e-05, "loss": 1.5146, "step": 5500 }, { "epoch": 8.0, "eval_f1": 0.9126, "eval_gen_len": 26.156, "eval_loss": 1.4685654640197754, "eval_precision": 0.9123, "eval_recall": 0.9133, "eval_rouge1": 0.4907, "eval_rouge2": 0.241, "eval_rougeL": 0.4088, "eval_rougeLsum": 0.4089, "eval_runtime": 865.3485, "eval_samples_per_second": 6.356, "eval_steps_per_second": 0.398, "step": 5553 }, { "epoch": 8.64, "learning_rate": 9.193083573487034e-06, "loss": 1.5006, "step": 6000 }, { "epoch": 9.0, "eval_f1": 0.9127, "eval_gen_len": 26.26290909090909, "eval_loss": 1.4636152982711792, "eval_precision": 0.9122, "eval_recall": 0.9135, "eval_rouge1": 0.4914, "eval_rouge2": 0.2419, "eval_rougeL": 0.4097, "eval_rougeLsum": 0.4099, "eval_runtime": 874.612, "eval_samples_per_second": 6.289, "eval_steps_per_second": 0.393, "step": 6247 }, { "epoch": 9.36, "learning_rate": 8.29250720461095e-06, "loss": 1.49, "step": 6500 }, { "epoch": 10.0, "eval_f1": 0.9127, "eval_gen_len": 26.027272727272727, "eval_loss": 1.4580360651016235, "eval_precision": 0.9125, "eval_recall": 0.9133, "eval_rouge1": 0.4911, "eval_rouge2": 0.2429, "eval_rougeL": 0.4109, "eval_rougeLsum": 0.411, "eval_runtime": 855.8845, "eval_samples_per_second": 6.426, "eval_steps_per_second": 0.402, "step": 6942 }, { "epoch": 10.08, "learning_rate": 7.391930835734871e-06, "loss": 1.485, "step": 7000 }, { "epoch": 10.8, "learning_rate": 6.491354466858791e-06, "loss": 1.4749, "step": 7500 }, { "epoch": 11.0, "eval_f1": 0.9131, "eval_gen_len": 26.230363636363638, "eval_loss": 1.4546109437942505, "eval_precision": 0.9127, "eval_recall": 0.9138, "eval_rouge1": 0.4932, "eval_rouge2": 0.244, "eval_rougeL": 0.4121, "eval_rougeLsum": 0.4123, "eval_runtime": 871.4205, "eval_samples_per_second": 6.312, "eval_steps_per_second": 0.395, "step": 7636 }, { "epoch": 11.52, "learning_rate": 5.590778097982709e-06, "loss": 1.4661, "step": 8000 }, { "epoch": 12.0, "eval_f1": 0.9132, "eval_gen_len": 25.87781818181818, "eval_loss": 1.4514495134353638, "eval_precision": 0.9133, "eval_recall": 0.9136, "eval_rouge1": 0.4937, "eval_rouge2": 0.2448, "eval_rougeL": 0.4126, "eval_rougeLsum": 0.4127, "eval_runtime": 867.3574, "eval_samples_per_second": 6.341, "eval_steps_per_second": 0.397, "step": 8331 }, { "epoch": 12.24, "learning_rate": 4.690201729106629e-06, "loss": 1.4626, "step": 8500 }, { "epoch": 12.96, "learning_rate": 3.7896253602305477e-06, "loss": 1.4575, "step": 9000 }, { "epoch": 13.0, "eval_f1": 0.9133, "eval_gen_len": 26.11509090909091, "eval_loss": 1.4499082565307617, "eval_precision": 0.913, "eval_recall": 0.914, "eval_rouge1": 0.4947, "eval_rouge2": 0.2453, "eval_rougeL": 0.4139, "eval_rougeLsum": 0.414, "eval_runtime": 860.9844, "eval_samples_per_second": 6.388, "eval_steps_per_second": 0.4, "step": 9025 }, { "epoch": 13.68, "learning_rate": 2.8890489913544673e-06, "loss": 1.4511, "step": 9500 }, { "epoch": 14.0, "eval_f1": 0.9133, "eval_gen_len": 26.028727272727274, "eval_loss": 1.44780433177948, "eval_precision": 0.9131, "eval_recall": 0.9138, "eval_rouge1": 0.4939, "eval_rouge2": 0.2451, "eval_rougeL": 0.4133, "eval_rougeLsum": 0.4134, "eval_runtime": 862.0827, "eval_samples_per_second": 6.38, "eval_steps_per_second": 0.399, "step": 9720 }, { "epoch": 14.4, "learning_rate": 1.988472622478386e-06, "loss": 1.4519, "step": 10000 }, { "epoch": 15.0, "eval_f1": 0.9133, "eval_gen_len": 25.907818181818183, "eval_loss": 1.4471020698547363, "eval_precision": 0.9132, "eval_recall": 0.9137, "eval_rouge1": 0.4938, "eval_rouge2": 0.2451, "eval_rougeL": 0.4134, "eval_rougeLsum": 0.4134, "eval_runtime": 855.2673, "eval_samples_per_second": 6.431, "eval_steps_per_second": 0.402, "step": 10414 }, { "epoch": 15.12, "learning_rate": 1.0878962536023055e-06, "loss": 1.4475, "step": 10500 }, { "epoch": 15.84, "learning_rate": 1.8731988472622478e-07, "loss": 1.4439, "step": 11000 }, { "epoch": 16.0, "eval_f1": 0.9133, "eval_gen_len": 26.034545454545455, "eval_loss": 1.4474281072616577, "eval_precision": 0.9131, "eval_recall": 0.9139, "eval_rouge1": 0.4942, "eval_rouge2": 0.2456, "eval_rougeL": 0.4133, "eval_rougeLsum": 0.4134, "eval_runtime": 875.1275, "eval_samples_per_second": 6.285, "eval_steps_per_second": 0.393, "step": 11104 }, { "epoch": 16.57, "learning_rate": 3.4293948126801158e-06, "loss": 1.4441, "step": 11500 }, { "epoch": 17.0, "eval_f1": 0.9134, "eval_gen_len": 25.939090909090908, "eval_loss": 1.4446682929992676, "eval_precision": 0.9133, "eval_recall": 0.9138, "eval_rouge1": 0.4945, "eval_rouge2": 0.2457, "eval_rougeL": 0.4139, "eval_rougeLsum": 0.414, "eval_runtime": 853.4658, "eval_samples_per_second": 6.444, "eval_steps_per_second": 0.403, "step": 11799 }, { "epoch": 17.29, "learning_rate": 2.708933717579251e-06, "loss": 1.444, "step": 12000 }, { "epoch": 18.0, "eval_f1": 0.9135, "eval_gen_len": 26.010727272727273, "eval_loss": 1.4445807933807373, "eval_precision": 0.9133, "eval_recall": 0.9141, "eval_rouge1": 0.4957, "eval_rouge2": 0.2473, "eval_rougeL": 0.415, "eval_rougeLsum": 0.4151, "eval_runtime": 869.7396, "eval_samples_per_second": 6.324, "eval_steps_per_second": 0.396, "step": 12493 }, { "epoch": 18.01, "learning_rate": 1.988472622478386e-06, "loss": 1.4378, "step": 12500 }, { "epoch": 18.73, "learning_rate": 1.2680115273775217e-06, "loss": 1.4375, "step": 13000 }, { "epoch": 19.0, "eval_f1": 0.9136, "eval_gen_len": 25.88690909090909, "eval_loss": 1.4433233737945557, "eval_precision": 0.9136, "eval_recall": 0.914, "eval_rouge1": 0.4961, "eval_rouge2": 0.2473, "eval_rougeL": 0.4153, "eval_rougeLsum": 0.4153, "eval_runtime": 854.4011, "eval_samples_per_second": 6.437, "eval_steps_per_second": 0.403, "step": 13188 }, { "epoch": 19.45, "learning_rate": 5.475504322766571e-07, "loss": 1.4361, "step": 13500 }, { "epoch": 20.0, "eval_f1": 0.9137, "eval_gen_len": 25.862909090909092, "eval_loss": 1.4432713985443115, "eval_precision": 0.9136, "eval_recall": 0.914, "eval_rouge1": 0.4961, "eval_rouge2": 0.2476, "eval_rougeL": 0.4155, "eval_rougeLsum": 0.4154, "eval_runtime": 863.7254, "eval_samples_per_second": 6.368, "eval_steps_per_second": 0.398, "step": 13880 }, { "epoch": 20.0, "step": 13880, "total_flos": 2.818047373345161e+18, "train_loss": 0.2986434628709249, "train_runtime": 16684.611, "train_samples_per_second": 119.871, "train_steps_per_second": 0.832 } ], "logging_steps": 500, "max_steps": 13880, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.818047373345161e+18, "train_batch_size": 24, "trial_name": null, "trial_params": null }