|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.99640028797696, |
|
"eval_steps": 500, |
|
"global_step": 13880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.9399615754082615e-05, |
|
"loss": 2.0304, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.879923150816523e-05, |
|
"loss": 1.781, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9088, |
|
"eval_gen_len": 26.88909090909091, |
|
"eval_loss": 1.579687476158142, |
|
"eval_precision": 0.908, |
|
"eval_recall": 0.91, |
|
"eval_rouge1": 0.4708, |
|
"eval_rouge2": 0.2219, |
|
"eval_rougeL": 0.3892, |
|
"eval_rougeLsum": 0.389, |
|
"eval_runtime": 1186.1406, |
|
"eval_samples_per_second": 4.637, |
|
"eval_steps_per_second": 0.29, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.729827089337176e-05, |
|
"loss": 1.7026, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.6397694524495677e-05, |
|
"loss": 1.6618, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.91, |
|
"eval_gen_len": 26.728181818181817, |
|
"eval_loss": 1.5411016941070557, |
|
"eval_precision": 0.9094, |
|
"eval_recall": 0.9111, |
|
"eval_rouge1": 0.4776, |
|
"eval_rouge2": 0.2303, |
|
"eval_rougeL": 0.3977, |
|
"eval_rougeLsum": 0.3973, |
|
"eval_runtime": 1083.838, |
|
"eval_samples_per_second": 5.075, |
|
"eval_steps_per_second": 0.317, |
|
"step": 2083 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.5497118155619597e-05, |
|
"loss": 1.626, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.911, |
|
"eval_gen_len": 26.759636363636364, |
|
"eval_loss": 1.5170917510986328, |
|
"eval_precision": 0.9102, |
|
"eval_recall": 0.9121, |
|
"eval_rouge1": 0.4834, |
|
"eval_rouge2": 0.2345, |
|
"eval_rougeL": 0.402, |
|
"eval_rougeLsum": 0.402, |
|
"eval_runtime": 1053.82, |
|
"eval_samples_per_second": 5.219, |
|
"eval_steps_per_second": 0.326, |
|
"step": 2776 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.4596541786743516e-05, |
|
"loss": 1.5918, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9112, |
|
"eval_gen_len": 26.647636363636362, |
|
"eval_loss": 1.500138521194458, |
|
"eval_precision": 0.9106, |
|
"eval_recall": 0.9122, |
|
"eval_rouge1": 0.4853, |
|
"eval_rouge2": 0.2365, |
|
"eval_rougeL": 0.4045, |
|
"eval_rougeLsum": 0.4045, |
|
"eval_runtime": 1079.0919, |
|
"eval_samples_per_second": 5.097, |
|
"eval_steps_per_second": 0.319, |
|
"step": 3471 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 1.3695965417867436e-05, |
|
"loss": 1.5798, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.2795389048991355e-05, |
|
"loss": 1.5586, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.9116, |
|
"eval_gen_len": 26.777818181818184, |
|
"eval_loss": 1.4880452156066895, |
|
"eval_precision": 0.9108, |
|
"eval_recall": 0.9127, |
|
"eval_rouge1": 0.4875, |
|
"eval_rouge2": 0.2373, |
|
"eval_rougeL": 0.4063, |
|
"eval_rougeLsum": 0.4063, |
|
"eval_runtime": 1027.5441, |
|
"eval_samples_per_second": 5.353, |
|
"eval_steps_per_second": 0.335, |
|
"step": 4164 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 1.1894812680115276e-05, |
|
"loss": 1.5375, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.912, |
|
"eval_gen_len": 26.39909090909091, |
|
"eval_loss": 1.4768402576446533, |
|
"eval_precision": 0.9116, |
|
"eval_recall": 0.9128, |
|
"eval_rouge1": 0.4898, |
|
"eval_rouge2": 0.24, |
|
"eval_rougeL": 0.4083, |
|
"eval_rougeLsum": 0.4083, |
|
"eval_runtime": 922.1893, |
|
"eval_samples_per_second": 5.964, |
|
"eval_steps_per_second": 0.373, |
|
"step": 4858 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1.0994236311239194e-05, |
|
"loss": 1.5228, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 1.0093659942363115e-05, |
|
"loss": 1.5146, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.9126, |
|
"eval_gen_len": 26.156, |
|
"eval_loss": 1.4685654640197754, |
|
"eval_precision": 0.9123, |
|
"eval_recall": 0.9133, |
|
"eval_rouge1": 0.4907, |
|
"eval_rouge2": 0.241, |
|
"eval_rougeL": 0.4088, |
|
"eval_rougeLsum": 0.4089, |
|
"eval_runtime": 865.3485, |
|
"eval_samples_per_second": 6.356, |
|
"eval_steps_per_second": 0.398, |
|
"step": 5553 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 9.193083573487034e-06, |
|
"loss": 1.5006, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.9127, |
|
"eval_gen_len": 26.26290909090909, |
|
"eval_loss": 1.4636152982711792, |
|
"eval_precision": 0.9122, |
|
"eval_recall": 0.9135, |
|
"eval_rouge1": 0.4914, |
|
"eval_rouge2": 0.2419, |
|
"eval_rougeL": 0.4097, |
|
"eval_rougeLsum": 0.4099, |
|
"eval_runtime": 874.612, |
|
"eval_samples_per_second": 6.289, |
|
"eval_steps_per_second": 0.393, |
|
"step": 6247 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 8.29250720461095e-06, |
|
"loss": 1.49, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.9127, |
|
"eval_gen_len": 26.027272727272727, |
|
"eval_loss": 1.4580360651016235, |
|
"eval_precision": 0.9125, |
|
"eval_recall": 0.9133, |
|
"eval_rouge1": 0.4911, |
|
"eval_rouge2": 0.2429, |
|
"eval_rougeL": 0.4109, |
|
"eval_rougeLsum": 0.411, |
|
"eval_runtime": 855.8845, |
|
"eval_samples_per_second": 6.426, |
|
"eval_steps_per_second": 0.402, |
|
"step": 6942 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 7.391930835734871e-06, |
|
"loss": 1.485, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 6.491354466858791e-06, |
|
"loss": 1.4749, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.9131, |
|
"eval_gen_len": 26.230363636363638, |
|
"eval_loss": 1.4546109437942505, |
|
"eval_precision": 0.9127, |
|
"eval_recall": 0.9138, |
|
"eval_rouge1": 0.4932, |
|
"eval_rouge2": 0.244, |
|
"eval_rougeL": 0.4121, |
|
"eval_rougeLsum": 0.4123, |
|
"eval_runtime": 871.4205, |
|
"eval_samples_per_second": 6.312, |
|
"eval_steps_per_second": 0.395, |
|
"step": 7636 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 5.590778097982709e-06, |
|
"loss": 1.4661, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.9132, |
|
"eval_gen_len": 25.87781818181818, |
|
"eval_loss": 1.4514495134353638, |
|
"eval_precision": 0.9133, |
|
"eval_recall": 0.9136, |
|
"eval_rouge1": 0.4937, |
|
"eval_rouge2": 0.2448, |
|
"eval_rougeL": 0.4126, |
|
"eval_rougeLsum": 0.4127, |
|
"eval_runtime": 867.3574, |
|
"eval_samples_per_second": 6.341, |
|
"eval_steps_per_second": 0.397, |
|
"step": 8331 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 4.690201729106629e-06, |
|
"loss": 1.4626, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 3.7896253602305477e-06, |
|
"loss": 1.4575, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.9133, |
|
"eval_gen_len": 26.11509090909091, |
|
"eval_loss": 1.4499082565307617, |
|
"eval_precision": 0.913, |
|
"eval_recall": 0.914, |
|
"eval_rouge1": 0.4947, |
|
"eval_rouge2": 0.2453, |
|
"eval_rougeL": 0.4139, |
|
"eval_rougeLsum": 0.414, |
|
"eval_runtime": 860.9844, |
|
"eval_samples_per_second": 6.388, |
|
"eval_steps_per_second": 0.4, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 2.8890489913544673e-06, |
|
"loss": 1.4511, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.9133, |
|
"eval_gen_len": 26.028727272727274, |
|
"eval_loss": 1.44780433177948, |
|
"eval_precision": 0.9131, |
|
"eval_recall": 0.9138, |
|
"eval_rouge1": 0.4939, |
|
"eval_rouge2": 0.2451, |
|
"eval_rougeL": 0.4133, |
|
"eval_rougeLsum": 0.4134, |
|
"eval_runtime": 862.0827, |
|
"eval_samples_per_second": 6.38, |
|
"eval_steps_per_second": 0.399, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 1.988472622478386e-06, |
|
"loss": 1.4519, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.9133, |
|
"eval_gen_len": 25.907818181818183, |
|
"eval_loss": 1.4471020698547363, |
|
"eval_precision": 0.9132, |
|
"eval_recall": 0.9137, |
|
"eval_rouge1": 0.4938, |
|
"eval_rouge2": 0.2451, |
|
"eval_rougeL": 0.4134, |
|
"eval_rougeLsum": 0.4134, |
|
"eval_runtime": 855.2673, |
|
"eval_samples_per_second": 6.431, |
|
"eval_steps_per_second": 0.402, |
|
"step": 10414 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 1.0878962536023055e-06, |
|
"loss": 1.4475, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 1.8731988472622478e-07, |
|
"loss": 1.4439, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.9133, |
|
"eval_gen_len": 26.034545454545455, |
|
"eval_loss": 1.4474281072616577, |
|
"eval_precision": 0.9131, |
|
"eval_recall": 0.9139, |
|
"eval_rouge1": 0.4942, |
|
"eval_rouge2": 0.2456, |
|
"eval_rougeL": 0.4133, |
|
"eval_rougeLsum": 0.4134, |
|
"eval_runtime": 875.1275, |
|
"eval_samples_per_second": 6.285, |
|
"eval_steps_per_second": 0.393, |
|
"step": 11104 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 3.4293948126801158e-06, |
|
"loss": 1.4441, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_f1": 0.9134, |
|
"eval_gen_len": 25.939090909090908, |
|
"eval_loss": 1.4446682929992676, |
|
"eval_precision": 0.9133, |
|
"eval_recall": 0.9138, |
|
"eval_rouge1": 0.4945, |
|
"eval_rouge2": 0.2457, |
|
"eval_rougeL": 0.4139, |
|
"eval_rougeLsum": 0.414, |
|
"eval_runtime": 853.4658, |
|
"eval_samples_per_second": 6.444, |
|
"eval_steps_per_second": 0.403, |
|
"step": 11799 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"learning_rate": 2.708933717579251e-06, |
|
"loss": 1.444, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_f1": 0.9135, |
|
"eval_gen_len": 26.010727272727273, |
|
"eval_loss": 1.4445807933807373, |
|
"eval_precision": 0.9133, |
|
"eval_recall": 0.9141, |
|
"eval_rouge1": 0.4957, |
|
"eval_rouge2": 0.2473, |
|
"eval_rougeL": 0.415, |
|
"eval_rougeLsum": 0.4151, |
|
"eval_runtime": 869.7396, |
|
"eval_samples_per_second": 6.324, |
|
"eval_steps_per_second": 0.396, |
|
"step": 12493 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 1.988472622478386e-06, |
|
"loss": 1.4378, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 1.2680115273775217e-06, |
|
"loss": 1.4375, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_f1": 0.9136, |
|
"eval_gen_len": 25.88690909090909, |
|
"eval_loss": 1.4433233737945557, |
|
"eval_precision": 0.9136, |
|
"eval_recall": 0.914, |
|
"eval_rouge1": 0.4961, |
|
"eval_rouge2": 0.2473, |
|
"eval_rougeL": 0.4153, |
|
"eval_rougeLsum": 0.4153, |
|
"eval_runtime": 854.4011, |
|
"eval_samples_per_second": 6.437, |
|
"eval_steps_per_second": 0.403, |
|
"step": 13188 |
|
}, |
|
{ |
|
"epoch": 19.45, |
|
"learning_rate": 5.475504322766571e-07, |
|
"loss": 1.4361, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_f1": 0.9137, |
|
"eval_gen_len": 25.862909090909092, |
|
"eval_loss": 1.4432713985443115, |
|
"eval_precision": 0.9136, |
|
"eval_recall": 0.914, |
|
"eval_rouge1": 0.4961, |
|
"eval_rouge2": 0.2476, |
|
"eval_rougeL": 0.4155, |
|
"eval_rougeLsum": 0.4154, |
|
"eval_runtime": 863.7254, |
|
"eval_samples_per_second": 6.368, |
|
"eval_steps_per_second": 0.398, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 13880, |
|
"total_flos": 2.818047373345161e+18, |
|
"train_loss": 0.2986434628709249, |
|
"train_runtime": 16684.611, |
|
"train_samples_per_second": 119.871, |
|
"train_steps_per_second": 0.832 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 13880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 2.818047373345161e+18, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|