|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.992800575953924, |
|
"eval_steps": 500, |
|
"global_step": 11104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.9399615754082615e-05, |
|
"loss": 2.0304, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.879923150816523e-05, |
|
"loss": 1.781, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9088, |
|
"eval_gen_len": 26.88909090909091, |
|
"eval_loss": 1.579687476158142, |
|
"eval_precision": 0.908, |
|
"eval_recall": 0.91, |
|
"eval_rouge1": 0.4708, |
|
"eval_rouge2": 0.2219, |
|
"eval_rougeL": 0.3892, |
|
"eval_rougeLsum": 0.389, |
|
"eval_runtime": 1186.1406, |
|
"eval_samples_per_second": 4.637, |
|
"eval_steps_per_second": 0.29, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.729827089337176e-05, |
|
"loss": 1.7026, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.6397694524495677e-05, |
|
"loss": 1.6618, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.91, |
|
"eval_gen_len": 26.728181818181817, |
|
"eval_loss": 1.5411016941070557, |
|
"eval_precision": 0.9094, |
|
"eval_recall": 0.9111, |
|
"eval_rouge1": 0.4776, |
|
"eval_rouge2": 0.2303, |
|
"eval_rougeL": 0.3977, |
|
"eval_rougeLsum": 0.3973, |
|
"eval_runtime": 1083.838, |
|
"eval_samples_per_second": 5.075, |
|
"eval_steps_per_second": 0.317, |
|
"step": 2083 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.5497118155619597e-05, |
|
"loss": 1.626, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.911, |
|
"eval_gen_len": 26.759636363636364, |
|
"eval_loss": 1.5170917510986328, |
|
"eval_precision": 0.9102, |
|
"eval_recall": 0.9121, |
|
"eval_rouge1": 0.4834, |
|
"eval_rouge2": 0.2345, |
|
"eval_rougeL": 0.402, |
|
"eval_rougeLsum": 0.402, |
|
"eval_runtime": 1053.82, |
|
"eval_samples_per_second": 5.219, |
|
"eval_steps_per_second": 0.326, |
|
"step": 2776 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.4596541786743516e-05, |
|
"loss": 1.5918, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9112, |
|
"eval_gen_len": 26.647636363636362, |
|
"eval_loss": 1.500138521194458, |
|
"eval_precision": 0.9106, |
|
"eval_recall": 0.9122, |
|
"eval_rouge1": 0.4853, |
|
"eval_rouge2": 0.2365, |
|
"eval_rougeL": 0.4045, |
|
"eval_rougeLsum": 0.4045, |
|
"eval_runtime": 1079.0919, |
|
"eval_samples_per_second": 5.097, |
|
"eval_steps_per_second": 0.319, |
|
"step": 3471 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 1.3695965417867436e-05, |
|
"loss": 1.5798, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.2795389048991355e-05, |
|
"loss": 1.5586, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.9116, |
|
"eval_gen_len": 26.777818181818184, |
|
"eval_loss": 1.4880452156066895, |
|
"eval_precision": 0.9108, |
|
"eval_recall": 0.9127, |
|
"eval_rouge1": 0.4875, |
|
"eval_rouge2": 0.2373, |
|
"eval_rougeL": 0.4063, |
|
"eval_rougeLsum": 0.4063, |
|
"eval_runtime": 1027.5441, |
|
"eval_samples_per_second": 5.353, |
|
"eval_steps_per_second": 0.335, |
|
"step": 4164 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 1.1894812680115276e-05, |
|
"loss": 1.5375, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.912, |
|
"eval_gen_len": 26.39909090909091, |
|
"eval_loss": 1.4768402576446533, |
|
"eval_precision": 0.9116, |
|
"eval_recall": 0.9128, |
|
"eval_rouge1": 0.4898, |
|
"eval_rouge2": 0.24, |
|
"eval_rougeL": 0.4083, |
|
"eval_rougeLsum": 0.4083, |
|
"eval_runtime": 922.1893, |
|
"eval_samples_per_second": 5.964, |
|
"eval_steps_per_second": 0.373, |
|
"step": 4858 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1.0994236311239194e-05, |
|
"loss": 1.5228, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 1.0093659942363115e-05, |
|
"loss": 1.5146, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.9126, |
|
"eval_gen_len": 26.156, |
|
"eval_loss": 1.4685654640197754, |
|
"eval_precision": 0.9123, |
|
"eval_recall": 0.9133, |
|
"eval_rouge1": 0.4907, |
|
"eval_rouge2": 0.241, |
|
"eval_rougeL": 0.4088, |
|
"eval_rougeLsum": 0.4089, |
|
"eval_runtime": 865.3485, |
|
"eval_samples_per_second": 6.356, |
|
"eval_steps_per_second": 0.398, |
|
"step": 5553 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 9.193083573487034e-06, |
|
"loss": 1.5006, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.9127, |
|
"eval_gen_len": 26.26290909090909, |
|
"eval_loss": 1.4636152982711792, |
|
"eval_precision": 0.9122, |
|
"eval_recall": 0.9135, |
|
"eval_rouge1": 0.4914, |
|
"eval_rouge2": 0.2419, |
|
"eval_rougeL": 0.4097, |
|
"eval_rougeLsum": 0.4099, |
|
"eval_runtime": 874.612, |
|
"eval_samples_per_second": 6.289, |
|
"eval_steps_per_second": 0.393, |
|
"step": 6247 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 8.29250720461095e-06, |
|
"loss": 1.49, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.9127, |
|
"eval_gen_len": 26.027272727272727, |
|
"eval_loss": 1.4580360651016235, |
|
"eval_precision": 0.9125, |
|
"eval_recall": 0.9133, |
|
"eval_rouge1": 0.4911, |
|
"eval_rouge2": 0.2429, |
|
"eval_rougeL": 0.4109, |
|
"eval_rougeLsum": 0.411, |
|
"eval_runtime": 855.8845, |
|
"eval_samples_per_second": 6.426, |
|
"eval_steps_per_second": 0.402, |
|
"step": 6942 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 7.391930835734871e-06, |
|
"loss": 1.485, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 6.491354466858791e-06, |
|
"loss": 1.4749, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.9131, |
|
"eval_gen_len": 26.230363636363638, |
|
"eval_loss": 1.4546109437942505, |
|
"eval_precision": 0.9127, |
|
"eval_recall": 0.9138, |
|
"eval_rouge1": 0.4932, |
|
"eval_rouge2": 0.244, |
|
"eval_rougeL": 0.4121, |
|
"eval_rougeLsum": 0.4123, |
|
"eval_runtime": 871.4205, |
|
"eval_samples_per_second": 6.312, |
|
"eval_steps_per_second": 0.395, |
|
"step": 7636 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 5.590778097982709e-06, |
|
"loss": 1.4661, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.9132, |
|
"eval_gen_len": 25.87781818181818, |
|
"eval_loss": 1.4514495134353638, |
|
"eval_precision": 0.9133, |
|
"eval_recall": 0.9136, |
|
"eval_rouge1": 0.4937, |
|
"eval_rouge2": 0.2448, |
|
"eval_rougeL": 0.4126, |
|
"eval_rougeLsum": 0.4127, |
|
"eval_runtime": 867.3574, |
|
"eval_samples_per_second": 6.341, |
|
"eval_steps_per_second": 0.397, |
|
"step": 8331 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 4.690201729106629e-06, |
|
"loss": 1.4626, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 3.7896253602305477e-06, |
|
"loss": 1.4575, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.9133, |
|
"eval_gen_len": 26.11509090909091, |
|
"eval_loss": 1.4499082565307617, |
|
"eval_precision": 0.913, |
|
"eval_recall": 0.914, |
|
"eval_rouge1": 0.4947, |
|
"eval_rouge2": 0.2453, |
|
"eval_rougeL": 0.4139, |
|
"eval_rougeLsum": 0.414, |
|
"eval_runtime": 860.9844, |
|
"eval_samples_per_second": 6.388, |
|
"eval_steps_per_second": 0.4, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 2.8890489913544673e-06, |
|
"loss": 1.4511, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.9133, |
|
"eval_gen_len": 26.028727272727274, |
|
"eval_loss": 1.44780433177948, |
|
"eval_precision": 0.9131, |
|
"eval_recall": 0.9138, |
|
"eval_rouge1": 0.4939, |
|
"eval_rouge2": 0.2451, |
|
"eval_rougeL": 0.4133, |
|
"eval_rougeLsum": 0.4134, |
|
"eval_runtime": 862.0827, |
|
"eval_samples_per_second": 6.38, |
|
"eval_steps_per_second": 0.399, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 1.988472622478386e-06, |
|
"loss": 1.4519, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.9133, |
|
"eval_gen_len": 25.907818181818183, |
|
"eval_loss": 1.4471020698547363, |
|
"eval_precision": 0.9132, |
|
"eval_recall": 0.9137, |
|
"eval_rouge1": 0.4938, |
|
"eval_rouge2": 0.2451, |
|
"eval_rougeL": 0.4134, |
|
"eval_rougeLsum": 0.4134, |
|
"eval_runtime": 855.2673, |
|
"eval_samples_per_second": 6.431, |
|
"eval_steps_per_second": 0.402, |
|
"step": 10414 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 1.0878962536023055e-06, |
|
"loss": 1.4475, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 1.8731988472622478e-07, |
|
"loss": 1.4439, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_f1": 0.9134, |
|
"eval_gen_len": 25.96290909090909, |
|
"eval_loss": 1.4468724727630615, |
|
"eval_precision": 0.9133, |
|
"eval_recall": 0.9138, |
|
"eval_rouge1": 0.4939, |
|
"eval_rouge2": 0.2453, |
|
"eval_rougeL": 0.4133, |
|
"eval_rougeLsum": 0.4134, |
|
"eval_runtime": 864.4194, |
|
"eval_samples_per_second": 6.363, |
|
"eval_steps_per_second": 0.398, |
|
"step": 11104 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"step": 11104, |
|
"total_flos": 2.2405705733792072e+18, |
|
"train_loss": 0.8767006197992594, |
|
"train_runtime": 37037.5595, |
|
"train_samples_per_second": 43.199, |
|
"train_steps_per_second": 0.3 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 11104, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 16, |
|
"save_steps": 500, |
|
"total_flos": 2.2405705733792072e+18, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|