|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.6, |
|
"eval_steps": 75, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.6603230237960815, |
|
"learning_rate": 0.00020833333333333335, |
|
"loss": 2.7781, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.839109182357788, |
|
"eval_rouge1": 28.5024, |
|
"eval_rouge2": 11.2717, |
|
"eval_rougeL": 22.108, |
|
"eval_rougeLsum": 22.4361, |
|
"eval_runtime": 722.0927, |
|
"eval_samples_per_second": 13.849, |
|
"eval_steps_per_second": 0.866, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.18350088596344, |
|
"learning_rate": 0.0004166666666666667, |
|
"loss": 2.0622, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.7575578689575195, |
|
"eval_rouge1": 28.0245, |
|
"eval_rouge2": 10.6112, |
|
"eval_rougeL": 21.7353, |
|
"eval_rougeLsum": 22.0685, |
|
"eval_runtime": 728.2015, |
|
"eval_samples_per_second": 13.732, |
|
"eval_steps_per_second": 0.858, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.9635187983512878, |
|
"learning_rate": 0.000625, |
|
"loss": 1.8636, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.617906928062439, |
|
"eval_rouge1": 27.353, |
|
"eval_rouge2": 10.6238, |
|
"eval_rougeL": 21.4686, |
|
"eval_rougeLsum": 21.7512, |
|
"eval_runtime": 730.8407, |
|
"eval_samples_per_second": 13.683, |
|
"eval_steps_per_second": 0.855, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.1745034456253052, |
|
"learning_rate": 0.0008333333333333334, |
|
"loss": 1.7408, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.6142878532409668, |
|
"eval_rouge1": 28.0928, |
|
"eval_rouge2": 11.2857, |
|
"eval_rougeL": 22.06, |
|
"eval_rougeLsum": 22.3629, |
|
"eval_runtime": 802.3401, |
|
"eval_samples_per_second": 12.464, |
|
"eval_steps_per_second": 0.779, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.7837355136871338, |
|
"learning_rate": 0.0009615384615384616, |
|
"loss": 1.6492, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.5411357879638672, |
|
"eval_rouge1": 27.8209, |
|
"eval_rouge2": 10.9184, |
|
"eval_rougeL": 21.6819, |
|
"eval_rougeLsum": 21.9773, |
|
"eval_runtime": 711.0964, |
|
"eval_samples_per_second": 14.063, |
|
"eval_steps_per_second": 0.879, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.5993546843528748, |
|
"learning_rate": 0.0007692307692307693, |
|
"loss": 1.5448, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.4802035093307495, |
|
"eval_rouge1": 28.0433, |
|
"eval_rouge2": 11.4232, |
|
"eval_rougeL": 22.0696, |
|
"eval_rougeLsum": 22.373, |
|
"eval_runtime": 728.4308, |
|
"eval_samples_per_second": 13.728, |
|
"eval_steps_per_second": 0.858, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.7141011357307434, |
|
"learning_rate": 0.0005769230769230769, |
|
"loss": 1.4454, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.462142825126648, |
|
"eval_rouge1": 27.8552, |
|
"eval_rouge2": 11.1708, |
|
"eval_rougeL": 21.8958, |
|
"eval_rougeLsum": 22.1949, |
|
"eval_runtime": 640.7723, |
|
"eval_samples_per_second": 15.606, |
|
"eval_steps_per_second": 0.975, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.6400988101959229, |
|
"learning_rate": 0.00038461538461538467, |
|
"loss": 1.3636, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.4522408246994019, |
|
"eval_rouge1": 28.3264, |
|
"eval_rouge2": 11.7945, |
|
"eval_rougeL": 22.3563, |
|
"eval_rougeLsum": 22.6524, |
|
"eval_runtime": 715.9922, |
|
"eval_samples_per_second": 13.967, |
|
"eval_steps_per_second": 0.873, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.6958301067352295, |
|
"learning_rate": 0.00019230769230769233, |
|
"loss": 1.2978, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.4346853494644165, |
|
"eval_rouge1": 28.444, |
|
"eval_rouge2": 11.9388, |
|
"eval_rougeL": 22.4279, |
|
"eval_rougeLsum": 22.7344, |
|
"eval_runtime": 652.829, |
|
"eval_samples_per_second": 15.318, |
|
"eval_steps_per_second": 0.957, |
|
"step": 5625 |
|
} |
|
], |
|
"logging_steps": 625, |
|
"max_steps": 6250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.075997032448e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|