|
{ |
|
"best_metric": 0.3033078610897064, |
|
"best_model_checkpoint": "weights/checkpoint-2400", |
|
"epoch": 2.05477107402653, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0008858447488584475, |
|
"loss": 1.0868, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.3964288830757141, |
|
"eval_rouge1": 0.1061059158446104, |
|
"eval_rouge2": 0.03465244917613476, |
|
"eval_rougeL": 0.10627894663827464, |
|
"eval_rougeLsum": 0.10587803730968609, |
|
"eval_runtime": 213.0846, |
|
"eval_samples_per_second": 11.667, |
|
"eval_steps_per_second": 1.46, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.000771689497716895, |
|
"loss": 0.4378, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.3599437475204468, |
|
"eval_rouge1": 0.10699235686427112, |
|
"eval_rouge2": 0.034402581194616595, |
|
"eval_rougeL": 0.10726814102163426, |
|
"eval_rougeLsum": 0.10683004020509843, |
|
"eval_runtime": 209.0997, |
|
"eval_samples_per_second": 11.889, |
|
"eval_steps_per_second": 1.487, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0006575342465753425, |
|
"loss": 0.3695, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 0.3147580027580261, |
|
"eval_rouge1": 0.10717208698017014, |
|
"eval_rouge2": 0.03477288244690299, |
|
"eval_rougeL": 0.10748345500504963, |
|
"eval_rougeLsum": 0.10712451901224819, |
|
"eval_runtime": 208.2509, |
|
"eval_samples_per_second": 11.938, |
|
"eval_steps_per_second": 1.493, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00054337899543379, |
|
"loss": 0.2645, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.31476476788520813, |
|
"eval_rouge1": 0.10597239490642546, |
|
"eval_rouge2": 0.033266358974880464, |
|
"eval_rougeL": 0.10614402559092825, |
|
"eval_rougeLsum": 0.10592368490095762, |
|
"eval_runtime": 208.6832, |
|
"eval_samples_per_second": 11.913, |
|
"eval_steps_per_second": 1.49, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00042922374429223744, |
|
"loss": 0.2503, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 0.3020365834236145, |
|
"eval_rouge1": 0.10566329888936485, |
|
"eval_rouge2": 0.03209767038592902, |
|
"eval_rougeL": 0.10595888780924984, |
|
"eval_rougeLsum": 0.10570986122575018, |
|
"eval_runtime": 207.8626, |
|
"eval_samples_per_second": 11.96, |
|
"eval_steps_per_second": 1.496, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00031506849315068495, |
|
"loss": 0.2264, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 0.3033078610897064, |
|
"eval_rouge1": 0.10519725760071695, |
|
"eval_rouge2": 0.03199324082356017, |
|
"eval_rougeL": 0.10550921669284499, |
|
"eval_rougeLsum": 0.10514026310687613, |
|
"eval_runtime": 207.427, |
|
"eval_samples_per_second": 11.985, |
|
"eval_steps_per_second": 1.499, |
|
"step": 2400 |
|
} |
|
], |
|
"max_steps": 3504, |
|
"num_train_epochs": 3, |
|
"total_flos": 5666207247866880.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|