uk-mt5-gec / trainer_state.json
root
initial commit
ec4c964
{
"best_metric": 0.3033078610897064,
"best_model_checkpoint": "weights/checkpoint-2400",
"epoch": 2.05477107402653,
"global_step": 2400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.34,
"learning_rate": 0.0008858447488584475,
"loss": 1.0868,
"step": 400
},
{
"epoch": 0.34,
"eval_loss": 0.3964288830757141,
"eval_rouge1": 0.1061059158446104,
"eval_rouge2": 0.03465244917613476,
"eval_rougeL": 0.10627894663827464,
"eval_rougeLsum": 0.10587803730968609,
"eval_runtime": 213.0846,
"eval_samples_per_second": 11.667,
"eval_steps_per_second": 1.46,
"step": 400
},
{
"epoch": 0.68,
"learning_rate": 0.000771689497716895,
"loss": 0.4378,
"step": 800
},
{
"epoch": 0.68,
"eval_loss": 0.3599437475204468,
"eval_rouge1": 0.10699235686427112,
"eval_rouge2": 0.034402581194616595,
"eval_rougeL": 0.10726814102163426,
"eval_rougeLsum": 0.10683004020509843,
"eval_runtime": 209.0997,
"eval_samples_per_second": 11.889,
"eval_steps_per_second": 1.487,
"step": 800
},
{
"epoch": 1.03,
"learning_rate": 0.0006575342465753425,
"loss": 0.3695,
"step": 1200
},
{
"epoch": 1.03,
"eval_loss": 0.3147580027580261,
"eval_rouge1": 0.10717208698017014,
"eval_rouge2": 0.03477288244690299,
"eval_rougeL": 0.10748345500504963,
"eval_rougeLsum": 0.10712451901224819,
"eval_runtime": 208.2509,
"eval_samples_per_second": 11.938,
"eval_steps_per_second": 1.493,
"step": 1200
},
{
"epoch": 1.37,
"learning_rate": 0.00054337899543379,
"loss": 0.2645,
"step": 1600
},
{
"epoch": 1.37,
"eval_loss": 0.31476476788520813,
"eval_rouge1": 0.10597239490642546,
"eval_rouge2": 0.033266358974880464,
"eval_rougeL": 0.10614402559092825,
"eval_rougeLsum": 0.10592368490095762,
"eval_runtime": 208.6832,
"eval_samples_per_second": 11.913,
"eval_steps_per_second": 1.49,
"step": 1600
},
{
"epoch": 1.71,
"learning_rate": 0.00042922374429223744,
"loss": 0.2503,
"step": 2000
},
{
"epoch": 1.71,
"eval_loss": 0.3020365834236145,
"eval_rouge1": 0.10566329888936485,
"eval_rouge2": 0.03209767038592902,
"eval_rougeL": 0.10595888780924984,
"eval_rougeLsum": 0.10570986122575018,
"eval_runtime": 207.8626,
"eval_samples_per_second": 11.96,
"eval_steps_per_second": 1.496,
"step": 2000
},
{
"epoch": 2.05,
"learning_rate": 0.00031506849315068495,
"loss": 0.2264,
"step": 2400
},
{
"epoch": 2.05,
"eval_loss": 0.3033078610897064,
"eval_rouge1": 0.10519725760071695,
"eval_rouge2": 0.03199324082356017,
"eval_rougeL": 0.10550921669284499,
"eval_rougeLsum": 0.10514026310687613,
"eval_runtime": 207.427,
"eval_samples_per_second": 11.985,
"eval_steps_per_second": 1.499,
"step": 2400
}
],
"max_steps": 3504,
"num_train_epochs": 3,
"total_flos": 5666207247866880.0,
"trial_name": null,
"trial_params": null
}