flan-t5-large-full / checkpoint-1208 /trainer_state.json
alexyhc's picture
Upload folder using huggingface_hub
3140e34 verified
{
"best_metric": 1.7036641836166382,
"best_model_checkpoint": "flan-t5-large/checkpoint-604",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 1208,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_gen_len": 106.27584586466165,
"eval_loss": 1.7234793901443481,
"eval_rouge1": 44.3127,
"eval_rouge2": 18.5321,
"eval_rougeL": 31.8618,
"eval_rougeLsum": 41.1286,
"eval_runtime": 1981.3194,
"eval_samples_per_second": 1.064,
"eval_steps_per_second": 0.019,
"step": 302
},
{
"epoch": 1.66,
"grad_norm": 0.43800386174930117,
"learning_rate": 0.0001,
"loss": 1.8444,
"step": 500
},
{
"epoch": 2.0,
"eval_gen_len": 98.73778195488721,
"eval_loss": 1.7036641836166382,
"eval_rouge1": 44.6191,
"eval_rouge2": 18.529,
"eval_rougeL": 32.4108,
"eval_rougeLsum": 41.5764,
"eval_runtime": 1977.8502,
"eval_samples_per_second": 1.066,
"eval_steps_per_second": 0.019,
"step": 604
},
{
"epoch": 3.0,
"eval_gen_len": 100.92763157894737,
"eval_loss": 1.7065348625183105,
"eval_rouge1": 45.2044,
"eval_rouge2": 19.0224,
"eval_rougeL": 32.7758,
"eval_rougeLsum": 42.1475,
"eval_runtime": 1977.3953,
"eval_samples_per_second": 1.066,
"eval_steps_per_second": 0.019,
"step": 906
},
{
"epoch": 3.31,
"grad_norm": 0.4764731580095982,
"learning_rate": 0.0001,
"loss": 1.6046,
"step": 1000
},
{
"epoch": 4.0,
"eval_gen_len": 97.4515977443609,
"eval_loss": 1.7114441394805908,
"eval_rouge1": 45.5049,
"eval_rouge2": 19.1992,
"eval_rougeL": 32.905,
"eval_rougeLsum": 42.5556,
"eval_runtime": 1982.5948,
"eval_samples_per_second": 1.063,
"eval_steps_per_second": 0.019,
"step": 1208
}
],
"logging_steps": 500,
"max_steps": 1510,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 25961869344768.0,
"train_batch_size": 14,
"trial_name": null,
"trial_params": null
}