k4black's picture
Training in progress, step 3500
0b8b0b6
raw
history blame
No virus
4.29 kB
{
"best_metric": 0.5598750842730023,
"best_model_checkpoint": "results/checkpoint-3500",
"epoch": 0.7,
"global_step": 3500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 6.6e-05,
"loss": 2.0145,
"step": 500
},
{
"epoch": 0.1,
"eval_bleu": 0.1150567678154011,
"eval_exact_match": 0.132,
"eval_loss": 0.9738793969154358,
"eval_rouge1": 0.5382577445488361,
"eval_rouge2": 0.3545728830144713,
"eval_rougeL": 0.5187026156468759,
"eval_rougeLsum": 0.5259109026044895,
"eval_runtime": 59.0107,
"eval_samples_per_second": 33.892,
"eval_steps_per_second": 1.695,
"step": 500
},
{
"epoch": 0.2,
"learning_rate": 9.828070175438598e-05,
"loss": 0.8761,
"step": 1000
},
{
"epoch": 0.2,
"eval_bleu": 0.11272954562081616,
"eval_exact_match": 0.1475,
"eval_loss": 0.901577353477478,
"eval_rouge1": 0.5359795989196405,
"eval_rouge2": 0.3649742450836393,
"eval_rougeL": 0.5187273882783475,
"eval_rougeLsum": 0.5253745777261007,
"eval_runtime": 55.4587,
"eval_samples_per_second": 36.063,
"eval_steps_per_second": 1.803,
"step": 1000
},
{
"epoch": 0.3,
"learning_rate": 9.477192982456141e-05,
"loss": 0.8139,
"step": 1500
},
{
"epoch": 0.3,
"eval_bleu": 0.11562105292517781,
"eval_exact_match": 0.1425,
"eval_loss": 0.8843896389007568,
"eval_rouge1": 0.5385440798738839,
"eval_rouge2": 0.3630877932160531,
"eval_rougeL": 0.520238931080043,
"eval_rougeLsum": 0.5274836357515698,
"eval_runtime": 55.2097,
"eval_samples_per_second": 36.226,
"eval_steps_per_second": 1.811,
"step": 1500
},
{
"epoch": 0.4,
"learning_rate": 9.126315789473685e-05,
"loss": 0.7745,
"step": 2000
},
{
"epoch": 0.4,
"eval_bleu": 0.11999069408174769,
"eval_exact_match": 0.152,
"eval_loss": 0.8501775860786438,
"eval_rouge1": 0.5495869647995661,
"eval_rouge2": 0.37385624817390106,
"eval_rougeL": 0.5307934771920211,
"eval_rougeLsum": 0.5391651788581161,
"eval_runtime": 56.8684,
"eval_samples_per_second": 35.169,
"eval_steps_per_second": 1.758,
"step": 2000
},
{
"epoch": 0.5,
"learning_rate": 8.77543859649123e-05,
"loss": 0.7416,
"step": 2500
},
{
"epoch": 0.5,
"eval_bleu": 0.12262269562147479,
"eval_exact_match": 0.148,
"eval_loss": 0.8417201042175293,
"eval_rouge1": 0.5524556612739725,
"eval_rouge2": 0.37743352139147385,
"eval_rougeL": 0.5333960691072436,
"eval_rougeLsum": 0.5408174364513855,
"eval_runtime": 55.5707,
"eval_samples_per_second": 35.99,
"eval_steps_per_second": 1.8,
"step": 2500
},
{
"epoch": 0.6,
"learning_rate": 8.424561403508773e-05,
"loss": 0.7097,
"step": 3000
},
{
"epoch": 0.6,
"eval_bleu": 0.12307160579590622,
"eval_exact_match": 0.155,
"eval_loss": 0.8275034427642822,
"eval_rouge1": 0.5571591325212851,
"eval_rouge2": 0.3846841785043018,
"eval_rougeL": 0.5390722174679519,
"eval_rougeLsum": 0.5456202385734745,
"eval_runtime": 56.9486,
"eval_samples_per_second": 35.119,
"eval_steps_per_second": 1.756,
"step": 3000
},
{
"epoch": 0.7,
"learning_rate": 8.073684210526316e-05,
"loss": 0.6923,
"step": 3500
},
{
"epoch": 0.7,
"eval_bleu": 0.1269094917224047,
"eval_exact_match": 0.156,
"eval_loss": 0.8161399960517883,
"eval_rouge1": 0.5598750842730023,
"eval_rouge2": 0.38392351740583797,
"eval_rougeL": 0.5409288136858783,
"eval_rougeLsum": 0.5484962105303921,
"eval_runtime": 55.0573,
"eval_samples_per_second": 36.326,
"eval_steps_per_second": 1.816,
"step": 3500
}
],
"max_steps": 15000,
"num_train_epochs": 3,
"total_flos": 8985722848542720.0,
"trial_name": null,
"trial_params": null
}