|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.32827994984611875, |
|
"eval_steps": 720, |
|
"global_step": 5760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.3646579682826996, |
|
"learning_rate": 9.795674104471232e-05, |
|
"loss": 0.0784, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_bertscore": 0.7196829915046692, |
|
"eval_loss": 0.10667099058628082, |
|
"eval_rouge1": 0.5424204681399662, |
|
"eval_rouge2": 0.3232621307936826, |
|
"eval_rougeL": 0.4168316330799737, |
|
"eval_rougeLsum": 0.41874171810922023, |
|
"eval_runtime": 50.0749, |
|
"eval_samples_per_second": 1.078, |
|
"eval_steps_per_second": 0.28, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.590778262232482e-05, |
|
"loss": 0.0858, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bertscore": 0.6401040554046631, |
|
"eval_loss": 0.18695296347141266, |
|
"eval_rouge1": 0.4271646967472444, |
|
"eval_rouge2": 0.1659738534008396, |
|
"eval_rougeL": 0.28939047028045584, |
|
"eval_rougeLsum": 0.28900025002813623, |
|
"eval_runtime": 43.3827, |
|
"eval_samples_per_second": 1.245, |
|
"eval_steps_per_second": 0.323, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.3369450867176056, |
|
"learning_rate": 9.38559744663874e-05, |
|
"loss": 0.0887, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_bertscore": 0.7218716740608215, |
|
"eval_loss": 0.1099499836564064, |
|
"eval_rouge1": 0.5524760977800962, |
|
"eval_rouge2": 0.330958882130141, |
|
"eval_rougeL": 0.4259051117722474, |
|
"eval_rougeLsum": 0.42919968644337714, |
|
"eval_runtime": 47.1009, |
|
"eval_samples_per_second": 1.146, |
|
"eval_steps_per_second": 0.297, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.36330386996269226, |
|
"learning_rate": 9.180416631044998e-05, |
|
"loss": 0.0933, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bertscore": 0.7138540744781494, |
|
"eval_loss": 0.1120433360338211, |
|
"eval_rouge1": 0.5390251173909333, |
|
"eval_rouge2": 0.31146103356099275, |
|
"eval_rougeL": 0.41387331131584476, |
|
"eval_rougeLsum": 0.4135311998867288, |
|
"eval_runtime": 46.0195, |
|
"eval_samples_per_second": 1.173, |
|
"eval_steps_per_second": 0.304, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.35335826873779297, |
|
"learning_rate": 8.975235815451256e-05, |
|
"loss": 0.0862, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_bertscore": 0.7312328219413757, |
|
"eval_loss": 0.10742145031690598, |
|
"eval_rouge1": 0.5669051469800668, |
|
"eval_rouge2": 0.3520718989284114, |
|
"eval_rougeL": 0.44425293679893696, |
|
"eval_rougeLsum": 0.44695777725182906, |
|
"eval_runtime": 45.6563, |
|
"eval_samples_per_second": 1.183, |
|
"eval_steps_per_second": 0.307, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.37303468585014343, |
|
"learning_rate": 8.770339973212504e-05, |
|
"loss": 0.0911, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_bertscore": 0.7096375226974487, |
|
"eval_loss": 0.11186981201171875, |
|
"eval_rouge1": 0.5333109271513738, |
|
"eval_rouge2": 0.30338933797823264, |
|
"eval_rougeL": 0.4003430978893555, |
|
"eval_rougeLsum": 0.4005552066640774, |
|
"eval_runtime": 43.9874, |
|
"eval_samples_per_second": 1.228, |
|
"eval_steps_per_second": 0.318, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.30176717042922974, |
|
"learning_rate": 8.565159157618762e-05, |
|
"loss": 0.0893, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_bertscore": 0.7181953191757202, |
|
"eval_loss": 0.10768043249845505, |
|
"eval_rouge1": 0.5493961807050101, |
|
"eval_rouge2": 0.3304637891082364, |
|
"eval_rougeL": 0.42184528341938216, |
|
"eval_rougeLsum": 0.4241212110511772, |
|
"eval_runtime": 45.8651, |
|
"eval_samples_per_second": 1.177, |
|
"eval_steps_per_second": 0.305, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.35905396938323975, |
|
"learning_rate": 8.35997834202502e-05, |
|
"loss": 0.0895, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_bertscore": 0.7235485315322876, |
|
"eval_loss": 0.10892420262098312, |
|
"eval_rouge1": 0.5464214086441583, |
|
"eval_rouge2": 0.33055059501726136, |
|
"eval_rougeL": 0.4309761675921166, |
|
"eval_rougeLsum": 0.43243067509302885, |
|
"eval_runtime": 46.6469, |
|
"eval_samples_per_second": 1.158, |
|
"eval_steps_per_second": 0.3, |
|
"step": 5760 |
|
} |
|
], |
|
"logging_steps": 720, |
|
"max_steps": 35092, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 2880, |
|
"total_flos": 4.993009003266048e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|