longshort-c / trainer_state.json
allstax's picture
Upload folder using huggingface_hub
299c19b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 397,
"global_step": 2384,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17,
"eval_gen_len": 81.6102,
"eval_loss": 1.3396695852279663,
"eval_rouge1": 52.6908,
"eval_rouge2": 34.3367,
"eval_rougeL": 43.9351,
"eval_rougeLsum": 44.0153,
"eval_runtime": 41.4692,
"eval_samples_per_second": 1.423,
"eval_steps_per_second": 0.723,
"step": 397
},
{
"epoch": 0.21,
"grad_norm": 6.220447063446045,
"learning_rate": 1.895763422818792e-05,
"loss": 0.719,
"step": 500
},
{
"epoch": 0.33,
"eval_gen_len": 84.2203,
"eval_loss": 1.363059639930725,
"eval_rouge1": 54.543,
"eval_rouge2": 36.4199,
"eval_rougeL": 45.8273,
"eval_rougeLsum": 45.7925,
"eval_runtime": 42.2281,
"eval_samples_per_second": 1.397,
"eval_steps_per_second": 0.71,
"step": 794
},
{
"epoch": 0.42,
"grad_norm": 8.490495681762695,
"learning_rate": 1.7908976510067115e-05,
"loss": 0.7459,
"step": 1000
},
{
"epoch": 0.5,
"eval_gen_len": 85.2373,
"eval_loss": 1.3582559823989868,
"eval_rouge1": 53.2262,
"eval_rouge2": 34.8889,
"eval_rougeL": 44.1043,
"eval_rougeLsum": 44.0998,
"eval_runtime": 42.6328,
"eval_samples_per_second": 1.384,
"eval_steps_per_second": 0.704,
"step": 1191
},
{
"epoch": 0.63,
"grad_norm": 6.794929027557373,
"learning_rate": 1.686031879194631e-05,
"loss": 0.7154,
"step": 1500
},
{
"epoch": 0.67,
"eval_gen_len": 83.7797,
"eval_loss": 1.3886514902114868,
"eval_rouge1": 54.9928,
"eval_rouge2": 37.1125,
"eval_rougeL": 46.4105,
"eval_rougeLsum": 46.4044,
"eval_runtime": 42.3464,
"eval_samples_per_second": 1.393,
"eval_steps_per_second": 0.708,
"step": 1588
},
{
"epoch": 0.83,
"eval_gen_len": 85.8814,
"eval_loss": 1.3405011892318726,
"eval_rouge1": 52.5543,
"eval_rouge2": 33.702,
"eval_rougeL": 42.9428,
"eval_rougeLsum": 43.0015,
"eval_runtime": 43.1199,
"eval_samples_per_second": 1.368,
"eval_steps_per_second": 0.696,
"step": 1985
},
{
"epoch": 0.84,
"grad_norm": 8.691970825195312,
"learning_rate": 1.5811661073825504e-05,
"loss": 0.7507,
"step": 2000
},
{
"epoch": 1.0,
"eval_gen_len": 81.7797,
"eval_loss": 1.3399206399917603,
"eval_rouge1": 52.4327,
"eval_rouge2": 34.1158,
"eval_rougeL": 43.2742,
"eval_rougeLsum": 43.1693,
"eval_runtime": 41.935,
"eval_samples_per_second": 1.407,
"eval_steps_per_second": 0.715,
"step": 2382
}
],
"logging_steps": 500,
"max_steps": 9536,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 1192,
"total_flos": 5.17811143698432e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}