{
  "best_metric": 11.716,
  "best_model_checkpoint": "saved/tobyoki-pairwise/bart-base-japanese/BaseModel/checkpoint-4332",
  "epoch": 4.0,
  "global_step": 17328,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12,
      "learning_rate": 4.8845798707294554e-05,
      "loss": 2.1617,
      "step": 500
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.7691597414589107e-05,
      "loss": 1.346,
      "step": 1000
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.653739612188366e-05,
      "loss": 0.96,
      "step": 1500
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.538319482917821e-05,
      "loss": 0.7184,
      "step": 2000
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.422899353647276e-05,
      "loss": 0.5936,
      "step": 2500
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.3074792243767315e-05,
      "loss": 0.4745,
      "step": 3000
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.192059095106187e-05,
      "loss": 0.389,
      "step": 3500
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.076638965835642e-05,
      "loss": 0.298,
      "step": 4000
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 52.15822784810127,
      "eval_loss": 2.7362189292907715,
      "eval_rouge1": 11.716,
      "eval_rouge2": 1.7738,
      "eval_rougeL": 7.7212,
      "eval_rougeLsum": 10.0982,
      "eval_runtime": 282.218,
      "eval_samples_per_second": 2.239,
      "eval_steps_per_second": 2.239,
      "step": 4332
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.961218836565097e-05,
      "loss": 0.2931,
      "step": 4500
    },
    {
      "epoch": 1.15,
      "learning_rate": 3.845798707294552e-05,
      "loss": 0.27,
      "step": 5000
    },
    {
      "epoch": 1.27,
      "learning_rate": 3.7303785780240075e-05,
      "loss": 0.2255,
      "step": 5500
    },
    {
      "epoch": 1.39,
      "learning_rate": 3.614958448753463e-05,
      "loss": 0.1818,
      "step": 6000
    },
    {
      "epoch": 1.5,
      "learning_rate": 3.499538319482918e-05,
      "loss": 0.1736,
      "step": 6500
    },
    {
      "epoch": 1.62,
      "learning_rate": 3.384118190212373e-05,
      "loss": 0.1894,
      "step": 7000
    },
    {
      "epoch": 1.73,
      "learning_rate": 3.2686980609418284e-05,
      "loss": 0.1642,
      "step": 7500
    },
    {
      "epoch": 1.85,
      "learning_rate": 3.1532779316712836e-05,
      "loss": 0.1262,
      "step": 8000
    },
    {
      "epoch": 1.96,
      "learning_rate": 3.0378578024007388e-05,
      "loss": 0.1596,
      "step": 8500
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 42.75791139240506,
      "eval_loss": 3.1079843044281006,
      "eval_rouge1": 11.6339,
      "eval_rouge2": 1.9589,
      "eval_rougeL": 8.2226,
      "eval_rougeLsum": 9.5881,
      "eval_runtime": 452.5754,
      "eval_samples_per_second": 1.396,
      "eval_steps_per_second": 1.396,
      "step": 8664
    },
    {
      "epoch": 2.08,
      "learning_rate": 2.922437673130194e-05,
      "loss": 0.1249,
      "step": 9000
    },
    {
      "epoch": 2.19,
      "learning_rate": 2.8070175438596492e-05,
      "loss": 0.1056,
      "step": 9500
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.6915974145891044e-05,
      "loss": 0.0981,
      "step": 10000
    },
    {
      "epoch": 2.42,
      "learning_rate": 2.5761772853185596e-05,
      "loss": 0.1025,
      "step": 10500
    },
    {
      "epoch": 2.54,
      "learning_rate": 2.460757156048015e-05,
      "loss": 0.0777,
      "step": 11000
    },
    {
      "epoch": 2.65,
      "learning_rate": 2.3453370267774704e-05,
      "loss": 0.0849,
      "step": 11500
    },
    {
      "epoch": 2.77,
      "learning_rate": 2.2299168975069256e-05,
      "loss": 0.095,
      "step": 12000
    },
    {
      "epoch": 2.89,
      "learning_rate": 2.1144967682363804e-05,
      "loss": 0.0805,
      "step": 12500
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 43.24683544303797,
      "eval_loss": 3.2877912521362305,
      "eval_rouge1": 11.3717,
      "eval_rouge2": 2.0083,
      "eval_rougeL": 7.8703,
      "eval_rougeLsum": 9.6383,
      "eval_runtime": 236.6502,
      "eval_samples_per_second": 2.671,
      "eval_steps_per_second": 2.671,
      "step": 12996
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.9990766389658356e-05,
      "loss": 0.081,
      "step": 13000
    },
    {
      "epoch": 3.12,
      "learning_rate": 1.883656509695291e-05,
      "loss": 0.0562,
      "step": 13500
    },
    {
      "epoch": 3.23,
      "learning_rate": 1.768236380424746e-05,
      "loss": 0.0659,
      "step": 14000
    },
    {
      "epoch": 3.35,
      "learning_rate": 1.6528162511542013e-05,
      "loss": 0.0641,
      "step": 14500
    },
    {
      "epoch": 3.46,
      "learning_rate": 1.5373961218836565e-05,
      "loss": 0.0621,
      "step": 15000
    },
    {
      "epoch": 3.58,
      "learning_rate": 1.4219759926131118e-05,
      "loss": 0.0495,
      "step": 15500
    },
    {
      "epoch": 3.69,
      "learning_rate": 1.306555863342567e-05,
      "loss": 0.052,
      "step": 16000
    },
    {
      "epoch": 3.81,
      "learning_rate": 1.1911357340720223e-05,
      "loss": 0.0624,
      "step": 16500
    },
    {
      "epoch": 3.92,
      "learning_rate": 1.0757156048014775e-05,
      "loss": 0.0576,
      "step": 17000
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 43.36867088607595,
      "eval_loss": 3.3865511417388916,
      "eval_rouge1": 11.5579,
      "eval_rouge2": 2.3617,
      "eval_rougeL": 8.299,
      "eval_rougeLsum": 9.8507,
      "eval_runtime": 456.2669,
      "eval_samples_per_second": 1.385,
      "eval_steps_per_second": 1.385,
      "step": 17328
    },
    {
      "epoch": 4.0,
      "step": 17328,
      "total_flos": 2855176963080192.0,
      "train_loss": 0.29059598513036994,
      "train_runtime": 2646.0496,
      "train_samples_per_second": 8.186,
      "train_steps_per_second": 8.186
    }
  ],
  "max_steps": 21660,
  "num_train_epochs": 5,
  "total_flos": 2855176963080192.0,
  "trial_name": null,
  "trial_params": null
}