|
{ |
|
"best_metric": 13.7182, |
|
"best_model_checkpoint": "saved/tobyoki-pairwise/bart-base-japanese/BaseModel/checkpoint-8664", |
|
"epoch": 5.0, |
|
"global_step": 21660, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8845798707294554e-05, |
|
"loss": 2.1628, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7691597414589107e-05, |
|
"loss": 1.3515, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.653739612188366e-05, |
|
"loss": 0.9577, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.538319482917821e-05, |
|
"loss": 0.7042, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.422899353647276e-05, |
|
"loss": 0.5935, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.3074792243767315e-05, |
|
"loss": 0.4658, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.192059095106187e-05, |
|
"loss": 0.3854, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.076638965835642e-05, |
|
"loss": 0.2994, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 55.063291139240505, |
|
"eval_loss": 2.7883288860321045, |
|
"eval_rouge1": 11.1611, |
|
"eval_rouge2": 1.7768, |
|
"eval_rougeL": 7.5158, |
|
"eval_rougeLsum": 9.6222, |
|
"eval_runtime": 352.9978, |
|
"eval_samples_per_second": 1.79, |
|
"eval_steps_per_second": 1.79, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.961218836565097e-05, |
|
"loss": 0.2806, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.845798707294552e-05, |
|
"loss": 0.263, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.7303785780240075e-05, |
|
"loss": 0.2187, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.614958448753463e-05, |
|
"loss": 0.1761, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.499538319482918e-05, |
|
"loss": 0.1664, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.384118190212373e-05, |
|
"loss": 0.1896, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.2686980609418284e-05, |
|
"loss": 0.1627, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.1532779316712836e-05, |
|
"loss": 0.1146, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.0378578024007388e-05, |
|
"loss": 0.1513, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 57.35284810126582, |
|
"eval_loss": 3.128607988357544, |
|
"eval_rouge1": 13.7182, |
|
"eval_rouge2": 2.311, |
|
"eval_rougeL": 9.1726, |
|
"eval_rougeLsum": 11.5058, |
|
"eval_runtime": 358.8793, |
|
"eval_samples_per_second": 1.761, |
|
"eval_steps_per_second": 1.761, |
|
"step": 8664 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.922437673130194e-05, |
|
"loss": 0.125, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.8070175438596492e-05, |
|
"loss": 0.1028, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.6915974145891044e-05, |
|
"loss": 0.0934, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.5761772853185596e-05, |
|
"loss": 0.0993, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.460757156048015e-05, |
|
"loss": 0.074, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3453370267774704e-05, |
|
"loss": 0.0857, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2299168975069256e-05, |
|
"loss": 0.0823, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.1144967682363804e-05, |
|
"loss": 0.0778, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 48.70886075949367, |
|
"eval_loss": 3.3238439559936523, |
|
"eval_rouge1": 12.1173, |
|
"eval_rouge2": 1.88, |
|
"eval_rougeL": 8.1156, |
|
"eval_rougeLsum": 10.1187, |
|
"eval_runtime": 315.5777, |
|
"eval_samples_per_second": 2.003, |
|
"eval_steps_per_second": 2.003, |
|
"step": 12996 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.9990766389658356e-05, |
|
"loss": 0.078, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.883656509695291e-05, |
|
"loss": 0.0546, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.768236380424746e-05, |
|
"loss": 0.062, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.6528162511542013e-05, |
|
"loss": 0.0656, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.5373961218836565e-05, |
|
"loss": 0.0555, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.4219759926131118e-05, |
|
"loss": 0.0437, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.306555863342567e-05, |
|
"loss": 0.05, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.1911357340720223e-05, |
|
"loss": 0.0577, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.0757156048014775e-05, |
|
"loss": 0.056, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 50.7373417721519, |
|
"eval_loss": 3.4031858444213867, |
|
"eval_rouge1": 11.9555, |
|
"eval_rouge2": 2.0536, |
|
"eval_rougeL": 8.2185, |
|
"eval_rougeLsum": 10.0656, |
|
"eval_runtime": 343.5063, |
|
"eval_samples_per_second": 1.84, |
|
"eval_steps_per_second": 1.84, |
|
"step": 17328 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 9.602954755309327e-06, |
|
"loss": 0.0416, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 8.448753462603879e-06, |
|
"loss": 0.0393, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 7.29455216989843e-06, |
|
"loss": 0.0368, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 6.140350877192982e-06, |
|
"loss": 0.0433, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 4.986149584487535e-06, |
|
"loss": 0.0421, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 3.831948291782087e-06, |
|
"loss": 0.038, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.6777469990766392e-06, |
|
"loss": 0.0285, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.5235457063711913e-06, |
|
"loss": 0.0343, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 3.693444136657433e-07, |
|
"loss": 0.0364, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 50.44462025316456, |
|
"eval_loss": 3.525162696838379, |
|
"eval_rouge1": 11.814, |
|
"eval_rouge2": 1.7965, |
|
"eval_rougeL": 8.0177, |
|
"eval_rougeLsum": 9.7342, |
|
"eval_runtime": 338.0819, |
|
"eval_samples_per_second": 1.869, |
|
"eval_steps_per_second": 1.869, |
|
"step": 21660 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 21660, |
|
"total_flos": 3568971203850240.0, |
|
"train_loss": 0.23670565184904047, |
|
"train_runtime": 3830.6771, |
|
"train_samples_per_second": 5.654, |
|
"train_steps_per_second": 5.654 |
|
} |
|
], |
|
"max_steps": 21660, |
|
"num_train_epochs": 5, |
|
"total_flos": 3568971203850240.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|