{
  "best_metric": 1.1251049041748047,
  "best_model_checkpoint": "./mbartLarge_koja_mid2_run1/checkpoint-22708",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 22708,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 4.977981328166285e-05,
      "loss": 1.9241,
      "step": 500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.95596265633257e-05,
      "loss": 1.6494,
      "step": 1000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.933943984498855e-05,
      "loss": 1.5434,
      "step": 1500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.91192531266514e-05,
      "loss": 1.4836,
      "step": 2000
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.889906640831425e-05,
      "loss": 1.4499,
      "step": 2500
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.86788796899771e-05,
      "loss": 1.3934,
      "step": 3000
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.845869297163995e-05,
      "loss": 1.3857,
      "step": 3500
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.82385062533028e-05,
      "loss": 1.3396,
      "step": 4000
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.801831953496565e-05,
      "loss": 1.3322,
      "step": 4500
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.77981328166285e-05,
      "loss": 1.3106,
      "step": 5000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.757794609829135e-05,
      "loss": 1.2924,
      "step": 5500
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.73577593799542e-05,
      "loss": 1.2834,
      "step": 6000
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.713757266161705e-05,
      "loss": 1.2791,
      "step": 6500
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.69173859432799e-05,
      "loss": 1.255,
      "step": 7000
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.669719922494275e-05,
      "loss": 1.2573,
      "step": 7500
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.64770125066056e-05,
      "loss": 1.2382,
      "step": 8000
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.625682578826845e-05,
      "loss": 1.223,
      "step": 8500
    },
    {
      "epoch": 0.79,
      "learning_rate": 4.6036639069931303e-05,
      "loss": 1.2145,
      "step": 9000
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.5816452351594153e-05,
      "loss": 1.219,
      "step": 9500
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.5596265633257004e-05,
      "loss": 1.2125,
      "step": 10000
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.5376078914919854e-05,
      "loss": 1.1996,
      "step": 10500
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.5155892196582704e-05,
      "loss": 1.1823,
      "step": 11000
    },
    {
      "epoch": 1.0,
      "eval_bleu": 29.4501,
      "eval_gen_len": 18.8118,
      "eval_loss": 1.1695398092269897,
      "eval_runtime": 1716.1373,
      "eval_samples_per_second": 13.231,
      "eval_steps_per_second": 0.827,
      "step": 11354
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.4935705478245554e-05,
      "loss": 1.1596,
      "step": 11500
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.4715518759908404e-05,
      "loss": 1.0777,
      "step": 12000
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.4495332041571254e-05,
      "loss": 1.0658,
      "step": 12500
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.427514532323411e-05,
      "loss": 1.0296,
      "step": 13000
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.4054958604896954e-05,
      "loss": 1.0269,
      "step": 13500
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.3834771886559804e-05,
      "loss": 1.0192,
      "step": 14000
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.3614585168222654e-05,
      "loss": 0.9946,
      "step": 14500
    },
    {
      "epoch": 1.32,
      "learning_rate": 4.3394398449885504e-05,
      "loss": 0.9996,
      "step": 15000
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.3174211731548354e-05,
      "loss": 0.975,
      "step": 15500
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.295402501321121e-05,
      "loss": 0.9874,
      "step": 16000
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.2733838294874054e-05,
      "loss": 0.9683,
      "step": 16500
    },
    {
      "epoch": 1.5,
      "learning_rate": 4.2513651576536904e-05,
      "loss": 0.9675,
      "step": 17000
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.2293464858199754e-05,
      "loss": 0.969,
      "step": 17500
    },
    {
      "epoch": 1.59,
      "learning_rate": 4.2073278139862604e-05,
      "loss": 0.9548,
      "step": 18000
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.1853091421525454e-05,
      "loss": 0.9563,
      "step": 18500
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.163290470318831e-05,
      "loss": 0.9536,
      "step": 19000
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.1412717984851155e-05,
      "loss": 0.941,
      "step": 19500
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.1192531266514005e-05,
      "loss": 0.9285,
      "step": 20000
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.0972344548176855e-05,
      "loss": 0.9303,
      "step": 20500
    },
    {
      "epoch": 1.85,
      "learning_rate": 4.0752157829839705e-05,
      "loss": 0.9428,
      "step": 21000
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.0531971111502555e-05,
      "loss": 0.9359,
      "step": 21500
    },
    {
      "epoch": 1.94,
      "learning_rate": 4.031178439316541e-05,
      "loss": 0.9302,
      "step": 22000
    },
    {
      "epoch": 1.98,
      "learning_rate": 4.0091597674828255e-05,
      "loss": 0.9207,
      "step": 22500
    },
    {
      "epoch": 2.0,
      "eval_bleu": 30.842,
      "eval_gen_len": 18.0892,
      "eval_loss": 1.1251049041748047,
      "eval_runtime": 1535.7063,
      "eval_samples_per_second": 14.786,
      "eval_steps_per_second": 0.925,
      "step": 22708
    }
  ],
  "logging_steps": 500,
  "max_steps": 113540,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 7.874829468219474e+17,
  "trial_name": null,
  "trial_params": null
}