|
{ |
|
"best_metric": 0.025585556402802467, |
|
"best_model_checkpoint": "/data/wheld3/mt5-base-pointer-top_v2/checkpoint-2600", |
|
"epoch": 12.345168539325842, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 1.4545, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_exact_match": 0.12942890442890442, |
|
"eval_loss": 0.25418975949287415, |
|
"eval_runtime": 1925.1292, |
|
"eval_samples_per_second": 8.914, |
|
"eval_steps_per_second": 2.228, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 0.1878, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_exact_match": 0.21276223776223777, |
|
"eval_loss": 0.06684188544750214, |
|
"eval_runtime": 1936.6986, |
|
"eval_samples_per_second": 8.86, |
|
"eval_steps_per_second": 2.215, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0008, |
|
"loss": 0.0796, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_exact_match": 0.22756410256410256, |
|
"eval_loss": 0.04656381905078888, |
|
"eval_runtime": 1952.2349, |
|
"eval_samples_per_second": 8.79, |
|
"eval_steps_per_second": 2.197, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 0.0536, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_exact_match": 0.23088578088578088, |
|
"eval_loss": 0.03555314987897873, |
|
"eval_runtime": 1939.2428, |
|
"eval_samples_per_second": 8.849, |
|
"eval_steps_per_second": 2.212, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 0.0424, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_exact_match": 0.23280885780885782, |
|
"eval_loss": 0.031658221036195755, |
|
"eval_runtime": 1929.3944, |
|
"eval_samples_per_second": 8.894, |
|
"eval_steps_per_second": 2.223, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0006, |
|
"loss": 0.0356, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_exact_match": 0.23403263403263402, |
|
"eval_loss": 0.02952779084444046, |
|
"eval_runtime": 1929.8018, |
|
"eval_samples_per_second": 8.892, |
|
"eval_steps_per_second": 2.223, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.0005333333333333334, |
|
"loss": 0.0306, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_exact_match": 0.23572261072261072, |
|
"eval_loss": 0.028804348781704903, |
|
"eval_runtime": 1939.2007, |
|
"eval_samples_per_second": 8.849, |
|
"eval_steps_per_second": 2.212, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.00046666666666666666, |
|
"loss": 0.0277, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"eval_exact_match": 0.23513986013986013, |
|
"eval_loss": 0.027136022225022316, |
|
"eval_runtime": 1941.0005, |
|
"eval_samples_per_second": 8.841, |
|
"eval_steps_per_second": 2.21, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.0004, |
|
"loss": 0.0243, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_exact_match": 0.23513986013986013, |
|
"eval_loss": 0.02723020501434803, |
|
"eval_runtime": 1956.0015, |
|
"eval_samples_per_second": 8.773, |
|
"eval_steps_per_second": 2.193, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.0225, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"eval_exact_match": 0.2353146853146853, |
|
"eval_loss": 0.02720719203352928, |
|
"eval_runtime": 1939.1308, |
|
"eval_samples_per_second": 8.849, |
|
"eval_steps_per_second": 2.212, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.0002666666666666667, |
|
"loss": 0.0206, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"eval_exact_match": 0.23682983682983683, |
|
"eval_loss": 0.026673857122659683, |
|
"eval_runtime": 1950.8619, |
|
"eval_samples_per_second": 8.796, |
|
"eval_steps_per_second": 2.199, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 0.0002, |
|
"loss": 0.0187, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_exact_match": 0.2367132867132867, |
|
"eval_loss": 0.02599777653813362, |
|
"eval_runtime": 1938.3074, |
|
"eval_samples_per_second": 8.853, |
|
"eval_steps_per_second": 2.213, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.0173, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"eval_exact_match": 0.23828671328671328, |
|
"eval_loss": 0.025585556402802467, |
|
"eval_runtime": 1929.5665, |
|
"eval_samples_per_second": 8.893, |
|
"eval_steps_per_second": 2.223, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0161, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"eval_exact_match": 0.23828671328671328, |
|
"eval_loss": 0.02604704163968563, |
|
"eval_runtime": 1944.3112, |
|
"eval_samples_per_second": 8.826, |
|
"eval_steps_per_second": 2.206, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 0.0, |
|
"loss": 0.0153, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"eval_exact_match": 0.2377039627039627, |
|
"eval_loss": 0.025679251179099083, |
|
"eval_runtime": 1944.0793, |
|
"eval_samples_per_second": 8.827, |
|
"eval_steps_per_second": 2.207, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"step": 3000, |
|
"total_flos": 7.256060777948774e+16, |
|
"train_loss": 0.13643742847442628, |
|
"train_runtime": 83186.8691, |
|
"train_samples_per_second": 18.464, |
|
"train_steps_per_second": 0.036 |
|
} |
|
], |
|
"max_steps": 3000, |
|
"num_train_epochs": 13, |
|
"total_flos": 7.256060777948774e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|