|
{ |
|
"best_metric": 0.8401998462720984, |
|
"best_model_checkpoint": "/mnt/hdd-nfs/jungsoo/DensePhrases/resources/ckpts/labeler_multi.train.v4.1_nq.dev.v4.1_rlmulti_title/checkpoint-198000", |
|
"epoch": 1.0, |
|
"global_step": 235538, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.532984062019718e-06, |
|
"loss": 0.4566, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.8049192928516525, |
|
"eval_loss": 0.4426476061344147, |
|
"eval_runtime": 31.0353, |
|
"eval_samples_per_second": 419.2, |
|
"eval_steps_per_second": 52.424, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.065968124039434e-06, |
|
"loss": 0.4093, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.8199846272098386, |
|
"eval_loss": 0.3974132835865021, |
|
"eval_runtime": 31.2125, |
|
"eval_samples_per_second": 416.821, |
|
"eval_steps_per_second": 52.127, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.598952186059151e-06, |
|
"loss": 0.387, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.8220599538816296, |
|
"eval_loss": 0.39724817872047424, |
|
"eval_runtime": 31.0035, |
|
"eval_samples_per_second": 419.63, |
|
"eval_steps_per_second": 52.478, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.131936248078867e-06, |
|
"loss": 0.3694, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.8254419677171406, |
|
"eval_loss": 0.4177829623222351, |
|
"eval_runtime": 31.0051, |
|
"eval_samples_per_second": 419.608, |
|
"eval_steps_per_second": 52.475, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.664920310098584e-06, |
|
"loss": 0.3528, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.828977709454266, |
|
"eval_loss": 0.38233569264411926, |
|
"eval_runtime": 31.0115, |
|
"eval_samples_per_second": 419.521, |
|
"eval_steps_per_second": 52.464, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.1979043721182995e-06, |
|
"loss": 0.3368, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.8327440430438124, |
|
"eval_loss": 0.38260024785995483, |
|
"eval_runtime": 31.0066, |
|
"eval_samples_per_second": 419.588, |
|
"eval_steps_per_second": 52.473, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.730888434138017e-06, |
|
"loss": 0.3236, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.8348962336664104, |
|
"eval_loss": 0.3918485939502716, |
|
"eval_runtime": 31.0424, |
|
"eval_samples_per_second": 419.104, |
|
"eval_steps_per_second": 52.412, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.263872496157733e-06, |
|
"loss": 0.3107, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.8374327440430438, |
|
"eval_loss": 0.40744495391845703, |
|
"eval_runtime": 30.981, |
|
"eval_samples_per_second": 419.935, |
|
"eval_steps_per_second": 52.516, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.79685655817745e-06, |
|
"loss": 0.299, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.8369715603382014, |
|
"eval_loss": 0.42519351840019226, |
|
"eval_runtime": 30.9991, |
|
"eval_samples_per_second": 419.69, |
|
"eval_steps_per_second": 52.485, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.329840620197166e-06, |
|
"loss": 0.2857, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.8330514988470408, |
|
"eval_loss": 0.432172954082489, |
|
"eval_runtime": 31.0082, |
|
"eval_samples_per_second": 419.567, |
|
"eval_steps_per_second": 52.47, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.862824682216883e-06, |
|
"loss": 0.2756, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.8378170637970792, |
|
"eval_loss": 0.41075146198272705, |
|
"eval_runtime": 30.9654, |
|
"eval_samples_per_second": 420.147, |
|
"eval_steps_per_second": 52.543, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.395808744236599e-06, |
|
"loss": 0.266, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.8359723289777095, |
|
"eval_loss": 0.4291342794895172, |
|
"eval_runtime": 31.031, |
|
"eval_samples_per_second": 419.258, |
|
"eval_steps_per_second": 52.431, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9287928062563155e-06, |
|
"loss": 0.2557, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.8334358186010761, |
|
"eval_loss": 0.4698314964771271, |
|
"eval_runtime": 31.0504, |
|
"eval_samples_per_second": 418.997, |
|
"eval_steps_per_second": 52.399, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.4617768682760324e-06, |
|
"loss": 0.2478, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.8363566487317448, |
|
"eval_loss": 0.4574269652366638, |
|
"eval_runtime": 31.1319, |
|
"eval_samples_per_second": 417.9, |
|
"eval_steps_per_second": 52.262, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.994760930295749e-06, |
|
"loss": 0.2393, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.8398923904688701, |
|
"eval_loss": 0.4403812289237976, |
|
"eval_runtime": 31.055, |
|
"eval_samples_per_second": 418.934, |
|
"eval_steps_per_second": 52.391, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.5277449923154652e-06, |
|
"loss": 0.2321, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.8360491929285165, |
|
"eval_loss": 0.4628557562828064, |
|
"eval_runtime": 31.1062, |
|
"eval_samples_per_second": 418.245, |
|
"eval_steps_per_second": 52.305, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.0607290543351817e-06, |
|
"loss": 0.2246, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.8398923904688701, |
|
"eval_loss": 0.45440638065338135, |
|
"eval_runtime": 31.0474, |
|
"eval_samples_per_second": 419.036, |
|
"eval_steps_per_second": 52.404, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.5937131163548984e-06, |
|
"loss": 0.2194, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.8401998462720984, |
|
"eval_loss": 0.4905882179737091, |
|
"eval_runtime": 31.0565, |
|
"eval_samples_per_second": 418.915, |
|
"eval_steps_per_second": 52.388, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1266971783746148e-06, |
|
"loss": 0.2125, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.8395080707148348, |
|
"eval_loss": 0.48444053530693054, |
|
"eval_runtime": 31.0361, |
|
"eval_samples_per_second": 419.189, |
|
"eval_steps_per_second": 52.423, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.596812403943314e-07, |
|
"loss": 0.2101, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.8388931591083781, |
|
"eval_loss": 0.49173465371131897, |
|
"eval_runtime": 31.025, |
|
"eval_samples_per_second": 419.34, |
|
"eval_steps_per_second": 52.442, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.9266530241404786e-07, |
|
"loss": 0.2052, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.8384319754035358, |
|
"eval_loss": 0.4989364445209503, |
|
"eval_runtime": 31.4895, |
|
"eval_samples_per_second": 413.153, |
|
"eval_steps_per_second": 51.668, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 235538, |
|
"total_flos": 2.758442251818959e+18, |
|
"train_loss": 0.2896772417569365, |
|
"train_runtime": 61879.5726, |
|
"train_samples_per_second": 121.804, |
|
"train_steps_per_second": 3.806 |
|
} |
|
], |
|
"max_steps": 235538, |
|
"num_train_epochs": 1, |
|
"total_flos": 2.758442251818959e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|