{ "best_metric": 0.8401998462720984, "best_model_checkpoint": "/mnt/hdd-nfs/jungsoo/DensePhrases/resources/ckpts/labeler_multi.train.v4.1_nq.dev.v4.1_rlmulti_title/checkpoint-198000", "epoch": 1.0, "global_step": 235538, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 9.532984062019718e-06, "loss": 0.4566, "step": 11000 }, { "epoch": 0.05, "eval_accuracy": 0.8049192928516525, "eval_loss": 0.4426476061344147, "eval_runtime": 31.0353, "eval_samples_per_second": 419.2, "eval_steps_per_second": 52.424, "step": 11000 }, { "epoch": 0.09, "learning_rate": 9.065968124039434e-06, "loss": 0.4093, "step": 22000 }, { "epoch": 0.09, "eval_accuracy": 0.8199846272098386, "eval_loss": 0.3974132835865021, "eval_runtime": 31.2125, "eval_samples_per_second": 416.821, "eval_steps_per_second": 52.127, "step": 22000 }, { "epoch": 0.14, "learning_rate": 8.598952186059151e-06, "loss": 0.387, "step": 33000 }, { "epoch": 0.14, "eval_accuracy": 0.8220599538816296, "eval_loss": 0.39724817872047424, "eval_runtime": 31.0035, "eval_samples_per_second": 419.63, "eval_steps_per_second": 52.478, "step": 33000 }, { "epoch": 0.19, "learning_rate": 8.131936248078867e-06, "loss": 0.3694, "step": 44000 }, { "epoch": 0.19, "eval_accuracy": 0.8254419677171406, "eval_loss": 0.4177829623222351, "eval_runtime": 31.0051, "eval_samples_per_second": 419.608, "eval_steps_per_second": 52.475, "step": 44000 }, { "epoch": 0.23, "learning_rate": 7.664920310098584e-06, "loss": 0.3528, "step": 55000 }, { "epoch": 0.23, "eval_accuracy": 0.828977709454266, "eval_loss": 0.38233569264411926, "eval_runtime": 31.0115, "eval_samples_per_second": 419.521, "eval_steps_per_second": 52.464, "step": 55000 }, { "epoch": 0.28, "learning_rate": 7.1979043721182995e-06, "loss": 0.3368, "step": 66000 }, { "epoch": 0.28, "eval_accuracy": 0.8327440430438124, "eval_loss": 0.38260024785995483, "eval_runtime": 31.0066, "eval_samples_per_second": 419.588, "eval_steps_per_second": 52.473, "step": 66000 }, { "epoch": 0.33, "learning_rate": 6.730888434138017e-06, "loss": 0.3236, "step": 77000 }, { "epoch": 0.33, "eval_accuracy": 0.8348962336664104, "eval_loss": 0.3918485939502716, "eval_runtime": 31.0424, "eval_samples_per_second": 419.104, "eval_steps_per_second": 52.412, "step": 77000 }, { "epoch": 0.37, "learning_rate": 6.263872496157733e-06, "loss": 0.3107, "step": 88000 }, { "epoch": 0.37, "eval_accuracy": 0.8374327440430438, "eval_loss": 0.40744495391845703, "eval_runtime": 30.981, "eval_samples_per_second": 419.935, "eval_steps_per_second": 52.516, "step": 88000 }, { "epoch": 0.42, "learning_rate": 5.79685655817745e-06, "loss": 0.299, "step": 99000 }, { "epoch": 0.42, "eval_accuracy": 0.8369715603382014, "eval_loss": 0.42519351840019226, "eval_runtime": 30.9991, "eval_samples_per_second": 419.69, "eval_steps_per_second": 52.485, "step": 99000 }, { "epoch": 0.47, "learning_rate": 5.329840620197166e-06, "loss": 0.2857, "step": 110000 }, { "epoch": 0.47, "eval_accuracy": 0.8330514988470408, "eval_loss": 0.432172954082489, "eval_runtime": 31.0082, "eval_samples_per_second": 419.567, "eval_steps_per_second": 52.47, "step": 110000 }, { "epoch": 0.51, "learning_rate": 4.862824682216883e-06, "loss": 0.2756, "step": 121000 }, { "epoch": 0.51, "eval_accuracy": 0.8378170637970792, "eval_loss": 0.41075146198272705, "eval_runtime": 30.9654, "eval_samples_per_second": 420.147, "eval_steps_per_second": 52.543, "step": 121000 }, { "epoch": 0.56, "learning_rate": 4.395808744236599e-06, "loss": 0.266, "step": 132000 }, { "epoch": 0.56, "eval_accuracy": 0.8359723289777095, "eval_loss": 0.4291342794895172, "eval_runtime": 31.031, "eval_samples_per_second": 419.258, "eval_steps_per_second": 52.431, "step": 132000 }, { "epoch": 0.61, "learning_rate": 3.9287928062563155e-06, "loss": 0.2557, "step": 143000 }, { "epoch": 0.61, "eval_accuracy": 0.8334358186010761, "eval_loss": 0.4698314964771271, "eval_runtime": 31.0504, "eval_samples_per_second": 418.997, "eval_steps_per_second": 52.399, "step": 143000 }, { "epoch": 0.65, "learning_rate": 3.4617768682760324e-06, "loss": 0.2478, "step": 154000 }, { "epoch": 0.65, "eval_accuracy": 0.8363566487317448, "eval_loss": 0.4574269652366638, "eval_runtime": 31.1319, "eval_samples_per_second": 417.9, "eval_steps_per_second": 52.262, "step": 154000 }, { "epoch": 0.7, "learning_rate": 2.994760930295749e-06, "loss": 0.2393, "step": 165000 }, { "epoch": 0.7, "eval_accuracy": 0.8398923904688701, "eval_loss": 0.4403812289237976, "eval_runtime": 31.055, "eval_samples_per_second": 418.934, "eval_steps_per_second": 52.391, "step": 165000 }, { "epoch": 0.75, "learning_rate": 2.5277449923154652e-06, "loss": 0.2321, "step": 176000 }, { "epoch": 0.75, "eval_accuracy": 0.8360491929285165, "eval_loss": 0.4628557562828064, "eval_runtime": 31.1062, "eval_samples_per_second": 418.245, "eval_steps_per_second": 52.305, "step": 176000 }, { "epoch": 0.79, "learning_rate": 2.0607290543351817e-06, "loss": 0.2246, "step": 187000 }, { "epoch": 0.79, "eval_accuracy": 0.8398923904688701, "eval_loss": 0.45440638065338135, "eval_runtime": 31.0474, "eval_samples_per_second": 419.036, "eval_steps_per_second": 52.404, "step": 187000 }, { "epoch": 0.84, "learning_rate": 1.5937131163548984e-06, "loss": 0.2194, "step": 198000 }, { "epoch": 0.84, "eval_accuracy": 0.8401998462720984, "eval_loss": 0.4905882179737091, "eval_runtime": 31.0565, "eval_samples_per_second": 418.915, "eval_steps_per_second": 52.388, "step": 198000 }, { "epoch": 0.89, "learning_rate": 1.1266971783746148e-06, "loss": 0.2125, "step": 209000 }, { "epoch": 0.89, "eval_accuracy": 0.8395080707148348, "eval_loss": 0.48444053530693054, "eval_runtime": 31.0361, "eval_samples_per_second": 419.189, "eval_steps_per_second": 52.423, "step": 209000 }, { "epoch": 0.93, "learning_rate": 6.596812403943314e-07, "loss": 0.2101, "step": 220000 }, { "epoch": 0.93, "eval_accuracy": 0.8388931591083781, "eval_loss": 0.49173465371131897, "eval_runtime": 31.025, "eval_samples_per_second": 419.34, "eval_steps_per_second": 52.442, "step": 220000 }, { "epoch": 0.98, "learning_rate": 1.9266530241404786e-07, "loss": 0.2052, "step": 231000 }, { "epoch": 0.98, "eval_accuracy": 0.8384319754035358, "eval_loss": 0.4989364445209503, "eval_runtime": 31.4895, "eval_samples_per_second": 413.153, "eval_steps_per_second": 51.668, "step": 231000 }, { "epoch": 1.0, "step": 235538, "total_flos": 2.758442251818959e+18, "train_loss": 0.2896772417569365, "train_runtime": 61879.5726, "train_samples_per_second": 121.804, "train_steps_per_second": 3.806 } ], "max_steps": 235538, "num_train_epochs": 1, "total_flos": 2.758442251818959e+18, "trial_name": null, "trial_params": null }