{ "best_metric": 0.22899456322193146, "best_model_checkpoint": "output_dir/finetuned_best_qnli/checkpoint-5000", "epoch": 4.886988393402565, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 1.938912645082468e-05, "loss": 0.4392, "step": 500 }, { "epoch": 0.15, "eval_accuracy": 0.8553908109097565, "eval_loss": 0.32711732387542725, "eval_runtime": 26.9542, "eval_samples_per_second": 202.677, "eval_steps_per_second": 12.688, "step": 500 }, { "epoch": 0.31, "learning_rate": 1.8778252901649362e-05, "loss": 0.3536, "step": 1000 }, { "epoch": 0.31, "eval_accuracy": 0.8828482518762585, "eval_loss": 0.2784218490123749, "eval_runtime": 28.1713, "eval_samples_per_second": 193.921, "eval_steps_per_second": 12.14, "step": 1000 }, { "epoch": 0.46, "learning_rate": 1.816737935247404e-05, "loss": 0.3288, "step": 1500 }, { "epoch": 0.46, "eval_accuracy": 0.8934651290499726, "eval_loss": 0.2534938454627991, "eval_runtime": 24.2891, "eval_samples_per_second": 224.916, "eval_steps_per_second": 14.08, "step": 1500 }, { "epoch": 0.61, "learning_rate": 1.755650580329872e-05, "loss": 0.3105, "step": 2000 }, { "epoch": 0.61, "eval_accuracy": 0.9033498077979132, "eval_loss": 0.24962939321994781, "eval_runtime": 24.0235, "eval_samples_per_second": 227.402, "eval_steps_per_second": 14.236, "step": 2000 }, { "epoch": 0.76, "learning_rate": 1.69456322541234e-05, "loss": 0.3061, "step": 2500 }, { "epoch": 0.76, "eval_accuracy": 0.900604063701263, "eval_loss": 0.24303048849105835, "eval_runtime": 24.6661, "eval_samples_per_second": 221.478, "eval_steps_per_second": 13.865, "step": 2500 }, { "epoch": 0.92, "learning_rate": 1.6334758704948076e-05, "loss": 0.2907, "step": 3000 }, { "epoch": 0.92, "eval_accuracy": 0.9029837085850265, "eval_loss": 0.2454785704612732, "eval_runtime": 25.5666, "eval_samples_per_second": 213.677, "eval_steps_per_second": 13.377, "step": 3000 }, { "epoch": 1.07, "learning_rate": 1.5723885155772757e-05, "loss": 0.2484, "step": 3500 }, { "epoch": 1.07, "eval_accuracy": 0.9125022881200805, "eval_loss": 0.24116584658622742, "eval_runtime": 25.7855, "eval_samples_per_second": 211.864, "eval_steps_per_second": 13.263, "step": 3500 }, { "epoch": 1.22, "learning_rate": 1.5113011606597437e-05, "loss": 0.1937, "step": 4000 }, { "epoch": 1.22, "eval_accuracy": 0.9077429983525536, "eval_loss": 0.25882598757743835, "eval_runtime": 24.3945, "eval_samples_per_second": 223.944, "eval_steps_per_second": 14.02, "step": 4000 }, { "epoch": 1.37, "learning_rate": 1.4502138057422115e-05, "loss": 0.1948, "step": 4500 }, { "epoch": 1.37, "eval_accuracy": 0.9134175361522973, "eval_loss": 0.24079638719558716, "eval_runtime": 25.609, "eval_samples_per_second": 213.323, "eval_steps_per_second": 13.355, "step": 4500 }, { "epoch": 1.53, "learning_rate": 1.3891264508246794e-05, "loss": 0.1955, "step": 5000 }, { "epoch": 1.53, "eval_accuracy": 0.9126853377265239, "eval_loss": 0.22899456322193146, "eval_runtime": 25.5564, "eval_samples_per_second": 213.763, "eval_steps_per_second": 13.382, "step": 5000 }, { "epoch": 1.68, "learning_rate": 1.3280390959071474e-05, "loss": 0.1869, "step": 5500 }, { "epoch": 1.68, "eval_accuracy": 0.9084751967783269, "eval_loss": 0.2446000874042511, "eval_runtime": 24.7359, "eval_samples_per_second": 220.853, "eval_steps_per_second": 13.826, "step": 5500 }, { "epoch": 1.83, "learning_rate": 1.2669517409896153e-05, "loss": 0.1883, "step": 6000 }, { "epoch": 1.83, "eval_accuracy": 0.9115870400878638, "eval_loss": 0.23336854577064514, "eval_runtime": 26.4848, "eval_samples_per_second": 206.269, "eval_steps_per_second": 12.913, "step": 6000 }, { "epoch": 1.99, "learning_rate": 1.2058643860720831e-05, "loss": 0.1897, "step": 6500 }, { "epoch": 1.99, "eval_accuracy": 0.9084751967783269, "eval_loss": 0.2398270219564438, "eval_runtime": 26.0879, "eval_samples_per_second": 209.407, "eval_steps_per_second": 13.11, "step": 6500 }, { "epoch": 2.14, "learning_rate": 1.1447770311545512e-05, "loss": 0.1169, "step": 7000 }, { "epoch": 2.14, "eval_accuracy": 0.9123192385136372, "eval_loss": 0.30561476945877075, "eval_runtime": 24.4434, "eval_samples_per_second": 223.496, "eval_steps_per_second": 13.992, "step": 7000 }, { "epoch": 2.29, "learning_rate": 1.083689676237019e-05, "loss": 0.1069, "step": 7500 }, { "epoch": 2.29, "eval_accuracy": 0.9130514369394106, "eval_loss": 0.295697420835495, "eval_runtime": 25.6997, "eval_samples_per_second": 212.571, "eval_steps_per_second": 13.308, "step": 7500 }, { "epoch": 2.44, "learning_rate": 1.0226023213194869e-05, "loss": 0.1079, "step": 8000 }, { "epoch": 2.44, "eval_accuracy": 0.9146988833974007, "eval_loss": 0.31733793020248413, "eval_runtime": 25.5822, "eval_samples_per_second": 213.547, "eval_steps_per_second": 13.369, "step": 8000 }, { "epoch": 2.6, "learning_rate": 9.615149664019549e-06, "loss": 0.1132, "step": 8500 }, { "epoch": 2.6, "eval_accuracy": 0.9137836353651839, "eval_loss": 0.3351670503616333, "eval_runtime": 26.0424, "eval_samples_per_second": 209.774, "eval_steps_per_second": 13.132, "step": 8500 }, { "epoch": 2.75, "learning_rate": 9.004276114844227e-06, "loss": 0.1141, "step": 9000 }, { "epoch": 2.75, "eval_accuracy": 0.9139666849716274, "eval_loss": 0.3112892210483551, "eval_runtime": 25.7504, "eval_samples_per_second": 212.152, "eval_steps_per_second": 13.281, "step": 9000 }, { "epoch": 2.9, "learning_rate": 8.393402565668908e-06, "loss": 0.118, "step": 9500 }, { "epoch": 2.9, "eval_accuracy": 0.9178107267069375, "eval_loss": 0.28728175163269043, "eval_runtime": 24.7133, "eval_samples_per_second": 221.055, "eval_steps_per_second": 13.839, "step": 9500 }, { "epoch": 3.05, "learning_rate": 7.782529016493586e-06, "loss": 0.0918, "step": 10000 }, { "epoch": 3.05, "eval_accuracy": 0.9145158337909574, "eval_loss": 0.4170892536640167, "eval_runtime": 26.16, "eval_samples_per_second": 208.83, "eval_steps_per_second": 13.073, "step": 10000 }, { "epoch": 3.21, "learning_rate": 7.171655467318266e-06, "loss": 0.0678, "step": 10500 }, { "epoch": 3.21, "eval_accuracy": 0.9112209408749771, "eval_loss": 0.4185065031051636, "eval_runtime": 24.3588, "eval_samples_per_second": 224.272, "eval_steps_per_second": 14.04, "step": 10500 }, { "epoch": 3.36, "learning_rate": 6.560781918142944e-06, "loss": 0.0678, "step": 11000 }, { "epoch": 3.36, "eval_accuracy": 0.9112209408749771, "eval_loss": 0.4082688093185425, "eval_runtime": 24.7254, "eval_samples_per_second": 220.947, "eval_steps_per_second": 13.832, "step": 11000 }, { "epoch": 3.51, "learning_rate": 5.949908368967624e-06, "loss": 0.0732, "step": 11500 }, { "epoch": 3.51, "eval_accuracy": 0.9146988833974007, "eval_loss": 0.39638856053352356, "eval_runtime": 25.9223, "eval_samples_per_second": 210.746, "eval_steps_per_second": 13.193, "step": 11500 }, { "epoch": 3.67, "learning_rate": 5.339034819792304e-06, "loss": 0.0747, "step": 12000 }, { "epoch": 3.67, "eval_accuracy": 0.9154310818231741, "eval_loss": 0.39597952365875244, "eval_runtime": 25.8782, "eval_samples_per_second": 211.104, "eval_steps_per_second": 13.216, "step": 12000 }, { "epoch": 3.82, "learning_rate": 4.728161270616982e-06, "loss": 0.0694, "step": 12500 }, { "epoch": 3.82, "eval_accuracy": 0.9167124290682775, "eval_loss": 0.3776060938835144, "eval_runtime": 25.8946, "eval_samples_per_second": 210.97, "eval_steps_per_second": 13.207, "step": 12500 }, { "epoch": 3.97, "learning_rate": 4.117287721441662e-06, "loss": 0.0711, "step": 13000 }, { "epoch": 3.97, "eval_accuracy": 0.9154310818231741, "eval_loss": 0.3837811052799225, "eval_runtime": 24.4925, "eval_samples_per_second": 223.048, "eval_steps_per_second": 13.963, "step": 13000 }, { "epoch": 4.12, "learning_rate": 3.506414172266341e-06, "loss": 0.0473, "step": 13500 }, { "epoch": 4.12, "eval_accuracy": 0.9146988833974007, "eval_loss": 0.44068974256515503, "eval_runtime": 26.0081, "eval_samples_per_second": 210.05, "eval_steps_per_second": 13.15, "step": 13500 }, { "epoch": 4.28, "learning_rate": 2.8955406230910206e-06, "loss": 0.043, "step": 14000 }, { "epoch": 4.28, "eval_accuracy": 0.9168954786747209, "eval_loss": 0.4517548680305481, "eval_runtime": 25.7888, "eval_samples_per_second": 211.836, "eval_steps_per_second": 13.262, "step": 14000 }, { "epoch": 4.43, "learning_rate": 2.2846670739156996e-06, "loss": 0.0459, "step": 14500 }, { "epoch": 4.43, "eval_accuracy": 0.917993776313381, "eval_loss": 0.43741610646247864, "eval_runtime": 28.8351, "eval_samples_per_second": 189.456, "eval_steps_per_second": 11.861, "step": 14500 }, { "epoch": 4.58, "learning_rate": 1.6737935247403788e-06, "loss": 0.043, "step": 15000 }, { "epoch": 4.58, "eval_accuracy": 0.9154310818231741, "eval_loss": 0.4457714259624481, "eval_runtime": 24.7472, "eval_samples_per_second": 220.752, "eval_steps_per_second": 13.82, "step": 15000 }, { "epoch": 4.73, "learning_rate": 1.0629199755650582e-06, "loss": 0.0472, "step": 15500 }, { "epoch": 4.73, "eval_accuracy": 0.9183598755262676, "eval_loss": 0.43522849678993225, "eval_runtime": 25.0601, "eval_samples_per_second": 217.996, "eval_steps_per_second": 13.647, "step": 15500 }, { "epoch": 4.89, "learning_rate": 4.5204642638973736e-07, "loss": 0.041, "step": 16000 }, { "epoch": 4.89, "eval_accuracy": 0.9183598755262676, "eval_loss": 0.44270119071006775, "eval_runtime": 24.5695, "eval_samples_per_second": 222.349, "eval_steps_per_second": 13.92, "step": 16000 } ], "max_steps": 16370, "num_train_epochs": 5, "total_flos": 3.3671637309696e+16, "trial_name": null, "trial_params": null }