{ "best_metric": 0.2568952143192291, "best_model_checkpoint": "./results_train/bert-base-cased/qnli/checkpoint-13000", "epoch": 10.0, "global_step": 65470, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 2.545176889793841e-06, "loss": 0.6529, "step": 500 }, { "epoch": 0.08, "eval_accuracy": 0.7733845872231375, "eval_loss": 0.4945374131202698, "eval_runtime": 15.9632, "eval_samples_per_second": 342.225, "eval_steps_per_second": 42.786, "step": 500 }, { "epoch": 0.15, "learning_rate": 5.090353779587682e-06, "loss": 0.4731, "step": 1000 }, { "epoch": 0.15, "eval_accuracy": 0.8405637927878455, "eval_loss": 0.38875827193260193, "eval_runtime": 16.0177, "eval_samples_per_second": 341.06, "eval_steps_per_second": 42.64, "step": 1000 }, { "epoch": 0.23, "learning_rate": 7.635530669381522e-06, "loss": 0.4113, "step": 1500 }, { "epoch": 0.23, "eval_accuracy": 0.8522789676002197, "eval_loss": 0.3604711890220642, "eval_runtime": 15.9651, "eval_samples_per_second": 342.185, "eval_steps_per_second": 42.781, "step": 1500 }, { "epoch": 0.31, "learning_rate": 1.0180707559175364e-05, "loss": 0.4006, "step": 2000 }, { "epoch": 0.31, "eval_accuracy": 0.8649093904448105, "eval_loss": 0.3175490200519562, "eval_runtime": 16.0149, "eval_samples_per_second": 341.119, "eval_steps_per_second": 42.648, "step": 2000 }, { "epoch": 0.38, "learning_rate": 1.2725884448969203e-05, "loss": 0.3615, "step": 2500 }, { "epoch": 0.38, "eval_accuracy": 0.867838184147904, "eval_loss": 0.32201188802719116, "eval_runtime": 15.9586, "eval_samples_per_second": 342.323, "eval_steps_per_second": 42.798, "step": 2500 }, { "epoch": 0.46, "learning_rate": 1.5271061338763045e-05, "loss": 0.3662, "step": 3000 }, { "epoch": 0.46, "eval_accuracy": 0.866739886509244, "eval_loss": 0.3197564482688904, "eval_runtime": 16.037, "eval_samples_per_second": 340.649, "eval_steps_per_second": 42.589, "step": 3000 }, { "epoch": 0.53, "learning_rate": 1.7816238228556887e-05, "loss": 0.3578, "step": 3500 }, { "epoch": 0.53, "eval_accuracy": 0.8779059125022881, "eval_loss": 0.29696720838546753, "eval_runtime": 15.9685, "eval_samples_per_second": 342.112, "eval_steps_per_second": 42.772, "step": 3500 }, { "epoch": 0.61, "learning_rate": 1.997692595180449e-05, "loss": 0.3349, "step": 4000 }, { "epoch": 0.61, "eval_accuracy": 0.8833974006955885, "eval_loss": 0.28639864921569824, "eval_runtime": 16.0264, "eval_samples_per_second": 340.875, "eval_steps_per_second": 42.617, "step": 4000 }, { "epoch": 0.69, "learning_rate": 1.9814432654652997e-05, "loss": 0.3548, "step": 4500 }, { "epoch": 0.69, "eval_accuracy": 0.8945634266886326, "eval_loss": 0.2664399743080139, "eval_runtime": 15.9996, "eval_samples_per_second": 341.447, "eval_steps_per_second": 42.689, "step": 4500 }, { "epoch": 0.76, "learning_rate": 1.9651939357501505e-05, "loss": 0.3254, "step": 5000 }, { "epoch": 0.76, "eval_accuracy": 0.8929159802306426, "eval_loss": 0.2650640904903412, "eval_runtime": 16.02, "eval_samples_per_second": 341.011, "eval_steps_per_second": 42.634, "step": 5000 }, { "epoch": 0.84, "learning_rate": 1.9489446060350013e-05, "loss": 0.3212, "step": 5500 }, { "epoch": 0.84, "eval_accuracy": 0.8817499542375984, "eval_loss": 0.2942565679550171, "eval_runtime": 15.947, "eval_samples_per_second": 342.573, "eval_steps_per_second": 42.829, "step": 5500 }, { "epoch": 0.92, "learning_rate": 1.932695276319852e-05, "loss": 0.3195, "step": 6000 }, { "epoch": 0.92, "eval_accuracy": 0.8960278235401794, "eval_loss": 0.26299455761909485, "eval_runtime": 15.9626, "eval_samples_per_second": 342.237, "eval_steps_per_second": 42.787, "step": 6000 }, { "epoch": 0.99, "learning_rate": 1.916445946604703e-05, "loss": 0.3044, "step": 6500 }, { "epoch": 0.99, "eval_accuracy": 0.8881566904631155, "eval_loss": 0.27664485573768616, "eval_runtime": 15.9599, "eval_samples_per_second": 342.295, "eval_steps_per_second": 42.795, "step": 6500 }, { "epoch": 1.07, "learning_rate": 1.9001966168895533e-05, "loss": 0.245, "step": 7000 }, { "epoch": 1.07, "eval_accuracy": 0.9013362621270364, "eval_loss": 0.3268437087535858, "eval_runtime": 16.0333, "eval_samples_per_second": 340.728, "eval_steps_per_second": 42.599, "step": 7000 }, { "epoch": 1.15, "learning_rate": 1.883947287174404e-05, "loss": 0.2415, "step": 7500 }, { "epoch": 1.15, "eval_accuracy": 0.8991396668497162, "eval_loss": 0.29871582984924316, "eval_runtime": 15.9621, "eval_samples_per_second": 342.249, "eval_steps_per_second": 42.789, "step": 7500 }, { "epoch": 1.22, "learning_rate": 1.867697957459255e-05, "loss": 0.248, "step": 8000 }, { "epoch": 1.22, "eval_accuracy": 0.8812008054182684, "eval_loss": 0.3372972905635834, "eval_runtime": 16.0184, "eval_samples_per_second": 341.046, "eval_steps_per_second": 42.639, "step": 8000 }, { "epoch": 1.3, "learning_rate": 1.8514486277441056e-05, "loss": 0.2328, "step": 8500 }, { "epoch": 1.3, "eval_accuracy": 0.8808347062053816, "eval_loss": 0.34742510318756104, "eval_runtime": 15.9539, "eval_samples_per_second": 342.424, "eval_steps_per_second": 42.811, "step": 8500 }, { "epoch": 1.37, "learning_rate": 1.8351992980289564e-05, "loss": 0.2427, "step": 9000 }, { "epoch": 1.37, "eval_accuracy": 0.8912685337726524, "eval_loss": 0.2986329197883606, "eval_runtime": 15.9879, "eval_samples_per_second": 341.695, "eval_steps_per_second": 42.72, "step": 9000 }, { "epoch": 1.45, "learning_rate": 1.8189499683138072e-05, "loss": 0.243, "step": 9500 }, { "epoch": 1.45, "eval_accuracy": 0.8982244188174996, "eval_loss": 0.2834506034851074, "eval_runtime": 15.9775, "eval_samples_per_second": 341.919, "eval_steps_per_second": 42.748, "step": 9500 }, { "epoch": 1.53, "learning_rate": 1.802700638598658e-05, "loss": 0.2375, "step": 10000 }, { "epoch": 1.53, "eval_accuracy": 0.8909024345597657, "eval_loss": 0.2893359661102295, "eval_runtime": 15.9492, "eval_samples_per_second": 342.525, "eval_steps_per_second": 42.823, "step": 10000 }, { "epoch": 1.6, "learning_rate": 1.7864513088835088e-05, "loss": 0.2253, "step": 10500 }, { "epoch": 1.6, "eval_accuracy": 0.8960278235401794, "eval_loss": 0.2941761910915375, "eval_runtime": 15.9667, "eval_samples_per_second": 342.15, "eval_steps_per_second": 42.777, "step": 10500 }, { "epoch": 1.68, "learning_rate": 1.7702019791683592e-05, "loss": 0.2429, "step": 11000 }, { "epoch": 1.68, "eval_accuracy": 0.9028006589785832, "eval_loss": 0.28518712520599365, "eval_runtime": 16.0239, "eval_samples_per_second": 340.928, "eval_steps_per_second": 42.624, "step": 11000 }, { "epoch": 1.76, "learning_rate": 1.75395264945321e-05, "loss": 0.238, "step": 11500 }, { "epoch": 1.76, "eval_accuracy": 0.9037159070108, "eval_loss": 0.2671617865562439, "eval_runtime": 15.9753, "eval_samples_per_second": 341.966, "eval_steps_per_second": 42.754, "step": 11500 }, { "epoch": 1.83, "learning_rate": 1.737703319738061e-05, "loss": 0.2344, "step": 12000 }, { "epoch": 1.83, "eval_accuracy": 0.9020684605528098, "eval_loss": 0.267158567905426, "eval_runtime": 16.0345, "eval_samples_per_second": 340.703, "eval_steps_per_second": 42.596, "step": 12000 }, { "epoch": 1.91, "learning_rate": 1.7214539900229116e-05, "loss": 0.2368, "step": 12500 }, { "epoch": 1.91, "eval_accuracy": 0.9018854109463664, "eval_loss": 0.2753552496433258, "eval_runtime": 15.9627, "eval_samples_per_second": 342.234, "eval_steps_per_second": 42.787, "step": 12500 }, { "epoch": 1.99, "learning_rate": 1.7052046603077624e-05, "loss": 0.2424, "step": 13000 }, { "epoch": 1.99, "eval_accuracy": 0.9086582463847702, "eval_loss": 0.2568952143192291, "eval_runtime": 15.9721, "eval_samples_per_second": 342.034, "eval_steps_per_second": 42.762, "step": 13000 }, { "epoch": 2.06, "learning_rate": 1.6889553305926132e-05, "loss": 0.1542, "step": 13500 }, { "epoch": 2.06, "eval_accuracy": 0.9026176093721399, "eval_loss": 0.38070985674858093, "eval_runtime": 15.9896, "eval_samples_per_second": 341.659, "eval_steps_per_second": 42.715, "step": 13500 }, { "epoch": 2.14, "learning_rate": 1.672706000877464e-05, "loss": 0.1416, "step": 14000 }, { "epoch": 2.14, "eval_accuracy": 0.8980413692110562, "eval_loss": 0.3915307819843292, "eval_runtime": 15.9993, "eval_samples_per_second": 341.452, "eval_steps_per_second": 42.689, "step": 14000 }, { "epoch": 2.21, "learning_rate": 1.6564566711623148e-05, "loss": 0.1445, "step": 14500 }, { "epoch": 2.21, "eval_accuracy": 0.8984074684239429, "eval_loss": 0.42921990156173706, "eval_runtime": 15.964, "eval_samples_per_second": 342.208, "eval_steps_per_second": 42.784, "step": 14500 }, { "epoch": 2.29, "learning_rate": 1.6402073414471655e-05, "loss": 0.1631, "step": 15000 }, { "epoch": 2.29, "eval_accuracy": 0.8971261211788395, "eval_loss": 0.4096887409687042, "eval_runtime": 16.0404, "eval_samples_per_second": 340.578, "eval_steps_per_second": 42.58, "step": 15000 }, { "epoch": 2.37, "learning_rate": 1.6239580117320163e-05, "loss": 0.1512, "step": 15500 }, { "epoch": 2.37, "eval_accuracy": 0.9011532125205931, "eval_loss": 0.3880288898944855, "eval_runtime": 15.9791, "eval_samples_per_second": 341.885, "eval_steps_per_second": 42.743, "step": 15500 }, { "epoch": 2.44, "learning_rate": 1.607708682016867e-05, "loss": 0.1624, "step": 16000 }, { "epoch": 2.44, "eval_accuracy": 0.8954786747208493, "eval_loss": 0.40831083059310913, "eval_runtime": 16.0179, "eval_samples_per_second": 341.056, "eval_steps_per_second": 42.64, "step": 16000 }, { "epoch": 2.52, "learning_rate": 1.5914593523017176e-05, "loss": 0.1616, "step": 16500 }, { "epoch": 2.52, "eval_accuracy": 0.9038989566172433, "eval_loss": 0.3950469195842743, "eval_runtime": 15.9181, "eval_samples_per_second": 343.195, "eval_steps_per_second": 42.907, "step": 16500 }, { "epoch": 2.6, "learning_rate": 1.5752100225865684e-05, "loss": 0.1587, "step": 17000 }, { "epoch": 2.6, "eval_accuracy": 0.9103056928427604, "eval_loss": 0.35793736577033997, "eval_runtime": 15.9998, "eval_samples_per_second": 341.441, "eval_steps_per_second": 42.688, "step": 17000 }, { "epoch": 2.67, "learning_rate": 1.558960692871419e-05, "loss": 0.1615, "step": 17500 }, { "epoch": 2.67, "eval_accuracy": 0.9011532125205931, "eval_loss": 0.39305415749549866, "eval_runtime": 15.9412, "eval_samples_per_second": 342.697, "eval_steps_per_second": 42.845, "step": 17500 }, { "epoch": 2.75, "learning_rate": 1.54271136315627e-05, "loss": 0.1623, "step": 18000 }, { "epoch": 2.75, "eval_accuracy": 0.90591250228812, "eval_loss": 0.3697090446949005, "eval_runtime": 15.9757, "eval_samples_per_second": 341.957, "eval_steps_per_second": 42.752, "step": 18000 }, { "epoch": 2.83, "learning_rate": 1.5264620334411207e-05, "loss": 0.1687, "step": 18500 }, { "epoch": 2.83, "eval_accuracy": 0.9037159070108, "eval_loss": 0.34730929136276245, "eval_runtime": 15.964, "eval_samples_per_second": 342.208, "eval_steps_per_second": 42.784, "step": 18500 }, { "epoch": 2.9, "learning_rate": 1.5102127037259715e-05, "loss": 0.1627, "step": 19000 }, { "epoch": 2.9, "eval_accuracy": 0.8982244188174996, "eval_loss": 0.3851012885570526, "eval_runtime": 15.9956, "eval_samples_per_second": 341.531, "eval_steps_per_second": 42.699, "step": 19000 }, { "epoch": 2.98, "learning_rate": 1.4939633740108221e-05, "loss": 0.1593, "step": 19500 }, { "epoch": 2.98, "eval_accuracy": 0.9018854109463664, "eval_loss": 0.4038921296596527, "eval_runtime": 15.9569, "eval_samples_per_second": 342.36, "eval_steps_per_second": 42.803, "step": 19500 }, { "epoch": 3.05, "learning_rate": 1.477714044295673e-05, "loss": 0.1136, "step": 20000 }, { "epoch": 3.05, "eval_accuracy": 0.9024345597656965, "eval_loss": 0.48348602652549744, "eval_runtime": 15.9762, "eval_samples_per_second": 341.946, "eval_steps_per_second": 42.751, "step": 20000 }, { "epoch": 3.13, "learning_rate": 1.4614647145805237e-05, "loss": 0.0965, "step": 20500 }, { "epoch": 3.13, "eval_accuracy": 0.9011532125205931, "eval_loss": 0.5060518383979797, "eval_runtime": 15.9664, "eval_samples_per_second": 342.155, "eval_steps_per_second": 42.777, "step": 20500 }, { "epoch": 3.21, "learning_rate": 1.4452153848653745e-05, "loss": 0.0931, "step": 21000 }, { "epoch": 3.21, "eval_accuracy": 0.8991396668497162, "eval_loss": 0.5278568267822266, "eval_runtime": 16.0118, "eval_samples_per_second": 341.186, "eval_steps_per_second": 42.656, "step": 21000 }, { "epoch": 3.28, "learning_rate": 1.4289660551502251e-05, "loss": 0.0993, "step": 21500 }, { "epoch": 3.28, "eval_accuracy": 0.9018854109463664, "eval_loss": 0.48559123277664185, "eval_runtime": 15.9958, "eval_samples_per_second": 341.526, "eval_steps_per_second": 42.699, "step": 21500 }, { "epoch": 3.36, "learning_rate": 1.412716725435076e-05, "loss": 0.1187, "step": 22000 }, { "epoch": 3.36, "eval_accuracy": 0.9031667581914699, "eval_loss": 0.48830872774124146, "eval_runtime": 16.0824, "eval_samples_per_second": 339.689, "eval_steps_per_second": 42.469, "step": 22000 }, { "epoch": 3.44, "learning_rate": 1.3964673957199267e-05, "loss": 0.1008, "step": 22500 }, { "epoch": 3.44, "eval_accuracy": 0.9015193117334798, "eval_loss": 0.5054051280021667, "eval_runtime": 15.9998, "eval_samples_per_second": 341.442, "eval_steps_per_second": 42.688, "step": 22500 }, { "epoch": 3.51, "learning_rate": 1.3802180660047775e-05, "loss": 0.1013, "step": 23000 }, { "epoch": 3.51, "eval_accuracy": 0.9022515101592532, "eval_loss": 0.5024679899215698, "eval_runtime": 16.0433, "eval_samples_per_second": 340.516, "eval_steps_per_second": 42.572, "step": 23000 }, { "epoch": 3.59, "learning_rate": 1.3639687362896281e-05, "loss": 0.1092, "step": 23500 }, { "epoch": 3.59, "eval_accuracy": 0.8985905180303863, "eval_loss": 0.44847574830055237, "eval_runtime": 15.9971, "eval_samples_per_second": 341.499, "eval_steps_per_second": 42.695, "step": 23500 }, { "epoch": 3.67, "learning_rate": 1.347719406574479e-05, "loss": 0.1135, "step": 24000 }, { "epoch": 3.67, "eval_accuracy": 0.8976752699981695, "eval_loss": 0.512342095375061, "eval_runtime": 16.034, "eval_samples_per_second": 340.714, "eval_steps_per_second": 42.597, "step": 24000 }, { "epoch": 3.74, "learning_rate": 1.3314700768593297e-05, "loss": 0.1042, "step": 24500 }, { "epoch": 3.74, "eval_accuracy": 0.9009701629141498, "eval_loss": 0.4883783161640167, "eval_runtime": 16.5969, "eval_samples_per_second": 329.158, "eval_steps_per_second": 41.152, "step": 24500 }, { "epoch": 3.82, "learning_rate": 1.3152207471441804e-05, "loss": 0.1178, "step": 25000 }, { "epoch": 3.82, "eval_accuracy": 0.900604063701263, "eval_loss": 0.4129727780818939, "eval_runtime": 16.0114, "eval_samples_per_second": 341.195, "eval_steps_per_second": 42.657, "step": 25000 }, { "epoch": 3.89, "learning_rate": 1.298971417429031e-05, "loss": 0.1041, "step": 25500 }, { "epoch": 3.89, "eval_accuracy": 0.8951125755079626, "eval_loss": 0.4847397208213806, "eval_runtime": 15.9759, "eval_samples_per_second": 341.952, "eval_steps_per_second": 42.752, "step": 25500 }, { "epoch": 3.97, "learning_rate": 1.282722087713882e-05, "loss": 0.1025, "step": 26000 }, { "epoch": 3.97, "eval_accuracy": 0.8985905180303863, "eval_loss": 0.4706151783466339, "eval_runtime": 15.9981, "eval_samples_per_second": 341.478, "eval_steps_per_second": 42.693, "step": 26000 }, { "epoch": 4.05, "learning_rate": 1.2664727579987326e-05, "loss": 0.0833, "step": 26500 }, { "epoch": 4.05, "eval_accuracy": 0.90536335346879, "eval_loss": 0.5018308162689209, "eval_runtime": 15.9984, "eval_samples_per_second": 341.472, "eval_steps_per_second": 42.692, "step": 26500 }, { "epoch": 4.12, "learning_rate": 1.2502234282835834e-05, "loss": 0.0632, "step": 27000 }, { "epoch": 4.12, "eval_accuracy": 0.9055464030752334, "eval_loss": 0.5425559282302856, "eval_runtime": 15.9902, "eval_samples_per_second": 341.646, "eval_steps_per_second": 42.714, "step": 27000 }, { "epoch": 4.2, "learning_rate": 1.233974098568434e-05, "loss": 0.0613, "step": 27500 }, { "epoch": 4.2, "eval_accuracy": 0.9020684605528098, "eval_loss": 0.5629337430000305, "eval_runtime": 16.0205, "eval_samples_per_second": 341.001, "eval_steps_per_second": 42.633, "step": 27500 }, { "epoch": 4.28, "learning_rate": 1.217724768853285e-05, "loss": 0.0703, "step": 28000 }, { "epoch": 4.28, "eval_accuracy": 0.9033498077979132, "eval_loss": 0.5763201713562012, "eval_runtime": 16.043, "eval_samples_per_second": 340.522, "eval_steps_per_second": 42.573, "step": 28000 }, { "epoch": 4.35, "learning_rate": 1.2014754391381356e-05, "loss": 0.0687, "step": 28500 }, { "epoch": 4.35, "eval_accuracy": 0.9038989566172433, "eval_loss": 0.5274228453636169, "eval_runtime": 16.0055, "eval_samples_per_second": 341.319, "eval_steps_per_second": 42.673, "step": 28500 }, { "epoch": 4.43, "learning_rate": 1.1852261094229864e-05, "loss": 0.0707, "step": 29000 }, { "epoch": 4.43, "eval_accuracy": 0.9018854109463664, "eval_loss": 0.5681867599487305, "eval_runtime": 16.0832, "eval_samples_per_second": 339.671, "eval_steps_per_second": 42.467, "step": 29000 }, { "epoch": 4.51, "learning_rate": 1.168976779707837e-05, "loss": 0.0717, "step": 29500 }, { "epoch": 4.51, "eval_accuracy": 0.9004210140948197, "eval_loss": 0.5382347702980042, "eval_runtime": 16.0114, "eval_samples_per_second": 341.195, "eval_steps_per_second": 42.657, "step": 29500 }, { "epoch": 4.58, "learning_rate": 1.152727449992688e-05, "loss": 0.0692, "step": 30000 }, { "epoch": 4.58, "eval_accuracy": 0.9009701629141498, "eval_loss": 0.5901069045066833, "eval_runtime": 15.9825, "eval_samples_per_second": 341.812, "eval_steps_per_second": 42.734, "step": 30000 }, { "epoch": 4.66, "learning_rate": 1.1364781202775386e-05, "loss": 0.0701, "step": 30500 }, { "epoch": 4.66, "eval_accuracy": 0.8989566172432729, "eval_loss": 0.5817105770111084, "eval_runtime": 16.0192, "eval_samples_per_second": 341.027, "eval_steps_per_second": 42.636, "step": 30500 }, { "epoch": 4.73, "learning_rate": 1.1202287905623894e-05, "loss": 0.0708, "step": 31000 }, { "epoch": 4.73, "eval_accuracy": 0.9018854109463664, "eval_loss": 0.5580092668533325, "eval_runtime": 16.0391, "eval_samples_per_second": 340.605, "eval_steps_per_second": 42.583, "step": 31000 }, { "epoch": 4.81, "learning_rate": 1.10397946084724e-05, "loss": 0.07, "step": 31500 }, { "epoch": 4.81, "eval_accuracy": 0.8989566172432729, "eval_loss": 0.5640453100204468, "eval_runtime": 15.9857, "eval_samples_per_second": 341.744, "eval_steps_per_second": 42.726, "step": 31500 }, { "epoch": 4.89, "learning_rate": 1.087730131132091e-05, "loss": 0.0725, "step": 32000 }, { "epoch": 4.89, "eval_accuracy": 0.8993227164561596, "eval_loss": 0.5767560005187988, "eval_runtime": 16.012, "eval_samples_per_second": 341.182, "eval_steps_per_second": 42.656, "step": 32000 }, { "epoch": 4.96, "learning_rate": 1.0714808014169416e-05, "loss": 0.0701, "step": 32500 }, { "epoch": 4.96, "eval_accuracy": 0.9035328574043566, "eval_loss": 0.5289304852485657, "eval_runtime": 16.0251, "eval_samples_per_second": 340.903, "eval_steps_per_second": 42.621, "step": 32500 }, { "epoch": 5.04, "learning_rate": 1.0552314717017924e-05, "loss": 0.0441, "step": 33000 }, { "epoch": 5.04, "eval_accuracy": 0.9009701629141498, "eval_loss": 0.6401296854019165, "eval_runtime": 16.0047, "eval_samples_per_second": 341.338, "eval_steps_per_second": 42.675, "step": 33000 }, { "epoch": 5.12, "learning_rate": 1.038982141986643e-05, "loss": 0.0388, "step": 33500 }, { "epoch": 5.12, "eval_accuracy": 0.8991396668497162, "eval_loss": 0.644571840763092, "eval_runtime": 15.9856, "eval_samples_per_second": 341.745, "eval_steps_per_second": 42.726, "step": 33500 }, { "epoch": 5.19, "learning_rate": 1.022732812271494e-05, "loss": 0.0417, "step": 34000 }, { "epoch": 5.19, "eval_accuracy": 0.9038989566172433, "eval_loss": 0.6326994299888611, "eval_runtime": 15.9965, "eval_samples_per_second": 341.512, "eval_steps_per_second": 42.697, "step": 34000 }, { "epoch": 5.27, "learning_rate": 1.0064834825563446e-05, "loss": 0.039, "step": 34500 }, { "epoch": 5.27, "eval_accuracy": 0.90481420464946, "eval_loss": 0.6385270357131958, "eval_runtime": 15.9731, "eval_samples_per_second": 342.013, "eval_steps_per_second": 42.759, "step": 34500 }, { "epoch": 5.35, "learning_rate": 9.902341528411954e-06, "loss": 0.0407, "step": 35000 }, { "epoch": 5.35, "eval_accuracy": 0.9029837085850265, "eval_loss": 0.6509894728660583, "eval_runtime": 16.0054, "eval_samples_per_second": 341.322, "eval_steps_per_second": 42.673, "step": 35000 }, { "epoch": 5.42, "learning_rate": 9.739848231260461e-06, "loss": 0.0446, "step": 35500 }, { "epoch": 5.42, "eval_accuracy": 0.9029837085850265, "eval_loss": 0.5787823796272278, "eval_runtime": 16.0265, "eval_samples_per_second": 340.872, "eval_steps_per_second": 42.617, "step": 35500 }, { "epoch": 5.5, "learning_rate": 9.57735493410897e-06, "loss": 0.0422, "step": 36000 }, { "epoch": 5.5, "eval_accuracy": 0.9024345597656965, "eval_loss": 0.6722646951675415, "eval_runtime": 16.1928, "eval_samples_per_second": 337.372, "eval_steps_per_second": 42.179, "step": 36000 }, { "epoch": 5.58, "learning_rate": 9.414861636957477e-06, "loss": 0.04, "step": 36500 }, { "epoch": 5.58, "eval_accuracy": 0.9037159070108, "eval_loss": 0.6601631045341492, "eval_runtime": 15.9359, "eval_samples_per_second": 342.812, "eval_steps_per_second": 42.859, "step": 36500 }, { "epoch": 5.65, "learning_rate": 9.252368339805983e-06, "loss": 0.0514, "step": 37000 }, { "epoch": 5.65, "eval_accuracy": 0.9024345597656965, "eval_loss": 0.6407366394996643, "eval_runtime": 16.0327, "eval_samples_per_second": 340.742, "eval_steps_per_second": 42.601, "step": 37000 }, { "epoch": 5.73, "learning_rate": 9.089875042654491e-06, "loss": 0.0462, "step": 37500 }, { "epoch": 5.73, "eval_accuracy": 0.90481420464946, "eval_loss": 0.6144635677337646, "eval_runtime": 16.0227, "eval_samples_per_second": 340.955, "eval_steps_per_second": 42.627, "step": 37500 }, { "epoch": 5.8, "learning_rate": 8.927381745502999e-06, "loss": 0.0479, "step": 38000 }, { "epoch": 5.8, "eval_accuracy": 0.9007871133077063, "eval_loss": 0.5881094336509705, "eval_runtime": 16.0441, "eval_samples_per_second": 340.5, "eval_steps_per_second": 42.57, "step": 38000 }, { "epoch": 5.88, "learning_rate": 8.764888448351507e-06, "loss": 0.0503, "step": 38500 }, { "epoch": 5.88, "eval_accuracy": 0.9007871133077063, "eval_loss": 0.6001153588294983, "eval_runtime": 15.9844, "eval_samples_per_second": 341.77, "eval_steps_per_second": 42.729, "step": 38500 }, { "epoch": 5.96, "learning_rate": 8.602395151200013e-06, "loss": 0.0385, "step": 39000 }, { "epoch": 5.96, "eval_accuracy": 0.9051803038623467, "eval_loss": 0.6463525295257568, "eval_runtime": 16.0778, "eval_samples_per_second": 339.785, "eval_steps_per_second": 42.481, "step": 39000 }, { "epoch": 6.03, "learning_rate": 8.439901854048521e-06, "loss": 0.0436, "step": 39500 }, { "epoch": 6.03, "eval_accuracy": 0.9038989566172433, "eval_loss": 0.6683228611946106, "eval_runtime": 15.9874, "eval_samples_per_second": 341.707, "eval_steps_per_second": 42.721, "step": 39500 }, { "epoch": 6.11, "learning_rate": 8.277408556897029e-06, "loss": 0.0296, "step": 40000 }, { "epoch": 6.11, "eval_accuracy": 0.9037159070108, "eval_loss": 0.696530282497406, "eval_runtime": 15.9785, "eval_samples_per_second": 341.896, "eval_steps_per_second": 42.745, "step": 40000 }, { "epoch": 6.19, "learning_rate": 8.114915259745537e-06, "loss": 0.0275, "step": 40500 }, { "epoch": 6.19, "eval_accuracy": 0.9038989566172433, "eval_loss": 0.7192733883857727, "eval_runtime": 15.9602, "eval_samples_per_second": 342.288, "eval_steps_per_second": 42.794, "step": 40500 }, { "epoch": 6.26, "learning_rate": 7.952421962594043e-06, "loss": 0.0258, "step": 41000 }, { "epoch": 6.26, "eval_accuracy": 0.9038989566172433, "eval_loss": 0.7229453921318054, "eval_runtime": 15.9822, "eval_samples_per_second": 341.817, "eval_steps_per_second": 42.735, "step": 41000 }, { "epoch": 6.34, "learning_rate": 7.789928665442551e-06, "loss": 0.0223, "step": 41500 }, { "epoch": 6.34, "eval_accuracy": 0.9073768991396669, "eval_loss": 0.697367250919342, "eval_runtime": 15.9944, "eval_samples_per_second": 341.557, "eval_steps_per_second": 42.702, "step": 41500 }, { "epoch": 6.42, "learning_rate": 7.627435368291059e-06, "loss": 0.0268, "step": 42000 }, { "epoch": 6.42, "eval_accuracy": 0.9028006589785832, "eval_loss": 0.704451858997345, "eval_runtime": 16.0075, "eval_samples_per_second": 341.277, "eval_steps_per_second": 42.667, "step": 42000 }, { "epoch": 6.49, "learning_rate": 7.464942071139566e-06, "loss": 0.0297, "step": 42500 }, { "epoch": 6.49, "eval_accuracy": 0.9055464030752334, "eval_loss": 0.72894686460495, "eval_runtime": 15.9896, "eval_samples_per_second": 341.659, "eval_steps_per_second": 42.715, "step": 42500 }, { "epoch": 6.57, "learning_rate": 7.302448773988074e-06, "loss": 0.0295, "step": 43000 }, { "epoch": 6.57, "eval_accuracy": 0.9049972542559034, "eval_loss": 0.6809965372085571, "eval_runtime": 16.0, "eval_samples_per_second": 341.438, "eval_steps_per_second": 42.688, "step": 43000 }, { "epoch": 6.64, "learning_rate": 7.139955476836581e-06, "loss": 0.0265, "step": 43500 }, { "epoch": 6.64, "eval_accuracy": 0.9064616511074501, "eval_loss": 0.6833410263061523, "eval_runtime": 15.9737, "eval_samples_per_second": 341.999, "eval_steps_per_second": 42.758, "step": 43500 }, { "epoch": 6.72, "learning_rate": 6.9774621796850885e-06, "loss": 0.0268, "step": 44000 }, { "epoch": 6.72, "eval_accuracy": 0.9035328574043566, "eval_loss": 0.7155495882034302, "eval_runtime": 16.0634, "eval_samples_per_second": 340.089, "eval_steps_per_second": 42.519, "step": 44000 }, { "epoch": 6.8, "learning_rate": 6.8149688825335956e-06, "loss": 0.0293, "step": 44500 }, { "epoch": 6.8, "eval_accuracy": 0.9015193117334798, "eval_loss": 0.7632263898849487, "eval_runtime": 15.9819, "eval_samples_per_second": 341.824, "eval_steps_per_second": 42.736, "step": 44500 }, { "epoch": 6.87, "learning_rate": 6.6524755853821034e-06, "loss": 0.0289, "step": 45000 }, { "epoch": 6.87, "eval_accuracy": 0.9033498077979132, "eval_loss": 0.7229273319244385, "eval_runtime": 16.0622, "eval_samples_per_second": 340.116, "eval_steps_per_second": 42.522, "step": 45000 }, { "epoch": 6.95, "learning_rate": 6.4899822882306105e-06, "loss": 0.0286, "step": 45500 }, { "epoch": 6.95, "eval_accuracy": 0.90536335346879, "eval_loss": 0.6670674681663513, "eval_runtime": 15.9524, "eval_samples_per_second": 342.456, "eval_steps_per_second": 42.815, "step": 45500 }, { "epoch": 7.03, "learning_rate": 6.327488991079118e-06, "loss": 0.024, "step": 46000 }, { "epoch": 7.03, "eval_accuracy": 0.9033498077979132, "eval_loss": 0.7658922672271729, "eval_runtime": 16.0192, "eval_samples_per_second": 341.028, "eval_steps_per_second": 42.636, "step": 46000 }, { "epoch": 7.1, "learning_rate": 6.164995693927625e-06, "loss": 0.0141, "step": 46500 }, { "epoch": 7.1, "eval_accuracy": 0.9022515101592532, "eval_loss": 0.7980563044548035, "eval_runtime": 16.0395, "eval_samples_per_second": 340.597, "eval_steps_per_second": 42.582, "step": 46500 }, { "epoch": 7.18, "learning_rate": 6.002502396776133e-06, "loss": 0.0212, "step": 47000 }, { "epoch": 7.18, "eval_accuracy": 0.9088412959912137, "eval_loss": 0.7021394968032837, "eval_runtime": 15.9988, "eval_samples_per_second": 341.463, "eval_steps_per_second": 42.691, "step": 47000 }, { "epoch": 7.26, "learning_rate": 5.84000909962464e-06, "loss": 0.0183, "step": 47500 }, { "epoch": 7.26, "eval_accuracy": 0.909573494416987, "eval_loss": 0.7122032046318054, "eval_runtime": 16.0037, "eval_samples_per_second": 341.358, "eval_steps_per_second": 42.678, "step": 47500 }, { "epoch": 7.33, "learning_rate": 5.677515802473148e-06, "loss": 0.0221, "step": 48000 }, { "epoch": 7.33, "eval_accuracy": 0.9064616511074501, "eval_loss": 0.7080280780792236, "eval_runtime": 16.0516, "eval_samples_per_second": 340.339, "eval_steps_per_second": 42.55, "step": 48000 }, { "epoch": 7.41, "learning_rate": 5.515022505321655e-06, "loss": 0.0146, "step": 48500 }, { "epoch": 7.41, "eval_accuracy": 0.9073768991396669, "eval_loss": 0.7343564033508301, "eval_runtime": 16.0182, "eval_samples_per_second": 341.049, "eval_steps_per_second": 42.639, "step": 48500 }, { "epoch": 7.48, "learning_rate": 5.352529208170163e-06, "loss": 0.0181, "step": 49000 }, { "epoch": 7.48, "eval_accuracy": 0.9104887424492037, "eval_loss": 0.7273329496383667, "eval_runtime": 15.9794, "eval_samples_per_second": 341.877, "eval_steps_per_second": 42.742, "step": 49000 }, { "epoch": 7.56, "learning_rate": 5.19003591101867e-06, "loss": 0.0161, "step": 49500 }, { "epoch": 7.56, "eval_accuracy": 0.9082921471718836, "eval_loss": 0.73320472240448, "eval_runtime": 15.9747, "eval_samples_per_second": 341.978, "eval_steps_per_second": 42.755, "step": 49500 }, { "epoch": 7.64, "learning_rate": 5.027542613867178e-06, "loss": 0.0193, "step": 50000 }, { "epoch": 7.64, "eval_accuracy": 0.9093904448105437, "eval_loss": 0.7117404937744141, "eval_runtime": 16.028, "eval_samples_per_second": 340.841, "eval_steps_per_second": 42.613, "step": 50000 }, { "epoch": 7.71, "learning_rate": 4.865049316715686e-06, "loss": 0.0165, "step": 50500 }, { "epoch": 7.71, "eval_accuracy": 0.9070107999267801, "eval_loss": 0.7796688675880432, "eval_runtime": 16.0196, "eval_samples_per_second": 341.02, "eval_steps_per_second": 42.635, "step": 50500 }, { "epoch": 7.79, "learning_rate": 4.702556019564194e-06, "loss": 0.0173, "step": 51000 }, { "epoch": 7.79, "eval_accuracy": 0.9106717920556471, "eval_loss": 0.7127623558044434, "eval_runtime": 16.0489, "eval_samples_per_second": 340.398, "eval_steps_per_second": 42.558, "step": 51000 }, { "epoch": 7.87, "learning_rate": 4.540062722412701e-06, "loss": 0.0158, "step": 51500 }, { "epoch": 7.87, "eval_accuracy": 0.9110378912685337, "eval_loss": 0.7419800758361816, "eval_runtime": 15.9669, "eval_samples_per_second": 342.144, "eval_steps_per_second": 42.776, "step": 51500 }, { "epoch": 7.94, "learning_rate": 4.377569425261209e-06, "loss": 0.0157, "step": 52000 }, { "epoch": 7.94, "eval_accuracy": 0.9128683873329673, "eval_loss": 0.7197983860969543, "eval_runtime": 16.0117, "eval_samples_per_second": 341.189, "eval_steps_per_second": 42.656, "step": 52000 }, { "epoch": 8.02, "learning_rate": 4.215076128109716e-06, "loss": 0.0142, "step": 52500 }, { "epoch": 8.02, "eval_accuracy": 0.9114039904814205, "eval_loss": 0.7032836675643921, "eval_runtime": 15.9817, "eval_samples_per_second": 341.829, "eval_steps_per_second": 42.736, "step": 52500 }, { "epoch": 8.1, "learning_rate": 4.0525828309582235e-06, "loss": 0.0107, "step": 53000 }, { "epoch": 8.1, "eval_accuracy": 0.9108548416620904, "eval_loss": 0.776072084903717, "eval_runtime": 16.015, "eval_samples_per_second": 341.118, "eval_steps_per_second": 42.648, "step": 53000 }, { "epoch": 8.17, "learning_rate": 3.8900895338067306e-06, "loss": 0.0122, "step": 53500 }, { "epoch": 8.17, "eval_accuracy": 0.9110378912685337, "eval_loss": 0.7777400612831116, "eval_runtime": 15.947, "eval_samples_per_second": 342.572, "eval_steps_per_second": 42.829, "step": 53500 }, { "epoch": 8.25, "learning_rate": 3.727596236655238e-06, "loss": 0.013, "step": 54000 }, { "epoch": 8.25, "eval_accuracy": 0.9075599487461102, "eval_loss": 0.7880035042762756, "eval_runtime": 15.9944, "eval_samples_per_second": 341.558, "eval_steps_per_second": 42.703, "step": 54000 }, { "epoch": 8.32, "learning_rate": 3.5651029395037455e-06, "loss": 0.0092, "step": 54500 }, { "epoch": 8.32, "eval_accuracy": 0.9073768991396669, "eval_loss": 0.8070389032363892, "eval_runtime": 15.9764, "eval_samples_per_second": 341.942, "eval_steps_per_second": 42.751, "step": 54500 }, { "epoch": 8.4, "learning_rate": 3.402609642352253e-06, "loss": 0.0102, "step": 55000 }, { "epoch": 8.4, "eval_accuracy": 0.9055464030752334, "eval_loss": 0.8112868666648865, "eval_runtime": 16.2072, "eval_samples_per_second": 337.073, "eval_steps_per_second": 42.142, "step": 55000 }, { "epoch": 8.48, "learning_rate": 3.2401163452007604e-06, "loss": 0.0099, "step": 55500 }, { "epoch": 8.48, "eval_accuracy": 0.9079260479589969, "eval_loss": 0.8152795433998108, "eval_runtime": 16.1355, "eval_samples_per_second": 338.57, "eval_steps_per_second": 42.329, "step": 55500 }, { "epoch": 8.55, "learning_rate": 3.0776230480492682e-06, "loss": 0.0087, "step": 56000 }, { "epoch": 8.55, "eval_accuracy": 0.9112209408749771, "eval_loss": 0.8045311570167542, "eval_runtime": 16.0534, "eval_samples_per_second": 340.302, "eval_steps_per_second": 42.546, "step": 56000 }, { "epoch": 8.63, "learning_rate": 2.9151297508977757e-06, "loss": 0.0133, "step": 56500 }, { "epoch": 8.63, "eval_accuracy": 0.9081090975654402, "eval_loss": 0.8173390030860901, "eval_runtime": 15.9908, "eval_samples_per_second": 341.634, "eval_steps_per_second": 42.712, "step": 56500 }, { "epoch": 8.71, "learning_rate": 2.752636453746283e-06, "loss": 0.017, "step": 57000 }, { "epoch": 8.71, "eval_accuracy": 0.9101226432363171, "eval_loss": 0.7646049857139587, "eval_runtime": 16.0101, "eval_samples_per_second": 341.222, "eval_steps_per_second": 42.661, "step": 57000 }, { "epoch": 8.78, "learning_rate": 2.5901431565947906e-06, "loss": 0.0093, "step": 57500 }, { "epoch": 8.78, "eval_accuracy": 0.9119531393007505, "eval_loss": 0.7681165337562561, "eval_runtime": 15.9413, "eval_samples_per_second": 342.694, "eval_steps_per_second": 42.845, "step": 57500 }, { "epoch": 8.86, "learning_rate": 2.427649859443298e-06, "loss": 0.0085, "step": 58000 }, { "epoch": 8.86, "eval_accuracy": 0.9082921471718836, "eval_loss": 0.8067189455032349, "eval_runtime": 16.0314, "eval_samples_per_second": 340.768, "eval_steps_per_second": 42.604, "step": 58000 }, { "epoch": 8.94, "learning_rate": 2.2651565622918055e-06, "loss": 0.0137, "step": 58500 }, { "epoch": 8.94, "eval_accuracy": 0.9106717920556471, "eval_loss": 0.7645466327667236, "eval_runtime": 15.9969, "eval_samples_per_second": 341.504, "eval_steps_per_second": 42.696, "step": 58500 }, { "epoch": 9.01, "learning_rate": 2.102663265140313e-06, "loss": 0.0099, "step": 59000 }, { "epoch": 9.01, "eval_accuracy": 0.9123192385136372, "eval_loss": 0.7742260098457336, "eval_runtime": 16.0529, "eval_samples_per_second": 340.313, "eval_steps_per_second": 42.547, "step": 59000 }, { "epoch": 9.09, "learning_rate": 1.9401699679888204e-06, "loss": 0.0074, "step": 59500 }, { "epoch": 9.09, "eval_accuracy": 0.90591250228812, "eval_loss": 0.8052135705947876, "eval_runtime": 15.9958, "eval_samples_per_second": 341.527, "eval_steps_per_second": 42.699, "step": 59500 }, { "epoch": 9.16, "learning_rate": 1.777676670837328e-06, "loss": 0.0057, "step": 60000 }, { "epoch": 9.16, "eval_accuracy": 0.9132344865458539, "eval_loss": 0.790639340877533, "eval_runtime": 16.0441, "eval_samples_per_second": 340.499, "eval_steps_per_second": 42.57, "step": 60000 }, { "epoch": 9.24, "learning_rate": 1.6151833736858355e-06, "loss": 0.0046, "step": 60500 }, { "epoch": 9.24, "eval_accuracy": 0.9136005857587406, "eval_loss": 0.7914955019950867, "eval_runtime": 15.9828, "eval_samples_per_second": 341.806, "eval_steps_per_second": 42.734, "step": 60500 }, { "epoch": 9.32, "learning_rate": 1.452690076534343e-06, "loss": 0.0036, "step": 61000 }, { "epoch": 9.32, "eval_accuracy": 0.9101226432363171, "eval_loss": 0.8114162087440491, "eval_runtime": 16.0918, "eval_samples_per_second": 339.489, "eval_steps_per_second": 42.444, "step": 61000 }, { "epoch": 9.39, "learning_rate": 1.2901967793828504e-06, "loss": 0.0036, "step": 61500 }, { "epoch": 9.39, "eval_accuracy": 0.9086582463847702, "eval_loss": 0.8381993174552917, "eval_runtime": 16.0882, "eval_samples_per_second": 339.565, "eval_steps_per_second": 42.453, "step": 61500 }, { "epoch": 9.47, "learning_rate": 1.127703482231358e-06, "loss": 0.0079, "step": 62000 }, { "epoch": 9.47, "eval_accuracy": 0.9110378912685337, "eval_loss": 0.8012856841087341, "eval_runtime": 16.0031, "eval_samples_per_second": 341.371, "eval_steps_per_second": 42.679, "step": 62000 }, { "epoch": 9.55, "learning_rate": 9.652101850798656e-07, "loss": 0.0079, "step": 62500 }, { "epoch": 9.55, "eval_accuracy": 0.9099395936298736, "eval_loss": 0.8116302490234375, "eval_runtime": 16.0181, "eval_samples_per_second": 341.052, "eval_steps_per_second": 42.639, "step": 62500 }, { "epoch": 9.62, "learning_rate": 8.02716887928373e-07, "loss": 0.0054, "step": 63000 }, { "epoch": 9.62, "eval_accuracy": 0.9110378912685337, "eval_loss": 0.8142715096473694, "eval_runtime": 16.0228, "eval_samples_per_second": 340.951, "eval_steps_per_second": 42.627, "step": 63000 }, { "epoch": 9.7, "learning_rate": 6.402235907768806e-07, "loss": 0.0036, "step": 63500 }, { "epoch": 9.7, "eval_accuracy": 0.9114039904814205, "eval_loss": 0.8137949109077454, "eval_runtime": 16.0052, "eval_samples_per_second": 341.327, "eval_steps_per_second": 42.674, "step": 63500 }, { "epoch": 9.78, "learning_rate": 4.777302936253879e-07, "loss": 0.0053, "step": 64000 }, { "epoch": 9.78, "eval_accuracy": 0.9106717920556471, "eval_loss": 0.8157973289489746, "eval_runtime": 16.048, "eval_samples_per_second": 340.415, "eval_steps_per_second": 42.56, "step": 64000 }, { "epoch": 9.85, "learning_rate": 3.1523699647389547e-07, "loss": 0.0035, "step": 64500 }, { "epoch": 9.85, "eval_accuracy": 0.9112209408749771, "eval_loss": 0.814604640007019, "eval_runtime": 16.0042, "eval_samples_per_second": 341.348, "eval_steps_per_second": 42.676, "step": 64500 }, { "epoch": 9.93, "learning_rate": 1.5274369932240295e-07, "loss": 0.0051, "step": 65000 }, { "epoch": 9.93, "eval_accuracy": 0.9106717920556471, "eval_loss": 0.8167830109596252, "eval_runtime": 16.0336, "eval_samples_per_second": 340.722, "eval_steps_per_second": 42.598, "step": 65000 }, { "epoch": 10.0, "step": 65470, "total_flos": 6.88976031789312e+16, "train_loss": 0.10566671685770447, "train_runtime": 14393.9986, "train_samples_per_second": 72.769, "train_steps_per_second": 4.548 } ], "max_steps": 65470, "num_train_epochs": 10, "total_flos": 6.88976031789312e+16, "trial_name": null, "trial_params": null }