{ "best_metric": 0.23302823305130005, "best_model_checkpoint": "./results_train/roberta-base/qnli/checkpoint-7000", "epoch": 10.0, "global_step": 65470, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 2.545176889793841e-06, "loss": 0.6919, "step": 500 }, { "epoch": 0.08, "eval_accuracy": 0.7501372872048325, "eval_loss": 0.6161059141159058, "eval_runtime": 15.4359, "eval_samples_per_second": 353.916, "eval_steps_per_second": 44.248, "step": 500 }, { "epoch": 0.15, "learning_rate": 5.090353779587682e-06, "loss": 0.4801, "step": 1000 }, { "epoch": 0.15, "eval_accuracy": 0.8550247116968699, "eval_loss": 0.3524323105812073, "eval_runtime": 15.4744, "eval_samples_per_second": 353.035, "eval_steps_per_second": 44.137, "step": 1000 }, { "epoch": 0.23, "learning_rate": 7.635530669381522e-06, "loss": 0.4049, "step": 1500 }, { "epoch": 0.23, "eval_accuracy": 0.8742449203734212, "eval_loss": 0.301090806722641, "eval_runtime": 15.4636, "eval_samples_per_second": 353.282, "eval_steps_per_second": 44.168, "step": 1500 }, { "epoch": 0.31, "learning_rate": 1.0180707559175364e-05, "loss": 0.3827, "step": 2000 }, { "epoch": 0.31, "eval_accuracy": 0.876807614863628, "eval_loss": 0.3125079274177551, "eval_runtime": 15.5809, "eval_samples_per_second": 350.621, "eval_steps_per_second": 43.836, "step": 2000 }, { "epoch": 0.38, "learning_rate": 1.2725884448969203e-05, "loss": 0.3445, "step": 2500 }, { "epoch": 0.38, "eval_accuracy": 0.8923668314113125, "eval_loss": 0.29156529903411865, "eval_runtime": 15.4606, "eval_samples_per_second": 353.351, "eval_steps_per_second": 44.177, "step": 2500 }, { "epoch": 0.46, "learning_rate": 1.5271061338763045e-05, "loss": 0.3567, "step": 3000 }, { "epoch": 0.46, "eval_accuracy": 0.8991396668497162, "eval_loss": 0.2662096619606018, "eval_runtime": 15.4956, "eval_samples_per_second": 352.551, "eval_steps_per_second": 44.077, "step": 3000 }, { "epoch": 0.53, "learning_rate": 1.7816238228556887e-05, "loss": 0.3422, "step": 3500 }, { "epoch": 0.53, "eval_accuracy": 0.8980413692110562, "eval_loss": 0.2656656801700592, "eval_runtime": 15.5011, "eval_samples_per_second": 352.426, "eval_steps_per_second": 44.061, "step": 3500 }, { "epoch": 0.61, "learning_rate": 1.997692595180449e-05, "loss": 0.3257, "step": 4000 }, { "epoch": 0.61, "eval_accuracy": 0.9020684605528098, "eval_loss": 0.2830497622489929, "eval_runtime": 15.5605, "eval_samples_per_second": 351.081, "eval_steps_per_second": 43.893, "step": 4000 }, { "epoch": 0.69, "learning_rate": 1.9814432654652997e-05, "loss": 0.3506, "step": 4500 }, { "epoch": 0.69, "eval_accuracy": 0.9062786015010068, "eval_loss": 0.24342051148414612, "eval_runtime": 15.5612, "eval_samples_per_second": 351.065, "eval_steps_per_second": 43.891, "step": 4500 }, { "epoch": 0.76, "learning_rate": 1.9651939357501505e-05, "loss": 0.317, "step": 5000 }, { "epoch": 0.76, "eval_accuracy": 0.9051803038623467, "eval_loss": 0.24402067065238953, "eval_runtime": 15.5216, "eval_samples_per_second": 351.96, "eval_steps_per_second": 44.003, "step": 5000 }, { "epoch": 0.84, "learning_rate": 1.9489446060350013e-05, "loss": 0.3152, "step": 5500 }, { "epoch": 0.84, "eval_accuracy": 0.9015193117334798, "eval_loss": 0.2785772681236267, "eval_runtime": 15.4885, "eval_samples_per_second": 352.712, "eval_steps_per_second": 44.097, "step": 5500 }, { "epoch": 0.92, "learning_rate": 1.932695276319852e-05, "loss": 0.2966, "step": 6000 }, { "epoch": 0.92, "eval_accuracy": 0.9082921471718836, "eval_loss": 0.2599336504936218, "eval_runtime": 15.5494, "eval_samples_per_second": 351.332, "eval_steps_per_second": 43.924, "step": 6000 }, { "epoch": 0.99, "learning_rate": 1.916445946604703e-05, "loss": 0.298, "step": 6500 }, { "epoch": 0.99, "eval_accuracy": 0.9070107999267801, "eval_loss": 0.2616613209247589, "eval_runtime": 16.1579, "eval_samples_per_second": 338.101, "eval_steps_per_second": 42.27, "step": 6500 }, { "epoch": 1.07, "learning_rate": 1.9001966168895533e-05, "loss": 0.2634, "step": 7000 }, { "epoch": 1.07, "eval_accuracy": 0.9154310818231741, "eval_loss": 0.23302823305130005, "eval_runtime": 15.5105, "eval_samples_per_second": 352.213, "eval_steps_per_second": 44.035, "step": 7000 }, { "epoch": 1.15, "learning_rate": 1.883947287174404e-05, "loss": 0.2625, "step": 7500 }, { "epoch": 1.15, "eval_accuracy": 0.9108548416620904, "eval_loss": 0.259826123714447, "eval_runtime": 16.1363, "eval_samples_per_second": 338.553, "eval_steps_per_second": 42.327, "step": 7500 }, { "epoch": 1.22, "learning_rate": 1.867697957459255e-05, "loss": 0.2596, "step": 8000 }, { "epoch": 1.22, "eval_accuracy": 0.9099395936298736, "eval_loss": 0.36161935329437256, "eval_runtime": 15.5589, "eval_samples_per_second": 351.116, "eval_steps_per_second": 43.898, "step": 8000 }, { "epoch": 1.3, "learning_rate": 1.8514486277441056e-05, "loss": 0.2457, "step": 8500 }, { "epoch": 1.3, "eval_accuracy": 0.909573494416987, "eval_loss": 0.2799856662750244, "eval_runtime": 15.5157, "eval_samples_per_second": 352.095, "eval_steps_per_second": 44.02, "step": 8500 }, { "epoch": 1.37, "learning_rate": 1.8351992980289564e-05, "loss": 0.2545, "step": 9000 }, { "epoch": 1.37, "eval_accuracy": 0.9081090975654402, "eval_loss": 0.29598304629325867, "eval_runtime": 15.4941, "eval_samples_per_second": 352.587, "eval_steps_per_second": 44.081, "step": 9000 }, { "epoch": 1.45, "learning_rate": 1.8189499683138072e-05, "loss": 0.2535, "step": 9500 }, { "epoch": 1.45, "eval_accuracy": 0.9114039904814205, "eval_loss": 0.23894734680652618, "eval_runtime": 16.236, "eval_samples_per_second": 336.474, "eval_steps_per_second": 42.067, "step": 9500 }, { "epoch": 1.53, "learning_rate": 1.802700638598658e-05, "loss": 0.2639, "step": 10000 }, { "epoch": 1.53, "eval_accuracy": 0.8912685337726524, "eval_loss": 0.33427268266677856, "eval_runtime": 15.4977, "eval_samples_per_second": 352.503, "eval_steps_per_second": 44.071, "step": 10000 }, { "epoch": 1.6, "learning_rate": 1.7864513088835088e-05, "loss": 0.2434, "step": 10500 }, { "epoch": 1.6, "eval_accuracy": 0.9115870400878638, "eval_loss": 0.24697424471378326, "eval_runtime": 15.5007, "eval_samples_per_second": 352.436, "eval_steps_per_second": 44.063, "step": 10500 }, { "epoch": 1.68, "learning_rate": 1.7702019791683592e-05, "loss": 0.2613, "step": 11000 }, { "epoch": 1.68, "eval_accuracy": 0.9092073952041003, "eval_loss": 0.294897198677063, "eval_runtime": 15.4616, "eval_samples_per_second": 353.327, "eval_steps_per_second": 44.174, "step": 11000 }, { "epoch": 1.76, "learning_rate": 1.75395264945321e-05, "loss": 0.2456, "step": 11500 }, { "epoch": 1.76, "eval_accuracy": 0.9163463298553908, "eval_loss": 0.2556602358818054, "eval_runtime": 15.4884, "eval_samples_per_second": 352.715, "eval_steps_per_second": 44.097, "step": 11500 }, { "epoch": 1.83, "learning_rate": 1.737703319738061e-05, "loss": 0.2483, "step": 12000 }, { "epoch": 1.83, "eval_accuracy": 0.9141497345780707, "eval_loss": 0.24623550474643707, "eval_runtime": 15.544, "eval_samples_per_second": 351.453, "eval_steps_per_second": 43.94, "step": 12000 }, { "epoch": 1.91, "learning_rate": 1.7214539900229116e-05, "loss": 0.2524, "step": 12500 }, { "epoch": 1.91, "eval_accuracy": 0.9114039904814205, "eval_loss": 0.2453071027994156, "eval_runtime": 15.5138, "eval_samples_per_second": 352.139, "eval_steps_per_second": 44.025, "step": 12500 }, { "epoch": 1.99, "learning_rate": 1.7052046603077624e-05, "loss": 0.2467, "step": 13000 }, { "epoch": 1.99, "eval_accuracy": 0.9161632802489474, "eval_loss": 0.2610779404640198, "eval_runtime": 15.5605, "eval_samples_per_second": 351.082, "eval_steps_per_second": 43.893, "step": 13000 }, { "epoch": 2.06, "learning_rate": 1.6889553305926132e-05, "loss": 0.2059, "step": 13500 }, { "epoch": 2.06, "eval_accuracy": 0.9157971810360608, "eval_loss": 0.3070930540561676, "eval_runtime": 15.4895, "eval_samples_per_second": 352.691, "eval_steps_per_second": 44.094, "step": 13500 }, { "epoch": 2.14, "learning_rate": 1.672706000877464e-05, "loss": 0.1968, "step": 14000 }, { "epoch": 2.14, "eval_accuracy": 0.9209225700164745, "eval_loss": 0.3205060660839081, "eval_runtime": 15.5462, "eval_samples_per_second": 351.404, "eval_steps_per_second": 43.933, "step": 14000 }, { "epoch": 2.21, "learning_rate": 1.6564566711623148e-05, "loss": 0.1944, "step": 14500 }, { "epoch": 2.21, "eval_accuracy": 0.9145158337909574, "eval_loss": 0.342997282743454, "eval_runtime": 15.462, "eval_samples_per_second": 353.317, "eval_steps_per_second": 44.173, "step": 14500 }, { "epoch": 2.29, "learning_rate": 1.6402073414471655e-05, "loss": 0.2065, "step": 15000 }, { "epoch": 2.29, "eval_accuracy": 0.9146988833974007, "eval_loss": 0.33879798650741577, "eval_runtime": 15.4872, "eval_samples_per_second": 352.743, "eval_steps_per_second": 44.101, "step": 15000 }, { "epoch": 2.37, "learning_rate": 1.6239580117320163e-05, "loss": 0.1992, "step": 15500 }, { "epoch": 2.37, "eval_accuracy": 0.9157971810360608, "eval_loss": 0.2569463551044464, "eval_runtime": 15.4551, "eval_samples_per_second": 353.476, "eval_steps_per_second": 44.193, "step": 15500 }, { "epoch": 2.44, "learning_rate": 1.607708682016867e-05, "loss": 0.1994, "step": 16000 }, { "epoch": 2.44, "eval_accuracy": 0.9108548416620904, "eval_loss": 0.33493661880493164, "eval_runtime": 15.5323, "eval_samples_per_second": 351.718, "eval_steps_per_second": 43.973, "step": 16000 }, { "epoch": 2.52, "learning_rate": 1.5914593523017176e-05, "loss": 0.2001, "step": 16500 }, { "epoch": 2.52, "eval_accuracy": 0.909573494416987, "eval_loss": 0.2850426137447357, "eval_runtime": 15.4842, "eval_samples_per_second": 352.811, "eval_steps_per_second": 44.109, "step": 16500 }, { "epoch": 2.6, "learning_rate": 1.5752100225865684e-05, "loss": 0.2014, "step": 17000 }, { "epoch": 2.6, "eval_accuracy": 0.9200073219842577, "eval_loss": 0.3214400112628937, "eval_runtime": 15.5272, "eval_samples_per_second": 351.833, "eval_steps_per_second": 43.987, "step": 17000 }, { "epoch": 2.67, "learning_rate": 1.558960692871419e-05, "loss": 0.2156, "step": 17500 }, { "epoch": 2.67, "eval_accuracy": 0.9134175361522973, "eval_loss": 0.3078743815422058, "eval_runtime": 15.5189, "eval_samples_per_second": 352.023, "eval_steps_per_second": 44.011, "step": 17500 }, { "epoch": 2.75, "learning_rate": 1.54271136315627e-05, "loss": 0.2036, "step": 18000 }, { "epoch": 2.75, "eval_accuracy": 0.9163463298553908, "eval_loss": 0.2739076316356659, "eval_runtime": 15.5712, "eval_samples_per_second": 350.84, "eval_steps_per_second": 43.863, "step": 18000 }, { "epoch": 2.83, "learning_rate": 1.5264620334411207e-05, "loss": 0.2118, "step": 18500 }, { "epoch": 2.83, "eval_accuracy": 0.918542925132711, "eval_loss": 0.2790161669254303, "eval_runtime": 15.4896, "eval_samples_per_second": 352.687, "eval_steps_per_second": 44.094, "step": 18500 }, { "epoch": 2.9, "learning_rate": 1.5102127037259715e-05, "loss": 0.2167, "step": 19000 }, { "epoch": 2.9, "eval_accuracy": 0.9167124290682775, "eval_loss": 0.2698538899421692, "eval_runtime": 15.51, "eval_samples_per_second": 352.224, "eval_steps_per_second": 44.036, "step": 19000 }, { "epoch": 2.98, "learning_rate": 1.4939633740108221e-05, "loss": 0.2015, "step": 19500 }, { "epoch": 2.98, "eval_accuracy": 0.9189090243455976, "eval_loss": 0.28947147727012634, "eval_runtime": 15.4881, "eval_samples_per_second": 352.721, "eval_steps_per_second": 44.098, "step": 19500 }, { "epoch": 3.05, "learning_rate": 1.477714044295673e-05, "loss": 0.1649, "step": 20000 }, { "epoch": 3.05, "eval_accuracy": 0.9161632802489474, "eval_loss": 0.37189313769340515, "eval_runtime": 15.48, "eval_samples_per_second": 352.907, "eval_steps_per_second": 44.121, "step": 20000 }, { "epoch": 3.13, "learning_rate": 1.4614647145805237e-05, "loss": 0.1505, "step": 20500 }, { "epoch": 3.13, "eval_accuracy": 0.9132344865458539, "eval_loss": 0.3699614107608795, "eval_runtime": 15.4613, "eval_samples_per_second": 353.334, "eval_steps_per_second": 44.175, "step": 20500 }, { "epoch": 3.21, "learning_rate": 1.4452153848653745e-05, "loss": 0.1509, "step": 21000 }, { "epoch": 3.21, "eval_accuracy": 0.9156141314296175, "eval_loss": 0.3720746636390686, "eval_runtime": 15.4811, "eval_samples_per_second": 352.881, "eval_steps_per_second": 44.118, "step": 21000 }, { "epoch": 3.28, "learning_rate": 1.4289660551502251e-05, "loss": 0.1517, "step": 21500 }, { "epoch": 3.28, "eval_accuracy": 0.9154310818231741, "eval_loss": 0.35659804940223694, "eval_runtime": 15.4964, "eval_samples_per_second": 352.533, "eval_steps_per_second": 44.075, "step": 21500 }, { "epoch": 3.36, "learning_rate": 1.412716725435076e-05, "loss": 0.1583, "step": 22000 }, { "epoch": 3.36, "eval_accuracy": 0.9139666849716274, "eval_loss": 0.3975317180156708, "eval_runtime": 15.5028, "eval_samples_per_second": 352.388, "eval_steps_per_second": 44.057, "step": 22000 }, { "epoch": 3.44, "learning_rate": 1.3964673957199267e-05, "loss": 0.1568, "step": 22500 }, { "epoch": 3.44, "eval_accuracy": 0.9136005857587406, "eval_loss": 0.4135216772556305, "eval_runtime": 15.5258, "eval_samples_per_second": 351.866, "eval_steps_per_second": 43.991, "step": 22500 }, { "epoch": 3.51, "learning_rate": 1.3802180660047775e-05, "loss": 0.1642, "step": 23000 }, { "epoch": 3.51, "eval_accuracy": 0.9128683873329673, "eval_loss": 0.37052008509635925, "eval_runtime": 15.557, "eval_samples_per_second": 351.161, "eval_steps_per_second": 43.903, "step": 23000 }, { "epoch": 3.59, "learning_rate": 1.3639687362896281e-05, "loss": 0.1781, "step": 23500 }, { "epoch": 3.59, "eval_accuracy": 0.9156141314296175, "eval_loss": 0.33989137411117554, "eval_runtime": 15.5332, "eval_samples_per_second": 351.699, "eval_steps_per_second": 43.97, "step": 23500 }, { "epoch": 3.67, "learning_rate": 1.347719406574479e-05, "loss": 0.1725, "step": 24000 }, { "epoch": 3.67, "eval_accuracy": 0.9159802306425041, "eval_loss": 0.3164858818054199, "eval_runtime": 15.5466, "eval_samples_per_second": 351.394, "eval_steps_per_second": 43.932, "step": 24000 }, { "epoch": 3.74, "learning_rate": 1.3314700768593297e-05, "loss": 0.1675, "step": 24500 }, { "epoch": 3.74, "eval_accuracy": 0.917993776313381, "eval_loss": 0.3278675973415375, "eval_runtime": 15.4883, "eval_samples_per_second": 352.717, "eval_steps_per_second": 44.098, "step": 24500 }, { "epoch": 3.82, "learning_rate": 1.3152207471441804e-05, "loss": 0.165, "step": 25000 }, { "epoch": 3.82, "eval_accuracy": 0.920190371590701, "eval_loss": 0.34237906336784363, "eval_runtime": 15.4787, "eval_samples_per_second": 352.937, "eval_steps_per_second": 44.125, "step": 25000 }, { "epoch": 3.89, "learning_rate": 1.298971417429031e-05, "loss": 0.1608, "step": 25500 }, { "epoch": 3.89, "eval_accuracy": 0.9137836353651839, "eval_loss": 0.4021676778793335, "eval_runtime": 15.432, "eval_samples_per_second": 354.004, "eval_steps_per_second": 44.259, "step": 25500 }, { "epoch": 3.97, "learning_rate": 1.282722087713882e-05, "loss": 0.1576, "step": 26000 }, { "epoch": 3.97, "eval_accuracy": 0.9146988833974007, "eval_loss": 0.3611091375350952, "eval_runtime": 15.4994, "eval_samples_per_second": 352.465, "eval_steps_per_second": 44.066, "step": 26000 }, { "epoch": 4.05, "learning_rate": 1.2664727579987326e-05, "loss": 0.1382, "step": 26500 }, { "epoch": 4.05, "eval_accuracy": 0.9139666849716274, "eval_loss": 0.400115966796875, "eval_runtime": 15.4319, "eval_samples_per_second": 354.008, "eval_steps_per_second": 44.259, "step": 26500 }, { "epoch": 4.12, "learning_rate": 1.2502234282835834e-05, "loss": 0.1126, "step": 27000 }, { "epoch": 4.12, "eval_accuracy": 0.9168954786747209, "eval_loss": 0.40148910880088806, "eval_runtime": 15.5197, "eval_samples_per_second": 352.005, "eval_steps_per_second": 44.009, "step": 27000 }, { "epoch": 4.2, "learning_rate": 1.233974098568434e-05, "loss": 0.1048, "step": 27500 }, { "epoch": 4.2, "eval_accuracy": 0.9168954786747209, "eval_loss": 0.391897052526474, "eval_runtime": 15.4767, "eval_samples_per_second": 352.982, "eval_steps_per_second": 44.131, "step": 27500 }, { "epoch": 4.28, "learning_rate": 1.217724768853285e-05, "loss": 0.1057, "step": 28000 }, { "epoch": 4.28, "eval_accuracy": 0.9176276771004942, "eval_loss": 0.4071587324142456, "eval_runtime": 15.5017, "eval_samples_per_second": 352.414, "eval_steps_per_second": 44.06, "step": 28000 }, { "epoch": 4.35, "learning_rate": 1.2014754391381356e-05, "loss": 0.1212, "step": 28500 }, { "epoch": 4.35, "eval_accuracy": 0.9161632802489474, "eval_loss": 0.36226457357406616, "eval_runtime": 15.4788, "eval_samples_per_second": 352.935, "eval_steps_per_second": 44.125, "step": 28500 }, { "epoch": 4.43, "learning_rate": 1.1852261094229864e-05, "loss": 0.1152, "step": 29000 }, { "epoch": 4.43, "eval_accuracy": 0.914881933003844, "eval_loss": 0.39458510279655457, "eval_runtime": 15.5093, "eval_samples_per_second": 352.239, "eval_steps_per_second": 44.038, "step": 29000 }, { "epoch": 4.51, "learning_rate": 1.168976779707837e-05, "loss": 0.125, "step": 29500 }, { "epoch": 4.51, "eval_accuracy": 0.9156141314296175, "eval_loss": 0.4142376780509949, "eval_runtime": 15.4899, "eval_samples_per_second": 352.682, "eval_steps_per_second": 44.093, "step": 29500 }, { "epoch": 4.58, "learning_rate": 1.152727449992688e-05, "loss": 0.1195, "step": 30000 }, { "epoch": 4.58, "eval_accuracy": 0.9150649826102873, "eval_loss": 0.40947625041007996, "eval_runtime": 15.5825, "eval_samples_per_second": 350.585, "eval_steps_per_second": 43.831, "step": 30000 }, { "epoch": 4.66, "learning_rate": 1.1364781202775386e-05, "loss": 0.1139, "step": 30500 }, { "epoch": 4.66, "eval_accuracy": 0.9088412959912137, "eval_loss": 0.4585929811000824, "eval_runtime": 15.5177, "eval_samples_per_second": 352.05, "eval_steps_per_second": 44.014, "step": 30500 }, { "epoch": 4.73, "learning_rate": 1.1202287905623894e-05, "loss": 0.1279, "step": 31000 }, { "epoch": 4.73, "eval_accuracy": 0.9203734211971444, "eval_loss": 0.39000362157821655, "eval_runtime": 15.5995, "eval_samples_per_second": 350.204, "eval_steps_per_second": 43.784, "step": 31000 }, { "epoch": 4.81, "learning_rate": 1.10397946084724e-05, "loss": 0.1306, "step": 31500 }, { "epoch": 4.81, "eval_accuracy": 0.9165293794618341, "eval_loss": 0.37414777278900146, "eval_runtime": 15.5095, "eval_samples_per_second": 352.236, "eval_steps_per_second": 44.038, "step": 31500 }, { "epoch": 4.89, "learning_rate": 1.087730131132091e-05, "loss": 0.1091, "step": 32000 }, { "epoch": 4.89, "eval_accuracy": 0.9207395204100312, "eval_loss": 0.4295918941497803, "eval_runtime": 15.4949, "eval_samples_per_second": 352.567, "eval_steps_per_second": 44.079, "step": 32000 }, { "epoch": 4.96, "learning_rate": 1.0714808014169416e-05, "loss": 0.1272, "step": 32500 }, { "epoch": 4.96, "eval_accuracy": 0.9189090243455976, "eval_loss": 0.37242451310157776, "eval_runtime": 15.5005, "eval_samples_per_second": 352.44, "eval_steps_per_second": 44.063, "step": 32500 }, { "epoch": 5.04, "learning_rate": 1.0552314717017924e-05, "loss": 0.0906, "step": 33000 }, { "epoch": 5.04, "eval_accuracy": 0.9181768259198243, "eval_loss": 0.4511684477329254, "eval_runtime": 15.5205, "eval_samples_per_second": 351.985, "eval_steps_per_second": 44.006, "step": 33000 }, { "epoch": 5.12, "learning_rate": 1.038982141986643e-05, "loss": 0.0915, "step": 33500 }, { "epoch": 5.12, "eval_accuracy": 0.9220208676551346, "eval_loss": 0.4160422086715698, "eval_runtime": 15.4765, "eval_samples_per_second": 352.987, "eval_steps_per_second": 44.131, "step": 33500 }, { "epoch": 5.19, "learning_rate": 1.022732812271494e-05, "loss": 0.0773, "step": 34000 }, { "epoch": 5.19, "eval_accuracy": 0.917993776313381, "eval_loss": 0.47428572177886963, "eval_runtime": 15.4784, "eval_samples_per_second": 352.944, "eval_steps_per_second": 44.126, "step": 34000 }, { "epoch": 5.27, "learning_rate": 1.0064834825563446e-05, "loss": 0.0861, "step": 34500 }, { "epoch": 5.27, "eval_accuracy": 0.9203734211971444, "eval_loss": 0.5023528337478638, "eval_runtime": 15.4707, "eval_samples_per_second": 353.119, "eval_steps_per_second": 44.148, "step": 34500 }, { "epoch": 5.35, "learning_rate": 9.902341528411954e-06, "loss": 0.0729, "step": 35000 }, { "epoch": 5.35, "eval_accuracy": 0.9203734211971444, "eval_loss": 0.42824575304985046, "eval_runtime": 15.5357, "eval_samples_per_second": 351.643, "eval_steps_per_second": 43.963, "step": 35000 }, { "epoch": 5.42, "learning_rate": 9.739848231260461e-06, "loss": 0.0901, "step": 35500 }, { "epoch": 5.42, "eval_accuracy": 0.9225700164744646, "eval_loss": 0.46121296286582947, "eval_runtime": 15.4855, "eval_samples_per_second": 352.782, "eval_steps_per_second": 44.106, "step": 35500 }, { "epoch": 5.5, "learning_rate": 9.57735493410897e-06, "loss": 0.0856, "step": 36000 }, { "epoch": 5.5, "eval_accuracy": 0.917993776313381, "eval_loss": 0.44952550530433655, "eval_runtime": 15.5112, "eval_samples_per_second": 352.196, "eval_steps_per_second": 44.033, "step": 36000 }, { "epoch": 5.58, "learning_rate": 9.414861636957477e-06, "loss": 0.0839, "step": 36500 }, { "epoch": 5.58, "eval_accuracy": 0.9205564708035878, "eval_loss": 0.45009082555770874, "eval_runtime": 15.4775, "eval_samples_per_second": 352.963, "eval_steps_per_second": 44.128, "step": 36500 }, { "epoch": 5.65, "learning_rate": 9.252368339805983e-06, "loss": 0.0874, "step": 37000 }, { "epoch": 5.65, "eval_accuracy": 0.9200073219842577, "eval_loss": 0.41364917159080505, "eval_runtime": 15.5201, "eval_samples_per_second": 351.994, "eval_steps_per_second": 44.007, "step": 37000 }, { "epoch": 5.73, "learning_rate": 9.089875042654491e-06, "loss": 0.0944, "step": 37500 }, { "epoch": 5.73, "eval_accuracy": 0.9165293794618341, "eval_loss": 0.46293067932128906, "eval_runtime": 15.474, "eval_samples_per_second": 353.043, "eval_steps_per_second": 44.138, "step": 37500 }, { "epoch": 5.8, "learning_rate": 8.927381745502999e-06, "loss": 0.0874, "step": 38000 }, { "epoch": 5.8, "eval_accuracy": 0.9159802306425041, "eval_loss": 0.4790218770503998, "eval_runtime": 15.4804, "eval_samples_per_second": 352.897, "eval_steps_per_second": 44.12, "step": 38000 }, { "epoch": 5.88, "learning_rate": 8.764888448351507e-06, "loss": 0.0859, "step": 38500 }, { "epoch": 5.88, "eval_accuracy": 0.9132344865458539, "eval_loss": 0.4725111722946167, "eval_runtime": 15.4939, "eval_samples_per_second": 352.59, "eval_steps_per_second": 44.082, "step": 38500 }, { "epoch": 5.96, "learning_rate": 8.602395151200013e-06, "loss": 0.0808, "step": 39000 }, { "epoch": 5.96, "eval_accuracy": 0.9161632802489474, "eval_loss": 0.4613119959831238, "eval_runtime": 15.5324, "eval_samples_per_second": 351.717, "eval_steps_per_second": 43.973, "step": 39000 }, { "epoch": 6.03, "learning_rate": 8.439901854048521e-06, "loss": 0.0723, "step": 39500 }, { "epoch": 6.03, "eval_accuracy": 0.9194581731649277, "eval_loss": 0.4815793037414551, "eval_runtime": 15.4962, "eval_samples_per_second": 352.537, "eval_steps_per_second": 44.075, "step": 39500 }, { "epoch": 6.11, "learning_rate": 8.277408556897029e-06, "loss": 0.0568, "step": 40000 }, { "epoch": 6.11, "eval_accuracy": 0.9187259747391543, "eval_loss": 0.5257057547569275, "eval_runtime": 15.5592, "eval_samples_per_second": 351.112, "eval_steps_per_second": 43.897, "step": 40000 }, { "epoch": 6.19, "learning_rate": 8.114915259745537e-06, "loss": 0.0628, "step": 40500 }, { "epoch": 6.19, "eval_accuracy": 0.9194581731649277, "eval_loss": 0.4516303837299347, "eval_runtime": 15.5547, "eval_samples_per_second": 351.213, "eval_steps_per_second": 43.91, "step": 40500 }, { "epoch": 6.26, "learning_rate": 7.952421962594043e-06, "loss": 0.053, "step": 41000 }, { "epoch": 6.26, "eval_accuracy": 0.9187259747391543, "eval_loss": 0.4928816258907318, "eval_runtime": 15.5194, "eval_samples_per_second": 352.012, "eval_steps_per_second": 44.01, "step": 41000 }, { "epoch": 6.34, "learning_rate": 7.789928665442551e-06, "loss": 0.0574, "step": 41500 }, { "epoch": 6.34, "eval_accuracy": 0.919092073952041, "eval_loss": 0.48882895708084106, "eval_runtime": 15.4527, "eval_samples_per_second": 353.531, "eval_steps_per_second": 44.2, "step": 41500 }, { "epoch": 6.42, "learning_rate": 7.627435368291059e-06, "loss": 0.0717, "step": 42000 }, { "epoch": 6.42, "eval_accuracy": 0.9165293794618341, "eval_loss": 0.4769105911254883, "eval_runtime": 15.4715, "eval_samples_per_second": 353.101, "eval_steps_per_second": 44.146, "step": 42000 }, { "epoch": 6.49, "learning_rate": 7.464942071139566e-06, "loss": 0.0622, "step": 42500 }, { "epoch": 6.49, "eval_accuracy": 0.9183598755262676, "eval_loss": 0.5081596374511719, "eval_runtime": 15.4674, "eval_samples_per_second": 353.193, "eval_steps_per_second": 44.157, "step": 42500 }, { "epoch": 6.57, "learning_rate": 7.302448773988074e-06, "loss": 0.0593, "step": 43000 }, { "epoch": 6.57, "eval_accuracy": 0.9211056196229178, "eval_loss": 0.44598302245140076, "eval_runtime": 15.4988, "eval_samples_per_second": 352.48, "eval_steps_per_second": 44.068, "step": 43000 }, { "epoch": 6.64, "learning_rate": 7.139955476836581e-06, "loss": 0.0603, "step": 43500 }, { "epoch": 6.64, "eval_accuracy": 0.9205564708035878, "eval_loss": 0.43451356887817383, "eval_runtime": 15.4424, "eval_samples_per_second": 353.766, "eval_steps_per_second": 44.229, "step": 43500 }, { "epoch": 6.72, "learning_rate": 6.9774621796850885e-06, "loss": 0.0659, "step": 44000 }, { "epoch": 6.72, "eval_accuracy": 0.9189090243455976, "eval_loss": 0.4423438310623169, "eval_runtime": 15.4892, "eval_samples_per_second": 352.697, "eval_steps_per_second": 44.095, "step": 44000 }, { "epoch": 6.8, "learning_rate": 6.8149688825335956e-06, "loss": 0.0629, "step": 44500 }, { "epoch": 6.8, "eval_accuracy": 0.919092073952041, "eval_loss": 0.47710955142974854, "eval_runtime": 15.4296, "eval_samples_per_second": 354.061, "eval_steps_per_second": 44.266, "step": 44500 }, { "epoch": 6.87, "learning_rate": 6.6524755853821034e-06, "loss": 0.058, "step": 45000 }, { "epoch": 6.87, "eval_accuracy": 0.9227530660809079, "eval_loss": 0.4589206576347351, "eval_runtime": 15.5247, "eval_samples_per_second": 351.892, "eval_steps_per_second": 43.995, "step": 45000 }, { "epoch": 6.95, "learning_rate": 6.4899822882306105e-06, "loss": 0.0545, "step": 45500 }, { "epoch": 6.95, "eval_accuracy": 0.9200073219842577, "eval_loss": 0.508368730545044, "eval_runtime": 15.453, "eval_samples_per_second": 353.524, "eval_steps_per_second": 44.199, "step": 45500 }, { "epoch": 7.03, "learning_rate": 6.327488991079118e-06, "loss": 0.0465, "step": 46000 }, { "epoch": 7.03, "eval_accuracy": 0.9192751235584844, "eval_loss": 0.5422417521476746, "eval_runtime": 15.5091, "eval_samples_per_second": 352.244, "eval_steps_per_second": 44.039, "step": 46000 }, { "epoch": 7.1, "learning_rate": 6.164995693927625e-06, "loss": 0.0424, "step": 46500 }, { "epoch": 7.1, "eval_accuracy": 0.920190371590701, "eval_loss": 0.5029627680778503, "eval_runtime": 15.5152, "eval_samples_per_second": 352.107, "eval_steps_per_second": 44.021, "step": 46500 }, { "epoch": 7.18, "learning_rate": 6.002502396776133e-06, "loss": 0.0317, "step": 47000 }, { "epoch": 7.18, "eval_accuracy": 0.9212886692293611, "eval_loss": 0.5393094420433044, "eval_runtime": 15.5001, "eval_samples_per_second": 352.449, "eval_steps_per_second": 44.064, "step": 47000 }, { "epoch": 7.26, "learning_rate": 5.84000909962464e-06, "loss": 0.029, "step": 47500 }, { "epoch": 7.26, "eval_accuracy": 0.9174446274940509, "eval_loss": 0.5618457794189453, "eval_runtime": 15.5012, "eval_samples_per_second": 352.424, "eval_steps_per_second": 44.061, "step": 47500 }, { "epoch": 7.33, "learning_rate": 5.677515802473148e-06, "loss": 0.0439, "step": 48000 }, { "epoch": 7.33, "eval_accuracy": 0.9194581731649277, "eval_loss": 0.5000433325767517, "eval_runtime": 15.5179, "eval_samples_per_second": 352.046, "eval_steps_per_second": 44.014, "step": 48000 }, { "epoch": 7.41, "learning_rate": 5.515022505321655e-06, "loss": 0.0347, "step": 48500 }, { "epoch": 7.41, "eval_accuracy": 0.9200073219842577, "eval_loss": 0.5093263387680054, "eval_runtime": 15.4926, "eval_samples_per_second": 352.62, "eval_steps_per_second": 44.086, "step": 48500 }, { "epoch": 7.48, "learning_rate": 5.352529208170163e-06, "loss": 0.0425, "step": 49000 }, { "epoch": 7.48, "eval_accuracy": 0.9174446274940509, "eval_loss": 0.5311388373374939, "eval_runtime": 15.4852, "eval_samples_per_second": 352.789, "eval_steps_per_second": 44.107, "step": 49000 }, { "epoch": 7.56, "learning_rate": 5.19003591101867e-06, "loss": 0.0384, "step": 49500 }, { "epoch": 7.56, "eval_accuracy": 0.9198242723778144, "eval_loss": 0.501017689704895, "eval_runtime": 15.5002, "eval_samples_per_second": 352.448, "eval_steps_per_second": 44.064, "step": 49500 }, { "epoch": 7.64, "learning_rate": 5.027542613867178e-06, "loss": 0.039, "step": 50000 }, { "epoch": 7.64, "eval_accuracy": 0.9209225700164745, "eval_loss": 0.5181994438171387, "eval_runtime": 15.4866, "eval_samples_per_second": 352.755, "eval_steps_per_second": 44.103, "step": 50000 }, { "epoch": 7.71, "learning_rate": 4.865049316715686e-06, "loss": 0.04, "step": 50500 }, { "epoch": 7.71, "eval_accuracy": 0.9214717188358045, "eval_loss": 0.5238317847251892, "eval_runtime": 15.4962, "eval_samples_per_second": 352.538, "eval_steps_per_second": 44.075, "step": 50500 }, { "epoch": 7.79, "learning_rate": 4.702556019564194e-06, "loss": 0.0374, "step": 51000 }, { "epoch": 7.79, "eval_accuracy": 0.9218378180486912, "eval_loss": 0.5560519695281982, "eval_runtime": 15.4921, "eval_samples_per_second": 352.63, "eval_steps_per_second": 44.087, "step": 51000 }, { "epoch": 7.87, "learning_rate": 4.540062722412701e-06, "loss": 0.0366, "step": 51500 }, { "epoch": 7.87, "eval_accuracy": 0.9200073219842577, "eval_loss": 0.5412092208862305, "eval_runtime": 15.4629, "eval_samples_per_second": 353.297, "eval_steps_per_second": 44.17, "step": 51500 }, { "epoch": 7.94, "learning_rate": 4.377569425261209e-06, "loss": 0.036, "step": 52000 }, { "epoch": 7.94, "eval_accuracy": 0.9212886692293611, "eval_loss": 0.5213342905044556, "eval_runtime": 15.4852, "eval_samples_per_second": 352.789, "eval_steps_per_second": 44.107, "step": 52000 }, { "epoch": 8.02, "learning_rate": 4.215076128109716e-06, "loss": 0.0348, "step": 52500 }, { "epoch": 8.02, "eval_accuracy": 0.9216547684422478, "eval_loss": 0.51399165391922, "eval_runtime": 15.5008, "eval_samples_per_second": 352.434, "eval_steps_per_second": 44.062, "step": 52500 }, { "epoch": 8.1, "learning_rate": 4.0525828309582235e-06, "loss": 0.0186, "step": 53000 }, { "epoch": 8.1, "eval_accuracy": 0.9240344133260113, "eval_loss": 0.5692733526229858, "eval_runtime": 15.5019, "eval_samples_per_second": 352.408, "eval_steps_per_second": 44.059, "step": 53000 }, { "epoch": 8.17, "learning_rate": 3.8900895338067306e-06, "loss": 0.0275, "step": 53500 }, { "epoch": 8.17, "eval_accuracy": 0.923851363719568, "eval_loss": 0.5007224082946777, "eval_runtime": 15.4606, "eval_samples_per_second": 353.349, "eval_steps_per_second": 44.177, "step": 53500 }, { "epoch": 8.25, "learning_rate": 3.727596236655238e-06, "loss": 0.0219, "step": 54000 }, { "epoch": 8.25, "eval_accuracy": 0.9240344133260113, "eval_loss": 0.5399835705757141, "eval_runtime": 15.5486, "eval_samples_per_second": 351.35, "eval_steps_per_second": 43.927, "step": 54000 }, { "epoch": 8.32, "learning_rate": 3.5651029395037455e-06, "loss": 0.0238, "step": 54500 }, { "epoch": 8.32, "eval_accuracy": 0.9227530660809079, "eval_loss": 0.5536710023880005, "eval_runtime": 15.4711, "eval_samples_per_second": 353.109, "eval_steps_per_second": 44.147, "step": 54500 }, { "epoch": 8.4, "learning_rate": 3.402609642352253e-06, "loss": 0.0201, "step": 55000 }, { "epoch": 8.4, "eval_accuracy": 0.9214717188358045, "eval_loss": 0.5851342678070068, "eval_runtime": 15.5145, "eval_samples_per_second": 352.122, "eval_steps_per_second": 44.023, "step": 55000 }, { "epoch": 8.48, "learning_rate": 3.2401163452007604e-06, "loss": 0.0253, "step": 55500 }, { "epoch": 8.48, "eval_accuracy": 0.9216547684422478, "eval_loss": 0.5654112696647644, "eval_runtime": 15.537, "eval_samples_per_second": 351.612, "eval_steps_per_second": 43.96, "step": 55500 }, { "epoch": 8.55, "learning_rate": 3.0776230480492682e-06, "loss": 0.0243, "step": 56000 }, { "epoch": 8.55, "eval_accuracy": 0.9212886692293611, "eval_loss": 0.5833209156990051, "eval_runtime": 15.4958, "eval_samples_per_second": 352.547, "eval_steps_per_second": 44.076, "step": 56000 }, { "epoch": 8.63, "learning_rate": 2.9151297508977757e-06, "loss": 0.0298, "step": 56500 }, { "epoch": 8.63, "eval_accuracy": 0.9209225700164745, "eval_loss": 0.5482963919639587, "eval_runtime": 15.4637, "eval_samples_per_second": 353.28, "eval_steps_per_second": 44.168, "step": 56500 }, { "epoch": 8.71, "learning_rate": 2.752636453746283e-06, "loss": 0.0232, "step": 57000 }, { "epoch": 8.71, "eval_accuracy": 0.9214717188358045, "eval_loss": 0.5724154114723206, "eval_runtime": 15.5462, "eval_samples_per_second": 351.403, "eval_steps_per_second": 43.933, "step": 57000 }, { "epoch": 8.78, "learning_rate": 2.5901431565947906e-06, "loss": 0.0239, "step": 57500 }, { "epoch": 8.78, "eval_accuracy": 0.9194581731649277, "eval_loss": 0.5574042201042175, "eval_runtime": 15.4883, "eval_samples_per_second": 352.717, "eval_steps_per_second": 44.098, "step": 57500 }, { "epoch": 8.86, "learning_rate": 2.427649859443298e-06, "loss": 0.0263, "step": 58000 }, { "epoch": 8.86, "eval_accuracy": 0.9234852645066813, "eval_loss": 0.5491210222244263, "eval_runtime": 15.5424, "eval_samples_per_second": 351.489, "eval_steps_per_second": 43.944, "step": 58000 }, { "epoch": 8.94, "learning_rate": 2.2651565622918055e-06, "loss": 0.0333, "step": 58500 }, { "epoch": 8.94, "eval_accuracy": 0.9209225700164745, "eval_loss": 0.5322240591049194, "eval_runtime": 15.4522, "eval_samples_per_second": 353.541, "eval_steps_per_second": 44.201, "step": 58500 }, { "epoch": 9.01, "learning_rate": 2.102663265140313e-06, "loss": 0.0259, "step": 59000 }, { "epoch": 9.01, "eval_accuracy": 0.9216547684422478, "eval_loss": 0.5492663979530334, "eval_runtime": 15.4874, "eval_samples_per_second": 352.738, "eval_steps_per_second": 44.1, "step": 59000 }, { "epoch": 9.09, "learning_rate": 1.9401699679888204e-06, "loss": 0.0197, "step": 59500 }, { "epoch": 9.09, "eval_accuracy": 0.9209225700164745, "eval_loss": 0.5670880675315857, "eval_runtime": 15.4679, "eval_samples_per_second": 353.183, "eval_steps_per_second": 44.156, "step": 59500 }, { "epoch": 9.16, "learning_rate": 1.777676670837328e-06, "loss": 0.0237, "step": 60000 }, { "epoch": 9.16, "eval_accuracy": 0.9209225700164745, "eval_loss": 0.5536203980445862, "eval_runtime": 15.492, "eval_samples_per_second": 352.633, "eval_steps_per_second": 44.087, "step": 60000 }, { "epoch": 9.24, "learning_rate": 1.6151833736858355e-06, "loss": 0.022, "step": 60500 }, { "epoch": 9.24, "eval_accuracy": 0.9216547684422478, "eval_loss": 0.5523190498352051, "eval_runtime": 15.499, "eval_samples_per_second": 352.475, "eval_steps_per_second": 44.067, "step": 60500 }, { "epoch": 9.32, "learning_rate": 1.452690076534343e-06, "loss": 0.0246, "step": 61000 }, { "epoch": 9.32, "eval_accuracy": 0.9220208676551346, "eval_loss": 0.5619478821754456, "eval_runtime": 15.4813, "eval_samples_per_second": 352.878, "eval_steps_per_second": 44.118, "step": 61000 }, { "epoch": 9.39, "learning_rate": 1.2901967793828504e-06, "loss": 0.0202, "step": 61500 }, { "epoch": 9.39, "eval_accuracy": 0.9227530660809079, "eval_loss": 0.5619451999664307, "eval_runtime": 15.4982, "eval_samples_per_second": 352.492, "eval_steps_per_second": 44.07, "step": 61500 }, { "epoch": 9.47, "learning_rate": 1.127703482231358e-06, "loss": 0.0184, "step": 62000 }, { "epoch": 9.47, "eval_accuracy": 0.9216547684422478, "eval_loss": 0.5728563070297241, "eval_runtime": 15.4841, "eval_samples_per_second": 352.814, "eval_steps_per_second": 44.11, "step": 62000 }, { "epoch": 9.55, "learning_rate": 9.652101850798656e-07, "loss": 0.0122, "step": 62500 }, { "epoch": 9.55, "eval_accuracy": 0.920190371590701, "eval_loss": 0.594571053981781, "eval_runtime": 15.5059, "eval_samples_per_second": 352.317, "eval_steps_per_second": 44.048, "step": 62500 }, { "epoch": 9.62, "learning_rate": 8.02716887928373e-07, "loss": 0.015, "step": 63000 }, { "epoch": 9.62, "eval_accuracy": 0.9214717188358045, "eval_loss": 0.601406455039978, "eval_runtime": 15.5587, "eval_samples_per_second": 351.121, "eval_steps_per_second": 43.898, "step": 63000 }, { "epoch": 9.7, "learning_rate": 6.402235907768806e-07, "loss": 0.0189, "step": 63500 }, { "epoch": 9.7, "eval_accuracy": 0.9225700164744646, "eval_loss": 0.5927833318710327, "eval_runtime": 15.5411, "eval_samples_per_second": 351.518, "eval_steps_per_second": 43.948, "step": 63500 }, { "epoch": 9.78, "learning_rate": 4.777302936253879e-07, "loss": 0.0194, "step": 64000 }, { "epoch": 9.78, "eval_accuracy": 0.9220208676551346, "eval_loss": 0.5897760987281799, "eval_runtime": 15.5041, "eval_samples_per_second": 352.359, "eval_steps_per_second": 44.053, "step": 64000 }, { "epoch": 9.85, "learning_rate": 3.1523699647389547e-07, "loss": 0.0219, "step": 64500 }, { "epoch": 9.85, "eval_accuracy": 0.9218378180486912, "eval_loss": 0.5851386189460754, "eval_runtime": 15.4835, "eval_samples_per_second": 352.826, "eval_steps_per_second": 44.111, "step": 64500 }, { "epoch": 9.93, "learning_rate": 1.5274369932240295e-07, "loss": 0.017, "step": 65000 }, { "epoch": 9.93, "eval_accuracy": 0.9218378180486912, "eval_loss": 0.5890805125236511, "eval_runtime": 15.4838, "eval_samples_per_second": 352.819, "eval_steps_per_second": 44.111, "step": 65000 }, { "epoch": 10.0, "step": 65470, "total_flos": 6.88976031789312e+16, "train_loss": 0.1338691882801653, "train_runtime": 14647.724, "train_samples_per_second": 71.508, "train_steps_per_second": 4.47 } ], "max_steps": 65470, "num_train_epochs": 10, "total_flos": 6.88976031789312e+16, "trial_name": null, "trial_params": null }