{ "best_metric": 0.19522710144519806, "best_model_checkpoint": "./results_train/roberta-base/sst2/checkpoint-3500", "epoch": 10.0, "global_step": 42100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 3.9588281868566905e-06, "loss": 0.575, "step": 500 }, { "epoch": 0.12, "eval_accuracy": 0.9071100917431193, "eval_loss": 0.2664913535118103, "eval_runtime": 2.4433, "eval_samples_per_second": 356.901, "eval_steps_per_second": 44.613, "step": 500 }, { "epoch": 0.24, "learning_rate": 7.917656373713381e-06, "loss": 0.2989, "step": 1000 }, { "epoch": 0.24, "eval_accuracy": 0.9220183486238532, "eval_loss": 0.20883557200431824, "eval_runtime": 2.4454, "eval_samples_per_second": 356.584, "eval_steps_per_second": 44.573, "step": 1000 }, { "epoch": 0.36, "learning_rate": 1.1876484560570072e-05, "loss": 0.2725, "step": 1500 }, { "epoch": 0.36, "eval_accuracy": 0.9243119266055045, "eval_loss": 0.25596883893013, "eval_runtime": 2.451, "eval_samples_per_second": 355.775, "eval_steps_per_second": 44.472, "step": 1500 }, { "epoch": 0.48, "learning_rate": 1.5835312747426762e-05, "loss": 0.2814, "step": 2000 }, { "epoch": 0.48, "eval_accuracy": 0.926605504587156, "eval_loss": 0.20158442854881287, "eval_runtime": 2.462, "eval_samples_per_second": 354.188, "eval_steps_per_second": 44.274, "step": 2000 }, { "epoch": 0.59, "learning_rate": 1.9794140934283453e-05, "loss": 0.2586, "step": 2500 }, { "epoch": 0.59, "eval_accuracy": 0.9174311926605505, "eval_loss": 0.22930225729942322, "eval_runtime": 2.4517, "eval_samples_per_second": 355.671, "eval_steps_per_second": 44.459, "step": 2500 }, { "epoch": 0.71, "learning_rate": 1.9760448779501697e-05, "loss": 0.2536, "step": 3000 }, { "epoch": 0.71, "eval_accuracy": 0.9323394495412844, "eval_loss": 0.23396578431129456, "eval_runtime": 2.4584, "eval_samples_per_second": 354.697, "eval_steps_per_second": 44.337, "step": 3000 }, { "epoch": 0.83, "learning_rate": 1.95077576186385e-05, "loss": 0.2494, "step": 3500 }, { "epoch": 0.83, "eval_accuracy": 0.9323394495412844, "eval_loss": 0.19522710144519806, "eval_runtime": 2.4521, "eval_samples_per_second": 355.616, "eval_steps_per_second": 44.452, "step": 3500 }, { "epoch": 0.95, "learning_rate": 1.925506645777531e-05, "loss": 0.2396, "step": 4000 }, { "epoch": 0.95, "eval_accuracy": 0.9323394495412844, "eval_loss": 0.24936608970165253, "eval_runtime": 2.4569, "eval_samples_per_second": 354.916, "eval_steps_per_second": 44.365, "step": 4000 }, { "epoch": 1.07, "learning_rate": 1.9002375296912114e-05, "loss": 0.2123, "step": 4500 }, { "epoch": 1.07, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.21870844066143036, "eval_runtime": 2.449, "eval_samples_per_second": 356.068, "eval_steps_per_second": 44.509, "step": 4500 }, { "epoch": 1.19, "learning_rate": 1.874968413604892e-05, "loss": 0.2042, "step": 5000 }, { "epoch": 1.19, "eval_accuracy": 0.9151376146788991, "eval_loss": 0.2811821401119232, "eval_runtime": 2.4602, "eval_samples_per_second": 354.439, "eval_steps_per_second": 44.305, "step": 5000 }, { "epoch": 1.31, "learning_rate": 1.849699297518573e-05, "loss": 0.2083, "step": 5500 }, { "epoch": 1.31, "eval_accuracy": 0.9346330275229358, "eval_loss": 0.27386215329170227, "eval_runtime": 2.5255, "eval_samples_per_second": 345.272, "eval_steps_per_second": 43.159, "step": 5500 }, { "epoch": 1.43, "learning_rate": 1.8244301814322537e-05, "loss": 0.2041, "step": 6000 }, { "epoch": 1.43, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.20871196687221527, "eval_runtime": 2.4547, "eval_samples_per_second": 355.241, "eval_steps_per_second": 44.405, "step": 6000 }, { "epoch": 1.54, "learning_rate": 1.7991610653459345e-05, "loss": 0.1969, "step": 6500 }, { "epoch": 1.54, "eval_accuracy": 0.9254587155963303, "eval_loss": 0.25904807448387146, "eval_runtime": 2.4532, "eval_samples_per_second": 355.448, "eval_steps_per_second": 44.431, "step": 6500 }, { "epoch": 1.66, "learning_rate": 1.773891949259615e-05, "loss": 0.1982, "step": 7000 }, { "epoch": 1.66, "eval_accuracy": 0.930045871559633, "eval_loss": 0.2444588840007782, "eval_runtime": 2.4545, "eval_samples_per_second": 355.268, "eval_steps_per_second": 44.409, "step": 7000 }, { "epoch": 1.78, "learning_rate": 1.7486228331732958e-05, "loss": 0.1943, "step": 7500 }, { "epoch": 1.78, "eval_accuracy": 0.926605504587156, "eval_loss": 0.2798321545124054, "eval_runtime": 2.4455, "eval_samples_per_second": 356.567, "eval_steps_per_second": 44.571, "step": 7500 }, { "epoch": 1.9, "learning_rate": 1.7233537170869766e-05, "loss": 0.1848, "step": 8000 }, { "epoch": 1.9, "eval_accuracy": 0.9311926605504587, "eval_loss": 0.2844010591506958, "eval_runtime": 2.4586, "eval_samples_per_second": 354.679, "eval_steps_per_second": 44.335, "step": 8000 }, { "epoch": 2.02, "learning_rate": 1.698084601000657e-05, "loss": 0.1788, "step": 8500 }, { "epoch": 2.02, "eval_accuracy": 0.9254587155963303, "eval_loss": 0.2998378872871399, "eval_runtime": 2.446, "eval_samples_per_second": 356.496, "eval_steps_per_second": 44.562, "step": 8500 }, { "epoch": 2.14, "learning_rate": 1.672815484914338e-05, "loss": 0.1623, "step": 9000 }, { "epoch": 2.14, "eval_accuracy": 0.9392201834862385, "eval_loss": 0.2695905268192291, "eval_runtime": 2.4607, "eval_samples_per_second": 354.365, "eval_steps_per_second": 44.296, "step": 9000 }, { "epoch": 2.26, "learning_rate": 1.6475463688280183e-05, "loss": 0.1499, "step": 9500 }, { "epoch": 2.26, "eval_accuracy": 0.9277522935779816, "eval_loss": 0.25331878662109375, "eval_runtime": 2.4449, "eval_samples_per_second": 356.659, "eval_steps_per_second": 44.582, "step": 9500 }, { "epoch": 2.38, "learning_rate": 1.622277252741699e-05, "loss": 0.1426, "step": 10000 }, { "epoch": 2.38, "eval_accuracy": 0.930045871559633, "eval_loss": 0.29705262184143066, "eval_runtime": 2.4651, "eval_samples_per_second": 353.733, "eval_steps_per_second": 44.217, "step": 10000 }, { "epoch": 2.49, "learning_rate": 1.59700813665538e-05, "loss": 0.1479, "step": 10500 }, { "epoch": 2.49, "eval_accuracy": 0.9357798165137615, "eval_loss": 0.25958266854286194, "eval_runtime": 2.4502, "eval_samples_per_second": 355.883, "eval_steps_per_second": 44.485, "step": 10500 }, { "epoch": 2.61, "learning_rate": 1.5717390205690607e-05, "loss": 0.1405, "step": 11000 }, { "epoch": 2.61, "eval_accuracy": 0.9254587155963303, "eval_loss": 0.2944609522819519, "eval_runtime": 2.4554, "eval_samples_per_second": 355.141, "eval_steps_per_second": 44.393, "step": 11000 }, { "epoch": 2.73, "learning_rate": 1.5464699044827415e-05, "loss": 0.1577, "step": 11500 }, { "epoch": 2.73, "eval_accuracy": 0.9002293577981652, "eval_loss": 0.40612396597862244, "eval_runtime": 2.4539, "eval_samples_per_second": 355.36, "eval_steps_per_second": 44.42, "step": 11500 }, { "epoch": 2.85, "learning_rate": 1.521200788396422e-05, "loss": 0.1521, "step": 12000 }, { "epoch": 2.85, "eval_accuracy": 0.9334862385321101, "eval_loss": 0.2724354565143585, "eval_runtime": 2.4461, "eval_samples_per_second": 356.483, "eval_steps_per_second": 44.56, "step": 12000 }, { "epoch": 2.97, "learning_rate": 1.4959316723101027e-05, "loss": 0.1426, "step": 12500 }, { "epoch": 2.97, "eval_accuracy": 0.9426605504587156, "eval_loss": 0.27123740315437317, "eval_runtime": 2.4449, "eval_samples_per_second": 356.655, "eval_steps_per_second": 44.582, "step": 12500 }, { "epoch": 3.09, "learning_rate": 1.4706625562237835e-05, "loss": 0.1206, "step": 13000 }, { "epoch": 3.09, "eval_accuracy": 0.9357798165137615, "eval_loss": 0.2954227328300476, "eval_runtime": 2.467, "eval_samples_per_second": 353.464, "eval_steps_per_second": 44.183, "step": 13000 }, { "epoch": 3.21, "learning_rate": 1.4453934401374641e-05, "loss": 0.1074, "step": 13500 }, { "epoch": 3.21, "eval_accuracy": 0.9392201834862385, "eval_loss": 0.2653304934501648, "eval_runtime": 2.4486, "eval_samples_per_second": 356.118, "eval_steps_per_second": 44.515, "step": 13500 }, { "epoch": 3.33, "learning_rate": 1.420124324051145e-05, "loss": 0.112, "step": 14000 }, { "epoch": 3.33, "eval_accuracy": 0.9346330275229358, "eval_loss": 0.2777578830718994, "eval_runtime": 2.4566, "eval_samples_per_second": 354.969, "eval_steps_per_second": 44.371, "step": 14000 }, { "epoch": 3.44, "learning_rate": 1.3948552079648254e-05, "loss": 0.1147, "step": 14500 }, { "epoch": 3.44, "eval_accuracy": 0.9311926605504587, "eval_loss": 0.3704558312892914, "eval_runtime": 2.4454, "eval_samples_per_second": 356.589, "eval_steps_per_second": 44.574, "step": 14500 }, { "epoch": 3.56, "learning_rate": 1.3695860918785062e-05, "loss": 0.1196, "step": 15000 }, { "epoch": 3.56, "eval_accuracy": 0.9346330275229358, "eval_loss": 0.2889645993709564, "eval_runtime": 2.4563, "eval_samples_per_second": 354.999, "eval_steps_per_second": 44.375, "step": 15000 }, { "epoch": 3.68, "learning_rate": 1.344316975792187e-05, "loss": 0.1159, "step": 15500 }, { "epoch": 3.68, "eval_accuracy": 0.926605504587156, "eval_loss": 0.3448694944381714, "eval_runtime": 2.4429, "eval_samples_per_second": 356.949, "eval_steps_per_second": 44.619, "step": 15500 }, { "epoch": 3.8, "learning_rate": 1.3190478597058676e-05, "loss": 0.119, "step": 16000 }, { "epoch": 3.8, "eval_accuracy": 0.9334862385321101, "eval_loss": 0.3207152187824249, "eval_runtime": 2.461, "eval_samples_per_second": 354.323, "eval_steps_per_second": 44.29, "step": 16000 }, { "epoch": 3.92, "learning_rate": 1.2937787436195484e-05, "loss": 0.1268, "step": 16500 }, { "epoch": 3.92, "eval_accuracy": 0.9311926605504587, "eval_loss": 0.3234628736972809, "eval_runtime": 2.4504, "eval_samples_per_second": 355.858, "eval_steps_per_second": 44.482, "step": 16500 }, { "epoch": 4.04, "learning_rate": 1.2685096275332289e-05, "loss": 0.1074, "step": 17000 }, { "epoch": 4.04, "eval_accuracy": 0.9334862385321101, "eval_loss": 0.3650290369987488, "eval_runtime": 2.456, "eval_samples_per_second": 355.052, "eval_steps_per_second": 44.382, "step": 17000 }, { "epoch": 4.16, "learning_rate": 1.2432405114469096e-05, "loss": 0.0805, "step": 17500 }, { "epoch": 4.16, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.33378419280052185, "eval_runtime": 2.4457, "eval_samples_per_second": 356.538, "eval_steps_per_second": 44.567, "step": 17500 }, { "epoch": 4.28, "learning_rate": 1.2179713953605903e-05, "loss": 0.0838, "step": 18000 }, { "epoch": 4.28, "eval_accuracy": 0.9208715596330275, "eval_loss": 0.4302394688129425, "eval_runtime": 2.4587, "eval_samples_per_second": 354.661, "eval_steps_per_second": 44.333, "step": 18000 }, { "epoch": 4.39, "learning_rate": 1.192702279274271e-05, "loss": 0.0848, "step": 18500 }, { "epoch": 4.39, "eval_accuracy": 0.9323394495412844, "eval_loss": 0.40956971049308777, "eval_runtime": 2.4483, "eval_samples_per_second": 356.162, "eval_steps_per_second": 44.52, "step": 18500 }, { "epoch": 4.51, "learning_rate": 1.1674331631879519e-05, "loss": 0.0922, "step": 19000 }, { "epoch": 4.51, "eval_accuracy": 0.9369266055045872, "eval_loss": 0.3332035541534424, "eval_runtime": 2.4597, "eval_samples_per_second": 354.511, "eval_steps_per_second": 44.314, "step": 19000 }, { "epoch": 4.63, "learning_rate": 1.1421640471016325e-05, "loss": 0.091, "step": 19500 }, { "epoch": 4.63, "eval_accuracy": 0.9438073394495413, "eval_loss": 0.3024330735206604, "eval_runtime": 2.4457, "eval_samples_per_second": 356.542, "eval_steps_per_second": 44.568, "step": 19500 }, { "epoch": 4.75, "learning_rate": 1.1168949310153133e-05, "loss": 0.0977, "step": 20000 }, { "epoch": 4.75, "eval_accuracy": 0.9495412844036697, "eval_loss": 0.2673788070678711, "eval_runtime": 2.4587, "eval_samples_per_second": 354.654, "eval_steps_per_second": 44.332, "step": 20000 }, { "epoch": 4.87, "learning_rate": 1.0916258149289937e-05, "loss": 0.0897, "step": 20500 }, { "epoch": 4.87, "eval_accuracy": 0.930045871559633, "eval_loss": 0.39930590987205505, "eval_runtime": 2.4473, "eval_samples_per_second": 356.313, "eval_steps_per_second": 44.539, "step": 20500 }, { "epoch": 4.99, "learning_rate": 1.0663566988426745e-05, "loss": 0.1013, "step": 21000 }, { "epoch": 4.99, "eval_accuracy": 0.9288990825688074, "eval_loss": 0.322666198015213, "eval_runtime": 2.4496, "eval_samples_per_second": 355.981, "eval_steps_per_second": 44.498, "step": 21000 }, { "epoch": 5.11, "learning_rate": 1.0410875827563553e-05, "loss": 0.0671, "step": 21500 }, { "epoch": 5.11, "eval_accuracy": 0.9426605504587156, "eval_loss": 0.3374435603618622, "eval_runtime": 2.4457, "eval_samples_per_second": 356.54, "eval_steps_per_second": 44.567, "step": 21500 }, { "epoch": 5.23, "learning_rate": 1.015818466670036e-05, "loss": 0.0671, "step": 22000 }, { "epoch": 5.23, "eval_accuracy": 0.9277522935779816, "eval_loss": 0.4108366072177887, "eval_runtime": 2.4551, "eval_samples_per_second": 355.179, "eval_steps_per_second": 44.397, "step": 22000 }, { "epoch": 5.34, "learning_rate": 9.905493505837167e-06, "loss": 0.0652, "step": 22500 }, { "epoch": 5.34, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.3549734652042389, "eval_runtime": 2.4475, "eval_samples_per_second": 356.289, "eval_steps_per_second": 44.536, "step": 22500 }, { "epoch": 5.46, "learning_rate": 9.652802344973974e-06, "loss": 0.0664, "step": 23000 }, { "epoch": 5.46, "eval_accuracy": 0.9357798165137615, "eval_loss": 0.339821994304657, "eval_runtime": 2.4559, "eval_samples_per_second": 355.062, "eval_steps_per_second": 44.383, "step": 23000 }, { "epoch": 5.58, "learning_rate": 9.40011118411078e-06, "loss": 0.0742, "step": 23500 }, { "epoch": 5.58, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.3286002278327942, "eval_runtime": 2.4471, "eval_samples_per_second": 356.342, "eval_steps_per_second": 44.543, "step": 23500 }, { "epoch": 5.7, "learning_rate": 9.147420023247588e-06, "loss": 0.0758, "step": 24000 }, { "epoch": 5.7, "eval_accuracy": 0.9311926605504587, "eval_loss": 0.32764118909835815, "eval_runtime": 2.4639, "eval_samples_per_second": 353.904, "eval_steps_per_second": 44.238, "step": 24000 }, { "epoch": 5.82, "learning_rate": 8.894728862384394e-06, "loss": 0.075, "step": 24500 }, { "epoch": 5.82, "eval_accuracy": 0.9369266055045872, "eval_loss": 0.32022935152053833, "eval_runtime": 2.4503, "eval_samples_per_second": 355.874, "eval_steps_per_second": 44.484, "step": 24500 }, { "epoch": 5.94, "learning_rate": 8.642037701521202e-06, "loss": 0.0686, "step": 25000 }, { "epoch": 5.94, "eval_accuracy": 0.9415137614678899, "eval_loss": 0.3481292426586151, "eval_runtime": 2.4555, "eval_samples_per_second": 355.12, "eval_steps_per_second": 44.39, "step": 25000 }, { "epoch": 6.06, "learning_rate": 8.389346540658008e-06, "loss": 0.0729, "step": 25500 }, { "epoch": 6.06, "eval_accuracy": 0.9334862385321101, "eval_loss": 0.38161903619766235, "eval_runtime": 2.4476, "eval_samples_per_second": 356.27, "eval_steps_per_second": 44.534, "step": 25500 }, { "epoch": 6.18, "learning_rate": 8.136655379794816e-06, "loss": 0.0568, "step": 26000 }, { "epoch": 6.18, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.31324318051338196, "eval_runtime": 2.4707, "eval_samples_per_second": 352.935, "eval_steps_per_second": 44.117, "step": 26000 }, { "epoch": 6.29, "learning_rate": 7.883964218931623e-06, "loss": 0.0529, "step": 26500 }, { "epoch": 6.29, "eval_accuracy": 0.930045871559633, "eval_loss": 0.3756808340549469, "eval_runtime": 2.4544, "eval_samples_per_second": 355.287, "eval_steps_per_second": 44.411, "step": 26500 }, { "epoch": 6.41, "learning_rate": 7.631273058068429e-06, "loss": 0.0506, "step": 27000 }, { "epoch": 6.41, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.33958113193511963, "eval_runtime": 2.4531, "eval_samples_per_second": 355.471, "eval_steps_per_second": 44.434, "step": 27000 }, { "epoch": 6.53, "learning_rate": 7.378581897205236e-06, "loss": 0.0476, "step": 27500 }, { "epoch": 6.53, "eval_accuracy": 0.9403669724770642, "eval_loss": 0.3641544580459595, "eval_runtime": 2.4417, "eval_samples_per_second": 357.132, "eval_steps_per_second": 44.641, "step": 27500 }, { "epoch": 6.65, "learning_rate": 7.125890736342044e-06, "loss": 0.0555, "step": 28000 }, { "epoch": 6.65, "eval_accuracy": 0.9403669724770642, "eval_loss": 0.34298017621040344, "eval_runtime": 2.4463, "eval_samples_per_second": 356.452, "eval_steps_per_second": 44.556, "step": 28000 }, { "epoch": 6.77, "learning_rate": 6.87319957547885e-06, "loss": 0.0574, "step": 28500 }, { "epoch": 6.77, "eval_accuracy": 0.9392201834862385, "eval_loss": 0.3401435613632202, "eval_runtime": 2.4439, "eval_samples_per_second": 356.811, "eval_steps_per_second": 44.601, "step": 28500 }, { "epoch": 6.89, "learning_rate": 6.620508414615657e-06, "loss": 0.0524, "step": 29000 }, { "epoch": 6.89, "eval_accuracy": 0.9346330275229358, "eval_loss": 0.33783158659935, "eval_runtime": 2.4521, "eval_samples_per_second": 355.616, "eval_steps_per_second": 44.452, "step": 29000 }, { "epoch": 7.01, "learning_rate": 6.367817253752464e-06, "loss": 0.0492, "step": 29500 }, { "epoch": 7.01, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.3833492398262024, "eval_runtime": 2.4457, "eval_samples_per_second": 356.538, "eval_steps_per_second": 44.567, "step": 29500 }, { "epoch": 7.13, "learning_rate": 6.1151260928892706e-06, "loss": 0.039, "step": 30000 }, { "epoch": 7.13, "eval_accuracy": 0.9346330275229358, "eval_loss": 0.3346712589263916, "eval_runtime": 2.4434, "eval_samples_per_second": 356.873, "eval_steps_per_second": 44.609, "step": 30000 }, { "epoch": 7.24, "learning_rate": 5.8624349320260785e-06, "loss": 0.0411, "step": 30500 }, { "epoch": 7.24, "eval_accuracy": 0.9334862385321101, "eval_loss": 0.4404141902923584, "eval_runtime": 2.4419, "eval_samples_per_second": 357.102, "eval_steps_per_second": 44.638, "step": 30500 }, { "epoch": 7.36, "learning_rate": 5.609743771162886e-06, "loss": 0.0412, "step": 31000 }, { "epoch": 7.36, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.36179476976394653, "eval_runtime": 2.4414, "eval_samples_per_second": 357.173, "eval_steps_per_second": 44.647, "step": 31000 }, { "epoch": 7.48, "learning_rate": 5.357052610299692e-06, "loss": 0.0477, "step": 31500 }, { "epoch": 7.48, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.3806387484073639, "eval_runtime": 2.4471, "eval_samples_per_second": 356.337, "eval_steps_per_second": 44.542, "step": 31500 }, { "epoch": 7.6, "learning_rate": 5.104361449436499e-06, "loss": 0.0435, "step": 32000 }, { "epoch": 7.6, "eval_accuracy": 0.9334862385321101, "eval_loss": 0.39115917682647705, "eval_runtime": 2.4665, "eval_samples_per_second": 353.536, "eval_steps_per_second": 44.192, "step": 32000 }, { "epoch": 7.72, "learning_rate": 4.851670288573306e-06, "loss": 0.0443, "step": 32500 }, { "epoch": 7.72, "eval_accuracy": 0.9392201834862385, "eval_loss": 0.39003145694732666, "eval_runtime": 2.4534, "eval_samples_per_second": 355.426, "eval_steps_per_second": 44.428, "step": 32500 }, { "epoch": 7.84, "learning_rate": 4.598979127710113e-06, "loss": 0.0421, "step": 33000 }, { "epoch": 7.84, "eval_accuracy": 0.9369266055045872, "eval_loss": 0.4152164161205292, "eval_runtime": 2.4525, "eval_samples_per_second": 355.556, "eval_steps_per_second": 44.445, "step": 33000 }, { "epoch": 7.96, "learning_rate": 4.34628796684692e-06, "loss": 0.0495, "step": 33500 }, { "epoch": 7.96, "eval_accuracy": 0.9288990825688074, "eval_loss": 0.3831779360771179, "eval_runtime": 2.447, "eval_samples_per_second": 356.361, "eval_steps_per_second": 44.545, "step": 33500 }, { "epoch": 8.08, "learning_rate": 4.093596805983727e-06, "loss": 0.0293, "step": 34000 }, { "epoch": 8.08, "eval_accuracy": 0.9346330275229358, "eval_loss": 0.44268128275871277, "eval_runtime": 2.4587, "eval_samples_per_second": 354.661, "eval_steps_per_second": 44.333, "step": 34000 }, { "epoch": 8.19, "learning_rate": 3.840905645120534e-06, "loss": 0.0253, "step": 34500 }, { "epoch": 8.19, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.44246163964271545, "eval_runtime": 2.4427, "eval_samples_per_second": 356.983, "eval_steps_per_second": 44.623, "step": 34500 }, { "epoch": 8.31, "learning_rate": 3.5882144842573407e-06, "loss": 0.0407, "step": 35000 }, { "epoch": 8.31, "eval_accuracy": 0.9357798165137615, "eval_loss": 0.41019341349601746, "eval_runtime": 2.453, "eval_samples_per_second": 355.477, "eval_steps_per_second": 44.435, "step": 35000 }, { "epoch": 8.43, "learning_rate": 3.3355233233941482e-06, "loss": 0.0311, "step": 35500 }, { "epoch": 8.43, "eval_accuracy": 0.9369266055045872, "eval_loss": 0.44467687606811523, "eval_runtime": 2.4425, "eval_samples_per_second": 357.013, "eval_steps_per_second": 44.627, "step": 35500 }, { "epoch": 8.55, "learning_rate": 3.082832162530955e-06, "loss": 0.0291, "step": 36000 }, { "epoch": 8.55, "eval_accuracy": 0.9346330275229358, "eval_loss": 0.46120545268058777, "eval_runtime": 2.4514, "eval_samples_per_second": 355.714, "eval_steps_per_second": 44.464, "step": 36000 }, { "epoch": 8.67, "learning_rate": 2.8301410016677616e-06, "loss": 0.035, "step": 36500 }, { "epoch": 8.67, "eval_accuracy": 0.9346330275229358, "eval_loss": 0.4240852892398834, "eval_runtime": 2.4477, "eval_samples_per_second": 356.249, "eval_steps_per_second": 44.531, "step": 36500 }, { "epoch": 8.79, "learning_rate": 2.577449840804569e-06, "loss": 0.0381, "step": 37000 }, { "epoch": 8.79, "eval_accuracy": 0.9311926605504587, "eval_loss": 0.41976186633110046, "eval_runtime": 2.4523, "eval_samples_per_second": 355.586, "eval_steps_per_second": 44.448, "step": 37000 }, { "epoch": 8.91, "learning_rate": 2.3247586799413758e-06, "loss": 0.0234, "step": 37500 }, { "epoch": 8.91, "eval_accuracy": 0.9369266055045872, "eval_loss": 0.4344768822193146, "eval_runtime": 2.4469, "eval_samples_per_second": 356.366, "eval_steps_per_second": 44.546, "step": 37500 }, { "epoch": 9.03, "learning_rate": 2.072067519078183e-06, "loss": 0.0311, "step": 38000 }, { "epoch": 9.03, "eval_accuracy": 0.9311926605504587, "eval_loss": 0.45580777525901794, "eval_runtime": 2.4545, "eval_samples_per_second": 355.27, "eval_steps_per_second": 44.409, "step": 38000 }, { "epoch": 9.14, "learning_rate": 1.8193763582149898e-06, "loss": 0.028, "step": 38500 }, { "epoch": 9.14, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.42450448870658875, "eval_runtime": 2.4449, "eval_samples_per_second": 356.658, "eval_steps_per_second": 44.582, "step": 38500 }, { "epoch": 9.26, "learning_rate": 1.5666851973517969e-06, "loss": 0.0213, "step": 39000 }, { "epoch": 9.26, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.446162611246109, "eval_runtime": 2.4606, "eval_samples_per_second": 354.384, "eval_steps_per_second": 44.298, "step": 39000 }, { "epoch": 9.38, "learning_rate": 1.3139940364886035e-06, "loss": 0.0276, "step": 39500 }, { "epoch": 9.38, "eval_accuracy": 0.9380733944954128, "eval_loss": 0.42100322246551514, "eval_runtime": 2.4512, "eval_samples_per_second": 355.743, "eval_steps_per_second": 44.468, "step": 39500 }, { "epoch": 9.5, "learning_rate": 1.0613028756254106e-06, "loss": 0.0183, "step": 40000 }, { "epoch": 9.5, "eval_accuracy": 0.9403669724770642, "eval_loss": 0.43098002672195435, "eval_runtime": 2.45, "eval_samples_per_second": 355.922, "eval_steps_per_second": 44.49, "step": 40000 }, { "epoch": 9.62, "learning_rate": 8.086117147622177e-07, "loss": 0.0184, "step": 40500 }, { "epoch": 9.62, "eval_accuracy": 0.9403669724770642, "eval_loss": 0.4437469244003296, "eval_runtime": 2.4461, "eval_samples_per_second": 356.492, "eval_steps_per_second": 44.561, "step": 40500 }, { "epoch": 9.74, "learning_rate": 5.559205538990246e-07, "loss": 0.0296, "step": 41000 }, { "epoch": 9.74, "eval_accuracy": 0.9392201834862385, "eval_loss": 0.43114030361175537, "eval_runtime": 2.4504, "eval_samples_per_second": 355.859, "eval_steps_per_second": 44.482, "step": 41000 }, { "epoch": 9.86, "learning_rate": 3.0322939303583163e-07, "loss": 0.019, "step": 41500 }, { "epoch": 9.86, "eval_accuracy": 0.9415137614678899, "eval_loss": 0.42435380816459656, "eval_runtime": 2.4473, "eval_samples_per_second": 356.311, "eval_steps_per_second": 44.539, "step": 41500 }, { "epoch": 9.98, "learning_rate": 5.053823217263861e-08, "loss": 0.0245, "step": 42000 }, { "epoch": 9.98, "eval_accuracy": 0.9415137614678899, "eval_loss": 0.42697247862815857, "eval_runtime": 2.46, "eval_samples_per_second": 354.474, "eval_steps_per_second": 44.309, "step": 42000 }, { "epoch": 10.0, "step": 42100, "total_flos": 4.43006661686016e+16, "train_loss": 0.10745611605338416, "train_runtime": 8358.8854, "train_samples_per_second": 80.572, "train_steps_per_second": 5.037 } ], "max_steps": 42100, "num_train_epochs": 10, "total_flos": 4.43006661686016e+16, "trial_name": null, "trial_params": null }