{ "best_metric": 0.8284289836883545, "best_model_checkpoint": "/output/longformer-large-4096-scratch-mlm-zgt-radpat/checkpoint-31300", "epoch": 9.999175145683829, "global_step": 32200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "eval_accuracy": 0.12266339212733007, "eval_loss": 6.975634574890137, "eval_runtime": 6116.0321, "eval_samples_per_second": 16.872, "eval_steps_per_second": 4.218, "step": 313 }, { "epoch": 0.16, "learning_rate": 1.5527950310559007e-05, "loss": 7.8221, "step": 500 }, { "epoch": 0.19, "eval_accuracy": 0.15375473317214908, "eval_loss": 6.221883773803711, "eval_runtime": 6119.4463, "eval_samples_per_second": 16.862, "eval_steps_per_second": 4.216, "step": 626 }, { "epoch": 0.29, "eval_accuracy": 0.16615663705726413, "eval_loss": 6.070300102233887, "eval_runtime": 6123.864, "eval_samples_per_second": 16.85, "eval_steps_per_second": 4.213, "step": 939 }, { "epoch": 0.31, "learning_rate": 3.1055900621118014e-05, "loss": 6.2078, "step": 1000 }, { "epoch": 0.39, "eval_accuracy": 0.17121056433657572, "eval_loss": 5.859891414642334, "eval_runtime": 6127.1504, "eval_samples_per_second": 16.841, "eval_steps_per_second": 4.21, "step": 1252 }, { "epoch": 0.47, "learning_rate": 4.658385093167702e-05, "loss": 5.8885, "step": 1500 }, { "epoch": 0.49, "eval_accuracy": 0.2018564191205144, "eval_loss": 5.480071544647217, "eval_runtime": 6125.2208, "eval_samples_per_second": 16.846, "eval_steps_per_second": 4.212, "step": 1565 }, { "epoch": 0.58, "eval_accuracy": 0.28117280447637577, "eval_loss": 4.865741729736328, "eval_runtime": 6125.2116, "eval_samples_per_second": 16.846, "eval_steps_per_second": 4.212, "step": 1878 }, { "epoch": 0.62, "learning_rate": 6.211180124223603e-05, "loss": 5.222, "step": 2000 }, { "epoch": 0.68, "eval_accuracy": 0.3429139079332977, "eval_loss": 4.355594158172607, "eval_runtime": 6130.4382, "eval_samples_per_second": 16.832, "eval_steps_per_second": 4.208, "step": 2191 }, { "epoch": 0.78, "learning_rate": 7.763975155279503e-05, "loss": 4.4722, "step": 2500 }, { "epoch": 0.78, "eval_accuracy": 0.40118303502111985, "eval_loss": 3.8668248653411865, "eval_runtime": 6127.9717, "eval_samples_per_second": 16.839, "eval_steps_per_second": 4.21, "step": 2504 }, { "epoch": 0.87, "eval_accuracy": 0.5023100325479124, "eval_loss": 3.0883595943450928, "eval_runtime": 6125.5473, "eval_samples_per_second": 16.845, "eval_steps_per_second": 4.211, "step": 2817 }, { "epoch": 0.93, "learning_rate": 9.316770186335404e-05, "loss": 3.4756, "step": 3000 }, { "epoch": 0.97, "eval_accuracy": 0.5704291572279797, "eval_loss": 2.500981569290161, "eval_runtime": 6130.9045, "eval_samples_per_second": 16.831, "eval_steps_per_second": 4.208, "step": 3130 }, { "epoch": 1.07, "eval_accuracy": 0.6179390492627002, "eval_loss": 2.098602771759033, "eval_runtime": 6130.0855, "eval_samples_per_second": 16.833, "eval_steps_per_second": 4.208, "step": 3443 }, { "epoch": 1.09, "learning_rate": 9.903381642512077e-05, "loss": 2.473, "step": 3500 }, { "epoch": 1.17, "eval_accuracy": 0.6461125725768823, "eval_loss": 1.8769867420196533, "eval_runtime": 6125.6206, "eval_samples_per_second": 16.845, "eval_steps_per_second": 4.211, "step": 3756 }, { "epoch": 1.24, "learning_rate": 9.730848861283644e-05, "loss": 1.9842, "step": 4000 }, { "epoch": 1.26, "eval_accuracy": 0.6658163018931873, "eval_loss": 1.7306807041168213, "eval_runtime": 6126.2778, "eval_samples_per_second": 16.843, "eval_steps_per_second": 4.211, "step": 4069 }, { "epoch": 1.36, "eval_accuracy": 0.6793036581603035, "eval_loss": 1.6312057971954346, "eval_runtime": 6129.7016, "eval_samples_per_second": 16.834, "eval_steps_per_second": 4.209, "step": 4382 }, { "epoch": 1.4, "learning_rate": 9.558316080055211e-05, "loss": 1.7588, "step": 4500 }, { "epoch": 1.46, "eval_accuracy": 0.6910826606245232, "eval_loss": 1.5486171245574951, "eval_runtime": 6126.9734, "eval_samples_per_second": 16.841, "eval_steps_per_second": 4.21, "step": 4695 }, { "epoch": 1.55, "learning_rate": 9.385783298826778e-05, "loss": 1.6227, "step": 5000 }, { "epoch": 1.56, "eval_accuracy": 0.7005288313548309, "eval_loss": 1.4852144718170166, "eval_runtime": 6125.5835, "eval_samples_per_second": 16.845, "eval_steps_per_second": 4.211, "step": 5008 }, { "epoch": 1.65, "eval_accuracy": 0.7085754746399128, "eval_loss": 1.4299465417861938, "eval_runtime": 6127.5749, "eval_samples_per_second": 16.84, "eval_steps_per_second": 4.21, "step": 5321 }, { "epoch": 1.71, "learning_rate": 9.213250517598345e-05, "loss": 1.5262, "step": 5500 }, { "epoch": 1.75, "eval_accuracy": 0.714780717522465, "eval_loss": 1.3879172801971436, "eval_runtime": 6127.0567, "eval_samples_per_second": 16.841, "eval_steps_per_second": 4.21, "step": 5634 }, { "epoch": 1.85, "eval_accuracy": 0.7206512153561178, "eval_loss": 1.3517948389053345, "eval_runtime": 6124.3738, "eval_samples_per_second": 16.849, "eval_steps_per_second": 4.212, "step": 5947 }, { "epoch": 1.86, "learning_rate": 9.04071773636991e-05, "loss": 1.4504, "step": 6000 }, { "epoch": 1.94, "eval_accuracy": 0.7259066376655939, "eval_loss": 1.3164656162261963, "eval_runtime": 6118.789, "eval_samples_per_second": 16.864, "eval_steps_per_second": 4.216, "step": 6260 }, { "epoch": 2.02, "learning_rate": 8.868184955141477e-05, "loss": 1.3953, "step": 6500 }, { "epoch": 2.04, "eval_accuracy": 0.730818673639555, "eval_loss": 1.285917043685913, "eval_runtime": 6120.6477, "eval_samples_per_second": 16.859, "eval_steps_per_second": 4.215, "step": 6573 }, { "epoch": 2.14, "eval_accuracy": 0.734559869460255, "eval_loss": 1.2613903284072876, "eval_runtime": 6121.6933, "eval_samples_per_second": 16.856, "eval_steps_per_second": 4.214, "step": 6886 }, { "epoch": 2.17, "learning_rate": 8.695652173913044e-05, "loss": 1.3444, "step": 7000 }, { "epoch": 2.24, "eval_accuracy": 0.7384604131666711, "eval_loss": 1.236000895500183, "eval_runtime": 6122.4897, "eval_samples_per_second": 16.854, "eval_steps_per_second": 4.213, "step": 7199 }, { "epoch": 2.33, "learning_rate": 8.523119392684611e-05, "loss": 1.3047, "step": 7500 }, { "epoch": 2.33, "eval_accuracy": 0.7415128788148121, "eval_loss": 1.2168104648590088, "eval_runtime": 6126.7013, "eval_samples_per_second": 16.842, "eval_steps_per_second": 4.211, "step": 7512 }, { "epoch": 2.43, "eval_accuracy": 0.7450029545595697, "eval_loss": 1.1964406967163086, "eval_runtime": 6123.0501, "eval_samples_per_second": 16.852, "eval_steps_per_second": 4.213, "step": 7825 }, { "epoch": 2.48, "learning_rate": 8.350586611456177e-05, "loss": 1.2713, "step": 8000 }, { "epoch": 2.53, "eval_accuracy": 0.7467751766581295, "eval_loss": 1.1841331720352173, "eval_runtime": 6122.3999, "eval_samples_per_second": 16.854, "eval_steps_per_second": 4.214, "step": 8138 }, { "epoch": 2.62, "eval_accuracy": 0.750416850580808, "eval_loss": 1.1633927822113037, "eval_runtime": 6127.4486, "eval_samples_per_second": 16.84, "eval_steps_per_second": 4.21, "step": 8451 }, { "epoch": 2.64, "learning_rate": 8.178053830227743e-05, "loss": 1.2431, "step": 8500 }, { "epoch": 2.72, "eval_accuracy": 0.7527193981891372, "eval_loss": 1.146986722946167, "eval_runtime": 6131.9044, "eval_samples_per_second": 16.828, "eval_steps_per_second": 4.207, "step": 8764 }, { "epoch": 2.79, "learning_rate": 8.00552104899931e-05, "loss": 1.2164, "step": 9000 }, { "epoch": 2.82, "eval_accuracy": 0.7551538736391906, "eval_loss": 1.132608413696289, "eval_runtime": 6121.9035, "eval_samples_per_second": 16.855, "eval_steps_per_second": 4.214, "step": 9077 }, { "epoch": 2.92, "eval_accuracy": 0.7571211907517355, "eval_loss": 1.1203465461730957, "eval_runtime": 6121.527, "eval_samples_per_second": 16.856, "eval_steps_per_second": 4.214, "step": 9390 }, { "epoch": 2.95, "learning_rate": 7.832988267770877e-05, "loss": 1.1951, "step": 9500 }, { "epoch": 3.01, "eval_accuracy": 0.7589963980672606, "eval_loss": 1.1114239692687988, "eval_runtime": 6126.4612, "eval_samples_per_second": 16.843, "eval_steps_per_second": 4.211, "step": 9703 }, { "epoch": 3.11, "learning_rate": 7.660455486542444e-05, "loss": 1.1705, "step": 10000 }, { "epoch": 3.11, "eval_accuracy": 0.7612426818924412, "eval_loss": 1.0974253416061401, "eval_runtime": 6122.547, "eval_samples_per_second": 16.854, "eval_steps_per_second": 4.213, "step": 10016 }, { "epoch": 3.21, "eval_accuracy": 0.7631302412738202, "eval_loss": 1.0867012739181519, "eval_runtime": 6126.2709, "eval_samples_per_second": 16.843, "eval_steps_per_second": 4.211, "step": 10329 }, { "epoch": 3.26, "learning_rate": 7.48792270531401e-05, "loss": 1.1516, "step": 10500 }, { "epoch": 3.3, "eval_accuracy": 0.7646139267496522, "eval_loss": 1.0770790576934814, "eval_runtime": 6130.429, "eval_samples_per_second": 16.832, "eval_steps_per_second": 4.208, "step": 10642 }, { "epoch": 3.4, "eval_accuracy": 0.7660438596581639, "eval_loss": 1.0668072700500488, "eval_runtime": 6129.5434, "eval_samples_per_second": 16.834, "eval_steps_per_second": 4.209, "step": 10955 }, { "epoch": 3.42, "learning_rate": 7.315389924085577e-05, "loss": 1.1345, "step": 11000 }, { "epoch": 3.5, "eval_accuracy": 0.7675726293257004, "eval_loss": 1.05952787399292, "eval_runtime": 6126.4998, "eval_samples_per_second": 16.843, "eval_steps_per_second": 4.211, "step": 11268 }, { "epoch": 3.57, "learning_rate": 7.142857142857143e-05, "loss": 1.1192, "step": 11500 }, { "epoch": 3.6, "eval_accuracy": 0.7694602055551931, "eval_loss": 1.0479472875595093, "eval_runtime": 6127.4827, "eval_samples_per_second": 16.84, "eval_steps_per_second": 4.21, "step": 11581 }, { "epoch": 3.69, "eval_accuracy": 0.7707531140981431, "eval_loss": 1.0423223972320557, "eval_runtime": 6131.6585, "eval_samples_per_second": 16.829, "eval_steps_per_second": 4.207, "step": 11894 }, { "epoch": 3.73, "learning_rate": 6.970324361628709e-05, "loss": 1.106, "step": 12000 }, { "epoch": 3.79, "eval_accuracy": 0.7719773558500885, "eval_loss": 1.0328373908996582, "eval_runtime": 6128.1273, "eval_samples_per_second": 16.838, "eval_steps_per_second": 4.21, "step": 12207 }, { "epoch": 3.88, "learning_rate": 6.797791580400277e-05, "loss": 1.0916, "step": 12500 }, { "epoch": 3.89, "eval_accuracy": 0.7731614368018522, "eval_loss": 1.0272808074951172, "eval_runtime": 6120.3326, "eval_samples_per_second": 16.86, "eval_steps_per_second": 4.215, "step": 12520 }, { "epoch": 3.99, "eval_accuracy": 0.7742511503011699, "eval_loss": 1.0189120769500732, "eval_runtime": 6131.1757, "eval_samples_per_second": 16.83, "eval_steps_per_second": 4.208, "step": 12833 }, { "epoch": 4.04, "learning_rate": 6.625258799171843e-05, "loss": 1.0789, "step": 13000 }, { "epoch": 4.08, "eval_accuracy": 0.7757384860054987, "eval_loss": 1.0113306045532227, "eval_runtime": 6133.3354, "eval_samples_per_second": 16.824, "eval_steps_per_second": 4.206, "step": 13146 }, { "epoch": 4.18, "eval_accuracy": 0.776816006797112, "eval_loss": 1.0058414936065674, "eval_runtime": 6130.4902, "eval_samples_per_second": 16.832, "eval_steps_per_second": 4.208, "step": 13459 }, { "epoch": 4.19, "learning_rate": 6.45272601794341e-05, "loss": 1.0631, "step": 13500 }, { "epoch": 4.28, "eval_accuracy": 0.7777869709950421, "eval_loss": 1.000064730644226, "eval_runtime": 6129.8863, "eval_samples_per_second": 16.833, "eval_steps_per_second": 4.208, "step": 13772 }, { "epoch": 4.35, "learning_rate": 6.280193236714976e-05, "loss": 1.0557, "step": 14000 }, { "epoch": 4.37, "eval_accuracy": 0.778843659908514, "eval_loss": 0.993532121181488, "eval_runtime": 6126.5895, "eval_samples_per_second": 16.842, "eval_steps_per_second": 4.211, "step": 14085 }, { "epoch": 4.47, "eval_accuracy": 0.7797456195039035, "eval_loss": 0.9887062311172485, "eval_runtime": 6127.2121, "eval_samples_per_second": 16.841, "eval_steps_per_second": 4.21, "step": 14398 }, { "epoch": 4.5, "learning_rate": 6.107660455486542e-05, "loss": 1.0438, "step": 14500 }, { "epoch": 4.57, "eval_accuracy": 0.7807731355140578, "eval_loss": 0.9825865030288696, "eval_runtime": 6126.4985, "eval_samples_per_second": 16.843, "eval_steps_per_second": 4.211, "step": 14711 }, { "epoch": 4.66, "learning_rate": 5.9351276742581096e-05, "loss": 1.0361, "step": 15000 }, { "epoch": 4.67, "eval_accuracy": 0.7818996676870377, "eval_loss": 0.9763655662536621, "eval_runtime": 6127.4496, "eval_samples_per_second": 16.84, "eval_steps_per_second": 4.21, "step": 15024 }, { "epoch": 4.76, "eval_accuracy": 0.782940768919716, "eval_loss": 0.9697893857955933, "eval_runtime": 6126.6806, "eval_samples_per_second": 16.842, "eval_steps_per_second": 4.211, "step": 15337 }, { "epoch": 4.81, "learning_rate": 5.762594893029676e-05, "loss": 1.0264, "step": 15500 }, { "epoch": 4.86, "eval_accuracy": 0.7841247808628483, "eval_loss": 0.9644368290901184, "eval_runtime": 6128.2176, "eval_samples_per_second": 16.838, "eval_steps_per_second": 4.21, "step": 15650 }, { "epoch": 4.96, "eval_accuracy": 0.7846301810721098, "eval_loss": 0.9614962339401245, "eval_runtime": 6132.4257, "eval_samples_per_second": 16.826, "eval_steps_per_second": 4.207, "step": 15963 }, { "epoch": 4.97, "learning_rate": 5.590062111801242e-05, "loss": 1.0176, "step": 16000 }, { "epoch": 5.05, "eval_accuracy": 0.7858738693048405, "eval_loss": 0.9536014795303345, "eval_runtime": 6134.6669, "eval_samples_per_second": 16.82, "eval_steps_per_second": 4.205, "step": 16276 }, { "epoch": 5.12, "learning_rate": 5.417529330572809e-05, "loss": 1.007, "step": 16500 }, { "epoch": 5.15, "eval_accuracy": 0.7867571419814423, "eval_loss": 0.9484899044036865, "eval_runtime": 6130.582, "eval_samples_per_second": 16.832, "eval_steps_per_second": 4.208, "step": 16589 }, { "epoch": 5.25, "eval_accuracy": 0.7867586112749555, "eval_loss": 0.9482876658439636, "eval_runtime": 6124.9513, "eval_samples_per_second": 16.847, "eval_steps_per_second": 4.212, "step": 16902 }, { "epoch": 5.28, "learning_rate": 5.244996549344375e-05, "loss": 0.9965, "step": 17000 }, { "epoch": 5.35, "eval_accuracy": 0.7880718537015102, "eval_loss": 0.9402521848678589, "eval_runtime": 6133.8455, "eval_samples_per_second": 16.823, "eval_steps_per_second": 4.206, "step": 17215 }, { "epoch": 5.43, "learning_rate": 5.072463768115943e-05, "loss": 0.9911, "step": 17500 }, { "epoch": 5.44, "eval_accuracy": 0.7888353320614213, "eval_loss": 0.9360187649726868, "eval_runtime": 6131.2854, "eval_samples_per_second": 16.83, "eval_steps_per_second": 4.207, "step": 17528 }, { "epoch": 5.54, "eval_accuracy": 0.7896846598862644, "eval_loss": 0.9315310120582581, "eval_runtime": 6130.221, "eval_samples_per_second": 16.833, "eval_steps_per_second": 4.208, "step": 17841 }, { "epoch": 5.59, "learning_rate": 4.899930986887509e-05, "loss": 0.9861, "step": 18000 }, { "epoch": 5.64, "eval_accuracy": 0.7902251551194575, "eval_loss": 0.9286208152770996, "eval_runtime": 6135.9888, "eval_samples_per_second": 16.817, "eval_steps_per_second": 4.204, "step": 18154 }, { "epoch": 5.73, "eval_accuracy": 0.7910160835517881, "eval_loss": 0.9242651462554932, "eval_runtime": 6134.4232, "eval_samples_per_second": 16.821, "eval_steps_per_second": 4.205, "step": 18467 }, { "epoch": 5.74, "learning_rate": 4.727398205659075e-05, "loss": 0.9787, "step": 18500 }, { "epoch": 5.83, "eval_accuracy": 0.7916774902149969, "eval_loss": 0.9199575185775757, "eval_runtime": 6127.6258, "eval_samples_per_second": 16.84, "eval_steps_per_second": 4.21, "step": 18780 }, { "epoch": 5.9, "learning_rate": 4.554865424430642e-05, "loss": 0.972, "step": 19000 }, { "epoch": 5.93, "eval_accuracy": 0.7921690081239334, "eval_loss": 0.9167630076408386, "eval_runtime": 6121.9416, "eval_samples_per_second": 16.855, "eval_steps_per_second": 4.214, "step": 19093 }, { "epoch": 6.03, "eval_accuracy": 0.7929045391491827, "eval_loss": 0.9131466150283813, "eval_runtime": 6136.433, "eval_samples_per_second": 16.815, "eval_steps_per_second": 4.204, "step": 19406 }, { "epoch": 6.06, "learning_rate": 4.382332643202209e-05, "loss": 0.9642, "step": 19500 }, { "epoch": 6.12, "eval_accuracy": 0.7933599893983608, "eval_loss": 0.9112694263458252, "eval_runtime": 6128.5893, "eval_samples_per_second": 16.837, "eval_steps_per_second": 4.209, "step": 19719 }, { "epoch": 6.21, "learning_rate": 4.209799861973775e-05, "loss": 0.9576, "step": 20000 }, { "epoch": 6.22, "eval_accuracy": 0.7940601199523715, "eval_loss": 0.9060889482498169, "eval_runtime": 6120.6148, "eval_samples_per_second": 16.859, "eval_steps_per_second": 4.215, "step": 20032 }, { "epoch": 6.32, "eval_accuracy": 0.7948685797545274, "eval_loss": 0.9030121564865112, "eval_runtime": 6124.1894, "eval_samples_per_second": 16.849, "eval_steps_per_second": 4.212, "step": 20345 }, { "epoch": 6.37, "learning_rate": 4.0372670807453414e-05, "loss": 0.9514, "step": 20500 }, { "epoch": 6.41, "eval_accuracy": 0.7954765058682228, "eval_loss": 0.8997820615768433, "eval_runtime": 6126.3307, "eval_samples_per_second": 16.843, "eval_steps_per_second": 4.211, "step": 20658 }, { "epoch": 6.51, "eval_accuracy": 0.7961196847197146, "eval_loss": 0.8957119584083557, "eval_runtime": 6121.3143, "eval_samples_per_second": 16.857, "eval_steps_per_second": 4.214, "step": 20971 }, { "epoch": 6.52, "learning_rate": 3.864734299516908e-05, "loss": 0.9457, "step": 21000 }, { "epoch": 6.61, "eval_accuracy": 0.7966353338873807, "eval_loss": 0.8925579190254211, "eval_runtime": 6121.7054, "eval_samples_per_second": 16.856, "eval_steps_per_second": 4.214, "step": 21284 }, { "epoch": 6.68, "learning_rate": 3.692201518288475e-05, "loss": 0.9411, "step": 21500 }, { "epoch": 6.71, "eval_accuracy": 0.7968278874690401, "eval_loss": 0.8926752805709839, "eval_runtime": 6123.2773, "eval_samples_per_second": 16.852, "eval_steps_per_second": 4.213, "step": 21597 }, { "epoch": 6.8, "eval_accuracy": 0.7974544355055755, "eval_loss": 0.8880347609519958, "eval_runtime": 6121.4872, "eval_samples_per_second": 16.857, "eval_steps_per_second": 4.214, "step": 21910 }, { "epoch": 6.83, "learning_rate": 3.519668737060042e-05, "loss": 0.9349, "step": 22000 }, { "epoch": 6.9, "eval_accuracy": 0.7982437294026129, "eval_loss": 0.8834199905395508, "eval_runtime": 6123.2699, "eval_samples_per_second": 16.852, "eval_steps_per_second": 4.213, "step": 22223 }, { "epoch": 6.99, "learning_rate": 3.347135955831608e-05, "loss": 0.9319, "step": 22500 }, { "epoch": 7.0, "eval_accuracy": 0.7990805845521158, "eval_loss": 0.8799129724502563, "eval_runtime": 6120.2145, "eval_samples_per_second": 16.86, "eval_steps_per_second": 4.215, "step": 22536 }, { "epoch": 7.1, "eval_accuracy": 0.7991272231482186, "eval_loss": 0.879518449306488, "eval_runtime": 6125.0222, "eval_samples_per_second": 16.847, "eval_steps_per_second": 4.212, "step": 22849 }, { "epoch": 7.14, "learning_rate": 3.1746031746031745e-05, "loss": 0.9235, "step": 23000 }, { "epoch": 7.19, "eval_accuracy": 0.7999484030167242, "eval_loss": 0.8756560683250427, "eval_runtime": 6127.211, "eval_samples_per_second": 16.841, "eval_steps_per_second": 4.21, "step": 23162 }, { "epoch": 7.29, "eval_accuracy": 0.8001440250718516, "eval_loss": 0.8739376068115234, "eval_runtime": 6134.261, "eval_samples_per_second": 16.821, "eval_steps_per_second": 4.205, "step": 23475 }, { "epoch": 7.3, "learning_rate": 3.0020703933747414e-05, "loss": 0.9198, "step": 23500 }, { "epoch": 7.39, "eval_accuracy": 0.8010690824018636, "eval_loss": 0.8693613409996033, "eval_runtime": 6132.2846, "eval_samples_per_second": 16.827, "eval_steps_per_second": 4.207, "step": 23788 }, { "epoch": 7.45, "learning_rate": 2.829537612146308e-05, "loss": 0.9158, "step": 24000 }, { "epoch": 7.48, "eval_accuracy": 0.8011952977602468, "eval_loss": 0.8689371943473816, "eval_runtime": 6129.2095, "eval_samples_per_second": 16.835, "eval_steps_per_second": 4.209, "step": 24101 }, { "epoch": 7.58, "eval_accuracy": 0.8017360324487328, "eval_loss": 0.8663704991340637, "eval_runtime": 6128.5565, "eval_samples_per_second": 16.837, "eval_steps_per_second": 4.209, "step": 24414 }, { "epoch": 7.61, "learning_rate": 2.6570048309178748e-05, "loss": 0.9125, "step": 24500 }, { "epoch": 7.68, "eval_accuracy": 0.8020007406811046, "eval_loss": 0.8649431467056274, "eval_runtime": 6132.8666, "eval_samples_per_second": 16.825, "eval_steps_per_second": 4.206, "step": 24727 }, { "epoch": 7.76, "learning_rate": 2.484472049689441e-05, "loss": 0.9099, "step": 25000 }, { "epoch": 7.78, "eval_accuracy": 0.8026024276561983, "eval_loss": 0.8605436086654663, "eval_runtime": 6126.7586, "eval_samples_per_second": 16.842, "eval_steps_per_second": 4.211, "step": 25040 }, { "epoch": 7.87, "eval_accuracy": 0.80301129412462, "eval_loss": 0.8582573533058167, "eval_runtime": 6127.3341, "eval_samples_per_second": 16.84, "eval_steps_per_second": 4.21, "step": 25353 }, { "epoch": 7.92, "learning_rate": 2.311939268461008e-05, "loss": 0.9054, "step": 25500 }, { "epoch": 7.97, "eval_accuracy": 0.8034071794966846, "eval_loss": 0.8573377132415771, "eval_runtime": 6131.9465, "eval_samples_per_second": 16.828, "eval_steps_per_second": 4.207, "step": 25666 }, { "epoch": 8.07, "eval_accuracy": 0.8038572222331624, "eval_loss": 0.8544816374778748, "eval_runtime": 6128.9922, "eval_samples_per_second": 16.836, "eval_steps_per_second": 4.209, "step": 25979 }, { "epoch": 8.07, "learning_rate": 2.139406487232574e-05, "loss": 0.8998, "step": 26000 }, { "epoch": 8.16, "eval_accuracy": 0.8044058818938022, "eval_loss": 0.8519273400306702, "eval_runtime": 6124.6473, "eval_samples_per_second": 16.848, "eval_steps_per_second": 4.212, "step": 26292 }, { "epoch": 8.23, "learning_rate": 1.966873706004141e-05, "loss": 0.8939, "step": 26500 }, { "epoch": 8.26, "eval_accuracy": 0.8044216416179728, "eval_loss": 0.8512473702430725, "eval_runtime": 6126.8526, "eval_samples_per_second": 16.842, "eval_steps_per_second": 4.21, "step": 26605 }, { "epoch": 8.36, "eval_accuracy": 0.804752442721678, "eval_loss": 0.8492391705513, "eval_runtime": 6127.2647, "eval_samples_per_second": 16.841, "eval_steps_per_second": 4.21, "step": 26918 }, { "epoch": 8.38, "learning_rate": 1.7943409247757076e-05, "loss": 0.8942, "step": 27000 }, { "epoch": 8.46, "eval_accuracy": 0.8051816524786768, "eval_loss": 0.8468219637870789, "eval_runtime": 6124.9306, "eval_samples_per_second": 16.847, "eval_steps_per_second": 4.212, "step": 27231 }, { "epoch": 8.54, "learning_rate": 1.621808143547274e-05, "loss": 0.8904, "step": 27500 }, { "epoch": 8.55, "eval_accuracy": 0.8055019757141467, "eval_loss": 0.8458420634269714, "eval_runtime": 6124.8245, "eval_samples_per_second": 16.847, "eval_steps_per_second": 4.212, "step": 27544 }, { "epoch": 8.65, "eval_accuracy": 0.8057308816675628, "eval_loss": 0.8443206548690796, "eval_runtime": 6129.9291, "eval_samples_per_second": 16.833, "eval_steps_per_second": 4.208, "step": 27857 }, { "epoch": 8.69, "learning_rate": 1.4492753623188407e-05, "loss": 0.8862, "step": 28000 }, { "epoch": 8.75, "eval_accuracy": 0.805897348183967, "eval_loss": 0.843222439289093, "eval_runtime": 6128.1919, "eval_samples_per_second": 16.838, "eval_steps_per_second": 4.21, "step": 28170 }, { "epoch": 8.84, "eval_accuracy": 0.8064984673341041, "eval_loss": 0.84042888879776, "eval_runtime": 6116.6369, "eval_samples_per_second": 16.87, "eval_steps_per_second": 4.218, "step": 28483 }, { "epoch": 8.85, "learning_rate": 1.276742581090407e-05, "loss": 0.8842, "step": 28500 }, { "epoch": 8.94, "eval_accuracy": 0.806853518328651, "eval_loss": 0.8381487727165222, "eval_runtime": 6118.9718, "eval_samples_per_second": 16.863, "eval_steps_per_second": 4.216, "step": 28796 }, { "epoch": 9.01, "learning_rate": 1.1042097998619738e-05, "loss": 0.8812, "step": 29000 }, { "epoch": 9.04, "eval_accuracy": 0.8070338579198731, "eval_loss": 0.8374488353729248, "eval_runtime": 6118.7308, "eval_samples_per_second": 16.864, "eval_steps_per_second": 4.216, "step": 29109 }, { "epoch": 9.14, "eval_accuracy": 0.8068436046687713, "eval_loss": 0.8375363945960999, "eval_runtime": 6128.5918, "eval_samples_per_second": 16.837, "eval_steps_per_second": 4.209, "step": 29422 }, { "epoch": 9.16, "learning_rate": 9.316770186335403e-06, "loss": 0.8774, "step": 29500 }, { "epoch": 9.23, "eval_accuracy": 0.8077565106716271, "eval_loss": 0.8336867094039917, "eval_runtime": 6119.8095, "eval_samples_per_second": 16.861, "eval_steps_per_second": 4.215, "step": 29735 }, { "epoch": 9.32, "learning_rate": 7.591442374051071e-06, "loss": 0.8752, "step": 30000 }, { "epoch": 9.33, "eval_accuracy": 0.8081288482769053, "eval_loss": 0.8320378661155701, "eval_runtime": 6119.7341, "eval_samples_per_second": 16.861, "eval_steps_per_second": 4.215, "step": 30048 }, { "epoch": 9.43, "eval_accuracy": 0.8082356261550239, "eval_loss": 0.8310965299606323, "eval_runtime": 6119.6431, "eval_samples_per_second": 16.862, "eval_steps_per_second": 4.215, "step": 30361 }, { "epoch": 9.47, "learning_rate": 5.866114561766736e-06, "loss": 0.8732, "step": 30500 }, { "epoch": 9.53, "eval_accuracy": 0.8083999448820824, "eval_loss": 0.8303462266921997, "eval_runtime": 6118.4989, "eval_samples_per_second": 16.865, "eval_steps_per_second": 4.216, "step": 30674 }, { "epoch": 9.62, "eval_accuracy": 0.8084419046833061, "eval_loss": 0.8290849328041077, "eval_runtime": 6127.7892, "eval_samples_per_second": 16.839, "eval_steps_per_second": 4.21, "step": 30987 }, { "epoch": 9.63, "learning_rate": 4.140786749482402e-06, "loss": 0.8715, "step": 31000 }, { "epoch": 9.72, "eval_accuracy": 0.8088197529604327, "eval_loss": 0.8284289836883545, "eval_runtime": 6124.7156, "eval_samples_per_second": 16.848, "eval_steps_per_second": 4.212, "step": 31300 }, { "epoch": 9.78, "learning_rate": 2.4154589371980677e-06, "loss": 0.8705, "step": 31500 }, { "epoch": 9.82, "eval_accuracy": 0.8085015827448934, "eval_loss": 0.8298270106315613, "eval_runtime": 6120.6207, "eval_samples_per_second": 16.859, "eval_steps_per_second": 4.215, "step": 31613 }, { "epoch": 9.91, "eval_accuracy": 0.8086080278025564, "eval_loss": 0.8285703659057617, "eval_runtime": 6122.3492, "eval_samples_per_second": 16.854, "eval_steps_per_second": 4.214, "step": 31926 }, { "epoch": 9.94, "learning_rate": 6.901311249137336e-07, "loss": 0.8676, "step": 32000 }, { "epoch": 10.0, "step": 32200, "total_flos": 9.597056792179405e+18, "train_loss": 1.5035098029663845, "train_runtime": 2595975.6547, "train_samples_per_second": 3.176, "train_steps_per_second": 0.012 } ], "max_steps": 32200, "num_train_epochs": 10, "total_flos": 9.597056792179405e+18, "trial_name": null, "trial_params": null }