{ "best_metric": 0.3248412013053894, "best_model_checkpoint": "./vit-base-beans/checkpoint-3840", "epoch": 1.9865494050698396, "global_step": 3840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.00019948266942576307, "loss": 1.9449, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.00019896533885152613, "loss": 1.5813, "step": 20 }, { "epoch": 0.02, "learning_rate": 0.0001984480082772892, "loss": 1.3362, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.00019793067770305226, "loss": 1.332, "step": 40 }, { "epoch": 0.02, "eval_accuracy": 0.5205692108667529, "eval_loss": 1.2994741201400757, "eval_runtime": 42.6019, "eval_samples_per_second": 90.724, "eval_steps_per_second": 11.361, "step": 40 }, { "epoch": 0.03, "learning_rate": 0.00019741334712881532, "loss": 1.2472, "step": 50 }, { "epoch": 0.03, "learning_rate": 0.00019689601655457838, "loss": 1.2749, "step": 60 }, { "epoch": 0.04, "learning_rate": 0.00019637868598034144, "loss": 1.0352, "step": 70 }, { "epoch": 0.04, "learning_rate": 0.0001958613554061045, "loss": 1.0517, "step": 80 }, { "epoch": 0.04, "eval_accuracy": 0.5673997412677878, "eval_loss": 1.1108654737472534, "eval_runtime": 42.8705, "eval_samples_per_second": 90.155, "eval_steps_per_second": 11.29, "step": 80 }, { "epoch": 0.05, "learning_rate": 0.00019534402483186757, "loss": 1.0752, "step": 90 }, { "epoch": 0.05, "learning_rate": 0.00019482669425763063, "loss": 1.2143, "step": 100 }, { "epoch": 0.06, "learning_rate": 0.0001943093636833937, "loss": 1.0985, "step": 110 }, { "epoch": 0.06, "learning_rate": 0.00019379203310915678, "loss": 1.1256, "step": 120 }, { "epoch": 0.06, "eval_accuracy": 0.6051746442432083, "eval_loss": 1.0107784271240234, "eval_runtime": 42.8294, "eval_samples_per_second": 90.242, "eval_steps_per_second": 11.301, "step": 120 }, { "epoch": 0.07, "learning_rate": 0.0001932747025349198, "loss": 0.9229, "step": 130 }, { "epoch": 0.07, "learning_rate": 0.00019275737196068287, "loss": 0.9273, "step": 140 }, { "epoch": 0.08, "learning_rate": 0.00019224004138644596, "loss": 0.958, "step": 150 }, { "epoch": 0.08, "learning_rate": 0.000191722710812209, "loss": 0.8958, "step": 160 }, { "epoch": 0.08, "eval_accuracy": 0.6248382923673997, "eval_loss": 0.9631242156028748, "eval_runtime": 42.7586, "eval_samples_per_second": 90.391, "eval_steps_per_second": 11.319, "step": 160 }, { "epoch": 0.09, "learning_rate": 0.00019120538023797209, "loss": 0.9523, "step": 170 }, { "epoch": 0.09, "learning_rate": 0.00019068804966373515, "loss": 1.0217, "step": 180 }, { "epoch": 0.1, "learning_rate": 0.00019017071908949818, "loss": 1.0574, "step": 190 }, { "epoch": 0.1, "learning_rate": 0.00018965338851526127, "loss": 0.9888, "step": 200 }, { "epoch": 0.1, "eval_accuracy": 0.6349288486416559, "eval_loss": 0.9595150947570801, "eval_runtime": 42.8989, "eval_samples_per_second": 90.095, "eval_steps_per_second": 11.282, "step": 200 }, { "epoch": 0.11, "learning_rate": 0.0001891360579410243, "loss": 0.8882, "step": 210 }, { "epoch": 0.11, "learning_rate": 0.0001886187273667874, "loss": 0.92, "step": 220 }, { "epoch": 0.12, "learning_rate": 0.00018810139679255046, "loss": 0.8887, "step": 230 }, { "epoch": 0.12, "learning_rate": 0.0001875840662183135, "loss": 0.8887, "step": 240 }, { "epoch": 0.12, "eval_accuracy": 0.6305304010349289, "eval_loss": 0.9519457817077637, "eval_runtime": 42.9821, "eval_samples_per_second": 89.921, "eval_steps_per_second": 11.261, "step": 240 }, { "epoch": 0.13, "learning_rate": 0.00018706673564407658, "loss": 0.974, "step": 250 }, { "epoch": 0.13, "learning_rate": 0.00018654940506983964, "loss": 0.9556, "step": 260 }, { "epoch": 0.14, "learning_rate": 0.0001860320744956027, "loss": 0.8191, "step": 270 }, { "epoch": 0.14, "learning_rate": 0.00018551474392136577, "loss": 0.7793, "step": 280 }, { "epoch": 0.14, "eval_accuracy": 0.6677878395860285, "eval_loss": 0.8867014050483704, "eval_runtime": 42.8178, "eval_samples_per_second": 90.266, "eval_steps_per_second": 11.304, "step": 280 }, { "epoch": 0.15, "learning_rate": 0.00018499741334712883, "loss": 0.8582, "step": 290 }, { "epoch": 0.16, "learning_rate": 0.0001844800827728919, "loss": 0.8303, "step": 300 }, { "epoch": 0.16, "learning_rate": 0.00018396275219865495, "loss": 0.9885, "step": 310 }, { "epoch": 0.17, "learning_rate": 0.000183445421624418, "loss": 0.8471, "step": 320 }, { "epoch": 0.17, "eval_accuracy": 0.6623544631306598, "eval_loss": 0.8803606033325195, "eval_runtime": 42.9624, "eval_samples_per_second": 89.962, "eval_steps_per_second": 11.266, "step": 320 }, { "epoch": 0.17, "learning_rate": 0.00018292809105018107, "loss": 1.0244, "step": 330 }, { "epoch": 0.18, "learning_rate": 0.00018241076047594414, "loss": 0.9719, "step": 340 }, { "epoch": 0.18, "learning_rate": 0.0001818934299017072, "loss": 0.8929, "step": 350 }, { "epoch": 0.19, "learning_rate": 0.00018137609932747026, "loss": 0.907, "step": 360 }, { "epoch": 0.19, "eval_accuracy": 0.6675291073738681, "eval_loss": 0.8609929084777832, "eval_runtime": 42.7896, "eval_samples_per_second": 90.326, "eval_steps_per_second": 11.311, "step": 360 }, { "epoch": 0.19, "learning_rate": 0.00018085876875323332, "loss": 0.9411, "step": 370 }, { "epoch": 0.2, "learning_rate": 0.00018034143817899638, "loss": 0.7953, "step": 380 }, { "epoch": 0.2, "learning_rate": 0.00017982410760475944, "loss": 0.9103, "step": 390 }, { "epoch": 0.21, "learning_rate": 0.00017930677703052253, "loss": 0.8575, "step": 400 }, { "epoch": 0.21, "eval_accuracy": 0.685640362225097, "eval_loss": 0.8224917650222778, "eval_runtime": 42.3738, "eval_samples_per_second": 91.212, "eval_steps_per_second": 11.422, "step": 400 }, { "epoch": 0.21, "learning_rate": 0.00017878944645628557, "loss": 0.8143, "step": 410 }, { "epoch": 0.22, "learning_rate": 0.00017827211588204863, "loss": 0.6689, "step": 420 }, { "epoch": 0.22, "learning_rate": 0.00017775478530781172, "loss": 0.7662, "step": 430 }, { "epoch": 0.23, "learning_rate": 0.00017723745473357475, "loss": 0.7847, "step": 440 }, { "epoch": 0.23, "eval_accuracy": 0.7073738680465718, "eval_loss": 0.7917023301124573, "eval_runtime": 42.3838, "eval_samples_per_second": 91.191, "eval_steps_per_second": 11.419, "step": 440 }, { "epoch": 0.23, "learning_rate": 0.00017672012415933784, "loss": 0.7556, "step": 450 }, { "epoch": 0.24, "learning_rate": 0.00017620279358510088, "loss": 0.7435, "step": 460 }, { "epoch": 0.24, "learning_rate": 0.00017568546301086394, "loss": 0.8761, "step": 470 }, { "epoch": 0.25, "learning_rate": 0.00017516813243662703, "loss": 0.7827, "step": 480 }, { "epoch": 0.25, "eval_accuracy": 0.7009055627425614, "eval_loss": 0.7942800521850586, "eval_runtime": 42.3643, "eval_samples_per_second": 91.233, "eval_steps_per_second": 11.425, "step": 480 }, { "epoch": 0.25, "learning_rate": 0.00017465080186239006, "loss": 0.9351, "step": 490 }, { "epoch": 0.26, "learning_rate": 0.00017413347128815315, "loss": 0.805, "step": 500 }, { "epoch": 0.26, "learning_rate": 0.0001736161407139162, "loss": 0.9189, "step": 510 }, { "epoch": 0.27, "learning_rate": 0.00017309881013967925, "loss": 0.7886, "step": 520 }, { "epoch": 0.27, "eval_accuracy": 0.6613195342820181, "eval_loss": 0.8620208501815796, "eval_runtime": 42.6346, "eval_samples_per_second": 90.654, "eval_steps_per_second": 11.352, "step": 520 }, { "epoch": 0.27, "learning_rate": 0.00017258147956544234, "loss": 0.7991, "step": 530 }, { "epoch": 0.28, "learning_rate": 0.0001720641489912054, "loss": 0.8246, "step": 540 }, { "epoch": 0.28, "learning_rate": 0.00017154681841696846, "loss": 0.8205, "step": 550 }, { "epoch": 0.29, "learning_rate": 0.00017102948784273152, "loss": 0.7851, "step": 560 }, { "epoch": 0.29, "eval_accuracy": 0.6972833117723156, "eval_loss": 0.7913413047790527, "eval_runtime": 42.3735, "eval_samples_per_second": 91.213, "eval_steps_per_second": 11.422, "step": 560 }, { "epoch": 0.29, "learning_rate": 0.00017051215726849456, "loss": 0.8207, "step": 570 }, { "epoch": 0.3, "learning_rate": 0.00016999482669425764, "loss": 0.7919, "step": 580 }, { "epoch": 0.31, "learning_rate": 0.0001694774961200207, "loss": 0.6958, "step": 590 }, { "epoch": 0.31, "learning_rate": 0.00016896016554578377, "loss": 0.9368, "step": 600 }, { "epoch": 0.31, "eval_accuracy": 0.6957309184993532, "eval_loss": 0.8293155431747437, "eval_runtime": 42.2867, "eval_samples_per_second": 91.4, "eval_steps_per_second": 11.446, "step": 600 }, { "epoch": 0.32, "learning_rate": 0.00016844283497154683, "loss": 0.7047, "step": 610 }, { "epoch": 0.32, "learning_rate": 0.0001679255043973099, "loss": 0.8973, "step": 620 }, { "epoch": 0.33, "learning_rate": 0.00016740817382307295, "loss": 0.8332, "step": 630 }, { "epoch": 0.33, "learning_rate": 0.00016689084324883602, "loss": 0.8284, "step": 640 }, { "epoch": 0.33, "eval_accuracy": 0.6437257438551099, "eval_loss": 0.8856919407844543, "eval_runtime": 42.4054, "eval_samples_per_second": 91.144, "eval_steps_per_second": 11.414, "step": 640 }, { "epoch": 0.34, "learning_rate": 0.00016637351267459908, "loss": 0.7203, "step": 650 }, { "epoch": 0.34, "learning_rate": 0.00016585618210036214, "loss": 0.7151, "step": 660 }, { "epoch": 0.35, "learning_rate": 0.0001653388515261252, "loss": 0.6793, "step": 670 }, { "epoch": 0.35, "learning_rate": 0.00016482152095188826, "loss": 0.8299, "step": 680 }, { "epoch": 0.35, "eval_accuracy": 0.726261319534282, "eval_loss": 0.7111316323280334, "eval_runtime": 42.7336, "eval_samples_per_second": 90.444, "eval_steps_per_second": 11.326, "step": 680 }, { "epoch": 0.36, "learning_rate": 0.00016430419037765132, "loss": 0.7213, "step": 690 }, { "epoch": 0.36, "learning_rate": 0.00016378685980341439, "loss": 0.6364, "step": 700 }, { "epoch": 0.37, "learning_rate": 0.00016326952922917745, "loss": 0.6296, "step": 710 }, { "epoch": 0.37, "learning_rate": 0.0001627521986549405, "loss": 0.7239, "step": 720 }, { "epoch": 0.37, "eval_accuracy": 0.7304010349288487, "eval_loss": 0.7160272002220154, "eval_runtime": 42.6157, "eval_samples_per_second": 90.694, "eval_steps_per_second": 11.357, "step": 720 }, { "epoch": 0.38, "learning_rate": 0.00016223486808070357, "loss": 0.766, "step": 730 }, { "epoch": 0.38, "learning_rate": 0.00016171753750646663, "loss": 0.8078, "step": 740 }, { "epoch": 0.39, "learning_rate": 0.0001612002069322297, "loss": 0.8452, "step": 750 }, { "epoch": 0.39, "learning_rate": 0.00016068287635799278, "loss": 0.6726, "step": 760 }, { "epoch": 0.39, "eval_accuracy": 0.7270375161707633, "eval_loss": 0.7201307415962219, "eval_runtime": 42.4274, "eval_samples_per_second": 91.097, "eval_steps_per_second": 11.408, "step": 760 }, { "epoch": 0.4, "learning_rate": 0.00016016554578375582, "loss": 0.7182, "step": 770 }, { "epoch": 0.4, "learning_rate": 0.00015964821520951888, "loss": 0.7185, "step": 780 }, { "epoch": 0.41, "learning_rate": 0.00015913088463528197, "loss": 0.6869, "step": 790 }, { "epoch": 0.41, "learning_rate": 0.000158613554061045, "loss": 0.6081, "step": 800 }, { "epoch": 0.41, "eval_accuracy": 0.6970245795601553, "eval_loss": 0.8389468789100647, "eval_runtime": 42.7093, "eval_samples_per_second": 90.496, "eval_steps_per_second": 11.332, "step": 800 }, { "epoch": 0.42, "learning_rate": 0.0001580962234868081, "loss": 0.8349, "step": 810 }, { "epoch": 0.42, "learning_rate": 0.00015757889291257113, "loss": 0.6436, "step": 820 }, { "epoch": 0.43, "learning_rate": 0.0001570615623383342, "loss": 0.7325, "step": 830 }, { "epoch": 0.43, "learning_rate": 0.00015654423176409728, "loss": 0.8363, "step": 840 }, { "epoch": 0.43, "eval_accuracy": 0.7260025873221216, "eval_loss": 0.7098237872123718, "eval_runtime": 42.9691, "eval_samples_per_second": 89.948, "eval_steps_per_second": 11.264, "step": 840 }, { "epoch": 0.44, "learning_rate": 0.0001560269011898603, "loss": 0.6456, "step": 850 }, { "epoch": 0.44, "learning_rate": 0.0001555095706156234, "loss": 0.7325, "step": 860 }, { "epoch": 0.45, "learning_rate": 0.00015499224004138646, "loss": 0.6097, "step": 870 }, { "epoch": 0.46, "learning_rate": 0.0001544749094671495, "loss": 0.6176, "step": 880 }, { "epoch": 0.46, "eval_accuracy": 0.7267787839586028, "eval_loss": 0.7126018404960632, "eval_runtime": 42.8632, "eval_samples_per_second": 90.171, "eval_steps_per_second": 11.292, "step": 880 }, { "epoch": 0.46, "learning_rate": 0.00015395757889291259, "loss": 0.6045, "step": 890 }, { "epoch": 0.47, "learning_rate": 0.00015344024831867565, "loss": 0.6202, "step": 900 }, { "epoch": 0.47, "learning_rate": 0.0001529229177444387, "loss": 0.753, "step": 910 }, { "epoch": 0.48, "learning_rate": 0.00015240558717020177, "loss": 0.852, "step": 920 }, { "epoch": 0.48, "eval_accuracy": 0.727554980595084, "eval_loss": 0.715835452079773, "eval_runtime": 42.7695, "eval_samples_per_second": 90.368, "eval_steps_per_second": 11.316, "step": 920 }, { "epoch": 0.48, "learning_rate": 0.00015188825659596483, "loss": 0.7012, "step": 930 }, { "epoch": 0.49, "learning_rate": 0.0001513709260217279, "loss": 0.5647, "step": 940 }, { "epoch": 0.49, "learning_rate": 0.00015085359544749096, "loss": 0.6485, "step": 950 }, { "epoch": 0.5, "learning_rate": 0.00015033626487325402, "loss": 0.7937, "step": 960 }, { "epoch": 0.5, "eval_accuracy": 0.7190168175937904, "eval_loss": 0.7453812956809998, "eval_runtime": 43.0359, "eval_samples_per_second": 89.809, "eval_steps_per_second": 11.246, "step": 960 }, { "epoch": 0.5, "learning_rate": 0.00014981893429901708, "loss": 0.7984, "step": 970 }, { "epoch": 0.51, "learning_rate": 0.00014930160372478014, "loss": 0.6896, "step": 980 }, { "epoch": 0.51, "learning_rate": 0.0001487842731505432, "loss": 0.6828, "step": 990 }, { "epoch": 0.52, "learning_rate": 0.00014826694257630627, "loss": 0.6087, "step": 1000 }, { "epoch": 0.52, "eval_accuracy": 0.745666235446313, "eval_loss": 0.7019104361534119, "eval_runtime": 43.12, "eval_samples_per_second": 89.634, "eval_steps_per_second": 11.224, "step": 1000 }, { "epoch": 0.52, "learning_rate": 0.00014774961200206933, "loss": 0.5794, "step": 1010 }, { "epoch": 0.53, "learning_rate": 0.0001472322814278324, "loss": 0.6007, "step": 1020 }, { "epoch": 0.53, "learning_rate": 0.00014671495085359545, "loss": 0.6977, "step": 1030 }, { "epoch": 0.54, "learning_rate": 0.00014619762027935854, "loss": 0.6523, "step": 1040 }, { "epoch": 0.54, "eval_accuracy": 0.7611901681759379, "eval_loss": 0.6592049598693848, "eval_runtime": 43.048, "eval_samples_per_second": 89.783, "eval_steps_per_second": 11.243, "step": 1040 }, { "epoch": 0.54, "learning_rate": 0.00014568028970512157, "loss": 0.7187, "step": 1050 }, { "epoch": 0.55, "learning_rate": 0.00014516295913088464, "loss": 0.7343, "step": 1060 }, { "epoch": 0.55, "learning_rate": 0.0001446456285566477, "loss": 0.6977, "step": 1070 }, { "epoch": 0.56, "learning_rate": 0.00014412829798241076, "loss": 0.6964, "step": 1080 }, { "epoch": 0.56, "eval_accuracy": 0.7428201811125485, "eval_loss": 0.682680606842041, "eval_runtime": 43.1365, "eval_samples_per_second": 89.599, "eval_steps_per_second": 11.22, "step": 1080 }, { "epoch": 0.56, "learning_rate": 0.00014361096740817385, "loss": 0.6316, "step": 1090 }, { "epoch": 0.57, "learning_rate": 0.00014309363683393688, "loss": 0.5869, "step": 1100 }, { "epoch": 0.57, "learning_rate": 0.00014257630625969994, "loss": 0.734, "step": 1110 }, { "epoch": 0.58, "learning_rate": 0.00014205897568546303, "loss": 0.5214, "step": 1120 }, { "epoch": 0.58, "eval_accuracy": 0.7619663648124192, "eval_loss": 0.6545156240463257, "eval_runtime": 43.5785, "eval_samples_per_second": 88.691, "eval_steps_per_second": 11.106, "step": 1120 }, { "epoch": 0.58, "learning_rate": 0.00014154164511122607, "loss": 0.6521, "step": 1130 }, { "epoch": 0.59, "learning_rate": 0.00014102431453698916, "loss": 0.7645, "step": 1140 }, { "epoch": 0.59, "learning_rate": 0.00014050698396275222, "loss": 0.6585, "step": 1150 }, { "epoch": 0.6, "learning_rate": 0.00013998965338851525, "loss": 0.6959, "step": 1160 }, { "epoch": 0.6, "eval_accuracy": 0.7391979301423027, "eval_loss": 0.6814814805984497, "eval_runtime": 43.4601, "eval_samples_per_second": 88.932, "eval_steps_per_second": 11.137, "step": 1160 }, { "epoch": 0.61, "learning_rate": 0.00013947232281427834, "loss": 0.6886, "step": 1170 }, { "epoch": 0.61, "learning_rate": 0.0001389549922400414, "loss": 0.706, "step": 1180 }, { "epoch": 0.62, "learning_rate": 0.00013843766166580447, "loss": 0.7285, "step": 1190 }, { "epoch": 0.62, "learning_rate": 0.00013792033109156753, "loss": 0.7318, "step": 1200 }, { "epoch": 0.62, "eval_accuracy": 0.726261319534282, "eval_loss": 0.7493521571159363, "eval_runtime": 43.7478, "eval_samples_per_second": 88.347, "eval_steps_per_second": 11.063, "step": 1200 }, { "epoch": 0.63, "learning_rate": 0.00013740300051733056, "loss": 0.6447, "step": 1210 }, { "epoch": 0.63, "learning_rate": 0.00013688566994309365, "loss": 0.6502, "step": 1220 }, { "epoch": 0.64, "learning_rate": 0.0001363683393688567, "loss": 0.6552, "step": 1230 }, { "epoch": 0.64, "learning_rate": 0.00013585100879461975, "loss": 0.4897, "step": 1240 }, { "epoch": 0.64, "eval_accuracy": 0.7353169469598965, "eval_loss": 0.6954035758972168, "eval_runtime": 43.8864, "eval_samples_per_second": 88.068, "eval_steps_per_second": 11.028, "step": 1240 }, { "epoch": 0.65, "learning_rate": 0.00013533367822038284, "loss": 0.7551, "step": 1250 }, { "epoch": 0.65, "learning_rate": 0.0001348163476461459, "loss": 0.6713, "step": 1260 }, { "epoch": 0.66, "learning_rate": 0.00013429901707190896, "loss": 0.6389, "step": 1270 }, { "epoch": 0.66, "learning_rate": 0.00013378168649767202, "loss": 0.7711, "step": 1280 }, { "epoch": 0.66, "eval_accuracy": 0.7648124191461837, "eval_loss": 0.6494836807250977, "eval_runtime": 43.4576, "eval_samples_per_second": 88.937, "eval_steps_per_second": 11.137, "step": 1280 }, { "epoch": 0.67, "learning_rate": 0.00013326435592343508, "loss": 0.5356, "step": 1290 }, { "epoch": 0.67, "learning_rate": 0.00013274702534919814, "loss": 0.534, "step": 1300 }, { "epoch": 0.68, "learning_rate": 0.0001322296947749612, "loss": 0.5749, "step": 1310 }, { "epoch": 0.68, "learning_rate": 0.00013171236420072427, "loss": 0.5831, "step": 1320 }, { "epoch": 0.68, "eval_accuracy": 0.7689521345407503, "eval_loss": 0.6288875341415405, "eval_runtime": 43.2829, "eval_samples_per_second": 89.296, "eval_steps_per_second": 11.182, "step": 1320 }, { "epoch": 0.69, "learning_rate": 0.00013119503362648733, "loss": 0.6402, "step": 1330 }, { "epoch": 0.69, "learning_rate": 0.0001306777030522504, "loss": 0.6681, "step": 1340 }, { "epoch": 0.7, "learning_rate": 0.00013016037247801345, "loss": 0.616, "step": 1350 }, { "epoch": 0.7, "learning_rate": 0.00012964304190377652, "loss": 0.6276, "step": 1360 }, { "epoch": 0.7, "eval_accuracy": 0.7547218628719276, "eval_loss": 0.6559097766876221, "eval_runtime": 42.7536, "eval_samples_per_second": 90.402, "eval_steps_per_second": 11.321, "step": 1360 }, { "epoch": 0.71, "learning_rate": 0.00012912571132953958, "loss": 0.6331, "step": 1370 }, { "epoch": 0.71, "learning_rate": 0.00012860838075530264, "loss": 0.6172, "step": 1380 }, { "epoch": 0.72, "learning_rate": 0.0001280910501810657, "loss": 0.6621, "step": 1390 }, { "epoch": 0.72, "learning_rate": 0.0001275737196068288, "loss": 0.6204, "step": 1400 }, { "epoch": 0.72, "eval_accuracy": 0.7464424320827943, "eval_loss": 0.6652135848999023, "eval_runtime": 42.8306, "eval_samples_per_second": 90.239, "eval_steps_per_second": 11.3, "step": 1400 }, { "epoch": 0.73, "learning_rate": 0.00012705638903259182, "loss": 0.5672, "step": 1410 }, { "epoch": 0.73, "learning_rate": 0.00012653905845835489, "loss": 0.5268, "step": 1420 }, { "epoch": 0.74, "learning_rate": 0.00012602172788411797, "loss": 0.5703, "step": 1430 }, { "epoch": 0.74, "learning_rate": 0.000125504397309881, "loss": 0.4628, "step": 1440 }, { "epoch": 0.74, "eval_accuracy": 0.7562742561448901, "eval_loss": 0.6426355838775635, "eval_runtime": 42.7732, "eval_samples_per_second": 90.36, "eval_steps_per_second": 11.316, "step": 1440 }, { "epoch": 0.75, "learning_rate": 0.0001249870667356441, "loss": 0.5506, "step": 1450 }, { "epoch": 0.76, "learning_rate": 0.00012446973616140713, "loss": 0.6282, "step": 1460 }, { "epoch": 0.76, "learning_rate": 0.0001239524055871702, "loss": 0.77, "step": 1470 }, { "epoch": 0.77, "learning_rate": 0.00012343507501293328, "loss": 0.5973, "step": 1480 }, { "epoch": 0.77, "eval_accuracy": 0.7865459249676585, "eval_loss": 0.5713614225387573, "eval_runtime": 42.9276, "eval_samples_per_second": 90.035, "eval_steps_per_second": 11.275, "step": 1480 }, { "epoch": 0.77, "learning_rate": 0.00012291774443869632, "loss": 0.6429, "step": 1490 }, { "epoch": 0.78, "learning_rate": 0.0001224004138644594, "loss": 0.66, "step": 1500 }, { "epoch": 0.78, "learning_rate": 0.00012188308329022247, "loss": 0.6809, "step": 1510 }, { "epoch": 0.79, "learning_rate": 0.00012136575271598552, "loss": 0.534, "step": 1520 }, { "epoch": 0.79, "eval_accuracy": 0.8005174644243208, "eval_loss": 0.5555915236473083, "eval_runtime": 43.3619, "eval_samples_per_second": 89.134, "eval_steps_per_second": 11.162, "step": 1520 }, { "epoch": 0.79, "learning_rate": 0.00012084842214174858, "loss": 0.5991, "step": 1530 }, { "epoch": 0.8, "learning_rate": 0.00012033109156751165, "loss": 0.5939, "step": 1540 }, { "epoch": 0.8, "learning_rate": 0.0001198137609932747, "loss": 0.5722, "step": 1550 }, { "epoch": 0.81, "learning_rate": 0.00011929643041903778, "loss": 0.5295, "step": 1560 }, { "epoch": 0.81, "eval_accuracy": 0.789391979301423, "eval_loss": 0.5779016017913818, "eval_runtime": 43.1951, "eval_samples_per_second": 89.478, "eval_steps_per_second": 11.205, "step": 1560 }, { "epoch": 0.81, "learning_rate": 0.00011877909984480083, "loss": 0.5853, "step": 1570 }, { "epoch": 0.82, "learning_rate": 0.00011826176927056389, "loss": 0.5291, "step": 1580 }, { "epoch": 0.82, "learning_rate": 0.00011774443869632696, "loss": 0.7245, "step": 1590 }, { "epoch": 0.83, "learning_rate": 0.00011722710812209001, "loss": 0.523, "step": 1600 }, { "epoch": 0.83, "eval_accuracy": 0.7836998706338939, "eval_loss": 0.5925487875938416, "eval_runtime": 43.5083, "eval_samples_per_second": 88.834, "eval_steps_per_second": 11.124, "step": 1600 }, { "epoch": 0.83, "learning_rate": 0.00011670977754785309, "loss": 0.5309, "step": 1610 }, { "epoch": 0.84, "learning_rate": 0.00011619244697361615, "loss": 0.6577, "step": 1620 }, { "epoch": 0.84, "learning_rate": 0.0001156751163993792, "loss": 0.7373, "step": 1630 }, { "epoch": 0.85, "learning_rate": 0.00011515778582514227, "loss": 0.6749, "step": 1640 }, { "epoch": 0.85, "eval_accuracy": 0.7904269081500647, "eval_loss": 0.5738394856452942, "eval_runtime": 43.1735, "eval_samples_per_second": 89.522, "eval_steps_per_second": 11.211, "step": 1640 }, { "epoch": 0.85, "learning_rate": 0.00011464045525090535, "loss": 0.4776, "step": 1650 }, { "epoch": 0.86, "learning_rate": 0.0001141231246766684, "loss": 0.4709, "step": 1660 }, { "epoch": 0.86, "learning_rate": 0.00011360579410243146, "loss": 0.6755, "step": 1670 }, { "epoch": 0.87, "learning_rate": 0.00011308846352819453, "loss": 0.6328, "step": 1680 }, { "epoch": 0.87, "eval_accuracy": 0.7875808538163002, "eval_loss": 0.5803186893463135, "eval_runtime": 43.3376, "eval_samples_per_second": 89.184, "eval_steps_per_second": 11.168, "step": 1680 }, { "epoch": 0.87, "learning_rate": 0.00011257113295395758, "loss": 0.579, "step": 1690 }, { "epoch": 0.88, "learning_rate": 0.00011205380237972066, "loss": 0.7426, "step": 1700 }, { "epoch": 0.88, "learning_rate": 0.0001115364718054837, "loss": 0.5371, "step": 1710 }, { "epoch": 0.89, "learning_rate": 0.00011101914123124677, "loss": 0.5914, "step": 1720 }, { "epoch": 0.89, "eval_accuracy": 0.7953428201811126, "eval_loss": 0.5584585666656494, "eval_runtime": 43.5623, "eval_samples_per_second": 88.723, "eval_steps_per_second": 11.111, "step": 1720 }, { "epoch": 0.89, "learning_rate": 0.00011050181065700984, "loss": 0.643, "step": 1730 }, { "epoch": 0.9, "learning_rate": 0.00010998448008277289, "loss": 0.6047, "step": 1740 }, { "epoch": 0.91, "learning_rate": 0.00010946714950853596, "loss": 0.5757, "step": 1750 }, { "epoch": 0.91, "learning_rate": 0.00010894981893429903, "loss": 0.578, "step": 1760 }, { "epoch": 0.91, "eval_accuracy": 0.8005174644243208, "eval_loss": 0.5448063015937805, "eval_runtime": 43.3799, "eval_samples_per_second": 89.096, "eval_steps_per_second": 11.157, "step": 1760 }, { "epoch": 0.92, "learning_rate": 0.00010843248836006207, "loss": 0.4588, "step": 1770 }, { "epoch": 0.92, "learning_rate": 0.00010791515778582515, "loss": 0.5981, "step": 1780 }, { "epoch": 0.93, "learning_rate": 0.00010739782721158822, "loss": 0.667, "step": 1790 }, { "epoch": 0.93, "learning_rate": 0.00010688049663735127, "loss": 0.4411, "step": 1800 }, { "epoch": 0.93, "eval_accuracy": 0.8227684346701164, "eval_loss": 0.5116038918495178, "eval_runtime": 42.7036, "eval_samples_per_second": 90.508, "eval_steps_per_second": 11.334, "step": 1800 }, { "epoch": 0.94, "learning_rate": 0.00010636316606311433, "loss": 0.5035, "step": 1810 }, { "epoch": 0.94, "learning_rate": 0.00010584583548887738, "loss": 0.5763, "step": 1820 }, { "epoch": 0.95, "learning_rate": 0.00010532850491464046, "loss": 0.5425, "step": 1830 }, { "epoch": 0.95, "learning_rate": 0.00010481117434040353, "loss": 0.5106, "step": 1840 }, { "epoch": 0.95, "eval_accuracy": 0.8147477360931435, "eval_loss": 0.5113465785980225, "eval_runtime": 42.6403, "eval_samples_per_second": 90.642, "eval_steps_per_second": 11.351, "step": 1840 }, { "epoch": 0.96, "learning_rate": 0.00010429384376616658, "loss": 0.5484, "step": 1850 }, { "epoch": 0.96, "learning_rate": 0.00010377651319192964, "loss": 0.5009, "step": 1860 }, { "epoch": 0.97, "learning_rate": 0.00010325918261769272, "loss": 0.6042, "step": 1870 }, { "epoch": 0.97, "learning_rate": 0.00010274185204345577, "loss": 0.5546, "step": 1880 }, { "epoch": 0.97, "eval_accuracy": 0.8103492884864165, "eval_loss": 0.5038859248161316, "eval_runtime": 42.5887, "eval_samples_per_second": 90.752, "eval_steps_per_second": 11.365, "step": 1880 }, { "epoch": 0.98, "learning_rate": 0.00010222452146921884, "loss": 0.5231, "step": 1890 }, { "epoch": 0.98, "learning_rate": 0.0001017071908949819, "loss": 0.5845, "step": 1900 }, { "epoch": 0.99, "learning_rate": 0.00010118986032074495, "loss": 0.4398, "step": 1910 }, { "epoch": 0.99, "learning_rate": 0.00010067252974650803, "loss": 0.608, "step": 1920 }, { "epoch": 0.99, "eval_accuracy": 0.8142302716688228, "eval_loss": 0.5111123323440552, "eval_runtime": 42.6045, "eval_samples_per_second": 90.718, "eval_steps_per_second": 11.36, "step": 1920 }, { "epoch": 1.0, "learning_rate": 0.0001001551991722711, "loss": 0.4246, "step": 1930 }, { "epoch": 1.0, "learning_rate": 9.963786859803415e-05, "loss": 0.4871, "step": 1940 }, { "epoch": 1.01, "learning_rate": 9.912053802379721e-05, "loss": 0.4021, "step": 1950 }, { "epoch": 1.01, "learning_rate": 9.860320744956027e-05, "loss": 0.4014, "step": 1960 }, { "epoch": 1.01, "eval_accuracy": 0.8147477360931435, "eval_loss": 0.5171140432357788, "eval_runtime": 42.2155, "eval_samples_per_second": 91.554, "eval_steps_per_second": 11.465, "step": 1960 }, { "epoch": 1.02, "learning_rate": 9.808587687532334e-05, "loss": 0.3512, "step": 1970 }, { "epoch": 1.02, "learning_rate": 9.75685463010864e-05, "loss": 0.4632, "step": 1980 }, { "epoch": 1.03, "learning_rate": 9.705121572684946e-05, "loss": 0.4426, "step": 1990 }, { "epoch": 1.03, "learning_rate": 9.653388515261252e-05, "loss": 0.3698, "step": 2000 }, { "epoch": 1.03, "eval_accuracy": 0.8098318240620958, "eval_loss": 0.5332066416740417, "eval_runtime": 42.367, "eval_samples_per_second": 91.227, "eval_steps_per_second": 11.424, "step": 2000 }, { "epoch": 1.04, "learning_rate": 9.601655457837558e-05, "loss": 0.3427, "step": 2010 }, { "epoch": 1.05, "learning_rate": 9.549922400413866e-05, "loss": 0.3389, "step": 2020 }, { "epoch": 1.05, "learning_rate": 9.498189342990172e-05, "loss": 0.2373, "step": 2030 }, { "epoch": 1.06, "learning_rate": 9.446456285566477e-05, "loss": 0.3809, "step": 2040 }, { "epoch": 1.06, "eval_accuracy": 0.8062095730918499, "eval_loss": 0.5469871759414673, "eval_runtime": 42.1927, "eval_samples_per_second": 91.604, "eval_steps_per_second": 11.471, "step": 2040 }, { "epoch": 1.06, "learning_rate": 9.394723228142783e-05, "loss": 0.3754, "step": 2050 }, { "epoch": 1.07, "learning_rate": 9.34299017071909e-05, "loss": 0.4235, "step": 2060 }, { "epoch": 1.07, "learning_rate": 9.291257113295397e-05, "loss": 0.38, "step": 2070 }, { "epoch": 1.08, "learning_rate": 9.239524055871703e-05, "loss": 0.3148, "step": 2080 }, { "epoch": 1.08, "eval_accuracy": 0.8028460543337645, "eval_loss": 0.5701329708099365, "eval_runtime": 42.3816, "eval_samples_per_second": 91.195, "eval_steps_per_second": 11.42, "step": 2080 }, { "epoch": 1.08, "learning_rate": 9.187790998448008e-05, "loss": 0.4024, "step": 2090 }, { "epoch": 1.09, "learning_rate": 9.136057941024315e-05, "loss": 0.4395, "step": 2100 }, { "epoch": 1.09, "learning_rate": 9.084324883600621e-05, "loss": 0.3876, "step": 2110 }, { "epoch": 1.1, "learning_rate": 9.032591826176928e-05, "loss": 0.343, "step": 2120 }, { "epoch": 1.1, "eval_accuracy": 0.8209573091849935, "eval_loss": 0.4977104961872101, "eval_runtime": 42.4591, "eval_samples_per_second": 91.029, "eval_steps_per_second": 11.399, "step": 2120 }, { "epoch": 1.1, "learning_rate": 8.980858768753234e-05, "loss": 0.4519, "step": 2130 }, { "epoch": 1.11, "learning_rate": 8.92912571132954e-05, "loss": 0.3997, "step": 2140 }, { "epoch": 1.11, "learning_rate": 8.877392653905846e-05, "loss": 0.2541, "step": 2150 }, { "epoch": 1.12, "learning_rate": 8.825659596482152e-05, "loss": 0.3902, "step": 2160 }, { "epoch": 1.12, "eval_accuracy": 0.8206985769728331, "eval_loss": 0.5099577903747559, "eval_runtime": 42.2629, "eval_samples_per_second": 91.451, "eval_steps_per_second": 11.452, "step": 2160 }, { "epoch": 1.12, "learning_rate": 8.773926539058458e-05, "loss": 0.3226, "step": 2170 }, { "epoch": 1.13, "learning_rate": 8.722193481634765e-05, "loss": 0.3409, "step": 2180 }, { "epoch": 1.13, "learning_rate": 8.670460424211071e-05, "loss": 0.4179, "step": 2190 }, { "epoch": 1.14, "learning_rate": 8.618727366787378e-05, "loss": 0.4167, "step": 2200 }, { "epoch": 1.14, "eval_accuracy": 0.8175937904269082, "eval_loss": 0.5081688761711121, "eval_runtime": 42.3344, "eval_samples_per_second": 91.297, "eval_steps_per_second": 11.433, "step": 2200 }, { "epoch": 1.14, "learning_rate": 8.566994309363684e-05, "loss": 0.4263, "step": 2210 }, { "epoch": 1.15, "learning_rate": 8.515261251939989e-05, "loss": 0.3126, "step": 2220 }, { "epoch": 1.15, "learning_rate": 8.463528194516295e-05, "loss": 0.4524, "step": 2230 }, { "epoch": 1.16, "learning_rate": 8.411795137092603e-05, "loss": 0.5353, "step": 2240 }, { "epoch": 1.16, "eval_accuracy": 0.8282018111254851, "eval_loss": 0.4856567680835724, "eval_runtime": 42.6599, "eval_samples_per_second": 90.6, "eval_steps_per_second": 11.346, "step": 2240 }, { "epoch": 1.16, "learning_rate": 8.360062079668909e-05, "loss": 0.4858, "step": 2250 }, { "epoch": 1.17, "learning_rate": 8.308329022245215e-05, "loss": 0.5258, "step": 2260 }, { "epoch": 1.17, "learning_rate": 8.256595964821522e-05, "loss": 0.3878, "step": 2270 }, { "epoch": 1.18, "learning_rate": 8.204862907397828e-05, "loss": 0.3638, "step": 2280 }, { "epoch": 1.18, "eval_accuracy": 0.8196636481241915, "eval_loss": 0.496245414018631, "eval_runtime": 42.5167, "eval_samples_per_second": 90.905, "eval_steps_per_second": 11.384, "step": 2280 }, { "epoch": 1.18, "learning_rate": 8.153129849974134e-05, "loss": 0.4835, "step": 2290 }, { "epoch": 1.19, "learning_rate": 8.10139679255044e-05, "loss": 0.3377, "step": 2300 }, { "epoch": 1.2, "learning_rate": 8.049663735126746e-05, "loss": 0.4418, "step": 2310 }, { "epoch": 1.2, "learning_rate": 7.997930677703052e-05, "loss": 0.3683, "step": 2320 }, { "epoch": 1.2, "eval_accuracy": 0.823803363518758, "eval_loss": 0.5005962252616882, "eval_runtime": 42.9491, "eval_samples_per_second": 89.99, "eval_steps_per_second": 11.269, "step": 2320 }, { "epoch": 1.21, "learning_rate": 7.946197620279359e-05, "loss": 0.3083, "step": 2330 }, { "epoch": 1.21, "learning_rate": 7.894464562855665e-05, "loss": 0.471, "step": 2340 }, { "epoch": 1.22, "learning_rate": 7.842731505431972e-05, "loss": 0.4098, "step": 2350 }, { "epoch": 1.22, "learning_rate": 7.790998448008277e-05, "loss": 0.4013, "step": 2360 }, { "epoch": 1.22, "eval_accuracy": 0.8302716688227685, "eval_loss": 0.4766274690628052, "eval_runtime": 42.6182, "eval_samples_per_second": 90.689, "eval_steps_per_second": 11.357, "step": 2360 }, { "epoch": 1.23, "learning_rate": 7.739265390584583e-05, "loss": 0.3378, "step": 2370 }, { "epoch": 1.23, "learning_rate": 7.687532333160891e-05, "loss": 0.2741, "step": 2380 }, { "epoch": 1.24, "learning_rate": 7.635799275737197e-05, "loss": 0.2806, "step": 2390 }, { "epoch": 1.24, "learning_rate": 7.584066218313503e-05, "loss": 0.2147, "step": 2400 }, { "epoch": 1.24, "eval_accuracy": 0.8315653298835705, "eval_loss": 0.4876723885536194, "eval_runtime": 42.6227, "eval_samples_per_second": 90.679, "eval_steps_per_second": 11.355, "step": 2400 }, { "epoch": 1.25, "learning_rate": 7.532333160889808e-05, "loss": 0.3557, "step": 2410 }, { "epoch": 1.25, "learning_rate": 7.480600103466116e-05, "loss": 0.3691, "step": 2420 }, { "epoch": 1.26, "learning_rate": 7.428867046042422e-05, "loss": 0.3896, "step": 2430 }, { "epoch": 1.26, "learning_rate": 7.377133988618728e-05, "loss": 0.3973, "step": 2440 }, { "epoch": 1.26, "eval_accuracy": 0.8385510996119017, "eval_loss": 0.4723876118659973, "eval_runtime": 42.708, "eval_samples_per_second": 90.498, "eval_steps_per_second": 11.333, "step": 2440 }, { "epoch": 1.27, "learning_rate": 7.325400931195034e-05, "loss": 0.2486, "step": 2450 }, { "epoch": 1.27, "learning_rate": 7.27366787377134e-05, "loss": 0.3107, "step": 2460 }, { "epoch": 1.28, "learning_rate": 7.221934816347646e-05, "loss": 0.4066, "step": 2470 }, { "epoch": 1.28, "learning_rate": 7.170201758923953e-05, "loss": 0.3876, "step": 2480 }, { "epoch": 1.28, "eval_accuracy": 0.84372574385511, "eval_loss": 0.4504094421863556, "eval_runtime": 42.6319, "eval_samples_per_second": 90.66, "eval_steps_per_second": 11.353, "step": 2480 }, { "epoch": 1.29, "learning_rate": 7.118468701500259e-05, "loss": 0.314, "step": 2490 }, { "epoch": 1.29, "learning_rate": 7.066735644076565e-05, "loss": 0.3722, "step": 2500 }, { "epoch": 1.3, "learning_rate": 7.015002586652871e-05, "loss": 0.2722, "step": 2510 }, { "epoch": 1.3, "learning_rate": 6.963269529229179e-05, "loss": 0.2998, "step": 2520 }, { "epoch": 1.3, "eval_accuracy": 0.8499353169469599, "eval_loss": 0.43631112575531006, "eval_runtime": 42.596, "eval_samples_per_second": 90.736, "eval_steps_per_second": 11.363, "step": 2520 }, { "epoch": 1.31, "learning_rate": 6.911536471805485e-05, "loss": 0.2214, "step": 2530 }, { "epoch": 1.31, "learning_rate": 6.85980341438179e-05, "loss": 0.3157, "step": 2540 }, { "epoch": 1.32, "learning_rate": 6.808070356958096e-05, "loss": 0.4113, "step": 2550 }, { "epoch": 1.32, "learning_rate": 6.756337299534403e-05, "loss": 0.3621, "step": 2560 }, { "epoch": 1.32, "eval_accuracy": 0.8476067270375162, "eval_loss": 0.44939151406288147, "eval_runtime": 42.5462, "eval_samples_per_second": 90.842, "eval_steps_per_second": 11.376, "step": 2560 }, { "epoch": 1.33, "learning_rate": 6.70460424211071e-05, "loss": 0.3491, "step": 2570 }, { "epoch": 1.33, "learning_rate": 6.652871184687016e-05, "loss": 0.3335, "step": 2580 }, { "epoch": 1.34, "learning_rate": 6.60113812726332e-05, "loss": 0.3568, "step": 2590 }, { "epoch": 1.35, "learning_rate": 6.549405069839628e-05, "loss": 0.3128, "step": 2600 }, { "epoch": 1.35, "eval_accuracy": 0.8478654592496766, "eval_loss": 0.43925172090530396, "eval_runtime": 42.5607, "eval_samples_per_second": 90.811, "eval_steps_per_second": 11.372, "step": 2600 }, { "epoch": 1.35, "learning_rate": 6.497672012415934e-05, "loss": 0.3334, "step": 2610 }, { "epoch": 1.36, "learning_rate": 6.44593895499224e-05, "loss": 0.3584, "step": 2620 }, { "epoch": 1.36, "learning_rate": 6.394205897568547e-05, "loss": 0.2895, "step": 2630 }, { "epoch": 1.37, "learning_rate": 6.342472840144853e-05, "loss": 0.3283, "step": 2640 }, { "epoch": 1.37, "eval_accuracy": 0.8473479948253557, "eval_loss": 0.44278526306152344, "eval_runtime": 42.7932, "eval_samples_per_second": 90.318, "eval_steps_per_second": 11.31, "step": 2640 }, { "epoch": 1.37, "learning_rate": 6.290739782721159e-05, "loss": 0.4039, "step": 2650 }, { "epoch": 1.38, "learning_rate": 6.239006725297465e-05, "loss": 0.3649, "step": 2660 }, { "epoch": 1.38, "learning_rate": 6.187273667873773e-05, "loss": 0.2828, "step": 2670 }, { "epoch": 1.39, "learning_rate": 6.135540610450077e-05, "loss": 0.4072, "step": 2680 }, { "epoch": 1.39, "eval_accuracy": 0.8455368693402329, "eval_loss": 0.4490886330604553, "eval_runtime": 42.8656, "eval_samples_per_second": 90.166, "eval_steps_per_second": 11.291, "step": 2680 }, { "epoch": 1.39, "learning_rate": 6.0838075530263836e-05, "loss": 0.3216, "step": 2690 }, { "epoch": 1.4, "learning_rate": 6.032074495602691e-05, "loss": 0.3065, "step": 2700 }, { "epoch": 1.4, "learning_rate": 5.9803414381789966e-05, "loss": 0.2062, "step": 2710 }, { "epoch": 1.41, "learning_rate": 5.928608380755303e-05, "loss": 0.2698, "step": 2720 }, { "epoch": 1.41, "eval_accuracy": 0.8525226390685641, "eval_loss": 0.41426753997802734, "eval_runtime": 42.9118, "eval_samples_per_second": 90.068, "eval_steps_per_second": 11.279, "step": 2720 }, { "epoch": 1.41, "learning_rate": 5.876875323331609e-05, "loss": 0.2975, "step": 2730 }, { "epoch": 1.42, "learning_rate": 5.825142265907916e-05, "loss": 0.2441, "step": 2740 }, { "epoch": 1.42, "learning_rate": 5.773409208484222e-05, "loss": 0.3095, "step": 2750 }, { "epoch": 1.43, "learning_rate": 5.7216761510605275e-05, "loss": 0.2922, "step": 2760 }, { "epoch": 1.43, "eval_accuracy": 0.8514877102199224, "eval_loss": 0.4335246980190277, "eval_runtime": 42.5699, "eval_samples_per_second": 90.792, "eval_steps_per_second": 11.37, "step": 2760 }, { "epoch": 1.43, "learning_rate": 5.669943093636835e-05, "loss": 0.3733, "step": 2770 }, { "epoch": 1.44, "learning_rate": 5.6182100362131405e-05, "loss": 0.2772, "step": 2780 }, { "epoch": 1.44, "learning_rate": 5.566476978789447e-05, "loss": 0.2602, "step": 2790 }, { "epoch": 1.45, "learning_rate": 5.514743921365753e-05, "loss": 0.3662, "step": 2800 }, { "epoch": 1.45, "eval_accuracy": 0.8613195342820181, "eval_loss": 0.41216862201690674, "eval_runtime": 42.677, "eval_samples_per_second": 90.564, "eval_steps_per_second": 11.341, "step": 2800 }, { "epoch": 1.45, "learning_rate": 5.46301086394206e-05, "loss": 0.3444, "step": 2810 }, { "epoch": 1.46, "learning_rate": 5.411277806518366e-05, "loss": 0.2477, "step": 2820 }, { "epoch": 1.46, "learning_rate": 5.3595447490946714e-05, "loss": 0.242, "step": 2830 }, { "epoch": 1.47, "learning_rate": 5.3078116916709776e-05, "loss": 0.3607, "step": 2840 }, { "epoch": 1.47, "eval_accuracy": 0.8633893919793014, "eval_loss": 0.40597081184387207, "eval_runtime": 42.553, "eval_samples_per_second": 90.828, "eval_steps_per_second": 11.374, "step": 2840 }, { "epoch": 1.47, "learning_rate": 5.2560786342472844e-05, "loss": 0.3836, "step": 2850 }, { "epoch": 1.48, "learning_rate": 5.2043455768235906e-05, "loss": 0.2116, "step": 2860 }, { "epoch": 1.48, "learning_rate": 5.152612519399897e-05, "loss": 0.3695, "step": 2870 }, { "epoch": 1.49, "learning_rate": 5.1008794619762036e-05, "loss": 0.2488, "step": 2880 }, { "epoch": 1.49, "eval_accuracy": 0.8633893919793014, "eval_loss": 0.4011004567146301, "eval_runtime": 42.6493, "eval_samples_per_second": 90.623, "eval_steps_per_second": 11.348, "step": 2880 }, { "epoch": 1.5, "learning_rate": 5.049146404552509e-05, "loss": 0.2717, "step": 2890 }, { "epoch": 1.5, "learning_rate": 4.997413347128815e-05, "loss": 0.3791, "step": 2900 }, { "epoch": 1.51, "learning_rate": 4.945680289705122e-05, "loss": 0.4196, "step": 2910 }, { "epoch": 1.51, "learning_rate": 4.8939472322814276e-05, "loss": 0.3733, "step": 2920 }, { "epoch": 1.51, "eval_accuracy": 0.8566623544631307, "eval_loss": 0.4146381914615631, "eval_runtime": 42.9227, "eval_samples_per_second": 90.045, "eval_steps_per_second": 11.276, "step": 2920 }, { "epoch": 1.52, "learning_rate": 4.8422141748577345e-05, "loss": 0.3268, "step": 2930 }, { "epoch": 1.52, "learning_rate": 4.7904811174340407e-05, "loss": 0.2866, "step": 2940 }, { "epoch": 1.53, "learning_rate": 4.738748060010347e-05, "loss": 0.2063, "step": 2950 }, { "epoch": 1.53, "learning_rate": 4.687015002586653e-05, "loss": 0.3388, "step": 2960 }, { "epoch": 1.53, "eval_accuracy": 0.8657179818887452, "eval_loss": 0.4066773056983948, "eval_runtime": 42.7293, "eval_samples_per_second": 90.453, "eval_steps_per_second": 11.327, "step": 2960 }, { "epoch": 1.54, "learning_rate": 4.635281945162959e-05, "loss": 0.2401, "step": 2970 }, { "epoch": 1.54, "learning_rate": 4.583548887739266e-05, "loss": 0.3411, "step": 2980 }, { "epoch": 1.55, "learning_rate": 4.5318158303155715e-05, "loss": 0.2449, "step": 2990 }, { "epoch": 1.55, "learning_rate": 4.4800827728918784e-05, "loss": 0.3176, "step": 3000 }, { "epoch": 1.55, "eval_accuracy": 0.8654592496765847, "eval_loss": 0.3914910852909088, "eval_runtime": 42.6139, "eval_samples_per_second": 90.698, "eval_steps_per_second": 11.358, "step": 3000 }, { "epoch": 1.56, "learning_rate": 4.428349715468184e-05, "loss": 0.3395, "step": 3010 }, { "epoch": 1.56, "learning_rate": 4.376616658044491e-05, "loss": 0.2695, "step": 3020 }, { "epoch": 1.57, "learning_rate": 4.324883600620797e-05, "loss": 0.2911, "step": 3030 }, { "epoch": 1.57, "learning_rate": 4.273150543197103e-05, "loss": 0.3989, "step": 3040 }, { "epoch": 1.57, "eval_accuracy": 0.8690815006468305, "eval_loss": 0.37924402952194214, "eval_runtime": 42.79, "eval_samples_per_second": 90.325, "eval_steps_per_second": 11.311, "step": 3040 }, { "epoch": 1.58, "learning_rate": 4.221417485773409e-05, "loss": 0.1723, "step": 3050 }, { "epoch": 1.58, "learning_rate": 4.1696844283497154e-05, "loss": 0.1942, "step": 3060 }, { "epoch": 1.59, "learning_rate": 4.117951370926022e-05, "loss": 0.359, "step": 3070 }, { "epoch": 1.59, "learning_rate": 4.066218313502328e-05, "loss": 0.2519, "step": 3080 }, { "epoch": 1.59, "eval_accuracy": 0.8734799482535576, "eval_loss": 0.3759077787399292, "eval_runtime": 42.6891, "eval_samples_per_second": 90.538, "eval_steps_per_second": 11.338, "step": 3080 }, { "epoch": 1.6, "learning_rate": 4.0144852560786346e-05, "loss": 0.2928, "step": 3090 }, { "epoch": 1.6, "learning_rate": 3.962752198654941e-05, "loss": 0.2605, "step": 3100 }, { "epoch": 1.61, "learning_rate": 3.911019141231247e-05, "loss": 0.2225, "step": 3110 }, { "epoch": 1.61, "learning_rate": 3.859286083807553e-05, "loss": 0.241, "step": 3120 }, { "epoch": 1.61, "eval_accuracy": 0.8716688227684347, "eval_loss": 0.38423553109169006, "eval_runtime": 42.5605, "eval_samples_per_second": 90.812, "eval_steps_per_second": 11.372, "step": 3120 }, { "epoch": 1.62, "learning_rate": 3.807553026383859e-05, "loss": 0.3516, "step": 3130 }, { "epoch": 1.62, "learning_rate": 3.755819968960166e-05, "loss": 0.2819, "step": 3140 }, { "epoch": 1.63, "learning_rate": 3.704086911536472e-05, "loss": 0.2807, "step": 3150 }, { "epoch": 1.63, "learning_rate": 3.6523538541127785e-05, "loss": 0.2908, "step": 3160 }, { "epoch": 1.63, "eval_accuracy": 0.8698576972833117, "eval_loss": 0.37994685769081116, "eval_runtime": 42.6371, "eval_samples_per_second": 90.649, "eval_steps_per_second": 11.352, "step": 3160 }, { "epoch": 1.64, "learning_rate": 3.600620796689084e-05, "loss": 0.2328, "step": 3170 }, { "epoch": 1.65, "learning_rate": 3.548887739265391e-05, "loss": 0.1905, "step": 3180 }, { "epoch": 1.65, "learning_rate": 3.497154681841697e-05, "loss": 0.3018, "step": 3190 }, { "epoch": 1.66, "learning_rate": 3.445421624418003e-05, "loss": 0.2793, "step": 3200 }, { "epoch": 1.66, "eval_accuracy": 0.8659767141009056, "eval_loss": 0.38844797015190125, "eval_runtime": 42.5682, "eval_samples_per_second": 90.795, "eval_steps_per_second": 11.37, "step": 3200 }, { "epoch": 1.66, "learning_rate": 3.3936885669943094e-05, "loss": 0.2598, "step": 3210 }, { "epoch": 1.67, "learning_rate": 3.3419555095706156e-05, "loss": 0.289, "step": 3220 }, { "epoch": 1.67, "learning_rate": 3.2902224521469224e-05, "loss": 0.3378, "step": 3230 }, { "epoch": 1.68, "learning_rate": 3.238489394723228e-05, "loss": 0.2196, "step": 3240 }, { "epoch": 1.68, "eval_accuracy": 0.8747736093143597, "eval_loss": 0.3740740716457367, "eval_runtime": 42.8776, "eval_samples_per_second": 90.14, "eval_steps_per_second": 11.288, "step": 3240 }, { "epoch": 1.68, "learning_rate": 3.186756337299535e-05, "loss": 0.3452, "step": 3250 }, { "epoch": 1.69, "learning_rate": 3.13502327987584e-05, "loss": 0.3273, "step": 3260 }, { "epoch": 1.69, "learning_rate": 3.083290222452147e-05, "loss": 0.3743, "step": 3270 }, { "epoch": 1.7, "learning_rate": 3.0315571650284536e-05, "loss": 0.2614, "step": 3280 }, { "epoch": 1.7, "eval_accuracy": 0.8727037516170764, "eval_loss": 0.3755486309528351, "eval_runtime": 42.6899, "eval_samples_per_second": 90.537, "eval_steps_per_second": 11.338, "step": 3280 }, { "epoch": 1.7, "learning_rate": 2.9798241076047595e-05, "loss": 0.2846, "step": 3290 }, { "epoch": 1.71, "learning_rate": 2.928091050181066e-05, "loss": 0.199, "step": 3300 }, { "epoch": 1.71, "learning_rate": 2.8763579927573718e-05, "loss": 0.2398, "step": 3310 }, { "epoch": 1.72, "learning_rate": 2.8246249353336783e-05, "loss": 0.1883, "step": 3320 }, { "epoch": 1.72, "eval_accuracy": 0.8727037516170764, "eval_loss": 0.3819185793399811, "eval_runtime": 42.9048, "eval_samples_per_second": 90.083, "eval_steps_per_second": 11.281, "step": 3320 }, { "epoch": 1.72, "learning_rate": 2.7728918779099845e-05, "loss": 0.3646, "step": 3330 }, { "epoch": 1.73, "learning_rate": 2.721158820486291e-05, "loss": 0.2332, "step": 3340 }, { "epoch": 1.73, "learning_rate": 2.6694257630625975e-05, "loss": 0.3035, "step": 3350 }, { "epoch": 1.74, "learning_rate": 2.6176927056389034e-05, "loss": 0.247, "step": 3360 }, { "epoch": 1.74, "eval_accuracy": 0.8776196636481242, "eval_loss": 0.36190494894981384, "eval_runtime": 43.0956, "eval_samples_per_second": 89.684, "eval_steps_per_second": 11.231, "step": 3360 }, { "epoch": 1.74, "learning_rate": 2.56595964821521e-05, "loss": 0.2436, "step": 3370 }, { "epoch": 1.75, "learning_rate": 2.5142265907915157e-05, "loss": 0.305, "step": 3380 }, { "epoch": 1.75, "learning_rate": 2.4624935333678222e-05, "loss": 0.2783, "step": 3390 }, { "epoch": 1.76, "learning_rate": 2.4107604759441284e-05, "loss": 0.1617, "step": 3400 }, { "epoch": 1.76, "eval_accuracy": 0.8760672703751617, "eval_loss": 0.3629147410392761, "eval_runtime": 42.8946, "eval_samples_per_second": 90.105, "eval_steps_per_second": 11.283, "step": 3400 }, { "epoch": 1.76, "learning_rate": 2.3590274185204346e-05, "loss": 0.2749, "step": 3410 }, { "epoch": 1.77, "learning_rate": 2.307294361096741e-05, "loss": 0.3613, "step": 3420 }, { "epoch": 1.77, "learning_rate": 2.2555613036730473e-05, "loss": 0.2276, "step": 3430 }, { "epoch": 1.78, "learning_rate": 2.2038282462493534e-05, "loss": 0.2177, "step": 3440 }, { "epoch": 1.78, "eval_accuracy": 0.8846054333764554, "eval_loss": 0.3531200587749481, "eval_runtime": 42.8358, "eval_samples_per_second": 90.228, "eval_steps_per_second": 11.299, "step": 3440 }, { "epoch": 1.78, "learning_rate": 2.1520951888256596e-05, "loss": 0.378, "step": 3450 }, { "epoch": 1.79, "learning_rate": 2.1003621314019658e-05, "loss": 0.1863, "step": 3460 }, { "epoch": 1.8, "learning_rate": 2.048629073978272e-05, "loss": 0.2241, "step": 3470 }, { "epoch": 1.8, "learning_rate": 1.9968960165545785e-05, "loss": 0.265, "step": 3480 }, { "epoch": 1.8, "eval_accuracy": 0.8799482535575679, "eval_loss": 0.3503970801830292, "eval_runtime": 42.617, "eval_samples_per_second": 90.692, "eval_steps_per_second": 11.357, "step": 3480 }, { "epoch": 1.81, "learning_rate": 1.9451629591308847e-05, "loss": 0.3363, "step": 3490 }, { "epoch": 1.81, "learning_rate": 1.893429901707191e-05, "loss": 0.2832, "step": 3500 }, { "epoch": 1.82, "learning_rate": 1.8416968442834973e-05, "loss": 0.2654, "step": 3510 }, { "epoch": 1.82, "learning_rate": 1.7899637868598035e-05, "loss": 0.176, "step": 3520 }, { "epoch": 1.82, "eval_accuracy": 0.888745148771022, "eval_loss": 0.34128451347351074, "eval_runtime": 42.6366, "eval_samples_per_second": 90.65, "eval_steps_per_second": 11.352, "step": 3520 }, { "epoch": 1.83, "learning_rate": 1.7382307294361097e-05, "loss": 0.2136, "step": 3530 }, { "epoch": 1.83, "learning_rate": 1.686497672012416e-05, "loss": 0.2482, "step": 3540 }, { "epoch": 1.84, "learning_rate": 1.634764614588722e-05, "loss": 0.2034, "step": 3550 }, { "epoch": 1.84, "learning_rate": 1.5830315571650286e-05, "loss": 0.1942, "step": 3560 }, { "epoch": 1.84, "eval_accuracy": 0.8892626131953428, "eval_loss": 0.3416860103607178, "eval_runtime": 42.741, "eval_samples_per_second": 90.428, "eval_steps_per_second": 11.324, "step": 3560 }, { "epoch": 1.85, "learning_rate": 1.5312984997413347e-05, "loss": 0.1961, "step": 3570 }, { "epoch": 1.85, "learning_rate": 1.479565442317641e-05, "loss": 0.1949, "step": 3580 }, { "epoch": 1.86, "learning_rate": 1.4278323848939472e-05, "loss": 0.2767, "step": 3590 }, { "epoch": 1.86, "learning_rate": 1.3760993274702536e-05, "loss": 0.2977, "step": 3600 }, { "epoch": 1.86, "eval_accuracy": 0.8864165588615782, "eval_loss": 0.34559765458106995, "eval_runtime": 42.8519, "eval_samples_per_second": 90.194, "eval_steps_per_second": 11.295, "step": 3600 }, { "epoch": 1.87, "learning_rate": 1.3243662700465598e-05, "loss": 0.2409, "step": 3610 }, { "epoch": 1.87, "learning_rate": 1.272633212622866e-05, "loss": 0.2807, "step": 3620 }, { "epoch": 1.88, "learning_rate": 1.2209001551991723e-05, "loss": 0.2191, "step": 3630 }, { "epoch": 1.88, "learning_rate": 1.1691670977754786e-05, "loss": 0.1658, "step": 3640 }, { "epoch": 1.88, "eval_accuracy": 0.8882276843467012, "eval_loss": 0.3382810950279236, "eval_runtime": 42.9231, "eval_samples_per_second": 90.045, "eval_steps_per_second": 11.276, "step": 3640 }, { "epoch": 1.89, "learning_rate": 1.1174340403517848e-05, "loss": 0.1498, "step": 3650 }, { "epoch": 1.89, "learning_rate": 1.065700982928091e-05, "loss": 0.2529, "step": 3660 }, { "epoch": 1.9, "learning_rate": 1.0139679255043973e-05, "loss": 0.1997, "step": 3670 }, { "epoch": 1.9, "learning_rate": 9.622348680807037e-06, "loss": 0.2904, "step": 3680 }, { "epoch": 1.9, "eval_accuracy": 0.891849935316947, "eval_loss": 0.33569639921188354, "eval_runtime": 42.8384, "eval_samples_per_second": 90.223, "eval_steps_per_second": 11.298, "step": 3680 }, { "epoch": 1.91, "learning_rate": 9.105018106570098e-06, "loss": 0.2314, "step": 3690 }, { "epoch": 1.91, "learning_rate": 8.58768753233316e-06, "loss": 0.3056, "step": 3700 }, { "epoch": 1.92, "learning_rate": 8.070356958096224e-06, "loss": 0.1688, "step": 3710 }, { "epoch": 1.92, "learning_rate": 7.553026383859286e-06, "loss": 0.2423, "step": 3720 }, { "epoch": 1.92, "eval_accuracy": 0.8944372574385511, "eval_loss": 0.32754141092300415, "eval_runtime": 42.7436, "eval_samples_per_second": 90.423, "eval_steps_per_second": 11.323, "step": 3720 }, { "epoch": 1.93, "learning_rate": 7.035695809622349e-06, "loss": 0.1978, "step": 3730 }, { "epoch": 1.93, "learning_rate": 6.518365235385411e-06, "loss": 0.2156, "step": 3740 }, { "epoch": 1.94, "learning_rate": 6.001034661148474e-06, "loss": 0.1649, "step": 3750 }, { "epoch": 1.95, "learning_rate": 5.4837040869115365e-06, "loss": 0.263, "step": 3760 }, { "epoch": 1.95, "eval_accuracy": 0.8921086675291073, "eval_loss": 0.3273804187774658, "eval_runtime": 42.7684, "eval_samples_per_second": 90.37, "eval_steps_per_second": 11.317, "step": 3760 }, { "epoch": 1.95, "learning_rate": 4.966373512674599e-06, "loss": 0.3802, "step": 3770 }, { "epoch": 1.96, "learning_rate": 4.449042938437662e-06, "loss": 0.2816, "step": 3780 }, { "epoch": 1.96, "learning_rate": 3.931712364200724e-06, "loss": 0.2565, "step": 3790 }, { "epoch": 1.97, "learning_rate": 3.4143817899637873e-06, "loss": 0.2458, "step": 3800 }, { "epoch": 1.97, "eval_accuracy": 0.8923673997412678, "eval_loss": 0.3263191878795624, "eval_runtime": 42.9857, "eval_samples_per_second": 89.914, "eval_steps_per_second": 11.26, "step": 3800 }, { "epoch": 1.97, "learning_rate": 2.8970512157268495e-06, "loss": 0.3755, "step": 3810 }, { "epoch": 1.98, "learning_rate": 2.379720641489912e-06, "loss": 0.3133, "step": 3820 }, { "epoch": 1.98, "learning_rate": 1.8623900672529747e-06, "loss": 0.2631, "step": 3830 }, { "epoch": 1.99, "learning_rate": 1.3450594930160373e-06, "loss": 0.227, "step": 3840 }, { "epoch": 1.99, "eval_accuracy": 0.8923673997412678, "eval_loss": 0.3248412013053894, "eval_runtime": 42.9211, "eval_samples_per_second": 90.049, "eval_steps_per_second": 11.276, "step": 3840 } ], "max_steps": 3866, "num_train_epochs": 2, "total_flos": 4.760709076383676e+18, "trial_name": null, "trial_params": null }