{ "best_metric": 0.9227471218737594, "best_model_checkpoint": "outputs/whisper-small-keyword-spotting-m-agv-grabo/checkpoint-723", "epoch": 5.0, "eval_steps": 500, "global_step": 3615, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.7624309392265193e-05, "loss": 0.6646, "step": 10 }, { "epoch": 0.03, "learning_rate": 5.524861878453039e-05, "loss": 0.5982, "step": 20 }, { "epoch": 0.04, "learning_rate": 8.287292817679558e-05, "loss": 0.5497, "step": 30 }, { "epoch": 0.06, "learning_rate": 0.00011049723756906077, "loss": 0.5489, "step": 40 }, { "epoch": 0.07, "learning_rate": 0.00013812154696132598, "loss": 0.5144, "step": 50 }, { "epoch": 0.08, "learning_rate": 0.00016574585635359117, "loss": 0.5082, "step": 60 }, { "epoch": 0.1, "learning_rate": 0.00019337016574585636, "loss": 0.4936, "step": 70 }, { "epoch": 0.11, "learning_rate": 0.00022099447513812155, "loss": 0.4818, "step": 80 }, { "epoch": 0.12, "learning_rate": 0.00024861878453038676, "loss": 0.4708, "step": 90 }, { "epoch": 0.14, "learning_rate": 0.00027624309392265195, "loss": 0.4238, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.00030386740331491714, "loss": 0.4179, "step": 110 }, { "epoch": 0.17, "learning_rate": 0.00033149171270718233, "loss": 0.4103, "step": 120 }, { "epoch": 0.18, "learning_rate": 0.0003591160220994475, "loss": 0.3913, "step": 130 }, { "epoch": 0.19, "learning_rate": 0.0003867403314917127, "loss": 0.3914, "step": 140 }, { "epoch": 0.21, "learning_rate": 0.0004143646408839779, "loss": 0.3795, "step": 150 }, { "epoch": 0.22, "learning_rate": 0.0004419889502762431, "loss": 0.3642, "step": 160 }, { "epoch": 0.24, "learning_rate": 0.00046961325966850834, "loss": 0.3802, "step": 170 }, { "epoch": 0.25, "learning_rate": 0.0004972375690607735, "loss": 0.3794, "step": 180 }, { "epoch": 0.26, "learning_rate": 0.0005248618784530387, "loss": 0.3375, "step": 190 }, { "epoch": 0.28, "learning_rate": 0.0005524861878453039, "loss": 0.3635, "step": 200 }, { "epoch": 0.29, "learning_rate": 0.000580110497237569, "loss": 0.3237, "step": 210 }, { "epoch": 0.3, "learning_rate": 0.0006077348066298343, "loss": 0.326, "step": 220 }, { "epoch": 0.32, "learning_rate": 0.0006353591160220995, "loss": 0.3118, "step": 230 }, { "epoch": 0.33, "learning_rate": 0.0006629834254143647, "loss": 0.3553, "step": 240 }, { "epoch": 0.35, "learning_rate": 0.0006906077348066299, "loss": 0.2944, "step": 250 }, { "epoch": 0.36, "learning_rate": 0.000718232044198895, "loss": 0.2763, "step": 260 }, { "epoch": 0.37, "learning_rate": 0.0007458563535911602, "loss": 0.3624, "step": 270 }, { "epoch": 0.39, "learning_rate": 0.0007734806629834254, "loss": 0.2961, "step": 280 }, { "epoch": 0.4, "learning_rate": 0.0008011049723756906, "loss": 0.3019, "step": 290 }, { "epoch": 0.41, "learning_rate": 0.0008287292817679558, "loss": 0.3244, "step": 300 }, { "epoch": 0.43, "learning_rate": 0.0008563535911602211, "loss": 0.2954, "step": 310 }, { "epoch": 0.44, "learning_rate": 0.0008839779005524862, "loss": 0.3259, "step": 320 }, { "epoch": 0.46, "learning_rate": 0.0009116022099447514, "loss": 0.2811, "step": 330 }, { "epoch": 0.47, "learning_rate": 0.0009392265193370167, "loss": 0.315, "step": 340 }, { "epoch": 0.48, "learning_rate": 0.0009668508287292818, "loss": 0.2881, "step": 350 }, { "epoch": 0.5, "learning_rate": 0.000994475138121547, "loss": 0.3164, "step": 360 }, { "epoch": 0.51, "learning_rate": 0.0009975407316323394, "loss": 0.2444, "step": 370 }, { "epoch": 0.53, "learning_rate": 0.0009944666461727636, "loss": 0.2568, "step": 380 }, { "epoch": 0.54, "learning_rate": 0.000991392560713188, "loss": 0.2413, "step": 390 }, { "epoch": 0.55, "learning_rate": 0.000988318475253612, "loss": 0.3132, "step": 400 }, { "epoch": 0.57, "learning_rate": 0.0009852443897940364, "loss": 0.3086, "step": 410 }, { "epoch": 0.58, "learning_rate": 0.0009821703043344605, "loss": 0.2726, "step": 420 }, { "epoch": 0.59, "learning_rate": 0.0009790962188748848, "loss": 0.2549, "step": 430 }, { "epoch": 0.61, "learning_rate": 0.000976022133415309, "loss": 0.2724, "step": 440 }, { "epoch": 0.62, "learning_rate": 0.0009729480479557332, "loss": 0.2591, "step": 450 }, { "epoch": 0.64, "learning_rate": 0.0009698739624961574, "loss": 0.2414, "step": 460 }, { "epoch": 0.65, "learning_rate": 0.0009667998770365817, "loss": 0.2387, "step": 470 }, { "epoch": 0.66, "learning_rate": 0.0009637257915770059, "loss": 0.2447, "step": 480 }, { "epoch": 0.68, "learning_rate": 0.0009606517061174301, "loss": 0.2283, "step": 490 }, { "epoch": 0.69, "learning_rate": 0.0009575776206578543, "loss": 0.2752, "step": 500 }, { "epoch": 0.71, "learning_rate": 0.0009545035351982785, "loss": 0.2889, "step": 510 }, { "epoch": 0.72, "learning_rate": 0.0009514294497387027, "loss": 0.2681, "step": 520 }, { "epoch": 0.73, "learning_rate": 0.000948355364279127, "loss": 0.2498, "step": 530 }, { "epoch": 0.75, "learning_rate": 0.0009452812788195512, "loss": 0.2686, "step": 540 }, { "epoch": 0.76, "learning_rate": 0.0009422071933599754, "loss": 0.2308, "step": 550 }, { "epoch": 0.77, "learning_rate": 0.0009391331079003996, "loss": 0.2405, "step": 560 }, { "epoch": 0.79, "learning_rate": 0.0009360590224408239, "loss": 0.2557, "step": 570 }, { "epoch": 0.8, "learning_rate": 0.0009329849369812481, "loss": 0.4182, "step": 580 }, { "epoch": 0.82, "learning_rate": 0.0009299108515216723, "loss": 0.2541, "step": 590 }, { "epoch": 0.83, "learning_rate": 0.0009268367660620966, "loss": 0.2168, "step": 600 }, { "epoch": 0.84, "learning_rate": 0.0009237626806025208, "loss": 0.2416, "step": 610 }, { "epoch": 0.86, "learning_rate": 0.000920688595142945, "loss": 0.2436, "step": 620 }, { "epoch": 0.87, "learning_rate": 0.0009176145096833693, "loss": 0.245, "step": 630 }, { "epoch": 0.89, "learning_rate": 0.0009145404242237935, "loss": 0.2471, "step": 640 }, { "epoch": 0.9, "learning_rate": 0.0009114663387642177, "loss": 0.2252, "step": 650 }, { "epoch": 0.91, "learning_rate": 0.000908392253304642, "loss": 0.2087, "step": 660 }, { "epoch": 0.93, "learning_rate": 0.0009053181678450662, "loss": 0.2288, "step": 670 }, { "epoch": 0.94, "learning_rate": 0.0009022440823854904, "loss": 0.258, "step": 680 }, { "epoch": 0.95, "learning_rate": 0.0008991699969259147, "loss": 0.2467, "step": 690 }, { "epoch": 0.97, "learning_rate": 0.0008960959114663388, "loss": 0.2665, "step": 700 }, { "epoch": 0.98, "learning_rate": 0.0008930218260067629, "loss": 0.2265, "step": 710 }, { "epoch": 1.0, "learning_rate": 0.0008899477405471871, "loss": 0.2841, "step": 720 }, { "epoch": 1.0, "eval_accuracy": 0.9227471218737594, "eval_loss": 0.16996660828590393, "eval_runtime": 475.1508, "eval_samples_per_second": 26.507, "eval_steps_per_second": 0.829, "step": 723 }, { "epoch": 1.01, "learning_rate": 0.0008868736550876114, "loss": 0.2341, "step": 730 }, { "epoch": 1.02, "learning_rate": 0.0008837995696280356, "loss": 0.1977, "step": 740 }, { "epoch": 1.04, "learning_rate": 0.0008807254841684598, "loss": 0.2085, "step": 750 }, { "epoch": 1.05, "learning_rate": 0.0008776513987088841, "loss": 0.2595, "step": 760 }, { "epoch": 1.07, "learning_rate": 0.0008745773132493083, "loss": 0.2888, "step": 770 }, { "epoch": 1.08, "learning_rate": 0.0008715032277897325, "loss": 0.1983, "step": 780 }, { "epoch": 1.09, "learning_rate": 0.0008684291423301568, "loss": 0.2042, "step": 790 }, { "epoch": 1.11, "learning_rate": 0.000865355056870581, "loss": 0.2073, "step": 800 }, { "epoch": 1.12, "learning_rate": 0.0008622809714110052, "loss": 0.2405, "step": 810 }, { "epoch": 1.13, "learning_rate": 0.0008592068859514295, "loss": 0.1793, "step": 820 }, { "epoch": 1.15, "learning_rate": 0.0008561328004918537, "loss": 0.1951, "step": 830 }, { "epoch": 1.16, "learning_rate": 0.0008530587150322779, "loss": 0.209, "step": 840 }, { "epoch": 1.18, "learning_rate": 0.0008499846295727022, "loss": 0.2235, "step": 850 }, { "epoch": 1.19, "learning_rate": 0.0008469105441131264, "loss": 0.2534, "step": 860 }, { "epoch": 1.2, "learning_rate": 0.0008438364586535506, "loss": 0.2315, "step": 870 }, { "epoch": 1.22, "learning_rate": 0.0008407623731939749, "loss": 0.3287, "step": 880 }, { "epoch": 1.23, "learning_rate": 0.000837688287734399, "loss": 0.1967, "step": 890 }, { "epoch": 1.24, "learning_rate": 0.0008346142022748232, "loss": 0.1835, "step": 900 }, { "epoch": 1.26, "learning_rate": 0.0008315401168152474, "loss": 0.2297, "step": 910 }, { "epoch": 1.27, "learning_rate": 0.0008284660313556717, "loss": 0.2412, "step": 920 }, { "epoch": 1.29, "learning_rate": 0.0008253919458960959, "loss": 0.2503, "step": 930 }, { "epoch": 1.3, "learning_rate": 0.0008223178604365201, "loss": 0.2083, "step": 940 }, { "epoch": 1.31, "learning_rate": 0.0008192437749769444, "loss": 0.1994, "step": 950 }, { "epoch": 1.33, "learning_rate": 0.0008161696895173686, "loss": 0.1757, "step": 960 }, { "epoch": 1.34, "learning_rate": 0.0008130956040577928, "loss": 0.214, "step": 970 }, { "epoch": 1.36, "learning_rate": 0.0008100215185982171, "loss": 0.2483, "step": 980 }, { "epoch": 1.37, "learning_rate": 0.0008069474331386413, "loss": 0.2136, "step": 990 }, { "epoch": 1.38, "learning_rate": 0.0008038733476790655, "loss": 0.2111, "step": 1000 }, { "epoch": 1.4, "learning_rate": 0.0008007992622194898, "loss": 0.1751, "step": 1010 }, { "epoch": 1.41, "learning_rate": 0.000797725176759914, "loss": 0.2262, "step": 1020 }, { "epoch": 1.42, "learning_rate": 0.0007946510913003382, "loss": 0.2485, "step": 1030 }, { "epoch": 1.44, "learning_rate": 0.0007915770058407625, "loss": 0.2275, "step": 1040 }, { "epoch": 1.45, "learning_rate": 0.0007885029203811867, "loss": 0.2771, "step": 1050 }, { "epoch": 1.47, "learning_rate": 0.0007854288349216109, "loss": 0.2777, "step": 1060 }, { "epoch": 1.48, "learning_rate": 0.0007823547494620352, "loss": 0.2614, "step": 1070 }, { "epoch": 1.49, "learning_rate": 0.0007792806640024594, "loss": 0.2189, "step": 1080 }, { "epoch": 1.51, "learning_rate": 0.0007762065785428835, "loss": 0.1682, "step": 1090 }, { "epoch": 1.52, "learning_rate": 0.0007731324930833076, "loss": 0.2051, "step": 1100 }, { "epoch": 1.54, "learning_rate": 0.0007700584076237319, "loss": 0.2981, "step": 1110 }, { "epoch": 1.55, "learning_rate": 0.0007669843221641561, "loss": 0.2469, "step": 1120 }, { "epoch": 1.56, "learning_rate": 0.0007639102367045803, "loss": 0.2232, "step": 1130 }, { "epoch": 1.58, "learning_rate": 0.0007608361512450046, "loss": 0.2312, "step": 1140 }, { "epoch": 1.59, "learning_rate": 0.0007577620657854288, "loss": 0.2055, "step": 1150 }, { "epoch": 1.6, "learning_rate": 0.000754687980325853, "loss": 0.1844, "step": 1160 }, { "epoch": 1.62, "learning_rate": 0.0007516138948662773, "loss": 0.185, "step": 1170 }, { "epoch": 1.63, "learning_rate": 0.0007485398094067015, "loss": 0.2394, "step": 1180 }, { "epoch": 1.65, "learning_rate": 0.0007454657239471257, "loss": 0.2908, "step": 1190 }, { "epoch": 1.66, "learning_rate": 0.00074239163848755, "loss": 0.2708, "step": 1200 }, { "epoch": 1.67, "learning_rate": 0.0007393175530279742, "loss": 0.1798, "step": 1210 }, { "epoch": 1.69, "learning_rate": 0.0007362434675683984, "loss": 0.1992, "step": 1220 }, { "epoch": 1.7, "learning_rate": 0.0007331693821088227, "loss": 0.2794, "step": 1230 }, { "epoch": 1.72, "learning_rate": 0.0007300952966492469, "loss": 0.2765, "step": 1240 }, { "epoch": 1.73, "learning_rate": 0.0007270212111896711, "loss": 0.2008, "step": 1250 }, { "epoch": 1.74, "learning_rate": 0.0007239471257300953, "loss": 0.1814, "step": 1260 }, { "epoch": 1.76, "learning_rate": 0.0007208730402705196, "loss": 0.1971, "step": 1270 }, { "epoch": 1.77, "learning_rate": 0.0007177989548109437, "loss": 0.179, "step": 1280 }, { "epoch": 1.78, "learning_rate": 0.0007147248693513679, "loss": 0.2237, "step": 1290 }, { "epoch": 1.8, "learning_rate": 0.0007116507838917922, "loss": 0.1961, "step": 1300 }, { "epoch": 1.81, "learning_rate": 0.0007085766984322164, "loss": 0.2523, "step": 1310 }, { "epoch": 1.83, "learning_rate": 0.0007055026129726406, "loss": 0.2659, "step": 1320 }, { "epoch": 1.84, "learning_rate": 0.0007024285275130649, "loss": 0.2138, "step": 1330 }, { "epoch": 1.85, "learning_rate": 0.0006993544420534891, "loss": 0.1813, "step": 1340 }, { "epoch": 1.87, "learning_rate": 0.0006962803565939133, "loss": 0.1657, "step": 1350 }, { "epoch": 1.88, "learning_rate": 0.0006932062711343376, "loss": 0.2167, "step": 1360 }, { "epoch": 1.89, "learning_rate": 0.0006901321856747618, "loss": 0.2037, "step": 1370 }, { "epoch": 1.91, "learning_rate": 0.000687058100215186, "loss": 0.1702, "step": 1380 }, { "epoch": 1.92, "learning_rate": 0.0006839840147556103, "loss": 0.1792, "step": 1390 }, { "epoch": 1.94, "learning_rate": 0.0006809099292960345, "loss": 0.2097, "step": 1400 }, { "epoch": 1.95, "learning_rate": 0.0006778358438364587, "loss": 0.2015, "step": 1410 }, { "epoch": 1.96, "learning_rate": 0.000674761758376883, "loss": 0.1708, "step": 1420 }, { "epoch": 1.98, "learning_rate": 0.0006716876729173072, "loss": 0.1777, "step": 1430 }, { "epoch": 1.99, "learning_rate": 0.0006686135874577314, "loss": 0.1826, "step": 1440 }, { "epoch": 2.0, "eval_accuracy": 0.9147280666931322, "eval_loss": 0.21437880396842957, "eval_runtime": 474.1279, "eval_samples_per_second": 26.565, "eval_steps_per_second": 0.831, "step": 1446 }, { "epoch": 2.01, "learning_rate": 0.0006655395019981556, "loss": 0.1741, "step": 1450 }, { "epoch": 2.02, "learning_rate": 0.0006624654165385799, "loss": 0.1686, "step": 1460 }, { "epoch": 2.03, "learning_rate": 0.0006593913310790041, "loss": 0.2035, "step": 1470 }, { "epoch": 2.05, "learning_rate": 0.0006563172456194282, "loss": 0.2041, "step": 1480 }, { "epoch": 2.06, "learning_rate": 0.0006532431601598524, "loss": 0.179, "step": 1490 }, { "epoch": 2.07, "learning_rate": 0.0006501690747002766, "loss": 0.2548, "step": 1500 }, { "epoch": 2.09, "learning_rate": 0.0006470949892407008, "loss": 0.1814, "step": 1510 }, { "epoch": 2.1, "learning_rate": 0.0006440209037811251, "loss": 0.2271, "step": 1520 }, { "epoch": 2.12, "learning_rate": 0.0006409468183215493, "loss": 0.1752, "step": 1530 }, { "epoch": 2.13, "learning_rate": 0.0006378727328619735, "loss": 0.1917, "step": 1540 }, { "epoch": 2.14, "learning_rate": 0.0006347986474023978, "loss": 0.1763, "step": 1550 }, { "epoch": 2.16, "learning_rate": 0.000631724561942822, "loss": 0.2002, "step": 1560 }, { "epoch": 2.17, "learning_rate": 0.0006286504764832462, "loss": 0.214, "step": 1570 }, { "epoch": 2.19, "learning_rate": 0.0006255763910236704, "loss": 0.1708, "step": 1580 }, { "epoch": 2.2, "learning_rate": 0.0006225023055640947, "loss": 0.1559, "step": 1590 }, { "epoch": 2.21, "learning_rate": 0.0006194282201045189, "loss": 0.1594, "step": 1600 }, { "epoch": 2.23, "learning_rate": 0.0006163541346449431, "loss": 0.1601, "step": 1610 }, { "epoch": 2.24, "learning_rate": 0.0006132800491853674, "loss": 0.1779, "step": 1620 }, { "epoch": 2.25, "learning_rate": 0.0006102059637257916, "loss": 0.1746, "step": 1630 }, { "epoch": 2.27, "learning_rate": 0.0006071318782662158, "loss": 0.2524, "step": 1640 }, { "epoch": 2.28, "learning_rate": 0.0006040577928066401, "loss": 0.2179, "step": 1650 }, { "epoch": 2.3, "learning_rate": 0.0006009837073470643, "loss": 0.1713, "step": 1660 }, { "epoch": 2.31, "learning_rate": 0.0005979096218874884, "loss": 0.1803, "step": 1670 }, { "epoch": 2.32, "learning_rate": 0.0005948355364279127, "loss": 0.189, "step": 1680 }, { "epoch": 2.34, "learning_rate": 0.0005917614509683369, "loss": 0.1712, "step": 1690 }, { "epoch": 2.35, "learning_rate": 0.0005886873655087611, "loss": 0.1769, "step": 1700 }, { "epoch": 2.37, "learning_rate": 0.0005856132800491854, "loss": 0.1795, "step": 1710 }, { "epoch": 2.38, "learning_rate": 0.0005825391945896096, "loss": 0.1535, "step": 1720 }, { "epoch": 2.39, "learning_rate": 0.0005794651091300338, "loss": 0.1562, "step": 1730 }, { "epoch": 2.41, "learning_rate": 0.0005763910236704581, "loss": 0.2128, "step": 1740 }, { "epoch": 2.42, "learning_rate": 0.0005733169382108823, "loss": 0.1707, "step": 1750 }, { "epoch": 2.43, "learning_rate": 0.0005702428527513065, "loss": 0.1792, "step": 1760 }, { "epoch": 2.45, "learning_rate": 0.0005671687672917308, "loss": 0.1504, "step": 1770 }, { "epoch": 2.46, "learning_rate": 0.000564094681832155, "loss": 0.1598, "step": 1780 }, { "epoch": 2.48, "learning_rate": 0.0005610205963725792, "loss": 0.1737, "step": 1790 }, { "epoch": 2.49, "learning_rate": 0.0005579465109130034, "loss": 0.2276, "step": 1800 }, { "epoch": 2.5, "learning_rate": 0.0005548724254534277, "loss": 0.2368, "step": 1810 }, { "epoch": 2.52, "learning_rate": 0.0005517983399938519, "loss": 0.2196, "step": 1820 }, { "epoch": 2.53, "learning_rate": 0.0005487242545342761, "loss": 0.2351, "step": 1830 }, { "epoch": 2.54, "learning_rate": 0.0005456501690747004, "loss": 0.198, "step": 1840 }, { "epoch": 2.56, "learning_rate": 0.0005425760836151246, "loss": 0.1952, "step": 1850 }, { "epoch": 2.57, "learning_rate": 0.0005395019981555488, "loss": 0.1659, "step": 1860 }, { "epoch": 2.59, "learning_rate": 0.0005364279126959729, "loss": 0.1596, "step": 1870 }, { "epoch": 2.6, "learning_rate": 0.0005333538272363971, "loss": 0.1564, "step": 1880 }, { "epoch": 2.61, "learning_rate": 0.0005302797417768213, "loss": 0.2239, "step": 1890 }, { "epoch": 2.63, "learning_rate": 0.0005272056563172456, "loss": 0.1639, "step": 1900 }, { "epoch": 2.64, "learning_rate": 0.0005241315708576698, "loss": 0.1504, "step": 1910 }, { "epoch": 2.66, "learning_rate": 0.000521057485398094, "loss": 0.1506, "step": 1920 }, { "epoch": 2.67, "learning_rate": 0.0005179833999385182, "loss": 0.154, "step": 1930 }, { "epoch": 2.68, "learning_rate": 0.0005149093144789425, "loss": 0.2018, "step": 1940 }, { "epoch": 2.7, "learning_rate": 0.0005118352290193667, "loss": 0.1539, "step": 1950 }, { "epoch": 2.71, "learning_rate": 0.0005087611435597909, "loss": 0.1614, "step": 1960 }, { "epoch": 2.72, "learning_rate": 0.0005056870581002152, "loss": 0.1625, "step": 1970 }, { "epoch": 2.74, "learning_rate": 0.0005026129726406394, "loss": 0.1542, "step": 1980 }, { "epoch": 2.75, "learning_rate": 0.0004995388871810636, "loss": 0.1852, "step": 1990 }, { "epoch": 2.77, "learning_rate": 0.0004964648017214879, "loss": 0.183, "step": 2000 }, { "epoch": 2.78, "learning_rate": 0.0004933907162619121, "loss": 0.1372, "step": 2010 }, { "epoch": 2.79, "learning_rate": 0.0004903166308023363, "loss": 0.1798, "step": 2020 }, { "epoch": 2.81, "learning_rate": 0.0004872425453427605, "loss": 0.1427, "step": 2030 }, { "epoch": 2.82, "learning_rate": 0.00048416845988318475, "loss": 0.1477, "step": 2040 }, { "epoch": 2.84, "learning_rate": 0.000481094374423609, "loss": 0.1675, "step": 2050 }, { "epoch": 2.85, "learning_rate": 0.0004780202889640332, "loss": 0.1769, "step": 2060 }, { "epoch": 2.86, "learning_rate": 0.000475253612050415, "loss": 0.2386, "step": 2070 }, { "epoch": 2.88, "learning_rate": 0.0004721795265908392, "loss": 0.1842, "step": 2080 }, { "epoch": 2.89, "learning_rate": 0.00046910544113126345, "loss": 0.1855, "step": 2090 }, { "epoch": 2.9, "learning_rate": 0.0004660313556716877, "loss": 0.1855, "step": 2100 }, { "epoch": 2.92, "learning_rate": 0.0004629572702121119, "loss": 0.1694, "step": 2110 }, { "epoch": 2.93, "learning_rate": 0.00045988318475253615, "loss": 0.1774, "step": 2120 }, { "epoch": 2.95, "learning_rate": 0.0004568090992929603, "loss": 0.1704, "step": 2130 }, { "epoch": 2.96, "learning_rate": 0.00045373501383338456, "loss": 0.1604, "step": 2140 }, { "epoch": 2.97, "learning_rate": 0.0004506609283738088, "loss": 0.1807, "step": 2150 }, { "epoch": 2.99, "learning_rate": 0.000447586842914233, "loss": 0.2425, "step": 2160 }, { "epoch": 3.0, "eval_accuracy": 0.914410480349345, "eval_loss": 0.19619111716747284, "eval_runtime": 474.4554, "eval_samples_per_second": 26.546, "eval_steps_per_second": 0.83, "step": 2169 }, { "epoch": 3.0, "learning_rate": 0.00044451275745465725, "loss": 0.1888, "step": 2170 }, { "epoch": 3.02, "learning_rate": 0.0004414386719950815, "loss": 0.171, "step": 2180 }, { "epoch": 3.03, "learning_rate": 0.0004383645865355057, "loss": 0.1572, "step": 2190 }, { "epoch": 3.04, "learning_rate": 0.00043529050107592995, "loss": 0.1851, "step": 2200 }, { "epoch": 3.06, "learning_rate": 0.0004322164156163542, "loss": 0.1688, "step": 2210 }, { "epoch": 3.07, "learning_rate": 0.0004291423301567784, "loss": 0.2044, "step": 2220 }, { "epoch": 3.08, "learning_rate": 0.0004260682446972026, "loss": 0.1637, "step": 2230 }, { "epoch": 3.1, "learning_rate": 0.00042299415923762677, "loss": 0.1795, "step": 2240 }, { "epoch": 3.11, "learning_rate": 0.000419920073778051, "loss": 0.2163, "step": 2250 }, { "epoch": 3.13, "learning_rate": 0.00041684598831847523, "loss": 0.2959, "step": 2260 }, { "epoch": 3.14, "learning_rate": 0.00041377190285889947, "loss": 0.1883, "step": 2270 }, { "epoch": 3.15, "learning_rate": 0.0004106978173993237, "loss": 0.1615, "step": 2280 }, { "epoch": 3.17, "learning_rate": 0.00040762373193974793, "loss": 0.1716, "step": 2290 }, { "epoch": 3.18, "learning_rate": 0.00040454964648017216, "loss": 0.1548, "step": 2300 }, { "epoch": 3.2, "learning_rate": 0.0004014755610205964, "loss": 0.1689, "step": 2310 }, { "epoch": 3.21, "learning_rate": 0.0003984014755610206, "loss": 0.1307, "step": 2320 }, { "epoch": 3.22, "learning_rate": 0.0003953273901014448, "loss": 0.171, "step": 2330 }, { "epoch": 3.24, "learning_rate": 0.00039225330464186904, "loss": 0.1799, "step": 2340 }, { "epoch": 3.25, "learning_rate": 0.00038917921918229327, "loss": 0.1637, "step": 2350 }, { "epoch": 3.26, "learning_rate": 0.0003861051337227175, "loss": 0.1673, "step": 2360 }, { "epoch": 3.28, "learning_rate": 0.00038303104826314173, "loss": 0.1606, "step": 2370 }, { "epoch": 3.29, "learning_rate": 0.00037995696280356596, "loss": 0.1588, "step": 2380 }, { "epoch": 3.31, "learning_rate": 0.0003768828773439902, "loss": 0.1895, "step": 2390 }, { "epoch": 3.32, "learning_rate": 0.00037380879188441443, "loss": 0.1782, "step": 2400 }, { "epoch": 3.33, "learning_rate": 0.00037073470642483866, "loss": 0.1842, "step": 2410 }, { "epoch": 3.35, "learning_rate": 0.00036766062096526284, "loss": 0.138, "step": 2420 }, { "epoch": 3.36, "learning_rate": 0.00036458653550568707, "loss": 0.1522, "step": 2430 }, { "epoch": 3.37, "learning_rate": 0.00036151245004611125, "loss": 0.1433, "step": 2440 }, { "epoch": 3.39, "learning_rate": 0.0003584383645865355, "loss": 0.1681, "step": 2450 }, { "epoch": 3.4, "learning_rate": 0.0003553642791269597, "loss": 0.1797, "step": 2460 }, { "epoch": 3.42, "learning_rate": 0.00035229019366738395, "loss": 0.1606, "step": 2470 }, { "epoch": 3.43, "learning_rate": 0.0003492161082078082, "loss": 0.1424, "step": 2480 }, { "epoch": 3.44, "learning_rate": 0.0003461420227482324, "loss": 0.1439, "step": 2490 }, { "epoch": 3.46, "learning_rate": 0.00034306793728865664, "loss": 0.1623, "step": 2500 }, { "epoch": 3.47, "learning_rate": 0.0003399938518290809, "loss": 0.1482, "step": 2510 }, { "epoch": 3.49, "learning_rate": 0.00033691976636950505, "loss": 0.1742, "step": 2520 }, { "epoch": 3.5, "learning_rate": 0.0003338456809099293, "loss": 0.1851, "step": 2530 }, { "epoch": 3.51, "learning_rate": 0.0003307715954503535, "loss": 0.1319, "step": 2540 }, { "epoch": 3.53, "learning_rate": 0.00032769750999077775, "loss": 0.1445, "step": 2550 }, { "epoch": 3.54, "learning_rate": 0.000324623424531202, "loss": 0.1687, "step": 2560 }, { "epoch": 3.55, "learning_rate": 0.0003215493390716262, "loss": 0.1547, "step": 2570 }, { "epoch": 3.57, "learning_rate": 0.00031847525361205044, "loss": 0.1399, "step": 2580 }, { "epoch": 3.58, "learning_rate": 0.0003154011681524747, "loss": 0.1409, "step": 2590 }, { "epoch": 3.6, "learning_rate": 0.0003123270826928989, "loss": 0.1422, "step": 2600 }, { "epoch": 3.61, "learning_rate": 0.00030925299723332314, "loss": 0.1663, "step": 2610 }, { "epoch": 3.62, "learning_rate": 0.0003061789117737473, "loss": 0.1772, "step": 2620 }, { "epoch": 3.64, "learning_rate": 0.0003031048263141715, "loss": 0.1479, "step": 2630 }, { "epoch": 3.65, "learning_rate": 0.00030003074085459573, "loss": 0.2015, "step": 2640 }, { "epoch": 3.67, "learning_rate": 0.00029695665539501996, "loss": 0.1665, "step": 2650 }, { "epoch": 3.68, "learning_rate": 0.0002938825699354442, "loss": 0.151, "step": 2660 }, { "epoch": 3.69, "learning_rate": 0.0002908084844758684, "loss": 0.1354, "step": 2670 }, { "epoch": 3.71, "learning_rate": 0.00028773439901629266, "loss": 0.1601, "step": 2680 }, { "epoch": 3.72, "learning_rate": 0.0002846603135567169, "loss": 0.1495, "step": 2690 }, { "epoch": 3.73, "learning_rate": 0.0002815862280971411, "loss": 0.1288, "step": 2700 }, { "epoch": 3.75, "learning_rate": 0.00027851214263756535, "loss": 0.1375, "step": 2710 }, { "epoch": 3.76, "learning_rate": 0.00027543805717798953, "loss": 0.1395, "step": 2720 }, { "epoch": 3.78, "learning_rate": 0.00027236397171841376, "loss": 0.1605, "step": 2730 }, { "epoch": 3.79, "learning_rate": 0.000269289886258838, "loss": 0.1627, "step": 2740 }, { "epoch": 3.8, "learning_rate": 0.0002662158007992622, "loss": 0.1511, "step": 2750 }, { "epoch": 3.82, "learning_rate": 0.00026314171533968646, "loss": 0.153, "step": 2760 }, { "epoch": 3.83, "learning_rate": 0.0002600676298801107, "loss": 0.1617, "step": 2770 }, { "epoch": 3.85, "learning_rate": 0.0002569935444205349, "loss": 0.1371, "step": 2780 }, { "epoch": 3.86, "learning_rate": 0.00025391945896095916, "loss": 0.1389, "step": 2790 }, { "epoch": 3.87, "learning_rate": 0.0002508453735013834, "loss": 0.1336, "step": 2800 }, { "epoch": 3.89, "learning_rate": 0.00024777128804180757, "loss": 0.1533, "step": 2810 }, { "epoch": 3.9, "learning_rate": 0.0002446972025822318, "loss": 0.1717, "step": 2820 }, { "epoch": 3.91, "learning_rate": 0.00024162311712265603, "loss": 0.1487, "step": 2830 }, { "epoch": 3.93, "learning_rate": 0.00023854903166308023, "loss": 0.1646, "step": 2840 }, { "epoch": 3.94, "learning_rate": 0.00023547494620350447, "loss": 0.1433, "step": 2850 }, { "epoch": 3.96, "learning_rate": 0.0002324008607439287, "loss": 0.17, "step": 2860 }, { "epoch": 3.97, "learning_rate": 0.00022932677528435293, "loss": 0.1905, "step": 2870 }, { "epoch": 3.98, "learning_rate": 0.00022625268982477714, "loss": 0.1478, "step": 2880 }, { "epoch": 4.0, "learning_rate": 0.00022317860436520134, "loss": 0.164, "step": 2890 }, { "epoch": 4.0, "eval_accuracy": 0.8864628820960698, "eval_loss": 0.3099741041660309, "eval_runtime": 481.6212, "eval_samples_per_second": 26.151, "eval_steps_per_second": 0.818, "step": 2892 }, { "epoch": 4.01, "learning_rate": 0.00022010451890562557, "loss": 0.1351, "step": 2900 }, { "epoch": 4.02, "learning_rate": 0.0002170304334460498, "loss": 0.1478, "step": 2910 }, { "epoch": 4.04, "learning_rate": 0.00021395634798647404, "loss": 0.1262, "step": 2920 }, { "epoch": 4.05, "learning_rate": 0.00021088226252689824, "loss": 0.1532, "step": 2930 }, { "epoch": 4.07, "learning_rate": 0.00020780817706732247, "loss": 0.1281, "step": 2940 }, { "epoch": 4.08, "learning_rate": 0.0002047340916077467, "loss": 0.1589, "step": 2950 }, { "epoch": 4.09, "learning_rate": 0.00020166000614817094, "loss": 0.1528, "step": 2960 }, { "epoch": 4.11, "learning_rate": 0.00019858592068859514, "loss": 0.1503, "step": 2970 }, { "epoch": 4.12, "learning_rate": 0.00019551183522901935, "loss": 0.1493, "step": 2980 }, { "epoch": 4.14, "learning_rate": 0.00019243774976944358, "loss": 0.1585, "step": 2990 }, { "epoch": 4.15, "learning_rate": 0.0001893636643098678, "loss": 0.1414, "step": 3000 }, { "epoch": 4.16, "learning_rate": 0.00018628957885029204, "loss": 0.1394, "step": 3010 }, { "epoch": 4.18, "learning_rate": 0.00018321549339071628, "loss": 0.1633, "step": 3020 }, { "epoch": 4.19, "learning_rate": 0.00018014140793114048, "loss": 0.1323, "step": 3030 }, { "epoch": 4.2, "learning_rate": 0.00017706732247156471, "loss": 0.1531, "step": 3040 }, { "epoch": 4.22, "learning_rate": 0.00017399323701198895, "loss": 0.1726, "step": 3050 }, { "epoch": 4.23, "learning_rate": 0.00017091915155241318, "loss": 0.1552, "step": 3060 }, { "epoch": 4.25, "learning_rate": 0.00016784506609283738, "loss": 0.1761, "step": 3070 }, { "epoch": 4.26, "learning_rate": 0.0001647709806332616, "loss": 0.1269, "step": 3080 }, { "epoch": 4.27, "learning_rate": 0.00016169689517368582, "loss": 0.131, "step": 3090 }, { "epoch": 4.29, "learning_rate": 0.00015862280971411005, "loss": 0.1513, "step": 3100 }, { "epoch": 4.3, "learning_rate": 0.00015554872425453428, "loss": 0.154, "step": 3110 }, { "epoch": 4.32, "learning_rate": 0.00015247463879495852, "loss": 0.1658, "step": 3120 }, { "epoch": 4.33, "learning_rate": 0.00014940055333538272, "loss": 0.1544, "step": 3130 }, { "epoch": 4.34, "learning_rate": 0.00014632646787580695, "loss": 0.1497, "step": 3140 }, { "epoch": 4.36, "learning_rate": 0.00014325238241623119, "loss": 0.1534, "step": 3150 }, { "epoch": 4.37, "learning_rate": 0.00014017829695665542, "loss": 0.1518, "step": 3160 }, { "epoch": 4.38, "learning_rate": 0.00013710421149707962, "loss": 0.1346, "step": 3170 }, { "epoch": 4.4, "learning_rate": 0.00013403012603750383, "loss": 0.1416, "step": 3180 }, { "epoch": 4.41, "learning_rate": 0.00013095604057792806, "loss": 0.1483, "step": 3190 }, { "epoch": 4.43, "learning_rate": 0.0001278819551183523, "loss": 0.1473, "step": 3200 }, { "epoch": 4.44, "learning_rate": 0.0001248078696587765, "loss": 0.1481, "step": 3210 }, { "epoch": 4.45, "learning_rate": 0.00012173378419920074, "loss": 0.1452, "step": 3220 }, { "epoch": 4.47, "learning_rate": 0.00011865969873962497, "loss": 0.1305, "step": 3230 }, { "epoch": 4.48, "learning_rate": 0.00011558561328004918, "loss": 0.1579, "step": 3240 }, { "epoch": 4.5, "learning_rate": 0.00011251152782047341, "loss": 0.1436, "step": 3250 }, { "epoch": 4.51, "learning_rate": 0.00010943744236089763, "loss": 0.1808, "step": 3260 }, { "epoch": 4.52, "learning_rate": 0.00010636335690132186, "loss": 0.1617, "step": 3270 }, { "epoch": 4.54, "learning_rate": 0.0001032892714417461, "loss": 0.1579, "step": 3280 }, { "epoch": 4.55, "learning_rate": 0.0001002151859821703, "loss": 0.1535, "step": 3290 }, { "epoch": 4.56, "learning_rate": 9.714110052259453e-05, "loss": 0.1448, "step": 3300 }, { "epoch": 4.58, "learning_rate": 9.406701506301875e-05, "loss": 0.1381, "step": 3310 }, { "epoch": 4.59, "learning_rate": 9.099292960344298e-05, "loss": 0.1419, "step": 3320 }, { "epoch": 4.61, "learning_rate": 8.791884414386721e-05, "loss": 0.1439, "step": 3330 }, { "epoch": 4.62, "learning_rate": 8.484475868429142e-05, "loss": 0.1354, "step": 3340 }, { "epoch": 4.63, "learning_rate": 8.177067322471565e-05, "loss": 0.1363, "step": 3350 }, { "epoch": 4.65, "learning_rate": 7.869658776513987e-05, "loss": 0.1395, "step": 3360 }, { "epoch": 4.66, "learning_rate": 7.56225023055641e-05, "loss": 0.146, "step": 3370 }, { "epoch": 4.67, "learning_rate": 7.254841684598831e-05, "loss": 0.1472, "step": 3380 }, { "epoch": 4.69, "learning_rate": 6.947433138641254e-05, "loss": 0.1328, "step": 3390 }, { "epoch": 4.7, "learning_rate": 6.640024592683677e-05, "loss": 0.143, "step": 3400 }, { "epoch": 4.72, "learning_rate": 6.332616046726099e-05, "loss": 0.1339, "step": 3410 }, { "epoch": 4.73, "learning_rate": 6.0252075007685215e-05, "loss": 0.1298, "step": 3420 }, { "epoch": 4.74, "learning_rate": 5.717798954810944e-05, "loss": 0.121, "step": 3430 }, { "epoch": 4.76, "learning_rate": 5.410390408853366e-05, "loss": 0.1523, "step": 3440 }, { "epoch": 4.77, "learning_rate": 5.1029818628957884e-05, "loss": 0.147, "step": 3450 }, { "epoch": 4.79, "learning_rate": 4.795573316938211e-05, "loss": 0.1253, "step": 3460 }, { "epoch": 4.8, "learning_rate": 4.4881647709806335e-05, "loss": 0.1386, "step": 3470 }, { "epoch": 4.81, "learning_rate": 4.180756225023056e-05, "loss": 0.1316, "step": 3480 }, { "epoch": 4.83, "learning_rate": 3.873347679065478e-05, "loss": 0.1338, "step": 3490 }, { "epoch": 4.84, "learning_rate": 3.5659391331079004e-05, "loss": 0.1354, "step": 3500 }, { "epoch": 4.85, "learning_rate": 3.258530587150323e-05, "loss": 0.1512, "step": 3510 }, { "epoch": 4.87, "learning_rate": 2.951122041192745e-05, "loss": 0.1288, "step": 3520 }, { "epoch": 4.88, "learning_rate": 2.6437134952351673e-05, "loss": 0.1418, "step": 3530 }, { "epoch": 4.9, "learning_rate": 2.3363049492775902e-05, "loss": 0.122, "step": 3540 }, { "epoch": 4.91, "learning_rate": 2.0288964033200124e-05, "loss": 0.1276, "step": 3550 }, { "epoch": 4.92, "learning_rate": 1.7214878573624346e-05, "loss": 0.1306, "step": 3560 }, { "epoch": 4.94, "learning_rate": 1.4140793114048571e-05, "loss": 0.1367, "step": 3570 }, { "epoch": 4.95, "learning_rate": 1.1066707654472795e-05, "loss": 0.1482, "step": 3580 }, { "epoch": 4.97, "learning_rate": 7.992622194897018e-06, "loss": 0.1459, "step": 3590 }, { "epoch": 4.98, "learning_rate": 4.918536735321242e-06, "loss": 0.1087, "step": 3600 }, { "epoch": 4.99, "learning_rate": 1.8444512757454657e-06, "loss": 0.1525, "step": 3610 }, { "epoch": 5.0, "eval_accuracy": 0.8999603017070266, "eval_loss": 0.2823958098888397, "eval_runtime": 482.9204, "eval_samples_per_second": 26.081, "eval_steps_per_second": 0.816, "step": 3615 }, { "epoch": 5.0, "step": 3615, "total_flos": 5.8101857302104e+19, "train_loss": 0.20637444648505246, "train_runtime": 19742.095, "train_samples_per_second": 23.438, "train_steps_per_second": 0.183 } ], "logging_steps": 10, "max_steps": 3615, "num_train_epochs": 5, "save_steps": 500, "total_flos": 5.8101857302104e+19, "trial_name": null, "trial_params": null }