{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.93538067818298, "global_step": 39000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26, "learning_rate": 2e-05, "loss": 16.6458, "step": 200 }, { "epoch": 0.51, "learning_rate": 4e-05, "loss": 4.0476, "step": 400 }, { "epoch": 0.51, "eval_loss": 3.31887149810791, "eval_runtime": 123.9389, "eval_samples_per_second": 12.28, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.77, "learning_rate": 6e-05, "loss": 3.2819, "step": 600 }, { "epoch": 1.02, "learning_rate": 8e-05, "loss": 3.2548, "step": 800 }, { "epoch": 1.02, "eval_loss": 3.226016044616699, "eval_runtime": 123.3474, "eval_samples_per_second": 12.339, "eval_wer": 1.0, "step": 800 }, { "epoch": 1.28, "learning_rate": 0.0001, "loss": 3.1788, "step": 1000 }, { "epoch": 1.54, "learning_rate": 9.947437582128778e-05, "loss": 2.064, "step": 1200 }, { "epoch": 1.54, "eval_loss": 0.9218049049377441, "eval_runtime": 123.4543, "eval_samples_per_second": 12.328, "eval_wer": 0.8255778742303422, "step": 1200 }, { "epoch": 1.79, "learning_rate": 9.894875164257556e-05, "loss": 0.9611, "step": 1400 }, { "epoch": 2.05, "learning_rate": 9.842312746386335e-05, "loss": 0.741, "step": 1600 }, { "epoch": 2.05, "eval_loss": 0.5120046138763428, "eval_runtime": 124.3691, "eval_samples_per_second": 12.238, "eval_wer": 0.6447966084586656, "step": 1600 }, { "epoch": 2.3, "learning_rate": 9.789750328515113e-05, "loss": 0.6514, "step": 1800 }, { "epoch": 2.56, "learning_rate": 9.73718791064389e-05, "loss": 0.5501, "step": 2000 }, { "epoch": 2.56, "eval_loss": 0.4019804298877716, "eval_runtime": 124.8901, "eval_samples_per_second": 12.187, "eval_wer": 0.5592005652568891, "step": 2000 }, { "epoch": 2.82, "learning_rate": 9.684625492772667e-05, "loss": 0.5241, "step": 2200 }, { "epoch": 3.07, "learning_rate": 9.632063074901446e-05, "loss": 0.4649, "step": 2400 }, { "epoch": 3.07, "eval_loss": 0.3544917106628418, "eval_runtime": 124.4823, "eval_samples_per_second": 12.227, "eval_wer": 0.49167255475926114, "step": 2400 }, { "epoch": 3.33, "learning_rate": 9.579500657030223e-05, "loss": 0.409, "step": 2600 }, { "epoch": 3.58, "learning_rate": 9.526938239159002e-05, "loss": 0.4302, "step": 2800 }, { "epoch": 3.58, "eval_loss": 0.3078452944755554, "eval_runtime": 116.543, "eval_samples_per_second": 13.06, "eval_wer": 0.46431815887756134, "step": 2800 }, { "epoch": 3.84, "learning_rate": 9.47437582128778e-05, "loss": 0.3978, "step": 3000 }, { "epoch": 4.1, "learning_rate": 9.421813403416557e-05, "loss": 0.3713, "step": 3200 }, { "epoch": 4.1, "eval_loss": 0.29381415247917175, "eval_runtime": 125.5692, "eval_samples_per_second": 12.121, "eval_wer": 0.44190976077520944, "step": 3200 }, { "epoch": 4.35, "learning_rate": 9.369250985545336e-05, "loss": 0.326, "step": 3400 }, { "epoch": 4.61, "learning_rate": 9.316688567674113e-05, "loss": 0.3284, "step": 3600 }, { "epoch": 4.61, "eval_loss": 0.27045169472694397, "eval_runtime": 117.0161, "eval_samples_per_second": 13.007, "eval_wer": 0.4268698899767841, "step": 3600 }, { "epoch": 4.87, "learning_rate": 9.26412614980289e-05, "loss": 0.3261, "step": 3800 }, { "epoch": 5.12, "learning_rate": 9.21156373193167e-05, "loss": 0.3229, "step": 4000 }, { "epoch": 5.12, "eval_loss": 0.2579388916492462, "eval_runtime": 117.216, "eval_samples_per_second": 12.985, "eval_wer": 0.40022206520641973, "step": 4000 }, { "epoch": 5.38, "learning_rate": 9.159001314060447e-05, "loss": 0.2779, "step": 4200 }, { "epoch": 5.63, "learning_rate": 9.106438896189226e-05, "loss": 0.2622, "step": 4400 }, { "epoch": 5.63, "eval_loss": 0.24829687178134918, "eval_runtime": 125.5848, "eval_samples_per_second": 12.119, "eval_wer": 0.3847784394872312, "step": 4400 }, { "epoch": 5.89, "learning_rate": 9.053876478318003e-05, "loss": 0.2819, "step": 4600 }, { "epoch": 6.15, "learning_rate": 9.00131406044678e-05, "loss": 0.2831, "step": 4800 }, { "epoch": 6.15, "eval_loss": 0.22981220483779907, "eval_runtime": 120.9279, "eval_samples_per_second": 12.586, "eval_wer": 0.37579489250025233, "step": 4800 }, { "epoch": 6.4, "learning_rate": 8.948751642575559e-05, "loss": 0.2358, "step": 5000 }, { "epoch": 6.66, "learning_rate": 8.896189224704337e-05, "loss": 0.2505, "step": 5200 }, { "epoch": 6.66, "eval_loss": 0.2343754768371582, "eval_runtime": 125.1098, "eval_samples_per_second": 12.165, "eval_wer": 0.36943575249823357, "step": 5200 }, { "epoch": 6.91, "learning_rate": 8.843626806833116e-05, "loss": 0.2277, "step": 5400 }, { "epoch": 7.17, "learning_rate": 8.791064388961893e-05, "loss": 0.2436, "step": 5600 }, { "epoch": 7.17, "eval_loss": 0.22785206139087677, "eval_runtime": 118.7977, "eval_samples_per_second": 12.812, "eval_wer": 0.370142323609569, "step": 5600 }, { "epoch": 7.43, "learning_rate": 8.73850197109067e-05, "loss": 0.2253, "step": 5800 }, { "epoch": 7.68, "learning_rate": 8.685939553219448e-05, "loss": 0.2139, "step": 6000 }, { "epoch": 7.68, "eval_loss": 0.23501864075660706, "eval_runtime": 125.2542, "eval_samples_per_second": 12.151, "eval_wer": 0.35853436963762997, "step": 6000 }, { "epoch": 7.94, "learning_rate": 8.633377135348227e-05, "loss": 0.2083, "step": 6200 }, { "epoch": 8.19, "learning_rate": 8.580814717477005e-05, "loss": 0.2075, "step": 6400 }, { "epoch": 8.19, "eval_loss": 0.21803195774555206, "eval_runtime": 117.7604, "eval_samples_per_second": 12.925, "eval_wer": 0.3443020086807308, "step": 6400 }, { "epoch": 8.45, "learning_rate": 8.528252299605783e-05, "loss": 0.1983, "step": 6600 }, { "epoch": 8.71, "learning_rate": 8.47568988173456e-05, "loss": 0.2165, "step": 6800 }, { "epoch": 8.71, "eval_loss": 0.19772419333457947, "eval_runtime": 117.7225, "eval_samples_per_second": 12.929, "eval_wer": 0.34490764106187544, "step": 6800 }, { "epoch": 8.96, "learning_rate": 8.423127463863338e-05, "loss": 0.1871, "step": 7000 }, { "epoch": 9.22, "learning_rate": 8.370565045992115e-05, "loss": 0.1695, "step": 7200 }, { "epoch": 9.22, "eval_loss": 0.2186300903558731, "eval_runtime": 116.8691, "eval_samples_per_second": 13.023, "eval_wer": 0.3477339255072171, "step": 7200 }, { "epoch": 9.47, "learning_rate": 8.318002628120894e-05, "loss": 0.181, "step": 7400 }, { "epoch": 9.73, "learning_rate": 8.265440210249672e-05, "loss": 0.1998, "step": 7600 }, { "epoch": 9.73, "eval_loss": 0.20928959548473358, "eval_runtime": 125.7928, "eval_samples_per_second": 12.099, "eval_wer": 0.33057434137478553, "step": 7600 }, { "epoch": 9.99, "learning_rate": 8.21287779237845e-05, "loss": 0.1776, "step": 7800 }, { "epoch": 10.24, "learning_rate": 8.160315374507228e-05, "loss": 0.1778, "step": 8000 }, { "epoch": 10.24, "eval_loss": 0.2176637053489685, "eval_runtime": 118.7135, "eval_samples_per_second": 12.821, "eval_wer": 0.32734430200868075, "step": 8000 }, { "epoch": 10.5, "learning_rate": 8.107752956636005e-05, "loss": 0.1779, "step": 8200 }, { "epoch": 10.75, "learning_rate": 8.055190538764782e-05, "loss": 0.1748, "step": 8400 }, { "epoch": 10.75, "eval_loss": 0.21325238049030304, "eval_runtime": 126.2474, "eval_samples_per_second": 12.056, "eval_wer": 0.3220954880387605, "step": 8400 }, { "epoch": 11.01, "learning_rate": 8.002628120893562e-05, "loss": 0.1676, "step": 8600 }, { "epoch": 11.27, "learning_rate": 7.95006570302234e-05, "loss": 0.1591, "step": 8800 }, { "epoch": 11.27, "eval_loss": 0.21119922399520874, "eval_runtime": 116.6989, "eval_samples_per_second": 13.042, "eval_wer": 0.32340769153124055, "step": 8800 }, { "epoch": 11.52, "learning_rate": 7.897503285151118e-05, "loss": 0.1613, "step": 9000 }, { "epoch": 11.78, "learning_rate": 7.844940867279895e-05, "loss": 0.1697, "step": 9200 }, { "epoch": 11.78, "eval_loss": 0.19785504043102264, "eval_runtime": 117.4774, "eval_samples_per_second": 12.956, "eval_wer": 0.319168264863228, "step": 9200 }, { "epoch": 12.04, "learning_rate": 7.792378449408672e-05, "loss": 0.1434, "step": 9400 }, { "epoch": 12.29, "learning_rate": 7.739816031537451e-05, "loss": 0.1429, "step": 9600 }, { "epoch": 12.29, "eval_loss": 0.19984780251979828, "eval_runtime": 116.8648, "eval_samples_per_second": 13.024, "eval_wer": 0.3129100635914, "step": 9600 }, { "epoch": 12.55, "learning_rate": 7.687253613666229e-05, "loss": 0.1492, "step": 9800 }, { "epoch": 12.8, "learning_rate": 7.634691195795008e-05, "loss": 0.158, "step": 10000 }, { "epoch": 12.8, "eval_loss": 0.1837874799966812, "eval_runtime": 117.4413, "eval_samples_per_second": 12.96, "eval_wer": 0.3110931664479661, "step": 10000 }, { "epoch": 13.06, "learning_rate": 7.582128777923785e-05, "loss": 0.1671, "step": 10200 }, { "epoch": 13.32, "learning_rate": 7.529566360052562e-05, "loss": 0.1486, "step": 10400 }, { "epoch": 13.32, "eval_loss": 0.1937190741300583, "eval_runtime": 117.8624, "eval_samples_per_second": 12.913, "eval_wer": 0.30897345311395985, "step": 10400 }, { "epoch": 13.57, "learning_rate": 7.47700394218134e-05, "loss": 0.1446, "step": 10600 }, { "epoch": 13.83, "learning_rate": 7.424441524310118e-05, "loss": 0.1486, "step": 10800 }, { "epoch": 13.83, "eval_loss": 0.21288040280342102, "eval_runtime": 118.4975, "eval_samples_per_second": 12.844, "eval_wer": 0.30806500454224284, "step": 10800 }, { "epoch": 14.08, "learning_rate": 7.371879106438898e-05, "loss": 0.1455, "step": 11000 }, { "epoch": 14.34, "learning_rate": 7.319316688567675e-05, "loss": 0.1407, "step": 11200 }, { "epoch": 14.34, "eval_loss": 0.1925038844347, "eval_runtime": 125.2228, "eval_samples_per_second": 12.154, "eval_wer": 0.30493590390632885, "step": 11200 }, { "epoch": 14.6, "learning_rate": 7.266754270696452e-05, "loss": 0.1342, "step": 11400 }, { "epoch": 14.85, "learning_rate": 7.21419185282523e-05, "loss": 0.148, "step": 11600 }, { "epoch": 14.85, "eval_loss": 0.19629527628421783, "eval_runtime": 117.7213, "eval_samples_per_second": 12.929, "eval_wer": 0.3043302715251842, "step": 11600 }, { "epoch": 15.11, "learning_rate": 7.161629434954008e-05, "loss": 0.1295, "step": 11800 }, { "epoch": 15.36, "learning_rate": 7.109067017082786e-05, "loss": 0.1307, "step": 12000 }, { "epoch": 15.36, "eval_loss": 0.19587725400924683, "eval_runtime": 116.5086, "eval_samples_per_second": 13.063, "eval_wer": 0.30301806803270415, "step": 12000 }, { "epoch": 15.62, "learning_rate": 7.056504599211565e-05, "loss": 0.1371, "step": 12200 }, { "epoch": 15.88, "learning_rate": 7.003942181340342e-05, "loss": 0.1356, "step": 12400 }, { "epoch": 15.88, "eval_loss": 0.2000432014465332, "eval_runtime": 117.211, "eval_samples_per_second": 12.985, "eval_wer": 0.2992833350156455, "step": 12400 }, { "epoch": 16.13, "learning_rate": 6.95137976346912e-05, "loss": 0.1351, "step": 12600 }, { "epoch": 16.39, "learning_rate": 6.898817345597897e-05, "loss": 0.1213, "step": 12800 }, { "epoch": 16.39, "eval_loss": 0.19138583540916443, "eval_runtime": 117.6478, "eval_samples_per_second": 12.937, "eval_wer": 0.2961542343797315, "step": 12800 }, { "epoch": 16.64, "learning_rate": 6.846254927726675e-05, "loss": 0.122, "step": 13000 }, { "epoch": 16.9, "learning_rate": 6.793692509855453e-05, "loss": 0.1266, "step": 13200 }, { "epoch": 16.9, "eval_loss": 0.1783333718776703, "eval_runtime": 117.3319, "eval_samples_per_second": 12.972, "eval_wer": 0.2945392146966791, "step": 13200 }, { "epoch": 17.16, "learning_rate": 6.741130091984232e-05, "loss": 0.1292, "step": 13400 }, { "epoch": 17.41, "learning_rate": 6.68856767411301e-05, "loss": 0.1128, "step": 13600 }, { "epoch": 17.41, "eval_loss": 0.1909104287624359, "eval_runtime": 117.9127, "eval_samples_per_second": 12.908, "eval_wer": 0.29575047945896843, "step": 13600 }, { "epoch": 17.67, "learning_rate": 6.636005256241787e-05, "loss": 0.1293, "step": 13800 }, { "epoch": 17.93, "learning_rate": 6.583442838370564e-05, "loss": 0.1236, "step": 14000 }, { "epoch": 17.93, "eval_loss": 0.19043225049972534, "eval_runtime": 116.9176, "eval_samples_per_second": 13.018, "eval_wer": 0.2986777026345009, "step": 14000 }, { "epoch": 18.18, "learning_rate": 6.530880420499343e-05, "loss": 0.1228, "step": 14200 }, { "epoch": 18.44, "learning_rate": 6.478318002628122e-05, "loss": 0.1183, "step": 14400 }, { "epoch": 18.44, "eval_loss": 0.19979140162467957, "eval_runtime": 118.1055, "eval_samples_per_second": 12.887, "eval_wer": 0.29231856263248207, "step": 14400 }, { "epoch": 18.69, "learning_rate": 6.4257555847569e-05, "loss": 0.1195, "step": 14600 }, { "epoch": 18.95, "learning_rate": 6.373193166885677e-05, "loss": 0.1238, "step": 14800 }, { "epoch": 18.95, "eval_loss": 0.18403638899326324, "eval_runtime": 121.6258, "eval_samples_per_second": 12.514, "eval_wer": 0.287877258504088, "step": 14800 }, { "epoch": 19.21, "learning_rate": 6.320630749014454e-05, "loss": 0.1044, "step": 15000 }, { "epoch": 19.46, "learning_rate": 6.268068331143233e-05, "loss": 0.1266, "step": 15200 }, { "epoch": 19.46, "eval_loss": 0.193573996424675, "eval_runtime": 121.7733, "eval_samples_per_second": 12.499, "eval_wer": 0.2893913394569496, "step": 15200 }, { "epoch": 19.72, "learning_rate": 6.21550591327201e-05, "loss": 0.1104, "step": 15400 }, { "epoch": 19.97, "learning_rate": 6.162943495400789e-05, "loss": 0.1232, "step": 15600 }, { "epoch": 19.97, "eval_loss": 0.18593670427799225, "eval_runtime": 125.602, "eval_samples_per_second": 12.118, "eval_wer": 0.28474815786817403, "step": 15600 }, { "epoch": 20.23, "learning_rate": 6.110381077529567e-05, "loss": 0.1072, "step": 15800 }, { "epoch": 20.49, "learning_rate": 6.057818659658344e-05, "loss": 0.1093, "step": 16000 }, { "epoch": 20.49, "eval_loss": 0.1964750736951828, "eval_runtime": 120.8874, "eval_samples_per_second": 12.59, "eval_wer": 0.28484909659836477, "step": 16000 }, { "epoch": 20.74, "learning_rate": 6.005256241787123e-05, "loss": 0.1223, "step": 16200 }, { "epoch": 21.0, "learning_rate": 5.9526938239159005e-05, "loss": 0.1037, "step": 16400 }, { "epoch": 21.0, "eval_loss": 0.20040956139564514, "eval_runtime": 122.3183, "eval_samples_per_second": 12.443, "eval_wer": 0.28505097405874635, "step": 16400 }, { "epoch": 21.25, "learning_rate": 5.900131406044679e-05, "loss": 0.1114, "step": 16600 }, { "epoch": 21.51, "learning_rate": 5.847568988173456e-05, "loss": 0.1018, "step": 16800 }, { "epoch": 21.51, "eval_loss": 0.1897500604391098, "eval_runtime": 123.2693, "eval_samples_per_second": 12.347, "eval_wer": 0.288785707075805, "step": 16800 }, { "epoch": 21.77, "learning_rate": 5.795006570302234e-05, "loss": 0.1116, "step": 17000 }, { "epoch": 22.02, "learning_rate": 5.7424441524310126e-05, "loss": 0.1018, "step": 17200 }, { "epoch": 22.02, "eval_loss": 0.17437991499900818, "eval_runtime": 118.5065, "eval_samples_per_second": 12.843, "eval_wer": 0.28616130009084484, "step": 17200 }, { "epoch": 22.28, "learning_rate": 5.68988173455979e-05, "loss": 0.1042, "step": 17400 }, { "epoch": 22.53, "learning_rate": 5.637319316688568e-05, "loss": 0.0895, "step": 17600 }, { "epoch": 22.53, "eval_loss": 0.18042823672294617, "eval_runtime": 117.6274, "eval_samples_per_second": 12.939, "eval_wer": 0.2839406480266478, "step": 17600 }, { "epoch": 22.79, "learning_rate": 5.584756898817346e-05, "loss": 0.1136, "step": 17800 }, { "epoch": 23.05, "learning_rate": 5.532194480946123e-05, "loss": 0.1098, "step": 18000 }, { "epoch": 23.05, "eval_loss": 0.1741502583026886, "eval_runtime": 125.9195, "eval_samples_per_second": 12.087, "eval_wer": 0.28071060866054304, "step": 18000 }, { "epoch": 23.3, "learning_rate": 5.479632063074902e-05, "loss": 0.1041, "step": 18200 }, { "epoch": 23.56, "learning_rate": 5.4270696452036796e-05, "loss": 0.0944, "step": 18400 }, { "epoch": 23.56, "eval_loss": 0.18072044849395752, "eval_runtime": 117.8579, "eval_samples_per_second": 12.914, "eval_wer": 0.27758150802462905, "step": 18400 }, { "epoch": 23.82, "learning_rate": 5.374507227332457e-05, "loss": 0.1025, "step": 18600 }, { "epoch": 24.07, "learning_rate": 5.321944809461236e-05, "loss": 0.109, "step": 18800 }, { "epoch": 24.07, "eval_loss": 0.1833416223526001, "eval_runtime": 125.3831, "eval_samples_per_second": 12.139, "eval_wer": 0.2742505299283335, "step": 18800 }, { "epoch": 24.33, "learning_rate": 5.269382391590013e-05, "loss": 0.0997, "step": 19000 }, { "epoch": 24.58, "learning_rate": 5.2168199737187916e-05, "loss": 0.0954, "step": 19200 }, { "epoch": 24.58, "eval_loss": 0.18462379276752472, "eval_runtime": 126.0288, "eval_samples_per_second": 12.077, "eval_wer": 0.27758150802462905, "step": 19200 }, { "epoch": 24.84, "learning_rate": 5.1642575558475694e-05, "loss": 0.0982, "step": 19400 }, { "epoch": 25.1, "learning_rate": 5.1116951379763466e-05, "loss": 0.1054, "step": 19600 }, { "epoch": 25.1, "eval_loss": 0.17953717708587646, "eval_runtime": 125.8704, "eval_samples_per_second": 12.092, "eval_wer": 0.2747552235792874, "step": 19600 }, { "epoch": 25.35, "learning_rate": 5.059132720105125e-05, "loss": 0.0965, "step": 19800 }, { "epoch": 25.61, "learning_rate": 5.006570302233903e-05, "loss": 0.097, "step": 20000 }, { "epoch": 25.61, "eval_loss": 0.1935284584760666, "eval_runtime": 117.464, "eval_samples_per_second": 12.957, "eval_wer": 0.27566367215100435, "step": 20000 }, { "epoch": 25.86, "learning_rate": 4.954007884362681e-05, "loss": 0.0924, "step": 20200 }, { "epoch": 26.12, "learning_rate": 4.9014454664914586e-05, "loss": 0.0832, "step": 20400 }, { "epoch": 26.12, "eval_loss": 0.1959334760904312, "eval_runtime": 125.8706, "eval_samples_per_second": 12.092, "eval_wer": 0.27334208135661653, "step": 20400 }, { "epoch": 26.38, "learning_rate": 4.848883048620237e-05, "loss": 0.0977, "step": 20600 }, { "epoch": 26.63, "learning_rate": 4.796320630749015e-05, "loss": 0.0936, "step": 20800 }, { "epoch": 26.63, "eval_loss": 0.1751490980386734, "eval_runtime": 126.4687, "eval_samples_per_second": 12.035, "eval_wer": 0.27202987786413646, "step": 20800 }, { "epoch": 26.89, "learning_rate": 4.743758212877792e-05, "loss": 0.0843, "step": 21000 }, { "epoch": 27.14, "learning_rate": 4.6911957950065706e-05, "loss": 0.1042, "step": 21200 }, { "epoch": 27.14, "eval_loss": 0.18527205288410187, "eval_runtime": 117.2693, "eval_samples_per_second": 12.979, "eval_wer": 0.2728373877056627, "step": 21200 }, { "epoch": 27.4, "learning_rate": 4.6386333771353484e-05, "loss": 0.0881, "step": 21400 }, { "epoch": 27.66, "learning_rate": 4.586070959264126e-05, "loss": 0.0794, "step": 21600 }, { "epoch": 27.66, "eval_loss": 0.17558707296848297, "eval_runtime": 117.2386, "eval_samples_per_second": 12.982, "eval_wer": 0.27192893913394567, "step": 21600 }, { "epoch": 27.91, "learning_rate": 4.533508541392904e-05, "loss": 0.0973, "step": 21800 }, { "epoch": 28.17, "learning_rate": 4.480946123521682e-05, "loss": 0.0882, "step": 22000 }, { "epoch": 28.17, "eval_loss": 0.18478873372077942, "eval_runtime": 117.58, "eval_samples_per_second": 12.944, "eval_wer": 0.27172706167356414, "step": 22000 }, { "epoch": 28.42, "learning_rate": 4.42838370565046e-05, "loss": 0.0894, "step": 22200 }, { "epoch": 28.68, "learning_rate": 4.375821287779238e-05, "loss": 0.0941, "step": 22400 }, { "epoch": 28.68, "eval_loss": 0.1826159507036209, "eval_runtime": 117.7022, "eval_samples_per_second": 12.931, "eval_wer": 0.2711214292924195, "step": 22400 }, { "epoch": 28.94, "learning_rate": 4.323258869908016e-05, "loss": 0.0804, "step": 22600 }, { "epoch": 29.19, "learning_rate": 4.270696452036794e-05, "loss": 0.0904, "step": 22800 }, { "epoch": 29.19, "eval_loss": 0.1743590384721756, "eval_runtime": 117.4139, "eval_samples_per_second": 12.963, "eval_wer": 0.27182800040375493, "step": 22800 }, { "epoch": 29.45, "learning_rate": 4.218134034165572e-05, "loss": 0.0869, "step": 23000 }, { "epoch": 29.71, "learning_rate": 4.1655716162943496e-05, "loss": 0.0845, "step": 23200 }, { "epoch": 29.71, "eval_loss": 0.18005579710006714, "eval_runtime": 117.9505, "eval_samples_per_second": 12.904, "eval_wer": 0.2710204905622287, "step": 23200 }, { "epoch": 29.96, "learning_rate": 4.113009198423128e-05, "loss": 0.0788, "step": 23400 }, { "epoch": 30.22, "learning_rate": 4.060446780551905e-05, "loss": 0.0701, "step": 23600 }, { "epoch": 30.22, "eval_loss": 0.17918919026851654, "eval_runtime": 118.3396, "eval_samples_per_second": 12.861, "eval_wer": 0.27051579691127486, "step": 23600 }, { "epoch": 30.47, "learning_rate": 4.007884362680683e-05, "loss": 0.0851, "step": 23800 }, { "epoch": 30.73, "learning_rate": 3.9553219448094617e-05, "loss": 0.0772, "step": 24000 }, { "epoch": 30.73, "eval_loss": 0.1685931384563446, "eval_runtime": 117.9466, "eval_samples_per_second": 12.904, "eval_wer": 0.2686988997678409, "step": 24000 }, { "epoch": 30.99, "learning_rate": 3.9027595269382395e-05, "loss": 0.0863, "step": 24200 }, { "epoch": 31.24, "learning_rate": 3.850197109067017e-05, "loss": 0.0772, "step": 24400 }, { "epoch": 31.24, "eval_loss": 0.16450461745262146, "eval_runtime": 118.178, "eval_samples_per_second": 12.879, "eval_wer": 0.26637730897345313, "step": 24400 }, { "epoch": 31.5, "learning_rate": 3.797634691195795e-05, "loss": 0.0935, "step": 24600 }, { "epoch": 31.75, "learning_rate": 3.745072273324573e-05, "loss": 0.0855, "step": 24800 }, { "epoch": 31.75, "eval_loss": 0.1594998687505722, "eval_runtime": 120.8974, "eval_samples_per_second": 12.589, "eval_wer": 0.26476228929040074, "step": 24800 }, { "epoch": 32.01, "learning_rate": 3.692509855453351e-05, "loss": 0.0794, "step": 25000 }, { "epoch": 32.27, "learning_rate": 3.6399474375821293e-05, "loss": 0.084, "step": 25200 }, { "epoch": 32.27, "eval_loss": 0.16218672692775726, "eval_runtime": 120.7524, "eval_samples_per_second": 12.604, "eval_wer": 0.2636519632583022, "step": 25200 }, { "epoch": 32.52, "learning_rate": 3.587385019710907e-05, "loss": 0.0815, "step": 25400 }, { "epoch": 32.78, "learning_rate": 3.534822601839684e-05, "loss": 0.0812, "step": 25600 }, { "epoch": 32.78, "eval_loss": 0.16536009311676025, "eval_runtime": 129.74, "eval_samples_per_second": 11.731, "eval_wer": 0.26466135056020995, "step": 25600 }, { "epoch": 33.03, "learning_rate": 3.482260183968463e-05, "loss": 0.0707, "step": 25800 }, { "epoch": 33.29, "learning_rate": 3.429697766097241e-05, "loss": 0.0859, "step": 26000 }, { "epoch": 33.29, "eval_loss": 0.18085506558418274, "eval_runtime": 129.8039, "eval_samples_per_second": 11.725, "eval_wer": 0.2648632280205915, "step": 26000 }, { "epoch": 33.55, "learning_rate": 3.3771353482260185e-05, "loss": 0.0666, "step": 26200 }, { "epoch": 33.8, "learning_rate": 3.3245729303547964e-05, "loss": 0.0796, "step": 26400 }, { "epoch": 33.8, "eval_loss": 0.17295604944229126, "eval_runtime": 125.8166, "eval_samples_per_second": 12.097, "eval_wer": 0.2602200464318159, "step": 26400 }, { "epoch": 34.06, "learning_rate": 3.272010512483574e-05, "loss": 0.0797, "step": 26600 }, { "epoch": 34.31, "learning_rate": 3.219448094612352e-05, "loss": 0.0738, "step": 26800 }, { "epoch": 34.31, "eval_loss": 0.17386285960674286, "eval_runtime": 129.8901, "eval_samples_per_second": 11.718, "eval_wer": 0.2622388210356314, "step": 26800 }, { "epoch": 34.57, "learning_rate": 3.1668856767411305e-05, "loss": 0.0805, "step": 27000 }, { "epoch": 34.83, "learning_rate": 3.1143232588699084e-05, "loss": 0.0659, "step": 27200 }, { "epoch": 34.83, "eval_loss": 0.17923414707183838, "eval_runtime": 124.9405, "eval_samples_per_second": 12.182, "eval_wer": 0.2634500857979207, "step": 27200 }, { "epoch": 35.08, "learning_rate": 3.061760840998686e-05, "loss": 0.0847, "step": 27400 }, { "epoch": 35.34, "learning_rate": 3.009198423127464e-05, "loss": 0.0657, "step": 27600 }, { "epoch": 35.34, "eval_loss": 0.16643725335597992, "eval_runtime": 128.2311, "eval_samples_per_second": 11.869, "eval_wer": 0.2618350661148683, "step": 27600 }, { "epoch": 35.6, "learning_rate": 2.956636005256242e-05, "loss": 0.0716, "step": 27800 }, { "epoch": 35.85, "learning_rate": 2.90407358738502e-05, "loss": 0.0761, "step": 28000 }, { "epoch": 35.85, "eval_loss": 0.17373836040496826, "eval_runtime": 131.1327, "eval_samples_per_second": 11.607, "eval_wer": 0.26284445341677604, "step": 28000 }, { "epoch": 36.11, "learning_rate": 2.851511169513798e-05, "loss": 0.0759, "step": 28200 }, { "epoch": 36.36, "learning_rate": 2.7989487516425754e-05, "loss": 0.0724, "step": 28400 }, { "epoch": 36.36, "eval_loss": 0.17160625755786896, "eval_runtime": 130.0843, "eval_samples_per_second": 11.7, "eval_wer": 0.2599172302412436, "step": 28400 }, { "epoch": 36.62, "learning_rate": 2.7463863337713536e-05, "loss": 0.0682, "step": 28600 }, { "epoch": 36.88, "learning_rate": 2.6938239159001317e-05, "loss": 0.0712, "step": 28800 }, { "epoch": 36.88, "eval_loss": 0.18185953795909882, "eval_runtime": 131.2291, "eval_samples_per_second": 11.598, "eval_wer": 0.2621378823054406, "step": 28800 }, { "epoch": 37.13, "learning_rate": 2.6412614980289096e-05, "loss": 0.0755, "step": 29000 }, { "epoch": 37.39, "learning_rate": 2.588699080157687e-05, "loss": 0.0725, "step": 29200 }, { "epoch": 37.39, "eval_loss": 0.18556980788707733, "eval_runtime": 129.7316, "eval_samples_per_second": 11.732, "eval_wer": 0.26082567881296054, "step": 29200 }, { "epoch": 37.64, "learning_rate": 2.5361366622864652e-05, "loss": 0.0711, "step": 29400 }, { "epoch": 37.9, "learning_rate": 2.483574244415243e-05, "loss": 0.0703, "step": 29600 }, { "epoch": 37.9, "eval_loss": 0.18763265013694763, "eval_runtime": 130.3296, "eval_samples_per_second": 11.678, "eval_wer": 0.2598162915110528, "step": 29600 }, { "epoch": 38.16, "learning_rate": 2.4310118265440212e-05, "loss": 0.0696, "step": 29800 }, { "epoch": 38.41, "learning_rate": 2.378449408672799e-05, "loss": 0.0656, "step": 30000 }, { "epoch": 38.41, "eval_loss": 0.19020360708236694, "eval_runtime": 120.9063, "eval_samples_per_second": 12.588, "eval_wer": 0.25608155849399417, "step": 30000 }, { "epoch": 38.67, "learning_rate": 2.325886990801577e-05, "loss": 0.0694, "step": 30200 }, { "epoch": 38.92, "learning_rate": 2.2733245729303547e-05, "loss": 0.0704, "step": 30400 }, { "epoch": 38.92, "eval_loss": 0.1706458330154419, "eval_runtime": 121.416, "eval_samples_per_second": 12.535, "eval_wer": 0.25386090642979714, "step": 30400 }, { "epoch": 39.18, "learning_rate": 2.220762155059133e-05, "loss": 0.0732, "step": 30600 }, { "epoch": 39.44, "learning_rate": 2.1681997371879108e-05, "loss": 0.0631, "step": 30800 }, { "epoch": 39.44, "eval_loss": 0.18751636147499084, "eval_runtime": 121.7301, "eval_samples_per_second": 12.503, "eval_wer": 0.25678812960532954, "step": 30800 }, { "epoch": 39.69, "learning_rate": 2.1156373193166886e-05, "loss": 0.0669, "step": 31000 }, { "epoch": 39.95, "learning_rate": 2.0630749014454668e-05, "loss": 0.0674, "step": 31200 }, { "epoch": 39.95, "eval_loss": 0.1696523278951645, "eval_runtime": 130.8318, "eval_samples_per_second": 11.633, "eval_wer": 0.25739376198647423, "step": 31200 }, { "epoch": 40.2, "learning_rate": 2.0105124835742446e-05, "loss": 0.0642, "step": 31400 }, { "epoch": 40.46, "learning_rate": 1.9579500657030224e-05, "loss": 0.0729, "step": 31600 }, { "epoch": 40.46, "eval_loss": 0.17705558240413666, "eval_runtime": 123.0592, "eval_samples_per_second": 12.368, "eval_wer": 0.2561824972241849, "step": 31600 }, { "epoch": 40.72, "learning_rate": 1.9053876478318003e-05, "loss": 0.0636, "step": 31800 }, { "epoch": 40.97, "learning_rate": 1.8528252299605784e-05, "loss": 0.0696, "step": 32000 }, { "epoch": 40.97, "eval_loss": 0.16749870777130127, "eval_runtime": 128.7269, "eval_samples_per_second": 11.823, "eval_wer": 0.25577874230342185, "step": 32000 }, { "epoch": 41.23, "learning_rate": 1.800262812089356e-05, "loss": 0.0703, "step": 32200 }, { "epoch": 41.48, "learning_rate": 1.747700394218134e-05, "loss": 0.0674, "step": 32400 }, { "epoch": 41.48, "eval_loss": 0.1687408983707428, "eval_runtime": 123.6528, "eval_samples_per_second": 12.309, "eval_wer": 0.25688906833552033, "step": 32400 }, { "epoch": 41.74, "learning_rate": 1.695137976346912e-05, "loss": 0.066, "step": 32600 }, { "epoch": 42.0, "learning_rate": 1.6425755584756898e-05, "loss": 0.0594, "step": 32800 }, { "epoch": 42.0, "eval_loss": 0.1702132523059845, "eval_runtime": 122.2348, "eval_samples_per_second": 12.451, "eval_wer": 0.25406278389017867, "step": 32800 }, { "epoch": 42.25, "learning_rate": 1.590013140604468e-05, "loss": 0.0595, "step": 33000 }, { "epoch": 42.51, "learning_rate": 1.5374507227332458e-05, "loss": 0.0653, "step": 33200 }, { "epoch": 42.51, "eval_loss": 0.1668023020029068, "eval_runtime": 121.6403, "eval_samples_per_second": 12.512, "eval_wer": 0.25355809023922476, "step": 33200 }, { "epoch": 42.77, "learning_rate": 1.4848883048620238e-05, "loss": 0.0671, "step": 33400 }, { "epoch": 43.02, "learning_rate": 1.4323258869908016e-05, "loss": 0.0647, "step": 33600 }, { "epoch": 43.02, "eval_loss": 0.1677715927362442, "eval_runtime": 121.5078, "eval_samples_per_second": 12.526, "eval_wer": 0.2561824972241849, "step": 33600 }, { "epoch": 43.28, "learning_rate": 1.3797634691195796e-05, "loss": 0.065, "step": 33800 }, { "epoch": 43.53, "learning_rate": 1.3272010512483573e-05, "loss": 0.059, "step": 34000 }, { "epoch": 43.53, "eval_loss": 0.17403629422187805, "eval_runtime": 121.8146, "eval_samples_per_second": 12.494, "eval_wer": 0.2570909457959019, "step": 34000 }, { "epoch": 43.79, "learning_rate": 1.2746386333771355e-05, "loss": 0.0586, "step": 34200 }, { "epoch": 44.05, "learning_rate": 1.2220762155059133e-05, "loss": 0.0597, "step": 34400 }, { "epoch": 44.05, "eval_loss": 0.1736619770526886, "eval_runtime": 122.8592, "eval_samples_per_second": 12.388, "eval_wer": 0.25375996769960635, "step": 34400 }, { "epoch": 44.3, "learning_rate": 1.1695137976346911e-05, "loss": 0.057, "step": 34600 }, { "epoch": 44.56, "learning_rate": 1.1169513797634691e-05, "loss": 0.0644, "step": 34800 }, { "epoch": 44.56, "eval_loss": 0.17717154324054718, "eval_runtime": 121.8694, "eval_samples_per_second": 12.489, "eval_wer": 0.2528515191278894, "step": 34800 }, { "epoch": 44.81, "learning_rate": 1.0643889618922471e-05, "loss": 0.0606, "step": 35000 }, { "epoch": 45.07, "learning_rate": 1.011826544021025e-05, "loss": 0.0572, "step": 35200 }, { "epoch": 45.07, "eval_loss": 0.1694445163011551, "eval_runtime": 122.3379, "eval_samples_per_second": 12.441, "eval_wer": 0.2547693550015141, "step": 35200 }, { "epoch": 45.33, "learning_rate": 9.59264126149803e-06, "loss": 0.0539, "step": 35400 }, { "epoch": 45.58, "learning_rate": 9.067017082785808e-06, "loss": 0.0695, "step": 35600 }, { "epoch": 45.58, "eval_loss": 0.16321270167827606, "eval_runtime": 121.7216, "eval_samples_per_second": 12.504, "eval_wer": 0.25214494801655396, "step": 35600 }, { "epoch": 45.84, "learning_rate": 8.541392904073588e-06, "loss": 0.0626, "step": 35800 }, { "epoch": 46.09, "learning_rate": 8.015768725361367e-06, "loss": 0.0626, "step": 36000 }, { "epoch": 46.09, "eval_loss": 0.163968026638031, "eval_runtime": 129.7157, "eval_samples_per_second": 11.733, "eval_wer": 0.25345715150903403, "step": 36000 }, { "epoch": 46.35, "learning_rate": 7.490144546649146e-06, "loss": 0.0577, "step": 36200 }, { "epoch": 46.61, "learning_rate": 6.964520367936925e-06, "loss": 0.0595, "step": 36400 }, { "epoch": 46.61, "eval_loss": 0.16631732881069183, "eval_runtime": 132.5032, "eval_samples_per_second": 11.487, "eval_wer": 0.25143837690521853, "step": 36400 }, { "epoch": 46.86, "learning_rate": 6.438896189224705e-06, "loss": 0.054, "step": 36600 }, { "epoch": 47.12, "learning_rate": 5.913272010512484e-06, "loss": 0.0625, "step": 36800 }, { "epoch": 47.12, "eval_loss": 0.1676352173089981, "eval_runtime": 131.5543, "eval_samples_per_second": 11.569, "eval_wer": 0.25214494801655396, "step": 36800 }, { "epoch": 47.37, "learning_rate": 5.3876478318002635e-06, "loss": 0.0599, "step": 37000 }, { "epoch": 47.63, "learning_rate": 4.862023653088043e-06, "loss": 0.0625, "step": 37200 }, { "epoch": 47.63, "eval_loss": 0.16559843719005585, "eval_runtime": 122.7608, "eval_samples_per_second": 12.398, "eval_wer": 0.2523468254769355, "step": 37200 }, { "epoch": 47.89, "learning_rate": 4.336399474375821e-06, "loss": 0.0612, "step": 37400 }, { "epoch": 48.14, "learning_rate": 3.810775295663601e-06, "loss": 0.0526, "step": 37600 }, { "epoch": 48.14, "eval_loss": 0.1701846420764923, "eval_runtime": 121.6682, "eval_samples_per_second": 12.509, "eval_wer": 0.2527505803976986, "step": 37600 }, { "epoch": 48.4, "learning_rate": 3.2851511169513802e-06, "loss": 0.0645, "step": 37800 }, { "epoch": 48.66, "learning_rate": 2.759526938239159e-06, "loss": 0.0484, "step": 38000 }, { "epoch": 48.66, "eval_loss": 0.16875717043876648, "eval_runtime": 121.6109, "eval_samples_per_second": 12.515, "eval_wer": 0.25335621277884324, "step": 38000 }, { "epoch": 48.91, "learning_rate": 2.2339027595269386e-06, "loss": 0.0564, "step": 38200 }, { "epoch": 49.17, "learning_rate": 1.7082785808147174e-06, "loss": 0.0674, "step": 38400 }, { "epoch": 49.17, "eval_loss": 0.16684982180595398, "eval_runtime": 121.9908, "eval_samples_per_second": 12.476, "eval_wer": 0.25224588674674475, "step": 38400 }, { "epoch": 49.42, "learning_rate": 1.1826544021024968e-06, "loss": 0.064, "step": 38600 }, { "epoch": 49.68, "learning_rate": 6.57030223390276e-07, "loss": 0.054, "step": 38800 }, { "epoch": 49.68, "eval_loss": 0.16605187952518463, "eval_runtime": 121.8992, "eval_samples_per_second": 12.486, "eval_wer": 0.2519430705561724, "step": 38800 }, { "epoch": 49.94, "learning_rate": 1.314060446780552e-07, "loss": 0.0545, "step": 39000 } ], "max_steps": 39050, "num_train_epochs": 50, "total_flos": 5.8955237568621085e+19, "trial_name": null, "trial_params": null }