{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.999677731227845, "global_step": 23265, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 4.800000000000001e-06, "loss": 5.4592, "step": 100 }, { "epoch": 0.13, "learning_rate": 9.800000000000001e-06, "loss": 2.9124, "step": 200 }, { "epoch": 0.19, "learning_rate": 1.48e-05, "loss": 2.3526, "step": 300 }, { "epoch": 0.26, "learning_rate": 1.9800000000000004e-05, "loss": 1.1889, "step": 400 }, { "epoch": 0.32, "learning_rate": 2.48e-05, "loss": 0.8361, "step": 500 }, { "epoch": 0.32, "eval_loss": 0.6304140686988831, "eval_runtime": 553.4275, "eval_samples_per_second": 9.824, "eval_steps_per_second": 0.614, "eval_wer": 0.4970241305264396, "step": 500 }, { "epoch": 0.39, "learning_rate": 2.98e-05, "loss": 0.6758, "step": 600 }, { "epoch": 0.45, "learning_rate": 3.48e-05, "loss": 0.6902, "step": 700 }, { "epoch": 0.52, "learning_rate": 3.9800000000000005e-05, "loss": 0.6335, "step": 800 }, { "epoch": 0.58, "learning_rate": 4.4800000000000005e-05, "loss": 0.5867, "step": 900 }, { "epoch": 0.64, "learning_rate": 4.9800000000000004e-05, "loss": 0.5703, "step": 1000 }, { "epoch": 0.64, "eval_loss": 0.3195364475250244, "eval_runtime": 552.5688, "eval_samples_per_second": 9.839, "eval_steps_per_second": 0.615, "eval_wer": 0.27750595664153277, "step": 1000 }, { "epoch": 0.71, "learning_rate": 5.4800000000000004e-05, "loss": 0.5762, "step": 1100 }, { "epoch": 0.77, "learning_rate": 5.9800000000000003e-05, "loss": 0.5656, "step": 1200 }, { "epoch": 0.84, "learning_rate": 6.48e-05, "loss": 0.5864, "step": 1300 }, { "epoch": 0.9, "learning_rate": 6.975e-05, "loss": 0.5116, "step": 1400 }, { "epoch": 0.97, "learning_rate": 7.475000000000001e-05, "loss": 0.5451, "step": 1500 }, { "epoch": 0.97, "eval_loss": 0.2699669599533081, "eval_runtime": 552.0028, "eval_samples_per_second": 9.85, "eval_steps_per_second": 0.616, "eval_wer": 0.2246070577622638, "step": 1500 }, { "epoch": 1.03, "learning_rate": 7.975e-05, "loss": 0.5134, "step": 1600 }, { "epoch": 1.1, "learning_rate": 8.475000000000001e-05, "loss": 0.467, "step": 1700 }, { "epoch": 1.16, "learning_rate": 8.975e-05, "loss": 0.4769, "step": 1800 }, { "epoch": 1.22, "learning_rate": 9.47e-05, "loss": 0.461, "step": 1900 }, { "epoch": 1.29, "learning_rate": 9.970000000000001e-05, "loss": 0.47, "step": 2000 }, { "epoch": 1.29, "eval_loss": 0.25637131929397583, "eval_runtime": 552.4533, "eval_samples_per_second": 9.842, "eval_steps_per_second": 0.615, "eval_wer": 0.23294145332248228, "step": 2000 }, { "epoch": 1.35, "learning_rate": 9.955795908770281e-05, "loss": 0.4506, "step": 2100 }, { "epoch": 1.42, "learning_rate": 9.908770279802493e-05, "loss": 0.4154, "step": 2200 }, { "epoch": 1.48, "learning_rate": 9.861744650834705e-05, "loss": 0.4377, "step": 2300 }, { "epoch": 1.55, "learning_rate": 9.814719021866918e-05, "loss": 0.4723, "step": 2400 }, { "epoch": 1.61, "learning_rate": 9.767693392899132e-05, "loss": 0.4063, "step": 2500 }, { "epoch": 1.61, "eval_loss": 0.2459278106689453, "eval_runtime": 552.824, "eval_samples_per_second": 9.835, "eval_steps_per_second": 0.615, "eval_wer": 0.20988949572004276, "step": 2500 }, { "epoch": 1.68, "learning_rate": 9.720667763931343e-05, "loss": 0.3807, "step": 2600 }, { "epoch": 1.74, "learning_rate": 9.673642134963555e-05, "loss": 0.4094, "step": 2700 }, { "epoch": 1.81, "learning_rate": 9.626616505995768e-05, "loss": 0.401, "step": 2800 }, { "epoch": 1.87, "learning_rate": 9.579590877027981e-05, "loss": 0.3849, "step": 2900 }, { "epoch": 1.93, "learning_rate": 9.532565248060194e-05, "loss": 0.374, "step": 3000 }, { "epoch": 1.93, "eval_loss": 0.21745240688323975, "eval_runtime": 553.0877, "eval_samples_per_second": 9.83, "eval_steps_per_second": 0.615, "eval_wer": 0.18940649298439996, "step": 3000 }, { "epoch": 2.0, "learning_rate": 9.485539619092406e-05, "loss": 0.3684, "step": 3100 }, { "epoch": 2.06, "learning_rate": 9.438513990124619e-05, "loss": 0.3658, "step": 3200 }, { "epoch": 2.13, "learning_rate": 9.391488361156831e-05, "loss": 0.3432, "step": 3300 }, { "epoch": 2.19, "learning_rate": 9.344462732189043e-05, "loss": 0.336, "step": 3400 }, { "epoch": 2.26, "learning_rate": 9.297437103221256e-05, "loss": 0.3297, "step": 3500 }, { "epoch": 2.26, "eval_loss": 0.20356999337673187, "eval_runtime": 553.8668, "eval_samples_per_second": 9.816, "eval_steps_per_second": 0.614, "eval_wer": 0.17550276015570612, "step": 3500 }, { "epoch": 2.32, "learning_rate": 9.250411474253468e-05, "loss": 0.3177, "step": 3600 }, { "epoch": 2.39, "learning_rate": 9.203385845285682e-05, "loss": 0.3111, "step": 3700 }, { "epoch": 2.45, "learning_rate": 9.156360216317894e-05, "loss": 0.3209, "step": 3800 }, { "epoch": 2.51, "learning_rate": 9.109334587350105e-05, "loss": 0.3439, "step": 3900 }, { "epoch": 2.58, "learning_rate": 9.062308958382318e-05, "loss": 0.3145, "step": 4000 }, { "epoch": 2.58, "eval_loss": 0.1957111805677414, "eval_runtime": 553.0089, "eval_samples_per_second": 9.832, "eval_steps_per_second": 0.615, "eval_wer": 0.1757282790943944, "step": 4000 }, { "epoch": 2.64, "learning_rate": 9.01575358570421e-05, "loss": 0.3077, "step": 4100 }, { "epoch": 2.71, "learning_rate": 8.968727956736421e-05, "loss": 0.3147, "step": 4200 }, { "epoch": 2.77, "learning_rate": 8.921702327768633e-05, "loss": 0.3232, "step": 4300 }, { "epoch": 2.84, "learning_rate": 8.874676698800847e-05, "loss": 0.3195, "step": 4400 }, { "epoch": 2.9, "learning_rate": 8.82765106983306e-05, "loss": 0.3989, "step": 4500 }, { "epoch": 2.9, "eval_loss": 0.19227643311023712, "eval_runtime": 554.5698, "eval_samples_per_second": 9.804, "eval_steps_per_second": 0.613, "eval_wer": 0.17229646915783384, "step": 4500 }, { "epoch": 2.97, "learning_rate": 8.780625440865272e-05, "loss": 0.3035, "step": 4600 }, { "epoch": 3.03, "learning_rate": 8.733599811897484e-05, "loss": 0.2997, "step": 4700 }, { "epoch": 3.09, "learning_rate": 8.686574182929698e-05, "loss": 0.2798, "step": 4800 }, { "epoch": 3.16, "learning_rate": 8.639548553961909e-05, "loss": 0.274, "step": 4900 }, { "epoch": 3.22, "learning_rate": 8.592522924994122e-05, "loss": 0.271, "step": 5000 }, { "epoch": 3.22, "eval_loss": 0.18889272212982178, "eval_runtime": 553.6667, "eval_samples_per_second": 9.82, "eval_steps_per_second": 0.614, "eval_wer": 0.16491317520860502, "step": 5000 }, { "epoch": 3.29, "learning_rate": 8.545497296026334e-05, "loss": 0.2726, "step": 5100 }, { "epoch": 3.35, "learning_rate": 8.498471667058548e-05, "loss": 0.283, "step": 5200 }, { "epoch": 3.42, "learning_rate": 8.45144603809076e-05, "loss": 0.2837, "step": 5300 }, { "epoch": 3.48, "learning_rate": 8.404420409122973e-05, "loss": 0.2915, "step": 5400 }, { "epoch": 3.55, "learning_rate": 8.357394780155185e-05, "loss": 0.2758, "step": 5500 }, { "epoch": 3.55, "eval_loss": 0.17681501805782318, "eval_runtime": 557.2307, "eval_samples_per_second": 9.757, "eval_steps_per_second": 0.61, "eval_wer": 0.15884377420651652, "step": 5500 }, { "epoch": 3.61, "learning_rate": 8.310369151187397e-05, "loss": 0.2699, "step": 5600 }, { "epoch": 3.67, "learning_rate": 8.26334352221961e-05, "loss": 0.2659, "step": 5700 }, { "epoch": 3.74, "learning_rate": 8.216317893251822e-05, "loss": 0.2759, "step": 5800 }, { "epoch": 3.8, "learning_rate": 8.169292264284035e-05, "loss": 0.2632, "step": 5900 }, { "epoch": 3.87, "learning_rate": 8.122736891605925e-05, "loss": 0.2683, "step": 6000 }, { "epoch": 3.87, "eval_loss": 0.17198732495307922, "eval_runtime": 555.6592, "eval_samples_per_second": 9.785, "eval_steps_per_second": 0.612, "eval_wer": 0.1533822938217616, "step": 6000 }, { "epoch": 3.93, "learning_rate": 8.075711262638138e-05, "loss": 0.2628, "step": 6100 }, { "epoch": 4.0, "learning_rate": 8.02868563367035e-05, "loss": 0.3404, "step": 6200 }, { "epoch": 4.06, "learning_rate": 7.981660004702564e-05, "loss": 0.2964, "step": 6300 }, { "epoch": 4.13, "learning_rate": 7.934634375734776e-05, "loss": 0.2458, "step": 6400 }, { "epoch": 4.19, "learning_rate": 7.887608746766987e-05, "loss": 0.2341, "step": 6500 }, { "epoch": 4.19, "eval_loss": 0.16891594231128693, "eval_runtime": 553.369, "eval_samples_per_second": 9.825, "eval_steps_per_second": 0.614, "eval_wer": 0.1470775687097375, "step": 6500 }, { "epoch": 4.26, "learning_rate": 7.840583117799201e-05, "loss": 0.2353, "step": 6600 }, { "epoch": 4.32, "learning_rate": 7.794027745121092e-05, "loss": 0.2433, "step": 6700 }, { "epoch": 4.38, "learning_rate": 7.747002116153303e-05, "loss": 0.2467, "step": 6800 }, { "epoch": 4.45, "learning_rate": 7.699976487185517e-05, "loss": 0.2423, "step": 6900 }, { "epoch": 4.51, "learning_rate": 7.652950858217729e-05, "loss": 0.2316, "step": 7000 }, { "epoch": 4.51, "eval_loss": 0.1705636978149414, "eval_runtime": 555.4457, "eval_samples_per_second": 9.789, "eval_steps_per_second": 0.612, "eval_wer": 0.14053751948777785, "step": 7000 }, { "epoch": 4.58, "learning_rate": 7.605925229249942e-05, "loss": 0.2239, "step": 7100 }, { "epoch": 4.64, "learning_rate": 7.558899600282154e-05, "loss": 0.2305, "step": 7200 }, { "epoch": 4.71, "learning_rate": 7.511873971314367e-05, "loss": 0.2291, "step": 7300 }, { "epoch": 4.77, "learning_rate": 7.464848342346579e-05, "loss": 0.2411, "step": 7400 }, { "epoch": 4.84, "learning_rate": 7.417822713378791e-05, "loss": 0.2383, "step": 7500 }, { "epoch": 4.84, "eval_loss": 0.16366632282733917, "eval_runtime": 555.2596, "eval_samples_per_second": 9.792, "eval_steps_per_second": 0.612, "eval_wer": 0.1426064106209615, "step": 7500 }, { "epoch": 4.9, "learning_rate": 7.370797084411004e-05, "loss": 0.2328, "step": 7600 }, { "epoch": 4.96, "learning_rate": 7.323771455443218e-05, "loss": 0.2575, "step": 7700 }, { "epoch": 5.03, "learning_rate": 7.27674582647543e-05, "loss": 0.2183, "step": 7800 }, { "epoch": 5.09, "learning_rate": 7.229720197507642e-05, "loss": 0.2254, "step": 7900 }, { "epoch": 5.16, "learning_rate": 7.182694568539855e-05, "loss": 0.2148, "step": 8000 }, { "epoch": 5.16, "eval_loss": 0.1583578884601593, "eval_runtime": 555.4598, "eval_samples_per_second": 9.788, "eval_steps_per_second": 0.612, "eval_wer": 0.1347328581093669, "step": 8000 }, { "epoch": 5.22, "learning_rate": 7.135668939572067e-05, "loss": 0.2096, "step": 8100 }, { "epoch": 5.29, "learning_rate": 7.08864331060428e-05, "loss": 0.2142, "step": 8200 }, { "epoch": 5.35, "learning_rate": 7.041617681636492e-05, "loss": 0.2113, "step": 8300 }, { "epoch": 5.42, "learning_rate": 6.994592052668704e-05, "loss": 0.2177, "step": 8400 }, { "epoch": 5.48, "learning_rate": 6.947566423700918e-05, "loss": 0.2085, "step": 8500 }, { "epoch": 5.48, "eval_loss": 0.16014662384986877, "eval_runtime": 556.0462, "eval_samples_per_second": 9.778, "eval_steps_per_second": 0.611, "eval_wer": 0.13866473177954053, "step": 8500 }, { "epoch": 5.54, "learning_rate": 6.90054079473313e-05, "loss": 0.2112, "step": 8600 }, { "epoch": 5.61, "learning_rate": 6.853515165765343e-05, "loss": 0.2051, "step": 8700 }, { "epoch": 5.67, "learning_rate": 6.806489536797554e-05, "loss": 0.2214, "step": 8800 }, { "epoch": 5.74, "learning_rate": 6.759463907829768e-05, "loss": 0.2155, "step": 8900 }, { "epoch": 5.8, "learning_rate": 6.71243827886198e-05, "loss": 0.2944, "step": 9000 }, { "epoch": 5.8, "eval_loss": 0.1565837264060974, "eval_runtime": 556.6846, "eval_samples_per_second": 9.767, "eval_steps_per_second": 0.611, "eval_wer": 0.1293596242658378, "step": 9000 }, { "epoch": 5.87, "learning_rate": 6.665412649894193e-05, "loss": 0.2048, "step": 9100 }, { "epoch": 5.93, "learning_rate": 6.618387020926405e-05, "loss": 0.2089, "step": 9200 }, { "epoch": 6.0, "learning_rate": 6.571831648248296e-05, "loss": 0.1928, "step": 9300 }, { "epoch": 6.06, "learning_rate": 6.524806019280508e-05, "loss": 0.2435, "step": 9400 }, { "epoch": 6.13, "learning_rate": 6.47778039031272e-05, "loss": 0.1944, "step": 9500 }, { "epoch": 6.13, "eval_loss": 0.14938245713710785, "eval_runtime": 556.2024, "eval_samples_per_second": 9.775, "eval_steps_per_second": 0.611, "eval_wer": 0.12710443487895517, "step": 9500 }, { "epoch": 6.19, "learning_rate": 6.430754761344934e-05, "loss": 0.1952, "step": 9600 }, { "epoch": 6.25, "learning_rate": 6.383729132377145e-05, "loss": 0.1923, "step": 9700 }, { "epoch": 6.32, "learning_rate": 6.336703503409358e-05, "loss": 0.1796, "step": 9800 }, { "epoch": 6.38, "learning_rate": 6.28967787444157e-05, "loss": 0.1851, "step": 9900 }, { "epoch": 6.45, "learning_rate": 6.242652245473784e-05, "loss": 0.1853, "step": 10000 }, { "epoch": 6.45, "eval_loss": 0.1561189442873001, "eval_runtime": 554.459, "eval_samples_per_second": 9.806, "eval_steps_per_second": 0.613, "eval_wer": 0.12469236275211547, "step": 10000 }, { "epoch": 6.51, "learning_rate": 6.195626616505996e-05, "loss": 0.1918, "step": 10100 }, { "epoch": 6.58, "learning_rate": 6.148600987538209e-05, "loss": 0.1743, "step": 10200 }, { "epoch": 6.64, "learning_rate": 6.101575358570422e-05, "loss": 0.1967, "step": 10300 }, { "epoch": 6.71, "learning_rate": 6.054549729602633e-05, "loss": 0.1922, "step": 10400 }, { "epoch": 6.77, "learning_rate": 6.0079943569245244e-05, "loss": 0.235, "step": 10500 }, { "epoch": 6.77, "eval_loss": 0.14610491693019867, "eval_runtime": 555.3804, "eval_samples_per_second": 9.79, "eval_steps_per_second": 0.612, "eval_wer": 0.1214958769254905, "step": 10500 }, { "epoch": 6.83, "learning_rate": 5.9609687279567375e-05, "loss": 0.1934, "step": 10600 }, { "epoch": 6.9, "learning_rate": 5.9139430989889486e-05, "loss": 0.1913, "step": 10700 }, { "epoch": 6.96, "learning_rate": 5.866917470021162e-05, "loss": 0.1801, "step": 10800 }, { "epoch": 7.03, "learning_rate": 5.819891841053374e-05, "loss": 0.1829, "step": 10900 }, { "epoch": 7.09, "learning_rate": 5.772866212085587e-05, "loss": 0.2286, "step": 11000 }, { "epoch": 7.09, "eval_loss": 0.1447422057390213, "eval_runtime": 557.5344, "eval_samples_per_second": 9.752, "eval_steps_per_second": 0.61, "eval_wer": 0.11670114818555306, "step": 11000 }, { "epoch": 7.16, "learning_rate": 5.7258405831177996e-05, "loss": 0.1764, "step": 11100 }, { "epoch": 7.22, "learning_rate": 5.6788149541500127e-05, "loss": 0.1775, "step": 11200 }, { "epoch": 7.29, "learning_rate": 5.631789325182225e-05, "loss": 0.1602, "step": 11300 }, { "epoch": 7.35, "learning_rate": 5.584763696214437e-05, "loss": 0.1748, "step": 11400 }, { "epoch": 7.41, "learning_rate": 5.537738067246649e-05, "loss": 0.1781, "step": 11500 }, { "epoch": 7.41, "eval_loss": 0.15018154680728912, "eval_runtime": 554.8903, "eval_samples_per_second": 9.798, "eval_steps_per_second": 0.613, "eval_wer": 0.11993685469716729, "step": 11500 }, { "epoch": 7.48, "learning_rate": 5.490712438278862e-05, "loss": 0.1682, "step": 11600 }, { "epoch": 7.54, "learning_rate": 5.443686809311075e-05, "loss": 0.1706, "step": 11700 }, { "epoch": 7.61, "learning_rate": 5.397131436632965e-05, "loss": 0.1554, "step": 11800 }, { "epoch": 7.67, "learning_rate": 5.350105807665178e-05, "loss": 0.1612, "step": 11900 }, { "epoch": 7.74, "learning_rate": 5.30308017869739e-05, "loss": 0.1714, "step": 12000 }, { "epoch": 7.74, "eval_loss": 0.14246602356433868, "eval_runtime": 557.0268, "eval_samples_per_second": 9.761, "eval_steps_per_second": 0.61, "eval_wer": 0.11787776873523097, "step": 12000 }, { "epoch": 7.8, "learning_rate": 5.2560545497296034e-05, "loss": 0.1719, "step": 12100 }, { "epoch": 7.87, "learning_rate": 5.209028920761816e-05, "loss": 0.1663, "step": 12200 }, { "epoch": 7.93, "learning_rate": 5.1620032917940275e-05, "loss": 0.1597, "step": 12300 }, { "epoch": 7.99, "learning_rate": 5.11497766282624e-05, "loss": 0.1622, "step": 12400 }, { "epoch": 8.06, "learning_rate": 5.067952033858453e-05, "loss": 0.1725, "step": 12500 }, { "epoch": 8.06, "eval_loss": 0.1426704078912735, "eval_runtime": 554.3005, "eval_samples_per_second": 9.809, "eval_steps_per_second": 0.613, "eval_wer": 0.11733848431662859, "step": 12500 }, { "epoch": 8.12, "learning_rate": 5.0209264048906654e-05, "loss": 0.1545, "step": 12600 }, { "epoch": 8.19, "learning_rate": 4.9739007759228785e-05, "loss": 0.1531, "step": 12700 }, { "epoch": 8.25, "learning_rate": 4.92687514695509e-05, "loss": 0.1546, "step": 12800 }, { "epoch": 8.32, "learning_rate": 4.8798495179873033e-05, "loss": 0.151, "step": 12900 }, { "epoch": 8.38, "learning_rate": 4.832823889019516e-05, "loss": 0.143, "step": 13000 }, { "epoch": 8.38, "eval_loss": 0.1447545289993286, "eval_runtime": 556.1274, "eval_samples_per_second": 9.777, "eval_steps_per_second": 0.611, "eval_wer": 0.11421063468873484, "step": 13000 }, { "epoch": 8.45, "learning_rate": 4.785798260051728e-05, "loss": 0.1568, "step": 13100 }, { "epoch": 8.51, "learning_rate": 4.7387726310839406e-05, "loss": 0.1565, "step": 13200 }, { "epoch": 8.57, "learning_rate": 4.6917470021161537e-05, "loss": 0.1515, "step": 13300 }, { "epoch": 8.64, "learning_rate": 4.644721373148366e-05, "loss": 0.2059, "step": 13400 }, { "epoch": 8.7, "learning_rate": 4.5976957441805785e-05, "loss": 0.154, "step": 13500 }, { "epoch": 8.7, "eval_loss": 0.13924145698547363, "eval_runtime": 554.6978, "eval_samples_per_second": 9.802, "eval_steps_per_second": 0.613, "eval_wer": 0.11040622824477629, "step": 13500 }, { "epoch": 8.77, "learning_rate": 4.550670115212791e-05, "loss": 0.1606, "step": 13600 }, { "epoch": 8.83, "learning_rate": 4.504114742534681e-05, "loss": 0.1532, "step": 13700 }, { "epoch": 8.9, "learning_rate": 4.457089113566894e-05, "loss": 0.1509, "step": 13800 }, { "epoch": 8.96, "learning_rate": 4.4100634845991065e-05, "loss": 0.1538, "step": 13900 }, { "epoch": 9.03, "learning_rate": 4.3630378556313196e-05, "loss": 0.1447, "step": 14000 }, { "epoch": 9.03, "eval_loss": 0.1403558999300003, "eval_runtime": 554.9504, "eval_samples_per_second": 9.797, "eval_steps_per_second": 0.613, "eval_wer": 0.1094159059487974, "step": 14000 }, { "epoch": 9.09, "learning_rate": 4.316012226663532e-05, "loss": 0.1445, "step": 14100 }, { "epoch": 9.16, "learning_rate": 4.2689865976957444e-05, "loss": 0.1641, "step": 14200 }, { "epoch": 9.22, "learning_rate": 4.221960968727957e-05, "loss": 0.1448, "step": 14300 }, { "epoch": 9.28, "learning_rate": 4.174935339760169e-05, "loss": 0.1386, "step": 14400 }, { "epoch": 9.35, "learning_rate": 4.127909710792382e-05, "loss": 0.1471, "step": 14500 }, { "epoch": 9.35, "eval_loss": 0.14039234817028046, "eval_runtime": 559.6371, "eval_samples_per_second": 9.715, "eval_steps_per_second": 0.608, "eval_wer": 0.10880798533146381, "step": 14500 }, { "epoch": 9.41, "learning_rate": 4.080884081824595e-05, "loss": 0.1514, "step": 14600 }, { "epoch": 9.48, "learning_rate": 4.033858452856807e-05, "loss": 0.148, "step": 14700 }, { "epoch": 9.54, "learning_rate": 3.9868328238890195e-05, "loss": 0.1463, "step": 14800 }, { "epoch": 9.61, "learning_rate": 3.9398071949212326e-05, "loss": 0.1412, "step": 14900 }, { "epoch": 9.67, "learning_rate": 3.892781565953445e-05, "loss": 0.1479, "step": 15000 }, { "epoch": 9.67, "eval_loss": 0.14136268198490143, "eval_runtime": 556.7019, "eval_samples_per_second": 9.766, "eval_steps_per_second": 0.611, "eval_wer": 0.11328894859148715, "step": 15000 }, { "epoch": 9.74, "learning_rate": 3.8457559369856574e-05, "loss": 0.1541, "step": 15100 }, { "epoch": 9.8, "learning_rate": 3.79873030801787e-05, "loss": 0.1482, "step": 15200 }, { "epoch": 9.86, "learning_rate": 3.751704679050083e-05, "loss": 0.1462, "step": 15300 }, { "epoch": 9.93, "learning_rate": 3.705149306371973e-05, "loss": 0.1513, "step": 15400 }, { "epoch": 9.99, "learning_rate": 3.6581236774041855e-05, "loss": 0.1607, "step": 15500 }, { "epoch": 9.99, "eval_loss": 0.14584468305110931, "eval_runtime": 555.5836, "eval_samples_per_second": 9.786, "eval_steps_per_second": 0.612, "eval_wer": 0.11712277054918764, "step": 15500 }, { "epoch": 10.06, "learning_rate": 3.611098048436398e-05, "loss": 0.1589, "step": 15600 }, { "epoch": 10.12, "learning_rate": 3.56407241946861e-05, "loss": 0.1588, "step": 15700 }, { "epoch": 10.19, "learning_rate": 3.5170467905008234e-05, "loss": 0.1646, "step": 15800 }, { "epoch": 10.25, "learning_rate": 3.470021161533036e-05, "loss": 0.17, "step": 15900 }, { "epoch": 10.32, "learning_rate": 3.422995532565248e-05, "loss": 0.166, "step": 16000 }, { "epoch": 10.32, "eval_loss": 0.16516457498073578, "eval_runtime": 557.4495, "eval_samples_per_second": 9.753, "eval_steps_per_second": 0.61, "eval_wer": 0.12636904703540647, "step": 16000 }, { "epoch": 10.38, "learning_rate": 3.3759699035974606e-05, "loss": 0.1784, "step": 16100 }, { "epoch": 10.44, "learning_rate": 3.328944274629674e-05, "loss": 0.1678, "step": 16200 }, { "epoch": 10.51, "learning_rate": 3.281918645661886e-05, "loss": 0.1771, "step": 16300 }, { "epoch": 10.57, "learning_rate": 3.235363272983776e-05, "loss": 0.1885, "step": 16400 }, { "epoch": 10.64, "learning_rate": 3.188337644015989e-05, "loss": 0.188, "step": 16500 }, { "epoch": 10.64, "eval_loss": 0.17125311493873596, "eval_runtime": 557.8968, "eval_samples_per_second": 9.746, "eval_steps_per_second": 0.609, "eval_wer": 0.13218351358506478, "step": 16500 }, { "epoch": 10.7, "learning_rate": 3.141312015048201e-05, "loss": 0.1847, "step": 16600 }, { "epoch": 10.77, "learning_rate": 3.094286386080414e-05, "loss": 0.2217, "step": 16700 }, { "epoch": 10.83, "learning_rate": 3.0472607571126265e-05, "loss": 0.1662, "step": 16800 }, { "epoch": 10.9, "learning_rate": 3.0002351281448393e-05, "loss": 0.1461, "step": 16900 }, { "epoch": 10.96, "learning_rate": 2.9532094991770514e-05, "loss": 0.1461, "step": 17000 }, { "epoch": 10.96, "eval_loss": 0.14227142930030823, "eval_runtime": 556.6368, "eval_samples_per_second": 9.768, "eval_steps_per_second": 0.611, "eval_wer": 0.11110239540333572, "step": 17000 }, { "epoch": 11.03, "learning_rate": 2.906183870209264e-05, "loss": 0.1583, "step": 17100 }, { "epoch": 11.09, "learning_rate": 2.859158241241477e-05, "loss": 0.1221, "step": 17200 }, { "epoch": 11.15, "learning_rate": 2.812132612273689e-05, "loss": 0.138, "step": 17300 }, { "epoch": 11.22, "learning_rate": 2.7651069833059017e-05, "loss": 0.1269, "step": 17400 }, { "epoch": 11.28, "learning_rate": 2.7180813543381144e-05, "loss": 0.1289, "step": 17500 }, { "epoch": 11.28, "eval_loss": 0.13876527547836304, "eval_runtime": 555.2154, "eval_samples_per_second": 9.793, "eval_steps_per_second": 0.612, "eval_wer": 0.10974928177120613, "step": 17500 }, { "epoch": 11.35, "learning_rate": 2.671055725370327e-05, "loss": 0.1395, "step": 17600 }, { "epoch": 11.41, "learning_rate": 2.6240300964025392e-05, "loss": 0.1397, "step": 17700 }, { "epoch": 11.48, "learning_rate": 2.577004467434752e-05, "loss": 0.123, "step": 17800 }, { "epoch": 11.54, "learning_rate": 2.5299788384669647e-05, "loss": 0.1486, "step": 17900 }, { "epoch": 11.61, "learning_rate": 2.482953209499177e-05, "loss": 0.1273, "step": 18000 }, { "epoch": 11.61, "eval_loss": 0.1438097506761551, "eval_runtime": 554.8582, "eval_samples_per_second": 9.799, "eval_steps_per_second": 0.613, "eval_wer": 0.10742545618559228, "step": 18000 }, { "epoch": 11.67, "learning_rate": 2.4363978368210676e-05, "loss": 0.1313, "step": 18100 }, { "epoch": 11.73, "learning_rate": 2.38937220785328e-05, "loss": 0.1365, "step": 18200 }, { "epoch": 11.8, "learning_rate": 2.3423465788854924e-05, "loss": 0.1335, "step": 18300 }, { "epoch": 11.86, "learning_rate": 2.295320949917705e-05, "loss": 0.1321, "step": 18400 }, { "epoch": 11.93, "learning_rate": 2.2482953209499176e-05, "loss": 0.1317, "step": 18500 }, { "epoch": 11.93, "eval_loss": 0.13120408356189728, "eval_runtime": 555.4622, "eval_samples_per_second": 9.788, "eval_steps_per_second": 0.612, "eval_wer": 0.10656260111582849, "step": 18500 }, { "epoch": 11.99, "learning_rate": 2.2012696919821303e-05, "loss": 0.137, "step": 18600 }, { "epoch": 12.06, "learning_rate": 2.154244063014343e-05, "loss": 0.1224, "step": 18700 }, { "epoch": 12.12, "learning_rate": 2.1072184340465555e-05, "loss": 0.1293, "step": 18800 }, { "epoch": 12.19, "learning_rate": 2.0606630613684456e-05, "loss": 0.1245, "step": 18900 }, { "epoch": 12.25, "learning_rate": 2.0136374324006584e-05, "loss": 0.1448, "step": 19000 }, { "epoch": 12.25, "eval_loss": 0.1446371227502823, "eval_runtime": 558.424, "eval_samples_per_second": 9.736, "eval_steps_per_second": 0.609, "eval_wer": 0.10419955484522538, "step": 19000 }, { "epoch": 12.31, "learning_rate": 1.9666118034328708e-05, "loss": 0.1472, "step": 19100 }, { "epoch": 12.38, "learning_rate": 1.9195861744650835e-05, "loss": 0.15, "step": 19200 }, { "epoch": 12.44, "learning_rate": 1.8725605454972963e-05, "loss": 0.1434, "step": 19300 }, { "epoch": 12.51, "learning_rate": 1.8255349165295087e-05, "loss": 0.1439, "step": 19400 }, { "epoch": 12.57, "learning_rate": 1.7785092875617214e-05, "loss": 0.1424, "step": 19500 }, { "epoch": 12.57, "eval_loss": 0.13855019211769104, "eval_runtime": 555.0962, "eval_samples_per_second": 9.795, "eval_steps_per_second": 0.613, "eval_wer": 0.10152274309470814, "step": 19500 }, { "epoch": 12.64, "learning_rate": 1.7314836585939338e-05, "loss": 0.1422, "step": 19600 }, { "epoch": 12.7, "learning_rate": 1.6844580296261466e-05, "loss": 0.1426, "step": 19700 }, { "epoch": 12.77, "learning_rate": 1.637432400658359e-05, "loss": 0.1395, "step": 19800 }, { "epoch": 12.83, "learning_rate": 1.5904067716905717e-05, "loss": 0.1363, "step": 19900 }, { "epoch": 12.89, "learning_rate": 1.543381142722784e-05, "loss": 0.1392, "step": 20000 }, { "epoch": 12.89, "eval_loss": 0.13786287605762482, "eval_runtime": 553.5953, "eval_samples_per_second": 9.821, "eval_steps_per_second": 0.614, "eval_wer": 0.10046378459999804, "step": 20000 }, { "epoch": 12.96, "learning_rate": 1.4963555137549964e-05, "loss": 0.1387, "step": 20100 }, { "epoch": 13.02, "learning_rate": 1.4493298847872091e-05, "loss": 0.1388, "step": 20200 }, { "epoch": 13.09, "learning_rate": 1.4023042558194215e-05, "loss": 0.1259, "step": 20300 }, { "epoch": 13.15, "learning_rate": 1.3552786268516343e-05, "loss": 0.1475, "step": 20400 }, { "epoch": 13.22, "learning_rate": 1.3082529978838467e-05, "loss": 0.1408, "step": 20500 }, { "epoch": 13.22, "eval_loss": 0.1407657265663147, "eval_runtime": 557.4847, "eval_samples_per_second": 9.753, "eval_steps_per_second": 0.61, "eval_wer": 0.09921852785158893, "step": 20500 }, { "epoch": 13.28, "learning_rate": 1.2612273689160592e-05, "loss": 0.1444, "step": 20600 }, { "epoch": 13.35, "learning_rate": 1.214201739948272e-05, "loss": 0.1324, "step": 20700 }, { "epoch": 13.41, "learning_rate": 1.1671761109804846e-05, "loss": 0.1397, "step": 20800 }, { "epoch": 13.48, "learning_rate": 1.120150482012697e-05, "loss": 0.1382, "step": 20900 }, { "epoch": 13.54, "learning_rate": 1.0731248530449096e-05, "loss": 0.1239, "step": 21000 }, { "epoch": 13.54, "eval_loss": 0.13379834592342377, "eval_runtime": 553.3736, "eval_samples_per_second": 9.825, "eval_steps_per_second": 0.614, "eval_wer": 0.09677704021100729, "step": 21000 }, { "epoch": 13.6, "learning_rate": 1.0260992240771221e-05, "loss": 0.122, "step": 21100 }, { "epoch": 13.67, "learning_rate": 9.790735951093347e-06, "loss": 0.1209, "step": 21200 }, { "epoch": 13.73, "learning_rate": 9.320479661415473e-06, "loss": 0.1248, "step": 21300 }, { "epoch": 13.8, "learning_rate": 8.850223371737597e-06, "loss": 0.1198, "step": 21400 }, { "epoch": 13.86, "learning_rate": 8.379967082059723e-06, "loss": 0.1244, "step": 21500 }, { "epoch": 13.86, "eval_loss": 0.1335345059633255, "eval_runtime": 554.5112, "eval_samples_per_second": 9.805, "eval_steps_per_second": 0.613, "eval_wer": 0.09565925068881329, "step": 21500 }, { "epoch": 13.93, "learning_rate": 7.914413355278628e-06, "loss": 0.1235, "step": 21600 }, { "epoch": 13.99, "learning_rate": 7.444157065600752e-06, "loss": 0.1367, "step": 21700 }, { "epoch": 14.06, "learning_rate": 6.973900775922878e-06, "loss": 0.1288, "step": 21800 }, { "epoch": 14.12, "learning_rate": 6.503644486245005e-06, "loss": 0.1358, "step": 21900 }, { "epoch": 14.18, "learning_rate": 6.03338819656713e-06, "loss": 0.1254, "step": 22000 }, { "epoch": 14.18, "eval_loss": 0.13817058503627777, "eval_runtime": 555.1553, "eval_samples_per_second": 9.794, "eval_steps_per_second": 0.612, "eval_wer": 0.09503171972898507, "step": 22000 }, { "epoch": 14.25, "learning_rate": 5.5631319068892555e-06, "loss": 0.1343, "step": 22100 }, { "epoch": 14.31, "learning_rate": 5.097578180108159e-06, "loss": 0.131, "step": 22200 }, { "epoch": 14.38, "learning_rate": 4.627321890430284e-06, "loss": 0.1503, "step": 22300 }, { "epoch": 14.44, "learning_rate": 4.15706560075241e-06, "loss": 0.1543, "step": 22400 }, { "epoch": 14.51, "learning_rate": 3.6868093110745355e-06, "loss": 0.1597, "step": 22500 }, { "epoch": 14.51, "eval_loss": 0.15444068610668182, "eval_runtime": 553.9922, "eval_samples_per_second": 9.814, "eval_steps_per_second": 0.614, "eval_wer": 0.09704177983468482, "step": 22500 }, { "epoch": 14.57, "learning_rate": 3.2165530213966613e-06, "loss": 0.1588, "step": 22600 }, { "epoch": 14.64, "learning_rate": 2.746296731718787e-06, "loss": 0.1616, "step": 22700 }, { "epoch": 14.7, "learning_rate": 2.2760404420409124e-06, "loss": 0.1701, "step": 22800 }, { "epoch": 14.76, "learning_rate": 1.8057841523630381e-06, "loss": 0.1569, "step": 22900 }, { "epoch": 14.83, "learning_rate": 1.3355278626851635e-06, "loss": 0.1566, "step": 23000 }, { "epoch": 14.83, "eval_loss": 0.1588866114616394, "eval_runtime": 554.1492, "eval_samples_per_second": 9.811, "eval_steps_per_second": 0.614, "eval_wer": 0.09631619716238345, "step": 23000 }, { "epoch": 14.89, "learning_rate": 8.652715730072889e-07, "loss": 0.1645, "step": 23100 }, { "epoch": 14.96, "learning_rate": 3.950152833294146e-07, "loss": 0.1684, "step": 23200 }, { "epoch": 15.0, "step": 23265, "total_flos": 4.936819874574462e+20, "train_loss": 0.2744614067626927, "train_runtime": 128452.941, "train_samples_per_second": 5.797, "train_steps_per_second": 0.181 } ], "max_steps": 23265, "num_train_epochs": 15, "total_flos": 4.936819874574462e+20, "trial_name": null, "trial_params": null }