diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,6 +1,6 @@ { - "best_metric": 2.074415922164917, - "best_model_checkpoint": "ai-light-dance_drums_ft_pretrain_wav2vec2-base/checkpoint-744", + "best_metric": 1.9551260471343994, + "best_model_checkpoint": "ai-light-dance_drums_ft_pretrain_wav2vec2-base/checkpoint-1408", "epoch": 199.90140845070422, "global_step": 1600, "is_hyper_param_search": false, @@ -9,2772 +9,2772 @@ "log_history": [ { "epoch": 0.9, - "eval_loss": 2.379915952682495, - "eval_runtime": 7.7959, - "eval_samples_per_second": 8.081, - "eval_steps_per_second": 1.026, - "eval_wer": 0.7797066746483089, + "eval_loss": 2.037254571914673, + "eval_runtime": 7.6198, + "eval_samples_per_second": 8.268, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6779407363064951, "step": 8 }, { "epoch": 1.23, "learning_rate": 0.0002, - "loss": 0.9677, + "loss": 0.6674, "step": 10 }, { "epoch": 1.9, - "eval_loss": 2.4062252044677734, - "eval_runtime": 7.9931, - "eval_samples_per_second": 7.882, - "eval_steps_per_second": 1.001, - "eval_wer": 0.7913798263992816, + "eval_loss": 2.2592625617980957, + "eval_runtime": 7.6185, + "eval_samples_per_second": 8.269, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6749476204729123, "step": 16 }, { "epoch": 2.45, "learning_rate": 0.0004, - "loss": 0.9433, + "loss": 0.6653, "step": 20 }, { "epoch": 2.9, - "eval_loss": 2.6954824924468994, - "eval_runtime": 8.0202, - "eval_samples_per_second": 7.855, - "eval_steps_per_second": 0.997, - "eval_wer": 0.7683328344806944, + "eval_loss": 2.043417453765869, + "eval_runtime": 7.6392, + "eval_samples_per_second": 8.247, + "eval_steps_per_second": 1.047, + "eval_wer": 0.6668662077222388, "step": 24 }, { "epoch": 3.68, "learning_rate": 0.00039746835443037974, - "loss": 1.0276, + "loss": 0.7249, "step": 30 }, { "epoch": 3.9, - "eval_loss": 2.5257680416107178, - "eval_runtime": 8.0107, - "eval_samples_per_second": 7.864, - "eval_steps_per_second": 0.999, - "eval_wer": 0.7892846453157737, + "eval_loss": 2.1789791584014893, + "eval_runtime": 7.6684, + "eval_samples_per_second": 8.216, + "eval_steps_per_second": 1.043, + "eval_wer": 0.6935049386411254, "step": 32 }, { "epoch": 4.9, "learning_rate": 0.0003949367088607595, - "loss": 0.9923, + "loss": 0.683, "step": 40 }, { "epoch": 4.9, - "eval_loss": 2.417393445968628, - "eval_runtime": 8.1407, - "eval_samples_per_second": 7.739, - "eval_steps_per_second": 0.983, - "eval_wer": 0.7824004788985334, + "eval_loss": 2.1209700107574463, + "eval_runtime": 7.7094, + "eval_samples_per_second": 8.172, + "eval_steps_per_second": 1.038, + "eval_wer": 0.6866207722238851, "step": 40 }, { "epoch": 5.9, - "eval_loss": 2.7146248817443848, - "eval_runtime": 8.0327, - "eval_samples_per_second": 7.843, - "eval_steps_per_second": 0.996, - "eval_wer": 0.7788087398982341, + "eval_loss": 2.240757703781128, + "eval_runtime": 7.6326, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.048, + "eval_wer": 0.6803352289733613, "step": 48 }, { "epoch": 6.23, "learning_rate": 0.0003924050632911393, - "loss": 1.0927, + "loss": 0.7518, "step": 50 }, { "epoch": 6.9, - "eval_loss": 2.9690921306610107, - "eval_runtime": 7.8332, - "eval_samples_per_second": 8.043, - "eval_steps_per_second": 1.021, - "eval_wer": 0.7883867105656989, + "eval_loss": 2.28826642036438, + "eval_runtime": 7.57, + "eval_samples_per_second": 8.322, + "eval_steps_per_second": 1.057, + "eval_wer": 0.6902125112241844, "step": 56 }, { "epoch": 7.45, "learning_rate": 0.000389873417721519, - "loss": 1.0133, + "loss": 0.686, "step": 60 }, { "epoch": 7.9, - "eval_loss": 3.11049485206604, - "eval_runtime": 8.0174, - "eval_samples_per_second": 7.858, - "eval_steps_per_second": 0.998, - "eval_wer": 0.7644417838970368, + "eval_loss": 2.1073131561279297, + "eval_runtime": 7.7427, + "eval_samples_per_second": 8.137, + "eval_steps_per_second": 1.033, + "eval_wer": 0.6818317868901527, "step": 64 }, { "epoch": 8.68, "learning_rate": 0.00038734177215189877, - "loss": 0.98, + "loss": 0.6771, "step": 70 }, { "epoch": 8.9, - "eval_loss": 2.8638393878936768, - "eval_runtime": 8.0331, - "eval_samples_per_second": 7.843, - "eval_steps_per_second": 0.996, - "eval_wer": 0.7644417838970368, + "eval_loss": 2.340881109237671, + "eval_runtime": 7.6198, + "eval_samples_per_second": 8.268, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6689613888057467, "step": 72 }, { "epoch": 9.9, "learning_rate": 0.0003848101265822785, - "loss": 0.9688, + "loss": 0.6593, "step": 80 }, { "epoch": 9.9, - "eval_loss": 2.7538065910339355, - "eval_runtime": 7.9304, - "eval_samples_per_second": 7.944, - "eval_steps_per_second": 1.009, - "eval_wer": 0.758156240646513, + "eval_loss": 2.4714651107788086, + "eval_runtime": 7.6918, + "eval_samples_per_second": 8.191, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6602813528883568, "step": 80 }, { "epoch": 10.9, - "eval_loss": 2.6010308265686035, - "eval_runtime": 8.0107, - "eval_samples_per_second": 7.865, - "eval_steps_per_second": 0.999, - "eval_wer": 0.7713259503142772, + "eval_loss": 2.097456455230713, + "eval_runtime": 7.666, + "eval_samples_per_second": 8.218, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6722538162226879, "step": 88 }, { "epoch": 11.23, "learning_rate": 0.00038227848101265825, - "loss": 1.0453, + "loss": 0.7433, "step": 90 }, { "epoch": 11.9, - "eval_loss": 2.6365110874176025, - "eval_runtime": 7.9124, - "eval_samples_per_second": 7.962, - "eval_steps_per_second": 1.011, - "eval_wer": 0.7482789583956899, + "eval_loss": 2.033815860748291, + "eval_runtime": 7.6612, + "eval_samples_per_second": 8.223, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6728524393894044, "step": 96 }, { "epoch": 12.45, "learning_rate": 0.00037974683544303797, - "loss": 0.9397, + "loss": 0.6497, "step": 100 }, { "epoch": 12.9, - "eval_loss": 2.474029302597046, - "eval_runtime": 7.927, - "eval_samples_per_second": 7.948, - "eval_steps_per_second": 1.009, - "eval_wer": 0.7629452259802454, + "eval_loss": 2.141453266143799, + "eval_runtime": 7.6044, + "eval_samples_per_second": 8.285, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6824304100568692, "step": 104 }, { "epoch": 13.68, "learning_rate": 0.00037721518987341774, - "loss": 0.9466, + "loss": 0.6497, "step": 110 }, { "epoch": 13.9, - "eval_loss": 2.6466243267059326, - "eval_runtime": 7.9192, - "eval_samples_per_second": 7.955, - "eval_steps_per_second": 1.01, - "eval_wer": 0.7617479796468123, + "eval_loss": 2.181793689727783, + "eval_runtime": 7.6888, + "eval_samples_per_second": 8.194, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6653696498054474, "step": 112 }, { "epoch": 14.9, "learning_rate": 0.00037468354430379746, - "loss": 0.9665, + "loss": 0.6799, "step": 120 }, { "epoch": 14.9, - "eval_loss": 2.6643269062042236, - "eval_runtime": 7.9716, - "eval_samples_per_second": 7.903, - "eval_steps_per_second": 1.004, - "eval_wer": 0.7644417838970368, + "eval_loss": 2.0863850116729736, + "eval_runtime": 7.6642, + "eval_samples_per_second": 8.22, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6755462436396289, "step": 120 }, { "epoch": 15.9, - "eval_loss": 2.4003283977508545, - "eval_runtime": 7.9753, - "eval_samples_per_second": 7.899, - "eval_steps_per_second": 1.003, - "eval_wer": 0.740796168811733, + "eval_loss": 2.4925379753112793, + "eval_runtime": 7.6569, + "eval_samples_per_second": 8.228, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6797366058066447, "step": 128 }, { "epoch": 16.23, "learning_rate": 0.00037215189873417723, - "loss": 1.0577, + "loss": 0.7459, "step": 130 }, { "epoch": 16.9, - "eval_loss": 2.6306591033935547, - "eval_runtime": 7.9695, - "eval_samples_per_second": 7.905, - "eval_steps_per_second": 1.004, - "eval_wer": 0.7707273271475606, + "eval_loss": 2.335496425628662, + "eval_runtime": 7.6286, + "eval_samples_per_second": 8.258, + "eval_steps_per_second": 1.049, + "eval_wer": 0.6860221490571685, "step": 136 }, { "epoch": 17.45, "learning_rate": 0.000369620253164557, - "loss": 0.9154, + "loss": 0.6576, "step": 140 }, { "epoch": 17.9, - "eval_loss": 2.6734743118286133, - "eval_runtime": 7.957, - "eval_samples_per_second": 7.918, - "eval_steps_per_second": 1.005, - "eval_wer": 0.7491768931457647, + "eval_loss": 2.234079360961914, + "eval_runtime": 7.6403, + "eval_samples_per_second": 8.246, + "eval_steps_per_second": 1.047, + "eval_wer": 0.6704579467225381, "step": 144 }, { "epoch": 18.68, "learning_rate": 0.0003670886075949367, - "loss": 0.9352, + "loss": 0.6798, "step": 150 }, { "epoch": 18.9, - "eval_loss": 2.4403460025787354, - "eval_runtime": 7.9548, - "eval_samples_per_second": 7.92, - "eval_steps_per_second": 1.006, - "eval_wer": 0.7491768931457647, + "eval_loss": 2.142047643661499, + "eval_runtime": 7.5893, + "eval_samples_per_second": 8.301, + "eval_steps_per_second": 1.054, + "eval_wer": 0.6614785992217899, "step": 152 }, { "epoch": 19.9, "learning_rate": 0.0003645569620253165, - "loss": 0.8999, + "loss": 0.6479, "step": 160 }, { "epoch": 19.9, - "eval_loss": 2.71948504447937, - "eval_runtime": 7.9309, - "eval_samples_per_second": 7.944, - "eval_steps_per_second": 1.009, - "eval_wer": 0.7650404070637533, + "eval_loss": 2.4265201091766357, + "eval_runtime": 7.5945, + "eval_samples_per_second": 8.296, + "eval_steps_per_second": 1.053, + "eval_wer": 0.6755462436396289, "step": 160 }, { "epoch": 20.9, - "eval_loss": 2.2872610092163086, - "eval_runtime": 7.9835, - "eval_samples_per_second": 7.891, - "eval_steps_per_second": 1.002, - "eval_wer": 0.7602514217300209, + "eval_loss": 2.300034523010254, + "eval_runtime": 7.6736, + "eval_samples_per_second": 8.21, + "eval_steps_per_second": 1.043, + "eval_wer": 0.6944028733912002, "step": 168 }, { "epoch": 21.23, "learning_rate": 0.00036202531645569626, - "loss": 0.9935, + "loss": 0.708, "step": 170 }, { "epoch": 21.9, - "eval_loss": 2.844008207321167, - "eval_runtime": 7.9178, - "eval_samples_per_second": 7.957, - "eval_steps_per_second": 1.01, - "eval_wer": 0.7512720742292727, + "eval_loss": 2.246602773666382, + "eval_runtime": 7.6898, + "eval_samples_per_second": 8.193, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6731517509727627, "step": 176 }, { "epoch": 22.45, "learning_rate": 0.000359493670886076, - "loss": 0.885, + "loss": 0.6596, "step": 180 }, { "epoch": 22.9, - "eval_loss": 2.7574303150177, - "eval_runtime": 7.9538, - "eval_samples_per_second": 7.921, - "eval_steps_per_second": 1.006, - "eval_wer": 0.7485782699790482, + "eval_loss": 2.4366281032562256, + "eval_runtime": 7.6554, + "eval_samples_per_second": 8.23, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6776414247231368, "step": 184 }, { "epoch": 23.68, "learning_rate": 0.0003569620253164557, - "loss": 0.8979, + "loss": 0.643, "step": 190 }, { "epoch": 23.9, - "eval_loss": 3.1470837593078613, - "eval_runtime": 7.9957, - "eval_samples_per_second": 7.879, - "eval_steps_per_second": 1.001, - "eval_wer": 0.7536665668961389, + "eval_loss": 2.091036081314087, + "eval_runtime": 7.7242, + "eval_samples_per_second": 8.156, + "eval_steps_per_second": 1.036, + "eval_wer": 0.6513020053876085, "step": 192 }, { "epoch": 24.9, "learning_rate": 0.00035443037974683546, - "loss": 0.9074, + "loss": 0.6644, "step": 200 }, { "epoch": 24.9, - "eval_loss": 3.089177131652832, - "eval_runtime": 7.9191, - "eval_samples_per_second": 7.955, - "eval_steps_per_second": 1.01, - "eval_wer": 0.7288237054774019, + "eval_loss": 2.265995740890503, + "eval_runtime": 7.6041, + "eval_samples_per_second": 8.285, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6644717150553726, "step": 200 }, { "epoch": 25.9, - "eval_loss": 2.8640966415405273, - "eval_runtime": 7.907, - "eval_samples_per_second": 7.968, - "eval_steps_per_second": 1.012, - "eval_wer": 0.7518706973959892, + "eval_loss": 2.1543314456939697, + "eval_runtime": 7.6598, + "eval_samples_per_second": 8.225, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6764441783897037, "step": 208 }, { "epoch": 26.23, "learning_rate": 0.00035189873417721524, - "loss": 0.9498, + "loss": 0.6936, "step": 210 }, { "epoch": 26.9, - "eval_loss": 2.879786729812622, - "eval_runtime": 7.8826, - "eval_samples_per_second": 7.992, - "eval_steps_per_second": 1.015, - "eval_wer": 0.7398982340616582, + "eval_loss": 2.150484561920166, + "eval_runtime": 7.5916, + "eval_samples_per_second": 8.299, + "eval_steps_per_second": 1.054, + "eval_wer": 0.6698593235558216, "step": 216 }, { "epoch": 27.45, "learning_rate": 0.00034936708860759495, - "loss": 0.8821, + "loss": 0.6338, "step": 220 }, { "epoch": 27.9, - "eval_loss": 2.8114750385284424, - "eval_runtime": 7.9479, - "eval_samples_per_second": 7.927, - "eval_steps_per_second": 1.007, - "eval_wer": 0.749476204729123, + "eval_loss": 2.2812647819519043, + "eval_runtime": 7.6091, + "eval_samples_per_second": 8.28, + "eval_steps_per_second": 1.051, + "eval_wer": 0.6707572583058964, "step": 224 }, { "epoch": 28.68, "learning_rate": 0.00034683544303797467, - "loss": 0.8968, + "loss": 0.6393, "step": 230 }, { "epoch": 28.9, - "eval_loss": 2.922487735748291, - "eval_runtime": 7.8659, - "eval_samples_per_second": 8.009, - "eval_steps_per_second": 1.017, - "eval_wer": 0.7446872193953906, + "eval_loss": 2.103980779647827, + "eval_runtime": 7.6072, + "eval_samples_per_second": 8.282, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6596827297216402, "step": 232 }, { "epoch": 29.9, "learning_rate": 0.00034430379746835444, - "loss": 0.8783, + "loss": 0.6378, "step": 240 }, { "epoch": 29.9, - "eval_loss": 2.541382074356079, - "eval_runtime": 8.0089, - "eval_samples_per_second": 7.866, - "eval_steps_per_second": 0.999, - "eval_wer": 0.7503741394791978, + "eval_loss": 2.2749102115631104, + "eval_runtime": 7.6923, + "eval_samples_per_second": 8.19, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6740496857228375, "step": 240 }, { "epoch": 30.9, - "eval_loss": 2.352771759033203, - "eval_runtime": 7.9156, - "eval_samples_per_second": 7.959, - "eval_steps_per_second": 1.011, - "eval_wer": 0.7378030529781503, + "eval_loss": 2.109790802001953, + "eval_runtime": 7.5197, + "eval_samples_per_second": 8.378, + "eval_steps_per_second": 1.064, + "eval_wer": 0.6611792876384316, "step": 248 }, { "epoch": 31.23, "learning_rate": 0.0003417721518987342, - "loss": 0.9428, + "loss": 0.6829, "step": 250 }, { "epoch": 31.9, - "eval_loss": 2.807304859161377, - "eval_runtime": 7.8598, - "eval_samples_per_second": 8.015, - "eval_steps_per_second": 1.018, - "eval_wer": 0.7141574378928465, + "eval_loss": 2.1962344646453857, + "eval_runtime": 7.7439, + "eval_samples_per_second": 8.135, + "eval_steps_per_second": 1.033, + "eval_wer": 0.6513020053876085, "step": 256 }, { "epoch": 32.45, "learning_rate": 0.00033924050632911393, - "loss": 0.8184, + "loss": 0.6002, "step": 260 }, { "epoch": 32.9, - "eval_loss": 2.575679063796997, - "eval_runtime": 8.0158, - "eval_samples_per_second": 7.859, - "eval_steps_per_second": 0.998, - "eval_wer": 0.7192457348099371, + "eval_loss": 2.131077289581299, + "eval_runtime": 7.5878, + "eval_samples_per_second": 8.303, + "eval_steps_per_second": 1.054, + "eval_wer": 0.6617779108051481, "step": 264 }, { "epoch": 33.68, "learning_rate": 0.0003367088607594937, - "loss": 0.9092, + "loss": 0.6656, "step": 270 }, { "epoch": 33.9, - "eval_loss": 2.4403328895568848, - "eval_runtime": 7.9646, - "eval_samples_per_second": 7.91, - "eval_steps_per_second": 1.004, - "eval_wer": 0.7093684525591141, + "eval_loss": 2.2650504112243652, + "eval_runtime": 7.6783, + "eval_samples_per_second": 8.205, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6510026938042502, "step": 272 }, { "epoch": 34.9, "learning_rate": 0.0003341772151898734, - "loss": 0.8749, + "loss": 0.633, "step": 280 }, { "epoch": 34.9, - "eval_loss": 2.691164493560791, - "eval_runtime": 7.9176, - "eval_samples_per_second": 7.957, - "eval_steps_per_second": 1.01, - "eval_wer": 0.7219395390601616, + "eval_loss": 2.2622334957122803, + "eval_runtime": 7.6997, + "eval_samples_per_second": 8.182, + "eval_steps_per_second": 1.039, + "eval_wer": 0.6513020053876085, "step": 280 }, { "epoch": 35.9, - "eval_loss": 2.4073445796966553, - "eval_runtime": 8.0386, - "eval_samples_per_second": 7.837, - "eval_steps_per_second": 0.995, - "eval_wer": 0.7327147560610595, + "eval_loss": 2.2585790157318115, + "eval_runtime": 7.6561, + "eval_samples_per_second": 8.229, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6620772223885064, "step": 288 }, { "epoch": 36.23, "learning_rate": 0.0003316455696202532, - "loss": 0.9235, + "loss": 0.6644, "step": 290 }, { "epoch": 36.9, - "eval_loss": 2.444603681564331, - "eval_runtime": 7.9142, - "eval_samples_per_second": 7.96, - "eval_steps_per_second": 1.011, - "eval_wer": 0.726728524393894, + "eval_loss": 2.4157793521881104, + "eval_runtime": 7.6013, + "eval_samples_per_second": 8.288, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6593834181382819, "step": 296 }, { "epoch": 37.45, "learning_rate": 0.00032911392405063296, - "loss": 0.8654, + "loss": 0.6235, "step": 300 }, { "epoch": 37.9, - "eval_loss": 2.821065902709961, - "eval_runtime": 7.9214, - "eval_samples_per_second": 7.953, - "eval_steps_per_second": 1.01, - "eval_wer": 0.7360071834780006, + "eval_loss": 2.425449848175049, + "eval_runtime": 7.5997, + "eval_samples_per_second": 8.29, + "eval_steps_per_second": 1.053, + "eval_wer": 0.6477102663873092, "step": 304 }, { "epoch": 38.68, "learning_rate": 0.0003265822784810127, - "loss": 0.8428, + "loss": 0.6041, "step": 310 }, { "epoch": 38.9, - "eval_loss": 2.481106996536255, - "eval_runtime": 7.9459, - "eval_samples_per_second": 7.929, - "eval_steps_per_second": 1.007, - "eval_wer": 0.7243340317270278, + "eval_loss": 2.308098793029785, + "eval_runtime": 7.8174, + "eval_samples_per_second": 8.059, + "eval_steps_per_second": 1.023, + "eval_wer": 0.6632744687219395, "step": 312 }, { "epoch": 39.9, "learning_rate": 0.0003240506329113924, - "loss": 0.8355, + "loss": 0.6215, "step": 320 }, { "epoch": 39.9, - "eval_loss": 2.357530355453491, - "eval_runtime": 8.0775, - "eval_samples_per_second": 7.799, - "eval_steps_per_second": 0.99, - "eval_wer": 0.7192457348099371, + "eval_loss": 2.425708532333374, + "eval_runtime": 7.5701, + "eval_samples_per_second": 8.322, + "eval_steps_per_second": 1.057, + "eval_wer": 0.6498054474708171, "step": 320 }, { "epoch": 40.9, - "eval_loss": 2.3957395553588867, - "eval_runtime": 7.951, - "eval_samples_per_second": 7.924, - "eval_steps_per_second": 1.006, - "eval_wer": 0.7138581263094882, + "eval_loss": 2.301248550415039, + "eval_runtime": 7.574, + "eval_samples_per_second": 8.318, + "eval_steps_per_second": 1.056, + "eval_wer": 0.6366357378030529, "step": 328 }, { "epoch": 41.23, "learning_rate": 0.00032151898734177216, - "loss": 0.8992, + "loss": 0.6684, "step": 330 }, { "epoch": 41.9, - "eval_loss": 2.4372870922088623, - "eval_runtime": 7.962, - "eval_samples_per_second": 7.913, - "eval_steps_per_second": 1.005, - "eval_wer": 0.7138581263094882, + "eval_loss": 2.2059545516967773, + "eval_runtime": 7.5863, + "eval_samples_per_second": 8.304, + "eval_steps_per_second": 1.055, + "eval_wer": 0.6584854833882071, "step": 336 }, { "epoch": 42.45, "learning_rate": 0.00031898734177215193, - "loss": 0.8221, + "loss": 0.6201, "step": 340 }, { "epoch": 42.9, - "eval_loss": 2.4234611988067627, - "eval_runtime": 7.963, - "eval_samples_per_second": 7.912, - "eval_steps_per_second": 1.005, - "eval_wer": 0.7126608799760551, + "eval_loss": 2.030762195587158, + "eval_runtime": 7.5021, + "eval_samples_per_second": 8.398, + "eval_steps_per_second": 1.066, + "eval_wer": 0.6680634540556719, "step": 344 }, { "epoch": 43.68, "learning_rate": 0.00031645569620253165, - "loss": 0.8305, + "loss": 0.5957, "step": 350 }, { "epoch": 43.9, - "eval_loss": 2.3404624462127686, - "eval_runtime": 7.9486, - "eval_samples_per_second": 7.926, - "eval_steps_per_second": 1.006, - "eval_wer": 0.7111643220592637, + "eval_loss": 2.137511730194092, + "eval_runtime": 7.6442, + "eval_samples_per_second": 8.242, + "eval_steps_per_second": 1.047, + "eval_wer": 0.6575875486381323, "step": 352 }, { "epoch": 44.9, "learning_rate": 0.00031392405063291137, - "loss": 0.8328, + "loss": 0.6158, "step": 360 }, { "epoch": 44.9, - "eval_loss": 2.340630292892456, - "eval_runtime": 8.0091, - "eval_samples_per_second": 7.866, - "eval_steps_per_second": 0.999, - "eval_wer": 0.7216402274768033, + "eval_loss": 2.0826189517974854, + "eval_runtime": 7.5707, + "eval_samples_per_second": 8.322, + "eval_steps_per_second": 1.057, + "eval_wer": 0.6671655193055971, "step": 360 }, { "epoch": 45.9, - "eval_loss": 2.4469380378723145, - "eval_runtime": 7.9319, - "eval_samples_per_second": 7.943, - "eval_steps_per_second": 1.009, - "eval_wer": 0.7165519305597127, + "eval_loss": 2.198957681655884, + "eval_runtime": 7.6122, + "eval_samples_per_second": 8.276, + "eval_steps_per_second": 1.051, + "eval_wer": 0.6584854833882071, "step": 368 }, { "epoch": 46.23, "learning_rate": 0.0003113924050632912, - "loss": 0.8611, + "loss": 0.6291, "step": 370 }, { "epoch": 46.9, - "eval_loss": 2.429659366607666, - "eval_runtime": 7.8903, - "eval_samples_per_second": 7.984, - "eval_steps_per_second": 1.014, - "eval_wer": 0.7156539958096378, + "eval_loss": 2.054161787033081, + "eval_runtime": 7.6382, + "eval_samples_per_second": 8.248, + "eval_steps_per_second": 1.047, + "eval_wer": 0.6650703382220892, "step": 376 }, { "epoch": 47.45, "learning_rate": 0.0003088607594936709, - "loss": 0.8092, + "loss": 0.5924, "step": 380 }, { "epoch": 47.9, - "eval_loss": 2.5867514610290527, - "eval_runtime": 8.0153, - "eval_samples_per_second": 7.86, - "eval_steps_per_second": 0.998, - "eval_wer": 0.7093684525591141, + "eval_loss": 2.057255983352661, + "eval_runtime": 7.5837, + "eval_samples_per_second": 8.307, + "eval_steps_per_second": 1.055, + "eval_wer": 0.653995809637833, "step": 384 }, { "epoch": 48.68, "learning_rate": 0.00030632911392405063, - "loss": 0.8173, + "loss": 0.6063, "step": 390 }, { "epoch": 48.9, - "eval_loss": 2.2557618618011475, - "eval_runtime": 8.0103, - "eval_samples_per_second": 7.865, - "eval_steps_per_second": 0.999, - "eval_wer": 0.7003891050583657, + "eval_loss": 2.248441457748413, + "eval_runtime": 7.5574, + "eval_samples_per_second": 8.336, + "eval_steps_per_second": 1.059, + "eval_wer": 0.6530978748877582, "step": 392 }, { "epoch": 49.9, "learning_rate": 0.0003037974683544304, - "loss": 0.7772, + "loss": 0.5984, "step": 400 }, { "epoch": 49.9, - "eval_loss": 2.359837770462036, - "eval_runtime": 7.7486, - "eval_samples_per_second": 8.13, - "eval_steps_per_second": 1.032, - "eval_wer": 0.7003891050583657, + "eval_loss": 2.0361733436584473, + "eval_runtime": 7.6951, + "eval_samples_per_second": 8.187, + "eval_steps_per_second": 1.04, + "eval_wer": 0.660580664471715, "step": 400 }, { "epoch": 50.9, - "eval_loss": 2.308309316635132, - "eval_runtime": 7.7464, - "eval_samples_per_second": 8.133, - "eval_steps_per_second": 1.033, - "eval_wer": 0.6881173301406764, + "eval_loss": 2.1027872562408447, + "eval_runtime": 7.6986, + "eval_samples_per_second": 8.183, + "eval_steps_per_second": 1.039, + "eval_wer": 0.6554923675546244, "step": 408 }, { "epoch": 51.23, "learning_rate": 0.00030126582278481017, - "loss": 0.8494, + "loss": 0.6309, "step": 410 }, { "epoch": 51.9, - "eval_loss": 2.4430971145629883, - "eval_runtime": 7.7316, - "eval_samples_per_second": 8.148, - "eval_steps_per_second": 1.035, - "eval_wer": 0.7012870398084405, + "eval_loss": 2.2151310443878174, + "eval_runtime": 7.7336, + "eval_samples_per_second": 8.146, + "eval_steps_per_second": 1.034, + "eval_wer": 0.6590841065549237, "step": 416 }, { "epoch": 52.45, "learning_rate": 0.0002987341772151899, - "loss": 0.7997, + "loss": 0.5979, "step": 420 }, { "epoch": 52.9, - "eval_loss": 2.3005003929138184, - "eval_runtime": 7.7273, - "eval_samples_per_second": 8.153, - "eval_steps_per_second": 1.035, - "eval_wer": 0.7111643220592637, + "eval_loss": 2.0955023765563965, + "eval_runtime": 7.7001, + "eval_samples_per_second": 8.182, + "eval_steps_per_second": 1.039, + "eval_wer": 0.6608799760550733, "step": 424 }, { "epoch": 53.68, "learning_rate": 0.0002962025316455696, - "loss": 0.7879, + "loss": 0.5941, "step": 430 }, { "epoch": 53.9, - "eval_loss": 2.1985087394714355, - "eval_runtime": 7.8272, - "eval_samples_per_second": 8.049, - "eval_steps_per_second": 1.022, - "eval_wer": 0.7297216402274768, + "eval_loss": 2.2526333332061768, + "eval_runtime": 7.6578, + "eval_samples_per_second": 8.227, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6731517509727627, "step": 432 }, { "epoch": 54.9, "learning_rate": 0.0002936708860759494, - "loss": 0.7694, + "loss": 0.5897, "step": 440 }, { "epoch": 54.9, - "eval_loss": 2.337602376937866, - "eval_runtime": 7.7096, - "eval_samples_per_second": 8.172, - "eval_steps_per_second": 1.038, - "eval_wer": 0.708171206225681, + "eval_loss": 2.385159492492676, + "eval_runtime": 7.7372, + "eval_samples_per_second": 8.143, + "eval_steps_per_second": 1.034, + "eval_wer": 0.6542951212211913, "step": 440 }, { "epoch": 55.9, - "eval_loss": 2.371609926223755, - "eval_runtime": 7.7243, - "eval_samples_per_second": 8.156, - "eval_steps_per_second": 1.036, - "eval_wer": 0.7012870398084405, + "eval_loss": 2.080392837524414, + "eval_runtime": 7.6943, + "eval_samples_per_second": 8.188, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6563903023046992, "step": 448 }, { "epoch": 56.23, "learning_rate": 0.00029113924050632915, - "loss": 0.8397, + "loss": 0.6443, "step": 450 }, { "epoch": 56.9, - "eval_loss": 2.381723403930664, - "eval_runtime": 7.7601, - "eval_samples_per_second": 8.118, - "eval_steps_per_second": 1.031, - "eval_wer": 0.711463633642622, + "eval_loss": 2.002680778503418, + "eval_runtime": 7.6599, + "eval_samples_per_second": 8.225, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6569889254714157, "step": 456 }, { "epoch": 57.45, "learning_rate": 0.00028860759493670886, - "loss": 0.7868, + "loss": 0.6055, "step": 460 }, { "epoch": 57.9, - "eval_loss": 2.257676601409912, - "eval_runtime": 7.7658, - "eval_samples_per_second": 8.112, - "eval_steps_per_second": 1.03, - "eval_wer": 0.7090691409757558, + "eval_loss": 2.1207001209259033, + "eval_runtime": 7.74, + "eval_samples_per_second": 8.14, + "eval_steps_per_second": 1.034, + "eval_wer": 0.6635737803052978, "step": 464 }, { "epoch": 58.68, "learning_rate": 0.00028607594936708863, - "loss": 0.7311, + "loss": 0.5422, "step": 470 }, { "epoch": 58.9, - "eval_loss": 2.3895084857940674, - "eval_runtime": 7.7486, - "eval_samples_per_second": 8.13, - "eval_steps_per_second": 1.032, - "eval_wer": 0.7126608799760551, + "eval_loss": 2.2515547275543213, + "eval_runtime": 7.7005, + "eval_samples_per_second": 8.181, + "eval_steps_per_second": 1.039, + "eval_wer": 0.6617779108051481, "step": 472 }, { "epoch": 59.9, "learning_rate": 0.00028354430379746835, - "loss": 0.7796, + "loss": 0.5879, "step": 480 }, { "epoch": 59.9, - "eval_loss": 2.276035785675049, - "eval_runtime": 7.7565, - "eval_samples_per_second": 8.122, - "eval_steps_per_second": 1.031, - "eval_wer": 0.7099670757258306, + "eval_loss": 2.002816915512085, + "eval_runtime": 7.6055, + "eval_samples_per_second": 8.284, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6548937443879078, "step": 480 }, { "epoch": 60.9, - "eval_loss": 2.5685431957244873, - "eval_runtime": 7.7725, - "eval_samples_per_second": 8.105, - "eval_steps_per_second": 1.029, - "eval_wer": 0.7072732714756061, + "eval_loss": 2.3659348487854004, + "eval_runtime": 7.5632, + "eval_samples_per_second": 8.33, + "eval_steps_per_second": 1.058, + "eval_wer": 0.6575875486381323, "step": 488 }, { "epoch": 61.23, "learning_rate": 0.0002810126582278481, - "loss": 0.8272, + "loss": 0.638, "step": 490 }, { "epoch": 61.9, - "eval_loss": 2.3881263732910156, - "eval_runtime": 7.7155, - "eval_samples_per_second": 8.165, - "eval_steps_per_second": 1.037, - "eval_wer": 0.7027835977252319, + "eval_loss": 2.499941825866699, + "eval_runtime": 7.5886, + "eval_samples_per_second": 8.302, + "eval_steps_per_second": 1.054, + "eval_wer": 0.6551930559712661, "step": 496 }, { "epoch": 62.45, "learning_rate": 0.0002784810126582279, - "loss": 0.7639, + "loss": 0.5874, "step": 500 }, { "epoch": 62.9, - "eval_loss": 2.3456814289093018, - "eval_runtime": 7.7642, - "eval_samples_per_second": 8.114, - "eval_steps_per_second": 1.03, - "eval_wer": 0.7084705178090392, + "eval_loss": 2.3140323162078857, + "eval_runtime": 7.6729, + "eval_samples_per_second": 8.211, + "eval_steps_per_second": 1.043, + "eval_wer": 0.6483088895540258, "step": 504 }, { "epoch": 63.68, "learning_rate": 0.0002759493670886076, - "loss": 0.789, + "loss": 0.5829, "step": 510 }, { "epoch": 63.9, - "eval_loss": 2.3291287422180176, - "eval_runtime": 7.7437, - "eval_samples_per_second": 8.136, - "eval_steps_per_second": 1.033, - "eval_wer": 0.700688416641724, + "eval_loss": 2.208669900894165, + "eval_runtime": 7.6542, + "eval_samples_per_second": 8.231, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6408260999700689, "step": 512 }, { "epoch": 64.9, "learning_rate": 0.0002734177215189873, - "loss": 0.7472, + "loss": 0.5632, "step": 520 }, { "epoch": 64.9, - "eval_loss": 2.5174083709716797, - "eval_runtime": 7.7856, - "eval_samples_per_second": 8.092, - "eval_steps_per_second": 1.028, - "eval_wer": 0.70487877880874, + "eval_loss": 2.1988563537597656, + "eval_runtime": 7.7231, + "eval_samples_per_second": 8.157, + "eval_steps_per_second": 1.036, + "eval_wer": 0.6533971864711164, "step": 520 }, { "epoch": 65.9, - "eval_loss": 2.399650812149048, - "eval_runtime": 7.7605, - "eval_samples_per_second": 8.118, - "eval_steps_per_second": 1.031, - "eval_wer": 0.7153546842262796, + "eval_loss": 2.244645118713379, + "eval_runtime": 7.7705, + "eval_samples_per_second": 8.108, + "eval_steps_per_second": 1.03, + "eval_wer": 0.6623765339718647, "step": 528 }, { "epoch": 66.23, "learning_rate": 0.0002708860759493671, - "loss": 0.8056, + "loss": 0.6143, "step": 530 }, { "epoch": 66.9, - "eval_loss": 2.4573962688446045, - "eval_runtime": 7.7606, - "eval_samples_per_second": 8.118, - "eval_steps_per_second": 1.031, - "eval_wer": 0.7237354085603113, + "eval_loss": 2.1098854541778564, + "eval_runtime": 7.5934, + "eval_samples_per_second": 8.297, + "eval_steps_per_second": 1.054, + "eval_wer": 0.642023346303502, "step": 536 }, { "epoch": 67.45, "learning_rate": 0.00026835443037974687, - "loss": 0.7752, + "loss": 0.5924, "step": 540 }, { "epoch": 67.9, - "eval_loss": 2.4979681968688965, - "eval_runtime": 7.7515, - "eval_samples_per_second": 8.127, - "eval_steps_per_second": 1.032, - "eval_wer": 0.7003891050583657, + "eval_loss": 2.237240791320801, + "eval_runtime": 7.6205, + "eval_samples_per_second": 8.267, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6423226578868603, "step": 544 }, { "epoch": 68.68, "learning_rate": 0.0002658227848101266, - "loss": 0.7084, + "loss": 0.5239, "step": 550 }, { "epoch": 68.9, - "eval_loss": 2.2370431423187256, - "eval_runtime": 7.792, - "eval_samples_per_second": 8.085, - "eval_steps_per_second": 1.027, - "eval_wer": 0.7084705178090392, + "eval_loss": 2.348794460296631, + "eval_runtime": 7.6413, + "eval_samples_per_second": 8.245, + "eval_steps_per_second": 1.047, + "eval_wer": 0.645315773720443, "step": 552 }, { "epoch": 69.9, "learning_rate": 0.0002632911392405063, - "loss": 0.7824, + "loss": 0.5883, "step": 560 }, { "epoch": 69.9, - "eval_loss": 2.359494924545288, - "eval_runtime": 7.722, - "eval_samples_per_second": 8.159, - "eval_steps_per_second": 1.036, - "eval_wer": 0.6970966776414247, + "eval_loss": 2.196101427078247, + "eval_runtime": 7.6374, + "eval_samples_per_second": 8.249, + "eval_steps_per_second": 1.047, + "eval_wer": 0.6438192158036516, "step": 560 }, { "epoch": 70.9, - "eval_loss": 2.19962477684021, - "eval_runtime": 7.7269, - "eval_samples_per_second": 8.153, - "eval_steps_per_second": 1.035, - "eval_wer": 0.7003891050583657, + "eval_loss": 2.2004334926605225, + "eval_runtime": 7.62, + "eval_samples_per_second": 8.268, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6309488177192457, "step": 568 }, { "epoch": 71.23, "learning_rate": 0.00026075949367088613, - "loss": 0.7776, + "loss": 0.5918, "step": 570 }, { "epoch": 71.9, - "eval_loss": 2.2957143783569336, - "eval_runtime": 7.7186, - "eval_samples_per_second": 8.162, - "eval_steps_per_second": 1.036, - "eval_wer": 0.6902125112241844, + "eval_loss": 2.020232677459717, + "eval_runtime": 7.6799, + "eval_samples_per_second": 8.203, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6336426219694702, "step": 576 }, { "epoch": 72.45, "learning_rate": 0.00025822784810126584, - "loss": 0.7205, + "loss": 0.5602, "step": 580 }, { "epoch": 72.9, - "eval_loss": 2.2435786724090576, - "eval_runtime": 7.7517, - "eval_samples_per_second": 8.127, - "eval_steps_per_second": 1.032, - "eval_wer": 0.6908111343909009, + "eval_loss": 2.0783603191375732, + "eval_runtime": 7.6645, + "eval_samples_per_second": 8.22, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6372343609697695, "step": 584 }, { "epoch": 73.68, "learning_rate": 0.00025569620253164556, - "loss": 0.7074, + "loss": 0.5323, "step": 590 }, { "epoch": 73.9, - "eval_loss": 2.23608136177063, - "eval_runtime": 7.7615, - "eval_samples_per_second": 8.117, - "eval_steps_per_second": 1.031, - "eval_wer": 0.6932056270577671, + "eval_loss": 2.1598377227783203, + "eval_runtime": 7.5941, + "eval_samples_per_second": 8.296, + "eval_steps_per_second": 1.053, + "eval_wer": 0.657288237054774, "step": 592 }, { "epoch": 74.9, "learning_rate": 0.00025316455696202533, - "loss": 0.7237, + "loss": 0.5584, "step": 600 }, { "epoch": 74.9, - "eval_loss": 2.207817792892456, - "eval_runtime": 7.6966, - "eval_samples_per_second": 8.185, - "eval_steps_per_second": 1.039, - "eval_wer": 0.6857228374738102, + "eval_loss": 2.1240878105163574, + "eval_runtime": 7.7267, + "eval_samples_per_second": 8.154, + "eval_steps_per_second": 1.035, + "eval_wer": 0.6351391798862616, "step": 600 }, { "epoch": 75.9, - "eval_loss": 2.233380079269409, - "eval_runtime": 7.7786, - "eval_samples_per_second": 8.099, - "eval_steps_per_second": 1.028, - "eval_wer": 0.6905118228075426, + "eval_loss": 2.113551616668701, + "eval_runtime": 7.6206, + "eval_samples_per_second": 8.267, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6381322957198443, "step": 608 }, { "epoch": 76.23, "learning_rate": 0.0002506329113924051, - "loss": 0.7862, + "loss": 0.5979, "step": 610 }, { "epoch": 76.9, - "eval_loss": 2.3564865589141846, - "eval_runtime": 7.7597, - "eval_samples_per_second": 8.119, - "eval_steps_per_second": 1.031, - "eval_wer": 0.6976953008081412, + "eval_loss": 2.1425106525421143, + "eval_runtime": 7.6456, + "eval_samples_per_second": 8.24, + "eval_steps_per_second": 1.046, + "eval_wer": 0.6330439988027536, "step": 616 }, { "epoch": 77.45, "learning_rate": 0.0002481012658227848, - "loss": 0.7299, + "loss": 0.5525, "step": 620 }, { "epoch": 77.9, - "eval_loss": 2.1293139457702637, - "eval_runtime": 7.6982, - "eval_samples_per_second": 8.184, - "eval_steps_per_second": 1.039, - "eval_wer": 0.6779407363064951, + "eval_loss": 2.1255650520324707, + "eval_runtime": 7.6228, + "eval_samples_per_second": 8.265, + "eval_steps_per_second": 1.049, + "eval_wer": 0.6303501945525292, "step": 624 }, { "epoch": 78.68, "learning_rate": 0.0002455696202531646, - "loss": 0.6755, + "loss": 0.5197, "step": 630 }, { "epoch": 78.9, - "eval_loss": 2.2523531913757324, - "eval_runtime": 7.7601, - "eval_samples_per_second": 8.118, - "eval_steps_per_second": 1.031, - "eval_wer": 0.6860221490571685, + "eval_loss": 2.0801608562469482, + "eval_runtime": 7.634, + "eval_samples_per_second": 8.253, + "eval_steps_per_second": 1.048, + "eval_wer": 0.631248129302604, "step": 632 }, { "epoch": 79.9, "learning_rate": 0.00024303797468354434, - "loss": 0.724, + "loss": 0.5509, "step": 640 }, { "epoch": 79.9, - "eval_loss": 2.206925630569458, - "eval_runtime": 7.7271, - "eval_samples_per_second": 8.153, - "eval_steps_per_second": 1.035, - "eval_wer": 0.688715953307393, + "eval_loss": 2.110111713409424, + "eval_runtime": 7.6969, + "eval_samples_per_second": 8.185, + "eval_steps_per_second": 1.039, + "eval_wer": 0.6369350493864112, "step": 640 }, { "epoch": 80.9, - "eval_loss": 2.5267446041107178, - "eval_runtime": 7.7853, - "eval_samples_per_second": 8.092, - "eval_steps_per_second": 1.028, - "eval_wer": 0.6785393594732116, + "eval_loss": 2.078524112701416, + "eval_runtime": 7.7231, + "eval_samples_per_second": 8.157, + "eval_steps_per_second": 1.036, + "eval_wer": 0.6348398683029033, "step": 648 }, { "epoch": 81.23, "learning_rate": 0.00024050632911392405, - "loss": 0.7878, + "loss": 0.6176, "step": 650 }, { "epoch": 81.9, - "eval_loss": 2.6393752098083496, - "eval_runtime": 7.709, - "eval_samples_per_second": 8.172, - "eval_steps_per_second": 1.038, - "eval_wer": 0.6824304100568692, + "eval_loss": 2.0631349086761475, + "eval_runtime": 7.6229, + "eval_samples_per_second": 8.265, + "eval_steps_per_second": 1.049, + "eval_wer": 0.6270577671355881, "step": 656 }, { "epoch": 82.45, "learning_rate": 0.0002379746835443038, - "loss": 0.6882, + "loss": 0.5294, "step": 660 }, { "epoch": 82.9, - "eval_loss": 2.4647722244262695, - "eval_runtime": 7.8339, - "eval_samples_per_second": 8.042, - "eval_steps_per_second": 1.021, - "eval_wer": 0.6764441783897037, + "eval_loss": 2.1448206901550293, + "eval_runtime": 7.5975, + "eval_samples_per_second": 8.292, + "eval_steps_per_second": 1.053, + "eval_wer": 0.6357378030529781, "step": 664 }, { "epoch": 83.68, "learning_rate": 0.00023544303797468357, - "loss": 0.6996, + "loss": 0.5399, "step": 670 }, { "epoch": 83.9, - "eval_loss": 2.4115612506866455, - "eval_runtime": 7.7276, - "eval_samples_per_second": 8.153, - "eval_steps_per_second": 1.035, - "eval_wer": 0.6890152648907513, + "eval_loss": 2.2992684841156006, + "eval_runtime": 7.6959, + "eval_samples_per_second": 8.186, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6423226578868603, "step": 672 }, { "epoch": 84.9, "learning_rate": 0.0002329113924050633, - "loss": 0.7149, + "loss": 0.5507, "step": 680 }, { "epoch": 84.9, - "eval_loss": 2.1044137477874756, - "eval_runtime": 7.7519, - "eval_samples_per_second": 8.127, - "eval_steps_per_second": 1.032, - "eval_wer": 0.6893145764741095, + "eval_loss": 2.242187976837158, + "eval_runtime": 7.6929, + "eval_samples_per_second": 8.189, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6441185273870099, "step": 680 }, { "epoch": 85.9, - "eval_loss": 2.1447622776031494, - "eval_runtime": 7.7429, - "eval_samples_per_second": 8.136, - "eval_steps_per_second": 1.033, - "eval_wer": 0.6917090691409757, + "eval_loss": 2.1182749271392822, + "eval_runtime": 7.6056, + "eval_samples_per_second": 8.283, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6429212810535768, "step": 688 }, { "epoch": 86.23, "learning_rate": 0.00023037974683544303, - "loss": 0.7499, + "loss": 0.5813, "step": 690 }, { "epoch": 86.9, - "eval_loss": 2.2603471279144287, - "eval_runtime": 7.7748, - "eval_samples_per_second": 8.103, - "eval_steps_per_second": 1.029, - "eval_wer": 0.6875187069739599, + "eval_loss": 2.162194013595581, + "eval_runtime": 7.6302, + "eval_samples_per_second": 8.257, + "eval_steps_per_second": 1.048, + "eval_wer": 0.6408260999700689, "step": 696 }, { "epoch": 87.45, "learning_rate": 0.0002278481012658228, - "loss": 0.6881, + "loss": 0.5319, "step": 700 }, { "epoch": 87.9, - "eval_loss": 2.1306064128875732, - "eval_runtime": 7.7761, - "eval_samples_per_second": 8.102, - "eval_steps_per_second": 1.029, - "eval_wer": 0.6815324753067944, + "eval_loss": 2.0702972412109375, + "eval_runtime": 7.6258, + "eval_samples_per_second": 8.261, + "eval_steps_per_second": 1.049, + "eval_wer": 0.6363364262196947, "step": 704 }, { "epoch": 88.68, "learning_rate": 0.00022531645569620254, - "loss": 0.6652, + "loss": 0.5247, "step": 710 }, { "epoch": 88.9, - "eval_loss": 2.195241689682007, - "eval_runtime": 7.7296, - "eval_samples_per_second": 8.151, - "eval_steps_per_second": 1.035, - "eval_wer": 0.6905118228075426, + "eval_loss": 2.1978485584259033, + "eval_runtime": 7.6643, + "eval_samples_per_second": 8.22, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6411254115534272, "step": 712 }, { "epoch": 89.9, "learning_rate": 0.0002227848101265823, - "loss": 0.7093, + "loss": 0.5607, "step": 720 }, { "epoch": 89.9, - "eval_loss": 2.3550162315368652, - "eval_runtime": 7.7395, - "eval_samples_per_second": 8.14, - "eval_steps_per_second": 1.034, - "eval_wer": 0.676743489973062, + "eval_loss": 2.307122230529785, + "eval_runtime": 7.6348, + "eval_samples_per_second": 8.252, + "eval_steps_per_second": 1.048, + "eval_wer": 0.6411254115534272, "step": 720 }, { "epoch": 90.9, - "eval_loss": 2.261043071746826, - "eval_runtime": 7.7396, - "eval_samples_per_second": 8.14, - "eval_steps_per_second": 1.034, - "eval_wer": 0.6749476204729123, + "eval_loss": 2.2638070583343506, + "eval_runtime": 7.6046, + "eval_samples_per_second": 8.285, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6303501945525292, "step": 728 }, { "epoch": 91.23, "learning_rate": 0.00022025316455696206, - "loss": 0.7439, + "loss": 0.5796, "step": 730 }, { "epoch": 91.9, - "eval_loss": 2.147209882736206, - "eval_runtime": 7.7312, - "eval_samples_per_second": 8.149, - "eval_steps_per_second": 1.035, - "eval_wer": 0.6857228374738102, + "eval_loss": 2.1073036193847656, + "eval_runtime": 7.6773, + "eval_samples_per_second": 8.206, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6441185273870099, "step": 736 }, { "epoch": 92.45, "learning_rate": 0.00021772151898734177, - "loss": 0.6898, + "loss": 0.5521, "step": 740 }, { "epoch": 92.9, - "eval_loss": 2.074415922164917, - "eval_runtime": 7.7637, - "eval_samples_per_second": 8.115, - "eval_steps_per_second": 1.03, - "eval_wer": 0.6881173301406764, + "eval_loss": 2.057901620864868, + "eval_runtime": 7.5831, + "eval_samples_per_second": 8.308, + "eval_steps_per_second": 1.055, + "eval_wer": 0.6456150853038013, "step": 744 }, { "epoch": 93.68, "learning_rate": 0.00021518987341772152, - "loss": 0.6734, + "loss": 0.5625, "step": 750 }, { "epoch": 93.9, - "eval_loss": 2.0897610187530518, - "eval_runtime": 7.7107, - "eval_samples_per_second": 8.17, - "eval_steps_per_second": 1.038, - "eval_wer": 0.6929063154744088, + "eval_loss": 2.0663516521453857, + "eval_runtime": 7.623, + "eval_samples_per_second": 8.264, + "eval_steps_per_second": 1.049, + "eval_wer": 0.6501047590541754, "step": 752 }, { "epoch": 94.9, "learning_rate": 0.00021265822784810126, - "loss": 0.6926, + "loss": 0.5901, "step": 760 }, { "epoch": 94.9, - "eval_loss": 2.089553117752075, - "eval_runtime": 7.721, - "eval_samples_per_second": 8.16, - "eval_steps_per_second": 1.036, - "eval_wer": 0.6683627656390302, + "eval_loss": 2.0674116611480713, + "eval_runtime": 7.5478, + "eval_samples_per_second": 8.347, + "eval_steps_per_second": 1.06, + "eval_wer": 0.6327446872193954, "step": 760 }, { "epoch": 95.9, - "eval_loss": 2.1928741931915283, - "eval_runtime": 7.7808, - "eval_samples_per_second": 8.097, - "eval_steps_per_second": 1.028, - "eval_wer": 0.6812331637234361, + "eval_loss": 2.185175895690918, + "eval_runtime": 7.6051, + "eval_samples_per_second": 8.284, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6381322957198443, "step": 768 }, { "epoch": 96.23, "learning_rate": 0.00021012658227848103, - "loss": 0.7154, + "loss": 0.5974, "step": 770 }, { "epoch": 96.9, - "eval_loss": 2.153787612915039, - "eval_runtime": 7.7478, - "eval_samples_per_second": 8.131, - "eval_steps_per_second": 1.033, - "eval_wer": 0.6860221490571685, + "eval_loss": 2.2212321758270264, + "eval_runtime": 7.6016, + "eval_samples_per_second": 8.288, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6387309188865609, "step": 776 }, { "epoch": 97.45, "learning_rate": 0.00020759493670886078, - "loss": 0.6493, + "loss": 0.5359, "step": 780 }, { "epoch": 97.9, - "eval_loss": 2.143815279006958, - "eval_runtime": 7.7358, - "eval_samples_per_second": 8.144, - "eval_steps_per_second": 1.034, - "eval_wer": 0.6815324753067944, + "eval_loss": 2.1027910709381104, + "eval_runtime": 7.6755, + "eval_samples_per_second": 8.208, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6390302304699191, "step": 784 }, { "epoch": 98.68, "learning_rate": 0.0002050632911392405, - "loss": 0.6755, + "loss": 0.5643, "step": 790 }, { "epoch": 98.9, - "eval_loss": 2.1560962200164795, - "eval_runtime": 7.7208, - "eval_samples_per_second": 8.16, - "eval_steps_per_second": 1.036, - "eval_wer": 0.6902125112241844, + "eval_loss": 2.1437582969665527, + "eval_runtime": 7.7627, + "eval_samples_per_second": 8.116, + "eval_steps_per_second": 1.031, + "eval_wer": 0.6516013169709668, "step": 792 }, { "epoch": 99.9, "learning_rate": 0.00020253164556962027, - "loss": 0.6667, + "loss": 0.5488, "step": 800 }, { "epoch": 99.9, - "eval_loss": 2.076709032058716, - "eval_runtime": 7.7317, - "eval_samples_per_second": 8.148, - "eval_steps_per_second": 1.035, - "eval_wer": 0.6908111343909009, + "eval_loss": 2.1104142665863037, + "eval_runtime": 7.5682, + "eval_samples_per_second": 8.324, + "eval_steps_per_second": 1.057, + "eval_wer": 0.6447171505537265, "step": 800 }, { "epoch": 100.9, - "eval_loss": 2.106370449066162, - "eval_runtime": 7.7472, - "eval_samples_per_second": 8.132, - "eval_steps_per_second": 1.033, - "eval_wer": 0.6785393594732116, + "eval_loss": 2.1389875411987305, + "eval_runtime": 7.6495, + "eval_samples_per_second": 8.236, + "eval_steps_per_second": 1.046, + "eval_wer": 0.639928165219994, "step": 808 }, { "epoch": 101.23, "learning_rate": 0.0002, - "loss": 0.7016, + "loss": 0.5906, "step": 810 }, { "epoch": 101.9, - "eval_loss": 2.227837085723877, - "eval_runtime": 8.0181, - "eval_samples_per_second": 7.857, - "eval_steps_per_second": 0.998, - "eval_wer": 0.676743489973062, + "eval_loss": 2.3832998275756836, + "eval_runtime": 7.5968, + "eval_samples_per_second": 8.293, + "eval_steps_per_second": 1.053, + "eval_wer": 0.6387309188865609, "step": 816 }, { "epoch": 102.45, "learning_rate": 0.00019746835443037975, - "loss": 0.6726, + "loss": 0.5735, "step": 820 }, { "epoch": 102.9, - "eval_loss": 2.261566162109375, - "eval_runtime": 7.7943, - "eval_samples_per_second": 8.083, - "eval_steps_per_second": 1.026, - "eval_wer": 0.6689613888057467, + "eval_loss": 2.4907209873199463, + "eval_runtime": 7.6164, + "eval_samples_per_second": 8.272, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6303501945525292, "step": 824 }, { "epoch": 103.68, "learning_rate": 0.0001949367088607595, - "loss": 0.6725, + "loss": 0.5617, "step": 830 }, { "epoch": 103.9, - "eval_loss": 2.133070230484009, - "eval_runtime": 7.7265, - "eval_samples_per_second": 8.154, - "eval_steps_per_second": 1.035, - "eval_wer": 0.6878180185573182, + "eval_loss": 2.117652416229248, + "eval_runtime": 7.5597, + "eval_samples_per_second": 8.334, + "eval_steps_per_second": 1.058, + "eval_wer": 0.6438192158036516, "step": 832 }, { "epoch": 104.9, "learning_rate": 0.00019240506329113924, - "loss": 0.6657, + "loss": 0.5547, "step": 840 }, { "epoch": 104.9, - "eval_loss": 2.1497416496276855, - "eval_runtime": 7.7253, - "eval_samples_per_second": 8.155, - "eval_steps_per_second": 1.036, - "eval_wer": 0.6731517509727627, + "eval_loss": 2.0854134559631348, + "eval_runtime": 7.6526, + "eval_samples_per_second": 8.233, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6366357378030529, "step": 840 }, { "epoch": 105.9, - "eval_loss": 2.160142421722412, - "eval_runtime": 7.7477, - "eval_samples_per_second": 8.131, - "eval_steps_per_second": 1.033, - "eval_wer": 0.6737503741394792, + "eval_loss": 2.1921279430389404, + "eval_runtime": 7.5438, + "eval_samples_per_second": 8.351, + "eval_steps_per_second": 1.06, + "eval_wer": 0.6411254115534272, "step": 848 }, { "epoch": 106.23, "learning_rate": 0.00018987341772151899, - "loss": 0.6989, + "loss": 0.5805, "step": 850 }, { "epoch": 106.9, - "eval_loss": 2.319141149520874, - "eval_runtime": 7.732, - "eval_samples_per_second": 8.148, - "eval_steps_per_second": 1.035, - "eval_wer": 0.6674648308889554, + "eval_loss": 2.275364398956299, + "eval_runtime": 7.657, + "eval_samples_per_second": 8.228, + "eval_steps_per_second": 1.045, + "eval_wer": 0.631248129302604, "step": 856 }, { "epoch": 107.45, "learning_rate": 0.00018734177215189873, - "loss": 0.6658, + "loss": 0.5455, "step": 860 }, { "epoch": 107.9, - "eval_loss": 2.354734420776367, - "eval_runtime": 7.7273, - "eval_samples_per_second": 8.153, - "eval_steps_per_second": 1.035, - "eval_wer": 0.6788386710565699, + "eval_loss": 2.280198574066162, + "eval_runtime": 7.6544, + "eval_samples_per_second": 8.231, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6348398683029033, "step": 864 }, { "epoch": 108.68, "learning_rate": 0.0001848101265822785, - "loss": 0.6398, + "loss": 0.5342, "step": 870 }, { "epoch": 108.9, - "eval_loss": 2.3368043899536133, - "eval_runtime": 7.7551, - "eval_samples_per_second": 8.124, - "eval_steps_per_second": 1.032, - "eval_wer": 0.6740496857228375, + "eval_loss": 2.321939468383789, + "eval_runtime": 7.6518, + "eval_samples_per_second": 8.233, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6324453756360371, "step": 872 }, { "epoch": 109.9, "learning_rate": 0.00018227848101265824, - "loss": 0.6465, + "loss": 0.5372, "step": 880 }, { "epoch": 109.9, - "eval_loss": 2.1896259784698486, - "eval_runtime": 7.7364, - "eval_samples_per_second": 8.143, - "eval_steps_per_second": 1.034, - "eval_wer": 0.6806345405567196, + "eval_loss": 2.090009927749634, + "eval_runtime": 7.6209, + "eval_samples_per_second": 8.267, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6423226578868603, "step": 880 }, { "epoch": 110.9, - "eval_loss": 2.1210110187530518, - "eval_runtime": 7.7217, - "eval_samples_per_second": 8.159, - "eval_steps_per_second": 1.036, - "eval_wer": 0.6797366058066447, + "eval_loss": 1.9904694557189941, + "eval_runtime": 7.6862, + "eval_samples_per_second": 8.196, + "eval_steps_per_second": 1.041, + "eval_wer": 0.6351391798862616, "step": 888 }, { "epoch": 111.23, "learning_rate": 0.000179746835443038, - "loss": 0.727, + "loss": 0.6146, "step": 890 }, { "epoch": 111.9, - "eval_loss": 2.3508195877075195, - "eval_runtime": 7.7774, - "eval_samples_per_second": 8.1, - "eval_steps_per_second": 1.029, - "eval_wer": 0.6686620772223885, + "eval_loss": 2.2073147296905518, + "eval_runtime": 7.7334, + "eval_samples_per_second": 8.146, + "eval_steps_per_second": 1.034, + "eval_wer": 0.6294522598024543, "step": 896 }, { "epoch": 112.45, "learning_rate": 0.00017721518987341773, - "loss": 0.6409, + "loss": 0.5517, "step": 900 }, { "epoch": 112.9, - "eval_loss": 2.3439958095550537, - "eval_runtime": 7.7324, - "eval_samples_per_second": 8.148, - "eval_steps_per_second": 1.035, - "eval_wer": 0.6752469320562706, + "eval_loss": 2.2817723751068115, + "eval_runtime": 7.6895, + "eval_samples_per_second": 8.193, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6387309188865609, "step": 904 }, { "epoch": 113.68, "learning_rate": 0.00017468354430379748, - "loss": 0.6573, + "loss": 0.5501, "step": 910 }, { "epoch": 113.9, - "eval_loss": 2.269505500793457, - "eval_runtime": 7.7658, - "eval_samples_per_second": 8.112, - "eval_steps_per_second": 1.03, - "eval_wer": 0.6623765339718647, + "eval_loss": 2.4255876541137695, + "eval_runtime": 7.5778, + "eval_samples_per_second": 8.314, + "eval_steps_per_second": 1.056, + "eval_wer": 0.6318467524693205, "step": 912 }, { "epoch": 114.9, "learning_rate": 0.00017215189873417722, - "loss": 0.645, + "loss": 0.5469, "step": 920 }, { "epoch": 114.9, - "eval_loss": 2.1471199989318848, - "eval_runtime": 7.7501, - "eval_samples_per_second": 8.129, - "eval_steps_per_second": 1.032, - "eval_wer": 0.6770428015564203, + "eval_loss": 2.2073605060577393, + "eval_runtime": 7.6787, + "eval_samples_per_second": 8.205, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6411254115534272, "step": 920 }, { "epoch": 115.9, - "eval_loss": 2.186685562133789, - "eval_runtime": 7.7952, - "eval_samples_per_second": 8.082, - "eval_steps_per_second": 1.026, - "eval_wer": 0.6743489973061958, + "eval_loss": 2.2370004653930664, + "eval_runtime": 7.5756, + "eval_samples_per_second": 8.316, + "eval_steps_per_second": 1.056, + "eval_wer": 0.6282550134690212, "step": 928 }, { "epoch": 116.23, "learning_rate": 0.00016962025316455696, - "loss": 0.7103, + "loss": 0.6065, "step": 930 }, { "epoch": 116.9, - "eval_loss": 2.232990026473999, - "eval_runtime": 7.7463, - "eval_samples_per_second": 8.133, - "eval_steps_per_second": 1.033, - "eval_wer": 0.6701586351391798, + "eval_loss": 2.2338502407073975, + "eval_runtime": 7.6802, + "eval_samples_per_second": 8.203, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6267584555522299, "step": 936 }, { "epoch": 117.45, "learning_rate": 0.0001670886075949367, - "loss": 0.6214, + "loss": 0.5265, "step": 940 }, { "epoch": 117.9, - "eval_loss": 2.217428207397461, - "eval_runtime": 7.7992, - "eval_samples_per_second": 8.078, - "eval_steps_per_second": 1.026, - "eval_wer": 0.6686620772223885, + "eval_loss": 2.2717647552490234, + "eval_runtime": 7.6002, + "eval_samples_per_second": 8.289, + "eval_steps_per_second": 1.053, + "eval_wer": 0.6234660281352888, "step": 944 }, { "epoch": 118.68, "learning_rate": 0.00016455696202531648, - "loss": 0.6134, + "loss": 0.512, "step": 950 }, { "epoch": 118.9, - "eval_loss": 2.198005437850952, - "eval_runtime": 7.8436, - "eval_samples_per_second": 8.032, - "eval_steps_per_second": 1.02, - "eval_wer": 0.6620772223885064, + "eval_loss": 2.1963329315185547, + "eval_runtime": 7.6507, + "eval_samples_per_second": 8.235, + "eval_steps_per_second": 1.046, + "eval_wer": 0.6333433103861119, "step": 952 }, { "epoch": 119.9, "learning_rate": 0.0001620253164556962, - "loss": 0.6612, + "loss": 0.5571, "step": 960 }, { "epoch": 119.9, - "eval_loss": 2.2890524864196777, - "eval_runtime": 7.8296, - "eval_samples_per_second": 8.046, - "eval_steps_per_second": 1.022, - "eval_wer": 0.6749476204729123, + "eval_loss": 2.2201318740844727, + "eval_runtime": 7.6307, + "eval_samples_per_second": 8.256, + "eval_steps_per_second": 1.048, + "eval_wer": 0.6348398683029033, "step": 960 }, { "epoch": 120.9, - "eval_loss": 2.2862656116485596, - "eval_runtime": 7.7538, - "eval_samples_per_second": 8.125, - "eval_steps_per_second": 1.032, - "eval_wer": 0.6641724034720143, + "eval_loss": 2.110605478286743, + "eval_runtime": 7.6664, + "eval_samples_per_second": 8.218, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6330439988027536, "step": 968 }, { "epoch": 121.23, "learning_rate": 0.00015949367088607597, - "loss": 0.688, + "loss": 0.5778, "step": 970 }, { "epoch": 121.9, - "eval_loss": 2.319775342941284, - "eval_runtime": 7.7964, - "eval_samples_per_second": 8.081, - "eval_steps_per_second": 1.026, - "eval_wer": 0.6668662077222388, + "eval_loss": 2.3301584720611572, + "eval_runtime": 7.7674, + "eval_samples_per_second": 8.111, + "eval_steps_per_second": 1.03, + "eval_wer": 0.6306495061358874, "step": 976 }, { "epoch": 122.45, "learning_rate": 0.00015696202531645568, - "loss": 0.6451, + "loss": 0.539, "step": 980 }, { "epoch": 122.9, - "eval_loss": 2.169647216796875, - "eval_runtime": 7.8494, - "eval_samples_per_second": 8.026, - "eval_steps_per_second": 1.019, - "eval_wer": 0.6668662077222388, + "eval_loss": 2.371450662612915, + "eval_runtime": 7.6444, + "eval_samples_per_second": 8.241, + "eval_steps_per_second": 1.047, + "eval_wer": 0.6273570787189464, "step": 984 }, { "epoch": 123.68, "learning_rate": 0.00015443037974683546, - "loss": 0.6308, + "loss": 0.5306, "step": 990 }, { "epoch": 123.9, - "eval_loss": 2.125494956970215, - "eval_runtime": 7.8606, - "eval_samples_per_second": 8.015, - "eval_steps_per_second": 1.018, - "eval_wer": 0.6596827297216402, + "eval_loss": 2.241682529449463, + "eval_runtime": 7.5759, + "eval_samples_per_second": 8.316, + "eval_steps_per_second": 1.056, + "eval_wer": 0.6351391798862616, "step": 992 }, { "epoch": 124.9, "learning_rate": 0.0001518987341772152, - "loss": 0.6359, + "loss": 0.5271, "step": 1000 }, { "epoch": 124.9, - "eval_loss": 2.2053353786468506, - "eval_runtime": 7.7903, - "eval_samples_per_second": 8.087, - "eval_steps_per_second": 1.027, - "eval_wer": 0.6569889254714157, + "eval_loss": 2.169471502304077, + "eval_runtime": 7.5614, + "eval_samples_per_second": 8.332, + "eval_steps_per_second": 1.058, + "eval_wer": 0.6249625860520802, "step": 1000 }, { "epoch": 125.9, - "eval_loss": 2.1914987564086914, - "eval_runtime": 7.7966, - "eval_samples_per_second": 8.08, - "eval_steps_per_second": 1.026, - "eval_wer": 0.6581861718048488, + "eval_loss": 2.1912169456481934, + "eval_runtime": 7.6883, + "eval_samples_per_second": 8.194, + "eval_steps_per_second": 1.041, + "eval_wer": 0.627955701885663, "step": 1008 }, { "epoch": 126.23, "learning_rate": 0.00014936708860759494, - "loss": 0.6845, + "loss": 0.5766, "step": 1010 }, { "epoch": 126.9, - "eval_loss": 2.1405885219573975, - "eval_runtime": 7.7604, - "eval_samples_per_second": 8.118, - "eval_steps_per_second": 1.031, - "eval_wer": 0.6656689613888057, + "eval_loss": 2.1122422218322754, + "eval_runtime": 7.6874, + "eval_samples_per_second": 8.195, + "eval_steps_per_second": 1.041, + "eval_wer": 0.6339419335528285, "step": 1016 }, { "epoch": 127.45, "learning_rate": 0.0001468354430379747, - "loss": 0.6609, + "loss": 0.5483, "step": 1020 }, { "epoch": 127.9, - "eval_loss": 2.1851718425750732, - "eval_runtime": 7.745, - "eval_samples_per_second": 8.134, - "eval_steps_per_second": 1.033, - "eval_wer": 0.6752469320562706, + "eval_loss": 2.0696394443511963, + "eval_runtime": 7.6797, + "eval_samples_per_second": 8.203, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6321460640526788, "step": 1024 }, { "epoch": 128.68, "learning_rate": 0.00014430379746835443, - "loss": 0.6345, + "loss": 0.5414, "step": 1030 }, { "epoch": 128.9, - "eval_loss": 2.1838414669036865, - "eval_runtime": 7.7504, - "eval_samples_per_second": 8.129, - "eval_steps_per_second": 1.032, - "eval_wer": 0.6620772223885064, + "eval_loss": 2.093480348587036, + "eval_runtime": 7.6571, + "eval_samples_per_second": 8.228, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6315474408859623, "step": 1032 }, { "epoch": 129.9, "learning_rate": 0.00014177215189873418, - "loss": 0.6055, + "loss": 0.5125, "step": 1040 }, { "epoch": 129.9, - "eval_loss": 2.158566474914551, - "eval_runtime": 7.7809, - "eval_samples_per_second": 8.097, - "eval_steps_per_second": 1.028, - "eval_wer": 0.6701586351391798, + "eval_loss": 2.1692750453948975, + "eval_runtime": 7.609, + "eval_samples_per_second": 8.28, + "eval_steps_per_second": 1.051, + "eval_wer": 0.6336426219694702, "step": 1040 }, { "epoch": 130.9, - "eval_loss": 2.1627261638641357, - "eval_runtime": 7.7707, - "eval_samples_per_second": 8.107, - "eval_steps_per_second": 1.03, - "eval_wer": 0.6680634540556719, + "eval_loss": 2.135119915008545, + "eval_runtime": 7.7121, + "eval_samples_per_second": 8.169, + "eval_steps_per_second": 1.037, + "eval_wer": 0.6315474408859623, "step": 1048 }, { "epoch": 131.23, "learning_rate": 0.00013924050632911395, - "loss": 0.6737, + "loss": 0.5733, "step": 1050 }, { "epoch": 131.9, - "eval_loss": 2.263141632080078, - "eval_runtime": 7.8874, - "eval_samples_per_second": 7.987, - "eval_steps_per_second": 1.014, - "eval_wer": 0.6761448668063454, + "eval_loss": 2.1570212841033936, + "eval_runtime": 7.6856, + "eval_samples_per_second": 8.197, + "eval_steps_per_second": 1.041, + "eval_wer": 0.6405267883867105, "step": 1056 }, { "epoch": 132.45, "learning_rate": 0.00013670886075949366, - "loss": 0.6237, + "loss": 0.5285, "step": 1060 }, { "epoch": 132.9, - "eval_loss": 2.2553627490997314, - "eval_runtime": 7.755, - "eval_samples_per_second": 8.124, - "eval_steps_per_second": 1.032, - "eval_wer": 0.6620772223885064, + "eval_loss": 2.1996734142303467, + "eval_runtime": 7.7021, + "eval_samples_per_second": 8.18, + "eval_steps_per_second": 1.039, + "eval_wer": 0.6309488177192457, "step": 1064 }, { "epoch": 133.68, "learning_rate": 0.00013417721518987343, - "loss": 0.6468, + "loss": 0.5426, "step": 1070 }, { "epoch": 133.9, - "eval_loss": 2.2538866996765137, - "eval_runtime": 7.7947, - "eval_samples_per_second": 8.082, - "eval_steps_per_second": 1.026, - "eval_wer": 0.6668662077222388, + "eval_loss": 2.1216108798980713, + "eval_runtime": 7.7071, + "eval_samples_per_second": 8.174, + "eval_steps_per_second": 1.038, + "eval_wer": 0.6321460640526788, "step": 1072 }, { "epoch": 134.9, "learning_rate": 0.00013164556962025315, - "loss": 0.5948, + "loss": 0.5018, "step": 1080 }, { "epoch": 134.9, - "eval_loss": 2.2463722229003906, - "eval_runtime": 7.7617, - "eval_samples_per_second": 8.117, - "eval_steps_per_second": 1.031, - "eval_wer": 0.6516013169709668, + "eval_loss": 2.174179792404175, + "eval_runtime": 7.6539, + "eval_samples_per_second": 8.231, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6246632744687219, "step": 1080 }, { "epoch": 135.9, - "eval_loss": 2.349128484725952, - "eval_runtime": 7.7593, - "eval_samples_per_second": 8.119, - "eval_steps_per_second": 1.031, - "eval_wer": 0.6620772223885064, + "eval_loss": 2.120788335800171, + "eval_runtime": 7.6772, + "eval_samples_per_second": 8.206, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6303501945525292, "step": 1088 }, { "epoch": 136.23, "learning_rate": 0.00012911392405063292, - "loss": 0.6645, + "loss": 0.5611, "step": 1090 }, { "epoch": 136.9, - "eval_loss": 2.2536532878875732, - "eval_runtime": 7.8036, - "eval_samples_per_second": 8.073, - "eval_steps_per_second": 1.025, - "eval_wer": 0.6620772223885064, + "eval_loss": 2.122828483581543, + "eval_runtime": 7.6522, + "eval_samples_per_second": 8.233, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6303501945525292, "step": 1096 }, { "epoch": 137.45, "learning_rate": 0.00012658227848101267, - "loss": 0.6195, + "loss": 0.5258, "step": 1100 }, { "epoch": 137.9, - "eval_loss": 2.371696710586548, - "eval_runtime": 7.7793, - "eval_samples_per_second": 8.098, - "eval_steps_per_second": 1.028, - "eval_wer": 0.6665668961388805, + "eval_loss": 2.225569248199463, + "eval_runtime": 7.692, + "eval_samples_per_second": 8.19, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6309488177192457, "step": 1104 }, { "epoch": 138.68, "learning_rate": 0.0001240506329113924, - "loss": 0.6317, + "loss": 0.5364, "step": 1110 }, { "epoch": 138.9, - "eval_loss": 2.2024903297424316, - "eval_runtime": 7.8765, - "eval_samples_per_second": 7.998, - "eval_steps_per_second": 1.016, - "eval_wer": 0.6551930559712661, + "eval_loss": 2.162257432937622, + "eval_runtime": 7.6234, + "eval_samples_per_second": 8.264, + "eval_steps_per_second": 1.049, + "eval_wer": 0.6306495061358874, "step": 1112 }, { "epoch": 139.9, "learning_rate": 0.00012151898734177217, - "loss": 0.6336, + "loss": 0.528, "step": 1120 }, { "epoch": 139.9, - "eval_loss": 2.142206907272339, - "eval_runtime": 7.8378, - "eval_samples_per_second": 8.038, - "eval_steps_per_second": 1.021, - "eval_wer": 0.6623765339718647, + "eval_loss": 2.0063600540161133, + "eval_runtime": 7.6673, + "eval_samples_per_second": 8.217, + "eval_steps_per_second": 1.043, + "eval_wer": 0.6288536366357378, "step": 1120 }, { "epoch": 140.9, - "eval_loss": 2.106228828430176, - "eval_runtime": 7.7968, - "eval_samples_per_second": 8.08, - "eval_steps_per_second": 1.026, - "eval_wer": 0.660580664471715, + "eval_loss": 2.0472218990325928, + "eval_runtime": 7.572, + "eval_samples_per_second": 8.32, + "eval_steps_per_second": 1.057, + "eval_wer": 0.6297515713858126, "step": 1128 }, { "epoch": 141.23, "learning_rate": 0.0001189873417721519, - "loss": 0.664, + "loss": 0.5637, "step": 1130 }, { "epoch": 141.9, - "eval_loss": 2.2253739833831787, - "eval_runtime": 7.7492, - "eval_samples_per_second": 8.13, - "eval_steps_per_second": 1.032, - "eval_wer": 0.6596827297216402, + "eval_loss": 2.190746307373047, + "eval_runtime": 7.6599, + "eval_samples_per_second": 8.225, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6318467524693205, "step": 1136 }, { "epoch": 142.45, "learning_rate": 0.00011645569620253166, - "loss": 0.6047, + "loss": 0.5051, "step": 1140 }, { "epoch": 142.9, - "eval_loss": 2.322563886642456, - "eval_runtime": 7.7874, - "eval_samples_per_second": 8.09, - "eval_steps_per_second": 1.027, - "eval_wer": 0.653995809637833, + "eval_loss": 2.156960964202881, + "eval_runtime": 7.648, + "eval_samples_per_second": 8.237, + "eval_steps_per_second": 1.046, + "eval_wer": 0.6291529482190961, "step": 1144 }, { "epoch": 143.68, "learning_rate": 0.0001139240506329114, - "loss": 0.6173, + "loss": 0.523, "step": 1150 }, { "epoch": 143.9, - "eval_loss": 2.227854013442993, - "eval_runtime": 7.8279, - "eval_samples_per_second": 8.048, - "eval_steps_per_second": 1.022, - "eval_wer": 0.6683627656390302, + "eval_loss": 2.049663782119751, + "eval_runtime": 7.6935, + "eval_samples_per_second": 8.189, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6423226578868603, "step": 1152 }, { "epoch": 144.9, "learning_rate": 0.00011139240506329114, - "loss": 0.6466, + "loss": 0.5516, "step": 1160 }, { "epoch": 144.9, - "eval_loss": 2.1866044998168945, - "eval_runtime": 7.7744, - "eval_samples_per_second": 8.104, - "eval_steps_per_second": 1.029, - "eval_wer": 0.657288237054774, + "eval_loss": 2.090688467025757, + "eval_runtime": 7.6869, + "eval_samples_per_second": 8.196, + "eval_steps_per_second": 1.041, + "eval_wer": 0.6324453756360371, "step": 1160 }, { "epoch": 145.9, - "eval_loss": 2.2489023208618164, - "eval_runtime": 7.7715, - "eval_samples_per_second": 8.107, - "eval_steps_per_second": 1.029, - "eval_wer": 0.6590841065549237, + "eval_loss": 2.147946357727051, + "eval_runtime": 7.6864, + "eval_samples_per_second": 8.196, + "eval_steps_per_second": 1.041, + "eval_wer": 0.6309488177192457, "step": 1168 }, { "epoch": 146.23, "learning_rate": 0.00010886075949367089, - "loss": 0.6585, + "loss": 0.5524, "step": 1170 }, { "epoch": 146.9, - "eval_loss": 2.2273900508880615, - "eval_runtime": 7.821, - "eval_samples_per_second": 8.055, - "eval_steps_per_second": 1.023, - "eval_wer": 0.6480095779706675, + "eval_loss": 2.0570223331451416, + "eval_runtime": 7.5902, + "eval_samples_per_second": 8.3, + "eval_steps_per_second": 1.054, + "eval_wer": 0.6273570787189464, "step": 1176 }, { "epoch": 147.45, "learning_rate": 0.00010632911392405063, - "loss": 0.6244, + "loss": 0.5215, "step": 1180 }, { "epoch": 147.9, - "eval_loss": 2.1959400177001953, - "eval_runtime": 7.8697, - "eval_samples_per_second": 8.005, - "eval_steps_per_second": 1.017, - "eval_wer": 0.662675845555223, + "eval_loss": 2.137968063354492, + "eval_runtime": 7.6003, + "eval_samples_per_second": 8.289, + "eval_steps_per_second": 1.053, + "eval_wer": 0.6339419335528285, "step": 1184 }, { "epoch": 148.68, "learning_rate": 0.00010379746835443039, - "loss": 0.6527, + "loss": 0.5447, "step": 1190 }, { "epoch": 148.9, - "eval_loss": 2.2115025520324707, - "eval_runtime": 7.8828, - "eval_samples_per_second": 7.992, - "eval_steps_per_second": 1.015, - "eval_wer": 0.6593834181382819, + "eval_loss": 2.2314438819885254, + "eval_runtime": 7.671, + "eval_samples_per_second": 8.213, + "eval_steps_per_second": 1.043, + "eval_wer": 0.6303501945525292, "step": 1192 }, { "epoch": 149.9, "learning_rate": 0.00010126582278481013, - "loss": 0.6247, + "loss": 0.521, "step": 1200 }, { "epoch": 149.9, - "eval_loss": 2.2804923057556152, - "eval_runtime": 7.7687, - "eval_samples_per_second": 8.109, - "eval_steps_per_second": 1.03, - "eval_wer": 0.6620772223885064, + "eval_loss": 2.147305727005005, + "eval_runtime": 7.6063, + "eval_samples_per_second": 8.283, + "eval_steps_per_second": 1.052, + "eval_wer": 0.6333433103861119, "step": 1200 }, { "epoch": 150.9, - "eval_loss": 2.2128920555114746, - "eval_runtime": 7.7584, - "eval_samples_per_second": 8.12, - "eval_steps_per_second": 1.031, - "eval_wer": 0.6578868602214906, + "eval_loss": 2.1240322589874268, + "eval_runtime": 7.5902, + "eval_samples_per_second": 8.3, + "eval_steps_per_second": 1.054, + "eval_wer": 0.6291529482190961, "step": 1208 }, { "epoch": 151.23, "learning_rate": 9.873417721518988e-05, - "loss": 0.6614, + "loss": 0.5501, "step": 1210 }, { "epoch": 151.9, - "eval_loss": 2.23846697807312, - "eval_runtime": 7.7588, - "eval_samples_per_second": 8.12, - "eval_steps_per_second": 1.031, - "eval_wer": 0.6635737803052978, + "eval_loss": 2.1305501461029053, + "eval_runtime": 7.5935, + "eval_samples_per_second": 8.297, + "eval_steps_per_second": 1.054, + "eval_wer": 0.6216701586351392, "step": 1216 }, { "epoch": 152.45, "learning_rate": 9.620253164556962e-05, - "loss": 0.6309, + "loss": 0.5309, "step": 1220 }, { "epoch": 152.9, - "eval_loss": 2.275683641433716, - "eval_runtime": 7.8149, - "eval_samples_per_second": 8.061, - "eval_steps_per_second": 1.024, - "eval_wer": 0.6614785992217899, + "eval_loss": 2.1293582916259766, + "eval_runtime": 7.5958, + "eval_samples_per_second": 8.294, + "eval_steps_per_second": 1.053, + "eval_wer": 0.6315474408859623, "step": 1224 }, { "epoch": 153.68, "learning_rate": 9.367088607594936e-05, - "loss": 0.6501, + "loss": 0.5293, "step": 1230 }, { "epoch": 153.9, - "eval_loss": 2.3265960216522217, - "eval_runtime": 7.8195, - "eval_samples_per_second": 8.057, - "eval_steps_per_second": 1.023, - "eval_wer": 0.6647710266387309, + "eval_loss": 2.2012763023376465, + "eval_runtime": 7.6851, + "eval_samples_per_second": 8.198, + "eval_steps_per_second": 1.041, + "eval_wer": 0.6285543250523795, "step": 1232 }, { "epoch": 154.9, "learning_rate": 9.113924050632912e-05, - "loss": 0.5869, + "loss": 0.4898, "step": 1240 }, { "epoch": 154.9, - "eval_loss": 2.336057662963867, - "eval_runtime": 7.8329, - "eval_samples_per_second": 8.043, - "eval_steps_per_second": 1.021, - "eval_wer": 0.6632744687219395, + "eval_loss": 2.216932773590088, + "eval_runtime": 7.6132, + "eval_samples_per_second": 8.275, + "eval_steps_per_second": 1.051, + "eval_wer": 0.6291529482190961, "step": 1240 }, { "epoch": 155.9, - "eval_loss": 2.345245838165283, - "eval_runtime": 7.7323, - "eval_samples_per_second": 8.148, - "eval_steps_per_second": 1.035, - "eval_wer": 0.653995809637833, + "eval_loss": 2.2271268367767334, + "eval_runtime": 7.5691, + "eval_samples_per_second": 8.323, + "eval_steps_per_second": 1.057, + "eval_wer": 0.6237653397186471, "step": 1248 }, { "epoch": 156.23, "learning_rate": 8.860759493670887e-05, - "loss": 0.6676, + "loss": 0.559, "step": 1250 }, { "epoch": 156.9, - "eval_loss": 2.280003070831299, - "eval_runtime": 7.8173, - "eval_samples_per_second": 8.059, - "eval_steps_per_second": 1.023, - "eval_wer": 0.6614785992217899, + "eval_loss": 2.1803646087646484, + "eval_runtime": 7.6189, + "eval_samples_per_second": 8.269, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6276563903023047, "step": 1256 }, { "epoch": 157.45, "learning_rate": 8.607594936708861e-05, - "loss": 0.6494, + "loss": 0.5451, "step": 1260 }, { "epoch": 157.9, - "eval_loss": 2.305755376815796, - "eval_runtime": 7.8527, - "eval_samples_per_second": 8.023, - "eval_steps_per_second": 1.019, - "eval_wer": 0.6662675845555223, + "eval_loss": 2.1884472370147705, + "eval_runtime": 7.7078, + "eval_samples_per_second": 8.174, + "eval_steps_per_second": 1.038, + "eval_wer": 0.6303501945525292, "step": 1264 }, { "epoch": 158.68, "learning_rate": 8.354430379746835e-05, - "loss": 0.6017, + "loss": 0.5072, "step": 1270 }, { "epoch": 158.9, - "eval_loss": 2.2905781269073486, - "eval_runtime": 7.8309, - "eval_samples_per_second": 8.045, - "eval_steps_per_second": 1.022, - "eval_wer": 0.6662675845555223, + "eval_loss": 2.229902505874634, + "eval_runtime": 7.6574, + "eval_samples_per_second": 8.227, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6309488177192457, "step": 1272 }, { "epoch": 159.9, "learning_rate": 8.10126582278481e-05, - "loss": 0.6266, + "loss": 0.5259, "step": 1280 }, { "epoch": 159.9, - "eval_loss": 2.2315995693206787, - "eval_runtime": 7.7807, - "eval_samples_per_second": 8.097, - "eval_steps_per_second": 1.028, - "eval_wer": 0.6596827297216402, + "eval_loss": 2.16611909866333, + "eval_runtime": 7.6474, + "eval_samples_per_second": 8.238, + "eval_steps_per_second": 1.046, + "eval_wer": 0.625860520802155, "step": 1280 }, { "epoch": 160.9, - "eval_loss": 2.188622236251831, - "eval_runtime": 7.8175, - "eval_samples_per_second": 8.059, - "eval_steps_per_second": 1.023, - "eval_wer": 0.6710565698892547, + "eval_loss": 2.157914638519287, + "eval_runtime": 7.699, + "eval_samples_per_second": 8.183, + "eval_steps_per_second": 1.039, + "eval_wer": 0.6264591439688716, "step": 1288 }, { "epoch": 161.23, "learning_rate": 7.848101265822784e-05, - "loss": 0.6704, + "loss": 0.5609, "step": 1290 }, { "epoch": 161.9, - "eval_loss": 2.3184380531311035, - "eval_runtime": 7.8037, - "eval_samples_per_second": 8.073, - "eval_steps_per_second": 1.025, - "eval_wer": 0.6590841065549237, + "eval_loss": 2.2085673809051514, + "eval_runtime": 7.6653, + "eval_samples_per_second": 8.219, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6168811733014068, "step": 1296 }, { "epoch": 162.45, "learning_rate": 7.59493670886076e-05, - "loss": 0.6239, + "loss": 0.5168, "step": 1300 }, { "epoch": 162.9, - "eval_loss": 2.3544297218322754, - "eval_runtime": 7.7214, - "eval_samples_per_second": 8.159, - "eval_steps_per_second": 1.036, - "eval_wer": 0.6617779108051481, + "eval_loss": 2.146618366241455, + "eval_runtime": 7.6769, + "eval_samples_per_second": 8.206, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6222687818018557, "step": 1304 }, { "epoch": 163.68, "learning_rate": 7.341772151898734e-05, - "loss": 0.5997, + "loss": 0.4984, "step": 1310 }, { "epoch": 163.9, - "eval_loss": 2.298438310623169, - "eval_runtime": 7.7258, - "eval_samples_per_second": 8.155, - "eval_steps_per_second": 1.035, - "eval_wer": 0.6677641424723136, + "eval_loss": 2.141826868057251, + "eval_runtime": 7.6477, + "eval_samples_per_second": 8.238, + "eval_steps_per_second": 1.046, + "eval_wer": 0.625860520802155, "step": 1312 }, { "epoch": 164.9, "learning_rate": 7.088607594936709e-05, - "loss": 0.6228, + "loss": 0.5254, "step": 1320 }, { "epoch": 164.9, - "eval_loss": 2.293006658554077, - "eval_runtime": 7.7223, - "eval_samples_per_second": 8.158, - "eval_steps_per_second": 1.036, - "eval_wer": 0.669260700389105, + "eval_loss": 2.117159605026245, + "eval_runtime": 7.6915, + "eval_samples_per_second": 8.191, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6282550134690212, "step": 1320 }, { "epoch": 165.9, - "eval_loss": 2.3272392749786377, - "eval_runtime": 7.7917, - "eval_samples_per_second": 8.086, - "eval_steps_per_second": 1.027, - "eval_wer": 0.6584854833882071, + "eval_loss": 2.091912031173706, + "eval_runtime": 7.6372, + "eval_samples_per_second": 8.249, + "eval_steps_per_second": 1.048, + "eval_wer": 0.6246632744687219, "step": 1328 }, { "epoch": 166.23, "learning_rate": 6.835443037974683e-05, - "loss": 0.6683, + "loss": 0.5685, "step": 1330 }, { "epoch": 166.9, - "eval_loss": 2.3456509113311768, - "eval_runtime": 7.7579, - "eval_samples_per_second": 8.121, - "eval_steps_per_second": 1.031, - "eval_wer": 0.657288237054774, + "eval_loss": 2.1054956912994385, + "eval_runtime": 7.6658, + "eval_samples_per_second": 8.218, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6261598323855133, "step": 1336 }, { "epoch": 167.45, "learning_rate": 6.582278481012658e-05, - "loss": 0.598, + "loss": 0.4952, "step": 1340 }, { "epoch": 167.9, - "eval_loss": 2.2177622318267822, - "eval_runtime": 7.8139, - "eval_samples_per_second": 8.063, - "eval_steps_per_second": 1.024, - "eval_wer": 0.6638730918886561, + "eval_loss": 2.083932638168335, + "eval_runtime": 7.6404, + "eval_samples_per_second": 8.246, + "eval_steps_per_second": 1.047, + "eval_wer": 0.6252618976354385, "step": 1344 }, { "epoch": 168.68, "learning_rate": 6.329113924050633e-05, - "loss": 0.6164, + "loss": 0.5024, "step": 1350 }, { "epoch": 168.9, - "eval_loss": 2.1438896656036377, - "eval_runtime": 7.7254, - "eval_samples_per_second": 8.155, - "eval_steps_per_second": 1.036, - "eval_wer": 0.6542951212211913, + "eval_loss": 2.0244107246398926, + "eval_runtime": 7.6196, + "eval_samples_per_second": 8.268, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6255612092187968, "step": 1352 }, { "epoch": 169.9, "learning_rate": 6.0759493670886084e-05, - "loss": 0.5963, + "loss": 0.5028, "step": 1360 }, { "epoch": 169.9, - "eval_loss": 2.1238651275634766, - "eval_runtime": 7.8668, - "eval_samples_per_second": 8.008, - "eval_steps_per_second": 1.017, - "eval_wer": 0.6513020053876085, + "eval_loss": 2.0157837867736816, + "eval_runtime": 7.7009, + "eval_samples_per_second": 8.181, + "eval_steps_per_second": 1.039, + "eval_wer": 0.6240646513020054, "step": 1360 }, { "epoch": 170.9, - "eval_loss": 2.1392319202423096, - "eval_runtime": 7.7989, - "eval_samples_per_second": 8.078, - "eval_steps_per_second": 1.026, - "eval_wer": 0.6593834181382819, + "eval_loss": 2.009673833847046, + "eval_runtime": 7.6823, + "eval_samples_per_second": 8.201, + "eval_steps_per_second": 1.041, + "eval_wer": 0.6240646513020054, "step": 1368 }, { "epoch": 171.23, "learning_rate": 5.822784810126583e-05, - "loss": 0.6782, + "loss": 0.5731, "step": 1370 }, { "epoch": 171.9, - "eval_loss": 2.129168748855591, - "eval_runtime": 7.8187, - "eval_samples_per_second": 8.058, - "eval_steps_per_second": 1.023, - "eval_wer": 0.6578868602214906, + "eval_loss": 1.988526463508606, + "eval_runtime": 7.6305, + "eval_samples_per_second": 8.256, + "eval_steps_per_second": 1.048, + "eval_wer": 0.6216701586351392, "step": 1376 }, { "epoch": 172.45, "learning_rate": 5.569620253164557e-05, - "loss": 0.5783, + "loss": 0.4829, "step": 1380 }, { "epoch": 172.9, - "eval_loss": 2.1256933212280273, - "eval_runtime": 7.8785, - "eval_samples_per_second": 7.996, - "eval_steps_per_second": 1.015, - "eval_wer": 0.6596827297216402, + "eval_loss": 1.9991979598999023, + "eval_runtime": 7.591, + "eval_samples_per_second": 8.299, + "eval_steps_per_second": 1.054, + "eval_wer": 0.6237653397186471, "step": 1384 }, { "epoch": 173.68, "learning_rate": 5.3164556962025316e-05, - "loss": 0.6087, + "loss": 0.5101, "step": 1390 }, { "epoch": 173.9, - "eval_loss": 2.125344753265381, - "eval_runtime": 7.7825, - "eval_samples_per_second": 8.095, - "eval_steps_per_second": 1.028, - "eval_wer": 0.6593834181382819, + "eval_loss": 1.9918380975723267, + "eval_runtime": 7.7023, + "eval_samples_per_second": 8.179, + "eval_steps_per_second": 1.039, + "eval_wer": 0.6210715354684226, "step": 1392 }, { "epoch": 174.9, "learning_rate": 5.0632911392405066e-05, - "loss": 0.6045, + "loss": 0.5058, "step": 1400 }, { "epoch": 174.9, - "eval_loss": 2.133322238922119, - "eval_runtime": 7.8569, - "eval_samples_per_second": 8.018, - "eval_steps_per_second": 1.018, - "eval_wer": 0.6560909907213409, + "eval_loss": 1.9633089303970337, + "eval_runtime": 7.6237, + "eval_samples_per_second": 8.264, + "eval_steps_per_second": 1.049, + "eval_wer": 0.6282550134690212, "step": 1400 }, { "epoch": 175.9, - "eval_loss": 2.1041812896728516, - "eval_runtime": 7.7837, - "eval_samples_per_second": 8.094, - "eval_steps_per_second": 1.028, - "eval_wer": 0.650703382220892, + "eval_loss": 1.9551260471343994, + "eval_runtime": 7.6939, + "eval_samples_per_second": 8.188, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6228674049685723, "step": 1408 }, { "epoch": 176.23, "learning_rate": 4.810126582278481e-05, - "loss": 0.6299, + "loss": 0.5182, "step": 1410 }, { "epoch": 176.9, - "eval_loss": 2.111027956008911, - "eval_runtime": 7.8074, - "eval_samples_per_second": 8.069, - "eval_steps_per_second": 1.025, - "eval_wer": 0.6569889254714157, + "eval_loss": 2.016941785812378, + "eval_runtime": 7.658, + "eval_samples_per_second": 8.227, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6162825501346902, "step": 1416 }, { "epoch": 177.45, "learning_rate": 4.556962025316456e-05, - "loss": 0.6401, + "loss": 0.5443, "step": 1420 }, { "epoch": 177.9, - "eval_loss": 2.116133451461792, - "eval_runtime": 7.7361, - "eval_samples_per_second": 8.144, - "eval_steps_per_second": 1.034, - "eval_wer": 0.6611792876384316, + "eval_loss": 2.016011953353882, + "eval_runtime": 7.599, + "eval_samples_per_second": 8.291, + "eval_steps_per_second": 1.053, + "eval_wer": 0.6186770428015564, "step": 1424 }, { "epoch": 178.68, "learning_rate": 4.3037974683544305e-05, - "loss": 0.622, + "loss": 0.522, "step": 1430 }, { "epoch": 178.9, - "eval_loss": 2.168447494506836, - "eval_runtime": 7.8421, - "eval_samples_per_second": 8.034, - "eval_steps_per_second": 1.02, - "eval_wer": 0.6483088895540258, + "eval_loss": 2.060013771057129, + "eval_runtime": 7.6526, + "eval_samples_per_second": 8.232, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6177791080514816, "step": 1432 }, { "epoch": 179.9, "learning_rate": 4.050632911392405e-05, - "loss": 0.599, + "loss": 0.5042, "step": 1440 }, { "epoch": 179.9, - "eval_loss": 2.190560817718506, - "eval_runtime": 7.8024, - "eval_samples_per_second": 8.074, - "eval_steps_per_second": 1.025, - "eval_wer": 0.6551930559712661, + "eval_loss": 2.036672353744507, + "eval_runtime": 7.6364, + "eval_samples_per_second": 8.25, + "eval_steps_per_second": 1.048, + "eval_wer": 0.625860520802155, "step": 1440 }, { "epoch": 180.9, - "eval_loss": 2.2257890701293945, - "eval_runtime": 7.7768, - "eval_samples_per_second": 8.101, - "eval_steps_per_second": 1.029, - "eval_wer": 0.6492068243041006, + "eval_loss": 2.071685314178467, + "eval_runtime": 7.672, + "eval_samples_per_second": 8.212, + "eval_steps_per_second": 1.043, + "eval_wer": 0.6186770428015564, "step": 1448 }, { "epoch": 181.23, "learning_rate": 3.79746835443038e-05, - "loss": 0.6516, + "loss": 0.5486, "step": 1450 }, { "epoch": 181.9, - "eval_loss": 2.203824758529663, - "eval_runtime": 7.8475, - "eval_samples_per_second": 8.028, - "eval_steps_per_second": 1.019, - "eval_wer": 0.6536964980544747, + "eval_loss": 2.058335304260254, + "eval_runtime": 7.6269, + "eval_samples_per_second": 8.26, + "eval_steps_per_second": 1.049, + "eval_wer": 0.6252618976354385, "step": 1456 }, { "epoch": 182.45, "learning_rate": 3.5443037974683544e-05, - "loss": 0.5907, + "loss": 0.4946, "step": 1460 }, { "epoch": 182.9, - "eval_loss": 2.194875955581665, - "eval_runtime": 7.95, - "eval_samples_per_second": 7.925, - "eval_steps_per_second": 1.006, - "eval_wer": 0.6533971864711164, + "eval_loss": 2.067958116531372, + "eval_runtime": 7.6539, + "eval_samples_per_second": 8.231, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6261598323855133, "step": 1464 }, { "epoch": 183.68, "learning_rate": 3.291139240506329e-05, - "loss": 0.5979, + "loss": 0.5056, "step": 1470 }, { "epoch": 183.9, - "eval_loss": 2.196150541305542, - "eval_runtime": 7.8608, - "eval_samples_per_second": 8.014, - "eval_steps_per_second": 1.018, - "eval_wer": 0.6530978748877582, + "eval_loss": 2.0846917629241943, + "eval_runtime": 7.6222, + "eval_samples_per_second": 8.265, + "eval_steps_per_second": 1.05, + "eval_wer": 0.6219694702184975, "step": 1472 }, { "epoch": 184.9, "learning_rate": 3.0379746835443042e-05, - "loss": 0.6064, + "loss": 0.513, "step": 1480 }, { "epoch": 184.9, - "eval_loss": 2.1942551136016846, - "eval_runtime": 7.781, - "eval_samples_per_second": 8.097, - "eval_steps_per_second": 1.028, - "eval_wer": 0.6498054474708171, + "eval_loss": 2.0797383785247803, + "eval_runtime": 7.7548, + "eval_samples_per_second": 8.124, + "eval_steps_per_second": 1.032, + "eval_wer": 0.6231667165519306, "step": 1480 }, { "epoch": 185.9, - "eval_loss": 2.1707868576049805, - "eval_runtime": 7.9327, - "eval_samples_per_second": 7.942, - "eval_steps_per_second": 1.008, - "eval_wer": 0.6524992517210416, + "eval_loss": 2.056018590927124, + "eval_runtime": 7.6669, + "eval_samples_per_second": 8.217, + "eval_steps_per_second": 1.043, + "eval_wer": 0.622568093385214, "step": 1488 }, { "epoch": 186.23, "learning_rate": 2.7848101265822786e-05, - "loss": 0.6363, + "loss": 0.5334, "step": 1490 }, { "epoch": 186.9, - "eval_loss": 2.1659958362579346, - "eval_runtime": 7.8392, - "eval_samples_per_second": 8.037, - "eval_steps_per_second": 1.021, - "eval_wer": 0.6560909907213409, + "eval_loss": 2.0349366664886475, + "eval_runtime": 7.6484, + "eval_samples_per_second": 8.237, + "eval_steps_per_second": 1.046, + "eval_wer": 0.6288536366357378, "step": 1496 }, { "epoch": 187.45, "learning_rate": 2.5316455696202533e-05, - "loss": 0.6257, + "loss": 0.5265, "step": 1500 }, { "epoch": 187.9, - "eval_loss": 2.174118757247925, - "eval_runtime": 7.8051, - "eval_samples_per_second": 8.072, - "eval_steps_per_second": 1.025, - "eval_wer": 0.657288237054774, + "eval_loss": 2.0137064456939697, + "eval_runtime": 7.7543, + "eval_samples_per_second": 8.125, + "eval_steps_per_second": 1.032, + "eval_wer": 0.6276563903023047, "step": 1504 }, { "epoch": 188.68, "learning_rate": 2.278481012658228e-05, - "loss": 0.6128, + "loss": 0.5135, "step": 1510 }, { "epoch": 188.9, - "eval_loss": 2.1725566387176514, - "eval_runtime": 7.7709, - "eval_samples_per_second": 8.107, - "eval_steps_per_second": 1.029, - "eval_wer": 0.6563903023046992, + "eval_loss": 2.022761106491089, + "eval_runtime": 7.6517, + "eval_samples_per_second": 8.233, + "eval_steps_per_second": 1.046, + "eval_wer": 0.625860520802155, "step": 1512 }, { "epoch": 189.9, "learning_rate": 2.0253164556962025e-05, - "loss": 0.602, + "loss": 0.5062, "step": 1520 }, { "epoch": 189.9, - "eval_loss": 2.168877124786377, - "eval_runtime": 7.9172, - "eval_samples_per_second": 7.957, - "eval_steps_per_second": 1.01, - "eval_wer": 0.6554923675546244, + "eval_loss": 2.0344340801239014, + "eval_runtime": 7.6532, + "eval_samples_per_second": 8.232, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6243639628853637, "step": 1520 }, { "epoch": 190.9, - "eval_loss": 2.170177936553955, - "eval_runtime": 7.8092, - "eval_samples_per_second": 8.067, - "eval_steps_per_second": 1.024, - "eval_wer": 0.6566896138880575, + "eval_loss": 2.02585768699646, + "eval_runtime": 7.6933, + "eval_samples_per_second": 8.189, + "eval_steps_per_second": 1.04, + "eval_wer": 0.6252618976354385, "step": 1528 }, { "epoch": 191.23, "learning_rate": 1.7721518987341772e-05, - "loss": 0.645, + "loss": 0.5459, "step": 1530 }, { "epoch": 191.9, - "eval_loss": 2.1751959323883057, - "eval_runtime": 7.9351, - "eval_samples_per_second": 7.939, - "eval_steps_per_second": 1.008, - "eval_wer": 0.6590841065549237, + "eval_loss": 2.019090175628662, + "eval_runtime": 7.6526, + "eval_samples_per_second": 8.233, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6261598323855133, "step": 1536 }, { "epoch": 192.45, "learning_rate": 1.5189873417721521e-05, - "loss": 0.5916, + "loss": 0.4993, "step": 1540 }, { "epoch": 192.9, - "eval_loss": 2.190694570541382, - "eval_runtime": 7.7664, - "eval_samples_per_second": 8.112, - "eval_steps_per_second": 1.03, - "eval_wer": 0.6560909907213409, + "eval_loss": 2.0223634243011475, + "eval_runtime": 7.635, + "eval_samples_per_second": 8.251, + "eval_steps_per_second": 1.048, + "eval_wer": 0.6249625860520802, "step": 1544 }, { "epoch": 193.68, "learning_rate": 1.2658227848101267e-05, - "loss": 0.5853, + "loss": 0.4965, "step": 1550 }, { "epoch": 193.9, - "eval_loss": 2.1865837574005127, - "eval_runtime": 7.7908, - "eval_samples_per_second": 8.086, - "eval_steps_per_second": 1.027, - "eval_wer": 0.6545944328045495, + "eval_loss": 2.0135087966918945, + "eval_runtime": 7.6082, + "eval_samples_per_second": 8.281, + "eval_steps_per_second": 1.051, + "eval_wer": 0.6273570787189464, "step": 1552 }, { "epoch": 194.9, "learning_rate": 1.0126582278481012e-05, - "loss": 0.5735, + "loss": 0.4827, "step": 1560 }, { "epoch": 194.9, - "eval_loss": 2.1829705238342285, - "eval_runtime": 7.8222, - "eval_samples_per_second": 8.054, - "eval_steps_per_second": 1.023, - "eval_wer": 0.6554923675546244, + "eval_loss": 2.0026562213897705, + "eval_runtime": 7.6658, + "eval_samples_per_second": 8.218, + "eval_steps_per_second": 1.044, + "eval_wer": 0.6288536366357378, "step": 1560 }, { "epoch": 195.9, - "eval_loss": 2.176032781600952, - "eval_runtime": 7.8048, - "eval_samples_per_second": 8.072, - "eval_steps_per_second": 1.025, - "eval_wer": 0.6563903023046992, + "eval_loss": 2.006549119949341, + "eval_runtime": 7.6324, + "eval_samples_per_second": 8.254, + "eval_steps_per_second": 1.048, + "eval_wer": 0.6264591439688716, "step": 1568 }, { "epoch": 196.23, "learning_rate": 7.5949367088607605e-06, - "loss": 0.6294, + "loss": 0.5181, "step": 1570 }, { "epoch": 196.9, - "eval_loss": 2.1679303646087646, - "eval_runtime": 7.925, - "eval_samples_per_second": 7.949, - "eval_steps_per_second": 1.009, - "eval_wer": 0.6578868602214906, + "eval_loss": 2.0064003467559814, + "eval_runtime": 7.653, + "eval_samples_per_second": 8.232, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6270577671355881, "step": 1576 }, { "epoch": 197.45, "learning_rate": 5.063291139240506e-06, - "loss": 0.6149, + "loss": 0.518, "step": 1580 }, { "epoch": 197.9, - "eval_loss": 2.1631834506988525, - "eval_runtime": 7.8984, - "eval_samples_per_second": 7.976, - "eval_steps_per_second": 1.013, - "eval_wer": 0.6575875486381323, + "eval_loss": 2.007786750793457, + "eval_runtime": 7.6766, + "eval_samples_per_second": 8.207, + "eval_steps_per_second": 1.042, + "eval_wer": 0.6276563903023047, "step": 1584 }, { "epoch": 198.68, "learning_rate": 2.531645569620253e-06, - "loss": 0.5761, + "loss": 0.4807, "step": 1590 }, { "epoch": 198.9, - "eval_loss": 2.1613857746124268, - "eval_runtime": 7.8089, - "eval_samples_per_second": 8.068, - "eval_steps_per_second": 1.024, - "eval_wer": 0.657288237054774, + "eval_loss": 2.008424758911133, + "eval_runtime": 7.654, + "eval_samples_per_second": 8.231, + "eval_steps_per_second": 1.045, + "eval_wer": 0.6276563903023047, "step": 1592 }, { "epoch": 199.9, "learning_rate": 0.0, - "loss": 0.6111, + "loss": 0.5078, "step": 1600 }, { "epoch": 199.9, - "eval_loss": 2.1611926555633545, - "eval_runtime": 7.7531, - "eval_samples_per_second": 8.126, - "eval_steps_per_second": 1.032, - "eval_wer": 0.6575875486381323, + "eval_loss": 2.008995532989502, + "eval_runtime": 7.6484, + "eval_samples_per_second": 8.237, + "eval_steps_per_second": 1.046, + "eval_wer": 0.6276563903023047, "step": 1600 }, { "epoch": 199.9, "step": 1600, "total_flos": 1.1848602213269387e+19, - "train_loss": 0.7314322146773339, - "train_runtime": 18544.7872, - "train_samples_per_second": 6.072, - "train_steps_per_second": 0.086 + "train_loss": 0.5738694608211518, + "train_runtime": 18463.351, + "train_samples_per_second": 6.099, + "train_steps_per_second": 0.087 } ], "max_steps": 1600,