{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.807200929152149, "eval_steps": 100, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019357336430507164, "eval_loss": 3.567659854888916, "eval_runtime": 160.9588, "eval_samples_per_second": 35.139, "eval_steps_per_second": 4.392, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.03871467286101433, "eval_loss": 3.0471677780151367, "eval_runtime": 158.7973, "eval_samples_per_second": 35.618, "eval_steps_per_second": 4.452, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.05807200929152149, "eval_loss": 2.9665186405181885, "eval_runtime": 159.308, "eval_samples_per_second": 35.504, "eval_steps_per_second": 4.438, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.07742934572202866, "eval_loss": 2.464332103729248, "eval_runtime": 159.8297, "eval_samples_per_second": 35.388, "eval_steps_per_second": 4.423, "eval_wer": 0.9813195101988413, "step": 400 }, { "epoch": 0.09678668215253582, "grad_norm": 6.005111217498779, "learning_rate": 0.00029759999999999997, "loss": 4.1279, "step": 500 }, { "epoch": 0.09678668215253582, "eval_loss": 1.625333547592163, "eval_runtime": 160.6655, "eval_samples_per_second": 35.204, "eval_steps_per_second": 4.4, "eval_wer": 0.9345380430421595, "step": 500 }, { "epoch": 0.11614401858304298, "eval_loss": 1.24808931350708, "eval_runtime": 160.4402, "eval_samples_per_second": 35.253, "eval_steps_per_second": 4.407, "eval_wer": 0.8190528157147213, "step": 600 }, { "epoch": 0.13550135501355012, "eval_loss": 1.0997203588485718, "eval_runtime": 161.1872, "eval_samples_per_second": 35.09, "eval_steps_per_second": 4.386, "eval_wer": 0.7769735680698432, "step": 700 }, { "epoch": 0.1548586914440573, "eval_loss": 1.0475263595581055, "eval_runtime": 161.0626, "eval_samples_per_second": 35.117, "eval_steps_per_second": 4.39, "eval_wer": 0.7339795541718156, "step": 800 }, { "epoch": 0.17421602787456447, "eval_loss": 0.9692754149436951, "eval_runtime": 161.3683, "eval_samples_per_second": 35.05, "eval_steps_per_second": 4.381, "eval_wer": 0.7012565999582738, "step": 900 }, { "epoch": 0.19357336430507163, "grad_norm": 2.414348602294922, "learning_rate": 0.0002949762711864406, "loss": 1.0598, "step": 1000 }, { "epoch": 0.19357336430507163, "eval_loss": 0.911480724811554, "eval_runtime": 162.0835, "eval_samples_per_second": 34.896, "eval_steps_per_second": 4.362, "eval_wer": 0.6749049124552647, "step": 1000 }, { "epoch": 0.2129307007355788, "eval_loss": 0.8823792338371277, "eval_runtime": 161.5426, "eval_samples_per_second": 35.012, "eval_steps_per_second": 4.377, "eval_wer": 0.65625651971562, "step": 1100 }, { "epoch": 0.23228803716608595, "eval_loss": 0.8609552383422852, "eval_runtime": 161.6778, "eval_samples_per_second": 34.983, "eval_steps_per_second": 4.373, "eval_wer": 0.6431288215563865, "step": 1200 }, { "epoch": 0.2516453735965931, "eval_loss": 0.8330459594726562, "eval_runtime": 161.7974, "eval_samples_per_second": 34.957, "eval_steps_per_second": 4.37, "eval_wer": 0.6114169247805363, "step": 1300 }, { "epoch": 0.27100271002710025, "eval_loss": 0.8172780871391296, "eval_runtime": 161.4977, "eval_samples_per_second": 35.022, "eval_steps_per_second": 4.378, "eval_wer": 0.6017396607340598, "step": 1400 }, { "epoch": 0.29036004645760743, "grad_norm": 4.460846424102783, "learning_rate": 0.0002898915254237288, "loss": 0.8546, "step": 1500 }, { "epoch": 0.29036004645760743, "eval_loss": 0.8102588653564453, "eval_runtime": 161.397, "eval_samples_per_second": 35.044, "eval_steps_per_second": 4.381, "eval_wer": 0.6139204955786298, "step": 1500 }, { "epoch": 0.3097173828881146, "eval_loss": 0.7860382795333862, "eval_runtime": 162.0586, "eval_samples_per_second": 34.901, "eval_steps_per_second": 4.363, "eval_wer": 0.6077739082986953, "step": 1600 }, { "epoch": 0.32907471931862176, "eval_loss": 0.857550323009491, "eval_runtime": 161.2906, "eval_samples_per_second": 35.067, "eval_steps_per_second": 4.383, "eval_wer": 0.5990114105053682, "step": 1700 }, { "epoch": 0.34843205574912894, "eval_loss": 0.7555657029151917, "eval_runtime": 161.7718, "eval_samples_per_second": 34.963, "eval_steps_per_second": 4.37, "eval_wer": 0.5773298454526488, "step": 1800 }, { "epoch": 0.3677893921796361, "eval_loss": 0.7365372180938721, "eval_runtime": 162.1133, "eval_samples_per_second": 34.889, "eval_steps_per_second": 4.361, "eval_wer": 0.5825777150101908, "step": 1900 }, { "epoch": 0.38714672861014326, "grad_norm": 3.4646999835968018, "learning_rate": 0.0002848067796610169, "loss": 0.7776, "step": 2000 }, { "epoch": 0.38714672861014326, "eval_loss": 0.7291606068611145, "eval_runtime": 162.354, "eval_samples_per_second": 34.837, "eval_steps_per_second": 4.355, "eval_wer": 0.5551989215387332, "step": 2000 }, { "epoch": 0.4065040650406504, "eval_loss": 0.716595470905304, "eval_runtime": 162.461, "eval_samples_per_second": 34.815, "eval_steps_per_second": 4.352, "eval_wer": 0.5385726436744716, "step": 2100 }, { "epoch": 0.4258614014711576, "eval_loss": 0.7117305397987366, "eval_runtime": 161.9033, "eval_samples_per_second": 34.934, "eval_steps_per_second": 4.367, "eval_wer": 0.5401774967501726, "step": 2200 }, { "epoch": 0.4452187379016647, "eval_loss": 0.7060667872428894, "eval_runtime": 162.0901, "eval_samples_per_second": 34.894, "eval_steps_per_second": 4.362, "eval_wer": 0.5388294201665838, "step": 2300 }, { "epoch": 0.4645760743321719, "eval_loss": 0.7044907212257385, "eval_runtime": 162.5576, "eval_samples_per_second": 34.794, "eval_steps_per_second": 4.349, "eval_wer": 0.5364060920222754, "step": 2400 }, { "epoch": 0.48393341076267904, "grad_norm": 2.70296573638916, "learning_rate": 0.00027972203389830505, "loss": 0.706, "step": 2500 }, { "epoch": 0.48393341076267904, "eval_loss": 0.7062936425209045, "eval_runtime": 162.4753, "eval_samples_per_second": 34.811, "eval_steps_per_second": 4.351, "eval_wer": 0.5428736499173501, "step": 2500 }, { "epoch": 0.5032907471931862, "eval_loss": 0.6941363215446472, "eval_runtime": 162.6699, "eval_samples_per_second": 34.77, "eval_steps_per_second": 4.346, "eval_wer": 0.5433872029015744, "step": 2600 }, { "epoch": 0.5226480836236934, "eval_loss": 0.6840428113937378, "eval_runtime": 162.5617, "eval_samples_per_second": 34.793, "eval_steps_per_second": 4.349, "eval_wer": 0.5203094156729952, "step": 2700 }, { "epoch": 0.5420054200542005, "eval_loss": 0.6902298331260681, "eval_runtime": 162.7532, "eval_samples_per_second": 34.752, "eval_steps_per_second": 4.344, "eval_wer": 0.5593715395355555, "step": 2800 }, { "epoch": 0.5613627564847077, "eval_loss": 0.6594961881637573, "eval_runtime": 163.1259, "eval_samples_per_second": 34.673, "eval_steps_per_second": 4.334, "eval_wer": 0.5149171093386401, "step": 2900 }, { "epoch": 0.5807200929152149, "grad_norm": 4.962900161743164, "learning_rate": 0.0002746372881355932, "loss": 0.7002, "step": 3000 }, { "epoch": 0.5807200929152149, "eval_loss": 0.6767885088920593, "eval_runtime": 162.9945, "eval_samples_per_second": 34.701, "eval_steps_per_second": 4.338, "eval_wer": 0.525284460207668, "step": 3000 }, { "epoch": 0.6000774293457221, "eval_loss": 0.6656874418258667, "eval_runtime": 163.0572, "eval_samples_per_second": 34.687, "eval_steps_per_second": 4.336, "eval_wer": 0.5063953395066682, "step": 3100 }, { "epoch": 0.6194347657762292, "eval_loss": 0.6758668422698975, "eval_runtime": 163.796, "eval_samples_per_second": 34.531, "eval_steps_per_second": 4.316, "eval_wer": 0.5409478262265089, "step": 3200 }, { "epoch": 0.6387921022067363, "eval_loss": 0.6709346175193787, "eval_runtime": 162.7448, "eval_samples_per_second": 34.754, "eval_steps_per_second": 4.344, "eval_wer": 0.5090914926738457, "step": 3300 }, { "epoch": 0.6581494386372435, "eval_loss": 0.6478992104530334, "eval_runtime": 163.2374, "eval_samples_per_second": 34.649, "eval_steps_per_second": 4.331, "eval_wer": 0.5037473319317617, "step": 3400 }, { "epoch": 0.6775067750677507, "grad_norm": 3.27418851852417, "learning_rate": 0.0002695525423728813, "loss": 0.685, "step": 3500 }, { "epoch": 0.6775067750677507, "eval_loss": 0.6378278136253357, "eval_runtime": 162.9066, "eval_samples_per_second": 34.719, "eval_steps_per_second": 4.34, "eval_wer": 0.5033782157243505, "step": 3500 }, { "epoch": 0.6968641114982579, "eval_loss": 0.6492822170257568, "eval_runtime": 162.8688, "eval_samples_per_second": 34.727, "eval_steps_per_second": 4.341, "eval_wer": 0.49883648152011684, "step": 3600 }, { "epoch": 0.716221447928765, "eval_loss": 0.6340391635894775, "eval_runtime": 163.0198, "eval_samples_per_second": 34.695, "eval_steps_per_second": 4.337, "eval_wer": 0.4832694066858179, "step": 3700 }, { "epoch": 0.7355787843592722, "eval_loss": 0.6226627826690674, "eval_runtime": 164.0506, "eval_samples_per_second": 34.477, "eval_steps_per_second": 4.31, "eval_wer": 0.47354399704707034, "step": 3800 }, { "epoch": 0.7549361207897793, "eval_loss": 0.6257476210594177, "eval_runtime": 167.0907, "eval_samples_per_second": 33.85, "eval_steps_per_second": 4.231, "eval_wer": 0.49068382789555615, "step": 3900 }, { "epoch": 0.7742934572202865, "grad_norm": 5.494376182556152, "learning_rate": 0.0002644677966101695, "loss": 0.6655, "step": 4000 }, { "epoch": 0.7742934572202865, "eval_loss": 0.6420141458511353, "eval_runtime": 163.4141, "eval_samples_per_second": 34.611, "eval_steps_per_second": 4.326, "eval_wer": 0.49987963601932245, "step": 4000 }, { "epoch": 0.7936507936507936, "eval_loss": 0.6111469268798828, "eval_runtime": 163.5321, "eval_samples_per_second": 34.586, "eval_steps_per_second": 4.323, "eval_wer": 0.4790646916274815, "step": 4100 }, { "epoch": 0.8130081300813008, "eval_loss": 0.6136205196380615, "eval_runtime": 163.9442, "eval_samples_per_second": 34.5, "eval_steps_per_second": 4.312, "eval_wer": 0.48073373882621045, "step": 4200 }, { "epoch": 0.832365466511808, "eval_loss": 0.6218396425247192, "eval_runtime": 163.3391, "eval_samples_per_second": 34.627, "eval_steps_per_second": 4.328, "eval_wer": 0.48596555985299544, "step": 4300 }, { "epoch": 0.8517228029423152, "eval_loss": 0.6084252595901489, "eval_runtime": 162.8945, "eval_samples_per_second": 34.722, "eval_steps_per_second": 4.34, "eval_wer": 0.4585386207892667, "step": 4400 }, { "epoch": 0.8710801393728222, "grad_norm": 3.0379676818847656, "learning_rate": 0.0002593830508474576, "loss": 0.6431, "step": 4500 }, { "epoch": 0.8710801393728222, "eval_loss": 0.6008957624435425, "eval_runtime": 163.8125, "eval_samples_per_second": 34.527, "eval_steps_per_second": 4.316, "eval_wer": 0.4627593843783602, "step": 4500 }, { "epoch": 0.8904374758033294, "eval_loss": 0.6009930968284607, "eval_runtime": 163.1039, "eval_samples_per_second": 34.677, "eval_steps_per_second": 4.335, "eval_wer": 0.46295196674744427, "step": 4600 }, { "epoch": 0.9097948122338366, "eval_loss": 0.5823432207107544, "eval_runtime": 163.3804, "eval_samples_per_second": 34.619, "eval_steps_per_second": 4.327, "eval_wer": 0.45035387010319206, "step": 4700 }, { "epoch": 0.9291521486643438, "eval_loss": 0.6118789315223694, "eval_runtime": 163.7255, "eval_samples_per_second": 34.546, "eval_steps_per_second": 4.318, "eval_wer": 0.4630001123397153, "step": 4800 }, { "epoch": 0.948509485094851, "eval_loss": 0.6001989245414734, "eval_runtime": 163.3492, "eval_samples_per_second": 34.625, "eval_steps_per_second": 4.328, "eval_wer": 0.4600150856189116, "step": 4900 }, { "epoch": 0.9678668215253581, "grad_norm": 3.1605985164642334, "learning_rate": 0.00025430847457627115, "loss": 0.6235, "step": 5000 }, { "epoch": 0.9678668215253581, "eval_loss": 0.5892329216003418, "eval_runtime": 163.5255, "eval_samples_per_second": 34.588, "eval_steps_per_second": 4.323, "eval_wer": 0.4551844778610518, "step": 5000 }, { "epoch": 0.9872241579558653, "eval_loss": 0.5673592686653137, "eval_runtime": 163.343, "eval_samples_per_second": 34.627, "eval_steps_per_second": 4.328, "eval_wer": 0.44889345380430423, "step": 5100 }, { "epoch": 1.0065814943863725, "eval_loss": 0.5792257785797119, "eval_runtime": 162.8592, "eval_samples_per_second": 34.729, "eval_steps_per_second": 4.341, "eval_wer": 0.43167338030203334, "step": 5200 }, { "epoch": 1.0259388308168795, "eval_loss": 0.5752869844436646, "eval_runtime": 162.7856, "eval_samples_per_second": 34.745, "eval_steps_per_second": 4.343, "eval_wer": 0.43331033043924827, "step": 5300 }, { "epoch": 1.0452961672473868, "eval_loss": 0.5698733925819397, "eval_runtime": 161.9949, "eval_samples_per_second": 34.915, "eval_steps_per_second": 4.364, "eval_wer": 0.44619730063712665, "step": 5400 }, { "epoch": 1.064653503677894, "grad_norm": 0.8791279792785645, "learning_rate": 0.0002492338983050847, "loss": 0.5527, "step": 5500 }, { "epoch": 1.064653503677894, "eval_loss": 0.5666691660881042, "eval_runtime": 162.0517, "eval_samples_per_second": 34.902, "eval_steps_per_second": 4.363, "eval_wer": 0.43639164834459404, "step": 5500 }, { "epoch": 1.084010840108401, "eval_loss": 0.5558171272277832, "eval_runtime": 161.9847, "eval_samples_per_second": 34.917, "eval_steps_per_second": 4.365, "eval_wer": 0.42945868305756607, "step": 5600 }, { "epoch": 1.1033681765389083, "eval_loss": 0.5602455139160156, "eval_runtime": 162.701, "eval_samples_per_second": 34.763, "eval_steps_per_second": 4.345, "eval_wer": 0.422349183932211, "step": 5700 }, { "epoch": 1.1227255129694154, "eval_loss": 0.559140145778656, "eval_runtime": 162.4402, "eval_samples_per_second": 34.819, "eval_steps_per_second": 4.352, "eval_wer": 0.41942835133443535, "step": 5800 }, { "epoch": 1.1420828493999227, "eval_loss": 0.5399234890937805, "eval_runtime": 162.3316, "eval_samples_per_second": 34.842, "eval_steps_per_second": 4.355, "eval_wer": 0.418818507165669, "step": 5900 }, { "epoch": 1.1614401858304297, "grad_norm": 0.9803772568702698, "learning_rate": 0.00024414915254237287, "loss": 0.533, "step": 6000 }, { "epoch": 1.1614401858304297, "eval_loss": 0.545900821685791, "eval_runtime": 161.6822, "eval_samples_per_second": 34.982, "eval_steps_per_second": 4.373, "eval_wer": 0.431063536133267, "step": 6000 }, { "epoch": 1.1807975222609368, "eval_loss": 0.5347985625267029, "eval_runtime": 161.8121, "eval_samples_per_second": 34.954, "eval_steps_per_second": 4.369, "eval_wer": 0.41183739628637, "step": 6100 }, { "epoch": 1.2001548586914441, "eval_loss": 0.5453631281852722, "eval_runtime": 161.8802, "eval_samples_per_second": 34.939, "eval_steps_per_second": 4.367, "eval_wer": 0.4176309158896503, "step": 6200 }, { "epoch": 1.2195121951219512, "eval_loss": 0.5442932844161987, "eval_runtime": 162.1767, "eval_samples_per_second": 34.876, "eval_steps_per_second": 4.359, "eval_wer": 0.42157885445587456, "step": 6300 }, { "epoch": 1.2388695315524583, "eval_loss": 0.5382806658744812, "eval_runtime": 161.5364, "eval_samples_per_second": 35.014, "eval_steps_per_second": 4.377, "eval_wer": 0.40962269904190274, "step": 6400 }, { "epoch": 1.2582268679829656, "grad_norm": 2.7026009559631348, "learning_rate": 0.00023906440677966102, "loss": 0.5228, "step": 6500 }, { "epoch": 1.2582268679829656, "eval_loss": 0.540704071521759, "eval_runtime": 161.6157, "eval_samples_per_second": 34.997, "eval_steps_per_second": 4.375, "eval_wer": 0.41260772576270643, "step": 6500 }, { "epoch": 1.2775842044134726, "eval_loss": 0.5527251362800598, "eval_runtime": 161.6568, "eval_samples_per_second": 34.988, "eval_steps_per_second": 4.373, "eval_wer": 0.41426072443067835, "step": 6600 }, { "epoch": 1.29694154084398, "eval_loss": 0.5312824845314026, "eval_runtime": 161.7821, "eval_samples_per_second": 34.961, "eval_steps_per_second": 4.37, "eval_wer": 0.40811413715074385, "step": 6700 }, { "epoch": 1.316298877274487, "eval_loss": 0.533909797668457, "eval_runtime": 161.8361, "eval_samples_per_second": 34.949, "eval_steps_per_second": 4.369, "eval_wer": 0.4150471024377718, "step": 6800 }, { "epoch": 1.3356562137049943, "eval_loss": 0.523649275302887, "eval_runtime": 161.5476, "eval_samples_per_second": 35.011, "eval_steps_per_second": 4.376, "eval_wer": 0.4120781242477251, "step": 6900 }, { "epoch": 1.3550135501355014, "grad_norm": 0.709751546382904, "learning_rate": 0.00023397966101694912, "loss": 0.5204, "step": 7000 }, { "epoch": 1.3550135501355014, "eval_loss": 0.5527586340904236, "eval_runtime": 162.1137, "eval_samples_per_second": 34.889, "eval_steps_per_second": 4.361, "eval_wer": 0.4165877613904447, "step": 7000 }, { "epoch": 1.3743708865660085, "eval_loss": 0.5330629944801331, "eval_runtime": 161.681, "eval_samples_per_second": 34.982, "eval_steps_per_second": 4.373, "eval_wer": 0.40559451782189343, "step": 7100 }, { "epoch": 1.3937282229965158, "eval_loss": 0.5242415070533752, "eval_runtime": 162.2742, "eval_samples_per_second": 34.855, "eval_steps_per_second": 4.357, "eval_wer": 0.4058833913755196, "step": 7200 }, { "epoch": 1.4130855594270229, "eval_loss": 0.5309507250785828, "eval_runtime": 163.2224, "eval_samples_per_second": 34.652, "eval_steps_per_second": 4.332, "eval_wer": 0.4092856798960055, "step": 7300 }, { "epoch": 1.43244289585753, "eval_loss": 0.5278186798095703, "eval_runtime": 162.9755, "eval_samples_per_second": 34.705, "eval_steps_per_second": 4.338, "eval_wer": 0.4063006531752018, "step": 7400 }, { "epoch": 1.4518002322880372, "grad_norm": 0.9905166029930115, "learning_rate": 0.00022889491525423728, "loss": 0.5199, "step": 7500 }, { "epoch": 1.4518002322880372, "eval_loss": 0.5168124437332153, "eval_runtime": 162.1336, "eval_samples_per_second": 34.885, "eval_steps_per_second": 4.361, "eval_wer": 0.3955802346295197, "step": 7500 }, { "epoch": 1.4711575687185443, "eval_loss": 0.5236623287200928, "eval_runtime": 161.1426, "eval_samples_per_second": 35.099, "eval_steps_per_second": 4.387, "eval_wer": 0.40235271460897754, "step": 7600 }, { "epoch": 1.4905149051490514, "eval_loss": 0.5316073894500732, "eval_runtime": 162.1382, "eval_samples_per_second": 34.884, "eval_steps_per_second": 4.36, "eval_wer": 0.4179358379740335, "step": 7700 }, { "epoch": 1.5098722415795587, "eval_loss": 0.5182381868362427, "eval_runtime": 161.7911, "eval_samples_per_second": 34.959, "eval_steps_per_second": 4.37, "eval_wer": 0.40326748086212705, "step": 7800 }, { "epoch": 1.5292295780100658, "eval_loss": 0.5175392627716064, "eval_runtime": 161.5725, "eval_samples_per_second": 35.006, "eval_steps_per_second": 4.376, "eval_wer": 0.3983887275119963, "step": 7900 }, { "epoch": 1.5485869144405728, "grad_norm": 0.8261615037918091, "learning_rate": 0.00022382033898305084, "loss": 0.5066, "step": 8000 }, { "epoch": 1.5485869144405728, "eval_loss": 0.5138476490974426, "eval_runtime": 162.4044, "eval_samples_per_second": 34.827, "eval_steps_per_second": 4.353, "eval_wer": 0.39492224486848226, "step": 8000 }, { "epoch": 1.5679442508710801, "eval_loss": 0.515566885471344, "eval_runtime": 162.5299, "eval_samples_per_second": 34.8, "eval_steps_per_second": 4.35, "eval_wer": 0.4016305307249121, "step": 8100 }, { "epoch": 1.5873015873015874, "eval_loss": 0.5131089091300964, "eval_runtime": 162.835, "eval_samples_per_second": 34.735, "eval_steps_per_second": 4.342, "eval_wer": 0.39793936865080004, "step": 8200 }, { "epoch": 1.6066589237320945, "eval_loss": 0.5139849185943604, "eval_runtime": 162.5015, "eval_samples_per_second": 34.806, "eval_steps_per_second": 4.351, "eval_wer": 0.39413586686138885, "step": 8300 }, { "epoch": 1.6260162601626016, "eval_loss": 0.5224258303642273, "eval_runtime": 162.1349, "eval_samples_per_second": 34.885, "eval_steps_per_second": 4.361, "eval_wer": 0.39853316428880936, "step": 8400 }, { "epoch": 1.645373596593109, "grad_norm": 1.0760446786880493, "learning_rate": 0.00021873559322033897, "loss": 0.502, "step": 8500 }, { "epoch": 1.645373596593109, "eval_loss": 0.5274536609649658, "eval_runtime": 162.3162, "eval_samples_per_second": 34.846, "eval_steps_per_second": 4.356, "eval_wer": 0.40023430854905234, "step": 8500 }, { "epoch": 1.664730933023616, "eval_loss": 0.5054244995117188, "eval_runtime": 162.7623, "eval_samples_per_second": 34.75, "eval_steps_per_second": 4.344, "eval_wer": 0.3860955529521272, "step": 8600 }, { "epoch": 1.684088269454123, "eval_loss": 0.5144466161727905, "eval_runtime": 162.1114, "eval_samples_per_second": 34.89, "eval_steps_per_second": 4.361, "eval_wer": 0.3912631798558842, "step": 8700 }, { "epoch": 1.7034456058846303, "eval_loss": 0.5017980933189392, "eval_runtime": 162.1144, "eval_samples_per_second": 34.889, "eval_steps_per_second": 4.361, "eval_wer": 0.3860955529521272, "step": 8800 }, { "epoch": 1.7228029423151374, "eval_loss": 0.5001707673072815, "eval_runtime": 162.7958, "eval_samples_per_second": 34.743, "eval_steps_per_second": 4.343, "eval_wer": 0.39978494968785605, "step": 8900 }, { "epoch": 1.7421602787456445, "grad_norm": 2.3791110515594482, "learning_rate": 0.00021366101694915253, "loss": 0.4965, "step": 9000 }, { "epoch": 1.7421602787456445, "eval_loss": 0.5074877142906189, "eval_runtime": 162.5487, "eval_samples_per_second": 34.796, "eval_steps_per_second": 4.349, "eval_wer": 0.38896823995763186, "step": 9000 }, { "epoch": 1.7615176151761518, "eval_loss": 0.4928957521915436, "eval_runtime": 162.2035, "eval_samples_per_second": 34.87, "eval_steps_per_second": 4.359, "eval_wer": 0.3865449118133235, "step": 9100 }, { "epoch": 1.7808749516066589, "eval_loss": 0.49622705578804016, "eval_runtime": 162.8808, "eval_samples_per_second": 34.725, "eval_steps_per_second": 4.341, "eval_wer": 0.38559804849866, "step": 9200 }, { "epoch": 1.800232288037166, "eval_loss": 0.49036508798599243, "eval_runtime": 162.3886, "eval_samples_per_second": 34.83, "eval_steps_per_second": 4.354, "eval_wer": 0.3759689300444544, "step": 9300 }, { "epoch": 1.8195896244676733, "eval_loss": 0.49964088201522827, "eval_runtime": 162.526, "eval_samples_per_second": 34.801, "eval_steps_per_second": 4.35, "eval_wer": 0.3901237341721365, "step": 9400 }, { "epoch": 1.8389469608981805, "grad_norm": 1.2548748254776, "learning_rate": 0.00020857627118644066, "loss": 0.4776, "step": 9500 }, { "epoch": 1.8389469608981805, "eval_loss": 0.4899130165576935, "eval_runtime": 162.5723, "eval_samples_per_second": 34.791, "eval_steps_per_second": 4.349, "eval_wer": 0.37616151241353857, "step": 9500 }, { "epoch": 1.8583042973286876, "eval_loss": 0.4918155074119568, "eval_runtime": 162.1914, "eval_samples_per_second": 34.872, "eval_steps_per_second": 4.359, "eval_wer": 0.37948355828023944, "step": 9600 }, { "epoch": 1.8776616337591947, "eval_loss": 0.49148374795913696, "eval_runtime": 162.2813, "eval_samples_per_second": 34.853, "eval_steps_per_second": 4.357, "eval_wer": 0.37980452889537963, "step": 9700 }, { "epoch": 1.897018970189702, "eval_loss": 0.4841060936450958, "eval_runtime": 162.2556, "eval_samples_per_second": 34.859, "eval_steps_per_second": 4.357, "eval_wer": 0.37060872077161333, "step": 9800 }, { "epoch": 1.916376306620209, "eval_loss": 0.4834117293357849, "eval_runtime": 163.5813, "eval_samples_per_second": 34.576, "eval_steps_per_second": 4.322, "eval_wer": 0.37728490956652916, "step": 9900 }, { "epoch": 1.9357336430507162, "grad_norm": 1.011767029762268, "learning_rate": 0.00020349152542372878, "loss": 0.4752, "step": 10000 }, { "epoch": 1.9357336430507162, "eval_loss": 0.4831894338130951, "eval_runtime": 162.4305, "eval_samples_per_second": 34.821, "eval_steps_per_second": 4.353, "eval_wer": 0.3711704193481087, "step": 10000 }, { "epoch": 1.9550909794812235, "eval_loss": 0.4890592396259308, "eval_runtime": 162.3568, "eval_samples_per_second": 34.837, "eval_steps_per_second": 4.355, "eval_wer": 0.37829596700422075, "step": 10100 }, { "epoch": 1.9744483159117305, "eval_loss": 0.4786697328090668, "eval_runtime": 163.1452, "eval_samples_per_second": 34.669, "eval_steps_per_second": 4.334, "eval_wer": 0.3783441125964918, "step": 10200 }, { "epoch": 1.9938056523422376, "eval_loss": 0.4726457893848419, "eval_runtime": 162.2371, "eval_samples_per_second": 34.863, "eval_steps_per_second": 4.358, "eval_wer": 0.37141114730946384, "step": 10300 }, { "epoch": 2.013162988772745, "eval_loss": 0.49166908860206604, "eval_runtime": 162.2356, "eval_samples_per_second": 34.863, "eval_steps_per_second": 4.358, "eval_wer": 0.37320858275424884, "step": 10400 }, { "epoch": 2.032520325203252, "grad_norm": 0.9389815926551819, "learning_rate": 0.00019840677966101694, "loss": 0.4587, "step": 10500 }, { "epoch": 2.032520325203252, "eval_loss": 0.48015162348747253, "eval_runtime": 162.7857, "eval_samples_per_second": 34.745, "eval_steps_per_second": 4.343, "eval_wer": 0.37264688417775355, "step": 10500 }, { "epoch": 2.051877661633759, "eval_loss": 0.4883776903152466, "eval_runtime": 162.7407, "eval_samples_per_second": 34.755, "eval_steps_per_second": 4.344, "eval_wer": 0.3825327791240712, "step": 10600 }, { "epoch": 2.0712349980642664, "eval_loss": 0.4841337502002716, "eval_runtime": 162.7772, "eval_samples_per_second": 34.747, "eval_steps_per_second": 4.343, "eval_wer": 0.37845645231179087, "step": 10700 }, { "epoch": 2.0905923344947737, "eval_loss": 0.4809282422065735, "eval_runtime": 162.8688, "eval_samples_per_second": 34.727, "eval_steps_per_second": 4.341, "eval_wer": 0.3738184269230152, "step": 10800 }, { "epoch": 2.1099496709252805, "eval_loss": 0.47966596484184265, "eval_runtime": 163.5921, "eval_samples_per_second": 34.574, "eval_steps_per_second": 4.322, "eval_wer": 0.3713469531864358, "step": 10900 }, { "epoch": 2.129307007355788, "grad_norm": 0.6634272933006287, "learning_rate": 0.0001933220338983051, "loss": 0.3967, "step": 11000 }, { "epoch": 2.129307007355788, "eval_loss": 0.4866289794445038, "eval_runtime": 162.8573, "eval_samples_per_second": 34.73, "eval_steps_per_second": 4.341, "eval_wer": 0.37497392113751987, "step": 11000 }, { "epoch": 2.148664343786295, "eval_loss": 0.4938376843929291, "eval_runtime": 163.5145, "eval_samples_per_second": 34.59, "eval_steps_per_second": 4.324, "eval_wer": 0.3749097270144918, "step": 11100 }, { "epoch": 2.168021680216802, "eval_loss": 0.48603999614715576, "eval_runtime": 162.8433, "eval_samples_per_second": 34.733, "eval_steps_per_second": 4.342, "eval_wer": 0.36796071319670687, "step": 11200 }, { "epoch": 2.1873790166473093, "eval_loss": 0.4849016070365906, "eval_runtime": 162.5249, "eval_samples_per_second": 34.801, "eval_steps_per_second": 4.35, "eval_wer": 0.369966779541333, "step": 11300 }, { "epoch": 2.2067363530778166, "eval_loss": 0.49077799916267395, "eval_runtime": 162.3783, "eval_samples_per_second": 34.832, "eval_steps_per_second": 4.354, "eval_wer": 0.36377204666912744, "step": 11400 }, { "epoch": 2.226093689508324, "grad_norm": 0.9132543206214905, "learning_rate": 0.0001882372881355932, "loss": 0.406, "step": 11500 }, { "epoch": 2.226093689508324, "eval_loss": 0.4797042906284332, "eval_runtime": 162.1749, "eval_samples_per_second": 34.876, "eval_steps_per_second": 4.359, "eval_wer": 0.3678644220121648, "step": 11500 }, { "epoch": 2.2454510259388307, "eval_loss": 0.48121991753578186, "eval_runtime": 161.7765, "eval_samples_per_second": 34.962, "eval_steps_per_second": 4.37, "eval_wer": 0.37585659032915536, "step": 11600 }, { "epoch": 2.264808362369338, "eval_loss": 0.47043049335479736, "eval_runtime": 163.1227, "eval_samples_per_second": 34.673, "eval_steps_per_second": 4.334, "eval_wer": 0.361268475871034, "step": 11700 }, { "epoch": 2.2841656987998453, "eval_loss": 0.4715932607650757, "eval_runtime": 163.2336, "eval_samples_per_second": 34.65, "eval_steps_per_second": 4.331, "eval_wer": 0.36345107605398724, "step": 11800 }, { "epoch": 2.303523035230352, "eval_loss": 0.4676753580570221, "eval_runtime": 163.378, "eval_samples_per_second": 34.619, "eval_steps_per_second": 4.327, "eval_wer": 0.3635473672385293, "step": 11900 }, { "epoch": 2.3228803716608595, "grad_norm": 0.6134137511253357, "learning_rate": 0.00018315254237288135, "loss": 0.4088, "step": 12000 }, { "epoch": 2.3228803716608595, "eval_loss": 0.47054949402809143, "eval_runtime": 162.7912, "eval_samples_per_second": 34.744, "eval_steps_per_second": 4.343, "eval_wer": 0.36399672609972555, "step": 12000 }, { "epoch": 2.3422377080913668, "eval_loss": 0.4782082140445709, "eval_runtime": 162.8776, "eval_samples_per_second": 34.725, "eval_steps_per_second": 4.341, "eval_wer": 0.35905377862656673, "step": 12100 }, { "epoch": 2.3615950445218736, "eval_loss": 0.4795554578304291, "eval_runtime": 163.357, "eval_samples_per_second": 34.624, "eval_steps_per_second": 4.328, "eval_wer": 0.36128452440179104, "step": 12200 }, { "epoch": 2.380952380952381, "eval_loss": 0.47130346298217773, "eval_runtime": 163.1765, "eval_samples_per_second": 34.662, "eval_steps_per_second": 4.333, "eval_wer": 0.3558119754136509, "step": 12300 }, { "epoch": 2.4003097173828882, "eval_loss": 0.47632816433906555, "eval_runtime": 163.2851, "eval_samples_per_second": 34.639, "eval_steps_per_second": 4.33, "eval_wer": 0.3588772447882396, "step": 12400 }, { "epoch": 2.419667053813395, "grad_norm": 0.6301820874214172, "learning_rate": 0.0001780779661016949, "loss": 0.407, "step": 12500 }, { "epoch": 2.419667053813395, "eval_loss": 0.46899136900901794, "eval_runtime": 163.5219, "eval_samples_per_second": 34.589, "eval_steps_per_second": 4.324, "eval_wer": 0.3565181107669593, "step": 12500 }, { "epoch": 2.4390243902439024, "eval_loss": 0.4686334431171417, "eval_runtime": 163.9109, "eval_samples_per_second": 34.507, "eval_steps_per_second": 4.313, "eval_wer": 0.35767360498146394, "step": 12600 }, { "epoch": 2.4583817266744097, "eval_loss": 0.467680424451828, "eval_runtime": 163.7544, "eval_samples_per_second": 34.54, "eval_steps_per_second": 4.317, "eval_wer": 0.3584278859270434, "step": 12700 }, { "epoch": 2.4777390631049165, "eval_loss": 0.46144554018974304, "eval_runtime": 163.8198, "eval_samples_per_second": 34.526, "eval_steps_per_second": 4.316, "eval_wer": 0.35765755645070696, "step": 12800 }, { "epoch": 2.497096399535424, "eval_loss": 0.455834299325943, "eval_runtime": 163.2394, "eval_samples_per_second": 34.649, "eval_steps_per_second": 4.331, "eval_wer": 0.35992039928744524, "step": 12900 }, { "epoch": 2.516453735965931, "grad_norm": 0.41953468322753906, "learning_rate": 0.00017299322033898304, "loss": 0.3855, "step": 13000 }, { "epoch": 2.516453735965931, "eval_loss": 0.4555678367614746, "eval_runtime": 164.8785, "eval_samples_per_second": 34.304, "eval_steps_per_second": 4.288, "eval_wer": 0.3565341592977163, "step": 13000 }, { "epoch": 2.535811072396438, "eval_loss": 0.4600988030433655, "eval_runtime": 163.6778, "eval_samples_per_second": 34.556, "eval_steps_per_second": 4.319, "eval_wer": 0.3558280239444079, "step": 13100 }, { "epoch": 2.5551684088269453, "eval_loss": 0.4650043547153473, "eval_runtime": 163.58, "eval_samples_per_second": 34.576, "eval_steps_per_second": 4.322, "eval_wer": 0.354303413522492, "step": 13200 }, { "epoch": 2.5745257452574526, "eval_loss": 0.4737236201763153, "eval_runtime": 163.9532, "eval_samples_per_second": 34.498, "eval_steps_per_second": 4.312, "eval_wer": 0.35483301503747333, "step": 13300 }, { "epoch": 2.59388308168796, "eval_loss": 0.45056912302970886, "eval_runtime": 163.6462, "eval_samples_per_second": 34.562, "eval_steps_per_second": 4.32, "eval_wer": 0.3534367928616135, "step": 13400 }, { "epoch": 2.6132404181184667, "grad_norm": 1.5978127717971802, "learning_rate": 0.0001679186440677966, "loss": 0.3748, "step": 13500 }, { "epoch": 2.6132404181184667, "eval_loss": 0.4606887698173523, "eval_runtime": 163.0481, "eval_samples_per_second": 34.689, "eval_steps_per_second": 4.336, "eval_wer": 0.3589253903805107, "step": 13500 }, { "epoch": 2.632597754548974, "eval_loss": 0.45494645833969116, "eval_runtime": 162.9584, "eval_samples_per_second": 34.708, "eval_steps_per_second": 4.339, "eval_wer": 0.35372566641523967, "step": 13600 }, { "epoch": 2.6519550909794813, "eval_loss": 0.4562608301639557, "eval_runtime": 164.1384, "eval_samples_per_second": 34.459, "eval_steps_per_second": 4.307, "eval_wer": 0.36409301728426763, "step": 13700 }, { "epoch": 2.6713124274099886, "eval_loss": 0.44666969776153564, "eval_runtime": 163.7423, "eval_samples_per_second": 34.542, "eval_steps_per_second": 4.318, "eval_wer": 0.34369533469210894, "step": 13800 }, { "epoch": 2.6906697638404955, "eval_loss": 0.4536294639110565, "eval_runtime": 163.3892, "eval_samples_per_second": 34.617, "eval_steps_per_second": 4.327, "eval_wer": 0.35446389883006213, "step": 13900 }, { "epoch": 2.710027100271003, "grad_norm": 0.6591352224349976, "learning_rate": 0.00016283389830508475, "loss": 0.3888, "step": 14000 }, { "epoch": 2.710027100271003, "eval_loss": 0.4504217505455017, "eval_runtime": 163.3199, "eval_samples_per_second": 34.631, "eval_steps_per_second": 4.329, "eval_wer": 0.3509653191250341, "step": 14000 }, { "epoch": 2.72938443670151, "eval_loss": 0.44697660207748413, "eval_runtime": 163.5943, "eval_samples_per_second": 34.573, "eval_steps_per_second": 4.322, "eval_wer": 0.3602092728410714, "step": 14100 }, { "epoch": 2.748741773132017, "eval_loss": 0.45640549063682556, "eval_runtime": 163.6363, "eval_samples_per_second": 34.564, "eval_steps_per_second": 4.321, "eval_wer": 0.3539022002535668, "step": 14200 }, { "epoch": 2.7680991095625243, "eval_loss": 0.45214343070983887, "eval_runtime": 164.011, "eval_samples_per_second": 34.485, "eval_steps_per_second": 4.311, "eval_wer": 0.3561971401518191, "step": 14300 }, { "epoch": 2.7874564459930316, "eval_loss": 0.4452911913394928, "eval_runtime": 163.9028, "eval_samples_per_second": 34.508, "eval_steps_per_second": 4.314, "eval_wer": 0.35221710452408084, "step": 14400 }, { "epoch": 2.8068137824235384, "grad_norm": 0.6879103779792786, "learning_rate": 0.00015774915254237285, "loss": 0.376, "step": 14500 }, { "epoch": 2.8068137824235384, "eval_loss": 0.45518526434898376, "eval_runtime": 164.0636, "eval_samples_per_second": 34.474, "eval_steps_per_second": 4.309, "eval_wer": 0.35170355153985655, "step": 14500 }, { "epoch": 2.8261711188540457, "eval_loss": 0.45344606041908264, "eval_runtime": 163.5444, "eval_samples_per_second": 34.584, "eval_steps_per_second": 4.323, "eval_wer": 0.3549774518142864, "step": 14600 }, { "epoch": 2.845528455284553, "eval_loss": 0.45520055294036865, "eval_runtime": 163.3824, "eval_samples_per_second": 34.618, "eval_steps_per_second": 4.327, "eval_wer": 0.3405016770714641, "step": 14700 }, { "epoch": 2.86488579171506, "eval_loss": 0.45560306310653687, "eval_runtime": 164.0292, "eval_samples_per_second": 34.482, "eval_steps_per_second": 4.31, "eval_wer": 0.35138258092471636, "step": 14800 }, { "epoch": 2.884243128145567, "eval_loss": 0.44232824444770813, "eval_runtime": 164.3662, "eval_samples_per_second": 34.411, "eval_steps_per_second": 4.301, "eval_wer": 0.3467606040666977, "step": 14900 }, { "epoch": 2.9036004645760745, "grad_norm": 0.5280432105064392, "learning_rate": 0.00015267457627118642, "loss": 0.379, "step": 15000 }, { "epoch": 2.9036004645760745, "eval_loss": 0.43873003125190735, "eval_runtime": 163.5232, "eval_samples_per_second": 34.588, "eval_steps_per_second": 4.324, "eval_wer": 0.34268427725441736, "step": 15000 }, { "epoch": 2.9229578010065813, "eval_loss": 0.4372723400592804, "eval_runtime": 163.4018, "eval_samples_per_second": 34.614, "eval_steps_per_second": 4.327, "eval_wer": 0.34364718909983794, "step": 15100 }, { "epoch": 2.9423151374370886, "eval_loss": 0.4399470090866089, "eval_runtime": 164.335, "eval_samples_per_second": 34.418, "eval_steps_per_second": 4.302, "eval_wer": 0.33870424162667906, "step": 15200 }, { "epoch": 2.961672473867596, "eval_loss": 0.44378861784935, "eval_runtime": 164.0477, "eval_samples_per_second": 34.478, "eval_steps_per_second": 4.31, "eval_wer": 0.3380462518656417, "step": 15300 }, { "epoch": 2.9810298102981028, "eval_loss": 0.436974436044693, "eval_runtime": 163.6525, "eval_samples_per_second": 34.561, "eval_steps_per_second": 4.32, "eval_wer": 0.3430694419925856, "step": 15400 }, { "epoch": 3.00038714672861, "grad_norm": 0.660970151424408, "learning_rate": 0.00014758983050847457, "loss": 0.3731, "step": 15500 }, { "epoch": 3.00038714672861, "eval_loss": 0.43810611963272095, "eval_runtime": 162.9215, "eval_samples_per_second": 34.716, "eval_steps_per_second": 4.34, "eval_wer": 0.33413041036093144, "step": 15500 }, { "epoch": 3.0197444831591174, "eval_loss": 0.45139721035957336, "eval_runtime": 164.2748, "eval_samples_per_second": 34.43, "eval_steps_per_second": 4.304, "eval_wer": 0.3286418128420343, "step": 15600 }, { "epoch": 3.0391018195896247, "eval_loss": 0.43782538175582886, "eval_runtime": 164.2188, "eval_samples_per_second": 34.442, "eval_steps_per_second": 4.305, "eval_wer": 0.3340180706456324, "step": 15700 }, { "epoch": 3.0584591560201315, "eval_loss": 0.44340020418167114, "eval_runtime": 163.4459, "eval_samples_per_second": 34.605, "eval_steps_per_second": 4.326, "eval_wer": 0.3441446935533052, "step": 15800 }, { "epoch": 3.077816492450639, "eval_loss": 0.44192126393318176, "eval_runtime": 164.2009, "eval_samples_per_second": 34.446, "eval_steps_per_second": 4.306, "eval_wer": 0.3399239299642118, "step": 15900 }, { "epoch": 3.097173828881146, "grad_norm": 0.6999391913414001, "learning_rate": 0.0001425050847457627, "loss": 0.3176, "step": 16000 }, { "epoch": 3.097173828881146, "eval_loss": 0.4407601058483124, "eval_runtime": 164.7219, "eval_samples_per_second": 34.337, "eval_steps_per_second": 4.292, "eval_wer": 0.3335366147229221, "step": 16000 }, { "epoch": 3.116531165311653, "eval_loss": 0.436761736869812, "eval_runtime": 164.2581, "eval_samples_per_second": 34.434, "eval_steps_per_second": 4.304, "eval_wer": 0.33584760315193146, "step": 16100 }, { "epoch": 3.1358885017421603, "eval_loss": 0.4477560520172119, "eval_runtime": 163.8979, "eval_samples_per_second": 34.509, "eval_steps_per_second": 4.314, "eval_wer": 0.3400523182102678, "step": 16200 }, { "epoch": 3.1552458381726676, "eval_loss": 0.4414171576499939, "eval_runtime": 164.4332, "eval_samples_per_second": 34.397, "eval_steps_per_second": 4.3, "eval_wer": 0.3373882621046043, "step": 16300 }, { "epoch": 3.1746031746031744, "eval_loss": 0.4476623833179474, "eval_runtime": 163.6322, "eval_samples_per_second": 34.565, "eval_steps_per_second": 4.321, "eval_wer": 0.335013079552567, "step": 16400 }, { "epoch": 3.1939605110336817, "grad_norm": 0.5408484935760498, "learning_rate": 0.00013742033898305083, "loss": 0.3201, "step": 16500 }, { "epoch": 3.1939605110336817, "eval_loss": 0.4305751919746399, "eval_runtime": 163.8253, "eval_samples_per_second": 34.525, "eval_steps_per_second": 4.316, "eval_wer": 0.32917141435701563, "step": 16500 }, { "epoch": 3.213317847464189, "eval_loss": 0.4534677267074585, "eval_runtime": 163.8666, "eval_samples_per_second": 34.516, "eval_steps_per_second": 4.314, "eval_wer": 0.32941214231837074, "step": 16600 }, { "epoch": 3.2326751838946963, "eval_loss": 0.4379562437534332, "eval_runtime": 164.1184, "eval_samples_per_second": 34.463, "eval_steps_per_second": 4.308, "eval_wer": 0.33408226476866043, "step": 16700 }, { "epoch": 3.252032520325203, "eval_loss": 0.43677663803100586, "eval_runtime": 163.6899, "eval_samples_per_second": 34.553, "eval_steps_per_second": 4.319, "eval_wer": 0.33252555728523053, "step": 16800 }, { "epoch": 3.2713898567557105, "eval_loss": 0.4359833598136902, "eval_runtime": 164.1551, "eval_samples_per_second": 34.455, "eval_steps_per_second": 4.307, "eval_wer": 0.33043924828681936, "step": 16900 }, { "epoch": 3.290747193186218, "grad_norm": 0.5537161231040955, "learning_rate": 0.00013233559322033898, "loss": 0.3101, "step": 17000 }, { "epoch": 3.290747193186218, "eval_loss": 0.4347226917743683, "eval_runtime": 164.2964, "eval_samples_per_second": 34.426, "eval_steps_per_second": 4.303, "eval_wer": 0.32812825985781, "step": 17000 }, { "epoch": 3.3101045296167246, "eval_loss": 0.4375491738319397, "eval_runtime": 162.8938, "eval_samples_per_second": 34.722, "eval_steps_per_second": 4.34, "eval_wer": 0.3284973760652212, "step": 17100 }, { "epoch": 3.329461866047232, "eval_loss": 0.4491961896419525, "eval_runtime": 163.5698, "eval_samples_per_second": 34.579, "eval_steps_per_second": 4.322, "eval_wer": 0.33032690857152025, "step": 17200 }, { "epoch": 3.3488192024777392, "eval_loss": 0.4268127977848053, "eval_runtime": 164.9468, "eval_samples_per_second": 34.29, "eval_steps_per_second": 4.286, "eval_wer": 0.3284652790037072, "step": 17300 }, { "epoch": 3.368176538908246, "eval_loss": 0.4377237558364868, "eval_runtime": 164.0847, "eval_samples_per_second": 34.47, "eval_steps_per_second": 4.309, "eval_wer": 0.3269888141740624, "step": 17400 }, { "epoch": 3.3875338753387534, "grad_norm": 0.5330023765563965, "learning_rate": 0.00012726101694915254, "loss": 0.2963, "step": 17500 }, { "epoch": 3.3875338753387534, "eval_loss": 0.42494186758995056, "eval_runtime": 163.0698, "eval_samples_per_second": 34.685, "eval_steps_per_second": 4.336, "eval_wer": 0.3322527322623614, "step": 17500 }, { "epoch": 3.4068912117692607, "eval_loss": 0.4404699206352234, "eval_runtime": 164.4408, "eval_samples_per_second": 34.395, "eval_steps_per_second": 4.299, "eval_wer": 0.3338736338688193, "step": 17600 }, { "epoch": 3.4262485481997675, "eval_loss": 0.43636277318000793, "eval_runtime": 163.9679, "eval_samples_per_second": 34.495, "eval_steps_per_second": 4.312, "eval_wer": 0.3285615701882493, "step": 17700 }, { "epoch": 3.445605884630275, "eval_loss": 0.4350505769252777, "eval_runtime": 163.6477, "eval_samples_per_second": 34.562, "eval_steps_per_second": 4.32, "eval_wer": 0.3309207042095296, "step": 17800 }, { "epoch": 3.464963221060782, "eval_loss": 0.42997920513153076, "eval_runtime": 163.7592, "eval_samples_per_second": 34.539, "eval_steps_per_second": 4.317, "eval_wer": 0.322880390300268, "step": 17900 }, { "epoch": 3.484320557491289, "grad_norm": 0.31616127490997314, "learning_rate": 0.00012217627118644067, "loss": 0.3062, "step": 18000 }, { "epoch": 3.484320557491289, "eval_loss": 0.42307358980178833, "eval_runtime": 164.6469, "eval_samples_per_second": 34.352, "eval_steps_per_second": 4.294, "eval_wer": 0.32523952432154835, "step": 18000 }, { "epoch": 3.5036778939217963, "eval_loss": 0.4325993061065674, "eval_runtime": 164.1143, "eval_samples_per_second": 34.464, "eval_steps_per_second": 4.308, "eval_wer": 0.32326555503843624, "step": 18100 }, { "epoch": 3.5230352303523036, "eval_loss": 0.43141353130340576, "eval_runtime": 163.522, "eval_samples_per_second": 34.589, "eval_steps_per_second": 4.324, "eval_wer": 0.3282405995731091, "step": 18200 }, { "epoch": 3.5423925667828104, "eval_loss": 0.4343957006931305, "eval_runtime": 163.8089, "eval_samples_per_second": 34.528, "eval_steps_per_second": 4.316, "eval_wer": 0.32894673492641746, "step": 18300 }, { "epoch": 3.5617499032133177, "eval_loss": 0.42664915323257446, "eval_runtime": 166.5849, "eval_samples_per_second": 33.953, "eval_steps_per_second": 4.244, "eval_wer": 0.32207796376241754, "step": 18400 }, { "epoch": 3.581107239643825, "grad_norm": 0.40817028284072876, "learning_rate": 0.00011710169491525424, "loss": 0.2968, "step": 18500 }, { "epoch": 3.581107239643825, "eval_loss": 0.4305819571018219, "eval_runtime": 164.3676, "eval_samples_per_second": 34.411, "eval_steps_per_second": 4.301, "eval_wer": 0.32161255637046426, "step": 18500 }, { "epoch": 3.600464576074332, "eval_loss": 0.4318545460700989, "eval_runtime": 166.4377, "eval_samples_per_second": 33.983, "eval_steps_per_second": 4.248, "eval_wer": 0.3238914477379596, "step": 18600 }, { "epoch": 3.619821912504839, "eval_loss": 0.4271145164966583, "eval_runtime": 166.5812, "eval_samples_per_second": 33.953, "eval_steps_per_second": 4.244, "eval_wer": 0.3232013609154082, "step": 18700 }, { "epoch": 3.6391792489353465, "eval_loss": 0.41837719082832336, "eval_runtime": 164.5624, "eval_samples_per_second": 34.37, "eval_steps_per_second": 4.296, "eval_wer": 0.32641106706681006, "step": 18800 }, { "epoch": 3.658536585365854, "eval_loss": 0.4237981140613556, "eval_runtime": 165.5252, "eval_samples_per_second": 34.17, "eval_steps_per_second": 4.271, "eval_wer": 0.31997560623324933, "step": 18900 }, { "epoch": 3.6778939217963607, "grad_norm": 0.9548519253730774, "learning_rate": 0.00011201694915254236, "loss": 0.3191, "step": 19000 }, { "epoch": 3.6778939217963607, "eval_loss": 0.41389960050582886, "eval_runtime": 163.7093, "eval_samples_per_second": 34.549, "eval_steps_per_second": 4.319, "eval_wer": 0.3225915167466418, "step": 19000 }, { "epoch": 3.697251258226868, "eval_loss": 0.42384064197540283, "eval_runtime": 164.2841, "eval_samples_per_second": 34.428, "eval_steps_per_second": 4.304, "eval_wer": 0.3159955706055111, "step": 19100 }, { "epoch": 3.7166085946573753, "eval_loss": 0.4176156520843506, "eval_runtime": 163.8287, "eval_samples_per_second": 34.524, "eval_steps_per_second": 4.315, "eval_wer": 0.319301567941455, "step": 19200 }, { "epoch": 3.7359659310878826, "eval_loss": 0.4196203351020813, "eval_runtime": 165.0023, "eval_samples_per_second": 34.278, "eval_steps_per_second": 4.285, "eval_wer": 0.3202644797868755, "step": 19300 }, { "epoch": 3.7553232675183894, "eval_loss": 0.409524530172348, "eval_runtime": 164.4937, "eval_samples_per_second": 34.384, "eval_steps_per_second": 4.298, "eval_wer": 0.3181621222577073, "step": 19400 }, { "epoch": 3.7746806039488967, "grad_norm": 0.43373510241508484, "learning_rate": 0.00010693220338983049, "loss": 0.2921, "step": 19500 }, { "epoch": 3.7746806039488967, "eval_loss": 0.41209807991981506, "eval_runtime": 164.8329, "eval_samples_per_second": 34.314, "eval_steps_per_second": 4.289, "eval_wer": 0.31665356036654846, "step": 19500 }, { "epoch": 3.794037940379404, "eval_loss": 0.4112759530544281, "eval_runtime": 164.1863, "eval_samples_per_second": 34.449, "eval_steps_per_second": 4.306, "eval_wer": 0.31455120283738025, "step": 19600 }, { "epoch": 3.813395276809911, "eval_loss": 0.4094259738922119, "eval_runtime": 164.4841, "eval_samples_per_second": 34.386, "eval_steps_per_second": 4.298, "eval_wer": 0.3160758132592961, "step": 19700 }, { "epoch": 3.832752613240418, "eval_loss": 0.40931811928749084, "eval_runtime": 164.2911, "eval_samples_per_second": 34.427, "eval_steps_per_second": 4.303, "eval_wer": 0.31386111601482886, "step": 19800 }, { "epoch": 3.8521099496709255, "eval_loss": 0.41117748618125916, "eval_runtime": 165.1732, "eval_samples_per_second": 34.243, "eval_steps_per_second": 4.28, "eval_wer": 0.31731155012758583, "step": 19900 }, { "epoch": 3.8714672861014323, "grad_norm": 0.5022397637367249, "learning_rate": 0.00010184745762711863, "loss": 0.3007, "step": 20000 }, { "epoch": 3.8714672861014323, "eval_loss": 0.4092504680156708, "eval_runtime": 163.9434, "eval_samples_per_second": 34.5, "eval_steps_per_second": 4.312, "eval_wer": 0.31593137648248304, "step": 20000 }, { "epoch": 3.8908246225319396, "eval_loss": 0.4147598147392273, "eval_runtime": 164.6303, "eval_samples_per_second": 34.356, "eval_steps_per_second": 4.294, "eval_wer": 0.31565855145961386, "step": 20100 }, { "epoch": 3.910181958962447, "eval_loss": 0.41137251257896423, "eval_runtime": 164.6634, "eval_samples_per_second": 34.349, "eval_steps_per_second": 4.294, "eval_wer": 0.3150326587600905, "step": 20200 }, { "epoch": 3.9295392953929538, "eval_loss": 0.4155375361442566, "eval_runtime": 164.3406, "eval_samples_per_second": 34.416, "eval_steps_per_second": 4.302, "eval_wer": 0.31456725136813724, "step": 20300 }, { "epoch": 3.948896631823461, "eval_loss": 0.4075925648212433, "eval_runtime": 164.3692, "eval_samples_per_second": 34.41, "eval_steps_per_second": 4.301, "eval_wer": 0.3135722424612027, "step": 20400 }, { "epoch": 3.9682539682539684, "grad_norm": 0.6109060049057007, "learning_rate": 9.67728813559322e-05, "loss": 0.296, "step": 20500 }, { "epoch": 3.9682539682539684, "eval_loss": 0.4066578149795532, "eval_runtime": 164.7852, "eval_samples_per_second": 34.323, "eval_steps_per_second": 4.29, "eval_wer": 0.3125611850235111, "step": 20500 }, { "epoch": 3.987611304684475, "eval_loss": 0.40839362144470215, "eval_runtime": 164.8883, "eval_samples_per_second": 34.302, "eval_steps_per_second": 4.288, "eval_wer": 0.3150487072908475, "step": 20600 }, { "epoch": 4.006968641114983, "eval_loss": 0.4150494635105133, "eval_runtime": 164.1525, "eval_samples_per_second": 34.456, "eval_steps_per_second": 4.307, "eval_wer": 0.312432796777455, "step": 20700 }, { "epoch": 4.02632597754549, "eval_loss": 0.41322341561317444, "eval_runtime": 164.6726, "eval_samples_per_second": 34.347, "eval_steps_per_second": 4.293, "eval_wer": 0.3132512718460625, "step": 20800 }, { "epoch": 4.045683313975997, "eval_loss": 0.4182606339454651, "eval_runtime": 164.9667, "eval_samples_per_second": 34.286, "eval_steps_per_second": 4.286, "eval_wer": 0.31464749402192227, "step": 20900 }, { "epoch": 4.065040650406504, "grad_norm": 0.9771650433540344, "learning_rate": 9.168813559322032e-05, "loss": 0.2611, "step": 21000 }, { "epoch": 4.065040650406504, "eval_loss": 0.41840454936027527, "eval_runtime": 164.8893, "eval_samples_per_second": 34.302, "eval_steps_per_second": 4.288, "eval_wer": 0.30952801271043634, "step": 21000 }, { "epoch": 4.084397986837011, "eval_loss": 0.4167742431163788, "eval_runtime": 165.1089, "eval_samples_per_second": 34.256, "eval_steps_per_second": 4.282, "eval_wer": 0.30845276114971676, "step": 21100 }, { "epoch": 4.103755323267518, "eval_loss": 0.42244288325309753, "eval_runtime": 164.9167, "eval_samples_per_second": 34.296, "eval_steps_per_second": 4.287, "eval_wer": 0.31015390540995974, "step": 21200 }, { "epoch": 4.123112659698026, "eval_loss": 0.4187394678592682, "eval_runtime": 164.7166, "eval_samples_per_second": 34.338, "eval_steps_per_second": 4.292, "eval_wer": 0.30456901670652053, "step": 21300 }, { "epoch": 4.142469996128533, "eval_loss": 0.41454723477363586, "eval_runtime": 164.38, "eval_samples_per_second": 34.408, "eval_steps_per_second": 4.301, "eval_wer": 0.3110044775400812, "step": 21400 }, { "epoch": 4.16182733255904, "grad_norm": 0.8976078629493713, "learning_rate": 8.660338983050847e-05, "loss": 0.2431, "step": 21500 }, { "epoch": 4.16182733255904, "eval_loss": 0.42720434069633484, "eval_runtime": 165.0533, "eval_samples_per_second": 34.268, "eval_steps_per_second": 4.283, "eval_wer": 0.31071560398645504, "step": 21500 }, { "epoch": 4.181184668989547, "eval_loss": 0.41736435890197754, "eval_runtime": 164.6627, "eval_samples_per_second": 34.349, "eval_steps_per_second": 4.294, "eval_wer": 0.3069923448508289, "step": 21600 }, { "epoch": 4.200542005420054, "eval_loss": 0.41904589533805847, "eval_runtime": 165.6169, "eval_samples_per_second": 34.151, "eval_steps_per_second": 4.269, "eval_wer": 0.3085811493957728, "step": 21700 }, { "epoch": 4.219899341850561, "eval_loss": 0.41643446683883667, "eval_runtime": 165.1417, "eval_samples_per_second": 34.249, "eval_steps_per_second": 4.281, "eval_wer": 0.3050825696907448, "step": 21800 }, { "epoch": 4.239256678281069, "eval_loss": 0.41955476999282837, "eval_runtime": 165.2591, "eval_samples_per_second": 34.225, "eval_steps_per_second": 4.278, "eval_wer": 0.30777872285792235, "step": 21900 }, { "epoch": 4.258614014711576, "grad_norm": 1.5854851007461548, "learning_rate": 8.15186440677966e-05, "loss": 0.2453, "step": 22000 }, { "epoch": 4.258614014711576, "eval_loss": 0.42485129833221436, "eval_runtime": 164.7312, "eval_samples_per_second": 34.335, "eval_steps_per_second": 4.292, "eval_wer": 0.30915889650302514, "step": 22000 }, { "epoch": 4.2779713511420825, "eval_loss": 0.4246067404747009, "eval_runtime": 164.9099, "eval_samples_per_second": 34.298, "eval_steps_per_second": 4.287, "eval_wer": 0.30736146105824014, "step": 22100 }, { "epoch": 4.29732868757259, "eval_loss": 0.4166228771209717, "eval_runtime": 164.9564, "eval_samples_per_second": 34.288, "eval_steps_per_second": 4.286, "eval_wer": 0.30740960665051115, "step": 22200 }, { "epoch": 4.316686024003097, "eval_loss": 0.4192067086696625, "eval_runtime": 165.6591, "eval_samples_per_second": 34.142, "eval_steps_per_second": 4.268, "eval_wer": 0.3027555327309785, "step": 22300 }, { "epoch": 4.336043360433604, "eval_loss": 0.41863906383514404, "eval_runtime": 164.9558, "eval_samples_per_second": 34.288, "eval_steps_per_second": 4.286, "eval_wer": 0.3020975429699411, "step": 22400 }, { "epoch": 4.355400696864112, "grad_norm": 1.1900339126586914, "learning_rate": 7.645423728813559e-05, "loss": 0.2336, "step": 22500 }, { "epoch": 4.355400696864112, "eval_loss": 0.4268459677696228, "eval_runtime": 166.7137, "eval_samples_per_second": 33.926, "eval_steps_per_second": 4.241, "eval_wer": 0.3083885670266887, "step": 22500 }, { "epoch": 4.3747580332946185, "eval_loss": 0.4346672296524048, "eval_runtime": 170.3751, "eval_samples_per_second": 33.197, "eval_steps_per_second": 4.15, "eval_wer": 0.307104684566128, "step": 22600 }, { "epoch": 4.394115369725126, "eval_loss": 0.47525468468666077, "eval_runtime": 164.9807, "eval_samples_per_second": 34.283, "eval_steps_per_second": 4.285, "eval_wer": 0.3208582754248849, "step": 22700 }, { "epoch": 4.413472706155633, "eval_loss": 0.582381546497345, "eval_runtime": 165.1397, "eval_samples_per_second": 34.25, "eval_steps_per_second": 4.281, "eval_wer": 0.415416218645183, "step": 22800 }, { "epoch": 4.43283004258614, "eval_loss": 0.5073803067207336, "eval_runtime": 165.3352, "eval_samples_per_second": 34.209, "eval_steps_per_second": 4.276, "eval_wer": 0.3415448315706697, "step": 22900 }, { "epoch": 4.452187379016648, "grad_norm": 1.5807456970214844, "learning_rate": 7.136949152542373e-05, "loss": 0.3426, "step": 23000 }, { "epoch": 4.452187379016648, "eval_loss": 0.6242379546165466, "eval_runtime": 164.8642, "eval_samples_per_second": 34.307, "eval_steps_per_second": 4.288, "eval_wer": 0.41979746754184655, "step": 23000 }, { "epoch": 4.471544715447155, "eval_loss": 0.5862211585044861, "eval_runtime": 164.8283, "eval_samples_per_second": 34.314, "eval_steps_per_second": 4.289, "eval_wer": 0.4200702925647157, "step": 23100 }, { "epoch": 4.4909020518776614, "eval_loss": 0.6151086091995239, "eval_runtime": 165.0995, "eval_samples_per_second": 34.258, "eval_steps_per_second": 4.282, "eval_wer": 0.39638266116737014, "step": 23200 }, { "epoch": 4.510259388308169, "eval_loss": 0.5640283226966858, "eval_runtime": 164.849, "eval_samples_per_second": 34.31, "eval_steps_per_second": 4.289, "eval_wer": 0.3685705573654732, "step": 23300 }, { "epoch": 4.529616724738676, "eval_loss": 0.6589744091033936, "eval_runtime": 164.8194, "eval_samples_per_second": 34.316, "eval_steps_per_second": 4.29, "eval_wer": 0.4647494021922293, "step": 23400 }, { "epoch": 4.548974061169183, "grad_norm": 1.0218427181243896, "learning_rate": 6.628474576271186e-05, "loss": 0.4541, "step": 23500 }, { "epoch": 4.548974061169183, "eval_loss": 0.6010532975196838, "eval_runtime": 165.0253, "eval_samples_per_second": 34.274, "eval_steps_per_second": 4.284, "eval_wer": 0.3959974964292019, "step": 23500 }, { "epoch": 4.568331397599691, "eval_loss": 0.5802894830703735, "eval_runtime": 166.0838, "eval_samples_per_second": 34.055, "eval_steps_per_second": 4.257, "eval_wer": 0.39505063311453836, "step": 23600 }, { "epoch": 4.5876887340301975, "eval_loss": 0.5762883424758911, "eval_runtime": 165.1308, "eval_samples_per_second": 34.252, "eval_steps_per_second": 4.281, "eval_wer": 0.3910545489560431, "step": 23700 }, { "epoch": 4.607046070460704, "eval_loss": 0.5418487787246704, "eval_runtime": 165.1407, "eval_samples_per_second": 34.25, "eval_steps_per_second": 4.281, "eval_wer": 0.36550528799088444, "step": 23800 }, { "epoch": 4.626403406891212, "eval_loss": 0.5546759366989136, "eval_runtime": 165.8678, "eval_samples_per_second": 34.099, "eval_steps_per_second": 4.262, "eval_wer": 0.38877565758854776, "step": 23900 }, { "epoch": 4.645760743321719, "grad_norm": 32.50680923461914, "learning_rate": 6.12e-05, "loss": 0.4145, "step": 24000 }, { "epoch": 4.645760743321719, "eval_loss": 0.5300523638725281, "eval_runtime": 164.9724, "eval_samples_per_second": 34.285, "eval_steps_per_second": 4.286, "eval_wer": 0.3608030684790807, "step": 24000 }, { "epoch": 4.665118079752226, "eval_loss": 0.573882520198822, "eval_runtime": 165.2226, "eval_samples_per_second": 34.233, "eval_steps_per_second": 4.279, "eval_wer": 0.39927139670363176, "step": 24100 }, { "epoch": 4.6844754161827336, "eval_loss": 0.5775899887084961, "eval_runtime": 165.3691, "eval_samples_per_second": 34.202, "eval_steps_per_second": 4.275, "eval_wer": 0.39816404808139816, "step": 24200 }, { "epoch": 4.70383275261324, "eval_loss": 0.5412492156028748, "eval_runtime": 164.9818, "eval_samples_per_second": 34.283, "eval_steps_per_second": 4.285, "eval_wer": 0.37078525460994044, "step": 24300 }, { "epoch": 4.723190089043747, "eval_loss": 0.5329325199127197, "eval_runtime": 165.8065, "eval_samples_per_second": 34.112, "eval_steps_per_second": 4.264, "eval_wer": 0.37044823546404326, "step": 24400 }, { "epoch": 4.742547425474255, "grad_norm": 1.8765805959701538, "learning_rate": 5.611525423728813e-05, "loss": 0.3834, "step": 24500 }, { "epoch": 4.742547425474255, "eval_loss": 0.5299070477485657, "eval_runtime": 165.3917, "eval_samples_per_second": 34.198, "eval_steps_per_second": 4.275, "eval_wer": 0.3732246312850059, "step": 24500 }, { "epoch": 4.761904761904762, "eval_loss": 0.5424681901931763, "eval_runtime": 165.4071, "eval_samples_per_second": 34.194, "eval_steps_per_second": 4.274, "eval_wer": 0.3928519844008281, "step": 24600 }, { "epoch": 4.781262098335269, "eval_loss": 0.5111268758773804, "eval_runtime": 165.4914, "eval_samples_per_second": 34.177, "eval_steps_per_second": 4.272, "eval_wer": 0.3585241771115854, "step": 24700 }, { "epoch": 4.8006194347657765, "eval_loss": 0.5076457858085632, "eval_runtime": 165.6732, "eval_samples_per_second": 34.14, "eval_steps_per_second": 4.267, "eval_wer": 0.35033942642551075, "step": 24800 }, { "epoch": 4.819976771196283, "eval_loss": 0.5261921882629395, "eval_runtime": 165.2946, "eval_samples_per_second": 34.218, "eval_steps_per_second": 4.277, "eval_wer": 0.3681372470350339, "step": 24900 }, { "epoch": 4.83933410762679, "grad_norm": 5.934371471405029, "learning_rate": 5.1030508474576264e-05, "loss": 0.3719, "step": 25000 }, { "epoch": 4.83933410762679, "eval_loss": 0.547415018081665, "eval_runtime": 165.6997, "eval_samples_per_second": 34.134, "eval_steps_per_second": 4.267, "eval_wer": 0.3833031086004076, "step": 25000 }, { "epoch": 4.858691444057298, "eval_loss": 0.5746738910675049, "eval_runtime": 165.8407, "eval_samples_per_second": 34.105, "eval_steps_per_second": 4.263, "eval_wer": 0.40389337356165045, "step": 25100 }, { "epoch": 4.878048780487805, "eval_loss": 0.5188133120536804, "eval_runtime": 165.5746, "eval_samples_per_second": 34.16, "eval_steps_per_second": 4.27, "eval_wer": 0.3503073293639967, "step": 25200 }, { "epoch": 4.897406116918312, "eval_loss": 0.5522667169570923, "eval_runtime": 165.1011, "eval_samples_per_second": 34.258, "eval_steps_per_second": 4.282, "eval_wer": 0.3865609603440805, "step": 25300 }, { "epoch": 4.916763453348819, "eval_loss": 0.5302358865737915, "eval_runtime": 165.871, "eval_samples_per_second": 34.099, "eval_steps_per_second": 4.262, "eval_wer": 0.36446213349167883, "step": 25400 }, { "epoch": 4.936120789779326, "grad_norm": 1.1752023696899414, "learning_rate": 4.595593220338983e-05, "loss": 0.3798, "step": 25500 }, { "epoch": 4.936120789779326, "eval_loss": 0.5099266767501831, "eval_runtime": 165.9652, "eval_samples_per_second": 34.079, "eval_steps_per_second": 4.26, "eval_wer": 0.3499542616873425, "step": 25500 }, { "epoch": 4.955478126209833, "eval_loss": 0.4823363125324249, "eval_runtime": 164.8602, "eval_samples_per_second": 34.308, "eval_steps_per_second": 4.288, "eval_wer": 0.33761294153520244, "step": 25600 }, { "epoch": 4.974835462640341, "eval_loss": 0.4805842936038971, "eval_runtime": 166.8028, "eval_samples_per_second": 33.908, "eval_steps_per_second": 4.239, "eval_wer": 0.3357352634366324, "step": 25700 }, { "epoch": 4.994192799070848, "eval_loss": 0.4942820370197296, "eval_runtime": 165.4552, "eval_samples_per_second": 34.184, "eval_steps_per_second": 4.273, "eval_wer": 0.35093322206352007, "step": 25800 }, { "epoch": 5.013550135501355, "eval_loss": 0.49528568983078003, "eval_runtime": 165.947, "eval_samples_per_second": 34.083, "eval_steps_per_second": 4.26, "eval_wer": 0.35245783248543594, "step": 25900 }, { "epoch": 5.032907471931862, "grad_norm": 27.542322158813477, "learning_rate": 4.087118644067796e-05, "loss": 0.3158, "step": 26000 }, { "epoch": 5.032907471931862, "eval_loss": 0.485315203666687, "eval_runtime": 165.2844, "eval_samples_per_second": 34.22, "eval_steps_per_second": 4.277, "eval_wer": 0.34703342908956686, "step": 26000 }, { "epoch": 5.052264808362369, "eval_loss": 0.5204781293869019, "eval_runtime": 165.773, "eval_samples_per_second": 34.119, "eval_steps_per_second": 4.265, "eval_wer": 0.36183017444752935, "step": 26100 }, { "epoch": 5.071622144792877, "eval_loss": 0.5013459920883179, "eval_runtime": 165.057, "eval_samples_per_second": 34.267, "eval_steps_per_second": 4.283, "eval_wer": 0.3510455617788191, "step": 26200 }, { "epoch": 5.090979481223384, "eval_loss": 0.4863474667072296, "eval_runtime": 165.6964, "eval_samples_per_second": 34.135, "eval_steps_per_second": 4.267, "eval_wer": 0.3396511049413426, "step": 26300 }, { "epoch": 5.110336817653891, "eval_loss": 0.47152572870254517, "eval_runtime": 166.0563, "eval_samples_per_second": 34.061, "eval_steps_per_second": 4.258, "eval_wer": 0.32851342459597827, "step": 26400 }, { "epoch": 5.129694154084398, "grad_norm": 0.8464019894599915, "learning_rate": 3.5786440677966095e-05, "loss": 0.2993, "step": 26500 }, { "epoch": 5.129694154084398, "eval_loss": 0.4816218912601471, "eval_runtime": 165.4176, "eval_samples_per_second": 34.192, "eval_steps_per_second": 4.274, "eval_wer": 0.33273418818507167, "step": 26500 }, { "epoch": 5.149051490514905, "eval_loss": 0.48058804869651794, "eval_runtime": 166.4075, "eval_samples_per_second": 33.989, "eval_steps_per_second": 4.249, "eval_wer": 0.33811044598866974, "step": 26600 }, { "epoch": 5.168408826945412, "eval_loss": 0.4854019284248352, "eval_runtime": 165.1934, "eval_samples_per_second": 34.239, "eval_steps_per_second": 4.28, "eval_wer": 0.33416250742244547, "step": 26700 }, { "epoch": 5.18776616337592, "eval_loss": 0.49545472860336304, "eval_runtime": 165.6735, "eval_samples_per_second": 34.139, "eval_steps_per_second": 4.267, "eval_wer": 0.3433422670154547, "step": 26800 }, { "epoch": 5.207123499806427, "eval_loss": 0.4862872064113617, "eval_runtime": 165.5277, "eval_samples_per_second": 34.17, "eval_steps_per_second": 4.271, "eval_wer": 0.34337436407696875, "step": 26900 }, { "epoch": 5.2264808362369335, "grad_norm": 10.611580848693848, "learning_rate": 3.0701694915254236e-05, "loss": 0.2902, "step": 27000 }, { "epoch": 5.2264808362369335, "eval_loss": 0.48670876026153564, "eval_runtime": 165.4986, "eval_samples_per_second": 34.176, "eval_steps_per_second": 4.272, "eval_wer": 0.3448508289066136, "step": 27000 }, { "epoch": 5.245838172667441, "eval_loss": 0.4787338674068451, "eval_runtime": 165.461, "eval_samples_per_second": 34.183, "eval_steps_per_second": 4.273, "eval_wer": 0.33778947537352955, "step": 27100 }, { "epoch": 5.265195509097948, "eval_loss": 0.4861724376678467, "eval_runtime": 165.3459, "eval_samples_per_second": 34.207, "eval_steps_per_second": 4.276, "eval_wer": 0.33793391215034263, "step": 27200 }, { "epoch": 5.284552845528455, "eval_loss": 0.4954308271408081, "eval_runtime": 165.6637, "eval_samples_per_second": 34.141, "eval_steps_per_second": 4.268, "eval_wer": 0.3467927011282117, "step": 27300 }, { "epoch": 5.303910181958963, "eval_loss": 0.572640061378479, "eval_runtime": 165.6804, "eval_samples_per_second": 34.138, "eval_steps_per_second": 4.267, "eval_wer": 0.41416443324613633, "step": 27400 }, { "epoch": 5.3232675183894695, "grad_norm": 1.2211335897445679, "learning_rate": 2.5627118644067793e-05, "loss": 0.305, "step": 27500 }, { "epoch": 5.3232675183894695, "eval_loss": 0.5179979205131531, "eval_runtime": 165.6208, "eval_samples_per_second": 34.15, "eval_steps_per_second": 4.269, "eval_wer": 0.35735263436632375, "step": 27500 }, { "epoch": 5.342624854819976, "eval_loss": 0.4996646046638489, "eval_runtime": 164.9613, "eval_samples_per_second": 34.287, "eval_steps_per_second": 4.286, "eval_wer": 0.3452038965832678, "step": 27600 }, { "epoch": 5.361982191250484, "eval_loss": 0.4949517846107483, "eval_runtime": 165.643, "eval_samples_per_second": 34.146, "eval_steps_per_second": 4.268, "eval_wer": 0.34130410360931457, "step": 27700 }, { "epoch": 5.381339527680991, "eval_loss": 0.5071349143981934, "eval_runtime": 166.3001, "eval_samples_per_second": 34.011, "eval_steps_per_second": 4.251, "eval_wer": 0.3491999807417631, "step": 27800 }, { "epoch": 5.400696864111498, "eval_loss": 0.5095939040184021, "eval_runtime": 165.4785, "eval_samples_per_second": 34.18, "eval_steps_per_second": 4.272, "eval_wer": 0.3544799473608191, "step": 27900 }, { "epoch": 5.420054200542006, "grad_norm": 15.024033546447754, "learning_rate": 2.054237288135593e-05, "loss": 0.3163, "step": 28000 }, { "epoch": 5.420054200542006, "eval_loss": 0.5129156112670898, "eval_runtime": 166.1505, "eval_samples_per_second": 34.041, "eval_steps_per_second": 4.255, "eval_wer": 0.3565983534207443, "step": 28000 }, { "epoch": 5.4394115369725125, "eval_loss": 0.5067318677902222, "eval_runtime": 165.9899, "eval_samples_per_second": 34.074, "eval_steps_per_second": 4.259, "eval_wer": 0.3506122514483799, "step": 28100 }, { "epoch": 5.45876887340302, "eval_loss": 0.5053198337554932, "eval_runtime": 165.3351, "eval_samples_per_second": 34.209, "eval_steps_per_second": 4.276, "eval_wer": 0.35000240727961357, "step": 28200 }, { "epoch": 5.478126209833527, "eval_loss": 0.5077947974205017, "eval_runtime": 165.3012, "eval_samples_per_second": 34.216, "eval_steps_per_second": 4.277, "eval_wer": 0.3518640368474266, "step": 28300 }, { "epoch": 5.497483546264034, "eval_loss": 0.48453789949417114, "eval_runtime": 165.2767, "eval_samples_per_second": 34.221, "eval_steps_per_second": 4.278, "eval_wer": 0.3375166503506604, "step": 28400 }, { "epoch": 5.516840882694542, "grad_norm": 0.43120303750038147, "learning_rate": 1.5457627118644067e-05, "loss": 0.3136, "step": 28500 }, { "epoch": 5.516840882694542, "eval_loss": 0.4930485486984253, "eval_runtime": 165.9777, "eval_samples_per_second": 34.077, "eval_steps_per_second": 4.26, "eval_wer": 0.3439842082457351, "step": 28500 }, { "epoch": 5.5361982191250485, "eval_loss": 0.5025920271873474, "eval_runtime": 165.742, "eval_samples_per_second": 34.125, "eval_steps_per_second": 4.266, "eval_wer": 0.35122209561714623, "step": 28600 }, { "epoch": 5.555555555555555, "eval_loss": 0.5056036710739136, "eval_runtime": 165.717, "eval_samples_per_second": 34.13, "eval_steps_per_second": 4.266, "eval_wer": 0.3518800853781836, "step": 28700 }, { "epoch": 5.574912891986063, "eval_loss": 0.5090658068656921, "eval_runtime": 165.5604, "eval_samples_per_second": 34.163, "eval_steps_per_second": 4.27, "eval_wer": 0.3546404326683892, "step": 28800 }, { "epoch": 5.59427022841657, "eval_loss": 0.5027741193771362, "eval_runtime": 170.2845, "eval_samples_per_second": 33.215, "eval_steps_per_second": 4.152, "eval_wer": 0.34952095135690325, "step": 28900 }, { "epoch": 5.613627564847077, "grad_norm": 1.859834909439087, "learning_rate": 1.0372881355932203e-05, "loss": 0.3092, "step": 29000 }, { "epoch": 5.613627564847077, "eval_loss": 0.505651593208313, "eval_runtime": 164.9869, "eval_samples_per_second": 34.282, "eval_steps_per_second": 4.285, "eval_wer": 0.3509974161865481, "step": 29000 }, { "epoch": 5.6329849012775846, "eval_loss": 0.5085631608963013, "eval_runtime": 165.6325, "eval_samples_per_second": 34.148, "eval_steps_per_second": 4.268, "eval_wer": 0.3532923560848004, "step": 29100 }, { "epoch": 5.652342237708091, "eval_loss": 0.5055486559867859, "eval_runtime": 165.6348, "eval_samples_per_second": 34.147, "eval_steps_per_second": 4.268, "eval_wer": 0.35144677504774435, "step": 29200 }, { "epoch": 5.671699574138598, "eval_loss": 0.5133376717567444, "eval_runtime": 166.8503, "eval_samples_per_second": 33.899, "eval_steps_per_second": 4.237, "eval_wer": 0.35765755645070696, "step": 29300 }, { "epoch": 5.691056910569106, "eval_loss": 0.5129527449607849, "eval_runtime": 165.063, "eval_samples_per_second": 34.266, "eval_steps_per_second": 4.283, "eval_wer": 0.35703166375118356, "step": 29400 }, { "epoch": 5.710414246999613, "grad_norm": 1.5260862112045288, "learning_rate": 5.288135593220339e-06, "loss": 0.3152, "step": 29500 }, { "epoch": 5.710414246999613, "eval_loss": 0.5147610902786255, "eval_runtime": 165.1582, "eval_samples_per_second": 34.246, "eval_steps_per_second": 4.281, "eval_wer": 0.3581390123734172, "step": 29500 }, { "epoch": 5.72977158343012, "eval_loss": 0.5114809274673462, "eval_runtime": 165.7617, "eval_samples_per_second": 34.121, "eval_steps_per_second": 4.265, "eval_wer": 0.3554589077369967, "step": 29600 }, { "epoch": 5.7491289198606275, "eval_loss": 0.5053985714912415, "eval_runtime": 165.7121, "eval_samples_per_second": 34.131, "eval_steps_per_second": 4.266, "eval_wer": 0.35263436632376305, "step": 29700 }, { "epoch": 5.768486256291134, "eval_loss": 0.5080947279930115, "eval_runtime": 165.6502, "eval_samples_per_second": 34.144, "eval_steps_per_second": 4.268, "eval_wer": 0.3535651811076696, "step": 29800 }, { "epoch": 5.787843592721641, "eval_loss": 0.5076740384101868, "eval_runtime": 164.8589, "eval_samples_per_second": 34.308, "eval_steps_per_second": 4.289, "eval_wer": 0.35348493845388457, "step": 29900 }, { "epoch": 5.807200929152149, "grad_norm": 24.957311630249023, "learning_rate": 2.0338983050847458e-07, "loss": 0.3085, "step": 30000 }, { "epoch": 5.807200929152149, "eval_loss": 0.5066753029823303, "eval_runtime": 165.8811, "eval_samples_per_second": 34.097, "eval_steps_per_second": 4.262, "eval_wer": 0.35224920158559486, "step": 30000 }, { "epoch": 5.807200929152149, "step": 30000, "total_flos": 3.3745707679449666e+19, "train_loss": 0.49102539647420246, "train_runtime": 61359.0363, "train_samples_per_second": 3.911, "train_steps_per_second": 0.489 } ], "logging_steps": 500, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 400, "total_flos": 3.3745707679449666e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }