{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.8714672861014323, "eval_steps": 100, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019357336430507164, "eval_loss": 3.5565404891967773, "eval_runtime": 151.5266, "eval_samples_per_second": 37.327, "eval_steps_per_second": 4.666, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.03871467286101433, "eval_loss": 3.0301756858825684, "eval_runtime": 150.582, "eval_samples_per_second": 37.561, "eval_steps_per_second": 4.695, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.05807200929152149, "eval_loss": 2.9460911750793457, "eval_runtime": 148.9065, "eval_samples_per_second": 37.984, "eval_steps_per_second": 4.748, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.07742934572202866, "eval_loss": 1.8142520189285278, "eval_runtime": 149.8655, "eval_samples_per_second": 37.741, "eval_steps_per_second": 4.718, "eval_wer": 0.940732775914365, "step": 400 }, { "epoch": 0.09678668215253582, "grad_norm": 3.132490396499634, "learning_rate": 0.00029759999999999997, "loss": 3.9521, "step": 500 }, { "epoch": 0.09678668215253582, "eval_loss": 1.4195518493652344, "eval_runtime": 150.5171, "eval_samples_per_second": 37.577, "eval_steps_per_second": 4.697, "eval_wer": 0.8693007655149171, "step": 500 }, { "epoch": 0.11614401858304298, "eval_loss": 1.16689133644104, "eval_runtime": 150.5387, "eval_samples_per_second": 37.572, "eval_steps_per_second": 4.696, "eval_wer": 0.8055239042865626, "step": 600 }, { "epoch": 0.13550135501355012, "eval_loss": 1.0756505727767944, "eval_runtime": 151.2385, "eval_samples_per_second": 37.398, "eval_steps_per_second": 4.675, "eval_wer": 0.7596251063215163, "step": 700 }, { "epoch": 0.1548586914440573, "eval_loss": 0.9944618344306946, "eval_runtime": 151.1646, "eval_samples_per_second": 37.416, "eval_steps_per_second": 4.677, "eval_wer": 0.7223925149652549, "step": 800 }, { "epoch": 0.17421602787456447, "eval_loss": 0.9381263256072998, "eval_runtime": 151.6289, "eval_samples_per_second": 37.302, "eval_steps_per_second": 4.663, "eval_wer": 0.6870857472998347, "step": 900 }, { "epoch": 0.19357336430507163, "grad_norm": 7.335289001464844, "learning_rate": 0.0002844, "loss": 1.0266, "step": 1000 }, { "epoch": 0.19357336430507163, "eval_loss": 0.8977694511413574, "eval_runtime": 156.0202, "eval_samples_per_second": 36.252, "eval_steps_per_second": 4.531, "eval_wer": 0.661472292211648, "step": 1000 }, { "epoch": 0.2129307007355788, "eval_loss": 0.8770694136619568, "eval_runtime": 151.6589, "eval_samples_per_second": 37.294, "eval_steps_per_second": 4.662, "eval_wer": 0.6450385967164706, "step": 1100 }, { "epoch": 0.23228803716608595, "eval_loss": 0.851553201675415, "eval_runtime": 151.5945, "eval_samples_per_second": 37.31, "eval_steps_per_second": 4.664, "eval_wer": 0.640432668389209, "step": 1200 }, { "epoch": 0.2516453735965931, "eval_loss": 0.8273979425430298, "eval_runtime": 151.4524, "eval_samples_per_second": 37.345, "eval_steps_per_second": 4.668, "eval_wer": 0.6138081558633307, "step": 1300 }, { "epoch": 0.27100271002710025, "eval_loss": 0.7992698550224304, "eval_runtime": 152.8076, "eval_samples_per_second": 37.014, "eval_steps_per_second": 4.627, "eval_wer": 0.596973247099228, "step": 1400 }, { "epoch": 0.29036004645760743, "grad_norm": 4.0737223625183105, "learning_rate": 0.00026861052631578947, "loss": 0.8454, "step": 1500 }, { "epoch": 0.29036004645760743, "eval_loss": 0.7768516540527344, "eval_runtime": 152.3743, "eval_samples_per_second": 37.119, "eval_steps_per_second": 4.64, "eval_wer": 0.5887563993516394, "step": 1500 }, { "epoch": 0.3097173828881146, "eval_loss": 0.7664207220077515, "eval_runtime": 154.3668, "eval_samples_per_second": 36.64, "eval_steps_per_second": 4.58, "eval_wer": 0.5997977885124617, "step": 1600 }, { "epoch": 0.32907471931862176, "eval_loss": 0.7400562763214111, "eval_runtime": 153.7228, "eval_samples_per_second": 36.793, "eval_steps_per_second": 4.599, "eval_wer": 0.5592110542279854, "step": 1700 }, { "epoch": 0.34843205574912894, "eval_loss": 0.746478796005249, "eval_runtime": 151.7535, "eval_samples_per_second": 37.271, "eval_steps_per_second": 4.659, "eval_wer": 0.5650206223620228, "step": 1800 }, { "epoch": 0.3677893921796361, "eval_loss": 0.7252949476242065, "eval_runtime": 151.7548, "eval_samples_per_second": 37.271, "eval_steps_per_second": 4.659, "eval_wer": 0.5791272808974338, "step": 1900 }, { "epoch": 0.38714672861014326, "grad_norm": 2.4802448749542236, "learning_rate": 0.0002528210526315789, "loss": 0.7537, "step": 2000 }, { "epoch": 0.38714672861014326, "eval_loss": 0.7039346098899841, "eval_runtime": 152.7969, "eval_samples_per_second": 37.016, "eval_steps_per_second": 4.627, "eval_wer": 0.5343518800853782, "step": 2000 }, { "epoch": 0.4065040650406504, "eval_loss": 0.6932350397109985, "eval_runtime": 152.4406, "eval_samples_per_second": 37.103, "eval_steps_per_second": 4.638, "eval_wer": 0.5168429330294811, "step": 2100 }, { "epoch": 0.4258614014711576, "eval_loss": 0.696869432926178, "eval_runtime": 153.0527, "eval_samples_per_second": 36.955, "eval_steps_per_second": 4.619, "eval_wer": 0.5364381890837894, "step": 2200 }, { "epoch": 0.4452187379016647, "eval_loss": 0.6781283617019653, "eval_runtime": 152.1378, "eval_samples_per_second": 37.177, "eval_steps_per_second": 4.647, "eval_wer": 0.5173725345444624, "step": 2300 }, { "epoch": 0.4645760743321719, "eval_loss": 0.6760829091072083, "eval_runtime": 151.9712, "eval_samples_per_second": 37.218, "eval_steps_per_second": 4.652, "eval_wer": 0.5050312143923223, "step": 2400 }, { "epoch": 0.48393341076267904, "grad_norm": 3.791292667388916, "learning_rate": 0.0002370315789473684, "loss": 0.681, "step": 2500 }, { "epoch": 0.48393341076267904, "eval_loss": 0.6720712780952454, "eval_runtime": 152.2414, "eval_samples_per_second": 37.152, "eval_steps_per_second": 4.644, "eval_wer": 0.528718845789668, "step": 2500 }, { "epoch": 0.5032907471931862, "eval_loss": 0.6598270535469055, "eval_runtime": 151.7192, "eval_samples_per_second": 37.279, "eval_steps_per_second": 4.66, "eval_wer": 0.5195069891351447, "step": 2600 }, { "epoch": 0.5226480836236934, "eval_loss": 0.6555168628692627, "eval_runtime": 152.5678, "eval_samples_per_second": 37.072, "eval_steps_per_second": 4.634, "eval_wer": 0.4975846961210701, "step": 2700 }, { "epoch": 0.5420054200542005, "eval_loss": 0.6535276770591736, "eval_runtime": 152.5246, "eval_samples_per_second": 37.083, "eval_steps_per_second": 4.635, "eval_wer": 0.49936608303509816, "step": 2800 }, { "epoch": 0.5613627564847077, "eval_loss": 0.6258506178855896, "eval_runtime": 151.843, "eval_samples_per_second": 37.249, "eval_steps_per_second": 4.656, "eval_wer": 0.48192133010222915, "step": 2900 }, { "epoch": 0.5807200929152149, "grad_norm": 9.4619779586792, "learning_rate": 0.00022124210526315786, "loss": 0.6737, "step": 3000 }, { "epoch": 0.5807200929152149, "eval_loss": 0.629943311214447, "eval_runtime": 151.8389, "eval_samples_per_second": 37.25, "eval_steps_per_second": 4.656, "eval_wer": 0.48022018584198617, "step": 3000 }, { "epoch": 0.6000774293457221, "eval_loss": 0.6378594636917114, "eval_runtime": 151.6255, "eval_samples_per_second": 37.302, "eval_steps_per_second": 4.663, "eval_wer": 0.4893197027812104, "step": 3100 }, { "epoch": 0.6194347657762292, "eval_loss": 0.6225672364234924, "eval_runtime": 153.0144, "eval_samples_per_second": 36.964, "eval_steps_per_second": 4.62, "eval_wer": 0.4806053505801544, "step": 3200 }, { "epoch": 0.6387921022067363, "eval_loss": 0.6088670492172241, "eval_runtime": 152.2222, "eval_samples_per_second": 37.156, "eval_steps_per_second": 4.645, "eval_wer": 0.4627112387860891, "step": 3300 }, { "epoch": 0.6581494386372435, "eval_loss": 0.6028585433959961, "eval_runtime": 153.0615, "eval_samples_per_second": 36.952, "eval_steps_per_second": 4.619, "eval_wer": 0.47354399704707034, "step": 3400 }, { "epoch": 0.6775067750677507, "grad_norm": 3.4705822467803955, "learning_rate": 0.00020545263157894736, "loss": 0.6419, "step": 3500 }, { "epoch": 0.6775067750677507, "eval_loss": 0.5871421694755554, "eval_runtime": 152.5739, "eval_samples_per_second": 37.071, "eval_steps_per_second": 4.634, "eval_wer": 0.4592126590810611, "step": 3500 }, { "epoch": 0.6968641114982579, "eval_loss": 0.6001027226448059, "eval_runtime": 152.1697, "eval_samples_per_second": 37.169, "eval_steps_per_second": 4.646, "eval_wer": 0.4610742886488742, "step": 3600 }, { "epoch": 0.716221447928765, "eval_loss": 0.5848923921585083, "eval_runtime": 152.6563, "eval_samples_per_second": 37.051, "eval_steps_per_second": 4.631, "eval_wer": 0.4472565036670893, "step": 3700 }, { "epoch": 0.7355787843592722, "eval_loss": 0.5923960208892822, "eval_runtime": 152.6559, "eval_samples_per_second": 37.051, "eval_steps_per_second": 4.631, "eval_wer": 0.46377044181605176, "step": 3800 }, { "epoch": 0.7549361207897793, "eval_loss": 0.5767965316772461, "eval_runtime": 152.1652, "eval_samples_per_second": 37.17, "eval_steps_per_second": 4.646, "eval_wer": 0.4584904751969957, "step": 3900 }, { "epoch": 0.7742934572202865, "grad_norm": 3.628082275390625, "learning_rate": 0.00018966315789473683, "loss": 0.6183, "step": 4000 }, { "epoch": 0.7742934572202865, "eval_loss": 0.5672534704208374, "eval_runtime": 152.4329, "eval_samples_per_second": 37.105, "eval_steps_per_second": 4.638, "eval_wer": 0.44531463144549116, "step": 4000 }, { "epoch": 0.7936507936507936, "eval_loss": 0.5575382113456726, "eval_runtime": 152.2388, "eval_samples_per_second": 37.152, "eval_steps_per_second": 4.644, "eval_wer": 0.4451862431994351, "step": 4100 }, { "epoch": 0.8130081300813008, "eval_loss": 0.5631808042526245, "eval_runtime": 152.7545, "eval_samples_per_second": 37.027, "eval_steps_per_second": 4.628, "eval_wer": 0.4474972316284444, "step": 4200 }, { "epoch": 0.832365466511808, "eval_loss": 0.5498641729354858, "eval_runtime": 153.7788, "eval_samples_per_second": 36.78, "eval_steps_per_second": 4.598, "eval_wer": 0.44008281041870617, "step": 4300 }, { "epoch": 0.8517228029423152, "eval_loss": 0.5662574172019958, "eval_runtime": 152.5034, "eval_samples_per_second": 37.088, "eval_steps_per_second": 4.636, "eval_wer": 0.43101539054099597, "step": 4400 }, { "epoch": 0.8710801393728222, "grad_norm": 2.376349925994873, "learning_rate": 0.0001738736842105263, "loss": 0.5877, "step": 4500 }, { "epoch": 0.8710801393728222, "eval_loss": 0.5584732294082642, "eval_runtime": 152.1714, "eval_samples_per_second": 37.169, "eval_steps_per_second": 4.646, "eval_wer": 0.4317215258943044, "step": 4500 }, { "epoch": 0.8904374758033294, "eval_loss": 0.5463821291923523, "eval_runtime": 152.4923, "eval_samples_per_second": 37.09, "eval_steps_per_second": 4.636, "eval_wer": 0.41997400138017366, "step": 4600 }, { "epoch": 0.9097948122338366, "eval_loss": 0.5381494164466858, "eval_runtime": 153.2139, "eval_samples_per_second": 36.916, "eval_steps_per_second": 4.614, "eval_wer": 0.4192197204345942, "step": 4700 }, { "epoch": 0.9291521486643438, "eval_loss": 0.5453722476959229, "eval_runtime": 151.9737, "eval_samples_per_second": 37.217, "eval_steps_per_second": 4.652, "eval_wer": 0.4201986808107718, "step": 4800 }, { "epoch": 0.948509485094851, "eval_loss": 0.5237515568733215, "eval_runtime": 151.8558, "eval_samples_per_second": 37.246, "eval_steps_per_second": 4.656, "eval_wer": 0.41241514339362234, "step": 4900 }, { "epoch": 0.9678668215253581, "grad_norm": 2.5489518642425537, "learning_rate": 0.0001581157894736842, "loss": 0.5621, "step": 5000 }, { "epoch": 0.9678668215253581, "eval_loss": 0.5303541421890259, "eval_runtime": 152.515, "eval_samples_per_second": 37.085, "eval_steps_per_second": 4.636, "eval_wer": 0.41353854054661293, "step": 5000 }, { "epoch": 0.9872241579558653, "eval_loss": 0.5163344740867615, "eval_runtime": 156.7945, "eval_samples_per_second": 36.073, "eval_steps_per_second": 4.509, "eval_wer": 0.4061080708061177, "step": 5100 }, { "epoch": 1.0065814943863725, "eval_loss": 0.51596599817276, "eval_runtime": 153.2891, "eval_samples_per_second": 36.898, "eval_steps_per_second": 4.612, "eval_wer": 0.39927139670363176, "step": 5200 }, { "epoch": 1.0259388308168795, "eval_loss": 0.5088583827018738, "eval_runtime": 152.7112, "eval_samples_per_second": 37.037, "eval_steps_per_second": 4.63, "eval_wer": 0.3898509091492674, "step": 5300 }, { "epoch": 1.0452961672473868, "eval_loss": 0.5110610723495483, "eval_runtime": 152.5555, "eval_samples_per_second": 37.075, "eval_steps_per_second": 4.634, "eval_wer": 0.3985652613503234, "step": 5400 }, { "epoch": 1.064653503677894, "grad_norm": 1.1362248659133911, "learning_rate": 0.0001423578947368421, "loss": 0.4882, "step": 5500 }, { "epoch": 1.064653503677894, "eval_loss": 0.5010027885437012, "eval_runtime": 152.1249, "eval_samples_per_second": 37.18, "eval_steps_per_second": 4.647, "eval_wer": 0.38574248527547306, "step": 5500 }, { "epoch": 1.084010840108401, "eval_loss": 0.49406561255455017, "eval_runtime": 151.5623, "eval_samples_per_second": 37.318, "eval_steps_per_second": 4.665, "eval_wer": 0.3858548249907721, "step": 5600 }, { "epoch": 1.1033681765389083, "eval_loss": 0.49403733015060425, "eval_runtime": 152.7631, "eval_samples_per_second": 37.025, "eval_steps_per_second": 4.628, "eval_wer": 0.3813451878480525, "step": 5700 }, { "epoch": 1.1227255129694154, "eval_loss": 0.4913772642612457, "eval_runtime": 152.1406, "eval_samples_per_second": 37.176, "eval_steps_per_second": 4.647, "eval_wer": 0.37815153022740766, "step": 5800 }, { "epoch": 1.1420828493999227, "eval_loss": 0.48747047781944275, "eval_runtime": 151.3195, "eval_samples_per_second": 37.378, "eval_steps_per_second": 4.672, "eval_wer": 0.3745406108070806, "step": 5900 }, { "epoch": 1.1614401858304297, "grad_norm": 1.0150744915008545, "learning_rate": 0.00012656842105263156, "loss": 0.4569, "step": 6000 }, { "epoch": 1.1614401858304297, "eval_loss": 0.4841971695423126, "eval_runtime": 151.8567, "eval_samples_per_second": 37.246, "eval_steps_per_second": 4.656, "eval_wer": 0.38071929514852915, "step": 6000 }, { "epoch": 1.1807975222609368, "eval_loss": 0.48611822724342346, "eval_runtime": 150.971, "eval_samples_per_second": 37.464, "eval_steps_per_second": 4.683, "eval_wer": 0.37370608720771614, "step": 6100 }, { "epoch": 1.2001548586914441, "eval_loss": 0.48144644498825073, "eval_runtime": 151.4548, "eval_samples_per_second": 37.344, "eval_steps_per_second": 4.668, "eval_wer": 0.3760973182905105, "step": 6200 }, { "epoch": 1.2195121951219512, "eval_loss": 0.47813892364501953, "eval_runtime": 151.1935, "eval_samples_per_second": 37.409, "eval_steps_per_second": 4.676, "eval_wer": 0.37409125194588433, "step": 6300 }, { "epoch": 1.2388695315524583, "eval_loss": 0.4771001935005188, "eval_runtime": 151.1732, "eval_samples_per_second": 37.414, "eval_steps_per_second": 4.677, "eval_wer": 0.36815329556579096, "step": 6400 }, { "epoch": 1.2582268679829656, "grad_norm": 1.3292571306228638, "learning_rate": 0.00011077894736842105, "loss": 0.4416, "step": 6500 }, { "epoch": 1.2582268679829656, "eval_loss": 0.47095027565956116, "eval_runtime": 151.5037, "eval_samples_per_second": 37.332, "eval_steps_per_second": 4.667, "eval_wer": 0.37338511659257595, "step": 6500 }, { "epoch": 1.2775842044134726, "eval_loss": 0.47211408615112305, "eval_runtime": 150.9455, "eval_samples_per_second": 37.47, "eval_steps_per_second": 4.684, "eval_wer": 0.3659706953828377, "step": 6600 }, { "epoch": 1.29694154084398, "eval_loss": 0.4679400622844696, "eval_runtime": 151.4191, "eval_samples_per_second": 37.353, "eval_steps_per_second": 4.669, "eval_wer": 0.3638843863844265, "step": 6700 }, { "epoch": 1.316298877274487, "eval_loss": 0.46228036284446716, "eval_runtime": 151.3839, "eval_samples_per_second": 37.362, "eval_steps_per_second": 4.67, "eval_wer": 0.366532393959333, "step": 6800 }, { "epoch": 1.3356562137049943, "eval_loss": 0.46108925342559814, "eval_runtime": 151.8163, "eval_samples_per_second": 37.256, "eval_steps_per_second": 4.657, "eval_wer": 0.3601771757795574, "step": 6900 }, { "epoch": 1.3550135501355014, "grad_norm": 0.8062695860862732, "learning_rate": 9.498947368421052e-05, "loss": 0.4324, "step": 7000 }, { "epoch": 1.3550135501355014, "eval_loss": 0.46888086199760437, "eval_runtime": 152.4379, "eval_samples_per_second": 37.104, "eval_steps_per_second": 4.638, "eval_wer": 0.3609314567251368, "step": 7000 }, { "epoch": 1.3743708865660085, "eval_loss": 0.4573034346103668, "eval_runtime": 151.3077, "eval_samples_per_second": 37.381, "eval_steps_per_second": 4.673, "eval_wer": 0.3602574184333424, "step": 7100 }, { "epoch": 1.3937282229965158, "eval_loss": 0.45749789476394653, "eval_runtime": 151.5824, "eval_samples_per_second": 37.313, "eval_steps_per_second": 4.664, "eval_wer": 0.3546083356068752, "step": 7200 }, { "epoch": 1.4130855594270229, "eval_loss": 0.4555954933166504, "eval_runtime": 151.6035, "eval_samples_per_second": 37.308, "eval_steps_per_second": 4.663, "eval_wer": 0.35836369180401534, "step": 7300 }, { "epoch": 1.43244289585753, "eval_loss": 0.4495578408241272, "eval_runtime": 152.5621, "eval_samples_per_second": 37.073, "eval_steps_per_second": 4.634, "eval_wer": 0.350724591163679, "step": 7400 }, { "epoch": 1.4518002322880372, "grad_norm": 0.7916799187660217, "learning_rate": 7.92e-05, "loss": 0.4255, "step": 7500 }, { "epoch": 1.4518002322880372, "eval_loss": 0.44609567523002625, "eval_runtime": 151.8498, "eval_samples_per_second": 37.247, "eval_steps_per_second": 4.656, "eval_wer": 0.34671245847442667, "step": 7500 }, { "epoch": 1.4711575687185443, "eval_loss": 0.44341230392456055, "eval_runtime": 152.528, "eval_samples_per_second": 37.082, "eval_steps_per_second": 4.635, "eval_wer": 0.3462470510824734, "step": 7600 }, { "epoch": 1.4905149051490514, "eval_loss": 0.44362780451774597, "eval_runtime": 152.5253, "eval_samples_per_second": 37.082, "eval_steps_per_second": 4.635, "eval_wer": 0.3516393574168285, "step": 7700 }, { "epoch": 1.5098722415795587, "eval_loss": 0.4406072199344635, "eval_runtime": 152.4039, "eval_samples_per_second": 37.112, "eval_steps_per_second": 4.639, "eval_wer": 0.34579769222127715, "step": 7800 }, { "epoch": 1.5292295780100658, "eval_loss": 0.43874725699424744, "eval_runtime": 152.6604, "eval_samples_per_second": 37.05, "eval_steps_per_second": 4.631, "eval_wer": 0.3439360626534641, "step": 7900 }, { "epoch": 1.5485869144405728, "grad_norm": 0.7491864562034607, "learning_rate": 6.344210526315788e-05, "loss": 0.4094, "step": 8000 }, { "epoch": 1.5485869144405728, "eval_loss": 0.43253499269485474, "eval_runtime": 153.8006, "eval_samples_per_second": 36.775, "eval_steps_per_second": 4.597, "eval_wer": 0.3409831329941744, "step": 8000 }, { "epoch": 1.5679442508710801, "eval_loss": 0.4359830617904663, "eval_runtime": 153.3674, "eval_samples_per_second": 36.879, "eval_steps_per_second": 4.61, "eval_wer": 0.3419299963088379, "step": 8100 }, { "epoch": 1.5873015873015874, "eval_loss": 0.4285949170589447, "eval_runtime": 153.3711, "eval_samples_per_second": 36.878, "eval_steps_per_second": 4.61, "eval_wer": 0.3377252812505015, "step": 8200 }, { "epoch": 1.6066589237320945, "eval_loss": 0.43007034063339233, "eval_runtime": 152.2201, "eval_samples_per_second": 37.157, "eval_steps_per_second": 4.645, "eval_wer": 0.3335526632536791, "step": 8300 }, { "epoch": 1.6260162601626016, "eval_loss": 0.42966797947883606, "eval_runtime": 152.0163, "eval_samples_per_second": 37.207, "eval_steps_per_second": 4.651, "eval_wer": 0.3322848293238754, "step": 8400 }, { "epoch": 1.645373596593109, "grad_norm": 1.047472596168518, "learning_rate": 4.765263157894736e-05, "loss": 0.4018, "step": 8500 }, { "epoch": 1.645373596593109, "eval_loss": 0.4270441234111786, "eval_runtime": 152.8058, "eval_samples_per_second": 37.014, "eval_steps_per_second": 4.627, "eval_wer": 0.3338575853380623, "step": 8500 }, { "epoch": 1.664730933023616, "eval_loss": 0.4267289638519287, "eval_runtime": 152.5032, "eval_samples_per_second": 37.088, "eval_steps_per_second": 4.636, "eval_wer": 0.3319959557702492, "step": 8600 }, { "epoch": 1.684088269454123, "eval_loss": 0.4224300980567932, "eval_runtime": 152.5862, "eval_samples_per_second": 37.068, "eval_steps_per_second": 4.633, "eval_wer": 0.33275023671582865, "step": 8700 }, { "epoch": 1.7034456058846303, "eval_loss": 0.4207303822040558, "eval_runtime": 154.5205, "eval_samples_per_second": 36.604, "eval_steps_per_second": 4.575, "eval_wer": 0.32984545264881, "step": 8800 }, { "epoch": 1.7228029423151374, "eval_loss": 0.4197385013103485, "eval_runtime": 152.0624, "eval_samples_per_second": 37.195, "eval_steps_per_second": 4.649, "eval_wer": 0.32978125852578194, "step": 8900 }, { "epoch": 1.7421602787456445, "grad_norm": 1.4507739543914795, "learning_rate": 3.189473684210526e-05, "loss": 0.3899, "step": 9000 }, { "epoch": 1.7421602787456445, "eval_loss": 0.4183507561683655, "eval_runtime": 157.4278, "eval_samples_per_second": 35.928, "eval_steps_per_second": 4.491, "eval_wer": 0.3258493684903147, "step": 9000 }, { "epoch": 1.7615176151761518, "eval_loss": 0.4164830148220062, "eval_runtime": 153.0475, "eval_samples_per_second": 36.956, "eval_steps_per_second": 4.619, "eval_wer": 0.3262024361669689, "step": 9100 }, { "epoch": 1.7808749516066589, "eval_loss": 0.41182050108909607, "eval_runtime": 152.4839, "eval_samples_per_second": 37.092, "eval_steps_per_second": 4.637, "eval_wer": 0.322864341769511, "step": 9200 }, { "epoch": 1.800232288037166, "eval_loss": 0.4134317636489868, "eval_runtime": 152.6353, "eval_samples_per_second": 37.056, "eval_steps_per_second": 4.632, "eval_wer": 0.3232334579769222, "step": 9300 }, { "epoch": 1.8195896244676733, "eval_loss": 0.4126824736595154, "eval_runtime": 152.5246, "eval_samples_per_second": 37.083, "eval_steps_per_second": 4.635, "eval_wer": 0.3209064210171559, "step": 9400 }, { "epoch": 1.8389469608981805, "grad_norm": 1.0012460947036743, "learning_rate": 1.6105263157894736e-05, "loss": 0.3665, "step": 9500 }, { "epoch": 1.8389469608981805, "eval_loss": 0.41083237528800964, "eval_runtime": 152.9993, "eval_samples_per_second": 36.967, "eval_steps_per_second": 4.621, "eval_wer": 0.32109900338624, "step": 9500 }, { "epoch": 1.8583042973286876, "eval_loss": 0.4090138077735901, "eval_runtime": 152.5291, "eval_samples_per_second": 37.081, "eval_steps_per_second": 4.635, "eval_wer": 0.3199114121102213, "step": 9600 }, { "epoch": 1.8776616337591947, "eval_loss": 0.407578706741333, "eval_runtime": 153.0711, "eval_samples_per_second": 36.95, "eval_steps_per_second": 4.619, "eval_wer": 0.32087432395564186, "step": 9700 }, { "epoch": 1.897018970189702, "eval_loss": 0.40649694204330444, "eval_runtime": 154.4136, "eval_samples_per_second": 36.629, "eval_steps_per_second": 4.579, "eval_wer": 0.31981512092567926, "step": 9800 }, { "epoch": 1.916376306620209, "eval_loss": 0.40620651841163635, "eval_runtime": 153.7508, "eval_samples_per_second": 36.787, "eval_steps_per_second": 4.598, "eval_wer": 0.31923737381842693, "step": 9900 }, { "epoch": 1.9357336430507162, "grad_norm": 0.7244949340820312, "learning_rate": 3.157894736842105e-07, "loss": 0.3698, "step": 10000 }, { "epoch": 1.9357336430507162, "eval_loss": 0.4060620963573456, "eval_runtime": 153.976, "eval_samples_per_second": 36.733, "eval_steps_per_second": 4.592, "eval_wer": 0.31928551941069794, "step": 10000 }, { "epoch": 1.9550909794812235, "eval_loss": 0.45229342579841614, "eval_runtime": 154.2948, "eval_samples_per_second": 36.657, "eval_steps_per_second": 4.582, "eval_wer": 0.3406140167867632, "step": 10100 }, { "epoch": 1.9744483159117305, "eval_loss": 0.4579542577266693, "eval_runtime": 151.5074, "eval_samples_per_second": 37.331, "eval_steps_per_second": 4.666, "eval_wer": 0.3517837941936416, "step": 10200 }, { "epoch": 1.9938056523422376, "eval_loss": 0.46043792366981506, "eval_runtime": 151.4438, "eval_samples_per_second": 37.347, "eval_steps_per_second": 4.668, "eval_wer": 0.35115790149411824, "step": 10300 }, { "epoch": 2.013162988772745, "eval_loss": 0.46549099683761597, "eval_runtime": 151.5994, "eval_samples_per_second": 37.309, "eval_steps_per_second": 4.664, "eval_wer": 0.3552181797756415, "step": 10400 }, { "epoch": 2.032520325203252, "grad_norm": 0.703632652759552, "learning_rate": 0.0001463076923076923, "loss": 0.3624, "step": 10500 }, { "epoch": 2.032520325203252, "eval_loss": 0.4670031666755676, "eval_runtime": 151.5063, "eval_samples_per_second": 37.332, "eval_steps_per_second": 4.666, "eval_wer": 0.35144677504774435, "step": 10500 }, { "epoch": 2.051877661633759, "eval_loss": 0.459250271320343, "eval_runtime": 153.0971, "eval_samples_per_second": 36.944, "eval_steps_per_second": 4.618, "eval_wer": 0.3628251833544639, "step": 10600 }, { "epoch": 2.0712349980642664, "eval_loss": 0.46061432361602783, "eval_runtime": 152.0732, "eval_samples_per_second": 37.193, "eval_steps_per_second": 4.649, "eval_wer": 0.3545922870761182, "step": 10700 }, { "epoch": 2.0905923344947737, "eval_loss": 0.46500489115715027, "eval_runtime": 151.985, "eval_samples_per_second": 37.214, "eval_steps_per_second": 4.652, "eval_wer": 0.35905377862656673, "step": 10800 }, { "epoch": 2.1099496709252805, "eval_loss": 0.46085453033447266, "eval_runtime": 152.4835, "eval_samples_per_second": 37.093, "eval_steps_per_second": 4.637, "eval_wer": 0.35483301503747333, "step": 10900 }, { "epoch": 2.129307007355788, "grad_norm": 0.5008242726325989, "learning_rate": 0.00013863076923076922, "loss": 0.3755, "step": 11000 }, { "epoch": 2.129307007355788, "eval_loss": 0.4708138406276703, "eval_runtime": 152.3457, "eval_samples_per_second": 37.126, "eval_steps_per_second": 4.641, "eval_wer": 0.35573173275986586, "step": 11000 }, { "epoch": 2.148664343786295, "eval_loss": 0.4649392366409302, "eval_runtime": 152.7087, "eval_samples_per_second": 37.038, "eval_steps_per_second": 4.63, "eval_wer": 0.3548009179759593, "step": 11100 }, { "epoch": 2.168021680216802, "eval_loss": 0.4624271094799042, "eval_runtime": 153.153, "eval_samples_per_second": 36.93, "eval_steps_per_second": 4.616, "eval_wer": 0.355956412190464, "step": 11200 }, { "epoch": 2.1873790166473093, "eval_loss": 0.45822229981422424, "eval_runtime": 156.1964, "eval_samples_per_second": 36.211, "eval_steps_per_second": 4.526, "eval_wer": 0.35229734717786587, "step": 11300 }, { "epoch": 2.2067363530778166, "eval_loss": 0.466250479221344, "eval_runtime": 152.6707, "eval_samples_per_second": 37.047, "eval_steps_per_second": 4.631, "eval_wer": 0.3586044197653705, "step": 11400 }, { "epoch": 2.226093689508324, "grad_norm": 0.9631055593490601, "learning_rate": 0.00013093846153846151, "loss": 0.3891, "step": 11500 }, { "epoch": 2.226093689508324, "eval_loss": 0.46153655648231506, "eval_runtime": 153.1909, "eval_samples_per_second": 36.921, "eval_steps_per_second": 4.615, "eval_wer": 0.3552181797756415, "step": 11500 }, { "epoch": 2.2454510259388307, "eval_loss": 0.4631531238555908, "eval_runtime": 152.9395, "eval_samples_per_second": 36.982, "eval_steps_per_second": 4.623, "eval_wer": 0.35886119625748264, "step": 11600 }, { "epoch": 2.264808362369338, "eval_loss": 0.4495234191417694, "eval_runtime": 153.0237, "eval_samples_per_second": 36.962, "eval_steps_per_second": 4.62, "eval_wer": 0.3425398404776043, "step": 11700 }, { "epoch": 2.2841656987998453, "eval_loss": 0.462666779756546, "eval_runtime": 152.4714, "eval_samples_per_second": 37.095, "eval_steps_per_second": 4.637, "eval_wer": 0.34942466017236123, "step": 11800 }, { "epoch": 2.303523035230352, "eval_loss": 0.4550352096557617, "eval_runtime": 152.8072, "eval_samples_per_second": 37.014, "eval_steps_per_second": 4.627, "eval_wer": 0.3451717995217538, "step": 11900 }, { "epoch": 2.3228803716608595, "grad_norm": 0.7961182594299316, "learning_rate": 0.00012324615384615384, "loss": 0.3946, "step": 12000 }, { "epoch": 2.3228803716608595, "eval_loss": 0.44988927245140076, "eval_runtime": 152.9644, "eval_samples_per_second": 36.976, "eval_steps_per_second": 4.622, "eval_wer": 0.3462310025517164, "step": 12000 }, { "epoch": 2.3422377080913668, "eval_loss": 0.4501667320728302, "eval_runtime": 153.061, "eval_samples_per_second": 36.953, "eval_steps_per_second": 4.619, "eval_wer": 0.341978141901109, "step": 12100 }, { "epoch": 2.3615950445218736, "eval_loss": 0.4580215513706207, "eval_runtime": 153.2108, "eval_samples_per_second": 36.916, "eval_steps_per_second": 4.615, "eval_wer": 0.3412399094862865, "step": 12200 }, { "epoch": 2.380952380952381, "eval_loss": 0.4506891667842865, "eval_runtime": 153.6611, "eval_samples_per_second": 36.808, "eval_steps_per_second": 4.601, "eval_wer": 0.34339041260772574, "step": 12300 }, { "epoch": 2.4003097173828882, "eval_loss": 0.44618555903434753, "eval_runtime": 153.273, "eval_samples_per_second": 36.901, "eval_steps_per_second": 4.613, "eval_wer": 0.34475453772207154, "step": 12400 }, { "epoch": 2.419667053813395, "grad_norm": 0.828158974647522, "learning_rate": 0.00011556923076923076, "loss": 0.3824, "step": 12500 }, { "epoch": 2.419667053813395, "eval_loss": 0.44126543402671814, "eval_runtime": 153.3979, "eval_samples_per_second": 36.871, "eval_steps_per_second": 4.609, "eval_wer": 0.34127200654780054, "step": 12500 }, { "epoch": 2.4390243902439024, "eval_loss": 0.44880929589271545, "eval_runtime": 153.7143, "eval_samples_per_second": 36.796, "eval_steps_per_second": 4.599, "eval_wer": 0.3443212273916323, "step": 12600 }, { "epoch": 2.4583817266744097, "eval_loss": 0.44148463010787964, "eval_runtime": 153.647, "eval_samples_per_second": 36.812, "eval_steps_per_second": 4.601, "eval_wer": 0.3431657331771276, "step": 12700 }, { "epoch": 2.4777390631049165, "eval_loss": 0.44202256202697754, "eval_runtime": 153.5743, "eval_samples_per_second": 36.829, "eval_steps_per_second": 4.604, "eval_wer": 0.34093498740190337, "step": 12800 }, { "epoch": 2.497096399535424, "eval_loss": 0.4379221200942993, "eval_runtime": 153.5736, "eval_samples_per_second": 36.829, "eval_steps_per_second": 4.604, "eval_wer": 0.3361204281748006, "step": 12900 }, { "epoch": 2.516453735965931, "grad_norm": 1.2163615226745605, "learning_rate": 0.00010787692307692307, "loss": 0.372, "step": 13000 }, { "epoch": 2.516453735965931, "eval_loss": 0.43855318427085876, "eval_runtime": 153.5476, "eval_samples_per_second": 36.835, "eval_steps_per_second": 4.604, "eval_wer": 0.3334884691306511, "step": 13000 }, { "epoch": 2.535811072396438, "eval_loss": 0.44449883699417114, "eval_runtime": 153.7016, "eval_samples_per_second": 36.799, "eval_steps_per_second": 4.6, "eval_wer": 0.3397794931873987, "step": 13100 }, { "epoch": 2.5551684088269453, "eval_loss": 0.4401286542415619, "eval_runtime": 154.2488, "eval_samples_per_second": 36.668, "eval_steps_per_second": 4.584, "eval_wer": 0.3392819887339314, "step": 13200 }, { "epoch": 2.5745257452574526, "eval_loss": 0.437770813703537, "eval_runtime": 153.8927, "eval_samples_per_second": 36.753, "eval_steps_per_second": 4.594, "eval_wer": 0.335077273675595, "step": 13300 }, { "epoch": 2.59388308168796, "eval_loss": 0.4315861463546753, "eval_runtime": 153.7886, "eval_samples_per_second": 36.778, "eval_steps_per_second": 4.597, "eval_wer": 0.33517356486013705, "step": 13400 }, { "epoch": 2.6132404181184667, "grad_norm": 1.084632158279419, "learning_rate": 0.0001002, "loss": 0.3521, "step": 13500 }, { "epoch": 2.6132404181184667, "eval_loss": 0.43864014744758606, "eval_runtime": 153.9711, "eval_samples_per_second": 36.734, "eval_steps_per_second": 4.592, "eval_wer": 0.33398597358411836, "step": 13500 }, { "epoch": 2.632597754548974, "eval_loss": 0.43551018834114075, "eval_runtime": 154.3017, "eval_samples_per_second": 36.655, "eval_steps_per_second": 4.582, "eval_wer": 0.33096884980180064, "step": 13600 }, { "epoch": 2.6519550909794813, "eval_loss": 0.4325660765171051, "eval_runtime": 154.4812, "eval_samples_per_second": 36.613, "eval_steps_per_second": 4.577, "eval_wer": 0.3343871868530436, "step": 13700 }, { "epoch": 2.6713124274099886, "eval_loss": 0.4263465404510498, "eval_runtime": 154.0733, "eval_samples_per_second": 36.71, "eval_steps_per_second": 4.589, "eval_wer": 0.32629872735151094, "step": 13800 }, { "epoch": 2.6906697638404955, "eval_loss": 0.42636117339134216, "eval_runtime": 154.1615, "eval_samples_per_second": 36.689, "eval_steps_per_second": 4.586, "eval_wer": 0.32353838006130536, "step": 13900 }, { "epoch": 2.710027100271003, "grad_norm": 1.1979655027389526, "learning_rate": 9.25076923076923e-05, "loss": 0.3592, "step": 14000 }, { "epoch": 2.710027100271003, "eval_loss": 0.4322036802768707, "eval_runtime": 154.5242, "eval_samples_per_second": 36.603, "eval_steps_per_second": 4.575, "eval_wer": 0.3299738408948661, "step": 14000 }, { "epoch": 2.72938443670151, "eval_loss": 0.4294193983078003, "eval_runtime": 154.4329, "eval_samples_per_second": 36.624, "eval_steps_per_second": 4.578, "eval_wer": 0.3261542905746979, "step": 14100 }, { "epoch": 2.748741773132017, "eval_loss": 0.43099814653396606, "eval_runtime": 154.4209, "eval_samples_per_second": 36.627, "eval_steps_per_second": 4.578, "eval_wer": 0.32329765209995026, "step": 14200 }, { "epoch": 2.7680991095625243, "eval_loss": 0.42700281739234924, "eval_runtime": 155.4008, "eval_samples_per_second": 36.396, "eval_steps_per_second": 4.55, "eval_wer": 0.3268122803357353, "step": 14300 }, { "epoch": 2.7874564459930316, "eval_loss": 0.4209098219871521, "eval_runtime": 156.5271, "eval_samples_per_second": 36.134, "eval_steps_per_second": 4.517, "eval_wer": 0.3254321066906325, "step": 14400 }, { "epoch": 2.8068137824235384, "grad_norm": 0.6974443793296814, "learning_rate": 8.48153846153846e-05, "loss": 0.3459, "step": 14500 }, { "epoch": 2.8068137824235384, "eval_loss": 0.42542555928230286, "eval_runtime": 157.9392, "eval_samples_per_second": 35.811, "eval_steps_per_second": 4.476, "eval_wer": 0.32729373625844554, "step": 14500 }, { "epoch": 2.8261711188540457, "eval_loss": 0.42783817648887634, "eval_runtime": 155.0217, "eval_samples_per_second": 36.485, "eval_steps_per_second": 4.561, "eval_wer": 0.3231532153231372, "step": 14600 }, { "epoch": 2.845528455284553, "eval_loss": 0.4212438464164734, "eval_runtime": 154.853, "eval_samples_per_second": 36.525, "eval_steps_per_second": 4.566, "eval_wer": 0.3215002166551652, "step": 14700 }, { "epoch": 2.86488579171506, "eval_loss": 0.4169256389141083, "eval_runtime": 154.5142, "eval_samples_per_second": 36.605, "eval_steps_per_second": 4.576, "eval_wer": 0.31928551941069794, "step": 14800 }, { "epoch": 2.884243128145567, "eval_loss": 0.42132049798965454, "eval_runtime": 154.8091, "eval_samples_per_second": 36.535, "eval_steps_per_second": 4.567, "eval_wer": 0.3195262473720531, "step": 14900 }, { "epoch": 2.9036004645760745, "grad_norm": 1.099702000617981, "learning_rate": 7.713846153846152e-05, "loss": 0.3483, "step": 15000 }, { "epoch": 2.9036004645760745, "eval_loss": 0.41696369647979736, "eval_runtime": 155.3223, "eval_samples_per_second": 36.415, "eval_steps_per_second": 4.552, "eval_wer": 0.31652517212049236, "step": 15000 }, { "epoch": 2.9229578010065813, "eval_loss": 0.41230952739715576, "eval_runtime": 154.9181, "eval_samples_per_second": 36.51, "eval_steps_per_second": 4.564, "eval_wer": 0.31418208662996905, "step": 15100 }, { "epoch": 2.9423151374370886, "eval_loss": 0.4116990566253662, "eval_runtime": 154.97, "eval_samples_per_second": 36.497, "eval_steps_per_second": 4.562, "eval_wer": 0.31337966009211854, "step": 15200 }, { "epoch": 2.961672473867596, "eval_loss": 0.410386323928833, "eval_runtime": 155.0232, "eval_samples_per_second": 36.485, "eval_steps_per_second": 4.561, "eval_wer": 0.31158222464733354, "step": 15300 }, { "epoch": 2.9810298102981028, "eval_loss": 0.41244322061538696, "eval_runtime": 154.4682, "eval_samples_per_second": 36.616, "eval_steps_per_second": 4.577, "eval_wer": 0.31419813516072603, "step": 15400 }, { "epoch": 3.00038714672861, "grad_norm": 0.725528359413147, "learning_rate": 6.946153846153845e-05, "loss": 0.3501, "step": 15500 }, { "epoch": 3.00038714672861, "eval_loss": 0.40684688091278076, "eval_runtime": 154.5863, "eval_samples_per_second": 36.588, "eval_steps_per_second": 4.573, "eval_wer": 0.31272167033108117, "step": 15500 }, { "epoch": 3.0197444831591174, "eval_loss": 0.4200752079486847, "eval_runtime": 154.5821, "eval_samples_per_second": 36.589, "eval_steps_per_second": 4.574, "eval_wer": 0.3087416347033429, "step": 15600 }, { "epoch": 3.0391018195896247, "eval_loss": 0.4186869263648987, "eval_runtime": 154.7417, "eval_samples_per_second": 36.551, "eval_steps_per_second": 4.569, "eval_wer": 0.3137808733610438, "step": 15700 }, { "epoch": 3.0584591560201315, "eval_loss": 0.41133585572242737, "eval_runtime": 155.21, "eval_samples_per_second": 36.441, "eval_steps_per_second": 4.555, "eval_wer": 0.31092423488629617, "step": 15800 }, { "epoch": 3.077816492450639, "eval_loss": 0.4191639721393585, "eval_runtime": 155.276, "eval_samples_per_second": 36.425, "eval_steps_per_second": 4.553, "eval_wer": 0.30851695527274475, "step": 15900 }, { "epoch": 3.097173828881146, "grad_norm": 0.5114701390266418, "learning_rate": 6.176923076923076e-05, "loss": 0.2754, "step": 16000 }, { "epoch": 3.097173828881146, "eval_loss": 0.4161028265953064, "eval_runtime": 154.597, "eval_samples_per_second": 36.585, "eval_steps_per_second": 4.573, "eval_wer": 0.30901445972621205, "step": 16000 }, { "epoch": 3.116531165311653, "eval_loss": 0.4183988571166992, "eval_runtime": 155.0124, "eval_samples_per_second": 36.487, "eval_steps_per_second": 4.561, "eval_wer": 0.307152830158399, "step": 16100 }, { "epoch": 3.1358885017421603, "eval_loss": 0.4186756908893585, "eval_runtime": 154.8535, "eval_samples_per_second": 36.525, "eval_steps_per_second": 4.566, "eval_wer": 0.3060936271284364, "step": 16200 }, { "epoch": 3.1552458381726676, "eval_loss": 0.4193824827671051, "eval_runtime": 154.3195, "eval_samples_per_second": 36.651, "eval_steps_per_second": 4.581, "eval_wer": 0.3059652388823803, "step": 16300 }, { "epoch": 3.1746031746031744, "eval_loss": 0.40788766741752625, "eval_runtime": 154.9673, "eval_samples_per_second": 36.498, "eval_steps_per_second": 4.562, "eval_wer": 0.3038949784147261, "step": 16400 }, { "epoch": 3.1939605110336817, "grad_norm": 0.5594165325164795, "learning_rate": 5.4076923076923074e-05, "loss": 0.2802, "step": 16500 }, { "epoch": 3.1939605110336817, "eval_loss": 0.41461309790611267, "eval_runtime": 154.8662, "eval_samples_per_second": 36.522, "eval_steps_per_second": 4.565, "eval_wer": 0.30424804609138034, "step": 16500 }, { "epoch": 3.213317847464189, "eval_loss": 0.4168522357940674, "eval_runtime": 155.0374, "eval_samples_per_second": 36.482, "eval_steps_per_second": 4.56, "eval_wer": 0.30116672818603457, "step": 16600 }, { "epoch": 3.2326751838946963, "eval_loss": 0.40926745533943176, "eval_runtime": 154.7423, "eval_samples_per_second": 36.551, "eval_steps_per_second": 4.569, "eval_wer": 0.3023864165235673, "step": 16700 }, { "epoch": 3.252032520325203, "eval_loss": 0.4115259051322937, "eval_runtime": 154.7933, "eval_samples_per_second": 36.539, "eval_steps_per_second": 4.567, "eval_wer": 0.3005408354865112, "step": 16800 }, { "epoch": 3.2713898567557105, "eval_loss": 0.40197211503982544, "eval_runtime": 155.5964, "eval_samples_per_second": 36.35, "eval_steps_per_second": 4.544, "eval_wer": 0.30410360931456726, "step": 16900 }, { "epoch": 3.290747193186218, "grad_norm": 1.4730154275894165, "learning_rate": 4.6384615384615385e-05, "loss": 0.2723, "step": 17000 }, { "epoch": 3.290747193186218, "eval_loss": 0.4058869779109955, "eval_runtime": 155.0898, "eval_samples_per_second": 36.469, "eval_steps_per_second": 4.559, "eval_wer": 0.30442457992970745, "step": 17000 }, { "epoch": 3.3101045296167246, "eval_loss": 0.40676185488700867, "eval_runtime": 155.0576, "eval_samples_per_second": 36.477, "eval_steps_per_second": 4.56, "eval_wer": 0.3013753590858757, "step": 17100 }, { "epoch": 3.329461866047232, "eval_loss": 0.40653425455093384, "eval_runtime": 155.8377, "eval_samples_per_second": 36.294, "eval_steps_per_second": 4.537, "eval_wer": 0.30878978029561394, "step": 17200 }, { "epoch": 3.3488192024777392, "eval_loss": 0.4082197844982147, "eval_runtime": 155.7924, "eval_samples_per_second": 36.305, "eval_steps_per_second": 4.538, "eval_wer": 0.3010543884707355, "step": 17300 }, { "epoch": 3.368176538908246, "eval_loss": 0.4083554446697235, "eval_runtime": 155.6775, "eval_samples_per_second": 36.332, "eval_steps_per_second": 4.541, "eval_wer": 0.3007494663863523, "step": 17400 }, { "epoch": 3.3875338753387534, "grad_norm": 0.5211097598075867, "learning_rate": 3.87076923076923e-05, "loss": 0.2557, "step": 17500 }, { "epoch": 3.3875338753387534, "eval_loss": 0.4009736180305481, "eval_runtime": 155.105, "eval_samples_per_second": 36.466, "eval_steps_per_second": 4.558, "eval_wer": 0.29924090449519347, "step": 17500 }, { "epoch": 3.4068912117692607, "eval_loss": 0.4061805009841919, "eval_runtime": 154.8792, "eval_samples_per_second": 36.519, "eval_steps_per_second": 4.565, "eval_wer": 0.2999951854407729, "step": 17600 }, { "epoch": 3.4262485481997675, "eval_loss": 0.40264037251472473, "eval_runtime": 155.9957, "eval_samples_per_second": 36.257, "eval_steps_per_second": 4.532, "eval_wer": 0.2980533132191748, "step": 17700 }, { "epoch": 3.445605884630275, "eval_loss": 0.40035372972488403, "eval_runtime": 155.0928, "eval_samples_per_second": 36.468, "eval_steps_per_second": 4.559, "eval_wer": 0.29893598241081026, "step": 17800 }, { "epoch": 3.464963221060782, "eval_loss": 0.40443336963653564, "eval_runtime": 154.9305, "eval_samples_per_second": 36.507, "eval_steps_per_second": 4.563, "eval_wer": 0.29906437065686636, "step": 17900 }, { "epoch": 3.484320557491289, "grad_norm": 0.7458967566490173, "learning_rate": 3.101538461538461e-05, "loss": 0.2578, "step": 18000 }, { "epoch": 3.484320557491289, "eval_loss": 0.4003549814224243, "eval_runtime": 155.7394, "eval_samples_per_second": 36.317, "eval_steps_per_second": 4.54, "eval_wer": 0.29660894545104394, "step": 18000 }, { "epoch": 3.5036778939217963, "eval_loss": 0.40592488646507263, "eval_runtime": 159.2644, "eval_samples_per_second": 35.513, "eval_steps_per_second": 4.439, "eval_wer": 0.29449053939111874, "step": 18100 }, { "epoch": 3.5230352303523036, "eval_loss": 0.4014962613582611, "eval_runtime": 155.6654, "eval_samples_per_second": 36.334, "eval_steps_per_second": 4.542, "eval_wer": 0.29632007189741777, "step": 18200 }, { "epoch": 3.5423925667828104, "eval_loss": 0.396659791469574, "eval_runtime": 156.1536, "eval_samples_per_second": 36.221, "eval_steps_per_second": 4.528, "eval_wer": 0.29585466450546455, "step": 18300 }, { "epoch": 3.5617499032133177, "eval_loss": 0.4001907706260681, "eval_runtime": 155.7578, "eval_samples_per_second": 36.313, "eval_steps_per_second": 4.539, "eval_wer": 0.29412142318370754, "step": 18400 }, { "epoch": 3.581107239643825, "grad_norm": 0.6122294664382935, "learning_rate": 2.3338461538461535e-05, "loss": 0.2508, "step": 18500 }, { "epoch": 3.581107239643825, "eval_loss": 0.39826107025146484, "eval_runtime": 155.467, "eval_samples_per_second": 36.381, "eval_steps_per_second": 4.548, "eval_wer": 0.2945547335141468, "step": 18500 }, { "epoch": 3.600464576074332, "eval_loss": 0.3958674967288971, "eval_runtime": 155.8242, "eval_samples_per_second": 36.297, "eval_steps_per_second": 4.537, "eval_wer": 0.29365601579175427, "step": 18600 }, { "epoch": 3.619821912504839, "eval_loss": 0.3970955014228821, "eval_runtime": 155.4329, "eval_samples_per_second": 36.389, "eval_steps_per_second": 4.549, "eval_wer": 0.2942016658374926, "step": 18700 }, { "epoch": 3.6391792489353465, "eval_loss": 0.3906669616699219, "eval_runtime": 155.4929, "eval_samples_per_second": 36.375, "eval_steps_per_second": 4.547, "eval_wer": 0.2923239877389225, "step": 18800 }, { "epoch": 3.658536585365854, "eval_loss": 0.39506247639656067, "eval_runtime": 155.5246, "eval_samples_per_second": 36.367, "eval_steps_per_second": 4.546, "eval_wer": 0.2903981640480814, "step": 18900 }, { "epoch": 3.6778939217963607, "grad_norm": 0.33715635538101196, "learning_rate": 1.5646153846153846e-05, "loss": 0.2659, "step": 19000 }, { "epoch": 3.6778939217963607, "eval_loss": 0.3892674744129181, "eval_runtime": 155.5533, "eval_samples_per_second": 36.361, "eval_steps_per_second": 4.545, "eval_wer": 0.29309431721525897, "step": 19000 }, { "epoch": 3.697251258226868, "eval_loss": 0.39077267050743103, "eval_runtime": 155.448, "eval_samples_per_second": 36.385, "eval_steps_per_second": 4.548, "eval_wer": 0.2900771934329412, "step": 19100 }, { "epoch": 3.7166085946573753, "eval_loss": 0.39407432079315186, "eval_runtime": 155.4696, "eval_samples_per_second": 36.38, "eval_steps_per_second": 4.548, "eval_wer": 0.2884241947649693, "step": 19200 }, { "epoch": 3.7359659310878826, "eval_loss": 0.3924821615219116, "eval_runtime": 155.4791, "eval_samples_per_second": 36.378, "eval_steps_per_second": 4.547, "eval_wer": 0.2890019418722216, "step": 19300 }, { "epoch": 3.7553232675183894, "eval_loss": 0.3916691243648529, "eval_runtime": 155.9516, "eval_samples_per_second": 36.268, "eval_steps_per_second": 4.533, "eval_wer": 0.2892908154258478, "step": 19400 }, { "epoch": 3.7746806039488967, "grad_norm": 0.4647356867790222, "learning_rate": 7.953846153846153e-06, "loss": 0.2488, "step": 19500 }, { "epoch": 3.7746806039488967, "eval_loss": 0.39043277502059937, "eval_runtime": 155.3552, "eval_samples_per_second": 36.407, "eval_steps_per_second": 4.551, "eval_wer": 0.2884562918264833, "step": 19500 }, { "epoch": 3.794037940379404, "eval_loss": 0.39014604687690735, "eval_runtime": 155.2137, "eval_samples_per_second": 36.44, "eval_steps_per_second": 4.555, "eval_wer": 0.2887933109723805, "step": 19600 }, { "epoch": 3.813395276809911, "eval_loss": 0.3883425295352936, "eval_runtime": 155.5369, "eval_samples_per_second": 36.364, "eval_steps_per_second": 4.546, "eval_wer": 0.28922662130281973, "step": 19700 }, { "epoch": 3.832752613240418, "eval_loss": 0.38913780450820923, "eval_runtime": 155.8958, "eval_samples_per_second": 36.281, "eval_steps_per_second": 4.535, "eval_wer": 0.28903403893373564, "step": 19800 }, { "epoch": 3.8521099496709255, "eval_loss": 0.3888201415538788, "eval_runtime": 155.6372, "eval_samples_per_second": 36.341, "eval_steps_per_second": 4.543, "eval_wer": 0.2888254080338945, "step": 19900 }, { "epoch": 3.8714672861014323, "grad_norm": 0.3741956055164337, "learning_rate": 2.615384615384615e-07, "loss": 0.2602, "step": 20000 }, { "epoch": 3.8714672861014323, "eval_loss": 0.38884833455085754, "eval_runtime": 155.0772, "eval_samples_per_second": 36.472, "eval_steps_per_second": 4.559, "eval_wer": 0.2884883888879973, "step": 20000 }, { "epoch": 3.8714672861014323, "step": 20000, "total_flos": 2.249387574100498e+19, "train_loss": 0.15996522521972656, "train_runtime": 19346.8732, "train_samples_per_second": 8.27, "train_steps_per_second": 1.034 } ], "logging_steps": 500, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "total_flos": 2.249387574100498e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }