diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,2109 +1,3149 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 3.8714672861014323, + "epoch": 5.807200929152149, "eval_steps": 100, - "global_step": 20000, + "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019357336430507164, - "eval_loss": 3.5565404891967773, - "eval_runtime": 151.5266, - "eval_samples_per_second": 37.327, - "eval_steps_per_second": 4.666, + "eval_loss": 3.567659854888916, + "eval_runtime": 160.9588, + "eval_samples_per_second": 35.139, + "eval_steps_per_second": 4.392, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.03871467286101433, - "eval_loss": 3.0301756858825684, - "eval_runtime": 150.582, - "eval_samples_per_second": 37.561, - "eval_steps_per_second": 4.695, + "eval_loss": 3.0471677780151367, + "eval_runtime": 158.7973, + "eval_samples_per_second": 35.618, + "eval_steps_per_second": 4.452, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.05807200929152149, - "eval_loss": 2.9460911750793457, - "eval_runtime": 148.9065, - "eval_samples_per_second": 37.984, - "eval_steps_per_second": 4.748, + "eval_loss": 2.9665186405181885, + "eval_runtime": 159.308, + "eval_samples_per_second": 35.504, + "eval_steps_per_second": 4.438, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.07742934572202866, - "eval_loss": 1.8142520189285278, - "eval_runtime": 149.8655, - "eval_samples_per_second": 37.741, - "eval_steps_per_second": 4.718, - "eval_wer": 0.940732775914365, + "eval_loss": 2.464332103729248, + "eval_runtime": 159.8297, + "eval_samples_per_second": 35.388, + "eval_steps_per_second": 4.423, + "eval_wer": 0.9813195101988413, "step": 400 }, { "epoch": 0.09678668215253582, - "grad_norm": 3.132490396499634, + "grad_norm": 6.005111217498779, "learning_rate": 0.00029759999999999997, - "loss": 3.9521, + "loss": 4.1279, "step": 500 }, { "epoch": 0.09678668215253582, - "eval_loss": 1.4195518493652344, - "eval_runtime": 150.5171, - "eval_samples_per_second": 37.577, - "eval_steps_per_second": 4.697, - "eval_wer": 0.8693007655149171, + "eval_loss": 1.625333547592163, + "eval_runtime": 160.6655, + "eval_samples_per_second": 35.204, + "eval_steps_per_second": 4.4, + "eval_wer": 0.9345380430421595, "step": 500 }, { "epoch": 0.11614401858304298, - "eval_loss": 1.16689133644104, - "eval_runtime": 150.5387, - "eval_samples_per_second": 37.572, - "eval_steps_per_second": 4.696, - "eval_wer": 0.8055239042865626, + "eval_loss": 1.24808931350708, + "eval_runtime": 160.4402, + "eval_samples_per_second": 35.253, + "eval_steps_per_second": 4.407, + "eval_wer": 0.8190528157147213, "step": 600 }, { "epoch": 0.13550135501355012, - "eval_loss": 1.0756505727767944, - "eval_runtime": 151.2385, - "eval_samples_per_second": 37.398, - "eval_steps_per_second": 4.675, - "eval_wer": 0.7596251063215163, + "eval_loss": 1.0997203588485718, + "eval_runtime": 161.1872, + "eval_samples_per_second": 35.09, + "eval_steps_per_second": 4.386, + "eval_wer": 0.7769735680698432, "step": 700 }, { "epoch": 0.1548586914440573, - "eval_loss": 0.9944618344306946, - "eval_runtime": 151.1646, - "eval_samples_per_second": 37.416, - "eval_steps_per_second": 4.677, - "eval_wer": 0.7223925149652549, + "eval_loss": 1.0475263595581055, + "eval_runtime": 161.0626, + "eval_samples_per_second": 35.117, + "eval_steps_per_second": 4.39, + "eval_wer": 0.7339795541718156, "step": 800 }, { "epoch": 0.17421602787456447, - "eval_loss": 0.9381263256072998, - "eval_runtime": 151.6289, - "eval_samples_per_second": 37.302, - "eval_steps_per_second": 4.663, - "eval_wer": 0.6870857472998347, + "eval_loss": 0.9692754149436951, + "eval_runtime": 161.3683, + "eval_samples_per_second": 35.05, + "eval_steps_per_second": 4.381, + "eval_wer": 0.7012565999582738, "step": 900 }, { "epoch": 0.19357336430507163, - "grad_norm": 7.335289001464844, - "learning_rate": 0.0002844, - "loss": 1.0266, + "grad_norm": 2.414348602294922, + "learning_rate": 0.0002949762711864406, + "loss": 1.0598, "step": 1000 }, { "epoch": 0.19357336430507163, - "eval_loss": 0.8977694511413574, - "eval_runtime": 156.0202, - "eval_samples_per_second": 36.252, - "eval_steps_per_second": 4.531, - "eval_wer": 0.661472292211648, + "eval_loss": 0.911480724811554, + "eval_runtime": 162.0835, + "eval_samples_per_second": 34.896, + "eval_steps_per_second": 4.362, + "eval_wer": 0.6749049124552647, "step": 1000 }, { "epoch": 0.2129307007355788, - "eval_loss": 0.8770694136619568, - "eval_runtime": 151.6589, - "eval_samples_per_second": 37.294, - "eval_steps_per_second": 4.662, - "eval_wer": 0.6450385967164706, + "eval_loss": 0.8823792338371277, + "eval_runtime": 161.5426, + "eval_samples_per_second": 35.012, + "eval_steps_per_second": 4.377, + "eval_wer": 0.65625651971562, "step": 1100 }, { "epoch": 0.23228803716608595, - "eval_loss": 0.851553201675415, - "eval_runtime": 151.5945, - "eval_samples_per_second": 37.31, - "eval_steps_per_second": 4.664, - "eval_wer": 0.640432668389209, + "eval_loss": 0.8609552383422852, + "eval_runtime": 161.6778, + "eval_samples_per_second": 34.983, + "eval_steps_per_second": 4.373, + "eval_wer": 0.6431288215563865, "step": 1200 }, { "epoch": 0.2516453735965931, - "eval_loss": 0.8273979425430298, - "eval_runtime": 151.4524, - "eval_samples_per_second": 37.345, - "eval_steps_per_second": 4.668, - "eval_wer": 0.6138081558633307, + "eval_loss": 0.8330459594726562, + "eval_runtime": 161.7974, + "eval_samples_per_second": 34.957, + "eval_steps_per_second": 4.37, + "eval_wer": 0.6114169247805363, "step": 1300 }, { "epoch": 0.27100271002710025, - "eval_loss": 0.7992698550224304, - "eval_runtime": 152.8076, - "eval_samples_per_second": 37.014, - "eval_steps_per_second": 4.627, - "eval_wer": 0.596973247099228, + "eval_loss": 0.8172780871391296, + "eval_runtime": 161.4977, + "eval_samples_per_second": 35.022, + "eval_steps_per_second": 4.378, + "eval_wer": 0.6017396607340598, "step": 1400 }, { "epoch": 0.29036004645760743, - "grad_norm": 4.0737223625183105, - "learning_rate": 0.00026861052631578947, - "loss": 0.8454, + "grad_norm": 4.460846424102783, + "learning_rate": 0.0002898915254237288, + "loss": 0.8546, "step": 1500 }, { "epoch": 0.29036004645760743, - "eval_loss": 0.7768516540527344, - "eval_runtime": 152.3743, - "eval_samples_per_second": 37.119, - "eval_steps_per_second": 4.64, - "eval_wer": 0.5887563993516394, + "eval_loss": 0.8102588653564453, + "eval_runtime": 161.397, + "eval_samples_per_second": 35.044, + "eval_steps_per_second": 4.381, + "eval_wer": 0.6139204955786298, "step": 1500 }, { "epoch": 0.3097173828881146, - "eval_loss": 0.7664207220077515, - "eval_runtime": 154.3668, - "eval_samples_per_second": 36.64, - "eval_steps_per_second": 4.58, - "eval_wer": 0.5997977885124617, + "eval_loss": 0.7860382795333862, + "eval_runtime": 162.0586, + "eval_samples_per_second": 34.901, + "eval_steps_per_second": 4.363, + "eval_wer": 0.6077739082986953, "step": 1600 }, { "epoch": 0.32907471931862176, - "eval_loss": 0.7400562763214111, - "eval_runtime": 153.7228, - "eval_samples_per_second": 36.793, - "eval_steps_per_second": 4.599, - "eval_wer": 0.5592110542279854, + "eval_loss": 0.857550323009491, + "eval_runtime": 161.2906, + "eval_samples_per_second": 35.067, + "eval_steps_per_second": 4.383, + "eval_wer": 0.5990114105053682, "step": 1700 }, { "epoch": 0.34843205574912894, - "eval_loss": 0.746478796005249, - "eval_runtime": 151.7535, - "eval_samples_per_second": 37.271, - "eval_steps_per_second": 4.659, - "eval_wer": 0.5650206223620228, + "eval_loss": 0.7555657029151917, + "eval_runtime": 161.7718, + "eval_samples_per_second": 34.963, + "eval_steps_per_second": 4.37, + "eval_wer": 0.5773298454526488, "step": 1800 }, { "epoch": 0.3677893921796361, - "eval_loss": 0.7252949476242065, - "eval_runtime": 151.7548, - "eval_samples_per_second": 37.271, - "eval_steps_per_second": 4.659, - "eval_wer": 0.5791272808974338, + "eval_loss": 0.7365372180938721, + "eval_runtime": 162.1133, + "eval_samples_per_second": 34.889, + "eval_steps_per_second": 4.361, + "eval_wer": 0.5825777150101908, "step": 1900 }, { "epoch": 0.38714672861014326, - "grad_norm": 2.4802448749542236, - "learning_rate": 0.0002528210526315789, - "loss": 0.7537, + "grad_norm": 3.4646999835968018, + "learning_rate": 0.0002848067796610169, + "loss": 0.7776, "step": 2000 }, { "epoch": 0.38714672861014326, - "eval_loss": 0.7039346098899841, - "eval_runtime": 152.7969, - "eval_samples_per_second": 37.016, - "eval_steps_per_second": 4.627, - "eval_wer": 0.5343518800853782, + "eval_loss": 0.7291606068611145, + "eval_runtime": 162.354, + "eval_samples_per_second": 34.837, + "eval_steps_per_second": 4.355, + "eval_wer": 0.5551989215387332, "step": 2000 }, { "epoch": 0.4065040650406504, - "eval_loss": 0.6932350397109985, - "eval_runtime": 152.4406, - "eval_samples_per_second": 37.103, - "eval_steps_per_second": 4.638, - "eval_wer": 0.5168429330294811, + "eval_loss": 0.716595470905304, + "eval_runtime": 162.461, + "eval_samples_per_second": 34.815, + "eval_steps_per_second": 4.352, + "eval_wer": 0.5385726436744716, "step": 2100 }, { "epoch": 0.4258614014711576, - "eval_loss": 0.696869432926178, - "eval_runtime": 153.0527, - "eval_samples_per_second": 36.955, - "eval_steps_per_second": 4.619, - "eval_wer": 0.5364381890837894, + "eval_loss": 0.7117305397987366, + "eval_runtime": 161.9033, + "eval_samples_per_second": 34.934, + "eval_steps_per_second": 4.367, + "eval_wer": 0.5401774967501726, "step": 2200 }, { "epoch": 0.4452187379016647, - "eval_loss": 0.6781283617019653, - "eval_runtime": 152.1378, - "eval_samples_per_second": 37.177, - "eval_steps_per_second": 4.647, - "eval_wer": 0.5173725345444624, + "eval_loss": 0.7060667872428894, + "eval_runtime": 162.0901, + "eval_samples_per_second": 34.894, + "eval_steps_per_second": 4.362, + "eval_wer": 0.5388294201665838, "step": 2300 }, { "epoch": 0.4645760743321719, - "eval_loss": 0.6760829091072083, - "eval_runtime": 151.9712, - "eval_samples_per_second": 37.218, - "eval_steps_per_second": 4.652, - "eval_wer": 0.5050312143923223, + "eval_loss": 0.7044907212257385, + "eval_runtime": 162.5576, + "eval_samples_per_second": 34.794, + "eval_steps_per_second": 4.349, + "eval_wer": 0.5364060920222754, "step": 2400 }, { "epoch": 0.48393341076267904, - "grad_norm": 3.791292667388916, - "learning_rate": 0.0002370315789473684, - "loss": 0.681, + "grad_norm": 2.70296573638916, + "learning_rate": 0.00027972203389830505, + "loss": 0.706, "step": 2500 }, { "epoch": 0.48393341076267904, - "eval_loss": 0.6720712780952454, - "eval_runtime": 152.2414, - "eval_samples_per_second": 37.152, - "eval_steps_per_second": 4.644, - "eval_wer": 0.528718845789668, + "eval_loss": 0.7062936425209045, + "eval_runtime": 162.4753, + "eval_samples_per_second": 34.811, + "eval_steps_per_second": 4.351, + "eval_wer": 0.5428736499173501, "step": 2500 }, { "epoch": 0.5032907471931862, - "eval_loss": 0.6598270535469055, - "eval_runtime": 151.7192, - "eval_samples_per_second": 37.279, - "eval_steps_per_second": 4.66, - "eval_wer": 0.5195069891351447, + "eval_loss": 0.6941363215446472, + "eval_runtime": 162.6699, + "eval_samples_per_second": 34.77, + "eval_steps_per_second": 4.346, + "eval_wer": 0.5433872029015744, "step": 2600 }, { "epoch": 0.5226480836236934, - "eval_loss": 0.6555168628692627, - "eval_runtime": 152.5678, - "eval_samples_per_second": 37.072, - "eval_steps_per_second": 4.634, - "eval_wer": 0.4975846961210701, + "eval_loss": 0.6840428113937378, + "eval_runtime": 162.5617, + "eval_samples_per_second": 34.793, + "eval_steps_per_second": 4.349, + "eval_wer": 0.5203094156729952, "step": 2700 }, { "epoch": 0.5420054200542005, - "eval_loss": 0.6535276770591736, - "eval_runtime": 152.5246, - "eval_samples_per_second": 37.083, - "eval_steps_per_second": 4.635, - "eval_wer": 0.49936608303509816, + "eval_loss": 0.6902298331260681, + "eval_runtime": 162.7532, + "eval_samples_per_second": 34.752, + "eval_steps_per_second": 4.344, + "eval_wer": 0.5593715395355555, "step": 2800 }, { "epoch": 0.5613627564847077, - "eval_loss": 0.6258506178855896, - "eval_runtime": 151.843, - "eval_samples_per_second": 37.249, - "eval_steps_per_second": 4.656, - "eval_wer": 0.48192133010222915, + "eval_loss": 0.6594961881637573, + "eval_runtime": 163.1259, + "eval_samples_per_second": 34.673, + "eval_steps_per_second": 4.334, + "eval_wer": 0.5149171093386401, "step": 2900 }, { "epoch": 0.5807200929152149, - "grad_norm": 9.4619779586792, - "learning_rate": 0.00022124210526315786, - "loss": 0.6737, + "grad_norm": 4.962900161743164, + "learning_rate": 0.0002746372881355932, + "loss": 0.7002, "step": 3000 }, { "epoch": 0.5807200929152149, - "eval_loss": 0.629943311214447, - "eval_runtime": 151.8389, - "eval_samples_per_second": 37.25, - "eval_steps_per_second": 4.656, - "eval_wer": 0.48022018584198617, + "eval_loss": 0.6767885088920593, + "eval_runtime": 162.9945, + "eval_samples_per_second": 34.701, + "eval_steps_per_second": 4.338, + "eval_wer": 0.525284460207668, "step": 3000 }, { "epoch": 0.6000774293457221, - "eval_loss": 0.6378594636917114, - "eval_runtime": 151.6255, - "eval_samples_per_second": 37.302, - "eval_steps_per_second": 4.663, - "eval_wer": 0.4893197027812104, + "eval_loss": 0.6656874418258667, + "eval_runtime": 163.0572, + "eval_samples_per_second": 34.687, + "eval_steps_per_second": 4.336, + "eval_wer": 0.5063953395066682, "step": 3100 }, { "epoch": 0.6194347657762292, - "eval_loss": 0.6225672364234924, - "eval_runtime": 153.0144, - "eval_samples_per_second": 36.964, - "eval_steps_per_second": 4.62, - "eval_wer": 0.4806053505801544, + "eval_loss": 0.6758668422698975, + "eval_runtime": 163.796, + "eval_samples_per_second": 34.531, + "eval_steps_per_second": 4.316, + "eval_wer": 0.5409478262265089, "step": 3200 }, { "epoch": 0.6387921022067363, - "eval_loss": 0.6088670492172241, - "eval_runtime": 152.2222, - "eval_samples_per_second": 37.156, - "eval_steps_per_second": 4.645, - "eval_wer": 0.4627112387860891, + "eval_loss": 0.6709346175193787, + "eval_runtime": 162.7448, + "eval_samples_per_second": 34.754, + "eval_steps_per_second": 4.344, + "eval_wer": 0.5090914926738457, "step": 3300 }, { "epoch": 0.6581494386372435, - "eval_loss": 0.6028585433959961, - "eval_runtime": 153.0615, - "eval_samples_per_second": 36.952, - "eval_steps_per_second": 4.619, - "eval_wer": 0.47354399704707034, + "eval_loss": 0.6478992104530334, + "eval_runtime": 163.2374, + "eval_samples_per_second": 34.649, + "eval_steps_per_second": 4.331, + "eval_wer": 0.5037473319317617, "step": 3400 }, { "epoch": 0.6775067750677507, - "grad_norm": 3.4705822467803955, - "learning_rate": 0.00020545263157894736, - "loss": 0.6419, + "grad_norm": 3.27418851852417, + "learning_rate": 0.0002695525423728813, + "loss": 0.685, "step": 3500 }, { "epoch": 0.6775067750677507, - "eval_loss": 0.5871421694755554, - "eval_runtime": 152.5739, - "eval_samples_per_second": 37.071, - "eval_steps_per_second": 4.634, - "eval_wer": 0.4592126590810611, + "eval_loss": 0.6378278136253357, + "eval_runtime": 162.9066, + "eval_samples_per_second": 34.719, + "eval_steps_per_second": 4.34, + "eval_wer": 0.5033782157243505, "step": 3500 }, { "epoch": 0.6968641114982579, - "eval_loss": 0.6001027226448059, - "eval_runtime": 152.1697, - "eval_samples_per_second": 37.169, - "eval_steps_per_second": 4.646, - "eval_wer": 0.4610742886488742, + "eval_loss": 0.6492822170257568, + "eval_runtime": 162.8688, + "eval_samples_per_second": 34.727, + "eval_steps_per_second": 4.341, + "eval_wer": 0.49883648152011684, "step": 3600 }, { "epoch": 0.716221447928765, - "eval_loss": 0.5848923921585083, - "eval_runtime": 152.6563, - "eval_samples_per_second": 37.051, - "eval_steps_per_second": 4.631, - "eval_wer": 0.4472565036670893, + "eval_loss": 0.6340391635894775, + "eval_runtime": 163.0198, + "eval_samples_per_second": 34.695, + "eval_steps_per_second": 4.337, + "eval_wer": 0.4832694066858179, "step": 3700 }, { "epoch": 0.7355787843592722, - "eval_loss": 0.5923960208892822, - "eval_runtime": 152.6559, - "eval_samples_per_second": 37.051, - "eval_steps_per_second": 4.631, - "eval_wer": 0.46377044181605176, + "eval_loss": 0.6226627826690674, + "eval_runtime": 164.0506, + "eval_samples_per_second": 34.477, + "eval_steps_per_second": 4.31, + "eval_wer": 0.47354399704707034, "step": 3800 }, { "epoch": 0.7549361207897793, - "eval_loss": 0.5767965316772461, - "eval_runtime": 152.1652, - "eval_samples_per_second": 37.17, - "eval_steps_per_second": 4.646, - "eval_wer": 0.4584904751969957, + "eval_loss": 0.6257476210594177, + "eval_runtime": 167.0907, + "eval_samples_per_second": 33.85, + "eval_steps_per_second": 4.231, + "eval_wer": 0.49068382789555615, "step": 3900 }, { "epoch": 0.7742934572202865, - "grad_norm": 3.628082275390625, - "learning_rate": 0.00018966315789473683, - "loss": 0.6183, + "grad_norm": 5.494376182556152, + "learning_rate": 0.0002644677966101695, + "loss": 0.6655, "step": 4000 }, { "epoch": 0.7742934572202865, - "eval_loss": 0.5672534704208374, - "eval_runtime": 152.4329, - "eval_samples_per_second": 37.105, - "eval_steps_per_second": 4.638, - "eval_wer": 0.44531463144549116, + "eval_loss": 0.6420141458511353, + "eval_runtime": 163.4141, + "eval_samples_per_second": 34.611, + "eval_steps_per_second": 4.326, + "eval_wer": 0.49987963601932245, "step": 4000 }, { "epoch": 0.7936507936507936, - "eval_loss": 0.5575382113456726, - "eval_runtime": 152.2388, - "eval_samples_per_second": 37.152, - "eval_steps_per_second": 4.644, - "eval_wer": 0.4451862431994351, + "eval_loss": 0.6111469268798828, + "eval_runtime": 163.5321, + "eval_samples_per_second": 34.586, + "eval_steps_per_second": 4.323, + "eval_wer": 0.4790646916274815, "step": 4100 }, { "epoch": 0.8130081300813008, - "eval_loss": 0.5631808042526245, - "eval_runtime": 152.7545, - "eval_samples_per_second": 37.027, - "eval_steps_per_second": 4.628, - "eval_wer": 0.4474972316284444, + "eval_loss": 0.6136205196380615, + "eval_runtime": 163.9442, + "eval_samples_per_second": 34.5, + "eval_steps_per_second": 4.312, + "eval_wer": 0.48073373882621045, "step": 4200 }, { "epoch": 0.832365466511808, - "eval_loss": 0.5498641729354858, - "eval_runtime": 153.7788, - "eval_samples_per_second": 36.78, - "eval_steps_per_second": 4.598, - "eval_wer": 0.44008281041870617, + "eval_loss": 0.6218396425247192, + "eval_runtime": 163.3391, + "eval_samples_per_second": 34.627, + "eval_steps_per_second": 4.328, + "eval_wer": 0.48596555985299544, "step": 4300 }, { "epoch": 0.8517228029423152, - "eval_loss": 0.5662574172019958, - "eval_runtime": 152.5034, - "eval_samples_per_second": 37.088, - "eval_steps_per_second": 4.636, - "eval_wer": 0.43101539054099597, + "eval_loss": 0.6084252595901489, + "eval_runtime": 162.8945, + "eval_samples_per_second": 34.722, + "eval_steps_per_second": 4.34, + "eval_wer": 0.4585386207892667, "step": 4400 }, { "epoch": 0.8710801393728222, - "grad_norm": 2.376349925994873, - "learning_rate": 0.0001738736842105263, - "loss": 0.5877, + "grad_norm": 3.0379676818847656, + "learning_rate": 0.0002593830508474576, + "loss": 0.6431, "step": 4500 }, { "epoch": 0.8710801393728222, - "eval_loss": 0.5584732294082642, - "eval_runtime": 152.1714, - "eval_samples_per_second": 37.169, - "eval_steps_per_second": 4.646, - "eval_wer": 0.4317215258943044, + "eval_loss": 0.6008957624435425, + "eval_runtime": 163.8125, + "eval_samples_per_second": 34.527, + "eval_steps_per_second": 4.316, + "eval_wer": 0.4627593843783602, "step": 4500 }, { "epoch": 0.8904374758033294, - "eval_loss": 0.5463821291923523, - "eval_runtime": 152.4923, - "eval_samples_per_second": 37.09, - "eval_steps_per_second": 4.636, - "eval_wer": 0.41997400138017366, + "eval_loss": 0.6009930968284607, + "eval_runtime": 163.1039, + "eval_samples_per_second": 34.677, + "eval_steps_per_second": 4.335, + "eval_wer": 0.46295196674744427, "step": 4600 }, { "epoch": 0.9097948122338366, - "eval_loss": 0.5381494164466858, - "eval_runtime": 153.2139, - "eval_samples_per_second": 36.916, - "eval_steps_per_second": 4.614, - "eval_wer": 0.4192197204345942, + "eval_loss": 0.5823432207107544, + "eval_runtime": 163.3804, + "eval_samples_per_second": 34.619, + "eval_steps_per_second": 4.327, + "eval_wer": 0.45035387010319206, "step": 4700 }, { "epoch": 0.9291521486643438, - "eval_loss": 0.5453722476959229, - "eval_runtime": 151.9737, - "eval_samples_per_second": 37.217, - "eval_steps_per_second": 4.652, - "eval_wer": 0.4201986808107718, + "eval_loss": 0.6118789315223694, + "eval_runtime": 163.7255, + "eval_samples_per_second": 34.546, + "eval_steps_per_second": 4.318, + "eval_wer": 0.4630001123397153, "step": 4800 }, { "epoch": 0.948509485094851, - "eval_loss": 0.5237515568733215, - "eval_runtime": 151.8558, - "eval_samples_per_second": 37.246, - "eval_steps_per_second": 4.656, - "eval_wer": 0.41241514339362234, + "eval_loss": 0.6001989245414734, + "eval_runtime": 163.3492, + "eval_samples_per_second": 34.625, + "eval_steps_per_second": 4.328, + "eval_wer": 0.4600150856189116, "step": 4900 }, { "epoch": 0.9678668215253581, - "grad_norm": 2.5489518642425537, - "learning_rate": 0.0001581157894736842, - "loss": 0.5621, + "grad_norm": 3.1605985164642334, + "learning_rate": 0.00025430847457627115, + "loss": 0.6235, "step": 5000 }, { "epoch": 0.9678668215253581, - "eval_loss": 0.5303541421890259, - "eval_runtime": 152.515, - "eval_samples_per_second": 37.085, - "eval_steps_per_second": 4.636, - "eval_wer": 0.41353854054661293, + "eval_loss": 0.5892329216003418, + "eval_runtime": 163.5255, + "eval_samples_per_second": 34.588, + "eval_steps_per_second": 4.323, + "eval_wer": 0.4551844778610518, "step": 5000 }, { "epoch": 0.9872241579558653, - "eval_loss": 0.5163344740867615, - "eval_runtime": 156.7945, - "eval_samples_per_second": 36.073, - "eval_steps_per_second": 4.509, - "eval_wer": 0.4061080708061177, + "eval_loss": 0.5673592686653137, + "eval_runtime": 163.343, + "eval_samples_per_second": 34.627, + "eval_steps_per_second": 4.328, + "eval_wer": 0.44889345380430423, "step": 5100 }, { "epoch": 1.0065814943863725, - "eval_loss": 0.51596599817276, - "eval_runtime": 153.2891, - "eval_samples_per_second": 36.898, - "eval_steps_per_second": 4.612, - "eval_wer": 0.39927139670363176, + "eval_loss": 0.5792257785797119, + "eval_runtime": 162.8592, + "eval_samples_per_second": 34.729, + "eval_steps_per_second": 4.341, + "eval_wer": 0.43167338030203334, "step": 5200 }, { "epoch": 1.0259388308168795, - "eval_loss": 0.5088583827018738, - "eval_runtime": 152.7112, - "eval_samples_per_second": 37.037, - "eval_steps_per_second": 4.63, - "eval_wer": 0.3898509091492674, + "eval_loss": 0.5752869844436646, + "eval_runtime": 162.7856, + "eval_samples_per_second": 34.745, + "eval_steps_per_second": 4.343, + "eval_wer": 0.43331033043924827, "step": 5300 }, { "epoch": 1.0452961672473868, - "eval_loss": 0.5110610723495483, - "eval_runtime": 152.5555, - "eval_samples_per_second": 37.075, - "eval_steps_per_second": 4.634, - "eval_wer": 0.3985652613503234, + "eval_loss": 0.5698733925819397, + "eval_runtime": 161.9949, + "eval_samples_per_second": 34.915, + "eval_steps_per_second": 4.364, + "eval_wer": 0.44619730063712665, "step": 5400 }, { "epoch": 1.064653503677894, - "grad_norm": 1.1362248659133911, - "learning_rate": 0.0001423578947368421, - "loss": 0.4882, + "grad_norm": 0.8791279792785645, + "learning_rate": 0.0002492338983050847, + "loss": 0.5527, "step": 5500 }, { "epoch": 1.064653503677894, - "eval_loss": 0.5010027885437012, - "eval_runtime": 152.1249, - "eval_samples_per_second": 37.18, - "eval_steps_per_second": 4.647, - "eval_wer": 0.38574248527547306, + "eval_loss": 0.5666691660881042, + "eval_runtime": 162.0517, + "eval_samples_per_second": 34.902, + "eval_steps_per_second": 4.363, + "eval_wer": 0.43639164834459404, "step": 5500 }, { "epoch": 1.084010840108401, - "eval_loss": 0.49406561255455017, - "eval_runtime": 151.5623, - "eval_samples_per_second": 37.318, - "eval_steps_per_second": 4.665, - "eval_wer": 0.3858548249907721, + "eval_loss": 0.5558171272277832, + "eval_runtime": 161.9847, + "eval_samples_per_second": 34.917, + "eval_steps_per_second": 4.365, + "eval_wer": 0.42945868305756607, "step": 5600 }, { "epoch": 1.1033681765389083, - "eval_loss": 0.49403733015060425, - "eval_runtime": 152.7631, - "eval_samples_per_second": 37.025, - "eval_steps_per_second": 4.628, - "eval_wer": 0.3813451878480525, + "eval_loss": 0.5602455139160156, + "eval_runtime": 162.701, + "eval_samples_per_second": 34.763, + "eval_steps_per_second": 4.345, + "eval_wer": 0.422349183932211, "step": 5700 }, { "epoch": 1.1227255129694154, - "eval_loss": 0.4913772642612457, - "eval_runtime": 152.1406, - "eval_samples_per_second": 37.176, - "eval_steps_per_second": 4.647, - "eval_wer": 0.37815153022740766, + "eval_loss": 0.559140145778656, + "eval_runtime": 162.4402, + "eval_samples_per_second": 34.819, + "eval_steps_per_second": 4.352, + "eval_wer": 0.41942835133443535, "step": 5800 }, { "epoch": 1.1420828493999227, - "eval_loss": 0.48747047781944275, - "eval_runtime": 151.3195, - "eval_samples_per_second": 37.378, - "eval_steps_per_second": 4.672, - "eval_wer": 0.3745406108070806, + "eval_loss": 0.5399234890937805, + "eval_runtime": 162.3316, + "eval_samples_per_second": 34.842, + "eval_steps_per_second": 4.355, + "eval_wer": 0.418818507165669, "step": 5900 }, { "epoch": 1.1614401858304297, - "grad_norm": 1.0150744915008545, - "learning_rate": 0.00012656842105263156, - "loss": 0.4569, + "grad_norm": 0.9803772568702698, + "learning_rate": 0.00024414915254237287, + "loss": 0.533, "step": 6000 }, { "epoch": 1.1614401858304297, - "eval_loss": 0.4841971695423126, - "eval_runtime": 151.8567, - "eval_samples_per_second": 37.246, - "eval_steps_per_second": 4.656, - "eval_wer": 0.38071929514852915, + "eval_loss": 0.545900821685791, + "eval_runtime": 161.6822, + "eval_samples_per_second": 34.982, + "eval_steps_per_second": 4.373, + "eval_wer": 0.431063536133267, "step": 6000 }, { "epoch": 1.1807975222609368, - "eval_loss": 0.48611822724342346, - "eval_runtime": 150.971, - "eval_samples_per_second": 37.464, - "eval_steps_per_second": 4.683, - "eval_wer": 0.37370608720771614, + "eval_loss": 0.5347985625267029, + "eval_runtime": 161.8121, + "eval_samples_per_second": 34.954, + "eval_steps_per_second": 4.369, + "eval_wer": 0.41183739628637, "step": 6100 }, { "epoch": 1.2001548586914441, - "eval_loss": 0.48144644498825073, - "eval_runtime": 151.4548, - "eval_samples_per_second": 37.344, - "eval_steps_per_second": 4.668, - "eval_wer": 0.3760973182905105, + "eval_loss": 0.5453631281852722, + "eval_runtime": 161.8802, + "eval_samples_per_second": 34.939, + "eval_steps_per_second": 4.367, + "eval_wer": 0.4176309158896503, "step": 6200 }, { "epoch": 1.2195121951219512, - "eval_loss": 0.47813892364501953, - "eval_runtime": 151.1935, - "eval_samples_per_second": 37.409, - "eval_steps_per_second": 4.676, - "eval_wer": 0.37409125194588433, + "eval_loss": 0.5442932844161987, + "eval_runtime": 162.1767, + "eval_samples_per_second": 34.876, + "eval_steps_per_second": 4.359, + "eval_wer": 0.42157885445587456, "step": 6300 }, { "epoch": 1.2388695315524583, - "eval_loss": 0.4771001935005188, - "eval_runtime": 151.1732, - "eval_samples_per_second": 37.414, - "eval_steps_per_second": 4.677, - "eval_wer": 0.36815329556579096, + "eval_loss": 0.5382806658744812, + "eval_runtime": 161.5364, + "eval_samples_per_second": 35.014, + "eval_steps_per_second": 4.377, + "eval_wer": 0.40962269904190274, "step": 6400 }, { "epoch": 1.2582268679829656, - "grad_norm": 1.3292571306228638, - "learning_rate": 0.00011077894736842105, - "loss": 0.4416, + "grad_norm": 2.7026009559631348, + "learning_rate": 0.00023906440677966102, + "loss": 0.5228, "step": 6500 }, { "epoch": 1.2582268679829656, - "eval_loss": 0.47095027565956116, - "eval_runtime": 151.5037, - "eval_samples_per_second": 37.332, - "eval_steps_per_second": 4.667, - "eval_wer": 0.37338511659257595, + "eval_loss": 0.540704071521759, + "eval_runtime": 161.6157, + "eval_samples_per_second": 34.997, + "eval_steps_per_second": 4.375, + "eval_wer": 0.41260772576270643, "step": 6500 }, { "epoch": 1.2775842044134726, - "eval_loss": 0.47211408615112305, - "eval_runtime": 150.9455, - "eval_samples_per_second": 37.47, - "eval_steps_per_second": 4.684, - "eval_wer": 0.3659706953828377, + "eval_loss": 0.5527251362800598, + "eval_runtime": 161.6568, + "eval_samples_per_second": 34.988, + "eval_steps_per_second": 4.373, + "eval_wer": 0.41426072443067835, "step": 6600 }, { "epoch": 1.29694154084398, - "eval_loss": 0.4679400622844696, - "eval_runtime": 151.4191, - "eval_samples_per_second": 37.353, - "eval_steps_per_second": 4.669, - "eval_wer": 0.3638843863844265, + "eval_loss": 0.5312824845314026, + "eval_runtime": 161.7821, + "eval_samples_per_second": 34.961, + "eval_steps_per_second": 4.37, + "eval_wer": 0.40811413715074385, "step": 6700 }, { "epoch": 1.316298877274487, - "eval_loss": 0.46228036284446716, - "eval_runtime": 151.3839, - "eval_samples_per_second": 37.362, - "eval_steps_per_second": 4.67, - "eval_wer": 0.366532393959333, + "eval_loss": 0.533909797668457, + "eval_runtime": 161.8361, + "eval_samples_per_second": 34.949, + "eval_steps_per_second": 4.369, + "eval_wer": 0.4150471024377718, "step": 6800 }, { "epoch": 1.3356562137049943, - "eval_loss": 0.46108925342559814, - "eval_runtime": 151.8163, - "eval_samples_per_second": 37.256, - "eval_steps_per_second": 4.657, - "eval_wer": 0.3601771757795574, + "eval_loss": 0.523649275302887, + "eval_runtime": 161.5476, + "eval_samples_per_second": 35.011, + "eval_steps_per_second": 4.376, + "eval_wer": 0.4120781242477251, "step": 6900 }, { "epoch": 1.3550135501355014, - "grad_norm": 0.8062695860862732, - "learning_rate": 9.498947368421052e-05, - "loss": 0.4324, + "grad_norm": 0.709751546382904, + "learning_rate": 0.00023397966101694912, + "loss": 0.5204, "step": 7000 }, { "epoch": 1.3550135501355014, - "eval_loss": 0.46888086199760437, - "eval_runtime": 152.4379, - "eval_samples_per_second": 37.104, - "eval_steps_per_second": 4.638, - "eval_wer": 0.3609314567251368, + "eval_loss": 0.5527586340904236, + "eval_runtime": 162.1137, + "eval_samples_per_second": 34.889, + "eval_steps_per_second": 4.361, + "eval_wer": 0.4165877613904447, "step": 7000 }, { "epoch": 1.3743708865660085, - "eval_loss": 0.4573034346103668, - "eval_runtime": 151.3077, - "eval_samples_per_second": 37.381, - "eval_steps_per_second": 4.673, - "eval_wer": 0.3602574184333424, + "eval_loss": 0.5330629944801331, + "eval_runtime": 161.681, + "eval_samples_per_second": 34.982, + "eval_steps_per_second": 4.373, + "eval_wer": 0.40559451782189343, "step": 7100 }, { "epoch": 1.3937282229965158, - "eval_loss": 0.45749789476394653, - "eval_runtime": 151.5824, - "eval_samples_per_second": 37.313, - "eval_steps_per_second": 4.664, - "eval_wer": 0.3546083356068752, + "eval_loss": 0.5242415070533752, + "eval_runtime": 162.2742, + "eval_samples_per_second": 34.855, + "eval_steps_per_second": 4.357, + "eval_wer": 0.4058833913755196, "step": 7200 }, { "epoch": 1.4130855594270229, - "eval_loss": 0.4555954933166504, - "eval_runtime": 151.6035, - "eval_samples_per_second": 37.308, - "eval_steps_per_second": 4.663, - "eval_wer": 0.35836369180401534, + "eval_loss": 0.5309507250785828, + "eval_runtime": 163.2224, + "eval_samples_per_second": 34.652, + "eval_steps_per_second": 4.332, + "eval_wer": 0.4092856798960055, "step": 7300 }, { "epoch": 1.43244289585753, - "eval_loss": 0.4495578408241272, - "eval_runtime": 152.5621, - "eval_samples_per_second": 37.073, - "eval_steps_per_second": 4.634, - "eval_wer": 0.350724591163679, + "eval_loss": 0.5278186798095703, + "eval_runtime": 162.9755, + "eval_samples_per_second": 34.705, + "eval_steps_per_second": 4.338, + "eval_wer": 0.4063006531752018, "step": 7400 }, { "epoch": 1.4518002322880372, - "grad_norm": 0.7916799187660217, - "learning_rate": 7.92e-05, - "loss": 0.4255, + "grad_norm": 0.9905166029930115, + "learning_rate": 0.00022889491525423728, + "loss": 0.5199, "step": 7500 }, { "epoch": 1.4518002322880372, - "eval_loss": 0.44609567523002625, - "eval_runtime": 151.8498, - "eval_samples_per_second": 37.247, - "eval_steps_per_second": 4.656, - "eval_wer": 0.34671245847442667, + "eval_loss": 0.5168124437332153, + "eval_runtime": 162.1336, + "eval_samples_per_second": 34.885, + "eval_steps_per_second": 4.361, + "eval_wer": 0.3955802346295197, "step": 7500 }, { "epoch": 1.4711575687185443, - "eval_loss": 0.44341230392456055, - "eval_runtime": 152.528, - "eval_samples_per_second": 37.082, - "eval_steps_per_second": 4.635, - "eval_wer": 0.3462470510824734, + "eval_loss": 0.5236623287200928, + "eval_runtime": 161.1426, + "eval_samples_per_second": 35.099, + "eval_steps_per_second": 4.387, + "eval_wer": 0.40235271460897754, "step": 7600 }, { "epoch": 1.4905149051490514, - "eval_loss": 0.44362780451774597, - "eval_runtime": 152.5253, - "eval_samples_per_second": 37.082, - "eval_steps_per_second": 4.635, - "eval_wer": 0.3516393574168285, + "eval_loss": 0.5316073894500732, + "eval_runtime": 162.1382, + "eval_samples_per_second": 34.884, + "eval_steps_per_second": 4.36, + "eval_wer": 0.4179358379740335, "step": 7700 }, { "epoch": 1.5098722415795587, - "eval_loss": 0.4406072199344635, - "eval_runtime": 152.4039, - "eval_samples_per_second": 37.112, - "eval_steps_per_second": 4.639, - "eval_wer": 0.34579769222127715, + "eval_loss": 0.5182381868362427, + "eval_runtime": 161.7911, + "eval_samples_per_second": 34.959, + "eval_steps_per_second": 4.37, + "eval_wer": 0.40326748086212705, "step": 7800 }, { "epoch": 1.5292295780100658, - "eval_loss": 0.43874725699424744, - "eval_runtime": 152.6604, - "eval_samples_per_second": 37.05, - "eval_steps_per_second": 4.631, - "eval_wer": 0.3439360626534641, + "eval_loss": 0.5175392627716064, + "eval_runtime": 161.5725, + "eval_samples_per_second": 35.006, + "eval_steps_per_second": 4.376, + "eval_wer": 0.3983887275119963, "step": 7900 }, { "epoch": 1.5485869144405728, - "grad_norm": 0.7491864562034607, - "learning_rate": 6.344210526315788e-05, - "loss": 0.4094, + "grad_norm": 0.8261615037918091, + "learning_rate": 0.00022382033898305084, + "loss": 0.5066, "step": 8000 }, { "epoch": 1.5485869144405728, - "eval_loss": 0.43253499269485474, - "eval_runtime": 153.8006, - "eval_samples_per_second": 36.775, - "eval_steps_per_second": 4.597, - "eval_wer": 0.3409831329941744, + "eval_loss": 0.5138476490974426, + "eval_runtime": 162.4044, + "eval_samples_per_second": 34.827, + "eval_steps_per_second": 4.353, + "eval_wer": 0.39492224486848226, "step": 8000 }, { "epoch": 1.5679442508710801, - "eval_loss": 0.4359830617904663, - "eval_runtime": 153.3674, - "eval_samples_per_second": 36.879, - "eval_steps_per_second": 4.61, - "eval_wer": 0.3419299963088379, + "eval_loss": 0.515566885471344, + "eval_runtime": 162.5299, + "eval_samples_per_second": 34.8, + "eval_steps_per_second": 4.35, + "eval_wer": 0.4016305307249121, "step": 8100 }, { "epoch": 1.5873015873015874, - "eval_loss": 0.4285949170589447, - "eval_runtime": 153.3711, - "eval_samples_per_second": 36.878, - "eval_steps_per_second": 4.61, - "eval_wer": 0.3377252812505015, + "eval_loss": 0.5131089091300964, + "eval_runtime": 162.835, + "eval_samples_per_second": 34.735, + "eval_steps_per_second": 4.342, + "eval_wer": 0.39793936865080004, "step": 8200 }, { "epoch": 1.6066589237320945, - "eval_loss": 0.43007034063339233, - "eval_runtime": 152.2201, - "eval_samples_per_second": 37.157, - "eval_steps_per_second": 4.645, - "eval_wer": 0.3335526632536791, + "eval_loss": 0.5139849185943604, + "eval_runtime": 162.5015, + "eval_samples_per_second": 34.806, + "eval_steps_per_second": 4.351, + "eval_wer": 0.39413586686138885, "step": 8300 }, { "epoch": 1.6260162601626016, - "eval_loss": 0.42966797947883606, - "eval_runtime": 152.0163, - "eval_samples_per_second": 37.207, - "eval_steps_per_second": 4.651, - "eval_wer": 0.3322848293238754, + "eval_loss": 0.5224258303642273, + "eval_runtime": 162.1349, + "eval_samples_per_second": 34.885, + "eval_steps_per_second": 4.361, + "eval_wer": 0.39853316428880936, "step": 8400 }, { "epoch": 1.645373596593109, - "grad_norm": 1.047472596168518, - "learning_rate": 4.765263157894736e-05, - "loss": 0.4018, + "grad_norm": 1.0760446786880493, + "learning_rate": 0.00021873559322033897, + "loss": 0.502, "step": 8500 }, { "epoch": 1.645373596593109, - "eval_loss": 0.4270441234111786, - "eval_runtime": 152.8058, - "eval_samples_per_second": 37.014, - "eval_steps_per_second": 4.627, - "eval_wer": 0.3338575853380623, + "eval_loss": 0.5274536609649658, + "eval_runtime": 162.3162, + "eval_samples_per_second": 34.846, + "eval_steps_per_second": 4.356, + "eval_wer": 0.40023430854905234, "step": 8500 }, { "epoch": 1.664730933023616, - "eval_loss": 0.4267289638519287, - "eval_runtime": 152.5032, - "eval_samples_per_second": 37.088, - "eval_steps_per_second": 4.636, - "eval_wer": 0.3319959557702492, + "eval_loss": 0.5054244995117188, + "eval_runtime": 162.7623, + "eval_samples_per_second": 34.75, + "eval_steps_per_second": 4.344, + "eval_wer": 0.3860955529521272, "step": 8600 }, { "epoch": 1.684088269454123, - "eval_loss": 0.4224300980567932, - "eval_runtime": 152.5862, - "eval_samples_per_second": 37.068, - "eval_steps_per_second": 4.633, - "eval_wer": 0.33275023671582865, + "eval_loss": 0.5144466161727905, + "eval_runtime": 162.1114, + "eval_samples_per_second": 34.89, + "eval_steps_per_second": 4.361, + "eval_wer": 0.3912631798558842, "step": 8700 }, { "epoch": 1.7034456058846303, - "eval_loss": 0.4207303822040558, - "eval_runtime": 154.5205, - "eval_samples_per_second": 36.604, - "eval_steps_per_second": 4.575, - "eval_wer": 0.32984545264881, + "eval_loss": 0.5017980933189392, + "eval_runtime": 162.1144, + "eval_samples_per_second": 34.889, + "eval_steps_per_second": 4.361, + "eval_wer": 0.3860955529521272, "step": 8800 }, { "epoch": 1.7228029423151374, - "eval_loss": 0.4197385013103485, - "eval_runtime": 152.0624, - "eval_samples_per_second": 37.195, - "eval_steps_per_second": 4.649, - "eval_wer": 0.32978125852578194, + "eval_loss": 0.5001707673072815, + "eval_runtime": 162.7958, + "eval_samples_per_second": 34.743, + "eval_steps_per_second": 4.343, + "eval_wer": 0.39978494968785605, "step": 8900 }, { "epoch": 1.7421602787456445, - "grad_norm": 1.4507739543914795, - "learning_rate": 3.189473684210526e-05, - "loss": 0.3899, + "grad_norm": 2.3791110515594482, + "learning_rate": 0.00021366101694915253, + "loss": 0.4965, "step": 9000 }, { "epoch": 1.7421602787456445, - "eval_loss": 0.4183507561683655, - "eval_runtime": 157.4278, - "eval_samples_per_second": 35.928, - "eval_steps_per_second": 4.491, - "eval_wer": 0.3258493684903147, + "eval_loss": 0.5074877142906189, + "eval_runtime": 162.5487, + "eval_samples_per_second": 34.796, + "eval_steps_per_second": 4.349, + "eval_wer": 0.38896823995763186, "step": 9000 }, { "epoch": 1.7615176151761518, - "eval_loss": 0.4164830148220062, - "eval_runtime": 153.0475, - "eval_samples_per_second": 36.956, - "eval_steps_per_second": 4.619, - "eval_wer": 0.3262024361669689, + "eval_loss": 0.4928957521915436, + "eval_runtime": 162.2035, + "eval_samples_per_second": 34.87, + "eval_steps_per_second": 4.359, + "eval_wer": 0.3865449118133235, "step": 9100 }, { "epoch": 1.7808749516066589, - "eval_loss": 0.41182050108909607, - "eval_runtime": 152.4839, - "eval_samples_per_second": 37.092, - "eval_steps_per_second": 4.637, - "eval_wer": 0.322864341769511, + "eval_loss": 0.49622705578804016, + "eval_runtime": 162.8808, + "eval_samples_per_second": 34.725, + "eval_steps_per_second": 4.341, + "eval_wer": 0.38559804849866, "step": 9200 }, { "epoch": 1.800232288037166, - "eval_loss": 0.4134317636489868, - "eval_runtime": 152.6353, - "eval_samples_per_second": 37.056, - "eval_steps_per_second": 4.632, - "eval_wer": 0.3232334579769222, + "eval_loss": 0.49036508798599243, + "eval_runtime": 162.3886, + "eval_samples_per_second": 34.83, + "eval_steps_per_second": 4.354, + "eval_wer": 0.3759689300444544, "step": 9300 }, { "epoch": 1.8195896244676733, - "eval_loss": 0.4126824736595154, - "eval_runtime": 152.5246, - "eval_samples_per_second": 37.083, - "eval_steps_per_second": 4.635, - "eval_wer": 0.3209064210171559, + "eval_loss": 0.49964088201522827, + "eval_runtime": 162.526, + "eval_samples_per_second": 34.801, + "eval_steps_per_second": 4.35, + "eval_wer": 0.3901237341721365, "step": 9400 }, { "epoch": 1.8389469608981805, - "grad_norm": 1.0012460947036743, - "learning_rate": 1.6105263157894736e-05, - "loss": 0.3665, + "grad_norm": 1.2548748254776, + "learning_rate": 0.00020857627118644066, + "loss": 0.4776, "step": 9500 }, { "epoch": 1.8389469608981805, - "eval_loss": 0.41083237528800964, - "eval_runtime": 152.9993, - "eval_samples_per_second": 36.967, - "eval_steps_per_second": 4.621, - "eval_wer": 0.32109900338624, + "eval_loss": 0.4899130165576935, + "eval_runtime": 162.5723, + "eval_samples_per_second": 34.791, + "eval_steps_per_second": 4.349, + "eval_wer": 0.37616151241353857, "step": 9500 }, { "epoch": 1.8583042973286876, - "eval_loss": 0.4090138077735901, - "eval_runtime": 152.5291, - "eval_samples_per_second": 37.081, - "eval_steps_per_second": 4.635, - "eval_wer": 0.3199114121102213, + "eval_loss": 0.4918155074119568, + "eval_runtime": 162.1914, + "eval_samples_per_second": 34.872, + "eval_steps_per_second": 4.359, + "eval_wer": 0.37948355828023944, "step": 9600 }, { "epoch": 1.8776616337591947, - "eval_loss": 0.407578706741333, - "eval_runtime": 153.0711, - "eval_samples_per_second": 36.95, - "eval_steps_per_second": 4.619, - "eval_wer": 0.32087432395564186, + "eval_loss": 0.49148374795913696, + "eval_runtime": 162.2813, + "eval_samples_per_second": 34.853, + "eval_steps_per_second": 4.357, + "eval_wer": 0.37980452889537963, "step": 9700 }, { "epoch": 1.897018970189702, - "eval_loss": 0.40649694204330444, - "eval_runtime": 154.4136, - "eval_samples_per_second": 36.629, - "eval_steps_per_second": 4.579, - "eval_wer": 0.31981512092567926, + "eval_loss": 0.4841060936450958, + "eval_runtime": 162.2556, + "eval_samples_per_second": 34.859, + "eval_steps_per_second": 4.357, + "eval_wer": 0.37060872077161333, "step": 9800 }, { "epoch": 1.916376306620209, - "eval_loss": 0.40620651841163635, - "eval_runtime": 153.7508, - "eval_samples_per_second": 36.787, - "eval_steps_per_second": 4.598, - "eval_wer": 0.31923737381842693, + "eval_loss": 0.4834117293357849, + "eval_runtime": 163.5813, + "eval_samples_per_second": 34.576, + "eval_steps_per_second": 4.322, + "eval_wer": 0.37728490956652916, "step": 9900 }, { "epoch": 1.9357336430507162, - "grad_norm": 0.7244949340820312, - "learning_rate": 3.157894736842105e-07, - "loss": 0.3698, + "grad_norm": 1.011767029762268, + "learning_rate": 0.00020349152542372878, + "loss": 0.4752, "step": 10000 }, { "epoch": 1.9357336430507162, - "eval_loss": 0.4060620963573456, - "eval_runtime": 153.976, - "eval_samples_per_second": 36.733, - "eval_steps_per_second": 4.592, - "eval_wer": 0.31928551941069794, + "eval_loss": 0.4831894338130951, + "eval_runtime": 162.4305, + "eval_samples_per_second": 34.821, + "eval_steps_per_second": 4.353, + "eval_wer": 0.3711704193481087, "step": 10000 }, { "epoch": 1.9550909794812235, - "eval_loss": 0.45229342579841614, - "eval_runtime": 154.2948, - "eval_samples_per_second": 36.657, - "eval_steps_per_second": 4.582, - "eval_wer": 0.3406140167867632, + "eval_loss": 0.4890592396259308, + "eval_runtime": 162.3568, + "eval_samples_per_second": 34.837, + "eval_steps_per_second": 4.355, + "eval_wer": 0.37829596700422075, "step": 10100 }, { "epoch": 1.9744483159117305, - "eval_loss": 0.4579542577266693, - "eval_runtime": 151.5074, - "eval_samples_per_second": 37.331, - "eval_steps_per_second": 4.666, - "eval_wer": 0.3517837941936416, + "eval_loss": 0.4786697328090668, + "eval_runtime": 163.1452, + "eval_samples_per_second": 34.669, + "eval_steps_per_second": 4.334, + "eval_wer": 0.3783441125964918, "step": 10200 }, { "epoch": 1.9938056523422376, - "eval_loss": 0.46043792366981506, - "eval_runtime": 151.4438, - "eval_samples_per_second": 37.347, - "eval_steps_per_second": 4.668, - "eval_wer": 0.35115790149411824, + "eval_loss": 0.4726457893848419, + "eval_runtime": 162.2371, + "eval_samples_per_second": 34.863, + "eval_steps_per_second": 4.358, + "eval_wer": 0.37141114730946384, "step": 10300 }, { "epoch": 2.013162988772745, - "eval_loss": 0.46549099683761597, - "eval_runtime": 151.5994, - "eval_samples_per_second": 37.309, - "eval_steps_per_second": 4.664, - "eval_wer": 0.3552181797756415, + "eval_loss": 0.49166908860206604, + "eval_runtime": 162.2356, + "eval_samples_per_second": 34.863, + "eval_steps_per_second": 4.358, + "eval_wer": 0.37320858275424884, "step": 10400 }, { "epoch": 2.032520325203252, - "grad_norm": 0.703632652759552, - "learning_rate": 0.0001463076923076923, - "loss": 0.3624, + "grad_norm": 0.9389815926551819, + "learning_rate": 0.00019840677966101694, + "loss": 0.4587, "step": 10500 }, { "epoch": 2.032520325203252, - "eval_loss": 0.4670031666755676, - "eval_runtime": 151.5063, - "eval_samples_per_second": 37.332, - "eval_steps_per_second": 4.666, - "eval_wer": 0.35144677504774435, + "eval_loss": 0.48015162348747253, + "eval_runtime": 162.7857, + "eval_samples_per_second": 34.745, + "eval_steps_per_second": 4.343, + "eval_wer": 0.37264688417775355, "step": 10500 }, { "epoch": 2.051877661633759, - "eval_loss": 0.459250271320343, - "eval_runtime": 153.0971, - "eval_samples_per_second": 36.944, - "eval_steps_per_second": 4.618, - "eval_wer": 0.3628251833544639, + "eval_loss": 0.4883776903152466, + "eval_runtime": 162.7407, + "eval_samples_per_second": 34.755, + "eval_steps_per_second": 4.344, + "eval_wer": 0.3825327791240712, "step": 10600 }, { "epoch": 2.0712349980642664, - "eval_loss": 0.46061432361602783, - "eval_runtime": 152.0732, - "eval_samples_per_second": 37.193, - "eval_steps_per_second": 4.649, - "eval_wer": 0.3545922870761182, + "eval_loss": 0.4841337502002716, + "eval_runtime": 162.7772, + "eval_samples_per_second": 34.747, + "eval_steps_per_second": 4.343, + "eval_wer": 0.37845645231179087, "step": 10700 }, { "epoch": 2.0905923344947737, - "eval_loss": 0.46500489115715027, - "eval_runtime": 151.985, - "eval_samples_per_second": 37.214, - "eval_steps_per_second": 4.652, - "eval_wer": 0.35905377862656673, + "eval_loss": 0.4809282422065735, + "eval_runtime": 162.8688, + "eval_samples_per_second": 34.727, + "eval_steps_per_second": 4.341, + "eval_wer": 0.3738184269230152, "step": 10800 }, { "epoch": 2.1099496709252805, - "eval_loss": 0.46085453033447266, - "eval_runtime": 152.4835, - "eval_samples_per_second": 37.093, - "eval_steps_per_second": 4.637, - "eval_wer": 0.35483301503747333, + "eval_loss": 0.47966596484184265, + "eval_runtime": 163.5921, + "eval_samples_per_second": 34.574, + "eval_steps_per_second": 4.322, + "eval_wer": 0.3713469531864358, "step": 10900 }, { "epoch": 2.129307007355788, - "grad_norm": 0.5008242726325989, - "learning_rate": 0.00013863076923076922, - "loss": 0.3755, + "grad_norm": 0.6634272933006287, + "learning_rate": 0.0001933220338983051, + "loss": 0.3967, "step": 11000 }, { "epoch": 2.129307007355788, - "eval_loss": 0.4708138406276703, - "eval_runtime": 152.3457, - "eval_samples_per_second": 37.126, - "eval_steps_per_second": 4.641, - "eval_wer": 0.35573173275986586, + "eval_loss": 0.4866289794445038, + "eval_runtime": 162.8573, + "eval_samples_per_second": 34.73, + "eval_steps_per_second": 4.341, + "eval_wer": 0.37497392113751987, "step": 11000 }, { "epoch": 2.148664343786295, - "eval_loss": 0.4649392366409302, - "eval_runtime": 152.7087, - "eval_samples_per_second": 37.038, - "eval_steps_per_second": 4.63, - "eval_wer": 0.3548009179759593, + "eval_loss": 0.4938376843929291, + "eval_runtime": 163.5145, + "eval_samples_per_second": 34.59, + "eval_steps_per_second": 4.324, + "eval_wer": 0.3749097270144918, "step": 11100 }, { "epoch": 2.168021680216802, - "eval_loss": 0.4624271094799042, - "eval_runtime": 153.153, - "eval_samples_per_second": 36.93, - "eval_steps_per_second": 4.616, - "eval_wer": 0.355956412190464, + "eval_loss": 0.48603999614715576, + "eval_runtime": 162.8433, + "eval_samples_per_second": 34.733, + "eval_steps_per_second": 4.342, + "eval_wer": 0.36796071319670687, "step": 11200 }, { "epoch": 2.1873790166473093, - "eval_loss": 0.45822229981422424, - "eval_runtime": 156.1964, - "eval_samples_per_second": 36.211, - "eval_steps_per_second": 4.526, - "eval_wer": 0.35229734717786587, + "eval_loss": 0.4849016070365906, + "eval_runtime": 162.5249, + "eval_samples_per_second": 34.801, + "eval_steps_per_second": 4.35, + "eval_wer": 0.369966779541333, "step": 11300 }, { "epoch": 2.2067363530778166, - "eval_loss": 0.466250479221344, - "eval_runtime": 152.6707, - "eval_samples_per_second": 37.047, - "eval_steps_per_second": 4.631, - "eval_wer": 0.3586044197653705, + "eval_loss": 0.49077799916267395, + "eval_runtime": 162.3783, + "eval_samples_per_second": 34.832, + "eval_steps_per_second": 4.354, + "eval_wer": 0.36377204666912744, "step": 11400 }, { "epoch": 2.226093689508324, - "grad_norm": 0.9631055593490601, - "learning_rate": 0.00013093846153846151, - "loss": 0.3891, + "grad_norm": 0.9132543206214905, + "learning_rate": 0.0001882372881355932, + "loss": 0.406, "step": 11500 }, { "epoch": 2.226093689508324, - "eval_loss": 0.46153655648231506, - "eval_runtime": 153.1909, - "eval_samples_per_second": 36.921, - "eval_steps_per_second": 4.615, - "eval_wer": 0.3552181797756415, + "eval_loss": 0.4797042906284332, + "eval_runtime": 162.1749, + "eval_samples_per_second": 34.876, + "eval_steps_per_second": 4.359, + "eval_wer": 0.3678644220121648, "step": 11500 }, { "epoch": 2.2454510259388307, - "eval_loss": 0.4631531238555908, - "eval_runtime": 152.9395, - "eval_samples_per_second": 36.982, - "eval_steps_per_second": 4.623, - "eval_wer": 0.35886119625748264, + "eval_loss": 0.48121991753578186, + "eval_runtime": 161.7765, + "eval_samples_per_second": 34.962, + "eval_steps_per_second": 4.37, + "eval_wer": 0.37585659032915536, "step": 11600 }, { "epoch": 2.264808362369338, - "eval_loss": 0.4495234191417694, - "eval_runtime": 153.0237, - "eval_samples_per_second": 36.962, - "eval_steps_per_second": 4.62, - "eval_wer": 0.3425398404776043, + "eval_loss": 0.47043049335479736, + "eval_runtime": 163.1227, + "eval_samples_per_second": 34.673, + "eval_steps_per_second": 4.334, + "eval_wer": 0.361268475871034, "step": 11700 }, { "epoch": 2.2841656987998453, - "eval_loss": 0.462666779756546, - "eval_runtime": 152.4714, - "eval_samples_per_second": 37.095, - "eval_steps_per_second": 4.637, - "eval_wer": 0.34942466017236123, + "eval_loss": 0.4715932607650757, + "eval_runtime": 163.2336, + "eval_samples_per_second": 34.65, + "eval_steps_per_second": 4.331, + "eval_wer": 0.36345107605398724, "step": 11800 }, { "epoch": 2.303523035230352, - "eval_loss": 0.4550352096557617, - "eval_runtime": 152.8072, - "eval_samples_per_second": 37.014, - "eval_steps_per_second": 4.627, - "eval_wer": 0.3451717995217538, + "eval_loss": 0.4676753580570221, + "eval_runtime": 163.378, + "eval_samples_per_second": 34.619, + "eval_steps_per_second": 4.327, + "eval_wer": 0.3635473672385293, "step": 11900 }, { "epoch": 2.3228803716608595, - "grad_norm": 0.7961182594299316, - "learning_rate": 0.00012324615384615384, - "loss": 0.3946, + "grad_norm": 0.6134137511253357, + "learning_rate": 0.00018315254237288135, + "loss": 0.4088, "step": 12000 }, { "epoch": 2.3228803716608595, - "eval_loss": 0.44988927245140076, - "eval_runtime": 152.9644, - "eval_samples_per_second": 36.976, - "eval_steps_per_second": 4.622, - "eval_wer": 0.3462310025517164, + "eval_loss": 0.47054949402809143, + "eval_runtime": 162.7912, + "eval_samples_per_second": 34.744, + "eval_steps_per_second": 4.343, + "eval_wer": 0.36399672609972555, "step": 12000 }, { "epoch": 2.3422377080913668, - "eval_loss": 0.4501667320728302, - "eval_runtime": 153.061, - "eval_samples_per_second": 36.953, - "eval_steps_per_second": 4.619, - "eval_wer": 0.341978141901109, + "eval_loss": 0.4782082140445709, + "eval_runtime": 162.8776, + "eval_samples_per_second": 34.725, + "eval_steps_per_second": 4.341, + "eval_wer": 0.35905377862656673, "step": 12100 }, { "epoch": 2.3615950445218736, - "eval_loss": 0.4580215513706207, - "eval_runtime": 153.2108, - "eval_samples_per_second": 36.916, - "eval_steps_per_second": 4.615, - "eval_wer": 0.3412399094862865, + "eval_loss": 0.4795554578304291, + "eval_runtime": 163.357, + "eval_samples_per_second": 34.624, + "eval_steps_per_second": 4.328, + "eval_wer": 0.36128452440179104, "step": 12200 }, { "epoch": 2.380952380952381, - "eval_loss": 0.4506891667842865, - "eval_runtime": 153.6611, - "eval_samples_per_second": 36.808, - "eval_steps_per_second": 4.601, - "eval_wer": 0.34339041260772574, + "eval_loss": 0.47130346298217773, + "eval_runtime": 163.1765, + "eval_samples_per_second": 34.662, + "eval_steps_per_second": 4.333, + "eval_wer": 0.3558119754136509, "step": 12300 }, { "epoch": 2.4003097173828882, - "eval_loss": 0.44618555903434753, - "eval_runtime": 153.273, - "eval_samples_per_second": 36.901, - "eval_steps_per_second": 4.613, - "eval_wer": 0.34475453772207154, + "eval_loss": 0.47632816433906555, + "eval_runtime": 163.2851, + "eval_samples_per_second": 34.639, + "eval_steps_per_second": 4.33, + "eval_wer": 0.3588772447882396, "step": 12400 }, { "epoch": 2.419667053813395, - "grad_norm": 0.828158974647522, - "learning_rate": 0.00011556923076923076, - "loss": 0.3824, + "grad_norm": 0.6301820874214172, + "learning_rate": 0.0001780779661016949, + "loss": 0.407, "step": 12500 }, { "epoch": 2.419667053813395, - "eval_loss": 0.44126543402671814, - "eval_runtime": 153.3979, - "eval_samples_per_second": 36.871, - "eval_steps_per_second": 4.609, - "eval_wer": 0.34127200654780054, + "eval_loss": 0.46899136900901794, + "eval_runtime": 163.5219, + "eval_samples_per_second": 34.589, + "eval_steps_per_second": 4.324, + "eval_wer": 0.3565181107669593, "step": 12500 }, { "epoch": 2.4390243902439024, - "eval_loss": 0.44880929589271545, - "eval_runtime": 153.7143, - "eval_samples_per_second": 36.796, - "eval_steps_per_second": 4.599, - "eval_wer": 0.3443212273916323, + "eval_loss": 0.4686334431171417, + "eval_runtime": 163.9109, + "eval_samples_per_second": 34.507, + "eval_steps_per_second": 4.313, + "eval_wer": 0.35767360498146394, "step": 12600 }, { "epoch": 2.4583817266744097, - "eval_loss": 0.44148463010787964, - "eval_runtime": 153.647, - "eval_samples_per_second": 36.812, - "eval_steps_per_second": 4.601, - "eval_wer": 0.3431657331771276, + "eval_loss": 0.467680424451828, + "eval_runtime": 163.7544, + "eval_samples_per_second": 34.54, + "eval_steps_per_second": 4.317, + "eval_wer": 0.3584278859270434, "step": 12700 }, { "epoch": 2.4777390631049165, - "eval_loss": 0.44202256202697754, - "eval_runtime": 153.5743, - "eval_samples_per_second": 36.829, - "eval_steps_per_second": 4.604, - "eval_wer": 0.34093498740190337, + "eval_loss": 0.46144554018974304, + "eval_runtime": 163.8198, + "eval_samples_per_second": 34.526, + "eval_steps_per_second": 4.316, + "eval_wer": 0.35765755645070696, "step": 12800 }, { "epoch": 2.497096399535424, - "eval_loss": 0.4379221200942993, - "eval_runtime": 153.5736, - "eval_samples_per_second": 36.829, - "eval_steps_per_second": 4.604, - "eval_wer": 0.3361204281748006, + "eval_loss": 0.455834299325943, + "eval_runtime": 163.2394, + "eval_samples_per_second": 34.649, + "eval_steps_per_second": 4.331, + "eval_wer": 0.35992039928744524, "step": 12900 }, { "epoch": 2.516453735965931, - "grad_norm": 1.2163615226745605, - "learning_rate": 0.00010787692307692307, - "loss": 0.372, + "grad_norm": 0.41953468322753906, + "learning_rate": 0.00017299322033898304, + "loss": 0.3855, "step": 13000 }, { "epoch": 2.516453735965931, - "eval_loss": 0.43855318427085876, - "eval_runtime": 153.5476, - "eval_samples_per_second": 36.835, - "eval_steps_per_second": 4.604, - "eval_wer": 0.3334884691306511, + "eval_loss": 0.4555678367614746, + "eval_runtime": 164.8785, + "eval_samples_per_second": 34.304, + "eval_steps_per_second": 4.288, + "eval_wer": 0.3565341592977163, "step": 13000 }, { "epoch": 2.535811072396438, - "eval_loss": 0.44449883699417114, - "eval_runtime": 153.7016, - "eval_samples_per_second": 36.799, - "eval_steps_per_second": 4.6, - "eval_wer": 0.3397794931873987, + "eval_loss": 0.4600988030433655, + "eval_runtime": 163.6778, + "eval_samples_per_second": 34.556, + "eval_steps_per_second": 4.319, + "eval_wer": 0.3558280239444079, "step": 13100 }, { "epoch": 2.5551684088269453, - "eval_loss": 0.4401286542415619, - "eval_runtime": 154.2488, - "eval_samples_per_second": 36.668, - "eval_steps_per_second": 4.584, - "eval_wer": 0.3392819887339314, + "eval_loss": 0.4650043547153473, + "eval_runtime": 163.58, + "eval_samples_per_second": 34.576, + "eval_steps_per_second": 4.322, + "eval_wer": 0.354303413522492, "step": 13200 }, { "epoch": 2.5745257452574526, - "eval_loss": 0.437770813703537, - "eval_runtime": 153.8927, - "eval_samples_per_second": 36.753, - "eval_steps_per_second": 4.594, - "eval_wer": 0.335077273675595, + "eval_loss": 0.4737236201763153, + "eval_runtime": 163.9532, + "eval_samples_per_second": 34.498, + "eval_steps_per_second": 4.312, + "eval_wer": 0.35483301503747333, "step": 13300 }, { "epoch": 2.59388308168796, - "eval_loss": 0.4315861463546753, - "eval_runtime": 153.7886, - "eval_samples_per_second": 36.778, - "eval_steps_per_second": 4.597, - "eval_wer": 0.33517356486013705, + "eval_loss": 0.45056912302970886, + "eval_runtime": 163.6462, + "eval_samples_per_second": 34.562, + "eval_steps_per_second": 4.32, + "eval_wer": 0.3534367928616135, "step": 13400 }, { "epoch": 2.6132404181184667, - "grad_norm": 1.084632158279419, - "learning_rate": 0.0001002, - "loss": 0.3521, + "grad_norm": 1.5978127717971802, + "learning_rate": 0.0001679186440677966, + "loss": 0.3748, "step": 13500 }, { "epoch": 2.6132404181184667, - "eval_loss": 0.43864014744758606, - "eval_runtime": 153.9711, - "eval_samples_per_second": 36.734, - "eval_steps_per_second": 4.592, - "eval_wer": 0.33398597358411836, + "eval_loss": 0.4606887698173523, + "eval_runtime": 163.0481, + "eval_samples_per_second": 34.689, + "eval_steps_per_second": 4.336, + "eval_wer": 0.3589253903805107, "step": 13500 }, { "epoch": 2.632597754548974, - "eval_loss": 0.43551018834114075, - "eval_runtime": 154.3017, - "eval_samples_per_second": 36.655, - "eval_steps_per_second": 4.582, - "eval_wer": 0.33096884980180064, + "eval_loss": 0.45494645833969116, + "eval_runtime": 162.9584, + "eval_samples_per_second": 34.708, + "eval_steps_per_second": 4.339, + "eval_wer": 0.35372566641523967, "step": 13600 }, { "epoch": 2.6519550909794813, - "eval_loss": 0.4325660765171051, - "eval_runtime": 154.4812, - "eval_samples_per_second": 36.613, - "eval_steps_per_second": 4.577, - "eval_wer": 0.3343871868530436, + "eval_loss": 0.4562608301639557, + "eval_runtime": 164.1384, + "eval_samples_per_second": 34.459, + "eval_steps_per_second": 4.307, + "eval_wer": 0.36409301728426763, "step": 13700 }, { "epoch": 2.6713124274099886, - "eval_loss": 0.4263465404510498, - "eval_runtime": 154.0733, - "eval_samples_per_second": 36.71, - "eval_steps_per_second": 4.589, - "eval_wer": 0.32629872735151094, + "eval_loss": 0.44666969776153564, + "eval_runtime": 163.7423, + "eval_samples_per_second": 34.542, + "eval_steps_per_second": 4.318, + "eval_wer": 0.34369533469210894, "step": 13800 }, { "epoch": 2.6906697638404955, - "eval_loss": 0.42636117339134216, - "eval_runtime": 154.1615, - "eval_samples_per_second": 36.689, - "eval_steps_per_second": 4.586, - "eval_wer": 0.32353838006130536, + "eval_loss": 0.4536294639110565, + "eval_runtime": 163.3892, + "eval_samples_per_second": 34.617, + "eval_steps_per_second": 4.327, + "eval_wer": 0.35446389883006213, "step": 13900 }, { "epoch": 2.710027100271003, - "grad_norm": 1.1979655027389526, - "learning_rate": 9.25076923076923e-05, - "loss": 0.3592, + "grad_norm": 0.6591352224349976, + "learning_rate": 0.00016283389830508475, + "loss": 0.3888, "step": 14000 }, { "epoch": 2.710027100271003, - "eval_loss": 0.4322036802768707, - "eval_runtime": 154.5242, - "eval_samples_per_second": 36.603, - "eval_steps_per_second": 4.575, - "eval_wer": 0.3299738408948661, + "eval_loss": 0.4504217505455017, + "eval_runtime": 163.3199, + "eval_samples_per_second": 34.631, + "eval_steps_per_second": 4.329, + "eval_wer": 0.3509653191250341, "step": 14000 }, { "epoch": 2.72938443670151, - "eval_loss": 0.4294193983078003, - "eval_runtime": 154.4329, - "eval_samples_per_second": 36.624, - "eval_steps_per_second": 4.578, - "eval_wer": 0.3261542905746979, + "eval_loss": 0.44697660207748413, + "eval_runtime": 163.5943, + "eval_samples_per_second": 34.573, + "eval_steps_per_second": 4.322, + "eval_wer": 0.3602092728410714, "step": 14100 }, { "epoch": 2.748741773132017, - "eval_loss": 0.43099814653396606, - "eval_runtime": 154.4209, - "eval_samples_per_second": 36.627, - "eval_steps_per_second": 4.578, - "eval_wer": 0.32329765209995026, + "eval_loss": 0.45640549063682556, + "eval_runtime": 163.6363, + "eval_samples_per_second": 34.564, + "eval_steps_per_second": 4.321, + "eval_wer": 0.3539022002535668, "step": 14200 }, { "epoch": 2.7680991095625243, - "eval_loss": 0.42700281739234924, - "eval_runtime": 155.4008, - "eval_samples_per_second": 36.396, - "eval_steps_per_second": 4.55, - "eval_wer": 0.3268122803357353, + "eval_loss": 0.45214343070983887, + "eval_runtime": 164.011, + "eval_samples_per_second": 34.485, + "eval_steps_per_second": 4.311, + "eval_wer": 0.3561971401518191, "step": 14300 }, { "epoch": 2.7874564459930316, - "eval_loss": 0.4209098219871521, - "eval_runtime": 156.5271, - "eval_samples_per_second": 36.134, - "eval_steps_per_second": 4.517, - "eval_wer": 0.3254321066906325, + "eval_loss": 0.4452911913394928, + "eval_runtime": 163.9028, + "eval_samples_per_second": 34.508, + "eval_steps_per_second": 4.314, + "eval_wer": 0.35221710452408084, "step": 14400 }, { "epoch": 2.8068137824235384, - "grad_norm": 0.6974443793296814, - "learning_rate": 8.48153846153846e-05, - "loss": 0.3459, + "grad_norm": 0.6879103779792786, + "learning_rate": 0.00015774915254237285, + "loss": 0.376, "step": 14500 }, { "epoch": 2.8068137824235384, - "eval_loss": 0.42542555928230286, - "eval_runtime": 157.9392, - "eval_samples_per_second": 35.811, - "eval_steps_per_second": 4.476, - "eval_wer": 0.32729373625844554, + "eval_loss": 0.45518526434898376, + "eval_runtime": 164.0636, + "eval_samples_per_second": 34.474, + "eval_steps_per_second": 4.309, + "eval_wer": 0.35170355153985655, "step": 14500 }, { "epoch": 2.8261711188540457, - "eval_loss": 0.42783817648887634, - "eval_runtime": 155.0217, - "eval_samples_per_second": 36.485, - "eval_steps_per_second": 4.561, - "eval_wer": 0.3231532153231372, + "eval_loss": 0.45344606041908264, + "eval_runtime": 163.5444, + "eval_samples_per_second": 34.584, + "eval_steps_per_second": 4.323, + "eval_wer": 0.3549774518142864, "step": 14600 }, { "epoch": 2.845528455284553, - "eval_loss": 0.4212438464164734, - "eval_runtime": 154.853, - "eval_samples_per_second": 36.525, - "eval_steps_per_second": 4.566, - "eval_wer": 0.3215002166551652, + "eval_loss": 0.45520055294036865, + "eval_runtime": 163.3824, + "eval_samples_per_second": 34.618, + "eval_steps_per_second": 4.327, + "eval_wer": 0.3405016770714641, "step": 14700 }, { "epoch": 2.86488579171506, - "eval_loss": 0.4169256389141083, - "eval_runtime": 154.5142, - "eval_samples_per_second": 36.605, - "eval_steps_per_second": 4.576, - "eval_wer": 0.31928551941069794, + "eval_loss": 0.45560306310653687, + "eval_runtime": 164.0292, + "eval_samples_per_second": 34.482, + "eval_steps_per_second": 4.31, + "eval_wer": 0.35138258092471636, "step": 14800 }, { "epoch": 2.884243128145567, - "eval_loss": 0.42132049798965454, - "eval_runtime": 154.8091, - "eval_samples_per_second": 36.535, - "eval_steps_per_second": 4.567, - "eval_wer": 0.3195262473720531, + "eval_loss": 0.44232824444770813, + "eval_runtime": 164.3662, + "eval_samples_per_second": 34.411, + "eval_steps_per_second": 4.301, + "eval_wer": 0.3467606040666977, "step": 14900 }, { "epoch": 2.9036004645760745, - "grad_norm": 1.099702000617981, - "learning_rate": 7.713846153846152e-05, - "loss": 0.3483, + "grad_norm": 0.5280432105064392, + "learning_rate": 0.00015267457627118642, + "loss": 0.379, "step": 15000 }, { "epoch": 2.9036004645760745, - "eval_loss": 0.41696369647979736, - "eval_runtime": 155.3223, - "eval_samples_per_second": 36.415, - "eval_steps_per_second": 4.552, - "eval_wer": 0.31652517212049236, + "eval_loss": 0.43873003125190735, + "eval_runtime": 163.5232, + "eval_samples_per_second": 34.588, + "eval_steps_per_second": 4.324, + "eval_wer": 0.34268427725441736, "step": 15000 }, { "epoch": 2.9229578010065813, - "eval_loss": 0.41230952739715576, - "eval_runtime": 154.9181, - "eval_samples_per_second": 36.51, - "eval_steps_per_second": 4.564, - "eval_wer": 0.31418208662996905, + "eval_loss": 0.4372723400592804, + "eval_runtime": 163.4018, + "eval_samples_per_second": 34.614, + "eval_steps_per_second": 4.327, + "eval_wer": 0.34364718909983794, "step": 15100 }, { "epoch": 2.9423151374370886, - "eval_loss": 0.4116990566253662, - "eval_runtime": 154.97, - "eval_samples_per_second": 36.497, - "eval_steps_per_second": 4.562, - "eval_wer": 0.31337966009211854, + "eval_loss": 0.4399470090866089, + "eval_runtime": 164.335, + "eval_samples_per_second": 34.418, + "eval_steps_per_second": 4.302, + "eval_wer": 0.33870424162667906, "step": 15200 }, { "epoch": 2.961672473867596, - "eval_loss": 0.410386323928833, - "eval_runtime": 155.0232, - "eval_samples_per_second": 36.485, - "eval_steps_per_second": 4.561, - "eval_wer": 0.31158222464733354, + "eval_loss": 0.44378861784935, + "eval_runtime": 164.0477, + "eval_samples_per_second": 34.478, + "eval_steps_per_second": 4.31, + "eval_wer": 0.3380462518656417, "step": 15300 }, { "epoch": 2.9810298102981028, - "eval_loss": 0.41244322061538696, - "eval_runtime": 154.4682, - "eval_samples_per_second": 36.616, - "eval_steps_per_second": 4.577, - "eval_wer": 0.31419813516072603, + "eval_loss": 0.436974436044693, + "eval_runtime": 163.6525, + "eval_samples_per_second": 34.561, + "eval_steps_per_second": 4.32, + "eval_wer": 0.3430694419925856, "step": 15400 }, { "epoch": 3.00038714672861, - "grad_norm": 0.725528359413147, - "learning_rate": 6.946153846153845e-05, - "loss": 0.3501, + "grad_norm": 0.660970151424408, + "learning_rate": 0.00014758983050847457, + "loss": 0.3731, "step": 15500 }, { "epoch": 3.00038714672861, - "eval_loss": 0.40684688091278076, - "eval_runtime": 154.5863, - "eval_samples_per_second": 36.588, - "eval_steps_per_second": 4.573, - "eval_wer": 0.31272167033108117, + "eval_loss": 0.43810611963272095, + "eval_runtime": 162.9215, + "eval_samples_per_second": 34.716, + "eval_steps_per_second": 4.34, + "eval_wer": 0.33413041036093144, "step": 15500 }, { "epoch": 3.0197444831591174, - "eval_loss": 0.4200752079486847, - "eval_runtime": 154.5821, - "eval_samples_per_second": 36.589, - "eval_steps_per_second": 4.574, - "eval_wer": 0.3087416347033429, + "eval_loss": 0.45139721035957336, + "eval_runtime": 164.2748, + "eval_samples_per_second": 34.43, + "eval_steps_per_second": 4.304, + "eval_wer": 0.3286418128420343, "step": 15600 }, { "epoch": 3.0391018195896247, - "eval_loss": 0.4186869263648987, - "eval_runtime": 154.7417, - "eval_samples_per_second": 36.551, - "eval_steps_per_second": 4.569, - "eval_wer": 0.3137808733610438, + "eval_loss": 0.43782538175582886, + "eval_runtime": 164.2188, + "eval_samples_per_second": 34.442, + "eval_steps_per_second": 4.305, + "eval_wer": 0.3340180706456324, "step": 15700 }, { "epoch": 3.0584591560201315, - "eval_loss": 0.41133585572242737, - "eval_runtime": 155.21, - "eval_samples_per_second": 36.441, - "eval_steps_per_second": 4.555, - "eval_wer": 0.31092423488629617, + "eval_loss": 0.44340020418167114, + "eval_runtime": 163.4459, + "eval_samples_per_second": 34.605, + "eval_steps_per_second": 4.326, + "eval_wer": 0.3441446935533052, "step": 15800 }, { "epoch": 3.077816492450639, - "eval_loss": 0.4191639721393585, - "eval_runtime": 155.276, - "eval_samples_per_second": 36.425, - "eval_steps_per_second": 4.553, - "eval_wer": 0.30851695527274475, + "eval_loss": 0.44192126393318176, + "eval_runtime": 164.2009, + "eval_samples_per_second": 34.446, + "eval_steps_per_second": 4.306, + "eval_wer": 0.3399239299642118, "step": 15900 }, { "epoch": 3.097173828881146, - "grad_norm": 0.5114701390266418, - "learning_rate": 6.176923076923076e-05, - "loss": 0.2754, + "grad_norm": 0.6999391913414001, + "learning_rate": 0.0001425050847457627, + "loss": 0.3176, "step": 16000 }, { "epoch": 3.097173828881146, - "eval_loss": 0.4161028265953064, - "eval_runtime": 154.597, - "eval_samples_per_second": 36.585, - "eval_steps_per_second": 4.573, - "eval_wer": 0.30901445972621205, + "eval_loss": 0.4407601058483124, + "eval_runtime": 164.7219, + "eval_samples_per_second": 34.337, + "eval_steps_per_second": 4.292, + "eval_wer": 0.3335366147229221, "step": 16000 }, { "epoch": 3.116531165311653, - "eval_loss": 0.4183988571166992, - "eval_runtime": 155.0124, - "eval_samples_per_second": 36.487, - "eval_steps_per_second": 4.561, - "eval_wer": 0.307152830158399, + "eval_loss": 0.436761736869812, + "eval_runtime": 164.2581, + "eval_samples_per_second": 34.434, + "eval_steps_per_second": 4.304, + "eval_wer": 0.33584760315193146, "step": 16100 }, { "epoch": 3.1358885017421603, - "eval_loss": 0.4186756908893585, - "eval_runtime": 154.8535, - "eval_samples_per_second": 36.525, - "eval_steps_per_second": 4.566, - "eval_wer": 0.3060936271284364, + "eval_loss": 0.4477560520172119, + "eval_runtime": 163.8979, + "eval_samples_per_second": 34.509, + "eval_steps_per_second": 4.314, + "eval_wer": 0.3400523182102678, "step": 16200 }, { "epoch": 3.1552458381726676, - "eval_loss": 0.4193824827671051, - "eval_runtime": 154.3195, - "eval_samples_per_second": 36.651, - "eval_steps_per_second": 4.581, - "eval_wer": 0.3059652388823803, + "eval_loss": 0.4414171576499939, + "eval_runtime": 164.4332, + "eval_samples_per_second": 34.397, + "eval_steps_per_second": 4.3, + "eval_wer": 0.3373882621046043, "step": 16300 }, { "epoch": 3.1746031746031744, - "eval_loss": 0.40788766741752625, - "eval_runtime": 154.9673, - "eval_samples_per_second": 36.498, - "eval_steps_per_second": 4.562, - "eval_wer": 0.3038949784147261, + "eval_loss": 0.4476623833179474, + "eval_runtime": 163.6322, + "eval_samples_per_second": 34.565, + "eval_steps_per_second": 4.321, + "eval_wer": 0.335013079552567, "step": 16400 }, { "epoch": 3.1939605110336817, - "grad_norm": 0.5594165325164795, - "learning_rate": 5.4076923076923074e-05, - "loss": 0.2802, + "grad_norm": 0.5408484935760498, + "learning_rate": 0.00013742033898305083, + "loss": 0.3201, "step": 16500 }, { "epoch": 3.1939605110336817, - "eval_loss": 0.41461309790611267, - "eval_runtime": 154.8662, - "eval_samples_per_second": 36.522, - "eval_steps_per_second": 4.565, - "eval_wer": 0.30424804609138034, + "eval_loss": 0.4305751919746399, + "eval_runtime": 163.8253, + "eval_samples_per_second": 34.525, + "eval_steps_per_second": 4.316, + "eval_wer": 0.32917141435701563, "step": 16500 }, { "epoch": 3.213317847464189, - "eval_loss": 0.4168522357940674, - "eval_runtime": 155.0374, - "eval_samples_per_second": 36.482, - "eval_steps_per_second": 4.56, - "eval_wer": 0.30116672818603457, + "eval_loss": 0.4534677267074585, + "eval_runtime": 163.8666, + "eval_samples_per_second": 34.516, + "eval_steps_per_second": 4.314, + "eval_wer": 0.32941214231837074, "step": 16600 }, { "epoch": 3.2326751838946963, - "eval_loss": 0.40926745533943176, - "eval_runtime": 154.7423, - "eval_samples_per_second": 36.551, - "eval_steps_per_second": 4.569, - "eval_wer": 0.3023864165235673, + "eval_loss": 0.4379562437534332, + "eval_runtime": 164.1184, + "eval_samples_per_second": 34.463, + "eval_steps_per_second": 4.308, + "eval_wer": 0.33408226476866043, "step": 16700 }, { "epoch": 3.252032520325203, - "eval_loss": 0.4115259051322937, - "eval_runtime": 154.7933, - "eval_samples_per_second": 36.539, - "eval_steps_per_second": 4.567, - "eval_wer": 0.3005408354865112, + "eval_loss": 0.43677663803100586, + "eval_runtime": 163.6899, + "eval_samples_per_second": 34.553, + "eval_steps_per_second": 4.319, + "eval_wer": 0.33252555728523053, "step": 16800 }, { "epoch": 3.2713898567557105, - "eval_loss": 0.40197211503982544, - "eval_runtime": 155.5964, - "eval_samples_per_second": 36.35, - "eval_steps_per_second": 4.544, - "eval_wer": 0.30410360931456726, + "eval_loss": 0.4359833598136902, + "eval_runtime": 164.1551, + "eval_samples_per_second": 34.455, + "eval_steps_per_second": 4.307, + "eval_wer": 0.33043924828681936, "step": 16900 }, { "epoch": 3.290747193186218, - "grad_norm": 1.4730154275894165, - "learning_rate": 4.6384615384615385e-05, - "loss": 0.2723, + "grad_norm": 0.5537161231040955, + "learning_rate": 0.00013233559322033898, + "loss": 0.3101, "step": 17000 }, { "epoch": 3.290747193186218, - "eval_loss": 0.4058869779109955, - "eval_runtime": 155.0898, - "eval_samples_per_second": 36.469, - "eval_steps_per_second": 4.559, - "eval_wer": 0.30442457992970745, + "eval_loss": 0.4347226917743683, + "eval_runtime": 164.2964, + "eval_samples_per_second": 34.426, + "eval_steps_per_second": 4.303, + "eval_wer": 0.32812825985781, "step": 17000 }, { "epoch": 3.3101045296167246, - "eval_loss": 0.40676185488700867, - "eval_runtime": 155.0576, - "eval_samples_per_second": 36.477, - "eval_steps_per_second": 4.56, - "eval_wer": 0.3013753590858757, + "eval_loss": 0.4375491738319397, + "eval_runtime": 162.8938, + "eval_samples_per_second": 34.722, + "eval_steps_per_second": 4.34, + "eval_wer": 0.3284973760652212, "step": 17100 }, { "epoch": 3.329461866047232, - "eval_loss": 0.40653425455093384, - "eval_runtime": 155.8377, - "eval_samples_per_second": 36.294, - "eval_steps_per_second": 4.537, - "eval_wer": 0.30878978029561394, + "eval_loss": 0.4491961896419525, + "eval_runtime": 163.5698, + "eval_samples_per_second": 34.579, + "eval_steps_per_second": 4.322, + "eval_wer": 0.33032690857152025, "step": 17200 }, { "epoch": 3.3488192024777392, - "eval_loss": 0.4082197844982147, - "eval_runtime": 155.7924, - "eval_samples_per_second": 36.305, - "eval_steps_per_second": 4.538, - "eval_wer": 0.3010543884707355, + "eval_loss": 0.4268127977848053, + "eval_runtime": 164.9468, + "eval_samples_per_second": 34.29, + "eval_steps_per_second": 4.286, + "eval_wer": 0.3284652790037072, "step": 17300 }, { "epoch": 3.368176538908246, - "eval_loss": 0.4083554446697235, - "eval_runtime": 155.6775, - "eval_samples_per_second": 36.332, - "eval_steps_per_second": 4.541, - "eval_wer": 0.3007494663863523, + "eval_loss": 0.4377237558364868, + "eval_runtime": 164.0847, + "eval_samples_per_second": 34.47, + "eval_steps_per_second": 4.309, + "eval_wer": 0.3269888141740624, "step": 17400 }, { "epoch": 3.3875338753387534, - "grad_norm": 0.5211097598075867, - "learning_rate": 3.87076923076923e-05, - "loss": 0.2557, + "grad_norm": 0.5330023765563965, + "learning_rate": 0.00012726101694915254, + "loss": 0.2963, "step": 17500 }, { "epoch": 3.3875338753387534, - "eval_loss": 0.4009736180305481, - "eval_runtime": 155.105, - "eval_samples_per_second": 36.466, - "eval_steps_per_second": 4.558, - "eval_wer": 0.29924090449519347, + "eval_loss": 0.42494186758995056, + "eval_runtime": 163.0698, + "eval_samples_per_second": 34.685, + "eval_steps_per_second": 4.336, + "eval_wer": 0.3322527322623614, "step": 17500 }, { "epoch": 3.4068912117692607, - "eval_loss": 0.4061805009841919, - "eval_runtime": 154.8792, - "eval_samples_per_second": 36.519, - "eval_steps_per_second": 4.565, - "eval_wer": 0.2999951854407729, + "eval_loss": 0.4404699206352234, + "eval_runtime": 164.4408, + "eval_samples_per_second": 34.395, + "eval_steps_per_second": 4.299, + "eval_wer": 0.3338736338688193, "step": 17600 }, { "epoch": 3.4262485481997675, - "eval_loss": 0.40264037251472473, - "eval_runtime": 155.9957, - "eval_samples_per_second": 36.257, - "eval_steps_per_second": 4.532, - "eval_wer": 0.2980533132191748, + "eval_loss": 0.43636277318000793, + "eval_runtime": 163.9679, + "eval_samples_per_second": 34.495, + "eval_steps_per_second": 4.312, + "eval_wer": 0.3285615701882493, "step": 17700 }, { "epoch": 3.445605884630275, - "eval_loss": 0.40035372972488403, - "eval_runtime": 155.0928, - "eval_samples_per_second": 36.468, - "eval_steps_per_second": 4.559, - "eval_wer": 0.29893598241081026, + "eval_loss": 0.4350505769252777, + "eval_runtime": 163.6477, + "eval_samples_per_second": 34.562, + "eval_steps_per_second": 4.32, + "eval_wer": 0.3309207042095296, "step": 17800 }, { "epoch": 3.464963221060782, - "eval_loss": 0.40443336963653564, - "eval_runtime": 154.9305, - "eval_samples_per_second": 36.507, - "eval_steps_per_second": 4.563, - "eval_wer": 0.29906437065686636, + "eval_loss": 0.42997920513153076, + "eval_runtime": 163.7592, + "eval_samples_per_second": 34.539, + "eval_steps_per_second": 4.317, + "eval_wer": 0.322880390300268, "step": 17900 }, { "epoch": 3.484320557491289, - "grad_norm": 0.7458967566490173, - "learning_rate": 3.101538461538461e-05, - "loss": 0.2578, + "grad_norm": 0.31616127490997314, + "learning_rate": 0.00012217627118644067, + "loss": 0.3062, "step": 18000 }, { "epoch": 3.484320557491289, - "eval_loss": 0.4003549814224243, - "eval_runtime": 155.7394, - "eval_samples_per_second": 36.317, - "eval_steps_per_second": 4.54, - "eval_wer": 0.29660894545104394, + "eval_loss": 0.42307358980178833, + "eval_runtime": 164.6469, + "eval_samples_per_second": 34.352, + "eval_steps_per_second": 4.294, + "eval_wer": 0.32523952432154835, "step": 18000 }, { "epoch": 3.5036778939217963, - "eval_loss": 0.40592488646507263, - "eval_runtime": 159.2644, - "eval_samples_per_second": 35.513, - "eval_steps_per_second": 4.439, - "eval_wer": 0.29449053939111874, + "eval_loss": 0.4325993061065674, + "eval_runtime": 164.1143, + "eval_samples_per_second": 34.464, + "eval_steps_per_second": 4.308, + "eval_wer": 0.32326555503843624, "step": 18100 }, { "epoch": 3.5230352303523036, - "eval_loss": 0.4014962613582611, - "eval_runtime": 155.6654, - "eval_samples_per_second": 36.334, - "eval_steps_per_second": 4.542, - "eval_wer": 0.29632007189741777, + "eval_loss": 0.43141353130340576, + "eval_runtime": 163.522, + "eval_samples_per_second": 34.589, + "eval_steps_per_second": 4.324, + "eval_wer": 0.3282405995731091, "step": 18200 }, { "epoch": 3.5423925667828104, - "eval_loss": 0.396659791469574, - "eval_runtime": 156.1536, - "eval_samples_per_second": 36.221, - "eval_steps_per_second": 4.528, - "eval_wer": 0.29585466450546455, + "eval_loss": 0.4343957006931305, + "eval_runtime": 163.8089, + "eval_samples_per_second": 34.528, + "eval_steps_per_second": 4.316, + "eval_wer": 0.32894673492641746, "step": 18300 }, { "epoch": 3.5617499032133177, - "eval_loss": 0.4001907706260681, - "eval_runtime": 155.7578, - "eval_samples_per_second": 36.313, - "eval_steps_per_second": 4.539, - "eval_wer": 0.29412142318370754, + "eval_loss": 0.42664915323257446, + "eval_runtime": 166.5849, + "eval_samples_per_second": 33.953, + "eval_steps_per_second": 4.244, + "eval_wer": 0.32207796376241754, "step": 18400 }, { "epoch": 3.581107239643825, - "grad_norm": 0.6122294664382935, - "learning_rate": 2.3338461538461535e-05, - "loss": 0.2508, + "grad_norm": 0.40817028284072876, + "learning_rate": 0.00011710169491525424, + "loss": 0.2968, "step": 18500 }, { "epoch": 3.581107239643825, - "eval_loss": 0.39826107025146484, - "eval_runtime": 155.467, - "eval_samples_per_second": 36.381, - "eval_steps_per_second": 4.548, - "eval_wer": 0.2945547335141468, + "eval_loss": 0.4305819571018219, + "eval_runtime": 164.3676, + "eval_samples_per_second": 34.411, + "eval_steps_per_second": 4.301, + "eval_wer": 0.32161255637046426, "step": 18500 }, { "epoch": 3.600464576074332, - "eval_loss": 0.3958674967288971, - "eval_runtime": 155.8242, - "eval_samples_per_second": 36.297, - "eval_steps_per_second": 4.537, - "eval_wer": 0.29365601579175427, + "eval_loss": 0.4318545460700989, + "eval_runtime": 166.4377, + "eval_samples_per_second": 33.983, + "eval_steps_per_second": 4.248, + "eval_wer": 0.3238914477379596, "step": 18600 }, { "epoch": 3.619821912504839, - "eval_loss": 0.3970955014228821, - "eval_runtime": 155.4329, - "eval_samples_per_second": 36.389, - "eval_steps_per_second": 4.549, - "eval_wer": 0.2942016658374926, + "eval_loss": 0.4271145164966583, + "eval_runtime": 166.5812, + "eval_samples_per_second": 33.953, + "eval_steps_per_second": 4.244, + "eval_wer": 0.3232013609154082, "step": 18700 }, { "epoch": 3.6391792489353465, - "eval_loss": 0.3906669616699219, - "eval_runtime": 155.4929, - "eval_samples_per_second": 36.375, - "eval_steps_per_second": 4.547, - "eval_wer": 0.2923239877389225, + "eval_loss": 0.41837719082832336, + "eval_runtime": 164.5624, + "eval_samples_per_second": 34.37, + "eval_steps_per_second": 4.296, + "eval_wer": 0.32641106706681006, "step": 18800 }, { "epoch": 3.658536585365854, - "eval_loss": 0.39506247639656067, - "eval_runtime": 155.5246, - "eval_samples_per_second": 36.367, - "eval_steps_per_second": 4.546, - "eval_wer": 0.2903981640480814, + "eval_loss": 0.4237981140613556, + "eval_runtime": 165.5252, + "eval_samples_per_second": 34.17, + "eval_steps_per_second": 4.271, + "eval_wer": 0.31997560623324933, "step": 18900 }, { "epoch": 3.6778939217963607, - "grad_norm": 0.33715635538101196, - "learning_rate": 1.5646153846153846e-05, - "loss": 0.2659, + "grad_norm": 0.9548519253730774, + "learning_rate": 0.00011201694915254236, + "loss": 0.3191, "step": 19000 }, { "epoch": 3.6778939217963607, - "eval_loss": 0.3892674744129181, - "eval_runtime": 155.5533, - "eval_samples_per_second": 36.361, - "eval_steps_per_second": 4.545, - "eval_wer": 0.29309431721525897, + "eval_loss": 0.41389960050582886, + "eval_runtime": 163.7093, + "eval_samples_per_second": 34.549, + "eval_steps_per_second": 4.319, + "eval_wer": 0.3225915167466418, "step": 19000 }, { "epoch": 3.697251258226868, - "eval_loss": 0.39077267050743103, - "eval_runtime": 155.448, - "eval_samples_per_second": 36.385, - "eval_steps_per_second": 4.548, - "eval_wer": 0.2900771934329412, + "eval_loss": 0.42384064197540283, + "eval_runtime": 164.2841, + "eval_samples_per_second": 34.428, + "eval_steps_per_second": 4.304, + "eval_wer": 0.3159955706055111, "step": 19100 }, { "epoch": 3.7166085946573753, - "eval_loss": 0.39407432079315186, - "eval_runtime": 155.4696, - "eval_samples_per_second": 36.38, - "eval_steps_per_second": 4.548, - "eval_wer": 0.2884241947649693, + "eval_loss": 0.4176156520843506, + "eval_runtime": 163.8287, + "eval_samples_per_second": 34.524, + "eval_steps_per_second": 4.315, + "eval_wer": 0.319301567941455, "step": 19200 }, { "epoch": 3.7359659310878826, - "eval_loss": 0.3924821615219116, - "eval_runtime": 155.4791, - "eval_samples_per_second": 36.378, - "eval_steps_per_second": 4.547, - "eval_wer": 0.2890019418722216, + "eval_loss": 0.4196203351020813, + "eval_runtime": 165.0023, + "eval_samples_per_second": 34.278, + "eval_steps_per_second": 4.285, + "eval_wer": 0.3202644797868755, "step": 19300 }, { "epoch": 3.7553232675183894, - "eval_loss": 0.3916691243648529, - "eval_runtime": 155.9516, - "eval_samples_per_second": 36.268, - "eval_steps_per_second": 4.533, - "eval_wer": 0.2892908154258478, + "eval_loss": 0.409524530172348, + "eval_runtime": 164.4937, + "eval_samples_per_second": 34.384, + "eval_steps_per_second": 4.298, + "eval_wer": 0.3181621222577073, "step": 19400 }, { "epoch": 3.7746806039488967, - "grad_norm": 0.4647356867790222, - "learning_rate": 7.953846153846153e-06, - "loss": 0.2488, + "grad_norm": 0.43373510241508484, + "learning_rate": 0.00010693220338983049, + "loss": 0.2921, "step": 19500 }, { "epoch": 3.7746806039488967, - "eval_loss": 0.39043277502059937, - "eval_runtime": 155.3552, - "eval_samples_per_second": 36.407, - "eval_steps_per_second": 4.551, - "eval_wer": 0.2884562918264833, + "eval_loss": 0.41209807991981506, + "eval_runtime": 164.8329, + "eval_samples_per_second": 34.314, + "eval_steps_per_second": 4.289, + "eval_wer": 0.31665356036654846, "step": 19500 }, { "epoch": 3.794037940379404, - "eval_loss": 0.39014604687690735, - "eval_runtime": 155.2137, - "eval_samples_per_second": 36.44, - "eval_steps_per_second": 4.555, - "eval_wer": 0.2887933109723805, + "eval_loss": 0.4112759530544281, + "eval_runtime": 164.1863, + "eval_samples_per_second": 34.449, + "eval_steps_per_second": 4.306, + "eval_wer": 0.31455120283738025, "step": 19600 }, { "epoch": 3.813395276809911, - "eval_loss": 0.3883425295352936, - "eval_runtime": 155.5369, - "eval_samples_per_second": 36.364, - "eval_steps_per_second": 4.546, - "eval_wer": 0.28922662130281973, + "eval_loss": 0.4094259738922119, + "eval_runtime": 164.4841, + "eval_samples_per_second": 34.386, + "eval_steps_per_second": 4.298, + "eval_wer": 0.3160758132592961, "step": 19700 }, { "epoch": 3.832752613240418, - "eval_loss": 0.38913780450820923, - "eval_runtime": 155.8958, - "eval_samples_per_second": 36.281, - "eval_steps_per_second": 4.535, - "eval_wer": 0.28903403893373564, + "eval_loss": 0.40931811928749084, + "eval_runtime": 164.2911, + "eval_samples_per_second": 34.427, + "eval_steps_per_second": 4.303, + "eval_wer": 0.31386111601482886, "step": 19800 }, { "epoch": 3.8521099496709255, - "eval_loss": 0.3888201415538788, - "eval_runtime": 155.6372, - "eval_samples_per_second": 36.341, - "eval_steps_per_second": 4.543, - "eval_wer": 0.2888254080338945, + "eval_loss": 0.41117748618125916, + "eval_runtime": 165.1732, + "eval_samples_per_second": 34.243, + "eval_steps_per_second": 4.28, + "eval_wer": 0.31731155012758583, "step": 19900 }, { "epoch": 3.8714672861014323, - "grad_norm": 0.3741956055164337, - "learning_rate": 2.615384615384615e-07, - "loss": 0.2602, + "grad_norm": 0.5022397637367249, + "learning_rate": 0.00010184745762711863, + "loss": 0.3007, "step": 20000 }, { "epoch": 3.8714672861014323, - "eval_loss": 0.38884833455085754, - "eval_runtime": 155.0772, - "eval_samples_per_second": 36.472, - "eval_steps_per_second": 4.559, - "eval_wer": 0.2884883888879973, + "eval_loss": 0.4092504680156708, + "eval_runtime": 163.9434, + "eval_samples_per_second": 34.5, + "eval_steps_per_second": 4.312, + "eval_wer": 0.31593137648248304, "step": 20000 }, { - "epoch": 3.8714672861014323, - "step": 20000, - "total_flos": 2.249387574100498e+19, - "train_loss": 0.15996522521972656, - "train_runtime": 19346.8732, - "train_samples_per_second": 8.27, - "train_steps_per_second": 1.034 + "epoch": 3.8908246225319396, + "eval_loss": 0.4147598147392273, + "eval_runtime": 164.6303, + "eval_samples_per_second": 34.356, + "eval_steps_per_second": 4.294, + "eval_wer": 0.31565855145961386, + "step": 20100 + }, + { + "epoch": 3.910181958962447, + "eval_loss": 0.41137251257896423, + "eval_runtime": 164.6634, + "eval_samples_per_second": 34.349, + "eval_steps_per_second": 4.294, + "eval_wer": 0.3150326587600905, + "step": 20200 + }, + { + "epoch": 3.9295392953929538, + "eval_loss": 0.4155375361442566, + "eval_runtime": 164.3406, + "eval_samples_per_second": 34.416, + "eval_steps_per_second": 4.302, + "eval_wer": 0.31456725136813724, + "step": 20300 + }, + { + "epoch": 3.948896631823461, + "eval_loss": 0.4075925648212433, + "eval_runtime": 164.3692, + "eval_samples_per_second": 34.41, + "eval_steps_per_second": 4.301, + "eval_wer": 0.3135722424612027, + "step": 20400 + }, + { + "epoch": 3.9682539682539684, + "grad_norm": 0.6109060049057007, + "learning_rate": 9.67728813559322e-05, + "loss": 0.296, + "step": 20500 + }, + { + "epoch": 3.9682539682539684, + "eval_loss": 0.4066578149795532, + "eval_runtime": 164.7852, + "eval_samples_per_second": 34.323, + "eval_steps_per_second": 4.29, + "eval_wer": 0.3125611850235111, + "step": 20500 + }, + { + "epoch": 3.987611304684475, + "eval_loss": 0.40839362144470215, + "eval_runtime": 164.8883, + "eval_samples_per_second": 34.302, + "eval_steps_per_second": 4.288, + "eval_wer": 0.3150487072908475, + "step": 20600 + }, + { + "epoch": 4.006968641114983, + "eval_loss": 0.4150494635105133, + "eval_runtime": 164.1525, + "eval_samples_per_second": 34.456, + "eval_steps_per_second": 4.307, + "eval_wer": 0.312432796777455, + "step": 20700 + }, + { + "epoch": 4.02632597754549, + "eval_loss": 0.41322341561317444, + "eval_runtime": 164.6726, + "eval_samples_per_second": 34.347, + "eval_steps_per_second": 4.293, + "eval_wer": 0.3132512718460625, + "step": 20800 + }, + { + "epoch": 4.045683313975997, + "eval_loss": 0.4182606339454651, + "eval_runtime": 164.9667, + "eval_samples_per_second": 34.286, + "eval_steps_per_second": 4.286, + "eval_wer": 0.31464749402192227, + "step": 20900 + }, + { + "epoch": 4.065040650406504, + "grad_norm": 0.9771650433540344, + "learning_rate": 9.168813559322032e-05, + "loss": 0.2611, + "step": 21000 + }, + { + "epoch": 4.065040650406504, + "eval_loss": 0.41840454936027527, + "eval_runtime": 164.8893, + "eval_samples_per_second": 34.302, + "eval_steps_per_second": 4.288, + "eval_wer": 0.30952801271043634, + "step": 21000 + }, + { + "epoch": 4.084397986837011, + "eval_loss": 0.4167742431163788, + "eval_runtime": 165.1089, + "eval_samples_per_second": 34.256, + "eval_steps_per_second": 4.282, + "eval_wer": 0.30845276114971676, + "step": 21100 + }, + { + "epoch": 4.103755323267518, + "eval_loss": 0.42244288325309753, + "eval_runtime": 164.9167, + "eval_samples_per_second": 34.296, + "eval_steps_per_second": 4.287, + "eval_wer": 0.31015390540995974, + "step": 21200 + }, + { + "epoch": 4.123112659698026, + "eval_loss": 0.4187394678592682, + "eval_runtime": 164.7166, + "eval_samples_per_second": 34.338, + "eval_steps_per_second": 4.292, + "eval_wer": 0.30456901670652053, + "step": 21300 + }, + { + "epoch": 4.142469996128533, + "eval_loss": 0.41454723477363586, + "eval_runtime": 164.38, + "eval_samples_per_second": 34.408, + "eval_steps_per_second": 4.301, + "eval_wer": 0.3110044775400812, + "step": 21400 + }, + { + "epoch": 4.16182733255904, + "grad_norm": 0.8976078629493713, + "learning_rate": 8.660338983050847e-05, + "loss": 0.2431, + "step": 21500 + }, + { + "epoch": 4.16182733255904, + "eval_loss": 0.42720434069633484, + "eval_runtime": 165.0533, + "eval_samples_per_second": 34.268, + "eval_steps_per_second": 4.283, + "eval_wer": 0.31071560398645504, + "step": 21500 + }, + { + "epoch": 4.181184668989547, + "eval_loss": 0.41736435890197754, + "eval_runtime": 164.6627, + "eval_samples_per_second": 34.349, + "eval_steps_per_second": 4.294, + "eval_wer": 0.3069923448508289, + "step": 21600 + }, + { + "epoch": 4.200542005420054, + "eval_loss": 0.41904589533805847, + "eval_runtime": 165.6169, + "eval_samples_per_second": 34.151, + "eval_steps_per_second": 4.269, + "eval_wer": 0.3085811493957728, + "step": 21700 + }, + { + "epoch": 4.219899341850561, + "eval_loss": 0.41643446683883667, + "eval_runtime": 165.1417, + "eval_samples_per_second": 34.249, + "eval_steps_per_second": 4.281, + "eval_wer": 0.3050825696907448, + "step": 21800 + }, + { + "epoch": 4.239256678281069, + "eval_loss": 0.41955476999282837, + "eval_runtime": 165.2591, + "eval_samples_per_second": 34.225, + "eval_steps_per_second": 4.278, + "eval_wer": 0.30777872285792235, + "step": 21900 + }, + { + "epoch": 4.258614014711576, + "grad_norm": 1.5854851007461548, + "learning_rate": 8.15186440677966e-05, + "loss": 0.2453, + "step": 22000 + }, + { + "epoch": 4.258614014711576, + "eval_loss": 0.42485129833221436, + "eval_runtime": 164.7312, + "eval_samples_per_second": 34.335, + "eval_steps_per_second": 4.292, + "eval_wer": 0.30915889650302514, + "step": 22000 + }, + { + "epoch": 4.2779713511420825, + "eval_loss": 0.4246067404747009, + "eval_runtime": 164.9099, + "eval_samples_per_second": 34.298, + "eval_steps_per_second": 4.287, + "eval_wer": 0.30736146105824014, + "step": 22100 + }, + { + "epoch": 4.29732868757259, + "eval_loss": 0.4166228771209717, + "eval_runtime": 164.9564, + "eval_samples_per_second": 34.288, + "eval_steps_per_second": 4.286, + "eval_wer": 0.30740960665051115, + "step": 22200 + }, + { + "epoch": 4.316686024003097, + "eval_loss": 0.4192067086696625, + "eval_runtime": 165.6591, + "eval_samples_per_second": 34.142, + "eval_steps_per_second": 4.268, + "eval_wer": 0.3027555327309785, + "step": 22300 + }, + { + "epoch": 4.336043360433604, + "eval_loss": 0.41863906383514404, + "eval_runtime": 164.9558, + "eval_samples_per_second": 34.288, + "eval_steps_per_second": 4.286, + "eval_wer": 0.3020975429699411, + "step": 22400 + }, + { + "epoch": 4.355400696864112, + "grad_norm": 1.1900339126586914, + "learning_rate": 7.645423728813559e-05, + "loss": 0.2336, + "step": 22500 + }, + { + "epoch": 4.355400696864112, + "eval_loss": 0.4268459677696228, + "eval_runtime": 166.7137, + "eval_samples_per_second": 33.926, + "eval_steps_per_second": 4.241, + "eval_wer": 0.3083885670266887, + "step": 22500 + }, + { + "epoch": 4.3747580332946185, + "eval_loss": 0.4346672296524048, + "eval_runtime": 170.3751, + "eval_samples_per_second": 33.197, + "eval_steps_per_second": 4.15, + "eval_wer": 0.307104684566128, + "step": 22600 + }, + { + "epoch": 4.394115369725126, + "eval_loss": 0.47525468468666077, + "eval_runtime": 164.9807, + "eval_samples_per_second": 34.283, + "eval_steps_per_second": 4.285, + "eval_wer": 0.3208582754248849, + "step": 22700 + }, + { + "epoch": 4.413472706155633, + "eval_loss": 0.582381546497345, + "eval_runtime": 165.1397, + "eval_samples_per_second": 34.25, + "eval_steps_per_second": 4.281, + "eval_wer": 0.415416218645183, + "step": 22800 + }, + { + "epoch": 4.43283004258614, + "eval_loss": 0.5073803067207336, + "eval_runtime": 165.3352, + "eval_samples_per_second": 34.209, + "eval_steps_per_second": 4.276, + "eval_wer": 0.3415448315706697, + "step": 22900 + }, + { + "epoch": 4.452187379016648, + "grad_norm": 1.5807456970214844, + "learning_rate": 7.136949152542373e-05, + "loss": 0.3426, + "step": 23000 + }, + { + "epoch": 4.452187379016648, + "eval_loss": 0.6242379546165466, + "eval_runtime": 164.8642, + "eval_samples_per_second": 34.307, + "eval_steps_per_second": 4.288, + "eval_wer": 0.41979746754184655, + "step": 23000 + }, + { + "epoch": 4.471544715447155, + "eval_loss": 0.5862211585044861, + "eval_runtime": 164.8283, + "eval_samples_per_second": 34.314, + "eval_steps_per_second": 4.289, + "eval_wer": 0.4200702925647157, + "step": 23100 + }, + { + "epoch": 4.4909020518776614, + "eval_loss": 0.6151086091995239, + "eval_runtime": 165.0995, + "eval_samples_per_second": 34.258, + "eval_steps_per_second": 4.282, + "eval_wer": 0.39638266116737014, + "step": 23200 + }, + { + "epoch": 4.510259388308169, + "eval_loss": 0.5640283226966858, + "eval_runtime": 164.849, + "eval_samples_per_second": 34.31, + "eval_steps_per_second": 4.289, + "eval_wer": 0.3685705573654732, + "step": 23300 + }, + { + "epoch": 4.529616724738676, + "eval_loss": 0.6589744091033936, + "eval_runtime": 164.8194, + "eval_samples_per_second": 34.316, + "eval_steps_per_second": 4.29, + "eval_wer": 0.4647494021922293, + "step": 23400 + }, + { + "epoch": 4.548974061169183, + "grad_norm": 1.0218427181243896, + "learning_rate": 6.628474576271186e-05, + "loss": 0.4541, + "step": 23500 + }, + { + "epoch": 4.548974061169183, + "eval_loss": 0.6010532975196838, + "eval_runtime": 165.0253, + "eval_samples_per_second": 34.274, + "eval_steps_per_second": 4.284, + "eval_wer": 0.3959974964292019, + "step": 23500 + }, + { + "epoch": 4.568331397599691, + "eval_loss": 0.5802894830703735, + "eval_runtime": 166.0838, + "eval_samples_per_second": 34.055, + "eval_steps_per_second": 4.257, + "eval_wer": 0.39505063311453836, + "step": 23600 + }, + { + "epoch": 4.5876887340301975, + "eval_loss": 0.5762883424758911, + "eval_runtime": 165.1308, + "eval_samples_per_second": 34.252, + "eval_steps_per_second": 4.281, + "eval_wer": 0.3910545489560431, + "step": 23700 + }, + { + "epoch": 4.607046070460704, + "eval_loss": 0.5418487787246704, + "eval_runtime": 165.1407, + "eval_samples_per_second": 34.25, + "eval_steps_per_second": 4.281, + "eval_wer": 0.36550528799088444, + "step": 23800 + }, + { + "epoch": 4.626403406891212, + "eval_loss": 0.5546759366989136, + "eval_runtime": 165.8678, + "eval_samples_per_second": 34.099, + "eval_steps_per_second": 4.262, + "eval_wer": 0.38877565758854776, + "step": 23900 + }, + { + "epoch": 4.645760743321719, + "grad_norm": 32.50680923461914, + "learning_rate": 6.12e-05, + "loss": 0.4145, + "step": 24000 + }, + { + "epoch": 4.645760743321719, + "eval_loss": 0.5300523638725281, + "eval_runtime": 164.9724, + "eval_samples_per_second": 34.285, + "eval_steps_per_second": 4.286, + "eval_wer": 0.3608030684790807, + "step": 24000 + }, + { + "epoch": 4.665118079752226, + "eval_loss": 0.573882520198822, + "eval_runtime": 165.2226, + "eval_samples_per_second": 34.233, + "eval_steps_per_second": 4.279, + "eval_wer": 0.39927139670363176, + "step": 24100 + }, + { + "epoch": 4.6844754161827336, + "eval_loss": 0.5775899887084961, + "eval_runtime": 165.3691, + "eval_samples_per_second": 34.202, + "eval_steps_per_second": 4.275, + "eval_wer": 0.39816404808139816, + "step": 24200 + }, + { + "epoch": 4.70383275261324, + "eval_loss": 0.5412492156028748, + "eval_runtime": 164.9818, + "eval_samples_per_second": 34.283, + "eval_steps_per_second": 4.285, + "eval_wer": 0.37078525460994044, + "step": 24300 + }, + { + "epoch": 4.723190089043747, + "eval_loss": 0.5329325199127197, + "eval_runtime": 165.8065, + "eval_samples_per_second": 34.112, + "eval_steps_per_second": 4.264, + "eval_wer": 0.37044823546404326, + "step": 24400 + }, + { + "epoch": 4.742547425474255, + "grad_norm": 1.8765805959701538, + "learning_rate": 5.611525423728813e-05, + "loss": 0.3834, + "step": 24500 + }, + { + "epoch": 4.742547425474255, + "eval_loss": 0.5299070477485657, + "eval_runtime": 165.3917, + "eval_samples_per_second": 34.198, + "eval_steps_per_second": 4.275, + "eval_wer": 0.3732246312850059, + "step": 24500 + }, + { + "epoch": 4.761904761904762, + "eval_loss": 0.5424681901931763, + "eval_runtime": 165.4071, + "eval_samples_per_second": 34.194, + "eval_steps_per_second": 4.274, + "eval_wer": 0.3928519844008281, + "step": 24600 + }, + { + "epoch": 4.781262098335269, + "eval_loss": 0.5111268758773804, + "eval_runtime": 165.4914, + "eval_samples_per_second": 34.177, + "eval_steps_per_second": 4.272, + "eval_wer": 0.3585241771115854, + "step": 24700 + }, + { + "epoch": 4.8006194347657765, + "eval_loss": 0.5076457858085632, + "eval_runtime": 165.6732, + "eval_samples_per_second": 34.14, + "eval_steps_per_second": 4.267, + "eval_wer": 0.35033942642551075, + "step": 24800 + }, + { + "epoch": 4.819976771196283, + "eval_loss": 0.5261921882629395, + "eval_runtime": 165.2946, + "eval_samples_per_second": 34.218, + "eval_steps_per_second": 4.277, + "eval_wer": 0.3681372470350339, + "step": 24900 + }, + { + "epoch": 4.83933410762679, + "grad_norm": 5.934371471405029, + "learning_rate": 5.1030508474576264e-05, + "loss": 0.3719, + "step": 25000 + }, + { + "epoch": 4.83933410762679, + "eval_loss": 0.547415018081665, + "eval_runtime": 165.6997, + "eval_samples_per_second": 34.134, + "eval_steps_per_second": 4.267, + "eval_wer": 0.3833031086004076, + "step": 25000 + }, + { + "epoch": 4.858691444057298, + "eval_loss": 0.5746738910675049, + "eval_runtime": 165.8407, + "eval_samples_per_second": 34.105, + "eval_steps_per_second": 4.263, + "eval_wer": 0.40389337356165045, + "step": 25100 + }, + { + "epoch": 4.878048780487805, + "eval_loss": 0.5188133120536804, + "eval_runtime": 165.5746, + "eval_samples_per_second": 34.16, + "eval_steps_per_second": 4.27, + "eval_wer": 0.3503073293639967, + "step": 25200 + }, + { + "epoch": 4.897406116918312, + "eval_loss": 0.5522667169570923, + "eval_runtime": 165.1011, + "eval_samples_per_second": 34.258, + "eval_steps_per_second": 4.282, + "eval_wer": 0.3865609603440805, + "step": 25300 + }, + { + "epoch": 4.916763453348819, + "eval_loss": 0.5302358865737915, + "eval_runtime": 165.871, + "eval_samples_per_second": 34.099, + "eval_steps_per_second": 4.262, + "eval_wer": 0.36446213349167883, + "step": 25400 + }, + { + "epoch": 4.936120789779326, + "grad_norm": 1.1752023696899414, + "learning_rate": 4.595593220338983e-05, + "loss": 0.3798, + "step": 25500 + }, + { + "epoch": 4.936120789779326, + "eval_loss": 0.5099266767501831, + "eval_runtime": 165.9652, + "eval_samples_per_second": 34.079, + "eval_steps_per_second": 4.26, + "eval_wer": 0.3499542616873425, + "step": 25500 + }, + { + "epoch": 4.955478126209833, + "eval_loss": 0.4823363125324249, + "eval_runtime": 164.8602, + "eval_samples_per_second": 34.308, + "eval_steps_per_second": 4.288, + "eval_wer": 0.33761294153520244, + "step": 25600 + }, + { + "epoch": 4.974835462640341, + "eval_loss": 0.4805842936038971, + "eval_runtime": 166.8028, + "eval_samples_per_second": 33.908, + "eval_steps_per_second": 4.239, + "eval_wer": 0.3357352634366324, + "step": 25700 + }, + { + "epoch": 4.994192799070848, + "eval_loss": 0.4942820370197296, + "eval_runtime": 165.4552, + "eval_samples_per_second": 34.184, + "eval_steps_per_second": 4.273, + "eval_wer": 0.35093322206352007, + "step": 25800 + }, + { + "epoch": 5.013550135501355, + "eval_loss": 0.49528568983078003, + "eval_runtime": 165.947, + "eval_samples_per_second": 34.083, + "eval_steps_per_second": 4.26, + "eval_wer": 0.35245783248543594, + "step": 25900 + }, + { + "epoch": 5.032907471931862, + "grad_norm": 27.542322158813477, + "learning_rate": 4.087118644067796e-05, + "loss": 0.3158, + "step": 26000 + }, + { + "epoch": 5.032907471931862, + "eval_loss": 0.485315203666687, + "eval_runtime": 165.2844, + "eval_samples_per_second": 34.22, + "eval_steps_per_second": 4.277, + "eval_wer": 0.34703342908956686, + "step": 26000 + }, + { + "epoch": 5.052264808362369, + "eval_loss": 0.5204781293869019, + "eval_runtime": 165.773, + "eval_samples_per_second": 34.119, + "eval_steps_per_second": 4.265, + "eval_wer": 0.36183017444752935, + "step": 26100 + }, + { + "epoch": 5.071622144792877, + "eval_loss": 0.5013459920883179, + "eval_runtime": 165.057, + "eval_samples_per_second": 34.267, + "eval_steps_per_second": 4.283, + "eval_wer": 0.3510455617788191, + "step": 26200 + }, + { + "epoch": 5.090979481223384, + "eval_loss": 0.4863474667072296, + "eval_runtime": 165.6964, + "eval_samples_per_second": 34.135, + "eval_steps_per_second": 4.267, + "eval_wer": 0.3396511049413426, + "step": 26300 + }, + { + "epoch": 5.110336817653891, + "eval_loss": 0.47152572870254517, + "eval_runtime": 166.0563, + "eval_samples_per_second": 34.061, + "eval_steps_per_second": 4.258, + "eval_wer": 0.32851342459597827, + "step": 26400 + }, + { + "epoch": 5.129694154084398, + "grad_norm": 0.8464019894599915, + "learning_rate": 3.5786440677966095e-05, + "loss": 0.2993, + "step": 26500 + }, + { + "epoch": 5.129694154084398, + "eval_loss": 0.4816218912601471, + "eval_runtime": 165.4176, + "eval_samples_per_second": 34.192, + "eval_steps_per_second": 4.274, + "eval_wer": 0.33273418818507167, + "step": 26500 + }, + { + "epoch": 5.149051490514905, + "eval_loss": 0.48058804869651794, + "eval_runtime": 166.4075, + "eval_samples_per_second": 33.989, + "eval_steps_per_second": 4.249, + "eval_wer": 0.33811044598866974, + "step": 26600 + }, + { + "epoch": 5.168408826945412, + "eval_loss": 0.4854019284248352, + "eval_runtime": 165.1934, + "eval_samples_per_second": 34.239, + "eval_steps_per_second": 4.28, + "eval_wer": 0.33416250742244547, + "step": 26700 + }, + { + "epoch": 5.18776616337592, + "eval_loss": 0.49545472860336304, + "eval_runtime": 165.6735, + "eval_samples_per_second": 34.139, + "eval_steps_per_second": 4.267, + "eval_wer": 0.3433422670154547, + "step": 26800 + }, + { + "epoch": 5.207123499806427, + "eval_loss": 0.4862872064113617, + "eval_runtime": 165.5277, + "eval_samples_per_second": 34.17, + "eval_steps_per_second": 4.271, + "eval_wer": 0.34337436407696875, + "step": 26900 + }, + { + "epoch": 5.2264808362369335, + "grad_norm": 10.611580848693848, + "learning_rate": 3.0701694915254236e-05, + "loss": 0.2902, + "step": 27000 + }, + { + "epoch": 5.2264808362369335, + "eval_loss": 0.48670876026153564, + "eval_runtime": 165.4986, + "eval_samples_per_second": 34.176, + "eval_steps_per_second": 4.272, + "eval_wer": 0.3448508289066136, + "step": 27000 + }, + { + "epoch": 5.245838172667441, + "eval_loss": 0.4787338674068451, + "eval_runtime": 165.461, + "eval_samples_per_second": 34.183, + "eval_steps_per_second": 4.273, + "eval_wer": 0.33778947537352955, + "step": 27100 + }, + { + "epoch": 5.265195509097948, + "eval_loss": 0.4861724376678467, + "eval_runtime": 165.3459, + "eval_samples_per_second": 34.207, + "eval_steps_per_second": 4.276, + "eval_wer": 0.33793391215034263, + "step": 27200 + }, + { + "epoch": 5.284552845528455, + "eval_loss": 0.4954308271408081, + "eval_runtime": 165.6637, + "eval_samples_per_second": 34.141, + "eval_steps_per_second": 4.268, + "eval_wer": 0.3467927011282117, + "step": 27300 + }, + { + "epoch": 5.303910181958963, + "eval_loss": 0.572640061378479, + "eval_runtime": 165.6804, + "eval_samples_per_second": 34.138, + "eval_steps_per_second": 4.267, + "eval_wer": 0.41416443324613633, + "step": 27400 + }, + { + "epoch": 5.3232675183894695, + "grad_norm": 1.2211335897445679, + "learning_rate": 2.5627118644067793e-05, + "loss": 0.305, + "step": 27500 + }, + { + "epoch": 5.3232675183894695, + "eval_loss": 0.5179979205131531, + "eval_runtime": 165.6208, + "eval_samples_per_second": 34.15, + "eval_steps_per_second": 4.269, + "eval_wer": 0.35735263436632375, + "step": 27500 + }, + { + "epoch": 5.342624854819976, + "eval_loss": 0.4996646046638489, + "eval_runtime": 164.9613, + "eval_samples_per_second": 34.287, + "eval_steps_per_second": 4.286, + "eval_wer": 0.3452038965832678, + "step": 27600 + }, + { + "epoch": 5.361982191250484, + "eval_loss": 0.4949517846107483, + "eval_runtime": 165.643, + "eval_samples_per_second": 34.146, + "eval_steps_per_second": 4.268, + "eval_wer": 0.34130410360931457, + "step": 27700 + }, + { + "epoch": 5.381339527680991, + "eval_loss": 0.5071349143981934, + "eval_runtime": 166.3001, + "eval_samples_per_second": 34.011, + "eval_steps_per_second": 4.251, + "eval_wer": 0.3491999807417631, + "step": 27800 + }, + { + "epoch": 5.400696864111498, + "eval_loss": 0.5095939040184021, + "eval_runtime": 165.4785, + "eval_samples_per_second": 34.18, + "eval_steps_per_second": 4.272, + "eval_wer": 0.3544799473608191, + "step": 27900 + }, + { + "epoch": 5.420054200542006, + "grad_norm": 15.024033546447754, + "learning_rate": 2.054237288135593e-05, + "loss": 0.3163, + "step": 28000 + }, + { + "epoch": 5.420054200542006, + "eval_loss": 0.5129156112670898, + "eval_runtime": 166.1505, + "eval_samples_per_second": 34.041, + "eval_steps_per_second": 4.255, + "eval_wer": 0.3565983534207443, + "step": 28000 + }, + { + "epoch": 5.4394115369725125, + "eval_loss": 0.5067318677902222, + "eval_runtime": 165.9899, + "eval_samples_per_second": 34.074, + "eval_steps_per_second": 4.259, + "eval_wer": 0.3506122514483799, + "step": 28100 + }, + { + "epoch": 5.45876887340302, + "eval_loss": 0.5053198337554932, + "eval_runtime": 165.3351, + "eval_samples_per_second": 34.209, + "eval_steps_per_second": 4.276, + "eval_wer": 0.35000240727961357, + "step": 28200 + }, + { + "epoch": 5.478126209833527, + "eval_loss": 0.5077947974205017, + "eval_runtime": 165.3012, + "eval_samples_per_second": 34.216, + "eval_steps_per_second": 4.277, + "eval_wer": 0.3518640368474266, + "step": 28300 + }, + { + "epoch": 5.497483546264034, + "eval_loss": 0.48453789949417114, + "eval_runtime": 165.2767, + "eval_samples_per_second": 34.221, + "eval_steps_per_second": 4.278, + "eval_wer": 0.3375166503506604, + "step": 28400 + }, + { + "epoch": 5.516840882694542, + "grad_norm": 0.43120303750038147, + "learning_rate": 1.5457627118644067e-05, + "loss": 0.3136, + "step": 28500 + }, + { + "epoch": 5.516840882694542, + "eval_loss": 0.4930485486984253, + "eval_runtime": 165.9777, + "eval_samples_per_second": 34.077, + "eval_steps_per_second": 4.26, + "eval_wer": 0.3439842082457351, + "step": 28500 + }, + { + "epoch": 5.5361982191250485, + "eval_loss": 0.5025920271873474, + "eval_runtime": 165.742, + "eval_samples_per_second": 34.125, + "eval_steps_per_second": 4.266, + "eval_wer": 0.35122209561714623, + "step": 28600 + }, + { + "epoch": 5.555555555555555, + "eval_loss": 0.5056036710739136, + "eval_runtime": 165.717, + "eval_samples_per_second": 34.13, + "eval_steps_per_second": 4.266, + "eval_wer": 0.3518800853781836, + "step": 28700 + }, + { + "epoch": 5.574912891986063, + "eval_loss": 0.5090658068656921, + "eval_runtime": 165.5604, + "eval_samples_per_second": 34.163, + "eval_steps_per_second": 4.27, + "eval_wer": 0.3546404326683892, + "step": 28800 + }, + { + "epoch": 5.59427022841657, + "eval_loss": 0.5027741193771362, + "eval_runtime": 170.2845, + "eval_samples_per_second": 33.215, + "eval_steps_per_second": 4.152, + "eval_wer": 0.34952095135690325, + "step": 28900 + }, + { + "epoch": 5.613627564847077, + "grad_norm": 1.859834909439087, + "learning_rate": 1.0372881355932203e-05, + "loss": 0.3092, + "step": 29000 + }, + { + "epoch": 5.613627564847077, + "eval_loss": 0.505651593208313, + "eval_runtime": 164.9869, + "eval_samples_per_second": 34.282, + "eval_steps_per_second": 4.285, + "eval_wer": 0.3509974161865481, + "step": 29000 + }, + { + "epoch": 5.6329849012775846, + "eval_loss": 0.5085631608963013, + "eval_runtime": 165.6325, + "eval_samples_per_second": 34.148, + "eval_steps_per_second": 4.268, + "eval_wer": 0.3532923560848004, + "step": 29100 + }, + { + "epoch": 5.652342237708091, + "eval_loss": 0.5055486559867859, + "eval_runtime": 165.6348, + "eval_samples_per_second": 34.147, + "eval_steps_per_second": 4.268, + "eval_wer": 0.35144677504774435, + "step": 29200 + }, + { + "epoch": 5.671699574138598, + "eval_loss": 0.5133376717567444, + "eval_runtime": 166.8503, + "eval_samples_per_second": 33.899, + "eval_steps_per_second": 4.237, + "eval_wer": 0.35765755645070696, + "step": 29300 + }, + { + "epoch": 5.691056910569106, + "eval_loss": 0.5129527449607849, + "eval_runtime": 165.063, + "eval_samples_per_second": 34.266, + "eval_steps_per_second": 4.283, + "eval_wer": 0.35703166375118356, + "step": 29400 + }, + { + "epoch": 5.710414246999613, + "grad_norm": 1.5260862112045288, + "learning_rate": 5.288135593220339e-06, + "loss": 0.3152, + "step": 29500 + }, + { + "epoch": 5.710414246999613, + "eval_loss": 0.5147610902786255, + "eval_runtime": 165.1582, + "eval_samples_per_second": 34.246, + "eval_steps_per_second": 4.281, + "eval_wer": 0.3581390123734172, + "step": 29500 + }, + { + "epoch": 5.72977158343012, + "eval_loss": 0.5114809274673462, + "eval_runtime": 165.7617, + "eval_samples_per_second": 34.121, + "eval_steps_per_second": 4.265, + "eval_wer": 0.3554589077369967, + "step": 29600 + }, + { + "epoch": 5.7491289198606275, + "eval_loss": 0.5053985714912415, + "eval_runtime": 165.7121, + "eval_samples_per_second": 34.131, + "eval_steps_per_second": 4.266, + "eval_wer": 0.35263436632376305, + "step": 29700 + }, + { + "epoch": 5.768486256291134, + "eval_loss": 0.5080947279930115, + "eval_runtime": 165.6502, + "eval_samples_per_second": 34.144, + "eval_steps_per_second": 4.268, + "eval_wer": 0.3535651811076696, + "step": 29800 + }, + { + "epoch": 5.787843592721641, + "eval_loss": 0.5076740384101868, + "eval_runtime": 164.8589, + "eval_samples_per_second": 34.308, + "eval_steps_per_second": 4.289, + "eval_wer": 0.35348493845388457, + "step": 29900 + }, + { + "epoch": 5.807200929152149, + "grad_norm": 24.957311630249023, + "learning_rate": 2.0338983050847458e-07, + "loss": 0.3085, + "step": 30000 + }, + { + "epoch": 5.807200929152149, + "eval_loss": 0.5066753029823303, + "eval_runtime": 165.8811, + "eval_samples_per_second": 34.097, + "eval_steps_per_second": 4.262, + "eval_wer": 0.35224920158559486, + "step": 30000 + }, + { + "epoch": 5.807200929152149, + "step": 30000, + "total_flos": 3.3745707679449666e+19, + "train_loss": 0.49102539647420246, + "train_runtime": 61359.0363, + "train_samples_per_second": 3.911, + "train_steps_per_second": 0.489 } ], "logging_steps": 500, - "max_steps": 20000, + "max_steps": 30000, "num_input_tokens_seen": 0, - "num_train_epochs": 4, + "num_train_epochs": 6, "save_steps": 400, - "total_flos": 2.249387574100498e+19, + "total_flos": 3.3745707679449666e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null