diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,3149 +1,1081 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 5.807200929152149, + "epoch": 1.9357336430507162, "eval_steps": 100, - "global_step": 30000, + "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019357336430507164, - "eval_loss": 3.567659854888916, - "eval_runtime": 160.9588, - "eval_samples_per_second": 35.139, - "eval_steps_per_second": 4.392, + "eval_loss": 3.5727736949920654, + "eval_runtime": 146.249, + "eval_samples_per_second": 38.674, + "eval_steps_per_second": 4.834, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.03871467286101433, - "eval_loss": 3.0471677780151367, - "eval_runtime": 158.7973, - "eval_samples_per_second": 35.618, - "eval_steps_per_second": 4.452, + "eval_loss": 3.076800584793091, + "eval_runtime": 143.9591, + "eval_samples_per_second": 39.289, + "eval_steps_per_second": 4.911, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.05807200929152149, - "eval_loss": 2.9665186405181885, - "eval_runtime": 159.308, - "eval_samples_per_second": 35.504, - "eval_steps_per_second": 4.438, + "eval_loss": 3.500979423522949, + "eval_runtime": 144.2352, + "eval_samples_per_second": 39.214, + "eval_steps_per_second": 4.902, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.07742934572202866, - "eval_loss": 2.464332103729248, - "eval_runtime": 159.8297, - "eval_samples_per_second": 35.388, - "eval_steps_per_second": 4.423, - "eval_wer": 0.9813195101988413, + "eval_loss": 2.0594074726104736, + "eval_runtime": 144.7975, + "eval_samples_per_second": 39.061, + "eval_steps_per_second": 4.883, + "eval_wer": 0.9899857168076263, "step": 400 }, { "epoch": 0.09678668215253582, - "grad_norm": 6.005111217498779, + "grad_norm": 3.567307710647583, "learning_rate": 0.00029759999999999997, - "loss": 4.1279, + "loss": 4.06, "step": 500 }, { "epoch": 0.09678668215253582, - "eval_loss": 1.625333547592163, - "eval_runtime": 160.6655, - "eval_samples_per_second": 35.204, - "eval_steps_per_second": 4.4, - "eval_wer": 0.9345380430421595, + "eval_loss": 1.4703481197357178, + "eval_runtime": 145.0673, + "eval_samples_per_second": 38.989, + "eval_steps_per_second": 4.874, + "eval_wer": 0.8800211840605993, "step": 500 }, { "epoch": 0.11614401858304298, - "eval_loss": 1.24808931350708, - "eval_runtime": 160.4402, - "eval_samples_per_second": 35.253, - "eval_steps_per_second": 4.407, - "eval_wer": 0.8190528157147213, + "eval_loss": 1.2463648319244385, + "eval_runtime": 146.5204, + "eval_samples_per_second": 38.602, + "eval_steps_per_second": 4.825, + "eval_wer": 0.8296608945451044, "step": 600 }, { "epoch": 0.13550135501355012, - "eval_loss": 1.0997203588485718, - "eval_runtime": 161.1872, - "eval_samples_per_second": 35.09, - "eval_steps_per_second": 4.386, - "eval_wer": 0.7769735680698432, + "eval_loss": 1.0686180591583252, + "eval_runtime": 145.9363, + "eval_samples_per_second": 38.757, + "eval_steps_per_second": 4.845, + "eval_wer": 0.7492738039832453, "step": 700 }, { "epoch": 0.1548586914440573, - "eval_loss": 1.0475263595581055, - "eval_runtime": 161.0626, - "eval_samples_per_second": 35.117, - "eval_steps_per_second": 4.39, - "eval_wer": 0.7339795541718156, + "eval_loss": 1.006879448890686, + "eval_runtime": 146.414, + "eval_samples_per_second": 38.63, + "eval_steps_per_second": 4.829, + "eval_wer": 0.7116239508273018, "step": 800 }, { "epoch": 0.17421602787456447, - "eval_loss": 0.9692754149436951, - "eval_runtime": 161.3683, - "eval_samples_per_second": 35.05, - "eval_steps_per_second": 4.381, - "eval_wer": 0.7012565999582738, + "eval_loss": 0.936712920665741, + "eval_runtime": 146.8805, + "eval_samples_per_second": 38.507, + "eval_steps_per_second": 4.813, + "eval_wer": 0.6887868915600777, "step": 900 }, { "epoch": 0.19357336430507163, - "grad_norm": 2.414348602294922, - "learning_rate": 0.0002949762711864406, - "loss": 1.0598, + "grad_norm": 2.5543222427368164, + "learning_rate": 0.0002844, + "loss": 1.0399, "step": 1000 }, { "epoch": 0.19357336430507163, - "eval_loss": 0.911480724811554, - "eval_runtime": 162.0835, - "eval_samples_per_second": 34.896, - "eval_steps_per_second": 4.362, - "eval_wer": 0.6749049124552647, + "eval_loss": 0.8960636854171753, + "eval_runtime": 146.9745, + "eval_samples_per_second": 38.483, + "eval_steps_per_second": 4.81, + "eval_wer": 0.6741987771019563, "step": 1000 }, { "epoch": 0.2129307007355788, - "eval_loss": 0.8823792338371277, - "eval_runtime": 161.5426, - "eval_samples_per_second": 35.012, - "eval_steps_per_second": 4.377, - "eval_wer": 0.65625651971562, + "eval_loss": 0.896744430065155, + "eval_runtime": 146.7401, + "eval_samples_per_second": 38.544, + "eval_steps_per_second": 4.818, + "eval_wer": 0.6412671919885734, "step": 1100 }, { "epoch": 0.23228803716608595, - "eval_loss": 0.8609552383422852, - "eval_runtime": 161.6778, - "eval_samples_per_second": 34.983, - "eval_steps_per_second": 4.373, - "eval_wer": 0.6431288215563865, + "eval_loss": 0.8311247825622559, + "eval_runtime": 145.8768, + "eval_samples_per_second": 38.772, + "eval_steps_per_second": 4.847, + "eval_wer": 0.6152685721622185, "step": 1200 }, { "epoch": 0.2516453735965931, - "eval_loss": 0.8330459594726562, - "eval_runtime": 161.7974, - "eval_samples_per_second": 34.957, - "eval_steps_per_second": 4.37, - "eval_wer": 0.6114169247805363, + "eval_loss": 0.8018700480461121, + "eval_runtime": 146.5709, + "eval_samples_per_second": 38.589, + "eval_steps_per_second": 4.824, + "eval_wer": 0.5965238882380318, "step": 1300 }, { "epoch": 0.27100271002710025, - "eval_loss": 0.8172780871391296, - "eval_runtime": 161.4977, - "eval_samples_per_second": 35.022, - "eval_steps_per_second": 4.378, - "eval_wer": 0.6017396607340598, + "eval_loss": 0.7925447225570679, + "eval_runtime": 146.4405, + "eval_samples_per_second": 38.623, + "eval_steps_per_second": 4.828, + "eval_wer": 0.5926561923255926, "step": 1400 }, { "epoch": 0.29036004645760743, - "grad_norm": 4.460846424102783, - "learning_rate": 0.0002898915254237288, - "loss": 0.8546, + "grad_norm": 2.373326539993286, + "learning_rate": 0.00026861052631578947, + "loss": 0.8395, "step": 1500 }, { "epoch": 0.29036004645760743, - "eval_loss": 0.8102588653564453, - "eval_runtime": 161.397, - "eval_samples_per_second": 35.044, - "eval_steps_per_second": 4.381, - "eval_wer": 0.6139204955786298, + "eval_loss": 0.8164969086647034, + "eval_runtime": 151.4209, + "eval_samples_per_second": 37.353, + "eval_steps_per_second": 4.669, + "eval_wer": 0.5986743913594711, "step": 1500 }, { "epoch": 0.3097173828881146, - "eval_loss": 0.7860382795333862, - "eval_runtime": 162.0586, - "eval_samples_per_second": 34.901, - "eval_steps_per_second": 4.363, - "eval_wer": 0.6077739082986953, + "eval_loss": 0.7696186304092407, + "eval_runtime": 147.0556, + "eval_samples_per_second": 38.462, + "eval_steps_per_second": 4.808, + "eval_wer": 0.6150278442008634, "step": 1600 }, { "epoch": 0.32907471931862176, - "eval_loss": 0.857550323009491, - "eval_runtime": 161.2906, - "eval_samples_per_second": 35.067, - "eval_steps_per_second": 4.383, - "eval_wer": 0.5990114105053682, + "eval_loss": 0.7454735636711121, + "eval_runtime": 148.1475, + "eval_samples_per_second": 38.178, + "eval_steps_per_second": 4.772, + "eval_wer": 0.5624207603793873, "step": 1700 }, { "epoch": 0.34843205574912894, - "eval_loss": 0.7555657029151917, - "eval_runtime": 161.7718, - "eval_samples_per_second": 34.963, - "eval_steps_per_second": 4.37, - "eval_wer": 0.5773298454526488, + "eval_loss": 0.7681124806404114, + "eval_runtime": 147.9355, + "eval_samples_per_second": 38.233, + "eval_steps_per_second": 4.779, + "eval_wer": 0.5684068623517518, "step": 1800 }, { "epoch": 0.3677893921796361, - "eval_loss": 0.7365372180938721, - "eval_runtime": 162.1133, - "eval_samples_per_second": 34.889, - "eval_steps_per_second": 4.361, - "eval_wer": 0.5825777150101908, + "eval_loss": 0.7292491793632507, + "eval_runtime": 148.2347, + "eval_samples_per_second": 38.156, + "eval_steps_per_second": 4.769, + "eval_wer": 0.5609282470189854, "step": 1900 }, { "epoch": 0.38714672861014326, - "grad_norm": 3.4646999835968018, - "learning_rate": 0.0002848067796610169, - "loss": 0.7776, + "grad_norm": 2.988316059112549, + "learning_rate": 0.0002528210526315789, + "loss": 0.7574, "step": 2000 }, { "epoch": 0.38714672861014326, - "eval_loss": 0.7291606068611145, - "eval_runtime": 162.354, - "eval_samples_per_second": 34.837, - "eval_steps_per_second": 4.355, - "eval_wer": 0.5551989215387332, + "eval_loss": 0.7304644584655762, + "eval_runtime": 148.3775, + "eval_samples_per_second": 38.119, + "eval_steps_per_second": 4.765, + "eval_wer": 0.5534014860939481, "step": 2000 }, { "epoch": 0.4065040650406504, - "eval_loss": 0.716595470905304, - "eval_runtime": 162.461, - "eval_samples_per_second": 34.815, - "eval_steps_per_second": 4.352, - "eval_wer": 0.5385726436744716, + "eval_loss": 0.7095713019371033, + "eval_runtime": 148.1439, + "eval_samples_per_second": 38.179, + "eval_steps_per_second": 4.772, + "eval_wer": 0.5363418978992474, "step": 2100 }, { "epoch": 0.4258614014711576, - "eval_loss": 0.7117305397987366, - "eval_runtime": 161.9033, - "eval_samples_per_second": 34.934, - "eval_steps_per_second": 4.367, - "eval_wer": 0.5401774967501726, + "eval_loss": 0.7107743620872498, + "eval_runtime": 147.443, + "eval_samples_per_second": 38.361, + "eval_steps_per_second": 4.795, + "eval_wer": 0.5572370849448733, "step": 2200 }, { "epoch": 0.4452187379016647, - "eval_loss": 0.7060667872428894, - "eval_runtime": 162.0901, - "eval_samples_per_second": 34.894, - "eval_steps_per_second": 4.362, - "eval_wer": 0.5388294201665838, + "eval_loss": 0.6702781319618225, + "eval_runtime": 147.3568, + "eval_samples_per_second": 38.383, + "eval_steps_per_second": 4.798, + "eval_wer": 0.5175330198520326, "step": 2300 }, { "epoch": 0.4645760743321719, - "eval_loss": 0.7044907212257385, - "eval_runtime": 162.5576, - "eval_samples_per_second": 34.794, - "eval_steps_per_second": 4.349, - "eval_wer": 0.5364060920222754, + "eval_loss": 0.6596451997756958, + "eval_runtime": 148.5753, + "eval_samples_per_second": 38.068, + "eval_steps_per_second": 4.759, + "eval_wer": 0.514885012277126, "step": 2400 }, { "epoch": 0.48393341076267904, - "grad_norm": 2.70296573638916, - "learning_rate": 0.00027972203389830505, - "loss": 0.706, + "grad_norm": 3.3213086128234863, + "learning_rate": 0.0002370315789473684, + "loss": 0.6864, "step": 2500 }, { "epoch": 0.48393341076267904, - "eval_loss": 0.7062936425209045, - "eval_runtime": 162.4753, - "eval_samples_per_second": 34.811, - "eval_steps_per_second": 4.351, - "eval_wer": 0.5428736499173501, + "eval_loss": 0.6845841407775879, + "eval_runtime": 149.4982, + "eval_samples_per_second": 37.833, + "eval_steps_per_second": 4.729, + "eval_wer": 0.5336457447320698, "step": 2500 }, { "epoch": 0.5032907471931862, - "eval_loss": 0.6941363215446472, - "eval_runtime": 162.6699, - "eval_samples_per_second": 34.77, - "eval_steps_per_second": 4.346, - "eval_wer": 0.5433872029015744, + "eval_loss": 0.6666129231452942, + "eval_runtime": 148.0482, + "eval_samples_per_second": 38.204, + "eval_steps_per_second": 4.775, + "eval_wer": 0.5285744090128549, "step": 2600 }, { "epoch": 0.5226480836236934, - "eval_loss": 0.6840428113937378, - "eval_runtime": 162.5617, - "eval_samples_per_second": 34.793, - "eval_steps_per_second": 4.349, - "eval_wer": 0.5203094156729952, + "eval_loss": 0.6390946507453918, + "eval_runtime": 148.4402, + "eval_samples_per_second": 38.103, + "eval_steps_per_second": 4.763, + "eval_wer": 0.4949366885461636, "step": 2700 }, { "epoch": 0.5420054200542005, - "eval_loss": 0.6902298331260681, - "eval_runtime": 162.7532, - "eval_samples_per_second": 34.752, - "eval_steps_per_second": 4.344, - "eval_wer": 0.5593715395355555, + "eval_loss": 0.6295592188835144, + "eval_runtime": 147.8141, + "eval_samples_per_second": 38.264, + "eval_steps_per_second": 4.783, + "eval_wer": 0.4989648697661729, "step": 2800 }, { "epoch": 0.5613627564847077, - "eval_loss": 0.6594961881637573, - "eval_runtime": 163.1259, - "eval_samples_per_second": 34.673, - "eval_steps_per_second": 4.334, - "eval_wer": 0.5149171093386401, + "eval_loss": 0.6291782855987549, + "eval_runtime": 148.1212, + "eval_samples_per_second": 38.185, + "eval_steps_per_second": 4.773, + "eval_wer": 0.4957391150840141, "step": 2900 }, { "epoch": 0.5807200929152149, - "grad_norm": 4.962900161743164, - "learning_rate": 0.0002746372881355932, - "loss": 0.7002, + "grad_norm": 5.012236595153809, + "learning_rate": 0.00022124210526315786, + "loss": 0.6734, "step": 3000 }, { "epoch": 0.5807200929152149, - "eval_loss": 0.6767885088920593, - "eval_runtime": 162.9945, - "eval_samples_per_second": 34.701, - "eval_steps_per_second": 4.338, - "eval_wer": 0.525284460207668, + "eval_loss": 0.6164219975471497, + "eval_runtime": 148.0479, + "eval_samples_per_second": 38.204, + "eval_steps_per_second": 4.775, + "eval_wer": 0.47652902376787404, "step": 3000 }, { "epoch": 0.6000774293457221, - "eval_loss": 0.6656874418258667, - "eval_runtime": 163.0572, - "eval_samples_per_second": 34.687, - "eval_steps_per_second": 4.336, - "eval_wer": 0.5063953395066682, + "eval_loss": 0.6179572343826294, + "eval_runtime": 148.2452, + "eval_samples_per_second": 38.153, + "eval_steps_per_second": 4.769, + "eval_wer": 0.4777808091669208, "step": 3100 }, { "epoch": 0.6194347657762292, - "eval_loss": 0.6758668422698975, - "eval_runtime": 163.796, - "eval_samples_per_second": 34.531, - "eval_steps_per_second": 4.316, - "eval_wer": 0.5409478262265089, + "eval_loss": 0.6132367849349976, + "eval_runtime": 148.4317, + "eval_samples_per_second": 38.105, + "eval_steps_per_second": 4.763, + "eval_wer": 0.49086036173388325, "step": 3200 }, { "epoch": 0.6387921022067363, - "eval_loss": 0.6709346175193787, - "eval_runtime": 162.7448, - "eval_samples_per_second": 34.754, - "eval_steps_per_second": 4.344, - "eval_wer": 0.5090914926738457, + "eval_loss": 0.6107444763183594, + "eval_runtime": 148.2189, + "eval_samples_per_second": 38.16, + "eval_steps_per_second": 4.77, + "eval_wer": 0.4683442730817994, "step": 3300 }, { "epoch": 0.6581494386372435, - "eval_loss": 0.6478992104530334, - "eval_runtime": 163.2374, - "eval_samples_per_second": 34.649, - "eval_steps_per_second": 4.331, - "eval_wer": 0.5037473319317617, + "eval_loss": 0.6068131327629089, + "eval_runtime": 147.7251, + "eval_samples_per_second": 38.287, + "eval_steps_per_second": 4.786, + "eval_wer": 0.4748760250999021, "step": 3400 }, { "epoch": 0.6775067750677507, - "grad_norm": 3.27418851852417, - "learning_rate": 0.0002695525423728813, - "loss": 0.685, + "grad_norm": 3.184985399246216, + "learning_rate": 0.00020545263157894736, + "loss": 0.6433, "step": 3500 }, { "epoch": 0.6775067750677507, - "eval_loss": 0.6378278136253357, - "eval_runtime": 162.9066, - "eval_samples_per_second": 34.719, - "eval_steps_per_second": 4.34, - "eval_wer": 0.5033782157243505, + "eval_loss": 0.6008120775222778, + "eval_runtime": 147.947, + "eval_samples_per_second": 38.23, + "eval_steps_per_second": 4.779, + "eval_wer": 0.47725120765193946, "step": 3500 }, { "epoch": 0.6968641114982579, - "eval_loss": 0.6492822170257568, - "eval_runtime": 162.8688, - "eval_samples_per_second": 34.727, - "eval_steps_per_second": 4.341, - "eval_wer": 0.49883648152011684, + "eval_loss": 0.5916668772697449, + "eval_runtime": 147.0363, + "eval_samples_per_second": 38.467, + "eval_steps_per_second": 4.808, + "eval_wer": 0.4656320713838648, "step": 3600 }, { "epoch": 0.716221447928765, - "eval_loss": 0.6340391635894775, - "eval_runtime": 163.0198, - "eval_samples_per_second": 34.695, - "eval_steps_per_second": 4.337, - "eval_wer": 0.4832694066858179, + "eval_loss": 0.5885007381439209, + "eval_runtime": 148.9484, + "eval_samples_per_second": 37.973, + "eval_steps_per_second": 4.747, + "eval_wer": 0.4600953282726966, "step": 3700 }, { "epoch": 0.7355787843592722, - "eval_loss": 0.6226627826690674, - "eval_runtime": 164.0506, - "eval_samples_per_second": 34.477, - "eval_steps_per_second": 4.31, - "eval_wer": 0.47354399704707034, + "eval_loss": 0.5848101377487183, + "eval_runtime": 148.7388, + "eval_samples_per_second": 38.026, + "eval_steps_per_second": 4.753, + "eval_wer": 0.44823546404326686, "step": 3800 }, { "epoch": 0.7549361207897793, - "eval_loss": 0.6257476210594177, - "eval_runtime": 167.0907, - "eval_samples_per_second": 33.85, - "eval_steps_per_second": 4.231, - "eval_wer": 0.49068382789555615, + "eval_loss": 0.5852195620536804, + "eval_runtime": 148.3227, + "eval_samples_per_second": 38.133, + "eval_steps_per_second": 4.767, + "eval_wer": 0.44963168621912664, "step": 3900 }, { "epoch": 0.7742934572202865, - "grad_norm": 5.494376182556152, - "learning_rate": 0.0002644677966101695, - "loss": 0.6655, + "grad_norm": 4.9515814781188965, + "learning_rate": 0.00018966315789473683, + "loss": 0.6217, "step": 4000 }, { "epoch": 0.7742934572202865, - "eval_loss": 0.6420141458511353, - "eval_runtime": 163.4141, - "eval_samples_per_second": 34.611, - "eval_steps_per_second": 4.326, - "eval_wer": 0.49987963601932245, + "eval_loss": 0.577220618724823, + "eval_runtime": 147.6504, + "eval_samples_per_second": 38.307, + "eval_steps_per_second": 4.788, + "eval_wer": 0.44163951790213607, "step": 4000 }, { "epoch": 0.7936507936507936, - "eval_loss": 0.6111469268798828, - "eval_runtime": 163.5321, - "eval_samples_per_second": 34.586, - "eval_steps_per_second": 4.323, - "eval_wer": 0.4790646916274815, + "eval_loss": 0.56705242395401, + "eval_runtime": 152.2357, + "eval_samples_per_second": 37.153, + "eval_steps_per_second": 4.644, + "eval_wer": 0.44691948452119207, "step": 4100 }, { "epoch": 0.8130081300813008, - "eval_loss": 0.6136205196380615, - "eval_runtime": 163.9442, - "eval_samples_per_second": 34.5, - "eval_steps_per_second": 4.312, - "eval_wer": 0.48073373882621045, + "eval_loss": 0.5668296813964844, + "eval_runtime": 148.0111, + "eval_samples_per_second": 38.213, + "eval_steps_per_second": 4.777, + "eval_wer": 0.4462614947601547, "step": 4200 }, { "epoch": 0.832365466511808, - "eval_loss": 0.6218396425247192, - "eval_runtime": 163.3391, - "eval_samples_per_second": 34.627, - "eval_steps_per_second": 4.328, - "eval_wer": 0.48596555985299544, + "eval_loss": 0.5557947754859924, + "eval_runtime": 149.4281, + "eval_samples_per_second": 37.851, + "eval_steps_per_second": 4.731, + "eval_wer": 0.44006676188794913, "step": 4300 }, { "epoch": 0.8517228029423152, - "eval_loss": 0.6084252595901489, - "eval_runtime": 162.8945, - "eval_samples_per_second": 34.722, - "eval_steps_per_second": 4.34, - "eval_wer": 0.4585386207892667, + "eval_loss": 0.5651959776878357, + "eval_runtime": 149.3956, + "eval_samples_per_second": 37.859, + "eval_steps_per_second": 4.732, + "eval_wer": 0.4306783713950988, "step": 4400 }, { "epoch": 0.8710801393728222, - "grad_norm": 3.0379676818847656, - "learning_rate": 0.0002593830508474576, - "loss": 0.6431, + "grad_norm": 3.5483193397521973, + "learning_rate": 0.0001738736842105263, + "loss": 0.5954, "step": 4500 }, { "epoch": 0.8710801393728222, - "eval_loss": 0.6008957624435425, - "eval_runtime": 163.8125, - "eval_samples_per_second": 34.527, - "eval_steps_per_second": 4.316, - "eval_wer": 0.4627593843783602, + "eval_loss": 0.5561267733573914, + "eval_runtime": 149.9212, + "eval_samples_per_second": 37.726, + "eval_steps_per_second": 4.716, + "eval_wer": 0.4307265169873698, "step": 4500 }, { "epoch": 0.8904374758033294, - "eval_loss": 0.6009930968284607, - "eval_runtime": 163.1039, - "eval_samples_per_second": 34.677, - "eval_steps_per_second": 4.335, - "eval_wer": 0.46295196674744427, + "eval_loss": 0.5431749820709229, + "eval_runtime": 149.9454, + "eval_samples_per_second": 37.72, + "eval_steps_per_second": 4.715, + "eval_wer": 0.420648039671968, "step": 4600 }, { "epoch": 0.9097948122338366, - "eval_loss": 0.5823432207107544, - "eval_runtime": 163.3804, - "eval_samples_per_second": 34.619, - "eval_steps_per_second": 4.327, - "eval_wer": 0.45035387010319206, + "eval_loss": 0.5294374823570251, + "eval_runtime": 148.9794, + "eval_samples_per_second": 37.965, + "eval_steps_per_second": 4.746, + "eval_wer": 0.41371507438494004, "step": 4700 }, { "epoch": 0.9291521486643438, - "eval_loss": 0.6118789315223694, - "eval_runtime": 163.7255, - "eval_samples_per_second": 34.546, - "eval_steps_per_second": 4.318, - "eval_wer": 0.4630001123397153, + "eval_loss": 0.5444126725196838, + "eval_runtime": 148.5962, + "eval_samples_per_second": 38.063, + "eval_steps_per_second": 4.758, + "eval_wer": 0.4209529617563512, "step": 4800 }, { "epoch": 0.948509485094851, - "eval_loss": 0.6001989245414734, - "eval_runtime": 163.3492, - "eval_samples_per_second": 34.625, - "eval_steps_per_second": 4.328, - "eval_wer": 0.4600150856189116, + "eval_loss": 0.5291473269462585, + "eval_runtime": 150.1832, + "eval_samples_per_second": 37.661, + "eval_steps_per_second": 4.708, + "eval_wer": 0.4156569466065382, "step": 4900 }, { "epoch": 0.9678668215253581, - "grad_norm": 3.1605985164642334, - "learning_rate": 0.00025430847457627115, - "loss": 0.6235, + "grad_norm": 3.1595053672790527, + "learning_rate": 0.0001581157894736842, + "loss": 0.5663, "step": 5000 }, { "epoch": 0.9678668215253581, - "eval_loss": 0.5892329216003418, - "eval_runtime": 163.5255, - "eval_samples_per_second": 34.588, - "eval_steps_per_second": 4.323, - "eval_wer": 0.4551844778610518, + "eval_loss": 0.5428867340087891, + "eval_runtime": 149.6435, + "eval_samples_per_second": 37.797, + "eval_steps_per_second": 4.725, + "eval_wer": 0.4139558023462952, "step": 5000 }, { "epoch": 0.9872241579558653, - "eval_loss": 0.5673592686653137, - "eval_runtime": 163.343, - "eval_samples_per_second": 34.627, - "eval_steps_per_second": 4.328, - "eval_wer": 0.44889345380430423, + "eval_loss": 0.5208781361579895, + "eval_runtime": 149.0703, + "eval_samples_per_second": 37.942, + "eval_steps_per_second": 4.743, + "eval_wer": 0.41159666832501485, "step": 5100 }, { "epoch": 1.0065814943863725, - "eval_loss": 0.5792257785797119, - "eval_runtime": 162.8592, - "eval_samples_per_second": 34.729, - "eval_steps_per_second": 4.341, - "eval_wer": 0.43167338030203334, + "eval_loss": 0.5281690359115601, + "eval_runtime": 148.6703, + "eval_samples_per_second": 38.044, + "eval_steps_per_second": 4.755, + "eval_wer": 0.40421434417679064, "step": 5200 }, { "epoch": 1.0259388308168795, - "eval_loss": 0.5752869844436646, - "eval_runtime": 162.7856, - "eval_samples_per_second": 34.745, - "eval_steps_per_second": 4.343, - "eval_wer": 0.43331033043924827, + "eval_loss": 0.5118032693862915, + "eval_runtime": 148.0473, + "eval_samples_per_second": 38.204, + "eval_steps_per_second": 4.776, + "eval_wer": 0.39184092696313655, "step": 5300 }, { "epoch": 1.0452961672473868, - "eval_loss": 0.5698733925819397, - "eval_runtime": 161.9949, - "eval_samples_per_second": 34.915, - "eval_steps_per_second": 4.364, - "eval_wer": 0.44619730063712665, + "eval_loss": 0.5089045166969299, + "eval_runtime": 147.9634, + "eval_samples_per_second": 38.226, + "eval_steps_per_second": 4.778, + "eval_wer": 0.39927139670363176, "step": 5400 }, { "epoch": 1.064653503677894, - "grad_norm": 0.8791279792785645, - "learning_rate": 0.0002492338983050847, - "loss": 0.5527, + "grad_norm": 2.1315221786499023, + "learning_rate": 0.0001423578947368421, + "loss": 0.4941, "step": 5500 }, { "epoch": 1.064653503677894, - "eval_loss": 0.5666691660881042, - "eval_runtime": 162.0517, - "eval_samples_per_second": 34.902, - "eval_steps_per_second": 4.363, - "eval_wer": 0.43639164834459404, + "eval_loss": 0.5010989308357239, + "eval_runtime": 147.8753, + "eval_samples_per_second": 38.248, + "eval_steps_per_second": 4.781, + "eval_wer": 0.3921458490475197, "step": 5500 }, { "epoch": 1.084010840108401, - "eval_loss": 0.5558171272277832, - "eval_runtime": 161.9847, - "eval_samples_per_second": 34.917, - "eval_steps_per_second": 4.365, - "eval_wer": 0.42945868305756607, + "eval_loss": 0.5022321343421936, + "eval_runtime": 148.3164, + "eval_samples_per_second": 38.135, + "eval_steps_per_second": 4.767, + "eval_wer": 0.38869541493476273, "step": 5600 }, { "epoch": 1.1033681765389083, - "eval_loss": 0.5602455139160156, - "eval_runtime": 162.701, - "eval_samples_per_second": 34.763, - "eval_steps_per_second": 4.345, - "eval_wer": 0.422349183932211, + "eval_loss": 0.5066320896148682, + "eval_runtime": 148.554, + "eval_samples_per_second": 38.074, + "eval_steps_per_second": 4.759, + "eval_wer": 0.38526102935276274, "step": 5700 }, { "epoch": 1.1227255129694154, - "eval_loss": 0.559140145778656, - "eval_runtime": 162.4402, - "eval_samples_per_second": 34.819, - "eval_steps_per_second": 4.352, - "eval_wer": 0.41942835133443535, + "eval_loss": 0.49068546295166016, + "eval_runtime": 148.2455, + "eval_samples_per_second": 38.153, + "eval_steps_per_second": 4.769, + "eval_wer": 0.3815217216863796, "step": 5800 }, { "epoch": 1.1420828493999227, - "eval_loss": 0.5399234890937805, - "eval_runtime": 162.3316, - "eval_samples_per_second": 34.842, - "eval_steps_per_second": 4.355, - "eval_wer": 0.418818507165669, + "eval_loss": 0.4982084035873413, + "eval_runtime": 148.9817, + "eval_samples_per_second": 37.964, + "eval_steps_per_second": 4.746, + "eval_wer": 0.38086373192534223, "step": 5900 }, { "epoch": 1.1614401858304297, - "grad_norm": 0.9803772568702698, - "learning_rate": 0.00024414915254237287, - "loss": 0.533, + "grad_norm": 0.8627763390541077, + "learning_rate": 0.00012656842105263156, + "loss": 0.4628, "step": 6000 }, { "epoch": 1.1614401858304297, - "eval_loss": 0.545900821685791, - "eval_runtime": 161.6822, - "eval_samples_per_second": 34.982, - "eval_steps_per_second": 4.373, - "eval_wer": 0.431063536133267, + "eval_loss": 0.49128398299217224, + "eval_runtime": 149.7714, + "eval_samples_per_second": 37.764, + "eval_steps_per_second": 4.721, + "eval_wer": 0.38956203559564123, "step": 6000 }, { "epoch": 1.1807975222609368, - "eval_loss": 0.5347985625267029, - "eval_runtime": 161.8121, - "eval_samples_per_second": 34.954, - "eval_steps_per_second": 4.369, - "eval_wer": 0.41183739628637, + "eval_loss": 0.48260679841041565, + "eval_runtime": 149.8626, + "eval_samples_per_second": 37.741, + "eval_steps_per_second": 4.718, + "eval_wer": 0.373449310715604, "step": 6100 }, { "epoch": 1.2001548586914441, - "eval_loss": 0.5453631281852722, - "eval_runtime": 161.8802, - "eval_samples_per_second": 34.939, - "eval_steps_per_second": 4.367, - "eval_wer": 0.4176309158896503, + "eval_loss": 0.4883708655834198, + "eval_runtime": 149.0462, + "eval_samples_per_second": 37.948, + "eval_steps_per_second": 4.743, + "eval_wer": 0.3739949607613423, "step": 6200 }, { "epoch": 1.2195121951219512, - "eval_loss": 0.5442932844161987, - "eval_runtime": 162.1767, - "eval_samples_per_second": 34.876, - "eval_steps_per_second": 4.359, - "eval_wer": 0.42157885445587456, + "eval_loss": 0.4841243028640747, + "eval_runtime": 148.8948, + "eval_samples_per_second": 37.987, + "eval_steps_per_second": 4.748, + "eval_wer": 0.37004702219511804, "step": 6300 }, { "epoch": 1.2388695315524583, - "eval_loss": 0.5382806658744812, - "eval_runtime": 161.5364, - "eval_samples_per_second": 35.014, - "eval_steps_per_second": 4.377, - "eval_wer": 0.40962269904190274, + "eval_loss": 0.4828014671802521, + "eval_runtime": 149.5102, + "eval_samples_per_second": 37.83, + "eval_steps_per_second": 4.729, + "eval_wer": 0.36971000304922086, "step": 6400 }, { "epoch": 1.2582268679829656, - "grad_norm": 2.7026009559631348, - "learning_rate": 0.00023906440677966102, - "loss": 0.5228, + "grad_norm": 1.5625278949737549, + "learning_rate": 0.00011077894736842105, + "loss": 0.4435, "step": 6500 }, { "epoch": 1.2582268679829656, - "eval_loss": 0.540704071521759, - "eval_runtime": 161.6157, - "eval_samples_per_second": 34.997, - "eval_steps_per_second": 4.375, - "eval_wer": 0.41260772576270643, + "eval_loss": 0.48161521553993225, + "eval_runtime": 148.9005, + "eval_samples_per_second": 37.985, + "eval_steps_per_second": 4.748, + "eval_wer": 0.37389866957680024, "step": 6500 }, { "epoch": 1.2775842044134726, - "eval_loss": 0.5527251362800598, - "eval_runtime": 161.6568, - "eval_samples_per_second": 34.988, - "eval_steps_per_second": 4.373, - "eval_wer": 0.41426072443067835, + "eval_loss": 0.47928386926651, + "eval_runtime": 149.5106, + "eval_samples_per_second": 37.83, + "eval_steps_per_second": 4.729, + "eval_wer": 0.3673990146202115, "step": 6600 }, { "epoch": 1.29694154084398, - "eval_loss": 0.5312824845314026, - "eval_runtime": 161.7821, - "eval_samples_per_second": 34.961, - "eval_steps_per_second": 4.37, - "eval_wer": 0.40811413715074385, + "eval_loss": 0.4744218587875366, + "eval_runtime": 148.9048, + "eval_samples_per_second": 37.984, + "eval_steps_per_second": 4.748, + "eval_wer": 0.36688546163598723, "step": 6700 }, { "epoch": 1.316298877274487, - "eval_loss": 0.533909797668457, - "eval_runtime": 161.8361, - "eval_samples_per_second": 34.949, - "eval_steps_per_second": 4.369, - "eval_wer": 0.4150471024377718, + "eval_loss": 0.46821942925453186, + "eval_runtime": 148.7411, + "eval_samples_per_second": 38.026, + "eval_steps_per_second": 4.753, + "eval_wer": 0.3608672626021088, "step": 6800 }, { "epoch": 1.3356562137049943, - "eval_loss": 0.523649275302887, - "eval_runtime": 161.5476, - "eval_samples_per_second": 35.011, - "eval_steps_per_second": 4.376, - "eval_wer": 0.4120781242477251, + "eval_loss": 0.46276068687438965, + "eval_runtime": 150.3036, + "eval_samples_per_second": 37.63, + "eval_steps_per_second": 4.704, + "eval_wer": 0.359438943364735, "step": 6900 }, { "epoch": 1.3550135501355014, - "grad_norm": 0.709751546382904, - "learning_rate": 0.00023397966101694912, - "loss": 0.5204, + "grad_norm": 0.7794021964073181, + "learning_rate": 9.498947368421052e-05, + "loss": 0.4298, "step": 7000 }, { "epoch": 1.3550135501355014, - "eval_loss": 0.5527586340904236, - "eval_runtime": 162.1137, - "eval_samples_per_second": 34.889, - "eval_steps_per_second": 4.361, - "eval_wer": 0.4165877613904447, + "eval_loss": 0.4662827253341675, + "eval_runtime": 149.5174, + "eval_samples_per_second": 37.828, + "eval_steps_per_second": 4.729, + "eval_wer": 0.3554428592062397, "step": 7000 }, { "epoch": 1.3743708865660085, - "eval_loss": 0.5330629944801331, - "eval_runtime": 161.681, - "eval_samples_per_second": 34.982, - "eval_steps_per_second": 4.373, - "eval_wer": 0.40559451782189343, + "eval_loss": 0.4656233489513397, + "eval_runtime": 148.8165, + "eval_samples_per_second": 38.007, + "eval_steps_per_second": 4.751, + "eval_wer": 0.3583797403347724, "step": 7100 }, { "epoch": 1.3937282229965158, - "eval_loss": 0.5242415070533752, - "eval_runtime": 162.2742, - "eval_samples_per_second": 34.855, - "eval_steps_per_second": 4.357, - "eval_wer": 0.4058833913755196, + "eval_loss": 0.45931774377822876, + "eval_runtime": 150.2338, + "eval_samples_per_second": 37.648, + "eval_steps_per_second": 4.706, + "eval_wer": 0.35648601370544525, "step": 7200 }, { "epoch": 1.4130855594270229, - "eval_loss": 0.5309507250785828, - "eval_runtime": 163.2224, - "eval_samples_per_second": 34.652, - "eval_steps_per_second": 4.332, - "eval_wer": 0.4092856798960055, + "eval_loss": 0.45989105105400085, + "eval_runtime": 150.9977, + "eval_samples_per_second": 37.458, + "eval_steps_per_second": 4.682, + "eval_wer": 0.3565823048899873, "step": 7300 }, { "epoch": 1.43244289585753, - "eval_loss": 0.5278186798095703, - "eval_runtime": 162.9755, - "eval_samples_per_second": 34.705, - "eval_steps_per_second": 4.338, - "eval_wer": 0.4063006531752018, + "eval_loss": 0.46128061413764954, + "eval_runtime": 150.0246, + "eval_samples_per_second": 37.7, + "eval_steps_per_second": 4.713, + "eval_wer": 0.35208871627802474, "step": 7400 }, { "epoch": 1.4518002322880372, - "grad_norm": 0.9905166029930115, - "learning_rate": 0.00022889491525423728, - "loss": 0.5199, + "grad_norm": 0.7098228931427002, + "learning_rate": 7.92e-05, + "loss": 0.4292, "step": 7500 }, { "epoch": 1.4518002322880372, - "eval_loss": 0.5168124437332153, - "eval_runtime": 162.1336, - "eval_samples_per_second": 34.885, - "eval_steps_per_second": 4.361, - "eval_wer": 0.3955802346295197, + "eval_loss": 0.4520701467990875, + "eval_runtime": 149.5493, + "eval_samples_per_second": 37.82, + "eval_steps_per_second": 4.728, + "eval_wer": 0.34745069088924907, "step": 7500 }, { "epoch": 1.4711575687185443, - "eval_loss": 0.5236623287200928, - "eval_runtime": 161.1426, - "eval_samples_per_second": 35.099, - "eval_steps_per_second": 4.387, - "eval_wer": 0.40235271460897754, + "eval_loss": 0.4512416422367096, + "eval_runtime": 149.5055, + "eval_samples_per_second": 37.831, + "eval_steps_per_second": 4.729, + "eval_wer": 0.349071592495707, "step": 7600 }, { "epoch": 1.4905149051490514, - "eval_loss": 0.5316073894500732, - "eval_runtime": 162.1382, - "eval_samples_per_second": 34.884, - "eval_steps_per_second": 4.36, - "eval_wer": 0.4179358379740335, + "eval_loss": 0.4478435218334198, + "eval_runtime": 149.0622, + "eval_samples_per_second": 37.944, + "eval_steps_per_second": 4.743, + "eval_wer": 0.35175169713212756, "step": 7700 }, { "epoch": 1.5098722415795587, - "eval_loss": 0.5182381868362427, - "eval_runtime": 161.7911, - "eval_samples_per_second": 34.959, - "eval_steps_per_second": 4.37, - "eval_wer": 0.40326748086212705, + "eval_loss": 0.4415859878063202, + "eval_runtime": 148.899, + "eval_samples_per_second": 37.985, + "eval_steps_per_second": 4.748, + "eval_wer": 0.34213862720867905, "step": 7800 }, { "epoch": 1.5292295780100658, - "eval_loss": 0.5175392627716064, - "eval_runtime": 161.5725, - "eval_samples_per_second": 35.006, - "eval_steps_per_second": 4.376, - "eval_wer": 0.3983887275119963, + "eval_loss": 0.4426974952220917, + "eval_runtime": 149.2815, + "eval_samples_per_second": 37.888, + "eval_steps_per_second": 4.736, + "eval_wer": 0.3458779348750622, "step": 7900 }, { "epoch": 1.5485869144405728, - "grad_norm": 0.8261615037918091, - "learning_rate": 0.00022382033898305084, - "loss": 0.5066, + "grad_norm": 1.0578420162200928, + "learning_rate": 6.344210526315788e-05, + "loss": 0.4072, "step": 8000 }, { "epoch": 1.5485869144405728, - "eval_loss": 0.5138476490974426, - "eval_runtime": 162.4044, - "eval_samples_per_second": 34.827, - "eval_steps_per_second": 4.353, - "eval_wer": 0.39492224486848226, + "eval_loss": 0.43879374861717224, + "eval_runtime": 148.7049, + "eval_samples_per_second": 38.035, + "eval_steps_per_second": 4.754, + "eval_wer": 0.34565325544446407, "step": 8000 }, { "epoch": 1.5679442508710801, - "eval_loss": 0.515566885471344, - "eval_runtime": 162.5299, - "eval_samples_per_second": 34.8, - "eval_steps_per_second": 4.35, - "eval_wer": 0.4016305307249121, + "eval_loss": 0.44011563062667847, + "eval_runtime": 150.4046, + "eval_samples_per_second": 37.605, + "eval_steps_per_second": 4.701, + "eval_wer": 0.3453162362985669, "step": 8100 }, { "epoch": 1.5873015873015874, - "eval_loss": 0.5131089091300964, - "eval_runtime": 162.835, - "eval_samples_per_second": 34.735, - "eval_steps_per_second": 4.342, - "eval_wer": 0.39793936865080004, + "eval_loss": 0.43649429082870483, + "eval_runtime": 148.8759, + "eval_samples_per_second": 37.991, + "eval_steps_per_second": 4.749, + "eval_wer": 0.3434385581999968, "step": 8200 }, { "epoch": 1.6066589237320945, - "eval_loss": 0.5139849185943604, - "eval_runtime": 162.5015, - "eval_samples_per_second": 34.806, - "eval_steps_per_second": 4.351, - "eval_wer": 0.39413586686138885, + "eval_loss": 0.4346481263637543, + "eval_runtime": 149.1351, + "eval_samples_per_second": 37.925, + "eval_steps_per_second": 4.741, + "eval_wer": 0.33974739612588467, "step": 8300 }, { "epoch": 1.6260162601626016, - "eval_loss": 0.5224258303642273, - "eval_runtime": 162.1349, - "eval_samples_per_second": 34.885, - "eval_steps_per_second": 4.361, - "eval_wer": 0.39853316428880936, + "eval_loss": 0.43247029185295105, + "eval_runtime": 149.5691, + "eval_samples_per_second": 37.815, + "eval_steps_per_second": 4.727, + "eval_wer": 0.33604018552101556, "step": 8400 }, { "epoch": 1.645373596593109, - "grad_norm": 1.0760446786880493, - "learning_rate": 0.00021873559322033897, - "loss": 0.502, + "grad_norm": 1.7964462041854858, + "learning_rate": 4.765263157894736e-05, + "loss": 0.3991, "step": 8500 }, { "epoch": 1.645373596593109, - "eval_loss": 0.5274536609649658, - "eval_runtime": 162.3162, - "eval_samples_per_second": 34.846, - "eval_steps_per_second": 4.356, - "eval_wer": 0.40023430854905234, + "eval_loss": 0.43196219205856323, + "eval_runtime": 150.109, + "eval_samples_per_second": 37.679, + "eval_steps_per_second": 4.71, + "eval_wer": 0.3357834090289034, "step": 8500 }, { "epoch": 1.664730933023616, - "eval_loss": 0.5054244995117188, - "eval_runtime": 162.7623, - "eval_samples_per_second": 34.75, - "eval_steps_per_second": 4.344, - "eval_wer": 0.3860955529521272, + "eval_loss": 0.42872872948646545, + "eval_runtime": 150.0401, + "eval_samples_per_second": 37.697, + "eval_steps_per_second": 4.712, + "eval_wer": 0.3354624384137632, "step": 8600 }, { "epoch": 1.684088269454123, - "eval_loss": 0.5144466161727905, - "eval_runtime": 162.1114, - "eval_samples_per_second": 34.89, - "eval_steps_per_second": 4.361, - "eval_wer": 0.3912631798558842, + "eval_loss": 0.42928823828697205, + "eval_runtime": 149.2284, + "eval_samples_per_second": 37.902, + "eval_steps_per_second": 4.738, + "eval_wer": 0.33342427500762306, "step": 8700 }, { "epoch": 1.7034456058846303, - "eval_loss": 0.5017980933189392, - "eval_runtime": 162.1144, - "eval_samples_per_second": 34.889, - "eval_steps_per_second": 4.361, - "eval_wer": 0.3860955529521272, + "eval_loss": 0.4271656274795532, + "eval_runtime": 149.5274, + "eval_samples_per_second": 37.826, + "eval_steps_per_second": 4.728, + "eval_wer": 0.333327983823081, "step": 8800 }, { "epoch": 1.7228029423151374, - "eval_loss": 0.5001707673072815, - "eval_runtime": 162.7958, - "eval_samples_per_second": 34.743, - "eval_steps_per_second": 4.343, - "eval_wer": 0.39978494968785605, + "eval_loss": 0.4219857156276703, + "eval_runtime": 149.1865, + "eval_samples_per_second": 37.912, + "eval_steps_per_second": 4.739, + "eval_wer": 0.3302948115100063, "step": 8900 }, { "epoch": 1.7421602787456445, - "grad_norm": 2.3791110515594482, - "learning_rate": 0.00021366101694915253, - "loss": 0.4965, + "grad_norm": 1.7460029125213623, + "learning_rate": 3.189473684210526e-05, + "loss": 0.3916, "step": 9000 }, { "epoch": 1.7421602787456445, - "eval_loss": 0.5074877142906189, - "eval_runtime": 162.5487, - "eval_samples_per_second": 34.796, - "eval_steps_per_second": 4.349, - "eval_wer": 0.38896823995763186, + "eval_loss": 0.4238153398036957, + "eval_runtime": 149.4733, + "eval_samples_per_second": 37.84, + "eval_steps_per_second": 4.73, + "eval_wer": 0.3291874628877726, "step": 9000 }, { "epoch": 1.7615176151761518, - "eval_loss": 0.4928957521915436, - "eval_runtime": 162.2035, - "eval_samples_per_second": 34.87, - "eval_steps_per_second": 4.359, - "eval_wer": 0.3865449118133235, + "eval_loss": 0.42150619626045227, + "eval_runtime": 148.8948, + "eval_samples_per_second": 37.987, + "eval_steps_per_second": 4.748, + "eval_wer": 0.32812825985781, "step": 9100 }, { "epoch": 1.7808749516066589, - "eval_loss": 0.49622705578804016, - "eval_runtime": 162.8808, - "eval_samples_per_second": 34.725, - "eval_steps_per_second": 4.341, - "eval_wer": 0.38559804849866, + "eval_loss": 0.4176540672779083, + "eval_runtime": 150.0504, + "eval_samples_per_second": 37.694, + "eval_steps_per_second": 4.712, + "eval_wer": 0.3265876009051371, "step": 9200 }, { "epoch": 1.800232288037166, - "eval_loss": 0.49036508798599243, - "eval_runtime": 162.3886, - "eval_samples_per_second": 34.83, - "eval_steps_per_second": 4.354, - "eval_wer": 0.3759689300444544, + "eval_loss": 0.41875413060188293, + "eval_runtime": 150.5043, + "eval_samples_per_second": 37.58, + "eval_steps_per_second": 4.698, + "eval_wer": 0.32573702877501565, "step": 9300 }, { "epoch": 1.8195896244676733, - "eval_loss": 0.49964088201522827, - "eval_runtime": 162.526, - "eval_samples_per_second": 34.801, - "eval_steps_per_second": 4.35, - "eval_wer": 0.3901237341721365, + "eval_loss": 0.41637665033340454, + "eval_runtime": 150.1757, + "eval_samples_per_second": 37.663, + "eval_steps_per_second": 4.708, + "eval_wer": 0.32469387427581003, "step": 9400 }, { "epoch": 1.8389469608981805, - "grad_norm": 1.2548748254776, - "learning_rate": 0.00020857627118644066, - "loss": 0.4776, + "grad_norm": 0.8558129668235779, + "learning_rate": 1.6105263157894736e-05, + "loss": 0.3687, "step": 9500 }, { "epoch": 1.8389469608981805, - "eval_loss": 0.4899130165576935, - "eval_runtime": 162.5723, - "eval_samples_per_second": 34.791, - "eval_steps_per_second": 4.349, - "eval_wer": 0.37616151241353857, + "eval_loss": 0.41629916429519653, + "eval_runtime": 149.3775, + "eval_samples_per_second": 37.864, + "eval_steps_per_second": 4.733, + "eval_wer": 0.3242766124761278, "step": 9500 }, { "epoch": 1.8583042973286876, - "eval_loss": 0.4918155074119568, - "eval_runtime": 162.1914, - "eval_samples_per_second": 34.872, - "eval_steps_per_second": 4.359, - "eval_wer": 0.37948355828023944, + "eval_loss": 0.4140332341194153, + "eval_runtime": 149.5915, + "eval_samples_per_second": 37.81, + "eval_steps_per_second": 4.726, + "eval_wer": 0.3238914477379596, "step": 9600 }, { "epoch": 1.8776616337591947, - "eval_loss": 0.49148374795913696, - "eval_runtime": 162.2813, - "eval_samples_per_second": 34.853, - "eval_steps_per_second": 4.357, - "eval_wer": 0.37980452889537963, + "eval_loss": 0.4132048189640045, + "eval_runtime": 150.4642, + "eval_samples_per_second": 37.59, + "eval_steps_per_second": 4.699, + "eval_wer": 0.324661777214296, "step": 9700 }, { "epoch": 1.897018970189702, - "eval_loss": 0.4841060936450958, - "eval_runtime": 162.2556, - "eval_samples_per_second": 34.859, - "eval_steps_per_second": 4.357, - "eval_wer": 0.37060872077161333, + "eval_loss": 0.4122065007686615, + "eval_runtime": 150.0219, + "eval_samples_per_second": 37.701, + "eval_steps_per_second": 4.713, + "eval_wer": 0.3223668373160437, "step": 9800 }, { "epoch": 1.916376306620209, - "eval_loss": 0.4834117293357849, - "eval_runtime": 163.5813, - "eval_samples_per_second": 34.576, - "eval_steps_per_second": 4.322, - "eval_wer": 0.37728490956652916, + "eval_loss": 0.41170838475227356, + "eval_runtime": 149.8162, + "eval_samples_per_second": 37.753, + "eval_steps_per_second": 4.719, + "eval_wer": 0.3218532843318194, "step": 9900 }, { "epoch": 1.9357336430507162, - "grad_norm": 1.011767029762268, - "learning_rate": 0.00020349152542372878, - "loss": 0.4752, + "grad_norm": 2.01002836227417, + "learning_rate": 3.157894736842105e-07, + "loss": 0.3707, "step": 10000 }, { "epoch": 1.9357336430507162, - "eval_loss": 0.4831894338130951, - "eval_runtime": 162.4305, - "eval_samples_per_second": 34.821, - "eval_steps_per_second": 4.353, - "eval_wer": 0.3711704193481087, + "eval_loss": 0.41177985072135925, + "eval_runtime": 148.9604, + "eval_samples_per_second": 37.97, + "eval_steps_per_second": 4.746, + "eval_wer": 0.32191747845484747, "step": 10000 }, { - "epoch": 1.9550909794812235, - "eval_loss": 0.4890592396259308, - "eval_runtime": 162.3568, - "eval_samples_per_second": 34.837, - "eval_steps_per_second": 4.355, - "eval_wer": 0.37829596700422075, - "step": 10100 - }, - { - "epoch": 1.9744483159117305, - "eval_loss": 0.4786697328090668, - "eval_runtime": 163.1452, - "eval_samples_per_second": 34.669, - "eval_steps_per_second": 4.334, - "eval_wer": 0.3783441125964918, - "step": 10200 - }, - { - "epoch": 1.9938056523422376, - "eval_loss": 0.4726457893848419, - "eval_runtime": 162.2371, - "eval_samples_per_second": 34.863, - "eval_steps_per_second": 4.358, - "eval_wer": 0.37141114730946384, - "step": 10300 - }, - { - "epoch": 2.013162988772745, - "eval_loss": 0.49166908860206604, - "eval_runtime": 162.2356, - "eval_samples_per_second": 34.863, - "eval_steps_per_second": 4.358, - "eval_wer": 0.37320858275424884, - "step": 10400 - }, - { - "epoch": 2.032520325203252, - "grad_norm": 0.9389815926551819, - "learning_rate": 0.00019840677966101694, - "loss": 0.4587, - "step": 10500 - }, - { - "epoch": 2.032520325203252, - "eval_loss": 0.48015162348747253, - "eval_runtime": 162.7857, - "eval_samples_per_second": 34.745, - "eval_steps_per_second": 4.343, - "eval_wer": 0.37264688417775355, - "step": 10500 - }, - { - "epoch": 2.051877661633759, - "eval_loss": 0.4883776903152466, - "eval_runtime": 162.7407, - "eval_samples_per_second": 34.755, - "eval_steps_per_second": 4.344, - "eval_wer": 0.3825327791240712, - "step": 10600 - }, - { - "epoch": 2.0712349980642664, - "eval_loss": 0.4841337502002716, - "eval_runtime": 162.7772, - "eval_samples_per_second": 34.747, - "eval_steps_per_second": 4.343, - "eval_wer": 0.37845645231179087, - "step": 10700 - }, - { - "epoch": 2.0905923344947737, - "eval_loss": 0.4809282422065735, - "eval_runtime": 162.8688, - "eval_samples_per_second": 34.727, - "eval_steps_per_second": 4.341, - "eval_wer": 0.3738184269230152, - "step": 10800 - }, - { - "epoch": 2.1099496709252805, - "eval_loss": 0.47966596484184265, - "eval_runtime": 163.5921, - "eval_samples_per_second": 34.574, - "eval_steps_per_second": 4.322, - "eval_wer": 0.3713469531864358, - "step": 10900 - }, - { - "epoch": 2.129307007355788, - "grad_norm": 0.6634272933006287, - "learning_rate": 0.0001933220338983051, - "loss": 0.3967, - "step": 11000 - }, - { - "epoch": 2.129307007355788, - "eval_loss": 0.4866289794445038, - "eval_runtime": 162.8573, - "eval_samples_per_second": 34.73, - "eval_steps_per_second": 4.341, - "eval_wer": 0.37497392113751987, - "step": 11000 - }, - { - "epoch": 2.148664343786295, - "eval_loss": 0.4938376843929291, - "eval_runtime": 163.5145, - "eval_samples_per_second": 34.59, - "eval_steps_per_second": 4.324, - "eval_wer": 0.3749097270144918, - "step": 11100 - }, - { - "epoch": 2.168021680216802, - "eval_loss": 0.48603999614715576, - "eval_runtime": 162.8433, - "eval_samples_per_second": 34.733, - "eval_steps_per_second": 4.342, - "eval_wer": 0.36796071319670687, - "step": 11200 - }, - { - "epoch": 2.1873790166473093, - "eval_loss": 0.4849016070365906, - "eval_runtime": 162.5249, - "eval_samples_per_second": 34.801, - "eval_steps_per_second": 4.35, - "eval_wer": 0.369966779541333, - "step": 11300 - }, - { - "epoch": 2.2067363530778166, - "eval_loss": 0.49077799916267395, - "eval_runtime": 162.3783, - "eval_samples_per_second": 34.832, - "eval_steps_per_second": 4.354, - "eval_wer": 0.36377204666912744, - "step": 11400 - }, - { - "epoch": 2.226093689508324, - "grad_norm": 0.9132543206214905, - "learning_rate": 0.0001882372881355932, - "loss": 0.406, - "step": 11500 - }, - { - "epoch": 2.226093689508324, - "eval_loss": 0.4797042906284332, - "eval_runtime": 162.1749, - "eval_samples_per_second": 34.876, - "eval_steps_per_second": 4.359, - "eval_wer": 0.3678644220121648, - "step": 11500 - }, - { - "epoch": 2.2454510259388307, - "eval_loss": 0.48121991753578186, - "eval_runtime": 161.7765, - "eval_samples_per_second": 34.962, - "eval_steps_per_second": 4.37, - "eval_wer": 0.37585659032915536, - "step": 11600 - }, - { - "epoch": 2.264808362369338, - "eval_loss": 0.47043049335479736, - "eval_runtime": 163.1227, - "eval_samples_per_second": 34.673, - "eval_steps_per_second": 4.334, - "eval_wer": 0.361268475871034, - "step": 11700 - }, - { - "epoch": 2.2841656987998453, - "eval_loss": 0.4715932607650757, - "eval_runtime": 163.2336, - "eval_samples_per_second": 34.65, - "eval_steps_per_second": 4.331, - "eval_wer": 0.36345107605398724, - "step": 11800 - }, - { - "epoch": 2.303523035230352, - "eval_loss": 0.4676753580570221, - "eval_runtime": 163.378, - "eval_samples_per_second": 34.619, - "eval_steps_per_second": 4.327, - "eval_wer": 0.3635473672385293, - "step": 11900 - }, - { - "epoch": 2.3228803716608595, - "grad_norm": 0.6134137511253357, - "learning_rate": 0.00018315254237288135, - "loss": 0.4088, - "step": 12000 - }, - { - "epoch": 2.3228803716608595, - "eval_loss": 0.47054949402809143, - "eval_runtime": 162.7912, - "eval_samples_per_second": 34.744, - "eval_steps_per_second": 4.343, - "eval_wer": 0.36399672609972555, - "step": 12000 - }, - { - "epoch": 2.3422377080913668, - "eval_loss": 0.4782082140445709, - "eval_runtime": 162.8776, - "eval_samples_per_second": 34.725, - "eval_steps_per_second": 4.341, - "eval_wer": 0.35905377862656673, - "step": 12100 - }, - { - "epoch": 2.3615950445218736, - "eval_loss": 0.4795554578304291, - "eval_runtime": 163.357, - "eval_samples_per_second": 34.624, - "eval_steps_per_second": 4.328, - "eval_wer": 0.36128452440179104, - "step": 12200 - }, - { - "epoch": 2.380952380952381, - "eval_loss": 0.47130346298217773, - "eval_runtime": 163.1765, - "eval_samples_per_second": 34.662, - "eval_steps_per_second": 4.333, - "eval_wer": 0.3558119754136509, - "step": 12300 - }, - { - "epoch": 2.4003097173828882, - "eval_loss": 0.47632816433906555, - "eval_runtime": 163.2851, - "eval_samples_per_second": 34.639, - "eval_steps_per_second": 4.33, - "eval_wer": 0.3588772447882396, - "step": 12400 - }, - { - "epoch": 2.419667053813395, - "grad_norm": 0.6301820874214172, - "learning_rate": 0.0001780779661016949, - "loss": 0.407, - "step": 12500 - }, - { - "epoch": 2.419667053813395, - "eval_loss": 0.46899136900901794, - "eval_runtime": 163.5219, - "eval_samples_per_second": 34.589, - "eval_steps_per_second": 4.324, - "eval_wer": 0.3565181107669593, - "step": 12500 - }, - { - "epoch": 2.4390243902439024, - "eval_loss": 0.4686334431171417, - "eval_runtime": 163.9109, - "eval_samples_per_second": 34.507, - "eval_steps_per_second": 4.313, - "eval_wer": 0.35767360498146394, - "step": 12600 - }, - { - "epoch": 2.4583817266744097, - "eval_loss": 0.467680424451828, - "eval_runtime": 163.7544, - "eval_samples_per_second": 34.54, - "eval_steps_per_second": 4.317, - "eval_wer": 0.3584278859270434, - "step": 12700 - }, - { - "epoch": 2.4777390631049165, - "eval_loss": 0.46144554018974304, - "eval_runtime": 163.8198, - "eval_samples_per_second": 34.526, - "eval_steps_per_second": 4.316, - "eval_wer": 0.35765755645070696, - "step": 12800 - }, - { - "epoch": 2.497096399535424, - "eval_loss": 0.455834299325943, - "eval_runtime": 163.2394, - "eval_samples_per_second": 34.649, - "eval_steps_per_second": 4.331, - "eval_wer": 0.35992039928744524, - "step": 12900 - }, - { - "epoch": 2.516453735965931, - "grad_norm": 0.41953468322753906, - "learning_rate": 0.00017299322033898304, - "loss": 0.3855, - "step": 13000 - }, - { - "epoch": 2.516453735965931, - "eval_loss": 0.4555678367614746, - "eval_runtime": 164.8785, - "eval_samples_per_second": 34.304, - "eval_steps_per_second": 4.288, - "eval_wer": 0.3565341592977163, - "step": 13000 - }, - { - "epoch": 2.535811072396438, - "eval_loss": 0.4600988030433655, - "eval_runtime": 163.6778, - "eval_samples_per_second": 34.556, - "eval_steps_per_second": 4.319, - "eval_wer": 0.3558280239444079, - "step": 13100 - }, - { - "epoch": 2.5551684088269453, - "eval_loss": 0.4650043547153473, - "eval_runtime": 163.58, - "eval_samples_per_second": 34.576, - "eval_steps_per_second": 4.322, - "eval_wer": 0.354303413522492, - "step": 13200 - }, - { - "epoch": 2.5745257452574526, - "eval_loss": 0.4737236201763153, - "eval_runtime": 163.9532, - "eval_samples_per_second": 34.498, - "eval_steps_per_second": 4.312, - "eval_wer": 0.35483301503747333, - "step": 13300 - }, - { - "epoch": 2.59388308168796, - "eval_loss": 0.45056912302970886, - "eval_runtime": 163.6462, - "eval_samples_per_second": 34.562, - "eval_steps_per_second": 4.32, - "eval_wer": 0.3534367928616135, - "step": 13400 - }, - { - "epoch": 2.6132404181184667, - "grad_norm": 1.5978127717971802, - "learning_rate": 0.0001679186440677966, - "loss": 0.3748, - "step": 13500 - }, - { - "epoch": 2.6132404181184667, - "eval_loss": 0.4606887698173523, - "eval_runtime": 163.0481, - "eval_samples_per_second": 34.689, - "eval_steps_per_second": 4.336, - "eval_wer": 0.3589253903805107, - "step": 13500 - }, - { - "epoch": 2.632597754548974, - "eval_loss": 0.45494645833969116, - "eval_runtime": 162.9584, - "eval_samples_per_second": 34.708, - "eval_steps_per_second": 4.339, - "eval_wer": 0.35372566641523967, - "step": 13600 - }, - { - "epoch": 2.6519550909794813, - "eval_loss": 0.4562608301639557, - "eval_runtime": 164.1384, - "eval_samples_per_second": 34.459, - "eval_steps_per_second": 4.307, - "eval_wer": 0.36409301728426763, - "step": 13700 - }, - { - "epoch": 2.6713124274099886, - "eval_loss": 0.44666969776153564, - "eval_runtime": 163.7423, - "eval_samples_per_second": 34.542, - "eval_steps_per_second": 4.318, - "eval_wer": 0.34369533469210894, - "step": 13800 - }, - { - "epoch": 2.6906697638404955, - "eval_loss": 0.4536294639110565, - "eval_runtime": 163.3892, - "eval_samples_per_second": 34.617, - "eval_steps_per_second": 4.327, - "eval_wer": 0.35446389883006213, - "step": 13900 - }, - { - "epoch": 2.710027100271003, - "grad_norm": 0.6591352224349976, - "learning_rate": 0.00016283389830508475, - "loss": 0.3888, - "step": 14000 - }, - { - "epoch": 2.710027100271003, - "eval_loss": 0.4504217505455017, - "eval_runtime": 163.3199, - "eval_samples_per_second": 34.631, - "eval_steps_per_second": 4.329, - "eval_wer": 0.3509653191250341, - "step": 14000 - }, - { - "epoch": 2.72938443670151, - "eval_loss": 0.44697660207748413, - "eval_runtime": 163.5943, - "eval_samples_per_second": 34.573, - "eval_steps_per_second": 4.322, - "eval_wer": 0.3602092728410714, - "step": 14100 - }, - { - "epoch": 2.748741773132017, - "eval_loss": 0.45640549063682556, - "eval_runtime": 163.6363, - "eval_samples_per_second": 34.564, - "eval_steps_per_second": 4.321, - "eval_wer": 0.3539022002535668, - "step": 14200 - }, - { - "epoch": 2.7680991095625243, - "eval_loss": 0.45214343070983887, - "eval_runtime": 164.011, - "eval_samples_per_second": 34.485, - "eval_steps_per_second": 4.311, - "eval_wer": 0.3561971401518191, - "step": 14300 - }, - { - "epoch": 2.7874564459930316, - "eval_loss": 0.4452911913394928, - "eval_runtime": 163.9028, - "eval_samples_per_second": 34.508, - "eval_steps_per_second": 4.314, - "eval_wer": 0.35221710452408084, - "step": 14400 - }, - { - "epoch": 2.8068137824235384, - "grad_norm": 0.6879103779792786, - "learning_rate": 0.00015774915254237285, - "loss": 0.376, - "step": 14500 - }, - { - "epoch": 2.8068137824235384, - "eval_loss": 0.45518526434898376, - "eval_runtime": 164.0636, - "eval_samples_per_second": 34.474, - "eval_steps_per_second": 4.309, - "eval_wer": 0.35170355153985655, - "step": 14500 - }, - { - "epoch": 2.8261711188540457, - "eval_loss": 0.45344606041908264, - "eval_runtime": 163.5444, - "eval_samples_per_second": 34.584, - "eval_steps_per_second": 4.323, - "eval_wer": 0.3549774518142864, - "step": 14600 - }, - { - "epoch": 2.845528455284553, - "eval_loss": 0.45520055294036865, - "eval_runtime": 163.3824, - "eval_samples_per_second": 34.618, - "eval_steps_per_second": 4.327, - "eval_wer": 0.3405016770714641, - "step": 14700 - }, - { - "epoch": 2.86488579171506, - "eval_loss": 0.45560306310653687, - "eval_runtime": 164.0292, - "eval_samples_per_second": 34.482, - "eval_steps_per_second": 4.31, - "eval_wer": 0.35138258092471636, - "step": 14800 - }, - { - "epoch": 2.884243128145567, - "eval_loss": 0.44232824444770813, - "eval_runtime": 164.3662, - "eval_samples_per_second": 34.411, - "eval_steps_per_second": 4.301, - "eval_wer": 0.3467606040666977, - "step": 14900 - }, - { - "epoch": 2.9036004645760745, - "grad_norm": 0.5280432105064392, - "learning_rate": 0.00015267457627118642, - "loss": 0.379, - "step": 15000 - }, - { - "epoch": 2.9036004645760745, - "eval_loss": 0.43873003125190735, - "eval_runtime": 163.5232, - "eval_samples_per_second": 34.588, - "eval_steps_per_second": 4.324, - "eval_wer": 0.34268427725441736, - "step": 15000 - }, - { - "epoch": 2.9229578010065813, - "eval_loss": 0.4372723400592804, - "eval_runtime": 163.4018, - "eval_samples_per_second": 34.614, - "eval_steps_per_second": 4.327, - "eval_wer": 0.34364718909983794, - "step": 15100 - }, - { - "epoch": 2.9423151374370886, - "eval_loss": 0.4399470090866089, - "eval_runtime": 164.335, - "eval_samples_per_second": 34.418, - "eval_steps_per_second": 4.302, - "eval_wer": 0.33870424162667906, - "step": 15200 - }, - { - "epoch": 2.961672473867596, - "eval_loss": 0.44378861784935, - "eval_runtime": 164.0477, - "eval_samples_per_second": 34.478, - "eval_steps_per_second": 4.31, - "eval_wer": 0.3380462518656417, - "step": 15300 - }, - { - "epoch": 2.9810298102981028, - "eval_loss": 0.436974436044693, - "eval_runtime": 163.6525, - "eval_samples_per_second": 34.561, - "eval_steps_per_second": 4.32, - "eval_wer": 0.3430694419925856, - "step": 15400 - }, - { - "epoch": 3.00038714672861, - "grad_norm": 0.660970151424408, - "learning_rate": 0.00014758983050847457, - "loss": 0.3731, - "step": 15500 - }, - { - "epoch": 3.00038714672861, - "eval_loss": 0.43810611963272095, - "eval_runtime": 162.9215, - "eval_samples_per_second": 34.716, - "eval_steps_per_second": 4.34, - "eval_wer": 0.33413041036093144, - "step": 15500 - }, - { - "epoch": 3.0197444831591174, - "eval_loss": 0.45139721035957336, - "eval_runtime": 164.2748, - "eval_samples_per_second": 34.43, - "eval_steps_per_second": 4.304, - "eval_wer": 0.3286418128420343, - "step": 15600 - }, - { - "epoch": 3.0391018195896247, - "eval_loss": 0.43782538175582886, - "eval_runtime": 164.2188, - "eval_samples_per_second": 34.442, - "eval_steps_per_second": 4.305, - "eval_wer": 0.3340180706456324, - "step": 15700 - }, - { - "epoch": 3.0584591560201315, - "eval_loss": 0.44340020418167114, - "eval_runtime": 163.4459, - "eval_samples_per_second": 34.605, - "eval_steps_per_second": 4.326, - "eval_wer": 0.3441446935533052, - "step": 15800 - }, - { - "epoch": 3.077816492450639, - "eval_loss": 0.44192126393318176, - "eval_runtime": 164.2009, - "eval_samples_per_second": 34.446, - "eval_steps_per_second": 4.306, - "eval_wer": 0.3399239299642118, - "step": 15900 - }, - { - "epoch": 3.097173828881146, - "grad_norm": 0.6999391913414001, - "learning_rate": 0.0001425050847457627, - "loss": 0.3176, - "step": 16000 - }, - { - "epoch": 3.097173828881146, - "eval_loss": 0.4407601058483124, - "eval_runtime": 164.7219, - "eval_samples_per_second": 34.337, - "eval_steps_per_second": 4.292, - "eval_wer": 0.3335366147229221, - "step": 16000 - }, - { - "epoch": 3.116531165311653, - "eval_loss": 0.436761736869812, - "eval_runtime": 164.2581, - "eval_samples_per_second": 34.434, - "eval_steps_per_second": 4.304, - "eval_wer": 0.33584760315193146, - "step": 16100 - }, - { - "epoch": 3.1358885017421603, - "eval_loss": 0.4477560520172119, - "eval_runtime": 163.8979, - "eval_samples_per_second": 34.509, - "eval_steps_per_second": 4.314, - "eval_wer": 0.3400523182102678, - "step": 16200 - }, - { - "epoch": 3.1552458381726676, - "eval_loss": 0.4414171576499939, - "eval_runtime": 164.4332, - "eval_samples_per_second": 34.397, - "eval_steps_per_second": 4.3, - "eval_wer": 0.3373882621046043, - "step": 16300 - }, - { - "epoch": 3.1746031746031744, - "eval_loss": 0.4476623833179474, - "eval_runtime": 163.6322, - "eval_samples_per_second": 34.565, - "eval_steps_per_second": 4.321, - "eval_wer": 0.335013079552567, - "step": 16400 - }, - { - "epoch": 3.1939605110336817, - "grad_norm": 0.5408484935760498, - "learning_rate": 0.00013742033898305083, - "loss": 0.3201, - "step": 16500 - }, - { - "epoch": 3.1939605110336817, - "eval_loss": 0.4305751919746399, - "eval_runtime": 163.8253, - "eval_samples_per_second": 34.525, - "eval_steps_per_second": 4.316, - "eval_wer": 0.32917141435701563, - "step": 16500 - }, - { - "epoch": 3.213317847464189, - "eval_loss": 0.4534677267074585, - "eval_runtime": 163.8666, - "eval_samples_per_second": 34.516, - "eval_steps_per_second": 4.314, - "eval_wer": 0.32941214231837074, - "step": 16600 - }, - { - "epoch": 3.2326751838946963, - "eval_loss": 0.4379562437534332, - "eval_runtime": 164.1184, - "eval_samples_per_second": 34.463, - "eval_steps_per_second": 4.308, - "eval_wer": 0.33408226476866043, - "step": 16700 - }, - { - "epoch": 3.252032520325203, - "eval_loss": 0.43677663803100586, - "eval_runtime": 163.6899, - "eval_samples_per_second": 34.553, - "eval_steps_per_second": 4.319, - "eval_wer": 0.33252555728523053, - "step": 16800 - }, - { - "epoch": 3.2713898567557105, - "eval_loss": 0.4359833598136902, - "eval_runtime": 164.1551, - "eval_samples_per_second": 34.455, - "eval_steps_per_second": 4.307, - "eval_wer": 0.33043924828681936, - "step": 16900 - }, - { - "epoch": 3.290747193186218, - "grad_norm": 0.5537161231040955, - "learning_rate": 0.00013233559322033898, - "loss": 0.3101, - "step": 17000 - }, - { - "epoch": 3.290747193186218, - "eval_loss": 0.4347226917743683, - "eval_runtime": 164.2964, - "eval_samples_per_second": 34.426, - "eval_steps_per_second": 4.303, - "eval_wer": 0.32812825985781, - "step": 17000 - }, - { - "epoch": 3.3101045296167246, - "eval_loss": 0.4375491738319397, - "eval_runtime": 162.8938, - "eval_samples_per_second": 34.722, - "eval_steps_per_second": 4.34, - "eval_wer": 0.3284973760652212, - "step": 17100 - }, - { - "epoch": 3.329461866047232, - "eval_loss": 0.4491961896419525, - "eval_runtime": 163.5698, - "eval_samples_per_second": 34.579, - "eval_steps_per_second": 4.322, - "eval_wer": 0.33032690857152025, - "step": 17200 - }, - { - "epoch": 3.3488192024777392, - "eval_loss": 0.4268127977848053, - "eval_runtime": 164.9468, - "eval_samples_per_second": 34.29, - "eval_steps_per_second": 4.286, - "eval_wer": 0.3284652790037072, - "step": 17300 - }, - { - "epoch": 3.368176538908246, - "eval_loss": 0.4377237558364868, - "eval_runtime": 164.0847, - "eval_samples_per_second": 34.47, - "eval_steps_per_second": 4.309, - "eval_wer": 0.3269888141740624, - "step": 17400 - }, - { - "epoch": 3.3875338753387534, - "grad_norm": 0.5330023765563965, - "learning_rate": 0.00012726101694915254, - "loss": 0.2963, - "step": 17500 - }, - { - "epoch": 3.3875338753387534, - "eval_loss": 0.42494186758995056, - "eval_runtime": 163.0698, - "eval_samples_per_second": 34.685, - "eval_steps_per_second": 4.336, - "eval_wer": 0.3322527322623614, - "step": 17500 - }, - { - "epoch": 3.4068912117692607, - "eval_loss": 0.4404699206352234, - "eval_runtime": 164.4408, - "eval_samples_per_second": 34.395, - "eval_steps_per_second": 4.299, - "eval_wer": 0.3338736338688193, - "step": 17600 - }, - { - "epoch": 3.4262485481997675, - "eval_loss": 0.43636277318000793, - "eval_runtime": 163.9679, - "eval_samples_per_second": 34.495, - "eval_steps_per_second": 4.312, - "eval_wer": 0.3285615701882493, - "step": 17700 - }, - { - "epoch": 3.445605884630275, - "eval_loss": 0.4350505769252777, - "eval_runtime": 163.6477, - "eval_samples_per_second": 34.562, - "eval_steps_per_second": 4.32, - "eval_wer": 0.3309207042095296, - "step": 17800 - }, - { - "epoch": 3.464963221060782, - "eval_loss": 0.42997920513153076, - "eval_runtime": 163.7592, - "eval_samples_per_second": 34.539, - "eval_steps_per_second": 4.317, - "eval_wer": 0.322880390300268, - "step": 17900 - }, - { - "epoch": 3.484320557491289, - "grad_norm": 0.31616127490997314, - "learning_rate": 0.00012217627118644067, - "loss": 0.3062, - "step": 18000 - }, - { - "epoch": 3.484320557491289, - "eval_loss": 0.42307358980178833, - "eval_runtime": 164.6469, - "eval_samples_per_second": 34.352, - "eval_steps_per_second": 4.294, - "eval_wer": 0.32523952432154835, - "step": 18000 - }, - { - "epoch": 3.5036778939217963, - "eval_loss": 0.4325993061065674, - "eval_runtime": 164.1143, - "eval_samples_per_second": 34.464, - "eval_steps_per_second": 4.308, - "eval_wer": 0.32326555503843624, - "step": 18100 - }, - { - "epoch": 3.5230352303523036, - "eval_loss": 0.43141353130340576, - "eval_runtime": 163.522, - "eval_samples_per_second": 34.589, - "eval_steps_per_second": 4.324, - "eval_wer": 0.3282405995731091, - "step": 18200 - }, - { - "epoch": 3.5423925667828104, - "eval_loss": 0.4343957006931305, - "eval_runtime": 163.8089, - "eval_samples_per_second": 34.528, - "eval_steps_per_second": 4.316, - "eval_wer": 0.32894673492641746, - "step": 18300 - }, - { - "epoch": 3.5617499032133177, - "eval_loss": 0.42664915323257446, - "eval_runtime": 166.5849, - "eval_samples_per_second": 33.953, - "eval_steps_per_second": 4.244, - "eval_wer": 0.32207796376241754, - "step": 18400 - }, - { - "epoch": 3.581107239643825, - "grad_norm": 0.40817028284072876, - "learning_rate": 0.00011710169491525424, - "loss": 0.2968, - "step": 18500 - }, - { - "epoch": 3.581107239643825, - "eval_loss": 0.4305819571018219, - "eval_runtime": 164.3676, - "eval_samples_per_second": 34.411, - "eval_steps_per_second": 4.301, - "eval_wer": 0.32161255637046426, - "step": 18500 - }, - { - "epoch": 3.600464576074332, - "eval_loss": 0.4318545460700989, - "eval_runtime": 166.4377, - "eval_samples_per_second": 33.983, - "eval_steps_per_second": 4.248, - "eval_wer": 0.3238914477379596, - "step": 18600 - }, - { - "epoch": 3.619821912504839, - "eval_loss": 0.4271145164966583, - "eval_runtime": 166.5812, - "eval_samples_per_second": 33.953, - "eval_steps_per_second": 4.244, - "eval_wer": 0.3232013609154082, - "step": 18700 - }, - { - "epoch": 3.6391792489353465, - "eval_loss": 0.41837719082832336, - "eval_runtime": 164.5624, - "eval_samples_per_second": 34.37, - "eval_steps_per_second": 4.296, - "eval_wer": 0.32641106706681006, - "step": 18800 - }, - { - "epoch": 3.658536585365854, - "eval_loss": 0.4237981140613556, - "eval_runtime": 165.5252, - "eval_samples_per_second": 34.17, - "eval_steps_per_second": 4.271, - "eval_wer": 0.31997560623324933, - "step": 18900 - }, - { - "epoch": 3.6778939217963607, - "grad_norm": 0.9548519253730774, - "learning_rate": 0.00011201694915254236, - "loss": 0.3191, - "step": 19000 - }, - { - "epoch": 3.6778939217963607, - "eval_loss": 0.41389960050582886, - "eval_runtime": 163.7093, - "eval_samples_per_second": 34.549, - "eval_steps_per_second": 4.319, - "eval_wer": 0.3225915167466418, - "step": 19000 - }, - { - "epoch": 3.697251258226868, - "eval_loss": 0.42384064197540283, - "eval_runtime": 164.2841, - "eval_samples_per_second": 34.428, - "eval_steps_per_second": 4.304, - "eval_wer": 0.3159955706055111, - "step": 19100 - }, - { - "epoch": 3.7166085946573753, - "eval_loss": 0.4176156520843506, - "eval_runtime": 163.8287, - "eval_samples_per_second": 34.524, - "eval_steps_per_second": 4.315, - "eval_wer": 0.319301567941455, - "step": 19200 - }, - { - "epoch": 3.7359659310878826, - "eval_loss": 0.4196203351020813, - "eval_runtime": 165.0023, - "eval_samples_per_second": 34.278, - "eval_steps_per_second": 4.285, - "eval_wer": 0.3202644797868755, - "step": 19300 - }, - { - "epoch": 3.7553232675183894, - "eval_loss": 0.409524530172348, - "eval_runtime": 164.4937, - "eval_samples_per_second": 34.384, - "eval_steps_per_second": 4.298, - "eval_wer": 0.3181621222577073, - "step": 19400 - }, - { - "epoch": 3.7746806039488967, - "grad_norm": 0.43373510241508484, - "learning_rate": 0.00010693220338983049, - "loss": 0.2921, - "step": 19500 - }, - { - "epoch": 3.7746806039488967, - "eval_loss": 0.41209807991981506, - "eval_runtime": 164.8329, - "eval_samples_per_second": 34.314, - "eval_steps_per_second": 4.289, - "eval_wer": 0.31665356036654846, - "step": 19500 - }, - { - "epoch": 3.794037940379404, - "eval_loss": 0.4112759530544281, - "eval_runtime": 164.1863, - "eval_samples_per_second": 34.449, - "eval_steps_per_second": 4.306, - "eval_wer": 0.31455120283738025, - "step": 19600 - }, - { - "epoch": 3.813395276809911, - "eval_loss": 0.4094259738922119, - "eval_runtime": 164.4841, - "eval_samples_per_second": 34.386, - "eval_steps_per_second": 4.298, - "eval_wer": 0.3160758132592961, - "step": 19700 - }, - { - "epoch": 3.832752613240418, - "eval_loss": 0.40931811928749084, - "eval_runtime": 164.2911, - "eval_samples_per_second": 34.427, - "eval_steps_per_second": 4.303, - "eval_wer": 0.31386111601482886, - "step": 19800 - }, - { - "epoch": 3.8521099496709255, - "eval_loss": 0.41117748618125916, - "eval_runtime": 165.1732, - "eval_samples_per_second": 34.243, - "eval_steps_per_second": 4.28, - "eval_wer": 0.31731155012758583, - "step": 19900 - }, - { - "epoch": 3.8714672861014323, - "grad_norm": 0.5022397637367249, - "learning_rate": 0.00010184745762711863, - "loss": 0.3007, - "step": 20000 - }, - { - "epoch": 3.8714672861014323, - "eval_loss": 0.4092504680156708, - "eval_runtime": 163.9434, - "eval_samples_per_second": 34.5, - "eval_steps_per_second": 4.312, - "eval_wer": 0.31593137648248304, - "step": 20000 - }, - { - "epoch": 3.8908246225319396, - "eval_loss": 0.4147598147392273, - "eval_runtime": 164.6303, - "eval_samples_per_second": 34.356, - "eval_steps_per_second": 4.294, - "eval_wer": 0.31565855145961386, - "step": 20100 - }, - { - "epoch": 3.910181958962447, - "eval_loss": 0.41137251257896423, - "eval_runtime": 164.6634, - "eval_samples_per_second": 34.349, - "eval_steps_per_second": 4.294, - "eval_wer": 0.3150326587600905, - "step": 20200 - }, - { - "epoch": 3.9295392953929538, - "eval_loss": 0.4155375361442566, - "eval_runtime": 164.3406, - "eval_samples_per_second": 34.416, - "eval_steps_per_second": 4.302, - "eval_wer": 0.31456725136813724, - "step": 20300 - }, - { - "epoch": 3.948896631823461, - "eval_loss": 0.4075925648212433, - "eval_runtime": 164.3692, - "eval_samples_per_second": 34.41, - "eval_steps_per_second": 4.301, - "eval_wer": 0.3135722424612027, - "step": 20400 - }, - { - "epoch": 3.9682539682539684, - "grad_norm": 0.6109060049057007, - "learning_rate": 9.67728813559322e-05, - "loss": 0.296, - "step": 20500 - }, - { - "epoch": 3.9682539682539684, - "eval_loss": 0.4066578149795532, - "eval_runtime": 164.7852, - "eval_samples_per_second": 34.323, - "eval_steps_per_second": 4.29, - "eval_wer": 0.3125611850235111, - "step": 20500 - }, - { - "epoch": 3.987611304684475, - "eval_loss": 0.40839362144470215, - "eval_runtime": 164.8883, - "eval_samples_per_second": 34.302, - "eval_steps_per_second": 4.288, - "eval_wer": 0.3150487072908475, - "step": 20600 - }, - { - "epoch": 4.006968641114983, - "eval_loss": 0.4150494635105133, - "eval_runtime": 164.1525, - "eval_samples_per_second": 34.456, - "eval_steps_per_second": 4.307, - "eval_wer": 0.312432796777455, - "step": 20700 - }, - { - "epoch": 4.02632597754549, - "eval_loss": 0.41322341561317444, - "eval_runtime": 164.6726, - "eval_samples_per_second": 34.347, - "eval_steps_per_second": 4.293, - "eval_wer": 0.3132512718460625, - "step": 20800 - }, - { - "epoch": 4.045683313975997, - "eval_loss": 0.4182606339454651, - "eval_runtime": 164.9667, - "eval_samples_per_second": 34.286, - "eval_steps_per_second": 4.286, - "eval_wer": 0.31464749402192227, - "step": 20900 - }, - { - "epoch": 4.065040650406504, - "grad_norm": 0.9771650433540344, - "learning_rate": 9.168813559322032e-05, - "loss": 0.2611, - "step": 21000 - }, - { - "epoch": 4.065040650406504, - "eval_loss": 0.41840454936027527, - "eval_runtime": 164.8893, - "eval_samples_per_second": 34.302, - "eval_steps_per_second": 4.288, - "eval_wer": 0.30952801271043634, - "step": 21000 - }, - { - "epoch": 4.084397986837011, - "eval_loss": 0.4167742431163788, - "eval_runtime": 165.1089, - "eval_samples_per_second": 34.256, - "eval_steps_per_second": 4.282, - "eval_wer": 0.30845276114971676, - "step": 21100 - }, - { - "epoch": 4.103755323267518, - "eval_loss": 0.42244288325309753, - "eval_runtime": 164.9167, - "eval_samples_per_second": 34.296, - "eval_steps_per_second": 4.287, - "eval_wer": 0.31015390540995974, - "step": 21200 - }, - { - "epoch": 4.123112659698026, - "eval_loss": 0.4187394678592682, - "eval_runtime": 164.7166, - "eval_samples_per_second": 34.338, - "eval_steps_per_second": 4.292, - "eval_wer": 0.30456901670652053, - "step": 21300 - }, - { - "epoch": 4.142469996128533, - "eval_loss": 0.41454723477363586, - "eval_runtime": 164.38, - "eval_samples_per_second": 34.408, - "eval_steps_per_second": 4.301, - "eval_wer": 0.3110044775400812, - "step": 21400 - }, - { - "epoch": 4.16182733255904, - "grad_norm": 0.8976078629493713, - "learning_rate": 8.660338983050847e-05, - "loss": 0.2431, - "step": 21500 - }, - { - "epoch": 4.16182733255904, - "eval_loss": 0.42720434069633484, - "eval_runtime": 165.0533, - "eval_samples_per_second": 34.268, - "eval_steps_per_second": 4.283, - "eval_wer": 0.31071560398645504, - "step": 21500 - }, - { - "epoch": 4.181184668989547, - "eval_loss": 0.41736435890197754, - "eval_runtime": 164.6627, - "eval_samples_per_second": 34.349, - "eval_steps_per_second": 4.294, - "eval_wer": 0.3069923448508289, - "step": 21600 - }, - { - "epoch": 4.200542005420054, - "eval_loss": 0.41904589533805847, - "eval_runtime": 165.6169, - "eval_samples_per_second": 34.151, - "eval_steps_per_second": 4.269, - "eval_wer": 0.3085811493957728, - "step": 21700 - }, - { - "epoch": 4.219899341850561, - "eval_loss": 0.41643446683883667, - "eval_runtime": 165.1417, - "eval_samples_per_second": 34.249, - "eval_steps_per_second": 4.281, - "eval_wer": 0.3050825696907448, - "step": 21800 - }, - { - "epoch": 4.239256678281069, - "eval_loss": 0.41955476999282837, - "eval_runtime": 165.2591, - "eval_samples_per_second": 34.225, - "eval_steps_per_second": 4.278, - "eval_wer": 0.30777872285792235, - "step": 21900 - }, - { - "epoch": 4.258614014711576, - "grad_norm": 1.5854851007461548, - "learning_rate": 8.15186440677966e-05, - "loss": 0.2453, - "step": 22000 - }, - { - "epoch": 4.258614014711576, - "eval_loss": 0.42485129833221436, - "eval_runtime": 164.7312, - "eval_samples_per_second": 34.335, - "eval_steps_per_second": 4.292, - "eval_wer": 0.30915889650302514, - "step": 22000 - }, - { - "epoch": 4.2779713511420825, - "eval_loss": 0.4246067404747009, - "eval_runtime": 164.9099, - "eval_samples_per_second": 34.298, - "eval_steps_per_second": 4.287, - "eval_wer": 0.30736146105824014, - "step": 22100 - }, - { - "epoch": 4.29732868757259, - "eval_loss": 0.4166228771209717, - "eval_runtime": 164.9564, - "eval_samples_per_second": 34.288, - "eval_steps_per_second": 4.286, - "eval_wer": 0.30740960665051115, - "step": 22200 - }, - { - "epoch": 4.316686024003097, - "eval_loss": 0.4192067086696625, - "eval_runtime": 165.6591, - "eval_samples_per_second": 34.142, - "eval_steps_per_second": 4.268, - "eval_wer": 0.3027555327309785, - "step": 22300 - }, - { - "epoch": 4.336043360433604, - "eval_loss": 0.41863906383514404, - "eval_runtime": 164.9558, - "eval_samples_per_second": 34.288, - "eval_steps_per_second": 4.286, - "eval_wer": 0.3020975429699411, - "step": 22400 - }, - { - "epoch": 4.355400696864112, - "grad_norm": 1.1900339126586914, - "learning_rate": 7.645423728813559e-05, - "loss": 0.2336, - "step": 22500 - }, - { - "epoch": 4.355400696864112, - "eval_loss": 0.4268459677696228, - "eval_runtime": 166.7137, - "eval_samples_per_second": 33.926, - "eval_steps_per_second": 4.241, - "eval_wer": 0.3083885670266887, - "step": 22500 - }, - { - "epoch": 4.3747580332946185, - "eval_loss": 0.4346672296524048, - "eval_runtime": 170.3751, - "eval_samples_per_second": 33.197, - "eval_steps_per_second": 4.15, - "eval_wer": 0.307104684566128, - "step": 22600 - }, - { - "epoch": 4.394115369725126, - "eval_loss": 0.47525468468666077, - "eval_runtime": 164.9807, - "eval_samples_per_second": 34.283, - "eval_steps_per_second": 4.285, - "eval_wer": 0.3208582754248849, - "step": 22700 - }, - { - "epoch": 4.413472706155633, - "eval_loss": 0.582381546497345, - "eval_runtime": 165.1397, - "eval_samples_per_second": 34.25, - "eval_steps_per_second": 4.281, - "eval_wer": 0.415416218645183, - "step": 22800 - }, - { - "epoch": 4.43283004258614, - "eval_loss": 0.5073803067207336, - "eval_runtime": 165.3352, - "eval_samples_per_second": 34.209, - "eval_steps_per_second": 4.276, - "eval_wer": 0.3415448315706697, - "step": 22900 - }, - { - "epoch": 4.452187379016648, - "grad_norm": 1.5807456970214844, - "learning_rate": 7.136949152542373e-05, - "loss": 0.3426, - "step": 23000 - }, - { - "epoch": 4.452187379016648, - "eval_loss": 0.6242379546165466, - "eval_runtime": 164.8642, - "eval_samples_per_second": 34.307, - "eval_steps_per_second": 4.288, - "eval_wer": 0.41979746754184655, - "step": 23000 - }, - { - "epoch": 4.471544715447155, - "eval_loss": 0.5862211585044861, - "eval_runtime": 164.8283, - "eval_samples_per_second": 34.314, - "eval_steps_per_second": 4.289, - "eval_wer": 0.4200702925647157, - "step": 23100 - }, - { - "epoch": 4.4909020518776614, - "eval_loss": 0.6151086091995239, - "eval_runtime": 165.0995, - "eval_samples_per_second": 34.258, - "eval_steps_per_second": 4.282, - "eval_wer": 0.39638266116737014, - "step": 23200 - }, - { - "epoch": 4.510259388308169, - "eval_loss": 0.5640283226966858, - "eval_runtime": 164.849, - "eval_samples_per_second": 34.31, - "eval_steps_per_second": 4.289, - "eval_wer": 0.3685705573654732, - "step": 23300 - }, - { - "epoch": 4.529616724738676, - "eval_loss": 0.6589744091033936, - "eval_runtime": 164.8194, - "eval_samples_per_second": 34.316, - "eval_steps_per_second": 4.29, - "eval_wer": 0.4647494021922293, - "step": 23400 - }, - { - "epoch": 4.548974061169183, - "grad_norm": 1.0218427181243896, - "learning_rate": 6.628474576271186e-05, - "loss": 0.4541, - "step": 23500 - }, - { - "epoch": 4.548974061169183, - "eval_loss": 0.6010532975196838, - "eval_runtime": 165.0253, - "eval_samples_per_second": 34.274, - "eval_steps_per_second": 4.284, - "eval_wer": 0.3959974964292019, - "step": 23500 - }, - { - "epoch": 4.568331397599691, - "eval_loss": 0.5802894830703735, - "eval_runtime": 166.0838, - "eval_samples_per_second": 34.055, - "eval_steps_per_second": 4.257, - "eval_wer": 0.39505063311453836, - "step": 23600 - }, - { - "epoch": 4.5876887340301975, - "eval_loss": 0.5762883424758911, - "eval_runtime": 165.1308, - "eval_samples_per_second": 34.252, - "eval_steps_per_second": 4.281, - "eval_wer": 0.3910545489560431, - "step": 23700 - }, - { - "epoch": 4.607046070460704, - "eval_loss": 0.5418487787246704, - "eval_runtime": 165.1407, - "eval_samples_per_second": 34.25, - "eval_steps_per_second": 4.281, - "eval_wer": 0.36550528799088444, - "step": 23800 - }, - { - "epoch": 4.626403406891212, - "eval_loss": 0.5546759366989136, - "eval_runtime": 165.8678, - "eval_samples_per_second": 34.099, - "eval_steps_per_second": 4.262, - "eval_wer": 0.38877565758854776, - "step": 23900 - }, - { - "epoch": 4.645760743321719, - "grad_norm": 32.50680923461914, - "learning_rate": 6.12e-05, - "loss": 0.4145, - "step": 24000 - }, - { - "epoch": 4.645760743321719, - "eval_loss": 0.5300523638725281, - "eval_runtime": 164.9724, - "eval_samples_per_second": 34.285, - "eval_steps_per_second": 4.286, - "eval_wer": 0.3608030684790807, - "step": 24000 - }, - { - "epoch": 4.665118079752226, - "eval_loss": 0.573882520198822, - "eval_runtime": 165.2226, - "eval_samples_per_second": 34.233, - "eval_steps_per_second": 4.279, - "eval_wer": 0.39927139670363176, - "step": 24100 - }, - { - "epoch": 4.6844754161827336, - "eval_loss": 0.5775899887084961, - "eval_runtime": 165.3691, - "eval_samples_per_second": 34.202, - "eval_steps_per_second": 4.275, - "eval_wer": 0.39816404808139816, - "step": 24200 - }, - { - "epoch": 4.70383275261324, - "eval_loss": 0.5412492156028748, - "eval_runtime": 164.9818, - "eval_samples_per_second": 34.283, - "eval_steps_per_second": 4.285, - "eval_wer": 0.37078525460994044, - "step": 24300 - }, - { - "epoch": 4.723190089043747, - "eval_loss": 0.5329325199127197, - "eval_runtime": 165.8065, - "eval_samples_per_second": 34.112, - "eval_steps_per_second": 4.264, - "eval_wer": 0.37044823546404326, - "step": 24400 - }, - { - "epoch": 4.742547425474255, - "grad_norm": 1.8765805959701538, - "learning_rate": 5.611525423728813e-05, - "loss": 0.3834, - "step": 24500 - }, - { - "epoch": 4.742547425474255, - "eval_loss": 0.5299070477485657, - "eval_runtime": 165.3917, - "eval_samples_per_second": 34.198, - "eval_steps_per_second": 4.275, - "eval_wer": 0.3732246312850059, - "step": 24500 - }, - { - "epoch": 4.761904761904762, - "eval_loss": 0.5424681901931763, - "eval_runtime": 165.4071, - "eval_samples_per_second": 34.194, - "eval_steps_per_second": 4.274, - "eval_wer": 0.3928519844008281, - "step": 24600 - }, - { - "epoch": 4.781262098335269, - "eval_loss": 0.5111268758773804, - "eval_runtime": 165.4914, - "eval_samples_per_second": 34.177, - "eval_steps_per_second": 4.272, - "eval_wer": 0.3585241771115854, - "step": 24700 - }, - { - "epoch": 4.8006194347657765, - "eval_loss": 0.5076457858085632, - "eval_runtime": 165.6732, - "eval_samples_per_second": 34.14, - "eval_steps_per_second": 4.267, - "eval_wer": 0.35033942642551075, - "step": 24800 - }, - { - "epoch": 4.819976771196283, - "eval_loss": 0.5261921882629395, - "eval_runtime": 165.2946, - "eval_samples_per_second": 34.218, - "eval_steps_per_second": 4.277, - "eval_wer": 0.3681372470350339, - "step": 24900 - }, - { - "epoch": 4.83933410762679, - "grad_norm": 5.934371471405029, - "learning_rate": 5.1030508474576264e-05, - "loss": 0.3719, - "step": 25000 - }, - { - "epoch": 4.83933410762679, - "eval_loss": 0.547415018081665, - "eval_runtime": 165.6997, - "eval_samples_per_second": 34.134, - "eval_steps_per_second": 4.267, - "eval_wer": 0.3833031086004076, - "step": 25000 - }, - { - "epoch": 4.858691444057298, - "eval_loss": 0.5746738910675049, - "eval_runtime": 165.8407, - "eval_samples_per_second": 34.105, - "eval_steps_per_second": 4.263, - "eval_wer": 0.40389337356165045, - "step": 25100 - }, - { - "epoch": 4.878048780487805, - "eval_loss": 0.5188133120536804, - "eval_runtime": 165.5746, - "eval_samples_per_second": 34.16, - "eval_steps_per_second": 4.27, - "eval_wer": 0.3503073293639967, - "step": 25200 - }, - { - "epoch": 4.897406116918312, - "eval_loss": 0.5522667169570923, - "eval_runtime": 165.1011, - "eval_samples_per_second": 34.258, - "eval_steps_per_second": 4.282, - "eval_wer": 0.3865609603440805, - "step": 25300 - }, - { - "epoch": 4.916763453348819, - "eval_loss": 0.5302358865737915, - "eval_runtime": 165.871, - "eval_samples_per_second": 34.099, - "eval_steps_per_second": 4.262, - "eval_wer": 0.36446213349167883, - "step": 25400 - }, - { - "epoch": 4.936120789779326, - "grad_norm": 1.1752023696899414, - "learning_rate": 4.595593220338983e-05, - "loss": 0.3798, - "step": 25500 - }, - { - "epoch": 4.936120789779326, - "eval_loss": 0.5099266767501831, - "eval_runtime": 165.9652, - "eval_samples_per_second": 34.079, - "eval_steps_per_second": 4.26, - "eval_wer": 0.3499542616873425, - "step": 25500 - }, - { - "epoch": 4.955478126209833, - "eval_loss": 0.4823363125324249, - "eval_runtime": 164.8602, - "eval_samples_per_second": 34.308, - "eval_steps_per_second": 4.288, - "eval_wer": 0.33761294153520244, - "step": 25600 - }, - { - "epoch": 4.974835462640341, - "eval_loss": 0.4805842936038971, - "eval_runtime": 166.8028, - "eval_samples_per_second": 33.908, - "eval_steps_per_second": 4.239, - "eval_wer": 0.3357352634366324, - "step": 25700 - }, - { - "epoch": 4.994192799070848, - "eval_loss": 0.4942820370197296, - "eval_runtime": 165.4552, - "eval_samples_per_second": 34.184, - "eval_steps_per_second": 4.273, - "eval_wer": 0.35093322206352007, - "step": 25800 - }, - { - "epoch": 5.013550135501355, - "eval_loss": 0.49528568983078003, - "eval_runtime": 165.947, - "eval_samples_per_second": 34.083, - "eval_steps_per_second": 4.26, - "eval_wer": 0.35245783248543594, - "step": 25900 - }, - { - "epoch": 5.032907471931862, - "grad_norm": 27.542322158813477, - "learning_rate": 4.087118644067796e-05, - "loss": 0.3158, - "step": 26000 - }, - { - "epoch": 5.032907471931862, - "eval_loss": 0.485315203666687, - "eval_runtime": 165.2844, - "eval_samples_per_second": 34.22, - "eval_steps_per_second": 4.277, - "eval_wer": 0.34703342908956686, - "step": 26000 - }, - { - "epoch": 5.052264808362369, - "eval_loss": 0.5204781293869019, - "eval_runtime": 165.773, - "eval_samples_per_second": 34.119, - "eval_steps_per_second": 4.265, - "eval_wer": 0.36183017444752935, - "step": 26100 - }, - { - "epoch": 5.071622144792877, - "eval_loss": 0.5013459920883179, - "eval_runtime": 165.057, - "eval_samples_per_second": 34.267, - "eval_steps_per_second": 4.283, - "eval_wer": 0.3510455617788191, - "step": 26200 - }, - { - "epoch": 5.090979481223384, - "eval_loss": 0.4863474667072296, - "eval_runtime": 165.6964, - "eval_samples_per_second": 34.135, - "eval_steps_per_second": 4.267, - "eval_wer": 0.3396511049413426, - "step": 26300 - }, - { - "epoch": 5.110336817653891, - "eval_loss": 0.47152572870254517, - "eval_runtime": 166.0563, - "eval_samples_per_second": 34.061, - "eval_steps_per_second": 4.258, - "eval_wer": 0.32851342459597827, - "step": 26400 - }, - { - "epoch": 5.129694154084398, - "grad_norm": 0.8464019894599915, - "learning_rate": 3.5786440677966095e-05, - "loss": 0.2993, - "step": 26500 - }, - { - "epoch": 5.129694154084398, - "eval_loss": 0.4816218912601471, - "eval_runtime": 165.4176, - "eval_samples_per_second": 34.192, - "eval_steps_per_second": 4.274, - "eval_wer": 0.33273418818507167, - "step": 26500 - }, - { - "epoch": 5.149051490514905, - "eval_loss": 0.48058804869651794, - "eval_runtime": 166.4075, - "eval_samples_per_second": 33.989, - "eval_steps_per_second": 4.249, - "eval_wer": 0.33811044598866974, - "step": 26600 - }, - { - "epoch": 5.168408826945412, - "eval_loss": 0.4854019284248352, - "eval_runtime": 165.1934, - "eval_samples_per_second": 34.239, - "eval_steps_per_second": 4.28, - "eval_wer": 0.33416250742244547, - "step": 26700 - }, - { - "epoch": 5.18776616337592, - "eval_loss": 0.49545472860336304, - "eval_runtime": 165.6735, - "eval_samples_per_second": 34.139, - "eval_steps_per_second": 4.267, - "eval_wer": 0.3433422670154547, - "step": 26800 - }, - { - "epoch": 5.207123499806427, - "eval_loss": 0.4862872064113617, - "eval_runtime": 165.5277, - "eval_samples_per_second": 34.17, - "eval_steps_per_second": 4.271, - "eval_wer": 0.34337436407696875, - "step": 26900 - }, - { - "epoch": 5.2264808362369335, - "grad_norm": 10.611580848693848, - "learning_rate": 3.0701694915254236e-05, - "loss": 0.2902, - "step": 27000 - }, - { - "epoch": 5.2264808362369335, - "eval_loss": 0.48670876026153564, - "eval_runtime": 165.4986, - "eval_samples_per_second": 34.176, - "eval_steps_per_second": 4.272, - "eval_wer": 0.3448508289066136, - "step": 27000 - }, - { - "epoch": 5.245838172667441, - "eval_loss": 0.4787338674068451, - "eval_runtime": 165.461, - "eval_samples_per_second": 34.183, - "eval_steps_per_second": 4.273, - "eval_wer": 0.33778947537352955, - "step": 27100 - }, - { - "epoch": 5.265195509097948, - "eval_loss": 0.4861724376678467, - "eval_runtime": 165.3459, - "eval_samples_per_second": 34.207, - "eval_steps_per_second": 4.276, - "eval_wer": 0.33793391215034263, - "step": 27200 - }, - { - "epoch": 5.284552845528455, - "eval_loss": 0.4954308271408081, - "eval_runtime": 165.6637, - "eval_samples_per_second": 34.141, - "eval_steps_per_second": 4.268, - "eval_wer": 0.3467927011282117, - "step": 27300 - }, - { - "epoch": 5.303910181958963, - "eval_loss": 0.572640061378479, - "eval_runtime": 165.6804, - "eval_samples_per_second": 34.138, - "eval_steps_per_second": 4.267, - "eval_wer": 0.41416443324613633, - "step": 27400 - }, - { - "epoch": 5.3232675183894695, - "grad_norm": 1.2211335897445679, - "learning_rate": 2.5627118644067793e-05, - "loss": 0.305, - "step": 27500 - }, - { - "epoch": 5.3232675183894695, - "eval_loss": 0.5179979205131531, - "eval_runtime": 165.6208, - "eval_samples_per_second": 34.15, - "eval_steps_per_second": 4.269, - "eval_wer": 0.35735263436632375, - "step": 27500 - }, - { - "epoch": 5.342624854819976, - "eval_loss": 0.4996646046638489, - "eval_runtime": 164.9613, - "eval_samples_per_second": 34.287, - "eval_steps_per_second": 4.286, - "eval_wer": 0.3452038965832678, - "step": 27600 - }, - { - "epoch": 5.361982191250484, - "eval_loss": 0.4949517846107483, - "eval_runtime": 165.643, - "eval_samples_per_second": 34.146, - "eval_steps_per_second": 4.268, - "eval_wer": 0.34130410360931457, - "step": 27700 - }, - { - "epoch": 5.381339527680991, - "eval_loss": 0.5071349143981934, - "eval_runtime": 166.3001, - "eval_samples_per_second": 34.011, - "eval_steps_per_second": 4.251, - "eval_wer": 0.3491999807417631, - "step": 27800 - }, - { - "epoch": 5.400696864111498, - "eval_loss": 0.5095939040184021, - "eval_runtime": 165.4785, - "eval_samples_per_second": 34.18, - "eval_steps_per_second": 4.272, - "eval_wer": 0.3544799473608191, - "step": 27900 - }, - { - "epoch": 5.420054200542006, - "grad_norm": 15.024033546447754, - "learning_rate": 2.054237288135593e-05, - "loss": 0.3163, - "step": 28000 - }, - { - "epoch": 5.420054200542006, - "eval_loss": 0.5129156112670898, - "eval_runtime": 166.1505, - "eval_samples_per_second": 34.041, - "eval_steps_per_second": 4.255, - "eval_wer": 0.3565983534207443, - "step": 28000 - }, - { - "epoch": 5.4394115369725125, - "eval_loss": 0.5067318677902222, - "eval_runtime": 165.9899, - "eval_samples_per_second": 34.074, - "eval_steps_per_second": 4.259, - "eval_wer": 0.3506122514483799, - "step": 28100 - }, - { - "epoch": 5.45876887340302, - "eval_loss": 0.5053198337554932, - "eval_runtime": 165.3351, - "eval_samples_per_second": 34.209, - "eval_steps_per_second": 4.276, - "eval_wer": 0.35000240727961357, - "step": 28200 - }, - { - "epoch": 5.478126209833527, - "eval_loss": 0.5077947974205017, - "eval_runtime": 165.3012, - "eval_samples_per_second": 34.216, - "eval_steps_per_second": 4.277, - "eval_wer": 0.3518640368474266, - "step": 28300 - }, - { - "epoch": 5.497483546264034, - "eval_loss": 0.48453789949417114, - "eval_runtime": 165.2767, - "eval_samples_per_second": 34.221, - "eval_steps_per_second": 4.278, - "eval_wer": 0.3375166503506604, - "step": 28400 - }, - { - "epoch": 5.516840882694542, - "grad_norm": 0.43120303750038147, - "learning_rate": 1.5457627118644067e-05, - "loss": 0.3136, - "step": 28500 - }, - { - "epoch": 5.516840882694542, - "eval_loss": 0.4930485486984253, - "eval_runtime": 165.9777, - "eval_samples_per_second": 34.077, - "eval_steps_per_second": 4.26, - "eval_wer": 0.3439842082457351, - "step": 28500 - }, - { - "epoch": 5.5361982191250485, - "eval_loss": 0.5025920271873474, - "eval_runtime": 165.742, - "eval_samples_per_second": 34.125, - "eval_steps_per_second": 4.266, - "eval_wer": 0.35122209561714623, - "step": 28600 - }, - { - "epoch": 5.555555555555555, - "eval_loss": 0.5056036710739136, - "eval_runtime": 165.717, - "eval_samples_per_second": 34.13, - "eval_steps_per_second": 4.266, - "eval_wer": 0.3518800853781836, - "step": 28700 - }, - { - "epoch": 5.574912891986063, - "eval_loss": 0.5090658068656921, - "eval_runtime": 165.5604, - "eval_samples_per_second": 34.163, - "eval_steps_per_second": 4.27, - "eval_wer": 0.3546404326683892, - "step": 28800 - }, - { - "epoch": 5.59427022841657, - "eval_loss": 0.5027741193771362, - "eval_runtime": 170.2845, - "eval_samples_per_second": 33.215, - "eval_steps_per_second": 4.152, - "eval_wer": 0.34952095135690325, - "step": 28900 - }, - { - "epoch": 5.613627564847077, - "grad_norm": 1.859834909439087, - "learning_rate": 1.0372881355932203e-05, - "loss": 0.3092, - "step": 29000 - }, - { - "epoch": 5.613627564847077, - "eval_loss": 0.505651593208313, - "eval_runtime": 164.9869, - "eval_samples_per_second": 34.282, - "eval_steps_per_second": 4.285, - "eval_wer": 0.3509974161865481, - "step": 29000 - }, - { - "epoch": 5.6329849012775846, - "eval_loss": 0.5085631608963013, - "eval_runtime": 165.6325, - "eval_samples_per_second": 34.148, - "eval_steps_per_second": 4.268, - "eval_wer": 0.3532923560848004, - "step": 29100 - }, - { - "epoch": 5.652342237708091, - "eval_loss": 0.5055486559867859, - "eval_runtime": 165.6348, - "eval_samples_per_second": 34.147, - "eval_steps_per_second": 4.268, - "eval_wer": 0.35144677504774435, - "step": 29200 - }, - { - "epoch": 5.671699574138598, - "eval_loss": 0.5133376717567444, - "eval_runtime": 166.8503, - "eval_samples_per_second": 33.899, - "eval_steps_per_second": 4.237, - "eval_wer": 0.35765755645070696, - "step": 29300 - }, - { - "epoch": 5.691056910569106, - "eval_loss": 0.5129527449607849, - "eval_runtime": 165.063, - "eval_samples_per_second": 34.266, - "eval_steps_per_second": 4.283, - "eval_wer": 0.35703166375118356, - "step": 29400 - }, - { - "epoch": 5.710414246999613, - "grad_norm": 1.5260862112045288, - "learning_rate": 5.288135593220339e-06, - "loss": 0.3152, - "step": 29500 - }, - { - "epoch": 5.710414246999613, - "eval_loss": 0.5147610902786255, - "eval_runtime": 165.1582, - "eval_samples_per_second": 34.246, - "eval_steps_per_second": 4.281, - "eval_wer": 0.3581390123734172, - "step": 29500 - }, - { - "epoch": 5.72977158343012, - "eval_loss": 0.5114809274673462, - "eval_runtime": 165.7617, - "eval_samples_per_second": 34.121, - "eval_steps_per_second": 4.265, - "eval_wer": 0.3554589077369967, - "step": 29600 - }, - { - "epoch": 5.7491289198606275, - "eval_loss": 0.5053985714912415, - "eval_runtime": 165.7121, - "eval_samples_per_second": 34.131, - "eval_steps_per_second": 4.266, - "eval_wer": 0.35263436632376305, - "step": 29700 - }, - { - "epoch": 5.768486256291134, - "eval_loss": 0.5080947279930115, - "eval_runtime": 165.6502, - "eval_samples_per_second": 34.144, - "eval_steps_per_second": 4.268, - "eval_wer": 0.3535651811076696, - "step": 29800 - }, - { - "epoch": 5.787843592721641, - "eval_loss": 0.5076740384101868, - "eval_runtime": 164.8589, - "eval_samples_per_second": 34.308, - "eval_steps_per_second": 4.289, - "eval_wer": 0.35348493845388457, - "step": 29900 - }, - { - "epoch": 5.807200929152149, - "grad_norm": 24.957311630249023, - "learning_rate": 2.0338983050847458e-07, - "loss": 0.3085, - "step": 30000 - }, - { - "epoch": 5.807200929152149, - "eval_loss": 0.5066753029823303, - "eval_runtime": 165.8811, - "eval_samples_per_second": 34.097, - "eval_steps_per_second": 4.262, - "eval_wer": 0.35224920158559486, - "step": 30000 - }, - { - "epoch": 5.807200929152149, - "step": 30000, - "total_flos": 3.3745707679449666e+19, - "train_loss": 0.49102539647420246, - "train_runtime": 61359.0363, - "train_samples_per_second": 3.911, - "train_steps_per_second": 0.489 + "epoch": 1.9357336430507162, + "step": 10000, + "total_flos": 1.1255918428180738e+19, + "train_loss": 0.7339932418823242, + "train_runtime": 18725.1494, + "train_samples_per_second": 4.272, + "train_steps_per_second": 0.534 } ], "logging_steps": 500, - "max_steps": 30000, + "max_steps": 10000, "num_input_tokens_seen": 0, - "num_train_epochs": 6, + "num_train_epochs": 2, "save_steps": 400, - "total_flos": 3.3745707679449666e+19, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.1255918428180738e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null