wav2vec2-xlsr-53-ft-btb-ccv-cy / trainer_state.json
DewiBrynJones's picture
End of training
62db3c8 verified
raw
history blame
87.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.807200929152149,
"eval_steps": 100,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019357336430507164,
"eval_loss": 3.567659854888916,
"eval_runtime": 160.9588,
"eval_samples_per_second": 35.139,
"eval_steps_per_second": 4.392,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.03871467286101433,
"eval_loss": 3.0471677780151367,
"eval_runtime": 158.7973,
"eval_samples_per_second": 35.618,
"eval_steps_per_second": 4.452,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.05807200929152149,
"eval_loss": 2.9665186405181885,
"eval_runtime": 159.308,
"eval_samples_per_second": 35.504,
"eval_steps_per_second": 4.438,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 0.07742934572202866,
"eval_loss": 2.464332103729248,
"eval_runtime": 159.8297,
"eval_samples_per_second": 35.388,
"eval_steps_per_second": 4.423,
"eval_wer": 0.9813195101988413,
"step": 400
},
{
"epoch": 0.09678668215253582,
"grad_norm": 6.005111217498779,
"learning_rate": 0.00029759999999999997,
"loss": 4.1279,
"step": 500
},
{
"epoch": 0.09678668215253582,
"eval_loss": 1.625333547592163,
"eval_runtime": 160.6655,
"eval_samples_per_second": 35.204,
"eval_steps_per_second": 4.4,
"eval_wer": 0.9345380430421595,
"step": 500
},
{
"epoch": 0.11614401858304298,
"eval_loss": 1.24808931350708,
"eval_runtime": 160.4402,
"eval_samples_per_second": 35.253,
"eval_steps_per_second": 4.407,
"eval_wer": 0.8190528157147213,
"step": 600
},
{
"epoch": 0.13550135501355012,
"eval_loss": 1.0997203588485718,
"eval_runtime": 161.1872,
"eval_samples_per_second": 35.09,
"eval_steps_per_second": 4.386,
"eval_wer": 0.7769735680698432,
"step": 700
},
{
"epoch": 0.1548586914440573,
"eval_loss": 1.0475263595581055,
"eval_runtime": 161.0626,
"eval_samples_per_second": 35.117,
"eval_steps_per_second": 4.39,
"eval_wer": 0.7339795541718156,
"step": 800
},
{
"epoch": 0.17421602787456447,
"eval_loss": 0.9692754149436951,
"eval_runtime": 161.3683,
"eval_samples_per_second": 35.05,
"eval_steps_per_second": 4.381,
"eval_wer": 0.7012565999582738,
"step": 900
},
{
"epoch": 0.19357336430507163,
"grad_norm": 2.414348602294922,
"learning_rate": 0.0002949762711864406,
"loss": 1.0598,
"step": 1000
},
{
"epoch": 0.19357336430507163,
"eval_loss": 0.911480724811554,
"eval_runtime": 162.0835,
"eval_samples_per_second": 34.896,
"eval_steps_per_second": 4.362,
"eval_wer": 0.6749049124552647,
"step": 1000
},
{
"epoch": 0.2129307007355788,
"eval_loss": 0.8823792338371277,
"eval_runtime": 161.5426,
"eval_samples_per_second": 35.012,
"eval_steps_per_second": 4.377,
"eval_wer": 0.65625651971562,
"step": 1100
},
{
"epoch": 0.23228803716608595,
"eval_loss": 0.8609552383422852,
"eval_runtime": 161.6778,
"eval_samples_per_second": 34.983,
"eval_steps_per_second": 4.373,
"eval_wer": 0.6431288215563865,
"step": 1200
},
{
"epoch": 0.2516453735965931,
"eval_loss": 0.8330459594726562,
"eval_runtime": 161.7974,
"eval_samples_per_second": 34.957,
"eval_steps_per_second": 4.37,
"eval_wer": 0.6114169247805363,
"step": 1300
},
{
"epoch": 0.27100271002710025,
"eval_loss": 0.8172780871391296,
"eval_runtime": 161.4977,
"eval_samples_per_second": 35.022,
"eval_steps_per_second": 4.378,
"eval_wer": 0.6017396607340598,
"step": 1400
},
{
"epoch": 0.29036004645760743,
"grad_norm": 4.460846424102783,
"learning_rate": 0.0002898915254237288,
"loss": 0.8546,
"step": 1500
},
{
"epoch": 0.29036004645760743,
"eval_loss": 0.8102588653564453,
"eval_runtime": 161.397,
"eval_samples_per_second": 35.044,
"eval_steps_per_second": 4.381,
"eval_wer": 0.6139204955786298,
"step": 1500
},
{
"epoch": 0.3097173828881146,
"eval_loss": 0.7860382795333862,
"eval_runtime": 162.0586,
"eval_samples_per_second": 34.901,
"eval_steps_per_second": 4.363,
"eval_wer": 0.6077739082986953,
"step": 1600
},
{
"epoch": 0.32907471931862176,
"eval_loss": 0.857550323009491,
"eval_runtime": 161.2906,
"eval_samples_per_second": 35.067,
"eval_steps_per_second": 4.383,
"eval_wer": 0.5990114105053682,
"step": 1700
},
{
"epoch": 0.34843205574912894,
"eval_loss": 0.7555657029151917,
"eval_runtime": 161.7718,
"eval_samples_per_second": 34.963,
"eval_steps_per_second": 4.37,
"eval_wer": 0.5773298454526488,
"step": 1800
},
{
"epoch": 0.3677893921796361,
"eval_loss": 0.7365372180938721,
"eval_runtime": 162.1133,
"eval_samples_per_second": 34.889,
"eval_steps_per_second": 4.361,
"eval_wer": 0.5825777150101908,
"step": 1900
},
{
"epoch": 0.38714672861014326,
"grad_norm": 3.4646999835968018,
"learning_rate": 0.0002848067796610169,
"loss": 0.7776,
"step": 2000
},
{
"epoch": 0.38714672861014326,
"eval_loss": 0.7291606068611145,
"eval_runtime": 162.354,
"eval_samples_per_second": 34.837,
"eval_steps_per_second": 4.355,
"eval_wer": 0.5551989215387332,
"step": 2000
},
{
"epoch": 0.4065040650406504,
"eval_loss": 0.716595470905304,
"eval_runtime": 162.461,
"eval_samples_per_second": 34.815,
"eval_steps_per_second": 4.352,
"eval_wer": 0.5385726436744716,
"step": 2100
},
{
"epoch": 0.4258614014711576,
"eval_loss": 0.7117305397987366,
"eval_runtime": 161.9033,
"eval_samples_per_second": 34.934,
"eval_steps_per_second": 4.367,
"eval_wer": 0.5401774967501726,
"step": 2200
},
{
"epoch": 0.4452187379016647,
"eval_loss": 0.7060667872428894,
"eval_runtime": 162.0901,
"eval_samples_per_second": 34.894,
"eval_steps_per_second": 4.362,
"eval_wer": 0.5388294201665838,
"step": 2300
},
{
"epoch": 0.4645760743321719,
"eval_loss": 0.7044907212257385,
"eval_runtime": 162.5576,
"eval_samples_per_second": 34.794,
"eval_steps_per_second": 4.349,
"eval_wer": 0.5364060920222754,
"step": 2400
},
{
"epoch": 0.48393341076267904,
"grad_norm": 2.70296573638916,
"learning_rate": 0.00027972203389830505,
"loss": 0.706,
"step": 2500
},
{
"epoch": 0.48393341076267904,
"eval_loss": 0.7062936425209045,
"eval_runtime": 162.4753,
"eval_samples_per_second": 34.811,
"eval_steps_per_second": 4.351,
"eval_wer": 0.5428736499173501,
"step": 2500
},
{
"epoch": 0.5032907471931862,
"eval_loss": 0.6941363215446472,
"eval_runtime": 162.6699,
"eval_samples_per_second": 34.77,
"eval_steps_per_second": 4.346,
"eval_wer": 0.5433872029015744,
"step": 2600
},
{
"epoch": 0.5226480836236934,
"eval_loss": 0.6840428113937378,
"eval_runtime": 162.5617,
"eval_samples_per_second": 34.793,
"eval_steps_per_second": 4.349,
"eval_wer": 0.5203094156729952,
"step": 2700
},
{
"epoch": 0.5420054200542005,
"eval_loss": 0.6902298331260681,
"eval_runtime": 162.7532,
"eval_samples_per_second": 34.752,
"eval_steps_per_second": 4.344,
"eval_wer": 0.5593715395355555,
"step": 2800
},
{
"epoch": 0.5613627564847077,
"eval_loss": 0.6594961881637573,
"eval_runtime": 163.1259,
"eval_samples_per_second": 34.673,
"eval_steps_per_second": 4.334,
"eval_wer": 0.5149171093386401,
"step": 2900
},
{
"epoch": 0.5807200929152149,
"grad_norm": 4.962900161743164,
"learning_rate": 0.0002746372881355932,
"loss": 0.7002,
"step": 3000
},
{
"epoch": 0.5807200929152149,
"eval_loss": 0.6767885088920593,
"eval_runtime": 162.9945,
"eval_samples_per_second": 34.701,
"eval_steps_per_second": 4.338,
"eval_wer": 0.525284460207668,
"step": 3000
},
{
"epoch": 0.6000774293457221,
"eval_loss": 0.6656874418258667,
"eval_runtime": 163.0572,
"eval_samples_per_second": 34.687,
"eval_steps_per_second": 4.336,
"eval_wer": 0.5063953395066682,
"step": 3100
},
{
"epoch": 0.6194347657762292,
"eval_loss": 0.6758668422698975,
"eval_runtime": 163.796,
"eval_samples_per_second": 34.531,
"eval_steps_per_second": 4.316,
"eval_wer": 0.5409478262265089,
"step": 3200
},
{
"epoch": 0.6387921022067363,
"eval_loss": 0.6709346175193787,
"eval_runtime": 162.7448,
"eval_samples_per_second": 34.754,
"eval_steps_per_second": 4.344,
"eval_wer": 0.5090914926738457,
"step": 3300
},
{
"epoch": 0.6581494386372435,
"eval_loss": 0.6478992104530334,
"eval_runtime": 163.2374,
"eval_samples_per_second": 34.649,
"eval_steps_per_second": 4.331,
"eval_wer": 0.5037473319317617,
"step": 3400
},
{
"epoch": 0.6775067750677507,
"grad_norm": 3.27418851852417,
"learning_rate": 0.0002695525423728813,
"loss": 0.685,
"step": 3500
},
{
"epoch": 0.6775067750677507,
"eval_loss": 0.6378278136253357,
"eval_runtime": 162.9066,
"eval_samples_per_second": 34.719,
"eval_steps_per_second": 4.34,
"eval_wer": 0.5033782157243505,
"step": 3500
},
{
"epoch": 0.6968641114982579,
"eval_loss": 0.6492822170257568,
"eval_runtime": 162.8688,
"eval_samples_per_second": 34.727,
"eval_steps_per_second": 4.341,
"eval_wer": 0.49883648152011684,
"step": 3600
},
{
"epoch": 0.716221447928765,
"eval_loss": 0.6340391635894775,
"eval_runtime": 163.0198,
"eval_samples_per_second": 34.695,
"eval_steps_per_second": 4.337,
"eval_wer": 0.4832694066858179,
"step": 3700
},
{
"epoch": 0.7355787843592722,
"eval_loss": 0.6226627826690674,
"eval_runtime": 164.0506,
"eval_samples_per_second": 34.477,
"eval_steps_per_second": 4.31,
"eval_wer": 0.47354399704707034,
"step": 3800
},
{
"epoch": 0.7549361207897793,
"eval_loss": 0.6257476210594177,
"eval_runtime": 167.0907,
"eval_samples_per_second": 33.85,
"eval_steps_per_second": 4.231,
"eval_wer": 0.49068382789555615,
"step": 3900
},
{
"epoch": 0.7742934572202865,
"grad_norm": 5.494376182556152,
"learning_rate": 0.0002644677966101695,
"loss": 0.6655,
"step": 4000
},
{
"epoch": 0.7742934572202865,
"eval_loss": 0.6420141458511353,
"eval_runtime": 163.4141,
"eval_samples_per_second": 34.611,
"eval_steps_per_second": 4.326,
"eval_wer": 0.49987963601932245,
"step": 4000
},
{
"epoch": 0.7936507936507936,
"eval_loss": 0.6111469268798828,
"eval_runtime": 163.5321,
"eval_samples_per_second": 34.586,
"eval_steps_per_second": 4.323,
"eval_wer": 0.4790646916274815,
"step": 4100
},
{
"epoch": 0.8130081300813008,
"eval_loss": 0.6136205196380615,
"eval_runtime": 163.9442,
"eval_samples_per_second": 34.5,
"eval_steps_per_second": 4.312,
"eval_wer": 0.48073373882621045,
"step": 4200
},
{
"epoch": 0.832365466511808,
"eval_loss": 0.6218396425247192,
"eval_runtime": 163.3391,
"eval_samples_per_second": 34.627,
"eval_steps_per_second": 4.328,
"eval_wer": 0.48596555985299544,
"step": 4300
},
{
"epoch": 0.8517228029423152,
"eval_loss": 0.6084252595901489,
"eval_runtime": 162.8945,
"eval_samples_per_second": 34.722,
"eval_steps_per_second": 4.34,
"eval_wer": 0.4585386207892667,
"step": 4400
},
{
"epoch": 0.8710801393728222,
"grad_norm": 3.0379676818847656,
"learning_rate": 0.0002593830508474576,
"loss": 0.6431,
"step": 4500
},
{
"epoch": 0.8710801393728222,
"eval_loss": 0.6008957624435425,
"eval_runtime": 163.8125,
"eval_samples_per_second": 34.527,
"eval_steps_per_second": 4.316,
"eval_wer": 0.4627593843783602,
"step": 4500
},
{
"epoch": 0.8904374758033294,
"eval_loss": 0.6009930968284607,
"eval_runtime": 163.1039,
"eval_samples_per_second": 34.677,
"eval_steps_per_second": 4.335,
"eval_wer": 0.46295196674744427,
"step": 4600
},
{
"epoch": 0.9097948122338366,
"eval_loss": 0.5823432207107544,
"eval_runtime": 163.3804,
"eval_samples_per_second": 34.619,
"eval_steps_per_second": 4.327,
"eval_wer": 0.45035387010319206,
"step": 4700
},
{
"epoch": 0.9291521486643438,
"eval_loss": 0.6118789315223694,
"eval_runtime": 163.7255,
"eval_samples_per_second": 34.546,
"eval_steps_per_second": 4.318,
"eval_wer": 0.4630001123397153,
"step": 4800
},
{
"epoch": 0.948509485094851,
"eval_loss": 0.6001989245414734,
"eval_runtime": 163.3492,
"eval_samples_per_second": 34.625,
"eval_steps_per_second": 4.328,
"eval_wer": 0.4600150856189116,
"step": 4900
},
{
"epoch": 0.9678668215253581,
"grad_norm": 3.1605985164642334,
"learning_rate": 0.00025430847457627115,
"loss": 0.6235,
"step": 5000
},
{
"epoch": 0.9678668215253581,
"eval_loss": 0.5892329216003418,
"eval_runtime": 163.5255,
"eval_samples_per_second": 34.588,
"eval_steps_per_second": 4.323,
"eval_wer": 0.4551844778610518,
"step": 5000
},
{
"epoch": 0.9872241579558653,
"eval_loss": 0.5673592686653137,
"eval_runtime": 163.343,
"eval_samples_per_second": 34.627,
"eval_steps_per_second": 4.328,
"eval_wer": 0.44889345380430423,
"step": 5100
},
{
"epoch": 1.0065814943863725,
"eval_loss": 0.5792257785797119,
"eval_runtime": 162.8592,
"eval_samples_per_second": 34.729,
"eval_steps_per_second": 4.341,
"eval_wer": 0.43167338030203334,
"step": 5200
},
{
"epoch": 1.0259388308168795,
"eval_loss": 0.5752869844436646,
"eval_runtime": 162.7856,
"eval_samples_per_second": 34.745,
"eval_steps_per_second": 4.343,
"eval_wer": 0.43331033043924827,
"step": 5300
},
{
"epoch": 1.0452961672473868,
"eval_loss": 0.5698733925819397,
"eval_runtime": 161.9949,
"eval_samples_per_second": 34.915,
"eval_steps_per_second": 4.364,
"eval_wer": 0.44619730063712665,
"step": 5400
},
{
"epoch": 1.064653503677894,
"grad_norm": 0.8791279792785645,
"learning_rate": 0.0002492338983050847,
"loss": 0.5527,
"step": 5500
},
{
"epoch": 1.064653503677894,
"eval_loss": 0.5666691660881042,
"eval_runtime": 162.0517,
"eval_samples_per_second": 34.902,
"eval_steps_per_second": 4.363,
"eval_wer": 0.43639164834459404,
"step": 5500
},
{
"epoch": 1.084010840108401,
"eval_loss": 0.5558171272277832,
"eval_runtime": 161.9847,
"eval_samples_per_second": 34.917,
"eval_steps_per_second": 4.365,
"eval_wer": 0.42945868305756607,
"step": 5600
},
{
"epoch": 1.1033681765389083,
"eval_loss": 0.5602455139160156,
"eval_runtime": 162.701,
"eval_samples_per_second": 34.763,
"eval_steps_per_second": 4.345,
"eval_wer": 0.422349183932211,
"step": 5700
},
{
"epoch": 1.1227255129694154,
"eval_loss": 0.559140145778656,
"eval_runtime": 162.4402,
"eval_samples_per_second": 34.819,
"eval_steps_per_second": 4.352,
"eval_wer": 0.41942835133443535,
"step": 5800
},
{
"epoch": 1.1420828493999227,
"eval_loss": 0.5399234890937805,
"eval_runtime": 162.3316,
"eval_samples_per_second": 34.842,
"eval_steps_per_second": 4.355,
"eval_wer": 0.418818507165669,
"step": 5900
},
{
"epoch": 1.1614401858304297,
"grad_norm": 0.9803772568702698,
"learning_rate": 0.00024414915254237287,
"loss": 0.533,
"step": 6000
},
{
"epoch": 1.1614401858304297,
"eval_loss": 0.545900821685791,
"eval_runtime": 161.6822,
"eval_samples_per_second": 34.982,
"eval_steps_per_second": 4.373,
"eval_wer": 0.431063536133267,
"step": 6000
},
{
"epoch": 1.1807975222609368,
"eval_loss": 0.5347985625267029,
"eval_runtime": 161.8121,
"eval_samples_per_second": 34.954,
"eval_steps_per_second": 4.369,
"eval_wer": 0.41183739628637,
"step": 6100
},
{
"epoch": 1.2001548586914441,
"eval_loss": 0.5453631281852722,
"eval_runtime": 161.8802,
"eval_samples_per_second": 34.939,
"eval_steps_per_second": 4.367,
"eval_wer": 0.4176309158896503,
"step": 6200
},
{
"epoch": 1.2195121951219512,
"eval_loss": 0.5442932844161987,
"eval_runtime": 162.1767,
"eval_samples_per_second": 34.876,
"eval_steps_per_second": 4.359,
"eval_wer": 0.42157885445587456,
"step": 6300
},
{
"epoch": 1.2388695315524583,
"eval_loss": 0.5382806658744812,
"eval_runtime": 161.5364,
"eval_samples_per_second": 35.014,
"eval_steps_per_second": 4.377,
"eval_wer": 0.40962269904190274,
"step": 6400
},
{
"epoch": 1.2582268679829656,
"grad_norm": 2.7026009559631348,
"learning_rate": 0.00023906440677966102,
"loss": 0.5228,
"step": 6500
},
{
"epoch": 1.2582268679829656,
"eval_loss": 0.540704071521759,
"eval_runtime": 161.6157,
"eval_samples_per_second": 34.997,
"eval_steps_per_second": 4.375,
"eval_wer": 0.41260772576270643,
"step": 6500
},
{
"epoch": 1.2775842044134726,
"eval_loss": 0.5527251362800598,
"eval_runtime": 161.6568,
"eval_samples_per_second": 34.988,
"eval_steps_per_second": 4.373,
"eval_wer": 0.41426072443067835,
"step": 6600
},
{
"epoch": 1.29694154084398,
"eval_loss": 0.5312824845314026,
"eval_runtime": 161.7821,
"eval_samples_per_second": 34.961,
"eval_steps_per_second": 4.37,
"eval_wer": 0.40811413715074385,
"step": 6700
},
{
"epoch": 1.316298877274487,
"eval_loss": 0.533909797668457,
"eval_runtime": 161.8361,
"eval_samples_per_second": 34.949,
"eval_steps_per_second": 4.369,
"eval_wer": 0.4150471024377718,
"step": 6800
},
{
"epoch": 1.3356562137049943,
"eval_loss": 0.523649275302887,
"eval_runtime": 161.5476,
"eval_samples_per_second": 35.011,
"eval_steps_per_second": 4.376,
"eval_wer": 0.4120781242477251,
"step": 6900
},
{
"epoch": 1.3550135501355014,
"grad_norm": 0.709751546382904,
"learning_rate": 0.00023397966101694912,
"loss": 0.5204,
"step": 7000
},
{
"epoch": 1.3550135501355014,
"eval_loss": 0.5527586340904236,
"eval_runtime": 162.1137,
"eval_samples_per_second": 34.889,
"eval_steps_per_second": 4.361,
"eval_wer": 0.4165877613904447,
"step": 7000
},
{
"epoch": 1.3743708865660085,
"eval_loss": 0.5330629944801331,
"eval_runtime": 161.681,
"eval_samples_per_second": 34.982,
"eval_steps_per_second": 4.373,
"eval_wer": 0.40559451782189343,
"step": 7100
},
{
"epoch": 1.3937282229965158,
"eval_loss": 0.5242415070533752,
"eval_runtime": 162.2742,
"eval_samples_per_second": 34.855,
"eval_steps_per_second": 4.357,
"eval_wer": 0.4058833913755196,
"step": 7200
},
{
"epoch": 1.4130855594270229,
"eval_loss": 0.5309507250785828,
"eval_runtime": 163.2224,
"eval_samples_per_second": 34.652,
"eval_steps_per_second": 4.332,
"eval_wer": 0.4092856798960055,
"step": 7300
},
{
"epoch": 1.43244289585753,
"eval_loss": 0.5278186798095703,
"eval_runtime": 162.9755,
"eval_samples_per_second": 34.705,
"eval_steps_per_second": 4.338,
"eval_wer": 0.4063006531752018,
"step": 7400
},
{
"epoch": 1.4518002322880372,
"grad_norm": 0.9905166029930115,
"learning_rate": 0.00022889491525423728,
"loss": 0.5199,
"step": 7500
},
{
"epoch": 1.4518002322880372,
"eval_loss": 0.5168124437332153,
"eval_runtime": 162.1336,
"eval_samples_per_second": 34.885,
"eval_steps_per_second": 4.361,
"eval_wer": 0.3955802346295197,
"step": 7500
},
{
"epoch": 1.4711575687185443,
"eval_loss": 0.5236623287200928,
"eval_runtime": 161.1426,
"eval_samples_per_second": 35.099,
"eval_steps_per_second": 4.387,
"eval_wer": 0.40235271460897754,
"step": 7600
},
{
"epoch": 1.4905149051490514,
"eval_loss": 0.5316073894500732,
"eval_runtime": 162.1382,
"eval_samples_per_second": 34.884,
"eval_steps_per_second": 4.36,
"eval_wer": 0.4179358379740335,
"step": 7700
},
{
"epoch": 1.5098722415795587,
"eval_loss": 0.5182381868362427,
"eval_runtime": 161.7911,
"eval_samples_per_second": 34.959,
"eval_steps_per_second": 4.37,
"eval_wer": 0.40326748086212705,
"step": 7800
},
{
"epoch": 1.5292295780100658,
"eval_loss": 0.5175392627716064,
"eval_runtime": 161.5725,
"eval_samples_per_second": 35.006,
"eval_steps_per_second": 4.376,
"eval_wer": 0.3983887275119963,
"step": 7900
},
{
"epoch": 1.5485869144405728,
"grad_norm": 0.8261615037918091,
"learning_rate": 0.00022382033898305084,
"loss": 0.5066,
"step": 8000
},
{
"epoch": 1.5485869144405728,
"eval_loss": 0.5138476490974426,
"eval_runtime": 162.4044,
"eval_samples_per_second": 34.827,
"eval_steps_per_second": 4.353,
"eval_wer": 0.39492224486848226,
"step": 8000
},
{
"epoch": 1.5679442508710801,
"eval_loss": 0.515566885471344,
"eval_runtime": 162.5299,
"eval_samples_per_second": 34.8,
"eval_steps_per_second": 4.35,
"eval_wer": 0.4016305307249121,
"step": 8100
},
{
"epoch": 1.5873015873015874,
"eval_loss": 0.5131089091300964,
"eval_runtime": 162.835,
"eval_samples_per_second": 34.735,
"eval_steps_per_second": 4.342,
"eval_wer": 0.39793936865080004,
"step": 8200
},
{
"epoch": 1.6066589237320945,
"eval_loss": 0.5139849185943604,
"eval_runtime": 162.5015,
"eval_samples_per_second": 34.806,
"eval_steps_per_second": 4.351,
"eval_wer": 0.39413586686138885,
"step": 8300
},
{
"epoch": 1.6260162601626016,
"eval_loss": 0.5224258303642273,
"eval_runtime": 162.1349,
"eval_samples_per_second": 34.885,
"eval_steps_per_second": 4.361,
"eval_wer": 0.39853316428880936,
"step": 8400
},
{
"epoch": 1.645373596593109,
"grad_norm": 1.0760446786880493,
"learning_rate": 0.00021873559322033897,
"loss": 0.502,
"step": 8500
},
{
"epoch": 1.645373596593109,
"eval_loss": 0.5274536609649658,
"eval_runtime": 162.3162,
"eval_samples_per_second": 34.846,
"eval_steps_per_second": 4.356,
"eval_wer": 0.40023430854905234,
"step": 8500
},
{
"epoch": 1.664730933023616,
"eval_loss": 0.5054244995117188,
"eval_runtime": 162.7623,
"eval_samples_per_second": 34.75,
"eval_steps_per_second": 4.344,
"eval_wer": 0.3860955529521272,
"step": 8600
},
{
"epoch": 1.684088269454123,
"eval_loss": 0.5144466161727905,
"eval_runtime": 162.1114,
"eval_samples_per_second": 34.89,
"eval_steps_per_second": 4.361,
"eval_wer": 0.3912631798558842,
"step": 8700
},
{
"epoch": 1.7034456058846303,
"eval_loss": 0.5017980933189392,
"eval_runtime": 162.1144,
"eval_samples_per_second": 34.889,
"eval_steps_per_second": 4.361,
"eval_wer": 0.3860955529521272,
"step": 8800
},
{
"epoch": 1.7228029423151374,
"eval_loss": 0.5001707673072815,
"eval_runtime": 162.7958,
"eval_samples_per_second": 34.743,
"eval_steps_per_second": 4.343,
"eval_wer": 0.39978494968785605,
"step": 8900
},
{
"epoch": 1.7421602787456445,
"grad_norm": 2.3791110515594482,
"learning_rate": 0.00021366101694915253,
"loss": 0.4965,
"step": 9000
},
{
"epoch": 1.7421602787456445,
"eval_loss": 0.5074877142906189,
"eval_runtime": 162.5487,
"eval_samples_per_second": 34.796,
"eval_steps_per_second": 4.349,
"eval_wer": 0.38896823995763186,
"step": 9000
},
{
"epoch": 1.7615176151761518,
"eval_loss": 0.4928957521915436,
"eval_runtime": 162.2035,
"eval_samples_per_second": 34.87,
"eval_steps_per_second": 4.359,
"eval_wer": 0.3865449118133235,
"step": 9100
},
{
"epoch": 1.7808749516066589,
"eval_loss": 0.49622705578804016,
"eval_runtime": 162.8808,
"eval_samples_per_second": 34.725,
"eval_steps_per_second": 4.341,
"eval_wer": 0.38559804849866,
"step": 9200
},
{
"epoch": 1.800232288037166,
"eval_loss": 0.49036508798599243,
"eval_runtime": 162.3886,
"eval_samples_per_second": 34.83,
"eval_steps_per_second": 4.354,
"eval_wer": 0.3759689300444544,
"step": 9300
},
{
"epoch": 1.8195896244676733,
"eval_loss": 0.49964088201522827,
"eval_runtime": 162.526,
"eval_samples_per_second": 34.801,
"eval_steps_per_second": 4.35,
"eval_wer": 0.3901237341721365,
"step": 9400
},
{
"epoch": 1.8389469608981805,
"grad_norm": 1.2548748254776,
"learning_rate": 0.00020857627118644066,
"loss": 0.4776,
"step": 9500
},
{
"epoch": 1.8389469608981805,
"eval_loss": 0.4899130165576935,
"eval_runtime": 162.5723,
"eval_samples_per_second": 34.791,
"eval_steps_per_second": 4.349,
"eval_wer": 0.37616151241353857,
"step": 9500
},
{
"epoch": 1.8583042973286876,
"eval_loss": 0.4918155074119568,
"eval_runtime": 162.1914,
"eval_samples_per_second": 34.872,
"eval_steps_per_second": 4.359,
"eval_wer": 0.37948355828023944,
"step": 9600
},
{
"epoch": 1.8776616337591947,
"eval_loss": 0.49148374795913696,
"eval_runtime": 162.2813,
"eval_samples_per_second": 34.853,
"eval_steps_per_second": 4.357,
"eval_wer": 0.37980452889537963,
"step": 9700
},
{
"epoch": 1.897018970189702,
"eval_loss": 0.4841060936450958,
"eval_runtime": 162.2556,
"eval_samples_per_second": 34.859,
"eval_steps_per_second": 4.357,
"eval_wer": 0.37060872077161333,
"step": 9800
},
{
"epoch": 1.916376306620209,
"eval_loss": 0.4834117293357849,
"eval_runtime": 163.5813,
"eval_samples_per_second": 34.576,
"eval_steps_per_second": 4.322,
"eval_wer": 0.37728490956652916,
"step": 9900
},
{
"epoch": 1.9357336430507162,
"grad_norm": 1.011767029762268,
"learning_rate": 0.00020349152542372878,
"loss": 0.4752,
"step": 10000
},
{
"epoch": 1.9357336430507162,
"eval_loss": 0.4831894338130951,
"eval_runtime": 162.4305,
"eval_samples_per_second": 34.821,
"eval_steps_per_second": 4.353,
"eval_wer": 0.3711704193481087,
"step": 10000
},
{
"epoch": 1.9550909794812235,
"eval_loss": 0.4890592396259308,
"eval_runtime": 162.3568,
"eval_samples_per_second": 34.837,
"eval_steps_per_second": 4.355,
"eval_wer": 0.37829596700422075,
"step": 10100
},
{
"epoch": 1.9744483159117305,
"eval_loss": 0.4786697328090668,
"eval_runtime": 163.1452,
"eval_samples_per_second": 34.669,
"eval_steps_per_second": 4.334,
"eval_wer": 0.3783441125964918,
"step": 10200
},
{
"epoch": 1.9938056523422376,
"eval_loss": 0.4726457893848419,
"eval_runtime": 162.2371,
"eval_samples_per_second": 34.863,
"eval_steps_per_second": 4.358,
"eval_wer": 0.37141114730946384,
"step": 10300
},
{
"epoch": 2.013162988772745,
"eval_loss": 0.49166908860206604,
"eval_runtime": 162.2356,
"eval_samples_per_second": 34.863,
"eval_steps_per_second": 4.358,
"eval_wer": 0.37320858275424884,
"step": 10400
},
{
"epoch": 2.032520325203252,
"grad_norm": 0.9389815926551819,
"learning_rate": 0.00019840677966101694,
"loss": 0.4587,
"step": 10500
},
{
"epoch": 2.032520325203252,
"eval_loss": 0.48015162348747253,
"eval_runtime": 162.7857,
"eval_samples_per_second": 34.745,
"eval_steps_per_second": 4.343,
"eval_wer": 0.37264688417775355,
"step": 10500
},
{
"epoch": 2.051877661633759,
"eval_loss": 0.4883776903152466,
"eval_runtime": 162.7407,
"eval_samples_per_second": 34.755,
"eval_steps_per_second": 4.344,
"eval_wer": 0.3825327791240712,
"step": 10600
},
{
"epoch": 2.0712349980642664,
"eval_loss": 0.4841337502002716,
"eval_runtime": 162.7772,
"eval_samples_per_second": 34.747,
"eval_steps_per_second": 4.343,
"eval_wer": 0.37845645231179087,
"step": 10700
},
{
"epoch": 2.0905923344947737,
"eval_loss": 0.4809282422065735,
"eval_runtime": 162.8688,
"eval_samples_per_second": 34.727,
"eval_steps_per_second": 4.341,
"eval_wer": 0.3738184269230152,
"step": 10800
},
{
"epoch": 2.1099496709252805,
"eval_loss": 0.47966596484184265,
"eval_runtime": 163.5921,
"eval_samples_per_second": 34.574,
"eval_steps_per_second": 4.322,
"eval_wer": 0.3713469531864358,
"step": 10900
},
{
"epoch": 2.129307007355788,
"grad_norm": 0.6634272933006287,
"learning_rate": 0.0001933220338983051,
"loss": 0.3967,
"step": 11000
},
{
"epoch": 2.129307007355788,
"eval_loss": 0.4866289794445038,
"eval_runtime": 162.8573,
"eval_samples_per_second": 34.73,
"eval_steps_per_second": 4.341,
"eval_wer": 0.37497392113751987,
"step": 11000
},
{
"epoch": 2.148664343786295,
"eval_loss": 0.4938376843929291,
"eval_runtime": 163.5145,
"eval_samples_per_second": 34.59,
"eval_steps_per_second": 4.324,
"eval_wer": 0.3749097270144918,
"step": 11100
},
{
"epoch": 2.168021680216802,
"eval_loss": 0.48603999614715576,
"eval_runtime": 162.8433,
"eval_samples_per_second": 34.733,
"eval_steps_per_second": 4.342,
"eval_wer": 0.36796071319670687,
"step": 11200
},
{
"epoch": 2.1873790166473093,
"eval_loss": 0.4849016070365906,
"eval_runtime": 162.5249,
"eval_samples_per_second": 34.801,
"eval_steps_per_second": 4.35,
"eval_wer": 0.369966779541333,
"step": 11300
},
{
"epoch": 2.2067363530778166,
"eval_loss": 0.49077799916267395,
"eval_runtime": 162.3783,
"eval_samples_per_second": 34.832,
"eval_steps_per_second": 4.354,
"eval_wer": 0.36377204666912744,
"step": 11400
},
{
"epoch": 2.226093689508324,
"grad_norm": 0.9132543206214905,
"learning_rate": 0.0001882372881355932,
"loss": 0.406,
"step": 11500
},
{
"epoch": 2.226093689508324,
"eval_loss": 0.4797042906284332,
"eval_runtime": 162.1749,
"eval_samples_per_second": 34.876,
"eval_steps_per_second": 4.359,
"eval_wer": 0.3678644220121648,
"step": 11500
},
{
"epoch": 2.2454510259388307,
"eval_loss": 0.48121991753578186,
"eval_runtime": 161.7765,
"eval_samples_per_second": 34.962,
"eval_steps_per_second": 4.37,
"eval_wer": 0.37585659032915536,
"step": 11600
},
{
"epoch": 2.264808362369338,
"eval_loss": 0.47043049335479736,
"eval_runtime": 163.1227,
"eval_samples_per_second": 34.673,
"eval_steps_per_second": 4.334,
"eval_wer": 0.361268475871034,
"step": 11700
},
{
"epoch": 2.2841656987998453,
"eval_loss": 0.4715932607650757,
"eval_runtime": 163.2336,
"eval_samples_per_second": 34.65,
"eval_steps_per_second": 4.331,
"eval_wer": 0.36345107605398724,
"step": 11800
},
{
"epoch": 2.303523035230352,
"eval_loss": 0.4676753580570221,
"eval_runtime": 163.378,
"eval_samples_per_second": 34.619,
"eval_steps_per_second": 4.327,
"eval_wer": 0.3635473672385293,
"step": 11900
},
{
"epoch": 2.3228803716608595,
"grad_norm": 0.6134137511253357,
"learning_rate": 0.00018315254237288135,
"loss": 0.4088,
"step": 12000
},
{
"epoch": 2.3228803716608595,
"eval_loss": 0.47054949402809143,
"eval_runtime": 162.7912,
"eval_samples_per_second": 34.744,
"eval_steps_per_second": 4.343,
"eval_wer": 0.36399672609972555,
"step": 12000
},
{
"epoch": 2.3422377080913668,
"eval_loss": 0.4782082140445709,
"eval_runtime": 162.8776,
"eval_samples_per_second": 34.725,
"eval_steps_per_second": 4.341,
"eval_wer": 0.35905377862656673,
"step": 12100
},
{
"epoch": 2.3615950445218736,
"eval_loss": 0.4795554578304291,
"eval_runtime": 163.357,
"eval_samples_per_second": 34.624,
"eval_steps_per_second": 4.328,
"eval_wer": 0.36128452440179104,
"step": 12200
},
{
"epoch": 2.380952380952381,
"eval_loss": 0.47130346298217773,
"eval_runtime": 163.1765,
"eval_samples_per_second": 34.662,
"eval_steps_per_second": 4.333,
"eval_wer": 0.3558119754136509,
"step": 12300
},
{
"epoch": 2.4003097173828882,
"eval_loss": 0.47632816433906555,
"eval_runtime": 163.2851,
"eval_samples_per_second": 34.639,
"eval_steps_per_second": 4.33,
"eval_wer": 0.3588772447882396,
"step": 12400
},
{
"epoch": 2.419667053813395,
"grad_norm": 0.6301820874214172,
"learning_rate": 0.0001780779661016949,
"loss": 0.407,
"step": 12500
},
{
"epoch": 2.419667053813395,
"eval_loss": 0.46899136900901794,
"eval_runtime": 163.5219,
"eval_samples_per_second": 34.589,
"eval_steps_per_second": 4.324,
"eval_wer": 0.3565181107669593,
"step": 12500
},
{
"epoch": 2.4390243902439024,
"eval_loss": 0.4686334431171417,
"eval_runtime": 163.9109,
"eval_samples_per_second": 34.507,
"eval_steps_per_second": 4.313,
"eval_wer": 0.35767360498146394,
"step": 12600
},
{
"epoch": 2.4583817266744097,
"eval_loss": 0.467680424451828,
"eval_runtime": 163.7544,
"eval_samples_per_second": 34.54,
"eval_steps_per_second": 4.317,
"eval_wer": 0.3584278859270434,
"step": 12700
},
{
"epoch": 2.4777390631049165,
"eval_loss": 0.46144554018974304,
"eval_runtime": 163.8198,
"eval_samples_per_second": 34.526,
"eval_steps_per_second": 4.316,
"eval_wer": 0.35765755645070696,
"step": 12800
},
{
"epoch": 2.497096399535424,
"eval_loss": 0.455834299325943,
"eval_runtime": 163.2394,
"eval_samples_per_second": 34.649,
"eval_steps_per_second": 4.331,
"eval_wer": 0.35992039928744524,
"step": 12900
},
{
"epoch": 2.516453735965931,
"grad_norm": 0.41953468322753906,
"learning_rate": 0.00017299322033898304,
"loss": 0.3855,
"step": 13000
},
{
"epoch": 2.516453735965931,
"eval_loss": 0.4555678367614746,
"eval_runtime": 164.8785,
"eval_samples_per_second": 34.304,
"eval_steps_per_second": 4.288,
"eval_wer": 0.3565341592977163,
"step": 13000
},
{
"epoch": 2.535811072396438,
"eval_loss": 0.4600988030433655,
"eval_runtime": 163.6778,
"eval_samples_per_second": 34.556,
"eval_steps_per_second": 4.319,
"eval_wer": 0.3558280239444079,
"step": 13100
},
{
"epoch": 2.5551684088269453,
"eval_loss": 0.4650043547153473,
"eval_runtime": 163.58,
"eval_samples_per_second": 34.576,
"eval_steps_per_second": 4.322,
"eval_wer": 0.354303413522492,
"step": 13200
},
{
"epoch": 2.5745257452574526,
"eval_loss": 0.4737236201763153,
"eval_runtime": 163.9532,
"eval_samples_per_second": 34.498,
"eval_steps_per_second": 4.312,
"eval_wer": 0.35483301503747333,
"step": 13300
},
{
"epoch": 2.59388308168796,
"eval_loss": 0.45056912302970886,
"eval_runtime": 163.6462,
"eval_samples_per_second": 34.562,
"eval_steps_per_second": 4.32,
"eval_wer": 0.3534367928616135,
"step": 13400
},
{
"epoch": 2.6132404181184667,
"grad_norm": 1.5978127717971802,
"learning_rate": 0.0001679186440677966,
"loss": 0.3748,
"step": 13500
},
{
"epoch": 2.6132404181184667,
"eval_loss": 0.4606887698173523,
"eval_runtime": 163.0481,
"eval_samples_per_second": 34.689,
"eval_steps_per_second": 4.336,
"eval_wer": 0.3589253903805107,
"step": 13500
},
{
"epoch": 2.632597754548974,
"eval_loss": 0.45494645833969116,
"eval_runtime": 162.9584,
"eval_samples_per_second": 34.708,
"eval_steps_per_second": 4.339,
"eval_wer": 0.35372566641523967,
"step": 13600
},
{
"epoch": 2.6519550909794813,
"eval_loss": 0.4562608301639557,
"eval_runtime": 164.1384,
"eval_samples_per_second": 34.459,
"eval_steps_per_second": 4.307,
"eval_wer": 0.36409301728426763,
"step": 13700
},
{
"epoch": 2.6713124274099886,
"eval_loss": 0.44666969776153564,
"eval_runtime": 163.7423,
"eval_samples_per_second": 34.542,
"eval_steps_per_second": 4.318,
"eval_wer": 0.34369533469210894,
"step": 13800
},
{
"epoch": 2.6906697638404955,
"eval_loss": 0.4536294639110565,
"eval_runtime": 163.3892,
"eval_samples_per_second": 34.617,
"eval_steps_per_second": 4.327,
"eval_wer": 0.35446389883006213,
"step": 13900
},
{
"epoch": 2.710027100271003,
"grad_norm": 0.6591352224349976,
"learning_rate": 0.00016283389830508475,
"loss": 0.3888,
"step": 14000
},
{
"epoch": 2.710027100271003,
"eval_loss": 0.4504217505455017,
"eval_runtime": 163.3199,
"eval_samples_per_second": 34.631,
"eval_steps_per_second": 4.329,
"eval_wer": 0.3509653191250341,
"step": 14000
},
{
"epoch": 2.72938443670151,
"eval_loss": 0.44697660207748413,
"eval_runtime": 163.5943,
"eval_samples_per_second": 34.573,
"eval_steps_per_second": 4.322,
"eval_wer": 0.3602092728410714,
"step": 14100
},
{
"epoch": 2.748741773132017,
"eval_loss": 0.45640549063682556,
"eval_runtime": 163.6363,
"eval_samples_per_second": 34.564,
"eval_steps_per_second": 4.321,
"eval_wer": 0.3539022002535668,
"step": 14200
},
{
"epoch": 2.7680991095625243,
"eval_loss": 0.45214343070983887,
"eval_runtime": 164.011,
"eval_samples_per_second": 34.485,
"eval_steps_per_second": 4.311,
"eval_wer": 0.3561971401518191,
"step": 14300
},
{
"epoch": 2.7874564459930316,
"eval_loss": 0.4452911913394928,
"eval_runtime": 163.9028,
"eval_samples_per_second": 34.508,
"eval_steps_per_second": 4.314,
"eval_wer": 0.35221710452408084,
"step": 14400
},
{
"epoch": 2.8068137824235384,
"grad_norm": 0.6879103779792786,
"learning_rate": 0.00015774915254237285,
"loss": 0.376,
"step": 14500
},
{
"epoch": 2.8068137824235384,
"eval_loss": 0.45518526434898376,
"eval_runtime": 164.0636,
"eval_samples_per_second": 34.474,
"eval_steps_per_second": 4.309,
"eval_wer": 0.35170355153985655,
"step": 14500
},
{
"epoch": 2.8261711188540457,
"eval_loss": 0.45344606041908264,
"eval_runtime": 163.5444,
"eval_samples_per_second": 34.584,
"eval_steps_per_second": 4.323,
"eval_wer": 0.3549774518142864,
"step": 14600
},
{
"epoch": 2.845528455284553,
"eval_loss": 0.45520055294036865,
"eval_runtime": 163.3824,
"eval_samples_per_second": 34.618,
"eval_steps_per_second": 4.327,
"eval_wer": 0.3405016770714641,
"step": 14700
},
{
"epoch": 2.86488579171506,
"eval_loss": 0.45560306310653687,
"eval_runtime": 164.0292,
"eval_samples_per_second": 34.482,
"eval_steps_per_second": 4.31,
"eval_wer": 0.35138258092471636,
"step": 14800
},
{
"epoch": 2.884243128145567,
"eval_loss": 0.44232824444770813,
"eval_runtime": 164.3662,
"eval_samples_per_second": 34.411,
"eval_steps_per_second": 4.301,
"eval_wer": 0.3467606040666977,
"step": 14900
},
{
"epoch": 2.9036004645760745,
"grad_norm": 0.5280432105064392,
"learning_rate": 0.00015267457627118642,
"loss": 0.379,
"step": 15000
},
{
"epoch": 2.9036004645760745,
"eval_loss": 0.43873003125190735,
"eval_runtime": 163.5232,
"eval_samples_per_second": 34.588,
"eval_steps_per_second": 4.324,
"eval_wer": 0.34268427725441736,
"step": 15000
},
{
"epoch": 2.9229578010065813,
"eval_loss": 0.4372723400592804,
"eval_runtime": 163.4018,
"eval_samples_per_second": 34.614,
"eval_steps_per_second": 4.327,
"eval_wer": 0.34364718909983794,
"step": 15100
},
{
"epoch": 2.9423151374370886,
"eval_loss": 0.4399470090866089,
"eval_runtime": 164.335,
"eval_samples_per_second": 34.418,
"eval_steps_per_second": 4.302,
"eval_wer": 0.33870424162667906,
"step": 15200
},
{
"epoch": 2.961672473867596,
"eval_loss": 0.44378861784935,
"eval_runtime": 164.0477,
"eval_samples_per_second": 34.478,
"eval_steps_per_second": 4.31,
"eval_wer": 0.3380462518656417,
"step": 15300
},
{
"epoch": 2.9810298102981028,
"eval_loss": 0.436974436044693,
"eval_runtime": 163.6525,
"eval_samples_per_second": 34.561,
"eval_steps_per_second": 4.32,
"eval_wer": 0.3430694419925856,
"step": 15400
},
{
"epoch": 3.00038714672861,
"grad_norm": 0.660970151424408,
"learning_rate": 0.00014758983050847457,
"loss": 0.3731,
"step": 15500
},
{
"epoch": 3.00038714672861,
"eval_loss": 0.43810611963272095,
"eval_runtime": 162.9215,
"eval_samples_per_second": 34.716,
"eval_steps_per_second": 4.34,
"eval_wer": 0.33413041036093144,
"step": 15500
},
{
"epoch": 3.0197444831591174,
"eval_loss": 0.45139721035957336,
"eval_runtime": 164.2748,
"eval_samples_per_second": 34.43,
"eval_steps_per_second": 4.304,
"eval_wer": 0.3286418128420343,
"step": 15600
},
{
"epoch": 3.0391018195896247,
"eval_loss": 0.43782538175582886,
"eval_runtime": 164.2188,
"eval_samples_per_second": 34.442,
"eval_steps_per_second": 4.305,
"eval_wer": 0.3340180706456324,
"step": 15700
},
{
"epoch": 3.0584591560201315,
"eval_loss": 0.44340020418167114,
"eval_runtime": 163.4459,
"eval_samples_per_second": 34.605,
"eval_steps_per_second": 4.326,
"eval_wer": 0.3441446935533052,
"step": 15800
},
{
"epoch": 3.077816492450639,
"eval_loss": 0.44192126393318176,
"eval_runtime": 164.2009,
"eval_samples_per_second": 34.446,
"eval_steps_per_second": 4.306,
"eval_wer": 0.3399239299642118,
"step": 15900
},
{
"epoch": 3.097173828881146,
"grad_norm": 0.6999391913414001,
"learning_rate": 0.0001425050847457627,
"loss": 0.3176,
"step": 16000
},
{
"epoch": 3.097173828881146,
"eval_loss": 0.4407601058483124,
"eval_runtime": 164.7219,
"eval_samples_per_second": 34.337,
"eval_steps_per_second": 4.292,
"eval_wer": 0.3335366147229221,
"step": 16000
},
{
"epoch": 3.116531165311653,
"eval_loss": 0.436761736869812,
"eval_runtime": 164.2581,
"eval_samples_per_second": 34.434,
"eval_steps_per_second": 4.304,
"eval_wer": 0.33584760315193146,
"step": 16100
},
{
"epoch": 3.1358885017421603,
"eval_loss": 0.4477560520172119,
"eval_runtime": 163.8979,
"eval_samples_per_second": 34.509,
"eval_steps_per_second": 4.314,
"eval_wer": 0.3400523182102678,
"step": 16200
},
{
"epoch": 3.1552458381726676,
"eval_loss": 0.4414171576499939,
"eval_runtime": 164.4332,
"eval_samples_per_second": 34.397,
"eval_steps_per_second": 4.3,
"eval_wer": 0.3373882621046043,
"step": 16300
},
{
"epoch": 3.1746031746031744,
"eval_loss": 0.4476623833179474,
"eval_runtime": 163.6322,
"eval_samples_per_second": 34.565,
"eval_steps_per_second": 4.321,
"eval_wer": 0.335013079552567,
"step": 16400
},
{
"epoch": 3.1939605110336817,
"grad_norm": 0.5408484935760498,
"learning_rate": 0.00013742033898305083,
"loss": 0.3201,
"step": 16500
},
{
"epoch": 3.1939605110336817,
"eval_loss": 0.4305751919746399,
"eval_runtime": 163.8253,
"eval_samples_per_second": 34.525,
"eval_steps_per_second": 4.316,
"eval_wer": 0.32917141435701563,
"step": 16500
},
{
"epoch": 3.213317847464189,
"eval_loss": 0.4534677267074585,
"eval_runtime": 163.8666,
"eval_samples_per_second": 34.516,
"eval_steps_per_second": 4.314,
"eval_wer": 0.32941214231837074,
"step": 16600
},
{
"epoch": 3.2326751838946963,
"eval_loss": 0.4379562437534332,
"eval_runtime": 164.1184,
"eval_samples_per_second": 34.463,
"eval_steps_per_second": 4.308,
"eval_wer": 0.33408226476866043,
"step": 16700
},
{
"epoch": 3.252032520325203,
"eval_loss": 0.43677663803100586,
"eval_runtime": 163.6899,
"eval_samples_per_second": 34.553,
"eval_steps_per_second": 4.319,
"eval_wer": 0.33252555728523053,
"step": 16800
},
{
"epoch": 3.2713898567557105,
"eval_loss": 0.4359833598136902,
"eval_runtime": 164.1551,
"eval_samples_per_second": 34.455,
"eval_steps_per_second": 4.307,
"eval_wer": 0.33043924828681936,
"step": 16900
},
{
"epoch": 3.290747193186218,
"grad_norm": 0.5537161231040955,
"learning_rate": 0.00013233559322033898,
"loss": 0.3101,
"step": 17000
},
{
"epoch": 3.290747193186218,
"eval_loss": 0.4347226917743683,
"eval_runtime": 164.2964,
"eval_samples_per_second": 34.426,
"eval_steps_per_second": 4.303,
"eval_wer": 0.32812825985781,
"step": 17000
},
{
"epoch": 3.3101045296167246,
"eval_loss": 0.4375491738319397,
"eval_runtime": 162.8938,
"eval_samples_per_second": 34.722,
"eval_steps_per_second": 4.34,
"eval_wer": 0.3284973760652212,
"step": 17100
},
{
"epoch": 3.329461866047232,
"eval_loss": 0.4491961896419525,
"eval_runtime": 163.5698,
"eval_samples_per_second": 34.579,
"eval_steps_per_second": 4.322,
"eval_wer": 0.33032690857152025,
"step": 17200
},
{
"epoch": 3.3488192024777392,
"eval_loss": 0.4268127977848053,
"eval_runtime": 164.9468,
"eval_samples_per_second": 34.29,
"eval_steps_per_second": 4.286,
"eval_wer": 0.3284652790037072,
"step": 17300
},
{
"epoch": 3.368176538908246,
"eval_loss": 0.4377237558364868,
"eval_runtime": 164.0847,
"eval_samples_per_second": 34.47,
"eval_steps_per_second": 4.309,
"eval_wer": 0.3269888141740624,
"step": 17400
},
{
"epoch": 3.3875338753387534,
"grad_norm": 0.5330023765563965,
"learning_rate": 0.00012726101694915254,
"loss": 0.2963,
"step": 17500
},
{
"epoch": 3.3875338753387534,
"eval_loss": 0.42494186758995056,
"eval_runtime": 163.0698,
"eval_samples_per_second": 34.685,
"eval_steps_per_second": 4.336,
"eval_wer": 0.3322527322623614,
"step": 17500
},
{
"epoch": 3.4068912117692607,
"eval_loss": 0.4404699206352234,
"eval_runtime": 164.4408,
"eval_samples_per_second": 34.395,
"eval_steps_per_second": 4.299,
"eval_wer": 0.3338736338688193,
"step": 17600
},
{
"epoch": 3.4262485481997675,
"eval_loss": 0.43636277318000793,
"eval_runtime": 163.9679,
"eval_samples_per_second": 34.495,
"eval_steps_per_second": 4.312,
"eval_wer": 0.3285615701882493,
"step": 17700
},
{
"epoch": 3.445605884630275,
"eval_loss": 0.4350505769252777,
"eval_runtime": 163.6477,
"eval_samples_per_second": 34.562,
"eval_steps_per_second": 4.32,
"eval_wer": 0.3309207042095296,
"step": 17800
},
{
"epoch": 3.464963221060782,
"eval_loss": 0.42997920513153076,
"eval_runtime": 163.7592,
"eval_samples_per_second": 34.539,
"eval_steps_per_second": 4.317,
"eval_wer": 0.322880390300268,
"step": 17900
},
{
"epoch": 3.484320557491289,
"grad_norm": 0.31616127490997314,
"learning_rate": 0.00012217627118644067,
"loss": 0.3062,
"step": 18000
},
{
"epoch": 3.484320557491289,
"eval_loss": 0.42307358980178833,
"eval_runtime": 164.6469,
"eval_samples_per_second": 34.352,
"eval_steps_per_second": 4.294,
"eval_wer": 0.32523952432154835,
"step": 18000
},
{
"epoch": 3.5036778939217963,
"eval_loss": 0.4325993061065674,
"eval_runtime": 164.1143,
"eval_samples_per_second": 34.464,
"eval_steps_per_second": 4.308,
"eval_wer": 0.32326555503843624,
"step": 18100
},
{
"epoch": 3.5230352303523036,
"eval_loss": 0.43141353130340576,
"eval_runtime": 163.522,
"eval_samples_per_second": 34.589,
"eval_steps_per_second": 4.324,
"eval_wer": 0.3282405995731091,
"step": 18200
},
{
"epoch": 3.5423925667828104,
"eval_loss": 0.4343957006931305,
"eval_runtime": 163.8089,
"eval_samples_per_second": 34.528,
"eval_steps_per_second": 4.316,
"eval_wer": 0.32894673492641746,
"step": 18300
},
{
"epoch": 3.5617499032133177,
"eval_loss": 0.42664915323257446,
"eval_runtime": 166.5849,
"eval_samples_per_second": 33.953,
"eval_steps_per_second": 4.244,
"eval_wer": 0.32207796376241754,
"step": 18400
},
{
"epoch": 3.581107239643825,
"grad_norm": 0.40817028284072876,
"learning_rate": 0.00011710169491525424,
"loss": 0.2968,
"step": 18500
},
{
"epoch": 3.581107239643825,
"eval_loss": 0.4305819571018219,
"eval_runtime": 164.3676,
"eval_samples_per_second": 34.411,
"eval_steps_per_second": 4.301,
"eval_wer": 0.32161255637046426,
"step": 18500
},
{
"epoch": 3.600464576074332,
"eval_loss": 0.4318545460700989,
"eval_runtime": 166.4377,
"eval_samples_per_second": 33.983,
"eval_steps_per_second": 4.248,
"eval_wer": 0.3238914477379596,
"step": 18600
},
{
"epoch": 3.619821912504839,
"eval_loss": 0.4271145164966583,
"eval_runtime": 166.5812,
"eval_samples_per_second": 33.953,
"eval_steps_per_second": 4.244,
"eval_wer": 0.3232013609154082,
"step": 18700
},
{
"epoch": 3.6391792489353465,
"eval_loss": 0.41837719082832336,
"eval_runtime": 164.5624,
"eval_samples_per_second": 34.37,
"eval_steps_per_second": 4.296,
"eval_wer": 0.32641106706681006,
"step": 18800
},
{
"epoch": 3.658536585365854,
"eval_loss": 0.4237981140613556,
"eval_runtime": 165.5252,
"eval_samples_per_second": 34.17,
"eval_steps_per_second": 4.271,
"eval_wer": 0.31997560623324933,
"step": 18900
},
{
"epoch": 3.6778939217963607,
"grad_norm": 0.9548519253730774,
"learning_rate": 0.00011201694915254236,
"loss": 0.3191,
"step": 19000
},
{
"epoch": 3.6778939217963607,
"eval_loss": 0.41389960050582886,
"eval_runtime": 163.7093,
"eval_samples_per_second": 34.549,
"eval_steps_per_second": 4.319,
"eval_wer": 0.3225915167466418,
"step": 19000
},
{
"epoch": 3.697251258226868,
"eval_loss": 0.42384064197540283,
"eval_runtime": 164.2841,
"eval_samples_per_second": 34.428,
"eval_steps_per_second": 4.304,
"eval_wer": 0.3159955706055111,
"step": 19100
},
{
"epoch": 3.7166085946573753,
"eval_loss": 0.4176156520843506,
"eval_runtime": 163.8287,
"eval_samples_per_second": 34.524,
"eval_steps_per_second": 4.315,
"eval_wer": 0.319301567941455,
"step": 19200
},
{
"epoch": 3.7359659310878826,
"eval_loss": 0.4196203351020813,
"eval_runtime": 165.0023,
"eval_samples_per_second": 34.278,
"eval_steps_per_second": 4.285,
"eval_wer": 0.3202644797868755,
"step": 19300
},
{
"epoch": 3.7553232675183894,
"eval_loss": 0.409524530172348,
"eval_runtime": 164.4937,
"eval_samples_per_second": 34.384,
"eval_steps_per_second": 4.298,
"eval_wer": 0.3181621222577073,
"step": 19400
},
{
"epoch": 3.7746806039488967,
"grad_norm": 0.43373510241508484,
"learning_rate": 0.00010693220338983049,
"loss": 0.2921,
"step": 19500
},
{
"epoch": 3.7746806039488967,
"eval_loss": 0.41209807991981506,
"eval_runtime": 164.8329,
"eval_samples_per_second": 34.314,
"eval_steps_per_second": 4.289,
"eval_wer": 0.31665356036654846,
"step": 19500
},
{
"epoch": 3.794037940379404,
"eval_loss": 0.4112759530544281,
"eval_runtime": 164.1863,
"eval_samples_per_second": 34.449,
"eval_steps_per_second": 4.306,
"eval_wer": 0.31455120283738025,
"step": 19600
},
{
"epoch": 3.813395276809911,
"eval_loss": 0.4094259738922119,
"eval_runtime": 164.4841,
"eval_samples_per_second": 34.386,
"eval_steps_per_second": 4.298,
"eval_wer": 0.3160758132592961,
"step": 19700
},
{
"epoch": 3.832752613240418,
"eval_loss": 0.40931811928749084,
"eval_runtime": 164.2911,
"eval_samples_per_second": 34.427,
"eval_steps_per_second": 4.303,
"eval_wer": 0.31386111601482886,
"step": 19800
},
{
"epoch": 3.8521099496709255,
"eval_loss": 0.41117748618125916,
"eval_runtime": 165.1732,
"eval_samples_per_second": 34.243,
"eval_steps_per_second": 4.28,
"eval_wer": 0.31731155012758583,
"step": 19900
},
{
"epoch": 3.8714672861014323,
"grad_norm": 0.5022397637367249,
"learning_rate": 0.00010184745762711863,
"loss": 0.3007,
"step": 20000
},
{
"epoch": 3.8714672861014323,
"eval_loss": 0.4092504680156708,
"eval_runtime": 163.9434,
"eval_samples_per_second": 34.5,
"eval_steps_per_second": 4.312,
"eval_wer": 0.31593137648248304,
"step": 20000
},
{
"epoch": 3.8908246225319396,
"eval_loss": 0.4147598147392273,
"eval_runtime": 164.6303,
"eval_samples_per_second": 34.356,
"eval_steps_per_second": 4.294,
"eval_wer": 0.31565855145961386,
"step": 20100
},
{
"epoch": 3.910181958962447,
"eval_loss": 0.41137251257896423,
"eval_runtime": 164.6634,
"eval_samples_per_second": 34.349,
"eval_steps_per_second": 4.294,
"eval_wer": 0.3150326587600905,
"step": 20200
},
{
"epoch": 3.9295392953929538,
"eval_loss": 0.4155375361442566,
"eval_runtime": 164.3406,
"eval_samples_per_second": 34.416,
"eval_steps_per_second": 4.302,
"eval_wer": 0.31456725136813724,
"step": 20300
},
{
"epoch": 3.948896631823461,
"eval_loss": 0.4075925648212433,
"eval_runtime": 164.3692,
"eval_samples_per_second": 34.41,
"eval_steps_per_second": 4.301,
"eval_wer": 0.3135722424612027,
"step": 20400
},
{
"epoch": 3.9682539682539684,
"grad_norm": 0.6109060049057007,
"learning_rate": 9.67728813559322e-05,
"loss": 0.296,
"step": 20500
},
{
"epoch": 3.9682539682539684,
"eval_loss": 0.4066578149795532,
"eval_runtime": 164.7852,
"eval_samples_per_second": 34.323,
"eval_steps_per_second": 4.29,
"eval_wer": 0.3125611850235111,
"step": 20500
},
{
"epoch": 3.987611304684475,
"eval_loss": 0.40839362144470215,
"eval_runtime": 164.8883,
"eval_samples_per_second": 34.302,
"eval_steps_per_second": 4.288,
"eval_wer": 0.3150487072908475,
"step": 20600
},
{
"epoch": 4.006968641114983,
"eval_loss": 0.4150494635105133,
"eval_runtime": 164.1525,
"eval_samples_per_second": 34.456,
"eval_steps_per_second": 4.307,
"eval_wer": 0.312432796777455,
"step": 20700
},
{
"epoch": 4.02632597754549,
"eval_loss": 0.41322341561317444,
"eval_runtime": 164.6726,
"eval_samples_per_second": 34.347,
"eval_steps_per_second": 4.293,
"eval_wer": 0.3132512718460625,
"step": 20800
},
{
"epoch": 4.045683313975997,
"eval_loss": 0.4182606339454651,
"eval_runtime": 164.9667,
"eval_samples_per_second": 34.286,
"eval_steps_per_second": 4.286,
"eval_wer": 0.31464749402192227,
"step": 20900
},
{
"epoch": 4.065040650406504,
"grad_norm": 0.9771650433540344,
"learning_rate": 9.168813559322032e-05,
"loss": 0.2611,
"step": 21000
},
{
"epoch": 4.065040650406504,
"eval_loss": 0.41840454936027527,
"eval_runtime": 164.8893,
"eval_samples_per_second": 34.302,
"eval_steps_per_second": 4.288,
"eval_wer": 0.30952801271043634,
"step": 21000
},
{
"epoch": 4.084397986837011,
"eval_loss": 0.4167742431163788,
"eval_runtime": 165.1089,
"eval_samples_per_second": 34.256,
"eval_steps_per_second": 4.282,
"eval_wer": 0.30845276114971676,
"step": 21100
},
{
"epoch": 4.103755323267518,
"eval_loss": 0.42244288325309753,
"eval_runtime": 164.9167,
"eval_samples_per_second": 34.296,
"eval_steps_per_second": 4.287,
"eval_wer": 0.31015390540995974,
"step": 21200
},
{
"epoch": 4.123112659698026,
"eval_loss": 0.4187394678592682,
"eval_runtime": 164.7166,
"eval_samples_per_second": 34.338,
"eval_steps_per_second": 4.292,
"eval_wer": 0.30456901670652053,
"step": 21300
},
{
"epoch": 4.142469996128533,
"eval_loss": 0.41454723477363586,
"eval_runtime": 164.38,
"eval_samples_per_second": 34.408,
"eval_steps_per_second": 4.301,
"eval_wer": 0.3110044775400812,
"step": 21400
},
{
"epoch": 4.16182733255904,
"grad_norm": 0.8976078629493713,
"learning_rate": 8.660338983050847e-05,
"loss": 0.2431,
"step": 21500
},
{
"epoch": 4.16182733255904,
"eval_loss": 0.42720434069633484,
"eval_runtime": 165.0533,
"eval_samples_per_second": 34.268,
"eval_steps_per_second": 4.283,
"eval_wer": 0.31071560398645504,
"step": 21500
},
{
"epoch": 4.181184668989547,
"eval_loss": 0.41736435890197754,
"eval_runtime": 164.6627,
"eval_samples_per_second": 34.349,
"eval_steps_per_second": 4.294,
"eval_wer": 0.3069923448508289,
"step": 21600
},
{
"epoch": 4.200542005420054,
"eval_loss": 0.41904589533805847,
"eval_runtime": 165.6169,
"eval_samples_per_second": 34.151,
"eval_steps_per_second": 4.269,
"eval_wer": 0.3085811493957728,
"step": 21700
},
{
"epoch": 4.219899341850561,
"eval_loss": 0.41643446683883667,
"eval_runtime": 165.1417,
"eval_samples_per_second": 34.249,
"eval_steps_per_second": 4.281,
"eval_wer": 0.3050825696907448,
"step": 21800
},
{
"epoch": 4.239256678281069,
"eval_loss": 0.41955476999282837,
"eval_runtime": 165.2591,
"eval_samples_per_second": 34.225,
"eval_steps_per_second": 4.278,
"eval_wer": 0.30777872285792235,
"step": 21900
},
{
"epoch": 4.258614014711576,
"grad_norm": 1.5854851007461548,
"learning_rate": 8.15186440677966e-05,
"loss": 0.2453,
"step": 22000
},
{
"epoch": 4.258614014711576,
"eval_loss": 0.42485129833221436,
"eval_runtime": 164.7312,
"eval_samples_per_second": 34.335,
"eval_steps_per_second": 4.292,
"eval_wer": 0.30915889650302514,
"step": 22000
},
{
"epoch": 4.2779713511420825,
"eval_loss": 0.4246067404747009,
"eval_runtime": 164.9099,
"eval_samples_per_second": 34.298,
"eval_steps_per_second": 4.287,
"eval_wer": 0.30736146105824014,
"step": 22100
},
{
"epoch": 4.29732868757259,
"eval_loss": 0.4166228771209717,
"eval_runtime": 164.9564,
"eval_samples_per_second": 34.288,
"eval_steps_per_second": 4.286,
"eval_wer": 0.30740960665051115,
"step": 22200
},
{
"epoch": 4.316686024003097,
"eval_loss": 0.4192067086696625,
"eval_runtime": 165.6591,
"eval_samples_per_second": 34.142,
"eval_steps_per_second": 4.268,
"eval_wer": 0.3027555327309785,
"step": 22300
},
{
"epoch": 4.336043360433604,
"eval_loss": 0.41863906383514404,
"eval_runtime": 164.9558,
"eval_samples_per_second": 34.288,
"eval_steps_per_second": 4.286,
"eval_wer": 0.3020975429699411,
"step": 22400
},
{
"epoch": 4.355400696864112,
"grad_norm": 1.1900339126586914,
"learning_rate": 7.645423728813559e-05,
"loss": 0.2336,
"step": 22500
},
{
"epoch": 4.355400696864112,
"eval_loss": 0.4268459677696228,
"eval_runtime": 166.7137,
"eval_samples_per_second": 33.926,
"eval_steps_per_second": 4.241,
"eval_wer": 0.3083885670266887,
"step": 22500
},
{
"epoch": 4.3747580332946185,
"eval_loss": 0.4346672296524048,
"eval_runtime": 170.3751,
"eval_samples_per_second": 33.197,
"eval_steps_per_second": 4.15,
"eval_wer": 0.307104684566128,
"step": 22600
},
{
"epoch": 4.394115369725126,
"eval_loss": 0.47525468468666077,
"eval_runtime": 164.9807,
"eval_samples_per_second": 34.283,
"eval_steps_per_second": 4.285,
"eval_wer": 0.3208582754248849,
"step": 22700
},
{
"epoch": 4.413472706155633,
"eval_loss": 0.582381546497345,
"eval_runtime": 165.1397,
"eval_samples_per_second": 34.25,
"eval_steps_per_second": 4.281,
"eval_wer": 0.415416218645183,
"step": 22800
},
{
"epoch": 4.43283004258614,
"eval_loss": 0.5073803067207336,
"eval_runtime": 165.3352,
"eval_samples_per_second": 34.209,
"eval_steps_per_second": 4.276,
"eval_wer": 0.3415448315706697,
"step": 22900
},
{
"epoch": 4.452187379016648,
"grad_norm": 1.5807456970214844,
"learning_rate": 7.136949152542373e-05,
"loss": 0.3426,
"step": 23000
},
{
"epoch": 4.452187379016648,
"eval_loss": 0.6242379546165466,
"eval_runtime": 164.8642,
"eval_samples_per_second": 34.307,
"eval_steps_per_second": 4.288,
"eval_wer": 0.41979746754184655,
"step": 23000
},
{
"epoch": 4.471544715447155,
"eval_loss": 0.5862211585044861,
"eval_runtime": 164.8283,
"eval_samples_per_second": 34.314,
"eval_steps_per_second": 4.289,
"eval_wer": 0.4200702925647157,
"step": 23100
},
{
"epoch": 4.4909020518776614,
"eval_loss": 0.6151086091995239,
"eval_runtime": 165.0995,
"eval_samples_per_second": 34.258,
"eval_steps_per_second": 4.282,
"eval_wer": 0.39638266116737014,
"step": 23200
},
{
"epoch": 4.510259388308169,
"eval_loss": 0.5640283226966858,
"eval_runtime": 164.849,
"eval_samples_per_second": 34.31,
"eval_steps_per_second": 4.289,
"eval_wer": 0.3685705573654732,
"step": 23300
},
{
"epoch": 4.529616724738676,
"eval_loss": 0.6589744091033936,
"eval_runtime": 164.8194,
"eval_samples_per_second": 34.316,
"eval_steps_per_second": 4.29,
"eval_wer": 0.4647494021922293,
"step": 23400
},
{
"epoch": 4.548974061169183,
"grad_norm": 1.0218427181243896,
"learning_rate": 6.628474576271186e-05,
"loss": 0.4541,
"step": 23500
},
{
"epoch": 4.548974061169183,
"eval_loss": 0.6010532975196838,
"eval_runtime": 165.0253,
"eval_samples_per_second": 34.274,
"eval_steps_per_second": 4.284,
"eval_wer": 0.3959974964292019,
"step": 23500
},
{
"epoch": 4.568331397599691,
"eval_loss": 0.5802894830703735,
"eval_runtime": 166.0838,
"eval_samples_per_second": 34.055,
"eval_steps_per_second": 4.257,
"eval_wer": 0.39505063311453836,
"step": 23600
},
{
"epoch": 4.5876887340301975,
"eval_loss": 0.5762883424758911,
"eval_runtime": 165.1308,
"eval_samples_per_second": 34.252,
"eval_steps_per_second": 4.281,
"eval_wer": 0.3910545489560431,
"step": 23700
},
{
"epoch": 4.607046070460704,
"eval_loss": 0.5418487787246704,
"eval_runtime": 165.1407,
"eval_samples_per_second": 34.25,
"eval_steps_per_second": 4.281,
"eval_wer": 0.36550528799088444,
"step": 23800
},
{
"epoch": 4.626403406891212,
"eval_loss": 0.5546759366989136,
"eval_runtime": 165.8678,
"eval_samples_per_second": 34.099,
"eval_steps_per_second": 4.262,
"eval_wer": 0.38877565758854776,
"step": 23900
},
{
"epoch": 4.645760743321719,
"grad_norm": 32.50680923461914,
"learning_rate": 6.12e-05,
"loss": 0.4145,
"step": 24000
},
{
"epoch": 4.645760743321719,
"eval_loss": 0.5300523638725281,
"eval_runtime": 164.9724,
"eval_samples_per_second": 34.285,
"eval_steps_per_second": 4.286,
"eval_wer": 0.3608030684790807,
"step": 24000
},
{
"epoch": 4.665118079752226,
"eval_loss": 0.573882520198822,
"eval_runtime": 165.2226,
"eval_samples_per_second": 34.233,
"eval_steps_per_second": 4.279,
"eval_wer": 0.39927139670363176,
"step": 24100
},
{
"epoch": 4.6844754161827336,
"eval_loss": 0.5775899887084961,
"eval_runtime": 165.3691,
"eval_samples_per_second": 34.202,
"eval_steps_per_second": 4.275,
"eval_wer": 0.39816404808139816,
"step": 24200
},
{
"epoch": 4.70383275261324,
"eval_loss": 0.5412492156028748,
"eval_runtime": 164.9818,
"eval_samples_per_second": 34.283,
"eval_steps_per_second": 4.285,
"eval_wer": 0.37078525460994044,
"step": 24300
},
{
"epoch": 4.723190089043747,
"eval_loss": 0.5329325199127197,
"eval_runtime": 165.8065,
"eval_samples_per_second": 34.112,
"eval_steps_per_second": 4.264,
"eval_wer": 0.37044823546404326,
"step": 24400
},
{
"epoch": 4.742547425474255,
"grad_norm": 1.8765805959701538,
"learning_rate": 5.611525423728813e-05,
"loss": 0.3834,
"step": 24500
},
{
"epoch": 4.742547425474255,
"eval_loss": 0.5299070477485657,
"eval_runtime": 165.3917,
"eval_samples_per_second": 34.198,
"eval_steps_per_second": 4.275,
"eval_wer": 0.3732246312850059,
"step": 24500
},
{
"epoch": 4.761904761904762,
"eval_loss": 0.5424681901931763,
"eval_runtime": 165.4071,
"eval_samples_per_second": 34.194,
"eval_steps_per_second": 4.274,
"eval_wer": 0.3928519844008281,
"step": 24600
},
{
"epoch": 4.781262098335269,
"eval_loss": 0.5111268758773804,
"eval_runtime": 165.4914,
"eval_samples_per_second": 34.177,
"eval_steps_per_second": 4.272,
"eval_wer": 0.3585241771115854,
"step": 24700
},
{
"epoch": 4.8006194347657765,
"eval_loss": 0.5076457858085632,
"eval_runtime": 165.6732,
"eval_samples_per_second": 34.14,
"eval_steps_per_second": 4.267,
"eval_wer": 0.35033942642551075,
"step": 24800
},
{
"epoch": 4.819976771196283,
"eval_loss": 0.5261921882629395,
"eval_runtime": 165.2946,
"eval_samples_per_second": 34.218,
"eval_steps_per_second": 4.277,
"eval_wer": 0.3681372470350339,
"step": 24900
},
{
"epoch": 4.83933410762679,
"grad_norm": 5.934371471405029,
"learning_rate": 5.1030508474576264e-05,
"loss": 0.3719,
"step": 25000
},
{
"epoch": 4.83933410762679,
"eval_loss": 0.547415018081665,
"eval_runtime": 165.6997,
"eval_samples_per_second": 34.134,
"eval_steps_per_second": 4.267,
"eval_wer": 0.3833031086004076,
"step": 25000
},
{
"epoch": 4.858691444057298,
"eval_loss": 0.5746738910675049,
"eval_runtime": 165.8407,
"eval_samples_per_second": 34.105,
"eval_steps_per_second": 4.263,
"eval_wer": 0.40389337356165045,
"step": 25100
},
{
"epoch": 4.878048780487805,
"eval_loss": 0.5188133120536804,
"eval_runtime": 165.5746,
"eval_samples_per_second": 34.16,
"eval_steps_per_second": 4.27,
"eval_wer": 0.3503073293639967,
"step": 25200
},
{
"epoch": 4.897406116918312,
"eval_loss": 0.5522667169570923,
"eval_runtime": 165.1011,
"eval_samples_per_second": 34.258,
"eval_steps_per_second": 4.282,
"eval_wer": 0.3865609603440805,
"step": 25300
},
{
"epoch": 4.916763453348819,
"eval_loss": 0.5302358865737915,
"eval_runtime": 165.871,
"eval_samples_per_second": 34.099,
"eval_steps_per_second": 4.262,
"eval_wer": 0.36446213349167883,
"step": 25400
},
{
"epoch": 4.936120789779326,
"grad_norm": 1.1752023696899414,
"learning_rate": 4.595593220338983e-05,
"loss": 0.3798,
"step": 25500
},
{
"epoch": 4.936120789779326,
"eval_loss": 0.5099266767501831,
"eval_runtime": 165.9652,
"eval_samples_per_second": 34.079,
"eval_steps_per_second": 4.26,
"eval_wer": 0.3499542616873425,
"step": 25500
},
{
"epoch": 4.955478126209833,
"eval_loss": 0.4823363125324249,
"eval_runtime": 164.8602,
"eval_samples_per_second": 34.308,
"eval_steps_per_second": 4.288,
"eval_wer": 0.33761294153520244,
"step": 25600
},
{
"epoch": 4.974835462640341,
"eval_loss": 0.4805842936038971,
"eval_runtime": 166.8028,
"eval_samples_per_second": 33.908,
"eval_steps_per_second": 4.239,
"eval_wer": 0.3357352634366324,
"step": 25700
},
{
"epoch": 4.994192799070848,
"eval_loss": 0.4942820370197296,
"eval_runtime": 165.4552,
"eval_samples_per_second": 34.184,
"eval_steps_per_second": 4.273,
"eval_wer": 0.35093322206352007,
"step": 25800
},
{
"epoch": 5.013550135501355,
"eval_loss": 0.49528568983078003,
"eval_runtime": 165.947,
"eval_samples_per_second": 34.083,
"eval_steps_per_second": 4.26,
"eval_wer": 0.35245783248543594,
"step": 25900
},
{
"epoch": 5.032907471931862,
"grad_norm": 27.542322158813477,
"learning_rate": 4.087118644067796e-05,
"loss": 0.3158,
"step": 26000
},
{
"epoch": 5.032907471931862,
"eval_loss": 0.485315203666687,
"eval_runtime": 165.2844,
"eval_samples_per_second": 34.22,
"eval_steps_per_second": 4.277,
"eval_wer": 0.34703342908956686,
"step": 26000
},
{
"epoch": 5.052264808362369,
"eval_loss": 0.5204781293869019,
"eval_runtime": 165.773,
"eval_samples_per_second": 34.119,
"eval_steps_per_second": 4.265,
"eval_wer": 0.36183017444752935,
"step": 26100
},
{
"epoch": 5.071622144792877,
"eval_loss": 0.5013459920883179,
"eval_runtime": 165.057,
"eval_samples_per_second": 34.267,
"eval_steps_per_second": 4.283,
"eval_wer": 0.3510455617788191,
"step": 26200
},
{
"epoch": 5.090979481223384,
"eval_loss": 0.4863474667072296,
"eval_runtime": 165.6964,
"eval_samples_per_second": 34.135,
"eval_steps_per_second": 4.267,
"eval_wer": 0.3396511049413426,
"step": 26300
},
{
"epoch": 5.110336817653891,
"eval_loss": 0.47152572870254517,
"eval_runtime": 166.0563,
"eval_samples_per_second": 34.061,
"eval_steps_per_second": 4.258,
"eval_wer": 0.32851342459597827,
"step": 26400
},
{
"epoch": 5.129694154084398,
"grad_norm": 0.8464019894599915,
"learning_rate": 3.5786440677966095e-05,
"loss": 0.2993,
"step": 26500
},
{
"epoch": 5.129694154084398,
"eval_loss": 0.4816218912601471,
"eval_runtime": 165.4176,
"eval_samples_per_second": 34.192,
"eval_steps_per_second": 4.274,
"eval_wer": 0.33273418818507167,
"step": 26500
},
{
"epoch": 5.149051490514905,
"eval_loss": 0.48058804869651794,
"eval_runtime": 166.4075,
"eval_samples_per_second": 33.989,
"eval_steps_per_second": 4.249,
"eval_wer": 0.33811044598866974,
"step": 26600
},
{
"epoch": 5.168408826945412,
"eval_loss": 0.4854019284248352,
"eval_runtime": 165.1934,
"eval_samples_per_second": 34.239,
"eval_steps_per_second": 4.28,
"eval_wer": 0.33416250742244547,
"step": 26700
},
{
"epoch": 5.18776616337592,
"eval_loss": 0.49545472860336304,
"eval_runtime": 165.6735,
"eval_samples_per_second": 34.139,
"eval_steps_per_second": 4.267,
"eval_wer": 0.3433422670154547,
"step": 26800
},
{
"epoch": 5.207123499806427,
"eval_loss": 0.4862872064113617,
"eval_runtime": 165.5277,
"eval_samples_per_second": 34.17,
"eval_steps_per_second": 4.271,
"eval_wer": 0.34337436407696875,
"step": 26900
},
{
"epoch": 5.2264808362369335,
"grad_norm": 10.611580848693848,
"learning_rate": 3.0701694915254236e-05,
"loss": 0.2902,
"step": 27000
},
{
"epoch": 5.2264808362369335,
"eval_loss": 0.48670876026153564,
"eval_runtime": 165.4986,
"eval_samples_per_second": 34.176,
"eval_steps_per_second": 4.272,
"eval_wer": 0.3448508289066136,
"step": 27000
},
{
"epoch": 5.245838172667441,
"eval_loss": 0.4787338674068451,
"eval_runtime": 165.461,
"eval_samples_per_second": 34.183,
"eval_steps_per_second": 4.273,
"eval_wer": 0.33778947537352955,
"step": 27100
},
{
"epoch": 5.265195509097948,
"eval_loss": 0.4861724376678467,
"eval_runtime": 165.3459,
"eval_samples_per_second": 34.207,
"eval_steps_per_second": 4.276,
"eval_wer": 0.33793391215034263,
"step": 27200
},
{
"epoch": 5.284552845528455,
"eval_loss": 0.4954308271408081,
"eval_runtime": 165.6637,
"eval_samples_per_second": 34.141,
"eval_steps_per_second": 4.268,
"eval_wer": 0.3467927011282117,
"step": 27300
},
{
"epoch": 5.303910181958963,
"eval_loss": 0.572640061378479,
"eval_runtime": 165.6804,
"eval_samples_per_second": 34.138,
"eval_steps_per_second": 4.267,
"eval_wer": 0.41416443324613633,
"step": 27400
},
{
"epoch": 5.3232675183894695,
"grad_norm": 1.2211335897445679,
"learning_rate": 2.5627118644067793e-05,
"loss": 0.305,
"step": 27500
},
{
"epoch": 5.3232675183894695,
"eval_loss": 0.5179979205131531,
"eval_runtime": 165.6208,
"eval_samples_per_second": 34.15,
"eval_steps_per_second": 4.269,
"eval_wer": 0.35735263436632375,
"step": 27500
},
{
"epoch": 5.342624854819976,
"eval_loss": 0.4996646046638489,
"eval_runtime": 164.9613,
"eval_samples_per_second": 34.287,
"eval_steps_per_second": 4.286,
"eval_wer": 0.3452038965832678,
"step": 27600
},
{
"epoch": 5.361982191250484,
"eval_loss": 0.4949517846107483,
"eval_runtime": 165.643,
"eval_samples_per_second": 34.146,
"eval_steps_per_second": 4.268,
"eval_wer": 0.34130410360931457,
"step": 27700
},
{
"epoch": 5.381339527680991,
"eval_loss": 0.5071349143981934,
"eval_runtime": 166.3001,
"eval_samples_per_second": 34.011,
"eval_steps_per_second": 4.251,
"eval_wer": 0.3491999807417631,
"step": 27800
},
{
"epoch": 5.400696864111498,
"eval_loss": 0.5095939040184021,
"eval_runtime": 165.4785,
"eval_samples_per_second": 34.18,
"eval_steps_per_second": 4.272,
"eval_wer": 0.3544799473608191,
"step": 27900
},
{
"epoch": 5.420054200542006,
"grad_norm": 15.024033546447754,
"learning_rate": 2.054237288135593e-05,
"loss": 0.3163,
"step": 28000
},
{
"epoch": 5.420054200542006,
"eval_loss": 0.5129156112670898,
"eval_runtime": 166.1505,
"eval_samples_per_second": 34.041,
"eval_steps_per_second": 4.255,
"eval_wer": 0.3565983534207443,
"step": 28000
},
{
"epoch": 5.4394115369725125,
"eval_loss": 0.5067318677902222,
"eval_runtime": 165.9899,
"eval_samples_per_second": 34.074,
"eval_steps_per_second": 4.259,
"eval_wer": 0.3506122514483799,
"step": 28100
},
{
"epoch": 5.45876887340302,
"eval_loss": 0.5053198337554932,
"eval_runtime": 165.3351,
"eval_samples_per_second": 34.209,
"eval_steps_per_second": 4.276,
"eval_wer": 0.35000240727961357,
"step": 28200
},
{
"epoch": 5.478126209833527,
"eval_loss": 0.5077947974205017,
"eval_runtime": 165.3012,
"eval_samples_per_second": 34.216,
"eval_steps_per_second": 4.277,
"eval_wer": 0.3518640368474266,
"step": 28300
},
{
"epoch": 5.497483546264034,
"eval_loss": 0.48453789949417114,
"eval_runtime": 165.2767,
"eval_samples_per_second": 34.221,
"eval_steps_per_second": 4.278,
"eval_wer": 0.3375166503506604,
"step": 28400
},
{
"epoch": 5.516840882694542,
"grad_norm": 0.43120303750038147,
"learning_rate": 1.5457627118644067e-05,
"loss": 0.3136,
"step": 28500
},
{
"epoch": 5.516840882694542,
"eval_loss": 0.4930485486984253,
"eval_runtime": 165.9777,
"eval_samples_per_second": 34.077,
"eval_steps_per_second": 4.26,
"eval_wer": 0.3439842082457351,
"step": 28500
},
{
"epoch": 5.5361982191250485,
"eval_loss": 0.5025920271873474,
"eval_runtime": 165.742,
"eval_samples_per_second": 34.125,
"eval_steps_per_second": 4.266,
"eval_wer": 0.35122209561714623,
"step": 28600
},
{
"epoch": 5.555555555555555,
"eval_loss": 0.5056036710739136,
"eval_runtime": 165.717,
"eval_samples_per_second": 34.13,
"eval_steps_per_second": 4.266,
"eval_wer": 0.3518800853781836,
"step": 28700
},
{
"epoch": 5.574912891986063,
"eval_loss": 0.5090658068656921,
"eval_runtime": 165.5604,
"eval_samples_per_second": 34.163,
"eval_steps_per_second": 4.27,
"eval_wer": 0.3546404326683892,
"step": 28800
},
{
"epoch": 5.59427022841657,
"eval_loss": 0.5027741193771362,
"eval_runtime": 170.2845,
"eval_samples_per_second": 33.215,
"eval_steps_per_second": 4.152,
"eval_wer": 0.34952095135690325,
"step": 28900
},
{
"epoch": 5.613627564847077,
"grad_norm": 1.859834909439087,
"learning_rate": 1.0372881355932203e-05,
"loss": 0.3092,
"step": 29000
},
{
"epoch": 5.613627564847077,
"eval_loss": 0.505651593208313,
"eval_runtime": 164.9869,
"eval_samples_per_second": 34.282,
"eval_steps_per_second": 4.285,
"eval_wer": 0.3509974161865481,
"step": 29000
},
{
"epoch": 5.6329849012775846,
"eval_loss": 0.5085631608963013,
"eval_runtime": 165.6325,
"eval_samples_per_second": 34.148,
"eval_steps_per_second": 4.268,
"eval_wer": 0.3532923560848004,
"step": 29100
},
{
"epoch": 5.652342237708091,
"eval_loss": 0.5055486559867859,
"eval_runtime": 165.6348,
"eval_samples_per_second": 34.147,
"eval_steps_per_second": 4.268,
"eval_wer": 0.35144677504774435,
"step": 29200
},
{
"epoch": 5.671699574138598,
"eval_loss": 0.5133376717567444,
"eval_runtime": 166.8503,
"eval_samples_per_second": 33.899,
"eval_steps_per_second": 4.237,
"eval_wer": 0.35765755645070696,
"step": 29300
},
{
"epoch": 5.691056910569106,
"eval_loss": 0.5129527449607849,
"eval_runtime": 165.063,
"eval_samples_per_second": 34.266,
"eval_steps_per_second": 4.283,
"eval_wer": 0.35703166375118356,
"step": 29400
},
{
"epoch": 5.710414246999613,
"grad_norm": 1.5260862112045288,
"learning_rate": 5.288135593220339e-06,
"loss": 0.3152,
"step": 29500
},
{
"epoch": 5.710414246999613,
"eval_loss": 0.5147610902786255,
"eval_runtime": 165.1582,
"eval_samples_per_second": 34.246,
"eval_steps_per_second": 4.281,
"eval_wer": 0.3581390123734172,
"step": 29500
},
{
"epoch": 5.72977158343012,
"eval_loss": 0.5114809274673462,
"eval_runtime": 165.7617,
"eval_samples_per_second": 34.121,
"eval_steps_per_second": 4.265,
"eval_wer": 0.3554589077369967,
"step": 29600
},
{
"epoch": 5.7491289198606275,
"eval_loss": 0.5053985714912415,
"eval_runtime": 165.7121,
"eval_samples_per_second": 34.131,
"eval_steps_per_second": 4.266,
"eval_wer": 0.35263436632376305,
"step": 29700
},
{
"epoch": 5.768486256291134,
"eval_loss": 0.5080947279930115,
"eval_runtime": 165.6502,
"eval_samples_per_second": 34.144,
"eval_steps_per_second": 4.268,
"eval_wer": 0.3535651811076696,
"step": 29800
},
{
"epoch": 5.787843592721641,
"eval_loss": 0.5076740384101868,
"eval_runtime": 164.8589,
"eval_samples_per_second": 34.308,
"eval_steps_per_second": 4.289,
"eval_wer": 0.35348493845388457,
"step": 29900
},
{
"epoch": 5.807200929152149,
"grad_norm": 24.957311630249023,
"learning_rate": 2.0338983050847458e-07,
"loss": 0.3085,
"step": 30000
},
{
"epoch": 5.807200929152149,
"eval_loss": 0.5066753029823303,
"eval_runtime": 165.8811,
"eval_samples_per_second": 34.097,
"eval_steps_per_second": 4.262,
"eval_wer": 0.35224920158559486,
"step": 30000
},
{
"epoch": 5.807200929152149,
"step": 30000,
"total_flos": 3.3745707679449666e+19,
"train_loss": 0.49102539647420246,
"train_runtime": 61359.0363,
"train_samples_per_second": 3.911,
"train_steps_per_second": 0.489
}
],
"logging_steps": 500,
"max_steps": 30000,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 400,
"total_flos": 3.3745707679449666e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}