|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.807200929152149, |
|
"eval_steps": 100, |
|
"global_step": 30000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.019357336430507164, |
|
"eval_loss": 3.567659854888916, |
|
"eval_runtime": 160.9588, |
|
"eval_samples_per_second": 35.139, |
|
"eval_steps_per_second": 4.392, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03871467286101433, |
|
"eval_loss": 3.0471677780151367, |
|
"eval_runtime": 158.7973, |
|
"eval_samples_per_second": 35.618, |
|
"eval_steps_per_second": 4.452, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05807200929152149, |
|
"eval_loss": 2.9665186405181885, |
|
"eval_runtime": 159.308, |
|
"eval_samples_per_second": 35.504, |
|
"eval_steps_per_second": 4.438, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07742934572202866, |
|
"eval_loss": 2.464332103729248, |
|
"eval_runtime": 159.8297, |
|
"eval_samples_per_second": 35.388, |
|
"eval_steps_per_second": 4.423, |
|
"eval_wer": 0.9813195101988413, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"grad_norm": 6.005111217498779, |
|
"learning_rate": 0.00029759999999999997, |
|
"loss": 4.1279, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"eval_loss": 1.625333547592163, |
|
"eval_runtime": 160.6655, |
|
"eval_samples_per_second": 35.204, |
|
"eval_steps_per_second": 4.4, |
|
"eval_wer": 0.9345380430421595, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11614401858304298, |
|
"eval_loss": 1.24808931350708, |
|
"eval_runtime": 160.4402, |
|
"eval_samples_per_second": 35.253, |
|
"eval_steps_per_second": 4.407, |
|
"eval_wer": 0.8190528157147213, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13550135501355012, |
|
"eval_loss": 1.0997203588485718, |
|
"eval_runtime": 161.1872, |
|
"eval_samples_per_second": 35.09, |
|
"eval_steps_per_second": 4.386, |
|
"eval_wer": 0.7769735680698432, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1548586914440573, |
|
"eval_loss": 1.0475263595581055, |
|
"eval_runtime": 161.0626, |
|
"eval_samples_per_second": 35.117, |
|
"eval_steps_per_second": 4.39, |
|
"eval_wer": 0.7339795541718156, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17421602787456447, |
|
"eval_loss": 0.9692754149436951, |
|
"eval_runtime": 161.3683, |
|
"eval_samples_per_second": 35.05, |
|
"eval_steps_per_second": 4.381, |
|
"eval_wer": 0.7012565999582738, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"grad_norm": 2.414348602294922, |
|
"learning_rate": 0.0002949762711864406, |
|
"loss": 1.0598, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"eval_loss": 0.911480724811554, |
|
"eval_runtime": 162.0835, |
|
"eval_samples_per_second": 34.896, |
|
"eval_steps_per_second": 4.362, |
|
"eval_wer": 0.6749049124552647, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2129307007355788, |
|
"eval_loss": 0.8823792338371277, |
|
"eval_runtime": 161.5426, |
|
"eval_samples_per_second": 35.012, |
|
"eval_steps_per_second": 4.377, |
|
"eval_wer": 0.65625651971562, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.23228803716608595, |
|
"eval_loss": 0.8609552383422852, |
|
"eval_runtime": 161.6778, |
|
"eval_samples_per_second": 34.983, |
|
"eval_steps_per_second": 4.373, |
|
"eval_wer": 0.6431288215563865, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2516453735965931, |
|
"eval_loss": 0.8330459594726562, |
|
"eval_runtime": 161.7974, |
|
"eval_samples_per_second": 34.957, |
|
"eval_steps_per_second": 4.37, |
|
"eval_wer": 0.6114169247805363, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27100271002710025, |
|
"eval_loss": 0.8172780871391296, |
|
"eval_runtime": 161.4977, |
|
"eval_samples_per_second": 35.022, |
|
"eval_steps_per_second": 4.378, |
|
"eval_wer": 0.6017396607340598, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"grad_norm": 4.460846424102783, |
|
"learning_rate": 0.0002898915254237288, |
|
"loss": 0.8546, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"eval_loss": 0.8102588653564453, |
|
"eval_runtime": 161.397, |
|
"eval_samples_per_second": 35.044, |
|
"eval_steps_per_second": 4.381, |
|
"eval_wer": 0.6139204955786298, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3097173828881146, |
|
"eval_loss": 0.7860382795333862, |
|
"eval_runtime": 162.0586, |
|
"eval_samples_per_second": 34.901, |
|
"eval_steps_per_second": 4.363, |
|
"eval_wer": 0.6077739082986953, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.32907471931862176, |
|
"eval_loss": 0.857550323009491, |
|
"eval_runtime": 161.2906, |
|
"eval_samples_per_second": 35.067, |
|
"eval_steps_per_second": 4.383, |
|
"eval_wer": 0.5990114105053682, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"eval_loss": 0.7555657029151917, |
|
"eval_runtime": 161.7718, |
|
"eval_samples_per_second": 34.963, |
|
"eval_steps_per_second": 4.37, |
|
"eval_wer": 0.5773298454526488, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3677893921796361, |
|
"eval_loss": 0.7365372180938721, |
|
"eval_runtime": 162.1133, |
|
"eval_samples_per_second": 34.889, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.5825777150101908, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"grad_norm": 3.4646999835968018, |
|
"learning_rate": 0.0002848067796610169, |
|
"loss": 0.7776, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"eval_loss": 0.7291606068611145, |
|
"eval_runtime": 162.354, |
|
"eval_samples_per_second": 34.837, |
|
"eval_steps_per_second": 4.355, |
|
"eval_wer": 0.5551989215387332, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"eval_loss": 0.716595470905304, |
|
"eval_runtime": 162.461, |
|
"eval_samples_per_second": 34.815, |
|
"eval_steps_per_second": 4.352, |
|
"eval_wer": 0.5385726436744716, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.4258614014711576, |
|
"eval_loss": 0.7117305397987366, |
|
"eval_runtime": 161.9033, |
|
"eval_samples_per_second": 34.934, |
|
"eval_steps_per_second": 4.367, |
|
"eval_wer": 0.5401774967501726, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.4452187379016647, |
|
"eval_loss": 0.7060667872428894, |
|
"eval_runtime": 162.0901, |
|
"eval_samples_per_second": 34.894, |
|
"eval_steps_per_second": 4.362, |
|
"eval_wer": 0.5388294201665838, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4645760743321719, |
|
"eval_loss": 0.7044907212257385, |
|
"eval_runtime": 162.5576, |
|
"eval_samples_per_second": 34.794, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.5364060920222754, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"grad_norm": 2.70296573638916, |
|
"learning_rate": 0.00027972203389830505, |
|
"loss": 0.706, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"eval_loss": 0.7062936425209045, |
|
"eval_runtime": 162.4753, |
|
"eval_samples_per_second": 34.811, |
|
"eval_steps_per_second": 4.351, |
|
"eval_wer": 0.5428736499173501, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5032907471931862, |
|
"eval_loss": 0.6941363215446472, |
|
"eval_runtime": 162.6699, |
|
"eval_samples_per_second": 34.77, |
|
"eval_steps_per_second": 4.346, |
|
"eval_wer": 0.5433872029015744, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5226480836236934, |
|
"eval_loss": 0.6840428113937378, |
|
"eval_runtime": 162.5617, |
|
"eval_samples_per_second": 34.793, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.5203094156729952, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5420054200542005, |
|
"eval_loss": 0.6902298331260681, |
|
"eval_runtime": 162.7532, |
|
"eval_samples_per_second": 34.752, |
|
"eval_steps_per_second": 4.344, |
|
"eval_wer": 0.5593715395355555, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5613627564847077, |
|
"eval_loss": 0.6594961881637573, |
|
"eval_runtime": 163.1259, |
|
"eval_samples_per_second": 34.673, |
|
"eval_steps_per_second": 4.334, |
|
"eval_wer": 0.5149171093386401, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"grad_norm": 4.962900161743164, |
|
"learning_rate": 0.0002746372881355932, |
|
"loss": 0.7002, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"eval_loss": 0.6767885088920593, |
|
"eval_runtime": 162.9945, |
|
"eval_samples_per_second": 34.701, |
|
"eval_steps_per_second": 4.338, |
|
"eval_wer": 0.525284460207668, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6000774293457221, |
|
"eval_loss": 0.6656874418258667, |
|
"eval_runtime": 163.0572, |
|
"eval_samples_per_second": 34.687, |
|
"eval_steps_per_second": 4.336, |
|
"eval_wer": 0.5063953395066682, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6194347657762292, |
|
"eval_loss": 0.6758668422698975, |
|
"eval_runtime": 163.796, |
|
"eval_samples_per_second": 34.531, |
|
"eval_steps_per_second": 4.316, |
|
"eval_wer": 0.5409478262265089, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6387921022067363, |
|
"eval_loss": 0.6709346175193787, |
|
"eval_runtime": 162.7448, |
|
"eval_samples_per_second": 34.754, |
|
"eval_steps_per_second": 4.344, |
|
"eval_wer": 0.5090914926738457, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6581494386372435, |
|
"eval_loss": 0.6478992104530334, |
|
"eval_runtime": 163.2374, |
|
"eval_samples_per_second": 34.649, |
|
"eval_steps_per_second": 4.331, |
|
"eval_wer": 0.5037473319317617, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"grad_norm": 3.27418851852417, |
|
"learning_rate": 0.0002695525423728813, |
|
"loss": 0.685, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"eval_loss": 0.6378278136253357, |
|
"eval_runtime": 162.9066, |
|
"eval_samples_per_second": 34.719, |
|
"eval_steps_per_second": 4.34, |
|
"eval_wer": 0.5033782157243505, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"eval_loss": 0.6492822170257568, |
|
"eval_runtime": 162.8688, |
|
"eval_samples_per_second": 34.727, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.49883648152011684, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.716221447928765, |
|
"eval_loss": 0.6340391635894775, |
|
"eval_runtime": 163.0198, |
|
"eval_samples_per_second": 34.695, |
|
"eval_steps_per_second": 4.337, |
|
"eval_wer": 0.4832694066858179, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.7355787843592722, |
|
"eval_loss": 0.6226627826690674, |
|
"eval_runtime": 164.0506, |
|
"eval_samples_per_second": 34.477, |
|
"eval_steps_per_second": 4.31, |
|
"eval_wer": 0.47354399704707034, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7549361207897793, |
|
"eval_loss": 0.6257476210594177, |
|
"eval_runtime": 167.0907, |
|
"eval_samples_per_second": 33.85, |
|
"eval_steps_per_second": 4.231, |
|
"eval_wer": 0.49068382789555615, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"grad_norm": 5.494376182556152, |
|
"learning_rate": 0.0002644677966101695, |
|
"loss": 0.6655, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"eval_loss": 0.6420141458511353, |
|
"eval_runtime": 163.4141, |
|
"eval_samples_per_second": 34.611, |
|
"eval_steps_per_second": 4.326, |
|
"eval_wer": 0.49987963601932245, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"eval_loss": 0.6111469268798828, |
|
"eval_runtime": 163.5321, |
|
"eval_samples_per_second": 34.586, |
|
"eval_steps_per_second": 4.323, |
|
"eval_wer": 0.4790646916274815, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"eval_loss": 0.6136205196380615, |
|
"eval_runtime": 163.9442, |
|
"eval_samples_per_second": 34.5, |
|
"eval_steps_per_second": 4.312, |
|
"eval_wer": 0.48073373882621045, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.832365466511808, |
|
"eval_loss": 0.6218396425247192, |
|
"eval_runtime": 163.3391, |
|
"eval_samples_per_second": 34.627, |
|
"eval_steps_per_second": 4.328, |
|
"eval_wer": 0.48596555985299544, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8517228029423152, |
|
"eval_loss": 0.6084252595901489, |
|
"eval_runtime": 162.8945, |
|
"eval_samples_per_second": 34.722, |
|
"eval_steps_per_second": 4.34, |
|
"eval_wer": 0.4585386207892667, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"grad_norm": 3.0379676818847656, |
|
"learning_rate": 0.0002593830508474576, |
|
"loss": 0.6431, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"eval_loss": 0.6008957624435425, |
|
"eval_runtime": 163.8125, |
|
"eval_samples_per_second": 34.527, |
|
"eval_steps_per_second": 4.316, |
|
"eval_wer": 0.4627593843783602, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8904374758033294, |
|
"eval_loss": 0.6009930968284607, |
|
"eval_runtime": 163.1039, |
|
"eval_samples_per_second": 34.677, |
|
"eval_steps_per_second": 4.335, |
|
"eval_wer": 0.46295196674744427, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9097948122338366, |
|
"eval_loss": 0.5823432207107544, |
|
"eval_runtime": 163.3804, |
|
"eval_samples_per_second": 34.619, |
|
"eval_steps_per_second": 4.327, |
|
"eval_wer": 0.45035387010319206, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.9291521486643438, |
|
"eval_loss": 0.6118789315223694, |
|
"eval_runtime": 163.7255, |
|
"eval_samples_per_second": 34.546, |
|
"eval_steps_per_second": 4.318, |
|
"eval_wer": 0.4630001123397153, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.948509485094851, |
|
"eval_loss": 0.6001989245414734, |
|
"eval_runtime": 163.3492, |
|
"eval_samples_per_second": 34.625, |
|
"eval_steps_per_second": 4.328, |
|
"eval_wer": 0.4600150856189116, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"grad_norm": 3.1605985164642334, |
|
"learning_rate": 0.00025430847457627115, |
|
"loss": 0.6235, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"eval_loss": 0.5892329216003418, |
|
"eval_runtime": 163.5255, |
|
"eval_samples_per_second": 34.588, |
|
"eval_steps_per_second": 4.323, |
|
"eval_wer": 0.4551844778610518, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9872241579558653, |
|
"eval_loss": 0.5673592686653137, |
|
"eval_runtime": 163.343, |
|
"eval_samples_per_second": 34.627, |
|
"eval_steps_per_second": 4.328, |
|
"eval_wer": 0.44889345380430423, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.0065814943863725, |
|
"eval_loss": 0.5792257785797119, |
|
"eval_runtime": 162.8592, |
|
"eval_samples_per_second": 34.729, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.43167338030203334, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.0259388308168795, |
|
"eval_loss": 0.5752869844436646, |
|
"eval_runtime": 162.7856, |
|
"eval_samples_per_second": 34.745, |
|
"eval_steps_per_second": 4.343, |
|
"eval_wer": 0.43331033043924827, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.0452961672473868, |
|
"eval_loss": 0.5698733925819397, |
|
"eval_runtime": 161.9949, |
|
"eval_samples_per_second": 34.915, |
|
"eval_steps_per_second": 4.364, |
|
"eval_wer": 0.44619730063712665, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"grad_norm": 0.8791279792785645, |
|
"learning_rate": 0.0002492338983050847, |
|
"loss": 0.5527, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"eval_loss": 0.5666691660881042, |
|
"eval_runtime": 162.0517, |
|
"eval_samples_per_second": 34.902, |
|
"eval_steps_per_second": 4.363, |
|
"eval_wer": 0.43639164834459404, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.084010840108401, |
|
"eval_loss": 0.5558171272277832, |
|
"eval_runtime": 161.9847, |
|
"eval_samples_per_second": 34.917, |
|
"eval_steps_per_second": 4.365, |
|
"eval_wer": 0.42945868305756607, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.1033681765389083, |
|
"eval_loss": 0.5602455139160156, |
|
"eval_runtime": 162.701, |
|
"eval_samples_per_second": 34.763, |
|
"eval_steps_per_second": 4.345, |
|
"eval_wer": 0.422349183932211, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.1227255129694154, |
|
"eval_loss": 0.559140145778656, |
|
"eval_runtime": 162.4402, |
|
"eval_samples_per_second": 34.819, |
|
"eval_steps_per_second": 4.352, |
|
"eval_wer": 0.41942835133443535, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.1420828493999227, |
|
"eval_loss": 0.5399234890937805, |
|
"eval_runtime": 162.3316, |
|
"eval_samples_per_second": 34.842, |
|
"eval_steps_per_second": 4.355, |
|
"eval_wer": 0.418818507165669, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"grad_norm": 0.9803772568702698, |
|
"learning_rate": 0.00024414915254237287, |
|
"loss": 0.533, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"eval_loss": 0.545900821685791, |
|
"eval_runtime": 161.6822, |
|
"eval_samples_per_second": 34.982, |
|
"eval_steps_per_second": 4.373, |
|
"eval_wer": 0.431063536133267, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1807975222609368, |
|
"eval_loss": 0.5347985625267029, |
|
"eval_runtime": 161.8121, |
|
"eval_samples_per_second": 34.954, |
|
"eval_steps_per_second": 4.369, |
|
"eval_wer": 0.41183739628637, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.2001548586914441, |
|
"eval_loss": 0.5453631281852722, |
|
"eval_runtime": 161.8802, |
|
"eval_samples_per_second": 34.939, |
|
"eval_steps_per_second": 4.367, |
|
"eval_wer": 0.4176309158896503, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"eval_loss": 0.5442932844161987, |
|
"eval_runtime": 162.1767, |
|
"eval_samples_per_second": 34.876, |
|
"eval_steps_per_second": 4.359, |
|
"eval_wer": 0.42157885445587456, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.2388695315524583, |
|
"eval_loss": 0.5382806658744812, |
|
"eval_runtime": 161.5364, |
|
"eval_samples_per_second": 35.014, |
|
"eval_steps_per_second": 4.377, |
|
"eval_wer": 0.40962269904190274, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"grad_norm": 2.7026009559631348, |
|
"learning_rate": 0.00023906440677966102, |
|
"loss": 0.5228, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"eval_loss": 0.540704071521759, |
|
"eval_runtime": 161.6157, |
|
"eval_samples_per_second": 34.997, |
|
"eval_steps_per_second": 4.375, |
|
"eval_wer": 0.41260772576270643, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2775842044134726, |
|
"eval_loss": 0.5527251362800598, |
|
"eval_runtime": 161.6568, |
|
"eval_samples_per_second": 34.988, |
|
"eval_steps_per_second": 4.373, |
|
"eval_wer": 0.41426072443067835, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.29694154084398, |
|
"eval_loss": 0.5312824845314026, |
|
"eval_runtime": 161.7821, |
|
"eval_samples_per_second": 34.961, |
|
"eval_steps_per_second": 4.37, |
|
"eval_wer": 0.40811413715074385, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.316298877274487, |
|
"eval_loss": 0.533909797668457, |
|
"eval_runtime": 161.8361, |
|
"eval_samples_per_second": 34.949, |
|
"eval_steps_per_second": 4.369, |
|
"eval_wer": 0.4150471024377718, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.3356562137049943, |
|
"eval_loss": 0.523649275302887, |
|
"eval_runtime": 161.5476, |
|
"eval_samples_per_second": 35.011, |
|
"eval_steps_per_second": 4.376, |
|
"eval_wer": 0.4120781242477251, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"grad_norm": 0.709751546382904, |
|
"learning_rate": 0.00023397966101694912, |
|
"loss": 0.5204, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"eval_loss": 0.5527586340904236, |
|
"eval_runtime": 162.1137, |
|
"eval_samples_per_second": 34.889, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.4165877613904447, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3743708865660085, |
|
"eval_loss": 0.5330629944801331, |
|
"eval_runtime": 161.681, |
|
"eval_samples_per_second": 34.982, |
|
"eval_steps_per_second": 4.373, |
|
"eval_wer": 0.40559451782189343, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"eval_loss": 0.5242415070533752, |
|
"eval_runtime": 162.2742, |
|
"eval_samples_per_second": 34.855, |
|
"eval_steps_per_second": 4.357, |
|
"eval_wer": 0.4058833913755196, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.4130855594270229, |
|
"eval_loss": 0.5309507250785828, |
|
"eval_runtime": 163.2224, |
|
"eval_samples_per_second": 34.652, |
|
"eval_steps_per_second": 4.332, |
|
"eval_wer": 0.4092856798960055, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.43244289585753, |
|
"eval_loss": 0.5278186798095703, |
|
"eval_runtime": 162.9755, |
|
"eval_samples_per_second": 34.705, |
|
"eval_steps_per_second": 4.338, |
|
"eval_wer": 0.4063006531752018, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"grad_norm": 0.9905166029930115, |
|
"learning_rate": 0.00022889491525423728, |
|
"loss": 0.5199, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"eval_loss": 0.5168124437332153, |
|
"eval_runtime": 162.1336, |
|
"eval_samples_per_second": 34.885, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.3955802346295197, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4711575687185443, |
|
"eval_loss": 0.5236623287200928, |
|
"eval_runtime": 161.1426, |
|
"eval_samples_per_second": 35.099, |
|
"eval_steps_per_second": 4.387, |
|
"eval_wer": 0.40235271460897754, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.4905149051490514, |
|
"eval_loss": 0.5316073894500732, |
|
"eval_runtime": 162.1382, |
|
"eval_samples_per_second": 34.884, |
|
"eval_steps_per_second": 4.36, |
|
"eval_wer": 0.4179358379740335, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.5098722415795587, |
|
"eval_loss": 0.5182381868362427, |
|
"eval_runtime": 161.7911, |
|
"eval_samples_per_second": 34.959, |
|
"eval_steps_per_second": 4.37, |
|
"eval_wer": 0.40326748086212705, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.5292295780100658, |
|
"eval_loss": 0.5175392627716064, |
|
"eval_runtime": 161.5725, |
|
"eval_samples_per_second": 35.006, |
|
"eval_steps_per_second": 4.376, |
|
"eval_wer": 0.3983887275119963, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"grad_norm": 0.8261615037918091, |
|
"learning_rate": 0.00022382033898305084, |
|
"loss": 0.5066, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"eval_loss": 0.5138476490974426, |
|
"eval_runtime": 162.4044, |
|
"eval_samples_per_second": 34.827, |
|
"eval_steps_per_second": 4.353, |
|
"eval_wer": 0.39492224486848226, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5679442508710801, |
|
"eval_loss": 0.515566885471344, |
|
"eval_runtime": 162.5299, |
|
"eval_samples_per_second": 34.8, |
|
"eval_steps_per_second": 4.35, |
|
"eval_wer": 0.4016305307249121, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"eval_loss": 0.5131089091300964, |
|
"eval_runtime": 162.835, |
|
"eval_samples_per_second": 34.735, |
|
"eval_steps_per_second": 4.342, |
|
"eval_wer": 0.39793936865080004, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.6066589237320945, |
|
"eval_loss": 0.5139849185943604, |
|
"eval_runtime": 162.5015, |
|
"eval_samples_per_second": 34.806, |
|
"eval_steps_per_second": 4.351, |
|
"eval_wer": 0.39413586686138885, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 0.5224258303642273, |
|
"eval_runtime": 162.1349, |
|
"eval_samples_per_second": 34.885, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.39853316428880936, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"grad_norm": 1.0760446786880493, |
|
"learning_rate": 0.00021873559322033897, |
|
"loss": 0.502, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"eval_loss": 0.5274536609649658, |
|
"eval_runtime": 162.3162, |
|
"eval_samples_per_second": 34.846, |
|
"eval_steps_per_second": 4.356, |
|
"eval_wer": 0.40023430854905234, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.664730933023616, |
|
"eval_loss": 0.5054244995117188, |
|
"eval_runtime": 162.7623, |
|
"eval_samples_per_second": 34.75, |
|
"eval_steps_per_second": 4.344, |
|
"eval_wer": 0.3860955529521272, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.684088269454123, |
|
"eval_loss": 0.5144466161727905, |
|
"eval_runtime": 162.1114, |
|
"eval_samples_per_second": 34.89, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.3912631798558842, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.7034456058846303, |
|
"eval_loss": 0.5017980933189392, |
|
"eval_runtime": 162.1144, |
|
"eval_samples_per_second": 34.889, |
|
"eval_steps_per_second": 4.361, |
|
"eval_wer": 0.3860955529521272, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.7228029423151374, |
|
"eval_loss": 0.5001707673072815, |
|
"eval_runtime": 162.7958, |
|
"eval_samples_per_second": 34.743, |
|
"eval_steps_per_second": 4.343, |
|
"eval_wer": 0.39978494968785605, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"grad_norm": 2.3791110515594482, |
|
"learning_rate": 0.00021366101694915253, |
|
"loss": 0.4965, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"eval_loss": 0.5074877142906189, |
|
"eval_runtime": 162.5487, |
|
"eval_samples_per_second": 34.796, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.38896823995763186, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7615176151761518, |
|
"eval_loss": 0.4928957521915436, |
|
"eval_runtime": 162.2035, |
|
"eval_samples_per_second": 34.87, |
|
"eval_steps_per_second": 4.359, |
|
"eval_wer": 0.3865449118133235, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.7808749516066589, |
|
"eval_loss": 0.49622705578804016, |
|
"eval_runtime": 162.8808, |
|
"eval_samples_per_second": 34.725, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.38559804849866, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.800232288037166, |
|
"eval_loss": 0.49036508798599243, |
|
"eval_runtime": 162.3886, |
|
"eval_samples_per_second": 34.83, |
|
"eval_steps_per_second": 4.354, |
|
"eval_wer": 0.3759689300444544, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.8195896244676733, |
|
"eval_loss": 0.49964088201522827, |
|
"eval_runtime": 162.526, |
|
"eval_samples_per_second": 34.801, |
|
"eval_steps_per_second": 4.35, |
|
"eval_wer": 0.3901237341721365, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"grad_norm": 1.2548748254776, |
|
"learning_rate": 0.00020857627118644066, |
|
"loss": 0.4776, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"eval_loss": 0.4899130165576935, |
|
"eval_runtime": 162.5723, |
|
"eval_samples_per_second": 34.791, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.37616151241353857, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8583042973286876, |
|
"eval_loss": 0.4918155074119568, |
|
"eval_runtime": 162.1914, |
|
"eval_samples_per_second": 34.872, |
|
"eval_steps_per_second": 4.359, |
|
"eval_wer": 0.37948355828023944, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.8776616337591947, |
|
"eval_loss": 0.49148374795913696, |
|
"eval_runtime": 162.2813, |
|
"eval_samples_per_second": 34.853, |
|
"eval_steps_per_second": 4.357, |
|
"eval_wer": 0.37980452889537963, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.897018970189702, |
|
"eval_loss": 0.4841060936450958, |
|
"eval_runtime": 162.2556, |
|
"eval_samples_per_second": 34.859, |
|
"eval_steps_per_second": 4.357, |
|
"eval_wer": 0.37060872077161333, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.916376306620209, |
|
"eval_loss": 0.4834117293357849, |
|
"eval_runtime": 163.5813, |
|
"eval_samples_per_second": 34.576, |
|
"eval_steps_per_second": 4.322, |
|
"eval_wer": 0.37728490956652916, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"grad_norm": 1.011767029762268, |
|
"learning_rate": 0.00020349152542372878, |
|
"loss": 0.4752, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"eval_loss": 0.4831894338130951, |
|
"eval_runtime": 162.4305, |
|
"eval_samples_per_second": 34.821, |
|
"eval_steps_per_second": 4.353, |
|
"eval_wer": 0.3711704193481087, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9550909794812235, |
|
"eval_loss": 0.4890592396259308, |
|
"eval_runtime": 162.3568, |
|
"eval_samples_per_second": 34.837, |
|
"eval_steps_per_second": 4.355, |
|
"eval_wer": 0.37829596700422075, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.9744483159117305, |
|
"eval_loss": 0.4786697328090668, |
|
"eval_runtime": 163.1452, |
|
"eval_samples_per_second": 34.669, |
|
"eval_steps_per_second": 4.334, |
|
"eval_wer": 0.3783441125964918, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.9938056523422376, |
|
"eval_loss": 0.4726457893848419, |
|
"eval_runtime": 162.2371, |
|
"eval_samples_per_second": 34.863, |
|
"eval_steps_per_second": 4.358, |
|
"eval_wer": 0.37141114730946384, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.013162988772745, |
|
"eval_loss": 0.49166908860206604, |
|
"eval_runtime": 162.2356, |
|
"eval_samples_per_second": 34.863, |
|
"eval_steps_per_second": 4.358, |
|
"eval_wer": 0.37320858275424884, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.032520325203252, |
|
"grad_norm": 0.9389815926551819, |
|
"learning_rate": 0.00019840677966101694, |
|
"loss": 0.4587, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.032520325203252, |
|
"eval_loss": 0.48015162348747253, |
|
"eval_runtime": 162.7857, |
|
"eval_samples_per_second": 34.745, |
|
"eval_steps_per_second": 4.343, |
|
"eval_wer": 0.37264688417775355, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.051877661633759, |
|
"eval_loss": 0.4883776903152466, |
|
"eval_runtime": 162.7407, |
|
"eval_samples_per_second": 34.755, |
|
"eval_steps_per_second": 4.344, |
|
"eval_wer": 0.3825327791240712, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.0712349980642664, |
|
"eval_loss": 0.4841337502002716, |
|
"eval_runtime": 162.7772, |
|
"eval_samples_per_second": 34.747, |
|
"eval_steps_per_second": 4.343, |
|
"eval_wer": 0.37845645231179087, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.0905923344947737, |
|
"eval_loss": 0.4809282422065735, |
|
"eval_runtime": 162.8688, |
|
"eval_samples_per_second": 34.727, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.3738184269230152, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.1099496709252805, |
|
"eval_loss": 0.47966596484184265, |
|
"eval_runtime": 163.5921, |
|
"eval_samples_per_second": 34.574, |
|
"eval_steps_per_second": 4.322, |
|
"eval_wer": 0.3713469531864358, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.129307007355788, |
|
"grad_norm": 0.6634272933006287, |
|
"learning_rate": 0.0001933220338983051, |
|
"loss": 0.3967, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.129307007355788, |
|
"eval_loss": 0.4866289794445038, |
|
"eval_runtime": 162.8573, |
|
"eval_samples_per_second": 34.73, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.37497392113751987, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.148664343786295, |
|
"eval_loss": 0.4938376843929291, |
|
"eval_runtime": 163.5145, |
|
"eval_samples_per_second": 34.59, |
|
"eval_steps_per_second": 4.324, |
|
"eval_wer": 0.3749097270144918, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.168021680216802, |
|
"eval_loss": 0.48603999614715576, |
|
"eval_runtime": 162.8433, |
|
"eval_samples_per_second": 34.733, |
|
"eval_steps_per_second": 4.342, |
|
"eval_wer": 0.36796071319670687, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.1873790166473093, |
|
"eval_loss": 0.4849016070365906, |
|
"eval_runtime": 162.5249, |
|
"eval_samples_per_second": 34.801, |
|
"eval_steps_per_second": 4.35, |
|
"eval_wer": 0.369966779541333, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.2067363530778166, |
|
"eval_loss": 0.49077799916267395, |
|
"eval_runtime": 162.3783, |
|
"eval_samples_per_second": 34.832, |
|
"eval_steps_per_second": 4.354, |
|
"eval_wer": 0.36377204666912744, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.226093689508324, |
|
"grad_norm": 0.9132543206214905, |
|
"learning_rate": 0.0001882372881355932, |
|
"loss": 0.406, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.226093689508324, |
|
"eval_loss": 0.4797042906284332, |
|
"eval_runtime": 162.1749, |
|
"eval_samples_per_second": 34.876, |
|
"eval_steps_per_second": 4.359, |
|
"eval_wer": 0.3678644220121648, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.2454510259388307, |
|
"eval_loss": 0.48121991753578186, |
|
"eval_runtime": 161.7765, |
|
"eval_samples_per_second": 34.962, |
|
"eval_steps_per_second": 4.37, |
|
"eval_wer": 0.37585659032915536, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.264808362369338, |
|
"eval_loss": 0.47043049335479736, |
|
"eval_runtime": 163.1227, |
|
"eval_samples_per_second": 34.673, |
|
"eval_steps_per_second": 4.334, |
|
"eval_wer": 0.361268475871034, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.2841656987998453, |
|
"eval_loss": 0.4715932607650757, |
|
"eval_runtime": 163.2336, |
|
"eval_samples_per_second": 34.65, |
|
"eval_steps_per_second": 4.331, |
|
"eval_wer": 0.36345107605398724, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.303523035230352, |
|
"eval_loss": 0.4676753580570221, |
|
"eval_runtime": 163.378, |
|
"eval_samples_per_second": 34.619, |
|
"eval_steps_per_second": 4.327, |
|
"eval_wer": 0.3635473672385293, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.3228803716608595, |
|
"grad_norm": 0.6134137511253357, |
|
"learning_rate": 0.00018315254237288135, |
|
"loss": 0.4088, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.3228803716608595, |
|
"eval_loss": 0.47054949402809143, |
|
"eval_runtime": 162.7912, |
|
"eval_samples_per_second": 34.744, |
|
"eval_steps_per_second": 4.343, |
|
"eval_wer": 0.36399672609972555, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.3422377080913668, |
|
"eval_loss": 0.4782082140445709, |
|
"eval_runtime": 162.8776, |
|
"eval_samples_per_second": 34.725, |
|
"eval_steps_per_second": 4.341, |
|
"eval_wer": 0.35905377862656673, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.3615950445218736, |
|
"eval_loss": 0.4795554578304291, |
|
"eval_runtime": 163.357, |
|
"eval_samples_per_second": 34.624, |
|
"eval_steps_per_second": 4.328, |
|
"eval_wer": 0.36128452440179104, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"eval_loss": 0.47130346298217773, |
|
"eval_runtime": 163.1765, |
|
"eval_samples_per_second": 34.662, |
|
"eval_steps_per_second": 4.333, |
|
"eval_wer": 0.3558119754136509, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.4003097173828882, |
|
"eval_loss": 0.47632816433906555, |
|
"eval_runtime": 163.2851, |
|
"eval_samples_per_second": 34.639, |
|
"eval_steps_per_second": 4.33, |
|
"eval_wer": 0.3588772447882396, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.419667053813395, |
|
"grad_norm": 0.6301820874214172, |
|
"learning_rate": 0.0001780779661016949, |
|
"loss": 0.407, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.419667053813395, |
|
"eval_loss": 0.46899136900901794, |
|
"eval_runtime": 163.5219, |
|
"eval_samples_per_second": 34.589, |
|
"eval_steps_per_second": 4.324, |
|
"eval_wer": 0.3565181107669593, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"eval_loss": 0.4686334431171417, |
|
"eval_runtime": 163.9109, |
|
"eval_samples_per_second": 34.507, |
|
"eval_steps_per_second": 4.313, |
|
"eval_wer": 0.35767360498146394, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.4583817266744097, |
|
"eval_loss": 0.467680424451828, |
|
"eval_runtime": 163.7544, |
|
"eval_samples_per_second": 34.54, |
|
"eval_steps_per_second": 4.317, |
|
"eval_wer": 0.3584278859270434, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.4777390631049165, |
|
"eval_loss": 0.46144554018974304, |
|
"eval_runtime": 163.8198, |
|
"eval_samples_per_second": 34.526, |
|
"eval_steps_per_second": 4.316, |
|
"eval_wer": 0.35765755645070696, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.497096399535424, |
|
"eval_loss": 0.455834299325943, |
|
"eval_runtime": 163.2394, |
|
"eval_samples_per_second": 34.649, |
|
"eval_steps_per_second": 4.331, |
|
"eval_wer": 0.35992039928744524, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.516453735965931, |
|
"grad_norm": 0.41953468322753906, |
|
"learning_rate": 0.00017299322033898304, |
|
"loss": 0.3855, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.516453735965931, |
|
"eval_loss": 0.4555678367614746, |
|
"eval_runtime": 164.8785, |
|
"eval_samples_per_second": 34.304, |
|
"eval_steps_per_second": 4.288, |
|
"eval_wer": 0.3565341592977163, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.535811072396438, |
|
"eval_loss": 0.4600988030433655, |
|
"eval_runtime": 163.6778, |
|
"eval_samples_per_second": 34.556, |
|
"eval_steps_per_second": 4.319, |
|
"eval_wer": 0.3558280239444079, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.5551684088269453, |
|
"eval_loss": 0.4650043547153473, |
|
"eval_runtime": 163.58, |
|
"eval_samples_per_second": 34.576, |
|
"eval_steps_per_second": 4.322, |
|
"eval_wer": 0.354303413522492, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.5745257452574526, |
|
"eval_loss": 0.4737236201763153, |
|
"eval_runtime": 163.9532, |
|
"eval_samples_per_second": 34.498, |
|
"eval_steps_per_second": 4.312, |
|
"eval_wer": 0.35483301503747333, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.59388308168796, |
|
"eval_loss": 0.45056912302970886, |
|
"eval_runtime": 163.6462, |
|
"eval_samples_per_second": 34.562, |
|
"eval_steps_per_second": 4.32, |
|
"eval_wer": 0.3534367928616135, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.6132404181184667, |
|
"grad_norm": 1.5978127717971802, |
|
"learning_rate": 0.0001679186440677966, |
|
"loss": 0.3748, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.6132404181184667, |
|
"eval_loss": 0.4606887698173523, |
|
"eval_runtime": 163.0481, |
|
"eval_samples_per_second": 34.689, |
|
"eval_steps_per_second": 4.336, |
|
"eval_wer": 0.3589253903805107, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.632597754548974, |
|
"eval_loss": 0.45494645833969116, |
|
"eval_runtime": 162.9584, |
|
"eval_samples_per_second": 34.708, |
|
"eval_steps_per_second": 4.339, |
|
"eval_wer": 0.35372566641523967, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.6519550909794813, |
|
"eval_loss": 0.4562608301639557, |
|
"eval_runtime": 164.1384, |
|
"eval_samples_per_second": 34.459, |
|
"eval_steps_per_second": 4.307, |
|
"eval_wer": 0.36409301728426763, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.6713124274099886, |
|
"eval_loss": 0.44666969776153564, |
|
"eval_runtime": 163.7423, |
|
"eval_samples_per_second": 34.542, |
|
"eval_steps_per_second": 4.318, |
|
"eval_wer": 0.34369533469210894, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.6906697638404955, |
|
"eval_loss": 0.4536294639110565, |
|
"eval_runtime": 163.3892, |
|
"eval_samples_per_second": 34.617, |
|
"eval_steps_per_second": 4.327, |
|
"eval_wer": 0.35446389883006213, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.710027100271003, |
|
"grad_norm": 0.6591352224349976, |
|
"learning_rate": 0.00016283389830508475, |
|
"loss": 0.3888, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.710027100271003, |
|
"eval_loss": 0.4504217505455017, |
|
"eval_runtime": 163.3199, |
|
"eval_samples_per_second": 34.631, |
|
"eval_steps_per_second": 4.329, |
|
"eval_wer": 0.3509653191250341, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.72938443670151, |
|
"eval_loss": 0.44697660207748413, |
|
"eval_runtime": 163.5943, |
|
"eval_samples_per_second": 34.573, |
|
"eval_steps_per_second": 4.322, |
|
"eval_wer": 0.3602092728410714, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.748741773132017, |
|
"eval_loss": 0.45640549063682556, |
|
"eval_runtime": 163.6363, |
|
"eval_samples_per_second": 34.564, |
|
"eval_steps_per_second": 4.321, |
|
"eval_wer": 0.3539022002535668, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.7680991095625243, |
|
"eval_loss": 0.45214343070983887, |
|
"eval_runtime": 164.011, |
|
"eval_samples_per_second": 34.485, |
|
"eval_steps_per_second": 4.311, |
|
"eval_wer": 0.3561971401518191, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.7874564459930316, |
|
"eval_loss": 0.4452911913394928, |
|
"eval_runtime": 163.9028, |
|
"eval_samples_per_second": 34.508, |
|
"eval_steps_per_second": 4.314, |
|
"eval_wer": 0.35221710452408084, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.8068137824235384, |
|
"grad_norm": 0.6879103779792786, |
|
"learning_rate": 0.00015774915254237285, |
|
"loss": 0.376, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.8068137824235384, |
|
"eval_loss": 0.45518526434898376, |
|
"eval_runtime": 164.0636, |
|
"eval_samples_per_second": 34.474, |
|
"eval_steps_per_second": 4.309, |
|
"eval_wer": 0.35170355153985655, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.8261711188540457, |
|
"eval_loss": 0.45344606041908264, |
|
"eval_runtime": 163.5444, |
|
"eval_samples_per_second": 34.584, |
|
"eval_steps_per_second": 4.323, |
|
"eval_wer": 0.3549774518142864, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.845528455284553, |
|
"eval_loss": 0.45520055294036865, |
|
"eval_runtime": 163.3824, |
|
"eval_samples_per_second": 34.618, |
|
"eval_steps_per_second": 4.327, |
|
"eval_wer": 0.3405016770714641, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.86488579171506, |
|
"eval_loss": 0.45560306310653687, |
|
"eval_runtime": 164.0292, |
|
"eval_samples_per_second": 34.482, |
|
"eval_steps_per_second": 4.31, |
|
"eval_wer": 0.35138258092471636, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.884243128145567, |
|
"eval_loss": 0.44232824444770813, |
|
"eval_runtime": 164.3662, |
|
"eval_samples_per_second": 34.411, |
|
"eval_steps_per_second": 4.301, |
|
"eval_wer": 0.3467606040666977, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.9036004645760745, |
|
"grad_norm": 0.5280432105064392, |
|
"learning_rate": 0.00015267457627118642, |
|
"loss": 0.379, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.9036004645760745, |
|
"eval_loss": 0.43873003125190735, |
|
"eval_runtime": 163.5232, |
|
"eval_samples_per_second": 34.588, |
|
"eval_steps_per_second": 4.324, |
|
"eval_wer": 0.34268427725441736, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.9229578010065813, |
|
"eval_loss": 0.4372723400592804, |
|
"eval_runtime": 163.4018, |
|
"eval_samples_per_second": 34.614, |
|
"eval_steps_per_second": 4.327, |
|
"eval_wer": 0.34364718909983794, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.9423151374370886, |
|
"eval_loss": 0.4399470090866089, |
|
"eval_runtime": 164.335, |
|
"eval_samples_per_second": 34.418, |
|
"eval_steps_per_second": 4.302, |
|
"eval_wer": 0.33870424162667906, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.961672473867596, |
|
"eval_loss": 0.44378861784935, |
|
"eval_runtime": 164.0477, |
|
"eval_samples_per_second": 34.478, |
|
"eval_steps_per_second": 4.31, |
|
"eval_wer": 0.3380462518656417, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.9810298102981028, |
|
"eval_loss": 0.436974436044693, |
|
"eval_runtime": 163.6525, |
|
"eval_samples_per_second": 34.561, |
|
"eval_steps_per_second": 4.32, |
|
"eval_wer": 0.3430694419925856, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 3.00038714672861, |
|
"grad_norm": 0.660970151424408, |
|
"learning_rate": 0.00014758983050847457, |
|
"loss": 0.3731, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.00038714672861, |
|
"eval_loss": 0.43810611963272095, |
|
"eval_runtime": 162.9215, |
|
"eval_samples_per_second": 34.716, |
|
"eval_steps_per_second": 4.34, |
|
"eval_wer": 0.33413041036093144, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.0197444831591174, |
|
"eval_loss": 0.45139721035957336, |
|
"eval_runtime": 164.2748, |
|
"eval_samples_per_second": 34.43, |
|
"eval_steps_per_second": 4.304, |
|
"eval_wer": 0.3286418128420343, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 3.0391018195896247, |
|
"eval_loss": 0.43782538175582886, |
|
"eval_runtime": 164.2188, |
|
"eval_samples_per_second": 34.442, |
|
"eval_steps_per_second": 4.305, |
|
"eval_wer": 0.3340180706456324, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 3.0584591560201315, |
|
"eval_loss": 0.44340020418167114, |
|
"eval_runtime": 163.4459, |
|
"eval_samples_per_second": 34.605, |
|
"eval_steps_per_second": 4.326, |
|
"eval_wer": 0.3441446935533052, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 3.077816492450639, |
|
"eval_loss": 0.44192126393318176, |
|
"eval_runtime": 164.2009, |
|
"eval_samples_per_second": 34.446, |
|
"eval_steps_per_second": 4.306, |
|
"eval_wer": 0.3399239299642118, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 3.097173828881146, |
|
"grad_norm": 0.6999391913414001, |
|
"learning_rate": 0.0001425050847457627, |
|
"loss": 0.3176, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.097173828881146, |
|
"eval_loss": 0.4407601058483124, |
|
"eval_runtime": 164.7219, |
|
"eval_samples_per_second": 34.337, |
|
"eval_steps_per_second": 4.292, |
|
"eval_wer": 0.3335366147229221, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.116531165311653, |
|
"eval_loss": 0.436761736869812, |
|
"eval_runtime": 164.2581, |
|
"eval_samples_per_second": 34.434, |
|
"eval_steps_per_second": 4.304, |
|
"eval_wer": 0.33584760315193146, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 3.1358885017421603, |
|
"eval_loss": 0.4477560520172119, |
|
"eval_runtime": 163.8979, |
|
"eval_samples_per_second": 34.509, |
|
"eval_steps_per_second": 4.314, |
|
"eval_wer": 0.3400523182102678, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 3.1552458381726676, |
|
"eval_loss": 0.4414171576499939, |
|
"eval_runtime": 164.4332, |
|
"eval_samples_per_second": 34.397, |
|
"eval_steps_per_second": 4.3, |
|
"eval_wer": 0.3373882621046043, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 3.1746031746031744, |
|
"eval_loss": 0.4476623833179474, |
|
"eval_runtime": 163.6322, |
|
"eval_samples_per_second": 34.565, |
|
"eval_steps_per_second": 4.321, |
|
"eval_wer": 0.335013079552567, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 3.1939605110336817, |
|
"grad_norm": 0.5408484935760498, |
|
"learning_rate": 0.00013742033898305083, |
|
"loss": 0.3201, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.1939605110336817, |
|
"eval_loss": 0.4305751919746399, |
|
"eval_runtime": 163.8253, |
|
"eval_samples_per_second": 34.525, |
|
"eval_steps_per_second": 4.316, |
|
"eval_wer": 0.32917141435701563, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.213317847464189, |
|
"eval_loss": 0.4534677267074585, |
|
"eval_runtime": 163.8666, |
|
"eval_samples_per_second": 34.516, |
|
"eval_steps_per_second": 4.314, |
|
"eval_wer": 0.32941214231837074, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 3.2326751838946963, |
|
"eval_loss": 0.4379562437534332, |
|
"eval_runtime": 164.1184, |
|
"eval_samples_per_second": 34.463, |
|
"eval_steps_per_second": 4.308, |
|
"eval_wer": 0.33408226476866043, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 3.252032520325203, |
|
"eval_loss": 0.43677663803100586, |
|
"eval_runtime": 163.6899, |
|
"eval_samples_per_second": 34.553, |
|
"eval_steps_per_second": 4.319, |
|
"eval_wer": 0.33252555728523053, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 3.2713898567557105, |
|
"eval_loss": 0.4359833598136902, |
|
"eval_runtime": 164.1551, |
|
"eval_samples_per_second": 34.455, |
|
"eval_steps_per_second": 4.307, |
|
"eval_wer": 0.33043924828681936, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 3.290747193186218, |
|
"grad_norm": 0.5537161231040955, |
|
"learning_rate": 0.00013233559322033898, |
|
"loss": 0.3101, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.290747193186218, |
|
"eval_loss": 0.4347226917743683, |
|
"eval_runtime": 164.2964, |
|
"eval_samples_per_second": 34.426, |
|
"eval_steps_per_second": 4.303, |
|
"eval_wer": 0.32812825985781, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.3101045296167246, |
|
"eval_loss": 0.4375491738319397, |
|
"eval_runtime": 162.8938, |
|
"eval_samples_per_second": 34.722, |
|
"eval_steps_per_second": 4.34, |
|
"eval_wer": 0.3284973760652212, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 3.329461866047232, |
|
"eval_loss": 0.4491961896419525, |
|
"eval_runtime": 163.5698, |
|
"eval_samples_per_second": 34.579, |
|
"eval_steps_per_second": 4.322, |
|
"eval_wer": 0.33032690857152025, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 3.3488192024777392, |
|
"eval_loss": 0.4268127977848053, |
|
"eval_runtime": 164.9468, |
|
"eval_samples_per_second": 34.29, |
|
"eval_steps_per_second": 4.286, |
|
"eval_wer": 0.3284652790037072, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 3.368176538908246, |
|
"eval_loss": 0.4377237558364868, |
|
"eval_runtime": 164.0847, |
|
"eval_samples_per_second": 34.47, |
|
"eval_steps_per_second": 4.309, |
|
"eval_wer": 0.3269888141740624, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 3.3875338753387534, |
|
"grad_norm": 0.5330023765563965, |
|
"learning_rate": 0.00012726101694915254, |
|
"loss": 0.2963, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.3875338753387534, |
|
"eval_loss": 0.42494186758995056, |
|
"eval_runtime": 163.0698, |
|
"eval_samples_per_second": 34.685, |
|
"eval_steps_per_second": 4.336, |
|
"eval_wer": 0.3322527322623614, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.4068912117692607, |
|
"eval_loss": 0.4404699206352234, |
|
"eval_runtime": 164.4408, |
|
"eval_samples_per_second": 34.395, |
|
"eval_steps_per_second": 4.299, |
|
"eval_wer": 0.3338736338688193, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 3.4262485481997675, |
|
"eval_loss": 0.43636277318000793, |
|
"eval_runtime": 163.9679, |
|
"eval_samples_per_second": 34.495, |
|
"eval_steps_per_second": 4.312, |
|
"eval_wer": 0.3285615701882493, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 3.445605884630275, |
|
"eval_loss": 0.4350505769252777, |
|
"eval_runtime": 163.6477, |
|
"eval_samples_per_second": 34.562, |
|
"eval_steps_per_second": 4.32, |
|
"eval_wer": 0.3309207042095296, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 3.464963221060782, |
|
"eval_loss": 0.42997920513153076, |
|
"eval_runtime": 163.7592, |
|
"eval_samples_per_second": 34.539, |
|
"eval_steps_per_second": 4.317, |
|
"eval_wer": 0.322880390300268, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 3.484320557491289, |
|
"grad_norm": 0.31616127490997314, |
|
"learning_rate": 0.00012217627118644067, |
|
"loss": 0.3062, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.484320557491289, |
|
"eval_loss": 0.42307358980178833, |
|
"eval_runtime": 164.6469, |
|
"eval_samples_per_second": 34.352, |
|
"eval_steps_per_second": 4.294, |
|
"eval_wer": 0.32523952432154835, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.5036778939217963, |
|
"eval_loss": 0.4325993061065674, |
|
"eval_runtime": 164.1143, |
|
"eval_samples_per_second": 34.464, |
|
"eval_steps_per_second": 4.308, |
|
"eval_wer": 0.32326555503843624, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 3.5230352303523036, |
|
"eval_loss": 0.43141353130340576, |
|
"eval_runtime": 163.522, |
|
"eval_samples_per_second": 34.589, |
|
"eval_steps_per_second": 4.324, |
|
"eval_wer": 0.3282405995731091, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 3.5423925667828104, |
|
"eval_loss": 0.4343957006931305, |
|
"eval_runtime": 163.8089, |
|
"eval_samples_per_second": 34.528, |
|
"eval_steps_per_second": 4.316, |
|
"eval_wer": 0.32894673492641746, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 3.5617499032133177, |
|
"eval_loss": 0.42664915323257446, |
|
"eval_runtime": 166.5849, |
|
"eval_samples_per_second": 33.953, |
|
"eval_steps_per_second": 4.244, |
|
"eval_wer": 0.32207796376241754, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 3.581107239643825, |
|
"grad_norm": 0.40817028284072876, |
|
"learning_rate": 0.00011710169491525424, |
|
"loss": 0.2968, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.581107239643825, |
|
"eval_loss": 0.4305819571018219, |
|
"eval_runtime": 164.3676, |
|
"eval_samples_per_second": 34.411, |
|
"eval_steps_per_second": 4.301, |
|
"eval_wer": 0.32161255637046426, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.600464576074332, |
|
"eval_loss": 0.4318545460700989, |
|
"eval_runtime": 166.4377, |
|
"eval_samples_per_second": 33.983, |
|
"eval_steps_per_second": 4.248, |
|
"eval_wer": 0.3238914477379596, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 3.619821912504839, |
|
"eval_loss": 0.4271145164966583, |
|
"eval_runtime": 166.5812, |
|
"eval_samples_per_second": 33.953, |
|
"eval_steps_per_second": 4.244, |
|
"eval_wer": 0.3232013609154082, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 3.6391792489353465, |
|
"eval_loss": 0.41837719082832336, |
|
"eval_runtime": 164.5624, |
|
"eval_samples_per_second": 34.37, |
|
"eval_steps_per_second": 4.296, |
|
"eval_wer": 0.32641106706681006, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 3.658536585365854, |
|
"eval_loss": 0.4237981140613556, |
|
"eval_runtime": 165.5252, |
|
"eval_samples_per_second": 34.17, |
|
"eval_steps_per_second": 4.271, |
|
"eval_wer": 0.31997560623324933, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 3.6778939217963607, |
|
"grad_norm": 0.9548519253730774, |
|
"learning_rate": 0.00011201694915254236, |
|
"loss": 0.3191, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.6778939217963607, |
|
"eval_loss": 0.41389960050582886, |
|
"eval_runtime": 163.7093, |
|
"eval_samples_per_second": 34.549, |
|
"eval_steps_per_second": 4.319, |
|
"eval_wer": 0.3225915167466418, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.697251258226868, |
|
"eval_loss": 0.42384064197540283, |
|
"eval_runtime": 164.2841, |
|
"eval_samples_per_second": 34.428, |
|
"eval_steps_per_second": 4.304, |
|
"eval_wer": 0.3159955706055111, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 3.7166085946573753, |
|
"eval_loss": 0.4176156520843506, |
|
"eval_runtime": 163.8287, |
|
"eval_samples_per_second": 34.524, |
|
"eval_steps_per_second": 4.315, |
|
"eval_wer": 0.319301567941455, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 3.7359659310878826, |
|
"eval_loss": 0.4196203351020813, |
|
"eval_runtime": 165.0023, |
|
"eval_samples_per_second": 34.278, |
|
"eval_steps_per_second": 4.285, |
|
"eval_wer": 0.3202644797868755, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 3.7553232675183894, |
|
"eval_loss": 0.409524530172348, |
|
"eval_runtime": 164.4937, |
|
"eval_samples_per_second": 34.384, |
|
"eval_steps_per_second": 4.298, |
|
"eval_wer": 0.3181621222577073, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 3.7746806039488967, |
|
"grad_norm": 0.43373510241508484, |
|
"learning_rate": 0.00010693220338983049, |
|
"loss": 0.2921, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.7746806039488967, |
|
"eval_loss": 0.41209807991981506, |
|
"eval_runtime": 164.8329, |
|
"eval_samples_per_second": 34.314, |
|
"eval_steps_per_second": 4.289, |
|
"eval_wer": 0.31665356036654846, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.794037940379404, |
|
"eval_loss": 0.4112759530544281, |
|
"eval_runtime": 164.1863, |
|
"eval_samples_per_second": 34.449, |
|
"eval_steps_per_second": 4.306, |
|
"eval_wer": 0.31455120283738025, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 3.813395276809911, |
|
"eval_loss": 0.4094259738922119, |
|
"eval_runtime": 164.4841, |
|
"eval_samples_per_second": 34.386, |
|
"eval_steps_per_second": 4.298, |
|
"eval_wer": 0.3160758132592961, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 3.832752613240418, |
|
"eval_loss": 0.40931811928749084, |
|
"eval_runtime": 164.2911, |
|
"eval_samples_per_second": 34.427, |
|
"eval_steps_per_second": 4.303, |
|
"eval_wer": 0.31386111601482886, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 3.8521099496709255, |
|
"eval_loss": 0.41117748618125916, |
|
"eval_runtime": 165.1732, |
|
"eval_samples_per_second": 34.243, |
|
"eval_steps_per_second": 4.28, |
|
"eval_wer": 0.31731155012758583, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 3.8714672861014323, |
|
"grad_norm": 0.5022397637367249, |
|
"learning_rate": 0.00010184745762711863, |
|
"loss": 0.3007, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.8714672861014323, |
|
"eval_loss": 0.4092504680156708, |
|
"eval_runtime": 163.9434, |
|
"eval_samples_per_second": 34.5, |
|
"eval_steps_per_second": 4.312, |
|
"eval_wer": 0.31593137648248304, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.8908246225319396, |
|
"eval_loss": 0.4147598147392273, |
|
"eval_runtime": 164.6303, |
|
"eval_samples_per_second": 34.356, |
|
"eval_steps_per_second": 4.294, |
|
"eval_wer": 0.31565855145961386, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 3.910181958962447, |
|
"eval_loss": 0.41137251257896423, |
|
"eval_runtime": 164.6634, |
|
"eval_samples_per_second": 34.349, |
|
"eval_steps_per_second": 4.294, |
|
"eval_wer": 0.3150326587600905, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 3.9295392953929538, |
|
"eval_loss": 0.4155375361442566, |
|
"eval_runtime": 164.3406, |
|
"eval_samples_per_second": 34.416, |
|
"eval_steps_per_second": 4.302, |
|
"eval_wer": 0.31456725136813724, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 3.948896631823461, |
|
"eval_loss": 0.4075925648212433, |
|
"eval_runtime": 164.3692, |
|
"eval_samples_per_second": 34.41, |
|
"eval_steps_per_second": 4.301, |
|
"eval_wer": 0.3135722424612027, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 3.9682539682539684, |
|
"grad_norm": 0.6109060049057007, |
|
"learning_rate": 9.67728813559322e-05, |
|
"loss": 0.296, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.9682539682539684, |
|
"eval_loss": 0.4066578149795532, |
|
"eval_runtime": 164.7852, |
|
"eval_samples_per_second": 34.323, |
|
"eval_steps_per_second": 4.29, |
|
"eval_wer": 0.3125611850235111, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.987611304684475, |
|
"eval_loss": 0.40839362144470215, |
|
"eval_runtime": 164.8883, |
|
"eval_samples_per_second": 34.302, |
|
"eval_steps_per_second": 4.288, |
|
"eval_wer": 0.3150487072908475, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 4.006968641114983, |
|
"eval_loss": 0.4150494635105133, |
|
"eval_runtime": 164.1525, |
|
"eval_samples_per_second": 34.456, |
|
"eval_steps_per_second": 4.307, |
|
"eval_wer": 0.312432796777455, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 4.02632597754549, |
|
"eval_loss": 0.41322341561317444, |
|
"eval_runtime": 164.6726, |
|
"eval_samples_per_second": 34.347, |
|
"eval_steps_per_second": 4.293, |
|
"eval_wer": 0.3132512718460625, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 4.045683313975997, |
|
"eval_loss": 0.4182606339454651, |
|
"eval_runtime": 164.9667, |
|
"eval_samples_per_second": 34.286, |
|
"eval_steps_per_second": 4.286, |
|
"eval_wer": 0.31464749402192227, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 4.065040650406504, |
|
"grad_norm": 0.9771650433540344, |
|
"learning_rate": 9.168813559322032e-05, |
|
"loss": 0.2611, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.065040650406504, |
|
"eval_loss": 0.41840454936027527, |
|
"eval_runtime": 164.8893, |
|
"eval_samples_per_second": 34.302, |
|
"eval_steps_per_second": 4.288, |
|
"eval_wer": 0.30952801271043634, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.084397986837011, |
|
"eval_loss": 0.4167742431163788, |
|
"eval_runtime": 165.1089, |
|
"eval_samples_per_second": 34.256, |
|
"eval_steps_per_second": 4.282, |
|
"eval_wer": 0.30845276114971676, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 4.103755323267518, |
|
"eval_loss": 0.42244288325309753, |
|
"eval_runtime": 164.9167, |
|
"eval_samples_per_second": 34.296, |
|
"eval_steps_per_second": 4.287, |
|
"eval_wer": 0.31015390540995974, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 4.123112659698026, |
|
"eval_loss": 0.4187394678592682, |
|
"eval_runtime": 164.7166, |
|
"eval_samples_per_second": 34.338, |
|
"eval_steps_per_second": 4.292, |
|
"eval_wer": 0.30456901670652053, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 4.142469996128533, |
|
"eval_loss": 0.41454723477363586, |
|
"eval_runtime": 164.38, |
|
"eval_samples_per_second": 34.408, |
|
"eval_steps_per_second": 4.301, |
|
"eval_wer": 0.3110044775400812, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 4.16182733255904, |
|
"grad_norm": 0.8976078629493713, |
|
"learning_rate": 8.660338983050847e-05, |
|
"loss": 0.2431, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.16182733255904, |
|
"eval_loss": 0.42720434069633484, |
|
"eval_runtime": 165.0533, |
|
"eval_samples_per_second": 34.268, |
|
"eval_steps_per_second": 4.283, |
|
"eval_wer": 0.31071560398645504, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.181184668989547, |
|
"eval_loss": 0.41736435890197754, |
|
"eval_runtime": 164.6627, |
|
"eval_samples_per_second": 34.349, |
|
"eval_steps_per_second": 4.294, |
|
"eval_wer": 0.3069923448508289, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 4.200542005420054, |
|
"eval_loss": 0.41904589533805847, |
|
"eval_runtime": 165.6169, |
|
"eval_samples_per_second": 34.151, |
|
"eval_steps_per_second": 4.269, |
|
"eval_wer": 0.3085811493957728, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 4.219899341850561, |
|
"eval_loss": 0.41643446683883667, |
|
"eval_runtime": 165.1417, |
|
"eval_samples_per_second": 34.249, |
|
"eval_steps_per_second": 4.281, |
|
"eval_wer": 0.3050825696907448, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 4.239256678281069, |
|
"eval_loss": 0.41955476999282837, |
|
"eval_runtime": 165.2591, |
|
"eval_samples_per_second": 34.225, |
|
"eval_steps_per_second": 4.278, |
|
"eval_wer": 0.30777872285792235, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 4.258614014711576, |
|
"grad_norm": 1.5854851007461548, |
|
"learning_rate": 8.15186440677966e-05, |
|
"loss": 0.2453, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.258614014711576, |
|
"eval_loss": 0.42485129833221436, |
|
"eval_runtime": 164.7312, |
|
"eval_samples_per_second": 34.335, |
|
"eval_steps_per_second": 4.292, |
|
"eval_wer": 0.30915889650302514, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.2779713511420825, |
|
"eval_loss": 0.4246067404747009, |
|
"eval_runtime": 164.9099, |
|
"eval_samples_per_second": 34.298, |
|
"eval_steps_per_second": 4.287, |
|
"eval_wer": 0.30736146105824014, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 4.29732868757259, |
|
"eval_loss": 0.4166228771209717, |
|
"eval_runtime": 164.9564, |
|
"eval_samples_per_second": 34.288, |
|
"eval_steps_per_second": 4.286, |
|
"eval_wer": 0.30740960665051115, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 4.316686024003097, |
|
"eval_loss": 0.4192067086696625, |
|
"eval_runtime": 165.6591, |
|
"eval_samples_per_second": 34.142, |
|
"eval_steps_per_second": 4.268, |
|
"eval_wer": 0.3027555327309785, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 4.336043360433604, |
|
"eval_loss": 0.41863906383514404, |
|
"eval_runtime": 164.9558, |
|
"eval_samples_per_second": 34.288, |
|
"eval_steps_per_second": 4.286, |
|
"eval_wer": 0.3020975429699411, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 4.355400696864112, |
|
"grad_norm": 1.1900339126586914, |
|
"learning_rate": 7.645423728813559e-05, |
|
"loss": 0.2336, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.355400696864112, |
|
"eval_loss": 0.4268459677696228, |
|
"eval_runtime": 166.7137, |
|
"eval_samples_per_second": 33.926, |
|
"eval_steps_per_second": 4.241, |
|
"eval_wer": 0.3083885670266887, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.3747580332946185, |
|
"eval_loss": 0.4346672296524048, |
|
"eval_runtime": 170.3751, |
|
"eval_samples_per_second": 33.197, |
|
"eval_steps_per_second": 4.15, |
|
"eval_wer": 0.307104684566128, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 4.394115369725126, |
|
"eval_loss": 0.47525468468666077, |
|
"eval_runtime": 164.9807, |
|
"eval_samples_per_second": 34.283, |
|
"eval_steps_per_second": 4.285, |
|
"eval_wer": 0.3208582754248849, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 4.413472706155633, |
|
"eval_loss": 0.582381546497345, |
|
"eval_runtime": 165.1397, |
|
"eval_samples_per_second": 34.25, |
|
"eval_steps_per_second": 4.281, |
|
"eval_wer": 0.415416218645183, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 4.43283004258614, |
|
"eval_loss": 0.5073803067207336, |
|
"eval_runtime": 165.3352, |
|
"eval_samples_per_second": 34.209, |
|
"eval_steps_per_second": 4.276, |
|
"eval_wer": 0.3415448315706697, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 4.452187379016648, |
|
"grad_norm": 1.5807456970214844, |
|
"learning_rate": 7.136949152542373e-05, |
|
"loss": 0.3426, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.452187379016648, |
|
"eval_loss": 0.6242379546165466, |
|
"eval_runtime": 164.8642, |
|
"eval_samples_per_second": 34.307, |
|
"eval_steps_per_second": 4.288, |
|
"eval_wer": 0.41979746754184655, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.471544715447155, |
|
"eval_loss": 0.5862211585044861, |
|
"eval_runtime": 164.8283, |
|
"eval_samples_per_second": 34.314, |
|
"eval_steps_per_second": 4.289, |
|
"eval_wer": 0.4200702925647157, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 4.4909020518776614, |
|
"eval_loss": 0.6151086091995239, |
|
"eval_runtime": 165.0995, |
|
"eval_samples_per_second": 34.258, |
|
"eval_steps_per_second": 4.282, |
|
"eval_wer": 0.39638266116737014, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 4.510259388308169, |
|
"eval_loss": 0.5640283226966858, |
|
"eval_runtime": 164.849, |
|
"eval_samples_per_second": 34.31, |
|
"eval_steps_per_second": 4.289, |
|
"eval_wer": 0.3685705573654732, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 4.529616724738676, |
|
"eval_loss": 0.6589744091033936, |
|
"eval_runtime": 164.8194, |
|
"eval_samples_per_second": 34.316, |
|
"eval_steps_per_second": 4.29, |
|
"eval_wer": 0.4647494021922293, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 4.548974061169183, |
|
"grad_norm": 1.0218427181243896, |
|
"learning_rate": 6.628474576271186e-05, |
|
"loss": 0.4541, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.548974061169183, |
|
"eval_loss": 0.6010532975196838, |
|
"eval_runtime": 165.0253, |
|
"eval_samples_per_second": 34.274, |
|
"eval_steps_per_second": 4.284, |
|
"eval_wer": 0.3959974964292019, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.568331397599691, |
|
"eval_loss": 0.5802894830703735, |
|
"eval_runtime": 166.0838, |
|
"eval_samples_per_second": 34.055, |
|
"eval_steps_per_second": 4.257, |
|
"eval_wer": 0.39505063311453836, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 4.5876887340301975, |
|
"eval_loss": 0.5762883424758911, |
|
"eval_runtime": 165.1308, |
|
"eval_samples_per_second": 34.252, |
|
"eval_steps_per_second": 4.281, |
|
"eval_wer": 0.3910545489560431, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 4.607046070460704, |
|
"eval_loss": 0.5418487787246704, |
|
"eval_runtime": 165.1407, |
|
"eval_samples_per_second": 34.25, |
|
"eval_steps_per_second": 4.281, |
|
"eval_wer": 0.36550528799088444, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 4.626403406891212, |
|
"eval_loss": 0.5546759366989136, |
|
"eval_runtime": 165.8678, |
|
"eval_samples_per_second": 34.099, |
|
"eval_steps_per_second": 4.262, |
|
"eval_wer": 0.38877565758854776, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 4.645760743321719, |
|
"grad_norm": 32.50680923461914, |
|
"learning_rate": 6.12e-05, |
|
"loss": 0.4145, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.645760743321719, |
|
"eval_loss": 0.5300523638725281, |
|
"eval_runtime": 164.9724, |
|
"eval_samples_per_second": 34.285, |
|
"eval_steps_per_second": 4.286, |
|
"eval_wer": 0.3608030684790807, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.665118079752226, |
|
"eval_loss": 0.573882520198822, |
|
"eval_runtime": 165.2226, |
|
"eval_samples_per_second": 34.233, |
|
"eval_steps_per_second": 4.279, |
|
"eval_wer": 0.39927139670363176, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 4.6844754161827336, |
|
"eval_loss": 0.5775899887084961, |
|
"eval_runtime": 165.3691, |
|
"eval_samples_per_second": 34.202, |
|
"eval_steps_per_second": 4.275, |
|
"eval_wer": 0.39816404808139816, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 4.70383275261324, |
|
"eval_loss": 0.5412492156028748, |
|
"eval_runtime": 164.9818, |
|
"eval_samples_per_second": 34.283, |
|
"eval_steps_per_second": 4.285, |
|
"eval_wer": 0.37078525460994044, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 4.723190089043747, |
|
"eval_loss": 0.5329325199127197, |
|
"eval_runtime": 165.8065, |
|
"eval_samples_per_second": 34.112, |
|
"eval_steps_per_second": 4.264, |
|
"eval_wer": 0.37044823546404326, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 4.742547425474255, |
|
"grad_norm": 1.8765805959701538, |
|
"learning_rate": 5.611525423728813e-05, |
|
"loss": 0.3834, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.742547425474255, |
|
"eval_loss": 0.5299070477485657, |
|
"eval_runtime": 165.3917, |
|
"eval_samples_per_second": 34.198, |
|
"eval_steps_per_second": 4.275, |
|
"eval_wer": 0.3732246312850059, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.761904761904762, |
|
"eval_loss": 0.5424681901931763, |
|
"eval_runtime": 165.4071, |
|
"eval_samples_per_second": 34.194, |
|
"eval_steps_per_second": 4.274, |
|
"eval_wer": 0.3928519844008281, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 4.781262098335269, |
|
"eval_loss": 0.5111268758773804, |
|
"eval_runtime": 165.4914, |
|
"eval_samples_per_second": 34.177, |
|
"eval_steps_per_second": 4.272, |
|
"eval_wer": 0.3585241771115854, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 4.8006194347657765, |
|
"eval_loss": 0.5076457858085632, |
|
"eval_runtime": 165.6732, |
|
"eval_samples_per_second": 34.14, |
|
"eval_steps_per_second": 4.267, |
|
"eval_wer": 0.35033942642551075, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 4.819976771196283, |
|
"eval_loss": 0.5261921882629395, |
|
"eval_runtime": 165.2946, |
|
"eval_samples_per_second": 34.218, |
|
"eval_steps_per_second": 4.277, |
|
"eval_wer": 0.3681372470350339, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 4.83933410762679, |
|
"grad_norm": 5.934371471405029, |
|
"learning_rate": 5.1030508474576264e-05, |
|
"loss": 0.3719, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.83933410762679, |
|
"eval_loss": 0.547415018081665, |
|
"eval_runtime": 165.6997, |
|
"eval_samples_per_second": 34.134, |
|
"eval_steps_per_second": 4.267, |
|
"eval_wer": 0.3833031086004076, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.858691444057298, |
|
"eval_loss": 0.5746738910675049, |
|
"eval_runtime": 165.8407, |
|
"eval_samples_per_second": 34.105, |
|
"eval_steps_per_second": 4.263, |
|
"eval_wer": 0.40389337356165045, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 4.878048780487805, |
|
"eval_loss": 0.5188133120536804, |
|
"eval_runtime": 165.5746, |
|
"eval_samples_per_second": 34.16, |
|
"eval_steps_per_second": 4.27, |
|
"eval_wer": 0.3503073293639967, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 4.897406116918312, |
|
"eval_loss": 0.5522667169570923, |
|
"eval_runtime": 165.1011, |
|
"eval_samples_per_second": 34.258, |
|
"eval_steps_per_second": 4.282, |
|
"eval_wer": 0.3865609603440805, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 4.916763453348819, |
|
"eval_loss": 0.5302358865737915, |
|
"eval_runtime": 165.871, |
|
"eval_samples_per_second": 34.099, |
|
"eval_steps_per_second": 4.262, |
|
"eval_wer": 0.36446213349167883, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 4.936120789779326, |
|
"grad_norm": 1.1752023696899414, |
|
"learning_rate": 4.595593220338983e-05, |
|
"loss": 0.3798, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.936120789779326, |
|
"eval_loss": 0.5099266767501831, |
|
"eval_runtime": 165.9652, |
|
"eval_samples_per_second": 34.079, |
|
"eval_steps_per_second": 4.26, |
|
"eval_wer": 0.3499542616873425, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.955478126209833, |
|
"eval_loss": 0.4823363125324249, |
|
"eval_runtime": 164.8602, |
|
"eval_samples_per_second": 34.308, |
|
"eval_steps_per_second": 4.288, |
|
"eval_wer": 0.33761294153520244, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 4.974835462640341, |
|
"eval_loss": 0.4805842936038971, |
|
"eval_runtime": 166.8028, |
|
"eval_samples_per_second": 33.908, |
|
"eval_steps_per_second": 4.239, |
|
"eval_wer": 0.3357352634366324, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 4.994192799070848, |
|
"eval_loss": 0.4942820370197296, |
|
"eval_runtime": 165.4552, |
|
"eval_samples_per_second": 34.184, |
|
"eval_steps_per_second": 4.273, |
|
"eval_wer": 0.35093322206352007, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 5.013550135501355, |
|
"eval_loss": 0.49528568983078003, |
|
"eval_runtime": 165.947, |
|
"eval_samples_per_second": 34.083, |
|
"eval_steps_per_second": 4.26, |
|
"eval_wer": 0.35245783248543594, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 5.032907471931862, |
|
"grad_norm": 27.542322158813477, |
|
"learning_rate": 4.087118644067796e-05, |
|
"loss": 0.3158, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.032907471931862, |
|
"eval_loss": 0.485315203666687, |
|
"eval_runtime": 165.2844, |
|
"eval_samples_per_second": 34.22, |
|
"eval_steps_per_second": 4.277, |
|
"eval_wer": 0.34703342908956686, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.052264808362369, |
|
"eval_loss": 0.5204781293869019, |
|
"eval_runtime": 165.773, |
|
"eval_samples_per_second": 34.119, |
|
"eval_steps_per_second": 4.265, |
|
"eval_wer": 0.36183017444752935, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 5.071622144792877, |
|
"eval_loss": 0.5013459920883179, |
|
"eval_runtime": 165.057, |
|
"eval_samples_per_second": 34.267, |
|
"eval_steps_per_second": 4.283, |
|
"eval_wer": 0.3510455617788191, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 5.090979481223384, |
|
"eval_loss": 0.4863474667072296, |
|
"eval_runtime": 165.6964, |
|
"eval_samples_per_second": 34.135, |
|
"eval_steps_per_second": 4.267, |
|
"eval_wer": 0.3396511049413426, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 5.110336817653891, |
|
"eval_loss": 0.47152572870254517, |
|
"eval_runtime": 166.0563, |
|
"eval_samples_per_second": 34.061, |
|
"eval_steps_per_second": 4.258, |
|
"eval_wer": 0.32851342459597827, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 5.129694154084398, |
|
"grad_norm": 0.8464019894599915, |
|
"learning_rate": 3.5786440677966095e-05, |
|
"loss": 0.2993, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.129694154084398, |
|
"eval_loss": 0.4816218912601471, |
|
"eval_runtime": 165.4176, |
|
"eval_samples_per_second": 34.192, |
|
"eval_steps_per_second": 4.274, |
|
"eval_wer": 0.33273418818507167, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.149051490514905, |
|
"eval_loss": 0.48058804869651794, |
|
"eval_runtime": 166.4075, |
|
"eval_samples_per_second": 33.989, |
|
"eval_steps_per_second": 4.249, |
|
"eval_wer": 0.33811044598866974, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 5.168408826945412, |
|
"eval_loss": 0.4854019284248352, |
|
"eval_runtime": 165.1934, |
|
"eval_samples_per_second": 34.239, |
|
"eval_steps_per_second": 4.28, |
|
"eval_wer": 0.33416250742244547, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 5.18776616337592, |
|
"eval_loss": 0.49545472860336304, |
|
"eval_runtime": 165.6735, |
|
"eval_samples_per_second": 34.139, |
|
"eval_steps_per_second": 4.267, |
|
"eval_wer": 0.3433422670154547, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 5.207123499806427, |
|
"eval_loss": 0.4862872064113617, |
|
"eval_runtime": 165.5277, |
|
"eval_samples_per_second": 34.17, |
|
"eval_steps_per_second": 4.271, |
|
"eval_wer": 0.34337436407696875, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 5.2264808362369335, |
|
"grad_norm": 10.611580848693848, |
|
"learning_rate": 3.0701694915254236e-05, |
|
"loss": 0.2902, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.2264808362369335, |
|
"eval_loss": 0.48670876026153564, |
|
"eval_runtime": 165.4986, |
|
"eval_samples_per_second": 34.176, |
|
"eval_steps_per_second": 4.272, |
|
"eval_wer": 0.3448508289066136, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.245838172667441, |
|
"eval_loss": 0.4787338674068451, |
|
"eval_runtime": 165.461, |
|
"eval_samples_per_second": 34.183, |
|
"eval_steps_per_second": 4.273, |
|
"eval_wer": 0.33778947537352955, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 5.265195509097948, |
|
"eval_loss": 0.4861724376678467, |
|
"eval_runtime": 165.3459, |
|
"eval_samples_per_second": 34.207, |
|
"eval_steps_per_second": 4.276, |
|
"eval_wer": 0.33793391215034263, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 5.284552845528455, |
|
"eval_loss": 0.4954308271408081, |
|
"eval_runtime": 165.6637, |
|
"eval_samples_per_second": 34.141, |
|
"eval_steps_per_second": 4.268, |
|
"eval_wer": 0.3467927011282117, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 5.303910181958963, |
|
"eval_loss": 0.572640061378479, |
|
"eval_runtime": 165.6804, |
|
"eval_samples_per_second": 34.138, |
|
"eval_steps_per_second": 4.267, |
|
"eval_wer": 0.41416443324613633, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 5.3232675183894695, |
|
"grad_norm": 1.2211335897445679, |
|
"learning_rate": 2.5627118644067793e-05, |
|
"loss": 0.305, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 5.3232675183894695, |
|
"eval_loss": 0.5179979205131531, |
|
"eval_runtime": 165.6208, |
|
"eval_samples_per_second": 34.15, |
|
"eval_steps_per_second": 4.269, |
|
"eval_wer": 0.35735263436632375, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 5.342624854819976, |
|
"eval_loss": 0.4996646046638489, |
|
"eval_runtime": 164.9613, |
|
"eval_samples_per_second": 34.287, |
|
"eval_steps_per_second": 4.286, |
|
"eval_wer": 0.3452038965832678, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 5.361982191250484, |
|
"eval_loss": 0.4949517846107483, |
|
"eval_runtime": 165.643, |
|
"eval_samples_per_second": 34.146, |
|
"eval_steps_per_second": 4.268, |
|
"eval_wer": 0.34130410360931457, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 5.381339527680991, |
|
"eval_loss": 0.5071349143981934, |
|
"eval_runtime": 166.3001, |
|
"eval_samples_per_second": 34.011, |
|
"eval_steps_per_second": 4.251, |
|
"eval_wer": 0.3491999807417631, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 5.400696864111498, |
|
"eval_loss": 0.5095939040184021, |
|
"eval_runtime": 165.4785, |
|
"eval_samples_per_second": 34.18, |
|
"eval_steps_per_second": 4.272, |
|
"eval_wer": 0.3544799473608191, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 5.420054200542006, |
|
"grad_norm": 15.024033546447754, |
|
"learning_rate": 2.054237288135593e-05, |
|
"loss": 0.3163, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.420054200542006, |
|
"eval_loss": 0.5129156112670898, |
|
"eval_runtime": 166.1505, |
|
"eval_samples_per_second": 34.041, |
|
"eval_steps_per_second": 4.255, |
|
"eval_wer": 0.3565983534207443, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.4394115369725125, |
|
"eval_loss": 0.5067318677902222, |
|
"eval_runtime": 165.9899, |
|
"eval_samples_per_second": 34.074, |
|
"eval_steps_per_second": 4.259, |
|
"eval_wer": 0.3506122514483799, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 5.45876887340302, |
|
"eval_loss": 0.5053198337554932, |
|
"eval_runtime": 165.3351, |
|
"eval_samples_per_second": 34.209, |
|
"eval_steps_per_second": 4.276, |
|
"eval_wer": 0.35000240727961357, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 5.478126209833527, |
|
"eval_loss": 0.5077947974205017, |
|
"eval_runtime": 165.3012, |
|
"eval_samples_per_second": 34.216, |
|
"eval_steps_per_second": 4.277, |
|
"eval_wer": 0.3518640368474266, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 5.497483546264034, |
|
"eval_loss": 0.48453789949417114, |
|
"eval_runtime": 165.2767, |
|
"eval_samples_per_second": 34.221, |
|
"eval_steps_per_second": 4.278, |
|
"eval_wer": 0.3375166503506604, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 5.516840882694542, |
|
"grad_norm": 0.43120303750038147, |
|
"learning_rate": 1.5457627118644067e-05, |
|
"loss": 0.3136, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 5.516840882694542, |
|
"eval_loss": 0.4930485486984253, |
|
"eval_runtime": 165.9777, |
|
"eval_samples_per_second": 34.077, |
|
"eval_steps_per_second": 4.26, |
|
"eval_wer": 0.3439842082457351, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 5.5361982191250485, |
|
"eval_loss": 0.5025920271873474, |
|
"eval_runtime": 165.742, |
|
"eval_samples_per_second": 34.125, |
|
"eval_steps_per_second": 4.266, |
|
"eval_wer": 0.35122209561714623, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"eval_loss": 0.5056036710739136, |
|
"eval_runtime": 165.717, |
|
"eval_samples_per_second": 34.13, |
|
"eval_steps_per_second": 4.266, |
|
"eval_wer": 0.3518800853781836, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 5.574912891986063, |
|
"eval_loss": 0.5090658068656921, |
|
"eval_runtime": 165.5604, |
|
"eval_samples_per_second": 34.163, |
|
"eval_steps_per_second": 4.27, |
|
"eval_wer": 0.3546404326683892, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 5.59427022841657, |
|
"eval_loss": 0.5027741193771362, |
|
"eval_runtime": 170.2845, |
|
"eval_samples_per_second": 33.215, |
|
"eval_steps_per_second": 4.152, |
|
"eval_wer": 0.34952095135690325, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 5.613627564847077, |
|
"grad_norm": 1.859834909439087, |
|
"learning_rate": 1.0372881355932203e-05, |
|
"loss": 0.3092, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.613627564847077, |
|
"eval_loss": 0.505651593208313, |
|
"eval_runtime": 164.9869, |
|
"eval_samples_per_second": 34.282, |
|
"eval_steps_per_second": 4.285, |
|
"eval_wer": 0.3509974161865481, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.6329849012775846, |
|
"eval_loss": 0.5085631608963013, |
|
"eval_runtime": 165.6325, |
|
"eval_samples_per_second": 34.148, |
|
"eval_steps_per_second": 4.268, |
|
"eval_wer": 0.3532923560848004, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 5.652342237708091, |
|
"eval_loss": 0.5055486559867859, |
|
"eval_runtime": 165.6348, |
|
"eval_samples_per_second": 34.147, |
|
"eval_steps_per_second": 4.268, |
|
"eval_wer": 0.35144677504774435, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 5.671699574138598, |
|
"eval_loss": 0.5133376717567444, |
|
"eval_runtime": 166.8503, |
|
"eval_samples_per_second": 33.899, |
|
"eval_steps_per_second": 4.237, |
|
"eval_wer": 0.35765755645070696, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 5.691056910569106, |
|
"eval_loss": 0.5129527449607849, |
|
"eval_runtime": 165.063, |
|
"eval_samples_per_second": 34.266, |
|
"eval_steps_per_second": 4.283, |
|
"eval_wer": 0.35703166375118356, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 5.710414246999613, |
|
"grad_norm": 1.5260862112045288, |
|
"learning_rate": 5.288135593220339e-06, |
|
"loss": 0.3152, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 5.710414246999613, |
|
"eval_loss": 0.5147610902786255, |
|
"eval_runtime": 165.1582, |
|
"eval_samples_per_second": 34.246, |
|
"eval_steps_per_second": 4.281, |
|
"eval_wer": 0.3581390123734172, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 5.72977158343012, |
|
"eval_loss": 0.5114809274673462, |
|
"eval_runtime": 165.7617, |
|
"eval_samples_per_second": 34.121, |
|
"eval_steps_per_second": 4.265, |
|
"eval_wer": 0.3554589077369967, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 5.7491289198606275, |
|
"eval_loss": 0.5053985714912415, |
|
"eval_runtime": 165.7121, |
|
"eval_samples_per_second": 34.131, |
|
"eval_steps_per_second": 4.266, |
|
"eval_wer": 0.35263436632376305, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 5.768486256291134, |
|
"eval_loss": 0.5080947279930115, |
|
"eval_runtime": 165.6502, |
|
"eval_samples_per_second": 34.144, |
|
"eval_steps_per_second": 4.268, |
|
"eval_wer": 0.3535651811076696, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 5.787843592721641, |
|
"eval_loss": 0.5076740384101868, |
|
"eval_runtime": 164.8589, |
|
"eval_samples_per_second": 34.308, |
|
"eval_steps_per_second": 4.289, |
|
"eval_wer": 0.35348493845388457, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 5.807200929152149, |
|
"grad_norm": 24.957311630249023, |
|
"learning_rate": 2.0338983050847458e-07, |
|
"loss": 0.3085, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.807200929152149, |
|
"eval_loss": 0.5066753029823303, |
|
"eval_runtime": 165.8811, |
|
"eval_samples_per_second": 34.097, |
|
"eval_steps_per_second": 4.262, |
|
"eval_wer": 0.35224920158559486, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.807200929152149, |
|
"step": 30000, |
|
"total_flos": 3.3745707679449666e+19, |
|
"train_loss": 0.49102539647420246, |
|
"train_runtime": 61359.0363, |
|
"train_samples_per_second": 3.911, |
|
"train_steps_per_second": 0.489 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 30000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 400, |
|
"total_flos": 3.3745707679449666e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|