|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9357336430507162, |
|
"eval_steps": 100, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.019357336430507164, |
|
"eval_loss": 3.5565404891967773, |
|
"eval_runtime": 151.5266, |
|
"eval_samples_per_second": 37.327, |
|
"eval_steps_per_second": 4.666, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03871467286101433, |
|
"eval_loss": 3.0301756858825684, |
|
"eval_runtime": 150.582, |
|
"eval_samples_per_second": 37.561, |
|
"eval_steps_per_second": 4.695, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05807200929152149, |
|
"eval_loss": 2.9460911750793457, |
|
"eval_runtime": 148.9065, |
|
"eval_samples_per_second": 37.984, |
|
"eval_steps_per_second": 4.748, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07742934572202866, |
|
"eval_loss": 1.8142520189285278, |
|
"eval_runtime": 149.8655, |
|
"eval_samples_per_second": 37.741, |
|
"eval_steps_per_second": 4.718, |
|
"eval_wer": 0.940732775914365, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"grad_norm": 3.132490396499634, |
|
"learning_rate": 0.00029759999999999997, |
|
"loss": 3.9521, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09678668215253582, |
|
"eval_loss": 1.4195518493652344, |
|
"eval_runtime": 150.5171, |
|
"eval_samples_per_second": 37.577, |
|
"eval_steps_per_second": 4.697, |
|
"eval_wer": 0.8693007655149171, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11614401858304298, |
|
"eval_loss": 1.16689133644104, |
|
"eval_runtime": 150.5387, |
|
"eval_samples_per_second": 37.572, |
|
"eval_steps_per_second": 4.696, |
|
"eval_wer": 0.8055239042865626, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13550135501355012, |
|
"eval_loss": 1.0756505727767944, |
|
"eval_runtime": 151.2385, |
|
"eval_samples_per_second": 37.398, |
|
"eval_steps_per_second": 4.675, |
|
"eval_wer": 0.7596251063215163, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1548586914440573, |
|
"eval_loss": 0.9944618344306946, |
|
"eval_runtime": 151.1646, |
|
"eval_samples_per_second": 37.416, |
|
"eval_steps_per_second": 4.677, |
|
"eval_wer": 0.7223925149652549, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17421602787456447, |
|
"eval_loss": 0.9381263256072998, |
|
"eval_runtime": 151.6289, |
|
"eval_samples_per_second": 37.302, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.6870857472998347, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"grad_norm": 7.335289001464844, |
|
"learning_rate": 0.0002844, |
|
"loss": 1.0266, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19357336430507163, |
|
"eval_loss": 0.8977694511413574, |
|
"eval_runtime": 156.0202, |
|
"eval_samples_per_second": 36.252, |
|
"eval_steps_per_second": 4.531, |
|
"eval_wer": 0.661472292211648, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2129307007355788, |
|
"eval_loss": 0.8770694136619568, |
|
"eval_runtime": 151.6589, |
|
"eval_samples_per_second": 37.294, |
|
"eval_steps_per_second": 4.662, |
|
"eval_wer": 0.6450385967164706, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.23228803716608595, |
|
"eval_loss": 0.851553201675415, |
|
"eval_runtime": 151.5945, |
|
"eval_samples_per_second": 37.31, |
|
"eval_steps_per_second": 4.664, |
|
"eval_wer": 0.640432668389209, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2516453735965931, |
|
"eval_loss": 0.8273979425430298, |
|
"eval_runtime": 151.4524, |
|
"eval_samples_per_second": 37.345, |
|
"eval_steps_per_second": 4.668, |
|
"eval_wer": 0.6138081558633307, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27100271002710025, |
|
"eval_loss": 0.7992698550224304, |
|
"eval_runtime": 152.8076, |
|
"eval_samples_per_second": 37.014, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.596973247099228, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"grad_norm": 4.0737223625183105, |
|
"learning_rate": 0.00026861052631578947, |
|
"loss": 0.8454, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"eval_loss": 0.7768516540527344, |
|
"eval_runtime": 152.3743, |
|
"eval_samples_per_second": 37.119, |
|
"eval_steps_per_second": 4.64, |
|
"eval_wer": 0.5887563993516394, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3097173828881146, |
|
"eval_loss": 0.7664207220077515, |
|
"eval_runtime": 154.3668, |
|
"eval_samples_per_second": 36.64, |
|
"eval_steps_per_second": 4.58, |
|
"eval_wer": 0.5997977885124617, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.32907471931862176, |
|
"eval_loss": 0.7400562763214111, |
|
"eval_runtime": 153.7228, |
|
"eval_samples_per_second": 36.793, |
|
"eval_steps_per_second": 4.599, |
|
"eval_wer": 0.5592110542279854, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"eval_loss": 0.746478796005249, |
|
"eval_runtime": 151.7535, |
|
"eval_samples_per_second": 37.271, |
|
"eval_steps_per_second": 4.659, |
|
"eval_wer": 0.5650206223620228, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3677893921796361, |
|
"eval_loss": 0.7252949476242065, |
|
"eval_runtime": 151.7548, |
|
"eval_samples_per_second": 37.271, |
|
"eval_steps_per_second": 4.659, |
|
"eval_wer": 0.5791272808974338, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"grad_norm": 2.4802448749542236, |
|
"learning_rate": 0.0002528210526315789, |
|
"loss": 0.7537, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"eval_loss": 0.7039346098899841, |
|
"eval_runtime": 152.7969, |
|
"eval_samples_per_second": 37.016, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.5343518800853782, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"eval_loss": 0.6932350397109985, |
|
"eval_runtime": 152.4406, |
|
"eval_samples_per_second": 37.103, |
|
"eval_steps_per_second": 4.638, |
|
"eval_wer": 0.5168429330294811, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.4258614014711576, |
|
"eval_loss": 0.696869432926178, |
|
"eval_runtime": 153.0527, |
|
"eval_samples_per_second": 36.955, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.5364381890837894, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.4452187379016647, |
|
"eval_loss": 0.6781283617019653, |
|
"eval_runtime": 152.1378, |
|
"eval_samples_per_second": 37.177, |
|
"eval_steps_per_second": 4.647, |
|
"eval_wer": 0.5173725345444624, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4645760743321719, |
|
"eval_loss": 0.6760829091072083, |
|
"eval_runtime": 151.9712, |
|
"eval_samples_per_second": 37.218, |
|
"eval_steps_per_second": 4.652, |
|
"eval_wer": 0.5050312143923223, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"grad_norm": 3.791292667388916, |
|
"learning_rate": 0.0002370315789473684, |
|
"loss": 0.681, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48393341076267904, |
|
"eval_loss": 0.6720712780952454, |
|
"eval_runtime": 152.2414, |
|
"eval_samples_per_second": 37.152, |
|
"eval_steps_per_second": 4.644, |
|
"eval_wer": 0.528718845789668, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5032907471931862, |
|
"eval_loss": 0.6598270535469055, |
|
"eval_runtime": 151.7192, |
|
"eval_samples_per_second": 37.279, |
|
"eval_steps_per_second": 4.66, |
|
"eval_wer": 0.5195069891351447, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5226480836236934, |
|
"eval_loss": 0.6555168628692627, |
|
"eval_runtime": 152.5678, |
|
"eval_samples_per_second": 37.072, |
|
"eval_steps_per_second": 4.634, |
|
"eval_wer": 0.4975846961210701, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5420054200542005, |
|
"eval_loss": 0.6535276770591736, |
|
"eval_runtime": 152.5246, |
|
"eval_samples_per_second": 37.083, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.49936608303509816, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5613627564847077, |
|
"eval_loss": 0.6258506178855896, |
|
"eval_runtime": 151.843, |
|
"eval_samples_per_second": 37.249, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.48192133010222915, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"grad_norm": 9.4619779586792, |
|
"learning_rate": 0.00022124210526315786, |
|
"loss": 0.6737, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5807200929152149, |
|
"eval_loss": 0.629943311214447, |
|
"eval_runtime": 151.8389, |
|
"eval_samples_per_second": 37.25, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.48022018584198617, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6000774293457221, |
|
"eval_loss": 0.6378594636917114, |
|
"eval_runtime": 151.6255, |
|
"eval_samples_per_second": 37.302, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.4893197027812104, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6194347657762292, |
|
"eval_loss": 0.6225672364234924, |
|
"eval_runtime": 153.0144, |
|
"eval_samples_per_second": 36.964, |
|
"eval_steps_per_second": 4.62, |
|
"eval_wer": 0.4806053505801544, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6387921022067363, |
|
"eval_loss": 0.6088670492172241, |
|
"eval_runtime": 152.2222, |
|
"eval_samples_per_second": 37.156, |
|
"eval_steps_per_second": 4.645, |
|
"eval_wer": 0.4627112387860891, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6581494386372435, |
|
"eval_loss": 0.6028585433959961, |
|
"eval_runtime": 153.0615, |
|
"eval_samples_per_second": 36.952, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.47354399704707034, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"grad_norm": 3.4705822467803955, |
|
"learning_rate": 0.00020545263157894736, |
|
"loss": 0.6419, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6775067750677507, |
|
"eval_loss": 0.5871421694755554, |
|
"eval_runtime": 152.5739, |
|
"eval_samples_per_second": 37.071, |
|
"eval_steps_per_second": 4.634, |
|
"eval_wer": 0.4592126590810611, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"eval_loss": 0.6001027226448059, |
|
"eval_runtime": 152.1697, |
|
"eval_samples_per_second": 37.169, |
|
"eval_steps_per_second": 4.646, |
|
"eval_wer": 0.4610742886488742, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.716221447928765, |
|
"eval_loss": 0.5848923921585083, |
|
"eval_runtime": 152.6563, |
|
"eval_samples_per_second": 37.051, |
|
"eval_steps_per_second": 4.631, |
|
"eval_wer": 0.4472565036670893, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.7355787843592722, |
|
"eval_loss": 0.5923960208892822, |
|
"eval_runtime": 152.6559, |
|
"eval_samples_per_second": 37.051, |
|
"eval_steps_per_second": 4.631, |
|
"eval_wer": 0.46377044181605176, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7549361207897793, |
|
"eval_loss": 0.5767965316772461, |
|
"eval_runtime": 152.1652, |
|
"eval_samples_per_second": 37.17, |
|
"eval_steps_per_second": 4.646, |
|
"eval_wer": 0.4584904751969957, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"grad_norm": 3.628082275390625, |
|
"learning_rate": 0.00018966315789473683, |
|
"loss": 0.6183, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"eval_loss": 0.5672534704208374, |
|
"eval_runtime": 152.4329, |
|
"eval_samples_per_second": 37.105, |
|
"eval_steps_per_second": 4.638, |
|
"eval_wer": 0.44531463144549116, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"eval_loss": 0.5575382113456726, |
|
"eval_runtime": 152.2388, |
|
"eval_samples_per_second": 37.152, |
|
"eval_steps_per_second": 4.644, |
|
"eval_wer": 0.4451862431994351, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"eval_loss": 0.5631808042526245, |
|
"eval_runtime": 152.7545, |
|
"eval_samples_per_second": 37.027, |
|
"eval_steps_per_second": 4.628, |
|
"eval_wer": 0.4474972316284444, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.832365466511808, |
|
"eval_loss": 0.5498641729354858, |
|
"eval_runtime": 153.7788, |
|
"eval_samples_per_second": 36.78, |
|
"eval_steps_per_second": 4.598, |
|
"eval_wer": 0.44008281041870617, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8517228029423152, |
|
"eval_loss": 0.5662574172019958, |
|
"eval_runtime": 152.5034, |
|
"eval_samples_per_second": 37.088, |
|
"eval_steps_per_second": 4.636, |
|
"eval_wer": 0.43101539054099597, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"grad_norm": 2.376349925994873, |
|
"learning_rate": 0.0001738736842105263, |
|
"loss": 0.5877, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"eval_loss": 0.5584732294082642, |
|
"eval_runtime": 152.1714, |
|
"eval_samples_per_second": 37.169, |
|
"eval_steps_per_second": 4.646, |
|
"eval_wer": 0.4317215258943044, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8904374758033294, |
|
"eval_loss": 0.5463821291923523, |
|
"eval_runtime": 152.4923, |
|
"eval_samples_per_second": 37.09, |
|
"eval_steps_per_second": 4.636, |
|
"eval_wer": 0.41997400138017366, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9097948122338366, |
|
"eval_loss": 0.5381494164466858, |
|
"eval_runtime": 153.2139, |
|
"eval_samples_per_second": 36.916, |
|
"eval_steps_per_second": 4.614, |
|
"eval_wer": 0.4192197204345942, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.9291521486643438, |
|
"eval_loss": 0.5453722476959229, |
|
"eval_runtime": 151.9737, |
|
"eval_samples_per_second": 37.217, |
|
"eval_steps_per_second": 4.652, |
|
"eval_wer": 0.4201986808107718, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.948509485094851, |
|
"eval_loss": 0.5237515568733215, |
|
"eval_runtime": 151.8558, |
|
"eval_samples_per_second": 37.246, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.41241514339362234, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"grad_norm": 2.5489518642425537, |
|
"learning_rate": 0.0001581157894736842, |
|
"loss": 0.5621, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9678668215253581, |
|
"eval_loss": 0.5303541421890259, |
|
"eval_runtime": 152.515, |
|
"eval_samples_per_second": 37.085, |
|
"eval_steps_per_second": 4.636, |
|
"eval_wer": 0.41353854054661293, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9872241579558653, |
|
"eval_loss": 0.5163344740867615, |
|
"eval_runtime": 156.7945, |
|
"eval_samples_per_second": 36.073, |
|
"eval_steps_per_second": 4.509, |
|
"eval_wer": 0.4061080708061177, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.0065814943863725, |
|
"eval_loss": 0.51596599817276, |
|
"eval_runtime": 153.2891, |
|
"eval_samples_per_second": 36.898, |
|
"eval_steps_per_second": 4.612, |
|
"eval_wer": 0.39927139670363176, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.0259388308168795, |
|
"eval_loss": 0.5088583827018738, |
|
"eval_runtime": 152.7112, |
|
"eval_samples_per_second": 37.037, |
|
"eval_steps_per_second": 4.63, |
|
"eval_wer": 0.3898509091492674, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.0452961672473868, |
|
"eval_loss": 0.5110610723495483, |
|
"eval_runtime": 152.5555, |
|
"eval_samples_per_second": 37.075, |
|
"eval_steps_per_second": 4.634, |
|
"eval_wer": 0.3985652613503234, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"grad_norm": 1.1362248659133911, |
|
"learning_rate": 0.0001423578947368421, |
|
"loss": 0.4882, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.064653503677894, |
|
"eval_loss": 0.5010027885437012, |
|
"eval_runtime": 152.1249, |
|
"eval_samples_per_second": 37.18, |
|
"eval_steps_per_second": 4.647, |
|
"eval_wer": 0.38574248527547306, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.084010840108401, |
|
"eval_loss": 0.49406561255455017, |
|
"eval_runtime": 151.5623, |
|
"eval_samples_per_second": 37.318, |
|
"eval_steps_per_second": 4.665, |
|
"eval_wer": 0.3858548249907721, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.1033681765389083, |
|
"eval_loss": 0.49403733015060425, |
|
"eval_runtime": 152.7631, |
|
"eval_samples_per_second": 37.025, |
|
"eval_steps_per_second": 4.628, |
|
"eval_wer": 0.3813451878480525, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.1227255129694154, |
|
"eval_loss": 0.4913772642612457, |
|
"eval_runtime": 152.1406, |
|
"eval_samples_per_second": 37.176, |
|
"eval_steps_per_second": 4.647, |
|
"eval_wer": 0.37815153022740766, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.1420828493999227, |
|
"eval_loss": 0.48747047781944275, |
|
"eval_runtime": 151.3195, |
|
"eval_samples_per_second": 37.378, |
|
"eval_steps_per_second": 4.672, |
|
"eval_wer": 0.3745406108070806, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"grad_norm": 1.0150744915008545, |
|
"learning_rate": 0.00012656842105263156, |
|
"loss": 0.4569, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"eval_loss": 0.4841971695423126, |
|
"eval_runtime": 151.8567, |
|
"eval_samples_per_second": 37.246, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.38071929514852915, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1807975222609368, |
|
"eval_loss": 0.48611822724342346, |
|
"eval_runtime": 150.971, |
|
"eval_samples_per_second": 37.464, |
|
"eval_steps_per_second": 4.683, |
|
"eval_wer": 0.37370608720771614, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.2001548586914441, |
|
"eval_loss": 0.48144644498825073, |
|
"eval_runtime": 151.4548, |
|
"eval_samples_per_second": 37.344, |
|
"eval_steps_per_second": 4.668, |
|
"eval_wer": 0.3760973182905105, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"eval_loss": 0.47813892364501953, |
|
"eval_runtime": 151.1935, |
|
"eval_samples_per_second": 37.409, |
|
"eval_steps_per_second": 4.676, |
|
"eval_wer": 0.37409125194588433, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.2388695315524583, |
|
"eval_loss": 0.4771001935005188, |
|
"eval_runtime": 151.1732, |
|
"eval_samples_per_second": 37.414, |
|
"eval_steps_per_second": 4.677, |
|
"eval_wer": 0.36815329556579096, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"grad_norm": 1.3292571306228638, |
|
"learning_rate": 0.00011077894736842105, |
|
"loss": 0.4416, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2582268679829656, |
|
"eval_loss": 0.47095027565956116, |
|
"eval_runtime": 151.5037, |
|
"eval_samples_per_second": 37.332, |
|
"eval_steps_per_second": 4.667, |
|
"eval_wer": 0.37338511659257595, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2775842044134726, |
|
"eval_loss": 0.47211408615112305, |
|
"eval_runtime": 150.9455, |
|
"eval_samples_per_second": 37.47, |
|
"eval_steps_per_second": 4.684, |
|
"eval_wer": 0.3659706953828377, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.29694154084398, |
|
"eval_loss": 0.4679400622844696, |
|
"eval_runtime": 151.4191, |
|
"eval_samples_per_second": 37.353, |
|
"eval_steps_per_second": 4.669, |
|
"eval_wer": 0.3638843863844265, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.316298877274487, |
|
"eval_loss": 0.46228036284446716, |
|
"eval_runtime": 151.3839, |
|
"eval_samples_per_second": 37.362, |
|
"eval_steps_per_second": 4.67, |
|
"eval_wer": 0.366532393959333, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.3356562137049943, |
|
"eval_loss": 0.46108925342559814, |
|
"eval_runtime": 151.8163, |
|
"eval_samples_per_second": 37.256, |
|
"eval_steps_per_second": 4.657, |
|
"eval_wer": 0.3601771757795574, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"grad_norm": 0.8062695860862732, |
|
"learning_rate": 9.498947368421052e-05, |
|
"loss": 0.4324, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3550135501355014, |
|
"eval_loss": 0.46888086199760437, |
|
"eval_runtime": 152.4379, |
|
"eval_samples_per_second": 37.104, |
|
"eval_steps_per_second": 4.638, |
|
"eval_wer": 0.3609314567251368, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.3743708865660085, |
|
"eval_loss": 0.4573034346103668, |
|
"eval_runtime": 151.3077, |
|
"eval_samples_per_second": 37.381, |
|
"eval_steps_per_second": 4.673, |
|
"eval_wer": 0.3602574184333424, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"eval_loss": 0.45749789476394653, |
|
"eval_runtime": 151.5824, |
|
"eval_samples_per_second": 37.313, |
|
"eval_steps_per_second": 4.664, |
|
"eval_wer": 0.3546083356068752, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.4130855594270229, |
|
"eval_loss": 0.4555954933166504, |
|
"eval_runtime": 151.6035, |
|
"eval_samples_per_second": 37.308, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.35836369180401534, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.43244289585753, |
|
"eval_loss": 0.4495578408241272, |
|
"eval_runtime": 152.5621, |
|
"eval_samples_per_second": 37.073, |
|
"eval_steps_per_second": 4.634, |
|
"eval_wer": 0.350724591163679, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"grad_norm": 0.7916799187660217, |
|
"learning_rate": 7.92e-05, |
|
"loss": 0.4255, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4518002322880372, |
|
"eval_loss": 0.44609567523002625, |
|
"eval_runtime": 151.8498, |
|
"eval_samples_per_second": 37.247, |
|
"eval_steps_per_second": 4.656, |
|
"eval_wer": 0.34671245847442667, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.4711575687185443, |
|
"eval_loss": 0.44341230392456055, |
|
"eval_runtime": 152.528, |
|
"eval_samples_per_second": 37.082, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.3462470510824734, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.4905149051490514, |
|
"eval_loss": 0.44362780451774597, |
|
"eval_runtime": 152.5253, |
|
"eval_samples_per_second": 37.082, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.3516393574168285, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.5098722415795587, |
|
"eval_loss": 0.4406072199344635, |
|
"eval_runtime": 152.4039, |
|
"eval_samples_per_second": 37.112, |
|
"eval_steps_per_second": 4.639, |
|
"eval_wer": 0.34579769222127715, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.5292295780100658, |
|
"eval_loss": 0.43874725699424744, |
|
"eval_runtime": 152.6604, |
|
"eval_samples_per_second": 37.05, |
|
"eval_steps_per_second": 4.631, |
|
"eval_wer": 0.3439360626534641, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"grad_norm": 0.7491864562034607, |
|
"learning_rate": 6.344210526315788e-05, |
|
"loss": 0.4094, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"eval_loss": 0.43253499269485474, |
|
"eval_runtime": 153.8006, |
|
"eval_samples_per_second": 36.775, |
|
"eval_steps_per_second": 4.597, |
|
"eval_wer": 0.3409831329941744, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.5679442508710801, |
|
"eval_loss": 0.4359830617904663, |
|
"eval_runtime": 153.3674, |
|
"eval_samples_per_second": 36.879, |
|
"eval_steps_per_second": 4.61, |
|
"eval_wer": 0.3419299963088379, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.5873015873015874, |
|
"eval_loss": 0.4285949170589447, |
|
"eval_runtime": 153.3711, |
|
"eval_samples_per_second": 36.878, |
|
"eval_steps_per_second": 4.61, |
|
"eval_wer": 0.3377252812505015, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.6066589237320945, |
|
"eval_loss": 0.43007034063339233, |
|
"eval_runtime": 152.2201, |
|
"eval_samples_per_second": 37.157, |
|
"eval_steps_per_second": 4.645, |
|
"eval_wer": 0.3335526632536791, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 0.42966797947883606, |
|
"eval_runtime": 152.0163, |
|
"eval_samples_per_second": 37.207, |
|
"eval_steps_per_second": 4.651, |
|
"eval_wer": 0.3322848293238754, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"grad_norm": 1.047472596168518, |
|
"learning_rate": 4.765263157894736e-05, |
|
"loss": 0.4018, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.645373596593109, |
|
"eval_loss": 0.4270441234111786, |
|
"eval_runtime": 152.8058, |
|
"eval_samples_per_second": 37.014, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.3338575853380623, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.664730933023616, |
|
"eval_loss": 0.4267289638519287, |
|
"eval_runtime": 152.5032, |
|
"eval_samples_per_second": 37.088, |
|
"eval_steps_per_second": 4.636, |
|
"eval_wer": 0.3319959557702492, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.684088269454123, |
|
"eval_loss": 0.4224300980567932, |
|
"eval_runtime": 152.5862, |
|
"eval_samples_per_second": 37.068, |
|
"eval_steps_per_second": 4.633, |
|
"eval_wer": 0.33275023671582865, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.7034456058846303, |
|
"eval_loss": 0.4207303822040558, |
|
"eval_runtime": 154.5205, |
|
"eval_samples_per_second": 36.604, |
|
"eval_steps_per_second": 4.575, |
|
"eval_wer": 0.32984545264881, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.7228029423151374, |
|
"eval_loss": 0.4197385013103485, |
|
"eval_runtime": 152.0624, |
|
"eval_samples_per_second": 37.195, |
|
"eval_steps_per_second": 4.649, |
|
"eval_wer": 0.32978125852578194, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"grad_norm": 1.4507739543914795, |
|
"learning_rate": 3.189473684210526e-05, |
|
"loss": 0.3899, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7421602787456445, |
|
"eval_loss": 0.4183507561683655, |
|
"eval_runtime": 157.4278, |
|
"eval_samples_per_second": 35.928, |
|
"eval_steps_per_second": 4.491, |
|
"eval_wer": 0.3258493684903147, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.7615176151761518, |
|
"eval_loss": 0.4164830148220062, |
|
"eval_runtime": 153.0475, |
|
"eval_samples_per_second": 36.956, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.3262024361669689, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.7808749516066589, |
|
"eval_loss": 0.41182050108909607, |
|
"eval_runtime": 152.4839, |
|
"eval_samples_per_second": 37.092, |
|
"eval_steps_per_second": 4.637, |
|
"eval_wer": 0.322864341769511, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.800232288037166, |
|
"eval_loss": 0.4134317636489868, |
|
"eval_runtime": 152.6353, |
|
"eval_samples_per_second": 37.056, |
|
"eval_steps_per_second": 4.632, |
|
"eval_wer": 0.3232334579769222, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.8195896244676733, |
|
"eval_loss": 0.4126824736595154, |
|
"eval_runtime": 152.5246, |
|
"eval_samples_per_second": 37.083, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.3209064210171559, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"grad_norm": 1.0012460947036743, |
|
"learning_rate": 1.6105263157894736e-05, |
|
"loss": 0.3665, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8389469608981805, |
|
"eval_loss": 0.41083237528800964, |
|
"eval_runtime": 152.9993, |
|
"eval_samples_per_second": 36.967, |
|
"eval_steps_per_second": 4.621, |
|
"eval_wer": 0.32109900338624, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.8583042973286876, |
|
"eval_loss": 0.4090138077735901, |
|
"eval_runtime": 152.5291, |
|
"eval_samples_per_second": 37.081, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.3199114121102213, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.8776616337591947, |
|
"eval_loss": 0.407578706741333, |
|
"eval_runtime": 153.0711, |
|
"eval_samples_per_second": 36.95, |
|
"eval_steps_per_second": 4.619, |
|
"eval_wer": 0.32087432395564186, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.897018970189702, |
|
"eval_loss": 0.40649694204330444, |
|
"eval_runtime": 154.4136, |
|
"eval_samples_per_second": 36.629, |
|
"eval_steps_per_second": 4.579, |
|
"eval_wer": 0.31981512092567926, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.916376306620209, |
|
"eval_loss": 0.40620651841163635, |
|
"eval_runtime": 153.7508, |
|
"eval_samples_per_second": 36.787, |
|
"eval_steps_per_second": 4.598, |
|
"eval_wer": 0.31923737381842693, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"grad_norm": 0.7244949340820312, |
|
"learning_rate": 3.157894736842105e-07, |
|
"loss": 0.3698, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"eval_loss": 0.4060620963573456, |
|
"eval_runtime": 153.976, |
|
"eval_samples_per_second": 36.733, |
|
"eval_steps_per_second": 4.592, |
|
"eval_wer": 0.31928551941069794, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"step": 10000, |
|
"total_flos": 1.1255918428180738e+19, |
|
"train_loss": 0.7262251342773437, |
|
"train_runtime": 19143.2125, |
|
"train_samples_per_second": 4.179, |
|
"train_steps_per_second": 0.522 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 400, |
|
"total_flos": 1.1255918428180738e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|