wav2vec2-xlsr-53-ft-btb-ccv-cy / trainer_state.json
DewiBrynJones's picture
End of training
55efd32 verified
raw
history blame
29.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9357336430507162,
"eval_steps": 100,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019357336430507164,
"eval_loss": 3.5727736949920654,
"eval_runtime": 146.249,
"eval_samples_per_second": 38.674,
"eval_steps_per_second": 4.834,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.03871467286101433,
"eval_loss": 3.076800584793091,
"eval_runtime": 143.9591,
"eval_samples_per_second": 39.289,
"eval_steps_per_second": 4.911,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.05807200929152149,
"eval_loss": 3.500979423522949,
"eval_runtime": 144.2352,
"eval_samples_per_second": 39.214,
"eval_steps_per_second": 4.902,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 0.07742934572202866,
"eval_loss": 2.0594074726104736,
"eval_runtime": 144.7975,
"eval_samples_per_second": 39.061,
"eval_steps_per_second": 4.883,
"eval_wer": 0.9899857168076263,
"step": 400
},
{
"epoch": 0.09678668215253582,
"grad_norm": 3.567307710647583,
"learning_rate": 0.00029759999999999997,
"loss": 4.06,
"step": 500
},
{
"epoch": 0.09678668215253582,
"eval_loss": 1.4703481197357178,
"eval_runtime": 145.0673,
"eval_samples_per_second": 38.989,
"eval_steps_per_second": 4.874,
"eval_wer": 0.8800211840605993,
"step": 500
},
{
"epoch": 0.11614401858304298,
"eval_loss": 1.2463648319244385,
"eval_runtime": 146.5204,
"eval_samples_per_second": 38.602,
"eval_steps_per_second": 4.825,
"eval_wer": 0.8296608945451044,
"step": 600
},
{
"epoch": 0.13550135501355012,
"eval_loss": 1.0686180591583252,
"eval_runtime": 145.9363,
"eval_samples_per_second": 38.757,
"eval_steps_per_second": 4.845,
"eval_wer": 0.7492738039832453,
"step": 700
},
{
"epoch": 0.1548586914440573,
"eval_loss": 1.006879448890686,
"eval_runtime": 146.414,
"eval_samples_per_second": 38.63,
"eval_steps_per_second": 4.829,
"eval_wer": 0.7116239508273018,
"step": 800
},
{
"epoch": 0.17421602787456447,
"eval_loss": 0.936712920665741,
"eval_runtime": 146.8805,
"eval_samples_per_second": 38.507,
"eval_steps_per_second": 4.813,
"eval_wer": 0.6887868915600777,
"step": 900
},
{
"epoch": 0.19357336430507163,
"grad_norm": 2.5543222427368164,
"learning_rate": 0.0002844,
"loss": 1.0399,
"step": 1000
},
{
"epoch": 0.19357336430507163,
"eval_loss": 0.8960636854171753,
"eval_runtime": 146.9745,
"eval_samples_per_second": 38.483,
"eval_steps_per_second": 4.81,
"eval_wer": 0.6741987771019563,
"step": 1000
},
{
"epoch": 0.2129307007355788,
"eval_loss": 0.896744430065155,
"eval_runtime": 146.7401,
"eval_samples_per_second": 38.544,
"eval_steps_per_second": 4.818,
"eval_wer": 0.6412671919885734,
"step": 1100
},
{
"epoch": 0.23228803716608595,
"eval_loss": 0.8311247825622559,
"eval_runtime": 145.8768,
"eval_samples_per_second": 38.772,
"eval_steps_per_second": 4.847,
"eval_wer": 0.6152685721622185,
"step": 1200
},
{
"epoch": 0.2516453735965931,
"eval_loss": 0.8018700480461121,
"eval_runtime": 146.5709,
"eval_samples_per_second": 38.589,
"eval_steps_per_second": 4.824,
"eval_wer": 0.5965238882380318,
"step": 1300
},
{
"epoch": 0.27100271002710025,
"eval_loss": 0.7925447225570679,
"eval_runtime": 146.4405,
"eval_samples_per_second": 38.623,
"eval_steps_per_second": 4.828,
"eval_wer": 0.5926561923255926,
"step": 1400
},
{
"epoch": 0.29036004645760743,
"grad_norm": 2.373326539993286,
"learning_rate": 0.00026861052631578947,
"loss": 0.8395,
"step": 1500
},
{
"epoch": 0.29036004645760743,
"eval_loss": 0.8164969086647034,
"eval_runtime": 151.4209,
"eval_samples_per_second": 37.353,
"eval_steps_per_second": 4.669,
"eval_wer": 0.5986743913594711,
"step": 1500
},
{
"epoch": 0.3097173828881146,
"eval_loss": 0.7696186304092407,
"eval_runtime": 147.0556,
"eval_samples_per_second": 38.462,
"eval_steps_per_second": 4.808,
"eval_wer": 0.6150278442008634,
"step": 1600
},
{
"epoch": 0.32907471931862176,
"eval_loss": 0.7454735636711121,
"eval_runtime": 148.1475,
"eval_samples_per_second": 38.178,
"eval_steps_per_second": 4.772,
"eval_wer": 0.5624207603793873,
"step": 1700
},
{
"epoch": 0.34843205574912894,
"eval_loss": 0.7681124806404114,
"eval_runtime": 147.9355,
"eval_samples_per_second": 38.233,
"eval_steps_per_second": 4.779,
"eval_wer": 0.5684068623517518,
"step": 1800
},
{
"epoch": 0.3677893921796361,
"eval_loss": 0.7292491793632507,
"eval_runtime": 148.2347,
"eval_samples_per_second": 38.156,
"eval_steps_per_second": 4.769,
"eval_wer": 0.5609282470189854,
"step": 1900
},
{
"epoch": 0.38714672861014326,
"grad_norm": 2.988316059112549,
"learning_rate": 0.0002528210526315789,
"loss": 0.7574,
"step": 2000
},
{
"epoch": 0.38714672861014326,
"eval_loss": 0.7304644584655762,
"eval_runtime": 148.3775,
"eval_samples_per_second": 38.119,
"eval_steps_per_second": 4.765,
"eval_wer": 0.5534014860939481,
"step": 2000
},
{
"epoch": 0.4065040650406504,
"eval_loss": 0.7095713019371033,
"eval_runtime": 148.1439,
"eval_samples_per_second": 38.179,
"eval_steps_per_second": 4.772,
"eval_wer": 0.5363418978992474,
"step": 2100
},
{
"epoch": 0.4258614014711576,
"eval_loss": 0.7107743620872498,
"eval_runtime": 147.443,
"eval_samples_per_second": 38.361,
"eval_steps_per_second": 4.795,
"eval_wer": 0.5572370849448733,
"step": 2200
},
{
"epoch": 0.4452187379016647,
"eval_loss": 0.6702781319618225,
"eval_runtime": 147.3568,
"eval_samples_per_second": 38.383,
"eval_steps_per_second": 4.798,
"eval_wer": 0.5175330198520326,
"step": 2300
},
{
"epoch": 0.4645760743321719,
"eval_loss": 0.6596451997756958,
"eval_runtime": 148.5753,
"eval_samples_per_second": 38.068,
"eval_steps_per_second": 4.759,
"eval_wer": 0.514885012277126,
"step": 2400
},
{
"epoch": 0.48393341076267904,
"grad_norm": 3.3213086128234863,
"learning_rate": 0.0002370315789473684,
"loss": 0.6864,
"step": 2500
},
{
"epoch": 0.48393341076267904,
"eval_loss": 0.6845841407775879,
"eval_runtime": 149.4982,
"eval_samples_per_second": 37.833,
"eval_steps_per_second": 4.729,
"eval_wer": 0.5336457447320698,
"step": 2500
},
{
"epoch": 0.5032907471931862,
"eval_loss": 0.6666129231452942,
"eval_runtime": 148.0482,
"eval_samples_per_second": 38.204,
"eval_steps_per_second": 4.775,
"eval_wer": 0.5285744090128549,
"step": 2600
},
{
"epoch": 0.5226480836236934,
"eval_loss": 0.6390946507453918,
"eval_runtime": 148.4402,
"eval_samples_per_second": 38.103,
"eval_steps_per_second": 4.763,
"eval_wer": 0.4949366885461636,
"step": 2700
},
{
"epoch": 0.5420054200542005,
"eval_loss": 0.6295592188835144,
"eval_runtime": 147.8141,
"eval_samples_per_second": 38.264,
"eval_steps_per_second": 4.783,
"eval_wer": 0.4989648697661729,
"step": 2800
},
{
"epoch": 0.5613627564847077,
"eval_loss": 0.6291782855987549,
"eval_runtime": 148.1212,
"eval_samples_per_second": 38.185,
"eval_steps_per_second": 4.773,
"eval_wer": 0.4957391150840141,
"step": 2900
},
{
"epoch": 0.5807200929152149,
"grad_norm": 5.012236595153809,
"learning_rate": 0.00022124210526315786,
"loss": 0.6734,
"step": 3000
},
{
"epoch": 0.5807200929152149,
"eval_loss": 0.6164219975471497,
"eval_runtime": 148.0479,
"eval_samples_per_second": 38.204,
"eval_steps_per_second": 4.775,
"eval_wer": 0.47652902376787404,
"step": 3000
},
{
"epoch": 0.6000774293457221,
"eval_loss": 0.6179572343826294,
"eval_runtime": 148.2452,
"eval_samples_per_second": 38.153,
"eval_steps_per_second": 4.769,
"eval_wer": 0.4777808091669208,
"step": 3100
},
{
"epoch": 0.6194347657762292,
"eval_loss": 0.6132367849349976,
"eval_runtime": 148.4317,
"eval_samples_per_second": 38.105,
"eval_steps_per_second": 4.763,
"eval_wer": 0.49086036173388325,
"step": 3200
},
{
"epoch": 0.6387921022067363,
"eval_loss": 0.6107444763183594,
"eval_runtime": 148.2189,
"eval_samples_per_second": 38.16,
"eval_steps_per_second": 4.77,
"eval_wer": 0.4683442730817994,
"step": 3300
},
{
"epoch": 0.6581494386372435,
"eval_loss": 0.6068131327629089,
"eval_runtime": 147.7251,
"eval_samples_per_second": 38.287,
"eval_steps_per_second": 4.786,
"eval_wer": 0.4748760250999021,
"step": 3400
},
{
"epoch": 0.6775067750677507,
"grad_norm": 3.184985399246216,
"learning_rate": 0.00020545263157894736,
"loss": 0.6433,
"step": 3500
},
{
"epoch": 0.6775067750677507,
"eval_loss": 0.6008120775222778,
"eval_runtime": 147.947,
"eval_samples_per_second": 38.23,
"eval_steps_per_second": 4.779,
"eval_wer": 0.47725120765193946,
"step": 3500
},
{
"epoch": 0.6968641114982579,
"eval_loss": 0.5916668772697449,
"eval_runtime": 147.0363,
"eval_samples_per_second": 38.467,
"eval_steps_per_second": 4.808,
"eval_wer": 0.4656320713838648,
"step": 3600
},
{
"epoch": 0.716221447928765,
"eval_loss": 0.5885007381439209,
"eval_runtime": 148.9484,
"eval_samples_per_second": 37.973,
"eval_steps_per_second": 4.747,
"eval_wer": 0.4600953282726966,
"step": 3700
},
{
"epoch": 0.7355787843592722,
"eval_loss": 0.5848101377487183,
"eval_runtime": 148.7388,
"eval_samples_per_second": 38.026,
"eval_steps_per_second": 4.753,
"eval_wer": 0.44823546404326686,
"step": 3800
},
{
"epoch": 0.7549361207897793,
"eval_loss": 0.5852195620536804,
"eval_runtime": 148.3227,
"eval_samples_per_second": 38.133,
"eval_steps_per_second": 4.767,
"eval_wer": 0.44963168621912664,
"step": 3900
},
{
"epoch": 0.7742934572202865,
"grad_norm": 4.9515814781188965,
"learning_rate": 0.00018966315789473683,
"loss": 0.6217,
"step": 4000
},
{
"epoch": 0.7742934572202865,
"eval_loss": 0.577220618724823,
"eval_runtime": 147.6504,
"eval_samples_per_second": 38.307,
"eval_steps_per_second": 4.788,
"eval_wer": 0.44163951790213607,
"step": 4000
},
{
"epoch": 0.7936507936507936,
"eval_loss": 0.56705242395401,
"eval_runtime": 152.2357,
"eval_samples_per_second": 37.153,
"eval_steps_per_second": 4.644,
"eval_wer": 0.44691948452119207,
"step": 4100
},
{
"epoch": 0.8130081300813008,
"eval_loss": 0.5668296813964844,
"eval_runtime": 148.0111,
"eval_samples_per_second": 38.213,
"eval_steps_per_second": 4.777,
"eval_wer": 0.4462614947601547,
"step": 4200
},
{
"epoch": 0.832365466511808,
"eval_loss": 0.5557947754859924,
"eval_runtime": 149.4281,
"eval_samples_per_second": 37.851,
"eval_steps_per_second": 4.731,
"eval_wer": 0.44006676188794913,
"step": 4300
},
{
"epoch": 0.8517228029423152,
"eval_loss": 0.5651959776878357,
"eval_runtime": 149.3956,
"eval_samples_per_second": 37.859,
"eval_steps_per_second": 4.732,
"eval_wer": 0.4306783713950988,
"step": 4400
},
{
"epoch": 0.8710801393728222,
"grad_norm": 3.5483193397521973,
"learning_rate": 0.0001738736842105263,
"loss": 0.5954,
"step": 4500
},
{
"epoch": 0.8710801393728222,
"eval_loss": 0.5561267733573914,
"eval_runtime": 149.9212,
"eval_samples_per_second": 37.726,
"eval_steps_per_second": 4.716,
"eval_wer": 0.4307265169873698,
"step": 4500
},
{
"epoch": 0.8904374758033294,
"eval_loss": 0.5431749820709229,
"eval_runtime": 149.9454,
"eval_samples_per_second": 37.72,
"eval_steps_per_second": 4.715,
"eval_wer": 0.420648039671968,
"step": 4600
},
{
"epoch": 0.9097948122338366,
"eval_loss": 0.5294374823570251,
"eval_runtime": 148.9794,
"eval_samples_per_second": 37.965,
"eval_steps_per_second": 4.746,
"eval_wer": 0.41371507438494004,
"step": 4700
},
{
"epoch": 0.9291521486643438,
"eval_loss": 0.5444126725196838,
"eval_runtime": 148.5962,
"eval_samples_per_second": 38.063,
"eval_steps_per_second": 4.758,
"eval_wer": 0.4209529617563512,
"step": 4800
},
{
"epoch": 0.948509485094851,
"eval_loss": 0.5291473269462585,
"eval_runtime": 150.1832,
"eval_samples_per_second": 37.661,
"eval_steps_per_second": 4.708,
"eval_wer": 0.4156569466065382,
"step": 4900
},
{
"epoch": 0.9678668215253581,
"grad_norm": 3.1595053672790527,
"learning_rate": 0.0001581157894736842,
"loss": 0.5663,
"step": 5000
},
{
"epoch": 0.9678668215253581,
"eval_loss": 0.5428867340087891,
"eval_runtime": 149.6435,
"eval_samples_per_second": 37.797,
"eval_steps_per_second": 4.725,
"eval_wer": 0.4139558023462952,
"step": 5000
},
{
"epoch": 0.9872241579558653,
"eval_loss": 0.5208781361579895,
"eval_runtime": 149.0703,
"eval_samples_per_second": 37.942,
"eval_steps_per_second": 4.743,
"eval_wer": 0.41159666832501485,
"step": 5100
},
{
"epoch": 1.0065814943863725,
"eval_loss": 0.5281690359115601,
"eval_runtime": 148.6703,
"eval_samples_per_second": 38.044,
"eval_steps_per_second": 4.755,
"eval_wer": 0.40421434417679064,
"step": 5200
},
{
"epoch": 1.0259388308168795,
"eval_loss": 0.5118032693862915,
"eval_runtime": 148.0473,
"eval_samples_per_second": 38.204,
"eval_steps_per_second": 4.776,
"eval_wer": 0.39184092696313655,
"step": 5300
},
{
"epoch": 1.0452961672473868,
"eval_loss": 0.5089045166969299,
"eval_runtime": 147.9634,
"eval_samples_per_second": 38.226,
"eval_steps_per_second": 4.778,
"eval_wer": 0.39927139670363176,
"step": 5400
},
{
"epoch": 1.064653503677894,
"grad_norm": 2.1315221786499023,
"learning_rate": 0.0001423578947368421,
"loss": 0.4941,
"step": 5500
},
{
"epoch": 1.064653503677894,
"eval_loss": 0.5010989308357239,
"eval_runtime": 147.8753,
"eval_samples_per_second": 38.248,
"eval_steps_per_second": 4.781,
"eval_wer": 0.3921458490475197,
"step": 5500
},
{
"epoch": 1.084010840108401,
"eval_loss": 0.5022321343421936,
"eval_runtime": 148.3164,
"eval_samples_per_second": 38.135,
"eval_steps_per_second": 4.767,
"eval_wer": 0.38869541493476273,
"step": 5600
},
{
"epoch": 1.1033681765389083,
"eval_loss": 0.5066320896148682,
"eval_runtime": 148.554,
"eval_samples_per_second": 38.074,
"eval_steps_per_second": 4.759,
"eval_wer": 0.38526102935276274,
"step": 5700
},
{
"epoch": 1.1227255129694154,
"eval_loss": 0.49068546295166016,
"eval_runtime": 148.2455,
"eval_samples_per_second": 38.153,
"eval_steps_per_second": 4.769,
"eval_wer": 0.3815217216863796,
"step": 5800
},
{
"epoch": 1.1420828493999227,
"eval_loss": 0.4982084035873413,
"eval_runtime": 148.9817,
"eval_samples_per_second": 37.964,
"eval_steps_per_second": 4.746,
"eval_wer": 0.38086373192534223,
"step": 5900
},
{
"epoch": 1.1614401858304297,
"grad_norm": 0.8627763390541077,
"learning_rate": 0.00012656842105263156,
"loss": 0.4628,
"step": 6000
},
{
"epoch": 1.1614401858304297,
"eval_loss": 0.49128398299217224,
"eval_runtime": 149.7714,
"eval_samples_per_second": 37.764,
"eval_steps_per_second": 4.721,
"eval_wer": 0.38956203559564123,
"step": 6000
},
{
"epoch": 1.1807975222609368,
"eval_loss": 0.48260679841041565,
"eval_runtime": 149.8626,
"eval_samples_per_second": 37.741,
"eval_steps_per_second": 4.718,
"eval_wer": 0.373449310715604,
"step": 6100
},
{
"epoch": 1.2001548586914441,
"eval_loss": 0.4883708655834198,
"eval_runtime": 149.0462,
"eval_samples_per_second": 37.948,
"eval_steps_per_second": 4.743,
"eval_wer": 0.3739949607613423,
"step": 6200
},
{
"epoch": 1.2195121951219512,
"eval_loss": 0.4841243028640747,
"eval_runtime": 148.8948,
"eval_samples_per_second": 37.987,
"eval_steps_per_second": 4.748,
"eval_wer": 0.37004702219511804,
"step": 6300
},
{
"epoch": 1.2388695315524583,
"eval_loss": 0.4828014671802521,
"eval_runtime": 149.5102,
"eval_samples_per_second": 37.83,
"eval_steps_per_second": 4.729,
"eval_wer": 0.36971000304922086,
"step": 6400
},
{
"epoch": 1.2582268679829656,
"grad_norm": 1.5625278949737549,
"learning_rate": 0.00011077894736842105,
"loss": 0.4435,
"step": 6500
},
{
"epoch": 1.2582268679829656,
"eval_loss": 0.48161521553993225,
"eval_runtime": 148.9005,
"eval_samples_per_second": 37.985,
"eval_steps_per_second": 4.748,
"eval_wer": 0.37389866957680024,
"step": 6500
},
{
"epoch": 1.2775842044134726,
"eval_loss": 0.47928386926651,
"eval_runtime": 149.5106,
"eval_samples_per_second": 37.83,
"eval_steps_per_second": 4.729,
"eval_wer": 0.3673990146202115,
"step": 6600
},
{
"epoch": 1.29694154084398,
"eval_loss": 0.4744218587875366,
"eval_runtime": 148.9048,
"eval_samples_per_second": 37.984,
"eval_steps_per_second": 4.748,
"eval_wer": 0.36688546163598723,
"step": 6700
},
{
"epoch": 1.316298877274487,
"eval_loss": 0.46821942925453186,
"eval_runtime": 148.7411,
"eval_samples_per_second": 38.026,
"eval_steps_per_second": 4.753,
"eval_wer": 0.3608672626021088,
"step": 6800
},
{
"epoch": 1.3356562137049943,
"eval_loss": 0.46276068687438965,
"eval_runtime": 150.3036,
"eval_samples_per_second": 37.63,
"eval_steps_per_second": 4.704,
"eval_wer": 0.359438943364735,
"step": 6900
},
{
"epoch": 1.3550135501355014,
"grad_norm": 0.7794021964073181,
"learning_rate": 9.498947368421052e-05,
"loss": 0.4298,
"step": 7000
},
{
"epoch": 1.3550135501355014,
"eval_loss": 0.4662827253341675,
"eval_runtime": 149.5174,
"eval_samples_per_second": 37.828,
"eval_steps_per_second": 4.729,
"eval_wer": 0.3554428592062397,
"step": 7000
},
{
"epoch": 1.3743708865660085,
"eval_loss": 0.4656233489513397,
"eval_runtime": 148.8165,
"eval_samples_per_second": 38.007,
"eval_steps_per_second": 4.751,
"eval_wer": 0.3583797403347724,
"step": 7100
},
{
"epoch": 1.3937282229965158,
"eval_loss": 0.45931774377822876,
"eval_runtime": 150.2338,
"eval_samples_per_second": 37.648,
"eval_steps_per_second": 4.706,
"eval_wer": 0.35648601370544525,
"step": 7200
},
{
"epoch": 1.4130855594270229,
"eval_loss": 0.45989105105400085,
"eval_runtime": 150.9977,
"eval_samples_per_second": 37.458,
"eval_steps_per_second": 4.682,
"eval_wer": 0.3565823048899873,
"step": 7300
},
{
"epoch": 1.43244289585753,
"eval_loss": 0.46128061413764954,
"eval_runtime": 150.0246,
"eval_samples_per_second": 37.7,
"eval_steps_per_second": 4.713,
"eval_wer": 0.35208871627802474,
"step": 7400
},
{
"epoch": 1.4518002322880372,
"grad_norm": 0.7098228931427002,
"learning_rate": 7.92e-05,
"loss": 0.4292,
"step": 7500
},
{
"epoch": 1.4518002322880372,
"eval_loss": 0.4520701467990875,
"eval_runtime": 149.5493,
"eval_samples_per_second": 37.82,
"eval_steps_per_second": 4.728,
"eval_wer": 0.34745069088924907,
"step": 7500
},
{
"epoch": 1.4711575687185443,
"eval_loss": 0.4512416422367096,
"eval_runtime": 149.5055,
"eval_samples_per_second": 37.831,
"eval_steps_per_second": 4.729,
"eval_wer": 0.349071592495707,
"step": 7600
},
{
"epoch": 1.4905149051490514,
"eval_loss": 0.4478435218334198,
"eval_runtime": 149.0622,
"eval_samples_per_second": 37.944,
"eval_steps_per_second": 4.743,
"eval_wer": 0.35175169713212756,
"step": 7700
},
{
"epoch": 1.5098722415795587,
"eval_loss": 0.4415859878063202,
"eval_runtime": 148.899,
"eval_samples_per_second": 37.985,
"eval_steps_per_second": 4.748,
"eval_wer": 0.34213862720867905,
"step": 7800
},
{
"epoch": 1.5292295780100658,
"eval_loss": 0.4426974952220917,
"eval_runtime": 149.2815,
"eval_samples_per_second": 37.888,
"eval_steps_per_second": 4.736,
"eval_wer": 0.3458779348750622,
"step": 7900
},
{
"epoch": 1.5485869144405728,
"grad_norm": 1.0578420162200928,
"learning_rate": 6.344210526315788e-05,
"loss": 0.4072,
"step": 8000
},
{
"epoch": 1.5485869144405728,
"eval_loss": 0.43879374861717224,
"eval_runtime": 148.7049,
"eval_samples_per_second": 38.035,
"eval_steps_per_second": 4.754,
"eval_wer": 0.34565325544446407,
"step": 8000
},
{
"epoch": 1.5679442508710801,
"eval_loss": 0.44011563062667847,
"eval_runtime": 150.4046,
"eval_samples_per_second": 37.605,
"eval_steps_per_second": 4.701,
"eval_wer": 0.3453162362985669,
"step": 8100
},
{
"epoch": 1.5873015873015874,
"eval_loss": 0.43649429082870483,
"eval_runtime": 148.8759,
"eval_samples_per_second": 37.991,
"eval_steps_per_second": 4.749,
"eval_wer": 0.3434385581999968,
"step": 8200
},
{
"epoch": 1.6066589237320945,
"eval_loss": 0.4346481263637543,
"eval_runtime": 149.1351,
"eval_samples_per_second": 37.925,
"eval_steps_per_second": 4.741,
"eval_wer": 0.33974739612588467,
"step": 8300
},
{
"epoch": 1.6260162601626016,
"eval_loss": 0.43247029185295105,
"eval_runtime": 149.5691,
"eval_samples_per_second": 37.815,
"eval_steps_per_second": 4.727,
"eval_wer": 0.33604018552101556,
"step": 8400
},
{
"epoch": 1.645373596593109,
"grad_norm": 1.7964462041854858,
"learning_rate": 4.765263157894736e-05,
"loss": 0.3991,
"step": 8500
},
{
"epoch": 1.645373596593109,
"eval_loss": 0.43196219205856323,
"eval_runtime": 150.109,
"eval_samples_per_second": 37.679,
"eval_steps_per_second": 4.71,
"eval_wer": 0.3357834090289034,
"step": 8500
},
{
"epoch": 1.664730933023616,
"eval_loss": 0.42872872948646545,
"eval_runtime": 150.0401,
"eval_samples_per_second": 37.697,
"eval_steps_per_second": 4.712,
"eval_wer": 0.3354624384137632,
"step": 8600
},
{
"epoch": 1.684088269454123,
"eval_loss": 0.42928823828697205,
"eval_runtime": 149.2284,
"eval_samples_per_second": 37.902,
"eval_steps_per_second": 4.738,
"eval_wer": 0.33342427500762306,
"step": 8700
},
{
"epoch": 1.7034456058846303,
"eval_loss": 0.4271656274795532,
"eval_runtime": 149.5274,
"eval_samples_per_second": 37.826,
"eval_steps_per_second": 4.728,
"eval_wer": 0.333327983823081,
"step": 8800
},
{
"epoch": 1.7228029423151374,
"eval_loss": 0.4219857156276703,
"eval_runtime": 149.1865,
"eval_samples_per_second": 37.912,
"eval_steps_per_second": 4.739,
"eval_wer": 0.3302948115100063,
"step": 8900
},
{
"epoch": 1.7421602787456445,
"grad_norm": 1.7460029125213623,
"learning_rate": 3.189473684210526e-05,
"loss": 0.3916,
"step": 9000
},
{
"epoch": 1.7421602787456445,
"eval_loss": 0.4238153398036957,
"eval_runtime": 149.4733,
"eval_samples_per_second": 37.84,
"eval_steps_per_second": 4.73,
"eval_wer": 0.3291874628877726,
"step": 9000
},
{
"epoch": 1.7615176151761518,
"eval_loss": 0.42150619626045227,
"eval_runtime": 148.8948,
"eval_samples_per_second": 37.987,
"eval_steps_per_second": 4.748,
"eval_wer": 0.32812825985781,
"step": 9100
},
{
"epoch": 1.7808749516066589,
"eval_loss": 0.4176540672779083,
"eval_runtime": 150.0504,
"eval_samples_per_second": 37.694,
"eval_steps_per_second": 4.712,
"eval_wer": 0.3265876009051371,
"step": 9200
},
{
"epoch": 1.800232288037166,
"eval_loss": 0.41875413060188293,
"eval_runtime": 150.5043,
"eval_samples_per_second": 37.58,
"eval_steps_per_second": 4.698,
"eval_wer": 0.32573702877501565,
"step": 9300
},
{
"epoch": 1.8195896244676733,
"eval_loss": 0.41637665033340454,
"eval_runtime": 150.1757,
"eval_samples_per_second": 37.663,
"eval_steps_per_second": 4.708,
"eval_wer": 0.32469387427581003,
"step": 9400
},
{
"epoch": 1.8389469608981805,
"grad_norm": 0.8558129668235779,
"learning_rate": 1.6105263157894736e-05,
"loss": 0.3687,
"step": 9500
},
{
"epoch": 1.8389469608981805,
"eval_loss": 0.41629916429519653,
"eval_runtime": 149.3775,
"eval_samples_per_second": 37.864,
"eval_steps_per_second": 4.733,
"eval_wer": 0.3242766124761278,
"step": 9500
},
{
"epoch": 1.8583042973286876,
"eval_loss": 0.4140332341194153,
"eval_runtime": 149.5915,
"eval_samples_per_second": 37.81,
"eval_steps_per_second": 4.726,
"eval_wer": 0.3238914477379596,
"step": 9600
},
{
"epoch": 1.8776616337591947,
"eval_loss": 0.4132048189640045,
"eval_runtime": 150.4642,
"eval_samples_per_second": 37.59,
"eval_steps_per_second": 4.699,
"eval_wer": 0.324661777214296,
"step": 9700
},
{
"epoch": 1.897018970189702,
"eval_loss": 0.4122065007686615,
"eval_runtime": 150.0219,
"eval_samples_per_second": 37.701,
"eval_steps_per_second": 4.713,
"eval_wer": 0.3223668373160437,
"step": 9800
},
{
"epoch": 1.916376306620209,
"eval_loss": 0.41170838475227356,
"eval_runtime": 149.8162,
"eval_samples_per_second": 37.753,
"eval_steps_per_second": 4.719,
"eval_wer": 0.3218532843318194,
"step": 9900
},
{
"epoch": 1.9357336430507162,
"grad_norm": 2.01002836227417,
"learning_rate": 3.157894736842105e-07,
"loss": 0.3707,
"step": 10000
},
{
"epoch": 1.9357336430507162,
"eval_loss": 0.41177985072135925,
"eval_runtime": 148.9604,
"eval_samples_per_second": 37.97,
"eval_steps_per_second": 4.746,
"eval_wer": 0.32191747845484747,
"step": 10000
},
{
"epoch": 1.9357336430507162,
"step": 10000,
"total_flos": 1.1255918428180738e+19,
"train_loss": 0.7339932418823242,
"train_runtime": 18725.1494,
"train_samples_per_second": 4.272,
"train_steps_per_second": 0.534
}
],
"logging_steps": 500,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1255918428180738e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}