whisper2 / trainer_state.json
{
"best_metric": 28.05415617128463,
"best_model_checkpoint": "whisper2/checkpoint-430",
"epoch": 7.042253521126761,
"eval_steps": 10,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07042253521126761,
"grad_norm": 43.82194137573242,
"learning_rate": 1.0000000000000001e-07,
"loss": 3.9547,
"step": 5
},
{
"epoch": 0.14084507042253522,
"grad_norm": 45.53117370605469,
"learning_rate": 2.0000000000000002e-07,
"loss": 3.9553,
"step": 10
},
{
"epoch": 0.14084507042253522,
"eval_loss": 3.964555501937866,
"eval_runtime": 264.1292,
"eval_samples_per_second": 1.893,
"eval_steps_per_second": 0.239,
"eval_wer": 74.87405541561712,
"step": 10
},
{
"epoch": 0.2112676056338028,
"grad_norm": 46.162776947021484,
"learning_rate": 3.0000000000000004e-07,
"loss": 3.882,
"step": 15
},
{
"epoch": 0.28169014084507044,
"grad_norm": 46.07596206665039,
"learning_rate": 4.0000000000000003e-07,
"loss": 3.9548,
"step": 20
},
{
"epoch": 0.28169014084507044,
"eval_loss": 3.8793957233428955,
"eval_runtime": 256.6948,
"eval_samples_per_second": 1.948,
"eval_steps_per_second": 0.245,
"eval_wer": 77.67632241813602,
"step": 20
},
{
"epoch": 0.352112676056338,
"grad_norm": 45.13657760620117,
"learning_rate": 5.000000000000001e-07,
"loss": 3.9469,
"step": 25
},
{
"epoch": 0.4225352112676056,
"grad_norm": 44.565940856933594,
"learning_rate": 6.000000000000001e-07,
"loss": 3.8127,
"step": 30
},
{
"epoch": 0.4225352112676056,
"eval_loss": 3.740476608276367,
"eval_runtime": 257.3378,
"eval_samples_per_second": 1.943,
"eval_steps_per_second": 0.245,
"eval_wer": 76.4168765743073,
"step": 30
},
{
"epoch": 0.49295774647887325,
"grad_norm": 44.24871826171875,
"learning_rate": 7.000000000000001e-07,
"loss": 3.7507,
"step": 35
},
{
"epoch": 0.5633802816901409,
"grad_norm": 42.1717529296875,
"learning_rate": 8.000000000000001e-07,
"loss": 3.6178,
"step": 40
},
{
"epoch": 0.5633802816901409,
"eval_loss": 3.5547332763671875,
"eval_runtime": 256.8157,
"eval_samples_per_second": 1.947,
"eval_steps_per_second": 0.245,
"eval_wer": 75.31486146095719,
"step": 40
},
{
"epoch": 0.6338028169014085,
"grad_norm": 44.667205810546875,
"learning_rate": 9.000000000000001e-07,
"loss": 3.4825,
"step": 45
},
{
"epoch": 0.704225352112676,
"grad_norm": 43.76979064941406,
"learning_rate": 1.0000000000000002e-06,
"loss": 3.3992,
"step": 50
},
{
"epoch": 0.704225352112676,
"eval_loss": 3.323503255844116,
"eval_runtime": 255.1809,
"eval_samples_per_second": 1.959,
"eval_steps_per_second": 0.247,
"eval_wer": 70.27707808564232,
"step": 50
},
{
"epoch": 0.7746478873239436,
"grad_norm": 41.28179168701172,
"learning_rate": 1.1e-06,
"loss": 3.3124,
"step": 55
},
{
"epoch": 0.8450704225352113,
"grad_norm": 40.813392639160156,
"learning_rate": 1.2000000000000002e-06,
"loss": 3.1416,
"step": 60
},
{
"epoch": 0.8450704225352113,
"eval_loss": 3.040179491043091,
"eval_runtime": 255.4069,
"eval_samples_per_second": 1.958,
"eval_steps_per_second": 0.247,
"eval_wer": 67.85264483627203,
"step": 60
},
{
"epoch": 0.9154929577464789,
"grad_norm": 40.00282287597656,
"learning_rate": 1.3e-06,
"loss": 2.88,
"step": 65
},
{
"epoch": 0.9859154929577465,
"grad_norm": 40.60588455200195,
"learning_rate": 1.4000000000000001e-06,
"loss": 2.8052,
"step": 70
},
{
"epoch": 0.9859154929577465,
"eval_loss": 2.6852359771728516,
"eval_runtime": 254.3541,
"eval_samples_per_second": 1.966,
"eval_steps_per_second": 0.248,
"eval_wer": 65.96347607052897,
"step": 70
},
{
"epoch": 1.056338028169014,
"grad_norm": 44.205726623535156,
"learning_rate": 1.5e-06,
"loss": 2.4894,
"step": 75
},
{
"epoch": 1.1267605633802817,
"grad_norm": 40.45851516723633,
"learning_rate": 1.6000000000000001e-06,
"loss": 2.3513,
"step": 80
},
{
"epoch": 1.1267605633802817,
"eval_loss": 2.223541021347046,
"eval_runtime": 256.3144,
"eval_samples_per_second": 1.951,
"eval_steps_per_second": 0.246,
"eval_wer": 68.3249370277078,
"step": 80
},
{
"epoch": 1.1971830985915493,
"grad_norm": 37.049591064453125,
"learning_rate": 1.7000000000000002e-06,
"loss": 2.2021,
"step": 85
},
{
"epoch": 1.267605633802817,
"grad_norm": 32.15092468261719,
"learning_rate": 1.8000000000000001e-06,
"loss": 1.893,
"step": 90
},
{
"epoch": 1.267605633802817,
"eval_loss": 1.6707711219787598,
"eval_runtime": 254.2495,
"eval_samples_per_second": 1.967,
"eval_steps_per_second": 0.248,
"eval_wer": 63.822418136020154,
"step": 90
},
{
"epoch": 1.3380281690140845,
"grad_norm": 29.11300277709961,
"learning_rate": 1.9000000000000002e-06,
"loss": 1.6227,
"step": 95
},
{
"epoch": 1.408450704225352,
"grad_norm": 19.466663360595703,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.2871,
"step": 100
},
{
"epoch": 1.408450704225352,
"eval_loss": 1.164486050605774,
"eval_runtime": 254.5126,
"eval_samples_per_second": 1.965,
"eval_steps_per_second": 0.248,
"eval_wer": 63.25566750629723,
"step": 100
},
{
"epoch": 1.4788732394366197,
"grad_norm": 15.238794326782227,
"learning_rate": 2.1000000000000002e-06,
"loss": 1.09,
"step": 105
},
{
"epoch": 1.5492957746478875,
"grad_norm": 10.725071907043457,
"learning_rate": 2.2e-06,
"loss": 0.9146,
"step": 110
},
{
"epoch": 1.5492957746478875,
"eval_loss": 0.8784648776054382,
"eval_runtime": 256.185,
"eval_samples_per_second": 1.952,
"eval_steps_per_second": 0.246,
"eval_wer": 56.83249370277078,
"step": 110
},
{
"epoch": 1.619718309859155,
"grad_norm": 7.8202009201049805,
"learning_rate": 2.3000000000000004e-06,
"loss": 0.8882,
"step": 115
},
{
"epoch": 1.6901408450704225,
"grad_norm": 8.60835075378418,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.8044,
"step": 120
},
{
"epoch": 1.6901408450704225,
"eval_loss": 0.7906607985496521,
"eval_runtime": 255.9613,
"eval_samples_per_second": 1.953,
"eval_steps_per_second": 0.246,
"eval_wer": 46.977329974811084,
"step": 120
},
{
"epoch": 1.76056338028169,
"grad_norm": 9.780821800231934,
"learning_rate": 2.5e-06,
"loss": 0.6849,
"step": 125
},
{
"epoch": 1.8309859154929577,
"grad_norm": 9.33056926727295,
"learning_rate": 2.6e-06,
"loss": 0.6634,
"step": 130
},
{
"epoch": 1.8309859154929577,
"eval_loss": 0.7425487637519836,
"eval_runtime": 255.5846,
"eval_samples_per_second": 1.956,
"eval_steps_per_second": 0.246,
"eval_wer": 47.48110831234257,
"step": 130
},
{
"epoch": 1.9014084507042255,
"grad_norm": 8.966361999511719,
"learning_rate": 2.7000000000000004e-06,
"loss": 0.7421,
"step": 135
},
{
"epoch": 1.971830985915493,
"grad_norm": 7.636435031890869,
"learning_rate": 2.8000000000000003e-06,
"loss": 0.6722,
"step": 140
},
{
"epoch": 1.971830985915493,
"eval_loss": 0.7099979519844055,
"eval_runtime": 253.8483,
"eval_samples_per_second": 1.97,
"eval_steps_per_second": 0.248,
"eval_wer": 45.90680100755667,
"step": 140
},
{
"epoch": 2.0422535211267605,
"grad_norm": 8.085705757141113,
"learning_rate": 2.9e-06,
"loss": 0.6865,
"step": 145
},
{
"epoch": 2.112676056338028,
"grad_norm": 8.131012916564941,
"learning_rate": 3e-06,
"loss": 0.6823,
"step": 150
},
{
"epoch": 2.112676056338028,
"eval_loss": 0.6854478120803833,
"eval_runtime": 255.8245,
"eval_samples_per_second": 1.954,
"eval_steps_per_second": 0.246,
"eval_wer": 42.41183879093199,
"step": 150
},
{
"epoch": 2.183098591549296,
"grad_norm": 8.054609298706055,
"learning_rate": 3.1000000000000004e-06,
"loss": 0.6001,
"step": 155
},
{
"epoch": 2.2535211267605635,
"grad_norm": 6.9759063720703125,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.5802,
"step": 160
},
{
"epoch": 2.2535211267605635,
"eval_loss": 0.6659273505210876,
"eval_runtime": 254.855,
"eval_samples_per_second": 1.962,
"eval_steps_per_second": 0.247,
"eval_wer": 40.42821158690176,
"step": 160
},
{
"epoch": 2.323943661971831,
"grad_norm": 8.077522277832031,
"learning_rate": 3.3000000000000006e-06,
"loss": 0.6065,
"step": 165
},
{
"epoch": 2.3943661971830985,
"grad_norm": 6.6878228187561035,
"learning_rate": 3.4000000000000005e-06,
"loss": 0.6084,
"step": 170
},
{
"epoch": 2.3943661971830985,
"eval_loss": 0.6503352522850037,
"eval_runtime": 253.7567,
"eval_samples_per_second": 1.97,
"eval_steps_per_second": 0.248,
"eval_wer": 40.8375314861461,
"step": 170
},
{
"epoch": 2.464788732394366,
"grad_norm": 7.941697597503662,
"learning_rate": 3.5e-06,
"loss": 0.5972,
"step": 175
},
{
"epoch": 2.535211267605634,
"grad_norm": 7.986533164978027,
"learning_rate": 3.6000000000000003e-06,
"loss": 0.6038,
"step": 180
},
{
"epoch": 2.535211267605634,
"eval_loss": 0.6345599889755249,
"eval_runtime": 254.9306,
"eval_samples_per_second": 1.961,
"eval_steps_per_second": 0.247,
"eval_wer": 41.49874055415617,
"step": 180
},
{
"epoch": 2.6056338028169015,
"grad_norm": 6.744418144226074,
"learning_rate": 3.7e-06,
"loss": 0.5007,
"step": 185
},
{
"epoch": 2.676056338028169,
"grad_norm": 6.323821544647217,
"learning_rate": 3.8000000000000005e-06,
"loss": 0.5095,
"step": 190
},
{
"epoch": 2.676056338028169,
"eval_loss": 0.6247134804725647,
"eval_runtime": 257.1561,
"eval_samples_per_second": 1.944,
"eval_steps_per_second": 0.245,
"eval_wer": 42.03400503778337,
"step": 190
},
{
"epoch": 2.7464788732394365,
"grad_norm": 6.979465961456299,
"learning_rate": 3.900000000000001e-06,
"loss": 0.5943,
"step": 195
},
{
"epoch": 2.816901408450704,
"grad_norm": 6.675357818603516,
"learning_rate": 4.000000000000001e-06,
"loss": 0.5251,
"step": 200
},
{
"epoch": 2.816901408450704,
"eval_loss": 0.6154741644859314,
"eval_runtime": 255.2235,
"eval_samples_per_second": 1.959,
"eval_steps_per_second": 0.247,
"eval_wer": 39.357682619647356,
"step": 200
},
{
"epoch": 2.887323943661972,
"grad_norm": 6.802981853485107,
"learning_rate": 4.1e-06,
"loss": 0.5528,
"step": 205
},
{
"epoch": 2.9577464788732395,
"grad_norm": 6.836462497711182,
"learning_rate": 4.2000000000000004e-06,
"loss": 0.5699,
"step": 210
},
{
"epoch": 2.9577464788732395,
"eval_loss": 0.6045908331871033,
"eval_runtime": 254.5675,
"eval_samples_per_second": 1.964,
"eval_steps_per_second": 0.247,
"eval_wer": 38.350125944584384,
"step": 210
},
{
"epoch": 3.028169014084507,
"grad_norm": 6.114952087402344,
"learning_rate": 4.3e-06,
"loss": 0.478,
"step": 215
},
{
"epoch": 3.0985915492957745,
"grad_norm": 5.803236961364746,
"learning_rate": 4.4e-06,
"loss": 0.4839,
"step": 220
},
{
"epoch": 3.0985915492957745,
"eval_loss": 0.5944731831550598,
"eval_runtime": 254.5629,
"eval_samples_per_second": 1.964,
"eval_steps_per_second": 0.247,
"eval_wer": 37.27959697732997,
"step": 220
},
{
"epoch": 3.169014084507042,
"grad_norm": 5.95841646194458,
"learning_rate": 4.5e-06,
"loss": 0.4982,
"step": 225
},
{
"epoch": 3.23943661971831,
"grad_norm": 6.992792129516602,
"learning_rate": 4.600000000000001e-06,
"loss": 0.4843,
"step": 230
},
{
"epoch": 3.23943661971831,
"eval_loss": 0.5861312747001648,
"eval_runtime": 257.6573,
"eval_samples_per_second": 1.941,
"eval_steps_per_second": 0.245,
"eval_wer": 48.394206549118394,
"step": 230
},
{
"epoch": 3.3098591549295775,
"grad_norm": 5.872804164886475,
"learning_rate": 4.7e-06,
"loss": 0.4471,
"step": 235
},
{
"epoch": 3.380281690140845,
"grad_norm": 6.013182640075684,
"learning_rate": 4.800000000000001e-06,
"loss": 0.4538,
"step": 240
},
{
"epoch": 3.380281690140845,
"eval_loss": 0.5793710350990295,
"eval_runtime": 254.563,
"eval_samples_per_second": 1.964,
"eval_steps_per_second": 0.247,
"eval_wer": 34.66624685138539,
"step": 240
},
{
"epoch": 3.4507042253521125,
"grad_norm": 6.745495319366455,
"learning_rate": 4.9000000000000005e-06,
"loss": 0.4932,
"step": 245
},
{
"epoch": 3.52112676056338,
"grad_norm": 5.320774078369141,
"learning_rate": 5e-06,
"loss": 0.4741,
"step": 250
},
{
"epoch": 3.52112676056338,
"eval_loss": 0.5736850500106812,
"eval_runtime": 255.3883,
"eval_samples_per_second": 1.958,
"eval_steps_per_second": 0.247,
"eval_wer": 33.816120906801004,
"step": 250
},
{
"epoch": 3.591549295774648,
"grad_norm": 6.753683090209961,
"learning_rate": 5.1e-06,
"loss": 0.5025,
"step": 255
},
{
"epoch": 3.6619718309859155,
"grad_norm": 7.474066257476807,
"learning_rate": 5.2e-06,
"loss": 0.4542,
"step": 260
},
{
"epoch": 3.6619718309859155,
"eval_loss": 0.5662725567817688,
"eval_runtime": 255.3299,
"eval_samples_per_second": 1.958,
"eval_steps_per_second": 0.247,
"eval_wer": 41.97103274559194,
"step": 260
},
{
"epoch": 3.732394366197183,
"grad_norm": 5.626581192016602,
"learning_rate": 5.300000000000001e-06,
"loss": 0.4639,
"step": 265
},
{
"epoch": 3.802816901408451,
"grad_norm": 5.518383026123047,
"learning_rate": 5.400000000000001e-06,
"loss": 0.4163,
"step": 270
},
{
"epoch": 3.802816901408451,
"eval_loss": 0.5622957944869995,
"eval_runtime": 256.1828,
"eval_samples_per_second": 1.952,
"eval_steps_per_second": 0.246,
"eval_wer": 46.095717884130984,
"step": 270
},
{
"epoch": 3.873239436619718,
"grad_norm": 6.132260799407959,
"learning_rate": 5.500000000000001e-06,
"loss": 0.3922,
"step": 275
},
{
"epoch": 3.943661971830986,
"grad_norm": 5.8338942527771,
"learning_rate": 5.600000000000001e-06,
"loss": 0.3496,
"step": 280
},
{
"epoch": 3.943661971830986,
"eval_loss": 0.560535192489624,
"eval_runtime": 255.0016,
"eval_samples_per_second": 1.961,
"eval_steps_per_second": 0.247,
"eval_wer": 42.2544080604534,
"step": 280
},
{
"epoch": 4.014084507042254,
"grad_norm": 4.769192695617676,
"learning_rate": 5.7e-06,
"loss": 0.4389,
"step": 285
},
{
"epoch": 4.084507042253521,
"grad_norm": 5.79905366897583,
"learning_rate": 5.8e-06,
"loss": 0.3835,
"step": 290
},
{
"epoch": 4.084507042253521,
"eval_loss": 0.5556859374046326,
"eval_runtime": 255.3987,
"eval_samples_per_second": 1.958,
"eval_steps_per_second": 0.247,
"eval_wer": 41.656171284634766,
"step": 290
},
{
"epoch": 4.154929577464789,
"grad_norm": 5.353799819946289,
"learning_rate": 5.9e-06,
"loss": 0.385,
"step": 295
},
{
"epoch": 4.225352112676056,
"grad_norm": 5.164504528045654,
"learning_rate": 6e-06,
"loss": 0.3462,
"step": 300
},
{
"epoch": 4.225352112676056,
"eval_loss": 0.550672173500061,
"eval_runtime": 255.5806,
"eval_samples_per_second": 1.956,
"eval_steps_per_second": 0.246,
"eval_wer": 36.39798488664987,
"step": 300
},
{
"epoch": 4.295774647887324,
"grad_norm": 5.903466701507568,
"learning_rate": 6.1e-06,
"loss": 0.3733,
"step": 305
},
{
"epoch": 4.366197183098592,
"grad_norm": 6.308957099914551,
"learning_rate": 6.200000000000001e-06,
"loss": 0.3133,
"step": 310
},
{
"epoch": 4.366197183098592,
"eval_loss": 0.5452054738998413,
"eval_runtime": 255.9204,
"eval_samples_per_second": 1.954,
"eval_steps_per_second": 0.246,
"eval_wer": 42.56926952141058,
"step": 310
},
{
"epoch": 4.436619718309859,
"grad_norm": 4.767759323120117,
"learning_rate": 6.300000000000001e-06,
"loss": 0.3544,
"step": 315
},
{
"epoch": 4.507042253521127,
"grad_norm": 5.711643695831299,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.3638,
"step": 320
},
{
"epoch": 4.507042253521127,
"eval_loss": 0.5434854030609131,
"eval_runtime": 253.7024,
"eval_samples_per_second": 1.971,
"eval_steps_per_second": 0.248,
"eval_wer": 35.957178841309826,
"step": 320
},
{
"epoch": 4.577464788732394,
"grad_norm": 5.667789936065674,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.3974,
"step": 325
},
{
"epoch": 4.647887323943662,
"grad_norm": 6.108503341674805,
"learning_rate": 6.600000000000001e-06,
"loss": 0.3826,
"step": 330
},
{
"epoch": 4.647887323943662,
"eval_loss": 0.5396420955657959,
"eval_runtime": 252.7138,
"eval_samples_per_second": 1.979,
"eval_steps_per_second": 0.249,
"eval_wer": 31.95843828715365,
"step": 330
},
{
"epoch": 4.71830985915493,
"grad_norm": 5.889377117156982,
"learning_rate": 6.700000000000001e-06,
"loss": 0.3813,
"step": 335
},
{
"epoch": 4.788732394366197,
"grad_norm": 5.469658851623535,
"learning_rate": 6.800000000000001e-06,
"loss": 0.3581,
"step": 340
},
{
"epoch": 4.788732394366197,
"eval_loss": 0.5361477136611938,
"eval_runtime": 251.8728,
"eval_samples_per_second": 1.985,
"eval_steps_per_second": 0.25,
"eval_wer": 33.78463476070529,
"step": 340
},
{
"epoch": 4.859154929577465,
"grad_norm": 5.188804626464844,
"learning_rate": 6.9e-06,
"loss": 0.3351,
"step": 345
},
{
"epoch": 4.929577464788732,
"grad_norm": 5.103167533874512,
"learning_rate": 7e-06,
"loss": 0.3127,
"step": 350
},
{
"epoch": 4.929577464788732,
"eval_loss": 0.5339432954788208,
"eval_runtime": 252.7571,
"eval_samples_per_second": 1.978,
"eval_steps_per_second": 0.249,
"eval_wer": 37.342569269521405,
"step": 350
},
{
"epoch": 5.0,
"grad_norm": 9.485374450683594,
"learning_rate": 7.100000000000001e-06,
"loss": 0.3265,
"step": 355
},
{
"epoch": 5.070422535211268,
"grad_norm": 5.010895252227783,
"learning_rate": 7.2000000000000005e-06,
"loss": 0.2988,
"step": 360
},
{
"epoch": 5.070422535211268,
"eval_loss": 0.5347580909729004,
"eval_runtime": 253.3761,
"eval_samples_per_second": 1.973,
"eval_steps_per_second": 0.249,
"eval_wer": 38.727959697733,
"step": 360
},
{
"epoch": 5.140845070422535,
"grad_norm": 5.113419055938721,
"learning_rate": 7.3e-06,
"loss": 0.2953,
"step": 365
},
{
"epoch": 5.211267605633803,
"grad_norm": 5.5772247314453125,
"learning_rate": 7.4e-06,
"loss": 0.2807,
"step": 370
},
{
"epoch": 5.211267605633803,
"eval_loss": 0.5343714952468872,
"eval_runtime": 252.932,
"eval_samples_per_second": 1.977,
"eval_steps_per_second": 0.249,
"eval_wer": 35.51637279596977,
"step": 370
},
{
"epoch": 5.28169014084507,
"grad_norm": 5.650921821594238,
"learning_rate": 7.500000000000001e-06,
"loss": 0.3147,
"step": 375
},
{
"epoch": 5.352112676056338,
"grad_norm": 5.143499374389648,
"learning_rate": 7.600000000000001e-06,
"loss": 0.2612,
"step": 380
},
{
"epoch": 5.352112676056338,
"eval_loss": 0.5304917097091675,
"eval_runtime": 252.0942,
"eval_samples_per_second": 1.983,
"eval_steps_per_second": 0.25,
"eval_wer": 34.66624685138539,
"step": 380
},
{
"epoch": 5.422535211267606,
"grad_norm": 5.593881607055664,
"learning_rate": 7.7e-06,
"loss": 0.2606,
"step": 385
},
{
"epoch": 5.492957746478873,
"grad_norm": 5.4485392570495605,
"learning_rate": 7.800000000000002e-06,
"loss": 0.2762,
"step": 390
},
{
"epoch": 5.492957746478873,
"eval_loss": 0.5305802226066589,
"eval_runtime": 252.0179,
"eval_samples_per_second": 1.984,
"eval_steps_per_second": 0.25,
"eval_wer": 32.27329974811083,
"step": 390
},
{
"epoch": 5.563380281690141,
"grad_norm": 4.250403881072998,
"learning_rate": 7.9e-06,
"loss": 0.2609,
"step": 395
},
{
"epoch": 5.633802816901408,
"grad_norm": 5.564484596252441,
"learning_rate": 8.000000000000001e-06,
"loss": 0.299,
"step": 400
},
{
"epoch": 5.633802816901408,
"eval_loss": 0.5266876220703125,
"eval_runtime": 251.1581,
"eval_samples_per_second": 1.991,
"eval_steps_per_second": 0.251,
"eval_wer": 36.87027707808564,
"step": 400
},
{
"epoch": 5.704225352112676,
"grad_norm": 4.646668910980225,
"learning_rate": 8.1e-06,
"loss": 0.2368,
"step": 405
},
{
"epoch": 5.774647887323944,
"grad_norm": 5.00687313079834,
"learning_rate": 8.2e-06,
"loss": 0.2718,
"step": 410
},
{
"epoch": 5.774647887323944,
"eval_loss": 0.5231830477714539,
"eval_runtime": 252.6711,
"eval_samples_per_second": 1.979,
"eval_steps_per_second": 0.249,
"eval_wer": 41.68765743073048,
"step": 410
},
{
"epoch": 5.845070422535211,
"grad_norm": 4.078917503356934,
"learning_rate": 8.3e-06,
"loss": 0.252,
"step": 415
},
{
"epoch": 5.915492957746479,
"grad_norm": 4.877511501312256,
"learning_rate": 8.400000000000001e-06,
"loss": 0.2618,
"step": 420
},
{
"epoch": 5.915492957746479,
"eval_loss": 0.5207710266113281,
"eval_runtime": 252.1118,
"eval_samples_per_second": 1.983,
"eval_steps_per_second": 0.25,
"eval_wer": 34.09949622166247,
"step": 420
},
{
"epoch": 5.985915492957746,
"grad_norm": 5.141012191772461,
"learning_rate": 8.5e-06,
"loss": 0.3232,
"step": 425
},
{
"epoch": 6.056338028169014,
"grad_norm": 4.299196243286133,
"learning_rate": 8.6e-06,
"loss": 0.2121,
"step": 430
},
{
"epoch": 6.056338028169014,
"eval_loss": 0.5220197439193726,
"eval_runtime": 252.0399,
"eval_samples_per_second": 1.984,
"eval_steps_per_second": 0.25,
"eval_wer": 28.05415617128463,
"step": 430
},
{
"epoch": 6.126760563380282,
"grad_norm": 3.769075393676758,
"learning_rate": 8.700000000000001e-06,
"loss": 0.2119,
"step": 435
},
{
"epoch": 6.197183098591549,
"grad_norm": 4.311405181884766,
"learning_rate": 8.8e-06,
"loss": 0.1929,
"step": 440
},
{
"epoch": 6.197183098591549,
"eval_loss": 0.5256190299987793,
"eval_runtime": 253.2092,
"eval_samples_per_second": 1.975,
"eval_steps_per_second": 0.249,
"eval_wer": 35.79974811083124,
"step": 440
},
{
"epoch": 6.267605633802817,
"grad_norm": 3.735041618347168,
"learning_rate": 8.900000000000001e-06,
"loss": 0.2104,
"step": 445
},
{
"epoch": 6.338028169014084,
"grad_norm": 6.507180690765381,
"learning_rate": 9e-06,
"loss": 0.2504,
"step": 450
},
{
"epoch": 6.338028169014084,
"eval_loss": 0.529583215713501,
"eval_runtime": 252.3402,
"eval_samples_per_second": 1.981,
"eval_steps_per_second": 0.25,
"eval_wer": 32.87153652392947,
"step": 450
},
{
"epoch": 6.408450704225352,
"grad_norm": 4.1670355796813965,
"learning_rate": 9.100000000000001e-06,
"loss": 0.1931,
"step": 455
},
{
"epoch": 6.47887323943662,
"grad_norm": 4.260618209838867,
"learning_rate": 9.200000000000002e-06,
"loss": 0.2064,
"step": 460
},
{
"epoch": 6.47887323943662,
"eval_loss": 0.5265011191368103,
"eval_runtime": 253.5935,
"eval_samples_per_second": 1.972,
"eval_steps_per_second": 0.248,
"eval_wer": 35.3904282115869,
"step": 460
},
{
"epoch": 6.549295774647887,
"grad_norm": 4.580427169799805,
"learning_rate": 9.3e-06,
"loss": 0.2099,
"step": 465
},
{
"epoch": 6.619718309859155,
"grad_norm": 5.135242938995361,
"learning_rate": 9.4e-06,
"loss": 0.2044,
"step": 470
},
{
"epoch": 6.619718309859155,
"eval_loss": 0.5266779065132141,
"eval_runtime": 253.6172,
"eval_samples_per_second": 1.971,
"eval_steps_per_second": 0.248,
"eval_wer": 38.31863979848866,
"step": 470
},
{
"epoch": 6.690140845070422,
"grad_norm": 4.770451545715332,
"learning_rate": 9.5e-06,
"loss": 0.2118,
"step": 475
},
{
"epoch": 6.76056338028169,
"grad_norm": 4.276612758636475,
"learning_rate": 9.600000000000001e-06,
"loss": 0.1844,
"step": 480
},
{
"epoch": 6.76056338028169,
"eval_loss": 0.5231460332870483,
"eval_runtime": 253.7339,
"eval_samples_per_second": 1.971,
"eval_steps_per_second": 0.248,
"eval_wer": 35.107052896725435,
"step": 480
},
{
"epoch": 6.830985915492958,
"grad_norm": 6.741299152374268,
"learning_rate": 9.7e-06,
"loss": 0.2276,
"step": 485
},
{
"epoch": 6.901408450704225,
"grad_norm": 5.4448370933532715,
"learning_rate": 9.800000000000001e-06,
"loss": 0.1867,
"step": 490
},
{
"epoch": 6.901408450704225,
"eval_loss": 0.5235409140586853,
"eval_runtime": 252.1039,
"eval_samples_per_second": 1.983,
"eval_steps_per_second": 0.25,
"eval_wer": 31.580604534005037,
"step": 490
},
{
"epoch": 6.971830985915493,
"grad_norm": 5.26415491104126,
"learning_rate": 9.9e-06,
"loss": 0.2232,
"step": 495
},
{
"epoch": 7.042253521126761,
"grad_norm": 3.9737112522125244,
"learning_rate": 0.0,
"loss": 0.1562,
"step": 500
},
{
"epoch": 7.042253521126761,
"eval_loss": 0.5233400464057922,
"eval_runtime": 252.8742,
"eval_samples_per_second": 1.977,
"eval_steps_per_second": 0.249,
"eval_wer": 31.10831234256927,
"step": 500
},
{
"epoch": 7.042253521126761,
"step": 500,
"total_flos": 7.8022170722304e+17,
"train_loss": 0.9523631989955902,
"train_runtime": 13251.4495,
"train_samples_per_second": 2.415,
"train_steps_per_second": 0.038
}
],
"logging_steps": 5,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 10,
"total_flos": 7.8022170722304e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}
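
For reference, a minimal Python sketch (not part of the original file) showing how this trainer_state.json could be parsed to list the evaluation WER curve and cross-check the stored best checkpoint. The local path "trainer_state.json" is an assumption; only fields that appear in the file above (log_history, eval_wer, eval_loss, step, best_metric, best_model_checkpoint) are used.

import json

# Assumed local copy of the file shown above; adjust the path as needed.
with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Keep only the evaluation entries (those carrying "eval_wer").
evals = [e for e in state["log_history"] if "eval_wer" in e]

for e in evals:
    print(f'step {e["step"]:>4}  eval_loss {e["eval_loss"]:.4f}  eval_wer {e["eval_wer"]:.2f}')

# Cross-check the stored best metric against the logged evaluations.
best = min(evals, key=lambda e: e["eval_wer"])
print("lowest logged WER:", best["eval_wer"], "at step", best["step"])
print("recorded best checkpoint:", state["best_model_checkpoint"],
      "with best_metric", state["best_metric"])

Run against the log above, the lowest logged WER (28.05 at step 430) matches best_metric and best_model_checkpoint ("whisper2/checkpoint-430").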