{ "best_metric": 28.05415617128463, "best_model_checkpoint": "whisper2/checkpoint-430", "epoch": 7.042253521126761, "eval_steps": 10, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07042253521126761, "grad_norm": 43.82194137573242, "learning_rate": 1.0000000000000001e-07, "loss": 3.9547, "step": 5 }, { "epoch": 0.14084507042253522, "grad_norm": 45.53117370605469, "learning_rate": 2.0000000000000002e-07, "loss": 3.9553, "step": 10 }, { "epoch": 0.14084507042253522, "eval_loss": 3.964555501937866, "eval_runtime": 264.1292, "eval_samples_per_second": 1.893, "eval_steps_per_second": 0.239, "eval_wer": 74.87405541561712, "step": 10 }, { "epoch": 0.2112676056338028, "grad_norm": 46.162776947021484, "learning_rate": 3.0000000000000004e-07, "loss": 3.882, "step": 15 }, { "epoch": 0.28169014084507044, "grad_norm": 46.07596206665039, "learning_rate": 4.0000000000000003e-07, "loss": 3.9548, "step": 20 }, { "epoch": 0.28169014084507044, "eval_loss": 3.8793957233428955, "eval_runtime": 256.6948, "eval_samples_per_second": 1.948, "eval_steps_per_second": 0.245, "eval_wer": 77.67632241813602, "step": 20 }, { "epoch": 0.352112676056338, "grad_norm": 45.13657760620117, "learning_rate": 5.000000000000001e-07, "loss": 3.9469, "step": 25 }, { "epoch": 0.4225352112676056, "grad_norm": 44.565940856933594, "learning_rate": 6.000000000000001e-07, "loss": 3.8127, "step": 30 }, { "epoch": 0.4225352112676056, "eval_loss": 3.740476608276367, "eval_runtime": 257.3378, "eval_samples_per_second": 1.943, "eval_steps_per_second": 0.245, "eval_wer": 76.4168765743073, "step": 30 }, { "epoch": 0.49295774647887325, "grad_norm": 44.24871826171875, "learning_rate": 7.000000000000001e-07, "loss": 3.7507, "step": 35 }, { "epoch": 0.5633802816901409, "grad_norm": 42.1717529296875, "learning_rate": 8.000000000000001e-07, "loss": 3.6178, "step": 40 }, { "epoch": 0.5633802816901409, "eval_loss": 3.5547332763671875, "eval_runtime": 256.8157, "eval_samples_per_second": 1.947, "eval_steps_per_second": 0.245, "eval_wer": 75.31486146095719, "step": 40 }, { "epoch": 0.6338028169014085, "grad_norm": 44.667205810546875, "learning_rate": 9.000000000000001e-07, "loss": 3.4825, "step": 45 }, { "epoch": 0.704225352112676, "grad_norm": 43.76979064941406, "learning_rate": 1.0000000000000002e-06, "loss": 3.3992, "step": 50 }, { "epoch": 0.704225352112676, "eval_loss": 3.323503255844116, "eval_runtime": 255.1809, "eval_samples_per_second": 1.959, "eval_steps_per_second": 0.247, "eval_wer": 70.27707808564232, "step": 50 }, { "epoch": 0.7746478873239436, "grad_norm": 41.28179168701172, "learning_rate": 1.1e-06, "loss": 3.3124, "step": 55 }, { "epoch": 0.8450704225352113, "grad_norm": 40.813392639160156, "learning_rate": 1.2000000000000002e-06, "loss": 3.1416, "step": 60 }, { "epoch": 0.8450704225352113, "eval_loss": 3.040179491043091, "eval_runtime": 255.4069, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.247, "eval_wer": 67.85264483627203, "step": 60 }, { "epoch": 0.9154929577464789, "grad_norm": 40.00282287597656, "learning_rate": 1.3e-06, "loss": 2.88, "step": 65 }, { "epoch": 0.9859154929577465, "grad_norm": 40.60588455200195, "learning_rate": 1.4000000000000001e-06, "loss": 2.8052, "step": 70 }, { "epoch": 0.9859154929577465, "eval_loss": 2.6852359771728516, "eval_runtime": 254.3541, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.248, "eval_wer": 65.96347607052897, "step": 70 }, { "epoch": 1.056338028169014, "grad_norm": 44.205726623535156, "learning_rate": 1.5e-06, "loss": 2.4894, "step": 75 }, { "epoch": 1.1267605633802817, "grad_norm": 40.45851516723633, "learning_rate": 1.6000000000000001e-06, "loss": 2.3513, "step": 80 }, { "epoch": 1.1267605633802817, "eval_loss": 2.223541021347046, "eval_runtime": 256.3144, "eval_samples_per_second": 1.951, "eval_steps_per_second": 0.246, "eval_wer": 68.3249370277078, "step": 80 }, { "epoch": 1.1971830985915493, "grad_norm": 37.049591064453125, "learning_rate": 1.7000000000000002e-06, "loss": 2.2021, "step": 85 }, { "epoch": 1.267605633802817, "grad_norm": 32.15092468261719, "learning_rate": 1.8000000000000001e-06, "loss": 1.893, "step": 90 }, { "epoch": 1.267605633802817, "eval_loss": 1.6707711219787598, "eval_runtime": 254.2495, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.248, "eval_wer": 63.822418136020154, "step": 90 }, { "epoch": 1.3380281690140845, "grad_norm": 29.11300277709961, "learning_rate": 1.9000000000000002e-06, "loss": 1.6227, "step": 95 }, { "epoch": 1.408450704225352, "grad_norm": 19.466663360595703, "learning_rate": 2.0000000000000003e-06, "loss": 1.2871, "step": 100 }, { "epoch": 1.408450704225352, "eval_loss": 1.164486050605774, "eval_runtime": 254.5126, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.248, "eval_wer": 63.25566750629723, "step": 100 }, { "epoch": 1.4788732394366197, "grad_norm": 15.238794326782227, "learning_rate": 2.1000000000000002e-06, "loss": 1.09, "step": 105 }, { "epoch": 1.5492957746478875, "grad_norm": 10.725071907043457, "learning_rate": 2.2e-06, "loss": 0.9146, "step": 110 }, { "epoch": 1.5492957746478875, "eval_loss": 0.8784648776054382, "eval_runtime": 256.185, "eval_samples_per_second": 1.952, "eval_steps_per_second": 0.246, "eval_wer": 56.83249370277078, "step": 110 }, { "epoch": 1.619718309859155, "grad_norm": 7.8202009201049805, "learning_rate": 2.3000000000000004e-06, "loss": 0.8882, "step": 115 }, { "epoch": 1.6901408450704225, "grad_norm": 8.60835075378418, "learning_rate": 2.4000000000000003e-06, "loss": 0.8044, "step": 120 }, { "epoch": 1.6901408450704225, "eval_loss": 0.7906607985496521, "eval_runtime": 255.9613, "eval_samples_per_second": 1.953, "eval_steps_per_second": 0.246, "eval_wer": 46.977329974811084, "step": 120 }, { "epoch": 1.76056338028169, "grad_norm": 9.780821800231934, "learning_rate": 2.5e-06, "loss": 0.6849, "step": 125 }, { "epoch": 1.8309859154929577, "grad_norm": 9.33056926727295, "learning_rate": 2.6e-06, "loss": 0.6634, "step": 130 }, { "epoch": 1.8309859154929577, "eval_loss": 0.7425487637519836, "eval_runtime": 255.5846, "eval_samples_per_second": 1.956, "eval_steps_per_second": 0.246, "eval_wer": 47.48110831234257, "step": 130 }, { "epoch": 1.9014084507042255, "grad_norm": 8.966361999511719, "learning_rate": 2.7000000000000004e-06, "loss": 0.7421, "step": 135 }, { "epoch": 1.971830985915493, "grad_norm": 7.636435031890869, "learning_rate": 2.8000000000000003e-06, "loss": 0.6722, "step": 140 }, { "epoch": 1.971830985915493, "eval_loss": 0.7099979519844055, "eval_runtime": 253.8483, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.248, "eval_wer": 45.90680100755667, "step": 140 }, { "epoch": 2.0422535211267605, "grad_norm": 8.085705757141113, "learning_rate": 2.9e-06, "loss": 0.6865, "step": 145 }, { "epoch": 2.112676056338028, "grad_norm": 8.131012916564941, "learning_rate": 3e-06, "loss": 0.6823, "step": 150 }, { "epoch": 2.112676056338028, "eval_loss": 0.6854478120803833, "eval_runtime": 255.8245, "eval_samples_per_second": 1.954, "eval_steps_per_second": 0.246, "eval_wer": 42.41183879093199, "step": 150 }, { "epoch": 2.183098591549296, "grad_norm": 8.054609298706055, "learning_rate": 3.1000000000000004e-06, "loss": 0.6001, "step": 155 }, { "epoch": 2.2535211267605635, "grad_norm": 6.9759063720703125, "learning_rate": 3.2000000000000003e-06, "loss": 0.5802, "step": 160 }, { "epoch": 2.2535211267605635, "eval_loss": 0.6659273505210876, "eval_runtime": 254.855, "eval_samples_per_second": 1.962, "eval_steps_per_second": 0.247, "eval_wer": 40.42821158690176, "step": 160 }, { "epoch": 2.323943661971831, "grad_norm": 8.077522277832031, "learning_rate": 3.3000000000000006e-06, "loss": 0.6065, "step": 165 }, { "epoch": 2.3943661971830985, "grad_norm": 6.6878228187561035, "learning_rate": 3.4000000000000005e-06, "loss": 0.6084, "step": 170 }, { "epoch": 2.3943661971830985, "eval_loss": 0.6503352522850037, "eval_runtime": 253.7567, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.248, "eval_wer": 40.8375314861461, "step": 170 }, { "epoch": 2.464788732394366, "grad_norm": 7.941697597503662, "learning_rate": 3.5e-06, "loss": 0.5972, "step": 175 }, { "epoch": 2.535211267605634, "grad_norm": 7.986533164978027, "learning_rate": 3.6000000000000003e-06, "loss": 0.6038, "step": 180 }, { "epoch": 2.535211267605634, "eval_loss": 0.6345599889755249, "eval_runtime": 254.9306, "eval_samples_per_second": 1.961, "eval_steps_per_second": 0.247, "eval_wer": 41.49874055415617, "step": 180 }, { "epoch": 2.6056338028169015, "grad_norm": 6.744418144226074, "learning_rate": 3.7e-06, "loss": 0.5007, "step": 185 }, { "epoch": 2.676056338028169, "grad_norm": 6.323821544647217, "learning_rate": 3.8000000000000005e-06, "loss": 0.5095, "step": 190 }, { "epoch": 2.676056338028169, "eval_loss": 0.6247134804725647, "eval_runtime": 257.1561, "eval_samples_per_second": 1.944, "eval_steps_per_second": 0.245, "eval_wer": 42.03400503778337, "step": 190 }, { "epoch": 2.7464788732394365, "grad_norm": 6.979465961456299, "learning_rate": 3.900000000000001e-06, "loss": 0.5943, "step": 195 }, { "epoch": 2.816901408450704, "grad_norm": 6.675357818603516, "learning_rate": 4.000000000000001e-06, "loss": 0.5251, "step": 200 }, { "epoch": 2.816901408450704, "eval_loss": 0.6154741644859314, "eval_runtime": 255.2235, "eval_samples_per_second": 1.959, "eval_steps_per_second": 0.247, "eval_wer": 39.357682619647356, "step": 200 }, { "epoch": 2.887323943661972, "grad_norm": 6.802981853485107, "learning_rate": 4.1e-06, "loss": 0.5528, "step": 205 }, { "epoch": 2.9577464788732395, "grad_norm": 6.836462497711182, "learning_rate": 4.2000000000000004e-06, "loss": 0.5699, "step": 210 }, { "epoch": 2.9577464788732395, "eval_loss": 0.6045908331871033, "eval_runtime": 254.5675, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.247, "eval_wer": 38.350125944584384, "step": 210 }, { "epoch": 3.028169014084507, "grad_norm": 6.114952087402344, "learning_rate": 4.3e-06, "loss": 0.478, "step": 215 }, { "epoch": 3.0985915492957745, "grad_norm": 5.803236961364746, "learning_rate": 4.4e-06, "loss": 0.4839, "step": 220 }, { "epoch": 3.0985915492957745, "eval_loss": 0.5944731831550598, "eval_runtime": 254.5629, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.247, "eval_wer": 37.27959697732997, "step": 220 }, { "epoch": 3.169014084507042, "grad_norm": 5.95841646194458, "learning_rate": 4.5e-06, "loss": 0.4982, "step": 225 }, { "epoch": 3.23943661971831, "grad_norm": 6.992792129516602, "learning_rate": 4.600000000000001e-06, "loss": 0.4843, "step": 230 }, { "epoch": 3.23943661971831, "eval_loss": 0.5861312747001648, "eval_runtime": 257.6573, "eval_samples_per_second": 1.941, "eval_steps_per_second": 0.245, "eval_wer": 48.394206549118394, "step": 230 }, { "epoch": 3.3098591549295775, "grad_norm": 5.872804164886475, "learning_rate": 4.7e-06, "loss": 0.4471, "step": 235 }, { "epoch": 3.380281690140845, "grad_norm": 6.013182640075684, "learning_rate": 4.800000000000001e-06, "loss": 0.4538, "step": 240 }, { "epoch": 3.380281690140845, "eval_loss": 0.5793710350990295, "eval_runtime": 254.563, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.247, "eval_wer": 34.66624685138539, "step": 240 }, { "epoch": 3.4507042253521125, "grad_norm": 6.745495319366455, "learning_rate": 4.9000000000000005e-06, "loss": 0.4932, "step": 245 }, { "epoch": 3.52112676056338, "grad_norm": 5.320774078369141, "learning_rate": 5e-06, "loss": 0.4741, "step": 250 }, { "epoch": 3.52112676056338, "eval_loss": 0.5736850500106812, "eval_runtime": 255.3883, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.247, "eval_wer": 33.816120906801004, "step": 250 }, { "epoch": 3.591549295774648, "grad_norm": 6.753683090209961, "learning_rate": 5.1e-06, "loss": 0.5025, "step": 255 }, { "epoch": 3.6619718309859155, "grad_norm": 7.474066257476807, "learning_rate": 5.2e-06, "loss": 0.4542, "step": 260 }, { "epoch": 3.6619718309859155, "eval_loss": 0.5662725567817688, "eval_runtime": 255.3299, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.247, "eval_wer": 41.97103274559194, "step": 260 }, { "epoch": 3.732394366197183, "grad_norm": 5.626581192016602, "learning_rate": 5.300000000000001e-06, "loss": 0.4639, "step": 265 }, { "epoch": 3.802816901408451, "grad_norm": 5.518383026123047, "learning_rate": 5.400000000000001e-06, "loss": 0.4163, "step": 270 }, { "epoch": 3.802816901408451, "eval_loss": 0.5622957944869995, "eval_runtime": 256.1828, "eval_samples_per_second": 1.952, "eval_steps_per_second": 0.246, "eval_wer": 46.095717884130984, "step": 270 }, { "epoch": 3.873239436619718, "grad_norm": 6.132260799407959, "learning_rate": 5.500000000000001e-06, "loss": 0.3922, "step": 275 }, { "epoch": 3.943661971830986, "grad_norm": 5.8338942527771, "learning_rate": 5.600000000000001e-06, "loss": 0.3496, "step": 280 }, { "epoch": 3.943661971830986, "eval_loss": 0.560535192489624, "eval_runtime": 255.0016, "eval_samples_per_second": 1.961, "eval_steps_per_second": 0.247, "eval_wer": 42.2544080604534, "step": 280 }, { "epoch": 4.014084507042254, "grad_norm": 4.769192695617676, "learning_rate": 5.7e-06, "loss": 0.4389, "step": 285 }, { "epoch": 4.084507042253521, "grad_norm": 5.79905366897583, "learning_rate": 5.8e-06, "loss": 0.3835, "step": 290 }, { "epoch": 4.084507042253521, "eval_loss": 0.5556859374046326, "eval_runtime": 255.3987, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.247, "eval_wer": 41.656171284634766, "step": 290 }, { "epoch": 4.154929577464789, "grad_norm": 5.353799819946289, "learning_rate": 5.9e-06, "loss": 0.385, "step": 295 }, { "epoch": 4.225352112676056, "grad_norm": 5.164504528045654, "learning_rate": 6e-06, "loss": 0.3462, "step": 300 }, { "epoch": 4.225352112676056, "eval_loss": 0.550672173500061, "eval_runtime": 255.5806, "eval_samples_per_second": 1.956, "eval_steps_per_second": 0.246, "eval_wer": 36.39798488664987, "step": 300 }, { "epoch": 4.295774647887324, "grad_norm": 5.903466701507568, "learning_rate": 6.1e-06, "loss": 0.3733, "step": 305 }, { "epoch": 4.366197183098592, "grad_norm": 6.308957099914551, "learning_rate": 6.200000000000001e-06, "loss": 0.3133, "step": 310 }, { "epoch": 4.366197183098592, "eval_loss": 0.5452054738998413, "eval_runtime": 255.9204, "eval_samples_per_second": 1.954, "eval_steps_per_second": 0.246, "eval_wer": 42.56926952141058, "step": 310 }, { "epoch": 4.436619718309859, "grad_norm": 4.767759323120117, "learning_rate": 6.300000000000001e-06, "loss": 0.3544, "step": 315 }, { "epoch": 4.507042253521127, "grad_norm": 5.711643695831299, "learning_rate": 6.4000000000000006e-06, "loss": 0.3638, "step": 320 }, { "epoch": 4.507042253521127, "eval_loss": 0.5434854030609131, "eval_runtime": 253.7024, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.248, "eval_wer": 35.957178841309826, "step": 320 }, { "epoch": 4.577464788732394, "grad_norm": 5.667789936065674, "learning_rate": 6.5000000000000004e-06, "loss": 0.3974, "step": 325 }, { "epoch": 4.647887323943662, "grad_norm": 6.108503341674805, "learning_rate": 6.600000000000001e-06, "loss": 0.3826, "step": 330 }, { "epoch": 4.647887323943662, "eval_loss": 0.5396420955657959, "eval_runtime": 252.7138, "eval_samples_per_second": 1.979, "eval_steps_per_second": 0.249, "eval_wer": 31.95843828715365, "step": 330 }, { "epoch": 4.71830985915493, "grad_norm": 5.889377117156982, "learning_rate": 6.700000000000001e-06, "loss": 0.3813, "step": 335 }, { "epoch": 4.788732394366197, "grad_norm": 5.469658851623535, "learning_rate": 6.800000000000001e-06, "loss": 0.3581, "step": 340 }, { "epoch": 4.788732394366197, "eval_loss": 0.5361477136611938, "eval_runtime": 251.8728, "eval_samples_per_second": 1.985, "eval_steps_per_second": 0.25, "eval_wer": 33.78463476070529, "step": 340 }, { "epoch": 4.859154929577465, "grad_norm": 5.188804626464844, "learning_rate": 6.9e-06, "loss": 0.3351, "step": 345 }, { "epoch": 4.929577464788732, "grad_norm": 5.103167533874512, "learning_rate": 7e-06, "loss": 0.3127, "step": 350 }, { "epoch": 4.929577464788732, "eval_loss": 0.5339432954788208, "eval_runtime": 252.7571, "eval_samples_per_second": 1.978, "eval_steps_per_second": 0.249, "eval_wer": 37.342569269521405, "step": 350 }, { "epoch": 5.0, "grad_norm": 9.485374450683594, "learning_rate": 7.100000000000001e-06, "loss": 0.3265, "step": 355 }, { "epoch": 5.070422535211268, "grad_norm": 5.010895252227783, "learning_rate": 7.2000000000000005e-06, "loss": 0.2988, "step": 360 }, { "epoch": 5.070422535211268, "eval_loss": 0.5347580909729004, "eval_runtime": 253.3761, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.249, "eval_wer": 38.727959697733, "step": 360 }, { "epoch": 5.140845070422535, "grad_norm": 5.113419055938721, "learning_rate": 7.3e-06, "loss": 0.2953, "step": 365 }, { "epoch": 5.211267605633803, "grad_norm": 5.5772247314453125, "learning_rate": 7.4e-06, "loss": 0.2807, "step": 370 }, { "epoch": 5.211267605633803, "eval_loss": 0.5343714952468872, "eval_runtime": 252.932, "eval_samples_per_second": 1.977, "eval_steps_per_second": 0.249, "eval_wer": 35.51637279596977, "step": 370 }, { "epoch": 5.28169014084507, "grad_norm": 5.650921821594238, "learning_rate": 7.500000000000001e-06, "loss": 0.3147, "step": 375 }, { "epoch": 5.352112676056338, "grad_norm": 5.143499374389648, "learning_rate": 7.600000000000001e-06, "loss": 0.2612, "step": 380 }, { "epoch": 5.352112676056338, "eval_loss": 0.5304917097091675, "eval_runtime": 252.0942, "eval_samples_per_second": 1.983, "eval_steps_per_second": 0.25, "eval_wer": 34.66624685138539, "step": 380 }, { "epoch": 5.422535211267606, "grad_norm": 5.593881607055664, "learning_rate": 7.7e-06, "loss": 0.2606, "step": 385 }, { "epoch": 5.492957746478873, "grad_norm": 5.4485392570495605, "learning_rate": 7.800000000000002e-06, "loss": 0.2762, "step": 390 }, { "epoch": 5.492957746478873, "eval_loss": 0.5305802226066589, "eval_runtime": 252.0179, "eval_samples_per_second": 1.984, "eval_steps_per_second": 0.25, "eval_wer": 32.27329974811083, "step": 390 }, { "epoch": 5.563380281690141, "grad_norm": 4.250403881072998, "learning_rate": 7.9e-06, "loss": 0.2609, "step": 395 }, { "epoch": 5.633802816901408, "grad_norm": 5.564484596252441, "learning_rate": 8.000000000000001e-06, "loss": 0.299, "step": 400 }, { "epoch": 5.633802816901408, "eval_loss": 0.5266876220703125, "eval_runtime": 251.1581, "eval_samples_per_second": 1.991, "eval_steps_per_second": 0.251, "eval_wer": 36.87027707808564, "step": 400 }, { "epoch": 5.704225352112676, "grad_norm": 4.646668910980225, "learning_rate": 8.1e-06, "loss": 0.2368, "step": 405 }, { "epoch": 5.774647887323944, "grad_norm": 5.00687313079834, "learning_rate": 8.2e-06, "loss": 0.2718, "step": 410 }, { "epoch": 5.774647887323944, "eval_loss": 0.5231830477714539, "eval_runtime": 252.6711, "eval_samples_per_second": 1.979, "eval_steps_per_second": 0.249, "eval_wer": 41.68765743073048, "step": 410 }, { "epoch": 5.845070422535211, "grad_norm": 4.078917503356934, "learning_rate": 8.3e-06, "loss": 0.252, "step": 415 }, { "epoch": 5.915492957746479, "grad_norm": 4.877511501312256, "learning_rate": 8.400000000000001e-06, "loss": 0.2618, "step": 420 }, { "epoch": 5.915492957746479, "eval_loss": 0.5207710266113281, "eval_runtime": 252.1118, "eval_samples_per_second": 1.983, "eval_steps_per_second": 0.25, "eval_wer": 34.09949622166247, "step": 420 }, { "epoch": 5.985915492957746, "grad_norm": 5.141012191772461, "learning_rate": 8.5e-06, "loss": 0.3232, "step": 425 }, { "epoch": 6.056338028169014, "grad_norm": 4.299196243286133, "learning_rate": 8.6e-06, "loss": 0.2121, "step": 430 }, { "epoch": 6.056338028169014, "eval_loss": 0.5220197439193726, "eval_runtime": 252.0399, "eval_samples_per_second": 1.984, "eval_steps_per_second": 0.25, "eval_wer": 28.05415617128463, "step": 430 }, { "epoch": 6.126760563380282, "grad_norm": 3.769075393676758, "learning_rate": 8.700000000000001e-06, "loss": 0.2119, "step": 435 }, { "epoch": 6.197183098591549, "grad_norm": 4.311405181884766, "learning_rate": 8.8e-06, "loss": 0.1929, "step": 440 }, { "epoch": 6.197183098591549, "eval_loss": 0.5256190299987793, "eval_runtime": 253.2092, "eval_samples_per_second": 1.975, "eval_steps_per_second": 0.249, "eval_wer": 35.79974811083124, "step": 440 }, { "epoch": 6.267605633802817, "grad_norm": 3.735041618347168, "learning_rate": 8.900000000000001e-06, "loss": 0.2104, "step": 445 }, { "epoch": 6.338028169014084, "grad_norm": 6.507180690765381, "learning_rate": 9e-06, "loss": 0.2504, "step": 450 }, { "epoch": 6.338028169014084, "eval_loss": 0.529583215713501, "eval_runtime": 252.3402, "eval_samples_per_second": 1.981, "eval_steps_per_second": 0.25, "eval_wer": 32.87153652392947, "step": 450 }, { "epoch": 6.408450704225352, "grad_norm": 4.1670355796813965, "learning_rate": 9.100000000000001e-06, "loss": 0.1931, "step": 455 }, { "epoch": 6.47887323943662, "grad_norm": 4.260618209838867, "learning_rate": 9.200000000000002e-06, "loss": 0.2064, "step": 460 }, { "epoch": 6.47887323943662, "eval_loss": 0.5265011191368103, "eval_runtime": 253.5935, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.248, "eval_wer": 35.3904282115869, "step": 460 }, { "epoch": 6.549295774647887, "grad_norm": 4.580427169799805, "learning_rate": 9.3e-06, "loss": 0.2099, "step": 465 }, { "epoch": 6.619718309859155, "grad_norm": 5.135242938995361, "learning_rate": 9.4e-06, "loss": 0.2044, "step": 470 }, { "epoch": 6.619718309859155, "eval_loss": 0.5266779065132141, "eval_runtime": 253.6172, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.248, "eval_wer": 38.31863979848866, "step": 470 }, { "epoch": 6.690140845070422, "grad_norm": 4.770451545715332, "learning_rate": 9.5e-06, "loss": 0.2118, "step": 475 }, { "epoch": 6.76056338028169, "grad_norm": 4.276612758636475, "learning_rate": 9.600000000000001e-06, "loss": 0.1844, "step": 480 }, { "epoch": 6.76056338028169, "eval_loss": 0.5231460332870483, "eval_runtime": 253.7339, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.248, "eval_wer": 35.107052896725435, "step": 480 }, { "epoch": 6.830985915492958, "grad_norm": 6.741299152374268, "learning_rate": 9.7e-06, "loss": 0.2276, "step": 485 }, { "epoch": 6.901408450704225, "grad_norm": 5.4448370933532715, "learning_rate": 9.800000000000001e-06, "loss": 0.1867, "step": 490 }, { "epoch": 6.901408450704225, "eval_loss": 0.5235409140586853, "eval_runtime": 252.1039, "eval_samples_per_second": 1.983, "eval_steps_per_second": 0.25, "eval_wer": 31.580604534005037, "step": 490 }, { "epoch": 6.971830985915493, "grad_norm": 5.26415491104126, "learning_rate": 9.9e-06, "loss": 0.2232, "step": 495 }, { "epoch": 7.042253521126761, "grad_norm": 3.9737112522125244, "learning_rate": 0.0, "loss": 0.1562, "step": 500 }, { "epoch": 7.042253521126761, "eval_loss": 0.5233400464057922, "eval_runtime": 252.8742, "eval_samples_per_second": 1.977, "eval_steps_per_second": 0.249, "eval_wer": 31.10831234256927, "step": 500 }, { "epoch": 7.042253521126761, "step": 500, "total_flos": 7.8022170722304e+17, "train_loss": 0.9523631989955902, "train_runtime": 13251.4495, "train_samples_per_second": 2.415, "train_steps_per_second": 0.038 } ], "logging_steps": 5, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 10, "total_flos": 7.8022170722304e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }