|
{ |
|
"best_metric": 28.05415617128463, |
|
"best_model_checkpoint": "whisper2/checkpoint-430", |
|
"epoch": 7.042253521126761, |
|
"eval_steps": 10, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07042253521126761, |
|
"grad_norm": 43.82194137573242, |
|
"learning_rate": 1.0000000000000001e-07, |
|
"loss": 3.9547, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.14084507042253522, |
|
"grad_norm": 45.53117370605469, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 3.9553, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14084507042253522, |
|
"eval_loss": 3.964555501937866, |
|
"eval_runtime": 264.1292, |
|
"eval_samples_per_second": 1.893, |
|
"eval_steps_per_second": 0.239, |
|
"eval_wer": 74.87405541561712, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2112676056338028, |
|
"grad_norm": 46.162776947021484, |
|
"learning_rate": 3.0000000000000004e-07, |
|
"loss": 3.882, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.28169014084507044, |
|
"grad_norm": 46.07596206665039, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 3.9548, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.28169014084507044, |
|
"eval_loss": 3.8793957233428955, |
|
"eval_runtime": 256.6948, |
|
"eval_samples_per_second": 1.948, |
|
"eval_steps_per_second": 0.245, |
|
"eval_wer": 77.67632241813602, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.352112676056338, |
|
"grad_norm": 45.13657760620117, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 3.9469, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.4225352112676056, |
|
"grad_norm": 44.565940856933594, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 3.8127, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4225352112676056, |
|
"eval_loss": 3.740476608276367, |
|
"eval_runtime": 257.3378, |
|
"eval_samples_per_second": 1.943, |
|
"eval_steps_per_second": 0.245, |
|
"eval_wer": 76.4168765743073, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.49295774647887325, |
|
"grad_norm": 44.24871826171875, |
|
"learning_rate": 7.000000000000001e-07, |
|
"loss": 3.7507, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5633802816901409, |
|
"grad_norm": 42.1717529296875, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 3.6178, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5633802816901409, |
|
"eval_loss": 3.5547332763671875, |
|
"eval_runtime": 256.8157, |
|
"eval_samples_per_second": 1.947, |
|
"eval_steps_per_second": 0.245, |
|
"eval_wer": 75.31486146095719, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6338028169014085, |
|
"grad_norm": 44.667205810546875, |
|
"learning_rate": 9.000000000000001e-07, |
|
"loss": 3.4825, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"grad_norm": 43.76979064941406, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 3.3992, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"eval_loss": 3.323503255844116, |
|
"eval_runtime": 255.1809, |
|
"eval_samples_per_second": 1.959, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 70.27707808564232, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7746478873239436, |
|
"grad_norm": 41.28179168701172, |
|
"learning_rate": 1.1e-06, |
|
"loss": 3.3124, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8450704225352113, |
|
"grad_norm": 40.813392639160156, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 3.1416, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8450704225352113, |
|
"eval_loss": 3.040179491043091, |
|
"eval_runtime": 255.4069, |
|
"eval_samples_per_second": 1.958, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 67.85264483627203, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9154929577464789, |
|
"grad_norm": 40.00282287597656, |
|
"learning_rate": 1.3e-06, |
|
"loss": 2.88, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.9859154929577465, |
|
"grad_norm": 40.60588455200195, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 2.8052, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9859154929577465, |
|
"eval_loss": 2.6852359771728516, |
|
"eval_runtime": 254.3541, |
|
"eval_samples_per_second": 1.966, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 65.96347607052897, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.056338028169014, |
|
"grad_norm": 44.205726623535156, |
|
"learning_rate": 1.5e-06, |
|
"loss": 2.4894, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.1267605633802817, |
|
"grad_norm": 40.45851516723633, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 2.3513, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1267605633802817, |
|
"eval_loss": 2.223541021347046, |
|
"eval_runtime": 256.3144, |
|
"eval_samples_per_second": 1.951, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 68.3249370277078, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1971830985915493, |
|
"grad_norm": 37.049591064453125, |
|
"learning_rate": 1.7000000000000002e-06, |
|
"loss": 2.2021, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.267605633802817, |
|
"grad_norm": 32.15092468261719, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 1.893, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.267605633802817, |
|
"eval_loss": 1.6707711219787598, |
|
"eval_runtime": 254.2495, |
|
"eval_samples_per_second": 1.967, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 63.822418136020154, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.3380281690140845, |
|
"grad_norm": 29.11300277709961, |
|
"learning_rate": 1.9000000000000002e-06, |
|
"loss": 1.6227, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"grad_norm": 19.466663360595703, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.2871, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"eval_loss": 1.164486050605774, |
|
"eval_runtime": 254.5126, |
|
"eval_samples_per_second": 1.965, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 63.25566750629723, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4788732394366197, |
|
"grad_norm": 15.238794326782227, |
|
"learning_rate": 2.1000000000000002e-06, |
|
"loss": 1.09, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.5492957746478875, |
|
"grad_norm": 10.725071907043457, |
|
"learning_rate": 2.2e-06, |
|
"loss": 0.9146, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.5492957746478875, |
|
"eval_loss": 0.8784648776054382, |
|
"eval_runtime": 256.185, |
|
"eval_samples_per_second": 1.952, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 56.83249370277078, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.619718309859155, |
|
"grad_norm": 7.8202009201049805, |
|
"learning_rate": 2.3000000000000004e-06, |
|
"loss": 0.8882, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.6901408450704225, |
|
"grad_norm": 8.60835075378418, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.8044, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.6901408450704225, |
|
"eval_loss": 0.7906607985496521, |
|
"eval_runtime": 255.9613, |
|
"eval_samples_per_second": 1.953, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 46.977329974811084, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.76056338028169, |
|
"grad_norm": 9.780821800231934, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.6849, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.8309859154929577, |
|
"grad_norm": 9.33056926727295, |
|
"learning_rate": 2.6e-06, |
|
"loss": 0.6634, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.8309859154929577, |
|
"eval_loss": 0.7425487637519836, |
|
"eval_runtime": 255.5846, |
|
"eval_samples_per_second": 1.956, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 47.48110831234257, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.9014084507042255, |
|
"grad_norm": 8.966361999511719, |
|
"learning_rate": 2.7000000000000004e-06, |
|
"loss": 0.7421, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.971830985915493, |
|
"grad_norm": 7.636435031890869, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 0.6722, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.971830985915493, |
|
"eval_loss": 0.7099979519844055, |
|
"eval_runtime": 253.8483, |
|
"eval_samples_per_second": 1.97, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 45.90680100755667, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.0422535211267605, |
|
"grad_norm": 8.085705757141113, |
|
"learning_rate": 2.9e-06, |
|
"loss": 0.6865, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"grad_norm": 8.131012916564941, |
|
"learning_rate": 3e-06, |
|
"loss": 0.6823, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"eval_loss": 0.6854478120803833, |
|
"eval_runtime": 255.8245, |
|
"eval_samples_per_second": 1.954, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 42.41183879093199, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.183098591549296, |
|
"grad_norm": 8.054609298706055, |
|
"learning_rate": 3.1000000000000004e-06, |
|
"loss": 0.6001, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.2535211267605635, |
|
"grad_norm": 6.9759063720703125, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.5802, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.2535211267605635, |
|
"eval_loss": 0.6659273505210876, |
|
"eval_runtime": 254.855, |
|
"eval_samples_per_second": 1.962, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 40.42821158690176, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.323943661971831, |
|
"grad_norm": 8.077522277832031, |
|
"learning_rate": 3.3000000000000006e-06, |
|
"loss": 0.6065, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.3943661971830985, |
|
"grad_norm": 6.6878228187561035, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 0.6084, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.3943661971830985, |
|
"eval_loss": 0.6503352522850037, |
|
"eval_runtime": 253.7567, |
|
"eval_samples_per_second": 1.97, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 40.8375314861461, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.464788732394366, |
|
"grad_norm": 7.941697597503662, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.5972, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.535211267605634, |
|
"grad_norm": 7.986533164978027, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.6038, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.535211267605634, |
|
"eval_loss": 0.6345599889755249, |
|
"eval_runtime": 254.9306, |
|
"eval_samples_per_second": 1.961, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 41.49874055415617, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.6056338028169015, |
|
"grad_norm": 6.744418144226074, |
|
"learning_rate": 3.7e-06, |
|
"loss": 0.5007, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.676056338028169, |
|
"grad_norm": 6.323821544647217, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 0.5095, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.676056338028169, |
|
"eval_loss": 0.6247134804725647, |
|
"eval_runtime": 257.1561, |
|
"eval_samples_per_second": 1.944, |
|
"eval_steps_per_second": 0.245, |
|
"eval_wer": 42.03400503778337, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.7464788732394365, |
|
"grad_norm": 6.979465961456299, |
|
"learning_rate": 3.900000000000001e-06, |
|
"loss": 0.5943, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"grad_norm": 6.675357818603516, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.5251, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"eval_loss": 0.6154741644859314, |
|
"eval_runtime": 255.2235, |
|
"eval_samples_per_second": 1.959, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 39.357682619647356, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.887323943661972, |
|
"grad_norm": 6.802981853485107, |
|
"learning_rate": 4.1e-06, |
|
"loss": 0.5528, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.9577464788732395, |
|
"grad_norm": 6.836462497711182, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 0.5699, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.9577464788732395, |
|
"eval_loss": 0.6045908331871033, |
|
"eval_runtime": 254.5675, |
|
"eval_samples_per_second": 1.964, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 38.350125944584384, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.028169014084507, |
|
"grad_norm": 6.114952087402344, |
|
"learning_rate": 4.3e-06, |
|
"loss": 0.478, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.0985915492957745, |
|
"grad_norm": 5.803236961364746, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.4839, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0985915492957745, |
|
"eval_loss": 0.5944731831550598, |
|
"eval_runtime": 254.5629, |
|
"eval_samples_per_second": 1.964, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 37.27959697732997, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.169014084507042, |
|
"grad_norm": 5.95841646194458, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.4982, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.23943661971831, |
|
"grad_norm": 6.992792129516602, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 0.4843, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.23943661971831, |
|
"eval_loss": 0.5861312747001648, |
|
"eval_runtime": 257.6573, |
|
"eval_samples_per_second": 1.941, |
|
"eval_steps_per_second": 0.245, |
|
"eval_wer": 48.394206549118394, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.3098591549295775, |
|
"grad_norm": 5.872804164886475, |
|
"learning_rate": 4.7e-06, |
|
"loss": 0.4471, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 3.380281690140845, |
|
"grad_norm": 6.013182640075684, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.4538, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.380281690140845, |
|
"eval_loss": 0.5793710350990295, |
|
"eval_runtime": 254.563, |
|
"eval_samples_per_second": 1.964, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 34.66624685138539, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.4507042253521125, |
|
"grad_norm": 6.745495319366455, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 0.4932, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"grad_norm": 5.320774078369141, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4741, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"eval_loss": 0.5736850500106812, |
|
"eval_runtime": 255.3883, |
|
"eval_samples_per_second": 1.958, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 33.816120906801004, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.591549295774648, |
|
"grad_norm": 6.753683090209961, |
|
"learning_rate": 5.1e-06, |
|
"loss": 0.5025, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 3.6619718309859155, |
|
"grad_norm": 7.474066257476807, |
|
"learning_rate": 5.2e-06, |
|
"loss": 0.4542, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.6619718309859155, |
|
"eval_loss": 0.5662725567817688, |
|
"eval_runtime": 255.3299, |
|
"eval_samples_per_second": 1.958, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 41.97103274559194, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.732394366197183, |
|
"grad_norm": 5.626581192016602, |
|
"learning_rate": 5.300000000000001e-06, |
|
"loss": 0.4639, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 3.802816901408451, |
|
"grad_norm": 5.518383026123047, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 0.4163, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.802816901408451, |
|
"eval_loss": 0.5622957944869995, |
|
"eval_runtime": 256.1828, |
|
"eval_samples_per_second": 1.952, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 46.095717884130984, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.873239436619718, |
|
"grad_norm": 6.132260799407959, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.3922, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 3.943661971830986, |
|
"grad_norm": 5.8338942527771, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.3496, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.943661971830986, |
|
"eval_loss": 0.560535192489624, |
|
"eval_runtime": 255.0016, |
|
"eval_samples_per_second": 1.961, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 42.2544080604534, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.014084507042254, |
|
"grad_norm": 4.769192695617676, |
|
"learning_rate": 5.7e-06, |
|
"loss": 0.4389, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 4.084507042253521, |
|
"grad_norm": 5.79905366897583, |
|
"learning_rate": 5.8e-06, |
|
"loss": 0.3835, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.084507042253521, |
|
"eval_loss": 0.5556859374046326, |
|
"eval_runtime": 255.3987, |
|
"eval_samples_per_second": 1.958, |
|
"eval_steps_per_second": 0.247, |
|
"eval_wer": 41.656171284634766, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.154929577464789, |
|
"grad_norm": 5.353799819946289, |
|
"learning_rate": 5.9e-06, |
|
"loss": 0.385, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"grad_norm": 5.164504528045654, |
|
"learning_rate": 6e-06, |
|
"loss": 0.3462, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"eval_loss": 0.550672173500061, |
|
"eval_runtime": 255.5806, |
|
"eval_samples_per_second": 1.956, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 36.39798488664987, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.295774647887324, |
|
"grad_norm": 5.903466701507568, |
|
"learning_rate": 6.1e-06, |
|
"loss": 0.3733, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 4.366197183098592, |
|
"grad_norm": 6.308957099914551, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 0.3133, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.366197183098592, |
|
"eval_loss": 0.5452054738998413, |
|
"eval_runtime": 255.9204, |
|
"eval_samples_per_second": 1.954, |
|
"eval_steps_per_second": 0.246, |
|
"eval_wer": 42.56926952141058, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.436619718309859, |
|
"grad_norm": 4.767759323120117, |
|
"learning_rate": 6.300000000000001e-06, |
|
"loss": 0.3544, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.507042253521127, |
|
"grad_norm": 5.711643695831299, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.3638, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.507042253521127, |
|
"eval_loss": 0.5434854030609131, |
|
"eval_runtime": 253.7024, |
|
"eval_samples_per_second": 1.971, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 35.957178841309826, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.577464788732394, |
|
"grad_norm": 5.667789936065674, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.3974, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 4.647887323943662, |
|
"grad_norm": 6.108503341674805, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 0.3826, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.647887323943662, |
|
"eval_loss": 0.5396420955657959, |
|
"eval_runtime": 252.7138, |
|
"eval_samples_per_second": 1.979, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 31.95843828715365, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.71830985915493, |
|
"grad_norm": 5.889377117156982, |
|
"learning_rate": 6.700000000000001e-06, |
|
"loss": 0.3813, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 4.788732394366197, |
|
"grad_norm": 5.469658851623535, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 0.3581, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.788732394366197, |
|
"eval_loss": 0.5361477136611938, |
|
"eval_runtime": 251.8728, |
|
"eval_samples_per_second": 1.985, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 33.78463476070529, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.859154929577465, |
|
"grad_norm": 5.188804626464844, |
|
"learning_rate": 6.9e-06, |
|
"loss": 0.3351, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"grad_norm": 5.103167533874512, |
|
"learning_rate": 7e-06, |
|
"loss": 0.3127, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"eval_loss": 0.5339432954788208, |
|
"eval_runtime": 252.7571, |
|
"eval_samples_per_second": 1.978, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 37.342569269521405, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 9.485374450683594, |
|
"learning_rate": 7.100000000000001e-06, |
|
"loss": 0.3265, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 5.070422535211268, |
|
"grad_norm": 5.010895252227783, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.2988, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.070422535211268, |
|
"eval_loss": 0.5347580909729004, |
|
"eval_runtime": 253.3761, |
|
"eval_samples_per_second": 1.973, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 38.727959697733, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.140845070422535, |
|
"grad_norm": 5.113419055938721, |
|
"learning_rate": 7.3e-06, |
|
"loss": 0.2953, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 5.211267605633803, |
|
"grad_norm": 5.5772247314453125, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.2807, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.211267605633803, |
|
"eval_loss": 0.5343714952468872, |
|
"eval_runtime": 252.932, |
|
"eval_samples_per_second": 1.977, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 35.51637279596977, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.28169014084507, |
|
"grad_norm": 5.650921821594238, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.3147, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.352112676056338, |
|
"grad_norm": 5.143499374389648, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.2612, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.352112676056338, |
|
"eval_loss": 0.5304917097091675, |
|
"eval_runtime": 252.0942, |
|
"eval_samples_per_second": 1.983, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 34.66624685138539, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.422535211267606, |
|
"grad_norm": 5.593881607055664, |
|
"learning_rate": 7.7e-06, |
|
"loss": 0.2606, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 5.492957746478873, |
|
"grad_norm": 5.4485392570495605, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 0.2762, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.492957746478873, |
|
"eval_loss": 0.5305802226066589, |
|
"eval_runtime": 252.0179, |
|
"eval_samples_per_second": 1.984, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 32.27329974811083, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.563380281690141, |
|
"grad_norm": 4.250403881072998, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.2609, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"grad_norm": 5.564484596252441, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.299, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"eval_loss": 0.5266876220703125, |
|
"eval_runtime": 251.1581, |
|
"eval_samples_per_second": 1.991, |
|
"eval_steps_per_second": 0.251, |
|
"eval_wer": 36.87027707808564, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.704225352112676, |
|
"grad_norm": 4.646668910980225, |
|
"learning_rate": 8.1e-06, |
|
"loss": 0.2368, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 5.774647887323944, |
|
"grad_norm": 5.00687313079834, |
|
"learning_rate": 8.2e-06, |
|
"loss": 0.2718, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.774647887323944, |
|
"eval_loss": 0.5231830477714539, |
|
"eval_runtime": 252.6711, |
|
"eval_samples_per_second": 1.979, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 41.68765743073048, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.845070422535211, |
|
"grad_norm": 4.078917503356934, |
|
"learning_rate": 8.3e-06, |
|
"loss": 0.252, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 5.915492957746479, |
|
"grad_norm": 4.877511501312256, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.2618, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.915492957746479, |
|
"eval_loss": 0.5207710266113281, |
|
"eval_runtime": 252.1118, |
|
"eval_samples_per_second": 1.983, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 34.09949622166247, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.985915492957746, |
|
"grad_norm": 5.141012191772461, |
|
"learning_rate": 8.5e-06, |
|
"loss": 0.3232, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 6.056338028169014, |
|
"grad_norm": 4.299196243286133, |
|
"learning_rate": 8.6e-06, |
|
"loss": 0.2121, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.056338028169014, |
|
"eval_loss": 0.5220197439193726, |
|
"eval_runtime": 252.0399, |
|
"eval_samples_per_second": 1.984, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 28.05415617128463, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.126760563380282, |
|
"grad_norm": 3.769075393676758, |
|
"learning_rate": 8.700000000000001e-06, |
|
"loss": 0.2119, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 6.197183098591549, |
|
"grad_norm": 4.311405181884766, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.1929, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.197183098591549, |
|
"eval_loss": 0.5256190299987793, |
|
"eval_runtime": 253.2092, |
|
"eval_samples_per_second": 1.975, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 35.79974811083124, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.267605633802817, |
|
"grad_norm": 3.735041618347168, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.2104, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 6.338028169014084, |
|
"grad_norm": 6.507180690765381, |
|
"learning_rate": 9e-06, |
|
"loss": 0.2504, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.338028169014084, |
|
"eval_loss": 0.529583215713501, |
|
"eval_runtime": 252.3402, |
|
"eval_samples_per_second": 1.981, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 32.87153652392947, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.408450704225352, |
|
"grad_norm": 4.1670355796813965, |
|
"learning_rate": 9.100000000000001e-06, |
|
"loss": 0.1931, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 6.47887323943662, |
|
"grad_norm": 4.260618209838867, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 0.2064, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.47887323943662, |
|
"eval_loss": 0.5265011191368103, |
|
"eval_runtime": 253.5935, |
|
"eval_samples_per_second": 1.972, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 35.3904282115869, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.549295774647887, |
|
"grad_norm": 4.580427169799805, |
|
"learning_rate": 9.3e-06, |
|
"loss": 0.2099, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 6.619718309859155, |
|
"grad_norm": 5.135242938995361, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.2044, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.619718309859155, |
|
"eval_loss": 0.5266779065132141, |
|
"eval_runtime": 253.6172, |
|
"eval_samples_per_second": 1.971, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 38.31863979848866, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.690140845070422, |
|
"grad_norm": 4.770451545715332, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.2118, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 6.76056338028169, |
|
"grad_norm": 4.276612758636475, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.1844, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.76056338028169, |
|
"eval_loss": 0.5231460332870483, |
|
"eval_runtime": 253.7339, |
|
"eval_samples_per_second": 1.971, |
|
"eval_steps_per_second": 0.248, |
|
"eval_wer": 35.107052896725435, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.830985915492958, |
|
"grad_norm": 6.741299152374268, |
|
"learning_rate": 9.7e-06, |
|
"loss": 0.2276, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 6.901408450704225, |
|
"grad_norm": 5.4448370933532715, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.1867, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.901408450704225, |
|
"eval_loss": 0.5235409140586853, |
|
"eval_runtime": 252.1039, |
|
"eval_samples_per_second": 1.983, |
|
"eval_steps_per_second": 0.25, |
|
"eval_wer": 31.580604534005037, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.971830985915493, |
|
"grad_norm": 5.26415491104126, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.2232, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 7.042253521126761, |
|
"grad_norm": 3.9737112522125244, |
|
"learning_rate": 0.0, |
|
"loss": 0.1562, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.042253521126761, |
|
"eval_loss": 0.5233400464057922, |
|
"eval_runtime": 252.8742, |
|
"eval_samples_per_second": 1.977, |
|
"eval_steps_per_second": 0.249, |
|
"eval_wer": 31.10831234256927, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.042253521126761, |
|
"step": 500, |
|
"total_flos": 7.8022170722304e+17, |
|
"train_loss": 0.9523631989955902, |
|
"train_runtime": 13251.4495, |
|
"train_samples_per_second": 2.415, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 10, |
|
"total_flos": 7.8022170722304e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|