{ "best_metric": 23.35348393254852, "best_model_checkpoint": "whisper3/checkpoint-240", "epoch": 8.333333333333334, "eval_steps": 10, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1388888888888889, "grad_norm": 46.06148147583008, "learning_rate": 1.0000000000000002e-06, "loss": 3.9402, "step": 5 }, { "epoch": 0.2777777777777778, "grad_norm": 43.4765625, "learning_rate": 2.0000000000000003e-06, "loss": 3.8281, "step": 10 }, { "epoch": 0.2777777777777778, "eval_loss": 3.7929115295410156, "eval_runtime": 253.0403, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.249, "eval_wer": 80.40089086859689, "step": 10 }, { "epoch": 0.4166666666666667, "grad_norm": 40.57815933227539, "learning_rate": 3e-06, "loss": 3.5929, "step": 15 }, { "epoch": 0.5555555555555556, "grad_norm": 39.72583770751953, "learning_rate": 4.000000000000001e-06, "loss": 3.209, "step": 20 }, { "epoch": 0.5555555555555556, "eval_loss": 3.0014312267303467, "eval_runtime": 246.2101, "eval_samples_per_second": 2.031, "eval_steps_per_second": 0.256, "eval_wer": 68.37416481069042, "step": 20 }, { "epoch": 0.6944444444444444, "grad_norm": 39.53627395629883, "learning_rate": 5e-06, "loss": 2.7486, "step": 25 }, { "epoch": 0.8333333333333334, "grad_norm": 30.079750061035156, "learning_rate": 6e-06, "loss": 2.1066, "step": 30 }, { "epoch": 0.8333333333333334, "eval_loss": 1.761271595954895, "eval_runtime": 245.5315, "eval_samples_per_second": 2.036, "eval_steps_per_second": 0.257, "eval_wer": 63.91982182628062, "step": 30 }, { "epoch": 0.9722222222222222, "grad_norm": 19.831071853637695, "learning_rate": 7.000000000000001e-06, "loss": 1.5134, "step": 35 }, { "epoch": 1.1111111111111112, "grad_norm": 9.755999565124512, "learning_rate": 8.000000000000001e-06, "loss": 0.9963, "step": 40 }, { "epoch": 1.1111111111111112, "eval_loss": 0.8740884065628052, "eval_runtime": 246.6146, "eval_samples_per_second": 2.027, "eval_steps_per_second": 0.255, "eval_wer": 52.43398027362392, "step": 40 }, { "epoch": 1.25, "grad_norm": 6.842897891998291, "learning_rate": 9e-06, "loss": 0.786, "step": 45 }, { "epoch": 1.3888888888888888, "grad_norm": 5.720729351043701, "learning_rate": 1e-05, "loss": 0.6922, "step": 50 }, { "epoch": 1.3888888888888888, "eval_loss": 0.7008740901947021, "eval_runtime": 245.5713, "eval_samples_per_second": 2.036, "eval_steps_per_second": 0.257, "eval_wer": 35.82564428889596, "step": 50 }, { "epoch": 1.5277777777777777, "grad_norm": 4.806775093078613, "learning_rate": 1.1000000000000001e-05, "loss": 0.6427, "step": 55 }, { "epoch": 1.6666666666666665, "grad_norm": 5.128376483917236, "learning_rate": 1.2e-05, "loss": 0.5816, "step": 60 }, { "epoch": 1.6666666666666665, "eval_loss": 0.6238442659378052, "eval_runtime": 245.679, "eval_samples_per_second": 2.035, "eval_steps_per_second": 0.256, "eval_wer": 31.148584155265667, "step": 60 }, { "epoch": 1.8055555555555556, "grad_norm": 4.993675231933594, "learning_rate": 1.3000000000000001e-05, "loss": 0.5805, "step": 65 }, { "epoch": 1.9444444444444444, "grad_norm": 4.856825351715088, "learning_rate": 1.4000000000000001e-05, "loss": 0.5684, "step": 70 }, { "epoch": 1.9444444444444444, "eval_loss": 0.5697694420814514, "eval_runtime": 245.5413, "eval_samples_per_second": 2.036, "eval_steps_per_second": 0.257, "eval_wer": 35.47566019726376, "step": 70 }, { "epoch": 2.0833333333333335, "grad_norm": 4.464582443237305, "learning_rate": 1.5e-05, "loss": 0.4534, "step": 75 }, { "epoch": 2.2222222222222223, "grad_norm": 4.251033306121826, "learning_rate": 1.6000000000000003e-05, "loss": 0.427, "step": 80 }, { "epoch": 2.2222222222222223, "eval_loss": 0.5380394458770752, "eval_runtime": 244.4819, "eval_samples_per_second": 2.045, "eval_steps_per_second": 0.258, "eval_wer": 27.266942411708563, "step": 80 }, { "epoch": 2.361111111111111, "grad_norm": 4.489510536193848, "learning_rate": 1.7000000000000003e-05, "loss": 0.3929, "step": 85 }, { "epoch": 2.5, "grad_norm": 4.552371025085449, "learning_rate": 1.8e-05, "loss": 0.4395, "step": 90 }, { "epoch": 2.5, "eval_loss": 0.5162410140037537, "eval_runtime": 245.2373, "eval_samples_per_second": 2.039, "eval_steps_per_second": 0.257, "eval_wer": 32.73942093541203, "step": 90 }, { "epoch": 2.638888888888889, "grad_norm": 4.691618919372559, "learning_rate": 1.9e-05, "loss": 0.3825, "step": 95 }, { "epoch": 2.7777777777777777, "grad_norm": 4.219367027282715, "learning_rate": 2e-05, "loss": 0.3861, "step": 100 }, { "epoch": 2.7777777777777777, "eval_loss": 0.495292991399765, "eval_runtime": 243.4193, "eval_samples_per_second": 2.054, "eval_steps_per_second": 0.259, "eval_wer": 24.530703149856826, "step": 100 }, { "epoch": 2.9166666666666665, "grad_norm": 4.323045253753662, "learning_rate": 2.1e-05, "loss": 0.3669, "step": 105 }, { "epoch": 3.0555555555555554, "grad_norm": 3.2159509658813477, "learning_rate": 2.2000000000000003e-05, "loss": 0.3745, "step": 110 }, { "epoch": 3.0555555555555554, "eval_loss": 0.4837464392185211, "eval_runtime": 244.5759, "eval_samples_per_second": 2.044, "eval_steps_per_second": 0.258, "eval_wer": 24.626153356665608, "step": 110 }, { "epoch": 3.1944444444444446, "grad_norm": 3.675457000732422, "learning_rate": 2.3000000000000003e-05, "loss": 0.257, "step": 115 }, { "epoch": 3.3333333333333335, "grad_norm": 2.8939876556396484, "learning_rate": 2.4e-05, "loss": 0.2487, "step": 120 }, { "epoch": 3.3333333333333335, "eval_loss": 0.4732927978038788, "eval_runtime": 244.6891, "eval_samples_per_second": 2.043, "eval_steps_per_second": 0.257, "eval_wer": 23.57620108176901, "step": 120 }, { "epoch": 3.4722222222222223, "grad_norm": 3.4589827060699463, "learning_rate": 2.5e-05, "loss": 0.253, "step": 125 }, { "epoch": 3.611111111111111, "grad_norm": 3.1798577308654785, "learning_rate": 2.6000000000000002e-05, "loss": 0.2343, "step": 130 }, { "epoch": 3.611111111111111, "eval_loss": 0.46519017219543457, "eval_runtime": 244.3925, "eval_samples_per_second": 2.046, "eval_steps_per_second": 0.258, "eval_wer": 24.94432071269488, "step": 130 }, { "epoch": 3.75, "grad_norm": 4.061887741088867, "learning_rate": 2.7000000000000002e-05, "loss": 0.2354, "step": 135 }, { "epoch": 3.888888888888889, "grad_norm": 4.474591255187988, "learning_rate": 2.8000000000000003e-05, "loss": 0.2429, "step": 140 }, { "epoch": 3.888888888888889, "eval_loss": 0.4581267833709717, "eval_runtime": 244.836, "eval_samples_per_second": 2.042, "eval_steps_per_second": 0.257, "eval_wer": 24.085268851415844, "step": 140 }, { "epoch": 4.027777777777778, "grad_norm": 2.3235318660736084, "learning_rate": 2.9e-05, "loss": 0.2728, "step": 145 }, { "epoch": 4.166666666666667, "grad_norm": 2.3824808597564697, "learning_rate": 3e-05, "loss": 0.1286, "step": 150 }, { "epoch": 4.166666666666667, "eval_loss": 0.46725359559059143, "eval_runtime": 245.6982, "eval_samples_per_second": 2.035, "eval_steps_per_second": 0.256, "eval_wer": 24.276169265033406, "step": 150 }, { "epoch": 4.305555555555555, "grad_norm": 2.5686404705047607, "learning_rate": 3.1e-05, "loss": 0.1301, "step": 155 }, { "epoch": 4.444444444444445, "grad_norm": 2.7436068058013916, "learning_rate": 3.2000000000000005e-05, "loss": 0.1304, "step": 160 }, { "epoch": 4.444444444444445, "eval_loss": 0.46984970569610596, "eval_runtime": 245.0991, "eval_samples_per_second": 2.04, "eval_steps_per_second": 0.257, "eval_wer": 31.72128539611836, "step": 160 }, { "epoch": 4.583333333333333, "grad_norm": 2.83823823928833, "learning_rate": 3.3e-05, "loss": 0.1408, "step": 165 }, { "epoch": 4.722222222222222, "grad_norm": 2.7204811573028564, "learning_rate": 3.4000000000000007e-05, "loss": 0.1361, "step": 170 }, { "epoch": 4.722222222222222, "eval_loss": 0.4690161943435669, "eval_runtime": 246.5232, "eval_samples_per_second": 2.028, "eval_steps_per_second": 0.256, "eval_wer": 33.08940502704423, "step": 170 }, { "epoch": 4.861111111111111, "grad_norm": 3.671097993850708, "learning_rate": 3.5e-05, "loss": 0.1511, "step": 175 }, { "epoch": 5.0, "grad_norm": 6.484060764312744, "learning_rate": 3.6e-05, "loss": 0.1447, "step": 180 }, { "epoch": 5.0, "eval_loss": 0.4811546802520752, "eval_runtime": 244.9356, "eval_samples_per_second": 2.041, "eval_steps_per_second": 0.257, "eval_wer": 24.657970092268535, "step": 180 }, { "epoch": 5.138888888888889, "grad_norm": 1.9667352437973022, "learning_rate": 3.7e-05, "loss": 0.063, "step": 185 }, { "epoch": 5.277777777777778, "grad_norm": 2.1828482151031494, "learning_rate": 3.8e-05, "loss": 0.0617, "step": 190 }, { "epoch": 5.277777777777778, "eval_loss": 0.48713362216949463, "eval_runtime": 244.9851, "eval_samples_per_second": 2.041, "eval_steps_per_second": 0.257, "eval_wer": 29.939548202354437, "step": 190 }, { "epoch": 5.416666666666667, "grad_norm": 1.8774911165237427, "learning_rate": 3.9000000000000006e-05, "loss": 0.0606, "step": 195 }, { "epoch": 5.555555555555555, "grad_norm": 1.8562583923339844, "learning_rate": 4e-05, "loss": 0.0617, "step": 200 }, { "epoch": 5.555555555555555, "eval_loss": 0.488438218832016, "eval_runtime": 244.9014, "eval_samples_per_second": 2.042, "eval_steps_per_second": 0.257, "eval_wer": 24.848870505886094, "step": 200 }, { "epoch": 5.694444444444445, "grad_norm": 1.9106348752975464, "learning_rate": 4.1e-05, "loss": 0.0617, "step": 205 }, { "epoch": 5.833333333333333, "grad_norm": 1.8114972114562988, "learning_rate": 4.2e-05, "loss": 0.0577, "step": 210 }, { "epoch": 5.833333333333333, "eval_loss": 0.4998014569282532, "eval_runtime": 244.1029, "eval_samples_per_second": 2.048, "eval_steps_per_second": 0.258, "eval_wer": 26.853324848870507, "step": 210 }, { "epoch": 5.972222222222222, "grad_norm": 5.00437593460083, "learning_rate": 4.3e-05, "loss": 0.078, "step": 215 }, { "epoch": 6.111111111111111, "grad_norm": 1.4013047218322754, "learning_rate": 4.4000000000000006e-05, "loss": 0.038, "step": 220 }, { "epoch": 6.111111111111111, "eval_loss": 0.500673770904541, "eval_runtime": 247.5538, "eval_samples_per_second": 2.02, "eval_steps_per_second": 0.254, "eval_wer": 24.848870505886094, "step": 220 }, { "epoch": 6.25, "grad_norm": 1.4778488874435425, "learning_rate": 4.5e-05, "loss": 0.0243, "step": 225 }, { "epoch": 6.388888888888889, "grad_norm": 1.3681198358535767, "learning_rate": 4.600000000000001e-05, "loss": 0.0269, "step": 230 }, { "epoch": 6.388888888888889, "eval_loss": 0.5122880935668945, "eval_runtime": 243.6648, "eval_samples_per_second": 2.052, "eval_steps_per_second": 0.259, "eval_wer": 27.139675469296847, "step": 230 }, { "epoch": 6.527777777777778, "grad_norm": 1.450726866722107, "learning_rate": 4.7e-05, "loss": 0.0297, "step": 235 }, { "epoch": 6.666666666666667, "grad_norm": 1.4052125215530396, "learning_rate": 4.8e-05, "loss": 0.0321, "step": 240 }, { "epoch": 6.666666666666667, "eval_loss": 0.500522792339325, "eval_runtime": 247.602, "eval_samples_per_second": 2.019, "eval_steps_per_second": 0.254, "eval_wer": 23.35348393254852, "step": 240 }, { "epoch": 6.805555555555555, "grad_norm": 1.2223644256591797, "learning_rate": 4.9e-05, "loss": 0.0291, "step": 245 }, { "epoch": 6.944444444444445, "grad_norm": 1.463398814201355, "learning_rate": 5e-05, "loss": 0.0296, "step": 250 }, { "epoch": 6.944444444444445, "eval_loss": 0.5332342386245728, "eval_runtime": 246.3422, "eval_samples_per_second": 2.03, "eval_steps_per_second": 0.256, "eval_wer": 31.880369074132993, "step": 250 }, { "epoch": 7.083333333333333, "grad_norm": 4.257472991943359, "learning_rate": 5.1000000000000006e-05, "loss": 0.027, "step": 255 }, { "epoch": 7.222222222222222, "grad_norm": 2.294562339782715, "learning_rate": 5.2000000000000004e-05, "loss": 0.0207, "step": 260 }, { "epoch": 7.222222222222222, "eval_loss": 0.5236981511116028, "eval_runtime": 244.1894, "eval_samples_per_second": 2.048, "eval_steps_per_second": 0.258, "eval_wer": 30.066815144766146, "step": 260 }, { "epoch": 7.361111111111111, "grad_norm": 1.2468712329864502, "learning_rate": 5.300000000000001e-05, "loss": 0.0228, "step": 265 }, { "epoch": 7.5, "grad_norm": 1.8487240076065063, "learning_rate": 5.4000000000000005e-05, "loss": 0.0215, "step": 270 }, { "epoch": 7.5, "eval_loss": 0.5222529768943787, "eval_runtime": 243.6778, "eval_samples_per_second": 2.052, "eval_steps_per_second": 0.259, "eval_wer": 25.548838689150493, "step": 270 }, { "epoch": 7.638888888888889, "grad_norm": 1.1909741163253784, "learning_rate": 5.500000000000001e-05, "loss": 0.0201, "step": 275 }, { "epoch": 7.777777777777778, "grad_norm": 1.6141778230667114, "learning_rate": 5.6000000000000006e-05, "loss": 0.0198, "step": 280 }, { "epoch": 7.777777777777778, "eval_loss": 0.5157026648521423, "eval_runtime": 244.0734, "eval_samples_per_second": 2.049, "eval_steps_per_second": 0.258, "eval_wer": 30.194082087177854, "step": 280 }, { "epoch": 7.916666666666667, "grad_norm": 1.1372332572937012, "learning_rate": 5.6999999999999996e-05, "loss": 0.0193, "step": 285 }, { "epoch": 8.055555555555555, "grad_norm": 2.210016965866089, "learning_rate": 5.8e-05, "loss": 0.0273, "step": 290 }, { "epoch": 8.055555555555555, "eval_loss": 0.5289562940597534, "eval_runtime": 243.9152, "eval_samples_per_second": 2.05, "eval_steps_per_second": 0.258, "eval_wer": 27.553293032134903, "step": 290 }, { "epoch": 8.194444444444445, "grad_norm": 1.942575454711914, "learning_rate": 5.9e-05, "loss": 0.0201, "step": 295 }, { "epoch": 8.333333333333334, "grad_norm": 1.3640440702438354, "learning_rate": 6e-05, "loss": 0.0197, "step": 300 }, { "epoch": 8.333333333333334, "eval_loss": 0.5509196519851685, "eval_runtime": 243.9508, "eval_samples_per_second": 2.05, "eval_steps_per_second": 0.258, "eval_wer": 26.948775055679285, "step": 300 }, { "epoch": 8.333333333333334, "step": 300, "total_flos": 9.2409447186432e+17, "train_loss": 0.5431244759509961, "train_runtime": 10016.0212, "train_samples_per_second": 3.834, "train_steps_per_second": 0.03 } ], "logging_steps": 5, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 10, "total_flos": 9.2409447186432e+17, "train_batch_size": 128, "trial_name": null, "trial_params": null }