{ "best_metric": 23.09402795425667, "best_model_checkpoint": "whisper4/checkpoint-130", "epoch": 8.333333333333334, "eval_steps": 10, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1388888888888889, "grad_norm": 45.38771057128906, "learning_rate": 1.0000000000000002e-06, "loss": 3.9755, "step": 5 }, { "epoch": 0.2777777777777778, "grad_norm": 42.970829010009766, "learning_rate": 2.0000000000000003e-06, "loss": 3.8231, "step": 10 }, { "epoch": 0.2777777777777778, "eval_loss": 3.7088065147399902, "eval_runtime": 261.4715, "eval_samples_per_second": 1.912, "eval_steps_per_second": 0.241, "eval_wer": 76.93773824650572, "step": 10 }, { "epoch": 0.4166666666666667, "grad_norm": 42.34159469604492, "learning_rate": 3e-06, "loss": 3.5982, "step": 15 }, { "epoch": 0.5555555555555556, "grad_norm": 41.4112548828125, "learning_rate": 4.000000000000001e-06, "loss": 3.1925, "step": 20 }, { "epoch": 0.5555555555555556, "eval_loss": 2.9438674449920654, "eval_runtime": 254.9124, "eval_samples_per_second": 1.961, "eval_steps_per_second": 0.247, "eval_wer": 65.56543837357052, "step": 20 }, { "epoch": 0.6944444444444444, "grad_norm": 39.323768615722656, "learning_rate": 5e-06, "loss": 2.7453, "step": 25 }, { "epoch": 0.8333333333333334, "grad_norm": 30.337932586669922, "learning_rate": 6e-06, "loss": 2.1383, "step": 30 }, { "epoch": 0.8333333333333334, "eval_loss": 1.722086787223816, "eval_runtime": 256.1025, "eval_samples_per_second": 1.952, "eval_steps_per_second": 0.246, "eval_wer": 61.53113087674714, "step": 30 }, { "epoch": 0.9722222222222222, "grad_norm": 18.886234283447266, "learning_rate": 7.000000000000001e-06, "loss": 1.4936, "step": 35 }, { "epoch": 1.1111111111111112, "grad_norm": 10.311893463134766, "learning_rate": 8.000000000000001e-06, "loss": 1.0671, "step": 40 }, { "epoch": 1.1111111111111112, "eval_loss": 0.8320145606994629, "eval_runtime": 254.169, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.248, "eval_wer": 50.69885641677255, "step": 40 }, { "epoch": 1.25, "grad_norm": 6.484711647033691, "learning_rate": 9e-06, "loss": 0.7616, "step": 45 }, { "epoch": 1.3888888888888888, "grad_norm": 5.488741874694824, "learning_rate": 1e-05, "loss": 0.6947, "step": 50 }, { "epoch": 1.3888888888888888, "eval_loss": 0.6587409377098083, "eval_runtime": 256.6295, "eval_samples_per_second": 1.948, "eval_steps_per_second": 0.245, "eval_wer": 41.01016518424396, "step": 50 }, { "epoch": 1.5277777777777777, "grad_norm": 5.178290843963623, "learning_rate": 1.1000000000000001e-05, "loss": 0.6099, "step": 55 }, { "epoch": 1.6666666666666665, "grad_norm": 5.149004936218262, "learning_rate": 1.2e-05, "loss": 0.6263, "step": 60 }, { "epoch": 1.6666666666666665, "eval_loss": 0.5873834490776062, "eval_runtime": 254.3491, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.248, "eval_wer": 29.796696315120712, "step": 60 }, { "epoch": 1.8055555555555556, "grad_norm": 5.192800521850586, "learning_rate": 1.3000000000000001e-05, "loss": 0.5654, "step": 65 }, { "epoch": 1.9444444444444444, "grad_norm": 4.883419036865234, "learning_rate": 1.4000000000000001e-05, "loss": 0.5827, "step": 70 }, { "epoch": 1.9444444444444444, "eval_loss": 0.5402054190635681, "eval_runtime": 256.4853, "eval_samples_per_second": 1.949, "eval_steps_per_second": 0.246, "eval_wer": 27.38246505717916, "step": 70 }, { "epoch": 2.0833333333333335, "grad_norm": 4.878260612487793, "learning_rate": 1.5e-05, "loss": 0.5057, "step": 75 }, { "epoch": 2.2222222222222223, "grad_norm": 4.442017555236816, "learning_rate": 1.6000000000000003e-05, "loss": 0.4222, "step": 80 }, { "epoch": 2.2222222222222223, "eval_loss": 0.5154020190238953, "eval_runtime": 256.4153, "eval_samples_per_second": 1.95, "eval_steps_per_second": 0.246, "eval_wer": 32.05209656925032, "step": 80 }, { "epoch": 2.361111111111111, "grad_norm": 4.793032646179199, "learning_rate": 1.7000000000000003e-05, "loss": 0.3807, "step": 85 }, { "epoch": 2.5, "grad_norm": 4.618255138397217, "learning_rate": 1.8e-05, "loss": 0.4065, "step": 90 }, { "epoch": 2.5, "eval_loss": 0.49971044063568115, "eval_runtime": 254.4642, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.248, "eval_wer": 25.698856416772554, "step": 90 }, { "epoch": 2.638888888888889, "grad_norm": 4.196300983428955, "learning_rate": 1.9e-05, "loss": 0.3807, "step": 95 }, { "epoch": 2.7777777777777777, "grad_norm": 3.9986796379089355, "learning_rate": 2e-05, "loss": 0.3959, "step": 100 }, { "epoch": 2.7777777777777777, "eval_loss": 0.48037058115005493, "eval_runtime": 254.5231, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.248, "eval_wer": 23.824650571791615, "step": 100 }, { "epoch": 2.9166666666666665, "grad_norm": 4.1782402992248535, "learning_rate": 2.1e-05, "loss": 0.3847, "step": 105 }, { "epoch": 3.0555555555555554, "grad_norm": 3.125694751739502, "learning_rate": 2.2000000000000003e-05, "loss": 0.3081, "step": 110 }, { "epoch": 3.0555555555555554, "eval_loss": 0.46701571345329285, "eval_runtime": 254.757, "eval_samples_per_second": 1.963, "eval_steps_per_second": 0.247, "eval_wer": 24.841168996188056, "step": 110 }, { "epoch": 3.1944444444444446, "grad_norm": 3.2956347465515137, "learning_rate": 2.3000000000000003e-05, "loss": 0.2701, "step": 115 }, { "epoch": 3.3333333333333335, "grad_norm": 3.4233288764953613, "learning_rate": 2.4e-05, "loss": 0.2497, "step": 120 }, { "epoch": 3.3333333333333335, "eval_loss": 0.4687294661998749, "eval_runtime": 255.0619, "eval_samples_per_second": 1.96, "eval_steps_per_second": 0.247, "eval_wer": 23.284625158831005, "step": 120 }, { "epoch": 3.4722222222222223, "grad_norm": 3.383148193359375, "learning_rate": 2.5e-05, "loss": 0.268, "step": 125 }, { "epoch": 3.611111111111111, "grad_norm": 3.4522266387939453, "learning_rate": 2.6000000000000002e-05, "loss": 0.2535, "step": 130 }, { "epoch": 3.611111111111111, "eval_loss": 0.4594000279903412, "eval_runtime": 254.9771, "eval_samples_per_second": 1.961, "eval_steps_per_second": 0.247, "eval_wer": 23.09402795425667, "step": 130 }, { "epoch": 3.75, "grad_norm": 3.3171420097351074, "learning_rate": 2.7000000000000002e-05, "loss": 0.2174, "step": 135 }, { "epoch": 3.888888888888889, "grad_norm": 3.3734307289123535, "learning_rate": 2.8000000000000003e-05, "loss": 0.2428, "step": 140 }, { "epoch": 3.888888888888889, "eval_loss": 0.45448967814445496, "eval_runtime": 252.6636, "eval_samples_per_second": 1.979, "eval_steps_per_second": 0.249, "eval_wer": 23.506988564167724, "step": 140 }, { "epoch": 4.027777777777778, "grad_norm": 3.152697801589966, "learning_rate": 2.9e-05, "loss": 0.2136, "step": 145 }, { "epoch": 4.166666666666667, "grad_norm": 3.5505619049072266, "learning_rate": 3e-05, "loss": 0.1627, "step": 150 }, { "epoch": 4.166666666666667, "eval_loss": 0.46508893370628357, "eval_runtime": 253.2857, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.249, "eval_wer": 24.49174078780178, "step": 150 }, { "epoch": 4.305555555555555, "grad_norm": 2.473661184310913, "learning_rate": 3.1e-05, "loss": 0.1302, "step": 155 }, { "epoch": 4.444444444444445, "grad_norm": 2.767871379852295, "learning_rate": 3.2000000000000005e-05, "loss": 0.1224, "step": 160 }, { "epoch": 4.444444444444445, "eval_loss": 0.46860620379447937, "eval_runtime": 252.3676, "eval_samples_per_second": 1.981, "eval_steps_per_second": 0.25, "eval_wer": 23.69758576874206, "step": 160 }, { "epoch": 4.583333333333333, "grad_norm": 2.4746224880218506, "learning_rate": 3.3e-05, "loss": 0.1386, "step": 165 }, { "epoch": 4.722222222222222, "grad_norm": 2.802751302719116, "learning_rate": 3.4000000000000007e-05, "loss": 0.1326, "step": 170 }, { "epoch": 4.722222222222222, "eval_loss": 0.46528080105781555, "eval_runtime": 252.901, "eval_samples_per_second": 1.977, "eval_steps_per_second": 0.249, "eval_wer": 23.69758576874206, "step": 170 }, { "epoch": 4.861111111111111, "grad_norm": 3.2861883640289307, "learning_rate": 3.5e-05, "loss": 0.1416, "step": 175 }, { "epoch": 5.0, "grad_norm": 7.430758953094482, "learning_rate": 3.6e-05, "loss": 0.1334, "step": 180 }, { "epoch": 5.0, "eval_loss": 0.474071204662323, "eval_runtime": 253.2666, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.249, "eval_wer": 24.74587039390089, "step": 180 }, { "epoch": 5.138888888888889, "grad_norm": 2.6109585762023926, "learning_rate": 3.7e-05, "loss": 0.0807, "step": 185 }, { "epoch": 5.277777777777778, "grad_norm": 1.921268343925476, "learning_rate": 3.8e-05, "loss": 0.0659, "step": 190 }, { "epoch": 5.277777777777778, "eval_loss": 0.4791569113731384, "eval_runtime": 252.5034, "eval_samples_per_second": 1.98, "eval_steps_per_second": 0.25, "eval_wer": 24.68233799237611, "step": 190 }, { "epoch": 5.416666666666667, "grad_norm": 1.5123528242111206, "learning_rate": 3.9000000000000006e-05, "loss": 0.0548, "step": 195 }, { "epoch": 5.555555555555555, "grad_norm": 1.7394624948501587, "learning_rate": 4e-05, "loss": 0.0639, "step": 200 }, { "epoch": 5.555555555555555, "eval_loss": 0.4760441780090332, "eval_runtime": 254.7055, "eval_samples_per_second": 1.963, "eval_steps_per_second": 0.247, "eval_wer": 33.38627700127065, "step": 200 }, { "epoch": 5.694444444444445, "grad_norm": 2.006833553314209, "learning_rate": 4.1e-05, "loss": 0.0718, "step": 205 }, { "epoch": 5.833333333333333, "grad_norm": 1.9101831912994385, "learning_rate": 4.2e-05, "loss": 0.0667, "step": 210 }, { "epoch": 5.833333333333333, "eval_loss": 0.48197290301322937, "eval_runtime": 252.8934, "eval_samples_per_second": 1.977, "eval_steps_per_second": 0.249, "eval_wer": 25.47649301143583, "step": 210 }, { "epoch": 5.972222222222222, "grad_norm": 2.15120267868042, "learning_rate": 4.3e-05, "loss": 0.0833, "step": 215 }, { "epoch": 6.111111111111111, "grad_norm": 1.230398178100586, "learning_rate": 4.4000000000000006e-05, "loss": 0.042, "step": 220 }, { "epoch": 6.111111111111111, "eval_loss": 0.4932812750339508, "eval_runtime": 254.0189, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.248, "eval_wer": 29.415501905972047, "step": 220 }, { "epoch": 6.25, "grad_norm": 1.1129037141799927, "learning_rate": 4.5e-05, "loss": 0.0285, "step": 225 }, { "epoch": 6.388888888888889, "grad_norm": 1.4872666597366333, "learning_rate": 4.600000000000001e-05, "loss": 0.0325, "step": 230 }, { "epoch": 6.388888888888889, "eval_loss": 0.5065749883651733, "eval_runtime": 253.3018, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.249, "eval_wer": 29.987293519695047, "step": 230 }, { "epoch": 6.527777777777778, "grad_norm": 1.527269959449768, "learning_rate": 4.7e-05, "loss": 0.0322, "step": 235 }, { "epoch": 6.666666666666667, "grad_norm": 1.6628929376602173, "learning_rate": 4.8e-05, "loss": 0.0333, "step": 240 }, { "epoch": 6.666666666666667, "eval_loss": 0.5125746130943298, "eval_runtime": 252.7719, "eval_samples_per_second": 1.978, "eval_steps_per_second": 0.249, "eval_wer": 26.08005082592122, "step": 240 }, { "epoch": 6.805555555555555, "grad_norm": 2.2532899379730225, "learning_rate": 4.9e-05, "loss": 0.0314, "step": 245 }, { "epoch": 6.944444444444445, "grad_norm": 3.415356397628784, "learning_rate": 5e-05, "loss": 0.0333, "step": 250 }, { "epoch": 6.944444444444445, "eval_loss": 0.5072929859161377, "eval_runtime": 252.6089, "eval_samples_per_second": 1.979, "eval_steps_per_second": 0.249, "eval_wer": 24.618805590851334, "step": 250 }, { "epoch": 7.083333333333333, "grad_norm": 0.6166062951087952, "learning_rate": 5.1000000000000006e-05, "loss": 0.0251, "step": 255 }, { "epoch": 7.222222222222222, "grad_norm": 1.0601465702056885, "learning_rate": 5.2000000000000004e-05, "loss": 0.0187, "step": 260 }, { "epoch": 7.222222222222222, "eval_loss": 0.5128570795059204, "eval_runtime": 253.8562, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.248, "eval_wer": 27.350698856416773, "step": 260 }, { "epoch": 7.361111111111111, "grad_norm": 1.31247878074646, "learning_rate": 5.300000000000001e-05, "loss": 0.0219, "step": 265 }, { "epoch": 7.5, "grad_norm": 0.7731389999389648, "learning_rate": 5.4000000000000005e-05, "loss": 0.0214, "step": 270 }, { "epoch": 7.5, "eval_loss": 0.5208793878555298, "eval_runtime": 254.7138, "eval_samples_per_second": 1.963, "eval_steps_per_second": 0.247, "eval_wer": 28.208386277001267, "step": 270 }, { "epoch": 7.638888888888889, "grad_norm": 2.0568604469299316, "learning_rate": 5.500000000000001e-05, "loss": 0.0248, "step": 275 }, { "epoch": 7.777777777777778, "grad_norm": 1.7497611045837402, "learning_rate": 5.6000000000000006e-05, "loss": 0.0187, "step": 280 }, { "epoch": 7.777777777777778, "eval_loss": 0.5212948322296143, "eval_runtime": 254.6913, "eval_samples_per_second": 1.963, "eval_steps_per_second": 0.247, "eval_wer": 29.320203303684877, "step": 280 }, { "epoch": 7.916666666666667, "grad_norm": 2.408604383468628, "learning_rate": 5.6999999999999996e-05, "loss": 0.0303, "step": 285 }, { "epoch": 8.055555555555555, "grad_norm": 1.3855714797973633, "learning_rate": 5.8e-05, "loss": 0.0312, "step": 290 }, { "epoch": 8.055555555555555, "eval_loss": 0.5274094939231873, "eval_runtime": 253.5749, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.248, "eval_wer": 34.6569250317662, "step": 290 }, { "epoch": 8.194444444444445, "grad_norm": 1.87900972366333, "learning_rate": 5.9e-05, "loss": 0.0153, "step": 295 }, { "epoch": 8.333333333333334, "grad_norm": 1.318336009979248, "learning_rate": 6e-05, "loss": 0.0172, "step": 300 }, { "epoch": 8.333333333333334, "eval_loss": 0.5408744812011719, "eval_runtime": 252.2148, "eval_samples_per_second": 1.982, "eval_steps_per_second": 0.25, "eval_wer": 28.27191867852605, "step": 300 }, { "epoch": 8.333333333333334, "step": 300, "total_flos": 9.2409447186432e+17, "train_loss": 0.5446730978041887, "train_runtime": 8175.9362, "train_samples_per_second": 4.697, "train_steps_per_second": 0.037 } ], "logging_steps": 5, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 10, "total_flos": 9.2409447186432e+17, "train_batch_size": 128, "trial_name": null, "trial_params": null }