{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 18300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.55, "learning_rate": 4.950000000000001e-06, "loss": 13.5246, "step": 100 }, { "epoch": 1.09, "learning_rate": 9.950000000000001e-06, "loss": 6.1441, "step": 200 }, { "epoch": 1.64, "learning_rate": 1.4950000000000001e-05, "loss": 4.2189, "step": 300 }, { "epoch": 2.19, "learning_rate": 1.995e-05, "loss": 3.6075, "step": 400 }, { "epoch": 2.73, "learning_rate": 2.495e-05, "loss": 3.3043, "step": 500 }, { "epoch": 2.73, "eval_loss": 3.241492748260498, "eval_runtime": 169.6485, "eval_samples_per_second": 15.444, "eval_steps_per_second": 1.933, "eval_wer": 1.0, "step": 500 }, { "epoch": 3.28, "learning_rate": 2.995e-05, "loss": 3.2087, "step": 600 }, { "epoch": 3.83, "learning_rate": 3.495e-05, "loss": 3.1835, "step": 700 }, { "epoch": 4.37, "learning_rate": 3.995e-05, "loss": 3.1414, "step": 800 }, { "epoch": 4.92, "learning_rate": 4.495e-05, "loss": 3.0995, "step": 900 }, { "epoch": 5.46, "learning_rate": 4.995e-05, "loss": 3.0482, "step": 1000 }, { "epoch": 5.46, "eval_loss": 2.9591026306152344, "eval_runtime": 128.3332, "eval_samples_per_second": 20.416, "eval_steps_per_second": 2.556, "eval_wer": 1.0, "step": 1000 }, { "epoch": 6.01, "learning_rate": 5.495e-05, "loss": 2.7368, "step": 1100 }, { "epoch": 6.56, "learning_rate": 5.995000000000001e-05, "loss": 2.0079, "step": 1200 }, { "epoch": 7.1, "learning_rate": 6.494999999999999e-05, "loss": 1.6588, "step": 1300 }, { "epoch": 7.65, "learning_rate": 6.995e-05, "loss": 1.5644, "step": 1400 }, { "epoch": 8.2, "learning_rate": 7.495e-05, "loss": 1.4767, "step": 1500 }, { "epoch": 8.2, "eval_loss": 0.47794264554977417, "eval_runtime": 128.1544, "eval_samples_per_second": 20.444, "eval_steps_per_second": 2.559, "eval_wer": 0.5776572037593256, "step": 1500 }, { "epoch": 8.74, "learning_rate": 7.995e-05, "loss": 1.4421, "step": 1600 }, { "epoch": 9.29, "learning_rate": 8.495e-05, "loss": 1.4009, "step": 1700 }, { "epoch": 9.84, "learning_rate": 8.995e-05, "loss": 1.3682, "step": 1800 }, { "epoch": 10.38, "learning_rate": 9.495e-05, "loss": 1.3377, "step": 1900 }, { "epoch": 10.93, "learning_rate": 9.995e-05, "loss": 1.3152, "step": 2000 }, { "epoch": 10.93, "eval_loss": 0.36967846751213074, "eval_runtime": 127.926, "eval_samples_per_second": 20.481, "eval_steps_per_second": 2.564, "eval_wer": 0.49384749539773276, "step": 2000 }, { "epoch": 11.48, "learning_rate": 9.939877300613497e-05, "loss": 1.3009, "step": 2100 }, { "epoch": 12.02, "learning_rate": 9.878527607361964e-05, "loss": 1.2799, "step": 2200 }, { "epoch": 12.57, "learning_rate": 9.81717791411043e-05, "loss": 1.2559, "step": 2300 }, { "epoch": 13.11, "learning_rate": 9.756441717791411e-05, "loss": 1.2379, "step": 2400 }, { "epoch": 13.66, "learning_rate": 9.695092024539878e-05, "loss": 1.2246, "step": 2500 }, { "epoch": 13.66, "eval_loss": 0.3084094822406769, "eval_runtime": 127.6781, "eval_samples_per_second": 20.52, "eval_steps_per_second": 2.569, "eval_wer": 0.44593547136905337, "step": 2500 }, { "epoch": 14.21, "learning_rate": 9.633742331288344e-05, "loss": 1.2209, "step": 2600 }, { "epoch": 14.75, "learning_rate": 9.57239263803681e-05, "loss": 1.1972, "step": 2700 }, { "epoch": 15.3, "learning_rate": 9.511042944785277e-05, "loss": 1.1885, "step": 2800 }, { "epoch": 15.85, "learning_rate": 9.449693251533743e-05, "loss": 1.1807, "step": 2900 }, { "epoch": 16.39, "learning_rate": 9.388343558282209e-05, "loss": 1.1781, "step": 3000 }, { "epoch": 16.39, "eval_loss": 0.2842142581939697, "eval_runtime": 128.3875, "eval_samples_per_second": 20.407, "eval_steps_per_second": 2.555, "eval_wer": 0.4154151729483577, "step": 3000 }, { "epoch": 16.94, "learning_rate": 9.326993865030675e-05, "loss": 1.1757, "step": 3100 }, { "epoch": 17.49, "learning_rate": 9.265644171779141e-05, "loss": 1.155, "step": 3200 }, { "epoch": 18.03, "learning_rate": 9.204294478527608e-05, "loss": 1.1455, "step": 3300 }, { "epoch": 18.58, "learning_rate": 9.142944785276074e-05, "loss": 1.1376, "step": 3400 }, { "epoch": 19.13, "learning_rate": 9.081595092024541e-05, "loss": 1.1351, "step": 3500 }, { "epoch": 19.13, "eval_loss": 0.26151829957962036, "eval_runtime": 127.9176, "eval_samples_per_second": 20.482, "eval_steps_per_second": 2.564, "eval_wer": 0.3929367309369247, "step": 3500 }, { "epoch": 19.67, "learning_rate": 9.020245398773006e-05, "loss": 1.1262, "step": 3600 }, { "epoch": 20.22, "learning_rate": 8.958895705521472e-05, "loss": 1.1265, "step": 3700 }, { "epoch": 20.77, "learning_rate": 8.897546012269939e-05, "loss": 1.1033, "step": 3800 }, { "epoch": 21.31, "learning_rate": 8.836196319018405e-05, "loss": 1.1016, "step": 3900 }, { "epoch": 21.86, "learning_rate": 8.774846625766872e-05, "loss": 1.1052, "step": 4000 }, { "epoch": 21.86, "eval_loss": 0.24618586897850037, "eval_runtime": 128.2681, "eval_samples_per_second": 20.426, "eval_steps_per_second": 2.557, "eval_wer": 0.3746729968026354, "step": 4000 }, { "epoch": 22.4, "learning_rate": 8.714110429447854e-05, "loss": 1.0964, "step": 4100 }, { "epoch": 22.95, "learning_rate": 8.652760736196319e-05, "loss": 1.0848, "step": 4200 }, { "epoch": 23.5, "learning_rate": 8.591411042944786e-05, "loss": 1.0714, "step": 4300 }, { "epoch": 24.04, "learning_rate": 8.530061349693252e-05, "loss": 1.0696, "step": 4400 }, { "epoch": 24.59, "learning_rate": 8.468711656441717e-05, "loss": 1.0711, "step": 4500 }, { "epoch": 24.59, "eval_loss": 0.23661433160305023, "eval_runtime": 128.1823, "eval_samples_per_second": 20.44, "eval_steps_per_second": 2.559, "eval_wer": 0.36522623776765817, "step": 4500 }, { "epoch": 25.14, "learning_rate": 8.407361963190185e-05, "loss": 1.0519, "step": 4600 }, { "epoch": 25.68, "learning_rate": 8.346012269938652e-05, "loss": 1.0673, "step": 4700 }, { "epoch": 26.23, "learning_rate": 8.284662576687117e-05, "loss": 1.0606, "step": 4800 }, { "epoch": 26.78, "learning_rate": 8.223312883435583e-05, "loss": 1.0711, "step": 4900 }, { "epoch": 27.32, "learning_rate": 8.16196319018405e-05, "loss": 1.035, "step": 5000 }, { "epoch": 27.32, "eval_loss": 0.22680768370628357, "eval_runtime": 127.7363, "eval_samples_per_second": 20.511, "eval_steps_per_second": 2.568, "eval_wer": 0.3556825888964248, "step": 5000 }, { "epoch": 27.87, "learning_rate": 8.100613496932515e-05, "loss": 1.0428, "step": 5100 }, { "epoch": 28.42, "learning_rate": 8.039877300613497e-05, "loss": 1.0322, "step": 5200 }, { "epoch": 28.96, "learning_rate": 7.978527607361964e-05, "loss": 1.0291, "step": 5300 }, { "epoch": 29.51, "learning_rate": 7.91717791411043e-05, "loss": 1.0258, "step": 5400 }, { "epoch": 30.05, "learning_rate": 7.855828220858897e-05, "loss": 1.0277, "step": 5500 }, { "epoch": 30.05, "eval_loss": 0.22434431314468384, "eval_runtime": 127.9511, "eval_samples_per_second": 20.477, "eval_steps_per_second": 2.563, "eval_wer": 0.3449762619901172, "step": 5500 }, { "epoch": 30.6, "learning_rate": 7.794478527607363e-05, "loss": 1.0208, "step": 5600 }, { "epoch": 31.15, "learning_rate": 7.733128834355828e-05, "loss": 1.0252, "step": 5700 }, { "epoch": 31.69, "learning_rate": 7.671779141104295e-05, "loss": 1.0157, "step": 5800 }, { "epoch": 32.24, "learning_rate": 7.610429447852761e-05, "loss": 1.0094, "step": 5900 }, { "epoch": 32.79, "learning_rate": 7.549079754601228e-05, "loss": 1.002, "step": 6000 }, { "epoch": 32.79, "eval_loss": 0.22044695913791656, "eval_runtime": 127.6026, "eval_samples_per_second": 20.532, "eval_steps_per_second": 2.57, "eval_wer": 0.3388722023059781, "step": 6000 }, { "epoch": 33.33, "learning_rate": 7.487730061349694e-05, "loss": 0.996, "step": 6100 }, { "epoch": 33.88, "learning_rate": 7.42638036809816e-05, "loss": 1.002, "step": 6200 }, { "epoch": 34.43, "learning_rate": 7.365030674846626e-05, "loss": 0.9845, "step": 6300 }, { "epoch": 34.97, "learning_rate": 7.303680981595092e-05, "loss": 0.9958, "step": 6400 }, { "epoch": 35.52, "learning_rate": 7.242331288343559e-05, "loss": 0.9837, "step": 6500 }, { "epoch": 35.52, "eval_loss": 0.2156379520893097, "eval_runtime": 127.7813, "eval_samples_per_second": 20.504, "eval_steps_per_second": 2.567, "eval_wer": 0.33489971901947485, "step": 6500 }, { "epoch": 36.07, "learning_rate": 7.180981595092025e-05, "loss": 1.0028, "step": 6600 }, { "epoch": 36.61, "learning_rate": 7.119631901840491e-05, "loss": 0.9722, "step": 6700 }, { "epoch": 37.16, "learning_rate": 7.058282208588958e-05, "loss": 0.9784, "step": 6800 }, { "epoch": 37.7, "learning_rate": 6.996932515337423e-05, "loss": 0.9822, "step": 6900 }, { "epoch": 38.25, "learning_rate": 6.93558282208589e-05, "loss": 0.9773, "step": 7000 }, { "epoch": 38.25, "eval_loss": 0.21265123784542084, "eval_runtime": 128.3357, "eval_samples_per_second": 20.415, "eval_steps_per_second": 2.556, "eval_wer": 0.3288925491715919, "step": 7000 }, { "epoch": 38.8, "learning_rate": 6.874233128834356e-05, "loss": 0.9649, "step": 7100 }, { "epoch": 39.34, "learning_rate": 6.812883435582822e-05, "loss": 0.9728, "step": 7200 }, { "epoch": 39.89, "learning_rate": 6.751533742331289e-05, "loss": 0.9663, "step": 7300 }, { "epoch": 40.44, "learning_rate": 6.690184049079755e-05, "loss": 0.9762, "step": 7400 }, { "epoch": 40.98, "learning_rate": 6.629447852760736e-05, "loss": 0.9807, "step": 7500 }, { "epoch": 40.98, "eval_loss": 0.21417580544948578, "eval_runtime": 128.3216, "eval_samples_per_second": 20.417, "eval_steps_per_second": 2.556, "eval_wer": 0.32743920162774925, "step": 7500 }, { "epoch": 41.53, "learning_rate": 6.568098159509203e-05, "loss": 0.9647, "step": 7600 }, { "epoch": 42.08, "learning_rate": 6.506748466257669e-05, "loss": 0.9748, "step": 7700 }, { "epoch": 42.62, "learning_rate": 6.445398773006134e-05, "loss": 0.9484, "step": 7800 }, { "epoch": 43.17, "learning_rate": 6.384049079754602e-05, "loss": 0.9558, "step": 7900 }, { "epoch": 43.72, "learning_rate": 6.322699386503069e-05, "loss": 0.9582, "step": 8000 }, { "epoch": 43.72, "eval_loss": 0.20038354396820068, "eval_runtime": 127.7709, "eval_samples_per_second": 20.505, "eval_steps_per_second": 2.567, "eval_wer": 0.314165294060653, "step": 8000 }, { "epoch": 44.26, "learning_rate": 6.261349693251534e-05, "loss": 0.949, "step": 8100 }, { "epoch": 44.81, "learning_rate": 6.2e-05, "loss": 0.9491, "step": 8200 }, { "epoch": 45.36, "learning_rate": 6.138650306748467e-05, "loss": 0.9447, "step": 8300 }, { "epoch": 45.9, "learning_rate": 6.0773006134969325e-05, "loss": 0.9368, "step": 8400 }, { "epoch": 46.45, "learning_rate": 6.015950920245399e-05, "loss": 0.9548, "step": 8500 }, { "epoch": 46.45, "eval_loss": 0.20219053328037262, "eval_runtime": 127.6869, "eval_samples_per_second": 20.519, "eval_steps_per_second": 2.569, "eval_wer": 0.30500920453444436, "step": 8500 }, { "epoch": 46.99, "learning_rate": 5.9546012269938655e-05, "loss": 0.9407, "step": 8600 }, { "epoch": 47.54, "learning_rate": 5.893251533742331e-05, "loss": 0.9312, "step": 8700 }, { "epoch": 48.09, "learning_rate": 5.831901840490798e-05, "loss": 0.9446, "step": 8800 }, { "epoch": 48.63, "learning_rate": 5.770552147239264e-05, "loss": 0.9225, "step": 8900 }, { "epoch": 49.18, "learning_rate": 5.70920245398773e-05, "loss": 0.9251, "step": 9000 }, { "epoch": 49.18, "eval_loss": 0.2018980085849762, "eval_runtime": 130.6299, "eval_samples_per_second": 20.057, "eval_steps_per_second": 2.511, "eval_wer": 0.3035074120724736, "step": 9000 }, { "epoch": 49.73, "learning_rate": 5.6478527607361965e-05, "loss": 0.9251, "step": 9100 }, { "epoch": 50.27, "learning_rate": 5.586503067484663e-05, "loss": 0.9172, "step": 9200 }, { "epoch": 50.82, "learning_rate": 5.5251533742331294e-05, "loss": 0.9103, "step": 9300 }, { "epoch": 51.37, "learning_rate": 5.463803680981595e-05, "loss": 0.9133, "step": 9400 }, { "epoch": 51.91, "learning_rate": 5.402453987730062e-05, "loss": 0.9103, "step": 9500 }, { "epoch": 51.91, "eval_loss": 0.1963759958744049, "eval_runtime": 127.8377, "eval_samples_per_second": 20.495, "eval_steps_per_second": 2.566, "eval_wer": 0.30210250944675904, "step": 9500 }, { "epoch": 52.46, "learning_rate": 5.341104294478528e-05, "loss": 0.9109, "step": 9600 }, { "epoch": 53.01, "learning_rate": 5.279754601226994e-05, "loss": 0.9153, "step": 9700 }, { "epoch": 53.55, "learning_rate": 5.2184049079754604e-05, "loss": 0.9113, "step": 9800 }, { "epoch": 54.1, "learning_rate": 5.157055214723927e-05, "loss": 0.9181, "step": 9900 }, { "epoch": 54.64, "learning_rate": 5.095705521472393e-05, "loss": 0.915, "step": 10000 }, { "epoch": 54.64, "eval_loss": 0.19702854752540588, "eval_runtime": 128.314, "eval_samples_per_second": 20.419, "eval_steps_per_second": 2.556, "eval_wer": 0.30316829764557696, "step": 10000 }, { "epoch": 55.19, "learning_rate": 5.034969325153375e-05, "loss": 0.8971, "step": 10100 }, { "epoch": 55.74, "learning_rate": 4.973619631901841e-05, "loss": 0.9066, "step": 10200 }, { "epoch": 56.28, "learning_rate": 4.9122699386503065e-05, "loss": 0.8993, "step": 10300 }, { "epoch": 56.83, "learning_rate": 4.850920245398774e-05, "loss": 0.8933, "step": 10400 }, { "epoch": 57.38, "learning_rate": 4.7895705521472395e-05, "loss": 0.8962, "step": 10500 }, { "epoch": 57.38, "eval_loss": 0.2006961703300476, "eval_runtime": 128.3801, "eval_samples_per_second": 20.408, "eval_steps_per_second": 2.555, "eval_wer": 0.30462164518941964, "step": 10500 }, { "epoch": 57.92, "learning_rate": 4.728220858895705e-05, "loss": 0.8829, "step": 10600 }, { "epoch": 58.47, "learning_rate": 4.6668711656441724e-05, "loss": 0.8932, "step": 10700 }, { "epoch": 59.02, "learning_rate": 4.605521472392638e-05, "loss": 0.8986, "step": 10800 }, { "epoch": 59.56, "learning_rate": 4.544171779141104e-05, "loss": 0.8892, "step": 10900 }, { "epoch": 60.11, "learning_rate": 4.482822085889571e-05, "loss": 0.8729, "step": 11000 }, { "epoch": 60.11, "eval_loss": 0.19668185710906982, "eval_runtime": 128.5612, "eval_samples_per_second": 20.379, "eval_steps_per_second": 2.551, "eval_wer": 0.2942059877918806, "step": 11000 }, { "epoch": 60.66, "learning_rate": 4.421472392638037e-05, "loss": 0.876, "step": 11100 }, { "epoch": 61.2, "learning_rate": 4.3601226993865034e-05, "loss": 0.8759, "step": 11200 }, { "epoch": 61.75, "learning_rate": 4.29877300613497e-05, "loss": 0.8813, "step": 11300 }, { "epoch": 62.3, "learning_rate": 4.237423312883436e-05, "loss": 0.8684, "step": 11400 }, { "epoch": 62.84, "learning_rate": 4.176073619631902e-05, "loss": 0.8744, "step": 11500 }, { "epoch": 62.84, "eval_loss": 0.19520752131938934, "eval_runtime": 127.705, "eval_samples_per_second": 20.516, "eval_steps_per_second": 2.568, "eval_wer": 0.2885379323708943, "step": 11500 }, { "epoch": 63.39, "learning_rate": 4.1147239263803686e-05, "loss": 0.8665, "step": 11600 }, { "epoch": 63.93, "learning_rate": 4.0533742331288344e-05, "loss": 0.8757, "step": 11700 }, { "epoch": 64.48, "learning_rate": 3.992024539877301e-05, "loss": 0.8694, "step": 11800 }, { "epoch": 65.03, "learning_rate": 3.930674846625767e-05, "loss": 0.8578, "step": 11900 }, { "epoch": 65.57, "learning_rate": 3.869938650306748e-05, "loss": 0.874, "step": 12000 }, { "epoch": 65.57, "eval_loss": 0.18939977884292603, "eval_runtime": 128.0037, "eval_samples_per_second": 20.468, "eval_steps_per_second": 2.562, "eval_wer": 0.28950683073345607, "step": 12000 }, { "epoch": 66.12, "learning_rate": 3.808588957055215e-05, "loss": 0.8628, "step": 12100 }, { "epoch": 66.67, "learning_rate": 3.747239263803681e-05, "loss": 0.8564, "step": 12200 }, { "epoch": 67.21, "learning_rate": 3.685889570552147e-05, "loss": 0.8502, "step": 12300 }, { "epoch": 67.76, "learning_rate": 3.6245398773006135e-05, "loss": 0.8521, "step": 12400 }, { "epoch": 68.31, "learning_rate": 3.56319018404908e-05, "loss": 0.8457, "step": 12500 }, { "epoch": 68.31, "eval_loss": 0.18946239352226257, "eval_runtime": 128.0623, "eval_samples_per_second": 20.459, "eval_steps_per_second": 2.561, "eval_wer": 0.28282143203177984, "step": 12500 }, { "epoch": 68.85, "learning_rate": 3.501840490797546e-05, "loss": 0.8624, "step": 12600 }, { "epoch": 69.4, "learning_rate": 3.440490797546013e-05, "loss": 0.8394, "step": 12700 }, { "epoch": 69.95, "learning_rate": 3.379141104294479e-05, "loss": 0.8381, "step": 12800 }, { "epoch": 70.49, "learning_rate": 3.3177914110429445e-05, "loss": 0.8431, "step": 12900 }, { "epoch": 71.04, "learning_rate": 3.2564417177914117e-05, "loss": 0.8519, "step": 13000 }, { "epoch": 71.04, "eval_loss": 0.19119836390018463, "eval_runtime": 127.8936, "eval_samples_per_second": 20.486, "eval_steps_per_second": 2.565, "eval_wer": 0.28747214417207634, "step": 13000 }, { "epoch": 71.58, "learning_rate": 3.1950920245398774e-05, "loss": 0.8463, "step": 13100 }, { "epoch": 72.13, "learning_rate": 3.133742331288343e-05, "loss": 0.8492, "step": 13200 }, { "epoch": 72.68, "learning_rate": 3.0723926380368104e-05, "loss": 0.8354, "step": 13300 }, { "epoch": 73.22, "learning_rate": 3.0110429447852762e-05, "loss": 0.8408, "step": 13400 }, { "epoch": 73.77, "learning_rate": 2.9496932515337423e-05, "loss": 0.8301, "step": 13500 }, { "epoch": 73.77, "eval_loss": 0.18781304359436035, "eval_runtime": 127.7845, "eval_samples_per_second": 20.503, "eval_steps_per_second": 2.567, "eval_wer": 0.2760391434938475, "step": 13500 }, { "epoch": 74.32, "learning_rate": 2.8883435582822088e-05, "loss": 0.8425, "step": 13600 }, { "epoch": 74.86, "learning_rate": 2.826993865030675e-05, "loss": 0.8416, "step": 13700 }, { "epoch": 75.41, "learning_rate": 2.7656441717791414e-05, "loss": 0.8269, "step": 13800 }, { "epoch": 75.96, "learning_rate": 2.7042944785276075e-05, "loss": 0.8212, "step": 13900 }, { "epoch": 76.5, "learning_rate": 2.6429447852760736e-05, "loss": 0.8226, "step": 14000 }, { "epoch": 76.5, "eval_loss": 0.18075355887413025, "eval_runtime": 127.7671, "eval_samples_per_second": 20.506, "eval_steps_per_second": 2.567, "eval_wer": 0.2701288634822207, "step": 14000 }, { "epoch": 77.05, "learning_rate": 2.58159509202454e-05, "loss": 0.8265, "step": 14100 }, { "epoch": 77.6, "learning_rate": 2.5202453987730063e-05, "loss": 0.8279, "step": 14200 }, { "epoch": 78.14, "learning_rate": 2.4588957055214727e-05, "loss": 0.8273, "step": 14300 }, { "epoch": 78.69, "learning_rate": 2.3975460122699385e-05, "loss": 0.8067, "step": 14400 }, { "epoch": 79.23, "learning_rate": 2.33680981595092e-05, "loss": 0.8071, "step": 14500 }, { "epoch": 79.23, "eval_loss": 0.18486912548542023, "eval_runtime": 127.9551, "eval_samples_per_second": 20.476, "eval_steps_per_second": 2.563, "eval_wer": 0.27410134676872394, "step": 14500 }, { "epoch": 79.78, "learning_rate": 2.2754601226993866e-05, "loss": 0.8231, "step": 14600 }, { "epoch": 80.33, "learning_rate": 2.214110429447853e-05, "loss": 0.808, "step": 14700 }, { "epoch": 80.87, "learning_rate": 2.1527607361963192e-05, "loss": 0.8107, "step": 14800 }, { "epoch": 81.42, "learning_rate": 2.0914110429447853e-05, "loss": 0.7966, "step": 14900 }, { "epoch": 81.97, "learning_rate": 2.0300613496932515e-05, "loss": 0.7999, "step": 15000 }, { "epoch": 81.97, "eval_loss": 0.18083913624286652, "eval_runtime": 127.4398, "eval_samples_per_second": 20.559, "eval_steps_per_second": 2.574, "eval_wer": 0.2717275457804476, "step": 15000 }, { "epoch": 82.51, "learning_rate": 1.968711656441718e-05, "loss": 0.8171, "step": 15100 }, { "epoch": 83.06, "learning_rate": 1.907361963190184e-05, "loss": 0.8034, "step": 15200 }, { "epoch": 83.61, "learning_rate": 1.8460122699386502e-05, "loss": 0.8086, "step": 15300 }, { "epoch": 84.15, "learning_rate": 1.7846625766871167e-05, "loss": 0.8005, "step": 15400 }, { "epoch": 84.7, "learning_rate": 1.723312883435583e-05, "loss": 0.7947, "step": 15500 }, { "epoch": 84.7, "eval_loss": 0.1820572018623352, "eval_runtime": 127.1268, "eval_samples_per_second": 20.609, "eval_steps_per_second": 2.58, "eval_wer": 0.2715822110260634, "step": 15500 }, { "epoch": 85.25, "learning_rate": 1.661963190184049e-05, "loss": 0.7947, "step": 15600 }, { "epoch": 85.79, "learning_rate": 1.6006134969325154e-05, "loss": 0.7892, "step": 15700 }, { "epoch": 86.34, "learning_rate": 1.539263803680982e-05, "loss": 0.7969, "step": 15800 }, { "epoch": 86.89, "learning_rate": 1.477914110429448e-05, "loss": 0.801, "step": 15900 }, { "epoch": 87.43, "learning_rate": 1.4165644171779141e-05, "loss": 0.7783, "step": 16000 }, { "epoch": 87.43, "eval_loss": 0.18241995573043823, "eval_runtime": 127.0573, "eval_samples_per_second": 20.621, "eval_steps_per_second": 2.582, "eval_wer": 0.26610793527758936, "step": 16000 }, { "epoch": 87.98, "learning_rate": 1.3552147239263804e-05, "loss": 0.7915, "step": 16100 }, { "epoch": 88.52, "learning_rate": 1.2938650306748467e-05, "loss": 0.7862, "step": 16200 }, { "epoch": 89.07, "learning_rate": 1.232515337423313e-05, "loss": 0.7816, "step": 16300 }, { "epoch": 89.62, "learning_rate": 1.1711656441717792e-05, "loss": 0.7816, "step": 16400 }, { "epoch": 90.16, "learning_rate": 1.1098159509202455e-05, "loss": 0.7729, "step": 16500 }, { "epoch": 90.16, "eval_loss": 0.17727895081043243, "eval_runtime": 128.0807, "eval_samples_per_second": 20.456, "eval_steps_per_second": 2.561, "eval_wer": 0.2638794690436973, "step": 16500 }, { "epoch": 90.71, "learning_rate": 1.0484662576687116e-05, "loss": 0.7778, "step": 16600 }, { "epoch": 91.26, "learning_rate": 9.87116564417178e-06, "loss": 0.7844, "step": 16700 }, { "epoch": 91.8, "learning_rate": 9.257668711656442e-06, "loss": 0.7842, "step": 16800 }, { "epoch": 92.35, "learning_rate": 8.644171779141105e-06, "loss": 0.7755, "step": 16900 }, { "epoch": 92.9, "learning_rate": 8.030674846625766e-06, "loss": 0.7759, "step": 17000 }, { "epoch": 92.9, "eval_loss": 0.17666833102703094, "eval_runtime": 128.2582, "eval_samples_per_second": 20.428, "eval_steps_per_second": 2.557, "eval_wer": 0.26291057068113555, "step": 17000 }, { "epoch": 93.44, "learning_rate": 7.417177914110429e-06, "loss": 0.7672, "step": 17100 }, { "epoch": 93.99, "learning_rate": 6.8036809815950924e-06, "loss": 0.7813, "step": 17200 }, { "epoch": 94.54, "learning_rate": 6.1963190184049085e-06, "loss": 0.7781, "step": 17300 }, { "epoch": 95.08, "learning_rate": 5.582822085889571e-06, "loss": 0.7711, "step": 17400 }, { "epoch": 95.63, "learning_rate": 4.969325153374233e-06, "loss": 0.7713, "step": 17500 }, { "epoch": 95.63, "eval_loss": 0.17804710566997528, "eval_runtime": 127.8273, "eval_samples_per_second": 20.496, "eval_steps_per_second": 2.566, "eval_wer": 0.26208700707295807, "step": 17500 }, { "epoch": 96.17, "learning_rate": 4.355828220858896e-06, "loss": 0.7789, "step": 17600 }, { "epoch": 96.72, "learning_rate": 3.7423312883435584e-06, "loss": 0.7732, "step": 17700 }, { "epoch": 97.27, "learning_rate": 3.128834355828221e-06, "loss": 0.7688, "step": 17800 }, { "epoch": 97.81, "learning_rate": 2.5153374233128836e-06, "loss": 0.7724, "step": 17900 }, { "epoch": 98.36, "learning_rate": 1.9018404907975462e-06, "loss": 0.7628, "step": 18000 }, { "epoch": 98.36, "eval_loss": 0.17734766006469727, "eval_runtime": 128.0092, "eval_samples_per_second": 20.467, "eval_steps_per_second": 2.562, "eval_wer": 0.2594225365759132, "step": 18000 }, { "epoch": 98.91, "learning_rate": 1.2883435582822088e-06, "loss": 0.7634, "step": 18100 }, { "epoch": 99.45, "learning_rate": 6.748466257668713e-07, "loss": 0.7638, "step": 18200 }, { "epoch": 100.0, "learning_rate": 6.134969325153375e-08, "loss": 0.7746, "step": 18300 }, { "epoch": 100.0, "step": 18300, "total_flos": 1.0288191185677785e+20, "train_loss": 1.1684310275218526, "train_runtime": 46836.9916, "train_samples_per_second": 12.501, "train_steps_per_second": 0.391 } ], "max_steps": 18300, "num_train_epochs": 100, "total_flos": 1.0288191185677785e+20, "trial_name": null, "trial_params": null }