{ "best_metric": 1.7582672834396362, "best_model_checkpoint": "ai-light-dance_singing2_ft_wav2vec2-large-xlsr-53/checkpoint-3360", "epoch": 29.998885172798218, "global_step": 3360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 2.8e-07, "loss": 34.7195, "step": 10 }, { "epoch": 0.18, "learning_rate": 6.800000000000001e-07, "loss": 35.9093, "step": 20 }, { "epoch": 0.27, "learning_rate": 1.08e-06, "loss": 31.3284, "step": 30 }, { "epoch": 0.36, "learning_rate": 1.48e-06, "loss": 34.058, "step": 40 }, { "epoch": 0.45, "learning_rate": 1.8799999999999998e-06, "loss": 33.406, "step": 50 }, { "epoch": 0.54, "learning_rate": 2.2799999999999998e-06, "loss": 34.702, "step": 60 }, { "epoch": 0.62, "learning_rate": 2.68e-06, "loss": 34.8616, "step": 70 }, { "epoch": 0.71, "learning_rate": 3.0799999999999997e-06, "loss": 30.6447, "step": 80 }, { "epoch": 0.8, "learning_rate": 3.4799999999999997e-06, "loss": 33.7505, "step": 90 }, { "epoch": 0.89, "learning_rate": 3.88e-06, "loss": 32.2757, "step": 100 }, { "epoch": 0.98, "learning_rate": 3.991411042944785e-06, "loss": 27.4755, "step": 110 }, { "epoch": 1.0, "eval_loss": 23.2618350982666, "eval_runtime": 226.4347, "eval_samples_per_second": 8.806, "eval_steps_per_second": 0.883, "eval_wer": 1.0, "step": 112 }, { "epoch": 1.07, "learning_rate": 3.979141104294478e-06, "loss": 20.6317, "step": 120 }, { "epoch": 1.16, "learning_rate": 3.9668711656441715e-06, "loss": 16.4159, "step": 130 }, { "epoch": 1.25, "learning_rate": 3.954601226993865e-06, "loss": 12.3928, "step": 140 }, { "epoch": 1.34, "learning_rate": 3.942331288343558e-06, "loss": 10.4952, "step": 150 }, { "epoch": 1.43, "learning_rate": 3.930061349693252e-06, "loss": 8.7608, "step": 160 }, { "epoch": 1.52, "learning_rate": 3.917791411042945e-06, "loss": 8.1101, "step": 170 }, { "epoch": 1.61, "learning_rate": 3.905521472392638e-06, "loss": 7.022, "step": 180 }, { "epoch": 1.7, "learning_rate": 3.893251533742331e-06, "loss": 6.5763, "step": 190 }, { "epoch": 1.78, "learning_rate": 3.880981595092024e-06, "loss": 6.0121, "step": 200 }, { "epoch": 1.87, "learning_rate": 3.868711656441717e-06, "loss": 5.7837, "step": 210 }, { "epoch": 1.96, "learning_rate": 3.8564417177914105e-06, "loss": 5.5145, "step": 220 }, { "epoch": 2.0, "eval_loss": 5.221268177032471, "eval_runtime": 225.8013, "eval_samples_per_second": 8.831, "eval_steps_per_second": 0.886, "eval_wer": 1.0, "step": 224 }, { "epoch": 2.05, "learning_rate": 3.8441717791411045e-06, "loss": 5.52, "step": 230 }, { "epoch": 2.14, "learning_rate": 3.831901840490798e-06, "loss": 4.9127, "step": 240 }, { "epoch": 2.23, "learning_rate": 3.819631901840491e-06, "loss": 4.7525, "step": 250 }, { "epoch": 2.32, "learning_rate": 3.807361963190184e-06, "loss": 4.5823, "step": 260 }, { "epoch": 2.41, "learning_rate": 3.795092024539877e-06, "loss": 4.5971, "step": 270 }, { "epoch": 2.5, "learning_rate": 3.7828220858895705e-06, "loss": 4.5216, "step": 280 }, { "epoch": 2.59, "learning_rate": 3.7705521472392636e-06, "loss": 4.5161, "step": 290 }, { "epoch": 2.68, "learning_rate": 3.7582822085889567e-06, "loss": 4.3186, "step": 300 }, { "epoch": 2.77, "learning_rate": 3.7460122699386503e-06, "loss": 4.3137, "step": 310 }, { "epoch": 2.86, "learning_rate": 3.7337423312883434e-06, "loss": 4.19, "step": 320 }, { "epoch": 2.95, "learning_rate": 3.7214723926380365e-06, "loss": 4.2211, "step": 330 }, { "epoch": 3.0, "eval_loss": 4.167309284210205, "eval_runtime": 226.6831, "eval_samples_per_second": 8.796, "eval_steps_per_second": 0.882, "eval_wer": 1.0, "step": 336 }, { "epoch": 3.04, "learning_rate": 3.7092024539877297e-06, "loss": 4.2938, "step": 340 }, { "epoch": 3.12, "learning_rate": 3.6969325153374232e-06, "loss": 4.0354, "step": 350 }, { "epoch": 3.21, "learning_rate": 3.6846625766871163e-06, "loss": 4.209, "step": 360 }, { "epoch": 3.3, "learning_rate": 3.67239263803681e-06, "loss": 3.9982, "step": 370 }, { "epoch": 3.39, "learning_rate": 3.660122699386503e-06, "loss": 4.0104, "step": 380 }, { "epoch": 3.48, "learning_rate": 3.647852760736196e-06, "loss": 3.8911, "step": 390 }, { "epoch": 3.57, "learning_rate": 3.6355828220858893e-06, "loss": 4.0256, "step": 400 }, { "epoch": 3.66, "learning_rate": 3.623312883435583e-06, "loss": 3.8258, "step": 410 }, { "epoch": 3.75, "learning_rate": 3.611042944785276e-06, "loss": 3.7316, "step": 420 }, { "epoch": 3.84, "learning_rate": 3.598773006134969e-06, "loss": 3.8932, "step": 430 }, { "epoch": 3.93, "learning_rate": 3.5865030674846626e-06, "loss": 3.8386, "step": 440 }, { "epoch": 4.0, "eval_loss": 3.825347661972046, "eval_runtime": 224.561, "eval_samples_per_second": 8.88, "eval_steps_per_second": 0.891, "eval_wer": 1.0, "step": 448 }, { "epoch": 4.02, "learning_rate": 3.5742331288343553e-06, "loss": 3.8842, "step": 450 }, { "epoch": 4.11, "learning_rate": 3.561963190184049e-06, "loss": 3.8318, "step": 460 }, { "epoch": 4.2, "learning_rate": 3.549693251533742e-06, "loss": 3.7975, "step": 470 }, { "epoch": 4.29, "learning_rate": 3.5374233128834355e-06, "loss": 3.7912, "step": 480 }, { "epoch": 4.37, "learning_rate": 3.5251533742331287e-06, "loss": 3.8111, "step": 490 }, { "epoch": 4.46, "learning_rate": 3.512883435582822e-06, "loss": 3.6635, "step": 500 }, { "epoch": 4.55, "learning_rate": 3.5006134969325153e-06, "loss": 3.6398, "step": 510 }, { "epoch": 4.64, "learning_rate": 3.4883435582822085e-06, "loss": 3.6428, "step": 520 }, { "epoch": 4.73, "learning_rate": 3.4760736196319016e-06, "loss": 3.6082, "step": 530 }, { "epoch": 4.82, "learning_rate": 3.463803680981595e-06, "loss": 3.5746, "step": 540 }, { "epoch": 4.91, "learning_rate": 3.4515337423312883e-06, "loss": 3.664, "step": 550 }, { "epoch": 5.0, "learning_rate": 3.4392638036809814e-06, "loss": 3.5531, "step": 560 }, { "epoch": 5.0, "eval_loss": 3.6285741329193115, "eval_runtime": 224.6131, "eval_samples_per_second": 8.877, "eval_steps_per_second": 0.89, "eval_wer": 1.0, "step": 560 }, { "epoch": 5.09, "learning_rate": 3.426993865030675e-06, "loss": 3.7207, "step": 570 }, { "epoch": 5.18, "learning_rate": 3.4147239263803676e-06, "loss": 3.62, "step": 580 }, { "epoch": 5.27, "learning_rate": 3.402453987730061e-06, "loss": 3.5644, "step": 590 }, { "epoch": 5.36, "learning_rate": 3.3901840490797543e-06, "loss": 3.4874, "step": 600 }, { "epoch": 5.45, "learning_rate": 3.377914110429448e-06, "loss": 3.5444, "step": 610 }, { "epoch": 5.54, "learning_rate": 3.365644171779141e-06, "loss": 3.4358, "step": 620 }, { "epoch": 5.62, "learning_rate": 3.3533742331288345e-06, "loss": 3.5017, "step": 630 }, { "epoch": 5.71, "learning_rate": 3.3411042944785272e-06, "loss": 3.4731, "step": 640 }, { "epoch": 5.8, "learning_rate": 3.3288343558282208e-06, "loss": 3.3658, "step": 650 }, { "epoch": 5.89, "learning_rate": 3.316564417177914e-06, "loss": 3.5377, "step": 660 }, { "epoch": 5.98, "learning_rate": 3.304294478527607e-06, "loss": 3.5215, "step": 670 }, { "epoch": 6.0, "eval_loss": 3.4761970043182373, "eval_runtime": 224.497, "eval_samples_per_second": 8.882, "eval_steps_per_second": 0.891, "eval_wer": 0.9864203906795881, "step": 672 }, { "epoch": 6.07, "learning_rate": 3.2920245398773006e-06, "loss": 3.4964, "step": 680 }, { "epoch": 6.16, "learning_rate": 3.2797546012269937e-06, "loss": 3.4217, "step": 690 }, { "epoch": 6.25, "learning_rate": 3.267484662576687e-06, "loss": 3.385, "step": 700 }, { "epoch": 6.34, "learning_rate": 3.25521472392638e-06, "loss": 3.338, "step": 710 }, { "epoch": 6.43, "learning_rate": 3.2429447852760735e-06, "loss": 3.3658, "step": 720 }, { "epoch": 6.52, "learning_rate": 3.2306748466257666e-06, "loss": 3.3997, "step": 730 }, { "epoch": 6.61, "learning_rate": 3.21840490797546e-06, "loss": 3.4159, "step": 740 }, { "epoch": 6.7, "learning_rate": 3.2061349693251533e-06, "loss": 3.3652, "step": 750 }, { "epoch": 6.78, "learning_rate": 3.193865030674847e-06, "loss": 3.3839, "step": 760 }, { "epoch": 6.87, "learning_rate": 3.1815950920245395e-06, "loss": 3.3874, "step": 770 }, { "epoch": 6.96, "learning_rate": 3.169325153374233e-06, "loss": 3.3493, "step": 780 }, { "epoch": 7.0, "eval_loss": 3.3548903465270996, "eval_runtime": 223.6828, "eval_samples_per_second": 8.914, "eval_steps_per_second": 0.894, "eval_wer": 0.9847191420846371, "step": 784 }, { "epoch": 7.05, "learning_rate": 3.1570552147239262e-06, "loss": 3.2476, "step": 790 }, { "epoch": 7.14, "learning_rate": 3.1447852760736193e-06, "loss": 3.3659, "step": 800 }, { "epoch": 7.23, "learning_rate": 3.132515337423313e-06, "loss": 3.2943, "step": 810 }, { "epoch": 7.32, "learning_rate": 3.120245398773006e-06, "loss": 3.3103, "step": 820 }, { "epoch": 7.41, "learning_rate": 3.107975460122699e-06, "loss": 3.2368, "step": 830 }, { "epoch": 7.5, "learning_rate": 3.0957055214723923e-06, "loss": 3.2216, "step": 840 }, { "epoch": 7.59, "learning_rate": 3.083435582822086e-06, "loss": 3.2502, "step": 850 }, { "epoch": 7.68, "learning_rate": 3.071165644171779e-06, "loss": 3.2379, "step": 860 }, { "epoch": 7.77, "learning_rate": 3.0588957055214725e-06, "loss": 3.2768, "step": 870 }, { "epoch": 7.86, "learning_rate": 3.0466257668711656e-06, "loss": 3.2413, "step": 880 }, { "epoch": 7.95, "learning_rate": 3.0343558282208587e-06, "loss": 3.1264, "step": 890 }, { "epoch": 8.0, "eval_loss": 3.1797306537628174, "eval_runtime": 226.8173, "eval_samples_per_second": 8.791, "eval_steps_per_second": 0.882, "eval_wer": 0.9758787252787313, "step": 896 }, { "epoch": 8.04, "learning_rate": 3.022085889570552e-06, "loss": 3.2241, "step": 900 }, { "epoch": 8.12, "learning_rate": 3.0098159509202454e-06, "loss": 3.0777, "step": 910 }, { "epoch": 8.21, "learning_rate": 2.9975460122699385e-06, "loss": 3.015, "step": 920 }, { "epoch": 8.3, "learning_rate": 2.9852760736196317e-06, "loss": 2.9733, "step": 930 }, { "epoch": 8.39, "learning_rate": 2.973006134969325e-06, "loss": 2.97, "step": 940 }, { "epoch": 8.48, "learning_rate": 2.960736196319018e-06, "loss": 2.8979, "step": 950 }, { "epoch": 8.57, "learning_rate": 2.9484662576687115e-06, "loss": 2.9348, "step": 960 }, { "epoch": 8.66, "learning_rate": 2.9361963190184046e-06, "loss": 2.847, "step": 970 }, { "epoch": 8.75, "learning_rate": 2.923926380368098e-06, "loss": 2.9345, "step": 980 }, { "epoch": 8.84, "learning_rate": 2.9116564417177913e-06, "loss": 2.9118, "step": 990 }, { "epoch": 8.93, "learning_rate": 2.899386503067485e-06, "loss": 2.7557, "step": 1000 }, { "epoch": 9.0, "eval_loss": 2.8702940940856934, "eval_runtime": 226.6648, "eval_samples_per_second": 8.797, "eval_steps_per_second": 0.882, "eval_wer": 0.9865115289971748, "step": 1008 }, { "epoch": 9.02, "learning_rate": 2.887116564417178e-06, "loss": 2.8224, "step": 1010 }, { "epoch": 9.11, "learning_rate": 2.874846625766871e-06, "loss": 2.756, "step": 1020 }, { "epoch": 9.2, "learning_rate": 2.862576687116564e-06, "loss": 2.8302, "step": 1030 }, { "epoch": 9.29, "learning_rate": 2.8503067484662577e-06, "loss": 2.7484, "step": 1040 }, { "epoch": 9.37, "learning_rate": 2.838036809815951e-06, "loss": 2.7708, "step": 1050 }, { "epoch": 9.46, "learning_rate": 2.825766871165644e-06, "loss": 2.7069, "step": 1060 }, { "epoch": 9.55, "learning_rate": 2.8134969325153375e-06, "loss": 2.7055, "step": 1070 }, { "epoch": 9.64, "learning_rate": 2.8012269938650302e-06, "loss": 2.6739, "step": 1080 }, { "epoch": 9.73, "learning_rate": 2.7889570552147238e-06, "loss": 2.6802, "step": 1090 }, { "epoch": 9.82, "learning_rate": 2.776687116564417e-06, "loss": 2.6417, "step": 1100 }, { "epoch": 9.91, "learning_rate": 2.7644171779141104e-06, "loss": 2.6462, "step": 1110 }, { "epoch": 10.0, "learning_rate": 2.7521472392638036e-06, "loss": 2.6345, "step": 1120 }, { "epoch": 10.0, "eval_loss": 2.6736390590667725, "eval_runtime": 229.3255, "eval_samples_per_second": 8.695, "eval_steps_per_second": 0.872, "eval_wer": 0.9969924355196403, "step": 1120 }, { "epoch": 10.09, "learning_rate": 2.739877300613497e-06, "loss": 2.5804, "step": 1130 }, { "epoch": 10.18, "learning_rate": 2.72760736196319e-06, "loss": 2.5819, "step": 1140 }, { "epoch": 10.27, "learning_rate": 2.7153374233128834e-06, "loss": 2.568, "step": 1150 }, { "epoch": 10.36, "learning_rate": 2.7030674846625765e-06, "loss": 2.5261, "step": 1160 }, { "epoch": 10.45, "learning_rate": 2.6907975460122696e-06, "loss": 2.5213, "step": 1170 }, { "epoch": 10.54, "learning_rate": 2.678527607361963e-06, "loss": 2.54, "step": 1180 }, { "epoch": 10.62, "learning_rate": 2.6662576687116563e-06, "loss": 2.465, "step": 1190 }, { "epoch": 10.71, "learning_rate": 2.65398773006135e-06, "loss": 2.4933, "step": 1200 }, { "epoch": 10.8, "learning_rate": 2.6417177914110425e-06, "loss": 2.4469, "step": 1210 }, { "epoch": 10.89, "learning_rate": 2.629447852760736e-06, "loss": 2.4227, "step": 1220 }, { "epoch": 10.98, "learning_rate": 2.617177914110429e-06, "loss": 2.4297, "step": 1230 }, { "epoch": 11.0, "eval_loss": 2.563849925994873, "eval_runtime": 228.8987, "eval_samples_per_second": 8.711, "eval_steps_per_second": 0.874, "eval_wer": 1.0336907980678676, "step": 1232 }, { "epoch": 11.07, "learning_rate": 2.6049079754601228e-06, "loss": 2.4706, "step": 1240 }, { "epoch": 11.16, "learning_rate": 2.592638036809816e-06, "loss": 2.4096, "step": 1250 }, { "epoch": 11.25, "learning_rate": 2.5803680981595094e-06, "loss": 2.3846, "step": 1260 }, { "epoch": 11.34, "learning_rate": 2.568098159509202e-06, "loss": 2.3384, "step": 1270 }, { "epoch": 11.43, "learning_rate": 2.5558282208588957e-06, "loss": 2.3856, "step": 1280 }, { "epoch": 11.52, "learning_rate": 2.543558282208589e-06, "loss": 2.3349, "step": 1290 }, { "epoch": 11.61, "learning_rate": 2.531288343558282e-06, "loss": 2.2744, "step": 1300 }, { "epoch": 11.7, "learning_rate": 2.5190184049079755e-06, "loss": 2.3033, "step": 1310 }, { "epoch": 11.78, "learning_rate": 2.5067484662576686e-06, "loss": 2.3258, "step": 1320 }, { "epoch": 11.87, "learning_rate": 2.4944785276073617e-06, "loss": 2.3106, "step": 1330 }, { "epoch": 11.96, "learning_rate": 2.482208588957055e-06, "loss": 2.3057, "step": 1340 }, { "epoch": 12.0, "eval_loss": 2.3679769039154053, "eval_runtime": 226.8372, "eval_samples_per_second": 8.79, "eval_steps_per_second": 0.882, "eval_wer": 0.9838685177871617, "step": 1344 }, { "epoch": 12.05, "learning_rate": 2.4699386503067484e-06, "loss": 2.2966, "step": 1350 }, { "epoch": 12.14, "learning_rate": 2.4576687116564415e-06, "loss": 2.2726, "step": 1360 }, { "epoch": 12.23, "learning_rate": 2.445398773006135e-06, "loss": 2.2556, "step": 1370 }, { "epoch": 12.32, "learning_rate": 2.433128834355828e-06, "loss": 2.2264, "step": 1380 }, { "epoch": 12.41, "learning_rate": 2.4208588957055213e-06, "loss": 2.229, "step": 1390 }, { "epoch": 12.5, "learning_rate": 2.4085889570552144e-06, "loss": 2.202, "step": 1400 }, { "epoch": 12.59, "learning_rate": 2.396319018404908e-06, "loss": 2.2028, "step": 1410 }, { "epoch": 12.68, "learning_rate": 2.384049079754601e-06, "loss": 2.1755, "step": 1420 }, { "epoch": 12.77, "learning_rate": 2.3717791411042942e-06, "loss": 2.1927, "step": 1430 }, { "epoch": 12.86, "learning_rate": 2.359509202453988e-06, "loss": 2.1783, "step": 1440 }, { "epoch": 12.95, "learning_rate": 2.347239263803681e-06, "loss": 2.1436, "step": 1450 }, { "epoch": 13.0, "eval_loss": 2.2366645336151123, "eval_runtime": 227.8374, "eval_samples_per_second": 8.752, "eval_steps_per_second": 0.878, "eval_wer": 0.9647902299723548, "step": 1456 }, { "epoch": 13.04, "learning_rate": 2.334969325153374e-06, "loss": 2.1661, "step": 1460 }, { "epoch": 13.12, "learning_rate": 2.322699386503067e-06, "loss": 2.1329, "step": 1470 }, { "epoch": 13.21, "learning_rate": 2.3104294478527607e-06, "loss": 2.1202, "step": 1480 }, { "epoch": 13.3, "learning_rate": 2.298159509202454e-06, "loss": 2.1499, "step": 1490 }, { "epoch": 13.39, "learning_rate": 2.2858895705521474e-06, "loss": 2.1047, "step": 1500 }, { "epoch": 13.48, "learning_rate": 2.2736196319018405e-06, "loss": 2.1364, "step": 1510 }, { "epoch": 13.57, "learning_rate": 2.2613496932515336e-06, "loss": 2.1224, "step": 1520 }, { "epoch": 13.66, "learning_rate": 2.2490797546012268e-06, "loss": 2.0951, "step": 1530 }, { "epoch": 13.75, "learning_rate": 2.2368098159509203e-06, "loss": 2.11, "step": 1540 }, { "epoch": 13.84, "learning_rate": 2.2245398773006134e-06, "loss": 2.1022, "step": 1550 }, { "epoch": 13.93, "learning_rate": 2.2122699386503066e-06, "loss": 2.0856, "step": 1560 }, { "epoch": 14.0, "eval_loss": 2.163454294204712, "eval_runtime": 228.8577, "eval_samples_per_second": 8.713, "eval_steps_per_second": 0.874, "eval_wer": 0.9585624449372665, "step": 1568 }, { "epoch": 14.02, "learning_rate": 2.2e-06, "loss": 2.1466, "step": 1570 }, { "epoch": 14.11, "learning_rate": 2.187730061349693e-06, "loss": 2.0836, "step": 1580 }, { "epoch": 14.2, "learning_rate": 2.1754601226993864e-06, "loss": 2.0697, "step": 1590 }, { "epoch": 14.29, "learning_rate": 2.1631901840490795e-06, "loss": 2.064, "step": 1600 }, { "epoch": 14.37, "learning_rate": 2.150920245398773e-06, "loss": 2.0571, "step": 1610 }, { "epoch": 14.46, "learning_rate": 2.138650306748466e-06, "loss": 2.0054, "step": 1620 }, { "epoch": 14.55, "learning_rate": 2.1263803680981597e-06, "loss": 2.0765, "step": 1630 }, { "epoch": 14.64, "learning_rate": 2.1141104294478524e-06, "loss": 2.0276, "step": 1640 }, { "epoch": 14.73, "learning_rate": 2.101840490797546e-06, "loss": 2.0324, "step": 1650 }, { "epoch": 14.82, "learning_rate": 2.089570552147239e-06, "loss": 2.0354, "step": 1660 }, { "epoch": 14.91, "learning_rate": 2.0773006134969326e-06, "loss": 2.0212, "step": 1670 }, { "epoch": 15.0, "learning_rate": 2.0650306748466258e-06, "loss": 2.0035, "step": 1680 }, { "epoch": 15.0, "eval_loss": 2.0944879055023193, "eval_runtime": 229.1871, "eval_samples_per_second": 8.7, "eval_steps_per_second": 0.873, "eval_wer": 0.9645471944587903, "step": 1680 }, { "epoch": 15.09, "learning_rate": 2.052760736196319e-06, "loss": 2.0578, "step": 1690 }, { "epoch": 15.18, "learning_rate": 2.0404907975460124e-06, "loss": 1.9819, "step": 1700 }, { "epoch": 15.27, "learning_rate": 2.028220858895705e-06, "loss": 2.0174, "step": 1710 }, { "epoch": 15.36, "learning_rate": 2.0159509202453987e-06, "loss": 2.0332, "step": 1720 }, { "epoch": 15.45, "learning_rate": 2.003680981595092e-06, "loss": 1.923, "step": 1730 }, { "epoch": 15.54, "learning_rate": 1.9914110429447854e-06, "loss": 1.9894, "step": 1740 }, { "epoch": 15.62, "learning_rate": 1.9791411042944785e-06, "loss": 1.9868, "step": 1750 }, { "epoch": 15.71, "learning_rate": 1.9668711656441716e-06, "loss": 1.9463, "step": 1760 }, { "epoch": 15.8, "learning_rate": 1.954601226993865e-06, "loss": 1.9641, "step": 1770 }, { "epoch": 15.89, "learning_rate": 1.9423312883435583e-06, "loss": 1.9391, "step": 1780 }, { "epoch": 15.98, "learning_rate": 1.9300613496932514e-06, "loss": 1.9134, "step": 1790 }, { "epoch": 16.0, "eval_loss": 2.039499044418335, "eval_runtime": 230.4499, "eval_samples_per_second": 8.653, "eval_steps_per_second": 0.868, "eval_wer": 0.9630282224990127, "step": 1792 }, { "epoch": 16.07, "learning_rate": 1.9177914110429445e-06, "loss": 2.0007, "step": 1800 }, { "epoch": 16.16, "learning_rate": 1.905521472392638e-06, "loss": 1.9244, "step": 1810 }, { "epoch": 16.25, "learning_rate": 1.8932515337423312e-06, "loss": 1.966, "step": 1820 }, { "epoch": 16.34, "learning_rate": 1.8809815950920245e-06, "loss": 1.9547, "step": 1830 }, { "epoch": 16.43, "learning_rate": 1.8687116564417179e-06, "loss": 1.918, "step": 1840 }, { "epoch": 16.52, "learning_rate": 1.856441717791411e-06, "loss": 1.9317, "step": 1850 }, { "epoch": 16.61, "learning_rate": 1.8441717791411041e-06, "loss": 1.9608, "step": 1860 }, { "epoch": 16.7, "learning_rate": 1.8319018404907975e-06, "loss": 1.9207, "step": 1870 }, { "epoch": 16.78, "learning_rate": 1.8196319018404906e-06, "loss": 1.9173, "step": 1880 }, { "epoch": 16.87, "learning_rate": 1.807361963190184e-06, "loss": 1.8698, "step": 1890 }, { "epoch": 16.96, "learning_rate": 1.7950920245398773e-06, "loss": 1.9443, "step": 1900 }, { "epoch": 17.0, "eval_loss": 2.001734495162964, "eval_runtime": 229.2485, "eval_samples_per_second": 8.698, "eval_steps_per_second": 0.872, "eval_wer": 0.940061366467175, "step": 1904 }, { "epoch": 17.05, "learning_rate": 1.7828220858895704e-06, "loss": 1.9318, "step": 1910 }, { "epoch": 17.14, "learning_rate": 1.7705521472392637e-06, "loss": 1.8787, "step": 1920 }, { "epoch": 17.23, "learning_rate": 1.758282208588957e-06, "loss": 1.9085, "step": 1930 }, { "epoch": 17.32, "learning_rate": 1.7460122699386502e-06, "loss": 1.9192, "step": 1940 }, { "epoch": 17.41, "learning_rate": 1.7337423312883435e-06, "loss": 1.876, "step": 1950 }, { "epoch": 17.5, "learning_rate": 1.7214723926380368e-06, "loss": 1.9029, "step": 1960 }, { "epoch": 17.59, "learning_rate": 1.70920245398773e-06, "loss": 1.8502, "step": 1970 }, { "epoch": 17.68, "learning_rate": 1.6969325153374233e-06, "loss": 1.8889, "step": 1980 }, { "epoch": 17.77, "learning_rate": 1.6846625766871164e-06, "loss": 1.8713, "step": 1990 }, { "epoch": 17.86, "learning_rate": 1.6723926380368096e-06, "loss": 1.8772, "step": 2000 }, { "epoch": 17.95, "learning_rate": 1.660122699386503e-06, "loss": 1.8988, "step": 2010 }, { "epoch": 18.0, "eval_loss": 1.9513905048370361, "eval_runtime": 229.6946, "eval_samples_per_second": 8.681, "eval_steps_per_second": 0.871, "eval_wer": 0.9492663365434274, "step": 2016 }, { "epoch": 18.04, "learning_rate": 1.6478527607361962e-06, "loss": 1.8788, "step": 2020 }, { "epoch": 18.12, "learning_rate": 1.6355828220858896e-06, "loss": 1.8656, "step": 2030 }, { "epoch": 18.21, "learning_rate": 1.6233128834355827e-06, "loss": 1.8622, "step": 2040 }, { "epoch": 18.3, "learning_rate": 1.611042944785276e-06, "loss": 1.8891, "step": 2050 }, { "epoch": 18.39, "learning_rate": 1.5987730061349694e-06, "loss": 1.8184, "step": 2060 }, { "epoch": 18.48, "learning_rate": 1.5865030674846625e-06, "loss": 1.8475, "step": 2070 }, { "epoch": 18.57, "learning_rate": 1.5742331288343558e-06, "loss": 1.8573, "step": 2080 }, { "epoch": 18.66, "learning_rate": 1.5619631901840492e-06, "loss": 1.819, "step": 2090 }, { "epoch": 18.75, "learning_rate": 1.5496932515337423e-06, "loss": 1.8366, "step": 2100 }, { "epoch": 18.84, "learning_rate": 1.5374233128834354e-06, "loss": 1.8187, "step": 2110 }, { "epoch": 18.93, "learning_rate": 1.5251533742331288e-06, "loss": 1.8141, "step": 2120 }, { "epoch": 19.0, "eval_loss": 1.9111149311065674, "eval_runtime": 229.8356, "eval_samples_per_second": 8.676, "eval_steps_per_second": 0.87, "eval_wer": 0.9475347085092809, "step": 2128 }, { "epoch": 19.02, "learning_rate": 1.5128834355828219e-06, "loss": 1.8492, "step": 2130 }, { "epoch": 19.11, "learning_rate": 1.5006134969325152e-06, "loss": 1.8041, "step": 2140 }, { "epoch": 19.2, "learning_rate": 1.4883435582822085e-06, "loss": 1.8007, "step": 2150 }, { "epoch": 19.29, "learning_rate": 1.4760736196319017e-06, "loss": 1.8104, "step": 2160 }, { "epoch": 19.37, "learning_rate": 1.463803680981595e-06, "loss": 1.8089, "step": 2170 }, { "epoch": 19.46, "learning_rate": 1.4515337423312883e-06, "loss": 1.8453, "step": 2180 }, { "epoch": 19.55, "learning_rate": 1.4392638036809815e-06, "loss": 1.8138, "step": 2190 }, { "epoch": 19.64, "learning_rate": 1.4269938650306748e-06, "loss": 1.7673, "step": 2200 }, { "epoch": 19.73, "learning_rate": 1.4147239263803681e-06, "loss": 1.8214, "step": 2210 }, { "epoch": 19.82, "learning_rate": 1.4024539877300613e-06, "loss": 1.8133, "step": 2220 }, { "epoch": 19.91, "learning_rate": 1.3901840490797546e-06, "loss": 1.766, "step": 2230 }, { "epoch": 20.0, "learning_rate": 1.3779141104294477e-06, "loss": 1.8344, "step": 2240 }, { "epoch": 20.0, "eval_loss": 1.8789727687835693, "eval_runtime": 230.1819, "eval_samples_per_second": 8.663, "eval_steps_per_second": 0.869, "eval_wer": 0.9395449160008507, "step": 2240 }, { "epoch": 20.09, "learning_rate": 1.365644171779141e-06, "loss": 1.7892, "step": 2250 }, { "epoch": 20.18, "learning_rate": 1.3533742331288342e-06, "loss": 1.7738, "step": 2260 }, { "epoch": 20.27, "learning_rate": 1.3411042944785275e-06, "loss": 1.8034, "step": 2270 }, { "epoch": 20.36, "learning_rate": 1.3288343558282209e-06, "loss": 1.7568, "step": 2280 }, { "epoch": 20.45, "learning_rate": 1.316564417177914e-06, "loss": 1.7828, "step": 2290 }, { "epoch": 20.54, "learning_rate": 1.3042944785276073e-06, "loss": 1.7748, "step": 2300 }, { "epoch": 20.62, "learning_rate": 1.2920245398773007e-06, "loss": 1.7529, "step": 2310 }, { "epoch": 20.71, "learning_rate": 1.2797546012269938e-06, "loss": 1.7858, "step": 2320 }, { "epoch": 20.8, "learning_rate": 1.2674846625766871e-06, "loss": 1.7939, "step": 2330 }, { "epoch": 20.89, "learning_rate": 1.2552147239263805e-06, "loss": 1.7734, "step": 2340 }, { "epoch": 20.98, "learning_rate": 1.2429447852760736e-06, "loss": 1.7775, "step": 2350 }, { "epoch": 21.0, "eval_loss": 1.8616416454315186, "eval_runtime": 230.4184, "eval_samples_per_second": 8.654, "eval_steps_per_second": 0.868, "eval_wer": 0.9502992374760761, "step": 2352 }, { "epoch": 21.07, "learning_rate": 1.2306748466257667e-06, "loss": 1.7807, "step": 2360 }, { "epoch": 21.16, "learning_rate": 1.21840490797546e-06, "loss": 1.7896, "step": 2370 }, { "epoch": 21.25, "learning_rate": 1.2061349693251532e-06, "loss": 1.7772, "step": 2380 }, { "epoch": 21.34, "learning_rate": 1.1938650306748465e-06, "loss": 1.7337, "step": 2390 }, { "epoch": 21.43, "learning_rate": 1.1815950920245398e-06, "loss": 1.7481, "step": 2400 }, { "epoch": 21.52, "learning_rate": 1.169325153374233e-06, "loss": 1.7551, "step": 2410 }, { "epoch": 21.61, "learning_rate": 1.1570552147239263e-06, "loss": 1.7527, "step": 2420 }, { "epoch": 21.7, "learning_rate": 1.1447852760736196e-06, "loss": 1.7565, "step": 2430 }, { "epoch": 21.78, "learning_rate": 1.1325153374233128e-06, "loss": 1.7389, "step": 2440 }, { "epoch": 21.87, "learning_rate": 1.120245398773006e-06, "loss": 1.73, "step": 2450 }, { "epoch": 21.96, "learning_rate": 1.1079754601226994e-06, "loss": 1.7517, "step": 2460 }, { "epoch": 22.0, "eval_loss": 1.8332698345184326, "eval_runtime": 231.0169, "eval_samples_per_second": 8.631, "eval_steps_per_second": 0.866, "eval_wer": 0.9433423459002946, "step": 2464 }, { "epoch": 22.05, "learning_rate": 1.0957055214723926e-06, "loss": 1.798, "step": 2470 }, { "epoch": 22.14, "learning_rate": 1.083435582822086e-06, "loss": 1.7413, "step": 2480 }, { "epoch": 22.23, "learning_rate": 1.071165644171779e-06, "loss": 1.7061, "step": 2490 }, { "epoch": 22.32, "learning_rate": 1.0588957055214724e-06, "loss": 1.7485, "step": 2500 }, { "epoch": 22.41, "learning_rate": 1.0466257668711655e-06, "loss": 1.722, "step": 2510 }, { "epoch": 22.5, "learning_rate": 1.0343558282208588e-06, "loss": 1.7105, "step": 2520 }, { "epoch": 22.59, "learning_rate": 1.0220858895705522e-06, "loss": 1.7343, "step": 2530 }, { "epoch": 22.68, "learning_rate": 1.0098159509202453e-06, "loss": 1.7403, "step": 2540 }, { "epoch": 22.77, "learning_rate": 9.975460122699386e-07, "loss": 1.7422, "step": 2550 }, { "epoch": 22.86, "learning_rate": 9.852760736196317e-07, "loss": 1.7464, "step": 2560 }, { "epoch": 22.95, "learning_rate": 9.73006134969325e-07, "loss": 1.7037, "step": 2570 }, { "epoch": 23.0, "eval_loss": 1.8155736923217773, "eval_runtime": 230.3013, "eval_samples_per_second": 8.658, "eval_steps_per_second": 0.868, "eval_wer": 0.9372056991827931, "step": 2576 }, { "epoch": 23.04, "learning_rate": 9.607361963190184e-07, "loss": 1.7301, "step": 2580 }, { "epoch": 23.12, "learning_rate": 9.484662576687115e-07, "loss": 1.7254, "step": 2590 }, { "epoch": 23.21, "learning_rate": 9.361963190184049e-07, "loss": 1.7005, "step": 2600 }, { "epoch": 23.3, "learning_rate": 9.239263803680981e-07, "loss": 1.726, "step": 2610 }, { "epoch": 23.39, "learning_rate": 9.116564417177913e-07, "loss": 1.7074, "step": 2620 }, { "epoch": 23.48, "learning_rate": 8.993865030674847e-07, "loss": 1.7301, "step": 2630 }, { "epoch": 23.57, "learning_rate": 8.871165644171779e-07, "loss": 1.7189, "step": 2640 }, { "epoch": 23.66, "learning_rate": 8.74846625766871e-07, "loss": 1.6932, "step": 2650 }, { "epoch": 23.75, "learning_rate": 8.625766871165644e-07, "loss": 1.7322, "step": 2660 }, { "epoch": 23.84, "learning_rate": 8.503067484662576e-07, "loss": 1.7056, "step": 2670 }, { "epoch": 23.93, "learning_rate": 8.380368098159509e-07, "loss": 1.7158, "step": 2680 }, { "epoch": 24.0, "eval_loss": 1.796068787574768, "eval_runtime": 229.8221, "eval_samples_per_second": 8.676, "eval_steps_per_second": 0.87, "eval_wer": 0.9482334356107787, "step": 2688 }, { "epoch": 24.02, "learning_rate": 8.257668711656442e-07, "loss": 1.7244, "step": 2690 }, { "epoch": 24.11, "learning_rate": 8.134969325153374e-07, "loss": 1.7245, "step": 2700 }, { "epoch": 24.2, "learning_rate": 8.012269938650306e-07, "loss": 1.6697, "step": 2710 }, { "epoch": 24.29, "learning_rate": 7.889570552147239e-07, "loss": 1.7091, "step": 2720 }, { "epoch": 24.37, "learning_rate": 7.766871165644171e-07, "loss": 1.7122, "step": 2730 }, { "epoch": 24.46, "learning_rate": 7.644171779141104e-07, "loss": 1.7035, "step": 2740 }, { "epoch": 24.55, "learning_rate": 7.521472392638037e-07, "loss": 1.6943, "step": 2750 }, { "epoch": 24.64, "learning_rate": 7.398773006134969e-07, "loss": 1.6933, "step": 2760 }, { "epoch": 24.73, "learning_rate": 7.276073619631901e-07, "loss": 1.6776, "step": 2770 }, { "epoch": 24.82, "learning_rate": 7.153374233128834e-07, "loss": 1.7123, "step": 2780 }, { "epoch": 24.91, "learning_rate": 7.030674846625767e-07, "loss": 1.6912, "step": 2790 }, { "epoch": 25.0, "learning_rate": 6.907975460122699e-07, "loss": 1.7111, "step": 2800 }, { "epoch": 25.0, "eval_loss": 1.7816540002822876, "eval_runtime": 230.1276, "eval_samples_per_second": 8.665, "eval_steps_per_second": 0.869, "eval_wer": 0.9422183066500592, "step": 2800 }, { "epoch": 25.09, "learning_rate": 6.785276073619631e-07, "loss": 1.7032, "step": 2810 }, { "epoch": 25.18, "learning_rate": 6.662576687116565e-07, "loss": 1.684, "step": 2820 }, { "epoch": 25.27, "learning_rate": 6.539877300613497e-07, "loss": 1.6899, "step": 2830 }, { "epoch": 25.36, "learning_rate": 6.417177914110428e-07, "loss": 1.6875, "step": 2840 }, { "epoch": 25.45, "learning_rate": 6.294478527607362e-07, "loss": 1.6621, "step": 2850 }, { "epoch": 25.54, "learning_rate": 6.171779141104294e-07, "loss": 1.6926, "step": 2860 }, { "epoch": 25.62, "learning_rate": 6.049079754601226e-07, "loss": 1.7014, "step": 2870 }, { "epoch": 25.71, "learning_rate": 5.92638036809816e-07, "loss": 1.7038, "step": 2880 }, { "epoch": 25.8, "learning_rate": 5.803680981595092e-07, "loss": 1.6665, "step": 2890 }, { "epoch": 25.89, "learning_rate": 5.680981595092024e-07, "loss": 1.6654, "step": 2900 }, { "epoch": 25.98, "learning_rate": 5.558282208588957e-07, "loss": 1.69, "step": 2910 }, { "epoch": 26.0, "eval_loss": 1.7818827629089355, "eval_runtime": 230.0994, "eval_samples_per_second": 8.666, "eval_steps_per_second": 0.869, "eval_wer": 0.9430081720691436, "step": 2912 }, { "epoch": 26.07, "learning_rate": 5.435582822085889e-07, "loss": 1.7023, "step": 2920 }, { "epoch": 26.16, "learning_rate": 5.312883435582822e-07, "loss": 1.6845, "step": 2930 }, { "epoch": 26.25, "learning_rate": 5.190184049079755e-07, "loss": 1.6649, "step": 2940 }, { "epoch": 26.34, "learning_rate": 5.067484662576687e-07, "loss": 1.6683, "step": 2950 }, { "epoch": 26.43, "learning_rate": 4.944785276073619e-07, "loss": 1.6638, "step": 2960 }, { "epoch": 26.52, "learning_rate": 4.822085889570552e-07, "loss": 1.6879, "step": 2970 }, { "epoch": 26.61, "learning_rate": 4.6993865030674844e-07, "loss": 1.6527, "step": 2980 }, { "epoch": 26.7, "learning_rate": 4.576687116564417e-07, "loss": 1.6745, "step": 2990 }, { "epoch": 26.78, "learning_rate": 4.4539877300613495e-07, "loss": 1.6944, "step": 3000 }, { "epoch": 26.87, "learning_rate": 4.331288343558282e-07, "loss": 1.6737, "step": 3010 }, { "epoch": 26.96, "learning_rate": 4.2085889570552147e-07, "loss": 1.6889, "step": 3020 }, { "epoch": 27.0, "eval_loss": 1.77213454246521, "eval_runtime": 230.2415, "eval_samples_per_second": 8.66, "eval_steps_per_second": 0.869, "eval_wer": 0.9386031533857885, "step": 3024 }, { "epoch": 27.05, "learning_rate": 4.085889570552147e-07, "loss": 1.6938, "step": 3030 }, { "epoch": 27.14, "learning_rate": 3.9631901840490793e-07, "loss": 1.6539, "step": 3040 }, { "epoch": 27.23, "learning_rate": 3.840490797546012e-07, "loss": 1.6533, "step": 3050 }, { "epoch": 27.32, "learning_rate": 3.717791411042945e-07, "loss": 1.6803, "step": 3060 }, { "epoch": 27.41, "learning_rate": 3.595092024539877e-07, "loss": 1.6623, "step": 3070 }, { "epoch": 27.5, "learning_rate": 3.4723926380368096e-07, "loss": 1.6803, "step": 3080 }, { "epoch": 27.59, "learning_rate": 3.3496932515337424e-07, "loss": 1.6694, "step": 3090 }, { "epoch": 27.68, "learning_rate": 3.2269938650306747e-07, "loss": 1.6578, "step": 3100 }, { "epoch": 27.77, "learning_rate": 3.104294478527607e-07, "loss": 1.6606, "step": 3110 }, { "epoch": 27.86, "learning_rate": 2.98159509202454e-07, "loss": 1.6786, "step": 3120 }, { "epoch": 27.95, "learning_rate": 2.858895705521472e-07, "loss": 1.6546, "step": 3130 }, { "epoch": 28.0, "eval_loss": 1.764729619026184, "eval_runtime": 230.6298, "eval_samples_per_second": 8.646, "eval_steps_per_second": 0.867, "eval_wer": 0.9452866300088101, "step": 3136 }, { "epoch": 28.04, "learning_rate": 2.7361963190184045e-07, "loss": 1.7145, "step": 3140 }, { "epoch": 28.12, "learning_rate": 2.6134969325153373e-07, "loss": 1.6281, "step": 3150 }, { "epoch": 28.21, "learning_rate": 2.4907975460122696e-07, "loss": 1.6589, "step": 3160 }, { "epoch": 28.3, "learning_rate": 2.3680981595092022e-07, "loss": 1.6808, "step": 3170 }, { "epoch": 28.39, "learning_rate": 2.245398773006135e-07, "loss": 1.6921, "step": 3180 }, { "epoch": 28.48, "learning_rate": 2.1226993865030673e-07, "loss": 1.6476, "step": 3190 }, { "epoch": 28.57, "learning_rate": 2e-07, "loss": 1.6768, "step": 3200 }, { "epoch": 28.66, "learning_rate": 1.8773006134969325e-07, "loss": 1.6676, "step": 3210 }, { "epoch": 28.75, "learning_rate": 1.7546012269938648e-07, "loss": 1.6758, "step": 3220 }, { "epoch": 28.84, "learning_rate": 1.6319018404907974e-07, "loss": 1.6158, "step": 3230 }, { "epoch": 28.93, "learning_rate": 1.50920245398773e-07, "loss": 1.6542, "step": 3240 }, { "epoch": 29.0, "eval_loss": 1.765263557434082, "eval_runtime": 230.3263, "eval_samples_per_second": 8.657, "eval_steps_per_second": 0.868, "eval_wer": 0.9375094935747487, "step": 3248 }, { "epoch": 29.02, "learning_rate": 1.3865030674846625e-07, "loss": 1.694, "step": 3250 }, { "epoch": 29.11, "learning_rate": 1.263803680981595e-07, "loss": 1.6854, "step": 3260 }, { "epoch": 29.2, "learning_rate": 1.1411042944785275e-07, "loss": 1.6717, "step": 3270 }, { "epoch": 29.29, "learning_rate": 1.0184049079754601e-07, "loss": 1.6737, "step": 3280 }, { "epoch": 29.37, "learning_rate": 8.957055214723925e-08, "loss": 1.6468, "step": 3290 }, { "epoch": 29.46, "learning_rate": 7.730061349693251e-08, "loss": 1.6409, "step": 3300 }, { "epoch": 29.55, "learning_rate": 6.503067484662577e-08, "loss": 1.66, "step": 3310 }, { "epoch": 29.64, "learning_rate": 5.276073619631902e-08, "loss": 1.6526, "step": 3320 }, { "epoch": 29.73, "learning_rate": 4.049079754601227e-08, "loss": 1.6681, "step": 3330 }, { "epoch": 29.82, "learning_rate": 2.822085889570552e-08, "loss": 1.6626, "step": 3340 }, { "epoch": 29.91, "learning_rate": 1.5950920245398775e-08, "loss": 1.6352, "step": 3350 }, { "epoch": 30.0, "learning_rate": 3.6809815950920243e-09, "loss": 1.647, "step": 3360 }, { "epoch": 30.0, "eval_loss": 1.7582672834396362, "eval_runtime": 230.7695, "eval_samples_per_second": 8.641, "eval_steps_per_second": 0.867, "eval_wer": 0.9386031533857885, "step": 3360 }, { "epoch": 30.0, "step": 3360, "total_flos": 1.6308106473516027e+20, "train_loss": 3.5984562555948894, "train_runtime": 102250.8509, "train_samples_per_second": 5.263, "train_steps_per_second": 0.033 } ], "max_steps": 3360, "num_train_epochs": 30, "total_flos": 1.6308106473516027e+20, "trial_name": null, "trial_params": null }