{ "best_metric": null, "best_model_checkpoint": null, "epoch": 139.66480446927375, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.56, "learning_rate": 0.000194, "loss": 3.3358, "step": 100 }, { "epoch": 1.12, "learning_rate": 0.00039400000000000004, "loss": 0.9471, "step": 200 }, { "epoch": 1.68, "learning_rate": 0.000594, "loss": 0.9236, "step": 300 }, { "epoch": 2.23, "learning_rate": 0.0007940000000000001, "loss": 0.996, "step": 400 }, { "epoch": 2.79, "learning_rate": 0.000994, "loss": 1.0296, "step": 500 }, { "epoch": 2.79, "eval_loss": 0.8106316328048706, "eval_runtime": 194.0347, "eval_samples_per_second": 13.467, "eval_steps_per_second": 0.423, "eval_wer": 0.8029419223941161, "step": 500 }, { "epoch": 3.35, "learning_rate": 0.0009960408163265306, "loss": 1.0459, "step": 600 }, { "epoch": 3.91, "learning_rate": 0.0009919591836734694, "loss": 0.9901, "step": 700 }, { "epoch": 4.47, "learning_rate": 0.0009878775510204082, "loss": 0.9455, "step": 800 }, { "epoch": 5.03, "learning_rate": 0.000983795918367347, "loss": 0.9814, "step": 900 }, { "epoch": 5.59, "learning_rate": 0.0009797142857142858, "loss": 0.9339, "step": 1000 }, { "epoch": 5.59, "eval_loss": 0.7419061064720154, "eval_runtime": 190.1458, "eval_samples_per_second": 13.742, "eval_steps_per_second": 0.431, "eval_wer": 0.7931777834136444, "step": 1000 }, { "epoch": 6.15, "learning_rate": 0.0009756326530612245, "loss": 0.9541, "step": 1100 }, { "epoch": 6.7, "learning_rate": 0.0009715510204081632, "loss": 0.8954, "step": 1200 }, { "epoch": 7.26, "learning_rate": 0.0009674693877551021, "loss": 0.9209, "step": 1300 }, { "epoch": 7.82, "learning_rate": 0.0009633877551020408, "loss": 0.9078, "step": 1400 }, { "epoch": 8.38, "learning_rate": 0.0009593061224489796, "loss": 0.8925, "step": 1500 }, { "epoch": 8.38, "eval_loss": 0.7136574983596802, "eval_runtime": 190.5136, "eval_samples_per_second": 13.716, "eval_steps_per_second": 0.43, "eval_wer": 0.7706061374587877, "step": 1500 }, { "epoch": 8.94, "learning_rate": 0.0009552244897959184, "loss": 0.8785, "step": 1600 }, { "epoch": 9.5, "learning_rate": 0.0009511428571428572, "loss": 0.8793, "step": 1700 }, { "epoch": 10.06, "learning_rate": 0.0009470612244897959, "loss": 0.8766, "step": 1800 }, { "epoch": 10.61, "learning_rate": 0.0009429795918367347, "loss": 0.8601, "step": 1900 }, { "epoch": 11.17, "learning_rate": 0.0009388979591836735, "loss": 0.8484, "step": 2000 }, { "epoch": 11.17, "eval_loss": 0.7019568681716919, "eval_runtime": 193.4802, "eval_samples_per_second": 13.505, "eval_steps_per_second": 0.424, "eval_wer": 0.7676895764646209, "step": 2000 }, { "epoch": 11.73, "learning_rate": 0.0009348163265306122, "loss": 0.8123, "step": 2100 }, { "epoch": 12.29, "learning_rate": 0.000930734693877551, "loss": 0.7892, "step": 2200 }, { "epoch": 12.85, "learning_rate": 0.0009266530612244898, "loss": 0.7836, "step": 2300 }, { "epoch": 13.41, "learning_rate": 0.0009225714285714286, "loss": 0.7694, "step": 2400 }, { "epoch": 13.97, "learning_rate": 0.0009184897959183673, "loss": 0.7521, "step": 2500 }, { "epoch": 13.97, "eval_loss": 0.7042976021766663, "eval_runtime": 193.425, "eval_samples_per_second": 13.509, "eval_steps_per_second": 0.424, "eval_wer": 0.7375095105249809, "step": 2500 }, { "epoch": 14.53, "learning_rate": 0.0009144081632653061, "loss": 0.7472, "step": 2600 }, { "epoch": 15.08, "learning_rate": 0.0009103265306122449, "loss": 0.7254, "step": 2700 }, { "epoch": 15.64, "learning_rate": 0.0009062448979591837, "loss": 0.7541, "step": 2800 }, { "epoch": 16.2, "learning_rate": 0.0009021632653061224, "loss": 0.7361, "step": 2900 }, { "epoch": 16.76, "learning_rate": 0.0008980816326530612, "loss": 0.719, "step": 3000 }, { "epoch": 16.76, "eval_loss": 0.6617498397827148, "eval_runtime": 191.0945, "eval_samples_per_second": 13.674, "eval_steps_per_second": 0.429, "eval_wer": 0.742772001014456, "step": 3000 }, { "epoch": 17.32, "learning_rate": 0.000894, "loss": 0.6862, "step": 3100 }, { "epoch": 17.88, "learning_rate": 0.0008899183673469387, "loss": 0.6599, "step": 3200 }, { "epoch": 18.44, "learning_rate": 0.0008858367346938775, "loss": 0.6363, "step": 3300 }, { "epoch": 18.99, "learning_rate": 0.0008817551020408164, "loss": 0.6861, "step": 3400 }, { "epoch": 19.55, "learning_rate": 0.0008776734693877552, "loss": 0.656, "step": 3500 }, { "epoch": 19.55, "eval_loss": 0.6388168334960938, "eval_runtime": 190.9254, "eval_samples_per_second": 13.686, "eval_steps_per_second": 0.429, "eval_wer": 0.7201686533096627, "step": 3500 }, { "epoch": 20.11, "learning_rate": 0.0008735918367346939, "loss": 0.6348, "step": 3600 }, { "epoch": 20.67, "learning_rate": 0.0008695102040816327, "loss": 0.5911, "step": 3700 }, { "epoch": 21.23, "learning_rate": 0.0008654285714285715, "loss": 0.605, "step": 3800 }, { "epoch": 21.79, "learning_rate": 0.0008613469387755102, "loss": 0.6051, "step": 3900 }, { "epoch": 22.35, "learning_rate": 0.0008572653061224491, "loss": 0.6085, "step": 4000 }, { "epoch": 22.35, "eval_loss": 0.6211119890213013, "eval_runtime": 188.9632, "eval_samples_per_second": 13.828, "eval_steps_per_second": 0.434, "eval_wer": 0.6960119198579762, "step": 4000 }, { "epoch": 22.91, "learning_rate": 0.0008531836734693878, "loss": 0.5777, "step": 4100 }, { "epoch": 23.46, "learning_rate": 0.0008491020408163266, "loss": 0.5582, "step": 4200 }, { "epoch": 24.02, "learning_rate": 0.0008450204081632653, "loss": 0.5859, "step": 4300 }, { "epoch": 24.58, "learning_rate": 0.0008409387755102042, "loss": 0.5368, "step": 4400 }, { "epoch": 25.14, "learning_rate": 0.0008368571428571429, "loss": 0.5598, "step": 4500 }, { "epoch": 25.14, "eval_loss": 0.6131592988967896, "eval_runtime": 191.2607, "eval_samples_per_second": 13.662, "eval_steps_per_second": 0.429, "eval_wer": 0.664436976921126, "step": 4500 }, { "epoch": 25.7, "learning_rate": 0.0008327755102040816, "loss": 0.5139, "step": 4600 }, { "epoch": 26.26, "learning_rate": 0.0008286938775510205, "loss": 0.5455, "step": 4700 }, { "epoch": 26.82, "learning_rate": 0.0008246122448979592, "loss": 0.5448, "step": 4800 }, { "epoch": 27.37, "learning_rate": 0.000820530612244898, "loss": 0.5316, "step": 4900 }, { "epoch": 27.93, "learning_rate": 0.0008164489795918367, "loss": 0.4969, "step": 5000 }, { "epoch": 27.93, "eval_loss": 0.6065218448638916, "eval_runtime": 194.8518, "eval_samples_per_second": 13.41, "eval_steps_per_second": 0.421, "eval_wer": 0.65210499619579, "step": 5000 }, { "epoch": 28.49, "learning_rate": 0.0008123673469387756, "loss": 0.48, "step": 5100 }, { "epoch": 29.05, "learning_rate": 0.0008082857142857143, "loss": 0.5028, "step": 5200 }, { "epoch": 29.61, "learning_rate": 0.0008042040816326531, "loss": 0.4577, "step": 5300 }, { "epoch": 30.17, "learning_rate": 0.0008001224489795919, "loss": 0.4636, "step": 5400 }, { "epoch": 30.73, "learning_rate": 0.0007960408163265306, "loss": 0.4638, "step": 5500 }, { "epoch": 30.73, "eval_loss": 0.697826623916626, "eval_runtime": 189.0268, "eval_samples_per_second": 13.823, "eval_steps_per_second": 0.434, "eval_wer": 0.6577479076845042, "step": 5500 }, { "epoch": 31.28, "learning_rate": 0.0007919591836734694, "loss": 0.46, "step": 5600 }, { "epoch": 31.84, "learning_rate": 0.0007878775510204081, "loss": 0.4577, "step": 5700 }, { "epoch": 32.4, "learning_rate": 0.000783795918367347, "loss": 0.4502, "step": 5800 }, { "epoch": 32.96, "learning_rate": 0.0007797142857142857, "loss": 0.4395, "step": 5900 }, { "epoch": 33.52, "learning_rate": 0.0007756326530612245, "loss": 0.4385, "step": 6000 }, { "epoch": 33.52, "eval_loss": 0.5994424223899841, "eval_runtime": 190.5342, "eval_samples_per_second": 13.714, "eval_steps_per_second": 0.43, "eval_wer": 0.6565115394369769, "step": 6000 }, { "epoch": 34.08, "learning_rate": 0.0007715510204081633, "loss": 0.4173, "step": 6100 }, { "epoch": 34.64, "learning_rate": 0.0007674693877551021, "loss": 0.4144, "step": 6200 }, { "epoch": 35.2, "learning_rate": 0.0007633877551020408, "loss": 0.3906, "step": 6300 }, { "epoch": 35.75, "learning_rate": 0.0007593061224489795, "loss": 0.4085, "step": 6400 }, { "epoch": 36.31, "learning_rate": 0.0007552244897959184, "loss": 0.396, "step": 6500 }, { "epoch": 36.31, "eval_loss": 0.6170300841331482, "eval_runtime": 190.0522, "eval_samples_per_second": 13.749, "eval_steps_per_second": 0.431, "eval_wer": 0.6257608419984784, "step": 6500 }, { "epoch": 36.87, "learning_rate": 0.0007511428571428571, "loss": 0.3864, "step": 6600 }, { "epoch": 37.43, "learning_rate": 0.0007470612244897959, "loss": 0.3864, "step": 6700 }, { "epoch": 37.99, "learning_rate": 0.0007429795918367347, "loss": 0.3975, "step": 6800 }, { "epoch": 38.55, "learning_rate": 0.0007388979591836735, "loss": 0.3873, "step": 6900 }, { "epoch": 39.11, "learning_rate": 0.0007348163265306122, "loss": 0.3861, "step": 7000 }, { "epoch": 39.11, "eval_loss": 0.6486256718635559, "eval_runtime": 191.5175, "eval_samples_per_second": 13.644, "eval_steps_per_second": 0.428, "eval_wer": 0.6216713162566574, "step": 7000 }, { "epoch": 39.66, "learning_rate": 0.0007307346938775509, "loss": 0.3629, "step": 7100 }, { "epoch": 40.22, "learning_rate": 0.0007266530612244898, "loss": 0.3545, "step": 7200 }, { "epoch": 40.78, "learning_rate": 0.0007225714285714285, "loss": 0.381, "step": 7300 }, { "epoch": 41.34, "learning_rate": 0.0007184897959183673, "loss": 0.3679, "step": 7400 }, { "epoch": 41.9, "learning_rate": 0.0007144081632653061, "loss": 0.3602, "step": 7500 }, { "epoch": 41.9, "eval_loss": 0.6508038640022278, "eval_runtime": 188.3071, "eval_samples_per_second": 13.876, "eval_steps_per_second": 0.435, "eval_wer": 0.6114633527770733, "step": 7500 }, { "epoch": 42.46, "learning_rate": 0.000710326530612245, "loss": 0.3431, "step": 7600 }, { "epoch": 43.02, "learning_rate": 0.0007062448979591836, "loss": 0.3555, "step": 7700 }, { "epoch": 43.58, "learning_rate": 0.0007021632653061226, "loss": 0.3327, "step": 7800 }, { "epoch": 44.13, "learning_rate": 0.0006980816326530613, "loss": 0.3327, "step": 7900 }, { "epoch": 44.69, "learning_rate": 0.000694, "loss": 0.3251, "step": 8000 }, { "epoch": 44.69, "eval_loss": 0.7021898031234741, "eval_runtime": 189.1301, "eval_samples_per_second": 13.816, "eval_steps_per_second": 0.434, "eval_wer": 0.625317017499366, "step": 8000 }, { "epoch": 45.25, "learning_rate": 0.0006899183673469388, "loss": 0.3403, "step": 8100 }, { "epoch": 45.81, "learning_rate": 0.0006858367346938776, "loss": 0.3242, "step": 8200 }, { "epoch": 46.37, "learning_rate": 0.0006817551020408164, "loss": 0.316, "step": 8300 }, { "epoch": 46.93, "learning_rate": 0.0006776734693877551, "loss": 0.3113, "step": 8400 }, { "epoch": 47.49, "learning_rate": 0.000673591836734694, "loss": 0.3197, "step": 8500 }, { "epoch": 47.49, "eval_loss": 0.7705556750297546, "eval_runtime": 188.475, "eval_samples_per_second": 13.864, "eval_steps_per_second": 0.435, "eval_wer": 0.621544509256911, "step": 8500 }, { "epoch": 48.04, "learning_rate": 0.0006695102040816327, "loss": 0.3188, "step": 8600 }, { "epoch": 48.6, "learning_rate": 0.0006654285714285715, "loss": 0.3134, "step": 8700 }, { "epoch": 49.16, "learning_rate": 0.0006613469387755102, "loss": 0.3093, "step": 8800 }, { "epoch": 49.72, "learning_rate": 0.000657265306122449, "loss": 0.2872, "step": 8900 }, { "epoch": 50.28, "learning_rate": 0.0006531836734693878, "loss": 0.3013, "step": 9000 }, { "epoch": 50.28, "eval_loss": 0.6418728232383728, "eval_runtime": 189.3407, "eval_samples_per_second": 13.801, "eval_steps_per_second": 0.433, "eval_wer": 0.5998922140502155, "step": 9000 }, { "epoch": 50.84, "learning_rate": 0.0006491020408163265, "loss": 0.2866, "step": 9100 }, { "epoch": 51.4, "learning_rate": 0.0006450204081632654, "loss": 0.2794, "step": 9200 }, { "epoch": 51.96, "learning_rate": 0.0006409387755102041, "loss": 0.2842, "step": 9300 }, { "epoch": 52.51, "learning_rate": 0.0006368571428571429, "loss": 0.2894, "step": 9400 }, { "epoch": 53.07, "learning_rate": 0.0006327755102040816, "loss": 0.2813, "step": 9500 }, { "epoch": 53.07, "eval_loss": 0.6907714009284973, "eval_runtime": 188.5038, "eval_samples_per_second": 13.862, "eval_steps_per_second": 0.435, "eval_wer": 0.595929495308141, "step": 9500 }, { "epoch": 53.63, "learning_rate": 0.0006286938775510205, "loss": 0.2846, "step": 9600 }, { "epoch": 54.19, "learning_rate": 0.0006246122448979592, "loss": 0.2784, "step": 9700 }, { "epoch": 54.75, "learning_rate": 0.0006205306122448979, "loss": 0.2841, "step": 9800 }, { "epoch": 55.31, "learning_rate": 0.0006164489795918368, "loss": 0.2838, "step": 9900 }, { "epoch": 55.87, "learning_rate": 0.0006123673469387755, "loss": 0.286, "step": 10000 }, { "epoch": 55.87, "eval_loss": 0.7150660157203674, "eval_runtime": 188.1197, "eval_samples_per_second": 13.89, "eval_steps_per_second": 0.436, "eval_wer": 0.5916180573167639, "step": 10000 }, { "epoch": 56.42, "learning_rate": 0.0006082857142857143, "loss": 0.2831, "step": 10100 }, { "epoch": 56.98, "learning_rate": 0.000604204081632653, "loss": 0.2738, "step": 10200 }, { "epoch": 57.54, "learning_rate": 0.0006001224489795919, "loss": 0.2613, "step": 10300 }, { "epoch": 58.1, "learning_rate": 0.0005960816326530613, "loss": 0.2645, "step": 10400 }, { "epoch": 58.66, "learning_rate": 0.000592, "loss": 0.2645, "step": 10500 }, { "epoch": 58.66, "eval_loss": 0.7181155681610107, "eval_runtime": 192.5668, "eval_samples_per_second": 13.569, "eval_steps_per_second": 0.426, "eval_wer": 0.5860068475779863, "step": 10500 }, { "epoch": 59.22, "learning_rate": 0.0005879183673469388, "loss": 0.2501, "step": 10600 }, { "epoch": 59.78, "learning_rate": 0.0005838367346938776, "loss": 0.2612, "step": 10700 }, { "epoch": 60.34, "learning_rate": 0.0005797551020408164, "loss": 0.2541, "step": 10800 }, { "epoch": 60.89, "learning_rate": 0.0005756734693877551, "loss": 0.2496, "step": 10900 }, { "epoch": 61.45, "learning_rate": 0.0005715918367346939, "loss": 0.2535, "step": 11000 }, { "epoch": 61.45, "eval_loss": 0.7877444624900818, "eval_runtime": 190.8548, "eval_samples_per_second": 13.691, "eval_steps_per_second": 0.43, "eval_wer": 0.5979267055541466, "step": 11000 }, { "epoch": 62.01, "learning_rate": 0.0005675102040816327, "loss": 0.2619, "step": 11100 }, { "epoch": 62.57, "learning_rate": 0.0005634285714285714, "loss": 0.2495, "step": 11200 }, { "epoch": 63.13, "learning_rate": 0.0005593469387755102, "loss": 0.2521, "step": 11300 }, { "epoch": 63.69, "learning_rate": 0.000555265306122449, "loss": 0.2326, "step": 11400 }, { "epoch": 64.25, "learning_rate": 0.0005511836734693878, "loss": 0.247, "step": 11500 }, { "epoch": 64.25, "eval_loss": 0.8198513984680176, "eval_runtime": 187.6839, "eval_samples_per_second": 13.922, "eval_steps_per_second": 0.437, "eval_wer": 0.6128899315242201, "step": 11500 }, { "epoch": 64.8, "learning_rate": 0.0005471020408163265, "loss": 0.249, "step": 11600 }, { "epoch": 65.36, "learning_rate": 0.0005430204081632654, "loss": 0.2463, "step": 11700 }, { "epoch": 65.92, "learning_rate": 0.0005389387755102041, "loss": 0.2333, "step": 11800 }, { "epoch": 66.48, "learning_rate": 0.0005348571428571428, "loss": 0.2381, "step": 11900 }, { "epoch": 67.04, "learning_rate": 0.0005307755102040816, "loss": 0.2412, "step": 12000 }, { "epoch": 67.04, "eval_loss": 0.767884373664856, "eval_runtime": 188.3627, "eval_samples_per_second": 13.872, "eval_steps_per_second": 0.435, "eval_wer": 0.5884161805731677, "step": 12000 }, { "epoch": 67.6, "learning_rate": 0.0005266938775510204, "loss": 0.2285, "step": 12100 }, { "epoch": 68.16, "learning_rate": 0.0005226122448979592, "loss": 0.241, "step": 12200 }, { "epoch": 68.72, "learning_rate": 0.0005185306122448979, "loss": 0.229, "step": 12300 }, { "epoch": 69.27, "learning_rate": 0.0005144489795918368, "loss": 0.2371, "step": 12400 }, { "epoch": 69.83, "learning_rate": 0.0005103673469387755, "loss": 0.2404, "step": 12500 }, { "epoch": 69.83, "eval_loss": 0.7266025543212891, "eval_runtime": 190.7393, "eval_samples_per_second": 13.699, "eval_steps_per_second": 0.43, "eval_wer": 0.581632006086736, "step": 12500 }, { "epoch": 70.39, "learning_rate": 0.0005062857142857143, "loss": 0.2315, "step": 12600 }, { "epoch": 70.95, "learning_rate": 0.0005022040816326531, "loss": 0.2305, "step": 12700 }, { "epoch": 71.51, "learning_rate": 0.0004981224489795918, "loss": 0.2307, "step": 12800 }, { "epoch": 72.07, "learning_rate": 0.0004940408163265306, "loss": 0.2204, "step": 12900 }, { "epoch": 72.63, "learning_rate": 0.00049, "loss": 0.2293, "step": 13000 }, { "epoch": 72.63, "eval_loss": 0.792820394039154, "eval_runtime": 188.6653, "eval_samples_per_second": 13.85, "eval_steps_per_second": 0.435, "eval_wer": 0.5794762870910474, "step": 13000 }, { "epoch": 73.18, "learning_rate": 0.0004859183673469388, "loss": 0.2185, "step": 13100 }, { "epoch": 73.74, "learning_rate": 0.00048183673469387754, "loss": 0.2126, "step": 13200 }, { "epoch": 74.3, "learning_rate": 0.00047775510204081634, "loss": 0.2177, "step": 13300 }, { "epoch": 74.86, "learning_rate": 0.00047367346938775515, "loss": 0.2204, "step": 13400 }, { "epoch": 75.42, "learning_rate": 0.0004695918367346939, "loss": 0.2176, "step": 13500 }, { "epoch": 75.42, "eval_loss": 0.7916468381881714, "eval_runtime": 189.1525, "eval_samples_per_second": 13.814, "eval_steps_per_second": 0.434, "eval_wer": 0.5845802688308395, "step": 13500 }, { "epoch": 75.98, "learning_rate": 0.00046551020408163265, "loss": 0.2119, "step": 13600 }, { "epoch": 76.54, "learning_rate": 0.0004614285714285714, "loss": 0.2149, "step": 13700 }, { "epoch": 77.09, "learning_rate": 0.0004573469387755102, "loss": 0.214, "step": 13800 }, { "epoch": 77.65, "learning_rate": 0.000453265306122449, "loss": 0.2095, "step": 13900 }, { "epoch": 78.21, "learning_rate": 0.00044918367346938776, "loss": 0.2143, "step": 14000 }, { "epoch": 78.21, "eval_loss": 0.7954298853874207, "eval_runtime": 187.5182, "eval_samples_per_second": 13.935, "eval_steps_per_second": 0.437, "eval_wer": 0.5764646208470707, "step": 14000 }, { "epoch": 78.77, "learning_rate": 0.00044510204081632656, "loss": 0.2182, "step": 14100 }, { "epoch": 79.33, "learning_rate": 0.0004410204081632653, "loss": 0.2042, "step": 14200 }, { "epoch": 79.89, "learning_rate": 0.0004369387755102041, "loss": 0.2184, "step": 14300 }, { "epoch": 80.45, "learning_rate": 0.00043285714285714287, "loss": 0.2104, "step": 14400 }, { "epoch": 81.01, "learning_rate": 0.0004287755102040816, "loss": 0.2185, "step": 14500 }, { "epoch": 81.01, "eval_loss": 0.8317196369171143, "eval_runtime": 188.0902, "eval_samples_per_second": 13.892, "eval_steps_per_second": 0.436, "eval_wer": 0.5906987065686026, "step": 14500 }, { "epoch": 81.56, "learning_rate": 0.0004246938775510204, "loss": 0.2026, "step": 14600 }, { "epoch": 82.12, "learning_rate": 0.0004206122448979592, "loss": 0.1972, "step": 14700 }, { "epoch": 82.68, "learning_rate": 0.000416530612244898, "loss": 0.2083, "step": 14800 }, { "epoch": 83.24, "learning_rate": 0.00041244897959183673, "loss": 0.2132, "step": 14900 }, { "epoch": 83.8, "learning_rate": 0.00040836734693877553, "loss": 0.2057, "step": 15000 }, { "epoch": 83.8, "eval_loss": 0.8015716075897217, "eval_runtime": 188.5358, "eval_samples_per_second": 13.859, "eval_steps_per_second": 0.435, "eval_wer": 0.5850557950798884, "step": 15000 }, { "epoch": 84.36, "learning_rate": 0.0004042857142857143, "loss": 0.2027, "step": 15100 }, { "epoch": 84.92, "learning_rate": 0.0004002040816326531, "loss": 0.2016, "step": 15200 }, { "epoch": 85.47, "learning_rate": 0.00039612244897959184, "loss": 0.1935, "step": 15300 }, { "epoch": 86.03, "learning_rate": 0.0003920408163265306, "loss": 0.1948, "step": 15400 }, { "epoch": 86.59, "learning_rate": 0.0003879591836734694, "loss": 0.1895, "step": 15500 }, { "epoch": 86.59, "eval_loss": 0.8080111145973206, "eval_runtime": 192.0878, "eval_samples_per_second": 13.603, "eval_steps_per_second": 0.427, "eval_wer": 0.5679368501141263, "step": 15500 }, { "epoch": 87.15, "learning_rate": 0.00038387755102040815, "loss": 0.1894, "step": 15600 }, { "epoch": 87.71, "learning_rate": 0.00037979591836734695, "loss": 0.1798, "step": 15700 }, { "epoch": 88.27, "learning_rate": 0.00037571428571428575, "loss": 0.1891, "step": 15800 }, { "epoch": 88.83, "learning_rate": 0.0003716326530612245, "loss": 0.1916, "step": 15900 }, { "epoch": 89.39, "learning_rate": 0.0003675510204081633, "loss": 0.1883, "step": 16000 }, { "epoch": 89.39, "eval_loss": 0.8103044033050537, "eval_runtime": 192.0932, "eval_samples_per_second": 13.603, "eval_steps_per_second": 0.427, "eval_wer": 0.5712338321075323, "step": 16000 }, { "epoch": 89.94, "learning_rate": 0.000363469387755102, "loss": 0.1774, "step": 16100 }, { "epoch": 90.5, "learning_rate": 0.0003593877551020408, "loss": 0.1831, "step": 16200 }, { "epoch": 91.06, "learning_rate": 0.00035530612244897956, "loss": 0.1858, "step": 16300 }, { "epoch": 91.62, "learning_rate": 0.00035122448979591837, "loss": 0.1863, "step": 16400 }, { "epoch": 92.18, "learning_rate": 0.00034714285714285717, "loss": 0.1802, "step": 16500 }, { "epoch": 92.18, "eval_loss": 0.8383206129074097, "eval_runtime": 187.0696, "eval_samples_per_second": 13.968, "eval_steps_per_second": 0.438, "eval_wer": 0.5644496576211007, "step": 16500 }, { "epoch": 92.74, "learning_rate": 0.0003430612244897959, "loss": 0.1818, "step": 16600 }, { "epoch": 93.3, "learning_rate": 0.0003389795918367347, "loss": 0.1805, "step": 16700 }, { "epoch": 93.85, "learning_rate": 0.0003348979591836735, "loss": 0.1784, "step": 16800 }, { "epoch": 94.41, "learning_rate": 0.0003308163265306123, "loss": 0.1731, "step": 16900 }, { "epoch": 94.97, "learning_rate": 0.000326734693877551, "loss": 0.1826, "step": 17000 }, { "epoch": 94.97, "eval_loss": 0.8798549771308899, "eval_runtime": 188.9431, "eval_samples_per_second": 13.83, "eval_steps_per_second": 0.434, "eval_wer": 0.5657494293685011, "step": 17000 }, { "epoch": 95.53, "learning_rate": 0.0003226530612244898, "loss": 0.1733, "step": 17100 }, { "epoch": 96.09, "learning_rate": 0.0003185714285714286, "loss": 0.1723, "step": 17200 }, { "epoch": 96.65, "learning_rate": 0.00031448979591836734, "loss": 0.1693, "step": 17300 }, { "epoch": 97.21, "learning_rate": 0.00031044897959183674, "loss": 0.1705, "step": 17400 }, { "epoch": 97.77, "learning_rate": 0.0003063673469387755, "loss": 0.1717, "step": 17500 }, { "epoch": 97.77, "eval_loss": 0.8619566559791565, "eval_runtime": 188.0319, "eval_samples_per_second": 13.897, "eval_steps_per_second": 0.436, "eval_wer": 0.5709168146081663, "step": 17500 }, { "epoch": 98.32, "learning_rate": 0.0003022857142857143, "loss": 0.1616, "step": 17600 }, { "epoch": 98.88, "learning_rate": 0.00029820408163265305, "loss": 0.1653, "step": 17700 }, { "epoch": 99.44, "learning_rate": 0.00029412244897959185, "loss": 0.1708, "step": 17800 }, { "epoch": 100.0, "learning_rate": 0.0002900408163265306, "loss": 0.1681, "step": 17900 }, { "epoch": 100.56, "learning_rate": 0.00028599999999999996, "loss": 0.1701, "step": 18000 }, { "epoch": 100.56, "eval_loss": 0.8717033267021179, "eval_runtime": 192.2586, "eval_samples_per_second": 13.591, "eval_steps_per_second": 0.427, "eval_wer": 0.5661932538676135, "step": 18000 }, { "epoch": 101.12, "learning_rate": 0.00028191836734693876, "loss": 0.1614, "step": 18100 }, { "epoch": 101.68, "learning_rate": 0.00027783673469387757, "loss": 0.1641, "step": 18200 }, { "epoch": 102.23, "learning_rate": 0.0002737551020408163, "loss": 0.1615, "step": 18300 }, { "epoch": 102.79, "learning_rate": 0.0002696734693877551, "loss": 0.1588, "step": 18400 }, { "epoch": 103.35, "learning_rate": 0.0002655918367346939, "loss": 0.1623, "step": 18500 }, { "epoch": 103.35, "eval_loss": 0.8533861041069031, "eval_runtime": 187.659, "eval_samples_per_second": 13.924, "eval_steps_per_second": 0.437, "eval_wer": 0.5594090793811819, "step": 18500 }, { "epoch": 103.91, "learning_rate": 0.0002615102040816327, "loss": 0.1566, "step": 18600 }, { "epoch": 104.47, "learning_rate": 0.0002574285714285715, "loss": 0.1528, "step": 18700 }, { "epoch": 105.03, "learning_rate": 0.0002533469387755102, "loss": 0.1538, "step": 18800 }, { "epoch": 105.59, "learning_rate": 0.000249265306122449, "loss": 0.1557, "step": 18900 }, { "epoch": 106.15, "learning_rate": 0.00024518367346938773, "loss": 0.158, "step": 19000 }, { "epoch": 106.15, "eval_loss": 0.8595470786094666, "eval_runtime": 194.975, "eval_samples_per_second": 13.402, "eval_steps_per_second": 0.421, "eval_wer": 0.5546221151407558, "step": 19000 }, { "epoch": 106.7, "learning_rate": 0.00024110204081632654, "loss": 0.1459, "step": 19100 }, { "epoch": 107.26, "learning_rate": 0.00023702040816326532, "loss": 0.152, "step": 19200 }, { "epoch": 107.82, "learning_rate": 0.0002329387755102041, "loss": 0.1533, "step": 19300 }, { "epoch": 108.38, "learning_rate": 0.00022885714285714287, "loss": 0.146, "step": 19400 }, { "epoch": 108.94, "learning_rate": 0.00022477551020408162, "loss": 0.1508, "step": 19500 }, { "epoch": 108.94, "eval_loss": 0.8573695421218872, "eval_runtime": 191.3527, "eval_samples_per_second": 13.655, "eval_steps_per_second": 0.429, "eval_wer": 0.5544636063910728, "step": 19500 }, { "epoch": 109.5, "learning_rate": 0.0002206938775510204, "loss": 0.1454, "step": 19600 }, { "epoch": 110.06, "learning_rate": 0.0002166122448979592, "loss": 0.1466, "step": 19700 }, { "epoch": 110.61, "learning_rate": 0.00021253061224489798, "loss": 0.1416, "step": 19800 }, { "epoch": 111.17, "learning_rate": 0.00020844897959183673, "loss": 0.1444, "step": 19900 }, { "epoch": 111.73, "learning_rate": 0.0002043673469387755, "loss": 0.142, "step": 20000 }, { "epoch": 111.73, "eval_loss": 0.867087721824646, "eval_runtime": 187.948, "eval_samples_per_second": 13.903, "eval_steps_per_second": 0.436, "eval_wer": 0.5536710626426579, "step": 20000 }, { "epoch": 112.29, "learning_rate": 0.0002002857142857143, "loss": 0.1485, "step": 20100 }, { "epoch": 112.85, "learning_rate": 0.00019620408163265306, "loss": 0.1466, "step": 20200 }, { "epoch": 113.41, "learning_rate": 0.00019212244897959184, "loss": 0.1364, "step": 20300 }, { "epoch": 113.97, "learning_rate": 0.00018804081632653062, "loss": 0.1421, "step": 20400 }, { "epoch": 114.53, "learning_rate": 0.0001839591836734694, "loss": 0.1395, "step": 20500 }, { "epoch": 114.53, "eval_loss": 0.843588650226593, "eval_runtime": 191.7789, "eval_samples_per_second": 13.625, "eval_steps_per_second": 0.428, "eval_wer": 0.5524663961450672, "step": 20500 }, { "epoch": 115.08, "learning_rate": 0.00017987755102040817, "loss": 0.1409, "step": 20600 }, { "epoch": 115.64, "learning_rate": 0.00017579591836734692, "loss": 0.1356, "step": 20700 }, { "epoch": 116.2, "learning_rate": 0.0001717142857142857, "loss": 0.1364, "step": 20800 }, { "epoch": 116.76, "learning_rate": 0.0001676326530612245, "loss": 0.1328, "step": 20900 }, { "epoch": 117.32, "learning_rate": 0.0001635918367346939, "loss": 0.1373, "step": 21000 }, { "epoch": 117.32, "eval_loss": 0.8807795643806458, "eval_runtime": 186.8391, "eval_samples_per_second": 13.985, "eval_steps_per_second": 0.439, "eval_wer": 0.5481866599036267, "step": 21000 }, { "epoch": 117.88, "learning_rate": 0.00015951020408163267, "loss": 0.1316, "step": 21100 }, { "epoch": 118.44, "learning_rate": 0.00015542857142857142, "loss": 0.133, "step": 21200 }, { "epoch": 118.99, "learning_rate": 0.0001513469387755102, "loss": 0.1348, "step": 21300 }, { "epoch": 119.55, "learning_rate": 0.00014726530612244897, "loss": 0.1321, "step": 21400 }, { "epoch": 120.11, "learning_rate": 0.00014318367346938778, "loss": 0.1338, "step": 21500 }, { "epoch": 120.11, "eval_loss": 0.902350127696991, "eval_runtime": 188.1097, "eval_samples_per_second": 13.891, "eval_steps_per_second": 0.436, "eval_wer": 0.5418146081663708, "step": 21500 }, { "epoch": 120.67, "learning_rate": 0.00013910204081632655, "loss": 0.1248, "step": 21600 }, { "epoch": 121.23, "learning_rate": 0.0001350204081632653, "loss": 0.1267, "step": 21700 }, { "epoch": 121.79, "learning_rate": 0.00013093877551020408, "loss": 0.1282, "step": 21800 }, { "epoch": 122.35, "learning_rate": 0.00012685714285714286, "loss": 0.1284, "step": 21900 }, { "epoch": 122.91, "learning_rate": 0.00012277551020408164, "loss": 0.1278, "step": 22000 }, { "epoch": 122.91, "eval_loss": 0.9142583608627319, "eval_runtime": 187.6194, "eval_samples_per_second": 13.927, "eval_steps_per_second": 0.437, "eval_wer": 0.5408952574182094, "step": 22000 }, { "epoch": 123.46, "learning_rate": 0.00011869387755102041, "loss": 0.1292, "step": 22100 }, { "epoch": 124.02, "learning_rate": 0.00011461224489795918, "loss": 0.1201, "step": 22200 }, { "epoch": 124.58, "learning_rate": 0.00011053061224489797, "loss": 0.1249, "step": 22300 }, { "epoch": 125.14, "learning_rate": 0.00010644897959183673, "loss": 0.1251, "step": 22400 }, { "epoch": 125.7, "learning_rate": 0.00010236734693877551, "loss": 0.1207, "step": 22500 }, { "epoch": 125.7, "eval_loss": 0.8916703462600708, "eval_runtime": 187.4806, "eval_samples_per_second": 13.937, "eval_steps_per_second": 0.437, "eval_wer": 0.5357912756784174, "step": 22500 }, { "epoch": 126.26, "learning_rate": 9.828571428571429e-05, "loss": 0.1233, "step": 22600 }, { "epoch": 126.82, "learning_rate": 9.420408163265307e-05, "loss": 0.1217, "step": 22700 }, { "epoch": 127.37, "learning_rate": 9.012244897959183e-05, "loss": 0.1197, "step": 22800 }, { "epoch": 127.93, "learning_rate": 8.604081632653062e-05, "loss": 0.1167, "step": 22900 }, { "epoch": 128.49, "learning_rate": 8.195918367346938e-05, "loss": 0.1203, "step": 23000 }, { "epoch": 128.49, "eval_loss": 0.9041045904159546, "eval_runtime": 186.9588, "eval_samples_per_second": 13.976, "eval_steps_per_second": 0.439, "eval_wer": 0.5341110829317778, "step": 23000 }, { "epoch": 129.05, "learning_rate": 7.791836734693878e-05, "loss": 0.1199, "step": 23100 }, { "epoch": 129.61, "learning_rate": 7.383673469387756e-05, "loss": 0.1171, "step": 23200 }, { "epoch": 130.17, "learning_rate": 6.975510204081632e-05, "loss": 0.1136, "step": 23300 }, { "epoch": 130.73, "learning_rate": 6.567346938775511e-05, "loss": 0.1133, "step": 23400 }, { "epoch": 131.28, "learning_rate": 6.159183673469388e-05, "loss": 0.1083, "step": 23500 }, { "epoch": 131.28, "eval_loss": 0.8883697986602783, "eval_runtime": 186.6245, "eval_samples_per_second": 14.001, "eval_steps_per_second": 0.439, "eval_wer": 0.5340793811818413, "step": 23500 }, { "epoch": 131.84, "learning_rate": 5.751020408163265e-05, "loss": 0.115, "step": 23600 }, { "epoch": 132.4, "learning_rate": 5.342857142857143e-05, "loss": 0.1132, "step": 23700 }, { "epoch": 132.96, "learning_rate": 4.934693877551021e-05, "loss": 0.1111, "step": 23800 }, { "epoch": 133.52, "learning_rate": 4.526530612244898e-05, "loss": 0.1079, "step": 23900 }, { "epoch": 134.08, "learning_rate": 4.1183673469387756e-05, "loss": 0.1147, "step": 24000 }, { "epoch": 134.08, "eval_loss": 0.891002357006073, "eval_runtime": 186.8507, "eval_samples_per_second": 13.984, "eval_steps_per_second": 0.439, "eval_wer": 0.525456505199087, "step": 24000 }, { "epoch": 134.64, "learning_rate": 3.7102040816326533e-05, "loss": 0.1085, "step": 24100 }, { "epoch": 135.2, "learning_rate": 3.3020408163265304e-05, "loss": 0.1104, "step": 24200 }, { "epoch": 135.75, "learning_rate": 2.8938775510204082e-05, "loss": 0.1096, "step": 24300 }, { "epoch": 136.31, "learning_rate": 2.4857142857142856e-05, "loss": 0.1064, "step": 24400 }, { "epoch": 136.87, "learning_rate": 2.0775510204081633e-05, "loss": 0.1129, "step": 24500 }, { "epoch": 136.87, "eval_loss": 0.8826149106025696, "eval_runtime": 187.6626, "eval_samples_per_second": 13.924, "eval_steps_per_second": 0.437, "eval_wer": 0.52412503170175, "step": 24500 }, { "epoch": 137.43, "learning_rate": 1.669387755102041e-05, "loss": 0.1086, "step": 24600 }, { "epoch": 137.99, "learning_rate": 1.2612244897959185e-05, "loss": 0.1061, "step": 24700 }, { "epoch": 138.55, "learning_rate": 8.53061224489796e-06, "loss": 0.1104, "step": 24800 }, { "epoch": 139.11, "learning_rate": 4.448979591836735e-06, "loss": 0.1095, "step": 24900 }, { "epoch": 139.66, "learning_rate": 3.673469387755102e-07, "loss": 0.1029, "step": 25000 }, { "epoch": 139.66, "eval_loss": 0.882430911064148, "eval_runtime": 187.8168, "eval_samples_per_second": 13.912, "eval_steps_per_second": 0.437, "eval_wer": 0.5246322597007355, "step": 25000 }, { "epoch": 139.66, "step": 25000, "total_flos": 4.393390203049772e+20, "train_loss": 0.3391121254348755, "train_runtime": 108834.0383, "train_samples_per_second": 7.351, "train_steps_per_second": 0.23 } ], "max_steps": 25000, "num_train_epochs": 140, "total_flos": 4.393390203049772e+20, "trial_name": null, "trial_params": null }