{ "best_metric": 17.199108469539375, "best_model_checkpoint": "./checkpoint-16000", "epoch": 48.850746268656714, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 5.045361133432068e-07, "loss": 0.3353, "step": 25 }, { "epoch": 0.12, "learning_rate": 6.229195710491767e-07, "loss": 0.2594, "step": 50 }, { "epoch": 0.19, "learning_rate": 6.903829450223391e-07, "loss": 0.2769, "step": 75 }, { "epoch": 0.25, "learning_rate": 7.377725845391017e-07, "loss": 0.2178, "step": 100 }, { "epoch": 0.31, "learning_rate": 7.743343231239582e-07, "loss": 0.2481, "step": 125 }, { "epoch": 0.37, "learning_rate": 8.041073861170493e-07, "loss": 0.2296, "step": 150 }, { "epoch": 0.44, "learning_rate": 8.292222957399572e-07, "loss": 0.2074, "step": 175 }, { "epoch": 0.5, "learning_rate": 8.509413541357753e-07, "loss": 0.2068, "step": 200 }, { "epoch": 0.56, "learning_rate": 8.700744577655555e-07, "loss": 0.2374, "step": 225 }, { "epoch": 0.62, "learning_rate": 8.871723942761202e-07, "loss": 0.2112, "step": 250 }, { "epoch": 0.68, "learning_rate": 9.026267958246847e-07, "loss": 0.2249, "step": 275 }, { "epoch": 0.75, "learning_rate": 9.167261066633988e-07, "loss": 0.1879, "step": 300 }, { "epoch": 0.81, "learning_rate": 9.296889251455014e-07, "loss": 0.2049, "step": 325 }, { "epoch": 0.87, "learning_rate": 9.416848797368691e-07, "loss": 0.1943, "step": 350 }, { "epoch": 0.93, "learning_rate": 9.528482449516371e-07, "loss": 0.1973, "step": 375 }, { "epoch": 1.0, "learning_rate": 9.632871309784313e-07, "loss": 0.1909, "step": 400 }, { "epoch": 1.06, "learning_rate": 9.730898687853907e-07, "loss": 0.2019, "step": 425 }, { "epoch": 1.12, "learning_rate": 9.823295589572114e-07, "loss": 0.1806, "step": 450 }, { "epoch": 1.18, "learning_rate": 9.910673836465484e-07, "loss": 0.1779, "step": 475 }, { "epoch": 1.24, "learning_rate": 9.993550644973805e-07, "loss": 0.1745, "step": 500 }, { "epoch": 1.31, "learning_rate": 1e-06, "loss": 0.1509, "step": 525 }, { "epoch": 1.37, "learning_rate": 1e-06, "loss": 0.1918, "step": 550 }, { "epoch": 1.43, "learning_rate": 1e-06, "loss": 0.1796, "step": 575 }, { "epoch": 1.49, "learning_rate": 1e-06, "loss": 0.2007, "step": 600 }, { "epoch": 1.55, "learning_rate": 1e-06, "loss": 0.1637, "step": 625 }, { "epoch": 1.62, "learning_rate": 1e-06, "loss": 0.1617, "step": 650 }, { "epoch": 1.68, "learning_rate": 1e-06, "loss": 0.1556, "step": 675 }, { "epoch": 1.74, "learning_rate": 1e-06, "loss": 0.1857, "step": 700 }, { "epoch": 1.8, "learning_rate": 1e-06, "loss": 0.1498, "step": 725 }, { "epoch": 1.87, "learning_rate": 1e-06, "loss": 0.1563, "step": 750 }, { "epoch": 1.93, "learning_rate": 1e-06, "loss": 0.1568, "step": 775 }, { "epoch": 1.99, "learning_rate": 1e-06, "loss": 0.1693, "step": 800 }, { "epoch": 2.05, "learning_rate": 1e-06, "loss": 0.1776, "step": 825 }, { "epoch": 2.11, "learning_rate": 1e-06, "loss": 0.1594, "step": 850 }, { "epoch": 2.18, "learning_rate": 1e-06, "loss": 0.1356, "step": 875 }, { "epoch": 2.24, "learning_rate": 1e-06, "loss": 0.1629, "step": 900 }, { "epoch": 2.3, "learning_rate": 1e-06, "loss": 0.1655, "step": 925 }, { "epoch": 2.36, "learning_rate": 1e-06, "loss": 0.1624, "step": 950 }, { "epoch": 2.43, "learning_rate": 1e-06, "loss": 0.1435, "step": 975 }, { "epoch": 2.49, "learning_rate": 1e-06, "loss": 0.1259, "step": 1000 }, { "epoch": 2.49, "eval_loss": 0.4833984375, "eval_runtime": 168.2921, "eval_samples_per_second": 10.078, "eval_steps_per_second": 0.63, "eval_wer": 18.36924219910847, "step": 1000 }, { "epoch": 2.06, "learning_rate": 1e-06, "loss": 0.1452, "step": 1025 }, { "epoch": 2.12, "learning_rate": 1e-06, "loss": 0.1351, "step": 1050 }, { "epoch": 2.19, "learning_rate": 1e-06, "loss": 0.1642, "step": 1075 }, { "epoch": 2.25, "learning_rate": 1e-06, "loss": 0.1298, "step": 1100 }, { "epoch": 2.31, "learning_rate": 1e-06, "loss": 0.1491, "step": 1125 }, { "epoch": 2.37, "learning_rate": 1e-06, "loss": 0.143, "step": 1150 }, { "epoch": 2.44, "learning_rate": 1e-06, "loss": 0.1376, "step": 1175 }, { "epoch": 2.5, "learning_rate": 1e-06, "loss": 0.1408, "step": 1200 }, { "epoch": 2.56, "learning_rate": 1e-06, "loss": 0.161, "step": 1225 }, { "epoch": 2.62, "learning_rate": 1e-06, "loss": 0.1466, "step": 1250 }, { "epoch": 2.68, "learning_rate": 1e-06, "loss": 0.1535, "step": 1275 }, { "epoch": 2.75, "learning_rate": 1e-06, "loss": 0.1325, "step": 1300 }, { "epoch": 2.81, "learning_rate": 1e-06, "loss": 0.1426, "step": 1325 }, { "epoch": 2.87, "learning_rate": 1e-06, "loss": 0.1365, "step": 1350 }, { "epoch": 2.93, "learning_rate": 1e-06, "loss": 0.1407, "step": 1375 }, { "epoch": 3.0, "learning_rate": 1e-06, "loss": 0.141, "step": 1400 }, { "epoch": 3.06, "learning_rate": 1e-06, "loss": 0.1449, "step": 1425 }, { "epoch": 3.12, "learning_rate": 1e-06, "loss": 0.1335, "step": 1450 }, { "epoch": 3.18, "learning_rate": 1e-06, "loss": 0.1304, "step": 1475 }, { "epoch": 3.24, "learning_rate": 1e-06, "loss": 0.1277, "step": 1500 }, { "epoch": 3.31, "learning_rate": 1e-06, "loss": 0.1107, "step": 1525 }, { "epoch": 3.37, "learning_rate": 1e-06, "loss": 0.1462, "step": 1550 }, { "epoch": 3.43, "learning_rate": 1e-06, "loss": 0.1399, "step": 1575 }, { "epoch": 3.49, "learning_rate": 1e-06, "loss": 0.1422, "step": 1600 }, { "epoch": 3.55, "learning_rate": 1e-06, "loss": 0.1211, "step": 1625 }, { "epoch": 3.62, "learning_rate": 1e-06, "loss": 0.1231, "step": 1650 }, { "epoch": 3.68, "learning_rate": 1e-06, "loss": 0.1151, "step": 1675 }, { "epoch": 3.74, "learning_rate": 1e-06, "loss": 0.1436, "step": 1700 }, { "epoch": 3.8, "learning_rate": 1e-06, "loss": 0.1178, "step": 1725 }, { "epoch": 3.87, "learning_rate": 1e-06, "loss": 0.1201, "step": 1750 }, { "epoch": 3.93, "learning_rate": 1e-06, "loss": 0.1237, "step": 1775 }, { "epoch": 3.99, "learning_rate": 1e-06, "loss": 0.1331, "step": 1800 }, { "epoch": 4.05, "learning_rate": 1e-06, "loss": 0.1412, "step": 1825 }, { "epoch": 4.11, "learning_rate": 1e-06, "loss": 0.1262, "step": 1850 }, { "epoch": 4.18, "learning_rate": 1e-06, "loss": 0.1042, "step": 1875 }, { "epoch": 4.24, "learning_rate": 1e-06, "loss": 0.1227, "step": 1900 }, { "epoch": 4.3, "learning_rate": 1e-06, "loss": 0.126, "step": 1925 }, { "epoch": 4.36, "learning_rate": 1e-06, "loss": 0.1301, "step": 1950 }, { "epoch": 4.43, "learning_rate": 1e-06, "loss": 0.1089, "step": 1975 }, { "epoch": 4.49, "learning_rate": 1e-06, "loss": 0.1002, "step": 2000 }, { "epoch": 4.49, "eval_loss": 0.46044921875, "eval_runtime": 168.0874, "eval_samples_per_second": 10.09, "eval_steps_per_second": 0.631, "eval_wer": 17.802748885586926, "step": 2000 }, { "epoch": 4.55, "learning_rate": 1e-06, "loss": 0.1021, "step": 2025 }, { "epoch": 4.61, "learning_rate": 1e-06, "loss": 0.1275, "step": 2050 }, { "epoch": 4.67, "learning_rate": 1e-06, "loss": 0.1281, "step": 2075 }, { "epoch": 4.74, "learning_rate": 1e-06, "loss": 0.131, "step": 2100 }, { "epoch": 4.8, "learning_rate": 1e-06, "loss": 0.107, "step": 2125 }, { "epoch": 4.86, "learning_rate": 1e-06, "loss": 0.1257, "step": 2150 }, { "epoch": 4.92, "learning_rate": 1e-06, "loss": 0.1182, "step": 2175 }, { "epoch": 4.99, "learning_rate": 1e-06, "loss": 0.1213, "step": 2200 }, { "epoch": 5.05, "learning_rate": 1e-06, "loss": 0.1106, "step": 2225 }, { "epoch": 5.11, "learning_rate": 1e-06, "loss": 0.1111, "step": 2250 }, { "epoch": 5.17, "learning_rate": 1e-06, "loss": 0.1192, "step": 2275 }, { "epoch": 5.23, "learning_rate": 1e-06, "loss": 0.1076, "step": 2300 }, { "epoch": 5.3, "learning_rate": 1e-06, "loss": 0.1187, "step": 2325 }, { "epoch": 5.36, "learning_rate": 1e-06, "loss": 0.12, "step": 2350 }, { "epoch": 5.42, "learning_rate": 1e-06, "loss": 0.1053, "step": 2375 }, { "epoch": 5.48, "learning_rate": 1e-06, "loss": 0.106, "step": 2400 }, { "epoch": 5.54, "learning_rate": 1e-06, "loss": 0.1108, "step": 2425 }, { "epoch": 5.61, "learning_rate": 1e-06, "loss": 0.1278, "step": 2450 }, { "epoch": 5.67, "learning_rate": 1e-06, "loss": 0.1233, "step": 2475 }, { "epoch": 5.73, "learning_rate": 1e-06, "loss": 0.1041, "step": 2500 }, { "epoch": 5.79, "learning_rate": 1e-06, "loss": 0.1018, "step": 2525 }, { "epoch": 5.86, "learning_rate": 1e-06, "loss": 0.0957, "step": 2550 }, { "epoch": 5.92, "learning_rate": 1e-06, "loss": 0.1136, "step": 2575 }, { "epoch": 5.98, "learning_rate": 1e-06, "loss": 0.1133, "step": 2600 }, { "epoch": 6.04, "learning_rate": 1e-06, "loss": 0.1157, "step": 2625 }, { "epoch": 6.1, "learning_rate": 1e-06, "loss": 0.1011, "step": 2650 }, { "epoch": 6.17, "learning_rate": 1e-06, "loss": 0.0926, "step": 2675 }, { "epoch": 6.23, "learning_rate": 1e-06, "loss": 0.1116, "step": 2700 }, { "epoch": 6.29, "learning_rate": 1e-06, "loss": 0.0931, "step": 2725 }, { "epoch": 6.35, "learning_rate": 1e-06, "loss": 0.098, "step": 2750 }, { "epoch": 6.42, "learning_rate": 1e-06, "loss": 0.0973, "step": 2775 }, { "epoch": 6.48, "learning_rate": 1e-06, "loss": 0.1092, "step": 2800 }, { "epoch": 6.54, "learning_rate": 1e-06, "loss": 0.1118, "step": 2825 }, { "epoch": 6.6, "learning_rate": 1e-06, "loss": 0.1032, "step": 2850 }, { "epoch": 6.66, "learning_rate": 1e-06, "loss": 0.098, "step": 2875 }, { "epoch": 6.73, "learning_rate": 1e-06, "loss": 0.1111, "step": 2900 }, { "epoch": 6.79, "learning_rate": 1e-06, "loss": 0.1102, "step": 2925 }, { "epoch": 6.85, "learning_rate": 1e-06, "loss": 0.094, "step": 2950 }, { "epoch": 6.91, "learning_rate": 1e-06, "loss": 0.1086, "step": 2975 }, { "epoch": 6.98, "learning_rate": 1e-06, "loss": 0.1096, "step": 3000 }, { "epoch": 6.98, "eval_loss": 0.455322265625, "eval_runtime": 167.3332, "eval_samples_per_second": 10.135, "eval_steps_per_second": 0.633, "eval_wer": 17.87704309063893, "step": 3000 }, { "epoch": 7.04, "learning_rate": 1e-06, "loss": 0.0979, "step": 3025 }, { "epoch": 7.1, "learning_rate": 1e-06, "loss": 0.0971, "step": 3050 }, { "epoch": 7.16, "learning_rate": 1e-06, "loss": 0.1025, "step": 3075 }, { "epoch": 7.22, "learning_rate": 1e-06, "loss": 0.0848, "step": 3100 }, { "epoch": 7.29, "learning_rate": 1e-06, "loss": 0.1134, "step": 3125 }, { "epoch": 7.35, "learning_rate": 1e-06, "loss": 0.0928, "step": 3150 }, { "epoch": 7.41, "learning_rate": 1e-06, "loss": 0.0959, "step": 3175 }, { "epoch": 7.47, "learning_rate": 1e-06, "loss": 0.0994, "step": 3200 }, { "epoch": 7.53, "learning_rate": 1e-06, "loss": 0.1094, "step": 3225 }, { "epoch": 7.6, "learning_rate": 1e-06, "loss": 0.0954, "step": 3250 }, { "epoch": 7.66, "learning_rate": 1e-06, "loss": 0.0914, "step": 3275 }, { "epoch": 7.72, "learning_rate": 1e-06, "loss": 0.0901, "step": 3300 }, { "epoch": 7.78, "learning_rate": 1e-06, "loss": 0.1004, "step": 3325 }, { "epoch": 7.85, "learning_rate": 1e-06, "loss": 0.0985, "step": 3350 }, { "epoch": 7.91, "learning_rate": 1e-06, "loss": 0.111, "step": 3375 }, { "epoch": 7.97, "learning_rate": 1e-06, "loss": 0.0901, "step": 3400 }, { "epoch": 8.03, "learning_rate": 1e-06, "loss": 0.0897, "step": 3425 }, { "epoch": 8.09, "learning_rate": 1e-06, "loss": 0.0865, "step": 3450 }, { "epoch": 8.16, "learning_rate": 1e-06, "loss": 0.0944, "step": 3475 }, { "epoch": 8.22, "learning_rate": 1e-06, "loss": 0.1072, "step": 3500 }, { "epoch": 8.28, "learning_rate": 1e-06, "loss": 0.0896, "step": 3525 }, { "epoch": 8.34, "learning_rate": 1e-06, "loss": 0.0963, "step": 3550 }, { "epoch": 8.41, "learning_rate": 1e-06, "loss": 0.0897, "step": 3575 }, { "epoch": 8.47, "learning_rate": 1e-06, "loss": 0.0806, "step": 3600 }, { "epoch": 8.53, "learning_rate": 1e-06, "loss": 0.1005, "step": 3625 }, { "epoch": 8.59, "learning_rate": 1e-06, "loss": 0.083, "step": 3650 }, { "epoch": 8.65, "learning_rate": 1e-06, "loss": 0.0957, "step": 3675 }, { "epoch": 8.72, "learning_rate": 1e-06, "loss": 0.0897, "step": 3700 }, { "epoch": 8.78, "learning_rate": 1e-06, "loss": 0.1062, "step": 3725 }, { "epoch": 8.84, "learning_rate": 1e-06, "loss": 0.0846, "step": 3750 }, { "epoch": 8.9, "learning_rate": 1e-06, "loss": 0.0937, "step": 3775 }, { "epoch": 8.97, "learning_rate": 1e-06, "loss": 0.0826, "step": 3800 }, { "epoch": 9.03, "learning_rate": 1e-06, "loss": 0.0857, "step": 3825 }, { "epoch": 9.09, "learning_rate": 1e-06, "loss": 0.0873, "step": 3850 }, { "epoch": 9.15, "learning_rate": 1e-06, "loss": 0.099, "step": 3875 }, { "epoch": 9.21, "learning_rate": 1e-06, "loss": 0.0895, "step": 3900 }, { "epoch": 9.28, "learning_rate": 1e-06, "loss": 0.0841, "step": 3925 }, { "epoch": 9.34, "learning_rate": 1e-06, "loss": 0.0771, "step": 3950 }, { "epoch": 9.4, "learning_rate": 1e-06, "loss": 0.0903, "step": 3975 }, { "epoch": 9.46, "learning_rate": 1e-06, "loss": 0.0885, "step": 4000 }, { "epoch": 9.46, "eval_loss": 0.455078125, "eval_runtime": 168.2379, "eval_samples_per_second": 10.081, "eval_steps_per_second": 0.63, "eval_wer": 17.960624071322435, "step": 4000 }, { "epoch": 9.52, "learning_rate": 1e-06, "loss": 0.0936, "step": 4025 }, { "epoch": 9.59, "learning_rate": 1e-06, "loss": 0.0765, "step": 4050 }, { "epoch": 9.65, "learning_rate": 1e-06, "loss": 0.0946, "step": 4075 }, { "epoch": 9.71, "learning_rate": 1e-06, "loss": 0.0856, "step": 4100 }, { "epoch": 9.77, "learning_rate": 1e-06, "loss": 0.0928, "step": 4125 }, { "epoch": 9.84, "learning_rate": 1e-06, "loss": 0.0843, "step": 4150 }, { "epoch": 9.9, "learning_rate": 1e-06, "loss": 0.0824, "step": 4175 }, { "epoch": 9.96, "learning_rate": 1e-06, "loss": 0.0947, "step": 4200 }, { "epoch": 10.02, "learning_rate": 1e-06, "loss": 0.0724, "step": 4225 }, { "epoch": 10.08, "learning_rate": 1e-06, "loss": 0.086, "step": 4250 }, { "epoch": 10.15, "learning_rate": 1e-06, "loss": 0.085, "step": 4275 }, { "epoch": 10.21, "learning_rate": 1e-06, "loss": 0.0894, "step": 4300 }, { "epoch": 10.27, "learning_rate": 1e-06, "loss": 0.0889, "step": 4325 }, { "epoch": 10.33, "learning_rate": 1e-06, "loss": 0.0753, "step": 4350 }, { "epoch": 10.4, "learning_rate": 1e-06, "loss": 0.0767, "step": 4375 }, { "epoch": 10.46, "learning_rate": 1e-06, "loss": 0.0844, "step": 4400 }, { "epoch": 10.52, "learning_rate": 1e-06, "loss": 0.0781, "step": 4425 }, { "epoch": 10.58, "learning_rate": 1e-06, "loss": 0.0838, "step": 4450 }, { "epoch": 10.64, "learning_rate": 1e-06, "loss": 0.0793, "step": 4475 }, { "epoch": 10.71, "learning_rate": 1e-06, "loss": 0.0863, "step": 4500 }, { "epoch": 10.77, "learning_rate": 1e-06, "loss": 0.0747, "step": 4525 }, { "epoch": 10.83, "learning_rate": 1e-06, "loss": 0.0853, "step": 4550 }, { "epoch": 10.89, "learning_rate": 1e-06, "loss": 0.0798, "step": 4575 }, { "epoch": 10.96, "learning_rate": 1e-06, "loss": 0.0758, "step": 4600 }, { "epoch": 11.02, "learning_rate": 1e-06, "loss": 0.0873, "step": 4625 }, { "epoch": 11.08, "learning_rate": 1e-06, "loss": 0.0795, "step": 4650 }, { "epoch": 11.14, "learning_rate": 1e-06, "loss": 0.0859, "step": 4675 }, { "epoch": 11.2, "learning_rate": 1e-06, "loss": 0.0747, "step": 4700 }, { "epoch": 11.27, "learning_rate": 1e-06, "loss": 0.078, "step": 4725 }, { "epoch": 11.33, "learning_rate": 1e-06, "loss": 0.0705, "step": 4750 }, { "epoch": 11.39, "learning_rate": 1e-06, "loss": 0.0829, "step": 4775 }, { "epoch": 11.45, "learning_rate": 1e-06, "loss": 0.0793, "step": 4800 }, { "epoch": 11.51, "learning_rate": 1e-06, "loss": 0.0806, "step": 4825 }, { "epoch": 11.58, "learning_rate": 1e-06, "loss": 0.0932, "step": 4850 }, { "epoch": 11.64, "learning_rate": 1e-06, "loss": 0.0699, "step": 4875 }, { "epoch": 11.7, "learning_rate": 1e-06, "loss": 0.0749, "step": 4900 }, { "epoch": 11.76, "learning_rate": 1e-06, "loss": 0.0814, "step": 4925 }, { "epoch": 11.83, "learning_rate": 1e-06, "loss": 0.085, "step": 4950 }, { "epoch": 11.89, "learning_rate": 1e-06, "loss": 0.0767, "step": 4975 }, { "epoch": 11.95, "learning_rate": 1e-06, "loss": 0.0675, "step": 5000 }, { "epoch": 11.95, "eval_loss": 0.463134765625, "eval_runtime": 167.4714, "eval_samples_per_second": 10.127, "eval_steps_per_second": 0.633, "eval_wer": 17.904903417533433, "step": 5000 }, { "epoch": 12.01, "learning_rate": 1e-06, "loss": 0.0769, "step": 5025 }, { "epoch": 12.07, "learning_rate": 1e-06, "loss": 0.0795, "step": 5050 }, { "epoch": 12.14, "learning_rate": 1e-06, "loss": 0.0744, "step": 5075 }, { "epoch": 12.2, "learning_rate": 1e-06, "loss": 0.0789, "step": 5100 }, { "epoch": 12.26, "learning_rate": 1e-06, "loss": 0.0672, "step": 5125 }, { "epoch": 12.32, "learning_rate": 1e-06, "loss": 0.072, "step": 5150 }, { "epoch": 12.39, "learning_rate": 1e-06, "loss": 0.0677, "step": 5175 }, { "epoch": 12.45, "learning_rate": 1e-06, "loss": 0.068, "step": 5200 }, { "epoch": 12.51, "learning_rate": 1e-06, "loss": 0.0646, "step": 5225 }, { "epoch": 12.57, "learning_rate": 1e-06, "loss": 0.0703, "step": 5250 }, { "epoch": 12.63, "learning_rate": 1e-06, "loss": 0.078, "step": 5275 }, { "epoch": 12.7, "learning_rate": 1e-06, "loss": 0.0714, "step": 5300 }, { "epoch": 12.76, "learning_rate": 1e-06, "loss": 0.0723, "step": 5325 }, { "epoch": 12.82, "learning_rate": 1e-06, "loss": 0.0871, "step": 5350 }, { "epoch": 12.88, "learning_rate": 1e-06, "loss": 0.0757, "step": 5375 }, { "epoch": 12.95, "learning_rate": 1e-06, "loss": 0.0835, "step": 5400 }, { "epoch": 13.01, "learning_rate": 1e-06, "loss": 0.0727, "step": 5425 }, { "epoch": 13.07, "learning_rate": 1e-06, "loss": 0.081, "step": 5450 }, { "epoch": 13.13, "learning_rate": 1e-06, "loss": 0.0642, "step": 5475 }, { "epoch": 13.19, "learning_rate": 1e-06, "loss": 0.066, "step": 5500 }, { "epoch": 13.26, "learning_rate": 1e-06, "loss": 0.0746, "step": 5525 }, { "epoch": 13.32, "learning_rate": 1e-06, "loss": 0.0725, "step": 5550 }, { "epoch": 13.38, "learning_rate": 1e-06, "loss": 0.0767, "step": 5575 }, { "epoch": 13.44, "learning_rate": 1e-06, "loss": 0.0731, "step": 5600 }, { "epoch": 13.5, "learning_rate": 1e-06, "loss": 0.0648, "step": 5625 }, { "epoch": 13.57, "learning_rate": 1e-06, "loss": 0.0686, "step": 5650 }, { "epoch": 13.63, "learning_rate": 1e-06, "loss": 0.0691, "step": 5675 }, { "epoch": 13.69, "learning_rate": 1e-06, "loss": 0.0787, "step": 5700 }, { "epoch": 13.75, "learning_rate": 1e-06, "loss": 0.0648, "step": 5725 }, { "epoch": 13.82, "learning_rate": 1e-06, "loss": 0.0659, "step": 5750 }, { "epoch": 13.88, "learning_rate": 1e-06, "loss": 0.0769, "step": 5775 }, { "epoch": 13.94, "learning_rate": 1e-06, "loss": 0.0596, "step": 5800 }, { "epoch": 14.0, "learning_rate": 1e-06, "loss": 0.0637, "step": 5825 }, { "epoch": 14.06, "learning_rate": 1e-06, "loss": 0.073, "step": 5850 }, { "epoch": 14.13, "learning_rate": 1e-06, "loss": 0.0579, "step": 5875 }, { "epoch": 14.19, "learning_rate": 1e-06, "loss": 0.07, "step": 5900 }, { "epoch": 14.25, "learning_rate": 1e-06, "loss": 0.0683, "step": 5925 }, { "epoch": 14.31, "learning_rate": 1e-06, "loss": 0.0585, "step": 5950 }, { "epoch": 14.38, "learning_rate": 1e-06, "loss": 0.069, "step": 5975 }, { "epoch": 14.44, "learning_rate": 1e-06, "loss": 0.0675, "step": 6000 }, { "epoch": 14.44, "eval_loss": 0.4619140625, "eval_runtime": 167.54, "eval_samples_per_second": 10.123, "eval_steps_per_second": 0.633, "eval_wer": 17.904903417533433, "step": 6000 }, { "epoch": 14.5, "learning_rate": 1e-06, "loss": 0.0641, "step": 6025 }, { "epoch": 14.56, "learning_rate": 1e-06, "loss": 0.0646, "step": 6050 }, { "epoch": 14.62, "learning_rate": 1e-06, "loss": 0.0718, "step": 6075 }, { "epoch": 14.69, "learning_rate": 1e-06, "loss": 0.0644, "step": 6100 }, { "epoch": 14.75, "learning_rate": 1e-06, "loss": 0.0758, "step": 6125 }, { "epoch": 14.81, "learning_rate": 1e-06, "loss": 0.0598, "step": 6150 }, { "epoch": 14.87, "learning_rate": 1e-06, "loss": 0.0747, "step": 6175 }, { "epoch": 14.94, "learning_rate": 1e-06, "loss": 0.0604, "step": 6200 }, { "epoch": 15.0, "learning_rate": 1e-06, "loss": 0.0657, "step": 6225 }, { "epoch": 15.06, "learning_rate": 1e-06, "loss": 0.0584, "step": 6250 }, { "epoch": 15.12, "learning_rate": 1e-06, "loss": 0.0646, "step": 6275 }, { "epoch": 15.18, "learning_rate": 1e-06, "loss": 0.0597, "step": 6300 }, { "epoch": 15.25, "learning_rate": 1e-06, "loss": 0.0609, "step": 6325 }, { "epoch": 15.31, "learning_rate": 1e-06, "loss": 0.059, "step": 6350 }, { "epoch": 15.37, "learning_rate": 1e-06, "loss": 0.0605, "step": 6375 }, { "epoch": 15.43, "learning_rate": 1e-06, "loss": 0.0637, "step": 6400 }, { "epoch": 15.5, "learning_rate": 1e-06, "loss": 0.051, "step": 6425 }, { "epoch": 15.56, "learning_rate": 1e-06, "loss": 0.0607, "step": 6450 }, { "epoch": 15.62, "learning_rate": 1e-06, "loss": 0.068, "step": 6475 }, { "epoch": 15.68, "learning_rate": 1e-06, "loss": 0.0613, "step": 6500 }, { "epoch": 15.74, "learning_rate": 1e-06, "loss": 0.0701, "step": 6525 }, { "epoch": 15.81, "learning_rate": 1e-06, "loss": 0.0656, "step": 6550 }, { "epoch": 15.87, "learning_rate": 1e-06, "loss": 0.0665, "step": 6575 }, { "epoch": 15.93, "learning_rate": 1e-06, "loss": 0.0769, "step": 6600 }, { "epoch": 15.99, "learning_rate": 1e-06, "loss": 0.0629, "step": 6625 }, { "epoch": 16.05, "learning_rate": 1e-06, "loss": 0.0569, "step": 6650 }, { "epoch": 16.12, "learning_rate": 1e-06, "loss": 0.0587, "step": 6675 }, { "epoch": 16.18, "learning_rate": 1e-06, "loss": 0.0597, "step": 6700 }, { "epoch": 16.24, "learning_rate": 1e-06, "loss": 0.059, "step": 6725 }, { "epoch": 16.3, "learning_rate": 1e-06, "loss": 0.0556, "step": 6750 }, { "epoch": 16.37, "learning_rate": 1e-06, "loss": 0.0596, "step": 6775 }, { "epoch": 16.43, "learning_rate": 1e-06, "loss": 0.0598, "step": 6800 }, { "epoch": 16.49, "learning_rate": 1e-06, "loss": 0.0606, "step": 6825 }, { "epoch": 16.55, "learning_rate": 1e-06, "loss": 0.0616, "step": 6850 }, { "epoch": 16.61, "learning_rate": 1e-06, "loss": 0.0547, "step": 6875 }, { "epoch": 16.68, "learning_rate": 1e-06, "loss": 0.0564, "step": 6900 }, { "epoch": 16.74, "learning_rate": 1e-06, "loss": 0.0623, "step": 6925 }, { "epoch": 16.8, "learning_rate": 1e-06, "loss": 0.0597, "step": 6950 }, { "epoch": 16.86, "learning_rate": 1e-06, "loss": 0.0666, "step": 6975 }, { "epoch": 16.93, "learning_rate": 1e-06, "loss": 0.0645, "step": 7000 }, { "epoch": 16.93, "eval_loss": 0.4677734375, "eval_runtime": 167.4906, "eval_samples_per_second": 10.126, "eval_steps_per_second": 0.633, "eval_wer": 17.672734026745914, "step": 7000 }, { "epoch": 16.99, "learning_rate": 1e-06, "loss": 0.0557, "step": 7025 }, { "epoch": 17.05, "learning_rate": 1e-06, "loss": 0.0678, "step": 7050 }, { "epoch": 17.11, "learning_rate": 1e-06, "loss": 0.0547, "step": 7075 }, { "epoch": 17.17, "learning_rate": 1e-06, "loss": 0.0533, "step": 7100 }, { "epoch": 17.24, "learning_rate": 1e-06, "loss": 0.0604, "step": 7125 }, { "epoch": 17.3, "learning_rate": 1e-06, "loss": 0.0587, "step": 7150 }, { "epoch": 17.36, "learning_rate": 1e-06, "loss": 0.0616, "step": 7175 }, { "epoch": 17.42, "learning_rate": 1e-06, "loss": 0.0585, "step": 7200 }, { "epoch": 17.49, "learning_rate": 1e-06, "loss": 0.0638, "step": 7225 }, { "epoch": 17.55, "learning_rate": 1e-06, "loss": 0.0586, "step": 7250 }, { "epoch": 17.61, "learning_rate": 1e-06, "loss": 0.0457, "step": 7275 }, { "epoch": 17.67, "learning_rate": 1e-06, "loss": 0.0531, "step": 7300 }, { "epoch": 17.73, "learning_rate": 1e-06, "loss": 0.0607, "step": 7325 }, { "epoch": 17.8, "learning_rate": 1e-06, "loss": 0.0564, "step": 7350 }, { "epoch": 17.86, "learning_rate": 1e-06, "loss": 0.0586, "step": 7375 }, { "epoch": 17.92, "learning_rate": 1e-06, "loss": 0.0647, "step": 7400 }, { "epoch": 17.98, "learning_rate": 1e-06, "loss": 0.0554, "step": 7425 }, { "epoch": 18.04, "learning_rate": 1e-06, "loss": 0.0558, "step": 7450 }, { "epoch": 18.11, "learning_rate": 1e-06, "loss": 0.0517, "step": 7475 }, { "epoch": 18.17, "learning_rate": 1e-06, "loss": 0.0507, "step": 7500 }, { "epoch": 18.23, "learning_rate": 1e-06, "loss": 0.063, "step": 7525 }, { "epoch": 18.29, "learning_rate": 1e-06, "loss": 0.0533, "step": 7550 }, { "epoch": 18.36, "learning_rate": 1e-06, "loss": 0.0666, "step": 7575 }, { "epoch": 18.42, "learning_rate": 1e-06, "loss": 0.0561, "step": 7600 }, { "epoch": 18.48, "learning_rate": 1e-06, "loss": 0.0545, "step": 7625 }, { "epoch": 18.54, "learning_rate": 1e-06, "loss": 0.0522, "step": 7650 }, { "epoch": 18.6, "learning_rate": 1e-06, "loss": 0.0527, "step": 7675 }, { "epoch": 18.67, "learning_rate": 1e-06, "loss": 0.049, "step": 7700 }, { "epoch": 18.73, "learning_rate": 1e-06, "loss": 0.0546, "step": 7725 }, { "epoch": 18.79, "learning_rate": 1e-06, "loss": 0.058, "step": 7750 }, { "epoch": 18.85, "learning_rate": 1e-06, "loss": 0.048, "step": 7775 }, { "epoch": 18.92, "learning_rate": 1e-06, "loss": 0.0585, "step": 7800 }, { "epoch": 18.98, "learning_rate": 1e-06, "loss": 0.0575, "step": 7825 }, { "epoch": 19.04, "learning_rate": 1e-06, "loss": 0.0629, "step": 7850 }, { "epoch": 19.1, "learning_rate": 1e-06, "loss": 0.0531, "step": 7875 }, { "epoch": 19.16, "learning_rate": 1e-06, "loss": 0.0581, "step": 7900 }, { "epoch": 19.23, "learning_rate": 1e-06, "loss": 0.0508, "step": 7925 }, { "epoch": 19.29, "learning_rate": 1e-06, "loss": 0.0531, "step": 7950 }, { "epoch": 19.35, "learning_rate": 1e-06, "loss": 0.0444, "step": 7975 }, { "epoch": 19.41, "learning_rate": 1e-06, "loss": 0.0535, "step": 8000 }, { "epoch": 19.41, "eval_loss": 0.468505859375, "eval_runtime": 167.6281, "eval_samples_per_second": 10.118, "eval_steps_per_second": 0.632, "eval_wer": 17.663447251114412, "step": 8000 }, { "epoch": 19.06, "learning_rate": 1e-06, "loss": 0.0499, "step": 8025 }, { "epoch": 19.12, "learning_rate": 1e-06, "loss": 0.0509, "step": 8050 }, { "epoch": 19.19, "learning_rate": 1e-06, "loss": 0.057, "step": 8075 }, { "epoch": 19.25, "learning_rate": 1e-06, "loss": 0.0482, "step": 8100 }, { "epoch": 19.31, "learning_rate": 1e-06, "loss": 0.0537, "step": 8125 }, { "epoch": 19.37, "learning_rate": 1e-06, "loss": 0.0534, "step": 8150 }, { "epoch": 19.44, "learning_rate": 1e-06, "loss": 0.0506, "step": 8175 }, { "epoch": 19.5, "learning_rate": 1e-06, "loss": 0.0496, "step": 8200 }, { "epoch": 19.56, "learning_rate": 1e-06, "loss": 0.0569, "step": 8225 }, { "epoch": 19.62, "learning_rate": 1e-06, "loss": 0.054, "step": 8250 }, { "epoch": 19.68, "learning_rate": 1e-06, "loss": 0.0568, "step": 8275 }, { "epoch": 19.75, "learning_rate": 1e-06, "loss": 0.048, "step": 8300 }, { "epoch": 19.81, "learning_rate": 1e-06, "loss": 0.0514, "step": 8325 }, { "epoch": 19.87, "learning_rate": 1e-06, "loss": 0.0506, "step": 8350 }, { "epoch": 19.93, "learning_rate": 1e-06, "loss": 0.055, "step": 8375 }, { "epoch": 20.0, "learning_rate": 1e-06, "loss": 0.0497, "step": 8400 }, { "epoch": 20.06, "learning_rate": 1e-06, "loss": 0.0547, "step": 8425 }, { "epoch": 20.12, "learning_rate": 1e-06, "loss": 0.0474, "step": 8450 }, { "epoch": 20.18, "learning_rate": 1e-06, "loss": 0.0473, "step": 8475 }, { "epoch": 20.24, "learning_rate": 1e-06, "loss": 0.0474, "step": 8500 }, { "epoch": 20.31, "learning_rate": 1e-06, "loss": 0.0451, "step": 8525 }, { "epoch": 20.37, "learning_rate": 1e-06, "loss": 0.0552, "step": 8550 }, { "epoch": 20.43, "learning_rate": 1e-06, "loss": 0.0525, "step": 8575 }, { "epoch": 20.49, "learning_rate": 1e-06, "loss": 0.0533, "step": 8600 }, { "epoch": 20.55, "learning_rate": 1e-06, "loss": 0.0448, "step": 8625 }, { "epoch": 20.62, "learning_rate": 1e-06, "loss": 0.0458, "step": 8650 }, { "epoch": 20.68, "learning_rate": 1e-06, "loss": 0.0459, "step": 8675 }, { "epoch": 20.74, "learning_rate": 1e-06, "loss": 0.0561, "step": 8700 }, { "epoch": 20.8, "learning_rate": 1e-06, "loss": 0.0465, "step": 8725 }, { "epoch": 20.87, "learning_rate": 1e-06, "loss": 0.0482, "step": 8750 }, { "epoch": 20.93, "learning_rate": 1e-06, "loss": 0.0483, "step": 8775 }, { "epoch": 20.99, "learning_rate": 1e-06, "loss": 0.0505, "step": 8800 }, { "epoch": 21.05, "learning_rate": 1e-06, "loss": 0.0542, "step": 8825 }, { "epoch": 21.11, "learning_rate": 1e-06, "loss": 0.0501, "step": 8850 }, { "epoch": 21.18, "learning_rate": 1e-06, "loss": 0.0437, "step": 8875 }, { "epoch": 21.24, "learning_rate": 1e-06, "loss": 0.0495, "step": 8900 }, { "epoch": 21.3, "learning_rate": 1e-06, "loss": 0.0504, "step": 8925 }, { "epoch": 21.36, "learning_rate": 1e-06, "loss": 0.0492, "step": 8950 }, { "epoch": 21.43, "learning_rate": 1e-06, "loss": 0.0456, "step": 8975 }, { "epoch": 21.49, "learning_rate": 1e-06, "loss": 0.039, "step": 9000 }, { "epoch": 21.49, "eval_loss": 0.474609375, "eval_runtime": 166.7783, "eval_samples_per_second": 10.169, "eval_steps_per_second": 0.636, "eval_wer": 17.672734026745914, "step": 9000 }, { "epoch": 21.55, "learning_rate": 1e-06, "loss": 0.0418, "step": 9025 }, { "epoch": 21.61, "learning_rate": 1e-06, "loss": 0.0516, "step": 9050 }, { "epoch": 21.67, "learning_rate": 1e-06, "loss": 0.0515, "step": 9075 }, { "epoch": 21.74, "learning_rate": 1e-06, "loss": 0.0494, "step": 9100 }, { "epoch": 21.8, "learning_rate": 1e-06, "loss": 0.0426, "step": 9125 }, { "epoch": 21.86, "learning_rate": 1e-06, "loss": 0.0511, "step": 9150 }, { "epoch": 21.92, "learning_rate": 1e-06, "loss": 0.0491, "step": 9175 }, { "epoch": 21.99, "learning_rate": 1e-06, "loss": 0.0479, "step": 9200 }, { "epoch": 22.05, "learning_rate": 1e-06, "loss": 0.0434, "step": 9225 }, { "epoch": 22.11, "learning_rate": 1e-06, "loss": 0.0447, "step": 9250 }, { "epoch": 22.17, "learning_rate": 1e-06, "loss": 0.0497, "step": 9275 }, { "epoch": 22.23, "learning_rate": 1e-06, "loss": 0.0418, "step": 9300 }, { "epoch": 22.3, "learning_rate": 1e-06, "loss": 0.0487, "step": 9325 }, { "epoch": 22.36, "learning_rate": 1e-06, "loss": 0.0507, "step": 9350 }, { "epoch": 22.42, "learning_rate": 1e-06, "loss": 0.0416, "step": 9375 }, { "epoch": 22.48, "learning_rate": 1e-06, "loss": 0.0433, "step": 9400 }, { "epoch": 22.54, "learning_rate": 1e-06, "loss": 0.0484, "step": 9425 }, { "epoch": 22.61, "learning_rate": 1e-06, "loss": 0.0499, "step": 9450 }, { "epoch": 22.67, "learning_rate": 1e-06, "loss": 0.0517, "step": 9475 }, { "epoch": 22.73, "learning_rate": 1e-06, "loss": 0.042, "step": 9500 }, { "epoch": 22.79, "learning_rate": 1e-06, "loss": 0.0419, "step": 9525 }, { "epoch": 22.86, "learning_rate": 1e-06, "loss": 0.0398, "step": 9550 }, { "epoch": 22.92, "learning_rate": 1e-06, "loss": 0.0452, "step": 9575 }, { "epoch": 22.98, "learning_rate": 1e-06, "loss": 0.0473, "step": 9600 }, { "epoch": 23.04, "learning_rate": 1e-06, "loss": 0.049, "step": 9625 }, { "epoch": 23.1, "learning_rate": 1e-06, "loss": 0.0422, "step": 9650 }, { "epoch": 23.17, "learning_rate": 1e-06, "loss": 0.0371, "step": 9675 }, { "epoch": 23.23, "learning_rate": 1e-06, "loss": 0.0455, "step": 9700 }, { "epoch": 23.29, "learning_rate": 1e-06, "loss": 0.0402, "step": 9725 }, { "epoch": 23.35, "learning_rate": 1e-06, "loss": 0.042, "step": 9750 }, { "epoch": 23.42, "learning_rate": 1e-06, "loss": 0.0454, "step": 9775 }, { "epoch": 23.48, "learning_rate": 1e-06, "loss": 0.0429, "step": 9800 }, { "epoch": 23.54, "learning_rate": 1e-06, "loss": 0.0468, "step": 9825 }, { "epoch": 23.6, "learning_rate": 1e-06, "loss": 0.0416, "step": 9850 }, { "epoch": 23.66, "learning_rate": 1e-06, "loss": 0.0445, "step": 9875 }, { "epoch": 23.73, "learning_rate": 1e-06, "loss": 0.0476, "step": 9900 }, { "epoch": 23.79, "learning_rate": 1e-06, "loss": 0.0446, "step": 9925 }, { "epoch": 23.85, "learning_rate": 1e-06, "loss": 0.0407, "step": 9950 }, { "epoch": 23.91, "learning_rate": 1e-06, "loss": 0.0453, "step": 9975 }, { "epoch": 23.98, "learning_rate": 1e-06, "loss": 0.0447, "step": 10000 }, { "epoch": 23.98, "eval_loss": 0.47607421875, "eval_runtime": 167.3024, "eval_samples_per_second": 10.137, "eval_steps_per_second": 0.634, "eval_wer": 17.663447251114412, "step": 10000 }, { "epoch": 24.04, "learning_rate": 1e-06, "loss": 0.0424, "step": 10025 }, { "epoch": 24.1, "learning_rate": 1e-06, "loss": 0.0398, "step": 10050 }, { "epoch": 24.16, "learning_rate": 1e-06, "loss": 0.0469, "step": 10075 }, { "epoch": 24.22, "learning_rate": 1e-06, "loss": 0.0351, "step": 10100 }, { "epoch": 24.29, "learning_rate": 1e-06, "loss": 0.0484, "step": 10125 }, { "epoch": 24.35, "learning_rate": 1e-06, "loss": 0.0416, "step": 10150 }, { "epoch": 24.41, "learning_rate": 1e-06, "loss": 0.0417, "step": 10175 }, { "epoch": 24.47, "learning_rate": 1e-06, "loss": 0.043, "step": 10200 }, { "epoch": 24.53, "learning_rate": 1e-06, "loss": 0.0483, "step": 10225 }, { "epoch": 24.6, "learning_rate": 1e-06, "loss": 0.0428, "step": 10250 }, { "epoch": 24.66, "learning_rate": 1e-06, "loss": 0.0382, "step": 10275 }, { "epoch": 24.72, "learning_rate": 1e-06, "loss": 0.0412, "step": 10300 }, { "epoch": 24.78, "learning_rate": 1e-06, "loss": 0.0392, "step": 10325 }, { "epoch": 24.85, "learning_rate": 1e-06, "loss": 0.0417, "step": 10350 }, { "epoch": 24.91, "learning_rate": 1e-06, "loss": 0.0481, "step": 10375 }, { "epoch": 24.97, "learning_rate": 1e-06, "loss": 0.0397, "step": 10400 }, { "epoch": 25.03, "learning_rate": 1e-06, "loss": 0.0388, "step": 10425 }, { "epoch": 25.09, "learning_rate": 1e-06, "loss": 0.0366, "step": 10450 }, { "epoch": 25.16, "learning_rate": 1e-06, "loss": 0.0424, "step": 10475 }, { "epoch": 25.22, "learning_rate": 1e-06, "loss": 0.0443, "step": 10500 }, { "epoch": 25.28, "learning_rate": 1e-06, "loss": 0.037, "step": 10525 }, { "epoch": 25.34, "learning_rate": 1e-06, "loss": 0.0422, "step": 10550 }, { "epoch": 25.41, "learning_rate": 1e-06, "loss": 0.0388, "step": 10575 }, { "epoch": 25.47, "learning_rate": 1e-06, "loss": 0.0366, "step": 10600 }, { "epoch": 25.53, "learning_rate": 1e-06, "loss": 0.0447, "step": 10625 }, { "epoch": 25.59, "learning_rate": 1e-06, "loss": 0.036, "step": 10650 }, { "epoch": 25.65, "learning_rate": 1e-06, "loss": 0.0408, "step": 10675 }, { "epoch": 25.72, "learning_rate": 1e-06, "loss": 0.0384, "step": 10700 }, { "epoch": 25.78, "learning_rate": 1e-06, "loss": 0.0464, "step": 10725 }, { "epoch": 25.84, "learning_rate": 1e-06, "loss": 0.0365, "step": 10750 }, { "epoch": 25.9, "learning_rate": 1e-06, "loss": 0.0398, "step": 10775 }, { "epoch": 25.97, "learning_rate": 1e-06, "loss": 0.0391, "step": 10800 }, { "epoch": 26.03, "learning_rate": 1e-06, "loss": 0.0392, "step": 10825 }, { "epoch": 26.09, "learning_rate": 1e-06, "loss": 0.039, "step": 10850 }, { "epoch": 26.15, "learning_rate": 1e-06, "loss": 0.0413, "step": 10875 }, { "epoch": 26.21, "learning_rate": 1e-06, "loss": 0.0407, "step": 10900 }, { "epoch": 26.28, "learning_rate": 1e-06, "loss": 0.0375, "step": 10925 }, { "epoch": 26.34, "learning_rate": 1e-06, "loss": 0.0342, "step": 10950 }, { "epoch": 26.4, "learning_rate": 1e-06, "loss": 0.0382, "step": 10975 }, { "epoch": 26.46, "learning_rate": 1e-06, "loss": 0.0393, "step": 11000 }, { "epoch": 26.46, "eval_loss": 0.479248046875, "eval_runtime": 166.6895, "eval_samples_per_second": 10.175, "eval_steps_per_second": 0.636, "eval_wer": 17.765601783060923, "step": 11000 }, { "epoch": 26.52, "learning_rate": 1e-06, "loss": 0.0419, "step": 11025 }, { "epoch": 26.59, "learning_rate": 1e-06, "loss": 0.0327, "step": 11050 }, { "epoch": 26.65, "learning_rate": 1e-06, "loss": 0.0419, "step": 11075 }, { "epoch": 26.71, "learning_rate": 1e-06, "loss": 0.0384, "step": 11100 }, { "epoch": 26.77, "learning_rate": 1e-06, "loss": 0.0432, "step": 11125 }, { "epoch": 26.84, "learning_rate": 1e-06, "loss": 0.0376, "step": 11150 }, { "epoch": 26.9, "learning_rate": 1e-06, "loss": 0.0378, "step": 11175 }, { "epoch": 26.96, "learning_rate": 1e-06, "loss": 0.0395, "step": 11200 }, { "epoch": 27.02, "learning_rate": 1e-06, "loss": 0.0337, "step": 11225 }, { "epoch": 27.08, "learning_rate": 1e-06, "loss": 0.0367, "step": 11250 }, { "epoch": 27.15, "learning_rate": 1e-06, "loss": 0.0367, "step": 11275 }, { "epoch": 27.21, "learning_rate": 1e-06, "loss": 0.0376, "step": 11300 }, { "epoch": 27.27, "learning_rate": 1e-06, "loss": 0.0412, "step": 11325 }, { "epoch": 27.33, "learning_rate": 1e-06, "loss": 0.0323, "step": 11350 }, { "epoch": 27.4, "learning_rate": 1e-06, "loss": 0.0335, "step": 11375 }, { "epoch": 27.46, "learning_rate": 1e-06, "loss": 0.0365, "step": 11400 }, { "epoch": 27.52, "learning_rate": 1e-06, "loss": 0.0374, "step": 11425 }, { "epoch": 27.58, "learning_rate": 1e-06, "loss": 0.0383, "step": 11450 }, { "epoch": 27.64, "learning_rate": 1e-06, "loss": 0.0326, "step": 11475 }, { "epoch": 27.71, "learning_rate": 1e-06, "loss": 0.0405, "step": 11500 }, { "epoch": 27.77, "learning_rate": 1e-06, "loss": 0.0347, "step": 11525 }, { "epoch": 27.83, "learning_rate": 1e-06, "loss": 0.0383, "step": 11550 }, { "epoch": 27.89, "learning_rate": 1e-06, "loss": 0.0361, "step": 11575 }, { "epoch": 27.96, "learning_rate": 1e-06, "loss": 0.0341, "step": 11600 }, { "epoch": 28.02, "learning_rate": 1e-06, "loss": 0.0376, "step": 11625 }, { "epoch": 28.08, "learning_rate": 1e-06, "loss": 0.0332, "step": 11650 }, { "epoch": 28.14, "learning_rate": 1e-06, "loss": 0.0429, "step": 11675 }, { "epoch": 28.2, "learning_rate": 1e-06, "loss": 0.0373, "step": 11700 }, { "epoch": 28.27, "learning_rate": 1e-06, "loss": 0.0354, "step": 11725 }, { "epoch": 28.33, "learning_rate": 1e-06, "loss": 0.0355, "step": 11750 }, { "epoch": 28.39, "learning_rate": 1e-06, "loss": 0.0368, "step": 11775 }, { "epoch": 28.45, "learning_rate": 1e-06, "loss": 0.0338, "step": 11800 }, { "epoch": 28.51, "learning_rate": 1e-06, "loss": 0.0363, "step": 11825 }, { "epoch": 28.58, "learning_rate": 1e-06, "loss": 0.0391, "step": 11850 }, { "epoch": 28.64, "learning_rate": 1e-06, "loss": 0.0309, "step": 11875 }, { "epoch": 28.7, "learning_rate": 1e-06, "loss": 0.0353, "step": 11900 }, { "epoch": 28.76, "learning_rate": 1e-06, "loss": 0.0398, "step": 11925 }, { "epoch": 28.83, "learning_rate": 1e-06, "loss": 0.0394, "step": 11950 }, { "epoch": 28.89, "learning_rate": 1e-06, "loss": 0.0351, "step": 11975 }, { "epoch": 28.95, "learning_rate": 1e-06, "loss": 0.0308, "step": 12000 }, { "epoch": 28.95, "eval_loss": 0.485107421875, "eval_runtime": 167.5369, "eval_samples_per_second": 10.123, "eval_steps_per_second": 0.633, "eval_wer": 17.86775631500743, "step": 12000 }, { "epoch": 29.01, "learning_rate": 1e-06, "loss": 0.0341, "step": 12025 }, { "epoch": 29.07, "learning_rate": 1e-06, "loss": 0.0358, "step": 12050 }, { "epoch": 29.14, "learning_rate": 1e-06, "loss": 0.0349, "step": 12075 }, { "epoch": 29.2, "learning_rate": 1e-06, "loss": 0.0382, "step": 12100 }, { "epoch": 29.26, "learning_rate": 1e-06, "loss": 0.0312, "step": 12125 }, { "epoch": 29.32, "learning_rate": 1e-06, "loss": 0.0344, "step": 12150 }, { "epoch": 29.39, "learning_rate": 1e-06, "loss": 0.0298, "step": 12175 }, { "epoch": 29.45, "learning_rate": 1e-06, "loss": 0.0313, "step": 12200 }, { "epoch": 29.51, "learning_rate": 1e-06, "loss": 0.0277, "step": 12225 }, { "epoch": 29.57, "learning_rate": 1e-06, "loss": 0.0326, "step": 12250 }, { "epoch": 29.63, "learning_rate": 1e-06, "loss": 0.0328, "step": 12275 }, { "epoch": 29.7, "learning_rate": 1e-06, "loss": 0.0333, "step": 12300 }, { "epoch": 29.76, "learning_rate": 1e-06, "loss": 0.033, "step": 12325 }, { "epoch": 29.82, "learning_rate": 1e-06, "loss": 0.0415, "step": 12350 }, { "epoch": 29.88, "learning_rate": 1e-06, "loss": 0.0346, "step": 12375 }, { "epoch": 29.95, "learning_rate": 1e-06, "loss": 0.0389, "step": 12400 }, { "epoch": 30.01, "learning_rate": 1e-06, "loss": 0.0355, "step": 12425 }, { "epoch": 30.07, "learning_rate": 1e-06, "loss": 0.0406, "step": 12450 }, { "epoch": 30.13, "learning_rate": 1e-06, "loss": 0.031, "step": 12475 }, { "epoch": 30.19, "learning_rate": 1e-06, "loss": 0.0311, "step": 12500 }, { "epoch": 30.26, "learning_rate": 1e-06, "loss": 0.0357, "step": 12525 }, { "epoch": 30.32, "learning_rate": 1e-06, "loss": 0.0351, "step": 12550 }, { "epoch": 30.38, "learning_rate": 1e-06, "loss": 0.0355, "step": 12575 }, { "epoch": 30.44, "learning_rate": 1e-06, "loss": 0.0347, "step": 12600 }, { "epoch": 30.5, "learning_rate": 1e-06, "loss": 0.0293, "step": 12625 }, { "epoch": 30.57, "learning_rate": 1e-06, "loss": 0.0346, "step": 12650 }, { "epoch": 30.63, "learning_rate": 1e-06, "loss": 0.0324, "step": 12675 }, { "epoch": 30.69, "learning_rate": 1e-06, "loss": 0.0374, "step": 12700 }, { "epoch": 30.75, "learning_rate": 1e-06, "loss": 0.0296, "step": 12725 }, { "epoch": 30.82, "learning_rate": 1e-06, "loss": 0.0308, "step": 12750 }, { "epoch": 30.88, "learning_rate": 1e-06, "loss": 0.0375, "step": 12775 }, { "epoch": 30.94, "learning_rate": 1e-06, "loss": 0.0284, "step": 12800 }, { "epoch": 31.0, "learning_rate": 1e-06, "loss": 0.0293, "step": 12825 }, { "epoch": 31.06, "learning_rate": 1e-06, "loss": 0.0354, "step": 12850 }, { "epoch": 31.13, "learning_rate": 1e-06, "loss": 0.0267, "step": 12875 }, { "epoch": 31.19, "learning_rate": 1e-06, "loss": 0.0342, "step": 12900 }, { "epoch": 31.25, "learning_rate": 1e-06, "loss": 0.0319, "step": 12925 }, { "epoch": 31.31, "learning_rate": 1e-06, "loss": 0.0291, "step": 12950 }, { "epoch": 31.38, "learning_rate": 1e-06, "loss": 0.0339, "step": 12975 }, { "epoch": 31.44, "learning_rate": 1e-06, "loss": 0.0301, "step": 13000 }, { "epoch": 31.44, "eval_loss": 0.484619140625, "eval_runtime": 166.8919, "eval_samples_per_second": 10.162, "eval_steps_per_second": 0.635, "eval_wer": 17.449851411589894, "step": 13000 }, { "epoch": 31.5, "learning_rate": 1e-06, "loss": 0.0311, "step": 13025 }, { "epoch": 31.56, "learning_rate": 1e-06, "loss": 0.0298, "step": 13050 }, { "epoch": 31.62, "learning_rate": 1e-06, "loss": 0.034, "step": 13075 }, { "epoch": 31.69, "learning_rate": 1e-06, "loss": 0.0323, "step": 13100 }, { "epoch": 31.75, "learning_rate": 1e-06, "loss": 0.0376, "step": 13125 }, { "epoch": 31.81, "learning_rate": 1e-06, "loss": 0.0315, "step": 13150 }, { "epoch": 31.87, "learning_rate": 1e-06, "loss": 0.0386, "step": 13175 }, { "epoch": 31.94, "learning_rate": 1e-06, "loss": 0.0294, "step": 13200 }, { "epoch": 32.0, "learning_rate": 1e-06, "loss": 0.0317, "step": 13225 }, { "epoch": 32.06, "learning_rate": 1e-06, "loss": 0.0302, "step": 13250 }, { "epoch": 32.12, "learning_rate": 1e-06, "loss": 0.0322, "step": 13275 }, { "epoch": 32.18, "learning_rate": 1e-06, "loss": 0.0265, "step": 13300 }, { "epoch": 32.25, "learning_rate": 1e-06, "loss": 0.0306, "step": 13325 }, { "epoch": 32.31, "learning_rate": 1e-06, "loss": 0.0276, "step": 13350 }, { "epoch": 32.37, "learning_rate": 1e-06, "loss": 0.0303, "step": 13375 }, { "epoch": 32.43, "learning_rate": 1e-06, "loss": 0.0306, "step": 13400 }, { "epoch": 32.5, "learning_rate": 1e-06, "loss": 0.0254, "step": 13425 }, { "epoch": 32.56, "learning_rate": 1e-06, "loss": 0.0274, "step": 13450 }, { "epoch": 32.62, "learning_rate": 1e-06, "loss": 0.0327, "step": 13475 }, { "epoch": 32.68, "learning_rate": 1e-06, "loss": 0.0297, "step": 13500 }, { "epoch": 32.74, "learning_rate": 1e-06, "loss": 0.0367, "step": 13525 }, { "epoch": 32.81, "learning_rate": 1e-06, "loss": 0.0315, "step": 13550 }, { "epoch": 32.87, "learning_rate": 1e-06, "loss": 0.0333, "step": 13575 }, { "epoch": 32.93, "learning_rate": 1e-06, "loss": 0.0388, "step": 13600 }, { "epoch": 32.99, "learning_rate": 1e-06, "loss": 0.0286, "step": 13625 }, { "epoch": 33.05, "learning_rate": 1e-06, "loss": 0.0299, "step": 13650 }, { "epoch": 33.12, "learning_rate": 1e-06, "loss": 0.0291, "step": 13675 }, { "epoch": 33.18, "learning_rate": 1e-06, "loss": 0.0259, "step": 13700 }, { "epoch": 33.24, "learning_rate": 1e-06, "loss": 0.0296, "step": 13725 }, { "epoch": 33.3, "learning_rate": 1e-06, "loss": 0.0267, "step": 13750 }, { "epoch": 33.37, "learning_rate": 1e-06, "loss": 0.0317, "step": 13775 }, { "epoch": 33.43, "learning_rate": 1e-06, "loss": 0.0296, "step": 13800 }, { "epoch": 33.49, "learning_rate": 1e-06, "loss": 0.0297, "step": 13825 }, { "epoch": 33.55, "learning_rate": 1e-06, "loss": 0.0314, "step": 13850 }, { "epoch": 33.61, "learning_rate": 1e-06, "loss": 0.0286, "step": 13875 }, { "epoch": 33.68, "learning_rate": 1e-06, "loss": 0.0306, "step": 13900 }, { "epoch": 33.74, "learning_rate": 1e-06, "loss": 0.0309, "step": 13925 }, { "epoch": 33.8, "learning_rate": 1e-06, "loss": 0.0294, "step": 13950 }, { "epoch": 33.86, "learning_rate": 1e-06, "loss": 0.0346, "step": 13975 }, { "epoch": 33.93, "learning_rate": 1e-06, "loss": 0.031, "step": 14000 }, { "epoch": 33.93, "eval_loss": 0.48486328125, "eval_runtime": 166.8129, "eval_samples_per_second": 10.167, "eval_steps_per_second": 0.635, "eval_wer": 17.830609212481427, "step": 14000 }, { "epoch": 33.99, "learning_rate": 1e-06, "loss": 0.0261, "step": 14025 }, { "epoch": 34.05, "learning_rate": 1e-06, "loss": 0.0344, "step": 14050 }, { "epoch": 34.11, "learning_rate": 1e-06, "loss": 0.0249, "step": 14075 }, { "epoch": 34.17, "learning_rate": 1e-06, "loss": 0.0258, "step": 14100 }, { "epoch": 34.24, "learning_rate": 1e-06, "loss": 0.0289, "step": 14125 }, { "epoch": 34.3, "learning_rate": 1e-06, "loss": 0.0306, "step": 14150 }, { "epoch": 34.36, "learning_rate": 1e-06, "loss": 0.0313, "step": 14175 }, { "epoch": 34.42, "learning_rate": 1e-06, "loss": 0.0286, "step": 14200 }, { "epoch": 34.49, "learning_rate": 1e-06, "loss": 0.0286, "step": 14225 }, { "epoch": 34.55, "learning_rate": 1e-06, "loss": 0.0274, "step": 14250 }, { "epoch": 34.61, "learning_rate": 1e-06, "loss": 0.0225, "step": 14275 }, { "epoch": 34.67, "learning_rate": 1e-06, "loss": 0.028, "step": 14300 }, { "epoch": 34.73, "learning_rate": 1e-06, "loss": 0.0288, "step": 14325 }, { "epoch": 34.8, "learning_rate": 1e-06, "loss": 0.0292, "step": 14350 }, { "epoch": 34.86, "learning_rate": 1e-06, "loss": 0.0283, "step": 14375 }, { "epoch": 34.92, "learning_rate": 1e-06, "loss": 0.0319, "step": 14400 }, { "epoch": 34.98, "learning_rate": 1e-06, "loss": 0.0254, "step": 14425 }, { "epoch": 35.04, "learning_rate": 1e-06, "loss": 0.0286, "step": 14450 }, { "epoch": 35.11, "learning_rate": 1e-06, "loss": 0.0257, "step": 14475 }, { "epoch": 35.17, "learning_rate": 1e-06, "loss": 0.0284, "step": 14500 }, { "epoch": 35.23, "learning_rate": 1e-06, "loss": 0.0306, "step": 14525 }, { "epoch": 35.29, "learning_rate": 1e-06, "loss": 0.0272, "step": 14550 }, { "epoch": 35.36, "learning_rate": 1e-06, "loss": 0.0313, "step": 14575 }, { "epoch": 35.42, "learning_rate": 1e-06, "loss": 0.0277, "step": 14600 }, { "epoch": 35.48, "learning_rate": 1e-06, "loss": 0.0251, "step": 14625 }, { "epoch": 35.54, "learning_rate": 1e-06, "loss": 0.0217, "step": 14650 }, { "epoch": 35.6, "learning_rate": 1e-06, "loss": 0.0296, "step": 14675 }, { "epoch": 35.67, "learning_rate": 1e-06, "loss": 0.0245, "step": 14700 }, { "epoch": 35.73, "learning_rate": 1e-06, "loss": 0.0257, "step": 14725 }, { "epoch": 35.79, "learning_rate": 1e-06, "loss": 0.0289, "step": 14750 }, { "epoch": 35.85, "learning_rate": 1e-06, "loss": 0.0265, "step": 14775 }, { "epoch": 35.92, "learning_rate": 1e-06, "loss": 0.0279, "step": 14800 }, { "epoch": 35.98, "learning_rate": 1e-06, "loss": 0.0292, "step": 14825 }, { "epoch": 36.04, "learning_rate": 1e-06, "loss": 0.0294, "step": 14850 }, { "epoch": 36.1, "learning_rate": 1e-06, "loss": 0.0256, "step": 14875 }, { "epoch": 36.16, "learning_rate": 1e-06, "loss": 0.0281, "step": 14900 }, { "epoch": 36.23, "learning_rate": 1e-06, "loss": 0.0278, "step": 14925 }, { "epoch": 36.29, "learning_rate": 1e-06, "loss": 0.0267, "step": 14950 }, { "epoch": 36.35, "learning_rate": 1e-06, "loss": 0.0208, "step": 14975 }, { "epoch": 36.41, "learning_rate": 1e-06, "loss": 0.0263, "step": 15000 }, { "epoch": 36.41, "eval_loss": 0.488037109375, "eval_runtime": 166.7242, "eval_samples_per_second": 10.172, "eval_steps_per_second": 0.636, "eval_wer": 17.61701337295691, "step": 15000 }, { "epoch": 36.48, "learning_rate": 1e-06, "loss": 0.0311, "step": 15025 }, { "epoch": 36.54, "learning_rate": 1e-06, "loss": 0.0255, "step": 15050 }, { "epoch": 36.6, "learning_rate": 1e-06, "loss": 0.0267, "step": 15075 }, { "epoch": 36.66, "learning_rate": 1e-06, "loss": 0.0277, "step": 15100 }, { "epoch": 36.72, "learning_rate": 1e-06, "loss": 0.0241, "step": 15125 }, { "epoch": 36.79, "learning_rate": 1e-06, "loss": 0.0267, "step": 15150 }, { "epoch": 36.85, "learning_rate": 1e-06, "loss": 0.0238, "step": 15175 }, { "epoch": 36.91, "learning_rate": 1e-06, "loss": 0.0289, "step": 15200 }, { "epoch": 36.97, "learning_rate": 1e-06, "loss": 0.026, "step": 15225 }, { "epoch": 37.03, "learning_rate": 1e-06, "loss": 0.0249, "step": 15250 }, { "epoch": 37.1, "learning_rate": 1e-06, "loss": 0.0259, "step": 15275 }, { "epoch": 37.16, "learning_rate": 1e-06, "loss": 0.0266, "step": 15300 }, { "epoch": 37.22, "learning_rate": 1e-06, "loss": 0.029, "step": 15325 }, { "epoch": 37.28, "learning_rate": 1e-06, "loss": 0.0277, "step": 15350 }, { "epoch": 37.35, "learning_rate": 1e-06, "loss": 0.0253, "step": 15375 }, { "epoch": 37.41, "learning_rate": 1e-06, "loss": 0.0276, "step": 15400 }, { "epoch": 37.47, "learning_rate": 1e-06, "loss": 0.0236, "step": 15425 }, { "epoch": 37.53, "learning_rate": 1e-06, "loss": 0.0245, "step": 15450 }, { "epoch": 37.59, "learning_rate": 1e-06, "loss": 0.0248, "step": 15475 }, { "epoch": 37.66, "learning_rate": 1e-06, "loss": 0.0248, "step": 15500 }, { "epoch": 37.72, "learning_rate": 1e-06, "loss": 0.022, "step": 15525 }, { "epoch": 37.78, "learning_rate": 1e-06, "loss": 0.0261, "step": 15550 }, { "epoch": 37.84, "learning_rate": 1e-06, "loss": 0.0265, "step": 15575 }, { "epoch": 37.91, "learning_rate": 1e-06, "loss": 0.0241, "step": 15600 }, { "epoch": 37.97, "learning_rate": 1e-06, "loss": 0.0261, "step": 15625 }, { "epoch": 38.03, "learning_rate": 1e-06, "loss": 0.0239, "step": 15650 }, { "epoch": 38.09, "learning_rate": 1e-06, "loss": 0.0255, "step": 15675 }, { "epoch": 38.15, "learning_rate": 1e-06, "loss": 0.0251, "step": 15700 }, { "epoch": 38.22, "learning_rate": 1e-06, "loss": 0.0255, "step": 15725 }, { "epoch": 38.28, "learning_rate": 1e-06, "loss": 0.0255, "step": 15750 }, { "epoch": 38.34, "learning_rate": 1e-06, "loss": 0.0252, "step": 15775 }, { "epoch": 38.4, "learning_rate": 1e-06, "loss": 0.026, "step": 15800 }, { "epoch": 38.47, "learning_rate": 1e-06, "loss": 0.0261, "step": 15825 }, { "epoch": 38.53, "learning_rate": 1e-06, "loss": 0.0243, "step": 15850 }, { "epoch": 38.59, "learning_rate": 1e-06, "loss": 0.0253, "step": 15875 }, { "epoch": 38.65, "learning_rate": 1e-06, "loss": 0.0276, "step": 15900 }, { "epoch": 38.71, "learning_rate": 1e-06, "loss": 0.0239, "step": 15925 }, { "epoch": 38.78, "learning_rate": 1e-06, "loss": 0.0259, "step": 15950 }, { "epoch": 38.84, "learning_rate": 1e-06, "loss": 0.0232, "step": 15975 }, { "epoch": 38.9, "learning_rate": 1e-06, "loss": 0.0256, "step": 16000 }, { "epoch": 38.9, "eval_loss": 0.487060546875, "eval_runtime": 166.9973, "eval_samples_per_second": 10.156, "eval_steps_per_second": 0.635, "eval_wer": 17.199108469539375, "step": 16000 }, { "epoch": 38.96, "learning_rate": 1e-06, "loss": 0.0228, "step": 16025 }, { "epoch": 39.02, "learning_rate": 1e-06, "loss": 0.0208, "step": 16050 }, { "epoch": 39.09, "learning_rate": 1e-06, "loss": 0.0237, "step": 16075 }, { "epoch": 39.15, "learning_rate": 1e-06, "loss": 0.0238, "step": 16100 }, { "epoch": 39.21, "learning_rate": 1e-06, "loss": 0.025, "step": 16125 }, { "epoch": 39.27, "learning_rate": 1e-06, "loss": 0.0219, "step": 16150 }, { "epoch": 39.34, "learning_rate": 1e-06, "loss": 0.0214, "step": 16175 }, { "epoch": 39.4, "learning_rate": 1e-06, "loss": 0.0235, "step": 16200 }, { "epoch": 39.46, "learning_rate": 1e-06, "loss": 0.0219, "step": 16225 }, { "epoch": 39.52, "learning_rate": 1e-06, "loss": 0.0249, "step": 16250 }, { "epoch": 39.58, "learning_rate": 1e-06, "loss": 0.0265, "step": 16275 }, { "epoch": 39.65, "learning_rate": 1e-06, "loss": 0.0252, "step": 16300 }, { "epoch": 39.71, "learning_rate": 1e-06, "loss": 0.0224, "step": 16325 }, { "epoch": 39.77, "learning_rate": 1e-06, "loss": 0.0289, "step": 16350 }, { "epoch": 39.83, "learning_rate": 1e-06, "loss": 0.0218, "step": 16375 }, { "epoch": 39.9, "learning_rate": 1e-06, "loss": 0.0256, "step": 16400 }, { "epoch": 39.96, "learning_rate": 1e-06, "loss": 0.0247, "step": 16425 }, { "epoch": 40.02, "learning_rate": 1e-06, "loss": 0.0233, "step": 16450 }, { "epoch": 40.08, "learning_rate": 1e-06, "loss": 0.0241, "step": 16475 }, { "epoch": 40.14, "learning_rate": 1e-06, "loss": 0.0242, "step": 16500 }, { "epoch": 40.21, "learning_rate": 1e-06, "loss": 0.0225, "step": 16525 }, { "epoch": 40.27, "learning_rate": 1e-06, "loss": 0.0234, "step": 16550 }, { "epoch": 40.33, "learning_rate": 1e-06, "loss": 0.0206, "step": 16575 }, { "epoch": 40.39, "learning_rate": 1e-06, "loss": 0.0234, "step": 16600 }, { "epoch": 40.46, "learning_rate": 1e-06, "loss": 0.0249, "step": 16625 }, { "epoch": 40.52, "learning_rate": 1e-06, "loss": 0.0243, "step": 16650 }, { "epoch": 40.58, "learning_rate": 1e-06, "loss": 0.0218, "step": 16675 }, { "epoch": 40.64, "learning_rate": 1e-06, "loss": 0.0203, "step": 16700 }, { "epoch": 40.7, "learning_rate": 1e-06, "loss": 0.024, "step": 16725 }, { "epoch": 40.77, "learning_rate": 1e-06, "loss": 0.0232, "step": 16750 }, { "epoch": 40.83, "learning_rate": 1e-06, "loss": 0.0197, "step": 16775 }, { "epoch": 40.89, "learning_rate": 1e-06, "loss": 0.0257, "step": 16800 }, { "epoch": 40.95, "learning_rate": 1e-06, "loss": 0.0259, "step": 16825 }, { "epoch": 41.01, "learning_rate": 1e-06, "loss": 0.0215, "step": 16850 }, { "epoch": 41.08, "learning_rate": 1e-06, "loss": 0.0228, "step": 16875 }, { "epoch": 41.14, "learning_rate": 1e-06, "loss": 0.0232, "step": 16900 }, { "epoch": 41.2, "learning_rate": 1e-06, "loss": 0.025, "step": 16925 }, { "epoch": 41.26, "learning_rate": 1e-06, "loss": 0.0231, "step": 16950 }, { "epoch": 41.33, "learning_rate": 1e-06, "loss": 0.0211, "step": 16975 }, { "epoch": 41.39, "learning_rate": 1e-06, "loss": 0.0236, "step": 17000 }, { "epoch": 41.39, "eval_loss": 0.48828125, "eval_runtime": 167.0132, "eval_samples_per_second": 10.155, "eval_steps_per_second": 0.635, "eval_wer": 17.26411589895988, "step": 17000 }, { "epoch": 41.45, "learning_rate": 1e-06, "loss": 0.0257, "step": 17025 }, { "epoch": 41.51, "learning_rate": 1e-06, "loss": 0.0236, "step": 17050 }, { "epoch": 41.57, "learning_rate": 1e-06, "loss": 0.0221, "step": 17075 }, { "epoch": 41.64, "learning_rate": 1e-06, "loss": 0.0182, "step": 17100 }, { "epoch": 41.7, "learning_rate": 1e-06, "loss": 0.0247, "step": 17125 }, { "epoch": 41.76, "learning_rate": 1e-06, "loss": 0.0204, "step": 17150 }, { "epoch": 41.82, "learning_rate": 1e-06, "loss": 0.0221, "step": 17175 }, { "epoch": 41.89, "learning_rate": 1e-06, "loss": 0.0227, "step": 17200 }, { "epoch": 41.95, "learning_rate": 1e-06, "loss": 0.0213, "step": 17225 }, { "epoch": 42.01, "learning_rate": 1e-06, "loss": 0.021, "step": 17250 }, { "epoch": 42.07, "learning_rate": 1e-06, "loss": 0.0219, "step": 17275 }, { "epoch": 42.13, "learning_rate": 1e-06, "loss": 0.0179, "step": 17300 }, { "epoch": 42.2, "learning_rate": 1e-06, "loss": 0.0207, "step": 17325 }, { "epoch": 42.26, "learning_rate": 1e-06, "loss": 0.0198, "step": 17350 }, { "epoch": 42.32, "learning_rate": 1e-06, "loss": 0.0192, "step": 17375 }, { "epoch": 42.38, "learning_rate": 1e-06, "loss": 0.0231, "step": 17400 }, { "epoch": 42.45, "learning_rate": 1e-06, "loss": 0.0216, "step": 17425 }, { "epoch": 42.51, "learning_rate": 1e-06, "loss": 0.0203, "step": 17450 }, { "epoch": 42.57, "learning_rate": 1e-06, "loss": 0.022, "step": 17475 }, { "epoch": 42.63, "learning_rate": 1e-06, "loss": 0.0198, "step": 17500 }, { "epoch": 42.69, "learning_rate": 1e-06, "loss": 0.0184, "step": 17525 }, { "epoch": 42.76, "learning_rate": 1e-06, "loss": 0.0223, "step": 17550 }, { "epoch": 42.82, "learning_rate": 1e-06, "loss": 0.0221, "step": 17575 }, { "epoch": 42.88, "learning_rate": 1e-06, "loss": 0.0254, "step": 17600 }, { "epoch": 42.94, "learning_rate": 1e-06, "loss": 0.024, "step": 17625 }, { "epoch": 43.0, "learning_rate": 1e-06, "loss": 0.02, "step": 17650 }, { "epoch": 43.07, "learning_rate": 1e-06, "loss": 0.022, "step": 17675 }, { "epoch": 43.13, "learning_rate": 1e-06, "loss": 0.0198, "step": 17700 }, { "epoch": 43.19, "learning_rate": 1e-06, "loss": 0.019, "step": 17725 }, { "epoch": 43.25, "learning_rate": 1e-06, "loss": 0.0214, "step": 17750 }, { "epoch": 43.32, "learning_rate": 1e-06, "loss": 0.0219, "step": 17775 }, { "epoch": 43.38, "learning_rate": 1e-06, "loss": 0.0176, "step": 17800 }, { "epoch": 43.44, "learning_rate": 1e-06, "loss": 0.0224, "step": 17825 }, { "epoch": 43.5, "learning_rate": 1e-06, "loss": 0.023, "step": 17850 }, { "epoch": 43.56, "learning_rate": 1e-06, "loss": 0.0185, "step": 17875 }, { "epoch": 43.63, "learning_rate": 1e-06, "loss": 0.0225, "step": 17900 }, { "epoch": 43.69, "learning_rate": 1e-06, "loss": 0.0201, "step": 17925 }, { "epoch": 43.75, "learning_rate": 1e-06, "loss": 0.022, "step": 17950 }, { "epoch": 43.81, "learning_rate": 1e-06, "loss": 0.0219, "step": 17975 }, { "epoch": 43.88, "learning_rate": 1e-06, "loss": 0.0195, "step": 18000 }, { "epoch": 43.88, "eval_loss": 0.488037109375, "eval_runtime": 167.5739, "eval_samples_per_second": 10.121, "eval_steps_per_second": 0.633, "eval_wer": 17.570579494799404, "step": 18000 }, { "epoch": 43.94, "learning_rate": 1e-06, "loss": 0.0189, "step": 18025 }, { "epoch": 44.0, "learning_rate": 1e-06, "loss": 0.0231, "step": 18050 }, { "epoch": 44.06, "learning_rate": 1e-06, "loss": 0.0194, "step": 18075 }, { "epoch": 44.12, "learning_rate": 1e-06, "loss": 0.022, "step": 18100 }, { "epoch": 44.19, "learning_rate": 1e-06, "loss": 0.0199, "step": 18125 }, { "epoch": 44.25, "learning_rate": 1e-06, "loss": 0.0227, "step": 18150 }, { "epoch": 44.31, "learning_rate": 1e-06, "loss": 0.0228, "step": 18175 }, { "epoch": 44.37, "learning_rate": 1e-06, "loss": 0.0181, "step": 18200 }, { "epoch": 44.44, "learning_rate": 1e-06, "loss": 0.0171, "step": 18225 }, { "epoch": 44.5, "learning_rate": 1e-06, "loss": 0.0207, "step": 18250 }, { "epoch": 44.56, "learning_rate": 1e-06, "loss": 0.022, "step": 18275 }, { "epoch": 44.62, "learning_rate": 1e-06, "loss": 0.0177, "step": 18300 }, { "epoch": 44.68, "learning_rate": 1e-06, "loss": 0.0191, "step": 18325 }, { "epoch": 44.75, "learning_rate": 1e-06, "loss": 0.022, "step": 18350 }, { "epoch": 44.81, "learning_rate": 1e-06, "loss": 0.0222, "step": 18375 }, { "epoch": 44.87, "learning_rate": 1e-06, "loss": 0.0194, "step": 18400 }, { "epoch": 44.93, "learning_rate": 1e-06, "loss": 0.0207, "step": 18425 }, { "epoch": 45.0, "learning_rate": 1e-06, "loss": 0.019, "step": 18450 }, { "epoch": 45.06, "learning_rate": 1e-06, "loss": 0.0181, "step": 18475 }, { "epoch": 45.12, "learning_rate": 1e-06, "loss": 0.0222, "step": 18500 }, { "epoch": 45.18, "learning_rate": 1e-06, "loss": 0.0181, "step": 18525 }, { "epoch": 45.24, "learning_rate": 1e-06, "loss": 0.018, "step": 18550 }, { "epoch": 45.31, "learning_rate": 1e-06, "loss": 0.02, "step": 18575 }, { "epoch": 45.37, "learning_rate": 1e-06, "loss": 0.0177, "step": 18600 }, { "epoch": 45.43, "learning_rate": 1e-06, "loss": 0.0185, "step": 18625 }, { "epoch": 45.49, "learning_rate": 1e-06, "loss": 0.0211, "step": 18650 }, { "epoch": 45.55, "learning_rate": 1e-06, "loss": 0.0188, "step": 18675 }, { "epoch": 45.62, "learning_rate": 1e-06, "loss": 0.0219, "step": 18700 }, { "epoch": 45.68, "learning_rate": 1e-06, "loss": 0.0207, "step": 18725 }, { "epoch": 45.74, "learning_rate": 1e-06, "loss": 0.0167, "step": 18750 }, { "epoch": 45.8, "learning_rate": 1e-06, "loss": 0.019, "step": 18775 }, { "epoch": 45.87, "learning_rate": 1e-06, "loss": 0.021, "step": 18800 }, { "epoch": 45.93, "learning_rate": 1e-06, "loss": 0.0203, "step": 18825 }, { "epoch": 45.99, "learning_rate": 1e-06, "loss": 0.0195, "step": 18850 }, { "epoch": 46.05, "learning_rate": 1e-06, "loss": 0.0203, "step": 18875 }, { "epoch": 46.11, "learning_rate": 1e-06, "loss": 0.0189, "step": 18900 }, { "epoch": 46.18, "learning_rate": 1e-06, "loss": 0.0182, "step": 18925 }, { "epoch": 46.24, "learning_rate": 1e-06, "loss": 0.0194, "step": 18950 }, { "epoch": 46.3, "learning_rate": 1e-06, "loss": 0.0176, "step": 18975 }, { "epoch": 46.36, "learning_rate": 1e-06, "loss": 0.0193, "step": 19000 }, { "epoch": 46.36, "eval_loss": 0.499267578125, "eval_runtime": 167.0506, "eval_samples_per_second": 10.153, "eval_steps_per_second": 0.635, "eval_wer": 17.72845468053492, "step": 19000 }, { "epoch": 46.43, "learning_rate": 1e-06, "loss": 0.0184, "step": 19025 }, { "epoch": 46.49, "learning_rate": 1e-06, "loss": 0.0183, "step": 19050 }, { "epoch": 46.55, "learning_rate": 1e-06, "loss": 0.0178, "step": 19075 }, { "epoch": 46.61, "learning_rate": 1e-06, "loss": 0.0173, "step": 19100 }, { "epoch": 46.67, "learning_rate": 1e-06, "loss": 0.0175, "step": 19125 }, { "epoch": 46.74, "learning_rate": 1e-06, "loss": 0.019, "step": 19150 }, { "epoch": 46.8, "learning_rate": 1e-06, "loss": 0.0195, "step": 19175 }, { "epoch": 46.86, "learning_rate": 1e-06, "loss": 0.0163, "step": 19200 }, { "epoch": 46.92, "learning_rate": 1e-06, "loss": 0.0212, "step": 19225 }, { "epoch": 46.99, "learning_rate": 1e-06, "loss": 0.0185, "step": 19250 }, { "epoch": 47.05, "learning_rate": 1e-06, "loss": 0.0168, "step": 19275 }, { "epoch": 47.11, "learning_rate": 1e-06, "loss": 0.0168, "step": 19300 }, { "epoch": 47.17, "learning_rate": 1e-06, "loss": 0.0198, "step": 19325 }, { "epoch": 47.23, "learning_rate": 1e-06, "loss": 0.0156, "step": 19350 }, { "epoch": 47.3, "learning_rate": 1e-06, "loss": 0.0157, "step": 19375 }, { "epoch": 47.36, "learning_rate": 1e-06, "loss": 0.0176, "step": 19400 }, { "epoch": 47.42, "learning_rate": 1e-06, "loss": 0.019, "step": 19425 }, { "epoch": 47.48, "learning_rate": 1e-06, "loss": 0.0198, "step": 19450 }, { "epoch": 47.54, "learning_rate": 1e-06, "loss": 0.0177, "step": 19475 }, { "epoch": 47.61, "learning_rate": 1e-06, "loss": 0.0178, "step": 19500 }, { "epoch": 47.67, "learning_rate": 1e-06, "loss": 0.0171, "step": 19525 }, { "epoch": 47.73, "learning_rate": 1e-06, "loss": 0.0196, "step": 19550 }, { "epoch": 47.79, "learning_rate": 1e-06, "loss": 0.0191, "step": 19575 }, { "epoch": 47.86, "learning_rate": 1e-06, "loss": 0.0206, "step": 19600 }, { "epoch": 47.92, "learning_rate": 1e-06, "loss": 0.0199, "step": 19625 }, { "epoch": 47.98, "learning_rate": 1e-06, "loss": 0.0205, "step": 19650 }, { "epoch": 48.04, "learning_rate": 1e-06, "loss": 0.0196, "step": 19675 }, { "epoch": 48.1, "learning_rate": 1e-06, "loss": 0.0197, "step": 19700 }, { "epoch": 48.17, "learning_rate": 1e-06, "loss": 0.0136, "step": 19725 }, { "epoch": 48.23, "learning_rate": 1e-06, "loss": 0.0151, "step": 19750 }, { "epoch": 48.29, "learning_rate": 1e-06, "loss": 0.0163, "step": 19775 }, { "epoch": 48.35, "learning_rate": 1e-06, "loss": 0.0188, "step": 19800 }, { "epoch": 48.42, "learning_rate": 1e-06, "loss": 0.0172, "step": 19825 }, { "epoch": 48.48, "learning_rate": 1e-06, "loss": 0.0188, "step": 19850 }, { "epoch": 48.54, "learning_rate": 1e-06, "loss": 0.0178, "step": 19875 }, { "epoch": 48.6, "learning_rate": 1e-06, "loss": 0.0204, "step": 19900 }, { "epoch": 48.66, "learning_rate": 1e-06, "loss": 0.0203, "step": 19925 }, { "epoch": 48.73, "learning_rate": 1e-06, "loss": 0.017, "step": 19950 }, { "epoch": 48.79, "learning_rate": 1e-06, "loss": 0.0173, "step": 19975 }, { "epoch": 48.85, "learning_rate": 1e-06, "loss": 0.0161, "step": 20000 }, { "epoch": 48.85, "eval_loss": 0.496826171875, "eval_runtime": 166.936, "eval_samples_per_second": 10.16, "eval_steps_per_second": 0.635, "eval_wer": 17.830609212481427, "step": 20000 }, { "epoch": 48.85, "step": 20000, "total_flos": 1.84611545204933e+20, "train_loss": 0.01866207084655762, "train_runtime": 23950.5227, "train_samples_per_second": 26.722, "train_steps_per_second": 0.835 } ], "max_steps": 20000, "num_train_epochs": 50, "total_flos": 1.84611545204933e+20, "trial_name": null, "trial_params": null }