{ "best_metric": 15.749067778021498, "best_model_checkpoint": "./checkpoint-4000", "epoch": 8.0048, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.6e-09, "loss": 0.9435, "step": 25 }, { "epoch": 0.01, "learning_rate": 9.6e-09, "loss": 0.9281, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.4599999999999998e-08, "loss": 0.9519, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.94e-08, "loss": 0.9436, "step": 100 }, { "epoch": 0.03, "learning_rate": 2.4399999999999997e-08, "loss": 0.9253, "step": 125 }, { "epoch": 0.03, "learning_rate": 2.94e-08, "loss": 0.9338, "step": 150 }, { "epoch": 0.04, "learning_rate": 3.44e-08, "loss": 0.9108, "step": 175 }, { "epoch": 0.04, "learning_rate": 3.94e-08, "loss": 0.8899, "step": 200 }, { "epoch": 0.04, "learning_rate": 4.44e-08, "loss": 0.8772, "step": 225 }, { "epoch": 0.05, "learning_rate": 4.92e-08, "loss": 0.8194, "step": 250 }, { "epoch": 0.06, "learning_rate": 5.42e-08, "loss": 0.7858, "step": 275 }, { "epoch": 0.06, "learning_rate": 5.9199999999999994e-08, "loss": 0.772, "step": 300 }, { "epoch": 0.07, "learning_rate": 6.419999999999999e-08, "loss": 0.7039, "step": 325 }, { "epoch": 0.07, "learning_rate": 6.92e-08, "loss": 0.6403, "step": 350 }, { "epoch": 0.07, "learning_rate": 7.419999999999999e-08, "loss": 0.5761, "step": 375 }, { "epoch": 0.08, "learning_rate": 7.92e-08, "loss": 0.4753, "step": 400 }, { "epoch": 0.09, "learning_rate": 8.42e-08, "loss": 0.4406, "step": 425 }, { "epoch": 0.09, "learning_rate": 8.919999999999999e-08, "loss": 0.4276, "step": 450 }, { "epoch": 0.1, "learning_rate": 9.42e-08, "loss": 0.3747, "step": 475 }, { "epoch": 0.1, "learning_rate": 9.919999999999999e-08, "loss": 0.3983, "step": 500 }, { "epoch": 0.1, "eval_cer": 10.639092728485657, "eval_loss": 0.5338271260261536, "eval_runtime": 1141.2363, "eval_samples_per_second": 1.084, "eval_steps_per_second": 0.272, "eval_wer": 19.587628865979383, "step": 500 }, { "epoch": 0.1, "learning_rate": 9.953333333333332e-08, "loss": 0.3437, "step": 525 }, { "epoch": 0.11, "learning_rate": 9.897777777777777e-08, "loss": 0.3388, "step": 550 }, { "epoch": 0.12, "learning_rate": 9.842222222222222e-08, "loss": 0.32, "step": 575 }, { "epoch": 0.12, "learning_rate": 9.786666666666667e-08, "loss": 0.2985, "step": 600 }, { "epoch": 1.0, "learning_rate": 9.73111111111111e-08, "loss": 0.3006, "step": 625 }, { "epoch": 1.01, "learning_rate": 9.675555555555554e-08, "loss": 0.2939, "step": 650 }, { "epoch": 1.01, "learning_rate": 9.619999999999999e-08, "loss": 0.2768, "step": 675 }, { "epoch": 1.02, "learning_rate": 9.564444444444444e-08, "loss": 0.2703, "step": 700 }, { "epoch": 1.02, "learning_rate": 9.511111111111111e-08, "loss": 0.2733, "step": 725 }, { "epoch": 1.03, "learning_rate": 9.455555555555556e-08, "loss": 0.2623, "step": 750 }, { "epoch": 1.03, "learning_rate": 9.4e-08, "loss": 0.2679, "step": 775 }, { "epoch": 1.04, "learning_rate": 9.344444444444444e-08, "loss": 0.2431, "step": 800 }, { "epoch": 1.04, "learning_rate": 9.288888888888888e-08, "loss": 0.2415, "step": 825 }, { "epoch": 1.05, "learning_rate": 9.233333333333333e-08, "loss": 0.2584, "step": 850 }, { "epoch": 1.05, "learning_rate": 9.177777777777777e-08, "loss": 0.229, "step": 875 }, { "epoch": 1.06, "learning_rate": 9.122222222222222e-08, "loss": 0.2333, "step": 900 }, { "epoch": 1.06, "learning_rate": 9.066666666666666e-08, "loss": 0.2333, "step": 925 }, { "epoch": 1.07, "learning_rate": 9.01111111111111e-08, "loss": 0.2233, "step": 950 }, { "epoch": 1.07, "learning_rate": 8.955555555555555e-08, "loss": 0.2306, "step": 975 }, { "epoch": 1.08, "learning_rate": 8.9e-08, "loss": 0.2277, "step": 1000 }, { "epoch": 1.08, "eval_cer": 8.266844563042028, "eval_loss": 0.4134095311164856, "eval_runtime": 1163.4198, "eval_samples_per_second": 1.063, "eval_steps_per_second": 0.266, "eval_wer": 16.582583899978065, "step": 1000 }, { "epoch": 1.08, "learning_rate": 8.844444444444444e-08, "loss": 0.213, "step": 1025 }, { "epoch": 1.09, "learning_rate": 8.788888888888889e-08, "loss": 0.2195, "step": 1050 }, { "epoch": 1.09, "learning_rate": 8.733333333333332e-08, "loss": 0.2242, "step": 1075 }, { "epoch": 1.1, "learning_rate": 8.677777777777777e-08, "loss": 0.2075, "step": 1100 }, { "epoch": 1.1, "learning_rate": 8.622222222222222e-08, "loss": 0.2077, "step": 1125 }, { "epoch": 1.11, "learning_rate": 8.566666666666667e-08, "loss": 0.2004, "step": 1150 }, { "epoch": 1.11, "learning_rate": 8.511111111111111e-08, "loss": 0.209, "step": 1175 }, { "epoch": 1.12, "learning_rate": 8.457777777777778e-08, "loss": 0.2083, "step": 1200 }, { "epoch": 1.12, "learning_rate": 8.402222222222221e-08, "loss": 0.1933, "step": 1225 }, { "epoch": 2.0, "learning_rate": 8.346666666666666e-08, "loss": 0.2018, "step": 1250 }, { "epoch": 2.01, "learning_rate": 8.291111111111111e-08, "loss": 0.1949, "step": 1275 }, { "epoch": 2.01, "learning_rate": 8.235555555555556e-08, "loss": 0.2019, "step": 1300 }, { "epoch": 2.02, "learning_rate": 8.179999999999999e-08, "loss": 0.1877, "step": 1325 }, { "epoch": 2.02, "learning_rate": 8.124444444444444e-08, "loss": 0.192, "step": 1350 }, { "epoch": 2.03, "learning_rate": 8.068888888888889e-08, "loss": 0.204, "step": 1375 }, { "epoch": 2.03, "learning_rate": 8.013333333333334e-08, "loss": 0.1995, "step": 1400 }, { "epoch": 2.04, "learning_rate": 7.957777777777777e-08, "loss": 0.1851, "step": 1425 }, { "epoch": 2.04, "learning_rate": 7.902222222222222e-08, "loss": 0.187, "step": 1450 }, { "epoch": 2.05, "learning_rate": 7.846666666666665e-08, "loss": 0.1929, "step": 1475 }, { "epoch": 2.05, "learning_rate": 7.79111111111111e-08, "loss": 0.172, "step": 1500 }, { "epoch": 2.05, "eval_cer": 7.978652434956637, "eval_loss": 0.3968185782432556, "eval_runtime": 1160.0567, "eval_samples_per_second": 1.066, "eval_steps_per_second": 0.267, "eval_wer": 16.30840096512393, "step": 1500 }, { "epoch": 2.06, "learning_rate": 7.735555555555555e-08, "loss": 0.1847, "step": 1525 }, { "epoch": 2.06, "learning_rate": 7.68e-08, "loss": 0.1803, "step": 1550 }, { "epoch": 2.07, "learning_rate": 7.624444444444445e-08, "loss": 0.1808, "step": 1575 }, { "epoch": 2.07, "learning_rate": 7.56888888888889e-08, "loss": 0.1782, "step": 1600 }, { "epoch": 2.08, "learning_rate": 7.513333333333333e-08, "loss": 0.1894, "step": 1625 }, { "epoch": 2.08, "learning_rate": 7.457777777777777e-08, "loss": 0.1754, "step": 1650 }, { "epoch": 2.09, "learning_rate": 7.402222222222221e-08, "loss": 0.1811, "step": 1675 }, { "epoch": 2.09, "learning_rate": 7.346666666666666e-08, "loss": 0.1733, "step": 1700 }, { "epoch": 2.1, "learning_rate": 7.291111111111111e-08, "loss": 0.1717, "step": 1725 }, { "epoch": 2.1, "learning_rate": 7.235555555555555e-08, "loss": 0.1814, "step": 1750 }, { "epoch": 2.11, "learning_rate": 7.18e-08, "loss": 0.1753, "step": 1775 }, { "epoch": 2.11, "learning_rate": 7.124444444444444e-08, "loss": 0.1671, "step": 1800 }, { "epoch": 2.12, "learning_rate": 7.068888888888889e-08, "loss": 0.1668, "step": 1825 }, { "epoch": 2.12, "learning_rate": 7.013333333333334e-08, "loss": 0.163, "step": 1850 }, { "epoch": 3.0, "learning_rate": 6.957777777777777e-08, "loss": 0.1589, "step": 1875 }, { "epoch": 3.01, "learning_rate": 6.902222222222221e-08, "loss": 0.1689, "step": 1900 }, { "epoch": 3.01, "learning_rate": 6.846666666666666e-08, "loss": 0.1641, "step": 1925 }, { "epoch": 3.02, "learning_rate": 6.79111111111111e-08, "loss": 0.1598, "step": 1950 }, { "epoch": 3.02, "learning_rate": 6.735555555555555e-08, "loss": 0.1692, "step": 1975 }, { "epoch": 3.03, "learning_rate": 6.68e-08, "loss": 0.1823, "step": 2000 }, { "epoch": 3.03, "eval_cer": 7.815877251501001, "eval_loss": 0.39564967155456543, "eval_runtime": 1167.325, "eval_samples_per_second": 1.06, "eval_steps_per_second": 0.266, "eval_wer": 16.176793156393945, "step": 2000 }, { "epoch": 3.03, "learning_rate": 6.624444444444444e-08, "loss": 0.1666, "step": 2025 }, { "epoch": 3.04, "learning_rate": 6.568888888888889e-08, "loss": 0.1582, "step": 2050 }, { "epoch": 3.04, "learning_rate": 6.513333333333333e-08, "loss": 0.1562, "step": 2075 }, { "epoch": 3.05, "learning_rate": 6.457777777777777e-08, "loss": 0.1703, "step": 2100 }, { "epoch": 3.05, "learning_rate": 6.402222222222222e-08, "loss": 0.1494, "step": 2125 }, { "epoch": 3.06, "learning_rate": 6.346666666666667e-08, "loss": 0.1726, "step": 2150 }, { "epoch": 3.06, "learning_rate": 6.29111111111111e-08, "loss": 0.1482, "step": 2175 }, { "epoch": 3.07, "learning_rate": 6.235555555555555e-08, "loss": 0.1645, "step": 2200 }, { "epoch": 3.07, "learning_rate": 6.18e-08, "loss": 0.1549, "step": 2225 }, { "epoch": 3.08, "learning_rate": 6.124444444444445e-08, "loss": 0.1496, "step": 2250 }, { "epoch": 3.08, "learning_rate": 6.06888888888889e-08, "loss": 0.1558, "step": 2275 }, { "epoch": 3.09, "learning_rate": 6.013333333333334e-08, "loss": 0.1611, "step": 2300 }, { "epoch": 3.09, "learning_rate": 5.957777777777777e-08, "loss": 0.1535, "step": 2325 }, { "epoch": 3.1, "learning_rate": 5.902222222222222e-08, "loss": 0.1482, "step": 2350 }, { "epoch": 3.1, "learning_rate": 5.846666666666667e-08, "loss": 0.156, "step": 2375 }, { "epoch": 3.11, "learning_rate": 5.791111111111111e-08, "loss": 0.1583, "step": 2400 }, { "epoch": 3.11, "learning_rate": 5.735555555555556e-08, "loss": 0.1505, "step": 2425 }, { "epoch": 3.12, "learning_rate": 5.679999999999999e-08, "loss": 0.1421, "step": 2450 }, { "epoch": 3.12, "learning_rate": 5.624444444444444e-08, "loss": 0.1498, "step": 2475 }, { "epoch": 4.0, "learning_rate": 5.568888888888888e-08, "loss": 0.1445, "step": 2500 }, { "epoch": 4.0, "eval_cer": 7.743829219479653, "eval_loss": 0.39546218514442444, "eval_runtime": 1163.0632, "eval_samples_per_second": 1.064, "eval_steps_per_second": 0.267, "eval_wer": 16.034218030269795, "step": 2500 }, { "epoch": 4.01, "learning_rate": 5.513333333333333e-08, "loss": 0.1497, "step": 2525 }, { "epoch": 4.01, "learning_rate": 5.457777777777778e-08, "loss": 0.1455, "step": 2550 }, { "epoch": 4.02, "learning_rate": 5.402222222222223e-08, "loss": 0.1453, "step": 2575 }, { "epoch": 4.02, "learning_rate": 5.346666666666666e-08, "loss": 0.1441, "step": 2600 }, { "epoch": 4.03, "learning_rate": 5.2911111111111104e-08, "loss": 0.1611, "step": 2625 }, { "epoch": 4.03, "learning_rate": 5.235555555555555e-08, "loss": 0.1483, "step": 2650 }, { "epoch": 4.04, "learning_rate": 5.18e-08, "loss": 0.1444, "step": 2675 }, { "epoch": 4.04, "learning_rate": 5.124444444444445e-08, "loss": 0.1414, "step": 2700 }, { "epoch": 4.05, "learning_rate": 5.068888888888889e-08, "loss": 0.148, "step": 2725 }, { "epoch": 4.05, "learning_rate": 5.0133333333333326e-08, "loss": 0.1401, "step": 2750 }, { "epoch": 4.06, "learning_rate": 4.9577777777777774e-08, "loss": 0.1486, "step": 2775 }, { "epoch": 4.06, "learning_rate": 4.902222222222222e-08, "loss": 0.1329, "step": 2800 }, { "epoch": 4.07, "learning_rate": 4.8466666666666664e-08, "loss": 0.1514, "step": 2825 }, { "epoch": 4.07, "learning_rate": 4.7911111111111106e-08, "loss": 0.1392, "step": 2850 }, { "epoch": 4.08, "learning_rate": 4.7355555555555554e-08, "loss": 0.1362, "step": 2875 }, { "epoch": 4.08, "learning_rate": 4.68e-08, "loss": 0.1417, "step": 2900 }, { "epoch": 4.09, "learning_rate": 4.6244444444444444e-08, "loss": 0.1514, "step": 2925 }, { "epoch": 4.09, "learning_rate": 4.5688888888888886e-08, "loss": 0.1461, "step": 2950 }, { "epoch": 4.1, "learning_rate": 4.513333333333333e-08, "loss": 0.1355, "step": 2975 }, { "epoch": 4.1, "learning_rate": 4.4577777777777775e-08, "loss": 0.147, "step": 3000 }, { "epoch": 4.1, "eval_cer": 7.71447631754503, "eval_loss": 0.3965264558792114, "eval_runtime": 1162.9158, "eval_samples_per_second": 1.064, "eval_steps_per_second": 0.267, "eval_wer": 15.88067558675148, "step": 3000 }, { "epoch": 4.11, "learning_rate": 4.4022222222222224e-08, "loss": 0.135, "step": 3025 }, { "epoch": 4.11, "learning_rate": 4.3466666666666665e-08, "loss": 0.133, "step": 3050 }, { "epoch": 4.12, "learning_rate": 4.291111111111111e-08, "loss": 0.141, "step": 3075 }, { "epoch": 4.12, "learning_rate": 4.2355555555555555e-08, "loss": 0.1312, "step": 3100 }, { "epoch": 5.0, "learning_rate": 4.18e-08, "loss": 0.1332, "step": 3125 }, { "epoch": 5.01, "learning_rate": 4.1244444444444445e-08, "loss": 0.1314, "step": 3150 }, { "epoch": 5.01, "learning_rate": 4.068888888888889e-08, "loss": 0.139, "step": 3175 }, { "epoch": 5.02, "learning_rate": 4.013333333333333e-08, "loss": 0.1368, "step": 3200 }, { "epoch": 5.02, "learning_rate": 3.957777777777778e-08, "loss": 0.1367, "step": 3225 }, { "epoch": 5.03, "learning_rate": 3.9022222222222225e-08, "loss": 0.1448, "step": 3250 }, { "epoch": 5.03, "learning_rate": 3.846666666666667e-08, "loss": 0.1383, "step": 3275 }, { "epoch": 5.04, "learning_rate": 3.791111111111111e-08, "loss": 0.1374, "step": 3300 }, { "epoch": 5.04, "learning_rate": 3.735555555555555e-08, "loss": 0.1333, "step": 3325 }, { "epoch": 5.05, "learning_rate": 3.68e-08, "loss": 0.1325, "step": 3350 }, { "epoch": 5.05, "learning_rate": 3.624444444444445e-08, "loss": 0.1315, "step": 3375 }, { "epoch": 5.06, "learning_rate": 3.571111111111111e-08, "loss": 0.1311, "step": 3400 }, { "epoch": 5.06, "learning_rate": 3.515555555555556e-08, "loss": 0.1401, "step": 3425 }, { "epoch": 5.07, "learning_rate": 3.46e-08, "loss": 0.1297, "step": 3450 }, { "epoch": 5.07, "learning_rate": 3.404444444444444e-08, "loss": 0.1339, "step": 3475 }, { "epoch": 5.08, "learning_rate": 3.348888888888889e-08, "loss": 0.1292, "step": 3500 }, { "epoch": 5.08, "eval_cer": 7.706470980653769, "eval_loss": 0.3999946713447571, "eval_runtime": 1162.3373, "eval_samples_per_second": 1.064, "eval_steps_per_second": 0.267, "eval_wer": 15.85874095196315, "step": 3500 }, { "epoch": 5.08, "learning_rate": 3.293333333333333e-08, "loss": 0.132, "step": 3525 }, { "epoch": 5.09, "learning_rate": 3.237777777777778e-08, "loss": 0.1399, "step": 3550 }, { "epoch": 5.09, "learning_rate": 3.182222222222222e-08, "loss": 0.1248, "step": 3575 }, { "epoch": 5.1, "learning_rate": 3.126666666666666e-08, "loss": 0.1364, "step": 3600 }, { "epoch": 5.1, "learning_rate": 3.071111111111111e-08, "loss": 0.1318, "step": 3625 }, { "epoch": 5.11, "learning_rate": 3.015555555555556e-08, "loss": 0.1319, "step": 3650 }, { "epoch": 5.11, "learning_rate": 2.9599999999999997e-08, "loss": 0.1284, "step": 3675 }, { "epoch": 5.12, "learning_rate": 2.9044444444444442e-08, "loss": 0.1231, "step": 3700 }, { "epoch": 5.12, "learning_rate": 2.848888888888889e-08, "loss": 0.1305, "step": 3725 }, { "epoch": 6.0, "learning_rate": 2.7933333333333332e-08, "loss": 0.1231, "step": 3750 }, { "epoch": 6.01, "learning_rate": 2.7377777777777777e-08, "loss": 0.1236, "step": 3775 }, { "epoch": 6.01, "learning_rate": 2.682222222222222e-08, "loss": 0.1296, "step": 3800 }, { "epoch": 6.02, "learning_rate": 2.6266666666666664e-08, "loss": 0.129, "step": 3825 }, { "epoch": 6.02, "learning_rate": 2.5711111111111112e-08, "loss": 0.1317, "step": 3850 }, { "epoch": 6.03, "learning_rate": 2.5155555555555554e-08, "loss": 0.1415, "step": 3875 }, { "epoch": 6.03, "learning_rate": 2.46e-08, "loss": 0.1189, "step": 3900 }, { "epoch": 6.04, "learning_rate": 2.4044444444444444e-08, "loss": 0.1356, "step": 3925 }, { "epoch": 6.04, "learning_rate": 2.348888888888889e-08, "loss": 0.1232, "step": 3950 }, { "epoch": 6.05, "learning_rate": 2.2933333333333334e-08, "loss": 0.1315, "step": 3975 }, { "epoch": 6.05, "learning_rate": 2.2377777777777775e-08, "loss": 0.1187, "step": 4000 }, { "epoch": 6.05, "eval_cer": 7.639759839893262, "eval_loss": 0.40291938185691833, "eval_runtime": 1161.4251, "eval_samples_per_second": 1.065, "eval_steps_per_second": 0.267, "eval_wer": 15.749067778021498, "step": 4000 }, { "epoch": 6.06, "learning_rate": 2.1822222222222224e-08, "loss": 0.127, "step": 4025 }, { "epoch": 6.06, "learning_rate": 2.1266666666666665e-08, "loss": 0.1269, "step": 4050 }, { "epoch": 6.07, "learning_rate": 2.071111111111111e-08, "loss": 0.1273, "step": 4075 }, { "epoch": 6.07, "learning_rate": 2.0155555555555552e-08, "loss": 0.1253, "step": 4100 }, { "epoch": 6.08, "learning_rate": 1.96e-08, "loss": 0.1218, "step": 4125 }, { "epoch": 6.08, "learning_rate": 1.906666666666667e-08, "loss": 0.1284, "step": 4150 }, { "epoch": 6.09, "learning_rate": 1.851111111111111e-08, "loss": 0.1379, "step": 4175 }, { "epoch": 6.09, "learning_rate": 1.7955555555555555e-08, "loss": 0.1184, "step": 4200 }, { "epoch": 6.1, "learning_rate": 1.7399999999999997e-08, "loss": 0.133, "step": 4225 }, { "epoch": 6.1, "learning_rate": 1.6844444444444445e-08, "loss": 0.1334, "step": 4250 }, { "epoch": 6.11, "learning_rate": 1.6288888888888887e-08, "loss": 0.1184, "step": 4275 }, { "epoch": 6.11, "learning_rate": 1.5733333333333332e-08, "loss": 0.1254, "step": 4300 }, { "epoch": 6.12, "learning_rate": 1.5177777777777777e-08, "loss": 0.1212, "step": 4325 }, { "epoch": 6.12, "learning_rate": 1.4622222222222222e-08, "loss": 0.118, "step": 4350 }, { "epoch": 7.0, "learning_rate": 1.4066666666666665e-08, "loss": 0.124, "step": 4375 }, { "epoch": 7.01, "learning_rate": 1.3511111111111109e-08, "loss": 0.12, "step": 4400 }, { "epoch": 7.01, "learning_rate": 1.2955555555555555e-08, "loss": 0.1266, "step": 4425 }, { "epoch": 7.02, "learning_rate": 1.2399999999999999e-08, "loss": 0.1211, "step": 4450 }, { "epoch": 7.02, "learning_rate": 1.1844444444444445e-08, "loss": 0.1311, "step": 4475 }, { "epoch": 7.03, "learning_rate": 1.1288888888888889e-08, "loss": 0.1368, "step": 4500 }, { "epoch": 7.03, "eval_cer": 7.655770513675784, "eval_loss": 0.40414080023765564, "eval_runtime": 1163.7244, "eval_samples_per_second": 1.063, "eval_steps_per_second": 0.266, "eval_wer": 15.760035095415661, "step": 4500 }, { "epoch": 7.03, "learning_rate": 1.0733333333333333e-08, "loss": 0.1263, "step": 4525 }, { "epoch": 7.04, "learning_rate": 1.0177777777777777e-08, "loss": 0.1122, "step": 4550 }, { "epoch": 7.04, "learning_rate": 9.622222222222222e-09, "loss": 0.1285, "step": 4575 }, { "epoch": 7.05, "learning_rate": 9.066666666666667e-09, "loss": 0.1236, "step": 4600 }, { "epoch": 7.05, "learning_rate": 8.51111111111111e-09, "loss": 0.1237, "step": 4625 }, { "epoch": 7.06, "learning_rate": 7.955555555555555e-09, "loss": 0.1276, "step": 4650 }, { "epoch": 7.06, "learning_rate": 7.399999999999999e-09, "loss": 0.1252, "step": 4675 }, { "epoch": 7.07, "learning_rate": 6.844444444444444e-09, "loss": 0.1223, "step": 4700 }, { "epoch": 7.07, "learning_rate": 6.288888888888888e-09, "loss": 0.1232, "step": 4725 }, { "epoch": 7.08, "learning_rate": 5.733333333333333e-09, "loss": 0.1205, "step": 4750 }, { "epoch": 7.08, "learning_rate": 5.1777777777777775e-09, "loss": 0.1253, "step": 4775 }, { "epoch": 7.09, "learning_rate": 4.622222222222222e-09, "loss": 0.1359, "step": 4800 }, { "epoch": 7.09, "learning_rate": 4.066666666666666e-09, "loss": 0.1182, "step": 4825 }, { "epoch": 7.1, "learning_rate": 3.5111111111111113e-09, "loss": 0.1315, "step": 4850 }, { "epoch": 7.1, "learning_rate": 2.9555555555555554e-09, "loss": 0.1221, "step": 4875 }, { "epoch": 7.11, "learning_rate": 2.4e-09, "loss": 0.1175, "step": 4900 }, { "epoch": 7.11, "learning_rate": 1.8444444444444443e-09, "loss": 0.1226, "step": 4925 }, { "epoch": 7.12, "learning_rate": 1.288888888888889e-09, "loss": 0.1228, "step": 4950 }, { "epoch": 7.12, "learning_rate": 7.333333333333333e-10, "loss": 0.1236, "step": 4975 }, { "epoch": 8.0, "learning_rate": 1.7777777777777778e-10, "loss": 0.1231, "step": 5000 }, { "epoch": 8.0, "eval_cer": 7.6691127418278855, "eval_loss": 0.40414172410964966, "eval_runtime": 1171.5804, "eval_samples_per_second": 1.056, "eval_steps_per_second": 0.265, "eval_wer": 15.771002412809827, "step": 5000 }, { "epoch": 8.0, "step": 5000, "total_flos": 2.546786102870016e+20, "train_loss": 0.2192393777370453, "train_runtime": 54122.9791, "train_samples_per_second": 2.217, "train_steps_per_second": 0.092 } ], "max_steps": 5000, "num_train_epochs": 9223372036854775807, "total_flos": 2.546786102870016e+20, "trial_name": null, "trial_params": null }