{ "best_metric": 67.00100704934542, "best_model_checkpoint": "./whisper-small-finetune_egyptian/checkpoint-3000", "epoch": 2.8200789622109417, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 27.050291061401367, "learning_rate": 5.000000000000001e-07, "loss": 1.6686, "step": 25 }, { "epoch": 0.03, "grad_norm": 30.311138153076172, "learning_rate": 1.0000000000000002e-06, "loss": 1.5534, "step": 50 }, { "epoch": 0.04, "grad_norm": 28.575551986694336, "learning_rate": 1.5e-06, "loss": 1.5919, "step": 75 }, { "epoch": 0.06, "grad_norm": 33.246788024902344, "learning_rate": 2.0000000000000003e-06, "loss": 1.6118, "step": 100 }, { "epoch": 0.07, "grad_norm": 22.33919334411621, "learning_rate": 2.5e-06, "loss": 1.3478, "step": 125 }, { "epoch": 0.08, "grad_norm": 23.980566024780273, "learning_rate": 3e-06, "loss": 1.4199, "step": 150 }, { "epoch": 0.1, "grad_norm": 24.947845458984375, "learning_rate": 3.5e-06, "loss": 1.3642, "step": 175 }, { "epoch": 0.11, "grad_norm": 19.380922317504883, "learning_rate": 4.000000000000001e-06, "loss": 1.3299, "step": 200 }, { "epoch": 0.13, "grad_norm": 23.713611602783203, "learning_rate": 4.5e-06, "loss": 1.4656, "step": 225 }, { "epoch": 0.14, "grad_norm": 24.127946853637695, "learning_rate": 5e-06, "loss": 1.3392, "step": 250 }, { "epoch": 0.16, "grad_norm": 19.620635986328125, "learning_rate": 5.500000000000001e-06, "loss": 1.2386, "step": 275 }, { "epoch": 0.17, "grad_norm": 30.834217071533203, "learning_rate": 6e-06, "loss": 1.2566, "step": 300 }, { "epoch": 0.18, "grad_norm": 27.610654830932617, "learning_rate": 6.5000000000000004e-06, "loss": 1.3781, "step": 325 }, { "epoch": 0.2, "grad_norm": 22.86842155456543, "learning_rate": 7e-06, "loss": 1.2213, "step": 350 }, { "epoch": 0.21, "grad_norm": 17.04463768005371, "learning_rate": 7.500000000000001e-06, "loss": 1.1576, "step": 375 }, { "epoch": 0.23, "grad_norm": 21.666824340820312, "learning_rate": 8.000000000000001e-06, "loss": 1.234, "step": 400 }, { "epoch": 0.24, "grad_norm": 22.232830047607422, "learning_rate": 8.5e-06, "loss": 1.1243, "step": 425 }, { "epoch": 0.25, "grad_norm": 22.612762451171875, "learning_rate": 9e-06, "loss": 1.1452, "step": 450 }, { "epoch": 0.27, "grad_norm": 15.275314331054688, "learning_rate": 9.5e-06, "loss": 1.1841, "step": 475 }, { "epoch": 0.28, "grad_norm": 15.876676559448242, "learning_rate": 1e-05, "loss": 1.2719, "step": 500 }, { "epoch": 0.3, "grad_norm": 21.2716007232666, "learning_rate": 9.944444444444445e-06, "loss": 1.2149, "step": 525 }, { "epoch": 0.31, "grad_norm": 22.795671463012695, "learning_rate": 9.88888888888889e-06, "loss": 1.2219, "step": 550 }, { "epoch": 0.32, "grad_norm": 19.169240951538086, "learning_rate": 9.833333333333333e-06, "loss": 1.2376, "step": 575 }, { "epoch": 0.34, "grad_norm": 15.571391105651855, "learning_rate": 9.777777777777779e-06, "loss": 1.1932, "step": 600 }, { "epoch": 0.35, "grad_norm": 16.36895751953125, "learning_rate": 9.722222222222223e-06, "loss": 1.2936, "step": 625 }, { "epoch": 0.37, "grad_norm": 21.461872100830078, "learning_rate": 9.666666666666667e-06, "loss": 1.2132, "step": 650 }, { "epoch": 0.38, "grad_norm": 19.754638671875, "learning_rate": 9.611111111111112e-06, "loss": 1.182, "step": 675 }, { "epoch": 0.39, "grad_norm": 15.755151748657227, "learning_rate": 9.555555555555556e-06, "loss": 1.1941, "step": 700 }, { "epoch": 0.41, "grad_norm": 22.760698318481445, "learning_rate": 9.5e-06, "loss": 1.2489, "step": 725 }, { "epoch": 0.42, "grad_norm": 18.82946014404297, "learning_rate": 9.444444444444445e-06, "loss": 1.1129, "step": 750 }, { "epoch": 0.44, "grad_norm": 18.776079177856445, "learning_rate": 9.38888888888889e-06, "loss": 1.1889, "step": 775 }, { "epoch": 0.45, "grad_norm": 19.27667999267578, "learning_rate": 9.333333333333334e-06, "loss": 1.1342, "step": 800 }, { "epoch": 0.47, "grad_norm": 17.593551635742188, "learning_rate": 9.277777777777778e-06, "loss": 1.2009, "step": 825 }, { "epoch": 0.48, "grad_norm": 19.147933959960938, "learning_rate": 9.222222222222224e-06, "loss": 1.1845, "step": 850 }, { "epoch": 0.49, "grad_norm": 22.792325973510742, "learning_rate": 9.166666666666666e-06, "loss": 1.2317, "step": 875 }, { "epoch": 0.51, "grad_norm": 18.69807243347168, "learning_rate": 9.111111111111112e-06, "loss": 1.0044, "step": 900 }, { "epoch": 0.52, "grad_norm": 28.471355438232422, "learning_rate": 9.055555555555556e-06, "loss": 1.3067, "step": 925 }, { "epoch": 0.54, "grad_norm": 18.05466079711914, "learning_rate": 9e-06, "loss": 1.1757, "step": 950 }, { "epoch": 0.55, "grad_norm": 21.076562881469727, "learning_rate": 8.944444444444446e-06, "loss": 1.1317, "step": 975 }, { "epoch": 0.56, "grad_norm": 19.519039154052734, "learning_rate": 8.888888888888888e-06, "loss": 1.1256, "step": 1000 }, { "epoch": 0.56, "eval_cer": 42.770119634502464, "eval_loss": 1.1109758615493774, "eval_runtime": 907.6046, "eval_samples_per_second": 3.907, "eval_steps_per_second": 0.489, "eval_wer": 71.8147029204431, "step": 1000 }, { "epoch": 0.58, "grad_norm": 15.736879348754883, "learning_rate": 8.833333333333334e-06, "loss": 1.0697, "step": 1025 }, { "epoch": 0.59, "grad_norm": 17.32091522216797, "learning_rate": 8.777777777777778e-06, "loss": 1.0977, "step": 1050 }, { "epoch": 0.61, "grad_norm": 19.34945297241211, "learning_rate": 8.722222222222224e-06, "loss": 1.1967, "step": 1075 }, { "epoch": 0.62, "grad_norm": 20.485715866088867, "learning_rate": 8.666666666666668e-06, "loss": 1.1102, "step": 1100 }, { "epoch": 0.63, "grad_norm": 23.4635009765625, "learning_rate": 8.611111111111112e-06, "loss": 1.1413, "step": 1125 }, { "epoch": 0.65, "grad_norm": 19.843730926513672, "learning_rate": 8.555555555555556e-06, "loss": 1.157, "step": 1150 }, { "epoch": 0.66, "grad_norm": 21.161502838134766, "learning_rate": 8.5e-06, "loss": 1.0979, "step": 1175 }, { "epoch": 0.68, "grad_norm": 20.150407791137695, "learning_rate": 8.444444444444446e-06, "loss": 1.2714, "step": 1200 }, { "epoch": 0.69, "grad_norm": 17.049306869506836, "learning_rate": 8.38888888888889e-06, "loss": 1.0982, "step": 1225 }, { "epoch": 0.71, "grad_norm": 19.597333908081055, "learning_rate": 8.333333333333334e-06, "loss": 1.0803, "step": 1250 }, { "epoch": 0.72, "grad_norm": 17.46756935119629, "learning_rate": 8.277777777777778e-06, "loss": 1.0542, "step": 1275 }, { "epoch": 0.73, "grad_norm": 16.63570213317871, "learning_rate": 8.222222222222222e-06, "loss": 1.0432, "step": 1300 }, { "epoch": 0.75, "grad_norm": 16.54294204711914, "learning_rate": 8.166666666666668e-06, "loss": 0.9944, "step": 1325 }, { "epoch": 0.76, "grad_norm": 10.231407165527344, "learning_rate": 8.111111111111112e-06, "loss": 1.1438, "step": 1350 }, { "epoch": 0.78, "grad_norm": 17.762981414794922, "learning_rate": 8.055555555555557e-06, "loss": 1.1539, "step": 1375 }, { "epoch": 0.79, "grad_norm": 13.66717529296875, "learning_rate": 8.000000000000001e-06, "loss": 1.0529, "step": 1400 }, { "epoch": 0.8, "grad_norm": 17.681964874267578, "learning_rate": 7.944444444444445e-06, "loss": 1.158, "step": 1425 }, { "epoch": 0.82, "grad_norm": 17.317996978759766, "learning_rate": 7.88888888888889e-06, "loss": 1.0955, "step": 1450 }, { "epoch": 0.83, "grad_norm": 15.947030067443848, "learning_rate": 7.833333333333333e-06, "loss": 1.1302, "step": 1475 }, { "epoch": 0.85, "grad_norm": 17.65252113342285, "learning_rate": 7.77777777777778e-06, "loss": 1.0589, "step": 1500 }, { "epoch": 0.86, "grad_norm": 17.65298843383789, "learning_rate": 7.722222222222223e-06, "loss": 1.1063, "step": 1525 }, { "epoch": 0.87, "grad_norm": 14.979933738708496, "learning_rate": 7.666666666666667e-06, "loss": 1.1141, "step": 1550 }, { "epoch": 0.89, "grad_norm": 16.299774169921875, "learning_rate": 7.611111111111111e-06, "loss": 1.0243, "step": 1575 }, { "epoch": 0.9, "grad_norm": 23.286108016967773, "learning_rate": 7.555555555555556e-06, "loss": 1.0325, "step": 1600 }, { "epoch": 0.92, "grad_norm": 20.537403106689453, "learning_rate": 7.500000000000001e-06, "loss": 1.0279, "step": 1625 }, { "epoch": 0.93, "grad_norm": 19.857568740844727, "learning_rate": 7.444444444444445e-06, "loss": 1.0283, "step": 1650 }, { "epoch": 0.94, "grad_norm": 18.773427963256836, "learning_rate": 7.38888888888889e-06, "loss": 1.1405, "step": 1675 }, { "epoch": 0.96, "grad_norm": 17.24260139465332, "learning_rate": 7.333333333333333e-06, "loss": 1.0614, "step": 1700 }, { "epoch": 0.97, "grad_norm": 21.773202896118164, "learning_rate": 7.277777777777778e-06, "loss": 1.0629, "step": 1725 }, { "epoch": 0.99, "grad_norm": 19.747053146362305, "learning_rate": 7.222222222222223e-06, "loss": 1.0446, "step": 1750 }, { "epoch": 1.0, "grad_norm": 11.02785587310791, "learning_rate": 7.166666666666667e-06, "loss": 1.0765, "step": 1775 }, { "epoch": 1.02, "grad_norm": 11.050880432128906, "learning_rate": 7.111111111111112e-06, "loss": 0.9379, "step": 1800 }, { "epoch": 1.03, "grad_norm": 16.35251808166504, "learning_rate": 7.055555555555557e-06, "loss": 0.9255, "step": 1825 }, { "epoch": 1.04, "grad_norm": 14.105949401855469, "learning_rate": 7e-06, "loss": 0.7552, "step": 1850 }, { "epoch": 1.06, "grad_norm": 12.735761642456055, "learning_rate": 6.944444444444445e-06, "loss": 0.8187, "step": 1875 }, { "epoch": 1.07, "grad_norm": 15.418607711791992, "learning_rate": 6.88888888888889e-06, "loss": 0.8395, "step": 1900 }, { "epoch": 1.09, "grad_norm": 14.72042179107666, "learning_rate": 6.833333333333334e-06, "loss": 0.8058, "step": 1925 }, { "epoch": 1.1, "grad_norm": 14.438790321350098, "learning_rate": 6.777777777777779e-06, "loss": 0.7582, "step": 1950 }, { "epoch": 1.11, "grad_norm": 17.728883743286133, "learning_rate": 6.7222222222222235e-06, "loss": 0.7984, "step": 1975 }, { "epoch": 1.13, "grad_norm": 19.346345901489258, "learning_rate": 6.666666666666667e-06, "loss": 0.7331, "step": 2000 }, { "epoch": 1.13, "eval_cer": 51.86124281721983, "eval_loss": 1.032954454421997, "eval_runtime": 959.1561, "eval_samples_per_second": 3.697, "eval_steps_per_second": 0.463, "eval_wer": 82.26384692849949, "step": 2000 }, { "epoch": 1.14, "grad_norm": 14.71066665649414, "learning_rate": 6.6111111111111115e-06, "loss": 0.7684, "step": 2025 }, { "epoch": 1.16, "grad_norm": 13.412025451660156, "learning_rate": 6.555555555555556e-06, "loss": 0.7742, "step": 2050 }, { "epoch": 1.17, "grad_norm": 17.332744598388672, "learning_rate": 6.5000000000000004e-06, "loss": 0.8515, "step": 2075 }, { "epoch": 1.18, "grad_norm": 14.944128036499023, "learning_rate": 6.444444444444445e-06, "loss": 0.8012, "step": 2100 }, { "epoch": 1.2, "grad_norm": 18.193222045898438, "learning_rate": 6.3888888888888885e-06, "loss": 0.8217, "step": 2125 }, { "epoch": 1.21, "grad_norm": 18.6146240234375, "learning_rate": 6.333333333333333e-06, "loss": 0.8732, "step": 2150 }, { "epoch": 1.23, "grad_norm": 17.37205696105957, "learning_rate": 6.277777777777778e-06, "loss": 0.8824, "step": 2175 }, { "epoch": 1.24, "grad_norm": 18.284181594848633, "learning_rate": 6.222222222222223e-06, "loss": 0.7562, "step": 2200 }, { "epoch": 1.25, "grad_norm": 13.632092475891113, "learning_rate": 6.166666666666667e-06, "loss": 0.7502, "step": 2225 }, { "epoch": 1.27, "grad_norm": 16.052021026611328, "learning_rate": 6.111111111111112e-06, "loss": 0.8167, "step": 2250 }, { "epoch": 1.28, "grad_norm": 17.9954833984375, "learning_rate": 6.055555555555555e-06, "loss": 0.7396, "step": 2275 }, { "epoch": 1.3, "grad_norm": 19.523056030273438, "learning_rate": 6e-06, "loss": 0.6644, "step": 2300 }, { "epoch": 1.31, "grad_norm": 15.319243431091309, "learning_rate": 5.944444444444445e-06, "loss": 0.7699, "step": 2325 }, { "epoch": 1.33, "grad_norm": 13.561873435974121, "learning_rate": 5.88888888888889e-06, "loss": 0.7416, "step": 2350 }, { "epoch": 1.34, "grad_norm": 13.100536346435547, "learning_rate": 5.833333333333334e-06, "loss": 0.8497, "step": 2375 }, { "epoch": 1.35, "grad_norm": 15.060547828674316, "learning_rate": 5.777777777777778e-06, "loss": 0.772, "step": 2400 }, { "epoch": 1.37, "grad_norm": 15.338216781616211, "learning_rate": 5.722222222222222e-06, "loss": 0.7752, "step": 2425 }, { "epoch": 1.38, "grad_norm": 12.978151321411133, "learning_rate": 5.666666666666667e-06, "loss": 0.8158, "step": 2450 }, { "epoch": 1.4, "grad_norm": 16.481496810913086, "learning_rate": 5.611111111111112e-06, "loss": 0.7345, "step": 2475 }, { "epoch": 1.41, "grad_norm": 15.723489761352539, "learning_rate": 5.555555555555557e-06, "loss": 0.8427, "step": 2500 }, { "epoch": 1.42, "grad_norm": 15.024873733520508, "learning_rate": 5.500000000000001e-06, "loss": 0.6852, "step": 2525 }, { "epoch": 1.44, "grad_norm": 14.445104598999023, "learning_rate": 5.444444444444445e-06, "loss": 0.8407, "step": 2550 }, { "epoch": 1.45, "grad_norm": 18.344518661499023, "learning_rate": 5.388888888888889e-06, "loss": 0.7858, "step": 2575 }, { "epoch": 1.47, "grad_norm": 15.053083419799805, "learning_rate": 5.333333333333334e-06, "loss": 0.8178, "step": 2600 }, { "epoch": 1.48, "grad_norm": 15.452193260192871, "learning_rate": 5.2777777777777785e-06, "loss": 0.6985, "step": 2625 }, { "epoch": 1.49, "grad_norm": 20.083093643188477, "learning_rate": 5.2222222222222226e-06, "loss": 0.7759, "step": 2650 }, { "epoch": 1.51, "grad_norm": 17.425199508666992, "learning_rate": 5.1666666666666675e-06, "loss": 0.7553, "step": 2675 }, { "epoch": 1.52, "grad_norm": 11.294227600097656, "learning_rate": 5.1111111111111115e-06, "loss": 0.7384, "step": 2700 }, { "epoch": 1.54, "grad_norm": 11.263272285461426, "learning_rate": 5.0555555555555555e-06, "loss": 0.7835, "step": 2725 }, { "epoch": 1.55, "grad_norm": 11.892974853515625, "learning_rate": 5e-06, "loss": 0.7622, "step": 2750 }, { "epoch": 1.57, "grad_norm": 11.682631492614746, "learning_rate": 4.944444444444445e-06, "loss": 0.6707, "step": 2775 }, { "epoch": 1.58, "grad_norm": 13.932768821716309, "learning_rate": 4.888888888888889e-06, "loss": 0.7527, "step": 2800 }, { "epoch": 1.59, "grad_norm": 14.168807983398438, "learning_rate": 4.833333333333333e-06, "loss": 0.7444, "step": 2825 }, { "epoch": 1.61, "grad_norm": 8.4957857131958, "learning_rate": 4.777777777777778e-06, "loss": 0.8083, "step": 2850 }, { "epoch": 1.62, "grad_norm": 17.012508392333984, "learning_rate": 4.722222222222222e-06, "loss": 0.8356, "step": 2875 }, { "epoch": 1.64, "grad_norm": 14.761796951293945, "learning_rate": 4.666666666666667e-06, "loss": 0.8432, "step": 2900 }, { "epoch": 1.65, "grad_norm": 16.764440536499023, "learning_rate": 4.611111111111112e-06, "loss": 0.8869, "step": 2925 }, { "epoch": 1.66, "grad_norm": 12.401867866516113, "learning_rate": 4.555555555555556e-06, "loss": 0.7753, "step": 2950 }, { "epoch": 1.68, "grad_norm": 10.731365203857422, "learning_rate": 4.5e-06, "loss": 0.7484, "step": 2975 }, { "epoch": 1.69, "grad_norm": 12.973823547363281, "learning_rate": 4.444444444444444e-06, "loss": 0.7332, "step": 3000 }, { "epoch": 1.69, "eval_cer": 39.18814959022828, "eval_loss": 0.994914174079895, "eval_runtime": 889.7059, "eval_samples_per_second": 3.986, "eval_steps_per_second": 0.499, "eval_wer": 67.00100704934542, "step": 3000 }, { "epoch": 1.71, "grad_norm": 12.647448539733887, "learning_rate": 4.388888888888889e-06, "loss": 0.7481, "step": 3025 }, { "epoch": 1.72, "grad_norm": 16.329065322875977, "learning_rate": 4.333333333333334e-06, "loss": 0.7734, "step": 3050 }, { "epoch": 1.73, "grad_norm": 13.567760467529297, "learning_rate": 4.277777777777778e-06, "loss": 0.7122, "step": 3075 }, { "epoch": 1.75, "grad_norm": 12.22362232208252, "learning_rate": 4.222222222222223e-06, "loss": 0.7515, "step": 3100 }, { "epoch": 1.76, "grad_norm": 18.0850772857666, "learning_rate": 4.166666666666667e-06, "loss": 0.6836, "step": 3125 }, { "epoch": 1.78, "grad_norm": 24.46388816833496, "learning_rate": 4.111111111111111e-06, "loss": 0.8578, "step": 3150 }, { "epoch": 1.79, "grad_norm": 15.071797370910645, "learning_rate": 4.055555555555556e-06, "loss": 0.7447, "step": 3175 }, { "epoch": 1.8, "grad_norm": 12.878135681152344, "learning_rate": 4.000000000000001e-06, "loss": 0.7035, "step": 3200 }, { "epoch": 1.82, "grad_norm": 14.950927734375, "learning_rate": 3.944444444444445e-06, "loss": 0.7605, "step": 3225 }, { "epoch": 1.83, "grad_norm": 15.3999605178833, "learning_rate": 3.88888888888889e-06, "loss": 0.7413, "step": 3250 }, { "epoch": 1.85, "grad_norm": 14.097206115722656, "learning_rate": 3.833333333333334e-06, "loss": 0.7473, "step": 3275 }, { "epoch": 1.86, "grad_norm": 15.387040138244629, "learning_rate": 3.777777777777778e-06, "loss": 0.716, "step": 3300 }, { "epoch": 1.88, "grad_norm": 20.502208709716797, "learning_rate": 3.7222222222222225e-06, "loss": 0.7577, "step": 3325 }, { "epoch": 1.89, "grad_norm": 16.519433975219727, "learning_rate": 3.6666666666666666e-06, "loss": 0.6927, "step": 3350 }, { "epoch": 1.9, "grad_norm": 15.588461875915527, "learning_rate": 3.6111111111111115e-06, "loss": 0.7123, "step": 3375 }, { "epoch": 1.92, "grad_norm": 12.752546310424805, "learning_rate": 3.555555555555556e-06, "loss": 0.8407, "step": 3400 }, { "epoch": 1.93, "grad_norm": 15.805379867553711, "learning_rate": 3.5e-06, "loss": 0.751, "step": 3425 }, { "epoch": 1.95, "grad_norm": 13.27651596069336, "learning_rate": 3.444444444444445e-06, "loss": 0.8211, "step": 3450 }, { "epoch": 1.96, "grad_norm": 11.968923568725586, "learning_rate": 3.3888888888888893e-06, "loss": 0.6975, "step": 3475 }, { "epoch": 1.97, "grad_norm": 13.906292915344238, "learning_rate": 3.3333333333333333e-06, "loss": 0.8429, "step": 3500 }, { "epoch": 1.99, "grad_norm": 13.810843467712402, "learning_rate": 3.277777777777778e-06, "loss": 0.7043, "step": 3525 }, { "epoch": 2.0, "grad_norm": 14.037764549255371, "learning_rate": 3.2222222222222227e-06, "loss": 0.6782, "step": 3550 }, { "epoch": 2.02, "grad_norm": 8.811666488647461, "learning_rate": 3.1666666666666667e-06, "loss": 0.5169, "step": 3575 }, { "epoch": 2.03, "grad_norm": 12.727725982666016, "learning_rate": 3.1111111111111116e-06, "loss": 0.5087, "step": 3600 }, { "epoch": 2.04, "grad_norm": 13.598219871520996, "learning_rate": 3.055555555555556e-06, "loss": 0.6104, "step": 3625 }, { "epoch": 2.06, "grad_norm": 12.1933012008667, "learning_rate": 3e-06, "loss": 0.5486, "step": 3650 }, { "epoch": 2.07, "grad_norm": 7.250199317932129, "learning_rate": 2.944444444444445e-06, "loss": 0.473, "step": 3675 }, { "epoch": 2.09, "grad_norm": 11.98234748840332, "learning_rate": 2.888888888888889e-06, "loss": 0.5483, "step": 3700 }, { "epoch": 2.1, "grad_norm": 15.183174133300781, "learning_rate": 2.8333333333333335e-06, "loss": 0.5781, "step": 3725 }, { "epoch": 2.12, "grad_norm": 14.038616180419922, "learning_rate": 2.7777777777777783e-06, "loss": 0.5383, "step": 3750 }, { "epoch": 2.13, "grad_norm": 11.775838851928711, "learning_rate": 2.7222222222222224e-06, "loss": 0.6216, "step": 3775 }, { "epoch": 2.14, "grad_norm": 11.929536819458008, "learning_rate": 2.666666666666667e-06, "loss": 0.4929, "step": 3800 }, { "epoch": 2.16, "grad_norm": 13.312314987182617, "learning_rate": 2.6111111111111113e-06, "loss": 0.4658, "step": 3825 }, { "epoch": 2.17, "grad_norm": 15.442246437072754, "learning_rate": 2.5555555555555557e-06, "loss": 0.498, "step": 3850 }, { "epoch": 2.19, "grad_norm": 10.368426322937012, "learning_rate": 2.5e-06, "loss": 0.5588, "step": 3875 }, { "epoch": 2.2, "grad_norm": 9.702683448791504, "learning_rate": 2.4444444444444447e-06, "loss": 0.5221, "step": 3900 }, { "epoch": 2.21, "grad_norm": 12.07742977142334, "learning_rate": 2.388888888888889e-06, "loss": 0.5033, "step": 3925 }, { "epoch": 2.23, "grad_norm": 15.718260765075684, "learning_rate": 2.3333333333333336e-06, "loss": 0.5913, "step": 3950 }, { "epoch": 2.24, "grad_norm": 10.115168571472168, "learning_rate": 2.277777777777778e-06, "loss": 0.5117, "step": 3975 }, { "epoch": 2.26, "grad_norm": 9.728811264038086, "learning_rate": 2.222222222222222e-06, "loss": 0.5493, "step": 4000 }, { "epoch": 2.26, "eval_cer": 41.269978333909, "eval_loss": 1.0036817789077759, "eval_runtime": 911.3291, "eval_samples_per_second": 3.891, "eval_steps_per_second": 0.487, "eval_wer": 68.10473313192347, "step": 4000 }, { "epoch": 2.27, "grad_norm": 12.741899490356445, "learning_rate": 2.166666666666667e-06, "loss": 0.4817, "step": 4025 }, { "epoch": 2.28, "grad_norm": 14.738693237304688, "learning_rate": 2.1111111111111114e-06, "loss": 0.5172, "step": 4050 }, { "epoch": 2.3, "grad_norm": 10.202011108398438, "learning_rate": 2.0555555555555555e-06, "loss": 0.4856, "step": 4075 }, { "epoch": 2.31, "grad_norm": 11.9408597946167, "learning_rate": 2.0000000000000003e-06, "loss": 0.527, "step": 4100 }, { "epoch": 2.33, "grad_norm": 8.329051971435547, "learning_rate": 1.944444444444445e-06, "loss": 0.432, "step": 4125 }, { "epoch": 2.34, "grad_norm": 12.512899398803711, "learning_rate": 1.888888888888889e-06, "loss": 0.4836, "step": 4150 }, { "epoch": 2.35, "grad_norm": 8.79201889038086, "learning_rate": 1.8333333333333333e-06, "loss": 0.52, "step": 4175 }, { "epoch": 2.37, "grad_norm": 15.417551040649414, "learning_rate": 1.777777777777778e-06, "loss": 0.5239, "step": 4200 }, { "epoch": 2.38, "grad_norm": 10.516593933105469, "learning_rate": 1.7222222222222224e-06, "loss": 0.5497, "step": 4225 }, { "epoch": 2.4, "grad_norm": 11.659682273864746, "learning_rate": 1.6666666666666667e-06, "loss": 0.4883, "step": 4250 }, { "epoch": 2.41, "grad_norm": 15.743680000305176, "learning_rate": 1.6111111111111113e-06, "loss": 0.5215, "step": 4275 }, { "epoch": 2.43, "grad_norm": 15.303753852844238, "learning_rate": 1.5555555555555558e-06, "loss": 0.5714, "step": 4300 }, { "epoch": 2.44, "grad_norm": 13.083234786987305, "learning_rate": 1.5e-06, "loss": 0.479, "step": 4325 }, { "epoch": 2.45, "grad_norm": 7.123412132263184, "learning_rate": 1.4444444444444445e-06, "loss": 0.4791, "step": 4350 }, { "epoch": 2.47, "grad_norm": 14.159947395324707, "learning_rate": 1.3888888888888892e-06, "loss": 0.6012, "step": 4375 }, { "epoch": 2.48, "grad_norm": 16.015972137451172, "learning_rate": 1.3333333333333334e-06, "loss": 0.4877, "step": 4400 }, { "epoch": 2.5, "grad_norm": 14.357626914978027, "learning_rate": 1.2777777777777779e-06, "loss": 0.5159, "step": 4425 }, { "epoch": 2.51, "grad_norm": 7.9363274574279785, "learning_rate": 1.2222222222222223e-06, "loss": 0.523, "step": 4450 }, { "epoch": 2.52, "grad_norm": 15.333666801452637, "learning_rate": 1.1666666666666668e-06, "loss": 0.5083, "step": 4475 }, { "epoch": 2.54, "grad_norm": 19.310766220092773, "learning_rate": 1.111111111111111e-06, "loss": 0.5721, "step": 4500 }, { "epoch": 2.55, "grad_norm": 16.54596710205078, "learning_rate": 1.0555555555555557e-06, "loss": 0.6224, "step": 4525 }, { "epoch": 2.57, "grad_norm": 10.166070938110352, "learning_rate": 1.0000000000000002e-06, "loss": 0.5366, "step": 4550 }, { "epoch": 2.58, "grad_norm": 12.599578857421875, "learning_rate": 9.444444444444445e-07, "loss": 0.4959, "step": 4575 }, { "epoch": 2.59, "grad_norm": 7.448824405670166, "learning_rate": 8.88888888888889e-07, "loss": 0.4986, "step": 4600 }, { "epoch": 2.61, "grad_norm": 11.634490013122559, "learning_rate": 8.333333333333333e-07, "loss": 0.4859, "step": 4625 }, { "epoch": 2.62, "grad_norm": 10.882704734802246, "learning_rate": 7.777777777777779e-07, "loss": 0.5328, "step": 4650 }, { "epoch": 2.64, "grad_norm": 9.251214027404785, "learning_rate": 7.222222222222222e-07, "loss": 0.5354, "step": 4675 }, { "epoch": 2.65, "grad_norm": 15.835794448852539, "learning_rate": 6.666666666666667e-07, "loss": 0.5906, "step": 4700 }, { "epoch": 2.66, "grad_norm": 11.154616355895996, "learning_rate": 6.111111111111112e-07, "loss": 0.5786, "step": 4725 }, { "epoch": 2.68, "grad_norm": 13.227851867675781, "learning_rate": 5.555555555555555e-07, "loss": 0.4945, "step": 4750 }, { "epoch": 2.69, "grad_norm": 13.796296119689941, "learning_rate": 5.000000000000001e-07, "loss": 0.5045, "step": 4775 }, { "epoch": 2.71, "grad_norm": 13.530144691467285, "learning_rate": 4.444444444444445e-07, "loss": 0.5801, "step": 4800 }, { "epoch": 2.72, "grad_norm": 7.212181091308594, "learning_rate": 3.8888888888888895e-07, "loss": 0.4827, "step": 4825 }, { "epoch": 2.74, "grad_norm": 15.471014022827148, "learning_rate": 3.3333333333333335e-07, "loss": 0.5754, "step": 4850 }, { "epoch": 2.75, "grad_norm": 14.308059692382812, "learning_rate": 2.7777777777777776e-07, "loss": 0.5391, "step": 4875 }, { "epoch": 2.76, "grad_norm": 10.918014526367188, "learning_rate": 2.2222222222222224e-07, "loss": 0.6082, "step": 4900 }, { "epoch": 2.78, "grad_norm": 7.789730072021484, "learning_rate": 1.6666666666666668e-07, "loss": 0.4707, "step": 4925 }, { "epoch": 2.79, "grad_norm": 12.3082275390625, "learning_rate": 1.1111111111111112e-07, "loss": 0.552, "step": 4950 }, { "epoch": 2.81, "grad_norm": 15.524524688720703, "learning_rate": 5.555555555555556e-08, "loss": 0.4998, "step": 4975 }, { "epoch": 2.82, "grad_norm": 11.517424583435059, "learning_rate": 0.0, "loss": 0.4667, "step": 5000 }, { "epoch": 2.82, "eval_cer": 42.82114484880836, "eval_loss": 0.9976443648338318, "eval_runtime": 928.1561, "eval_samples_per_second": 3.82, "eval_steps_per_second": 0.478, "eval_wer": 70.0221550855992, "step": 5000 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 1.154110732959744e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }