|
{ |
|
"best_metric": 67.00100704934542, |
|
"best_model_checkpoint": "./whisper-small-finetune_egyptian/checkpoint-3000", |
|
"epoch": 2.8200789622109417, |
|
"eval_steps": 1000, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 27.050291061401367, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 1.6686, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 30.311138153076172, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.5534, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 28.575551986694336, |
|
"learning_rate": 1.5e-06, |
|
"loss": 1.5919, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 33.246788024902344, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.6118, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 22.33919334411621, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.3478, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 23.980566024780273, |
|
"learning_rate": 3e-06, |
|
"loss": 1.4199, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 24.947845458984375, |
|
"learning_rate": 3.5e-06, |
|
"loss": 1.3642, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 19.380922317504883, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.3299, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 23.713611602783203, |
|
"learning_rate": 4.5e-06, |
|
"loss": 1.4656, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 24.127946853637695, |
|
"learning_rate": 5e-06, |
|
"loss": 1.3392, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 19.620635986328125, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 1.2386, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 30.834217071533203, |
|
"learning_rate": 6e-06, |
|
"loss": 1.2566, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 27.610654830932617, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 1.3781, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 22.86842155456543, |
|
"learning_rate": 7e-06, |
|
"loss": 1.2213, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 17.04463768005371, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.1576, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 21.666824340820312, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.234, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 22.232830047607422, |
|
"learning_rate": 8.5e-06, |
|
"loss": 1.1243, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 22.612762451171875, |
|
"learning_rate": 9e-06, |
|
"loss": 1.1452, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 15.275314331054688, |
|
"learning_rate": 9.5e-06, |
|
"loss": 1.1841, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 15.876676559448242, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2719, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 21.2716007232666, |
|
"learning_rate": 9.944444444444445e-06, |
|
"loss": 1.2149, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 22.795671463012695, |
|
"learning_rate": 9.88888888888889e-06, |
|
"loss": 1.2219, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 19.169240951538086, |
|
"learning_rate": 9.833333333333333e-06, |
|
"loss": 1.2376, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 15.571391105651855, |
|
"learning_rate": 9.777777777777779e-06, |
|
"loss": 1.1932, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 16.36895751953125, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 1.2936, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 21.461872100830078, |
|
"learning_rate": 9.666666666666667e-06, |
|
"loss": 1.2132, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 19.754638671875, |
|
"learning_rate": 9.611111111111112e-06, |
|
"loss": 1.182, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 15.755151748657227, |
|
"learning_rate": 9.555555555555556e-06, |
|
"loss": 1.1941, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 22.760698318481445, |
|
"learning_rate": 9.5e-06, |
|
"loss": 1.2489, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 18.82946014404297, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 1.1129, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 18.776079177856445, |
|
"learning_rate": 9.38888888888889e-06, |
|
"loss": 1.1889, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 19.27667999267578, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 1.1342, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 17.593551635742188, |
|
"learning_rate": 9.277777777777778e-06, |
|
"loss": 1.2009, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 19.147933959960938, |
|
"learning_rate": 9.222222222222224e-06, |
|
"loss": 1.1845, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 22.792325973510742, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 1.2317, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 18.69807243347168, |
|
"learning_rate": 9.111111111111112e-06, |
|
"loss": 1.0044, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 28.471355438232422, |
|
"learning_rate": 9.055555555555556e-06, |
|
"loss": 1.3067, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 18.05466079711914, |
|
"learning_rate": 9e-06, |
|
"loss": 1.1757, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 21.076562881469727, |
|
"learning_rate": 8.944444444444446e-06, |
|
"loss": 1.1317, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 19.519039154052734, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 1.1256, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_cer": 42.770119634502464, |
|
"eval_loss": 1.1109758615493774, |
|
"eval_runtime": 907.6046, |
|
"eval_samples_per_second": 3.907, |
|
"eval_steps_per_second": 0.489, |
|
"eval_wer": 71.8147029204431, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 15.736879348754883, |
|
"learning_rate": 8.833333333333334e-06, |
|
"loss": 1.0697, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 17.32091522216797, |
|
"learning_rate": 8.777777777777778e-06, |
|
"loss": 1.0977, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 19.34945297241211, |
|
"learning_rate": 8.722222222222224e-06, |
|
"loss": 1.1967, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 20.485715866088867, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 1.1102, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 23.4635009765625, |
|
"learning_rate": 8.611111111111112e-06, |
|
"loss": 1.1413, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 19.843730926513672, |
|
"learning_rate": 8.555555555555556e-06, |
|
"loss": 1.157, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 21.161502838134766, |
|
"learning_rate": 8.5e-06, |
|
"loss": 1.0979, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 20.150407791137695, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 1.2714, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 17.049306869506836, |
|
"learning_rate": 8.38888888888889e-06, |
|
"loss": 1.0982, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 19.597333908081055, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.0803, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 17.46756935119629, |
|
"learning_rate": 8.277777777777778e-06, |
|
"loss": 1.0542, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 16.63570213317871, |
|
"learning_rate": 8.222222222222222e-06, |
|
"loss": 1.0432, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 16.54294204711914, |
|
"learning_rate": 8.166666666666668e-06, |
|
"loss": 0.9944, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 10.231407165527344, |
|
"learning_rate": 8.111111111111112e-06, |
|
"loss": 1.1438, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 17.762981414794922, |
|
"learning_rate": 8.055555555555557e-06, |
|
"loss": 1.1539, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 13.66717529296875, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.0529, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 17.681964874267578, |
|
"learning_rate": 7.944444444444445e-06, |
|
"loss": 1.158, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 17.317996978759766, |
|
"learning_rate": 7.88888888888889e-06, |
|
"loss": 1.0955, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 15.947030067443848, |
|
"learning_rate": 7.833333333333333e-06, |
|
"loss": 1.1302, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 17.65252113342285, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 1.0589, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 17.65298843383789, |
|
"learning_rate": 7.722222222222223e-06, |
|
"loss": 1.1063, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 14.979933738708496, |
|
"learning_rate": 7.666666666666667e-06, |
|
"loss": 1.1141, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 16.299774169921875, |
|
"learning_rate": 7.611111111111111e-06, |
|
"loss": 1.0243, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 23.286108016967773, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 1.0325, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 20.537403106689453, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.0279, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 19.857568740844727, |
|
"learning_rate": 7.444444444444445e-06, |
|
"loss": 1.0283, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 18.773427963256836, |
|
"learning_rate": 7.38888888888889e-06, |
|
"loss": 1.1405, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 17.24260139465332, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 1.0614, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 21.773202896118164, |
|
"learning_rate": 7.277777777777778e-06, |
|
"loss": 1.0629, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 19.747053146362305, |
|
"learning_rate": 7.222222222222223e-06, |
|
"loss": 1.0446, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 11.02785587310791, |
|
"learning_rate": 7.166666666666667e-06, |
|
"loss": 1.0765, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 11.050880432128906, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 0.9379, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 16.35251808166504, |
|
"learning_rate": 7.055555555555557e-06, |
|
"loss": 0.9255, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 14.105949401855469, |
|
"learning_rate": 7e-06, |
|
"loss": 0.7552, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 12.735761642456055, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.8187, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 15.418607711791992, |
|
"learning_rate": 6.88888888888889e-06, |
|
"loss": 0.8395, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 14.72042179107666, |
|
"learning_rate": 6.833333333333334e-06, |
|
"loss": 0.8058, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 14.438790321350098, |
|
"learning_rate": 6.777777777777779e-06, |
|
"loss": 0.7582, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 17.728883743286133, |
|
"learning_rate": 6.7222222222222235e-06, |
|
"loss": 0.7984, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 19.346345901489258, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.7331, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_cer": 51.86124281721983, |
|
"eval_loss": 1.032954454421997, |
|
"eval_runtime": 959.1561, |
|
"eval_samples_per_second": 3.697, |
|
"eval_steps_per_second": 0.463, |
|
"eval_wer": 82.26384692849949, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 14.71066665649414, |
|
"learning_rate": 6.6111111111111115e-06, |
|
"loss": 0.7684, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 13.412025451660156, |
|
"learning_rate": 6.555555555555556e-06, |
|
"loss": 0.7742, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 17.332744598388672, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.8515, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 14.944128036499023, |
|
"learning_rate": 6.444444444444445e-06, |
|
"loss": 0.8012, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 18.193222045898438, |
|
"learning_rate": 6.3888888888888885e-06, |
|
"loss": 0.8217, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 18.6146240234375, |
|
"learning_rate": 6.333333333333333e-06, |
|
"loss": 0.8732, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 17.37205696105957, |
|
"learning_rate": 6.277777777777778e-06, |
|
"loss": 0.8824, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 18.284181594848633, |
|
"learning_rate": 6.222222222222223e-06, |
|
"loss": 0.7562, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 13.632092475891113, |
|
"learning_rate": 6.166666666666667e-06, |
|
"loss": 0.7502, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 16.052021026611328, |
|
"learning_rate": 6.111111111111112e-06, |
|
"loss": 0.8167, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 17.9954833984375, |
|
"learning_rate": 6.055555555555555e-06, |
|
"loss": 0.7396, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 19.523056030273438, |
|
"learning_rate": 6e-06, |
|
"loss": 0.6644, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 15.319243431091309, |
|
"learning_rate": 5.944444444444445e-06, |
|
"loss": 0.7699, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 13.561873435974121, |
|
"learning_rate": 5.88888888888889e-06, |
|
"loss": 0.7416, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 13.100536346435547, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 0.8497, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 15.060547828674316, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.772, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 15.338216781616211, |
|
"learning_rate": 5.722222222222222e-06, |
|
"loss": 0.7752, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 12.978151321411133, |
|
"learning_rate": 5.666666666666667e-06, |
|
"loss": 0.8158, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 16.481496810913086, |
|
"learning_rate": 5.611111111111112e-06, |
|
"loss": 0.7345, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 15.723489761352539, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.8427, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 15.024873733520508, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.6852, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 14.445104598999023, |
|
"learning_rate": 5.444444444444445e-06, |
|
"loss": 0.8407, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 18.344518661499023, |
|
"learning_rate": 5.388888888888889e-06, |
|
"loss": 0.7858, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 15.053083419799805, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.8178, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 15.452193260192871, |
|
"learning_rate": 5.2777777777777785e-06, |
|
"loss": 0.6985, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 20.083093643188477, |
|
"learning_rate": 5.2222222222222226e-06, |
|
"loss": 0.7759, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 17.425199508666992, |
|
"learning_rate": 5.1666666666666675e-06, |
|
"loss": 0.7553, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 11.294227600097656, |
|
"learning_rate": 5.1111111111111115e-06, |
|
"loss": 0.7384, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 11.263272285461426, |
|
"learning_rate": 5.0555555555555555e-06, |
|
"loss": 0.7835, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 11.892974853515625, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7622, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 11.682631492614746, |
|
"learning_rate": 4.944444444444445e-06, |
|
"loss": 0.6707, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 13.932768821716309, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.7527, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 14.168807983398438, |
|
"learning_rate": 4.833333333333333e-06, |
|
"loss": 0.7444, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 8.4957857131958, |
|
"learning_rate": 4.777777777777778e-06, |
|
"loss": 0.8083, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 17.012508392333984, |
|
"learning_rate": 4.722222222222222e-06, |
|
"loss": 0.8356, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 14.761796951293945, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.8432, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 16.764440536499023, |
|
"learning_rate": 4.611111111111112e-06, |
|
"loss": 0.8869, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 12.401867866516113, |
|
"learning_rate": 4.555555555555556e-06, |
|
"loss": 0.7753, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 10.731365203857422, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.7484, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 12.973823547363281, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.7332, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_cer": 39.18814959022828, |
|
"eval_loss": 0.994914174079895, |
|
"eval_runtime": 889.7059, |
|
"eval_samples_per_second": 3.986, |
|
"eval_steps_per_second": 0.499, |
|
"eval_wer": 67.00100704934542, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 12.647448539733887, |
|
"learning_rate": 4.388888888888889e-06, |
|
"loss": 0.7481, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 16.329065322875977, |
|
"learning_rate": 4.333333333333334e-06, |
|
"loss": 0.7734, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 13.567760467529297, |
|
"learning_rate": 4.277777777777778e-06, |
|
"loss": 0.7122, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 12.22362232208252, |
|
"learning_rate": 4.222222222222223e-06, |
|
"loss": 0.7515, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 18.0850772857666, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.6836, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 24.46388816833496, |
|
"learning_rate": 4.111111111111111e-06, |
|
"loss": 0.8578, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 15.071797370910645, |
|
"learning_rate": 4.055555555555556e-06, |
|
"loss": 0.7447, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 12.878135681152344, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.7035, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 14.950927734375, |
|
"learning_rate": 3.944444444444445e-06, |
|
"loss": 0.7605, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 15.3999605178833, |
|
"learning_rate": 3.88888888888889e-06, |
|
"loss": 0.7413, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 14.097206115722656, |
|
"learning_rate": 3.833333333333334e-06, |
|
"loss": 0.7473, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 15.387040138244629, |
|
"learning_rate": 3.777777777777778e-06, |
|
"loss": 0.716, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 20.502208709716797, |
|
"learning_rate": 3.7222222222222225e-06, |
|
"loss": 0.7577, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 16.519433975219727, |
|
"learning_rate": 3.6666666666666666e-06, |
|
"loss": 0.6927, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 15.588461875915527, |
|
"learning_rate": 3.6111111111111115e-06, |
|
"loss": 0.7123, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 12.752546310424805, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 0.8407, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 15.805379867553711, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.751, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 13.27651596069336, |
|
"learning_rate": 3.444444444444445e-06, |
|
"loss": 0.8211, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 11.968923568725586, |
|
"learning_rate": 3.3888888888888893e-06, |
|
"loss": 0.6975, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 13.906292915344238, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.8429, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 13.810843467712402, |
|
"learning_rate": 3.277777777777778e-06, |
|
"loss": 0.7043, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 14.037764549255371, |
|
"learning_rate": 3.2222222222222227e-06, |
|
"loss": 0.6782, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 8.811666488647461, |
|
"learning_rate": 3.1666666666666667e-06, |
|
"loss": 0.5169, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 12.727725982666016, |
|
"learning_rate": 3.1111111111111116e-06, |
|
"loss": 0.5087, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 13.598219871520996, |
|
"learning_rate": 3.055555555555556e-06, |
|
"loss": 0.6104, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 12.1933012008667, |
|
"learning_rate": 3e-06, |
|
"loss": 0.5486, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 7.250199317932129, |
|
"learning_rate": 2.944444444444445e-06, |
|
"loss": 0.473, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 11.98234748840332, |
|
"learning_rate": 2.888888888888889e-06, |
|
"loss": 0.5483, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 15.183174133300781, |
|
"learning_rate": 2.8333333333333335e-06, |
|
"loss": 0.5781, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 14.038616180419922, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.5383, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 11.775838851928711, |
|
"learning_rate": 2.7222222222222224e-06, |
|
"loss": 0.6216, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 11.929536819458008, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.4929, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 13.312314987182617, |
|
"learning_rate": 2.6111111111111113e-06, |
|
"loss": 0.4658, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 15.442246437072754, |
|
"learning_rate": 2.5555555555555557e-06, |
|
"loss": 0.498, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 10.368426322937012, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.5588, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 9.702683448791504, |
|
"learning_rate": 2.4444444444444447e-06, |
|
"loss": 0.5221, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 12.07742977142334, |
|
"learning_rate": 2.388888888888889e-06, |
|
"loss": 0.5033, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 15.718260765075684, |
|
"learning_rate": 2.3333333333333336e-06, |
|
"loss": 0.5913, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 10.115168571472168, |
|
"learning_rate": 2.277777777777778e-06, |
|
"loss": 0.5117, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 9.728811264038086, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.5493, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_cer": 41.269978333909, |
|
"eval_loss": 1.0036817789077759, |
|
"eval_runtime": 911.3291, |
|
"eval_samples_per_second": 3.891, |
|
"eval_steps_per_second": 0.487, |
|
"eval_wer": 68.10473313192347, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 12.741899490356445, |
|
"learning_rate": 2.166666666666667e-06, |
|
"loss": 0.4817, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 14.738693237304688, |
|
"learning_rate": 2.1111111111111114e-06, |
|
"loss": 0.5172, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 10.202011108398438, |
|
"learning_rate": 2.0555555555555555e-06, |
|
"loss": 0.4856, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 11.9408597946167, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.527, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"grad_norm": 8.329051971435547, |
|
"learning_rate": 1.944444444444445e-06, |
|
"loss": 0.432, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 12.512899398803711, |
|
"learning_rate": 1.888888888888889e-06, |
|
"loss": 0.4836, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 8.79201889038086, |
|
"learning_rate": 1.8333333333333333e-06, |
|
"loss": 0.52, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 15.417551040649414, |
|
"learning_rate": 1.777777777777778e-06, |
|
"loss": 0.5239, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 10.516593933105469, |
|
"learning_rate": 1.7222222222222224e-06, |
|
"loss": 0.5497, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 11.659682273864746, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.4883, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 15.743680000305176, |
|
"learning_rate": 1.6111111111111113e-06, |
|
"loss": 0.5215, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 15.303753852844238, |
|
"learning_rate": 1.5555555555555558e-06, |
|
"loss": 0.5714, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 13.083234786987305, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.479, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 7.123412132263184, |
|
"learning_rate": 1.4444444444444445e-06, |
|
"loss": 0.4791, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 14.159947395324707, |
|
"learning_rate": 1.3888888888888892e-06, |
|
"loss": 0.6012, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 16.015972137451172, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.4877, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 14.357626914978027, |
|
"learning_rate": 1.2777777777777779e-06, |
|
"loss": 0.5159, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 7.9363274574279785, |
|
"learning_rate": 1.2222222222222223e-06, |
|
"loss": 0.523, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 15.333666801452637, |
|
"learning_rate": 1.1666666666666668e-06, |
|
"loss": 0.5083, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 19.310766220092773, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.5721, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 16.54596710205078, |
|
"learning_rate": 1.0555555555555557e-06, |
|
"loss": 0.6224, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 10.166070938110352, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.5366, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 12.599578857421875, |
|
"learning_rate": 9.444444444444445e-07, |
|
"loss": 0.4959, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 7.448824405670166, |
|
"learning_rate": 8.88888888888889e-07, |
|
"loss": 0.4986, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 11.634490013122559, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.4859, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 10.882704734802246, |
|
"learning_rate": 7.777777777777779e-07, |
|
"loss": 0.5328, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 9.251214027404785, |
|
"learning_rate": 7.222222222222222e-07, |
|
"loss": 0.5354, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 15.835794448852539, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 0.5906, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 11.154616355895996, |
|
"learning_rate": 6.111111111111112e-07, |
|
"loss": 0.5786, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 13.227851867675781, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 0.4945, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"grad_norm": 13.796296119689941, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.5045, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 13.530144691467285, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 0.5801, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 7.212181091308594, |
|
"learning_rate": 3.8888888888888895e-07, |
|
"loss": 0.4827, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 15.471014022827148, |
|
"learning_rate": 3.3333333333333335e-07, |
|
"loss": 0.5754, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 14.308059692382812, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"loss": 0.5391, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 10.918014526367188, |
|
"learning_rate": 2.2222222222222224e-07, |
|
"loss": 0.6082, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 7.789730072021484, |
|
"learning_rate": 1.6666666666666668e-07, |
|
"loss": 0.4707, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 12.3082275390625, |
|
"learning_rate": 1.1111111111111112e-07, |
|
"loss": 0.552, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 15.524524688720703, |
|
"learning_rate": 5.555555555555556e-08, |
|
"loss": 0.4998, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 11.517424583435059, |
|
"learning_rate": 0.0, |
|
"loss": 0.4667, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_cer": 42.82114484880836, |
|
"eval_loss": 0.9976443648338318, |
|
"eval_runtime": 928.1561, |
|
"eval_samples_per_second": 3.82, |
|
"eval_steps_per_second": 0.478, |
|
"eval_wer": 70.0221550855992, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"total_flos": 1.154110732959744e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|