|
{ |
|
"best_metric": 75.78173858661663, |
|
"best_model_checkpoint": "./whisper-small-finetune_maghrebi/checkpoint-5000", |
|
"epoch": 2.995805871779509, |
|
"eval_steps": 1000, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 26.755067825317383, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 2.3039, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 34.66984558105469, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 2.1708, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 26.377073287963867, |
|
"learning_rate": 1.5e-06, |
|
"loss": 1.8591, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 30.776378631591797, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.8478, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 30.643896102905273, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.8421, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 34.42327880859375, |
|
"learning_rate": 3e-06, |
|
"loss": 1.9252, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 24.890520095825195, |
|
"learning_rate": 3.5e-06, |
|
"loss": 1.7428, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 30.156938552856445, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.5537, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 27.96819496154785, |
|
"learning_rate": 4.5e-06, |
|
"loss": 1.6786, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 28.565181732177734, |
|
"learning_rate": 5e-06, |
|
"loss": 1.6344, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 24.439979553222656, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 1.7475, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 19.079715728759766, |
|
"learning_rate": 6e-06, |
|
"loss": 1.5635, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 31.042789459228516, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 1.6371, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 21.58063316345215, |
|
"learning_rate": 7e-06, |
|
"loss": 1.5827, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 20.27756118774414, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.6198, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 29.146913528442383, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.462, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 27.70607566833496, |
|
"learning_rate": 8.5e-06, |
|
"loss": 1.6481, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 24.769601821899414, |
|
"learning_rate": 9e-06, |
|
"loss": 1.5338, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 28.691078186035156, |
|
"learning_rate": 9.5e-06, |
|
"loss": 1.469, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 27.30621337890625, |
|
"learning_rate": 1e-05, |
|
"loss": 1.5568, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 23.094369888305664, |
|
"learning_rate": 9.944444444444445e-06, |
|
"loss": 1.4752, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 25.179765701293945, |
|
"learning_rate": 9.88888888888889e-06, |
|
"loss": 1.564, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 23.27145004272461, |
|
"learning_rate": 9.833333333333333e-06, |
|
"loss": 1.3967, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 25.917579650878906, |
|
"learning_rate": 9.777777777777779e-06, |
|
"loss": 1.4498, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 17.447675704956055, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 1.4077, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 23.504993438720703, |
|
"learning_rate": 9.666666666666667e-06, |
|
"loss": 1.4868, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 20.719358444213867, |
|
"learning_rate": 9.611111111111112e-06, |
|
"loss": 1.3703, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 19.470386505126953, |
|
"learning_rate": 9.555555555555556e-06, |
|
"loss": 1.4705, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 21.750486373901367, |
|
"learning_rate": 9.5e-06, |
|
"loss": 1.5277, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 23.778789520263672, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 1.5371, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 23.14359474182129, |
|
"learning_rate": 9.38888888888889e-06, |
|
"loss": 1.5101, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 25.576568603515625, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 1.4284, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 26.62574005126953, |
|
"learning_rate": 9.277777777777778e-06, |
|
"loss": 1.4605, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 18.61768341064453, |
|
"learning_rate": 9.222222222222224e-06, |
|
"loss": 1.4202, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 26.829648971557617, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 1.3163, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 21.73237419128418, |
|
"learning_rate": 9.111111111111112e-06, |
|
"loss": 1.4229, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 26.230100631713867, |
|
"learning_rate": 9.055555555555556e-06, |
|
"loss": 1.4828, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 20.698078155517578, |
|
"learning_rate": 9e-06, |
|
"loss": 1.5082, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 24.733755111694336, |
|
"learning_rate": 8.944444444444446e-06, |
|
"loss": 1.291, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 22.547061920166016, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 1.3906, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_cer": 57.395050172609885, |
|
"eval_loss": 1.3834500312805176, |
|
"eval_runtime": 827.1285, |
|
"eval_samples_per_second": 4.036, |
|
"eval_steps_per_second": 0.505, |
|
"eval_wer": 87.96643735668125, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 24.679819107055664, |
|
"learning_rate": 8.833333333333334e-06, |
|
"loss": 1.3797, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 22.122909545898438, |
|
"learning_rate": 8.777777777777778e-06, |
|
"loss": 1.4223, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 17.617143630981445, |
|
"learning_rate": 8.722222222222224e-06, |
|
"loss": 1.3753, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 18.17972755432129, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 1.2559, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 26.944639205932617, |
|
"learning_rate": 8.611111111111112e-06, |
|
"loss": 1.3918, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 20.75006675720215, |
|
"learning_rate": 8.555555555555556e-06, |
|
"loss": 1.4469, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 25.37113380432129, |
|
"learning_rate": 8.5e-06, |
|
"loss": 1.3639, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 16.128446578979492, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 1.3684, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 22.034616470336914, |
|
"learning_rate": 8.38888888888889e-06, |
|
"loss": 1.2884, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 18.15462875366211, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.5307, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 27.089126586914062, |
|
"learning_rate": 8.277777777777778e-06, |
|
"loss": 1.2426, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 17.21516990661621, |
|
"learning_rate": 8.222222222222222e-06, |
|
"loss": 1.2479, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 21.62128257751465, |
|
"learning_rate": 8.166666666666668e-06, |
|
"loss": 1.3416, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 18.543970108032227, |
|
"learning_rate": 8.111111111111112e-06, |
|
"loss": 1.2417, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 25.98469352722168, |
|
"learning_rate": 8.055555555555557e-06, |
|
"loss": 1.4074, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 18.895614624023438, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.378, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 21.452943801879883, |
|
"learning_rate": 7.944444444444445e-06, |
|
"loss": 1.3844, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 21.81563949584961, |
|
"learning_rate": 7.88888888888889e-06, |
|
"loss": 1.3518, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 20.642499923706055, |
|
"learning_rate": 7.833333333333333e-06, |
|
"loss": 1.2924, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 33.33131408691406, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 1.2999, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 16.468210220336914, |
|
"learning_rate": 7.722222222222223e-06, |
|
"loss": 1.2118, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 19.231273651123047, |
|
"learning_rate": 7.666666666666667e-06, |
|
"loss": 1.3415, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 24.304243087768555, |
|
"learning_rate": 7.611111111111111e-06, |
|
"loss": 1.3811, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 20.244470596313477, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 1.2991, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 19.40199851989746, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.3042, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 18.921825408935547, |
|
"learning_rate": 7.444444444444445e-06, |
|
"loss": 1.2901, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 16.50575065612793, |
|
"learning_rate": 7.38888888888889e-06, |
|
"loss": 1.2142, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 14.586038589477539, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 0.9015, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 20.73207664489746, |
|
"learning_rate": 7.277777777777778e-06, |
|
"loss": 1.0864, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 11.946000099182129, |
|
"learning_rate": 7.222222222222223e-06, |
|
"loss": 0.9158, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 15.355812072753906, |
|
"learning_rate": 7.166666666666667e-06, |
|
"loss": 0.8193, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 15.703187942504883, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 0.8603, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 17.89627456665039, |
|
"learning_rate": 7.055555555555557e-06, |
|
"loss": 0.9686, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 19.415363311767578, |
|
"learning_rate": 7e-06, |
|
"loss": 1.0957, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 15.288445472717285, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.9709, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 18.04073143005371, |
|
"learning_rate": 6.88888888888889e-06, |
|
"loss": 0.9127, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 17.967145919799805, |
|
"learning_rate": 6.833333333333334e-06, |
|
"loss": 0.8762, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 22.560022354125977, |
|
"learning_rate": 6.777777777777779e-06, |
|
"loss": 1.0106, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 16.264883041381836, |
|
"learning_rate": 6.7222222222222235e-06, |
|
"loss": 0.919, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 15.793697357177734, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.9716, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_cer": 54.57994907253641, |
|
"eval_loss": 1.2673263549804688, |
|
"eval_runtime": 825.2901, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.506, |
|
"eval_wer": 81.23827392120076, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 17.40117645263672, |
|
"learning_rate": 6.6111111111111115e-06, |
|
"loss": 0.9739, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 22.668455123901367, |
|
"learning_rate": 6.555555555555556e-06, |
|
"loss": 1.0057, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 21.214128494262695, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.9888, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 19.409332275390625, |
|
"learning_rate": 6.444444444444445e-06, |
|
"loss": 0.9321, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 19.678197860717773, |
|
"learning_rate": 6.3888888888888885e-06, |
|
"loss": 0.973, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 16.86955451965332, |
|
"learning_rate": 6.333333333333333e-06, |
|
"loss": 1.0285, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 15.891237258911133, |
|
"learning_rate": 6.277777777777778e-06, |
|
"loss": 0.9538, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 19.17290687561035, |
|
"learning_rate": 6.222222222222223e-06, |
|
"loss": 0.9328, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 16.7691593170166, |
|
"learning_rate": 6.166666666666667e-06, |
|
"loss": 0.8896, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 18.572256088256836, |
|
"learning_rate": 6.111111111111112e-06, |
|
"loss": 1.0089, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 17.122636795043945, |
|
"learning_rate": 6.055555555555555e-06, |
|
"loss": 1.0513, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 16.53435707092285, |
|
"learning_rate": 6e-06, |
|
"loss": 0.9443, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 20.76995086669922, |
|
"learning_rate": 5.944444444444445e-06, |
|
"loss": 1.0072, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 19.78141975402832, |
|
"learning_rate": 5.88888888888889e-06, |
|
"loss": 0.9116, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 18.738779067993164, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 0.9244, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 16.18290901184082, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.8844, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 23.43638038635254, |
|
"learning_rate": 5.722222222222222e-06, |
|
"loss": 0.9252, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 18.08245849609375, |
|
"learning_rate": 5.666666666666667e-06, |
|
"loss": 0.9689, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 20.146072387695312, |
|
"learning_rate": 5.611111111111112e-06, |
|
"loss": 0.9982, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 19.40115737915039, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.8793, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 19.48564910888672, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.9001, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 19.529861450195312, |
|
"learning_rate": 5.444444444444445e-06, |
|
"loss": 1.0249, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 19.736713409423828, |
|
"learning_rate": 5.388888888888889e-06, |
|
"loss": 0.9182, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 16.709518432617188, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.9904, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 15.45124340057373, |
|
"learning_rate": 5.2777777777777785e-06, |
|
"loss": 1.0358, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 11.870710372924805, |
|
"learning_rate": 5.2222222222222226e-06, |
|
"loss": 0.9011, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 17.464656829833984, |
|
"learning_rate": 5.1666666666666675e-06, |
|
"loss": 0.918, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 24.91024398803711, |
|
"learning_rate": 5.1111111111111115e-06, |
|
"loss": 0.927, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 18.0244140625, |
|
"learning_rate": 5.0555555555555555e-06, |
|
"loss": 0.9138, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 18.960786819458008, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9982, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 17.35990333557129, |
|
"learning_rate": 4.944444444444445e-06, |
|
"loss": 0.9706, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 11.708779335021973, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.9477, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 19.692068099975586, |
|
"learning_rate": 4.833333333333333e-06, |
|
"loss": 0.9244, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 15.64211654663086, |
|
"learning_rate": 4.777777777777778e-06, |
|
"loss": 0.9989, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 24.154022216796875, |
|
"learning_rate": 4.722222222222222e-06, |
|
"loss": 0.9411, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 26.374160766601562, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.9849, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 16.004297256469727, |
|
"learning_rate": 4.611111111111112e-06, |
|
"loss": 1.031, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 19.26837730407715, |
|
"learning_rate": 4.555555555555556e-06, |
|
"loss": 0.9211, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 15.54520320892334, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.9234, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 21.567298889160156, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.9518, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_cer": 63.4530028080558, |
|
"eval_loss": 1.204471230506897, |
|
"eval_runtime": 857.8987, |
|
"eval_samples_per_second": 3.891, |
|
"eval_steps_per_second": 0.487, |
|
"eval_wer": 85.61079841567646, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 15.287321090698242, |
|
"learning_rate": 4.388888888888889e-06, |
|
"loss": 0.9597, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 19.927249908447266, |
|
"learning_rate": 4.333333333333334e-06, |
|
"loss": 0.9353, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 23.143543243408203, |
|
"learning_rate": 4.277777777777778e-06, |
|
"loss": 1.0893, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 16.31475257873535, |
|
"learning_rate": 4.222222222222223e-06, |
|
"loss": 0.8705, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 11.8856840133667, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.8493, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 14.792439460754395, |
|
"learning_rate": 4.111111111111111e-06, |
|
"loss": 0.9663, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 13.951410293579102, |
|
"learning_rate": 4.055555555555556e-06, |
|
"loss": 0.9282, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 19.062644958496094, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.8443, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 18.890697479248047, |
|
"learning_rate": 3.944444444444445e-06, |
|
"loss": 0.97, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 16.668237686157227, |
|
"learning_rate": 3.88888888888889e-06, |
|
"loss": 0.8924, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 20.245948791503906, |
|
"learning_rate": 3.833333333333334e-06, |
|
"loss": 0.983, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 17.0203800201416, |
|
"learning_rate": 3.777777777777778e-06, |
|
"loss": 1.0049, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 23.726200103759766, |
|
"learning_rate": 3.7222222222222225e-06, |
|
"loss": 0.8701, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 9.839333534240723, |
|
"learning_rate": 3.6666666666666666e-06, |
|
"loss": 0.7549, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 16.390169143676758, |
|
"learning_rate": 3.6111111111111115e-06, |
|
"loss": 0.603, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 16.594236373901367, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 0.6646, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 18.335918426513672, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.7228, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 14.09089183807373, |
|
"learning_rate": 3.444444444444445e-06, |
|
"loss": 0.6843, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 19.89991569519043, |
|
"learning_rate": 3.3888888888888893e-06, |
|
"loss": 0.6836, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 16.895814895629883, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.628, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 17.576400756835938, |
|
"learning_rate": 3.277777777777778e-06, |
|
"loss": 0.6666, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 15.989606857299805, |
|
"learning_rate": 3.2222222222222227e-06, |
|
"loss": 0.6256, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 16.086654663085938, |
|
"learning_rate": 3.1666666666666667e-06, |
|
"loss": 0.6641, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 14.707154273986816, |
|
"learning_rate": 3.1111111111111116e-06, |
|
"loss": 0.62, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 14.809082984924316, |
|
"learning_rate": 3.055555555555556e-06, |
|
"loss": 0.6564, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 15.021686553955078, |
|
"learning_rate": 3e-06, |
|
"loss": 0.669, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 17.62252426147461, |
|
"learning_rate": 2.944444444444445e-06, |
|
"loss": 0.6823, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 15.664830207824707, |
|
"learning_rate": 2.888888888888889e-06, |
|
"loss": 0.695, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 8.63792610168457, |
|
"learning_rate": 2.8333333333333335e-06, |
|
"loss": 0.6279, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 15.918222427368164, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.6861, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 14.93791389465332, |
|
"learning_rate": 2.7222222222222224e-06, |
|
"loss": 0.6068, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 17.324039459228516, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.6098, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 17.186784744262695, |
|
"learning_rate": 2.6111111111111113e-06, |
|
"loss": 0.7561, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 16.359756469726562, |
|
"learning_rate": 2.5555555555555557e-06, |
|
"loss": 0.5851, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 16.602548599243164, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.64, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 15.252476692199707, |
|
"learning_rate": 2.4444444444444447e-06, |
|
"loss": 0.6555, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 15.010741233825684, |
|
"learning_rate": 2.388888888888889e-06, |
|
"loss": 0.5832, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 8.625840187072754, |
|
"learning_rate": 2.3333333333333336e-06, |
|
"loss": 0.5749, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"grad_norm": 11.733368873596191, |
|
"learning_rate": 2.277777777777778e-06, |
|
"loss": 0.6156, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 16.896711349487305, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.6477, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_cer": 55.91453043066921, |
|
"eval_loss": 1.2121813297271729, |
|
"eval_runtime": 829.4327, |
|
"eval_samples_per_second": 4.024, |
|
"eval_steps_per_second": 0.504, |
|
"eval_wer": 79.73733583489681, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"grad_norm": 16.266002655029297, |
|
"learning_rate": 2.166666666666667e-06, |
|
"loss": 0.6686, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 14.70505428314209, |
|
"learning_rate": 2.1111111111111114e-06, |
|
"loss": 0.6445, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 18.52985191345215, |
|
"learning_rate": 2.0555555555555555e-06, |
|
"loss": 0.6195, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 13.347681045532227, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.6454, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 19.63964080810547, |
|
"learning_rate": 1.944444444444445e-06, |
|
"loss": 0.6106, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 20.261411666870117, |
|
"learning_rate": 1.888888888888889e-06, |
|
"loss": 0.6703, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 14.775351524353027, |
|
"learning_rate": 1.8333333333333333e-06, |
|
"loss": 0.6232, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 15.170753479003906, |
|
"learning_rate": 1.777777777777778e-06, |
|
"loss": 0.6405, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 17.451885223388672, |
|
"learning_rate": 1.7222222222222224e-06, |
|
"loss": 0.668, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 16.877351760864258, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.7075, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 17.570926666259766, |
|
"learning_rate": 1.6111111111111113e-06, |
|
"loss": 0.7038, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"grad_norm": 11.851263999938965, |
|
"learning_rate": 1.5555555555555558e-06, |
|
"loss": 0.5578, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 15.071412086486816, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.6107, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 13.913065910339355, |
|
"learning_rate": 1.4444444444444445e-06, |
|
"loss": 0.5909, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 19.536226272583008, |
|
"learning_rate": 1.3888888888888892e-06, |
|
"loss": 0.6972, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 12.989717483520508, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.5659, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 12.693081855773926, |
|
"learning_rate": 1.2777777777777779e-06, |
|
"loss": 0.5787, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 17.102140426635742, |
|
"learning_rate": 1.2222222222222223e-06, |
|
"loss": 0.6361, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 13.896649360656738, |
|
"learning_rate": 1.1666666666666668e-06, |
|
"loss": 0.6509, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 22.483936309814453, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.6536, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 15.584601402282715, |
|
"learning_rate": 1.0555555555555557e-06, |
|
"loss": 0.5859, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 18.10873794555664, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.6344, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 11.653306007385254, |
|
"learning_rate": 9.444444444444445e-07, |
|
"loss": 0.6182, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 10.643468856811523, |
|
"learning_rate": 8.88888888888889e-07, |
|
"loss": 0.6861, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 14.426728248596191, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.6564, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 17.03219223022461, |
|
"learning_rate": 7.777777777777779e-07, |
|
"loss": 0.6172, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 17.09849739074707, |
|
"learning_rate": 7.222222222222222e-07, |
|
"loss": 0.7349, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 12.162270545959473, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 0.5356, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 14.960759162902832, |
|
"learning_rate": 6.111111111111112e-07, |
|
"loss": 0.5448, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 16.547992706298828, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 0.6641, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 14.882378578186035, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.508, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 19.316282272338867, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 0.5793, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"grad_norm": 15.735701560974121, |
|
"learning_rate": 3.8888888888888895e-07, |
|
"loss": 0.6396, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 16.93009376525879, |
|
"learning_rate": 3.3333333333333335e-07, |
|
"loss": 0.5591, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 14.681782722473145, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"loss": 0.6488, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 16.71626091003418, |
|
"learning_rate": 2.2222222222222224e-07, |
|
"loss": 0.633, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 20.87442398071289, |
|
"learning_rate": 1.6666666666666668e-07, |
|
"loss": 0.6695, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 17.04303741455078, |
|
"learning_rate": 1.1111111111111112e-07, |
|
"loss": 0.5856, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 14.400847434997559, |
|
"learning_rate": 5.555555555555556e-08, |
|
"loss": 0.5595, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 12.768998146057129, |
|
"learning_rate": 0.0, |
|
"loss": 0.6857, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 48.94773392916453, |
|
"eval_loss": 1.1982322931289673, |
|
"eval_runtime": 797.5519, |
|
"eval_samples_per_second": 4.185, |
|
"eval_steps_per_second": 0.524, |
|
"eval_wer": 75.78173858661663, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"total_flos": 1.15434160128e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|