|
{ |
|
"best_metric": 75.64947360448383, |
|
"best_model_checkpoint": "./whisper-small-finetune_gulf/checkpoint-4000", |
|
"epoch": 2.311604253351826, |
|
"eval_steps": 1000, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 23.472209930419922, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 1.8733, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 25.33260154724121, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.6853, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 28.451927185058594, |
|
"learning_rate": 1.5e-06, |
|
"loss": 1.6101, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 29.835905075073242, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.7625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 28.589879989624023, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.6508, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 28.195240020751953, |
|
"learning_rate": 3e-06, |
|
"loss": 1.6032, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 23.07231903076172, |
|
"learning_rate": 3.5e-06, |
|
"loss": 1.5002, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 26.471607208251953, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.3828, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 20.687870025634766, |
|
"learning_rate": 4.5e-06, |
|
"loss": 1.52, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 20.147708892822266, |
|
"learning_rate": 5e-06, |
|
"loss": 1.5193, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 21.233428955078125, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 1.5131, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 21.05452537536621, |
|
"learning_rate": 6e-06, |
|
"loss": 1.4526, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 24.340858459472656, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 1.5262, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 24.064132690429688, |
|
"learning_rate": 7e-06, |
|
"loss": 1.397, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 21.471921920776367, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.5244, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 22.753755569458008, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.3962, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 18.438940048217773, |
|
"learning_rate": 8.5e-06, |
|
"loss": 1.3372, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 19.2254581451416, |
|
"learning_rate": 9e-06, |
|
"loss": 1.426, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 18.967300415039062, |
|
"learning_rate": 9.5e-06, |
|
"loss": 1.3301, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 20.847196578979492, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2433, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 18.459182739257812, |
|
"learning_rate": 9.944444444444445e-06, |
|
"loss": 1.2761, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 22.492895126342773, |
|
"learning_rate": 9.88888888888889e-06, |
|
"loss": 1.4599, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 25.510766983032227, |
|
"learning_rate": 9.833333333333333e-06, |
|
"loss": 1.2511, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 20.4246768951416, |
|
"learning_rate": 9.777777777777779e-06, |
|
"loss": 1.2121, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 25.91058349609375, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 1.4637, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 25.087705612182617, |
|
"learning_rate": 9.666666666666667e-06, |
|
"loss": 1.2607, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 20.08827781677246, |
|
"learning_rate": 9.611111111111112e-06, |
|
"loss": 1.4525, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 16.310239791870117, |
|
"learning_rate": 9.555555555555556e-06, |
|
"loss": 1.2905, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 18.671165466308594, |
|
"learning_rate": 9.5e-06, |
|
"loss": 1.3568, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 25.499460220336914, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 1.2893, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 20.31481170654297, |
|
"learning_rate": 9.38888888888889e-06, |
|
"loss": 1.268, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 22.260953903198242, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 1.2924, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 19.559682846069336, |
|
"learning_rate": 9.277777777777778e-06, |
|
"loss": 1.3534, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 21.260534286499023, |
|
"learning_rate": 9.222222222222224e-06, |
|
"loss": 1.265, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 22.76539421081543, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 1.302, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 23.064783096313477, |
|
"learning_rate": 9.111111111111112e-06, |
|
"loss": 1.25, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 19.941423416137695, |
|
"learning_rate": 9.055555555555556e-06, |
|
"loss": 1.2915, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 17.240646362304688, |
|
"learning_rate": 9e-06, |
|
"loss": 1.2868, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 18.2373046875, |
|
"learning_rate": 8.944444444444446e-06, |
|
"loss": 1.2259, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 22.34012794494629, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 1.3947, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_cer": 65.38537817965462, |
|
"eval_loss": 1.2471961975097656, |
|
"eval_runtime": 1151.935, |
|
"eval_samples_per_second": 3.755, |
|
"eval_steps_per_second": 0.47, |
|
"eval_wer": 84.87843671892752, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 18.88918685913086, |
|
"learning_rate": 8.833333333333334e-06, |
|
"loss": 1.2824, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 20.2314395904541, |
|
"learning_rate": 8.777777777777778e-06, |
|
"loss": 1.3135, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 21.175655364990234, |
|
"learning_rate": 8.722222222222224e-06, |
|
"loss": 1.2259, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 16.999879837036133, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 1.2408, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 19.178285598754883, |
|
"learning_rate": 8.611111111111112e-06, |
|
"loss": 1.2721, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 20.64499855041504, |
|
"learning_rate": 8.555555555555556e-06, |
|
"loss": 1.3392, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 19.396726608276367, |
|
"learning_rate": 8.5e-06, |
|
"loss": 1.2863, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 19.87407875061035, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 1.3248, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 16.245336532592773, |
|
"learning_rate": 8.38888888888889e-06, |
|
"loss": 1.3406, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 17.97079086303711, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.211, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 25.904541015625, |
|
"learning_rate": 8.277777777777778e-06, |
|
"loss": 1.334, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 21.328983306884766, |
|
"learning_rate": 8.222222222222222e-06, |
|
"loss": 1.2982, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 19.949731826782227, |
|
"learning_rate": 8.166666666666668e-06, |
|
"loss": 1.2365, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 15.301751136779785, |
|
"learning_rate": 8.111111111111112e-06, |
|
"loss": 1.2033, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 22.96265411376953, |
|
"learning_rate": 8.055555555555557e-06, |
|
"loss": 1.2319, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 17.26740074157715, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.1544, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 27.209585189819336, |
|
"learning_rate": 7.944444444444445e-06, |
|
"loss": 1.3496, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 20.707279205322266, |
|
"learning_rate": 7.88888888888889e-06, |
|
"loss": 1.2727, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 20.74648666381836, |
|
"learning_rate": 7.833333333333333e-06, |
|
"loss": 1.2371, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 20.16001319885254, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 1.2456, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 20.684553146362305, |
|
"learning_rate": 7.722222222222223e-06, |
|
"loss": 1.1862, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 17.660783767700195, |
|
"learning_rate": 7.666666666666667e-06, |
|
"loss": 1.1802, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 27.349889755249023, |
|
"learning_rate": 7.611111111111111e-06, |
|
"loss": 1.1869, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 20.43325424194336, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 1.2752, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 18.21103286743164, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.083, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 17.000732421875, |
|
"learning_rate": 7.444444444444445e-06, |
|
"loss": 1.1785, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 16.118410110473633, |
|
"learning_rate": 7.38888888888889e-06, |
|
"loss": 1.2115, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 16.376646041870117, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 1.1399, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 23.872587203979492, |
|
"learning_rate": 7.277777777777778e-06, |
|
"loss": 1.2943, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 17.694063186645508, |
|
"learning_rate": 7.222222222222223e-06, |
|
"loss": 1.1768, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 15.69645881652832, |
|
"learning_rate": 7.166666666666667e-06, |
|
"loss": 1.1779, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 17.946985244750977, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 1.1575, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 24.68529510498047, |
|
"learning_rate": 7.055555555555557e-06, |
|
"loss": 1.2993, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 20.466711044311523, |
|
"learning_rate": 7e-06, |
|
"loss": 1.1476, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 20.040111541748047, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 1.1855, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 20.88412857055664, |
|
"learning_rate": 6.88888888888889e-06, |
|
"loss": 1.2041, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 16.89543342590332, |
|
"learning_rate": 6.833333333333334e-06, |
|
"loss": 1.2062, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 20.681123733520508, |
|
"learning_rate": 6.777777777777779e-06, |
|
"loss": 1.0671, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 20.805986404418945, |
|
"learning_rate": 6.7222222222222235e-06, |
|
"loss": 1.197, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 16.39628791809082, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.333, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_cer": 72.85421517713513, |
|
"eval_loss": 1.1428595781326294, |
|
"eval_runtime": 1198.4252, |
|
"eval_samples_per_second": 3.61, |
|
"eval_steps_per_second": 0.451, |
|
"eval_wer": 92.31235325304856, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 18.98177719116211, |
|
"learning_rate": 6.6111111111111115e-06, |
|
"loss": 1.1586, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 26.32819366455078, |
|
"learning_rate": 6.555555555555556e-06, |
|
"loss": 1.2463, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 17.621980667114258, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 1.1764, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 19.834068298339844, |
|
"learning_rate": 6.444444444444445e-06, |
|
"loss": 1.2383, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 18.443336486816406, |
|
"learning_rate": 6.3888888888888885e-06, |
|
"loss": 1.1966, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 23.796865463256836, |
|
"learning_rate": 6.333333333333333e-06, |
|
"loss": 1.1506, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 13.802796363830566, |
|
"learning_rate": 6.277777777777778e-06, |
|
"loss": 1.004, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 11.86549186706543, |
|
"learning_rate": 6.222222222222223e-06, |
|
"loss": 0.8599, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 20.885765075683594, |
|
"learning_rate": 6.166666666666667e-06, |
|
"loss": 0.9144, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 15.317635536193848, |
|
"learning_rate": 6.111111111111112e-06, |
|
"loss": 0.863, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 16.995641708374023, |
|
"learning_rate": 6.055555555555555e-06, |
|
"loss": 0.8261, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 16.974023818969727, |
|
"learning_rate": 6e-06, |
|
"loss": 0.7936, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 15.467360496520996, |
|
"learning_rate": 5.944444444444445e-06, |
|
"loss": 0.8796, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 14.19994068145752, |
|
"learning_rate": 5.88888888888889e-06, |
|
"loss": 0.9499, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 18.083833694458008, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 0.8659, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 13.523449897766113, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.7468, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 22.28549575805664, |
|
"learning_rate": 5.722222222222222e-06, |
|
"loss": 0.9936, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 12.301285743713379, |
|
"learning_rate": 5.666666666666667e-06, |
|
"loss": 0.9272, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 15.811965942382812, |
|
"learning_rate": 5.611111111111112e-06, |
|
"loss": 0.8392, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 10.93188762664795, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.7856, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 15.368990898132324, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.991, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 23.429412841796875, |
|
"learning_rate": 5.444444444444445e-06, |
|
"loss": 0.956, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 15.563962936401367, |
|
"learning_rate": 5.388888888888889e-06, |
|
"loss": 0.9221, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 15.646391868591309, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.8138, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 13.61239242553711, |
|
"learning_rate": 5.2777777777777785e-06, |
|
"loss": 0.9199, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 16.09264373779297, |
|
"learning_rate": 5.2222222222222226e-06, |
|
"loss": 0.9146, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 18.509414672851562, |
|
"learning_rate": 5.1666666666666675e-06, |
|
"loss": 0.8323, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 12.82259750366211, |
|
"learning_rate": 5.1111111111111115e-06, |
|
"loss": 0.8451, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 13.008484840393066, |
|
"learning_rate": 5.0555555555555555e-06, |
|
"loss": 0.8415, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 15.046307563781738, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8555, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 16.91812515258789, |
|
"learning_rate": 4.944444444444445e-06, |
|
"loss": 1.015, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 16.76761245727539, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.8488, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 20.7960262298584, |
|
"learning_rate": 4.833333333333333e-06, |
|
"loss": 0.9098, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 15.695616722106934, |
|
"learning_rate": 4.777777777777778e-06, |
|
"loss": 0.8866, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 18.41292953491211, |
|
"learning_rate": 4.722222222222222e-06, |
|
"loss": 0.8872, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 22.192108154296875, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.8102, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 21.19410514831543, |
|
"learning_rate": 4.611111111111112e-06, |
|
"loss": 0.9069, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 18.010290145874023, |
|
"learning_rate": 4.555555555555556e-06, |
|
"loss": 0.8646, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 18.863052368164062, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.9674, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 13.40828800201416, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.918, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_cer": 64.30927214097356, |
|
"eval_loss": 1.1143155097961426, |
|
"eval_runtime": 1160.055, |
|
"eval_samples_per_second": 3.729, |
|
"eval_steps_per_second": 0.466, |
|
"eval_wer": 82.85238203438612, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 16.100902557373047, |
|
"learning_rate": 4.388888888888889e-06, |
|
"loss": 0.8247, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 14.138301849365234, |
|
"learning_rate": 4.333333333333334e-06, |
|
"loss": 0.8077, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 21.747058868408203, |
|
"learning_rate": 4.277777777777778e-06, |
|
"loss": 0.9274, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 12.049752235412598, |
|
"learning_rate": 4.222222222222223e-06, |
|
"loss": 0.9581, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 13.890820503234863, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.8945, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 16.04409408569336, |
|
"learning_rate": 4.111111111111111e-06, |
|
"loss": 0.8455, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 14.720856666564941, |
|
"learning_rate": 4.055555555555556e-06, |
|
"loss": 0.8707, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 23.8120174407959, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.9138, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 19.527795791625977, |
|
"learning_rate": 3.944444444444445e-06, |
|
"loss": 0.8293, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 16.076868057250977, |
|
"learning_rate": 3.88888888888889e-06, |
|
"loss": 0.8586, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 18.909570693969727, |
|
"learning_rate": 3.833333333333334e-06, |
|
"loss": 0.9239, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 20.29988670349121, |
|
"learning_rate": 3.777777777777778e-06, |
|
"loss": 0.8682, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 15.01752758026123, |
|
"learning_rate": 3.7222222222222225e-06, |
|
"loss": 0.8631, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 14.974254608154297, |
|
"learning_rate": 3.6666666666666666e-06, |
|
"loss": 0.8882, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 15.673378944396973, |
|
"learning_rate": 3.6111111111111115e-06, |
|
"loss": 0.958, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 20.833036422729492, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 0.8278, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 15.56125545501709, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.8853, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 13.465311050415039, |
|
"learning_rate": 3.444444444444445e-06, |
|
"loss": 0.8877, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 19.25458335876465, |
|
"learning_rate": 3.3888888888888893e-06, |
|
"loss": 0.8935, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 17.35223960876465, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.8421, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 17.846412658691406, |
|
"learning_rate": 3.277777777777778e-06, |
|
"loss": 0.791, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 12.937151908874512, |
|
"learning_rate": 3.2222222222222227e-06, |
|
"loss": 0.811, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 19.468202590942383, |
|
"learning_rate": 3.1666666666666667e-06, |
|
"loss": 0.8527, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 14.417373657226562, |
|
"learning_rate": 3.1111111111111116e-06, |
|
"loss": 0.8507, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 19.41051483154297, |
|
"learning_rate": 3.055555555555556e-06, |
|
"loss": 0.8945, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 17.270503997802734, |
|
"learning_rate": 3e-06, |
|
"loss": 0.8866, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 15.31128215789795, |
|
"learning_rate": 2.944444444444445e-06, |
|
"loss": 0.8862, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 16.625324249267578, |
|
"learning_rate": 2.888888888888889e-06, |
|
"loss": 0.8744, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 23.434606552124023, |
|
"learning_rate": 2.8333333333333335e-06, |
|
"loss": 0.927, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 14.484862327575684, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.8662, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 15.611286163330078, |
|
"learning_rate": 2.7222222222222224e-06, |
|
"loss": 0.8296, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 14.025354385375977, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.8567, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 17.058929443359375, |
|
"learning_rate": 2.6111111111111113e-06, |
|
"loss": 0.8872, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 18.206918716430664, |
|
"learning_rate": 2.5555555555555557e-06, |
|
"loss": 0.9316, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 14.466046333312988, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.8486, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 16.97430992126465, |
|
"learning_rate": 2.4444444444444447e-06, |
|
"loss": 0.8618, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 11.907506942749023, |
|
"learning_rate": 2.388888888888889e-06, |
|
"loss": 0.7407, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 17.779897689819336, |
|
"learning_rate": 2.3333333333333336e-06, |
|
"loss": 0.762, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 14.020506858825684, |
|
"learning_rate": 2.277777777777778e-06, |
|
"loss": 0.8425, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 15.239115715026855, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.8402, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_cer": 53.700996775354604, |
|
"eval_loss": 1.0890547037124634, |
|
"eval_runtime": 1101.5884, |
|
"eval_samples_per_second": 3.927, |
|
"eval_steps_per_second": 0.491, |
|
"eval_wer": 75.64947360448383, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 16.369224548339844, |
|
"learning_rate": 2.166666666666667e-06, |
|
"loss": 0.9486, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 13.583636283874512, |
|
"learning_rate": 2.1111111111111114e-06, |
|
"loss": 0.8256, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 11.406463623046875, |
|
"learning_rate": 2.0555555555555555e-06, |
|
"loss": 0.7751, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 17.16242790222168, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.777, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 10.725821495056152, |
|
"learning_rate": 1.944444444444445e-06, |
|
"loss": 0.7842, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 18.373470306396484, |
|
"learning_rate": 1.888888888888889e-06, |
|
"loss": 0.7934, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 19.23195457458496, |
|
"learning_rate": 1.8333333333333333e-06, |
|
"loss": 0.9092, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 16.823266983032227, |
|
"learning_rate": 1.777777777777778e-06, |
|
"loss": 0.8693, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 17.290674209594727, |
|
"learning_rate": 1.7222222222222224e-06, |
|
"loss": 0.8973, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 17.71505355834961, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.8385, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 16.774585723876953, |
|
"learning_rate": 1.6111111111111113e-06, |
|
"loss": 0.7899, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 19.144384384155273, |
|
"learning_rate": 1.5555555555555558e-06, |
|
"loss": 0.9822, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 16.863649368286133, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.7994, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 13.130481719970703, |
|
"learning_rate": 1.4444444444444445e-06, |
|
"loss": 0.5804, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 19.954849243164062, |
|
"learning_rate": 1.3888888888888892e-06, |
|
"loss": 0.7005, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 16.30689811706543, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.6735, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 12.148699760437012, |
|
"learning_rate": 1.2777777777777779e-06, |
|
"loss": 0.6317, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 9.524985313415527, |
|
"learning_rate": 1.2222222222222223e-06, |
|
"loss": 0.5642, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 17.790037155151367, |
|
"learning_rate": 1.1666666666666668e-06, |
|
"loss": 0.5801, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 11.228938102722168, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.7253, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 15.2631254196167, |
|
"learning_rate": 1.0555555555555557e-06, |
|
"loss": 0.6465, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 20.890735626220703, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.6849, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 15.065770149230957, |
|
"learning_rate": 9.444444444444445e-07, |
|
"loss": 0.5892, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 10.663036346435547, |
|
"learning_rate": 8.88888888888889e-07, |
|
"loss": 0.5681, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 17.5535831451416, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.6496, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 14.98912239074707, |
|
"learning_rate": 7.777777777777779e-07, |
|
"loss": 0.6773, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 14.315783500671387, |
|
"learning_rate": 7.222222222222222e-07, |
|
"loss": 0.6852, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 17.369815826416016, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 0.6886, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 9.153715133666992, |
|
"learning_rate": 6.111111111111112e-07, |
|
"loss": 0.6393, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 7.958740234375, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 0.6516, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 12.058722496032715, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.6594, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 10.650951385498047, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 0.6383, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 9.111865043640137, |
|
"learning_rate": 3.8888888888888895e-07, |
|
"loss": 0.5531, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 15.841039657592773, |
|
"learning_rate": 3.3333333333333335e-07, |
|
"loss": 0.6738, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 16.60221290588379, |
|
"learning_rate": 2.7777777777777776e-07, |
|
"loss": 0.643, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 17.007307052612305, |
|
"learning_rate": 2.2222222222222224e-07, |
|
"loss": 0.6843, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 13.0649995803833, |
|
"learning_rate": 1.6666666666666668e-07, |
|
"loss": 0.6416, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 15.913739204406738, |
|
"learning_rate": 1.1111111111111112e-07, |
|
"loss": 0.6124, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 17.32563018798828, |
|
"learning_rate": 5.555555555555556e-08, |
|
"loss": 0.6627, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 16.87995147705078, |
|
"learning_rate": 0.0, |
|
"loss": 0.6894, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_cer": 57.09752532338271, |
|
"eval_loss": 1.100616455078125, |
|
"eval_runtime": 1127.1011, |
|
"eval_samples_per_second": 3.838, |
|
"eval_steps_per_second": 0.48, |
|
"eval_wer": 79.2130576384155, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"total_flos": 1.154283884199936e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|