|
{ |
|
"best_metric": 13.88186822698473, |
|
"best_model_checkpoint": "/speechbrain/data/whis/whisper-medium-ar-aug24-cont2/checkpoint-3000", |
|
"epoch": 0.7232084155161078, |
|
"eval_steps": 300, |
|
"global_step": 3300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005478851632697786, |
|
"grad_norm": null,
|
"learning_rate": 4.6000000000000004e-07, |
|
"loss": 0.9139, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.010957703265395573, |
|
"grad_norm": 3.586649179458618, |
|
"learning_rate": 9.600000000000001e-07, |
|
"loss": 0.8206, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01643655489809336, |
|
"grad_norm": 2.649549961090088, |
|
"learning_rate": 1.46e-06, |
|
"loss": 0.6306, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.021915406530791146, |
|
"grad_norm": 1.3442453145980835, |
|
"learning_rate": 1.9600000000000003e-06, |
|
"loss": 0.4395, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.027394258163488932, |
|
"grad_norm": 1.2841758728027344, |
|
"learning_rate": 2.46e-06, |
|
"loss": 0.3333, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03287310979618672, |
|
"grad_norm": 1.4466370344161987, |
|
"learning_rate": 2.96e-06, |
|
"loss": 0.2598, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03835196142888451, |
|
"grad_norm": 1.1411246061325073, |
|
"learning_rate": 3.46e-06, |
|
"loss": 0.2296, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.04383081306158229, |
|
"grad_norm": 0.7538527846336365, |
|
"learning_rate": 3.96e-06, |
|
"loss": 0.2049, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04930966469428008, |
|
"grad_norm": 0.5670434236526489, |
|
"learning_rate": 4.4600000000000005e-06, |
|
"loss": 0.1957, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.054788516326977864, |
|
"grad_norm": 0.5980058312416077, |
|
"learning_rate": 4.960000000000001e-06, |
|
"loss": 0.189, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.060267367959675654, |
|
"grad_norm": 0.5877336263656616, |
|
"learning_rate": 5.460000000000001e-06, |
|
"loss": 0.177, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.06574621959237344, |
|
"grad_norm": 0.5727600455284119, |
|
"learning_rate": 5.9600000000000005e-06, |
|
"loss": 0.1771, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06574621959237344, |
|
"eval_loss": 0.12760603427886963, |
|
"eval_runtime": 1379.0902, |
|
"eval_samples_per_second": 2.415, |
|
"eval_steps_per_second": 0.051, |
|
"eval_wer": 15.454304146614502, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07122507122507123, |
|
"grad_norm": 0.6107267141342163, |
|
"learning_rate": 6.460000000000001e-06, |
|
"loss": 0.1789, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.07670392285776902, |
|
"grad_norm": 0.6271827220916748, |
|
"learning_rate": 6.96e-06, |
|
"loss": 0.1771, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08218277449046679, |
|
"grad_norm": 0.5440804362297058, |
|
"learning_rate": 7.4600000000000006e-06, |
|
"loss": 0.1705, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.08766162612316458, |
|
"grad_norm": 0.6523880958557129, |
|
"learning_rate": 7.960000000000002e-06, |
|
"loss": 0.1673, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09314047775586237, |
|
"grad_norm": 0.6077700257301331, |
|
"learning_rate": 8.46e-06, |
|
"loss": 0.166, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.09861932938856016, |
|
"grad_norm": 0.5785207748413086, |
|
"learning_rate": 8.96e-06, |
|
"loss": 0.167, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.10409818102125794, |
|
"grad_norm": 0.6054052710533142, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 0.1653, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.10957703265395573, |
|
"grad_norm": 0.5897749662399292, |
|
"learning_rate": 9.960000000000001e-06, |
|
"loss": 0.1604, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11505588428665352, |
|
"grad_norm": 0.6138154864311218, |
|
"learning_rate": 9.997465843984135e-06, |
|
"loss": 0.1633, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.12053473591935131, |
|
"grad_norm": 0.5625219941139221, |
|
"learning_rate": 9.994711326575585e-06, |
|
"loss": 0.1615, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.12601358755204908, |
|
"grad_norm": 0.5982335805892944, |
|
"learning_rate": 9.991956809167035e-06, |
|
"loss": 0.1528, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.13149243918474687, |
|
"grad_norm": 0.5922495722770691, |
|
"learning_rate": 9.989202291758486e-06, |
|
"loss": 0.1612, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13149243918474687, |
|
"eval_loss": 0.11784320324659348, |
|
"eval_runtime": 1336.8758, |
|
"eval_samples_per_second": 2.491, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 14.94362312317511, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13697129081744466, |
|
"grad_norm": 0.5509978532791138, |
|
"learning_rate": 9.986447774349934e-06, |
|
"loss": 0.1664, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.14245014245014245, |
|
"grad_norm": 0.636675238609314, |
|
"learning_rate": 9.983693256941385e-06, |
|
"loss": 0.1572, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.14792899408284024, |
|
"grad_norm": 0.5626579523086548, |
|
"learning_rate": 9.980938739532835e-06, |
|
"loss": 0.1563, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.15340784571553803, |
|
"grad_norm": 0.587232232093811, |
|
"learning_rate": 9.978184222124284e-06, |
|
"loss": 0.1603, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.15888669734823582, |
|
"grad_norm": 0.5765535235404968, |
|
"learning_rate": 9.975429704715734e-06, |
|
"loss": 0.1552, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.16436554898093358, |
|
"grad_norm": 0.5985578298568726, |
|
"learning_rate": 9.972675187307184e-06, |
|
"loss": 0.1567, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.16984440061363137, |
|
"grad_norm": 0.5767161250114441, |
|
"learning_rate": 9.969920669898635e-06, |
|
"loss": 0.1559, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.17532325224632916, |
|
"grad_norm": 0.549917459487915, |
|
"learning_rate": 9.967166152490085e-06, |
|
"loss": 0.1553, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.18080210387902695, |
|
"grad_norm": 0.5054299831390381, |
|
"learning_rate": 9.964411635081535e-06, |
|
"loss": 0.1569, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.18628095551172474, |
|
"grad_norm": 0.6237090826034546, |
|
"learning_rate": 9.961657117672986e-06, |
|
"loss": 0.1516, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.19175980714442253, |
|
"grad_norm": 0.6381577253341675, |
|
"learning_rate": 9.958902600264434e-06, |
|
"loss": 0.1548, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.19723865877712032, |
|
"grad_norm": 0.5359848737716675, |
|
"learning_rate": 9.956148082855884e-06, |
|
"loss": 0.1517, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19723865877712032, |
|
"eval_loss": 0.11450996994972229, |
|
"eval_runtime": 1336.2266, |
|
"eval_samples_per_second": 2.492, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 14.663758381516232, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2027175104098181, |
|
"grad_norm": 0.5749765038490295, |
|
"learning_rate": 9.953393565447335e-06, |
|
"loss": 0.1526, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.20819636204251588, |
|
"grad_norm": 0.5857889652252197, |
|
"learning_rate": 9.950639048038785e-06, |
|
"loss": 0.1505, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.21367521367521367, |
|
"grad_norm": 0.5977457165718079, |
|
"learning_rate": 9.947884530630234e-06, |
|
"loss": 0.1526, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.21915406530791146, |
|
"grad_norm": 0.5572954416275024, |
|
"learning_rate": 9.945130013221684e-06, |
|
"loss": 0.1413, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22463291694060925, |
|
"grad_norm": 0.6073783040046692, |
|
"learning_rate": 9.942375495813134e-06, |
|
"loss": 0.1512, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.23011176857330704, |
|
"grad_norm": 0.5608137845993042, |
|
"learning_rate": 9.939620978404585e-06, |
|
"loss": 0.1523, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.23559062020600482, |
|
"grad_norm": 0.6059215068817139, |
|
"learning_rate": 9.936866460996035e-06, |
|
"loss": 0.1461, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.24106947183870261, |
|
"grad_norm": 0.6084004640579224, |
|
"learning_rate": 9.934111943587485e-06, |
|
"loss": 0.1499, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2465483234714004, |
|
"grad_norm": 0.5527833104133606, |
|
"learning_rate": 9.931357426178936e-06, |
|
"loss": 0.1475, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.25202717510409817, |
|
"grad_norm": 0.5086527466773987, |
|
"learning_rate": 9.928602908770384e-06, |
|
"loss": 0.1487, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.25750602673679596, |
|
"grad_norm": 0.6129872798919678, |
|
"learning_rate": 9.925848391361834e-06, |
|
"loss": 0.1496, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.26298487836949375, |
|
"grad_norm": 0.6175655126571655, |
|
"learning_rate": 9.923093873953285e-06, |
|
"loss": 0.149, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.26298487836949375, |
|
"eval_loss": 0.11248071491718292, |
|
"eval_runtime": 1336.7392, |
|
"eval_samples_per_second": 2.491, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 14.334268139274545, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.26846373000219154, |
|
"grad_norm": 0.5802096128463745, |
|
"learning_rate": 9.920339356544733e-06, |
|
"loss": 0.145, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.2739425816348893, |
|
"grad_norm": 0.5316102504730225, |
|
"learning_rate": 9.917584839136184e-06, |
|
"loss": 0.1474, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2794214332675871, |
|
"grad_norm": 0.5899500846862793, |
|
"learning_rate": 9.914830321727634e-06, |
|
"loss": 0.1431, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.2849002849002849, |
|
"grad_norm": 0.5635184049606323, |
|
"learning_rate": 9.912075804319084e-06, |
|
"loss": 0.1418, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2903791365329827, |
|
"grad_norm": 0.5536847114562988, |
|
"learning_rate": 9.909321286910533e-06, |
|
"loss": 0.1461, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.2958579881656805, |
|
"grad_norm": 0.5551804900169373, |
|
"learning_rate": 9.906566769501983e-06, |
|
"loss": 0.1437, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3013368397983783, |
|
"grad_norm": 0.5187482833862305, |
|
"learning_rate": 9.903812252093434e-06, |
|
"loss": 0.1432, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.30681569143107607, |
|
"grad_norm": 0.6163837313652039, |
|
"learning_rate": 9.901057734684884e-06, |
|
"loss": 0.1497, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.31229454306377386, |
|
"grad_norm": 0.5211063027381897, |
|
"learning_rate": 9.898303217276334e-06, |
|
"loss": 0.1368, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.31777339469647164, |
|
"grad_norm": 0.5287565588951111, |
|
"learning_rate": 9.895548699867785e-06, |
|
"loss": 0.1393, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3232522463291694, |
|
"grad_norm": 0.5906191468238831, |
|
"learning_rate": 9.892794182459235e-06, |
|
"loss": 0.1511, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.32873109796186717, |
|
"grad_norm": 0.5539998412132263, |
|
"learning_rate": 9.890039665050683e-06, |
|
"loss": 0.1405, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.32873109796186717, |
|
"eval_loss": 0.11105980724096298, |
|
"eval_runtime": 1335.343, |
|
"eval_samples_per_second": 2.494, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 14.397165576059734, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.33420994959456496, |
|
"grad_norm": 0.5551910400390625, |
|
"learning_rate": 9.887285147642134e-06, |
|
"loss": 0.1449, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.33968880122726275, |
|
"grad_norm": 0.5776943564414978, |
|
"learning_rate": 9.884530630233584e-06, |
|
"loss": 0.1438, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.34516765285996054, |
|
"grad_norm": 0.5213550925254822, |
|
"learning_rate": 9.881776112825033e-06, |
|
"loss": 0.1423, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.35064650449265833, |
|
"grad_norm": 0.5484446883201599, |
|
"learning_rate": 9.879021595416483e-06, |
|
"loss": 0.147, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3561253561253561, |
|
"grad_norm": 0.5987962484359741, |
|
"learning_rate": 9.876267078007933e-06, |
|
"loss": 0.1374, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.3616042077580539, |
|
"grad_norm": 0.5952353477478027, |
|
"learning_rate": 9.873512560599384e-06, |
|
"loss": 0.1439, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3670830593907517, |
|
"grad_norm": 0.6191999316215515, |
|
"learning_rate": 9.870758043190834e-06, |
|
"loss": 0.1396, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.3725619110234495, |
|
"grad_norm": 0.5286071300506592, |
|
"learning_rate": 9.868003525782284e-06, |
|
"loss": 0.1446, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.3780407626561473, |
|
"grad_norm": 0.5466740131378174, |
|
"learning_rate": 9.865249008373735e-06, |
|
"loss": 0.1447, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.38351961428884507, |
|
"grad_norm": 0.5799579620361328, |
|
"learning_rate": 9.862494490965183e-06, |
|
"loss": 0.1426, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.38899846592154286, |
|
"grad_norm": 0.6488582491874695, |
|
"learning_rate": 9.859739973556633e-06, |
|
"loss": 0.1382, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.39447731755424065, |
|
"grad_norm": 0.5399666428565979, |
|
"learning_rate": 9.856985456148084e-06, |
|
"loss": 0.1422, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39447731755424065, |
|
"eval_loss": 0.11009500920772552, |
|
"eval_runtime": 1344.7575, |
|
"eval_samples_per_second": 2.476, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 14.303684981938625, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39995616918693844, |
|
"grad_norm": 0.508437991142273, |
|
"learning_rate": 9.854230938739534e-06, |
|
"loss": 0.1382, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.4054350208196362, |
|
"grad_norm": 0.5800564289093018, |
|
"learning_rate": 9.851476421330983e-06, |
|
"loss": 0.1446, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.410913872452334, |
|
"grad_norm": 0.5412048101425171, |
|
"learning_rate": 9.848721903922433e-06, |
|
"loss": 0.1404, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.41639272408503175, |
|
"grad_norm": 0.5806304216384888, |
|
"learning_rate": 9.845967386513883e-06, |
|
"loss": 0.1383, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.42187157571772954, |
|
"grad_norm": 0.5252566933631897, |
|
"learning_rate": 9.843212869105334e-06, |
|
"loss": 0.143, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.42735042735042733, |
|
"grad_norm": 0.5622444748878479, |
|
"learning_rate": 9.840458351696784e-06, |
|
"loss": 0.1416, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4328292789831251, |
|
"grad_norm": 0.5336723923683167, |
|
"learning_rate": 9.837703834288234e-06, |
|
"loss": 0.1373, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.4383081306158229, |
|
"grad_norm": 0.5631132125854492, |
|
"learning_rate": 9.834949316879685e-06, |
|
"loss": 0.1442, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4437869822485207, |
|
"grad_norm": 0.5697339177131653, |
|
"learning_rate": 9.832194799471133e-06, |
|
"loss": 0.1392, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.4492658338812185, |
|
"grad_norm": 0.5948196649551392, |
|
"learning_rate": 9.829440282062584e-06, |
|
"loss": 0.1418, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4547446855139163, |
|
"grad_norm": 0.5385356545448303, |
|
"learning_rate": 9.826685764654034e-06, |
|
"loss": 0.1378, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.46022353714661407, |
|
"grad_norm": 0.5994429588317871, |
|
"learning_rate": 9.823931247245482e-06, |
|
"loss": 0.1407, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.46022353714661407, |
|
"eval_loss": 0.10945474356412888, |
|
"eval_runtime": 1339.6539, |
|
"eval_samples_per_second": 2.486, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 13.913605465729553, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.46570238877931186, |
|
"grad_norm": 0.5439214110374451, |
|
"learning_rate": 9.821176729836933e-06, |
|
"loss": 0.1368, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.47118124041200965, |
|
"grad_norm": 0.5190943479537964, |
|
"learning_rate": 9.818422212428383e-06, |
|
"loss": 0.1382, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.47666009204470744, |
|
"grad_norm": 0.49516212940216064, |
|
"learning_rate": 9.815667695019833e-06, |
|
"loss": 0.1361, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.48213894367740523, |
|
"grad_norm": 0.5503794550895691, |
|
"learning_rate": 9.812913177611284e-06, |
|
"loss": 0.1359, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.487617795310103, |
|
"grad_norm": 0.555046796798706, |
|
"learning_rate": 9.810158660202734e-06, |
|
"loss": 0.1358, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.4930966469428008, |
|
"grad_norm": 0.5206555724143982, |
|
"learning_rate": 9.807404142794184e-06, |
|
"loss": 0.1357, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.4985754985754986, |
|
"grad_norm": 0.6200060844421387, |
|
"learning_rate": 9.804649625385633e-06, |
|
"loss": 0.1353, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.5040543502081963, |
|
"grad_norm": 0.5105571150779724, |
|
"learning_rate": 9.801895107977083e-06, |
|
"loss": 0.1309, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5095332018408941, |
|
"grad_norm": 0.5761134028434753, |
|
"learning_rate": 9.799140590568534e-06, |
|
"loss": 0.1378, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.5150120534735919, |
|
"grad_norm": 0.5118030309677124, |
|
"learning_rate": 9.796386073159984e-06, |
|
"loss": 0.1352, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5204909051062897, |
|
"grad_norm": 0.4720148742198944, |
|
"learning_rate": 9.793631555751433e-06, |
|
"loss": 0.1342, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.5259697567389875, |
|
"grad_norm": 0.5468887686729431, |
|
"learning_rate": 9.790877038342883e-06, |
|
"loss": 0.129, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5259697567389875, |
|
"eval_loss": 0.10870281606912613, |
|
"eval_runtime": 1334.3649, |
|
"eval_samples_per_second": 2.496, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 14.091911043405, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5314486083716853, |
|
"grad_norm": 0.5378099083900452, |
|
"learning_rate": 9.788122520934333e-06, |
|
"loss": 0.1331, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.5369274600043831, |
|
"grad_norm": 0.5661837458610535, |
|
"learning_rate": 9.785368003525782e-06, |
|
"loss": 0.1373, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5424063116370809, |
|
"grad_norm": 0.5715429782867432, |
|
"learning_rate": 9.782613486117232e-06, |
|
"loss": 0.1394, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.5478851632697787, |
|
"grad_norm": 0.5901737213134766, |
|
"learning_rate": 9.779858968708682e-06, |
|
"loss": 0.135, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5533640149024764, |
|
"grad_norm": 0.5925373435020447, |
|
"learning_rate": 9.777104451300133e-06, |
|
"loss": 0.1354, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.5588428665351742, |
|
"grad_norm": 0.5552576780319214, |
|
"learning_rate": 9.774349933891583e-06, |
|
"loss": 0.1332, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.564321718167872, |
|
"grad_norm": 0.5988050103187561, |
|
"learning_rate": 9.771595416483033e-06, |
|
"loss": 0.1345, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.5698005698005698, |
|
"grad_norm": 0.535481333732605, |
|
"learning_rate": 9.768840899074484e-06, |
|
"loss": 0.1361, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5752794214332676, |
|
"grad_norm": 0.48405903577804565, |
|
"learning_rate": 9.766086381665934e-06, |
|
"loss": 0.1334, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.5807582730659654, |
|
"grad_norm": 0.5512611269950867, |
|
"learning_rate": 9.763331864257383e-06, |
|
"loss": 0.1317, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5862371246986632, |
|
"grad_norm": 0.509512722492218, |
|
"learning_rate": 9.760577346848833e-06, |
|
"loss": 0.1332, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.591715976331361, |
|
"grad_norm": 0.5346803665161133, |
|
"learning_rate": 9.757822829440283e-06, |
|
"loss": 0.1289, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.591715976331361, |
|
"eval_loss": 0.1080569475889206, |
|
"eval_runtime": 1334.6136, |
|
"eval_samples_per_second": 2.495, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 13.955152396450046, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5971948279640588, |
|
"grad_norm": 0.5665607452392578, |
|
"learning_rate": 9.755068312031732e-06, |
|
"loss": 0.1278, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.6026736795967566, |
|
"grad_norm": 0.5593467950820923, |
|
"learning_rate": 9.752313794623182e-06, |
|
"loss": 0.1311, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6081525312294543, |
|
"grad_norm": 0.5182927250862122, |
|
"learning_rate": 9.749559277214632e-06, |
|
"loss": 0.1304, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.6136313828621521, |
|
"grad_norm": 0.5251736044883728, |
|
"learning_rate": 9.746804759806083e-06, |
|
"loss": 0.1264, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6191102344948499, |
|
"grad_norm": 0.5915343761444092, |
|
"learning_rate": 9.744050242397533e-06, |
|
"loss": 0.1303, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.6245890861275477, |
|
"grad_norm": 0.5850628018379211, |
|
"learning_rate": 9.741295724988983e-06, |
|
"loss": 0.1324, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6300679377602455, |
|
"grad_norm": 0.5724135637283325, |
|
"learning_rate": 9.738541207580434e-06, |
|
"loss": 0.1291, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.6355467893929433, |
|
"grad_norm": 0.4958917200565338, |
|
"learning_rate": 9.735786690171882e-06, |
|
"loss": 0.1297, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6410256410256411, |
|
"grad_norm": 0.582492470741272, |
|
"learning_rate": 9.733032172763333e-06, |
|
"loss": 0.1312, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.6465044926583388, |
|
"grad_norm": 0.5806357264518738, |
|
"learning_rate": 9.730277655354783e-06, |
|
"loss": 0.1309, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.6519833442910365, |
|
"grad_norm": 0.5375156998634338, |
|
"learning_rate": 9.727523137946233e-06, |
|
"loss": 0.1294, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.6574621959237343, |
|
"grad_norm": 0.5173301696777344, |
|
"learning_rate": 9.724768620537682e-06, |
|
"loss": 0.1299, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6574621959237343, |
|
"eval_loss": 0.10775619745254517, |
|
"eval_runtime": 1347.5045, |
|
"eval_samples_per_second": 2.471, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 13.88186822698473, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6629410475564321, |
|
"grad_norm": 0.5214000940322876, |
|
"learning_rate": 9.722014103129132e-06, |
|
"loss": 0.1323, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.6684198991891299, |
|
"grad_norm": 0.5085532069206238, |
|
"learning_rate": 9.719259585720582e-06, |
|
"loss": 0.1305, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.6738987508218277, |
|
"grad_norm": 0.5308182239532471, |
|
"learning_rate": 9.716505068312033e-06, |
|
"loss": 0.1303, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.6793776024545255, |
|
"grad_norm": 0.5280865430831909, |
|
"learning_rate": 9.713750550903483e-06, |
|
"loss": 0.1255, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6848564540872233, |
|
"grad_norm": 0.6718000769615173, |
|
"learning_rate": 9.710996033494933e-06, |
|
"loss": 0.1281, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.6903353057199211, |
|
"grad_norm": 0.5682989954948425, |
|
"learning_rate": 9.708241516086384e-06, |
|
"loss": 0.1268, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.6958141573526189, |
|
"grad_norm": 0.6062389016151428, |
|
"learning_rate": 9.705486998677832e-06, |
|
"loss": 0.1286, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.7012930089853167, |
|
"grad_norm": 0.536504864692688, |
|
"learning_rate": 9.702732481269283e-06, |
|
"loss": 0.1304, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7067718606180144, |
|
"grad_norm": 0.5489104986190796, |
|
"learning_rate": 9.699977963860733e-06, |
|
"loss": 0.1264, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.7122507122507122, |
|
"grad_norm": 0.6555972695350647, |
|
"learning_rate": 9.697223446452182e-06, |
|
"loss": 0.1271, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.71772956388341, |
|
"grad_norm": 0.6393175721168518, |
|
"learning_rate": 9.694468929043632e-06, |
|
"loss": 0.1297, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.7232084155161078, |
|
"grad_norm": 0.4772554934024811, |
|
"learning_rate": 9.691714411635082e-06, |
|
"loss": 0.1264, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7232084155161078, |
|
"eval_loss": 0.10748741775751114, |
|
"eval_runtime": 1333.6749, |
|
"eval_samples_per_second": 2.497, |
|
"eval_steps_per_second": 0.052, |
|
"eval_wer": 14.096527369040611, |
|
"step": 3300 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 91260, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.233276471284146e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|