|
{ |
|
"best_metric": 2.2782892974889872, |
|
"best_model_checkpoint": "./whisper-medium_new_data/checkpoint-8000", |
|
"epoch": 3.261312678353037, |
|
"eval_steps": 1000, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010191602119853241, |
|
"grad_norm": 29.000947952270508, |
|
"learning_rate": 2.1000000000000003e-07, |
|
"loss": 3.777, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.020383204239706482, |
|
"grad_norm": 16.35529136657715, |
|
"learning_rate": 4.6000000000000004e-07, |
|
"loss": 3.379, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.030574806359559722, |
|
"grad_norm": 11.864029884338379, |
|
"learning_rate": 7.1e-07, |
|
"loss": 2.7049, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.040766408479412965, |
|
"grad_norm": 9.735739707946777, |
|
"learning_rate": 9.600000000000001e-07, |
|
"loss": 2.1205, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.050958010599266204, |
|
"grad_norm": 7.762232780456543, |
|
"learning_rate": 1.21e-06, |
|
"loss": 1.731, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.061149612719119444, |
|
"grad_norm": 8.16604995727539, |
|
"learning_rate": 1.46e-06, |
|
"loss": 1.4442, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07134121483897268, |
|
"grad_norm": 7.53511905670166, |
|
"learning_rate": 1.7100000000000004e-06, |
|
"loss": 1.244, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.08153281695882593, |
|
"grad_norm": 7.6938557624816895, |
|
"learning_rate": 1.9600000000000003e-06, |
|
"loss": 1.0535, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09172441907867916, |
|
"grad_norm": 7.2217912673950195, |
|
"learning_rate": 2.21e-06, |
|
"loss": 0.9467, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.10191602119853241, |
|
"grad_norm": 6.709696292877197, |
|
"learning_rate": 2.46e-06, |
|
"loss": 0.8746, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11210762331838565, |
|
"grad_norm": 6.30186653137207, |
|
"learning_rate": 2.7100000000000003e-06, |
|
"loss": 0.7813, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.12229922543823889, |
|
"grad_norm": 7.001236915588379, |
|
"learning_rate": 2.96e-06, |
|
"loss": 0.7312, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13249082755809213, |
|
"grad_norm": 6.0768232345581055, |
|
"learning_rate": 3.21e-06, |
|
"loss": 0.6602, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.14268242967794537, |
|
"grad_norm": 6.0217790603637695, |
|
"learning_rate": 3.46e-06, |
|
"loss": 0.6294, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15287403179779863, |
|
"grad_norm": 6.146008491516113, |
|
"learning_rate": 3.7100000000000005e-06, |
|
"loss": 0.6184, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.16306563391765186, |
|
"grad_norm": 5.988298416137695, |
|
"learning_rate": 3.96e-06, |
|
"loss": 0.5581, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1732572360375051, |
|
"grad_norm": 6.24209451675415, |
|
"learning_rate": 4.21e-06, |
|
"loss": 0.526, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.18344883815735832, |
|
"grad_norm": 5.728849411010742, |
|
"learning_rate": 4.4600000000000005e-06, |
|
"loss": 0.4973, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.19364044027721158, |
|
"grad_norm": 5.146097660064697, |
|
"learning_rate": 4.71e-06, |
|
"loss": 0.4877, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.20383204239706482, |
|
"grad_norm": 5.125807285308838, |
|
"learning_rate": 4.960000000000001e-06, |
|
"loss": 0.4718, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21402364451691805, |
|
"grad_norm": 5.3624653816223145, |
|
"learning_rate": 5.210000000000001e-06, |
|
"loss": 0.455, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2242152466367713, |
|
"grad_norm": 5.1377854347229, |
|
"learning_rate": 5.460000000000001e-06, |
|
"loss": 0.4426, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.23440684875662454, |
|
"grad_norm": 5.11069393157959, |
|
"learning_rate": 5.71e-06, |
|
"loss": 0.4176, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.24459845087647777, |
|
"grad_norm": 5.100331783294678, |
|
"learning_rate": 5.9600000000000005e-06, |
|
"loss": 0.4044, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.254790052996331, |
|
"grad_norm": 5.771068572998047, |
|
"learning_rate": 6.210000000000001e-06, |
|
"loss": 0.3994, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.26498165511618427, |
|
"grad_norm": 5.4431986808776855, |
|
"learning_rate": 6.460000000000001e-06, |
|
"loss": 0.366, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2751732572360375, |
|
"grad_norm": 5.490501880645752, |
|
"learning_rate": 6.710000000000001e-06, |
|
"loss": 0.3742, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.28536485935589073, |
|
"grad_norm": 5.428119659423828, |
|
"learning_rate": 6.96e-06, |
|
"loss": 0.3577, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.295556461475744, |
|
"grad_norm": 4.746588230133057, |
|
"learning_rate": 7.2100000000000004e-06, |
|
"loss": 0.3465, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.30574806359559725, |
|
"grad_norm": 4.743890762329102, |
|
"learning_rate": 7.4600000000000006e-06, |
|
"loss": 0.3226, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.31593966571545046, |
|
"grad_norm": 5.242619037628174, |
|
"learning_rate": 7.71e-06, |
|
"loss": 0.3185, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.3261312678353037, |
|
"grad_norm": 4.178338050842285, |
|
"learning_rate": 7.960000000000002e-06, |
|
"loss": 0.32, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.336322869955157, |
|
"grad_norm": 4.850783824920654, |
|
"learning_rate": 8.210000000000001e-06, |
|
"loss": 0.327, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.3465144720750102, |
|
"grad_norm": 5.147852897644043, |
|
"learning_rate": 8.46e-06, |
|
"loss": 0.3334, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.35670607419486344, |
|
"grad_norm": 4.720470905303955, |
|
"learning_rate": 8.710000000000001e-06, |
|
"loss": 0.2821, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.36689767631471665, |
|
"grad_norm": 4.410367965698242, |
|
"learning_rate": 8.96e-06, |
|
"loss": 0.2894, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3770892784345699, |
|
"grad_norm": 4.329741477966309, |
|
"learning_rate": 9.210000000000002e-06, |
|
"loss": 0.2792, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.38728088055442317, |
|
"grad_norm": 4.4180803298950195, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 0.2842, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3974724826742764, |
|
"grad_norm": 4.213972568511963, |
|
"learning_rate": 9.71e-06, |
|
"loss": 0.2854, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.40766408479412963, |
|
"grad_norm": 3.454677104949951, |
|
"learning_rate": 9.960000000000001e-06, |
|
"loss": 0.2733, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.40766408479412963, |
|
"eval_loss": 0.2585422396659851, |
|
"eval_runtime": 2430.2946, |
|
"eval_samples_per_second": 4.195, |
|
"eval_steps_per_second": 0.131, |
|
"eval_wer": 32.5923736874671, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4178556869139829, |
|
"grad_norm": 3.999333620071411, |
|
"learning_rate": 9.970000000000001e-06, |
|
"loss": 0.2649, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.4280472890338361, |
|
"grad_norm": 3.658658504486084, |
|
"learning_rate": 9.934285714285715e-06, |
|
"loss": 0.2494, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.43823889115368936, |
|
"grad_norm": 4.042171955108643, |
|
"learning_rate": 9.89857142857143e-06, |
|
"loss": 0.2626, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.4484304932735426, |
|
"grad_norm": 4.251903057098389, |
|
"learning_rate": 9.862857142857144e-06, |
|
"loss": 0.2457, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4586220953933958, |
|
"grad_norm": 4.49993896484375, |
|
"learning_rate": 9.827142857142859e-06, |
|
"loss": 0.2598, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.4688136975132491, |
|
"grad_norm": 4.043424129486084, |
|
"learning_rate": 9.791428571428571e-06, |
|
"loss": 0.2447, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.47900529963310234, |
|
"grad_norm": 3.787559747695923, |
|
"learning_rate": 9.755714285714286e-06, |
|
"loss": 0.2328, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.48919690175295555, |
|
"grad_norm": 3.875075340270996, |
|
"learning_rate": 9.72e-06, |
|
"loss": 0.2359, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4993885038728088, |
|
"grad_norm": 4.148637294769287, |
|
"learning_rate": 9.684285714285715e-06, |
|
"loss": 0.2234, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.509580105992662, |
|
"grad_norm": 3.9952499866485596, |
|
"learning_rate": 9.648571428571429e-06, |
|
"loss": 0.225, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5197717081125153, |
|
"grad_norm": 3.0705387592315674, |
|
"learning_rate": 9.612857142857144e-06, |
|
"loss": 0.2183, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.5299633102323685, |
|
"grad_norm": 3.5826079845428467, |
|
"learning_rate": 9.577142857142858e-06, |
|
"loss": 0.2125, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5401549123522218, |
|
"grad_norm": 3.478602170944214, |
|
"learning_rate": 9.541428571428572e-06, |
|
"loss": 0.2067, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.550346514472075, |
|
"grad_norm": 3.646801233291626, |
|
"learning_rate": 9.505714285714287e-06, |
|
"loss": 0.2132, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5605381165919282, |
|
"grad_norm": 4.240411758422852, |
|
"learning_rate": 9.47e-06, |
|
"loss": 0.2077, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.5707297187117815, |
|
"grad_norm": 3.531076669692993, |
|
"learning_rate": 9.434285714285714e-06, |
|
"loss": 0.1978, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5809213208316347, |
|
"grad_norm": 4.578576564788818, |
|
"learning_rate": 9.39857142857143e-06, |
|
"loss": 0.1976, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.591112922951488, |
|
"grad_norm": 3.237501382827759, |
|
"learning_rate": 9.362857142857143e-06, |
|
"loss": 0.1829, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6013045250713412, |
|
"grad_norm": 3.9498815536499023, |
|
"learning_rate": 9.327142857142857e-06, |
|
"loss": 0.1926, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.6114961271911945, |
|
"grad_norm": 3.386265277862549, |
|
"learning_rate": 9.291428571428572e-06, |
|
"loss": 0.1829, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6216877293110477, |
|
"grad_norm": 3.7739858627319336, |
|
"learning_rate": 9.255714285714286e-06, |
|
"loss": 0.1807, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.6318793314309009, |
|
"grad_norm": 2.9203953742980957, |
|
"learning_rate": 9.220000000000002e-06, |
|
"loss": 0.1816, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6420709335507542, |
|
"grad_norm": 3.465407371520996, |
|
"learning_rate": 9.184285714285715e-06, |
|
"loss": 0.1739, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.6522625356706074, |
|
"grad_norm": 3.1187219619750977, |
|
"learning_rate": 9.148571428571429e-06, |
|
"loss": 0.177, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6624541377904607, |
|
"grad_norm": 3.4814960956573486, |
|
"learning_rate": 9.112857142857142e-06, |
|
"loss": 0.1718, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.672645739910314, |
|
"grad_norm": 3.3516340255737305, |
|
"learning_rate": 9.077142857142858e-06, |
|
"loss": 0.1682, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.6828373420301671, |
|
"grad_norm": 3.460319757461548, |
|
"learning_rate": 9.041428571428572e-06, |
|
"loss": 0.1645, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.6930289441500204, |
|
"grad_norm": 2.6608901023864746, |
|
"learning_rate": 9.005714285714287e-06, |
|
"loss": 0.1721, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7032205462698736, |
|
"grad_norm": 3.2124173641204834, |
|
"learning_rate": 8.97e-06, |
|
"loss": 0.1647, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.7134121483897269, |
|
"grad_norm": 3.007030725479126, |
|
"learning_rate": 8.934285714285716e-06, |
|
"loss": 0.163, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7236037505095801, |
|
"grad_norm": 2.7804579734802246, |
|
"learning_rate": 8.89857142857143e-06, |
|
"loss": 0.1732, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.7337953526294333, |
|
"grad_norm": 3.615187644958496, |
|
"learning_rate": 8.862857142857143e-06, |
|
"loss": 0.1578, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7439869547492866, |
|
"grad_norm": 2.8256614208221436, |
|
"learning_rate": 8.827142857142857e-06, |
|
"loss": 0.1553, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.7541785568691398, |
|
"grad_norm": 2.9422616958618164, |
|
"learning_rate": 8.791428571428572e-06, |
|
"loss": 0.1483, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.7643701589889931, |
|
"grad_norm": 2.9863955974578857, |
|
"learning_rate": 8.755714285714286e-06, |
|
"loss": 0.1446, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.7745617611088463, |
|
"grad_norm": 3.2235851287841797, |
|
"learning_rate": 8.720000000000001e-06, |
|
"loss": 0.1488, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.7847533632286996, |
|
"grad_norm": 3.379194974899292, |
|
"learning_rate": 8.684285714285715e-06, |
|
"loss": 0.1433, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.7949449653485527, |
|
"grad_norm": 3.080751895904541, |
|
"learning_rate": 8.64857142857143e-06, |
|
"loss": 0.1428, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.805136567468406, |
|
"grad_norm": 2.6511690616607666, |
|
"learning_rate": 8.612857142857144e-06, |
|
"loss": 0.1351, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.8153281695882593, |
|
"grad_norm": 3.0043816566467285, |
|
"learning_rate": 8.577142857142858e-06, |
|
"loss": 0.1527, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8153281695882593, |
|
"eval_loss": 0.12457678467035294, |
|
"eval_runtime": 2416.9884, |
|
"eval_samples_per_second": 4.218, |
|
"eval_steps_per_second": 0.132, |
|
"eval_wer": 16.723770120886936, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8255197717081125, |
|
"grad_norm": 2.428893804550171, |
|
"learning_rate": 8.541428571428571e-06, |
|
"loss": 0.1329, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.8357113738279658, |
|
"grad_norm": 2.5849223136901855, |
|
"learning_rate": 8.505714285714287e-06, |
|
"loss": 0.1355, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.845902975947819, |
|
"grad_norm": 2.7230465412139893, |
|
"learning_rate": 8.47e-06, |
|
"loss": 0.1409, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.8560945780676722, |
|
"grad_norm": 2.889012336730957, |
|
"learning_rate": 8.434285714285716e-06, |
|
"loss": 0.1339, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.8662861801875255, |
|
"grad_norm": 2.4034271240234375, |
|
"learning_rate": 8.39857142857143e-06, |
|
"loss": 0.1397, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.8764777823073787, |
|
"grad_norm": 2.2975339889526367, |
|
"learning_rate": 8.362857142857143e-06, |
|
"loss": 0.1254, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.886669384427232, |
|
"grad_norm": 2.7864370346069336, |
|
"learning_rate": 8.327142857142858e-06, |
|
"loss": 0.1298, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.8968609865470852, |
|
"grad_norm": 3.3211889266967773, |
|
"learning_rate": 8.291428571428572e-06, |
|
"loss": 0.1195, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9070525886669385, |
|
"grad_norm": 2.9740707874298096, |
|
"learning_rate": 8.255714285714287e-06, |
|
"loss": 0.1236, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.9172441907867916, |
|
"grad_norm": 2.8324942588806152, |
|
"learning_rate": 8.220000000000001e-06, |
|
"loss": 0.1256, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.9274357929066449, |
|
"grad_norm": 2.7244651317596436, |
|
"learning_rate": 8.184285714285715e-06, |
|
"loss": 0.1232, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.9376273950264982, |
|
"grad_norm": 2.6477315425872803, |
|
"learning_rate": 8.148571428571428e-06, |
|
"loss": 0.1194, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.9478189971463514, |
|
"grad_norm": 2.737881660461426, |
|
"learning_rate": 8.112857142857144e-06, |
|
"loss": 0.1184, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.9580105992662047, |
|
"grad_norm": 3.096505641937256, |
|
"learning_rate": 8.077142857142857e-06, |
|
"loss": 0.1316, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.9682022013860578, |
|
"grad_norm": 2.7507877349853516, |
|
"learning_rate": 8.041428571428573e-06, |
|
"loss": 0.109, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.9783938035059111, |
|
"grad_norm": 2.7306649684906006, |
|
"learning_rate": 8.005714285714286e-06, |
|
"loss": 0.1263, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.9885854056257644, |
|
"grad_norm": 3.1028060913085938, |
|
"learning_rate": 7.970000000000002e-06, |
|
"loss": 0.1135, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.9987770077456176, |
|
"grad_norm": 2.456820011138916, |
|
"learning_rate": 7.934285714285715e-06, |
|
"loss": 0.1192, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.0089686098654709, |
|
"grad_norm": 2.0729124546051025, |
|
"learning_rate": 7.898571428571429e-06, |
|
"loss": 0.0789, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.019160211985324, |
|
"grad_norm": 2.0284624099731445, |
|
"learning_rate": 7.862857142857143e-06, |
|
"loss": 0.0741, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0293518141051774, |
|
"grad_norm": 1.9724788665771484, |
|
"learning_rate": 7.827142857142858e-06, |
|
"loss": 0.068, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.0395434162250305, |
|
"grad_norm": 2.1046948432922363, |
|
"learning_rate": 7.791428571428572e-06, |
|
"loss": 0.0785, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.049735018344884, |
|
"grad_norm": 2.5051779747009277, |
|
"learning_rate": 7.755714285714287e-06, |
|
"loss": 0.0788, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.059926620464737, |
|
"grad_norm": 2.253950357437134, |
|
"learning_rate": 7.72e-06, |
|
"loss": 0.0748, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.0701182225845902, |
|
"grad_norm": 2.03273868560791, |
|
"learning_rate": 7.684285714285716e-06, |
|
"loss": 0.0795, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.0803098247044436, |
|
"grad_norm": 1.9357993602752686, |
|
"learning_rate": 7.64857142857143e-06, |
|
"loss": 0.0688, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.0905014268242967, |
|
"grad_norm": 2.5450971126556396, |
|
"learning_rate": 7.612857142857143e-06, |
|
"loss": 0.0807, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.10069302894415, |
|
"grad_norm": 2.2401187419891357, |
|
"learning_rate": 7.577142857142857e-06, |
|
"loss": 0.0741, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.1108846310640033, |
|
"grad_norm": 2.1422953605651855, |
|
"learning_rate": 7.5414285714285715e-06, |
|
"loss": 0.0761, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.1210762331838564, |
|
"grad_norm": 2.204946994781494, |
|
"learning_rate": 7.505714285714286e-06, |
|
"loss": 0.0726, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.1312678353037098, |
|
"grad_norm": 2.2459845542907715, |
|
"learning_rate": 7.4700000000000005e-06, |
|
"loss": 0.0751, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.141459437423563, |
|
"grad_norm": 2.2025928497314453, |
|
"learning_rate": 7.434285714285715e-06, |
|
"loss": 0.074, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.1516510395434163, |
|
"grad_norm": 1.94180166721344, |
|
"learning_rate": 7.3985714285714295e-06, |
|
"loss": 0.0768, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.1618426416632694, |
|
"grad_norm": 2.4948198795318604, |
|
"learning_rate": 7.362857142857144e-06, |
|
"loss": 0.0737, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.1720342437831226, |
|
"grad_norm": 2.136427640914917, |
|
"learning_rate": 7.3271428571428585e-06, |
|
"loss": 0.0711, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.182225845902976, |
|
"grad_norm": 1.7289948463439941, |
|
"learning_rate": 7.291428571428571e-06, |
|
"loss": 0.0713, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.1924174480228291, |
|
"grad_norm": 2.254673957824707, |
|
"learning_rate": 7.255714285714286e-06, |
|
"loss": 0.0696, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.2026090501426825, |
|
"grad_norm": 2.0539391040802, |
|
"learning_rate": 7.22e-06, |
|
"loss": 0.0669, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.2128006522625356, |
|
"grad_norm": 2.1277451515197754, |
|
"learning_rate": 7.184285714285715e-06, |
|
"loss": 0.0682, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.222992254382389, |
|
"grad_norm": 1.7704741954803467, |
|
"learning_rate": 7.148571428571429e-06, |
|
"loss": 0.0655, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.222992254382389, |
|
"eval_loss": 0.07764188200235367, |
|
"eval_runtime": 2406.4856, |
|
"eval_samples_per_second": 4.237, |
|
"eval_steps_per_second": 0.133, |
|
"eval_wer": 10.566755631077823, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2331838565022422, |
|
"grad_norm": 2.0602128505706787, |
|
"learning_rate": 7.112857142857144e-06, |
|
"loss": 0.0667, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.2433754586220953, |
|
"grad_norm": 2.0332977771759033, |
|
"learning_rate": 7.077142857142858e-06, |
|
"loss": 0.0656, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.2535670607419487, |
|
"grad_norm": 2.4050822257995605, |
|
"learning_rate": 7.041428571428572e-06, |
|
"loss": 0.0624, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.2637586628618018, |
|
"grad_norm": 2.1430561542510986, |
|
"learning_rate": 7.0057142857142865e-06, |
|
"loss": 0.0805, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.273950264981655, |
|
"grad_norm": 2.0134341716766357, |
|
"learning_rate": 6.97e-06, |
|
"loss": 0.0664, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.2841418671015083, |
|
"grad_norm": 2.217721462249756, |
|
"learning_rate": 6.934285714285715e-06, |
|
"loss": 0.0745, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.2943334692213617, |
|
"grad_norm": 1.5617090463638306, |
|
"learning_rate": 6.898571428571429e-06, |
|
"loss": 0.0648, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.3045250713412149, |
|
"grad_norm": 1.9269059896469116, |
|
"learning_rate": 6.862857142857144e-06, |
|
"loss": 0.0618, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.314716673461068, |
|
"grad_norm": 2.0952699184417725, |
|
"learning_rate": 6.827142857142857e-06, |
|
"loss": 0.0706, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.3249082755809214, |
|
"grad_norm": 1.764196753501892, |
|
"learning_rate": 6.791428571428572e-06, |
|
"loss": 0.0658, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.3350998777007745, |
|
"grad_norm": 2.4948887825012207, |
|
"learning_rate": 6.755714285714286e-06, |
|
"loss": 0.0573, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.3452914798206277, |
|
"grad_norm": 2.2995638847351074, |
|
"learning_rate": 6.720000000000001e-06, |
|
"loss": 0.0584, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.355483081940481, |
|
"grad_norm": 1.6779873371124268, |
|
"learning_rate": 6.684285714285715e-06, |
|
"loss": 0.0618, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.3656746840603342, |
|
"grad_norm": 2.435180425643921, |
|
"learning_rate": 6.648571428571429e-06, |
|
"loss": 0.0611, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.3758662861801876, |
|
"grad_norm": 1.7600586414337158, |
|
"learning_rate": 6.612857142857143e-06, |
|
"loss": 0.0638, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.3860578883000407, |
|
"grad_norm": 2.179009199142456, |
|
"learning_rate": 6.577142857142857e-06, |
|
"loss": 0.0577, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.396249490419894, |
|
"grad_norm": 1.8130829334259033, |
|
"learning_rate": 6.541428571428572e-06, |
|
"loss": 0.0572, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.4064410925397473, |
|
"grad_norm": 2.2357475757598877, |
|
"learning_rate": 6.505714285714286e-06, |
|
"loss": 0.0617, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.4166326946596004, |
|
"grad_norm": 2.523681163787842, |
|
"learning_rate": 6.470000000000001e-06, |
|
"loss": 0.0592, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.4268242967794538, |
|
"grad_norm": 1.9920995235443115, |
|
"learning_rate": 6.434285714285715e-06, |
|
"loss": 0.0594, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.437015898899307, |
|
"grad_norm": 1.876678705215454, |
|
"learning_rate": 6.39857142857143e-06, |
|
"loss": 0.0588, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.4472075010191603, |
|
"grad_norm": 2.1072115898132324, |
|
"learning_rate": 6.3628571428571426e-06, |
|
"loss": 0.0584, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.4573991031390134, |
|
"grad_norm": 1.9492688179016113, |
|
"learning_rate": 6.327142857142857e-06, |
|
"loss": 0.0619, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.4675907052588668, |
|
"grad_norm": 1.9974353313446045, |
|
"learning_rate": 6.2914285714285716e-06, |
|
"loss": 0.0526, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.47778230737872, |
|
"grad_norm": 2.0630576610565186, |
|
"learning_rate": 6.255714285714286e-06, |
|
"loss": 0.0587, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.487973909498573, |
|
"grad_norm": 2.312988042831421, |
|
"learning_rate": 6.220000000000001e-06, |
|
"loss": 0.0539, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.4981655116184265, |
|
"grad_norm": 2.3259403705596924, |
|
"learning_rate": 6.184285714285715e-06, |
|
"loss": 0.0547, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.5083571137382796, |
|
"grad_norm": 1.7665891647338867, |
|
"learning_rate": 6.14857142857143e-06, |
|
"loss": 0.0546, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.5185487158581328, |
|
"grad_norm": 1.8702497482299805, |
|
"learning_rate": 6.112857142857144e-06, |
|
"loss": 0.059, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.5287403179779862, |
|
"grad_norm": 1.725127935409546, |
|
"learning_rate": 6.077142857142858e-06, |
|
"loss": 0.0593, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.5389319200978395, |
|
"grad_norm": 1.6888962984085083, |
|
"learning_rate": 6.0414285714285714e-06, |
|
"loss": 0.0492, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.5491235222176927, |
|
"grad_norm": 1.8041000366210938, |
|
"learning_rate": 6.005714285714286e-06, |
|
"loss": 0.0524, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.5593151243375458, |
|
"grad_norm": 2.0858354568481445, |
|
"learning_rate": 5.9700000000000004e-06, |
|
"loss": 0.0532, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.5695067264573992, |
|
"grad_norm": 2.3499724864959717, |
|
"learning_rate": 5.934285714285715e-06, |
|
"loss": 0.0509, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.5796983285772523, |
|
"grad_norm": 1.6974126100540161, |
|
"learning_rate": 5.8985714285714295e-06, |
|
"loss": 0.0525, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.5898899306971055, |
|
"grad_norm": 2.386164426803589, |
|
"learning_rate": 5.862857142857143e-06, |
|
"loss": 0.056, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.6000815328169589, |
|
"grad_norm": 2.2181949615478516, |
|
"learning_rate": 5.827142857142858e-06, |
|
"loss": 0.0532, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.610273134936812, |
|
"grad_norm": 2.883762836456299, |
|
"learning_rate": 5.791428571428572e-06, |
|
"loss": 0.0554, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.6204647370566652, |
|
"grad_norm": 2.2483813762664795, |
|
"learning_rate": 5.755714285714287e-06, |
|
"loss": 0.053, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.6306563391765185, |
|
"grad_norm": 1.992173433303833, |
|
"learning_rate": 5.72e-06, |
|
"loss": 0.0455, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.6306563391765185, |
|
"eval_loss": 0.05135625973343849, |
|
"eval_runtime": 2404.8469, |
|
"eval_samples_per_second": 4.24, |
|
"eval_steps_per_second": 0.133, |
|
"eval_wer": 6.767451954600445, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.640847941296372, |
|
"grad_norm": 1.7579491138458252, |
|
"learning_rate": 5.684285714285715e-06, |
|
"loss": 0.0521, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.651039543416225, |
|
"grad_norm": 1.9420897960662842, |
|
"learning_rate": 5.6485714285714285e-06, |
|
"loss": 0.0542, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.6612311455360782, |
|
"grad_norm": 2.2500171661376953, |
|
"learning_rate": 5.612857142857143e-06, |
|
"loss": 0.0516, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.6714227476559316, |
|
"grad_norm": 1.7925150394439697, |
|
"learning_rate": 5.5771428571428575e-06, |
|
"loss": 0.0493, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.6816143497757847, |
|
"grad_norm": 2.5120983123779297, |
|
"learning_rate": 5.541428571428572e-06, |
|
"loss": 0.0496, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.6918059518956379, |
|
"grad_norm": 1.6827411651611328, |
|
"learning_rate": 5.5057142857142865e-06, |
|
"loss": 0.048, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.7019975540154912, |
|
"grad_norm": 1.951802134513855, |
|
"learning_rate": 5.470000000000001e-06, |
|
"loss": 0.0491, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.7121891561353446, |
|
"grad_norm": 1.688637375831604, |
|
"learning_rate": 5.4342857142857155e-06, |
|
"loss": 0.0515, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.7223807582551978, |
|
"grad_norm": 2.034071207046509, |
|
"learning_rate": 5.398571428571428e-06, |
|
"loss": 0.0516, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.732572360375051, |
|
"grad_norm": 1.9581401348114014, |
|
"learning_rate": 5.362857142857143e-06, |
|
"loss": 0.0536, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.7427639624949043, |
|
"grad_norm": 1.8898799419403076, |
|
"learning_rate": 5.327142857142857e-06, |
|
"loss": 0.0491, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.7529555646147574, |
|
"grad_norm": 2.1761457920074463, |
|
"learning_rate": 5.291428571428572e-06, |
|
"loss": 0.0496, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.7631471667346106, |
|
"grad_norm": 1.9841150045394897, |
|
"learning_rate": 5.255714285714286e-06, |
|
"loss": 0.0469, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.773338768854464, |
|
"grad_norm": 1.3026888370513916, |
|
"learning_rate": 5.220000000000001e-06, |
|
"loss": 0.0482, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.7835303709743173, |
|
"grad_norm": 1.3047564029693604, |
|
"learning_rate": 5.184285714285715e-06, |
|
"loss": 0.0493, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.7937219730941703, |
|
"grad_norm": 1.6902995109558105, |
|
"learning_rate": 5.14857142857143e-06, |
|
"loss": 0.0494, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.8039135752140236, |
|
"grad_norm": 2.262953758239746, |
|
"learning_rate": 5.112857142857143e-06, |
|
"loss": 0.0476, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.814105177333877, |
|
"grad_norm": 1.777170181274414, |
|
"learning_rate": 5.077142857142857e-06, |
|
"loss": 0.0471, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.8242967794537301, |
|
"grad_norm": 1.9193710088729858, |
|
"learning_rate": 5.041428571428572e-06, |
|
"loss": 0.0467, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.8344883815735833, |
|
"grad_norm": 1.8790152072906494, |
|
"learning_rate": 5.005714285714286e-06, |
|
"loss": 0.0487, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.8446799836934367, |
|
"grad_norm": 2.3190362453460693, |
|
"learning_rate": 4.970000000000001e-06, |
|
"loss": 0.0466, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.8548715858132898, |
|
"grad_norm": 1.821876049041748, |
|
"learning_rate": 4.934285714285715e-06, |
|
"loss": 0.0456, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.865063187933143, |
|
"grad_norm": 1.3371105194091797, |
|
"learning_rate": 4.898571428571429e-06, |
|
"loss": 0.0414, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.8752547900529963, |
|
"grad_norm": 1.5593416690826416, |
|
"learning_rate": 4.862857142857143e-06, |
|
"loss": 0.0435, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.8854463921728497, |
|
"grad_norm": 1.9036990404129028, |
|
"learning_rate": 4.827142857142858e-06, |
|
"loss": 0.0448, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.8956379942927029, |
|
"grad_norm": 1.8190467357635498, |
|
"learning_rate": 4.7914285714285715e-06, |
|
"loss": 0.0388, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.905829596412556, |
|
"grad_norm": 1.7529155015945435, |
|
"learning_rate": 4.755714285714286e-06, |
|
"loss": 0.0421, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.9160211985324094, |
|
"grad_norm": 2.084097385406494, |
|
"learning_rate": 4.7200000000000005e-06, |
|
"loss": 0.0464, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.9262128006522625, |
|
"grad_norm": 1.569948673248291, |
|
"learning_rate": 4.684285714285714e-06, |
|
"loss": 0.0438, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.9364044027721157, |
|
"grad_norm": 2.0869407653808594, |
|
"learning_rate": 4.648571428571429e-06, |
|
"loss": 0.0421, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.946596004891969, |
|
"grad_norm": 1.8481653928756714, |
|
"learning_rate": 4.612857142857143e-06, |
|
"loss": 0.0429, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.9567876070118224, |
|
"grad_norm": 2.0351133346557617, |
|
"learning_rate": 4.577142857142858e-06, |
|
"loss": 0.0431, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.9669792091316753, |
|
"grad_norm": 1.88765549659729, |
|
"learning_rate": 4.541428571428571e-06, |
|
"loss": 0.0448, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.9771708112515287, |
|
"grad_norm": 1.8413023948669434, |
|
"learning_rate": 4.505714285714286e-06, |
|
"loss": 0.0391, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.987362413371382, |
|
"grad_norm": 2.0287082195281982, |
|
"learning_rate": 4.47e-06, |
|
"loss": 0.0417, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.9975540154912352, |
|
"grad_norm": 2.1331636905670166, |
|
"learning_rate": 4.434285714285715e-06, |
|
"loss": 0.0434, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.0077456176110884, |
|
"grad_norm": 1.133825421333313, |
|
"learning_rate": 4.3985714285714286e-06, |
|
"loss": 0.0246, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.0179372197309418, |
|
"grad_norm": 1.3025981187820435, |
|
"learning_rate": 4.362857142857143e-06, |
|
"loss": 0.017, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.028128821850795, |
|
"grad_norm": 1.008670687675476, |
|
"learning_rate": 4.327142857142858e-06, |
|
"loss": 0.0174, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.038320423970648, |
|
"grad_norm": 0.9607629179954529, |
|
"learning_rate": 4.291428571428572e-06, |
|
"loss": 0.0162, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.038320423970648, |
|
"eval_loss": 0.0353175513446331, |
|
"eval_runtime": 2408.5196, |
|
"eval_samples_per_second": 4.233, |
|
"eval_steps_per_second": 0.132, |
|
"eval_wer": 4.477157079135229, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.0485120260905014, |
|
"grad_norm": 1.6613744497299194, |
|
"learning_rate": 4.255714285714286e-06, |
|
"loss": 0.0213, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 2.058703628210355, |
|
"grad_norm": 0.6007469296455383, |
|
"learning_rate": 4.22e-06, |
|
"loss": 0.015, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.0688952303302077, |
|
"grad_norm": 1.5992757081985474, |
|
"learning_rate": 4.184285714285715e-06, |
|
"loss": 0.0191, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 2.079086832450061, |
|
"grad_norm": 0.913711667060852, |
|
"learning_rate": 4.148571428571429e-06, |
|
"loss": 0.0155, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.0892784345699145, |
|
"grad_norm": 1.1569514274597168, |
|
"learning_rate": 4.112857142857144e-06, |
|
"loss": 0.0164, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.099470036689768, |
|
"grad_norm": 1.0935266017913818, |
|
"learning_rate": 4.0771428571428574e-06, |
|
"loss": 0.0146, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.1096616388096208, |
|
"grad_norm": 0.8869621157646179, |
|
"learning_rate": 4.041428571428572e-06, |
|
"loss": 0.0168, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 2.119853240929474, |
|
"grad_norm": 1.3779696226119995, |
|
"learning_rate": 4.0057142857142864e-06, |
|
"loss": 0.0164, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.1300448430493275, |
|
"grad_norm": 1.2132755517959595, |
|
"learning_rate": 3.97e-06, |
|
"loss": 0.0178, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 2.1402364451691804, |
|
"grad_norm": 1.8169926404953003, |
|
"learning_rate": 3.934285714285715e-06, |
|
"loss": 0.0181, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.150428047289034, |
|
"grad_norm": 0.7188290357589722, |
|
"learning_rate": 3.898571428571429e-06, |
|
"loss": 0.0172, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 2.160619649408887, |
|
"grad_norm": 1.0363893508911133, |
|
"learning_rate": 3.862857142857143e-06, |
|
"loss": 0.0151, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.1708112515287405, |
|
"grad_norm": 0.7467776536941528, |
|
"learning_rate": 3.827142857142857e-06, |
|
"loss": 0.0159, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 2.1810028536485935, |
|
"grad_norm": 1.4140534400939941, |
|
"learning_rate": 3.7914285714285722e-06, |
|
"loss": 0.0179, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.191194455768447, |
|
"grad_norm": 0.831071138381958, |
|
"learning_rate": 3.755714285714286e-06, |
|
"loss": 0.018, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.2013860578883, |
|
"grad_norm": 0.978151261806488, |
|
"learning_rate": 3.7200000000000004e-06, |
|
"loss": 0.0164, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.211577660008153, |
|
"grad_norm": 1.2061834335327148, |
|
"learning_rate": 3.684285714285715e-06, |
|
"loss": 0.0155, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 2.2217692621280065, |
|
"grad_norm": 1.0183775424957275, |
|
"learning_rate": 3.648571428571429e-06, |
|
"loss": 0.0166, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.23196086424786, |
|
"grad_norm": 0.7747207283973694, |
|
"learning_rate": 3.612857142857143e-06, |
|
"loss": 0.017, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 2.242152466367713, |
|
"grad_norm": 1.3631898164749146, |
|
"learning_rate": 3.5771428571428576e-06, |
|
"loss": 0.0147, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.252344068487566, |
|
"grad_norm": 2.3850202560424805, |
|
"learning_rate": 3.5414285714285716e-06, |
|
"loss": 0.0145, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 2.2625356706074196, |
|
"grad_norm": 1.2588169574737549, |
|
"learning_rate": 3.505714285714286e-06, |
|
"loss": 0.0191, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 1.3466730117797852, |
|
"learning_rate": 3.4700000000000002e-06, |
|
"loss": 0.0177, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 2.282918874847126, |
|
"grad_norm": 1.3090860843658447, |
|
"learning_rate": 3.4342857142857143e-06, |
|
"loss": 0.014, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.2931104769669792, |
|
"grad_norm": 1.4974130392074585, |
|
"learning_rate": 3.398571428571429e-06, |
|
"loss": 0.0152, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.3033020790868326, |
|
"grad_norm": 1.342842936515808, |
|
"learning_rate": 3.3628571428571433e-06, |
|
"loss": 0.0165, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.3134936812066855, |
|
"grad_norm": 0.9690020084381104, |
|
"learning_rate": 3.327142857142858e-06, |
|
"loss": 0.0146, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 2.323685283326539, |
|
"grad_norm": 0.4699741303920746, |
|
"learning_rate": 3.2914285714285715e-06, |
|
"loss": 0.0178, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.3338768854463923, |
|
"grad_norm": 1.166608214378357, |
|
"learning_rate": 3.255714285714286e-06, |
|
"loss": 0.0152, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 2.344068487566245, |
|
"grad_norm": 0.7220166921615601, |
|
"learning_rate": 3.2200000000000005e-06, |
|
"loss": 0.0142, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.3542600896860986, |
|
"grad_norm": 0.8474377989768982, |
|
"learning_rate": 3.1842857142857146e-06, |
|
"loss": 0.0186, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 2.364451691805952, |
|
"grad_norm": 1.246468186378479, |
|
"learning_rate": 3.1485714285714287e-06, |
|
"loss": 0.0143, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.3746432939258053, |
|
"grad_norm": 0.9267088770866394, |
|
"learning_rate": 3.112857142857143e-06, |
|
"loss": 0.0157, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 2.3848348960456582, |
|
"grad_norm": 1.0727224349975586, |
|
"learning_rate": 3.0771428571428573e-06, |
|
"loss": 0.0145, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.3950264981655116, |
|
"grad_norm": 1.3142344951629639, |
|
"learning_rate": 3.0414285714285718e-06, |
|
"loss": 0.0133, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 2.405218100285365, |
|
"grad_norm": 1.3349863290786743, |
|
"learning_rate": 3.005714285714286e-06, |
|
"loss": 0.0137, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.415409702405218, |
|
"grad_norm": 0.7742011547088623, |
|
"learning_rate": 2.97e-06, |
|
"loss": 0.0149, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 2.4256013045250713, |
|
"grad_norm": 1.1534104347229004, |
|
"learning_rate": 2.9342857142857144e-06, |
|
"loss": 0.0156, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.4357929066449246, |
|
"grad_norm": 1.1034826040267944, |
|
"learning_rate": 2.898571428571429e-06, |
|
"loss": 0.0136, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 2.445984508764778, |
|
"grad_norm": 1.3665175437927246, |
|
"learning_rate": 2.8628571428571435e-06, |
|
"loss": 0.0129, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.445984508764778, |
|
"eval_loss": 0.027366982772946358, |
|
"eval_runtime": 2410.6864, |
|
"eval_samples_per_second": 4.23, |
|
"eval_steps_per_second": 0.132, |
|
"eval_wer": 3.436365819195996, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.456176110884631, |
|
"grad_norm": 0.9552819728851318, |
|
"learning_rate": 2.827142857142857e-06, |
|
"loss": 0.015, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 2.4663677130044843, |
|
"grad_norm": 0.8750997185707092, |
|
"learning_rate": 2.7914285714285716e-06, |
|
"loss": 0.014, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.4765593151243377, |
|
"grad_norm": 1.3504140377044678, |
|
"learning_rate": 2.755714285714286e-06, |
|
"loss": 0.0172, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 2.4867509172441906, |
|
"grad_norm": 1.59817373752594, |
|
"learning_rate": 2.7200000000000002e-06, |
|
"loss": 0.0153, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.496942519364044, |
|
"grad_norm": 1.6574933528900146, |
|
"learning_rate": 2.6842857142857143e-06, |
|
"loss": 0.0167, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 2.5071341214838974, |
|
"grad_norm": 0.7508680820465088, |
|
"learning_rate": 2.648571428571429e-06, |
|
"loss": 0.013, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.5173257236037507, |
|
"grad_norm": 1.0604500770568848, |
|
"learning_rate": 2.612857142857143e-06, |
|
"loss": 0.0162, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 2.5275173257236037, |
|
"grad_norm": 1.3438018560409546, |
|
"learning_rate": 2.5771428571428574e-06, |
|
"loss": 0.0141, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.537708927843457, |
|
"grad_norm": 1.3562772274017334, |
|
"learning_rate": 2.541428571428572e-06, |
|
"loss": 0.0134, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 2.54790052996331, |
|
"grad_norm": 0.681122362613678, |
|
"learning_rate": 2.5057142857142856e-06, |
|
"loss": 0.0123, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.5580921320831633, |
|
"grad_norm": 0.8060537576675415, |
|
"learning_rate": 2.47e-06, |
|
"loss": 0.0143, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 2.5682837342030167, |
|
"grad_norm": 0.9794915318489075, |
|
"learning_rate": 2.4342857142857146e-06, |
|
"loss": 0.0128, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.57847533632287, |
|
"grad_norm": 0.7749600410461426, |
|
"learning_rate": 2.3985714285714287e-06, |
|
"loss": 0.0132, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 2.5886669384427234, |
|
"grad_norm": 0.808312177658081, |
|
"learning_rate": 2.362857142857143e-06, |
|
"loss": 0.0166, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.5988585405625764, |
|
"grad_norm": 1.0200546979904175, |
|
"learning_rate": 2.3271428571428572e-06, |
|
"loss": 0.0138, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 2.6090501426824297, |
|
"grad_norm": 1.4276586771011353, |
|
"learning_rate": 2.2914285714285718e-06, |
|
"loss": 0.0119, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.6192417448022827, |
|
"grad_norm": 1.1479567289352417, |
|
"learning_rate": 2.255714285714286e-06, |
|
"loss": 0.0127, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 2.629433346922136, |
|
"grad_norm": 0.9421886205673218, |
|
"learning_rate": 2.2200000000000003e-06, |
|
"loss": 0.0132, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.6396249490419894, |
|
"grad_norm": 0.573397696018219, |
|
"learning_rate": 2.1842857142857144e-06, |
|
"loss": 0.0131, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 2.649816551161843, |
|
"grad_norm": 1.2187764644622803, |
|
"learning_rate": 2.148571428571429e-06, |
|
"loss": 0.0125, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.660008153281696, |
|
"grad_norm": 0.7359505891799927, |
|
"learning_rate": 2.112857142857143e-06, |
|
"loss": 0.0141, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 2.670199755401549, |
|
"grad_norm": 1.8559486865997314, |
|
"learning_rate": 2.077142857142857e-06, |
|
"loss": 0.0142, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.6803913575214025, |
|
"grad_norm": 1.0747308731079102, |
|
"learning_rate": 2.0414285714285716e-06, |
|
"loss": 0.0105, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 2.6905829596412554, |
|
"grad_norm": 0.8565563559532166, |
|
"learning_rate": 2.0057142857142857e-06, |
|
"loss": 0.0144, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.7007745617611087, |
|
"grad_norm": 1.1864107847213745, |
|
"learning_rate": 1.97e-06, |
|
"loss": 0.013, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 2.710966163880962, |
|
"grad_norm": 0.6643468141555786, |
|
"learning_rate": 1.9342857142857143e-06, |
|
"loss": 0.0149, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.7211577660008155, |
|
"grad_norm": 0.6889024376869202, |
|
"learning_rate": 1.8985714285714288e-06, |
|
"loss": 0.0115, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 2.7313493681206684, |
|
"grad_norm": 1.5234169960021973, |
|
"learning_rate": 1.8628571428571429e-06, |
|
"loss": 0.0139, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.741540970240522, |
|
"grad_norm": 1.095461368560791, |
|
"learning_rate": 1.8271428571428574e-06, |
|
"loss": 0.0132, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 2.751732572360375, |
|
"grad_norm": 1.7404385805130005, |
|
"learning_rate": 1.7914285714285715e-06, |
|
"loss": 0.0135, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.761924174480228, |
|
"grad_norm": 0.8274975419044495, |
|
"learning_rate": 1.755714285714286e-06, |
|
"loss": 0.013, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 2.7721157766000815, |
|
"grad_norm": 0.8858354091644287, |
|
"learning_rate": 1.72e-06, |
|
"loss": 0.0118, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.782307378719935, |
|
"grad_norm": 0.5157521963119507, |
|
"learning_rate": 1.6842857142857143e-06, |
|
"loss": 0.0107, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 2.792498980839788, |
|
"grad_norm": 1.2508779764175415, |
|
"learning_rate": 1.6485714285714289e-06, |
|
"loss": 0.015, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.802690582959641, |
|
"grad_norm": 1.2929726839065552, |
|
"learning_rate": 1.612857142857143e-06, |
|
"loss": 0.0125, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 2.8128821850794945, |
|
"grad_norm": 0.8893064856529236, |
|
"learning_rate": 1.5771428571428574e-06, |
|
"loss": 0.0122, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.823073787199348, |
|
"grad_norm": 1.2606889009475708, |
|
"learning_rate": 1.5414285714285715e-06, |
|
"loss": 0.0137, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 2.833265389319201, |
|
"grad_norm": 1.410904884338379, |
|
"learning_rate": 1.5057142857142858e-06, |
|
"loss": 0.0121, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.843456991439054, |
|
"grad_norm": 0.792271077632904, |
|
"learning_rate": 1.4700000000000001e-06, |
|
"loss": 0.0135, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 2.8536485935589075, |
|
"grad_norm": 1.0926941633224487, |
|
"learning_rate": 1.4342857142857144e-06, |
|
"loss": 0.0117, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.8536485935589075, |
|
"eval_loss": 0.022013096138834953, |
|
"eval_runtime": 2391.195, |
|
"eval_samples_per_second": 4.264, |
|
"eval_steps_per_second": 0.133, |
|
"eval_wer": 2.5110128090281947, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.863840195678761, |
|
"grad_norm": 1.1174569129943848, |
|
"learning_rate": 1.3985714285714285e-06, |
|
"loss": 0.0123, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 2.874031797798614, |
|
"grad_norm": 1.3422982692718506, |
|
"learning_rate": 1.362857142857143e-06, |
|
"loss": 0.0155, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.884223399918467, |
|
"grad_norm": 1.1771271228790283, |
|
"learning_rate": 1.327142857142857e-06, |
|
"loss": 0.014, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 2.8944150020383206, |
|
"grad_norm": 0.8138777017593384, |
|
"learning_rate": 1.2914285714285716e-06, |
|
"loss": 0.0159, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.9046066041581735, |
|
"grad_norm": 0.964419960975647, |
|
"learning_rate": 1.2557142857142859e-06, |
|
"loss": 0.0119, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 2.914798206278027, |
|
"grad_norm": 1.0425853729248047, |
|
"learning_rate": 1.2200000000000002e-06, |
|
"loss": 0.0144, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.9249898083978803, |
|
"grad_norm": 1.2048903703689575, |
|
"learning_rate": 1.1842857142857143e-06, |
|
"loss": 0.0112, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 2.9351814105177336, |
|
"grad_norm": 0.9228710532188416, |
|
"learning_rate": 1.1485714285714286e-06, |
|
"loss": 0.0121, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.9453730126375866, |
|
"grad_norm": 0.35819801688194275, |
|
"learning_rate": 1.1128571428571429e-06, |
|
"loss": 0.0103, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 2.95556461475744, |
|
"grad_norm": 0.8136561512947083, |
|
"learning_rate": 1.0771428571428574e-06, |
|
"loss": 0.0129, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.965756216877293, |
|
"grad_norm": 1.0411508083343506, |
|
"learning_rate": 1.0414285714285717e-06, |
|
"loss": 0.0104, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 2.975947818997146, |
|
"grad_norm": 0.7591568231582642, |
|
"learning_rate": 1.0057142857142857e-06, |
|
"loss": 0.0104, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.9861394211169996, |
|
"grad_norm": 0.9218117594718933, |
|
"learning_rate": 9.7e-07, |
|
"loss": 0.0123, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 2.996331023236853, |
|
"grad_norm": 1.3467174768447876, |
|
"learning_rate": 9.342857142857144e-07, |
|
"loss": 0.01, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 3.006522625356706, |
|
"grad_norm": 0.14992065727710724, |
|
"learning_rate": 8.985714285714286e-07, |
|
"loss": 0.0068, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 3.0167142274765593, |
|
"grad_norm": 0.44808146357536316, |
|
"learning_rate": 8.628571428571429e-07, |
|
"loss": 0.0044, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.0269058295964126, |
|
"grad_norm": 0.4159485101699829, |
|
"learning_rate": 8.271428571428572e-07, |
|
"loss": 0.0045, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 3.037097431716266, |
|
"grad_norm": 0.5730934143066406, |
|
"learning_rate": 7.914285714285715e-07, |
|
"loss": 0.0041, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 3.047289033836119, |
|
"grad_norm": 0.2851293683052063, |
|
"learning_rate": 7.557142857142857e-07, |
|
"loss": 0.0048, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 3.0574806359559723, |
|
"grad_norm": 0.3623310625553131, |
|
"learning_rate": 7.2e-07, |
|
"loss": 0.0054, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.0676722380758257, |
|
"grad_norm": 0.1989881545305252, |
|
"learning_rate": 6.842857142857143e-07, |
|
"loss": 0.004, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 3.0778638401956786, |
|
"grad_norm": 0.24523906409740448, |
|
"learning_rate": 6.485714285714287e-07, |
|
"loss": 0.0052, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 3.088055442315532, |
|
"grad_norm": 0.09773947298526764, |
|
"learning_rate": 6.128571428571429e-07, |
|
"loss": 0.0037, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 3.0982470444353853, |
|
"grad_norm": 0.8551767468452454, |
|
"learning_rate": 5.771428571428572e-07, |
|
"loss": 0.0041, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.1084386465552383, |
|
"grad_norm": 0.32709449529647827, |
|
"learning_rate": 5.414285714285715e-07, |
|
"loss": 0.0046, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 3.1186302486750916, |
|
"grad_norm": 0.22505348920822144, |
|
"learning_rate": 5.057142857142858e-07, |
|
"loss": 0.0039, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 3.128821850794945, |
|
"grad_norm": 0.6920948028564453, |
|
"learning_rate": 4.7000000000000005e-07, |
|
"loss": 0.005, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 3.1390134529147984, |
|
"grad_norm": 0.37104272842407227, |
|
"learning_rate": 4.342857142857143e-07, |
|
"loss": 0.0041, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.1492050550346513, |
|
"grad_norm": 0.5752152800559998, |
|
"learning_rate": 3.985714285714286e-07, |
|
"loss": 0.0046, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 3.1593966571545047, |
|
"grad_norm": 0.40613701939582825, |
|
"learning_rate": 3.6285714285714283e-07, |
|
"loss": 0.0041, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.169588259274358, |
|
"grad_norm": 0.4888840317726135, |
|
"learning_rate": 3.271428571428572e-07, |
|
"loss": 0.0047, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 3.179779861394211, |
|
"grad_norm": 0.22719787061214447, |
|
"learning_rate": 2.914285714285715e-07, |
|
"loss": 0.0039, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.1899714635140644, |
|
"grad_norm": 0.3767295479774475, |
|
"learning_rate": 2.557142857142857e-07, |
|
"loss": 0.0047, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 3.2001630656339177, |
|
"grad_norm": 0.6686317324638367, |
|
"learning_rate": 2.2e-07, |
|
"loss": 0.0041, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.210354667753771, |
|
"grad_norm": 0.40455150604248047, |
|
"learning_rate": 1.842857142857143e-07, |
|
"loss": 0.0058, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 3.220546269873624, |
|
"grad_norm": 0.2686769962310791, |
|
"learning_rate": 1.4857142857142857e-07, |
|
"loss": 0.0046, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.2307378719934774, |
|
"grad_norm": 0.17891818284988403, |
|
"learning_rate": 1.1285714285714287e-07, |
|
"loss": 0.0038, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 3.2409294741133308, |
|
"grad_norm": 0.32351842522621155, |
|
"learning_rate": 7.714285714285715e-08, |
|
"loss": 0.0036, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 3.2511210762331837, |
|
"grad_norm": 0.2572202682495117, |
|
"learning_rate": 4.1428571428571426e-08, |
|
"loss": 0.0036, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 3.261312678353037, |
|
"grad_norm": 0.1762164831161499, |
|
"learning_rate": 5.714285714285715e-09, |
|
"loss": 0.0044, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.261312678353037, |
|
"eval_loss": 0.02037939429283142, |
|
"eval_runtime": 2392.6668, |
|
"eval_samples_per_second": 4.261, |
|
"eval_steps_per_second": 0.133, |
|
"eval_wer": 2.2782892974889872, |
|
"step": 8000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 8000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.6124729999261696e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|