|
{
  "best_metric": 7.215361500971087,
  "best_model_checkpoint": "./checkpoint-9000",
  "epoch": 5.048,
  "eval_steps": 500,
  "global_step": 9000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0025, "grad_norm": 6.131621360778809, "learning_rate": 2.1875e-07, "loss": 0.9345, "step": 25},
    {"epoch": 0.005, "grad_norm": 6.021520137786865, "learning_rate": 4.375e-07, "loss": 0.8231, "step": 50},
    {"epoch": 0.0075, "grad_norm": 5.526496410369873, "learning_rate": 6.5625e-07, "loss": 0.5623, "step": 75},
    {"epoch": 0.01, "grad_norm": 4.9277825355529785, "learning_rate": 8.75e-07, "loss": 0.4173, "step": 100},
    {"epoch": 0.0125, "grad_norm": 4.292990684509277, "learning_rate": 1.09375e-06, "loss": 0.385, "step": 125},
    {"epoch": 0.015, "grad_norm": 5.749295234680176, "learning_rate": 1.3125e-06, "loss": 0.3931, "step": 150},
    {"epoch": 0.0175, "grad_norm": 3.8306965827941895, "learning_rate": 1.5312499999999997e-06, "loss": 0.3516, "step": 175},
    {"epoch": 0.02, "grad_norm": 4.687748908996582, "learning_rate": 1.75e-06, "loss": 0.3235, "step": 200},
    {"epoch": 0.0225, "grad_norm": 4.232759952545166, "learning_rate": 1.96875e-06, "loss": 0.3314, "step": 225},
    {"epoch": 0.025, "grad_norm": 4.185751914978027, "learning_rate": 2.1875e-06, "loss": 0.309, "step": 250},
    {"epoch": 0.0275, "grad_norm": 4.818612098693848, "learning_rate": 2.40625e-06, "loss": 0.2991, "step": 275},
    {"epoch": 0.03, "grad_norm": 4.171736717224121, "learning_rate": 2.625e-06, "loss": 0.2832, "step": 300},
    {"epoch": 0.0325, "grad_norm": 5.217376708984375, "learning_rate": 2.8437499999999997e-06, "loss": 0.2873, "step": 325},
    {"epoch": 0.035, "grad_norm": 4.671106815338135, "learning_rate": 3.0624999999999995e-06, "loss": 0.2957, "step": 350},
    {"epoch": 0.0375, "grad_norm": 3.9175262451171875, "learning_rate": 3.2812499999999997e-06, "loss": 0.2634, "step": 375},
    {"epoch": 0.04, "grad_norm": 4.647582054138184, "learning_rate": 3.5e-06, "loss": 0.2541, "step": 400},
    {"epoch": 0.0425, "grad_norm": 3.25675368309021, "learning_rate": 3.7187499999999998e-06, "loss": 0.2244, "step": 425},
    {"epoch": 0.045, "grad_norm": 4.597206115722656, "learning_rate": 3.9375e-06, "loss": 0.2492, "step": 450},
    {"epoch": 0.0475, "grad_norm": 4.602332592010498, "learning_rate": 4.156249999999999e-06, "loss": 0.246, "step": 475},
    {"epoch": 0.05, "grad_norm": 3.6419622898101807, "learning_rate": 4.375e-06, "loss": 0.2208, "step": 500},
    {"epoch": 0.05, "eval_loss": 0.2592349350452423, "eval_runtime": 4116.5906, "eval_samples_per_second": 3.311, "eval_steps_per_second": 0.414, "eval_wer": 20.691487412510533, "step": 500},
    {"epoch": 0.0525, "grad_norm": 3.6599488258361816, "learning_rate": 4.363486842105263e-06, "loss": 0.2539, "step": 525},
    {"epoch": 0.055, "grad_norm": 3.6934616565704346, "learning_rate": 4.351973684210526e-06, "loss": 0.2313, "step": 550},
    {"epoch": 0.0575, "grad_norm": 3.7546138763427734, "learning_rate": 4.340460526315789e-06, "loss": 0.2272, "step": 575},
    {"epoch": 0.06, "grad_norm": 3.096877098083496, "learning_rate": 4.3289473684210525e-06, "loss": 0.2373, "step": 600},
    {"epoch": 0.0625, "grad_norm": 3.572812795639038, "learning_rate": 4.3174342105263155e-06, "loss": 0.2285, "step": 625},
    {"epoch": 0.065, "grad_norm": 3.3494396209716797, "learning_rate": 4.3059210526315785e-06, "loss": 0.2293, "step": 650},
    {"epoch": 0.0675, "grad_norm": 3.5156869888305664, "learning_rate": 4.2944078947368415e-06, "loss": 0.2063, "step": 675},
    {"epoch": 0.07, "grad_norm": 3.698807716369629, "learning_rate": 4.282894736842105e-06, "loss": 0.2113, "step": 700},
    {"epoch": 0.0725, "grad_norm": 3.716585636138916, "learning_rate": 4.271381578947368e-06, "loss": 0.2055, "step": 725},
    {"epoch": 0.075, "grad_norm": 4.204227924346924, "learning_rate": 4.2598684210526314e-06, "loss": 0.2114, "step": 750},
    {"epoch": 0.0775, "grad_norm": 3.479562282562256, "learning_rate": 4.2483552631578944e-06, "loss": 0.2224, "step": 775},
    {"epoch": 0.08, "grad_norm": 4.5203094482421875, "learning_rate": 4.2368421052631575e-06, "loss": 0.2523, "step": 800},
    {"epoch": 0.0825, "grad_norm": 3.6081738471984863, "learning_rate": 4.2253289473684205e-06, "loss": 0.2383, "step": 825},
    {"epoch": 0.085, "grad_norm": 3.2602758407592773, "learning_rate": 4.2138157894736835e-06, "loss": 0.1808, "step": 850},
    {"epoch": 0.0875, "grad_norm": 3.6786868572235107, "learning_rate": 4.202302631578947e-06, "loss": 0.1747, "step": 875},
    {"epoch": 0.09, "grad_norm": 3.1120803356170654, "learning_rate": 4.19078947368421e-06, "loss": 0.1662, "step": 900},
    {"epoch": 0.0925, "grad_norm": 3.1962203979492188, "learning_rate": 4.179276315789473e-06, "loss": 0.1771, "step": 925},
    {"epoch": 0.095, "grad_norm": 3.172363758087158, "learning_rate": 4.167763157894736e-06, "loss": 0.1751, "step": 950},
    {"epoch": 0.0975, "grad_norm": 2.4304590225219727, "learning_rate": 4.156249999999999e-06, "loss": 0.1701, "step": 975},
    {"epoch": 0.1, "grad_norm": 3.193345308303833, "learning_rate": 4.144736842105262e-06, "loss": 0.1489, "step": 1000},
    {"epoch": 0.1, "eval_loss": 0.1971057653427124, "eval_runtime": 4130.6867, "eval_samples_per_second": 3.3, "eval_steps_per_second": 0.413, "eval_wer": 14.68265601524424, "step": 1000},
    {"epoch": 0.1025, "grad_norm": 3.322065591812134, "learning_rate": 4.133223684210526e-06, "loss": 0.1701, "step": 1025},
    {"epoch": 0.105, "grad_norm": 3.5462722778320312, "learning_rate": 4.121710526315789e-06, "loss": 0.1875, "step": 1050},
    {"epoch": 0.1075, "grad_norm": 3.39326810836792, "learning_rate": 4.110197368421052e-06, "loss": 0.1506, "step": 1075},
    {"epoch": 0.11, "grad_norm": 2.9165821075439453, "learning_rate": 4.098684210526315e-06, "loss": 0.1525, "step": 1100},
    {"epoch": 0.1125, "grad_norm": 3.262007236480713, "learning_rate": 4.087171052631578e-06, "loss": 0.157, "step": 1125},
    {"epoch": 0.115, "grad_norm": 2.4523119926452637, "learning_rate": 4.075657894736842e-06, "loss": 0.1416, "step": 1150},
    {"epoch": 0.1175, "grad_norm": 2.7651798725128174, "learning_rate": 4.064144736842105e-06, "loss": 0.1527, "step": 1175},
    {"epoch": 0.12, "grad_norm": 3.609523296356201, "learning_rate": 4.052631578947368e-06, "loss": 0.1822, "step": 1200},
    {"epoch": 0.1225, "grad_norm": 3.8101985454559326, "learning_rate": 4.041118421052631e-06, "loss": 0.1703, "step": 1225},
    {"epoch": 0.125, "grad_norm": 3.8921287059783936, "learning_rate": 4.029605263157894e-06, "loss": 0.1924, "step": 1250},
    {"epoch": 0.1275, "grad_norm": 4.463279724121094, "learning_rate": 4.018092105263157e-06, "loss": 0.1818, "step": 1275},
    {"epoch": 0.13, "grad_norm": 3.6556308269500732, "learning_rate": 4.00657894736842e-06, "loss": 0.1726, "step": 1300},
    {"epoch": 0.1325, "grad_norm": 2.98067569732666, "learning_rate": 3.995065789473683e-06, "loss": 0.174, "step": 1325},
    {"epoch": 0.135, "grad_norm": 2.8287429809570312, "learning_rate": 3.983552631578947e-06, "loss": 0.1631, "step": 1350},
    {"epoch": 0.1375, "grad_norm": 2.6438794136047363, "learning_rate": 3.97203947368421e-06, "loss": 0.1475, "step": 1375},
    {"epoch": 0.14, "grad_norm": 3.513123035430908, "learning_rate": 3.960526315789473e-06, "loss": 0.1457, "step": 1400},
    {"epoch": 0.1425, "grad_norm": 2.4688916206359863, "learning_rate": 3.949013157894737e-06, "loss": 0.1375, "step": 1425},
    {"epoch": 0.145, "grad_norm": 4.005943775177002, "learning_rate": 3.9375e-06, "loss": 0.1623, "step": 1450},
    {"epoch": 0.1475, "grad_norm": 2.91786789894104, "learning_rate": 3.925986842105263e-06, "loss": 0.1701, "step": 1475},
    {"epoch": 0.15, "grad_norm": 3.5332415103912354, "learning_rate": 3.914473684210526e-06, "loss": 0.1973, "step": 1500},
    {"epoch": 0.15, "eval_loss": 0.17469166219234467, "eval_runtime": 4132.0041, "eval_samples_per_second": 3.299, "eval_steps_per_second": 0.412, "eval_wer": 12.377697973542453, "step": 1500},
    {"epoch": 0.1525, "grad_norm": 4.05070686340332, "learning_rate": 3.902960526315789e-06, "loss": 0.1796, "step": 1525},
    {"epoch": 0.155, "grad_norm": 2.989821195602417, "learning_rate": 3.891447368421052e-06, "loss": 0.1561, "step": 1550},
    {"epoch": 0.1575, "grad_norm": 2.9603219032287598, "learning_rate": 3.879934210526315e-06, "loss": 0.1609, "step": 1575},
    {"epoch": 0.16, "grad_norm": 3.2663583755493164, "learning_rate": 3.868421052631579e-06, "loss": 0.1833, "step": 1600},
    {"epoch": 0.1625, "grad_norm": 3.459775686264038, "learning_rate": 3.856907894736842e-06, "loss": 0.1727, "step": 1625},
    {"epoch": 0.165, "grad_norm": 3.427720069885254, "learning_rate": 3.845394736842105e-06, "loss": 0.181, "step": 1650},
    {"epoch": 0.1675, "grad_norm": 4.471118450164795, "learning_rate": 3.833881578947368e-06, "loss": 0.1536, "step": 1675},
    {"epoch": 0.17, "grad_norm": 3.1428306102752686, "learning_rate": 3.822368421052632e-06, "loss": 0.1372, "step": 1700},
    {"epoch": 1.0021, "grad_norm": 2.8270132541656494, "learning_rate": 3.8108552631578944e-06, "loss": 0.1454, "step": 1725},
    {"epoch": 1.0046, "grad_norm": 3.0873589515686035, "learning_rate": 3.799342105263158e-06, "loss": 0.1303, "step": 1750},
    {"epoch": 1.0071, "grad_norm": 3.187711000442505, "learning_rate": 3.787828947368421e-06, "loss": 0.1383, "step": 1775},
    {"epoch": 1.0096, "grad_norm": 3.1710643768310547, "learning_rate": 3.776315789473684e-06, "loss": 0.1626, "step": 1800},
    {"epoch": 1.0121, "grad_norm": 3.4516818523406982, "learning_rate": 3.7648026315789473e-06, "loss": 0.1405, "step": 1825},
    {"epoch": 1.0146, "grad_norm": 2.930408000946045, "learning_rate": 3.7532894736842103e-06, "loss": 0.143, "step": 1850},
    {"epoch": 1.0171, "grad_norm": 3.066941261291504, "learning_rate": 3.7417763157894733e-06, "loss": 0.1437, "step": 1875},
    {"epoch": 1.0196, "grad_norm": 3.389916181564331, "learning_rate": 3.7302631578947363e-06, "loss": 0.1289, "step": 1900},
    {"epoch": 1.0221, "grad_norm": 3.048574209213257, "learning_rate": 3.7187499999999998e-06, "loss": 0.1415, "step": 1925},
    {"epoch": 1.0246, "grad_norm": 2.5267295837402344, "learning_rate": 3.7072368421052628e-06, "loss": 0.1386, "step": 1950},
    {"epoch": 1.0271, "grad_norm": 3.151757001876831, "learning_rate": 3.6957236842105258e-06, "loss": 0.1436, "step": 1975},
    {"epoch": 1.0296, "grad_norm": 3.629039764404297, "learning_rate": 3.684210526315789e-06, "loss": 0.1353, "step": 2000},
    {"epoch": 1.0296, "eval_loss": 0.1527385264635086, "eval_runtime": 4116.9756, "eval_samples_per_second": 3.311, "eval_steps_per_second": 0.414, "eval_wer": 10.719520685990693, "step": 2000},
    {"epoch": 1.0321, "grad_norm": 1.8788173198699951, "learning_rate": 3.6726973684210522e-06, "loss": 0.1322, "step": 2025},
    {"epoch": 1.0346, "grad_norm": 2.587233066558838, "learning_rate": 3.6611842105263157e-06, "loss": 0.1176, "step": 2050},
    {"epoch": 1.0371, "grad_norm": 4.001532077789307, "learning_rate": 3.6496710526315787e-06, "loss": 0.1233, "step": 2075},
    {"epoch": 1.0396, "grad_norm": 3.3947739601135254, "learning_rate": 3.638157894736842e-06, "loss": 0.1188, "step": 2100},
    {"epoch": 1.0421, "grad_norm": 3.4743120670318604, "learning_rate": 3.626644736842105e-06, "loss": 0.1318, "step": 2125},
    {"epoch": 1.0446, "grad_norm": 2.9288718700408936, "learning_rate": 3.615131578947368e-06, "loss": 0.1224, "step": 2150},
    {"epoch": 1.0471, "grad_norm": 2.6081368923187256, "learning_rate": 3.603618421052631e-06, "loss": 0.1232, "step": 2175},
    {"epoch": 1.0496, "grad_norm": 2.4068429470062256, "learning_rate": 3.5921052631578946e-06, "loss": 0.1073, "step": 2200},
    {"epoch": 1.0521, "grad_norm": 3.049074411392212, "learning_rate": 3.5805921052631576e-06, "loss": 0.1071, "step": 2225},
    {"epoch": 1.0546, "grad_norm": 2.0809032917022705, "learning_rate": 3.5690789473684206e-06, "loss": 0.1217, "step": 2250},
    {"epoch": 1.0571, "grad_norm": 3.0854332447052, "learning_rate": 3.5575657894736836e-06, "loss": 0.1332, "step": 2275},
    {"epoch": 1.0596, "grad_norm": 3.580145835876465, "learning_rate": 3.546052631578947e-06, "loss": 0.131, "step": 2300},
    {"epoch": 1.0621, "grad_norm": 3.8924479484558105, "learning_rate": 3.53453947368421e-06, "loss": 0.136, "step": 2325},
    {"epoch": 1.0646, "grad_norm": 2.8398871421813965, "learning_rate": 3.523026315789473e-06, "loss": 0.1081, "step": 2350},
    {"epoch": 1.0671, "grad_norm": 3.007026195526123, "learning_rate": 3.511513157894737e-06, "loss": 0.1115, "step": 2375},
    {"epoch": 1.0695999999999999, "grad_norm": 1.5712552070617676, "learning_rate": 3.5e-06, "loss": 0.1183, "step": 2400},
    {"epoch": 1.0721, "grad_norm": 3.844963312149048, "learning_rate": 3.488486842105263e-06, "loss": 0.113, "step": 2425},
    {"epoch": 1.0746, "grad_norm": 2.8939759731292725, "learning_rate": 3.476973684210526e-06, "loss": 0.1115, "step": 2450},
    {"epoch": 1.0771, "grad_norm": 1.8150537014007568, "learning_rate": 3.4654605263157894e-06, "loss": 0.1117, "step": 2475},
    {"epoch": 1.0796000000000001, "grad_norm": 2.839418649673462, "learning_rate": 3.4539473684210524e-06, "loss": 0.1065, "step": 2500},
    {"epoch": 1.0796000000000001, "eval_loss": 0.1456422209739685, "eval_runtime": 4133.4016, "eval_samples_per_second": 3.298, "eval_steps_per_second": 0.412, "eval_wer": 9.869361281102277, "step": 2500},
    {"epoch": 1.0821, "grad_norm": 3.4274985790252686, "learning_rate": 3.4424342105263154e-06, "loss": 0.1067, "step": 2525},
    {"epoch": 1.0846, "grad_norm": 2.2946057319641113, "learning_rate": 3.4309210526315784e-06, "loss": 0.1038, "step": 2550},
    {"epoch": 1.0871, "grad_norm": 2.5364551544189453, "learning_rate": 3.419407894736842e-06, "loss": 0.1073, "step": 2575},
    {"epoch": 1.0896, "grad_norm": 2.9779515266418457, "learning_rate": 3.4083552631578944e-06, "loss": 0.1067, "step": 2600},
    {"epoch": 1.0921, "grad_norm": 2.502685308456421, "learning_rate": 3.3968421052631574e-06, "loss": 0.1229, "step": 2625},
    {"epoch": 1.0946, "grad_norm": 2.181756019592285, "learning_rate": 3.3853289473684205e-06, "loss": 0.1071, "step": 2650},
    {"epoch": 1.0971, "grad_norm": 2.428738594055176, "learning_rate": 3.3738157894736843e-06, "loss": 0.101, "step": 2675},
    {"epoch": 1.0996, "grad_norm": 3.797952651977539, "learning_rate": 3.3623026315789473e-06, "loss": 0.1198, "step": 2700},
    {"epoch": 1.1021, "grad_norm": 2.9902758598327637, "learning_rate": 3.3507894736842103e-06, "loss": 0.1013, "step": 2725},
    {"epoch": 1.1046, "grad_norm": 3.0514307022094727, "learning_rate": 3.3392763157894734e-06, "loss": 0.1075, "step": 2750},
    {"epoch": 1.1071, "grad_norm": 3.2877554893493652, "learning_rate": 3.327763157894737e-06, "loss": 0.1059, "step": 2775},
    {"epoch": 1.1096, "grad_norm": 2.3952691555023193, "learning_rate": 3.31625e-06, "loss": 0.0926, "step": 2800},
    {"epoch": 1.1121, "grad_norm": 2.2840464115142822, "learning_rate": 3.304736842105263e-06, "loss": 0.1048, "step": 2825},
    {"epoch": 1.1146, "grad_norm": 2.7062416076660156, "learning_rate": 3.293223684210526e-06, "loss": 0.1049, "step": 2850},
    {"epoch": 1.1171, "grad_norm": 2.971315860748291, "learning_rate": 3.2817105263157893e-06, "loss": 0.1073, "step": 2875},
    {"epoch": 1.1196, "grad_norm": 2.8689844608306885, "learning_rate": 3.2701973684210523e-06, "loss": 0.1141, "step": 2900},
    {"epoch": 1.1221, "grad_norm": 3.6150734424591064, "learning_rate": 3.2586842105263153e-06, "loss": 0.1066, "step": 2925},
    {"epoch": 1.1246, "grad_norm": 2.3004024028778076, "learning_rate": 3.2471710526315783e-06, "loss": 0.1248, "step": 2950},
    {"epoch": 1.1271, "grad_norm": 2.5995240211486816, "learning_rate": 3.2356578947368417e-06, "loss": 0.0972, "step": 2975},
    {"epoch": 1.1296, "grad_norm": 2.957960367202759, "learning_rate": 3.224144736842105e-06, "loss": 0.106, "step": 3000},
    {"epoch": 1.1296, "eval_loss": 0.13624447584152222, "eval_runtime": 4123.4662, "eval_samples_per_second": 3.305, "eval_steps_per_second": 0.413, "eval_wer": 9.09249148008355, "step": 3000},
    {"epoch": 1.1320999999999999, "grad_norm": 2.653007984161377, "learning_rate": 3.212631578947368e-06, "loss": 0.1083, "step": 3025},
    {"epoch": 1.1346, "grad_norm": 2.6895744800567627, "learning_rate": 3.2011184210526316e-06, "loss": 0.1119, "step": 3050},
    {"epoch": 1.1371, "grad_norm": 2.1507463455200195, "learning_rate": 3.1896052631578946e-06, "loss": 0.0944, "step": 3075},
    {"epoch": 1.1396, "grad_norm": 3.61063289642334, "learning_rate": 3.1780921052631576e-06, "loss": 0.095, "step": 3100},
    {"epoch": 1.1421000000000001, "grad_norm": 2.570584774017334, "learning_rate": 3.1665789473684206e-06, "loss": 0.1076, "step": 3125},
    {"epoch": 1.1446, "grad_norm": 3.05507230758667, "learning_rate": 3.155065789473684e-06, "loss": 0.1175, "step": 3150},
    {"epoch": 1.1471, "grad_norm": 2.82817006111145, "learning_rate": 3.143552631578947e-06, "loss": 0.0965, "step": 3175},
    {"epoch": 1.1496, "grad_norm": 2.336517572402954, "learning_rate": 3.13203947368421e-06, "loss": 0.0955, "step": 3200},
    {"epoch": 1.1521, "grad_norm": 3.8640036582946777, "learning_rate": 3.120526315789473e-06, "loss": 0.1044, "step": 3225},
    {"epoch": 1.1546, "grad_norm": 3.7205588817596436, "learning_rate": 3.1090131578947366e-06, "loss": 0.1013, "step": 3250},
    {"epoch": 1.1571, "grad_norm": 2.1962900161743164, "learning_rate": 3.0974999999999996e-06, "loss": 0.0978, "step": 3275},
    {"epoch": 1.1596, "grad_norm": 3.3310599327087402, "learning_rate": 3.0859868421052626e-06, "loss": 0.1089, "step": 3300},
    {"epoch": 1.1621, "grad_norm": 2.699566602706909, "learning_rate": 3.074473684210526e-06, "loss": 0.1078, "step": 3325},
    {"epoch": 1.1646, "grad_norm": 3.79370379447937, "learning_rate": 3.0629605263157894e-06, "loss": 0.1118, "step": 3350},
    {"epoch": 1.1671, "grad_norm": 1.9741384983062744, "learning_rate": 3.0514473684210525e-06, "loss": 0.1119, "step": 3375},
    {"epoch": 1.1696, "grad_norm": 2.29034686088562, "learning_rate": 3.0399342105263155e-06, "loss": 0.1015, "step": 3400},
    {"epoch": 2.0017, "grad_norm": 2.011443853378296, "learning_rate": 3.028421052631579e-06, "loss": 0.0708, "step": 3425},
    {"epoch": 2.0042, "grad_norm": 1.2196134328842163, "learning_rate": 3.016907894736842e-06, "loss": 0.0668, "step": 3450},
    {"epoch": 2.0067, "grad_norm": 2.863933563232422, "learning_rate": 3.005394736842105e-06, "loss": 0.0673, "step": 3475},
    {"epoch": 2.0092, "grad_norm": 1.9341013431549072, "learning_rate": 2.9938815789473684e-06, "loss": 0.0718, "step": 3500},
    {"epoch": 2.0092, "eval_loss": 0.13255682587623596, "eval_runtime": 4133.4892, "eval_samples_per_second": 3.297, "eval_steps_per_second": 0.412, "eval_wer": 8.542819451060867, "step": 3500},
    {"epoch": 2.0117, "grad_norm": 2.795734405517578, "learning_rate": 2.9823684210526314e-06, "loss": 0.071, "step": 3525},
    {"epoch": 2.0142, "grad_norm": 1.982479214668274, "learning_rate": 2.9708552631578944e-06, "loss": 0.0629, "step": 3550},
    {"epoch": 2.0167, "grad_norm": 3.168161630630493, "learning_rate": 2.9593421052631574e-06, "loss": 0.0593, "step": 3575},
    {"epoch": 2.0192, "grad_norm": 2.259500741958618, "learning_rate": 2.947828947368421e-06, "loss": 0.0696, "step": 3600},
    {"epoch": 2.0217, "grad_norm": 2.1626062393188477, "learning_rate": 2.936315789473684e-06, "loss": 0.0687, "step": 3625},
    {"epoch": 2.0242, "grad_norm": 2.4419946670532227, "learning_rate": 2.924802631578947e-06, "loss": 0.0686, "step": 3650},
    {"epoch": 2.0267, "grad_norm": 2.445758819580078, "learning_rate": 2.9132894736842103e-06, "loss": 0.0631, "step": 3675},
    {"epoch": 2.0292, "grad_norm": 2.614476442337036, "learning_rate": 2.9017763157894737e-06, "loss": 0.0647, "step": 3700},
    {"epoch": 2.0317, "grad_norm": 1.4166672229766846, "learning_rate": 2.8902631578947367e-06, "loss": 0.0653, "step": 3725},
    {"epoch": 2.0342, "grad_norm": 1.8435245752334595, "learning_rate": 2.8787499999999998e-06, "loss": 0.0567, "step": 3750},
    {"epoch": 2.0367, "grad_norm": 1.8179950714111328, "learning_rate": 2.867236842105263e-06, "loss": 0.0636, "step": 3775},
    {"epoch": 2.0392, "grad_norm": 1.487122893333435, "learning_rate": 2.855723684210526e-06, "loss": 0.0598, "step": 3800},
    {"epoch": 2.0417, "grad_norm": 2.9211690425872803, "learning_rate": 2.8442105263157892e-06, "loss": 0.0599, "step": 3825},
    {"epoch": 2.0442, "grad_norm": 2.5018093585968018, "learning_rate": 2.8326973684210522e-06, "loss": 0.055, "step": 3850},
    {"epoch": 2.0467, "grad_norm": 2.186502456665039, "learning_rate": 2.8211842105263157e-06, "loss": 0.0533, "step": 3875},
    {"epoch": 2.0492, "grad_norm": 1.039233922958374, "learning_rate": 2.8096710526315787e-06, "loss": 0.0514, "step": 3900},
    {"epoch": 2.0517, "grad_norm": 1.871267557144165, "learning_rate": 2.7981578947368417e-06, "loss": 0.0512, "step": 3925},
    {"epoch": 2.0542, "grad_norm": 2.0849483013153076, "learning_rate": 2.7866447368421047e-06, "loss": 0.0579, "step": 3950},
    {"epoch": 2.0567, "grad_norm": 1.6887531280517578, "learning_rate": 2.775131578947368e-06, "loss": 0.0575, "step": 3975},
    {"epoch": 2.0592, "grad_norm": 1.88097083568573, "learning_rate": 2.763618421052631e-06, "loss": 0.0683, "step": 4000},
    {"epoch": 2.0592, "eval_loss": 0.1342601627111435, "eval_runtime": 4125.8373, "eval_samples_per_second": 3.304, "eval_steps_per_second": 0.413, "eval_wer": 8.485103888013485, "step": 4000},
    {"epoch": 2.0617, "grad_norm": 2.1877427101135254, "learning_rate": 2.7521052631578946e-06, "loss": 0.0614, "step": 4025},
    {"epoch": 2.0642, "grad_norm": 1.4176368713378906, "learning_rate": 2.740592105263158e-06, "loss": 0.0559, "step": 4050},
    {"epoch": 2.0667, "grad_norm": 2.4362101554870605, "learning_rate": 2.729078947368421e-06, "loss": 0.0593, "step": 4075},
    {"epoch": 2.0692, "grad_norm": 1.8663033246994019, "learning_rate": 2.717565789473684e-06, "loss": 0.0591, "step": 4100},
    {"epoch": 2.0717, "grad_norm": 1.627626657485962, "learning_rate": 2.706052631578947e-06, "loss": 0.0637, "step": 4125},
    {"epoch": 2.0742, "grad_norm": 2.2072463035583496, "learning_rate": 2.6945394736842105e-06, "loss": 0.0571, "step": 4150},
    {"epoch": 2.0767, "grad_norm": 1.7411611080169678, "learning_rate": 2.6830263157894735e-06, "loss": 0.0588, "step": 4175},
    {"epoch": 2.0792, "grad_norm": 1.324000358581543, "learning_rate": 2.6715131578947365e-06, "loss": 0.0482, "step": 4200},
    {"epoch": 2.0817, "grad_norm": 1.4138795137405396, "learning_rate": 2.6599999999999995e-06, "loss": 0.0477, "step": 4225},
    {"epoch": 2.0842, "grad_norm": 2.403547763824463, "learning_rate": 2.648486842105263e-06, "loss": 0.0558, "step": 4250},
    {"epoch": 2.0867, "grad_norm": 1.3718703985214233, "learning_rate": 2.636973684210526e-06, "loss": 0.0546, "step": 4275},
    {"epoch": 2.0892, "grad_norm": 2.296445369720459, "learning_rate": 2.625460526315789e-06, "loss": 0.0554, "step": 4300},
    {"epoch": 2.0917, "grad_norm": 2.3471312522888184, "learning_rate": 2.613947368421052e-06, "loss": 0.051, "step": 4325},
    {"epoch": 2.0942, "grad_norm": 1.6061975955963135, "learning_rate": 2.602434210526316e-06, "loss": 0.0548, "step": 4350},
    {"epoch": 2.0967, "grad_norm": 2.979126453399658, "learning_rate": 2.590921052631579e-06, "loss": 0.0492, "step": 4375},
    {"epoch": 2.0992, "grad_norm": 1.7963169813156128, "learning_rate": 2.579407894736842e-06, "loss": 0.0514, "step": 4400},
    {"epoch": 2.1017, "grad_norm": 2.4996039867401123, "learning_rate": 2.5678947368421053e-06, "loss": 0.0399, "step": 4425},
    {"epoch": 2.1042, "grad_norm": 1.7498191595077515, "learning_rate": 2.5563815789473683e-06, "loss": 0.0522, "step": 4450},
    {"epoch": 2.1067, "grad_norm": 1.413889765739441, "learning_rate": 2.5448684210526313e-06, "loss": 0.0517, "step": 4475},
    {"epoch": 2.1092, "grad_norm": 2.0956978797912598, "learning_rate": 2.5333552631578943e-06, "loss": 0.0482, "step": 4500},
    {"epoch": 2.1092, "eval_loss": 0.1336347758769989, "eval_runtime": 4119.9162, "eval_samples_per_second": 3.308, "eval_steps_per_second": 0.414, "eval_wer": 8.104914067939463, "step": 4500},
    {"epoch": 2.1117, "grad_norm": 3.138298749923706, "learning_rate": 2.5218421052631578e-06, "loss": 0.0568, "step": 4525},
    {"epoch": 2.1142, "grad_norm": 1.4262772798538208, "learning_rate": 2.510328947368421e-06, "loss": 0.0475, "step": 4550},
    {"epoch": 2.1167, "grad_norm": 3.3500139713287354, "learning_rate": 2.498815789473684e-06, "loss": 0.0474, "step": 4575},
    {"epoch": 2.1192, "grad_norm": 4.509912014007568, "learning_rate": 2.4873026315789472e-06, "loss": 0.0586, "step": 4600},
    {"epoch": 2.1217, "grad_norm": 2.1386468410491943, "learning_rate": 2.4757894736842102e-06, "loss": 0.062, "step": 4625},
    {"epoch": 2.1242, "grad_norm": 1.1121129989624023, "learning_rate": 2.4642763157894733e-06, "loss": 0.0563, "step": 4650},
    {"epoch": 2.1267, "grad_norm": 1.677538514137268, "learning_rate": 2.4527631578947363e-06, "loss": 0.0519, "step": 4675},
    {"epoch": 2.1292, "grad_norm": 1.579513430595398, "learning_rate": 2.44125e-06, "loss": 0.0544, "step": 4700},
    {"epoch": 2.1317, "grad_norm": 2.1100914478302, "learning_rate": 2.429736842105263e-06, "loss": 0.0578, "step": 4725},
    {"epoch": 2.1342, "grad_norm": 1.779682993888855, "learning_rate": 2.418223684210526e-06, "loss": 0.0486, "step": 4750},
    {"epoch": 2.1367, "grad_norm": 1.7443439960479736, "learning_rate": 2.4067105263157896e-06, "loss": 0.0534, "step": 4775},
    {"epoch": 2.1391999999999998, "grad_norm": 1.9388935565948486, "learning_rate": 2.3951973684210526e-06, "loss": 0.0516, "step": 4800},
    {"epoch": 2.1417, "grad_norm": 1.82517409324646, "learning_rate": 2.3836842105263156e-06, "loss": 0.0451, "step": 4825},
    {"epoch": 2.1442, "grad_norm": 1.9101967811584473, "learning_rate": 2.3721710526315786e-06, "loss": 0.0546, "step": 4850},
    {"epoch": 2.1467, "grad_norm": 1.7242915630340576, "learning_rate": 2.360657894736842e-06, "loss": 0.0495, "step": 4875},
    {"epoch": 2.1492, "grad_norm": 1.9127079248428345, "learning_rate": 2.349144736842105e-06, "loss": 0.0465, "step": 4900},
    {"epoch": 2.1517, "grad_norm": 2.7716519832611084, "learning_rate": 2.337631578947368e-06, "loss": 0.0493, "step": 4925},
    {"epoch": 2.1542, "grad_norm": 3.141706705093384, "learning_rate": 2.326118421052631e-06, "loss": 0.046, "step": 4950},
    {"epoch": 2.1567, "grad_norm": 2.2624270915985107, "learning_rate": 2.3146052631578945e-06, "loss": 0.0522, "step": 4975},
    {"epoch": 2.1592000000000002, "grad_norm": 1.2777652740478516, "learning_rate": 2.3030921052631575e-06, "loss": 0.0548, "step": 5000},
    {"epoch": 2.1592000000000002, "eval_loss": 0.13162237405776978, "eval_runtime": 4127.2085, "eval_samples_per_second": 3.302, "eval_steps_per_second": 0.413, "eval_wer": 7.9244384184103485, "step": 5000},
    {"epoch": 2.1617, "grad_norm": 2.106818675994873, "learning_rate": 2.2915789473684206e-06, "loss": 0.0527, "step": 5025},
    {"epoch": 2.1642, "grad_norm": 2.2705554962158203, "learning_rate": 2.2800657894736844e-06, "loss": 0.0483, "step": 5050},
    {"epoch": 2.1667, "grad_norm": 1.5468271970748901, "learning_rate": 2.2685526315789474e-06, "loss": 0.0516, "step": 5075},
    {"epoch": 2.1692, "grad_norm": 2.0331270694732666, "learning_rate": 2.2570394736842104e-06, "loss": 0.0551, "step": 5100},
    {"epoch": 3.0013, "grad_norm": 1.107423186302185, "learning_rate": 2.2455263157894734e-06, "loss": 0.0434, "step": 5125},
    {"epoch": 3.0038, "grad_norm": 3.9103100299835205, "learning_rate": 2.234013157894737e-06, "loss": 0.0362, "step": 5150},
    {"epoch": 3.0063, "grad_norm": 1.193088173866272, "learning_rate": 2.2225e-06, "loss": 0.0327, "step": 5175},
    {"epoch": 3.0088, "grad_norm": 1.0432852506637573, "learning_rate": 2.210986842105263e-06, "loss": 0.0326, "step": 5200},
    {"epoch": 3.0113, "grad_norm": 0.7116020917892456, "learning_rate": 2.199473684210526e-06, "loss": 0.0296, "step": 5225},
    {"epoch": 3.0138, "grad_norm": 2.009617805480957, "learning_rate": 2.1879605263157894e-06, "loss": 0.0367, "step": 5250},
    {"epoch": 3.0163, "grad_norm": 1.9047244787216187, "learning_rate": 2.1764473684210524e-06, "loss": 0.0347, "step": 5275},
    {"epoch": 3.0188, "grad_norm": 1.630439043045044, "learning_rate": 2.164934210526316e-06, "loss": 0.0291, "step": 5300},
    {"epoch": 3.0213, "grad_norm": 1.4158824682235718, "learning_rate": 2.153421052631579e-06, "loss": 0.0321, "step": 5325},
    {"epoch": 3.0238, "grad_norm": 1.2792794704437256, "learning_rate": 2.141907894736842e-06, "loss": 0.0338, "step": 5350},
    {"epoch": 3.0263, "grad_norm": 1.6505346298217773, "learning_rate": 2.1303947368421053e-06, "loss": 0.0348, "step": 5375},
    {"epoch": 3.0288, "grad_norm": 1.5343618392944336, "learning_rate": 2.1188815789473683e-06, "loss": 0.0318, "step": 5400},
    {"epoch": 3.0313, "grad_norm": 1.8325493335723877, "learning_rate": 2.1073684210526313e-06, "loss": 0.0333, "step": 5425},
    {"epoch": 3.0338, "grad_norm": 1.7224900722503662, "learning_rate": 2.0958552631578943e-06, "loss": 0.0322, "step": 5450},
    {"epoch": 3.0362999999999998, "grad_norm": 1.3443737030029297, "learning_rate": 2.0843421052631577e-06, "loss": 0.0304, "step": 5475},
    {"epoch": 3.0388, "grad_norm": 1.3260679244995117, "learning_rate": 2.0728289473684207e-06, "loss": 0.0282, "step": 5500},
    {"epoch": 3.0388, "eval_loss": 0.13909843564033508, "eval_runtime": 4135.2147, "eval_samples_per_second": 3.296, "eval_steps_per_second": 0.412, "eval_wer": 7.8181684927992965, "step": 5500},
    {"epoch": 3.0413, "grad_norm": 1.0075204372406006, "learning_rate": 2.061315789473684e-06, "loss": 0.0308, "step": 5525},
    {"epoch": 3.0438, "grad_norm": 1.0206842422485352, "learning_rate": 2.049802631578947e-06, "loss": 0.0306, "step": 5550},
    {"epoch": 3.0463, "grad_norm": 1.411301851272583, "learning_rate": 2.03828947368421e-06, "loss": 0.0243, "step": 5575},
    {"epoch": 3.0488, "grad_norm": 0.959862470626831, "learning_rate": 2.0267763157894732e-06, "loss": 0.0272, "step": 5600},
    {"epoch": 3.0513, "grad_norm": 2.2999842166900635, "learning_rate": 2.0152631578947367e-06, "loss": 0.0246, "step": 5625},
    {"epoch": 3.0538, "grad_norm": 2.890066146850586, "learning_rate": 2.00375e-06, "loss": 0.0299, "step": 5650},
    {"epoch": 3.0563, "grad_norm": 1.7101376056671143, "learning_rate": 1.992236842105263e-06, "loss": 0.0322, "step": 5675},
    {"epoch": 3.0588, "grad_norm": 1.531943917274475, "learning_rate": 1.980723684210526e-06, "loss": 0.0345, "step": 5700},
    {"epoch": 3.0613, "grad_norm": 1.6334413290023804, "learning_rate": 1.969210526315789e-06, "loss": 0.032, "step": 5725},
    {"epoch": 3.0638, "grad_norm": 2.112278461456299, "learning_rate": 1.9576973684210526e-06, "loss": 0.0304, "step": 5750},
    {"epoch": 3.0663, "grad_norm": 1.7582517862319946, "learning_rate": 1.9461842105263156e-06, "loss": 0.0254, "step": 5775},
    {"epoch": 3.0688, "grad_norm": 1.3391777276992798, "learning_rate": 1.934671052631579e-06, "loss": 0.0316, "step": 5800},
    {"epoch": 3.0713, "grad_norm": 0.8350562453269958, "learning_rate": 1.923157894736842e-06, "loss": 0.0329, "step": 5825},
    {"epoch": 3.0738, "grad_norm": 0.7084619402885437, "learning_rate": 1.911644736842105e-06, "loss": 0.0325, "step": 5850},
    {"epoch": 3.0763, "grad_norm": 1.2961277961730957, "learning_rate": 1.9001315789473683e-06, "loss": 0.0313, "step": 5875},
    {"epoch": 3.0788, "grad_norm": 1.032840371131897, "learning_rate": 1.8886184210526315e-06, "loss": 0.0224, "step": 5900},
    {"epoch": 3.0813, "grad_norm": 1.2073044776916504, "learning_rate": 1.8771052631578945e-06, "loss": 0.0215, "step": 5925},
    {"epoch": 3.0838, "grad_norm": 0.8210967779159546, "learning_rate": 1.8655921052631577e-06, "loss": 0.0258, "step": 5950},
    {"epoch": 3.0863, "grad_norm": 1.5273653268814087, "learning_rate": 1.854078947368421e-06, "loss": 0.0254, "step": 5975},
    {"epoch": 3.0888, "grad_norm": 3.194197177886963, "learning_rate": 1.8425657894736842e-06, "loss": 0.025, "step": 6000},
    {"epoch": 3.0888, "eval_loss": 0.14247554540634155, "eval_runtime": 4123.5746, "eval_samples_per_second": 3.305, "eval_steps_per_second": 0.413, "eval_wer": 7.940928579281029, "step": 6000},
    {"epoch": 3.0913, "grad_norm": 2.1373400688171387, "learning_rate": 1.8310526315789472e-06, "loss": 0.031, "step": 6025},
    {"epoch": 3.0938, "grad_norm": 1.0779415369033813, "learning_rate": 1.8195394736842104e-06, "loss": 0.024, "step": 6050},
    {"epoch": 3.0963, "grad_norm": 0.9637121558189392, "learning_rate": 1.8080263157894734e-06, "loss": 0.0282, "step": 6075},
    {"epoch": 3.0987999999999998, "grad_norm": 1.1645703315734863, "learning_rate": 1.7965131578947366e-06, "loss": 0.0278, "step": 6100},
    {"epoch": 3.1013, "grad_norm": 1.2814173698425293, "learning_rate": 1.7849999999999996e-06, "loss": 0.0199, "step": 6125},
    {"epoch": 3.1038, "grad_norm": 1.458809494972229, "learning_rate": 1.773486842105263e-06, "loss": 0.0264, "step": 6150},
    {"epoch": 3.1063, "grad_norm": 1.6669671535491943, "learning_rate": 1.7619736842105263e-06, "loss": 0.0272, "step": 6175},
    {"epoch": 3.1088, "grad_norm": 1.5049173831939697, "learning_rate": 1.7504605263157893e-06, "loss": 0.0243, "step": 6200},
    {"epoch": 3.1113, "grad_norm": 0.861107587814331, "learning_rate": 1.7389473684210525e-06, "loss": 0.0274, "step": 6225},
    {"epoch": 3.1138, "grad_norm": 1.0454998016357422, "learning_rate": 1.7274342105263155e-06, "loss": 0.0258, "step": 6250},
    {"epoch": 3.1163, "grad_norm": 1.7108014822006226, "learning_rate": 1.7159210526315788e-06, "loss": 0.0259, "step": 6275},
    {"epoch": 3.1188, "grad_norm": 0.8804712295532227, "learning_rate": 1.704407894736842e-06, "loss": 0.0255, "step": 6300},
    {"epoch": 3.1213, "grad_norm": 2.0050883293151855, "learning_rate": 1.6928947368421052e-06, "loss": 0.0304, "step": 6325},
    {"epoch": 3.1238, "grad_norm": 1.4400875568389893, "learning_rate": 1.6813815789473682e-06, "loss": 0.0333, "step": 6350},
    {"epoch": 3.1263, "grad_norm": 1.4423948526382446, "learning_rate": 1.6698684210526315e-06, "loss": 0.0279, "step": 6375},
    {"epoch": 3.1288, "grad_norm": 1.3972327709197998, "learning_rate": 1.6583552631578947e-06, "loss": 0.0255, "step": 6400},
    {"epoch": 3.1313, "grad_norm": 1.6908966302871704, "learning_rate": 1.6468421052631577e-06, "loss": 0.0267, "step": 6425},
    {"epoch": 3.1338, "grad_norm": 0.9540082216262817, "learning_rate": 1.635328947368421e-06, "loss": 0.0265, "step": 6450},
    {"epoch": 3.1363, "grad_norm": 1.41488778591156, "learning_rate": 1.6238157894736841e-06, "loss": 0.0224, "step": 6475},
    {"epoch": 3.1388, "grad_norm": 0.4790860116481781, "learning_rate": 1.6123026315789474e-06, "loss": 0.0274, "step": 6500},
    {"epoch": 3.1388, "eval_loss": 0.13914132118225098, "eval_runtime": 4133.8202, "eval_samples_per_second": 3.297, "eval_steps_per_second": 0.412, "eval_wer": 7.731137088204039, "step": 6500},
    {"epoch": 3.1413, "grad_norm": 2.5638585090637207, "learning_rate": 1.6007894736842104e-06, "loss": 0.025, "step": 6525},
    {"epoch": 3.1438, "grad_norm": 1.8847306966781616, "learning_rate": 1.5892763157894736e-06, "loss": 0.0294, "step": 6550},
    {"epoch": 3.1463, "grad_norm": 1.0196236371994019, "learning_rate": 1.5777631578947366e-06, "loss": 0.0255, "step": 6575},
    {"epoch": 3.1488, "grad_norm": 1.0703202486038208, "learning_rate": 1.5662499999999998e-06, "loss": 0.0246, "step": 6600},
    {"epoch": 3.1513, "grad_norm": 2.646519422531128, "learning_rate": 1.5547368421052628e-06, "loss": 0.0213, "step": 6625},
    {"epoch": 3.1538, "grad_norm": 1.7430530786514282, "learning_rate": 1.5432236842105263e-06, "loss": 0.0267, "step": 6650},
    {"epoch": 3.1563, "grad_norm": 1.0606240034103394, "learning_rate": 1.5317105263157895e-06, "loss": 0.0269, "step": 6675},
    {"epoch": 3.1588, "grad_norm": 1.4670476913452148, "learning_rate": 1.5201973684210525e-06, "loss": 0.0271, "step": 6700},
    {"epoch": 3.1612999999999998, "grad_norm": 2.345014810562134, "learning_rate": 1.5086842105263157e-06, "loss": 0.0252, "step": 6725},
    {"epoch": 3.1638, "grad_norm": 2.9098987579345703, "learning_rate": 1.4971710526315787e-06, "loss": 0.0272, "step": 6750},
    {"epoch": 3.1663, "grad_norm": 0.5682694911956787, "learning_rate": 1.485657894736842e-06, "loss": 0.0237, "step": 6775},
    {"epoch": 3.1688, "grad_norm": 1.4645904302597046, "learning_rate": 1.4746052631578947e-06, "loss": 0.0303, "step": 6800},
    {"epoch": 4.0009, "grad_norm": 1.3764489889144897, "learning_rate": 1.4630921052631578e-06, "loss": 0.0242, "step": 6825},
    {"epoch": 4.0034, "grad_norm": 0.8848748803138733, "learning_rate": 1.451578947368421e-06, "loss": 0.0163, "step": 6850},
    {"epoch": 4.0059, "grad_norm": 0.619125485420227, "learning_rate": 1.440065789473684e-06, "loss": 0.0188, "step": 6875},
    {"epoch": 4.0084, "grad_norm": 0.9328649044036865, "learning_rate": 1.4285526315789472e-06, "loss": 0.0173, "step": 6900},
    {"epoch": 4.0109, "grad_norm": 1.77474045753479, "learning_rate": 1.4170394736842104e-06, "loss": 0.0146, "step": 6925},
    {"epoch": 4.0134, "grad_norm": 1.3934537172317505, "learning_rate": 1.4055263157894737e-06, "loss": 0.0156, "step": 6950},
    {"epoch": 4.0159, "grad_norm": 1.2856354713439941, "learning_rate": 1.3940131578947367e-06, "loss": 0.0173, "step": 6975},
    {"epoch": 4.0184, "grad_norm": 2.1229758262634277, "learning_rate": 1.3824999999999999e-06, "loss": 0.0155, "step": 7000},
    {"epoch": 4.0184, "eval_loss": 0.14916160702705383, "eval_runtime": 4128.7355, "eval_samples_per_second": 3.301, "eval_steps_per_second": 0.413, "eval_wer": 7.697240646414307, "step": 7000},
    {"epoch": 4.0209, "grad_norm": 0.44512999057769775, "learning_rate": 1.3709868421052631e-06, "loss": 0.0153, "step": 7025},
    {"epoch": 4.0234, "grad_norm": 1.8791674375534058, "learning_rate": 1.3594736842105261e-06, "loss": 0.0165, "step": 7050},
    {"epoch": 4.0259, "grad_norm": 5.244405746459961, "learning_rate": 1.3479605263157894e-06, "loss": 0.0179, "step": 7075},
    {"epoch": 4.0284, "grad_norm": 1.1926153898239136, "learning_rate": 1.3364473684210526e-06, "loss": 0.0161, "step": 7100},
    {"epoch": 4.0309, "grad_norm": 1.1147819757461548, "learning_rate": 1.3249342105263158e-06, "loss": 0.015, "step": 7125},
    {"epoch": 4.0334, "grad_norm": 1.9370721578598022, "learning_rate": 1.3134210526315788e-06, "loss": 0.0142, "step": 7150},
    {"epoch": 4.0359, "grad_norm": 0.49344903230667114, "learning_rate": 1.301907894736842e-06, "loss": 0.0134, "step": 7175},
    {"epoch": 4.0384, "grad_norm": 1.8190902471542358, "learning_rate": 1.290394736842105e-06, "loss": 0.0168, "step": 7200},
    {"epoch": 4.0409, "grad_norm": 0.7560425400733948, "learning_rate": 1.2788815789473683e-06, "loss": 0.0143, "step": 7225},
    {"epoch": 4.0434, "grad_norm": 1.0451087951660156, "learning_rate": 1.2673684210526313e-06, "loss": 0.0149, "step": 7250},
    {"epoch": 4.0459, "grad_norm": 1.0334726572036743, "learning_rate": 1.2558552631578947e-06, "loss": 0.0136, "step": 7275},
    {"epoch": 4.0484, "grad_norm": 0.6531663537025452, "learning_rate": 1.244342105263158e-06, "loss": 0.0137, "step": 7300},
    {"epoch": 4.0509, "grad_norm": 0.8954887986183167, "learning_rate": 1.232828947368421e-06, "loss": 0.0118, "step": 7325},
    {"epoch": 4.0534, "grad_norm": 1.0640511512756348, "learning_rate": 1.2213157894736842e-06, "loss": 0.0126, "step": 7350},
    {"epoch": 4.0559, "grad_norm": 0.2824617922306061, "learning_rate": 1.2098026315789472e-06, "loss": 0.0139, "step": 7375},
    {"epoch": 4.0584, "grad_norm": 1.0095443725585938, "learning_rate": 1.1982894736842104e-06, "loss": 0.018, "step": 7400},
    {"epoch": 4.0609, "grad_norm": 1.1475225687026978, "learning_rate": 1.1867763157894734e-06, "loss": 0.0133, "step": 7425},
    {"epoch": 4.0634, "grad_norm": 1.5951991081237793, "learning_rate": 1.1752631578947369e-06, "loss": 0.013, "step": 7450},
    {"epoch": 4.0659, "grad_norm": 0.3482917249202728, "learning_rate": 1.1637499999999999e-06, "loss": 0.0154, "step": 7475},
    {"epoch": 4.0684, "grad_norm": 1.1572391986846924, "learning_rate": 1.152236842105263e-06, "loss": 0.0189, "step": 7500},
    {"epoch": 4.0684, "eval_loss": 0.15172211825847626, "eval_runtime": 4117.5679, "eval_samples_per_second": 3.31, "eval_steps_per_second": 0.414, "eval_wer": 7.656931364285977, "step": 7500},
    {"epoch": 4.0709, "grad_norm": 1.3942557573318481, "learning_rate": 1.140723684210526e-06, "loss": 0.0143, "step": 7525},
    {"epoch": 4.0734, "grad_norm": 0.8097572326660156, "learning_rate": 1.1292105263157893e-06, "loss": 0.0127, "step": 7550},
    {"epoch": 4.0759, "grad_norm": 0.740375816822052, "learning_rate": 1.1176973684210526e-06, "loss": 0.0124, "step": 7575},
    {"epoch": 4.0784, "grad_norm": 0.8702480792999268, "learning_rate": 1.1061842105263156e-06, "loss": 0.0137, "step": 7600},
    {"epoch": 4.0809, "grad_norm": 1.223105788230896, "learning_rate": 1.094671052631579e-06, "loss": 0.0137, "step": 7625},
    {"epoch": 4.0834, "grad_norm": 0.43614983558654785, "learning_rate": 1.083157894736842e-06, "loss": 0.0109, "step": 7650},
    {"epoch": 4.0859, "grad_norm": 1.0974986553192139, "learning_rate": 1.0716447368421052e-06, "loss": 0.0118, "step": 7675},
    {"epoch": 4.0884, "grad_norm": 0.7234652042388916, "learning_rate": 1.0601315789473682e-06, "loss": 0.0125, "step": 7700},
    {"epoch": 4.0909, "grad_norm": 0.7752431035041809, "learning_rate": 1.0486184210526315e-06, "loss": 0.0135, "step": 7725},
    {"epoch": 4.0934, "grad_norm": 0.8796952366828918, "learning_rate": 1.0371052631578947e-06, "loss": 0.0158, "step": 7750},
    {"epoch": 4.0959, "grad_norm": 3.9135661125183105, "learning_rate": 1.0255921052631577e-06, "loss": 0.0139, "step": 7775},
    {"epoch": 4.0984, "grad_norm": 0.4837290942668915, "learning_rate": 1.014078947368421e-06, "loss": 0.0103, "step": 7800},
    {"epoch": 4.1009, "grad_norm": 1.1155998706817627, "learning_rate": 1.0025657894736842e-06, "loss": 0.0106, "step": 7825},
    {"epoch": 4.1034, "grad_norm": 2.628676652908325, "learning_rate": 9.910526315789474e-07, "loss": 0.0089, "step": 7850},
    {"epoch": 4.1059, "grad_norm": 1.716665506362915, "learning_rate": 9.795394736842104e-07, "loss": 0.0132, "step": 7875},
    {"epoch": 4.1084, "grad_norm": 1.6751716136932373, "learning_rate": 9.680263157894736e-07, "loss": 0.0137, "step": 7900},
    {"epoch": 4.1109, "grad_norm": 0.9773244261741638, "learning_rate": 9.565131578947368e-07, "loss": 0.0111, "step": 7925},
    {"epoch": 4.1134, "grad_norm": 1.44219172000885, "learning_rate": 9.45e-07, "loss": 0.0139, "step": 7950},
    {"epoch": 4.1159, "grad_norm": 0.8723123073577881, "learning_rate": 9.334868421052631e-07, "loss": 0.0117, "step": 7975},
    {"epoch": 4.1184, "grad_norm": 0.6484673023223877, "learning_rate": 9.219736842105263e-07, "loss": 0.0139, "step": 8000},
    {"epoch": 4.1184, "eval_loss": 0.15393850207328796, "eval_runtime": 4128.9341, "eval_samples_per_second": 3.301, "eval_steps_per_second": 0.413, "eval_wer": 7.626699402689728, "step": 8000},
    {"epoch": 4.1209, "grad_norm": 1.3702197074890137, "learning_rate": 9.104605263157894e-07, "loss": 0.0158, "step": 8025},
    {"epoch": 4.1234, "grad_norm": 1.425645351409912, "learning_rate": 8.989473684210525e-07, "loss": 0.0117, "step": 8050},
    {"epoch": 4.1259, "grad_norm": 1.4255399703979492, "learning_rate": 8.874342105263158e-07, "loss": 0.015, "step": 8075},
    {"epoch": 4.1284, "grad_norm": 0.6988621950149536, "learning_rate": 8.759210526315789e-07, "loss": 0.0141, "step": 8100},
    {"epoch": 4.1309, "grad_norm": 1.1563546657562256, "learning_rate": 8.64407894736842e-07, "loss": 0.0122, "step": 8125},
    {"epoch": 4.1334, "grad_norm": 1.2023714780807495, "learning_rate": 8.528947368421051e-07, "loss": 0.013, "step": 8150},
    {"epoch": 4.1359, "grad_norm": 0.9450110197067261, "learning_rate": 8.413815789473683e-07, "loss": 0.0123, "step": 8175},
    {"epoch": 4.1384, "grad_norm": 0.9265995621681213, "learning_rate": 8.298684210526316e-07, "loss": 0.0114, "step": 8200},
    {"epoch": 4.1409, "grad_norm": 0.4234980046749115, "learning_rate": 8.183552631578947e-07, "loss": 0.0085, "step": 8225},
    {"epoch": 4.1434, "grad_norm": 1.3323073387145996, "learning_rate": 8.068421052631579e-07, "loss": 0.014, "step": 8250},
    {"epoch": 4.1459, "grad_norm": 1.2050007581710815, "learning_rate": 7.95328947368421e-07, "loss": 0.0106, "step": 8275},
    {"epoch": 4.1484, "grad_norm": 1.261042594909668, "learning_rate": 7.838157894736841e-07, "loss": 0.0107, "step": 8300},
    {"epoch": 4.1509, "grad_norm": 1.2892303466796875, "learning_rate": 7.723026315789474e-07, "loss": 0.0145, "step": 8325},
    {"epoch": 4.1534, "grad_norm": 1.1626112461090088, "learning_rate": 7.607894736842105e-07, "loss": 0.0139, "step": 8350},
    {"epoch": 4.1559, "grad_norm": 1.0547322034835815, "learning_rate": 7.492763157894736e-07, "loss": 0.0154, "step": 8375},
    {"epoch": 4.1584, "grad_norm": 0.44805532693862915, "learning_rate": 7.377631578947367e-07, "loss": 0.0109, "step": 8400},
    {"epoch": 4.1609, "grad_norm": 0.7095866203308105, "learning_rate": 7.262499999999999e-07, "loss": 0.0114, "step": 8425},
    {"epoch": 4.1634, "grad_norm": 1.4220194816589355, "learning_rate": 7.14736842105263e-07, "loss": 0.0134, "step": 8450},
    {"epoch": 4.1659, "grad_norm": 1.0814168453216553, "learning_rate": 7.032236842105263e-07, "loss": 0.0142, "step": 8475},
    {"epoch": 4.1684, "grad_norm": 0.7026916146278381, "learning_rate": 6.917105263157895e-07, "loss": 0.0141, "step": 8500},
    {"epoch": 4.1684, "eval_loss": 0.15496784448623657, "eval_runtime": 4124.1829, "eval_samples_per_second": 3.305, "eval_steps_per_second": 0.413, "eval_wer": 7.542416358239584, "step": 8500},
    {"epoch": 5.0005, "grad_norm": 4.648550033569336, "learning_rate": 6.801973684210526e-07, "loss": 0.0285, "step": 8525},
    {"epoch": 5.003, "grad_norm": 1.9204503297805786, "learning_rate": 6.691447368421053e-07, "loss": 0.0761, "step": 8550},
    {"epoch": 5.0055, "grad_norm": 1.7285746335983276, "learning_rate": 6.576315789473684e-07, "loss": 0.0602, "step": 8575},
    {"epoch": 5.008, "grad_norm": 1.1516830921173096, "learning_rate": 6.461184210526315e-07, "loss": 0.0585, "step": 8600},
    {"epoch": 5.0105, "grad_norm": 3.3867828845977783, "learning_rate": 6.346052631578947e-07, "loss": 0.0656, "step": 8625},
    {"epoch": 5.013, "grad_norm": 4.064920902252197, "learning_rate": 6.230921052631579e-07, "loss": 0.0683, "step": 8650},
    {"epoch": 5.0155, "grad_norm": 3.695047378540039, "learning_rate": 6.11578947368421e-07, "loss": 0.0659, "step": 8675},
    {"epoch": 5.018, "grad_norm": 2.9087939262390137, "learning_rate": 6.000657894736842e-07, "loss": 0.0611, "step": 8700},
    {"epoch": 5.0205, "grad_norm": 3.368290424346924, "learning_rate": 5.885526315789473e-07, "loss": 0.0603, "step": 8725},
    {"epoch": 5.023, "grad_norm": 3.7565319538116455, "learning_rate": 5.770394736842104e-07, "loss": 0.0614, "step": 8750},
    {"epoch": 5.0255, "grad_norm": 2.4887771606445312, "learning_rate": 5.655263157894735e-07, "loss": 0.0497, "step": 8775},
    {"epoch": 5.028, "grad_norm": 2.1670076847076416, "learning_rate": 5.540131578947369e-07, "loss": 0.0662, "step": 8800},
    {"epoch": 5.0305, "grad_norm": 1.3746148347854614, "learning_rate": 5.425e-07, "loss": 0.0507, "step": 8825},
    {"epoch": 5.033, "grad_norm": 1.8274154663085938, "learning_rate": 5.309868421052631e-07, "loss": 0.0449, "step": 8850},
    {"epoch": 5.0355, "grad_norm": 2.9424078464508057, "learning_rate": 5.194736842105262e-07, "loss": 0.0529, "step": 8875},
    {"epoch": 5.038, "grad_norm": 2.457754611968994, "learning_rate": 5.079605263157895e-07, "loss": 0.042, "step": 8900},
    {"epoch": 5.0405, "grad_norm": 2.208768606185913, "learning_rate": 4.964473684210526e-07, "loss": 0.0407, "step": 8925},
    {"epoch": 5.043, "grad_norm": 1.9554438591003418, "learning_rate": 4.849342105263158e-07, "loss": 0.0465, "step": 8950},
    {"epoch": 5.0455, "grad_norm": 1.1464567184448242, "learning_rate": 4.734210526315789e-07, "loss": 0.0537, "step": 8975},
    {"epoch": 5.048, "grad_norm": 3.1216509342193604, "learning_rate": 4.6190789473684203e-07, "loss": 0.0368, "step": 9000},
    {"epoch": 5.048, "eval_loss": 0.12588092684745789, "eval_runtime": 4149.257, "eval_samples_per_second": 3.285, "eval_steps_per_second": 0.411, "eval_wer": 7.215361500971087, "step": 9000},
    {"epoch": 5.048, "step": 9000, "total_flos": 4.891718061785088e+20, "train_loss": 0.0, "train_runtime": 289.8068, "train_samples_per_second": 552.092, "train_steps_per_second": 34.506}
  ],
  "logging_steps": 25,
  "max_steps": 10000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.891718061785088e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|