whisper-large-eu / trainer_state.json
xezpeleta's picture
End of training
cfe270a verified
raw
history blame
63.5 kB
{
"best_metric": 7.215361500971087,
"best_model_checkpoint": "./checkpoint-9000",
"epoch": 5.048,
"eval_steps": 500,
"global_step": 9000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0025,
"grad_norm": 6.131621360778809,
"learning_rate": 2.1875e-07,
"loss": 0.9345,
"step": 25
},
{
"epoch": 0.005,
"grad_norm": 6.021520137786865,
"learning_rate": 4.375e-07,
"loss": 0.8231,
"step": 50
},
{
"epoch": 0.0075,
"grad_norm": 5.526496410369873,
"learning_rate": 6.5625e-07,
"loss": 0.5623,
"step": 75
},
{
"epoch": 0.01,
"grad_norm": 4.9277825355529785,
"learning_rate": 8.75e-07,
"loss": 0.4173,
"step": 100
},
{
"epoch": 0.0125,
"grad_norm": 4.292990684509277,
"learning_rate": 1.09375e-06,
"loss": 0.385,
"step": 125
},
{
"epoch": 0.015,
"grad_norm": 5.749295234680176,
"learning_rate": 1.3125e-06,
"loss": 0.3931,
"step": 150
},
{
"epoch": 0.0175,
"grad_norm": 3.8306965827941895,
"learning_rate": 1.5312499999999997e-06,
"loss": 0.3516,
"step": 175
},
{
"epoch": 0.02,
"grad_norm": 4.687748908996582,
"learning_rate": 1.75e-06,
"loss": 0.3235,
"step": 200
},
{
"epoch": 0.0225,
"grad_norm": 4.232759952545166,
"learning_rate": 1.96875e-06,
"loss": 0.3314,
"step": 225
},
{
"epoch": 0.025,
"grad_norm": 4.185751914978027,
"learning_rate": 2.1875e-06,
"loss": 0.309,
"step": 250
},
{
"epoch": 0.0275,
"grad_norm": 4.818612098693848,
"learning_rate": 2.40625e-06,
"loss": 0.2991,
"step": 275
},
{
"epoch": 0.03,
"grad_norm": 4.171736717224121,
"learning_rate": 2.625e-06,
"loss": 0.2832,
"step": 300
},
{
"epoch": 0.0325,
"grad_norm": 5.217376708984375,
"learning_rate": 2.8437499999999997e-06,
"loss": 0.2873,
"step": 325
},
{
"epoch": 0.035,
"grad_norm": 4.671106815338135,
"learning_rate": 3.0624999999999995e-06,
"loss": 0.2957,
"step": 350
},
{
"epoch": 0.0375,
"grad_norm": 3.9175262451171875,
"learning_rate": 3.2812499999999997e-06,
"loss": 0.2634,
"step": 375
},
{
"epoch": 0.04,
"grad_norm": 4.647582054138184,
"learning_rate": 3.5e-06,
"loss": 0.2541,
"step": 400
},
{
"epoch": 0.0425,
"grad_norm": 3.25675368309021,
"learning_rate": 3.7187499999999998e-06,
"loss": 0.2244,
"step": 425
},
{
"epoch": 0.045,
"grad_norm": 4.597206115722656,
"learning_rate": 3.9375e-06,
"loss": 0.2492,
"step": 450
},
{
"epoch": 0.0475,
"grad_norm": 4.602332592010498,
"learning_rate": 4.156249999999999e-06,
"loss": 0.246,
"step": 475
},
{
"epoch": 0.05,
"grad_norm": 3.6419622898101807,
"learning_rate": 4.375e-06,
"loss": 0.2208,
"step": 500
},
{
"epoch": 0.05,
"eval_loss": 0.2592349350452423,
"eval_runtime": 4116.5906,
"eval_samples_per_second": 3.311,
"eval_steps_per_second": 0.414,
"eval_wer": 20.691487412510533,
"step": 500
},
{
"epoch": 0.0525,
"grad_norm": 3.6599488258361816,
"learning_rate": 4.363486842105263e-06,
"loss": 0.2539,
"step": 525
},
{
"epoch": 0.055,
"grad_norm": 3.6934616565704346,
"learning_rate": 4.351973684210526e-06,
"loss": 0.2313,
"step": 550
},
{
"epoch": 0.0575,
"grad_norm": 3.7546138763427734,
"learning_rate": 4.340460526315789e-06,
"loss": 0.2272,
"step": 575
},
{
"epoch": 0.06,
"grad_norm": 3.096877098083496,
"learning_rate": 4.3289473684210525e-06,
"loss": 0.2373,
"step": 600
},
{
"epoch": 0.0625,
"grad_norm": 3.572812795639038,
"learning_rate": 4.3174342105263155e-06,
"loss": 0.2285,
"step": 625
},
{
"epoch": 0.065,
"grad_norm": 3.3494396209716797,
"learning_rate": 4.3059210526315785e-06,
"loss": 0.2293,
"step": 650
},
{
"epoch": 0.0675,
"grad_norm": 3.5156869888305664,
"learning_rate": 4.2944078947368415e-06,
"loss": 0.2063,
"step": 675
},
{
"epoch": 0.07,
"grad_norm": 3.698807716369629,
"learning_rate": 4.282894736842105e-06,
"loss": 0.2113,
"step": 700
},
{
"epoch": 0.0725,
"grad_norm": 3.716585636138916,
"learning_rate": 4.271381578947368e-06,
"loss": 0.2055,
"step": 725
},
{
"epoch": 0.075,
"grad_norm": 4.204227924346924,
"learning_rate": 4.2598684210526314e-06,
"loss": 0.2114,
"step": 750
},
{
"epoch": 0.0775,
"grad_norm": 3.479562282562256,
"learning_rate": 4.2483552631578944e-06,
"loss": 0.2224,
"step": 775
},
{
"epoch": 0.08,
"grad_norm": 4.5203094482421875,
"learning_rate": 4.2368421052631575e-06,
"loss": 0.2523,
"step": 800
},
{
"epoch": 0.0825,
"grad_norm": 3.6081738471984863,
"learning_rate": 4.2253289473684205e-06,
"loss": 0.2383,
"step": 825
},
{
"epoch": 0.085,
"grad_norm": 3.2602758407592773,
"learning_rate": 4.2138157894736835e-06,
"loss": 0.1808,
"step": 850
},
{
"epoch": 0.0875,
"grad_norm": 3.6786868572235107,
"learning_rate": 4.202302631578947e-06,
"loss": 0.1747,
"step": 875
},
{
"epoch": 0.09,
"grad_norm": 3.1120803356170654,
"learning_rate": 4.19078947368421e-06,
"loss": 0.1662,
"step": 900
},
{
"epoch": 0.0925,
"grad_norm": 3.1962203979492188,
"learning_rate": 4.179276315789473e-06,
"loss": 0.1771,
"step": 925
},
{
"epoch": 0.095,
"grad_norm": 3.172363758087158,
"learning_rate": 4.167763157894736e-06,
"loss": 0.1751,
"step": 950
},
{
"epoch": 0.0975,
"grad_norm": 2.4304590225219727,
"learning_rate": 4.156249999999999e-06,
"loss": 0.1701,
"step": 975
},
{
"epoch": 0.1,
"grad_norm": 3.193345308303833,
"learning_rate": 4.144736842105262e-06,
"loss": 0.1489,
"step": 1000
},
{
"epoch": 0.1,
"eval_loss": 0.1971057653427124,
"eval_runtime": 4130.6867,
"eval_samples_per_second": 3.3,
"eval_steps_per_second": 0.413,
"eval_wer": 14.68265601524424,
"step": 1000
},
{
"epoch": 0.1025,
"grad_norm": 3.322065591812134,
"learning_rate": 4.133223684210526e-06,
"loss": 0.1701,
"step": 1025
},
{
"epoch": 0.105,
"grad_norm": 3.5462722778320312,
"learning_rate": 4.121710526315789e-06,
"loss": 0.1875,
"step": 1050
},
{
"epoch": 0.1075,
"grad_norm": 3.39326810836792,
"learning_rate": 4.110197368421052e-06,
"loss": 0.1506,
"step": 1075
},
{
"epoch": 0.11,
"grad_norm": 2.9165821075439453,
"learning_rate": 4.098684210526315e-06,
"loss": 0.1525,
"step": 1100
},
{
"epoch": 0.1125,
"grad_norm": 3.262007236480713,
"learning_rate": 4.087171052631578e-06,
"loss": 0.157,
"step": 1125
},
{
"epoch": 0.115,
"grad_norm": 2.4523119926452637,
"learning_rate": 4.075657894736842e-06,
"loss": 0.1416,
"step": 1150
},
{
"epoch": 0.1175,
"grad_norm": 2.7651798725128174,
"learning_rate": 4.064144736842105e-06,
"loss": 0.1527,
"step": 1175
},
{
"epoch": 0.12,
"grad_norm": 3.609523296356201,
"learning_rate": 4.052631578947368e-06,
"loss": 0.1822,
"step": 1200
},
{
"epoch": 0.1225,
"grad_norm": 3.8101985454559326,
"learning_rate": 4.041118421052631e-06,
"loss": 0.1703,
"step": 1225
},
{
"epoch": 0.125,
"grad_norm": 3.8921287059783936,
"learning_rate": 4.029605263157894e-06,
"loss": 0.1924,
"step": 1250
},
{
"epoch": 0.1275,
"grad_norm": 4.463279724121094,
"learning_rate": 4.018092105263157e-06,
"loss": 0.1818,
"step": 1275
},
{
"epoch": 0.13,
"grad_norm": 3.6556308269500732,
"learning_rate": 4.00657894736842e-06,
"loss": 0.1726,
"step": 1300
},
{
"epoch": 0.1325,
"grad_norm": 2.98067569732666,
"learning_rate": 3.995065789473683e-06,
"loss": 0.174,
"step": 1325
},
{
"epoch": 0.135,
"grad_norm": 2.8287429809570312,
"learning_rate": 3.983552631578947e-06,
"loss": 0.1631,
"step": 1350
},
{
"epoch": 0.1375,
"grad_norm": 2.6438794136047363,
"learning_rate": 3.97203947368421e-06,
"loss": 0.1475,
"step": 1375
},
{
"epoch": 0.14,
"grad_norm": 3.513123035430908,
"learning_rate": 3.960526315789473e-06,
"loss": 0.1457,
"step": 1400
},
{
"epoch": 0.1425,
"grad_norm": 2.4688916206359863,
"learning_rate": 3.949013157894737e-06,
"loss": 0.1375,
"step": 1425
},
{
"epoch": 0.145,
"grad_norm": 4.005943775177002,
"learning_rate": 3.9375e-06,
"loss": 0.1623,
"step": 1450
},
{
"epoch": 0.1475,
"grad_norm": 2.91786789894104,
"learning_rate": 3.925986842105263e-06,
"loss": 0.1701,
"step": 1475
},
{
"epoch": 0.15,
"grad_norm": 3.5332415103912354,
"learning_rate": 3.914473684210526e-06,
"loss": 0.1973,
"step": 1500
},
{
"epoch": 0.15,
"eval_loss": 0.17469166219234467,
"eval_runtime": 4132.0041,
"eval_samples_per_second": 3.299,
"eval_steps_per_second": 0.412,
"eval_wer": 12.377697973542453,
"step": 1500
},
{
"epoch": 0.1525,
"grad_norm": 4.05070686340332,
"learning_rate": 3.902960526315789e-06,
"loss": 0.1796,
"step": 1525
},
{
"epoch": 0.155,
"grad_norm": 2.989821195602417,
"learning_rate": 3.891447368421052e-06,
"loss": 0.1561,
"step": 1550
},
{
"epoch": 0.1575,
"grad_norm": 2.9603219032287598,
"learning_rate": 3.879934210526315e-06,
"loss": 0.1609,
"step": 1575
},
{
"epoch": 0.16,
"grad_norm": 3.2663583755493164,
"learning_rate": 3.868421052631579e-06,
"loss": 0.1833,
"step": 1600
},
{
"epoch": 0.1625,
"grad_norm": 3.459775686264038,
"learning_rate": 3.856907894736842e-06,
"loss": 0.1727,
"step": 1625
},
{
"epoch": 0.165,
"grad_norm": 3.427720069885254,
"learning_rate": 3.845394736842105e-06,
"loss": 0.181,
"step": 1650
},
{
"epoch": 0.1675,
"grad_norm": 4.471118450164795,
"learning_rate": 3.833881578947368e-06,
"loss": 0.1536,
"step": 1675
},
{
"epoch": 0.17,
"grad_norm": 3.1428306102752686,
"learning_rate": 3.822368421052632e-06,
"loss": 0.1372,
"step": 1700
},
{
"epoch": 1.0021,
"grad_norm": 2.8270132541656494,
"learning_rate": 3.8108552631578944e-06,
"loss": 0.1454,
"step": 1725
},
{
"epoch": 1.0046,
"grad_norm": 3.0873589515686035,
"learning_rate": 3.799342105263158e-06,
"loss": 0.1303,
"step": 1750
},
{
"epoch": 1.0071,
"grad_norm": 3.187711000442505,
"learning_rate": 3.787828947368421e-06,
"loss": 0.1383,
"step": 1775
},
{
"epoch": 1.0096,
"grad_norm": 3.1710643768310547,
"learning_rate": 3.776315789473684e-06,
"loss": 0.1626,
"step": 1800
},
{
"epoch": 1.0121,
"grad_norm": 3.4516818523406982,
"learning_rate": 3.7648026315789473e-06,
"loss": 0.1405,
"step": 1825
},
{
"epoch": 1.0146,
"grad_norm": 2.930408000946045,
"learning_rate": 3.7532894736842103e-06,
"loss": 0.143,
"step": 1850
},
{
"epoch": 1.0171,
"grad_norm": 3.066941261291504,
"learning_rate": 3.7417763157894733e-06,
"loss": 0.1437,
"step": 1875
},
{
"epoch": 1.0196,
"grad_norm": 3.389916181564331,
"learning_rate": 3.7302631578947363e-06,
"loss": 0.1289,
"step": 1900
},
{
"epoch": 1.0221,
"grad_norm": 3.048574209213257,
"learning_rate": 3.7187499999999998e-06,
"loss": 0.1415,
"step": 1925
},
{
"epoch": 1.0246,
"grad_norm": 2.5267295837402344,
"learning_rate": 3.7072368421052628e-06,
"loss": 0.1386,
"step": 1950
},
{
"epoch": 1.0271,
"grad_norm": 3.151757001876831,
"learning_rate": 3.6957236842105258e-06,
"loss": 0.1436,
"step": 1975
},
{
"epoch": 1.0296,
"grad_norm": 3.629039764404297,
"learning_rate": 3.684210526315789e-06,
"loss": 0.1353,
"step": 2000
},
{
"epoch": 1.0296,
"eval_loss": 0.1527385264635086,
"eval_runtime": 4116.9756,
"eval_samples_per_second": 3.311,
"eval_steps_per_second": 0.414,
"eval_wer": 10.719520685990693,
"step": 2000
},
{
"epoch": 1.0321,
"grad_norm": 1.8788173198699951,
"learning_rate": 3.6726973684210522e-06,
"loss": 0.1322,
"step": 2025
},
{
"epoch": 1.0346,
"grad_norm": 2.587233066558838,
"learning_rate": 3.6611842105263157e-06,
"loss": 0.1176,
"step": 2050
},
{
"epoch": 1.0371,
"grad_norm": 4.001532077789307,
"learning_rate": 3.6496710526315787e-06,
"loss": 0.1233,
"step": 2075
},
{
"epoch": 1.0396,
"grad_norm": 3.3947739601135254,
"learning_rate": 3.638157894736842e-06,
"loss": 0.1188,
"step": 2100
},
{
"epoch": 1.0421,
"grad_norm": 3.4743120670318604,
"learning_rate": 3.626644736842105e-06,
"loss": 0.1318,
"step": 2125
},
{
"epoch": 1.0446,
"grad_norm": 2.9288718700408936,
"learning_rate": 3.615131578947368e-06,
"loss": 0.1224,
"step": 2150
},
{
"epoch": 1.0471,
"grad_norm": 2.6081368923187256,
"learning_rate": 3.603618421052631e-06,
"loss": 0.1232,
"step": 2175
},
{
"epoch": 1.0496,
"grad_norm": 2.4068429470062256,
"learning_rate": 3.5921052631578946e-06,
"loss": 0.1073,
"step": 2200
},
{
"epoch": 1.0521,
"grad_norm": 3.049074411392212,
"learning_rate": 3.5805921052631576e-06,
"loss": 0.1071,
"step": 2225
},
{
"epoch": 1.0546,
"grad_norm": 2.0809032917022705,
"learning_rate": 3.5690789473684206e-06,
"loss": 0.1217,
"step": 2250
},
{
"epoch": 1.0571,
"grad_norm": 3.0854332447052,
"learning_rate": 3.5575657894736836e-06,
"loss": 0.1332,
"step": 2275
},
{
"epoch": 1.0596,
"grad_norm": 3.580145835876465,
"learning_rate": 3.546052631578947e-06,
"loss": 0.131,
"step": 2300
},
{
"epoch": 1.0621,
"grad_norm": 3.8924479484558105,
"learning_rate": 3.53453947368421e-06,
"loss": 0.136,
"step": 2325
},
{
"epoch": 1.0646,
"grad_norm": 2.8398871421813965,
"learning_rate": 3.523026315789473e-06,
"loss": 0.1081,
"step": 2350
},
{
"epoch": 1.0671,
"grad_norm": 3.007026195526123,
"learning_rate": 3.511513157894737e-06,
"loss": 0.1115,
"step": 2375
},
{
"epoch": 1.0695999999999999,
"grad_norm": 1.5712552070617676,
"learning_rate": 3.5e-06,
"loss": 0.1183,
"step": 2400
},
{
"epoch": 1.0721,
"grad_norm": 3.844963312149048,
"learning_rate": 3.488486842105263e-06,
"loss": 0.113,
"step": 2425
},
{
"epoch": 1.0746,
"grad_norm": 2.8939759731292725,
"learning_rate": 3.476973684210526e-06,
"loss": 0.1115,
"step": 2450
},
{
"epoch": 1.0771,
"grad_norm": 1.8150537014007568,
"learning_rate": 3.4654605263157894e-06,
"loss": 0.1117,
"step": 2475
},
{
"epoch": 1.0796000000000001,
"grad_norm": 2.839418649673462,
"learning_rate": 3.4539473684210524e-06,
"loss": 0.1065,
"step": 2500
},
{
"epoch": 1.0796000000000001,
"eval_loss": 0.1456422209739685,
"eval_runtime": 4133.4016,
"eval_samples_per_second": 3.298,
"eval_steps_per_second": 0.412,
"eval_wer": 9.869361281102277,
"step": 2500
},
{
"epoch": 1.0821,
"grad_norm": 3.4274985790252686,
"learning_rate": 3.4424342105263154e-06,
"loss": 0.1067,
"step": 2525
},
{
"epoch": 1.0846,
"grad_norm": 2.2946057319641113,
"learning_rate": 3.4309210526315784e-06,
"loss": 0.1038,
"step": 2550
},
{
"epoch": 1.0871,
"grad_norm": 2.5364551544189453,
"learning_rate": 3.419407894736842e-06,
"loss": 0.1073,
"step": 2575
},
{
"epoch": 1.0896,
"grad_norm": 2.9779515266418457,
"learning_rate": 3.4083552631578944e-06,
"loss": 0.1067,
"step": 2600
},
{
"epoch": 1.0921,
"grad_norm": 2.502685308456421,
"learning_rate": 3.3968421052631574e-06,
"loss": 0.1229,
"step": 2625
},
{
"epoch": 1.0946,
"grad_norm": 2.181756019592285,
"learning_rate": 3.3853289473684205e-06,
"loss": 0.1071,
"step": 2650
},
{
"epoch": 1.0971,
"grad_norm": 2.428738594055176,
"learning_rate": 3.3738157894736843e-06,
"loss": 0.101,
"step": 2675
},
{
"epoch": 1.0996,
"grad_norm": 3.797952651977539,
"learning_rate": 3.3623026315789473e-06,
"loss": 0.1198,
"step": 2700
},
{
"epoch": 1.1021,
"grad_norm": 2.9902758598327637,
"learning_rate": 3.3507894736842103e-06,
"loss": 0.1013,
"step": 2725
},
{
"epoch": 1.1046,
"grad_norm": 3.0514307022094727,
"learning_rate": 3.3392763157894734e-06,
"loss": 0.1075,
"step": 2750
},
{
"epoch": 1.1071,
"grad_norm": 3.2877554893493652,
"learning_rate": 3.327763157894737e-06,
"loss": 0.1059,
"step": 2775
},
{
"epoch": 1.1096,
"grad_norm": 2.3952691555023193,
"learning_rate": 3.31625e-06,
"loss": 0.0926,
"step": 2800
},
{
"epoch": 1.1121,
"grad_norm": 2.2840464115142822,
"learning_rate": 3.304736842105263e-06,
"loss": 0.1048,
"step": 2825
},
{
"epoch": 1.1146,
"grad_norm": 2.7062416076660156,
"learning_rate": 3.293223684210526e-06,
"loss": 0.1049,
"step": 2850
},
{
"epoch": 1.1171,
"grad_norm": 2.971315860748291,
"learning_rate": 3.2817105263157893e-06,
"loss": 0.1073,
"step": 2875
},
{
"epoch": 1.1196,
"grad_norm": 2.8689844608306885,
"learning_rate": 3.2701973684210523e-06,
"loss": 0.1141,
"step": 2900
},
{
"epoch": 1.1221,
"grad_norm": 3.6150734424591064,
"learning_rate": 3.2586842105263153e-06,
"loss": 0.1066,
"step": 2925
},
{
"epoch": 1.1246,
"grad_norm": 2.3004024028778076,
"learning_rate": 3.2471710526315783e-06,
"loss": 0.1248,
"step": 2950
},
{
"epoch": 1.1271,
"grad_norm": 2.5995240211486816,
"learning_rate": 3.2356578947368417e-06,
"loss": 0.0972,
"step": 2975
},
{
"epoch": 1.1296,
"grad_norm": 2.957960367202759,
"learning_rate": 3.224144736842105e-06,
"loss": 0.106,
"step": 3000
},
{
"epoch": 1.1296,
"eval_loss": 0.13624447584152222,
"eval_runtime": 4123.4662,
"eval_samples_per_second": 3.305,
"eval_steps_per_second": 0.413,
"eval_wer": 9.09249148008355,
"step": 3000
},
{
"epoch": 1.1320999999999999,
"grad_norm": 2.653007984161377,
"learning_rate": 3.212631578947368e-06,
"loss": 0.1083,
"step": 3025
},
{
"epoch": 1.1346,
"grad_norm": 2.6895744800567627,
"learning_rate": 3.2011184210526316e-06,
"loss": 0.1119,
"step": 3050
},
{
"epoch": 1.1371,
"grad_norm": 2.1507463455200195,
"learning_rate": 3.1896052631578946e-06,
"loss": 0.0944,
"step": 3075
},
{
"epoch": 1.1396,
"grad_norm": 3.61063289642334,
"learning_rate": 3.1780921052631576e-06,
"loss": 0.095,
"step": 3100
},
{
"epoch": 1.1421000000000001,
"grad_norm": 2.570584774017334,
"learning_rate": 3.1665789473684206e-06,
"loss": 0.1076,
"step": 3125
},
{
"epoch": 1.1446,
"grad_norm": 3.05507230758667,
"learning_rate": 3.155065789473684e-06,
"loss": 0.1175,
"step": 3150
},
{
"epoch": 1.1471,
"grad_norm": 2.82817006111145,
"learning_rate": 3.143552631578947e-06,
"loss": 0.0965,
"step": 3175
},
{
"epoch": 1.1496,
"grad_norm": 2.336517572402954,
"learning_rate": 3.13203947368421e-06,
"loss": 0.0955,
"step": 3200
},
{
"epoch": 1.1521,
"grad_norm": 3.8640036582946777,
"learning_rate": 3.120526315789473e-06,
"loss": 0.1044,
"step": 3225
},
{
"epoch": 1.1546,
"grad_norm": 3.7205588817596436,
"learning_rate": 3.1090131578947366e-06,
"loss": 0.1013,
"step": 3250
},
{
"epoch": 1.1571,
"grad_norm": 2.1962900161743164,
"learning_rate": 3.0974999999999996e-06,
"loss": 0.0978,
"step": 3275
},
{
"epoch": 1.1596,
"grad_norm": 3.3310599327087402,
"learning_rate": 3.0859868421052626e-06,
"loss": 0.1089,
"step": 3300
},
{
"epoch": 1.1621,
"grad_norm": 2.699566602706909,
"learning_rate": 3.074473684210526e-06,
"loss": 0.1078,
"step": 3325
},
{
"epoch": 1.1646,
"grad_norm": 3.79370379447937,
"learning_rate": 3.0629605263157894e-06,
"loss": 0.1118,
"step": 3350
},
{
"epoch": 1.1671,
"grad_norm": 1.9741384983062744,
"learning_rate": 3.0514473684210525e-06,
"loss": 0.1119,
"step": 3375
},
{
"epoch": 1.1696,
"grad_norm": 2.29034686088562,
"learning_rate": 3.0399342105263155e-06,
"loss": 0.1015,
"step": 3400
},
{
"epoch": 2.0017,
"grad_norm": 2.011443853378296,
"learning_rate": 3.028421052631579e-06,
"loss": 0.0708,
"step": 3425
},
{
"epoch": 2.0042,
"grad_norm": 1.2196134328842163,
"learning_rate": 3.016907894736842e-06,
"loss": 0.0668,
"step": 3450
},
{
"epoch": 2.0067,
"grad_norm": 2.863933563232422,
"learning_rate": 3.005394736842105e-06,
"loss": 0.0673,
"step": 3475
},
{
"epoch": 2.0092,
"grad_norm": 1.9341013431549072,
"learning_rate": 2.9938815789473684e-06,
"loss": 0.0718,
"step": 3500
},
{
"epoch": 2.0092,
"eval_loss": 0.13255682587623596,
"eval_runtime": 4133.4892,
"eval_samples_per_second": 3.297,
"eval_steps_per_second": 0.412,
"eval_wer": 8.542819451060867,
"step": 3500
},
{
"epoch": 2.0117,
"grad_norm": 2.795734405517578,
"learning_rate": 2.9823684210526314e-06,
"loss": 0.071,
"step": 3525
},
{
"epoch": 2.0142,
"grad_norm": 1.982479214668274,
"learning_rate": 2.9708552631578944e-06,
"loss": 0.0629,
"step": 3550
},
{
"epoch": 2.0167,
"grad_norm": 3.168161630630493,
"learning_rate": 2.9593421052631574e-06,
"loss": 0.0593,
"step": 3575
},
{
"epoch": 2.0192,
"grad_norm": 2.259500741958618,
"learning_rate": 2.947828947368421e-06,
"loss": 0.0696,
"step": 3600
},
{
"epoch": 2.0217,
"grad_norm": 2.1626062393188477,
"learning_rate": 2.936315789473684e-06,
"loss": 0.0687,
"step": 3625
},
{
"epoch": 2.0242,
"grad_norm": 2.4419946670532227,
"learning_rate": 2.924802631578947e-06,
"loss": 0.0686,
"step": 3650
},
{
"epoch": 2.0267,
"grad_norm": 2.445758819580078,
"learning_rate": 2.9132894736842103e-06,
"loss": 0.0631,
"step": 3675
},
{
"epoch": 2.0292,
"grad_norm": 2.614476442337036,
"learning_rate": 2.9017763157894737e-06,
"loss": 0.0647,
"step": 3700
},
{
"epoch": 2.0317,
"grad_norm": 1.4166672229766846,
"learning_rate": 2.8902631578947367e-06,
"loss": 0.0653,
"step": 3725
},
{
"epoch": 2.0342,
"grad_norm": 1.8435245752334595,
"learning_rate": 2.8787499999999998e-06,
"loss": 0.0567,
"step": 3750
},
{
"epoch": 2.0367,
"grad_norm": 1.8179950714111328,
"learning_rate": 2.867236842105263e-06,
"loss": 0.0636,
"step": 3775
},
{
"epoch": 2.0392,
"grad_norm": 1.487122893333435,
"learning_rate": 2.855723684210526e-06,
"loss": 0.0598,
"step": 3800
},
{
"epoch": 2.0417,
"grad_norm": 2.9211690425872803,
"learning_rate": 2.8442105263157892e-06,
"loss": 0.0599,
"step": 3825
},
{
"epoch": 2.0442,
"grad_norm": 2.5018093585968018,
"learning_rate": 2.8326973684210522e-06,
"loss": 0.055,
"step": 3850
},
{
"epoch": 2.0467,
"grad_norm": 2.186502456665039,
"learning_rate": 2.8211842105263157e-06,
"loss": 0.0533,
"step": 3875
},
{
"epoch": 2.0492,
"grad_norm": 1.039233922958374,
"learning_rate": 2.8096710526315787e-06,
"loss": 0.0514,
"step": 3900
},
{
"epoch": 2.0517,
"grad_norm": 1.871267557144165,
"learning_rate": 2.7981578947368417e-06,
"loss": 0.0512,
"step": 3925
},
{
"epoch": 2.0542,
"grad_norm": 2.0849483013153076,
"learning_rate": 2.7866447368421047e-06,
"loss": 0.0579,
"step": 3950
},
{
"epoch": 2.0567,
"grad_norm": 1.6887531280517578,
"learning_rate": 2.775131578947368e-06,
"loss": 0.0575,
"step": 3975
},
{
"epoch": 2.0592,
"grad_norm": 1.88097083568573,
"learning_rate": 2.763618421052631e-06,
"loss": 0.0683,
"step": 4000
},
{
"epoch": 2.0592,
"eval_loss": 0.1342601627111435,
"eval_runtime": 4125.8373,
"eval_samples_per_second": 3.304,
"eval_steps_per_second": 0.413,
"eval_wer": 8.485103888013485,
"step": 4000
},
{
"epoch": 2.0617,
"grad_norm": 2.1877427101135254,
"learning_rate": 2.7521052631578946e-06,
"loss": 0.0614,
"step": 4025
},
{
"epoch": 2.0642,
"grad_norm": 1.4176368713378906,
"learning_rate": 2.740592105263158e-06,
"loss": 0.0559,
"step": 4050
},
{
"epoch": 2.0667,
"grad_norm": 2.4362101554870605,
"learning_rate": 2.729078947368421e-06,
"loss": 0.0593,
"step": 4075
},
{
"epoch": 2.0692,
"grad_norm": 1.8663033246994019,
"learning_rate": 2.717565789473684e-06,
"loss": 0.0591,
"step": 4100
},
{
"epoch": 2.0717,
"grad_norm": 1.627626657485962,
"learning_rate": 2.706052631578947e-06,
"loss": 0.0637,
"step": 4125
},
{
"epoch": 2.0742,
"grad_norm": 2.2072463035583496,
"learning_rate": 2.6945394736842105e-06,
"loss": 0.0571,
"step": 4150
},
{
"epoch": 2.0767,
"grad_norm": 1.7411611080169678,
"learning_rate": 2.6830263157894735e-06,
"loss": 0.0588,
"step": 4175
},
{
"epoch": 2.0792,
"grad_norm": 1.324000358581543,
"learning_rate": 2.6715131578947365e-06,
"loss": 0.0482,
"step": 4200
},
{
"epoch": 2.0817,
"grad_norm": 1.4138795137405396,
"learning_rate": 2.6599999999999995e-06,
"loss": 0.0477,
"step": 4225
},
{
"epoch": 2.0842,
"grad_norm": 2.403547763824463,
"learning_rate": 2.648486842105263e-06,
"loss": 0.0558,
"step": 4250
},
{
"epoch": 2.0867,
"grad_norm": 1.3718703985214233,
"learning_rate": 2.636973684210526e-06,
"loss": 0.0546,
"step": 4275
},
{
"epoch": 2.0892,
"grad_norm": 2.296445369720459,
"learning_rate": 2.625460526315789e-06,
"loss": 0.0554,
"step": 4300
},
{
"epoch": 2.0917,
"grad_norm": 2.3471312522888184,
"learning_rate": 2.613947368421052e-06,
"loss": 0.051,
"step": 4325
},
{
"epoch": 2.0942,
"grad_norm": 1.6061975955963135,
"learning_rate": 2.602434210526316e-06,
"loss": 0.0548,
"step": 4350
},
{
"epoch": 2.0967,
"grad_norm": 2.979126453399658,
"learning_rate": 2.590921052631579e-06,
"loss": 0.0492,
"step": 4375
},
{
"epoch": 2.0992,
"grad_norm": 1.7963169813156128,
"learning_rate": 2.579407894736842e-06,
"loss": 0.0514,
"step": 4400
},
{
"epoch": 2.1017,
"grad_norm": 2.4996039867401123,
"learning_rate": 2.5678947368421053e-06,
"loss": 0.0399,
"step": 4425
},
{
"epoch": 2.1042,
"grad_norm": 1.7498191595077515,
"learning_rate": 2.5563815789473683e-06,
"loss": 0.0522,
"step": 4450
},
{
"epoch": 2.1067,
"grad_norm": 1.413889765739441,
"learning_rate": 2.5448684210526313e-06,
"loss": 0.0517,
"step": 4475
},
{
"epoch": 2.1092,
"grad_norm": 2.0956978797912598,
"learning_rate": 2.5333552631578943e-06,
"loss": 0.0482,
"step": 4500
},
{
"epoch": 2.1092,
"eval_loss": 0.1336347758769989,
"eval_runtime": 4119.9162,
"eval_samples_per_second": 3.308,
"eval_steps_per_second": 0.414,
"eval_wer": 8.104914067939463,
"step": 4500
},
{
"epoch": 2.1117,
"grad_norm": 3.138298749923706,
"learning_rate": 2.5218421052631578e-06,
"loss": 0.0568,
"step": 4525
},
{
"epoch": 2.1142,
"grad_norm": 1.4262772798538208,
"learning_rate": 2.510328947368421e-06,
"loss": 0.0475,
"step": 4550
},
{
"epoch": 2.1167,
"grad_norm": 3.3500139713287354,
"learning_rate": 2.498815789473684e-06,
"loss": 0.0474,
"step": 4575
},
{
"epoch": 2.1192,
"grad_norm": 4.509912014007568,
"learning_rate": 2.4873026315789472e-06,
"loss": 0.0586,
"step": 4600
},
{
"epoch": 2.1217,
"grad_norm": 2.1386468410491943,
"learning_rate": 2.4757894736842102e-06,
"loss": 0.062,
"step": 4625
},
{
"epoch": 2.1242,
"grad_norm": 1.1121129989624023,
"learning_rate": 2.4642763157894733e-06,
"loss": 0.0563,
"step": 4650
},
{
"epoch": 2.1267,
"grad_norm": 1.677538514137268,
"learning_rate": 2.4527631578947363e-06,
"loss": 0.0519,
"step": 4675
},
{
"epoch": 2.1292,
"grad_norm": 1.579513430595398,
"learning_rate": 2.44125e-06,
"loss": 0.0544,
"step": 4700
},
{
"epoch": 2.1317,
"grad_norm": 2.1100914478302,
"learning_rate": 2.429736842105263e-06,
"loss": 0.0578,
"step": 4725
},
{
"epoch": 2.1342,
"grad_norm": 1.779682993888855,
"learning_rate": 2.418223684210526e-06,
"loss": 0.0486,
"step": 4750
},
{
"epoch": 2.1367,
"grad_norm": 1.7443439960479736,
"learning_rate": 2.4067105263157896e-06,
"loss": 0.0534,
"step": 4775
},
{
"epoch": 2.1391999999999998,
"grad_norm": 1.9388935565948486,
"learning_rate": 2.3951973684210526e-06,
"loss": 0.0516,
"step": 4800
},
{
"epoch": 2.1417,
"grad_norm": 1.82517409324646,
"learning_rate": 2.3836842105263156e-06,
"loss": 0.0451,
"step": 4825
},
{
"epoch": 2.1442,
"grad_norm": 1.9101967811584473,
"learning_rate": 2.3721710526315786e-06,
"loss": 0.0546,
"step": 4850
},
{
"epoch": 2.1467,
"grad_norm": 1.7242915630340576,
"learning_rate": 2.360657894736842e-06,
"loss": 0.0495,
"step": 4875
},
{
"epoch": 2.1492,
"grad_norm": 1.9127079248428345,
"learning_rate": 2.349144736842105e-06,
"loss": 0.0465,
"step": 4900
},
{
"epoch": 2.1517,
"grad_norm": 2.7716519832611084,
"learning_rate": 2.337631578947368e-06,
"loss": 0.0493,
"step": 4925
},
{
"epoch": 2.1542,
"grad_norm": 3.141706705093384,
"learning_rate": 2.326118421052631e-06,
"loss": 0.046,
"step": 4950
},
{
"epoch": 2.1567,
"grad_norm": 2.2624270915985107,
"learning_rate": 2.3146052631578945e-06,
"loss": 0.0522,
"step": 4975
},
{
"epoch": 2.1592000000000002,
"grad_norm": 1.2777652740478516,
"learning_rate": 2.3030921052631575e-06,
"loss": 0.0548,
"step": 5000
},
{
"epoch": 2.1592000000000002,
"eval_loss": 0.13162237405776978,
"eval_runtime": 4127.2085,
"eval_samples_per_second": 3.302,
"eval_steps_per_second": 0.413,
"eval_wer": 7.9244384184103485,
"step": 5000
},
{
"epoch": 2.1617,
"grad_norm": 2.106818675994873,
"learning_rate": 2.2915789473684206e-06,
"loss": 0.0527,
"step": 5025
},
{
"epoch": 2.1642,
"grad_norm": 2.2705554962158203,
"learning_rate": 2.2800657894736844e-06,
"loss": 0.0483,
"step": 5050
},
{
"epoch": 2.1667,
"grad_norm": 1.5468271970748901,
"learning_rate": 2.2685526315789474e-06,
"loss": 0.0516,
"step": 5075
},
{
"epoch": 2.1692,
"grad_norm": 2.0331270694732666,
"learning_rate": 2.2570394736842104e-06,
"loss": 0.0551,
"step": 5100
},
{
"epoch": 3.0013,
"grad_norm": 1.107423186302185,
"learning_rate": 2.2455263157894734e-06,
"loss": 0.0434,
"step": 5125
},
{
"epoch": 3.0038,
"grad_norm": 3.9103100299835205,
"learning_rate": 2.234013157894737e-06,
"loss": 0.0362,
"step": 5150
},
{
"epoch": 3.0063,
"grad_norm": 1.193088173866272,
"learning_rate": 2.2225e-06,
"loss": 0.0327,
"step": 5175
},
{
"epoch": 3.0088,
"grad_norm": 1.0432852506637573,
"learning_rate": 2.210986842105263e-06,
"loss": 0.0326,
"step": 5200
},
{
"epoch": 3.0113,
"grad_norm": 0.7116020917892456,
"learning_rate": 2.199473684210526e-06,
"loss": 0.0296,
"step": 5225
},
{
"epoch": 3.0138,
"grad_norm": 2.009617805480957,
"learning_rate": 2.1879605263157894e-06,
"loss": 0.0367,
"step": 5250
},
{
"epoch": 3.0163,
"grad_norm": 1.9047244787216187,
"learning_rate": 2.1764473684210524e-06,
"loss": 0.0347,
"step": 5275
},
{
"epoch": 3.0188,
"grad_norm": 1.630439043045044,
"learning_rate": 2.164934210526316e-06,
"loss": 0.0291,
"step": 5300
},
{
"epoch": 3.0213,
"grad_norm": 1.4158824682235718,
"learning_rate": 2.153421052631579e-06,
"loss": 0.0321,
"step": 5325
},
{
"epoch": 3.0238,
"grad_norm": 1.2792794704437256,
"learning_rate": 2.141907894736842e-06,
"loss": 0.0338,
"step": 5350
},
{
"epoch": 3.0263,
"grad_norm": 1.6505346298217773,
"learning_rate": 2.1303947368421053e-06,
"loss": 0.0348,
"step": 5375
},
{
"epoch": 3.0288,
"grad_norm": 1.5343618392944336,
"learning_rate": 2.1188815789473683e-06,
"loss": 0.0318,
"step": 5400
},
{
"epoch": 3.0313,
"grad_norm": 1.8325493335723877,
"learning_rate": 2.1073684210526313e-06,
"loss": 0.0333,
"step": 5425
},
{
"epoch": 3.0338,
"grad_norm": 1.7224900722503662,
"learning_rate": 2.0958552631578943e-06,
"loss": 0.0322,
"step": 5450
},
{
"epoch": 3.0362999999999998,
"grad_norm": 1.3443737030029297,
"learning_rate": 2.0843421052631577e-06,
"loss": 0.0304,
"step": 5475
},
{
"epoch": 3.0388,
"grad_norm": 1.3260679244995117,
"learning_rate": 2.0728289473684207e-06,
"loss": 0.0282,
"step": 5500
},
{
"epoch": 3.0388,
"eval_loss": 0.13909843564033508,
"eval_runtime": 4135.2147,
"eval_samples_per_second": 3.296,
"eval_steps_per_second": 0.412,
"eval_wer": 7.8181684927992965,
"step": 5500
},
{
"epoch": 3.0413,
"grad_norm": 1.0075204372406006,
"learning_rate": 2.061315789473684e-06,
"loss": 0.0308,
"step": 5525
},
{
"epoch": 3.0438,
"grad_norm": 1.0206842422485352,
"learning_rate": 2.049802631578947e-06,
"loss": 0.0306,
"step": 5550
},
{
"epoch": 3.0463,
"grad_norm": 1.411301851272583,
"learning_rate": 2.03828947368421e-06,
"loss": 0.0243,
"step": 5575
},
{
"epoch": 3.0488,
"grad_norm": 0.959862470626831,
"learning_rate": 2.0267763157894732e-06,
"loss": 0.0272,
"step": 5600
},
{
"epoch": 3.0513,
"grad_norm": 2.2999842166900635,
"learning_rate": 2.0152631578947367e-06,
"loss": 0.0246,
"step": 5625
},
{
"epoch": 3.0538,
"grad_norm": 2.890066146850586,
"learning_rate": 2.00375e-06,
"loss": 0.0299,
"step": 5650
},
{
"epoch": 3.0563,
"grad_norm": 1.7101376056671143,
"learning_rate": 1.992236842105263e-06,
"loss": 0.0322,
"step": 5675
},
{
"epoch": 3.0588,
"grad_norm": 1.531943917274475,
"learning_rate": 1.980723684210526e-06,
"loss": 0.0345,
"step": 5700
},
{
"epoch": 3.0613,
"grad_norm": 1.6334413290023804,
"learning_rate": 1.969210526315789e-06,
"loss": 0.032,
"step": 5725
},
{
"epoch": 3.0638,
"grad_norm": 2.112278461456299,
"learning_rate": 1.9576973684210526e-06,
"loss": 0.0304,
"step": 5750
},
{
"epoch": 3.0663,
"grad_norm": 1.7582517862319946,
"learning_rate": 1.9461842105263156e-06,
"loss": 0.0254,
"step": 5775
},
{
"epoch": 3.0688,
"grad_norm": 1.3391777276992798,
"learning_rate": 1.934671052631579e-06,
"loss": 0.0316,
"step": 5800
},
{
"epoch": 3.0713,
"grad_norm": 0.8350562453269958,
"learning_rate": 1.923157894736842e-06,
"loss": 0.0329,
"step": 5825
},
{
"epoch": 3.0738,
"grad_norm": 0.7084619402885437,
"learning_rate": 1.911644736842105e-06,
"loss": 0.0325,
"step": 5850
},
{
"epoch": 3.0763,
"grad_norm": 1.2961277961730957,
"learning_rate": 1.9001315789473683e-06,
"loss": 0.0313,
"step": 5875
},
{
"epoch": 3.0788,
"grad_norm": 1.032840371131897,
"learning_rate": 1.8886184210526315e-06,
"loss": 0.0224,
"step": 5900
},
{
"epoch": 3.0813,
"grad_norm": 1.2073044776916504,
"learning_rate": 1.8771052631578945e-06,
"loss": 0.0215,
"step": 5925
},
{
"epoch": 3.0838,
"grad_norm": 0.8210967779159546,
"learning_rate": 1.8655921052631577e-06,
"loss": 0.0258,
"step": 5950
},
{
"epoch": 3.0863,
"grad_norm": 1.5273653268814087,
"learning_rate": 1.854078947368421e-06,
"loss": 0.0254,
"step": 5975
},
{
"epoch": 3.0888,
"grad_norm": 3.194197177886963,
"learning_rate": 1.8425657894736842e-06,
"loss": 0.025,
"step": 6000
},
{
"epoch": 3.0888,
"eval_loss": 0.14247554540634155,
"eval_runtime": 4123.5746,
"eval_samples_per_second": 3.305,
"eval_steps_per_second": 0.413,
"eval_wer": 7.940928579281029,
"step": 6000
},
{
"epoch": 3.0913,
"grad_norm": 2.1373400688171387,
"learning_rate": 1.8310526315789472e-06,
"loss": 0.031,
"step": 6025
},
{
"epoch": 3.0938,
"grad_norm": 1.0779415369033813,
"learning_rate": 1.8195394736842104e-06,
"loss": 0.024,
"step": 6050
},
{
"epoch": 3.0963,
"grad_norm": 0.9637121558189392,
"learning_rate": 1.8080263157894734e-06,
"loss": 0.0282,
"step": 6075
},
{
"epoch": 3.0987999999999998,
"grad_norm": 1.1645703315734863,
"learning_rate": 1.7965131578947366e-06,
"loss": 0.0278,
"step": 6100
},
{
"epoch": 3.1013,
"grad_norm": 1.2814173698425293,
"learning_rate": 1.7849999999999996e-06,
"loss": 0.0199,
"step": 6125
},
{
"epoch": 3.1038,
"grad_norm": 1.458809494972229,
"learning_rate": 1.773486842105263e-06,
"loss": 0.0264,
"step": 6150
},
{
"epoch": 3.1063,
"grad_norm": 1.6669671535491943,
"learning_rate": 1.7619736842105263e-06,
"loss": 0.0272,
"step": 6175
},
{
"epoch": 3.1088,
"grad_norm": 1.5049173831939697,
"learning_rate": 1.7504605263157893e-06,
"loss": 0.0243,
"step": 6200
},
{
"epoch": 3.1113,
"grad_norm": 0.861107587814331,
"learning_rate": 1.7389473684210525e-06,
"loss": 0.0274,
"step": 6225
},
{
"epoch": 3.1138,
"grad_norm": 1.0454998016357422,
"learning_rate": 1.7274342105263155e-06,
"loss": 0.0258,
"step": 6250
},
{
"epoch": 3.1163,
"grad_norm": 1.7108014822006226,
"learning_rate": 1.7159210526315788e-06,
"loss": 0.0259,
"step": 6275
},
{
"epoch": 3.1188,
"grad_norm": 0.8804712295532227,
"learning_rate": 1.704407894736842e-06,
"loss": 0.0255,
"step": 6300
},
{
"epoch": 3.1213,
"grad_norm": 2.0050883293151855,
"learning_rate": 1.6928947368421052e-06,
"loss": 0.0304,
"step": 6325
},
{
"epoch": 3.1238,
"grad_norm": 1.4400875568389893,
"learning_rate": 1.6813815789473682e-06,
"loss": 0.0333,
"step": 6350
},
{
"epoch": 3.1263,
"grad_norm": 1.4423948526382446,
"learning_rate": 1.6698684210526315e-06,
"loss": 0.0279,
"step": 6375
},
{
"epoch": 3.1288,
"grad_norm": 1.3972327709197998,
"learning_rate": 1.6583552631578947e-06,
"loss": 0.0255,
"step": 6400
},
{
"epoch": 3.1313,
"grad_norm": 1.6908966302871704,
"learning_rate": 1.6468421052631577e-06,
"loss": 0.0267,
"step": 6425
},
{
"epoch": 3.1338,
"grad_norm": 0.9540082216262817,
"learning_rate": 1.635328947368421e-06,
"loss": 0.0265,
"step": 6450
},
{
"epoch": 3.1363,
"grad_norm": 1.41488778591156,
"learning_rate": 1.6238157894736841e-06,
"loss": 0.0224,
"step": 6475
},
{
"epoch": 3.1388,
"grad_norm": 0.4790860116481781,
"learning_rate": 1.6123026315789474e-06,
"loss": 0.0274,
"step": 6500
},
{
"epoch": 3.1388,
"eval_loss": 0.13914132118225098,
"eval_runtime": 4133.8202,
"eval_samples_per_second": 3.297,
"eval_steps_per_second": 0.412,
"eval_wer": 7.731137088204039,
"step": 6500
},
{
"epoch": 3.1413,
"grad_norm": 2.5638585090637207,
"learning_rate": 1.6007894736842104e-06,
"loss": 0.025,
"step": 6525
},
{
"epoch": 3.1438,
"grad_norm": 1.8847306966781616,
"learning_rate": 1.5892763157894736e-06,
"loss": 0.0294,
"step": 6550
},
{
"epoch": 3.1463,
"grad_norm": 1.0196236371994019,
"learning_rate": 1.5777631578947366e-06,
"loss": 0.0255,
"step": 6575
},
{
"epoch": 3.1488,
"grad_norm": 1.0703202486038208,
"learning_rate": 1.5662499999999998e-06,
"loss": 0.0246,
"step": 6600
},
{
"epoch": 3.1513,
"grad_norm": 2.646519422531128,
"learning_rate": 1.5547368421052628e-06,
"loss": 0.0213,
"step": 6625
},
{
"epoch": 3.1538,
"grad_norm": 1.7430530786514282,
"learning_rate": 1.5432236842105263e-06,
"loss": 0.0267,
"step": 6650
},
{
"epoch": 3.1563,
"grad_norm": 1.0606240034103394,
"learning_rate": 1.5317105263157895e-06,
"loss": 0.0269,
"step": 6675
},
{
"epoch": 3.1588,
"grad_norm": 1.4670476913452148,
"learning_rate": 1.5201973684210525e-06,
"loss": 0.0271,
"step": 6700
},
{
"epoch": 3.1612999999999998,
"grad_norm": 2.345014810562134,
"learning_rate": 1.5086842105263157e-06,
"loss": 0.0252,
"step": 6725
},
{
"epoch": 3.1638,
"grad_norm": 2.9098987579345703,
"learning_rate": 1.4971710526315787e-06,
"loss": 0.0272,
"step": 6750
},
{
"epoch": 3.1663,
"grad_norm": 0.5682694911956787,
"learning_rate": 1.485657894736842e-06,
"loss": 0.0237,
"step": 6775
},
{
"epoch": 3.1688,
"grad_norm": 1.4645904302597046,
"learning_rate": 1.4746052631578947e-06,
"loss": 0.0303,
"step": 6800
},
{
"epoch": 4.0009,
"grad_norm": 1.3764489889144897,
"learning_rate": 1.4630921052631578e-06,
"loss": 0.0242,
"step": 6825
},
{
"epoch": 4.0034,
"grad_norm": 0.8848748803138733,
"learning_rate": 1.451578947368421e-06,
"loss": 0.0163,
"step": 6850
},
{
"epoch": 4.0059,
"grad_norm": 0.619125485420227,
"learning_rate": 1.440065789473684e-06,
"loss": 0.0188,
"step": 6875
},
{
"epoch": 4.0084,
"grad_norm": 0.9328649044036865,
"learning_rate": 1.4285526315789472e-06,
"loss": 0.0173,
"step": 6900
},
{
"epoch": 4.0109,
"grad_norm": 1.77474045753479,
"learning_rate": 1.4170394736842104e-06,
"loss": 0.0146,
"step": 6925
},
{
"epoch": 4.0134,
"grad_norm": 1.3934537172317505,
"learning_rate": 1.4055263157894737e-06,
"loss": 0.0156,
"step": 6950
},
{
"epoch": 4.0159,
"grad_norm": 1.2856354713439941,
"learning_rate": 1.3940131578947367e-06,
"loss": 0.0173,
"step": 6975
},
{
"epoch": 4.0184,
"grad_norm": 2.1229758262634277,
"learning_rate": 1.3824999999999999e-06,
"loss": 0.0155,
"step": 7000
},
{
"epoch": 4.0184,
"eval_loss": 0.14916160702705383,
"eval_runtime": 4128.7355,
"eval_samples_per_second": 3.301,
"eval_steps_per_second": 0.413,
"eval_wer": 7.697240646414307,
"step": 7000
},
{
"epoch": 4.0209,
"grad_norm": 0.44512999057769775,
"learning_rate": 1.3709868421052631e-06,
"loss": 0.0153,
"step": 7025
},
{
"epoch": 4.0234,
"grad_norm": 1.8791674375534058,
"learning_rate": 1.3594736842105261e-06,
"loss": 0.0165,
"step": 7050
},
{
"epoch": 4.0259,
"grad_norm": 5.244405746459961,
"learning_rate": 1.3479605263157894e-06,
"loss": 0.0179,
"step": 7075
},
{
"epoch": 4.0284,
"grad_norm": 1.1926153898239136,
"learning_rate": 1.3364473684210526e-06,
"loss": 0.0161,
"step": 7100
},
{
"epoch": 4.0309,
"grad_norm": 1.1147819757461548,
"learning_rate": 1.3249342105263158e-06,
"loss": 0.015,
"step": 7125
},
{
"epoch": 4.0334,
"grad_norm": 1.9370721578598022,
"learning_rate": 1.3134210526315788e-06,
"loss": 0.0142,
"step": 7150
},
{
"epoch": 4.0359,
"grad_norm": 0.49344903230667114,
"learning_rate": 1.301907894736842e-06,
"loss": 0.0134,
"step": 7175
},
{
"epoch": 4.0384,
"grad_norm": 1.8190902471542358,
"learning_rate": 1.290394736842105e-06,
"loss": 0.0168,
"step": 7200
},
{
"epoch": 4.0409,
"grad_norm": 0.7560425400733948,
"learning_rate": 1.2788815789473683e-06,
"loss": 0.0143,
"step": 7225
},
{
"epoch": 4.0434,
"grad_norm": 1.0451087951660156,
"learning_rate": 1.2673684210526313e-06,
"loss": 0.0149,
"step": 7250
},
{
"epoch": 4.0459,
"grad_norm": 1.0334726572036743,
"learning_rate": 1.2558552631578947e-06,
"loss": 0.0136,
"step": 7275
},
{
"epoch": 4.0484,
"grad_norm": 0.6531663537025452,
"learning_rate": 1.244342105263158e-06,
"loss": 0.0137,
"step": 7300
},
{
"epoch": 4.0509,
"grad_norm": 0.8954887986183167,
"learning_rate": 1.232828947368421e-06,
"loss": 0.0118,
"step": 7325
},
{
"epoch": 4.0534,
"grad_norm": 1.0640511512756348,
"learning_rate": 1.2213157894736842e-06,
"loss": 0.0126,
"step": 7350
},
{
"epoch": 4.0559,
"grad_norm": 0.2824617922306061,
"learning_rate": 1.2098026315789472e-06,
"loss": 0.0139,
"step": 7375
},
{
"epoch": 4.0584,
"grad_norm": 1.0095443725585938,
"learning_rate": 1.1982894736842104e-06,
"loss": 0.018,
"step": 7400
},
{
"epoch": 4.0609,
"grad_norm": 1.1475225687026978,
"learning_rate": 1.1867763157894734e-06,
"loss": 0.0133,
"step": 7425
},
{
"epoch": 4.0634,
"grad_norm": 1.5951991081237793,
"learning_rate": 1.1752631578947369e-06,
"loss": 0.013,
"step": 7450
},
{
"epoch": 4.0659,
"grad_norm": 0.3482917249202728,
"learning_rate": 1.1637499999999999e-06,
"loss": 0.0154,
"step": 7475
},
{
"epoch": 4.0684,
"grad_norm": 1.1572391986846924,
"learning_rate": 1.152236842105263e-06,
"loss": 0.0189,
"step": 7500
},
{
"epoch": 4.0684,
"eval_loss": 0.15172211825847626,
"eval_runtime": 4117.5679,
"eval_samples_per_second": 3.31,
"eval_steps_per_second": 0.414,
"eval_wer": 7.656931364285977,
"step": 7500
},
{
"epoch": 4.0709,
"grad_norm": 1.3942557573318481,
"learning_rate": 1.140723684210526e-06,
"loss": 0.0143,
"step": 7525
},
{
"epoch": 4.0734,
"grad_norm": 0.8097572326660156,
"learning_rate": 1.1292105263157893e-06,
"loss": 0.0127,
"step": 7550
},
{
"epoch": 4.0759,
"grad_norm": 0.740375816822052,
"learning_rate": 1.1176973684210526e-06,
"loss": 0.0124,
"step": 7575
},
{
"epoch": 4.0784,
"grad_norm": 0.8702480792999268,
"learning_rate": 1.1061842105263156e-06,
"loss": 0.0137,
"step": 7600
},
{
"epoch": 4.0809,
"grad_norm": 1.223105788230896,
"learning_rate": 1.094671052631579e-06,
"loss": 0.0137,
"step": 7625
},
{
"epoch": 4.0834,
"grad_norm": 0.43614983558654785,
"learning_rate": 1.083157894736842e-06,
"loss": 0.0109,
"step": 7650
},
{
"epoch": 4.0859,
"grad_norm": 1.0974986553192139,
"learning_rate": 1.0716447368421052e-06,
"loss": 0.0118,
"step": 7675
},
{
"epoch": 4.0884,
"grad_norm": 0.7234652042388916,
"learning_rate": 1.0601315789473682e-06,
"loss": 0.0125,
"step": 7700
},
{
"epoch": 4.0909,
"grad_norm": 0.7752431035041809,
"learning_rate": 1.0486184210526315e-06,
"loss": 0.0135,
"step": 7725
},
{
"epoch": 4.0934,
"grad_norm": 0.8796952366828918,
"learning_rate": 1.0371052631578947e-06,
"loss": 0.0158,
"step": 7750
},
{
"epoch": 4.0959,
"grad_norm": 3.9135661125183105,
"learning_rate": 1.0255921052631577e-06,
"loss": 0.0139,
"step": 7775
},
{
"epoch": 4.0984,
"grad_norm": 0.4837290942668915,
"learning_rate": 1.014078947368421e-06,
"loss": 0.0103,
"step": 7800
},
{
"epoch": 4.1009,
"grad_norm": 1.1155998706817627,
"learning_rate": 1.0025657894736842e-06,
"loss": 0.0106,
"step": 7825
},
{
"epoch": 4.1034,
"grad_norm": 2.628676652908325,
"learning_rate": 9.910526315789474e-07,
"loss": 0.0089,
"step": 7850
},
{
"epoch": 4.1059,
"grad_norm": 1.716665506362915,
"learning_rate": 9.795394736842104e-07,
"loss": 0.0132,
"step": 7875
},
{
"epoch": 4.1084,
"grad_norm": 1.6751716136932373,
"learning_rate": 9.680263157894736e-07,
"loss": 0.0137,
"step": 7900
},
{
"epoch": 4.1109,
"grad_norm": 0.9773244261741638,
"learning_rate": 9.565131578947368e-07,
"loss": 0.0111,
"step": 7925
},
{
"epoch": 4.1134,
"grad_norm": 1.44219172000885,
"learning_rate": 9.45e-07,
"loss": 0.0139,
"step": 7950
},
{
"epoch": 4.1159,
"grad_norm": 0.8723123073577881,
"learning_rate": 9.334868421052631e-07,
"loss": 0.0117,
"step": 7975
},
{
"epoch": 4.1184,
"grad_norm": 0.6484673023223877,
"learning_rate": 9.219736842105263e-07,
"loss": 0.0139,
"step": 8000
},
{
"epoch": 4.1184,
"eval_loss": 0.15393850207328796,
"eval_runtime": 4128.9341,
"eval_samples_per_second": 3.301,
"eval_steps_per_second": 0.413,
"eval_wer": 7.626699402689728,
"step": 8000
},
{
"epoch": 4.1209,
"grad_norm": 1.3702197074890137,
"learning_rate": 9.104605263157894e-07,
"loss": 0.0158,
"step": 8025
},
{
"epoch": 4.1234,
"grad_norm": 1.425645351409912,
"learning_rate": 8.989473684210525e-07,
"loss": 0.0117,
"step": 8050
},
{
"epoch": 4.1259,
"grad_norm": 1.4255399703979492,
"learning_rate": 8.874342105263158e-07,
"loss": 0.015,
"step": 8075
},
{
"epoch": 4.1284,
"grad_norm": 0.6988621950149536,
"learning_rate": 8.759210526315789e-07,
"loss": 0.0141,
"step": 8100
},
{
"epoch": 4.1309,
"grad_norm": 1.1563546657562256,
"learning_rate": 8.64407894736842e-07,
"loss": 0.0122,
"step": 8125
},
{
"epoch": 4.1334,
"grad_norm": 1.2023714780807495,
"learning_rate": 8.528947368421051e-07,
"loss": 0.013,
"step": 8150
},
{
"epoch": 4.1359,
"grad_norm": 0.9450110197067261,
"learning_rate": 8.413815789473683e-07,
"loss": 0.0123,
"step": 8175
},
{
"epoch": 4.1384,
"grad_norm": 0.9265995621681213,
"learning_rate": 8.298684210526316e-07,
"loss": 0.0114,
"step": 8200
},
{
"epoch": 4.1409,
"grad_norm": 0.4234980046749115,
"learning_rate": 8.183552631578947e-07,
"loss": 0.0085,
"step": 8225
},
{
"epoch": 4.1434,
"grad_norm": 1.3323073387145996,
"learning_rate": 8.068421052631579e-07,
"loss": 0.014,
"step": 8250
},
{
"epoch": 4.1459,
"grad_norm": 1.2050007581710815,
"learning_rate": 7.95328947368421e-07,
"loss": 0.0106,
"step": 8275
},
{
"epoch": 4.1484,
"grad_norm": 1.261042594909668,
"learning_rate": 7.838157894736841e-07,
"loss": 0.0107,
"step": 8300
},
{
"epoch": 4.1509,
"grad_norm": 1.2892303466796875,
"learning_rate": 7.723026315789474e-07,
"loss": 0.0145,
"step": 8325
},
{
"epoch": 4.1534,
"grad_norm": 1.1626112461090088,
"learning_rate": 7.607894736842105e-07,
"loss": 0.0139,
"step": 8350
},
{
"epoch": 4.1559,
"grad_norm": 1.0547322034835815,
"learning_rate": 7.492763157894736e-07,
"loss": 0.0154,
"step": 8375
},
{
"epoch": 4.1584,
"grad_norm": 0.44805532693862915,
"learning_rate": 7.377631578947367e-07,
"loss": 0.0109,
"step": 8400
},
{
"epoch": 4.1609,
"grad_norm": 0.7095866203308105,
"learning_rate": 7.262499999999999e-07,
"loss": 0.0114,
"step": 8425
},
{
"epoch": 4.1634,
"grad_norm": 1.4220194816589355,
"learning_rate": 7.14736842105263e-07,
"loss": 0.0134,
"step": 8450
},
{
"epoch": 4.1659,
"grad_norm": 1.0814168453216553,
"learning_rate": 7.032236842105263e-07,
"loss": 0.0142,
"step": 8475
},
{
"epoch": 4.1684,
"grad_norm": 0.7026916146278381,
"learning_rate": 6.917105263157895e-07,
"loss": 0.0141,
"step": 8500
},
{
"epoch": 4.1684,
"eval_loss": 0.15496784448623657,
"eval_runtime": 4124.1829,
"eval_samples_per_second": 3.305,
"eval_steps_per_second": 0.413,
"eval_wer": 7.542416358239584,
"step": 8500
},
{
"epoch": 5.0005,
"grad_norm": 4.648550033569336,
"learning_rate": 6.801973684210526e-07,
"loss": 0.0285,
"step": 8525
},
{
"epoch": 5.003,
"grad_norm": 1.9204503297805786,
"learning_rate": 6.691447368421053e-07,
"loss": 0.0761,
"step": 8550
},
{
"epoch": 5.0055,
"grad_norm": 1.7285746335983276,
"learning_rate": 6.576315789473684e-07,
"loss": 0.0602,
"step": 8575
},
{
"epoch": 5.008,
"grad_norm": 1.1516830921173096,
"learning_rate": 6.461184210526315e-07,
"loss": 0.0585,
"step": 8600
},
{
"epoch": 5.0105,
"grad_norm": 3.3867828845977783,
"learning_rate": 6.346052631578947e-07,
"loss": 0.0656,
"step": 8625
},
{
"epoch": 5.013,
"grad_norm": 4.064920902252197,
"learning_rate": 6.230921052631579e-07,
"loss": 0.0683,
"step": 8650
},
{
"epoch": 5.0155,
"grad_norm": 3.695047378540039,
"learning_rate": 6.11578947368421e-07,
"loss": 0.0659,
"step": 8675
},
{
"epoch": 5.018,
"grad_norm": 2.9087939262390137,
"learning_rate": 6.000657894736842e-07,
"loss": 0.0611,
"step": 8700
},
{
"epoch": 5.0205,
"grad_norm": 3.368290424346924,
"learning_rate": 5.885526315789473e-07,
"loss": 0.0603,
"step": 8725
},
{
"epoch": 5.023,
"grad_norm": 3.7565319538116455,
"learning_rate": 5.770394736842104e-07,
"loss": 0.0614,
"step": 8750
},
{
"epoch": 5.0255,
"grad_norm": 2.4887771606445312,
"learning_rate": 5.655263157894735e-07,
"loss": 0.0497,
"step": 8775
},
{
"epoch": 5.028,
"grad_norm": 2.1670076847076416,
"learning_rate": 5.540131578947369e-07,
"loss": 0.0662,
"step": 8800
},
{
"epoch": 5.0305,
"grad_norm": 1.3746148347854614,
"learning_rate": 5.425e-07,
"loss": 0.0507,
"step": 8825
},
{
"epoch": 5.033,
"grad_norm": 1.8274154663085938,
"learning_rate": 5.309868421052631e-07,
"loss": 0.0449,
"step": 8850
},
{
"epoch": 5.0355,
"grad_norm": 2.9424078464508057,
"learning_rate": 5.194736842105262e-07,
"loss": 0.0529,
"step": 8875
},
{
"epoch": 5.038,
"grad_norm": 2.457754611968994,
"learning_rate": 5.079605263157895e-07,
"loss": 0.042,
"step": 8900
},
{
"epoch": 5.0405,
"grad_norm": 2.208768606185913,
"learning_rate": 4.964473684210526e-07,
"loss": 0.0407,
"step": 8925
},
{
"epoch": 5.043,
"grad_norm": 1.9554438591003418,
"learning_rate": 4.849342105263158e-07,
"loss": 0.0465,
"step": 8950
},
{
"epoch": 5.0455,
"grad_norm": 1.1464567184448242,
"learning_rate": 4.734210526315789e-07,
"loss": 0.0537,
"step": 8975
},
{
"epoch": 5.048,
"grad_norm": 3.1216509342193604,
"learning_rate": 4.6190789473684203e-07,
"loss": 0.0368,
"step": 9000
},
{
"epoch": 5.048,
"eval_loss": 0.12588092684745789,
"eval_runtime": 4149.257,
"eval_samples_per_second": 3.285,
"eval_steps_per_second": 0.411,
"eval_wer": 7.215361500971087,
"step": 9000
},
{
"epoch": 5.048,
"step": 9000,
"total_flos": 4.891718061785088e+20,
"train_loss": 0.0,
"train_runtime": 289.8068,
"train_samples_per_second": 552.092,
"train_steps_per_second": 34.506
}
],
"logging_steps": 25,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.891718061785088e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}