whisper-medium-vaani-kannada / trainer_state.json
SujithPulikodan's picture
Upload 11 files
ba010e4 verified
{
"best_metric": 36.4960604331718,
"best_model_checkpoint": "./whisper-medium-ka/checkpoint-10000",
"epoch": 2.5131942699170646,
"eval_steps": 1000,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02535496957403651,
"grad_norm": 10.789830207824707,
"learning_rate": 4.6000000000000004e-07,
"loss": 1.2488,
"step": 25
},
{
"epoch": 0.05070993914807302,
"grad_norm": 6.402094841003418,
"learning_rate": 9.600000000000001e-07,
"loss": 0.9647,
"step": 50
},
{
"epoch": 0.07606490872210954,
"grad_norm": 6.477363586425781,
"learning_rate": 1.46e-06,
"loss": 0.7572,
"step": 75
},
{
"epoch": 0.10141987829614604,
"grad_norm": 5.079893589019775,
"learning_rate": 1.9600000000000003e-06,
"loss": 0.636,
"step": 100
},
{
"epoch": 0.12677484787018256,
"grad_norm": 4.092740535736084,
"learning_rate": 2.46e-06,
"loss": 0.5668,
"step": 125
},
{
"epoch": 0.15212981744421908,
"grad_norm": 5.041500091552734,
"learning_rate": 2.96e-06,
"loss": 0.5045,
"step": 150
},
{
"epoch": 0.17748478701825557,
"grad_norm": 5.795814514160156,
"learning_rate": 3.46e-06,
"loss": 0.4991,
"step": 175
},
{
"epoch": 0.2028397565922921,
"grad_norm": 5.117966651916504,
"learning_rate": 3.96e-06,
"loss": 0.4602,
"step": 200
},
{
"epoch": 0.2281947261663286,
"grad_norm": 5.343120574951172,
"learning_rate": 4.4600000000000005e-06,
"loss": 0.4458,
"step": 225
},
{
"epoch": 0.2535496957403651,
"grad_norm": 3.3774187564849854,
"learning_rate": 4.960000000000001e-06,
"loss": 0.4174,
"step": 250
},
{
"epoch": 0.2789046653144016,
"grad_norm": 3.576996326446533,
"learning_rate": 5.460000000000001e-06,
"loss": 0.4296,
"step": 275
},
{
"epoch": 0.30425963488843816,
"grad_norm": 3.852977991104126,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.3889,
"step": 300
},
{
"epoch": 0.32961460446247465,
"grad_norm": 4.133119583129883,
"learning_rate": 6.460000000000001e-06,
"loss": 0.3864,
"step": 325
},
{
"epoch": 0.35496957403651114,
"grad_norm": 4.041072368621826,
"learning_rate": 6.96e-06,
"loss": 0.3787,
"step": 350
},
{
"epoch": 0.3803245436105477,
"grad_norm": 4.2009806632995605,
"learning_rate": 7.4600000000000006e-06,
"loss": 0.3722,
"step": 375
},
{
"epoch": 0.4056795131845842,
"grad_norm": 3.9817683696746826,
"learning_rate": 7.960000000000002e-06,
"loss": 0.366,
"step": 400
},
{
"epoch": 0.43103448275862066,
"grad_norm": 3.8101816177368164,
"learning_rate": 8.46e-06,
"loss": 0.3712,
"step": 425
},
{
"epoch": 0.4563894523326572,
"grad_norm": 3.7913081645965576,
"learning_rate": 8.96e-06,
"loss": 0.3743,
"step": 450
},
{
"epoch": 0.4817444219066937,
"grad_norm": 4.321682929992676,
"learning_rate": 9.460000000000001e-06,
"loss": 0.3467,
"step": 475
},
{
"epoch": 0.5070993914807302,
"grad_norm": 3.346144199371338,
"learning_rate": 9.960000000000001e-06,
"loss": 0.3429,
"step": 500
},
{
"epoch": 0.5324543610547667,
"grad_norm": 4.033421516418457,
"learning_rate": 9.975789473684211e-06,
"loss": 0.3521,
"step": 525
},
{
"epoch": 0.5578093306288032,
"grad_norm": 4.106046676635742,
"learning_rate": 9.949473684210526e-06,
"loss": 0.3219,
"step": 550
},
{
"epoch": 0.5831643002028397,
"grad_norm": 3.8651669025421143,
"learning_rate": 9.923157894736844e-06,
"loss": 0.3323,
"step": 575
},
{
"epoch": 0.6085192697768763,
"grad_norm": 4.054211616516113,
"learning_rate": 9.89684210526316e-06,
"loss": 0.3443,
"step": 600
},
{
"epoch": 0.6338742393509128,
"grad_norm": 3.2193450927734375,
"learning_rate": 9.870526315789474e-06,
"loss": 0.3159,
"step": 625
},
{
"epoch": 0.6592292089249493,
"grad_norm": 2.698641300201416,
"learning_rate": 9.84421052631579e-06,
"loss": 0.3197,
"step": 650
},
{
"epoch": 0.6845841784989858,
"grad_norm": 2.68990159034729,
"learning_rate": 9.817894736842106e-06,
"loss": 0.3151,
"step": 675
},
{
"epoch": 0.7099391480730223,
"grad_norm": 3.144531726837158,
"learning_rate": 9.791578947368422e-06,
"loss": 0.3094,
"step": 700
},
{
"epoch": 0.7352941176470589,
"grad_norm": 3.1064047813415527,
"learning_rate": 9.765263157894737e-06,
"loss": 0.3026,
"step": 725
},
{
"epoch": 0.7606490872210954,
"grad_norm": 2.2833447456359863,
"learning_rate": 9.738947368421054e-06,
"loss": 0.3152,
"step": 750
},
{
"epoch": 0.7860040567951319,
"grad_norm": 2.9134676456451416,
"learning_rate": 9.712631578947369e-06,
"loss": 0.3018,
"step": 775
},
{
"epoch": 0.8113590263691683,
"grad_norm": 3.249375820159912,
"learning_rate": 9.686315789473684e-06,
"loss": 0.306,
"step": 800
},
{
"epoch": 0.8367139959432048,
"grad_norm": 3.1664018630981445,
"learning_rate": 9.66e-06,
"loss": 0.3053,
"step": 825
},
{
"epoch": 0.8620689655172413,
"grad_norm": 2.8798232078552246,
"learning_rate": 9.633684210526316e-06,
"loss": 0.2919,
"step": 850
},
{
"epoch": 0.8874239350912779,
"grad_norm": 3.306312322616577,
"learning_rate": 9.607368421052632e-06,
"loss": 0.3008,
"step": 875
},
{
"epoch": 0.9127789046653144,
"grad_norm": 3.241096258163452,
"learning_rate": 9.581052631578947e-06,
"loss": 0.3066,
"step": 900
},
{
"epoch": 0.9381338742393509,
"grad_norm": 3.2305490970611572,
"learning_rate": 9.554736842105264e-06,
"loss": 0.293,
"step": 925
},
{
"epoch": 0.9634888438133874,
"grad_norm": 4.4702229499816895,
"learning_rate": 9.52842105263158e-06,
"loss": 0.2901,
"step": 950
},
{
"epoch": 0.9888438133874239,
"grad_norm": 3.1923646926879883,
"learning_rate": 9.502105263157896e-06,
"loss": 0.29,
"step": 975
},
{
"epoch": 1.0141987829614605,
"grad_norm": 2.5901284217834473,
"learning_rate": 9.475789473684212e-06,
"loss": 0.2611,
"step": 1000
},
{
"epoch": 1.0141987829614605,
"eval_loss": 0.2935636639595032,
"eval_runtime": 7046.6021,
"eval_samples_per_second": 0.69,
"eval_steps_per_second": 0.086,
"eval_wer": 62.766441678696374,
"step": 1000
},
{
"epoch": 1.039553752535497,
"grad_norm": 2.9228506088256836,
"learning_rate": 9.449473684210527e-06,
"loss": 0.2488,
"step": 1025
},
{
"epoch": 1.0649087221095335,
"grad_norm": 2.1438565254211426,
"learning_rate": 9.423157894736842e-06,
"loss": 0.2441,
"step": 1050
},
{
"epoch": 1.09026369168357,
"grad_norm": 3.6437671184539795,
"learning_rate": 9.396842105263159e-06,
"loss": 0.2512,
"step": 1075
},
{
"epoch": 1.1156186612576064,
"grad_norm": 2.413125991821289,
"learning_rate": 9.370526315789474e-06,
"loss": 0.2447,
"step": 1100
},
{
"epoch": 1.140973630831643,
"grad_norm": 2.880699872970581,
"learning_rate": 9.34421052631579e-06,
"loss": 0.2463,
"step": 1125
},
{
"epoch": 1.1663286004056794,
"grad_norm": 2.283259630203247,
"learning_rate": 9.317894736842105e-06,
"loss": 0.2488,
"step": 1150
},
{
"epoch": 1.1916835699797161,
"grad_norm": 3.4118285179138184,
"learning_rate": 9.291578947368422e-06,
"loss": 0.2482,
"step": 1175
},
{
"epoch": 1.2170385395537526,
"grad_norm": 2.1249563694000244,
"learning_rate": 9.265263157894737e-06,
"loss": 0.243,
"step": 1200
},
{
"epoch": 1.2423935091277891,
"grad_norm": 2.6854898929595947,
"learning_rate": 9.238947368421052e-06,
"loss": 0.2563,
"step": 1225
},
{
"epoch": 1.2677484787018256,
"grad_norm": 2.975567102432251,
"learning_rate": 9.21263157894737e-06,
"loss": 0.2529,
"step": 1250
},
{
"epoch": 1.293103448275862,
"grad_norm": 2.6156153678894043,
"learning_rate": 9.186315789473685e-06,
"loss": 0.2378,
"step": 1275
},
{
"epoch": 1.3184584178498986,
"grad_norm": 2.3254947662353516,
"learning_rate": 9.16e-06,
"loss": 0.2457,
"step": 1300
},
{
"epoch": 1.343813387423935,
"grad_norm": 3.162607431411743,
"learning_rate": 9.133684210526317e-06,
"loss": 0.2452,
"step": 1325
},
{
"epoch": 1.3691683569979716,
"grad_norm": 2.94197678565979,
"learning_rate": 9.107368421052632e-06,
"loss": 0.2411,
"step": 1350
},
{
"epoch": 1.394523326572008,
"grad_norm": 2.53021502494812,
"learning_rate": 9.081052631578949e-06,
"loss": 0.2497,
"step": 1375
},
{
"epoch": 1.4198782961460445,
"grad_norm": 2.740830421447754,
"learning_rate": 9.054736842105264e-06,
"loss": 0.228,
"step": 1400
},
{
"epoch": 1.445233265720081,
"grad_norm": 3.1709723472595215,
"learning_rate": 9.02842105263158e-06,
"loss": 0.2598,
"step": 1425
},
{
"epoch": 1.4705882352941178,
"grad_norm": 2.508091688156128,
"learning_rate": 9.002105263157895e-06,
"loss": 0.2508,
"step": 1450
},
{
"epoch": 1.495943204868154,
"grad_norm": 2.7562551498413086,
"learning_rate": 8.97578947368421e-06,
"loss": 0.2462,
"step": 1475
},
{
"epoch": 1.5212981744421907,
"grad_norm": 3.4288785457611084,
"learning_rate": 8.949473684210527e-06,
"loss": 0.2351,
"step": 1500
},
{
"epoch": 1.5466531440162272,
"grad_norm": 2.4875781536102295,
"learning_rate": 8.923157894736842e-06,
"loss": 0.249,
"step": 1525
},
{
"epoch": 1.5720081135902637,
"grad_norm": 2.5765998363494873,
"learning_rate": 8.896842105263159e-06,
"loss": 0.2354,
"step": 1550
},
{
"epoch": 1.5973630831643002,
"grad_norm": 2.0511434078216553,
"learning_rate": 8.870526315789474e-06,
"loss": 0.248,
"step": 1575
},
{
"epoch": 1.6227180527383367,
"grad_norm": 3.0492985248565674,
"learning_rate": 8.84421052631579e-06,
"loss": 0.2443,
"step": 1600
},
{
"epoch": 1.6480730223123732,
"grad_norm": 3.98416805267334,
"learning_rate": 8.817894736842107e-06,
"loss": 0.2348,
"step": 1625
},
{
"epoch": 1.6734279918864097,
"grad_norm": 2.4991466999053955,
"learning_rate": 8.791578947368422e-06,
"loss": 0.2514,
"step": 1650
},
{
"epoch": 1.6987829614604464,
"grad_norm": 2.768167734146118,
"learning_rate": 8.765263157894739e-06,
"loss": 0.2517,
"step": 1675
},
{
"epoch": 1.7241379310344827,
"grad_norm": 3.1816017627716064,
"learning_rate": 8.738947368421053e-06,
"loss": 0.2421,
"step": 1700
},
{
"epoch": 1.7494929006085194,
"grad_norm": 3.7011590003967285,
"learning_rate": 8.712631578947368e-06,
"loss": 0.2376,
"step": 1725
},
{
"epoch": 1.7748478701825556,
"grad_norm": 2.3581457138061523,
"learning_rate": 8.686315789473685e-06,
"loss": 0.2281,
"step": 1750
},
{
"epoch": 1.8002028397565923,
"grad_norm": 2.812277317047119,
"learning_rate": 8.66e-06,
"loss": 0.2357,
"step": 1775
},
{
"epoch": 1.8255578093306288,
"grad_norm": 3.1732356548309326,
"learning_rate": 8.633684210526317e-06,
"loss": 0.237,
"step": 1800
},
{
"epoch": 1.8509127789046653,
"grad_norm": 1.9399900436401367,
"learning_rate": 8.607368421052632e-06,
"loss": 0.2395,
"step": 1825
},
{
"epoch": 1.8762677484787018,
"grad_norm": 1.8843597173690796,
"learning_rate": 8.581052631578948e-06,
"loss": 0.2282,
"step": 1850
},
{
"epoch": 1.9016227180527383,
"grad_norm": 2.314880132675171,
"learning_rate": 8.554736842105263e-06,
"loss": 0.2454,
"step": 1875
},
{
"epoch": 1.9269776876267748,
"grad_norm": 4.113000392913818,
"learning_rate": 8.528421052631578e-06,
"loss": 0.2342,
"step": 1900
},
{
"epoch": 1.9523326572008113,
"grad_norm": 2.076021432876587,
"learning_rate": 8.502105263157897e-06,
"loss": 0.2313,
"step": 1925
},
{
"epoch": 1.977687626774848,
"grad_norm": 3.887740135192871,
"learning_rate": 8.475789473684212e-06,
"loss": 0.245,
"step": 1950
},
{
"epoch": 2.0030425963488843,
"grad_norm": 2.359464406967163,
"learning_rate": 8.449473684210527e-06,
"loss": 0.2539,
"step": 1975
},
{
"epoch": 2.028397565922921,
"grad_norm": 2.299802303314209,
"learning_rate": 8.423157894736843e-06,
"loss": 0.18,
"step": 2000
},
{
"epoch": 2.028397565922921,
"eval_loss": 0.27181410789489746,
"eval_runtime": 7074.1416,
"eval_samples_per_second": 0.688,
"eval_steps_per_second": 0.086,
"eval_wer": 59.46705161718232,
"step": 2000
},
{
"epoch": 2.0537525354969572,
"grad_norm": 4.299188613891602,
"learning_rate": 8.396842105263158e-06,
"loss": 0.1803,
"step": 2025
},
{
"epoch": 2.079107505070994,
"grad_norm": 2.5189449787139893,
"learning_rate": 8.370526315789475e-06,
"loss": 0.1927,
"step": 2050
},
{
"epoch": 2.1044624746450302,
"grad_norm": 2.5737998485565186,
"learning_rate": 8.34421052631579e-06,
"loss": 0.1818,
"step": 2075
},
{
"epoch": 2.129817444219067,
"grad_norm": 3.098142623901367,
"learning_rate": 8.317894736842107e-06,
"loss": 0.1907,
"step": 2100
},
{
"epoch": 2.1551724137931036,
"grad_norm": 1.9791940450668335,
"learning_rate": 8.291578947368422e-06,
"loss": 0.1809,
"step": 2125
},
{
"epoch": 2.18052738336714,
"grad_norm": 1.9798074960708618,
"learning_rate": 8.265263157894737e-06,
"loss": 0.1842,
"step": 2150
},
{
"epoch": 2.2058823529411766,
"grad_norm": 2.5827345848083496,
"learning_rate": 8.238947368421053e-06,
"loss": 0.1769,
"step": 2175
},
{
"epoch": 2.231237322515213,
"grad_norm": 2.1781139373779297,
"learning_rate": 8.212631578947368e-06,
"loss": 0.1904,
"step": 2200
},
{
"epoch": 2.2565922920892496,
"grad_norm": 2.2573533058166504,
"learning_rate": 8.186315789473685e-06,
"loss": 0.1892,
"step": 2225
},
{
"epoch": 2.281947261663286,
"grad_norm": 2.1169838905334473,
"learning_rate": 8.16e-06,
"loss": 0.1868,
"step": 2250
},
{
"epoch": 2.3073022312373226,
"grad_norm": 3.3498687744140625,
"learning_rate": 8.133684210526316e-06,
"loss": 0.197,
"step": 2275
},
{
"epoch": 2.332657200811359,
"grad_norm": 2.25311541557312,
"learning_rate": 8.107368421052633e-06,
"loss": 0.1869,
"step": 2300
},
{
"epoch": 2.3580121703853956,
"grad_norm": 2.396975040435791,
"learning_rate": 8.081052631578948e-06,
"loss": 0.189,
"step": 2325
},
{
"epoch": 2.3833671399594323,
"grad_norm": 2.147547960281372,
"learning_rate": 8.054736842105265e-06,
"loss": 0.1893,
"step": 2350
},
{
"epoch": 2.4087221095334685,
"grad_norm": 1.9963374137878418,
"learning_rate": 8.02842105263158e-06,
"loss": 0.2003,
"step": 2375
},
{
"epoch": 2.4340770791075053,
"grad_norm": 3.571808099746704,
"learning_rate": 8.002105263157895e-06,
"loss": 0.1904,
"step": 2400
},
{
"epoch": 2.4594320486815415,
"grad_norm": 2.6122543811798096,
"learning_rate": 7.975789473684211e-06,
"loss": 0.1855,
"step": 2425
},
{
"epoch": 2.4847870182555782,
"grad_norm": 2.9478933811187744,
"learning_rate": 7.949473684210526e-06,
"loss": 0.1822,
"step": 2450
},
{
"epoch": 2.5101419878296145,
"grad_norm": 2.368997097015381,
"learning_rate": 7.923157894736843e-06,
"loss": 0.186,
"step": 2475
},
{
"epoch": 2.535496957403651,
"grad_norm": 2.529630661010742,
"learning_rate": 7.896842105263158e-06,
"loss": 0.191,
"step": 2500
},
{
"epoch": 2.5608519269776875,
"grad_norm": 2.9214324951171875,
"learning_rate": 7.870526315789475e-06,
"loss": 0.1885,
"step": 2525
},
{
"epoch": 2.586206896551724,
"grad_norm": 2.141688823699951,
"learning_rate": 7.84421052631579e-06,
"loss": 0.1847,
"step": 2550
},
{
"epoch": 2.6115618661257605,
"grad_norm": 2.1608083248138428,
"learning_rate": 7.817894736842105e-06,
"loss": 0.1873,
"step": 2575
},
{
"epoch": 2.636916835699797,
"grad_norm": 2.489492416381836,
"learning_rate": 7.791578947368423e-06,
"loss": 0.1959,
"step": 2600
},
{
"epoch": 2.662271805273834,
"grad_norm": 2.31791353225708,
"learning_rate": 7.765263157894738e-06,
"loss": 0.1915,
"step": 2625
},
{
"epoch": 2.68762677484787,
"grad_norm": 2.779777765274048,
"learning_rate": 7.738947368421053e-06,
"loss": 0.1878,
"step": 2650
},
{
"epoch": 2.7129817444219064,
"grad_norm": 2.5656425952911377,
"learning_rate": 7.71263157894737e-06,
"loss": 0.1925,
"step": 2675
},
{
"epoch": 2.738336713995943,
"grad_norm": 3.0531349182128906,
"learning_rate": 7.686315789473685e-06,
"loss": 0.189,
"step": 2700
},
{
"epoch": 2.76369168356998,
"grad_norm": 2.912122964859009,
"learning_rate": 7.660000000000001e-06,
"loss": 0.1906,
"step": 2725
},
{
"epoch": 2.789046653144016,
"grad_norm": 3.0555505752563477,
"learning_rate": 7.633684210526316e-06,
"loss": 0.1834,
"step": 2750
},
{
"epoch": 2.814401622718053,
"grad_norm": 2.2603671550750732,
"learning_rate": 7.607368421052632e-06,
"loss": 0.1872,
"step": 2775
},
{
"epoch": 2.839756592292089,
"grad_norm": 2.1887290477752686,
"learning_rate": 7.581052631578948e-06,
"loss": 0.1787,
"step": 2800
},
{
"epoch": 2.865111561866126,
"grad_norm": 2.393935203552246,
"learning_rate": 7.554736842105264e-06,
"loss": 0.1883,
"step": 2825
},
{
"epoch": 2.890466531440162,
"grad_norm": 2.3543920516967773,
"learning_rate": 7.5284210526315794e-06,
"loss": 0.1883,
"step": 2850
},
{
"epoch": 2.915821501014199,
"grad_norm": 2.139833927154541,
"learning_rate": 7.502105263157895e-06,
"loss": 0.1997,
"step": 2875
},
{
"epoch": 2.9411764705882355,
"grad_norm": 2.270552158355713,
"learning_rate": 7.475789473684211e-06,
"loss": 0.1932,
"step": 2900
},
{
"epoch": 2.9665314401622718,
"grad_norm": 2.4720232486724854,
"learning_rate": 7.449473684210526e-06,
"loss": 0.1926,
"step": 2925
},
{
"epoch": 2.991886409736308,
"grad_norm": 2.318767786026001,
"learning_rate": 7.4231578947368436e-06,
"loss": 0.1837,
"step": 2950
},
{
"epoch": 3.0172413793103448,
"grad_norm": 1.8572547435760498,
"learning_rate": 7.3968421052631585e-06,
"loss": 0.1584,
"step": 2975
},
{
"epoch": 3.0425963488843815,
"grad_norm": 2.6684017181396484,
"learning_rate": 7.370526315789474e-06,
"loss": 0.15,
"step": 3000
},
{
"epoch": 3.0425963488843815,
"eval_loss": 0.2813817858695984,
"eval_runtime": 7104.648,
"eval_samples_per_second": 0.685,
"eval_steps_per_second": 0.086,
"eval_wer": 59.24206364688593,
"step": 3000
},
{
"epoch": 0.760241266649912,
"grad_norm": 2.2316994667053223,
"learning_rate": 8.26e-06,
"loss": 0.1649,
"step": 3025
},
{
"epoch": 0.7665242523247047,
"grad_norm": 1.9445712566375732,
"learning_rate": 8.242758620689655e-06,
"loss": 0.1373,
"step": 3050
},
{
"epoch": 0.7728072379994974,
"grad_norm": 1.9695370197296143,
"learning_rate": 8.22551724137931e-06,
"loss": 0.1303,
"step": 3075
},
{
"epoch": 0.77909022367429,
"grad_norm": 2.19242262840271,
"learning_rate": 8.208275862068967e-06,
"loss": 0.1204,
"step": 3100
},
{
"epoch": 0.7853732093490827,
"grad_norm": 1.6107076406478882,
"learning_rate": 8.191034482758622e-06,
"loss": 0.1206,
"step": 3125
},
{
"epoch": 0.7916561950238753,
"grad_norm": 1.8804924488067627,
"learning_rate": 8.173793103448277e-06,
"loss": 0.116,
"step": 3150
},
{
"epoch": 0.797939180698668,
"grad_norm": 1.8132656812667847,
"learning_rate": 8.156551724137931e-06,
"loss": 0.1193,
"step": 3175
},
{
"epoch": 0.8042221663734607,
"grad_norm": 1.9557982683181763,
"learning_rate": 8.139310344827586e-06,
"loss": 0.1139,
"step": 3200
},
{
"epoch": 0.8105051520482534,
"grad_norm": 1.4428989887237549,
"learning_rate": 8.122068965517243e-06,
"loss": 0.1033,
"step": 3225
},
{
"epoch": 0.816788137723046,
"grad_norm": 1.4548839330673218,
"learning_rate": 8.104827586206898e-06,
"loss": 0.0975,
"step": 3250
},
{
"epoch": 0.8230711233978386,
"grad_norm": 1.8930418491363525,
"learning_rate": 8.087586206896553e-06,
"loss": 0.0997,
"step": 3275
},
{
"epoch": 0.8293541090726313,
"grad_norm": 1.905731439590454,
"learning_rate": 8.070344827586207e-06,
"loss": 0.1069,
"step": 3300
},
{
"epoch": 0.835637094747424,
"grad_norm": 1.7036992311477661,
"learning_rate": 8.053103448275862e-06,
"loss": 0.0921,
"step": 3325
},
{
"epoch": 0.8419200804222167,
"grad_norm": 1.7952214479446411,
"learning_rate": 8.035862068965517e-06,
"loss": 0.1027,
"step": 3350
},
{
"epoch": 0.8482030660970094,
"grad_norm": 2.0625522136688232,
"learning_rate": 8.018620689655174e-06,
"loss": 0.1051,
"step": 3375
},
{
"epoch": 0.8544860517718019,
"grad_norm": 1.8595600128173828,
"learning_rate": 8.001379310344829e-06,
"loss": 0.0968,
"step": 3400
},
{
"epoch": 0.8607690374465946,
"grad_norm": 1.5569204092025757,
"learning_rate": 7.984137931034484e-06,
"loss": 0.099,
"step": 3425
},
{
"epoch": 0.8670520231213873,
"grad_norm": 1.6187876462936401,
"learning_rate": 7.966896551724138e-06,
"loss": 0.0942,
"step": 3450
},
{
"epoch": 0.87333500879618,
"grad_norm": 1.494591474533081,
"learning_rate": 7.949655172413793e-06,
"loss": 0.096,
"step": 3475
},
{
"epoch": 0.8796179944709726,
"grad_norm": 1.5578557252883911,
"learning_rate": 7.932413793103448e-06,
"loss": 0.0885,
"step": 3500
},
{
"epoch": 0.8859009801457652,
"grad_norm": 2.047339677810669,
"learning_rate": 7.915172413793105e-06,
"loss": 0.098,
"step": 3525
},
{
"epoch": 0.8921839658205579,
"grad_norm": 1.7731865644454956,
"learning_rate": 7.89793103448276e-06,
"loss": 0.0889,
"step": 3550
},
{
"epoch": 0.8984669514953506,
"grad_norm": 2.1644279956817627,
"learning_rate": 7.880689655172414e-06,
"loss": 0.0864,
"step": 3575
},
{
"epoch": 0.9047499371701433,
"grad_norm": 1.8717072010040283,
"learning_rate": 7.86344827586207e-06,
"loss": 0.0847,
"step": 3600
},
{
"epoch": 0.9110329228449359,
"grad_norm": 1.481933832168579,
"learning_rate": 7.846206896551724e-06,
"loss": 0.0892,
"step": 3625
},
{
"epoch": 0.9173159085197286,
"grad_norm": 2.3373663425445557,
"learning_rate": 7.82896551724138e-06,
"loss": 0.0904,
"step": 3650
},
{
"epoch": 0.9235988941945212,
"grad_norm": 1.509282112121582,
"learning_rate": 7.811724137931036e-06,
"loss": 0.0973,
"step": 3675
},
{
"epoch": 0.9298818798693139,
"grad_norm": 1.7900352478027344,
"learning_rate": 7.79448275862069e-06,
"loss": 0.0815,
"step": 3700
},
{
"epoch": 0.9361648655441066,
"grad_norm": 1.6436471939086914,
"learning_rate": 7.777241379310345e-06,
"loss": 0.0853,
"step": 3725
},
{
"epoch": 0.9424478512188992,
"grad_norm": 1.4675796031951904,
"learning_rate": 7.76e-06,
"loss": 0.0876,
"step": 3750
},
{
"epoch": 0.9487308368936919,
"grad_norm": 1.8452798128128052,
"learning_rate": 7.742758620689655e-06,
"loss": 0.0808,
"step": 3775
},
{
"epoch": 0.9550138225684845,
"grad_norm": 1.3618487119674683,
"learning_rate": 7.725517241379312e-06,
"loss": 0.086,
"step": 3800
},
{
"epoch": 0.9612968082432772,
"grad_norm": 1.2715941667556763,
"learning_rate": 7.708275862068967e-06,
"loss": 0.088,
"step": 3825
},
{
"epoch": 0.9675797939180699,
"grad_norm": 1.8105791807174683,
"learning_rate": 7.691034482758621e-06,
"loss": 0.0899,
"step": 3850
},
{
"epoch": 0.9738627795928625,
"grad_norm": 2.1864423751831055,
"learning_rate": 7.673793103448276e-06,
"loss": 0.0857,
"step": 3875
},
{
"epoch": 0.9801457652676552,
"grad_norm": 1.5500792264938354,
"learning_rate": 7.656551724137931e-06,
"loss": 0.0859,
"step": 3900
},
{
"epoch": 0.9864287509424479,
"grad_norm": 1.8707691431045532,
"learning_rate": 7.639310344827588e-06,
"loss": 0.0882,
"step": 3925
},
{
"epoch": 0.9927117366172405,
"grad_norm": 1.5884675979614258,
"learning_rate": 7.622068965517242e-06,
"loss": 0.0832,
"step": 3950
},
{
"epoch": 0.9989947222920331,
"grad_norm": 1.444044828414917,
"learning_rate": 7.6048275862068975e-06,
"loss": 0.0786,
"step": 3975
},
{
"epoch": 1.0052777079668258,
"grad_norm": 1.6613413095474243,
"learning_rate": 7.588275862068966e-06,
"loss": 0.0694,
"step": 4000
},
{
"epoch": 1.0052777079668258,
"eval_loss": 0.13177034258842468,
"eval_runtime": 28232.4806,
"eval_samples_per_second": 0.692,
"eval_steps_per_second": 0.086,
"eval_wer": 40.84150014082778,
"step": 4000
},
{
"epoch": 1.0115606936416186,
"grad_norm": 1.5078247785568237,
"learning_rate": 7.571034482758622e-06,
"loss": 0.0786,
"step": 4025
},
{
"epoch": 1.0178436793164112,
"grad_norm": 2.1607069969177246,
"learning_rate": 7.553793103448277e-06,
"loss": 0.0748,
"step": 4050
},
{
"epoch": 1.0241266649912038,
"grad_norm": 1.3016276359558105,
"learning_rate": 7.5365517241379315e-06,
"loss": 0.0782,
"step": 4075
},
{
"epoch": 1.0304096506659965,
"grad_norm": 1.1447079181671143,
"learning_rate": 7.519310344827587e-06,
"loss": 0.0708,
"step": 4100
},
{
"epoch": 1.0366926363407891,
"grad_norm": 1.6340376138687134,
"learning_rate": 7.502068965517242e-06,
"loss": 0.0705,
"step": 4125
},
{
"epoch": 1.042975622015582,
"grad_norm": 1.5793544054031372,
"learning_rate": 7.484827586206898e-06,
"loss": 0.0723,
"step": 4150
},
{
"epoch": 1.0492586076903745,
"grad_norm": 1.2882245779037476,
"learning_rate": 7.467586206896552e-06,
"loss": 0.0692,
"step": 4175
},
{
"epoch": 1.055541593365167,
"grad_norm": 1.822688102722168,
"learning_rate": 7.4503448275862075e-06,
"loss": 0.071,
"step": 4200
},
{
"epoch": 1.0618245790399599,
"grad_norm": 1.410294771194458,
"learning_rate": 7.433103448275862e-06,
"loss": 0.0666,
"step": 4225
},
{
"epoch": 1.0681075647147524,
"grad_norm": 1.4143624305725098,
"learning_rate": 7.415862068965518e-06,
"loss": 0.0662,
"step": 4250
},
{
"epoch": 1.0743905503895452,
"grad_norm": 1.5564229488372803,
"learning_rate": 7.398620689655173e-06,
"loss": 0.0752,
"step": 4275
},
{
"epoch": 1.0806735360643378,
"grad_norm": 2.0174007415771484,
"learning_rate": 7.381379310344829e-06,
"loss": 0.0647,
"step": 4300
},
{
"epoch": 1.0869565217391304,
"grad_norm": 1.2202295064926147,
"learning_rate": 7.364137931034483e-06,
"loss": 0.0728,
"step": 4325
},
{
"epoch": 1.0932395074139232,
"grad_norm": 1.7252171039581299,
"learning_rate": 7.346896551724138e-06,
"loss": 0.0613,
"step": 4350
},
{
"epoch": 1.0995224930887157,
"grad_norm": 1.6477744579315186,
"learning_rate": 7.329655172413793e-06,
"loss": 0.0762,
"step": 4375
},
{
"epoch": 1.1058054787635083,
"grad_norm": 2.01273512840271,
"learning_rate": 7.312413793103449e-06,
"loss": 0.0723,
"step": 4400
},
{
"epoch": 1.1120884644383011,
"grad_norm": 2.0177805423736572,
"learning_rate": 7.295172413793105e-06,
"loss": 0.0713,
"step": 4425
},
{
"epoch": 1.1183714501130937,
"grad_norm": 1.0823686122894287,
"learning_rate": 7.27793103448276e-06,
"loss": 0.0699,
"step": 4450
},
{
"epoch": 1.1246544357878865,
"grad_norm": 1.8306968212127686,
"learning_rate": 7.2606896551724145e-06,
"loss": 0.0692,
"step": 4475
},
{
"epoch": 1.130937421462679,
"grad_norm": 1.9577009677886963,
"learning_rate": 7.243448275862069e-06,
"loss": 0.0707,
"step": 4500
},
{
"epoch": 1.1372204071374716,
"grad_norm": 1.8456298112869263,
"learning_rate": 7.226206896551725e-06,
"loss": 0.0702,
"step": 4525
},
{
"epoch": 1.1435033928122644,
"grad_norm": 1.369918942451477,
"learning_rate": 7.20896551724138e-06,
"loss": 0.0716,
"step": 4550
},
{
"epoch": 1.149786378487057,
"grad_norm": 1.7100856304168701,
"learning_rate": 7.191724137931036e-06,
"loss": 0.0773,
"step": 4575
},
{
"epoch": 1.1560693641618498,
"grad_norm": 1.2440359592437744,
"learning_rate": 7.17448275862069e-06,
"loss": 0.0736,
"step": 4600
},
{
"epoch": 1.1623523498366424,
"grad_norm": 1.26316237449646,
"learning_rate": 7.157241379310345e-06,
"loss": 0.0683,
"step": 4625
},
{
"epoch": 1.168635335511435,
"grad_norm": 1.6966075897216797,
"learning_rate": 7.14e-06,
"loss": 0.064,
"step": 4650
},
{
"epoch": 1.1749183211862277,
"grad_norm": 1.7377158403396606,
"learning_rate": 7.122758620689656e-06,
"loss": 0.0685,
"step": 4675
},
{
"epoch": 1.1812013068610203,
"grad_norm": 1.851913571357727,
"learning_rate": 7.105517241379311e-06,
"loss": 0.0586,
"step": 4700
},
{
"epoch": 1.1874842925358131,
"grad_norm": 1.6973158121109009,
"learning_rate": 7.0882758620689666e-06,
"loss": 0.0696,
"step": 4725
},
{
"epoch": 1.1937672782106057,
"grad_norm": 1.451185703277588,
"learning_rate": 7.0710344827586206e-06,
"loss": 0.0671,
"step": 4750
},
{
"epoch": 1.2000502638853983,
"grad_norm": 1.7117061614990234,
"learning_rate": 7.053793103448276e-06,
"loss": 0.0563,
"step": 4775
},
{
"epoch": 1.206333249560191,
"grad_norm": 1.4349240064620972,
"learning_rate": 7.036551724137931e-06,
"loss": 0.0696,
"step": 4800
},
{
"epoch": 1.2126162352349836,
"grad_norm": 1.5705769062042236,
"learning_rate": 7.019310344827587e-06,
"loss": 0.0634,
"step": 4825
},
{
"epoch": 1.2188992209097762,
"grad_norm": 1.5555791854858398,
"learning_rate": 7.002068965517243e-06,
"loss": 0.0665,
"step": 4850
},
{
"epoch": 1.225182206584569,
"grad_norm": 1.4251762628555298,
"learning_rate": 6.9848275862068975e-06,
"loss": 0.0636,
"step": 4875
},
{
"epoch": 1.2314651922593616,
"grad_norm": 1.7479225397109985,
"learning_rate": 6.967586206896552e-06,
"loss": 0.0736,
"step": 4900
},
{
"epoch": 1.2377481779341544,
"grad_norm": 1.4017658233642578,
"learning_rate": 6.950344827586207e-06,
"loss": 0.0642,
"step": 4925
},
{
"epoch": 1.244031163608947,
"grad_norm": 1.34666907787323,
"learning_rate": 6.933103448275863e-06,
"loss": 0.0647,
"step": 4950
},
{
"epoch": 1.2503141492837395,
"grad_norm": 1.6432591676712036,
"learning_rate": 6.915862068965518e-06,
"loss": 0.069,
"step": 4975
},
{
"epoch": 1.2565971349585323,
"grad_norm": 1.2850168943405151,
"learning_rate": 6.8986206896551735e-06,
"loss": 0.0638,
"step": 5000
},
{
"epoch": 1.2565971349585323,
"eval_loss": 0.12251746654510498,
"eval_runtime": 28262.6172,
"eval_samples_per_second": 0.691,
"eval_steps_per_second": 0.086,
"eval_wer": 38.8171910996844,
"step": 5000
},
{
"epoch": 1.2628801206333249,
"grad_norm": 1.6795750856399536,
"learning_rate": 6.8813793103448275e-06,
"loss": 0.069,
"step": 5025
},
{
"epoch": 1.2691631063081177,
"grad_norm": 1.418845772743225,
"learning_rate": 6.864137931034483e-06,
"loss": 0.0609,
"step": 5050
},
{
"epoch": 1.2754460919829103,
"grad_norm": 1.2165697813034058,
"learning_rate": 6.846896551724138e-06,
"loss": 0.0638,
"step": 5075
},
{
"epoch": 1.2817290776577028,
"grad_norm": 2.068269729614258,
"learning_rate": 6.829655172413794e-06,
"loss": 0.0673,
"step": 5100
},
{
"epoch": 1.2880120633324956,
"grad_norm": 1.2388827800750732,
"learning_rate": 6.812413793103449e-06,
"loss": 0.0701,
"step": 5125
},
{
"epoch": 1.2942950490072882,
"grad_norm": 1.2347630262374878,
"learning_rate": 6.7951724137931044e-06,
"loss": 0.065,
"step": 5150
},
{
"epoch": 1.300578034682081,
"grad_norm": 1.15070378780365,
"learning_rate": 6.7779310344827585e-06,
"loss": 0.0637,
"step": 5175
},
{
"epoch": 1.3068610203568736,
"grad_norm": 1.2261390686035156,
"learning_rate": 6.760689655172414e-06,
"loss": 0.0676,
"step": 5200
},
{
"epoch": 1.3131440060316661,
"grad_norm": 1.9254522323608398,
"learning_rate": 6.74344827586207e-06,
"loss": 0.0594,
"step": 5225
},
{
"epoch": 1.319426991706459,
"grad_norm": 1.3794054985046387,
"learning_rate": 6.726206896551725e-06,
"loss": 0.0649,
"step": 5250
},
{
"epoch": 1.3257099773812515,
"grad_norm": 1.6740128993988037,
"learning_rate": 6.7089655172413805e-06,
"loss": 0.0619,
"step": 5275
},
{
"epoch": 1.3319929630560443,
"grad_norm": 1.2363388538360596,
"learning_rate": 6.691724137931035e-06,
"loss": 0.0646,
"step": 5300
},
{
"epoch": 1.3382759487308369,
"grad_norm": 1.433228850364685,
"learning_rate": 6.67448275862069e-06,
"loss": 0.0663,
"step": 5325
},
{
"epoch": 1.3445589344056295,
"grad_norm": 1.898812174797058,
"learning_rate": 6.657241379310345e-06,
"loss": 0.0574,
"step": 5350
},
{
"epoch": 1.3508419200804223,
"grad_norm": 1.3165233135223389,
"learning_rate": 6.640000000000001e-06,
"loss": 0.0597,
"step": 5375
},
{
"epoch": 1.3571249057552148,
"grad_norm": 1.8794306516647339,
"learning_rate": 6.622758620689656e-06,
"loss": 0.0697,
"step": 5400
},
{
"epoch": 1.3634078914300076,
"grad_norm": 1.1872018575668335,
"learning_rate": 6.605517241379311e-06,
"loss": 0.0587,
"step": 5425
},
{
"epoch": 1.3696908771048002,
"grad_norm": 1.639711856842041,
"learning_rate": 6.588275862068965e-06,
"loss": 0.0645,
"step": 5450
},
{
"epoch": 1.3759738627795928,
"grad_norm": 1.2173725366592407,
"learning_rate": 6.571034482758621e-06,
"loss": 0.055,
"step": 5475
},
{
"epoch": 1.3822568484543856,
"grad_norm": 1.2602193355560303,
"learning_rate": 6.553793103448276e-06,
"loss": 0.0543,
"step": 5500
},
{
"epoch": 1.3885398341291781,
"grad_norm": 1.350376009941101,
"learning_rate": 6.536551724137932e-06,
"loss": 0.0675,
"step": 5525
},
{
"epoch": 1.394822819803971,
"grad_norm": 1.314760446548462,
"learning_rate": 6.519310344827587e-06,
"loss": 0.0597,
"step": 5550
},
{
"epoch": 1.4011058054787635,
"grad_norm": 1.0545654296875,
"learning_rate": 6.502068965517242e-06,
"loss": 0.0553,
"step": 5575
},
{
"epoch": 1.407388791153556,
"grad_norm": 1.2332383394241333,
"learning_rate": 6.484827586206896e-06,
"loss": 0.0607,
"step": 5600
},
{
"epoch": 1.4136717768283489,
"grad_norm": 1.3052699565887451,
"learning_rate": 6.467586206896552e-06,
"loss": 0.0689,
"step": 5625
},
{
"epoch": 1.4199547625031415,
"grad_norm": 1.505159854888916,
"learning_rate": 6.450344827586208e-06,
"loss": 0.0585,
"step": 5650
},
{
"epoch": 1.4262377481779343,
"grad_norm": 1.2076035737991333,
"learning_rate": 6.433103448275863e-06,
"loss": 0.0624,
"step": 5675
},
{
"epoch": 1.4325207338527268,
"grad_norm": 0.716097354888916,
"learning_rate": 6.415862068965518e-06,
"loss": 0.056,
"step": 5700
},
{
"epoch": 1.4388037195275194,
"grad_norm": 0.8870618939399719,
"learning_rate": 6.398620689655173e-06,
"loss": 0.0592,
"step": 5725
},
{
"epoch": 1.4450867052023122,
"grad_norm": 2.086239814758301,
"learning_rate": 6.381379310344828e-06,
"loss": 0.0647,
"step": 5750
},
{
"epoch": 1.4513696908771048,
"grad_norm": 1.2493882179260254,
"learning_rate": 6.364137931034483e-06,
"loss": 0.0664,
"step": 5775
},
{
"epoch": 1.4576526765518976,
"grad_norm": 0.78863924741745,
"learning_rate": 6.346896551724139e-06,
"loss": 0.0545,
"step": 5800
},
{
"epoch": 1.4639356622266901,
"grad_norm": 1.2974257469177246,
"learning_rate": 6.3296551724137935e-06,
"loss": 0.0643,
"step": 5825
},
{
"epoch": 1.4702186479014827,
"grad_norm": 1.220800518989563,
"learning_rate": 6.312413793103449e-06,
"loss": 0.0664,
"step": 5850
},
{
"epoch": 1.4765016335762755,
"grad_norm": 1.9717583656311035,
"learning_rate": 6.295172413793103e-06,
"loss": 0.0585,
"step": 5875
},
{
"epoch": 1.482784619251068,
"grad_norm": 1.9224556684494019,
"learning_rate": 6.277931034482759e-06,
"loss": 0.0607,
"step": 5900
},
{
"epoch": 1.4890676049258609,
"grad_norm": 1.3678847551345825,
"learning_rate": 6.260689655172414e-06,
"loss": 0.0619,
"step": 5925
},
{
"epoch": 1.4953505906006535,
"grad_norm": 1.2710736989974976,
"learning_rate": 6.24344827586207e-06,
"loss": 0.0615,
"step": 5950
},
{
"epoch": 1.501633576275446,
"grad_norm": 1.1889654397964478,
"learning_rate": 6.2262068965517245e-06,
"loss": 0.0595,
"step": 5975
},
{
"epoch": 1.5079165619502386,
"grad_norm": 1.165711760520935,
"learning_rate": 6.20896551724138e-06,
"loss": 0.0529,
"step": 6000
},
{
"epoch": 1.5079165619502386,
"eval_loss": 0.12242772430181503,
"eval_runtime": 28299.0323,
"eval_samples_per_second": 0.69,
"eval_steps_per_second": 0.086,
"eval_wer": 37.89097763366001,
"step": 6000
},
{
"epoch": 1.5141995476250314,
"grad_norm": 1.0195108652114868,
"learning_rate": 6.191724137931034e-06,
"loss": 0.0586,
"step": 6025
},
{
"epoch": 1.5204825332998242,
"grad_norm": 1.1056115627288818,
"learning_rate": 6.17448275862069e-06,
"loss": 0.0576,
"step": 6050
},
{
"epoch": 1.5267655189746168,
"grad_norm": 1.2018024921417236,
"learning_rate": 6.157241379310346e-06,
"loss": 0.0641,
"step": 6075
},
{
"epoch": 1.5330485046494093,
"grad_norm": 1.426589846611023,
"learning_rate": 6.1400000000000005e-06,
"loss": 0.0575,
"step": 6100
},
{
"epoch": 1.539331490324202,
"grad_norm": 1.0339651107788086,
"learning_rate": 6.122758620689656e-06,
"loss": 0.0459,
"step": 6125
},
{
"epoch": 1.5456144759989947,
"grad_norm": 1.3441721200942993,
"learning_rate": 6.105517241379311e-06,
"loss": 0.0588,
"step": 6150
},
{
"epoch": 1.5518974616737875,
"grad_norm": 1.2048940658569336,
"learning_rate": 6.088275862068966e-06,
"loss": 0.0573,
"step": 6175
},
{
"epoch": 1.55818044734858,
"grad_norm": 1.5876215696334839,
"learning_rate": 6.071034482758621e-06,
"loss": 0.0616,
"step": 6200
},
{
"epoch": 1.5644634330233727,
"grad_norm": 1.1515843868255615,
"learning_rate": 6.0537931034482766e-06,
"loss": 0.0566,
"step": 6225
},
{
"epoch": 1.5707464186981652,
"grad_norm": 1.2697322368621826,
"learning_rate": 6.036551724137931e-06,
"loss": 0.0654,
"step": 6250
},
{
"epoch": 1.577029404372958,
"grad_norm": 0.8662827014923096,
"learning_rate": 6.019310344827587e-06,
"loss": 0.0568,
"step": 6275
},
{
"epoch": 1.5833123900477508,
"grad_norm": 1.5702407360076904,
"learning_rate": 6.002068965517241e-06,
"loss": 0.0563,
"step": 6300
},
{
"epoch": 1.5895953757225434,
"grad_norm": 1.2121763229370117,
"learning_rate": 5.984827586206897e-06,
"loss": 0.0638,
"step": 6325
},
{
"epoch": 1.595878361397336,
"grad_norm": 1.257488489151001,
"learning_rate": 5.967586206896552e-06,
"loss": 0.0577,
"step": 6350
},
{
"epoch": 1.6021613470721285,
"grad_norm": 1.1616463661193848,
"learning_rate": 5.9503448275862075e-06,
"loss": 0.0604,
"step": 6375
},
{
"epoch": 1.6084443327469213,
"grad_norm": 1.3494690656661987,
"learning_rate": 5.933103448275862e-06,
"loss": 0.0628,
"step": 6400
},
{
"epoch": 1.6147273184217141,
"grad_norm": 1.5194650888442993,
"learning_rate": 5.915862068965518e-06,
"loss": 0.0549,
"step": 6425
},
{
"epoch": 1.6210103040965067,
"grad_norm": 1.3437527418136597,
"learning_rate": 5.898620689655174e-06,
"loss": 0.0579,
"step": 6450
},
{
"epoch": 1.6272932897712993,
"grad_norm": 1.445145845413208,
"learning_rate": 5.881379310344828e-06,
"loss": 0.0619,
"step": 6475
},
{
"epoch": 1.6335762754460919,
"grad_norm": 1.3654954433441162,
"learning_rate": 5.8641379310344835e-06,
"loss": 0.0572,
"step": 6500
},
{
"epoch": 1.6398592611208846,
"grad_norm": 1.4988460540771484,
"learning_rate": 5.846896551724138e-06,
"loss": 0.0599,
"step": 6525
},
{
"epoch": 1.6461422467956774,
"grad_norm": 1.3341230154037476,
"learning_rate": 5.829655172413794e-06,
"loss": 0.0555,
"step": 6550
},
{
"epoch": 1.65242523247047,
"grad_norm": 1.2729185819625854,
"learning_rate": 5.812413793103449e-06,
"loss": 0.0542,
"step": 6575
},
{
"epoch": 1.6587082181452626,
"grad_norm": 1.7550246715545654,
"learning_rate": 5.795172413793104e-06,
"loss": 0.0582,
"step": 6600
},
{
"epoch": 1.6649912038200552,
"grad_norm": 1.5241115093231201,
"learning_rate": 5.777931034482759e-06,
"loss": 0.063,
"step": 6625
},
{
"epoch": 1.671274189494848,
"grad_norm": 1.8420275449752808,
"learning_rate": 5.7606896551724144e-06,
"loss": 0.0656,
"step": 6650
},
{
"epoch": 1.6775571751696408,
"grad_norm": 1.0507006645202637,
"learning_rate": 5.743448275862069e-06,
"loss": 0.0562,
"step": 6675
},
{
"epoch": 1.6838401608444333,
"grad_norm": 2.0767569541931152,
"learning_rate": 5.726206896551725e-06,
"loss": 0.0582,
"step": 6700
},
{
"epoch": 1.690123146519226,
"grad_norm": 0.8954183459281921,
"learning_rate": 5.708965517241379e-06,
"loss": 0.0602,
"step": 6725
},
{
"epoch": 1.6964061321940185,
"grad_norm": 0.9078446626663208,
"learning_rate": 5.691724137931035e-06,
"loss": 0.0529,
"step": 6750
},
{
"epoch": 1.7026891178688113,
"grad_norm": 1.581921935081482,
"learning_rate": 5.67448275862069e-06,
"loss": 0.0548,
"step": 6775
},
{
"epoch": 1.708972103543604,
"grad_norm": 1.4554569721221924,
"learning_rate": 5.657241379310345e-06,
"loss": 0.0563,
"step": 6800
},
{
"epoch": 1.7152550892183966,
"grad_norm": 0.9179530739784241,
"learning_rate": 5.64e-06,
"loss": 0.0544,
"step": 6825
},
{
"epoch": 1.7215380748931892,
"grad_norm": 1.1374155282974243,
"learning_rate": 5.622758620689656e-06,
"loss": 0.057,
"step": 6850
},
{
"epoch": 1.7278210605679818,
"grad_norm": 1.0349596738815308,
"learning_rate": 5.605517241379312e-06,
"loss": 0.0573,
"step": 6875
},
{
"epoch": 1.7341040462427746,
"grad_norm": 1.4229092597961426,
"learning_rate": 5.588275862068966e-06,
"loss": 0.0487,
"step": 6900
},
{
"epoch": 1.7403870319175674,
"grad_norm": 1.2893837690353394,
"learning_rate": 5.571034482758621e-06,
"loss": 0.0605,
"step": 6925
},
{
"epoch": 1.74667001759236,
"grad_norm": 1.1475664377212524,
"learning_rate": 5.553793103448276e-06,
"loss": 0.0577,
"step": 6950
},
{
"epoch": 1.7529530032671525,
"grad_norm": 1.1052597761154175,
"learning_rate": 5.536551724137932e-06,
"loss": 0.0531,
"step": 6975
},
{
"epoch": 1.759235988941945,
"grad_norm": 1.0279254913330078,
"learning_rate": 5.519310344827587e-06,
"loss": 0.0624,
"step": 7000
},
{
"epoch": 1.759235988941945,
"eval_loss": 0.12031704932451248,
"eval_runtime": 28303.9606,
"eval_samples_per_second": 0.69,
"eval_steps_per_second": 0.086,
"eval_wer": 37.24605862768746,
"step": 7000
},
{
"epoch": 1.765518974616738,
"grad_norm": 1.0434989929199219,
"learning_rate": 5.502068965517242e-06,
"loss": 0.0488,
"step": 7025
},
{
"epoch": 1.7718019602915307,
"grad_norm": 1.1990073919296265,
"learning_rate": 5.484827586206897e-06,
"loss": 0.0525,
"step": 7050
},
{
"epoch": 1.7780849459663233,
"grad_norm": 1.026079535484314,
"learning_rate": 5.467586206896552e-06,
"loss": 0.0531,
"step": 7075
},
{
"epoch": 1.7843679316411158,
"grad_norm": 0.9900615215301514,
"learning_rate": 5.450344827586207e-06,
"loss": 0.0545,
"step": 7100
},
{
"epoch": 1.7906509173159084,
"grad_norm": 1.5279738903045654,
"learning_rate": 5.433103448275863e-06,
"loss": 0.0566,
"step": 7125
},
{
"epoch": 1.7969339029907012,
"grad_norm": 0.8226191401481628,
"learning_rate": 5.415862068965517e-06,
"loss": 0.056,
"step": 7150
},
{
"epoch": 1.8032168886654938,
"grad_norm": 1.3544007539749146,
"learning_rate": 5.398620689655173e-06,
"loss": 0.061,
"step": 7175
},
{
"epoch": 1.8094998743402866,
"grad_norm": 1.2771939039230347,
"learning_rate": 5.3813793103448275e-06,
"loss": 0.0542,
"step": 7200
},
{
"epoch": 1.8157828600150792,
"grad_norm": 1.200951099395752,
"learning_rate": 5.364137931034483e-06,
"loss": 0.0544,
"step": 7225
},
{
"epoch": 1.8220658456898717,
"grad_norm": 0.9072504639625549,
"learning_rate": 5.346896551724139e-06,
"loss": 0.0538,
"step": 7250
},
{
"epoch": 1.8283488313646645,
"grad_norm": 1.088958978652954,
"learning_rate": 5.329655172413794e-06,
"loss": 0.054,
"step": 7275
},
{
"epoch": 1.834631817039457,
"grad_norm": 1.359937071800232,
"learning_rate": 5.3124137931034495e-06,
"loss": 0.0623,
"step": 7300
},
{
"epoch": 1.84091480271425,
"grad_norm": 1.467264175415039,
"learning_rate": 5.2951724137931035e-06,
"loss": 0.0536,
"step": 7325
},
{
"epoch": 1.8471977883890425,
"grad_norm": 1.4082632064819336,
"learning_rate": 5.277931034482759e-06,
"loss": 0.051,
"step": 7350
},
{
"epoch": 1.853480774063835,
"grad_norm": 1.472396969795227,
"learning_rate": 5.260689655172414e-06,
"loss": 0.0662,
"step": 7375
},
{
"epoch": 1.8597637597386278,
"grad_norm": 1.587661862373352,
"learning_rate": 5.24344827586207e-06,
"loss": 0.055,
"step": 7400
},
{
"epoch": 1.8660467454134204,
"grad_norm": 1.0919044017791748,
"learning_rate": 5.226206896551725e-06,
"loss": 0.0509,
"step": 7425
},
{
"epoch": 1.8723297310882132,
"grad_norm": 0.9456779956817627,
"learning_rate": 5.20896551724138e-06,
"loss": 0.0557,
"step": 7450
},
{
"epoch": 1.8786127167630058,
"grad_norm": 1.6846345663070679,
"learning_rate": 5.1917241379310345e-06,
"loss": 0.0531,
"step": 7475
},
{
"epoch": 1.8848957024377984,
"grad_norm": 0.5773513317108154,
"learning_rate": 5.17448275862069e-06,
"loss": 0.0554,
"step": 7500
},
{
"epoch": 1.8911786881125912,
"grad_norm": 0.977917492389679,
"learning_rate": 5.157241379310345e-06,
"loss": 0.0561,
"step": 7525
},
{
"epoch": 1.8974616737873837,
"grad_norm": 1.2408355474472046,
"learning_rate": 5.140000000000001e-06,
"loss": 0.0615,
"step": 7550
},
{
"epoch": 1.9037446594621765,
"grad_norm": 1.273364543914795,
"learning_rate": 5.122758620689655e-06,
"loss": 0.0572,
"step": 7575
},
{
"epoch": 1.910027645136969,
"grad_norm": 1.2105774879455566,
"learning_rate": 5.1055172413793105e-06,
"loss": 0.0469,
"step": 7600
},
{
"epoch": 1.9163106308117617,
"grad_norm": 0.6686076521873474,
"learning_rate": 5.088275862068965e-06,
"loss": 0.0596,
"step": 7625
},
{
"epoch": 1.9225936164865542,
"grad_norm": 1.473767876625061,
"learning_rate": 5.071034482758621e-06,
"loss": 0.0558,
"step": 7650
},
{
"epoch": 1.928876602161347,
"grad_norm": 1.183693289756775,
"learning_rate": 5.053793103448277e-06,
"loss": 0.0578,
"step": 7675
},
{
"epoch": 1.9351595878361398,
"grad_norm": 1.661081075668335,
"learning_rate": 5.036551724137932e-06,
"loss": 0.0577,
"step": 7700
},
{
"epoch": 1.9414425735109324,
"grad_norm": 1.035583734512329,
"learning_rate": 5.019310344827587e-06,
"loss": 0.0504,
"step": 7725
},
{
"epoch": 1.947725559185725,
"grad_norm": 1.2706879377365112,
"learning_rate": 5.002068965517241e-06,
"loss": 0.0523,
"step": 7750
},
{
"epoch": 1.9540085448605176,
"grad_norm": 1.558969497680664,
"learning_rate": 4.984827586206897e-06,
"loss": 0.0527,
"step": 7775
},
{
"epoch": 1.9602915305353104,
"grad_norm": 2.107837677001953,
"learning_rate": 4.967586206896552e-06,
"loss": 0.0496,
"step": 7800
},
{
"epoch": 1.9665745162101032,
"grad_norm": 1.1281065940856934,
"learning_rate": 4.950344827586207e-06,
"loss": 0.0495,
"step": 7825
},
{
"epoch": 1.9728575018848957,
"grad_norm": 0.92071133852005,
"learning_rate": 4.933103448275863e-06,
"loss": 0.0543,
"step": 7850
},
{
"epoch": 1.9791404875596883,
"grad_norm": 1.5125892162322998,
"learning_rate": 4.9158620689655175e-06,
"loss": 0.0567,
"step": 7875
},
{
"epoch": 1.9854234732344809,
"grad_norm": 1.4018179178237915,
"learning_rate": 4.898620689655173e-06,
"loss": 0.0577,
"step": 7900
},
{
"epoch": 1.9917064589092737,
"grad_norm": 1.599665880203247,
"learning_rate": 4.881379310344828e-06,
"loss": 0.0511,
"step": 7925
},
{
"epoch": 1.9979894445840665,
"grad_norm": 1.3747309446334839,
"learning_rate": 4.864137931034483e-06,
"loss": 0.0545,
"step": 7950
},
{
"epoch": 2.004272430258859,
"grad_norm": 1.2372041940689087,
"learning_rate": 4.846896551724139e-06,
"loss": 0.0434,
"step": 7975
},
{
"epoch": 2.0105554159336516,
"grad_norm": 1.0974595546722412,
"learning_rate": 4.8296551724137935e-06,
"loss": 0.0426,
"step": 8000
},
{
"epoch": 2.0105554159336516,
"eval_loss": 0.1260567456483841,
"eval_runtime": 28488.8565,
"eval_samples_per_second": 0.686,
"eval_steps_per_second": 0.086,
"eval_wer": 36.9741526861996,
"step": 8000
},
{
"epoch": 2.016838401608444,
"grad_norm": 0.5434245467185974,
"learning_rate": 4.812413793103448e-06,
"loss": 0.0372,
"step": 8025
},
{
"epoch": 2.023121387283237,
"grad_norm": 1.4904873371124268,
"learning_rate": 4.795172413793104e-06,
"loss": 0.0322,
"step": 8050
},
{
"epoch": 2.02940437295803,
"grad_norm": 0.8786129355430603,
"learning_rate": 4.777931034482759e-06,
"loss": 0.0378,
"step": 8075
},
{
"epoch": 2.0356873586328224,
"grad_norm": 1.062193512916565,
"learning_rate": 4.760689655172414e-06,
"loss": 0.0335,
"step": 8100
},
{
"epoch": 2.041970344307615,
"grad_norm": 1.1229702234268188,
"learning_rate": 4.7434482758620696e-06,
"loss": 0.0341,
"step": 8125
},
{
"epoch": 2.0482533299824075,
"grad_norm": 1.5959960222244263,
"learning_rate": 4.726206896551724e-06,
"loss": 0.0326,
"step": 8150
},
{
"epoch": 2.0545363156572005,
"grad_norm": 1.3407766819000244,
"learning_rate": 4.708965517241379e-06,
"loss": 0.0282,
"step": 8175
},
{
"epoch": 2.060819301331993,
"grad_norm": 0.8177748322486877,
"learning_rate": 4.691724137931035e-06,
"loss": 0.0334,
"step": 8200
},
{
"epoch": 2.0671022870067857,
"grad_norm": 0.5434231162071228,
"learning_rate": 4.67448275862069e-06,
"loss": 0.0332,
"step": 8225
},
{
"epoch": 2.0733852726815782,
"grad_norm": 0.6810811758041382,
"learning_rate": 4.657241379310346e-06,
"loss": 0.0343,
"step": 8250
},
{
"epoch": 2.079668258356371,
"grad_norm": 1.3621633052825928,
"learning_rate": 4.6400000000000005e-06,
"loss": 0.0358,
"step": 8275
},
{
"epoch": 2.085951244031164,
"grad_norm": 0.8485309481620789,
"learning_rate": 4.622758620689655e-06,
"loss": 0.0283,
"step": 8300
},
{
"epoch": 2.0922342297059564,
"grad_norm": 1.2133398056030273,
"learning_rate": 4.605517241379311e-06,
"loss": 0.0342,
"step": 8325
},
{
"epoch": 2.098517215380749,
"grad_norm": 1.9074926376342773,
"learning_rate": 4.588275862068966e-06,
"loss": 0.0372,
"step": 8350
},
{
"epoch": 2.1048002010555416,
"grad_norm": 1.3371448516845703,
"learning_rate": 4.571034482758621e-06,
"loss": 0.0356,
"step": 8375
},
{
"epoch": 2.111083186730334,
"grad_norm": 1.3409150838851929,
"learning_rate": 4.5537931034482765e-06,
"loss": 0.0354,
"step": 8400
},
{
"epoch": 2.117366172405127,
"grad_norm": 1.1407537460327148,
"learning_rate": 4.536551724137931e-06,
"loss": 0.0338,
"step": 8425
},
{
"epoch": 2.1236491580799197,
"grad_norm": 1.0432764291763306,
"learning_rate": 4.519310344827586e-06,
"loss": 0.0325,
"step": 8450
},
{
"epoch": 2.1299321437547123,
"grad_norm": 1.2592930793762207,
"learning_rate": 4.502068965517242e-06,
"loss": 0.0374,
"step": 8475
},
{
"epoch": 2.136215129429505,
"grad_norm": 0.9935320019721985,
"learning_rate": 4.484827586206897e-06,
"loss": 0.0349,
"step": 8500
},
{
"epoch": 2.1424981151042974,
"grad_norm": 1.1453524827957153,
"learning_rate": 4.467586206896552e-06,
"loss": 0.0357,
"step": 8525
},
{
"epoch": 2.1487811007790905,
"grad_norm": 0.8577796816825867,
"learning_rate": 4.4503448275862074e-06,
"loss": 0.0349,
"step": 8550
},
{
"epoch": 2.155064086453883,
"grad_norm": 1.0337741374969482,
"learning_rate": 4.433103448275862e-06,
"loss": 0.0321,
"step": 8575
},
{
"epoch": 2.1613470721286756,
"grad_norm": 1.4780592918395996,
"learning_rate": 4.415862068965517e-06,
"loss": 0.0359,
"step": 8600
},
{
"epoch": 2.167630057803468,
"grad_norm": 1.6528609991073608,
"learning_rate": 4.398620689655173e-06,
"loss": 0.0384,
"step": 8625
},
{
"epoch": 2.1739130434782608,
"grad_norm": 0.7156565189361572,
"learning_rate": 4.381379310344829e-06,
"loss": 0.0353,
"step": 8650
},
{
"epoch": 2.1801960291530533,
"grad_norm": 1.1753544807434082,
"learning_rate": 4.3641379310344835e-06,
"loss": 0.031,
"step": 8675
},
{
"epoch": 2.1864790148278463,
"grad_norm": 0.7453944087028503,
"learning_rate": 4.346896551724138e-06,
"loss": 0.0348,
"step": 8700
},
{
"epoch": 2.192762000502639,
"grad_norm": 1.872745156288147,
"learning_rate": 4.329655172413793e-06,
"loss": 0.0351,
"step": 8725
},
{
"epoch": 2.1990449861774315,
"grad_norm": 0.6683670282363892,
"learning_rate": 4.312413793103449e-06,
"loss": 0.0351,
"step": 8750
},
{
"epoch": 2.205327971852224,
"grad_norm": 1.3862112760543823,
"learning_rate": 4.295172413793104e-06,
"loss": 0.0333,
"step": 8775
},
{
"epoch": 2.2116109575270166,
"grad_norm": 1.027766466140747,
"learning_rate": 4.277931034482759e-06,
"loss": 0.0287,
"step": 8800
},
{
"epoch": 2.2178939432018097,
"grad_norm": 0.8764299154281616,
"learning_rate": 4.260689655172414e-06,
"loss": 0.0324,
"step": 8825
},
{
"epoch": 2.2241769288766022,
"grad_norm": 0.8723062872886658,
"learning_rate": 4.243448275862069e-06,
"loss": 0.0283,
"step": 8850
},
{
"epoch": 2.230459914551395,
"grad_norm": 1.1235435009002686,
"learning_rate": 4.226206896551724e-06,
"loss": 0.0376,
"step": 8875
},
{
"epoch": 2.2367429002261874,
"grad_norm": 1.0615513324737549,
"learning_rate": 4.20896551724138e-06,
"loss": 0.0354,
"step": 8900
},
{
"epoch": 2.24302588590098,
"grad_norm": 0.8142825365066528,
"learning_rate": 4.191724137931035e-06,
"loss": 0.0274,
"step": 8925
},
{
"epoch": 2.249308871575773,
"grad_norm": 1.7816015481948853,
"learning_rate": 4.17448275862069e-06,
"loss": 0.0349,
"step": 8950
},
{
"epoch": 2.2555918572505655,
"grad_norm": 1.0881839990615845,
"learning_rate": 4.157241379310345e-06,
"loss": 0.0344,
"step": 8975
},
{
"epoch": 2.261874842925358,
"grad_norm": 0.554862916469574,
"learning_rate": 4.14e-06,
"loss": 0.0305,
"step": 9000
},
{
"epoch": 2.261874842925358,
"eval_loss": 0.1346791386604309,
"eval_runtime": 28846.9785,
"eval_samples_per_second": 0.677,
"eval_steps_per_second": 0.085,
"eval_wer": 36.786382314919805,
"step": 9000
},
{
"epoch": 2.2681578286001507,
"grad_norm": 1.2704734802246094,
"learning_rate": 4.122758620689655e-06,
"loss": 0.0334,
"step": 9025
},
{
"epoch": 2.2744408142749433,
"grad_norm": 1.3331466913223267,
"learning_rate": 4.105517241379311e-06,
"loss": 0.0343,
"step": 9050
},
{
"epoch": 2.2807237999497363,
"grad_norm": 0.3723588287830353,
"learning_rate": 4.0882758620689665e-06,
"loss": 0.0343,
"step": 9075
},
{
"epoch": 2.287006785624529,
"grad_norm": 2.03139591217041,
"learning_rate": 4.071034482758621e-06,
"loss": 0.034,
"step": 9100
},
{
"epoch": 2.2932897712993214,
"grad_norm": 1.230726718902588,
"learning_rate": 4.053793103448276e-06,
"loss": 0.0319,
"step": 9125
},
{
"epoch": 2.299572756974114,
"grad_norm": 1.408710241317749,
"learning_rate": 4.036551724137931e-06,
"loss": 0.0267,
"step": 9150
},
{
"epoch": 2.3058557426489066,
"grad_norm": 1.5532382726669312,
"learning_rate": 4.019310344827587e-06,
"loss": 0.0335,
"step": 9175
},
{
"epoch": 2.3121387283236996,
"grad_norm": 1.1856962442398071,
"learning_rate": 4.002068965517242e-06,
"loss": 0.0346,
"step": 9200
},
{
"epoch": 2.318421713998492,
"grad_norm": 1.0790759325027466,
"learning_rate": 3.9848275862068965e-06,
"loss": 0.0339,
"step": 9225
},
{
"epoch": 2.3247046996732847,
"grad_norm": 0.7154790759086609,
"learning_rate": 3.967586206896552e-06,
"loss": 0.0353,
"step": 9250
},
{
"epoch": 2.3309876853480773,
"grad_norm": 0.8203781843185425,
"learning_rate": 3.950344827586207e-06,
"loss": 0.0392,
"step": 9275
},
{
"epoch": 2.33727067102287,
"grad_norm": 1.3644154071807861,
"learning_rate": 3.933103448275862e-06,
"loss": 0.0333,
"step": 9300
},
{
"epoch": 2.343553656697663,
"grad_norm": 0.7084758877754211,
"learning_rate": 3.915862068965518e-06,
"loss": 0.0309,
"step": 9325
},
{
"epoch": 2.3498366423724555,
"grad_norm": 0.7974054217338562,
"learning_rate": 3.898620689655173e-06,
"loss": 0.0337,
"step": 9350
},
{
"epoch": 2.356119628047248,
"grad_norm": 0.9612919092178345,
"learning_rate": 3.8813793103448275e-06,
"loss": 0.0358,
"step": 9375
},
{
"epoch": 2.3624026137220406,
"grad_norm": 1.3854459524154663,
"learning_rate": 3.864137931034483e-06,
"loss": 0.0299,
"step": 9400
},
{
"epoch": 2.368685599396833,
"grad_norm": 1.1167787313461304,
"learning_rate": 3.846896551724138e-06,
"loss": 0.0337,
"step": 9425
},
{
"epoch": 2.3749685850716262,
"grad_norm": 0.9742668271064758,
"learning_rate": 3.829655172413793e-06,
"loss": 0.0286,
"step": 9450
},
{
"epoch": 2.381251570746419,
"grad_norm": 1.435309886932373,
"learning_rate": 3.8124137931034486e-06,
"loss": 0.0305,
"step": 9475
},
{
"epoch": 2.3875345564212114,
"grad_norm": 1.1362321376800537,
"learning_rate": 3.795172413793104e-06,
"loss": 0.0313,
"step": 9500
},
{
"epoch": 2.393817542096004,
"grad_norm": 1.0465309619903564,
"learning_rate": 3.7779310344827592e-06,
"loss": 0.0364,
"step": 9525
},
{
"epoch": 2.4001005277707965,
"grad_norm": 1.4722024202346802,
"learning_rate": 3.760689655172414e-06,
"loss": 0.0451,
"step": 9550
},
{
"epoch": 2.406383513445589,
"grad_norm": 1.1084930896759033,
"learning_rate": 3.7434482758620694e-06,
"loss": 0.0349,
"step": 9575
},
{
"epoch": 2.412666499120382,
"grad_norm": 1.4382020235061646,
"learning_rate": 3.7262068965517247e-06,
"loss": 0.0338,
"step": 9600
},
{
"epoch": 2.4189494847951747,
"grad_norm": 1.6590332984924316,
"learning_rate": 3.7089655172413795e-06,
"loss": 0.0335,
"step": 9625
},
{
"epoch": 2.4252324704699673,
"grad_norm": 0.7975425720214844,
"learning_rate": 3.691724137931035e-06,
"loss": 0.0371,
"step": 9650
},
{
"epoch": 2.43151545614476,
"grad_norm": 0.9135144352912903,
"learning_rate": 3.67448275862069e-06,
"loss": 0.0384,
"step": 9675
},
{
"epoch": 2.4377984418195524,
"grad_norm": 1.74324369430542,
"learning_rate": 3.657241379310345e-06,
"loss": 0.029,
"step": 9700
},
{
"epoch": 2.4440814274943454,
"grad_norm": 0.9986597299575806,
"learning_rate": 3.6400000000000003e-06,
"loss": 0.0363,
"step": 9725
},
{
"epoch": 2.450364413169138,
"grad_norm": 0.8304340839385986,
"learning_rate": 3.622758620689655e-06,
"loss": 0.0337,
"step": 9750
},
{
"epoch": 2.4566473988439306,
"grad_norm": 0.6401971578598022,
"learning_rate": 3.6055172413793105e-06,
"loss": 0.0309,
"step": 9775
},
{
"epoch": 2.462930384518723,
"grad_norm": 1.2303663492202759,
"learning_rate": 3.5882758620689658e-06,
"loss": 0.0333,
"step": 9800
},
{
"epoch": 2.4692133701935157,
"grad_norm": 1.2973604202270508,
"learning_rate": 3.5710344827586206e-06,
"loss": 0.0346,
"step": 9825
},
{
"epoch": 2.4754963558683087,
"grad_norm": 1.0538204908370972,
"learning_rate": 3.553793103448276e-06,
"loss": 0.0335,
"step": 9850
},
{
"epoch": 2.4817793415431013,
"grad_norm": 1.006469964981079,
"learning_rate": 3.5365517241379316e-06,
"loss": 0.0319,
"step": 9875
},
{
"epoch": 2.488062327217894,
"grad_norm": 1.2951979637145996,
"learning_rate": 3.5193103448275865e-06,
"loss": 0.0309,
"step": 9900
},
{
"epoch": 2.4943453128926865,
"grad_norm": 1.205333948135376,
"learning_rate": 3.502068965517242e-06,
"loss": 0.0322,
"step": 9925
},
{
"epoch": 2.500628298567479,
"grad_norm": 1.051314115524292,
"learning_rate": 3.484827586206897e-06,
"loss": 0.0347,
"step": 9950
},
{
"epoch": 2.506911284242272,
"grad_norm": 1.1379516124725342,
"learning_rate": 3.467586206896552e-06,
"loss": 0.0297,
"step": 9975
},
{
"epoch": 2.5131942699170646,
"grad_norm": 1.4470369815826416,
"learning_rate": 3.4503448275862073e-06,
"loss": 0.0344,
"step": 10000
},
{
"epoch": 2.5131942699170646,
"eval_loss": 0.13530105352401733,
"eval_runtime": 28721.0802,
"eval_samples_per_second": 0.68,
"eval_steps_per_second": 0.085,
"eval_wer": 36.4960604331718,
"step": 10000
}
],
"logging_steps": 25,
"max_steps": 15000,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.6324881677254656e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}