fsicoli's picture
End of training
cc13f4f verified
raw
history blame contribute delete
No virus
129 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.2431108463904117,
"eval_steps": 2000,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 29.186527252197266,
"learning_rate": 3.6666666666666664e-09,
"loss": 0.9853,
"step": 25
},
{
"epoch": 0.0,
"grad_norm": 50.51877975463867,
"learning_rate": 7.666666666666666e-09,
"loss": 2.3272,
"step": 50
},
{
"epoch": 0.0,
"grad_norm": 19.928226470947266,
"learning_rate": 1.1833333333333332e-08,
"loss": 0.8806,
"step": 75
},
{
"epoch": 0.0,
"grad_norm": 51.900150299072266,
"learning_rate": 1.6e-08,
"loss": 2.3406,
"step": 100
},
{
"epoch": 0.0,
"grad_norm": 28.492319107055664,
"learning_rate": 2.0166666666666665e-08,
"loss": 0.9825,
"step": 125
},
{
"epoch": 0.0,
"grad_norm": 52.842262268066406,
"learning_rate": 2.433333333333333e-08,
"loss": 2.175,
"step": 150
},
{
"epoch": 0.0,
"grad_norm": 21.88471031188965,
"learning_rate": 2.85e-08,
"loss": 1.0408,
"step": 175
},
{
"epoch": 0.0,
"grad_norm": 46.56641387939453,
"learning_rate": 3.266666666666666e-08,
"loss": 2.2269,
"step": 200
},
{
"epoch": 0.0,
"grad_norm": 19.953123092651367,
"learning_rate": 3.683333333333333e-08,
"loss": 0.981,
"step": 225
},
{
"epoch": 0.0,
"grad_norm": 54.310176849365234,
"learning_rate": 4.1e-08,
"loss": 2.3529,
"step": 250
},
{
"epoch": 0.0,
"grad_norm": 27.19457244873047,
"learning_rate": 4.516666666666667e-08,
"loss": 0.9664,
"step": 275
},
{
"epoch": 0.0,
"grad_norm": 46.24717712402344,
"learning_rate": 4.933333333333333e-08,
"loss": 2.4138,
"step": 300
},
{
"epoch": 0.0,
"grad_norm": 33.52537536621094,
"learning_rate": 5.3499999999999996e-08,
"loss": 0.8663,
"step": 325
},
{
"epoch": 0.0,
"grad_norm": 52.76784896850586,
"learning_rate": 5.766666666666666e-08,
"loss": 2.2765,
"step": 350
},
{
"epoch": 0.0,
"grad_norm": 22.990055084228516,
"learning_rate": 6.183333333333332e-08,
"loss": 0.9368,
"step": 375
},
{
"epoch": 0.0,
"grad_norm": 45.44700241088867,
"learning_rate": 6.6e-08,
"loss": 2.2677,
"step": 400
},
{
"epoch": 0.01,
"grad_norm": 17.737760543823242,
"learning_rate": 7.016666666666667e-08,
"loss": 0.8554,
"step": 425
},
{
"epoch": 0.01,
"grad_norm": 42.23236846923828,
"learning_rate": 7.433333333333334e-08,
"loss": 2.082,
"step": 450
},
{
"epoch": 0.01,
"grad_norm": 23.066301345825195,
"learning_rate": 7.85e-08,
"loss": 0.9648,
"step": 475
},
{
"epoch": 0.01,
"grad_norm": 40.47060775756836,
"learning_rate": 8.266666666666667e-08,
"loss": 2.0705,
"step": 500
},
{
"epoch": 0.01,
"grad_norm": 22.501338958740234,
"learning_rate": 8.683333333333332e-08,
"loss": 0.8634,
"step": 525
},
{
"epoch": 0.01,
"grad_norm": 47.01912307739258,
"learning_rate": 9.1e-08,
"loss": 1.9144,
"step": 550
},
{
"epoch": 0.01,
"grad_norm": 20.637834548950195,
"learning_rate": 9.516666666666667e-08,
"loss": 0.8638,
"step": 575
},
{
"epoch": 0.01,
"grad_norm": 42.16878890991211,
"learning_rate": 9.933333333333332e-08,
"loss": 1.9055,
"step": 600
},
{
"epoch": 0.01,
"grad_norm": 22.79128646850586,
"learning_rate": 1.035e-07,
"loss": 0.8053,
"step": 625
},
{
"epoch": 0.01,
"grad_norm": 42.09727096557617,
"learning_rate": 1.0766666666666665e-07,
"loss": 1.7969,
"step": 650
},
{
"epoch": 0.01,
"grad_norm": 25.856449127197266,
"learning_rate": 1.1183333333333334e-07,
"loss": 0.7391,
"step": 675
},
{
"epoch": 0.01,
"grad_norm": 44.90492248535156,
"learning_rate": 1.16e-07,
"loss": 1.6681,
"step": 700
},
{
"epoch": 0.01,
"grad_norm": 19.00392723083496,
"learning_rate": 1.2016666666666668e-07,
"loss": 0.6716,
"step": 725
},
{
"epoch": 0.01,
"grad_norm": 38.95526123046875,
"learning_rate": 1.2433333333333332e-07,
"loss": 1.358,
"step": 750
},
{
"epoch": 0.01,
"grad_norm": 23.28983497619629,
"learning_rate": 1.285e-07,
"loss": 0.5689,
"step": 775
},
{
"epoch": 0.01,
"grad_norm": 43.42043685913086,
"learning_rate": 1.3266666666666664e-07,
"loss": 1.2778,
"step": 800
},
{
"epoch": 0.01,
"grad_norm": 19.808530807495117,
"learning_rate": 1.3683333333333333e-07,
"loss": 0.5527,
"step": 825
},
{
"epoch": 0.01,
"grad_norm": 35.28702926635742,
"learning_rate": 1.4099999999999998e-07,
"loss": 1.184,
"step": 850
},
{
"epoch": 0.01,
"grad_norm": 19.137269973754883,
"learning_rate": 1.4516666666666665e-07,
"loss": 0.5565,
"step": 875
},
{
"epoch": 0.01,
"grad_norm": 41.621971130371094,
"learning_rate": 1.4933333333333335e-07,
"loss": 1.147,
"step": 900
},
{
"epoch": 0.01,
"grad_norm": 18.03120994567871,
"learning_rate": 1.535e-07,
"loss": 0.5302,
"step": 925
},
{
"epoch": 0.01,
"grad_norm": 39.525482177734375,
"learning_rate": 1.5766666666666666e-07,
"loss": 1.1378,
"step": 950
},
{
"epoch": 0.01,
"grad_norm": 25.292404174804688,
"learning_rate": 1.6183333333333333e-07,
"loss": 0.4831,
"step": 975
},
{
"epoch": 0.01,
"grad_norm": 35.80854797363281,
"learning_rate": 1.66e-07,
"loss": 1.0397,
"step": 1000
},
{
"epoch": 0.01,
"grad_norm": 18.648828506469727,
"learning_rate": 1.7016666666666665e-07,
"loss": 0.4555,
"step": 1025
},
{
"epoch": 0.01,
"grad_norm": 39.72472381591797,
"learning_rate": 1.7433333333333332e-07,
"loss": 1.1202,
"step": 1050
},
{
"epoch": 0.01,
"grad_norm": 16.013608932495117,
"learning_rate": 1.785e-07,
"loss": 0.4914,
"step": 1075
},
{
"epoch": 0.01,
"grad_norm": 36.78615188598633,
"learning_rate": 1.8266666666666666e-07,
"loss": 1.0005,
"step": 1100
},
{
"epoch": 0.01,
"grad_norm": 17.770790100097656,
"learning_rate": 1.868333333333333e-07,
"loss": 0.4111,
"step": 1125
},
{
"epoch": 0.01,
"grad_norm": 31.849889755249023,
"learning_rate": 1.91e-07,
"loss": 0.927,
"step": 1150
},
{
"epoch": 0.01,
"grad_norm": 18.490398406982422,
"learning_rate": 1.9516666666666665e-07,
"loss": 0.4016,
"step": 1175
},
{
"epoch": 0.01,
"grad_norm": 31.49574851989746,
"learning_rate": 1.9933333333333332e-07,
"loss": 0.9365,
"step": 1200
},
{
"epoch": 0.01,
"grad_norm": 17.154340744018555,
"learning_rate": 2.035e-07,
"loss": 0.4247,
"step": 1225
},
{
"epoch": 0.02,
"grad_norm": 25.48064422607422,
"learning_rate": 2.0766666666666666e-07,
"loss": 0.9058,
"step": 1250
},
{
"epoch": 0.02,
"grad_norm": 19.94418716430664,
"learning_rate": 2.1183333333333333e-07,
"loss": 0.449,
"step": 1275
},
{
"epoch": 0.02,
"grad_norm": 29.873273849487305,
"learning_rate": 2.1599999999999998e-07,
"loss": 0.8359,
"step": 1300
},
{
"epoch": 0.02,
"grad_norm": 15.53857135772705,
"learning_rate": 2.2016666666666667e-07,
"loss": 0.4133,
"step": 1325
},
{
"epoch": 0.02,
"grad_norm": 28.56229019165039,
"learning_rate": 2.2433333333333332e-07,
"loss": 0.8403,
"step": 1350
},
{
"epoch": 0.02,
"grad_norm": 8.155872344970703,
"learning_rate": 2.285e-07,
"loss": 0.3881,
"step": 1375
},
{
"epoch": 0.02,
"grad_norm": 27.127405166625977,
"learning_rate": 2.3266666666666666e-07,
"loss": 0.822,
"step": 1400
},
{
"epoch": 0.02,
"grad_norm": 11.950821876525879,
"learning_rate": 2.3683333333333333e-07,
"loss": 0.4201,
"step": 1425
},
{
"epoch": 0.02,
"grad_norm": 31.142114639282227,
"learning_rate": 2.41e-07,
"loss": 0.8028,
"step": 1450
},
{
"epoch": 0.02,
"grad_norm": 15.139409065246582,
"learning_rate": 2.4516666666666665e-07,
"loss": 0.3845,
"step": 1475
},
{
"epoch": 0.02,
"grad_norm": 27.636058807373047,
"learning_rate": 2.493333333333333e-07,
"loss": 0.8359,
"step": 1500
},
{
"epoch": 0.02,
"grad_norm": 25.11219024658203,
"learning_rate": 2.535e-07,
"loss": 0.4223,
"step": 1525
},
{
"epoch": 0.02,
"grad_norm": 31.15846061706543,
"learning_rate": 2.5766666666666663e-07,
"loss": 0.875,
"step": 1550
},
{
"epoch": 0.02,
"grad_norm": 19.472412109375,
"learning_rate": 2.618333333333333e-07,
"loss": 0.3604,
"step": 1575
},
{
"epoch": 0.02,
"grad_norm": 22.84535026550293,
"learning_rate": 2.66e-07,
"loss": 0.7952,
"step": 1600
},
{
"epoch": 0.02,
"grad_norm": 16.574451446533203,
"learning_rate": 2.7016666666666667e-07,
"loss": 0.3795,
"step": 1625
},
{
"epoch": 0.02,
"grad_norm": 39.41230392456055,
"learning_rate": 2.743333333333333e-07,
"loss": 0.8251,
"step": 1650
},
{
"epoch": 0.02,
"grad_norm": 13.901805877685547,
"learning_rate": 2.785e-07,
"loss": 0.39,
"step": 1675
},
{
"epoch": 0.02,
"grad_norm": 24.57431983947754,
"learning_rate": 2.8266666666666666e-07,
"loss": 0.8535,
"step": 1700
},
{
"epoch": 0.02,
"grad_norm": 15.14571475982666,
"learning_rate": 2.868333333333333e-07,
"loss": 0.3597,
"step": 1725
},
{
"epoch": 0.02,
"grad_norm": 23.563344955444336,
"learning_rate": 2.9099999999999995e-07,
"loss": 0.7706,
"step": 1750
},
{
"epoch": 0.02,
"grad_norm": 17.370380401611328,
"learning_rate": 2.951666666666667e-07,
"loss": 0.3564,
"step": 1775
},
{
"epoch": 0.02,
"grad_norm": 32.44514083862305,
"learning_rate": 2.9933333333333334e-07,
"loss": 0.7507,
"step": 1800
},
{
"epoch": 0.02,
"grad_norm": 12.74821949005127,
"learning_rate": 3.035e-07,
"loss": 0.3674,
"step": 1825
},
{
"epoch": 0.02,
"grad_norm": 22.746973037719727,
"learning_rate": 3.0766666666666663e-07,
"loss": 0.7485,
"step": 1850
},
{
"epoch": 0.02,
"grad_norm": 16.455074310302734,
"learning_rate": 3.118333333333333e-07,
"loss": 0.3533,
"step": 1875
},
{
"epoch": 0.02,
"grad_norm": 32.016258239746094,
"learning_rate": 3.1599999999999997e-07,
"loss": 0.7237,
"step": 1900
},
{
"epoch": 0.02,
"grad_norm": 22.601268768310547,
"learning_rate": 3.201666666666666e-07,
"loss": 0.4137,
"step": 1925
},
{
"epoch": 0.02,
"grad_norm": 21.461885452270508,
"learning_rate": 3.243333333333333e-07,
"loss": 0.6908,
"step": 1950
},
{
"epoch": 0.02,
"grad_norm": 16.42440414428711,
"learning_rate": 3.285e-07,
"loss": 0.3973,
"step": 1975
},
{
"epoch": 0.02,
"grad_norm": 34.0020637512207,
"learning_rate": 3.3266666666666665e-07,
"loss": 0.748,
"step": 2000
},
{
"epoch": 0.02,
"eval_loss": 0.3752075731754303,
"eval_runtime": 5729.5086,
"eval_samples_per_second": 1.643,
"eval_steps_per_second": 0.205,
"eval_wer": 0.21564669788470855,
"step": 2000
},
{
"epoch": 0.02,
"grad_norm": 15.8253755569458,
"learning_rate": 3.368333333333333e-07,
"loss": 0.328,
"step": 2025
},
{
"epoch": 0.02,
"grad_norm": 27.537580490112305,
"learning_rate": 3.41e-07,
"loss": 0.7222,
"step": 2050
},
{
"epoch": 0.03,
"grad_norm": 20.178504943847656,
"learning_rate": 3.4516666666666664e-07,
"loss": 0.4064,
"step": 2075
},
{
"epoch": 0.03,
"grad_norm": 27.599384307861328,
"learning_rate": 3.4933333333333334e-07,
"loss": 0.8175,
"step": 2100
},
{
"epoch": 0.03,
"grad_norm": 14.29587459564209,
"learning_rate": 3.535e-07,
"loss": 0.3303,
"step": 2125
},
{
"epoch": 0.03,
"grad_norm": 28.619686126708984,
"learning_rate": 3.576666666666667e-07,
"loss": 0.632,
"step": 2150
},
{
"epoch": 0.03,
"grad_norm": 20.577468872070312,
"learning_rate": 3.618333333333333e-07,
"loss": 0.3908,
"step": 2175
},
{
"epoch": 0.03,
"grad_norm": 24.521394729614258,
"learning_rate": 3.6599999999999997e-07,
"loss": 0.7307,
"step": 2200
},
{
"epoch": 0.03,
"grad_norm": 18.374794006347656,
"learning_rate": 3.701666666666666e-07,
"loss": 0.4117,
"step": 2225
},
{
"epoch": 0.03,
"grad_norm": 32.217586517333984,
"learning_rate": 3.743333333333333e-07,
"loss": 0.7281,
"step": 2250
},
{
"epoch": 0.03,
"grad_norm": 14.12385368347168,
"learning_rate": 3.785e-07,
"loss": 0.3208,
"step": 2275
},
{
"epoch": 0.03,
"grad_norm": 27.859783172607422,
"learning_rate": 3.8266666666666665e-07,
"loss": 0.7194,
"step": 2300
},
{
"epoch": 0.03,
"grad_norm": 16.94115447998047,
"learning_rate": 3.868333333333333e-07,
"loss": 0.3764,
"step": 2325
},
{
"epoch": 0.03,
"grad_norm": 28.36104393005371,
"learning_rate": 3.91e-07,
"loss": 0.7183,
"step": 2350
},
{
"epoch": 0.03,
"grad_norm": 10.514703750610352,
"learning_rate": 3.9516666666666664e-07,
"loss": 0.3764,
"step": 2375
},
{
"epoch": 0.03,
"grad_norm": 33.07015609741211,
"learning_rate": 3.993333333333333e-07,
"loss": 0.7127,
"step": 2400
},
{
"epoch": 0.03,
"grad_norm": 25.2418212890625,
"learning_rate": 4.0350000000000003e-07,
"loss": 0.3723,
"step": 2425
},
{
"epoch": 0.03,
"grad_norm": 25.15533447265625,
"learning_rate": 4.076666666666667e-07,
"loss": 0.7695,
"step": 2450
},
{
"epoch": 0.03,
"grad_norm": 12.2613525390625,
"learning_rate": 4.118333333333333e-07,
"loss": 0.3878,
"step": 2475
},
{
"epoch": 0.03,
"grad_norm": 27.38979148864746,
"learning_rate": 4.1599999999999997e-07,
"loss": 0.7286,
"step": 2500
},
{
"epoch": 0.03,
"grad_norm": 12.769454956054688,
"learning_rate": 4.2016666666666666e-07,
"loss": 0.3283,
"step": 2525
},
{
"epoch": 0.03,
"grad_norm": 25.558361053466797,
"learning_rate": 4.243333333333333e-07,
"loss": 0.655,
"step": 2550
},
{
"epoch": 0.03,
"grad_norm": 19.812528610229492,
"learning_rate": 4.2849999999999995e-07,
"loss": 0.3285,
"step": 2575
},
{
"epoch": 0.03,
"grad_norm": 23.480504989624023,
"learning_rate": 4.3266666666666665e-07,
"loss": 0.7011,
"step": 2600
},
{
"epoch": 0.03,
"grad_norm": 27.310148239135742,
"learning_rate": 4.3683333333333335e-07,
"loss": 0.3668,
"step": 2625
},
{
"epoch": 0.03,
"grad_norm": 21.11886978149414,
"learning_rate": 4.41e-07,
"loss": 0.7359,
"step": 2650
},
{
"epoch": 0.03,
"grad_norm": 13.02730655670166,
"learning_rate": 4.4516666666666664e-07,
"loss": 0.3218,
"step": 2675
},
{
"epoch": 0.03,
"grad_norm": 23.938663482666016,
"learning_rate": 4.493333333333333e-07,
"loss": 0.7084,
"step": 2700
},
{
"epoch": 0.03,
"grad_norm": 13.4992094039917,
"learning_rate": 4.535e-07,
"loss": 0.3586,
"step": 2725
},
{
"epoch": 0.03,
"grad_norm": 24.50682258605957,
"learning_rate": 4.576666666666666e-07,
"loss": 0.6918,
"step": 2750
},
{
"epoch": 0.03,
"grad_norm": 15.688295364379883,
"learning_rate": 4.618333333333333e-07,
"loss": 0.3183,
"step": 2775
},
{
"epoch": 0.03,
"grad_norm": 27.072919845581055,
"learning_rate": 4.66e-07,
"loss": 0.8227,
"step": 2800
},
{
"epoch": 0.03,
"grad_norm": 16.015159606933594,
"learning_rate": 4.7016666666666666e-07,
"loss": 0.3831,
"step": 2825
},
{
"epoch": 0.03,
"grad_norm": 31.845600128173828,
"learning_rate": 4.743333333333333e-07,
"loss": 0.6515,
"step": 2850
},
{
"epoch": 0.03,
"grad_norm": 12.855831146240234,
"learning_rate": 4.785e-07,
"loss": 0.345,
"step": 2875
},
{
"epoch": 0.04,
"grad_norm": 30.276899337768555,
"learning_rate": 4.826666666666666e-07,
"loss": 0.7189,
"step": 2900
},
{
"epoch": 0.04,
"grad_norm": 13.716350555419922,
"learning_rate": 4.868333333333333e-07,
"loss": 0.3857,
"step": 2925
},
{
"epoch": 0.04,
"grad_norm": 24.70086097717285,
"learning_rate": 4.909999999999999e-07,
"loss": 0.6669,
"step": 2950
},
{
"epoch": 0.04,
"grad_norm": 13.5408353805542,
"learning_rate": 4.951666666666666e-07,
"loss": 0.3805,
"step": 2975
},
{
"epoch": 0.04,
"grad_norm": 27.32679557800293,
"learning_rate": 4.993333333333333e-07,
"loss": 0.6949,
"step": 3000
},
{
"epoch": 0.04,
"grad_norm": 14.190648078918457,
"learning_rate": 5.034999999999999e-07,
"loss": 0.3463,
"step": 3025
},
{
"epoch": 0.04,
"grad_norm": 40.79596710205078,
"learning_rate": 5.076666666666667e-07,
"loss": 0.7188,
"step": 3050
},
{
"epoch": 0.04,
"grad_norm": 16.573617935180664,
"learning_rate": 5.118333333333333e-07,
"loss": 0.2875,
"step": 3075
},
{
"epoch": 0.04,
"grad_norm": 24.329591751098633,
"learning_rate": 5.16e-07,
"loss": 0.5879,
"step": 3100
},
{
"epoch": 0.04,
"grad_norm": 14.05192756652832,
"learning_rate": 5.201666666666666e-07,
"loss": 0.3572,
"step": 3125
},
{
"epoch": 0.04,
"grad_norm": 22.151996612548828,
"learning_rate": 5.243333333333333e-07,
"loss": 0.6409,
"step": 3150
},
{
"epoch": 0.04,
"grad_norm": 14.75150203704834,
"learning_rate": 5.284999999999999e-07,
"loss": 0.3657,
"step": 3175
},
{
"epoch": 0.04,
"grad_norm": 31.801061630249023,
"learning_rate": 5.326666666666666e-07,
"loss": 0.7341,
"step": 3200
},
{
"epoch": 0.04,
"grad_norm": 10.417610168457031,
"learning_rate": 5.368333333333334e-07,
"loss": 0.3016,
"step": 3225
},
{
"epoch": 0.04,
"grad_norm": 26.536266326904297,
"learning_rate": 5.41e-07,
"loss": 0.7012,
"step": 3250
},
{
"epoch": 0.04,
"grad_norm": 16.091211318969727,
"learning_rate": 5.451666666666667e-07,
"loss": 0.3128,
"step": 3275
},
{
"epoch": 0.04,
"grad_norm": 33.78011703491211,
"learning_rate": 5.493333333333333e-07,
"loss": 0.6206,
"step": 3300
},
{
"epoch": 0.04,
"grad_norm": 15.934347152709961,
"learning_rate": 5.535e-07,
"loss": 0.3373,
"step": 3325
},
{
"epoch": 0.04,
"grad_norm": 25.661874771118164,
"learning_rate": 5.576666666666667e-07,
"loss": 0.6734,
"step": 3350
},
{
"epoch": 0.04,
"grad_norm": 11.325174331665039,
"learning_rate": 5.618333333333333e-07,
"loss": 0.3077,
"step": 3375
},
{
"epoch": 0.04,
"grad_norm": 21.682788848876953,
"learning_rate": 5.66e-07,
"loss": 0.6616,
"step": 3400
},
{
"epoch": 0.04,
"grad_norm": 16.362476348876953,
"learning_rate": 5.701666666666667e-07,
"loss": 0.3484,
"step": 3425
},
{
"epoch": 0.04,
"grad_norm": 30.024934768676758,
"learning_rate": 5.743333333333334e-07,
"loss": 0.6997,
"step": 3450
},
{
"epoch": 0.04,
"grad_norm": 13.67060661315918,
"learning_rate": 5.784999999999999e-07,
"loss": 0.2889,
"step": 3475
},
{
"epoch": 0.04,
"grad_norm": Infinity,
"learning_rate": 5.825e-07,
"loss": 0.6621,
"step": 3500
},
{
"epoch": 0.04,
"grad_norm": 22.19521141052246,
"learning_rate": 5.866666666666666e-07,
"loss": 0.2992,
"step": 3525
},
{
"epoch": 0.04,
"grad_norm": 23.638166427612305,
"learning_rate": 5.908333333333333e-07,
"loss": 0.633,
"step": 3550
},
{
"epoch": 0.04,
"grad_norm": 13.818583488464355,
"learning_rate": 5.949999999999999e-07,
"loss": 0.3736,
"step": 3575
},
{
"epoch": 0.04,
"grad_norm": 33.79849624633789,
"learning_rate": 5.991666666666666e-07,
"loss": 0.665,
"step": 3600
},
{
"epoch": 0.04,
"grad_norm": 13.363014221191406,
"learning_rate": 6.033333333333333e-07,
"loss": 0.2737,
"step": 3625
},
{
"epoch": 0.04,
"grad_norm": 31.426158905029297,
"learning_rate": 6.075e-07,
"loss": 0.7285,
"step": 3650
},
{
"epoch": 0.04,
"grad_norm": 19.794960021972656,
"learning_rate": 6.116666666666667e-07,
"loss": 0.3175,
"step": 3675
},
{
"epoch": 0.04,
"grad_norm": 26.806880950927734,
"learning_rate": 6.158333333333333e-07,
"loss": 0.6022,
"step": 3700
},
{
"epoch": 0.05,
"grad_norm": 23.275510787963867,
"learning_rate": 6.2e-07,
"loss": 0.3038,
"step": 3725
},
{
"epoch": 0.05,
"grad_norm": 22.10231590270996,
"learning_rate": 6.241666666666666e-07,
"loss": 0.6785,
"step": 3750
},
{
"epoch": 0.05,
"grad_norm": 14.206493377685547,
"learning_rate": 6.283333333333333e-07,
"loss": 0.3128,
"step": 3775
},
{
"epoch": 0.05,
"grad_norm": 19.696733474731445,
"learning_rate": 6.324999999999999e-07,
"loss": 0.6817,
"step": 3800
},
{
"epoch": 0.05,
"grad_norm": 16.63739013671875,
"learning_rate": 6.366666666666667e-07,
"loss": 0.3067,
"step": 3825
},
{
"epoch": 0.05,
"grad_norm": 26.20191192626953,
"learning_rate": 6.408333333333334e-07,
"loss": 0.6781,
"step": 3850
},
{
"epoch": 0.05,
"grad_norm": 21.292455673217773,
"learning_rate": 6.45e-07,
"loss": 0.2981,
"step": 3875
},
{
"epoch": 0.05,
"grad_norm": 26.61952781677246,
"learning_rate": 6.491666666666667e-07,
"loss": 0.6331,
"step": 3900
},
{
"epoch": 0.05,
"grad_norm": 15.374119758605957,
"learning_rate": 6.533333333333333e-07,
"loss": 0.3302,
"step": 3925
},
{
"epoch": 0.05,
"grad_norm": 26.00304412841797,
"learning_rate": 6.575e-07,
"loss": 0.6184,
"step": 3950
},
{
"epoch": 0.05,
"grad_norm": 20.321468353271484,
"learning_rate": 6.616666666666665e-07,
"loss": 0.321,
"step": 3975
},
{
"epoch": 0.05,
"grad_norm": 22.67734146118164,
"learning_rate": 6.658333333333333e-07,
"loss": 0.6051,
"step": 4000
},
{
"epoch": 0.05,
"eval_loss": 0.4198240339756012,
"eval_runtime": 5680.6895,
"eval_samples_per_second": 1.657,
"eval_steps_per_second": 0.207,
"eval_wer": 0.1885031487162926,
"step": 4000
},
{
"epoch": 0.05,
"grad_norm": 20.116188049316406,
"learning_rate": 6.7e-07,
"loss": 0.3214,
"step": 4025
},
{
"epoch": 0.05,
"grad_norm": 21.24591827392578,
"learning_rate": 6.741666666666666e-07,
"loss": 0.6371,
"step": 4050
},
{
"epoch": 0.05,
"grad_norm": 16.673837661743164,
"learning_rate": 6.783333333333333e-07,
"loss": 0.3389,
"step": 4075
},
{
"epoch": 0.05,
"grad_norm": 27.16354751586914,
"learning_rate": 6.824999999999999e-07,
"loss": 0.6304,
"step": 4100
},
{
"epoch": 0.05,
"grad_norm": 18.105676651000977,
"learning_rate": 6.866666666666666e-07,
"loss": 0.3528,
"step": 4125
},
{
"epoch": 0.05,
"grad_norm": 27.059154510498047,
"learning_rate": 6.908333333333333e-07,
"loss": 0.6902,
"step": 4150
},
{
"epoch": 0.05,
"grad_norm": 22.998558044433594,
"learning_rate": 6.949999999999999e-07,
"loss": 0.3802,
"step": 4175
},
{
"epoch": 0.05,
"grad_norm": 27.54330062866211,
"learning_rate": 6.991666666666667e-07,
"loss": 0.6378,
"step": 4200
},
{
"epoch": 0.05,
"grad_norm": 16.366973876953125,
"learning_rate": 7.033333333333333e-07,
"loss": 0.339,
"step": 4225
},
{
"epoch": 0.05,
"grad_norm": 25.825971603393555,
"learning_rate": 7.075e-07,
"loss": 0.5944,
"step": 4250
},
{
"epoch": 0.05,
"grad_norm": 26.39034652709961,
"learning_rate": 7.116666666666666e-07,
"loss": 0.3246,
"step": 4275
},
{
"epoch": 0.05,
"grad_norm": 24.95716094970703,
"learning_rate": 7.158333333333333e-07,
"loss": 0.5966,
"step": 4300
},
{
"epoch": 0.05,
"grad_norm": 13.741753578186035,
"learning_rate": 7.2e-07,
"loss": 0.3068,
"step": 4325
},
{
"epoch": 0.05,
"grad_norm": 24.966617584228516,
"learning_rate": 7.241666666666666e-07,
"loss": 0.6098,
"step": 4350
},
{
"epoch": 0.05,
"grad_norm": 14.291642189025879,
"learning_rate": 7.283333333333334e-07,
"loss": 0.3096,
"step": 4375
},
{
"epoch": 0.05,
"grad_norm": 22.74665641784668,
"learning_rate": 7.325e-07,
"loss": 0.7213,
"step": 4400
},
{
"epoch": 0.05,
"grad_norm": 13.496010780334473,
"learning_rate": 7.366666666666667e-07,
"loss": 0.2867,
"step": 4425
},
{
"epoch": 0.05,
"grad_norm": 22.160560607910156,
"learning_rate": 7.408333333333333e-07,
"loss": 0.6883,
"step": 4450
},
{
"epoch": 0.05,
"grad_norm": 15.879670143127441,
"learning_rate": 7.45e-07,
"loss": 0.3274,
"step": 4475
},
{
"epoch": 0.05,
"grad_norm": 25.978158950805664,
"learning_rate": 7.491666666666667e-07,
"loss": 0.663,
"step": 4500
},
{
"epoch": 0.06,
"grad_norm": 12.220645904541016,
"learning_rate": 7.533333333333332e-07,
"loss": 0.3133,
"step": 4525
},
{
"epoch": 0.06,
"grad_norm": 22.561466217041016,
"learning_rate": 7.575e-07,
"loss": 0.6417,
"step": 4550
},
{
"epoch": 0.06,
"grad_norm": 13.648468017578125,
"learning_rate": 7.616666666666666e-07,
"loss": 0.2709,
"step": 4575
},
{
"epoch": 0.06,
"grad_norm": 24.237659454345703,
"learning_rate": 7.658333333333333e-07,
"loss": 0.609,
"step": 4600
},
{
"epoch": 0.06,
"grad_norm": 12.015928268432617,
"learning_rate": 7.699999999999999e-07,
"loss": 0.3517,
"step": 4625
},
{
"epoch": 0.06,
"grad_norm": 10.939224243164062,
"learning_rate": 7.741666666666666e-07,
"loss": 0.5428,
"step": 4650
},
{
"epoch": 0.06,
"grad_norm": 21.860904693603516,
"learning_rate": 7.783333333333333e-07,
"loss": 0.3269,
"step": 4675
},
{
"epoch": 0.06,
"grad_norm": 34.1320915222168,
"learning_rate": 7.824999999999999e-07,
"loss": 0.6189,
"step": 4700
},
{
"epoch": 0.06,
"grad_norm": 16.4717960357666,
"learning_rate": 7.866666666666666e-07,
"loss": 0.3147,
"step": 4725
},
{
"epoch": 0.06,
"grad_norm": 24.782644271850586,
"learning_rate": 7.908333333333333e-07,
"loss": 0.5405,
"step": 4750
},
{
"epoch": 0.06,
"grad_norm": 23.22640037536621,
"learning_rate": 7.95e-07,
"loss": 0.3325,
"step": 4775
},
{
"epoch": 0.06,
"grad_norm": 25.234392166137695,
"learning_rate": 7.991666666666666e-07,
"loss": 0.6092,
"step": 4800
},
{
"epoch": 0.06,
"grad_norm": 17.815200805664062,
"learning_rate": 8.033333333333333e-07,
"loss": 0.3421,
"step": 4825
},
{
"epoch": 0.06,
"grad_norm": 31.845441818237305,
"learning_rate": 8.075e-07,
"loss": 0.6604,
"step": 4850
},
{
"epoch": 0.06,
"grad_norm": 18.663719177246094,
"learning_rate": 8.116666666666666e-07,
"loss": 0.3373,
"step": 4875
},
{
"epoch": 0.06,
"grad_norm": 18.988887786865234,
"learning_rate": 8.158333333333333e-07,
"loss": 0.6191,
"step": 4900
},
{
"epoch": 0.06,
"grad_norm": 14.552395820617676,
"learning_rate": 8.199999999999999e-07,
"loss": 0.3123,
"step": 4925
},
{
"epoch": 0.06,
"grad_norm": 24.135231018066406,
"learning_rate": 8.241666666666667e-07,
"loss": 0.5547,
"step": 4950
},
{
"epoch": 0.06,
"grad_norm": 13.5560302734375,
"learning_rate": 8.283333333333334e-07,
"loss": 0.2659,
"step": 4975
},
{
"epoch": 0.06,
"grad_norm": 23.879039764404297,
"learning_rate": 8.325e-07,
"loss": 0.5899,
"step": 5000
},
{
"epoch": 0.06,
"grad_norm": 10.661543846130371,
"learning_rate": 8.366666666666667e-07,
"loss": 0.2815,
"step": 5025
},
{
"epoch": 0.06,
"grad_norm": 23.548585891723633,
"learning_rate": 8.408333333333333e-07,
"loss": 0.5315,
"step": 5050
},
{
"epoch": 0.06,
"grad_norm": 18.99573516845703,
"learning_rate": 8.45e-07,
"loss": 0.3102,
"step": 5075
},
{
"epoch": 0.06,
"grad_norm": 25.472843170166016,
"learning_rate": 8.491666666666665e-07,
"loss": 0.5856,
"step": 5100
},
{
"epoch": 0.06,
"grad_norm": 10.183819770812988,
"learning_rate": 8.533333333333334e-07,
"loss": 0.2289,
"step": 5125
},
{
"epoch": 0.06,
"grad_norm": 21.010845184326172,
"learning_rate": 8.575e-07,
"loss": 0.5432,
"step": 5150
},
{
"epoch": 0.06,
"grad_norm": 10.91658878326416,
"learning_rate": 8.616666666666666e-07,
"loss": 0.2439,
"step": 5175
},
{
"epoch": 0.06,
"grad_norm": 27.883943557739258,
"learning_rate": 8.658333333333333e-07,
"loss": 0.5995,
"step": 5200
},
{
"epoch": 0.06,
"grad_norm": 13.827203750610352,
"learning_rate": 8.699999999999999e-07,
"loss": 0.3348,
"step": 5225
},
{
"epoch": 0.06,
"grad_norm": 25.215486526489258,
"learning_rate": 8.741666666666666e-07,
"loss": 0.559,
"step": 5250
},
{
"epoch": 0.06,
"grad_norm": 19.4741153717041,
"learning_rate": 8.783333333333332e-07,
"loss": 0.3154,
"step": 5275
},
{
"epoch": 0.06,
"grad_norm": 21.052459716796875,
"learning_rate": 8.824999999999999e-07,
"loss": 0.581,
"step": 5300
},
{
"epoch": 0.06,
"grad_norm": 14.411347389221191,
"learning_rate": 8.866666666666667e-07,
"loss": 0.2701,
"step": 5325
},
{
"epoch": 0.07,
"grad_norm": 15.227518081665039,
"learning_rate": 8.908333333333333e-07,
"loss": 0.6402,
"step": 5350
},
{
"epoch": 0.07,
"grad_norm": 17.570085525512695,
"learning_rate": 8.95e-07,
"loss": 0.3374,
"step": 5375
},
{
"epoch": 0.07,
"grad_norm": 16.47588348388672,
"learning_rate": 8.991666666666666e-07,
"loss": 0.5876,
"step": 5400
},
{
"epoch": 0.07,
"grad_norm": 15.246146202087402,
"learning_rate": 9.033333333333333e-07,
"loss": 0.2909,
"step": 5425
},
{
"epoch": 0.07,
"grad_norm": 22.148481369018555,
"learning_rate": 9.074999999999999e-07,
"loss": 0.5704,
"step": 5450
},
{
"epoch": 0.07,
"grad_norm": 11.771788597106934,
"learning_rate": 9.116666666666666e-07,
"loss": 0.284,
"step": 5475
},
{
"epoch": 0.07,
"grad_norm": 34.46316146850586,
"learning_rate": 9.158333333333334e-07,
"loss": 0.6357,
"step": 5500
},
{
"epoch": 0.07,
"grad_norm": 17.2254695892334,
"learning_rate": 9.2e-07,
"loss": 0.3433,
"step": 5525
},
{
"epoch": 0.07,
"grad_norm": 23.89180564880371,
"learning_rate": 9.241666666666667e-07,
"loss": 0.5568,
"step": 5550
},
{
"epoch": 0.07,
"grad_norm": 16.978792190551758,
"learning_rate": 9.283333333333333e-07,
"loss": 0.3001,
"step": 5575
},
{
"epoch": 0.07,
"grad_norm": 25.58534049987793,
"learning_rate": 9.325e-07,
"loss": 0.5594,
"step": 5600
},
{
"epoch": 0.07,
"grad_norm": 6.470766067504883,
"learning_rate": 9.366666666666666e-07,
"loss": 0.3021,
"step": 5625
},
{
"epoch": 0.07,
"grad_norm": 15.57498836517334,
"learning_rate": 9.408333333333333e-07,
"loss": 0.548,
"step": 5650
},
{
"epoch": 0.07,
"grad_norm": 8.752182960510254,
"learning_rate": 9.45e-07,
"loss": 0.3141,
"step": 5675
},
{
"epoch": 0.07,
"grad_norm": Infinity,
"learning_rate": 9.489999999999999e-07,
"loss": 0.6522,
"step": 5700
},
{
"epoch": 0.07,
"grad_norm": 14.224292755126953,
"learning_rate": 9.531666666666666e-07,
"loss": 0.2806,
"step": 5725
},
{
"epoch": 0.07,
"grad_norm": 28.10686683654785,
"learning_rate": 9.573333333333333e-07,
"loss": 0.5524,
"step": 5750
},
{
"epoch": 0.07,
"grad_norm": 14.760727882385254,
"learning_rate": 9.615e-07,
"loss": 0.3168,
"step": 5775
},
{
"epoch": 0.07,
"grad_norm": 21.50145149230957,
"learning_rate": 9.656666666666667e-07,
"loss": 0.5542,
"step": 5800
},
{
"epoch": 0.07,
"grad_norm": 19.867286682128906,
"learning_rate": 9.698333333333332e-07,
"loss": 0.2855,
"step": 5825
},
{
"epoch": 0.07,
"grad_norm": 23.064851760864258,
"learning_rate": 9.74e-07,
"loss": 0.5558,
"step": 5850
},
{
"epoch": 0.07,
"grad_norm": 21.55337905883789,
"learning_rate": 9.781666666666666e-07,
"loss": 0.2855,
"step": 5875
},
{
"epoch": 0.07,
"grad_norm": 28.359773635864258,
"learning_rate": 9.823333333333333e-07,
"loss": 0.5859,
"step": 5900
},
{
"epoch": 0.07,
"grad_norm": 14.382160186767578,
"learning_rate": 9.865e-07,
"loss": 0.3203,
"step": 5925
},
{
"epoch": 0.07,
"grad_norm": 17.879419326782227,
"learning_rate": 9.906666666666667e-07,
"loss": 0.5121,
"step": 5950
},
{
"epoch": 0.07,
"grad_norm": 19.03439712524414,
"learning_rate": 9.948333333333334e-07,
"loss": 0.3244,
"step": 5975
},
{
"epoch": 0.07,
"grad_norm": 20.255910873413086,
"learning_rate": 9.989999999999999e-07,
"loss": 0.5578,
"step": 6000
},
{
"epoch": 0.07,
"eval_loss": 0.424468457698822,
"eval_runtime": 5766.0002,
"eval_samples_per_second": 1.633,
"eval_steps_per_second": 0.204,
"eval_wer": 0.17566607460035524,
"step": 6000
},
{
"epoch": 0.07,
"grad_norm": 10.418182373046875,
"learning_rate": 9.955e-07,
"loss": 0.2279,
"step": 6025
},
{
"epoch": 0.07,
"grad_norm": 32.746212005615234,
"learning_rate": 9.8975e-07,
"loss": 0.617,
"step": 6050
},
{
"epoch": 0.07,
"grad_norm": 14.78303050994873,
"learning_rate": 9.835e-07,
"loss": 0.2966,
"step": 6075
},
{
"epoch": 0.07,
"grad_norm": 24.772193908691406,
"learning_rate": 9.772499999999998e-07,
"loss": 0.5872,
"step": 6100
},
{
"epoch": 0.07,
"grad_norm": 17.655696868896484,
"learning_rate": 9.709999999999999e-07,
"loss": 0.3257,
"step": 6125
},
{
"epoch": 0.07,
"grad_norm": 22.594074249267578,
"learning_rate": 9.6475e-07,
"loss": 0.6294,
"step": 6150
},
{
"epoch": 0.08,
"grad_norm": 15.015108108520508,
"learning_rate": 9.585e-07,
"loss": 0.292,
"step": 6175
},
{
"epoch": 0.08,
"grad_norm": 27.46025276184082,
"learning_rate": 9.5225e-07,
"loss": 0.5023,
"step": 6200
},
{
"epoch": 0.08,
"grad_norm": 15.613073348999023,
"learning_rate": 9.459999999999999e-07,
"loss": 0.2915,
"step": 6225
},
{
"epoch": 0.08,
"grad_norm": 24.478090286254883,
"learning_rate": 9.3975e-07,
"loss": 0.5793,
"step": 6250
},
{
"epoch": 0.08,
"grad_norm": 15.330205917358398,
"learning_rate": 9.334999999999999e-07,
"loss": 0.366,
"step": 6275
},
{
"epoch": 0.08,
"grad_norm": 25.022016525268555,
"learning_rate": 9.2725e-07,
"loss": 0.5548,
"step": 6300
},
{
"epoch": 0.08,
"grad_norm": 13.249412536621094,
"learning_rate": 9.21e-07,
"loss": 0.3341,
"step": 6325
},
{
"epoch": 0.08,
"grad_norm": 25.130050659179688,
"learning_rate": 9.147499999999999e-07,
"loss": 0.5431,
"step": 6350
},
{
"epoch": 0.08,
"grad_norm": 15.13709831237793,
"learning_rate": 9.085e-07,
"loss": 0.2728,
"step": 6375
},
{
"epoch": 0.08,
"grad_norm": 19.128374099731445,
"learning_rate": 9.022499999999999e-07,
"loss": 0.5788,
"step": 6400
},
{
"epoch": 0.08,
"grad_norm": 13.50425910949707,
"learning_rate": 8.96e-07,
"loss": 0.3066,
"step": 6425
},
{
"epoch": 0.08,
"grad_norm": 27.552776336669922,
"learning_rate": 8.8975e-07,
"loss": 0.5665,
"step": 6450
},
{
"epoch": 0.08,
"grad_norm": 16.90278434753418,
"learning_rate": 8.834999999999999e-07,
"loss": 0.3108,
"step": 6475
},
{
"epoch": 0.08,
"grad_norm": 15.420982360839844,
"learning_rate": 8.772499999999999e-07,
"loss": 0.5931,
"step": 6500
},
{
"epoch": 0.08,
"grad_norm": 12.858573913574219,
"learning_rate": 8.71e-07,
"loss": 0.2762,
"step": 6525
},
{
"epoch": 0.08,
"grad_norm": 30.493162155151367,
"learning_rate": 8.6475e-07,
"loss": 0.548,
"step": 6550
},
{
"epoch": 0.08,
"grad_norm": 21.05153465270996,
"learning_rate": 8.585e-07,
"loss": 0.2743,
"step": 6575
},
{
"epoch": 0.08,
"grad_norm": 24.79546546936035,
"learning_rate": 8.522499999999999e-07,
"loss": 0.5789,
"step": 6600
},
{
"epoch": 0.08,
"grad_norm": 5.389392375946045,
"learning_rate": 8.459999999999999e-07,
"loss": 0.2551,
"step": 6625
},
{
"epoch": 0.08,
"grad_norm": 29.287887573242188,
"learning_rate": 8.3975e-07,
"loss": 0.5231,
"step": 6650
},
{
"epoch": 0.08,
"grad_norm": 15.32079792022705,
"learning_rate": 8.334999999999999e-07,
"loss": 0.2667,
"step": 6675
},
{
"epoch": 0.08,
"grad_norm": 21.276037216186523,
"learning_rate": 8.2725e-07,
"loss": 0.539,
"step": 6700
},
{
"epoch": 0.08,
"grad_norm": 10.917491912841797,
"learning_rate": 8.21e-07,
"loss": 0.2951,
"step": 6725
},
{
"epoch": 0.08,
"grad_norm": 23.395267486572266,
"learning_rate": 8.147499999999999e-07,
"loss": 0.5505,
"step": 6750
},
{
"epoch": 0.08,
"grad_norm": 20.620159149169922,
"learning_rate": 8.085e-07,
"loss": 0.3287,
"step": 6775
},
{
"epoch": 0.08,
"grad_norm": 32.70819091796875,
"learning_rate": 8.0225e-07,
"loss": 0.6325,
"step": 6800
},
{
"epoch": 0.08,
"grad_norm": 15.693964004516602,
"learning_rate": 7.96e-07,
"loss": 0.2545,
"step": 6825
},
{
"epoch": 0.08,
"grad_norm": 21.390012741088867,
"learning_rate": 7.897499999999999e-07,
"loss": 0.5113,
"step": 6850
},
{
"epoch": 0.08,
"grad_norm": 16.327972412109375,
"learning_rate": 7.834999999999999e-07,
"loss": 0.2523,
"step": 6875
},
{
"epoch": 0.08,
"grad_norm": 18.395044326782227,
"learning_rate": 7.7725e-07,
"loss": 0.5417,
"step": 6900
},
{
"epoch": 0.08,
"grad_norm": 14.424351692199707,
"learning_rate": 7.71e-07,
"loss": 0.3177,
"step": 6925
},
{
"epoch": 0.08,
"grad_norm": 28.32687759399414,
"learning_rate": 7.6475e-07,
"loss": 0.5442,
"step": 6950
},
{
"epoch": 0.08,
"grad_norm": 19.87085723876953,
"learning_rate": 7.584999999999999e-07,
"loss": 0.2768,
"step": 6975
},
{
"epoch": 0.09,
"grad_norm": 24.301137924194336,
"learning_rate": 7.5225e-07,
"loss": 0.6135,
"step": 7000
},
{
"epoch": 0.09,
"grad_norm": 22.25948715209961,
"learning_rate": 7.459999999999999e-07,
"loss": 0.3079,
"step": 7025
},
{
"epoch": 0.09,
"grad_norm": 27.84773063659668,
"learning_rate": 7.3975e-07,
"loss": 0.6219,
"step": 7050
},
{
"epoch": 0.09,
"grad_norm": 14.436989784240723,
"learning_rate": 7.335e-07,
"loss": 0.3135,
"step": 7075
},
{
"epoch": 0.09,
"grad_norm": 18.467741012573242,
"learning_rate": 7.272499999999999e-07,
"loss": 0.5659,
"step": 7100
},
{
"epoch": 0.09,
"grad_norm": 10.770480155944824,
"learning_rate": 7.21e-07,
"loss": 0.2582,
"step": 7125
},
{
"epoch": 0.09,
"grad_norm": 13.723494529724121,
"learning_rate": 7.147499999999999e-07,
"loss": 0.5401,
"step": 7150
},
{
"epoch": 0.09,
"grad_norm": 16.69199562072754,
"learning_rate": 7.085e-07,
"loss": 0.2815,
"step": 7175
},
{
"epoch": 0.09,
"grad_norm": 17.908796310424805,
"learning_rate": 7.0225e-07,
"loss": 0.6891,
"step": 7200
},
{
"epoch": 0.09,
"grad_norm": 21.230213165283203,
"learning_rate": 6.959999999999999e-07,
"loss": 0.31,
"step": 7225
},
{
"epoch": 0.09,
"grad_norm": 23.39426612854004,
"learning_rate": 6.897499999999999e-07,
"loss": 0.5378,
"step": 7250
},
{
"epoch": 0.09,
"grad_norm": 11.744900703430176,
"learning_rate": 6.835e-07,
"loss": 0.3132,
"step": 7275
},
{
"epoch": 0.09,
"grad_norm": 20.57970428466797,
"learning_rate": 6.7725e-07,
"loss": 0.4722,
"step": 7300
},
{
"epoch": 0.09,
"grad_norm": 17.392757415771484,
"learning_rate": 6.71e-07,
"loss": 0.336,
"step": 7325
},
{
"epoch": 0.09,
"grad_norm": 25.665088653564453,
"learning_rate": 6.6475e-07,
"loss": 0.6683,
"step": 7350
},
{
"epoch": 0.09,
"grad_norm": 16.384979248046875,
"learning_rate": 6.584999999999999e-07,
"loss": 0.2941,
"step": 7375
},
{
"epoch": 0.09,
"grad_norm": 16.39459800720215,
"learning_rate": 6.5225e-07,
"loss": 0.5657,
"step": 7400
},
{
"epoch": 0.09,
"grad_norm": 11.207764625549316,
"learning_rate": 6.46e-07,
"loss": 0.2659,
"step": 7425
},
{
"epoch": 0.09,
"grad_norm": 30.415802001953125,
"learning_rate": 6.3975e-07,
"loss": 0.5069,
"step": 7450
},
{
"epoch": 0.09,
"grad_norm": 14.5722074508667,
"learning_rate": 6.335e-07,
"loss": 0.3173,
"step": 7475
},
{
"epoch": 0.09,
"grad_norm": 19.074825286865234,
"learning_rate": 6.272499999999999e-07,
"loss": 0.542,
"step": 7500
},
{
"epoch": 0.09,
"grad_norm": 15.403111457824707,
"learning_rate": 6.21e-07,
"loss": 0.2693,
"step": 7525
},
{
"epoch": 0.09,
"grad_norm": 28.799901962280273,
"learning_rate": 6.1475e-07,
"loss": 0.5837,
"step": 7550
},
{
"epoch": 0.09,
"grad_norm": 11.103520393371582,
"learning_rate": 6.085e-07,
"loss": 0.2405,
"step": 7575
},
{
"epoch": 0.09,
"grad_norm": 27.544315338134766,
"learning_rate": 6.022499999999999e-07,
"loss": 0.5857,
"step": 7600
},
{
"epoch": 0.09,
"grad_norm": 17.446556091308594,
"learning_rate": 5.96e-07,
"loss": 0.317,
"step": 7625
},
{
"epoch": 0.09,
"grad_norm": 26.29592514038086,
"learning_rate": 5.897499999999999e-07,
"loss": 0.5691,
"step": 7650
},
{
"epoch": 0.09,
"grad_norm": 16.815860748291016,
"learning_rate": 5.835e-07,
"loss": 0.3095,
"step": 7675
},
{
"epoch": 0.09,
"grad_norm": 25.93883514404297,
"learning_rate": 5.772500000000001e-07,
"loss": 0.6384,
"step": 7700
},
{
"epoch": 0.09,
"grad_norm": 19.090412139892578,
"learning_rate": 5.709999999999999e-07,
"loss": 0.2832,
"step": 7725
},
{
"epoch": 0.09,
"grad_norm": 26.75637435913086,
"learning_rate": 5.6475e-07,
"loss": 0.5051,
"step": 7750
},
{
"epoch": 0.09,
"grad_norm": 15.7750883102417,
"learning_rate": 5.584999999999999e-07,
"loss": 0.2899,
"step": 7775
},
{
"epoch": 0.09,
"grad_norm": 24.66639518737793,
"learning_rate": 5.5225e-07,
"loss": 0.5858,
"step": 7800
},
{
"epoch": 0.1,
"grad_norm": 12.055851936340332,
"learning_rate": 5.46e-07,
"loss": 0.2841,
"step": 7825
},
{
"epoch": 0.1,
"grad_norm": 24.859352111816406,
"learning_rate": 5.397499999999999e-07,
"loss": 0.5694,
"step": 7850
},
{
"epoch": 0.1,
"grad_norm": 10.433359146118164,
"learning_rate": 5.335e-07,
"loss": 0.2717,
"step": 7875
},
{
"epoch": 0.1,
"grad_norm": 22.402997970581055,
"learning_rate": 5.2725e-07,
"loss": 0.553,
"step": 7900
},
{
"epoch": 0.1,
"grad_norm": 19.177494049072266,
"learning_rate": 5.21e-07,
"loss": 0.2599,
"step": 7925
},
{
"epoch": 0.1,
"grad_norm": 31.914413452148438,
"learning_rate": 5.1475e-07,
"loss": 0.5566,
"step": 7950
},
{
"epoch": 0.1,
"grad_norm": 14.158437728881836,
"learning_rate": 5.085e-07,
"loss": 0.2437,
"step": 7975
},
{
"epoch": 0.1,
"grad_norm": 21.340044021606445,
"learning_rate": 5.022499999999999e-07,
"loss": 0.4632,
"step": 8000
},
{
"epoch": 0.1,
"eval_loss": 0.43929019570350647,
"eval_runtime": 5740.2852,
"eval_samples_per_second": 1.64,
"eval_steps_per_second": 0.205,
"eval_wer": 0.17359922493137414,
"step": 8000
},
{
"epoch": 0.1,
"grad_norm": 15.289112091064453,
"learning_rate": 4.96e-07,
"loss": 0.29,
"step": 8025
},
{
"epoch": 0.1,
"grad_norm": 22.75160789489746,
"learning_rate": 4.9e-07,
"loss": 0.5262,
"step": 8050
},
{
"epoch": 0.1,
"grad_norm": 30.43035125732422,
"learning_rate": 4.8375e-07,
"loss": 0.3062,
"step": 8075
},
{
"epoch": 0.1,
"grad_norm": 31.55746841430664,
"learning_rate": 4.775e-07,
"loss": 0.5132,
"step": 8100
},
{
"epoch": 0.1,
"grad_norm": 17.411041259765625,
"learning_rate": 4.7125e-07,
"loss": 0.2732,
"step": 8125
},
{
"epoch": 0.1,
"grad_norm": 11.30186653137207,
"learning_rate": 4.65e-07,
"loss": 0.5143,
"step": 8150
},
{
"epoch": 0.1,
"grad_norm": 15.796743392944336,
"learning_rate": 4.5874999999999995e-07,
"loss": 0.302,
"step": 8175
},
{
"epoch": 0.1,
"grad_norm": 23.819459915161133,
"learning_rate": 4.525e-07,
"loss": 0.5925,
"step": 8200
},
{
"epoch": 0.1,
"grad_norm": 18.643949508666992,
"learning_rate": 4.4624999999999996e-07,
"loss": 0.2842,
"step": 8225
},
{
"epoch": 0.1,
"grad_norm": 25.308483123779297,
"learning_rate": 4.3999999999999997e-07,
"loss": 0.5643,
"step": 8250
},
{
"epoch": 0.1,
"grad_norm": 11.329629898071289,
"learning_rate": 4.3375000000000003e-07,
"loss": 0.3107,
"step": 8275
},
{
"epoch": 0.1,
"grad_norm": 20.567607879638672,
"learning_rate": 4.275e-07,
"loss": 0.5414,
"step": 8300
},
{
"epoch": 0.1,
"grad_norm": 12.207862854003906,
"learning_rate": 4.2125e-07,
"loss": 0.2623,
"step": 8325
},
{
"epoch": 0.1,
"grad_norm": 18.38344955444336,
"learning_rate": 4.1499999999999994e-07,
"loss": 0.5246,
"step": 8350
},
{
"epoch": 0.1,
"grad_norm": 9.61295223236084,
"learning_rate": 4.0875e-07,
"loss": 0.3014,
"step": 8375
},
{
"epoch": 0.1,
"grad_norm": 24.633577346801758,
"learning_rate": 4.025e-07,
"loss": 0.5996,
"step": 8400
},
{
"epoch": 0.1,
"grad_norm": 8.710371971130371,
"learning_rate": 3.9624999999999996e-07,
"loss": 0.2369,
"step": 8425
},
{
"epoch": 0.1,
"grad_norm": 20.97711753845215,
"learning_rate": 3.8999999999999997e-07,
"loss": 0.5599,
"step": 8450
},
{
"epoch": 0.1,
"grad_norm": 8.884387016296387,
"learning_rate": 3.8375e-07,
"loss": 0.2391,
"step": 8475
},
{
"epoch": 0.1,
"grad_norm": 22.268924713134766,
"learning_rate": 3.775e-07,
"loss": 0.5217,
"step": 8500
},
{
"epoch": 0.1,
"grad_norm": 14.277334213256836,
"learning_rate": 3.7125e-07,
"loss": 0.3079,
"step": 8525
},
{
"epoch": 0.1,
"grad_norm": 30.062461853027344,
"learning_rate": 3.65e-07,
"loss": 0.5072,
"step": 8550
},
{
"epoch": 0.1,
"grad_norm": 18.741954803466797,
"learning_rate": 3.5875e-07,
"loss": 0.2697,
"step": 8575
},
{
"epoch": 0.1,
"grad_norm": 27.5913028717041,
"learning_rate": 3.5249999999999996e-07,
"loss": 0.5521,
"step": 8600
},
{
"epoch": 0.1,
"grad_norm": 10.070502281188965,
"learning_rate": 3.4624999999999997e-07,
"loss": 0.2877,
"step": 8625
},
{
"epoch": 0.11,
"grad_norm": 18.297412872314453,
"learning_rate": 3.4000000000000003e-07,
"loss": 0.5527,
"step": 8650
},
{
"epoch": 0.11,
"grad_norm": 11.771409034729004,
"learning_rate": 3.3375e-07,
"loss": 0.2221,
"step": 8675
},
{
"epoch": 0.11,
"grad_norm": 22.42823600769043,
"learning_rate": 3.275e-07,
"loss": 0.472,
"step": 8700
},
{
"epoch": 0.11,
"grad_norm": 17.16645050048828,
"learning_rate": 3.2124999999999994e-07,
"loss": 0.2754,
"step": 8725
},
{
"epoch": 0.11,
"grad_norm": 26.470958709716797,
"learning_rate": 3.15e-07,
"loss": 0.6017,
"step": 8750
},
{
"epoch": 0.11,
"grad_norm": 14.21453857421875,
"learning_rate": 3.0875e-07,
"loss": 0.2395,
"step": 8775
},
{
"epoch": 0.11,
"grad_norm": 13.669867515563965,
"learning_rate": 3.0249999999999996e-07,
"loss": 0.4772,
"step": 8800
},
{
"epoch": 0.11,
"grad_norm": 13.404635429382324,
"learning_rate": 2.9625e-07,
"loss": 0.2681,
"step": 8825
},
{
"epoch": 0.11,
"grad_norm": 32.027488708496094,
"learning_rate": 2.9e-07,
"loss": 0.591,
"step": 8850
},
{
"epoch": 0.11,
"grad_norm": 12.78139591217041,
"learning_rate": 2.8375e-07,
"loss": 0.2708,
"step": 8875
},
{
"epoch": 0.11,
"grad_norm": 19.808069229125977,
"learning_rate": 2.775e-07,
"loss": 0.5718,
"step": 8900
},
{
"epoch": 0.11,
"grad_norm": 8.049223899841309,
"learning_rate": 2.7125e-07,
"loss": 0.3105,
"step": 8925
},
{
"epoch": 0.11,
"grad_norm": 28.265857696533203,
"learning_rate": 2.65e-07,
"loss": 0.5694,
"step": 8950
},
{
"epoch": 0.11,
"grad_norm": 11.352721214294434,
"learning_rate": 2.5874999999999996e-07,
"loss": 0.2624,
"step": 8975
},
{
"epoch": 0.11,
"grad_norm": 26.757156372070312,
"learning_rate": 2.5249999999999996e-07,
"loss": 0.5656,
"step": 9000
},
{
"epoch": 0.11,
"grad_norm": 18.198179244995117,
"learning_rate": 2.4624999999999997e-07,
"loss": 0.288,
"step": 9025
},
{
"epoch": 0.11,
"grad_norm": 23.22308349609375,
"learning_rate": 2.4e-07,
"loss": 0.6548,
"step": 9050
},
{
"epoch": 0.11,
"grad_norm": 11.637288093566895,
"learning_rate": 2.3375e-07,
"loss": 0.2776,
"step": 9075
},
{
"epoch": 0.11,
"grad_norm": 29.29697036743164,
"learning_rate": 2.275e-07,
"loss": 0.5056,
"step": 9100
},
{
"epoch": 0.11,
"grad_norm": 17.896018981933594,
"learning_rate": 2.2125e-07,
"loss": 0.2556,
"step": 9125
},
{
"epoch": 0.11,
"grad_norm": 22.04491424560547,
"learning_rate": 2.1499999999999998e-07,
"loss": 0.5296,
"step": 9150
},
{
"epoch": 0.11,
"grad_norm": 15.423519134521484,
"learning_rate": 2.0874999999999999e-07,
"loss": 0.2784,
"step": 9175
},
{
"epoch": 0.11,
"grad_norm": 21.817243576049805,
"learning_rate": 2.025e-07,
"loss": 0.5813,
"step": 9200
},
{
"epoch": 0.11,
"grad_norm": 14.63697624206543,
"learning_rate": 1.9625e-07,
"loss": 0.2917,
"step": 9225
},
{
"epoch": 0.11,
"grad_norm": 22.647859573364258,
"learning_rate": 1.8999999999999998e-07,
"loss": 0.5685,
"step": 9250
},
{
"epoch": 0.11,
"grad_norm": 13.825846672058105,
"learning_rate": 1.8375e-07,
"loss": 0.2498,
"step": 9275
},
{
"epoch": 0.11,
"grad_norm": 32.008758544921875,
"learning_rate": 1.775e-07,
"loss": 0.5868,
"step": 9300
},
{
"epoch": 0.11,
"grad_norm": 16.272075653076172,
"learning_rate": 1.7125e-07,
"loss": 0.269,
"step": 9325
},
{
"epoch": 0.11,
"grad_norm": 18.543703079223633,
"learning_rate": 1.65e-07,
"loss": 0.5475,
"step": 9350
},
{
"epoch": 0.11,
"grad_norm": 16.70926284790039,
"learning_rate": 1.5875e-07,
"loss": 0.2535,
"step": 9375
},
{
"epoch": 0.11,
"grad_norm": 20.96632194519043,
"learning_rate": 1.525e-07,
"loss": 0.5715,
"step": 9400
},
{
"epoch": 0.11,
"grad_norm": 14.177762985229492,
"learning_rate": 1.4624999999999998e-07,
"loss": 0.2631,
"step": 9425
},
{
"epoch": 0.11,
"grad_norm": 22.189653396606445,
"learning_rate": 1.4e-07,
"loss": 0.5152,
"step": 9450
},
{
"epoch": 0.12,
"grad_norm": 16.282991409301758,
"learning_rate": 1.3375e-07,
"loss": 0.2671,
"step": 9475
},
{
"epoch": 0.12,
"grad_norm": 28.795602798461914,
"learning_rate": 1.275e-07,
"loss": 0.598,
"step": 9500
},
{
"epoch": 0.12,
"grad_norm": 19.492183685302734,
"learning_rate": 1.2125e-07,
"loss": 0.2908,
"step": 9525
},
{
"epoch": 0.12,
"grad_norm": 23.78717613220215,
"learning_rate": 1.15e-07,
"loss": 0.4983,
"step": 9550
},
{
"epoch": 0.12,
"grad_norm": 11.744542121887207,
"learning_rate": 1.0874999999999999e-07,
"loss": 0.2812,
"step": 9575
},
{
"epoch": 0.12,
"grad_norm": 28.317659378051758,
"learning_rate": 1.0249999999999998e-07,
"loss": 0.6472,
"step": 9600
},
{
"epoch": 0.12,
"grad_norm": 17.57823944091797,
"learning_rate": 9.624999999999999e-08,
"loss": 0.254,
"step": 9625
},
{
"epoch": 0.12,
"grad_norm": 24.610986709594727,
"learning_rate": 9e-08,
"loss": 0.5404,
"step": 9650
},
{
"epoch": 0.12,
"grad_norm": 15.964032173156738,
"learning_rate": 8.375e-08,
"loss": 0.2462,
"step": 9675
},
{
"epoch": 0.12,
"grad_norm": 21.19764518737793,
"learning_rate": 7.75e-08,
"loss": 0.6009,
"step": 9700
},
{
"epoch": 0.12,
"grad_norm": 15.994060516357422,
"learning_rate": 7.124999999999999e-08,
"loss": 0.2784,
"step": 9725
},
{
"epoch": 0.12,
"grad_norm": 22.544214248657227,
"learning_rate": 6.5e-08,
"loss": 0.5595,
"step": 9750
},
{
"epoch": 0.12,
"grad_norm": 15.244584083557129,
"learning_rate": 5.8749999999999993e-08,
"loss": 0.257,
"step": 9775
},
{
"epoch": 0.12,
"grad_norm": 19.461477279663086,
"learning_rate": 5.2499999999999994e-08,
"loss": 0.5941,
"step": 9800
},
{
"epoch": 0.12,
"grad_norm": 9.137640953063965,
"learning_rate": 4.625e-08,
"loss": 0.2462,
"step": 9825
},
{
"epoch": 0.12,
"grad_norm": 16.050039291381836,
"learning_rate": 4e-08,
"loss": 0.5579,
"step": 9850
},
{
"epoch": 0.12,
"grad_norm": 12.063644409179688,
"learning_rate": 3.375e-08,
"loss": 0.2443,
"step": 9875
},
{
"epoch": 0.12,
"grad_norm": 19.16944122314453,
"learning_rate": 2.7499999999999998e-08,
"loss": 0.4084,
"step": 9900
},
{
"epoch": 0.12,
"grad_norm": 13.46724796295166,
"learning_rate": 2.1250000000000002e-08,
"loss": 0.2948,
"step": 9925
},
{
"epoch": 0.12,
"grad_norm": 23.261869430541992,
"learning_rate": 1.5e-08,
"loss": 0.5243,
"step": 9950
},
{
"epoch": 0.12,
"grad_norm": 17.960269927978516,
"learning_rate": 8.75e-09,
"loss": 0.227,
"step": 9975
},
{
"epoch": 0.12,
"grad_norm": 25.144319534301758,
"learning_rate": 2.5e-09,
"loss": 0.5151,
"step": 10000
},
{
"epoch": 0.12,
"eval_loss": 0.44570982456207275,
"eval_runtime": 5594.932,
"eval_samples_per_second": 1.683,
"eval_steps_per_second": 0.21,
"eval_wer": 0.1700952688519296,
"step": 10000
},
{
"epoch": 0.12,
"grad_norm": Infinity,
"learning_rate": 7.133571428571429e-07,
"loss": 0.2487,
"step": 10025
},
{
"epoch": 0.12,
"grad_norm": Infinity,
"learning_rate": 7.116428571428571e-07,
"loss": 0.4951,
"step": 10050
},
{
"epoch": 0.12,
"grad_norm": 18.395633697509766,
"learning_rate": 7.098571428571429e-07,
"loss": 0.2762,
"step": 10075
},
{
"epoch": 0.12,
"grad_norm": 14.118087768554688,
"learning_rate": 7.080714285714285e-07,
"loss": 0.5376,
"step": 10100
},
{
"epoch": 0.12,
"grad_norm": 15.895506858825684,
"learning_rate": 7.062857142857142e-07,
"loss": 0.2819,
"step": 10125
},
{
"epoch": 0.12,
"grad_norm": 26.209333419799805,
"learning_rate": 7.045e-07,
"loss": 0.5038,
"step": 10150
},
{
"epoch": 0.12,
"grad_norm": 9.421802520751953,
"learning_rate": 7.027142857142857e-07,
"loss": 0.2401,
"step": 10175
},
{
"epoch": 0.12,
"grad_norm": 35.024810791015625,
"learning_rate": 7.009285714285714e-07,
"loss": 0.5469,
"step": 10200
},
{
"epoch": 0.12,
"grad_norm": 8.198253631591797,
"learning_rate": 6.991428571428571e-07,
"loss": 0.3045,
"step": 10225
},
{
"epoch": 0.12,
"grad_norm": 23.263959884643555,
"learning_rate": 6.973571428571429e-07,
"loss": 0.5404,
"step": 10250
},
{
"epoch": 0.12,
"grad_norm": 24.623031616210938,
"learning_rate": 6.955714285714286e-07,
"loss": 0.2843,
"step": 10275
},
{
"epoch": 0.13,
"grad_norm": 16.614179611206055,
"learning_rate": 6.937857142857143e-07,
"loss": 0.5677,
"step": 10300
},
{
"epoch": 0.13,
"grad_norm": 13.380021095275879,
"learning_rate": 6.919999999999999e-07,
"loss": 0.2898,
"step": 10325
},
{
"epoch": 0.13,
"grad_norm": 20.24687957763672,
"learning_rate": 6.902142857142856e-07,
"loss": 0.5123,
"step": 10350
},
{
"epoch": 0.13,
"grad_norm": 15.209970474243164,
"learning_rate": 6.884285714285714e-07,
"loss": 0.2592,
"step": 10375
},
{
"epoch": 0.13,
"grad_norm": 25.526763916015625,
"learning_rate": 6.86642857142857e-07,
"loss": 0.4767,
"step": 10400
},
{
"epoch": 0.13,
"grad_norm": 13.861222267150879,
"learning_rate": 6.848571428571428e-07,
"loss": 0.3133,
"step": 10425
},
{
"epoch": 0.13,
"grad_norm": 21.006370544433594,
"learning_rate": 6.830714285714285e-07,
"loss": 0.4938,
"step": 10450
},
{
"epoch": 0.13,
"grad_norm": 19.099258422851562,
"learning_rate": 6.812857142857143e-07,
"loss": 0.2615,
"step": 10475
},
{
"epoch": 0.13,
"grad_norm": 23.394371032714844,
"learning_rate": 6.794999999999999e-07,
"loss": 0.5587,
"step": 10500
},
{
"epoch": 0.13,
"grad_norm": 17.32550048828125,
"learning_rate": 6.777142857142857e-07,
"loss": 0.2864,
"step": 10525
},
{
"epoch": 0.13,
"grad_norm": 21.120290756225586,
"learning_rate": 6.759285714285714e-07,
"loss": 0.5864,
"step": 10550
},
{
"epoch": 0.13,
"grad_norm": 13.446245193481445,
"learning_rate": 6.741428571428572e-07,
"loss": 0.2657,
"step": 10575
},
{
"epoch": 0.13,
"grad_norm": 20.756250381469727,
"learning_rate": 6.723571428571429e-07,
"loss": 0.5236,
"step": 10600
},
{
"epoch": 0.13,
"grad_norm": 19.133296966552734,
"learning_rate": 6.705714285714285e-07,
"loss": 0.3078,
"step": 10625
},
{
"epoch": 0.13,
"grad_norm": 31.154388427734375,
"learning_rate": 6.687857142857143e-07,
"loss": 0.5568,
"step": 10650
},
{
"epoch": 0.13,
"grad_norm": 16.301631927490234,
"learning_rate": 6.67e-07,
"loss": 0.2456,
"step": 10675
},
{
"epoch": 0.13,
"grad_norm": 27.033632278442383,
"learning_rate": 6.652142857142858e-07,
"loss": 0.5484,
"step": 10700
},
{
"epoch": 0.13,
"grad_norm": 5.143960952758789,
"learning_rate": 6.634285714285714e-07,
"loss": 0.2738,
"step": 10725
},
{
"epoch": 0.13,
"grad_norm": 24.503541946411133,
"learning_rate": 6.616428571428571e-07,
"loss": 0.5612,
"step": 10750
},
{
"epoch": 0.13,
"grad_norm": 19.447397232055664,
"learning_rate": 6.598571428571428e-07,
"loss": 0.2954,
"step": 10775
},
{
"epoch": 0.13,
"grad_norm": 19.933080673217773,
"learning_rate": 6.580714285714286e-07,
"loss": 0.5517,
"step": 10800
},
{
"epoch": 0.13,
"grad_norm": 18.4548282623291,
"learning_rate": 6.562857142857142e-07,
"loss": 0.2887,
"step": 10825
},
{
"epoch": 0.13,
"grad_norm": 27.120908737182617,
"learning_rate": 6.544999999999999e-07,
"loss": 0.5242,
"step": 10850
},
{
"epoch": 0.13,
"grad_norm": 11.658120155334473,
"learning_rate": 6.527142857142857e-07,
"loss": 0.2301,
"step": 10875
},
{
"epoch": 0.13,
"grad_norm": 21.553525924682617,
"learning_rate": 6.509285714285714e-07,
"loss": 0.5728,
"step": 10900
},
{
"epoch": 0.13,
"grad_norm": 16.63558006286621,
"learning_rate": 6.491428571428571e-07,
"loss": 0.2946,
"step": 10925
},
{
"epoch": 0.13,
"grad_norm": 21.972610473632812,
"learning_rate": 6.473571428571428e-07,
"loss": 0.5271,
"step": 10950
},
{
"epoch": 0.13,
"grad_norm": 12.130209922790527,
"learning_rate": 6.455714285714286e-07,
"loss": 0.236,
"step": 10975
},
{
"epoch": 0.13,
"grad_norm": 21.220565795898438,
"learning_rate": 6.437857142857143e-07,
"loss": 0.4897,
"step": 11000
},
{
"epoch": 0.13,
"grad_norm": 16.87418556213379,
"learning_rate": 6.42e-07,
"loss": 0.3331,
"step": 11025
},
{
"epoch": 0.13,
"grad_norm": 37.632633209228516,
"learning_rate": 6.402142857142857e-07,
"loss": 0.5686,
"step": 11050
},
{
"epoch": 0.13,
"grad_norm": 22.07012176513672,
"learning_rate": 6.384285714285714e-07,
"loss": 0.2804,
"step": 11075
},
{
"epoch": 0.13,
"grad_norm": 28.006338119506836,
"learning_rate": 6.366428571428572e-07,
"loss": 0.6237,
"step": 11100
},
{
"epoch": 0.14,
"grad_norm": 11.953436851501465,
"learning_rate": 6.348571428571428e-07,
"loss": 0.2531,
"step": 11125
},
{
"epoch": 0.14,
"grad_norm": 18.82573699951172,
"learning_rate": 6.330714285714286e-07,
"loss": 0.5206,
"step": 11150
},
{
"epoch": 0.14,
"grad_norm": 15.685461044311523,
"learning_rate": 6.312857142857143e-07,
"loss": 0.2804,
"step": 11175
},
{
"epoch": 0.14,
"grad_norm": 20.35894775390625,
"learning_rate": 6.295e-07,
"loss": 0.5052,
"step": 11200
},
{
"epoch": 0.14,
"grad_norm": 10.60390567779541,
"learning_rate": 6.277142857142856e-07,
"loss": 0.2865,
"step": 11225
},
{
"epoch": 0.14,
"grad_norm": 29.205368041992188,
"learning_rate": 6.259285714285714e-07,
"loss": 0.5974,
"step": 11250
},
{
"epoch": 0.14,
"grad_norm": 17.855728149414062,
"learning_rate": 6.241428571428571e-07,
"loss": 0.3193,
"step": 11275
},
{
"epoch": 0.14,
"grad_norm": 24.602779388427734,
"learning_rate": 6.223571428571428e-07,
"loss": 0.4878,
"step": 11300
},
{
"epoch": 0.14,
"grad_norm": 18.369998931884766,
"learning_rate": 6.205714285714285e-07,
"loss": 0.2899,
"step": 11325
},
{
"epoch": 0.14,
"grad_norm": Infinity,
"learning_rate": 6.188571428571429e-07,
"loss": 0.474,
"step": 11350
},
{
"epoch": 0.14,
"grad_norm": 12.755717277526855,
"learning_rate": 6.170714285714285e-07,
"loss": 0.2261,
"step": 11375
},
{
"epoch": 0.14,
"grad_norm": 25.792221069335938,
"learning_rate": 6.152857142857143e-07,
"loss": 0.5289,
"step": 11400
},
{
"epoch": 0.14,
"grad_norm": 12.499135971069336,
"learning_rate": 6.135e-07,
"loss": 0.3456,
"step": 11425
},
{
"epoch": 0.14,
"grad_norm": 23.764741897583008,
"learning_rate": 6.117142857142858e-07,
"loss": 0.4705,
"step": 11450
},
{
"epoch": 0.14,
"grad_norm": 15.811933517456055,
"learning_rate": 6.099285714285713e-07,
"loss": 0.2818,
"step": 11475
},
{
"epoch": 0.14,
"grad_norm": 27.48798179626465,
"learning_rate": 6.081428571428571e-07,
"loss": 0.4752,
"step": 11500
},
{
"epoch": 0.14,
"grad_norm": 29.260459899902344,
"learning_rate": 6.063571428571428e-07,
"loss": 0.3103,
"step": 11525
},
{
"epoch": 0.14,
"grad_norm": 24.313169479370117,
"learning_rate": 6.045714285714286e-07,
"loss": 0.5345,
"step": 11550
},
{
"epoch": 0.14,
"grad_norm": 14.812594413757324,
"learning_rate": 6.027857142857142e-07,
"loss": 0.2789,
"step": 11575
},
{
"epoch": 0.14,
"grad_norm": 19.387434005737305,
"learning_rate": 6.009999999999999e-07,
"loss": 0.5909,
"step": 11600
},
{
"epoch": 0.14,
"grad_norm": 6.080020904541016,
"learning_rate": 5.992142857142857e-07,
"loss": 0.2259,
"step": 11625
},
{
"epoch": 0.14,
"grad_norm": 23.574949264526367,
"learning_rate": 5.974285714285714e-07,
"loss": 0.5457,
"step": 11650
},
{
"epoch": 0.14,
"grad_norm": 13.463486671447754,
"learning_rate": 5.956428571428571e-07,
"loss": 0.2709,
"step": 11675
},
{
"epoch": 0.14,
"grad_norm": 28.031545639038086,
"learning_rate": 5.938571428571428e-07,
"loss": 0.5709,
"step": 11700
},
{
"epoch": 0.14,
"grad_norm": 16.419267654418945,
"learning_rate": 5.920714285714286e-07,
"loss": 0.2395,
"step": 11725
},
{
"epoch": 0.14,
"grad_norm": 24.46676254272461,
"learning_rate": 5.902857142857143e-07,
"loss": 0.4678,
"step": 11750
},
{
"epoch": 0.14,
"grad_norm": 13.469151496887207,
"learning_rate": 5.885e-07,
"loss": 0.2192,
"step": 11775
},
{
"epoch": 0.14,
"grad_norm": 22.810352325439453,
"learning_rate": 5.867142857142857e-07,
"loss": 0.5398,
"step": 11800
},
{
"epoch": 0.14,
"grad_norm": 12.745511054992676,
"learning_rate": 5.849285714285714e-07,
"loss": 0.3098,
"step": 11825
},
{
"epoch": 0.14,
"grad_norm": 18.4826602935791,
"learning_rate": 5.831428571428572e-07,
"loss": 0.5599,
"step": 11850
},
{
"epoch": 0.14,
"grad_norm": 11.836509704589844,
"learning_rate": 5.813571428571428e-07,
"loss": 0.2207,
"step": 11875
},
{
"epoch": 0.14,
"grad_norm": 21.229785919189453,
"learning_rate": 5.795714285714286e-07,
"loss": 0.5776,
"step": 11900
},
{
"epoch": 0.14,
"grad_norm": 11.208019256591797,
"learning_rate": 5.777857142857142e-07,
"loss": 0.247,
"step": 11925
},
{
"epoch": 0.15,
"grad_norm": 25.267257690429688,
"learning_rate": 5.76e-07,
"loss": 0.4998,
"step": 11950
},
{
"epoch": 0.15,
"grad_norm": 14.406000137329102,
"learning_rate": 5.742142857142856e-07,
"loss": 0.2457,
"step": 11975
},
{
"epoch": 0.15,
"grad_norm": 21.350296020507812,
"learning_rate": 5.724285714285714e-07,
"loss": 0.5158,
"step": 12000
},
{
"epoch": 0.15,
"eval_loss": 0.43139249086380005,
"eval_runtime": 6062.945,
"eval_samples_per_second": 1.553,
"eval_steps_per_second": 0.194,
"eval_wer": 0.1670757306636525,
"step": 12000
},
{
"epoch": 0.15,
"grad_norm": 10.665511131286621,
"learning_rate": 5.706428571428571e-07,
"loss": 0.3042,
"step": 12025
},
{
"epoch": 0.15,
"grad_norm": 20.711769104003906,
"learning_rate": 5.688571428571428e-07,
"loss": 0.5473,
"step": 12050
},
{
"epoch": 0.15,
"grad_norm": 15.992523193359375,
"learning_rate": 5.670714285714285e-07,
"loss": 0.2341,
"step": 12075
},
{
"epoch": 0.15,
"grad_norm": 22.357173919677734,
"learning_rate": 5.652857142857142e-07,
"loss": 0.5332,
"step": 12100
},
{
"epoch": 0.15,
"grad_norm": 12.882672309875488,
"learning_rate": 5.635e-07,
"loss": 0.295,
"step": 12125
},
{
"epoch": 0.15,
"grad_norm": 19.411699295043945,
"learning_rate": 5.617142857142857e-07,
"loss": 0.5108,
"step": 12150
},
{
"epoch": 0.15,
"grad_norm": 11.176936149597168,
"learning_rate": 5.599285714285714e-07,
"loss": 0.2256,
"step": 12175
},
{
"epoch": 0.15,
"grad_norm": 21.744428634643555,
"learning_rate": 5.581428571428571e-07,
"loss": 0.5773,
"step": 12200
},
{
"epoch": 0.15,
"grad_norm": 14.22724723815918,
"learning_rate": 5.563571428571429e-07,
"loss": 0.2695,
"step": 12225
},
{
"epoch": 0.15,
"grad_norm": 38.878807067871094,
"learning_rate": 5.545714285714286e-07,
"loss": 0.5021,
"step": 12250
},
{
"epoch": 0.15,
"grad_norm": 18.826614379882812,
"learning_rate": 5.527857142857143e-07,
"loss": 0.2635,
"step": 12275
},
{
"epoch": 0.15,
"grad_norm": 14.811022758483887,
"learning_rate": 5.51e-07,
"loss": 0.543,
"step": 12300
},
{
"epoch": 0.15,
"grad_norm": 9.75754165649414,
"learning_rate": 5.492142857142857e-07,
"loss": 0.3347,
"step": 12325
},
{
"epoch": 0.15,
"grad_norm": 25.858911514282227,
"learning_rate": 5.474285714285714e-07,
"loss": 0.5124,
"step": 12350
},
{
"epoch": 0.15,
"grad_norm": 10.652257919311523,
"learning_rate": 5.45642857142857e-07,
"loss": 0.2577,
"step": 12375
},
{
"epoch": 0.15,
"grad_norm": 29.380006790161133,
"learning_rate": 5.438571428571428e-07,
"loss": 0.534,
"step": 12400
},
{
"epoch": 0.15,
"grad_norm": 11.318015098571777,
"learning_rate": 5.420714285714285e-07,
"loss": 0.2509,
"step": 12425
},
{
"epoch": 0.15,
"grad_norm": 25.717960357666016,
"learning_rate": 5.402857142857143e-07,
"loss": 0.5379,
"step": 12450
},
{
"epoch": 0.15,
"grad_norm": 20.083452224731445,
"learning_rate": 5.384999999999999e-07,
"loss": 0.2902,
"step": 12475
},
{
"epoch": 0.15,
"grad_norm": 23.3952579498291,
"learning_rate": 5.367142857142857e-07,
"loss": 0.5801,
"step": 12500
},
{
"epoch": 0.15,
"grad_norm": 9.673083305358887,
"learning_rate": 5.349285714285714e-07,
"loss": 0.2961,
"step": 12525
},
{
"epoch": 0.15,
"grad_norm": 21.045778274536133,
"learning_rate": 5.331428571428571e-07,
"loss": 0.5459,
"step": 12550
},
{
"epoch": 0.15,
"grad_norm": 15.67827320098877,
"learning_rate": 5.313571428571428e-07,
"loss": 0.2613,
"step": 12575
},
{
"epoch": 0.15,
"grad_norm": 29.011518478393555,
"learning_rate": 5.295714285714285e-07,
"loss": 0.5127,
"step": 12600
},
{
"epoch": 0.15,
"grad_norm": 9.100702285766602,
"learning_rate": 5.277857142857143e-07,
"loss": 0.2787,
"step": 12625
},
{
"epoch": 0.15,
"grad_norm": 26.210891723632812,
"learning_rate": 5.26e-07,
"loss": 0.6275,
"step": 12650
},
{
"epoch": 0.15,
"grad_norm": 14.144155502319336,
"learning_rate": 5.242142857142858e-07,
"loss": 0.2381,
"step": 12675
},
{
"epoch": 0.15,
"grad_norm": 29.32146644592285,
"learning_rate": 5.224285714285714e-07,
"loss": 0.5048,
"step": 12700
},
{
"epoch": 0.15,
"grad_norm": 15.683405876159668,
"learning_rate": 5.206428571428572e-07,
"loss": 0.3016,
"step": 12725
},
{
"epoch": 0.15,
"grad_norm": 25.86359214782715,
"learning_rate": 5.188571428571429e-07,
"loss": 0.5358,
"step": 12750
},
{
"epoch": 0.16,
"grad_norm": 10.712611198425293,
"learning_rate": 5.170714285714287e-07,
"loss": 0.242,
"step": 12775
},
{
"epoch": 0.16,
"grad_norm": 14.679198265075684,
"learning_rate": 5.152857142857142e-07,
"loss": 0.5162,
"step": 12800
},
{
"epoch": 0.16,
"grad_norm": 16.717084884643555,
"learning_rate": 5.134999999999999e-07,
"loss": 0.2672,
"step": 12825
},
{
"epoch": 0.16,
"grad_norm": 18.414783477783203,
"learning_rate": 5.117142857142857e-07,
"loss": 0.5164,
"step": 12850
},
{
"epoch": 0.16,
"grad_norm": 9.667854309082031,
"learning_rate": 5.099285714285714e-07,
"loss": 0.2618,
"step": 12875
},
{
"epoch": 0.16,
"grad_norm": 31.132843017578125,
"learning_rate": 5.081428571428571e-07,
"loss": 0.4941,
"step": 12900
},
{
"epoch": 0.16,
"grad_norm": 14.9796781539917,
"learning_rate": 5.063571428571428e-07,
"loss": 0.2561,
"step": 12925
},
{
"epoch": 0.16,
"grad_norm": 24.235136032104492,
"learning_rate": 5.045714285714286e-07,
"loss": 0.541,
"step": 12950
},
{
"epoch": 0.16,
"grad_norm": 11.6466064453125,
"learning_rate": 5.027857142857143e-07,
"loss": 0.2448,
"step": 12975
},
{
"epoch": 0.16,
"grad_norm": 25.600833892822266,
"learning_rate": 5.009999999999999e-07,
"loss": 0.535,
"step": 13000
},
{
"epoch": 0.16,
"grad_norm": 19.636394500732422,
"learning_rate": 4.992142857142857e-07,
"loss": 0.2688,
"step": 13025
},
{
"epoch": 0.16,
"grad_norm": 20.44740867614746,
"learning_rate": 4.974285714285714e-07,
"loss": 0.4732,
"step": 13050
},
{
"epoch": 0.16,
"grad_norm": 17.387100219726562,
"learning_rate": 4.956428571428572e-07,
"loss": 0.2469,
"step": 13075
},
{
"epoch": 0.16,
"grad_norm": 24.79920196533203,
"learning_rate": 4.938571428571428e-07,
"loss": 0.4996,
"step": 13100
},
{
"epoch": 0.16,
"grad_norm": 16.365625381469727,
"learning_rate": 4.920714285714286e-07,
"loss": 0.2572,
"step": 13125
},
{
"epoch": 0.16,
"grad_norm": 16.618408203125,
"learning_rate": 4.902857142857142e-07,
"loss": 0.5222,
"step": 13150
},
{
"epoch": 0.16,
"grad_norm": 11.194711685180664,
"learning_rate": 4.885e-07,
"loss": 0.291,
"step": 13175
},
{
"epoch": 0.16,
"grad_norm": 23.214378356933594,
"learning_rate": 4.867142857142857e-07,
"loss": 0.4937,
"step": 13200
},
{
"epoch": 0.16,
"grad_norm": 19.738113403320312,
"learning_rate": 4.849285714285715e-07,
"loss": 0.2994,
"step": 13225
},
{
"epoch": 0.16,
"grad_norm": 23.572124481201172,
"learning_rate": 4.831428571428571e-07,
"loss": 0.512,
"step": 13250
},
{
"epoch": 0.16,
"grad_norm": 17.797889709472656,
"learning_rate": 4.813571428571428e-07,
"loss": 0.2503,
"step": 13275
},
{
"epoch": 0.16,
"grad_norm": 19.600574493408203,
"learning_rate": 4.795714285714286e-07,
"loss": 0.498,
"step": 13300
},
{
"epoch": 0.16,
"grad_norm": 16.852575302124023,
"learning_rate": 4.777857142857142e-07,
"loss": 0.2647,
"step": 13325
},
{
"epoch": 0.16,
"grad_norm": 22.06913185119629,
"learning_rate": 4.76e-07,
"loss": 0.5222,
"step": 13350
},
{
"epoch": 0.16,
"grad_norm": 11.366828918457031,
"learning_rate": 4.7421428571428567e-07,
"loss": 0.2006,
"step": 13375
},
{
"epoch": 0.16,
"grad_norm": 26.567928314208984,
"learning_rate": 4.724285714285714e-07,
"loss": 0.6113,
"step": 13400
},
{
"epoch": 0.16,
"grad_norm": 17.440595626831055,
"learning_rate": 4.706428571428571e-07,
"loss": 0.2967,
"step": 13425
},
{
"epoch": 0.16,
"grad_norm": 32.105918884277344,
"learning_rate": 4.689285714285714e-07,
"loss": 0.4915,
"step": 13450
},
{
"epoch": 0.16,
"grad_norm": 15.742903709411621,
"learning_rate": 4.671428571428571e-07,
"loss": 0.2735,
"step": 13475
},
{
"epoch": 0.16,
"grad_norm": 16.67759895324707,
"learning_rate": 4.6535714285714286e-07,
"loss": 0.5659,
"step": 13500
},
{
"epoch": 0.16,
"grad_norm": 11.289206504821777,
"learning_rate": 4.6357142857142855e-07,
"loss": 0.2511,
"step": 13525
},
{
"epoch": 0.16,
"grad_norm": 24.552431106567383,
"learning_rate": 4.617857142857143e-07,
"loss": 0.5276,
"step": 13550
},
{
"epoch": 0.17,
"grad_norm": 16.458602905273438,
"learning_rate": 4.6e-07,
"loss": 0.2565,
"step": 13575
},
{
"epoch": 0.17,
"grad_norm": 20.243223190307617,
"learning_rate": 4.5821428571428574e-07,
"loss": 0.5313,
"step": 13600
},
{
"epoch": 0.17,
"grad_norm": 9.613998413085938,
"learning_rate": 4.564285714285714e-07,
"loss": 0.2872,
"step": 13625
},
{
"epoch": 0.17,
"grad_norm": 28.92829132080078,
"learning_rate": 4.546428571428571e-07,
"loss": 0.5673,
"step": 13650
},
{
"epoch": 0.17,
"grad_norm": 14.831592559814453,
"learning_rate": 4.528571428571428e-07,
"loss": 0.251,
"step": 13675
},
{
"epoch": 0.17,
"grad_norm": 29.575359344482422,
"learning_rate": 4.5107142857142856e-07,
"loss": 0.5256,
"step": 13700
},
{
"epoch": 0.17,
"grad_norm": 15.341351509094238,
"learning_rate": 4.4928571428571426e-07,
"loss": 0.2483,
"step": 13725
},
{
"epoch": 0.17,
"grad_norm": 31.83696174621582,
"learning_rate": 4.475e-07,
"loss": 0.5649,
"step": 13750
},
{
"epoch": 0.17,
"grad_norm": 15.396077156066895,
"learning_rate": 4.457142857142857e-07,
"loss": 0.3735,
"step": 13775
},
{
"epoch": 0.17,
"grad_norm": 23.51662826538086,
"learning_rate": 4.4392857142857144e-07,
"loss": 0.5416,
"step": 13800
},
{
"epoch": 0.17,
"grad_norm": 11.324971199035645,
"learning_rate": 4.421428571428571e-07,
"loss": 0.2627,
"step": 13825
},
{
"epoch": 0.17,
"grad_norm": 24.304067611694336,
"learning_rate": 4.4035714285714283e-07,
"loss": 0.5778,
"step": 13850
},
{
"epoch": 0.17,
"grad_norm": 16.256675720214844,
"learning_rate": 4.385714285714285e-07,
"loss": 0.2809,
"step": 13875
},
{
"epoch": 0.17,
"grad_norm": 20.40346908569336,
"learning_rate": 4.3678571428571427e-07,
"loss": 0.4767,
"step": 13900
},
{
"epoch": 0.17,
"grad_norm": 15.67408275604248,
"learning_rate": 4.3499999999999996e-07,
"loss": 0.2654,
"step": 13925
},
{
"epoch": 0.17,
"grad_norm": 23.163921356201172,
"learning_rate": 4.332142857142857e-07,
"loss": 0.5091,
"step": 13950
},
{
"epoch": 0.17,
"grad_norm": 17.081083297729492,
"learning_rate": 4.314285714285714e-07,
"loss": 0.2929,
"step": 13975
},
{
"epoch": 0.17,
"grad_norm": 26.59824562072754,
"learning_rate": 4.2964285714285715e-07,
"loss": 0.55,
"step": 14000
},
{
"epoch": 0.17,
"eval_loss": 0.45657408237457275,
"eval_runtime": 6018.009,
"eval_samples_per_second": 1.564,
"eval_steps_per_second": 0.196,
"eval_wer": 0.1674794122396254,
"step": 14000
},
{
"epoch": 0.17,
"grad_norm": 14.647737503051758,
"learning_rate": 4.2785714285714284e-07,
"loss": 0.2821,
"step": 14025
},
{
"epoch": 0.17,
"grad_norm": 22.898862838745117,
"learning_rate": 4.2607142857142854e-07,
"loss": 0.4583,
"step": 14050
},
{
"epoch": 0.17,
"grad_norm": 8.966913223266602,
"learning_rate": 4.2428571428571423e-07,
"loss": 0.283,
"step": 14075
},
{
"epoch": 0.17,
"grad_norm": 16.121633529663086,
"learning_rate": 4.225e-07,
"loss": 0.5825,
"step": 14100
},
{
"epoch": 0.17,
"grad_norm": 13.079733848571777,
"learning_rate": 4.2071428571428567e-07,
"loss": 0.2593,
"step": 14125
},
{
"epoch": 0.17,
"grad_norm": 17.657615661621094,
"learning_rate": 4.189285714285714e-07,
"loss": 0.5296,
"step": 14150
},
{
"epoch": 0.17,
"grad_norm": 16.596885681152344,
"learning_rate": 4.171428571428571e-07,
"loss": 0.2974,
"step": 14175
},
{
"epoch": 0.17,
"grad_norm": 20.227094650268555,
"learning_rate": 4.1535714285714286e-07,
"loss": 0.4874,
"step": 14200
},
{
"epoch": 0.17,
"grad_norm": 11.48043441772461,
"learning_rate": 4.1357142857142855e-07,
"loss": 0.2495,
"step": 14225
},
{
"epoch": 0.17,
"grad_norm": 27.15212631225586,
"learning_rate": 4.117857142857143e-07,
"loss": 0.4958,
"step": 14250
},
{
"epoch": 0.17,
"grad_norm": 13.044829368591309,
"learning_rate": 4.0999999999999994e-07,
"loss": 0.319,
"step": 14275
},
{
"epoch": 0.17,
"grad_norm": 25.95546531677246,
"learning_rate": 4.082142857142857e-07,
"loss": 0.5586,
"step": 14300
},
{
"epoch": 0.17,
"grad_norm": 16.749534606933594,
"learning_rate": 4.064285714285714e-07,
"loss": 0.2233,
"step": 14325
},
{
"epoch": 0.17,
"grad_norm": 17.384183883666992,
"learning_rate": 4.046428571428571e-07,
"loss": 0.5221,
"step": 14350
},
{
"epoch": 0.17,
"grad_norm": 23.787689208984375,
"learning_rate": 4.028571428571428e-07,
"loss": 0.2446,
"step": 14375
},
{
"epoch": 0.18,
"grad_norm": 23.294313430786133,
"learning_rate": 4.0107142857142857e-07,
"loss": 0.4733,
"step": 14400
},
{
"epoch": 0.18,
"grad_norm": 12.99344253540039,
"learning_rate": 3.9928571428571426e-07,
"loss": 0.2601,
"step": 14425
},
{
"epoch": 0.18,
"grad_norm": 27.727008819580078,
"learning_rate": 3.975e-07,
"loss": 0.5488,
"step": 14450
},
{
"epoch": 0.18,
"grad_norm": 13.73043441772461,
"learning_rate": 3.957142857142857e-07,
"loss": 0.2472,
"step": 14475
},
{
"epoch": 0.18,
"grad_norm": 22.068260192871094,
"learning_rate": 3.939285714285714e-07,
"loss": 0.4978,
"step": 14500
},
{
"epoch": 0.18,
"grad_norm": 11.672805786132812,
"learning_rate": 3.921428571428571e-07,
"loss": 0.243,
"step": 14525
},
{
"epoch": 0.18,
"grad_norm": 19.580429077148438,
"learning_rate": 3.9035714285714283e-07,
"loss": 0.4744,
"step": 14550
},
{
"epoch": 0.18,
"grad_norm": 12.825048446655273,
"learning_rate": 3.8857142857142853e-07,
"loss": 0.2659,
"step": 14575
},
{
"epoch": 0.18,
"grad_norm": 24.486330032348633,
"learning_rate": 3.8678571428571427e-07,
"loss": 0.512,
"step": 14600
},
{
"epoch": 0.18,
"grad_norm": 17.93408203125,
"learning_rate": 3.8499999999999997e-07,
"loss": 0.267,
"step": 14625
},
{
"epoch": 0.18,
"grad_norm": 23.19489288330078,
"learning_rate": 3.832142857142857e-07,
"loss": 0.4564,
"step": 14650
},
{
"epoch": 0.18,
"grad_norm": 11.471485137939453,
"learning_rate": 3.814285714285714e-07,
"loss": 0.2921,
"step": 14675
},
{
"epoch": 0.18,
"grad_norm": 21.568504333496094,
"learning_rate": 3.796428571428571e-07,
"loss": 0.4536,
"step": 14700
},
{
"epoch": 0.18,
"grad_norm": 20.54655647277832,
"learning_rate": 3.778571428571428e-07,
"loss": 0.2324,
"step": 14725
},
{
"epoch": 0.18,
"grad_norm": 23.148265838623047,
"learning_rate": 3.7607142857142854e-07,
"loss": 0.5221,
"step": 14750
},
{
"epoch": 0.18,
"grad_norm": 20.88414192199707,
"learning_rate": 3.7428571428571423e-07,
"loss": 0.3087,
"step": 14775
},
{
"epoch": 0.18,
"grad_norm": 22.698204040527344,
"learning_rate": 3.725e-07,
"loss": 0.5205,
"step": 14800
},
{
"epoch": 0.18,
"grad_norm": 10.197999000549316,
"learning_rate": 3.7071428571428573e-07,
"loss": 0.2257,
"step": 14825
},
{
"epoch": 0.18,
"grad_norm": 21.910158157348633,
"learning_rate": 3.689285714285714e-07,
"loss": 0.4669,
"step": 14850
},
{
"epoch": 0.18,
"grad_norm": 14.414984703063965,
"learning_rate": 3.6714285714285717e-07,
"loss": 0.2946,
"step": 14875
},
{
"epoch": 0.18,
"grad_norm": 25.156875610351562,
"learning_rate": 3.6535714285714286e-07,
"loss": 0.4716,
"step": 14900
},
{
"epoch": 0.18,
"grad_norm": 15.973970413208008,
"learning_rate": 3.6357142857142855e-07,
"loss": 0.2882,
"step": 14925
},
{
"epoch": 0.18,
"grad_norm": 20.180315017700195,
"learning_rate": 3.6178571428571425e-07,
"loss": 0.5465,
"step": 14950
},
{
"epoch": 0.18,
"grad_norm": 13.591038703918457,
"learning_rate": 3.6e-07,
"loss": 0.2794,
"step": 14975
},
{
"epoch": 0.18,
"grad_norm": 19.940364837646484,
"learning_rate": 3.582142857142857e-07,
"loss": 0.4463,
"step": 15000
},
{
"epoch": 0.18,
"grad_norm": 15.9667329788208,
"learning_rate": 3.5642857142857143e-07,
"loss": 0.2443,
"step": 15025
},
{
"epoch": 0.18,
"grad_norm": 35.515045166015625,
"learning_rate": 3.5464285714285713e-07,
"loss": 0.5523,
"step": 15050
},
{
"epoch": 0.18,
"grad_norm": 10.865702629089355,
"learning_rate": 3.528571428571429e-07,
"loss": 0.2733,
"step": 15075
},
{
"epoch": 0.18,
"grad_norm": 19.473037719726562,
"learning_rate": 3.5107142857142857e-07,
"loss": 0.5079,
"step": 15100
},
{
"epoch": 0.18,
"grad_norm": 31.667394638061523,
"learning_rate": 3.492857142857143e-07,
"loss": 0.2393,
"step": 15125
},
{
"epoch": 0.18,
"grad_norm": 29.721817016601562,
"learning_rate": 3.4749999999999996e-07,
"loss": 0.5714,
"step": 15150
},
{
"epoch": 0.18,
"grad_norm": 20.16938018798828,
"learning_rate": 3.457142857142857e-07,
"loss": 0.3115,
"step": 15175
},
{
"epoch": 0.18,
"grad_norm": 22.576316833496094,
"learning_rate": 3.439285714285714e-07,
"loss": 0.5191,
"step": 15200
},
{
"epoch": 0.19,
"grad_norm": 16.064035415649414,
"learning_rate": 3.4214285714285714e-07,
"loss": 0.305,
"step": 15225
},
{
"epoch": 0.19,
"grad_norm": 20.527408599853516,
"learning_rate": 3.4035714285714284e-07,
"loss": 0.5645,
"step": 15250
},
{
"epoch": 0.19,
"grad_norm": 6.0652337074279785,
"learning_rate": 3.385714285714286e-07,
"loss": 0.2188,
"step": 15275
},
{
"epoch": 0.19,
"grad_norm": 29.08500862121582,
"learning_rate": 3.367857142857143e-07,
"loss": 0.5295,
"step": 15300
},
{
"epoch": 0.19,
"grad_norm": 11.278789520263672,
"learning_rate": 3.35e-07,
"loss": 0.2096,
"step": 15325
},
{
"epoch": 0.19,
"grad_norm": 16.811601638793945,
"learning_rate": 3.332142857142857e-07,
"loss": 0.5849,
"step": 15350
},
{
"epoch": 0.19,
"grad_norm": 22.24079704284668,
"learning_rate": 3.314285714285714e-07,
"loss": 0.2348,
"step": 15375
},
{
"epoch": 0.19,
"grad_norm": 22.854068756103516,
"learning_rate": 3.296428571428571e-07,
"loss": 0.58,
"step": 15400
},
{
"epoch": 0.19,
"grad_norm": 20.740047454833984,
"learning_rate": 3.2785714285714285e-07,
"loss": 0.2833,
"step": 15425
},
{
"epoch": 0.19,
"grad_norm": 26.679668426513672,
"learning_rate": 3.2607142857142854e-07,
"loss": 0.5188,
"step": 15450
},
{
"epoch": 0.19,
"grad_norm": 8.891940116882324,
"learning_rate": 3.242857142857143e-07,
"loss": 0.2301,
"step": 15475
},
{
"epoch": 0.19,
"grad_norm": 28.738801956176758,
"learning_rate": 3.225e-07,
"loss": 0.6019,
"step": 15500
},
{
"epoch": 0.19,
"grad_norm": 25.064411163330078,
"learning_rate": 3.2071428571428573e-07,
"loss": 0.2489,
"step": 15525
},
{
"epoch": 0.19,
"grad_norm": 22.51979637145996,
"learning_rate": 3.189285714285714e-07,
"loss": 0.4956,
"step": 15550
},
{
"epoch": 0.19,
"grad_norm": 21.971162796020508,
"learning_rate": 3.171428571428571e-07,
"loss": 0.2867,
"step": 15575
},
{
"epoch": 0.19,
"grad_norm": 29.71180534362793,
"learning_rate": 3.153571428571428e-07,
"loss": 0.5688,
"step": 15600
},
{
"epoch": 0.19,
"grad_norm": 8.722527503967285,
"learning_rate": 3.1357142857142856e-07,
"loss": 0.2506,
"step": 15625
},
{
"epoch": 0.19,
"grad_norm": 24.56787872314453,
"learning_rate": 3.1178571428571425e-07,
"loss": 0.4948,
"step": 15650
},
{
"epoch": 0.19,
"grad_norm": 9.346379280090332,
"learning_rate": 3.1e-07,
"loss": 0.2228,
"step": 15675
},
{
"epoch": 0.19,
"grad_norm": 25.692184448242188,
"learning_rate": 3.082142857142857e-07,
"loss": 0.459,
"step": 15700
},
{
"epoch": 0.19,
"grad_norm": 29.484182357788086,
"learning_rate": 3.0642857142857144e-07,
"loss": 0.2462,
"step": 15725
},
{
"epoch": 0.19,
"grad_norm": 17.136838912963867,
"learning_rate": 3.0464285714285713e-07,
"loss": 0.4357,
"step": 15750
},
{
"epoch": 0.19,
"grad_norm": 17.58600425720215,
"learning_rate": 3.028571428571429e-07,
"loss": 0.2663,
"step": 15775
},
{
"epoch": 0.19,
"grad_norm": 18.859933853149414,
"learning_rate": 3.010714285714285e-07,
"loss": 0.5014,
"step": 15800
},
{
"epoch": 0.19,
"grad_norm": 10.988012313842773,
"learning_rate": 2.9928571428571426e-07,
"loss": 0.2599,
"step": 15825
},
{
"epoch": 0.19,
"grad_norm": 26.970216751098633,
"learning_rate": 2.9749999999999996e-07,
"loss": 0.527,
"step": 15850
},
{
"epoch": 0.19,
"grad_norm": 16.885482788085938,
"learning_rate": 2.957142857142857e-07,
"loss": 0.3002,
"step": 15875
},
{
"epoch": 0.19,
"grad_norm": 21.51068115234375,
"learning_rate": 2.939285714285714e-07,
"loss": 0.5631,
"step": 15900
},
{
"epoch": 0.19,
"grad_norm": 9.604096412658691,
"learning_rate": 2.9214285714285714e-07,
"loss": 0.2152,
"step": 15925
},
{
"epoch": 0.19,
"grad_norm": 17.939319610595703,
"learning_rate": 2.9035714285714284e-07,
"loss": 0.5335,
"step": 15950
},
{
"epoch": 0.19,
"grad_norm": 8.930473327636719,
"learning_rate": 2.885714285714286e-07,
"loss": 0.2041,
"step": 15975
},
{
"epoch": 0.19,
"grad_norm": 29.47776222229004,
"learning_rate": 2.867857142857143e-07,
"loss": 0.4524,
"step": 16000
},
{
"epoch": 0.19,
"eval_loss": 0.46478670835494995,
"eval_runtime": 5851.5448,
"eval_samples_per_second": 1.609,
"eval_steps_per_second": 0.201,
"eval_wer": 0.16631680930082351,
"step": 16000
},
{
"epoch": 0.19,
"grad_norm": 14.176447868347168,
"learning_rate": 2.8499999999999997e-07,
"loss": 0.2508,
"step": 16025
},
{
"epoch": 0.2,
"grad_norm": 23.6607666015625,
"learning_rate": 2.8321428571428566e-07,
"loss": 0.4966,
"step": 16050
},
{
"epoch": 0.2,
"grad_norm": 14.520540237426758,
"learning_rate": 2.814285714285714e-07,
"loss": 0.2649,
"step": 16075
},
{
"epoch": 0.2,
"grad_norm": 22.186479568481445,
"learning_rate": 2.796428571428571e-07,
"loss": 0.5105,
"step": 16100
},
{
"epoch": 0.2,
"grad_norm": 21.052902221679688,
"learning_rate": 2.7785714285714285e-07,
"loss": 0.2248,
"step": 16125
},
{
"epoch": 0.2,
"grad_norm": 25.37480354309082,
"learning_rate": 2.7607142857142854e-07,
"loss": 0.5451,
"step": 16150
},
{
"epoch": 0.2,
"grad_norm": 22.131818771362305,
"learning_rate": 2.742857142857143e-07,
"loss": 0.2533,
"step": 16175
},
{
"epoch": 0.2,
"grad_norm": 27.51265525817871,
"learning_rate": 2.725e-07,
"loss": 0.4579,
"step": 16200
},
{
"epoch": 0.2,
"grad_norm": 10.886811256408691,
"learning_rate": 2.7071428571428573e-07,
"loss": 0.197,
"step": 16225
},
{
"epoch": 0.2,
"grad_norm": 24.875947952270508,
"learning_rate": 2.6892857142857137e-07,
"loss": 0.46,
"step": 16250
},
{
"epoch": 0.2,
"grad_norm": 9.09632396697998,
"learning_rate": 2.671428571428571e-07,
"loss": 0.2771,
"step": 16275
},
{
"epoch": 0.2,
"grad_norm": 13.609580993652344,
"learning_rate": 2.653571428571428e-07,
"loss": 0.4694,
"step": 16300
},
{
"epoch": 0.2,
"grad_norm": 9.03917121887207,
"learning_rate": 2.6357142857142856e-07,
"loss": 0.2659,
"step": 16325
},
{
"epoch": 0.2,
"grad_norm": 23.09002113342285,
"learning_rate": 2.6178571428571425e-07,
"loss": 0.5136,
"step": 16350
},
{
"epoch": 0.2,
"grad_norm": 16.277647018432617,
"learning_rate": 2.6e-07,
"loss": 0.2674,
"step": 16375
},
{
"epoch": 0.2,
"grad_norm": 28.242874145507812,
"learning_rate": 2.582142857142857e-07,
"loss": 0.5222,
"step": 16400
},
{
"epoch": 0.2,
"grad_norm": 14.258796691894531,
"learning_rate": 2.5642857142857144e-07,
"loss": 0.2876,
"step": 16425
},
{
"epoch": 0.2,
"grad_norm": 31.952289581298828,
"learning_rate": 2.546428571428571e-07,
"loss": 0.5867,
"step": 16450
},
{
"epoch": 0.2,
"grad_norm": 13.42287540435791,
"learning_rate": 2.528571428571428e-07,
"loss": 0.292,
"step": 16475
},
{
"epoch": 0.2,
"grad_norm": 19.588457107543945,
"learning_rate": 2.510714285714285e-07,
"loss": 0.5209,
"step": 16500
},
{
"epoch": 0.2,
"grad_norm": 9.94702434539795,
"learning_rate": 2.4928571428571427e-07,
"loss": 0.2441,
"step": 16525
},
{
"epoch": 0.2,
"grad_norm": 21.62166404724121,
"learning_rate": 2.475e-07,
"loss": 0.4807,
"step": 16550
},
{
"epoch": 0.2,
"grad_norm": 14.849344253540039,
"learning_rate": 2.457142857142857e-07,
"loss": 0.2493,
"step": 16575
},
{
"epoch": 0.2,
"grad_norm": 27.06203842163086,
"learning_rate": 2.4392857142857145e-07,
"loss": 0.5525,
"step": 16600
},
{
"epoch": 0.2,
"grad_norm": 10.47172737121582,
"learning_rate": 2.4214285714285715e-07,
"loss": 0.2425,
"step": 16625
},
{
"epoch": 0.2,
"grad_norm": 26.490150451660156,
"learning_rate": 2.4035714285714284e-07,
"loss": 0.5732,
"step": 16650
},
{
"epoch": 0.2,
"grad_norm": 9.605720520019531,
"learning_rate": 2.385714285714286e-07,
"loss": 0.2543,
"step": 16675
},
{
"epoch": 0.2,
"grad_norm": 20.90159034729004,
"learning_rate": 2.3678571428571428e-07,
"loss": 0.42,
"step": 16700
},
{
"epoch": 0.2,
"grad_norm": 9.984245300292969,
"learning_rate": 2.3499999999999997e-07,
"loss": 0.2732,
"step": 16725
},
{
"epoch": 0.2,
"grad_norm": 20.062936782836914,
"learning_rate": 2.332142857142857e-07,
"loss": 0.5472,
"step": 16750
},
{
"epoch": 0.2,
"grad_norm": 14.146800994873047,
"learning_rate": 2.3142857142857141e-07,
"loss": 0.2751,
"step": 16775
},
{
"epoch": 0.2,
"grad_norm": 23.891094207763672,
"learning_rate": 2.2964285714285713e-07,
"loss": 0.5239,
"step": 16800
},
{
"epoch": 0.2,
"grad_norm": 8.14566707611084,
"learning_rate": 2.2785714285714285e-07,
"loss": 0.2631,
"step": 16825
},
{
"epoch": 0.2,
"grad_norm": 19.639022827148438,
"learning_rate": 2.2607142857142855e-07,
"loss": 0.5249,
"step": 16850
},
{
"epoch": 0.21,
"grad_norm": 9.657629013061523,
"learning_rate": 2.2428571428571427e-07,
"loss": 0.3039,
"step": 16875
},
{
"epoch": 0.21,
"grad_norm": 17.923763275146484,
"learning_rate": 2.225e-07,
"loss": 0.4895,
"step": 16900
},
{
"epoch": 0.21,
"grad_norm": 13.95570182800293,
"learning_rate": 2.207142857142857e-07,
"loss": 0.2654,
"step": 16925
},
{
"epoch": 0.21,
"grad_norm": 15.692480087280273,
"learning_rate": 2.189285714285714e-07,
"loss": 0.4649,
"step": 16950
},
{
"epoch": 0.21,
"grad_norm": 16.137231826782227,
"learning_rate": 2.1714285714285712e-07,
"loss": 0.2288,
"step": 16975
},
{
"epoch": 0.21,
"grad_norm": 29.005409240722656,
"learning_rate": 2.1535714285714284e-07,
"loss": 0.5068,
"step": 17000
},
{
"epoch": 0.21,
"grad_norm": 16.533184051513672,
"learning_rate": 2.1357142857142856e-07,
"loss": 0.2713,
"step": 17025
},
{
"epoch": 0.21,
"grad_norm": 26.245988845825195,
"learning_rate": 2.1178571428571428e-07,
"loss": 0.51,
"step": 17050
},
{
"epoch": 0.21,
"grad_norm": 16.645214080810547,
"learning_rate": 2.0999999999999997e-07,
"loss": 0.2328,
"step": 17075
},
{
"epoch": 0.21,
"grad_norm": 20.229503631591797,
"learning_rate": 2.082142857142857e-07,
"loss": 0.5205,
"step": 17100
},
{
"epoch": 0.21,
"grad_norm": 16.3220157623291,
"learning_rate": 2.0642857142857141e-07,
"loss": 0.2531,
"step": 17125
},
{
"epoch": 0.21,
"grad_norm": 25.806249618530273,
"learning_rate": 2.0464285714285713e-07,
"loss": 0.5184,
"step": 17150
},
{
"epoch": 0.21,
"grad_norm": 12.498074531555176,
"learning_rate": 2.0285714285714283e-07,
"loss": 0.2544,
"step": 17175
},
{
"epoch": 0.21,
"grad_norm": 24.79607582092285,
"learning_rate": 2.0107142857142855e-07,
"loss": 0.5435,
"step": 17200
},
{
"epoch": 0.21,
"grad_norm": 24.32138442993164,
"learning_rate": 1.9928571428571427e-07,
"loss": 0.2794,
"step": 17225
},
{
"epoch": 0.21,
"grad_norm": 22.922056198120117,
"learning_rate": 1.975e-07,
"loss": 0.4487,
"step": 17250
},
{
"epoch": 0.21,
"grad_norm": 16.88331413269043,
"learning_rate": 1.9571428571428568e-07,
"loss": 0.262,
"step": 17275
},
{
"epoch": 0.21,
"grad_norm": 27.17171287536621,
"learning_rate": 1.939285714285714e-07,
"loss": 0.4624,
"step": 17300
},
{
"epoch": 0.21,
"grad_norm": 13.903470039367676,
"learning_rate": 1.9214285714285712e-07,
"loss": 0.27,
"step": 17325
},
{
"epoch": 0.21,
"grad_norm": 9.872733116149902,
"learning_rate": 1.9035714285714284e-07,
"loss": 0.5501,
"step": 17350
},
{
"epoch": 0.21,
"grad_norm": 20.017364501953125,
"learning_rate": 1.885714285714286e-07,
"loss": 0.2589,
"step": 17375
},
{
"epoch": 0.21,
"grad_norm": 24.02932357788086,
"learning_rate": 1.8678571428571426e-07,
"loss": 0.5821,
"step": 17400
},
{
"epoch": 0.21,
"grad_norm": 19.6367244720459,
"learning_rate": 1.85e-07,
"loss": 0.3638,
"step": 17425
},
{
"epoch": 0.21,
"grad_norm": Infinity,
"learning_rate": 1.8328571428571426e-07,
"loss": 0.5555,
"step": 17450
},
{
"epoch": 0.21,
"grad_norm": 13.749106407165527,
"learning_rate": 1.8149999999999998e-07,
"loss": 0.3132,
"step": 17475
},
{
"epoch": 0.21,
"grad_norm": 15.566059112548828,
"learning_rate": 1.797142857142857e-07,
"loss": 0.5162,
"step": 17500
},
{
"epoch": 0.21,
"grad_norm": 21.73044204711914,
"learning_rate": 1.7792857142857142e-07,
"loss": 0.24,
"step": 17525
},
{
"epoch": 0.21,
"grad_norm": 29.611074447631836,
"learning_rate": 1.7614285714285714e-07,
"loss": 0.5108,
"step": 17550
},
{
"epoch": 0.21,
"grad_norm": 16.056873321533203,
"learning_rate": 1.7435714285714283e-07,
"loss": 0.2247,
"step": 17575
},
{
"epoch": 0.21,
"grad_norm": 26.387723922729492,
"learning_rate": 1.7257142857142855e-07,
"loss": 0.5551,
"step": 17600
},
{
"epoch": 0.21,
"grad_norm": 15.484909057617188,
"learning_rate": 1.7078571428571427e-07,
"loss": 0.2685,
"step": 17625
},
{
"epoch": 0.21,
"grad_norm": 12.681870460510254,
"learning_rate": 1.69e-07,
"loss": 0.5002,
"step": 17650
},
{
"epoch": 0.21,
"grad_norm": 20.047056198120117,
"learning_rate": 1.6721428571428568e-07,
"loss": 0.305,
"step": 17675
},
{
"epoch": 0.22,
"grad_norm": 18.66116714477539,
"learning_rate": 1.654285714285714e-07,
"loss": 0.5289,
"step": 17700
},
{
"epoch": 0.22,
"grad_norm": 20.76094627380371,
"learning_rate": 1.6364285714285712e-07,
"loss": 0.2464,
"step": 17725
},
{
"epoch": 0.22,
"grad_norm": 18.275331497192383,
"learning_rate": 1.6185714285714287e-07,
"loss": 0.4905,
"step": 17750
},
{
"epoch": 0.22,
"grad_norm": 13.712215423583984,
"learning_rate": 1.6007142857142854e-07,
"loss": 0.2137,
"step": 17775
},
{
"epoch": 0.22,
"grad_norm": 22.638090133666992,
"learning_rate": 1.5828571428571429e-07,
"loss": 0.5996,
"step": 17800
},
{
"epoch": 0.22,
"grad_norm": 22.729068756103516,
"learning_rate": 1.565e-07,
"loss": 0.2864,
"step": 17825
},
{
"epoch": 0.22,
"grad_norm": 15.553844451904297,
"learning_rate": 1.5471428571428573e-07,
"loss": 0.4922,
"step": 17850
},
{
"epoch": 0.22,
"grad_norm": 20.399259567260742,
"learning_rate": 1.5292857142857145e-07,
"loss": 0.276,
"step": 17875
},
{
"epoch": 0.22,
"grad_norm": 20.935850143432617,
"learning_rate": 1.5114285714285714e-07,
"loss": 0.5087,
"step": 17900
},
{
"epoch": 0.22,
"grad_norm": 18.157344818115234,
"learning_rate": 1.4935714285714286e-07,
"loss": 0.2693,
"step": 17925
},
{
"epoch": 0.22,
"grad_norm": 14.853775024414062,
"learning_rate": 1.4757142857142858e-07,
"loss": 0.5126,
"step": 17950
},
{
"epoch": 0.22,
"grad_norm": 15.481501579284668,
"learning_rate": 1.457857142857143e-07,
"loss": 0.2645,
"step": 17975
},
{
"epoch": 0.22,
"grad_norm": 25.41742515563965,
"learning_rate": 1.44e-07,
"loss": 0.4667,
"step": 18000
},
{
"epoch": 0.22,
"eval_loss": 0.46059784293174744,
"eval_runtime": 5876.4376,
"eval_samples_per_second": 1.602,
"eval_steps_per_second": 0.2,
"eval_wer": 0.16560632972711126,
"step": 18000
},
{
"epoch": 0.22,
"grad_norm": 13.322805404663086,
"learning_rate": 1.422142857142857e-07,
"loss": 0.2463,
"step": 18025
},
{
"epoch": 0.22,
"grad_norm": 20.623361587524414,
"learning_rate": 1.4042857142857143e-07,
"loss": 0.4702,
"step": 18050
},
{
"epoch": 0.22,
"grad_norm": 11.25727653503418,
"learning_rate": 1.3864285714285715e-07,
"loss": 0.2297,
"step": 18075
},
{
"epoch": 0.22,
"grad_norm": 24.11102867126465,
"learning_rate": 1.3685714285714285e-07,
"loss": 0.5188,
"step": 18100
},
{
"epoch": 0.22,
"grad_norm": 14.874909400939941,
"learning_rate": 1.3507142857142857e-07,
"loss": 0.2141,
"step": 18125
},
{
"epoch": 0.22,
"grad_norm": 24.302215576171875,
"learning_rate": 1.3328571428571429e-07,
"loss": 0.4678,
"step": 18150
},
{
"epoch": 0.22,
"grad_norm": 15.379274368286133,
"learning_rate": 1.315e-07,
"loss": 0.2166,
"step": 18175
},
{
"epoch": 0.22,
"grad_norm": 24.384815216064453,
"learning_rate": 1.2971428571428573e-07,
"loss": 0.4773,
"step": 18200
},
{
"epoch": 0.22,
"grad_norm": 14.261998176574707,
"learning_rate": 1.2792857142857142e-07,
"loss": 0.2611,
"step": 18225
},
{
"epoch": 0.22,
"grad_norm": 26.193994522094727,
"learning_rate": 1.2614285714285714e-07,
"loss": 0.4536,
"step": 18250
},
{
"epoch": 0.22,
"grad_norm": 15.70848560333252,
"learning_rate": 1.2435714285714286e-07,
"loss": 0.2462,
"step": 18275
},
{
"epoch": 0.22,
"grad_norm": 21.586435317993164,
"learning_rate": 1.2257142857142855e-07,
"loss": 0.5351,
"step": 18300
},
{
"epoch": 0.22,
"grad_norm": 13.686724662780762,
"learning_rate": 1.2078571428571427e-07,
"loss": 0.2181,
"step": 18325
},
{
"epoch": 0.22,
"grad_norm": 24.10140609741211,
"learning_rate": 1.19e-07,
"loss": 0.5414,
"step": 18350
},
{
"epoch": 0.22,
"grad_norm": 16.506885528564453,
"learning_rate": 1.1721428571428571e-07,
"loss": 0.248,
"step": 18375
},
{
"epoch": 0.22,
"grad_norm": 19.96807289123535,
"learning_rate": 1.1542857142857142e-07,
"loss": 0.4668,
"step": 18400
},
{
"epoch": 0.22,
"grad_norm": 7.820561408996582,
"learning_rate": 1.1364285714285714e-07,
"loss": 0.2736,
"step": 18425
},
{
"epoch": 0.22,
"grad_norm": 25.755311965942383,
"learning_rate": 1.1185714285714286e-07,
"loss": 0.5258,
"step": 18450
},
{
"epoch": 0.22,
"grad_norm": 12.378037452697754,
"learning_rate": 1.1007142857142857e-07,
"loss": 0.2869,
"step": 18475
},
{
"epoch": 0.22,
"grad_norm": 15.160594940185547,
"learning_rate": 1.0828571428571429e-07,
"loss": 0.4904,
"step": 18500
},
{
"epoch": 0.23,
"grad_norm": 20.206640243530273,
"learning_rate": 1.065e-07,
"loss": 0.2504,
"step": 18525
},
{
"epoch": 0.23,
"grad_norm": 22.9788875579834,
"learning_rate": 1.0471428571428571e-07,
"loss": 0.5201,
"step": 18550
},
{
"epoch": 0.23,
"grad_norm": 15.308506965637207,
"learning_rate": 1.0292857142857142e-07,
"loss": 0.258,
"step": 18575
},
{
"epoch": 0.23,
"grad_norm": 14.326108932495117,
"learning_rate": 1.0114285714285714e-07,
"loss": 0.4509,
"step": 18600
},
{
"epoch": 0.23,
"grad_norm": 26.459646224975586,
"learning_rate": 9.935714285714285e-08,
"loss": 0.2744,
"step": 18625
},
{
"epoch": 0.23,
"grad_norm": 19.211641311645508,
"learning_rate": 9.757142857142857e-08,
"loss": 0.5296,
"step": 18650
},
{
"epoch": 0.23,
"grad_norm": 15.568469047546387,
"learning_rate": 9.578571428571427e-08,
"loss": 0.2472,
"step": 18675
},
{
"epoch": 0.23,
"grad_norm": 29.26220703125,
"learning_rate": 9.4e-08,
"loss": 0.4798,
"step": 18700
},
{
"epoch": 0.23,
"grad_norm": 25.08895492553711,
"learning_rate": 9.221428571428571e-08,
"loss": 0.2771,
"step": 18725
},
{
"epoch": 0.23,
"grad_norm": 19.168804168701172,
"learning_rate": 9.042857142857142e-08,
"loss": 0.506,
"step": 18750
},
{
"epoch": 0.23,
"grad_norm": 17.489612579345703,
"learning_rate": 8.864285714285714e-08,
"loss": 0.2914,
"step": 18775
},
{
"epoch": 0.23,
"grad_norm": 16.952068328857422,
"learning_rate": 8.685714285714285e-08,
"loss": 0.5445,
"step": 18800
},
{
"epoch": 0.23,
"grad_norm": 13.241303443908691,
"learning_rate": 8.507142857142857e-08,
"loss": 0.2818,
"step": 18825
},
{
"epoch": 0.23,
"grad_norm": 21.655899047851562,
"learning_rate": 8.328571428571428e-08,
"loss": 0.5256,
"step": 18850
},
{
"epoch": 0.23,
"grad_norm": 18.215364456176758,
"learning_rate": 8.15e-08,
"loss": 0.2672,
"step": 18875
},
{
"epoch": 0.23,
"grad_norm": 26.478195190429688,
"learning_rate": 7.97142857142857e-08,
"loss": 0.4729,
"step": 18900
},
{
"epoch": 0.23,
"grad_norm": 16.756649017333984,
"learning_rate": 7.792857142857142e-08,
"loss": 0.2469,
"step": 18925
},
{
"epoch": 0.23,
"grad_norm": 25.294132232666016,
"learning_rate": 7.614285714285713e-08,
"loss": 0.5517,
"step": 18950
},
{
"epoch": 0.23,
"grad_norm": 15.660456657409668,
"learning_rate": 7.435714285714285e-08,
"loss": 0.2261,
"step": 18975
},
{
"epoch": 0.23,
"grad_norm": 24.570951461791992,
"learning_rate": 7.257142857142856e-08,
"loss": 0.464,
"step": 19000
},
{
"epoch": 0.23,
"grad_norm": 13.32069206237793,
"learning_rate": 7.078571428571428e-08,
"loss": 0.2759,
"step": 19025
},
{
"epoch": 0.23,
"grad_norm": 27.2366943359375,
"learning_rate": 6.900000000000001e-08,
"loss": 0.4982,
"step": 19050
},
{
"epoch": 0.23,
"grad_norm": 10.226125717163086,
"learning_rate": 6.721428571428572e-08,
"loss": 0.2669,
"step": 19075
},
{
"epoch": 0.23,
"grad_norm": 28.88689613342285,
"learning_rate": 6.542857142857144e-08,
"loss": 0.5166,
"step": 19100
},
{
"epoch": 0.23,
"grad_norm": 9.242531776428223,
"learning_rate": 6.364285714285714e-08,
"loss": 0.2363,
"step": 19125
},
{
"epoch": 0.23,
"grad_norm": 26.46955108642578,
"learning_rate": 6.185714285714286e-08,
"loss": 0.5085,
"step": 19150
},
{
"epoch": 0.23,
"grad_norm": 17.96638298034668,
"learning_rate": 6.007142857142857e-08,
"loss": 0.2484,
"step": 19175
},
{
"epoch": 0.23,
"grad_norm": 19.205671310424805,
"learning_rate": 5.828571428571428e-08,
"loss": 0.4366,
"step": 19200
},
{
"epoch": 0.23,
"grad_norm": 19.420578002929688,
"learning_rate": 5.6499999999999996e-08,
"loss": 0.2316,
"step": 19225
},
{
"epoch": 0.23,
"grad_norm": Infinity,
"learning_rate": 5.4785714285714285e-08,
"loss": 0.5216,
"step": 19250
},
{
"epoch": 0.23,
"grad_norm": 11.171784400939941,
"learning_rate": 5.3e-08,
"loss": 0.2496,
"step": 19275
},
{
"epoch": 0.23,
"grad_norm": 18.622215270996094,
"learning_rate": 5.121428571428571e-08,
"loss": 0.5563,
"step": 19300
},
{
"epoch": 0.23,
"grad_norm": 15.999675750732422,
"learning_rate": 4.9428571428571425e-08,
"loss": 0.2272,
"step": 19325
},
{
"epoch": 0.24,
"grad_norm": 29.481321334838867,
"learning_rate": 4.764285714285714e-08,
"loss": 0.5892,
"step": 19350
},
{
"epoch": 0.24,
"grad_norm": 17.218955993652344,
"learning_rate": 4.585714285714286e-08,
"loss": 0.2746,
"step": 19375
},
{
"epoch": 0.24,
"grad_norm": 17.27052879333496,
"learning_rate": 4.407142857142857e-08,
"loss": 0.4847,
"step": 19400
},
{
"epoch": 0.24,
"grad_norm": 13.923643112182617,
"learning_rate": 4.2285714285714285e-08,
"loss": 0.2418,
"step": 19425
},
{
"epoch": 0.24,
"grad_norm": 20.42319679260254,
"learning_rate": 4.05e-08,
"loss": 0.5244,
"step": 19450
},
{
"epoch": 0.24,
"grad_norm": 21.954940795898438,
"learning_rate": 3.871428571428571e-08,
"loss": 0.265,
"step": 19475
},
{
"epoch": 0.24,
"grad_norm": 22.70041847229004,
"learning_rate": 3.6928571428571426e-08,
"loss": 0.4468,
"step": 19500
},
{
"epoch": 0.24,
"grad_norm": 6.995919227600098,
"learning_rate": 3.514285714285714e-08,
"loss": 0.271,
"step": 19525
},
{
"epoch": 0.24,
"grad_norm": 19.12382698059082,
"learning_rate": 3.335714285714285e-08,
"loss": 0.5083,
"step": 19550
},
{
"epoch": 0.24,
"grad_norm": 13.486324310302734,
"learning_rate": 3.1571428571428566e-08,
"loss": 0.2645,
"step": 19575
},
{
"epoch": 0.24,
"grad_norm": 20.960376739501953,
"learning_rate": 2.9785714285714286e-08,
"loss": 0.4991,
"step": 19600
},
{
"epoch": 0.24,
"grad_norm": 19.863283157348633,
"learning_rate": 2.8e-08,
"loss": 0.25,
"step": 19625
},
{
"epoch": 0.24,
"grad_norm": 24.58002281188965,
"learning_rate": 2.6214285714285713e-08,
"loss": 0.5011,
"step": 19650
},
{
"epoch": 0.24,
"grad_norm": 17.584407806396484,
"learning_rate": 2.4428571428571426e-08,
"loss": 0.3192,
"step": 19675
},
{
"epoch": 0.24,
"grad_norm": 26.56721305847168,
"learning_rate": 2.264285714285714e-08,
"loss": 0.4995,
"step": 19700
},
{
"epoch": 0.24,
"grad_norm": 13.253254890441895,
"learning_rate": 2.0857142857142856e-08,
"loss": 0.2682,
"step": 19725
},
{
"epoch": 0.24,
"grad_norm": 22.812877655029297,
"learning_rate": 1.9071428571428573e-08,
"loss": 0.5177,
"step": 19750
},
{
"epoch": 0.24,
"grad_norm": 10.498454093933105,
"learning_rate": 1.7285714285714286e-08,
"loss": 0.2192,
"step": 19775
},
{
"epoch": 0.24,
"grad_norm": 31.69744110107422,
"learning_rate": 1.55e-08,
"loss": 0.5125,
"step": 19800
},
{
"epoch": 0.24,
"grad_norm": 11.616924285888672,
"learning_rate": 1.3714285714285713e-08,
"loss": 0.2494,
"step": 19825
},
{
"epoch": 0.24,
"grad_norm": 25.574071884155273,
"learning_rate": 1.1928571428571428e-08,
"loss": 0.4375,
"step": 19850
},
{
"epoch": 0.24,
"grad_norm": 16.213420867919922,
"learning_rate": 1.0142857142857142e-08,
"loss": 0.2279,
"step": 19875
},
{
"epoch": 0.24,
"grad_norm": 16.850711822509766,
"learning_rate": 8.357142857142857e-09,
"loss": 0.466,
"step": 19900
},
{
"epoch": 0.24,
"grad_norm": 11.44871997833252,
"learning_rate": 6.571428571428572e-09,
"loss": 0.2349,
"step": 19925
},
{
"epoch": 0.24,
"grad_norm": 24.55624008178711,
"learning_rate": 4.785714285714285e-09,
"loss": 0.4876,
"step": 19950
},
{
"epoch": 0.24,
"grad_norm": 11.952068328857422,
"learning_rate": 3e-09,
"loss": 0.2713,
"step": 19975
},
{
"epoch": 0.24,
"grad_norm": 17.582544326782227,
"learning_rate": 1.2142857142857142e-09,
"loss": 0.5133,
"step": 20000
},
{
"epoch": 0.24,
"eval_loss": 0.4545673429965973,
"eval_runtime": 5956.6112,
"eval_samples_per_second": 1.58,
"eval_steps_per_second": 0.198,
"eval_wer": 0.16517035362506055,
"step": 20000
},
{
"epoch": 0.24,
"step": 20000,
"total_flos": 1.632967852032e+20,
"train_loss": 0.19572856886386872,
"train_runtime": 76264.8528,
"train_samples_per_second": 2.098,
"train_steps_per_second": 0.262
}
],
"logging_steps": 25,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 1.632967852032e+20,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}