224s-finetune-xty / trainer_state.json
{
"best_metric": 0.923943661971831,
"best_model_checkpoint": "./ssw-finetune/checkpoint-1150",
"epoch": 115.0,
"eval_steps": 25,
"global_step": 1150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5,
"grad_norm": 5.561491012573242,
"learning_rate": 2.9999999999999997e-06,
"loss": 7.7799,
"step": 5
},
{
"epoch": 1.0,
"grad_norm": 4.18166971206665,
"learning_rate": 6.749999999999999e-06,
"loss": 7.4713,
"step": 10
},
{
"epoch": 1.5,
"grad_norm": 6.806884288787842,
"learning_rate": 1.05e-05,
"loss": 7.696,
"step": 15
},
{
"epoch": 2.0,
"grad_norm": NaN,
"learning_rate": 1.3499999999999998e-05,
"loss": 7.9462,
"step": 20
},
{
"epoch": 2.5,
"grad_norm": NaN,
"learning_rate": 1.6499999999999998e-05,
"loss": 8.0165,
"step": 25
},
{
"epoch": 2.5,
"eval_loss": 7.467132091522217,
"eval_runtime": 1.054,
"eval_samples_per_second": 22.77,
"eval_steps_per_second": 0.949,
"eval_wer": 1.0,
"step": 25
},
{
"epoch": 3.0,
"grad_norm": 2.7979044914245605,
"learning_rate": 2.025e-05,
"loss": 6.5204,
"step": 30
},
{
"epoch": 3.5,
"grad_norm": 9.716986656188965,
"learning_rate": 2.3999999999999997e-05,
"loss": 7.6715,
"step": 35
},
{
"epoch": 4.0,
"grad_norm": 3.3519299030303955,
"learning_rate": 2.7749999999999997e-05,
"loss": 7.0161,
"step": 40
},
{
"epoch": 4.5,
"grad_norm": 5.6749138832092285,
"learning_rate": 3.149999999999999e-05,
"loss": 8.0617,
"step": 45
},
{
"epoch": 5.0,
"grad_norm": 8.150848388671875,
"learning_rate": 3.5249999999999996e-05,
"loss": 6.3142,
"step": 50
},
{
"epoch": 5.0,
"eval_loss": 6.626723766326904,
"eval_runtime": 1.071,
"eval_samples_per_second": 22.409,
"eval_steps_per_second": 0.934,
"eval_wer": 1.0,
"step": 50
},
{
"epoch": 5.5,
"grad_norm": 6.339476585388184,
"learning_rate": 3.9e-05,
"loss": 6.2643,
"step": 55
},
{
"epoch": 6.0,
"grad_norm": 13.012835502624512,
"learning_rate": 4.2749999999999996e-05,
"loss": 7.1655,
"step": 60
},
{
"epoch": 6.5,
"grad_norm": 11.24893569946289,
"learning_rate": 4.65e-05,
"loss": 5.8178,
"step": 65
},
{
"epoch": 7.0,
"grad_norm": 24.677473068237305,
"learning_rate": 5.025e-05,
"loss": 5.5684,
"step": 70
},
{
"epoch": 7.5,
"grad_norm": 8.214367866516113,
"learning_rate": 5.399999999999999e-05,
"loss": 4.3185,
"step": 75
},
{
"epoch": 7.5,
"eval_loss": 3.72790789604187,
"eval_runtime": 1.0391,
"eval_samples_per_second": 23.097,
"eval_steps_per_second": 0.962,
"eval_wer": 1.0,
"step": 75
},
{
"epoch": 8.0,
"grad_norm": 8.643641471862793,
"learning_rate": 5.7749999999999994e-05,
"loss": 4.1807,
"step": 80
},
{
"epoch": 8.5,
"grad_norm": 10.54008674621582,
"learning_rate": 6.149999999999999e-05,
"loss": 3.7552,
"step": 85
},
{
"epoch": 9.0,
"grad_norm": 3.332289934158325,
"learning_rate": 6.525e-05,
"loss": 3.7053,
"step": 90
},
{
"epoch": 9.5,
"grad_norm": 4.925398349761963,
"learning_rate": 6.9e-05,
"loss": 3.3661,
"step": 95
},
{
"epoch": 10.0,
"grad_norm": 5.291933536529541,
"learning_rate": 7.274999999999999e-05,
"loss": 3.1777,
"step": 100
},
{
"epoch": 10.0,
"eval_loss": 3.050647735595703,
"eval_runtime": 1.0273,
"eval_samples_per_second": 23.362,
"eval_steps_per_second": 0.973,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 10.5,
"grad_norm": 1.6660319566726685,
"learning_rate": 7.649999999999999e-05,
"loss": 3.0435,
"step": 105
},
{
"epoch": 11.0,
"grad_norm": 0.681082546710968,
"learning_rate": 8.025e-05,
"loss": 3.254,
"step": 110
},
{
"epoch": 11.5,
"grad_norm": 2.713016986846924,
"learning_rate": 8.4e-05,
"loss": 2.972,
"step": 115
},
{
"epoch": 12.0,
"grad_norm": 7.211615085601807,
"learning_rate": 8.774999999999999e-05,
"loss": 3.1145,
"step": 120
},
{
"epoch": 12.5,
"grad_norm": 3.3372182846069336,
"learning_rate": 9.149999999999999e-05,
"loss": 3.0587,
"step": 125
},
{
"epoch": 12.5,
"eval_loss": 2.936924695968628,
"eval_runtime": 1.0424,
"eval_samples_per_second": 23.023,
"eval_steps_per_second": 0.959,
"eval_wer": 1.0,
"step": 125
},
{
"epoch": 13.0,
"grad_norm": 1.895374059677124,
"learning_rate": 9.525e-05,
"loss": 2.9096,
"step": 130
},
{
"epoch": 13.5,
"grad_norm": 8.356375694274902,
"learning_rate": 9.9e-05,
"loss": 3.3159,
"step": 135
},
{
"epoch": 14.0,
"grad_norm": 1.6825320720672607,
"learning_rate": 0.00010275,
"loss": 2.9022,
"step": 140
},
{
"epoch": 14.5,
"grad_norm": 0.7314967513084412,
"learning_rate": 0.00010649999999999999,
"loss": 2.9058,
"step": 145
},
{
"epoch": 15.0,
"grad_norm": 3.183772563934326,
"learning_rate": 0.00011024999999999998,
"loss": 3.0633,
"step": 150
},
{
"epoch": 15.0,
"eval_loss": 2.9296257495880127,
"eval_runtime": 1.0347,
"eval_samples_per_second": 23.194,
"eval_steps_per_second": 0.966,
"eval_wer": 1.0,
"step": 150
},
{
"epoch": 15.5,
"grad_norm": 1.4891362190246582,
"learning_rate": 0.00011399999999999999,
"loss": 2.9901,
"step": 155
},
{
"epoch": 16.0,
"grad_norm": 5.58284854888916,
"learning_rate": 0.00011774999999999999,
"loss": 2.9861,
"step": 160
},
{
"epoch": 16.5,
"grad_norm": 1.3804000616073608,
"learning_rate": 0.0001215,
"loss": 2.9584,
"step": 165
},
{
"epoch": 17.0,
"grad_norm": 1.6562563180923462,
"learning_rate": 0.00012524999999999998,
"loss": 3.0194,
"step": 170
},
{
"epoch": 17.5,
"grad_norm": 0.653541088104248,
"learning_rate": 0.000129,
"loss": 2.9639,
"step": 175
},
{
"epoch": 17.5,
"eval_loss": 2.926556348800659,
"eval_runtime": 1.0442,
"eval_samples_per_second": 22.985,
"eval_steps_per_second": 0.958,
"eval_wer": 1.0,
"step": 175
},
{
"epoch": 18.0,
"grad_norm": 9.456038475036621,
"learning_rate": 0.00013275,
"loss": 2.8944,
"step": 180
},
{
"epoch": 18.5,
"grad_norm": 0.3759576082229614,
"learning_rate": 0.00013649999999999998,
"loss": 2.9149,
"step": 185
},
{
"epoch": 19.0,
"grad_norm": 3.0567305088043213,
"learning_rate": 0.00014025,
"loss": 3.0321,
"step": 190
},
{
"epoch": 19.5,
"grad_norm": 8.436885833740234,
"learning_rate": 0.00014399999999999998,
"loss": 2.9683,
"step": 195
},
{
"epoch": 20.0,
"grad_norm": 1.9778860807418823,
"learning_rate": 0.00014774999999999999,
"loss": 2.9576,
"step": 200
},
{
"epoch": 20.0,
"eval_loss": 2.9644908905029297,
"eval_runtime": 1.0268,
"eval_samples_per_second": 23.374,
"eval_steps_per_second": 0.974,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 20.5,
"grad_norm": 0.6856608390808105,
"learning_rate": 0.00014976923076923077,
"loss": 2.9374,
"step": 205
},
{
"epoch": 21.0,
"grad_norm": 1.157402515411377,
"learning_rate": 0.00014919230769230767,
"loss": 2.875,
"step": 210
},
{
"epoch": 21.5,
"grad_norm": 0.42920613288879395,
"learning_rate": 0.0001486153846153846,
"loss": 2.9796,
"step": 215
},
{
"epoch": 22.0,
"grad_norm": 4.603660583496094,
"learning_rate": 0.00014803846153846152,
"loss": 2.9233,
"step": 220
},
{
"epoch": 22.5,
"grad_norm": 1.3661619424819946,
"learning_rate": 0.00014746153846153845,
"loss": 2.8708,
"step": 225
},
{
"epoch": 22.5,
"eval_loss": 2.9085776805877686,
"eval_runtime": 1.0387,
"eval_samples_per_second": 23.106,
"eval_steps_per_second": 0.963,
"eval_wer": 1.0,
"step": 225
},
{
"epoch": 23.0,
"grad_norm": 0.7445681691169739,
"learning_rate": 0.00014688461538461537,
"loss": 2.933,
"step": 230
},
{
"epoch": 23.5,
"grad_norm": 1.2040903568267822,
"learning_rate": 0.0001463076923076923,
"loss": 2.9217,
"step": 235
},
{
"epoch": 24.0,
"grad_norm": 4.538419246673584,
"learning_rate": 0.00014573076923076923,
"loss": 2.9043,
"step": 240
},
{
"epoch": 24.5,
"grad_norm": 0.36169031262397766,
"learning_rate": 0.00014515384615384615,
"loss": 2.8554,
"step": 245
},
{
"epoch": 25.0,
"grad_norm": 1.2133870124816895,
"learning_rate": 0.00014457692307692305,
"loss": 2.943,
"step": 250
},
{
"epoch": 25.0,
"eval_loss": 2.900446653366089,
"eval_runtime": 1.0279,
"eval_samples_per_second": 23.348,
"eval_steps_per_second": 0.973,
"eval_wer": 1.0,
"step": 250
},
{
"epoch": 25.5,
"grad_norm": 1.1455128192901611,
"learning_rate": 0.00014399999999999998,
"loss": 2.8775,
"step": 255
},
{
"epoch": 26.0,
"grad_norm": 3.7162177562713623,
"learning_rate": 0.0001434230769230769,
"loss": 2.9401,
"step": 260
},
{
"epoch": 26.5,
"grad_norm": 4.095553398132324,
"learning_rate": 0.00014284615384615383,
"loss": 2.9053,
"step": 265
},
{
"epoch": 27.0,
"grad_norm": 2.0302634239196777,
"learning_rate": 0.00014226923076923075,
"loss": 2.975,
"step": 270
},
{
"epoch": 27.5,
"grad_norm": 3.123234510421753,
"learning_rate": 0.00014169230769230768,
"loss": 2.9225,
"step": 275
},
{
"epoch": 27.5,
"eval_loss": 2.9469966888427734,
"eval_runtime": 1.023,
"eval_samples_per_second": 23.46,
"eval_steps_per_second": 0.978,
"eval_wer": 1.0,
"step": 275
},
{
"epoch": 28.0,
"grad_norm": 0.886202871799469,
"learning_rate": 0.0001411153846153846,
"loss": 2.8783,
"step": 280
},
{
"epoch": 28.5,
"grad_norm": 0.48980531096458435,
"learning_rate": 0.00014053846153846153,
"loss": 2.8977,
"step": 285
},
{
"epoch": 29.0,
"grad_norm": 2.4499869346618652,
"learning_rate": 0.00013996153846153843,
"loss": 2.9178,
"step": 290
},
{
"epoch": 29.5,
"grad_norm": 3.5155863761901855,
"learning_rate": 0.00013938461538461536,
"loss": 2.8955,
"step": 295
},
{
"epoch": 30.0,
"grad_norm": 3.8240697383880615,
"learning_rate": 0.00013880769230769228,
"loss": 2.9897,
"step": 300
},
{
"epoch": 30.0,
"eval_loss": 2.9530646800994873,
"eval_runtime": 1.0334,
"eval_samples_per_second": 23.224,
"eval_steps_per_second": 0.968,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 30.5,
"grad_norm": 1.4881560802459717,
"learning_rate": 0.0001382307692307692,
"loss": 2.8732,
"step": 305
},
{
"epoch": 31.0,
"grad_norm": 5.950206756591797,
"learning_rate": 0.00013765384615384613,
"loss": 2.9688,
"step": 310
},
{
"epoch": 31.5,
"grad_norm": 0.8825148940086365,
"learning_rate": 0.00013707692307692306,
"loss": 2.869,
"step": 315
},
{
"epoch": 32.0,
"grad_norm": 1.6368755102157593,
"learning_rate": 0.00013649999999999998,
"loss": 2.8843,
"step": 320
},
{
"epoch": 32.5,
"grad_norm": 1.556404709815979,
"learning_rate": 0.0001359230769230769,
"loss": 2.8514,
"step": 325
},
{
"epoch": 32.5,
"eval_loss": 2.911478042602539,
"eval_runtime": 1.021,
"eval_samples_per_second": 23.505,
"eval_steps_per_second": 0.979,
"eval_wer": 1.0,
"step": 325
},
{
"epoch": 33.0,
"grad_norm": 0.6802976131439209,
"learning_rate": 0.00013534615384615384,
"loss": 2.8542,
"step": 330
},
{
"epoch": 33.5,
"grad_norm": 0.5035978555679321,
"learning_rate": 0.00013476923076923076,
"loss": 2.9064,
"step": 335
},
{
"epoch": 34.0,
"grad_norm": 1.6443456411361694,
"learning_rate": 0.0001341923076923077,
"loss": 2.8498,
"step": 340
},
{
"epoch": 34.5,
"grad_norm": 0.6262179017066956,
"learning_rate": 0.0001336153846153846,
"loss": 2.8368,
"step": 345
},
{
"epoch": 35.0,
"grad_norm": 0.8266497850418091,
"learning_rate": 0.00013303846153846154,
"loss": 2.8681,
"step": 350
},
{
"epoch": 35.0,
"eval_loss": 2.9094789028167725,
"eval_runtime": 1.0369,
"eval_samples_per_second": 23.145,
"eval_steps_per_second": 0.964,
"eval_wer": 1.0,
"step": 350
},
{
"epoch": 35.5,
"grad_norm": 0.33677324652671814,
"learning_rate": 0.00013246153846153846,
"loss": 2.8163,
"step": 355
},
{
"epoch": 36.0,
"grad_norm": 0.6221341490745544,
"learning_rate": 0.0001318846153846154,
"loss": 2.8746,
"step": 360
},
{
"epoch": 36.5,
"grad_norm": 0.5015878677368164,
"learning_rate": 0.00013130769230769232,
"loss": 2.8477,
"step": 365
},
{
"epoch": 37.0,
"grad_norm": 0.6005992889404297,
"learning_rate": 0.00013073076923076921,
"loss": 2.838,
"step": 370
},
{
"epoch": 37.5,
"grad_norm": 0.4997330605983734,
"learning_rate": 0.00013015384615384614,
"loss": 2.8431,
"step": 375
},
{
"epoch": 37.5,
"eval_loss": 2.90104603767395,
"eval_runtime": 1.017,
"eval_samples_per_second": 23.599,
"eval_steps_per_second": 0.983,
"eval_wer": 1.0,
"step": 375
},
{
"epoch": 38.0,
"grad_norm": 1.342210292816162,
"learning_rate": 0.00012957692307692307,
"loss": 2.8672,
"step": 380
},
{
"epoch": 38.5,
"grad_norm": 1.2935914993286133,
"learning_rate": 0.000129,
"loss": 2.848,
"step": 385
},
{
"epoch": 39.0,
"grad_norm": 0.41487249732017517,
"learning_rate": 0.00012842307692307692,
"loss": 2.8244,
"step": 390
},
{
"epoch": 39.5,
"grad_norm": 1.1988450288772583,
"learning_rate": 0.00012784615384615384,
"loss": 2.8328,
"step": 395
},
{
"epoch": 40.0,
"grad_norm": 2.2671468257904053,
"learning_rate": 0.00012726923076923077,
"loss": 2.8843,
"step": 400
},
{
"epoch": 40.0,
"eval_loss": 2.9156665802001953,
"eval_runtime": 1.0579,
"eval_samples_per_second": 22.686,
"eval_steps_per_second": 0.945,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 40.5,
"grad_norm": 1.003772497177124,
"learning_rate": 0.0001266923076923077,
"loss": 2.8312,
"step": 405
},
{
"epoch": 41.0,
"grad_norm": 1.2402571439743042,
"learning_rate": 0.00012611538461538462,
"loss": 2.8291,
"step": 410
},
{
"epoch": 41.5,
"grad_norm": 0.29388442635536194,
"learning_rate": 0.00012553846153846152,
"loss": 2.8275,
"step": 415
},
{
"epoch": 42.0,
"grad_norm": 0.9477460980415344,
"learning_rate": 0.00012496153846153844,
"loss": 2.8384,
"step": 420
},
{
"epoch": 42.5,
"grad_norm": 1.4519686698913574,
"learning_rate": 0.00012438461538461537,
"loss": 2.9357,
"step": 425
},
{
"epoch": 42.5,
"eval_loss": 2.902658462524414,
"eval_runtime": 1.0363,
"eval_samples_per_second": 23.158,
"eval_steps_per_second": 0.965,
"eval_wer": 1.0,
"step": 425
},
{
"epoch": 43.0,
"grad_norm": 0.4391646087169647,
"learning_rate": 0.0001238076923076923,
"loss": 2.8395,
"step": 430
},
{
"epoch": 43.5,
"grad_norm": 2.1784377098083496,
"learning_rate": 0.00012323076923076922,
"loss": 2.8599,
"step": 435
},
{
"epoch": 44.0,
"grad_norm": 0.9729048609733582,
"learning_rate": 0.00012265384615384615,
"loss": 2.8489,
"step": 440
},
{
"epoch": 44.5,
"grad_norm": 0.5243009328842163,
"learning_rate": 0.00012207692307692307,
"loss": 2.83,
"step": 445
},
{
"epoch": 45.0,
"grad_norm": 0.7081323862075806,
"learning_rate": 0.0001215,
"loss": 2.8236,
"step": 450
},
{
"epoch": 45.0,
"eval_loss": 2.901521682739258,
"eval_runtime": 1.0318,
"eval_samples_per_second": 23.261,
"eval_steps_per_second": 0.969,
"eval_wer": 1.0,
"step": 450
},
{
"epoch": 45.5,
"grad_norm": 0.3105088770389557,
"learning_rate": 0.00012092307692307691,
"loss": 2.8189,
"step": 455
},
{
"epoch": 46.0,
"grad_norm": 0.6120209097862244,
"learning_rate": 0.00012034615384615384,
"loss": 2.8075,
"step": 460
},
{
"epoch": 46.5,
"grad_norm": 0.996507465839386,
"learning_rate": 0.00011976923076923076,
"loss": 2.8318,
"step": 465
},
{
"epoch": 47.0,
"grad_norm": 7.280458927154541,
"learning_rate": 0.00011919230769230767,
"loss": 2.871,
"step": 470
},
{
"epoch": 47.5,
"grad_norm": 0.8332684636116028,
"learning_rate": 0.0001186153846153846,
"loss": 2.8376,
"step": 475
},
{
"epoch": 47.5,
"eval_loss": 2.900068998336792,
"eval_runtime": 1.0322,
"eval_samples_per_second": 23.251,
"eval_steps_per_second": 0.969,
"eval_wer": 1.0,
"step": 475
},
{
"epoch": 48.0,
"grad_norm": 0.6555355191230774,
"learning_rate": 0.00011803846153846153,
"loss": 2.7954,
"step": 480
},
{
"epoch": 48.5,
"grad_norm": 1.127866268157959,
"learning_rate": 0.00011746153846153845,
"loss": 2.8494,
"step": 485
},
{
"epoch": 49.0,
"grad_norm": 0.7961714863777161,
"learning_rate": 0.00011688461538461538,
"loss": 2.8446,
"step": 490
},
{
"epoch": 49.5,
"grad_norm": 1.9832100868225098,
"learning_rate": 0.00011630769230769229,
"loss": 2.8353,
"step": 495
},
{
"epoch": 50.0,
"grad_norm": 0.9229313731193542,
"learning_rate": 0.00011573076923076922,
"loss": 2.8148,
"step": 500
},
{
"epoch": 50.0,
"eval_loss": 2.8878333568573,
"eval_runtime": 1.0279,
"eval_samples_per_second": 23.349,
"eval_steps_per_second": 0.973,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 50.5,
"grad_norm": 2.113555669784546,
"learning_rate": 0.00011515384615384614,
"loss": 2.816,
"step": 505
},
{
"epoch": 51.0,
"grad_norm": 2.10042667388916,
"learning_rate": 0.00011457692307692307,
"loss": 2.8544,
"step": 510
},
{
"epoch": 51.5,
"grad_norm": 0.48272839188575745,
"learning_rate": 0.00011399999999999999,
"loss": 2.8207,
"step": 515
},
{
"epoch": 52.0,
"grad_norm": 0.9009172320365906,
"learning_rate": 0.00011342307692307692,
"loss": 2.8008,
"step": 520
},
{
"epoch": 52.5,
"grad_norm": 1.0341640710830688,
"learning_rate": 0.00011284615384615384,
"loss": 2.8057,
"step": 525
},
{
"epoch": 52.5,
"eval_loss": 2.8624706268310547,
"eval_runtime": 1.037,
"eval_samples_per_second": 23.144,
"eval_steps_per_second": 0.964,
"eval_wer": 1.0,
"step": 525
},
{
"epoch": 53.0,
"grad_norm": 1.3395497798919678,
"learning_rate": 0.00011226923076923077,
"loss": 2.7866,
"step": 530
},
{
"epoch": 53.5,
"grad_norm": 0.3619355261325836,
"learning_rate": 0.00011169230769230768,
"loss": 2.7779,
"step": 535
},
{
"epoch": 54.0,
"grad_norm": 1.4029289484024048,
"learning_rate": 0.0001111153846153846,
"loss": 2.789,
"step": 540
},
{
"epoch": 54.5,
"grad_norm": 0.29736635088920593,
"learning_rate": 0.00011053846153846152,
"loss": 2.7452,
"step": 545
},
{
"epoch": 55.0,
"grad_norm": 1.7570823431015015,
"learning_rate": 0.00010996153846153845,
"loss": 2.7268,
"step": 550
},
{
"epoch": 55.0,
"eval_loss": 2.819674253463745,
"eval_runtime": 1.0343,
"eval_samples_per_second": 23.205,
"eval_steps_per_second": 0.967,
"eval_wer": 1.0,
"step": 550
},
{
"epoch": 55.5,
"grad_norm": 0.3762887418270111,
"learning_rate": 0.00010938461538461537,
"loss": 2.7224,
"step": 555
},
{
"epoch": 56.0,
"grad_norm": 1.0835281610488892,
"learning_rate": 0.0001088076923076923,
"loss": 2.7022,
"step": 560
},
{
"epoch": 56.5,
"grad_norm": 1.721433401107788,
"learning_rate": 0.00010823076923076922,
"loss": 2.6927,
"step": 565
},
{
"epoch": 57.0,
"grad_norm": 2.9872403144836426,
"learning_rate": 0.00010765384615384615,
"loss": 2.7924,
"step": 570
},
{
"epoch": 57.5,
"grad_norm": 0.5493649840354919,
"learning_rate": 0.00010707692307692306,
"loss": 2.6252,
"step": 575
},
{
"epoch": 57.5,
"eval_loss": 2.807591676712036,
"eval_runtime": 1.0323,
"eval_samples_per_second": 23.25,
"eval_steps_per_second": 0.969,
"eval_wer": 1.0,
"step": 575
},
{
"epoch": 58.0,
"grad_norm": 1.2353851795196533,
"learning_rate": 0.00010649999999999999,
"loss": 2.6458,
"step": 580
},
{
"epoch": 58.5,
"grad_norm": 0.7240511775016785,
"learning_rate": 0.00010592307692307691,
"loss": 2.5911,
"step": 585
},
{
"epoch": 59.0,
"grad_norm": 0.9982340335845947,
"learning_rate": 0.00010534615384615384,
"loss": 2.6489,
"step": 590
},
{
"epoch": 59.5,
"grad_norm": 0.6784680485725403,
"learning_rate": 0.00010476923076923076,
"loss": 2.5169,
"step": 595
},
{
"epoch": 60.0,
"grad_norm": 1.9756778478622437,
"learning_rate": 0.00010419230769230769,
"loss": 2.5511,
"step": 600
},
{
"epoch": 60.0,
"eval_loss": 2.615316152572632,
"eval_runtime": 1.0274,
"eval_samples_per_second": 23.361,
"eval_steps_per_second": 0.973,
"eval_wer": 1.0056338028169014,
"step": 600
},
{
"epoch": 60.5,
"grad_norm": 1.3284317255020142,
"learning_rate": 0.00010361538461538462,
"loss": 2.4731,
"step": 605
},
{
"epoch": 61.0,
"grad_norm": 1.3110464811325073,
"learning_rate": 0.00010303846153846154,
"loss": 2.4817,
"step": 610
},
{
"epoch": 61.5,
"grad_norm": 1.003812551498413,
"learning_rate": 0.00010246153846153844,
"loss": 2.3945,
"step": 615
},
{
"epoch": 62.0,
"grad_norm": 1.148573398590088,
"learning_rate": 0.00010188461538461537,
"loss": 2.399,
"step": 620
},
{
"epoch": 62.5,
"grad_norm": 0.5585479736328125,
"learning_rate": 0.00010130769230769229,
"loss": 2.323,
"step": 625
},
{
"epoch": 62.5,
"eval_loss": 2.4444546699523926,
"eval_runtime": 1.0272,
"eval_samples_per_second": 23.365,
"eval_steps_per_second": 0.974,
"eval_wer": 1.0169014084507042,
"step": 625
},
{
"epoch": 63.0,
"grad_norm": 2.2142958641052246,
"learning_rate": 0.00010073076923076922,
"loss": 2.2927,
"step": 630
},
{
"epoch": 63.5,
"grad_norm": 1.0168890953063965,
"learning_rate": 0.00010015384615384614,
"loss": 2.2108,
"step": 635
},
{
"epoch": 64.0,
"grad_norm": 1.312639832496643,
"learning_rate": 9.957692307692307e-05,
"loss": 2.1866,
"step": 640
},
{
"epoch": 64.5,
"grad_norm": 0.5699294209480286,
"learning_rate": 9.9e-05,
"loss": 2.1114,
"step": 645
},
{
"epoch": 65.0,
"grad_norm": 1.4273818731307983,
"learning_rate": 9.842307692307692e-05,
"loss": 2.1119,
"step": 650
},
{
"epoch": 65.0,
"eval_loss": 2.2476181983947754,
"eval_runtime": 1.0519,
"eval_samples_per_second": 22.815,
"eval_steps_per_second": 0.951,
"eval_wer": 1.1183098591549296,
"step": 650
},
{
"epoch": 65.5,
"grad_norm": 0.5214980244636536,
"learning_rate": 9.784615384615383e-05,
"loss": 2.0414,
"step": 655
},
{
"epoch": 66.0,
"grad_norm": 2.480297803878784,
"learning_rate": 9.726923076923076e-05,
"loss": 2.0609,
"step": 660
},
{
"epoch": 66.5,
"grad_norm": 3.5270726680755615,
"learning_rate": 9.669230769230768e-05,
"loss": 1.9963,
"step": 665
},
{
"epoch": 67.0,
"grad_norm": 14.827882766723633,
"learning_rate": 9.611538461538461e-05,
"loss": 1.9333,
"step": 670
},
{
"epoch": 67.5,
"grad_norm": 1.1005451679229736,
"learning_rate": 9.553846153846153e-05,
"loss": 1.8514,
"step": 675
},
{
"epoch": 67.5,
"eval_loss": 2.173093318939209,
"eval_runtime": 1.033,
"eval_samples_per_second": 23.233,
"eval_steps_per_second": 0.968,
"eval_wer": 1.095774647887324,
"step": 675
},
{
"epoch": 68.0,
"grad_norm": 1.5897767543792725,
"learning_rate": 9.496153846153846e-05,
"loss": 1.9986,
"step": 680
},
{
"epoch": 68.5,
"grad_norm": 0.8863438963890076,
"learning_rate": 9.438461538461539e-05,
"loss": 1.8067,
"step": 685
},
{
"epoch": 69.0,
"grad_norm": 1.305874228477478,
"learning_rate": 9.380769230769231e-05,
"loss": 1.7975,
"step": 690
},
{
"epoch": 69.5,
"grad_norm": 0.6541560292243958,
"learning_rate": 9.323076923076921e-05,
"loss": 1.7655,
"step": 695
},
{
"epoch": 70.0,
"grad_norm": 1.056104063987732,
"learning_rate": 9.265384615384614e-05,
"loss": 1.7094,
"step": 700
},
{
"epoch": 70.0,
"eval_loss": 2.0642001628875732,
"eval_runtime": 1.0377,
"eval_samples_per_second": 23.129,
"eval_steps_per_second": 0.964,
"eval_wer": 1.0309859154929577,
"step": 700
},
{
"epoch": 70.5,
"grad_norm": 0.5228053331375122,
"learning_rate": 9.207692307692306e-05,
"loss": 1.6764,
"step": 705
},
{
"epoch": 71.0,
"grad_norm": 6.9655256271362305,
"learning_rate": 9.149999999999999e-05,
"loss": 1.7414,
"step": 710
},
{
"epoch": 71.5,
"grad_norm": 0.6360809206962585,
"learning_rate": 9.092307692307691e-05,
"loss": 1.6232,
"step": 715
},
{
"epoch": 72.0,
"grad_norm": 1.2141180038452148,
"learning_rate": 9.034615384615384e-05,
"loss": 1.6497,
"step": 720
},
{
"epoch": 72.5,
"grad_norm": 0.874902606010437,
"learning_rate": 8.976923076923077e-05,
"loss": 1.6069,
"step": 725
},
{
"epoch": 72.5,
"eval_loss": 2.0792412757873535,
"eval_runtime": 1.0243,
"eval_samples_per_second": 23.431,
"eval_steps_per_second": 0.976,
"eval_wer": 1.0788732394366196,
"step": 725
},
{
"epoch": 73.0,
"grad_norm": 0.9335172176361084,
"learning_rate": 8.919230769230769e-05,
"loss": 1.4947,
"step": 730
},
{
"epoch": 73.5,
"grad_norm": 1.299177885055542,
"learning_rate": 8.861538461538462e-05,
"loss": 1.5304,
"step": 735
},
{
"epoch": 74.0,
"grad_norm": 1.6317135095596313,
"learning_rate": 8.803846153846153e-05,
"loss": 1.5218,
"step": 740
},
{
"epoch": 74.5,
"grad_norm": 0.8083561062812805,
"learning_rate": 8.746153846153845e-05,
"loss": 1.5259,
"step": 745
},
{
"epoch": 75.0,
"grad_norm": 1.805677890777588,
"learning_rate": 8.688461538461538e-05,
"loss": 1.4663,
"step": 750
},
{
"epoch": 75.0,
"eval_loss": 2.0323963165283203,
"eval_runtime": 1.0407,
"eval_samples_per_second": 23.062,
"eval_steps_per_second": 0.961,
"eval_wer": 1.036619718309859,
"step": 750
},
{
"epoch": 75.5,
"grad_norm": 0.8463692665100098,
"learning_rate": 8.63076923076923e-05,
"loss": 1.4244,
"step": 755
},
{
"epoch": 76.0,
"grad_norm": 2.091686248779297,
"learning_rate": 8.573076923076923e-05,
"loss": 1.3791,
"step": 760
},
{
"epoch": 76.5,
"grad_norm": 0.7040625810623169,
"learning_rate": 8.515384615384614e-05,
"loss": 1.3495,
"step": 765
},
{
"epoch": 77.0,
"grad_norm": 1.7725024223327637,
"learning_rate": 8.457692307692307e-05,
"loss": 1.3497,
"step": 770
},
{
"epoch": 77.5,
"grad_norm": 0.808942437171936,
"learning_rate": 8.4e-05,
"loss": 1.288,
"step": 775
},
{
"epoch": 77.5,
"eval_loss": 2.0642640590667725,
"eval_runtime": 1.0443,
"eval_samples_per_second": 22.982,
"eval_steps_per_second": 0.958,
"eval_wer": 1.0929577464788733,
"step": 775
},
{
"epoch": 78.0,
"grad_norm": 3.843997001647949,
"learning_rate": 8.342307692307691e-05,
"loss": 1.2597,
"step": 780
},
{
"epoch": 78.5,
"grad_norm": 0.9082187414169312,
"learning_rate": 8.284615384615383e-05,
"loss": 1.2702,
"step": 785
},
{
"epoch": 79.0,
"grad_norm": 1.4159339666366577,
"learning_rate": 8.226923076923076e-05,
"loss": 1.2833,
"step": 790
},
{
"epoch": 79.5,
"grad_norm": 1.0848701000213623,
"learning_rate": 8.169230769230768e-05,
"loss": 1.2117,
"step": 795
},
{
"epoch": 80.0,
"grad_norm": 2.275663137435913,
"learning_rate": 8.111538461538461e-05,
"loss": 1.262,
"step": 800
},
{
"epoch": 80.0,
"eval_loss": 2.084003210067749,
"eval_runtime": 1.0408,
"eval_samples_per_second": 23.059,
"eval_steps_per_second": 0.961,
"eval_wer": 1.076056338028169,
"step": 800
},
{
"epoch": 80.5,
"grad_norm": 0.9842613339424133,
"learning_rate": 8.053846153846154e-05,
"loss": 1.2799,
"step": 805
},
{
"epoch": 81.0,
"grad_norm": 20.336593627929688,
"learning_rate": 7.996153846153846e-05,
"loss": 1.2903,
"step": 810
},
{
"epoch": 81.5,
"grad_norm": 0.8291641473770142,
"learning_rate": 7.938461538461539e-05,
"loss": 1.1215,
"step": 815
},
{
"epoch": 82.0,
"grad_norm": 1.6971830129623413,
"learning_rate": 7.88076923076923e-05,
"loss": 1.1435,
"step": 820
},
{
"epoch": 82.5,
"grad_norm": 0.69861900806427,
"learning_rate": 7.823076923076923e-05,
"loss": 1.043,
"step": 825
},
{
"epoch": 82.5,
"eval_loss": 2.149214506149292,
"eval_runtime": 1.0296,
"eval_samples_per_second": 23.311,
"eval_steps_per_second": 0.971,
"eval_wer": 1.0901408450704226,
"step": 825
},
{
"epoch": 83.0,
"grad_norm": 1.7208884954452515,
"learning_rate": 7.776923076923076e-05,
"loss": 1.203,
"step": 830
},
{
"epoch": 83.5,
"grad_norm": 0.8559800982475281,
"learning_rate": 7.719230769230768e-05,
"loss": 1.0825,
"step": 835
},
{
"epoch": 84.0,
"grad_norm": 1.6605381965637207,
"learning_rate": 7.661538461538461e-05,
"loss": 1.1121,
"step": 840
},
{
"epoch": 84.5,
"grad_norm": 1.077573537826538,
"learning_rate": 7.603846153846154e-05,
"loss": 1.0145,
"step": 845
},
{
"epoch": 85.0,
"grad_norm": 2.7091293334960938,
"learning_rate": 7.546153846153846e-05,
"loss": 1.0501,
"step": 850
},
{
"epoch": 85.0,
"eval_loss": 2.177476644515991,
"eval_runtime": 1.0552,
"eval_samples_per_second": 22.744,
"eval_steps_per_second": 0.948,
"eval_wer": 1.0591549295774647,
"step": 850
},
{
"epoch": 85.5,
"grad_norm": 1.3562541007995605,
"learning_rate": 7.488461538461539e-05,
"loss": 1.1098,
"step": 855
},
{
"epoch": 86.0,
"grad_norm": 2.6526386737823486,
"learning_rate": 7.43076923076923e-05,
"loss": 0.8642,
"step": 860
},
{
"epoch": 86.5,
"grad_norm": 1.1710244417190552,
"learning_rate": 7.373076923076922e-05,
"loss": 0.9004,
"step": 865
},
{
"epoch": 87.0,
"grad_norm": 2.9008164405822754,
"learning_rate": 7.315384615384615e-05,
"loss": 1.037,
"step": 870
},
{
"epoch": 87.5,
"grad_norm": 0.6306678056716919,
"learning_rate": 7.257692307692308e-05,
"loss": 0.9726,
"step": 875
},
{
"epoch": 87.5,
"eval_loss": 2.176731586456299,
"eval_runtime": 1.0313,
"eval_samples_per_second": 23.271,
"eval_steps_per_second": 0.97,
"eval_wer": 1.028169014084507,
"step": 875
},
{
"epoch": 88.0,
"grad_norm": 1.6984366178512573,
"learning_rate": 7.199999999999999e-05,
"loss": 1.1201,
"step": 880
},
{
"epoch": 88.5,
"grad_norm": 0.803970992565155,
"learning_rate": 7.142307692307691e-05,
"loss": 0.908,
"step": 885
},
{
"epoch": 89.0,
"grad_norm": 2.103391408920288,
"learning_rate": 7.084615384615384e-05,
"loss": 0.8684,
"step": 890
},
{
"epoch": 89.5,
"grad_norm": 0.9575273990631104,
"learning_rate": 7.026923076923077e-05,
"loss": 0.9791,
"step": 895
},
{
"epoch": 90.0,
"grad_norm": 3.000880479812622,
"learning_rate": 6.969230769230768e-05,
"loss": 0.8079,
"step": 900
},
{
"epoch": 90.0,
"eval_loss": 2.1965668201446533,
"eval_runtime": 1.0433,
"eval_samples_per_second": 23.003,
"eval_steps_per_second": 0.958,
"eval_wer": 0.9943661971830986,
"step": 900
},
{
"epoch": 90.5,
"grad_norm": 0.6576473712921143,
"learning_rate": 6.91153846153846e-05,
"loss": 0.846,
"step": 905
},
{
"epoch": 91.0,
"grad_norm": 2.2526416778564453,
"learning_rate": 6.853846153846153e-05,
"loss": 0.8868,
"step": 910
},
{
"epoch": 91.5,
"grad_norm": 0.5678216814994812,
"learning_rate": 6.796153846153845e-05,
"loss": 0.8925,
"step": 915
},
{
"epoch": 92.0,
"grad_norm": 2.549266815185547,
"learning_rate": 6.738461538461538e-05,
"loss": 1.0163,
"step": 920
},
{
"epoch": 92.5,
"grad_norm": 0.7736966013908386,
"learning_rate": 6.68076923076923e-05,
"loss": 0.7198,
"step": 925
},
{
"epoch": 92.5,
"eval_loss": 2.2433066368103027,
"eval_runtime": 1.0523,
"eval_samples_per_second": 22.808,
"eval_steps_per_second": 0.95,
"eval_wer": 1.0028169014084507,
"step": 925
},
{
"epoch": 93.0,
"grad_norm": 3.742175817489624,
"learning_rate": 6.623076923076923e-05,
"loss": 1.011,
"step": 930
},
{
"epoch": 93.5,
"grad_norm": 0.748150110244751,
"learning_rate": 6.565384615384616e-05,
"loss": 0.7659,
"step": 935
},
{
"epoch": 94.0,
"grad_norm": 2.121845006942749,
"learning_rate": 6.507692307692307e-05,
"loss": 0.7862,
"step": 940
},
{
"epoch": 94.5,
"grad_norm": 0.7966519594192505,
"learning_rate": 6.45e-05,
"loss": 0.8271,
"step": 945
},
{
"epoch": 95.0,
"grad_norm": 1.6206731796264648,
"learning_rate": 6.392307692307692e-05,
"loss": 0.6312,
"step": 950
},
{
"epoch": 95.0,
"eval_loss": 2.309884786605835,
"eval_runtime": 1.062,
"eval_samples_per_second": 22.599,
"eval_steps_per_second": 0.942,
"eval_wer": 0.9971830985915493,
"step": 950
},
{
"epoch": 95.5,
"grad_norm": 3.240893602371216,
"learning_rate": 6.334615384615385e-05,
"loss": 0.723,
"step": 955
},
{
"epoch": 96.0,
"grad_norm": 1.4926756620407104,
"learning_rate": 6.276923076923076e-05,
"loss": 0.7344,
"step": 960
},
{
"epoch": 96.5,
"grad_norm": 0.8542086482048035,
"learning_rate": 6.219230769230769e-05,
"loss": 0.7649,
"step": 965
},
{
"epoch": 97.0,
"grad_norm": 2.2014851570129395,
"learning_rate": 6.161538461538461e-05,
"loss": 0.6969,
"step": 970
},
{
"epoch": 97.5,
"grad_norm": 0.6612327694892883,
"learning_rate": 6.103846153846154e-05,
"loss": 0.6336,
"step": 975
},
{
"epoch": 97.5,
"eval_loss": 2.3546626567840576,
"eval_runtime": 1.0484,
"eval_samples_per_second": 22.893,
"eval_steps_per_second": 0.954,
"eval_wer": 0.9971830985915493,
"step": 975
},
{
"epoch": 98.0,
"grad_norm": 2.117011547088623,
"learning_rate": 6.0461538461538456e-05,
"loss": 0.7537,
"step": 980
},
{
"epoch": 98.5,
"grad_norm": 8.142460823059082,
"learning_rate": 5.988461538461538e-05,
"loss": 0.6593,
"step": 985
},
{
"epoch": 99.0,
"grad_norm": 2.6468851566314697,
"learning_rate": 5.93076923076923e-05,
"loss": 0.8069,
"step": 990
},
{
"epoch": 99.5,
"grad_norm": 1.392821192741394,
"learning_rate": 5.8730769230769226e-05,
"loss": 0.746,
"step": 995
},
{
"epoch": 100.0,
"grad_norm": 2.0805888175964355,
"learning_rate": 5.8153846153846145e-05,
"loss": 0.9073,
"step": 1000
},
{
"epoch": 100.0,
"eval_loss": 2.350856304168701,
"eval_runtime": 1.0707,
"eval_samples_per_second": 22.414,
"eval_steps_per_second": 0.934,
"eval_wer": 0.9943661971830986,
"step": 1000
},
{
"epoch": 100.5,
"grad_norm": 18.686534881591797,
"learning_rate": 5.757692307692307e-05,
"loss": 0.7907,
"step": 1005
},
{
"epoch": 101.0,
"grad_norm": 1.7688676118850708,
"learning_rate": 5.6999999999999996e-05,
"loss": 0.5693,
"step": 1010
},
{
"epoch": 101.5,
"grad_norm": 0.9006216526031494,
"learning_rate": 5.642307692307692e-05,
"loss": 0.6408,
"step": 1015
},
{
"epoch": 102.0,
"grad_norm": 2.382704496383667,
"learning_rate": 5.584615384615384e-05,
"loss": 0.7203,
"step": 1020
},
{
"epoch": 102.5,
"grad_norm": 0.8852857351303101,
"learning_rate": 5.526923076923076e-05,
"loss": 0.6431,
"step": 1025
},
{
"epoch": 102.5,
"eval_loss": 2.4202942848205566,
"eval_runtime": 1.0529,
"eval_samples_per_second": 22.794,
"eval_steps_per_second": 0.95,
"eval_wer": 1.0056338028169014,
"step": 1025
},
{
"epoch": 103.0,
"grad_norm": 3.3610403537750244,
"learning_rate": 5.4692307692307686e-05,
"loss": 0.6476,
"step": 1030
},
{
"epoch": 103.5,
"grad_norm": 0.8738270401954651,
"learning_rate": 5.411538461538461e-05,
"loss": 0.5492,
"step": 1035
},
{
"epoch": 104.0,
"grad_norm": 2.4251339435577393,
"learning_rate": 5.353846153846153e-05,
"loss": 0.6005,
"step": 1040
},
{
"epoch": 104.5,
"grad_norm": 0.7935536503791809,
"learning_rate": 5.2961538461538456e-05,
"loss": 0.5855,
"step": 1045
},
{
"epoch": 105.0,
"grad_norm": 2.805385112762451,
"learning_rate": 5.238461538461538e-05,
"loss": 0.62,
"step": 1050
},
{
"epoch": 105.0,
"eval_loss": 2.3933348655700684,
"eval_runtime": 1.0674,
"eval_samples_per_second": 22.485,
"eval_steps_per_second": 0.937,
"eval_wer": 0.9746478873239437,
"step": 1050
},
{
"epoch": 105.5,
"grad_norm": 1.2249245643615723,
"learning_rate": 5.180769230769231e-05,
"loss": 0.652,
"step": 1055
},
{
"epoch": 106.0,
"grad_norm": 1.2247533798217773,
"learning_rate": 5.123076923076922e-05,
"loss": 0.6108,
"step": 1060
},
{
"epoch": 106.5,
"grad_norm": 0.8812918663024902,
"learning_rate": 5.0653846153846146e-05,
"loss": 0.6453,
"step": 1065
},
{
"epoch": 107.0,
"grad_norm": 2.7638535499572754,
"learning_rate": 5.007692307692307e-05,
"loss": 0.568,
"step": 1070
},
{
"epoch": 107.5,
"grad_norm": 1.3182368278503418,
"learning_rate": 4.95e-05,
"loss": 0.708,
"step": 1075
},
{
"epoch": 107.5,
"eval_loss": 2.4381346702575684,
"eval_runtime": 1.061,
"eval_samples_per_second": 22.619,
"eval_steps_per_second": 0.942,
"eval_wer": 0.9690140845070423,
"step": 1075
},
{
"epoch": 108.0,
"grad_norm": 2.4760406017303467,
"learning_rate": 4.8923076923076916e-05,
"loss": 0.6171,
"step": 1080
},
{
"epoch": 108.5,
"grad_norm": 0.5409008264541626,
"learning_rate": 4.834615384615384e-05,
"loss": 0.5542,
"step": 1085
},
{
"epoch": 109.0,
"grad_norm": 1.675410509109497,
"learning_rate": 4.776923076923077e-05,
"loss": 0.6491,
"step": 1090
},
{
"epoch": 109.5,
"grad_norm": 0.8941754698753357,
"learning_rate": 4.719230769230769e-05,
"loss": 0.7266,
"step": 1095
},
{
"epoch": 110.0,
"grad_norm": 1.9851211309432983,
"learning_rate": 4.6615384615384605e-05,
"loss": 0.6729,
"step": 1100
},
{
"epoch": 110.0,
"eval_loss": 2.474308967590332,
"eval_runtime": 1.0636,
"eval_samples_per_second": 22.564,
"eval_steps_per_second": 0.94,
"eval_wer": 1.0,
"step": 1100
},
{
"epoch": 110.5,
"grad_norm": 0.677306592464447,
"learning_rate": 4.603846153846153e-05,
"loss": 0.7625,
"step": 1105
},
{
"epoch": 111.0,
"grad_norm": 2.572356700897217,
"learning_rate": 4.546153846153846e-05,
"loss": 0.5146,
"step": 1110
},
{
"epoch": 111.5,
"grad_norm": 1.2789101600646973,
"learning_rate": 4.488461538461538e-05,
"loss": 0.5504,
"step": 1115
},
{
"epoch": 112.0,
"grad_norm": 2.3920390605926514,
"learning_rate": 4.430769230769231e-05,
"loss": 0.4821,
"step": 1120
},
{
"epoch": 112.5,
"grad_norm": 1.219436764717102,
"learning_rate": 4.373076923076923e-05,
"loss": 0.5779,
"step": 1125
},
{
"epoch": 112.5,
"eval_loss": 2.492933988571167,
"eval_runtime": 1.0274,
"eval_samples_per_second": 23.36,
"eval_steps_per_second": 0.973,
"eval_wer": 0.9549295774647887,
"step": 1125
},
{
"epoch": 113.0,
"grad_norm": 3.558155059814453,
"learning_rate": 4.315384615384615e-05,
"loss": 0.4743,
"step": 1130
},
{
"epoch": 113.5,
"grad_norm": 0.9398171901702881,
"learning_rate": 4.257692307692307e-05,
"loss": 0.493,
"step": 1135
},
{
"epoch": 114.0,
"grad_norm": 4.514529705047607,
"learning_rate": 4.2e-05,
"loss": 0.4341,
"step": 1140
},
{
"epoch": 114.5,
"grad_norm": 1.015120029449463,
"learning_rate": 4.142307692307692e-05,
"loss": 0.5069,
"step": 1145
},
{
"epoch": 115.0,
"grad_norm": 2.043063163757324,
"learning_rate": 4.084615384615384e-05,
"loss": 0.6303,
"step": 1150
},
{
"epoch": 115.0,
"eval_loss": 2.5056331157684326,
"eval_runtime": 1.0408,
"eval_samples_per_second": 23.06,
"eval_steps_per_second": 0.961,
"eval_wer": 0.923943661971831,
"step": 1150
}
],
"logging_steps": 5,
"max_steps": 1500,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.771505223996499e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
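
The JSON above is the Hugging Face `transformers` Trainer state for this run: `log_history` interleaves training entries (keyed by `loss`, `grad_norm`, `learning_rate`) with evaluation entries (keyed by `eval_loss`, `eval_wer`), and `best_metric` matches the final `eval_wer` recorded for checkpoint-1150. Below is a minimal sketch of how one might inspect the file, assuming it is saved locally as `trainer_state.json` and `matplotlib` is installed; the output filename and plot layout are illustrative, not part of the repository.

```python
import json

import matplotlib.pyplot as plt

# Load the Trainer state. Python's json module accepts the non-standard
# NaN literals that appear in a few grad_norm fields of this file.
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes training logs (have "loss") and evaluation logs
# (have "eval_loss"); split them by key.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"best metric: {state['best_metric']:.4f} "
      f"at {state['best_model_checkpoint']}")
print(f"training log points: {len(train_logs)}, evaluations: {len(eval_logs)}")

# Plot training/eval loss and WER against the global step.
fig, (ax_loss, ax_wer) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot([e["step"] for e in train_logs],
             [e["loss"] for e in train_logs], label="train loss")
ax_loss.plot([e["step"] for e in eval_logs],
             [e["eval_loss"] for e in eval_logs], label="eval loss")
ax_loss.set_ylabel("loss")
ax_loss.legend()

ax_wer.plot([e["step"] for e in eval_logs],
            [e["eval_wer"] for e in eval_logs], color="tab:red")
ax_wer.set_ylabel("eval WER")
ax_wer.set_xlabel("global step")

fig.tight_layout()
fig.savefig("training_curves.png", dpi=150)  # hypothetical output path
```

Run from the directory containing the checkpoint files, this prints the best WER and checkpoint path and writes a two-panel plot of the loss and WER curves over the 1150 logged steps.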