{ "best_metric": 0.16414035979450686, "best_model_checkpoint": "./checkpoints/w2v-pa-v2/checkpoint-48600", "epoch": 2.02776364561378, "eval_steps": 600, "global_step": 55800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00036339850279816846, "grad_norm": 12.712770462036133, "learning_rate": 3.633985027981685e-08, "loss": 8.1015, "step": 10 }, { "epoch": 0.0007267970055963369, "grad_norm": 14.929631233215332, "learning_rate": 7.26797005596337e-08, "loss": 8.5111, "step": 20 }, { "epoch": 0.0010901955083945055, "grad_norm": 12.774781227111816, "learning_rate": 1.0901955083945056e-07, "loss": 8.71, "step": 30 }, { "epoch": 0.0014535940111926739, "grad_norm": 15.27083969116211, "learning_rate": 1.417254160912857e-07, "loss": 9.2894, "step": 40 }, { "epoch": 0.0018169925139908424, "grad_norm": 31.209775924682617, "learning_rate": 1.7806526637110256e-07, "loss": 8.6779, "step": 50 }, { "epoch": 0.002180391016789011, "grad_norm": 12.960335731506348, "learning_rate": 2.1440511665091943e-07, "loss": 8.0399, "step": 60 }, { "epoch": 0.002543789519587179, "grad_norm": 14.452157020568848, "learning_rate": 2.5074496693073626e-07, "loss": 7.9357, "step": 70 }, { "epoch": 0.0029071880223853477, "grad_norm": 12.74867057800293, "learning_rate": 2.8708481721055307e-07, "loss": 8.0764, "step": 80 }, { "epoch": 0.0032705865251835163, "grad_norm": 19.905397415161133, "learning_rate": 3.2342466749036993e-07, "loss": 8.2522, "step": 90 }, { "epoch": 0.003633985027981685, "grad_norm": Infinity, "learning_rate": 3.5613053274220513e-07, "loss": 7.8161, "step": 100 }, { "epoch": 0.003997383530779853, "grad_norm": 15.877684593200684, "learning_rate": 3.92470383022022e-07, "loss": 7.4866, "step": 110 }, { "epoch": 0.004360782033578022, "grad_norm": 19.216800689697266, "learning_rate": 4.2881023330183885e-07, "loss": 7.0324, "step": 120 }, { "epoch": 0.0047241805363761906, "grad_norm": 16.937118530273438, "learning_rate": 4.651500835816557e-07, "loss": 6.7873, "step": 130 }, { "epoch": 0.005087579039174358, "grad_norm": 27.858692169189453, "learning_rate": 5.014899338614725e-07, "loss": 7.2063, "step": 140 }, { "epoch": 0.005450977541972527, "grad_norm": Infinity, "learning_rate": 5.341957991133076e-07, "loss": 6.5827, "step": 150 }, { "epoch": 0.005814376044770695, "grad_norm": 21.252164840698242, "learning_rate": 5.669016643651428e-07, "loss": 7.819, "step": 160 }, { "epoch": 0.006177774547568864, "grad_norm": 20.977886199951172, "learning_rate": 6.032415146449597e-07, "loss": 5.4741, "step": 170 }, { "epoch": 0.0065411730503670325, "grad_norm": 35.25390625, "learning_rate": 6.395813649247765e-07, "loss": 5.7225, "step": 180 }, { "epoch": 0.006904571553165201, "grad_norm": 7.165033340454102, "learning_rate": 6.759212152045934e-07, "loss": 4.7475, "step": 190 }, { "epoch": 0.00726797005596337, "grad_norm": 14.877301216125488, "learning_rate": 7.122610654844103e-07, "loss": 4.6375, "step": 200 }, { "epoch": 0.007631368558761538, "grad_norm": 5.826667785644531, "learning_rate": 7.486009157642272e-07, "loss": 4.356, "step": 210 }, { "epoch": 0.007994767061559707, "grad_norm": 6.022212982177734, "learning_rate": 7.84940766044044e-07, "loss": 4.2138, "step": 220 }, { "epoch": 0.008358165564357875, "grad_norm": 4.790489196777344, "learning_rate": 8.212806163238608e-07, "loss": 4.0662, "step": 230 }, { "epoch": 0.008721564067156044, "grad_norm": 4.448057174682617, "learning_rate": 8.576204666036777e-07, "loss": 3.9507, "step": 240 }, { "epoch": 0.009084962569954213, "grad_norm": 47.487003326416016, "learning_rate": 8.939603168834945e-07, "loss": 3.9576, "step": 250 }, { "epoch": 0.009448361072752381, "grad_norm": 8.64856243133545, "learning_rate": 9.303001671633114e-07, "loss": 3.7102, "step": 260 }, { "epoch": 0.009811759575550548, "grad_norm": 8.821709632873535, "learning_rate": 9.66640017443128e-07, "loss": 3.6644, "step": 270 }, { "epoch": 0.010175158078348717, "grad_norm": 14.071539878845215, "learning_rate": 1.002979867722945e-06, "loss": 3.6909, "step": 280 }, { "epoch": 0.010538556581146885, "grad_norm": 6.68039083480835, "learning_rate": 1.0393197180027619e-06, "loss": 3.6458, "step": 290 }, { "epoch": 0.010901955083945054, "grad_norm": 20.664649963378906, "learning_rate": 1.0756595682825787e-06, "loss": 3.6332, "step": 300 }, { "epoch": 0.011265353586743222, "grad_norm": 2.5272624492645264, "learning_rate": 1.1119994185623955e-06, "loss": 3.4835, "step": 310 }, { "epoch": 0.01162875208954139, "grad_norm": 8.353235244750977, "learning_rate": 1.1483392688422123e-06, "loss": 3.4892, "step": 320 }, { "epoch": 0.01199215059233956, "grad_norm": 7.0964531898498535, "learning_rate": 1.1846791191220293e-06, "loss": 3.5586, "step": 330 }, { "epoch": 0.012355549095137728, "grad_norm": 4.734161376953125, "learning_rate": 1.2210189694018461e-06, "loss": 3.4996, "step": 340 }, { "epoch": 0.012718947597935897, "grad_norm": 47.409996032714844, "learning_rate": 1.257358819681663e-06, "loss": 3.5565, "step": 350 }, { "epoch": 0.013082346100734065, "grad_norm": 2.880244016647339, "learning_rate": 1.2936986699614797e-06, "loss": 3.4154, "step": 360 }, { "epoch": 0.013445744603532234, "grad_norm": 6.637233734130859, "learning_rate": 1.3300385202412968e-06, "loss": 3.4119, "step": 370 }, { "epoch": 0.013809143106330402, "grad_norm": 11.791736602783203, "learning_rate": 1.3663783705211136e-06, "loss": 3.4347, "step": 380 }, { "epoch": 0.01417254160912857, "grad_norm": 8.274836540222168, "learning_rate": 1.4027182208009304e-06, "loss": 3.4253, "step": 390 }, { "epoch": 0.01453594011192674, "grad_norm": 10.09929084777832, "learning_rate": 1.4390580710807472e-06, "loss": 3.4702, "step": 400 }, { "epoch": 0.014899338614724908, "grad_norm": 6.32951545715332, "learning_rate": 1.4753979213605642e-06, "loss": 3.3513, "step": 410 }, { "epoch": 0.015262737117523077, "grad_norm": 2.4888486862182617, "learning_rate": 1.511737771640381e-06, "loss": 3.3421, "step": 420 }, { "epoch": 0.015626135620321245, "grad_norm": 3.02103328704834, "learning_rate": 1.5480776219201978e-06, "loss": 3.397, "step": 430 }, { "epoch": 0.015989534123119414, "grad_norm": 7.464268207550049, "learning_rate": 1.5844174722000146e-06, "loss": 3.3582, "step": 440 }, { "epoch": 0.016352932625917582, "grad_norm": 18.908123016357422, "learning_rate": 1.6207573224798317e-06, "loss": 3.4034, "step": 450 }, { "epoch": 0.01671633112871575, "grad_norm": 2.487326145172119, "learning_rate": 1.6570971727596485e-06, "loss": 3.2229, "step": 460 }, { "epoch": 0.01707972963151392, "grad_norm": 2.3999946117401123, "learning_rate": 1.6934370230394653e-06, "loss": 3.2185, "step": 470 }, { "epoch": 0.017443128134312088, "grad_norm": 5.007234573364258, "learning_rate": 1.729776873319282e-06, "loss": 3.2069, "step": 480 }, { "epoch": 0.017806526637110257, "grad_norm": 6.393301963806152, "learning_rate": 1.766116723599099e-06, "loss": 3.0687, "step": 490 }, { "epoch": 0.018169925139908425, "grad_norm": 45.44938278198242, "learning_rate": 1.802456573878916e-06, "loss": 3.112, "step": 500 }, { "epoch": 0.018533323642706594, "grad_norm": 7.32182502746582, "learning_rate": 1.8387964241587327e-06, "loss": 2.951, "step": 510 }, { "epoch": 0.018896722145504762, "grad_norm": 3.3864173889160156, "learning_rate": 1.8751362744385495e-06, "loss": 2.8879, "step": 520 }, { "epoch": 0.019260120648302927, "grad_norm": 5.429958343505859, "learning_rate": 1.911476124718366e-06, "loss": 2.7393, "step": 530 }, { "epoch": 0.019623519151101096, "grad_norm": 5.3577985763549805, "learning_rate": 1.947815974998183e-06, "loss": 2.4813, "step": 540 }, { "epoch": 0.019986917653899264, "grad_norm": 13.970659255981445, "learning_rate": 1.9841558252779998e-06, "loss": 2.3787, "step": 550 }, { "epoch": 0.020350316156697433, "grad_norm": 5.2666754722595215, "learning_rate": 2.0204956755578166e-06, "loss": 2.207, "step": 560 }, { "epoch": 0.0207137146594956, "grad_norm": 4.184991359710693, "learning_rate": 2.0568355258376334e-06, "loss": 2.0383, "step": 570 }, { "epoch": 0.02107711316229377, "grad_norm": 6.312343597412109, "learning_rate": 2.09317537611745e-06, "loss": 1.8416, "step": 580 }, { "epoch": 0.02144051166509194, "grad_norm": 4.754147529602051, "learning_rate": 2.1295152263972674e-06, "loss": 1.6002, "step": 590 }, { "epoch": 0.021803910167890107, "grad_norm": 21.47913360595703, "learning_rate": 2.1658550766770842e-06, "loss": 1.6015, "step": 600 }, { "epoch": 0.021803910167890107, "eval_loss": 1.5154471397399902, "eval_runtime": 180.9184, "eval_samples_per_second": 40.98, "eval_steps_per_second": 5.124, "eval_wer": 0.7997531177954872, "step": 600 }, { "epoch": 0.022167308670688276, "grad_norm": 4.2374348640441895, "learning_rate": 2.202194926956901e-06, "loss": 1.4842, "step": 610 }, { "epoch": 0.022530707173486444, "grad_norm": 4.392132759094238, "learning_rate": 2.238534777236718e-06, "loss": 1.3776, "step": 620 }, { "epoch": 0.022894105676284613, "grad_norm": 4.682064533233643, "learning_rate": 2.2748746275165347e-06, "loss": 1.3177, "step": 630 }, { "epoch": 0.02325750417908278, "grad_norm": 4.8396077156066895, "learning_rate": 2.3112144777963515e-06, "loss": 1.0737, "step": 640 }, { "epoch": 0.02362090268188095, "grad_norm": 33.27382278442383, "learning_rate": 2.3475543280761683e-06, "loss": 1.3046, "step": 650 }, { "epoch": 0.02398430118467912, "grad_norm": 5.410325050354004, "learning_rate": 2.383894178355985e-06, "loss": 1.1021, "step": 660 }, { "epoch": 0.024347699687477287, "grad_norm": 3.9523680210113525, "learning_rate": 2.420234028635802e-06, "loss": 1.0602, "step": 670 }, { "epoch": 0.024711098190275456, "grad_norm": 9.141073226928711, "learning_rate": 2.456573878915619e-06, "loss": 1.0631, "step": 680 }, { "epoch": 0.025074496693073624, "grad_norm": 5.3534626960754395, "learning_rate": 2.492913729195436e-06, "loss": 0.8968, "step": 690 }, { "epoch": 0.025437895195871793, "grad_norm": 32.30677795410156, "learning_rate": 2.5292535794752527e-06, "loss": 1.0439, "step": 700 }, { "epoch": 0.02580129369866996, "grad_norm": 4.310474872589111, "learning_rate": 2.5655934297550696e-06, "loss": 0.954, "step": 710 }, { "epoch": 0.02616469220146813, "grad_norm": 5.586440563201904, "learning_rate": 2.6019332800348864e-06, "loss": 1.0031, "step": 720 }, { "epoch": 0.0265280907042663, "grad_norm": 3.6927313804626465, "learning_rate": 2.638273130314703e-06, "loss": 0.7956, "step": 730 }, { "epoch": 0.026891489207064467, "grad_norm": 4.270529747009277, "learning_rate": 2.67461298059452e-06, "loss": 0.8874, "step": 740 }, { "epoch": 0.027254887709862636, "grad_norm": 23.553489685058594, "learning_rate": 2.710952830874337e-06, "loss": 0.8523, "step": 750 }, { "epoch": 0.027618286212660804, "grad_norm": 5.342041492462158, "learning_rate": 2.747292681154154e-06, "loss": 0.9029, "step": 760 }, { "epoch": 0.027981684715458973, "grad_norm": 3.3802621364593506, "learning_rate": 2.783632531433971e-06, "loss": 0.8378, "step": 770 }, { "epoch": 0.02834508321825714, "grad_norm": 6.378807067871094, "learning_rate": 2.8199723817137876e-06, "loss": 0.8085, "step": 780 }, { "epoch": 0.02870848172105531, "grad_norm": 4.007000923156738, "learning_rate": 2.8563122319936045e-06, "loss": 0.8218, "step": 790 }, { "epoch": 0.02907188022385348, "grad_norm": 68.16226196289062, "learning_rate": 2.8926520822734213e-06, "loss": 1.2055, "step": 800 }, { "epoch": 0.029435278726651647, "grad_norm": 6.70043420791626, "learning_rate": 2.928991932553238e-06, "loss": 0.7641, "step": 810 }, { "epoch": 0.029798677229449816, "grad_norm": 5.498161315917969, "learning_rate": 2.965331782833055e-06, "loss": 0.7739, "step": 820 }, { "epoch": 0.030162075732247984, "grad_norm": 9.515852928161621, "learning_rate": 3.0016716331128717e-06, "loss": 0.8293, "step": 830 }, { "epoch": 0.030525474235046153, "grad_norm": 13.3881196975708, "learning_rate": 3.0380114833926885e-06, "loss": 0.5597, "step": 840 }, { "epoch": 0.03088887273784432, "grad_norm": 13.670549392700195, "learning_rate": 3.0743513336725057e-06, "loss": 0.7658, "step": 850 }, { "epoch": 0.03125227124064249, "grad_norm": 3.58305287361145, "learning_rate": 3.1106911839523226e-06, "loss": 0.7036, "step": 860 }, { "epoch": 0.031615669743440655, "grad_norm": 4.119450569152832, "learning_rate": 3.147031034232139e-06, "loss": 0.6842, "step": 870 }, { "epoch": 0.03197906824623883, "grad_norm": 6.412299156188965, "learning_rate": 3.183370884511956e-06, "loss": 0.7148, "step": 880 }, { "epoch": 0.03234246674903699, "grad_norm": 8.700023651123047, "learning_rate": 3.2197107347917726e-06, "loss": 1.4861, "step": 890 }, { "epoch": 0.032705865251835164, "grad_norm": 18.78075408935547, "learning_rate": 3.25605058507159e-06, "loss": 0.7162, "step": 900 }, { "epoch": 0.03306926375463333, "grad_norm": 4.078335762023926, "learning_rate": 3.292390435351406e-06, "loss": 0.643, "step": 910 }, { "epoch": 0.0334326622574315, "grad_norm": 6.603452682495117, "learning_rate": 3.3287302856312234e-06, "loss": 0.6623, "step": 920 }, { "epoch": 0.03379606076022967, "grad_norm": 5.817732334136963, "learning_rate": 3.3650701359110402e-06, "loss": 0.6265, "step": 930 }, { "epoch": 0.03415945926302784, "grad_norm": 8.310086250305176, "learning_rate": 3.4014099861908575e-06, "loss": 0.5343, "step": 940 }, { "epoch": 0.034522857765826004, "grad_norm": NaN, "learning_rate": 3.4341158514426923e-06, "loss": 0.6999, "step": 950 }, { "epoch": 0.034886256268624176, "grad_norm": 4.416926860809326, "learning_rate": 3.4704557017225087e-06, "loss": 0.6688, "step": 960 }, { "epoch": 0.03524965477142234, "grad_norm": 3.2407495975494385, "learning_rate": 3.506795552002326e-06, "loss": 0.5808, "step": 970 }, { "epoch": 0.03561305327422051, "grad_norm": NaN, "learning_rate": 3.539501417254161e-06, "loss": 3.0266, "step": 980 }, { "epoch": 0.03597645177701868, "grad_norm": 8.086112022399902, "learning_rate": 3.575841267533978e-06, "loss": 0.5829, "step": 990 }, { "epoch": 0.03633985027981685, "grad_norm": 252.45077514648438, "learning_rate": 3.612181117813795e-06, "loss": 0.7371, "step": 1000 }, { "epoch": 0.036703248782615015, "grad_norm": 3.5969936847686768, "learning_rate": 3.648520968093612e-06, "loss": 0.6632, "step": 1010 }, { "epoch": 0.03706664728541319, "grad_norm": 3.0116841793060303, "learning_rate": 3.6848608183734285e-06, "loss": 0.537, "step": 1020 }, { "epoch": 0.03743004578821135, "grad_norm": 5.494657039642334, "learning_rate": 3.7212006686532457e-06, "loss": 0.5422, "step": 1030 }, { "epoch": 0.037793444291009524, "grad_norm": 21.526798248291016, "learning_rate": 3.757540518933062e-06, "loss": 0.5003, "step": 1040 }, { "epoch": 0.03815684279380769, "grad_norm": 80.90055084228516, "learning_rate": 3.7938803692128793e-06, "loss": 0.6566, "step": 1050 }, { "epoch": 0.038520241296605855, "grad_norm": 3.7678096294403076, "learning_rate": 3.830220219492696e-06, "loss": 0.5758, "step": 1060 }, { "epoch": 0.03888363979940403, "grad_norm": 4.526616096496582, "learning_rate": 3.866560069772512e-06, "loss": 0.5648, "step": 1070 }, { "epoch": 0.03924703830220219, "grad_norm": 4.571674346923828, "learning_rate": 3.90289992005233e-06, "loss": 0.5864, "step": 1080 }, { "epoch": 0.039610436805000364, "grad_norm": 5.295219421386719, "learning_rate": 3.939239770332146e-06, "loss": 0.4476, "step": 1090 }, { "epoch": 0.03997383530779853, "grad_norm": 16.631162643432617, "learning_rate": 3.975579620611963e-06, "loss": 0.6198, "step": 1100 }, { "epoch": 0.0403372338105967, "grad_norm": 4.685397624969482, "learning_rate": 4.01191947089178e-06, "loss": 0.7512, "step": 1110 }, { "epoch": 0.040700632313394866, "grad_norm": 3.333232879638672, "learning_rate": 4.048259321171597e-06, "loss": 0.5087, "step": 1120 }, { "epoch": 0.04106403081619304, "grad_norm": 5.501911640167236, "learning_rate": 4.084599171451414e-06, "loss": 0.5772, "step": 1130 }, { "epoch": 0.0414274293189912, "grad_norm": 8.066693305969238, "learning_rate": 4.120939021731231e-06, "loss": 0.4641, "step": 1140 }, { "epoch": 0.041790827821789375, "grad_norm": 13.463829040527344, "learning_rate": 4.1572788720110474e-06, "loss": 0.5192, "step": 1150 }, { "epoch": 0.04215422632458754, "grad_norm": 4.132773399353027, "learning_rate": 4.193618722290864e-06, "loss": 0.4696, "step": 1160 }, { "epoch": 0.04251762482738571, "grad_norm": 6.176777362823486, "learning_rate": 4.229958572570681e-06, "loss": 0.4851, "step": 1170 }, { "epoch": 0.04288102333018388, "grad_norm": 8.26610279083252, "learning_rate": 4.266298422850498e-06, "loss": 0.4967, "step": 1180 }, { "epoch": 0.04324442183298205, "grad_norm": 3.9725544452667236, "learning_rate": 4.302638273130315e-06, "loss": 0.431, "step": 1190 }, { "epoch": 0.043607820335780215, "grad_norm": 22.353294372558594, "learning_rate": 4.338978123410132e-06, "loss": 0.6523, "step": 1200 }, { "epoch": 0.043607820335780215, "eval_loss": 0.5945897102355957, "eval_runtime": 180.5168, "eval_samples_per_second": 41.071, "eval_steps_per_second": 5.135, "eval_wer": 0.3718390908925881, "step": 1200 }, { "epoch": 0.04397121883857839, "grad_norm": 3.7954189777374268, "learning_rate": 4.375317973689948e-06, "loss": 0.4511, "step": 1210 }, { "epoch": 0.04433461734137655, "grad_norm": 5.583435535430908, "learning_rate": 4.411657823969766e-06, "loss": 1.4019, "step": 1220 }, { "epoch": 0.044698015844174724, "grad_norm": 8.544243812561035, "learning_rate": 4.447997674249582e-06, "loss": 0.4467, "step": 1230 }, { "epoch": 0.04506141434697289, "grad_norm": 3.8716418743133545, "learning_rate": 4.4843375245293996e-06, "loss": 0.4347, "step": 1240 }, { "epoch": 0.04542481284977106, "grad_norm": 19.459606170654297, "learning_rate": 4.5206773748092155e-06, "loss": 0.616, "step": 1250 }, { "epoch": 0.045788211352569226, "grad_norm": 5.474793434143066, "learning_rate": 4.557017225089033e-06, "loss": 0.4689, "step": 1260 }, { "epoch": 0.0461516098553674, "grad_norm": 4.705495834350586, "learning_rate": 4.593357075368849e-06, "loss": 0.4623, "step": 1270 }, { "epoch": 0.04651500835816556, "grad_norm": 6.779942035675049, "learning_rate": 4.629696925648667e-06, "loss": 0.4418, "step": 1280 }, { "epoch": 0.046878406860963735, "grad_norm": 6.802936553955078, "learning_rate": 4.666036775928484e-06, "loss": 0.4429, "step": 1290 }, { "epoch": 0.0472418053637619, "grad_norm": 17.47754669189453, "learning_rate": 4.7023766262083004e-06, "loss": 0.596, "step": 1300 }, { "epoch": 0.04760520386656007, "grad_norm": 4.036036968231201, "learning_rate": 4.738716476488117e-06, "loss": 0.4362, "step": 1310 }, { "epoch": 0.04796860236935824, "grad_norm": 6.022701740264893, "learning_rate": 4.775056326767934e-06, "loss": 0.5092, "step": 1320 }, { "epoch": 0.04833200087215641, "grad_norm": 5.533923625946045, "learning_rate": 4.811396177047751e-06, "loss": 0.4358, "step": 1330 }, { "epoch": 0.048695399374954575, "grad_norm": 3.4037017822265625, "learning_rate": 4.847736027327568e-06, "loss": 0.3684, "step": 1340 }, { "epoch": 0.04905879787775275, "grad_norm": 13.625974655151367, "learning_rate": 4.8840758776073845e-06, "loss": 0.583, "step": 1350 }, { "epoch": 0.04942219638055091, "grad_norm": 3.597294330596924, "learning_rate": 4.920415727887201e-06, "loss": 0.4561, "step": 1360 }, { "epoch": 0.049785594883349084, "grad_norm": 2.8846936225891113, "learning_rate": 4.956755578167018e-06, "loss": 0.409, "step": 1370 }, { "epoch": 0.05014899338614725, "grad_norm": 5.500187397003174, "learning_rate": 4.993095428446836e-06, "loss": 0.4531, "step": 1380 }, { "epoch": 0.05051239188894542, "grad_norm": 3.1203413009643555, "learning_rate": 5.029435278726652e-06, "loss": 0.4004, "step": 1390 }, { "epoch": 0.050875790391743586, "grad_norm": 98.18115234375, "learning_rate": 5.0657751290064685e-06, "loss": 0.522, "step": 1400 }, { "epoch": 0.05123918889454175, "grad_norm": 3.901418924331665, "learning_rate": 5.102114979286285e-06, "loss": 0.4041, "step": 1410 }, { "epoch": 0.05160258739733992, "grad_norm": 4.045637130737305, "learning_rate": 5.138454829566102e-06, "loss": 0.4051, "step": 1420 }, { "epoch": 0.05196598590013809, "grad_norm": 6.835183143615723, "learning_rate": 5.174794679845919e-06, "loss": 0.4937, "step": 1430 }, { "epoch": 0.05232938440293626, "grad_norm": 7.708272457122803, "learning_rate": 5.211134530125736e-06, "loss": 0.3818, "step": 1440 }, { "epoch": 0.052692782905734425, "grad_norm": 24.2607364654541, "learning_rate": 5.247474380405553e-06, "loss": 0.5445, "step": 1450 }, { "epoch": 0.0530561814085326, "grad_norm": 3.3517005443573, "learning_rate": 5.283814230685369e-06, "loss": 0.4079, "step": 1460 }, { "epoch": 0.05341957991133076, "grad_norm": 12.727778434753418, "learning_rate": 5.320154080965187e-06, "loss": 0.4285, "step": 1470 }, { "epoch": 0.053782978414128935, "grad_norm": 4.984294891357422, "learning_rate": 5.356493931245003e-06, "loss": 0.5006, "step": 1480 }, { "epoch": 0.0541463769169271, "grad_norm": 3.3041558265686035, "learning_rate": 5.392833781524821e-06, "loss": 0.3729, "step": 1490 }, { "epoch": 0.05450977541972527, "grad_norm": 38.074546813964844, "learning_rate": 5.429173631804637e-06, "loss": 0.5401, "step": 1500 }, { "epoch": 0.05487317392252344, "grad_norm": 5.649720668792725, "learning_rate": 5.465513482084454e-06, "loss": 0.3879, "step": 1510 }, { "epoch": 0.05523657242532161, "grad_norm": 3.107583522796631, "learning_rate": 5.501853332364271e-06, "loss": 0.4144, "step": 1520 }, { "epoch": 0.055599970928119774, "grad_norm": 19.246564865112305, "learning_rate": 5.538193182644088e-06, "loss": 0.4314, "step": 1530 }, { "epoch": 0.055963369430917946, "grad_norm": 4.72367525100708, "learning_rate": 5.574533032923905e-06, "loss": 0.3576, "step": 1540 }, { "epoch": 0.05632676793371611, "grad_norm": 25.88886260986328, "learning_rate": 5.6108728832037215e-06, "loss": 0.5385, "step": 1550 }, { "epoch": 0.05669016643651428, "grad_norm": 3.1524956226348877, "learning_rate": 5.647212733483538e-06, "loss": 0.4075, "step": 1560 }, { "epoch": 0.05705356493931245, "grad_norm": 3.883281707763672, "learning_rate": 5.683552583763355e-06, "loss": 0.4242, "step": 1570 }, { "epoch": 0.05741696344211062, "grad_norm": 16.935935974121094, "learning_rate": 5.719892434043172e-06, "loss": 0.6194, "step": 1580 }, { "epoch": 0.057780361944908785, "grad_norm": 4.23909330368042, "learning_rate": 5.756232284322989e-06, "loss": 0.4206, "step": 1590 }, { "epoch": 0.05814376044770696, "grad_norm": 16.6039981842041, "learning_rate": 5.7925721346028056e-06, "loss": 0.4854, "step": 1600 }, { "epoch": 0.05850715895050512, "grad_norm": 2.5220890045166016, "learning_rate": 5.828911984882622e-06, "loss": 0.4186, "step": 1610 }, { "epoch": 0.058870557453303295, "grad_norm": 3.075101613998413, "learning_rate": 5.865251835162439e-06, "loss": 0.8877, "step": 1620 }, { "epoch": 0.05923395595610146, "grad_norm": 5.511383056640625, "learning_rate": 5.901591685442257e-06, "loss": 0.4219, "step": 1630 }, { "epoch": 0.05959735445889963, "grad_norm": 2.9449989795684814, "learning_rate": 5.937931535722073e-06, "loss": 0.2992, "step": 1640 }, { "epoch": 0.0599607529616978, "grad_norm": 31.823612213134766, "learning_rate": 5.9742713860018905e-06, "loss": 0.5939, "step": 1650 }, { "epoch": 0.06032415146449597, "grad_norm": 4.240995407104492, "learning_rate": 6.010611236281706e-06, "loss": 0.4176, "step": 1660 }, { "epoch": 0.060687549967294134, "grad_norm": 2.6084980964660645, "learning_rate": 6.046951086561524e-06, "loss": 0.3542, "step": 1670 }, { "epoch": 0.061050948470092306, "grad_norm": 8.318774223327637, "learning_rate": 6.08329093684134e-06, "loss": 0.3968, "step": 1680 }, { "epoch": 0.06141434697289047, "grad_norm": 5.18604850769043, "learning_rate": 6.119630787121158e-06, "loss": 0.3879, "step": 1690 }, { "epoch": 0.06177774547568864, "grad_norm": 51.732086181640625, "learning_rate": 6.1559706374009745e-06, "loss": 0.5025, "step": 1700 }, { "epoch": 0.06214114397848681, "grad_norm": 2.5876500606536865, "learning_rate": 6.192310487680791e-06, "loss": 0.3558, "step": 1710 }, { "epoch": 0.06250454248128498, "grad_norm": 5.071794033050537, "learning_rate": 6.228650337960608e-06, "loss": 0.3534, "step": 1720 }, { "epoch": 0.06286794098408315, "grad_norm": 11.539891242980957, "learning_rate": 6.264990188240424e-06, "loss": 0.4628, "step": 1730 }, { "epoch": 0.06323133948688131, "grad_norm": 3.275383710861206, "learning_rate": 6.301330038520243e-06, "loss": 0.3368, "step": 1740 }, { "epoch": 0.06359473798967948, "grad_norm": 41.4942741394043, "learning_rate": 6.3376698888000586e-06, "loss": 0.53, "step": 1750 }, { "epoch": 0.06395813649247765, "grad_norm": 3.0071399211883545, "learning_rate": 6.374009739079875e-06, "loss": 0.3623, "step": 1760 }, { "epoch": 0.06432153499527582, "grad_norm": 3.385955333709717, "learning_rate": 6.410349589359692e-06, "loss": 0.3476, "step": 1770 }, { "epoch": 0.06468493349807398, "grad_norm": 3.872527599334717, "learning_rate": 6.446689439639508e-06, "loss": 0.3168, "step": 1780 }, { "epoch": 0.06504833200087215, "grad_norm": 4.668768882751465, "learning_rate": 6.483029289919327e-06, "loss": 0.3813, "step": 1790 }, { "epoch": 0.06541173050367033, "grad_norm": 69.33656311035156, "learning_rate": 6.519369140199143e-06, "loss": 0.4557, "step": 1800 }, { "epoch": 0.06541173050367033, "eval_loss": 0.5579342246055603, "eval_runtime": 180.31, "eval_samples_per_second": 41.118, "eval_steps_per_second": 5.141, "eval_wer": 0.34055221740156477, "step": 1800 }, { "epoch": 0.0657751290064685, "grad_norm": 4.2848381996154785, "learning_rate": 6.555708990478959e-06, "loss": 0.5481, "step": 1810 }, { "epoch": 0.06613852750926666, "grad_norm": 11.31700325012207, "learning_rate": 6.592048840758775e-06, "loss": 0.3479, "step": 1820 }, { "epoch": 0.06650192601206482, "grad_norm": 6.088991165161133, "learning_rate": 6.628388691038594e-06, "loss": 0.3994, "step": 1830 }, { "epoch": 0.066865324514863, "grad_norm": 4.342681407928467, "learning_rate": 6.66472854131841e-06, "loss": 0.2953, "step": 1840 }, { "epoch": 0.06722872301766117, "grad_norm": 75.97467041015625, "learning_rate": 6.701068391598227e-06, "loss": 0.4807, "step": 1850 }, { "epoch": 0.06759212152045933, "grad_norm": 3.8739049434661865, "learning_rate": 6.7374082418780435e-06, "loss": 0.3963, "step": 1860 }, { "epoch": 0.0679555200232575, "grad_norm": 2.6209168434143066, "learning_rate": 6.773748092157861e-06, "loss": 0.422, "step": 1870 }, { "epoch": 0.06831891852605568, "grad_norm": 17.530773162841797, "learning_rate": 6.810087942437678e-06, "loss": 0.3939, "step": 1880 }, { "epoch": 0.06868231702885384, "grad_norm": 3.475748300552368, "learning_rate": 6.846427792717494e-06, "loss": 0.2996, "step": 1890 }, { "epoch": 0.06904571553165201, "grad_norm": 20.979995727539062, "learning_rate": 6.882767642997311e-06, "loss": 0.4528, "step": 1900 }, { "epoch": 0.06940911403445017, "grad_norm": 3.8432774543762207, "learning_rate": 6.919107493277128e-06, "loss": 0.2959, "step": 1910 }, { "epoch": 0.06977251253724835, "grad_norm": 7.830467700958252, "learning_rate": 6.955447343556945e-06, "loss": 0.3378, "step": 1920 }, { "epoch": 0.07013591104004652, "grad_norm": 15.633039474487305, "learning_rate": 6.991787193836762e-06, "loss": 0.4, "step": 1930 }, { "epoch": 0.07049930954284468, "grad_norm": 13.628314971923828, "learning_rate": 7.028127044116578e-06, "loss": 0.3255, "step": 1940 }, { "epoch": 0.07086270804564285, "grad_norm": 33.001773834228516, "learning_rate": 7.064466894396396e-06, "loss": 0.4367, "step": 1950 }, { "epoch": 0.07122610654844103, "grad_norm": 3.5115041732788086, "learning_rate": 7.100806744676212e-06, "loss": 0.3279, "step": 1960 }, { "epoch": 0.07158950505123919, "grad_norm": 3.0497541427612305, "learning_rate": 7.137146594956029e-06, "loss": 2.8797, "step": 1970 }, { "epoch": 0.07195290355403736, "grad_norm": 6.17769718170166, "learning_rate": 7.173486445235845e-06, "loss": 0.3534, "step": 1980 }, { "epoch": 0.07231630205683552, "grad_norm": 5.4114789962768555, "learning_rate": 7.209826295515664e-06, "loss": 0.3309, "step": 1990 }, { "epoch": 0.0726797005596337, "grad_norm": 11.600439071655273, "learning_rate": 7.24616614579548e-06, "loss": 0.4382, "step": 2000 }, { "epoch": 0.07304309906243187, "grad_norm": 3.4476027488708496, "learning_rate": 7.2825059960752965e-06, "loss": 0.3487, "step": 2010 }, { "epoch": 0.07340649756523003, "grad_norm": 5.642564296722412, "learning_rate": 7.318845846355113e-06, "loss": 0.3513, "step": 2020 }, { "epoch": 0.0737698960680282, "grad_norm": 7.132052898406982, "learning_rate": 7.355185696634931e-06, "loss": 0.4564, "step": 2030 }, { "epoch": 0.07413329457082637, "grad_norm": 6.583246231079102, "learning_rate": 7.391525546914748e-06, "loss": 0.3376, "step": 2040 }, { "epoch": 0.07449669307362454, "grad_norm": 23.98805809020996, "learning_rate": 7.427865397194564e-06, "loss": 0.4217, "step": 2050 }, { "epoch": 0.0748600915764227, "grad_norm": 3.9135584831237793, "learning_rate": 7.4642052474743805e-06, "loss": 0.3324, "step": 2060 }, { "epoch": 0.07522349007922087, "grad_norm": 3.4022698402404785, "learning_rate": 7.500545097754198e-06, "loss": 0.3391, "step": 2070 }, { "epoch": 0.07558688858201905, "grad_norm": 8.37547779083252, "learning_rate": 7.536884948034015e-06, "loss": 0.3119, "step": 2080 }, { "epoch": 0.07595028708481721, "grad_norm": 6.2167558670043945, "learning_rate": 7.573224798313831e-06, "loss": 0.3247, "step": 2090 }, { "epoch": 0.07631368558761538, "grad_norm": 81.76036834716797, "learning_rate": 7.609564648593648e-06, "loss": 0.4281, "step": 2100 }, { "epoch": 0.07667708409041354, "grad_norm": 2.8961973190307617, "learning_rate": 7.645904498873465e-06, "loss": 0.3368, "step": 2110 }, { "epoch": 0.07704048259321171, "grad_norm": 4.699477195739746, "learning_rate": 7.682244349153282e-06, "loss": 0.3403, "step": 2120 }, { "epoch": 0.07740388109600989, "grad_norm": 4.429138660430908, "learning_rate": 7.718584199433098e-06, "loss": 0.3182, "step": 2130 }, { "epoch": 0.07776727959880805, "grad_norm": 2.7269580364227295, "learning_rate": 7.754924049712916e-06, "loss": 0.2828, "step": 2140 }, { "epoch": 0.07813067810160622, "grad_norm": 15.126232147216797, "learning_rate": 7.791263899992732e-06, "loss": 0.4606, "step": 2150 }, { "epoch": 0.07849407660440438, "grad_norm": 10.14072322845459, "learning_rate": 7.82760375027255e-06, "loss": 0.3451, "step": 2160 }, { "epoch": 0.07885747510720256, "grad_norm": 4.95914363861084, "learning_rate": 7.863943600552365e-06, "loss": 0.3612, "step": 2170 }, { "epoch": 0.07922087361000073, "grad_norm": 4.115192413330078, "learning_rate": 7.900283450832183e-06, "loss": 0.3222, "step": 2180 }, { "epoch": 0.07958427211279889, "grad_norm": 5.405594825744629, "learning_rate": 7.936623301111999e-06, "loss": 0.3474, "step": 2190 }, { "epoch": 0.07994767061559706, "grad_norm": 23.328718185424805, "learning_rate": 7.972963151391817e-06, "loss": 0.4797, "step": 2200 }, { "epoch": 0.08031106911839524, "grad_norm": 3.5595099925994873, "learning_rate": 8.009303001671634e-06, "loss": 0.3305, "step": 2210 }, { "epoch": 0.0806744676211934, "grad_norm": 3.048445463180542, "learning_rate": 8.04564285195145e-06, "loss": 0.318, "step": 2220 }, { "epoch": 0.08103786612399157, "grad_norm": 5.857702732086182, "learning_rate": 8.081982702231266e-06, "loss": 0.3497, "step": 2230 }, { "epoch": 0.08140126462678973, "grad_norm": 3.0092968940734863, "learning_rate": 8.118322552511084e-06, "loss": 0.2995, "step": 2240 }, { "epoch": 0.08176466312958791, "grad_norm": 9.337843894958496, "learning_rate": 8.154662402790902e-06, "loss": 0.4517, "step": 2250 }, { "epoch": 0.08212806163238608, "grad_norm": 3.136950969696045, "learning_rate": 8.191002253070718e-06, "loss": 0.2927, "step": 2260 }, { "epoch": 0.08249146013518424, "grad_norm": 4.228198051452637, "learning_rate": 8.227342103350534e-06, "loss": 1.2185, "step": 2270 }, { "epoch": 0.0828548586379824, "grad_norm": 7.404679298400879, "learning_rate": 8.263681953630351e-06, "loss": 0.3448, "step": 2280 }, { "epoch": 0.08321825714078059, "grad_norm": 7.873497009277344, "learning_rate": 8.300021803910169e-06, "loss": 0.2965, "step": 2290 }, { "epoch": 0.08358165564357875, "grad_norm": 12.266081809997559, "learning_rate": 8.336361654189985e-06, "loss": 0.4631, "step": 2300 }, { "epoch": 0.08394505414637692, "grad_norm": 3.3576557636260986, "learning_rate": 8.3727015044698e-06, "loss": 0.3339, "step": 2310 }, { "epoch": 0.08430845264917508, "grad_norm": 3.0854902267456055, "learning_rate": 8.40904135474962e-06, "loss": 0.3448, "step": 2320 }, { "epoch": 0.08467185115197326, "grad_norm": 6.1308746337890625, "learning_rate": 8.445381205029436e-06, "loss": 0.386, "step": 2330 }, { "epoch": 0.08503524965477142, "grad_norm": 4.458275318145752, "learning_rate": 8.481721055309252e-06, "loss": 0.2916, "step": 2340 }, { "epoch": 0.08539864815756959, "grad_norm": 25.443647384643555, "learning_rate": 8.51806090558907e-06, "loss": 0.4232, "step": 2350 }, { "epoch": 0.08576204666036776, "grad_norm": 324.4353332519531, "learning_rate": 8.554400755868887e-06, "loss": 2.4995, "step": 2360 }, { "epoch": 0.08612544516316593, "grad_norm": 17.593692779541016, "learning_rate": 8.590740606148703e-06, "loss": 0.2952, "step": 2370 }, { "epoch": 0.0864888436659641, "grad_norm": 3.4646732807159424, "learning_rate": 8.62708045642852e-06, "loss": 0.2961, "step": 2380 }, { "epoch": 0.08685224216876226, "grad_norm": 2.9895999431610107, "learning_rate": 8.663420306708337e-06, "loss": 0.2852, "step": 2390 }, { "epoch": 0.08721564067156043, "grad_norm": 24.221176147460938, "learning_rate": 8.699760156988155e-06, "loss": 0.4343, "step": 2400 }, { "epoch": 0.08721564067156043, "eval_loss": 0.47036415338516235, "eval_runtime": 180.1154, "eval_samples_per_second": 41.162, "eval_steps_per_second": 5.147, "eval_wer": 0.28054713453264835, "step": 2400 }, { "epoch": 0.08757903917435861, "grad_norm": 161.69967651367188, "learning_rate": 8.73610000726797e-06, "loss": 1.4598, "step": 2410 }, { "epoch": 0.08794243767715677, "grad_norm": 10.37559700012207, "learning_rate": 8.772439857547786e-06, "loss": 0.3042, "step": 2420 }, { "epoch": 0.08830583617995494, "grad_norm": 5.90106725692749, "learning_rate": 8.808779707827604e-06, "loss": 0.3385, "step": 2430 }, { "epoch": 0.0886692346827531, "grad_norm": 9.207955360412598, "learning_rate": 8.845119558107422e-06, "loss": 0.2963, "step": 2440 }, { "epoch": 0.08903263318555127, "grad_norm": 22.280956268310547, "learning_rate": 8.881459408387238e-06, "loss": 0.4505, "step": 2450 }, { "epoch": 0.08939603168834945, "grad_norm": 3.090710401535034, "learning_rate": 8.917799258667055e-06, "loss": 0.3114, "step": 2460 }, { "epoch": 0.08975943019114761, "grad_norm": 4.144134044647217, "learning_rate": 8.954139108946871e-06, "loss": 0.2855, "step": 2470 }, { "epoch": 0.09012282869394578, "grad_norm": 4.343112468719482, "learning_rate": 8.990478959226687e-06, "loss": 0.2906, "step": 2480 }, { "epoch": 0.09048622719674394, "grad_norm": 2.6925292015075684, "learning_rate": 9.026818809506505e-06, "loss": 0.284, "step": 2490 }, { "epoch": 0.09084962569954212, "grad_norm": 29.639341354370117, "learning_rate": 9.063158659786323e-06, "loss": 0.3411, "step": 2500 }, { "epoch": 0.09121302420234029, "grad_norm": 4.425374984741211, "learning_rate": 9.099498510066139e-06, "loss": 0.3041, "step": 2510 }, { "epoch": 0.09157642270513845, "grad_norm": 5.6643195152282715, "learning_rate": 9.135838360345955e-06, "loss": 0.3123, "step": 2520 }, { "epoch": 0.09193982120793662, "grad_norm": 3.9098479747772217, "learning_rate": 9.172178210625772e-06, "loss": 0.3664, "step": 2530 }, { "epoch": 0.0923032197107348, "grad_norm": 3.133389949798584, "learning_rate": 9.20851806090559e-06, "loss": 0.2708, "step": 2540 }, { "epoch": 0.09266661821353296, "grad_norm": 43.00468063354492, "learning_rate": 9.244857911185406e-06, "loss": 0.4215, "step": 2550 }, { "epoch": 0.09303001671633113, "grad_norm": 3.1411876678466797, "learning_rate": 9.281197761465222e-06, "loss": 0.2983, "step": 2560 }, { "epoch": 0.09339341521912929, "grad_norm": 3.263828754425049, "learning_rate": 9.317537611745041e-06, "loss": 0.3137, "step": 2570 }, { "epoch": 0.09375681372192747, "grad_norm": 3.618751049041748, "learning_rate": 9.353877462024857e-06, "loss": 0.3279, "step": 2580 }, { "epoch": 0.09412021222472564, "grad_norm": 3.6551568508148193, "learning_rate": 9.390217312304673e-06, "loss": 0.2409, "step": 2590 }, { "epoch": 0.0944836107275238, "grad_norm": 8.680901527404785, "learning_rate": 9.42655716258449e-06, "loss": 0.373, "step": 2600 }, { "epoch": 0.09484700923032197, "grad_norm": 4.761026382446289, "learning_rate": 9.462897012864308e-06, "loss": 0.2777, "step": 2610 }, { "epoch": 0.09521040773312014, "grad_norm": 3.142723321914673, "learning_rate": 9.499236863144124e-06, "loss": 0.2882, "step": 2620 }, { "epoch": 0.09557380623591831, "grad_norm": 2.969968795776367, "learning_rate": 9.53557671342394e-06, "loss": 0.3086, "step": 2630 }, { "epoch": 0.09593720473871648, "grad_norm": 3.754549264907837, "learning_rate": 9.571916563703758e-06, "loss": 0.259, "step": 2640 }, { "epoch": 0.09630060324151464, "grad_norm": 23.7288761138916, "learning_rate": 9.608256413983576e-06, "loss": 0.4284, "step": 2650 }, { "epoch": 0.09666400174431282, "grad_norm": 2.7727372646331787, "learning_rate": 9.644596264263392e-06, "loss": 0.2602, "step": 2660 }, { "epoch": 0.09702740024711098, "grad_norm": 14.707064628601074, "learning_rate": 9.680936114543208e-06, "loss": 0.3059, "step": 2670 }, { "epoch": 0.09739079874990915, "grad_norm": 3.8396642208099365, "learning_rate": 9.717275964823025e-06, "loss": 0.2811, "step": 2680 }, { "epoch": 0.09775419725270731, "grad_norm": 2.9460713863372803, "learning_rate": 9.753615815102843e-06, "loss": 0.2686, "step": 2690 }, { "epoch": 0.0981175957555055, "grad_norm": 20.107336044311523, "learning_rate": 9.789955665382659e-06, "loss": 0.4306, "step": 2700 }, { "epoch": 0.09848099425830366, "grad_norm": 3.1286280155181885, "learning_rate": 9.826295515662477e-06, "loss": 0.3059, "step": 2710 }, { "epoch": 0.09884439276110182, "grad_norm": 6.160215854644775, "learning_rate": 9.862635365942292e-06, "loss": 0.3046, "step": 2720 }, { "epoch": 0.09920779126389999, "grad_norm": 6.1921186447143555, "learning_rate": 9.89897521622211e-06, "loss": 0.285, "step": 2730 }, { "epoch": 0.09957118976669817, "grad_norm": 13.759759902954102, "learning_rate": 9.935315066501926e-06, "loss": 0.2888, "step": 2740 }, { "epoch": 0.09993458826949633, "grad_norm": 13.92764949798584, "learning_rate": 9.971654916781744e-06, "loss": 0.4266, "step": 2750 }, { "epoch": 0.1002979867722945, "grad_norm": 3.3999857902526855, "learning_rate": 1.000799476706156e-05, "loss": 0.2858, "step": 2760 }, { "epoch": 0.10066138527509266, "grad_norm": 4.103928089141846, "learning_rate": 1.0044334617341377e-05, "loss": 0.262, "step": 2770 }, { "epoch": 0.10102478377789084, "grad_norm": 6.15985107421875, "learning_rate": 1.0080674467621195e-05, "loss": 0.2866, "step": 2780 }, { "epoch": 0.10138818228068901, "grad_norm": 4.904097557067871, "learning_rate": 1.0117014317901011e-05, "loss": 0.5057, "step": 2790 }, { "epoch": 0.10175158078348717, "grad_norm": 15.2875337600708, "learning_rate": 1.0153354168180827e-05, "loss": 0.4345, "step": 2800 }, { "epoch": 0.10211497928628534, "grad_norm": 2.4697763919830322, "learning_rate": 1.0189694018460643e-05, "loss": 0.2693, "step": 2810 }, { "epoch": 0.1024783777890835, "grad_norm": 5.04618501663208, "learning_rate": 1.0226033868740462e-05, "loss": 0.2868, "step": 2820 }, { "epoch": 0.10284177629188168, "grad_norm": 5.851120948791504, "learning_rate": 1.0262373719020278e-05, "loss": 0.3425, "step": 2830 }, { "epoch": 0.10320517479467985, "grad_norm": 2.1007258892059326, "learning_rate": 1.0298713569300094e-05, "loss": 0.2394, "step": 2840 }, { "epoch": 0.10356857329747801, "grad_norm": 23.411701202392578, "learning_rate": 1.0335053419579912e-05, "loss": 0.4125, "step": 2850 }, { "epoch": 0.10393197180027618, "grad_norm": 4.178852558135986, "learning_rate": 1.037139326985973e-05, "loss": 0.2951, "step": 2860 }, { "epoch": 0.10429537030307436, "grad_norm": 1.7873708009719849, "learning_rate": 1.0407733120139545e-05, "loss": 0.3272, "step": 2870 }, { "epoch": 0.10465876880587252, "grad_norm": 7.603367328643799, "learning_rate": 1.0444072970419361e-05, "loss": 0.2779, "step": 2880 }, { "epoch": 0.10502216730867069, "grad_norm": 3.468761444091797, "learning_rate": 1.0480412820699179e-05, "loss": 0.3007, "step": 2890 }, { "epoch": 0.10538556581146885, "grad_norm": 16.35407829284668, "learning_rate": 1.0516752670978997e-05, "loss": 0.3918, "step": 2900 }, { "epoch": 0.10574896431426703, "grad_norm": 3.4226725101470947, "learning_rate": 1.0553092521258813e-05, "loss": 3.7156, "step": 2910 }, { "epoch": 0.1061123628170652, "grad_norm": 9.006295204162598, "learning_rate": 1.058943237153863e-05, "loss": 0.4075, "step": 2920 }, { "epoch": 0.10647576131986336, "grad_norm": 4.993385314941406, "learning_rate": 1.0625772221818446e-05, "loss": 0.3588, "step": 2930 }, { "epoch": 0.10683915982266153, "grad_norm": 3.7684736251831055, "learning_rate": 1.0662112072098264e-05, "loss": 0.2429, "step": 2940 }, { "epoch": 0.1072025583254597, "grad_norm": 40.301170349121094, "learning_rate": 1.069845192237808e-05, "loss": 0.4739, "step": 2950 }, { "epoch": 0.10756595682825787, "grad_norm": 3.772693157196045, "learning_rate": 1.0734791772657898e-05, "loss": 0.3284, "step": 2960 }, { "epoch": 0.10792935533105603, "grad_norm": 3.0183212757110596, "learning_rate": 1.0771131622937714e-05, "loss": 0.38, "step": 2970 }, { "epoch": 0.1082927538338542, "grad_norm": 6.61776876449585, "learning_rate": 1.0807471473217531e-05, "loss": 0.2793, "step": 2980 }, { "epoch": 0.10865615233665238, "grad_norm": 6.112472057342529, "learning_rate": 1.0843811323497347e-05, "loss": 0.2447, "step": 2990 }, { "epoch": 0.10901955083945054, "grad_norm": 10.800559997558594, "learning_rate": 1.0880151173777165e-05, "loss": 0.373, "step": 3000 }, { "epoch": 0.10901955083945054, "eval_loss": 0.4652940630912781, "eval_runtime": 180.0765, "eval_samples_per_second": 41.171, "eval_steps_per_second": 5.148, "eval_wer": 0.27681667181004593, "step": 3000 }, { "epoch": 0.10938294934224871, "grad_norm": 7.778831958770752, "learning_rate": 1.091649102405698e-05, "loss": 0.29, "step": 3010 }, { "epoch": 0.10974634784504687, "grad_norm": 2.855592966079712, "learning_rate": 1.0952830874336798e-05, "loss": 0.2411, "step": 3020 }, { "epoch": 0.11010974634784505, "grad_norm": 4.229335784912109, "learning_rate": 1.0989170724616616e-05, "loss": 0.3247, "step": 3030 }, { "epoch": 0.11047314485064322, "grad_norm": 3.8145949840545654, "learning_rate": 1.1025510574896432e-05, "loss": 0.2242, "step": 3040 }, { "epoch": 0.11083654335344138, "grad_norm": 22.571304321289062, "learning_rate": 1.1061850425176248e-05, "loss": 0.3959, "step": 3050 }, { "epoch": 0.11119994185623955, "grad_norm": 2.4706461429595947, "learning_rate": 1.1098190275456066e-05, "loss": 0.2466, "step": 3060 }, { "epoch": 0.11156334035903773, "grad_norm": 4.497069358825684, "learning_rate": 1.1134530125735883e-05, "loss": 2.1968, "step": 3070 }, { "epoch": 0.11192673886183589, "grad_norm": 5.060062885284424, "learning_rate": 1.11708699760157e-05, "loss": 0.2921, "step": 3080 }, { "epoch": 0.11229013736463406, "grad_norm": 2.7882325649261475, "learning_rate": 1.1207209826295515e-05, "loss": 0.2534, "step": 3090 }, { "epoch": 0.11265353586743222, "grad_norm": 9.96241569519043, "learning_rate": 1.1243549676575333e-05, "loss": 0.421, "step": 3100 }, { "epoch": 0.1130169343702304, "grad_norm": 31.262916564941406, "learning_rate": 1.127988952685515e-05, "loss": 0.4048, "step": 3110 }, { "epoch": 0.11338033287302857, "grad_norm": 3.472343921661377, "learning_rate": 1.1316229377134967e-05, "loss": 0.2798, "step": 3120 }, { "epoch": 0.11374373137582673, "grad_norm": 4.074085235595703, "learning_rate": 1.1352569227414783e-05, "loss": 0.299, "step": 3130 }, { "epoch": 0.1141071298786249, "grad_norm": 2.879512310028076, "learning_rate": 1.1388909077694602e-05, "loss": 0.2137, "step": 3140 }, { "epoch": 0.11447052838142306, "grad_norm": 125.17889404296875, "learning_rate": 1.1425248927974418e-05, "loss": 0.5418, "step": 3150 }, { "epoch": 0.11483392688422124, "grad_norm": 4.171487808227539, "learning_rate": 1.1461588778254234e-05, "loss": 0.2685, "step": 3160 }, { "epoch": 0.1151973253870194, "grad_norm": 2.1496529579162598, "learning_rate": 1.1497928628534051e-05, "loss": 0.2421, "step": 3170 }, { "epoch": 0.11556072388981757, "grad_norm": 2.6266047954559326, "learning_rate": 1.1534268478813867e-05, "loss": 0.3288, "step": 3180 }, { "epoch": 0.11592412239261574, "grad_norm": 3.7677230834960938, "learning_rate": 1.1570608329093685e-05, "loss": 0.3093, "step": 3190 }, { "epoch": 0.11628752089541392, "grad_norm": 9.4945707321167, "learning_rate": 1.1606948179373501e-05, "loss": 0.3066, "step": 3200 }, { "epoch": 0.11665091939821208, "grad_norm": 2.5509915351867676, "learning_rate": 1.1643288029653319e-05, "loss": 0.2615, "step": 3210 }, { "epoch": 0.11701431790101025, "grad_norm": 3.066624641418457, "learning_rate": 1.1679627879933135e-05, "loss": 0.3224, "step": 3220 }, { "epoch": 0.11737771640380841, "grad_norm": 6.494440078735352, "learning_rate": 1.1715967730212952e-05, "loss": 0.3017, "step": 3230 }, { "epoch": 0.11774111490660659, "grad_norm": 3.4675605297088623, "learning_rate": 1.1752307580492768e-05, "loss": 0.2152, "step": 3240 }, { "epoch": 0.11810451340940475, "grad_norm": 15.5110445022583, "learning_rate": 1.1788647430772586e-05, "loss": 0.349, "step": 3250 }, { "epoch": 0.11846791191220292, "grad_norm": 1.972530484199524, "learning_rate": 1.1824987281052402e-05, "loss": 0.2728, "step": 3260 }, { "epoch": 0.11883131041500108, "grad_norm": 4.018677711486816, "learning_rate": 1.186132713133222e-05, "loss": 0.254, "step": 3270 }, { "epoch": 0.11919470891779926, "grad_norm": 4.95416784286499, "learning_rate": 1.1897666981612037e-05, "loss": 0.2465, "step": 3280 }, { "epoch": 0.11955810742059743, "grad_norm": 3.165599822998047, "learning_rate": 1.1934006831891853e-05, "loss": 0.2537, "step": 3290 }, { "epoch": 0.1199215059233956, "grad_norm": 8.508636474609375, "learning_rate": 1.1970346682171669e-05, "loss": 0.3655, "step": 3300 }, { "epoch": 0.12028490442619376, "grad_norm": 2.3892879486083984, "learning_rate": 1.2006686532451487e-05, "loss": 0.252, "step": 3310 }, { "epoch": 0.12064830292899194, "grad_norm": 3.591564178466797, "learning_rate": 1.2043026382731304e-05, "loss": 0.2401, "step": 3320 }, { "epoch": 0.1210117014317901, "grad_norm": 3.891261577606201, "learning_rate": 1.207936623301112e-05, "loss": 0.2909, "step": 3330 }, { "epoch": 0.12137509993458827, "grad_norm": 4.691511154174805, "learning_rate": 1.2115706083290936e-05, "loss": 0.2304, "step": 3340 }, { "epoch": 0.12173849843738643, "grad_norm": 18.415170669555664, "learning_rate": 1.2152045933570754e-05, "loss": 0.35, "step": 3350 }, { "epoch": 0.12210189694018461, "grad_norm": 3.9105615615844727, "learning_rate": 1.2188385783850572e-05, "loss": 0.3112, "step": 3360 }, { "epoch": 0.12246529544298278, "grad_norm": 3.215313196182251, "learning_rate": 1.2224725634130388e-05, "loss": 0.2492, "step": 3370 }, { "epoch": 0.12282869394578094, "grad_norm": 9.30749225616455, "learning_rate": 1.2261065484410204e-05, "loss": 0.2696, "step": 3380 }, { "epoch": 0.12319209244857911, "grad_norm": 4.9797682762146, "learning_rate": 1.2297405334690023e-05, "loss": 0.2197, "step": 3390 }, { "epoch": 0.12355549095137729, "grad_norm": 19.632797241210938, "learning_rate": 1.2333745184969839e-05, "loss": 0.3411, "step": 3400 }, { "epoch": 0.12391888945417545, "grad_norm": 4.509830474853516, "learning_rate": 1.2370085035249655e-05, "loss": 0.2394, "step": 3410 }, { "epoch": 0.12428228795697362, "grad_norm": 2.253514051437378, "learning_rate": 1.2406424885529473e-05, "loss": 0.266, "step": 3420 }, { "epoch": 0.12464568645977178, "grad_norm": 3.123828172683716, "learning_rate": 1.244276473580929e-05, "loss": 0.2675, "step": 3430 }, { "epoch": 0.12500908496256996, "grad_norm": 34.37680435180664, "learning_rate": 1.2479104586089106e-05, "loss": 0.279, "step": 3440 }, { "epoch": 0.1253724834653681, "grad_norm": 10.051690101623535, "learning_rate": 1.2515444436368922e-05, "loss": 0.3364, "step": 3450 }, { "epoch": 0.1257358819681663, "grad_norm": 2.1765711307525635, "learning_rate": 1.255178428664874e-05, "loss": 0.2288, "step": 3460 }, { "epoch": 0.12609928047096447, "grad_norm": 2.4910778999328613, "learning_rate": 1.2588124136928556e-05, "loss": 0.2866, "step": 3470 }, { "epoch": 0.12646267897376262, "grad_norm": 7.379613876342773, "learning_rate": 1.2624463987208373e-05, "loss": 0.2618, "step": 3480 }, { "epoch": 0.1268260774765608, "grad_norm": 2.681814432144165, "learning_rate": 1.266080383748819e-05, "loss": 0.2405, "step": 3490 }, { "epoch": 0.12718947597935895, "grad_norm": 83.93474578857422, "learning_rate": 1.2697143687768007e-05, "loss": 0.338, "step": 3500 }, { "epoch": 0.12755287448215713, "grad_norm": 1.5564826726913452, "learning_rate": 1.2733483538047825e-05, "loss": 0.2305, "step": 3510 }, { "epoch": 0.1279162729849553, "grad_norm": 2.6026437282562256, "learning_rate": 1.2769823388327639e-05, "loss": 0.2618, "step": 3520 }, { "epoch": 0.12827967148775346, "grad_norm": 8.228372573852539, "learning_rate": 1.2806163238607458e-05, "loss": 0.2586, "step": 3530 }, { "epoch": 0.12864306999055164, "grad_norm": 2.643139362335205, "learning_rate": 1.2842503088887276e-05, "loss": 0.2197, "step": 3540 }, { "epoch": 0.12900646849334982, "grad_norm": 400.0296325683594, "learning_rate": 1.287884293916709e-05, "loss": 0.3586, "step": 3550 }, { "epoch": 0.12936986699614797, "grad_norm": 1.6349281072616577, "learning_rate": 1.2915182789446908e-05, "loss": 0.2364, "step": 3560 }, { "epoch": 0.12973326549894615, "grad_norm": 2.6573753356933594, "learning_rate": 1.2951522639726724e-05, "loss": 0.2195, "step": 3570 }, { "epoch": 0.1300966640017443, "grad_norm": 4.2721686363220215, "learning_rate": 1.2987862490006542e-05, "loss": 0.3092, "step": 3580 }, { "epoch": 0.13046006250454248, "grad_norm": 2.9982502460479736, "learning_rate": 1.302420234028636e-05, "loss": 0.2826, "step": 3590 }, { "epoch": 0.13082346100734066, "grad_norm": 8.903009414672852, "learning_rate": 1.3060542190566175e-05, "loss": 0.3367, "step": 3600 }, { "epoch": 0.13082346100734066, "eval_loss": 0.4490436017513275, "eval_runtime": 179.8743, "eval_samples_per_second": 41.218, "eval_steps_per_second": 5.154, "eval_wer": 0.2664058670829778, "step": 3600 }, { "epoch": 0.1311868595101388, "grad_norm": 2.9746363162994385, "learning_rate": 1.3096882040845993e-05, "loss": 0.2418, "step": 3610 }, { "epoch": 0.131550258012937, "grad_norm": 2.274872303009033, "learning_rate": 1.313322189112581e-05, "loss": 0.3052, "step": 3620 }, { "epoch": 0.13191365651573517, "grad_norm": 7.114847660064697, "learning_rate": 1.3169561741405625e-05, "loss": 0.2821, "step": 3630 }, { "epoch": 0.13227705501853332, "grad_norm": 3.2101128101348877, "learning_rate": 1.3205901591685444e-05, "loss": 0.2223, "step": 3640 }, { "epoch": 0.1326404535213315, "grad_norm": 18.914968490600586, "learning_rate": 1.3242241441965258e-05, "loss": 0.3809, "step": 3650 }, { "epoch": 0.13300385202412965, "grad_norm": 2.399569272994995, "learning_rate": 1.3278581292245076e-05, "loss": 0.2221, "step": 3660 }, { "epoch": 0.13336725052692783, "grad_norm": 5.76792573928833, "learning_rate": 1.3314921142524894e-05, "loss": 0.2487, "step": 3670 }, { "epoch": 0.133730649029726, "grad_norm": 3.6859967708587646, "learning_rate": 1.335126099280471e-05, "loss": 0.2781, "step": 3680 }, { "epoch": 0.13409404753252416, "grad_norm": 2.9653141498565674, "learning_rate": 1.3387600843084527e-05, "loss": 0.2258, "step": 3690 }, { "epoch": 0.13445744603532234, "grad_norm": 19.170753479003906, "learning_rate": 1.3423940693364345e-05, "loss": 0.3902, "step": 3700 }, { "epoch": 0.13482084453812052, "grad_norm": 2.2880115509033203, "learning_rate": 1.3460280543644161e-05, "loss": 0.2745, "step": 3710 }, { "epoch": 0.13518424304091867, "grad_norm": 2.5196125507354736, "learning_rate": 1.3496620393923979e-05, "loss": 0.2293, "step": 3720 }, { "epoch": 0.13554764154371685, "grad_norm": 3.827986001968384, "learning_rate": 1.3532960244203793e-05, "loss": 0.259, "step": 3730 }, { "epoch": 0.135911040046515, "grad_norm": 3.4211530685424805, "learning_rate": 1.356930009448361e-05, "loss": 0.3256, "step": 3740 }, { "epoch": 0.13627443854931318, "grad_norm": 26.879398345947266, "learning_rate": 1.360563994476343e-05, "loss": 0.3208, "step": 3750 }, { "epoch": 0.13663783705211135, "grad_norm": 2.316091775894165, "learning_rate": 1.3641979795043244e-05, "loss": 0.2316, "step": 3760 }, { "epoch": 0.1370012355549095, "grad_norm": 4.098924160003662, "learning_rate": 1.3678319645323062e-05, "loss": 0.2399, "step": 3770 }, { "epoch": 0.13736463405770769, "grad_norm": 6.9372687339782715, "learning_rate": 1.371465949560288e-05, "loss": 0.2858, "step": 3780 }, { "epoch": 0.13772803256050586, "grad_norm": 2.509535789489746, "learning_rate": 1.3750999345882695e-05, "loss": 0.2113, "step": 3790 }, { "epoch": 0.13809143106330402, "grad_norm": 7.7181077003479, "learning_rate": 1.3787339196162513e-05, "loss": 0.3279, "step": 3800 }, { "epoch": 0.1384548295661022, "grad_norm": 2.6843245029449463, "learning_rate": 1.3823679046442329e-05, "loss": 0.2267, "step": 3810 }, { "epoch": 0.13881822806890035, "grad_norm": 3.05159068107605, "learning_rate": 1.3860018896722147e-05, "loss": 0.229, "step": 3820 }, { "epoch": 0.13918162657169852, "grad_norm": 5.029635429382324, "learning_rate": 1.3896358747001964e-05, "loss": 0.2627, "step": 3830 }, { "epoch": 0.1395450250744967, "grad_norm": 2.8287103176116943, "learning_rate": 1.3932698597281779e-05, "loss": 0.2294, "step": 3840 }, { "epoch": 0.13990842357729485, "grad_norm": 24.862224578857422, "learning_rate": 1.3969038447561598e-05, "loss": 0.3198, "step": 3850 }, { "epoch": 0.14027182208009303, "grad_norm": 5.624647617340088, "learning_rate": 1.4005378297841412e-05, "loss": 0.2641, "step": 3860 }, { "epoch": 0.14063522058289118, "grad_norm": 1.6199389696121216, "learning_rate": 1.404171814812123e-05, "loss": 0.2279, "step": 3870 }, { "epoch": 0.14099861908568936, "grad_norm": 2.864058017730713, "learning_rate": 1.4078057998401047e-05, "loss": 0.2448, "step": 3880 }, { "epoch": 0.14136201758848754, "grad_norm": 3.897899627685547, "learning_rate": 1.4114397848680863e-05, "loss": 0.2438, "step": 3890 }, { "epoch": 0.1417254160912857, "grad_norm": 42.4840087890625, "learning_rate": 1.4150737698960681e-05, "loss": 0.3604, "step": 3900 }, { "epoch": 0.14208881459408387, "grad_norm": 1.6532913446426392, "learning_rate": 1.4187077549240499e-05, "loss": 0.2469, "step": 3910 }, { "epoch": 0.14245221309688205, "grad_norm": 2.3755931854248047, "learning_rate": 1.4223417399520315e-05, "loss": 0.2233, "step": 3920 }, { "epoch": 0.1428156115996802, "grad_norm": 5.866461277008057, "learning_rate": 1.4259757249800132e-05, "loss": 0.2952, "step": 3930 }, { "epoch": 0.14317901010247838, "grad_norm": 3.171570301055908, "learning_rate": 1.4296097100079947e-05, "loss": 0.2329, "step": 3940 }, { "epoch": 0.14354240860527653, "grad_norm": 23.302635192871094, "learning_rate": 1.4332436950359764e-05, "loss": 0.3592, "step": 3950 }, { "epoch": 0.1439058071080747, "grad_norm": 2.3609213829040527, "learning_rate": 1.4368776800639584e-05, "loss": 1.1975, "step": 3960 }, { "epoch": 0.1442692056108729, "grad_norm": 2.857872486114502, "learning_rate": 1.4405116650919398e-05, "loss": 0.265, "step": 3970 }, { "epoch": 0.14463260411367104, "grad_norm": 6.918335914611816, "learning_rate": 1.4441456501199216e-05, "loss": 0.4057, "step": 3980 }, { "epoch": 0.14499600261646922, "grad_norm": 3.8019461631774902, "learning_rate": 1.4477796351479033e-05, "loss": 0.299, "step": 3990 }, { "epoch": 0.1453594011192674, "grad_norm": 30.963428497314453, "learning_rate": 1.451413620175885e-05, "loss": 0.335, "step": 4000 }, { "epoch": 0.14572279962206555, "grad_norm": 2.3968963623046875, "learning_rate": 1.4550476052038667e-05, "loss": 0.224, "step": 4010 }, { "epoch": 0.14608619812486373, "grad_norm": 6.7229485511779785, "learning_rate": 1.4586815902318481e-05, "loss": 0.2657, "step": 4020 }, { "epoch": 0.14644959662766188, "grad_norm": 17.447879791259766, "learning_rate": 1.46231557525983e-05, "loss": 0.2199, "step": 4030 }, { "epoch": 0.14681299513046006, "grad_norm": 2.020756721496582, "learning_rate": 1.4659495602878118e-05, "loss": 0.31, "step": 4040 }, { "epoch": 0.14717639363325824, "grad_norm": 38.28268814086914, "learning_rate": 1.4695835453157932e-05, "loss": 0.3861, "step": 4050 }, { "epoch": 0.1475397921360564, "grad_norm": 2.085073232650757, "learning_rate": 1.473217530343775e-05, "loss": 0.2078, "step": 4060 }, { "epoch": 0.14790319063885457, "grad_norm": 3.453597068786621, "learning_rate": 1.476851515371757e-05, "loss": 0.296, "step": 4070 }, { "epoch": 0.14826658914165275, "grad_norm": 2.3039424419403076, "learning_rate": 1.4804855003997384e-05, "loss": 0.2346, "step": 4080 }, { "epoch": 0.1486299876444509, "grad_norm": 3.217890977859497, "learning_rate": 1.4841194854277201e-05, "loss": 0.2243, "step": 4090 }, { "epoch": 0.14899338614724908, "grad_norm": 12.48748779296875, "learning_rate": 1.4877534704557017e-05, "loss": 0.3378, "step": 4100 }, { "epoch": 0.14935678465004723, "grad_norm": 2.781388282775879, "learning_rate": 1.4913874554836835e-05, "loss": 0.2167, "step": 4110 }, { "epoch": 0.1497201831528454, "grad_norm": 2.564457893371582, "learning_rate": 1.4950214405116653e-05, "loss": 0.2187, "step": 4120 }, { "epoch": 0.1500835816556436, "grad_norm": 9.590895652770996, "learning_rate": 1.4986554255396469e-05, "loss": 0.2444, "step": 4130 }, { "epoch": 0.15044698015844174, "grad_norm": 2.8055028915405273, "learning_rate": 1.5022894105676286e-05, "loss": 0.2499, "step": 4140 }, { "epoch": 0.15081037866123992, "grad_norm": 7.157045364379883, "learning_rate": 1.5059233955956104e-05, "loss": 0.361, "step": 4150 }, { "epoch": 0.1511737771640381, "grad_norm": 3.369006633758545, "learning_rate": 1.5095573806235918e-05, "loss": 0.259, "step": 4160 }, { "epoch": 0.15153717566683625, "grad_norm": 5.334355354309082, "learning_rate": 1.5131913656515736e-05, "loss": 0.2797, "step": 4170 }, { "epoch": 0.15190057416963443, "grad_norm": 6.667120456695557, "learning_rate": 1.5168253506795552e-05, "loss": 0.2678, "step": 4180 }, { "epoch": 0.15226397267243258, "grad_norm": 1.7419887781143188, "learning_rate": 1.520459335707537e-05, "loss": 0.2432, "step": 4190 }, { "epoch": 0.15262737117523076, "grad_norm": 7.022573947906494, "learning_rate": 1.5240933207355187e-05, "loss": 0.2955, "step": 4200 }, { "epoch": 0.15262737117523076, "eval_loss": 0.4361402690410614, "eval_runtime": 180.5933, "eval_samples_per_second": 41.054, "eval_steps_per_second": 5.133, "eval_wer": 0.2589540181894095, "step": 4200 }, { "epoch": 0.15299076967802894, "grad_norm": 3.108078718185425, "learning_rate": 1.5277273057635e-05, "loss": 3.6147, "step": 4210 }, { "epoch": 0.1533541681808271, "grad_norm": 2.6063787937164307, "learning_rate": 1.531361290791482e-05, "loss": 0.232, "step": 4220 }, { "epoch": 0.15371756668362527, "grad_norm": 3.581697463989258, "learning_rate": 1.5349952758194637e-05, "loss": 0.2451, "step": 4230 }, { "epoch": 0.15408096518642342, "grad_norm": 2.5910837650299072, "learning_rate": 1.5386292608474453e-05, "loss": 0.2283, "step": 4240 }, { "epoch": 0.1544443636892216, "grad_norm": 70.38739013671875, "learning_rate": 1.5422632458754272e-05, "loss": 0.396, "step": 4250 }, { "epoch": 0.15480776219201978, "grad_norm": 3.5658187866210938, "learning_rate": 1.5458972309034088e-05, "loss": 0.2116, "step": 4260 }, { "epoch": 0.15517116069481793, "grad_norm": 5.393126487731934, "learning_rate": 1.5495312159313904e-05, "loss": 0.2382, "step": 4270 }, { "epoch": 0.1555345591976161, "grad_norm": 10.135586738586426, "learning_rate": 1.5531652009593723e-05, "loss": 0.2485, "step": 4280 }, { "epoch": 0.15589795770041429, "grad_norm": 2.1143031120300293, "learning_rate": 1.5567991859873536e-05, "loss": 0.1936, "step": 4290 }, { "epoch": 0.15626135620321244, "grad_norm": 20.077383041381836, "learning_rate": 1.5604331710153355e-05, "loss": 0.3818, "step": 4300 }, { "epoch": 0.15662475470601062, "grad_norm": 3.793126344680786, "learning_rate": 1.564067156043317e-05, "loss": 0.2245, "step": 4310 }, { "epoch": 0.15698815320880877, "grad_norm": 3.2057955265045166, "learning_rate": 1.5677011410712987e-05, "loss": 0.2551, "step": 4320 }, { "epoch": 0.15735155171160695, "grad_norm": 5.002716064453125, "learning_rate": 1.5713351260992806e-05, "loss": 0.2951, "step": 4330 }, { "epoch": 0.15771495021440513, "grad_norm": 2.2240726947784424, "learning_rate": 1.5749691111272622e-05, "loss": 0.1993, "step": 4340 }, { "epoch": 0.15807834871720328, "grad_norm": 55.30891036987305, "learning_rate": 1.578603096155244e-05, "loss": 0.2803, "step": 4350 }, { "epoch": 0.15844174722000146, "grad_norm": 1.9186596870422363, "learning_rate": 1.5822370811832258e-05, "loss": 0.2234, "step": 4360 }, { "epoch": 0.15880514572279963, "grad_norm": 1.7817661762237549, "learning_rate": 1.5858710662112074e-05, "loss": 0.2038, "step": 4370 }, { "epoch": 0.15916854422559779, "grad_norm": 3.046330690383911, "learning_rate": 1.589505051239189e-05, "loss": 0.2809, "step": 4380 }, { "epoch": 0.15953194272839596, "grad_norm": 5.43302583694458, "learning_rate": 1.5931390362671706e-05, "loss": 0.1896, "step": 4390 }, { "epoch": 0.15989534123119412, "grad_norm": 12.185855865478516, "learning_rate": 1.596773021295152e-05, "loss": 0.2984, "step": 4400 }, { "epoch": 0.1602587397339923, "grad_norm": 1.9507842063903809, "learning_rate": 1.600407006323134e-05, "loss": 0.2064, "step": 4410 }, { "epoch": 0.16062213823679047, "grad_norm": 4.536543846130371, "learning_rate": 1.6040409913511157e-05, "loss": 0.2433, "step": 4420 }, { "epoch": 0.16098553673958862, "grad_norm": 3.101174831390381, "learning_rate": 1.6076749763790973e-05, "loss": 0.2746, "step": 4430 }, { "epoch": 0.1613489352423868, "grad_norm": 2.2098021507263184, "learning_rate": 1.6113089614070792e-05, "loss": 0.17, "step": 4440 }, { "epoch": 0.16171233374518498, "grad_norm": 59.360809326171875, "learning_rate": 1.6149429464350608e-05, "loss": 0.379, "step": 4450 }, { "epoch": 0.16207573224798313, "grad_norm": 6.364736557006836, "learning_rate": 1.6185769314630424e-05, "loss": 0.2224, "step": 4460 }, { "epoch": 0.1624391307507813, "grad_norm": 3.2455356121063232, "learning_rate": 1.622210916491024e-05, "loss": 0.2195, "step": 4470 }, { "epoch": 0.16280252925357946, "grad_norm": 6.399629592895508, "learning_rate": 1.625844901519006e-05, "loss": 0.266, "step": 4480 }, { "epoch": 0.16316592775637764, "grad_norm": 16.19785499572754, "learning_rate": 1.6294788865469875e-05, "loss": 0.1836, "step": 4490 }, { "epoch": 0.16352932625917582, "grad_norm": 7.909778594970703, "learning_rate": 1.633112871574969e-05, "loss": 0.6016, "step": 4500 }, { "epoch": 0.16389272476197397, "grad_norm": 2.8134663105010986, "learning_rate": 1.636746856602951e-05, "loss": 0.2148, "step": 4510 }, { "epoch": 0.16425612326477215, "grad_norm": 2.667999505996704, "learning_rate": 1.6403808416309327e-05, "loss": 0.2294, "step": 4520 }, { "epoch": 0.1646195217675703, "grad_norm": 3.355242967605591, "learning_rate": 1.6440148266589143e-05, "loss": 0.2097, "step": 4530 }, { "epoch": 0.16498292027036848, "grad_norm": 2.6241908073425293, "learning_rate": 1.647648811686896e-05, "loss": 0.2337, "step": 4540 }, { "epoch": 0.16534631877316666, "grad_norm": 16.759428024291992, "learning_rate": 1.6512827967148775e-05, "loss": 0.2944, "step": 4550 }, { "epoch": 0.1657097172759648, "grad_norm": 3.098898410797119, "learning_rate": 1.6549167817428594e-05, "loss": 0.1895, "step": 4560 }, { "epoch": 0.166073115778763, "grad_norm": 4.042644023895264, "learning_rate": 1.658550766770841e-05, "loss": 0.2369, "step": 4570 }, { "epoch": 0.16643651428156117, "grad_norm": 7.174807548522949, "learning_rate": 1.6621847517988226e-05, "loss": 0.2331, "step": 4580 }, { "epoch": 0.16679991278435932, "grad_norm": 2.1805012226104736, "learning_rate": 1.6658187368268045e-05, "loss": 0.2422, "step": 4590 }, { "epoch": 0.1671633112871575, "grad_norm": 18.097871780395508, "learning_rate": 1.6694527218547858e-05, "loss": 0.347, "step": 4600 }, { "epoch": 0.16752670978995565, "grad_norm": 3.48561429977417, "learning_rate": 1.6730867068827677e-05, "loss": 0.2985, "step": 4610 }, { "epoch": 0.16789010829275383, "grad_norm": 1.7519229650497437, "learning_rate": 1.6767206919107496e-05, "loss": 0.2204, "step": 4620 }, { "epoch": 0.168253506795552, "grad_norm": 3.7641661167144775, "learning_rate": 1.680354676938731e-05, "loss": 0.2348, "step": 4630 }, { "epoch": 0.16861690529835016, "grad_norm": 3.0688085556030273, "learning_rate": 1.683988661966713e-05, "loss": 0.2147, "step": 4640 }, { "epoch": 0.16898030380114834, "grad_norm": 25.845094680786133, "learning_rate": 1.6876226469946944e-05, "loss": 0.3671, "step": 4650 }, { "epoch": 0.16934370230394652, "grad_norm": 2.841994524002075, "learning_rate": 1.691256632022676e-05, "loss": 0.2182, "step": 4660 }, { "epoch": 0.16970710080674467, "grad_norm": 1.0501997470855713, "learning_rate": 1.694890617050658e-05, "loss": 0.1791, "step": 4670 }, { "epoch": 0.17007049930954285, "grad_norm": 3.3973441123962402, "learning_rate": 1.6985246020786392e-05, "loss": 0.3338, "step": 4680 }, { "epoch": 0.170433897812341, "grad_norm": 1.8442267179489136, "learning_rate": 1.702158587106621e-05, "loss": 0.2528, "step": 4690 }, { "epoch": 0.17079729631513918, "grad_norm": 42.373409271240234, "learning_rate": 1.705792572134603e-05, "loss": 0.2892, "step": 4700 }, { "epoch": 0.17116069481793736, "grad_norm": 6.344671726226807, "learning_rate": 1.7094265571625844e-05, "loss": 0.2474, "step": 4710 }, { "epoch": 0.1715240933207355, "grad_norm": 1.6177664995193481, "learning_rate": 1.7130605421905663e-05, "loss": 0.2364, "step": 4720 }, { "epoch": 0.1718874918235337, "grad_norm": 4.98591423034668, "learning_rate": 1.7166945272185482e-05, "loss": 0.2046, "step": 4730 }, { "epoch": 0.17225089032633187, "grad_norm": 7.943169116973877, "learning_rate": 1.7203285122465295e-05, "loss": 0.293, "step": 4740 }, { "epoch": 0.17261428882913002, "grad_norm": 7.402034759521484, "learning_rate": 1.7239624972745114e-05, "loss": 0.2722, "step": 4750 }, { "epoch": 0.1729776873319282, "grad_norm": 13.290019035339355, "learning_rate": 1.727596482302493e-05, "loss": 0.347, "step": 4760 }, { "epoch": 0.17334108583472635, "grad_norm": 1.8591586351394653, "learning_rate": 1.7312304673304746e-05, "loss": 0.2291, "step": 4770 }, { "epoch": 0.17370448433752453, "grad_norm": 2.5220861434936523, "learning_rate": 1.7348644523584565e-05, "loss": 0.2436, "step": 4780 }, { "epoch": 0.1740678828403227, "grad_norm": 1.8692690134048462, "learning_rate": 1.738498437386438e-05, "loss": 0.1782, "step": 4790 }, { "epoch": 0.17443128134312086, "grad_norm": 12.558557510375977, "learning_rate": 1.7421324224144197e-05, "loss": 0.3347, "step": 4800 }, { "epoch": 0.17443128134312086, "eval_loss": 0.4148472547531128, "eval_runtime": 180.0999, "eval_samples_per_second": 41.166, "eval_steps_per_second": 5.147, "eval_wer": 0.23564543358687168, "step": 4800 }, { "epoch": 0.17479467984591904, "grad_norm": 6.168694972991943, "learning_rate": 1.7457664074424017e-05, "loss": 0.2183, "step": 4810 }, { "epoch": 0.17515807834871722, "grad_norm": 5.153416633605957, "learning_rate": 1.749400392470383e-05, "loss": 0.2689, "step": 4820 }, { "epoch": 0.17552147685151537, "grad_norm": 2.8500893115997314, "learning_rate": 1.753034377498365e-05, "loss": 0.2848, "step": 4830 }, { "epoch": 0.17588487535431355, "grad_norm": 17.89117431640625, "learning_rate": 1.7566683625263465e-05, "loss": 0.2539, "step": 4840 }, { "epoch": 0.1762482738571117, "grad_norm": 19.455005645751953, "learning_rate": 1.760302347554328e-05, "loss": 0.3166, "step": 4850 }, { "epoch": 0.17661167235990988, "grad_norm": 1.7975777387619019, "learning_rate": 1.76393633258231e-05, "loss": 0.1927, "step": 4860 }, { "epoch": 0.17697507086270806, "grad_norm": 4.6790690422058105, "learning_rate": 1.7675703176102916e-05, "loss": 0.2248, "step": 4870 }, { "epoch": 0.1773384693655062, "grad_norm": 3.2644243240356445, "learning_rate": 1.7712043026382732e-05, "loss": 0.2239, "step": 4880 }, { "epoch": 0.1777018678683044, "grad_norm": 1.9375410079956055, "learning_rate": 1.7748382876662548e-05, "loss": 0.2053, "step": 4890 }, { "epoch": 0.17806526637110254, "grad_norm": 15.435178756713867, "learning_rate": 1.7784722726942367e-05, "loss": 0.2903, "step": 4900 }, { "epoch": 0.17842866487390072, "grad_norm": 2.486330270767212, "learning_rate": 1.7821062577222183e-05, "loss": 0.2598, "step": 4910 }, { "epoch": 0.1787920633766989, "grad_norm": 2.5542314052581787, "learning_rate": 1.7857402427502e-05, "loss": 0.2305, "step": 4920 }, { "epoch": 0.17915546187949705, "grad_norm": 3.6416103839874268, "learning_rate": 1.7893742277781815e-05, "loss": 1.046, "step": 4930 }, { "epoch": 0.17951886038229523, "grad_norm": 1.9395058155059814, "learning_rate": 1.7930082128061634e-05, "loss": 0.2466, "step": 4940 }, { "epoch": 0.1798822588850934, "grad_norm": 7.664824962615967, "learning_rate": 1.796642197834145e-05, "loss": 0.2871, "step": 4950 }, { "epoch": 0.18024565738789156, "grad_norm": 2.0301320552825928, "learning_rate": 1.8002761828621266e-05, "loss": 0.1996, "step": 4960 }, { "epoch": 0.18060905589068973, "grad_norm": 8.371182441711426, "learning_rate": 1.8039101678901082e-05, "loss": 0.1947, "step": 4970 }, { "epoch": 0.18097245439348789, "grad_norm": 2.6746129989624023, "learning_rate": 1.80754415291809e-05, "loss": 0.2679, "step": 4980 }, { "epoch": 0.18133585289628606, "grad_norm": 3.448202133178711, "learning_rate": 1.8111781379460718e-05, "loss": 0.1859, "step": 4990 }, { "epoch": 0.18169925139908424, "grad_norm": 28.57021141052246, "learning_rate": 1.8148121229740534e-05, "loss": 0.3318, "step": 5000 }, { "epoch": 0.1820626499018824, "grad_norm": 4.731750965118408, "learning_rate": 1.8184461080020353e-05, "loss": 0.2354, "step": 5010 }, { "epoch": 0.18242604840468057, "grad_norm": 1.6815394163131714, "learning_rate": 1.822080093030017e-05, "loss": 0.2075, "step": 5020 }, { "epoch": 0.18278944690747875, "grad_norm": 3.868263006210327, "learning_rate": 1.8257140780579985e-05, "loss": 0.2345, "step": 5030 }, { "epoch": 0.1831528454102769, "grad_norm": 1.964240550994873, "learning_rate": 1.82934806308598e-05, "loss": 0.222, "step": 5040 }, { "epoch": 0.18351624391307508, "grad_norm": 11.881858825683594, "learning_rate": 1.8329820481139617e-05, "loss": 0.3251, "step": 5050 }, { "epoch": 0.18387964241587323, "grad_norm": 1.8463056087493896, "learning_rate": 1.8366160331419436e-05, "loss": 0.2255, "step": 5060 }, { "epoch": 0.1842430409186714, "grad_norm": 2.592672348022461, "learning_rate": 1.8402500181699252e-05, "loss": 0.1904, "step": 5070 }, { "epoch": 0.1846064394214696, "grad_norm": 4.0694074630737305, "learning_rate": 1.8438840031979068e-05, "loss": 0.2, "step": 5080 }, { "epoch": 0.18496983792426774, "grad_norm": 2.101837396621704, "learning_rate": 1.8475179882258887e-05, "loss": 0.1927, "step": 5090 }, { "epoch": 0.18533323642706592, "grad_norm": 22.162702560424805, "learning_rate": 1.8511519732538703e-05, "loss": 0.3481, "step": 5100 }, { "epoch": 0.1856966349298641, "grad_norm": 2.7928340435028076, "learning_rate": 1.854785958281852e-05, "loss": 0.2344, "step": 5110 }, { "epoch": 0.18606003343266225, "grad_norm": 1.8618485927581787, "learning_rate": 1.858419943309834e-05, "loss": 0.2139, "step": 5120 }, { "epoch": 0.18642343193546043, "grad_norm": 2.9611120223999023, "learning_rate": 1.862053928337815e-05, "loss": 0.2194, "step": 5130 }, { "epoch": 0.18678683043825858, "grad_norm": 5.181276321411133, "learning_rate": 1.865687913365797e-05, "loss": 0.2596, "step": 5140 }, { "epoch": 0.18715022894105676, "grad_norm": 10.01041030883789, "learning_rate": 1.8693218983937787e-05, "loss": 0.3122, "step": 5150 }, { "epoch": 0.18751362744385494, "grad_norm": 4.952126979827881, "learning_rate": 1.8729558834217603e-05, "loss": 0.2183, "step": 5160 }, { "epoch": 0.1878770259466531, "grad_norm": 2.19279146194458, "learning_rate": 1.8765898684497422e-05, "loss": 0.2439, "step": 5170 }, { "epoch": 0.18824042444945127, "grad_norm": 3.5189321041107178, "learning_rate": 1.8802238534777238e-05, "loss": 0.2343, "step": 5180 }, { "epoch": 0.18860382295224945, "grad_norm": 2.0936787128448486, "learning_rate": 1.8838578385057054e-05, "loss": 0.1831, "step": 5190 }, { "epoch": 0.1889672214550476, "grad_norm": 12.835061073303223, "learning_rate": 1.8874918235336873e-05, "loss": 0.2561, "step": 5200 }, { "epoch": 0.18933061995784578, "grad_norm": 1.6738308668136597, "learning_rate": 1.8911258085616686e-05, "loss": 1.0257, "step": 5210 }, { "epoch": 0.18969401846064393, "grad_norm": 2.7661142349243164, "learning_rate": 1.8947597935896505e-05, "loss": 0.2398, "step": 5220 }, { "epoch": 0.1900574169634421, "grad_norm": 4.173921585083008, "learning_rate": 1.8983937786176324e-05, "loss": 0.2157, "step": 5230 }, { "epoch": 0.1904208154662403, "grad_norm": 3.7037158012390137, "learning_rate": 1.9020277636456137e-05, "loss": 0.2182, "step": 5240 }, { "epoch": 0.19078421396903844, "grad_norm": 16.288227081298828, "learning_rate": 1.9056617486735956e-05, "loss": 0.2829, "step": 5250 }, { "epoch": 0.19114761247183662, "grad_norm": 2.0504090785980225, "learning_rate": 1.9092957337015772e-05, "loss": 0.201, "step": 5260 }, { "epoch": 0.19151101097463477, "grad_norm": 1.2266415357589722, "learning_rate": 1.9129297187295588e-05, "loss": 0.2072, "step": 5270 }, { "epoch": 0.19187440947743295, "grad_norm": 4.910546779632568, "learning_rate": 1.9165637037575408e-05, "loss": 0.1824, "step": 5280 }, { "epoch": 0.19223780798023113, "grad_norm": 3.093318223953247, "learning_rate": 1.9201976887855224e-05, "loss": 0.2471, "step": 5290 }, { "epoch": 0.19260120648302928, "grad_norm": 6.74167013168335, "learning_rate": 1.923831673813504e-05, "loss": 0.2912, "step": 5300 }, { "epoch": 0.19296460498582746, "grad_norm": 2.0540058612823486, "learning_rate": 1.927465658841486e-05, "loss": 0.2599, "step": 5310 }, { "epoch": 0.19332800348862564, "grad_norm": 2.407750129699707, "learning_rate": 1.931099643869467e-05, "loss": 0.2478, "step": 5320 }, { "epoch": 0.1936914019914238, "grad_norm": 5.479567527770996, "learning_rate": 1.934733628897449e-05, "loss": 0.5936, "step": 5330 }, { "epoch": 0.19405480049422197, "grad_norm": 1.912705659866333, "learning_rate": 1.9383676139254307e-05, "loss": 0.215, "step": 5340 }, { "epoch": 0.19441819899702012, "grad_norm": 38.24689865112305, "learning_rate": 1.9420015989534123e-05, "loss": 0.286, "step": 5350 }, { "epoch": 0.1947815974998183, "grad_norm": 3.4196550846099854, "learning_rate": 1.9456355839813942e-05, "loss": 0.4764, "step": 5360 }, { "epoch": 0.19514499600261648, "grad_norm": 1.705702781677246, "learning_rate": 1.9492695690093758e-05, "loss": 0.195, "step": 5370 }, { "epoch": 0.19550839450541463, "grad_norm": 2.7188572883605957, "learning_rate": 1.9529035540373574e-05, "loss": 0.2318, "step": 5380 }, { "epoch": 0.1958717930082128, "grad_norm": 5.217918872833252, "learning_rate": 1.9565375390653393e-05, "loss": 0.2288, "step": 5390 }, { "epoch": 0.196235191511011, "grad_norm": 7.094780921936035, "learning_rate": 1.960171524093321e-05, "loss": 0.3607, "step": 5400 }, { "epoch": 0.196235191511011, "eval_loss": 0.3953820765018463, "eval_runtime": 180.5214, "eval_samples_per_second": 41.07, "eval_steps_per_second": 5.135, "eval_wer": 0.23525514186650207, "step": 5400 }, { "epoch": 0.19659859001380914, "grad_norm": 2.379298448562622, "learning_rate": 1.9638055091213025e-05, "loss": 0.1981, "step": 5410 }, { "epoch": 0.19696198851660732, "grad_norm": 1.2755372524261475, "learning_rate": 1.967439494149284e-05, "loss": 0.3185, "step": 5420 }, { "epoch": 0.19732538701940547, "grad_norm": 2.6385338306427, "learning_rate": 1.9710734791772657e-05, "loss": 0.2231, "step": 5430 }, { "epoch": 0.19768878552220365, "grad_norm": 4.030337810516357, "learning_rate": 1.9747074642052477e-05, "loss": 0.2417, "step": 5440 }, { "epoch": 0.19805218402500183, "grad_norm": 10.988908767700195, "learning_rate": 1.9783414492332293e-05, "loss": 0.3163, "step": 5450 }, { "epoch": 0.19841558252779998, "grad_norm": 2.8273231983184814, "learning_rate": 1.981975434261211e-05, "loss": 0.2062, "step": 5460 }, { "epoch": 0.19877898103059816, "grad_norm": 1.880952000617981, "learning_rate": 1.9856094192891928e-05, "loss": 0.2103, "step": 5470 }, { "epoch": 0.19914237953339634, "grad_norm": 12.882647514343262, "learning_rate": 1.9892434043171744e-05, "loss": 0.2513, "step": 5480 }, { "epoch": 0.1995057780361945, "grad_norm": 2.8202428817749023, "learning_rate": 1.992877389345156e-05, "loss": 0.2002, "step": 5490 }, { "epoch": 0.19986917653899267, "grad_norm": 11.30123519897461, "learning_rate": 1.9965113743731376e-05, "loss": 0.3399, "step": 5500 }, { "epoch": 0.20023257504179082, "grad_norm": 3.016954183578491, "learning_rate": 2.0001453594011195e-05, "loss": 0.2016, "step": 5510 }, { "epoch": 0.200595973544589, "grad_norm": 1.3506131172180176, "learning_rate": 2.003779344429101e-05, "loss": 0.6008, "step": 5520 }, { "epoch": 0.20095937204738717, "grad_norm": 3.711284637451172, "learning_rate": 2.0074133294570827e-05, "loss": 0.2297, "step": 5530 }, { "epoch": 0.20132277055018533, "grad_norm": 2.8310322761535645, "learning_rate": 2.0110473144850643e-05, "loss": 0.19, "step": 5540 }, { "epoch": 0.2016861690529835, "grad_norm": 14.37038516998291, "learning_rate": 2.0146812995130462e-05, "loss": 0.3418, "step": 5550 }, { "epoch": 0.20204956755578168, "grad_norm": 2.037245988845825, "learning_rate": 2.0183152845410278e-05, "loss": 0.2054, "step": 5560 }, { "epoch": 0.20241296605857984, "grad_norm": 2.47495698928833, "learning_rate": 2.0219492695690094e-05, "loss": 0.2102, "step": 5570 }, { "epoch": 0.20277636456137801, "grad_norm": 5.948564529418945, "learning_rate": 2.025583254596991e-05, "loss": 0.2299, "step": 5580 }, { "epoch": 0.20313976306417617, "grad_norm": 2.010765552520752, "learning_rate": 2.029217239624973e-05, "loss": 0.2214, "step": 5590 }, { "epoch": 0.20350316156697434, "grad_norm": 109.07927703857422, "learning_rate": 2.0328512246529546e-05, "loss": 0.327, "step": 5600 }, { "epoch": 0.20386656006977252, "grad_norm": 2.708141565322876, "learning_rate": 2.036485209680936e-05, "loss": 0.2128, "step": 5610 }, { "epoch": 0.20422995857257067, "grad_norm": 4.145051002502441, "learning_rate": 2.040119194708918e-05, "loss": 1.5499, "step": 5620 }, { "epoch": 0.20459335707536885, "grad_norm": 5.204433917999268, "learning_rate": 2.0437531797368993e-05, "loss": 0.2238, "step": 5630 }, { "epoch": 0.204956755578167, "grad_norm": 3.625671625137329, "learning_rate": 2.0473871647648813e-05, "loss": 0.2009, "step": 5640 }, { "epoch": 0.20532015408096518, "grad_norm": 7.134413719177246, "learning_rate": 2.051021149792863e-05, "loss": 0.3236, "step": 5650 }, { "epoch": 0.20568355258376336, "grad_norm": 3.090585708618164, "learning_rate": 2.0546551348208445e-05, "loss": 0.2245, "step": 5660 }, { "epoch": 0.20604695108656151, "grad_norm": 1.5290725231170654, "learning_rate": 2.0582891198488264e-05, "loss": 0.9725, "step": 5670 }, { "epoch": 0.2064103495893597, "grad_norm": 12.433088302612305, "learning_rate": 2.061923104876808e-05, "loss": 0.2755, "step": 5680 }, { "epoch": 0.20677374809215787, "grad_norm": 4.399518013000488, "learning_rate": 2.0655570899047896e-05, "loss": 0.2136, "step": 5690 }, { "epoch": 0.20713714659495602, "grad_norm": 12.662751197814941, "learning_rate": 2.0691910749327715e-05, "loss": 0.3022, "step": 5700 }, { "epoch": 0.2075005450977542, "grad_norm": 1.8056265115737915, "learning_rate": 2.0728250599607528e-05, "loss": 0.3538, "step": 5710 }, { "epoch": 0.20786394360055235, "grad_norm": 1.3133045434951782, "learning_rate": 2.0764590449887347e-05, "loss": 0.1829, "step": 5720 }, { "epoch": 0.20822734210335053, "grad_norm": 6.10534143447876, "learning_rate": 2.0800930300167167e-05, "loss": 0.2819, "step": 5730 }, { "epoch": 0.2085907406061487, "grad_norm": 4.327618598937988, "learning_rate": 2.083727015044698e-05, "loss": 0.2029, "step": 5740 }, { "epoch": 0.20895413910894686, "grad_norm": 6.878536224365234, "learning_rate": 2.08736100007268e-05, "loss": 0.3301, "step": 5750 }, { "epoch": 0.20931753761174504, "grad_norm": 2.8301913738250732, "learning_rate": 2.0909949851006614e-05, "loss": 0.2144, "step": 5760 }, { "epoch": 0.20968093611454322, "grad_norm": 2.248054265975952, "learning_rate": 2.094628970128643e-05, "loss": 0.2046, "step": 5770 }, { "epoch": 0.21004433461734137, "grad_norm": 4.619300842285156, "learning_rate": 2.098262955156625e-05, "loss": 0.2487, "step": 5780 }, { "epoch": 0.21040773312013955, "grad_norm": 2.6446404457092285, "learning_rate": 2.1018969401846066e-05, "loss": 0.2222, "step": 5790 }, { "epoch": 0.2107711316229377, "grad_norm": 7.827177047729492, "learning_rate": 2.1055309252125882e-05, "loss": 0.2684, "step": 5800 }, { "epoch": 0.21113453012573588, "grad_norm": 5.37054967880249, "learning_rate": 2.10916491024057e-05, "loss": 0.216, "step": 5810 }, { "epoch": 0.21149792862853406, "grad_norm": 1.5430680513381958, "learning_rate": 2.1127988952685514e-05, "loss": 0.1723, "step": 5820 }, { "epoch": 0.2118613271313322, "grad_norm": 4.355040550231934, "learning_rate": 2.1164328802965333e-05, "loss": 0.3078, "step": 5830 }, { "epoch": 0.2122247256341304, "grad_norm": 2.70613169670105, "learning_rate": 2.1200668653245152e-05, "loss": 0.1857, "step": 5840 }, { "epoch": 0.21258812413692857, "grad_norm": 17.876861572265625, "learning_rate": 2.1237008503524965e-05, "loss": 0.335, "step": 5850 }, { "epoch": 0.21295152263972672, "grad_norm": 2.048499822616577, "learning_rate": 2.1273348353804784e-05, "loss": 0.2588, "step": 5860 }, { "epoch": 0.2133149211425249, "grad_norm": 2.2033607959747314, "learning_rate": 2.13096882040846e-05, "loss": 0.1973, "step": 5870 }, { "epoch": 0.21367831964532305, "grad_norm": 5.563814640045166, "learning_rate": 2.1346028054364416e-05, "loss": 0.2632, "step": 5880 }, { "epoch": 0.21404171814812123, "grad_norm": 1.4629203081130981, "learning_rate": 2.1382367904644236e-05, "loss": 0.1714, "step": 5890 }, { "epoch": 0.2144051166509194, "grad_norm": 9.641836166381836, "learning_rate": 2.141870775492405e-05, "loss": 0.3329, "step": 5900 }, { "epoch": 0.21476851515371756, "grad_norm": 3.0128610134124756, "learning_rate": 2.1455047605203867e-05, "loss": 0.205, "step": 5910 }, { "epoch": 0.21513191365651574, "grad_norm": 6.38659143447876, "learning_rate": 2.1491387455483687e-05, "loss": 0.2066, "step": 5920 }, { "epoch": 0.2154953121593139, "grad_norm": 3.397566080093384, "learning_rate": 2.15277273057635e-05, "loss": 0.2682, "step": 5930 }, { "epoch": 0.21585871066211207, "grad_norm": 1.8110759258270264, "learning_rate": 2.156406715604332e-05, "loss": 0.2357, "step": 5940 }, { "epoch": 0.21622210916491025, "grad_norm": 12.391556739807129, "learning_rate": 2.1600407006323135e-05, "loss": 0.3043, "step": 5950 }, { "epoch": 0.2165855076677084, "grad_norm": 1.8203914165496826, "learning_rate": 2.163674685660295e-05, "loss": 0.2979, "step": 5960 }, { "epoch": 0.21694890617050658, "grad_norm": 3.362252950668335, "learning_rate": 2.167308670688277e-05, "loss": 0.1667, "step": 5970 }, { "epoch": 0.21731230467330476, "grad_norm": 4.1468000411987305, "learning_rate": 2.1709426557162586e-05, "loss": 0.3419, "step": 5980 }, { "epoch": 0.2176757031761029, "grad_norm": 2.479288339614868, "learning_rate": 2.1745766407442402e-05, "loss": 0.1938, "step": 5990 }, { "epoch": 0.2180391016789011, "grad_norm": 26.185468673706055, "learning_rate": 2.1782106257722218e-05, "loss": 0.2818, "step": 6000 }, { "epoch": 0.2180391016789011, "eval_loss": 0.4106527864933014, "eval_runtime": 179.9044, "eval_samples_per_second": 41.211, "eval_steps_per_second": 5.153, "eval_wer": 0.2305625646704304, "step": 6000 }, { "epoch": 0.21840250018169924, "grad_norm": 2.2452592849731445, "learning_rate": 2.1818446108002037e-05, "loss": 0.2208, "step": 6010 }, { "epoch": 0.21876589868449742, "grad_norm": 2.273920774459839, "learning_rate": 2.1854785958281853e-05, "loss": 0.2268, "step": 6020 }, { "epoch": 0.2191292971872956, "grad_norm": 1.9621226787567139, "learning_rate": 2.189112580856167e-05, "loss": 0.1965, "step": 6030 }, { "epoch": 0.21949269569009375, "grad_norm": 2.866110324859619, "learning_rate": 2.1927465658841485e-05, "loss": 0.223, "step": 6040 }, { "epoch": 0.21985609419289193, "grad_norm": 15.169930458068848, "learning_rate": 2.1963805509121305e-05, "loss": 0.254, "step": 6050 }, { "epoch": 0.2202194926956901, "grad_norm": 2.174626350402832, "learning_rate": 2.200014535940112e-05, "loss": 0.2056, "step": 6060 }, { "epoch": 0.22058289119848826, "grad_norm": 1.9627354145050049, "learning_rate": 2.2036485209680936e-05, "loss": 0.2211, "step": 6070 }, { "epoch": 0.22094628970128644, "grad_norm": 5.444493770599365, "learning_rate": 2.2072825059960752e-05, "loss": 0.2819, "step": 6080 }, { "epoch": 0.2213096882040846, "grad_norm": 2.5131990909576416, "learning_rate": 2.2109164910240572e-05, "loss": 0.2262, "step": 6090 }, { "epoch": 0.22167308670688277, "grad_norm": 15.716779708862305, "learning_rate": 2.2145504760520388e-05, "loss": 0.2833, "step": 6100 }, { "epoch": 0.22203648520968094, "grad_norm": 1.7514111995697021, "learning_rate": 2.2181844610800204e-05, "loss": 0.2238, "step": 6110 }, { "epoch": 0.2223998837124791, "grad_norm": 1.8236886262893677, "learning_rate": 2.2218184461080023e-05, "loss": 0.1872, "step": 6120 }, { "epoch": 0.22276328221527728, "grad_norm": 4.081092834472656, "learning_rate": 2.225452431135984e-05, "loss": 0.4926, "step": 6130 }, { "epoch": 0.22312668071807545, "grad_norm": 3.3254685401916504, "learning_rate": 2.2290864161639655e-05, "loss": 0.2179, "step": 6140 }, { "epoch": 0.2234900792208736, "grad_norm": 9.953665733337402, "learning_rate": 2.232720401191947e-05, "loss": 0.3221, "step": 6150 }, { "epoch": 0.22385347772367178, "grad_norm": 3.531538724899292, "learning_rate": 2.2363543862199287e-05, "loss": 0.3639, "step": 6160 }, { "epoch": 0.22421687622646994, "grad_norm": 1.6166915893554688, "learning_rate": 2.2399883712479106e-05, "loss": 0.1906, "step": 6170 }, { "epoch": 0.22458027472926811, "grad_norm": 3.0561792850494385, "learning_rate": 2.2436223562758922e-05, "loss": 0.222, "step": 6180 }, { "epoch": 0.2249436732320663, "grad_norm": 7.607283115386963, "learning_rate": 2.2472563413038738e-05, "loss": 0.1769, "step": 6190 }, { "epoch": 0.22530707173486444, "grad_norm": 38.86745834350586, "learning_rate": 2.2508903263318557e-05, "loss": 0.3523, "step": 6200 }, { "epoch": 0.22567047023766262, "grad_norm": 1.2490432262420654, "learning_rate": 2.2545243113598373e-05, "loss": 0.2241, "step": 6210 }, { "epoch": 0.2260338687404608, "grad_norm": 3.8632936477661133, "learning_rate": 2.258158296387819e-05, "loss": 0.2761, "step": 6220 }, { "epoch": 0.22639726724325895, "grad_norm": 6.057976722717285, "learning_rate": 2.261792281415801e-05, "loss": 0.2534, "step": 6230 }, { "epoch": 0.22676066574605713, "grad_norm": 5.2983551025390625, "learning_rate": 2.265426266443782e-05, "loss": 0.1972, "step": 6240 }, { "epoch": 0.22712406424885528, "grad_norm": 7.395950794219971, "learning_rate": 2.269060251471764e-05, "loss": 0.3446, "step": 6250 }, { "epoch": 0.22748746275165346, "grad_norm": 2.7409260272979736, "learning_rate": 2.2726942364997457e-05, "loss": 0.1894, "step": 6260 }, { "epoch": 0.22785086125445164, "grad_norm": 1.7545270919799805, "learning_rate": 2.2763282215277273e-05, "loss": 0.2376, "step": 6270 }, { "epoch": 0.2282142597572498, "grad_norm": 112.10614013671875, "learning_rate": 2.2799622065557092e-05, "loss": 2.0322, "step": 6280 }, { "epoch": 0.22857765826004797, "grad_norm": 3.6547396183013916, "learning_rate": 2.2835961915836908e-05, "loss": 0.2942, "step": 6290 }, { "epoch": 0.22894105676284612, "grad_norm": 50.726261138916016, "learning_rate": 2.2872301766116724e-05, "loss": 0.3279, "step": 6300 }, { "epoch": 0.2293044552656443, "grad_norm": 1.2374241352081299, "learning_rate": 2.2908641616396543e-05, "loss": 0.1912, "step": 6310 }, { "epoch": 0.22966785376844248, "grad_norm": 1.6278152465820312, "learning_rate": 2.2944981466676356e-05, "loss": 0.1913, "step": 6320 }, { "epoch": 0.23003125227124063, "grad_norm": 7.58544397354126, "learning_rate": 2.2981321316956175e-05, "loss": 0.2393, "step": 6330 }, { "epoch": 0.2303946507740388, "grad_norm": 1.7094483375549316, "learning_rate": 2.3017661167235995e-05, "loss": 0.2333, "step": 6340 }, { "epoch": 0.230758049276837, "grad_norm": 24.214885711669922, "learning_rate": 2.3054001017515807e-05, "loss": 0.3019, "step": 6350 }, { "epoch": 0.23112144777963514, "grad_norm": 1.962106704711914, "learning_rate": 2.3090340867795626e-05, "loss": 0.8948, "step": 6360 }, { "epoch": 0.23148484628243332, "grad_norm": 1.3703123331069946, "learning_rate": 2.3126680718075442e-05, "loss": 0.1936, "step": 6370 }, { "epoch": 0.23184824478523147, "grad_norm": 7.507201194763184, "learning_rate": 2.316302056835526e-05, "loss": 0.2185, "step": 6380 }, { "epoch": 0.23221164328802965, "grad_norm": 2.6310977935791016, "learning_rate": 2.3199360418635078e-05, "loss": 0.1961, "step": 6390 }, { "epoch": 0.23257504179082783, "grad_norm": 4.186092376708984, "learning_rate": 2.3235700268914894e-05, "loss": 0.2734, "step": 6400 }, { "epoch": 0.23293844029362598, "grad_norm": 1.817269206047058, "learning_rate": 2.327204011919471e-05, "loss": 0.1966, "step": 6410 }, { "epoch": 0.23330183879642416, "grad_norm": 1.9503989219665527, "learning_rate": 2.330837996947453e-05, "loss": 2.7438, "step": 6420 }, { "epoch": 0.23366523729922234, "grad_norm": 3.1107656955718994, "learning_rate": 2.334471981975434e-05, "loss": 0.2534, "step": 6430 }, { "epoch": 0.2340286358020205, "grad_norm": 5.268273830413818, "learning_rate": 2.338105967003416e-05, "loss": 0.1963, "step": 6440 }, { "epoch": 0.23439203430481867, "grad_norm": 9.586852073669434, "learning_rate": 2.3417399520313977e-05, "loss": 0.2342, "step": 6450 }, { "epoch": 0.23475543280761682, "grad_norm": 3.0218632221221924, "learning_rate": 2.3453739370593793e-05, "loss": 0.231, "step": 6460 }, { "epoch": 0.235118831310415, "grad_norm": 1.9708057641983032, "learning_rate": 2.3490079220873612e-05, "loss": 0.2156, "step": 6470 }, { "epoch": 0.23548222981321318, "grad_norm": 3.6212944984436035, "learning_rate": 2.3526419071153428e-05, "loss": 0.2172, "step": 6480 }, { "epoch": 0.23584562831601133, "grad_norm": 2.5205702781677246, "learning_rate": 2.3562758921433244e-05, "loss": 0.4643, "step": 6490 }, { "epoch": 0.2362090268188095, "grad_norm": 4.1570305824279785, "learning_rate": 2.3599098771713063e-05, "loss": 0.2722, "step": 6500 }, { "epoch": 0.2365724253216077, "grad_norm": 1.8376798629760742, "learning_rate": 2.363543862199288e-05, "loss": 0.2027, "step": 6510 }, { "epoch": 0.23693582382440584, "grad_norm": 2.0464930534362793, "learning_rate": 2.3671778472272695e-05, "loss": 0.183, "step": 6520 }, { "epoch": 0.23729922232720402, "grad_norm": 4.8776469230651855, "learning_rate": 2.370811832255251e-05, "loss": 0.2169, "step": 6530 }, { "epoch": 0.23766262083000217, "grad_norm": 1.5764952898025513, "learning_rate": 2.3744458172832327e-05, "loss": 0.1917, "step": 6540 }, { "epoch": 0.23802601933280035, "grad_norm": 16.132232666015625, "learning_rate": 2.3780798023112147e-05, "loss": 0.2732, "step": 6550 }, { "epoch": 0.23838941783559853, "grad_norm": 8.105748176574707, "learning_rate": 2.3817137873391963e-05, "loss": 0.2055, "step": 6560 }, { "epoch": 0.23875281633839668, "grad_norm": 2.087362051010132, "learning_rate": 2.385347772367178e-05, "loss": 0.186, "step": 6570 }, { "epoch": 0.23911621484119486, "grad_norm": 2.8280205726623535, "learning_rate": 2.3889817573951598e-05, "loss": 0.201, "step": 6580 }, { "epoch": 0.23947961334399304, "grad_norm": 1.2525794506072998, "learning_rate": 2.3926157424231414e-05, "loss": 0.1893, "step": 6590 }, { "epoch": 0.2398430118467912, "grad_norm": 23.419832229614258, "learning_rate": 2.396249727451123e-05, "loss": 0.2554, "step": 6600 }, { "epoch": 0.2398430118467912, "eval_loss": 0.4065987765789032, "eval_runtime": 179.638, "eval_samples_per_second": 41.272, "eval_steps_per_second": 5.16, "eval_wer": 0.24529380797647357, "step": 6600 }, { "epoch": 0.24020641034958937, "grad_norm": 1.3757339715957642, "learning_rate": 2.3998837124791046e-05, "loss": 0.1962, "step": 6610 }, { "epoch": 0.24056980885238752, "grad_norm": 4.00860071182251, "learning_rate": 2.4035176975070865e-05, "loss": 0.1848, "step": 6620 }, { "epoch": 0.2409332073551857, "grad_norm": 5.544015407562256, "learning_rate": 2.407151682535068e-05, "loss": 0.245, "step": 6630 }, { "epoch": 0.24129660585798388, "grad_norm": 1.0618844032287598, "learning_rate": 2.4107856675630497e-05, "loss": 0.191, "step": 6640 }, { "epoch": 0.24166000436078203, "grad_norm": 125.15505981445312, "learning_rate": 2.4144196525910313e-05, "loss": 0.3055, "step": 6650 }, { "epoch": 0.2420234028635802, "grad_norm": 5.015167713165283, "learning_rate": 2.418053637619013e-05, "loss": 0.2701, "step": 6660 }, { "epoch": 0.24238680136637836, "grad_norm": 3.944514274597168, "learning_rate": 2.421687622646995e-05, "loss": 0.2107, "step": 6670 }, { "epoch": 0.24275019986917654, "grad_norm": 3.1539418697357178, "learning_rate": 2.4253216076749764e-05, "loss": 0.232, "step": 6680 }, { "epoch": 0.24311359837197472, "grad_norm": 2.980459213256836, "learning_rate": 2.428955592702958e-05, "loss": 0.2391, "step": 6690 }, { "epoch": 0.24347699687477287, "grad_norm": 35.02157211303711, "learning_rate": 2.43258957773094e-05, "loss": 0.3172, "step": 6700 }, { "epoch": 0.24384039537757105, "grad_norm": 1.606570839881897, "learning_rate": 2.4362235627589216e-05, "loss": 1.5707, "step": 6710 }, { "epoch": 0.24420379388036922, "grad_norm": 3.940394401550293, "learning_rate": 2.439857547786903e-05, "loss": 0.1969, "step": 6720 }, { "epoch": 0.24456719238316738, "grad_norm": 3.8990156650543213, "learning_rate": 2.443491532814885e-05, "loss": 0.2475, "step": 6730 }, { "epoch": 0.24493059088596555, "grad_norm": 2.523500442504883, "learning_rate": 2.4471255178428664e-05, "loss": 0.194, "step": 6740 }, { "epoch": 0.2452939893887637, "grad_norm": 4.920846939086914, "learning_rate": 2.4507595028708483e-05, "loss": 0.2417, "step": 6750 }, { "epoch": 0.24565738789156188, "grad_norm": 2.2269723415374756, "learning_rate": 2.4543934878988302e-05, "loss": 0.2148, "step": 6760 }, { "epoch": 0.24602078639436006, "grad_norm": 1.669722557067871, "learning_rate": 2.4580274729268115e-05, "loss": 0.1979, "step": 6770 }, { "epoch": 0.24638418489715821, "grad_norm": 4.581501007080078, "learning_rate": 2.4616614579547934e-05, "loss": 0.2412, "step": 6780 }, { "epoch": 0.2467475833999564, "grad_norm": 2.6605944633483887, "learning_rate": 2.465295442982775e-05, "loss": 0.1992, "step": 6790 }, { "epoch": 0.24711098190275457, "grad_norm": 7.089646816253662, "learning_rate": 2.4689294280107566e-05, "loss": 0.2789, "step": 6800 }, { "epoch": 0.24747438040555272, "grad_norm": 1.9901385307312012, "learning_rate": 2.4725634130387385e-05, "loss": 1.907, "step": 6810 }, { "epoch": 0.2478377789083509, "grad_norm": 2.5120224952697754, "learning_rate": 2.4761973980667198e-05, "loss": 0.1908, "step": 6820 }, { "epoch": 0.24820117741114905, "grad_norm": 1.553806185722351, "learning_rate": 2.4794679845919035e-05, "loss": 1.6707, "step": 6830 }, { "epoch": 0.24856457591394723, "grad_norm": 2.130095958709717, "learning_rate": 2.4831019696198855e-05, "loss": 0.222, "step": 6840 }, { "epoch": 0.2489279744167454, "grad_norm": 15.832701683044434, "learning_rate": 2.486735954647867e-05, "loss": 0.4634, "step": 6850 }, { "epoch": 0.24929137291954356, "grad_norm": 1.87086820602417, "learning_rate": 2.4903699396758487e-05, "loss": 0.1887, "step": 6860 }, { "epoch": 0.24965477142234174, "grad_norm": 2.32084584236145, "learning_rate": 2.4940039247038303e-05, "loss": 0.1881, "step": 6870 }, { "epoch": 0.2500181699251399, "grad_norm": 3.3228461742401123, "learning_rate": 2.497637909731812e-05, "loss": 0.264, "step": 6880 }, { "epoch": 0.2503815684279381, "grad_norm": 1.8676607608795166, "learning_rate": 2.5012718947597935e-05, "loss": 0.2102, "step": 6890 }, { "epoch": 0.2507449669307362, "grad_norm": 17.540319442749023, "learning_rate": 2.5049058797877757e-05, "loss": 0.2567, "step": 6900 }, { "epoch": 0.25110836543353443, "grad_norm": 1.6276856660842896, "learning_rate": 2.508539864815757e-05, "loss": 0.1917, "step": 6910 }, { "epoch": 0.2514717639363326, "grad_norm": 2.347691059112549, "learning_rate": 2.5121738498437386e-05, "loss": 0.1998, "step": 6920 }, { "epoch": 0.25183516243913073, "grad_norm": 3.5337650775909424, "learning_rate": 2.5158078348717205e-05, "loss": 0.2418, "step": 6930 }, { "epoch": 0.25219856094192894, "grad_norm": 3.7415404319763184, "learning_rate": 2.519441819899702e-05, "loss": 0.2074, "step": 6940 }, { "epoch": 0.2525619594447271, "grad_norm": 16.603042602539062, "learning_rate": 2.5230758049276837e-05, "loss": 0.3104, "step": 6950 }, { "epoch": 0.25292535794752524, "grad_norm": 1.4864579439163208, "learning_rate": 2.5267097899556656e-05, "loss": 0.1771, "step": 6960 }, { "epoch": 0.25328875645032345, "grad_norm": 1.7935876846313477, "learning_rate": 2.5303437749836472e-05, "loss": 0.1984, "step": 6970 }, { "epoch": 0.2536521549531216, "grad_norm": 3.187351942062378, "learning_rate": 2.533977760011629e-05, "loss": 0.1828, "step": 6980 }, { "epoch": 0.25401555345591975, "grad_norm": 1.7930549383163452, "learning_rate": 2.5376117450396104e-05, "loss": 0.2132, "step": 6990 }, { "epoch": 0.2543789519587179, "grad_norm": 4.86196231842041, "learning_rate": 2.5412457300675924e-05, "loss": 0.2426, "step": 7000 }, { "epoch": 0.2547423504615161, "grad_norm": 2.784335136413574, "learning_rate": 2.544879715095574e-05, "loss": 1.6557, "step": 7010 }, { "epoch": 0.25510574896431426, "grad_norm": 1.460509181022644, "learning_rate": 2.5485137001235552e-05, "loss": 0.1812, "step": 7020 }, { "epoch": 0.2554691474671124, "grad_norm": 2.5204946994781494, "learning_rate": 2.5521476851515375e-05, "loss": 0.3731, "step": 7030 }, { "epoch": 0.2558325459699106, "grad_norm": 1.6122281551361084, "learning_rate": 2.555781670179519e-05, "loss": 0.2256, "step": 7040 }, { "epoch": 0.25619594447270877, "grad_norm": 8.13974666595459, "learning_rate": 2.5594156552075004e-05, "loss": 0.2756, "step": 7050 }, { "epoch": 0.2565593429755069, "grad_norm": 2.1560494899749756, "learning_rate": 2.5630496402354826e-05, "loss": 0.1869, "step": 7060 }, { "epoch": 0.25692274147830513, "grad_norm": 2.938570737838745, "learning_rate": 2.5666836252634642e-05, "loss": 0.187, "step": 7070 }, { "epoch": 0.2572861399811033, "grad_norm": 1.6697754859924316, "learning_rate": 2.5703176102914455e-05, "loss": 0.1841, "step": 7080 }, { "epoch": 0.25764953848390143, "grad_norm": 2.500377655029297, "learning_rate": 2.5739515953194278e-05, "loss": 0.4097, "step": 7090 }, { "epoch": 0.25801293698669964, "grad_norm": 6.614553928375244, "learning_rate": 2.577585580347409e-05, "loss": 0.2779, "step": 7100 }, { "epoch": 0.2583763354894978, "grad_norm": 2.1538803577423096, "learning_rate": 2.5812195653753906e-05, "loss": 0.2035, "step": 7110 }, { "epoch": 0.25873973399229594, "grad_norm": 2.64719820022583, "learning_rate": 2.584853550403373e-05, "loss": 0.1815, "step": 7120 }, { "epoch": 0.25910313249509415, "grad_norm": 4.064308166503906, "learning_rate": 2.588487535431354e-05, "loss": 0.2115, "step": 7130 }, { "epoch": 0.2594665309978923, "grad_norm": 4.535513877868652, "learning_rate": 2.5921215204593357e-05, "loss": 0.1733, "step": 7140 }, { "epoch": 0.25982992950069045, "grad_norm": 14.761083602905273, "learning_rate": 2.5957555054873173e-05, "loss": 0.3061, "step": 7150 }, { "epoch": 0.2601933280034886, "grad_norm": 2.902010202407837, "learning_rate": 2.5993894905152993e-05, "loss": 0.2539, "step": 7160 }, { "epoch": 0.2605567265062868, "grad_norm": 2.6499462127685547, "learning_rate": 2.603023475543281e-05, "loss": 0.209, "step": 7170 }, { "epoch": 0.26092012500908496, "grad_norm": 2.0298879146575928, "learning_rate": 2.6066574605712625e-05, "loss": 0.1966, "step": 7180 }, { "epoch": 0.2612835235118831, "grad_norm": 5.285839080810547, "learning_rate": 2.6102914455992444e-05, "loss": 0.2416, "step": 7190 }, { "epoch": 0.2616469220146813, "grad_norm": 14.89932918548584, "learning_rate": 2.613925430627226e-05, "loss": 0.2649, "step": 7200 }, { "epoch": 0.2616469220146813, "eval_loss": 0.43822312355041504, "eval_runtime": 180.398, "eval_samples_per_second": 41.098, "eval_steps_per_second": 5.139, "eval_wer": 0.23023580881151634, "step": 7200 }, { "epoch": 0.26201032051747947, "grad_norm": 2.9772818088531494, "learning_rate": 2.6175594156552076e-05, "loss": 0.2158, "step": 7210 }, { "epoch": 0.2623737190202776, "grad_norm": 1.4703949689865112, "learning_rate": 2.6211934006831895e-05, "loss": 0.1925, "step": 7220 }, { "epoch": 0.2627371175230758, "grad_norm": 2.6034176349639893, "learning_rate": 2.624827385711171e-05, "loss": 0.2065, "step": 7230 }, { "epoch": 0.263100516025874, "grad_norm": 2.8392562866210938, "learning_rate": 2.6284613707391527e-05, "loss": 0.2097, "step": 7240 }, { "epoch": 0.2634639145286721, "grad_norm": 8.892645835876465, "learning_rate": 2.6320953557671347e-05, "loss": 0.2835, "step": 7250 }, { "epoch": 0.26382731303147033, "grad_norm": 1.616268277168274, "learning_rate": 2.6357293407951162e-05, "loss": 0.1875, "step": 7260 }, { "epoch": 0.2641907115342685, "grad_norm": 2.1791138648986816, "learning_rate": 2.6393633258230975e-05, "loss": 0.1722, "step": 7270 }, { "epoch": 0.26455411003706664, "grad_norm": 2.8691608905792236, "learning_rate": 2.642997310851079e-05, "loss": 0.2377, "step": 7280 }, { "epoch": 0.26491750853986484, "grad_norm": 1.5673551559448242, "learning_rate": 2.6466312958790614e-05, "loss": 0.4404, "step": 7290 }, { "epoch": 0.265280907042663, "grad_norm": 7.296738147735596, "learning_rate": 2.6502652809070426e-05, "loss": 0.3198, "step": 7300 }, { "epoch": 0.26564430554546115, "grad_norm": 6.389322757720947, "learning_rate": 2.6538992659350242e-05, "loss": 0.2041, "step": 7310 }, { "epoch": 0.2660077040482593, "grad_norm": 11.64201831817627, "learning_rate": 2.657533250963006e-05, "loss": 0.2014, "step": 7320 }, { "epoch": 0.2663711025510575, "grad_norm": 4.454049587249756, "learning_rate": 2.6611672359909878e-05, "loss": 0.2295, "step": 7330 }, { "epoch": 0.26673450105385565, "grad_norm": 2.091968297958374, "learning_rate": 2.6648012210189694e-05, "loss": 0.1784, "step": 7340 }, { "epoch": 0.2670978995566538, "grad_norm": 6.904966354370117, "learning_rate": 2.6684352060469513e-05, "loss": 0.3303, "step": 7350 }, { "epoch": 0.267461298059452, "grad_norm": 1.6893994808197021, "learning_rate": 2.672069191074933e-05, "loss": 0.2534, "step": 7360 }, { "epoch": 0.26782469656225016, "grad_norm": 1.3456122875213623, "learning_rate": 2.6757031761029145e-05, "loss": 0.1829, "step": 7370 }, { "epoch": 0.2681880950650483, "grad_norm": 7.959611892700195, "learning_rate": 2.6793371611308964e-05, "loss": 0.2425, "step": 7380 }, { "epoch": 0.2685514935678465, "grad_norm": 1.5833840370178223, "learning_rate": 2.682971146158878e-05, "loss": 0.1988, "step": 7390 }, { "epoch": 0.2689148920706447, "grad_norm": 19.886600494384766, "learning_rate": 2.6866051311868596e-05, "loss": 0.3563, "step": 7400 }, { "epoch": 0.2692782905734428, "grad_norm": 2.55553936958313, "learning_rate": 2.6902391162148415e-05, "loss": 0.1857, "step": 7410 }, { "epoch": 0.26964168907624103, "grad_norm": 2.125661849975586, "learning_rate": 2.693873101242823e-05, "loss": 0.7398, "step": 7420 }, { "epoch": 0.2700050875790392, "grad_norm": 2.577770233154297, "learning_rate": 2.6975070862708047e-05, "loss": 0.5703, "step": 7430 }, { "epoch": 0.27036848608183733, "grad_norm": 2.3848683834075928, "learning_rate": 2.701141071298786e-05, "loss": 0.173, "step": 7440 }, { "epoch": 0.2707318845846355, "grad_norm": 22.96078109741211, "learning_rate": 2.7047750563267683e-05, "loss": 0.293, "step": 7450 }, { "epoch": 0.2710952830874337, "grad_norm": 3.206329822540283, "learning_rate": 2.70840904135475e-05, "loss": 0.4585, "step": 7460 }, { "epoch": 0.27145868159023184, "grad_norm": 2.251904010772705, "learning_rate": 2.712043026382731e-05, "loss": 0.2196, "step": 7470 }, { "epoch": 0.27182208009303, "grad_norm": 3.7445387840270996, "learning_rate": 2.7156770114107134e-05, "loss": 0.2195, "step": 7480 }, { "epoch": 0.2721854785958282, "grad_norm": 1.5370314121246338, "learning_rate": 2.7193109964386947e-05, "loss": 0.2007, "step": 7490 }, { "epoch": 0.27254887709862635, "grad_norm": 18.44324493408203, "learning_rate": 2.7229449814666763e-05, "loss": 0.3091, "step": 7500 }, { "epoch": 0.2729122756014245, "grad_norm": 1.5792795419692993, "learning_rate": 2.7265789664946585e-05, "loss": 0.1601, "step": 7510 }, { "epoch": 0.2732756741042227, "grad_norm": 9.128384590148926, "learning_rate": 2.7302129515226398e-05, "loss": 0.178, "step": 7520 }, { "epoch": 0.27363907260702086, "grad_norm": 2.2285592555999756, "learning_rate": 2.7338469365506214e-05, "loss": 2.4074, "step": 7530 }, { "epoch": 0.274002471109819, "grad_norm": 2.2741541862487793, "learning_rate": 2.7374809215786033e-05, "loss": 0.246, "step": 7540 }, { "epoch": 0.2743658696126172, "grad_norm": 17.185470581054688, "learning_rate": 2.741114906606585e-05, "loss": 0.2577, "step": 7550 }, { "epoch": 0.27472926811541537, "grad_norm": 1.1907752752304077, "learning_rate": 2.7447488916345665e-05, "loss": 0.2073, "step": 7560 }, { "epoch": 0.2750926666182135, "grad_norm": 3.535682201385498, "learning_rate": 2.748382876662548e-05, "loss": 0.2012, "step": 7570 }, { "epoch": 0.27545606512101173, "grad_norm": 3.585460662841797, "learning_rate": 2.75201686169053e-05, "loss": 0.2147, "step": 7580 }, { "epoch": 0.2758194636238099, "grad_norm": 1.9034504890441895, "learning_rate": 2.7556508467185116e-05, "loss": 0.1626, "step": 7590 }, { "epoch": 0.27618286212660803, "grad_norm": 39.66155242919922, "learning_rate": 2.7592848317464932e-05, "loss": 0.2617, "step": 7600 }, { "epoch": 0.2765462606294062, "grad_norm": 1.5698285102844238, "learning_rate": 2.762918816774475e-05, "loss": 0.3136, "step": 7610 }, { "epoch": 0.2769096591322044, "grad_norm": 2.4866106510162354, "learning_rate": 2.7665528018024568e-05, "loss": 0.1971, "step": 7620 }, { "epoch": 0.27727305763500254, "grad_norm": 9.244050025939941, "learning_rate": 2.7701867868304384e-05, "loss": 0.2025, "step": 7630 }, { "epoch": 0.2776364561378007, "grad_norm": 2.1344380378723145, "learning_rate": 2.7738207718584203e-05, "loss": 0.2055, "step": 7640 }, { "epoch": 0.2779998546405989, "grad_norm": 13.503227233886719, "learning_rate": 2.777454756886402e-05, "loss": 0.2671, "step": 7650 }, { "epoch": 0.27836325314339705, "grad_norm": 2.238834857940674, "learning_rate": 2.781088741914383e-05, "loss": 0.1714, "step": 7660 }, { "epoch": 0.2787266516461952, "grad_norm": 0.897280216217041, "learning_rate": 2.7847227269423654e-05, "loss": 0.1615, "step": 7670 }, { "epoch": 0.2790900501489934, "grad_norm": 5.808285713195801, "learning_rate": 2.788356711970347e-05, "loss": 0.2052, "step": 7680 }, { "epoch": 0.27945344865179156, "grad_norm": 1.8924663066864014, "learning_rate": 2.7919906969983283e-05, "loss": 0.1769, "step": 7690 }, { "epoch": 0.2798168471545897, "grad_norm": 11.939653396606445, "learning_rate": 2.7956246820263105e-05, "loss": 0.2859, "step": 7700 }, { "epoch": 0.2801802456573879, "grad_norm": 2.5077621936798096, "learning_rate": 2.7992586670542918e-05, "loss": 0.1767, "step": 7710 }, { "epoch": 0.28054364416018607, "grad_norm": 2.0336718559265137, "learning_rate": 2.8028926520822734e-05, "loss": 0.6757, "step": 7720 }, { "epoch": 0.2809070426629842, "grad_norm": 3.9547739028930664, "learning_rate": 2.806526637110255e-05, "loss": 0.2322, "step": 7730 }, { "epoch": 0.28127044116578237, "grad_norm": 1.8082466125488281, "learning_rate": 2.810160622138237e-05, "loss": 0.1758, "step": 7740 }, { "epoch": 0.2816338396685806, "grad_norm": 16.173986434936523, "learning_rate": 2.8137946071662185e-05, "loss": 0.2642, "step": 7750 }, { "epoch": 0.28199723817137873, "grad_norm": 3.341475486755371, "learning_rate": 2.8174285921942e-05, "loss": 3.4407, "step": 7760 }, { "epoch": 0.2823606366741769, "grad_norm": 1.7220288515090942, "learning_rate": 2.821062577222182e-05, "loss": 0.1965, "step": 7770 }, { "epoch": 0.2827240351769751, "grad_norm": 3.8534610271453857, "learning_rate": 2.8246965622501637e-05, "loss": 0.1966, "step": 7780 }, { "epoch": 0.28308743367977324, "grad_norm": 1.962780475616455, "learning_rate": 2.8283305472781453e-05, "loss": 0.1859, "step": 7790 }, { "epoch": 0.2834508321825714, "grad_norm": 40.28166961669922, "learning_rate": 2.8319645323061272e-05, "loss": 0.6588, "step": 7800 }, { "epoch": 0.2834508321825714, "eval_loss": 0.42970865964889526, "eval_runtime": 180.6321, "eval_samples_per_second": 41.045, "eval_steps_per_second": 5.132, "eval_wer": 0.2413455080145951, "step": 7800 }, { "epoch": 0.2838142306853696, "grad_norm": 1.748349666595459, "learning_rate": 2.8355985173341088e-05, "loss": 0.1786, "step": 7810 }, { "epoch": 0.28417762918816775, "grad_norm": 2.1137237548828125, "learning_rate": 2.8392325023620904e-05, "loss": 0.1803, "step": 7820 }, { "epoch": 0.2845410276909659, "grad_norm": 1.59931218624115, "learning_rate": 2.8428664873900723e-05, "loss": 0.2107, "step": 7830 }, { "epoch": 0.2849044261937641, "grad_norm": 2.263493061065674, "learning_rate": 2.846500472418054e-05, "loss": 0.1967, "step": 7840 }, { "epoch": 0.28526782469656226, "grad_norm": 20.798656463623047, "learning_rate": 2.8501344574460355e-05, "loss": 0.268, "step": 7850 }, { "epoch": 0.2856312231993604, "grad_norm": 3.0182480812072754, "learning_rate": 2.8537684424740168e-05, "loss": 0.1901, "step": 7860 }, { "epoch": 0.2859946217021586, "grad_norm": 6.6378493309021, "learning_rate": 2.857402427501999e-05, "loss": 0.1804, "step": 7870 }, { "epoch": 0.28635802020495676, "grad_norm": 2.5524067878723145, "learning_rate": 2.8610364125299803e-05, "loss": 0.233, "step": 7880 }, { "epoch": 0.2867214187077549, "grad_norm": 2.6409335136413574, "learning_rate": 2.864670397557962e-05, "loss": 0.1717, "step": 7890 }, { "epoch": 0.28708481721055307, "grad_norm": 6.834221363067627, "learning_rate": 2.868304382585944e-05, "loss": 0.2956, "step": 7900 }, { "epoch": 0.2874482157133513, "grad_norm": 2.760669708251953, "learning_rate": 2.8719383676139254e-05, "loss": 0.1789, "step": 7910 }, { "epoch": 0.2878116142161494, "grad_norm": 1.7543925046920776, "learning_rate": 2.875572352641907e-05, "loss": 0.2041, "step": 7920 }, { "epoch": 0.2881750127189476, "grad_norm": 4.784151077270508, "learning_rate": 2.879206337669889e-05, "loss": 0.2259, "step": 7930 }, { "epoch": 0.2885384112217458, "grad_norm": 2.1769356727600098, "learning_rate": 2.8828403226978706e-05, "loss": 0.2023, "step": 7940 }, { "epoch": 0.28890180972454393, "grad_norm": 9.373051643371582, "learning_rate": 2.886474307725852e-05, "loss": 0.3511, "step": 7950 }, { "epoch": 0.2892652082273421, "grad_norm": 1.895190715789795, "learning_rate": 2.890108292753834e-05, "loss": 0.1976, "step": 7960 }, { "epoch": 0.2896286067301403, "grad_norm": 3.4400076866149902, "learning_rate": 2.8937422777818157e-05, "loss": 0.1902, "step": 7970 }, { "epoch": 0.28999200523293844, "grad_norm": 9.663911819458008, "learning_rate": 2.8973762628097973e-05, "loss": 0.2551, "step": 7980 }, { "epoch": 0.2903554037357366, "grad_norm": 5.1054463386535645, "learning_rate": 2.9010102478377792e-05, "loss": 0.2001, "step": 7990 }, { "epoch": 0.2907188022385348, "grad_norm": 9.06143569946289, "learning_rate": 2.9046442328657608e-05, "loss": 0.2266, "step": 8000 }, { "epoch": 0.29108220074133295, "grad_norm": 1.604077696800232, "learning_rate": 2.9082782178937424e-05, "loss": 0.1883, "step": 8010 }, { "epoch": 0.2914455992441311, "grad_norm": 2.245687246322632, "learning_rate": 2.911912202921724e-05, "loss": 0.2093, "step": 8020 }, { "epoch": 0.29180899774692926, "grad_norm": 3.8099372386932373, "learning_rate": 2.915546187949706e-05, "loss": 0.2283, "step": 8030 }, { "epoch": 0.29217239624972746, "grad_norm": 2.135115623474121, "learning_rate": 2.9191801729776875e-05, "loss": 0.2369, "step": 8040 }, { "epoch": 0.2925357947525256, "grad_norm": 5.596993446350098, "learning_rate": 2.9228141580056688e-05, "loss": 0.2709, "step": 8050 }, { "epoch": 0.29289919325532376, "grad_norm": 1.3212496042251587, "learning_rate": 2.926448143033651e-05, "loss": 0.1968, "step": 8060 }, { "epoch": 0.29326259175812197, "grad_norm": 1.9241231679916382, "learning_rate": 2.9300821280616327e-05, "loss": 0.3883, "step": 8070 }, { "epoch": 0.2936259902609201, "grad_norm": 4.008016109466553, "learning_rate": 2.933716113089614e-05, "loss": 0.2074, "step": 8080 }, { "epoch": 0.2939893887637183, "grad_norm": 1.5871399641036987, "learning_rate": 2.9373500981175962e-05, "loss": 0.1698, "step": 8090 }, { "epoch": 0.2943527872665165, "grad_norm": 19.480670928955078, "learning_rate": 2.9409840831455774e-05, "loss": 0.4023, "step": 8100 }, { "epoch": 0.29471618576931463, "grad_norm": 3.8420443534851074, "learning_rate": 2.944618068173559e-05, "loss": 0.181, "step": 8110 }, { "epoch": 0.2950795842721128, "grad_norm": 1.9951499700546265, "learning_rate": 2.9482520532015413e-05, "loss": 0.2872, "step": 8120 }, { "epoch": 0.295442982774911, "grad_norm": 4.958978176116943, "learning_rate": 2.9518860382295226e-05, "loss": 0.2359, "step": 8130 }, { "epoch": 0.29580638127770914, "grad_norm": 1.5531708002090454, "learning_rate": 2.9555200232575042e-05, "loss": 0.2138, "step": 8140 }, { "epoch": 0.2961697797805073, "grad_norm": 5.297884941101074, "learning_rate": 2.9591540082854864e-05, "loss": 0.2694, "step": 8150 }, { "epoch": 0.2965331782833055, "grad_norm": 1.5989892482757568, "learning_rate": 2.9627879933134677e-05, "loss": 0.1686, "step": 8160 }, { "epoch": 0.29689657678610365, "grad_norm": 3.347722291946411, "learning_rate": 2.9664219783414493e-05, "loss": 0.2206, "step": 8170 }, { "epoch": 0.2972599752889018, "grad_norm": 2.9551491737365723, "learning_rate": 2.970055963369431e-05, "loss": 0.2274, "step": 8180 }, { "epoch": 0.29762337379169995, "grad_norm": 2.527963638305664, "learning_rate": 2.973689948397413e-05, "loss": 0.1731, "step": 8190 }, { "epoch": 0.29798677229449816, "grad_norm": 5.818012714385986, "learning_rate": 2.9773239334253944e-05, "loss": 0.265, "step": 8200 }, { "epoch": 0.2983501707972963, "grad_norm": 1.5580624341964722, "learning_rate": 2.980594519950578e-05, "loss": 2.627, "step": 8210 }, { "epoch": 0.29871356930009446, "grad_norm": 1.6011282205581665, "learning_rate": 2.9842285049785594e-05, "loss": 0.1811, "step": 8220 }, { "epoch": 0.29907696780289267, "grad_norm": 44.825157165527344, "learning_rate": 2.987862490006541e-05, "loss": 0.4799, "step": 8230 }, { "epoch": 0.2994403663056908, "grad_norm": 1.520982027053833, "learning_rate": 2.991496475034523e-05, "loss": 0.1935, "step": 8240 }, { "epoch": 0.29980376480848897, "grad_norm": 6.3379058837890625, "learning_rate": 2.9951304600625046e-05, "loss": 0.2435, "step": 8250 }, { "epoch": 0.3001671633112872, "grad_norm": 2.2493958473205566, "learning_rate": 2.998764445090486e-05, "loss": 0.1984, "step": 8260 }, { "epoch": 0.30053056181408533, "grad_norm": 3.234196186065674, "learning_rate": 3.002398430118468e-05, "loss": 0.1785, "step": 8270 }, { "epoch": 0.3008939603168835, "grad_norm": 4.99449348449707, "learning_rate": 3.0060324151464497e-05, "loss": 0.1888, "step": 8280 }, { "epoch": 0.3012573588196817, "grad_norm": 1.8624048233032227, "learning_rate": 3.0096664001744313e-05, "loss": 1.6561, "step": 8290 }, { "epoch": 0.30162075732247984, "grad_norm": 7.615640640258789, "learning_rate": 3.0133003852024132e-05, "loss": 0.2918, "step": 8300 }, { "epoch": 0.301984155825278, "grad_norm": 1.6900697946548462, "learning_rate": 3.0169343702303948e-05, "loss": 0.2255, "step": 8310 }, { "epoch": 0.3023475543280762, "grad_norm": 2.2034566402435303, "learning_rate": 3.0205683552583764e-05, "loss": 0.198, "step": 8320 }, { "epoch": 0.30271095283087435, "grad_norm": 2.044597625732422, "learning_rate": 3.0242023402863583e-05, "loss": 0.1946, "step": 8330 }, { "epoch": 0.3030743513336725, "grad_norm": 1.6171079874038696, "learning_rate": 3.02783632531434e-05, "loss": 0.1935, "step": 8340 }, { "epoch": 0.30343774983647065, "grad_norm": 2.8435897827148438, "learning_rate": 3.0314703103423215e-05, "loss": 0.3876, "step": 8350 }, { "epoch": 0.30380114833926886, "grad_norm": 2.023019552230835, "learning_rate": 3.0351042953703035e-05, "loss": 0.1879, "step": 8360 }, { "epoch": 0.304164546842067, "grad_norm": 1.7610963582992554, "learning_rate": 3.038738280398285e-05, "loss": 0.1901, "step": 8370 }, { "epoch": 0.30452794534486516, "grad_norm": 1.9482131004333496, "learning_rate": 3.0423722654262667e-05, "loss": 0.2119, "step": 8380 }, { "epoch": 0.30489134384766337, "grad_norm": 1.6463958024978638, "learning_rate": 3.046006250454248e-05, "loss": 0.2067, "step": 8390 }, { "epoch": 0.3052547423504615, "grad_norm": 10.607688903808594, "learning_rate": 3.0496402354822302e-05, "loss": 0.2709, "step": 8400 }, { "epoch": 0.3052547423504615, "eval_loss": 0.3912598192691803, "eval_runtime": 179.9461, "eval_samples_per_second": 41.201, "eval_steps_per_second": 5.152, "eval_wer": 0.22865648882676493, "step": 8400 }, { "epoch": 0.30561814085325967, "grad_norm": 5.675121307373047, "learning_rate": 3.053274220510212e-05, "loss": 0.1937, "step": 8410 }, { "epoch": 0.3059815393560579, "grad_norm": 1.9001195430755615, "learning_rate": 3.056908205538193e-05, "loss": 0.1668, "step": 8420 }, { "epoch": 0.306344937858856, "grad_norm": 6.807525157928467, "learning_rate": 3.060542190566175e-05, "loss": 0.2077, "step": 8430 }, { "epoch": 0.3067083363616542, "grad_norm": 2.067265272140503, "learning_rate": 3.064176175594157e-05, "loss": 0.1596, "step": 8440 }, { "epoch": 0.3070717348644524, "grad_norm": 15.267791748046875, "learning_rate": 3.067810160622138e-05, "loss": 0.2667, "step": 8450 }, { "epoch": 0.30743513336725053, "grad_norm": 1.367903709411621, "learning_rate": 3.07144414565012e-05, "loss": 0.1819, "step": 8460 }, { "epoch": 0.3077985318700487, "grad_norm": 1.531816840171814, "learning_rate": 3.075078130678102e-05, "loss": 0.1681, "step": 8470 }, { "epoch": 0.30816193037284684, "grad_norm": 3.668304204940796, "learning_rate": 3.078712115706083e-05, "loss": 0.2488, "step": 8480 }, { "epoch": 0.30852532887564504, "grad_norm": 2.2622220516204834, "learning_rate": 3.082346100734065e-05, "loss": 0.1866, "step": 8490 }, { "epoch": 0.3088887273784432, "grad_norm": 6.450117111206055, "learning_rate": 3.085980085762047e-05, "loss": 0.2676, "step": 8500 }, { "epoch": 0.30925212588124135, "grad_norm": 2.096731424331665, "learning_rate": 3.0896140707900284e-05, "loss": 0.1952, "step": 8510 }, { "epoch": 0.30961552438403955, "grad_norm": 1.3809120655059814, "learning_rate": 3.09324805581801e-05, "loss": 0.3478, "step": 8520 }, { "epoch": 0.3099789228868377, "grad_norm": 4.2257585525512695, "learning_rate": 3.096882040845992e-05, "loss": 0.2126, "step": 8530 }, { "epoch": 0.31034232138963586, "grad_norm": 2.8543758392333984, "learning_rate": 3.1005160258739736e-05, "loss": 0.8169, "step": 8540 }, { "epoch": 0.31070571989243406, "grad_norm": 5.897162437438965, "learning_rate": 3.104150010901955e-05, "loss": 0.2421, "step": 8550 }, { "epoch": 0.3110691183952322, "grad_norm": 1.8980865478515625, "learning_rate": 3.107783995929937e-05, "loss": 0.193, "step": 8560 }, { "epoch": 0.31143251689803036, "grad_norm": 2.113833427429199, "learning_rate": 3.111417980957919e-05, "loss": 0.1553, "step": 8570 }, { "epoch": 0.31179591540082857, "grad_norm": 2.7569572925567627, "learning_rate": 3.1150519659859e-05, "loss": 0.2003, "step": 8580 }, { "epoch": 0.3121593139036267, "grad_norm": 2.480473756790161, "learning_rate": 3.118685951013882e-05, "loss": 0.2173, "step": 8590 }, { "epoch": 0.3125227124064249, "grad_norm": 12.174234390258789, "learning_rate": 3.122319936041864e-05, "loss": 0.3081, "step": 8600 }, { "epoch": 0.3128861109092231, "grad_norm": 2.8075544834136963, "learning_rate": 3.125953921069845e-05, "loss": 0.263, "step": 8610 }, { "epoch": 0.31324950941202123, "grad_norm": 16.535009384155273, "learning_rate": 3.129587906097827e-05, "loss": 0.1968, "step": 8620 }, { "epoch": 0.3136129079148194, "grad_norm": 6.4783711433410645, "learning_rate": 3.133221891125809e-05, "loss": 0.2396, "step": 8630 }, { "epoch": 0.31397630641761753, "grad_norm": 0.945353090763092, "learning_rate": 3.13685587615379e-05, "loss": 0.1623, "step": 8640 }, { "epoch": 0.31433970492041574, "grad_norm": 7.135663032531738, "learning_rate": 3.140489861181772e-05, "loss": 0.3006, "step": 8650 }, { "epoch": 0.3147031034232139, "grad_norm": 1.275896430015564, "learning_rate": 3.144123846209754e-05, "loss": 0.1845, "step": 8660 }, { "epoch": 0.31506650192601204, "grad_norm": 2.1660525798797607, "learning_rate": 3.147757831237735e-05, "loss": 0.1614, "step": 8670 }, { "epoch": 0.31542990042881025, "grad_norm": 3.878882646560669, "learning_rate": 3.1513918162657166e-05, "loss": 0.2124, "step": 8680 }, { "epoch": 0.3157932989316084, "grad_norm": 3.452864170074463, "learning_rate": 3.155025801293699e-05, "loss": 0.1659, "step": 8690 }, { "epoch": 0.31615669743440655, "grad_norm": 4.0493292808532715, "learning_rate": 3.1586597863216805e-05, "loss": 0.2653, "step": 8700 }, { "epoch": 0.31652009593720476, "grad_norm": 1.9184757471084595, "learning_rate": 3.162293771349662e-05, "loss": 0.2043, "step": 8710 }, { "epoch": 0.3168834944400029, "grad_norm": 4.22302770614624, "learning_rate": 3.165927756377644e-05, "loss": 0.2005, "step": 8720 }, { "epoch": 0.31724689294280106, "grad_norm": 8.557464599609375, "learning_rate": 3.1695617414056256e-05, "loss": 0.2135, "step": 8730 }, { "epoch": 0.31761029144559927, "grad_norm": 1.6090949773788452, "learning_rate": 3.173195726433607e-05, "loss": 0.1565, "step": 8740 }, { "epoch": 0.3179736899483974, "grad_norm": 35.859737396240234, "learning_rate": 3.1768297114615894e-05, "loss": 0.3239, "step": 8750 }, { "epoch": 0.31833708845119557, "grad_norm": 2.837944507598877, "learning_rate": 3.180463696489571e-05, "loss": 0.1902, "step": 8760 }, { "epoch": 0.3187004869539937, "grad_norm": 1.6548888683319092, "learning_rate": 3.184097681517552e-05, "loss": 0.1732, "step": 8770 }, { "epoch": 0.31906388545679193, "grad_norm": 3.840034246444702, "learning_rate": 3.187731666545534e-05, "loss": 0.2318, "step": 8780 }, { "epoch": 0.3194272839595901, "grad_norm": 3.3684277534484863, "learning_rate": 3.191365651573516e-05, "loss": 0.1794, "step": 8790 }, { "epoch": 0.31979068246238823, "grad_norm": 8.668655395507812, "learning_rate": 3.194999636601497e-05, "loss": 0.2745, "step": 8800 }, { "epoch": 0.32015408096518644, "grad_norm": 1.412441611289978, "learning_rate": 3.198633621629479e-05, "loss": 0.1913, "step": 8810 }, { "epoch": 0.3205174794679846, "grad_norm": 1.6273925304412842, "learning_rate": 3.202267606657461e-05, "loss": 0.1905, "step": 8820 }, { "epoch": 0.32088087797078274, "grad_norm": 5.704558372497559, "learning_rate": 3.205901591685442e-05, "loss": 0.2217, "step": 8830 }, { "epoch": 0.32124427647358095, "grad_norm": 2.248072385787964, "learning_rate": 3.209535576713424e-05, "loss": 0.1752, "step": 8840 }, { "epoch": 0.3216076749763791, "grad_norm": 8.330979347229004, "learning_rate": 3.213169561741406e-05, "loss": 0.2693, "step": 8850 }, { "epoch": 0.32197107347917725, "grad_norm": 6.713444709777832, "learning_rate": 3.2168035467693873e-05, "loss": 0.1821, "step": 8860 }, { "epoch": 0.32233447198197546, "grad_norm": 1.7717983722686768, "learning_rate": 3.220437531797369e-05, "loss": 0.1572, "step": 8870 }, { "epoch": 0.3226978704847736, "grad_norm": 3.8419570922851562, "learning_rate": 3.224071516825351e-05, "loss": 0.2168, "step": 8880 }, { "epoch": 0.32306126898757176, "grad_norm": 1.8515948057174683, "learning_rate": 3.2277055018533325e-05, "loss": 0.1474, "step": 8890 }, { "epoch": 0.32342466749036997, "grad_norm": 12.963587760925293, "learning_rate": 3.231339486881314e-05, "loss": 0.2349, "step": 8900 }, { "epoch": 0.3237880659931681, "grad_norm": 1.078845500946045, "learning_rate": 3.2349734719092963e-05, "loss": 0.1968, "step": 8910 }, { "epoch": 0.32415146449596627, "grad_norm": 1.5369044542312622, "learning_rate": 3.2386074569372776e-05, "loss": 0.1681, "step": 8920 }, { "epoch": 0.3245148629987644, "grad_norm": 3.8013484477996826, "learning_rate": 3.242241441965259e-05, "loss": 0.2214, "step": 8930 }, { "epoch": 0.3248782615015626, "grad_norm": 2.0259406566619873, "learning_rate": 3.2458754269932415e-05, "loss": 0.4227, "step": 8940 }, { "epoch": 0.3252416600043608, "grad_norm": 6.423609256744385, "learning_rate": 3.249509412021223e-05, "loss": 0.2835, "step": 8950 }, { "epoch": 0.32560505850715893, "grad_norm": 2.363159656524658, "learning_rate": 3.253143397049204e-05, "loss": 0.2038, "step": 8960 }, { "epoch": 0.32596845700995714, "grad_norm": 2.4034435749053955, "learning_rate": 3.256777382077186e-05, "loss": 0.1907, "step": 8970 }, { "epoch": 0.3263318555127553, "grad_norm": 4.032980442047119, "learning_rate": 3.260411367105168e-05, "loss": 0.1973, "step": 8980 }, { "epoch": 0.32669525401555344, "grad_norm": 6.102022647857666, "learning_rate": 3.264045352133149e-05, "loss": 0.197, "step": 8990 }, { "epoch": 0.32705865251835164, "grad_norm": 35.67893981933594, "learning_rate": 3.267679337161131e-05, "loss": 0.2682, "step": 9000 }, { "epoch": 0.32705865251835164, "eval_loss": 0.40712428092956543, "eval_runtime": 179.2194, "eval_samples_per_second": 41.368, "eval_steps_per_second": 5.172, "eval_wer": 0.226941020567466, "step": 9000 }, { "epoch": 0.3274220510211498, "grad_norm": 1.8014717102050781, "learning_rate": 3.271313322189113e-05, "loss": 0.1591, "step": 9010 }, { "epoch": 0.32778544952394795, "grad_norm": 1.7404965162277222, "learning_rate": 3.274947307217094e-05, "loss": 0.17, "step": 9020 }, { "epoch": 0.32814884802674615, "grad_norm": 3.7020771503448486, "learning_rate": 3.278581292245076e-05, "loss": 0.2225, "step": 9030 }, { "epoch": 0.3285122465295443, "grad_norm": 1.045998454093933, "learning_rate": 3.282215277273058e-05, "loss": 0.1681, "step": 9040 }, { "epoch": 0.32887564503234246, "grad_norm": 5.282716751098633, "learning_rate": 3.2858492623010394e-05, "loss": 0.2856, "step": 9050 }, { "epoch": 0.3292390435351406, "grad_norm": 3.3956387042999268, "learning_rate": 3.289483247329021e-05, "loss": 0.1782, "step": 9060 }, { "epoch": 0.3296024420379388, "grad_norm": 1.855603575706482, "learning_rate": 3.293117232357003e-05, "loss": 0.1582, "step": 9070 }, { "epoch": 0.32996584054073697, "grad_norm": 7.214013576507568, "learning_rate": 3.2967512173849845e-05, "loss": 0.1691, "step": 9080 }, { "epoch": 0.3303292390435351, "grad_norm": 3.140125036239624, "learning_rate": 3.3003852024129664e-05, "loss": 0.1872, "step": 9090 }, { "epoch": 0.3306926375463333, "grad_norm": 17.094255447387695, "learning_rate": 3.304019187440948e-05, "loss": 0.2848, "step": 9100 }, { "epoch": 0.3310560360491315, "grad_norm": 1.9439010620117188, "learning_rate": 3.3076531724689296e-05, "loss": 0.1625, "step": 9110 }, { "epoch": 0.3314194345519296, "grad_norm": 1.609747290611267, "learning_rate": 3.311287157496911e-05, "loss": 0.1915, "step": 9120 }, { "epoch": 0.33178283305472783, "grad_norm": 4.03629207611084, "learning_rate": 3.314921142524893e-05, "loss": 0.2291, "step": 9130 }, { "epoch": 0.332146231557526, "grad_norm": 1.9643129110336304, "learning_rate": 3.318555127552875e-05, "loss": 0.1747, "step": 9140 }, { "epoch": 0.33250963006032414, "grad_norm": 9.304847717285156, "learning_rate": 3.322189112580856e-05, "loss": 0.2539, "step": 9150 }, { "epoch": 0.33287302856312234, "grad_norm": 1.991467833518982, "learning_rate": 3.325823097608838e-05, "loss": 3.61, "step": 9160 }, { "epoch": 0.3332364270659205, "grad_norm": 2.7127187252044678, "learning_rate": 3.32945708263682e-05, "loss": 0.1985, "step": 9170 }, { "epoch": 0.33359982556871864, "grad_norm": 2.831299304962158, "learning_rate": 3.333091067664801e-05, "loss": 1.7334, "step": 9180 }, { "epoch": 0.33396322407151685, "grad_norm": 1.5434614419937134, "learning_rate": 3.336725052692783e-05, "loss": 0.1718, "step": 9190 }, { "epoch": 0.334326622574315, "grad_norm": 10.254124641418457, "learning_rate": 3.340359037720765e-05, "loss": 0.3246, "step": 9200 }, { "epoch": 0.33469002107711315, "grad_norm": 1.169886589050293, "learning_rate": 3.343993022748746e-05, "loss": 0.1936, "step": 9210 }, { "epoch": 0.3350534195799113, "grad_norm": 3.697627544403076, "learning_rate": 3.347627007776728e-05, "loss": 0.205, "step": 9220 }, { "epoch": 0.3354168180827095, "grad_norm": 3.15781307220459, "learning_rate": 3.35126099280471e-05, "loss": 0.2222, "step": 9230 }, { "epoch": 0.33578021658550766, "grad_norm": 1.903701663017273, "learning_rate": 3.3548949778326914e-05, "loss": 0.1611, "step": 9240 }, { "epoch": 0.3361436150883058, "grad_norm": 26.77275848388672, "learning_rate": 3.358528962860673e-05, "loss": 0.2872, "step": 9250 }, { "epoch": 0.336507013591104, "grad_norm": 1.588224172592163, "learning_rate": 3.3621629478886546e-05, "loss": 3.404, "step": 9260 }, { "epoch": 0.33687041209390217, "grad_norm": 1.8802090883255005, "learning_rate": 3.3657969329166365e-05, "loss": 0.1715, "step": 9270 }, { "epoch": 0.3372338105967003, "grad_norm": 5.38352632522583, "learning_rate": 3.3694309179446185e-05, "loss": 0.1906, "step": 9280 }, { "epoch": 0.33759720909949853, "grad_norm": 1.736177921295166, "learning_rate": 3.3730649029726e-05, "loss": 0.1881, "step": 9290 }, { "epoch": 0.3379606076022967, "grad_norm": 17.865558624267578, "learning_rate": 3.3766988880005816e-05, "loss": 0.3003, "step": 9300 }, { "epoch": 0.33832400610509483, "grad_norm": 1.532173991203308, "learning_rate": 3.3803328730285636e-05, "loss": 0.188, "step": 9310 }, { "epoch": 0.33868740460789304, "grad_norm": 3.8595352172851562, "learning_rate": 3.383966858056545e-05, "loss": 0.1869, "step": 9320 }, { "epoch": 0.3390508031106912, "grad_norm": 2.5906641483306885, "learning_rate": 3.387600843084527e-05, "loss": 0.1993, "step": 9330 }, { "epoch": 0.33941420161348934, "grad_norm": 2.5224273204803467, "learning_rate": 3.391234828112508e-05, "loss": 0.1935, "step": 9340 }, { "epoch": 0.33977760011628755, "grad_norm": 11.555095672607422, "learning_rate": 3.39486881314049e-05, "loss": 0.2891, "step": 9350 }, { "epoch": 0.3401409986190857, "grad_norm": 1.3724703788757324, "learning_rate": 3.398502798168472e-05, "loss": 0.1656, "step": 9360 }, { "epoch": 0.34050439712188385, "grad_norm": 2.1549072265625, "learning_rate": 3.402136783196453e-05, "loss": 0.1769, "step": 9370 }, { "epoch": 0.340867795624682, "grad_norm": 1.793492317199707, "learning_rate": 3.405770768224435e-05, "loss": 0.2661, "step": 9380 }, { "epoch": 0.3412311941274802, "grad_norm": 4.038620948791504, "learning_rate": 3.409404753252417e-05, "loss": 0.1871, "step": 9390 }, { "epoch": 0.34159459263027836, "grad_norm": 31.7847900390625, "learning_rate": 3.413038738280398e-05, "loss": 0.2967, "step": 9400 }, { "epoch": 0.3419579911330765, "grad_norm": 2.398646354675293, "learning_rate": 3.41667272330838e-05, "loss": 0.2086, "step": 9410 }, { "epoch": 0.3423213896358747, "grad_norm": 2.2226221561431885, "learning_rate": 3.4203067083363615e-05, "loss": 0.1665, "step": 9420 }, { "epoch": 0.34268478813867287, "grad_norm": 39.96380615234375, "learning_rate": 3.4239406933643434e-05, "loss": 0.9468, "step": 9430 }, { "epoch": 0.343048186641471, "grad_norm": 1.5465339422225952, "learning_rate": 3.4275746783923254e-05, "loss": 0.1827, "step": 9440 }, { "epoch": 0.3434115851442692, "grad_norm": 7.941345691680908, "learning_rate": 3.4312086634203066e-05, "loss": 0.2786, "step": 9450 }, { "epoch": 0.3437749836470674, "grad_norm": 1.2575476169586182, "learning_rate": 3.4348426484482885e-05, "loss": 0.1764, "step": 9460 }, { "epoch": 0.34413838214986553, "grad_norm": 1.3529596328735352, "learning_rate": 3.4384766334762705e-05, "loss": 0.207, "step": 9470 }, { "epoch": 0.34450178065266374, "grad_norm": 3.2839174270629883, "learning_rate": 3.442110618504252e-05, "loss": 0.2672, "step": 9480 }, { "epoch": 0.3448651791554619, "grad_norm": 3.246384859085083, "learning_rate": 3.445744603532234e-05, "loss": 0.1906, "step": 9490 }, { "epoch": 0.34522857765826004, "grad_norm": 2.595038652420044, "learning_rate": 3.4493785885602156e-05, "loss": 0.2441, "step": 9500 }, { "epoch": 0.3455919761610582, "grad_norm": 1.3803220987319946, "learning_rate": 3.453012573588197e-05, "loss": 0.1745, "step": 9510 }, { "epoch": 0.3459553746638564, "grad_norm": 1.2091724872589111, "learning_rate": 3.456646558616179e-05, "loss": 0.1441, "step": 9520 }, { "epoch": 0.34631877316665455, "grad_norm": 6.582603931427002, "learning_rate": 3.460280543644161e-05, "loss": 0.1835, "step": 9530 }, { "epoch": 0.3466821716694527, "grad_norm": 2.6845383644104004, "learning_rate": 3.463914528672142e-05, "loss": 0.2048, "step": 9540 }, { "epoch": 0.3470455701722509, "grad_norm": 11.775678634643555, "learning_rate": 3.467548513700123e-05, "loss": 0.2841, "step": 9550 }, { "epoch": 0.34740896867504906, "grad_norm": 2.256279706954956, "learning_rate": 3.471182498728106e-05, "loss": 0.6472, "step": 9560 }, { "epoch": 0.3477723671778472, "grad_norm": 1.4487576484680176, "learning_rate": 3.474816483756087e-05, "loss": 0.2722, "step": 9570 }, { "epoch": 0.3481357656806454, "grad_norm": 3.843964099884033, "learning_rate": 3.4784504687840684e-05, "loss": 0.1855, "step": 9580 }, { "epoch": 0.34849916418344357, "grad_norm": 1.5561772584915161, "learning_rate": 3.48208445381205e-05, "loss": 0.1908, "step": 9590 }, { "epoch": 0.3488625626862417, "grad_norm": 3.757232666015625, "learning_rate": 3.485718438840032e-05, "loss": 0.2198, "step": 9600 }, { "epoch": 0.3488625626862417, "eval_loss": 0.3895765244960785, "eval_runtime": 179.7435, "eval_samples_per_second": 41.248, "eval_steps_per_second": 5.157, "eval_wer": 0.21512335033674007, "step": 9600 }, { "epoch": 0.3492259611890399, "grad_norm": 1.3912307024002075, "learning_rate": 3.4893524238680135e-05, "loss": 0.1616, "step": 9610 }, { "epoch": 0.3495893596918381, "grad_norm": 2.4036080837249756, "learning_rate": 3.4929864088959954e-05, "loss": 0.1579, "step": 9620 }, { "epoch": 0.3499527581946362, "grad_norm": 2.611175537109375, "learning_rate": 3.4966203939239774e-05, "loss": 0.1746, "step": 9630 }, { "epoch": 0.35031615669743443, "grad_norm": 1.4045140743255615, "learning_rate": 3.5002543789519586e-05, "loss": 0.1594, "step": 9640 }, { "epoch": 0.3506795552002326, "grad_norm": 12.708057403564453, "learning_rate": 3.5038883639799406e-05, "loss": 0.3118, "step": 9650 }, { "epoch": 0.35104295370303074, "grad_norm": 3.0364696979522705, "learning_rate": 3.5075223490079225e-05, "loss": 0.3062, "step": 9660 }, { "epoch": 0.3514063522058289, "grad_norm": 1.4527848958969116, "learning_rate": 3.511156334035904e-05, "loss": 0.1603, "step": 9670 }, { "epoch": 0.3517697507086271, "grad_norm": 5.697939395904541, "learning_rate": 3.514790319063886e-05, "loss": 0.2069, "step": 9680 }, { "epoch": 0.35213314921142524, "grad_norm": 2.1645712852478027, "learning_rate": 3.5184243040918676e-05, "loss": 0.162, "step": 9690 }, { "epoch": 0.3524965477142234, "grad_norm": 8.024601936340332, "learning_rate": 3.522058289119849e-05, "loss": 0.898, "step": 9700 }, { "epoch": 0.3528599462170216, "grad_norm": 1.4516103267669678, "learning_rate": 3.52569227414783e-05, "loss": 0.189, "step": 9710 }, { "epoch": 0.35322334471981975, "grad_norm": 1.0467925071716309, "learning_rate": 3.529326259175813e-05, "loss": 0.1547, "step": 9720 }, { "epoch": 0.3535867432226179, "grad_norm": 3.9237303733825684, "learning_rate": 3.532960244203794e-05, "loss": 0.1968, "step": 9730 }, { "epoch": 0.3539501417254161, "grad_norm": 2.502257823944092, "learning_rate": 3.536594229231775e-05, "loss": 0.1645, "step": 9740 }, { "epoch": 0.35431354022821426, "grad_norm": 30.662227630615234, "learning_rate": 3.540228214259758e-05, "loss": 0.2847, "step": 9750 }, { "epoch": 0.3546769387310124, "grad_norm": 1.7106624841690063, "learning_rate": 3.543862199287739e-05, "loss": 0.1951, "step": 9760 }, { "epoch": 0.3550403372338106, "grad_norm": 2.169036865234375, "learning_rate": 3.5474961843157204e-05, "loss": 0.172, "step": 9770 }, { "epoch": 0.3554037357366088, "grad_norm": 6.116454124450684, "learning_rate": 3.551130169343703e-05, "loss": 0.1934, "step": 9780 }, { "epoch": 0.3557671342394069, "grad_norm": 1.8530545234680176, "learning_rate": 3.554764154371684e-05, "loss": 0.217, "step": 9790 }, { "epoch": 0.3561305327422051, "grad_norm": 11.060449600219727, "learning_rate": 3.5583981393996655e-05, "loss": 0.2145, "step": 9800 }, { "epoch": 0.3564939312450033, "grad_norm": 7.748067378997803, "learning_rate": 3.5620321244276475e-05, "loss": 0.2114, "step": 9810 }, { "epoch": 0.35685732974780143, "grad_norm": 3.562528610229492, "learning_rate": 3.5656661094556294e-05, "loss": 0.221, "step": 9820 }, { "epoch": 0.3572207282505996, "grad_norm": 2.798417091369629, "learning_rate": 3.5693000944836107e-05, "loss": 0.2071, "step": 9830 }, { "epoch": 0.3575841267533978, "grad_norm": 2.3908724784851074, "learning_rate": 3.5729340795115926e-05, "loss": 0.1678, "step": 9840 }, { "epoch": 0.35794752525619594, "grad_norm": 7.205004692077637, "learning_rate": 3.5765680645395745e-05, "loss": 0.2953, "step": 9850 }, { "epoch": 0.3583109237589941, "grad_norm": 2.5064749717712402, "learning_rate": 3.580202049567556e-05, "loss": 0.197, "step": 9860 }, { "epoch": 0.3586743222617923, "grad_norm": 2.0985934734344482, "learning_rate": 3.583836034595538e-05, "loss": 0.1441, "step": 9870 }, { "epoch": 0.35903772076459045, "grad_norm": 5.256442070007324, "learning_rate": 3.5874700196235197e-05, "loss": 0.203, "step": 9880 }, { "epoch": 0.3594011192673886, "grad_norm": 2.3590219020843506, "learning_rate": 3.591104004651501e-05, "loss": 0.1811, "step": 9890 }, { "epoch": 0.3597645177701868, "grad_norm": 24.96747398376465, "learning_rate": 3.594737989679482e-05, "loss": 0.293, "step": 9900 }, { "epoch": 0.36012791627298496, "grad_norm": 1.727751612663269, "learning_rate": 3.598371974707465e-05, "loss": 0.1896, "step": 9910 }, { "epoch": 0.3604913147757831, "grad_norm": 2.349269151687622, "learning_rate": 3.602005959735446e-05, "loss": 0.1649, "step": 9920 }, { "epoch": 0.3608547132785813, "grad_norm": 3.139385223388672, "learning_rate": 3.605639944763427e-05, "loss": 0.2181, "step": 9930 }, { "epoch": 0.36121811178137947, "grad_norm": 2.1249756813049316, "learning_rate": 3.60927392979141e-05, "loss": 0.1751, "step": 9940 }, { "epoch": 0.3615815102841776, "grad_norm": 3.6616756916046143, "learning_rate": 3.612907914819391e-05, "loss": 0.2729, "step": 9950 }, { "epoch": 0.36194490878697577, "grad_norm": 1.367600440979004, "learning_rate": 3.6165418998473724e-05, "loss": 0.1592, "step": 9960 }, { "epoch": 0.362308307289774, "grad_norm": 1.8141239881515503, "learning_rate": 3.620175884875355e-05, "loss": 0.2867, "step": 9970 }, { "epoch": 0.36267170579257213, "grad_norm": 7.0058794021606445, "learning_rate": 3.623809869903336e-05, "loss": 0.207, "step": 9980 }, { "epoch": 0.3630351042953703, "grad_norm": 1.923048734664917, "learning_rate": 3.6274438549313176e-05, "loss": 0.346, "step": 9990 }, { "epoch": 0.3633985027981685, "grad_norm": 16.30779457092285, "learning_rate": 3.6310778399592995e-05, "loss": 0.3107, "step": 10000 }, { "epoch": 0.36376190130096664, "grad_norm": 1.979866862297058, "learning_rate": 3.6347118249872814e-05, "loss": 0.7999, "step": 10010 }, { "epoch": 0.3641252998037648, "grad_norm": 2.7377023696899414, "learning_rate": 3.638345810015263e-05, "loss": 0.2005, "step": 10020 }, { "epoch": 0.364488698306563, "grad_norm": 5.546159744262695, "learning_rate": 3.6419797950432446e-05, "loss": 0.1964, "step": 10030 }, { "epoch": 0.36485209680936115, "grad_norm": 2.2417142391204834, "learning_rate": 3.6456137800712265e-05, "loss": 0.2078, "step": 10040 }, { "epoch": 0.3652154953121593, "grad_norm": 7.2175092697143555, "learning_rate": 3.649247765099208e-05, "loss": 0.291, "step": 10050 }, { "epoch": 0.3655788938149575, "grad_norm": 2.6172754764556885, "learning_rate": 3.65288175012719e-05, "loss": 0.2037, "step": 10060 }, { "epoch": 0.36594229231775566, "grad_norm": 2.0634214878082275, "learning_rate": 3.656515735155172e-05, "loss": 0.1668, "step": 10070 }, { "epoch": 0.3663056908205538, "grad_norm": 3.5431976318359375, "learning_rate": 3.660149720183153e-05, "loss": 0.475, "step": 10080 }, { "epoch": 0.366669089323352, "grad_norm": 2.147472381591797, "learning_rate": 3.663783705211135e-05, "loss": 0.1869, "step": 10090 }, { "epoch": 0.36703248782615017, "grad_norm": 18.726482391357422, "learning_rate": 3.667417690239117e-05, "loss": 0.2773, "step": 10100 }, { "epoch": 0.3673958863289483, "grad_norm": 1.6554090976715088, "learning_rate": 3.671051675267098e-05, "loss": 0.1707, "step": 10110 }, { "epoch": 0.36775928483174647, "grad_norm": 1.8967760801315308, "learning_rate": 3.674685660295079e-05, "loss": 0.2159, "step": 10120 }, { "epoch": 0.3681226833345447, "grad_norm": 2.3765788078308105, "learning_rate": 3.678319645323061e-05, "loss": 0.2229, "step": 10130 }, { "epoch": 0.3684860818373428, "grad_norm": 5.890452861785889, "learning_rate": 3.681953630351043e-05, "loss": 0.195, "step": 10140 }, { "epoch": 0.368849480340141, "grad_norm": 5.045167446136475, "learning_rate": 3.6855876153790244e-05, "loss": 0.3111, "step": 10150 }, { "epoch": 0.3692128788429392, "grad_norm": 2.37107253074646, "learning_rate": 3.6892216004070064e-05, "loss": 0.1942, "step": 10160 }, { "epoch": 0.36957627734573734, "grad_norm": 1.9943170547485352, "learning_rate": 3.692855585434988e-05, "loss": 0.1906, "step": 10170 }, { "epoch": 0.3699396758485355, "grad_norm": 3.16873836517334, "learning_rate": 3.6964895704629696e-05, "loss": 0.1791, "step": 10180 }, { "epoch": 0.3703030743513337, "grad_norm": 15.252134323120117, "learning_rate": 3.7001235554909515e-05, "loss": 0.3702, "step": 10190 }, { "epoch": 0.37066647285413185, "grad_norm": 8.845834732055664, "learning_rate": 3.7037575405189334e-05, "loss": 0.2765, "step": 10200 }, { "epoch": 0.37066647285413185, "eval_loss": 0.4178149104118347, "eval_runtime": 179.6523, "eval_samples_per_second": 41.269, "eval_steps_per_second": 5.16, "eval_wer": 0.2237551509430537, "step": 10200 }, { "epoch": 0.37102987135693, "grad_norm": 6.2689313888549805, "learning_rate": 3.707391525546915e-05, "loss": 0.1922, "step": 10210 }, { "epoch": 0.3713932698597282, "grad_norm": 1.00067138671875, "learning_rate": 3.7110255105748966e-05, "loss": 0.1535, "step": 10220 }, { "epoch": 0.37175666836252635, "grad_norm": 2.6602060794830322, "learning_rate": 3.7146594956028786e-05, "loss": 0.1959, "step": 10230 }, { "epoch": 0.3721200668653245, "grad_norm": 4.743015766143799, "learning_rate": 3.71829348063086e-05, "loss": 0.2058, "step": 10240 }, { "epoch": 0.37248346536812266, "grad_norm": 8.304347038269043, "learning_rate": 3.721927465658842e-05, "loss": 0.3027, "step": 10250 }, { "epoch": 0.37284686387092086, "grad_norm": 1.8180521726608276, "learning_rate": 3.725561450686824e-05, "loss": 0.1708, "step": 10260 }, { "epoch": 0.373210262373719, "grad_norm": 2.05625057220459, "learning_rate": 3.729195435714805e-05, "loss": 0.1824, "step": 10270 }, { "epoch": 0.37357366087651717, "grad_norm": 2.426814317703247, "learning_rate": 3.732829420742787e-05, "loss": 0.197, "step": 10280 }, { "epoch": 0.3739370593793154, "grad_norm": 1.658158540725708, "learning_rate": 3.736463405770768e-05, "loss": 0.1578, "step": 10290 }, { "epoch": 0.3743004578821135, "grad_norm": 10.913407325744629, "learning_rate": 3.74009739079875e-05, "loss": 0.2728, "step": 10300 }, { "epoch": 0.3746638563849117, "grad_norm": 1.6443781852722168, "learning_rate": 3.743731375826732e-05, "loss": 0.1656, "step": 10310 }, { "epoch": 0.3750272548877099, "grad_norm": 1.0702744722366333, "learning_rate": 3.747365360854713e-05, "loss": 0.7132, "step": 10320 }, { "epoch": 0.37539065339050803, "grad_norm": 5.8824052810668945, "learning_rate": 3.750999345882695e-05, "loss": 0.2701, "step": 10330 }, { "epoch": 0.3757540518933062, "grad_norm": 4.373916149139404, "learning_rate": 3.754633330910677e-05, "loss": 0.2053, "step": 10340 }, { "epoch": 0.3761174503961044, "grad_norm": 22.25397300720215, "learning_rate": 3.7582673159386584e-05, "loss": 0.2781, "step": 10350 }, { "epoch": 0.37648084889890254, "grad_norm": 1.8272254467010498, "learning_rate": 3.7619013009666403e-05, "loss": 0.1833, "step": 10360 }, { "epoch": 0.3768442474017007, "grad_norm": 3.286931037902832, "learning_rate": 3.7655352859946216e-05, "loss": 0.1576, "step": 10370 }, { "epoch": 0.3772076459044989, "grad_norm": 5.283690929412842, "learning_rate": 3.7691692710226035e-05, "loss": 0.21, "step": 10380 }, { "epoch": 0.37757104440729705, "grad_norm": 1.184476375579834, "learning_rate": 3.7728032560505855e-05, "loss": 0.2597, "step": 10390 }, { "epoch": 0.3779344429100952, "grad_norm": 5.685116767883301, "learning_rate": 3.776437241078567e-05, "loss": 0.2476, "step": 10400 }, { "epoch": 0.37829784141289335, "grad_norm": 1.1873399019241333, "learning_rate": 3.7800712261065487e-05, "loss": 0.1597, "step": 10410 }, { "epoch": 0.37866123991569156, "grad_norm": 1.6136255264282227, "learning_rate": 3.7837052111345306e-05, "loss": 0.188, "step": 10420 }, { "epoch": 0.3790246384184897, "grad_norm": 4.743179798126221, "learning_rate": 3.787339196162512e-05, "loss": 0.1962, "step": 10430 }, { "epoch": 0.37938803692128786, "grad_norm": 2.603379011154175, "learning_rate": 3.790973181190494e-05, "loss": 0.1854, "step": 10440 }, { "epoch": 0.37975143542408607, "grad_norm": 6.267378807067871, "learning_rate": 3.794607166218475e-05, "loss": 0.2569, "step": 10450 }, { "epoch": 0.3801148339268842, "grad_norm": 5.370235919952393, "learning_rate": 3.798241151246457e-05, "loss": 0.1796, "step": 10460 }, { "epoch": 0.3804782324296824, "grad_norm": 2.170964002609253, "learning_rate": 3.801875136274439e-05, "loss": 0.1713, "step": 10470 }, { "epoch": 0.3808416309324806, "grad_norm": 4.134753704071045, "learning_rate": 3.80550912130242e-05, "loss": 0.2269, "step": 10480 }, { "epoch": 0.38120502943527873, "grad_norm": 2.7026259899139404, "learning_rate": 3.809143106330402e-05, "loss": 0.1938, "step": 10490 }, { "epoch": 0.3815684279380769, "grad_norm": 7.368224143981934, "learning_rate": 3.812777091358384e-05, "loss": 0.2617, "step": 10500 }, { "epoch": 0.3819318264408751, "grad_norm": 1.3194938898086548, "learning_rate": 3.816411076386365e-05, "loss": 0.2066, "step": 10510 }, { "epoch": 0.38229522494367324, "grad_norm": 1.901505470275879, "learning_rate": 3.820045061414347e-05, "loss": 0.1716, "step": 10520 }, { "epoch": 0.3826586234464714, "grad_norm": 3.4045536518096924, "learning_rate": 3.823679046442329e-05, "loss": 0.1625, "step": 10530 }, { "epoch": 0.38302202194926954, "grad_norm": 2.1540184020996094, "learning_rate": 3.8273130314703104e-05, "loss": 0.1829, "step": 10540 }, { "epoch": 0.38338542045206775, "grad_norm": 14.377511024475098, "learning_rate": 3.8309470164982924e-05, "loss": 0.2747, "step": 10550 }, { "epoch": 0.3837488189548659, "grad_norm": 1.9092762470245361, "learning_rate": 3.834581001526274e-05, "loss": 0.1728, "step": 10560 }, { "epoch": 0.38411221745766405, "grad_norm": 1.867458462715149, "learning_rate": 3.8382149865542556e-05, "loss": 0.1752, "step": 10570 }, { "epoch": 0.38447561596046226, "grad_norm": 5.246692657470703, "learning_rate": 3.841848971582237e-05, "loss": 0.1823, "step": 10580 }, { "epoch": 0.3848390144632604, "grad_norm": 2.9294533729553223, "learning_rate": 3.845482956610219e-05, "loss": 0.2052, "step": 10590 }, { "epoch": 0.38520241296605856, "grad_norm": 11.946113586425781, "learning_rate": 3.849116941638201e-05, "loss": 0.309, "step": 10600 }, { "epoch": 0.38556581146885677, "grad_norm": 1.7155182361602783, "learning_rate": 3.852750926666182e-05, "loss": 0.175, "step": 10610 }, { "epoch": 0.3859292099716549, "grad_norm": 1.1520076990127563, "learning_rate": 3.856384911694164e-05, "loss": 0.2129, "step": 10620 }, { "epoch": 0.38629260847445307, "grad_norm": 1.9750351905822754, "learning_rate": 3.860018896722146e-05, "loss": 0.1725, "step": 10630 }, { "epoch": 0.3866560069772513, "grad_norm": 4.309560298919678, "learning_rate": 3.863652881750127e-05, "loss": 0.1516, "step": 10640 }, { "epoch": 0.3870194054800494, "grad_norm": 7.554156303405762, "learning_rate": 3.867286866778109e-05, "loss": 0.3069, "step": 10650 }, { "epoch": 0.3873828039828476, "grad_norm": 3.7965683937072754, "learning_rate": 3.870920851806091e-05, "loss": 0.2014, "step": 10660 }, { "epoch": 0.3877462024856458, "grad_norm": 3.8691935539245605, "learning_rate": 3.874554836834072e-05, "loss": 0.1678, "step": 10670 }, { "epoch": 0.38810960098844394, "grad_norm": 4.144315719604492, "learning_rate": 3.878188821862054e-05, "loss": 0.2936, "step": 10680 }, { "epoch": 0.3884729994912421, "grad_norm": 1.5667825937271118, "learning_rate": 3.881822806890036e-05, "loss": 0.1871, "step": 10690 }, { "epoch": 0.38883639799404024, "grad_norm": 7.6076788902282715, "learning_rate": 3.885456791918017e-05, "loss": 0.2661, "step": 10700 }, { "epoch": 0.38919979649683845, "grad_norm": 1.7828059196472168, "learning_rate": 3.889090776945999e-05, "loss": 0.1808, "step": 10710 }, { "epoch": 0.3895631949996366, "grad_norm": 7.039370059967041, "learning_rate": 3.892724761973981e-05, "loss": 0.2484, "step": 10720 }, { "epoch": 0.38992659350243475, "grad_norm": 2.1001148223876953, "learning_rate": 3.8963587470019625e-05, "loss": 0.1644, "step": 10730 }, { "epoch": 0.39028999200523296, "grad_norm": 0.9235002398490906, "learning_rate": 3.899992732029944e-05, "loss": 0.172, "step": 10740 }, { "epoch": 0.3906533905080311, "grad_norm": 10.066643714904785, "learning_rate": 3.903626717057926e-05, "loss": 0.2999, "step": 10750 }, { "epoch": 0.39101678901082926, "grad_norm": 2.256965160369873, "learning_rate": 3.9072607020859076e-05, "loss": 0.2116, "step": 10760 }, { "epoch": 0.39138018751362746, "grad_norm": 1.742125153541565, "learning_rate": 3.910894687113889e-05, "loss": 0.1838, "step": 10770 }, { "epoch": 0.3917435860164256, "grad_norm": 5.397392749786377, "learning_rate": 3.9145286721418714e-05, "loss": 0.2213, "step": 10780 }, { "epoch": 0.39210698451922377, "grad_norm": 2.439197540283203, "learning_rate": 3.918162657169853e-05, "loss": 0.1984, "step": 10790 }, { "epoch": 0.392470383022022, "grad_norm": 6.7387895584106445, "learning_rate": 3.921796642197834e-05, "loss": 0.2842, "step": 10800 }, { "epoch": 0.392470383022022, "eval_loss": 0.39516785740852356, "eval_runtime": 180.1522, "eval_samples_per_second": 41.154, "eval_steps_per_second": 5.146, "eval_wer": 0.21758309583023216, "step": 10800 }, { "epoch": 0.3928337815248201, "grad_norm": 1.5229130983352661, "learning_rate": 3.925430627225816e-05, "loss": 0.1809, "step": 10810 }, { "epoch": 0.3931971800276183, "grad_norm": 1.6385318040847778, "learning_rate": 3.929064612253798e-05, "loss": 0.155, "step": 10820 }, { "epoch": 0.3935605785304164, "grad_norm": 2.403878927230835, "learning_rate": 3.932698597281779e-05, "loss": 0.2837, "step": 10830 }, { "epoch": 0.39392397703321463, "grad_norm": 2.818368434906006, "learning_rate": 3.936332582309761e-05, "loss": 0.2298, "step": 10840 }, { "epoch": 0.3942873755360128, "grad_norm": 6.08942174911499, "learning_rate": 3.939966567337743e-05, "loss": 0.2262, "step": 10850 }, { "epoch": 0.39465077403881094, "grad_norm": 1.2632570266723633, "learning_rate": 3.943600552365724e-05, "loss": 0.2087, "step": 10860 }, { "epoch": 0.39501417254160914, "grad_norm": 2.2119662761688232, "learning_rate": 3.947234537393706e-05, "loss": 0.1974, "step": 10870 }, { "epoch": 0.3953775710444073, "grad_norm": 2.936021089553833, "learning_rate": 3.950868522421688e-05, "loss": 0.1909, "step": 10880 }, { "epoch": 0.39574096954720545, "grad_norm": 1.3898749351501465, "learning_rate": 3.9545025074496693e-05, "loss": 0.184, "step": 10890 }, { "epoch": 0.39610436805000365, "grad_norm": 9.063791275024414, "learning_rate": 3.958136492477651e-05, "loss": 0.338, "step": 10900 }, { "epoch": 0.3964677665528018, "grad_norm": 1.3791584968566895, "learning_rate": 3.961770477505633e-05, "loss": 0.2256, "step": 10910 }, { "epoch": 0.39683116505559995, "grad_norm": 0.9377845525741577, "learning_rate": 3.9654044625336145e-05, "loss": 0.9822, "step": 10920 }, { "epoch": 0.39719456355839816, "grad_norm": 3.9755465984344482, "learning_rate": 3.969038447561596e-05, "loss": 0.2257, "step": 10930 }, { "epoch": 0.3975579620611963, "grad_norm": 1.559699535369873, "learning_rate": 3.9726724325895783e-05, "loss": 0.2116, "step": 10940 }, { "epoch": 0.39792136056399446, "grad_norm": 7.545668601989746, "learning_rate": 3.9763064176175596e-05, "loss": 0.2515, "step": 10950 }, { "epoch": 0.39828475906679267, "grad_norm": 1.980197548866272, "learning_rate": 3.979940402645541e-05, "loss": 0.1721, "step": 10960 }, { "epoch": 0.3986481575695908, "grad_norm": 2.5450973510742188, "learning_rate": 3.9835743876735235e-05, "loss": 1.7152, "step": 10970 }, { "epoch": 0.399011556072389, "grad_norm": 3.518233060836792, "learning_rate": 3.987208372701505e-05, "loss": 0.2521, "step": 10980 }, { "epoch": 0.3993749545751871, "grad_norm": 2.678774356842041, "learning_rate": 3.990842357729486e-05, "loss": 0.2025, "step": 10990 }, { "epoch": 0.39973835307798533, "grad_norm": 11.46552848815918, "learning_rate": 3.9944763427574686e-05, "loss": 0.2683, "step": 11000 }, { "epoch": 0.4001017515807835, "grad_norm": 2.3148844242095947, "learning_rate": 3.99811032778545e-05, "loss": 1.5331, "step": 11010 }, { "epoch": 0.40046515008358163, "grad_norm": 1.2145686149597168, "learning_rate": 4.001744312813431e-05, "loss": 0.1931, "step": 11020 }, { "epoch": 0.40082854858637984, "grad_norm": 3.581883192062378, "learning_rate": 4.005378297841413e-05, "loss": 0.18, "step": 11030 }, { "epoch": 0.401191947089178, "grad_norm": 2.4645683765411377, "learning_rate": 4.009012282869395e-05, "loss": 0.2303, "step": 11040 }, { "epoch": 0.40155534559197614, "grad_norm": 13.845566749572754, "learning_rate": 4.012646267897376e-05, "loss": 0.2515, "step": 11050 }, { "epoch": 0.40191874409477435, "grad_norm": 1.6929864883422852, "learning_rate": 4.016280252925358e-05, "loss": 2.9232, "step": 11060 }, { "epoch": 0.4022821425975725, "grad_norm": 1.5453213453292847, "learning_rate": 4.01991423795334e-05, "loss": 0.1703, "step": 11070 }, { "epoch": 0.40264554110037065, "grad_norm": 1.5723987817764282, "learning_rate": 4.0235482229813214e-05, "loss": 0.1694, "step": 11080 }, { "epoch": 0.40300893960316886, "grad_norm": 1.4501444101333618, "learning_rate": 4.027182208009303e-05, "loss": 0.2477, "step": 11090 }, { "epoch": 0.403372338105967, "grad_norm": 20.50950813293457, "learning_rate": 4.030816193037285e-05, "loss": 0.2641, "step": 11100 }, { "epoch": 0.40373573660876516, "grad_norm": 1.9846757650375366, "learning_rate": 4.0344501780652665e-05, "loss": 0.1807, "step": 11110 }, { "epoch": 0.40409913511156337, "grad_norm": 1.3933240175247192, "learning_rate": 4.0380841630932484e-05, "loss": 0.1683, "step": 11120 }, { "epoch": 0.4044625336143615, "grad_norm": 2.370534658432007, "learning_rate": 4.0417181481212304e-05, "loss": 0.2476, "step": 11130 }, { "epoch": 0.40482593211715967, "grad_norm": 2.6382100582122803, "learning_rate": 4.0453521331492116e-05, "loss": 0.1723, "step": 11140 }, { "epoch": 0.4051893306199578, "grad_norm": 27.381826400756836, "learning_rate": 4.048986118177193e-05, "loss": 0.2058, "step": 11150 }, { "epoch": 0.40555272912275603, "grad_norm": 1.3622616529464722, "learning_rate": 4.052620103205175e-05, "loss": 0.1744, "step": 11160 }, { "epoch": 0.4059161276255542, "grad_norm": 1.4734828472137451, "learning_rate": 4.056254088233157e-05, "loss": 0.1685, "step": 11170 }, { "epoch": 0.40627952612835233, "grad_norm": 5.694312572479248, "learning_rate": 4.059888073261138e-05, "loss": 0.3549, "step": 11180 }, { "epoch": 0.40664292463115054, "grad_norm": 1.9976438283920288, "learning_rate": 4.06352205828912e-05, "loss": 0.1525, "step": 11190 }, { "epoch": 0.4070063231339487, "grad_norm": 5.735686779022217, "learning_rate": 4.067156043317102e-05, "loss": 0.2642, "step": 11200 }, { "epoch": 0.40736972163674684, "grad_norm": 5.192315101623535, "learning_rate": 4.070790028345083e-05, "loss": 0.1636, "step": 11210 }, { "epoch": 0.40773312013954505, "grad_norm": 2.6324477195739746, "learning_rate": 4.074424013373065e-05, "loss": 0.3451, "step": 11220 }, { "epoch": 0.4080965186423432, "grad_norm": 2.496997356414795, "learning_rate": 4.078057998401047e-05, "loss": 0.1792, "step": 11230 }, { "epoch": 0.40845991714514135, "grad_norm": 3.928255558013916, "learning_rate": 4.081691983429028e-05, "loss": 0.2203, "step": 11240 }, { "epoch": 0.40882331564793956, "grad_norm": 14.433273315429688, "learning_rate": 4.08532596845701e-05, "loss": 0.3283, "step": 11250 }, { "epoch": 0.4091867141507377, "grad_norm": 1.9282217025756836, "learning_rate": 4.088959953484992e-05, "loss": 0.2191, "step": 11260 }, { "epoch": 0.40955011265353586, "grad_norm": 1.8360569477081299, "learning_rate": 4.0925939385129734e-05, "loss": 0.1623, "step": 11270 }, { "epoch": 0.409913511156334, "grad_norm": 4.518060207366943, "learning_rate": 4.096227923540955e-05, "loss": 0.2036, "step": 11280 }, { "epoch": 0.4102769096591322, "grad_norm": 1.4292632341384888, "learning_rate": 4.099861908568937e-05, "loss": 0.1515, "step": 11290 }, { "epoch": 0.41064030816193037, "grad_norm": 23.795089721679688, "learning_rate": 4.1034958935969185e-05, "loss": 0.3228, "step": 11300 }, { "epoch": 0.4110037066647285, "grad_norm": 1.7721456289291382, "learning_rate": 4.1071298786249005e-05, "loss": 0.15, "step": 11310 }, { "epoch": 0.4113671051675267, "grad_norm": 3.544579029083252, "learning_rate": 4.110763863652882e-05, "loss": 0.2349, "step": 11320 }, { "epoch": 0.4117305036703249, "grad_norm": 4.25554895401001, "learning_rate": 4.1143978486808636e-05, "loss": 0.5458, "step": 11330 }, { "epoch": 0.41209390217312303, "grad_norm": 3.069894313812256, "learning_rate": 4.1180318337088456e-05, "loss": 0.2131, "step": 11340 }, { "epoch": 0.41245730067592123, "grad_norm": 5.389547348022461, "learning_rate": 4.121665818736827e-05, "loss": 0.2895, "step": 11350 }, { "epoch": 0.4128206991787194, "grad_norm": 2.308717727661133, "learning_rate": 4.125299803764809e-05, "loss": 0.209, "step": 11360 }, { "epoch": 0.41318409768151754, "grad_norm": 2.071504831314087, "learning_rate": 4.12893378879279e-05, "loss": 0.225, "step": 11370 }, { "epoch": 0.41354749618431574, "grad_norm": 10.397724151611328, "learning_rate": 4.132567773820772e-05, "loss": 0.5041, "step": 11380 }, { "epoch": 0.4139108946871139, "grad_norm": 3.3916842937469482, "learning_rate": 4.136201758848754e-05, "loss": 0.2055, "step": 11390 }, { "epoch": 0.41427429318991205, "grad_norm": 27.703519821166992, "learning_rate": 4.139835743876735e-05, "loss": 0.3002, "step": 11400 }, { "epoch": 0.41427429318991205, "eval_loss": 0.40216270089149475, "eval_runtime": 180.385, "eval_samples_per_second": 41.101, "eval_steps_per_second": 5.139, "eval_wer": 0.22132263510447112, "step": 11400 }, { "epoch": 0.41463769169271025, "grad_norm": 3.024658203125, "learning_rate": 4.143469728904717e-05, "loss": 0.168, "step": 11410 }, { "epoch": 0.4150010901955084, "grad_norm": 2.899369478225708, "learning_rate": 4.147103713932699e-05, "loss": 3.0252, "step": 11420 }, { "epoch": 0.41536448869830656, "grad_norm": 3.960700511932373, "learning_rate": 4.15073769896068e-05, "loss": 0.1972, "step": 11430 }, { "epoch": 0.4157278872011047, "grad_norm": 1.542468786239624, "learning_rate": 4.154371683988662e-05, "loss": 0.1971, "step": 11440 }, { "epoch": 0.4160912857039029, "grad_norm": 16.871423721313477, "learning_rate": 4.158005669016644e-05, "loss": 0.2768, "step": 11450 }, { "epoch": 0.41645468420670106, "grad_norm": 3.142385721206665, "learning_rate": 4.1616396540446254e-05, "loss": 0.2173, "step": 11460 }, { "epoch": 0.4168180827094992, "grad_norm": 0.9852932095527649, "learning_rate": 4.1652736390726074e-05, "loss": 0.2529, "step": 11470 }, { "epoch": 0.4171814812122974, "grad_norm": 2.4834413528442383, "learning_rate": 4.1689076241005886e-05, "loss": 0.9175, "step": 11480 }, { "epoch": 0.4175448797150956, "grad_norm": 2.7286272048950195, "learning_rate": 4.1725416091285705e-05, "loss": 0.1864, "step": 11490 }, { "epoch": 0.4179082782178937, "grad_norm": 5.711360454559326, "learning_rate": 4.1761755941565525e-05, "loss": 0.2285, "step": 11500 }, { "epoch": 0.41827167672069193, "grad_norm": 1.160866379737854, "learning_rate": 4.179809579184534e-05, "loss": 0.1959, "step": 11510 }, { "epoch": 0.4186350752234901, "grad_norm": 2.5051305294036865, "learning_rate": 4.183443564212516e-05, "loss": 0.185, "step": 11520 }, { "epoch": 0.41899847372628823, "grad_norm": 3.641874313354492, "learning_rate": 4.1870775492404976e-05, "loss": 0.199, "step": 11530 }, { "epoch": 0.41936187222908644, "grad_norm": 1.676038146018982, "learning_rate": 4.190711534268479e-05, "loss": 0.1895, "step": 11540 }, { "epoch": 0.4197252707318846, "grad_norm": 11.47658634185791, "learning_rate": 4.194345519296461e-05, "loss": 0.246, "step": 11550 }, { "epoch": 0.42008866923468274, "grad_norm": 1.7632570266723633, "learning_rate": 4.197979504324443e-05, "loss": 0.1761, "step": 11560 }, { "epoch": 0.4204520677374809, "grad_norm": 2.2994728088378906, "learning_rate": 4.201613489352424e-05, "loss": 0.1799, "step": 11570 }, { "epoch": 0.4208154662402791, "grad_norm": 3.964228391647339, "learning_rate": 4.205247474380406e-05, "loss": 0.7376, "step": 11580 }, { "epoch": 0.42117886474307725, "grad_norm": 1.866466760635376, "learning_rate": 4.208881459408387e-05, "loss": 0.2293, "step": 11590 }, { "epoch": 0.4215422632458754, "grad_norm": 4.722428798675537, "learning_rate": 4.212515444436369e-05, "loss": 0.2303, "step": 11600 }, { "epoch": 0.4219056617486736, "grad_norm": 2.8812968730926514, "learning_rate": 4.2161494294643504e-05, "loss": 0.1628, "step": 11610 }, { "epoch": 0.42226906025147176, "grad_norm": 8.05451488494873, "learning_rate": 4.219783414492332e-05, "loss": 0.1978, "step": 11620 }, { "epoch": 0.4226324587542699, "grad_norm": 3.4176700115203857, "learning_rate": 4.223417399520314e-05, "loss": 0.1986, "step": 11630 }, { "epoch": 0.4229958572570681, "grad_norm": 5.204764366149902, "learning_rate": 4.2270513845482955e-05, "loss": 0.1959, "step": 11640 }, { "epoch": 0.42335925575986627, "grad_norm": 6.184700965881348, "learning_rate": 4.2306853695762774e-05, "loss": 0.2822, "step": 11650 }, { "epoch": 0.4237226542626644, "grad_norm": 2.288935422897339, "learning_rate": 4.2343193546042594e-05, "loss": 0.2073, "step": 11660 }, { "epoch": 0.42408605276546263, "grad_norm": 3.8856844902038574, "learning_rate": 4.2379533396322406e-05, "loss": 0.2134, "step": 11670 }, { "epoch": 0.4244494512682608, "grad_norm": 4.048069953918457, "learning_rate": 4.2415873246602226e-05, "loss": 0.1922, "step": 11680 }, { "epoch": 0.42481284977105893, "grad_norm": 1.466927409172058, "learning_rate": 4.2452213096882045e-05, "loss": 0.1653, "step": 11690 }, { "epoch": 0.42517624827385714, "grad_norm": 35.94015121459961, "learning_rate": 4.248855294716186e-05, "loss": 0.2398, "step": 11700 }, { "epoch": 0.4255396467766553, "grad_norm": 2.575195789337158, "learning_rate": 4.252489279744168e-05, "loss": 0.2241, "step": 11710 }, { "epoch": 0.42590304527945344, "grad_norm": 1.4232568740844727, "learning_rate": 4.2561232647721496e-05, "loss": 0.1817, "step": 11720 }, { "epoch": 0.4262664437822516, "grad_norm": 2.8543412685394287, "learning_rate": 4.259757249800131e-05, "loss": 0.2094, "step": 11730 }, { "epoch": 0.4266298422850498, "grad_norm": 0.85033118724823, "learning_rate": 4.263391234828113e-05, "loss": 0.1578, "step": 11740 }, { "epoch": 0.42699324078784795, "grad_norm": 7.382369041442871, "learning_rate": 4.267025219856095e-05, "loss": 0.2763, "step": 11750 }, { "epoch": 0.4273566392906461, "grad_norm": 1.3994635343551636, "learning_rate": 4.270659204884076e-05, "loss": 0.199, "step": 11760 }, { "epoch": 0.4277200377934443, "grad_norm": 1.4978888034820557, "learning_rate": 4.274293189912057e-05, "loss": 0.1804, "step": 11770 }, { "epoch": 0.42808343629624246, "grad_norm": 5.206210136413574, "learning_rate": 4.27792717494004e-05, "loss": 0.2483, "step": 11780 }, { "epoch": 0.4284468347990406, "grad_norm": 1.4130820035934448, "learning_rate": 4.281561159968021e-05, "loss": 0.1792, "step": 11790 }, { "epoch": 0.4288102333018388, "grad_norm": 2.60227370262146, "learning_rate": 4.2851951449960024e-05, "loss": 0.214, "step": 11800 }, { "epoch": 0.42917363180463697, "grad_norm": 1.8874465227127075, "learning_rate": 4.288829130023985e-05, "loss": 0.1894, "step": 11810 }, { "epoch": 0.4295370303074351, "grad_norm": 2.921766519546509, "learning_rate": 4.292463115051966e-05, "loss": 0.1608, "step": 11820 }, { "epoch": 0.4299004288102333, "grad_norm": 2.812821626663208, "learning_rate": 4.2960971000799475e-05, "loss": 0.2381, "step": 11830 }, { "epoch": 0.4302638273130315, "grad_norm": 1.8063637018203735, "learning_rate": 4.2997310851079295e-05, "loss": 0.198, "step": 11840 }, { "epoch": 0.43062722581582963, "grad_norm": 16.433927536010742, "learning_rate": 4.3033650701359114e-05, "loss": 0.3015, "step": 11850 }, { "epoch": 0.4309906243186278, "grad_norm": 1.295142650604248, "learning_rate": 4.3069990551638927e-05, "loss": 0.1678, "step": 11860 }, { "epoch": 0.431354022821426, "grad_norm": 135.4871063232422, "learning_rate": 4.3106330401918746e-05, "loss": 1.8542, "step": 11870 }, { "epoch": 0.43171742132422414, "grad_norm": 2.3314764499664307, "learning_rate": 4.3142670252198565e-05, "loss": 0.1983, "step": 11880 }, { "epoch": 0.4320808198270223, "grad_norm": 1.6635117530822754, "learning_rate": 4.317901010247838e-05, "loss": 0.1737, "step": 11890 }, { "epoch": 0.4324442183298205, "grad_norm": 32.102664947509766, "learning_rate": 4.32153499527582e-05, "loss": 0.3092, "step": 11900 }, { "epoch": 0.43280761683261865, "grad_norm": 2.3491451740264893, "learning_rate": 4.3251689803038017e-05, "loss": 0.1849, "step": 11910 }, { "epoch": 0.4331710153354168, "grad_norm": 3.8088629245758057, "learning_rate": 4.328802965331783e-05, "loss": 0.2023, "step": 11920 }, { "epoch": 0.433534413838215, "grad_norm": 2.7132246494293213, "learning_rate": 4.332436950359764e-05, "loss": 0.1935, "step": 11930 }, { "epoch": 0.43389781234101316, "grad_norm": 1.2917368412017822, "learning_rate": 4.336070935387747e-05, "loss": 0.1918, "step": 11940 }, { "epoch": 0.4342612108438113, "grad_norm": 9.690601348876953, "learning_rate": 4.339704920415728e-05, "loss": 0.3059, "step": 11950 }, { "epoch": 0.4346246093466095, "grad_norm": 1.2652380466461182, "learning_rate": 4.343338905443709e-05, "loss": 0.1587, "step": 11960 }, { "epoch": 0.43498800784940767, "grad_norm": 0.9622058272361755, "learning_rate": 4.346972890471692e-05, "loss": 0.1755, "step": 11970 }, { "epoch": 0.4353514063522058, "grad_norm": 5.316989898681641, "learning_rate": 4.350606875499673e-05, "loss": 0.1794, "step": 11980 }, { "epoch": 0.435714804855004, "grad_norm": 3.428891181945801, "learning_rate": 4.3542408605276544e-05, "loss": 0.2105, "step": 11990 }, { "epoch": 0.4360782033578022, "grad_norm": 12.879768371582031, "learning_rate": 4.357874845555637e-05, "loss": 0.2904, "step": 12000 }, { "epoch": 0.4360782033578022, "eval_loss": 0.3918191194534302, "eval_runtime": 180.0676, "eval_samples_per_second": 41.173, "eval_steps_per_second": 5.148, "eval_wer": 0.22659611160527893, "step": 12000 }, { "epoch": 0.4364416018606003, "grad_norm": 2.0471973419189453, "learning_rate": 4.361508830583618e-05, "loss": 0.2544, "step": 12010 }, { "epoch": 0.4368050003633985, "grad_norm": 1.3883107900619507, "learning_rate": 4.3651428156115995e-05, "loss": 0.1957, "step": 12020 }, { "epoch": 0.4371683988661967, "grad_norm": 1.786475419998169, "learning_rate": 4.368776800639582e-05, "loss": 0.1732, "step": 12030 }, { "epoch": 0.43753179736899483, "grad_norm": 3.3099594116210938, "learning_rate": 4.3724107856675634e-05, "loss": 0.1871, "step": 12040 }, { "epoch": 0.437895195871793, "grad_norm": 9.09699535369873, "learning_rate": 4.376044770695545e-05, "loss": 0.2745, "step": 12050 }, { "epoch": 0.4382585943745912, "grad_norm": 2.0993807315826416, "learning_rate": 4.3796787557235266e-05, "loss": 0.2076, "step": 12060 }, { "epoch": 0.43862199287738934, "grad_norm": 27.799428939819336, "learning_rate": 4.3833127407515085e-05, "loss": 0.548, "step": 12070 }, { "epoch": 0.4389853913801875, "grad_norm": 3.8897557258605957, "learning_rate": 4.38694672577949e-05, "loss": 0.1799, "step": 12080 }, { "epoch": 0.4393487898829857, "grad_norm": 3.4620189666748047, "learning_rate": 4.390580710807472e-05, "loss": 0.1735, "step": 12090 }, { "epoch": 0.43971218838578385, "grad_norm": 9.587783813476562, "learning_rate": 4.394214695835454e-05, "loss": 0.3344, "step": 12100 }, { "epoch": 0.440075586888582, "grad_norm": 1.2581641674041748, "learning_rate": 4.397848680863435e-05, "loss": 0.1863, "step": 12110 }, { "epoch": 0.4404389853913802, "grad_norm": 1.3624401092529297, "learning_rate": 4.401482665891417e-05, "loss": 3.7692, "step": 12120 }, { "epoch": 0.44080238389417836, "grad_norm": 2.0099213123321533, "learning_rate": 4.405116650919399e-05, "loss": 0.1999, "step": 12130 }, { "epoch": 0.4411657823969765, "grad_norm": 2.7499871253967285, "learning_rate": 4.40875063594738e-05, "loss": 0.1854, "step": 12140 }, { "epoch": 0.4415291808997747, "grad_norm": 6.473042964935303, "learning_rate": 4.412384620975361e-05, "loss": 0.2843, "step": 12150 }, { "epoch": 0.44189257940257287, "grad_norm": 3.845900535583496, "learning_rate": 4.416018606003344e-05, "loss": 0.1747, "step": 12160 }, { "epoch": 0.442255977905371, "grad_norm": 1.4052759408950806, "learning_rate": 4.419652591031325e-05, "loss": 0.16, "step": 12170 }, { "epoch": 0.4426193764081692, "grad_norm": 3.5824673175811768, "learning_rate": 4.4232865760593064e-05, "loss": 0.7205, "step": 12180 }, { "epoch": 0.4429827749109674, "grad_norm": 1.237358570098877, "learning_rate": 4.426920561087289e-05, "loss": 0.2043, "step": 12190 }, { "epoch": 0.44334617341376553, "grad_norm": 11.106649398803711, "learning_rate": 4.43055454611527e-05, "loss": 0.2537, "step": 12200 }, { "epoch": 0.4437095719165637, "grad_norm": 1.4566165208816528, "learning_rate": 4.4341885311432516e-05, "loss": 0.174, "step": 12210 }, { "epoch": 0.4440729704193619, "grad_norm": 1.4067914485931396, "learning_rate": 4.4378225161712335e-05, "loss": 0.1672, "step": 12220 }, { "epoch": 0.44443636892216004, "grad_norm": 3.1289005279541016, "learning_rate": 4.4414565011992154e-05, "loss": 0.2459, "step": 12230 }, { "epoch": 0.4447997674249582, "grad_norm": 1.2487775087356567, "learning_rate": 4.445090486227197e-05, "loss": 0.1911, "step": 12240 }, { "epoch": 0.4451631659277564, "grad_norm": 4.373108863830566, "learning_rate": 4.4487244712551786e-05, "loss": 0.265, "step": 12250 }, { "epoch": 0.44552656443055455, "grad_norm": 3.0927655696868896, "learning_rate": 4.4523584562831606e-05, "loss": 0.166, "step": 12260 }, { "epoch": 0.4458899629333527, "grad_norm": 1.4012075662612915, "learning_rate": 4.455992441311142e-05, "loss": 0.1631, "step": 12270 }, { "epoch": 0.4462533614361509, "grad_norm": 3.9944920539855957, "learning_rate": 4.459626426339124e-05, "loss": 0.2616, "step": 12280 }, { "epoch": 0.44661675993894906, "grad_norm": 2.412261962890625, "learning_rate": 4.463260411367106e-05, "loss": 0.1963, "step": 12290 }, { "epoch": 0.4469801584417472, "grad_norm": 8.601739883422852, "learning_rate": 4.466894396395087e-05, "loss": 0.3057, "step": 12300 }, { "epoch": 0.44734355694454536, "grad_norm": 2.1279587745666504, "learning_rate": 4.470528381423069e-05, "loss": 0.1931, "step": 12310 }, { "epoch": 0.44770695544734357, "grad_norm": 2.465534210205078, "learning_rate": 4.474162366451051e-05, "loss": 0.1701, "step": 12320 }, { "epoch": 0.4480703539501417, "grad_norm": 6.147269248962402, "learning_rate": 4.477796351479032e-05, "loss": 0.7176, "step": 12330 }, { "epoch": 0.44843375245293987, "grad_norm": 1.6242046356201172, "learning_rate": 4.481430336507014e-05, "loss": 0.1769, "step": 12340 }, { "epoch": 0.4487971509557381, "grad_norm": 7.065566539764404, "learning_rate": 4.485064321534995e-05, "loss": 0.2967, "step": 12350 }, { "epoch": 0.44916054945853623, "grad_norm": 1.9389359951019287, "learning_rate": 4.488698306562977e-05, "loss": 0.1853, "step": 12360 }, { "epoch": 0.4495239479613344, "grad_norm": 1.011250376701355, "learning_rate": 4.492332291590959e-05, "loss": 0.2036, "step": 12370 }, { "epoch": 0.4498873464641326, "grad_norm": 2.459062099456787, "learning_rate": 4.4959662766189404e-05, "loss": 0.1865, "step": 12380 }, { "epoch": 0.45025074496693074, "grad_norm": 1.8472875356674194, "learning_rate": 4.499600261646922e-05, "loss": 0.2178, "step": 12390 }, { "epoch": 0.4506141434697289, "grad_norm": 40.6389045715332, "learning_rate": 4.5032342466749036e-05, "loss": 0.2506, "step": 12400 }, { "epoch": 0.4509775419725271, "grad_norm": 3.9729344844818115, "learning_rate": 4.5068682317028855e-05, "loss": 0.1917, "step": 12410 }, { "epoch": 0.45134094047532525, "grad_norm": 1.0262936353683472, "learning_rate": 4.5105022167308675e-05, "loss": 0.2115, "step": 12420 }, { "epoch": 0.4517043389781234, "grad_norm": 1.5356003046035767, "learning_rate": 4.514136201758849e-05, "loss": 0.1907, "step": 12430 }, { "epoch": 0.4520677374809216, "grad_norm": 1.3107296228408813, "learning_rate": 4.5177701867868307e-05, "loss": 0.195, "step": 12440 }, { "epoch": 0.45243113598371976, "grad_norm": 11.025674819946289, "learning_rate": 4.5214041718148126e-05, "loss": 0.2794, "step": 12450 }, { "epoch": 0.4527945344865179, "grad_norm": 1.8793771266937256, "learning_rate": 4.525038156842794e-05, "loss": 0.2143, "step": 12460 }, { "epoch": 0.45315793298931606, "grad_norm": 1.6508142948150635, "learning_rate": 4.528672141870776e-05, "loss": 0.1863, "step": 12470 }, { "epoch": 0.45352133149211427, "grad_norm": 4.942420959472656, "learning_rate": 4.532306126898758e-05, "loss": 0.1997, "step": 12480 }, { "epoch": 0.4538847299949124, "grad_norm": 3.1977925300598145, "learning_rate": 4.535940111926739e-05, "loss": 1.9163, "step": 12490 }, { "epoch": 0.45424812849771057, "grad_norm": 8.74572467803955, "learning_rate": 4.539574096954721e-05, "loss": 0.3186, "step": 12500 }, { "epoch": 0.4546115270005088, "grad_norm": 1.5346311330795288, "learning_rate": 4.543208081982702e-05, "loss": 0.1958, "step": 12510 }, { "epoch": 0.4549749255033069, "grad_norm": 1.622859239578247, "learning_rate": 4.546842067010684e-05, "loss": 0.1828, "step": 12520 }, { "epoch": 0.4553383240061051, "grad_norm": 1.9394720792770386, "learning_rate": 4.550476052038666e-05, "loss": 0.2198, "step": 12530 }, { "epoch": 0.4557017225089033, "grad_norm": 1.8405578136444092, "learning_rate": 4.554110037066647e-05, "loss": 0.1789, "step": 12540 }, { "epoch": 0.45606512101170144, "grad_norm": 6.24867582321167, "learning_rate": 4.557744022094629e-05, "loss": 0.2593, "step": 12550 }, { "epoch": 0.4564285195144996, "grad_norm": 1.6062959432601929, "learning_rate": 4.561378007122611e-05, "loss": 0.1665, "step": 12560 }, { "epoch": 0.4567919180172978, "grad_norm": 1.1478540897369385, "learning_rate": 4.5650119921505924e-05, "loss": 0.1942, "step": 12570 }, { "epoch": 0.45715531652009594, "grad_norm": 2.0299808979034424, "learning_rate": 4.5686459771785744e-05, "loss": 0.2092, "step": 12580 }, { "epoch": 0.4575187150228941, "grad_norm": 1.6643180847167969, "learning_rate": 4.572279962206556e-05, "loss": 0.1714, "step": 12590 }, { "epoch": 0.45788211352569225, "grad_norm": 10.169012069702148, "learning_rate": 4.5759139472345376e-05, "loss": 0.3101, "step": 12600 }, { "epoch": 0.45788211352569225, "eval_loss": 0.408176064491272, "eval_runtime": 179.7843, "eval_samples_per_second": 41.238, "eval_steps_per_second": 5.156, "eval_wer": 0.24004756113057527, "step": 12600 }, { "epoch": 0.45824551202849045, "grad_norm": 1.8151092529296875, "learning_rate": 4.5795479322625195e-05, "loss": 0.1739, "step": 12610 }, { "epoch": 0.4586089105312886, "grad_norm": 1.1606543064117432, "learning_rate": 4.583181917290501e-05, "loss": 0.1781, "step": 12620 }, { "epoch": 0.45897230903408676, "grad_norm": 2.5139431953430176, "learning_rate": 4.586815902318483e-05, "loss": 0.2101, "step": 12630 }, { "epoch": 0.45933570753688496, "grad_norm": 3.1557183265686035, "learning_rate": 4.590449887346464e-05, "loss": 0.1925, "step": 12640 }, { "epoch": 0.4596991060396831, "grad_norm": 13.978137016296387, "learning_rate": 4.594083872374446e-05, "loss": 0.3085, "step": 12650 }, { "epoch": 0.46006250454248127, "grad_norm": 1.5187938213348389, "learning_rate": 4.597717857402428e-05, "loss": 0.1909, "step": 12660 }, { "epoch": 0.46042590304527947, "grad_norm": 1.661890983581543, "learning_rate": 4.601351842430409e-05, "loss": 0.1729, "step": 12670 }, { "epoch": 0.4607893015480776, "grad_norm": 5.693175792694092, "learning_rate": 4.604985827458391e-05, "loss": 0.2069, "step": 12680 }, { "epoch": 0.4611527000508758, "grad_norm": 2.5228755474090576, "learning_rate": 4.608619812486373e-05, "loss": 0.1899, "step": 12690 }, { "epoch": 0.461516098553674, "grad_norm": 12.629317283630371, "learning_rate": 4.612253797514354e-05, "loss": 0.2441, "step": 12700 }, { "epoch": 0.46187949705647213, "grad_norm": 1.5003726482391357, "learning_rate": 4.615887782542336e-05, "loss": 0.1845, "step": 12710 }, { "epoch": 0.4622428955592703, "grad_norm": 1.596705675125122, "learning_rate": 4.619521767570318e-05, "loss": 0.1942, "step": 12720 }, { "epoch": 0.4626062940620685, "grad_norm": 4.299325466156006, "learning_rate": 4.623155752598299e-05, "loss": 0.1881, "step": 12730 }, { "epoch": 0.46296969256486664, "grad_norm": 2.242932081222534, "learning_rate": 4.626789737626281e-05, "loss": 0.1655, "step": 12740 }, { "epoch": 0.4633330910676648, "grad_norm": 17.353313446044922, "learning_rate": 4.630423722654263e-05, "loss": 0.3002, "step": 12750 }, { "epoch": 0.46369648957046294, "grad_norm": 1.8967528343200684, "learning_rate": 4.6340577076822444e-05, "loss": 0.1967, "step": 12760 }, { "epoch": 0.46405988807326115, "grad_norm": 1.9839125871658325, "learning_rate": 4.6376916927102264e-05, "loss": 0.1582, "step": 12770 }, { "epoch": 0.4644232865760593, "grad_norm": 1.8139293193817139, "learning_rate": 4.641325677738208e-05, "loss": 0.2527, "step": 12780 }, { "epoch": 0.46478668507885745, "grad_norm": 1.6944659948349, "learning_rate": 4.6449596627661896e-05, "loss": 0.1656, "step": 12790 }, { "epoch": 0.46515008358165566, "grad_norm": 3.7842020988464355, "learning_rate": 4.648593647794171e-05, "loss": 0.2375, "step": 12800 }, { "epoch": 0.4655134820844538, "grad_norm": 1.8103773593902588, "learning_rate": 4.6522276328221534e-05, "loss": 0.1932, "step": 12810 }, { "epoch": 0.46587688058725196, "grad_norm": 1.4419440031051636, "learning_rate": 4.655861617850135e-05, "loss": 0.1808, "step": 12820 }, { "epoch": 0.46624027909005017, "grad_norm": 6.361825466156006, "learning_rate": 4.659495602878116e-05, "loss": 0.2105, "step": 12830 }, { "epoch": 0.4666036775928483, "grad_norm": 1.4687098264694214, "learning_rate": 4.663129587906098e-05, "loss": 0.18, "step": 12840 }, { "epoch": 0.46696707609564647, "grad_norm": 14.758776664733887, "learning_rate": 4.66676357293408e-05, "loss": 0.3001, "step": 12850 }, { "epoch": 0.4673304745984447, "grad_norm": 1.4836699962615967, "learning_rate": 4.670397557962061e-05, "loss": 0.1713, "step": 12860 }, { "epoch": 0.46769387310124283, "grad_norm": 4.860133171081543, "learning_rate": 4.674031542990043e-05, "loss": 0.1791, "step": 12870 }, { "epoch": 0.468057271604041, "grad_norm": 1.9861228466033936, "learning_rate": 4.677665528018025e-05, "loss": 0.2029, "step": 12880 }, { "epoch": 0.46842067010683913, "grad_norm": 1.9190025329589844, "learning_rate": 4.681299513046006e-05, "loss": 0.1611, "step": 12890 }, { "epoch": 0.46878406860963734, "grad_norm": 4.6381516456604, "learning_rate": 4.684933498073988e-05, "loss": 0.2646, "step": 12900 }, { "epoch": 0.4691474671124355, "grad_norm": 1.2092620134353638, "learning_rate": 4.68856748310197e-05, "loss": 0.1865, "step": 12910 }, { "epoch": 0.46951086561523364, "grad_norm": 2.7816121578216553, "learning_rate": 4.6922014681299513e-05, "loss": 0.2047, "step": 12920 }, { "epoch": 0.46987426411803185, "grad_norm": 0.629324734210968, "learning_rate": 4.6958354531579326e-05, "loss": 0.2404, "step": 12930 }, { "epoch": 0.47023766262083, "grad_norm": 4.156667232513428, "learning_rate": 4.699469438185915e-05, "loss": 0.1604, "step": 12940 }, { "epoch": 0.47060106112362815, "grad_norm": 1.8534492254257202, "learning_rate": 4.7031034232138965e-05, "loss": 0.2364, "step": 12950 }, { "epoch": 0.47096445962642636, "grad_norm": 1.382408857345581, "learning_rate": 4.706737408241878e-05, "loss": 0.2078, "step": 12960 }, { "epoch": 0.4713278581292245, "grad_norm": 2.499023914337158, "learning_rate": 4.7103713932698603e-05, "loss": 0.1935, "step": 12970 }, { "epoch": 0.47169125663202266, "grad_norm": 2.726032257080078, "learning_rate": 4.7140053782978416e-05, "loss": 0.2143, "step": 12980 }, { "epoch": 0.47205465513482087, "grad_norm": 2.1388118267059326, "learning_rate": 4.717639363325823e-05, "loss": 0.1704, "step": 12990 }, { "epoch": 0.472418053637619, "grad_norm": 5.408501148223877, "learning_rate": 4.7212733483538055e-05, "loss": 0.2492, "step": 13000 }, { "epoch": 0.47278145214041717, "grad_norm": 1.8640841245651245, "learning_rate": 4.724907333381787e-05, "loss": 0.1958, "step": 13010 }, { "epoch": 0.4731448506432154, "grad_norm": 1.4251651763916016, "learning_rate": 4.728541318409768e-05, "loss": 0.1969, "step": 13020 }, { "epoch": 0.4735082491460135, "grad_norm": 2.2603137493133545, "learning_rate": 4.7321753034377506e-05, "loss": 0.1879, "step": 13030 }, { "epoch": 0.4738716476488117, "grad_norm": 1.7813081741333008, "learning_rate": 4.735809288465732e-05, "loss": 0.1627, "step": 13040 }, { "epoch": 0.47423504615160983, "grad_norm": 16.746126174926758, "learning_rate": 4.739443273493713e-05, "loss": 0.3058, "step": 13050 }, { "epoch": 0.47459844465440804, "grad_norm": 2.56193470954895, "learning_rate": 4.743077258521695e-05, "loss": 0.1729, "step": 13060 }, { "epoch": 0.4749618431572062, "grad_norm": 2.1787185668945312, "learning_rate": 4.746711243549677e-05, "loss": 0.1804, "step": 13070 }, { "epoch": 0.47532524166000434, "grad_norm": 3.385338544845581, "learning_rate": 4.750345228577658e-05, "loss": 0.1884, "step": 13080 }, { "epoch": 0.47568864016280255, "grad_norm": 2.48083233833313, "learning_rate": 4.75397921360564e-05, "loss": 0.1728, "step": 13090 }, { "epoch": 0.4760520386656007, "grad_norm": 47.18072509765625, "learning_rate": 4.757613198633622e-05, "loss": 0.2427, "step": 13100 }, { "epoch": 0.47641543716839885, "grad_norm": 1.3267533779144287, "learning_rate": 4.7612471836616034e-05, "loss": 0.1847, "step": 13110 }, { "epoch": 0.47677883567119705, "grad_norm": 2.098389148712158, "learning_rate": 4.764881168689585e-05, "loss": 0.1682, "step": 13120 }, { "epoch": 0.4771422341739952, "grad_norm": 1.1197071075439453, "learning_rate": 4.768515153717567e-05, "loss": 0.166, "step": 13130 }, { "epoch": 0.47750563267679336, "grad_norm": 1.431281328201294, "learning_rate": 4.7721491387455485e-05, "loss": 0.3262, "step": 13140 }, { "epoch": 0.47786903117959156, "grad_norm": 15.357772827148438, "learning_rate": 4.7757831237735304e-05, "loss": 0.2906, "step": 13150 }, { "epoch": 0.4782324296823897, "grad_norm": 3.03275465965271, "learning_rate": 4.7794171088015124e-05, "loss": 0.207, "step": 13160 }, { "epoch": 0.47859582818518787, "grad_norm": 1.0988962650299072, "learning_rate": 4.7830510938294936e-05, "loss": 0.1788, "step": 13170 }, { "epoch": 0.4789592266879861, "grad_norm": 1.9456548690795898, "learning_rate": 4.786685078857475e-05, "loss": 0.2397, "step": 13180 }, { "epoch": 0.4793226251907842, "grad_norm": 1.7383311986923218, "learning_rate": 4.7903190638854575e-05, "loss": 0.1841, "step": 13190 }, { "epoch": 0.4796860236935824, "grad_norm": 5.512730121612549, "learning_rate": 4.793953048913439e-05, "loss": 0.2708, "step": 13200 }, { "epoch": 0.4796860236935824, "eval_loss": 0.3998795747756958, "eval_runtime": 180.9114, "eval_samples_per_second": 40.981, "eval_steps_per_second": 5.124, "eval_wer": 0.2369433804708915, "step": 13200 }, { "epoch": 0.4800494221963805, "grad_norm": 1.5843122005462646, "learning_rate": 4.79758703394142e-05, "loss": 1.0933, "step": 13210 }, { "epoch": 0.48041282069917873, "grad_norm": 1.4696934223175049, "learning_rate": 4.8012210189694026e-05, "loss": 0.1771, "step": 13220 }, { "epoch": 0.4807762192019769, "grad_norm": 2.5620357990264893, "learning_rate": 4.804855003997384e-05, "loss": 0.2202, "step": 13230 }, { "epoch": 0.48113961770477504, "grad_norm": 73.08427429199219, "learning_rate": 4.808488989025365e-05, "loss": 0.2471, "step": 13240 }, { "epoch": 0.48150301620757324, "grad_norm": 7.291989803314209, "learning_rate": 4.812122974053347e-05, "loss": 0.2542, "step": 13250 }, { "epoch": 0.4818664147103714, "grad_norm": 1.7582112550735474, "learning_rate": 4.815756959081329e-05, "loss": 0.1884, "step": 13260 }, { "epoch": 0.48222981321316954, "grad_norm": 0.9253680109977722, "learning_rate": 4.81939094410931e-05, "loss": 0.1797, "step": 13270 }, { "epoch": 0.48259321171596775, "grad_norm": 8.042390823364258, "learning_rate": 4.823024929137292e-05, "loss": 0.192, "step": 13280 }, { "epoch": 0.4829566102187659, "grad_norm": 3.2288219928741455, "learning_rate": 4.826658914165274e-05, "loss": 0.2041, "step": 13290 }, { "epoch": 0.48332000872156405, "grad_norm": 7.657989978790283, "learning_rate": 4.8302928991932554e-05, "loss": 0.3034, "step": 13300 }, { "epoch": 0.48368340722436226, "grad_norm": 2.9273271560668945, "learning_rate": 4.833926884221237e-05, "loss": 0.2028, "step": 13310 }, { "epoch": 0.4840468057271604, "grad_norm": 4.2344865798950195, "learning_rate": 4.837560869249219e-05, "loss": 0.1817, "step": 13320 }, { "epoch": 0.48441020422995856, "grad_norm": 4.074464797973633, "learning_rate": 4.8411948542772005e-05, "loss": 0.2197, "step": 13330 }, { "epoch": 0.4847736027327567, "grad_norm": 1.7070029973983765, "learning_rate": 4.8448288393051825e-05, "loss": 0.2374, "step": 13340 }, { "epoch": 0.4851370012355549, "grad_norm": 2.5278494358062744, "learning_rate": 4.8484628243331644e-05, "loss": 0.265, "step": 13350 }, { "epoch": 0.4855003997383531, "grad_norm": 1.4800697565078735, "learning_rate": 4.8520968093611456e-05, "loss": 0.1597, "step": 13360 }, { "epoch": 0.4858637982411512, "grad_norm": 1.238171935081482, "learning_rate": 4.8557307943891276e-05, "loss": 0.1862, "step": 13370 }, { "epoch": 0.48622719674394943, "grad_norm": 2.7711944580078125, "learning_rate": 4.859364779417109e-05, "loss": 0.1572, "step": 13380 }, { "epoch": 0.4865905952467476, "grad_norm": 2.386011838912964, "learning_rate": 4.862998764445091e-05, "loss": 0.1624, "step": 13390 }, { "epoch": 0.48695399374954573, "grad_norm": 10.38249397277832, "learning_rate": 4.866632749473072e-05, "loss": 0.2182, "step": 13400 }, { "epoch": 0.48731739225234394, "grad_norm": 1.1541043519973755, "learning_rate": 4.870266734501054e-05, "loss": 0.1867, "step": 13410 }, { "epoch": 0.4876807907551421, "grad_norm": 0.7680534720420837, "learning_rate": 4.873900719529036e-05, "loss": 0.1619, "step": 13420 }, { "epoch": 0.48804418925794024, "grad_norm": 2.6120142936706543, "learning_rate": 4.877534704557017e-05, "loss": 1.0657, "step": 13430 }, { "epoch": 0.48840758776073845, "grad_norm": 2.1559348106384277, "learning_rate": 4.881168689584999e-05, "loss": 0.1576, "step": 13440 }, { "epoch": 0.4887709862635366, "grad_norm": 8.222488403320312, "learning_rate": 4.884802674612981e-05, "loss": 0.2596, "step": 13450 }, { "epoch": 0.48913438476633475, "grad_norm": 1.7630010843276978, "learning_rate": 4.888436659640962e-05, "loss": 0.1755, "step": 13460 }, { "epoch": 0.48949778326913296, "grad_norm": 1.489050269126892, "learning_rate": 4.892070644668944e-05, "loss": 0.1844, "step": 13470 }, { "epoch": 0.4898611817719311, "grad_norm": 4.412111759185791, "learning_rate": 4.895704629696926e-05, "loss": 0.2114, "step": 13480 }, { "epoch": 0.49022458027472926, "grad_norm": 2.060366630554199, "learning_rate": 4.8993386147249074e-05, "loss": 0.1932, "step": 13490 }, { "epoch": 0.4905879787775274, "grad_norm": 9.488603591918945, "learning_rate": 4.9029725997528893e-05, "loss": 0.303, "step": 13500 }, { "epoch": 0.4909513772803256, "grad_norm": 2.295671224594116, "learning_rate": 4.906606584780871e-05, "loss": 0.1583, "step": 13510 }, { "epoch": 0.49131477578312377, "grad_norm": 4.13812255859375, "learning_rate": 4.9102405698088525e-05, "loss": 1.9041, "step": 13520 }, { "epoch": 0.4916781742859219, "grad_norm": 3.7411348819732666, "learning_rate": 4.9138745548368345e-05, "loss": 0.1927, "step": 13530 }, { "epoch": 0.4920415727887201, "grad_norm": 1.523505449295044, "learning_rate": 4.917508539864816e-05, "loss": 0.1721, "step": 13540 }, { "epoch": 0.4924049712915183, "grad_norm": 8.239662170410156, "learning_rate": 4.921142524892798e-05, "loss": 0.3205, "step": 13550 }, { "epoch": 0.49276836979431643, "grad_norm": 1.8316904306411743, "learning_rate": 4.9247765099207796e-05, "loss": 0.172, "step": 13560 }, { "epoch": 0.49313176829711464, "grad_norm": 4.627805233001709, "learning_rate": 4.928410494948761e-05, "loss": 0.1731, "step": 13570 }, { "epoch": 0.4934951667999128, "grad_norm": 4.277485370635986, "learning_rate": 4.932044479976743e-05, "loss": 0.2522, "step": 13580 }, { "epoch": 0.49385856530271094, "grad_norm": 2.131641149520874, "learning_rate": 4.935678465004725e-05, "loss": 0.1766, "step": 13590 }, { "epoch": 0.49422196380550915, "grad_norm": 2.9195988178253174, "learning_rate": 4.939312450032706e-05, "loss": 0.3745, "step": 13600 }, { "epoch": 0.4945853623083073, "grad_norm": 1.5876374244689941, "learning_rate": 4.942946435060688e-05, "loss": 0.1716, "step": 13610 }, { "epoch": 0.49494876081110545, "grad_norm": 4.506389617919922, "learning_rate": 4.946580420088669e-05, "loss": 0.5847, "step": 13620 }, { "epoch": 0.4953121593139036, "grad_norm": 3.497152090072632, "learning_rate": 4.950214405116651e-05, "loss": 0.2179, "step": 13630 }, { "epoch": 0.4956755578167018, "grad_norm": 1.7728289365768433, "learning_rate": 4.953848390144633e-05, "loss": 0.165, "step": 13640 }, { "epoch": 0.49603895631949996, "grad_norm": 12.01921558380127, "learning_rate": 4.957482375172614e-05, "loss": 0.2447, "step": 13650 }, { "epoch": 0.4964023548222981, "grad_norm": 2.5448553562164307, "learning_rate": 4.961116360200596e-05, "loss": 0.2089, "step": 13660 }, { "epoch": 0.4967657533250963, "grad_norm": 2.3643887042999268, "learning_rate": 4.9647503452285775e-05, "loss": 0.1724, "step": 13670 }, { "epoch": 0.49712915182789447, "grad_norm": 2.096191644668579, "learning_rate": 4.9683843302565594e-05, "loss": 0.1759, "step": 13680 }, { "epoch": 0.4974925503306926, "grad_norm": 0.9760168790817261, "learning_rate": 4.9720183152845414e-05, "loss": 0.1817, "step": 13690 }, { "epoch": 0.4978559488334908, "grad_norm": 3.019702434539795, "learning_rate": 4.9756523003125226e-05, "loss": 0.2275, "step": 13700 }, { "epoch": 0.498219347336289, "grad_norm": 1.0820231437683105, "learning_rate": 4.9789228868377064e-05, "loss": 2.5822, "step": 13710 }, { "epoch": 0.4985827458390871, "grad_norm": 3.2908883094787598, "learning_rate": 4.982556871865688e-05, "loss": 0.1898, "step": 13720 }, { "epoch": 0.49894614434188533, "grad_norm": 3.4303886890411377, "learning_rate": 4.98619085689367e-05, "loss": 0.2295, "step": 13730 }, { "epoch": 0.4993095428446835, "grad_norm": 1.8785525560379028, "learning_rate": 4.9898248419216515e-05, "loss": 0.1699, "step": 13740 }, { "epoch": 0.49967294134748164, "grad_norm": 7.539544105529785, "learning_rate": 4.993458826949633e-05, "loss": 0.2955, "step": 13750 }, { "epoch": 0.5000363398502798, "grad_norm": 1.6091630458831787, "learning_rate": 4.997092811977615e-05, "loss": 0.1696, "step": 13760 }, { "epoch": 0.5003997383530779, "grad_norm": 1.023695945739746, "learning_rate": 4.9999999978456776e-05, "loss": 0.1872, "step": 13770 }, { "epoch": 0.5007631368558761, "grad_norm": 8.364274978637695, "learning_rate": 4.999999922444405e-05, "loss": 0.1844, "step": 13780 }, { "epoch": 0.5011265353586744, "grad_norm": 1.7257829904556274, "learning_rate": 4.99999973932703e-05, "loss": 0.241, "step": 13790 }, { "epoch": 0.5014899338614724, "grad_norm": 7.256163597106934, "learning_rate": 4.999999448493561e-05, "loss": 0.2714, "step": 13800 }, { "epoch": 0.5014899338614724, "eval_loss": 0.4298999607563019, "eval_runtime": 179.7223, "eval_samples_per_second": 41.253, "eval_steps_per_second": 5.158, "eval_wer": 0.22517109299834806, "step": 13800 }, { "epoch": 0.5018533323642707, "grad_norm": 3.087979316711426, "learning_rate": 4.999999049944011e-05, "loss": 0.3094, "step": 13810 }, { "epoch": 0.5022167308670689, "grad_norm": 1.7626384496688843, "learning_rate": 4.999998543678397e-05, "loss": 0.2521, "step": 13820 }, { "epoch": 0.502580129369867, "grad_norm": 2.257432699203491, "learning_rate": 4.999997929696741e-05, "loss": 0.1913, "step": 13830 }, { "epoch": 0.5029435278726652, "grad_norm": 1.7763293981552124, "learning_rate": 4.999997207999069e-05, "loss": 0.1812, "step": 13840 }, { "epoch": 0.5033069263754634, "grad_norm": 8.228759765625, "learning_rate": 4.9999963785854124e-05, "loss": 0.2953, "step": 13850 }, { "epoch": 0.5036703248782615, "grad_norm": 1.200305461883545, "learning_rate": 4.999995441455807e-05, "loss": 0.3246, "step": 13860 }, { "epoch": 0.5040337233810597, "grad_norm": 1.9264732599258423, "learning_rate": 4.999994396610292e-05, "loss": 0.1749, "step": 13870 }, { "epoch": 0.5043971218838579, "grad_norm": 2.547212839126587, "learning_rate": 4.999993244048915e-05, "loss": 0.2714, "step": 13880 }, { "epoch": 0.504760520386656, "grad_norm": 2.7918379306793213, "learning_rate": 4.999991983771723e-05, "loss": 0.1984, "step": 13890 }, { "epoch": 0.5051239188894542, "grad_norm": 16.789764404296875, "learning_rate": 4.999990615778772e-05, "loss": 0.239, "step": 13900 }, { "epoch": 0.5054873173922524, "grad_norm": 1.1825790405273438, "learning_rate": 4.9999891400701205e-05, "loss": 0.1774, "step": 13910 }, { "epoch": 0.5058507158950505, "grad_norm": 2.1524746417999268, "learning_rate": 4.999987556645832e-05, "loss": 0.1956, "step": 13920 }, { "epoch": 0.5062141143978487, "grad_norm": 2.8159048557281494, "learning_rate": 4.999985865505974e-05, "loss": 0.2315, "step": 13930 }, { "epoch": 0.5065775129006469, "grad_norm": 1.7412035465240479, "learning_rate": 4.99998406665062e-05, "loss": 0.2106, "step": 13940 }, { "epoch": 0.506940911403445, "grad_norm": 61.967708587646484, "learning_rate": 4.999982160079848e-05, "loss": 0.3067, "step": 13950 }, { "epoch": 0.5073043099062432, "grad_norm": 2.378682851791382, "learning_rate": 4.9999801457937404e-05, "loss": 0.21, "step": 13960 }, { "epoch": 0.5076677084090414, "grad_norm": 1.3668854236602783, "learning_rate": 4.9999780237923824e-05, "loss": 0.1529, "step": 13970 }, { "epoch": 0.5080311069118395, "grad_norm": 2.2655959129333496, "learning_rate": 4.9999757940758665e-05, "loss": 0.1747, "step": 13980 }, { "epoch": 0.5083945054146377, "grad_norm": 1.5975615978240967, "learning_rate": 4.9999734566442877e-05, "loss": 0.1728, "step": 13990 }, { "epoch": 0.5087579039174358, "grad_norm": 9.869553565979004, "learning_rate": 4.999971011497748e-05, "loss": 0.3207, "step": 14000 }, { "epoch": 0.509121302420234, "grad_norm": 2.3095829486846924, "learning_rate": 4.999968458636353e-05, "loss": 0.1872, "step": 14010 }, { "epoch": 0.5094847009230322, "grad_norm": 2.059575080871582, "learning_rate": 4.999965798060212e-05, "loss": 0.3059, "step": 14020 }, { "epoch": 0.5098480994258303, "grad_norm": 1.7138803005218506, "learning_rate": 4.9999630297694395e-05, "loss": 0.2025, "step": 14030 }, { "epoch": 0.5102114979286285, "grad_norm": 2.831191062927246, "learning_rate": 4.999960153764155e-05, "loss": 0.1685, "step": 14040 }, { "epoch": 0.5105748964314267, "grad_norm": 15.457362174987793, "learning_rate": 4.999957170044482e-05, "loss": 0.2165, "step": 14050 }, { "epoch": 0.5109382949342248, "grad_norm": 3.923633337020874, "learning_rate": 4.999954078610549e-05, "loss": 0.1888, "step": 14060 }, { "epoch": 0.511301693437023, "grad_norm": 0.8243936896324158, "learning_rate": 4.999950879462491e-05, "loss": 0.295, "step": 14070 }, { "epoch": 0.5116650919398212, "grad_norm": 2.921447277069092, "learning_rate": 4.9999475726004434e-05, "loss": 0.3208, "step": 14080 }, { "epoch": 0.5120284904426193, "grad_norm": 0.9395463466644287, "learning_rate": 4.99994415802455e-05, "loss": 0.1936, "step": 14090 }, { "epoch": 0.5123918889454175, "grad_norm": 11.025691986083984, "learning_rate": 4.999940635734958e-05, "loss": 0.2581, "step": 14100 }, { "epoch": 0.5127552874482157, "grad_norm": 2.2102460861206055, "learning_rate": 4.999937005731818e-05, "loss": 0.1888, "step": 14110 }, { "epoch": 0.5131186859510138, "grad_norm": 1.6075447797775269, "learning_rate": 4.9999332680152876e-05, "loss": 0.1557, "step": 14120 }, { "epoch": 0.513482084453812, "grad_norm": 3.0174403190612793, "learning_rate": 4.999929422585528e-05, "loss": 0.2137, "step": 14130 }, { "epoch": 0.5138454829566103, "grad_norm": 3.2911272048950195, "learning_rate": 4.999925469442705e-05, "loss": 0.2249, "step": 14140 }, { "epoch": 0.5142088814594084, "grad_norm": 4.0001444816589355, "learning_rate": 4.999921408586986e-05, "loss": 0.2548, "step": 14150 }, { "epoch": 0.5145722799622066, "grad_norm": 2.7695538997650146, "learning_rate": 4.9999172400185504e-05, "loss": 0.2107, "step": 14160 }, { "epoch": 0.5149356784650048, "grad_norm": 1.420189380645752, "learning_rate": 4.999912963737574e-05, "loss": 0.1887, "step": 14170 }, { "epoch": 0.5152990769678029, "grad_norm": 1.4330711364746094, "learning_rate": 4.9999085797442434e-05, "loss": 0.2295, "step": 14180 }, { "epoch": 0.5156624754706011, "grad_norm": 1.9518648386001587, "learning_rate": 4.999904088038747e-05, "loss": 0.181, "step": 14190 }, { "epoch": 0.5160258739733993, "grad_norm": 9.763446807861328, "learning_rate": 4.999899488621278e-05, "loss": 0.2163, "step": 14200 }, { "epoch": 0.5163892724761974, "grad_norm": 1.63487708568573, "learning_rate": 4.999894781492035e-05, "loss": 0.1675, "step": 14210 }, { "epoch": 0.5167526709789956, "grad_norm": 1.3337619304656982, "learning_rate": 4.99988996665122e-05, "loss": 1.8258, "step": 14220 }, { "epoch": 0.5171160694817938, "grad_norm": 4.741299152374268, "learning_rate": 4.9998850440990414e-05, "loss": 0.199, "step": 14230 }, { "epoch": 0.5174794679845919, "grad_norm": 2.203994035720825, "learning_rate": 4.9998800138357106e-05, "loss": 0.1666, "step": 14240 }, { "epoch": 0.5178428664873901, "grad_norm": 9.144301414489746, "learning_rate": 4.999874875861444e-05, "loss": 0.2567, "step": 14250 }, { "epoch": 0.5182062649901883, "grad_norm": 1.432627558708191, "learning_rate": 4.9998696301764644e-05, "loss": 0.1842, "step": 14260 }, { "epoch": 0.5185696634929864, "grad_norm": 1.5303106307983398, "learning_rate": 4.999864276780998e-05, "loss": 0.1726, "step": 14270 }, { "epoch": 0.5189330619957846, "grad_norm": 13.468036651611328, "learning_rate": 4.999858815675273e-05, "loss": 0.1927, "step": 14280 }, { "epoch": 0.5192964604985827, "grad_norm": 3.7133965492248535, "learning_rate": 4.999853246859526e-05, "loss": 0.1822, "step": 14290 }, { "epoch": 0.5196598590013809, "grad_norm": 10.077652931213379, "learning_rate": 4.999847570333998e-05, "loss": 0.2847, "step": 14300 }, { "epoch": 0.5200232575041791, "grad_norm": 2.3906922340393066, "learning_rate": 4.9998417860989325e-05, "loss": 0.1962, "step": 14310 }, { "epoch": 0.5203866560069772, "grad_norm": 0.8041434288024902, "learning_rate": 4.999835894154579e-05, "loss": 0.1661, "step": 14320 }, { "epoch": 0.5207500545097754, "grad_norm": 4.1071953773498535, "learning_rate": 4.99982989450119e-05, "loss": 0.2012, "step": 14330 }, { "epoch": 0.5211134530125736, "grad_norm": 0.9645094871520996, "learning_rate": 4.999823787139026e-05, "loss": 0.209, "step": 14340 }, { "epoch": 0.5214768515153717, "grad_norm": 19.18789291381836, "learning_rate": 4.9998175720683506e-05, "loss": 0.3019, "step": 14350 }, { "epoch": 0.5218402500181699, "grad_norm": 1.6560392379760742, "learning_rate": 4.999811249289429e-05, "loss": 0.1696, "step": 14360 }, { "epoch": 0.5222036485209681, "grad_norm": 1.993741512298584, "learning_rate": 4.999804818802535e-05, "loss": 0.1895, "step": 14370 }, { "epoch": 0.5225670470237662, "grad_norm": 3.4508492946624756, "learning_rate": 4.999798280607947e-05, "loss": 0.2111, "step": 14380 }, { "epoch": 0.5229304455265644, "grad_norm": 8.431037902832031, "learning_rate": 4.999791634705944e-05, "loss": 0.1898, "step": 14390 }, { "epoch": 0.5232938440293626, "grad_norm": 10.659805297851562, "learning_rate": 4.9997848810968137e-05, "loss": 0.4744, "step": 14400 }, { "epoch": 0.5232938440293626, "eval_loss": 0.40915772318840027, "eval_runtime": 179.6286, "eval_samples_per_second": 41.274, "eval_steps_per_second": 5.161, "eval_wer": 0.2273222357361991, "step": 14400 }, { "epoch": 0.5236572425321607, "grad_norm": 1.676483392715454, "learning_rate": 4.999778019780849e-05, "loss": 0.1856, "step": 14410 }, { "epoch": 0.5240206410349589, "grad_norm": 3.4859771728515625, "learning_rate": 4.9997710507583414e-05, "loss": 0.1641, "step": 14420 }, { "epoch": 0.5243840395377571, "grad_norm": 2.583261251449585, "learning_rate": 4.999763974029595e-05, "loss": 0.2545, "step": 14430 }, { "epoch": 0.5247474380405552, "grad_norm": 2.0467324256896973, "learning_rate": 4.999756789594913e-05, "loss": 0.1974, "step": 14440 }, { "epoch": 0.5251108365433534, "grad_norm": 4.777310848236084, "learning_rate": 4.999749497454605e-05, "loss": 0.2653, "step": 14450 }, { "epoch": 0.5254742350461516, "grad_norm": 1.6312458515167236, "learning_rate": 4.999742097608984e-05, "loss": 0.1503, "step": 14460 }, { "epoch": 0.5258376335489497, "grad_norm": 1.1725629568099976, "learning_rate": 4.999734590058371e-05, "loss": 0.1636, "step": 14470 }, { "epoch": 0.526201032051748, "grad_norm": 3.2061386108398438, "learning_rate": 4.999726974803089e-05, "loss": 0.1988, "step": 14480 }, { "epoch": 0.5265644305545462, "grad_norm": 1.7078185081481934, "learning_rate": 4.9997192518434655e-05, "loss": 0.1763, "step": 14490 }, { "epoch": 0.5269278290573443, "grad_norm": 3.5756313800811768, "learning_rate": 4.999711421179833e-05, "loss": 0.2651, "step": 14500 }, { "epoch": 0.5272912275601425, "grad_norm": 1.8054040670394897, "learning_rate": 4.99970348281253e-05, "loss": 0.1923, "step": 14510 }, { "epoch": 0.5276546260629407, "grad_norm": 2.8949921131134033, "learning_rate": 4.9996954367418976e-05, "loss": 0.1897, "step": 14520 }, { "epoch": 0.5280180245657388, "grad_norm": 2.0020744800567627, "learning_rate": 4.9996872829682825e-05, "loss": 0.2469, "step": 14530 }, { "epoch": 0.528381423068537, "grad_norm": 1.1650570631027222, "learning_rate": 4.999679021492037e-05, "loss": 0.2088, "step": 14540 }, { "epoch": 0.5287448215713352, "grad_norm": 14.624237060546875, "learning_rate": 4.999670652313516e-05, "loss": 0.2918, "step": 14550 }, { "epoch": 0.5291082200741333, "grad_norm": 1.6658445596694946, "learning_rate": 4.99966217543308e-05, "loss": 0.1936, "step": 14560 }, { "epoch": 0.5294716185769315, "grad_norm": 2.0761842727661133, "learning_rate": 4.9996535908510955e-05, "loss": 0.2318, "step": 14570 }, { "epoch": 0.5298350170797297, "grad_norm": 2.475193977355957, "learning_rate": 4.999644898567931e-05, "loss": 0.1682, "step": 14580 }, { "epoch": 0.5301984155825278, "grad_norm": 1.7537975311279297, "learning_rate": 4.9996360985839616e-05, "loss": 0.1528, "step": 14590 }, { "epoch": 0.530561814085326, "grad_norm": 6.486474990844727, "learning_rate": 4.9996271908995666e-05, "loss": 0.2571, "step": 14600 }, { "epoch": 0.5309252125881241, "grad_norm": 2.308250665664673, "learning_rate": 4.9996181755151294e-05, "loss": 0.1764, "step": 14610 }, { "epoch": 0.5312886110909223, "grad_norm": 4.871829032897949, "learning_rate": 4.999609052431039e-05, "loss": 0.3045, "step": 14620 }, { "epoch": 0.5316520095937205, "grad_norm": 2.796844959259033, "learning_rate": 4.999599821647688e-05, "loss": 0.2102, "step": 14630 }, { "epoch": 0.5320154080965186, "grad_norm": 1.9681658744812012, "learning_rate": 4.999590483165475e-05, "loss": 0.1882, "step": 14640 }, { "epoch": 0.5323788065993168, "grad_norm": 5.858233451843262, "learning_rate": 4.9995810369848006e-05, "loss": 0.314, "step": 14650 }, { "epoch": 0.532742205102115, "grad_norm": 6.469663143157959, "learning_rate": 4.9995714831060736e-05, "loss": 0.2103, "step": 14660 }, { "epoch": 0.5331056036049131, "grad_norm": 1.8543453216552734, "learning_rate": 4.999561821529705e-05, "loss": 0.219, "step": 14670 }, { "epoch": 0.5334690021077113, "grad_norm": 2.222320318222046, "learning_rate": 4.99955205225611e-05, "loss": 0.1879, "step": 14680 }, { "epoch": 0.5338324006105095, "grad_norm": 5.018227577209473, "learning_rate": 4.999542175285711e-05, "loss": 0.1437, "step": 14690 }, { "epoch": 0.5341957991133076, "grad_norm": 6.225541114807129, "learning_rate": 4.999532190618933e-05, "loss": 0.268, "step": 14700 }, { "epoch": 0.5345591976161058, "grad_norm": 1.8122676610946655, "learning_rate": 4.999522098256206e-05, "loss": 0.1644, "step": 14710 }, { "epoch": 0.534922596118904, "grad_norm": 2.4057557582855225, "learning_rate": 4.999511898197966e-05, "loss": 0.1663, "step": 14720 }, { "epoch": 0.5352859946217021, "grad_norm": 1.756697416305542, "learning_rate": 4.9995015904446513e-05, "loss": 0.1771, "step": 14730 }, { "epoch": 0.5356493931245003, "grad_norm": 1.5457457304000854, "learning_rate": 4.999491174996706e-05, "loss": 0.1889, "step": 14740 }, { "epoch": 0.5360127916272985, "grad_norm": 3.108682155609131, "learning_rate": 4.999480651854579e-05, "loss": 0.2063, "step": 14750 }, { "epoch": 0.5363761901300966, "grad_norm": 2.2037875652313232, "learning_rate": 4.9994700210187246e-05, "loss": 0.1579, "step": 14760 }, { "epoch": 0.5367395886328948, "grad_norm": 1.2102454900741577, "learning_rate": 4.9994592824895994e-05, "loss": 0.2361, "step": 14770 }, { "epoch": 0.537102987135693, "grad_norm": 6.5722455978393555, "learning_rate": 4.999448436267667e-05, "loss": 0.2165, "step": 14780 }, { "epoch": 0.5374663856384911, "grad_norm": 1.606378197669983, "learning_rate": 4.999437482353395e-05, "loss": 0.1642, "step": 14790 }, { "epoch": 0.5378297841412893, "grad_norm": 24.709177017211914, "learning_rate": 4.999426420747255e-05, "loss": 0.2628, "step": 14800 }, { "epoch": 0.5381931826440876, "grad_norm": 2.543760299682617, "learning_rate": 4.999415251449723e-05, "loss": 0.1883, "step": 14810 }, { "epoch": 0.5385565811468856, "grad_norm": 2.0813279151916504, "learning_rate": 4.999403974461281e-05, "loss": 0.1842, "step": 14820 }, { "epoch": 0.5389199796496839, "grad_norm": 4.744104385375977, "learning_rate": 4.9993925897824144e-05, "loss": 0.1981, "step": 14830 }, { "epoch": 0.5392833781524821, "grad_norm": 3.2407493591308594, "learning_rate": 4.9993810974136146e-05, "loss": 0.2169, "step": 14840 }, { "epoch": 0.5396467766552802, "grad_norm": 13.33681869506836, "learning_rate": 4.999369497355375e-05, "loss": 0.2775, "step": 14850 }, { "epoch": 0.5400101751580784, "grad_norm": 2.3192784786224365, "learning_rate": 4.9993577896081975e-05, "loss": 0.1987, "step": 14860 }, { "epoch": 0.5403735736608766, "grad_norm": 1.6611911058425903, "learning_rate": 4.999345974172586e-05, "loss": 0.188, "step": 14870 }, { "epoch": 0.5407369721636747, "grad_norm": 4.368532180786133, "learning_rate": 4.9993340510490485e-05, "loss": 0.2201, "step": 14880 }, { "epoch": 0.5411003706664729, "grad_norm": 1.4825586080551147, "learning_rate": 4.999322020238099e-05, "loss": 0.185, "step": 14890 }, { "epoch": 0.541463769169271, "grad_norm": 4.346343994140625, "learning_rate": 4.9993098817402564e-05, "loss": 0.2415, "step": 14900 }, { "epoch": 0.5418271676720692, "grad_norm": 1.0175251960754395, "learning_rate": 4.999297635556044e-05, "loss": 0.1991, "step": 14910 }, { "epoch": 0.5421905661748674, "grad_norm": 1.600205421447754, "learning_rate": 4.999285281685989e-05, "loss": 0.1706, "step": 14920 }, { "epoch": 0.5425539646776655, "grad_norm": 4.332497596740723, "learning_rate": 4.999272820130623e-05, "loss": 0.1964, "step": 14930 }, { "epoch": 0.5429173631804637, "grad_norm": 2.0384531021118164, "learning_rate": 4.999260250890484e-05, "loss": 0.1571, "step": 14940 }, { "epoch": 0.5432807616832619, "grad_norm": 11.780756950378418, "learning_rate": 4.999247573966114e-05, "loss": 0.319, "step": 14950 }, { "epoch": 0.54364416018606, "grad_norm": 2.7058663368225098, "learning_rate": 4.999234789358057e-05, "loss": 0.2009, "step": 14960 }, { "epoch": 0.5440075586888582, "grad_norm": 1.966780662536621, "learning_rate": 4.999221897066866e-05, "loss": 0.177, "step": 14970 }, { "epoch": 0.5443709571916564, "grad_norm": 2.2129642963409424, "learning_rate": 4.999208897093096e-05, "loss": 0.2472, "step": 14980 }, { "epoch": 0.5447343556944545, "grad_norm": 2.726358652114868, "learning_rate": 4.9991957894373064e-05, "loss": 0.2239, "step": 14990 }, { "epoch": 0.5450977541972527, "grad_norm": 28.577600479125977, "learning_rate": 4.999182574100063e-05, "loss": 0.2524, "step": 15000 }, { "epoch": 0.5450977541972527, "eval_loss": 0.3972287178039551, "eval_runtime": 180.8086, "eval_samples_per_second": 41.005, "eval_steps_per_second": 5.127, "eval_wer": 0.2289560150307695, "step": 15000 }, { "epoch": 0.5454611527000509, "grad_norm": 1.9243866205215454, "learning_rate": 4.9991692510819335e-05, "loss": 0.1679, "step": 15010 }, { "epoch": 0.545824551202849, "grad_norm": 1.3926585912704468, "learning_rate": 4.9991558203834944e-05, "loss": 0.1933, "step": 15020 }, { "epoch": 0.5461879497056472, "grad_norm": 5.275027751922607, "learning_rate": 4.999142282005322e-05, "loss": 0.2838, "step": 15030 }, { "epoch": 0.5465513482084454, "grad_norm": 2.142784357070923, "learning_rate": 4.999128635948e-05, "loss": 0.1754, "step": 15040 }, { "epoch": 0.5469147467112435, "grad_norm": 40.32966995239258, "learning_rate": 4.999114882212119e-05, "loss": 0.323, "step": 15050 }, { "epoch": 0.5472781452140417, "grad_norm": 1.056662678718567, "learning_rate": 4.999101020798268e-05, "loss": 0.1462, "step": 15060 }, { "epoch": 0.5476415437168399, "grad_norm": 3.7527568340301514, "learning_rate": 4.9990870517070464e-05, "loss": 0.2106, "step": 15070 }, { "epoch": 0.548004942219638, "grad_norm": 3.396487236022949, "learning_rate": 4.9990729749390555e-05, "loss": 0.1995, "step": 15080 }, { "epoch": 0.5483683407224362, "grad_norm": 1.650519609451294, "learning_rate": 4.999058790494902e-05, "loss": 0.195, "step": 15090 }, { "epoch": 0.5487317392252344, "grad_norm": 16.096418380737305, "learning_rate": 4.9990444983751975e-05, "loss": 0.2705, "step": 15100 }, { "epoch": 0.5490951377280325, "grad_norm": 1.273149847984314, "learning_rate": 4.999030098580556e-05, "loss": 0.2216, "step": 15110 }, { "epoch": 0.5494585362308307, "grad_norm": 1.5414496660232544, "learning_rate": 4.9990155911115995e-05, "loss": 0.1876, "step": 15120 }, { "epoch": 0.549821934733629, "grad_norm": 4.707805633544922, "learning_rate": 4.9990009759689524e-05, "loss": 0.1895, "step": 15130 }, { "epoch": 0.550185333236427, "grad_norm": 2.033162832260132, "learning_rate": 4.9989862531532456e-05, "loss": 0.1705, "step": 15140 }, { "epoch": 0.5505487317392252, "grad_norm": 7.349232196807861, "learning_rate": 4.998971422665112e-05, "loss": 0.2815, "step": 15150 }, { "epoch": 0.5509121302420235, "grad_norm": 1.293078064918518, "learning_rate": 4.9989564845051915e-05, "loss": 0.1789, "step": 15160 }, { "epoch": 0.5512755287448216, "grad_norm": 1.7343147993087769, "learning_rate": 4.998941438674127e-05, "loss": 0.1781, "step": 15170 }, { "epoch": 0.5516389272476198, "grad_norm": 2.440030574798584, "learning_rate": 4.9989262851725674e-05, "loss": 0.1927, "step": 15180 }, { "epoch": 0.5520023257504179, "grad_norm": 2.276111364364624, "learning_rate": 4.998911024001165e-05, "loss": 0.1774, "step": 15190 }, { "epoch": 0.5523657242532161, "grad_norm": 9.360533714294434, "learning_rate": 4.9988956551605783e-05, "loss": 0.2761, "step": 15200 }, { "epoch": 0.5527291227560143, "grad_norm": 3.8025522232055664, "learning_rate": 4.998880178651468e-05, "loss": 0.2855, "step": 15210 }, { "epoch": 0.5530925212588124, "grad_norm": 3.816631555557251, "learning_rate": 4.998864594474503e-05, "loss": 0.1559, "step": 15220 }, { "epoch": 0.5534559197616106, "grad_norm": 3.2255067825317383, "learning_rate": 4.998848902630353e-05, "loss": 0.1632, "step": 15230 }, { "epoch": 0.5538193182644088, "grad_norm": 1.077268123626709, "learning_rate": 4.9988331031196944e-05, "loss": 0.1969, "step": 15240 }, { "epoch": 0.5541827167672069, "grad_norm": 5.657801151275635, "learning_rate": 4.998817195943209e-05, "loss": 0.2361, "step": 15250 }, { "epoch": 0.5545461152700051, "grad_norm": 1.180039882659912, "learning_rate": 4.998801181101581e-05, "loss": 0.1779, "step": 15260 }, { "epoch": 0.5549095137728033, "grad_norm": 2.12725830078125, "learning_rate": 4.998785058595501e-05, "loss": 0.1505, "step": 15270 }, { "epoch": 0.5552729122756014, "grad_norm": 2.0784361362457275, "learning_rate": 4.998768828425664e-05, "loss": 0.2221, "step": 15280 }, { "epoch": 0.5556363107783996, "grad_norm": 2.0133538246154785, "learning_rate": 4.998752490592768e-05, "loss": 0.1759, "step": 15290 }, { "epoch": 0.5559997092811978, "grad_norm": 3.3181140422821045, "learning_rate": 4.998736045097518e-05, "loss": 0.229, "step": 15300 }, { "epoch": 0.5563631077839959, "grad_norm": 1.2881536483764648, "learning_rate": 4.998719491940622e-05, "loss": 0.1928, "step": 15310 }, { "epoch": 0.5567265062867941, "grad_norm": 1.0155376195907593, "learning_rate": 4.998702831122794e-05, "loss": 0.1986, "step": 15320 }, { "epoch": 0.5570899047895923, "grad_norm": 7.5557661056518555, "learning_rate": 4.998686062644752e-05, "loss": 0.2317, "step": 15330 }, { "epoch": 0.5574533032923904, "grad_norm": 2.3196377754211426, "learning_rate": 4.9986691865072176e-05, "loss": 0.1827, "step": 15340 }, { "epoch": 0.5578167017951886, "grad_norm": 25.910188674926758, "learning_rate": 4.998652202710918e-05, "loss": 0.2824, "step": 15350 }, { "epoch": 0.5581801002979868, "grad_norm": 1.0091907978057861, "learning_rate": 4.9986351112565846e-05, "loss": 0.1946, "step": 15360 }, { "epoch": 0.5585434988007849, "grad_norm": 3.0022408962249756, "learning_rate": 4.998617912144956e-05, "loss": 0.2028, "step": 15370 }, { "epoch": 0.5589068973035831, "grad_norm": 2.9837419986724854, "learning_rate": 4.99860060537677e-05, "loss": 0.203, "step": 15380 }, { "epoch": 0.5592702958063813, "grad_norm": 2.238867998123169, "learning_rate": 4.9985831909527746e-05, "loss": 0.1392, "step": 15390 }, { "epoch": 0.5596336943091794, "grad_norm": 3.8585119247436523, "learning_rate": 4.9985656688737205e-05, "loss": 0.2289, "step": 15400 }, { "epoch": 0.5599970928119776, "grad_norm": 2.4951331615448, "learning_rate": 4.998548039140361e-05, "loss": 0.1852, "step": 15410 }, { "epoch": 0.5603604913147758, "grad_norm": 1.8404667377471924, "learning_rate": 4.998530301753455e-05, "loss": 0.1813, "step": 15420 }, { "epoch": 0.5607238898175739, "grad_norm": 2.615247964859009, "learning_rate": 4.9985124567137695e-05, "loss": 0.228, "step": 15430 }, { "epoch": 0.5610872883203721, "grad_norm": 1.2074272632598877, "learning_rate": 4.9984945040220715e-05, "loss": 0.1879, "step": 15440 }, { "epoch": 0.5614506868231703, "grad_norm": 38.466712951660156, "learning_rate": 4.9984764436791355e-05, "loss": 0.3965, "step": 15450 }, { "epoch": 0.5618140853259684, "grad_norm": 1.4196547269821167, "learning_rate": 4.998458275685739e-05, "loss": 0.2061, "step": 15460 }, { "epoch": 0.5621774838287666, "grad_norm": 1.2451281547546387, "learning_rate": 4.998440000042664e-05, "loss": 0.2118, "step": 15470 }, { "epoch": 0.5625408823315647, "grad_norm": 3.7021896839141846, "learning_rate": 4.9984216167507005e-05, "loss": 0.2294, "step": 15480 }, { "epoch": 0.562904280834363, "grad_norm": 2.8826780319213867, "learning_rate": 4.998403125810638e-05, "loss": 0.1654, "step": 15490 }, { "epoch": 0.5632676793371612, "grad_norm": 8.366926193237305, "learning_rate": 4.998384527223274e-05, "loss": 0.2467, "step": 15500 }, { "epoch": 0.5636310778399592, "grad_norm": 2.2532148361206055, "learning_rate": 4.99836582098941e-05, "loss": 0.2569, "step": 15510 }, { "epoch": 0.5639944763427575, "grad_norm": 2.164987325668335, "learning_rate": 4.998347007109853e-05, "loss": 0.2167, "step": 15520 }, { "epoch": 0.5643578748455557, "grad_norm": 4.651108264923096, "learning_rate": 4.998328085585411e-05, "loss": 0.2138, "step": 15530 }, { "epoch": 0.5647212733483538, "grad_norm": 1.5128902196884155, "learning_rate": 4.9983090564169024e-05, "loss": 0.1821, "step": 15540 }, { "epoch": 0.565084671851152, "grad_norm": 8.516124725341797, "learning_rate": 4.998289919605145e-05, "loss": 0.2546, "step": 15550 }, { "epoch": 0.5654480703539502, "grad_norm": 1.6480666399002075, "learning_rate": 4.9982706751509635e-05, "loss": 0.2069, "step": 15560 }, { "epoch": 0.5658114688567483, "grad_norm": 1.3768938779830933, "learning_rate": 4.998251323055187e-05, "loss": 0.1775, "step": 15570 }, { "epoch": 0.5661748673595465, "grad_norm": 1.8793795108795166, "learning_rate": 4.998231863318651e-05, "loss": 0.14, "step": 15580 }, { "epoch": 0.5665382658623447, "grad_norm": 1.2361701726913452, "learning_rate": 4.9982122959421924e-05, "loss": 0.1797, "step": 15590 }, { "epoch": 0.5669016643651428, "grad_norm": 14.16727352142334, "learning_rate": 4.998192620926655e-05, "loss": 0.3523, "step": 15600 }, { "epoch": 0.5669016643651428, "eval_loss": 0.40661031007766724, "eval_runtime": 180.2598, "eval_samples_per_second": 41.13, "eval_steps_per_second": 5.143, "eval_wer": 0.21753771307204967, "step": 15600 }, { "epoch": 0.567265062867941, "grad_norm": 2.460245370864868, "learning_rate": 4.9981728382728855e-05, "loss": 0.1824, "step": 15610 }, { "epoch": 0.5676284613707392, "grad_norm": 1.603381633758545, "learning_rate": 4.9981529479817366e-05, "loss": 0.1506, "step": 15620 }, { "epoch": 0.5679918598735373, "grad_norm": 3.650087356567383, "learning_rate": 4.9981329500540664e-05, "loss": 0.2351, "step": 15630 }, { "epoch": 0.5683552583763355, "grad_norm": 2.2338075637817383, "learning_rate": 4.9981128444907354e-05, "loss": 0.1785, "step": 15640 }, { "epoch": 0.5687186568791337, "grad_norm": 7.641642093658447, "learning_rate": 4.998092631292611e-05, "loss": 0.2816, "step": 15650 }, { "epoch": 0.5690820553819318, "grad_norm": 1.5877048969268799, "learning_rate": 4.998072310460562e-05, "loss": 0.1784, "step": 15660 }, { "epoch": 0.56944545388473, "grad_norm": 3.5917787551879883, "learning_rate": 4.998051881995466e-05, "loss": 0.1685, "step": 15670 }, { "epoch": 0.5698088523875282, "grad_norm": 6.459184169769287, "learning_rate": 4.998031345898203e-05, "loss": 0.2031, "step": 15680 }, { "epoch": 0.5701722508903263, "grad_norm": 2.7518184185028076, "learning_rate": 4.9980107021696565e-05, "loss": 0.168, "step": 15690 }, { "epoch": 0.5705356493931245, "grad_norm": 9.814598083496094, "learning_rate": 4.997989950810718e-05, "loss": 0.2778, "step": 15700 }, { "epoch": 0.5708990478959227, "grad_norm": 2.0985398292541504, "learning_rate": 4.9979690918222785e-05, "loss": 0.1864, "step": 15710 }, { "epoch": 0.5712624463987208, "grad_norm": 1.9264591932296753, "learning_rate": 4.997948125205241e-05, "loss": 0.1682, "step": 15720 }, { "epoch": 0.571625844901519, "grad_norm": 4.2961955070495605, "learning_rate": 4.997927050960505e-05, "loss": 0.198, "step": 15730 }, { "epoch": 0.5719892434043172, "grad_norm": 4.524483680725098, "learning_rate": 4.99790586908898e-05, "loss": 0.3235, "step": 15740 }, { "epoch": 0.5723526419071153, "grad_norm": 5.259559154510498, "learning_rate": 4.997884579591578e-05, "loss": 0.335, "step": 15750 }, { "epoch": 0.5727160404099135, "grad_norm": 1.7875639200210571, "learning_rate": 4.997863182469219e-05, "loss": 0.1674, "step": 15760 }, { "epoch": 0.5730794389127116, "grad_norm": 1.1852960586547852, "learning_rate": 4.9978416777228216e-05, "loss": 0.1968, "step": 15770 }, { "epoch": 0.5734428374155098, "grad_norm": 1.253061294555664, "learning_rate": 4.997820065353314e-05, "loss": 0.2177, "step": 15780 }, { "epoch": 0.573806235918308, "grad_norm": 2.0577871799468994, "learning_rate": 4.9977983453616266e-05, "loss": 0.1498, "step": 15790 }, { "epoch": 0.5741696344211061, "grad_norm": 7.4168901443481445, "learning_rate": 4.997776517748696e-05, "loss": 0.3137, "step": 15800 }, { "epoch": 0.5745330329239043, "grad_norm": 2.9957845211029053, "learning_rate": 4.9977545825154625e-05, "loss": 0.1819, "step": 15810 }, { "epoch": 0.5748964314267025, "grad_norm": 1.251610517501831, "learning_rate": 4.997732539662871e-05, "loss": 0.1633, "step": 15820 }, { "epoch": 0.5752598299295006, "grad_norm": 3.229581594467163, "learning_rate": 4.997710389191871e-05, "loss": 0.1888, "step": 15830 }, { "epoch": 0.5756232284322989, "grad_norm": 1.2718089818954468, "learning_rate": 4.997688131103417e-05, "loss": 0.1938, "step": 15840 }, { "epoch": 0.5759866269350971, "grad_norm": 4.77078104019165, "learning_rate": 4.9976657653984694e-05, "loss": 0.2311, "step": 15850 }, { "epoch": 0.5763500254378952, "grad_norm": 1.9487907886505127, "learning_rate": 4.9976432920779904e-05, "loss": 0.7679, "step": 15860 }, { "epoch": 0.5767134239406934, "grad_norm": 2.1322100162506104, "learning_rate": 4.997620711142948e-05, "loss": 0.204, "step": 15870 }, { "epoch": 0.5770768224434916, "grad_norm": 3.0756008625030518, "learning_rate": 4.997598022594316e-05, "loss": 0.205, "step": 15880 }, { "epoch": 0.5774402209462897, "grad_norm": 2.2399511337280273, "learning_rate": 4.997575226433071e-05, "loss": 0.183, "step": 15890 }, { "epoch": 0.5778036194490879, "grad_norm": 4.17095947265625, "learning_rate": 4.997552322660197e-05, "loss": 0.2209, "step": 15900 }, { "epoch": 0.5781670179518861, "grad_norm": 1.7085528373718262, "learning_rate": 4.9975293112766794e-05, "loss": 0.4196, "step": 15910 }, { "epoch": 0.5785304164546842, "grad_norm": 1.6818984746932983, "learning_rate": 4.99750619228351e-05, "loss": 0.1592, "step": 15920 }, { "epoch": 0.5788938149574824, "grad_norm": 2.526503324508667, "learning_rate": 4.9974829656816846e-05, "loss": 0.7523, "step": 15930 }, { "epoch": 0.5792572134602806, "grad_norm": 4.456855297088623, "learning_rate": 4.997459631472205e-05, "loss": 0.1664, "step": 15940 }, { "epoch": 0.5796206119630787, "grad_norm": 28.427839279174805, "learning_rate": 4.9974361896560746e-05, "loss": 0.2891, "step": 15950 }, { "epoch": 0.5799840104658769, "grad_norm": 1.6720882654190063, "learning_rate": 4.997412640234306e-05, "loss": 0.1522, "step": 15960 }, { "epoch": 0.5803474089686751, "grad_norm": 1.6327390670776367, "learning_rate": 4.997388983207911e-05, "loss": 0.1957, "step": 15970 }, { "epoch": 0.5807108074714732, "grad_norm": 1.5792416334152222, "learning_rate": 4.997365218577912e-05, "loss": 0.2325, "step": 15980 }, { "epoch": 0.5810742059742714, "grad_norm": 1.7585738897323608, "learning_rate": 4.9973413463453305e-05, "loss": 0.2023, "step": 15990 }, { "epoch": 0.5814376044770696, "grad_norm": 8.14810562133789, "learning_rate": 4.997317366511196e-05, "loss": 0.2093, "step": 16000 }, { "epoch": 0.5818010029798677, "grad_norm": 3.1430416107177734, "learning_rate": 4.997293279076543e-05, "loss": 0.1742, "step": 16010 }, { "epoch": 0.5821644014826659, "grad_norm": 1.9447312355041504, "learning_rate": 4.997269084042406e-05, "loss": 0.1852, "step": 16020 }, { "epoch": 0.5825277999854641, "grad_norm": 2.1479732990264893, "learning_rate": 4.997244781409831e-05, "loss": 0.2197, "step": 16030 }, { "epoch": 0.5828911984882622, "grad_norm": 3.7066800594329834, "learning_rate": 4.9972203711798625e-05, "loss": 0.1899, "step": 16040 }, { "epoch": 0.5832545969910604, "grad_norm": 4.3598432540893555, "learning_rate": 4.9971958533535544e-05, "loss": 0.237, "step": 16050 }, { "epoch": 0.5836179954938585, "grad_norm": 3.410356283187866, "learning_rate": 4.997171227931962e-05, "loss": 0.1985, "step": 16060 }, { "epoch": 0.5839813939966567, "grad_norm": 1.6299129724502563, "learning_rate": 4.9971464949161454e-05, "loss": 0.1758, "step": 16070 }, { "epoch": 0.5843447924994549, "grad_norm": 1.993067979812622, "learning_rate": 4.9971216543071716e-05, "loss": 0.1822, "step": 16080 }, { "epoch": 0.584708191002253, "grad_norm": 1.2057979106903076, "learning_rate": 4.9970967061061104e-05, "loss": 0.1953, "step": 16090 }, { "epoch": 0.5850715895050512, "grad_norm": 34.54500961303711, "learning_rate": 4.997071650314037e-05, "loss": 0.277, "step": 16100 }, { "epoch": 0.5854349880078494, "grad_norm": 1.243656039237976, "learning_rate": 4.997046486932031e-05, "loss": 0.172, "step": 16110 }, { "epoch": 0.5857983865106475, "grad_norm": 0.6155187487602234, "learning_rate": 4.997021215961176e-05, "loss": 0.1523, "step": 16120 }, { "epoch": 0.5861617850134457, "grad_norm": 2.0203208923339844, "learning_rate": 4.9969958374025615e-05, "loss": 0.1857, "step": 16130 }, { "epoch": 0.5865251835162439, "grad_norm": 1.1912654638290405, "learning_rate": 4.9969703512572805e-05, "loss": 0.2169, "step": 16140 }, { "epoch": 0.586888582019042, "grad_norm": 3.528538227081299, "learning_rate": 4.9969447575264315e-05, "loss": 0.2428, "step": 16150 }, { "epoch": 0.5872519805218402, "grad_norm": 0.9166990518569946, "learning_rate": 4.996919056211117e-05, "loss": 0.2206, "step": 16160 }, { "epoch": 0.5876153790246385, "grad_norm": 1.4956426620483398, "learning_rate": 4.996893247312444e-05, "loss": 0.168, "step": 16170 }, { "epoch": 0.5879787775274365, "grad_norm": 1.4502993822097778, "learning_rate": 4.996867330831526e-05, "loss": 0.1767, "step": 16180 }, { "epoch": 0.5883421760302348, "grad_norm": 0.9337482452392578, "learning_rate": 4.9968413067694775e-05, "loss": 0.2035, "step": 16190 }, { "epoch": 0.588705574533033, "grad_norm": 13.500269889831543, "learning_rate": 4.996815175127422e-05, "loss": 0.2335, "step": 16200 }, { "epoch": 0.588705574533033, "eval_loss": 0.4428017735481262, "eval_runtime": 180.2159, "eval_samples_per_second": 41.14, "eval_steps_per_second": 5.144, "eval_wer": 0.22281118957285748, "step": 16200 }, { "epoch": 0.5890689730358311, "grad_norm": 5.091770648956299, "learning_rate": 4.996788935906483e-05, "loss": 0.1884, "step": 16210 }, { "epoch": 0.5894323715386293, "grad_norm": 2.379033327102661, "learning_rate": 4.996762589107793e-05, "loss": 0.196, "step": 16220 }, { "epoch": 0.5897957700414275, "grad_norm": 2.576484203338623, "learning_rate": 4.996736134732487e-05, "loss": 0.2069, "step": 16230 }, { "epoch": 0.5901591685442256, "grad_norm": 1.4235923290252686, "learning_rate": 4.9967095727817035e-05, "loss": 0.1905, "step": 16240 }, { "epoch": 0.5905225670470238, "grad_norm": 7.119918346405029, "learning_rate": 4.9966829032565886e-05, "loss": 0.2803, "step": 16250 }, { "epoch": 0.590885965549822, "grad_norm": 1.1050286293029785, "learning_rate": 4.99665612615829e-05, "loss": 0.1914, "step": 16260 }, { "epoch": 0.5912493640526201, "grad_norm": 1.403601884841919, "learning_rate": 4.9966292414879625e-05, "loss": 0.1649, "step": 16270 }, { "epoch": 0.5916127625554183, "grad_norm": 5.439052104949951, "learning_rate": 4.9966022492467635e-05, "loss": 0.1897, "step": 16280 }, { "epoch": 0.5919761610582165, "grad_norm": 1.0014379024505615, "learning_rate": 4.996575149435857e-05, "loss": 0.1472, "step": 16290 }, { "epoch": 0.5923395595610146, "grad_norm": 9.480517387390137, "learning_rate": 4.99654794205641e-05, "loss": 0.2351, "step": 16300 }, { "epoch": 0.5927029580638128, "grad_norm": 0.8406987190246582, "learning_rate": 4.9965206271095955e-05, "loss": 0.1795, "step": 16310 }, { "epoch": 0.593066356566611, "grad_norm": 1.378169298171997, "learning_rate": 4.996493204596589e-05, "loss": 0.1597, "step": 16320 }, { "epoch": 0.5934297550694091, "grad_norm": 3.9748549461364746, "learning_rate": 4.996465674518573e-05, "loss": 0.2264, "step": 16330 }, { "epoch": 0.5937931535722073, "grad_norm": 2.2626171112060547, "learning_rate": 4.996438036876734e-05, "loss": 0.1647, "step": 16340 }, { "epoch": 0.5941565520750055, "grad_norm": 3.8039205074310303, "learning_rate": 4.996410291672262e-05, "loss": 0.2204, "step": 16350 }, { "epoch": 0.5945199505778036, "grad_norm": 1.5219416618347168, "learning_rate": 4.996382438906353e-05, "loss": 0.1518, "step": 16360 }, { "epoch": 0.5948833490806018, "grad_norm": 1.4811570644378662, "learning_rate": 4.9963544785802064e-05, "loss": 0.2006, "step": 16370 }, { "epoch": 0.5952467475833999, "grad_norm": 4.7030558586120605, "learning_rate": 4.996326410695028e-05, "loss": 0.2524, "step": 16380 }, { "epoch": 0.5956101460861981, "grad_norm": 1.103624939918518, "learning_rate": 4.996298235252026e-05, "loss": 0.1558, "step": 16390 }, { "epoch": 0.5959735445889963, "grad_norm": 4.654818534851074, "learning_rate": 4.996269952252415e-05, "loss": 0.2746, "step": 16400 }, { "epoch": 0.5963369430917944, "grad_norm": 1.6746747493743896, "learning_rate": 4.996241561697413e-05, "loss": 0.1838, "step": 16410 }, { "epoch": 0.5967003415945926, "grad_norm": 3.1955924034118652, "learning_rate": 4.996213063588245e-05, "loss": 0.1773, "step": 16420 }, { "epoch": 0.5970637400973908, "grad_norm": 1.782669186592102, "learning_rate": 4.996184457926137e-05, "loss": 0.1939, "step": 16430 }, { "epoch": 0.5974271386001889, "grad_norm": 1.2277849912643433, "learning_rate": 4.996155744712322e-05, "loss": 0.1724, "step": 16440 }, { "epoch": 0.5977905371029871, "grad_norm": 25.578798294067383, "learning_rate": 4.996126923948038e-05, "loss": 0.2612, "step": 16450 }, { "epoch": 0.5981539356057853, "grad_norm": 0.984426736831665, "learning_rate": 4.9960979956345254e-05, "loss": 0.1621, "step": 16460 }, { "epoch": 0.5985173341085834, "grad_norm": 2.1299145221710205, "learning_rate": 4.9960689597730315e-05, "loss": 0.161, "step": 16470 }, { "epoch": 0.5988807326113816, "grad_norm": 2.6153085231781006, "learning_rate": 4.996039816364807e-05, "loss": 0.2122, "step": 16480 }, { "epoch": 0.5992441311141798, "grad_norm": 4.464552879333496, "learning_rate": 4.996010565411108e-05, "loss": 0.2417, "step": 16490 }, { "epoch": 0.5996075296169779, "grad_norm": 26.441349029541016, "learning_rate": 4.995981206913194e-05, "loss": 0.3103, "step": 16500 }, { "epoch": 0.5999709281197761, "grad_norm": 2.353302478790283, "learning_rate": 4.995951740872331e-05, "loss": 1.0256, "step": 16510 }, { "epoch": 0.6003343266225744, "grad_norm": 0.8436356782913208, "learning_rate": 4.995922167289788e-05, "loss": 0.1563, "step": 16520 }, { "epoch": 0.6006977251253725, "grad_norm": 3.3516342639923096, "learning_rate": 4.99589248616684e-05, "loss": 0.2441, "step": 16530 }, { "epoch": 0.6010611236281707, "grad_norm": 2.0286059379577637, "learning_rate": 4.995862697504764e-05, "loss": 0.1767, "step": 16540 }, { "epoch": 0.6014245221309689, "grad_norm": 18.248151779174805, "learning_rate": 4.9958328013048464e-05, "loss": 0.3522, "step": 16550 }, { "epoch": 0.601787920633767, "grad_norm": 2.1514463424682617, "learning_rate": 4.995802797568372e-05, "loss": 0.1771, "step": 16560 }, { "epoch": 0.6021513191365652, "grad_norm": 5.868020534515381, "learning_rate": 4.995772686296635e-05, "loss": 0.1776, "step": 16570 }, { "epoch": 0.6025147176393634, "grad_norm": 4.539637565612793, "learning_rate": 4.9957424674909336e-05, "loss": 0.2002, "step": 16580 }, { "epoch": 0.6028781161421615, "grad_norm": 1.7226190567016602, "learning_rate": 4.99571214115257e-05, "loss": 0.1927, "step": 16590 }, { "epoch": 0.6032415146449597, "grad_norm": 22.087247848510742, "learning_rate": 4.9956817072828485e-05, "loss": 0.249, "step": 16600 }, { "epoch": 0.6036049131477579, "grad_norm": 2.4267120361328125, "learning_rate": 4.995651165883083e-05, "loss": 0.1935, "step": 16610 }, { "epoch": 0.603968311650556, "grad_norm": 2.5284249782562256, "learning_rate": 4.995620516954588e-05, "loss": 0.1495, "step": 16620 }, { "epoch": 0.6043317101533542, "grad_norm": 1.5988596677780151, "learning_rate": 4.995589760498684e-05, "loss": 0.2329, "step": 16630 }, { "epoch": 0.6046951086561524, "grad_norm": 1.0771689414978027, "learning_rate": 4.9955588965166966e-05, "loss": 0.1634, "step": 16640 }, { "epoch": 0.6050585071589505, "grad_norm": 8.72423267364502, "learning_rate": 4.995527925009956e-05, "loss": 0.27, "step": 16650 }, { "epoch": 0.6054219056617487, "grad_norm": 1.3176789283752441, "learning_rate": 4.9954968459797955e-05, "loss": 1.1913, "step": 16660 }, { "epoch": 0.6057853041645468, "grad_norm": 1.8307547569274902, "learning_rate": 4.9954656594275555e-05, "loss": 0.188, "step": 16670 }, { "epoch": 0.606148702667345, "grad_norm": 2.783604621887207, "learning_rate": 4.9954343653545795e-05, "loss": 0.1791, "step": 16680 }, { "epoch": 0.6065121011701432, "grad_norm": 1.6639970541000366, "learning_rate": 4.9954029637622146e-05, "loss": 0.1829, "step": 16690 }, { "epoch": 0.6068754996729413, "grad_norm": 11.055110931396484, "learning_rate": 4.995371454651815e-05, "loss": 0.2229, "step": 16700 }, { "epoch": 0.6072388981757395, "grad_norm": 1.8166972398757935, "learning_rate": 4.9953398380247384e-05, "loss": 0.1734, "step": 16710 }, { "epoch": 0.6076022966785377, "grad_norm": 4.851889610290527, "learning_rate": 4.995308113882346e-05, "loss": 0.1716, "step": 16720 }, { "epoch": 0.6079656951813358, "grad_norm": 3.0047857761383057, "learning_rate": 4.9952762822260056e-05, "loss": 0.2125, "step": 16730 }, { "epoch": 0.608329093684134, "grad_norm": 1.1506407260894775, "learning_rate": 4.9952443430570887e-05, "loss": 0.171, "step": 16740 }, { "epoch": 0.6086924921869322, "grad_norm": 4.324979782104492, "learning_rate": 4.995212296376971e-05, "loss": 0.2365, "step": 16750 }, { "epoch": 0.6090558906897303, "grad_norm": 1.2295490503311157, "learning_rate": 4.995180142187033e-05, "loss": 0.2002, "step": 16760 }, { "epoch": 0.6094192891925285, "grad_norm": 1.454434871673584, "learning_rate": 4.995147880488661e-05, "loss": 0.1602, "step": 16770 }, { "epoch": 0.6097826876953267, "grad_norm": 2.6185641288757324, "learning_rate": 4.995115511283244e-05, "loss": 0.1904, "step": 16780 }, { "epoch": 0.6101460861981248, "grad_norm": 1.2603826522827148, "learning_rate": 4.9950830345721774e-05, "loss": 0.1892, "step": 16790 }, { "epoch": 0.610509484700923, "grad_norm": 14.189190864562988, "learning_rate": 4.9950504503568615e-05, "loss": 0.281, "step": 16800 }, { "epoch": 0.610509484700923, "eval_loss": 0.4123116433620453, "eval_runtime": 179.5081, "eval_samples_per_second": 41.302, "eval_steps_per_second": 5.164, "eval_wer": 0.22986367019441972, "step": 16800 }, { "epoch": 0.6108728832037212, "grad_norm": 1.7670204639434814, "learning_rate": 4.995017758638698e-05, "loss": 0.2581, "step": 16810 }, { "epoch": 0.6112362817065193, "grad_norm": 1.2099360227584839, "learning_rate": 4.9949849594190964e-05, "loss": 0.1762, "step": 16820 }, { "epoch": 0.6115996802093175, "grad_norm": 2.7719335556030273, "learning_rate": 4.9949520526994716e-05, "loss": 0.1867, "step": 16830 }, { "epoch": 0.6119630787121157, "grad_norm": 1.5935924053192139, "learning_rate": 4.9949190384812386e-05, "loss": 0.191, "step": 16840 }, { "epoch": 0.6123264772149138, "grad_norm": 3.511439085006714, "learning_rate": 4.994885916765821e-05, "loss": 0.2132, "step": 16850 }, { "epoch": 0.612689875717712, "grad_norm": 1.693789005279541, "learning_rate": 4.994852687554647e-05, "loss": 0.167, "step": 16860 }, { "epoch": 0.6130532742205103, "grad_norm": 2.1199066638946533, "learning_rate": 4.994819350849147e-05, "loss": 0.172, "step": 16870 }, { "epoch": 0.6134166727233084, "grad_norm": 2.724487543106079, "learning_rate": 4.9947859066507575e-05, "loss": 0.2083, "step": 16880 }, { "epoch": 0.6137800712261066, "grad_norm": 0.926547110080719, "learning_rate": 4.99475235496092e-05, "loss": 0.1517, "step": 16890 }, { "epoch": 0.6141434697289048, "grad_norm": 14.503059387207031, "learning_rate": 4.99471869578108e-05, "loss": 0.1945, "step": 16900 }, { "epoch": 0.6145068682317029, "grad_norm": 3.2206919193267822, "learning_rate": 4.994684929112687e-05, "loss": 0.1882, "step": 16910 }, { "epoch": 0.6148702667345011, "grad_norm": 2.004995107650757, "learning_rate": 4.994651054957198e-05, "loss": 0.1876, "step": 16920 }, { "epoch": 0.6152336652372993, "grad_norm": 2.0580127239227295, "learning_rate": 4.99461707331607e-05, "loss": 0.2104, "step": 16930 }, { "epoch": 0.6155970637400974, "grad_norm": 3.3028602600097656, "learning_rate": 4.9945829841907684e-05, "loss": 0.1494, "step": 16940 }, { "epoch": 0.6159604622428956, "grad_norm": 7.572249412536621, "learning_rate": 4.994548787582761e-05, "loss": 0.2381, "step": 16950 }, { "epoch": 0.6163238607456937, "grad_norm": 1.4220709800720215, "learning_rate": 4.9945144834935234e-05, "loss": 0.1916, "step": 16960 }, { "epoch": 0.6166872592484919, "grad_norm": 1.2397724390029907, "learning_rate": 4.994480071924531e-05, "loss": 0.1593, "step": 16970 }, { "epoch": 0.6170506577512901, "grad_norm": 2.2569403648376465, "learning_rate": 4.9944455528772684e-05, "loss": 0.1984, "step": 16980 }, { "epoch": 0.6174140562540882, "grad_norm": 1.811727523803711, "learning_rate": 4.994410926353221e-05, "loss": 0.1838, "step": 16990 }, { "epoch": 0.6177774547568864, "grad_norm": 2.783061981201172, "learning_rate": 4.9943761923538834e-05, "loss": 0.2217, "step": 17000 }, { "epoch": 0.6181408532596846, "grad_norm": 2.816331148147583, "learning_rate": 4.99434135088075e-05, "loss": 0.1911, "step": 17010 }, { "epoch": 0.6185042517624827, "grad_norm": 1.238916039466858, "learning_rate": 4.9943064019353234e-05, "loss": 0.1854, "step": 17020 }, { "epoch": 0.6188676502652809, "grad_norm": 5.16685152053833, "learning_rate": 4.9942713455191075e-05, "loss": 0.1797, "step": 17030 }, { "epoch": 0.6192310487680791, "grad_norm": 2.486461639404297, "learning_rate": 4.9942361816336146e-05, "loss": 0.1926, "step": 17040 }, { "epoch": 0.6195944472708772, "grad_norm": 9.018515586853027, "learning_rate": 4.994200910280359e-05, "loss": 0.2193, "step": 17050 }, { "epoch": 0.6199578457736754, "grad_norm": 1.804166555404663, "learning_rate": 4.994165531460861e-05, "loss": 0.1977, "step": 17060 }, { "epoch": 0.6203212442764736, "grad_norm": 1.2862845659255981, "learning_rate": 4.994130045176644e-05, "loss": 0.1493, "step": 17070 }, { "epoch": 0.6206846427792717, "grad_norm": 4.164750576019287, "learning_rate": 4.994094451429237e-05, "loss": 0.2548, "step": 17080 }, { "epoch": 0.6210480412820699, "grad_norm": 1.577255368232727, "learning_rate": 4.994058750220176e-05, "loss": 0.1703, "step": 17090 }, { "epoch": 0.6214114397848681, "grad_norm": 5.805021286010742, "learning_rate": 4.994022941550996e-05, "loss": 0.2976, "step": 17100 }, { "epoch": 0.6217748382876662, "grad_norm": 0.9706230163574219, "learning_rate": 4.993987025423241e-05, "loss": 0.1454, "step": 17110 }, { "epoch": 0.6221382367904644, "grad_norm": 1.4393014907836914, "learning_rate": 4.993951001838459e-05, "loss": 0.1496, "step": 17120 }, { "epoch": 0.6225016352932626, "grad_norm": 1.839086651802063, "learning_rate": 4.993914870798202e-05, "loss": 0.2256, "step": 17130 }, { "epoch": 0.6228650337960607, "grad_norm": 1.8924603462219238, "learning_rate": 4.993878632304027e-05, "loss": 0.1415, "step": 17140 }, { "epoch": 0.6232284322988589, "grad_norm": 12.03149700164795, "learning_rate": 4.993842286357494e-05, "loss": 0.7236, "step": 17150 }, { "epoch": 0.6235918308016571, "grad_norm": 2.0251877307891846, "learning_rate": 4.993805832960171e-05, "loss": 0.1913, "step": 17160 }, { "epoch": 0.6239552293044552, "grad_norm": 2.341251850128174, "learning_rate": 4.993769272113628e-05, "loss": 0.1734, "step": 17170 }, { "epoch": 0.6243186278072534, "grad_norm": 2.517820358276367, "learning_rate": 4.993732603819438e-05, "loss": 0.18, "step": 17180 }, { "epoch": 0.6246820263100517, "grad_norm": 1.6384356021881104, "learning_rate": 4.993695828079184e-05, "loss": 0.1513, "step": 17190 }, { "epoch": 0.6250454248128497, "grad_norm": 10.794693946838379, "learning_rate": 4.993658944894449e-05, "loss": 0.2282, "step": 17200 }, { "epoch": 0.625408823315648, "grad_norm": 1.2552087306976318, "learning_rate": 4.9936219542668236e-05, "loss": 0.1938, "step": 17210 }, { "epoch": 0.6257722218184462, "grad_norm": 2.423431634902954, "learning_rate": 4.993584856197899e-05, "loss": 0.1487, "step": 17220 }, { "epoch": 0.6261356203212443, "grad_norm": 1.7924834489822388, "learning_rate": 4.9935476506892763e-05, "loss": 0.195, "step": 17230 }, { "epoch": 0.6264990188240425, "grad_norm": 1.6521999835968018, "learning_rate": 4.9935103377425566e-05, "loss": 0.1652, "step": 17240 }, { "epoch": 0.6268624173268406, "grad_norm": 6.472127437591553, "learning_rate": 4.9934729173593494e-05, "loss": 0.2481, "step": 17250 }, { "epoch": 0.6272258158296388, "grad_norm": 1.8962410688400269, "learning_rate": 4.993435389541265e-05, "loss": 0.1487, "step": 17260 }, { "epoch": 0.627589214332437, "grad_norm": 1.2054486274719238, "learning_rate": 4.993397754289922e-05, "loss": 0.1496, "step": 17270 }, { "epoch": 0.6279526128352351, "grad_norm": 3.9840786457061768, "learning_rate": 4.993360011606941e-05, "loss": 0.1776, "step": 17280 }, { "epoch": 0.6283160113380333, "grad_norm": 0.9625970125198364, "learning_rate": 4.9933221614939485e-05, "loss": 0.1652, "step": 17290 }, { "epoch": 0.6286794098408315, "grad_norm": 11.166252136230469, "learning_rate": 4.993284203952575e-05, "loss": 0.233, "step": 17300 }, { "epoch": 0.6290428083436296, "grad_norm": 2.356268882751465, "learning_rate": 4.9932461389844566e-05, "loss": 0.1498, "step": 17310 }, { "epoch": 0.6294062068464278, "grad_norm": 0.9366337656974792, "learning_rate": 4.993207966591234e-05, "loss": 0.1483, "step": 17320 }, { "epoch": 0.629769605349226, "grad_norm": 5.854847431182861, "learning_rate": 4.9931696867745495e-05, "loss": 0.1603, "step": 17330 }, { "epoch": 0.6301330038520241, "grad_norm": 1.0090773105621338, "learning_rate": 4.9931312995360546e-05, "loss": 0.1475, "step": 17340 }, { "epoch": 0.6304964023548223, "grad_norm": 3.896676540374756, "learning_rate": 4.9930928048774024e-05, "loss": 0.244, "step": 17350 }, { "epoch": 0.6308598008576205, "grad_norm": 1.1872800588607788, "learning_rate": 4.993054202800252e-05, "loss": 0.1618, "step": 17360 }, { "epoch": 0.6312231993604186, "grad_norm": 1.8078994750976562, "learning_rate": 4.9930154933062654e-05, "loss": 0.1554, "step": 17370 }, { "epoch": 0.6315865978632168, "grad_norm": 1.8264563083648682, "learning_rate": 4.9929766763971126e-05, "loss": 0.162, "step": 17380 }, { "epoch": 0.631949996366015, "grad_norm": 0.6304519176483154, "learning_rate": 4.992937752074465e-05, "loss": 0.209, "step": 17390 }, { "epoch": 0.6323133948688131, "grad_norm": 4.7621917724609375, "learning_rate": 4.992898720339998e-05, "loss": 0.2393, "step": 17400 }, { "epoch": 0.6323133948688131, "eval_loss": 0.3943130671977997, "eval_runtime": 180.0553, "eval_samples_per_second": 41.176, "eval_steps_per_second": 5.148, "eval_wer": 0.21144734692395664, "step": 17400 }, { "epoch": 0.6326767933716113, "grad_norm": 1.1110138893127441, "learning_rate": 4.992859581195396e-05, "loss": 0.143, "step": 17410 }, { "epoch": 0.6330401918744095, "grad_norm": 1.2453794479370117, "learning_rate": 4.992820334642344e-05, "loss": 0.1454, "step": 17420 }, { "epoch": 0.6334035903772076, "grad_norm": 3.669144630432129, "learning_rate": 4.9927809806825335e-05, "loss": 0.2496, "step": 17430 }, { "epoch": 0.6337669888800058, "grad_norm": 2.7898483276367188, "learning_rate": 4.99274151931766e-05, "loss": 0.1614, "step": 17440 }, { "epoch": 0.634130387382804, "grad_norm": 6.725431442260742, "learning_rate": 4.992701950549423e-05, "loss": 0.2622, "step": 17450 }, { "epoch": 0.6344937858856021, "grad_norm": 1.6481575965881348, "learning_rate": 4.992662274379528e-05, "loss": 0.1713, "step": 17460 }, { "epoch": 0.6348571843884003, "grad_norm": 1.3567384481430054, "learning_rate": 4.9926224908096856e-05, "loss": 0.1725, "step": 17470 }, { "epoch": 0.6352205828911985, "grad_norm": 1.8207722902297974, "learning_rate": 4.9925825998416076e-05, "loss": 0.1973, "step": 17480 }, { "epoch": 0.6355839813939966, "grad_norm": 2.2345893383026123, "learning_rate": 4.9925426014770146e-05, "loss": 0.1847, "step": 17490 }, { "epoch": 0.6359473798967948, "grad_norm": 7.193591594696045, "learning_rate": 4.992502495717629e-05, "loss": 0.2605, "step": 17500 }, { "epoch": 0.636310778399593, "grad_norm": 1.346073865890503, "learning_rate": 4.99246228256518e-05, "loss": 0.1518, "step": 17510 }, { "epoch": 0.6366741769023911, "grad_norm": 1.5637879371643066, "learning_rate": 4.9924219620213995e-05, "loss": 0.1648, "step": 17520 }, { "epoch": 0.6370375754051893, "grad_norm": 3.2450170516967773, "learning_rate": 4.9923815340880236e-05, "loss": 0.1974, "step": 17530 }, { "epoch": 0.6374009739079874, "grad_norm": 0.9553948640823364, "learning_rate": 4.992340998766796e-05, "loss": 0.1694, "step": 17540 }, { "epoch": 0.6377643724107857, "grad_norm": 8.901055335998535, "learning_rate": 4.9923003560594625e-05, "loss": 0.2625, "step": 17550 }, { "epoch": 0.6381277709135839, "grad_norm": 1.7500522136688232, "learning_rate": 4.992259605967774e-05, "loss": 0.1799, "step": 17560 }, { "epoch": 0.638491169416382, "grad_norm": 1.4673160314559937, "learning_rate": 4.9922187484934865e-05, "loss": 0.1698, "step": 17570 }, { "epoch": 0.6388545679191802, "grad_norm": 2.5377135276794434, "learning_rate": 4.992177783638361e-05, "loss": 0.1822, "step": 17580 }, { "epoch": 0.6392179664219784, "grad_norm": 1.660311222076416, "learning_rate": 4.9921367114041625e-05, "loss": 0.1659, "step": 17590 }, { "epoch": 0.6395813649247765, "grad_norm": 8.248649597167969, "learning_rate": 4.9920955317926595e-05, "loss": 0.2384, "step": 17600 }, { "epoch": 0.6399447634275747, "grad_norm": 1.5581409931182861, "learning_rate": 4.992054244805627e-05, "loss": 0.1665, "step": 17610 }, { "epoch": 0.6403081619303729, "grad_norm": 0.9654737710952759, "learning_rate": 4.992012850444844e-05, "loss": 0.3493, "step": 17620 }, { "epoch": 0.640671560433171, "grad_norm": 3.4477317333221436, "learning_rate": 4.9919713487120935e-05, "loss": 0.2097, "step": 17630 }, { "epoch": 0.6410349589359692, "grad_norm": 1.3745356798171997, "learning_rate": 4.9919297396091634e-05, "loss": 0.1459, "step": 17640 }, { "epoch": 0.6413983574387674, "grad_norm": 4.813534259796143, "learning_rate": 4.991888023137849e-05, "loss": 0.1905, "step": 17650 }, { "epoch": 0.6417617559415655, "grad_norm": 3.118452310562134, "learning_rate": 4.9918461992999445e-05, "loss": 0.1527, "step": 17660 }, { "epoch": 0.6421251544443637, "grad_norm": 1.8424941301345825, "learning_rate": 4.991804268097253e-05, "loss": 0.1759, "step": 17670 }, { "epoch": 0.6424885529471619, "grad_norm": 7.301458835601807, "learning_rate": 4.9917622295315826e-05, "loss": 0.1662, "step": 17680 }, { "epoch": 0.64285195144996, "grad_norm": 3.133114814758301, "learning_rate": 4.991720083604743e-05, "loss": 0.1692, "step": 17690 }, { "epoch": 0.6432153499527582, "grad_norm": 11.538620948791504, "learning_rate": 4.99167783031855e-05, "loss": 0.2443, "step": 17700 }, { "epoch": 0.6435787484555564, "grad_norm": 1.3739595413208008, "learning_rate": 4.991635469674825e-05, "loss": 0.1465, "step": 17710 }, { "epoch": 0.6439421469583545, "grad_norm": 1.6855549812316895, "learning_rate": 4.991593001675393e-05, "loss": 0.1819, "step": 17720 }, { "epoch": 0.6443055454611527, "grad_norm": 1.692335844039917, "learning_rate": 4.991550426322083e-05, "loss": 0.1654, "step": 17730 }, { "epoch": 0.6446689439639509, "grad_norm": 1.1132971048355103, "learning_rate": 4.9915077436167313e-05, "loss": 0.1688, "step": 17740 }, { "epoch": 0.645032342466749, "grad_norm": 5.6813201904296875, "learning_rate": 4.9914649535611756e-05, "loss": 0.2235, "step": 17750 }, { "epoch": 0.6453957409695472, "grad_norm": 1.5107471942901611, "learning_rate": 4.99142205615726e-05, "loss": 0.1747, "step": 17760 }, { "epoch": 0.6457591394723454, "grad_norm": 2.4552764892578125, "learning_rate": 4.9913790514068316e-05, "loss": 0.1739, "step": 17770 }, { "epoch": 0.6461225379751435, "grad_norm": 1.5664808750152588, "learning_rate": 4.991335939311744e-05, "loss": 0.1766, "step": 17780 }, { "epoch": 0.6464859364779417, "grad_norm": 2.935850143432617, "learning_rate": 4.9912927198738556e-05, "loss": 0.2148, "step": 17790 }, { "epoch": 0.6468493349807399, "grad_norm": 10.267364501953125, "learning_rate": 4.991249393095028e-05, "loss": 0.2521, "step": 17800 }, { "epoch": 0.647212733483538, "grad_norm": 1.3392564058303833, "learning_rate": 4.9912059589771274e-05, "loss": 0.172, "step": 17810 }, { "epoch": 0.6475761319863362, "grad_norm": 0.895491361618042, "learning_rate": 4.991162417522026e-05, "loss": 0.1379, "step": 17820 }, { "epoch": 0.6479395304891343, "grad_norm": 2.536397695541382, "learning_rate": 4.9911187687315997e-05, "loss": 0.1477, "step": 17830 }, { "epoch": 0.6483029289919325, "grad_norm": 1.7795464992523193, "learning_rate": 4.9910750126077296e-05, "loss": 0.1786, "step": 17840 }, { "epoch": 0.6486663274947307, "grad_norm": 62.683929443359375, "learning_rate": 4.9910311491523e-05, "loss": 0.266, "step": 17850 }, { "epoch": 0.6490297259975288, "grad_norm": 2.0866358280181885, "learning_rate": 4.990987178367201e-05, "loss": 0.1428, "step": 17860 }, { "epoch": 0.649393124500327, "grad_norm": 1.5636661052703857, "learning_rate": 4.990943100254328e-05, "loss": 0.1845, "step": 17870 }, { "epoch": 0.6497565230031253, "grad_norm": 3.540689468383789, "learning_rate": 4.9908989148155796e-05, "loss": 0.2348, "step": 17880 }, { "epoch": 0.6501199215059233, "grad_norm": 1.720421314239502, "learning_rate": 4.990854622052859e-05, "loss": 0.1742, "step": 17890 }, { "epoch": 0.6504833200087216, "grad_norm": 7.7201056480407715, "learning_rate": 4.9908102219680756e-05, "loss": 0.2573, "step": 17900 }, { "epoch": 0.6508467185115198, "grad_norm": 3.826190948486328, "learning_rate": 4.9907701701329876e-05, "loss": 3.6024, "step": 17910 }, { "epoch": 0.6512101170143179, "grad_norm": 2.047307252883911, "learning_rate": 4.990725566141558e-05, "loss": 0.1551, "step": 17920 }, { "epoch": 0.6515735155171161, "grad_norm": 6.462743282318115, "learning_rate": 4.990680854833626e-05, "loss": 0.2109, "step": 17930 }, { "epoch": 0.6519369140199143, "grad_norm": 1.7611109018325806, "learning_rate": 4.9906360362111184e-05, "loss": 0.1959, "step": 17940 }, { "epoch": 0.6523003125227124, "grad_norm": 5.253514766693115, "learning_rate": 4.9905911102759655e-05, "loss": 0.2436, "step": 17950 }, { "epoch": 0.6526637110255106, "grad_norm": 0.9357771873474121, "learning_rate": 4.9905460770301035e-05, "loss": 0.1664, "step": 17960 }, { "epoch": 0.6530271095283088, "grad_norm": 1.219488263130188, "learning_rate": 4.990500936475472e-05, "loss": 0.2286, "step": 17970 }, { "epoch": 0.6533905080311069, "grad_norm": 2.8499608039855957, "learning_rate": 4.990455688614016e-05, "loss": 0.2664, "step": 17980 }, { "epoch": 0.6537539065339051, "grad_norm": 1.5652077198028564, "learning_rate": 4.990410333447686e-05, "loss": 0.1341, "step": 17990 }, { "epoch": 0.6541173050367033, "grad_norm": 5.98219633102417, "learning_rate": 4.9903648709784356e-05, "loss": 0.2338, "step": 18000 }, { "epoch": 0.6541173050367033, "eval_loss": 0.37892404198646545, "eval_runtime": 180.0524, "eval_samples_per_second": 41.177, "eval_steps_per_second": 5.149, "eval_wer": 0.200864087715795, "step": 18000 }, { "epoch": 0.6544807035395014, "grad_norm": 2.105100154876709, "learning_rate": 4.990319301208223e-05, "loss": 0.1764, "step": 18010 }, { "epoch": 0.6548441020422996, "grad_norm": 1.0867921113967896, "learning_rate": 4.990273624139013e-05, "loss": 0.1507, "step": 18020 }, { "epoch": 0.6552075005450978, "grad_norm": 2.9895503520965576, "learning_rate": 4.9902278397727734e-05, "loss": 0.1479, "step": 18030 }, { "epoch": 0.6555708990478959, "grad_norm": 0.9947407841682434, "learning_rate": 4.990181948111475e-05, "loss": 0.1558, "step": 18040 }, { "epoch": 0.6559342975506941, "grad_norm": 7.774895191192627, "learning_rate": 4.9901359491570974e-05, "loss": 0.2202, "step": 18050 }, { "epoch": 0.6562976960534923, "grad_norm": 1.8466017246246338, "learning_rate": 4.990089842911622e-05, "loss": 0.1929, "step": 18060 }, { "epoch": 0.6566610945562904, "grad_norm": 0.8435410261154175, "learning_rate": 4.9900436293770345e-05, "loss": 0.1377, "step": 18070 }, { "epoch": 0.6570244930590886, "grad_norm": 3.10648512840271, "learning_rate": 4.989997308555326e-05, "loss": 0.202, "step": 18080 }, { "epoch": 0.6573878915618868, "grad_norm": 1.112806797027588, "learning_rate": 4.989950880448494e-05, "loss": 0.1486, "step": 18090 }, { "epoch": 0.6577512900646849, "grad_norm": 18.821117401123047, "learning_rate": 4.989904345058538e-05, "loss": 0.2677, "step": 18100 }, { "epoch": 0.6581146885674831, "grad_norm": 1.254798412322998, "learning_rate": 4.989857702387463e-05, "loss": 3.5769, "step": 18110 }, { "epoch": 0.6584780870702812, "grad_norm": 0.9956761002540588, "learning_rate": 4.989810952437277e-05, "loss": 0.1958, "step": 18120 }, { "epoch": 0.6588414855730794, "grad_norm": 2.9471828937530518, "learning_rate": 4.9897640952099975e-05, "loss": 0.1988, "step": 18130 }, { "epoch": 0.6592048840758776, "grad_norm": 1.3806344270706177, "learning_rate": 4.989717130707641e-05, "loss": 0.1552, "step": 18140 }, { "epoch": 0.6595682825786757, "grad_norm": 3.0857722759246826, "learning_rate": 4.989670058932231e-05, "loss": 0.2168, "step": 18150 }, { "epoch": 0.6599316810814739, "grad_norm": 1.8781664371490479, "learning_rate": 4.989622879885798e-05, "loss": 0.1571, "step": 18160 }, { "epoch": 0.6602950795842721, "grad_norm": 1.1139156818389893, "learning_rate": 4.9895755935703725e-05, "loss": 0.1365, "step": 18170 }, { "epoch": 0.6606584780870702, "grad_norm": 2.3965742588043213, "learning_rate": 4.9895281999879925e-05, "loss": 0.1879, "step": 18180 }, { "epoch": 0.6610218765898684, "grad_norm": 1.2575726509094238, "learning_rate": 4.9894806991407e-05, "loss": 0.2197, "step": 18190 }, { "epoch": 0.6613852750926666, "grad_norm": 10.392169952392578, "learning_rate": 4.989433091030542e-05, "loss": 0.2318, "step": 18200 }, { "epoch": 0.6617486735954647, "grad_norm": 0.8268498182296753, "learning_rate": 4.98938537565957e-05, "loss": 0.1416, "step": 18210 }, { "epoch": 0.662112072098263, "grad_norm": 0.9257369637489319, "learning_rate": 4.9893375530298384e-05, "loss": 0.1855, "step": 18220 }, { "epoch": 0.6624754706010612, "grad_norm": 1.7720370292663574, "learning_rate": 4.9892896231434094e-05, "loss": 1.0276, "step": 18230 }, { "epoch": 0.6628388691038593, "grad_norm": 2.2012548446655273, "learning_rate": 4.9892415860023476e-05, "loss": 1.1909, "step": 18240 }, { "epoch": 0.6632022676066575, "grad_norm": 9.690247535705566, "learning_rate": 4.9891934416087224e-05, "loss": 0.2603, "step": 18250 }, { "epoch": 0.6635656661094557, "grad_norm": 2.528682231903076, "learning_rate": 4.989145189964608e-05, "loss": 0.1912, "step": 18260 }, { "epoch": 0.6639290646122538, "grad_norm": 1.4666227102279663, "learning_rate": 4.989096831072084e-05, "loss": 0.2316, "step": 18270 }, { "epoch": 0.664292463115052, "grad_norm": 1.463526725769043, "learning_rate": 4.989048364933234e-05, "loss": 0.1388, "step": 18280 }, { "epoch": 0.6646558616178502, "grad_norm": 1.2156569957733154, "learning_rate": 4.988999791550146e-05, "loss": 0.4086, "step": 18290 }, { "epoch": 0.6650192601206483, "grad_norm": 4.909139156341553, "learning_rate": 4.988951110924913e-05, "loss": 0.2631, "step": 18300 }, { "epoch": 0.6653826586234465, "grad_norm": 1.3692512512207031, "learning_rate": 4.988902323059632e-05, "loss": 0.1525, "step": 18310 }, { "epoch": 0.6657460571262447, "grad_norm": 1.153344988822937, "learning_rate": 4.988853427956406e-05, "loss": 0.1904, "step": 18320 }, { "epoch": 0.6661094556290428, "grad_norm": 2.052828073501587, "learning_rate": 4.988804425617341e-05, "loss": 0.1979, "step": 18330 }, { "epoch": 0.666472854131841, "grad_norm": 1.373213768005371, "learning_rate": 4.988755316044548e-05, "loss": 0.1836, "step": 18340 }, { "epoch": 0.6668362526346392, "grad_norm": 24.185970306396484, "learning_rate": 4.9887060992401436e-05, "loss": 0.2546, "step": 18350 }, { "epoch": 0.6671996511374373, "grad_norm": 1.702205777168274, "learning_rate": 4.988656775206248e-05, "loss": 0.1433, "step": 18360 }, { "epoch": 0.6675630496402355, "grad_norm": 2.279100179672241, "learning_rate": 4.9886073439449864e-05, "loss": 0.1671, "step": 18370 }, { "epoch": 0.6679264481430337, "grad_norm": 3.928740978240967, "learning_rate": 4.98855780545849e-05, "loss": 0.1506, "step": 18380 }, { "epoch": 0.6682898466458318, "grad_norm": 2.2895402908325195, "learning_rate": 4.988508159748891e-05, "loss": 0.1523, "step": 18390 }, { "epoch": 0.66865324514863, "grad_norm": 10.151689529418945, "learning_rate": 4.98845840681833e-05, "loss": 0.2284, "step": 18400 }, { "epoch": 0.6690166436514282, "grad_norm": 1.268561840057373, "learning_rate": 4.9884085466689504e-05, "loss": 0.171, "step": 18410 }, { "epoch": 0.6693800421542263, "grad_norm": 1.0731265544891357, "learning_rate": 4.9883585793029e-05, "loss": 0.7778, "step": 18420 }, { "epoch": 0.6697434406570245, "grad_norm": 1.0762509107589722, "learning_rate": 4.988308504722332e-05, "loss": 0.1966, "step": 18430 }, { "epoch": 0.6701068391598226, "grad_norm": 6.763409614562988, "learning_rate": 4.9882583229294044e-05, "loss": 0.156, "step": 18440 }, { "epoch": 0.6704702376626208, "grad_norm": 8.312501907348633, "learning_rate": 4.988208033926279e-05, "loss": 0.2573, "step": 18450 }, { "epoch": 0.670833636165419, "grad_norm": 1.7566003799438477, "learning_rate": 4.988157637715122e-05, "loss": 0.1639, "step": 18460 }, { "epoch": 0.6711970346682171, "grad_norm": 2.336911916732788, "learning_rate": 4.988107134298105e-05, "loss": 0.1536, "step": 18470 }, { "epoch": 0.6715604331710153, "grad_norm": 2.2477078437805176, "learning_rate": 4.988056523677405e-05, "loss": 0.2734, "step": 18480 }, { "epoch": 0.6719238316738135, "grad_norm": 1.62912917137146, "learning_rate": 4.9880058058552015e-05, "loss": 0.1501, "step": 18490 }, { "epoch": 0.6722872301766116, "grad_norm": 8.896906852722168, "learning_rate": 4.98795498083368e-05, "loss": 0.213, "step": 18500 }, { "epoch": 0.6726506286794098, "grad_norm": 1.804291009902954, "learning_rate": 4.987904048615031e-05, "loss": 0.2175, "step": 18510 }, { "epoch": 0.673014027182208, "grad_norm": 0.9261330366134644, "learning_rate": 4.9878530092014486e-05, "loss": 0.1553, "step": 18520 }, { "epoch": 0.6733774256850061, "grad_norm": 4.854642868041992, "learning_rate": 4.987801862595132e-05, "loss": 0.2065, "step": 18530 }, { "epoch": 0.6737408241878043, "grad_norm": 0.9362125992774963, "learning_rate": 4.987750608798284e-05, "loss": 0.1611, "step": 18540 }, { "epoch": 0.6741042226906026, "grad_norm": 13.348092079162598, "learning_rate": 4.987699247813114e-05, "loss": 0.2834, "step": 18550 }, { "epoch": 0.6744676211934006, "grad_norm": 1.3235937356948853, "learning_rate": 4.987647779641835e-05, "loss": 0.166, "step": 18560 }, { "epoch": 0.6748310196961989, "grad_norm": 1.7941697835922241, "learning_rate": 4.987596204286664e-05, "loss": 0.186, "step": 18570 }, { "epoch": 0.6751944181989971, "grad_norm": 6.945876121520996, "learning_rate": 4.987544521749824e-05, "loss": 0.1859, "step": 18580 }, { "epoch": 0.6755578167017952, "grad_norm": 1.1671024560928345, "learning_rate": 4.98749273203354e-05, "loss": 0.2007, "step": 18590 }, { "epoch": 0.6759212152045934, "grad_norm": 46.817718505859375, "learning_rate": 4.987440835140046e-05, "loss": 0.275, "step": 18600 }, { "epoch": 0.6759212152045934, "eval_loss": 0.4186328053474426, "eval_runtime": 180.6066, "eval_samples_per_second": 41.051, "eval_steps_per_second": 5.133, "eval_wer": 0.21444260896400238, "step": 18600 }, { "epoch": 0.6762846137073916, "grad_norm": 0.9619908928871155, "learning_rate": 4.987388831071575e-05, "loss": 0.2147, "step": 18610 }, { "epoch": 0.6766480122101897, "grad_norm": 1.139666199684143, "learning_rate": 4.9873367198303714e-05, "loss": 0.2591, "step": 18620 }, { "epoch": 0.6770114107129879, "grad_norm": 2.6673026084899902, "learning_rate": 4.9872845014186776e-05, "loss": 0.2013, "step": 18630 }, { "epoch": 0.6773748092157861, "grad_norm": 1.0486637353897095, "learning_rate": 4.987232175838745e-05, "loss": 0.2326, "step": 18640 }, { "epoch": 0.6777382077185842, "grad_norm": 6.457462787628174, "learning_rate": 4.987179743092827e-05, "loss": 0.2395, "step": 18650 }, { "epoch": 0.6781016062213824, "grad_norm": 3.296480178833008, "learning_rate": 4.987127203183183e-05, "loss": 0.1857, "step": 18660 }, { "epoch": 0.6784650047241806, "grad_norm": 2.828460454940796, "learning_rate": 4.987074556112078e-05, "loss": 0.1391, "step": 18670 }, { "epoch": 0.6788284032269787, "grad_norm": 10.424219131469727, "learning_rate": 4.987021801881779e-05, "loss": 0.1583, "step": 18680 }, { "epoch": 0.6791918017297769, "grad_norm": 5.248502254486084, "learning_rate": 4.986968940494559e-05, "loss": 0.1676, "step": 18690 }, { "epoch": 0.6795552002325751, "grad_norm": 8.20375919342041, "learning_rate": 4.986915971952696e-05, "loss": 0.2844, "step": 18700 }, { "epoch": 0.6799185987353732, "grad_norm": 2.415562152862549, "learning_rate": 4.986862896258473e-05, "loss": 0.1634, "step": 18710 }, { "epoch": 0.6802819972381714, "grad_norm": 1.635680079460144, "learning_rate": 4.986809713414176e-05, "loss": 0.509, "step": 18720 }, { "epoch": 0.6806453957409695, "grad_norm": 10.641048431396484, "learning_rate": 4.986756423422095e-05, "loss": 0.2015, "step": 18730 }, { "epoch": 0.6810087942437677, "grad_norm": 1.3304156064987183, "learning_rate": 4.986703026284529e-05, "loss": 0.1598, "step": 18740 }, { "epoch": 0.6813721927465659, "grad_norm": 4.707154750823975, "learning_rate": 4.986649522003778e-05, "loss": 0.2486, "step": 18750 }, { "epoch": 0.681735591249364, "grad_norm": 1.671863317489624, "learning_rate": 4.9865959105821454e-05, "loss": 0.1628, "step": 18760 }, { "epoch": 0.6820989897521622, "grad_norm": 2.4183709621429443, "learning_rate": 4.986542192021942e-05, "loss": 0.1636, "step": 18770 }, { "epoch": 0.6824623882549604, "grad_norm": 120.8931884765625, "learning_rate": 4.9864883663254836e-05, "loss": 2.0172, "step": 18780 }, { "epoch": 0.6828257867577585, "grad_norm": 2.785879135131836, "learning_rate": 4.986434433495089e-05, "loss": 0.1669, "step": 18790 }, { "epoch": 0.6831891852605567, "grad_norm": 3.662753105163574, "learning_rate": 4.98638039353308e-05, "loss": 0.3095, "step": 18800 }, { "epoch": 0.6835525837633549, "grad_norm": 1.1632777452468872, "learning_rate": 4.986326246441787e-05, "loss": 0.1632, "step": 18810 }, { "epoch": 0.683915982266153, "grad_norm": 0.9660913348197937, "learning_rate": 4.986271992223543e-05, "loss": 0.1509, "step": 18820 }, { "epoch": 0.6842793807689512, "grad_norm": 2.810391426086426, "learning_rate": 4.986217630880684e-05, "loss": 0.1507, "step": 18830 }, { "epoch": 0.6846427792717494, "grad_norm": 2.008641242980957, "learning_rate": 4.986163162415554e-05, "loss": 0.1858, "step": 18840 }, { "epoch": 0.6850061777745475, "grad_norm": 3.4007887840270996, "learning_rate": 4.986108586830499e-05, "loss": 0.2389, "step": 18850 }, { "epoch": 0.6853695762773457, "grad_norm": 0.8250002861022949, "learning_rate": 4.986053904127871e-05, "loss": 0.1618, "step": 18860 }, { "epoch": 0.685732974780144, "grad_norm": 0.792607307434082, "learning_rate": 4.986004598111927e-05, "loss": 1.5835, "step": 18870 }, { "epoch": 0.686096373282942, "grad_norm": 2.740478038787842, "learning_rate": 4.985949711892404e-05, "loss": 0.2021, "step": 18880 }, { "epoch": 0.6864597717857402, "grad_norm": 1.1361775398254395, "learning_rate": 4.985894718562153e-05, "loss": 0.2244, "step": 18890 }, { "epoch": 0.6868231702885385, "grad_norm": 2.692542314529419, "learning_rate": 4.985839618123543e-05, "loss": 0.2095, "step": 18900 }, { "epoch": 0.6871865687913365, "grad_norm": 1.2691428661346436, "learning_rate": 4.9857844105789485e-05, "loss": 0.1533, "step": 18910 }, { "epoch": 0.6875499672941348, "grad_norm": 2.087209939956665, "learning_rate": 4.9857290959307483e-05, "loss": 0.1469, "step": 18920 }, { "epoch": 0.687913365796933, "grad_norm": 1.5252209901809692, "learning_rate": 4.985673674181326e-05, "loss": 0.2099, "step": 18930 }, { "epoch": 0.6882767642997311, "grad_norm": 1.81588876247406, "learning_rate": 4.9856181453330685e-05, "loss": 0.174, "step": 18940 }, { "epoch": 0.6886401628025293, "grad_norm": 21.244775772094727, "learning_rate": 4.9855625093883695e-05, "loss": 0.2455, "step": 18950 }, { "epoch": 0.6890035613053275, "grad_norm": 1.53201425075531, "learning_rate": 4.9855067663496255e-05, "loss": 0.1731, "step": 18960 }, { "epoch": 0.6893669598081256, "grad_norm": 0.9922922849655151, "learning_rate": 4.985450916219239e-05, "loss": 0.1569, "step": 18970 }, { "epoch": 0.6897303583109238, "grad_norm": 1.6983296871185303, "learning_rate": 4.985394958999615e-05, "loss": 0.1784, "step": 18980 }, { "epoch": 0.690093756813722, "grad_norm": 2.5069353580474854, "learning_rate": 4.9853388946931654e-05, "loss": 0.1484, "step": 18990 }, { "epoch": 0.6904571553165201, "grad_norm": 52.345367431640625, "learning_rate": 4.985282723302306e-05, "loss": 0.2431, "step": 19000 }, { "epoch": 0.6908205538193183, "grad_norm": 1.5318138599395752, "learning_rate": 4.9852264448294564e-05, "loss": 0.1662, "step": 19010 }, { "epoch": 0.6911839523221164, "grad_norm": 1.5980876684188843, "learning_rate": 4.985170059277041e-05, "loss": 1.3532, "step": 19020 }, { "epoch": 0.6915473508249146, "grad_norm": 2.355023145675659, "learning_rate": 4.9851135666474915e-05, "loss": 0.1688, "step": 19030 }, { "epoch": 0.6919107493277128, "grad_norm": 3.2141480445861816, "learning_rate": 4.98505696694324e-05, "loss": 0.1303, "step": 19040 }, { "epoch": 0.6922741478305109, "grad_norm": 19.482290267944336, "learning_rate": 4.985000260166725e-05, "loss": 0.2337, "step": 19050 }, { "epoch": 0.6926375463333091, "grad_norm": 0.8456101417541504, "learning_rate": 4.9849434463203915e-05, "loss": 0.1732, "step": 19060 }, { "epoch": 0.6930009448361073, "grad_norm": 2.2158889770507812, "learning_rate": 4.9848865254066856e-05, "loss": 0.1524, "step": 19070 }, { "epoch": 0.6933643433389054, "grad_norm": 2.0843331813812256, "learning_rate": 4.9848294974280605e-05, "loss": 0.1943, "step": 19080 }, { "epoch": 0.6937277418417036, "grad_norm": 2.6970462799072266, "learning_rate": 4.9847723623869734e-05, "loss": 0.1697, "step": 19090 }, { "epoch": 0.6940911403445018, "grad_norm": 9.394730567932129, "learning_rate": 4.984715120285887e-05, "loss": 0.2151, "step": 19100 }, { "epoch": 0.6944545388472999, "grad_norm": 1.922090768814087, "learning_rate": 4.9846577711272656e-05, "loss": 0.1737, "step": 19110 }, { "epoch": 0.6948179373500981, "grad_norm": 1.3870245218276978, "learning_rate": 4.9846003149135815e-05, "loss": 0.1694, "step": 19120 }, { "epoch": 0.6951813358528963, "grad_norm": 1.6474970579147339, "learning_rate": 4.9845427516473104e-05, "loss": 0.219, "step": 19130 }, { "epoch": 0.6955447343556944, "grad_norm": 1.4302411079406738, "learning_rate": 4.984485081330932e-05, "loss": 0.1489, "step": 19140 }, { "epoch": 0.6959081328584926, "grad_norm": 3.888967990875244, "learning_rate": 4.984427303966932e-05, "loss": 0.2425, "step": 19150 }, { "epoch": 0.6962715313612908, "grad_norm": 1.2002874612808228, "learning_rate": 4.984369419557798e-05, "loss": 0.1575, "step": 19160 }, { "epoch": 0.6966349298640889, "grad_norm": 1.9064863920211792, "learning_rate": 4.984311428106025e-05, "loss": 0.1526, "step": 19170 }, { "epoch": 0.6969983283668871, "grad_norm": 1.3838772773742676, "learning_rate": 4.984253329614112e-05, "loss": 0.1601, "step": 19180 }, { "epoch": 0.6973617268696853, "grad_norm": 3.6261801719665527, "learning_rate": 4.984195124084563e-05, "loss": 0.1668, "step": 19190 }, { "epoch": 0.6977251253724834, "grad_norm": 7.647263526916504, "learning_rate": 4.984136811519884e-05, "loss": 0.1879, "step": 19200 }, { "epoch": 0.6977251253724834, "eval_loss": 0.3865276575088501, "eval_runtime": 179.6651, "eval_samples_per_second": 41.266, "eval_steps_per_second": 5.16, "eval_wer": 0.20815255867990634, "step": 19200 }, { "epoch": 0.6980885238752816, "grad_norm": 1.7563225030899048, "learning_rate": 4.984078391922589e-05, "loss": 0.1481, "step": 19210 }, { "epoch": 0.6984519223780798, "grad_norm": 1.8016029596328735, "learning_rate": 4.984019865295194e-05, "loss": 0.1713, "step": 19220 }, { "epoch": 0.6988153208808779, "grad_norm": 2.0969181060791016, "learning_rate": 4.983961231640221e-05, "loss": 0.1959, "step": 19230 }, { "epoch": 0.6991787193836762, "grad_norm": 1.6823608875274658, "learning_rate": 4.9839024909601964e-05, "loss": 0.1729, "step": 19240 }, { "epoch": 0.6995421178864744, "grad_norm": 11.533753395080566, "learning_rate": 4.983843643257652e-05, "loss": 0.2264, "step": 19250 }, { "epoch": 0.6999055163892725, "grad_norm": 4.1039204597473145, "learning_rate": 4.983784688535122e-05, "loss": 0.1738, "step": 19260 }, { "epoch": 0.7002689148920707, "grad_norm": 1.1051629781723022, "learning_rate": 4.983725626795147e-05, "loss": 0.155, "step": 19270 }, { "epoch": 0.7006323133948689, "grad_norm": 4.303994178771973, "learning_rate": 4.983666458040273e-05, "loss": 0.1593, "step": 19280 }, { "epoch": 0.700995711897667, "grad_norm": 1.2324292659759521, "learning_rate": 4.983607182273047e-05, "loss": 0.1642, "step": 19290 }, { "epoch": 0.7013591104004652, "grad_norm": 6.101926326751709, "learning_rate": 4.983547799496024e-05, "loss": 0.2338, "step": 19300 }, { "epoch": 0.7017225089032633, "grad_norm": 1.1532049179077148, "learning_rate": 4.983488309711763e-05, "loss": 0.1591, "step": 19310 }, { "epoch": 0.7020859074060615, "grad_norm": 0.8216233253479004, "learning_rate": 4.983428712922828e-05, "loss": 0.1489, "step": 19320 }, { "epoch": 0.7024493059088597, "grad_norm": 1.489461064338684, "learning_rate": 4.983369009131785e-05, "loss": 0.2048, "step": 19330 }, { "epoch": 0.7028127044116578, "grad_norm": 1.0493615865707397, "learning_rate": 4.983309198341207e-05, "loss": 0.1525, "step": 19340 }, { "epoch": 0.703176102914456, "grad_norm": 10.2578706741333, "learning_rate": 4.983249280553672e-05, "loss": 0.2297, "step": 19350 }, { "epoch": 0.7035395014172542, "grad_norm": 1.5366660356521606, "learning_rate": 4.983189255771761e-05, "loss": 0.1644, "step": 19360 }, { "epoch": 0.7039028999200523, "grad_norm": 1.4915844202041626, "learning_rate": 4.9831291239980596e-05, "loss": 0.1599, "step": 19370 }, { "epoch": 0.7042662984228505, "grad_norm": 1.3012590408325195, "learning_rate": 4.98306888523516e-05, "loss": 0.1907, "step": 19380 }, { "epoch": 0.7046296969256487, "grad_norm": 1.5029476881027222, "learning_rate": 4.983008539485656e-05, "loss": 0.1391, "step": 19390 }, { "epoch": 0.7049930954284468, "grad_norm": 3.0202033519744873, "learning_rate": 4.9829480867521495e-05, "loss": 0.2218, "step": 19400 }, { "epoch": 0.705356493931245, "grad_norm": 1.7761317491531372, "learning_rate": 4.9828875270372434e-05, "loss": 0.1605, "step": 19410 }, { "epoch": 0.7057198924340432, "grad_norm": 1.420793890953064, "learning_rate": 4.9828268603435485e-05, "loss": 1.5838, "step": 19420 }, { "epoch": 0.7060832909368413, "grad_norm": 2.079665422439575, "learning_rate": 4.982766086673678e-05, "loss": 0.2146, "step": 19430 }, { "epoch": 0.7064466894396395, "grad_norm": 2.440471887588501, "learning_rate": 4.98270520603025e-05, "loss": 0.1733, "step": 19440 }, { "epoch": 0.7068100879424377, "grad_norm": 7.773731708526611, "learning_rate": 4.982644218415889e-05, "loss": 0.2126, "step": 19450 }, { "epoch": 0.7071734864452358, "grad_norm": 0.9480405449867249, "learning_rate": 4.982583123833221e-05, "loss": 0.1575, "step": 19460 }, { "epoch": 0.707536884948034, "grad_norm": 12.79196548461914, "learning_rate": 4.982521922284881e-05, "loss": 0.2745, "step": 19470 }, { "epoch": 0.7079002834508322, "grad_norm": 4.492150783538818, "learning_rate": 4.982460613773502e-05, "loss": 0.1663, "step": 19480 }, { "epoch": 0.7082636819536303, "grad_norm": 1.2373683452606201, "learning_rate": 4.9823991983017295e-05, "loss": 0.1699, "step": 19490 }, { "epoch": 0.7086270804564285, "grad_norm": 5.8804402351379395, "learning_rate": 4.982337675872207e-05, "loss": 0.242, "step": 19500 }, { "epoch": 0.7089904789592267, "grad_norm": 0.9465837478637695, "learning_rate": 4.982276046487586e-05, "loss": 0.1471, "step": 19510 }, { "epoch": 0.7093538774620248, "grad_norm": 1.6178842782974243, "learning_rate": 4.9822143101505226e-05, "loss": 0.1619, "step": 19520 }, { "epoch": 0.709717275964823, "grad_norm": 2.4963414669036865, "learning_rate": 4.9821524668636766e-05, "loss": 0.1426, "step": 19530 }, { "epoch": 0.7100806744676212, "grad_norm": 1.1380610466003418, "learning_rate": 4.982090516629712e-05, "loss": 0.2364, "step": 19540 }, { "epoch": 0.7104440729704193, "grad_norm": 5.2998046875, "learning_rate": 4.982028459451298e-05, "loss": 0.2661, "step": 19550 }, { "epoch": 0.7108074714732175, "grad_norm": 1.1476637125015259, "learning_rate": 4.9819662953311096e-05, "loss": 0.1306, "step": 19560 }, { "epoch": 0.7111708699760158, "grad_norm": 0.7960777878761292, "learning_rate": 4.981904024271824e-05, "loss": 0.1604, "step": 19570 }, { "epoch": 0.7115342684788138, "grad_norm": 1.9035999774932861, "learning_rate": 4.981841646276124e-05, "loss": 0.1728, "step": 19580 }, { "epoch": 0.711897666981612, "grad_norm": 0.9725393056869507, "learning_rate": 4.981779161346699e-05, "loss": 0.2529, "step": 19590 }, { "epoch": 0.7122610654844101, "grad_norm": 5.759589672088623, "learning_rate": 4.98171656948624e-05, "loss": 0.25, "step": 19600 }, { "epoch": 0.7126244639872084, "grad_norm": 1.3716357946395874, "learning_rate": 4.9816538706974434e-05, "loss": 0.1603, "step": 19610 }, { "epoch": 0.7129878624900066, "grad_norm": 1.4253743886947632, "learning_rate": 4.981591064983011e-05, "loss": 0.1496, "step": 19620 }, { "epoch": 0.7133512609928047, "grad_norm": 2.4253408908843994, "learning_rate": 4.98152815234565e-05, "loss": 0.1694, "step": 19630 }, { "epoch": 0.7137146594956029, "grad_norm": 1.212689757347107, "learning_rate": 4.9814651327880696e-05, "loss": 0.1869, "step": 19640 }, { "epoch": 0.7140780579984011, "grad_norm": 7.003270626068115, "learning_rate": 4.981402006312986e-05, "loss": 0.2709, "step": 19650 }, { "epoch": 0.7144414565011992, "grad_norm": 1.6173512935638428, "learning_rate": 4.981338772923119e-05, "loss": 0.1651, "step": 19660 }, { "epoch": 0.7148048550039974, "grad_norm": 2.2197723388671875, "learning_rate": 4.981275432621192e-05, "loss": 0.1657, "step": 19670 }, { "epoch": 0.7151682535067956, "grad_norm": 1.8906898498535156, "learning_rate": 4.981211985409936e-05, "loss": 2.3111, "step": 19680 }, { "epoch": 0.7155316520095937, "grad_norm": 3.50747013092041, "learning_rate": 4.981148431292084e-05, "loss": 0.1498, "step": 19690 }, { "epoch": 0.7158950505123919, "grad_norm": 4.080805778503418, "learning_rate": 4.981084770270373e-05, "loss": 0.2094, "step": 19700 }, { "epoch": 0.7162584490151901, "grad_norm": 2.1056652069091797, "learning_rate": 4.981021002347547e-05, "loss": 0.157, "step": 19710 }, { "epoch": 0.7166218475179882, "grad_norm": 1.07776939868927, "learning_rate": 4.980957127526354e-05, "loss": 0.2049, "step": 19720 }, { "epoch": 0.7169852460207864, "grad_norm": 3.5387072563171387, "learning_rate": 4.980893145809546e-05, "loss": 0.1706, "step": 19730 }, { "epoch": 0.7173486445235846, "grad_norm": 1.5516027212142944, "learning_rate": 4.980829057199879e-05, "loss": 0.1371, "step": 19740 }, { "epoch": 0.7177120430263827, "grad_norm": 6.618633270263672, "learning_rate": 4.9807648617001145e-05, "loss": 0.1833, "step": 19750 }, { "epoch": 0.7180754415291809, "grad_norm": 1.7093079090118408, "learning_rate": 4.980700559313019e-05, "loss": 0.1592, "step": 19760 }, { "epoch": 0.7184388400319791, "grad_norm": 1.1217936277389526, "learning_rate": 4.9806361500413626e-05, "loss": 0.145, "step": 19770 }, { "epoch": 0.7188022385347772, "grad_norm": 1.869722604751587, "learning_rate": 4.980571633887921e-05, "loss": 0.1605, "step": 19780 }, { "epoch": 0.7191656370375754, "grad_norm": 1.1555829048156738, "learning_rate": 4.980507010855473e-05, "loss": 0.1539, "step": 19790 }, { "epoch": 0.7195290355403736, "grad_norm": 5.0145111083984375, "learning_rate": 4.9804422809468046e-05, "loss": 0.2334, "step": 19800 }, { "epoch": 0.7195290355403736, "eval_loss": 0.394449919462204, "eval_runtime": 180.0311, "eval_samples_per_second": 41.182, "eval_steps_per_second": 5.149, "eval_wer": 0.2100677110752083, "step": 19800 }, { "epoch": 0.7198924340431717, "grad_norm": 1.0865716934204102, "learning_rate": 4.980377444164702e-05, "loss": 0.1569, "step": 19810 }, { "epoch": 0.7202558325459699, "grad_norm": 1.5475140810012817, "learning_rate": 4.980312500511962e-05, "loss": 0.1268, "step": 19820 }, { "epoch": 0.7206192310487681, "grad_norm": 1.9507659673690796, "learning_rate": 4.980247449991381e-05, "loss": 0.2092, "step": 19830 }, { "epoch": 0.7209826295515662, "grad_norm": 1.185339093208313, "learning_rate": 4.980182292605762e-05, "loss": 0.1432, "step": 19840 }, { "epoch": 0.7213460280543644, "grad_norm": 5.294797420501709, "learning_rate": 4.980117028357912e-05, "loss": 0.2459, "step": 19850 }, { "epoch": 0.7217094265571626, "grad_norm": 2.691941976547241, "learning_rate": 4.980051657250645e-05, "loss": 0.1747, "step": 19860 }, { "epoch": 0.7220728250599607, "grad_norm": 1.3377537727355957, "learning_rate": 4.9799861792867756e-05, "loss": 0.1541, "step": 19870 }, { "epoch": 0.7224362235627589, "grad_norm": 3.39907169342041, "learning_rate": 4.979920594469124e-05, "loss": 0.166, "step": 19880 }, { "epoch": 0.722799622065557, "grad_norm": 1.738271951675415, "learning_rate": 4.9798549028005195e-05, "loss": 0.1591, "step": 19890 }, { "epoch": 0.7231630205683552, "grad_norm": 4.062039852142334, "learning_rate": 4.9797891042837893e-05, "loss": 0.2372, "step": 19900 }, { "epoch": 0.7235264190711534, "grad_norm": 2.46109676361084, "learning_rate": 4.979723198921771e-05, "loss": 0.1606, "step": 19910 }, { "epoch": 0.7238898175739515, "grad_norm": 1.3511689901351929, "learning_rate": 4.9796571867173017e-05, "loss": 0.148, "step": 19920 }, { "epoch": 0.7242532160767498, "grad_norm": 4.831977844238281, "learning_rate": 4.979591067673227e-05, "loss": 0.1832, "step": 19930 }, { "epoch": 0.724616614579548, "grad_norm": 0.9530340433120728, "learning_rate": 4.979524841792397e-05, "loss": 0.1776, "step": 19940 }, { "epoch": 0.724980013082346, "grad_norm": 2.886121988296509, "learning_rate": 4.979458509077663e-05, "loss": 0.217, "step": 19950 }, { "epoch": 0.7253434115851443, "grad_norm": 2.6050822734832764, "learning_rate": 4.979392069531883e-05, "loss": 0.1709, "step": 19960 }, { "epoch": 0.7257068100879425, "grad_norm": 1.1615772247314453, "learning_rate": 4.979325523157921e-05, "loss": 0.1891, "step": 19970 }, { "epoch": 0.7260702085907406, "grad_norm": 5.947473526000977, "learning_rate": 4.979258869958643e-05, "loss": 0.1685, "step": 19980 }, { "epoch": 0.7264336070935388, "grad_norm": 2.2721457481384277, "learning_rate": 4.979192109936922e-05, "loss": 0.1733, "step": 19990 }, { "epoch": 0.726797005596337, "grad_norm": 2.83907413482666, "learning_rate": 4.979125243095635e-05, "loss": 0.2067, "step": 20000 }, { "epoch": 0.7271604040991351, "grad_norm": 1.84774649143219, "learning_rate": 4.9790582694376605e-05, "loss": 0.1634, "step": 20010 }, { "epoch": 0.7275238026019333, "grad_norm": 3.5162901878356934, "learning_rate": 4.978991188965887e-05, "loss": 0.1546, "step": 20020 }, { "epoch": 0.7278872011047315, "grad_norm": 1.3396214246749878, "learning_rate": 4.9789240016832026e-05, "loss": 0.1549, "step": 20030 }, { "epoch": 0.7282505996075296, "grad_norm": 0.8957159519195557, "learning_rate": 4.978856707592503e-05, "loss": 0.4856, "step": 20040 }, { "epoch": 0.7286139981103278, "grad_norm": 3.291719913482666, "learning_rate": 4.978789306696688e-05, "loss": 0.1672, "step": 20050 }, { "epoch": 0.728977396613126, "grad_norm": 1.2237446308135986, "learning_rate": 4.978721798998661e-05, "loss": 0.1547, "step": 20060 }, { "epoch": 0.7293407951159241, "grad_norm": 1.5760120153427124, "learning_rate": 4.978654184501331e-05, "loss": 0.1491, "step": 20070 }, { "epoch": 0.7297041936187223, "grad_norm": 2.661914587020874, "learning_rate": 4.978586463207612e-05, "loss": 0.2399, "step": 20080 }, { "epoch": 0.7300675921215205, "grad_norm": 1.4015228748321533, "learning_rate": 4.978518635120421e-05, "loss": 0.1592, "step": 20090 }, { "epoch": 0.7304309906243186, "grad_norm": 11.479881286621094, "learning_rate": 4.9784507002426793e-05, "loss": 0.2478, "step": 20100 }, { "epoch": 0.7307943891271168, "grad_norm": 2.3282432556152344, "learning_rate": 4.9783826585773164e-05, "loss": 0.1565, "step": 20110 }, { "epoch": 0.731157787629915, "grad_norm": 1.0281476974487305, "learning_rate": 4.9783145101272625e-05, "loss": 2.6872, "step": 20120 }, { "epoch": 0.7315211861327131, "grad_norm": 1.4759191274642944, "learning_rate": 4.978246254895455e-05, "loss": 0.1755, "step": 20130 }, { "epoch": 0.7318845846355113, "grad_norm": 1.1100878715515137, "learning_rate": 4.978177892884833e-05, "loss": 0.1519, "step": 20140 }, { "epoch": 0.7322479831383095, "grad_norm": 5.326310157775879, "learning_rate": 4.9781094240983435e-05, "loss": 0.257, "step": 20150 }, { "epoch": 0.7326113816411076, "grad_norm": 8.199230194091797, "learning_rate": 4.978040848538936e-05, "loss": 0.192, "step": 20160 }, { "epoch": 0.7329747801439058, "grad_norm": 1.579663872718811, "learning_rate": 4.9779721662095654e-05, "loss": 0.1738, "step": 20170 }, { "epoch": 0.733338178646704, "grad_norm": 3.319883346557617, "learning_rate": 4.97790337711319e-05, "loss": 0.1809, "step": 20180 }, { "epoch": 0.7337015771495021, "grad_norm": 1.4813331365585327, "learning_rate": 4.977834481252776e-05, "loss": 0.1645, "step": 20190 }, { "epoch": 0.7340649756523003, "grad_norm": 4.392731666564941, "learning_rate": 4.9777654786312886e-05, "loss": 0.1897, "step": 20200 }, { "epoch": 0.7344283741550984, "grad_norm": 1.7336299419403076, "learning_rate": 4.9776963692517034e-05, "loss": 0.1751, "step": 20210 }, { "epoch": 0.7347917726578966, "grad_norm": 1.6261765956878662, "learning_rate": 4.977627153116998e-05, "loss": 0.156, "step": 20220 }, { "epoch": 0.7351551711606948, "grad_norm": 1.9801748991012573, "learning_rate": 4.977557830230153e-05, "loss": 0.2069, "step": 20230 }, { "epoch": 0.7355185696634929, "grad_norm": 1.4615390300750732, "learning_rate": 4.977488400594157e-05, "loss": 0.1458, "step": 20240 }, { "epoch": 0.7358819681662911, "grad_norm": 3.78981876373291, "learning_rate": 4.977418864212e-05, "loss": 0.1765, "step": 20250 }, { "epoch": 0.7362453666690894, "grad_norm": 0.813947319984436, "learning_rate": 4.97734922108668e-05, "loss": 0.1482, "step": 20260 }, { "epoch": 0.7366087651718874, "grad_norm": 1.1082271337509155, "learning_rate": 4.977279471221195e-05, "loss": 0.149, "step": 20270 }, { "epoch": 0.7369721636746857, "grad_norm": 4.023866176605225, "learning_rate": 4.9772096146185527e-05, "loss": 0.1797, "step": 20280 }, { "epoch": 0.7373355621774839, "grad_norm": 1.3649333715438843, "learning_rate": 4.977139651281762e-05, "loss": 0.182, "step": 20290 }, { "epoch": 0.737698960680282, "grad_norm": 8.213293075561523, "learning_rate": 4.977069581213837e-05, "loss": 0.2117, "step": 20300 }, { "epoch": 0.7380623591830802, "grad_norm": 1.0769990682601929, "learning_rate": 4.9769994044177976e-05, "loss": 0.1689, "step": 20310 }, { "epoch": 0.7384257576858784, "grad_norm": 1.712949275970459, "learning_rate": 4.9769291208966674e-05, "loss": 0.1402, "step": 20320 }, { "epoch": 0.7387891561886765, "grad_norm": 2.213164806365967, "learning_rate": 4.976858730653473e-05, "loss": 0.193, "step": 20330 }, { "epoch": 0.7391525546914747, "grad_norm": 1.9228605031967163, "learning_rate": 4.97678823369125e-05, "loss": 0.1517, "step": 20340 }, { "epoch": 0.7395159531942729, "grad_norm": 8.813825607299805, "learning_rate": 4.976717630013034e-05, "loss": 0.2682, "step": 20350 }, { "epoch": 0.739879351697071, "grad_norm": 1.9778189659118652, "learning_rate": 4.976646919621867e-05, "loss": 0.1701, "step": 20360 }, { "epoch": 0.7402427501998692, "grad_norm": 1.8553961515426636, "learning_rate": 4.976576102520797e-05, "loss": 0.1455, "step": 20370 }, { "epoch": 0.7406061487026674, "grad_norm": 3.1159512996673584, "learning_rate": 4.976505178712874e-05, "loss": 0.2252, "step": 20380 }, { "epoch": 0.7409695472054655, "grad_norm": 1.9035766124725342, "learning_rate": 4.9764341482011545e-05, "loss": 0.1815, "step": 20390 }, { "epoch": 0.7413329457082637, "grad_norm": 2.228940725326538, "learning_rate": 4.976363010988698e-05, "loss": 0.1995, "step": 20400 }, { "epoch": 0.7413329457082637, "eval_loss": 0.35944151878356934, "eval_runtime": 179.8589, "eval_samples_per_second": 41.221, "eval_steps_per_second": 5.154, "eval_wer": 0.200864087715795, "step": 20400 }, { "epoch": 0.7416963442110619, "grad_norm": 1.5204256772994995, "learning_rate": 4.976291767078571e-05, "loss": 1.6497, "step": 20410 }, { "epoch": 0.74205974271386, "grad_norm": 1.3520594835281372, "learning_rate": 4.976220416473842e-05, "loss": 0.1503, "step": 20420 }, { "epoch": 0.7424231412166582, "grad_norm": 2.7322440147399902, "learning_rate": 4.976148959177586e-05, "loss": 0.1784, "step": 20430 }, { "epoch": 0.7427865397194564, "grad_norm": 1.3193668127059937, "learning_rate": 4.9760773951928815e-05, "loss": 0.1685, "step": 20440 }, { "epoch": 0.7431499382222545, "grad_norm": 11.000434875488281, "learning_rate": 4.976005724522812e-05, "loss": 0.2147, "step": 20450 }, { "epoch": 0.7435133367250527, "grad_norm": 1.1825796365737915, "learning_rate": 4.9759339471704656e-05, "loss": 0.2116, "step": 20460 }, { "epoch": 0.7438767352278509, "grad_norm": 1.1518877744674683, "learning_rate": 4.975862063138934e-05, "loss": 0.141, "step": 20470 }, { "epoch": 0.744240133730649, "grad_norm": 6.054372310638428, "learning_rate": 4.975790072431316e-05, "loss": 0.1766, "step": 20480 }, { "epoch": 0.7446035322334472, "grad_norm": 1.0629233121871948, "learning_rate": 4.975717975050713e-05, "loss": 0.1641, "step": 20490 }, { "epoch": 0.7449669307362453, "grad_norm": 2.4782843589782715, "learning_rate": 4.97564577100023e-05, "loss": 0.2186, "step": 20500 }, { "epoch": 0.7453303292390435, "grad_norm": 1.5713534355163574, "learning_rate": 4.975573460282979e-05, "loss": 0.1535, "step": 20510 }, { "epoch": 0.7456937277418417, "grad_norm": 0.7279618382453918, "learning_rate": 4.975501042902078e-05, "loss": 0.1372, "step": 20520 }, { "epoch": 0.7460571262446398, "grad_norm": 5.573297500610352, "learning_rate": 4.975428518860643e-05, "loss": 0.161, "step": 20530 }, { "epoch": 0.746420524747438, "grad_norm": 1.022141695022583, "learning_rate": 4.975355888161801e-05, "loss": 0.1645, "step": 20540 }, { "epoch": 0.7467839232502362, "grad_norm": 2.9584996700286865, "learning_rate": 4.9752831508086805e-05, "loss": 0.2085, "step": 20550 }, { "epoch": 0.7471473217530343, "grad_norm": 2.2749557495117188, "learning_rate": 4.975210306804418e-05, "loss": 0.1531, "step": 20560 }, { "epoch": 0.7475107202558325, "grad_norm": 1.877822995185852, "learning_rate": 4.9751373561521484e-05, "loss": 0.1654, "step": 20570 }, { "epoch": 0.7478741187586307, "grad_norm": 7.727886199951172, "learning_rate": 4.975064298855017e-05, "loss": 0.2026, "step": 20580 }, { "epoch": 0.7482375172614288, "grad_norm": 1.2424033880233765, "learning_rate": 4.974991134916171e-05, "loss": 0.1834, "step": 20590 }, { "epoch": 0.748600915764227, "grad_norm": 7.272613525390625, "learning_rate": 4.974917864338764e-05, "loss": 0.2266, "step": 20600 }, { "epoch": 0.7489643142670253, "grad_norm": 0.6424925327301025, "learning_rate": 4.974844487125952e-05, "loss": 0.1496, "step": 20610 }, { "epoch": 0.7493277127698234, "grad_norm": 2.064819097518921, "learning_rate": 4.974771003280896e-05, "loss": 0.192, "step": 20620 }, { "epoch": 0.7496911112726216, "grad_norm": 2.55157470703125, "learning_rate": 4.974697412806763e-05, "loss": 0.1863, "step": 20630 }, { "epoch": 0.7500545097754198, "grad_norm": 1.10732901096344, "learning_rate": 4.974623715706723e-05, "loss": 0.1452, "step": 20640 }, { "epoch": 0.7504179082782179, "grad_norm": 6.665337562561035, "learning_rate": 4.9745499119839526e-05, "loss": 0.2393, "step": 20650 }, { "epoch": 0.7507813067810161, "grad_norm": 2.315764904022217, "learning_rate": 4.974476001641631e-05, "loss": 0.1724, "step": 20660 }, { "epoch": 0.7511447052838143, "grad_norm": 1.7643327713012695, "learning_rate": 4.974401984682942e-05, "loss": 0.1676, "step": 20670 }, { "epoch": 0.7515081037866124, "grad_norm": 2.556265115737915, "learning_rate": 4.974327861111075e-05, "loss": 0.1706, "step": 20680 }, { "epoch": 0.7518715022894106, "grad_norm": 1.0939987897872925, "learning_rate": 4.9742536309292257e-05, "loss": 0.1514, "step": 20690 }, { "epoch": 0.7522349007922088, "grad_norm": 2.3087685108184814, "learning_rate": 4.97417929414059e-05, "loss": 0.2064, "step": 20700 }, { "epoch": 0.7525982992950069, "grad_norm": 1.6968719959259033, "learning_rate": 4.974104850748372e-05, "loss": 0.65, "step": 20710 }, { "epoch": 0.7529616977978051, "grad_norm": 1.3144559860229492, "learning_rate": 4.974030300755779e-05, "loss": 3.2825, "step": 20720 }, { "epoch": 0.7533250963006033, "grad_norm": 2.346266031265259, "learning_rate": 4.973955644166022e-05, "loss": 0.1621, "step": 20730 }, { "epoch": 0.7536884948034014, "grad_norm": 0.8026605248451233, "learning_rate": 4.973880880982319e-05, "loss": 0.1566, "step": 20740 }, { "epoch": 0.7540518933061996, "grad_norm": 8.70439624786377, "learning_rate": 4.973806011207891e-05, "loss": 0.2671, "step": 20750 }, { "epoch": 0.7544152918089978, "grad_norm": 0.9762817025184631, "learning_rate": 4.973731034845964e-05, "loss": 0.1692, "step": 20760 }, { "epoch": 0.7547786903117959, "grad_norm": 1.3316736221313477, "learning_rate": 4.973655951899768e-05, "loss": 0.1605, "step": 20770 }, { "epoch": 0.7551420888145941, "grad_norm": 1.9772186279296875, "learning_rate": 4.9735807623725394e-05, "loss": 0.1551, "step": 20780 }, { "epoch": 0.7555054873173922, "grad_norm": 1.4639058113098145, "learning_rate": 4.9735054662675154e-05, "loss": 0.2075, "step": 20790 }, { "epoch": 0.7558688858201904, "grad_norm": 10.605428695678711, "learning_rate": 4.973430063587943e-05, "loss": 0.2542, "step": 20800 }, { "epoch": 0.7562322843229886, "grad_norm": 1.9553091526031494, "learning_rate": 4.9733545543370684e-05, "loss": 0.1353, "step": 20810 }, { "epoch": 0.7565956828257867, "grad_norm": 2.2855403423309326, "learning_rate": 4.9732789385181466e-05, "loss": 0.5004, "step": 20820 }, { "epoch": 0.7569590813285849, "grad_norm": 1.7468841075897217, "learning_rate": 4.973203216134435e-05, "loss": 0.1433, "step": 20830 }, { "epoch": 0.7573224798313831, "grad_norm": 0.9522268772125244, "learning_rate": 4.973127387189197e-05, "loss": 0.1488, "step": 20840 }, { "epoch": 0.7576858783341812, "grad_norm": 13.445122718811035, "learning_rate": 4.9730514516856996e-05, "loss": 0.2154, "step": 20850 }, { "epoch": 0.7580492768369794, "grad_norm": 1.0712549686431885, "learning_rate": 4.972975409627214e-05, "loss": 0.144, "step": 20860 }, { "epoch": 0.7584126753397776, "grad_norm": 0.6894069314002991, "learning_rate": 4.972899261017017e-05, "loss": 0.1612, "step": 20870 }, { "epoch": 0.7587760738425757, "grad_norm": 2.059844970703125, "learning_rate": 4.9728230058583893e-05, "loss": 0.1664, "step": 20880 }, { "epoch": 0.7591394723453739, "grad_norm": 2.0392911434173584, "learning_rate": 4.972746644154616e-05, "loss": 0.1991, "step": 20890 }, { "epoch": 0.7595028708481721, "grad_norm": 2.9800570011138916, "learning_rate": 4.972670175908989e-05, "loss": 0.2725, "step": 20900 }, { "epoch": 0.7598662693509702, "grad_norm": 2.390784502029419, "learning_rate": 4.972593601124801e-05, "loss": 0.3158, "step": 20910 }, { "epoch": 0.7602296678537684, "grad_norm": 6.595739364624023, "learning_rate": 4.972516919805352e-05, "loss": 0.1658, "step": 20920 }, { "epoch": 0.7605930663565666, "grad_norm": 2.2043120861053467, "learning_rate": 4.972440131953947e-05, "loss": 0.163, "step": 20930 }, { "epoch": 0.7609564648593647, "grad_norm": 0.9223461747169495, "learning_rate": 4.972363237573894e-05, "loss": 0.1276, "step": 20940 }, { "epoch": 0.761319863362163, "grad_norm": 12.165254592895508, "learning_rate": 4.972286236668505e-05, "loss": 0.2105, "step": 20950 }, { "epoch": 0.7616832618649612, "grad_norm": 1.2093875408172607, "learning_rate": 4.9722091292410984e-05, "loss": 0.1697, "step": 20960 }, { "epoch": 0.7620466603677593, "grad_norm": 0.8847984075546265, "learning_rate": 4.9721396414828535e-05, "loss": 3.043, "step": 20970 }, { "epoch": 0.7624100588705575, "grad_norm": 1.6682274341583252, "learning_rate": 4.9720623316727705e-05, "loss": 0.1841, "step": 20980 }, { "epoch": 0.7627734573733557, "grad_norm": 1.2780869007110596, "learning_rate": 4.971984915350317e-05, "loss": 0.1412, "step": 20990 }, { "epoch": 0.7631368558761538, "grad_norm": 36.68233108520508, "learning_rate": 4.97190739251883e-05, "loss": 0.2059, "step": 21000 }, { "epoch": 0.7631368558761538, "eval_loss": 0.3906314969062805, "eval_runtime": 180.3915, "eval_samples_per_second": 41.1, "eval_steps_per_second": 5.139, "eval_wer": 0.21157441864686768, "step": 21000 }, { "epoch": 0.763500254378952, "grad_norm": 24.036775588989258, "learning_rate": 4.971829763181647e-05, "loss": 0.3942, "step": 21010 }, { "epoch": 0.7638636528817502, "grad_norm": 1.6546601057052612, "learning_rate": 4.971752027342115e-05, "loss": 0.1555, "step": 21020 }, { "epoch": 0.7642270513845483, "grad_norm": 3.100032091140747, "learning_rate": 4.971674185003583e-05, "loss": 0.1917, "step": 21030 }, { "epoch": 0.7645904498873465, "grad_norm": 3.2824084758758545, "learning_rate": 4.9715962361694045e-05, "loss": 0.1744, "step": 21040 }, { "epoch": 0.7649538483901447, "grad_norm": 7.680720329284668, "learning_rate": 4.9715181808429376e-05, "loss": 0.2567, "step": 21050 }, { "epoch": 0.7653172468929428, "grad_norm": 1.5478154420852661, "learning_rate": 4.971440019027547e-05, "loss": 0.1949, "step": 21060 }, { "epoch": 0.765680645395741, "grad_norm": 1.1294565200805664, "learning_rate": 4.971361750726598e-05, "loss": 0.1546, "step": 21070 }, { "epoch": 0.7660440438985391, "grad_norm": 3.339749813079834, "learning_rate": 4.971283375943465e-05, "loss": 0.1784, "step": 21080 }, { "epoch": 0.7664074424013373, "grad_norm": 1.9784200191497803, "learning_rate": 4.9712048946815244e-05, "loss": 0.8969, "step": 21090 }, { "epoch": 0.7667708409041355, "grad_norm": 13.550655364990234, "learning_rate": 4.971126306944157e-05, "loss": 0.2037, "step": 21100 }, { "epoch": 0.7671342394069336, "grad_norm": 60.52021408081055, "learning_rate": 4.971047612734749e-05, "loss": 1.0649, "step": 21110 }, { "epoch": 0.7674976379097318, "grad_norm": 1.7544801235198975, "learning_rate": 4.970968812056693e-05, "loss": 0.1619, "step": 21120 }, { "epoch": 0.76786103641253, "grad_norm": 2.0749471187591553, "learning_rate": 4.970889904913382e-05, "loss": 0.1934, "step": 21130 }, { "epoch": 0.7682244349153281, "grad_norm": 2.33097767829895, "learning_rate": 4.970810891308215e-05, "loss": 0.3121, "step": 21140 }, { "epoch": 0.7685878334181263, "grad_norm": 3.5586440563201904, "learning_rate": 4.9707317712445996e-05, "loss": 0.2198, "step": 21150 }, { "epoch": 0.7689512319209245, "grad_norm": 1.7430351972579956, "learning_rate": 4.970652544725942e-05, "loss": 0.1884, "step": 21160 }, { "epoch": 0.7693146304237226, "grad_norm": 1.2475924491882324, "learning_rate": 4.9705732117556574e-05, "loss": 0.183, "step": 21170 }, { "epoch": 0.7696780289265208, "grad_norm": 1.369491457939148, "learning_rate": 4.970493772337164e-05, "loss": 0.1854, "step": 21180 }, { "epoch": 0.770041427429319, "grad_norm": 1.8093339204788208, "learning_rate": 4.970414226473883e-05, "loss": 0.1389, "step": 21190 }, { "epoch": 0.7704048259321171, "grad_norm": 15.3746919631958, "learning_rate": 4.9703345741692425e-05, "loss": 0.2603, "step": 21200 }, { "epoch": 0.7707682244349153, "grad_norm": 0.9604819416999817, "learning_rate": 4.970254815426675e-05, "loss": 0.1663, "step": 21210 }, { "epoch": 0.7711316229377135, "grad_norm": 1.3457413911819458, "learning_rate": 4.970174950249617e-05, "loss": 0.1784, "step": 21220 }, { "epoch": 0.7714950214405116, "grad_norm": 3.19975209236145, "learning_rate": 4.970094978641509e-05, "loss": 0.2369, "step": 21230 }, { "epoch": 0.7718584199433098, "grad_norm": 1.4974329471588135, "learning_rate": 4.970014900605797e-05, "loss": 0.1553, "step": 21240 }, { "epoch": 0.772221818446108, "grad_norm": 6.426448345184326, "learning_rate": 4.969934716145932e-05, "loss": 0.1848, "step": 21250 }, { "epoch": 0.7725852169489061, "grad_norm": 4.081672668457031, "learning_rate": 4.969854425265368e-05, "loss": 0.2135, "step": 21260 }, { "epoch": 0.7729486154517043, "grad_norm": 0.7796603441238403, "learning_rate": 4.9697740279675635e-05, "loss": 0.2853, "step": 21270 }, { "epoch": 0.7733120139545026, "grad_norm": 1.2303035259246826, "learning_rate": 4.969693524255984e-05, "loss": 0.5319, "step": 21280 }, { "epoch": 0.7736754124573006, "grad_norm": 0.9134958386421204, "learning_rate": 4.9696129141340986e-05, "loss": 0.1789, "step": 21290 }, { "epoch": 0.7740388109600989, "grad_norm": 1.8099846839904785, "learning_rate": 4.969532197605379e-05, "loss": 0.1967, "step": 21300 }, { "epoch": 0.7744022094628971, "grad_norm": 3.75593900680542, "learning_rate": 4.969451374673304e-05, "loss": 0.1908, "step": 21310 }, { "epoch": 0.7747656079656952, "grad_norm": 2.851921319961548, "learning_rate": 4.969370445341355e-05, "loss": 0.1616, "step": 21320 }, { "epoch": 0.7751290064684934, "grad_norm": 2.978349447250366, "learning_rate": 4.96928940961302e-05, "loss": 0.1682, "step": 21330 }, { "epoch": 0.7754924049712916, "grad_norm": 2.945326089859009, "learning_rate": 4.96920826749179e-05, "loss": 0.1897, "step": 21340 }, { "epoch": 0.7758558034740897, "grad_norm": 5.529159069061279, "learning_rate": 4.9691270189811614e-05, "loss": 0.2351, "step": 21350 }, { "epoch": 0.7762192019768879, "grad_norm": 0.816582441329956, "learning_rate": 4.969045664084634e-05, "loss": 0.255, "step": 21360 }, { "epoch": 0.776582600479686, "grad_norm": 3.373413324356079, "learning_rate": 4.968964202805715e-05, "loss": 0.165, "step": 21370 }, { "epoch": 0.7769459989824842, "grad_norm": 1.4986653327941895, "learning_rate": 4.968882635147912e-05, "loss": 0.1803, "step": 21380 }, { "epoch": 0.7773093974852824, "grad_norm": 4.049030303955078, "learning_rate": 4.968800961114741e-05, "loss": 0.2312, "step": 21390 }, { "epoch": 0.7776727959880805, "grad_norm": 1.8616725206375122, "learning_rate": 4.968719180709721e-05, "loss": 0.2038, "step": 21400 }, { "epoch": 0.7780361944908787, "grad_norm": 0.7410339117050171, "learning_rate": 4.968637293936374e-05, "loss": 0.1736, "step": 21410 }, { "epoch": 0.7783995929936769, "grad_norm": 0.9004227519035339, "learning_rate": 4.968555300798231e-05, "loss": 0.6926, "step": 21420 }, { "epoch": 0.778762991496475, "grad_norm": 1.9912917613983154, "learning_rate": 4.968473201298822e-05, "loss": 0.183, "step": 21430 }, { "epoch": 0.7791263899992732, "grad_norm": 1.5098110437393188, "learning_rate": 4.968390995441686e-05, "loss": 0.1555, "step": 21440 }, { "epoch": 0.7794897885020714, "grad_norm": 1.5687317848205566, "learning_rate": 4.9683086832303655e-05, "loss": 0.199, "step": 21450 }, { "epoch": 0.7798531870048695, "grad_norm": 1.456758975982666, "learning_rate": 4.9682262646684054e-05, "loss": 0.1573, "step": 21460 }, { "epoch": 0.7802165855076677, "grad_norm": 1.152894377708435, "learning_rate": 4.9681437397593575e-05, "loss": 0.136, "step": 21470 }, { "epoch": 0.7805799840104659, "grad_norm": 6.458597183227539, "learning_rate": 4.968061108506777e-05, "loss": 0.2111, "step": 21480 }, { "epoch": 0.780943382513264, "grad_norm": 1.3398655652999878, "learning_rate": 4.967978370914226e-05, "loss": 0.1785, "step": 21490 }, { "epoch": 0.7813067810160622, "grad_norm": 12.363832473754883, "learning_rate": 4.967895526985267e-05, "loss": 0.217, "step": 21500 }, { "epoch": 0.7816701795188604, "grad_norm": 3.800936698913574, "learning_rate": 4.967812576723471e-05, "loss": 0.1533, "step": 21510 }, { "epoch": 0.7820335780216585, "grad_norm": 0.9531782865524292, "learning_rate": 4.967729520132411e-05, "loss": 1.353, "step": 21520 }, { "epoch": 0.7823969765244567, "grad_norm": 1.3066377639770508, "learning_rate": 4.967646357215667e-05, "loss": 0.1338, "step": 21530 }, { "epoch": 0.7827603750272549, "grad_norm": 1.1814554929733276, "learning_rate": 4.967563087976821e-05, "loss": 0.1735, "step": 21540 }, { "epoch": 0.783123773530053, "grad_norm": 4.6233367919921875, "learning_rate": 4.967479712419461e-05, "loss": 0.2266, "step": 21550 }, { "epoch": 0.7834871720328512, "grad_norm": 1.366377353668213, "learning_rate": 4.96739623054718e-05, "loss": 0.1595, "step": 21560 }, { "epoch": 0.7838505705356494, "grad_norm": 2.0722217559814453, "learning_rate": 4.967312642363574e-05, "loss": 0.1721, "step": 21570 }, { "epoch": 0.7842139690384475, "grad_norm": 2.186340570449829, "learning_rate": 4.967228947872245e-05, "loss": 0.1653, "step": 21580 }, { "epoch": 0.7845773675412457, "grad_norm": 2.4222512245178223, "learning_rate": 4.9671451470767996e-05, "loss": 0.1446, "step": 21590 }, { "epoch": 0.784940766044044, "grad_norm": 62.15577697753906, "learning_rate": 4.9670612399808467e-05, "loss": 0.2911, "step": 21600 }, { "epoch": 0.784940766044044, "eval_loss": 0.3627218008041382, "eval_runtime": 179.8971, "eval_samples_per_second": 41.212, "eval_steps_per_second": 5.153, "eval_wer": 0.21580409170947773, "step": 21600 }, { "epoch": 0.785304164546842, "grad_norm": 1.236609935760498, "learning_rate": 4.9669772265880044e-05, "loss": 0.1417, "step": 21610 }, { "epoch": 0.7856675630496402, "grad_norm": 1.2447402477264404, "learning_rate": 4.96689310690189e-05, "loss": 0.1508, "step": 21620 }, { "epoch": 0.7860309615524385, "grad_norm": 4.567975997924805, "learning_rate": 4.966808880926129e-05, "loss": 0.3503, "step": 21630 }, { "epoch": 0.7863943600552366, "grad_norm": 0.9699403047561646, "learning_rate": 4.96672454866435e-05, "loss": 0.1615, "step": 21640 }, { "epoch": 0.7867577585580348, "grad_norm": 11.004621505737305, "learning_rate": 4.966640110120187e-05, "loss": 0.2604, "step": 21650 }, { "epoch": 0.7871211570608329, "grad_norm": 1.3322606086730957, "learning_rate": 4.9665555652972784e-05, "loss": 0.1958, "step": 21660 }, { "epoch": 0.7874845555636311, "grad_norm": 1.0020729303359985, "learning_rate": 4.966470914199266e-05, "loss": 0.1207, "step": 21670 }, { "epoch": 0.7878479540664293, "grad_norm": 3.457019567489624, "learning_rate": 4.9663861568297976e-05, "loss": 0.3774, "step": 21680 }, { "epoch": 0.7882113525692274, "grad_norm": 2.4993362426757812, "learning_rate": 4.9663012931925254e-05, "loss": 0.1537, "step": 21690 }, { "epoch": 0.7885747510720256, "grad_norm": 11.104598999023438, "learning_rate": 4.966216323291106e-05, "loss": 0.2472, "step": 21700 }, { "epoch": 0.7889381495748238, "grad_norm": 1.5027676820755005, "learning_rate": 4.9661312471291996e-05, "loss": 0.154, "step": 21710 }, { "epoch": 0.7893015480776219, "grad_norm": 1.1929068565368652, "learning_rate": 4.9660460647104726e-05, "loss": 0.1416, "step": 21720 }, { "epoch": 0.7896649465804201, "grad_norm": 17.008617401123047, "learning_rate": 4.965960776038594e-05, "loss": 0.3858, "step": 21730 }, { "epoch": 0.7900283450832183, "grad_norm": 1.6043013334274292, "learning_rate": 4.96587538111724e-05, "loss": 0.1624, "step": 21740 }, { "epoch": 0.7903917435860164, "grad_norm": 10.960922241210938, "learning_rate": 4.96578987995009e-05, "loss": 0.2034, "step": 21750 }, { "epoch": 0.7907551420888146, "grad_norm": 1.4807969331741333, "learning_rate": 4.965704272540826e-05, "loss": 0.1491, "step": 21760 }, { "epoch": 0.7911185405916128, "grad_norm": 0.9724571108818054, "learning_rate": 4.965618558893139e-05, "loss": 0.1455, "step": 21770 }, { "epoch": 0.7914819390944109, "grad_norm": 2.6035313606262207, "learning_rate": 4.965532739010722e-05, "loss": 0.1696, "step": 21780 }, { "epoch": 0.7918453375972091, "grad_norm": 0.7998749017715454, "learning_rate": 4.9654468128972695e-05, "loss": 0.1549, "step": 21790 }, { "epoch": 0.7922087361000073, "grad_norm": 14.13917350769043, "learning_rate": 4.965360780556487e-05, "loss": 0.2124, "step": 21800 }, { "epoch": 0.7925721346028054, "grad_norm": 1.88883638381958, "learning_rate": 4.9652746419920804e-05, "loss": 0.1475, "step": 21810 }, { "epoch": 0.7929355331056036, "grad_norm": 1.5585650205612183, "learning_rate": 4.965188397207761e-05, "loss": 0.1534, "step": 21820 }, { "epoch": 0.7932989316084018, "grad_norm": 2.6418206691741943, "learning_rate": 4.965102046207244e-05, "loss": 0.1608, "step": 21830 }, { "epoch": 0.7936623301111999, "grad_norm": 1.1672085523605347, "learning_rate": 4.965015588994251e-05, "loss": 0.1596, "step": 21840 }, { "epoch": 0.7940257286139981, "grad_norm": 3.009610652923584, "learning_rate": 4.964929025572507e-05, "loss": 0.1805, "step": 21850 }, { "epoch": 0.7943891271167963, "grad_norm": 1.8774985074996948, "learning_rate": 4.964842355945742e-05, "loss": 0.1583, "step": 21860 }, { "epoch": 0.7947525256195944, "grad_norm": 1.1219382286071777, "learning_rate": 4.964755580117689e-05, "loss": 0.1524, "step": 21870 }, { "epoch": 0.7951159241223926, "grad_norm": 6.0511627197265625, "learning_rate": 4.964668698092088e-05, "loss": 0.2349, "step": 21880 }, { "epoch": 0.7954793226251908, "grad_norm": 3.4487464427948, "learning_rate": 4.9645817098726824e-05, "loss": 0.1915, "step": 21890 }, { "epoch": 0.7958427211279889, "grad_norm": 4.096559524536133, "learning_rate": 4.9644946154632196e-05, "loss": 0.2067, "step": 21900 }, { "epoch": 0.7962061196307871, "grad_norm": 4.144627571105957, "learning_rate": 4.9644074148674526e-05, "loss": 0.1564, "step": 21910 }, { "epoch": 0.7965695181335853, "grad_norm": 1.3851386308670044, "learning_rate": 4.9643201080891384e-05, "loss": 0.1656, "step": 21920 }, { "epoch": 0.7969329166363834, "grad_norm": 1.3050576448440552, "learning_rate": 4.9642326951320384e-05, "loss": 0.1555, "step": 21930 }, { "epoch": 0.7972963151391816, "grad_norm": 1.578134298324585, "learning_rate": 4.96414517599992e-05, "loss": 0.1637, "step": 21940 }, { "epoch": 0.7976597136419797, "grad_norm": 10.813237190246582, "learning_rate": 4.9640575506965535e-05, "loss": 0.3143, "step": 21950 }, { "epoch": 0.798023112144778, "grad_norm": 0.7118828892707825, "learning_rate": 4.963969819225713e-05, "loss": 0.1581, "step": 21960 }, { "epoch": 0.7983865106475762, "grad_norm": 1.389856219291687, "learning_rate": 4.963881981591182e-05, "loss": 0.1466, "step": 21970 }, { "epoch": 0.7987499091503742, "grad_norm": 1.1921494007110596, "learning_rate": 4.963794037796741e-05, "loss": 0.1604, "step": 21980 }, { "epoch": 0.7991133076531725, "grad_norm": 4.355441093444824, "learning_rate": 4.963705987846182e-05, "loss": 0.1792, "step": 21990 }, { "epoch": 0.7994767061559707, "grad_norm": 8.20235824584961, "learning_rate": 4.963617831743298e-05, "loss": 0.2314, "step": 22000 }, { "epoch": 0.7998401046587688, "grad_norm": 1.3720426559448242, "learning_rate": 4.963529569491887e-05, "loss": 0.1378, "step": 22010 }, { "epoch": 0.800203503161567, "grad_norm": 1.490679383277893, "learning_rate": 4.963441201095752e-05, "loss": 0.1505, "step": 22020 }, { "epoch": 0.8005669016643652, "grad_norm": 1.576416254043579, "learning_rate": 4.963352726558701e-05, "loss": 0.1379, "step": 22030 }, { "epoch": 0.8009303001671633, "grad_norm": 1.547780156135559, "learning_rate": 4.9632641458845454e-05, "loss": 0.1584, "step": 22040 }, { "epoch": 0.8012936986699615, "grad_norm": 41.95133972167969, "learning_rate": 4.963175459077102e-05, "loss": 0.6762, "step": 22050 }, { "epoch": 0.8016570971727597, "grad_norm": 0.8984355330467224, "learning_rate": 4.963086666140192e-05, "loss": 0.1513, "step": 22060 }, { "epoch": 0.8020204956755578, "grad_norm": 1.6865235567092896, "learning_rate": 4.9629977670776404e-05, "loss": 0.1659, "step": 22070 }, { "epoch": 0.802383894178356, "grad_norm": 5.291965007781982, "learning_rate": 4.96290876189328e-05, "loss": 0.1735, "step": 22080 }, { "epoch": 0.8027472926811542, "grad_norm": 0.9124179482460022, "learning_rate": 4.962819650590943e-05, "loss": 0.163, "step": 22090 }, { "epoch": 0.8031106911839523, "grad_norm": 5.151334762573242, "learning_rate": 4.9627304331744705e-05, "loss": 0.2997, "step": 22100 }, { "epoch": 0.8034740896867505, "grad_norm": 0.7093039155006409, "learning_rate": 4.9626411096477066e-05, "loss": 0.1297, "step": 22110 }, { "epoch": 0.8038374881895487, "grad_norm": 0.7643496990203857, "learning_rate": 4.962551680014499e-05, "loss": 0.1568, "step": 22120 }, { "epoch": 0.8042008866923468, "grad_norm": 2.0619888305664062, "learning_rate": 4.9624621442787005e-05, "loss": 0.1685, "step": 22130 }, { "epoch": 0.804564285195145, "grad_norm": 1.3836963176727295, "learning_rate": 4.9623725024441704e-05, "loss": 0.1597, "step": 22140 }, { "epoch": 0.8049276836979432, "grad_norm": 10.014172554016113, "learning_rate": 4.96228275451477e-05, "loss": 0.2371, "step": 22150 }, { "epoch": 0.8052910822007413, "grad_norm": 0.8201650381088257, "learning_rate": 4.962192900494367e-05, "loss": 0.1457, "step": 22160 }, { "epoch": 0.8056544807035395, "grad_norm": 2.9909164905548096, "learning_rate": 4.962102940386832e-05, "loss": 0.1584, "step": 22170 }, { "epoch": 0.8060178792063377, "grad_norm": 1.8986990451812744, "learning_rate": 4.9620128741960414e-05, "loss": 0.1521, "step": 22180 }, { "epoch": 0.8063812777091358, "grad_norm": 1.2521679401397705, "learning_rate": 4.9619227019258766e-05, "loss": 0.1398, "step": 22190 }, { "epoch": 0.806744676211934, "grad_norm": 9.087230682373047, "learning_rate": 4.9618324235802214e-05, "loss": 0.2414, "step": 22200 }, { "epoch": 0.806744676211934, "eval_loss": 0.3814217448234558, "eval_runtime": 180.9296, "eval_samples_per_second": 40.977, "eval_steps_per_second": 5.124, "eval_wer": 0.21859059306188394, "step": 22200 }, { "epoch": 0.8071080747147322, "grad_norm": 1.3065155744552612, "learning_rate": 4.9617420391629666e-05, "loss": 0.1382, "step": 22210 }, { "epoch": 0.8074714732175303, "grad_norm": 1.0691299438476562, "learning_rate": 4.961651548678006e-05, "loss": 0.1692, "step": 22220 }, { "epoch": 0.8078348717203285, "grad_norm": 2.515131711959839, "learning_rate": 4.961560952129239e-05, "loss": 0.1719, "step": 22230 }, { "epoch": 0.8081982702231267, "grad_norm": 1.3650884628295898, "learning_rate": 4.9614702495205686e-05, "loss": 0.1918, "step": 22240 }, { "epoch": 0.8085616687259248, "grad_norm": 4.730445384979248, "learning_rate": 4.961379440855903e-05, "loss": 0.2002, "step": 22250 }, { "epoch": 0.808925067228723, "grad_norm": 1.0421544313430786, "learning_rate": 4.9612885261391555e-05, "loss": 0.1544, "step": 22260 }, { "epoch": 0.8092884657315211, "grad_norm": 1.1957643032073975, "learning_rate": 4.961197505374242e-05, "loss": 0.1471, "step": 22270 }, { "epoch": 0.8096518642343193, "grad_norm": 2.936429977416992, "learning_rate": 4.961106378565086e-05, "loss": 0.2068, "step": 22280 }, { "epoch": 0.8100152627371175, "grad_norm": 2.0803070068359375, "learning_rate": 4.961015145715612e-05, "loss": 0.1496, "step": 22290 }, { "epoch": 0.8103786612399156, "grad_norm": 10.564451217651367, "learning_rate": 4.960923806829752e-05, "loss": 0.2549, "step": 22300 }, { "epoch": 0.8107420597427138, "grad_norm": 1.0569120645523071, "learning_rate": 4.9608323619114406e-05, "loss": 0.1624, "step": 22310 }, { "epoch": 0.8111054582455121, "grad_norm": 1.4505226612091064, "learning_rate": 4.960740810964619e-05, "loss": 0.1523, "step": 22320 }, { "epoch": 0.8114688567483102, "grad_norm": 5.100767135620117, "learning_rate": 4.960649153993231e-05, "loss": 0.1562, "step": 22330 }, { "epoch": 0.8118322552511084, "grad_norm": 2.2787342071533203, "learning_rate": 4.960557391001226e-05, "loss": 0.1691, "step": 22340 }, { "epoch": 0.8121956537539066, "grad_norm": 15.405048370361328, "learning_rate": 4.960465521992558e-05, "loss": 0.2542, "step": 22350 }, { "epoch": 0.8125590522567047, "grad_norm": 0.7388777732849121, "learning_rate": 4.9603735469711845e-05, "loss": 0.1522, "step": 22360 }, { "epoch": 0.8129224507595029, "grad_norm": 0.9490914344787598, "learning_rate": 4.960281465941069e-05, "loss": 0.1317, "step": 22370 }, { "epoch": 0.8132858492623011, "grad_norm": 2.281085252761841, "learning_rate": 4.960189278906179e-05, "loss": 0.1503, "step": 22380 }, { "epoch": 0.8136492477650992, "grad_norm": 0.9328985810279846, "learning_rate": 4.960096985870486e-05, "loss": 0.1556, "step": 22390 }, { "epoch": 0.8140126462678974, "grad_norm": 4.4524617195129395, "learning_rate": 4.960004586837967e-05, "loss": 0.2387, "step": 22400 }, { "epoch": 0.8143760447706956, "grad_norm": 1.5577040910720825, "learning_rate": 4.959912081812603e-05, "loss": 0.1557, "step": 22410 }, { "epoch": 0.8147394432734937, "grad_norm": 2.358896493911743, "learning_rate": 4.95981947079838e-05, "loss": 0.2016, "step": 22420 }, { "epoch": 0.8151028417762919, "grad_norm": 2.1001386642456055, "learning_rate": 4.9597267537992885e-05, "loss": 0.1587, "step": 22430 }, { "epoch": 0.8154662402790901, "grad_norm": 2.7561607360839844, "learning_rate": 4.959633930819323e-05, "loss": 0.1616, "step": 22440 }, { "epoch": 0.8158296387818882, "grad_norm": 4.204514980316162, "learning_rate": 4.959541001862482e-05, "loss": 0.6089, "step": 22450 }, { "epoch": 0.8161930372846864, "grad_norm": 1.3738398551940918, "learning_rate": 4.959447966932771e-05, "loss": 0.1756, "step": 22460 }, { "epoch": 0.8165564357874846, "grad_norm": 0.705806314945221, "learning_rate": 4.959354826034197e-05, "loss": 0.1213, "step": 22470 }, { "epoch": 0.8169198342902827, "grad_norm": 2.053788661956787, "learning_rate": 4.9592615791707755e-05, "loss": 0.1765, "step": 22480 }, { "epoch": 0.8172832327930809, "grad_norm": 2.0120911598205566, "learning_rate": 4.959168226346521e-05, "loss": 0.1444, "step": 22490 }, { "epoch": 0.8176466312958791, "grad_norm": 6.552361011505127, "learning_rate": 4.959074767565458e-05, "loss": 0.2201, "step": 22500 }, { "epoch": 0.8180100297986772, "grad_norm": 1.3007264137268066, "learning_rate": 4.958981202831613e-05, "loss": 0.1488, "step": 22510 }, { "epoch": 0.8183734283014754, "grad_norm": 1.7885551452636719, "learning_rate": 4.958887532149016e-05, "loss": 2.6491, "step": 22520 }, { "epoch": 0.8187368268042736, "grad_norm": 1.7092806100845337, "learning_rate": 4.9587937555217054e-05, "loss": 0.1946, "step": 22530 }, { "epoch": 0.8191002253070717, "grad_norm": 2.56215238571167, "learning_rate": 4.958699872953719e-05, "loss": 0.1676, "step": 22540 }, { "epoch": 0.8194636238098699, "grad_norm": 2.085753917694092, "learning_rate": 4.958605884449104e-05, "loss": 0.2038, "step": 22550 }, { "epoch": 0.819827022312668, "grad_norm": 0.8225610852241516, "learning_rate": 4.958511790011909e-05, "loss": 0.5185, "step": 22560 }, { "epoch": 0.8201904208154662, "grad_norm": 1.6775872707366943, "learning_rate": 4.9584175896461884e-05, "loss": 0.17, "step": 22570 }, { "epoch": 0.8205538193182644, "grad_norm": 3.4285826683044434, "learning_rate": 4.958323283356001e-05, "loss": 0.164, "step": 22580 }, { "epoch": 0.8209172178210625, "grad_norm": 1.892842411994934, "learning_rate": 4.95822887114541e-05, "loss": 1.2783, "step": 22590 }, { "epoch": 0.8212806163238607, "grad_norm": 4.959444522857666, "learning_rate": 4.9581343530184834e-05, "loss": 0.2062, "step": 22600 }, { "epoch": 0.8216440148266589, "grad_norm": 2.4584267139434814, "learning_rate": 4.958039728979293e-05, "loss": 0.1443, "step": 22610 }, { "epoch": 0.822007413329457, "grad_norm": 1.118804693222046, "learning_rate": 4.957944999031917e-05, "loss": 0.16, "step": 22620 }, { "epoch": 0.8223708118322552, "grad_norm": 1.5434421300888062, "learning_rate": 4.9578501631804365e-05, "loss": 0.2104, "step": 22630 }, { "epoch": 0.8227342103350535, "grad_norm": 1.3116744756698608, "learning_rate": 4.9577552214289374e-05, "loss": 0.1326, "step": 22640 }, { "epoch": 0.8230976088378515, "grad_norm": 11.34653377532959, "learning_rate": 4.95766017378151e-05, "loss": 0.2231, "step": 22650 }, { "epoch": 0.8234610073406498, "grad_norm": 1.0379194021224976, "learning_rate": 4.957565020242251e-05, "loss": 0.1805, "step": 22660 }, { "epoch": 0.823824405843448, "grad_norm": 1.8218019008636475, "learning_rate": 4.957469760815259e-05, "loss": 0.1287, "step": 22670 }, { "epoch": 0.8241878043462461, "grad_norm": 1.1962164640426636, "learning_rate": 4.957374395504638e-05, "loss": 0.4115, "step": 22680 }, { "epoch": 0.8245512028490443, "grad_norm": 1.9947481155395508, "learning_rate": 4.957278924314499e-05, "loss": 0.1407, "step": 22690 }, { "epoch": 0.8249146013518425, "grad_norm": 25.343172073364258, "learning_rate": 4.957183347248953e-05, "loss": 0.4247, "step": 22700 }, { "epoch": 0.8252779998546406, "grad_norm": 1.4444775581359863, "learning_rate": 4.95708766431212e-05, "loss": 0.1641, "step": 22710 }, { "epoch": 0.8256413983574388, "grad_norm": 1.621640920639038, "learning_rate": 4.9569918755081216e-05, "loss": 0.1289, "step": 22720 }, { "epoch": 0.826004796860237, "grad_norm": 1.018471360206604, "learning_rate": 4.9568959808410854e-05, "loss": 0.1694, "step": 22730 }, { "epoch": 0.8263681953630351, "grad_norm": 3.1913223266601562, "learning_rate": 4.9567999803151424e-05, "loss": 0.1898, "step": 22740 }, { "epoch": 0.8267315938658333, "grad_norm": 8.095772743225098, "learning_rate": 4.956703873934431e-05, "loss": 0.2246, "step": 22750 }, { "epoch": 0.8270949923686315, "grad_norm": 1.738887906074524, "learning_rate": 4.956607661703089e-05, "loss": 0.1678, "step": 22760 }, { "epoch": 0.8274583908714296, "grad_norm": 0.9688615202903748, "learning_rate": 4.9565113436252644e-05, "loss": 0.1341, "step": 22770 }, { "epoch": 0.8278217893742278, "grad_norm": 2.2478010654449463, "learning_rate": 4.956414919705106e-05, "loss": 0.1823, "step": 22780 }, { "epoch": 0.828185187877026, "grad_norm": 1.6718928813934326, "learning_rate": 4.956318389946769e-05, "loss": 0.1543, "step": 22790 }, { "epoch": 0.8285485863798241, "grad_norm": 5.168727874755859, "learning_rate": 4.956221754354412e-05, "loss": 0.1795, "step": 22800 }, { "epoch": 0.8285485863798241, "eval_loss": 0.3908107876777649, "eval_runtime": 180.5873, "eval_samples_per_second": 41.055, "eval_steps_per_second": 5.133, "eval_wer": 0.20674569317624847, "step": 22800 }, { "epoch": 0.8289119848826223, "grad_norm": 0.9549854397773743, "learning_rate": 4.956125012932199e-05, "loss": 0.1559, "step": 22810 }, { "epoch": 0.8292753833854205, "grad_norm": 3.2057716846466064, "learning_rate": 4.9560281656842977e-05, "loss": 0.1675, "step": 22820 }, { "epoch": 0.8296387818882186, "grad_norm": 1.7775851488113403, "learning_rate": 4.955931212614882e-05, "loss": 0.1997, "step": 22830 }, { "epoch": 0.8300021803910168, "grad_norm": 1.7028132677078247, "learning_rate": 4.9558341537281274e-05, "loss": 0.1505, "step": 22840 }, { "epoch": 0.8303655788938149, "grad_norm": 2.7027060985565186, "learning_rate": 4.955736989028218e-05, "loss": 0.2009, "step": 22850 }, { "epoch": 0.8307289773966131, "grad_norm": 1.8419814109802246, "learning_rate": 4.955639718519339e-05, "loss": 0.1355, "step": 22860 }, { "epoch": 0.8310923758994113, "grad_norm": 0.8633226156234741, "learning_rate": 4.955542342205682e-05, "loss": 0.178, "step": 22870 }, { "epoch": 0.8314557744022094, "grad_norm": 6.966017723083496, "learning_rate": 4.955444860091442e-05, "loss": 0.1885, "step": 22880 }, { "epoch": 0.8318191729050076, "grad_norm": 1.9565801620483398, "learning_rate": 4.955347272180819e-05, "loss": 0.1485, "step": 22890 }, { "epoch": 0.8321825714078058, "grad_norm": 22.704593658447266, "learning_rate": 4.9552495784780196e-05, "loss": 0.2294, "step": 22900 }, { "epoch": 0.8325459699106039, "grad_norm": 2.0515658855438232, "learning_rate": 4.95515177898725e-05, "loss": 0.166, "step": 22910 }, { "epoch": 0.8329093684134021, "grad_norm": 2.9277150630950928, "learning_rate": 4.9550538737127275e-05, "loss": 0.8898, "step": 22920 }, { "epoch": 0.8332727669162003, "grad_norm": 3.9280052185058594, "learning_rate": 4.9549558626586676e-05, "loss": 0.171, "step": 22930 }, { "epoch": 0.8336361654189984, "grad_norm": 2.5431272983551025, "learning_rate": 4.954857745829294e-05, "loss": 0.1539, "step": 22940 }, { "epoch": 0.8339995639217966, "grad_norm": 2.815434694290161, "learning_rate": 4.954759523228835e-05, "loss": 0.2126, "step": 22950 }, { "epoch": 0.8343629624245948, "grad_norm": 0.6958141922950745, "learning_rate": 4.9546611948615224e-05, "loss": 0.2069, "step": 22960 }, { "epoch": 0.8347263609273929, "grad_norm": 0.7068191766738892, "learning_rate": 4.9545627607315924e-05, "loss": 0.1287, "step": 22970 }, { "epoch": 0.8350897594301911, "grad_norm": 1.8746801614761353, "learning_rate": 4.954464220843287e-05, "loss": 0.1488, "step": 22980 }, { "epoch": 0.8354531579329894, "grad_norm": 1.5134693384170532, "learning_rate": 4.95436557520085e-05, "loss": 0.1337, "step": 22990 }, { "epoch": 0.8358165564357874, "grad_norm": 4.778042316436768, "learning_rate": 4.9542668238085344e-05, "loss": 0.2172, "step": 23000 }, { "epoch": 0.8361799549385857, "grad_norm": 1.074409008026123, "learning_rate": 4.9541679666705924e-05, "loss": 0.1696, "step": 23010 }, { "epoch": 0.8365433534413839, "grad_norm": 1.6725049018859863, "learning_rate": 4.954069003791286e-05, "loss": 0.136, "step": 23020 }, { "epoch": 0.836906751944182, "grad_norm": 3.194450616836548, "learning_rate": 4.953969935174877e-05, "loss": 0.2067, "step": 23030 }, { "epoch": 0.8372701504469802, "grad_norm": 7.7923150062561035, "learning_rate": 4.9538707608256345e-05, "loss": 0.1938, "step": 23040 }, { "epoch": 0.8376335489497784, "grad_norm": 8.767574310302734, "learning_rate": 4.953771480747833e-05, "loss": 0.2473, "step": 23050 }, { "epoch": 0.8379969474525765, "grad_norm": 1.3911685943603516, "learning_rate": 4.953672094945748e-05, "loss": 0.1497, "step": 23060 }, { "epoch": 0.8383603459553747, "grad_norm": 0.7775372266769409, "learning_rate": 4.953572603423662e-05, "loss": 0.7581, "step": 23070 }, { "epoch": 0.8387237444581729, "grad_norm": 2.6937413215637207, "learning_rate": 4.9534730061858634e-05, "loss": 0.1849, "step": 23080 }, { "epoch": 0.839087142960971, "grad_norm": 0.7375633716583252, "learning_rate": 4.953373303236642e-05, "loss": 0.1706, "step": 23090 }, { "epoch": 0.8394505414637692, "grad_norm": 3.070746421813965, "learning_rate": 4.953273494580295e-05, "loss": 0.2114, "step": 23100 }, { "epoch": 0.8398139399665674, "grad_norm": 0.7470118403434753, "learning_rate": 4.953173580221121e-05, "loss": 0.13, "step": 23110 }, { "epoch": 0.8401773384693655, "grad_norm": 1.040595531463623, "learning_rate": 4.953073560163426e-05, "loss": 0.2088, "step": 23120 }, { "epoch": 0.8405407369721637, "grad_norm": 3.9858949184417725, "learning_rate": 4.95297343441152e-05, "loss": 0.1528, "step": 23130 }, { "epoch": 0.8409041354749618, "grad_norm": 1.4031178951263428, "learning_rate": 4.952873202969716e-05, "loss": 2.5826, "step": 23140 }, { "epoch": 0.84126753397776, "grad_norm": 16.660646438598633, "learning_rate": 4.952772865842332e-05, "loss": 0.3101, "step": 23150 }, { "epoch": 0.8416309324805582, "grad_norm": 1.21910560131073, "learning_rate": 4.952672423033693e-05, "loss": 0.1326, "step": 23160 }, { "epoch": 0.8419943309833563, "grad_norm": 1.4494057893753052, "learning_rate": 4.952571874548126e-05, "loss": 0.1567, "step": 23170 }, { "epoch": 0.8423577294861545, "grad_norm": 1.1903733015060425, "learning_rate": 4.952471220389964e-05, "loss": 0.1537, "step": 23180 }, { "epoch": 0.8427211279889527, "grad_norm": 1.0293620824813843, "learning_rate": 4.9523704605635414e-05, "loss": 0.1695, "step": 23190 }, { "epoch": 0.8430845264917508, "grad_norm": 9.536385536193848, "learning_rate": 4.9522695950732025e-05, "loss": 0.2702, "step": 23200 }, { "epoch": 0.843447924994549, "grad_norm": 1.1565468311309814, "learning_rate": 4.9521686239232915e-05, "loss": 0.1452, "step": 23210 }, { "epoch": 0.8438113234973472, "grad_norm": 1.0805953741073608, "learning_rate": 4.9520675471181586e-05, "loss": 0.1478, "step": 23220 }, { "epoch": 0.8441747220001453, "grad_norm": 2.7216696739196777, "learning_rate": 4.95196636466216e-05, "loss": 0.1965, "step": 23230 }, { "epoch": 0.8445381205029435, "grad_norm": 2.2064578533172607, "learning_rate": 4.9518650765596564e-05, "loss": 0.213, "step": 23240 }, { "epoch": 0.8449015190057417, "grad_norm": 11.686285972595215, "learning_rate": 4.951763682815009e-05, "loss": 0.2929, "step": 23250 }, { "epoch": 0.8452649175085398, "grad_norm": 1.6271568536758423, "learning_rate": 4.9516621834325885e-05, "loss": 0.1406, "step": 23260 }, { "epoch": 0.845628316011338, "grad_norm": 2.791619300842285, "learning_rate": 4.951560578416767e-05, "loss": 0.1431, "step": 23270 }, { "epoch": 0.8459917145141362, "grad_norm": 1.9396895170211792, "learning_rate": 4.951458867771923e-05, "loss": 0.1516, "step": 23280 }, { "epoch": 0.8463551130169343, "grad_norm": 0.9364364147186279, "learning_rate": 4.951357051502439e-05, "loss": 0.1935, "step": 23290 }, { "epoch": 0.8467185115197325, "grad_norm": 2.275146007537842, "learning_rate": 4.9512551296127005e-05, "loss": 0.1832, "step": 23300 }, { "epoch": 0.8470819100225307, "grad_norm": 1.4089415073394775, "learning_rate": 4.951153102107101e-05, "loss": 0.1511, "step": 23310 }, { "epoch": 0.8474453085253288, "grad_norm": 1.2446107864379883, "learning_rate": 4.951050968990035e-05, "loss": 0.282, "step": 23320 }, { "epoch": 0.847808707028127, "grad_norm": 2.595438241958618, "learning_rate": 4.950948730265905e-05, "loss": 0.1643, "step": 23330 }, { "epoch": 0.8481721055309253, "grad_norm": 1.1884585618972778, "learning_rate": 4.950846385939114e-05, "loss": 0.1445, "step": 23340 }, { "epoch": 0.8485355040337234, "grad_norm": 33.609004974365234, "learning_rate": 4.9507439360140716e-05, "loss": 0.185, "step": 23350 }, { "epoch": 0.8488989025365216, "grad_norm": 0.573637068271637, "learning_rate": 4.950641380495194e-05, "loss": 0.1417, "step": 23360 }, { "epoch": 0.8492623010393198, "grad_norm": 1.1126424074172974, "learning_rate": 4.9505387193868975e-05, "loss": 0.1592, "step": 23370 }, { "epoch": 0.8496256995421179, "grad_norm": 2.466045379638672, "learning_rate": 4.9504359526936074e-05, "loss": 0.1507, "step": 23380 }, { "epoch": 0.8499890980449161, "grad_norm": 1.273472547531128, "learning_rate": 4.95033308041975e-05, "loss": 0.174, "step": 23390 }, { "epoch": 0.8503524965477143, "grad_norm": 5.497190475463867, "learning_rate": 4.9502301025697595e-05, "loss": 0.2269, "step": 23400 }, { "epoch": 0.8503524965477143, "eval_loss": 0.3661801218986511, "eval_runtime": 181.0852, "eval_samples_per_second": 40.942, "eval_steps_per_second": 5.119, "eval_wer": 0.198767404287763, "step": 23400 }, { "epoch": 0.8507158950505124, "grad_norm": 0.740798830986023, "learning_rate": 4.950127019148071e-05, "loss": 0.148, "step": 23410 }, { "epoch": 0.8510792935533106, "grad_norm": 1.7785030603408813, "learning_rate": 4.950023830159127e-05, "loss": 0.175, "step": 23420 }, { "epoch": 0.8514426920561087, "grad_norm": 0.7675313949584961, "learning_rate": 4.949920535607374e-05, "loss": 0.1635, "step": 23430 }, { "epoch": 0.8518060905589069, "grad_norm": 0.9880558252334595, "learning_rate": 4.9498171354972617e-05, "loss": 0.1732, "step": 23440 }, { "epoch": 0.8521694890617051, "grad_norm": 5.804686069488525, "learning_rate": 4.9497136298332454e-05, "loss": 0.2142, "step": 23450 }, { "epoch": 0.8525328875645032, "grad_norm": 1.063359022140503, "learning_rate": 4.949610018619785e-05, "loss": 0.1529, "step": 23460 }, { "epoch": 0.8528962860673014, "grad_norm": 1.9043885469436646, "learning_rate": 4.949506301861344e-05, "loss": 0.1633, "step": 23470 }, { "epoch": 0.8532596845700996, "grad_norm": 2.0380702018737793, "learning_rate": 4.9494024795623926e-05, "loss": 0.1595, "step": 23480 }, { "epoch": 0.8536230830728977, "grad_norm": 1.65935218334198, "learning_rate": 4.949298551727403e-05, "loss": 0.1526, "step": 23490 }, { "epoch": 0.8539864815756959, "grad_norm": 1.7575215101242065, "learning_rate": 4.9491945183608536e-05, "loss": 0.1924, "step": 23500 }, { "epoch": 0.8543498800784941, "grad_norm": 2.332193374633789, "learning_rate": 4.949090379467226e-05, "loss": 0.1536, "step": 23510 }, { "epoch": 0.8547132785812922, "grad_norm": 1.0475032329559326, "learning_rate": 4.948986135051009e-05, "loss": 0.1322, "step": 23520 }, { "epoch": 0.8550766770840904, "grad_norm": 3.1753509044647217, "learning_rate": 4.948881785116692e-05, "loss": 0.1457, "step": 23530 }, { "epoch": 0.8554400755868886, "grad_norm": 0.7468664646148682, "learning_rate": 4.948777329668772e-05, "loss": 0.1385, "step": 23540 }, { "epoch": 0.8558034740896867, "grad_norm": 6.77406120300293, "learning_rate": 4.9486727687117507e-05, "loss": 0.19, "step": 23550 }, { "epoch": 0.8561668725924849, "grad_norm": 1.6008226871490479, "learning_rate": 4.9485681022501316e-05, "loss": 0.1609, "step": 23560 }, { "epoch": 0.8565302710952831, "grad_norm": 1.1062623262405396, "learning_rate": 4.948463330288425e-05, "loss": 0.1624, "step": 23570 }, { "epoch": 0.8568936695980812, "grad_norm": 1.6599873304367065, "learning_rate": 4.948358452831145e-05, "loss": 0.1532, "step": 23580 }, { "epoch": 0.8572570681008794, "grad_norm": 1.264592170715332, "learning_rate": 4.9482534698828106e-05, "loss": 0.1696, "step": 23590 }, { "epoch": 0.8576204666036776, "grad_norm": 2.027796745300293, "learning_rate": 4.948148381447945e-05, "loss": 0.1913, "step": 23600 }, { "epoch": 0.8579838651064757, "grad_norm": 1.3213417530059814, "learning_rate": 4.948043187531076e-05, "loss": 0.1517, "step": 23610 }, { "epoch": 0.8583472636092739, "grad_norm": 1.6190669536590576, "learning_rate": 4.9479378881367366e-05, "loss": 0.1517, "step": 23620 }, { "epoch": 0.8587106621120721, "grad_norm": 5.381803512573242, "learning_rate": 4.947832483269464e-05, "loss": 0.1504, "step": 23630 }, { "epoch": 0.8590740606148702, "grad_norm": 3.4807474613189697, "learning_rate": 4.947726972933798e-05, "loss": 0.1887, "step": 23640 }, { "epoch": 0.8594374591176684, "grad_norm": 4.890349864959717, "learning_rate": 4.947621357134287e-05, "loss": 0.219, "step": 23650 }, { "epoch": 0.8598008576204667, "grad_norm": 1.1006419658660889, "learning_rate": 4.947515635875479e-05, "loss": 0.1743, "step": 23660 }, { "epoch": 0.8601642561232647, "grad_norm": 0.9933237433433533, "learning_rate": 4.9474098091619314e-05, "loss": 0.1294, "step": 23670 }, { "epoch": 0.860527654626063, "grad_norm": 3.392524480819702, "learning_rate": 4.947303876998203e-05, "loss": 0.1784, "step": 23680 }, { "epoch": 0.8608910531288612, "grad_norm": 1.466454029083252, "learning_rate": 4.947197839388857e-05, "loss": 0.1828, "step": 23690 }, { "epoch": 0.8612544516316593, "grad_norm": 3.670731544494629, "learning_rate": 4.947091696338465e-05, "loss": 0.1772, "step": 23700 }, { "epoch": 0.8616178501344575, "grad_norm": 1.3586241006851196, "learning_rate": 4.9469854478515976e-05, "loss": 0.1512, "step": 23710 }, { "epoch": 0.8619812486372556, "grad_norm": 0.8312864303588867, "learning_rate": 4.9468790939328336e-05, "loss": 0.1582, "step": 23720 }, { "epoch": 0.8623446471400538, "grad_norm": 0.9825647473335266, "learning_rate": 4.946772634586756e-05, "loss": 0.1662, "step": 23730 }, { "epoch": 0.862708045642852, "grad_norm": 2.7960050106048584, "learning_rate": 4.94666606981795e-05, "loss": 0.226, "step": 23740 }, { "epoch": 0.8630714441456501, "grad_norm": 5.3017683029174805, "learning_rate": 4.94655939963101e-05, "loss": 0.2065, "step": 23750 }, { "epoch": 0.8634348426484483, "grad_norm": 1.0958201885223389, "learning_rate": 4.946452624030529e-05, "loss": 0.2177, "step": 23760 }, { "epoch": 0.8637982411512465, "grad_norm": 1.0320892333984375, "learning_rate": 4.94634574302111e-05, "loss": 0.1263, "step": 23770 }, { "epoch": 0.8641616396540446, "grad_norm": 1.0401560068130493, "learning_rate": 4.946238756607356e-05, "loss": 0.6474, "step": 23780 }, { "epoch": 0.8645250381568428, "grad_norm": 1.378184199333191, "learning_rate": 4.9461316647938785e-05, "loss": 0.1783, "step": 23790 }, { "epoch": 0.864888436659641, "grad_norm": 7.429476261138916, "learning_rate": 4.9460244675852906e-05, "loss": 0.2744, "step": 23800 }, { "epoch": 0.8652518351624391, "grad_norm": 2.2409234046936035, "learning_rate": 4.945917164986211e-05, "loss": 0.2088, "step": 23810 }, { "epoch": 0.8656152336652373, "grad_norm": 1.1307353973388672, "learning_rate": 4.945809757001264e-05, "loss": 0.1311, "step": 23820 }, { "epoch": 0.8659786321680355, "grad_norm": 1.6061898469924927, "learning_rate": 4.945702243635077e-05, "loss": 0.1683, "step": 23830 }, { "epoch": 0.8663420306708336, "grad_norm": 1.0011060237884521, "learning_rate": 4.945594624892281e-05, "loss": 0.8323, "step": 23840 }, { "epoch": 0.8667054291736318, "grad_norm": 6.631030082702637, "learning_rate": 4.9454869007775154e-05, "loss": 0.177, "step": 23850 }, { "epoch": 0.86706882767643, "grad_norm": 2.8532910346984863, "learning_rate": 4.9453790712954195e-05, "loss": 0.145, "step": 23860 }, { "epoch": 0.8674322261792281, "grad_norm": 2.6437554359436035, "learning_rate": 4.945271136450641e-05, "loss": 0.1496, "step": 23870 }, { "epoch": 0.8677956246820263, "grad_norm": 3.0070180892944336, "learning_rate": 4.945163096247829e-05, "loss": 0.1582, "step": 23880 }, { "epoch": 0.8681590231848245, "grad_norm": 0.8612903356552124, "learning_rate": 4.9450549506916386e-05, "loss": 0.157, "step": 23890 }, { "epoch": 0.8685224216876226, "grad_norm": 9.475138664245605, "learning_rate": 4.94494669978673e-05, "loss": 0.312, "step": 23900 }, { "epoch": 0.8688858201904208, "grad_norm": 0.789193868637085, "learning_rate": 4.944838343537768e-05, "loss": 0.1385, "step": 23910 }, { "epoch": 0.869249218693219, "grad_norm": 0.9372280240058899, "learning_rate": 4.94472988194942e-05, "loss": 0.1581, "step": 23920 }, { "epoch": 0.8696126171960171, "grad_norm": 4.738519191741943, "learning_rate": 4.94462131502636e-05, "loss": 0.1693, "step": 23930 }, { "epoch": 0.8699760156988153, "grad_norm": 0.9660571217536926, "learning_rate": 4.9445126427732654e-05, "loss": 0.1578, "step": 23940 }, { "epoch": 0.8703394142016135, "grad_norm": 8.137104034423828, "learning_rate": 4.944403865194818e-05, "loss": 0.1857, "step": 23950 }, { "epoch": 0.8707028127044116, "grad_norm": 1.1240946054458618, "learning_rate": 4.944294982295706e-05, "loss": 0.2508, "step": 23960 }, { "epoch": 0.8710662112072098, "grad_norm": 3.6192643642425537, "learning_rate": 4.94418599408062e-05, "loss": 0.1354, "step": 23970 }, { "epoch": 0.871429609710008, "grad_norm": 2.76771879196167, "learning_rate": 4.944076900554256e-05, "loss": 0.1638, "step": 23980 }, { "epoch": 0.8717930082128061, "grad_norm": 1.734529972076416, "learning_rate": 4.9439677017213143e-05, "loss": 0.1414, "step": 23990 }, { "epoch": 0.8721564067156043, "grad_norm": 6.897458553314209, "learning_rate": 4.9438583975864996e-05, "loss": 0.2154, "step": 24000 }, { "epoch": 0.8721564067156043, "eval_loss": 0.37997984886169434, "eval_runtime": 180.3101, "eval_samples_per_second": 41.118, "eval_steps_per_second": 5.141, "eval_wer": 0.20322399114128561, "step": 24000 }, { "epoch": 0.8725198052184026, "grad_norm": 1.5639888048171997, "learning_rate": 4.943748988154523e-05, "loss": 0.1372, "step": 24010 }, { "epoch": 0.8728832037212007, "grad_norm": 4.484424114227295, "learning_rate": 4.943639473430096e-05, "loss": 0.3205, "step": 24020 }, { "epoch": 0.8732466022239989, "grad_norm": 1.9517849683761597, "learning_rate": 4.9435298534179396e-05, "loss": 0.2085, "step": 24030 }, { "epoch": 0.873610000726797, "grad_norm": 1.3041925430297852, "learning_rate": 4.943420128122776e-05, "loss": 0.1446, "step": 24040 }, { "epoch": 0.8739733992295952, "grad_norm": 29.67850685119629, "learning_rate": 4.943310297549332e-05, "loss": 0.2643, "step": 24050 }, { "epoch": 0.8743367977323934, "grad_norm": 4.462527751922607, "learning_rate": 4.9432003617023405e-05, "loss": 0.2067, "step": 24060 }, { "epoch": 0.8747001962351915, "grad_norm": 1.2176992893218994, "learning_rate": 4.9430903205865384e-05, "loss": 0.1353, "step": 24070 }, { "epoch": 0.8750635947379897, "grad_norm": 2.044191360473633, "learning_rate": 4.9429801742066675e-05, "loss": 0.1632, "step": 24080 }, { "epoch": 0.8754269932407879, "grad_norm": 3.0303845405578613, "learning_rate": 4.942869922567473e-05, "loss": 0.1533, "step": 24090 }, { "epoch": 0.875790391743586, "grad_norm": 4.44179105758667, "learning_rate": 4.942759565673705e-05, "loss": 0.2054, "step": 24100 }, { "epoch": 0.8761537902463842, "grad_norm": 2.158686637878418, "learning_rate": 4.942649103530119e-05, "loss": 0.1457, "step": 24110 }, { "epoch": 0.8765171887491824, "grad_norm": 5.875476837158203, "learning_rate": 4.942538536141473e-05, "loss": 0.1941, "step": 24120 }, { "epoch": 0.8768805872519805, "grad_norm": 1.7252172231674194, "learning_rate": 4.9424278635125335e-05, "loss": 0.155, "step": 24130 }, { "epoch": 0.8772439857547787, "grad_norm": 1.6594487428665161, "learning_rate": 4.9423170856480674e-05, "loss": 0.1736, "step": 24140 }, { "epoch": 0.8776073842575769, "grad_norm": 6.2919697761535645, "learning_rate": 4.9422062025528474e-05, "loss": 0.2313, "step": 24150 }, { "epoch": 0.877970782760375, "grad_norm": 2.1133229732513428, "learning_rate": 4.942095214231651e-05, "loss": 0.1642, "step": 24160 }, { "epoch": 0.8783341812631732, "grad_norm": 1.02867591381073, "learning_rate": 4.941984120689262e-05, "loss": 0.1554, "step": 24170 }, { "epoch": 0.8786975797659714, "grad_norm": 1.7262704372406006, "learning_rate": 4.941872921930465e-05, "loss": 0.1428, "step": 24180 }, { "epoch": 0.8790609782687695, "grad_norm": 1.095211386680603, "learning_rate": 4.9417616179600526e-05, "loss": 0.1683, "step": 24190 }, { "epoch": 0.8794243767715677, "grad_norm": 9.772414207458496, "learning_rate": 4.94165020878282e-05, "loss": 0.2224, "step": 24200 }, { "epoch": 0.8797877752743659, "grad_norm": 0.6741021871566772, "learning_rate": 4.9415386944035665e-05, "loss": 0.7216, "step": 24210 }, { "epoch": 0.880151173777164, "grad_norm": 0.6714327335357666, "learning_rate": 4.941427074827098e-05, "loss": 0.1321, "step": 24220 }, { "epoch": 0.8805145722799622, "grad_norm": 9.116118431091309, "learning_rate": 4.941315350058223e-05, "loss": 0.1738, "step": 24230 }, { "epoch": 0.8808779707827604, "grad_norm": 1.119581937789917, "learning_rate": 4.941203520101757e-05, "loss": 0.1076, "step": 24240 }, { "epoch": 0.8812413692855585, "grad_norm": 1.5630614757537842, "learning_rate": 4.941091584962516e-05, "loss": 0.1734, "step": 24250 }, { "epoch": 0.8816047677883567, "grad_norm": 3.4376001358032227, "learning_rate": 4.940979544645325e-05, "loss": 0.1567, "step": 24260 }, { "epoch": 0.8819681662911549, "grad_norm": 1.1688649654388428, "learning_rate": 4.94086739915501e-05, "loss": 0.137, "step": 24270 }, { "epoch": 0.882331564793953, "grad_norm": 2.02235746383667, "learning_rate": 4.9407551484964035e-05, "loss": 0.1718, "step": 24280 }, { "epoch": 0.8826949632967512, "grad_norm": 1.7484105825424194, "learning_rate": 4.940642792674341e-05, "loss": 0.1973, "step": 24290 }, { "epoch": 0.8830583617995494, "grad_norm": 7.056839942932129, "learning_rate": 4.940530331693666e-05, "loss": 0.1916, "step": 24300 }, { "epoch": 0.8834217603023475, "grad_norm": 1.4804614782333374, "learning_rate": 4.940417765559221e-05, "loss": 0.1418, "step": 24310 }, { "epoch": 0.8837851588051457, "grad_norm": 1.3168327808380127, "learning_rate": 4.940305094275859e-05, "loss": 0.1466, "step": 24320 }, { "epoch": 0.8841485573079438, "grad_norm": 2.4612350463867188, "learning_rate": 4.9401923178484325e-05, "loss": 0.1956, "step": 24330 }, { "epoch": 0.884511955810742, "grad_norm": 0.8389832973480225, "learning_rate": 4.9400794362818005e-05, "loss": 0.1751, "step": 24340 }, { "epoch": 0.8848753543135403, "grad_norm": 2.618521213531494, "learning_rate": 4.939966449580828e-05, "loss": 0.2133, "step": 24350 }, { "epoch": 0.8852387528163383, "grad_norm": 0.767784833908081, "learning_rate": 4.9398533577503826e-05, "loss": 0.1256, "step": 24360 }, { "epoch": 0.8856021513191366, "grad_norm": 1.7649836540222168, "learning_rate": 4.939740160795336e-05, "loss": 0.1925, "step": 24370 }, { "epoch": 0.8859655498219348, "grad_norm": 2.182840347290039, "learning_rate": 4.9396268587205685e-05, "loss": 0.184, "step": 24380 }, { "epoch": 0.8863289483247329, "grad_norm": 1.6524356603622437, "learning_rate": 4.939513451530958e-05, "loss": 0.1582, "step": 24390 }, { "epoch": 0.8866923468275311, "grad_norm": 13.93655776977539, "learning_rate": 4.939399939231394e-05, "loss": 0.1813, "step": 24400 }, { "epoch": 0.8870557453303293, "grad_norm": 1.9153752326965332, "learning_rate": 4.939286321826766e-05, "loss": 0.2093, "step": 24410 }, { "epoch": 0.8874191438331274, "grad_norm": 1.9444178342819214, "learning_rate": 4.9391725993219685e-05, "loss": 0.1489, "step": 24420 }, { "epoch": 0.8877825423359256, "grad_norm": 2.9371562004089355, "learning_rate": 4.939058771721903e-05, "loss": 0.1648, "step": 24430 }, { "epoch": 0.8881459408387238, "grad_norm": 3.127439498901367, "learning_rate": 4.938944839031473e-05, "loss": 0.1756, "step": 24440 }, { "epoch": 0.8885093393415219, "grad_norm": 11.735489845275879, "learning_rate": 4.938830801255588e-05, "loss": 0.2049, "step": 24450 }, { "epoch": 0.8888727378443201, "grad_norm": 1.0685577392578125, "learning_rate": 4.938716658399161e-05, "loss": 0.147, "step": 24460 }, { "epoch": 0.8892361363471183, "grad_norm": 3.6975417137145996, "learning_rate": 4.93860241046711e-05, "loss": 0.1402, "step": 24470 }, { "epoch": 0.8895995348499164, "grad_norm": 1.703731894493103, "learning_rate": 4.938488057464358e-05, "loss": 0.1418, "step": 24480 }, { "epoch": 0.8899629333527146, "grad_norm": 1.5911983251571655, "learning_rate": 4.938373599395831e-05, "loss": 0.1268, "step": 24490 }, { "epoch": 0.8903263318555128, "grad_norm": 5.278975486755371, "learning_rate": 4.9382590362664613e-05, "loss": 0.2388, "step": 24500 }, { "epoch": 0.8906897303583109, "grad_norm": 1.673403263092041, "learning_rate": 4.9381443680811865e-05, "loss": 0.1568, "step": 24510 }, { "epoch": 0.8910531288611091, "grad_norm": 0.5384930968284607, "learning_rate": 4.938029594844945e-05, "loss": 0.1364, "step": 24520 }, { "epoch": 0.8914165273639073, "grad_norm": 1.4231863021850586, "learning_rate": 4.937914716562683e-05, "loss": 0.1358, "step": 24530 }, { "epoch": 0.8917799258667054, "grad_norm": 1.2151052951812744, "learning_rate": 4.937799733239349e-05, "loss": 0.1673, "step": 24540 }, { "epoch": 0.8921433243695036, "grad_norm": 9.278292655944824, "learning_rate": 4.937684644879899e-05, "loss": 0.2505, "step": 24550 }, { "epoch": 0.8925067228723018, "grad_norm": 2.3570127487182617, "learning_rate": 4.937569451489291e-05, "loss": 0.1447, "step": 24560 }, { "epoch": 0.8928701213750999, "grad_norm": 0.44337037205696106, "learning_rate": 4.937454153072488e-05, "loss": 0.2015, "step": 24570 }, { "epoch": 0.8932335198778981, "grad_norm": 2.4552314281463623, "learning_rate": 4.937338749634458e-05, "loss": 0.1838, "step": 24580 }, { "epoch": 0.8935969183806963, "grad_norm": 0.9864338636398315, "learning_rate": 4.937223241180174e-05, "loss": 0.1356, "step": 24590 }, { "epoch": 0.8939603168834944, "grad_norm": 8.218843460083008, "learning_rate": 4.937107627714612e-05, "loss": 0.2109, "step": 24600 }, { "epoch": 0.8939603168834944, "eval_loss": 0.38069987297058105, "eval_runtime": 180.4244, "eval_samples_per_second": 41.092, "eval_steps_per_second": 5.138, "eval_wer": 0.20163559460489772, "step": 24600 }, { "epoch": 0.8943237153862926, "grad_norm": 0.7269652485847473, "learning_rate": 4.936991909242753e-05, "loss": 0.1756, "step": 24610 }, { "epoch": 0.8946871138890907, "grad_norm": 0.9835095405578613, "learning_rate": 4.9368760857695836e-05, "loss": 0.1297, "step": 24620 }, { "epoch": 0.8950505123918889, "grad_norm": 3.5632708072662354, "learning_rate": 4.9367601573000944e-05, "loss": 0.146, "step": 24630 }, { "epoch": 0.8954139108946871, "grad_norm": 0.7898311614990234, "learning_rate": 4.93664412383928e-05, "loss": 0.1693, "step": 24640 }, { "epoch": 0.8957773093974852, "grad_norm": 3.8220248222351074, "learning_rate": 4.93652798539214e-05, "loss": 0.1739, "step": 24650 }, { "epoch": 0.8961407079002834, "grad_norm": 0.7946699857711792, "learning_rate": 4.936411741963678e-05, "loss": 0.1271, "step": 24660 }, { "epoch": 0.8965041064030816, "grad_norm": 1.5677101612091064, "learning_rate": 4.936295393558903e-05, "loss": 0.1365, "step": 24670 }, { "epoch": 0.8968675049058797, "grad_norm": 18.39532470703125, "learning_rate": 4.9361789401828285e-05, "loss": 0.2035, "step": 24680 }, { "epoch": 0.897230903408678, "grad_norm": 2.577984094619751, "learning_rate": 4.93606238184047e-05, "loss": 0.127, "step": 24690 }, { "epoch": 0.8975943019114762, "grad_norm": 3.4822871685028076, "learning_rate": 4.9359457185368515e-05, "loss": 0.2335, "step": 24700 }, { "epoch": 0.8979577004142743, "grad_norm": 1.6475412845611572, "learning_rate": 4.935828950277e-05, "loss": 0.1581, "step": 24710 }, { "epoch": 0.8983210989170725, "grad_norm": 2.0972635746002197, "learning_rate": 4.9357120770659446e-05, "loss": 0.1608, "step": 24720 }, { "epoch": 0.8986844974198707, "grad_norm": 3.194946050643921, "learning_rate": 4.9355950989087226e-05, "loss": 0.1911, "step": 24730 }, { "epoch": 0.8990478959226688, "grad_norm": 1.1382654905319214, "learning_rate": 4.9354780158103744e-05, "loss": 0.1671, "step": 24740 }, { "epoch": 0.899411294425467, "grad_norm": 7.309133052825928, "learning_rate": 4.9353608277759433e-05, "loss": 0.192, "step": 24750 }, { "epoch": 0.8997746929282652, "grad_norm": 1.0215349197387695, "learning_rate": 4.9352435348104786e-05, "loss": 0.1713, "step": 24760 }, { "epoch": 0.9001380914310633, "grad_norm": 2.319836378097534, "learning_rate": 4.935126136919035e-05, "loss": 0.1441, "step": 24770 }, { "epoch": 0.9005014899338615, "grad_norm": 3.443413496017456, "learning_rate": 4.9350086341066716e-05, "loss": 0.2136, "step": 24780 }, { "epoch": 0.9008648884366597, "grad_norm": 0.9862478971481323, "learning_rate": 4.934891026378449e-05, "loss": 0.134, "step": 24790 }, { "epoch": 0.9012282869394578, "grad_norm": 10.3681640625, "learning_rate": 4.934773313739435e-05, "loss": 0.3034, "step": 24800 }, { "epoch": 0.901591685442256, "grad_norm": 0.9848408102989197, "learning_rate": 4.9346554961947014e-05, "loss": 0.1503, "step": 24810 }, { "epoch": 0.9019550839450542, "grad_norm": 1.3456752300262451, "learning_rate": 4.934537573749326e-05, "loss": 2.2881, "step": 24820 }, { "epoch": 0.9023184824478523, "grad_norm": 0.8639931082725525, "learning_rate": 4.9344195464083884e-05, "loss": 0.1565, "step": 24830 }, { "epoch": 0.9026818809506505, "grad_norm": 1.1297109127044678, "learning_rate": 4.9343014141769744e-05, "loss": 0.1338, "step": 24840 }, { "epoch": 0.9030452794534487, "grad_norm": 20.8160343170166, "learning_rate": 4.934183177060173e-05, "loss": 0.2155, "step": 24850 }, { "epoch": 0.9034086779562468, "grad_norm": 0.8113746643066406, "learning_rate": 4.9340648350630804e-05, "loss": 0.126, "step": 24860 }, { "epoch": 0.903772076459045, "grad_norm": 1.7760541439056396, "learning_rate": 4.9339463881907946e-05, "loss": 0.119, "step": 24870 }, { "epoch": 0.9041354749618432, "grad_norm": 1.8657050132751465, "learning_rate": 4.933827836448418e-05, "loss": 0.1772, "step": 24880 }, { "epoch": 0.9044988734646413, "grad_norm": 1.2576991319656372, "learning_rate": 4.9337091798410594e-05, "loss": 0.1609, "step": 24890 }, { "epoch": 0.9048622719674395, "grad_norm": 4.8249311447143555, "learning_rate": 4.933590418373833e-05, "loss": 1.7033, "step": 24900 }, { "epoch": 0.9052256704702376, "grad_norm": 1.065819501876831, "learning_rate": 4.9334715520518526e-05, "loss": 0.1559, "step": 24910 }, { "epoch": 0.9055890689730358, "grad_norm": 0.961330771446228, "learning_rate": 4.933352580880242e-05, "loss": 0.1459, "step": 24920 }, { "epoch": 0.905952467475834, "grad_norm": 2.0911202430725098, "learning_rate": 4.933233504864126e-05, "loss": 0.2173, "step": 24930 }, { "epoch": 0.9063158659786321, "grad_norm": 0.5074183940887451, "learning_rate": 4.933114324008636e-05, "loss": 0.1544, "step": 24940 }, { "epoch": 0.9066792644814303, "grad_norm": 3.663172483444214, "learning_rate": 4.932995038318907e-05, "loss": 0.2042, "step": 24950 }, { "epoch": 0.9070426629842285, "grad_norm": 1.691545844078064, "learning_rate": 4.9328756478000784e-05, "loss": 0.1616, "step": 24960 }, { "epoch": 0.9074060614870266, "grad_norm": 1.6613342761993408, "learning_rate": 4.9327561524572944e-05, "loss": 0.1212, "step": 24970 }, { "epoch": 0.9077694599898248, "grad_norm": 2.5737128257751465, "learning_rate": 4.9326365522957044e-05, "loss": 0.1753, "step": 24980 }, { "epoch": 0.908132858492623, "grad_norm": 1.717429280281067, "learning_rate": 4.932516847320459e-05, "loss": 0.1436, "step": 24990 }, { "epoch": 0.9084962569954211, "grad_norm": 13.324812889099121, "learning_rate": 4.9323970375367186e-05, "loss": 0.1983, "step": 25000 }, { "epoch": 0.9088596554982193, "grad_norm": 1.374232530593872, "learning_rate": 4.932277122949644e-05, "loss": 0.1588, "step": 25010 }, { "epoch": 0.9092230540010175, "grad_norm": 1.1790850162506104, "learning_rate": 4.932157103564402e-05, "loss": 0.1603, "step": 25020 }, { "epoch": 0.9095864525038156, "grad_norm": 2.7326996326446533, "learning_rate": 4.932036979386165e-05, "loss": 0.1656, "step": 25030 }, { "epoch": 0.9099498510066139, "grad_norm": 1.2364397048950195, "learning_rate": 4.931916750420107e-05, "loss": 0.2311, "step": 25040 }, { "epoch": 0.9103132495094121, "grad_norm": 3.7070934772491455, "learning_rate": 4.9317964166714095e-05, "loss": 0.2286, "step": 25050 }, { "epoch": 0.9106766480122102, "grad_norm": 2.05336594581604, "learning_rate": 4.931675978145256e-05, "loss": 0.1404, "step": 25060 }, { "epoch": 0.9110400465150084, "grad_norm": 1.3064135313034058, "learning_rate": 4.931555434846837e-05, "loss": 0.1395, "step": 25070 }, { "epoch": 0.9114034450178066, "grad_norm": 1.252254843711853, "learning_rate": 4.931434786781346e-05, "loss": 0.1595, "step": 25080 }, { "epoch": 0.9117668435206047, "grad_norm": 1.399654507637024, "learning_rate": 4.931314033953981e-05, "loss": 0.1495, "step": 25090 }, { "epoch": 0.9121302420234029, "grad_norm": 9.340110778808594, "learning_rate": 4.931193176369945e-05, "loss": 0.2489, "step": 25100 }, { "epoch": 0.9124936405262011, "grad_norm": 1.4071942567825317, "learning_rate": 4.931072214034445e-05, "loss": 0.1409, "step": 25110 }, { "epoch": 0.9128570390289992, "grad_norm": 1.5617743730545044, "learning_rate": 4.9309511469526934e-05, "loss": 0.2026, "step": 25120 }, { "epoch": 0.9132204375317974, "grad_norm": 1.4382219314575195, "learning_rate": 4.930829975129906e-05, "loss": 0.1426, "step": 25130 }, { "epoch": 0.9135838360345956, "grad_norm": 1.0388094186782837, "learning_rate": 4.930708698571303e-05, "loss": 0.132, "step": 25140 }, { "epoch": 0.9139472345373937, "grad_norm": 3.9398436546325684, "learning_rate": 4.9305873172821126e-05, "loss": 0.2257, "step": 25150 }, { "epoch": 0.9143106330401919, "grad_norm": 2.5586395263671875, "learning_rate": 4.930465831267562e-05, "loss": 0.1508, "step": 25160 }, { "epoch": 0.9146740315429901, "grad_norm": 1.6908849477767944, "learning_rate": 4.930344240532886e-05, "loss": 0.1407, "step": 25170 }, { "epoch": 0.9150374300457882, "grad_norm": 3.980564594268799, "learning_rate": 4.930222545083324e-05, "loss": 0.1749, "step": 25180 }, { "epoch": 0.9154008285485864, "grad_norm": 1.7451142072677612, "learning_rate": 4.930100744924119e-05, "loss": 0.1415, "step": 25190 }, { "epoch": 0.9157642270513845, "grad_norm": 11.09490966796875, "learning_rate": 4.9299788400605194e-05, "loss": 0.248, "step": 25200 }, { "epoch": 0.9157642270513845, "eval_loss": 0.36305877566337585, "eval_runtime": 180.0742, "eval_samples_per_second": 41.172, "eval_steps_per_second": 5.148, "eval_wer": 0.19528200845934612, "step": 25200 }, { "epoch": 0.9161276255541827, "grad_norm": 0.9552545547485352, "learning_rate": 4.929856830497778e-05, "loss": 0.1765, "step": 25210 }, { "epoch": 0.9164910240569809, "grad_norm": 1.0652204751968384, "learning_rate": 4.929734716241151e-05, "loss": 0.1412, "step": 25220 }, { "epoch": 0.916854422559779, "grad_norm": 2.473240375518799, "learning_rate": 4.929612497295899e-05, "loss": 0.1511, "step": 25230 }, { "epoch": 0.9172178210625772, "grad_norm": 2.0563089847564697, "learning_rate": 4.929490173667291e-05, "loss": 0.1562, "step": 25240 }, { "epoch": 0.9175812195653754, "grad_norm": 5.446952819824219, "learning_rate": 4.929367745360593e-05, "loss": 0.2416, "step": 25250 }, { "epoch": 0.9179446180681735, "grad_norm": 0.724795937538147, "learning_rate": 4.929245212381085e-05, "loss": 0.1554, "step": 25260 }, { "epoch": 0.9183080165709717, "grad_norm": 1.0962814092636108, "learning_rate": 4.929122574734043e-05, "loss": 0.1567, "step": 25270 }, { "epoch": 0.9186714150737699, "grad_norm": 1.3689608573913574, "learning_rate": 4.9289998324247524e-05, "loss": 0.1498, "step": 25280 }, { "epoch": 0.919034813576568, "grad_norm": 3.039569139480591, "learning_rate": 4.9288769854585015e-05, "loss": 0.1666, "step": 25290 }, { "epoch": 0.9193982120793662, "grad_norm": 10.71928882598877, "learning_rate": 4.928754033840583e-05, "loss": 0.2487, "step": 25300 }, { "epoch": 0.9197616105821644, "grad_norm": 0.47624918818473816, "learning_rate": 4.928630977576295e-05, "loss": 0.1457, "step": 25310 }, { "epoch": 0.9201250090849625, "grad_norm": 1.2840664386749268, "learning_rate": 4.9285078166709386e-05, "loss": 0.1437, "step": 25320 }, { "epoch": 0.9204884075877607, "grad_norm": 2.118415117263794, "learning_rate": 4.928384551129822e-05, "loss": 0.1861, "step": 25330 }, { "epoch": 0.9208518060905589, "grad_norm": 0.8363248109817505, "learning_rate": 4.928261180958255e-05, "loss": 0.1494, "step": 25340 }, { "epoch": 0.921215204593357, "grad_norm": 20.23488998413086, "learning_rate": 4.928137706161553e-05, "loss": 0.2414, "step": 25350 }, { "epoch": 0.9215786030961552, "grad_norm": 1.1590826511383057, "learning_rate": 4.928014126745037e-05, "loss": 0.162, "step": 25360 }, { "epoch": 0.9219420015989535, "grad_norm": 1.1986241340637207, "learning_rate": 4.9278904427140315e-05, "loss": 0.1323, "step": 25370 }, { "epoch": 0.9223054001017515, "grad_norm": 5.075083255767822, "learning_rate": 4.927766654073864e-05, "loss": 0.2944, "step": 25380 }, { "epoch": 0.9226687986045498, "grad_norm": 3.1853582859039307, "learning_rate": 4.927642760829871e-05, "loss": 0.1792, "step": 25390 }, { "epoch": 0.923032197107348, "grad_norm": 5.919759273529053, "learning_rate": 4.927518762987388e-05, "loss": 0.2182, "step": 25400 }, { "epoch": 0.9233955956101461, "grad_norm": 1.4639918804168701, "learning_rate": 4.927394660551759e-05, "loss": 0.1277, "step": 25410 }, { "epoch": 0.9237589941129443, "grad_norm": 1.205178141593933, "learning_rate": 4.927270453528331e-05, "loss": 0.1197, "step": 25420 }, { "epoch": 0.9241223926157425, "grad_norm": 1.4328303337097168, "learning_rate": 4.927146141922455e-05, "loss": 0.1522, "step": 25430 }, { "epoch": 0.9244857911185406, "grad_norm": 0.6114678382873535, "learning_rate": 4.927021725739488e-05, "loss": 0.1661, "step": 25440 }, { "epoch": 0.9248491896213388, "grad_norm": 34.13093566894531, "learning_rate": 4.92689720498479e-05, "loss": 0.2852, "step": 25450 }, { "epoch": 0.925212588124137, "grad_norm": 0.9967424273490906, "learning_rate": 4.9267725796637256e-05, "loss": 0.1433, "step": 25460 }, { "epoch": 0.9255759866269351, "grad_norm": 1.0493268966674805, "learning_rate": 4.926647849781666e-05, "loss": 0.1361, "step": 25470 }, { "epoch": 0.9259393851297333, "grad_norm": 2.582016944885254, "learning_rate": 4.926523015343985e-05, "loss": 0.1829, "step": 25480 }, { "epoch": 0.9263027836325314, "grad_norm": 5.122225284576416, "learning_rate": 4.92639807635606e-05, "loss": 0.1148, "step": 25490 }, { "epoch": 0.9266661821353296, "grad_norm": 8.054966926574707, "learning_rate": 4.9262730328232755e-05, "loss": 0.2363, "step": 25500 }, { "epoch": 0.9270295806381278, "grad_norm": 3.3668735027313232, "learning_rate": 4.926147884751018e-05, "loss": 0.1311, "step": 25510 }, { "epoch": 0.9273929791409259, "grad_norm": 1.0643728971481323, "learning_rate": 4.926022632144681e-05, "loss": 0.1318, "step": 25520 }, { "epoch": 0.9277563776437241, "grad_norm": 1.632354497909546, "learning_rate": 4.9258972750096614e-05, "loss": 0.1958, "step": 25530 }, { "epoch": 0.9281197761465223, "grad_norm": 0.7638659477233887, "learning_rate": 4.9257718133513586e-05, "loss": 0.168, "step": 25540 }, { "epoch": 0.9284831746493204, "grad_norm": 4.14115571975708, "learning_rate": 4.9256462471751796e-05, "loss": 0.1976, "step": 25550 }, { "epoch": 0.9288465731521186, "grad_norm": 39.925689697265625, "learning_rate": 4.925520576486534e-05, "loss": 0.67, "step": 25560 }, { "epoch": 0.9292099716549168, "grad_norm": 1.349623441696167, "learning_rate": 4.9253948012908366e-05, "loss": 0.1475, "step": 25570 }, { "epoch": 0.9295733701577149, "grad_norm": 10.941556930541992, "learning_rate": 4.925268921593508e-05, "loss": 0.1696, "step": 25580 }, { "epoch": 0.9299367686605131, "grad_norm": 1.5406817197799683, "learning_rate": 4.925142937399969e-05, "loss": 0.1444, "step": 25590 }, { "epoch": 0.9303001671633113, "grad_norm": 3.9542319774627686, "learning_rate": 4.925016848715651e-05, "loss": 0.216, "step": 25600 }, { "epoch": 0.9306635656661094, "grad_norm": 2.0055665969848633, "learning_rate": 4.924890655545984e-05, "loss": 0.1248, "step": 25610 }, { "epoch": 0.9310269641689076, "grad_norm": 2.145512819290161, "learning_rate": 4.924764357896408e-05, "loss": 0.1278, "step": 25620 }, { "epoch": 0.9313903626717058, "grad_norm": 6.076485633850098, "learning_rate": 4.924637955772361e-05, "loss": 0.1586, "step": 25630 }, { "epoch": 0.9317537611745039, "grad_norm": 0.9482760429382324, "learning_rate": 4.924511449179293e-05, "loss": 0.1547, "step": 25640 }, { "epoch": 0.9321171596773021, "grad_norm": 2.335090398788452, "learning_rate": 4.924384838122653e-05, "loss": 0.1709, "step": 25650 }, { "epoch": 0.9324805581801003, "grad_norm": 2.1309449672698975, "learning_rate": 4.924258122607895e-05, "loss": 0.1425, "step": 25660 }, { "epoch": 0.9328439566828984, "grad_norm": 1.092887282371521, "learning_rate": 4.924131302640482e-05, "loss": 0.1578, "step": 25670 }, { "epoch": 0.9332073551856966, "grad_norm": 0.7325641512870789, "learning_rate": 4.9240043782258746e-05, "loss": 0.1473, "step": 25680 }, { "epoch": 0.9335707536884948, "grad_norm": 1.296338677406311, "learning_rate": 4.9238773493695443e-05, "loss": 0.2279, "step": 25690 }, { "epoch": 0.9339341521912929, "grad_norm": 1.196590542793274, "learning_rate": 4.923750216076963e-05, "loss": 0.1524, "step": 25700 }, { "epoch": 0.9342975506940912, "grad_norm": 1.5417845249176025, "learning_rate": 4.923622978353608e-05, "loss": 0.1385, "step": 25710 }, { "epoch": 0.9346609491968894, "grad_norm": 1.4865704774856567, "learning_rate": 4.923495636204963e-05, "loss": 0.1435, "step": 25720 }, { "epoch": 0.9350243476996875, "grad_norm": 1.6445010900497437, "learning_rate": 4.923368189636513e-05, "loss": 0.223, "step": 25730 }, { "epoch": 0.9353877462024857, "grad_norm": 0.6629343032836914, "learning_rate": 4.9232406386537505e-05, "loss": 0.1479, "step": 25740 }, { "epoch": 0.9357511447052839, "grad_norm": 8.440834999084473, "learning_rate": 4.923112983262171e-05, "loss": 0.7624, "step": 25750 }, { "epoch": 0.936114543208082, "grad_norm": 1.088809847831726, "learning_rate": 4.922985223467274e-05, "loss": 0.134, "step": 25760 }, { "epoch": 0.9364779417108802, "grad_norm": 1.1839587688446045, "learning_rate": 4.922857359274565e-05, "loss": 0.1284, "step": 25770 }, { "epoch": 0.9368413402136783, "grad_norm": 2.278588056564331, "learning_rate": 4.922729390689553e-05, "loss": 0.1873, "step": 25780 }, { "epoch": 0.9372047387164765, "grad_norm": 1.6524765491485596, "learning_rate": 4.9226013177177515e-05, "loss": 0.1769, "step": 25790 }, { "epoch": 0.9375681372192747, "grad_norm": 18.044713973999023, "learning_rate": 4.922473140364679e-05, "loss": 0.2122, "step": 25800 }, { "epoch": 0.9375681372192747, "eval_loss": 0.3881298005580902, "eval_runtime": 180.2979, "eval_samples_per_second": 41.121, "eval_steps_per_second": 5.141, "eval_wer": 0.1963348884491804, "step": 25800 }, { "epoch": 0.9379315357220728, "grad_norm": 1.1691884994506836, "learning_rate": 4.9223448586358576e-05, "loss": 0.1573, "step": 25810 }, { "epoch": 0.938294934224871, "grad_norm": 1.1012376546859741, "learning_rate": 4.9222164725368156e-05, "loss": 0.1511, "step": 25820 }, { "epoch": 0.9386583327276692, "grad_norm": 2.1937880516052246, "learning_rate": 4.9220879820730844e-05, "loss": 0.1684, "step": 25830 }, { "epoch": 0.9390217312304673, "grad_norm": 1.5964059829711914, "learning_rate": 4.921959387250199e-05, "loss": 0.1897, "step": 25840 }, { "epoch": 0.9393851297332655, "grad_norm": 6.693167209625244, "learning_rate": 4.921830688073701e-05, "loss": 0.2155, "step": 25850 }, { "epoch": 0.9397485282360637, "grad_norm": 1.679046869277954, "learning_rate": 4.921701884549136e-05, "loss": 0.1566, "step": 25860 }, { "epoch": 0.9401119267388618, "grad_norm": 0.648047924041748, "learning_rate": 4.9215729766820536e-05, "loss": 0.1398, "step": 25870 }, { "epoch": 0.94047532524166, "grad_norm": 0.7286267876625061, "learning_rate": 4.921443964478007e-05, "loss": 0.1598, "step": 25880 }, { "epoch": 0.9408387237444582, "grad_norm": 1.3676726818084717, "learning_rate": 4.921314847942555e-05, "loss": 0.1627, "step": 25890 }, { "epoch": 0.9412021222472563, "grad_norm": 11.982099533081055, "learning_rate": 4.921185627081263e-05, "loss": 0.2181, "step": 25900 }, { "epoch": 0.9415655207500545, "grad_norm": 0.8863544464111328, "learning_rate": 4.9210563018996955e-05, "loss": 0.1296, "step": 25910 }, { "epoch": 0.9419289192528527, "grad_norm": 0.8388992547988892, "learning_rate": 4.9209268724034265e-05, "loss": 0.1406, "step": 25920 }, { "epoch": 0.9422923177556508, "grad_norm": 2.4800333976745605, "learning_rate": 4.9207973385980324e-05, "loss": 0.1694, "step": 25930 }, { "epoch": 0.942655716258449, "grad_norm": 4.2597174644470215, "learning_rate": 4.920667700489093e-05, "loss": 0.9439, "step": 25940 }, { "epoch": 0.9430191147612472, "grad_norm": 5.32108736038208, "learning_rate": 4.920537958082196e-05, "loss": 0.1745, "step": 25950 }, { "epoch": 0.9433825132640453, "grad_norm": 1.3563112020492554, "learning_rate": 4.9204081113829316e-05, "loss": 0.1554, "step": 25960 }, { "epoch": 0.9437459117668435, "grad_norm": 8.575587272644043, "learning_rate": 4.9202781603968926e-05, "loss": 0.2015, "step": 25970 }, { "epoch": 0.9441093102696417, "grad_norm": 6.85026216506958, "learning_rate": 4.920148105129679e-05, "loss": 0.1548, "step": 25980 }, { "epoch": 0.9444727087724398, "grad_norm": 1.2886810302734375, "learning_rate": 4.9200179455868944e-05, "loss": 0.136, "step": 25990 }, { "epoch": 0.944836107275238, "grad_norm": 2.0779457092285156, "learning_rate": 4.919887681774148e-05, "loss": 0.1744, "step": 26000 }, { "epoch": 0.9451995057780362, "grad_norm": Infinity, "learning_rate": 4.919770355196496e-05, "loss": 2.7706, "step": 26010 }, { "epoch": 0.9455629042808343, "grad_norm": 0.9514101147651672, "learning_rate": 4.919639893286285e-05, "loss": 0.1435, "step": 26020 }, { "epoch": 0.9459263027836325, "grad_norm": 0.3761270046234131, "learning_rate": 4.9195093271224016e-05, "loss": 0.1525, "step": 26030 }, { "epoch": 0.9462897012864308, "grad_norm": 1.2147834300994873, "learning_rate": 4.919378656710469e-05, "loss": 0.1922, "step": 26040 }, { "epoch": 0.9466530997892288, "grad_norm": 15.408570289611816, "learning_rate": 4.919247882056119e-05, "loss": 0.2773, "step": 26050 }, { "epoch": 0.947016498292027, "grad_norm": 2.2306370735168457, "learning_rate": 4.919117003164985e-05, "loss": 0.1446, "step": 26060 }, { "epoch": 0.9473798967948253, "grad_norm": 1.3414242267608643, "learning_rate": 4.918986020042706e-05, "loss": 0.1484, "step": 26070 }, { "epoch": 0.9477432952976234, "grad_norm": 1.9740337133407593, "learning_rate": 4.9188549326949275e-05, "loss": 0.1845, "step": 26080 }, { "epoch": 0.9481066938004216, "grad_norm": 0.7002670764923096, "learning_rate": 4.9187237411272955e-05, "loss": 0.1559, "step": 26090 }, { "epoch": 0.9484700923032197, "grad_norm": 8.308074951171875, "learning_rate": 4.9185924453454635e-05, "loss": 0.223, "step": 26100 }, { "epoch": 0.9488334908060179, "grad_norm": 0.8129051327705383, "learning_rate": 4.9184610453550884e-05, "loss": 0.1459, "step": 26110 }, { "epoch": 0.9491968893088161, "grad_norm": 1.5998592376708984, "learning_rate": 4.918329541161831e-05, "loss": 0.1394, "step": 26120 }, { "epoch": 0.9495602878116142, "grad_norm": 1.8726842403411865, "learning_rate": 4.918197932771359e-05, "loss": 0.1859, "step": 26130 }, { "epoch": 0.9499236863144124, "grad_norm": 1.1915557384490967, "learning_rate": 4.9180662201893424e-05, "loss": 0.1621, "step": 26140 }, { "epoch": 0.9502870848172106, "grad_norm": 6.970126152038574, "learning_rate": 4.917934403421455e-05, "loss": 0.2613, "step": 26150 }, { "epoch": 0.9506504833200087, "grad_norm": 1.0738050937652588, "learning_rate": 4.9178024824733776e-05, "loss": 0.1383, "step": 26160 }, { "epoch": 0.9510138818228069, "grad_norm": 2.1130123138427734, "learning_rate": 4.9176704573507933e-05, "loss": 0.222, "step": 26170 }, { "epoch": 0.9513772803256051, "grad_norm": 3.1722593307495117, "learning_rate": 4.9175383280593925e-05, "loss": 0.1624, "step": 26180 }, { "epoch": 0.9517406788284032, "grad_norm": 0.9101456999778748, "learning_rate": 4.917406094604865e-05, "loss": 0.16, "step": 26190 }, { "epoch": 0.9521040773312014, "grad_norm": 3.284672260284424, "learning_rate": 4.917273756992911e-05, "loss": 0.1817, "step": 26200 }, { "epoch": 0.9524674758339996, "grad_norm": 2.2083284854888916, "learning_rate": 4.917141315229232e-05, "loss": 0.169, "step": 26210 }, { "epoch": 0.9528308743367977, "grad_norm": 4.354351997375488, "learning_rate": 4.9170087693195335e-05, "loss": 0.2629, "step": 26220 }, { "epoch": 0.9531942728395959, "grad_norm": 2.520522117614746, "learning_rate": 4.916876119269526e-05, "loss": 0.1852, "step": 26230 }, { "epoch": 0.9535576713423941, "grad_norm": 0.8573399186134338, "learning_rate": 4.9167433650849264e-05, "loss": 0.1524, "step": 26240 }, { "epoch": 0.9539210698451922, "grad_norm": 6.314918041229248, "learning_rate": 4.916610506771454e-05, "loss": 0.2685, "step": 26250 }, { "epoch": 0.9542844683479904, "grad_norm": 3.0610973834991455, "learning_rate": 4.916477544334833e-05, "loss": 0.1374, "step": 26260 }, { "epoch": 0.9546478668507886, "grad_norm": 0.9085964560508728, "learning_rate": 4.916344477780793e-05, "loss": 0.1754, "step": 26270 }, { "epoch": 0.9550112653535867, "grad_norm": 3.8550243377685547, "learning_rate": 4.916211307115067e-05, "loss": 0.3546, "step": 26280 }, { "epoch": 0.9553746638563849, "grad_norm": 5.278194904327393, "learning_rate": 4.916078032343392e-05, "loss": 0.1298, "step": 26290 }, { "epoch": 0.9557380623591831, "grad_norm": 2.707965612411499, "learning_rate": 4.9159446534715116e-05, "loss": 0.1689, "step": 26300 }, { "epoch": 0.9561014608619812, "grad_norm": 1.3821223974227905, "learning_rate": 4.9158111705051716e-05, "loss": 0.2117, "step": 26310 }, { "epoch": 0.9564648593647794, "grad_norm": 1.0195057392120361, "learning_rate": 4.915677583450123e-05, "loss": 0.1151, "step": 26320 }, { "epoch": 0.9568282578675776, "grad_norm": 2.078343629837036, "learning_rate": 4.915543892312124e-05, "loss": 0.1433, "step": 26330 }, { "epoch": 0.9571916563703757, "grad_norm": 1.6972254514694214, "learning_rate": 4.915410097096932e-05, "loss": 0.1307, "step": 26340 }, { "epoch": 0.9575550548731739, "grad_norm": 4.440702438354492, "learning_rate": 4.915276197810313e-05, "loss": 0.1806, "step": 26350 }, { "epoch": 0.9579184533759721, "grad_norm": 0.778567373752594, "learning_rate": 4.9151421944580374e-05, "loss": 0.177, "step": 26360 }, { "epoch": 0.9582818518787702, "grad_norm": 1.2955224514007568, "learning_rate": 4.915008087045877e-05, "loss": 0.1395, "step": 26370 }, { "epoch": 0.9586452503815684, "grad_norm": 2.077195405960083, "learning_rate": 4.9148738755796104e-05, "loss": 0.1583, "step": 26380 }, { "epoch": 0.9590086488843665, "grad_norm": 0.8736408352851868, "learning_rate": 4.914739560065021e-05, "loss": 0.1284, "step": 26390 }, { "epoch": 0.9593720473871648, "grad_norm": 2.9465060234069824, "learning_rate": 4.914605140507895e-05, "loss": 0.1929, "step": 26400 }, { "epoch": 0.9593720473871648, "eval_loss": 0.3894718587398529, "eval_runtime": 180.7535, "eval_samples_per_second": 41.017, "eval_steps_per_second": 5.129, "eval_wer": 0.18666836095630548, "step": 26400 }, { "epoch": 0.959735445889963, "grad_norm": 0.7856747508049011, "learning_rate": 4.9144706169140256e-05, "loss": 0.1548, "step": 26410 }, { "epoch": 0.960098844392761, "grad_norm": 3.3245174884796143, "learning_rate": 4.914335989289208e-05, "loss": 0.1328, "step": 26420 }, { "epoch": 0.9604622428955593, "grad_norm": 4.848336219787598, "learning_rate": 4.914201257639243e-05, "loss": 0.145, "step": 26430 }, { "epoch": 0.9608256413983575, "grad_norm": 2.2661678791046143, "learning_rate": 4.9140664219699344e-05, "loss": 0.1482, "step": 26440 }, { "epoch": 0.9611890399011556, "grad_norm": 6.279752731323242, "learning_rate": 4.913931482287094e-05, "loss": 0.2087, "step": 26450 }, { "epoch": 0.9615524384039538, "grad_norm": 0.9201165437698364, "learning_rate": 4.913796438596534e-05, "loss": 0.1641, "step": 26460 }, { "epoch": 0.961915836906752, "grad_norm": 1.0935853719711304, "learning_rate": 4.9136612909040746e-05, "loss": 0.1678, "step": 26470 }, { "epoch": 0.9622792354095501, "grad_norm": 5.511369705200195, "learning_rate": 4.913526039215538e-05, "loss": 0.2284, "step": 26480 }, { "epoch": 0.9626426339123483, "grad_norm": 0.8109707832336426, "learning_rate": 4.913390683536751e-05, "loss": 0.1314, "step": 26490 }, { "epoch": 0.9630060324151465, "grad_norm": 20.594274520874023, "learning_rate": 4.9132552238735464e-05, "loss": 0.2612, "step": 26500 }, { "epoch": 0.9633694309179446, "grad_norm": 40.1435661315918, "learning_rate": 4.913119660231761e-05, "loss": 0.5943, "step": 26510 }, { "epoch": 0.9637328294207428, "grad_norm": 0.6886749863624573, "learning_rate": 4.912983992617235e-05, "loss": 0.1445, "step": 26520 }, { "epoch": 0.964096227923541, "grad_norm": 4.847496032714844, "learning_rate": 4.912848221035815e-05, "loss": 0.1645, "step": 26530 }, { "epoch": 0.9644596264263391, "grad_norm": 1.3625943660736084, "learning_rate": 4.912712345493349e-05, "loss": 0.1403, "step": 26540 }, { "epoch": 0.9648230249291373, "grad_norm": 6.022468090057373, "learning_rate": 4.9125763659956934e-05, "loss": 0.2215, "step": 26550 }, { "epoch": 0.9651864234319355, "grad_norm": 1.5577186346054077, "learning_rate": 4.912440282548706e-05, "loss": 0.1401, "step": 26560 }, { "epoch": 0.9655498219347336, "grad_norm": 1.5680512189865112, "learning_rate": 4.91230409515825e-05, "loss": 0.1373, "step": 26570 }, { "epoch": 0.9659132204375318, "grad_norm": 2.3043782711029053, "learning_rate": 4.912167803830193e-05, "loss": 0.1501, "step": 26580 }, { "epoch": 0.96627661894033, "grad_norm": 1.5168496370315552, "learning_rate": 4.912031408570409e-05, "loss": 0.1665, "step": 26590 }, { "epoch": 0.9666400174431281, "grad_norm": 10.529095649719238, "learning_rate": 4.911894909384773e-05, "loss": 0.1642, "step": 26600 }, { "epoch": 0.9670034159459263, "grad_norm": 2.458815097808838, "learning_rate": 4.911758306279167e-05, "loss": 3.3307, "step": 26610 }, { "epoch": 0.9673668144487245, "grad_norm": 1.2745519876480103, "learning_rate": 4.911621599259477e-05, "loss": 0.1681, "step": 26620 }, { "epoch": 0.9677302129515226, "grad_norm": 1.877960443496704, "learning_rate": 4.911484788331593e-05, "loss": 0.1445, "step": 26630 }, { "epoch": 0.9680936114543208, "grad_norm": 1.3567255735397339, "learning_rate": 4.911347873501408e-05, "loss": 0.132, "step": 26640 }, { "epoch": 0.968457009957119, "grad_norm": 3.44063138961792, "learning_rate": 4.911210854774825e-05, "loss": 0.205, "step": 26650 }, { "epoch": 0.9688204084599171, "grad_norm": 5.335951805114746, "learning_rate": 4.911073732157744e-05, "loss": 0.1475, "step": 26660 }, { "epoch": 0.9691838069627153, "grad_norm": 3.0675578117370605, "learning_rate": 4.910936505656074e-05, "loss": 0.1602, "step": 26670 }, { "epoch": 0.9695472054655134, "grad_norm": 5.95693826675415, "learning_rate": 4.910799175275729e-05, "loss": 0.1888, "step": 26680 }, { "epoch": 0.9699106039683116, "grad_norm": 1.7128913402557373, "learning_rate": 4.910661741022625e-05, "loss": 0.1402, "step": 26690 }, { "epoch": 0.9702740024711098, "grad_norm": 11.855730056762695, "learning_rate": 4.9105242029026844e-05, "loss": 0.1939, "step": 26700 }, { "epoch": 0.9706374009739079, "grad_norm": 2.21028208732605, "learning_rate": 4.910386560921831e-05, "loss": 0.1365, "step": 26710 }, { "epoch": 0.9710007994767061, "grad_norm": 3.6761391162872314, "learning_rate": 4.910248815085998e-05, "loss": 0.1661, "step": 26720 }, { "epoch": 0.9713641979795044, "grad_norm": 1.9474952220916748, "learning_rate": 4.9101109654011196e-05, "loss": 0.1176, "step": 26730 }, { "epoch": 0.9717275964823024, "grad_norm": 4.190001010894775, "learning_rate": 4.909973011873135e-05, "loss": 0.1426, "step": 26740 }, { "epoch": 0.9720909949851007, "grad_norm": 2.785562753677368, "learning_rate": 4.909834954507987e-05, "loss": 0.201, "step": 26750 }, { "epoch": 0.9724543934878989, "grad_norm": 2.296952724456787, "learning_rate": 4.909696793311625e-05, "loss": 1.5478, "step": 26760 }, { "epoch": 0.972817791990697, "grad_norm": 1.471690058708191, "learning_rate": 4.909558528290002e-05, "loss": 0.1254, "step": 26770 }, { "epoch": 0.9731811904934952, "grad_norm": 5.213918685913086, "learning_rate": 4.9094340010048675e-05, "loss": 0.8881, "step": 26780 }, { "epoch": 0.9735445889962934, "grad_norm": 1.5338894128799438, "learning_rate": 4.909295538731665e-05, "loss": 0.1621, "step": 26790 }, { "epoch": 0.9739079874990915, "grad_norm": 4.493140697479248, "learning_rate": 4.909156972650491e-05, "loss": 0.1653, "step": 26800 }, { "epoch": 0.9742713860018897, "grad_norm": 0.9602924585342407, "learning_rate": 4.909018302767313e-05, "loss": 0.1388, "step": 26810 }, { "epoch": 0.9746347845046879, "grad_norm": 1.038445234298706, "learning_rate": 4.9088795290881085e-05, "loss": 0.145, "step": 26820 }, { "epoch": 0.974998183007486, "grad_norm": 3.1368119716644287, "learning_rate": 4.908740651618856e-05, "loss": 0.1732, "step": 26830 }, { "epoch": 0.9753615815102842, "grad_norm": 0.6875894069671631, "learning_rate": 4.908601670365539e-05, "loss": 0.1582, "step": 26840 }, { "epoch": 0.9757249800130824, "grad_norm": 14.604360580444336, "learning_rate": 4.908462585334146e-05, "loss": 0.228, "step": 26850 }, { "epoch": 0.9760883785158805, "grad_norm": 1.819300889968872, "learning_rate": 4.9083233965306694e-05, "loss": 0.5066, "step": 26860 }, { "epoch": 0.9764517770186787, "grad_norm": 5.712610721588135, "learning_rate": 4.908184103961106e-05, "loss": 0.2109, "step": 26870 }, { "epoch": 0.9768151755214769, "grad_norm": 2.8377017974853516, "learning_rate": 4.908044707631459e-05, "loss": 0.1417, "step": 26880 }, { "epoch": 0.977178574024275, "grad_norm": 1.0483819246292114, "learning_rate": 4.907905207547733e-05, "loss": 0.1526, "step": 26890 }, { "epoch": 0.9775419725270732, "grad_norm": 3.996112823486328, "learning_rate": 4.907765603715938e-05, "loss": 0.6109, "step": 26900 }, { "epoch": 0.9779053710298714, "grad_norm": 1.336004614830017, "learning_rate": 4.907625896142091e-05, "loss": 0.8939, "step": 26910 }, { "epoch": 0.9782687695326695, "grad_norm": 0.9394060373306274, "learning_rate": 4.907486084832211e-05, "loss": 0.1515, "step": 26920 }, { "epoch": 0.9786321680354677, "grad_norm": 2.356201410293579, "learning_rate": 4.907346169792321e-05, "loss": 0.1567, "step": 26930 }, { "epoch": 0.9789955665382659, "grad_norm": 0.926143229007721, "learning_rate": 4.907206151028449e-05, "loss": 0.1669, "step": 26940 }, { "epoch": 0.979358965041064, "grad_norm": 4.815629482269287, "learning_rate": 4.90706602854663e-05, "loss": 0.2068, "step": 26950 }, { "epoch": 0.9797223635438622, "grad_norm": 1.3679453134536743, "learning_rate": 4.906925802352899e-05, "loss": 0.15, "step": 26960 }, { "epoch": 0.9800857620466603, "grad_norm": 1.1221717596054077, "learning_rate": 4.9067854724533e-05, "loss": 0.223, "step": 26970 }, { "epoch": 0.9804491605494585, "grad_norm": 2.198657512664795, "learning_rate": 4.906645038853878e-05, "loss": 0.1662, "step": 26980 }, { "epoch": 0.9808125590522567, "grad_norm": 1.7014293670654297, "learning_rate": 4.906504501560684e-05, "loss": 0.1601, "step": 26990 }, { "epoch": 0.9811759575550548, "grad_norm": 8.204177856445312, "learning_rate": 4.906363860579774e-05, "loss": 0.2345, "step": 27000 }, { "epoch": 0.9811759575550548, "eval_loss": 0.3534720242023468, "eval_runtime": 180.0448, "eval_samples_per_second": 41.179, "eval_steps_per_second": 5.149, "eval_wer": 0.19012652712981284, "step": 27000 }, { "epoch": 0.981539356057853, "grad_norm": 1.1557930707931519, "learning_rate": 4.906223115917207e-05, "loss": 0.1357, "step": 27010 }, { "epoch": 0.9819027545606512, "grad_norm": 0.7808053493499756, "learning_rate": 4.906082267579047e-05, "loss": 0.1366, "step": 27020 }, { "epoch": 0.9822661530634493, "grad_norm": 1.4547855854034424, "learning_rate": 4.9059413155713626e-05, "loss": 0.1472, "step": 27030 }, { "epoch": 0.9826295515662475, "grad_norm": 0.5997300148010254, "learning_rate": 4.9058002599002275e-05, "loss": 0.1455, "step": 27040 }, { "epoch": 0.9829929500690457, "grad_norm": 12.663732528686523, "learning_rate": 4.90565910057172e-05, "loss": 0.2728, "step": 27050 }, { "epoch": 0.9833563485718438, "grad_norm": 0.9098420739173889, "learning_rate": 4.9055178375919196e-05, "loss": 0.1467, "step": 27060 }, { "epoch": 0.983719747074642, "grad_norm": 3.4135582447052, "learning_rate": 4.9053764709669156e-05, "loss": 0.2571, "step": 27070 }, { "epoch": 0.9840831455774403, "grad_norm": 1.7984899282455444, "learning_rate": 4.905235000702798e-05, "loss": 0.2026, "step": 27080 }, { "epoch": 0.9844465440802384, "grad_norm": 1.2022316455841064, "learning_rate": 4.9050934268056615e-05, "loss": 0.1378, "step": 27090 }, { "epoch": 0.9848099425830366, "grad_norm": 1.499773621559143, "learning_rate": 4.9049517492816066e-05, "loss": 0.1765, "step": 27100 }, { "epoch": 0.9851733410858348, "grad_norm": 0.8122308254241943, "learning_rate": 4.9048099681367377e-05, "loss": 0.1422, "step": 27110 }, { "epoch": 0.9855367395886329, "grad_norm": 1.203873872756958, "learning_rate": 4.904668083377164e-05, "loss": 0.1436, "step": 27120 }, { "epoch": 0.9859001380914311, "grad_norm": 1.6162346601486206, "learning_rate": 4.9045260950089976e-05, "loss": 0.1667, "step": 27130 }, { "epoch": 0.9862635365942293, "grad_norm": 0.5100680589675903, "learning_rate": 4.904384003038358e-05, "loss": 0.1573, "step": 27140 }, { "epoch": 0.9866269350970274, "grad_norm": 6.364781379699707, "learning_rate": 4.904241807471366e-05, "loss": 0.2245, "step": 27150 }, { "epoch": 0.9869903335998256, "grad_norm": 0.6124529242515564, "learning_rate": 4.9040995083141495e-05, "loss": 0.2296, "step": 27160 }, { "epoch": 0.9873537321026238, "grad_norm": 1.3477269411087036, "learning_rate": 4.903957105572838e-05, "loss": 0.1173, "step": 27170 }, { "epoch": 0.9877171306054219, "grad_norm": 0.8505461812019348, "learning_rate": 4.903814599253569e-05, "loss": 0.1913, "step": 27180 }, { "epoch": 0.9880805291082201, "grad_norm": 0.928269624710083, "learning_rate": 4.903671989362481e-05, "loss": 0.1449, "step": 27190 }, { "epoch": 0.9884439276110183, "grad_norm": 9.115983963012695, "learning_rate": 4.903529275905719e-05, "loss": 0.2025, "step": 27200 }, { "epoch": 0.9888073261138164, "grad_norm": 0.8631362318992615, "learning_rate": 4.903386458889434e-05, "loss": 0.1313, "step": 27210 }, { "epoch": 0.9891707246166146, "grad_norm": 1.5814399719238281, "learning_rate": 4.9032435383197764e-05, "loss": 0.1547, "step": 27220 }, { "epoch": 0.9895341231194128, "grad_norm": 2.2507669925689697, "learning_rate": 4.9031005142029054e-05, "loss": 2.0754, "step": 27230 }, { "epoch": 0.9898975216222109, "grad_norm": 2.0611305236816406, "learning_rate": 4.902957386544984e-05, "loss": 0.1351, "step": 27240 }, { "epoch": 0.9902609201250091, "grad_norm": 4.219666481018066, "learning_rate": 4.9028141553521785e-05, "loss": 0.194, "step": 27250 }, { "epoch": 0.9906243186278072, "grad_norm": 2.4156904220581055, "learning_rate": 4.90267082063066e-05, "loss": 1.9594, "step": 27260 }, { "epoch": 0.9909877171306054, "grad_norm": 4.805545806884766, "learning_rate": 4.9025273823866046e-05, "loss": 0.1608, "step": 27270 }, { "epoch": 0.9913511156334036, "grad_norm": 3.431521415710449, "learning_rate": 4.902383840626193e-05, "loss": 0.1439, "step": 27280 }, { "epoch": 0.9917145141362017, "grad_norm": 0.9847241640090942, "learning_rate": 4.902240195355609e-05, "loss": 0.1568, "step": 27290 }, { "epoch": 0.9920779126389999, "grad_norm": 4.65169095993042, "learning_rate": 4.9020964465810426e-05, "loss": 0.2039, "step": 27300 }, { "epoch": 0.9924413111417981, "grad_norm": 1.466956377029419, "learning_rate": 4.9019525943086865e-05, "loss": 0.1649, "step": 27310 }, { "epoch": 0.9928047096445962, "grad_norm": 4.803518772125244, "learning_rate": 4.901808638544739e-05, "loss": 0.1645, "step": 27320 }, { "epoch": 0.9931681081473944, "grad_norm": 3.4496331214904785, "learning_rate": 4.901664579295404e-05, "loss": 0.1751, "step": 27330 }, { "epoch": 0.9935315066501926, "grad_norm": 0.9507334232330322, "learning_rate": 4.9015204165668866e-05, "loss": 0.1228, "step": 27340 }, { "epoch": 0.9938949051529907, "grad_norm": 5.97396993637085, "learning_rate": 4.901376150365399e-05, "loss": 0.2178, "step": 27350 }, { "epoch": 0.9942583036557889, "grad_norm": 1.7720214128494263, "learning_rate": 4.9012317806971573e-05, "loss": 0.1468, "step": 27360 }, { "epoch": 0.9946217021585871, "grad_norm": 0.9194307923316956, "learning_rate": 4.9010873075683825e-05, "loss": 0.1481, "step": 27370 }, { "epoch": 0.9949851006613852, "grad_norm": 2.8458971977233887, "learning_rate": 4.9009427309852986e-05, "loss": 0.1402, "step": 27380 }, { "epoch": 0.9953484991641834, "grad_norm": 1.9232338666915894, "learning_rate": 4.900798050954134e-05, "loss": 0.155, "step": 27390 }, { "epoch": 0.9957118976669816, "grad_norm": 4.017787456512451, "learning_rate": 4.900653267481125e-05, "loss": 0.2279, "step": 27400 }, { "epoch": 0.9960752961697797, "grad_norm": 0.714726448059082, "learning_rate": 4.9005083805725064e-05, "loss": 0.1271, "step": 27410 }, { "epoch": 0.996438694672578, "grad_norm": 0.8059016466140747, "learning_rate": 4.900363390234524e-05, "loss": 0.138, "step": 27420 }, { "epoch": 0.9968020931753762, "grad_norm": 2.650024175643921, "learning_rate": 4.9002182964734234e-05, "loss": 0.1483, "step": 27430 }, { "epoch": 0.9971654916781743, "grad_norm": 1.200749397277832, "learning_rate": 4.900073099295456e-05, "loss": 0.9832, "step": 27440 }, { "epoch": 0.9975288901809725, "grad_norm": 3.3051798343658447, "learning_rate": 4.8999277987068785e-05, "loss": 0.2136, "step": 27450 }, { "epoch": 0.9978922886837707, "grad_norm": 1.3630801439285278, "learning_rate": 4.899782394713951e-05, "loss": 0.2136, "step": 27460 }, { "epoch": 0.9982556871865688, "grad_norm": 2.5952398777008057, "learning_rate": 4.899636887322939e-05, "loss": 0.1515, "step": 27470 }, { "epoch": 0.998619085689367, "grad_norm": 5.025683879852295, "learning_rate": 4.8994912765401116e-05, "loss": 0.1862, "step": 27480 }, { "epoch": 0.9989824841921652, "grad_norm": 1.1604958772659302, "learning_rate": 4.8993455623717415e-05, "loss": 0.1433, "step": 27490 }, { "epoch": 0.9993458826949633, "grad_norm": 14.864492416381836, "learning_rate": 4.899199744824109e-05, "loss": 0.1752, "step": 27500 }, { "epoch": 0.9997092811977615, "grad_norm": 1.072911024093628, "learning_rate": 4.8990538239034956e-05, "loss": 0.1524, "step": 27510 }, { "epoch": 1.0000726797005597, "grad_norm": 0.41248244047164917, "learning_rate": 4.898907799616188e-05, "loss": 0.1457, "step": 27520 }, { "epoch": 1.000436078203358, "grad_norm": 1.0402699708938599, "learning_rate": 4.89876167196848e-05, "loss": 0.1394, "step": 27530 }, { "epoch": 1.0007994767061559, "grad_norm": 0.8177555203437805, "learning_rate": 4.8986154409666654e-05, "loss": 0.1134, "step": 27540 }, { "epoch": 1.001162875208954, "grad_norm": 1.1209142208099365, "learning_rate": 4.8984691066170465e-05, "loss": 0.1574, "step": 27550 }, { "epoch": 1.0015262737117523, "grad_norm": 1.4969863891601562, "learning_rate": 4.8983226689259264e-05, "loss": 0.1144, "step": 27560 }, { "epoch": 1.0018896722145505, "grad_norm": 0.8014885783195496, "learning_rate": 4.898176127899617e-05, "loss": 0.1616, "step": 27570 }, { "epoch": 1.0022530707173487, "grad_norm": 1.1477352380752563, "learning_rate": 4.89802948354443e-05, "loss": 0.1209, "step": 27580 }, { "epoch": 1.002616469220147, "grad_norm": 1.0199166536331177, "learning_rate": 4.897882735866686e-05, "loss": 0.1422, "step": 27590 }, { "epoch": 1.002979867722945, "grad_norm": 0.8987438678741455, "learning_rate": 4.897735884872705e-05, "loss": 0.1328, "step": 27600 }, { "epoch": 1.002979867722945, "eval_loss": 0.38110727071762085, "eval_runtime": 180.6687, "eval_samples_per_second": 41.036, "eval_steps_per_second": 5.131, "eval_wer": 0.18068691342785048, "step": 27600 }, { "epoch": 1.003343266225743, "grad_norm": 1.1313180923461914, "learning_rate": 4.897588930568817e-05, "loss": 0.1395, "step": 27610 }, { "epoch": 1.0037066647285413, "grad_norm": 0.6739907264709473, "learning_rate": 4.8974418729613526e-05, "loss": 0.2011, "step": 27620 }, { "epoch": 1.0040700632313395, "grad_norm": 0.986926257610321, "learning_rate": 4.8972947120566475e-05, "loss": 0.1212, "step": 27630 }, { "epoch": 1.0044334617341377, "grad_norm": 0.795300304889679, "learning_rate": 4.8971474478610437e-05, "loss": 0.1404, "step": 27640 }, { "epoch": 1.004796860236936, "grad_norm": 1.7036499977111816, "learning_rate": 4.897000080380885e-05, "loss": 0.1677, "step": 27650 }, { "epoch": 1.005160258739734, "grad_norm": 1.4313631057739258, "learning_rate": 4.896852609622521e-05, "loss": 0.1188, "step": 27660 }, { "epoch": 1.0055236572425321, "grad_norm": 0.5508180260658264, "learning_rate": 4.896705035592306e-05, "loss": 0.1627, "step": 27670 }, { "epoch": 1.0058870557453303, "grad_norm": 2.3307416439056396, "learning_rate": 4.896557358296599e-05, "loss": 0.1173, "step": 27680 }, { "epoch": 1.0062504542481285, "grad_norm": 3.0311474800109863, "learning_rate": 4.896409577741762e-05, "loss": 0.1176, "step": 27690 }, { "epoch": 1.0066138527509267, "grad_norm": 1.8580576181411743, "learning_rate": 4.896261693934163e-05, "loss": 0.1647, "step": 27700 }, { "epoch": 1.0069772512537247, "grad_norm": 1.094754934310913, "learning_rate": 4.896113706880174e-05, "loss": 0.1137, "step": 27710 }, { "epoch": 1.007340649756523, "grad_norm": 0.8240002393722534, "learning_rate": 4.89596561658617e-05, "loss": 0.1835, "step": 27720 }, { "epoch": 1.0077040482593211, "grad_norm": 1.4678568840026855, "learning_rate": 4.895817423058533e-05, "loss": 0.1612, "step": 27730 }, { "epoch": 1.0080674467621193, "grad_norm": 0.8481863737106323, "learning_rate": 4.8956691263036473e-05, "loss": 0.1211, "step": 27740 }, { "epoch": 1.0084308452649176, "grad_norm": 5.045682907104492, "learning_rate": 4.895520726327903e-05, "loss": 0.2747, "step": 27750 }, { "epoch": 1.0087942437677158, "grad_norm": 2.3443167209625244, "learning_rate": 4.895372223137694e-05, "loss": 0.1437, "step": 27760 }, { "epoch": 1.0091576422705137, "grad_norm": 1.6994588375091553, "learning_rate": 4.895223616739418e-05, "loss": 0.16, "step": 27770 }, { "epoch": 1.009521040773312, "grad_norm": 2.073699712753296, "learning_rate": 4.8950749071394794e-05, "loss": 0.1341, "step": 27780 }, { "epoch": 1.0098844392761102, "grad_norm": 1.1939536333084106, "learning_rate": 4.894926094344284e-05, "loss": 0.1284, "step": 27790 }, { "epoch": 1.0102478377789084, "grad_norm": 1.4820387363433838, "learning_rate": 4.8947771783602444e-05, "loss": 0.1644, "step": 27800 }, { "epoch": 1.0106112362817066, "grad_norm": 1.8140612840652466, "learning_rate": 4.894628159193778e-05, "loss": 0.1681, "step": 27810 }, { "epoch": 1.0109746347845048, "grad_norm": 1.7120946645736694, "learning_rate": 4.894479036851303e-05, "loss": 0.2066, "step": 27820 }, { "epoch": 1.0113380332873028, "grad_norm": 1.0871057510375977, "learning_rate": 4.894329811339247e-05, "loss": 0.1428, "step": 27830 }, { "epoch": 1.011701431790101, "grad_norm": 0.897597074508667, "learning_rate": 4.8941804826640375e-05, "loss": 0.1202, "step": 27840 }, { "epoch": 1.0120648302928992, "grad_norm": 1.2489410638809204, "learning_rate": 4.89403105083211e-05, "loss": 0.1435, "step": 27850 }, { "epoch": 1.0124282287956974, "grad_norm": 1.043281078338623, "learning_rate": 4.893881515849902e-05, "loss": 0.1657, "step": 27860 }, { "epoch": 1.0127916272984956, "grad_norm": 1.0345379114151, "learning_rate": 4.893731877723857e-05, "loss": 0.1669, "step": 27870 }, { "epoch": 1.0131550258012938, "grad_norm": 3.5156590938568115, "learning_rate": 4.893582136460423e-05, "loss": 0.1356, "step": 27880 }, { "epoch": 1.0135184243040918, "grad_norm": 1.0468858480453491, "learning_rate": 4.893432292066051e-05, "loss": 0.1201, "step": 27890 }, { "epoch": 1.01388182280689, "grad_norm": 1.3872016668319702, "learning_rate": 4.893282344547197e-05, "loss": 0.1472, "step": 27900 }, { "epoch": 1.0142452213096882, "grad_norm": 0.83976811170578, "learning_rate": 4.893132293910322e-05, "loss": 0.1467, "step": 27910 }, { "epoch": 1.0146086198124864, "grad_norm": 0.625514566898346, "learning_rate": 4.892982140161892e-05, "loss": 0.1661, "step": 27920 }, { "epoch": 1.0149720183152846, "grad_norm": 1.0802186727523804, "learning_rate": 4.892831883308375e-05, "loss": 0.1444, "step": 27930 }, { "epoch": 1.0153354168180828, "grad_norm": 0.567722499370575, "learning_rate": 4.892681523356246e-05, "loss": 0.1003, "step": 27940 }, { "epoch": 1.0156988153208808, "grad_norm": 1.1036186218261719, "learning_rate": 4.892531060311985e-05, "loss": 0.1438, "step": 27950 }, { "epoch": 1.016062213823679, "grad_norm": 1.2610325813293457, "learning_rate": 4.892380494182071e-05, "loss": 0.1478, "step": 27960 }, { "epoch": 1.0164256123264772, "grad_norm": 4.7541913986206055, "learning_rate": 4.892229824972995e-05, "loss": 0.2068, "step": 27970 }, { "epoch": 1.0167890108292754, "grad_norm": 14.794916152954102, "learning_rate": 4.8920790526912464e-05, "loss": 0.4626, "step": 27980 }, { "epoch": 1.0171524093320736, "grad_norm": 0.8083056807518005, "learning_rate": 4.891928177343323e-05, "loss": 0.4919, "step": 27990 }, { "epoch": 1.0175158078348718, "grad_norm": 1.1072735786437988, "learning_rate": 4.8917771989357246e-05, "loss": 0.2863, "step": 28000 }, { "epoch": 1.0178792063376698, "grad_norm": 0.8811991810798645, "learning_rate": 4.891626117474957e-05, "loss": 0.1361, "step": 28010 }, { "epoch": 1.018242604840468, "grad_norm": 0.43256062269210815, "learning_rate": 4.8914749329675294e-05, "loss": 0.1668, "step": 28020 }, { "epoch": 1.0186060033432662, "grad_norm": 1.7490280866622925, "learning_rate": 4.891323645419956e-05, "loss": 0.1328, "step": 28030 }, { "epoch": 1.0189694018460644, "grad_norm": 1.5770010948181152, "learning_rate": 4.891172254838755e-05, "loss": 0.1429, "step": 28040 }, { "epoch": 1.0193328003488626, "grad_norm": 0.5603241920471191, "learning_rate": 4.8910207612304495e-05, "loss": 0.1319, "step": 28050 }, { "epoch": 1.0196961988516606, "grad_norm": 1.5490175485610962, "learning_rate": 4.890869164601566e-05, "loss": 0.1292, "step": 28060 }, { "epoch": 1.0200595973544588, "grad_norm": 0.7562422752380371, "learning_rate": 4.8907174649586376e-05, "loss": 0.1978, "step": 28070 }, { "epoch": 1.020422995857257, "grad_norm": 2.67669415473938, "learning_rate": 4.8905656623082e-05, "loss": 0.1367, "step": 28080 }, { "epoch": 1.0207863943600552, "grad_norm": 1.4589964151382446, "learning_rate": 4.890413756656793e-05, "loss": 0.136, "step": 28090 }, { "epoch": 1.0211497928628535, "grad_norm": 0.5042529702186584, "learning_rate": 4.8902617480109626e-05, "loss": 0.1768, "step": 28100 }, { "epoch": 1.0215131913656517, "grad_norm": 3.3886609077453613, "learning_rate": 4.890109636377258e-05, "loss": 0.1827, "step": 28110 }, { "epoch": 1.0218765898684496, "grad_norm": 0.8882365226745605, "learning_rate": 4.889957421762234e-05, "loss": 0.2176, "step": 28120 }, { "epoch": 1.0222399883712479, "grad_norm": 1.5471583604812622, "learning_rate": 4.889805104172447e-05, "loss": 0.1934, "step": 28130 }, { "epoch": 1.022603386874046, "grad_norm": 1.221699595451355, "learning_rate": 4.889652683614461e-05, "loss": 0.1217, "step": 28140 }, { "epoch": 1.0229667853768443, "grad_norm": 1.1075172424316406, "learning_rate": 4.8895001600948444e-05, "loss": 2.944, "step": 28150 }, { "epoch": 1.0233301838796425, "grad_norm": 0.9731149077415466, "learning_rate": 4.889347533620167e-05, "loss": 0.1123, "step": 28160 }, { "epoch": 1.0236935823824407, "grad_norm": 0.8448407649993896, "learning_rate": 4.889194804197006e-05, "loss": 0.1755, "step": 28170 }, { "epoch": 1.0240569808852387, "grad_norm": 0.8480188250541687, "learning_rate": 4.8890419718319414e-05, "loss": 0.1434, "step": 28180 }, { "epoch": 1.0244203793880369, "grad_norm": 1.5608705282211304, "learning_rate": 4.8888890365315584e-05, "loss": 0.1309, "step": 28190 }, { "epoch": 1.024783777890835, "grad_norm": 7.765607833862305, "learning_rate": 4.888735998302447e-05, "loss": 0.1584, "step": 28200 }, { "epoch": 1.024783777890835, "eval_loss": 0.3628901541233063, "eval_runtime": 180.7805, "eval_samples_per_second": 41.011, "eval_steps_per_second": 5.128, "eval_wer": 0.1830921996115236, "step": 28200 }, { "epoch": 1.0251471763936333, "grad_norm": 2.208989381790161, "learning_rate": 4.8885828571512e-05, "loss": 0.1206, "step": 28210 }, { "epoch": 1.0255105748964315, "grad_norm": 0.4320629835128784, "learning_rate": 4.8884296130844166e-05, "loss": 0.2776, "step": 28220 }, { "epoch": 1.0258739733992297, "grad_norm": 1.4430392980575562, "learning_rate": 4.888276266108699e-05, "loss": 0.1181, "step": 28230 }, { "epoch": 1.0262373719020277, "grad_norm": 0.893260657787323, "learning_rate": 4.888122816230655e-05, "loss": 0.1141, "step": 28240 }, { "epoch": 1.026600770404826, "grad_norm": 1.9237782955169678, "learning_rate": 4.887969263456895e-05, "loss": 0.1676, "step": 28250 }, { "epoch": 1.026964168907624, "grad_norm": 1.0318949222564697, "learning_rate": 4.8878156077940376e-05, "loss": 0.1256, "step": 28260 }, { "epoch": 1.0273275674104223, "grad_norm": 0.8919249773025513, "learning_rate": 4.8876618492487e-05, "loss": 0.2314, "step": 28270 }, { "epoch": 1.0276909659132205, "grad_norm": 1.31845223903656, "learning_rate": 4.8875079878275085e-05, "loss": 0.1414, "step": 28280 }, { "epoch": 1.0280543644160187, "grad_norm": 8.070326805114746, "learning_rate": 4.887354023537094e-05, "loss": 0.2391, "step": 28290 }, { "epoch": 1.0284177629188167, "grad_norm": 0.7600485682487488, "learning_rate": 4.887199956384088e-05, "loss": 0.164, "step": 28300 }, { "epoch": 1.028781161421615, "grad_norm": 1.0197162628173828, "learning_rate": 4.88704578637513e-05, "loss": 0.1324, "step": 28310 }, { "epoch": 1.0291445599244131, "grad_norm": 0.5989790558815002, "learning_rate": 4.886891513516861e-05, "loss": 0.2162, "step": 28320 }, { "epoch": 1.0295079584272113, "grad_norm": 1.2145419120788574, "learning_rate": 4.88673713781593e-05, "loss": 1.7629, "step": 28330 }, { "epoch": 1.0298713569300095, "grad_norm": 0.7220103740692139, "learning_rate": 4.8865826592789876e-05, "loss": 0.105, "step": 28340 }, { "epoch": 1.0302347554328075, "grad_norm": 1.2737821340560913, "learning_rate": 4.88642807791269e-05, "loss": 0.172, "step": 28350 }, { "epoch": 1.0305981539356057, "grad_norm": 2.3391408920288086, "learning_rate": 4.886273393723698e-05, "loss": 0.1431, "step": 28360 }, { "epoch": 1.030961552438404, "grad_norm": 1.1937615871429443, "learning_rate": 4.8861186067186756e-05, "loss": 0.1776, "step": 28370 }, { "epoch": 1.0313249509412021, "grad_norm": 0.5789287090301514, "learning_rate": 4.885963716904292e-05, "loss": 0.1412, "step": 28380 }, { "epoch": 1.0316883494440003, "grad_norm": 1.2566107511520386, "learning_rate": 4.885808724287221e-05, "loss": 0.1284, "step": 28390 }, { "epoch": 1.0320517479467985, "grad_norm": 5.225760459899902, "learning_rate": 4.885653628874141e-05, "loss": 0.1411, "step": 28400 }, { "epoch": 1.0324151464495965, "grad_norm": 1.2525557279586792, "learning_rate": 4.885498430671735e-05, "loss": 0.1372, "step": 28410 }, { "epoch": 1.0327785449523947, "grad_norm": 0.5048568844795227, "learning_rate": 4.885343129686688e-05, "loss": 0.1595, "step": 28420 }, { "epoch": 1.033141943455193, "grad_norm": 0.8768513202667236, "learning_rate": 4.8851877259256933e-05, "loss": 0.286, "step": 28430 }, { "epoch": 1.0335053419579912, "grad_norm": 1.2799090147018433, "learning_rate": 4.885032219395446e-05, "loss": 0.1431, "step": 28440 }, { "epoch": 1.0338687404607894, "grad_norm": 0.9944593906402588, "learning_rate": 4.8848766101026466e-05, "loss": 0.13, "step": 28450 }, { "epoch": 1.0342321389635876, "grad_norm": 1.3601889610290527, "learning_rate": 4.8847208980539994e-05, "loss": 0.1379, "step": 28460 }, { "epoch": 1.0345955374663856, "grad_norm": 0.6347102522850037, "learning_rate": 4.884565083256213e-05, "loss": 0.1833, "step": 28470 }, { "epoch": 1.0349589359691838, "grad_norm": NaN, "learning_rate": 4.884424762093241e-05, "loss": 3.779, "step": 28480 }, { "epoch": 1.035322334471982, "grad_norm": 1.6947808265686035, "learning_rate": 4.8842687520905906e-05, "loss": 0.1571, "step": 28490 }, { "epoch": 1.0356857329747802, "grad_norm": 4.521624565124512, "learning_rate": 4.884112639358283e-05, "loss": 0.1429, "step": 28500 }, { "epoch": 1.0360491314775784, "grad_norm": 1.9370489120483398, "learning_rate": 4.883956423903044e-05, "loss": 0.1375, "step": 28510 }, { "epoch": 1.0364125299803766, "grad_norm": 2.3492047786712646, "learning_rate": 4.883800105731606e-05, "loss": 0.1496, "step": 28520 }, { "epoch": 1.0367759284831746, "grad_norm": 1.1862452030181885, "learning_rate": 4.8836436848507026e-05, "loss": 0.1239, "step": 28530 }, { "epoch": 1.0371393269859728, "grad_norm": 2.223708391189575, "learning_rate": 4.883487161267074e-05, "loss": 0.1159, "step": 28540 }, { "epoch": 1.037502725488771, "grad_norm": 5.854187965393066, "learning_rate": 4.8833305349874636e-05, "loss": 0.1732, "step": 28550 }, { "epoch": 1.0378661239915692, "grad_norm": 1.4000542163848877, "learning_rate": 4.883173806018621e-05, "loss": 0.1428, "step": 28560 }, { "epoch": 1.0382295224943674, "grad_norm": 1.8862130641937256, "learning_rate": 4.883016974367298e-05, "loss": 0.2339, "step": 28570 }, { "epoch": 1.0385929209971656, "grad_norm": 1.701545238494873, "learning_rate": 4.8828600400402525e-05, "loss": 0.2063, "step": 28580 }, { "epoch": 1.0389563194999636, "grad_norm": 3.8795692920684814, "learning_rate": 4.8827030030442466e-05, "loss": 0.1317, "step": 28590 }, { "epoch": 1.0393197180027618, "grad_norm": 1.0597456693649292, "learning_rate": 4.882545863386046e-05, "loss": 0.1783, "step": 28600 }, { "epoch": 1.03968311650556, "grad_norm": 0.8949028849601746, "learning_rate": 4.88238862107242e-05, "loss": 0.1453, "step": 28610 }, { "epoch": 1.0400465150083582, "grad_norm": 0.6270145773887634, "learning_rate": 4.8822312761101456e-05, "loss": 0.2118, "step": 28620 }, { "epoch": 1.0404099135111564, "grad_norm": 0.8819754719734192, "learning_rate": 4.8820738285060016e-05, "loss": 0.1398, "step": 28630 }, { "epoch": 1.0407733120139544, "grad_norm": 1.5963236093521118, "learning_rate": 4.881916278266772e-05, "loss": 0.1592, "step": 28640 }, { "epoch": 1.0411367105167526, "grad_norm": 1.2960532903671265, "learning_rate": 4.8817586253992445e-05, "loss": 0.2044, "step": 28650 }, { "epoch": 1.0415001090195508, "grad_norm": 1.6735124588012695, "learning_rate": 4.881600869910212e-05, "loss": 0.143, "step": 28660 }, { "epoch": 1.041863507522349, "grad_norm": 1.2382493019104004, "learning_rate": 4.8814430118064724e-05, "loss": 0.182, "step": 28670 }, { "epoch": 1.0422269060251472, "grad_norm": 1.614788293838501, "learning_rate": 4.881285051094826e-05, "loss": 0.149, "step": 28680 }, { "epoch": 1.0425903045279454, "grad_norm": 1.549124002456665, "learning_rate": 4.88112698778208e-05, "loss": 0.1238, "step": 28690 }, { "epoch": 1.0429537030307434, "grad_norm": 0.8877584338188171, "learning_rate": 4.8809688218750435e-05, "loss": 0.1541, "step": 28700 }, { "epoch": 1.0433171015335416, "grad_norm": 1.1061103343963623, "learning_rate": 4.8808105533805325e-05, "loss": 0.1209, "step": 28710 }, { "epoch": 1.0436805000363398, "grad_norm": 1.8957878351211548, "learning_rate": 4.880652182305365e-05, "loss": 0.1739, "step": 28720 }, { "epoch": 1.044043898539138, "grad_norm": 0.9069591164588928, "learning_rate": 4.880493708656366e-05, "loss": 0.2014, "step": 28730 }, { "epoch": 1.0444072970419362, "grad_norm": 0.7086552381515503, "learning_rate": 4.880335132440364e-05, "loss": 0.1149, "step": 28740 }, { "epoch": 1.0447706955447345, "grad_norm": 0.5514993667602539, "learning_rate": 4.8801764536641883e-05, "loss": 0.163, "step": 28750 }, { "epoch": 1.0451340940475324, "grad_norm": 0.5786269903182983, "learning_rate": 4.880017672334679e-05, "loss": 0.126, "step": 28760 }, { "epoch": 1.0454974925503306, "grad_norm": 0.8554352521896362, "learning_rate": 4.879858788458676e-05, "loss": 0.2564, "step": 28770 }, { "epoch": 1.0458608910531288, "grad_norm": 3.329148769378662, "learning_rate": 4.8796998020430253e-05, "loss": 0.1297, "step": 28780 }, { "epoch": 1.046224289555927, "grad_norm": 1.1520358324050903, "learning_rate": 4.879540713094578e-05, "loss": 0.1156, "step": 28790 }, { "epoch": 1.0465876880587253, "grad_norm": 1.6375194787979126, "learning_rate": 4.879381521620187e-05, "loss": 0.1418, "step": 28800 }, { "epoch": 1.0465876880587253, "eval_loss": 0.35767313838005066, "eval_runtime": 180.3335, "eval_samples_per_second": 41.113, "eval_steps_per_second": 5.14, "eval_wer": 0.18009693757147785, "step": 28800 }, { "epoch": 1.0469510865615235, "grad_norm": 2.6099300384521484, "learning_rate": 4.879222227626712e-05, "loss": 2.0354, "step": 28810 }, { "epoch": 1.0473144850643215, "grad_norm": 0.9497049450874329, "learning_rate": 4.879062831121017e-05, "loss": 0.2014, "step": 28820 }, { "epoch": 1.0476778835671197, "grad_norm": 1.100393533706665, "learning_rate": 4.878903332109969e-05, "loss": 0.1294, "step": 28830 }, { "epoch": 1.0480412820699179, "grad_norm": 0.46238216757774353, "learning_rate": 4.87874373060044e-05, "loss": 0.1103, "step": 28840 }, { "epoch": 1.048404680572716, "grad_norm": 1.111619234085083, "learning_rate": 4.8785840265993085e-05, "loss": 0.1635, "step": 28850 }, { "epoch": 1.0487680790755143, "grad_norm": 1.8693902492523193, "learning_rate": 4.8784242201134534e-05, "loss": 0.1145, "step": 28860 }, { "epoch": 1.0491314775783125, "grad_norm": 0.5382725596427917, "learning_rate": 4.878264311149762e-05, "loss": 0.1699, "step": 28870 }, { "epoch": 1.0494948760811105, "grad_norm": 1.3384134769439697, "learning_rate": 4.878104299715123e-05, "loss": 0.1479, "step": 28880 }, { "epoch": 1.0498582745839087, "grad_norm": 3.7112338542938232, "learning_rate": 4.87794418581643e-05, "loss": 2.828, "step": 28890 }, { "epoch": 1.0502216730867069, "grad_norm": 0.8874093890190125, "learning_rate": 4.8777839694605844e-05, "loss": 0.1274, "step": 28900 }, { "epoch": 1.050585071589505, "grad_norm": 2.278064489364624, "learning_rate": 4.877623650654487e-05, "loss": 0.1298, "step": 28910 }, { "epoch": 1.0509484700923033, "grad_norm": 0.8750000596046448, "learning_rate": 4.877463229405046e-05, "loss": 0.2705, "step": 28920 }, { "epoch": 1.0513118685951013, "grad_norm": 0.5634777545928955, "learning_rate": 4.8773027057191735e-05, "loss": 0.13, "step": 28930 }, { "epoch": 1.0516752670978995, "grad_norm": 1.1990102529525757, "learning_rate": 4.877142079603786e-05, "loss": 0.1115, "step": 28940 }, { "epoch": 1.0520386656006977, "grad_norm": 5.793541431427002, "learning_rate": 4.8769813510658035e-05, "loss": 0.1909, "step": 28950 }, { "epoch": 1.052402064103496, "grad_norm": 1.0433887243270874, "learning_rate": 4.876820520112153e-05, "loss": 0.1225, "step": 28960 }, { "epoch": 1.0527654626062941, "grad_norm": 0.8786159753799438, "learning_rate": 4.8766595867497624e-05, "loss": 0.1772, "step": 28970 }, { "epoch": 1.0531288611090923, "grad_norm": 1.1270724534988403, "learning_rate": 4.8764985509855664e-05, "loss": 0.1683, "step": 28980 }, { "epoch": 1.0534922596118903, "grad_norm": 0.9916827082633972, "learning_rate": 4.876337412826504e-05, "loss": 0.1834, "step": 28990 }, { "epoch": 1.0538556581146885, "grad_norm": 1.1295456886291504, "learning_rate": 4.876176172279517e-05, "loss": 0.1677, "step": 29000 }, { "epoch": 1.0542190566174867, "grad_norm": 1.625546932220459, "learning_rate": 4.876014829351553e-05, "loss": 0.1374, "step": 29010 }, { "epoch": 1.054582455120285, "grad_norm": 0.4282989799976349, "learning_rate": 4.875853384049564e-05, "loss": 0.1826, "step": 29020 }, { "epoch": 1.0549458536230831, "grad_norm": 0.8806937336921692, "learning_rate": 4.875691836380507e-05, "loss": 0.116, "step": 29030 }, { "epoch": 1.0553092521258813, "grad_norm": 0.5082537531852722, "learning_rate": 4.87553018635134e-05, "loss": 0.1867, "step": 29040 }, { "epoch": 1.0556726506286793, "grad_norm": 3.172614336013794, "learning_rate": 4.875368433969031e-05, "loss": 0.1872, "step": 29050 }, { "epoch": 1.0560360491314775, "grad_norm": 4.570537090301514, "learning_rate": 4.875206579240546e-05, "loss": 0.1501, "step": 29060 }, { "epoch": 1.0563994476342757, "grad_norm": 0.9751003980636597, "learning_rate": 4.875044622172862e-05, "loss": 0.1533, "step": 29070 }, { "epoch": 1.056762846137074, "grad_norm": 0.9446988701820374, "learning_rate": 4.874882562772955e-05, "loss": 0.1462, "step": 29080 }, { "epoch": 1.0571262446398721, "grad_norm": 5.769078254699707, "learning_rate": 4.8747204010478086e-05, "loss": 0.1796, "step": 29090 }, { "epoch": 1.0574896431426704, "grad_norm": 6.486478328704834, "learning_rate": 4.8745581370044094e-05, "loss": 0.1674, "step": 29100 }, { "epoch": 1.0578530416454683, "grad_norm": 0.622352123260498, "learning_rate": 4.874395770649748e-05, "loss": 0.137, "step": 29110 }, { "epoch": 1.0582164401482665, "grad_norm": 0.5244133472442627, "learning_rate": 4.8742333019908215e-05, "loss": 0.2035, "step": 29120 }, { "epoch": 1.0585798386510648, "grad_norm": 1.7058534622192383, "learning_rate": 4.87407073103463e-05, "loss": 0.1408, "step": 29130 }, { "epoch": 1.058943237153863, "grad_norm": 0.9428019523620605, "learning_rate": 4.873908057788177e-05, "loss": 0.1128, "step": 29140 }, { "epoch": 1.0593066356566612, "grad_norm": 0.4694746136665344, "learning_rate": 4.8737452822584724e-05, "loss": 0.142, "step": 29150 }, { "epoch": 1.0596700341594594, "grad_norm": 1.3985977172851562, "learning_rate": 4.873582404452529e-05, "loss": 0.1169, "step": 29160 }, { "epoch": 1.0600334326622574, "grad_norm": 0.8285462856292725, "learning_rate": 4.873419424377366e-05, "loss": 0.1945, "step": 29170 }, { "epoch": 1.0603968311650556, "grad_norm": 1.657012939453125, "learning_rate": 4.8732563420400037e-05, "loss": 0.1904, "step": 29180 }, { "epoch": 1.0607602296678538, "grad_norm": 1.4633735418319702, "learning_rate": 4.87309315744747e-05, "loss": 0.1183, "step": 29190 }, { "epoch": 1.061123628170652, "grad_norm": 0.9722393155097961, "learning_rate": 4.872929870606796e-05, "loss": 0.1608, "step": 29200 }, { "epoch": 1.0614870266734502, "grad_norm": 0.6080673933029175, "learning_rate": 4.872766481525016e-05, "loss": 0.1198, "step": 29210 }, { "epoch": 1.0618504251762482, "grad_norm": 0.7502457499504089, "learning_rate": 4.8726029902091715e-05, "loss": 0.1893, "step": 29220 }, { "epoch": 1.0622138236790464, "grad_norm": 1.7775638103485107, "learning_rate": 4.8724393966663054e-05, "loss": 1.4777, "step": 29230 }, { "epoch": 1.0625772221818446, "grad_norm": 1.1095236539840698, "learning_rate": 4.8722757009034666e-05, "loss": 0.1443, "step": 29240 }, { "epoch": 1.0629406206846428, "grad_norm": 0.6879424452781677, "learning_rate": 4.872111902927709e-05, "loss": 0.2048, "step": 29250 }, { "epoch": 1.063304019187441, "grad_norm": 1.2532442808151245, "learning_rate": 4.8719480027460895e-05, "loss": 0.1343, "step": 29260 }, { "epoch": 1.0636674176902392, "grad_norm": 1.0296350717544556, "learning_rate": 4.87178400036567e-05, "loss": 0.1656, "step": 29270 }, { "epoch": 1.0640308161930372, "grad_norm": 1.0346356630325317, "learning_rate": 4.871619895793517e-05, "loss": 0.1466, "step": 29280 }, { "epoch": 1.0643942146958354, "grad_norm": 1.9428579807281494, "learning_rate": 4.8714556890367e-05, "loss": 0.1496, "step": 29290 }, { "epoch": 1.0647576131986336, "grad_norm": 2.6400890350341797, "learning_rate": 4.871291380102295e-05, "loss": 0.1857, "step": 29300 }, { "epoch": 1.0651210117014318, "grad_norm": 7.17543888092041, "learning_rate": 4.8711269689973826e-05, "loss": 0.1242, "step": 29310 }, { "epoch": 1.06548441020423, "grad_norm": 1.8619358539581299, "learning_rate": 4.870962455729045e-05, "loss": 0.2137, "step": 29320 }, { "epoch": 1.0658478087070282, "grad_norm": 4.936455726623535, "learning_rate": 4.8707978403043716e-05, "loss": 0.1551, "step": 29330 }, { "epoch": 1.0662112072098262, "grad_norm": 1.2196155786514282, "learning_rate": 4.8706331227304533e-05, "loss": 0.184, "step": 29340 }, { "epoch": 1.0665746057126244, "grad_norm": 2.0982654094696045, "learning_rate": 4.87046830301439e-05, "loss": 0.1566, "step": 29350 }, { "epoch": 1.0669380042154226, "grad_norm": 1.0265774726867676, "learning_rate": 4.8703033811632806e-05, "loss": 0.1263, "step": 29360 }, { "epoch": 1.0673014027182208, "grad_norm": 2.413862705230713, "learning_rate": 4.870138357184233e-05, "loss": 0.135, "step": 29370 }, { "epoch": 1.067664801221019, "grad_norm": 0.800736665725708, "learning_rate": 4.869973231084356e-05, "loss": 0.1169, "step": 29380 }, { "epoch": 1.0680281997238172, "grad_norm": 4.07125186920166, "learning_rate": 4.8698080028707647e-05, "loss": 0.1745, "step": 29390 }, { "epoch": 1.0683915982266152, "grad_norm": 1.1997871398925781, "learning_rate": 4.8696426725505784e-05, "loss": 0.1427, "step": 29400 }, { "epoch": 1.0683915982266152, "eval_loss": 0.3479246199131012, "eval_runtime": 180.3605, "eval_samples_per_second": 41.107, "eval_steps_per_second": 5.14, "eval_wer": 0.1802693920525714, "step": 29400 }, { "epoch": 1.0687549967294134, "grad_norm": 1.5684832334518433, "learning_rate": 4.8694772401309205e-05, "loss": 0.1491, "step": 29410 }, { "epoch": 1.0691183952322116, "grad_norm": 1.3784462213516235, "learning_rate": 4.8693117056189194e-05, "loss": 0.1741, "step": 29420 }, { "epoch": 1.0694817937350098, "grad_norm": 2.0766236782073975, "learning_rate": 4.869146069021707e-05, "loss": 0.1375, "step": 29430 }, { "epoch": 1.069845192237808, "grad_norm": 0.6553940773010254, "learning_rate": 4.86898033034642e-05, "loss": 0.1371, "step": 29440 }, { "epoch": 1.0702085907406063, "grad_norm": 0.9652252197265625, "learning_rate": 4.868814489600199e-05, "loss": 0.1446, "step": 29450 }, { "epoch": 1.0705719892434042, "grad_norm": 1.123075008392334, "learning_rate": 4.8686485467901896e-05, "loss": 0.1628, "step": 29460 }, { "epoch": 1.0709353877462024, "grad_norm": 1.3370702266693115, "learning_rate": 4.868482501923543e-05, "loss": 0.1822, "step": 29470 }, { "epoch": 1.0712987862490007, "grad_norm": 1.1716543436050415, "learning_rate": 4.868316355007412e-05, "loss": 0.7928, "step": 29480 }, { "epoch": 1.0716621847517989, "grad_norm": 2.255791187286377, "learning_rate": 4.868150106048955e-05, "loss": 0.3897, "step": 29490 }, { "epoch": 1.072025583254597, "grad_norm": 2.2386605739593506, "learning_rate": 4.8679837550553366e-05, "loss": 0.174, "step": 29500 }, { "epoch": 1.072388981757395, "grad_norm": 0.7938382625579834, "learning_rate": 4.867817302033724e-05, "loss": 0.1274, "step": 29510 }, { "epoch": 1.0727523802601933, "grad_norm": 0.8619611263275146, "learning_rate": 4.8676507469912866e-05, "loss": 0.1756, "step": 29520 }, { "epoch": 1.0731157787629915, "grad_norm": 5.2337727546691895, "learning_rate": 4.867484089935205e-05, "loss": 0.1131, "step": 29530 }, { "epoch": 1.0734791772657897, "grad_norm": 1.339237093925476, "learning_rate": 4.867317330872656e-05, "loss": 0.2385, "step": 29540 }, { "epoch": 1.0738425757685879, "grad_norm": 0.9693569540977478, "learning_rate": 4.8671504698108266e-05, "loss": 0.1455, "step": 29550 }, { "epoch": 1.074205974271386, "grad_norm": 0.8684889674186707, "learning_rate": 4.866983506756906e-05, "loss": 0.1516, "step": 29560 }, { "epoch": 1.074569372774184, "grad_norm": 0.5518342852592468, "learning_rate": 4.866816441718088e-05, "loss": 0.2068, "step": 29570 }, { "epoch": 1.0749327712769823, "grad_norm": 1.049777865409851, "learning_rate": 4.86664927470157e-05, "loss": 0.1473, "step": 29580 }, { "epoch": 1.0752961697797805, "grad_norm": 3.8799684047698975, "learning_rate": 4.8664820057145556e-05, "loss": 0.1398, "step": 29590 }, { "epoch": 1.0756595682825787, "grad_norm": 0.9927829504013062, "learning_rate": 4.866314634764252e-05, "loss": 2.0386, "step": 29600 }, { "epoch": 1.076022966785377, "grad_norm": 1.2022935152053833, "learning_rate": 4.86614716185787e-05, "loss": 0.1539, "step": 29610 }, { "epoch": 1.076386365288175, "grad_norm": 0.7556710243225098, "learning_rate": 4.865979587002625e-05, "loss": 0.1718, "step": 29620 }, { "epoch": 1.076749763790973, "grad_norm": 1.0953086614608765, "learning_rate": 4.865811910205738e-05, "loss": 0.1537, "step": 29630 }, { "epoch": 1.0771131622937713, "grad_norm": 0.49788376688957214, "learning_rate": 4.865644131474434e-05, "loss": 0.1436, "step": 29640 }, { "epoch": 1.0774765607965695, "grad_norm": 2.5504343509674072, "learning_rate": 4.865476250815941e-05, "loss": 0.1535, "step": 29650 }, { "epoch": 1.0778399592993677, "grad_norm": 1.280085802078247, "learning_rate": 4.865308268237492e-05, "loss": 0.1389, "step": 29660 }, { "epoch": 1.078203357802166, "grad_norm": 0.4341859519481659, "learning_rate": 4.865140183746326e-05, "loss": 0.2016, "step": 29670 }, { "epoch": 1.0785667563049641, "grad_norm": 0.744679868221283, "learning_rate": 4.864971997349685e-05, "loss": 0.1216, "step": 29680 }, { "epoch": 1.078930154807762, "grad_norm": 1.449559211730957, "learning_rate": 4.8648037090548154e-05, "loss": 0.1202, "step": 29690 }, { "epoch": 1.0792935533105603, "grad_norm": 3.58284330368042, "learning_rate": 4.8646353188689674e-05, "loss": 0.145, "step": 29700 }, { "epoch": 1.0796569518133585, "grad_norm": 1.7318589687347412, "learning_rate": 4.864466826799398e-05, "loss": 0.1361, "step": 29710 }, { "epoch": 1.0800203503161567, "grad_norm": 0.44806694984436035, "learning_rate": 4.864298232853364e-05, "loss": 0.212, "step": 29720 }, { "epoch": 1.080383748818955, "grad_norm": 0.8236504197120667, "learning_rate": 4.864129537038132e-05, "loss": 0.1446, "step": 29730 }, { "epoch": 1.0807471473217531, "grad_norm": 0.920353889465332, "learning_rate": 4.863960739360971e-05, "loss": 0.1472, "step": 29740 }, { "epoch": 1.0811105458245511, "grad_norm": 1.415685772895813, "learning_rate": 4.8637918398291514e-05, "loss": 1.9346, "step": 29750 }, { "epoch": 1.0814739443273493, "grad_norm": 0.7517853379249573, "learning_rate": 4.8636228384499524e-05, "loss": 0.1276, "step": 29760 }, { "epoch": 1.0818373428301475, "grad_norm": 0.5632757544517517, "learning_rate": 4.8634537352306554e-05, "loss": 0.1385, "step": 29770 }, { "epoch": 1.0822007413329457, "grad_norm": 1.5767742395401, "learning_rate": 4.8632845301785455e-05, "loss": 0.1451, "step": 29780 }, { "epoch": 1.082564139835744, "grad_norm": 0.7501896619796753, "learning_rate": 4.8631152233009146e-05, "loss": 0.1337, "step": 29790 }, { "epoch": 1.082927538338542, "grad_norm": 0.7235280871391296, "learning_rate": 4.862945814605056e-05, "loss": 0.1727, "step": 29800 }, { "epoch": 1.0832909368413401, "grad_norm": 0.9608789682388306, "learning_rate": 4.86277630409827e-05, "loss": 0.1346, "step": 29810 }, { "epoch": 1.0836543353441384, "grad_norm": 0.5176007151603699, "learning_rate": 4.862606691787859e-05, "loss": 0.1477, "step": 29820 }, { "epoch": 1.0840177338469366, "grad_norm": 1.1901780366897583, "learning_rate": 4.862436977681133e-05, "loss": 0.1478, "step": 29830 }, { "epoch": 1.0843811323497348, "grad_norm": 4.09995698928833, "learning_rate": 4.8622671617854026e-05, "loss": 0.1369, "step": 29840 }, { "epoch": 1.084744530852533, "grad_norm": 10.049054145812988, "learning_rate": 4.8620972441079855e-05, "loss": 0.8392, "step": 29850 }, { "epoch": 1.085107929355331, "grad_norm": 1.131095051765442, "learning_rate": 4.861927224656202e-05, "loss": 0.1404, "step": 29860 }, { "epoch": 1.0854713278581292, "grad_norm": 1.2740205526351929, "learning_rate": 4.861757103437379e-05, "loss": 0.1726, "step": 29870 }, { "epoch": 1.0858347263609274, "grad_norm": 0.9203113317489624, "learning_rate": 4.861586880458845e-05, "loss": 0.1088, "step": 29880 }, { "epoch": 1.0861981248637256, "grad_norm": 0.8646379113197327, "learning_rate": 4.8614165557279345e-05, "loss": 0.124, "step": 29890 }, { "epoch": 1.0865615233665238, "grad_norm": 1.283758521080017, "learning_rate": 4.861246129251987e-05, "loss": 0.1616, "step": 29900 }, { "epoch": 1.086924921869322, "grad_norm": 2.0251550674438477, "learning_rate": 4.861075601038345e-05, "loss": 0.1475, "step": 29910 }, { "epoch": 1.08728832037212, "grad_norm": 0.7173452973365784, "learning_rate": 4.860904971094356e-05, "loss": 0.182, "step": 29920 }, { "epoch": 1.0876517188749182, "grad_norm": 0.7154909372329712, "learning_rate": 4.8607342394273725e-05, "loss": 0.1263, "step": 29930 }, { "epoch": 1.0880151173777164, "grad_norm": 2.5288286209106445, "learning_rate": 4.860563406044749e-05, "loss": 0.1289, "step": 29940 }, { "epoch": 1.0883785158805146, "grad_norm": 1.4772063493728638, "learning_rate": 4.860392470953848e-05, "loss": 0.1556, "step": 29950 }, { "epoch": 1.0887419143833128, "grad_norm": 1.623298168182373, "learning_rate": 4.8602214341620346e-05, "loss": 0.1493, "step": 29960 }, { "epoch": 1.089105312886111, "grad_norm": 0.4302707016468048, "learning_rate": 4.860050295676676e-05, "loss": 0.2303, "step": 29970 }, { "epoch": 1.089468711388909, "grad_norm": 1.4086140394210815, "learning_rate": 4.8598790555051474e-05, "loss": 0.1549, "step": 29980 }, { "epoch": 1.0898321098917072, "grad_norm": 1.1924636363983154, "learning_rate": 4.859707713654828e-05, "loss": 0.1426, "step": 29990 }, { "epoch": 1.0901955083945054, "grad_norm": 0.8468578457832336, "learning_rate": 4.859536270133097e-05, "loss": 0.1607, "step": 30000 }, { "epoch": 1.0901955083945054, "eval_loss": 0.38150739669799805, "eval_runtime": 180.8598, "eval_samples_per_second": 40.993, "eval_steps_per_second": 5.126, "eval_wer": 0.18088659756385353, "step": 30000 }, { "epoch": 1.0905589068973036, "grad_norm": 1.3293052911758423, "learning_rate": 4.859364724947345e-05, "loss": 0.1199, "step": 30010 }, { "epoch": 1.0909223054001018, "grad_norm": 1.421976923942566, "learning_rate": 4.859193078104961e-05, "loss": 0.1932, "step": 30020 }, { "epoch": 1.0912857039029, "grad_norm": 5.226151466369629, "learning_rate": 4.8590213296133415e-05, "loss": 0.1365, "step": 30030 }, { "epoch": 1.091649102405698, "grad_norm": 1.6307711601257324, "learning_rate": 4.8588494794798866e-05, "loss": 0.1533, "step": 30040 }, { "epoch": 1.0920125009084962, "grad_norm": 1.190746784210205, "learning_rate": 4.858677527712e-05, "loss": 0.1701, "step": 30050 }, { "epoch": 1.0923758994112944, "grad_norm": 1.1558239459991455, "learning_rate": 4.858505474317091e-05, "loss": 0.1792, "step": 30060 }, { "epoch": 1.0927392979140926, "grad_norm": 0.5284643769264221, "learning_rate": 4.858333319302573e-05, "loss": 0.1789, "step": 30070 }, { "epoch": 1.0931026964168908, "grad_norm": 0.7858747243881226, "learning_rate": 4.858161062675863e-05, "loss": 0.1456, "step": 30080 }, { "epoch": 1.0934660949196888, "grad_norm": 1.2685805559158325, "learning_rate": 4.857988704444383e-05, "loss": 0.1326, "step": 30090 }, { "epoch": 1.093829493422487, "grad_norm": 0.9551296830177307, "learning_rate": 4.8578162446155595e-05, "loss": 0.327, "step": 30100 }, { "epoch": 1.0941928919252852, "grad_norm": 3.6769495010375977, "learning_rate": 4.857643683196823e-05, "loss": 0.1111, "step": 30110 }, { "epoch": 1.0945562904280834, "grad_norm": 0.757580041885376, "learning_rate": 4.8574710201956095e-05, "loss": 0.1622, "step": 30120 }, { "epoch": 1.0949196889308817, "grad_norm": 0.762323796749115, "learning_rate": 4.857298255619357e-05, "loss": 0.1218, "step": 30130 }, { "epoch": 1.0952830874336799, "grad_norm": 0.6065217852592468, "learning_rate": 4.85712538947551e-05, "loss": 0.1297, "step": 30140 }, { "epoch": 1.0956464859364778, "grad_norm": 1.1257789134979248, "learning_rate": 4.856952421771517e-05, "loss": 0.1862, "step": 30150 }, { "epoch": 1.096009884439276, "grad_norm": 1.128233790397644, "learning_rate": 4.85677935251483e-05, "loss": 0.1377, "step": 30160 }, { "epoch": 1.0963732829420743, "grad_norm": 0.48844701051712036, "learning_rate": 4.856606181712906e-05, "loss": 0.1967, "step": 30170 }, { "epoch": 1.0967366814448725, "grad_norm": 0.682921290397644, "learning_rate": 4.856432909373206e-05, "loss": 0.1268, "step": 30180 }, { "epoch": 1.0971000799476707, "grad_norm": 0.8049948215484619, "learning_rate": 4.856259535503197e-05, "loss": 0.0971, "step": 30190 }, { "epoch": 1.0974634784504689, "grad_norm": 0.6435711979866028, "learning_rate": 4.8560860601103485e-05, "loss": 1.0273, "step": 30200 }, { "epoch": 1.0978268769532669, "grad_norm": 1.014172911643982, "learning_rate": 4.855912483202134e-05, "loss": 0.1137, "step": 30210 }, { "epoch": 1.098190275456065, "grad_norm": 1.4760230779647827, "learning_rate": 4.8557388047860334e-05, "loss": 0.1585, "step": 30220 }, { "epoch": 1.0985536739588633, "grad_norm": 1.4756141901016235, "learning_rate": 4.855565024869529e-05, "loss": 0.1261, "step": 30230 }, { "epoch": 1.0989170724616615, "grad_norm": 2.1977133750915527, "learning_rate": 4.8553911434601085e-05, "loss": 0.1532, "step": 30240 }, { "epoch": 1.0992804709644597, "grad_norm": 1.9084991216659546, "learning_rate": 4.855217160565265e-05, "loss": 0.1611, "step": 30250 }, { "epoch": 1.099643869467258, "grad_norm": 1.3657923936843872, "learning_rate": 4.855043076192494e-05, "loss": 0.1569, "step": 30260 }, { "epoch": 1.1000072679700559, "grad_norm": 0.6824470162391663, "learning_rate": 4.8548688903492943e-05, "loss": 0.1632, "step": 30270 }, { "epoch": 1.100370666472854, "grad_norm": 0.595958948135376, "learning_rate": 4.854694603043175e-05, "loss": 0.1394, "step": 30280 }, { "epoch": 1.1007340649756523, "grad_norm": 1.1626547574996948, "learning_rate": 4.854520214281642e-05, "loss": 0.1193, "step": 30290 }, { "epoch": 1.1010974634784505, "grad_norm": 1.2703717947006226, "learning_rate": 4.8543457240722104e-05, "loss": 0.1949, "step": 30300 }, { "epoch": 1.1014608619812487, "grad_norm": 1.7159488201141357, "learning_rate": 4.854171132422399e-05, "loss": 0.1524, "step": 30310 }, { "epoch": 1.101824260484047, "grad_norm": 0.7651236057281494, "learning_rate": 4.85399643933973e-05, "loss": 0.1992, "step": 30320 }, { "epoch": 1.102187658986845, "grad_norm": 0.7985833287239075, "learning_rate": 4.8538216448317286e-05, "loss": 0.1363, "step": 30330 }, { "epoch": 1.102551057489643, "grad_norm": 1.2583733797073364, "learning_rate": 4.853646748905928e-05, "loss": 0.1215, "step": 30340 }, { "epoch": 1.1029144559924413, "grad_norm": 1.1982141733169556, "learning_rate": 4.853471751569864e-05, "loss": 0.1656, "step": 30350 }, { "epoch": 1.1032778544952395, "grad_norm": 2.399423599243164, "learning_rate": 4.853296652831075e-05, "loss": 0.1164, "step": 30360 }, { "epoch": 1.1036412529980377, "grad_norm": 1.5785446166992188, "learning_rate": 4.853121452697107e-05, "loss": 0.1682, "step": 30370 }, { "epoch": 1.1040046515008357, "grad_norm": 1.3818514347076416, "learning_rate": 4.852946151175508e-05, "loss": 0.5349, "step": 30380 }, { "epoch": 1.104368050003634, "grad_norm": 1.6894676685333252, "learning_rate": 4.8527707482738305e-05, "loss": 0.1314, "step": 30390 }, { "epoch": 1.1047314485064321, "grad_norm": 1.1517245769500732, "learning_rate": 4.852595243999633e-05, "loss": 0.1515, "step": 30400 }, { "epoch": 1.1050948470092303, "grad_norm": 1.2149289846420288, "learning_rate": 4.852419638360477e-05, "loss": 0.2572, "step": 30410 }, { "epoch": 1.1054582455120285, "grad_norm": 0.8241190314292908, "learning_rate": 4.852243931363929e-05, "loss": 0.2045, "step": 30420 }, { "epoch": 1.1058216440148267, "grad_norm": 0.8909230828285217, "learning_rate": 4.852068123017559e-05, "loss": 0.1281, "step": 30430 }, { "epoch": 1.1061850425176247, "grad_norm": 0.7718971967697144, "learning_rate": 4.8518922133289424e-05, "loss": 0.1207, "step": 30440 }, { "epoch": 1.106548441020423, "grad_norm": 3.393324136734009, "learning_rate": 4.8517162023056575e-05, "loss": 0.4812, "step": 30450 }, { "epoch": 1.1069118395232211, "grad_norm": 1.5000587701797485, "learning_rate": 4.85154008995529e-05, "loss": 0.1246, "step": 30460 }, { "epoch": 1.1072752380260193, "grad_norm": 1.3177014589309692, "learning_rate": 4.8513638762854264e-05, "loss": 0.2352, "step": 30470 }, { "epoch": 1.1076386365288176, "grad_norm": 0.8771611452102661, "learning_rate": 4.8511875613036596e-05, "loss": 1.4369, "step": 30480 }, { "epoch": 1.1080020350316158, "grad_norm": 0.46630170941352844, "learning_rate": 4.8510111450175865e-05, "loss": 0.1184, "step": 30490 }, { "epoch": 1.1083654335344137, "grad_norm": 1.582541823387146, "learning_rate": 4.850834627434808e-05, "loss": 0.154, "step": 30500 }, { "epoch": 1.108728832037212, "grad_norm": 0.9425756335258484, "learning_rate": 4.850658008562929e-05, "loss": 0.1632, "step": 30510 }, { "epoch": 1.1090922305400102, "grad_norm": 0.6453799605369568, "learning_rate": 4.8504812884095616e-05, "loss": 0.2024, "step": 30520 }, { "epoch": 1.1094556290428084, "grad_norm": 0.8643505573272705, "learning_rate": 4.850304466982317e-05, "loss": 0.1373, "step": 30530 }, { "epoch": 1.1098190275456066, "grad_norm": 0.6463938355445862, "learning_rate": 4.850127544288816e-05, "loss": 0.1308, "step": 30540 }, { "epoch": 1.1101824260484048, "grad_norm": 1.2465693950653076, "learning_rate": 4.8499505203366816e-05, "loss": 0.1628, "step": 30550 }, { "epoch": 1.1105458245512028, "grad_norm": 1.085317850112915, "learning_rate": 4.84977339513354e-05, "loss": 0.1533, "step": 30560 }, { "epoch": 1.110909223054001, "grad_norm": 0.5834909081459045, "learning_rate": 4.849596168687022e-05, "loss": 0.1811, "step": 30570 }, { "epoch": 1.1112726215567992, "grad_norm": 1.407309889793396, "learning_rate": 4.849418841004766e-05, "loss": 0.1621, "step": 30580 }, { "epoch": 1.1116360200595974, "grad_norm": 1.0903669595718384, "learning_rate": 4.8492414120944116e-05, "loss": 0.271, "step": 30590 }, { "epoch": 1.1119994185623956, "grad_norm": 1.6495404243469238, "learning_rate": 4.8490638819636036e-05, "loss": 0.1602, "step": 30600 }, { "epoch": 1.1119994185623956, "eval_loss": 0.3786245882511139, "eval_runtime": 180.2563, "eval_samples_per_second": 41.13, "eval_steps_per_second": 5.143, "eval_wer": 0.17622125002269137, "step": 30600 }, { "epoch": 1.1123628170651938, "grad_norm": 1.6046833992004395, "learning_rate": 4.8488862506199905e-05, "loss": 0.142, "step": 30610 }, { "epoch": 1.1127262155679918, "grad_norm": 0.7779229879379272, "learning_rate": 4.848708518071226e-05, "loss": 0.1556, "step": 30620 }, { "epoch": 1.11308961407079, "grad_norm": 6.0123677253723145, "learning_rate": 4.848530684324969e-05, "loss": 0.1379, "step": 30630 }, { "epoch": 1.1134530125735882, "grad_norm": 1.1593163013458252, "learning_rate": 4.8483527493888796e-05, "loss": 0.1091, "step": 30640 }, { "epoch": 1.1138164110763864, "grad_norm": 1.1061301231384277, "learning_rate": 4.848174713270627e-05, "loss": 0.1521, "step": 30650 }, { "epoch": 1.1141798095791846, "grad_norm": 2.006169080734253, "learning_rate": 4.8479965759778804e-05, "loss": 0.1188, "step": 30660 }, { "epoch": 1.1145432080819826, "grad_norm": 0.632653534412384, "learning_rate": 4.8478183375183154e-05, "loss": 0.2039, "step": 30670 }, { "epoch": 1.1149066065847808, "grad_norm": 2.2631378173828125, "learning_rate": 4.847639997899611e-05, "loss": 0.1324, "step": 30680 }, { "epoch": 1.115270005087579, "grad_norm": 0.7694458365440369, "learning_rate": 4.847461557129454e-05, "loss": 0.1043, "step": 30690 }, { "epoch": 1.1156334035903772, "grad_norm": 1.5386550426483154, "learning_rate": 4.847283015215529e-05, "loss": 0.1605, "step": 30700 }, { "epoch": 1.1159968020931754, "grad_norm": 0.9068945646286011, "learning_rate": 4.847104372165531e-05, "loss": 0.1178, "step": 30710 }, { "epoch": 1.1163602005959736, "grad_norm": 1.4700278043746948, "learning_rate": 4.8469256279871564e-05, "loss": 0.1458, "step": 30720 }, { "epoch": 1.1167235990987716, "grad_norm": 1.125613808631897, "learning_rate": 4.846746782688108e-05, "loss": 0.1212, "step": 30730 }, { "epoch": 1.1170869976015698, "grad_norm": 1.081297516822815, "learning_rate": 4.846567836276089e-05, "loss": 0.1218, "step": 30740 }, { "epoch": 1.117450396104368, "grad_norm": 0.6549712419509888, "learning_rate": 4.846388788758812e-05, "loss": 0.1684, "step": 30750 }, { "epoch": 1.1178137946071662, "grad_norm": 0.7256012558937073, "learning_rate": 4.84620964014399e-05, "loss": 0.1425, "step": 30760 }, { "epoch": 1.1181771931099644, "grad_norm": 0.6661650538444519, "learning_rate": 4.846030390439343e-05, "loss": 0.2043, "step": 30770 }, { "epoch": 1.1185405916127626, "grad_norm": 2.5043599605560303, "learning_rate": 4.845851039652594e-05, "loss": 0.1337, "step": 30780 }, { "epoch": 1.1189039901155606, "grad_norm": 1.7362638711929321, "learning_rate": 4.84567158779147e-05, "loss": 0.1146, "step": 30790 }, { "epoch": 1.1192673886183588, "grad_norm": 2.156850576400757, "learning_rate": 4.845492034863703e-05, "loss": 0.1402, "step": 30800 }, { "epoch": 1.119630787121157, "grad_norm": 6.733970642089844, "learning_rate": 4.8453123808770295e-05, "loss": 0.137, "step": 30810 }, { "epoch": 1.1199941856239553, "grad_norm": 1.2163270711898804, "learning_rate": 4.84513262583919e-05, "loss": 0.2038, "step": 30820 }, { "epoch": 1.1203575841267535, "grad_norm": 1.0911026000976562, "learning_rate": 4.84495276975793e-05, "loss": 0.1247, "step": 30830 }, { "epoch": 1.1207209826295517, "grad_norm": 8.4699125289917, "learning_rate": 4.844772812640998e-05, "loss": 0.1883, "step": 30840 }, { "epoch": 1.1210843811323496, "grad_norm": 1.9448401927947998, "learning_rate": 4.8445927544961486e-05, "loss": 0.1259, "step": 30850 }, { "epoch": 1.1214477796351479, "grad_norm": 1.2070740461349487, "learning_rate": 4.844412595331139e-05, "loss": 0.1478, "step": 30860 }, { "epoch": 1.121811178137946, "grad_norm": 0.5514017939567566, "learning_rate": 4.844232335153733e-05, "loss": 0.2209, "step": 30870 }, { "epoch": 1.1221745766407443, "grad_norm": 0.6462703943252563, "learning_rate": 4.844051973971696e-05, "loss": 0.1182, "step": 30880 }, { "epoch": 1.1225379751435425, "grad_norm": 0.9222347140312195, "learning_rate": 4.8438715117927995e-05, "loss": 0.1079, "step": 30890 }, { "epoch": 1.1229013736463407, "grad_norm": 1.1663174629211426, "learning_rate": 4.8436909486248196e-05, "loss": 0.1516, "step": 30900 }, { "epoch": 1.1232647721491387, "grad_norm": 0.8301449418067932, "learning_rate": 4.8435102844755356e-05, "loss": 0.1204, "step": 30910 }, { "epoch": 1.1236281706519369, "grad_norm": 0.8328074216842651, "learning_rate": 4.8433295193527305e-05, "loss": 0.1432, "step": 30920 }, { "epoch": 1.123991569154735, "grad_norm": 1.0741894245147705, "learning_rate": 4.843148653264195e-05, "loss": 0.1286, "step": 30930 }, { "epoch": 1.1243549676575333, "grad_norm": 1.5792789459228516, "learning_rate": 4.842967686217721e-05, "loss": 0.1235, "step": 30940 }, { "epoch": 1.1247183661603315, "grad_norm": 10.680551528930664, "learning_rate": 4.8427866182211056e-05, "loss": 0.2624, "step": 30950 }, { "epoch": 1.1250817646631295, "grad_norm": 0.753760576248169, "learning_rate": 4.8426054492821503e-05, "loss": 0.1185, "step": 30960 }, { "epoch": 1.1254451631659277, "grad_norm": 0.681735098361969, "learning_rate": 4.8424241794086614e-05, "loss": 0.1249, "step": 30970 }, { "epoch": 1.125808561668726, "grad_norm": 1.0460690259933472, "learning_rate": 4.842242808608449e-05, "loss": 0.1437, "step": 30980 }, { "epoch": 1.126171960171524, "grad_norm": 0.8870137929916382, "learning_rate": 4.8420613368893275e-05, "loss": 0.1321, "step": 30990 }, { "epoch": 1.1265353586743223, "grad_norm": 1.765331506729126, "learning_rate": 4.841879764259116e-05, "loss": 0.1722, "step": 31000 }, { "epoch": 1.1268987571771205, "grad_norm": 2.9972617626190186, "learning_rate": 4.841698090725638e-05, "loss": 0.095, "step": 31010 }, { "epoch": 1.1272621556799187, "grad_norm": 0.7516260743141174, "learning_rate": 4.841516316296722e-05, "loss": 0.2073, "step": 31020 }, { "epoch": 1.1276255541827167, "grad_norm": 7.545155048370361, "learning_rate": 4.841334440980197e-05, "loss": 0.2066, "step": 31030 }, { "epoch": 1.127988952685515, "grad_norm": 0.7127543687820435, "learning_rate": 4.841152464783903e-05, "loss": 0.1483, "step": 31040 }, { "epoch": 1.1283523511883131, "grad_norm": 0.8178777694702148, "learning_rate": 4.8409703877156786e-05, "loss": 0.122, "step": 31050 }, { "epoch": 1.1287157496911113, "grad_norm": 2.2176194190979004, "learning_rate": 4.84078820978337e-05, "loss": 0.1074, "step": 31060 }, { "epoch": 1.1290791481939095, "grad_norm": 0.5790374279022217, "learning_rate": 4.8406059309948246e-05, "loss": 0.2401, "step": 31070 }, { "epoch": 1.1294425466967075, "grad_norm": 1.335080623626709, "learning_rate": 4.840423551357899e-05, "loss": 0.1412, "step": 31080 }, { "epoch": 1.1298059451995057, "grad_norm": 2.9304592609405518, "learning_rate": 4.840241070880449e-05, "loss": 0.1137, "step": 31090 }, { "epoch": 1.130169343702304, "grad_norm": 0.6828371286392212, "learning_rate": 4.840058489570338e-05, "loss": 0.162, "step": 31100 }, { "epoch": 1.1305327422051021, "grad_norm": 0.7623898983001709, "learning_rate": 4.8398758074354334e-05, "loss": 0.1164, "step": 31110 }, { "epoch": 1.1308961407079003, "grad_norm": 0.44123783707618713, "learning_rate": 4.8396930244836045e-05, "loss": 0.1378, "step": 31120 }, { "epoch": 1.1312595392106983, "grad_norm": 0.880264937877655, "learning_rate": 4.839510140722728e-05, "loss": 0.1168, "step": 31130 }, { "epoch": 1.1316229377134965, "grad_norm": 0.9946479797363281, "learning_rate": 4.839327156160684e-05, "loss": 0.1099, "step": 31140 }, { "epoch": 1.1319863362162947, "grad_norm": 1.5061123371124268, "learning_rate": 4.8391440708053565e-05, "loss": 0.1167, "step": 31150 }, { "epoch": 1.132349734719093, "grad_norm": 3.687218427658081, "learning_rate": 4.838960884664633e-05, "loss": 0.1141, "step": 31160 }, { "epoch": 1.1327131332218912, "grad_norm": 1.015309453010559, "learning_rate": 4.838777597746408e-05, "loss": 0.1851, "step": 31170 }, { "epoch": 1.1330765317246894, "grad_norm": 0.9765021204948425, "learning_rate": 4.838594210058577e-05, "loss": 0.1276, "step": 31180 }, { "epoch": 1.1334399302274876, "grad_norm": 0.986419677734375, "learning_rate": 4.838410721609041e-05, "loss": 0.107, "step": 31190 }, { "epoch": 1.1338033287302856, "grad_norm": 0.850581169128418, "learning_rate": 4.838227132405709e-05, "loss": 0.1441, "step": 31200 }, { "epoch": 1.1338033287302856, "eval_loss": 0.3806535005569458, "eval_runtime": 180.0975, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.17877176103254852, "step": 31200 }, { "epoch": 1.1341667272330838, "grad_norm": 0.5492041110992432, "learning_rate": 4.8380434424564885e-05, "loss": 0.1644, "step": 31210 }, { "epoch": 1.134530125735882, "grad_norm": 0.6244884729385376, "learning_rate": 4.837859651769295e-05, "loss": 0.1754, "step": 31220 }, { "epoch": 1.1348935242386802, "grad_norm": 0.7327109575271606, "learning_rate": 4.837675760352047e-05, "loss": 0.1182, "step": 31230 }, { "epoch": 1.1352569227414784, "grad_norm": 1.8642997741699219, "learning_rate": 4.837491768212669e-05, "loss": 0.1262, "step": 31240 }, { "epoch": 1.1356203212442764, "grad_norm": 0.7738135457038879, "learning_rate": 4.837307675359086e-05, "loss": 0.1733, "step": 31250 }, { "epoch": 1.1359837197470746, "grad_norm": 1.2241661548614502, "learning_rate": 4.837123481799232e-05, "loss": 0.158, "step": 31260 }, { "epoch": 1.1363471182498728, "grad_norm": 0.5471898317337036, "learning_rate": 4.836939187541043e-05, "loss": 0.1745, "step": 31270 }, { "epoch": 1.136710516752671, "grad_norm": 1.110005497932434, "learning_rate": 4.836754792592459e-05, "loss": 0.14, "step": 31280 }, { "epoch": 1.1370739152554692, "grad_norm": 18.33467674255371, "learning_rate": 4.836570296961425e-05, "loss": 0.4864, "step": 31290 }, { "epoch": 1.1374373137582674, "grad_norm": 2.1457314491271973, "learning_rate": 4.836385700655891e-05, "loss": 0.1431, "step": 31300 }, { "epoch": 1.1378007122610656, "grad_norm": 1.3444671630859375, "learning_rate": 4.8362010036838096e-05, "loss": 0.1287, "step": 31310 }, { "epoch": 1.1381641107638636, "grad_norm": 1.0178183317184448, "learning_rate": 4.8360162060531395e-05, "loss": 0.1984, "step": 31320 }, { "epoch": 1.1385275092666618, "grad_norm": 1.013101577758789, "learning_rate": 4.835831307771842e-05, "loss": 0.1354, "step": 31330 }, { "epoch": 1.13889090776946, "grad_norm": 1.1397134065628052, "learning_rate": 4.8356463088478855e-05, "loss": 0.1149, "step": 31340 }, { "epoch": 1.1392543062722582, "grad_norm": 0.6582014560699463, "learning_rate": 4.835461209289239e-05, "loss": 0.1581, "step": 31350 }, { "epoch": 1.1396177047750564, "grad_norm": 6.679111480712891, "learning_rate": 4.835276009103878e-05, "loss": 0.1136, "step": 31360 }, { "epoch": 1.1399811032778544, "grad_norm": 1.710073709487915, "learning_rate": 4.835090708299784e-05, "loss": 0.1523, "step": 31370 }, { "epoch": 1.1403445017806526, "grad_norm": 0.8167402148246765, "learning_rate": 4.834905306884939e-05, "loss": 0.1333, "step": 31380 }, { "epoch": 1.1407079002834508, "grad_norm": 1.0377804040908813, "learning_rate": 4.834719804867332e-05, "loss": 0.1484, "step": 31390 }, { "epoch": 1.141071298786249, "grad_norm": 0.6715871691703796, "learning_rate": 4.8345342022549556e-05, "loss": 0.133, "step": 31400 }, { "epoch": 1.1414346972890472, "grad_norm": 0.8593924641609192, "learning_rate": 4.834348499055807e-05, "loss": 0.1381, "step": 31410 }, { "epoch": 1.1417980957918452, "grad_norm": 1.4667985439300537, "learning_rate": 4.834162695277887e-05, "loss": 0.2026, "step": 31420 }, { "epoch": 1.1421614942946434, "grad_norm": 1.1011070013046265, "learning_rate": 4.8339767909292014e-05, "loss": 0.1324, "step": 31430 }, { "epoch": 1.1425248927974416, "grad_norm": 0.6192152500152588, "learning_rate": 4.83379078601776e-05, "loss": 0.116, "step": 31440 }, { "epoch": 1.1428882913002398, "grad_norm": 0.7945598363876343, "learning_rate": 4.8336046805515775e-05, "loss": 0.1652, "step": 31450 }, { "epoch": 1.143251689803038, "grad_norm": 0.9201329350471497, "learning_rate": 4.833418474538672e-05, "loss": 0.1279, "step": 31460 }, { "epoch": 1.1436150883058362, "grad_norm": 0.7190477252006531, "learning_rate": 4.833232167987067e-05, "loss": 0.1603, "step": 31470 }, { "epoch": 1.1439784868086345, "grad_norm": 0.92894047498703, "learning_rate": 4.83304576090479e-05, "loss": 0.1268, "step": 31480 }, { "epoch": 1.1443418853114324, "grad_norm": 0.7764700055122375, "learning_rate": 4.8328592532998716e-05, "loss": 0.1307, "step": 31490 }, { "epoch": 1.1447052838142306, "grad_norm": 1.3679301738739014, "learning_rate": 4.832672645180348e-05, "loss": 0.1734, "step": 31500 }, { "epoch": 1.1450686823170289, "grad_norm": 2.3595213890075684, "learning_rate": 4.832485936554261e-05, "loss": 0.1335, "step": 31510 }, { "epoch": 1.145432080819827, "grad_norm": 0.610569953918457, "learning_rate": 4.832299127429653e-05, "loss": 0.249, "step": 31520 }, { "epoch": 1.1457954793226253, "grad_norm": 1.4595023393630981, "learning_rate": 4.832112217814575e-05, "loss": 0.1103, "step": 31530 }, { "epoch": 1.1461588778254233, "grad_norm": 5.723475933074951, "learning_rate": 4.831925207717077e-05, "loss": 0.126, "step": 31540 }, { "epoch": 1.1465222763282215, "grad_norm": 1.8982267379760742, "learning_rate": 4.8317380971452205e-05, "loss": 0.1422, "step": 31550 }, { "epoch": 1.1468856748310197, "grad_norm": 0.8732501268386841, "learning_rate": 4.831550886107066e-05, "loss": 0.1364, "step": 31560 }, { "epoch": 1.1472490733338179, "grad_norm": 1.0855740308761597, "learning_rate": 4.831363574610679e-05, "loss": 0.1728, "step": 31570 }, { "epoch": 1.147612471836616, "grad_norm": 1.5332953929901123, "learning_rate": 4.8311761626641304e-05, "loss": 0.1336, "step": 31580 }, { "epoch": 1.1479758703394143, "grad_norm": 0.5183860063552856, "learning_rate": 4.8309886502754954e-05, "loss": 0.6253, "step": 31590 }, { "epoch": 1.1483392688422125, "grad_norm": 0.7912465929985046, "learning_rate": 4.830801037452853e-05, "loss": 0.1644, "step": 31600 }, { "epoch": 1.1487026673450105, "grad_norm": 1.5750758647918701, "learning_rate": 4.8306133242042875e-05, "loss": 0.1787, "step": 31610 }, { "epoch": 1.1490660658478087, "grad_norm": 0.5864933133125305, "learning_rate": 4.830425510537886e-05, "loss": 0.1751, "step": 31620 }, { "epoch": 1.1494294643506069, "grad_norm": 0.9228208661079407, "learning_rate": 4.830237596461741e-05, "loss": 0.8842, "step": 31630 }, { "epoch": 1.149792862853405, "grad_norm": 1.0034486055374146, "learning_rate": 4.8300495819839486e-05, "loss": 0.1047, "step": 31640 }, { "epoch": 1.1501562613562033, "grad_norm": 1.125537395477295, "learning_rate": 4.82986146711261e-05, "loss": 0.1447, "step": 31650 }, { "epoch": 1.1505196598590013, "grad_norm": 2.6315014362335205, "learning_rate": 4.829673251855831e-05, "loss": 0.116, "step": 31660 }, { "epoch": 1.1508830583617995, "grad_norm": 0.8134027719497681, "learning_rate": 4.82948493622172e-05, "loss": 0.1889, "step": 31670 }, { "epoch": 1.1512464568645977, "grad_norm": 1.003691554069519, "learning_rate": 4.8292965202183916e-05, "loss": 0.1477, "step": 31680 }, { "epoch": 1.151609855367396, "grad_norm": 1.7551583051681519, "learning_rate": 4.829108003853964e-05, "loss": 0.1156, "step": 31690 }, { "epoch": 1.1519732538701941, "grad_norm": 1.5970351696014404, "learning_rate": 4.8289193871365594e-05, "loss": 0.3698, "step": 31700 }, { "epoch": 1.152336652372992, "grad_norm": 4.339359760284424, "learning_rate": 4.828730670074305e-05, "loss": 0.1259, "step": 31710 }, { "epoch": 1.1527000508757903, "grad_norm": 0.76881343126297, "learning_rate": 4.828541852675331e-05, "loss": 0.3217, "step": 31720 }, { "epoch": 1.1530634493785885, "grad_norm": 3.1044371128082275, "learning_rate": 4.828352934947774e-05, "loss": 0.13, "step": 31730 }, { "epoch": 1.1534268478813867, "grad_norm": 0.5273496508598328, "learning_rate": 4.828163916899774e-05, "loss": 0.1197, "step": 31740 }, { "epoch": 1.153790246384185, "grad_norm": 0.8816530108451843, "learning_rate": 4.827974798539473e-05, "loss": 0.1633, "step": 31750 }, { "epoch": 1.1541536448869831, "grad_norm": 1.220786452293396, "learning_rate": 4.827785579875022e-05, "loss": 0.1293, "step": 31760 }, { "epoch": 1.1545170433897813, "grad_norm": 2.700749397277832, "learning_rate": 4.827596260914572e-05, "loss": 0.1427, "step": 31770 }, { "epoch": 1.1548804418925793, "grad_norm": 1.6649949550628662, "learning_rate": 4.827406841666281e-05, "loss": 3.8743, "step": 31780 }, { "epoch": 1.1552438403953775, "grad_norm": 1.2381266355514526, "learning_rate": 4.827217322138311e-05, "loss": 0.1322, "step": 31790 }, { "epoch": 1.1556072388981757, "grad_norm": 0.6668787598609924, "learning_rate": 4.8270277023388255e-05, "loss": 0.1566, "step": 31800 }, { "epoch": 1.1556072388981757, "eval_loss": 0.35771968960762024, "eval_runtime": 257.3302, "eval_samples_per_second": 28.811, "eval_steps_per_second": 3.602, "eval_wer": 0.1810772051482201, "step": 31800 }, { "epoch": 1.155970637400974, "grad_norm": 1.1917558908462524, "learning_rate": 4.826837982275996e-05, "loss": 0.1274, "step": 31810 }, { "epoch": 1.1563340359037722, "grad_norm": 0.5347509980201721, "learning_rate": 4.8266481619579973e-05, "loss": 0.1872, "step": 31820 }, { "epoch": 1.1566974344065701, "grad_norm": 1.023681402206421, "learning_rate": 4.8264582413930076e-05, "loss": 0.1505, "step": 31830 }, { "epoch": 1.1570608329093683, "grad_norm": 1.00868821144104, "learning_rate": 4.82626822058921e-05, "loss": 0.1364, "step": 31840 }, { "epoch": 1.1574242314121665, "grad_norm": 1.4557231664657593, "learning_rate": 4.8260780995547905e-05, "loss": 0.1799, "step": 31850 }, { "epoch": 1.1577876299149648, "grad_norm": 1.1228946447372437, "learning_rate": 4.8258878782979434e-05, "loss": 0.1392, "step": 31860 }, { "epoch": 1.158151028417763, "grad_norm": 0.6818620562553406, "learning_rate": 4.825716593483377e-05, "loss": 0.1876, "step": 31870 }, { "epoch": 1.1585144269205612, "grad_norm": 1.4734445810317993, "learning_rate": 4.8255261818264976e-05, "loss": 0.1502, "step": 31880 }, { "epoch": 1.1588778254233594, "grad_norm": 0.8915801048278809, "learning_rate": 4.825335669970969e-05, "loss": 0.1207, "step": 31890 }, { "epoch": 1.1592412239261574, "grad_norm": 0.324372798204422, "learning_rate": 4.825145057925e-05, "loss": 0.1674, "step": 31900 }, { "epoch": 1.1596046224289556, "grad_norm": 1.1831437349319458, "learning_rate": 4.824954345696803e-05, "loss": 0.1192, "step": 31910 }, { "epoch": 1.1599680209317538, "grad_norm": 0.5911235809326172, "learning_rate": 4.824763533294596e-05, "loss": 0.1652, "step": 31920 }, { "epoch": 1.160331419434552, "grad_norm": 2.4116334915161133, "learning_rate": 4.8245726207265997e-05, "loss": 0.1297, "step": 31930 }, { "epoch": 1.1606948179373502, "grad_norm": 0.5179087519645691, "learning_rate": 4.8243816080010404e-05, "loss": 0.1066, "step": 31940 }, { "epoch": 1.1610582164401482, "grad_norm": 0.6537795066833496, "learning_rate": 4.824190495126148e-05, "loss": 0.1409, "step": 31950 }, { "epoch": 1.1614216149429464, "grad_norm": 1.0357365608215332, "learning_rate": 4.823999282110155e-05, "loss": 0.1146, "step": 31960 }, { "epoch": 1.1617850134457446, "grad_norm": 0.4709915220737457, "learning_rate": 4.823807968961303e-05, "loss": 0.1726, "step": 31970 }, { "epoch": 1.1621484119485428, "grad_norm": 0.9699262380599976, "learning_rate": 4.823616555687833e-05, "loss": 0.125, "step": 31980 }, { "epoch": 1.162511810451341, "grad_norm": 1.2052152156829834, "learning_rate": 4.8234250422979946e-05, "loss": 0.1164, "step": 31990 }, { "epoch": 1.1628752089541392, "grad_norm": 1.1892735958099365, "learning_rate": 4.823233428800037e-05, "loss": 0.1412, "step": 32000 }, { "epoch": 1.1632386074569372, "grad_norm": 0.7427589893341064, "learning_rate": 4.8230417152022165e-05, "loss": 0.1315, "step": 32010 }, { "epoch": 1.1636020059597354, "grad_norm": 0.7582072019577026, "learning_rate": 4.8228499015127945e-05, "loss": 0.1783, "step": 32020 }, { "epoch": 1.1639654044625336, "grad_norm": 1.1409790515899658, "learning_rate": 4.8226579877400345e-05, "loss": 0.1303, "step": 32030 }, { "epoch": 1.1643288029653318, "grad_norm": 1.1382596492767334, "learning_rate": 4.822465973892206e-05, "loss": 0.1426, "step": 32040 }, { "epoch": 1.16469220146813, "grad_norm": 1.27096688747406, "learning_rate": 4.822273859977583e-05, "loss": 0.1505, "step": 32050 }, { "epoch": 1.1650555999709282, "grad_norm": 1.5508397817611694, "learning_rate": 4.822081646004441e-05, "loss": 0.1366, "step": 32060 }, { "epoch": 1.1654189984737262, "grad_norm": 0.2970573604106903, "learning_rate": 4.821889331981063e-05, "loss": 0.1505, "step": 32070 }, { "epoch": 1.1657823969765244, "grad_norm": 0.9228662848472595, "learning_rate": 4.821696917915736e-05, "loss": 0.1112, "step": 32080 }, { "epoch": 1.1661457954793226, "grad_norm": 0.770660936832428, "learning_rate": 4.821504403816748e-05, "loss": 0.1284, "step": 32090 }, { "epoch": 1.1665091939821208, "grad_norm": 0.8875694274902344, "learning_rate": 4.8213117896923954e-05, "loss": 0.1421, "step": 32100 }, { "epoch": 1.166872592484919, "grad_norm": 0.9725656509399414, "learning_rate": 4.821119075550978e-05, "loss": 0.1269, "step": 32110 }, { "epoch": 1.167235990987717, "grad_norm": 0.5882539749145508, "learning_rate": 4.820926261400797e-05, "loss": 0.159, "step": 32120 }, { "epoch": 1.1675993894905152, "grad_norm": 0.9551408886909485, "learning_rate": 4.820733347250162e-05, "loss": 0.1788, "step": 32130 }, { "epoch": 1.1679627879933134, "grad_norm": 0.929642915725708, "learning_rate": 4.820540333107384e-05, "loss": 0.1329, "step": 32140 }, { "epoch": 1.1683261864961116, "grad_norm": 1.707643747329712, "learning_rate": 4.8203472189807795e-05, "loss": 0.1317, "step": 32150 }, { "epoch": 1.1686895849989098, "grad_norm": 1.401150107383728, "learning_rate": 4.82015400487867e-05, "loss": 0.126, "step": 32160 }, { "epoch": 1.169052983501708, "grad_norm": 0.7058550715446472, "learning_rate": 4.8199606908093785e-05, "loss": 0.1811, "step": 32170 }, { "epoch": 1.1694163820045063, "grad_norm": 1.2024914026260376, "learning_rate": 4.8197672767812366e-05, "loss": 0.1524, "step": 32180 }, { "epoch": 1.1697797805073042, "grad_norm": 0.8119955062866211, "learning_rate": 4.819573762802575e-05, "loss": 0.1281, "step": 32190 }, { "epoch": 1.1701431790101025, "grad_norm": 0.8531884551048279, "learning_rate": 4.8193801488817336e-05, "loss": 0.1362, "step": 32200 }, { "epoch": 1.1705065775129007, "grad_norm": 0.7170140743255615, "learning_rate": 4.819186435027054e-05, "loss": 0.1276, "step": 32210 }, { "epoch": 1.1708699760156989, "grad_norm": 1.7031468152999878, "learning_rate": 4.8189926212468825e-05, "loss": 0.1369, "step": 32220 }, { "epoch": 1.171233374518497, "grad_norm": 1.639916181564331, "learning_rate": 4.81879870754957e-05, "loss": 0.1318, "step": 32230 }, { "epoch": 1.171596773021295, "grad_norm": 1.0148886442184448, "learning_rate": 4.8186046939434716e-05, "loss": 0.119, "step": 32240 }, { "epoch": 1.1719601715240933, "grad_norm": 1.4640549421310425, "learning_rate": 4.818410580436947e-05, "loss": 0.1603, "step": 32250 }, { "epoch": 1.1723235700268915, "grad_norm": 1.0362626314163208, "learning_rate": 4.818216367038358e-05, "loss": 0.1318, "step": 32260 }, { "epoch": 1.1726869685296897, "grad_norm": 0.507990837097168, "learning_rate": 4.818022053756076e-05, "loss": 0.1681, "step": 32270 }, { "epoch": 1.1730503670324879, "grad_norm": 0.7118284106254578, "learning_rate": 4.81782764059847e-05, "loss": 0.1129, "step": 32280 }, { "epoch": 1.173413765535286, "grad_norm": 2.637918472290039, "learning_rate": 4.8176331275739175e-05, "loss": 0.1866, "step": 32290 }, { "epoch": 1.173777164038084, "grad_norm": 1.5417594909667969, "learning_rate": 4.817438514690801e-05, "loss": 0.1382, "step": 32300 }, { "epoch": 1.1741405625408823, "grad_norm": 1.4842432737350464, "learning_rate": 4.817243801957503e-05, "loss": 0.1381, "step": 32310 }, { "epoch": 1.1745039610436805, "grad_norm": 2.0502350330352783, "learning_rate": 4.817048989382415e-05, "loss": 0.1515, "step": 32320 }, { "epoch": 1.1748673595464787, "grad_norm": 1.8963838815689087, "learning_rate": 4.81685407697393e-05, "loss": 0.1453, "step": 32330 }, { "epoch": 1.175230758049277, "grad_norm": 0.6867222785949707, "learning_rate": 4.8166590647404466e-05, "loss": 0.1566, "step": 32340 }, { "epoch": 1.175594156552075, "grad_norm": 1.4324911832809448, "learning_rate": 4.8164639526903665e-05, "loss": 0.1261, "step": 32350 }, { "epoch": 1.175957555054873, "grad_norm": 4.706410884857178, "learning_rate": 4.8162687408320963e-05, "loss": 0.1168, "step": 32360 }, { "epoch": 1.1763209535576713, "grad_norm": 0.6849080324172974, "learning_rate": 4.8160734291740476e-05, "loss": 0.1587, "step": 32370 }, { "epoch": 1.1766843520604695, "grad_norm": 3.180955171585083, "learning_rate": 4.815878017724636e-05, "loss": 0.1312, "step": 32380 }, { "epoch": 1.1770477505632677, "grad_norm": 0.5583860278129578, "learning_rate": 4.81568250649228e-05, "loss": 0.1385, "step": 32390 }, { "epoch": 1.177411149066066, "grad_norm": 0.8250964283943176, "learning_rate": 4.8154868954854036e-05, "loss": 0.1393, "step": 32400 }, { "epoch": 1.177411149066066, "eval_loss": 0.38513997197151184, "eval_runtime": 179.4965, "eval_samples_per_second": 41.304, "eval_steps_per_second": 5.164, "eval_wer": 0.18090475066712655, "step": 32400 }, { "epoch": 1.177774547568864, "grad_norm": 2.3377466201782227, "learning_rate": 4.815291184712437e-05, "loss": 0.1197, "step": 32410 }, { "epoch": 1.1781379460716621, "grad_norm": 0.7508591413497925, "learning_rate": 4.81509537418181e-05, "loss": 0.1786, "step": 32420 }, { "epoch": 1.1785013445744603, "grad_norm": 0.8103131651878357, "learning_rate": 4.81489946390196e-05, "loss": 0.1617, "step": 32430 }, { "epoch": 1.1788647430772585, "grad_norm": 1.2582241296768188, "learning_rate": 4.814703453881329e-05, "loss": 0.1326, "step": 32440 }, { "epoch": 1.1792281415800567, "grad_norm": 1.110107660293579, "learning_rate": 4.8145073441283613e-05, "loss": 0.1504, "step": 32450 }, { "epoch": 1.179591540082855, "grad_norm": 0.9912093281745911, "learning_rate": 4.814311134651509e-05, "loss": 0.133, "step": 32460 }, { "epoch": 1.1799549385856531, "grad_norm": 1.1711434125900269, "learning_rate": 4.814114825459223e-05, "loss": 0.1328, "step": 32470 }, { "epoch": 1.1803183370884511, "grad_norm": 3.884737491607666, "learning_rate": 4.813918416559963e-05, "loss": 0.1225, "step": 32480 }, { "epoch": 1.1806817355912493, "grad_norm": 0.9459224939346313, "learning_rate": 4.8137219079621906e-05, "loss": 0.1507, "step": 32490 }, { "epoch": 1.1810451340940475, "grad_norm": 1.7159967422485352, "learning_rate": 4.813525299674374e-05, "loss": 0.1823, "step": 32500 }, { "epoch": 1.1814085325968458, "grad_norm": 1.3824647665023804, "learning_rate": 4.8133285917049844e-05, "loss": 0.1393, "step": 32510 }, { "epoch": 1.181771931099644, "grad_norm": 214.12107849121094, "learning_rate": 4.813131784062496e-05, "loss": 4.0762, "step": 32520 }, { "epoch": 1.182135329602442, "grad_norm": 2.5384116172790527, "learning_rate": 4.812934876755389e-05, "loss": 0.1379, "step": 32530 }, { "epoch": 1.1824987281052401, "grad_norm": 1.4254207611083984, "learning_rate": 4.812737869792148e-05, "loss": 0.1461, "step": 32540 }, { "epoch": 1.1828621266080384, "grad_norm": 1.571662187576294, "learning_rate": 4.812540763181261e-05, "loss": 0.178, "step": 32550 }, { "epoch": 1.1832255251108366, "grad_norm": 5.712926864624023, "learning_rate": 4.8123435569312206e-05, "loss": 0.1071, "step": 32560 }, { "epoch": 1.1835889236136348, "grad_norm": 0.8147953152656555, "learning_rate": 4.812146251050523e-05, "loss": 0.1211, "step": 32570 }, { "epoch": 1.183952322116433, "grad_norm": 1.1877583265304565, "learning_rate": 4.8119488455476714e-05, "loss": 0.1668, "step": 32580 }, { "epoch": 1.184315720619231, "grad_norm": 0.7466074824333191, "learning_rate": 4.8117513404311686e-05, "loss": 0.098, "step": 32590 }, { "epoch": 1.1846791191220292, "grad_norm": 1.6904805898666382, "learning_rate": 4.8115537357095265e-05, "loss": 0.1626, "step": 32600 }, { "epoch": 1.1850425176248274, "grad_norm": 0.879503607749939, "learning_rate": 4.811356031391259e-05, "loss": 0.1129, "step": 32610 }, { "epoch": 1.1854059161276256, "grad_norm": 2.447317600250244, "learning_rate": 4.811158227484883e-05, "loss": 0.1255, "step": 32620 }, { "epoch": 1.1857693146304238, "grad_norm": 0.9513424038887024, "learning_rate": 4.810960323998922e-05, "loss": 0.1347, "step": 32630 }, { "epoch": 1.186132713133222, "grad_norm": 0.46179428696632385, "learning_rate": 4.810762320941903e-05, "loss": 0.1002, "step": 32640 }, { "epoch": 1.18649611163602, "grad_norm": 0.7595782279968262, "learning_rate": 4.8105642183223585e-05, "loss": 0.1585, "step": 32650 }, { "epoch": 1.1868595101388182, "grad_norm": 1.8892844915390015, "learning_rate": 4.8103660161488216e-05, "loss": 0.1475, "step": 32660 }, { "epoch": 1.1872229086416164, "grad_norm": 3.290606737136841, "learning_rate": 4.810167714429834e-05, "loss": 0.1603, "step": 32670 }, { "epoch": 1.1875863071444146, "grad_norm": 1.3222955465316772, "learning_rate": 4.809969313173939e-05, "loss": 0.1251, "step": 32680 }, { "epoch": 1.1879497056472128, "grad_norm": 1.8568757772445679, "learning_rate": 4.809770812389686e-05, "loss": 0.1517, "step": 32690 }, { "epoch": 1.1883131041500108, "grad_norm": 1.3318365812301636, "learning_rate": 4.8095722120856255e-05, "loss": 0.1778, "step": 32700 }, { "epoch": 1.188676502652809, "grad_norm": 1.513069748878479, "learning_rate": 4.8093735122703164e-05, "loss": 0.1325, "step": 32710 }, { "epoch": 1.1890399011556072, "grad_norm": 0.37486693263053894, "learning_rate": 4.809174712952319e-05, "loss": 0.1482, "step": 32720 }, { "epoch": 1.1894032996584054, "grad_norm": 3.7855522632598877, "learning_rate": 4.8089758141402e-05, "loss": 0.1237, "step": 32730 }, { "epoch": 1.1897666981612036, "grad_norm": 0.6902849674224854, "learning_rate": 4.8087768158425285e-05, "loss": 0.1099, "step": 32740 }, { "epoch": 1.1901300966640018, "grad_norm": 0.6842343211174011, "learning_rate": 4.808577718067878e-05, "loss": 0.17, "step": 32750 }, { "epoch": 1.1904934951668, "grad_norm": 0.9745518565177917, "learning_rate": 4.808378520824829e-05, "loss": 0.1446, "step": 32760 }, { "epoch": 1.190856893669598, "grad_norm": 1.468474268913269, "learning_rate": 4.808179224121962e-05, "loss": 0.1563, "step": 32770 }, { "epoch": 1.1912202921723962, "grad_norm": 1.6509790420532227, "learning_rate": 4.807979827967864e-05, "loss": 0.1416, "step": 32780 }, { "epoch": 1.1915836906751944, "grad_norm": 0.9928446412086487, "learning_rate": 4.8077803323711277e-05, "loss": 1.6808, "step": 32790 }, { "epoch": 1.1919470891779926, "grad_norm": 3.463270425796509, "learning_rate": 4.807580737340348e-05, "loss": 0.1462, "step": 32800 }, { "epoch": 1.1923104876807908, "grad_norm": 1.0357753038406372, "learning_rate": 4.807381042884125e-05, "loss": 0.1475, "step": 32810 }, { "epoch": 1.1926738861835888, "grad_norm": 0.6824864745140076, "learning_rate": 4.807181249011062e-05, "loss": 0.1884, "step": 32820 }, { "epoch": 1.193037284686387, "grad_norm": 8.779791831970215, "learning_rate": 4.8069813557297685e-05, "loss": 0.128, "step": 32830 }, { "epoch": 1.1934006831891852, "grad_norm": 1.07723867893219, "learning_rate": 4.806781363048856e-05, "loss": 0.1433, "step": 32840 }, { "epoch": 1.1937640816919834, "grad_norm": 1.9113037586212158, "learning_rate": 4.806581270976942e-05, "loss": 0.1575, "step": 32850 }, { "epoch": 1.1941274801947817, "grad_norm": 1.2443821430206299, "learning_rate": 4.806381079522648e-05, "loss": 0.1585, "step": 32860 }, { "epoch": 1.1944908786975799, "grad_norm": 0.46389827132225037, "learning_rate": 4.8061807886946e-05, "loss": 0.2578, "step": 32870 }, { "epoch": 1.1948542772003778, "grad_norm": 0.9189543128013611, "learning_rate": 4.8059803985014274e-05, "loss": 0.125, "step": 32880 }, { "epoch": 1.195217675703176, "grad_norm": 0.8623115420341492, "learning_rate": 4.805779908951763e-05, "loss": 0.1094, "step": 32890 }, { "epoch": 1.1955810742059743, "grad_norm": 0.5328871607780457, "learning_rate": 4.805579320054247e-05, "loss": 0.142, "step": 32900 }, { "epoch": 1.1959444727087725, "grad_norm": 0.8360912799835205, "learning_rate": 4.805378631817522e-05, "loss": 0.1465, "step": 32910 }, { "epoch": 1.1963078712115707, "grad_norm": 0.4089026153087616, "learning_rate": 4.805177844250234e-05, "loss": 0.1779, "step": 32920 }, { "epoch": 1.1966712697143689, "grad_norm": 1.4934437274932861, "learning_rate": 4.8049769573610336e-05, "loss": 0.1577, "step": 32930 }, { "epoch": 1.1970346682171669, "grad_norm": 0.991147518157959, "learning_rate": 4.8047759711585784e-05, "loss": 0.1302, "step": 32940 }, { "epoch": 1.197398066719965, "grad_norm": 4.548572540283203, "learning_rate": 4.804574885651526e-05, "loss": 0.1184, "step": 32950 }, { "epoch": 1.1977614652227633, "grad_norm": 1.7906454801559448, "learning_rate": 4.8043737008485424e-05, "loss": 0.138, "step": 32960 }, { "epoch": 1.1981248637255615, "grad_norm": 0.4827491044998169, "learning_rate": 4.804172416758294e-05, "loss": 0.1573, "step": 32970 }, { "epoch": 1.1984882622283597, "grad_norm": 0.8055851459503174, "learning_rate": 4.803971033389455e-05, "loss": 0.1203, "step": 32980 }, { "epoch": 1.1988516607311577, "grad_norm": 0.7492426633834839, "learning_rate": 4.8037695507507016e-05, "loss": 0.1158, "step": 32990 }, { "epoch": 1.1992150592339559, "grad_norm": 0.8737430572509766, "learning_rate": 4.8035679688507154e-05, "loss": 0.1672, "step": 33000 }, { "epoch": 1.1992150592339559, "eval_loss": 0.36745160818099976, "eval_runtime": 180.2517, "eval_samples_per_second": 41.131, "eval_steps_per_second": 5.143, "eval_wer": 0.17665692450124348, "step": 33000 }, { "epoch": 1.199578457736754, "grad_norm": 0.9123022556304932, "learning_rate": 4.803366287698182e-05, "loss": 0.1182, "step": 33010 }, { "epoch": 1.1999418562395523, "grad_norm": 0.5147042870521545, "learning_rate": 4.803164507301789e-05, "loss": 0.1293, "step": 33020 }, { "epoch": 1.2003052547423505, "grad_norm": 2.508376359939575, "learning_rate": 4.8029626276702336e-05, "loss": 0.1518, "step": 33030 }, { "epoch": 1.2006686532451487, "grad_norm": 1.3006081581115723, "learning_rate": 4.802760648812213e-05, "loss": 0.1503, "step": 33040 }, { "epoch": 1.201032051747947, "grad_norm": 1.490337610244751, "learning_rate": 4.802558570736427e-05, "loss": 0.1589, "step": 33050 }, { "epoch": 1.201395450250745, "grad_norm": 0.6895734667778015, "learning_rate": 4.802356393451587e-05, "loss": 0.137, "step": 33060 }, { "epoch": 1.201758848753543, "grad_norm": 0.45895853638648987, "learning_rate": 4.8021541169664006e-05, "loss": 0.2112, "step": 33070 }, { "epoch": 1.2021222472563413, "grad_norm": 1.6609526872634888, "learning_rate": 4.801951741289585e-05, "loss": 0.1392, "step": 33080 }, { "epoch": 1.2024856457591395, "grad_norm": 1.1131823062896729, "learning_rate": 4.801749266429858e-05, "loss": 0.124, "step": 33090 }, { "epoch": 1.2028490442619377, "grad_norm": 0.547478973865509, "learning_rate": 4.8015466923959465e-05, "loss": 0.166, "step": 33100 }, { "epoch": 1.2032124427647357, "grad_norm": 0.778753936290741, "learning_rate": 4.801344019196576e-05, "loss": 0.1414, "step": 33110 }, { "epoch": 1.203575841267534, "grad_norm": 1.1527098417282104, "learning_rate": 4.801141246840481e-05, "loss": 0.1719, "step": 33120 }, { "epoch": 1.2039392397703321, "grad_norm": 0.9628286361694336, "learning_rate": 4.800938375336395e-05, "loss": 0.1168, "step": 33130 }, { "epoch": 1.2043026382731303, "grad_norm": 1.7359286546707153, "learning_rate": 4.8007354046930624e-05, "loss": 0.1145, "step": 33140 }, { "epoch": 1.2046660367759285, "grad_norm": 0.8443882465362549, "learning_rate": 4.8005323349192276e-05, "loss": 0.138, "step": 33150 }, { "epoch": 1.2050294352787267, "grad_norm": 1.166198968887329, "learning_rate": 4.8003291660236396e-05, "loss": 0.164, "step": 33160 }, { "epoch": 1.2053928337815247, "grad_norm": 0.42992278933525085, "learning_rate": 4.800125898015052e-05, "loss": 0.1786, "step": 33170 }, { "epoch": 1.205756232284323, "grad_norm": 0.7348678112030029, "learning_rate": 4.799922530902223e-05, "loss": 0.1175, "step": 33180 }, { "epoch": 1.2061196307871211, "grad_norm": 1.4282450675964355, "learning_rate": 4.799719064693917e-05, "loss": 0.1397, "step": 33190 }, { "epoch": 1.2064830292899194, "grad_norm": 0.9985376596450806, "learning_rate": 4.7995154993988974e-05, "loss": 0.1382, "step": 33200 }, { "epoch": 1.2068464277927176, "grad_norm": 0.7168998718261719, "learning_rate": 4.799311835025937e-05, "loss": 0.1123, "step": 33210 }, { "epoch": 1.2072098262955158, "grad_norm": 0.521123468875885, "learning_rate": 4.799108071583811e-05, "loss": 0.1753, "step": 33220 }, { "epoch": 1.2075732247983137, "grad_norm": 1.0951159000396729, "learning_rate": 4.7989042090812976e-05, "loss": 0.1182, "step": 33230 }, { "epoch": 1.207936623301112, "grad_norm": 1.108727216720581, "learning_rate": 4.798700247527182e-05, "loss": 1.4247, "step": 33240 }, { "epoch": 1.2083000218039102, "grad_norm": 0.4534373879432678, "learning_rate": 4.7984961869302516e-05, "loss": 0.1715, "step": 33250 }, { "epoch": 1.2086634203067084, "grad_norm": 0.6849185824394226, "learning_rate": 4.798292027299298e-05, "loss": 0.1367, "step": 33260 }, { "epoch": 1.2090268188095066, "grad_norm": 0.8563576340675354, "learning_rate": 4.7980877686431195e-05, "loss": 0.2058, "step": 33270 }, { "epoch": 1.2093902173123046, "grad_norm": 0.5488440990447998, "learning_rate": 4.797883410970514e-05, "loss": 0.1246, "step": 33280 }, { "epoch": 1.2097536158151028, "grad_norm": 0.5783109068870544, "learning_rate": 4.7976789542902895e-05, "loss": 0.1135, "step": 33290 }, { "epoch": 1.210117014317901, "grad_norm": 2.218514919281006, "learning_rate": 4.7974743986112536e-05, "loss": 0.8269, "step": 33300 }, { "epoch": 1.2104804128206992, "grad_norm": 1.6320664882659912, "learning_rate": 4.79726974394222e-05, "loss": 0.1185, "step": 33310 }, { "epoch": 1.2108438113234974, "grad_norm": 1.287618637084961, "learning_rate": 4.797064990292007e-05, "loss": 0.1815, "step": 33320 }, { "epoch": 1.2112072098262956, "grad_norm": 2.3232581615448, "learning_rate": 4.796860137669437e-05, "loss": 0.1285, "step": 33330 }, { "epoch": 1.2115706083290938, "grad_norm": 1.2804290056228638, "learning_rate": 4.796655186083335e-05, "loss": 0.1339, "step": 33340 }, { "epoch": 1.2119340068318918, "grad_norm": 0.6492500901222229, "learning_rate": 4.796450135542534e-05, "loss": 0.1278, "step": 33350 }, { "epoch": 1.21229740533469, "grad_norm": 1.7094756364822388, "learning_rate": 4.796244986055867e-05, "loss": 0.1337, "step": 33360 }, { "epoch": 1.2126608038374882, "grad_norm": 1.5763776302337646, "learning_rate": 4.796039737632173e-05, "loss": 0.2283, "step": 33370 }, { "epoch": 1.2130242023402864, "grad_norm": 0.631926417350769, "learning_rate": 4.795834390280296e-05, "loss": 0.2165, "step": 33380 }, { "epoch": 1.2133876008430846, "grad_norm": 1.4329982995986938, "learning_rate": 4.795628944009084e-05, "loss": 0.1255, "step": 33390 }, { "epoch": 1.2137509993458826, "grad_norm": 0.5400133728981018, "learning_rate": 4.795423398827389e-05, "loss": 0.1361, "step": 33400 }, { "epoch": 1.2141143978486808, "grad_norm": 0.8651421070098877, "learning_rate": 4.795217754744067e-05, "loss": 0.1336, "step": 33410 }, { "epoch": 1.214477796351479, "grad_norm": 0.32640397548675537, "learning_rate": 4.795012011767977e-05, "loss": 0.208, "step": 33420 }, { "epoch": 1.2148411948542772, "grad_norm": 2.425781726837158, "learning_rate": 4.794806169907987e-05, "loss": 0.1107, "step": 33430 }, { "epoch": 1.2152045933570754, "grad_norm": 1.9098165035247803, "learning_rate": 4.794600229172963e-05, "loss": 0.1087, "step": 33440 }, { "epoch": 1.2155679918598736, "grad_norm": 1.4842039346694946, "learning_rate": 4.794394189571779e-05, "loss": 0.141, "step": 33450 }, { "epoch": 1.2159313903626716, "grad_norm": 1.6379314661026, "learning_rate": 4.794188051113313e-05, "loss": 0.1382, "step": 33460 }, { "epoch": 1.2162947888654698, "grad_norm": 1.6831467151641846, "learning_rate": 4.7939818138064474e-05, "loss": 0.1564, "step": 33470 }, { "epoch": 1.216658187368268, "grad_norm": 0.4303675889968872, "learning_rate": 4.793775477660067e-05, "loss": 0.1153, "step": 33480 }, { "epoch": 1.2170215858710662, "grad_norm": 0.5871365070343018, "learning_rate": 4.7935690426830624e-05, "loss": 0.1122, "step": 33490 }, { "epoch": 1.2173849843738644, "grad_norm": 0.7488551735877991, "learning_rate": 4.7933625088843287e-05, "loss": 1.8561, "step": 33500 }, { "epoch": 1.2177483828766626, "grad_norm": 1.4515953063964844, "learning_rate": 4.793155876272764e-05, "loss": 0.127, "step": 33510 }, { "epoch": 1.2181117813794606, "grad_norm": 0.9288650155067444, "learning_rate": 4.7929491448572716e-05, "loss": 0.2149, "step": 33520 }, { "epoch": 1.2184751798822588, "grad_norm": 1.544545292854309, "learning_rate": 4.792742314646759e-05, "loss": 0.6921, "step": 33530 }, { "epoch": 1.218838578385057, "grad_norm": 1.1275858879089355, "learning_rate": 4.792535385650138e-05, "loss": 0.1592, "step": 33540 }, { "epoch": 1.2192019768878553, "grad_norm": 0.7861330509185791, "learning_rate": 4.7923283578763236e-05, "loss": 0.156, "step": 33550 }, { "epoch": 1.2195653753906535, "grad_norm": 1.7547698020935059, "learning_rate": 4.792121231334237e-05, "loss": 0.135, "step": 33560 }, { "epoch": 1.2199287738934514, "grad_norm": 0.9989791512489319, "learning_rate": 4.7919140060328014e-05, "loss": 0.2015, "step": 33570 }, { "epoch": 1.2202921723962497, "grad_norm": 0.8089576959609985, "learning_rate": 4.791706681980945e-05, "loss": 2.7874, "step": 33580 }, { "epoch": 1.2206555708990479, "grad_norm": 1.2729178667068481, "learning_rate": 4.791499259187603e-05, "loss": 0.1749, "step": 33590 }, { "epoch": 1.221018969401846, "grad_norm": 1.6203336715698242, "learning_rate": 4.7912917376617106e-05, "loss": 0.1524, "step": 33600 }, { "epoch": 1.221018969401846, "eval_loss": 0.3562403917312622, "eval_runtime": 180.3906, "eval_samples_per_second": 41.1, "eval_steps_per_second": 5.139, "eval_wer": 0.1815582623849547, "step": 33600 }, { "epoch": 1.2213823679046443, "grad_norm": 1.8868520259857178, "learning_rate": 4.7910841174122104e-05, "loss": 0.1514, "step": 33610 }, { "epoch": 1.2217457664074425, "grad_norm": 1.3601691722869873, "learning_rate": 4.7908763984480465e-05, "loss": 0.1675, "step": 33620 }, { "epoch": 1.2221091649102407, "grad_norm": 1.2268040180206299, "learning_rate": 4.790668580778169e-05, "loss": 0.1363, "step": 33630 }, { "epoch": 1.2224725634130387, "grad_norm": 1.918747901916504, "learning_rate": 4.790460664411534e-05, "loss": 0.1397, "step": 33640 }, { "epoch": 1.2228359619158369, "grad_norm": 0.6259877681732178, "learning_rate": 4.790252649357098e-05, "loss": 0.1555, "step": 33650 }, { "epoch": 1.223199360418635, "grad_norm": 2.5940511226654053, "learning_rate": 4.7900445356238235e-05, "loss": 0.1508, "step": 33660 }, { "epoch": 1.2235627589214333, "grad_norm": 1.1692243814468384, "learning_rate": 4.7898363232206785e-05, "loss": 0.1642, "step": 33670 }, { "epoch": 1.2239261574242315, "grad_norm": 1.459763526916504, "learning_rate": 4.789628012156633e-05, "loss": 0.1325, "step": 33680 }, { "epoch": 1.2242895559270295, "grad_norm": 0.4898362159729004, "learning_rate": 4.789419602440663e-05, "loss": 0.1604, "step": 33690 }, { "epoch": 1.2246529544298277, "grad_norm": 1.6771429777145386, "learning_rate": 4.7892110940817495e-05, "loss": 0.217, "step": 33700 }, { "epoch": 1.225016352932626, "grad_norm": 1.0040748119354248, "learning_rate": 4.789002487088874e-05, "loss": 0.1428, "step": 33710 }, { "epoch": 1.225379751435424, "grad_norm": 0.5210689306259155, "learning_rate": 4.788793781471025e-05, "loss": 0.1777, "step": 33720 }, { "epoch": 1.2257431499382223, "grad_norm": 2.0783729553222656, "learning_rate": 4.788584977237196e-05, "loss": 0.1373, "step": 33730 }, { "epoch": 1.2261065484410205, "grad_norm": 0.8238822221755981, "learning_rate": 4.788376074396384e-05, "loss": 0.1246, "step": 33740 }, { "epoch": 1.2264699469438185, "grad_norm": 1.1031908988952637, "learning_rate": 4.7881670729575875e-05, "loss": 0.1488, "step": 33750 }, { "epoch": 1.2268333454466167, "grad_norm": 1.3136149644851685, "learning_rate": 4.787957972929814e-05, "loss": 0.1382, "step": 33760 }, { "epoch": 1.227196743949415, "grad_norm": 0.9418723583221436, "learning_rate": 4.7877487743220726e-05, "loss": 0.1531, "step": 33770 }, { "epoch": 1.2275601424522131, "grad_norm": 1.3498002290725708, "learning_rate": 4.7875394771433755e-05, "loss": 0.1345, "step": 33780 }, { "epoch": 1.2279235409550113, "grad_norm": 1.0489355325698853, "learning_rate": 4.7873300814027415e-05, "loss": 0.1522, "step": 33790 }, { "epoch": 1.2282869394578095, "grad_norm": 1.1034955978393555, "learning_rate": 4.7871205871091926e-05, "loss": 0.1721, "step": 33800 }, { "epoch": 1.2286503379606075, "grad_norm": 1.1162317991256714, "learning_rate": 4.786910994271756e-05, "loss": 0.1774, "step": 33810 }, { "epoch": 1.2290137364634057, "grad_norm": 0.6511724591255188, "learning_rate": 4.786701302899461e-05, "loss": 0.1491, "step": 33820 }, { "epoch": 1.229377134966204, "grad_norm": 0.730034589767456, "learning_rate": 4.786491513001343e-05, "loss": 0.1304, "step": 33830 }, { "epoch": 1.2297405334690021, "grad_norm": 0.3531613051891327, "learning_rate": 4.786281624586441e-05, "loss": 0.1022, "step": 33840 }, { "epoch": 1.2301039319718003, "grad_norm": 0.8404261469841003, "learning_rate": 4.786071637663798e-05, "loss": 0.1366, "step": 33850 }, { "epoch": 1.2304673304745983, "grad_norm": 1.0911661386489868, "learning_rate": 4.785861552242462e-05, "loss": 0.1132, "step": 33860 }, { "epoch": 1.2308307289773965, "grad_norm": 0.9053283333778381, "learning_rate": 4.785651368331485e-05, "loss": 0.1393, "step": 33870 }, { "epoch": 1.2311941274801947, "grad_norm": 1.065520167350769, "learning_rate": 4.7854410859399236e-05, "loss": 0.1277, "step": 33880 }, { "epoch": 1.231557525982993, "grad_norm": 0.3727855384349823, "learning_rate": 4.785230705076837e-05, "loss": 0.1213, "step": 33890 }, { "epoch": 1.2319209244857912, "grad_norm": 1.7203010320663452, "learning_rate": 4.78502022575129e-05, "loss": 0.1735, "step": 33900 }, { "epoch": 1.2322843229885894, "grad_norm": 0.7186889052391052, "learning_rate": 4.7848096479723516e-05, "loss": 0.1195, "step": 33910 }, { "epoch": 1.2326477214913876, "grad_norm": 1.0675809383392334, "learning_rate": 4.784598971749095e-05, "loss": 0.1699, "step": 33920 }, { "epoch": 1.2330111199941856, "grad_norm": 1.6882377862930298, "learning_rate": 4.784388197090597e-05, "loss": 0.1275, "step": 33930 }, { "epoch": 1.2333745184969838, "grad_norm": 0.4500318765640259, "learning_rate": 4.78417732400594e-05, "loss": 0.1248, "step": 33940 }, { "epoch": 1.233737916999782, "grad_norm": 1.0862751007080078, "learning_rate": 4.783966352504209e-05, "loss": 0.1585, "step": 33950 }, { "epoch": 1.2341013155025802, "grad_norm": 0.9130736589431763, "learning_rate": 4.7837552825944943e-05, "loss": 0.1489, "step": 33960 }, { "epoch": 1.2344647140053784, "grad_norm": 0.47646433115005493, "learning_rate": 4.783544114285891e-05, "loss": 0.135, "step": 33970 }, { "epoch": 1.2348281125081764, "grad_norm": 0.7090937495231628, "learning_rate": 4.783332847587495e-05, "loss": 0.1231, "step": 33980 }, { "epoch": 1.2351915110109746, "grad_norm": 2.1009280681610107, "learning_rate": 4.7831214825084117e-05, "loss": 0.1239, "step": 33990 }, { "epoch": 1.2355549095137728, "grad_norm": 0.6040928363800049, "learning_rate": 4.782910019057747e-05, "loss": 0.1757, "step": 34000 }, { "epoch": 1.235918308016571, "grad_norm": 3.8224098682403564, "learning_rate": 4.782698457244612e-05, "loss": 0.1201, "step": 34010 }, { "epoch": 1.2362817065193692, "grad_norm": 0.4506910741329193, "learning_rate": 4.782486797078122e-05, "loss": 0.1381, "step": 34020 }, { "epoch": 1.2366451050221674, "grad_norm": 1.0445079803466797, "learning_rate": 4.782275038567398e-05, "loss": 0.1386, "step": 34030 }, { "epoch": 1.2370085035249654, "grad_norm": 1.1579469442367554, "learning_rate": 4.7820631817215625e-05, "loss": 0.1221, "step": 34040 }, { "epoch": 1.2373719020277636, "grad_norm": 1.023468017578125, "learning_rate": 4.781851226549743e-05, "loss": 0.1524, "step": 34050 }, { "epoch": 1.2377353005305618, "grad_norm": 1.0542868375778198, "learning_rate": 4.781639173061074e-05, "loss": 0.1268, "step": 34060 }, { "epoch": 1.23809869903336, "grad_norm": 0.7573347687721252, "learning_rate": 4.7814270212646915e-05, "loss": 0.2058, "step": 34070 }, { "epoch": 1.2384620975361582, "grad_norm": 1.2218323945999146, "learning_rate": 4.781214771169736e-05, "loss": 0.1141, "step": 34080 }, { "epoch": 1.2388254960389564, "grad_norm": 0.7725077867507935, "learning_rate": 4.781002422785352e-05, "loss": 0.1221, "step": 34090 }, { "epoch": 1.2391888945417544, "grad_norm": 2.2234578132629395, "learning_rate": 4.78078997612069e-05, "loss": 1.9034, "step": 34100 }, { "epoch": 1.2395522930445526, "grad_norm": 1.260764718055725, "learning_rate": 4.780577431184902e-05, "loss": 0.1205, "step": 34110 }, { "epoch": 1.2399156915473508, "grad_norm": 0.5173097252845764, "learning_rate": 4.780364787987148e-05, "loss": 0.2101, "step": 34120 }, { "epoch": 1.240279090050149, "grad_norm": 0.9755317568778992, "learning_rate": 4.780152046536588e-05, "loss": 0.1041, "step": 34130 }, { "epoch": 1.2406424885529472, "grad_norm": 1.4319573640823364, "learning_rate": 4.77993920684239e-05, "loss": 0.1382, "step": 34140 }, { "epoch": 1.2410058870557452, "grad_norm": 0.8623887896537781, "learning_rate": 4.7797262689137224e-05, "loss": 0.1646, "step": 34150 }, { "epoch": 1.2413692855585434, "grad_norm": 1.1775789260864258, "learning_rate": 4.779513232759762e-05, "loss": 0.124, "step": 34160 }, { "epoch": 1.2417326840613416, "grad_norm": 23.601593017578125, "learning_rate": 4.779300098389687e-05, "loss": 0.304, "step": 34170 }, { "epoch": 1.2420960825641398, "grad_norm": 0.9336787462234497, "learning_rate": 4.77908686581268e-05, "loss": 0.1376, "step": 34180 }, { "epoch": 1.242459481066938, "grad_norm": 0.7417952418327332, "learning_rate": 4.77887353503793e-05, "loss": 0.1208, "step": 34190 }, { "epoch": 1.2428228795697362, "grad_norm": 1.48567795753479, "learning_rate": 4.778660106074626e-05, "loss": 0.1198, "step": 34200 }, { "epoch": 1.2428228795697362, "eval_loss": 0.3608033359050751, "eval_runtime": 180.8757, "eval_samples_per_second": 40.989, "eval_steps_per_second": 5.125, "eval_wer": 0.18227530996423838, "step": 34200 }, { "epoch": 1.2431862780725345, "grad_norm": 4.077025890350342, "learning_rate": 4.778446578931967e-05, "loss": 0.13, "step": 34210 }, { "epoch": 1.2435496765753324, "grad_norm": 1.267830729484558, "learning_rate": 4.7782329536191504e-05, "loss": 0.183, "step": 34220 }, { "epoch": 1.2439130750781306, "grad_norm": 0.9263830780982971, "learning_rate": 4.778019230145383e-05, "loss": 0.135, "step": 34230 }, { "epoch": 1.2442764735809289, "grad_norm": 1.3920031785964966, "learning_rate": 4.777805408519872e-05, "loss": 0.1485, "step": 34240 }, { "epoch": 1.244639872083727, "grad_norm": 1.263641357421875, "learning_rate": 4.7775914887518306e-05, "loss": 0.1503, "step": 34250 }, { "epoch": 1.2450032705865253, "grad_norm": 1.514445185661316, "learning_rate": 4.777377470850475e-05, "loss": 0.1604, "step": 34260 }, { "epoch": 1.2453666690893233, "grad_norm": 0.9733619093894958, "learning_rate": 4.7771633548250266e-05, "loss": 0.1674, "step": 34270 }, { "epoch": 1.2457300675921215, "grad_norm": 1.6468124389648438, "learning_rate": 4.776949140684712e-05, "loss": 0.1229, "step": 34280 }, { "epoch": 1.2460934660949197, "grad_norm": 0.9954056739807129, "learning_rate": 4.77673482843876e-05, "loss": 0.1237, "step": 34290 }, { "epoch": 1.2464568645977179, "grad_norm": 1.7785327434539795, "learning_rate": 4.776520418096406e-05, "loss": 0.1784, "step": 34300 }, { "epoch": 1.246820263100516, "grad_norm": 1.952333688735962, "learning_rate": 4.776305909666886e-05, "loss": 0.1355, "step": 34310 }, { "epoch": 1.2471836616033143, "grad_norm": 0.7019221782684326, "learning_rate": 4.7760913031594445e-05, "loss": 0.1856, "step": 34320 }, { "epoch": 1.2475470601061123, "grad_norm": 2.3900887966156006, "learning_rate": 4.775876598583327e-05, "loss": 0.1279, "step": 34330 }, { "epoch": 1.2479104586089105, "grad_norm": 2.4521565437316895, "learning_rate": 4.7756617959477834e-05, "loss": 0.1384, "step": 34340 }, { "epoch": 1.2482738571117087, "grad_norm": 1.043819546699524, "learning_rate": 4.7754468952620704e-05, "loss": 0.1485, "step": 34350 }, { "epoch": 1.248637255614507, "grad_norm": 2.2905571460723877, "learning_rate": 4.775231896535446e-05, "loss": 0.1342, "step": 34360 }, { "epoch": 1.249000654117305, "grad_norm": 1.3930597305297852, "learning_rate": 4.7750167997771756e-05, "loss": 0.1989, "step": 34370 }, { "epoch": 1.2493640526201033, "grad_norm": 1.1254252195358276, "learning_rate": 4.7748016049965255e-05, "loss": 0.113, "step": 34380 }, { "epoch": 1.2497274511229013, "grad_norm": 0.8257030248641968, "learning_rate": 4.774586312202768e-05, "loss": 0.1212, "step": 34390 }, { "epoch": 1.2500908496256995, "grad_norm": 1.2986866235733032, "learning_rate": 4.774370921405179e-05, "loss": 0.1446, "step": 34400 }, { "epoch": 1.2504542481284977, "grad_norm": 2.2006325721740723, "learning_rate": 4.77415543261304e-05, "loss": 0.1567, "step": 34410 }, { "epoch": 1.250817646631296, "grad_norm": 0.6778092384338379, "learning_rate": 4.7739398458356335e-05, "loss": 0.1768, "step": 34420 }, { "epoch": 1.2511810451340941, "grad_norm": 1.152696132659912, "learning_rate": 4.773724161082251e-05, "loss": 0.1024, "step": 34430 }, { "epoch": 1.251544443636892, "grad_norm": 2.375783681869507, "learning_rate": 4.7735083783621835e-05, "loss": 0.2642, "step": 34440 }, { "epoch": 1.2519078421396903, "grad_norm": 1.3765895366668701, "learning_rate": 4.77329249768473e-05, "loss": 0.1861, "step": 34450 }, { "epoch": 1.2522712406424885, "grad_norm": 1.7743607759475708, "learning_rate": 4.773076519059191e-05, "loss": 0.1429, "step": 34460 }, { "epoch": 1.2526346391452867, "grad_norm": 1.4731152057647705, "learning_rate": 4.772860442494872e-05, "loss": 0.1508, "step": 34470 }, { "epoch": 1.252998037648085, "grad_norm": 1.5612653493881226, "learning_rate": 4.7726442680010836e-05, "loss": 0.1291, "step": 34480 }, { "epoch": 1.2533614361508831, "grad_norm": 1.9972872734069824, "learning_rate": 4.77242799558714e-05, "loss": 0.1154, "step": 34490 }, { "epoch": 1.2537248346536813, "grad_norm": 0.7144235372543335, "learning_rate": 4.772211625262359e-05, "loss": 0.1793, "step": 34500 }, { "epoch": 1.2540882331564793, "grad_norm": 10.059864044189453, "learning_rate": 4.7719951570360636e-05, "loss": 0.1346, "step": 34510 }, { "epoch": 1.2544516316592775, "grad_norm": 1.0801091194152832, "learning_rate": 4.771778590917581e-05, "loss": 0.1918, "step": 34520 }, { "epoch": 1.2548150301620757, "grad_norm": 2.0628061294555664, "learning_rate": 4.771561926916242e-05, "loss": 0.1477, "step": 34530 }, { "epoch": 1.255178428664874, "grad_norm": 2.5143215656280518, "learning_rate": 4.771345165041381e-05, "loss": 0.1226, "step": 34540 }, { "epoch": 1.2555418271676722, "grad_norm": 1.197352409362793, "learning_rate": 4.7711283053023394e-05, "loss": 0.152, "step": 34550 }, { "epoch": 1.2559052256704701, "grad_norm": 0.9427943825721741, "learning_rate": 4.7709113477084595e-05, "loss": 0.1359, "step": 34560 }, { "epoch": 1.2562686241732683, "grad_norm": 1.0930500030517578, "learning_rate": 4.770694292269089e-05, "loss": 0.1659, "step": 34570 }, { "epoch": 1.2566320226760666, "grad_norm": 0.7914316654205322, "learning_rate": 4.770477138993581e-05, "loss": 0.1224, "step": 34580 }, { "epoch": 1.2569954211788648, "grad_norm": 0.6064370274543762, "learning_rate": 4.770259887891292e-05, "loss": 0.1153, "step": 34590 }, { "epoch": 1.257358819681663, "grad_norm": 0.8653318285942078, "learning_rate": 4.770042538971581e-05, "loss": 0.1715, "step": 34600 }, { "epoch": 1.257722218184461, "grad_norm": 0.5470715761184692, "learning_rate": 4.7698250922438145e-05, "loss": 0.1447, "step": 34610 }, { "epoch": 1.2580856166872594, "grad_norm": 0.8058337569236755, "learning_rate": 4.769607547717361e-05, "loss": 0.1742, "step": 34620 }, { "epoch": 1.2584490151900574, "grad_norm": 2.5231611728668213, "learning_rate": 4.7693899054015926e-05, "loss": 1.9069, "step": 34630 }, { "epoch": 1.2588124136928556, "grad_norm": 0.603464663028717, "learning_rate": 4.7691721653058886e-05, "loss": 0.1244, "step": 34640 }, { "epoch": 1.2591758121956538, "grad_norm": 0.7844828963279724, "learning_rate": 4.76895432743963e-05, "loss": 0.3787, "step": 34650 }, { "epoch": 1.259539210698452, "grad_norm": 0.7887173295021057, "learning_rate": 4.7687363918122016e-05, "loss": 0.1268, "step": 34660 }, { "epoch": 1.2599026092012502, "grad_norm": 0.669452965259552, "learning_rate": 4.768518358432994e-05, "loss": 0.1572, "step": 34670 }, { "epoch": 1.2602660077040482, "grad_norm": 1.193303108215332, "learning_rate": 4.768300227311403e-05, "loss": 0.5754, "step": 34680 }, { "epoch": 1.2606294062068464, "grad_norm": 0.8210042715072632, "learning_rate": 4.7680819984568246e-05, "loss": 0.1372, "step": 34690 }, { "epoch": 1.2609928047096446, "grad_norm": 2.98244309425354, "learning_rate": 4.767863671878663e-05, "loss": 0.2028, "step": 34700 }, { "epoch": 1.2613562032124428, "grad_norm": 1.3739604949951172, "learning_rate": 4.767645247586325e-05, "loss": 0.164, "step": 34710 }, { "epoch": 1.261719601715241, "grad_norm": 0.6770296096801758, "learning_rate": 4.7674267255892226e-05, "loss": 0.2234, "step": 34720 }, { "epoch": 1.262083000218039, "grad_norm": 0.5827689170837402, "learning_rate": 4.767208105896769e-05, "loss": 0.1405, "step": 34730 }, { "epoch": 1.2624463987208372, "grad_norm": 0.7818326354026794, "learning_rate": 4.766989388518385e-05, "loss": 0.1194, "step": 34740 }, { "epoch": 1.2628097972236354, "grad_norm": 0.8514626026153564, "learning_rate": 4.7667705734634946e-05, "loss": 0.1529, "step": 34750 }, { "epoch": 1.2631731957264336, "grad_norm": 0.7973842024803162, "learning_rate": 4.766551660741525e-05, "loss": 0.1224, "step": 34760 }, { "epoch": 1.2635365942292318, "grad_norm": 1.019089937210083, "learning_rate": 4.766332650361909e-05, "loss": 0.1768, "step": 34770 }, { "epoch": 1.26389999273203, "grad_norm": 1.0458087921142578, "learning_rate": 4.766113542334082e-05, "loss": 0.1382, "step": 34780 }, { "epoch": 1.2642633912348282, "grad_norm": 1.0272470712661743, "learning_rate": 4.765894336667486e-05, "loss": 0.1272, "step": 34790 }, { "epoch": 1.2646267897376262, "grad_norm": 2.0589025020599365, "learning_rate": 4.765675033371565e-05, "loss": 0.1682, "step": 34800 }, { "epoch": 1.2646267897376262, "eval_loss": 0.3476085662841797, "eval_runtime": 181.248, "eval_samples_per_second": 40.905, "eval_steps_per_second": 5.115, "eval_wer": 0.17651169967505945, "step": 34800 }, { "epoch": 1.2649901882404244, "grad_norm": 1.1303410530090332, "learning_rate": 4.7654556324557685e-05, "loss": 0.1348, "step": 34810 }, { "epoch": 1.2653535867432226, "grad_norm": 0.799231231212616, "learning_rate": 4.765236133929549e-05, "loss": 0.1645, "step": 34820 }, { "epoch": 1.2657169852460208, "grad_norm": 1.2402738332748413, "learning_rate": 4.765016537802364e-05, "loss": 0.1235, "step": 34830 }, { "epoch": 1.266080383748819, "grad_norm": 4.433220386505127, "learning_rate": 4.7647968440836753e-05, "loss": 0.1706, "step": 34840 }, { "epoch": 1.266443782251617, "grad_norm": 0.8201845288276672, "learning_rate": 4.764577052782949e-05, "loss": 0.1704, "step": 34850 }, { "epoch": 1.2668071807544152, "grad_norm": 1.2809802293777466, "learning_rate": 4.764357163909655e-05, "loss": 0.1368, "step": 34860 }, { "epoch": 1.2671705792572134, "grad_norm": 0.7995765209197998, "learning_rate": 4.7641371774732676e-05, "loss": 0.1773, "step": 34870 }, { "epoch": 1.2675339777600116, "grad_norm": 1.1338168382644653, "learning_rate": 4.763917093483264e-05, "loss": 0.138, "step": 34880 }, { "epoch": 1.2678973762628098, "grad_norm": 0.85684734582901, "learning_rate": 4.763696911949129e-05, "loss": 0.1387, "step": 34890 }, { "epoch": 1.2682607747656078, "grad_norm": 1.212156057357788, "learning_rate": 4.763476632880348e-05, "loss": 0.1377, "step": 34900 }, { "epoch": 1.2686241732684063, "grad_norm": 2.2248573303222656, "learning_rate": 4.7632562562864125e-05, "loss": 0.1295, "step": 34910 }, { "epoch": 1.2689875717712042, "grad_norm": 1.2567734718322754, "learning_rate": 4.763035782176818e-05, "loss": 0.2109, "step": 34920 }, { "epoch": 1.2693509702740025, "grad_norm": 0.9226292967796326, "learning_rate": 4.7628152105610624e-05, "loss": 0.1315, "step": 34930 }, { "epoch": 1.2697143687768007, "grad_norm": 0.9735257029533386, "learning_rate": 4.762594541448651e-05, "loss": 0.1139, "step": 34940 }, { "epoch": 1.2700777672795989, "grad_norm": 3.8411102294921875, "learning_rate": 4.7623737748490914e-05, "loss": 0.5175, "step": 34950 }, { "epoch": 1.270441165782397, "grad_norm": 1.0780479907989502, "learning_rate": 4.762152910771895e-05, "loss": 0.1226, "step": 34960 }, { "epoch": 1.270804564285195, "grad_norm": 0.7885404229164124, "learning_rate": 4.761931949226579e-05, "loss": 0.1821, "step": 34970 }, { "epoch": 1.2711679627879933, "grad_norm": 3.387125015258789, "learning_rate": 4.761710890222663e-05, "loss": 0.1608, "step": 34980 }, { "epoch": 1.2715313612907915, "grad_norm": 0.9549399614334106, "learning_rate": 4.761489733769672e-05, "loss": 0.1071, "step": 34990 }, { "epoch": 1.2718947597935897, "grad_norm": 1.3453798294067383, "learning_rate": 4.761268479877134e-05, "loss": 0.1466, "step": 35000 }, { "epoch": 1.2722581582963879, "grad_norm": 0.9733071327209473, "learning_rate": 4.761047128554584e-05, "loss": 0.1646, "step": 35010 }, { "epoch": 1.2726215567991859, "grad_norm": 0.5874946713447571, "learning_rate": 4.760825679811557e-05, "loss": 0.2291, "step": 35020 }, { "epoch": 1.272984955301984, "grad_norm": 1.9506993293762207, "learning_rate": 4.7606041336575965e-05, "loss": 0.2256, "step": 35030 }, { "epoch": 1.2733483538047823, "grad_norm": 1.2343640327453613, "learning_rate": 4.760382490102247e-05, "loss": 0.133, "step": 35040 }, { "epoch": 1.2737117523075805, "grad_norm": 0.38314980268478394, "learning_rate": 4.7601607491550574e-05, "loss": 0.1682, "step": 35050 }, { "epoch": 1.2740751508103787, "grad_norm": 0.928424596786499, "learning_rate": 4.7599389108255846e-05, "loss": 0.0998, "step": 35060 }, { "epoch": 1.274438549313177, "grad_norm": 1.730793833732605, "learning_rate": 4.7597169751233833e-05, "loss": 0.1682, "step": 35070 }, { "epoch": 1.2748019478159751, "grad_norm": 1.0121824741363525, "learning_rate": 4.7594949420580184e-05, "loss": 0.1093, "step": 35080 }, { "epoch": 1.275165346318773, "grad_norm": 1.4093934297561646, "learning_rate": 4.759272811639055e-05, "loss": 0.156, "step": 35090 }, { "epoch": 1.2755287448215713, "grad_norm": 0.9377339482307434, "learning_rate": 4.759050583876066e-05, "loss": 0.1577, "step": 35100 }, { "epoch": 1.2758921433243695, "grad_norm": 1.6535552740097046, "learning_rate": 4.7588282587786246e-05, "loss": 0.1117, "step": 35110 }, { "epoch": 1.2762555418271677, "grad_norm": 0.7115573287010193, "learning_rate": 4.75860583635631e-05, "loss": 0.1443, "step": 35120 }, { "epoch": 1.276618940329966, "grad_norm": 1.6995899677276611, "learning_rate": 4.7583833166187065e-05, "loss": 0.126, "step": 35130 }, { "epoch": 1.276982338832764, "grad_norm": 1.9350817203521729, "learning_rate": 4.7581606995754005e-05, "loss": 0.1224, "step": 35140 }, { "epoch": 1.2773457373355621, "grad_norm": 1.8662594556808472, "learning_rate": 4.757937985235985e-05, "loss": 0.1735, "step": 35150 }, { "epoch": 1.2777091358383603, "grad_norm": 3.6977062225341797, "learning_rate": 4.7577151736100554e-05, "loss": 0.162, "step": 35160 }, { "epoch": 1.2780725343411585, "grad_norm": 0.5185838341712952, "learning_rate": 4.757492264707213e-05, "loss": 0.1593, "step": 35170 }, { "epoch": 1.2784359328439567, "grad_norm": 0.6665944457054138, "learning_rate": 4.7572692585370596e-05, "loss": 0.1144, "step": 35180 }, { "epoch": 1.2787993313467547, "grad_norm": 1.1617207527160645, "learning_rate": 4.757046155109206e-05, "loss": 0.1162, "step": 35190 }, { "epoch": 1.2791627298495531, "grad_norm": 1.3124502897262573, "learning_rate": 4.756822954433264e-05, "loss": 0.1441, "step": 35200 }, { "epoch": 1.2795261283523511, "grad_norm": 1.4907313585281372, "learning_rate": 4.756599656518851e-05, "loss": 0.1275, "step": 35210 }, { "epoch": 1.2798895268551493, "grad_norm": 1.0033677816390991, "learning_rate": 4.756376261375587e-05, "loss": 0.1795, "step": 35220 }, { "epoch": 1.2802529253579475, "grad_norm": 0.9439616799354553, "learning_rate": 4.756152769013099e-05, "loss": 0.1158, "step": 35230 }, { "epoch": 1.2806163238607458, "grad_norm": 2.641103506088257, "learning_rate": 4.755929179441016e-05, "loss": 0.1102, "step": 35240 }, { "epoch": 1.280979722363544, "grad_norm": 1.1864644289016724, "learning_rate": 4.7557054926689694e-05, "loss": 0.136, "step": 35250 }, { "epoch": 1.281343120866342, "grad_norm": 0.7801216244697571, "learning_rate": 4.755481708706601e-05, "loss": 0.1436, "step": 35260 }, { "epoch": 1.2817065193691402, "grad_norm": 0.5771633386611938, "learning_rate": 4.7552578275635494e-05, "loss": 0.1687, "step": 35270 }, { "epoch": 1.2820699178719384, "grad_norm": 1.247269630432129, "learning_rate": 4.755033849249463e-05, "loss": 0.1168, "step": 35280 }, { "epoch": 1.2824333163747366, "grad_norm": 1.3845196962356567, "learning_rate": 4.7548097737739905e-05, "loss": 0.1122, "step": 35290 }, { "epoch": 1.2827967148775348, "grad_norm": 1.6793280839920044, "learning_rate": 4.754585601146788e-05, "loss": 0.168, "step": 35300 }, { "epoch": 1.2831601133803328, "grad_norm": 0.8970069289207458, "learning_rate": 4.754361331377514e-05, "loss": 0.1172, "step": 35310 }, { "epoch": 1.283523511883131, "grad_norm": 0.6038461923599243, "learning_rate": 4.7541369644758315e-05, "loss": 0.1962, "step": 35320 }, { "epoch": 1.2838869103859292, "grad_norm": 0.9473531246185303, "learning_rate": 4.753912500451407e-05, "loss": 0.1288, "step": 35330 }, { "epoch": 1.2842503088887274, "grad_norm": 1.5339337587356567, "learning_rate": 4.753687939313912e-05, "loss": 0.1644, "step": 35340 }, { "epoch": 1.2846137073915256, "grad_norm": 0.9454206228256226, "learning_rate": 4.753463281073023e-05, "loss": 0.1319, "step": 35350 }, { "epoch": 1.2849771058943238, "grad_norm": 4.068907260894775, "learning_rate": 4.753238525738419e-05, "loss": 0.1241, "step": 35360 }, { "epoch": 1.285340504397122, "grad_norm": 1.1616491079330444, "learning_rate": 4.753013673319784e-05, "loss": 0.2266, "step": 35370 }, { "epoch": 1.28570390289992, "grad_norm": 1.232142448425293, "learning_rate": 4.7527887238268065e-05, "loss": 0.1438, "step": 35380 }, { "epoch": 1.2860673014027182, "grad_norm": 0.5053390860557556, "learning_rate": 4.7525636772691775e-05, "loss": 0.1214, "step": 35390 }, { "epoch": 1.2864306999055164, "grad_norm": 0.9241679310798645, "learning_rate": 4.752338533656594e-05, "loss": 0.3857, "step": 35400 }, { "epoch": 1.2864306999055164, "eval_loss": 0.33814677596092224, "eval_runtime": 180.0316, "eval_samples_per_second": 41.182, "eval_steps_per_second": 5.149, "eval_wer": 0.1815401092816817, "step": 35400 }, { "epoch": 1.2867940984083146, "grad_norm": 1.1576555967330933, "learning_rate": 4.7521132929987575e-05, "loss": 0.1429, "step": 35410 }, { "epoch": 1.2871574969111128, "grad_norm": 0.4765828549861908, "learning_rate": 4.751887955305372e-05, "loss": 0.1976, "step": 35420 }, { "epoch": 1.2875208954139108, "grad_norm": 0.8831065893173218, "learning_rate": 4.751662520586148e-05, "loss": 0.1543, "step": 35430 }, { "epoch": 1.287884293916709, "grad_norm": 1.9074327945709229, "learning_rate": 4.751436988850796e-05, "loss": 0.133, "step": 35440 }, { "epoch": 1.2882476924195072, "grad_norm": 1.6553431749343872, "learning_rate": 4.7512113601090356e-05, "loss": 0.1731, "step": 35450 }, { "epoch": 1.2886110909223054, "grad_norm": 1.2409085035324097, "learning_rate": 4.750985634370587e-05, "loss": 0.119, "step": 35460 }, { "epoch": 1.2889744894251036, "grad_norm": 4.575315475463867, "learning_rate": 4.7507598116451763e-05, "loss": 0.2287, "step": 35470 }, { "epoch": 1.2893378879279016, "grad_norm": 1.1517298221588135, "learning_rate": 4.7505338919425334e-05, "loss": 0.1263, "step": 35480 }, { "epoch": 1.2897012864307, "grad_norm": 0.8862209320068359, "learning_rate": 4.7503078752723935e-05, "loss": 0.9246, "step": 35490 }, { "epoch": 1.290064684933498, "grad_norm": 2.2056024074554443, "learning_rate": 4.750081761644493e-05, "loss": 0.1316, "step": 35500 }, { "epoch": 1.2904280834362962, "grad_norm": 1.9249043464660645, "learning_rate": 4.749855551068576e-05, "loss": 0.1341, "step": 35510 }, { "epoch": 1.2907914819390944, "grad_norm": 0.4050438404083252, "learning_rate": 4.749629243554387e-05, "loss": 0.1876, "step": 35520 }, { "epoch": 1.2911548804418926, "grad_norm": 0.8166261911392212, "learning_rate": 4.74940283911168e-05, "loss": 0.1141, "step": 35530 }, { "epoch": 1.2915182789446908, "grad_norm": 0.4988127648830414, "learning_rate": 4.749176337750206e-05, "loss": 0.1548, "step": 35540 }, { "epoch": 1.2918816774474888, "grad_norm": 1.80185067653656, "learning_rate": 4.748949739479728e-05, "loss": 0.1658, "step": 35550 }, { "epoch": 1.292245075950287, "grad_norm": 0.9549736380577087, "learning_rate": 4.748723044310006e-05, "loss": 0.1373, "step": 35560 }, { "epoch": 1.2926084744530852, "grad_norm": 1.3219162225723267, "learning_rate": 4.74849625225081e-05, "loss": 0.2101, "step": 35570 }, { "epoch": 1.2929718729558835, "grad_norm": 1.4803717136383057, "learning_rate": 4.74826936331191e-05, "loss": 0.121, "step": 35580 }, { "epoch": 1.2933352714586817, "grad_norm": 2.6438705921173096, "learning_rate": 4.7480423775030834e-05, "loss": 0.13, "step": 35590 }, { "epoch": 1.2936986699614796, "grad_norm": 2.5969061851501465, "learning_rate": 4.7478152948341094e-05, "loss": 0.165, "step": 35600 }, { "epoch": 1.2940620684642778, "grad_norm": 0.6594710946083069, "learning_rate": 4.74758811531477e-05, "loss": 0.3067, "step": 35610 }, { "epoch": 1.294425466967076, "grad_norm": 1.1262328624725342, "learning_rate": 4.747360838954858e-05, "loss": 0.1782, "step": 35620 }, { "epoch": 1.2947888654698743, "grad_norm": 0.9950854182243347, "learning_rate": 4.747133465764163e-05, "loss": 0.1254, "step": 35630 }, { "epoch": 1.2951522639726725, "grad_norm": 0.7515049576759338, "learning_rate": 4.746905995752482e-05, "loss": 0.144, "step": 35640 }, { "epoch": 1.2955156624754707, "grad_norm": 0.9141899347305298, "learning_rate": 4.746678428929616e-05, "loss": 0.1263, "step": 35650 }, { "epoch": 1.2958790609782689, "grad_norm": 1.5138301849365234, "learning_rate": 4.74645076530537e-05, "loss": 0.143, "step": 35660 }, { "epoch": 1.2962424594810669, "grad_norm": 0.6234374046325684, "learning_rate": 4.746223004889554e-05, "loss": 0.1492, "step": 35670 }, { "epoch": 1.296605857983865, "grad_norm": 0.6530427932739258, "learning_rate": 4.745995147691981e-05, "loss": 0.1579, "step": 35680 }, { "epoch": 1.2969692564866633, "grad_norm": 0.9193394780158997, "learning_rate": 4.745767193722468e-05, "loss": 0.141, "step": 35690 }, { "epoch": 1.2973326549894615, "grad_norm": 0.8602085709571838, "learning_rate": 4.745539142990837e-05, "loss": 0.1302, "step": 35700 }, { "epoch": 1.2976960534922597, "grad_norm": 0.7672144174575806, "learning_rate": 4.745310995506914e-05, "loss": 0.1632, "step": 35710 }, { "epoch": 1.2980594519950577, "grad_norm": 0.728992760181427, "learning_rate": 4.74508275128053e-05, "loss": 0.161, "step": 35720 }, { "epoch": 1.2984228504978559, "grad_norm": 0.923298716545105, "learning_rate": 4.7448544103215164e-05, "loss": 0.125, "step": 35730 }, { "epoch": 1.298786249000654, "grad_norm": 0.6956040859222412, "learning_rate": 4.744625972639715e-05, "loss": 0.1071, "step": 35740 }, { "epoch": 1.2991496475034523, "grad_norm": 0.7756535410881042, "learning_rate": 4.7443974382449664e-05, "loss": 0.1452, "step": 35750 }, { "epoch": 1.2995130460062505, "grad_norm": 1.3024572134017944, "learning_rate": 4.7441688071471174e-05, "loss": 0.1328, "step": 35760 }, { "epoch": 1.2998764445090485, "grad_norm": 1.8605810403823853, "learning_rate": 4.7439400793560196e-05, "loss": 0.2063, "step": 35770 }, { "epoch": 1.300239843011847, "grad_norm": 1.6558598279953003, "learning_rate": 4.743711254881528e-05, "loss": 0.1282, "step": 35780 }, { "epoch": 1.300603241514645, "grad_norm": 0.8223969340324402, "learning_rate": 4.743482333733501e-05, "loss": 0.1348, "step": 35790 }, { "epoch": 1.300966640017443, "grad_norm": 1.5702069997787476, "learning_rate": 4.743253315921803e-05, "loss": 0.1656, "step": 35800 }, { "epoch": 1.3013300385202413, "grad_norm": 0.6384185552597046, "learning_rate": 4.743024201456301e-05, "loss": 0.1369, "step": 35810 }, { "epoch": 1.3016934370230395, "grad_norm": 1.1003926992416382, "learning_rate": 4.7427949903468667e-05, "loss": 0.1606, "step": 35820 }, { "epoch": 1.3020568355258377, "grad_norm": 1.1869399547576904, "learning_rate": 4.742565682603376e-05, "loss": 0.131, "step": 35830 }, { "epoch": 1.3024202340286357, "grad_norm": 1.0088342428207397, "learning_rate": 4.7423362782357096e-05, "loss": 0.1183, "step": 35840 }, { "epoch": 1.302783632531434, "grad_norm": 0.8730582594871521, "learning_rate": 4.7421067772537506e-05, "loss": 0.1228, "step": 35850 }, { "epoch": 1.3031470310342321, "grad_norm": 1.3678339719772339, "learning_rate": 4.7418771796673886e-05, "loss": 0.119, "step": 35860 }, { "epoch": 1.3035104295370303, "grad_norm": 0.5349250435829163, "learning_rate": 4.7416474854865154e-05, "loss": 0.1571, "step": 35870 }, { "epoch": 1.3038738280398285, "grad_norm": 1.4240535497665405, "learning_rate": 4.741417694721028e-05, "loss": 0.1221, "step": 35880 }, { "epoch": 1.3042372265426265, "grad_norm": 2.4048521518707275, "learning_rate": 4.741187807380827e-05, "loss": 0.1265, "step": 35890 }, { "epoch": 1.3046006250454247, "grad_norm": 0.9620640277862549, "learning_rate": 4.740957823475818e-05, "loss": 0.1601, "step": 35900 }, { "epoch": 1.304964023548223, "grad_norm": 2.351884603500366, "learning_rate": 4.740727743015909e-05, "loss": 0.1134, "step": 35910 }, { "epoch": 1.3053274220510211, "grad_norm": 0.6285625100135803, "learning_rate": 4.7404975660110146e-05, "loss": 0.4726, "step": 35920 }, { "epoch": 1.3056908205538194, "grad_norm": 0.6645105481147766, "learning_rate": 4.740267292471051e-05, "loss": 0.1163, "step": 35930 }, { "epoch": 1.3060542190566176, "grad_norm": 1.6493772268295288, "learning_rate": 4.7400369224059415e-05, "loss": 1.652, "step": 35940 }, { "epoch": 1.3064176175594158, "grad_norm": 0.6978940367698669, "learning_rate": 4.739806455825611e-05, "loss": 0.1534, "step": 35950 }, { "epoch": 1.3067810160622138, "grad_norm": 3.241497039794922, "learning_rate": 4.739575892739989e-05, "loss": 0.1196, "step": 35960 }, { "epoch": 1.307144414565012, "grad_norm": 1.1746867895126343, "learning_rate": 4.739345233159011e-05, "loss": 0.1576, "step": 35970 }, { "epoch": 1.3075078130678102, "grad_norm": 0.9227213859558105, "learning_rate": 4.7391144770926144e-05, "loss": 0.5262, "step": 35980 }, { "epoch": 1.3078712115706084, "grad_norm": 2.3250370025634766, "learning_rate": 4.738883624550741e-05, "loss": 0.1231, "step": 35990 }, { "epoch": 1.3082346100734066, "grad_norm": 1.344184398651123, "learning_rate": 4.738652675543339e-05, "loss": 0.1508, "step": 36000 }, { "epoch": 1.3082346100734066, "eval_loss": 0.3749592900276184, "eval_runtime": 180.4784, "eval_samples_per_second": 41.08, "eval_steps_per_second": 5.136, "eval_wer": 0.17836331620890591, "step": 36000 }, { "epoch": 1.3085980085762046, "grad_norm": 2.1917757987976074, "learning_rate": 4.738421630080358e-05, "loss": 0.1232, "step": 36010 }, { "epoch": 1.3089614070790028, "grad_norm": 0.7760763764381409, "learning_rate": 4.738190488171753e-05, "loss": 0.1965, "step": 36020 }, { "epoch": 1.309324805581801, "grad_norm": 1.6439956426620483, "learning_rate": 4.737959249827484e-05, "loss": 0.13, "step": 36030 }, { "epoch": 1.3096882040845992, "grad_norm": 1.612452507019043, "learning_rate": 4.7377279150575137e-05, "loss": 0.1335, "step": 36040 }, { "epoch": 1.3100516025873974, "grad_norm": 0.9884024858474731, "learning_rate": 4.737496483871809e-05, "loss": 1.4829, "step": 36050 }, { "epoch": 1.3104150010901954, "grad_norm": 0.8376805782318115, "learning_rate": 4.737264956280342e-05, "loss": 0.1502, "step": 36060 }, { "epoch": 1.3107783995929938, "grad_norm": 0.7544919848442078, "learning_rate": 4.7370333322930884e-05, "loss": 0.1799, "step": 36070 }, { "epoch": 1.3111417980957918, "grad_norm": 1.0415360927581787, "learning_rate": 4.736801611920028e-05, "loss": 0.133, "step": 36080 }, { "epoch": 1.31150519659859, "grad_norm": 1.3284482955932617, "learning_rate": 4.736569795171144e-05, "loss": 0.6017, "step": 36090 }, { "epoch": 1.3118685951013882, "grad_norm": 0.766444742679596, "learning_rate": 4.736337882056425e-05, "loss": 0.1629, "step": 36100 }, { "epoch": 1.3122319936041864, "grad_norm": 1.8423712253570557, "learning_rate": 4.7361058725858645e-05, "loss": 0.1417, "step": 36110 }, { "epoch": 1.3125953921069846, "grad_norm": 0.7671094536781311, "learning_rate": 4.735873766769458e-05, "loss": 0.1498, "step": 36120 }, { "epoch": 1.3129587906097826, "grad_norm": 0.7939559817314148, "learning_rate": 4.735641564617206e-05, "loss": 0.1101, "step": 36130 }, { "epoch": 1.3133221891125808, "grad_norm": 1.1651771068572998, "learning_rate": 4.735409266139113e-05, "loss": 0.1478, "step": 36140 }, { "epoch": 1.313685587615379, "grad_norm": 0.3433835506439209, "learning_rate": 4.735176871345188e-05, "loss": 0.1775, "step": 36150 }, { "epoch": 1.3140489861181772, "grad_norm": 1.2903847694396973, "learning_rate": 4.734944380245445e-05, "loss": 0.129, "step": 36160 }, { "epoch": 1.3144123846209754, "grad_norm": 1.4051779508590698, "learning_rate": 4.734711792849901e-05, "loss": 0.2257, "step": 36170 }, { "epoch": 1.3147757831237734, "grad_norm": 0.7781183123588562, "learning_rate": 4.734479109168577e-05, "loss": 0.1072, "step": 36180 }, { "epoch": 1.3151391816265716, "grad_norm": 3.805746078491211, "learning_rate": 4.734246329211498e-05, "loss": 0.1327, "step": 36190 }, { "epoch": 1.3155025801293698, "grad_norm": 1.0254390239715576, "learning_rate": 4.734013452988694e-05, "loss": 0.1479, "step": 36200 }, { "epoch": 1.315865978632168, "grad_norm": 1.2095835208892822, "learning_rate": 4.7337804805101994e-05, "loss": 0.1286, "step": 36210 }, { "epoch": 1.3162293771349662, "grad_norm": 0.7073403596878052, "learning_rate": 4.733547411786052e-05, "loss": 0.1385, "step": 36220 }, { "epoch": 1.3165927756377644, "grad_norm": 6.6172709465026855, "learning_rate": 4.7333142468262924e-05, "loss": 0.1202, "step": 36230 }, { "epoch": 1.3169561741405627, "grad_norm": 0.9846429228782654, "learning_rate": 4.733080985640969e-05, "loss": 0.1202, "step": 36240 }, { "epoch": 1.3173195726433606, "grad_norm": 1.7326525449752808, "learning_rate": 4.7328476282401313e-05, "loss": 0.1269, "step": 36250 }, { "epoch": 1.3176829711461588, "grad_norm": 1.7362529039382935, "learning_rate": 4.7326141746338334e-05, "loss": 0.139, "step": 36260 }, { "epoch": 1.318046369648957, "grad_norm": 0.5070465207099915, "learning_rate": 4.732380624832135e-05, "loss": 0.1963, "step": 36270 }, { "epoch": 1.3184097681517553, "grad_norm": 2.0635170936584473, "learning_rate": 4.7321469788450976e-05, "loss": 0.1251, "step": 36280 }, { "epoch": 1.3187731666545535, "grad_norm": 17.00756072998047, "learning_rate": 4.731913236682789e-05, "loss": 0.4956, "step": 36290 }, { "epoch": 1.3191365651573514, "grad_norm": 0.4699925184249878, "learning_rate": 4.7316793983552806e-05, "loss": 0.1397, "step": 36300 }, { "epoch": 1.3194999636601497, "grad_norm": 1.5378074645996094, "learning_rate": 4.731445463872647e-05, "loss": 0.1444, "step": 36310 }, { "epoch": 1.3198633621629479, "grad_norm": 0.6688012480735779, "learning_rate": 4.731211433244967e-05, "loss": 0.2393, "step": 36320 }, { "epoch": 1.320226760665746, "grad_norm": 1.1277016401290894, "learning_rate": 4.7309773064823274e-05, "loss": 0.6115, "step": 36330 }, { "epoch": 1.3205901591685443, "grad_norm": 0.7446697354316711, "learning_rate": 4.7307430835948114e-05, "loss": 0.1687, "step": 36340 }, { "epoch": 1.3209535576713423, "grad_norm": 1.6629223823547363, "learning_rate": 4.730508764592514e-05, "loss": 0.1681, "step": 36350 }, { "epoch": 1.3213169561741407, "grad_norm": 1.4008903503417969, "learning_rate": 4.73027434948553e-05, "loss": 0.1343, "step": 36360 }, { "epoch": 1.3216803546769387, "grad_norm": 0.6811515092849731, "learning_rate": 4.7300398382839586e-05, "loss": 0.186, "step": 36370 }, { "epoch": 1.3220437531797369, "grad_norm": 0.822640597820282, "learning_rate": 4.7298052309979055e-05, "loss": 0.1336, "step": 36380 }, { "epoch": 1.322407151682535, "grad_norm": 1.145392894744873, "learning_rate": 4.729570527637479e-05, "loss": 0.1267, "step": 36390 }, { "epoch": 1.3227705501853333, "grad_norm": 1.6381548643112183, "learning_rate": 4.729335728212792e-05, "loss": 0.1213, "step": 36400 }, { "epoch": 1.3231339486881315, "grad_norm": 1.882562518119812, "learning_rate": 4.729100832733959e-05, "loss": 0.1384, "step": 36410 }, { "epoch": 1.3234973471909295, "grad_norm": 0.7675313949584961, "learning_rate": 4.728865841211103e-05, "loss": 0.1554, "step": 36420 }, { "epoch": 1.3238607456937277, "grad_norm": 0.900806725025177, "learning_rate": 4.728630753654349e-05, "loss": 0.1174, "step": 36430 }, { "epoch": 1.324224144196526, "grad_norm": 1.4791582822799683, "learning_rate": 4.7283955700738235e-05, "loss": 0.1449, "step": 36440 }, { "epoch": 1.324587542699324, "grad_norm": 0.5582447052001953, "learning_rate": 4.728160290479663e-05, "loss": 0.1754, "step": 36450 }, { "epoch": 1.3249509412021223, "grad_norm": 2.113154411315918, "learning_rate": 4.727924914882002e-05, "loss": 0.1091, "step": 36460 }, { "epoch": 1.3253143397049203, "grad_norm": 0.9747204184532166, "learning_rate": 4.727689443290985e-05, "loss": 0.144, "step": 36470 }, { "epoch": 1.3256777382077185, "grad_norm": 0.6118887662887573, "learning_rate": 4.727453875716755e-05, "loss": 0.1144, "step": 36480 }, { "epoch": 1.3260411367105167, "grad_norm": 1.146438717842102, "learning_rate": 4.727218212169464e-05, "loss": 0.1138, "step": 36490 }, { "epoch": 1.326404535213315, "grad_norm": 1.2453789710998535, "learning_rate": 4.7269824526592636e-05, "loss": 0.1642, "step": 36500 }, { "epoch": 1.3267679337161131, "grad_norm": 2.001384973526001, "learning_rate": 4.726746597196313e-05, "loss": 0.1317, "step": 36510 }, { "epoch": 1.3271313322189113, "grad_norm": 0.4389583170413971, "learning_rate": 4.726510645790775e-05, "loss": 0.1477, "step": 36520 }, { "epoch": 1.3274947307217095, "grad_norm": 0.5064995884895325, "learning_rate": 4.726274598452815e-05, "loss": 0.1208, "step": 36530 }, { "epoch": 1.3278581292245075, "grad_norm": 1.937470555305481, "learning_rate": 4.726038455192603e-05, "loss": 0.1425, "step": 36540 }, { "epoch": 1.3282215277273057, "grad_norm": 0.7836539149284363, "learning_rate": 4.725802216020315e-05, "loss": 0.1942, "step": 36550 }, { "epoch": 1.328584926230104, "grad_norm": 0.7476559281349182, "learning_rate": 4.725565880946129e-05, "loss": 0.1269, "step": 36560 }, { "epoch": 1.3289483247329021, "grad_norm": 1.584917426109314, "learning_rate": 4.725329449980227e-05, "loss": 0.1939, "step": 36570 }, { "epoch": 1.3293117232357003, "grad_norm": 1.6503409147262573, "learning_rate": 4.7250929231327975e-05, "loss": 0.1281, "step": 36580 }, { "epoch": 1.3296751217384983, "grad_norm": 1.2938586473464966, "learning_rate": 4.72485630041403e-05, "loss": 0.1505, "step": 36590 }, { "epoch": 1.3300385202412965, "grad_norm": 0.7093682289123535, "learning_rate": 4.724619581834121e-05, "loss": 0.1517, "step": 36600 }, { "epoch": 1.3300385202412965, "eval_loss": 0.3578657805919647, "eval_runtime": 181.1919, "eval_samples_per_second": 40.918, "eval_steps_per_second": 5.116, "eval_wer": 0.17275400729754753, "step": 36600 }, { "epoch": 1.3304019187440947, "grad_norm": 2.326840877532959, "learning_rate": 4.72438276740327e-05, "loss": 0.1333, "step": 36610 }, { "epoch": 1.330765317246893, "grad_norm": 1.0283209085464478, "learning_rate": 4.7241458571316794e-05, "loss": 0.177, "step": 36620 }, { "epoch": 1.3311287157496912, "grad_norm": 1.416473388671875, "learning_rate": 4.7239325559518525e-05, "loss": 3.9838, "step": 36630 }, { "epoch": 1.3314921142524891, "grad_norm": 0.40842917561531067, "learning_rate": 4.7236954636109833e-05, "loss": 0.1116, "step": 36640 }, { "epoch": 1.3318555127552876, "grad_norm": 2.1062543392181396, "learning_rate": 4.7234582754589886e-05, "loss": 0.121, "step": 36650 }, { "epoch": 1.3322189112580856, "grad_norm": 0.6768646836280823, "learning_rate": 4.723220991506088e-05, "loss": 0.1271, "step": 36660 }, { "epoch": 1.3325823097608838, "grad_norm": 1.1778359413146973, "learning_rate": 4.7229836117625044e-05, "loss": 0.1289, "step": 36670 }, { "epoch": 1.332945708263682, "grad_norm": 2.1446762084960938, "learning_rate": 4.7227461362384664e-05, "loss": 0.163, "step": 36680 }, { "epoch": 1.3333091067664802, "grad_norm": 0.9764724373817444, "learning_rate": 4.7225085649442063e-05, "loss": 0.1131, "step": 36690 }, { "epoch": 1.3336725052692784, "grad_norm": 0.6134273409843445, "learning_rate": 4.72227089788996e-05, "loss": 0.1269, "step": 36700 }, { "epoch": 1.3340359037720764, "grad_norm": 0.8482096195220947, "learning_rate": 4.722033135085967e-05, "loss": 0.1211, "step": 36710 }, { "epoch": 1.3343993022748746, "grad_norm": 1.1198707818984985, "learning_rate": 4.7217952765424734e-05, "loss": 0.1681, "step": 36720 }, { "epoch": 1.3347627007776728, "grad_norm": 1.5771534442901611, "learning_rate": 4.721557322269725e-05, "loss": 0.7935, "step": 36730 }, { "epoch": 1.335126099280471, "grad_norm": 1.8235740661621094, "learning_rate": 4.721319272277977e-05, "loss": 0.1249, "step": 36740 }, { "epoch": 1.3354894977832692, "grad_norm": 2.7422354221343994, "learning_rate": 4.7210811265774845e-05, "loss": 0.1637, "step": 36750 }, { "epoch": 1.3358528962860672, "grad_norm": 3.130943775177002, "learning_rate": 4.720842885178509e-05, "loss": 0.1437, "step": 36760 }, { "epoch": 1.3362162947888654, "grad_norm": 0.48167362809181213, "learning_rate": 4.720604548091316e-05, "loss": 0.2081, "step": 36770 }, { "epoch": 1.3365796932916636, "grad_norm": 1.3520551919937134, "learning_rate": 4.720366115326174e-05, "loss": 0.1566, "step": 36780 }, { "epoch": 1.3369430917944618, "grad_norm": 0.9920271635055542, "learning_rate": 4.720127586893355e-05, "loss": 0.0995, "step": 36790 }, { "epoch": 1.33730649029726, "grad_norm": 0.6343932747840881, "learning_rate": 4.7198889628031376e-05, "loss": 0.6462, "step": 36800 }, { "epoch": 1.3376698888000582, "grad_norm": 0.8678078651428223, "learning_rate": 4.719650243065804e-05, "loss": 0.1224, "step": 36810 }, { "epoch": 1.3380332873028564, "grad_norm": 1.1642274856567383, "learning_rate": 4.719411427691639e-05, "loss": 0.1438, "step": 36820 }, { "epoch": 1.3383966858056544, "grad_norm": 0.6214116811752319, "learning_rate": 4.719172516690932e-05, "loss": 0.1172, "step": 36830 }, { "epoch": 1.3387600843084526, "grad_norm": 0.9811148643493652, "learning_rate": 4.7189335100739764e-05, "loss": 0.1229, "step": 36840 }, { "epoch": 1.3391234828112508, "grad_norm": 0.6965753436088562, "learning_rate": 4.718694407851072e-05, "loss": 0.1432, "step": 36850 }, { "epoch": 1.339486881314049, "grad_norm": 2.3423101902008057, "learning_rate": 4.718455210032519e-05, "loss": 0.1578, "step": 36860 }, { "epoch": 1.3398502798168472, "grad_norm": 1.1584868431091309, "learning_rate": 4.718215916628625e-05, "loss": 0.1792, "step": 36870 }, { "epoch": 1.3402136783196452, "grad_norm": 1.1057560443878174, "learning_rate": 4.717976527649698e-05, "loss": 0.129, "step": 36880 }, { "epoch": 1.3405770768224434, "grad_norm": 1.6027841567993164, "learning_rate": 4.7177370431060554e-05, "loss": 0.0984, "step": 36890 }, { "epoch": 1.3409404753252416, "grad_norm": 1.4970412254333496, "learning_rate": 4.717497463008014e-05, "loss": 0.1764, "step": 36900 }, { "epoch": 1.3413038738280398, "grad_norm": 7.802013397216797, "learning_rate": 4.717257787365897e-05, "loss": 0.1511, "step": 36910 }, { "epoch": 1.341667272330838, "grad_norm": 0.6998898983001709, "learning_rate": 4.717018016190031e-05, "loss": 0.1874, "step": 36920 }, { "epoch": 1.342030670833636, "grad_norm": 1.1323654651641846, "learning_rate": 4.716778149490747e-05, "loss": 0.0968, "step": 36930 }, { "epoch": 1.3423940693364345, "grad_norm": 0.9213439226150513, "learning_rate": 4.716538187278379e-05, "loss": 0.11, "step": 36940 }, { "epoch": 1.3427574678392324, "grad_norm": 3.241694688796997, "learning_rate": 4.7162981295632676e-05, "loss": 0.1695, "step": 36950 }, { "epoch": 1.3431208663420307, "grad_norm": 2.2264153957366943, "learning_rate": 4.716057976355755e-05, "loss": 0.1568, "step": 36960 }, { "epoch": 1.3434842648448289, "grad_norm": 2.439816474914551, "learning_rate": 4.715817727666189e-05, "loss": 0.2052, "step": 36970 }, { "epoch": 1.343847663347627, "grad_norm": 0.8145691752433777, "learning_rate": 4.715577383504921e-05, "loss": 0.1501, "step": 36980 }, { "epoch": 1.3442110618504253, "grad_norm": 0.8044644594192505, "learning_rate": 4.7153369438823074e-05, "loss": 0.1088, "step": 36990 }, { "epoch": 1.3445744603532233, "grad_norm": 1.2818701267242432, "learning_rate": 4.715096408808707e-05, "loss": 0.1492, "step": 37000 }, { "epoch": 1.3449378588560215, "grad_norm": 3.879460096359253, "learning_rate": 4.714855778294482e-05, "loss": 0.1407, "step": 37010 }, { "epoch": 1.3453012573588197, "grad_norm": 0.7606347799301147, "learning_rate": 4.714615052350004e-05, "loss": 0.2055, "step": 37020 }, { "epoch": 1.3456646558616179, "grad_norm": 14.105179786682129, "learning_rate": 4.714374230985642e-05, "loss": 0.2572, "step": 37030 }, { "epoch": 1.346028054364416, "grad_norm": 0.9659761786460876, "learning_rate": 4.714133314211774e-05, "loss": 0.1126, "step": 37040 }, { "epoch": 1.346391452867214, "grad_norm": 0.8018509745597839, "learning_rate": 4.7138923020387785e-05, "loss": 0.1731, "step": 37050 }, { "epoch": 1.3467548513700123, "grad_norm": 1.450352668762207, "learning_rate": 4.7136511944770414e-05, "loss": 0.139, "step": 37060 }, { "epoch": 1.3471182498728105, "grad_norm": 0.4030288755893707, "learning_rate": 4.71340999153695e-05, "loss": 0.1317, "step": 37070 }, { "epoch": 1.3474816483756087, "grad_norm": 1.5737247467041016, "learning_rate": 4.713168693228898e-05, "loss": 0.1183, "step": 37080 }, { "epoch": 1.347845046878407, "grad_norm": 0.9841533303260803, "learning_rate": 4.712927299563281e-05, "loss": 0.126, "step": 37090 }, { "epoch": 1.348208445381205, "grad_norm": 0.9880457520484924, "learning_rate": 4.7126858105505004e-05, "loss": 0.1518, "step": 37100 }, { "epoch": 1.3485718438840033, "grad_norm": 1.250982403755188, "learning_rate": 4.7124442262009605e-05, "loss": 0.1241, "step": 37110 }, { "epoch": 1.3489352423868013, "grad_norm": 0.8015254139900208, "learning_rate": 4.712202546525071e-05, "loss": 0.1294, "step": 37120 }, { "epoch": 1.3492986408895995, "grad_norm": 1.3923901319503784, "learning_rate": 4.711960771533245e-05, "loss": 0.1273, "step": 37130 }, { "epoch": 1.3496620393923977, "grad_norm": 1.7166532278060913, "learning_rate": 4.7117189012359e-05, "loss": 0.1309, "step": 37140 }, { "epoch": 1.350025437895196, "grad_norm": 1.0079472064971924, "learning_rate": 4.711476935643456e-05, "loss": 0.1275, "step": 37150 }, { "epoch": 1.3503888363979941, "grad_norm": 0.7986971139907837, "learning_rate": 4.71123487476634e-05, "loss": 0.1422, "step": 37160 }, { "epoch": 1.350752234900792, "grad_norm": 0.3768475353717804, "learning_rate": 4.71099271861498e-05, "loss": 0.1943, "step": 37170 }, { "epoch": 1.3511156334035903, "grad_norm": 1.1428521871566772, "learning_rate": 4.7107504671998115e-05, "loss": 0.1335, "step": 37180 }, { "epoch": 1.3514790319063885, "grad_norm": 0.627876341342926, "learning_rate": 4.7105081205312715e-05, "loss": 0.1301, "step": 37190 }, { "epoch": 1.3518424304091867, "grad_norm": 1.823798656463623, "learning_rate": 4.710265678619801e-05, "loss": 0.1732, "step": 37200 }, { "epoch": 1.3518424304091867, "eval_loss": 0.3695838451385498, "eval_runtime": 181.0815, "eval_samples_per_second": 40.943, "eval_steps_per_second": 5.119, "eval_wer": 0.17419717900775136, "step": 37200 }, { "epoch": 1.352205828911985, "grad_norm": 1.1491807699203491, "learning_rate": 4.710023141475846e-05, "loss": 0.1267, "step": 37210 }, { "epoch": 1.352569227414783, "grad_norm": 0.9230825901031494, "learning_rate": 4.709780509109858e-05, "loss": 0.1339, "step": 37220 }, { "epoch": 1.3529326259175813, "grad_norm": 1.4637092351913452, "learning_rate": 4.7095377815322893e-05, "loss": 0.1199, "step": 37230 }, { "epoch": 1.3532960244203793, "grad_norm": 2.766608476638794, "learning_rate": 4.7092949587536e-05, "loss": 0.1153, "step": 37240 }, { "epoch": 1.3536594229231775, "grad_norm": 0.6508689522743225, "learning_rate": 4.7090520407842516e-05, "loss": 0.1263, "step": 37250 }, { "epoch": 1.3540228214259757, "grad_norm": 1.2959270477294922, "learning_rate": 4.70880902763471e-05, "loss": 0.207, "step": 37260 }, { "epoch": 1.354386219928774, "grad_norm": 0.4432971477508545, "learning_rate": 4.708565919315447e-05, "loss": 0.189, "step": 37270 }, { "epoch": 1.3547496184315722, "grad_norm": 1.1359493732452393, "learning_rate": 4.708322715836936e-05, "loss": 0.1161, "step": 37280 }, { "epoch": 1.3551130169343701, "grad_norm": 1.1669936180114746, "learning_rate": 4.708079417209657e-05, "loss": 0.1298, "step": 37290 }, { "epoch": 1.3554764154371683, "grad_norm": 1.0905638933181763, "learning_rate": 4.707836023444092e-05, "loss": 0.1763, "step": 37300 }, { "epoch": 1.3558398139399666, "grad_norm": 1.1597601175308228, "learning_rate": 4.707592534550729e-05, "loss": 0.1346, "step": 37310 }, { "epoch": 1.3562032124427648, "grad_norm": 0.3999848961830139, "learning_rate": 4.707348950540057e-05, "loss": 0.1614, "step": 37320 }, { "epoch": 1.356566610945563, "grad_norm": 1.9580241441726685, "learning_rate": 4.7071052714225736e-05, "loss": 0.134, "step": 37330 }, { "epoch": 1.356930009448361, "grad_norm": 0.7617779970169067, "learning_rate": 4.7068614972087764e-05, "loss": 0.1372, "step": 37340 }, { "epoch": 1.3572934079511592, "grad_norm": 1.103390097618103, "learning_rate": 4.706617627909169e-05, "loss": 0.1162, "step": 37350 }, { "epoch": 1.3576568064539574, "grad_norm": 1.1971250772476196, "learning_rate": 4.70637366353426e-05, "loss": 0.1358, "step": 37360 }, { "epoch": 1.3580202049567556, "grad_norm": 0.47730955481529236, "learning_rate": 4.70612960409456e-05, "loss": 0.1518, "step": 37370 }, { "epoch": 1.3583836034595538, "grad_norm": 1.2747211456298828, "learning_rate": 4.705885449600584e-05, "loss": 0.1264, "step": 37380 }, { "epoch": 1.358747001962352, "grad_norm": 1.5373166799545288, "learning_rate": 4.705641200062854e-05, "loss": 0.1362, "step": 37390 }, { "epoch": 1.3591104004651502, "grad_norm": 7.401641368865967, "learning_rate": 4.705396855491891e-05, "loss": 0.1524, "step": 37400 }, { "epoch": 1.3594737989679482, "grad_norm": 1.0285519361495972, "learning_rate": 4.705152415898225e-05, "loss": 0.1343, "step": 37410 }, { "epoch": 1.3598371974707464, "grad_norm": 0.9249128699302673, "learning_rate": 4.704907881292387e-05, "loss": 0.1857, "step": 37420 }, { "epoch": 1.3602005959735446, "grad_norm": 1.0541716814041138, "learning_rate": 4.7046632516849135e-05, "loss": 0.1519, "step": 37430 }, { "epoch": 1.3605639944763428, "grad_norm": 0.9401641488075256, "learning_rate": 4.704418527086345e-05, "loss": 0.1239, "step": 37440 }, { "epoch": 1.360927392979141, "grad_norm": 1.179436445236206, "learning_rate": 4.7041737075072254e-05, "loss": 0.1345, "step": 37450 }, { "epoch": 1.361290791481939, "grad_norm": 0.5075955390930176, "learning_rate": 4.703928792958103e-05, "loss": 0.1119, "step": 37460 }, { "epoch": 1.3616541899847372, "grad_norm": 1.7815593481063843, "learning_rate": 4.7036837834495306e-05, "loss": 0.1378, "step": 37470 }, { "epoch": 1.3620175884875354, "grad_norm": 1.0368989706039429, "learning_rate": 4.7034386789920646e-05, "loss": 0.1193, "step": 37480 }, { "epoch": 1.3623809869903336, "grad_norm": 0.9185715913772583, "learning_rate": 4.703193479596266e-05, "loss": 0.1539, "step": 37490 }, { "epoch": 1.3627443854931318, "grad_norm": 1.1198723316192627, "learning_rate": 4.7029481852726996e-05, "loss": 0.1483, "step": 37500 }, { "epoch": 1.3631077839959298, "grad_norm": 0.8500091433525085, "learning_rate": 4.702702796031934e-05, "loss": 0.1742, "step": 37510 }, { "epoch": 1.3634711824987282, "grad_norm": 1.0554280281066895, "learning_rate": 4.7024573118845414e-05, "loss": 0.1406, "step": 37520 }, { "epoch": 1.3638345810015262, "grad_norm": 1.524234414100647, "learning_rate": 4.702211732841101e-05, "loss": 0.1242, "step": 37530 }, { "epoch": 1.3641979795043244, "grad_norm": 1.3234226703643799, "learning_rate": 4.701966058912191e-05, "loss": 0.1114, "step": 37540 }, { "epoch": 1.3645613780071226, "grad_norm": 4.683910846710205, "learning_rate": 4.701720290108399e-05, "loss": 0.1377, "step": 37550 }, { "epoch": 1.3649247765099208, "grad_norm": 1.4473618268966675, "learning_rate": 4.701474426440313e-05, "loss": 0.1136, "step": 37560 }, { "epoch": 1.365288175012719, "grad_norm": 1.2548261880874634, "learning_rate": 4.701228467918527e-05, "loss": 0.1376, "step": 37570 }, { "epoch": 1.365651573515517, "grad_norm": 0.5335317850112915, "learning_rate": 4.7009824145536385e-05, "loss": 0.1103, "step": 37580 }, { "epoch": 1.3660149720183152, "grad_norm": 2.0894274711608887, "learning_rate": 4.700736266356249e-05, "loss": 0.2852, "step": 37590 }, { "epoch": 1.3663783705211134, "grad_norm": 4.296121120452881, "learning_rate": 4.700490023336963e-05, "loss": 0.1475, "step": 37600 }, { "epoch": 1.3667417690239116, "grad_norm": 3.073425054550171, "learning_rate": 4.700243685506393e-05, "loss": 0.1254, "step": 37610 }, { "epoch": 1.3671051675267099, "grad_norm": 0.5121023058891296, "learning_rate": 4.69999725287515e-05, "loss": 0.1737, "step": 37620 }, { "epoch": 1.3674685660295078, "grad_norm": 0.8064444661140442, "learning_rate": 4.699750725453853e-05, "loss": 0.1045, "step": 37630 }, { "epoch": 1.367831964532306, "grad_norm": 0.5956308841705322, "learning_rate": 4.699504103253124e-05, "loss": 0.1059, "step": 37640 }, { "epoch": 1.3681953630351043, "grad_norm": 1.8260743618011475, "learning_rate": 4.699257386283589e-05, "loss": 0.1677, "step": 37650 }, { "epoch": 1.3685587615379025, "grad_norm": 2.3063583374023438, "learning_rate": 4.699010574555879e-05, "loss": 0.1406, "step": 37660 }, { "epoch": 1.3689221600407007, "grad_norm": 0.5565524697303772, "learning_rate": 4.698763668080627e-05, "loss": 0.1556, "step": 37670 }, { "epoch": 1.3692855585434989, "grad_norm": 1.148147702217102, "learning_rate": 4.698516666868471e-05, "loss": 0.124, "step": 37680 }, { "epoch": 1.369648957046297, "grad_norm": 1.5487512350082397, "learning_rate": 4.698269570930055e-05, "loss": 0.1478, "step": 37690 }, { "epoch": 1.370012355549095, "grad_norm": 1.8273712396621704, "learning_rate": 4.698022380276024e-05, "loss": 0.1591, "step": 37700 }, { "epoch": 1.3703757540518933, "grad_norm": 1.7402414083480835, "learning_rate": 4.6977750949170294e-05, "loss": 0.1075, "step": 37710 }, { "epoch": 1.3707391525546915, "grad_norm": 0.7992825508117676, "learning_rate": 4.697527714863726e-05, "loss": 0.1653, "step": 37720 }, { "epoch": 1.3711025510574897, "grad_norm": 5.170393943786621, "learning_rate": 4.697280240126772e-05, "loss": 0.1563, "step": 37730 }, { "epoch": 1.371465949560288, "grad_norm": 1.3735640048980713, "learning_rate": 4.697032670716831e-05, "loss": 0.1231, "step": 37740 }, { "epoch": 1.3718293480630859, "grad_norm": 1.8720015287399292, "learning_rate": 4.696785006644569e-05, "loss": 1.526, "step": 37750 }, { "epoch": 1.372192746565884, "grad_norm": 1.9550750255584717, "learning_rate": 4.696537247920657e-05, "loss": 0.1503, "step": 37760 }, { "epoch": 1.3725561450686823, "grad_norm": 0.5374103784561157, "learning_rate": 4.6962893945557704e-05, "loss": 0.1949, "step": 37770 }, { "epoch": 1.3729195435714805, "grad_norm": 1.097432255744934, "learning_rate": 4.6960414465605876e-05, "loss": 0.1157, "step": 37780 }, { "epoch": 1.3732829420742787, "grad_norm": 0.40494269132614136, "learning_rate": 4.695793403945793e-05, "loss": 0.126, "step": 37790 }, { "epoch": 1.3736463405770767, "grad_norm": 1.9734747409820557, "learning_rate": 4.695545266722073e-05, "loss": 0.1457, "step": 37800 }, { "epoch": 1.3736463405770767, "eval_loss": 0.3492252230644226, "eval_runtime": 181.0909, "eval_samples_per_second": 40.941, "eval_steps_per_second": 5.119, "eval_wer": 0.18146749686858968, "step": 37800 }, { "epoch": 1.3740097390798751, "grad_norm": 19.56266212463379, "learning_rate": 4.6952970349001204e-05, "loss": 0.421, "step": 37810 }, { "epoch": 1.374373137582673, "grad_norm": 0.8468944430351257, "learning_rate": 4.695048708490628e-05, "loss": 0.2035, "step": 37820 }, { "epoch": 1.3747365360854713, "grad_norm": 0.6463280320167542, "learning_rate": 4.6948002875042976e-05, "loss": 0.1323, "step": 37830 }, { "epoch": 1.3750999345882695, "grad_norm": 1.1540967226028442, "learning_rate": 4.694551771951831e-05, "loss": 1.2856, "step": 37840 }, { "epoch": 1.3754633330910677, "grad_norm": 1.295023798942566, "learning_rate": 4.6943031618439374e-05, "loss": 0.7144, "step": 37850 }, { "epoch": 1.375826731593866, "grad_norm": 1.5403015613555908, "learning_rate": 4.694054457191328e-05, "loss": 0.1429, "step": 37860 }, { "epoch": 1.376190130096664, "grad_norm": 1.434574842453003, "learning_rate": 4.693805658004718e-05, "loss": 0.1686, "step": 37870 }, { "epoch": 1.3765535285994621, "grad_norm": 0.6648684740066528, "learning_rate": 4.693556764294829e-05, "loss": 0.1098, "step": 37880 }, { "epoch": 1.3769169271022603, "grad_norm": 0.7901143431663513, "learning_rate": 4.6933077760723824e-05, "loss": 0.115, "step": 37890 }, { "epoch": 1.3772803256050585, "grad_norm": 0.9131706953048706, "learning_rate": 4.693058693348108e-05, "loss": 0.1266, "step": 37900 }, { "epoch": 1.3776437241078567, "grad_norm": 1.2103451490402222, "learning_rate": 4.692809516132738e-05, "loss": 0.1205, "step": 37910 }, { "epoch": 1.3780071226106547, "grad_norm": 0.42282989621162415, "learning_rate": 4.6925602444370075e-05, "loss": 0.1753, "step": 37920 }, { "epoch": 1.3783705211134532, "grad_norm": 0.51373291015625, "learning_rate": 4.692310878271658e-05, "loss": 0.1635, "step": 37930 }, { "epoch": 1.3787339196162511, "grad_norm": 0.7226901650428772, "learning_rate": 4.692061417647431e-05, "loss": 0.1226, "step": 37940 }, { "epoch": 1.3790973181190493, "grad_norm": 5.131813049316406, "learning_rate": 4.6918118625750784e-05, "loss": 0.1682, "step": 37950 }, { "epoch": 1.3794607166218475, "grad_norm": 1.30665922164917, "learning_rate": 4.6915622130653506e-05, "loss": 0.128, "step": 37960 }, { "epoch": 1.3798241151246458, "grad_norm": 0.3638138175010681, "learning_rate": 4.691312469129006e-05, "loss": 0.1852, "step": 37970 }, { "epoch": 1.380187513627444, "grad_norm": 1.5417994260787964, "learning_rate": 4.691062630776802e-05, "loss": 0.119, "step": 37980 }, { "epoch": 1.380550912130242, "grad_norm": 1.4151712656021118, "learning_rate": 4.6908126980195055e-05, "loss": 0.1079, "step": 37990 }, { "epoch": 1.3809143106330402, "grad_norm": 1.137370228767395, "learning_rate": 4.6905626708678855e-05, "loss": 0.2152, "step": 38000 }, { "epoch": 1.3812777091358384, "grad_norm": 1.0430890321731567, "learning_rate": 4.690312549332714e-05, "loss": 0.1243, "step": 38010 }, { "epoch": 1.3816411076386366, "grad_norm": 1.260365605354309, "learning_rate": 4.690062333424767e-05, "loss": 0.1755, "step": 38020 }, { "epoch": 1.3820045061414348, "grad_norm": 0.8367292284965515, "learning_rate": 4.689812023154827e-05, "loss": 1.1881, "step": 38030 }, { "epoch": 1.3823679046442328, "grad_norm": 1.2440451383590698, "learning_rate": 4.6895616185336775e-05, "loss": 0.1292, "step": 38040 }, { "epoch": 1.382731303147031, "grad_norm": 2.23522686958313, "learning_rate": 4.6893111195721094e-05, "loss": 0.1491, "step": 38050 }, { "epoch": 1.3830947016498292, "grad_norm": 1.0018017292022705, "learning_rate": 4.6890605262809145e-05, "loss": 0.112, "step": 38060 }, { "epoch": 1.3834581001526274, "grad_norm": 0.6445533037185669, "learning_rate": 4.68880983867089e-05, "loss": 0.1597, "step": 38070 }, { "epoch": 1.3838214986554256, "grad_norm": 1.5876944065093994, "learning_rate": 4.6885590567528375e-05, "loss": 0.1341, "step": 38080 }, { "epoch": 1.3841848971582236, "grad_norm": 1.5150282382965088, "learning_rate": 4.6883081805375616e-05, "loss": 0.1167, "step": 38090 }, { "epoch": 1.384548295661022, "grad_norm": 1.7657722234725952, "learning_rate": 4.688057210035873e-05, "loss": 0.1608, "step": 38100 }, { "epoch": 1.38491169416382, "grad_norm": 1.027761459350586, "learning_rate": 4.687806145258584e-05, "loss": 0.1566, "step": 38110 }, { "epoch": 1.3852750926666182, "grad_norm": 0.3475823998451233, "learning_rate": 4.6875549862165126e-05, "loss": 0.1502, "step": 38120 }, { "epoch": 1.3856384911694164, "grad_norm": 0.7863835692405701, "learning_rate": 4.687303732920481e-05, "loss": 0.1761, "step": 38130 }, { "epoch": 1.3860018896722146, "grad_norm": 2.0150928497314453, "learning_rate": 4.687052385381313e-05, "loss": 0.1417, "step": 38140 }, { "epoch": 1.3863652881750128, "grad_norm": 0.6676269769668579, "learning_rate": 4.6868009436098386e-05, "loss": 0.1307, "step": 38150 }, { "epoch": 1.3867286866778108, "grad_norm": 1.6957210302352905, "learning_rate": 4.6865494076168934e-05, "loss": 0.1385, "step": 38160 }, { "epoch": 1.387092085180609, "grad_norm": 0.8534975051879883, "learning_rate": 4.686297777413313e-05, "loss": 0.1568, "step": 38170 }, { "epoch": 1.3874554836834072, "grad_norm": 0.7309104800224304, "learning_rate": 4.6860460530099416e-05, "loss": 0.5466, "step": 38180 }, { "epoch": 1.3878188821862054, "grad_norm": 1.2103863954544067, "learning_rate": 4.6857942344176225e-05, "loss": 0.1227, "step": 38190 }, { "epoch": 1.3881822806890036, "grad_norm": 0.7991679906845093, "learning_rate": 4.685542321647207e-05, "loss": 0.1603, "step": 38200 }, { "epoch": 1.3885456791918016, "grad_norm": 1.146906852722168, "learning_rate": 4.685290314709549e-05, "loss": 0.1268, "step": 38210 }, { "epoch": 1.3889090776946, "grad_norm": 1.020175576210022, "learning_rate": 4.685038213615508e-05, "loss": 0.1517, "step": 38220 }, { "epoch": 1.389272476197398, "grad_norm": 1.1214244365692139, "learning_rate": 4.684786018375944e-05, "loss": 0.1185, "step": 38230 }, { "epoch": 1.3896358747001962, "grad_norm": 0.830916166305542, "learning_rate": 4.6845337290017235e-05, "loss": 0.1281, "step": 38240 }, { "epoch": 1.3899992732029944, "grad_norm": 0.5939742922782898, "learning_rate": 4.684281345503718e-05, "loss": 0.1599, "step": 38250 }, { "epoch": 1.3903626717057926, "grad_norm": 4.498940467834473, "learning_rate": 4.6840288678928003e-05, "loss": 0.1159, "step": 38260 }, { "epoch": 1.3907260702085908, "grad_norm": 0.6612393856048584, "learning_rate": 4.6837762961798495e-05, "loss": 0.1842, "step": 38270 }, { "epoch": 1.3910894687113888, "grad_norm": 2.456289529800415, "learning_rate": 4.683523630375748e-05, "loss": 0.1558, "step": 38280 }, { "epoch": 1.391452867214187, "grad_norm": 0.5414180159568787, "learning_rate": 4.683270870491383e-05, "loss": 0.1347, "step": 38290 }, { "epoch": 1.3918162657169852, "grad_norm": 2.1812076568603516, "learning_rate": 4.683018016537644e-05, "loss": 0.1558, "step": 38300 }, { "epoch": 1.3921796642197835, "grad_norm": 1.2050772905349731, "learning_rate": 4.682765068525425e-05, "loss": 0.1591, "step": 38310 }, { "epoch": 1.3925430627225817, "grad_norm": 1.050423264503479, "learning_rate": 4.6825120264656266e-05, "loss": 0.1578, "step": 38320 }, { "epoch": 1.3929064612253796, "grad_norm": 2.753676652908325, "learning_rate": 4.68225889036915e-05, "loss": 0.1373, "step": 38330 }, { "epoch": 1.3932698597281779, "grad_norm": 2.3123908042907715, "learning_rate": 4.682005660246902e-05, "loss": 0.1198, "step": 38340 }, { "epoch": 1.393633258230976, "grad_norm": 0.6317697167396545, "learning_rate": 4.681752336109794e-05, "loss": 0.157, "step": 38350 }, { "epoch": 1.3939966567337743, "grad_norm": 1.788620948791504, "learning_rate": 4.681498917968741e-05, "loss": 0.1424, "step": 38360 }, { "epoch": 1.3943600552365725, "grad_norm": 1.064799189567566, "learning_rate": 4.68124540583466e-05, "loss": 0.1762, "step": 38370 }, { "epoch": 1.3947234537393705, "grad_norm": 1.3951762914657593, "learning_rate": 4.6809917997184764e-05, "loss": 0.1198, "step": 38380 }, { "epoch": 1.3950868522421689, "grad_norm": 1.0863114595413208, "learning_rate": 4.6807380996311154e-05, "loss": 0.1393, "step": 38390 }, { "epoch": 1.3954502507449669, "grad_norm": 1.141787052154541, "learning_rate": 4.6804843055835105e-05, "loss": 0.1603, "step": 38400 }, { "epoch": 1.3954502507449669, "eval_loss": 0.3523618280887604, "eval_runtime": 180.8149, "eval_samples_per_second": 41.003, "eval_steps_per_second": 5.127, "eval_wer": 0.17399749487174831, "step": 38400 }, { "epoch": 1.395813649247765, "grad_norm": 1.5692111253738403, "learning_rate": 4.6802304175865936e-05, "loss": 0.1328, "step": 38410 }, { "epoch": 1.3961770477505633, "grad_norm": 0.4182591140270233, "learning_rate": 4.679976435651305e-05, "loss": 0.1562, "step": 38420 }, { "epoch": 1.3965404462533615, "grad_norm": 0.6963622570037842, "learning_rate": 4.67972235978859e-05, "loss": 0.1131, "step": 38430 }, { "epoch": 1.3969038447561597, "grad_norm": 1.0345783233642578, "learning_rate": 4.679468190009392e-05, "loss": 0.1231, "step": 38440 }, { "epoch": 1.3972672432589577, "grad_norm": 1.6084190607070923, "learning_rate": 4.679213926324665e-05, "loss": 0.133, "step": 38450 }, { "epoch": 1.3976306417617559, "grad_norm": 1.2635602951049805, "learning_rate": 4.678959568745364e-05, "loss": 0.1344, "step": 38460 }, { "epoch": 1.397994040264554, "grad_norm": 0.6128044724464417, "learning_rate": 4.678705117282447e-05, "loss": 0.1639, "step": 38470 }, { "epoch": 1.3983574387673523, "grad_norm": 1.128151297569275, "learning_rate": 4.6784505719468795e-05, "loss": 0.1342, "step": 38480 }, { "epoch": 1.3987208372701505, "grad_norm": 1.6067559719085693, "learning_rate": 4.678195932749627e-05, "loss": 0.1344, "step": 38490 }, { "epoch": 1.3990842357729485, "grad_norm": 0.4303024113178253, "learning_rate": 4.677941199701662e-05, "loss": 0.1519, "step": 38500 }, { "epoch": 1.399447634275747, "grad_norm": 3.09531307220459, "learning_rate": 4.6776863728139596e-05, "loss": 0.1197, "step": 38510 }, { "epoch": 1.399811032778545, "grad_norm": 1.2062981128692627, "learning_rate": 4.6774314520975e-05, "loss": 0.1488, "step": 38520 }, { "epoch": 1.4001744312813431, "grad_norm": 0.7981544733047485, "learning_rate": 4.6771764375632664e-05, "loss": 0.1155, "step": 38530 }, { "epoch": 1.4005378297841413, "grad_norm": 0.6589852571487427, "learning_rate": 4.676921329222247e-05, "loss": 0.1463, "step": 38540 }, { "epoch": 1.4009012282869395, "grad_norm": 1.496664047241211, "learning_rate": 4.676666127085433e-05, "loss": 0.1811, "step": 38550 }, { "epoch": 1.4012646267897377, "grad_norm": 0.7335402965545654, "learning_rate": 4.676410831163819e-05, "loss": 0.1364, "step": 38560 }, { "epoch": 1.4016280252925357, "grad_norm": 0.8753761053085327, "learning_rate": 4.676155441468407e-05, "loss": 0.1954, "step": 38570 }, { "epoch": 1.401991423795334, "grad_norm": 1.4288660287857056, "learning_rate": 4.6758999580101994e-05, "loss": 0.1347, "step": 38580 }, { "epoch": 1.4023548222981321, "grad_norm": 1.1383757591247559, "learning_rate": 4.675644380800205e-05, "loss": 0.1604, "step": 38590 }, { "epoch": 1.4027182208009303, "grad_norm": 1.4642599821090698, "learning_rate": 4.6753887098494344e-05, "loss": 0.1878, "step": 38600 }, { "epoch": 1.4030816193037285, "grad_norm": 0.9396153688430786, "learning_rate": 4.675132945168905e-05, "loss": 0.1201, "step": 38610 }, { "epoch": 1.4034450178065265, "grad_norm": 0.835436999797821, "learning_rate": 4.674877086769636e-05, "loss": 0.4225, "step": 38620 }, { "epoch": 1.4038084163093247, "grad_norm": 0.9934596419334412, "learning_rate": 4.674621134662651e-05, "loss": 0.1145, "step": 38630 }, { "epoch": 1.404171814812123, "grad_norm": 1.5066030025482178, "learning_rate": 4.674365088858979e-05, "loss": 0.101, "step": 38640 }, { "epoch": 1.4045352133149211, "grad_norm": 2.4759950637817383, "learning_rate": 4.674108949369652e-05, "loss": 0.1225, "step": 38650 }, { "epoch": 1.4048986118177194, "grad_norm": 2.4329168796539307, "learning_rate": 4.6738527162057054e-05, "loss": 0.1469, "step": 38660 }, { "epoch": 1.4052620103205173, "grad_norm": 0.7068483829498291, "learning_rate": 4.67359638937818e-05, "loss": 0.237, "step": 38670 }, { "epoch": 1.4056254088233158, "grad_norm": 2.3423826694488525, "learning_rate": 4.6733399688981207e-05, "loss": 0.1095, "step": 38680 }, { "epoch": 1.4059888073261138, "grad_norm": 0.7500453591346741, "learning_rate": 4.673083454776575e-05, "loss": 0.1337, "step": 38690 }, { "epoch": 1.406352205828912, "grad_norm": 2.1220805644989014, "learning_rate": 4.6728268470245937e-05, "loss": 0.1489, "step": 38700 }, { "epoch": 1.4067156043317102, "grad_norm": 3.195551633834839, "learning_rate": 4.672570145653234e-05, "loss": 0.1456, "step": 38710 }, { "epoch": 1.4070790028345084, "grad_norm": 0.8845533728599548, "learning_rate": 4.672313350673558e-05, "loss": 0.1529, "step": 38720 }, { "epoch": 1.4074424013373066, "grad_norm": 1.278830885887146, "learning_rate": 4.6720564620966294e-05, "loss": 0.1209, "step": 38730 }, { "epoch": 1.4078057998401046, "grad_norm": 1.8450745344161987, "learning_rate": 4.671799479933515e-05, "loss": 0.0863, "step": 38740 }, { "epoch": 1.4081691983429028, "grad_norm": 0.5718597173690796, "learning_rate": 4.6715424041952894e-05, "loss": 0.1597, "step": 38750 }, { "epoch": 1.408532596845701, "grad_norm": 3.594273090362549, "learning_rate": 4.671285234893027e-05, "loss": 0.1373, "step": 38760 }, { "epoch": 1.4088959953484992, "grad_norm": 1.3270690441131592, "learning_rate": 4.671027972037809e-05, "loss": 0.1727, "step": 38770 }, { "epoch": 1.4092593938512974, "grad_norm": 88.81269073486328, "learning_rate": 4.670770615640721e-05, "loss": 1.6965, "step": 38780 }, { "epoch": 1.4096227923540954, "grad_norm": 1.1233614683151245, "learning_rate": 4.670513165712851e-05, "loss": 0.1316, "step": 38790 }, { "epoch": 1.4099861908568938, "grad_norm": 0.49995678663253784, "learning_rate": 4.6702556222652905e-05, "loss": 0.1492, "step": 38800 }, { "epoch": 1.4103495893596918, "grad_norm": 0.7330392599105835, "learning_rate": 4.669997985309138e-05, "loss": 0.1065, "step": 38810 }, { "epoch": 1.41071298786249, "grad_norm": 0.49762871861457825, "learning_rate": 4.6697402548554925e-05, "loss": 0.167, "step": 38820 }, { "epoch": 1.4110763863652882, "grad_norm": 3.198273181915283, "learning_rate": 4.6694824309154596e-05, "loss": 0.5685, "step": 38830 }, { "epoch": 1.4114397848680864, "grad_norm": 0.7750107645988464, "learning_rate": 4.6692245135001476e-05, "loss": 0.1291, "step": 38840 }, { "epoch": 1.4118031833708846, "grad_norm": 0.6449529528617859, "learning_rate": 4.66896650262067e-05, "loss": 0.1522, "step": 38850 }, { "epoch": 1.4121665818736826, "grad_norm": 0.7553302049636841, "learning_rate": 4.668708398288142e-05, "loss": 0.1089, "step": 38860 }, { "epoch": 1.4125299803764808, "grad_norm": 0.3948783576488495, "learning_rate": 4.6684502005136864e-05, "loss": 0.1421, "step": 38870 }, { "epoch": 1.412893378879279, "grad_norm": 0.7775730490684509, "learning_rate": 4.668191909308426e-05, "loss": 0.1014, "step": 38880 }, { "epoch": 1.4132567773820772, "grad_norm": 0.3911081850528717, "learning_rate": 4.667933524683492e-05, "loss": 0.1504, "step": 38890 }, { "epoch": 1.4136201758848754, "grad_norm": 0.48814857006073, "learning_rate": 4.667675046650015e-05, "loss": 0.138, "step": 38900 }, { "epoch": 1.4139835743876734, "grad_norm": 2.613859176635742, "learning_rate": 4.667416475219133e-05, "loss": 0.1158, "step": 38910 }, { "epoch": 1.4143469728904716, "grad_norm": 0.9073649048805237, "learning_rate": 4.667157810401987e-05, "loss": 0.1753, "step": 38920 }, { "epoch": 1.4147103713932698, "grad_norm": 1.4347561597824097, "learning_rate": 4.666899052209722e-05, "loss": 0.1397, "step": 38930 }, { "epoch": 1.415073769896068, "grad_norm": 1.014145851135254, "learning_rate": 4.666640200653486e-05, "loss": 0.1425, "step": 38940 }, { "epoch": 1.4154371683988662, "grad_norm": 0.5404003858566284, "learning_rate": 4.6663812557444334e-05, "loss": 0.1918, "step": 38950 }, { "epoch": 1.4158005669016644, "grad_norm": 0.7507174015045166, "learning_rate": 4.66612221749372e-05, "loss": 0.1492, "step": 38960 }, { "epoch": 1.4161639654044627, "grad_norm": 0.47643178701400757, "learning_rate": 4.665863085912508e-05, "loss": 0.136, "step": 38970 }, { "epoch": 1.4165273639072606, "grad_norm": 1.3153865337371826, "learning_rate": 4.66560386101196e-05, "loss": 0.1421, "step": 38980 }, { "epoch": 1.4168907624100588, "grad_norm": 0.7111690640449524, "learning_rate": 4.665344542803248e-05, "loss": 0.1184, "step": 38990 }, { "epoch": 1.417254160912857, "grad_norm": 5.245561599731445, "learning_rate": 4.665085131297544e-05, "loss": 0.1343, "step": 39000 }, { "epoch": 1.417254160912857, "eval_loss": 0.34118154644966125, "eval_runtime": 180.7794, "eval_samples_per_second": 41.011, "eval_steps_per_second": 5.128, "eval_wer": 0.17092962041861057, "step": 39000 }, { "epoch": 1.4176175594156553, "grad_norm": 1.7938792705535889, "learning_rate": 4.664825626506025e-05, "loss": 0.1703, "step": 39010 }, { "epoch": 1.4179809579184535, "grad_norm": 0.7494391202926636, "learning_rate": 4.664566028439873e-05, "loss": 0.1644, "step": 39020 }, { "epoch": 1.4183443564212515, "grad_norm": 0.7234100699424744, "learning_rate": 4.664306337110272e-05, "loss": 0.1115, "step": 39030 }, { "epoch": 1.4187077549240497, "grad_norm": 0.6583457589149475, "learning_rate": 4.6640465525284114e-05, "loss": 0.1448, "step": 39040 }, { "epoch": 1.4190711534268479, "grad_norm": 1.7157262563705444, "learning_rate": 4.663786674705484e-05, "loss": 0.176, "step": 39050 }, { "epoch": 1.419434551929646, "grad_norm": 1.9635696411132812, "learning_rate": 4.663526703652688e-05, "loss": 0.1453, "step": 39060 }, { "epoch": 1.4197979504324443, "grad_norm": 0.5495097041130066, "learning_rate": 4.663266639381224e-05, "loss": 0.1114, "step": 39070 }, { "epoch": 1.4201613489352423, "grad_norm": 1.3244194984436035, "learning_rate": 4.663006481902298e-05, "loss": 0.146, "step": 39080 }, { "epoch": 1.4205247474380407, "grad_norm": 1.1683903932571411, "learning_rate": 4.662746231227119e-05, "loss": 0.1351, "step": 39090 }, { "epoch": 1.4208881459408387, "grad_norm": 2.392890214920044, "learning_rate": 4.662485887366899e-05, "loss": 0.2378, "step": 39100 }, { "epoch": 1.4212515444436369, "grad_norm": 1.7971110343933105, "learning_rate": 4.662225450332856e-05, "loss": 0.1168, "step": 39110 }, { "epoch": 1.421614942946435, "grad_norm": 1.0140278339385986, "learning_rate": 4.6619649201362124e-05, "loss": 0.1332, "step": 39120 }, { "epoch": 1.4219783414492333, "grad_norm": 3.1083390712738037, "learning_rate": 4.661704296788193e-05, "loss": 0.1483, "step": 39130 }, { "epoch": 1.4223417399520315, "grad_norm": 0.6209553480148315, "learning_rate": 4.661443580300026e-05, "loss": 0.1212, "step": 39140 }, { "epoch": 1.4227051384548295, "grad_norm": 0.6733147501945496, "learning_rate": 4.661182770682946e-05, "loss": 0.1343, "step": 39150 }, { "epoch": 1.4230685369576277, "grad_norm": 1.1502153873443604, "learning_rate": 4.660921867948189e-05, "loss": 0.1508, "step": 39160 }, { "epoch": 1.423431935460426, "grad_norm": 0.33853545784950256, "learning_rate": 4.660660872106999e-05, "loss": 0.1661, "step": 39170 }, { "epoch": 1.423795333963224, "grad_norm": 1.0470768213272095, "learning_rate": 4.660399783170618e-05, "loss": 0.1231, "step": 39180 }, { "epoch": 1.4241587324660223, "grad_norm": 0.5467321872711182, "learning_rate": 4.660138601150298e-05, "loss": 0.1166, "step": 39190 }, { "epoch": 1.4245221309688203, "grad_norm": 1.3816486597061157, "learning_rate": 4.659877326057291e-05, "loss": 0.1177, "step": 39200 }, { "epoch": 1.4248855294716185, "grad_norm": 1.0260194540023804, "learning_rate": 4.659615957902855e-05, "loss": 0.153, "step": 39210 }, { "epoch": 1.4252489279744167, "grad_norm": 0.7443385124206543, "learning_rate": 4.6593544966982524e-05, "loss": 0.1735, "step": 39220 }, { "epoch": 1.425612326477215, "grad_norm": 1.1526659727096558, "learning_rate": 4.659092942454746e-05, "loss": 0.1491, "step": 39230 }, { "epoch": 1.4259757249800131, "grad_norm": 0.8841147422790527, "learning_rate": 4.658831295183608e-05, "loss": 0.1611, "step": 39240 }, { "epoch": 1.4263391234828113, "grad_norm": 1.596132516860962, "learning_rate": 4.65856955489611e-05, "loss": 0.1543, "step": 39250 }, { "epoch": 1.4267025219856095, "grad_norm": 2.1355278491973877, "learning_rate": 4.65830772160353e-05, "loss": 0.1489, "step": 39260 }, { "epoch": 1.4270659204884075, "grad_norm": 0.9080690145492554, "learning_rate": 4.6580457953171496e-05, "loss": 0.28, "step": 39270 }, { "epoch": 1.4274293189912057, "grad_norm": 0.9187225699424744, "learning_rate": 4.6577837760482546e-05, "loss": 0.1243, "step": 39280 }, { "epoch": 1.427792717494004, "grad_norm": 1.0374051332473755, "learning_rate": 4.6575216638081335e-05, "loss": 0.1088, "step": 39290 }, { "epoch": 1.4281561159968021, "grad_norm": 0.5795188546180725, "learning_rate": 4.657259458608081e-05, "loss": 0.1288, "step": 39300 }, { "epoch": 1.4285195144996004, "grad_norm": 1.0621544122695923, "learning_rate": 4.656997160459394e-05, "loss": 0.1311, "step": 39310 }, { "epoch": 1.4288829130023983, "grad_norm": 0.35324281454086304, "learning_rate": 4.656734769373373e-05, "loss": 0.1635, "step": 39320 }, { "epoch": 1.4292463115051965, "grad_norm": 1.4020544290542603, "learning_rate": 4.656472285361326e-05, "loss": 0.1281, "step": 39330 }, { "epoch": 1.4296097100079947, "grad_norm": 0.9644222259521484, "learning_rate": 4.65620970843456e-05, "loss": 0.111, "step": 39340 }, { "epoch": 1.429973108510793, "grad_norm": 3.3897273540496826, "learning_rate": 4.65594703860439e-05, "loss": 0.2219, "step": 39350 }, { "epoch": 1.4303365070135912, "grad_norm": 1.1418486833572388, "learning_rate": 4.655684275882132e-05, "loss": 0.1347, "step": 39360 }, { "epoch": 1.4306999055163891, "grad_norm": 0.7159132361412048, "learning_rate": 4.655421420279109e-05, "loss": 0.2765, "step": 39370 }, { "epoch": 1.4310633040191876, "grad_norm": 1.4189454317092896, "learning_rate": 4.655158471806647e-05, "loss": 0.1247, "step": 39380 }, { "epoch": 1.4314267025219856, "grad_norm": 1.0472137928009033, "learning_rate": 4.6548954304760725e-05, "loss": 0.1193, "step": 39390 }, { "epoch": 1.4317901010247838, "grad_norm": 1.6883853673934937, "learning_rate": 4.654632296298723e-05, "loss": 0.1336, "step": 39400 }, { "epoch": 1.432153499527582, "grad_norm": 3.4792749881744385, "learning_rate": 4.654369069285933e-05, "loss": 0.1224, "step": 39410 }, { "epoch": 1.4325168980303802, "grad_norm": 0.9245648384094238, "learning_rate": 4.654105749449046e-05, "loss": 0.1684, "step": 39420 }, { "epoch": 1.4328802965331784, "grad_norm": 0.7134508490562439, "learning_rate": 4.653842336799406e-05, "loss": 0.1268, "step": 39430 }, { "epoch": 1.4332436950359764, "grad_norm": 0.8069209456443787, "learning_rate": 4.6535788313483624e-05, "loss": 0.1224, "step": 39440 }, { "epoch": 1.4336070935387746, "grad_norm": 1.5594000816345215, "learning_rate": 4.6533152331072706e-05, "loss": 0.178, "step": 39450 }, { "epoch": 1.4339704920415728, "grad_norm": 0.6994547843933105, "learning_rate": 4.653051542087486e-05, "loss": 0.1184, "step": 39460 }, { "epoch": 1.434333890544371, "grad_norm": 1.0693833827972412, "learning_rate": 4.6527877583003714e-05, "loss": 0.1696, "step": 39470 }, { "epoch": 1.4346972890471692, "grad_norm": 1.625401496887207, "learning_rate": 4.652523881757292e-05, "loss": 0.9774, "step": 39480 }, { "epoch": 1.4350606875499672, "grad_norm": 1.0620099306106567, "learning_rate": 4.652259912469618e-05, "loss": 0.141, "step": 39490 }, { "epoch": 1.4354240860527654, "grad_norm": 0.8238838315010071, "learning_rate": 4.6519958504487206e-05, "loss": 0.2848, "step": 39500 }, { "epoch": 1.4357874845555636, "grad_norm": 4.339720726013184, "learning_rate": 4.6517316957059796e-05, "loss": 0.1307, "step": 39510 }, { "epoch": 1.4361508830583618, "grad_norm": 1.6212254762649536, "learning_rate": 4.6514674482527754e-05, "loss": 0.1486, "step": 39520 }, { "epoch": 1.43651428156116, "grad_norm": 1.0148829221725464, "learning_rate": 4.651203108100494e-05, "loss": 0.1216, "step": 39530 }, { "epoch": 1.4368776800639582, "grad_norm": 0.8317530155181885, "learning_rate": 4.650938675260525e-05, "loss": 0.1058, "step": 39540 }, { "epoch": 1.4372410785667564, "grad_norm": 1.1663634777069092, "learning_rate": 4.6506741497442614e-05, "loss": 0.2442, "step": 39550 }, { "epoch": 1.4376044770695544, "grad_norm": 1.4678232669830322, "learning_rate": 4.6504095315631006e-05, "loss": 0.1353, "step": 39560 }, { "epoch": 1.4379678755723526, "grad_norm": 0.8588351011276245, "learning_rate": 4.6501448207284446e-05, "loss": 0.157, "step": 39570 }, { "epoch": 1.4383312740751508, "grad_norm": 0.7000893950462341, "learning_rate": 4.6498800172516985e-05, "loss": 0.1191, "step": 39580 }, { "epoch": 1.438694672577949, "grad_norm": 1.0854928493499756, "learning_rate": 4.649615121144271e-05, "loss": 0.1258, "step": 39590 }, { "epoch": 1.4390580710807472, "grad_norm": 1.003110647201538, "learning_rate": 4.649350132417577e-05, "loss": 0.1264, "step": 39600 }, { "epoch": 1.4390580710807472, "eval_loss": 0.34889447689056396, "eval_runtime": 180.6213, "eval_samples_per_second": 41.047, "eval_steps_per_second": 5.132, "eval_wer": 0.17173743351425927, "step": 39600 }, { "epoch": 1.4394214695835452, "grad_norm": 0.8560311198234558, "learning_rate": 4.649085051083033e-05, "loss": 0.129, "step": 39610 }, { "epoch": 1.4397848680863434, "grad_norm": 1.6158629655838013, "learning_rate": 4.6488198771520605e-05, "loss": 0.1359, "step": 39620 }, { "epoch": 1.4401482665891416, "grad_norm": 0.6946542263031006, "learning_rate": 4.6485546106360856e-05, "loss": 0.1195, "step": 39630 }, { "epoch": 1.4405116650919398, "grad_norm": 0.6091057062149048, "learning_rate": 4.648289251546536e-05, "loss": 0.1321, "step": 39640 }, { "epoch": 1.440875063594738, "grad_norm": 0.37464994192123413, "learning_rate": 4.648023799894847e-05, "loss": 0.1339, "step": 39650 }, { "epoch": 1.441238462097536, "grad_norm": 0.5378652215003967, "learning_rate": 4.647758255692456e-05, "loss": 0.1127, "step": 39660 }, { "epoch": 1.4416018606003345, "grad_norm": 1.4586265087127686, "learning_rate": 4.647492618950802e-05, "loss": 0.2027, "step": 39670 }, { "epoch": 1.4419652591031324, "grad_norm": 0.6388387084007263, "learning_rate": 4.647226889681333e-05, "loss": 0.1197, "step": 39680 }, { "epoch": 1.4423286576059307, "grad_norm": 0.7849758267402649, "learning_rate": 4.646961067895496e-05, "loss": 0.1165, "step": 39690 }, { "epoch": 1.4426920561087289, "grad_norm": 1.16459059715271, "learning_rate": 4.6466951536047464e-05, "loss": 0.1799, "step": 39700 }, { "epoch": 1.443055454611527, "grad_norm": 3.2792208194732666, "learning_rate": 4.64642914682054e-05, "loss": 0.1191, "step": 39710 }, { "epoch": 1.4434188531143253, "grad_norm": 0.3408263921737671, "learning_rate": 4.64616304755434e-05, "loss": 0.1971, "step": 39720 }, { "epoch": 1.4437822516171233, "grad_norm": 0.45033156871795654, "learning_rate": 4.645896855817609e-05, "loss": 0.1441, "step": 39730 }, { "epoch": 1.4441456501199215, "grad_norm": 2.284130096435547, "learning_rate": 4.645630571621817e-05, "loss": 0.122, "step": 39740 }, { "epoch": 1.4445090486227197, "grad_norm": 3.047889232635498, "learning_rate": 4.645364194978439e-05, "loss": 0.172, "step": 39750 }, { "epoch": 1.4448724471255179, "grad_norm": 1.2850980758666992, "learning_rate": 4.645097725898951e-05, "loss": 0.1088, "step": 39760 }, { "epoch": 1.445235845628316, "grad_norm": 0.8555011749267578, "learning_rate": 4.644831164394834e-05, "loss": 0.1516, "step": 39770 }, { "epoch": 1.445599244131114, "grad_norm": 0.9414917230606079, "learning_rate": 4.644564510477574e-05, "loss": 0.1104, "step": 39780 }, { "epoch": 1.4459626426339123, "grad_norm": 1.658109188079834, "learning_rate": 4.644297764158659e-05, "loss": 0.1361, "step": 39790 }, { "epoch": 1.4463260411367105, "grad_norm": 1.9984872341156006, "learning_rate": 4.644030925449583e-05, "loss": 0.5671, "step": 39800 }, { "epoch": 1.4466894396395087, "grad_norm": 2.086899995803833, "learning_rate": 4.6437639943618424e-05, "loss": 0.124, "step": 39810 }, { "epoch": 1.447052838142307, "grad_norm": 0.39096391201019287, "learning_rate": 4.64349697090694e-05, "loss": 0.2021, "step": 39820 }, { "epoch": 1.447416236645105, "grad_norm": 0.9561779499053955, "learning_rate": 4.643229855096378e-05, "loss": 0.1371, "step": 39830 }, { "epoch": 1.4477796351479033, "grad_norm": 1.6168954372406006, "learning_rate": 4.6429626469416685e-05, "loss": 0.1149, "step": 39840 }, { "epoch": 1.4481430336507013, "grad_norm": 1.4393991231918335, "learning_rate": 4.642695346454323e-05, "loss": 0.1472, "step": 39850 }, { "epoch": 1.4485064321534995, "grad_norm": 1.9806978702545166, "learning_rate": 4.642427953645859e-05, "loss": 0.1443, "step": 39860 }, { "epoch": 1.4488698306562977, "grad_norm": 1.012040615081787, "learning_rate": 4.642160468527797e-05, "loss": 0.155, "step": 39870 }, { "epoch": 1.449233229159096, "grad_norm": 0.620448112487793, "learning_rate": 4.641892891111662e-05, "loss": 0.12, "step": 39880 }, { "epoch": 1.4495966276618941, "grad_norm": 0.5192741751670837, "learning_rate": 4.6416252214089834e-05, "loss": 0.1096, "step": 39890 }, { "epoch": 1.449960026164692, "grad_norm": 1.4234672784805298, "learning_rate": 4.641357459431294e-05, "loss": 0.1548, "step": 39900 }, { "epoch": 1.4503234246674903, "grad_norm": 1.2218151092529297, "learning_rate": 4.641089605190131e-05, "loss": 0.1452, "step": 39910 }, { "epoch": 1.4506868231702885, "grad_norm": 0.5271123051643372, "learning_rate": 4.6408216586970344e-05, "loss": 0.1238, "step": 39920 }, { "epoch": 1.4510502216730867, "grad_norm": 1.0670936107635498, "learning_rate": 4.640553619963549e-05, "loss": 0.7119, "step": 39930 }, { "epoch": 1.451413620175885, "grad_norm": 2.9407644271850586, "learning_rate": 4.6402854890012256e-05, "loss": 0.1246, "step": 39940 }, { "epoch": 1.451777018678683, "grad_norm": 0.8619846701622009, "learning_rate": 4.6400172658216144e-05, "loss": 0.1524, "step": 39950 }, { "epoch": 1.4521404171814813, "grad_norm": 1.110069751739502, "learning_rate": 4.639748950436275e-05, "loss": 0.1147, "step": 39960 }, { "epoch": 1.4525038156842793, "grad_norm": 0.6605796813964844, "learning_rate": 4.639480542856764e-05, "loss": 0.1266, "step": 39970 }, { "epoch": 1.4528672141870775, "grad_norm": 0.8681196570396423, "learning_rate": 4.639212043094651e-05, "loss": 0.1168, "step": 39980 }, { "epoch": 1.4532306126898757, "grad_norm": 0.7025002241134644, "learning_rate": 4.6389434511615015e-05, "loss": 0.1117, "step": 39990 }, { "epoch": 1.453594011192674, "grad_norm": 1.203703761100769, "learning_rate": 4.6386747670688897e-05, "loss": 0.1524, "step": 40000 }, { "epoch": 1.4539574096954722, "grad_norm": 2.274060010910034, "learning_rate": 4.638405990828391e-05, "loss": 0.113, "step": 40010 }, { "epoch": 1.4543208081982701, "grad_norm": 1.424842357635498, "learning_rate": 4.638137122451587e-05, "loss": 0.1986, "step": 40020 }, { "epoch": 1.4546842067010683, "grad_norm": 2.1440541744232178, "learning_rate": 4.637868161950062e-05, "loss": 0.1406, "step": 40030 }, { "epoch": 1.4550476052038666, "grad_norm": 0.9488077759742737, "learning_rate": 4.6375991093354035e-05, "loss": 0.1827, "step": 40040 }, { "epoch": 1.4554110037066648, "grad_norm": 7.7812724113464355, "learning_rate": 4.637329964619206e-05, "loss": 0.2488, "step": 40050 }, { "epoch": 1.455774402209463, "grad_norm": 1.2816716432571411, "learning_rate": 4.6370607278130646e-05, "loss": 0.1125, "step": 40060 }, { "epoch": 1.456137800712261, "grad_norm": 0.6140567660331726, "learning_rate": 4.63679139892858e-05, "loss": 0.1526, "step": 40070 }, { "epoch": 1.4565011992150592, "grad_norm": 1.3745895624160767, "learning_rate": 4.636521977977357e-05, "loss": 0.1406, "step": 40080 }, { "epoch": 1.4568645977178574, "grad_norm": 0.934893786907196, "learning_rate": 4.636252464971004e-05, "loss": 0.1236, "step": 40090 }, { "epoch": 1.4572279962206556, "grad_norm": 4.1601738929748535, "learning_rate": 4.635982859921132e-05, "loss": 0.1256, "step": 40100 }, { "epoch": 1.4575913947234538, "grad_norm": 1.3297815322875977, "learning_rate": 4.635713162839359e-05, "loss": 0.1076, "step": 40110 }, { "epoch": 1.457954793226252, "grad_norm": 1.6878186464309692, "learning_rate": 4.6354433737373055e-05, "loss": 0.2212, "step": 40120 }, { "epoch": 1.4583181917290502, "grad_norm": 1.2743428945541382, "learning_rate": 4.6351734926265946e-05, "loss": 0.105, "step": 40130 }, { "epoch": 1.4586815902318482, "grad_norm": 1.5052075386047363, "learning_rate": 4.634903519518854e-05, "loss": 0.234, "step": 40140 }, { "epoch": 1.4590449887346464, "grad_norm": 1.7959517240524292, "learning_rate": 4.634633454425718e-05, "loss": 0.1572, "step": 40150 }, { "epoch": 1.4594083872374446, "grad_norm": 0.578747034072876, "learning_rate": 4.63436329735882e-05, "loss": 0.1111, "step": 40160 }, { "epoch": 1.4597717857402428, "grad_norm": 0.3820185959339142, "learning_rate": 4.634093048329803e-05, "loss": 0.1589, "step": 40170 }, { "epoch": 1.460135184243041, "grad_norm": 1.621700406074524, "learning_rate": 4.633822707350309e-05, "loss": 0.1401, "step": 40180 }, { "epoch": 1.460498582745839, "grad_norm": 1.2941464185714722, "learning_rate": 4.633552274431987e-05, "loss": 0.1678, "step": 40190 }, { "epoch": 1.4608619812486372, "grad_norm": 0.9888546466827393, "learning_rate": 4.633281749586488e-05, "loss": 0.1649, "step": 40200 }, { "epoch": 1.4608619812486372, "eval_loss": 0.3407399654388428, "eval_runtime": 180.8259, "eval_samples_per_second": 41.001, "eval_steps_per_second": 5.126, "eval_wer": 0.17137437144879916, "step": 40200 }, { "epoch": 1.4612253797514354, "grad_norm": 0.6030024290084839, "learning_rate": 4.633011132825469e-05, "loss": 0.1086, "step": 40210 }, { "epoch": 1.4615887782542336, "grad_norm": 2.6498842239379883, "learning_rate": 4.63274042416059e-05, "loss": 0.1341, "step": 40220 }, { "epoch": 1.4619521767570318, "grad_norm": 1.0614917278289795, "learning_rate": 4.632469623603514e-05, "loss": 0.1197, "step": 40230 }, { "epoch": 1.4623155752598298, "grad_norm": 1.7546344995498657, "learning_rate": 4.63219873116591e-05, "loss": 0.1518, "step": 40240 }, { "epoch": 1.4626789737626282, "grad_norm": 2.726959705352783, "learning_rate": 4.631927746859448e-05, "loss": 0.1514, "step": 40250 }, { "epoch": 1.4630423722654262, "grad_norm": 1.0468662977218628, "learning_rate": 4.6316566706958055e-05, "loss": 0.1309, "step": 40260 }, { "epoch": 1.4634057707682244, "grad_norm": 0.7446948885917664, "learning_rate": 4.631385502686661e-05, "loss": 0.176, "step": 40270 }, { "epoch": 1.4637691692710226, "grad_norm": 2.804288148880005, "learning_rate": 4.6311142428436996e-05, "loss": 0.103, "step": 40280 }, { "epoch": 1.4641325677738208, "grad_norm": 1.1131904125213623, "learning_rate": 4.630842891178607e-05, "loss": 0.1168, "step": 40290 }, { "epoch": 1.464495966276619, "grad_norm": 1.084128975868225, "learning_rate": 4.6305714477030766e-05, "loss": 0.5731, "step": 40300 }, { "epoch": 1.464859364779417, "grad_norm": 2.9369328022003174, "learning_rate": 4.630299912428803e-05, "loss": 0.12, "step": 40310 }, { "epoch": 1.4652227632822152, "grad_norm": 0.449259489774704, "learning_rate": 4.630028285367485e-05, "loss": 0.1396, "step": 40320 }, { "epoch": 1.4655861617850134, "grad_norm": 0.6570121049880981, "learning_rate": 4.6297565665308276e-05, "loss": 0.1143, "step": 40330 }, { "epoch": 1.4659495602878116, "grad_norm": 0.9117491841316223, "learning_rate": 4.629484755930537e-05, "loss": 0.0996, "step": 40340 }, { "epoch": 1.4663129587906099, "grad_norm": 1.197102665901184, "learning_rate": 4.629212853578325e-05, "loss": 0.1522, "step": 40350 }, { "epoch": 1.4666763572934078, "grad_norm": 2.8265323638916016, "learning_rate": 4.6289408594859075e-05, "loss": 0.1239, "step": 40360 }, { "epoch": 1.467039755796206, "grad_norm": 0.4153755307197571, "learning_rate": 4.628668773665002e-05, "loss": 0.1448, "step": 40370 }, { "epoch": 1.4674031542990043, "grad_norm": 0.9625080823898315, "learning_rate": 4.628396596127335e-05, "loss": 0.1249, "step": 40380 }, { "epoch": 1.4677665528018025, "grad_norm": 3.2815330028533936, "learning_rate": 4.62812432688463e-05, "loss": 0.1238, "step": 40390 }, { "epoch": 1.4681299513046007, "grad_norm": 0.5435966849327087, "learning_rate": 4.627851965948619e-05, "loss": 0.149, "step": 40400 }, { "epoch": 1.4684933498073989, "grad_norm": 1.5212714672088623, "learning_rate": 4.6275795133310383e-05, "loss": 0.1458, "step": 40410 }, { "epoch": 1.468856748310197, "grad_norm": 0.9295603632926941, "learning_rate": 4.627306969043627e-05, "loss": 0.2099, "step": 40420 }, { "epoch": 1.469220146812995, "grad_norm": 1.0282838344573975, "learning_rate": 4.627034333098127e-05, "loss": 0.1408, "step": 40430 }, { "epoch": 1.4695835453157933, "grad_norm": 1.5155305862426758, "learning_rate": 4.6267616055062855e-05, "loss": 0.2335, "step": 40440 }, { "epoch": 1.4699469438185915, "grad_norm": 0.6780584454536438, "learning_rate": 4.626488786279854e-05, "loss": 0.1611, "step": 40450 }, { "epoch": 1.4703103423213897, "grad_norm": 1.1376898288726807, "learning_rate": 4.626215875430586e-05, "loss": 0.1282, "step": 40460 }, { "epoch": 1.470673740824188, "grad_norm": 9.535052299499512, "learning_rate": 4.6259428729702414e-05, "loss": 0.3186, "step": 40470 }, { "epoch": 1.4710371393269859, "grad_norm": 1.4366358518600464, "learning_rate": 4.625669778910582e-05, "loss": 0.1202, "step": 40480 }, { "epoch": 1.471400537829784, "grad_norm": 0.9380308985710144, "learning_rate": 4.625396593263376e-05, "loss": 0.1125, "step": 40490 }, { "epoch": 1.4717639363325823, "grad_norm": 1.2832533121109009, "learning_rate": 4.6251233160403916e-05, "loss": 2.9509, "step": 40500 }, { "epoch": 1.4721273348353805, "grad_norm": 3.405505895614624, "learning_rate": 4.624849947253406e-05, "loss": 0.1369, "step": 40510 }, { "epoch": 1.4724907333381787, "grad_norm": 0.7077997326850891, "learning_rate": 4.624576486914196e-05, "loss": 0.1233, "step": 40520 }, { "epoch": 1.4728541318409767, "grad_norm": 0.7341346740722656, "learning_rate": 4.624302935034545e-05, "loss": 0.1146, "step": 40530 }, { "epoch": 1.4732175303437751, "grad_norm": 0.9452338218688965, "learning_rate": 4.6240292916262376e-05, "loss": 0.1696, "step": 40540 }, { "epoch": 1.473580928846573, "grad_norm": 0.8162540197372437, "learning_rate": 4.623755556701066e-05, "loss": 0.138, "step": 40550 }, { "epoch": 1.4739443273493713, "grad_norm": 0.551977276802063, "learning_rate": 4.623481730270824e-05, "loss": 0.2382, "step": 40560 }, { "epoch": 1.4743077258521695, "grad_norm": 0.6273486018180847, "learning_rate": 4.623207812347309e-05, "loss": 0.1902, "step": 40570 }, { "epoch": 1.4746711243549677, "grad_norm": 0.5531787872314453, "learning_rate": 4.622933802942324e-05, "loss": 0.1103, "step": 40580 }, { "epoch": 1.475034522857766, "grad_norm": 0.4982399344444275, "learning_rate": 4.622659702067675e-05, "loss": 0.1081, "step": 40590 }, { "epoch": 1.475397921360564, "grad_norm": 0.9091404676437378, "learning_rate": 4.622385509735172e-05, "loss": 0.3927, "step": 40600 }, { "epoch": 1.4757613198633621, "grad_norm": 0.6481756567955017, "learning_rate": 4.622111225956629e-05, "loss": 0.1186, "step": 40610 }, { "epoch": 1.4761247183661603, "grad_norm": 1.1302157640457153, "learning_rate": 4.621836850743864e-05, "loss": 0.1667, "step": 40620 }, { "epoch": 1.4764881168689585, "grad_norm": 2.197112560272217, "learning_rate": 4.6215623841086974e-05, "loss": 0.1226, "step": 40630 }, { "epoch": 1.4768515153717567, "grad_norm": 3.960108995437622, "learning_rate": 4.621287826062957e-05, "loss": 0.1343, "step": 40640 }, { "epoch": 1.4772149138745547, "grad_norm": 1.0748779773712158, "learning_rate": 4.6210131766184714e-05, "loss": 0.1492, "step": 40650 }, { "epoch": 1.477578312377353, "grad_norm": 1.8779007196426392, "learning_rate": 4.620738435787075e-05, "loss": 0.1446, "step": 40660 }, { "epoch": 1.4779417108801511, "grad_norm": 0.5000749230384827, "learning_rate": 4.620463603580605e-05, "loss": 0.1762, "step": 40670 }, { "epoch": 1.4783051093829493, "grad_norm": 0.6756991147994995, "learning_rate": 4.620188680010903e-05, "loss": 0.1375, "step": 40680 }, { "epoch": 1.4786685078857476, "grad_norm": 0.7340139746665955, "learning_rate": 4.619941170692398e-05, "loss": 2.0738, "step": 40690 }, { "epoch": 1.4790319063885458, "grad_norm": 4.0170722007751465, "learning_rate": 4.6196660735651925e-05, "loss": 0.157, "step": 40700 }, { "epoch": 1.479395304891344, "grad_norm": 0.8254412412643433, "learning_rate": 4.619390885109118e-05, "loss": 0.1385, "step": 40710 }, { "epoch": 1.479758703394142, "grad_norm": 0.7477695345878601, "learning_rate": 4.619115605336031e-05, "loss": 0.1772, "step": 40720 }, { "epoch": 1.4801221018969402, "grad_norm": 0.9684391617774963, "learning_rate": 4.618840234257792e-05, "loss": 3.1891, "step": 40730 }, { "epoch": 1.4804855003997384, "grad_norm": 0.9600037336349487, "learning_rate": 4.6185647718862655e-05, "loss": 0.1289, "step": 40740 }, { "epoch": 1.4808488989025366, "grad_norm": 1.848919153213501, "learning_rate": 4.6182892182333226e-05, "loss": 0.1588, "step": 40750 }, { "epoch": 1.4812122974053348, "grad_norm": 1.707576036453247, "learning_rate": 4.6180135733108335e-05, "loss": 0.1578, "step": 40760 }, { "epoch": 1.4815756959081328, "grad_norm": 0.9908716678619385, "learning_rate": 4.617737837130675e-05, "loss": 0.1763, "step": 40770 }, { "epoch": 1.481939094410931, "grad_norm": 1.638818383216858, "learning_rate": 4.617462009704728e-05, "loss": 0.1323, "step": 40780 }, { "epoch": 1.4823024929137292, "grad_norm": 2.1605606079101562, "learning_rate": 4.6171860910448774e-05, "loss": 0.123, "step": 40790 }, { "epoch": 1.4826658914165274, "grad_norm": 0.7089453935623169, "learning_rate": 4.6169100811630106e-05, "loss": 0.1489, "step": 40800 }, { "epoch": 1.4826658914165274, "eval_loss": 0.3524834215641022, "eval_runtime": 180.6874, "eval_samples_per_second": 41.032, "eval_steps_per_second": 5.13, "eval_wer": 0.17247263419681594, "step": 40800 }, { "epoch": 1.4830292899193256, "grad_norm": 0.8966375589370728, "learning_rate": 4.616633980071021e-05, "loss": 0.1114, "step": 40810 }, { "epoch": 1.4833926884221236, "grad_norm": 0.8344945907592773, "learning_rate": 4.616357787780804e-05, "loss": 0.1765, "step": 40820 }, { "epoch": 1.483756086924922, "grad_norm": 1.2400190830230713, "learning_rate": 4.616081504304259e-05, "loss": 0.1805, "step": 40830 }, { "epoch": 1.48411948542772, "grad_norm": 0.9686151742935181, "learning_rate": 4.615805129653292e-05, "loss": 0.1259, "step": 40840 }, { "epoch": 1.4844828839305182, "grad_norm": 1.00034499168396, "learning_rate": 4.615528663839811e-05, "loss": 0.1632, "step": 40850 }, { "epoch": 1.4848462824333164, "grad_norm": 3.2312328815460205, "learning_rate": 4.6152521068757256e-05, "loss": 0.116, "step": 40860 }, { "epoch": 1.4852096809361146, "grad_norm": 0.4837055206298828, "learning_rate": 4.6149754587729535e-05, "loss": 0.1534, "step": 40870 }, { "epoch": 1.4855730794389128, "grad_norm": 8.643519401550293, "learning_rate": 4.614698719543413e-05, "loss": 0.1122, "step": 40880 }, { "epoch": 1.4859364779417108, "grad_norm": 0.9113799333572388, "learning_rate": 4.61442188919903e-05, "loss": 0.1046, "step": 40890 }, { "epoch": 1.486299876444509, "grad_norm": 0.7763462662696838, "learning_rate": 4.61414496775173e-05, "loss": 0.1515, "step": 40900 }, { "epoch": 1.4866632749473072, "grad_norm": 1.2019357681274414, "learning_rate": 4.6138679552134464e-05, "loss": 0.1372, "step": 40910 }, { "epoch": 1.4870266734501054, "grad_norm": 1.1948570013046265, "learning_rate": 4.6135908515961136e-05, "loss": 0.2073, "step": 40920 }, { "epoch": 1.4873900719529036, "grad_norm": 1.3027549982070923, "learning_rate": 4.6133136569116706e-05, "loss": 0.12, "step": 40930 }, { "epoch": 1.4877534704557016, "grad_norm": 1.4980496168136597, "learning_rate": 4.613036371172062e-05, "loss": 2.4225, "step": 40940 }, { "epoch": 1.4881168689584998, "grad_norm": 0.7265346050262451, "learning_rate": 4.612758994389234e-05, "loss": 0.1631, "step": 40950 }, { "epoch": 1.488480267461298, "grad_norm": 0.6485431790351868, "learning_rate": 4.612481526575138e-05, "loss": 0.1175, "step": 40960 }, { "epoch": 1.4888436659640962, "grad_norm": 0.9532496333122253, "learning_rate": 4.612203967741729e-05, "loss": 0.2368, "step": 40970 }, { "epoch": 1.4892070644668944, "grad_norm": 3.3696892261505127, "learning_rate": 4.6119263179009676e-05, "loss": 0.1388, "step": 40980 }, { "epoch": 1.4895704629696926, "grad_norm": 0.7628744840621948, "learning_rate": 4.611648577064814e-05, "loss": 0.1475, "step": 40990 }, { "epoch": 1.4899338614724909, "grad_norm": 1.4854507446289062, "learning_rate": 4.611370745245237e-05, "loss": 0.1717, "step": 41000 }, { "epoch": 1.4902972599752888, "grad_norm": 1.2280082702636719, "learning_rate": 4.6110928224542074e-05, "loss": 0.1389, "step": 41010 }, { "epoch": 1.490660658478087, "grad_norm": 0.5658448934555054, "learning_rate": 4.6108148087036984e-05, "loss": 0.1625, "step": 41020 }, { "epoch": 1.4910240569808852, "grad_norm": 1.1708754301071167, "learning_rate": 4.6105367040056903e-05, "loss": 0.1283, "step": 41030 }, { "epoch": 1.4913874554836835, "grad_norm": 1.175658106803894, "learning_rate": 4.610258508372165e-05, "loss": 0.1197, "step": 41040 }, { "epoch": 1.4917508539864817, "grad_norm": 1.0719672441482544, "learning_rate": 4.609980221815109e-05, "loss": 0.1361, "step": 41050 }, { "epoch": 1.4921142524892796, "grad_norm": 0.7982541918754578, "learning_rate": 4.6097018443465114e-05, "loss": 0.1302, "step": 41060 }, { "epoch": 1.4924776509920779, "grad_norm": 0.360454797744751, "learning_rate": 4.609423375978369e-05, "loss": 0.2231, "step": 41070 }, { "epoch": 1.492841049494876, "grad_norm": 0.64405757188797, "learning_rate": 4.609144816722678e-05, "loss": 0.1212, "step": 41080 }, { "epoch": 1.4932044479976743, "grad_norm": 0.7874402403831482, "learning_rate": 4.608866166591441e-05, "loss": 3.1348, "step": 41090 }, { "epoch": 1.4935678465004725, "grad_norm": 1.059163212776184, "learning_rate": 4.608587425596665e-05, "loss": 0.1464, "step": 41100 }, { "epoch": 1.4939312450032705, "grad_norm": 1.5717148780822754, "learning_rate": 4.608308593750359e-05, "loss": 0.1104, "step": 41110 }, { "epoch": 1.4942946435060689, "grad_norm": 0.6417020559310913, "learning_rate": 4.6080296710645365e-05, "loss": 0.1573, "step": 41120 }, { "epoch": 1.4946580420088669, "grad_norm": 0.8871016502380371, "learning_rate": 4.607750657551216e-05, "loss": 0.1087, "step": 41130 }, { "epoch": 1.495021440511665, "grad_norm": 2.3125686645507812, "learning_rate": 4.6074715532224196e-05, "loss": 0.1379, "step": 41140 }, { "epoch": 1.4953848390144633, "grad_norm": 2.087214708328247, "learning_rate": 4.607192358090172e-05, "loss": 0.1311, "step": 41150 }, { "epoch": 1.4957482375172615, "grad_norm": 3.1915369033813477, "learning_rate": 4.6069130721665035e-05, "loss": 0.1231, "step": 41160 }, { "epoch": 1.4961116360200597, "grad_norm": 0.4626937508583069, "learning_rate": 4.606633695463447e-05, "loss": 0.1544, "step": 41170 }, { "epoch": 1.4964750345228577, "grad_norm": 161.15541076660156, "learning_rate": 4.6063542279930395e-05, "loss": 3.413, "step": 41180 }, { "epoch": 1.496838433025656, "grad_norm": 0.9905474185943604, "learning_rate": 4.606074669767323e-05, "loss": 0.1285, "step": 41190 }, { "epoch": 1.497201831528454, "grad_norm": 0.5389920473098755, "learning_rate": 4.6057950207983426e-05, "loss": 0.1184, "step": 41200 }, { "epoch": 1.4975652300312523, "grad_norm": 2.7976090908050537, "learning_rate": 4.605515281098147e-05, "loss": 0.2461, "step": 41210 }, { "epoch": 1.4979286285340505, "grad_norm": 0.4971259534358978, "learning_rate": 4.60523545067879e-05, "loss": 0.2034, "step": 41220 }, { "epoch": 1.4982920270368485, "grad_norm": 1.5046378374099731, "learning_rate": 4.6049555295523274e-05, "loss": 0.1342, "step": 41230 }, { "epoch": 1.4986554255396467, "grad_norm": 1.337195634841919, "learning_rate": 4.60467551773082e-05, "loss": 0.1198, "step": 41240 }, { "epoch": 1.499018824042445, "grad_norm": 1.2729612588882446, "learning_rate": 4.6043954152263336e-05, "loss": 0.1312, "step": 41250 }, { "epoch": 1.4993822225452431, "grad_norm": 0.9693030714988708, "learning_rate": 4.6041152220509365e-05, "loss": 0.1554, "step": 41260 }, { "epoch": 1.4997456210480413, "grad_norm": 0.48035889863967896, "learning_rate": 4.6038349382167e-05, "loss": 0.1611, "step": 41270 }, { "epoch": 1.5001090195508393, "grad_norm": 1.015608787536621, "learning_rate": 4.603554563735702e-05, "loss": 0.1322, "step": 41280 }, { "epoch": 1.5004724180536377, "grad_norm": 1.900895595550537, "learning_rate": 4.603274098620023e-05, "loss": 0.1819, "step": 41290 }, { "epoch": 1.5008358165564357, "grad_norm": 1.780765414237976, "learning_rate": 4.602993542881745e-05, "loss": 0.1396, "step": 41300 }, { "epoch": 1.501199215059234, "grad_norm": 3.2523162364959717, "learning_rate": 4.602712896532959e-05, "loss": 0.1333, "step": 41310 }, { "epoch": 1.5015626135620321, "grad_norm": 0.344933420419693, "learning_rate": 4.6024321595857554e-05, "loss": 0.1417, "step": 41320 }, { "epoch": 1.5019260120648303, "grad_norm": 0.7336893081665039, "learning_rate": 4.6021513320522304e-05, "loss": 0.1551, "step": 41330 }, { "epoch": 1.5022894105676285, "grad_norm": 0.9252750873565674, "learning_rate": 4.601870413944484e-05, "loss": 0.1049, "step": 41340 }, { "epoch": 1.5026528090704265, "grad_norm": 2.0064470767974854, "learning_rate": 4.60158940527462e-05, "loss": 0.1593, "step": 41350 }, { "epoch": 1.503016207573225, "grad_norm": 1.2280207872390747, "learning_rate": 4.601308306054746e-05, "loss": 0.1276, "step": 41360 }, { "epoch": 1.503379606076023, "grad_norm": 0.7326213717460632, "learning_rate": 4.601027116296974e-05, "loss": 0.1853, "step": 41370 }, { "epoch": 1.5037430045788212, "grad_norm": 2.208380937576294, "learning_rate": 4.600745836013418e-05, "loss": 0.1343, "step": 41380 }, { "epoch": 1.5041064030816194, "grad_norm": 0.7113050818443298, "learning_rate": 4.6004644652161996e-05, "loss": 0.198, "step": 41390 }, { "epoch": 1.5044698015844173, "grad_norm": 1.8392037153244019, "learning_rate": 4.60018300391744e-05, "loss": 0.1283, "step": 41400 }, { "epoch": 1.5044698015844173, "eval_loss": 0.35653889179229736, "eval_runtime": 180.3372, "eval_samples_per_second": 41.112, "eval_steps_per_second": 5.14, "eval_wer": 0.16764390872619675, "step": 41400 }, { "epoch": 1.5048332000872158, "grad_norm": 1.0530060529708862, "learning_rate": 4.5999014521292674e-05, "loss": 0.1072, "step": 41410 }, { "epoch": 1.5051965985900138, "grad_norm": 1.0648863315582275, "learning_rate": 4.599619809863813e-05, "loss": 0.1939, "step": 41420 }, { "epoch": 1.505559997092812, "grad_norm": 1.4178556203842163, "learning_rate": 4.599338077133212e-05, "loss": 0.1325, "step": 41430 }, { "epoch": 1.5059233955956102, "grad_norm": 0.6156584024429321, "learning_rate": 4.5990562539496015e-05, "loss": 0.1337, "step": 41440 }, { "epoch": 1.5062867940984082, "grad_norm": 0.9399839639663696, "learning_rate": 4.598774340325126e-05, "loss": 0.1697, "step": 41450 }, { "epoch": 1.5066501926012066, "grad_norm": 0.9702737927436829, "learning_rate": 4.598492336271931e-05, "loss": 0.0978, "step": 41460 }, { "epoch": 1.5070135911040046, "grad_norm": 0.8199527263641357, "learning_rate": 4.598210241802169e-05, "loss": 0.185, "step": 41470 }, { "epoch": 1.5073769896068028, "grad_norm": 1.122827172279358, "learning_rate": 4.597928056927993e-05, "loss": 0.1234, "step": 41480 }, { "epoch": 1.507740388109601, "grad_norm": 1.9142221212387085, "learning_rate": 4.5976457816615606e-05, "loss": 0.1346, "step": 41490 }, { "epoch": 1.5081037866123992, "grad_norm": 1.0756717920303345, "learning_rate": 4.5973634160150345e-05, "loss": 0.1431, "step": 41500 }, { "epoch": 1.5084671851151974, "grad_norm": 1.6231876611709595, "learning_rate": 4.5970809600005826e-05, "loss": 0.1608, "step": 41510 }, { "epoch": 1.5088305836179954, "grad_norm": 0.3704961836338043, "learning_rate": 4.596798413630373e-05, "loss": 0.1501, "step": 41520 }, { "epoch": 1.5091939821207938, "grad_norm": 0.7752798199653625, "learning_rate": 4.59651577691658e-05, "loss": 0.1344, "step": 41530 }, { "epoch": 1.5095573806235918, "grad_norm": 2.622103214263916, "learning_rate": 4.596233049871382e-05, "loss": 0.1232, "step": 41540 }, { "epoch": 1.50992077912639, "grad_norm": 0.4142579436302185, "learning_rate": 4.595950232506961e-05, "loss": 0.1227, "step": 41550 }, { "epoch": 1.5102841776291882, "grad_norm": 0.9995001554489136, "learning_rate": 4.5956673248355e-05, "loss": 0.1143, "step": 41560 }, { "epoch": 1.5106475761319862, "grad_norm": 2.1356821060180664, "learning_rate": 4.595384326869191e-05, "loss": 0.1969, "step": 41570 }, { "epoch": 1.5110109746347846, "grad_norm": 0.9950689673423767, "learning_rate": 4.5951012386202274e-05, "loss": 0.1362, "step": 41580 }, { "epoch": 1.5113743731375826, "grad_norm": 0.6441085934638977, "learning_rate": 4.5948180601008054e-05, "loss": 0.1557, "step": 41590 }, { "epoch": 1.5117377716403808, "grad_norm": 2.1033713817596436, "learning_rate": 4.594534791323127e-05, "loss": 0.1718, "step": 41600 }, { "epoch": 1.512101170143179, "grad_norm": 1.3968003988265991, "learning_rate": 4.5942514322993965e-05, "loss": 0.2915, "step": 41610 }, { "epoch": 1.5124645686459772, "grad_norm": 0.7833322882652283, "learning_rate": 4.593967983041823e-05, "loss": 0.1379, "step": 41620 }, { "epoch": 1.5128279671487754, "grad_norm": 1.0050405263900757, "learning_rate": 4.5936844435626196e-05, "loss": 0.1307, "step": 41630 }, { "epoch": 1.5131913656515734, "grad_norm": 1.9530189037322998, "learning_rate": 4.593400813874003e-05, "loss": 0.139, "step": 41640 }, { "epoch": 1.5135547641543718, "grad_norm": 0.45743170380592346, "learning_rate": 4.593117093988194e-05, "loss": 0.1422, "step": 41650 }, { "epoch": 1.5139181626571698, "grad_norm": 1.310746431350708, "learning_rate": 4.592833283917416e-05, "loss": 1.58, "step": 41660 }, { "epoch": 1.514281561159968, "grad_norm": 0.6696259379386902, "learning_rate": 4.592549383673898e-05, "loss": 0.1466, "step": 41670 }, { "epoch": 1.5146449596627662, "grad_norm": 1.0350476503372192, "learning_rate": 4.5922653932698734e-05, "loss": 0.1114, "step": 41680 }, { "epoch": 1.5150083581655642, "grad_norm": 1.5413391590118408, "learning_rate": 4.591981312717577e-05, "loss": 0.1225, "step": 41690 }, { "epoch": 1.5153717566683627, "grad_norm": 0.8129068613052368, "learning_rate": 4.5916971420292485e-05, "loss": 0.1951, "step": 41700 }, { "epoch": 1.5157351551711606, "grad_norm": 1.1114506721496582, "learning_rate": 4.591412881217133e-05, "loss": 0.1227, "step": 41710 }, { "epoch": 1.5160985536739588, "grad_norm": 0.5106993317604065, "learning_rate": 4.5911285302934775e-05, "loss": 0.1985, "step": 41720 }, { "epoch": 1.516461952176757, "grad_norm": 1.2125110626220703, "learning_rate": 4.590844089270534e-05, "loss": 0.1233, "step": 41730 }, { "epoch": 1.516825350679555, "grad_norm": 1.3580394983291626, "learning_rate": 4.590559558160558e-05, "loss": 0.1227, "step": 41740 }, { "epoch": 1.5171887491823535, "grad_norm": 0.4338432252407074, "learning_rate": 4.590274936975809e-05, "loss": 0.1462, "step": 41750 }, { "epoch": 1.5175521476851515, "grad_norm": 0.9010568857192993, "learning_rate": 4.58999022572855e-05, "loss": 0.1372, "step": 41760 }, { "epoch": 1.5179155461879497, "grad_norm": 0.737705647945404, "learning_rate": 4.589705424431048e-05, "loss": 0.1538, "step": 41770 }, { "epoch": 1.5182789446907479, "grad_norm": 1.0285004377365112, "learning_rate": 4.589420533095575e-05, "loss": 0.1101, "step": 41780 }, { "epoch": 1.518642343193546, "grad_norm": 0.5717383027076721, "learning_rate": 4.589135551734405e-05, "loss": 0.1157, "step": 41790 }, { "epoch": 1.5190057416963443, "grad_norm": 1.1417220830917358, "learning_rate": 4.588850480359818e-05, "loss": 0.1359, "step": 41800 }, { "epoch": 1.5193691401991423, "grad_norm": 2.673459768295288, "learning_rate": 4.588565318984095e-05, "loss": 0.1238, "step": 41810 }, { "epoch": 1.5197325387019407, "grad_norm": 1.1211605072021484, "learning_rate": 4.588280067619524e-05, "loss": 0.1642, "step": 41820 }, { "epoch": 1.5200959372047387, "grad_norm": 2.358137369155884, "learning_rate": 4.587994726278395e-05, "loss": 0.1234, "step": 41830 }, { "epoch": 1.5204593357075369, "grad_norm": 0.8301489949226379, "learning_rate": 4.587709294973002e-05, "loss": 0.1274, "step": 41840 }, { "epoch": 1.520822734210335, "grad_norm": 2.1138226985931396, "learning_rate": 4.587423773715644e-05, "loss": 0.1326, "step": 41850 }, { "epoch": 1.521186132713133, "grad_norm": 0.7757201194763184, "learning_rate": 4.587138162518623e-05, "loss": 0.1183, "step": 41860 }, { "epoch": 1.5215495312159315, "grad_norm": 0.7807698249816895, "learning_rate": 4.586852461394243e-05, "loss": 0.1485, "step": 41870 }, { "epoch": 1.5219129297187295, "grad_norm": 2.2938053607940674, "learning_rate": 4.586566670354817e-05, "loss": 0.1152, "step": 41880 }, { "epoch": 1.5222763282215277, "grad_norm": 1.2340235710144043, "learning_rate": 4.5862807894126566e-05, "loss": 0.1766, "step": 41890 }, { "epoch": 1.522639726724326, "grad_norm": 0.9382178783416748, "learning_rate": 4.5859948185800806e-05, "loss": 0.1273, "step": 41900 }, { "epoch": 1.523003125227124, "grad_norm": 4.5072526931762695, "learning_rate": 4.58570875786941e-05, "loss": 0.1333, "step": 41910 }, { "epoch": 1.5233665237299223, "grad_norm": 0.41228216886520386, "learning_rate": 4.5854226072929696e-05, "loss": 0.1766, "step": 41920 }, { "epoch": 1.5237299222327203, "grad_norm": 0.869669497013092, "learning_rate": 4.5851363668630886e-05, "loss": 0.1271, "step": 41930 }, { "epoch": 1.5240933207355187, "grad_norm": 1.169318675994873, "learning_rate": 4.584850036592101e-05, "loss": 0.083, "step": 41940 }, { "epoch": 1.5244567192383167, "grad_norm": 3.336904287338257, "learning_rate": 4.5845636164923426e-05, "loss": 0.1357, "step": 41950 }, { "epoch": 1.524820117741115, "grad_norm": 1.167758584022522, "learning_rate": 4.584277106576156e-05, "loss": 0.1162, "step": 41960 }, { "epoch": 1.5251835162439131, "grad_norm": 0.9635423421859741, "learning_rate": 4.5839905068558835e-05, "loss": 0.2177, "step": 41970 }, { "epoch": 1.5255469147467111, "grad_norm": 1.3818042278289795, "learning_rate": 4.583703817343876e-05, "loss": 0.1246, "step": 41980 }, { "epoch": 1.5259103132495095, "grad_norm": 1.1299431324005127, "learning_rate": 4.583417038052484e-05, "loss": 0.1359, "step": 41990 }, { "epoch": 1.5262737117523075, "grad_norm": 2.181351661682129, "learning_rate": 4.583130168994065e-05, "loss": 0.1706, "step": 42000 }, { "epoch": 1.5262737117523075, "eval_loss": 0.3528802692890167, "eval_runtime": 181.0519, "eval_samples_per_second": 40.95, "eval_steps_per_second": 5.12, "eval_wer": 0.17613956105796286, "step": 42000 }, { "epoch": 1.5266371102551057, "grad_norm": 1.0958346128463745, "learning_rate": 4.582843210180979e-05, "loss": 0.1187, "step": 42010 }, { "epoch": 1.527000508757904, "grad_norm": 0.463438481092453, "learning_rate": 4.58255616162559e-05, "loss": 0.1539, "step": 42020 }, { "epoch": 1.527363907260702, "grad_norm": 0.5655350685119629, "learning_rate": 4.5822690233402656e-05, "loss": 0.1503, "step": 42030 }, { "epoch": 1.5277273057635004, "grad_norm": 1.5692224502563477, "learning_rate": 4.5819817953373764e-05, "loss": 0.1219, "step": 42040 }, { "epoch": 1.5280907042662983, "grad_norm": 0.48884958028793335, "learning_rate": 4.5816944776293016e-05, "loss": 0.1455, "step": 42050 }, { "epoch": 1.5284541027690965, "grad_norm": 0.8623284697532654, "learning_rate": 4.5814070702284175e-05, "loss": 0.1498, "step": 42060 }, { "epoch": 1.5288175012718948, "grad_norm": 0.5985013246536255, "learning_rate": 4.581119573147108e-05, "loss": 0.4594, "step": 42070 }, { "epoch": 1.529180899774693, "grad_norm": 0.9812720417976379, "learning_rate": 4.580831986397761e-05, "loss": 0.1234, "step": 42080 }, { "epoch": 1.5295442982774912, "grad_norm": 0.5680709481239319, "learning_rate": 4.5805443099927666e-05, "loss": 0.1061, "step": 42090 }, { "epoch": 1.5299076967802892, "grad_norm": 0.6387588977813721, "learning_rate": 4.5802565439445225e-05, "loss": 0.1436, "step": 42100 }, { "epoch": 1.5302710952830876, "grad_norm": 1.1865098476409912, "learning_rate": 4.5799686882654236e-05, "loss": 0.1155, "step": 42110 }, { "epoch": 1.5306344937858856, "grad_norm": 0.7588171362876892, "learning_rate": 4.579680742967875e-05, "loss": 0.1799, "step": 42120 }, { "epoch": 1.5309978922886838, "grad_norm": 0.9183505773544312, "learning_rate": 4.579392708064283e-05, "loss": 0.1133, "step": 42130 }, { "epoch": 1.531361290791482, "grad_norm": 1.1988872289657593, "learning_rate": 4.5791045835670575e-05, "loss": 0.1107, "step": 42140 }, { "epoch": 1.53172468929428, "grad_norm": 0.6209965944290161, "learning_rate": 4.578816369488613e-05, "loss": 0.1518, "step": 42150 }, { "epoch": 1.5320880877970784, "grad_norm": 1.3487142324447632, "learning_rate": 4.5785280658413674e-05, "loss": 0.1126, "step": 42160 }, { "epoch": 1.5324514862998764, "grad_norm": 0.6516602039337158, "learning_rate": 4.578239672637743e-05, "loss": 0.1498, "step": 42170 }, { "epoch": 1.5328148848026746, "grad_norm": 2.4193315505981445, "learning_rate": 4.577951189890166e-05, "loss": 0.1408, "step": 42180 }, { "epoch": 1.5331782833054728, "grad_norm": 0.6747106313705444, "learning_rate": 4.577662617611065e-05, "loss": 0.1226, "step": 42190 }, { "epoch": 1.533541681808271, "grad_norm": 3.124244451522827, "learning_rate": 4.5773739558128744e-05, "loss": 0.1512, "step": 42200 }, { "epoch": 1.5339050803110692, "grad_norm": 0.8625807762145996, "learning_rate": 4.5770852045080314e-05, "loss": 0.1187, "step": 42210 }, { "epoch": 1.5342684788138672, "grad_norm": 0.9007976651191711, "learning_rate": 4.576796363708977e-05, "loss": 0.2001, "step": 42220 }, { "epoch": 1.5346318773166656, "grad_norm": 0.7381039261817932, "learning_rate": 4.576507433428157e-05, "loss": 0.1063, "step": 42230 }, { "epoch": 1.5349952758194636, "grad_norm": 0.9550501704216003, "learning_rate": 4.57621841367802e-05, "loss": 0.1448, "step": 42240 }, { "epoch": 1.5353586743222618, "grad_norm": 0.5087346434593201, "learning_rate": 4.5759293044710175e-05, "loss": 0.1665, "step": 42250 }, { "epoch": 1.53572207282506, "grad_norm": 0.4684658646583557, "learning_rate": 4.575640105819609e-05, "loss": 0.1089, "step": 42260 }, { "epoch": 1.536085471327858, "grad_norm": 0.6353893876075745, "learning_rate": 4.575350817736252e-05, "loss": 0.2437, "step": 42270 }, { "epoch": 1.5364488698306564, "grad_norm": 0.7524349689483643, "learning_rate": 4.575061440233414e-05, "loss": 0.1858, "step": 42280 }, { "epoch": 1.5368122683334544, "grad_norm": 0.9425112009048462, "learning_rate": 4.57477197332356e-05, "loss": 0.0948, "step": 42290 }, { "epoch": 1.5371756668362526, "grad_norm": 1.419872522354126, "learning_rate": 4.574482417019165e-05, "loss": 0.1272, "step": 42300 }, { "epoch": 1.5375390653390508, "grad_norm": 0.6511875987052917, "learning_rate": 4.574192771332703e-05, "loss": 0.176, "step": 42310 }, { "epoch": 1.5379024638418488, "grad_norm": 1.2612382173538208, "learning_rate": 4.573903036276655e-05, "loss": 0.1681, "step": 42320 }, { "epoch": 1.5382658623446472, "grad_norm": 0.828471839427948, "learning_rate": 4.573613211863504e-05, "loss": 0.1218, "step": 42330 }, { "epoch": 1.5386292608474452, "grad_norm": 0.7098140716552734, "learning_rate": 4.573323298105737e-05, "loss": 0.1264, "step": 42340 }, { "epoch": 1.5389926593502434, "grad_norm": 0.612920343875885, "learning_rate": 4.573033295015847e-05, "loss": 0.1457, "step": 42350 }, { "epoch": 1.5393560578530416, "grad_norm": 2.700010299682617, "learning_rate": 4.572743202606328e-05, "loss": 0.1416, "step": 42360 }, { "epoch": 1.5397194563558398, "grad_norm": 0.4544985890388489, "learning_rate": 4.5724530208896784e-05, "loss": 0.2174, "step": 42370 }, { "epoch": 1.540082854858638, "grad_norm": 1.7702118158340454, "learning_rate": 4.5721627498784025e-05, "loss": 0.7935, "step": 42380 }, { "epoch": 1.540446253361436, "grad_norm": 2.3855764865875244, "learning_rate": 4.571872389585007e-05, "loss": 0.1142, "step": 42390 }, { "epoch": 1.5408096518642345, "grad_norm": 1.9382286071777344, "learning_rate": 4.5715819400220004e-05, "loss": 0.1349, "step": 42400 }, { "epoch": 1.5411730503670324, "grad_norm": 1.8577841520309448, "learning_rate": 4.5712914012019003e-05, "loss": 0.1154, "step": 42410 }, { "epoch": 1.5415364488698307, "grad_norm": 1.4880726337432861, "learning_rate": 4.571000773137223e-05, "loss": 0.1402, "step": 42420 }, { "epoch": 1.5418998473726289, "grad_norm": 0.6903501152992249, "learning_rate": 4.570710055840491e-05, "loss": 0.1137, "step": 42430 }, { "epoch": 1.5422632458754268, "grad_norm": 1.4438791275024414, "learning_rate": 4.57041924932423e-05, "loss": 0.1285, "step": 42440 }, { "epoch": 1.5426266443782253, "grad_norm": 0.41870322823524475, "learning_rate": 4.57012835360097e-05, "loss": 0.1366, "step": 42450 }, { "epoch": 1.5429900428810233, "grad_norm": 0.9365738034248352, "learning_rate": 4.569837368683245e-05, "loss": 0.1051, "step": 42460 }, { "epoch": 1.5433534413838215, "grad_norm": 1.940673828125, "learning_rate": 4.569546294583593e-05, "loss": 0.157, "step": 42470 }, { "epoch": 1.5437168398866197, "grad_norm": 1.1944515705108643, "learning_rate": 4.5692551313145536e-05, "loss": 1.5159, "step": 42480 }, { "epoch": 1.5440802383894179, "grad_norm": 0.6140870451927185, "learning_rate": 4.568963878888673e-05, "loss": 0.0986, "step": 42490 }, { "epoch": 1.544443636892216, "grad_norm": 1.8208271265029907, "learning_rate": 4.5686725373185016e-05, "loss": 0.1519, "step": 42500 }, { "epoch": 1.544807035395014, "grad_norm": 1.2457455396652222, "learning_rate": 4.56838110661659e-05, "loss": 0.1634, "step": 42510 }, { "epoch": 1.5451704338978125, "grad_norm": 0.5140019655227661, "learning_rate": 4.568089586795496e-05, "loss": 0.1628, "step": 42520 }, { "epoch": 1.5455338324006105, "grad_norm": 0.8539334535598755, "learning_rate": 4.5677979778677796e-05, "loss": 0.1243, "step": 42530 }, { "epoch": 1.5458972309034087, "grad_norm": 1.2581802606582642, "learning_rate": 4.567506279846006e-05, "loss": 0.1715, "step": 42540 }, { "epoch": 1.546260629406207, "grad_norm": 1.8808507919311523, "learning_rate": 4.567214492742743e-05, "loss": 0.1415, "step": 42550 }, { "epoch": 1.5466240279090049, "grad_norm": 1.948970079421997, "learning_rate": 4.566922616570562e-05, "loss": 0.1147, "step": 42560 }, { "epoch": 1.5469874264118033, "grad_norm": 1.5000864267349243, "learning_rate": 4.566630651342041e-05, "loss": 0.1614, "step": 42570 }, { "epoch": 1.5473508249146013, "grad_norm": 1.5625576972961426, "learning_rate": 4.566338597069757e-05, "loss": 0.1145, "step": 42580 }, { "epoch": 1.5477142234173995, "grad_norm": 1.2443382740020752, "learning_rate": 4.566046453766295e-05, "loss": 0.1203, "step": 42590 }, { "epoch": 1.5480776219201977, "grad_norm": 1.5014569759368896, "learning_rate": 4.5657542214442426e-05, "loss": 0.1459, "step": 42600 }, { "epoch": 1.5480776219201977, "eval_loss": 0.351544588804245, "eval_runtime": 180.3519, "eval_samples_per_second": 41.109, "eval_steps_per_second": 5.14, "eval_wer": 0.17858115344818196, "step": 42600 }, { "epoch": 1.5484410204229957, "grad_norm": 1.0584172010421753, "learning_rate": 4.565461900116191e-05, "loss": 0.1046, "step": 42610 }, { "epoch": 1.5488044189257941, "grad_norm": 0.6157267689704895, "learning_rate": 4.565169489794735e-05, "loss": 0.135, "step": 42620 }, { "epoch": 1.549167817428592, "grad_norm": 0.898263692855835, "learning_rate": 4.564876990492474e-05, "loss": 0.1157, "step": 42630 }, { "epoch": 1.5495312159313903, "grad_norm": 0.6782193779945374, "learning_rate": 4.5645844022220096e-05, "loss": 0.2191, "step": 42640 }, { "epoch": 1.5498946144341885, "grad_norm": 0.6636195182800293, "learning_rate": 4.5642917249959493e-05, "loss": 0.1709, "step": 42650 }, { "epoch": 1.5502580129369867, "grad_norm": 1.3367676734924316, "learning_rate": 4.563998958826904e-05, "loss": 0.1197, "step": 42660 }, { "epoch": 1.550621411439785, "grad_norm": 0.470985472202301, "learning_rate": 4.563706103727486e-05, "loss": 0.1395, "step": 42670 }, { "epoch": 1.550984809942583, "grad_norm": 1.1232322454452515, "learning_rate": 4.563413159710316e-05, "loss": 0.1139, "step": 42680 }, { "epoch": 1.5513482084453813, "grad_norm": 1.0105756521224976, "learning_rate": 4.563120126788013e-05, "loss": 0.1243, "step": 42690 }, { "epoch": 1.5517116069481793, "grad_norm": 0.785205602645874, "learning_rate": 4.562827004973206e-05, "loss": 0.1588, "step": 42700 }, { "epoch": 1.5520750054509775, "grad_norm": 1.4863699674606323, "learning_rate": 4.5625337942785224e-05, "loss": 0.0913, "step": 42710 }, { "epoch": 1.5524384039537757, "grad_norm": 0.33174383640289307, "learning_rate": 4.562240494716596e-05, "loss": 0.158, "step": 42720 }, { "epoch": 1.5528018024565737, "grad_norm": 0.7735195159912109, "learning_rate": 4.5619471063000644e-05, "loss": 0.1295, "step": 42730 }, { "epoch": 1.5531652009593722, "grad_norm": 3.2964320182800293, "learning_rate": 4.561653629041568e-05, "loss": 0.1144, "step": 42740 }, { "epoch": 1.5535285994621701, "grad_norm": 0.6756449937820435, "learning_rate": 4.5613600629537526e-05, "loss": 0.119, "step": 42750 }, { "epoch": 1.5538919979649684, "grad_norm": 1.7608799934387207, "learning_rate": 4.5610664080492655e-05, "loss": 0.1239, "step": 42760 }, { "epoch": 1.5542553964677666, "grad_norm": 0.8312143087387085, "learning_rate": 4.5607726643407614e-05, "loss": 0.1434, "step": 42770 }, { "epoch": 1.5546187949705648, "grad_norm": 1.3083513975143433, "learning_rate": 4.560478831840894e-05, "loss": 0.125, "step": 42780 }, { "epoch": 1.554982193473363, "grad_norm": 1.4495130777359009, "learning_rate": 4.560184910562326e-05, "loss": 0.1172, "step": 42790 }, { "epoch": 1.555345591976161, "grad_norm": 0.5549319982528687, "learning_rate": 4.559890900517721e-05, "loss": 1.7985, "step": 42800 }, { "epoch": 1.5557089904789594, "grad_norm": 1.0677647590637207, "learning_rate": 4.5595968017197446e-05, "loss": 0.1485, "step": 42810 }, { "epoch": 1.5560723889817574, "grad_norm": 0.5432078242301941, "learning_rate": 4.559302614181071e-05, "loss": 0.1372, "step": 42820 }, { "epoch": 1.5564357874845556, "grad_norm": 2.0982048511505127, "learning_rate": 4.559008337914375e-05, "loss": 0.1543, "step": 42830 }, { "epoch": 1.5567991859873538, "grad_norm": 2.8568451404571533, "learning_rate": 4.558713972932335e-05, "loss": 0.1271, "step": 42840 }, { "epoch": 1.5571625844901518, "grad_norm": 0.9933029413223267, "learning_rate": 4.558419519247635e-05, "loss": 1.0891, "step": 42850 }, { "epoch": 1.5575259829929502, "grad_norm": 0.6010461449623108, "learning_rate": 4.5581249768729614e-05, "loss": 0.1509, "step": 42860 }, { "epoch": 1.5578893814957482, "grad_norm": 0.6242499351501465, "learning_rate": 4.557830345821006e-05, "loss": 0.1527, "step": 42870 }, { "epoch": 1.5582527799985464, "grad_norm": 0.48831334710121155, "learning_rate": 4.557535626104463e-05, "loss": 0.1451, "step": 42880 }, { "epoch": 1.5586161785013446, "grad_norm": 1.1660668849945068, "learning_rate": 4.55724081773603e-05, "loss": 0.1558, "step": 42890 }, { "epoch": 1.5589795770041426, "grad_norm": 1.067808747291565, "learning_rate": 4.5569459207284106e-05, "loss": 0.1634, "step": 42900 }, { "epoch": 1.559342975506941, "grad_norm": 1.6434768438339233, "learning_rate": 4.556650935094309e-05, "loss": 0.1269, "step": 42910 }, { "epoch": 1.559706374009739, "grad_norm": 0.4303635358810425, "learning_rate": 4.556355860846437e-05, "loss": 0.1536, "step": 42920 }, { "epoch": 1.5600697725125372, "grad_norm": 3.148212194442749, "learning_rate": 4.5560606979975075e-05, "loss": 0.1062, "step": 42930 }, { "epoch": 1.5604331710153354, "grad_norm": 3.3599109649658203, "learning_rate": 4.5557654465602376e-05, "loss": 0.1158, "step": 42940 }, { "epoch": 1.5607965695181336, "grad_norm": 3.2170286178588867, "learning_rate": 4.5554701065473494e-05, "loss": 0.1491, "step": 42950 }, { "epoch": 1.5611599680209318, "grad_norm": 1.1147798299789429, "learning_rate": 4.555174677971567e-05, "loss": 0.1143, "step": 42960 }, { "epoch": 1.5615233665237298, "grad_norm": 0.4949367046356201, "learning_rate": 4.5548791608456206e-05, "loss": 0.1639, "step": 42970 }, { "epoch": 1.5618867650265282, "grad_norm": 0.7166339755058289, "learning_rate": 4.554583555182244e-05, "loss": 0.137, "step": 42980 }, { "epoch": 1.5622501635293262, "grad_norm": 0.48903581500053406, "learning_rate": 4.55428786099417e-05, "loss": 0.126, "step": 42990 }, { "epoch": 1.5626135620321244, "grad_norm": 0.43728914856910706, "learning_rate": 4.553992078294142e-05, "loss": 0.1371, "step": 43000 }, { "epoch": 1.5629769605349226, "grad_norm": 0.7486665844917297, "learning_rate": 4.5536962070949035e-05, "loss": 0.1233, "step": 43010 }, { "epoch": 1.5633403590377206, "grad_norm": 0.7540434002876282, "learning_rate": 4.5534002474092025e-05, "loss": 0.1356, "step": 43020 }, { "epoch": 1.563703757540519, "grad_norm": 1.2763710021972656, "learning_rate": 4.55310419924979e-05, "loss": 0.1234, "step": 43030 }, { "epoch": 1.564067156043317, "grad_norm": 0.5709404945373535, "learning_rate": 4.552808062629424e-05, "loss": 0.1224, "step": 43040 }, { "epoch": 1.5644305545461152, "grad_norm": 0.5243006348609924, "learning_rate": 4.552511837560862e-05, "loss": 0.1175, "step": 43050 }, { "epoch": 1.5647939530489134, "grad_norm": 1.3225644826889038, "learning_rate": 4.552215524056867e-05, "loss": 0.1408, "step": 43060 }, { "epoch": 1.5651573515517117, "grad_norm": 0.2830749452114105, "learning_rate": 4.551919122130208e-05, "loss": 0.1588, "step": 43070 }, { "epoch": 1.5655207500545099, "grad_norm": 1.7666617631912231, "learning_rate": 4.551622631793654e-05, "loss": 0.109, "step": 43080 }, { "epoch": 1.5658841485573078, "grad_norm": 0.6468254327774048, "learning_rate": 4.551326053059981e-05, "loss": 0.1199, "step": 43090 }, { "epoch": 1.5662475470601063, "grad_norm": 0.7526164650917053, "learning_rate": 4.551029385941967e-05, "loss": 0.1648, "step": 43100 }, { "epoch": 1.5666109455629043, "grad_norm": 3.8184330463409424, "learning_rate": 4.550732630452394e-05, "loss": 0.1392, "step": 43110 }, { "epoch": 1.5669743440657025, "grad_norm": 0.9396213293075562, "learning_rate": 4.550435786604049e-05, "loss": 0.1659, "step": 43120 }, { "epoch": 1.5673377425685007, "grad_norm": 1.536440372467041, "learning_rate": 4.550168551604358e-05, "loss": 0.1227, "step": 43130 }, { "epoch": 1.5677011410712987, "grad_norm": 1.6777888536453247, "learning_rate": 4.549871539909584e-05, "loss": 0.128, "step": 43140 }, { "epoch": 1.568064539574097, "grad_norm": 21.312944412231445, "learning_rate": 4.5495744398931396e-05, "loss": 0.2651, "step": 43150 }, { "epoch": 1.568427938076895, "grad_norm": 0.8739009499549866, "learning_rate": 4.549277251567824e-05, "loss": 0.12, "step": 43160 }, { "epoch": 1.5687913365796933, "grad_norm": 0.3690776526927948, "learning_rate": 4.548979974946444e-05, "loss": 0.1665, "step": 43170 }, { "epoch": 1.5691547350824915, "grad_norm": 1.3902113437652588, "learning_rate": 4.548682610041807e-05, "loss": 0.1502, "step": 43180 }, { "epoch": 1.5695181335852895, "grad_norm": 0.9234703779220581, "learning_rate": 4.5483851568667244e-05, "loss": 0.1168, "step": 43190 }, { "epoch": 1.569881532088088, "grad_norm": 0.7674643397331238, "learning_rate": 4.5480876154340145e-05, "loss": 0.1404, "step": 43200 }, { "epoch": 1.569881532088088, "eval_loss": 0.3601061701774597, "eval_runtime": 180.5599, "eval_samples_per_second": 41.061, "eval_steps_per_second": 5.134, "eval_wer": 0.16855156388984696, "step": 43200 }, { "epoch": 1.5702449305908859, "grad_norm": 0.7668557167053223, "learning_rate": 4.5477899857564966e-05, "loss": 0.1842, "step": 43210 }, { "epoch": 1.570608329093684, "grad_norm": 0.7534570693969727, "learning_rate": 4.5474922678469936e-05, "loss": 0.1558, "step": 43220 }, { "epoch": 1.5709717275964823, "grad_norm": 0.9190795421600342, "learning_rate": 4.547194461718334e-05, "loss": 0.1808, "step": 43230 }, { "epoch": 1.5713351260992805, "grad_norm": 0.4574483633041382, "learning_rate": 4.54689656738335e-05, "loss": 0.1146, "step": 43240 }, { "epoch": 1.5716985246020787, "grad_norm": 1.1554951667785645, "learning_rate": 4.5465985848548744e-05, "loss": 0.8771, "step": 43250 }, { "epoch": 1.5720619231048767, "grad_norm": 1.175336480140686, "learning_rate": 4.546300514145748e-05, "loss": 0.1337, "step": 43260 }, { "epoch": 1.5724253216076751, "grad_norm": 0.4004783630371094, "learning_rate": 4.5460023552688136e-05, "loss": 0.1963, "step": 43270 }, { "epoch": 1.572788720110473, "grad_norm": 0.5944772362709045, "learning_rate": 4.5457041082369164e-05, "loss": 0.1223, "step": 43280 }, { "epoch": 1.5731521186132713, "grad_norm": 0.7069734334945679, "learning_rate": 4.545405773062909e-05, "loss": 3.2472, "step": 43290 }, { "epoch": 1.5735155171160695, "grad_norm": 1.0471086502075195, "learning_rate": 4.545107349759644e-05, "loss": 0.1558, "step": 43300 }, { "epoch": 1.5738789156188675, "grad_norm": 0.6987308263778687, "learning_rate": 4.54480883833998e-05, "loss": 0.4641, "step": 43310 }, { "epoch": 1.574242314121666, "grad_norm": 0.599287211894989, "learning_rate": 4.5445102388167785e-05, "loss": 0.1592, "step": 43320 }, { "epoch": 1.574605712624464, "grad_norm": 0.9643434286117554, "learning_rate": 4.544211551202904e-05, "loss": 0.1165, "step": 43330 }, { "epoch": 1.5749691111272621, "grad_norm": 0.5655382871627808, "learning_rate": 4.5439127755112285e-05, "loss": 0.1234, "step": 43340 }, { "epoch": 1.5753325096300603, "grad_norm": 1.7126801013946533, "learning_rate": 4.5436139117546235e-05, "loss": 0.1647, "step": 43350 }, { "epoch": 1.5756959081328585, "grad_norm": 0.6298018097877502, "learning_rate": 4.543314959945966e-05, "loss": 0.1028, "step": 43360 }, { "epoch": 1.5760593066356567, "grad_norm": 0.5706765651702881, "learning_rate": 4.543015920098137e-05, "loss": 0.5641, "step": 43370 }, { "epoch": 1.5764227051384547, "grad_norm": 0.9098716974258423, "learning_rate": 4.542716792224022e-05, "loss": 0.1233, "step": 43380 }, { "epoch": 1.5767861036412532, "grad_norm": 1.0217915773391724, "learning_rate": 4.5424175763365075e-05, "loss": 0.1306, "step": 43390 }, { "epoch": 1.5771495021440511, "grad_norm": 0.651685893535614, "learning_rate": 4.5421182724484866e-05, "loss": 0.1433, "step": 43400 }, { "epoch": 1.5775129006468493, "grad_norm": 0.6281771659851074, "learning_rate": 4.541818880572856e-05, "loss": 0.1313, "step": 43410 }, { "epoch": 1.5778762991496476, "grad_norm": 1.7486456632614136, "learning_rate": 4.541519400722514e-05, "loss": 0.122, "step": 43420 }, { "epoch": 1.5782396976524455, "grad_norm": 1.2109237909317017, "learning_rate": 4.541219832910364e-05, "loss": 0.1297, "step": 43430 }, { "epoch": 1.578603096155244, "grad_norm": 1.041900634765625, "learning_rate": 4.540920177149315e-05, "loss": 0.1014, "step": 43440 }, { "epoch": 1.578966494658042, "grad_norm": 0.7674359083175659, "learning_rate": 4.540620433452277e-05, "loss": 0.1838, "step": 43450 }, { "epoch": 1.5793298931608402, "grad_norm": 1.9548803567886353, "learning_rate": 4.540320601832165e-05, "loss": 0.1345, "step": 43460 }, { "epoch": 1.5796932916636384, "grad_norm": 0.39995163679122925, "learning_rate": 4.540020682301898e-05, "loss": 0.1305, "step": 43470 }, { "epoch": 1.5800566901664364, "grad_norm": 0.9415978789329529, "learning_rate": 4.539720674874398e-05, "loss": 0.123, "step": 43480 }, { "epoch": 1.5804200886692348, "grad_norm": 0.8457926511764526, "learning_rate": 4.539420579562592e-05, "loss": 0.1145, "step": 43490 }, { "epoch": 1.5807834871720328, "grad_norm": 2.9950082302093506, "learning_rate": 4.539120396379409e-05, "loss": 0.1551, "step": 43500 }, { "epoch": 1.581146885674831, "grad_norm": 1.8456460237503052, "learning_rate": 4.5388201253377834e-05, "loss": 0.0885, "step": 43510 }, { "epoch": 1.5815102841776292, "grad_norm": 0.4476306736469269, "learning_rate": 4.538519766450653e-05, "loss": 0.1351, "step": 43520 }, { "epoch": 1.5818736826804274, "grad_norm": 0.7363295555114746, "learning_rate": 4.5382193197309584e-05, "loss": 0.1045, "step": 43530 }, { "epoch": 1.5822370811832256, "grad_norm": 2.1484272480010986, "learning_rate": 4.5379187851916463e-05, "loss": 0.1304, "step": 43540 }, { "epoch": 1.5826004796860236, "grad_norm": 0.5627908706665039, "learning_rate": 4.537618162845664e-05, "loss": 0.1454, "step": 43550 }, { "epoch": 1.582963878188822, "grad_norm": 1.4841351509094238, "learning_rate": 4.537317452705964e-05, "loss": 0.1301, "step": 43560 }, { "epoch": 1.58332727669162, "grad_norm": 0.7127716541290283, "learning_rate": 4.537016654785505e-05, "loss": 0.1608, "step": 43570 }, { "epoch": 1.5836906751944182, "grad_norm": 1.0103297233581543, "learning_rate": 4.536715769097246e-05, "loss": 0.1137, "step": 43580 }, { "epoch": 1.5840540736972164, "grad_norm": 0.8980743288993835, "learning_rate": 4.536414795654151e-05, "loss": 0.1213, "step": 43590 }, { "epoch": 1.5844174722000144, "grad_norm": 0.5678355097770691, "learning_rate": 4.536113734469188e-05, "loss": 0.1253, "step": 43600 }, { "epoch": 1.5847808707028128, "grad_norm": 0.6713634729385376, "learning_rate": 4.535812585555328e-05, "loss": 0.1144, "step": 43610 }, { "epoch": 1.5851442692056108, "grad_norm": 0.4925456643104553, "learning_rate": 4.5355113489255484e-05, "loss": 0.1448, "step": 43620 }, { "epoch": 1.585507667708409, "grad_norm": 1.3464380502700806, "learning_rate": 4.5352100245928267e-05, "loss": 0.1213, "step": 43630 }, { "epoch": 1.5858710662112072, "grad_norm": 1.3755130767822266, "learning_rate": 4.5349086125701456e-05, "loss": 0.1277, "step": 43640 }, { "epoch": 1.5862344647140054, "grad_norm": 1.2649788856506348, "learning_rate": 4.534607112870494e-05, "loss": 0.1379, "step": 43650 }, { "epoch": 1.5865978632168036, "grad_norm": 0.6860102415084839, "learning_rate": 4.53430552550686e-05, "loss": 0.1209, "step": 43660 }, { "epoch": 1.5869612617196016, "grad_norm": 0.9149149656295776, "learning_rate": 4.534003850492239e-05, "loss": 0.158, "step": 43670 }, { "epoch": 1.5873246602224, "grad_norm": 1.1880120038986206, "learning_rate": 4.53370208783963e-05, "loss": 0.1283, "step": 43680 }, { "epoch": 1.587688058725198, "grad_norm": 2.6330199241638184, "learning_rate": 4.533400237562033e-05, "loss": 0.1414, "step": 43690 }, { "epoch": 1.5880514572279962, "grad_norm": 0.7637589573860168, "learning_rate": 4.533098299672455e-05, "loss": 0.1267, "step": 43700 }, { "epoch": 1.5884148557307944, "grad_norm": 1.7144758701324463, "learning_rate": 4.5327962741839044e-05, "loss": 0.1222, "step": 43710 }, { "epoch": 1.5887782542335924, "grad_norm": 1.0269776582717896, "learning_rate": 4.532494161109396e-05, "loss": 0.1862, "step": 43720 }, { "epoch": 1.5891416527363909, "grad_norm": 0.8622583746910095, "learning_rate": 4.532191960461946e-05, "loss": 0.1894, "step": 43730 }, { "epoch": 1.5895050512391888, "grad_norm": 1.0310677289962769, "learning_rate": 4.531889672254575e-05, "loss": 0.1284, "step": 43740 }, { "epoch": 1.589868449741987, "grad_norm": 2.753690242767334, "learning_rate": 4.531587296500306e-05, "loss": 0.1404, "step": 43750 }, { "epoch": 1.5902318482447853, "grad_norm": 0.5997269749641418, "learning_rate": 4.53128483321217e-05, "loss": 0.1119, "step": 43760 }, { "epoch": 1.5905952467475832, "grad_norm": 0.8589096665382385, "learning_rate": 4.5309822824031976e-05, "loss": 0.1319, "step": 43770 }, { "epoch": 1.5909586452503817, "grad_norm": 0.7129044532775879, "learning_rate": 4.530679644086425e-05, "loss": 0.2389, "step": 43780 }, { "epoch": 1.5913220437531796, "grad_norm": 0.6947050094604492, "learning_rate": 4.530376918274892e-05, "loss": 0.115, "step": 43790 }, { "epoch": 1.5916854422559779, "grad_norm": 0.9983404278755188, "learning_rate": 4.530074104981641e-05, "loss": 0.1446, "step": 43800 }, { "epoch": 1.5916854422559779, "eval_loss": 0.3569597005844116, "eval_runtime": 180.6536, "eval_samples_per_second": 41.04, "eval_steps_per_second": 5.131, "eval_wer": 0.17362535625465172, "step": 43800 }, { "epoch": 1.592048840758776, "grad_norm": 0.712482750415802, "learning_rate": 4.529771204219721e-05, "loss": 0.1434, "step": 43810 }, { "epoch": 1.5924122392615743, "grad_norm": 0.5298041105270386, "learning_rate": 4.5294682160021806e-05, "loss": 0.1771, "step": 43820 }, { "epoch": 1.5927756377643725, "grad_norm": 1.337560772895813, "learning_rate": 4.529165140342076e-05, "loss": 0.1144, "step": 43830 }, { "epoch": 1.5931390362671705, "grad_norm": 0.5129504203796387, "learning_rate": 4.5288619772524654e-05, "loss": 0.1001, "step": 43840 }, { "epoch": 1.593502434769969, "grad_norm": 0.7407031059265137, "learning_rate": 4.528558726746411e-05, "loss": 0.1302, "step": 43850 }, { "epoch": 1.5938658332727669, "grad_norm": 0.9279839992523193, "learning_rate": 4.5282553888369785e-05, "loss": 0.1452, "step": 43860 }, { "epoch": 1.594229231775565, "grad_norm": 0.5245470404624939, "learning_rate": 4.5279519635372374e-05, "loss": 0.1756, "step": 43870 }, { "epoch": 1.5945926302783633, "grad_norm": 0.6099745631217957, "learning_rate": 4.527648450860262e-05, "loss": 0.2019, "step": 43880 }, { "epoch": 1.5949560287811613, "grad_norm": 0.9615786075592041, "learning_rate": 4.52734485081913e-05, "loss": 0.1252, "step": 43890 }, { "epoch": 1.5953194272839597, "grad_norm": 1.52881920337677, "learning_rate": 4.527041163426921e-05, "loss": 1.8751, "step": 43900 }, { "epoch": 1.5956828257867577, "grad_norm": 0.8344588875770569, "learning_rate": 4.526737388696721e-05, "loss": 0.129, "step": 43910 }, { "epoch": 1.596046224289556, "grad_norm": 0.5732100605964661, "learning_rate": 4.526433526641617e-05, "loss": 0.1475, "step": 43920 }, { "epoch": 1.596409622792354, "grad_norm": 0.8947811722755432, "learning_rate": 4.526129577274704e-05, "loss": 0.4153, "step": 43930 }, { "epoch": 1.5967730212951523, "grad_norm": 1.6199461221694946, "learning_rate": 4.5258255406090746e-05, "loss": 0.1379, "step": 43940 }, { "epoch": 1.5971364197979505, "grad_norm": 1.3465640544891357, "learning_rate": 4.525521416657832e-05, "loss": 0.1515, "step": 43950 }, { "epoch": 1.5974998183007485, "grad_norm": 1.7875219583511353, "learning_rate": 4.525217205434078e-05, "loss": 0.1119, "step": 43960 }, { "epoch": 1.597863216803547, "grad_norm": 0.5457040071487427, "learning_rate": 4.52491290695092e-05, "loss": 0.1499, "step": 43970 }, { "epoch": 1.598226615306345, "grad_norm": 1.2962692975997925, "learning_rate": 4.52460852122147e-05, "loss": 0.1311, "step": 43980 }, { "epoch": 1.5985900138091431, "grad_norm": 0.679913341999054, "learning_rate": 4.5243040482588426e-05, "loss": 0.1298, "step": 43990 }, { "epoch": 1.5989534123119413, "grad_norm": 1.5390740633010864, "learning_rate": 4.523999488076156e-05, "loss": 0.1483, "step": 44000 }, { "epoch": 1.5993168108147393, "grad_norm": 3.566751003265381, "learning_rate": 4.523694840686532e-05, "loss": 0.1303, "step": 44010 }, { "epoch": 1.5996802093175377, "grad_norm": 0.7023512125015259, "learning_rate": 4.5233901061030984e-05, "loss": 0.1305, "step": 44020 }, { "epoch": 1.6000436078203357, "grad_norm": 1.47295343875885, "learning_rate": 4.523085284338985e-05, "loss": 0.1173, "step": 44030 }, { "epoch": 1.600407006323134, "grad_norm": 0.7622318863868713, "learning_rate": 4.522780375407324e-05, "loss": 0.1494, "step": 44040 }, { "epoch": 1.6007704048259321, "grad_norm": 2.0168585777282715, "learning_rate": 4.522475379321254e-05, "loss": 0.1575, "step": 44050 }, { "epoch": 1.6011338033287301, "grad_norm": 0.9191824793815613, "learning_rate": 4.522170296093916e-05, "loss": 0.1111, "step": 44060 }, { "epoch": 1.6014972018315285, "grad_norm": 0.5007340908050537, "learning_rate": 4.521865125738455e-05, "loss": 0.193, "step": 44070 }, { "epoch": 1.6018606003343265, "grad_norm": 0.8389549851417542, "learning_rate": 4.5215598682680186e-05, "loss": 0.1227, "step": 44080 }, { "epoch": 1.6022239988371247, "grad_norm": 0.7387205362319946, "learning_rate": 4.521254523695761e-05, "loss": 0.2035, "step": 44090 }, { "epoch": 1.602587397339923, "grad_norm": 1.1978685855865479, "learning_rate": 4.520949092034837e-05, "loss": 0.1739, "step": 44100 }, { "epoch": 1.6029507958427212, "grad_norm": 1.9989899396896362, "learning_rate": 4.5206435732984085e-05, "loss": 0.1285, "step": 44110 }, { "epoch": 1.6033141943455194, "grad_norm": 0.6451914310455322, "learning_rate": 4.5203379674996365e-05, "loss": 0.1466, "step": 44120 }, { "epoch": 1.6036775928483173, "grad_norm": 0.6689841747283936, "learning_rate": 4.5200322746516904e-05, "loss": 0.113, "step": 44130 }, { "epoch": 1.6040409913511158, "grad_norm": 1.1558260917663574, "learning_rate": 4.519726494767741e-05, "loss": 0.1005, "step": 44140 }, { "epoch": 1.6044043898539138, "grad_norm": 13.844839096069336, "learning_rate": 4.519420627860963e-05, "loss": 0.1279, "step": 44150 }, { "epoch": 1.604767788356712, "grad_norm": 0.6856222152709961, "learning_rate": 4.519114673944536e-05, "loss": 0.1147, "step": 44160 }, { "epoch": 1.6051311868595102, "grad_norm": 0.7829769253730774, "learning_rate": 4.5188086330316405e-05, "loss": 0.1336, "step": 44170 }, { "epoch": 1.6054945853623082, "grad_norm": 1.3698971271514893, "learning_rate": 4.518502505135465e-05, "loss": 0.1158, "step": 44180 }, { "epoch": 1.6058579838651066, "grad_norm": 1.3197015523910522, "learning_rate": 4.5181962902691975e-05, "loss": 0.1293, "step": 44190 }, { "epoch": 1.6062213823679046, "grad_norm": 0.8092926740646362, "learning_rate": 4.517889988446033e-05, "loss": 0.1466, "step": 44200 }, { "epoch": 1.6065847808707028, "grad_norm": 2.015113115310669, "learning_rate": 4.5175835996791684e-05, "loss": 0.1228, "step": 44210 }, { "epoch": 1.606948179373501, "grad_norm": 1.2220087051391602, "learning_rate": 4.5172771239818056e-05, "loss": 0.199, "step": 44220 }, { "epoch": 1.6073115778762992, "grad_norm": 0.5432813167572021, "learning_rate": 4.516970561367149e-05, "loss": 0.1453, "step": 44230 }, { "epoch": 1.6076749763790974, "grad_norm": 0.6337705850601196, "learning_rate": 4.516663911848407e-05, "loss": 0.1257, "step": 44240 }, { "epoch": 1.6080383748818954, "grad_norm": 0.6741940379142761, "learning_rate": 4.5163571754387915e-05, "loss": 0.1062, "step": 44250 }, { "epoch": 1.6084017733846938, "grad_norm": 2.3033409118652344, "learning_rate": 4.516050352151521e-05, "loss": 0.1452, "step": 44260 }, { "epoch": 1.6087651718874918, "grad_norm": 0.4420888125896454, "learning_rate": 4.515743441999814e-05, "loss": 0.1358, "step": 44270 }, { "epoch": 1.60912857039029, "grad_norm": 1.5571812391281128, "learning_rate": 4.515436444996893e-05, "loss": 0.1102, "step": 44280 }, { "epoch": 1.6094919688930882, "grad_norm": 1.084507703781128, "learning_rate": 4.5151293611559865e-05, "loss": 0.1099, "step": 44290 }, { "epoch": 1.6098553673958862, "grad_norm": 0.7025009989738464, "learning_rate": 4.514822190490327e-05, "loss": 0.2296, "step": 44300 }, { "epoch": 1.6102187658986846, "grad_norm": 2.125432252883911, "learning_rate": 4.514514933013147e-05, "loss": 0.1189, "step": 44310 }, { "epoch": 1.6105821644014826, "grad_norm": 0.47693368792533875, "learning_rate": 4.5142075887376856e-05, "loss": 0.1488, "step": 44320 }, { "epoch": 1.6109455629042808, "grad_norm": 0.7935511469841003, "learning_rate": 4.5139001576771865e-05, "loss": 1.8833, "step": 44330 }, { "epoch": 1.611308961407079, "grad_norm": 0.6441402435302734, "learning_rate": 4.513592639844896e-05, "loss": 0.1173, "step": 44340 }, { "epoch": 1.611672359909877, "grad_norm": 1.3646268844604492, "learning_rate": 4.513285035254062e-05, "loss": 0.1171, "step": 44350 }, { "epoch": 1.6120357584126754, "grad_norm": 1.0334749221801758, "learning_rate": 4.512977343917939e-05, "loss": 0.1069, "step": 44360 }, { "epoch": 1.6123991569154734, "grad_norm": 0.3879293203353882, "learning_rate": 4.5126695658497856e-05, "loss": 0.1244, "step": 44370 }, { "epoch": 1.6127625554182716, "grad_norm": 0.6635248064994812, "learning_rate": 4.5123617010628606e-05, "loss": 0.1102, "step": 44380 }, { "epoch": 1.6131259539210698, "grad_norm": 0.8040985465049744, "learning_rate": 4.51205374957043e-05, "loss": 0.1455, "step": 44390 }, { "epoch": 1.613489352423868, "grad_norm": 0.5279836654663086, "learning_rate": 4.511745711385763e-05, "loss": 0.1547, "step": 44400 }, { "epoch": 1.613489352423868, "eval_loss": 0.34678882360458374, "eval_runtime": 179.7828, "eval_samples_per_second": 41.239, "eval_steps_per_second": 5.156, "eval_wer": 0.17216403144117487, "step": 44400 }, { "epoch": 1.6138527509266662, "grad_norm": 1.1375586986541748, "learning_rate": 4.51143758652213e-05, "loss": 0.14, "step": 44410 }, { "epoch": 1.6142161494294642, "grad_norm": 1.5960606336593628, "learning_rate": 4.511129374992809e-05, "loss": 0.1336, "step": 44420 }, { "epoch": 1.6145795479322627, "grad_norm": 0.5347716808319092, "learning_rate": 4.5108210768110785e-05, "loss": 0.1083, "step": 44430 }, { "epoch": 1.6149429464350606, "grad_norm": 1.816926121711731, "learning_rate": 4.510512691990222e-05, "loss": 0.1122, "step": 44440 }, { "epoch": 1.6153063449378589, "grad_norm": 1.2517473697662354, "learning_rate": 4.510204220543528e-05, "loss": 0.144, "step": 44450 }, { "epoch": 1.615669743440657, "grad_norm": 1.0830953121185303, "learning_rate": 4.509895662484286e-05, "loss": 0.1851, "step": 44460 }, { "epoch": 1.616033141943455, "grad_norm": 0.45219525694847107, "learning_rate": 4.50958701782579e-05, "loss": 0.1589, "step": 44470 }, { "epoch": 1.6163965404462535, "grad_norm": 0.940949559211731, "learning_rate": 4.509278286581341e-05, "loss": 0.113, "step": 44480 }, { "epoch": 1.6167599389490515, "grad_norm": 0.7262178659439087, "learning_rate": 4.5089694687642394e-05, "loss": 0.1294, "step": 44490 }, { "epoch": 1.6171233374518497, "grad_norm": 0.8851106762886047, "learning_rate": 4.508660564387791e-05, "loss": 0.1563, "step": 44500 }, { "epoch": 1.6174867359546479, "grad_norm": 1.4259148836135864, "learning_rate": 4.508351573465306e-05, "loss": 0.1298, "step": 44510 }, { "epoch": 1.617850134457446, "grad_norm": 1.7158180475234985, "learning_rate": 4.508042496010098e-05, "loss": 0.197, "step": 44520 }, { "epoch": 1.6182135329602443, "grad_norm": 1.1961179971694946, "learning_rate": 4.507733332035482e-05, "loss": 2.6746, "step": 44530 }, { "epoch": 1.6185769314630423, "grad_norm": 1.0735702514648438, "learning_rate": 4.507424081554782e-05, "loss": 0.1132, "step": 44540 }, { "epoch": 1.6189403299658407, "grad_norm": 0.8479132056236267, "learning_rate": 4.507114744581319e-05, "loss": 0.1411, "step": 44550 }, { "epoch": 1.6193037284686387, "grad_norm": 0.804205596446991, "learning_rate": 4.506805321128424e-05, "loss": 0.1301, "step": 44560 }, { "epoch": 1.6196671269714369, "grad_norm": 0.4933542013168335, "learning_rate": 4.506495811209428e-05, "loss": 0.1765, "step": 44570 }, { "epoch": 1.620030525474235, "grad_norm": 1.0244536399841309, "learning_rate": 4.506186214837666e-05, "loss": 0.131, "step": 44580 }, { "epoch": 1.620393923977033, "grad_norm": 0.4374043941497803, "learning_rate": 4.5058765320264784e-05, "loss": 0.102, "step": 44590 }, { "epoch": 1.6207573224798315, "grad_norm": 0.5329868197441101, "learning_rate": 4.505566762789208e-05, "loss": 0.1168, "step": 44600 }, { "epoch": 1.6211207209826295, "grad_norm": 0.9576613306999207, "learning_rate": 4.5052569071392014e-05, "loss": 0.0948, "step": 44610 }, { "epoch": 1.6214841194854277, "grad_norm": 0.3620557188987732, "learning_rate": 4.50494696508981e-05, "loss": 0.1693, "step": 44620 }, { "epoch": 1.621847517988226, "grad_norm": 119.01215362548828, "learning_rate": 4.504636936654387e-05, "loss": 2.0014, "step": 44630 }, { "epoch": 1.622210916491024, "grad_norm": 0.491005003452301, "learning_rate": 4.504326821846291e-05, "loss": 0.0958, "step": 44640 }, { "epoch": 1.6225743149938223, "grad_norm": 0.8035761713981628, "learning_rate": 4.504016620678883e-05, "loss": 0.6652, "step": 44650 }, { "epoch": 1.6229377134966203, "grad_norm": 1.4501937627792358, "learning_rate": 4.5037063331655305e-05, "loss": 0.1282, "step": 44660 }, { "epoch": 1.6233011119994185, "grad_norm": 0.3285962641239166, "learning_rate": 4.503395959319601e-05, "loss": 0.6589, "step": 44670 }, { "epoch": 1.6236645105022167, "grad_norm": 1.3429205417633057, "learning_rate": 4.5030854991544666e-05, "loss": 0.1224, "step": 44680 }, { "epoch": 1.624027909005015, "grad_norm": 0.6868845224380493, "learning_rate": 4.502774952683506e-05, "loss": 0.1229, "step": 44690 }, { "epoch": 1.6243913075078131, "grad_norm": 0.7645006775856018, "learning_rate": 4.502464319920099e-05, "loss": 0.1587, "step": 44700 }, { "epoch": 1.6247547060106111, "grad_norm": 1.2401680946350098, "learning_rate": 4.502153600877628e-05, "loss": 0.1274, "step": 44710 }, { "epoch": 1.6251181045134095, "grad_norm": 0.4394826292991638, "learning_rate": 4.501842795569483e-05, "loss": 0.1434, "step": 44720 }, { "epoch": 1.6254815030162075, "grad_norm": 0.5105617046356201, "learning_rate": 4.5015319040090545e-05, "loss": 0.1089, "step": 44730 }, { "epoch": 1.6258449015190057, "grad_norm": 1.5043278932571411, "learning_rate": 4.5012209262097365e-05, "loss": 0.1391, "step": 44740 }, { "epoch": 1.626208300021804, "grad_norm": 0.8561335802078247, "learning_rate": 4.5009098621849296e-05, "loss": 0.2735, "step": 44750 }, { "epoch": 1.626571698524602, "grad_norm": 1.71244478225708, "learning_rate": 4.500598711948037e-05, "loss": 0.1855, "step": 44760 }, { "epoch": 1.6269350970274004, "grad_norm": 0.6392226815223694, "learning_rate": 4.500287475512463e-05, "loss": 0.1675, "step": 44770 }, { "epoch": 1.6272984955301983, "grad_norm": 0.9670777916908264, "learning_rate": 4.4999761528916194e-05, "loss": 0.1201, "step": 44780 }, { "epoch": 1.6276618940329965, "grad_norm": 0.6879392862319946, "learning_rate": 4.4996647440989195e-05, "loss": 0.15, "step": 44790 }, { "epoch": 1.6280252925357948, "grad_norm": 1.038004994392395, "learning_rate": 4.49935324914778e-05, "loss": 0.148, "step": 44800 }, { "epoch": 1.628388691038593, "grad_norm": 1.1731406450271606, "learning_rate": 4.499041668051624e-05, "loss": 0.1225, "step": 44810 }, { "epoch": 1.6287520895413912, "grad_norm": 1.0449947118759155, "learning_rate": 4.498730000823873e-05, "loss": 0.1348, "step": 44820 }, { "epoch": 1.6291154880441892, "grad_norm": 0.7107880115509033, "learning_rate": 4.498418247477959e-05, "loss": 0.1185, "step": 44830 }, { "epoch": 1.6294788865469876, "grad_norm": 0.9275081157684326, "learning_rate": 4.498106408027313e-05, "loss": 0.1405, "step": 44840 }, { "epoch": 1.6298422850497856, "grad_norm": 1.5348129272460938, "learning_rate": 4.497794482485371e-05, "loss": 0.1401, "step": 44850 }, { "epoch": 1.6302056835525838, "grad_norm": 1.6144418716430664, "learning_rate": 4.497482470865574e-05, "loss": 0.1191, "step": 44860 }, { "epoch": 1.630569082055382, "grad_norm": 1.1674468517303467, "learning_rate": 4.497170373181363e-05, "loss": 0.7629, "step": 44870 }, { "epoch": 1.63093248055818, "grad_norm": 0.9818703532218933, "learning_rate": 4.496858189446187e-05, "loss": 0.1275, "step": 44880 }, { "epoch": 1.6312958790609784, "grad_norm": 8.3660249710083, "learning_rate": 4.496545919673496e-05, "loss": 0.1166, "step": 44890 }, { "epoch": 1.6316592775637764, "grad_norm": 0.7371792793273926, "learning_rate": 4.496233563876746e-05, "loss": 0.1559, "step": 44900 }, { "epoch": 1.6320226760665746, "grad_norm": 0.9537221789360046, "learning_rate": 4.4959211220693945e-05, "loss": 0.125, "step": 44910 }, { "epoch": 1.6323860745693728, "grad_norm": 0.9887855648994446, "learning_rate": 4.495608594264902e-05, "loss": 0.1406, "step": 44920 }, { "epoch": 1.632749473072171, "grad_norm": 0.7480888962745667, "learning_rate": 4.495295980476737e-05, "loss": 0.1227, "step": 44930 }, { "epoch": 1.6331128715749692, "grad_norm": 0.784050703048706, "learning_rate": 4.494983280718367e-05, "loss": 0.1068, "step": 44940 }, { "epoch": 1.6334762700777672, "grad_norm": 2.8426759243011475, "learning_rate": 4.494670495003265e-05, "loss": 0.1327, "step": 44950 }, { "epoch": 1.6338396685805654, "grad_norm": 3.448587417602539, "learning_rate": 4.494357623344909e-05, "loss": 0.142, "step": 44960 }, { "epoch": 1.6342030670833636, "grad_norm": 0.669575572013855, "learning_rate": 4.4940446657567784e-05, "loss": 0.1398, "step": 44970 }, { "epoch": 1.6345664655861618, "grad_norm": 1.1868761777877808, "learning_rate": 4.493731622252358e-05, "loss": 0.117, "step": 44980 }, { "epoch": 1.63492986408896, "grad_norm": 0.8725171685218811, "learning_rate": 4.4934184928451364e-05, "loss": 0.0914, "step": 44990 }, { "epoch": 1.635293262591758, "grad_norm": 1.300013780593872, "learning_rate": 4.493105277548605e-05, "loss": 0.1761, "step": 45000 }, { "epoch": 1.635293262591758, "eval_loss": 0.3556674122810364, "eval_runtime": 180.1231, "eval_samples_per_second": 41.161, "eval_steps_per_second": 5.146, "eval_wer": 0.17496868589685408, "step": 45000 }, { "epoch": 1.6356566610945564, "grad_norm": 2.436525583267212, "learning_rate": 4.4927919763762574e-05, "loss": 0.1164, "step": 45010 }, { "epoch": 1.6360200595973544, "grad_norm": 1.3236192464828491, "learning_rate": 4.492478589341594e-05, "loss": 0.1496, "step": 45020 }, { "epoch": 1.6363834581001526, "grad_norm": 0.6237584948539734, "learning_rate": 4.4921651164581185e-05, "loss": 0.1449, "step": 45030 }, { "epoch": 1.6367468566029508, "grad_norm": 0.5335447192192078, "learning_rate": 4.491851557739336e-05, "loss": 0.1349, "step": 45040 }, { "epoch": 1.6371102551057488, "grad_norm": 1.4458340406417847, "learning_rate": 4.491537913198757e-05, "loss": 0.1994, "step": 45050 }, { "epoch": 1.6374736536085472, "grad_norm": 1.4140558242797852, "learning_rate": 4.4912241828498944e-05, "loss": 0.1432, "step": 45060 }, { "epoch": 1.6378370521113452, "grad_norm": 1.127317190170288, "learning_rate": 4.4909103667062666e-05, "loss": 0.1932, "step": 45070 }, { "epoch": 1.6382004506141434, "grad_norm": 3.4496073722839355, "learning_rate": 4.490596464781395e-05, "loss": 0.1057, "step": 45080 }, { "epoch": 1.6385638491169416, "grad_norm": 0.663720428943634, "learning_rate": 4.490282477088805e-05, "loss": 0.1081, "step": 45090 }, { "epoch": 1.6389272476197398, "grad_norm": 0.8442180156707764, "learning_rate": 4.4899684036420244e-05, "loss": 0.1354, "step": 45100 }, { "epoch": 1.639290646122538, "grad_norm": 1.3163623809814453, "learning_rate": 4.489654244454585e-05, "loss": 0.1247, "step": 45110 }, { "epoch": 1.639654044625336, "grad_norm": 0.639021635055542, "learning_rate": 4.489339999540023e-05, "loss": 0.1598, "step": 45120 }, { "epoch": 1.6400174431281345, "grad_norm": 0.549207329750061, "learning_rate": 4.489025668911879e-05, "loss": 0.1157, "step": 45130 }, { "epoch": 1.6403808416309325, "grad_norm": 3.4274439811706543, "learning_rate": 4.488711252583696e-05, "loss": 0.1635, "step": 45140 }, { "epoch": 1.6407442401337307, "grad_norm": 3.244072914123535, "learning_rate": 4.488396750569022e-05, "loss": 0.1216, "step": 45150 }, { "epoch": 1.6411076386365289, "grad_norm": 1.9557846784591675, "learning_rate": 4.4880821628814054e-05, "loss": 0.1197, "step": 45160 }, { "epoch": 1.6414710371393268, "grad_norm": 0.8653383851051331, "learning_rate": 4.487767489534402e-05, "loss": 0.1929, "step": 45170 }, { "epoch": 1.6418344356421253, "grad_norm": 0.9569295048713684, "learning_rate": 4.4874527305415706e-05, "loss": 0.1064, "step": 45180 }, { "epoch": 1.6421978341449233, "grad_norm": 0.9595149159431458, "learning_rate": 4.487137885916473e-05, "loss": 0.1087, "step": 45190 }, { "epoch": 1.6425612326477215, "grad_norm": 1.06610906124115, "learning_rate": 4.486822955672673e-05, "loss": 0.1126, "step": 45200 }, { "epoch": 1.6429246311505197, "grad_norm": 0.5096926689147949, "learning_rate": 4.4865079398237407e-05, "loss": 0.1175, "step": 45210 }, { "epoch": 1.6432880296533179, "grad_norm": 0.6575452089309692, "learning_rate": 4.48619283838325e-05, "loss": 0.2033, "step": 45220 }, { "epoch": 1.643651428156116, "grad_norm": 1.5489494800567627, "learning_rate": 4.485877651364777e-05, "loss": 0.1283, "step": 45230 }, { "epoch": 1.644014826658914, "grad_norm": 1.2202279567718506, "learning_rate": 4.485562378781901e-05, "loss": 0.129, "step": 45240 }, { "epoch": 1.6443782251617123, "grad_norm": 2.960289716720581, "learning_rate": 4.485247020648208e-05, "loss": 0.1547, "step": 45250 }, { "epoch": 1.6447416236645105, "grad_norm": 1.178314447402954, "learning_rate": 4.4849315769772835e-05, "loss": 0.1424, "step": 45260 }, { "epoch": 1.6451050221673087, "grad_norm": 0.47237566113471985, "learning_rate": 4.484616047782719e-05, "loss": 0.1003, "step": 45270 }, { "epoch": 1.645468420670107, "grad_norm": 0.7487808465957642, "learning_rate": 4.484300433078112e-05, "loss": 1.3485, "step": 45280 }, { "epoch": 1.6458318191729049, "grad_norm": 1.5242539644241333, "learning_rate": 4.483984732877059e-05, "loss": 0.1301, "step": 45290 }, { "epoch": 1.6461952176757033, "grad_norm": 1.1392406225204468, "learning_rate": 4.4836689471931624e-05, "loss": 0.1385, "step": 45300 }, { "epoch": 1.6465586161785013, "grad_norm": 0.856468677520752, "learning_rate": 4.483353076040029e-05, "loss": 0.1029, "step": 45310 }, { "epoch": 1.6469220146812995, "grad_norm": 0.414461225271225, "learning_rate": 4.483037119431268e-05, "loss": 0.1697, "step": 45320 }, { "epoch": 1.6472854131840977, "grad_norm": 1.7020654678344727, "learning_rate": 4.482721077380494e-05, "loss": 0.1102, "step": 45330 }, { "epoch": 1.6476488116868957, "grad_norm": 0.9631456136703491, "learning_rate": 4.482404949901323e-05, "loss": 0.1193, "step": 45340 }, { "epoch": 1.6480122101896941, "grad_norm": 0.5286620855331421, "learning_rate": 4.482088737007376e-05, "loss": 0.628, "step": 45350 }, { "epoch": 1.6483756086924921, "grad_norm": 1.0761183500289917, "learning_rate": 4.481772438712277e-05, "loss": 0.1131, "step": 45360 }, { "epoch": 1.6487390071952903, "grad_norm": 0.46266233921051025, "learning_rate": 4.481456055029656e-05, "loss": 0.1641, "step": 45370 }, { "epoch": 1.6491024056980885, "grad_norm": 1.1350431442260742, "learning_rate": 4.481139585973142e-05, "loss": 0.122, "step": 45380 }, { "epoch": 1.6494658042008867, "grad_norm": 4.3756632804870605, "learning_rate": 4.4808230315563735e-05, "loss": 0.1424, "step": 45390 }, { "epoch": 1.649829202703685, "grad_norm": 1.386616826057434, "learning_rate": 4.480506391792988e-05, "loss": 0.1579, "step": 45400 }, { "epoch": 1.650192601206483, "grad_norm": 0.6531800031661987, "learning_rate": 4.480189666696629e-05, "loss": 0.1275, "step": 45410 }, { "epoch": 1.6505559997092814, "grad_norm": 1.8200130462646484, "learning_rate": 4.479872856280942e-05, "loss": 0.1943, "step": 45420 }, { "epoch": 1.6509193982120793, "grad_norm": 0.6366170048713684, "learning_rate": 4.47955596055958e-05, "loss": 0.1172, "step": 45430 }, { "epoch": 1.6512827967148775, "grad_norm": 0.8036410808563232, "learning_rate": 4.479238979546193e-05, "loss": 0.1181, "step": 45440 }, { "epoch": 1.6516461952176757, "grad_norm": 0.7934151291847229, "learning_rate": 4.47892191325444e-05, "loss": 0.1319, "step": 45450 }, { "epoch": 1.6520095937204737, "grad_norm": 1.5175780057907104, "learning_rate": 4.4786047616979845e-05, "loss": 0.1328, "step": 45460 }, { "epoch": 1.6523729922232722, "grad_norm": 31.52168083190918, "learning_rate": 4.478287524890489e-05, "loss": 0.4917, "step": 45470 }, { "epoch": 1.6527363907260701, "grad_norm": 0.8360010981559753, "learning_rate": 4.477970202845623e-05, "loss": 0.1071, "step": 45480 }, { "epoch": 1.6530997892288684, "grad_norm": 0.5976376533508301, "learning_rate": 4.4776527955770586e-05, "loss": 0.1098, "step": 45490 }, { "epoch": 1.6534631877316666, "grad_norm": 0.779091477394104, "learning_rate": 4.4773353030984715e-05, "loss": 0.1621, "step": 45500 }, { "epoch": 1.6538265862344648, "grad_norm": 0.7147294282913208, "learning_rate": 4.477017725423542e-05, "loss": 0.2085, "step": 45510 }, { "epoch": 1.654189984737263, "grad_norm": 1.0562430620193481, "learning_rate": 4.4767000625659525e-05, "loss": 0.1489, "step": 45520 }, { "epoch": 1.654553383240061, "grad_norm": 0.6119662523269653, "learning_rate": 4.4763823145393906e-05, "loss": 0.0975, "step": 45530 }, { "epoch": 1.6549167817428592, "grad_norm": 2.1033360958099365, "learning_rate": 4.476064481357547e-05, "loss": 0.1151, "step": 45540 }, { "epoch": 1.6552801802456574, "grad_norm": 0.5644105672836304, "learning_rate": 4.4757465630341154e-05, "loss": 0.1479, "step": 45550 }, { "epoch": 1.6556435787484556, "grad_norm": 1.2466843128204346, "learning_rate": 4.475428559582794e-05, "loss": 0.1047, "step": 45560 }, { "epoch": 1.6560069772512538, "grad_norm": 0.6210132241249084, "learning_rate": 4.475110471017285e-05, "loss": 0.1703, "step": 45570 }, { "epoch": 1.6563703757540518, "grad_norm": 0.5731077194213867, "learning_rate": 4.474792297351293e-05, "loss": 0.1154, "step": 45580 }, { "epoch": 1.6567337742568502, "grad_norm": 1.2748225927352905, "learning_rate": 4.474474038598527e-05, "loss": 0.1106, "step": 45590 }, { "epoch": 1.6570971727596482, "grad_norm": 1.0353822708129883, "learning_rate": 4.4741556947727e-05, "loss": 0.1453, "step": 45600 }, { "epoch": 1.6570971727596482, "eval_loss": 0.35429847240448, "eval_runtime": 180.0899, "eval_samples_per_second": 41.168, "eval_steps_per_second": 5.147, "eval_wer": 0.16677255976909253, "step": 45600 }, { "epoch": 1.6574605712624464, "grad_norm": 0.8197756409645081, "learning_rate": 4.4738372658875286e-05, "loss": 0.6064, "step": 45610 }, { "epoch": 1.6578239697652446, "grad_norm": 1.9029946327209473, "learning_rate": 4.473518751956732e-05, "loss": 0.122, "step": 45620 }, { "epoch": 1.6581873682680426, "grad_norm": 0.897566020488739, "learning_rate": 4.473200152994035e-05, "loss": 0.5106, "step": 45630 }, { "epoch": 1.658550766770841, "grad_norm": 0.448548823595047, "learning_rate": 4.472881469013163e-05, "loss": 0.1316, "step": 45640 }, { "epoch": 1.658914165273639, "grad_norm": 0.9315693974494934, "learning_rate": 4.472562700027849e-05, "loss": 0.1616, "step": 45650 }, { "epoch": 1.6592775637764372, "grad_norm": 0.6731955409049988, "learning_rate": 4.4722438460518255e-05, "loss": 0.1138, "step": 45660 }, { "epoch": 1.6596409622792354, "grad_norm": 0.949320375919342, "learning_rate": 4.4719249070988325e-05, "loss": 0.1464, "step": 45670 }, { "epoch": 1.6600043607820336, "grad_norm": 1.0242235660552979, "learning_rate": 4.471605883182611e-05, "loss": 0.1135, "step": 45680 }, { "epoch": 1.6603677592848318, "grad_norm": 2.5394222736358643, "learning_rate": 4.471318689025813e-05, "loss": 1.5802, "step": 45690 }, { "epoch": 1.6607311577876298, "grad_norm": 0.5729508996009827, "learning_rate": 4.4709995037173305e-05, "loss": 0.44, "step": 45700 }, { "epoch": 1.6610945562904282, "grad_norm": 1.372788906097412, "learning_rate": 4.470680233485492e-05, "loss": 0.1286, "step": 45710 }, { "epoch": 1.6614579547932262, "grad_norm": 1.5759491920471191, "learning_rate": 4.470360878344055e-05, "loss": 0.1221, "step": 45720 }, { "epoch": 1.6618213532960244, "grad_norm": 1.0494245290756226, "learning_rate": 4.470041438306778e-05, "loss": 0.126, "step": 45730 }, { "epoch": 1.6621847517988226, "grad_norm": 0.469928115606308, "learning_rate": 4.469721913387424e-05, "loss": 0.1295, "step": 45740 }, { "epoch": 1.6625481503016206, "grad_norm": 0.9547176361083984, "learning_rate": 4.469402303599761e-05, "loss": 0.1724, "step": 45750 }, { "epoch": 1.662911548804419, "grad_norm": 0.5945098400115967, "learning_rate": 4.469082608957561e-05, "loss": 0.1282, "step": 45760 }, { "epoch": 1.663274947307217, "grad_norm": 0.8782799243927002, "learning_rate": 4.468762829474597e-05, "loss": 0.1594, "step": 45770 }, { "epoch": 1.6636383458100152, "grad_norm": 0.8542808294296265, "learning_rate": 4.4684429651646476e-05, "loss": 0.1147, "step": 45780 }, { "epoch": 1.6640017443128134, "grad_norm": 1.189684271812439, "learning_rate": 4.4681230160414946e-05, "loss": 0.1348, "step": 45790 }, { "epoch": 1.6643651428156117, "grad_norm": 0.9197025895118713, "learning_rate": 4.467802982118923e-05, "loss": 0.1528, "step": 45800 }, { "epoch": 1.6647285413184099, "grad_norm": 0.5935563445091248, "learning_rate": 4.4674828634107226e-05, "loss": 0.1356, "step": 45810 }, { "epoch": 1.6650919398212078, "grad_norm": 0.7441408038139343, "learning_rate": 4.467162659930686e-05, "loss": 0.1553, "step": 45820 }, { "epoch": 1.665455338324006, "grad_norm": 0.5700821280479431, "learning_rate": 4.466842371692609e-05, "loss": 0.1206, "step": 45830 }, { "epoch": 1.6658187368268043, "grad_norm": 1.0566598176956177, "learning_rate": 4.466521998710292e-05, "loss": 0.1137, "step": 45840 }, { "epoch": 1.6661821353296025, "grad_norm": 0.8243798613548279, "learning_rate": 4.4662015409975406e-05, "loss": 0.1531, "step": 45850 }, { "epoch": 1.6665455338324007, "grad_norm": 1.1144201755523682, "learning_rate": 4.465880998568159e-05, "loss": 0.1122, "step": 45860 }, { "epoch": 1.6669089323351987, "grad_norm": 0.8346664309501648, "learning_rate": 4.46556037143596e-05, "loss": 0.1365, "step": 45870 }, { "epoch": 1.667272330837997, "grad_norm": 1.140259027481079, "learning_rate": 4.46523965961476e-05, "loss": 0.1105, "step": 45880 }, { "epoch": 1.667635729340795, "grad_norm": 1.7616723775863647, "learning_rate": 4.464918863118374e-05, "loss": 0.1092, "step": 45890 }, { "epoch": 1.6679991278435933, "grad_norm": 0.5135784149169922, "learning_rate": 4.464597981960625e-05, "loss": 0.1502, "step": 45900 }, { "epoch": 1.6683625263463915, "grad_norm": 1.542801022529602, "learning_rate": 4.464277016155339e-05, "loss": 0.125, "step": 45910 }, { "epoch": 1.6687259248491895, "grad_norm": 0.31144529581069946, "learning_rate": 4.463955965716346e-05, "loss": 0.21, "step": 45920 }, { "epoch": 1.669089323351988, "grad_norm": 0.642985463142395, "learning_rate": 4.463634830657478e-05, "loss": 0.1213, "step": 45930 }, { "epoch": 1.6694527218547859, "grad_norm": 2.417689800262451, "learning_rate": 4.4633136109925716e-05, "loss": 0.1244, "step": 45940 }, { "epoch": 1.669816120357584, "grad_norm": 0.5426376461982727, "learning_rate": 4.462992306735467e-05, "loss": 0.1789, "step": 45950 }, { "epoch": 1.6701795188603823, "grad_norm": 0.5924781560897827, "learning_rate": 4.4626709179000094e-05, "loss": 0.118, "step": 45960 }, { "epoch": 1.6705429173631805, "grad_norm": 0.5799354314804077, "learning_rate": 4.4623494445000435e-05, "loss": 0.1714, "step": 45970 }, { "epoch": 1.6709063158659787, "grad_norm": 0.6282142400741577, "learning_rate": 4.462027886549423e-05, "loss": 0.1099, "step": 45980 }, { "epoch": 1.6712697143687767, "grad_norm": 1.1201330423355103, "learning_rate": 4.461706244062002e-05, "loss": 0.1281, "step": 45990 }, { "epoch": 1.6716331128715751, "grad_norm": 1.1922492980957031, "learning_rate": 4.461384517051638e-05, "loss": 0.1245, "step": 46000 }, { "epoch": 1.671996511374373, "grad_norm": 1.0188281536102295, "learning_rate": 4.461062705532194e-05, "loss": 0.1715, "step": 46010 }, { "epoch": 1.6723599098771713, "grad_norm": 0.5861912369728088, "learning_rate": 4.4607408095175364e-05, "loss": 0.1391, "step": 46020 }, { "epoch": 1.6727233083799695, "grad_norm": 0.6984696388244629, "learning_rate": 4.4604188290215324e-05, "loss": 0.116, "step": 46030 }, { "epoch": 1.6730867068827675, "grad_norm": 0.5184624791145325, "learning_rate": 4.460096764058057e-05, "loss": 0.1173, "step": 46040 }, { "epoch": 1.673450105385566, "grad_norm": 0.39695462584495544, "learning_rate": 4.4597746146409856e-05, "loss": 0.1325, "step": 46050 }, { "epoch": 1.673813503888364, "grad_norm": 0.687271237373352, "learning_rate": 4.459452380784199e-05, "loss": 0.1123, "step": 46060 }, { "epoch": 1.6741769023911621, "grad_norm": 0.8372097015380859, "learning_rate": 4.459130062501582e-05, "loss": 0.1895, "step": 46070 }, { "epoch": 1.6745403008939603, "grad_norm": 1.8692165613174438, "learning_rate": 4.4588076598070206e-05, "loss": 0.1213, "step": 46080 }, { "epoch": 1.6749036993967585, "grad_norm": 12.919623374938965, "learning_rate": 4.458485172714406e-05, "loss": 0.115, "step": 46090 }, { "epoch": 1.6752670978995567, "grad_norm": 0.6733956933021545, "learning_rate": 4.458162601237634e-05, "loss": 0.1473, "step": 46100 }, { "epoch": 1.6756304964023547, "grad_norm": 0.8653566241264343, "learning_rate": 4.457839945390603e-05, "loss": 0.0995, "step": 46110 }, { "epoch": 1.675993894905153, "grad_norm": 0.536120593547821, "learning_rate": 4.4575172051872145e-05, "loss": 0.1494, "step": 46120 }, { "epoch": 1.6763572934079511, "grad_norm": 3.844902753829956, "learning_rate": 4.4571943806413743e-05, "loss": 0.1086, "step": 46130 }, { "epoch": 1.6767206919107494, "grad_norm": 2.0951857566833496, "learning_rate": 4.4568714717669926e-05, "loss": 0.111, "step": 46140 }, { "epoch": 1.6770840904135476, "grad_norm": 2.954204559326172, "learning_rate": 4.456548478577981e-05, "loss": 0.1481, "step": 46150 }, { "epoch": 1.6774474889163455, "grad_norm": 0.7243287563323975, "learning_rate": 4.456225401088258e-05, "loss": 0.1192, "step": 46160 }, { "epoch": 1.677810887419144, "grad_norm": 1.103082299232483, "learning_rate": 4.455902239311741e-05, "loss": 0.156, "step": 46170 }, { "epoch": 1.678174285921942, "grad_norm": 1.2734848260879517, "learning_rate": 4.455578993262357e-05, "loss": 0.1154, "step": 46180 }, { "epoch": 1.6785376844247402, "grad_norm": 0.9912572503089905, "learning_rate": 4.455255662954032e-05, "loss": 0.1228, "step": 46190 }, { "epoch": 1.6789010829275384, "grad_norm": 0.8736640214920044, "learning_rate": 4.454932248400697e-05, "loss": 0.12, "step": 46200 }, { "epoch": 1.6789010829275384, "eval_loss": 0.3570244014263153, "eval_runtime": 179.9492, "eval_samples_per_second": 41.201, "eval_steps_per_second": 5.151, "eval_wer": 0.17022164939096338, "step": 46200 }, { "epoch": 1.6792644814303364, "grad_norm": 3.907130241394043, "learning_rate": 4.454608749616287e-05, "loss": 0.1108, "step": 46210 }, { "epoch": 1.6796278799331348, "grad_norm": 0.3899100720882416, "learning_rate": 4.4542851666147404e-05, "loss": 0.1699, "step": 46220 }, { "epoch": 1.6799912784359328, "grad_norm": 1.6752989292144775, "learning_rate": 4.45396149941e-05, "loss": 0.124, "step": 46230 }, { "epoch": 1.680354676938731, "grad_norm": 5.3016886711120605, "learning_rate": 4.453637748016011e-05, "loss": 0.1239, "step": 46240 }, { "epoch": 1.6807180754415292, "grad_norm": 2.723459482192993, "learning_rate": 4.453313912446722e-05, "loss": 0.1245, "step": 46250 }, { "epoch": 1.6810814739443274, "grad_norm": 2.0152530670166016, "learning_rate": 4.4529899927160854e-05, "loss": 0.1056, "step": 46260 }, { "epoch": 1.6814448724471256, "grad_norm": 0.7301231622695923, "learning_rate": 4.452665988838059e-05, "loss": 0.1899, "step": 46270 }, { "epoch": 1.6818082709499236, "grad_norm": 0.7544482350349426, "learning_rate": 4.4523419008266045e-05, "loss": 0.1233, "step": 46280 }, { "epoch": 1.682171669452722, "grad_norm": 0.9912691712379456, "learning_rate": 4.4520177286956824e-05, "loss": 0.1263, "step": 46290 }, { "epoch": 1.68253506795552, "grad_norm": 2.0335001945495605, "learning_rate": 4.451693472459262e-05, "loss": 0.1328, "step": 46300 }, { "epoch": 1.6828984664583182, "grad_norm": 0.5679906606674194, "learning_rate": 4.451369132131314e-05, "loss": 0.0828, "step": 46310 }, { "epoch": 1.6832618649611164, "grad_norm": 1.3262155055999756, "learning_rate": 4.451044707725814e-05, "loss": 0.1426, "step": 46320 }, { "epoch": 1.6836252634639144, "grad_norm": 1.1101963520050049, "learning_rate": 4.4507201992567386e-05, "loss": 0.1385, "step": 46330 }, { "epoch": 1.6839886619667128, "grad_norm": 0.8079712390899658, "learning_rate": 4.4503956067380704e-05, "loss": 0.1278, "step": 46340 }, { "epoch": 1.6843520604695108, "grad_norm": 0.79506516456604, "learning_rate": 4.450070930183795e-05, "loss": 0.1281, "step": 46350 }, { "epoch": 1.684715458972309, "grad_norm": 1.1767312288284302, "learning_rate": 4.4497461696079024e-05, "loss": 0.1299, "step": 46360 }, { "epoch": 1.6850788574751072, "grad_norm": 0.7996610403060913, "learning_rate": 4.449421325024384e-05, "loss": 0.156, "step": 46370 }, { "epoch": 1.6854422559779054, "grad_norm": 0.638761579990387, "learning_rate": 4.449096396447237e-05, "loss": 0.1197, "step": 46380 }, { "epoch": 1.6858056544807036, "grad_norm": 3.2339584827423096, "learning_rate": 4.448771383890461e-05, "loss": 0.0992, "step": 46390 }, { "epoch": 1.6861690529835016, "grad_norm": 1.0168710947036743, "learning_rate": 4.448446287368059e-05, "loss": 0.1471, "step": 46400 }, { "epoch": 1.6865324514862998, "grad_norm": 0.7783123850822449, "learning_rate": 4.44812110689404e-05, "loss": 0.1012, "step": 46410 }, { "epoch": 1.686895849989098, "grad_norm": 0.7757607698440552, "learning_rate": 4.447795842482414e-05, "loss": 0.1355, "step": 46420 }, { "epoch": 1.6872592484918962, "grad_norm": 1.9442954063415527, "learning_rate": 4.447470494147195e-05, "loss": 0.0953, "step": 46430 }, { "epoch": 1.6876226469946944, "grad_norm": 1.0810720920562744, "learning_rate": 4.447145061902401e-05, "loss": 0.1037, "step": 46440 }, { "epoch": 1.6879860454974924, "grad_norm": 5.812492847442627, "learning_rate": 4.4468195457620556e-05, "loss": 0.1376, "step": 46450 }, { "epoch": 1.6883494440002909, "grad_norm": 2.3588967323303223, "learning_rate": 4.4464939457401825e-05, "loss": 0.1164, "step": 46460 }, { "epoch": 1.6887128425030888, "grad_norm": 1.6339848041534424, "learning_rate": 4.4461682618508106e-05, "loss": 0.1604, "step": 46470 }, { "epoch": 1.689076241005887, "grad_norm": 1.6590059995651245, "learning_rate": 4.445842494107973e-05, "loss": 0.1529, "step": 46480 }, { "epoch": 1.6894396395086853, "grad_norm": 0.8776388764381409, "learning_rate": 4.445516642525705e-05, "loss": 0.1165, "step": 46490 }, { "epoch": 1.6898030380114832, "grad_norm": 2.1173806190490723, "learning_rate": 4.4451907071180474e-05, "loss": 0.1431, "step": 46500 }, { "epoch": 1.6901664365142817, "grad_norm": 1.3882853984832764, "learning_rate": 4.444864687899043e-05, "loss": 0.134, "step": 46510 }, { "epoch": 1.6905298350170797, "grad_norm": 0.5224485993385315, "learning_rate": 4.4445385848827395e-05, "loss": 0.1586, "step": 46520 }, { "epoch": 1.6908932335198779, "grad_norm": 1.3461922407150269, "learning_rate": 4.444212398083187e-05, "loss": 0.119, "step": 46530 }, { "epoch": 1.691256632022676, "grad_norm": 0.7005299925804138, "learning_rate": 4.4438861275144395e-05, "loss": 0.1046, "step": 46540 }, { "epoch": 1.6916200305254743, "grad_norm": 2.497610092163086, "learning_rate": 4.4435597731905554e-05, "loss": 0.1164, "step": 46550 }, { "epoch": 1.6919834290282725, "grad_norm": 4.168522357940674, "learning_rate": 4.443233335125596e-05, "loss": 0.1342, "step": 46560 }, { "epoch": 1.6923468275310705, "grad_norm": 0.59686678647995, "learning_rate": 4.442906813333626e-05, "loss": 0.1536, "step": 46570 }, { "epoch": 1.692710226033869, "grad_norm": 0.9300062656402588, "learning_rate": 4.442580207828715e-05, "loss": 0.1176, "step": 46580 }, { "epoch": 1.6930736245366669, "grad_norm": 0.8389412760734558, "learning_rate": 4.442253518624934e-05, "loss": 0.1085, "step": 46590 }, { "epoch": 1.693437023039465, "grad_norm": 1.0124256610870361, "learning_rate": 4.441926745736359e-05, "loss": 0.1357, "step": 46600 }, { "epoch": 1.6938004215422633, "grad_norm": 1.5312106609344482, "learning_rate": 4.4415998891770704e-05, "loss": 0.0911, "step": 46610 }, { "epoch": 1.6941638200450613, "grad_norm": 0.7724300622940063, "learning_rate": 4.441272948961151e-05, "loss": 0.1338, "step": 46620 }, { "epoch": 1.6945272185478597, "grad_norm": 0.9552409052848816, "learning_rate": 4.4409459251026864e-05, "loss": 0.1181, "step": 46630 }, { "epoch": 1.6948906170506577, "grad_norm": 0.7531419992446899, "learning_rate": 4.440618817615768e-05, "loss": 0.1301, "step": 46640 }, { "epoch": 1.695254015553456, "grad_norm": 1.4831467866897583, "learning_rate": 4.44029162651449e-05, "loss": 0.1734, "step": 46650 }, { "epoch": 1.695617414056254, "grad_norm": 1.008574366569519, "learning_rate": 4.43996435181295e-05, "loss": 0.1205, "step": 46660 }, { "epoch": 1.6959808125590523, "grad_norm": 0.7653814554214478, "learning_rate": 4.4396369935252475e-05, "loss": 0.1886, "step": 46670 }, { "epoch": 1.6963442110618505, "grad_norm": 0.668803334236145, "learning_rate": 4.439309551665488e-05, "loss": 0.1066, "step": 46680 }, { "epoch": 1.6967076095646485, "grad_norm": 0.6227422952651978, "learning_rate": 4.438982026247781e-05, "loss": 0.1191, "step": 46690 }, { "epoch": 1.6970710080674467, "grad_norm": 4.826232433319092, "learning_rate": 4.438654417286237e-05, "loss": 0.1594, "step": 46700 }, { "epoch": 1.697434406570245, "grad_norm": 7.682708740234375, "learning_rate": 4.4383267247949714e-05, "loss": 0.1131, "step": 46710 }, { "epoch": 1.6977978050730431, "grad_norm": 1.0136793851852417, "learning_rate": 4.4379989487881046e-05, "loss": 0.1575, "step": 46720 }, { "epoch": 1.6981612035758413, "grad_norm": 2.774850606918335, "learning_rate": 4.437671089279758e-05, "loss": 0.1384, "step": 46730 }, { "epoch": 1.6985246020786393, "grad_norm": 11.532723426818848, "learning_rate": 4.4373431462840584e-05, "loss": 0.1338, "step": 46740 }, { "epoch": 1.6988880005814377, "grad_norm": 1.9600322246551514, "learning_rate": 4.437015119815136e-05, "loss": 0.1311, "step": 46750 }, { "epoch": 1.6992513990842357, "grad_norm": 0.5886809229850769, "learning_rate": 4.436687009887124e-05, "loss": 0.1647, "step": 46760 }, { "epoch": 1.699614797587034, "grad_norm": 2.5982067584991455, "learning_rate": 4.436358816514159e-05, "loss": 0.2078, "step": 46770 }, { "epoch": 1.6999781960898321, "grad_norm": 0.7927113771438599, "learning_rate": 4.436030539710383e-05, "loss": 0.1017, "step": 46780 }, { "epoch": 1.7003415945926301, "grad_norm": 0.5954931974411011, "learning_rate": 4.435702179489939e-05, "loss": 0.1354, "step": 46790 }, { "epoch": 1.7007049930954286, "grad_norm": 1.10712468624115, "learning_rate": 4.4353737358669755e-05, "loss": 0.1531, "step": 46800 }, { "epoch": 1.7007049930954286, "eval_loss": 0.3568388819694519, "eval_runtime": 179.9769, "eval_samples_per_second": 41.194, "eval_steps_per_second": 5.151, "eval_wer": 0.16614627770617388, "step": 46800 }, { "epoch": 1.7010683915982265, "grad_norm": 2.1790926456451416, "learning_rate": 4.435045208855644e-05, "loss": 0.1336, "step": 46810 }, { "epoch": 1.7014317901010247, "grad_norm": 0.3032105267047882, "learning_rate": 4.434716598470099e-05, "loss": 0.1234, "step": 46820 }, { "epoch": 1.701795188603823, "grad_norm": 0.8836687207221985, "learning_rate": 4.434387904724499e-05, "loss": 0.1007, "step": 46830 }, { "epoch": 1.7021585871066212, "grad_norm": 0.9671638011932373, "learning_rate": 4.4340591276330075e-05, "loss": 0.3498, "step": 46840 }, { "epoch": 1.7025219856094194, "grad_norm": 3.225950002670288, "learning_rate": 4.4337302672097894e-05, "loss": 0.1493, "step": 46850 }, { "epoch": 1.7028853841122173, "grad_norm": 1.0754051208496094, "learning_rate": 4.4334013234690144e-05, "loss": 0.11, "step": 46860 }, { "epoch": 1.7032487826150158, "grad_norm": 0.3170652687549591, "learning_rate": 4.433072296424855e-05, "loss": 0.1543, "step": 46870 }, { "epoch": 1.7036121811178138, "grad_norm": 2.504772424697876, "learning_rate": 4.4327431860914885e-05, "loss": 0.1221, "step": 46880 }, { "epoch": 1.703975579620612, "grad_norm": 3.071819305419922, "learning_rate": 4.4324139924830956e-05, "loss": 2.0837, "step": 46890 }, { "epoch": 1.7043389781234102, "grad_norm": 0.6085606813430786, "learning_rate": 4.4320847156138584e-05, "loss": 0.2395, "step": 46900 }, { "epoch": 1.7047023766262082, "grad_norm": 1.1668941974639893, "learning_rate": 4.431755355497965e-05, "loss": 1.7169, "step": 46910 }, { "epoch": 1.7050657751290066, "grad_norm": 2.0856447219848633, "learning_rate": 4.431425912149607e-05, "loss": 0.1911, "step": 46920 }, { "epoch": 1.7054291736318046, "grad_norm": 2.468442916870117, "learning_rate": 4.431096385582979e-05, "loss": 0.1408, "step": 46930 }, { "epoch": 1.7057925721346028, "grad_norm": 0.5875902771949768, "learning_rate": 4.430766775812278e-05, "loss": 0.0959, "step": 46940 }, { "epoch": 1.706155970637401, "grad_norm": 0.7292661070823669, "learning_rate": 4.430437082851706e-05, "loss": 0.1416, "step": 46950 }, { "epoch": 1.7065193691401992, "grad_norm": 0.9893856644630432, "learning_rate": 4.43010730671547e-05, "loss": 0.1233, "step": 46960 }, { "epoch": 1.7068827676429974, "grad_norm": 0.5555475950241089, "learning_rate": 4.4297774474177755e-05, "loss": 0.1817, "step": 46970 }, { "epoch": 1.7072461661457954, "grad_norm": 2.520148754119873, "learning_rate": 4.429447504972838e-05, "loss": 0.123, "step": 46980 }, { "epoch": 1.7076095646485938, "grad_norm": 0.4278533160686493, "learning_rate": 4.429117479394873e-05, "loss": 0.1161, "step": 46990 }, { "epoch": 1.7079729631513918, "grad_norm": 1.8297946453094482, "learning_rate": 4.428787370698099e-05, "loss": 0.1347, "step": 47000 }, { "epoch": 1.70833636165419, "grad_norm": 1.1131938695907593, "learning_rate": 4.42845717889674e-05, "loss": 0.113, "step": 47010 }, { "epoch": 1.7086997601569882, "grad_norm": 0.6185646057128906, "learning_rate": 4.428126904005022e-05, "loss": 0.179, "step": 47020 }, { "epoch": 1.7090631586597862, "grad_norm": 0.5343379378318787, "learning_rate": 4.4277965460371775e-05, "loss": 0.1108, "step": 47030 }, { "epoch": 1.7094265571625846, "grad_norm": 1.2087206840515137, "learning_rate": 4.427466105007437e-05, "loss": 0.1308, "step": 47040 }, { "epoch": 1.7097899556653826, "grad_norm": 1.3067313432693481, "learning_rate": 4.4271355809300416e-05, "loss": 0.126, "step": 47050 }, { "epoch": 1.7101533541681808, "grad_norm": 3.69439697265625, "learning_rate": 4.42680497381923e-05, "loss": 0.1235, "step": 47060 }, { "epoch": 1.710516752670979, "grad_norm": 1.7959043979644775, "learning_rate": 4.4264742836892484e-05, "loss": 0.1596, "step": 47070 }, { "epoch": 1.710880151173777, "grad_norm": 0.7556483149528503, "learning_rate": 4.4261435105543434e-05, "loss": 0.139, "step": 47080 }, { "epoch": 1.7112435496765754, "grad_norm": 1.0691754817962646, "learning_rate": 4.425812654428768e-05, "loss": 0.1137, "step": 47090 }, { "epoch": 1.7116069481793734, "grad_norm": 0.39985036849975586, "learning_rate": 4.425481715326778e-05, "loss": 0.1147, "step": 47100 }, { "epoch": 1.7119703466821716, "grad_norm": 0.8595932126045227, "learning_rate": 4.425150693262631e-05, "loss": 0.124, "step": 47110 }, { "epoch": 1.7123337451849698, "grad_norm": 1.0184409618377686, "learning_rate": 4.424819588250591e-05, "loss": 0.1196, "step": 47120 }, { "epoch": 1.712697143687768, "grad_norm": 0.9141554236412048, "learning_rate": 4.4244884003049234e-05, "loss": 0.1139, "step": 47130 }, { "epoch": 1.7130605421905662, "grad_norm": 3.891220808029175, "learning_rate": 4.424157129439897e-05, "loss": 0.1123, "step": 47140 }, { "epoch": 1.7134239406933642, "grad_norm": 1.1739959716796875, "learning_rate": 4.4238257756697875e-05, "loss": 0.1472, "step": 47150 }, { "epoch": 1.7137873391961627, "grad_norm": 1.062530517578125, "learning_rate": 4.423494339008869e-05, "loss": 0.1222, "step": 47160 }, { "epoch": 1.7141507376989606, "grad_norm": 0.840980052947998, "learning_rate": 4.423162819471424e-05, "loss": 0.209, "step": 47170 }, { "epoch": 1.7145141362017589, "grad_norm": 1.5960949659347534, "learning_rate": 4.4228312170717353e-05, "loss": 0.2366, "step": 47180 }, { "epoch": 1.714877534704557, "grad_norm": 1.9222055673599243, "learning_rate": 4.4224995318240914e-05, "loss": 0.1195, "step": 47190 }, { "epoch": 1.715240933207355, "grad_norm": 0.9651756286621094, "learning_rate": 4.422167763742783e-05, "loss": 1.1511, "step": 47200 }, { "epoch": 1.7156043317101535, "grad_norm": 0.7599210739135742, "learning_rate": 4.421835912842105e-05, "loss": 0.1067, "step": 47210 }, { "epoch": 1.7159677302129515, "grad_norm": 0.9147046804428101, "learning_rate": 4.4215039791363546e-05, "loss": 0.2011, "step": 47220 }, { "epoch": 1.7163311287157497, "grad_norm": 3.473452091217041, "learning_rate": 4.421171962639835e-05, "loss": 0.1297, "step": 47230 }, { "epoch": 1.7166945272185479, "grad_norm": 0.8271628618240356, "learning_rate": 4.420839863366851e-05, "loss": 0.1042, "step": 47240 }, { "epoch": 1.717057925721346, "grad_norm": 1.2343850135803223, "learning_rate": 4.4205076813317115e-05, "loss": 0.1401, "step": 47250 }, { "epoch": 1.7174213242241443, "grad_norm": 2.5408592224121094, "learning_rate": 4.420175416548729e-05, "loss": 0.1331, "step": 47260 }, { "epoch": 1.7177847227269423, "grad_norm": 0.9061927795410156, "learning_rate": 4.41984306903222e-05, "loss": 0.151, "step": 47270 }, { "epoch": 1.7181481212297407, "grad_norm": 1.4675298929214478, "learning_rate": 4.419510638796505e-05, "loss": 0.133, "step": 47280 }, { "epoch": 1.7185115197325387, "grad_norm": 0.5093546509742737, "learning_rate": 4.4191781258559044e-05, "loss": 0.1091, "step": 47290 }, { "epoch": 1.718874918235337, "grad_norm": 1.7886688709259033, "learning_rate": 4.418845530224748e-05, "loss": 0.1364, "step": 47300 }, { "epoch": 1.719238316738135, "grad_norm": 0.9077005982398987, "learning_rate": 4.418512851917365e-05, "loss": 0.0886, "step": 47310 }, { "epoch": 1.719601715240933, "grad_norm": 0.9992018938064575, "learning_rate": 4.418180090948088e-05, "loss": 0.204, "step": 47320 }, { "epoch": 1.7199651137437315, "grad_norm": 3.1602091789245605, "learning_rate": 4.417847247331257e-05, "loss": 0.1099, "step": 47330 }, { "epoch": 1.7203285122465295, "grad_norm": 0.867824912071228, "learning_rate": 4.417514321081212e-05, "loss": 0.1015, "step": 47340 }, { "epoch": 1.7206919107493277, "grad_norm": 0.8763206005096436, "learning_rate": 4.4171813122122966e-05, "loss": 0.1669, "step": 47350 }, { "epoch": 1.721055309252126, "grad_norm": 0.579663097858429, "learning_rate": 4.4168482207388604e-05, "loss": 0.1216, "step": 47360 }, { "epoch": 1.721418707754924, "grad_norm": 0.9902794361114502, "learning_rate": 4.416515046675255e-05, "loss": 0.1507, "step": 47370 }, { "epoch": 1.7217821062577223, "grad_norm": 1.082514762878418, "learning_rate": 4.4161817900358334e-05, "loss": 0.1218, "step": 47380 }, { "epoch": 1.7221455047605203, "grad_norm": 1.6784402132034302, "learning_rate": 4.415848450834958e-05, "loss": 0.1112, "step": 47390 }, { "epoch": 1.7225089032633185, "grad_norm": 1.4955846071243286, "learning_rate": 4.415515029086989e-05, "loss": 0.1539, "step": 47400 }, { "epoch": 1.7225089032633185, "eval_loss": 0.3519718050956726, "eval_runtime": 180.5197, "eval_samples_per_second": 41.07, "eval_steps_per_second": 5.135, "eval_wer": 0.1726087824713635, "step": 47400 }, { "epoch": 1.7228723017661167, "grad_norm": 1.609122395515442, "learning_rate": 4.415181524806293e-05, "loss": 0.1218, "step": 47410 }, { "epoch": 1.723235700268915, "grad_norm": 0.5392347574234009, "learning_rate": 4.4148479380072386e-05, "loss": 0.1304, "step": 47420 }, { "epoch": 1.7235990987717131, "grad_norm": 1.874009132385254, "learning_rate": 4.4145142687042e-05, "loss": 0.1251, "step": 47430 }, { "epoch": 1.7239624972745111, "grad_norm": 0.5162834525108337, "learning_rate": 4.4141805169115534e-05, "loss": 0.0922, "step": 47440 }, { "epoch": 1.7243258957773095, "grad_norm": 0.5982137322425842, "learning_rate": 4.41384668264368e-05, "loss": 0.1274, "step": 47450 }, { "epoch": 1.7246892942801075, "grad_norm": 3.50940203666687, "learning_rate": 4.413512765914961e-05, "loss": 0.134, "step": 47460 }, { "epoch": 1.7250526927829057, "grad_norm": 0.49185237288475037, "learning_rate": 4.413178766739786e-05, "loss": 0.1573, "step": 47470 }, { "epoch": 1.725416091285704, "grad_norm": 0.7658770680427551, "learning_rate": 4.412844685132545e-05, "loss": 0.1231, "step": 47480 }, { "epoch": 1.725779489788502, "grad_norm": 0.7022835612297058, "learning_rate": 4.4125105211076324e-05, "loss": 0.124, "step": 47490 }, { "epoch": 1.7261428882913004, "grad_norm": 0.7009884119033813, "learning_rate": 4.4121762746794456e-05, "loss": 0.1207, "step": 47500 }, { "epoch": 1.7265062867940983, "grad_norm": 1.4415069818496704, "learning_rate": 4.4118419458623875e-05, "loss": 0.1223, "step": 47510 }, { "epoch": 1.7268696852968966, "grad_norm": 0.8234976530075073, "learning_rate": 4.411507534670862e-05, "loss": 0.1524, "step": 47520 }, { "epoch": 1.7272330837996948, "grad_norm": 0.6677774786949158, "learning_rate": 4.411173041119278e-05, "loss": 0.1298, "step": 47530 }, { "epoch": 1.727596482302493, "grad_norm": 9.5011625289917, "learning_rate": 4.410838465222048e-05, "loss": 0.1264, "step": 47540 }, { "epoch": 1.7279598808052912, "grad_norm": 1.5103870630264282, "learning_rate": 4.410503806993587e-05, "loss": 0.1182, "step": 47550 }, { "epoch": 1.7283232793080892, "grad_norm": 1.5079401731491089, "learning_rate": 4.410169066448314e-05, "loss": 0.1295, "step": 47560 }, { "epoch": 1.7286866778108876, "grad_norm": 1.1625335216522217, "learning_rate": 4.4098342436006536e-05, "loss": 0.169, "step": 47570 }, { "epoch": 1.7290500763136856, "grad_norm": 0.8692395687103271, "learning_rate": 4.40949933846503e-05, "loss": 0.147, "step": 47580 }, { "epoch": 1.7294134748164838, "grad_norm": 0.49405890703201294, "learning_rate": 4.409164351055873e-05, "loss": 0.1297, "step": 47590 }, { "epoch": 1.729776873319282, "grad_norm": 2.2081878185272217, "learning_rate": 4.408829281387619e-05, "loss": 0.161, "step": 47600 }, { "epoch": 1.73014027182208, "grad_norm": 2.5445384979248047, "learning_rate": 4.408494129474701e-05, "loss": 0.105, "step": 47610 }, { "epoch": 1.7305036703248784, "grad_norm": 0.4841686487197876, "learning_rate": 4.408158895331562e-05, "loss": 0.1352, "step": 47620 }, { "epoch": 1.7308670688276764, "grad_norm": 91.45899963378906, "learning_rate": 4.407823578972646e-05, "loss": 1.4893, "step": 47630 }, { "epoch": 1.7312304673304746, "grad_norm": 1.3897613286972046, "learning_rate": 4.4074881804124e-05, "loss": 0.1109, "step": 47640 }, { "epoch": 1.7315938658332728, "grad_norm": 2.621211528778076, "learning_rate": 4.407152699665275e-05, "loss": 0.1332, "step": 47650 }, { "epoch": 1.7319572643360708, "grad_norm": 0.8939427733421326, "learning_rate": 4.406817136745726e-05, "loss": 0.1101, "step": 47660 }, { "epoch": 1.7323206628388692, "grad_norm": 0.9848506450653076, "learning_rate": 4.4064814916682105e-05, "loss": 0.1855, "step": 47670 }, { "epoch": 1.7326840613416672, "grad_norm": 0.7746869921684265, "learning_rate": 4.406145764447192e-05, "loss": 0.1791, "step": 47680 }, { "epoch": 1.7330474598444654, "grad_norm": 0.6325744390487671, "learning_rate": 4.405809955097133e-05, "loss": 0.1067, "step": 47690 }, { "epoch": 1.7334108583472636, "grad_norm": 0.4917563498020172, "learning_rate": 4.405474063632505e-05, "loss": 0.1405, "step": 47700 }, { "epoch": 1.7337742568500618, "grad_norm": 0.8248608708381653, "learning_rate": 4.405138090067779e-05, "loss": 0.2285, "step": 47710 }, { "epoch": 1.73413765535286, "grad_norm": 1.1260930299758911, "learning_rate": 4.404802034417431e-05, "loss": 0.1606, "step": 47720 }, { "epoch": 1.734501053855658, "grad_norm": 1.282873272895813, "learning_rate": 4.404465896695941e-05, "loss": 0.1251, "step": 47730 }, { "epoch": 1.7348644523584564, "grad_norm": 1.245103120803833, "learning_rate": 4.404129676917791e-05, "loss": 0.1009, "step": 47740 }, { "epoch": 1.7352278508612544, "grad_norm": 0.8262288570404053, "learning_rate": 4.4037933750974686e-05, "loss": 0.2019, "step": 47750 }, { "epoch": 1.7355912493640526, "grad_norm": 0.6815000772476196, "learning_rate": 4.403456991249464e-05, "loss": 0.1395, "step": 47760 }, { "epoch": 1.7359546478668508, "grad_norm": 0.3317665159702301, "learning_rate": 4.403120525388269e-05, "loss": 0.1484, "step": 47770 }, { "epoch": 1.7363180463696488, "grad_norm": 3.1392834186553955, "learning_rate": 4.402783977528383e-05, "loss": 0.1404, "step": 47780 }, { "epoch": 1.7366814448724472, "grad_norm": 0.8862357139587402, "learning_rate": 4.4024473476843043e-05, "loss": 0.1012, "step": 47790 }, { "epoch": 1.7370448433752452, "grad_norm": 0.40148264169692993, "learning_rate": 4.402110635870539e-05, "loss": 0.1284, "step": 47800 }, { "epoch": 1.7374082418780434, "grad_norm": 0.6237661838531494, "learning_rate": 4.401773842101594e-05, "loss": 0.1583, "step": 47810 }, { "epoch": 1.7377716403808416, "grad_norm": 0.6144997477531433, "learning_rate": 4.4014369663919805e-05, "loss": 0.2531, "step": 47820 }, { "epoch": 1.7381350388836398, "grad_norm": 0.6017129421234131, "learning_rate": 4.4011000087562135e-05, "loss": 0.1141, "step": 47830 }, { "epoch": 1.738498437386438, "grad_norm": 1.1838932037353516, "learning_rate": 4.400762969208812e-05, "loss": 0.145, "step": 47840 }, { "epoch": 1.738861835889236, "grad_norm": 1.8152750730514526, "learning_rate": 4.400425847764297e-05, "loss": 0.1485, "step": 47850 }, { "epoch": 1.7392252343920345, "grad_norm": 1.8269041776657104, "learning_rate": 4.400088644437193e-05, "loss": 0.1248, "step": 47860 }, { "epoch": 1.7395886328948325, "grad_norm": 0.36247947812080383, "learning_rate": 4.39975135924203e-05, "loss": 0.1293, "step": 47870 }, { "epoch": 1.7399520313976307, "grad_norm": 0.5409418344497681, "learning_rate": 4.399413992193341e-05, "loss": 0.0932, "step": 47880 }, { "epoch": 1.7403154299004289, "grad_norm": 0.8623117804527283, "learning_rate": 4.3990765433056616e-05, "loss": 0.1024, "step": 47890 }, { "epoch": 1.7406788284032269, "grad_norm": 1.7151434421539307, "learning_rate": 4.39873901259353e-05, "loss": 0.1591, "step": 47900 }, { "epoch": 1.7410422269060253, "grad_norm": 0.580970287322998, "learning_rate": 4.39840140007149e-05, "loss": 0.1124, "step": 47910 }, { "epoch": 1.7414056254088233, "grad_norm": 0.7153110504150391, "learning_rate": 4.3980637057540884e-05, "loss": 0.2013, "step": 47920 }, { "epoch": 1.7417690239116215, "grad_norm": 0.6043591499328613, "learning_rate": 4.397725929655875e-05, "loss": 1.2071, "step": 47930 }, { "epoch": 1.7421324224144197, "grad_norm": 1.4633545875549316, "learning_rate": 4.397388071791403e-05, "loss": 0.1533, "step": 47940 }, { "epoch": 1.7424958209172177, "grad_norm": 2.011232376098633, "learning_rate": 4.3970501321752314e-05, "loss": 0.1288, "step": 47950 }, { "epoch": 1.742859219420016, "grad_norm": 1.238098382949829, "learning_rate": 4.396712110821918e-05, "loss": 0.1553, "step": 47960 }, { "epoch": 1.743222617922814, "grad_norm": 0.30882275104522705, "learning_rate": 4.3963740077460285e-05, "loss": 0.1547, "step": 47970 }, { "epoch": 1.7435860164256123, "grad_norm": 0.6614134311676025, "learning_rate": 4.39603582296213e-05, "loss": 0.0926, "step": 47980 }, { "epoch": 1.7439494149284105, "grad_norm": 0.8839965462684631, "learning_rate": 4.3956975564847944e-05, "loss": 0.1228, "step": 47990 }, { "epoch": 1.7443128134312087, "grad_norm": 1.4926025867462158, "learning_rate": 4.395359208328597e-05, "loss": 0.1692, "step": 48000 }, { "epoch": 1.7443128134312087, "eval_loss": 0.3166210651397705, "eval_runtime": 179.7213, "eval_samples_per_second": 41.253, "eval_steps_per_second": 5.158, "eval_wer": 0.1702307259425999, "step": 48000 }, { "epoch": 1.744676211934007, "grad_norm": 0.8344828486442566, "learning_rate": 4.395020778508114e-05, "loss": 0.115, "step": 48010 }, { "epoch": 1.7450396104368049, "grad_norm": 0.7698808908462524, "learning_rate": 4.394682267037928e-05, "loss": 0.11, "step": 48020 }, { "epoch": 1.7454030089396033, "grad_norm": 0.7840538024902344, "learning_rate": 4.394343673932625e-05, "loss": 0.1209, "step": 48030 }, { "epoch": 1.7457664074424013, "grad_norm": 0.9926084876060486, "learning_rate": 4.394004999206792e-05, "loss": 0.1711, "step": 48040 }, { "epoch": 1.7461298059451995, "grad_norm": 0.8097075819969177, "learning_rate": 4.3936662428750234e-05, "loss": 0.1564, "step": 48050 }, { "epoch": 1.7464932044479977, "grad_norm": 1.1143872737884521, "learning_rate": 4.393327404951915e-05, "loss": 0.1254, "step": 48060 }, { "epoch": 1.7468566029507957, "grad_norm": 0.8600324988365173, "learning_rate": 4.392988485452063e-05, "loss": 0.1512, "step": 48070 }, { "epoch": 1.7472200014535941, "grad_norm": 1.2914844751358032, "learning_rate": 4.3926494843900745e-05, "loss": 0.1215, "step": 48080 }, { "epoch": 1.7475833999563921, "grad_norm": 2.0937047004699707, "learning_rate": 4.3923104017805524e-05, "loss": 0.1222, "step": 48090 }, { "epoch": 1.7479467984591903, "grad_norm": 0.5339716076850891, "learning_rate": 4.391971237638108e-05, "loss": 0.1561, "step": 48100 }, { "epoch": 1.7483101969619885, "grad_norm": 2.8374857902526855, "learning_rate": 4.391631991977356e-05, "loss": 0.1204, "step": 48110 }, { "epoch": 1.7486735954647867, "grad_norm": 0.6171733140945435, "learning_rate": 4.39129266481291e-05, "loss": 0.1742, "step": 48120 }, { "epoch": 1.749036993967585, "grad_norm": 1.0918267965316772, "learning_rate": 4.390953256159394e-05, "loss": 0.1062, "step": 48130 }, { "epoch": 1.749400392470383, "grad_norm": 1.273437738418579, "learning_rate": 4.3906137660314296e-05, "loss": 0.1166, "step": 48140 }, { "epoch": 1.7497637909731814, "grad_norm": 0.9305084943771362, "learning_rate": 4.390274194443645e-05, "loss": 0.1264, "step": 48150 }, { "epoch": 1.7501271894759793, "grad_norm": 1.1687837839126587, "learning_rate": 4.389934541410671e-05, "loss": 0.1437, "step": 48160 }, { "epoch": 1.7504905879787775, "grad_norm": 0.5981254577636719, "learning_rate": 4.389594806947142e-05, "loss": 0.155, "step": 48170 }, { "epoch": 1.7508539864815758, "grad_norm": 0.8380612730979919, "learning_rate": 4.389254991067695e-05, "loss": 0.118, "step": 48180 }, { "epoch": 1.7512173849843737, "grad_norm": 0.9481167197227478, "learning_rate": 4.388915093786973e-05, "loss": 0.1164, "step": 48190 }, { "epoch": 1.7515807834871722, "grad_norm": 0.4648977220058441, "learning_rate": 4.3885751151196206e-05, "loss": 0.139, "step": 48200 }, { "epoch": 1.7519441819899702, "grad_norm": 1.5835154056549072, "learning_rate": 4.388235055080287e-05, "loss": 0.1288, "step": 48210 }, { "epoch": 1.7523075804927684, "grad_norm": 2.212986469268799, "learning_rate": 4.387894913683622e-05, "loss": 0.1271, "step": 48220 }, { "epoch": 1.7526709789955666, "grad_norm": 1.1150215864181519, "learning_rate": 4.3875887168782186e-05, "loss": 3.4347, "step": 48230 }, { "epoch": 1.7530343774983645, "grad_norm": 4.514034271240234, "learning_rate": 4.3872484209430055e-05, "loss": 0.1084, "step": 48240 }, { "epoch": 1.753397776001163, "grad_norm": 1.3234679698944092, "learning_rate": 4.386908043692973e-05, "loss": 0.1258, "step": 48250 }, { "epoch": 1.753761174503961, "grad_norm": 0.579300045967102, "learning_rate": 4.3865675851427856e-05, "loss": 0.1375, "step": 48260 }, { "epoch": 1.7541245730067592, "grad_norm": Infinity, "learning_rate": 4.3862611029481096e-05, "loss": 3.7136, "step": 48270 }, { "epoch": 1.7544879715095574, "grad_norm": 0.5648366808891296, "learning_rate": 4.3859204899680476e-05, "loss": 0.1164, "step": 48280 }, { "epoch": 1.7548513700123556, "grad_norm": 2.9147396087646484, "learning_rate": 4.38557979573038e-05, "loss": 0.1269, "step": 48290 }, { "epoch": 1.7552147685151538, "grad_norm": 1.5536600351333618, "learning_rate": 4.385239020249789e-05, "loss": 0.1275, "step": 48300 }, { "epoch": 1.7555781670179518, "grad_norm": 1.4855754375457764, "learning_rate": 4.384898163540956e-05, "loss": 0.1281, "step": 48310 }, { "epoch": 1.7559415655207502, "grad_norm": 182.5458984375, "learning_rate": 4.384557225618567e-05, "loss": 2.7885, "step": 48320 }, { "epoch": 1.7563049640235482, "grad_norm": 1.1808582544326782, "learning_rate": 4.3842162064973134e-05, "loss": 0.1378, "step": 48330 }, { "epoch": 1.7566683625263464, "grad_norm": 2.970055341720581, "learning_rate": 4.383875106191888e-05, "loss": 0.1319, "step": 48340 }, { "epoch": 1.7570317610291446, "grad_norm": 1.0342578887939453, "learning_rate": 4.383533924716986e-05, "loss": 0.1254, "step": 48350 }, { "epoch": 1.7573951595319426, "grad_norm": 2.821300745010376, "learning_rate": 4.383192662087309e-05, "loss": 0.1098, "step": 48360 }, { "epoch": 1.757758558034741, "grad_norm": 4.711688041687012, "learning_rate": 4.382851318317561e-05, "loss": 0.2667, "step": 48370 }, { "epoch": 1.758121956537539, "grad_norm": 0.6478423476219177, "learning_rate": 4.382509893422448e-05, "loss": 0.1041, "step": 48380 }, { "epoch": 1.7584853550403372, "grad_norm": 0.4265103042125702, "learning_rate": 4.382168387416683e-05, "loss": 0.0854, "step": 48390 }, { "epoch": 1.7588487535431354, "grad_norm": 1.3017734289169312, "learning_rate": 4.381826800314979e-05, "loss": 0.1411, "step": 48400 }, { "epoch": 1.7592121520459336, "grad_norm": 2.5378410816192627, "learning_rate": 4.3814851321320524e-05, "loss": 0.1361, "step": 48410 }, { "epoch": 1.7595755505487318, "grad_norm": 1.133738398551941, "learning_rate": 4.381143382882627e-05, "loss": 0.1628, "step": 48420 }, { "epoch": 1.7599389490515298, "grad_norm": 0.851696252822876, "learning_rate": 4.3808015525814254e-05, "loss": 0.1237, "step": 48430 }, { "epoch": 1.7603023475543282, "grad_norm": 0.6289827823638916, "learning_rate": 4.380459641243177e-05, "loss": 0.1126, "step": 48440 }, { "epoch": 1.7606657460571262, "grad_norm": 0.9123024940490723, "learning_rate": 4.380117648882614e-05, "loss": 0.1588, "step": 48450 }, { "epoch": 1.7610291445599244, "grad_norm": 0.9411369562149048, "learning_rate": 4.379775575514471e-05, "loss": 0.1505, "step": 48460 }, { "epoch": 1.7613925430627226, "grad_norm": 0.7461705207824707, "learning_rate": 4.379433421153486e-05, "loss": 0.1404, "step": 48470 }, { "epoch": 1.7617559415655206, "grad_norm": 3.351199150085449, "learning_rate": 4.3790911858144025e-05, "loss": 0.1126, "step": 48480 }, { "epoch": 1.762119340068319, "grad_norm": 0.8594498634338379, "learning_rate": 4.378748869511965e-05, "loss": 0.0944, "step": 48490 }, { "epoch": 1.762482738571117, "grad_norm": 8.456245422363281, "learning_rate": 4.378406472260924e-05, "loss": 0.1368, "step": 48500 }, { "epoch": 1.7628461370739152, "grad_norm": 2.1628379821777344, "learning_rate": 4.3780639940760306e-05, "loss": 0.1052, "step": 48510 }, { "epoch": 1.7632095355767134, "grad_norm": 0.7315357327461243, "learning_rate": 4.377721434972043e-05, "loss": 0.1247, "step": 48520 }, { "epoch": 1.7635729340795114, "grad_norm": 0.8521216511726379, "learning_rate": 4.377378794963719e-05, "loss": 4.0948, "step": 48530 }, { "epoch": 1.7639363325823099, "grad_norm": 1.6623167991638184, "learning_rate": 4.377036074065823e-05, "loss": 0.1117, "step": 48540 }, { "epoch": 1.7642997310851078, "grad_norm": 0.8901768326759338, "learning_rate": 4.3766932722931206e-05, "loss": 0.1484, "step": 48550 }, { "epoch": 1.764663129587906, "grad_norm": 1.522369623184204, "learning_rate": 4.3763503896603826e-05, "loss": 0.1206, "step": 48560 }, { "epoch": 1.7650265280907043, "grad_norm": 0.5716699957847595, "learning_rate": 4.3760074261823824e-05, "loss": 0.1744, "step": 48570 }, { "epoch": 1.7653899265935025, "grad_norm": 2.5722410678863525, "learning_rate": 4.375664381873896e-05, "loss": 0.1224, "step": 48580 }, { "epoch": 1.7657533250963007, "grad_norm": 2.1870505809783936, "learning_rate": 4.3753212567497065e-05, "loss": 0.1261, "step": 48590 }, { "epoch": 1.7661167235990987, "grad_norm": 0.8871011137962341, "learning_rate": 4.374978050824596e-05, "loss": 0.1399, "step": 48600 }, { "epoch": 1.7661167235990987, "eval_loss": 0.3405693769454956, "eval_runtime": 179.6168, "eval_samples_per_second": 41.277, "eval_steps_per_second": 5.161, "eval_wer": 0.16414035979450686, "step": 48600 }, { "epoch": 1.766480122101897, "grad_norm": 1.1147382259368896, "learning_rate": 4.374634764113352e-05, "loss": 0.1198, "step": 48610 }, { "epoch": 1.766843520604695, "grad_norm": 0.48618343472480774, "learning_rate": 4.374291396630767e-05, "loss": 0.199, "step": 48620 }, { "epoch": 1.7672069191074933, "grad_norm": 0.9088806509971619, "learning_rate": 4.373947948391633e-05, "loss": 0.1168, "step": 48630 }, { "epoch": 1.7675703176102915, "grad_norm": 0.4064035415649414, "learning_rate": 4.373604419410751e-05, "loss": 0.0935, "step": 48640 }, { "epoch": 1.7679337161130895, "grad_norm": 0.7718721032142639, "learning_rate": 4.373260809702921e-05, "loss": 0.1229, "step": 48650 }, { "epoch": 1.768297114615888, "grad_norm": 0.9764898419380188, "learning_rate": 4.3729171192829465e-05, "loss": 0.1043, "step": 48660 }, { "epoch": 1.7686605131186859, "grad_norm": 1.2039941549301147, "learning_rate": 4.372573348165638e-05, "loss": 0.142, "step": 48670 }, { "epoch": 1.769023911621484, "grad_norm": 0.5939382314682007, "learning_rate": 4.3722294963658064e-05, "loss": 0.1373, "step": 48680 }, { "epoch": 1.7693873101242823, "grad_norm": 1.034637451171875, "learning_rate": 4.3718855638982664e-05, "loss": 0.114, "step": 48690 }, { "epoch": 1.7697507086270805, "grad_norm": 1.1438782215118408, "learning_rate": 4.371541550777838e-05, "loss": 0.123, "step": 48700 }, { "epoch": 1.7701141071298787, "grad_norm": 0.8836175799369812, "learning_rate": 4.3711974570193435e-05, "loss": 0.1159, "step": 48710 }, { "epoch": 1.7704775056326767, "grad_norm": 0.4500117897987366, "learning_rate": 4.370853282637609e-05, "loss": 0.157, "step": 48720 }, { "epoch": 1.7708409041354751, "grad_norm": 0.8643542528152466, "learning_rate": 4.370509027647462e-05, "loss": 0.4179, "step": 48730 }, { "epoch": 1.771204302638273, "grad_norm": 1.6022706031799316, "learning_rate": 4.370164692063737e-05, "loss": 0.1222, "step": 48740 }, { "epoch": 1.7715677011410713, "grad_norm": 2.06821870803833, "learning_rate": 4.3698202759012685e-05, "loss": 0.1353, "step": 48750 }, { "epoch": 1.7719310996438695, "grad_norm": 0.6547145843505859, "learning_rate": 4.369475779174898e-05, "loss": 0.1053, "step": 48760 }, { "epoch": 1.7722944981466675, "grad_norm": 1.154436707496643, "learning_rate": 4.369131201899468e-05, "loss": 0.1642, "step": 48770 }, { "epoch": 1.772657896649466, "grad_norm": 1.4460755586624146, "learning_rate": 4.3687865440898243e-05, "loss": 0.1194, "step": 48780 }, { "epoch": 1.773021295152264, "grad_norm": 0.8164231777191162, "learning_rate": 4.368441805760818e-05, "loss": 0.1205, "step": 48790 }, { "epoch": 1.7733846936550621, "grad_norm": 3.0235085487365723, "learning_rate": 4.3680969869273016e-05, "loss": 0.2118, "step": 48800 }, { "epoch": 1.7737480921578603, "grad_norm": 1.397639513015747, "learning_rate": 4.367752087604134e-05, "loss": 0.1163, "step": 48810 }, { "epoch": 1.7741114906606583, "grad_norm": 0.5514954328536987, "learning_rate": 4.3674071078061726e-05, "loss": 0.1373, "step": 48820 }, { "epoch": 1.7744748891634567, "grad_norm": 1.3183518648147583, "learning_rate": 4.3670620475482836e-05, "loss": 0.1059, "step": 48830 }, { "epoch": 1.7748382876662547, "grad_norm": 0.6846873164176941, "learning_rate": 4.366716906845335e-05, "loss": 0.1139, "step": 48840 }, { "epoch": 1.775201686169053, "grad_norm": 1.2583421468734741, "learning_rate": 4.366371685712196e-05, "loss": 0.1248, "step": 48850 }, { "epoch": 1.7755650846718511, "grad_norm": 0.7057945728302002, "learning_rate": 4.366026384163742e-05, "loss": 0.1089, "step": 48860 }, { "epoch": 1.7759284831746494, "grad_norm": 1.1777584552764893, "learning_rate": 4.36568100221485e-05, "loss": 0.1842, "step": 48870 }, { "epoch": 1.7762918816774476, "grad_norm": 0.8768916726112366, "learning_rate": 4.3653355398804025e-05, "loss": 0.1095, "step": 48880 }, { "epoch": 1.7766552801802455, "grad_norm": 1.5699349641799927, "learning_rate": 4.364989997175283e-05, "loss": 0.0982, "step": 48890 }, { "epoch": 1.777018678683044, "grad_norm": 1.1270577907562256, "learning_rate": 4.36464437411438e-05, "loss": 0.1329, "step": 48900 }, { "epoch": 1.777382077185842, "grad_norm": 0.545153021812439, "learning_rate": 4.3642986707125856e-05, "loss": 0.1169, "step": 48910 }, { "epoch": 1.7777454756886402, "grad_norm": 1.2134042978286743, "learning_rate": 4.363952886984795e-05, "loss": 0.1704, "step": 48920 }, { "epoch": 1.7781088741914384, "grad_norm": 1.079684853553772, "learning_rate": 4.3636070229459055e-05, "loss": 0.1077, "step": 48930 }, { "epoch": 1.7784722726942364, "grad_norm": 0.6559361815452576, "learning_rate": 4.3632610786108205e-05, "loss": 0.1044, "step": 48940 }, { "epoch": 1.7788356711970348, "grad_norm": 0.9042558670043945, "learning_rate": 4.3629150539944454e-05, "loss": 0.8377, "step": 48950 }, { "epoch": 1.7791990696998328, "grad_norm": 2.102360725402832, "learning_rate": 4.362568949111689e-05, "loss": 0.1407, "step": 48960 }, { "epoch": 1.779562468202631, "grad_norm": 2.581956148147583, "learning_rate": 4.3622227639774635e-05, "loss": 0.182, "step": 48970 }, { "epoch": 1.7799258667054292, "grad_norm": 0.9113497138023376, "learning_rate": 4.361876498606685e-05, "loss": 0.0965, "step": 48980 }, { "epoch": 1.7802892652082274, "grad_norm": 2.428302049636841, "learning_rate": 4.361530153014273e-05, "loss": 0.1093, "step": 48990 }, { "epoch": 1.7806526637110256, "grad_norm": 1.9562546014785767, "learning_rate": 4.361183727215149e-05, "loss": 0.1437, "step": 49000 }, { "epoch": 1.7810160622138236, "grad_norm": 0.7445639967918396, "learning_rate": 4.360837221224241e-05, "loss": 0.1037, "step": 49010 }, { "epoch": 1.781379460716622, "grad_norm": 0.9966205954551697, "learning_rate": 4.360490635056478e-05, "loss": 0.1649, "step": 49020 }, { "epoch": 1.78174285921942, "grad_norm": 1.8854800462722778, "learning_rate": 4.360143968726793e-05, "loss": 0.1139, "step": 49030 }, { "epoch": 1.7821062577222182, "grad_norm": 0.5688827037811279, "learning_rate": 4.3597972222501225e-05, "loss": 0.1134, "step": 49040 }, { "epoch": 1.7824696562250164, "grad_norm": 0.7284519076347351, "learning_rate": 4.359450395641408e-05, "loss": 0.1636, "step": 49050 }, { "epoch": 1.7828330547278144, "grad_norm": 0.7459525465965271, "learning_rate": 4.359103488915591e-05, "loss": 0.1282, "step": 49060 }, { "epoch": 1.7831964532306128, "grad_norm": 0.3692184090614319, "learning_rate": 4.35875650208762e-05, "loss": 0.1491, "step": 49070 }, { "epoch": 1.7835598517334108, "grad_norm": 1.8872483968734741, "learning_rate": 4.358409435172443e-05, "loss": 0.1701, "step": 49080 }, { "epoch": 1.783923250236209, "grad_norm": 2.4084055423736572, "learning_rate": 4.358062288185018e-05, "loss": 0.1038, "step": 49090 }, { "epoch": 1.7842866487390072, "grad_norm": 1.3348972797393799, "learning_rate": 4.357715061140299e-05, "loss": 0.1723, "step": 49100 }, { "epoch": 1.7846500472418052, "grad_norm": 1.2129530906677246, "learning_rate": 4.357367754053248e-05, "loss": 0.1127, "step": 49110 }, { "epoch": 1.7850134457446036, "grad_norm": 0.3813287615776062, "learning_rate": 4.3570203669388285e-05, "loss": 0.1193, "step": 49120 }, { "epoch": 1.7853768442474016, "grad_norm": 0.5162807703018188, "learning_rate": 4.356672899812009e-05, "loss": 0.1023, "step": 49130 }, { "epoch": 1.7857402427501998, "grad_norm": 1.0435444116592407, "learning_rate": 4.356325352687761e-05, "loss": 0.0866, "step": 49140 }, { "epoch": 1.786103641252998, "grad_norm": 1.3136024475097656, "learning_rate": 4.355977725581058e-05, "loss": 0.1726, "step": 49150 }, { "epoch": 1.7864670397557962, "grad_norm": 3.7956295013427734, "learning_rate": 4.355630018506878e-05, "loss": 0.1215, "step": 49160 }, { "epoch": 1.7868304382585944, "grad_norm": 0.8079971075057983, "learning_rate": 4.3552822314802025e-05, "loss": 0.1389, "step": 49170 }, { "epoch": 1.7871938367613924, "grad_norm": 0.7602683901786804, "learning_rate": 4.354934364516018e-05, "loss": 0.1433, "step": 49180 }, { "epoch": 1.7875572352641909, "grad_norm": 1.5899375677108765, "learning_rate": 4.3545864176293104e-05, "loss": 0.1194, "step": 49190 }, { "epoch": 1.7879206337669888, "grad_norm": 57.75679016113281, "learning_rate": 4.354238390835073e-05, "loss": 0.4817, "step": 49200 }, { "epoch": 1.7879206337669888, "eval_loss": 0.3314359784126282, "eval_runtime": 180.9298, "eval_samples_per_second": 40.977, "eval_steps_per_second": 5.124, "eval_wer": 0.17101130938333908, "step": 49200 }, { "epoch": 1.788284032269787, "grad_norm": 0.689213216304779, "learning_rate": 4.353890284148301e-05, "loss": 0.1062, "step": 49210 }, { "epoch": 1.7886474307725853, "grad_norm": 0.43430793285369873, "learning_rate": 4.3535420975839924e-05, "loss": 0.1184, "step": 49220 }, { "epoch": 1.7890108292753832, "grad_norm": 0.5645721554756165, "learning_rate": 4.353193831157151e-05, "loss": 0.5598, "step": 49230 }, { "epoch": 1.7893742277781817, "grad_norm": 0.9227817058563232, "learning_rate": 4.352845484882779e-05, "loss": 0.1123, "step": 49240 }, { "epoch": 1.7897376262809797, "grad_norm": 1.031924843788147, "learning_rate": 4.35249705877589e-05, "loss": 0.1266, "step": 49250 }, { "epoch": 1.7901010247837779, "grad_norm": 0.7044590711593628, "learning_rate": 4.3521485528514914e-05, "loss": 0.1203, "step": 49260 }, { "epoch": 1.790464423286576, "grad_norm": 0.669763445854187, "learning_rate": 4.3517999671246034e-05, "loss": 0.1206, "step": 49270 }, { "epoch": 1.7908278217893743, "grad_norm": 1.149156928062439, "learning_rate": 4.351451301610243e-05, "loss": 0.1309, "step": 49280 }, { "epoch": 1.7911912202921725, "grad_norm": 0.42814984917640686, "learning_rate": 4.3511025563234334e-05, "loss": 0.6053, "step": 49290 }, { "epoch": 1.7915546187949705, "grad_norm": 1.2521979808807373, "learning_rate": 4.350753731279201e-05, "loss": 0.1397, "step": 49300 }, { "epoch": 1.791918017297769, "grad_norm": 1.2080492973327637, "learning_rate": 4.3504048264925756e-05, "loss": 0.1416, "step": 49310 }, { "epoch": 1.7922814158005669, "grad_norm": 0.479490727186203, "learning_rate": 4.3500558419785897e-05, "loss": 0.1247, "step": 49320 }, { "epoch": 1.792644814303365, "grad_norm": 0.6005672216415405, "learning_rate": 4.349706777752279e-05, "loss": 0.1049, "step": 49330 }, { "epoch": 1.7930082128061633, "grad_norm": 0.7790218591690063, "learning_rate": 4.349357633828687e-05, "loss": 0.111, "step": 49340 }, { "epoch": 1.7933716113089613, "grad_norm": 0.9393801689147949, "learning_rate": 4.3490084102228523e-05, "loss": 0.1228, "step": 49350 }, { "epoch": 1.7937350098117597, "grad_norm": 6.127364158630371, "learning_rate": 4.348659106949825e-05, "loss": 0.1369, "step": 49360 }, { "epoch": 1.7940984083145577, "grad_norm": 0.746756911277771, "learning_rate": 4.3483097240246546e-05, "loss": 0.1515, "step": 49370 }, { "epoch": 1.794461806817356, "grad_norm": 0.8121843934059143, "learning_rate": 4.347960261462394e-05, "loss": 0.1007, "step": 49380 }, { "epoch": 1.794825205320154, "grad_norm": 1.695778250694275, "learning_rate": 4.347610719278101e-05, "loss": 0.1302, "step": 49390 }, { "epoch": 1.795188603822952, "grad_norm": 0.9138917922973633, "learning_rate": 4.3472610974868354e-05, "loss": 0.1758, "step": 49400 }, { "epoch": 1.7955520023257505, "grad_norm": 0.6245046257972717, "learning_rate": 4.3469113961036625e-05, "loss": 0.1269, "step": 49410 }, { "epoch": 1.7959154008285485, "grad_norm": 0.4156048595905304, "learning_rate": 4.3465616151436484e-05, "loss": 0.1782, "step": 49420 }, { "epoch": 1.7962787993313467, "grad_norm": 0.7024033069610596, "learning_rate": 4.346211754621865e-05, "loss": 0.1817, "step": 49430 }, { "epoch": 1.796642197834145, "grad_norm": 1.7370237112045288, "learning_rate": 4.345861814553385e-05, "loss": 0.1453, "step": 49440 }, { "epoch": 1.7970055963369431, "grad_norm": 2.4251365661621094, "learning_rate": 4.3455117949532875e-05, "loss": 1.928, "step": 49450 }, { "epoch": 1.7973689948397413, "grad_norm": 0.7227508425712585, "learning_rate": 4.3451616958366524e-05, "loss": 0.1225, "step": 49460 }, { "epoch": 1.7977323933425393, "grad_norm": 0.27948006987571716, "learning_rate": 4.344811517218566e-05, "loss": 0.1819, "step": 49470 }, { "epoch": 1.7980957918453377, "grad_norm": 0.583686888217926, "learning_rate": 4.344461259114116e-05, "loss": 0.1155, "step": 49480 }, { "epoch": 1.7984591903481357, "grad_norm": 0.7126079797744751, "learning_rate": 4.344110921538391e-05, "loss": 0.1466, "step": 49490 }, { "epoch": 1.798822588850934, "grad_norm": 1.089173674583435, "learning_rate": 4.343760504506488e-05, "loss": 0.142, "step": 49500 }, { "epoch": 1.7991859873537321, "grad_norm": 0.950932502746582, "learning_rate": 4.343410008033506e-05, "loss": 0.1036, "step": 49510 }, { "epoch": 1.7995493858565301, "grad_norm": 0.6006519794464111, "learning_rate": 4.343059432134545e-05, "loss": 0.2374, "step": 49520 }, { "epoch": 1.7999127843593286, "grad_norm": 2.3363699913024902, "learning_rate": 4.342708776824711e-05, "loss": 0.1084, "step": 49530 }, { "epoch": 1.8002761828621265, "grad_norm": 0.5308919548988342, "learning_rate": 4.342358042119111e-05, "loss": 0.1194, "step": 49540 }, { "epoch": 1.8006395813649247, "grad_norm": 0.5925958752632141, "learning_rate": 4.3420072280328594e-05, "loss": 0.1374, "step": 49550 }, { "epoch": 1.801002979867723, "grad_norm": 0.9156503081321716, "learning_rate": 4.34165633458107e-05, "loss": 0.1201, "step": 49560 }, { "epoch": 1.8013663783705212, "grad_norm": 1.0838543176651, "learning_rate": 4.341305361778862e-05, "loss": 0.1442, "step": 49570 }, { "epoch": 1.8017297768733194, "grad_norm": 0.5390272736549377, "learning_rate": 4.340954309641357e-05, "loss": 0.1318, "step": 49580 }, { "epoch": 1.8020931753761174, "grad_norm": 1.546189785003662, "learning_rate": 4.340603178183681e-05, "loss": 0.1106, "step": 49590 }, { "epoch": 1.8024565738789158, "grad_norm": 0.6207401752471924, "learning_rate": 4.340251967420963e-05, "loss": 0.1309, "step": 49600 }, { "epoch": 1.8028199723817138, "grad_norm": 0.7735322713851929, "learning_rate": 4.339900677368335e-05, "loss": 0.1116, "step": 49610 }, { "epoch": 1.803183370884512, "grad_norm": 0.30109134316444397, "learning_rate": 4.3395493080409335e-05, "loss": 0.1424, "step": 49620 }, { "epoch": 1.8035467693873102, "grad_norm": 0.9261472225189209, "learning_rate": 4.339197859453897e-05, "loss": 0.1243, "step": 49630 }, { "epoch": 1.8039101678901082, "grad_norm": 1.4092273712158203, "learning_rate": 4.3388463316223696e-05, "loss": 0.0978, "step": 49640 }, { "epoch": 1.8042735663929066, "grad_norm": 1.4663946628570557, "learning_rate": 4.338494724561496e-05, "loss": 0.1159, "step": 49650 }, { "epoch": 1.8046369648957046, "grad_norm": 0.9966018795967102, "learning_rate": 4.338143038286425e-05, "loss": 0.1606, "step": 49660 }, { "epoch": 1.8050003633985028, "grad_norm": 1.0136394500732422, "learning_rate": 4.3377912728123124e-05, "loss": 0.147, "step": 49670 }, { "epoch": 1.805363761901301, "grad_norm": 0.647540807723999, "learning_rate": 4.337439428154312e-05, "loss": 3.8271, "step": 49680 }, { "epoch": 1.805727160404099, "grad_norm": 0.9579293131828308, "learning_rate": 4.3370875043275835e-05, "loss": 0.1021, "step": 49690 }, { "epoch": 1.8060905589068974, "grad_norm": 0.9937068819999695, "learning_rate": 4.3367355013472924e-05, "loss": 0.1295, "step": 49700 }, { "epoch": 1.8064539574096954, "grad_norm": 0.7757039070129395, "learning_rate": 4.3363834192286026e-05, "loss": 0.1347, "step": 49710 }, { "epoch": 1.8068173559124936, "grad_norm": 0.4056714177131653, "learning_rate": 4.336031257986685e-05, "loss": 0.1225, "step": 49720 }, { "epoch": 1.8071807544152918, "grad_norm": 0.7865206003189087, "learning_rate": 4.335679017636714e-05, "loss": 0.1149, "step": 49730 }, { "epoch": 1.80754415291809, "grad_norm": 0.8593509793281555, "learning_rate": 4.335326698193864e-05, "loss": 0.2783, "step": 49740 }, { "epoch": 1.8079075514208882, "grad_norm": 1.0015538930892944, "learning_rate": 4.334974299673318e-05, "loss": 0.1143, "step": 49750 }, { "epoch": 1.8082709499236862, "grad_norm": 0.6025313138961792, "learning_rate": 4.334621822090258e-05, "loss": 0.0951, "step": 49760 }, { "epoch": 1.8086343484264846, "grad_norm": 0.5842722058296204, "learning_rate": 4.33426926545987e-05, "loss": 0.1468, "step": 49770 }, { "epoch": 1.8089977469292826, "grad_norm": 0.5118249654769897, "learning_rate": 4.333916629797348e-05, "loss": 0.1095, "step": 49780 }, { "epoch": 1.8093611454320808, "grad_norm": 2.118723154067993, "learning_rate": 4.333563915117882e-05, "loss": 0.1106, "step": 49790 }, { "epoch": 1.809724543934879, "grad_norm": 0.7443241477012634, "learning_rate": 4.3332111214366714e-05, "loss": 0.1103, "step": 49800 }, { "epoch": 1.809724543934879, "eval_loss": 0.3337153196334839, "eval_runtime": 180.2527, "eval_samples_per_second": 41.131, "eval_steps_per_second": 5.143, "eval_wer": 0.1598834570769873, "step": 49800 }, { "epoch": 1.810087942437677, "grad_norm": 1.2676368951797485, "learning_rate": 4.332858248768916e-05, "loss": 0.1016, "step": 49810 }, { "epoch": 1.8104513409404754, "grad_norm": 2.088113307952881, "learning_rate": 4.3325052971298195e-05, "loss": 0.1931, "step": 49820 }, { "epoch": 1.8108147394432734, "grad_norm": 0.6147329807281494, "learning_rate": 4.332152266534591e-05, "loss": 0.0989, "step": 49830 }, { "epoch": 1.8111781379460716, "grad_norm": 1.3880411386489868, "learning_rate": 4.3317991569984384e-05, "loss": 0.1297, "step": 49840 }, { "epoch": 1.8115415364488698, "grad_norm": 1.794396996498108, "learning_rate": 4.331445968536579e-05, "loss": 0.1191, "step": 49850 }, { "epoch": 1.811904934951668, "grad_norm": 1.7588627338409424, "learning_rate": 4.331092701164229e-05, "loss": 0.1594, "step": 49860 }, { "epoch": 1.8122683334544663, "grad_norm": 0.9384113550186157, "learning_rate": 4.330739354896609e-05, "loss": 0.1634, "step": 49870 }, { "epoch": 1.8126317319572642, "grad_norm": 0.5903241634368896, "learning_rate": 4.330385929748945e-05, "loss": 0.1436, "step": 49880 }, { "epoch": 1.8129951304600627, "grad_norm": 0.6767405271530151, "learning_rate": 4.330032425736462e-05, "loss": 0.1139, "step": 49890 }, { "epoch": 1.8133585289628606, "grad_norm": 0.8860883116722107, "learning_rate": 4.329678842874395e-05, "loss": 0.1279, "step": 49900 }, { "epoch": 1.8137219274656589, "grad_norm": 58.24811553955078, "learning_rate": 4.3293251811779755e-05, "loss": 0.5721, "step": 49910 }, { "epoch": 1.814085325968457, "grad_norm": 0.965320348739624, "learning_rate": 4.328971440662443e-05, "loss": 0.1297, "step": 49920 }, { "epoch": 1.814448724471255, "grad_norm": 0.5799686908721924, "learning_rate": 4.328617621343039e-05, "loss": 0.1166, "step": 49930 }, { "epoch": 1.8148121229740535, "grad_norm": 2.6377480030059814, "learning_rate": 4.3282637232350074e-05, "loss": 0.0991, "step": 49940 }, { "epoch": 1.8151755214768515, "grad_norm": 0.9475420117378235, "learning_rate": 4.327909746353597e-05, "loss": 0.119, "step": 49950 }, { "epoch": 1.8155389199796497, "grad_norm": 0.36662977933883667, "learning_rate": 4.32755569071406e-05, "loss": 0.1224, "step": 49960 }, { "epoch": 1.8159023184824479, "grad_norm": 1.361423373222351, "learning_rate": 4.3272015563316506e-05, "loss": 0.1463, "step": 49970 }, { "epoch": 1.8162657169852459, "grad_norm": 1.3705862760543823, "learning_rate": 4.326847343221627e-05, "loss": 0.9571, "step": 49980 }, { "epoch": 1.8166291154880443, "grad_norm": 1.0312581062316895, "learning_rate": 4.326493051399251e-05, "loss": 0.1055, "step": 49990 }, { "epoch": 1.8169925139908423, "grad_norm": 1.8485617637634277, "learning_rate": 4.326138680879789e-05, "loss": 0.1521, "step": 50000 }, { "epoch": 1.8173559124936405, "grad_norm": 0.8536475300788879, "learning_rate": 4.325784231678508e-05, "loss": 0.1067, "step": 50010 }, { "epoch": 1.8177193109964387, "grad_norm": 0.8633929491043091, "learning_rate": 4.325429703810681e-05, "loss": 0.1326, "step": 50020 }, { "epoch": 1.818082709499237, "grad_norm": 0.9228955507278442, "learning_rate": 4.325075097291582e-05, "loss": 0.1123, "step": 50030 }, { "epoch": 1.818446108002035, "grad_norm": 0.6627784967422485, "learning_rate": 4.324720412136491e-05, "loss": 0.1049, "step": 50040 }, { "epoch": 1.818809506504833, "grad_norm": 2.006086826324463, "learning_rate": 4.324365648360691e-05, "loss": 0.1699, "step": 50050 }, { "epoch": 1.8191729050076315, "grad_norm": 0.8955428600311279, "learning_rate": 4.3240108059794646e-05, "loss": 0.1357, "step": 50060 }, { "epoch": 1.8195363035104295, "grad_norm": 164.89707946777344, "learning_rate": 4.3236558850081036e-05, "loss": 3.7378, "step": 50070 }, { "epoch": 1.8198997020132277, "grad_norm": 1.0500569343566895, "learning_rate": 4.3233008854618994e-05, "loss": 0.3275, "step": 50080 }, { "epoch": 1.820263100516026, "grad_norm": 23.444902420043945, "learning_rate": 4.3229458073561466e-05, "loss": 0.1081, "step": 50090 }, { "epoch": 1.820626499018824, "grad_norm": 2.446769952774048, "learning_rate": 4.322590650706145e-05, "loss": 0.1738, "step": 50100 }, { "epoch": 1.8209898975216223, "grad_norm": 4.417498588562012, "learning_rate": 4.322235415527198e-05, "loss": 0.1196, "step": 50110 }, { "epoch": 1.8213532960244203, "grad_norm": 0.8139522671699524, "learning_rate": 4.321880101834609e-05, "loss": 0.1339, "step": 50120 }, { "epoch": 1.8217166945272185, "grad_norm": 0.6750831007957458, "learning_rate": 4.32152470964369e-05, "loss": 0.1169, "step": 50130 }, { "epoch": 1.8220800930300167, "grad_norm": 0.6926230192184448, "learning_rate": 4.3211692389697514e-05, "loss": 0.1192, "step": 50140 }, { "epoch": 1.822443491532815, "grad_norm": 0.5890200138092041, "learning_rate": 4.3208136898281106e-05, "loss": 0.1431, "step": 50150 }, { "epoch": 1.8228068900356131, "grad_norm": 1.0174696445465088, "learning_rate": 4.3204580622340865e-05, "loss": 0.7309, "step": 50160 }, { "epoch": 1.8231702885384111, "grad_norm": 0.9049250483512878, "learning_rate": 4.320102356203001e-05, "loss": 0.1451, "step": 50170 }, { "epoch": 1.8235336870412096, "grad_norm": 0.7444465160369873, "learning_rate": 4.3197465717501815e-05, "loss": 0.1111, "step": 50180 }, { "epoch": 1.8238970855440075, "grad_norm": 1.0209647417068481, "learning_rate": 4.319390708890957e-05, "loss": 0.0911, "step": 50190 }, { "epoch": 1.8242604840468057, "grad_norm": 0.7396380305290222, "learning_rate": 4.31903476764066e-05, "loss": 0.1367, "step": 50200 }, { "epoch": 1.824623882549604, "grad_norm": 0.7910483479499817, "learning_rate": 4.318678748014626e-05, "loss": 0.0978, "step": 50210 }, { "epoch": 1.824987281052402, "grad_norm": 0.5519534349441528, "learning_rate": 4.318322650028197e-05, "loss": 0.1502, "step": 50220 }, { "epoch": 1.8253506795552004, "grad_norm": 0.7698003053665161, "learning_rate": 4.317966473696714e-05, "loss": 0.1257, "step": 50230 }, { "epoch": 1.8257140780579983, "grad_norm": 0.6308223605155945, "learning_rate": 4.3176102190355246e-05, "loss": 0.1048, "step": 50240 }, { "epoch": 1.8260774765607966, "grad_norm": 0.7210573554039001, "learning_rate": 4.317253886059978e-05, "loss": 2.1908, "step": 50250 }, { "epoch": 1.8264408750635948, "grad_norm": 1.0156967639923096, "learning_rate": 4.316897474785426e-05, "loss": 0.1612, "step": 50260 }, { "epoch": 1.8268042735663927, "grad_norm": 1.366592288017273, "learning_rate": 4.3165409852272276e-05, "loss": 0.1524, "step": 50270 }, { "epoch": 1.8271676720691912, "grad_norm": 1.1312644481658936, "learning_rate": 4.3161844174007406e-05, "loss": 0.1118, "step": 50280 }, { "epoch": 1.8275310705719892, "grad_norm": 0.3259322941303253, "learning_rate": 4.3158277713213295e-05, "loss": 0.1043, "step": 50290 }, { "epoch": 1.8278944690747874, "grad_norm": 0.8572397828102112, "learning_rate": 4.31547104700436e-05, "loss": 0.1346, "step": 50300 }, { "epoch": 1.8282578675775856, "grad_norm": 1.2105157375335693, "learning_rate": 4.3151142444652035e-05, "loss": 0.1135, "step": 50310 }, { "epoch": 1.8286212660803838, "grad_norm": 0.6126898527145386, "learning_rate": 4.314757363719232e-05, "loss": 0.1719, "step": 50320 }, { "epoch": 1.828984664583182, "grad_norm": 0.7111977338790894, "learning_rate": 4.314400404781822e-05, "loss": 0.1089, "step": 50330 }, { "epoch": 1.82934806308598, "grad_norm": 1.016365647315979, "learning_rate": 4.314043367668355e-05, "loss": 0.0869, "step": 50340 }, { "epoch": 1.8297114615887784, "grad_norm": 1.1696245670318604, "learning_rate": 4.3136862523942136e-05, "loss": 0.1241, "step": 50350 }, { "epoch": 1.8300748600915764, "grad_norm": 1.2339487075805664, "learning_rate": 4.313329058974784e-05, "loss": 0.1202, "step": 50360 }, { "epoch": 1.8304382585943746, "grad_norm": 1.6147994995117188, "learning_rate": 4.312971787425458e-05, "loss": 0.1502, "step": 50370 }, { "epoch": 1.8308016570971728, "grad_norm": 0.8028876781463623, "learning_rate": 4.312614437761628e-05, "loss": 0.1153, "step": 50380 }, { "epoch": 1.8311650555999708, "grad_norm": 0.6366049647331238, "learning_rate": 4.312257009998691e-05, "loss": 0.1367, "step": 50390 }, { "epoch": 1.8315284541027692, "grad_norm": 1.6343673467636108, "learning_rate": 4.311899504152047e-05, "loss": 0.1502, "step": 50400 }, { "epoch": 1.8315284541027692, "eval_loss": 0.34016337990760803, "eval_runtime": 180.2837, "eval_samples_per_second": 41.124, "eval_steps_per_second": 5.142, "eval_wer": 0.16422204875923538, "step": 50400 }, { "epoch": 1.8318918526055672, "grad_norm": 0.5833923816680908, "learning_rate": 4.311541920237101e-05, "loss": 0.1262, "step": 50410 }, { "epoch": 1.8322552511083654, "grad_norm": 0.5683671832084656, "learning_rate": 4.311184258269258e-05, "loss": 0.1626, "step": 50420 }, { "epoch": 1.8326186496111636, "grad_norm": 2.4269814491271973, "learning_rate": 4.3108265182639304e-05, "loss": 0.1293, "step": 50430 }, { "epoch": 1.8329820481139618, "grad_norm": 0.8372895121574402, "learning_rate": 4.310468700236532e-05, "loss": 0.1004, "step": 50440 }, { "epoch": 1.83334544661676, "grad_norm": 0.7744470238685608, "learning_rate": 4.3101108042024776e-05, "loss": 0.126, "step": 50450 }, { "epoch": 1.833708845119558, "grad_norm": 1.1404407024383545, "learning_rate": 4.3097528301771895e-05, "loss": 0.1267, "step": 50460 }, { "epoch": 1.8340722436223564, "grad_norm": 0.4898841381072998, "learning_rate": 4.30939477817609e-05, "loss": 0.1353, "step": 50470 }, { "epoch": 1.8344356421251544, "grad_norm": 0.949220597743988, "learning_rate": 4.3090366482146085e-05, "loss": 0.1188, "step": 50480 }, { "epoch": 1.8347990406279526, "grad_norm": 0.2803521156311035, "learning_rate": 4.308678440308175e-05, "loss": 0.0978, "step": 50490 }, { "epoch": 1.8351624391307508, "grad_norm": 1.3130167722702026, "learning_rate": 4.308320154472221e-05, "loss": 0.1228, "step": 50500 }, { "epoch": 1.8355258376335488, "grad_norm": 1.2615669965744019, "learning_rate": 4.307961790722187e-05, "loss": 0.1146, "step": 50510 }, { "epoch": 1.8358892361363472, "grad_norm": 1.296720266342163, "learning_rate": 4.307603349073512e-05, "loss": 0.1497, "step": 50520 }, { "epoch": 1.8362526346391452, "grad_norm": 0.7248766422271729, "learning_rate": 4.30724482954164e-05, "loss": 2.0, "step": 50530 }, { "epoch": 1.8366160331419434, "grad_norm": 0.9493942260742188, "learning_rate": 4.306886232142018e-05, "loss": 0.1092, "step": 50540 }, { "epoch": 1.8369794316447416, "grad_norm": 0.8784381151199341, "learning_rate": 4.306527556890097e-05, "loss": 0.1332, "step": 50550 }, { "epoch": 1.8373428301475396, "grad_norm": 1.1002815961837769, "learning_rate": 4.306168803801332e-05, "loss": 0.1166, "step": 50560 }, { "epoch": 1.837706228650338, "grad_norm": 1.0042141675949097, "learning_rate": 4.3058099728911795e-05, "loss": 0.1494, "step": 50570 }, { "epoch": 1.838069627153136, "grad_norm": 0.8974900841712952, "learning_rate": 4.305451064175101e-05, "loss": 0.1281, "step": 50580 }, { "epoch": 1.8384330256559342, "grad_norm": 6.133358001708984, "learning_rate": 4.3050920776685587e-05, "loss": 0.1136, "step": 50590 }, { "epoch": 1.8387964241587325, "grad_norm": 1.828365445137024, "learning_rate": 4.304733013387022e-05, "loss": 0.1634, "step": 50600 }, { "epoch": 1.8391598226615307, "grad_norm": 1.1756354570388794, "learning_rate": 4.3043738713459605e-05, "loss": 0.1039, "step": 50610 }, { "epoch": 1.8395232211643289, "grad_norm": 0.3747727870941162, "learning_rate": 4.304014651560849e-05, "loss": 0.1966, "step": 50620 }, { "epoch": 1.8398866196671269, "grad_norm": 2.3143324851989746, "learning_rate": 4.303655354047166e-05, "loss": 0.1294, "step": 50630 }, { "epoch": 1.8402500181699253, "grad_norm": 0.408648818731308, "learning_rate": 4.3032959788203894e-05, "loss": 0.1097, "step": 50640 }, { "epoch": 1.8406134166727233, "grad_norm": 2.8158206939697266, "learning_rate": 4.3029365258960065e-05, "loss": 0.1418, "step": 50650 }, { "epoch": 1.8409768151755215, "grad_norm": 1.1815099716186523, "learning_rate": 4.3025769952895036e-05, "loss": 0.1053, "step": 50660 }, { "epoch": 1.8413402136783197, "grad_norm": 0.5927807688713074, "learning_rate": 4.3022173870163705e-05, "loss": 0.1731, "step": 50670 }, { "epoch": 1.8417036121811177, "grad_norm": 0.945854663848877, "learning_rate": 4.301857701092103e-05, "loss": 0.1355, "step": 50680 }, { "epoch": 1.842067010683916, "grad_norm": 1.1901466846466064, "learning_rate": 4.301497937532199e-05, "loss": 0.1237, "step": 50690 }, { "epoch": 1.842430409186714, "grad_norm": 2.328880548477173, "learning_rate": 4.301138096352158e-05, "loss": 0.184, "step": 50700 }, { "epoch": 1.8427938076895123, "grad_norm": 1.652759075164795, "learning_rate": 4.3007781775674846e-05, "loss": 0.1085, "step": 50710 }, { "epoch": 1.8431572061923105, "grad_norm": 0.7879970669746399, "learning_rate": 4.3004181811936874e-05, "loss": 0.1291, "step": 50720 }, { "epoch": 1.8435206046951087, "grad_norm": 0.9201347827911377, "learning_rate": 4.3000581072462765e-05, "loss": 0.1069, "step": 50730 }, { "epoch": 1.843884003197907, "grad_norm": 0.4384756088256836, "learning_rate": 4.299697955740766e-05, "loss": 0.0966, "step": 50740 }, { "epoch": 1.844247401700705, "grad_norm": 1.022146463394165, "learning_rate": 4.299337726692674e-05, "loss": 0.4218, "step": 50750 }, { "epoch": 1.8446108002035033, "grad_norm": 1.298736810684204, "learning_rate": 4.298977420117521e-05, "loss": 0.1266, "step": 50760 }, { "epoch": 1.8449741987063013, "grad_norm": 0.3361910283565521, "learning_rate": 4.2986170360308324e-05, "loss": 0.1357, "step": 50770 }, { "epoch": 1.8453375972090995, "grad_norm": 1.3413841724395752, "learning_rate": 4.298256574448135e-05, "loss": 0.1116, "step": 50780 }, { "epoch": 1.8457009957118977, "grad_norm": 1.324555516242981, "learning_rate": 4.2978960353849606e-05, "loss": 0.1171, "step": 50790 }, { "epoch": 1.8460643942146957, "grad_norm": 2.6282169818878174, "learning_rate": 4.297535418856843e-05, "loss": 0.1341, "step": 50800 }, { "epoch": 1.8464277927174941, "grad_norm": 1.0654820203781128, "learning_rate": 4.297174724879319e-05, "loss": 0.1358, "step": 50810 }, { "epoch": 1.8467911912202921, "grad_norm": 0.45108261704444885, "learning_rate": 4.296813953467931e-05, "loss": 0.21, "step": 50820 }, { "epoch": 1.8471545897230903, "grad_norm": 0.4607618749141693, "learning_rate": 4.296453104638222e-05, "loss": 0.1004, "step": 50830 }, { "epoch": 1.8475179882258885, "grad_norm": 1.834341049194336, "learning_rate": 4.296092178405741e-05, "loss": 0.1092, "step": 50840 }, { "epoch": 1.8478813867286865, "grad_norm": 2.6476006507873535, "learning_rate": 4.295731174786039e-05, "loss": 0.1255, "step": 50850 }, { "epoch": 1.848244785231485, "grad_norm": 0.6069791316986084, "learning_rate": 4.2953700937946696e-05, "loss": 0.1166, "step": 50860 }, { "epoch": 1.848608183734283, "grad_norm": 0.9340389966964722, "learning_rate": 4.2950089354471915e-05, "loss": 0.1548, "step": 50870 }, { "epoch": 1.8489715822370811, "grad_norm": 1.0438172817230225, "learning_rate": 4.294647699759163e-05, "loss": 0.1996, "step": 50880 }, { "epoch": 1.8493349807398793, "grad_norm": 0.9623711109161377, "learning_rate": 4.294286386746152e-05, "loss": 0.11, "step": 50890 }, { "epoch": 1.8496983792426775, "grad_norm": 0.7084048986434937, "learning_rate": 4.2939249964237246e-05, "loss": 0.1411, "step": 50900 }, { "epoch": 1.8500617777454758, "grad_norm": 2.6798365116119385, "learning_rate": 4.293563528807453e-05, "loss": 0.1253, "step": 50910 }, { "epoch": 1.8504251762482737, "grad_norm": 1.17062246799469, "learning_rate": 4.2932019839129087e-05, "loss": 0.1676, "step": 50920 }, { "epoch": 1.8507885747510722, "grad_norm": 3.6233065128326416, "learning_rate": 4.2928403617556714e-05, "loss": 0.1128, "step": 50930 }, { "epoch": 1.8511519732538702, "grad_norm": 1.1630785465240479, "learning_rate": 4.2924786623513225e-05, "loss": 0.084, "step": 50940 }, { "epoch": 1.8515153717566684, "grad_norm": 1.654990553855896, "learning_rate": 4.292116885715446e-05, "loss": 0.1527, "step": 50950 }, { "epoch": 1.8518787702594666, "grad_norm": 1.1175593137741089, "learning_rate": 4.291755031863628e-05, "loss": 0.1132, "step": 50960 }, { "epoch": 1.8522421687622646, "grad_norm": 0.46370136737823486, "learning_rate": 4.291393100811462e-05, "loss": 0.1335, "step": 50970 }, { "epoch": 1.852605567265063, "grad_norm": 1.1095346212387085, "learning_rate": 4.2910310925745404e-05, "loss": 0.1024, "step": 50980 }, { "epoch": 1.852968965767861, "grad_norm": 1.7305604219436646, "learning_rate": 4.290669007168462e-05, "loss": 0.0964, "step": 50990 }, { "epoch": 1.8533323642706592, "grad_norm": 0.5683947801589966, "learning_rate": 4.290306844608827e-05, "loss": 0.1324, "step": 51000 }, { "epoch": 1.8533323642706592, "eval_loss": 0.35921338200569153, "eval_runtime": 179.6912, "eval_samples_per_second": 41.26, "eval_steps_per_second": 5.159, "eval_wer": 0.164648646686151, "step": 51000 }, { "epoch": 1.8536957627734574, "grad_norm": 0.417925089597702, "learning_rate": 4.289944604911239e-05, "loss": 0.156, "step": 51010 }, { "epoch": 1.8540591612762556, "grad_norm": 0.3907199501991272, "learning_rate": 4.2895822880913076e-05, "loss": 0.1322, "step": 51020 }, { "epoch": 1.8544225597790538, "grad_norm": 0.8322422504425049, "learning_rate": 4.2892198941646436e-05, "loss": 0.1278, "step": 51030 }, { "epoch": 1.8547859582818518, "grad_norm": 1.3560541868209839, "learning_rate": 4.2888574231468595e-05, "loss": 0.0933, "step": 51040 }, { "epoch": 1.8551493567846502, "grad_norm": 1.353043556213379, "learning_rate": 4.288494875053573e-05, "loss": 0.1185, "step": 51050 }, { "epoch": 1.8555127552874482, "grad_norm": 7.476738929748535, "learning_rate": 4.2881322499004076e-05, "loss": 0.127, "step": 51060 }, { "epoch": 1.8558761537902464, "grad_norm": 0.5096439719200134, "learning_rate": 4.2877695477029844e-05, "loss": 0.1768, "step": 51070 }, { "epoch": 1.8562395522930446, "grad_norm": 2.3075900077819824, "learning_rate": 4.2874067684769325e-05, "loss": 0.1288, "step": 51080 }, { "epoch": 1.8566029507958426, "grad_norm": 0.615508496761322, "learning_rate": 4.287043912237883e-05, "loss": 0.0903, "step": 51090 }, { "epoch": 1.856966349298641, "grad_norm": 2.022796154022217, "learning_rate": 4.286680979001469e-05, "loss": 0.1372, "step": 51100 }, { "epoch": 1.857329747801439, "grad_norm": 0.8577511310577393, "learning_rate": 4.28631796878333e-05, "loss": 0.1216, "step": 51110 }, { "epoch": 1.8576931463042372, "grad_norm": 0.5876504182815552, "learning_rate": 4.285954881599104e-05, "loss": 0.1672, "step": 51120 }, { "epoch": 1.8580565448070354, "grad_norm": 1.0467904806137085, "learning_rate": 4.2855917174644374e-05, "loss": 0.1175, "step": 51130 }, { "epoch": 1.8584199433098334, "grad_norm": 0.6273336410522461, "learning_rate": 4.285228476394977e-05, "loss": 0.1048, "step": 51140 }, { "epoch": 1.8587833418126318, "grad_norm": 0.7880851030349731, "learning_rate": 4.284865158406372e-05, "loss": 0.1418, "step": 51150 }, { "epoch": 1.8591467403154298, "grad_norm": 1.316256046295166, "learning_rate": 4.284501763514279e-05, "loss": 0.1373, "step": 51160 }, { "epoch": 1.859510138818228, "grad_norm": 0.46383532881736755, "learning_rate": 4.284138291734355e-05, "loss": 0.167, "step": 51170 }, { "epoch": 1.8598735373210262, "grad_norm": 0.7432321310043335, "learning_rate": 4.283774743082259e-05, "loss": 0.1534, "step": 51180 }, { "epoch": 1.8602369358238244, "grad_norm": 0.673897922039032, "learning_rate": 4.2834111175736555e-05, "loss": 0.1122, "step": 51190 }, { "epoch": 1.8606003343266226, "grad_norm": 0.7635305523872375, "learning_rate": 4.2830474152242136e-05, "loss": 0.1054, "step": 51200 }, { "epoch": 1.8609637328294206, "grad_norm": 1.281503677368164, "learning_rate": 4.282683636049602e-05, "loss": 0.1175, "step": 51210 }, { "epoch": 1.861327131332219, "grad_norm": 1.249510407447815, "learning_rate": 4.282319780065496e-05, "loss": 0.1624, "step": 51220 }, { "epoch": 1.861690529835017, "grad_norm": 1.7470375299453735, "learning_rate": 4.281955847287571e-05, "loss": 0.1173, "step": 51230 }, { "epoch": 1.8620539283378152, "grad_norm": 0.4860547184944153, "learning_rate": 4.2815918377315096e-05, "loss": 0.1109, "step": 51240 }, { "epoch": 1.8624173268406135, "grad_norm": 1.2206679582595825, "learning_rate": 4.281227751412995e-05, "loss": 0.139, "step": 51250 }, { "epoch": 1.8627807253434114, "grad_norm": 5.966470241546631, "learning_rate": 4.2808635883477134e-05, "loss": 0.0989, "step": 51260 }, { "epoch": 1.8631441238462099, "grad_norm": 0.6490101218223572, "learning_rate": 4.280499348551357e-05, "loss": 0.1876, "step": 51270 }, { "epoch": 1.8635075223490078, "grad_norm": 1.0723692178726196, "learning_rate": 4.280135032039618e-05, "loss": 0.1081, "step": 51280 }, { "epoch": 1.863870920851806, "grad_norm": 1.9690381288528442, "learning_rate": 4.2797706388281944e-05, "loss": 0.1112, "step": 51290 }, { "epoch": 1.8642343193546043, "grad_norm": 5.197057723999023, "learning_rate": 4.279406168932787e-05, "loss": 0.1374, "step": 51300 }, { "epoch": 1.8645977178574025, "grad_norm": 1.063194751739502, "learning_rate": 4.279041622369098e-05, "loss": 0.1221, "step": 51310 }, { "epoch": 1.8649611163602007, "grad_norm": 0.9989191889762878, "learning_rate": 4.2786769991528356e-05, "loss": 0.1602, "step": 51320 }, { "epoch": 1.8653245148629987, "grad_norm": 1.2675135135650635, "learning_rate": 4.278312299299711e-05, "loss": 0.1038, "step": 51330 }, { "epoch": 1.865687913365797, "grad_norm": 0.7109481692314148, "learning_rate": 4.277947522825435e-05, "loss": 0.1323, "step": 51340 }, { "epoch": 1.866051311868595, "grad_norm": 0.522361695766449, "learning_rate": 4.2775826697457265e-05, "loss": 0.1339, "step": 51350 }, { "epoch": 1.8664147103713933, "grad_norm": 0.6734838485717773, "learning_rate": 4.277217740076306e-05, "loss": 0.0991, "step": 51360 }, { "epoch": 1.8667781088741915, "grad_norm": 0.7802498936653137, "learning_rate": 4.276852733832897e-05, "loss": 0.1614, "step": 51370 }, { "epoch": 1.8671415073769895, "grad_norm": 0.7883875966072083, "learning_rate": 4.276487651031225e-05, "loss": 0.1093, "step": 51380 }, { "epoch": 1.867504905879788, "grad_norm": 2.097398519515991, "learning_rate": 4.27612249168702e-05, "loss": 0.0925, "step": 51390 }, { "epoch": 1.8678683043825859, "grad_norm": 2.9690377712249756, "learning_rate": 4.2757572558160176e-05, "loss": 0.1693, "step": 51400 }, { "epoch": 1.868231702885384, "grad_norm": 0.681706428527832, "learning_rate": 4.275391943433953e-05, "loss": 0.1045, "step": 51410 }, { "epoch": 1.8685951013881823, "grad_norm": 0.32064223289489746, "learning_rate": 4.275026554556566e-05, "loss": 0.1511, "step": 51420 }, { "epoch": 1.8689584998909803, "grad_norm": 0.30609723925590515, "learning_rate": 4.2746610891996006e-05, "loss": 0.1089, "step": 51430 }, { "epoch": 1.8693218983937787, "grad_norm": 0.5510174036026001, "learning_rate": 4.274295547378803e-05, "loss": 0.097, "step": 51440 }, { "epoch": 1.8696852968965767, "grad_norm": 1.0738519430160522, "learning_rate": 4.2739299291099233e-05, "loss": 0.1309, "step": 51450 }, { "epoch": 1.870048695399375, "grad_norm": 0.7475055456161499, "learning_rate": 4.2735642344087144e-05, "loss": 0.2601, "step": 51460 }, { "epoch": 1.8704120939021731, "grad_norm": 0.5625230669975281, "learning_rate": 4.273198463290934e-05, "loss": 0.1464, "step": 51470 }, { "epoch": 1.8707754924049713, "grad_norm": 1.03018057346344, "learning_rate": 4.2728326157723396e-05, "loss": 0.1346, "step": 51480 }, { "epoch": 1.8711388909077695, "grad_norm": 0.7019144892692566, "learning_rate": 4.272466691868696e-05, "loss": 0.1281, "step": 51490 }, { "epoch": 1.8715022894105675, "grad_norm": 1.6843324899673462, "learning_rate": 4.2721006915957695e-05, "loss": 0.8711, "step": 51500 }, { "epoch": 1.871865687913366, "grad_norm": 1.4415316581726074, "learning_rate": 4.271734614969329e-05, "loss": 0.1193, "step": 51510 }, { "epoch": 1.872229086416164, "grad_norm": 0.5036882162094116, "learning_rate": 4.271368462005148e-05, "loss": 0.1821, "step": 51520 }, { "epoch": 1.8725924849189621, "grad_norm": 80.8698959350586, "learning_rate": 4.2710022327190026e-05, "loss": 1.5666, "step": 51530 }, { "epoch": 1.8729558834217603, "grad_norm": 1.3852354288101196, "learning_rate": 4.2706359271266716e-05, "loss": 0.1154, "step": 51540 }, { "epoch": 1.8733192819245583, "grad_norm": 2.0087506771087646, "learning_rate": 4.270269545243939e-05, "loss": 0.1289, "step": 51550 }, { "epoch": 1.8736826804273568, "grad_norm": 18.74397087097168, "learning_rate": 4.26990308708659e-05, "loss": 0.166, "step": 51560 }, { "epoch": 1.8740460789301547, "grad_norm": 0.5033942461013794, "learning_rate": 4.2695365526704144e-05, "loss": 0.2648, "step": 51570 }, { "epoch": 1.874409477432953, "grad_norm": 0.7396907210350037, "learning_rate": 4.269169942011205e-05, "loss": 0.1235, "step": 51580 }, { "epoch": 1.8747728759357511, "grad_norm": 1.0756418704986572, "learning_rate": 4.2688032551247574e-05, "loss": 0.1113, "step": 51590 }, { "epoch": 1.8751362744385494, "grad_norm": 0.8277359008789062, "learning_rate": 4.268436492026871e-05, "loss": 0.1556, "step": 51600 }, { "epoch": 1.8751362744385494, "eval_loss": 0.3514460623264313, "eval_runtime": 180.5008, "eval_samples_per_second": 41.075, "eval_steps_per_second": 5.136, "eval_wer": 0.16721731079928115, "step": 51600 }, { "epoch": 1.8754996729413476, "grad_norm": 1.8270063400268555, "learning_rate": 4.268069652733349e-05, "loss": 0.1154, "step": 51610 }, { "epoch": 1.8758630714441455, "grad_norm": 1.2547001838684082, "learning_rate": 4.267702737259995e-05, "loss": 0.1256, "step": 51620 }, { "epoch": 1.876226469946944, "grad_norm": 2.664400100708008, "learning_rate": 4.26733574562262e-05, "loss": 0.1301, "step": 51630 }, { "epoch": 1.876589868449742, "grad_norm": 1.9625864028930664, "learning_rate": 4.266968677837037e-05, "loss": 0.1347, "step": 51640 }, { "epoch": 1.8769532669525402, "grad_norm": 1.5494035482406616, "learning_rate": 4.266601533919059e-05, "loss": 0.1537, "step": 51650 }, { "epoch": 1.8773166654553384, "grad_norm": 1.1424529552459717, "learning_rate": 4.2662343138845076e-05, "loss": 0.1212, "step": 51660 }, { "epoch": 1.8776800639581364, "grad_norm": 0.8773604035377502, "learning_rate": 4.265867017749203e-05, "loss": 0.2055, "step": 51670 }, { "epoch": 1.8780434624609348, "grad_norm": 0.9486932158470154, "learning_rate": 4.265499645528972e-05, "loss": 0.114, "step": 51680 }, { "epoch": 1.8784068609637328, "grad_norm": 0.5126560926437378, "learning_rate": 4.265132197239643e-05, "loss": 0.1068, "step": 51690 }, { "epoch": 1.878770259466531, "grad_norm": 1.4426672458648682, "learning_rate": 4.264764672897049e-05, "loss": 0.1268, "step": 51700 }, { "epoch": 1.8791336579693292, "grad_norm": 1.2795157432556152, "learning_rate": 4.264397072517023e-05, "loss": 0.1, "step": 51710 }, { "epoch": 1.8794970564721272, "grad_norm": 0.34891799092292786, "learning_rate": 4.2640293961154055e-05, "loss": 0.1683, "step": 51720 }, { "epoch": 1.8798604549749256, "grad_norm": 0.6939824223518372, "learning_rate": 4.2636616437080366e-05, "loss": 0.1131, "step": 51730 }, { "epoch": 1.8802238534777236, "grad_norm": 1.4243013858795166, "learning_rate": 4.2632938153107636e-05, "loss": 0.1019, "step": 51740 }, { "epoch": 1.8805872519805218, "grad_norm": 0.699863851070404, "learning_rate": 4.2629259109394335e-05, "loss": 0.1652, "step": 51750 }, { "epoch": 1.88095065048332, "grad_norm": 1.4008554220199585, "learning_rate": 4.2625579306098994e-05, "loss": 0.1062, "step": 51760 }, { "epoch": 1.8813140489861182, "grad_norm": 1.0460174083709717, "learning_rate": 4.2621898743380144e-05, "loss": 0.1216, "step": 51770 }, { "epoch": 1.8816774474889164, "grad_norm": 1.277803897857666, "learning_rate": 4.2618217421396375e-05, "loss": 1.8922, "step": 51780 }, { "epoch": 1.8820408459917144, "grad_norm": 2.2651615142822266, "learning_rate": 4.2614535340306314e-05, "loss": 0.1014, "step": 51790 }, { "epoch": 1.8824042444945128, "grad_norm": 0.8989794254302979, "learning_rate": 4.2610852500268586e-05, "loss": 0.1089, "step": 51800 }, { "epoch": 1.8827676429973108, "grad_norm": 0.9130983948707581, "learning_rate": 4.2607168901441885e-05, "loss": 0.1098, "step": 51810 }, { "epoch": 1.883131041500109, "grad_norm": 0.5734561085700989, "learning_rate": 4.260348454398493e-05, "loss": 0.1674, "step": 51820 }, { "epoch": 1.8834944400029072, "grad_norm": 0.9285537600517273, "learning_rate": 4.259979942805645e-05, "loss": 0.1197, "step": 51830 }, { "epoch": 1.8838578385057052, "grad_norm": 1.951344609260559, "learning_rate": 4.259611355381524e-05, "loss": 0.1009, "step": 51840 }, { "epoch": 1.8842212370085036, "grad_norm": 0.6593104004859924, "learning_rate": 4.2592426921420106e-05, "loss": 0.1121, "step": 51850 }, { "epoch": 1.8845846355113016, "grad_norm": 0.35744279623031616, "learning_rate": 4.258873953102987e-05, "loss": 0.1029, "step": 51860 }, { "epoch": 1.8849480340140998, "grad_norm": 0.7135227918624878, "learning_rate": 4.2585051382803455e-05, "loss": 0.153, "step": 51870 }, { "epoch": 1.885311432516898, "grad_norm": 1.8943212032318115, "learning_rate": 4.258136247689973e-05, "loss": 0.1079, "step": 51880 }, { "epoch": 1.8856748310196962, "grad_norm": 1.6160852909088135, "learning_rate": 4.2577672813477656e-05, "loss": 0.1259, "step": 51890 }, { "epoch": 1.8860382295224944, "grad_norm": 0.8660845160484314, "learning_rate": 4.25739823926962e-05, "loss": 0.1151, "step": 51900 }, { "epoch": 1.8864016280252924, "grad_norm": 1.074818730354309, "learning_rate": 4.2570291214714365e-05, "loss": 0.1181, "step": 51910 }, { "epoch": 1.8867650265280909, "grad_norm": 0.4410496950149536, "learning_rate": 4.2566599279691205e-05, "loss": 0.1296, "step": 51920 }, { "epoch": 1.8871284250308888, "grad_norm": 0.6509966254234314, "learning_rate": 4.2562906587785776e-05, "loss": 0.1096, "step": 51930 }, { "epoch": 1.887491823533687, "grad_norm": 0.8408392667770386, "learning_rate": 4.25592131391572e-05, "loss": 0.0968, "step": 51940 }, { "epoch": 1.8878552220364853, "grad_norm": 0.6419994831085205, "learning_rate": 4.25555189339646e-05, "loss": 0.1336, "step": 51950 }, { "epoch": 1.8882186205392832, "grad_norm": 1.4039828777313232, "learning_rate": 4.2551823972367156e-05, "loss": 0.1102, "step": 51960 }, { "epoch": 1.8885820190420817, "grad_norm": 1.1315640211105347, "learning_rate": 4.2548128254524066e-05, "loss": 0.1265, "step": 51970 }, { "epoch": 1.8889454175448797, "grad_norm": 0.6739282011985779, "learning_rate": 4.254443178059456e-05, "loss": 0.101, "step": 51980 }, { "epoch": 1.8893088160476779, "grad_norm": 0.9321909546852112, "learning_rate": 4.254073455073792e-05, "loss": 0.1093, "step": 51990 }, { "epoch": 1.889672214550476, "grad_norm": 8.199972152709961, "learning_rate": 4.2537036565113435e-05, "loss": 0.2084, "step": 52000 }, { "epoch": 1.890035613053274, "grad_norm": 0.5499328970909119, "learning_rate": 4.253333782388044e-05, "loss": 0.138, "step": 52010 }, { "epoch": 1.8903990115560725, "grad_norm": 1.2447484731674194, "learning_rate": 4.252963832719831e-05, "loss": 0.1353, "step": 52020 }, { "epoch": 1.8907624100588705, "grad_norm": 1.7561428546905518, "learning_rate": 4.252593807522642e-05, "loss": 0.1267, "step": 52030 }, { "epoch": 1.8911258085616687, "grad_norm": 0.6353381872177124, "learning_rate": 4.252223706812423e-05, "loss": 0.1105, "step": 52040 }, { "epoch": 1.8914892070644669, "grad_norm": 1.8371816873550415, "learning_rate": 4.251853530605118e-05, "loss": 0.1498, "step": 52050 }, { "epoch": 1.891852605567265, "grad_norm": 2.9866833686828613, "learning_rate": 4.251483278916678e-05, "loss": 0.1133, "step": 52060 }, { "epoch": 1.8922160040700633, "grad_norm": 0.9184136986732483, "learning_rate": 4.2511129517630555e-05, "loss": 0.114, "step": 52070 }, { "epoch": 1.8925794025728613, "grad_norm": 1.407132863998413, "learning_rate": 4.250742549160206e-05, "loss": 0.1055, "step": 52080 }, { "epoch": 1.8929428010756597, "grad_norm": 1.2703722715377808, "learning_rate": 4.250372071124089e-05, "loss": 0.1429, "step": 52090 }, { "epoch": 1.8933061995784577, "grad_norm": 1.260004997253418, "learning_rate": 4.2500385764093334e-05, "loss": 1.4537, "step": 52100 }, { "epoch": 1.893669598081256, "grad_norm": 0.9646703004837036, "learning_rate": 4.249667955093988e-05, "loss": 0.1465, "step": 52110 }, { "epoch": 1.894032996584054, "grad_norm": 0.7287250757217407, "learning_rate": 4.249297258391677e-05, "loss": 0.1387, "step": 52120 }, { "epoch": 1.894396395086852, "grad_norm": 1.042417049407959, "learning_rate": 4.24892648631837e-05, "loss": 0.1031, "step": 52130 }, { "epoch": 1.8947597935896505, "grad_norm": 0.9328198432922363, "learning_rate": 4.248555638890043e-05, "loss": 0.1324, "step": 52140 }, { "epoch": 1.8951231920924485, "grad_norm": 3.1417503356933594, "learning_rate": 4.2481847161226764e-05, "loss": 0.1798, "step": 52150 }, { "epoch": 1.8954865905952467, "grad_norm": 0.8507013916969299, "learning_rate": 4.247813718032249e-05, "loss": 0.1182, "step": 52160 }, { "epoch": 1.895849989098045, "grad_norm": 1.0147353410720825, "learning_rate": 4.247442644634748e-05, "loss": 0.1341, "step": 52170 }, { "epoch": 1.8962133876008431, "grad_norm": 2.492661476135254, "learning_rate": 4.2470714959461614e-05, "loss": 0.1109, "step": 52180 }, { "epoch": 1.8965767861036413, "grad_norm": 1.8121393918991089, "learning_rate": 4.246700271982479e-05, "loss": 0.1381, "step": 52190 }, { "epoch": 1.8969401846064393, "grad_norm": 1.2550605535507202, "learning_rate": 4.2463289727596965e-05, "loss": 0.1469, "step": 52200 }, { "epoch": 1.8969401846064393, "eval_loss": 0.3317066729068756, "eval_runtime": 179.6114, "eval_samples_per_second": 41.278, "eval_steps_per_second": 5.161, "eval_wer": 0.16958629077640822, "step": 52200 }, { "epoch": 1.8973035831092377, "grad_norm": 0.7204797863960266, "learning_rate": 4.245957598293813e-05, "loss": 0.1328, "step": 52210 }, { "epoch": 1.8976669816120357, "grad_norm": 0.5142366290092468, "learning_rate": 4.245586148600829e-05, "loss": 0.1457, "step": 52220 }, { "epoch": 1.898030380114834, "grad_norm": 0.8868045210838318, "learning_rate": 4.2452146236967474e-05, "loss": 0.1344, "step": 52230 }, { "epoch": 1.8983937786176321, "grad_norm": 0.7489217519760132, "learning_rate": 4.2448430235975777e-05, "loss": 0.1119, "step": 52240 }, { "epoch": 1.8987571771204301, "grad_norm": 1.2009568214416504, "learning_rate": 4.244471348319331e-05, "loss": 0.1503, "step": 52250 }, { "epoch": 1.8991205756232286, "grad_norm": 1.1081483364105225, "learning_rate": 4.24409959787802e-05, "loss": 0.1265, "step": 52260 }, { "epoch": 1.8994839741260265, "grad_norm": 0.42917948961257935, "learning_rate": 4.243727772289663e-05, "loss": 0.1825, "step": 52270 }, { "epoch": 1.8998473726288247, "grad_norm": 1.7099511623382568, "learning_rate": 4.2433558715702804e-05, "loss": 0.133, "step": 52280 }, { "epoch": 1.900210771131623, "grad_norm": 1.212544560432434, "learning_rate": 4.242983895735896e-05, "loss": 0.1306, "step": 52290 }, { "epoch": 1.900574169634421, "grad_norm": 0.48001641035079956, "learning_rate": 4.242611844802538e-05, "loss": 0.1739, "step": 52300 }, { "epoch": 1.9009375681372194, "grad_norm": 2.3596603870391846, "learning_rate": 4.242239718786235e-05, "loss": 0.129, "step": 52310 }, { "epoch": 1.9013009666400174, "grad_norm": 0.41326409578323364, "learning_rate": 4.241867517703022e-05, "loss": 0.1218, "step": 52320 }, { "epoch": 1.9016643651428156, "grad_norm": 0.9740013480186462, "learning_rate": 4.241495241568935e-05, "loss": 0.1248, "step": 52330 }, { "epoch": 1.9020277636456138, "grad_norm": 0.8275489807128906, "learning_rate": 4.2411228904000136e-05, "loss": 0.1067, "step": 52340 }, { "epoch": 1.902391162148412, "grad_norm": 9.790162086486816, "learning_rate": 4.240750464212303e-05, "loss": 0.1548, "step": 52350 }, { "epoch": 1.9027545606512102, "grad_norm": 3.4635374546051025, "learning_rate": 4.240377963021847e-05, "loss": 0.1157, "step": 52360 }, { "epoch": 1.9031179591540082, "grad_norm": 0.5103577971458435, "learning_rate": 4.2400053868446976e-05, "loss": 0.1356, "step": 52370 }, { "epoch": 1.9034813576568066, "grad_norm": 1.608657956123352, "learning_rate": 4.239632735696908e-05, "loss": 0.114, "step": 52380 }, { "epoch": 1.9038447561596046, "grad_norm": 0.9395160675048828, "learning_rate": 4.2392600095945324e-05, "loss": 0.1079, "step": 52390 }, { "epoch": 1.9042081546624028, "grad_norm": 1.1032116413116455, "learning_rate": 4.2388872085536314e-05, "loss": 0.1789, "step": 52400 }, { "epoch": 1.904571553165201, "grad_norm": 0.765036940574646, "learning_rate": 4.2385143325902675e-05, "loss": 0.0984, "step": 52410 }, { "epoch": 1.904934951667999, "grad_norm": 2.460920572280884, "learning_rate": 4.238141381720507e-05, "loss": 0.1201, "step": 52420 }, { "epoch": 1.9052983501707974, "grad_norm": 0.6005275845527649, "learning_rate": 4.237768355960418e-05, "loss": 0.1145, "step": 52430 }, { "epoch": 1.9056617486735954, "grad_norm": 0.606640636920929, "learning_rate": 4.2373952553260745e-05, "loss": 0.1204, "step": 52440 }, { "epoch": 1.9060251471763936, "grad_norm": 1.0981110334396362, "learning_rate": 4.237022079833551e-05, "loss": 0.1237, "step": 52450 }, { "epoch": 1.9063885456791918, "grad_norm": 1.2138440608978271, "learning_rate": 4.236648829498926e-05, "loss": 0.1027, "step": 52460 }, { "epoch": 1.90675194418199, "grad_norm": 0.38126930594444275, "learning_rate": 4.2362755043382816e-05, "loss": 0.1787, "step": 52470 }, { "epoch": 1.9071153426847882, "grad_norm": 1.1713272333145142, "learning_rate": 4.235902104367704e-05, "loss": 0.1098, "step": 52480 }, { "epoch": 1.9074787411875862, "grad_norm": 1.0597947835922241, "learning_rate": 4.235528629603282e-05, "loss": 0.1085, "step": 52490 }, { "epoch": 1.9078421396903846, "grad_norm": 0.5749408602714539, "learning_rate": 4.235155080061105e-05, "loss": 0.1295, "step": 52500 }, { "epoch": 1.9082055381931826, "grad_norm": 1.4702091217041016, "learning_rate": 4.234781455757269e-05, "loss": 0.1281, "step": 52510 }, { "epoch": 1.9085689366959808, "grad_norm": 0.586208164691925, "learning_rate": 4.234407756707873e-05, "loss": 0.1412, "step": 52520 }, { "epoch": 1.908932335198779, "grad_norm": 0.8572281002998352, "learning_rate": 4.2340339829290174e-05, "loss": 0.1059, "step": 52530 }, { "epoch": 1.909295733701577, "grad_norm": 0.7896180152893066, "learning_rate": 4.233660134436809e-05, "loss": 0.1144, "step": 52540 }, { "epoch": 1.9096591322043754, "grad_norm": 0.9928715825080872, "learning_rate": 4.233286211247351e-05, "loss": 0.1507, "step": 52550 }, { "epoch": 1.9100225307071734, "grad_norm": 1.1396877765655518, "learning_rate": 4.23291221337676e-05, "loss": 0.1306, "step": 52560 }, { "epoch": 1.9103859292099716, "grad_norm": 0.743976891040802, "learning_rate": 4.232538140841146e-05, "loss": 0.1839, "step": 52570 }, { "epoch": 1.9107493277127698, "grad_norm": 0.40765443444252014, "learning_rate": 4.232163993656628e-05, "loss": 0.1303, "step": 52580 }, { "epoch": 1.911112726215568, "grad_norm": 0.6623360514640808, "learning_rate": 4.231789771839326e-05, "loss": 0.1202, "step": 52590 }, { "epoch": 1.9114761247183663, "grad_norm": 0.9128944873809814, "learning_rate": 4.2314154754053656e-05, "loss": 0.827, "step": 52600 }, { "epoch": 1.9118395232211642, "grad_norm": 0.9086483716964722, "learning_rate": 4.231041104370872e-05, "loss": 0.1147, "step": 52610 }, { "epoch": 1.9122029217239624, "grad_norm": 1.0172945261001587, "learning_rate": 4.2306666587519765e-05, "loss": 0.1501, "step": 52620 }, { "epoch": 1.9125663202267607, "grad_norm": 1.759474277496338, "learning_rate": 4.2302921385648126e-05, "loss": 0.1389, "step": 52630 }, { "epoch": 1.9129297187295589, "grad_norm": 1.5807387828826904, "learning_rate": 4.229917543825517e-05, "loss": 0.1067, "step": 52640 }, { "epoch": 1.913293117232357, "grad_norm": 0.8961324095726013, "learning_rate": 4.2295428745502284e-05, "loss": 0.1204, "step": 52650 }, { "epoch": 1.913656515735155, "grad_norm": 1.3519996404647827, "learning_rate": 4.229168130755092e-05, "loss": 0.1163, "step": 52660 }, { "epoch": 1.9140199142379535, "grad_norm": 1.4970946311950684, "learning_rate": 4.2287933124562526e-05, "loss": 0.1532, "step": 52670 }, { "epoch": 1.9143833127407515, "grad_norm": 48.62047576904297, "learning_rate": 4.2284184196698615e-05, "loss": 0.474, "step": 52680 }, { "epoch": 1.9147467112435497, "grad_norm": 1.5001195669174194, "learning_rate": 4.22804345241207e-05, "loss": 0.1108, "step": 52690 }, { "epoch": 1.9151101097463479, "grad_norm": 0.6537098288536072, "learning_rate": 4.227668410699034e-05, "loss": 0.1457, "step": 52700 }, { "epoch": 1.9154735082491459, "grad_norm": 1.2610722780227661, "learning_rate": 4.227293294546914e-05, "loss": 0.1223, "step": 52710 }, { "epoch": 1.9158369067519443, "grad_norm": 1.0688477754592896, "learning_rate": 4.226918103971871e-05, "loss": 0.1319, "step": 52720 }, { "epoch": 1.9162003052547423, "grad_norm": 0.532785952091217, "learning_rate": 4.226542838990072e-05, "loss": 0.1355, "step": 52730 }, { "epoch": 1.9165637037575405, "grad_norm": 0.6391937136650085, "learning_rate": 4.226167499617684e-05, "loss": 0.1215, "step": 52740 }, { "epoch": 1.9169271022603387, "grad_norm": 0.9662737250328064, "learning_rate": 4.225792085870881e-05, "loss": 0.1522, "step": 52750 }, { "epoch": 1.917290500763137, "grad_norm": 1.3882033824920654, "learning_rate": 4.225416597765838e-05, "loss": 0.1234, "step": 52760 }, { "epoch": 1.917653899265935, "grad_norm": 1.2721084356307983, "learning_rate": 4.225041035318732e-05, "loss": 0.1645, "step": 52770 }, { "epoch": 1.918017297768733, "grad_norm": 1.06475830078125, "learning_rate": 4.224665398545745e-05, "loss": 0.1278, "step": 52780 }, { "epoch": 1.9183806962715315, "grad_norm": 1.8718911409378052, "learning_rate": 4.224289687463063e-05, "loss": 0.1081, "step": 52790 }, { "epoch": 1.9187440947743295, "grad_norm": 0.7336494326591492, "learning_rate": 4.223913902086874e-05, "loss": 0.1398, "step": 52800 }, { "epoch": 1.9187440947743295, "eval_loss": 0.33621227741241455, "eval_runtime": 180.2286, "eval_samples_per_second": 41.137, "eval_steps_per_second": 5.143, "eval_wer": 0.16169876740428776, "step": 52800 }, { "epoch": 1.9191074932771277, "grad_norm": 1.407049298286438, "learning_rate": 4.223538042433368e-05, "loss": 0.1219, "step": 52810 }, { "epoch": 1.919470891779926, "grad_norm": 0.27716466784477234, "learning_rate": 4.22316210851874e-05, "loss": 0.1303, "step": 52820 }, { "epoch": 1.919834290282724, "grad_norm": 1.0262128114700317, "learning_rate": 4.222786100359188e-05, "loss": 0.1053, "step": 52830 }, { "epoch": 1.9201976887855223, "grad_norm": 0.6818228960037231, "learning_rate": 4.222410017970913e-05, "loss": 0.09, "step": 52840 }, { "epoch": 1.9205610872883203, "grad_norm": 0.6761994361877441, "learning_rate": 4.2220338613701185e-05, "loss": 0.108, "step": 52850 }, { "epoch": 1.9209244857911185, "grad_norm": 4.313242435455322, "learning_rate": 4.2216576305730104e-05, "loss": 0.1262, "step": 52860 }, { "epoch": 1.9212878842939167, "grad_norm": 0.6098904609680176, "learning_rate": 4.221281325595803e-05, "loss": 0.2005, "step": 52870 }, { "epoch": 1.921651282796715, "grad_norm": 1.2213470935821533, "learning_rate": 4.2209049464547064e-05, "loss": 0.1088, "step": 52880 }, { "epoch": 1.9220146812995131, "grad_norm": 0.705827534198761, "learning_rate": 4.220528493165938e-05, "loss": 0.1207, "step": 52890 }, { "epoch": 1.9223780798023111, "grad_norm": 0.8161284327507019, "learning_rate": 4.22015196574572e-05, "loss": 0.1855, "step": 52900 }, { "epoch": 1.9227414783051093, "grad_norm": 0.7296738028526306, "learning_rate": 4.2197753642102734e-05, "loss": 0.1224, "step": 52910 }, { "epoch": 1.9231048768079075, "grad_norm": 1.1311039924621582, "learning_rate": 4.2193986885758255e-05, "loss": 0.1331, "step": 52920 }, { "epoch": 1.9234682753107057, "grad_norm": 1.0949995517730713, "learning_rate": 4.219021938858605e-05, "loss": 0.1172, "step": 52930 }, { "epoch": 1.923831673813504, "grad_norm": 2.3175034523010254, "learning_rate": 4.2186451150748465e-05, "loss": 0.1061, "step": 52940 }, { "epoch": 1.924195072316302, "grad_norm": 0.4657406806945801, "learning_rate": 4.2182682172407853e-05, "loss": 0.1099, "step": 52950 }, { "epoch": 1.9245584708191004, "grad_norm": 1.0153266191482544, "learning_rate": 4.2178912453726585e-05, "loss": 0.1028, "step": 52960 }, { "epoch": 1.9249218693218983, "grad_norm": 0.48774194717407227, "learning_rate": 4.217514199486712e-05, "loss": 0.1196, "step": 52970 }, { "epoch": 1.9252852678246966, "grad_norm": 0.5909627079963684, "learning_rate": 4.2171370795991886e-05, "loss": 3.5511, "step": 52980 }, { "epoch": 1.9256486663274948, "grad_norm": 1.0662988424301147, "learning_rate": 4.216759885726338e-05, "loss": 0.2095, "step": 52990 }, { "epoch": 1.9260120648302927, "grad_norm": 1.2562239170074463, "learning_rate": 4.2163826178844124e-05, "loss": 0.6666, "step": 53000 }, { "epoch": 1.9263754633330912, "grad_norm": 0.6966450214385986, "learning_rate": 4.216005276089666e-05, "loss": 0.1059, "step": 53010 }, { "epoch": 1.9267388618358892, "grad_norm": 0.7130870819091797, "learning_rate": 4.215627860358359e-05, "loss": 0.1822, "step": 53020 }, { "epoch": 1.9271022603386874, "grad_norm": 0.8667415380477905, "learning_rate": 4.215250370706752e-05, "loss": 0.1297, "step": 53030 }, { "epoch": 1.9274656588414856, "grad_norm": 0.8106217384338379, "learning_rate": 4.214872807151108e-05, "loss": 0.1198, "step": 53040 }, { "epoch": 1.9278290573442838, "grad_norm": 0.6625964045524597, "learning_rate": 4.214495169707697e-05, "loss": 0.1554, "step": 53050 }, { "epoch": 1.928192455847082, "grad_norm": 1.328296422958374, "learning_rate": 4.214117458392789e-05, "loss": 0.1275, "step": 53060 }, { "epoch": 1.92855585434988, "grad_norm": 0.5741416811943054, "learning_rate": 4.213739673222659e-05, "loss": 0.122, "step": 53070 }, { "epoch": 1.9289192528526784, "grad_norm": 0.6884883046150208, "learning_rate": 4.213361814213584e-05, "loss": 0.2229, "step": 53080 }, { "epoch": 1.9292826513554764, "grad_norm": 1.364357590675354, "learning_rate": 4.212983881381844e-05, "loss": 0.1169, "step": 53090 }, { "epoch": 1.9296460498582746, "grad_norm": 1.6540427207946777, "learning_rate": 4.2126058747437236e-05, "loss": 0.1273, "step": 53100 }, { "epoch": 1.9300094483610728, "grad_norm": 1.8838560581207275, "learning_rate": 4.21222779431551e-05, "loss": 0.1395, "step": 53110 }, { "epoch": 1.9303728468638708, "grad_norm": 1.0048059225082397, "learning_rate": 4.2118496401134925e-05, "loss": 0.1516, "step": 53120 }, { "epoch": 1.9307362453666692, "grad_norm": 1.0288422107696533, "learning_rate": 4.211471412153965e-05, "loss": 0.114, "step": 53130 }, { "epoch": 1.9310996438694672, "grad_norm": 0.8214828968048096, "learning_rate": 4.2110931104532236e-05, "loss": 0.1154, "step": 53140 }, { "epoch": 1.9314630423722654, "grad_norm": 1.7350075244903564, "learning_rate": 4.210714735027568e-05, "loss": 0.1351, "step": 53150 }, { "epoch": 1.9318264408750636, "grad_norm": 1.1846505403518677, "learning_rate": 4.210336285893302e-05, "loss": 0.1213, "step": 53160 }, { "epoch": 1.9321898393778618, "grad_norm": 0.36710694432258606, "learning_rate": 4.2099577630667295e-05, "loss": 0.1328, "step": 53170 }, { "epoch": 1.93255323788066, "grad_norm": 1.5242916345596313, "learning_rate": 4.209579166564162e-05, "loss": 0.1068, "step": 53180 }, { "epoch": 1.932916636383458, "grad_norm": 0.5341594219207764, "learning_rate": 4.209200496401911e-05, "loss": 0.1132, "step": 53190 }, { "epoch": 1.9332800348862562, "grad_norm": 1.32260000705719, "learning_rate": 4.2088217525962914e-05, "loss": 0.2021, "step": 53200 }, { "epoch": 1.9336434333890544, "grad_norm": 1.7666555643081665, "learning_rate": 4.208442935163622e-05, "loss": 0.1199, "step": 53210 }, { "epoch": 1.9340068318918526, "grad_norm": 0.7060844302177429, "learning_rate": 4.2080640441202265e-05, "loss": 0.2058, "step": 53220 }, { "epoch": 1.9343702303946508, "grad_norm": 0.6064701676368713, "learning_rate": 4.207685079482428e-05, "loss": 0.1163, "step": 53230 }, { "epoch": 1.9347336288974488, "grad_norm": 0.8445596694946289, "learning_rate": 4.2073060412665554e-05, "loss": 0.1094, "step": 53240 }, { "epoch": 1.9350970274002472, "grad_norm": 1.8160717487335205, "learning_rate": 4.20692692948894e-05, "loss": 0.1421, "step": 53250 }, { "epoch": 1.9354604259030452, "grad_norm": 0.8465480208396912, "learning_rate": 4.206547744165918e-05, "loss": 0.1151, "step": 53260 }, { "epoch": 1.9358238244058434, "grad_norm": 0.4361567795276642, "learning_rate": 4.206168485313823e-05, "loss": 0.1343, "step": 53270 }, { "epoch": 1.9361872229086416, "grad_norm": 0.4682723581790924, "learning_rate": 4.2057891529490004e-05, "loss": 0.2349, "step": 53280 }, { "epoch": 1.9365506214114396, "grad_norm": 0.7894558310508728, "learning_rate": 4.205409747087792e-05, "loss": 0.1211, "step": 53290 }, { "epoch": 1.936914019914238, "grad_norm": 1.9797241687774658, "learning_rate": 4.205030267746545e-05, "loss": 0.1403, "step": 53300 }, { "epoch": 1.937277418417036, "grad_norm": 0.7554487586021423, "learning_rate": 4.20465071494161e-05, "loss": 0.133, "step": 53310 }, { "epoch": 1.9376408169198343, "grad_norm": 0.5056400895118713, "learning_rate": 4.2042710886893414e-05, "loss": 0.1429, "step": 53320 }, { "epoch": 1.9380042154226325, "grad_norm": 4.12957763671875, "learning_rate": 4.203891389006096e-05, "loss": 0.1154, "step": 53330 }, { "epoch": 1.9383676139254307, "grad_norm": 0.7138916850090027, "learning_rate": 4.203511615908232e-05, "loss": 0.1235, "step": 53340 }, { "epoch": 1.9387310124282289, "grad_norm": 0.6101375818252563, "learning_rate": 4.2031317694121144e-05, "loss": 0.1015, "step": 53350 }, { "epoch": 1.9390944109310269, "grad_norm": 0.9244548082351685, "learning_rate": 4.202751849534108e-05, "loss": 0.104, "step": 53360 }, { "epoch": 1.9394578094338253, "grad_norm": 0.38235339522361755, "learning_rate": 4.202371856290583e-05, "loss": 0.2562, "step": 53370 }, { "epoch": 1.9398212079366233, "grad_norm": 1.2204453945159912, "learning_rate": 4.201991789697912e-05, "loss": 0.1074, "step": 53380 }, { "epoch": 1.9401846064394215, "grad_norm": 0.9025306105613708, "learning_rate": 4.2016116497724715e-05, "loss": 0.1154, "step": 53390 }, { "epoch": 1.9405480049422197, "grad_norm": 0.6132228970527649, "learning_rate": 4.201231436530637e-05, "loss": 0.1332, "step": 53400 }, { "epoch": 1.9405480049422197, "eval_loss": 0.34726399183273315, "eval_runtime": 180.5053, "eval_samples_per_second": 41.074, "eval_steps_per_second": 5.136, "eval_wer": 0.16638226804872294, "step": 53400 }, { "epoch": 1.9409114034450177, "grad_norm": 1.0227421522140503, "learning_rate": 4.2008511499887945e-05, "loss": 0.1042, "step": 53410 }, { "epoch": 1.941274801947816, "grad_norm": 1.9135148525238037, "learning_rate": 4.2004707901633274e-05, "loss": 0.1953, "step": 53420 }, { "epoch": 1.941638200450614, "grad_norm": 1.0358216762542725, "learning_rate": 4.200090357070624e-05, "loss": 0.1029, "step": 53430 }, { "epoch": 1.9420015989534123, "grad_norm": 0.9207081198692322, "learning_rate": 4.199709850727076e-05, "loss": 0.1171, "step": 53440 }, { "epoch": 1.9423649974562105, "grad_norm": 0.558474600315094, "learning_rate": 4.1993292711490784e-05, "loss": 0.1185, "step": 53450 }, { "epoch": 1.9427283959590087, "grad_norm": 1.7064687013626099, "learning_rate": 4.198948618353029e-05, "loss": 0.1429, "step": 53460 }, { "epoch": 1.943091794461807, "grad_norm": 0.4840683341026306, "learning_rate": 4.198567892355328e-05, "loss": 0.1566, "step": 53470 }, { "epoch": 1.943455192964605, "grad_norm": 2.152949810028076, "learning_rate": 4.19818709317238e-05, "loss": 0.11, "step": 53480 }, { "epoch": 1.943818591467403, "grad_norm": 1.0784387588500977, "learning_rate": 4.197806220820592e-05, "loss": 0.1211, "step": 53490 }, { "epoch": 1.9441819899702013, "grad_norm": 0.9039841890335083, "learning_rate": 4.197425275316376e-05, "loss": 0.1167, "step": 53500 }, { "epoch": 1.9445453884729995, "grad_norm": 0.8237749934196472, "learning_rate": 4.1970442566761436e-05, "loss": 0.1443, "step": 53510 }, { "epoch": 1.9449087869757977, "grad_norm": 0.5178882479667664, "learning_rate": 4.196663164916313e-05, "loss": 0.1251, "step": 53520 }, { "epoch": 1.9452721854785957, "grad_norm": 0.883787989616394, "learning_rate": 4.196282000053301e-05, "loss": 0.1078, "step": 53530 }, { "epoch": 1.9456355839813941, "grad_norm": 0.6376329064369202, "learning_rate": 4.195900762103535e-05, "loss": 0.1286, "step": 53540 }, { "epoch": 1.9459989824841921, "grad_norm": 1.3312426805496216, "learning_rate": 4.1955194510834394e-05, "loss": 0.1421, "step": 53550 }, { "epoch": 1.9463623809869903, "grad_norm": 0.8462713360786438, "learning_rate": 4.1951380670094424e-05, "loss": 0.1183, "step": 53560 }, { "epoch": 1.9467257794897885, "grad_norm": 0.4300178587436676, "learning_rate": 4.194756609897978e-05, "loss": 0.11, "step": 53570 }, { "epoch": 1.9470891779925865, "grad_norm": 0.5355455875396729, "learning_rate": 4.1943750797654816e-05, "loss": 0.1197, "step": 53580 }, { "epoch": 1.947452576495385, "grad_norm": 0.8750283122062683, "learning_rate": 4.193993476628391e-05, "loss": 0.1024, "step": 53590 }, { "epoch": 1.947815974998183, "grad_norm": 1.2552978992462158, "learning_rate": 4.193611800503148e-05, "loss": 0.152, "step": 53600 }, { "epoch": 1.9481793735009811, "grad_norm": 0.8852622509002686, "learning_rate": 4.1932300514062e-05, "loss": 0.1077, "step": 53610 }, { "epoch": 1.9485427720037793, "grad_norm": 0.6841835379600525, "learning_rate": 4.192848229353992e-05, "loss": 0.1281, "step": 53620 }, { "epoch": 1.9489061705065776, "grad_norm": 1.0521607398986816, "learning_rate": 4.192466334362978e-05, "loss": 0.1136, "step": 53630 }, { "epoch": 1.9492695690093758, "grad_norm": 4.119276523590088, "learning_rate": 4.192084366449612e-05, "loss": 0.0939, "step": 53640 }, { "epoch": 1.9496329675121737, "grad_norm": 0.8290958404541016, "learning_rate": 4.19170232563035e-05, "loss": 0.1625, "step": 53650 }, { "epoch": 1.9499963660149722, "grad_norm": 0.6359632015228271, "learning_rate": 4.191320211921654e-05, "loss": 0.1208, "step": 53660 }, { "epoch": 1.9503597645177702, "grad_norm": 0.699052631855011, "learning_rate": 4.1909380253399875e-05, "loss": 0.1248, "step": 53670 }, { "epoch": 1.9507231630205684, "grad_norm": 17.17115592956543, "learning_rate": 4.190555765901819e-05, "loss": 0.3458, "step": 53680 }, { "epoch": 1.9510865615233666, "grad_norm": 3.899052858352661, "learning_rate": 4.190173433623618e-05, "loss": 0.1144, "step": 53690 }, { "epoch": 1.9514499600261646, "grad_norm": 1.1907508373260498, "learning_rate": 4.1897910285218556e-05, "loss": 0.152, "step": 53700 }, { "epoch": 1.951813358528963, "grad_norm": 0.7645424008369446, "learning_rate": 4.189408550613011e-05, "loss": 0.1258, "step": 53710 }, { "epoch": 1.952176757031761, "grad_norm": 0.2523237466812134, "learning_rate": 4.1890259999135625e-05, "loss": 0.1213, "step": 53720 }, { "epoch": 1.9525401555345592, "grad_norm": 1.3578497171401978, "learning_rate": 4.188643376439993e-05, "loss": 1.5489, "step": 53730 }, { "epoch": 1.9529035540373574, "grad_norm": 0.6249386072158813, "learning_rate": 4.1882606802087896e-05, "loss": 0.1699, "step": 53740 }, { "epoch": 1.9532669525401556, "grad_norm": 0.9699862599372864, "learning_rate": 4.1878779112364394e-05, "loss": 0.1724, "step": 53750 }, { "epoch": 1.9536303510429538, "grad_norm": 1.3478792905807495, "learning_rate": 4.187495069539437e-05, "loss": 0.133, "step": 53760 }, { "epoch": 1.9539937495457518, "grad_norm": 0.6324986815452576, "learning_rate": 4.187112155134275e-05, "loss": 0.1354, "step": 53770 }, { "epoch": 1.95435714804855, "grad_norm": 2.009544610977173, "learning_rate": 4.186729168037453e-05, "loss": 0.1347, "step": 53780 }, { "epoch": 1.9547205465513482, "grad_norm": 0.6510929465293884, "learning_rate": 4.186346108265472e-05, "loss": 0.1227, "step": 53790 }, { "epoch": 1.9550839450541464, "grad_norm": 1.5079245567321777, "learning_rate": 4.185962975834838e-05, "loss": 0.1347, "step": 53800 }, { "epoch": 1.9554473435569446, "grad_norm": 3.214449882507324, "learning_rate": 4.1855797707620586e-05, "loss": 0.1138, "step": 53810 }, { "epoch": 1.9558107420597426, "grad_norm": 0.7995330095291138, "learning_rate": 4.1851964930636434e-05, "loss": 0.1522, "step": 53820 }, { "epoch": 1.956174140562541, "grad_norm": 1.6713122129440308, "learning_rate": 4.184813142756108e-05, "loss": 0.125, "step": 53830 }, { "epoch": 1.956537539065339, "grad_norm": 0.7136033177375793, "learning_rate": 4.184429719855968e-05, "loss": 0.1267, "step": 53840 }, { "epoch": 1.9569009375681372, "grad_norm": 0.5580174922943115, "learning_rate": 4.1840462243797444e-05, "loss": 0.1126, "step": 53850 }, { "epoch": 1.9572643360709354, "grad_norm": 0.8671419024467468, "learning_rate": 4.183662656343961e-05, "loss": 0.1209, "step": 53860 }, { "epoch": 1.9576277345737334, "grad_norm": 0.6624314188957214, "learning_rate": 4.183279015765145e-05, "loss": 0.1397, "step": 53870 }, { "epoch": 1.9579911330765318, "grad_norm": 1.4401901960372925, "learning_rate": 4.182895302659825e-05, "loss": 0.0894, "step": 53880 }, { "epoch": 1.9583545315793298, "grad_norm": 0.9187797904014587, "learning_rate": 4.182511517044534e-05, "loss": 0.127, "step": 53890 }, { "epoch": 1.958717930082128, "grad_norm": 1.2426072359085083, "learning_rate": 4.1821276589358084e-05, "loss": 0.1381, "step": 53900 }, { "epoch": 1.9590813285849262, "grad_norm": 0.8035231828689575, "learning_rate": 4.1817437283501865e-05, "loss": 0.0953, "step": 53910 }, { "epoch": 1.9594447270877244, "grad_norm": 0.32439205050468445, "learning_rate": 4.1813597253042115e-05, "loss": 0.138, "step": 53920 }, { "epoch": 1.9598081255905226, "grad_norm": 1.0287327766418457, "learning_rate": 4.180975649814428e-05, "loss": 3.1039, "step": 53930 }, { "epoch": 1.9601715240933206, "grad_norm": 1.3450182676315308, "learning_rate": 4.180591501897384e-05, "loss": 0.1081, "step": 53940 }, { "epoch": 1.960534922596119, "grad_norm": 7.1403961181640625, "learning_rate": 4.180207281569633e-05, "loss": 0.1484, "step": 53950 }, { "epoch": 1.960898321098917, "grad_norm": 1.2163225412368774, "learning_rate": 4.179822988847728e-05, "loss": 0.1041, "step": 53960 }, { "epoch": 1.9612617196017152, "grad_norm": 0.9922796487808228, "learning_rate": 4.179438623748228e-05, "loss": 0.1343, "step": 53970 }, { "epoch": 1.9616251181045135, "grad_norm": 2.245447874069214, "learning_rate": 4.1790541862876906e-05, "loss": 0.1015, "step": 53980 }, { "epoch": 1.9619885166073114, "grad_norm": 2.284679651260376, "learning_rate": 4.178669676482685e-05, "loss": 0.0913, "step": 53990 }, { "epoch": 1.9623519151101099, "grad_norm": 0.9692349433898926, "learning_rate": 4.178285094349775e-05, "loss": 0.1282, "step": 54000 }, { "epoch": 1.9623519151101099, "eval_loss": 0.3314037322998047, "eval_runtime": 180.606, "eval_samples_per_second": 41.051, "eval_steps_per_second": 5.133, "eval_wer": 0.16505709150979359, "step": 54000 }, { "epoch": 1.9627153136129079, "grad_norm": 0.4108816683292389, "learning_rate": 4.177900439905531e-05, "loss": 0.1272, "step": 54010 }, { "epoch": 1.963078712115706, "grad_norm": 0.3358526825904846, "learning_rate": 4.1775157131665276e-05, "loss": 0.1453, "step": 54020 }, { "epoch": 1.9634421106185043, "grad_norm": 1.476314663887024, "learning_rate": 4.177130914149341e-05, "loss": 0.1162, "step": 54030 }, { "epoch": 1.9638055091213025, "grad_norm": 0.7912114262580872, "learning_rate": 4.17674604287055e-05, "loss": 0.1056, "step": 54040 }, { "epoch": 1.9641689076241007, "grad_norm": 0.4801596403121948, "learning_rate": 4.176361099346738e-05, "loss": 0.1478, "step": 54050 }, { "epoch": 1.9645323061268987, "grad_norm": 0.7710531949996948, "learning_rate": 4.175976083594491e-05, "loss": 0.1131, "step": 54060 }, { "epoch": 1.9648957046296969, "grad_norm": 0.6709341406822205, "learning_rate": 4.175590995630398e-05, "loss": 0.1586, "step": 54070 }, { "epoch": 1.965259103132495, "grad_norm": 1.3941307067871094, "learning_rate": 4.17520583547105e-05, "loss": 0.1131, "step": 54080 }, { "epoch": 1.9656225016352933, "grad_norm": 0.759842038154602, "learning_rate": 4.174820603133043e-05, "loss": 0.0985, "step": 54090 }, { "epoch": 1.9659859001380915, "grad_norm": 0.9153608679771423, "learning_rate": 4.174435298632976e-05, "loss": 0.1547, "step": 54100 }, { "epoch": 1.9663492986408895, "grad_norm": 1.4363652467727661, "learning_rate": 4.174049921987449e-05, "loss": 0.1127, "step": 54110 }, { "epoch": 1.966712697143688, "grad_norm": 0.7368317246437073, "learning_rate": 4.173664473213067e-05, "loss": 0.1302, "step": 54120 }, { "epoch": 1.967076095646486, "grad_norm": 1.2740521430969238, "learning_rate": 4.173278952326438e-05, "loss": 0.1294, "step": 54130 }, { "epoch": 1.967439494149284, "grad_norm": 2.7798774242401123, "learning_rate": 4.1728933593441735e-05, "loss": 0.1011, "step": 54140 }, { "epoch": 1.9678028926520823, "grad_norm": 1.9629179239273071, "learning_rate": 4.172507694282885e-05, "loss": 0.3149, "step": 54150 }, { "epoch": 1.9681662911548803, "grad_norm": 3.5863332748413086, "learning_rate": 4.1721219571591915e-05, "loss": 0.1323, "step": 54160 }, { "epoch": 1.9685296896576787, "grad_norm": 0.29740679264068604, "learning_rate": 4.1717361479897116e-05, "loss": 0.1725, "step": 54170 }, { "epoch": 1.9688930881604767, "grad_norm": 1.0469319820404053, "learning_rate": 4.17135026679107e-05, "loss": 0.1138, "step": 54180 }, { "epoch": 1.969256486663275, "grad_norm": 0.5336177945137024, "learning_rate": 4.170964313579891e-05, "loss": 0.2207, "step": 54190 }, { "epoch": 1.9696198851660731, "grad_norm": 0.973862886428833, "learning_rate": 4.1705782883728055e-05, "loss": 0.1328, "step": 54200 }, { "epoch": 1.9699832836688713, "grad_norm": 0.8640954494476318, "learning_rate": 4.170192191186446e-05, "loss": 0.1315, "step": 54210 }, { "epoch": 1.9703466821716695, "grad_norm": 0.47578397393226624, "learning_rate": 4.169806022037447e-05, "loss": 0.1823, "step": 54220 }, { "epoch": 1.9707100806744675, "grad_norm": 1.4527409076690674, "learning_rate": 4.169419780942448e-05, "loss": 2.5822, "step": 54230 }, { "epoch": 1.971073479177266, "grad_norm": 0.48623302578926086, "learning_rate": 4.1690334679180896e-05, "loss": 0.1093, "step": 54240 }, { "epoch": 1.971436877680064, "grad_norm": 1.1767234802246094, "learning_rate": 4.1686470829810185e-05, "loss": 0.1329, "step": 54250 }, { "epoch": 1.9718002761828621, "grad_norm": 1.128841519355774, "learning_rate": 4.1682606261478816e-05, "loss": 0.1102, "step": 54260 }, { "epoch": 1.9721636746856603, "grad_norm": 1.4685746431350708, "learning_rate": 4.16787409743533e-05, "loss": 0.14, "step": 54270 }, { "epoch": 1.9725270731884583, "grad_norm": 0.9918948411941528, "learning_rate": 4.167487496860018e-05, "loss": 0.1093, "step": 54280 }, { "epoch": 1.9728904716912568, "grad_norm": 0.5849924683570862, "learning_rate": 4.167100824438602e-05, "loss": 0.3633, "step": 54290 }, { "epoch": 1.9732538701940547, "grad_norm": 1.0083026885986328, "learning_rate": 4.1667140801877433e-05, "loss": 0.3471, "step": 54300 }, { "epoch": 1.973617268696853, "grad_norm": 4.210540771484375, "learning_rate": 4.1663272641241056e-05, "loss": 0.111, "step": 54310 }, { "epoch": 1.9739806671996512, "grad_norm": 0.47457021474838257, "learning_rate": 4.165940376264354e-05, "loss": 0.1304, "step": 54320 }, { "epoch": 1.9743440657024494, "grad_norm": 0.6626879572868347, "learning_rate": 4.1655534166251596e-05, "loss": 0.1362, "step": 54330 }, { "epoch": 1.9747074642052476, "grad_norm": 1.0823551416397095, "learning_rate": 4.1651663852231946e-05, "loss": 0.1009, "step": 54340 }, { "epoch": 1.9750708627080455, "grad_norm": 1.6723361015319824, "learning_rate": 4.164779282075134e-05, "loss": 0.1539, "step": 54350 }, { "epoch": 1.9754342612108438, "grad_norm": 1.5842360258102417, "learning_rate": 4.1643921071976584e-05, "loss": 0.1342, "step": 54360 }, { "epoch": 1.975797659713642, "grad_norm": 1.055336594581604, "learning_rate": 4.164004860607448e-05, "loss": 0.176, "step": 54370 }, { "epoch": 1.9761610582164402, "grad_norm": 0.81571364402771, "learning_rate": 4.16361754232119e-05, "loss": 0.1187, "step": 54380 }, { "epoch": 1.9765244567192384, "grad_norm": 1.0346819162368774, "learning_rate": 4.1632301523555693e-05, "loss": 0.1255, "step": 54390 }, { "epoch": 1.9768878552220364, "grad_norm": 1.1211163997650146, "learning_rate": 4.162842690727281e-05, "loss": 0.1165, "step": 54400 }, { "epoch": 1.9772512537248348, "grad_norm": 0.5160552263259888, "learning_rate": 4.162455157453017e-05, "loss": 0.1393, "step": 54410 }, { "epoch": 1.9776146522276328, "grad_norm": 0.767784833908081, "learning_rate": 4.1620675525494746e-05, "loss": 0.1552, "step": 54420 }, { "epoch": 1.977978050730431, "grad_norm": 1.101317286491394, "learning_rate": 4.1616798760333554e-05, "loss": 0.1182, "step": 54430 }, { "epoch": 1.9783414492332292, "grad_norm": 0.7279396653175354, "learning_rate": 4.161292127921363e-05, "loss": 0.12, "step": 54440 }, { "epoch": 1.9787048477360272, "grad_norm": 1.5998153686523438, "learning_rate": 4.1609043082302036e-05, "loss": 0.1335, "step": 54450 }, { "epoch": 1.9790682462388256, "grad_norm": 0.8245583772659302, "learning_rate": 4.160516416976587e-05, "loss": 0.1249, "step": 54460 }, { "epoch": 1.9794316447416236, "grad_norm": 0.5749397277832031, "learning_rate": 4.1601284541772255e-05, "loss": 0.1939, "step": 54470 }, { "epoch": 1.9797950432444218, "grad_norm": 0.7786006927490234, "learning_rate": 4.159740419848837e-05, "loss": 1.8059, "step": 54480 }, { "epoch": 1.98015844174722, "grad_norm": 0.41233259439468384, "learning_rate": 4.159352314008138e-05, "loss": 0.1208, "step": 54490 }, { "epoch": 1.9805218402500182, "grad_norm": 0.5091323256492615, "learning_rate": 4.158964136671852e-05, "loss": 0.1279, "step": 54500 }, { "epoch": 1.9808852387528164, "grad_norm": 4.300207138061523, "learning_rate": 4.158575887856704e-05, "loss": 0.1744, "step": 54510 }, { "epoch": 1.9812486372556144, "grad_norm": 0.7447227239608765, "learning_rate": 4.1581875675794226e-05, "loss": 0.1652, "step": 54520 }, { "epoch": 1.9816120357584128, "grad_norm": 0.6846696734428406, "learning_rate": 4.157799175856738e-05, "loss": 0.1027, "step": 54530 }, { "epoch": 1.9819754342612108, "grad_norm": 0.8642467260360718, "learning_rate": 4.157410712705386e-05, "loss": 0.1165, "step": 54540 }, { "epoch": 1.982338832764009, "grad_norm": 0.8407902121543884, "learning_rate": 4.157022178142104e-05, "loss": 0.1623, "step": 54550 }, { "epoch": 1.9827022312668072, "grad_norm": 0.8839777708053589, "learning_rate": 4.156633572183631e-05, "loss": 0.1131, "step": 54560 }, { "epoch": 1.9830656297696052, "grad_norm": 1.39069402217865, "learning_rate": 4.1562448948467126e-05, "loss": 0.1906, "step": 54570 }, { "epoch": 1.9834290282724036, "grad_norm": 2.1196155548095703, "learning_rate": 4.1558561461480936e-05, "loss": 0.1261, "step": 54580 }, { "epoch": 1.9837924267752016, "grad_norm": 1.092934250831604, "learning_rate": 4.155467326104525e-05, "loss": 0.1029, "step": 54590 }, { "epoch": 1.9841558252779998, "grad_norm": 0.7902958989143372, "learning_rate": 4.1550784347327607e-05, "loss": 0.1159, "step": 54600 }, { "epoch": 1.9841558252779998, "eval_loss": 0.3433511555194855, "eval_runtime": 180.0868, "eval_samples_per_second": 41.169, "eval_steps_per_second": 5.148, "eval_wer": 0.16009221776462687, "step": 54600 }, { "epoch": 1.984519223780798, "grad_norm": 1.3083094358444214, "learning_rate": 4.1546894720495546e-05, "loss": 0.1172, "step": 54610 }, { "epoch": 1.9848826222835962, "grad_norm": 1.9061583280563354, "learning_rate": 4.154300438071666e-05, "loss": 0.1335, "step": 54620 }, { "epoch": 1.9852460207863944, "grad_norm": 1.9469786882400513, "learning_rate": 4.153911332815859e-05, "loss": 0.1014, "step": 54630 }, { "epoch": 1.9856094192891924, "grad_norm": 6.232102394104004, "learning_rate": 4.153522156298896e-05, "loss": 0.1216, "step": 54640 }, { "epoch": 1.9859728177919909, "grad_norm": 0.6339765191078186, "learning_rate": 4.153132908537547e-05, "loss": 0.1236, "step": 54650 }, { "epoch": 1.9863362162947888, "grad_norm": 0.9476169943809509, "learning_rate": 4.152743589548582e-05, "loss": 0.0962, "step": 54660 }, { "epoch": 1.986699614797587, "grad_norm": 1.0691879987716675, "learning_rate": 4.152354199348777e-05, "loss": 0.3789, "step": 54670 }, { "epoch": 1.9870630133003853, "grad_norm": 0.9338876605033875, "learning_rate": 4.1519647379549084e-05, "loss": 0.0914, "step": 54680 }, { "epoch": 1.9874264118031832, "grad_norm": 0.6754772663116455, "learning_rate": 4.151575205383758e-05, "loss": 0.1044, "step": 54690 }, { "epoch": 1.9877898103059817, "grad_norm": 0.6961863040924072, "learning_rate": 4.151185601652107e-05, "loss": 0.1322, "step": 54700 }, { "epoch": 1.9881532088087797, "grad_norm": 1.1425034999847412, "learning_rate": 4.150795926776744e-05, "loss": 0.1381, "step": 54710 }, { "epoch": 1.9885166073115779, "grad_norm": 1.4080971479415894, "learning_rate": 4.150406180774458e-05, "loss": 0.1234, "step": 54720 }, { "epoch": 1.988880005814376, "grad_norm": 0.7941197752952576, "learning_rate": 4.1500163636620414e-05, "loss": 0.0903, "step": 54730 }, { "epoch": 1.989243404317174, "grad_norm": 0.8813301920890808, "learning_rate": 4.149626475456291e-05, "loss": 0.0965, "step": 54740 }, { "epoch": 1.9896068028199725, "grad_norm": 0.727293848991394, "learning_rate": 4.1492365161740054e-05, "loss": 0.1269, "step": 54750 }, { "epoch": 1.9899702013227705, "grad_norm": 3.7548305988311768, "learning_rate": 4.148846485831986e-05, "loss": 0.0992, "step": 54760 }, { "epoch": 1.9903335998255687, "grad_norm": 0.5141910910606384, "learning_rate": 4.148456384447037e-05, "loss": 0.1275, "step": 54770 }, { "epoch": 1.9906969983283669, "grad_norm": 0.5424654483795166, "learning_rate": 4.1480662120359696e-05, "loss": 0.6733, "step": 54780 }, { "epoch": 1.991060396831165, "grad_norm": 0.8342083096504211, "learning_rate": 4.147675968615592e-05, "loss": 0.1126, "step": 54790 }, { "epoch": 1.9914237953339633, "grad_norm": 0.3992403745651245, "learning_rate": 4.147285654202719e-05, "loss": 0.1589, "step": 54800 }, { "epoch": 1.9917871938367613, "grad_norm": 0.9092950820922852, "learning_rate": 4.146895268814169e-05, "loss": 0.1217, "step": 54810 }, { "epoch": 1.9921505923395597, "grad_norm": 0.4327254295349121, "learning_rate": 4.1465048124667605e-05, "loss": 0.1615, "step": 54820 }, { "epoch": 1.9925139908423577, "grad_norm": 1.1109565496444702, "learning_rate": 4.146114285177319e-05, "loss": 0.1965, "step": 54830 }, { "epoch": 1.992877389345156, "grad_norm": 10.526979446411133, "learning_rate": 4.145723686962669e-05, "loss": 0.2047, "step": 54840 }, { "epoch": 1.993240787847954, "grad_norm": 1.4240983724594116, "learning_rate": 4.1453330178396415e-05, "loss": 0.1261, "step": 54850 }, { "epoch": 1.993604186350752, "grad_norm": 3.436688184738159, "learning_rate": 4.144942277825068e-05, "loss": 0.1194, "step": 54860 }, { "epoch": 1.9939675848535505, "grad_norm": 0.3504880666732788, "learning_rate": 4.1445514669357846e-05, "loss": 0.1269, "step": 54870 }, { "epoch": 1.9943309833563485, "grad_norm": 1.8600322008132935, "learning_rate": 4.14416058518863e-05, "loss": 0.1202, "step": 54880 }, { "epoch": 1.9946943818591467, "grad_norm": 0.7843186259269714, "learning_rate": 4.1437696326004456e-05, "loss": 0.1047, "step": 54890 }, { "epoch": 1.995057780361945, "grad_norm": 1.593837022781372, "learning_rate": 4.1433786091880765e-05, "loss": 0.1269, "step": 54900 }, { "epoch": 1.9954211788647431, "grad_norm": 1.9453426599502563, "learning_rate": 4.14298751496837e-05, "loss": 0.1054, "step": 54910 }, { "epoch": 1.9957845773675413, "grad_norm": 0.7861382365226746, "learning_rate": 4.142596349958177e-05, "loss": 0.1467, "step": 54920 }, { "epoch": 1.9961479758703393, "grad_norm": 0.9338520169258118, "learning_rate": 4.142205114174352e-05, "loss": 0.1014, "step": 54930 }, { "epoch": 1.9965113743731377, "grad_norm": 3.8717129230499268, "learning_rate": 4.1418138076337516e-05, "loss": 0.1426, "step": 54940 }, { "epoch": 1.9968747728759357, "grad_norm": 0.579759418964386, "learning_rate": 4.141422430353236e-05, "loss": 0.1154, "step": 54950 }, { "epoch": 1.997238171378734, "grad_norm": 1.129913091659546, "learning_rate": 4.141030982349668e-05, "loss": 0.1019, "step": 54960 }, { "epoch": 1.9976015698815321, "grad_norm": 0.5852164626121521, "learning_rate": 4.140639463639913e-05, "loss": 0.1719, "step": 54970 }, { "epoch": 1.9979649683843301, "grad_norm": 3.1367127895355225, "learning_rate": 4.1402478742408415e-05, "loss": 0.0909, "step": 54980 }, { "epoch": 1.9983283668871286, "grad_norm": 0.5207622051239014, "learning_rate": 4.1398562141693253e-05, "loss": 0.1212, "step": 54990 }, { "epoch": 1.9986917653899265, "grad_norm": 0.5118950605392456, "learning_rate": 4.1394644834422394e-05, "loss": 0.1217, "step": 55000 }, { "epoch": 1.9990551638927248, "grad_norm": 0.45482707023620605, "learning_rate": 4.1390726820764614e-05, "loss": 0.0986, "step": 55010 }, { "epoch": 1.999418562395523, "grad_norm": 1.9805399179458618, "learning_rate": 4.138680810088875e-05, "loss": 0.1356, "step": 55020 }, { "epoch": 1.999781960898321, "grad_norm": 1.0094414949417114, "learning_rate": 4.138288867496362e-05, "loss": 0.1751, "step": 55030 }, { "epoch": 2.0001453594011194, "grad_norm": 1.6492732763290405, "learning_rate": 4.1378968543158106e-05, "loss": 0.1792, "step": 55040 }, { "epoch": 2.0005087579039174, "grad_norm": 6.960714340209961, "learning_rate": 4.137504770564111e-05, "loss": 0.1707, "step": 55050 }, { "epoch": 2.000872156406716, "grad_norm": 0.483518123626709, "learning_rate": 4.1371126162581576e-05, "loss": 0.1043, "step": 55060 }, { "epoch": 2.0012355549095138, "grad_norm": 0.5076984763145447, "learning_rate": 4.1367203914148464e-05, "loss": 0.1356, "step": 55070 }, { "epoch": 2.0015989534123118, "grad_norm": 2.341773509979248, "learning_rate": 4.136328096051077e-05, "loss": 0.1096, "step": 55080 }, { "epoch": 2.00196235191511, "grad_norm": 0.5860946178436279, "learning_rate": 4.135935730183752e-05, "loss": 0.1076, "step": 55090 }, { "epoch": 2.002325750417908, "grad_norm": 0.4653785824775696, "learning_rate": 4.1355432938297774e-05, "loss": 0.1517, "step": 55100 }, { "epoch": 2.0026891489207066, "grad_norm": 1.198096513748169, "learning_rate": 4.135150787006061e-05, "loss": 0.369, "step": 55110 }, { "epoch": 2.0030525474235046, "grad_norm": 1.07427978515625, "learning_rate": 4.134758209729516e-05, "loss": 0.1476, "step": 55120 }, { "epoch": 2.0034159459263026, "grad_norm": 0.7984631657600403, "learning_rate": 4.134365562017055e-05, "loss": 0.0972, "step": 55130 }, { "epoch": 2.003779344429101, "grad_norm": 1.2470594644546509, "learning_rate": 4.133972843885598e-05, "loss": 0.0884, "step": 55140 }, { "epoch": 2.004142742931899, "grad_norm": 0.6046581268310547, "learning_rate": 4.133580055352064e-05, "loss": 0.2083, "step": 55150 }, { "epoch": 2.0045061414346974, "grad_norm": 0.8026099801063538, "learning_rate": 4.133187196433379e-05, "loss": 0.1278, "step": 55160 }, { "epoch": 2.0048695399374954, "grad_norm": 0.6957481503486633, "learning_rate": 4.132794267146467e-05, "loss": 0.1106, "step": 55170 }, { "epoch": 2.005232938440294, "grad_norm": 1.2208986282348633, "learning_rate": 4.13240126750826e-05, "loss": 0.1058, "step": 55180 }, { "epoch": 2.005596336943092, "grad_norm": 0.9665369391441345, "learning_rate": 4.132008197535692e-05, "loss": 0.1195, "step": 55190 }, { "epoch": 2.00595973544589, "grad_norm": 1.0869636535644531, "learning_rate": 4.131615057245696e-05, "loss": 0.1004, "step": 55200 }, { "epoch": 2.00595973544589, "eval_loss": 0.3372127115726471, "eval_runtime": 180.3164, "eval_samples_per_second": 41.117, "eval_steps_per_second": 5.141, "eval_wer": 0.16414035979450686, "step": 55200 }, { "epoch": 2.006323133948688, "grad_norm": 1.0461617708206177, "learning_rate": 4.131221846655212e-05, "loss": 0.1003, "step": 55210 }, { "epoch": 2.006686532451486, "grad_norm": 1.1234357357025146, "learning_rate": 4.130828565781183e-05, "loss": 0.131, "step": 55220 }, { "epoch": 2.0070499309542846, "grad_norm": 0.792592465877533, "learning_rate": 4.1304352146405544e-05, "loss": 0.1236, "step": 55230 }, { "epoch": 2.0074133294570826, "grad_norm": 2.0296480655670166, "learning_rate": 4.130041793250273e-05, "loss": 0.1162, "step": 55240 }, { "epoch": 2.0077767279598806, "grad_norm": 0.8490334153175354, "learning_rate": 4.12964830162729e-05, "loss": 0.0891, "step": 55250 }, { "epoch": 2.008140126462679, "grad_norm": 2.996204376220703, "learning_rate": 4.129254739788561e-05, "loss": 0.088, "step": 55260 }, { "epoch": 2.008503524965477, "grad_norm": 0.785502016544342, "learning_rate": 4.128861107751041e-05, "loss": 2.8838, "step": 55270 }, { "epoch": 2.0088669234682754, "grad_norm": 0.9276618957519531, "learning_rate": 4.128467405531693e-05, "loss": 0.1125, "step": 55280 }, { "epoch": 2.0092303219710734, "grad_norm": 0.6827619671821594, "learning_rate": 4.128073633147477e-05, "loss": 0.1538, "step": 55290 }, { "epoch": 2.009593720473872, "grad_norm": 0.5531404614448547, "learning_rate": 4.1276797906153614e-05, "loss": 0.1451, "step": 55300 }, { "epoch": 2.00995711897667, "grad_norm": 1.3195756673812866, "learning_rate": 4.127285877952315e-05, "loss": 0.0831, "step": 55310 }, { "epoch": 2.010320517479468, "grad_norm": 1.291306734085083, "learning_rate": 4.12689189517531e-05, "loss": 0.1168, "step": 55320 }, { "epoch": 2.0106839159822663, "grad_norm": 0.7740198373794556, "learning_rate": 4.126497842301322e-05, "loss": 0.1293, "step": 55330 }, { "epoch": 2.0110473144850642, "grad_norm": 0.619372546672821, "learning_rate": 4.126103719347329e-05, "loss": 0.1151, "step": 55340 }, { "epoch": 2.0114107129878627, "grad_norm": 0.6809590458869934, "learning_rate": 4.1257095263303114e-05, "loss": 0.0808, "step": 55350 }, { "epoch": 2.0117741114906607, "grad_norm": 0.7653446197509766, "learning_rate": 4.125315263267255e-05, "loss": 0.0847, "step": 55360 }, { "epoch": 2.0121375099934586, "grad_norm": 0.7010202407836914, "learning_rate": 4.124920930175148e-05, "loss": 0.1856, "step": 55370 }, { "epoch": 2.012500908496257, "grad_norm": 0.8760896921157837, "learning_rate": 4.1245265270709786e-05, "loss": 0.0966, "step": 55380 }, { "epoch": 2.012864306999055, "grad_norm": 0.8872328400611877, "learning_rate": 4.124132053971741e-05, "loss": 0.1225, "step": 55390 }, { "epoch": 2.0132277055018535, "grad_norm": 0.7111076712608337, "learning_rate": 4.123737510894433e-05, "loss": 0.0917, "step": 55400 }, { "epoch": 2.0135911040046515, "grad_norm": 0.2959582209587097, "learning_rate": 4.1233428978560515e-05, "loss": 0.084, "step": 55410 }, { "epoch": 2.0139545025074495, "grad_norm": 0.5472272038459778, "learning_rate": 4.122948214873602e-05, "loss": 0.1165, "step": 55420 }, { "epoch": 2.014317901010248, "grad_norm": 1.7232263088226318, "learning_rate": 4.1225534619640874e-05, "loss": 0.1483, "step": 55430 }, { "epoch": 2.014681299513046, "grad_norm": 0.6070485711097717, "learning_rate": 4.1221586391445164e-05, "loss": 0.1181, "step": 55440 }, { "epoch": 2.0150446980158443, "grad_norm": 0.42631739377975464, "learning_rate": 4.121763746431903e-05, "loss": 0.2435, "step": 55450 }, { "epoch": 2.0154080965186423, "grad_norm": 0.4716903865337372, "learning_rate": 4.1213687838432594e-05, "loss": 0.102, "step": 55460 }, { "epoch": 2.0157714950214407, "grad_norm": 1.0024840831756592, "learning_rate": 4.120973751395604e-05, "loss": 2.0817, "step": 55470 }, { "epoch": 2.0161348935242387, "grad_norm": 0.6983594298362732, "learning_rate": 4.1205786491059565e-05, "loss": 0.1091, "step": 55480 }, { "epoch": 2.0164982920270367, "grad_norm": 3.18595814704895, "learning_rate": 4.1201834769913405e-05, "loss": 0.1334, "step": 55490 }, { "epoch": 2.016861690529835, "grad_norm": 1.0065993070602417, "learning_rate": 4.119788235068785e-05, "loss": 0.0893, "step": 55500 }, { "epoch": 2.017225089032633, "grad_norm": 1.9013348817825317, "learning_rate": 4.119392923355315e-05, "loss": 0.1055, "step": 55510 }, { "epoch": 2.0175884875354315, "grad_norm": 0.727342963218689, "learning_rate": 4.118997541867968e-05, "loss": 0.1577, "step": 55520 }, { "epoch": 2.0179518860382295, "grad_norm": 1.3305946588516235, "learning_rate": 4.118602090623777e-05, "loss": 0.1156, "step": 55530 }, { "epoch": 2.0183152845410275, "grad_norm": 166.5440673828125, "learning_rate": 4.11820656963978e-05, "loss": 3.13, "step": 55540 }, { "epoch": 2.018678683043826, "grad_norm": 1.1718511581420898, "learning_rate": 4.11781097893302e-05, "loss": 1.2695, "step": 55550 }, { "epoch": 2.019042081546624, "grad_norm": 0.7488642930984497, "learning_rate": 4.117415318520541e-05, "loss": 0.1179, "step": 55560 }, { "epoch": 2.0194054800494223, "grad_norm": 0.8934155702590942, "learning_rate": 4.117019588419391e-05, "loss": 0.0957, "step": 55570 }, { "epoch": 2.0197688785522203, "grad_norm": 1.2470290660858154, "learning_rate": 4.11662378864662e-05, "loss": 0.0974, "step": 55580 }, { "epoch": 2.0201322770550187, "grad_norm": 2.387202501296997, "learning_rate": 4.116227919219282e-05, "loss": 0.2065, "step": 55590 }, { "epoch": 2.0204956755578167, "grad_norm": 0.9765509963035583, "learning_rate": 4.115831980154434e-05, "loss": 0.083, "step": 55600 }, { "epoch": 2.0208590740606147, "grad_norm": 1.544554591178894, "learning_rate": 4.115435971469135e-05, "loss": 0.1067, "step": 55610 }, { "epoch": 2.021222472563413, "grad_norm": 1.8516936302185059, "learning_rate": 4.1150398931804465e-05, "loss": 0.1292, "step": 55620 }, { "epoch": 2.021585871066211, "grad_norm": 1.211599349975586, "learning_rate": 4.114643745305437e-05, "loss": 0.098, "step": 55630 }, { "epoch": 2.0219492695690096, "grad_norm": 0.8160383105278015, "learning_rate": 4.114247527861173e-05, "loss": 0.0919, "step": 55640 }, { "epoch": 2.0223126680718075, "grad_norm": 0.8116459846496582, "learning_rate": 4.1138512408647256e-05, "loss": 0.0929, "step": 55650 }, { "epoch": 2.0226760665746055, "grad_norm": 0.9536616206169128, "learning_rate": 4.113454884333171e-05, "loss": 0.1269, "step": 55660 }, { "epoch": 2.023039465077404, "grad_norm": 0.6211200952529907, "learning_rate": 4.113058458283586e-05, "loss": 0.1285, "step": 55670 }, { "epoch": 2.023402863580202, "grad_norm": 1.3393282890319824, "learning_rate": 4.112661962733052e-05, "loss": 0.1211, "step": 55680 }, { "epoch": 2.0237662620830004, "grad_norm": 0.9137499928474426, "learning_rate": 4.1122653976986514e-05, "loss": 0.1492, "step": 55690 }, { "epoch": 2.0241296605857984, "grad_norm": 8.595315933227539, "learning_rate": 4.1118687631974705e-05, "loss": 0.0813, "step": 55700 }, { "epoch": 2.0244930590885963, "grad_norm": 8.519613265991211, "learning_rate": 4.111472059246601e-05, "loss": 0.0971, "step": 55710 }, { "epoch": 2.0248564575913948, "grad_norm": 0.906406819820404, "learning_rate": 4.111075285863133e-05, "loss": 0.1068, "step": 55720 }, { "epoch": 2.0252198560941927, "grad_norm": 0.6413214206695557, "learning_rate": 4.1106784430641634e-05, "loss": 0.0904, "step": 55730 }, { "epoch": 2.025583254596991, "grad_norm": 1.054943561553955, "learning_rate": 4.110281530866791e-05, "loss": 0.1087, "step": 55740 }, { "epoch": 2.025946653099789, "grad_norm": 0.686661958694458, "learning_rate": 4.1098845492881164e-05, "loss": 0.1022, "step": 55750 }, { "epoch": 2.0263100516025876, "grad_norm": 1.9529190063476562, "learning_rate": 4.109487498345245e-05, "loss": 0.1089, "step": 55760 }, { "epoch": 2.0266734501053856, "grad_norm": 0.5279061198234558, "learning_rate": 4.109090378055284e-05, "loss": 0.1115, "step": 55770 }, { "epoch": 2.0270368486081836, "grad_norm": 1.3651883602142334, "learning_rate": 4.108693188435343e-05, "loss": 0.1206, "step": 55780 }, { "epoch": 2.027400247110982, "grad_norm": 0.9911472201347351, "learning_rate": 4.108295929502536e-05, "loss": 0.1235, "step": 55790 }, { "epoch": 2.02776364561378, "grad_norm": 1.1165162324905396, "learning_rate": 4.107898601273981e-05, "loss": 0.0944, "step": 55800 }, { "epoch": 2.02776364561378, "eval_loss": 0.342909038066864, "eval_runtime": 179.8346, "eval_samples_per_second": 41.227, "eval_steps_per_second": 5.155, "eval_wer": 0.16486648392542705, "step": 55800 } ], "logging_steps": 10, "max_steps": 165108, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8265938801823344e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }