{ "best_metric": 0.200864087715795, "best_model_checkpoint": "./checkpoints/w2v-pa-v2/checkpoint-18000", "epoch": 0.7195290355403736, "eval_steps": 600, "global_step": 19800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00036339850279816846, "grad_norm": 12.712770462036133, "learning_rate": 3.633985027981685e-08, "loss": 8.1015, "step": 10 }, { "epoch": 0.0007267970055963369, "grad_norm": 14.929631233215332, "learning_rate": 7.26797005596337e-08, "loss": 8.5111, "step": 20 }, { "epoch": 0.0010901955083945055, "grad_norm": 12.774781227111816, "learning_rate": 1.0901955083945056e-07, "loss": 8.71, "step": 30 }, { "epoch": 0.0014535940111926739, "grad_norm": 15.27083969116211, "learning_rate": 1.417254160912857e-07, "loss": 9.2894, "step": 40 }, { "epoch": 0.0018169925139908424, "grad_norm": 31.209775924682617, "learning_rate": 1.7806526637110256e-07, "loss": 8.6779, "step": 50 }, { "epoch": 0.002180391016789011, "grad_norm": 12.960335731506348, "learning_rate": 2.1440511665091943e-07, "loss": 8.0399, "step": 60 }, { "epoch": 0.002543789519587179, "grad_norm": 14.452157020568848, "learning_rate": 2.5074496693073626e-07, "loss": 7.9357, "step": 70 }, { "epoch": 0.0029071880223853477, "grad_norm": 12.74867057800293, "learning_rate": 2.8708481721055307e-07, "loss": 8.0764, "step": 80 }, { "epoch": 0.0032705865251835163, "grad_norm": 19.905397415161133, "learning_rate": 3.2342466749036993e-07, "loss": 8.2522, "step": 90 }, { "epoch": 0.003633985027981685, "grad_norm": Infinity, "learning_rate": 3.5613053274220513e-07, "loss": 7.8161, "step": 100 }, { "epoch": 0.003997383530779853, "grad_norm": 15.877684593200684, "learning_rate": 3.92470383022022e-07, "loss": 7.4866, "step": 110 }, { "epoch": 0.004360782033578022, "grad_norm": 19.216800689697266, "learning_rate": 4.2881023330183885e-07, "loss": 7.0324, "step": 120 }, { "epoch": 0.0047241805363761906, "grad_norm": 16.937118530273438, "learning_rate": 4.651500835816557e-07, "loss": 6.7873, "step": 130 }, { "epoch": 0.005087579039174358, "grad_norm": 27.858692169189453, "learning_rate": 5.014899338614725e-07, "loss": 7.2063, "step": 140 }, { "epoch": 0.005450977541972527, "grad_norm": Infinity, "learning_rate": 5.341957991133076e-07, "loss": 6.5827, "step": 150 }, { "epoch": 0.005814376044770695, "grad_norm": 21.252164840698242, "learning_rate": 5.669016643651428e-07, "loss": 7.819, "step": 160 }, { "epoch": 0.006177774547568864, "grad_norm": 20.977886199951172, "learning_rate": 6.032415146449597e-07, "loss": 5.4741, "step": 170 }, { "epoch": 0.0065411730503670325, "grad_norm": 35.25390625, "learning_rate": 6.395813649247765e-07, "loss": 5.7225, "step": 180 }, { "epoch": 0.006904571553165201, "grad_norm": 7.165033340454102, "learning_rate": 6.759212152045934e-07, "loss": 4.7475, "step": 190 }, { "epoch": 0.00726797005596337, "grad_norm": 14.877301216125488, "learning_rate": 7.122610654844103e-07, "loss": 4.6375, "step": 200 }, { "epoch": 0.007631368558761538, "grad_norm": 5.826667785644531, "learning_rate": 7.486009157642272e-07, "loss": 4.356, "step": 210 }, { "epoch": 0.007994767061559707, "grad_norm": 6.022212982177734, "learning_rate": 7.84940766044044e-07, "loss": 4.2138, "step": 220 }, { "epoch": 0.008358165564357875, "grad_norm": 4.790489196777344, "learning_rate": 8.212806163238608e-07, "loss": 4.0662, "step": 230 }, { "epoch": 0.008721564067156044, "grad_norm": 4.448057174682617, "learning_rate": 8.576204666036777e-07, "loss": 3.9507, "step": 240 }, { "epoch": 0.009084962569954213, "grad_norm": 47.487003326416016, "learning_rate": 8.939603168834945e-07, "loss": 3.9576, "step": 250 }, { "epoch": 0.009448361072752381, "grad_norm": 8.64856243133545, "learning_rate": 9.303001671633114e-07, "loss": 3.7102, "step": 260 }, { "epoch": 0.009811759575550548, "grad_norm": 8.821709632873535, "learning_rate": 9.66640017443128e-07, "loss": 3.6644, "step": 270 }, { "epoch": 0.010175158078348717, "grad_norm": 14.071539878845215, "learning_rate": 1.002979867722945e-06, "loss": 3.6909, "step": 280 }, { "epoch": 0.010538556581146885, "grad_norm": 6.68039083480835, "learning_rate": 1.0393197180027619e-06, "loss": 3.6458, "step": 290 }, { "epoch": 0.010901955083945054, "grad_norm": 20.664649963378906, "learning_rate": 1.0756595682825787e-06, "loss": 3.6332, "step": 300 }, { "epoch": 0.011265353586743222, "grad_norm": 2.5272624492645264, "learning_rate": 1.1119994185623955e-06, "loss": 3.4835, "step": 310 }, { "epoch": 0.01162875208954139, "grad_norm": 8.353235244750977, "learning_rate": 1.1483392688422123e-06, "loss": 3.4892, "step": 320 }, { "epoch": 0.01199215059233956, "grad_norm": 7.0964531898498535, "learning_rate": 1.1846791191220293e-06, "loss": 3.5586, "step": 330 }, { "epoch": 0.012355549095137728, "grad_norm": 4.734161376953125, "learning_rate": 1.2210189694018461e-06, "loss": 3.4996, "step": 340 }, { "epoch": 0.012718947597935897, "grad_norm": 47.409996032714844, "learning_rate": 1.257358819681663e-06, "loss": 3.5565, "step": 350 }, { "epoch": 0.013082346100734065, "grad_norm": 2.880244016647339, "learning_rate": 1.2936986699614797e-06, "loss": 3.4154, "step": 360 }, { "epoch": 0.013445744603532234, "grad_norm": 6.637233734130859, "learning_rate": 1.3300385202412968e-06, "loss": 3.4119, "step": 370 }, { "epoch": 0.013809143106330402, "grad_norm": 11.791736602783203, "learning_rate": 1.3663783705211136e-06, "loss": 3.4347, "step": 380 }, { "epoch": 0.01417254160912857, "grad_norm": 8.274836540222168, "learning_rate": 1.4027182208009304e-06, "loss": 3.4253, "step": 390 }, { "epoch": 0.01453594011192674, "grad_norm": 10.09929084777832, "learning_rate": 1.4390580710807472e-06, "loss": 3.4702, "step": 400 }, { "epoch": 0.014899338614724908, "grad_norm": 6.32951545715332, "learning_rate": 1.4753979213605642e-06, "loss": 3.3513, "step": 410 }, { "epoch": 0.015262737117523077, "grad_norm": 2.4888486862182617, "learning_rate": 1.511737771640381e-06, "loss": 3.3421, "step": 420 }, { "epoch": 0.015626135620321245, "grad_norm": 3.02103328704834, "learning_rate": 1.5480776219201978e-06, "loss": 3.397, "step": 430 }, { "epoch": 0.015989534123119414, "grad_norm": 7.464268207550049, "learning_rate": 1.5844174722000146e-06, "loss": 3.3582, "step": 440 }, { "epoch": 0.016352932625917582, "grad_norm": 18.908123016357422, "learning_rate": 1.6207573224798317e-06, "loss": 3.4034, "step": 450 }, { "epoch": 0.01671633112871575, "grad_norm": 2.487326145172119, "learning_rate": 1.6570971727596485e-06, "loss": 3.2229, "step": 460 }, { "epoch": 0.01707972963151392, "grad_norm": 2.3999946117401123, "learning_rate": 1.6934370230394653e-06, "loss": 3.2185, "step": 470 }, { "epoch": 0.017443128134312088, "grad_norm": 5.007234573364258, "learning_rate": 1.729776873319282e-06, "loss": 3.2069, "step": 480 }, { "epoch": 0.017806526637110257, "grad_norm": 6.393301963806152, "learning_rate": 1.766116723599099e-06, "loss": 3.0687, "step": 490 }, { "epoch": 0.018169925139908425, "grad_norm": 45.44938278198242, "learning_rate": 1.802456573878916e-06, "loss": 3.112, "step": 500 }, { "epoch": 0.018533323642706594, "grad_norm": 7.32182502746582, "learning_rate": 1.8387964241587327e-06, "loss": 2.951, "step": 510 }, { "epoch": 0.018896722145504762, "grad_norm": 3.3864173889160156, "learning_rate": 1.8751362744385495e-06, "loss": 2.8879, "step": 520 }, { "epoch": 0.019260120648302927, "grad_norm": 5.429958343505859, "learning_rate": 1.911476124718366e-06, "loss": 2.7393, "step": 530 }, { "epoch": 0.019623519151101096, "grad_norm": 5.3577985763549805, "learning_rate": 1.947815974998183e-06, "loss": 2.4813, "step": 540 }, { "epoch": 0.019986917653899264, "grad_norm": 13.970659255981445, "learning_rate": 1.9841558252779998e-06, "loss": 2.3787, "step": 550 }, { "epoch": 0.020350316156697433, "grad_norm": 5.2666754722595215, "learning_rate": 2.0204956755578166e-06, "loss": 2.207, "step": 560 }, { "epoch": 0.0207137146594956, "grad_norm": 4.184991359710693, "learning_rate": 2.0568355258376334e-06, "loss": 2.0383, "step": 570 }, { "epoch": 0.02107711316229377, "grad_norm": 6.312343597412109, "learning_rate": 2.09317537611745e-06, "loss": 1.8416, "step": 580 }, { "epoch": 0.02144051166509194, "grad_norm": 4.754147529602051, "learning_rate": 2.1295152263972674e-06, "loss": 1.6002, "step": 590 }, { "epoch": 0.021803910167890107, "grad_norm": 21.47913360595703, "learning_rate": 2.1658550766770842e-06, "loss": 1.6015, "step": 600 }, { "epoch": 0.021803910167890107, "eval_loss": 1.5154471397399902, "eval_runtime": 180.9184, "eval_samples_per_second": 40.98, "eval_steps_per_second": 5.124, "eval_wer": 0.7997531177954872, "step": 600 }, { "epoch": 0.022167308670688276, "grad_norm": 4.2374348640441895, "learning_rate": 2.202194926956901e-06, "loss": 1.4842, "step": 610 }, { "epoch": 0.022530707173486444, "grad_norm": 4.392132759094238, "learning_rate": 2.238534777236718e-06, "loss": 1.3776, "step": 620 }, { "epoch": 0.022894105676284613, "grad_norm": 4.682064533233643, "learning_rate": 2.2748746275165347e-06, "loss": 1.3177, "step": 630 }, { "epoch": 0.02325750417908278, "grad_norm": 4.8396077156066895, "learning_rate": 2.3112144777963515e-06, "loss": 1.0737, "step": 640 }, { "epoch": 0.02362090268188095, "grad_norm": 33.27382278442383, "learning_rate": 2.3475543280761683e-06, "loss": 1.3046, "step": 650 }, { "epoch": 0.02398430118467912, "grad_norm": 5.410325050354004, "learning_rate": 2.383894178355985e-06, "loss": 1.1021, "step": 660 }, { "epoch": 0.024347699687477287, "grad_norm": 3.9523680210113525, "learning_rate": 2.420234028635802e-06, "loss": 1.0602, "step": 670 }, { "epoch": 0.024711098190275456, "grad_norm": 9.141073226928711, "learning_rate": 2.456573878915619e-06, "loss": 1.0631, "step": 680 }, { "epoch": 0.025074496693073624, "grad_norm": 5.3534626960754395, "learning_rate": 2.492913729195436e-06, "loss": 0.8968, "step": 690 }, { "epoch": 0.025437895195871793, "grad_norm": 32.30677795410156, "learning_rate": 2.5292535794752527e-06, "loss": 1.0439, "step": 700 }, { "epoch": 0.02580129369866996, "grad_norm": 4.310474872589111, "learning_rate": 2.5655934297550696e-06, "loss": 0.954, "step": 710 }, { "epoch": 0.02616469220146813, "grad_norm": 5.586440563201904, "learning_rate": 2.6019332800348864e-06, "loss": 1.0031, "step": 720 }, { "epoch": 0.0265280907042663, "grad_norm": 3.6927313804626465, "learning_rate": 2.638273130314703e-06, "loss": 0.7956, "step": 730 }, { "epoch": 0.026891489207064467, "grad_norm": 4.270529747009277, "learning_rate": 2.67461298059452e-06, "loss": 0.8874, "step": 740 }, { "epoch": 0.027254887709862636, "grad_norm": 23.553489685058594, "learning_rate": 2.710952830874337e-06, "loss": 0.8523, "step": 750 }, { "epoch": 0.027618286212660804, "grad_norm": 5.342041492462158, "learning_rate": 2.747292681154154e-06, "loss": 0.9029, "step": 760 }, { "epoch": 0.027981684715458973, "grad_norm": 3.3802621364593506, "learning_rate": 2.783632531433971e-06, "loss": 0.8378, "step": 770 }, { "epoch": 0.02834508321825714, "grad_norm": 6.378807067871094, "learning_rate": 2.8199723817137876e-06, "loss": 0.8085, "step": 780 }, { "epoch": 0.02870848172105531, "grad_norm": 4.007000923156738, "learning_rate": 2.8563122319936045e-06, "loss": 0.8218, "step": 790 }, { "epoch": 0.02907188022385348, "grad_norm": 68.16226196289062, "learning_rate": 2.8926520822734213e-06, "loss": 1.2055, "step": 800 }, { "epoch": 0.029435278726651647, "grad_norm": 6.70043420791626, "learning_rate": 2.928991932553238e-06, "loss": 0.7641, "step": 810 }, { "epoch": 0.029798677229449816, "grad_norm": 5.498161315917969, "learning_rate": 2.965331782833055e-06, "loss": 0.7739, "step": 820 }, { "epoch": 0.030162075732247984, "grad_norm": 9.515852928161621, "learning_rate": 3.0016716331128717e-06, "loss": 0.8293, "step": 830 }, { "epoch": 0.030525474235046153, "grad_norm": 13.3881196975708, "learning_rate": 3.0380114833926885e-06, "loss": 0.5597, "step": 840 }, { "epoch": 0.03088887273784432, "grad_norm": 13.670549392700195, "learning_rate": 3.0743513336725057e-06, "loss": 0.7658, "step": 850 }, { "epoch": 0.03125227124064249, "grad_norm": 3.58305287361145, "learning_rate": 3.1106911839523226e-06, "loss": 0.7036, "step": 860 }, { "epoch": 0.031615669743440655, "grad_norm": 4.119450569152832, "learning_rate": 3.147031034232139e-06, "loss": 0.6842, "step": 870 }, { "epoch": 0.03197906824623883, "grad_norm": 6.412299156188965, "learning_rate": 3.183370884511956e-06, "loss": 0.7148, "step": 880 }, { "epoch": 0.03234246674903699, "grad_norm": 8.700023651123047, "learning_rate": 3.2197107347917726e-06, "loss": 1.4861, "step": 890 }, { "epoch": 0.032705865251835164, "grad_norm": 18.78075408935547, "learning_rate": 3.25605058507159e-06, "loss": 0.7162, "step": 900 }, { "epoch": 0.03306926375463333, "grad_norm": 4.078335762023926, "learning_rate": 3.292390435351406e-06, "loss": 0.643, "step": 910 }, { "epoch": 0.0334326622574315, "grad_norm": 6.603452682495117, "learning_rate": 3.3287302856312234e-06, "loss": 0.6623, "step": 920 }, { "epoch": 0.03379606076022967, "grad_norm": 5.817732334136963, "learning_rate": 3.3650701359110402e-06, "loss": 0.6265, "step": 930 }, { "epoch": 0.03415945926302784, "grad_norm": 8.310086250305176, "learning_rate": 3.4014099861908575e-06, "loss": 0.5343, "step": 940 }, { "epoch": 0.034522857765826004, "grad_norm": NaN, "learning_rate": 3.4341158514426923e-06, "loss": 0.6999, "step": 950 }, { "epoch": 0.034886256268624176, "grad_norm": 4.416926860809326, "learning_rate": 3.4704557017225087e-06, "loss": 0.6688, "step": 960 }, { "epoch": 0.03524965477142234, "grad_norm": 3.2407495975494385, "learning_rate": 3.506795552002326e-06, "loss": 0.5808, "step": 970 }, { "epoch": 0.03561305327422051, "grad_norm": NaN, "learning_rate": 3.539501417254161e-06, "loss": 3.0266, "step": 980 }, { "epoch": 0.03597645177701868, "grad_norm": 8.086112022399902, "learning_rate": 3.575841267533978e-06, "loss": 0.5829, "step": 990 }, { "epoch": 0.03633985027981685, "grad_norm": 252.45077514648438, "learning_rate": 3.612181117813795e-06, "loss": 0.7371, "step": 1000 }, { "epoch": 0.036703248782615015, "grad_norm": 3.5969936847686768, "learning_rate": 3.648520968093612e-06, "loss": 0.6632, "step": 1010 }, { "epoch": 0.03706664728541319, "grad_norm": 3.0116841793060303, "learning_rate": 3.6848608183734285e-06, "loss": 0.537, "step": 1020 }, { "epoch": 0.03743004578821135, "grad_norm": 5.494657039642334, "learning_rate": 3.7212006686532457e-06, "loss": 0.5422, "step": 1030 }, { "epoch": 0.037793444291009524, "grad_norm": 21.526798248291016, "learning_rate": 3.757540518933062e-06, "loss": 0.5003, "step": 1040 }, { "epoch": 0.03815684279380769, "grad_norm": 80.90055084228516, "learning_rate": 3.7938803692128793e-06, "loss": 0.6566, "step": 1050 }, { "epoch": 0.038520241296605855, "grad_norm": 3.7678096294403076, "learning_rate": 3.830220219492696e-06, "loss": 0.5758, "step": 1060 }, { "epoch": 0.03888363979940403, "grad_norm": 4.526616096496582, "learning_rate": 3.866560069772512e-06, "loss": 0.5648, "step": 1070 }, { "epoch": 0.03924703830220219, "grad_norm": 4.571674346923828, "learning_rate": 3.90289992005233e-06, "loss": 0.5864, "step": 1080 }, { "epoch": 0.039610436805000364, "grad_norm": 5.295219421386719, "learning_rate": 3.939239770332146e-06, "loss": 0.4476, "step": 1090 }, { "epoch": 0.03997383530779853, "grad_norm": 16.631162643432617, "learning_rate": 3.975579620611963e-06, "loss": 0.6198, "step": 1100 }, { "epoch": 0.0403372338105967, "grad_norm": 4.685397624969482, "learning_rate": 4.01191947089178e-06, "loss": 0.7512, "step": 1110 }, { "epoch": 0.040700632313394866, "grad_norm": 3.333232879638672, "learning_rate": 4.048259321171597e-06, "loss": 0.5087, "step": 1120 }, { "epoch": 0.04106403081619304, "grad_norm": 5.501911640167236, "learning_rate": 4.084599171451414e-06, "loss": 0.5772, "step": 1130 }, { "epoch": 0.0414274293189912, "grad_norm": 8.066693305969238, "learning_rate": 4.120939021731231e-06, "loss": 0.4641, "step": 1140 }, { "epoch": 0.041790827821789375, "grad_norm": 13.463829040527344, "learning_rate": 4.1572788720110474e-06, "loss": 0.5192, "step": 1150 }, { "epoch": 0.04215422632458754, "grad_norm": 4.132773399353027, "learning_rate": 4.193618722290864e-06, "loss": 0.4696, "step": 1160 }, { "epoch": 0.04251762482738571, "grad_norm": 6.176777362823486, "learning_rate": 4.229958572570681e-06, "loss": 0.4851, "step": 1170 }, { "epoch": 0.04288102333018388, "grad_norm": 8.26610279083252, "learning_rate": 4.266298422850498e-06, "loss": 0.4967, "step": 1180 }, { "epoch": 0.04324442183298205, "grad_norm": 3.9725544452667236, "learning_rate": 4.302638273130315e-06, "loss": 0.431, "step": 1190 }, { "epoch": 0.043607820335780215, "grad_norm": 22.353294372558594, "learning_rate": 4.338978123410132e-06, "loss": 0.6523, "step": 1200 }, { "epoch": 0.043607820335780215, "eval_loss": 0.5945897102355957, "eval_runtime": 180.5168, "eval_samples_per_second": 41.071, "eval_steps_per_second": 5.135, "eval_wer": 0.3718390908925881, "step": 1200 }, { "epoch": 0.04397121883857839, "grad_norm": 3.7954189777374268, "learning_rate": 4.375317973689948e-06, "loss": 0.4511, "step": 1210 }, { "epoch": 0.04433461734137655, "grad_norm": 5.583435535430908, "learning_rate": 4.411657823969766e-06, "loss": 1.4019, "step": 1220 }, { "epoch": 0.044698015844174724, "grad_norm": 8.544243812561035, "learning_rate": 4.447997674249582e-06, "loss": 0.4467, "step": 1230 }, { "epoch": 0.04506141434697289, "grad_norm": 3.8716418743133545, "learning_rate": 4.4843375245293996e-06, "loss": 0.4347, "step": 1240 }, { "epoch": 0.04542481284977106, "grad_norm": 19.459606170654297, "learning_rate": 4.5206773748092155e-06, "loss": 0.616, "step": 1250 }, { "epoch": 0.045788211352569226, "grad_norm": 5.474793434143066, "learning_rate": 4.557017225089033e-06, "loss": 0.4689, "step": 1260 }, { "epoch": 0.0461516098553674, "grad_norm": 4.705495834350586, "learning_rate": 4.593357075368849e-06, "loss": 0.4623, "step": 1270 }, { "epoch": 0.04651500835816556, "grad_norm": 6.779942035675049, "learning_rate": 4.629696925648667e-06, "loss": 0.4418, "step": 1280 }, { "epoch": 0.046878406860963735, "grad_norm": 6.802936553955078, "learning_rate": 4.666036775928484e-06, "loss": 0.4429, "step": 1290 }, { "epoch": 0.0472418053637619, "grad_norm": 17.47754669189453, "learning_rate": 4.7023766262083004e-06, "loss": 0.596, "step": 1300 }, { "epoch": 0.04760520386656007, "grad_norm": 4.036036968231201, "learning_rate": 4.738716476488117e-06, "loss": 0.4362, "step": 1310 }, { "epoch": 0.04796860236935824, "grad_norm": 6.022701740264893, "learning_rate": 4.775056326767934e-06, "loss": 0.5092, "step": 1320 }, { "epoch": 0.04833200087215641, "grad_norm": 5.533923625946045, "learning_rate": 4.811396177047751e-06, "loss": 0.4358, "step": 1330 }, { "epoch": 0.048695399374954575, "grad_norm": 3.4037017822265625, "learning_rate": 4.847736027327568e-06, "loss": 0.3684, "step": 1340 }, { "epoch": 0.04905879787775275, "grad_norm": 13.625974655151367, "learning_rate": 4.8840758776073845e-06, "loss": 0.583, "step": 1350 }, { "epoch": 0.04942219638055091, "grad_norm": 3.597294330596924, "learning_rate": 4.920415727887201e-06, "loss": 0.4561, "step": 1360 }, { "epoch": 0.049785594883349084, "grad_norm": 2.8846936225891113, "learning_rate": 4.956755578167018e-06, "loss": 0.409, "step": 1370 }, { "epoch": 0.05014899338614725, "grad_norm": 5.500187397003174, "learning_rate": 4.993095428446836e-06, "loss": 0.4531, "step": 1380 }, { "epoch": 0.05051239188894542, "grad_norm": 3.1203413009643555, "learning_rate": 5.029435278726652e-06, "loss": 0.4004, "step": 1390 }, { "epoch": 0.050875790391743586, "grad_norm": 98.18115234375, "learning_rate": 5.0657751290064685e-06, "loss": 0.522, "step": 1400 }, { "epoch": 0.05123918889454175, "grad_norm": 3.901418924331665, "learning_rate": 5.102114979286285e-06, "loss": 0.4041, "step": 1410 }, { "epoch": 0.05160258739733992, "grad_norm": 4.045637130737305, "learning_rate": 5.138454829566102e-06, "loss": 0.4051, "step": 1420 }, { "epoch": 0.05196598590013809, "grad_norm": 6.835183143615723, "learning_rate": 5.174794679845919e-06, "loss": 0.4937, "step": 1430 }, { "epoch": 0.05232938440293626, "grad_norm": 7.708272457122803, "learning_rate": 5.211134530125736e-06, "loss": 0.3818, "step": 1440 }, { "epoch": 0.052692782905734425, "grad_norm": 24.2607364654541, "learning_rate": 5.247474380405553e-06, "loss": 0.5445, "step": 1450 }, { "epoch": 0.0530561814085326, "grad_norm": 3.3517005443573, "learning_rate": 5.283814230685369e-06, "loss": 0.4079, "step": 1460 }, { "epoch": 0.05341957991133076, "grad_norm": 12.727778434753418, "learning_rate": 5.320154080965187e-06, "loss": 0.4285, "step": 1470 }, { "epoch": 0.053782978414128935, "grad_norm": 4.984294891357422, "learning_rate": 5.356493931245003e-06, "loss": 0.5006, "step": 1480 }, { "epoch": 0.0541463769169271, "grad_norm": 3.3041558265686035, "learning_rate": 5.392833781524821e-06, "loss": 0.3729, "step": 1490 }, { "epoch": 0.05450977541972527, "grad_norm": 38.074546813964844, "learning_rate": 5.429173631804637e-06, "loss": 0.5401, "step": 1500 }, { "epoch": 0.05487317392252344, "grad_norm": 5.649720668792725, "learning_rate": 5.465513482084454e-06, "loss": 0.3879, "step": 1510 }, { "epoch": 0.05523657242532161, "grad_norm": 3.107583522796631, "learning_rate": 5.501853332364271e-06, "loss": 0.4144, "step": 1520 }, { "epoch": 0.055599970928119774, "grad_norm": 19.246564865112305, "learning_rate": 5.538193182644088e-06, "loss": 0.4314, "step": 1530 }, { "epoch": 0.055963369430917946, "grad_norm": 4.72367525100708, "learning_rate": 5.574533032923905e-06, "loss": 0.3576, "step": 1540 }, { "epoch": 0.05632676793371611, "grad_norm": 25.88886260986328, "learning_rate": 5.6108728832037215e-06, "loss": 0.5385, "step": 1550 }, { "epoch": 0.05669016643651428, "grad_norm": 3.1524956226348877, "learning_rate": 5.647212733483538e-06, "loss": 0.4075, "step": 1560 }, { "epoch": 0.05705356493931245, "grad_norm": 3.883281707763672, "learning_rate": 5.683552583763355e-06, "loss": 0.4242, "step": 1570 }, { "epoch": 0.05741696344211062, "grad_norm": 16.935935974121094, "learning_rate": 5.719892434043172e-06, "loss": 0.6194, "step": 1580 }, { "epoch": 0.057780361944908785, "grad_norm": 4.23909330368042, "learning_rate": 5.756232284322989e-06, "loss": 0.4206, "step": 1590 }, { "epoch": 0.05814376044770696, "grad_norm": 16.6039981842041, "learning_rate": 5.7925721346028056e-06, "loss": 0.4854, "step": 1600 }, { "epoch": 0.05850715895050512, "grad_norm": 2.5220890045166016, "learning_rate": 5.828911984882622e-06, "loss": 0.4186, "step": 1610 }, { "epoch": 0.058870557453303295, "grad_norm": 3.075101613998413, "learning_rate": 5.865251835162439e-06, "loss": 0.8877, "step": 1620 }, { "epoch": 0.05923395595610146, "grad_norm": 5.511383056640625, "learning_rate": 5.901591685442257e-06, "loss": 0.4219, "step": 1630 }, { "epoch": 0.05959735445889963, "grad_norm": 2.9449989795684814, "learning_rate": 5.937931535722073e-06, "loss": 0.2992, "step": 1640 }, { "epoch": 0.0599607529616978, "grad_norm": 31.823612213134766, "learning_rate": 5.9742713860018905e-06, "loss": 0.5939, "step": 1650 }, { "epoch": 0.06032415146449597, "grad_norm": 4.240995407104492, "learning_rate": 6.010611236281706e-06, "loss": 0.4176, "step": 1660 }, { "epoch": 0.060687549967294134, "grad_norm": 2.6084980964660645, "learning_rate": 6.046951086561524e-06, "loss": 0.3542, "step": 1670 }, { "epoch": 0.061050948470092306, "grad_norm": 8.318774223327637, "learning_rate": 6.08329093684134e-06, "loss": 0.3968, "step": 1680 }, { "epoch": 0.06141434697289047, "grad_norm": 5.18604850769043, "learning_rate": 6.119630787121158e-06, "loss": 0.3879, "step": 1690 }, { "epoch": 0.06177774547568864, "grad_norm": 51.732086181640625, "learning_rate": 6.1559706374009745e-06, "loss": 0.5025, "step": 1700 }, { "epoch": 0.06214114397848681, "grad_norm": 2.5876500606536865, "learning_rate": 6.192310487680791e-06, "loss": 0.3558, "step": 1710 }, { "epoch": 0.06250454248128498, "grad_norm": 5.071794033050537, "learning_rate": 6.228650337960608e-06, "loss": 0.3534, "step": 1720 }, { "epoch": 0.06286794098408315, "grad_norm": 11.539891242980957, "learning_rate": 6.264990188240424e-06, "loss": 0.4628, "step": 1730 }, { "epoch": 0.06323133948688131, "grad_norm": 3.275383710861206, "learning_rate": 6.301330038520243e-06, "loss": 0.3368, "step": 1740 }, { "epoch": 0.06359473798967948, "grad_norm": 41.4942741394043, "learning_rate": 6.3376698888000586e-06, "loss": 0.53, "step": 1750 }, { "epoch": 0.06395813649247765, "grad_norm": 3.0071399211883545, "learning_rate": 6.374009739079875e-06, "loss": 0.3623, "step": 1760 }, { "epoch": 0.06432153499527582, "grad_norm": 3.385955333709717, "learning_rate": 6.410349589359692e-06, "loss": 0.3476, "step": 1770 }, { "epoch": 0.06468493349807398, "grad_norm": 3.872527599334717, "learning_rate": 6.446689439639508e-06, "loss": 0.3168, "step": 1780 }, { "epoch": 0.06504833200087215, "grad_norm": 4.668768882751465, "learning_rate": 6.483029289919327e-06, "loss": 0.3813, "step": 1790 }, { "epoch": 0.06541173050367033, "grad_norm": 69.33656311035156, "learning_rate": 6.519369140199143e-06, "loss": 0.4557, "step": 1800 }, { "epoch": 0.06541173050367033, "eval_loss": 0.5579342246055603, "eval_runtime": 180.31, "eval_samples_per_second": 41.118, "eval_steps_per_second": 5.141, "eval_wer": 0.34055221740156477, "step": 1800 }, { "epoch": 0.0657751290064685, "grad_norm": 4.2848381996154785, "learning_rate": 6.555708990478959e-06, "loss": 0.5481, "step": 1810 }, { "epoch": 0.06613852750926666, "grad_norm": 11.31700325012207, "learning_rate": 6.592048840758775e-06, "loss": 0.3479, "step": 1820 }, { "epoch": 0.06650192601206482, "grad_norm": 6.088991165161133, "learning_rate": 6.628388691038594e-06, "loss": 0.3994, "step": 1830 }, { "epoch": 0.066865324514863, "grad_norm": 4.342681407928467, "learning_rate": 6.66472854131841e-06, "loss": 0.2953, "step": 1840 }, { "epoch": 0.06722872301766117, "grad_norm": 75.97467041015625, "learning_rate": 6.701068391598227e-06, "loss": 0.4807, "step": 1850 }, { "epoch": 0.06759212152045933, "grad_norm": 3.8739049434661865, "learning_rate": 6.7374082418780435e-06, "loss": 0.3963, "step": 1860 }, { "epoch": 0.0679555200232575, "grad_norm": 2.6209168434143066, "learning_rate": 6.773748092157861e-06, "loss": 0.422, "step": 1870 }, { "epoch": 0.06831891852605568, "grad_norm": 17.530773162841797, "learning_rate": 6.810087942437678e-06, "loss": 0.3939, "step": 1880 }, { "epoch": 0.06868231702885384, "grad_norm": 3.475748300552368, "learning_rate": 6.846427792717494e-06, "loss": 0.2996, "step": 1890 }, { "epoch": 0.06904571553165201, "grad_norm": 20.979995727539062, "learning_rate": 6.882767642997311e-06, "loss": 0.4528, "step": 1900 }, { "epoch": 0.06940911403445017, "grad_norm": 3.8432774543762207, "learning_rate": 6.919107493277128e-06, "loss": 0.2959, "step": 1910 }, { "epoch": 0.06977251253724835, "grad_norm": 7.830467700958252, "learning_rate": 6.955447343556945e-06, "loss": 0.3378, "step": 1920 }, { "epoch": 0.07013591104004652, "grad_norm": 15.633039474487305, "learning_rate": 6.991787193836762e-06, "loss": 0.4, "step": 1930 }, { "epoch": 0.07049930954284468, "grad_norm": 13.628314971923828, "learning_rate": 7.028127044116578e-06, "loss": 0.3255, "step": 1940 }, { "epoch": 0.07086270804564285, "grad_norm": 33.001773834228516, "learning_rate": 7.064466894396396e-06, "loss": 0.4367, "step": 1950 }, { "epoch": 0.07122610654844103, "grad_norm": 3.5115041732788086, "learning_rate": 7.100806744676212e-06, "loss": 0.3279, "step": 1960 }, { "epoch": 0.07158950505123919, "grad_norm": 3.0497541427612305, "learning_rate": 7.137146594956029e-06, "loss": 2.8797, "step": 1970 }, { "epoch": 0.07195290355403736, "grad_norm": 6.17769718170166, "learning_rate": 7.173486445235845e-06, "loss": 0.3534, "step": 1980 }, { "epoch": 0.07231630205683552, "grad_norm": 5.4114789962768555, "learning_rate": 7.209826295515664e-06, "loss": 0.3309, "step": 1990 }, { "epoch": 0.0726797005596337, "grad_norm": 11.600439071655273, "learning_rate": 7.24616614579548e-06, "loss": 0.4382, "step": 2000 }, { "epoch": 0.07304309906243187, "grad_norm": 3.4476027488708496, "learning_rate": 7.2825059960752965e-06, "loss": 0.3487, "step": 2010 }, { "epoch": 0.07340649756523003, "grad_norm": 5.642564296722412, "learning_rate": 7.318845846355113e-06, "loss": 0.3513, "step": 2020 }, { "epoch": 0.0737698960680282, "grad_norm": 7.132052898406982, "learning_rate": 7.355185696634931e-06, "loss": 0.4564, "step": 2030 }, { "epoch": 0.07413329457082637, "grad_norm": 6.583246231079102, "learning_rate": 7.391525546914748e-06, "loss": 0.3376, "step": 2040 }, { "epoch": 0.07449669307362454, "grad_norm": 23.98805809020996, "learning_rate": 7.427865397194564e-06, "loss": 0.4217, "step": 2050 }, { "epoch": 0.0748600915764227, "grad_norm": 3.9135584831237793, "learning_rate": 7.4642052474743805e-06, "loss": 0.3324, "step": 2060 }, { "epoch": 0.07522349007922087, "grad_norm": 3.4022698402404785, "learning_rate": 7.500545097754198e-06, "loss": 0.3391, "step": 2070 }, { "epoch": 0.07558688858201905, "grad_norm": 8.37547779083252, "learning_rate": 7.536884948034015e-06, "loss": 0.3119, "step": 2080 }, { "epoch": 0.07595028708481721, "grad_norm": 6.2167558670043945, "learning_rate": 7.573224798313831e-06, "loss": 0.3247, "step": 2090 }, { "epoch": 0.07631368558761538, "grad_norm": 81.76036834716797, "learning_rate": 7.609564648593648e-06, "loss": 0.4281, "step": 2100 }, { "epoch": 0.07667708409041354, "grad_norm": 2.8961973190307617, "learning_rate": 7.645904498873465e-06, "loss": 0.3368, "step": 2110 }, { "epoch": 0.07704048259321171, "grad_norm": 4.699477195739746, "learning_rate": 7.682244349153282e-06, "loss": 0.3403, "step": 2120 }, { "epoch": 0.07740388109600989, "grad_norm": 4.429138660430908, "learning_rate": 7.718584199433098e-06, "loss": 0.3182, "step": 2130 }, { "epoch": 0.07776727959880805, "grad_norm": 2.7269580364227295, "learning_rate": 7.754924049712916e-06, "loss": 0.2828, "step": 2140 }, { "epoch": 0.07813067810160622, "grad_norm": 15.126232147216797, "learning_rate": 7.791263899992732e-06, "loss": 0.4606, "step": 2150 }, { "epoch": 0.07849407660440438, "grad_norm": 10.14072322845459, "learning_rate": 7.82760375027255e-06, "loss": 0.3451, "step": 2160 }, { "epoch": 0.07885747510720256, "grad_norm": 4.95914363861084, "learning_rate": 7.863943600552365e-06, "loss": 0.3612, "step": 2170 }, { "epoch": 0.07922087361000073, "grad_norm": 4.115192413330078, "learning_rate": 7.900283450832183e-06, "loss": 0.3222, "step": 2180 }, { "epoch": 0.07958427211279889, "grad_norm": 5.405594825744629, "learning_rate": 7.936623301111999e-06, "loss": 0.3474, "step": 2190 }, { "epoch": 0.07994767061559706, "grad_norm": 23.328718185424805, "learning_rate": 7.972963151391817e-06, "loss": 0.4797, "step": 2200 }, { "epoch": 0.08031106911839524, "grad_norm": 3.5595099925994873, "learning_rate": 8.009303001671634e-06, "loss": 0.3305, "step": 2210 }, { "epoch": 0.0806744676211934, "grad_norm": 3.048445463180542, "learning_rate": 8.04564285195145e-06, "loss": 0.318, "step": 2220 }, { "epoch": 0.08103786612399157, "grad_norm": 5.857702732086182, "learning_rate": 8.081982702231266e-06, "loss": 0.3497, "step": 2230 }, { "epoch": 0.08140126462678973, "grad_norm": 3.0092968940734863, "learning_rate": 8.118322552511084e-06, "loss": 0.2995, "step": 2240 }, { "epoch": 0.08176466312958791, "grad_norm": 9.337843894958496, "learning_rate": 8.154662402790902e-06, "loss": 0.4517, "step": 2250 }, { "epoch": 0.08212806163238608, "grad_norm": 3.136950969696045, "learning_rate": 8.191002253070718e-06, "loss": 0.2927, "step": 2260 }, { "epoch": 0.08249146013518424, "grad_norm": 4.228198051452637, "learning_rate": 8.227342103350534e-06, "loss": 1.2185, "step": 2270 }, { "epoch": 0.0828548586379824, "grad_norm": 7.404679298400879, "learning_rate": 8.263681953630351e-06, "loss": 0.3448, "step": 2280 }, { "epoch": 0.08321825714078059, "grad_norm": 7.873497009277344, "learning_rate": 8.300021803910169e-06, "loss": 0.2965, "step": 2290 }, { "epoch": 0.08358165564357875, "grad_norm": 12.266081809997559, "learning_rate": 8.336361654189985e-06, "loss": 0.4631, "step": 2300 }, { "epoch": 0.08394505414637692, "grad_norm": 3.3576557636260986, "learning_rate": 8.3727015044698e-06, "loss": 0.3339, "step": 2310 }, { "epoch": 0.08430845264917508, "grad_norm": 3.0854902267456055, "learning_rate": 8.40904135474962e-06, "loss": 0.3448, "step": 2320 }, { "epoch": 0.08467185115197326, "grad_norm": 6.1308746337890625, "learning_rate": 8.445381205029436e-06, "loss": 0.386, "step": 2330 }, { "epoch": 0.08503524965477142, "grad_norm": 4.458275318145752, "learning_rate": 8.481721055309252e-06, "loss": 0.2916, "step": 2340 }, { "epoch": 0.08539864815756959, "grad_norm": 25.443647384643555, "learning_rate": 8.51806090558907e-06, "loss": 0.4232, "step": 2350 }, { "epoch": 0.08576204666036776, "grad_norm": 324.4353332519531, "learning_rate": 8.554400755868887e-06, "loss": 2.4995, "step": 2360 }, { "epoch": 0.08612544516316593, "grad_norm": 17.593692779541016, "learning_rate": 8.590740606148703e-06, "loss": 0.2952, "step": 2370 }, { "epoch": 0.0864888436659641, "grad_norm": 3.4646732807159424, "learning_rate": 8.62708045642852e-06, "loss": 0.2961, "step": 2380 }, { "epoch": 0.08685224216876226, "grad_norm": 2.9895999431610107, "learning_rate": 8.663420306708337e-06, "loss": 0.2852, "step": 2390 }, { "epoch": 0.08721564067156043, "grad_norm": 24.221176147460938, "learning_rate": 8.699760156988155e-06, "loss": 0.4343, "step": 2400 }, { "epoch": 0.08721564067156043, "eval_loss": 0.47036415338516235, "eval_runtime": 180.1154, "eval_samples_per_second": 41.162, "eval_steps_per_second": 5.147, "eval_wer": 0.28054713453264835, "step": 2400 }, { "epoch": 0.08757903917435861, "grad_norm": 161.69967651367188, "learning_rate": 8.73610000726797e-06, "loss": 1.4598, "step": 2410 }, { "epoch": 0.08794243767715677, "grad_norm": 10.37559700012207, "learning_rate": 8.772439857547786e-06, "loss": 0.3042, "step": 2420 }, { "epoch": 0.08830583617995494, "grad_norm": 5.90106725692749, "learning_rate": 8.808779707827604e-06, "loss": 0.3385, "step": 2430 }, { "epoch": 0.0886692346827531, "grad_norm": 9.207955360412598, "learning_rate": 8.845119558107422e-06, "loss": 0.2963, "step": 2440 }, { "epoch": 0.08903263318555127, "grad_norm": 22.280956268310547, "learning_rate": 8.881459408387238e-06, "loss": 0.4505, "step": 2450 }, { "epoch": 0.08939603168834945, "grad_norm": 3.090710401535034, "learning_rate": 8.917799258667055e-06, "loss": 0.3114, "step": 2460 }, { "epoch": 0.08975943019114761, "grad_norm": 4.144134044647217, "learning_rate": 8.954139108946871e-06, "loss": 0.2855, "step": 2470 }, { "epoch": 0.09012282869394578, "grad_norm": 4.343112468719482, "learning_rate": 8.990478959226687e-06, "loss": 0.2906, "step": 2480 }, { "epoch": 0.09048622719674394, "grad_norm": 2.6925292015075684, "learning_rate": 9.026818809506505e-06, "loss": 0.284, "step": 2490 }, { "epoch": 0.09084962569954212, "grad_norm": 29.639341354370117, "learning_rate": 9.063158659786323e-06, "loss": 0.3411, "step": 2500 }, { "epoch": 0.09121302420234029, "grad_norm": 4.425374984741211, "learning_rate": 9.099498510066139e-06, "loss": 0.3041, "step": 2510 }, { "epoch": 0.09157642270513845, "grad_norm": 5.6643195152282715, "learning_rate": 9.135838360345955e-06, "loss": 0.3123, "step": 2520 }, { "epoch": 0.09193982120793662, "grad_norm": 3.9098479747772217, "learning_rate": 9.172178210625772e-06, "loss": 0.3664, "step": 2530 }, { "epoch": 0.0923032197107348, "grad_norm": 3.133389949798584, "learning_rate": 9.20851806090559e-06, "loss": 0.2708, "step": 2540 }, { "epoch": 0.09266661821353296, "grad_norm": 43.00468063354492, "learning_rate": 9.244857911185406e-06, "loss": 0.4215, "step": 2550 }, { "epoch": 0.09303001671633113, "grad_norm": 3.1411876678466797, "learning_rate": 9.281197761465222e-06, "loss": 0.2983, "step": 2560 }, { "epoch": 0.09339341521912929, "grad_norm": 3.263828754425049, "learning_rate": 9.317537611745041e-06, "loss": 0.3137, "step": 2570 }, { "epoch": 0.09375681372192747, "grad_norm": 3.618751049041748, "learning_rate": 9.353877462024857e-06, "loss": 0.3279, "step": 2580 }, { "epoch": 0.09412021222472564, "grad_norm": 3.6551568508148193, "learning_rate": 9.390217312304673e-06, "loss": 0.2409, "step": 2590 }, { "epoch": 0.0944836107275238, "grad_norm": 8.680901527404785, "learning_rate": 9.42655716258449e-06, "loss": 0.373, "step": 2600 }, { "epoch": 0.09484700923032197, "grad_norm": 4.761026382446289, "learning_rate": 9.462897012864308e-06, "loss": 0.2777, "step": 2610 }, { "epoch": 0.09521040773312014, "grad_norm": 3.142723321914673, "learning_rate": 9.499236863144124e-06, "loss": 0.2882, "step": 2620 }, { "epoch": 0.09557380623591831, "grad_norm": 2.969968795776367, "learning_rate": 9.53557671342394e-06, "loss": 0.3086, "step": 2630 }, { "epoch": 0.09593720473871648, "grad_norm": 3.754549264907837, "learning_rate": 9.571916563703758e-06, "loss": 0.259, "step": 2640 }, { "epoch": 0.09630060324151464, "grad_norm": 23.7288761138916, "learning_rate": 9.608256413983576e-06, "loss": 0.4284, "step": 2650 }, { "epoch": 0.09666400174431282, "grad_norm": 2.7727372646331787, "learning_rate": 9.644596264263392e-06, "loss": 0.2602, "step": 2660 }, { "epoch": 0.09702740024711098, "grad_norm": 14.707064628601074, "learning_rate": 9.680936114543208e-06, "loss": 0.3059, "step": 2670 }, { "epoch": 0.09739079874990915, "grad_norm": 3.8396642208099365, "learning_rate": 9.717275964823025e-06, "loss": 0.2811, "step": 2680 }, { "epoch": 0.09775419725270731, "grad_norm": 2.9460713863372803, "learning_rate": 9.753615815102843e-06, "loss": 0.2686, "step": 2690 }, { "epoch": 0.0981175957555055, "grad_norm": 20.107336044311523, "learning_rate": 9.789955665382659e-06, "loss": 0.4306, "step": 2700 }, { "epoch": 0.09848099425830366, "grad_norm": 3.1286280155181885, "learning_rate": 9.826295515662477e-06, "loss": 0.3059, "step": 2710 }, { "epoch": 0.09884439276110182, "grad_norm": 6.160215854644775, "learning_rate": 9.862635365942292e-06, "loss": 0.3046, "step": 2720 }, { "epoch": 0.09920779126389999, "grad_norm": 6.1921186447143555, "learning_rate": 9.89897521622211e-06, "loss": 0.285, "step": 2730 }, { "epoch": 0.09957118976669817, "grad_norm": 13.759759902954102, "learning_rate": 9.935315066501926e-06, "loss": 0.2888, "step": 2740 }, { "epoch": 0.09993458826949633, "grad_norm": 13.92764949798584, "learning_rate": 9.971654916781744e-06, "loss": 0.4266, "step": 2750 }, { "epoch": 0.1002979867722945, "grad_norm": 3.3999857902526855, "learning_rate": 1.000799476706156e-05, "loss": 0.2858, "step": 2760 }, { "epoch": 0.10066138527509266, "grad_norm": 4.103928089141846, "learning_rate": 1.0044334617341377e-05, "loss": 0.262, "step": 2770 }, { "epoch": 0.10102478377789084, "grad_norm": 6.15985107421875, "learning_rate": 1.0080674467621195e-05, "loss": 0.2866, "step": 2780 }, { "epoch": 0.10138818228068901, "grad_norm": 4.904097557067871, "learning_rate": 1.0117014317901011e-05, "loss": 0.5057, "step": 2790 }, { "epoch": 0.10175158078348717, "grad_norm": 15.2875337600708, "learning_rate": 1.0153354168180827e-05, "loss": 0.4345, "step": 2800 }, { "epoch": 0.10211497928628534, "grad_norm": 2.4697763919830322, "learning_rate": 1.0189694018460643e-05, "loss": 0.2693, "step": 2810 }, { "epoch": 0.1024783777890835, "grad_norm": 5.04618501663208, "learning_rate": 1.0226033868740462e-05, "loss": 0.2868, "step": 2820 }, { "epoch": 0.10284177629188168, "grad_norm": 5.851120948791504, "learning_rate": 1.0262373719020278e-05, "loss": 0.3425, "step": 2830 }, { "epoch": 0.10320517479467985, "grad_norm": 2.1007258892059326, "learning_rate": 1.0298713569300094e-05, "loss": 0.2394, "step": 2840 }, { "epoch": 0.10356857329747801, "grad_norm": 23.411701202392578, "learning_rate": 1.0335053419579912e-05, "loss": 0.4125, "step": 2850 }, { "epoch": 0.10393197180027618, "grad_norm": 4.178852558135986, "learning_rate": 1.037139326985973e-05, "loss": 0.2951, "step": 2860 }, { "epoch": 0.10429537030307436, "grad_norm": 1.7873708009719849, "learning_rate": 1.0407733120139545e-05, "loss": 0.3272, "step": 2870 }, { "epoch": 0.10465876880587252, "grad_norm": 7.603367328643799, "learning_rate": 1.0444072970419361e-05, "loss": 0.2779, "step": 2880 }, { "epoch": 0.10502216730867069, "grad_norm": 3.468761444091797, "learning_rate": 1.0480412820699179e-05, "loss": 0.3007, "step": 2890 }, { "epoch": 0.10538556581146885, "grad_norm": 16.35407829284668, "learning_rate": 1.0516752670978997e-05, "loss": 0.3918, "step": 2900 }, { "epoch": 0.10574896431426703, "grad_norm": 3.4226725101470947, "learning_rate": 1.0553092521258813e-05, "loss": 3.7156, "step": 2910 }, { "epoch": 0.1061123628170652, "grad_norm": 9.006295204162598, "learning_rate": 1.058943237153863e-05, "loss": 0.4075, "step": 2920 }, { "epoch": 0.10647576131986336, "grad_norm": 4.993385314941406, "learning_rate": 1.0625772221818446e-05, "loss": 0.3588, "step": 2930 }, { "epoch": 0.10683915982266153, "grad_norm": 3.7684736251831055, "learning_rate": 1.0662112072098264e-05, "loss": 0.2429, "step": 2940 }, { "epoch": 0.1072025583254597, "grad_norm": 40.301170349121094, "learning_rate": 1.069845192237808e-05, "loss": 0.4739, "step": 2950 }, { "epoch": 0.10756595682825787, "grad_norm": 3.772693157196045, "learning_rate": 1.0734791772657898e-05, "loss": 0.3284, "step": 2960 }, { "epoch": 0.10792935533105603, "grad_norm": 3.0183212757110596, "learning_rate": 1.0771131622937714e-05, "loss": 0.38, "step": 2970 }, { "epoch": 0.1082927538338542, "grad_norm": 6.61776876449585, "learning_rate": 1.0807471473217531e-05, "loss": 0.2793, "step": 2980 }, { "epoch": 0.10865615233665238, "grad_norm": 6.112472057342529, "learning_rate": 1.0843811323497347e-05, "loss": 0.2447, "step": 2990 }, { "epoch": 0.10901955083945054, "grad_norm": 10.800559997558594, "learning_rate": 1.0880151173777165e-05, "loss": 0.373, "step": 3000 }, { "epoch": 0.10901955083945054, "eval_loss": 0.4652940630912781, "eval_runtime": 180.0765, "eval_samples_per_second": 41.171, "eval_steps_per_second": 5.148, "eval_wer": 0.27681667181004593, "step": 3000 }, { "epoch": 0.10938294934224871, "grad_norm": 7.778831958770752, "learning_rate": 1.091649102405698e-05, "loss": 0.29, "step": 3010 }, { "epoch": 0.10974634784504687, "grad_norm": 2.855592966079712, "learning_rate": 1.0952830874336798e-05, "loss": 0.2411, "step": 3020 }, { "epoch": 0.11010974634784505, "grad_norm": 4.229335784912109, "learning_rate": 1.0989170724616616e-05, "loss": 0.3247, "step": 3030 }, { "epoch": 0.11047314485064322, "grad_norm": 3.8145949840545654, "learning_rate": 1.1025510574896432e-05, "loss": 0.2242, "step": 3040 }, { "epoch": 0.11083654335344138, "grad_norm": 22.571304321289062, "learning_rate": 1.1061850425176248e-05, "loss": 0.3959, "step": 3050 }, { "epoch": 0.11119994185623955, "grad_norm": 2.4706461429595947, "learning_rate": 1.1098190275456066e-05, "loss": 0.2466, "step": 3060 }, { "epoch": 0.11156334035903773, "grad_norm": 4.497069358825684, "learning_rate": 1.1134530125735883e-05, "loss": 2.1968, "step": 3070 }, { "epoch": 0.11192673886183589, "grad_norm": 5.060062885284424, "learning_rate": 1.11708699760157e-05, "loss": 0.2921, "step": 3080 }, { "epoch": 0.11229013736463406, "grad_norm": 2.7882325649261475, "learning_rate": 1.1207209826295515e-05, "loss": 0.2534, "step": 3090 }, { "epoch": 0.11265353586743222, "grad_norm": 9.96241569519043, "learning_rate": 1.1243549676575333e-05, "loss": 0.421, "step": 3100 }, { "epoch": 0.1130169343702304, "grad_norm": 31.262916564941406, "learning_rate": 1.127988952685515e-05, "loss": 0.4048, "step": 3110 }, { "epoch": 0.11338033287302857, "grad_norm": 3.472343921661377, "learning_rate": 1.1316229377134967e-05, "loss": 0.2798, "step": 3120 }, { "epoch": 0.11374373137582673, "grad_norm": 4.074085235595703, "learning_rate": 1.1352569227414783e-05, "loss": 0.299, "step": 3130 }, { "epoch": 0.1141071298786249, "grad_norm": 2.879512310028076, "learning_rate": 1.1388909077694602e-05, "loss": 0.2137, "step": 3140 }, { "epoch": 0.11447052838142306, "grad_norm": 125.17889404296875, "learning_rate": 1.1425248927974418e-05, "loss": 0.5418, "step": 3150 }, { "epoch": 0.11483392688422124, "grad_norm": 4.171487808227539, "learning_rate": 1.1461588778254234e-05, "loss": 0.2685, "step": 3160 }, { "epoch": 0.1151973253870194, "grad_norm": 2.1496529579162598, "learning_rate": 1.1497928628534051e-05, "loss": 0.2421, "step": 3170 }, { "epoch": 0.11556072388981757, "grad_norm": 2.6266047954559326, "learning_rate": 1.1534268478813867e-05, "loss": 0.3288, "step": 3180 }, { "epoch": 0.11592412239261574, "grad_norm": 3.7677230834960938, "learning_rate": 1.1570608329093685e-05, "loss": 0.3093, "step": 3190 }, { "epoch": 0.11628752089541392, "grad_norm": 9.4945707321167, "learning_rate": 1.1606948179373501e-05, "loss": 0.3066, "step": 3200 }, { "epoch": 0.11665091939821208, "grad_norm": 2.5509915351867676, "learning_rate": 1.1643288029653319e-05, "loss": 0.2615, "step": 3210 }, { "epoch": 0.11701431790101025, "grad_norm": 3.066624641418457, "learning_rate": 1.1679627879933135e-05, "loss": 0.3224, "step": 3220 }, { "epoch": 0.11737771640380841, "grad_norm": 6.494440078735352, "learning_rate": 1.1715967730212952e-05, "loss": 0.3017, "step": 3230 }, { "epoch": 0.11774111490660659, "grad_norm": 3.4675605297088623, "learning_rate": 1.1752307580492768e-05, "loss": 0.2152, "step": 3240 }, { "epoch": 0.11810451340940475, "grad_norm": 15.5110445022583, "learning_rate": 1.1788647430772586e-05, "loss": 0.349, "step": 3250 }, { "epoch": 0.11846791191220292, "grad_norm": 1.972530484199524, "learning_rate": 1.1824987281052402e-05, "loss": 0.2728, "step": 3260 }, { "epoch": 0.11883131041500108, "grad_norm": 4.018677711486816, "learning_rate": 1.186132713133222e-05, "loss": 0.254, "step": 3270 }, { "epoch": 0.11919470891779926, "grad_norm": 4.95416784286499, "learning_rate": 1.1897666981612037e-05, "loss": 0.2465, "step": 3280 }, { "epoch": 0.11955810742059743, "grad_norm": 3.165599822998047, "learning_rate": 1.1934006831891853e-05, "loss": 0.2537, "step": 3290 }, { "epoch": 0.1199215059233956, "grad_norm": 8.508636474609375, "learning_rate": 1.1970346682171669e-05, "loss": 0.3655, "step": 3300 }, { "epoch": 0.12028490442619376, "grad_norm": 2.3892879486083984, "learning_rate": 1.2006686532451487e-05, "loss": 0.252, "step": 3310 }, { "epoch": 0.12064830292899194, "grad_norm": 3.591564178466797, "learning_rate": 1.2043026382731304e-05, "loss": 0.2401, "step": 3320 }, { "epoch": 0.1210117014317901, "grad_norm": 3.891261577606201, "learning_rate": 1.207936623301112e-05, "loss": 0.2909, "step": 3330 }, { "epoch": 0.12137509993458827, "grad_norm": 4.691511154174805, "learning_rate": 1.2115706083290936e-05, "loss": 0.2304, "step": 3340 }, { "epoch": 0.12173849843738643, "grad_norm": 18.415170669555664, "learning_rate": 1.2152045933570754e-05, "loss": 0.35, "step": 3350 }, { "epoch": 0.12210189694018461, "grad_norm": 3.9105615615844727, "learning_rate": 1.2188385783850572e-05, "loss": 0.3112, "step": 3360 }, { "epoch": 0.12246529544298278, "grad_norm": 3.215313196182251, "learning_rate": 1.2224725634130388e-05, "loss": 0.2492, "step": 3370 }, { "epoch": 0.12282869394578094, "grad_norm": 9.30749225616455, "learning_rate": 1.2261065484410204e-05, "loss": 0.2696, "step": 3380 }, { "epoch": 0.12319209244857911, "grad_norm": 4.9797682762146, "learning_rate": 1.2297405334690023e-05, "loss": 0.2197, "step": 3390 }, { "epoch": 0.12355549095137729, "grad_norm": 19.632797241210938, "learning_rate": 1.2333745184969839e-05, "loss": 0.3411, "step": 3400 }, { "epoch": 0.12391888945417545, "grad_norm": 4.509830474853516, "learning_rate": 1.2370085035249655e-05, "loss": 0.2394, "step": 3410 }, { "epoch": 0.12428228795697362, "grad_norm": 2.253514051437378, "learning_rate": 1.2406424885529473e-05, "loss": 0.266, "step": 3420 }, { "epoch": 0.12464568645977178, "grad_norm": 3.123828172683716, "learning_rate": 1.244276473580929e-05, "loss": 0.2675, "step": 3430 }, { "epoch": 0.12500908496256996, "grad_norm": 34.37680435180664, "learning_rate": 1.2479104586089106e-05, "loss": 0.279, "step": 3440 }, { "epoch": 0.1253724834653681, "grad_norm": 10.051690101623535, "learning_rate": 1.2515444436368922e-05, "loss": 0.3364, "step": 3450 }, { "epoch": 0.1257358819681663, "grad_norm": 2.1765711307525635, "learning_rate": 1.255178428664874e-05, "loss": 0.2288, "step": 3460 }, { "epoch": 0.12609928047096447, "grad_norm": 2.4910778999328613, "learning_rate": 1.2588124136928556e-05, "loss": 0.2866, "step": 3470 }, { "epoch": 0.12646267897376262, "grad_norm": 7.379613876342773, "learning_rate": 1.2624463987208373e-05, "loss": 0.2618, "step": 3480 }, { "epoch": 0.1268260774765608, "grad_norm": 2.681814432144165, "learning_rate": 1.266080383748819e-05, "loss": 0.2405, "step": 3490 }, { "epoch": 0.12718947597935895, "grad_norm": 83.93474578857422, "learning_rate": 1.2697143687768007e-05, "loss": 0.338, "step": 3500 }, { "epoch": 0.12755287448215713, "grad_norm": 1.5564826726913452, "learning_rate": 1.2733483538047825e-05, "loss": 0.2305, "step": 3510 }, { "epoch": 0.1279162729849553, "grad_norm": 2.6026437282562256, "learning_rate": 1.2769823388327639e-05, "loss": 0.2618, "step": 3520 }, { "epoch": 0.12827967148775346, "grad_norm": 8.228372573852539, "learning_rate": 1.2806163238607458e-05, "loss": 0.2586, "step": 3530 }, { "epoch": 0.12864306999055164, "grad_norm": 2.643139362335205, "learning_rate": 1.2842503088887276e-05, "loss": 0.2197, "step": 3540 }, { "epoch": 0.12900646849334982, "grad_norm": 400.0296325683594, "learning_rate": 1.287884293916709e-05, "loss": 0.3586, "step": 3550 }, { "epoch": 0.12936986699614797, "grad_norm": 1.6349281072616577, "learning_rate": 1.2915182789446908e-05, "loss": 0.2364, "step": 3560 }, { "epoch": 0.12973326549894615, "grad_norm": 2.6573753356933594, "learning_rate": 1.2951522639726724e-05, "loss": 0.2195, "step": 3570 }, { "epoch": 0.1300966640017443, "grad_norm": 4.2721686363220215, "learning_rate": 1.2987862490006542e-05, "loss": 0.3092, "step": 3580 }, { "epoch": 0.13046006250454248, "grad_norm": 2.9982502460479736, "learning_rate": 1.302420234028636e-05, "loss": 0.2826, "step": 3590 }, { "epoch": 0.13082346100734066, "grad_norm": 8.903009414672852, "learning_rate": 1.3060542190566175e-05, "loss": 0.3367, "step": 3600 }, { "epoch": 0.13082346100734066, "eval_loss": 0.4490436017513275, "eval_runtime": 179.8743, "eval_samples_per_second": 41.218, "eval_steps_per_second": 5.154, "eval_wer": 0.2664058670829778, "step": 3600 }, { "epoch": 0.1311868595101388, "grad_norm": 2.9746363162994385, "learning_rate": 1.3096882040845993e-05, "loss": 0.2418, "step": 3610 }, { "epoch": 0.131550258012937, "grad_norm": 2.274872303009033, "learning_rate": 1.313322189112581e-05, "loss": 0.3052, "step": 3620 }, { "epoch": 0.13191365651573517, "grad_norm": 7.114847660064697, "learning_rate": 1.3169561741405625e-05, "loss": 0.2821, "step": 3630 }, { "epoch": 0.13227705501853332, "grad_norm": 3.2101128101348877, "learning_rate": 1.3205901591685444e-05, "loss": 0.2223, "step": 3640 }, { "epoch": 0.1326404535213315, "grad_norm": 18.914968490600586, "learning_rate": 1.3242241441965258e-05, "loss": 0.3809, "step": 3650 }, { "epoch": 0.13300385202412965, "grad_norm": 2.399569272994995, "learning_rate": 1.3278581292245076e-05, "loss": 0.2221, "step": 3660 }, { "epoch": 0.13336725052692783, "grad_norm": 5.76792573928833, "learning_rate": 1.3314921142524894e-05, "loss": 0.2487, "step": 3670 }, { "epoch": 0.133730649029726, "grad_norm": 3.6859967708587646, "learning_rate": 1.335126099280471e-05, "loss": 0.2781, "step": 3680 }, { "epoch": 0.13409404753252416, "grad_norm": 2.9653141498565674, "learning_rate": 1.3387600843084527e-05, "loss": 0.2258, "step": 3690 }, { "epoch": 0.13445744603532234, "grad_norm": 19.170753479003906, "learning_rate": 1.3423940693364345e-05, "loss": 0.3902, "step": 3700 }, { "epoch": 0.13482084453812052, "grad_norm": 2.2880115509033203, "learning_rate": 1.3460280543644161e-05, "loss": 0.2745, "step": 3710 }, { "epoch": 0.13518424304091867, "grad_norm": 2.5196125507354736, "learning_rate": 1.3496620393923979e-05, "loss": 0.2293, "step": 3720 }, { "epoch": 0.13554764154371685, "grad_norm": 3.827986001968384, "learning_rate": 1.3532960244203793e-05, "loss": 0.259, "step": 3730 }, { "epoch": 0.135911040046515, "grad_norm": 3.4211530685424805, "learning_rate": 1.356930009448361e-05, "loss": 0.3256, "step": 3740 }, { "epoch": 0.13627443854931318, "grad_norm": 26.879398345947266, "learning_rate": 1.360563994476343e-05, "loss": 0.3208, "step": 3750 }, { "epoch": 0.13663783705211135, "grad_norm": 2.316091775894165, "learning_rate": 1.3641979795043244e-05, "loss": 0.2316, "step": 3760 }, { "epoch": 0.1370012355549095, "grad_norm": 4.098924160003662, "learning_rate": 1.3678319645323062e-05, "loss": 0.2399, "step": 3770 }, { "epoch": 0.13736463405770769, "grad_norm": 6.9372687339782715, "learning_rate": 1.371465949560288e-05, "loss": 0.2858, "step": 3780 }, { "epoch": 0.13772803256050586, "grad_norm": 2.509535789489746, "learning_rate": 1.3750999345882695e-05, "loss": 0.2113, "step": 3790 }, { "epoch": 0.13809143106330402, "grad_norm": 7.7181077003479, "learning_rate": 1.3787339196162513e-05, "loss": 0.3279, "step": 3800 }, { "epoch": 0.1384548295661022, "grad_norm": 2.6843245029449463, "learning_rate": 1.3823679046442329e-05, "loss": 0.2267, "step": 3810 }, { "epoch": 0.13881822806890035, "grad_norm": 3.05159068107605, "learning_rate": 1.3860018896722147e-05, "loss": 0.229, "step": 3820 }, { "epoch": 0.13918162657169852, "grad_norm": 5.029635429382324, "learning_rate": 1.3896358747001964e-05, "loss": 0.2627, "step": 3830 }, { "epoch": 0.1395450250744967, "grad_norm": 2.8287103176116943, "learning_rate": 1.3932698597281779e-05, "loss": 0.2294, "step": 3840 }, { "epoch": 0.13990842357729485, "grad_norm": 24.862224578857422, "learning_rate": 1.3969038447561598e-05, "loss": 0.3198, "step": 3850 }, { "epoch": 0.14027182208009303, "grad_norm": 5.624647617340088, "learning_rate": 1.4005378297841412e-05, "loss": 0.2641, "step": 3860 }, { "epoch": 0.14063522058289118, "grad_norm": 1.6199389696121216, "learning_rate": 1.404171814812123e-05, "loss": 0.2279, "step": 3870 }, { "epoch": 0.14099861908568936, "grad_norm": 2.864058017730713, "learning_rate": 1.4078057998401047e-05, "loss": 0.2448, "step": 3880 }, { "epoch": 0.14136201758848754, "grad_norm": 3.897899627685547, "learning_rate": 1.4114397848680863e-05, "loss": 0.2438, "step": 3890 }, { "epoch": 0.1417254160912857, "grad_norm": 42.4840087890625, "learning_rate": 1.4150737698960681e-05, "loss": 0.3604, "step": 3900 }, { "epoch": 0.14208881459408387, "grad_norm": 1.6532913446426392, "learning_rate": 1.4187077549240499e-05, "loss": 0.2469, "step": 3910 }, { "epoch": 0.14245221309688205, "grad_norm": 2.3755931854248047, "learning_rate": 1.4223417399520315e-05, "loss": 0.2233, "step": 3920 }, { "epoch": 0.1428156115996802, "grad_norm": 5.866461277008057, "learning_rate": 1.4259757249800132e-05, "loss": 0.2952, "step": 3930 }, { "epoch": 0.14317901010247838, "grad_norm": 3.171570301055908, "learning_rate": 1.4296097100079947e-05, "loss": 0.2329, "step": 3940 }, { "epoch": 0.14354240860527653, "grad_norm": 23.302635192871094, "learning_rate": 1.4332436950359764e-05, "loss": 0.3592, "step": 3950 }, { "epoch": 0.1439058071080747, "grad_norm": 2.3609213829040527, "learning_rate": 1.4368776800639584e-05, "loss": 1.1975, "step": 3960 }, { "epoch": 0.1442692056108729, "grad_norm": 2.857872486114502, "learning_rate": 1.4405116650919398e-05, "loss": 0.265, "step": 3970 }, { "epoch": 0.14463260411367104, "grad_norm": 6.918335914611816, "learning_rate": 1.4441456501199216e-05, "loss": 0.4057, "step": 3980 }, { "epoch": 0.14499600261646922, "grad_norm": 3.8019461631774902, "learning_rate": 1.4477796351479033e-05, "loss": 0.299, "step": 3990 }, { "epoch": 0.1453594011192674, "grad_norm": 30.963428497314453, "learning_rate": 1.451413620175885e-05, "loss": 0.335, "step": 4000 }, { "epoch": 0.14572279962206555, "grad_norm": 2.3968963623046875, "learning_rate": 1.4550476052038667e-05, "loss": 0.224, "step": 4010 }, { "epoch": 0.14608619812486373, "grad_norm": 6.7229485511779785, "learning_rate": 1.4586815902318481e-05, "loss": 0.2657, "step": 4020 }, { "epoch": 0.14644959662766188, "grad_norm": 17.447879791259766, "learning_rate": 1.46231557525983e-05, "loss": 0.2199, "step": 4030 }, { "epoch": 0.14681299513046006, "grad_norm": 2.020756721496582, "learning_rate": 1.4659495602878118e-05, "loss": 0.31, "step": 4040 }, { "epoch": 0.14717639363325824, "grad_norm": 38.28268814086914, "learning_rate": 1.4695835453157932e-05, "loss": 0.3861, "step": 4050 }, { "epoch": 0.1475397921360564, "grad_norm": 2.085073232650757, "learning_rate": 1.473217530343775e-05, "loss": 0.2078, "step": 4060 }, { "epoch": 0.14790319063885457, "grad_norm": 3.453597068786621, "learning_rate": 1.476851515371757e-05, "loss": 0.296, "step": 4070 }, { "epoch": 0.14826658914165275, "grad_norm": 2.3039424419403076, "learning_rate": 1.4804855003997384e-05, "loss": 0.2346, "step": 4080 }, { "epoch": 0.1486299876444509, "grad_norm": 3.217890977859497, "learning_rate": 1.4841194854277201e-05, "loss": 0.2243, "step": 4090 }, { "epoch": 0.14899338614724908, "grad_norm": 12.48748779296875, "learning_rate": 1.4877534704557017e-05, "loss": 0.3378, "step": 4100 }, { "epoch": 0.14935678465004723, "grad_norm": 2.781388282775879, "learning_rate": 1.4913874554836835e-05, "loss": 0.2167, "step": 4110 }, { "epoch": 0.1497201831528454, "grad_norm": 2.564457893371582, "learning_rate": 1.4950214405116653e-05, "loss": 0.2187, "step": 4120 }, { "epoch": 0.1500835816556436, "grad_norm": 9.590895652770996, "learning_rate": 1.4986554255396469e-05, "loss": 0.2444, "step": 4130 }, { "epoch": 0.15044698015844174, "grad_norm": 2.8055028915405273, "learning_rate": 1.5022894105676286e-05, "loss": 0.2499, "step": 4140 }, { "epoch": 0.15081037866123992, "grad_norm": 7.157045364379883, "learning_rate": 1.5059233955956104e-05, "loss": 0.361, "step": 4150 }, { "epoch": 0.1511737771640381, "grad_norm": 3.369006633758545, "learning_rate": 1.5095573806235918e-05, "loss": 0.259, "step": 4160 }, { "epoch": 0.15153717566683625, "grad_norm": 5.334355354309082, "learning_rate": 1.5131913656515736e-05, "loss": 0.2797, "step": 4170 }, { "epoch": 0.15190057416963443, "grad_norm": 6.667120456695557, "learning_rate": 1.5168253506795552e-05, "loss": 0.2678, "step": 4180 }, { "epoch": 0.15226397267243258, "grad_norm": 1.7419887781143188, "learning_rate": 1.520459335707537e-05, "loss": 0.2432, "step": 4190 }, { "epoch": 0.15262737117523076, "grad_norm": 7.022573947906494, "learning_rate": 1.5240933207355187e-05, "loss": 0.2955, "step": 4200 }, { "epoch": 0.15262737117523076, "eval_loss": 0.4361402690410614, "eval_runtime": 180.5933, "eval_samples_per_second": 41.054, "eval_steps_per_second": 5.133, "eval_wer": 0.2589540181894095, "step": 4200 }, { "epoch": 0.15299076967802894, "grad_norm": 3.108078718185425, "learning_rate": 1.5277273057635e-05, "loss": 3.6147, "step": 4210 }, { "epoch": 0.1533541681808271, "grad_norm": 2.6063787937164307, "learning_rate": 1.531361290791482e-05, "loss": 0.232, "step": 4220 }, { "epoch": 0.15371756668362527, "grad_norm": 3.581697463989258, "learning_rate": 1.5349952758194637e-05, "loss": 0.2451, "step": 4230 }, { "epoch": 0.15408096518642342, "grad_norm": 2.5910837650299072, "learning_rate": 1.5386292608474453e-05, "loss": 0.2283, "step": 4240 }, { "epoch": 0.1544443636892216, "grad_norm": 70.38739013671875, "learning_rate": 1.5422632458754272e-05, "loss": 0.396, "step": 4250 }, { "epoch": 0.15480776219201978, "grad_norm": 3.5658187866210938, "learning_rate": 1.5458972309034088e-05, "loss": 0.2116, "step": 4260 }, { "epoch": 0.15517116069481793, "grad_norm": 5.393126487731934, "learning_rate": 1.5495312159313904e-05, "loss": 0.2382, "step": 4270 }, { "epoch": 0.1555345591976161, "grad_norm": 10.135586738586426, "learning_rate": 1.5531652009593723e-05, "loss": 0.2485, "step": 4280 }, { "epoch": 0.15589795770041429, "grad_norm": 2.1143031120300293, "learning_rate": 1.5567991859873536e-05, "loss": 0.1936, "step": 4290 }, { "epoch": 0.15626135620321244, "grad_norm": 20.077383041381836, "learning_rate": 1.5604331710153355e-05, "loss": 0.3818, "step": 4300 }, { "epoch": 0.15662475470601062, "grad_norm": 3.793126344680786, "learning_rate": 1.564067156043317e-05, "loss": 0.2245, "step": 4310 }, { "epoch": 0.15698815320880877, "grad_norm": 3.2057955265045166, "learning_rate": 1.5677011410712987e-05, "loss": 0.2551, "step": 4320 }, { "epoch": 0.15735155171160695, "grad_norm": 5.002716064453125, "learning_rate": 1.5713351260992806e-05, "loss": 0.2951, "step": 4330 }, { "epoch": 0.15771495021440513, "grad_norm": 2.2240726947784424, "learning_rate": 1.5749691111272622e-05, "loss": 0.1993, "step": 4340 }, { "epoch": 0.15807834871720328, "grad_norm": 55.30891036987305, "learning_rate": 1.578603096155244e-05, "loss": 0.2803, "step": 4350 }, { "epoch": 0.15844174722000146, "grad_norm": 1.9186596870422363, "learning_rate": 1.5822370811832258e-05, "loss": 0.2234, "step": 4360 }, { "epoch": 0.15880514572279963, "grad_norm": 1.7817661762237549, "learning_rate": 1.5858710662112074e-05, "loss": 0.2038, "step": 4370 }, { "epoch": 0.15916854422559779, "grad_norm": 3.046330690383911, "learning_rate": 1.589505051239189e-05, "loss": 0.2809, "step": 4380 }, { "epoch": 0.15953194272839596, "grad_norm": 5.43302583694458, "learning_rate": 1.5931390362671706e-05, "loss": 0.1896, "step": 4390 }, { "epoch": 0.15989534123119412, "grad_norm": 12.185855865478516, "learning_rate": 1.596773021295152e-05, "loss": 0.2984, "step": 4400 }, { "epoch": 0.1602587397339923, "grad_norm": 1.9507842063903809, "learning_rate": 1.600407006323134e-05, "loss": 0.2064, "step": 4410 }, { "epoch": 0.16062213823679047, "grad_norm": 4.536543846130371, "learning_rate": 1.6040409913511157e-05, "loss": 0.2433, "step": 4420 }, { "epoch": 0.16098553673958862, "grad_norm": 3.101174831390381, "learning_rate": 1.6076749763790973e-05, "loss": 0.2746, "step": 4430 }, { "epoch": 0.1613489352423868, "grad_norm": 2.2098021507263184, "learning_rate": 1.6113089614070792e-05, "loss": 0.17, "step": 4440 }, { "epoch": 0.16171233374518498, "grad_norm": 59.360809326171875, "learning_rate": 1.6149429464350608e-05, "loss": 0.379, "step": 4450 }, { "epoch": 0.16207573224798313, "grad_norm": 6.364736557006836, "learning_rate": 1.6185769314630424e-05, "loss": 0.2224, "step": 4460 }, { "epoch": 0.1624391307507813, "grad_norm": 3.2455356121063232, "learning_rate": 1.622210916491024e-05, "loss": 0.2195, "step": 4470 }, { "epoch": 0.16280252925357946, "grad_norm": 6.399629592895508, "learning_rate": 1.625844901519006e-05, "loss": 0.266, "step": 4480 }, { "epoch": 0.16316592775637764, "grad_norm": 16.19785499572754, "learning_rate": 1.6294788865469875e-05, "loss": 0.1836, "step": 4490 }, { "epoch": 0.16352932625917582, "grad_norm": 7.909778594970703, "learning_rate": 1.633112871574969e-05, "loss": 0.6016, "step": 4500 }, { "epoch": 0.16389272476197397, "grad_norm": 2.8134663105010986, "learning_rate": 1.636746856602951e-05, "loss": 0.2148, "step": 4510 }, { "epoch": 0.16425612326477215, "grad_norm": 2.667999505996704, "learning_rate": 1.6403808416309327e-05, "loss": 0.2294, "step": 4520 }, { "epoch": 0.1646195217675703, "grad_norm": 3.355242967605591, "learning_rate": 1.6440148266589143e-05, "loss": 0.2097, "step": 4530 }, { "epoch": 0.16498292027036848, "grad_norm": 2.6241908073425293, "learning_rate": 1.647648811686896e-05, "loss": 0.2337, "step": 4540 }, { "epoch": 0.16534631877316666, "grad_norm": 16.759428024291992, "learning_rate": 1.6512827967148775e-05, "loss": 0.2944, "step": 4550 }, { "epoch": 0.1657097172759648, "grad_norm": 3.098898410797119, "learning_rate": 1.6549167817428594e-05, "loss": 0.1895, "step": 4560 }, { "epoch": 0.166073115778763, "grad_norm": 4.042644023895264, "learning_rate": 1.658550766770841e-05, "loss": 0.2369, "step": 4570 }, { "epoch": 0.16643651428156117, "grad_norm": 7.174807548522949, "learning_rate": 1.6621847517988226e-05, "loss": 0.2331, "step": 4580 }, { "epoch": 0.16679991278435932, "grad_norm": 2.1805012226104736, "learning_rate": 1.6658187368268045e-05, "loss": 0.2422, "step": 4590 }, { "epoch": 0.1671633112871575, "grad_norm": 18.097871780395508, "learning_rate": 1.6694527218547858e-05, "loss": 0.347, "step": 4600 }, { "epoch": 0.16752670978995565, "grad_norm": 3.48561429977417, "learning_rate": 1.6730867068827677e-05, "loss": 0.2985, "step": 4610 }, { "epoch": 0.16789010829275383, "grad_norm": 1.7519229650497437, "learning_rate": 1.6767206919107496e-05, "loss": 0.2204, "step": 4620 }, { "epoch": 0.168253506795552, "grad_norm": 3.7641661167144775, "learning_rate": 1.680354676938731e-05, "loss": 0.2348, "step": 4630 }, { "epoch": 0.16861690529835016, "grad_norm": 3.0688085556030273, "learning_rate": 1.683988661966713e-05, "loss": 0.2147, "step": 4640 }, { "epoch": 0.16898030380114834, "grad_norm": 25.845094680786133, "learning_rate": 1.6876226469946944e-05, "loss": 0.3671, "step": 4650 }, { "epoch": 0.16934370230394652, "grad_norm": 2.841994524002075, "learning_rate": 1.691256632022676e-05, "loss": 0.2182, "step": 4660 }, { "epoch": 0.16970710080674467, "grad_norm": 1.0501997470855713, "learning_rate": 1.694890617050658e-05, "loss": 0.1791, "step": 4670 }, { "epoch": 0.17007049930954285, "grad_norm": 3.3973441123962402, "learning_rate": 1.6985246020786392e-05, "loss": 0.3338, "step": 4680 }, { "epoch": 0.170433897812341, "grad_norm": 1.8442267179489136, "learning_rate": 1.702158587106621e-05, "loss": 0.2528, "step": 4690 }, { "epoch": 0.17079729631513918, "grad_norm": 42.373409271240234, "learning_rate": 1.705792572134603e-05, "loss": 0.2892, "step": 4700 }, { "epoch": 0.17116069481793736, "grad_norm": 6.344671726226807, "learning_rate": 1.7094265571625844e-05, "loss": 0.2474, "step": 4710 }, { "epoch": 0.1715240933207355, "grad_norm": 1.6177664995193481, "learning_rate": 1.7130605421905663e-05, "loss": 0.2364, "step": 4720 }, { "epoch": 0.1718874918235337, "grad_norm": 4.98591423034668, "learning_rate": 1.7166945272185482e-05, "loss": 0.2046, "step": 4730 }, { "epoch": 0.17225089032633187, "grad_norm": 7.943169116973877, "learning_rate": 1.7203285122465295e-05, "loss": 0.293, "step": 4740 }, { "epoch": 0.17261428882913002, "grad_norm": 7.402034759521484, "learning_rate": 1.7239624972745114e-05, "loss": 0.2722, "step": 4750 }, { "epoch": 0.1729776873319282, "grad_norm": 13.290019035339355, "learning_rate": 1.727596482302493e-05, "loss": 0.347, "step": 4760 }, { "epoch": 0.17334108583472635, "grad_norm": 1.8591586351394653, "learning_rate": 1.7312304673304746e-05, "loss": 0.2291, "step": 4770 }, { "epoch": 0.17370448433752453, "grad_norm": 2.5220861434936523, "learning_rate": 1.7348644523584565e-05, "loss": 0.2436, "step": 4780 }, { "epoch": 0.1740678828403227, "grad_norm": 1.8692690134048462, "learning_rate": 1.738498437386438e-05, "loss": 0.1782, "step": 4790 }, { "epoch": 0.17443128134312086, "grad_norm": 12.558557510375977, "learning_rate": 1.7421324224144197e-05, "loss": 0.3347, "step": 4800 }, { "epoch": 0.17443128134312086, "eval_loss": 0.4148472547531128, "eval_runtime": 180.0999, "eval_samples_per_second": 41.166, "eval_steps_per_second": 5.147, "eval_wer": 0.23564543358687168, "step": 4800 }, { "epoch": 0.17479467984591904, "grad_norm": 6.168694972991943, "learning_rate": 1.7457664074424017e-05, "loss": 0.2183, "step": 4810 }, { "epoch": 0.17515807834871722, "grad_norm": 5.153416633605957, "learning_rate": 1.749400392470383e-05, "loss": 0.2689, "step": 4820 }, { "epoch": 0.17552147685151537, "grad_norm": 2.8500893115997314, "learning_rate": 1.753034377498365e-05, "loss": 0.2848, "step": 4830 }, { "epoch": 0.17588487535431355, "grad_norm": 17.89117431640625, "learning_rate": 1.7566683625263465e-05, "loss": 0.2539, "step": 4840 }, { "epoch": 0.1762482738571117, "grad_norm": 19.455005645751953, "learning_rate": 1.760302347554328e-05, "loss": 0.3166, "step": 4850 }, { "epoch": 0.17661167235990988, "grad_norm": 1.7975777387619019, "learning_rate": 1.76393633258231e-05, "loss": 0.1927, "step": 4860 }, { "epoch": 0.17697507086270806, "grad_norm": 4.6790690422058105, "learning_rate": 1.7675703176102916e-05, "loss": 0.2248, "step": 4870 }, { "epoch": 0.1773384693655062, "grad_norm": 3.2644243240356445, "learning_rate": 1.7712043026382732e-05, "loss": 0.2239, "step": 4880 }, { "epoch": 0.1777018678683044, "grad_norm": 1.9375410079956055, "learning_rate": 1.7748382876662548e-05, "loss": 0.2053, "step": 4890 }, { "epoch": 0.17806526637110254, "grad_norm": 15.435178756713867, "learning_rate": 1.7784722726942367e-05, "loss": 0.2903, "step": 4900 }, { "epoch": 0.17842866487390072, "grad_norm": 2.486330270767212, "learning_rate": 1.7821062577222183e-05, "loss": 0.2598, "step": 4910 }, { "epoch": 0.1787920633766989, "grad_norm": 2.5542314052581787, "learning_rate": 1.7857402427502e-05, "loss": 0.2305, "step": 4920 }, { "epoch": 0.17915546187949705, "grad_norm": 3.6416103839874268, "learning_rate": 1.7893742277781815e-05, "loss": 1.046, "step": 4930 }, { "epoch": 0.17951886038229523, "grad_norm": 1.9395058155059814, "learning_rate": 1.7930082128061634e-05, "loss": 0.2466, "step": 4940 }, { "epoch": 0.1798822588850934, "grad_norm": 7.664824962615967, "learning_rate": 1.796642197834145e-05, "loss": 0.2871, "step": 4950 }, { "epoch": 0.18024565738789156, "grad_norm": 2.0301320552825928, "learning_rate": 1.8002761828621266e-05, "loss": 0.1996, "step": 4960 }, { "epoch": 0.18060905589068973, "grad_norm": 8.371182441711426, "learning_rate": 1.8039101678901082e-05, "loss": 0.1947, "step": 4970 }, { "epoch": 0.18097245439348789, "grad_norm": 2.6746129989624023, "learning_rate": 1.80754415291809e-05, "loss": 0.2679, "step": 4980 }, { "epoch": 0.18133585289628606, "grad_norm": 3.448202133178711, "learning_rate": 1.8111781379460718e-05, "loss": 0.1859, "step": 4990 }, { "epoch": 0.18169925139908424, "grad_norm": 28.57021141052246, "learning_rate": 1.8148121229740534e-05, "loss": 0.3318, "step": 5000 }, { "epoch": 0.1820626499018824, "grad_norm": 4.731750965118408, "learning_rate": 1.8184461080020353e-05, "loss": 0.2354, "step": 5010 }, { "epoch": 0.18242604840468057, "grad_norm": 1.6815394163131714, "learning_rate": 1.822080093030017e-05, "loss": 0.2075, "step": 5020 }, { "epoch": 0.18278944690747875, "grad_norm": 3.868263006210327, "learning_rate": 1.8257140780579985e-05, "loss": 0.2345, "step": 5030 }, { "epoch": 0.1831528454102769, "grad_norm": 1.964240550994873, "learning_rate": 1.82934806308598e-05, "loss": 0.222, "step": 5040 }, { "epoch": 0.18351624391307508, "grad_norm": 11.881858825683594, "learning_rate": 1.8329820481139617e-05, "loss": 0.3251, "step": 5050 }, { "epoch": 0.18387964241587323, "grad_norm": 1.8463056087493896, "learning_rate": 1.8366160331419436e-05, "loss": 0.2255, "step": 5060 }, { "epoch": 0.1842430409186714, "grad_norm": 2.592672348022461, "learning_rate": 1.8402500181699252e-05, "loss": 0.1904, "step": 5070 }, { "epoch": 0.1846064394214696, "grad_norm": 4.0694074630737305, "learning_rate": 1.8438840031979068e-05, "loss": 0.2, "step": 5080 }, { "epoch": 0.18496983792426774, "grad_norm": 2.101837396621704, "learning_rate": 1.8475179882258887e-05, "loss": 0.1927, "step": 5090 }, { "epoch": 0.18533323642706592, "grad_norm": 22.162702560424805, "learning_rate": 1.8511519732538703e-05, "loss": 0.3481, "step": 5100 }, { "epoch": 0.1856966349298641, "grad_norm": 2.7928340435028076, "learning_rate": 1.854785958281852e-05, "loss": 0.2344, "step": 5110 }, { "epoch": 0.18606003343266225, "grad_norm": 1.8618485927581787, "learning_rate": 1.858419943309834e-05, "loss": 0.2139, "step": 5120 }, { "epoch": 0.18642343193546043, "grad_norm": 2.9611120223999023, "learning_rate": 1.862053928337815e-05, "loss": 0.2194, "step": 5130 }, { "epoch": 0.18678683043825858, "grad_norm": 5.181276321411133, "learning_rate": 1.865687913365797e-05, "loss": 0.2596, "step": 5140 }, { "epoch": 0.18715022894105676, "grad_norm": 10.01041030883789, "learning_rate": 1.8693218983937787e-05, "loss": 0.3122, "step": 5150 }, { "epoch": 0.18751362744385494, "grad_norm": 4.952126979827881, "learning_rate": 1.8729558834217603e-05, "loss": 0.2183, "step": 5160 }, { "epoch": 0.1878770259466531, "grad_norm": 2.19279146194458, "learning_rate": 1.8765898684497422e-05, "loss": 0.2439, "step": 5170 }, { "epoch": 0.18824042444945127, "grad_norm": 3.5189321041107178, "learning_rate": 1.8802238534777238e-05, "loss": 0.2343, "step": 5180 }, { "epoch": 0.18860382295224945, "grad_norm": 2.0936787128448486, "learning_rate": 1.8838578385057054e-05, "loss": 0.1831, "step": 5190 }, { "epoch": 0.1889672214550476, "grad_norm": 12.835061073303223, "learning_rate": 1.8874918235336873e-05, "loss": 0.2561, "step": 5200 }, { "epoch": 0.18933061995784578, "grad_norm": 1.6738308668136597, "learning_rate": 1.8911258085616686e-05, "loss": 1.0257, "step": 5210 }, { "epoch": 0.18969401846064393, "grad_norm": 2.7661142349243164, "learning_rate": 1.8947597935896505e-05, "loss": 0.2398, "step": 5220 }, { "epoch": 0.1900574169634421, "grad_norm": 4.173921585083008, "learning_rate": 1.8983937786176324e-05, "loss": 0.2157, "step": 5230 }, { "epoch": 0.1904208154662403, "grad_norm": 3.7037158012390137, "learning_rate": 1.9020277636456137e-05, "loss": 0.2182, "step": 5240 }, { "epoch": 0.19078421396903844, "grad_norm": 16.288227081298828, "learning_rate": 1.9056617486735956e-05, "loss": 0.2829, "step": 5250 }, { "epoch": 0.19114761247183662, "grad_norm": 2.0504090785980225, "learning_rate": 1.9092957337015772e-05, "loss": 0.201, "step": 5260 }, { "epoch": 0.19151101097463477, "grad_norm": 1.2266415357589722, "learning_rate": 1.9129297187295588e-05, "loss": 0.2072, "step": 5270 }, { "epoch": 0.19187440947743295, "grad_norm": 4.910546779632568, "learning_rate": 1.9165637037575408e-05, "loss": 0.1824, "step": 5280 }, { "epoch": 0.19223780798023113, "grad_norm": 3.093318223953247, "learning_rate": 1.9201976887855224e-05, "loss": 0.2471, "step": 5290 }, { "epoch": 0.19260120648302928, "grad_norm": 6.74167013168335, "learning_rate": 1.923831673813504e-05, "loss": 0.2912, "step": 5300 }, { "epoch": 0.19296460498582746, "grad_norm": 2.0540058612823486, "learning_rate": 1.927465658841486e-05, "loss": 0.2599, "step": 5310 }, { "epoch": 0.19332800348862564, "grad_norm": 2.407750129699707, "learning_rate": 1.931099643869467e-05, "loss": 0.2478, "step": 5320 }, { "epoch": 0.1936914019914238, "grad_norm": 5.479567527770996, "learning_rate": 1.934733628897449e-05, "loss": 0.5936, "step": 5330 }, { "epoch": 0.19405480049422197, "grad_norm": 1.912705659866333, "learning_rate": 1.9383676139254307e-05, "loss": 0.215, "step": 5340 }, { "epoch": 0.19441819899702012, "grad_norm": 38.24689865112305, "learning_rate": 1.9420015989534123e-05, "loss": 0.286, "step": 5350 }, { "epoch": 0.1947815974998183, "grad_norm": 3.4196550846099854, "learning_rate": 1.9456355839813942e-05, "loss": 0.4764, "step": 5360 }, { "epoch": 0.19514499600261648, "grad_norm": 1.705702781677246, "learning_rate": 1.9492695690093758e-05, "loss": 0.195, "step": 5370 }, { "epoch": 0.19550839450541463, "grad_norm": 2.7188572883605957, "learning_rate": 1.9529035540373574e-05, "loss": 0.2318, "step": 5380 }, { "epoch": 0.1958717930082128, "grad_norm": 5.217918872833252, "learning_rate": 1.9565375390653393e-05, "loss": 0.2288, "step": 5390 }, { "epoch": 0.196235191511011, "grad_norm": 7.094780921936035, "learning_rate": 1.960171524093321e-05, "loss": 0.3607, "step": 5400 }, { "epoch": 0.196235191511011, "eval_loss": 0.3953820765018463, "eval_runtime": 180.5214, "eval_samples_per_second": 41.07, "eval_steps_per_second": 5.135, "eval_wer": 0.23525514186650207, "step": 5400 }, { "epoch": 0.19659859001380914, "grad_norm": 2.379298448562622, "learning_rate": 1.9638055091213025e-05, "loss": 0.1981, "step": 5410 }, { "epoch": 0.19696198851660732, "grad_norm": 1.2755372524261475, "learning_rate": 1.967439494149284e-05, "loss": 0.3185, "step": 5420 }, { "epoch": 0.19732538701940547, "grad_norm": 2.6385338306427, "learning_rate": 1.9710734791772657e-05, "loss": 0.2231, "step": 5430 }, { "epoch": 0.19768878552220365, "grad_norm": 4.030337810516357, "learning_rate": 1.9747074642052477e-05, "loss": 0.2417, "step": 5440 }, { "epoch": 0.19805218402500183, "grad_norm": 10.988908767700195, "learning_rate": 1.9783414492332293e-05, "loss": 0.3163, "step": 5450 }, { "epoch": 0.19841558252779998, "grad_norm": 2.8273231983184814, "learning_rate": 1.981975434261211e-05, "loss": 0.2062, "step": 5460 }, { "epoch": 0.19877898103059816, "grad_norm": 1.880952000617981, "learning_rate": 1.9856094192891928e-05, "loss": 0.2103, "step": 5470 }, { "epoch": 0.19914237953339634, "grad_norm": 12.882647514343262, "learning_rate": 1.9892434043171744e-05, "loss": 0.2513, "step": 5480 }, { "epoch": 0.1995057780361945, "grad_norm": 2.8202428817749023, "learning_rate": 1.992877389345156e-05, "loss": 0.2002, "step": 5490 }, { "epoch": 0.19986917653899267, "grad_norm": 11.30123519897461, "learning_rate": 1.9965113743731376e-05, "loss": 0.3399, "step": 5500 }, { "epoch": 0.20023257504179082, "grad_norm": 3.016954183578491, "learning_rate": 2.0001453594011195e-05, "loss": 0.2016, "step": 5510 }, { "epoch": 0.200595973544589, "grad_norm": 1.3506131172180176, "learning_rate": 2.003779344429101e-05, "loss": 0.6008, "step": 5520 }, { "epoch": 0.20095937204738717, "grad_norm": 3.711284637451172, "learning_rate": 2.0074133294570827e-05, "loss": 0.2297, "step": 5530 }, { "epoch": 0.20132277055018533, "grad_norm": 2.8310322761535645, "learning_rate": 2.0110473144850643e-05, "loss": 0.19, "step": 5540 }, { "epoch": 0.2016861690529835, "grad_norm": 14.37038516998291, "learning_rate": 2.0146812995130462e-05, "loss": 0.3418, "step": 5550 }, { "epoch": 0.20204956755578168, "grad_norm": 2.037245988845825, "learning_rate": 2.0183152845410278e-05, "loss": 0.2054, "step": 5560 }, { "epoch": 0.20241296605857984, "grad_norm": 2.47495698928833, "learning_rate": 2.0219492695690094e-05, "loss": 0.2102, "step": 5570 }, { "epoch": 0.20277636456137801, "grad_norm": 5.948564529418945, "learning_rate": 2.025583254596991e-05, "loss": 0.2299, "step": 5580 }, { "epoch": 0.20313976306417617, "grad_norm": 2.010765552520752, "learning_rate": 2.029217239624973e-05, "loss": 0.2214, "step": 5590 }, { "epoch": 0.20350316156697434, "grad_norm": 109.07927703857422, "learning_rate": 2.0328512246529546e-05, "loss": 0.327, "step": 5600 }, { "epoch": 0.20386656006977252, "grad_norm": 2.708141565322876, "learning_rate": 2.036485209680936e-05, "loss": 0.2128, "step": 5610 }, { "epoch": 0.20422995857257067, "grad_norm": 4.145051002502441, "learning_rate": 2.040119194708918e-05, "loss": 1.5499, "step": 5620 }, { "epoch": 0.20459335707536885, "grad_norm": 5.204433917999268, "learning_rate": 2.0437531797368993e-05, "loss": 0.2238, "step": 5630 }, { "epoch": 0.204956755578167, "grad_norm": 3.625671625137329, "learning_rate": 2.0473871647648813e-05, "loss": 0.2009, "step": 5640 }, { "epoch": 0.20532015408096518, "grad_norm": 7.134413719177246, "learning_rate": 2.051021149792863e-05, "loss": 0.3236, "step": 5650 }, { "epoch": 0.20568355258376336, "grad_norm": 3.090585708618164, "learning_rate": 2.0546551348208445e-05, "loss": 0.2245, "step": 5660 }, { "epoch": 0.20604695108656151, "grad_norm": 1.5290725231170654, "learning_rate": 2.0582891198488264e-05, "loss": 0.9725, "step": 5670 }, { "epoch": 0.2064103495893597, "grad_norm": 12.433088302612305, "learning_rate": 2.061923104876808e-05, "loss": 0.2755, "step": 5680 }, { "epoch": 0.20677374809215787, "grad_norm": 4.399518013000488, "learning_rate": 2.0655570899047896e-05, "loss": 0.2136, "step": 5690 }, { "epoch": 0.20713714659495602, "grad_norm": 12.662751197814941, "learning_rate": 2.0691910749327715e-05, "loss": 0.3022, "step": 5700 }, { "epoch": 0.2075005450977542, "grad_norm": 1.8056265115737915, "learning_rate": 2.0728250599607528e-05, "loss": 0.3538, "step": 5710 }, { "epoch": 0.20786394360055235, "grad_norm": 1.3133045434951782, "learning_rate": 2.0764590449887347e-05, "loss": 0.1829, "step": 5720 }, { "epoch": 0.20822734210335053, "grad_norm": 6.10534143447876, "learning_rate": 2.0800930300167167e-05, "loss": 0.2819, "step": 5730 }, { "epoch": 0.2085907406061487, "grad_norm": 4.327618598937988, "learning_rate": 2.083727015044698e-05, "loss": 0.2029, "step": 5740 }, { "epoch": 0.20895413910894686, "grad_norm": 6.878536224365234, "learning_rate": 2.08736100007268e-05, "loss": 0.3301, "step": 5750 }, { "epoch": 0.20931753761174504, "grad_norm": 2.8301913738250732, "learning_rate": 2.0909949851006614e-05, "loss": 0.2144, "step": 5760 }, { "epoch": 0.20968093611454322, "grad_norm": 2.248054265975952, "learning_rate": 2.094628970128643e-05, "loss": 0.2046, "step": 5770 }, { "epoch": 0.21004433461734137, "grad_norm": 4.619300842285156, "learning_rate": 2.098262955156625e-05, "loss": 0.2487, "step": 5780 }, { "epoch": 0.21040773312013955, "grad_norm": 2.6446404457092285, "learning_rate": 2.1018969401846066e-05, "loss": 0.2222, "step": 5790 }, { "epoch": 0.2107711316229377, "grad_norm": 7.827177047729492, "learning_rate": 2.1055309252125882e-05, "loss": 0.2684, "step": 5800 }, { "epoch": 0.21113453012573588, "grad_norm": 5.37054967880249, "learning_rate": 2.10916491024057e-05, "loss": 0.216, "step": 5810 }, { "epoch": 0.21149792862853406, "grad_norm": 1.5430680513381958, "learning_rate": 2.1127988952685514e-05, "loss": 0.1723, "step": 5820 }, { "epoch": 0.2118613271313322, "grad_norm": 4.355040550231934, "learning_rate": 2.1164328802965333e-05, "loss": 0.3078, "step": 5830 }, { "epoch": 0.2122247256341304, "grad_norm": 2.70613169670105, "learning_rate": 2.1200668653245152e-05, "loss": 0.1857, "step": 5840 }, { "epoch": 0.21258812413692857, "grad_norm": 17.876861572265625, "learning_rate": 2.1237008503524965e-05, "loss": 0.335, "step": 5850 }, { "epoch": 0.21295152263972672, "grad_norm": 2.048499822616577, "learning_rate": 2.1273348353804784e-05, "loss": 0.2588, "step": 5860 }, { "epoch": 0.2133149211425249, "grad_norm": 2.2033607959747314, "learning_rate": 2.13096882040846e-05, "loss": 0.1973, "step": 5870 }, { "epoch": 0.21367831964532305, "grad_norm": 5.563814640045166, "learning_rate": 2.1346028054364416e-05, "loss": 0.2632, "step": 5880 }, { "epoch": 0.21404171814812123, "grad_norm": 1.4629203081130981, "learning_rate": 2.1382367904644236e-05, "loss": 0.1714, "step": 5890 }, { "epoch": 0.2144051166509194, "grad_norm": 9.641836166381836, "learning_rate": 2.141870775492405e-05, "loss": 0.3329, "step": 5900 }, { "epoch": 0.21476851515371756, "grad_norm": 3.0128610134124756, "learning_rate": 2.1455047605203867e-05, "loss": 0.205, "step": 5910 }, { "epoch": 0.21513191365651574, "grad_norm": 6.38659143447876, "learning_rate": 2.1491387455483687e-05, "loss": 0.2066, "step": 5920 }, { "epoch": 0.2154953121593139, "grad_norm": 3.397566080093384, "learning_rate": 2.15277273057635e-05, "loss": 0.2682, "step": 5930 }, { "epoch": 0.21585871066211207, "grad_norm": 1.8110759258270264, "learning_rate": 2.156406715604332e-05, "loss": 0.2357, "step": 5940 }, { "epoch": 0.21622210916491025, "grad_norm": 12.391556739807129, "learning_rate": 2.1600407006323135e-05, "loss": 0.3043, "step": 5950 }, { "epoch": 0.2165855076677084, "grad_norm": 1.8203914165496826, "learning_rate": 2.163674685660295e-05, "loss": 0.2979, "step": 5960 }, { "epoch": 0.21694890617050658, "grad_norm": 3.362252950668335, "learning_rate": 2.167308670688277e-05, "loss": 0.1667, "step": 5970 }, { "epoch": 0.21731230467330476, "grad_norm": 4.1468000411987305, "learning_rate": 2.1709426557162586e-05, "loss": 0.3419, "step": 5980 }, { "epoch": 0.2176757031761029, "grad_norm": 2.479288339614868, "learning_rate": 2.1745766407442402e-05, "loss": 0.1938, "step": 5990 }, { "epoch": 0.2180391016789011, "grad_norm": 26.185468673706055, "learning_rate": 2.1782106257722218e-05, "loss": 0.2818, "step": 6000 }, { "epoch": 0.2180391016789011, "eval_loss": 0.4106527864933014, "eval_runtime": 179.9044, "eval_samples_per_second": 41.211, "eval_steps_per_second": 5.153, "eval_wer": 0.2305625646704304, "step": 6000 }, { "epoch": 0.21840250018169924, "grad_norm": 2.2452592849731445, "learning_rate": 2.1818446108002037e-05, "loss": 0.2208, "step": 6010 }, { "epoch": 0.21876589868449742, "grad_norm": 2.273920774459839, "learning_rate": 2.1854785958281853e-05, "loss": 0.2268, "step": 6020 }, { "epoch": 0.2191292971872956, "grad_norm": 1.9621226787567139, "learning_rate": 2.189112580856167e-05, "loss": 0.1965, "step": 6030 }, { "epoch": 0.21949269569009375, "grad_norm": 2.866110324859619, "learning_rate": 2.1927465658841485e-05, "loss": 0.223, "step": 6040 }, { "epoch": 0.21985609419289193, "grad_norm": 15.169930458068848, "learning_rate": 2.1963805509121305e-05, "loss": 0.254, "step": 6050 }, { "epoch": 0.2202194926956901, "grad_norm": 2.174626350402832, "learning_rate": 2.200014535940112e-05, "loss": 0.2056, "step": 6060 }, { "epoch": 0.22058289119848826, "grad_norm": 1.9627354145050049, "learning_rate": 2.2036485209680936e-05, "loss": 0.2211, "step": 6070 }, { "epoch": 0.22094628970128644, "grad_norm": 5.444493770599365, "learning_rate": 2.2072825059960752e-05, "loss": 0.2819, "step": 6080 }, { "epoch": 0.2213096882040846, "grad_norm": 2.5131990909576416, "learning_rate": 2.2109164910240572e-05, "loss": 0.2262, "step": 6090 }, { "epoch": 0.22167308670688277, "grad_norm": 15.716779708862305, "learning_rate": 2.2145504760520388e-05, "loss": 0.2833, "step": 6100 }, { "epoch": 0.22203648520968094, "grad_norm": 1.7514111995697021, "learning_rate": 2.2181844610800204e-05, "loss": 0.2238, "step": 6110 }, { "epoch": 0.2223998837124791, "grad_norm": 1.8236886262893677, "learning_rate": 2.2218184461080023e-05, "loss": 0.1872, "step": 6120 }, { "epoch": 0.22276328221527728, "grad_norm": 4.081092834472656, "learning_rate": 2.225452431135984e-05, "loss": 0.4926, "step": 6130 }, { "epoch": 0.22312668071807545, "grad_norm": 3.3254685401916504, "learning_rate": 2.2290864161639655e-05, "loss": 0.2179, "step": 6140 }, { "epoch": 0.2234900792208736, "grad_norm": 9.953665733337402, "learning_rate": 2.232720401191947e-05, "loss": 0.3221, "step": 6150 }, { "epoch": 0.22385347772367178, "grad_norm": 3.531538724899292, "learning_rate": 2.2363543862199287e-05, "loss": 0.3639, "step": 6160 }, { "epoch": 0.22421687622646994, "grad_norm": 1.6166915893554688, "learning_rate": 2.2399883712479106e-05, "loss": 0.1906, "step": 6170 }, { "epoch": 0.22458027472926811, "grad_norm": 3.0561792850494385, "learning_rate": 2.2436223562758922e-05, "loss": 0.222, "step": 6180 }, { "epoch": 0.2249436732320663, "grad_norm": 7.607283115386963, "learning_rate": 2.2472563413038738e-05, "loss": 0.1769, "step": 6190 }, { "epoch": 0.22530707173486444, "grad_norm": 38.86745834350586, "learning_rate": 2.2508903263318557e-05, "loss": 0.3523, "step": 6200 }, { "epoch": 0.22567047023766262, "grad_norm": 1.2490432262420654, "learning_rate": 2.2545243113598373e-05, "loss": 0.2241, "step": 6210 }, { "epoch": 0.2260338687404608, "grad_norm": 3.8632936477661133, "learning_rate": 2.258158296387819e-05, "loss": 0.2761, "step": 6220 }, { "epoch": 0.22639726724325895, "grad_norm": 6.057976722717285, "learning_rate": 2.261792281415801e-05, "loss": 0.2534, "step": 6230 }, { "epoch": 0.22676066574605713, "grad_norm": 5.2983551025390625, "learning_rate": 2.265426266443782e-05, "loss": 0.1972, "step": 6240 }, { "epoch": 0.22712406424885528, "grad_norm": 7.395950794219971, "learning_rate": 2.269060251471764e-05, "loss": 0.3446, "step": 6250 }, { "epoch": 0.22748746275165346, "grad_norm": 2.7409260272979736, "learning_rate": 2.2726942364997457e-05, "loss": 0.1894, "step": 6260 }, { "epoch": 0.22785086125445164, "grad_norm": 1.7545270919799805, "learning_rate": 2.2763282215277273e-05, "loss": 0.2376, "step": 6270 }, { "epoch": 0.2282142597572498, "grad_norm": 112.10614013671875, "learning_rate": 2.2799622065557092e-05, "loss": 2.0322, "step": 6280 }, { "epoch": 0.22857765826004797, "grad_norm": 3.6547396183013916, "learning_rate": 2.2835961915836908e-05, "loss": 0.2942, "step": 6290 }, { "epoch": 0.22894105676284612, "grad_norm": 50.726261138916016, "learning_rate": 2.2872301766116724e-05, "loss": 0.3279, "step": 6300 }, { "epoch": 0.2293044552656443, "grad_norm": 1.2374241352081299, "learning_rate": 2.2908641616396543e-05, "loss": 0.1912, "step": 6310 }, { "epoch": 0.22966785376844248, "grad_norm": 1.6278152465820312, "learning_rate": 2.2944981466676356e-05, "loss": 0.1913, "step": 6320 }, { "epoch": 0.23003125227124063, "grad_norm": 7.58544397354126, "learning_rate": 2.2981321316956175e-05, "loss": 0.2393, "step": 6330 }, { "epoch": 0.2303946507740388, "grad_norm": 1.7094483375549316, "learning_rate": 2.3017661167235995e-05, "loss": 0.2333, "step": 6340 }, { "epoch": 0.230758049276837, "grad_norm": 24.214885711669922, "learning_rate": 2.3054001017515807e-05, "loss": 0.3019, "step": 6350 }, { "epoch": 0.23112144777963514, "grad_norm": 1.962106704711914, "learning_rate": 2.3090340867795626e-05, "loss": 0.8948, "step": 6360 }, { "epoch": 0.23148484628243332, "grad_norm": 1.3703123331069946, "learning_rate": 2.3126680718075442e-05, "loss": 0.1936, "step": 6370 }, { "epoch": 0.23184824478523147, "grad_norm": 7.507201194763184, "learning_rate": 2.316302056835526e-05, "loss": 0.2185, "step": 6380 }, { "epoch": 0.23221164328802965, "grad_norm": 2.6310977935791016, "learning_rate": 2.3199360418635078e-05, "loss": 0.1961, "step": 6390 }, { "epoch": 0.23257504179082783, "grad_norm": 4.186092376708984, "learning_rate": 2.3235700268914894e-05, "loss": 0.2734, "step": 6400 }, { "epoch": 0.23293844029362598, "grad_norm": 1.817269206047058, "learning_rate": 2.327204011919471e-05, "loss": 0.1966, "step": 6410 }, { "epoch": 0.23330183879642416, "grad_norm": 1.9503989219665527, "learning_rate": 2.330837996947453e-05, "loss": 2.7438, "step": 6420 }, { "epoch": 0.23366523729922234, "grad_norm": 3.1107656955718994, "learning_rate": 2.334471981975434e-05, "loss": 0.2534, "step": 6430 }, { "epoch": 0.2340286358020205, "grad_norm": 5.268273830413818, "learning_rate": 2.338105967003416e-05, "loss": 0.1963, "step": 6440 }, { "epoch": 0.23439203430481867, "grad_norm": 9.586852073669434, "learning_rate": 2.3417399520313977e-05, "loss": 0.2342, "step": 6450 }, { "epoch": 0.23475543280761682, "grad_norm": 3.0218632221221924, "learning_rate": 2.3453739370593793e-05, "loss": 0.231, "step": 6460 }, { "epoch": 0.235118831310415, "grad_norm": 1.9708057641983032, "learning_rate": 2.3490079220873612e-05, "loss": 0.2156, "step": 6470 }, { "epoch": 0.23548222981321318, "grad_norm": 3.6212944984436035, "learning_rate": 2.3526419071153428e-05, "loss": 0.2172, "step": 6480 }, { "epoch": 0.23584562831601133, "grad_norm": 2.5205702781677246, "learning_rate": 2.3562758921433244e-05, "loss": 0.4643, "step": 6490 }, { "epoch": 0.2362090268188095, "grad_norm": 4.1570305824279785, "learning_rate": 2.3599098771713063e-05, "loss": 0.2722, "step": 6500 }, { "epoch": 0.2365724253216077, "grad_norm": 1.8376798629760742, "learning_rate": 2.363543862199288e-05, "loss": 0.2027, "step": 6510 }, { "epoch": 0.23693582382440584, "grad_norm": 2.0464930534362793, "learning_rate": 2.3671778472272695e-05, "loss": 0.183, "step": 6520 }, { "epoch": 0.23729922232720402, "grad_norm": 4.8776469230651855, "learning_rate": 2.370811832255251e-05, "loss": 0.2169, "step": 6530 }, { "epoch": 0.23766262083000217, "grad_norm": 1.5764952898025513, "learning_rate": 2.3744458172832327e-05, "loss": 0.1917, "step": 6540 }, { "epoch": 0.23802601933280035, "grad_norm": 16.132232666015625, "learning_rate": 2.3780798023112147e-05, "loss": 0.2732, "step": 6550 }, { "epoch": 0.23838941783559853, "grad_norm": 8.105748176574707, "learning_rate": 2.3817137873391963e-05, "loss": 0.2055, "step": 6560 }, { "epoch": 0.23875281633839668, "grad_norm": 2.087362051010132, "learning_rate": 2.385347772367178e-05, "loss": 0.186, "step": 6570 }, { "epoch": 0.23911621484119486, "grad_norm": 2.8280205726623535, "learning_rate": 2.3889817573951598e-05, "loss": 0.201, "step": 6580 }, { "epoch": 0.23947961334399304, "grad_norm": 1.2525794506072998, "learning_rate": 2.3926157424231414e-05, "loss": 0.1893, "step": 6590 }, { "epoch": 0.2398430118467912, "grad_norm": 23.419832229614258, "learning_rate": 2.396249727451123e-05, "loss": 0.2554, "step": 6600 }, { "epoch": 0.2398430118467912, "eval_loss": 0.4065987765789032, "eval_runtime": 179.638, "eval_samples_per_second": 41.272, "eval_steps_per_second": 5.16, "eval_wer": 0.24529380797647357, "step": 6600 }, { "epoch": 0.24020641034958937, "grad_norm": 1.3757339715957642, "learning_rate": 2.3998837124791046e-05, "loss": 0.1962, "step": 6610 }, { "epoch": 0.24056980885238752, "grad_norm": 4.00860071182251, "learning_rate": 2.4035176975070865e-05, "loss": 0.1848, "step": 6620 }, { "epoch": 0.2409332073551857, "grad_norm": 5.544015407562256, "learning_rate": 2.407151682535068e-05, "loss": 0.245, "step": 6630 }, { "epoch": 0.24129660585798388, "grad_norm": 1.0618844032287598, "learning_rate": 2.4107856675630497e-05, "loss": 0.191, "step": 6640 }, { "epoch": 0.24166000436078203, "grad_norm": 125.15505981445312, "learning_rate": 2.4144196525910313e-05, "loss": 0.3055, "step": 6650 }, { "epoch": 0.2420234028635802, "grad_norm": 5.015167713165283, "learning_rate": 2.418053637619013e-05, "loss": 0.2701, "step": 6660 }, { "epoch": 0.24238680136637836, "grad_norm": 3.944514274597168, "learning_rate": 2.421687622646995e-05, "loss": 0.2107, "step": 6670 }, { "epoch": 0.24275019986917654, "grad_norm": 3.1539418697357178, "learning_rate": 2.4253216076749764e-05, "loss": 0.232, "step": 6680 }, { "epoch": 0.24311359837197472, "grad_norm": 2.980459213256836, "learning_rate": 2.428955592702958e-05, "loss": 0.2391, "step": 6690 }, { "epoch": 0.24347699687477287, "grad_norm": 35.02157211303711, "learning_rate": 2.43258957773094e-05, "loss": 0.3172, "step": 6700 }, { "epoch": 0.24384039537757105, "grad_norm": 1.606570839881897, "learning_rate": 2.4362235627589216e-05, "loss": 1.5707, "step": 6710 }, { "epoch": 0.24420379388036922, "grad_norm": 3.940394401550293, "learning_rate": 2.439857547786903e-05, "loss": 0.1969, "step": 6720 }, { "epoch": 0.24456719238316738, "grad_norm": 3.8990156650543213, "learning_rate": 2.443491532814885e-05, "loss": 0.2475, "step": 6730 }, { "epoch": 0.24493059088596555, "grad_norm": 2.523500442504883, "learning_rate": 2.4471255178428664e-05, "loss": 0.194, "step": 6740 }, { "epoch": 0.2452939893887637, "grad_norm": 4.920846939086914, "learning_rate": 2.4507595028708483e-05, "loss": 0.2417, "step": 6750 }, { "epoch": 0.24565738789156188, "grad_norm": 2.2269723415374756, "learning_rate": 2.4543934878988302e-05, "loss": 0.2148, "step": 6760 }, { "epoch": 0.24602078639436006, "grad_norm": 1.669722557067871, "learning_rate": 2.4580274729268115e-05, "loss": 0.1979, "step": 6770 }, { "epoch": 0.24638418489715821, "grad_norm": 4.581501007080078, "learning_rate": 2.4616614579547934e-05, "loss": 0.2412, "step": 6780 }, { "epoch": 0.2467475833999564, "grad_norm": 2.6605944633483887, "learning_rate": 2.465295442982775e-05, "loss": 0.1992, "step": 6790 }, { "epoch": 0.24711098190275457, "grad_norm": 7.089646816253662, "learning_rate": 2.4689294280107566e-05, "loss": 0.2789, "step": 6800 }, { "epoch": 0.24747438040555272, "grad_norm": 1.9901385307312012, "learning_rate": 2.4725634130387385e-05, "loss": 1.907, "step": 6810 }, { "epoch": 0.2478377789083509, "grad_norm": 2.5120224952697754, "learning_rate": 2.4761973980667198e-05, "loss": 0.1908, "step": 6820 }, { "epoch": 0.24820117741114905, "grad_norm": 1.553806185722351, "learning_rate": 2.4794679845919035e-05, "loss": 1.6707, "step": 6830 }, { "epoch": 0.24856457591394723, "grad_norm": 2.130095958709717, "learning_rate": 2.4831019696198855e-05, "loss": 0.222, "step": 6840 }, { "epoch": 0.2489279744167454, "grad_norm": 15.832701683044434, "learning_rate": 2.486735954647867e-05, "loss": 0.4634, "step": 6850 }, { "epoch": 0.24929137291954356, "grad_norm": 1.87086820602417, "learning_rate": 2.4903699396758487e-05, "loss": 0.1887, "step": 6860 }, { "epoch": 0.24965477142234174, "grad_norm": 2.32084584236145, "learning_rate": 2.4940039247038303e-05, "loss": 0.1881, "step": 6870 }, { "epoch": 0.2500181699251399, "grad_norm": 3.3228461742401123, "learning_rate": 2.497637909731812e-05, "loss": 0.264, "step": 6880 }, { "epoch": 0.2503815684279381, "grad_norm": 1.8676607608795166, "learning_rate": 2.5012718947597935e-05, "loss": 0.2102, "step": 6890 }, { "epoch": 0.2507449669307362, "grad_norm": 17.540319442749023, "learning_rate": 2.5049058797877757e-05, "loss": 0.2567, "step": 6900 }, { "epoch": 0.25110836543353443, "grad_norm": 1.6276856660842896, "learning_rate": 2.508539864815757e-05, "loss": 0.1917, "step": 6910 }, { "epoch": 0.2514717639363326, "grad_norm": 2.347691059112549, "learning_rate": 2.5121738498437386e-05, "loss": 0.1998, "step": 6920 }, { "epoch": 0.25183516243913073, "grad_norm": 3.5337650775909424, "learning_rate": 2.5158078348717205e-05, "loss": 0.2418, "step": 6930 }, { "epoch": 0.25219856094192894, "grad_norm": 3.7415404319763184, "learning_rate": 2.519441819899702e-05, "loss": 0.2074, "step": 6940 }, { "epoch": 0.2525619594447271, "grad_norm": 16.603042602539062, "learning_rate": 2.5230758049276837e-05, "loss": 0.3104, "step": 6950 }, { "epoch": 0.25292535794752524, "grad_norm": 1.4864579439163208, "learning_rate": 2.5267097899556656e-05, "loss": 0.1771, "step": 6960 }, { "epoch": 0.25328875645032345, "grad_norm": 1.7935876846313477, "learning_rate": 2.5303437749836472e-05, "loss": 0.1984, "step": 6970 }, { "epoch": 0.2536521549531216, "grad_norm": 3.187351942062378, "learning_rate": 2.533977760011629e-05, "loss": 0.1828, "step": 6980 }, { "epoch": 0.25401555345591975, "grad_norm": 1.7930549383163452, "learning_rate": 2.5376117450396104e-05, "loss": 0.2132, "step": 6990 }, { "epoch": 0.2543789519587179, "grad_norm": 4.86196231842041, "learning_rate": 2.5412457300675924e-05, "loss": 0.2426, "step": 7000 }, { "epoch": 0.2547423504615161, "grad_norm": 2.784335136413574, "learning_rate": 2.544879715095574e-05, "loss": 1.6557, "step": 7010 }, { "epoch": 0.25510574896431426, "grad_norm": 1.460509181022644, "learning_rate": 2.5485137001235552e-05, "loss": 0.1812, "step": 7020 }, { "epoch": 0.2554691474671124, "grad_norm": 2.5204946994781494, "learning_rate": 2.5521476851515375e-05, "loss": 0.3731, "step": 7030 }, { "epoch": 0.2558325459699106, "grad_norm": 1.6122281551361084, "learning_rate": 2.555781670179519e-05, "loss": 0.2256, "step": 7040 }, { "epoch": 0.25619594447270877, "grad_norm": 8.13974666595459, "learning_rate": 2.5594156552075004e-05, "loss": 0.2756, "step": 7050 }, { "epoch": 0.2565593429755069, "grad_norm": 2.1560494899749756, "learning_rate": 2.5630496402354826e-05, "loss": 0.1869, "step": 7060 }, { "epoch": 0.25692274147830513, "grad_norm": 2.938570737838745, "learning_rate": 2.5666836252634642e-05, "loss": 0.187, "step": 7070 }, { "epoch": 0.2572861399811033, "grad_norm": 1.6697754859924316, "learning_rate": 2.5703176102914455e-05, "loss": 0.1841, "step": 7080 }, { "epoch": 0.25764953848390143, "grad_norm": 2.500377655029297, "learning_rate": 2.5739515953194278e-05, "loss": 0.4097, "step": 7090 }, { "epoch": 0.25801293698669964, "grad_norm": 6.614553928375244, "learning_rate": 2.577585580347409e-05, "loss": 0.2779, "step": 7100 }, { "epoch": 0.2583763354894978, "grad_norm": 2.1538803577423096, "learning_rate": 2.5812195653753906e-05, "loss": 0.2035, "step": 7110 }, { "epoch": 0.25873973399229594, "grad_norm": 2.64719820022583, "learning_rate": 2.584853550403373e-05, "loss": 0.1815, "step": 7120 }, { "epoch": 0.25910313249509415, "grad_norm": 4.064308166503906, "learning_rate": 2.588487535431354e-05, "loss": 0.2115, "step": 7130 }, { "epoch": 0.2594665309978923, "grad_norm": 4.535513877868652, "learning_rate": 2.5921215204593357e-05, "loss": 0.1733, "step": 7140 }, { "epoch": 0.25982992950069045, "grad_norm": 14.761083602905273, "learning_rate": 2.5957555054873173e-05, "loss": 0.3061, "step": 7150 }, { "epoch": 0.2601933280034886, "grad_norm": 2.902010202407837, "learning_rate": 2.5993894905152993e-05, "loss": 0.2539, "step": 7160 }, { "epoch": 0.2605567265062868, "grad_norm": 2.6499462127685547, "learning_rate": 2.603023475543281e-05, "loss": 0.209, "step": 7170 }, { "epoch": 0.26092012500908496, "grad_norm": 2.0298879146575928, "learning_rate": 2.6066574605712625e-05, "loss": 0.1966, "step": 7180 }, { "epoch": 0.2612835235118831, "grad_norm": 5.285839080810547, "learning_rate": 2.6102914455992444e-05, "loss": 0.2416, "step": 7190 }, { "epoch": 0.2616469220146813, "grad_norm": 14.89932918548584, "learning_rate": 2.613925430627226e-05, "loss": 0.2649, "step": 7200 }, { "epoch": 0.2616469220146813, "eval_loss": 0.43822312355041504, "eval_runtime": 180.398, "eval_samples_per_second": 41.098, "eval_steps_per_second": 5.139, "eval_wer": 0.23023580881151634, "step": 7200 }, { "epoch": 0.26201032051747947, "grad_norm": 2.9772818088531494, "learning_rate": 2.6175594156552076e-05, "loss": 0.2158, "step": 7210 }, { "epoch": 0.2623737190202776, "grad_norm": 1.4703949689865112, "learning_rate": 2.6211934006831895e-05, "loss": 0.1925, "step": 7220 }, { "epoch": 0.2627371175230758, "grad_norm": 2.6034176349639893, "learning_rate": 2.624827385711171e-05, "loss": 0.2065, "step": 7230 }, { "epoch": 0.263100516025874, "grad_norm": 2.8392562866210938, "learning_rate": 2.6284613707391527e-05, "loss": 0.2097, "step": 7240 }, { "epoch": 0.2634639145286721, "grad_norm": 8.892645835876465, "learning_rate": 2.6320953557671347e-05, "loss": 0.2835, "step": 7250 }, { "epoch": 0.26382731303147033, "grad_norm": 1.616268277168274, "learning_rate": 2.6357293407951162e-05, "loss": 0.1875, "step": 7260 }, { "epoch": 0.2641907115342685, "grad_norm": 2.1791138648986816, "learning_rate": 2.6393633258230975e-05, "loss": 0.1722, "step": 7270 }, { "epoch": 0.26455411003706664, "grad_norm": 2.8691608905792236, "learning_rate": 2.642997310851079e-05, "loss": 0.2377, "step": 7280 }, { "epoch": 0.26491750853986484, "grad_norm": 1.5673551559448242, "learning_rate": 2.6466312958790614e-05, "loss": 0.4404, "step": 7290 }, { "epoch": 0.265280907042663, "grad_norm": 7.296738147735596, "learning_rate": 2.6502652809070426e-05, "loss": 0.3198, "step": 7300 }, { "epoch": 0.26564430554546115, "grad_norm": 6.389322757720947, "learning_rate": 2.6538992659350242e-05, "loss": 0.2041, "step": 7310 }, { "epoch": 0.2660077040482593, "grad_norm": 11.64201831817627, "learning_rate": 2.657533250963006e-05, "loss": 0.2014, "step": 7320 }, { "epoch": 0.2663711025510575, "grad_norm": 4.454049587249756, "learning_rate": 2.6611672359909878e-05, "loss": 0.2295, "step": 7330 }, { "epoch": 0.26673450105385565, "grad_norm": 2.091968297958374, "learning_rate": 2.6648012210189694e-05, "loss": 0.1784, "step": 7340 }, { "epoch": 0.2670978995566538, "grad_norm": 6.904966354370117, "learning_rate": 2.6684352060469513e-05, "loss": 0.3303, "step": 7350 }, { "epoch": 0.267461298059452, "grad_norm": 1.6893994808197021, "learning_rate": 2.672069191074933e-05, "loss": 0.2534, "step": 7360 }, { "epoch": 0.26782469656225016, "grad_norm": 1.3456122875213623, "learning_rate": 2.6757031761029145e-05, "loss": 0.1829, "step": 7370 }, { "epoch": 0.2681880950650483, "grad_norm": 7.959611892700195, "learning_rate": 2.6793371611308964e-05, "loss": 0.2425, "step": 7380 }, { "epoch": 0.2685514935678465, "grad_norm": 1.5833840370178223, "learning_rate": 2.682971146158878e-05, "loss": 0.1988, "step": 7390 }, { "epoch": 0.2689148920706447, "grad_norm": 19.886600494384766, "learning_rate": 2.6866051311868596e-05, "loss": 0.3563, "step": 7400 }, { "epoch": 0.2692782905734428, "grad_norm": 2.55553936958313, "learning_rate": 2.6902391162148415e-05, "loss": 0.1857, "step": 7410 }, { "epoch": 0.26964168907624103, "grad_norm": 2.125661849975586, "learning_rate": 2.693873101242823e-05, "loss": 0.7398, "step": 7420 }, { "epoch": 0.2700050875790392, "grad_norm": 2.577770233154297, "learning_rate": 2.6975070862708047e-05, "loss": 0.5703, "step": 7430 }, { "epoch": 0.27036848608183733, "grad_norm": 2.3848683834075928, "learning_rate": 2.701141071298786e-05, "loss": 0.173, "step": 7440 }, { "epoch": 0.2707318845846355, "grad_norm": 22.96078109741211, "learning_rate": 2.7047750563267683e-05, "loss": 0.293, "step": 7450 }, { "epoch": 0.2710952830874337, "grad_norm": 3.206329822540283, "learning_rate": 2.70840904135475e-05, "loss": 0.4585, "step": 7460 }, { "epoch": 0.27145868159023184, "grad_norm": 2.251904010772705, "learning_rate": 2.712043026382731e-05, "loss": 0.2196, "step": 7470 }, { "epoch": 0.27182208009303, "grad_norm": 3.7445387840270996, "learning_rate": 2.7156770114107134e-05, "loss": 0.2195, "step": 7480 }, { "epoch": 0.2721854785958282, "grad_norm": 1.5370314121246338, "learning_rate": 2.7193109964386947e-05, "loss": 0.2007, "step": 7490 }, { "epoch": 0.27254887709862635, "grad_norm": 18.44324493408203, "learning_rate": 2.7229449814666763e-05, "loss": 0.3091, "step": 7500 }, { "epoch": 0.2729122756014245, "grad_norm": 1.5792795419692993, "learning_rate": 2.7265789664946585e-05, "loss": 0.1601, "step": 7510 }, { "epoch": 0.2732756741042227, "grad_norm": 9.128384590148926, "learning_rate": 2.7302129515226398e-05, "loss": 0.178, "step": 7520 }, { "epoch": 0.27363907260702086, "grad_norm": 2.2285592555999756, "learning_rate": 2.7338469365506214e-05, "loss": 2.4074, "step": 7530 }, { "epoch": 0.274002471109819, "grad_norm": 2.2741541862487793, "learning_rate": 2.7374809215786033e-05, "loss": 0.246, "step": 7540 }, { "epoch": 0.2743658696126172, "grad_norm": 17.185470581054688, "learning_rate": 2.741114906606585e-05, "loss": 0.2577, "step": 7550 }, { "epoch": 0.27472926811541537, "grad_norm": 1.1907752752304077, "learning_rate": 2.7447488916345665e-05, "loss": 0.2073, "step": 7560 }, { "epoch": 0.2750926666182135, "grad_norm": 3.535682201385498, "learning_rate": 2.748382876662548e-05, "loss": 0.2012, "step": 7570 }, { "epoch": 0.27545606512101173, "grad_norm": 3.585460662841797, "learning_rate": 2.75201686169053e-05, "loss": 0.2147, "step": 7580 }, { "epoch": 0.2758194636238099, "grad_norm": 1.9034504890441895, "learning_rate": 2.7556508467185116e-05, "loss": 0.1626, "step": 7590 }, { "epoch": 0.27618286212660803, "grad_norm": 39.66155242919922, "learning_rate": 2.7592848317464932e-05, "loss": 0.2617, "step": 7600 }, { "epoch": 0.2765462606294062, "grad_norm": 1.5698285102844238, "learning_rate": 2.762918816774475e-05, "loss": 0.3136, "step": 7610 }, { "epoch": 0.2769096591322044, "grad_norm": 2.4866106510162354, "learning_rate": 2.7665528018024568e-05, "loss": 0.1971, "step": 7620 }, { "epoch": 0.27727305763500254, "grad_norm": 9.244050025939941, "learning_rate": 2.7701867868304384e-05, "loss": 0.2025, "step": 7630 }, { "epoch": 0.2776364561378007, "grad_norm": 2.1344380378723145, "learning_rate": 2.7738207718584203e-05, "loss": 0.2055, "step": 7640 }, { "epoch": 0.2779998546405989, "grad_norm": 13.503227233886719, "learning_rate": 2.777454756886402e-05, "loss": 0.2671, "step": 7650 }, { "epoch": 0.27836325314339705, "grad_norm": 2.238834857940674, "learning_rate": 2.781088741914383e-05, "loss": 0.1714, "step": 7660 }, { "epoch": 0.2787266516461952, "grad_norm": 0.897280216217041, "learning_rate": 2.7847227269423654e-05, "loss": 0.1615, "step": 7670 }, { "epoch": 0.2790900501489934, "grad_norm": 5.808285713195801, "learning_rate": 2.788356711970347e-05, "loss": 0.2052, "step": 7680 }, { "epoch": 0.27945344865179156, "grad_norm": 1.8924663066864014, "learning_rate": 2.7919906969983283e-05, "loss": 0.1769, "step": 7690 }, { "epoch": 0.2798168471545897, "grad_norm": 11.939653396606445, "learning_rate": 2.7956246820263105e-05, "loss": 0.2859, "step": 7700 }, { "epoch": 0.2801802456573879, "grad_norm": 2.5077621936798096, "learning_rate": 2.7992586670542918e-05, "loss": 0.1767, "step": 7710 }, { "epoch": 0.28054364416018607, "grad_norm": 2.0336718559265137, "learning_rate": 2.8028926520822734e-05, "loss": 0.6757, "step": 7720 }, { "epoch": 0.2809070426629842, "grad_norm": 3.9547739028930664, "learning_rate": 2.806526637110255e-05, "loss": 0.2322, "step": 7730 }, { "epoch": 0.28127044116578237, "grad_norm": 1.8082466125488281, "learning_rate": 2.810160622138237e-05, "loss": 0.1758, "step": 7740 }, { "epoch": 0.2816338396685806, "grad_norm": 16.173986434936523, "learning_rate": 2.8137946071662185e-05, "loss": 0.2642, "step": 7750 }, { "epoch": 0.28199723817137873, "grad_norm": 3.341475486755371, "learning_rate": 2.8174285921942e-05, "loss": 3.4407, "step": 7760 }, { "epoch": 0.2823606366741769, "grad_norm": 1.7220288515090942, "learning_rate": 2.821062577222182e-05, "loss": 0.1965, "step": 7770 }, { "epoch": 0.2827240351769751, "grad_norm": 3.8534610271453857, "learning_rate": 2.8246965622501637e-05, "loss": 0.1966, "step": 7780 }, { "epoch": 0.28308743367977324, "grad_norm": 1.962780475616455, "learning_rate": 2.8283305472781453e-05, "loss": 0.1859, "step": 7790 }, { "epoch": 0.2834508321825714, "grad_norm": 40.28166961669922, "learning_rate": 2.8319645323061272e-05, "loss": 0.6588, "step": 7800 }, { "epoch": 0.2834508321825714, "eval_loss": 0.42970865964889526, "eval_runtime": 180.6321, "eval_samples_per_second": 41.045, "eval_steps_per_second": 5.132, "eval_wer": 0.2413455080145951, "step": 7800 }, { "epoch": 0.2838142306853696, "grad_norm": 1.748349666595459, "learning_rate": 2.8355985173341088e-05, "loss": 0.1786, "step": 7810 }, { "epoch": 0.28417762918816775, "grad_norm": 2.1137237548828125, "learning_rate": 2.8392325023620904e-05, "loss": 0.1803, "step": 7820 }, { "epoch": 0.2845410276909659, "grad_norm": 1.59931218624115, "learning_rate": 2.8428664873900723e-05, "loss": 0.2107, "step": 7830 }, { "epoch": 0.2849044261937641, "grad_norm": 2.263493061065674, "learning_rate": 2.846500472418054e-05, "loss": 0.1967, "step": 7840 }, { "epoch": 0.28526782469656226, "grad_norm": 20.798656463623047, "learning_rate": 2.8501344574460355e-05, "loss": 0.268, "step": 7850 }, { "epoch": 0.2856312231993604, "grad_norm": 3.0182480812072754, "learning_rate": 2.8537684424740168e-05, "loss": 0.1901, "step": 7860 }, { "epoch": 0.2859946217021586, "grad_norm": 6.6378493309021, "learning_rate": 2.857402427501999e-05, "loss": 0.1804, "step": 7870 }, { "epoch": 0.28635802020495676, "grad_norm": 2.5524067878723145, "learning_rate": 2.8610364125299803e-05, "loss": 0.233, "step": 7880 }, { "epoch": 0.2867214187077549, "grad_norm": 2.6409335136413574, "learning_rate": 2.864670397557962e-05, "loss": 0.1717, "step": 7890 }, { "epoch": 0.28708481721055307, "grad_norm": 6.834221363067627, "learning_rate": 2.868304382585944e-05, "loss": 0.2956, "step": 7900 }, { "epoch": 0.2874482157133513, "grad_norm": 2.760669708251953, "learning_rate": 2.8719383676139254e-05, "loss": 0.1789, "step": 7910 }, { "epoch": 0.2878116142161494, "grad_norm": 1.7543925046920776, "learning_rate": 2.875572352641907e-05, "loss": 0.2041, "step": 7920 }, { "epoch": 0.2881750127189476, "grad_norm": 4.784151077270508, "learning_rate": 2.879206337669889e-05, "loss": 0.2259, "step": 7930 }, { "epoch": 0.2885384112217458, "grad_norm": 2.1769356727600098, "learning_rate": 2.8828403226978706e-05, "loss": 0.2023, "step": 7940 }, { "epoch": 0.28890180972454393, "grad_norm": 9.373051643371582, "learning_rate": 2.886474307725852e-05, "loss": 0.3511, "step": 7950 }, { "epoch": 0.2892652082273421, "grad_norm": 1.895190715789795, "learning_rate": 2.890108292753834e-05, "loss": 0.1976, "step": 7960 }, { "epoch": 0.2896286067301403, "grad_norm": 3.4400076866149902, "learning_rate": 2.8937422777818157e-05, "loss": 0.1902, "step": 7970 }, { "epoch": 0.28999200523293844, "grad_norm": 9.663911819458008, "learning_rate": 2.8973762628097973e-05, "loss": 0.2551, "step": 7980 }, { "epoch": 0.2903554037357366, "grad_norm": 5.1054463386535645, "learning_rate": 2.9010102478377792e-05, "loss": 0.2001, "step": 7990 }, { "epoch": 0.2907188022385348, "grad_norm": 9.06143569946289, "learning_rate": 2.9046442328657608e-05, "loss": 0.2266, "step": 8000 }, { "epoch": 0.29108220074133295, "grad_norm": 1.604077696800232, "learning_rate": 2.9082782178937424e-05, "loss": 0.1883, "step": 8010 }, { "epoch": 0.2914455992441311, "grad_norm": 2.245687246322632, "learning_rate": 2.911912202921724e-05, "loss": 0.2093, "step": 8020 }, { "epoch": 0.29180899774692926, "grad_norm": 3.8099372386932373, "learning_rate": 2.915546187949706e-05, "loss": 0.2283, "step": 8030 }, { "epoch": 0.29217239624972746, "grad_norm": 2.135115623474121, "learning_rate": 2.9191801729776875e-05, "loss": 0.2369, "step": 8040 }, { "epoch": 0.2925357947525256, "grad_norm": 5.596993446350098, "learning_rate": 2.9228141580056688e-05, "loss": 0.2709, "step": 8050 }, { "epoch": 0.29289919325532376, "grad_norm": 1.3212496042251587, "learning_rate": 2.926448143033651e-05, "loss": 0.1968, "step": 8060 }, { "epoch": 0.29326259175812197, "grad_norm": 1.9241231679916382, "learning_rate": 2.9300821280616327e-05, "loss": 0.3883, "step": 8070 }, { "epoch": 0.2936259902609201, "grad_norm": 4.008016109466553, "learning_rate": 2.933716113089614e-05, "loss": 0.2074, "step": 8080 }, { "epoch": 0.2939893887637183, "grad_norm": 1.5871399641036987, "learning_rate": 2.9373500981175962e-05, "loss": 0.1698, "step": 8090 }, { "epoch": 0.2943527872665165, "grad_norm": 19.480670928955078, "learning_rate": 2.9409840831455774e-05, "loss": 0.4023, "step": 8100 }, { "epoch": 0.29471618576931463, "grad_norm": 3.8420443534851074, "learning_rate": 2.944618068173559e-05, "loss": 0.181, "step": 8110 }, { "epoch": 0.2950795842721128, "grad_norm": 1.9951499700546265, "learning_rate": 2.9482520532015413e-05, "loss": 0.2872, "step": 8120 }, { "epoch": 0.295442982774911, "grad_norm": 4.958978176116943, "learning_rate": 2.9518860382295226e-05, "loss": 0.2359, "step": 8130 }, { "epoch": 0.29580638127770914, "grad_norm": 1.5531708002090454, "learning_rate": 2.9555200232575042e-05, "loss": 0.2138, "step": 8140 }, { "epoch": 0.2961697797805073, "grad_norm": 5.297884941101074, "learning_rate": 2.9591540082854864e-05, "loss": 0.2694, "step": 8150 }, { "epoch": 0.2965331782833055, "grad_norm": 1.5989892482757568, "learning_rate": 2.9627879933134677e-05, "loss": 0.1686, "step": 8160 }, { "epoch": 0.29689657678610365, "grad_norm": 3.347722291946411, "learning_rate": 2.9664219783414493e-05, "loss": 0.2206, "step": 8170 }, { "epoch": 0.2972599752889018, "grad_norm": 2.9551491737365723, "learning_rate": 2.970055963369431e-05, "loss": 0.2274, "step": 8180 }, { "epoch": 0.29762337379169995, "grad_norm": 2.527963638305664, "learning_rate": 2.973689948397413e-05, "loss": 0.1731, "step": 8190 }, { "epoch": 0.29798677229449816, "grad_norm": 5.818012714385986, "learning_rate": 2.9773239334253944e-05, "loss": 0.265, "step": 8200 }, { "epoch": 0.2983501707972963, "grad_norm": 1.5580624341964722, "learning_rate": 2.980594519950578e-05, "loss": 2.627, "step": 8210 }, { "epoch": 0.29871356930009446, "grad_norm": 1.6011282205581665, "learning_rate": 2.9842285049785594e-05, "loss": 0.1811, "step": 8220 }, { "epoch": 0.29907696780289267, "grad_norm": 44.825157165527344, "learning_rate": 2.987862490006541e-05, "loss": 0.4799, "step": 8230 }, { "epoch": 0.2994403663056908, "grad_norm": 1.520982027053833, "learning_rate": 2.991496475034523e-05, "loss": 0.1935, "step": 8240 }, { "epoch": 0.29980376480848897, "grad_norm": 6.3379058837890625, "learning_rate": 2.9951304600625046e-05, "loss": 0.2435, "step": 8250 }, { "epoch": 0.3001671633112872, "grad_norm": 2.2493958473205566, "learning_rate": 2.998764445090486e-05, "loss": 0.1984, "step": 8260 }, { "epoch": 0.30053056181408533, "grad_norm": 3.234196186065674, "learning_rate": 3.002398430118468e-05, "loss": 0.1785, "step": 8270 }, { "epoch": 0.3008939603168835, "grad_norm": 4.99449348449707, "learning_rate": 3.0060324151464497e-05, "loss": 0.1888, "step": 8280 }, { "epoch": 0.3012573588196817, "grad_norm": 1.8624048233032227, "learning_rate": 3.0096664001744313e-05, "loss": 1.6561, "step": 8290 }, { "epoch": 0.30162075732247984, "grad_norm": 7.615640640258789, "learning_rate": 3.0133003852024132e-05, "loss": 0.2918, "step": 8300 }, { "epoch": 0.301984155825278, "grad_norm": 1.6900697946548462, "learning_rate": 3.0169343702303948e-05, "loss": 0.2255, "step": 8310 }, { "epoch": 0.3023475543280762, "grad_norm": 2.2034566402435303, "learning_rate": 3.0205683552583764e-05, "loss": 0.198, "step": 8320 }, { "epoch": 0.30271095283087435, "grad_norm": 2.044597625732422, "learning_rate": 3.0242023402863583e-05, "loss": 0.1946, "step": 8330 }, { "epoch": 0.3030743513336725, "grad_norm": 1.6171079874038696, "learning_rate": 3.02783632531434e-05, "loss": 0.1935, "step": 8340 }, { "epoch": 0.30343774983647065, "grad_norm": 2.8435897827148438, "learning_rate": 3.0314703103423215e-05, "loss": 0.3876, "step": 8350 }, { "epoch": 0.30380114833926886, "grad_norm": 2.023019552230835, "learning_rate": 3.0351042953703035e-05, "loss": 0.1879, "step": 8360 }, { "epoch": 0.304164546842067, "grad_norm": 1.7610963582992554, "learning_rate": 3.038738280398285e-05, "loss": 0.1901, "step": 8370 }, { "epoch": 0.30452794534486516, "grad_norm": 1.9482131004333496, "learning_rate": 3.0423722654262667e-05, "loss": 0.2119, "step": 8380 }, { "epoch": 0.30489134384766337, "grad_norm": 1.6463958024978638, "learning_rate": 3.046006250454248e-05, "loss": 0.2067, "step": 8390 }, { "epoch": 0.3052547423504615, "grad_norm": 10.607688903808594, "learning_rate": 3.0496402354822302e-05, "loss": 0.2709, "step": 8400 }, { "epoch": 0.3052547423504615, "eval_loss": 0.3912598192691803, "eval_runtime": 179.9461, "eval_samples_per_second": 41.201, "eval_steps_per_second": 5.152, "eval_wer": 0.22865648882676493, "step": 8400 }, { "epoch": 0.30561814085325967, "grad_norm": 5.675121307373047, "learning_rate": 3.053274220510212e-05, "loss": 0.1937, "step": 8410 }, { "epoch": 0.3059815393560579, "grad_norm": 1.9001195430755615, "learning_rate": 3.056908205538193e-05, "loss": 0.1668, "step": 8420 }, { "epoch": 0.306344937858856, "grad_norm": 6.807525157928467, "learning_rate": 3.060542190566175e-05, "loss": 0.2077, "step": 8430 }, { "epoch": 0.3067083363616542, "grad_norm": 2.067265272140503, "learning_rate": 3.064176175594157e-05, "loss": 0.1596, "step": 8440 }, { "epoch": 0.3070717348644524, "grad_norm": 15.267791748046875, "learning_rate": 3.067810160622138e-05, "loss": 0.2667, "step": 8450 }, { "epoch": 0.30743513336725053, "grad_norm": 1.367903709411621, "learning_rate": 3.07144414565012e-05, "loss": 0.1819, "step": 8460 }, { "epoch": 0.3077985318700487, "grad_norm": 1.531816840171814, "learning_rate": 3.075078130678102e-05, "loss": 0.1681, "step": 8470 }, { "epoch": 0.30816193037284684, "grad_norm": 3.668304204940796, "learning_rate": 3.078712115706083e-05, "loss": 0.2488, "step": 8480 }, { "epoch": 0.30852532887564504, "grad_norm": 2.2622220516204834, "learning_rate": 3.082346100734065e-05, "loss": 0.1866, "step": 8490 }, { "epoch": 0.3088887273784432, "grad_norm": 6.450117111206055, "learning_rate": 3.085980085762047e-05, "loss": 0.2676, "step": 8500 }, { "epoch": 0.30925212588124135, "grad_norm": 2.096731424331665, "learning_rate": 3.0896140707900284e-05, "loss": 0.1952, "step": 8510 }, { "epoch": 0.30961552438403955, "grad_norm": 1.3809120655059814, "learning_rate": 3.09324805581801e-05, "loss": 0.3478, "step": 8520 }, { "epoch": 0.3099789228868377, "grad_norm": 4.2257585525512695, "learning_rate": 3.096882040845992e-05, "loss": 0.2126, "step": 8530 }, { "epoch": 0.31034232138963586, "grad_norm": 2.8543758392333984, "learning_rate": 3.1005160258739736e-05, "loss": 0.8169, "step": 8540 }, { "epoch": 0.31070571989243406, "grad_norm": 5.897162437438965, "learning_rate": 3.104150010901955e-05, "loss": 0.2421, "step": 8550 }, { "epoch": 0.3110691183952322, "grad_norm": 1.8980865478515625, "learning_rate": 3.107783995929937e-05, "loss": 0.193, "step": 8560 }, { "epoch": 0.31143251689803036, "grad_norm": 2.113833427429199, "learning_rate": 3.111417980957919e-05, "loss": 0.1553, "step": 8570 }, { "epoch": 0.31179591540082857, "grad_norm": 2.7569572925567627, "learning_rate": 3.1150519659859e-05, "loss": 0.2003, "step": 8580 }, { "epoch": 0.3121593139036267, "grad_norm": 2.480473756790161, "learning_rate": 3.118685951013882e-05, "loss": 0.2173, "step": 8590 }, { "epoch": 0.3125227124064249, "grad_norm": 12.174234390258789, "learning_rate": 3.122319936041864e-05, "loss": 0.3081, "step": 8600 }, { "epoch": 0.3128861109092231, "grad_norm": 2.8075544834136963, "learning_rate": 3.125953921069845e-05, "loss": 0.263, "step": 8610 }, { "epoch": 0.31324950941202123, "grad_norm": 16.535009384155273, "learning_rate": 3.129587906097827e-05, "loss": 0.1968, "step": 8620 }, { "epoch": 0.3136129079148194, "grad_norm": 6.4783711433410645, "learning_rate": 3.133221891125809e-05, "loss": 0.2396, "step": 8630 }, { "epoch": 0.31397630641761753, "grad_norm": 0.945353090763092, "learning_rate": 3.13685587615379e-05, "loss": 0.1623, "step": 8640 }, { "epoch": 0.31433970492041574, "grad_norm": 7.135663032531738, "learning_rate": 3.140489861181772e-05, "loss": 0.3006, "step": 8650 }, { "epoch": 0.3147031034232139, "grad_norm": 1.275896430015564, "learning_rate": 3.144123846209754e-05, "loss": 0.1845, "step": 8660 }, { "epoch": 0.31506650192601204, "grad_norm": 2.1660525798797607, "learning_rate": 3.147757831237735e-05, "loss": 0.1614, "step": 8670 }, { "epoch": 0.31542990042881025, "grad_norm": 3.878882646560669, "learning_rate": 3.1513918162657166e-05, "loss": 0.2124, "step": 8680 }, { "epoch": 0.3157932989316084, "grad_norm": 3.452864170074463, "learning_rate": 3.155025801293699e-05, "loss": 0.1659, "step": 8690 }, { "epoch": 0.31615669743440655, "grad_norm": 4.0493292808532715, "learning_rate": 3.1586597863216805e-05, "loss": 0.2653, "step": 8700 }, { "epoch": 0.31652009593720476, "grad_norm": 1.9184757471084595, "learning_rate": 3.162293771349662e-05, "loss": 0.2043, "step": 8710 }, { "epoch": 0.3168834944400029, "grad_norm": 4.22302770614624, "learning_rate": 3.165927756377644e-05, "loss": 0.2005, "step": 8720 }, { "epoch": 0.31724689294280106, "grad_norm": 8.557464599609375, "learning_rate": 3.1695617414056256e-05, "loss": 0.2135, "step": 8730 }, { "epoch": 0.31761029144559927, "grad_norm": 1.6090949773788452, "learning_rate": 3.173195726433607e-05, "loss": 0.1565, "step": 8740 }, { "epoch": 0.3179736899483974, "grad_norm": 35.859737396240234, "learning_rate": 3.1768297114615894e-05, "loss": 0.3239, "step": 8750 }, { "epoch": 0.31833708845119557, "grad_norm": 2.837944507598877, "learning_rate": 3.180463696489571e-05, "loss": 0.1902, "step": 8760 }, { "epoch": 0.3187004869539937, "grad_norm": 1.6548888683319092, "learning_rate": 3.184097681517552e-05, "loss": 0.1732, "step": 8770 }, { "epoch": 0.31906388545679193, "grad_norm": 3.840034246444702, "learning_rate": 3.187731666545534e-05, "loss": 0.2318, "step": 8780 }, { "epoch": 0.3194272839595901, "grad_norm": 3.3684277534484863, "learning_rate": 3.191365651573516e-05, "loss": 0.1794, "step": 8790 }, { "epoch": 0.31979068246238823, "grad_norm": 8.668655395507812, "learning_rate": 3.194999636601497e-05, "loss": 0.2745, "step": 8800 }, { "epoch": 0.32015408096518644, "grad_norm": 1.412441611289978, "learning_rate": 3.198633621629479e-05, "loss": 0.1913, "step": 8810 }, { "epoch": 0.3205174794679846, "grad_norm": 1.6273925304412842, "learning_rate": 3.202267606657461e-05, "loss": 0.1905, "step": 8820 }, { "epoch": 0.32088087797078274, "grad_norm": 5.704558372497559, "learning_rate": 3.205901591685442e-05, "loss": 0.2217, "step": 8830 }, { "epoch": 0.32124427647358095, "grad_norm": 2.248072385787964, "learning_rate": 3.209535576713424e-05, "loss": 0.1752, "step": 8840 }, { "epoch": 0.3216076749763791, "grad_norm": 8.330979347229004, "learning_rate": 3.213169561741406e-05, "loss": 0.2693, "step": 8850 }, { "epoch": 0.32197107347917725, "grad_norm": 6.713444709777832, "learning_rate": 3.2168035467693873e-05, "loss": 0.1821, "step": 8860 }, { "epoch": 0.32233447198197546, "grad_norm": 1.7717983722686768, "learning_rate": 3.220437531797369e-05, "loss": 0.1572, "step": 8870 }, { "epoch": 0.3226978704847736, "grad_norm": 3.8419570922851562, "learning_rate": 3.224071516825351e-05, "loss": 0.2168, "step": 8880 }, { "epoch": 0.32306126898757176, "grad_norm": 1.8515948057174683, "learning_rate": 3.2277055018533325e-05, "loss": 0.1474, "step": 8890 }, { "epoch": 0.32342466749036997, "grad_norm": 12.963587760925293, "learning_rate": 3.231339486881314e-05, "loss": 0.2349, "step": 8900 }, { "epoch": 0.3237880659931681, "grad_norm": 1.078845500946045, "learning_rate": 3.2349734719092963e-05, "loss": 0.1968, "step": 8910 }, { "epoch": 0.32415146449596627, "grad_norm": 1.5369044542312622, "learning_rate": 3.2386074569372776e-05, "loss": 0.1681, "step": 8920 }, { "epoch": 0.3245148629987644, "grad_norm": 3.8013484477996826, "learning_rate": 3.242241441965259e-05, "loss": 0.2214, "step": 8930 }, { "epoch": 0.3248782615015626, "grad_norm": 2.0259406566619873, "learning_rate": 3.2458754269932415e-05, "loss": 0.4227, "step": 8940 }, { "epoch": 0.3252416600043608, "grad_norm": 6.423609256744385, "learning_rate": 3.249509412021223e-05, "loss": 0.2835, "step": 8950 }, { "epoch": 0.32560505850715893, "grad_norm": 2.363159656524658, "learning_rate": 3.253143397049204e-05, "loss": 0.2038, "step": 8960 }, { "epoch": 0.32596845700995714, "grad_norm": 2.4034435749053955, "learning_rate": 3.256777382077186e-05, "loss": 0.1907, "step": 8970 }, { "epoch": 0.3263318555127553, "grad_norm": 4.032980442047119, "learning_rate": 3.260411367105168e-05, "loss": 0.1973, "step": 8980 }, { "epoch": 0.32669525401555344, "grad_norm": 6.102022647857666, "learning_rate": 3.264045352133149e-05, "loss": 0.197, "step": 8990 }, { "epoch": 0.32705865251835164, "grad_norm": 35.67893981933594, "learning_rate": 3.267679337161131e-05, "loss": 0.2682, "step": 9000 }, { "epoch": 0.32705865251835164, "eval_loss": 0.40712428092956543, "eval_runtime": 179.2194, "eval_samples_per_second": 41.368, "eval_steps_per_second": 5.172, "eval_wer": 0.226941020567466, "step": 9000 }, { "epoch": 0.3274220510211498, "grad_norm": 1.8014717102050781, "learning_rate": 3.271313322189113e-05, "loss": 0.1591, "step": 9010 }, { "epoch": 0.32778544952394795, "grad_norm": 1.7404965162277222, "learning_rate": 3.274947307217094e-05, "loss": 0.17, "step": 9020 }, { "epoch": 0.32814884802674615, "grad_norm": 3.7020771503448486, "learning_rate": 3.278581292245076e-05, "loss": 0.2225, "step": 9030 }, { "epoch": 0.3285122465295443, "grad_norm": 1.045998454093933, "learning_rate": 3.282215277273058e-05, "loss": 0.1681, "step": 9040 }, { "epoch": 0.32887564503234246, "grad_norm": 5.282716751098633, "learning_rate": 3.2858492623010394e-05, "loss": 0.2856, "step": 9050 }, { "epoch": 0.3292390435351406, "grad_norm": 3.3956387042999268, "learning_rate": 3.289483247329021e-05, "loss": 0.1782, "step": 9060 }, { "epoch": 0.3296024420379388, "grad_norm": 1.855603575706482, "learning_rate": 3.293117232357003e-05, "loss": 0.1582, "step": 9070 }, { "epoch": 0.32996584054073697, "grad_norm": 7.214013576507568, "learning_rate": 3.2967512173849845e-05, "loss": 0.1691, "step": 9080 }, { "epoch": 0.3303292390435351, "grad_norm": 3.140125036239624, "learning_rate": 3.3003852024129664e-05, "loss": 0.1872, "step": 9090 }, { "epoch": 0.3306926375463333, "grad_norm": 17.094255447387695, "learning_rate": 3.304019187440948e-05, "loss": 0.2848, "step": 9100 }, { "epoch": 0.3310560360491315, "grad_norm": 1.9439010620117188, "learning_rate": 3.3076531724689296e-05, "loss": 0.1625, "step": 9110 }, { "epoch": 0.3314194345519296, "grad_norm": 1.609747290611267, "learning_rate": 3.311287157496911e-05, "loss": 0.1915, "step": 9120 }, { "epoch": 0.33178283305472783, "grad_norm": 4.03629207611084, "learning_rate": 3.314921142524893e-05, "loss": 0.2291, "step": 9130 }, { "epoch": 0.332146231557526, "grad_norm": 1.9643129110336304, "learning_rate": 3.318555127552875e-05, "loss": 0.1747, "step": 9140 }, { "epoch": 0.33250963006032414, "grad_norm": 9.304847717285156, "learning_rate": 3.322189112580856e-05, "loss": 0.2539, "step": 9150 }, { "epoch": 0.33287302856312234, "grad_norm": 1.991467833518982, "learning_rate": 3.325823097608838e-05, "loss": 3.61, "step": 9160 }, { "epoch": 0.3332364270659205, "grad_norm": 2.7127187252044678, "learning_rate": 3.32945708263682e-05, "loss": 0.1985, "step": 9170 }, { "epoch": 0.33359982556871864, "grad_norm": 2.831299304962158, "learning_rate": 3.333091067664801e-05, "loss": 1.7334, "step": 9180 }, { "epoch": 0.33396322407151685, "grad_norm": 1.5434614419937134, "learning_rate": 3.336725052692783e-05, "loss": 0.1718, "step": 9190 }, { "epoch": 0.334326622574315, "grad_norm": 10.254124641418457, "learning_rate": 3.340359037720765e-05, "loss": 0.3246, "step": 9200 }, { "epoch": 0.33469002107711315, "grad_norm": 1.169886589050293, "learning_rate": 3.343993022748746e-05, "loss": 0.1936, "step": 9210 }, { "epoch": 0.3350534195799113, "grad_norm": 3.697627544403076, "learning_rate": 3.347627007776728e-05, "loss": 0.205, "step": 9220 }, { "epoch": 0.3354168180827095, "grad_norm": 3.15781307220459, "learning_rate": 3.35126099280471e-05, "loss": 0.2222, "step": 9230 }, { "epoch": 0.33578021658550766, "grad_norm": 1.903701663017273, "learning_rate": 3.3548949778326914e-05, "loss": 0.1611, "step": 9240 }, { "epoch": 0.3361436150883058, "grad_norm": 26.77275848388672, "learning_rate": 3.358528962860673e-05, "loss": 0.2872, "step": 9250 }, { "epoch": 0.336507013591104, "grad_norm": 1.588224172592163, "learning_rate": 3.3621629478886546e-05, "loss": 3.404, "step": 9260 }, { "epoch": 0.33687041209390217, "grad_norm": 1.8802090883255005, "learning_rate": 3.3657969329166365e-05, "loss": 0.1715, "step": 9270 }, { "epoch": 0.3372338105967003, "grad_norm": 5.38352632522583, "learning_rate": 3.3694309179446185e-05, "loss": 0.1906, "step": 9280 }, { "epoch": 0.33759720909949853, "grad_norm": 1.736177921295166, "learning_rate": 3.3730649029726e-05, "loss": 0.1881, "step": 9290 }, { "epoch": 0.3379606076022967, "grad_norm": 17.865558624267578, "learning_rate": 3.3766988880005816e-05, "loss": 0.3003, "step": 9300 }, { "epoch": 0.33832400610509483, "grad_norm": 1.532173991203308, "learning_rate": 3.3803328730285636e-05, "loss": 0.188, "step": 9310 }, { "epoch": 0.33868740460789304, "grad_norm": 3.8595352172851562, "learning_rate": 3.383966858056545e-05, "loss": 0.1869, "step": 9320 }, { "epoch": 0.3390508031106912, "grad_norm": 2.5906641483306885, "learning_rate": 3.387600843084527e-05, "loss": 0.1993, "step": 9330 }, { "epoch": 0.33941420161348934, "grad_norm": 2.5224273204803467, "learning_rate": 3.391234828112508e-05, "loss": 0.1935, "step": 9340 }, { "epoch": 0.33977760011628755, "grad_norm": 11.555095672607422, "learning_rate": 3.39486881314049e-05, "loss": 0.2891, "step": 9350 }, { "epoch": 0.3401409986190857, "grad_norm": 1.3724703788757324, "learning_rate": 3.398502798168472e-05, "loss": 0.1656, "step": 9360 }, { "epoch": 0.34050439712188385, "grad_norm": 2.1549072265625, "learning_rate": 3.402136783196453e-05, "loss": 0.1769, "step": 9370 }, { "epoch": 0.340867795624682, "grad_norm": 1.793492317199707, "learning_rate": 3.405770768224435e-05, "loss": 0.2661, "step": 9380 }, { "epoch": 0.3412311941274802, "grad_norm": 4.038620948791504, "learning_rate": 3.409404753252417e-05, "loss": 0.1871, "step": 9390 }, { "epoch": 0.34159459263027836, "grad_norm": 31.7847900390625, "learning_rate": 3.413038738280398e-05, "loss": 0.2967, "step": 9400 }, { "epoch": 0.3419579911330765, "grad_norm": 2.398646354675293, "learning_rate": 3.41667272330838e-05, "loss": 0.2086, "step": 9410 }, { "epoch": 0.3423213896358747, "grad_norm": 2.2226221561431885, "learning_rate": 3.4203067083363615e-05, "loss": 0.1665, "step": 9420 }, { "epoch": 0.34268478813867287, "grad_norm": 39.96380615234375, "learning_rate": 3.4239406933643434e-05, "loss": 0.9468, "step": 9430 }, { "epoch": 0.343048186641471, "grad_norm": 1.5465339422225952, "learning_rate": 3.4275746783923254e-05, "loss": 0.1827, "step": 9440 }, { "epoch": 0.3434115851442692, "grad_norm": 7.941345691680908, "learning_rate": 3.4312086634203066e-05, "loss": 0.2786, "step": 9450 }, { "epoch": 0.3437749836470674, "grad_norm": 1.2575476169586182, "learning_rate": 3.4348426484482885e-05, "loss": 0.1764, "step": 9460 }, { "epoch": 0.34413838214986553, "grad_norm": 1.3529596328735352, "learning_rate": 3.4384766334762705e-05, "loss": 0.207, "step": 9470 }, { "epoch": 0.34450178065266374, "grad_norm": 3.2839174270629883, "learning_rate": 3.442110618504252e-05, "loss": 0.2672, "step": 9480 }, { "epoch": 0.3448651791554619, "grad_norm": 3.246384859085083, "learning_rate": 3.445744603532234e-05, "loss": 0.1906, "step": 9490 }, { "epoch": 0.34522857765826004, "grad_norm": 2.595038652420044, "learning_rate": 3.4493785885602156e-05, "loss": 0.2441, "step": 9500 }, { "epoch": 0.3455919761610582, "grad_norm": 1.3803220987319946, "learning_rate": 3.453012573588197e-05, "loss": 0.1745, "step": 9510 }, { "epoch": 0.3459553746638564, "grad_norm": 1.2091724872589111, "learning_rate": 3.456646558616179e-05, "loss": 0.1441, "step": 9520 }, { "epoch": 0.34631877316665455, "grad_norm": 6.582603931427002, "learning_rate": 3.460280543644161e-05, "loss": 0.1835, "step": 9530 }, { "epoch": 0.3466821716694527, "grad_norm": 2.6845383644104004, "learning_rate": 3.463914528672142e-05, "loss": 0.2048, "step": 9540 }, { "epoch": 0.3470455701722509, "grad_norm": 11.775678634643555, "learning_rate": 3.467548513700123e-05, "loss": 0.2841, "step": 9550 }, { "epoch": 0.34740896867504906, "grad_norm": 2.256279706954956, "learning_rate": 3.471182498728106e-05, "loss": 0.6472, "step": 9560 }, { "epoch": 0.3477723671778472, "grad_norm": 1.4487576484680176, "learning_rate": 3.474816483756087e-05, "loss": 0.2722, "step": 9570 }, { "epoch": 0.3481357656806454, "grad_norm": 3.843964099884033, "learning_rate": 3.4784504687840684e-05, "loss": 0.1855, "step": 9580 }, { "epoch": 0.34849916418344357, "grad_norm": 1.5561772584915161, "learning_rate": 3.48208445381205e-05, "loss": 0.1908, "step": 9590 }, { "epoch": 0.3488625626862417, "grad_norm": 3.757232666015625, "learning_rate": 3.485718438840032e-05, "loss": 0.2198, "step": 9600 }, { "epoch": 0.3488625626862417, "eval_loss": 0.3895765244960785, "eval_runtime": 179.7435, "eval_samples_per_second": 41.248, "eval_steps_per_second": 5.157, "eval_wer": 0.21512335033674007, "step": 9600 }, { "epoch": 0.3492259611890399, "grad_norm": 1.3912307024002075, "learning_rate": 3.4893524238680135e-05, "loss": 0.1616, "step": 9610 }, { "epoch": 0.3495893596918381, "grad_norm": 2.4036080837249756, "learning_rate": 3.4929864088959954e-05, "loss": 0.1579, "step": 9620 }, { "epoch": 0.3499527581946362, "grad_norm": 2.611175537109375, "learning_rate": 3.4966203939239774e-05, "loss": 0.1746, "step": 9630 }, { "epoch": 0.35031615669743443, "grad_norm": 1.4045140743255615, "learning_rate": 3.5002543789519586e-05, "loss": 0.1594, "step": 9640 }, { "epoch": 0.3506795552002326, "grad_norm": 12.708057403564453, "learning_rate": 3.5038883639799406e-05, "loss": 0.3118, "step": 9650 }, { "epoch": 0.35104295370303074, "grad_norm": 3.0364696979522705, "learning_rate": 3.5075223490079225e-05, "loss": 0.3062, "step": 9660 }, { "epoch": 0.3514063522058289, "grad_norm": 1.4527848958969116, "learning_rate": 3.511156334035904e-05, "loss": 0.1603, "step": 9670 }, { "epoch": 0.3517697507086271, "grad_norm": 5.697939395904541, "learning_rate": 3.514790319063886e-05, "loss": 0.2069, "step": 9680 }, { "epoch": 0.35213314921142524, "grad_norm": 2.1645712852478027, "learning_rate": 3.5184243040918676e-05, "loss": 0.162, "step": 9690 }, { "epoch": 0.3524965477142234, "grad_norm": 8.024601936340332, "learning_rate": 3.522058289119849e-05, "loss": 0.898, "step": 9700 }, { "epoch": 0.3528599462170216, "grad_norm": 1.4516103267669678, "learning_rate": 3.52569227414783e-05, "loss": 0.189, "step": 9710 }, { "epoch": 0.35322334471981975, "grad_norm": 1.0467925071716309, "learning_rate": 3.529326259175813e-05, "loss": 0.1547, "step": 9720 }, { "epoch": 0.3535867432226179, "grad_norm": 3.9237303733825684, "learning_rate": 3.532960244203794e-05, "loss": 0.1968, "step": 9730 }, { "epoch": 0.3539501417254161, "grad_norm": 2.502257823944092, "learning_rate": 3.536594229231775e-05, "loss": 0.1645, "step": 9740 }, { "epoch": 0.35431354022821426, "grad_norm": 30.662227630615234, "learning_rate": 3.540228214259758e-05, "loss": 0.2847, "step": 9750 }, { "epoch": 0.3546769387310124, "grad_norm": 1.7106624841690063, "learning_rate": 3.543862199287739e-05, "loss": 0.1951, "step": 9760 }, { "epoch": 0.3550403372338106, "grad_norm": 2.169036865234375, "learning_rate": 3.5474961843157204e-05, "loss": 0.172, "step": 9770 }, { "epoch": 0.3554037357366088, "grad_norm": 6.116454124450684, "learning_rate": 3.551130169343703e-05, "loss": 0.1934, "step": 9780 }, { "epoch": 0.3557671342394069, "grad_norm": 1.8530545234680176, "learning_rate": 3.554764154371684e-05, "loss": 0.217, "step": 9790 }, { "epoch": 0.3561305327422051, "grad_norm": 11.060449600219727, "learning_rate": 3.5583981393996655e-05, "loss": 0.2145, "step": 9800 }, { "epoch": 0.3564939312450033, "grad_norm": 7.748067378997803, "learning_rate": 3.5620321244276475e-05, "loss": 0.2114, "step": 9810 }, { "epoch": 0.35685732974780143, "grad_norm": 3.562528610229492, "learning_rate": 3.5656661094556294e-05, "loss": 0.221, "step": 9820 }, { "epoch": 0.3572207282505996, "grad_norm": 2.798417091369629, "learning_rate": 3.5693000944836107e-05, "loss": 0.2071, "step": 9830 }, { "epoch": 0.3575841267533978, "grad_norm": 2.3908724784851074, "learning_rate": 3.5729340795115926e-05, "loss": 0.1678, "step": 9840 }, { "epoch": 0.35794752525619594, "grad_norm": 7.205004692077637, "learning_rate": 3.5765680645395745e-05, "loss": 0.2953, "step": 9850 }, { "epoch": 0.3583109237589941, "grad_norm": 2.5064749717712402, "learning_rate": 3.580202049567556e-05, "loss": 0.197, "step": 9860 }, { "epoch": 0.3586743222617923, "grad_norm": 2.0985934734344482, "learning_rate": 3.583836034595538e-05, "loss": 0.1441, "step": 9870 }, { "epoch": 0.35903772076459045, "grad_norm": 5.256442070007324, "learning_rate": 3.5874700196235197e-05, "loss": 0.203, "step": 9880 }, { "epoch": 0.3594011192673886, "grad_norm": 2.3590219020843506, "learning_rate": 3.591104004651501e-05, "loss": 0.1811, "step": 9890 }, { "epoch": 0.3597645177701868, "grad_norm": 24.96747398376465, "learning_rate": 3.594737989679482e-05, "loss": 0.293, "step": 9900 }, { "epoch": 0.36012791627298496, "grad_norm": 1.727751612663269, "learning_rate": 3.598371974707465e-05, "loss": 0.1896, "step": 9910 }, { "epoch": 0.3604913147757831, "grad_norm": 2.349269151687622, "learning_rate": 3.602005959735446e-05, "loss": 0.1649, "step": 9920 }, { "epoch": 0.3608547132785813, "grad_norm": 3.139385223388672, "learning_rate": 3.605639944763427e-05, "loss": 0.2181, "step": 9930 }, { "epoch": 0.36121811178137947, "grad_norm": 2.1249756813049316, "learning_rate": 3.60927392979141e-05, "loss": 0.1751, "step": 9940 }, { "epoch": 0.3615815102841776, "grad_norm": 3.6616756916046143, "learning_rate": 3.612907914819391e-05, "loss": 0.2729, "step": 9950 }, { "epoch": 0.36194490878697577, "grad_norm": 1.367600440979004, "learning_rate": 3.6165418998473724e-05, "loss": 0.1592, "step": 9960 }, { "epoch": 0.362308307289774, "grad_norm": 1.8141239881515503, "learning_rate": 3.620175884875355e-05, "loss": 0.2867, "step": 9970 }, { "epoch": 0.36267170579257213, "grad_norm": 7.0058794021606445, "learning_rate": 3.623809869903336e-05, "loss": 0.207, "step": 9980 }, { "epoch": 0.3630351042953703, "grad_norm": 1.923048734664917, "learning_rate": 3.6274438549313176e-05, "loss": 0.346, "step": 9990 }, { "epoch": 0.3633985027981685, "grad_norm": 16.30779457092285, "learning_rate": 3.6310778399592995e-05, "loss": 0.3107, "step": 10000 }, { "epoch": 0.36376190130096664, "grad_norm": 1.979866862297058, "learning_rate": 3.6347118249872814e-05, "loss": 0.7999, "step": 10010 }, { "epoch": 0.3641252998037648, "grad_norm": 2.7377023696899414, "learning_rate": 3.638345810015263e-05, "loss": 0.2005, "step": 10020 }, { "epoch": 0.364488698306563, "grad_norm": 5.546159744262695, "learning_rate": 3.6419797950432446e-05, "loss": 0.1964, "step": 10030 }, { "epoch": 0.36485209680936115, "grad_norm": 2.2417142391204834, "learning_rate": 3.6456137800712265e-05, "loss": 0.2078, "step": 10040 }, { "epoch": 0.3652154953121593, "grad_norm": 7.2175092697143555, "learning_rate": 3.649247765099208e-05, "loss": 0.291, "step": 10050 }, { "epoch": 0.3655788938149575, "grad_norm": 2.6172754764556885, "learning_rate": 3.65288175012719e-05, "loss": 0.2037, "step": 10060 }, { "epoch": 0.36594229231775566, "grad_norm": 2.0634214878082275, "learning_rate": 3.656515735155172e-05, "loss": 0.1668, "step": 10070 }, { "epoch": 0.3663056908205538, "grad_norm": 3.5431976318359375, "learning_rate": 3.660149720183153e-05, "loss": 0.475, "step": 10080 }, { "epoch": 0.366669089323352, "grad_norm": 2.147472381591797, "learning_rate": 3.663783705211135e-05, "loss": 0.1869, "step": 10090 }, { "epoch": 0.36703248782615017, "grad_norm": 18.726482391357422, "learning_rate": 3.667417690239117e-05, "loss": 0.2773, "step": 10100 }, { "epoch": 0.3673958863289483, "grad_norm": 1.6554090976715088, "learning_rate": 3.671051675267098e-05, "loss": 0.1707, "step": 10110 }, { "epoch": 0.36775928483174647, "grad_norm": 1.8967760801315308, "learning_rate": 3.674685660295079e-05, "loss": 0.2159, "step": 10120 }, { "epoch": 0.3681226833345447, "grad_norm": 2.3765788078308105, "learning_rate": 3.678319645323061e-05, "loss": 0.2229, "step": 10130 }, { "epoch": 0.3684860818373428, "grad_norm": 5.890452861785889, "learning_rate": 3.681953630351043e-05, "loss": 0.195, "step": 10140 }, { "epoch": 0.368849480340141, "grad_norm": 5.045167446136475, "learning_rate": 3.6855876153790244e-05, "loss": 0.3111, "step": 10150 }, { "epoch": 0.3692128788429392, "grad_norm": 2.37107253074646, "learning_rate": 3.6892216004070064e-05, "loss": 0.1942, "step": 10160 }, { "epoch": 0.36957627734573734, "grad_norm": 1.9943170547485352, "learning_rate": 3.692855585434988e-05, "loss": 0.1906, "step": 10170 }, { "epoch": 0.3699396758485355, "grad_norm": 3.16873836517334, "learning_rate": 3.6964895704629696e-05, "loss": 0.1791, "step": 10180 }, { "epoch": 0.3703030743513337, "grad_norm": 15.252134323120117, "learning_rate": 3.7001235554909515e-05, "loss": 0.3702, "step": 10190 }, { "epoch": 0.37066647285413185, "grad_norm": 8.845834732055664, "learning_rate": 3.7037575405189334e-05, "loss": 0.2765, "step": 10200 }, { "epoch": 0.37066647285413185, "eval_loss": 0.4178149104118347, "eval_runtime": 179.6523, "eval_samples_per_second": 41.269, "eval_steps_per_second": 5.16, "eval_wer": 0.2237551509430537, "step": 10200 }, { "epoch": 0.37102987135693, "grad_norm": 6.2689313888549805, "learning_rate": 3.707391525546915e-05, "loss": 0.1922, "step": 10210 }, { "epoch": 0.3713932698597282, "grad_norm": 1.00067138671875, "learning_rate": 3.7110255105748966e-05, "loss": 0.1535, "step": 10220 }, { "epoch": 0.37175666836252635, "grad_norm": 2.6602060794830322, "learning_rate": 3.7146594956028786e-05, "loss": 0.1959, "step": 10230 }, { "epoch": 0.3721200668653245, "grad_norm": 4.743015766143799, "learning_rate": 3.71829348063086e-05, "loss": 0.2058, "step": 10240 }, { "epoch": 0.37248346536812266, "grad_norm": 8.304347038269043, "learning_rate": 3.721927465658842e-05, "loss": 0.3027, "step": 10250 }, { "epoch": 0.37284686387092086, "grad_norm": 1.8180521726608276, "learning_rate": 3.725561450686824e-05, "loss": 0.1708, "step": 10260 }, { "epoch": 0.373210262373719, "grad_norm": 2.05625057220459, "learning_rate": 3.729195435714805e-05, "loss": 0.1824, "step": 10270 }, { "epoch": 0.37357366087651717, "grad_norm": 2.426814317703247, "learning_rate": 3.732829420742787e-05, "loss": 0.197, "step": 10280 }, { "epoch": 0.3739370593793154, "grad_norm": 1.658158540725708, "learning_rate": 3.736463405770768e-05, "loss": 0.1578, "step": 10290 }, { "epoch": 0.3743004578821135, "grad_norm": 10.913407325744629, "learning_rate": 3.74009739079875e-05, "loss": 0.2728, "step": 10300 }, { "epoch": 0.3746638563849117, "grad_norm": 1.6443781852722168, "learning_rate": 3.743731375826732e-05, "loss": 0.1656, "step": 10310 }, { "epoch": 0.3750272548877099, "grad_norm": 1.0702744722366333, "learning_rate": 3.747365360854713e-05, "loss": 0.7132, "step": 10320 }, { "epoch": 0.37539065339050803, "grad_norm": 5.8824052810668945, "learning_rate": 3.750999345882695e-05, "loss": 0.2701, "step": 10330 }, { "epoch": 0.3757540518933062, "grad_norm": 4.373916149139404, "learning_rate": 3.754633330910677e-05, "loss": 0.2053, "step": 10340 }, { "epoch": 0.3761174503961044, "grad_norm": 22.25397300720215, "learning_rate": 3.7582673159386584e-05, "loss": 0.2781, "step": 10350 }, { "epoch": 0.37648084889890254, "grad_norm": 1.8272254467010498, "learning_rate": 3.7619013009666403e-05, "loss": 0.1833, "step": 10360 }, { "epoch": 0.3768442474017007, "grad_norm": 3.286931037902832, "learning_rate": 3.7655352859946216e-05, "loss": 0.1576, "step": 10370 }, { "epoch": 0.3772076459044989, "grad_norm": 5.283690929412842, "learning_rate": 3.7691692710226035e-05, "loss": 0.21, "step": 10380 }, { "epoch": 0.37757104440729705, "grad_norm": 1.184476375579834, "learning_rate": 3.7728032560505855e-05, "loss": 0.2597, "step": 10390 }, { "epoch": 0.3779344429100952, "grad_norm": 5.685116767883301, "learning_rate": 3.776437241078567e-05, "loss": 0.2476, "step": 10400 }, { "epoch": 0.37829784141289335, "grad_norm": 1.1873399019241333, "learning_rate": 3.7800712261065487e-05, "loss": 0.1597, "step": 10410 }, { "epoch": 0.37866123991569156, "grad_norm": 1.6136255264282227, "learning_rate": 3.7837052111345306e-05, "loss": 0.188, "step": 10420 }, { "epoch": 0.3790246384184897, "grad_norm": 4.743179798126221, "learning_rate": 3.787339196162512e-05, "loss": 0.1962, "step": 10430 }, { "epoch": 0.37938803692128786, "grad_norm": 2.603379011154175, "learning_rate": 3.790973181190494e-05, "loss": 0.1854, "step": 10440 }, { "epoch": 0.37975143542408607, "grad_norm": 6.267378807067871, "learning_rate": 3.794607166218475e-05, "loss": 0.2569, "step": 10450 }, { "epoch": 0.3801148339268842, "grad_norm": 5.370235919952393, "learning_rate": 3.798241151246457e-05, "loss": 0.1796, "step": 10460 }, { "epoch": 0.3804782324296824, "grad_norm": 2.170964002609253, "learning_rate": 3.801875136274439e-05, "loss": 0.1713, "step": 10470 }, { "epoch": 0.3808416309324806, "grad_norm": 4.134753704071045, "learning_rate": 3.80550912130242e-05, "loss": 0.2269, "step": 10480 }, { "epoch": 0.38120502943527873, "grad_norm": 2.7026259899139404, "learning_rate": 3.809143106330402e-05, "loss": 0.1938, "step": 10490 }, { "epoch": 0.3815684279380769, "grad_norm": 7.368224143981934, "learning_rate": 3.812777091358384e-05, "loss": 0.2617, "step": 10500 }, { "epoch": 0.3819318264408751, "grad_norm": 1.3194938898086548, "learning_rate": 3.816411076386365e-05, "loss": 0.2066, "step": 10510 }, { "epoch": 0.38229522494367324, "grad_norm": 1.901505470275879, "learning_rate": 3.820045061414347e-05, "loss": 0.1716, "step": 10520 }, { "epoch": 0.3826586234464714, "grad_norm": 3.4045536518096924, "learning_rate": 3.823679046442329e-05, "loss": 0.1625, "step": 10530 }, { "epoch": 0.38302202194926954, "grad_norm": 2.1540184020996094, "learning_rate": 3.8273130314703104e-05, "loss": 0.1829, "step": 10540 }, { "epoch": 0.38338542045206775, "grad_norm": 14.377511024475098, "learning_rate": 3.8309470164982924e-05, "loss": 0.2747, "step": 10550 }, { "epoch": 0.3837488189548659, "grad_norm": 1.9092762470245361, "learning_rate": 3.834581001526274e-05, "loss": 0.1728, "step": 10560 }, { "epoch": 0.38411221745766405, "grad_norm": 1.867458462715149, "learning_rate": 3.8382149865542556e-05, "loss": 0.1752, "step": 10570 }, { "epoch": 0.38447561596046226, "grad_norm": 5.246692657470703, "learning_rate": 3.841848971582237e-05, "loss": 0.1823, "step": 10580 }, { "epoch": 0.3848390144632604, "grad_norm": 2.9294533729553223, "learning_rate": 3.845482956610219e-05, "loss": 0.2052, "step": 10590 }, { "epoch": 0.38520241296605856, "grad_norm": 11.946113586425781, "learning_rate": 3.849116941638201e-05, "loss": 0.309, "step": 10600 }, { "epoch": 0.38556581146885677, "grad_norm": 1.7155182361602783, "learning_rate": 3.852750926666182e-05, "loss": 0.175, "step": 10610 }, { "epoch": 0.3859292099716549, "grad_norm": 1.1520076990127563, "learning_rate": 3.856384911694164e-05, "loss": 0.2129, "step": 10620 }, { "epoch": 0.38629260847445307, "grad_norm": 1.9750351905822754, "learning_rate": 3.860018896722146e-05, "loss": 0.1725, "step": 10630 }, { "epoch": 0.3866560069772513, "grad_norm": 4.309560298919678, "learning_rate": 3.863652881750127e-05, "loss": 0.1516, "step": 10640 }, { "epoch": 0.3870194054800494, "grad_norm": 7.554156303405762, "learning_rate": 3.867286866778109e-05, "loss": 0.3069, "step": 10650 }, { "epoch": 0.3873828039828476, "grad_norm": 3.7965683937072754, "learning_rate": 3.870920851806091e-05, "loss": 0.2014, "step": 10660 }, { "epoch": 0.3877462024856458, "grad_norm": 3.8691935539245605, "learning_rate": 3.874554836834072e-05, "loss": 0.1678, "step": 10670 }, { "epoch": 0.38810960098844394, "grad_norm": 4.144315719604492, "learning_rate": 3.878188821862054e-05, "loss": 0.2936, "step": 10680 }, { "epoch": 0.3884729994912421, "grad_norm": 1.5667825937271118, "learning_rate": 3.881822806890036e-05, "loss": 0.1871, "step": 10690 }, { "epoch": 0.38883639799404024, "grad_norm": 7.6076788902282715, "learning_rate": 3.885456791918017e-05, "loss": 0.2661, "step": 10700 }, { "epoch": 0.38919979649683845, "grad_norm": 1.7828059196472168, "learning_rate": 3.889090776945999e-05, "loss": 0.1808, "step": 10710 }, { "epoch": 0.3895631949996366, "grad_norm": 7.039370059967041, "learning_rate": 3.892724761973981e-05, "loss": 0.2484, "step": 10720 }, { "epoch": 0.38992659350243475, "grad_norm": 2.1001148223876953, "learning_rate": 3.8963587470019625e-05, "loss": 0.1644, "step": 10730 }, { "epoch": 0.39028999200523296, "grad_norm": 0.9235002398490906, "learning_rate": 3.899992732029944e-05, "loss": 0.172, "step": 10740 }, { "epoch": 0.3906533905080311, "grad_norm": 10.066643714904785, "learning_rate": 3.903626717057926e-05, "loss": 0.2999, "step": 10750 }, { "epoch": 0.39101678901082926, "grad_norm": 2.256965160369873, "learning_rate": 3.9072607020859076e-05, "loss": 0.2116, "step": 10760 }, { "epoch": 0.39138018751362746, "grad_norm": 1.742125153541565, "learning_rate": 3.910894687113889e-05, "loss": 0.1838, "step": 10770 }, { "epoch": 0.3917435860164256, "grad_norm": 5.397392749786377, "learning_rate": 3.9145286721418714e-05, "loss": 0.2213, "step": 10780 }, { "epoch": 0.39210698451922377, "grad_norm": 2.439197540283203, "learning_rate": 3.918162657169853e-05, "loss": 0.1984, "step": 10790 }, { "epoch": 0.392470383022022, "grad_norm": 6.7387895584106445, "learning_rate": 3.921796642197834e-05, "loss": 0.2842, "step": 10800 }, { "epoch": 0.392470383022022, "eval_loss": 0.39516785740852356, "eval_runtime": 180.1522, "eval_samples_per_second": 41.154, "eval_steps_per_second": 5.146, "eval_wer": 0.21758309583023216, "step": 10800 }, { "epoch": 0.3928337815248201, "grad_norm": 1.5229130983352661, "learning_rate": 3.925430627225816e-05, "loss": 0.1809, "step": 10810 }, { "epoch": 0.3931971800276183, "grad_norm": 1.6385318040847778, "learning_rate": 3.929064612253798e-05, "loss": 0.155, "step": 10820 }, { "epoch": 0.3935605785304164, "grad_norm": 2.403878927230835, "learning_rate": 3.932698597281779e-05, "loss": 0.2837, "step": 10830 }, { "epoch": 0.39392397703321463, "grad_norm": 2.818368434906006, "learning_rate": 3.936332582309761e-05, "loss": 0.2298, "step": 10840 }, { "epoch": 0.3942873755360128, "grad_norm": 6.08942174911499, "learning_rate": 3.939966567337743e-05, "loss": 0.2262, "step": 10850 }, { "epoch": 0.39465077403881094, "grad_norm": 1.2632570266723633, "learning_rate": 3.943600552365724e-05, "loss": 0.2087, "step": 10860 }, { "epoch": 0.39501417254160914, "grad_norm": 2.2119662761688232, "learning_rate": 3.947234537393706e-05, "loss": 0.1974, "step": 10870 }, { "epoch": 0.3953775710444073, "grad_norm": 2.936021089553833, "learning_rate": 3.950868522421688e-05, "loss": 0.1909, "step": 10880 }, { "epoch": 0.39574096954720545, "grad_norm": 1.3898749351501465, "learning_rate": 3.9545025074496693e-05, "loss": 0.184, "step": 10890 }, { "epoch": 0.39610436805000365, "grad_norm": 9.063791275024414, "learning_rate": 3.958136492477651e-05, "loss": 0.338, "step": 10900 }, { "epoch": 0.3964677665528018, "grad_norm": 1.3791584968566895, "learning_rate": 3.961770477505633e-05, "loss": 0.2256, "step": 10910 }, { "epoch": 0.39683116505559995, "grad_norm": 0.9377845525741577, "learning_rate": 3.9654044625336145e-05, "loss": 0.9822, "step": 10920 }, { "epoch": 0.39719456355839816, "grad_norm": 3.9755465984344482, "learning_rate": 3.969038447561596e-05, "loss": 0.2257, "step": 10930 }, { "epoch": 0.3975579620611963, "grad_norm": 1.559699535369873, "learning_rate": 3.9726724325895783e-05, "loss": 0.2116, "step": 10940 }, { "epoch": 0.39792136056399446, "grad_norm": 7.545668601989746, "learning_rate": 3.9763064176175596e-05, "loss": 0.2515, "step": 10950 }, { "epoch": 0.39828475906679267, "grad_norm": 1.980197548866272, "learning_rate": 3.979940402645541e-05, "loss": 0.1721, "step": 10960 }, { "epoch": 0.3986481575695908, "grad_norm": 2.5450973510742188, "learning_rate": 3.9835743876735235e-05, "loss": 1.7152, "step": 10970 }, { "epoch": 0.399011556072389, "grad_norm": 3.518233060836792, "learning_rate": 3.987208372701505e-05, "loss": 0.2521, "step": 10980 }, { "epoch": 0.3993749545751871, "grad_norm": 2.678774356842041, "learning_rate": 3.990842357729486e-05, "loss": 0.2025, "step": 10990 }, { "epoch": 0.39973835307798533, "grad_norm": 11.46552848815918, "learning_rate": 3.9944763427574686e-05, "loss": 0.2683, "step": 11000 }, { "epoch": 0.4001017515807835, "grad_norm": 2.3148844242095947, "learning_rate": 3.99811032778545e-05, "loss": 1.5331, "step": 11010 }, { "epoch": 0.40046515008358163, "grad_norm": 1.2145686149597168, "learning_rate": 4.001744312813431e-05, "loss": 0.1931, "step": 11020 }, { "epoch": 0.40082854858637984, "grad_norm": 3.581883192062378, "learning_rate": 4.005378297841413e-05, "loss": 0.18, "step": 11030 }, { "epoch": 0.401191947089178, "grad_norm": 2.4645683765411377, "learning_rate": 4.009012282869395e-05, "loss": 0.2303, "step": 11040 }, { "epoch": 0.40155534559197614, "grad_norm": 13.845566749572754, "learning_rate": 4.012646267897376e-05, "loss": 0.2515, "step": 11050 }, { "epoch": 0.40191874409477435, "grad_norm": 1.6929864883422852, "learning_rate": 4.016280252925358e-05, "loss": 2.9232, "step": 11060 }, { "epoch": 0.4022821425975725, "grad_norm": 1.5453213453292847, "learning_rate": 4.01991423795334e-05, "loss": 0.1703, "step": 11070 }, { "epoch": 0.40264554110037065, "grad_norm": 1.5723987817764282, "learning_rate": 4.0235482229813214e-05, "loss": 0.1694, "step": 11080 }, { "epoch": 0.40300893960316886, "grad_norm": 1.4501444101333618, "learning_rate": 4.027182208009303e-05, "loss": 0.2477, "step": 11090 }, { "epoch": 0.403372338105967, "grad_norm": 20.50950813293457, "learning_rate": 4.030816193037285e-05, "loss": 0.2641, "step": 11100 }, { "epoch": 0.40373573660876516, "grad_norm": 1.9846757650375366, "learning_rate": 4.0344501780652665e-05, "loss": 0.1807, "step": 11110 }, { "epoch": 0.40409913511156337, "grad_norm": 1.3933240175247192, "learning_rate": 4.0380841630932484e-05, "loss": 0.1683, "step": 11120 }, { "epoch": 0.4044625336143615, "grad_norm": 2.370534658432007, "learning_rate": 4.0417181481212304e-05, "loss": 0.2476, "step": 11130 }, { "epoch": 0.40482593211715967, "grad_norm": 2.6382100582122803, "learning_rate": 4.0453521331492116e-05, "loss": 0.1723, "step": 11140 }, { "epoch": 0.4051893306199578, "grad_norm": 27.381826400756836, "learning_rate": 4.048986118177193e-05, "loss": 0.2058, "step": 11150 }, { "epoch": 0.40555272912275603, "grad_norm": 1.3622616529464722, "learning_rate": 4.052620103205175e-05, "loss": 0.1744, "step": 11160 }, { "epoch": 0.4059161276255542, "grad_norm": 1.4734828472137451, "learning_rate": 4.056254088233157e-05, "loss": 0.1685, "step": 11170 }, { "epoch": 0.40627952612835233, "grad_norm": 5.694312572479248, "learning_rate": 4.059888073261138e-05, "loss": 0.3549, "step": 11180 }, { "epoch": 0.40664292463115054, "grad_norm": 1.9976438283920288, "learning_rate": 4.06352205828912e-05, "loss": 0.1525, "step": 11190 }, { "epoch": 0.4070063231339487, "grad_norm": 5.735686779022217, "learning_rate": 4.067156043317102e-05, "loss": 0.2642, "step": 11200 }, { "epoch": 0.40736972163674684, "grad_norm": 5.192315101623535, "learning_rate": 4.070790028345083e-05, "loss": 0.1636, "step": 11210 }, { "epoch": 0.40773312013954505, "grad_norm": 2.6324477195739746, "learning_rate": 4.074424013373065e-05, "loss": 0.3451, "step": 11220 }, { "epoch": 0.4080965186423432, "grad_norm": 2.496997356414795, "learning_rate": 4.078057998401047e-05, "loss": 0.1792, "step": 11230 }, { "epoch": 0.40845991714514135, "grad_norm": 3.928255558013916, "learning_rate": 4.081691983429028e-05, "loss": 0.2203, "step": 11240 }, { "epoch": 0.40882331564793956, "grad_norm": 14.433273315429688, "learning_rate": 4.08532596845701e-05, "loss": 0.3283, "step": 11250 }, { "epoch": 0.4091867141507377, "grad_norm": 1.9282217025756836, "learning_rate": 4.088959953484992e-05, "loss": 0.2191, "step": 11260 }, { "epoch": 0.40955011265353586, "grad_norm": 1.8360569477081299, "learning_rate": 4.0925939385129734e-05, "loss": 0.1623, "step": 11270 }, { "epoch": 0.409913511156334, "grad_norm": 4.518060207366943, "learning_rate": 4.096227923540955e-05, "loss": 0.2036, "step": 11280 }, { "epoch": 0.4102769096591322, "grad_norm": 1.4292632341384888, "learning_rate": 4.099861908568937e-05, "loss": 0.1515, "step": 11290 }, { "epoch": 0.41064030816193037, "grad_norm": 23.795089721679688, "learning_rate": 4.1034958935969185e-05, "loss": 0.3228, "step": 11300 }, { "epoch": 0.4110037066647285, "grad_norm": 1.7721456289291382, "learning_rate": 4.1071298786249005e-05, "loss": 0.15, "step": 11310 }, { "epoch": 0.4113671051675267, "grad_norm": 3.544579029083252, "learning_rate": 4.110763863652882e-05, "loss": 0.2349, "step": 11320 }, { "epoch": 0.4117305036703249, "grad_norm": 4.25554895401001, "learning_rate": 4.1143978486808636e-05, "loss": 0.5458, "step": 11330 }, { "epoch": 0.41209390217312303, "grad_norm": 3.069894313812256, "learning_rate": 4.1180318337088456e-05, "loss": 0.2131, "step": 11340 }, { "epoch": 0.41245730067592123, "grad_norm": 5.389547348022461, "learning_rate": 4.121665818736827e-05, "loss": 0.2895, "step": 11350 }, { "epoch": 0.4128206991787194, "grad_norm": 2.308717727661133, "learning_rate": 4.125299803764809e-05, "loss": 0.209, "step": 11360 }, { "epoch": 0.41318409768151754, "grad_norm": 2.071504831314087, "learning_rate": 4.12893378879279e-05, "loss": 0.225, "step": 11370 }, { "epoch": 0.41354749618431574, "grad_norm": 10.397724151611328, "learning_rate": 4.132567773820772e-05, "loss": 0.5041, "step": 11380 }, { "epoch": 0.4139108946871139, "grad_norm": 3.3916842937469482, "learning_rate": 4.136201758848754e-05, "loss": 0.2055, "step": 11390 }, { "epoch": 0.41427429318991205, "grad_norm": 27.703519821166992, "learning_rate": 4.139835743876735e-05, "loss": 0.3002, "step": 11400 }, { "epoch": 0.41427429318991205, "eval_loss": 0.40216270089149475, "eval_runtime": 180.385, "eval_samples_per_second": 41.101, "eval_steps_per_second": 5.139, "eval_wer": 0.22132263510447112, "step": 11400 }, { "epoch": 0.41463769169271025, "grad_norm": 3.024658203125, "learning_rate": 4.143469728904717e-05, "loss": 0.168, "step": 11410 }, { "epoch": 0.4150010901955084, "grad_norm": 2.899369478225708, "learning_rate": 4.147103713932699e-05, "loss": 3.0252, "step": 11420 }, { "epoch": 0.41536448869830656, "grad_norm": 3.960700511932373, "learning_rate": 4.15073769896068e-05, "loss": 0.1972, "step": 11430 }, { "epoch": 0.4157278872011047, "grad_norm": 1.542468786239624, "learning_rate": 4.154371683988662e-05, "loss": 0.1971, "step": 11440 }, { "epoch": 0.4160912857039029, "grad_norm": 16.871423721313477, "learning_rate": 4.158005669016644e-05, "loss": 0.2768, "step": 11450 }, { "epoch": 0.41645468420670106, "grad_norm": 3.142385721206665, "learning_rate": 4.1616396540446254e-05, "loss": 0.2173, "step": 11460 }, { "epoch": 0.4168180827094992, "grad_norm": 0.9852932095527649, "learning_rate": 4.1652736390726074e-05, "loss": 0.2529, "step": 11470 }, { "epoch": 0.4171814812122974, "grad_norm": 2.4834413528442383, "learning_rate": 4.1689076241005886e-05, "loss": 0.9175, "step": 11480 }, { "epoch": 0.4175448797150956, "grad_norm": 2.7286272048950195, "learning_rate": 4.1725416091285705e-05, "loss": 0.1864, "step": 11490 }, { "epoch": 0.4179082782178937, "grad_norm": 5.711360454559326, "learning_rate": 4.1761755941565525e-05, "loss": 0.2285, "step": 11500 }, { "epoch": 0.41827167672069193, "grad_norm": 1.160866379737854, "learning_rate": 4.179809579184534e-05, "loss": 0.1959, "step": 11510 }, { "epoch": 0.4186350752234901, "grad_norm": 2.5051305294036865, "learning_rate": 4.183443564212516e-05, "loss": 0.185, "step": 11520 }, { "epoch": 0.41899847372628823, "grad_norm": 3.641874313354492, "learning_rate": 4.1870775492404976e-05, "loss": 0.199, "step": 11530 }, { "epoch": 0.41936187222908644, "grad_norm": 1.676038146018982, "learning_rate": 4.190711534268479e-05, "loss": 0.1895, "step": 11540 }, { "epoch": 0.4197252707318846, "grad_norm": 11.47658634185791, "learning_rate": 4.194345519296461e-05, "loss": 0.246, "step": 11550 }, { "epoch": 0.42008866923468274, "grad_norm": 1.7632570266723633, "learning_rate": 4.197979504324443e-05, "loss": 0.1761, "step": 11560 }, { "epoch": 0.4204520677374809, "grad_norm": 2.2994728088378906, "learning_rate": 4.201613489352424e-05, "loss": 0.1799, "step": 11570 }, { "epoch": 0.4208154662402791, "grad_norm": 3.964228391647339, "learning_rate": 4.205247474380406e-05, "loss": 0.7376, "step": 11580 }, { "epoch": 0.42117886474307725, "grad_norm": 1.866466760635376, "learning_rate": 4.208881459408387e-05, "loss": 0.2293, "step": 11590 }, { "epoch": 0.4215422632458754, "grad_norm": 4.722428798675537, "learning_rate": 4.212515444436369e-05, "loss": 0.2303, "step": 11600 }, { "epoch": 0.4219056617486736, "grad_norm": 2.8812968730926514, "learning_rate": 4.2161494294643504e-05, "loss": 0.1628, "step": 11610 }, { "epoch": 0.42226906025147176, "grad_norm": 8.05451488494873, "learning_rate": 4.219783414492332e-05, "loss": 0.1978, "step": 11620 }, { "epoch": 0.4226324587542699, "grad_norm": 3.4176700115203857, "learning_rate": 4.223417399520314e-05, "loss": 0.1986, "step": 11630 }, { "epoch": 0.4229958572570681, "grad_norm": 5.204764366149902, "learning_rate": 4.2270513845482955e-05, "loss": 0.1959, "step": 11640 }, { "epoch": 0.42335925575986627, "grad_norm": 6.184700965881348, "learning_rate": 4.2306853695762774e-05, "loss": 0.2822, "step": 11650 }, { "epoch": 0.4237226542626644, "grad_norm": 2.288935422897339, "learning_rate": 4.2343193546042594e-05, "loss": 0.2073, "step": 11660 }, { "epoch": 0.42408605276546263, "grad_norm": 3.8856844902038574, "learning_rate": 4.2379533396322406e-05, "loss": 0.2134, "step": 11670 }, { "epoch": 0.4244494512682608, "grad_norm": 4.048069953918457, "learning_rate": 4.2415873246602226e-05, "loss": 0.1922, "step": 11680 }, { "epoch": 0.42481284977105893, "grad_norm": 1.466927409172058, "learning_rate": 4.2452213096882045e-05, "loss": 0.1653, "step": 11690 }, { "epoch": 0.42517624827385714, "grad_norm": 35.94015121459961, "learning_rate": 4.248855294716186e-05, "loss": 0.2398, "step": 11700 }, { "epoch": 0.4255396467766553, "grad_norm": 2.575195789337158, "learning_rate": 4.252489279744168e-05, "loss": 0.2241, "step": 11710 }, { "epoch": 0.42590304527945344, "grad_norm": 1.4232568740844727, "learning_rate": 4.2561232647721496e-05, "loss": 0.1817, "step": 11720 }, { "epoch": 0.4262664437822516, "grad_norm": 2.8543412685394287, "learning_rate": 4.259757249800131e-05, "loss": 0.2094, "step": 11730 }, { "epoch": 0.4266298422850498, "grad_norm": 0.85033118724823, "learning_rate": 4.263391234828113e-05, "loss": 0.1578, "step": 11740 }, { "epoch": 0.42699324078784795, "grad_norm": 7.382369041442871, "learning_rate": 4.267025219856095e-05, "loss": 0.2763, "step": 11750 }, { "epoch": 0.4273566392906461, "grad_norm": 1.3994635343551636, "learning_rate": 4.270659204884076e-05, "loss": 0.199, "step": 11760 }, { "epoch": 0.4277200377934443, "grad_norm": 1.4978888034820557, "learning_rate": 4.274293189912057e-05, "loss": 0.1804, "step": 11770 }, { "epoch": 0.42808343629624246, "grad_norm": 5.206210136413574, "learning_rate": 4.27792717494004e-05, "loss": 0.2483, "step": 11780 }, { "epoch": 0.4284468347990406, "grad_norm": 1.4130820035934448, "learning_rate": 4.281561159968021e-05, "loss": 0.1792, "step": 11790 }, { "epoch": 0.4288102333018388, "grad_norm": 2.60227370262146, "learning_rate": 4.2851951449960024e-05, "loss": 0.214, "step": 11800 }, { "epoch": 0.42917363180463697, "grad_norm": 1.8874465227127075, "learning_rate": 4.288829130023985e-05, "loss": 0.1894, "step": 11810 }, { "epoch": 0.4295370303074351, "grad_norm": 2.921766519546509, "learning_rate": 4.292463115051966e-05, "loss": 0.1608, "step": 11820 }, { "epoch": 0.4299004288102333, "grad_norm": 2.812821626663208, "learning_rate": 4.2960971000799475e-05, "loss": 0.2381, "step": 11830 }, { "epoch": 0.4302638273130315, "grad_norm": 1.8063637018203735, "learning_rate": 4.2997310851079295e-05, "loss": 0.198, "step": 11840 }, { "epoch": 0.43062722581582963, "grad_norm": 16.433927536010742, "learning_rate": 4.3033650701359114e-05, "loss": 0.3015, "step": 11850 }, { "epoch": 0.4309906243186278, "grad_norm": 1.295142650604248, "learning_rate": 4.3069990551638927e-05, "loss": 0.1678, "step": 11860 }, { "epoch": 0.431354022821426, "grad_norm": 135.4871063232422, "learning_rate": 4.3106330401918746e-05, "loss": 1.8542, "step": 11870 }, { "epoch": 0.43171742132422414, "grad_norm": 2.3314764499664307, "learning_rate": 4.3142670252198565e-05, "loss": 0.1983, "step": 11880 }, { "epoch": 0.4320808198270223, "grad_norm": 1.6635117530822754, "learning_rate": 4.317901010247838e-05, "loss": 0.1737, "step": 11890 }, { "epoch": 0.4324442183298205, "grad_norm": 32.102664947509766, "learning_rate": 4.32153499527582e-05, "loss": 0.3092, "step": 11900 }, { "epoch": 0.43280761683261865, "grad_norm": 2.3491451740264893, "learning_rate": 4.3251689803038017e-05, "loss": 0.1849, "step": 11910 }, { "epoch": 0.4331710153354168, "grad_norm": 3.8088629245758057, "learning_rate": 4.328802965331783e-05, "loss": 0.2023, "step": 11920 }, { "epoch": 0.433534413838215, "grad_norm": 2.7132246494293213, "learning_rate": 4.332436950359764e-05, "loss": 0.1935, "step": 11930 }, { "epoch": 0.43389781234101316, "grad_norm": 1.2917368412017822, "learning_rate": 4.336070935387747e-05, "loss": 0.1918, "step": 11940 }, { "epoch": 0.4342612108438113, "grad_norm": 9.690601348876953, "learning_rate": 4.339704920415728e-05, "loss": 0.3059, "step": 11950 }, { "epoch": 0.4346246093466095, "grad_norm": 1.2652380466461182, "learning_rate": 4.343338905443709e-05, "loss": 0.1587, "step": 11960 }, { "epoch": 0.43498800784940767, "grad_norm": 0.9622058272361755, "learning_rate": 4.346972890471692e-05, "loss": 0.1755, "step": 11970 }, { "epoch": 0.4353514063522058, "grad_norm": 5.316989898681641, "learning_rate": 4.350606875499673e-05, "loss": 0.1794, "step": 11980 }, { "epoch": 0.435714804855004, "grad_norm": 3.428891181945801, "learning_rate": 4.3542408605276544e-05, "loss": 0.2105, "step": 11990 }, { "epoch": 0.4360782033578022, "grad_norm": 12.879768371582031, "learning_rate": 4.357874845555637e-05, "loss": 0.2904, "step": 12000 }, { "epoch": 0.4360782033578022, "eval_loss": 0.3918191194534302, "eval_runtime": 180.0676, "eval_samples_per_second": 41.173, "eval_steps_per_second": 5.148, "eval_wer": 0.22659611160527893, "step": 12000 }, { "epoch": 0.4364416018606003, "grad_norm": 2.0471973419189453, "learning_rate": 4.361508830583618e-05, "loss": 0.2544, "step": 12010 }, { "epoch": 0.4368050003633985, "grad_norm": 1.3883107900619507, "learning_rate": 4.3651428156115995e-05, "loss": 0.1957, "step": 12020 }, { "epoch": 0.4371683988661967, "grad_norm": 1.786475419998169, "learning_rate": 4.368776800639582e-05, "loss": 0.1732, "step": 12030 }, { "epoch": 0.43753179736899483, "grad_norm": 3.3099594116210938, "learning_rate": 4.3724107856675634e-05, "loss": 0.1871, "step": 12040 }, { "epoch": 0.437895195871793, "grad_norm": 9.09699535369873, "learning_rate": 4.376044770695545e-05, "loss": 0.2745, "step": 12050 }, { "epoch": 0.4382585943745912, "grad_norm": 2.0993807315826416, "learning_rate": 4.3796787557235266e-05, "loss": 0.2076, "step": 12060 }, { "epoch": 0.43862199287738934, "grad_norm": 27.799428939819336, "learning_rate": 4.3833127407515085e-05, "loss": 0.548, "step": 12070 }, { "epoch": 0.4389853913801875, "grad_norm": 3.8897557258605957, "learning_rate": 4.38694672577949e-05, "loss": 0.1799, "step": 12080 }, { "epoch": 0.4393487898829857, "grad_norm": 3.4620189666748047, "learning_rate": 4.390580710807472e-05, "loss": 0.1735, "step": 12090 }, { "epoch": 0.43971218838578385, "grad_norm": 9.587783813476562, "learning_rate": 4.394214695835454e-05, "loss": 0.3344, "step": 12100 }, { "epoch": 0.440075586888582, "grad_norm": 1.2581641674041748, "learning_rate": 4.397848680863435e-05, "loss": 0.1863, "step": 12110 }, { "epoch": 0.4404389853913802, "grad_norm": 1.3624401092529297, "learning_rate": 4.401482665891417e-05, "loss": 3.7692, "step": 12120 }, { "epoch": 0.44080238389417836, "grad_norm": 2.0099213123321533, "learning_rate": 4.405116650919399e-05, "loss": 0.1999, "step": 12130 }, { "epoch": 0.4411657823969765, "grad_norm": 2.7499871253967285, "learning_rate": 4.40875063594738e-05, "loss": 0.1854, "step": 12140 }, { "epoch": 0.4415291808997747, "grad_norm": 6.473042964935303, "learning_rate": 4.412384620975361e-05, "loss": 0.2843, "step": 12150 }, { "epoch": 0.44189257940257287, "grad_norm": 3.845900535583496, "learning_rate": 4.416018606003344e-05, "loss": 0.1747, "step": 12160 }, { "epoch": 0.442255977905371, "grad_norm": 1.4052759408950806, "learning_rate": 4.419652591031325e-05, "loss": 0.16, "step": 12170 }, { "epoch": 0.4426193764081692, "grad_norm": 3.5824673175811768, "learning_rate": 4.4232865760593064e-05, "loss": 0.7205, "step": 12180 }, { "epoch": 0.4429827749109674, "grad_norm": 1.237358570098877, "learning_rate": 4.426920561087289e-05, "loss": 0.2043, "step": 12190 }, { "epoch": 0.44334617341376553, "grad_norm": 11.106649398803711, "learning_rate": 4.43055454611527e-05, "loss": 0.2537, "step": 12200 }, { "epoch": 0.4437095719165637, "grad_norm": 1.4566165208816528, "learning_rate": 4.4341885311432516e-05, "loss": 0.174, "step": 12210 }, { "epoch": 0.4440729704193619, "grad_norm": 1.4067914485931396, "learning_rate": 4.4378225161712335e-05, "loss": 0.1672, "step": 12220 }, { "epoch": 0.44443636892216004, "grad_norm": 3.1289005279541016, "learning_rate": 4.4414565011992154e-05, "loss": 0.2459, "step": 12230 }, { "epoch": 0.4447997674249582, "grad_norm": 1.2487775087356567, "learning_rate": 4.445090486227197e-05, "loss": 0.1911, "step": 12240 }, { "epoch": 0.4451631659277564, "grad_norm": 4.373108863830566, "learning_rate": 4.4487244712551786e-05, "loss": 0.265, "step": 12250 }, { "epoch": 0.44552656443055455, "grad_norm": 3.0927655696868896, "learning_rate": 4.4523584562831606e-05, "loss": 0.166, "step": 12260 }, { "epoch": 0.4458899629333527, "grad_norm": 1.4012075662612915, "learning_rate": 4.455992441311142e-05, "loss": 0.1631, "step": 12270 }, { "epoch": 0.4462533614361509, "grad_norm": 3.9944920539855957, "learning_rate": 4.459626426339124e-05, "loss": 0.2616, "step": 12280 }, { "epoch": 0.44661675993894906, "grad_norm": 2.412261962890625, "learning_rate": 4.463260411367106e-05, "loss": 0.1963, "step": 12290 }, { "epoch": 0.4469801584417472, "grad_norm": 8.601739883422852, "learning_rate": 4.466894396395087e-05, "loss": 0.3057, "step": 12300 }, { "epoch": 0.44734355694454536, "grad_norm": 2.1279587745666504, "learning_rate": 4.470528381423069e-05, "loss": 0.1931, "step": 12310 }, { "epoch": 0.44770695544734357, "grad_norm": 2.465534210205078, "learning_rate": 4.474162366451051e-05, "loss": 0.1701, "step": 12320 }, { "epoch": 0.4480703539501417, "grad_norm": 6.147269248962402, "learning_rate": 4.477796351479032e-05, "loss": 0.7176, "step": 12330 }, { "epoch": 0.44843375245293987, "grad_norm": 1.6242046356201172, "learning_rate": 4.481430336507014e-05, "loss": 0.1769, "step": 12340 }, { "epoch": 0.4487971509557381, "grad_norm": 7.065566539764404, "learning_rate": 4.485064321534995e-05, "loss": 0.2967, "step": 12350 }, { "epoch": 0.44916054945853623, "grad_norm": 1.9389359951019287, "learning_rate": 4.488698306562977e-05, "loss": 0.1853, "step": 12360 }, { "epoch": 0.4495239479613344, "grad_norm": 1.011250376701355, "learning_rate": 4.492332291590959e-05, "loss": 0.2036, "step": 12370 }, { "epoch": 0.4498873464641326, "grad_norm": 2.459062099456787, "learning_rate": 4.4959662766189404e-05, "loss": 0.1865, "step": 12380 }, { "epoch": 0.45025074496693074, "grad_norm": 1.8472875356674194, "learning_rate": 4.499600261646922e-05, "loss": 0.2178, "step": 12390 }, { "epoch": 0.4506141434697289, "grad_norm": 40.6389045715332, "learning_rate": 4.5032342466749036e-05, "loss": 0.2506, "step": 12400 }, { "epoch": 0.4509775419725271, "grad_norm": 3.9729344844818115, "learning_rate": 4.5068682317028855e-05, "loss": 0.1917, "step": 12410 }, { "epoch": 0.45134094047532525, "grad_norm": 1.0262936353683472, "learning_rate": 4.5105022167308675e-05, "loss": 0.2115, "step": 12420 }, { "epoch": 0.4517043389781234, "grad_norm": 1.5356003046035767, "learning_rate": 4.514136201758849e-05, "loss": 0.1907, "step": 12430 }, { "epoch": 0.4520677374809216, "grad_norm": 1.3107296228408813, "learning_rate": 4.5177701867868307e-05, "loss": 0.195, "step": 12440 }, { "epoch": 0.45243113598371976, "grad_norm": 11.025674819946289, "learning_rate": 4.5214041718148126e-05, "loss": 0.2794, "step": 12450 }, { "epoch": 0.4527945344865179, "grad_norm": 1.8793771266937256, "learning_rate": 4.525038156842794e-05, "loss": 0.2143, "step": 12460 }, { "epoch": 0.45315793298931606, "grad_norm": 1.6508142948150635, "learning_rate": 4.528672141870776e-05, "loss": 0.1863, "step": 12470 }, { "epoch": 0.45352133149211427, "grad_norm": 4.942420959472656, "learning_rate": 4.532306126898758e-05, "loss": 0.1997, "step": 12480 }, { "epoch": 0.4538847299949124, "grad_norm": 3.1977925300598145, "learning_rate": 4.535940111926739e-05, "loss": 1.9163, "step": 12490 }, { "epoch": 0.45424812849771057, "grad_norm": 8.74572467803955, "learning_rate": 4.539574096954721e-05, "loss": 0.3186, "step": 12500 }, { "epoch": 0.4546115270005088, "grad_norm": 1.5346311330795288, "learning_rate": 4.543208081982702e-05, "loss": 0.1958, "step": 12510 }, { "epoch": 0.4549749255033069, "grad_norm": 1.622859239578247, "learning_rate": 4.546842067010684e-05, "loss": 0.1828, "step": 12520 }, { "epoch": 0.4553383240061051, "grad_norm": 1.9394720792770386, "learning_rate": 4.550476052038666e-05, "loss": 0.2198, "step": 12530 }, { "epoch": 0.4557017225089033, "grad_norm": 1.8405578136444092, "learning_rate": 4.554110037066647e-05, "loss": 0.1789, "step": 12540 }, { "epoch": 0.45606512101170144, "grad_norm": 6.24867582321167, "learning_rate": 4.557744022094629e-05, "loss": 0.2593, "step": 12550 }, { "epoch": 0.4564285195144996, "grad_norm": 1.6062959432601929, "learning_rate": 4.561378007122611e-05, "loss": 0.1665, "step": 12560 }, { "epoch": 0.4567919180172978, "grad_norm": 1.1478540897369385, "learning_rate": 4.5650119921505924e-05, "loss": 0.1942, "step": 12570 }, { "epoch": 0.45715531652009594, "grad_norm": 2.0299808979034424, "learning_rate": 4.5686459771785744e-05, "loss": 0.2092, "step": 12580 }, { "epoch": 0.4575187150228941, "grad_norm": 1.6643180847167969, "learning_rate": 4.572279962206556e-05, "loss": 0.1714, "step": 12590 }, { "epoch": 0.45788211352569225, "grad_norm": 10.169012069702148, "learning_rate": 4.5759139472345376e-05, "loss": 0.3101, "step": 12600 }, { "epoch": 0.45788211352569225, "eval_loss": 0.408176064491272, "eval_runtime": 179.7843, "eval_samples_per_second": 41.238, "eval_steps_per_second": 5.156, "eval_wer": 0.24004756113057527, "step": 12600 }, { "epoch": 0.45824551202849045, "grad_norm": 1.8151092529296875, "learning_rate": 4.5795479322625195e-05, "loss": 0.1739, "step": 12610 }, { "epoch": 0.4586089105312886, "grad_norm": 1.1606543064117432, "learning_rate": 4.583181917290501e-05, "loss": 0.1781, "step": 12620 }, { "epoch": 0.45897230903408676, "grad_norm": 2.5139431953430176, "learning_rate": 4.586815902318483e-05, "loss": 0.2101, "step": 12630 }, { "epoch": 0.45933570753688496, "grad_norm": 3.1557183265686035, "learning_rate": 4.590449887346464e-05, "loss": 0.1925, "step": 12640 }, { "epoch": 0.4596991060396831, "grad_norm": 13.978137016296387, "learning_rate": 4.594083872374446e-05, "loss": 0.3085, "step": 12650 }, { "epoch": 0.46006250454248127, "grad_norm": 1.5187938213348389, "learning_rate": 4.597717857402428e-05, "loss": 0.1909, "step": 12660 }, { "epoch": 0.46042590304527947, "grad_norm": 1.661890983581543, "learning_rate": 4.601351842430409e-05, "loss": 0.1729, "step": 12670 }, { "epoch": 0.4607893015480776, "grad_norm": 5.693175792694092, "learning_rate": 4.604985827458391e-05, "loss": 0.2069, "step": 12680 }, { "epoch": 0.4611527000508758, "grad_norm": 2.5228755474090576, "learning_rate": 4.608619812486373e-05, "loss": 0.1899, "step": 12690 }, { "epoch": 0.461516098553674, "grad_norm": 12.629317283630371, "learning_rate": 4.612253797514354e-05, "loss": 0.2441, "step": 12700 }, { "epoch": 0.46187949705647213, "grad_norm": 1.5003726482391357, "learning_rate": 4.615887782542336e-05, "loss": 0.1845, "step": 12710 }, { "epoch": 0.4622428955592703, "grad_norm": 1.596705675125122, "learning_rate": 4.619521767570318e-05, "loss": 0.1942, "step": 12720 }, { "epoch": 0.4626062940620685, "grad_norm": 4.299325466156006, "learning_rate": 4.623155752598299e-05, "loss": 0.1881, "step": 12730 }, { "epoch": 0.46296969256486664, "grad_norm": 2.242932081222534, "learning_rate": 4.626789737626281e-05, "loss": 0.1655, "step": 12740 }, { "epoch": 0.4633330910676648, "grad_norm": 17.353313446044922, "learning_rate": 4.630423722654263e-05, "loss": 0.3002, "step": 12750 }, { "epoch": 0.46369648957046294, "grad_norm": 1.8967528343200684, "learning_rate": 4.6340577076822444e-05, "loss": 0.1967, "step": 12760 }, { "epoch": 0.46405988807326115, "grad_norm": 1.9839125871658325, "learning_rate": 4.6376916927102264e-05, "loss": 0.1582, "step": 12770 }, { "epoch": 0.4644232865760593, "grad_norm": 1.8139293193817139, "learning_rate": 4.641325677738208e-05, "loss": 0.2527, "step": 12780 }, { "epoch": 0.46478668507885745, "grad_norm": 1.6944659948349, "learning_rate": 4.6449596627661896e-05, "loss": 0.1656, "step": 12790 }, { "epoch": 0.46515008358165566, "grad_norm": 3.7842020988464355, "learning_rate": 4.648593647794171e-05, "loss": 0.2375, "step": 12800 }, { "epoch": 0.4655134820844538, "grad_norm": 1.8103773593902588, "learning_rate": 4.6522276328221534e-05, "loss": 0.1932, "step": 12810 }, { "epoch": 0.46587688058725196, "grad_norm": 1.4419440031051636, "learning_rate": 4.655861617850135e-05, "loss": 0.1808, "step": 12820 }, { "epoch": 0.46624027909005017, "grad_norm": 6.361825466156006, "learning_rate": 4.659495602878116e-05, "loss": 0.2105, "step": 12830 }, { "epoch": 0.4666036775928483, "grad_norm": 1.4687098264694214, "learning_rate": 4.663129587906098e-05, "loss": 0.18, "step": 12840 }, { "epoch": 0.46696707609564647, "grad_norm": 14.758776664733887, "learning_rate": 4.66676357293408e-05, "loss": 0.3001, "step": 12850 }, { "epoch": 0.4673304745984447, "grad_norm": 1.4836699962615967, "learning_rate": 4.670397557962061e-05, "loss": 0.1713, "step": 12860 }, { "epoch": 0.46769387310124283, "grad_norm": 4.860133171081543, "learning_rate": 4.674031542990043e-05, "loss": 0.1791, "step": 12870 }, { "epoch": 0.468057271604041, "grad_norm": 1.9861228466033936, "learning_rate": 4.677665528018025e-05, "loss": 0.2029, "step": 12880 }, { "epoch": 0.46842067010683913, "grad_norm": 1.9190025329589844, "learning_rate": 4.681299513046006e-05, "loss": 0.1611, "step": 12890 }, { "epoch": 0.46878406860963734, "grad_norm": 4.6381516456604, "learning_rate": 4.684933498073988e-05, "loss": 0.2646, "step": 12900 }, { "epoch": 0.4691474671124355, "grad_norm": 1.2092620134353638, "learning_rate": 4.68856748310197e-05, "loss": 0.1865, "step": 12910 }, { "epoch": 0.46951086561523364, "grad_norm": 2.7816121578216553, "learning_rate": 4.6922014681299513e-05, "loss": 0.2047, "step": 12920 }, { "epoch": 0.46987426411803185, "grad_norm": 0.629324734210968, "learning_rate": 4.6958354531579326e-05, "loss": 0.2404, "step": 12930 }, { "epoch": 0.47023766262083, "grad_norm": 4.156667232513428, "learning_rate": 4.699469438185915e-05, "loss": 0.1604, "step": 12940 }, { "epoch": 0.47060106112362815, "grad_norm": 1.8534492254257202, "learning_rate": 4.7031034232138965e-05, "loss": 0.2364, "step": 12950 }, { "epoch": 0.47096445962642636, "grad_norm": 1.382408857345581, "learning_rate": 4.706737408241878e-05, "loss": 0.2078, "step": 12960 }, { "epoch": 0.4713278581292245, "grad_norm": 2.499023914337158, "learning_rate": 4.7103713932698603e-05, "loss": 0.1935, "step": 12970 }, { "epoch": 0.47169125663202266, "grad_norm": 2.726032257080078, "learning_rate": 4.7140053782978416e-05, "loss": 0.2143, "step": 12980 }, { "epoch": 0.47205465513482087, "grad_norm": 2.1388118267059326, "learning_rate": 4.717639363325823e-05, "loss": 0.1704, "step": 12990 }, { "epoch": 0.472418053637619, "grad_norm": 5.408501148223877, "learning_rate": 4.7212733483538055e-05, "loss": 0.2492, "step": 13000 }, { "epoch": 0.47278145214041717, "grad_norm": 1.8640841245651245, "learning_rate": 4.724907333381787e-05, "loss": 0.1958, "step": 13010 }, { "epoch": 0.4731448506432154, "grad_norm": 1.4251651763916016, "learning_rate": 4.728541318409768e-05, "loss": 0.1969, "step": 13020 }, { "epoch": 0.4735082491460135, "grad_norm": 2.2603137493133545, "learning_rate": 4.7321753034377506e-05, "loss": 0.1879, "step": 13030 }, { "epoch": 0.4738716476488117, "grad_norm": 1.7813081741333008, "learning_rate": 4.735809288465732e-05, "loss": 0.1627, "step": 13040 }, { "epoch": 0.47423504615160983, "grad_norm": 16.746126174926758, "learning_rate": 4.739443273493713e-05, "loss": 0.3058, "step": 13050 }, { "epoch": 0.47459844465440804, "grad_norm": 2.56193470954895, "learning_rate": 4.743077258521695e-05, "loss": 0.1729, "step": 13060 }, { "epoch": 0.4749618431572062, "grad_norm": 2.1787185668945312, "learning_rate": 4.746711243549677e-05, "loss": 0.1804, "step": 13070 }, { "epoch": 0.47532524166000434, "grad_norm": 3.385338544845581, "learning_rate": 4.750345228577658e-05, "loss": 0.1884, "step": 13080 }, { "epoch": 0.47568864016280255, "grad_norm": 2.48083233833313, "learning_rate": 4.75397921360564e-05, "loss": 0.1728, "step": 13090 }, { "epoch": 0.4760520386656007, "grad_norm": 47.18072509765625, "learning_rate": 4.757613198633622e-05, "loss": 0.2427, "step": 13100 }, { "epoch": 0.47641543716839885, "grad_norm": 1.3267533779144287, "learning_rate": 4.7612471836616034e-05, "loss": 0.1847, "step": 13110 }, { "epoch": 0.47677883567119705, "grad_norm": 2.098389148712158, "learning_rate": 4.764881168689585e-05, "loss": 0.1682, "step": 13120 }, { "epoch": 0.4771422341739952, "grad_norm": 1.1197071075439453, "learning_rate": 4.768515153717567e-05, "loss": 0.166, "step": 13130 }, { "epoch": 0.47750563267679336, "grad_norm": 1.431281328201294, "learning_rate": 4.7721491387455485e-05, "loss": 0.3262, "step": 13140 }, { "epoch": 0.47786903117959156, "grad_norm": 15.357772827148438, "learning_rate": 4.7757831237735304e-05, "loss": 0.2906, "step": 13150 }, { "epoch": 0.4782324296823897, "grad_norm": 3.03275465965271, "learning_rate": 4.7794171088015124e-05, "loss": 0.207, "step": 13160 }, { "epoch": 0.47859582818518787, "grad_norm": 1.0988962650299072, "learning_rate": 4.7830510938294936e-05, "loss": 0.1788, "step": 13170 }, { "epoch": 0.4789592266879861, "grad_norm": 1.9456548690795898, "learning_rate": 4.786685078857475e-05, "loss": 0.2397, "step": 13180 }, { "epoch": 0.4793226251907842, "grad_norm": 1.7383311986923218, "learning_rate": 4.7903190638854575e-05, "loss": 0.1841, "step": 13190 }, { "epoch": 0.4796860236935824, "grad_norm": 5.512730121612549, "learning_rate": 4.793953048913439e-05, "loss": 0.2708, "step": 13200 }, { "epoch": 0.4796860236935824, "eval_loss": 0.3998795747756958, "eval_runtime": 180.9114, "eval_samples_per_second": 40.981, "eval_steps_per_second": 5.124, "eval_wer": 0.2369433804708915, "step": 13200 }, { "epoch": 0.4800494221963805, "grad_norm": 1.5843122005462646, "learning_rate": 4.79758703394142e-05, "loss": 1.0933, "step": 13210 }, { "epoch": 0.48041282069917873, "grad_norm": 1.4696934223175049, "learning_rate": 4.8012210189694026e-05, "loss": 0.1771, "step": 13220 }, { "epoch": 0.4807762192019769, "grad_norm": 2.5620357990264893, "learning_rate": 4.804855003997384e-05, "loss": 0.2202, "step": 13230 }, { "epoch": 0.48113961770477504, "grad_norm": 73.08427429199219, "learning_rate": 4.808488989025365e-05, "loss": 0.2471, "step": 13240 }, { "epoch": 0.48150301620757324, "grad_norm": 7.291989803314209, "learning_rate": 4.812122974053347e-05, "loss": 0.2542, "step": 13250 }, { "epoch": 0.4818664147103714, "grad_norm": 1.7582112550735474, "learning_rate": 4.815756959081329e-05, "loss": 0.1884, "step": 13260 }, { "epoch": 0.48222981321316954, "grad_norm": 0.9253680109977722, "learning_rate": 4.81939094410931e-05, "loss": 0.1797, "step": 13270 }, { "epoch": 0.48259321171596775, "grad_norm": 8.042390823364258, "learning_rate": 4.823024929137292e-05, "loss": 0.192, "step": 13280 }, { "epoch": 0.4829566102187659, "grad_norm": 3.2288219928741455, "learning_rate": 4.826658914165274e-05, "loss": 0.2041, "step": 13290 }, { "epoch": 0.48332000872156405, "grad_norm": 7.657989978790283, "learning_rate": 4.8302928991932554e-05, "loss": 0.3034, "step": 13300 }, { "epoch": 0.48368340722436226, "grad_norm": 2.9273271560668945, "learning_rate": 4.833926884221237e-05, "loss": 0.2028, "step": 13310 }, { "epoch": 0.4840468057271604, "grad_norm": 4.2344865798950195, "learning_rate": 4.837560869249219e-05, "loss": 0.1817, "step": 13320 }, { "epoch": 0.48441020422995856, "grad_norm": 4.074464797973633, "learning_rate": 4.8411948542772005e-05, "loss": 0.2197, "step": 13330 }, { "epoch": 0.4847736027327567, "grad_norm": 1.7070029973983765, "learning_rate": 4.8448288393051825e-05, "loss": 0.2374, "step": 13340 }, { "epoch": 0.4851370012355549, "grad_norm": 2.5278494358062744, "learning_rate": 4.8484628243331644e-05, "loss": 0.265, "step": 13350 }, { "epoch": 0.4855003997383531, "grad_norm": 1.4800697565078735, "learning_rate": 4.8520968093611456e-05, "loss": 0.1597, "step": 13360 }, { "epoch": 0.4858637982411512, "grad_norm": 1.238171935081482, "learning_rate": 4.8557307943891276e-05, "loss": 0.1862, "step": 13370 }, { "epoch": 0.48622719674394943, "grad_norm": 2.7711944580078125, "learning_rate": 4.859364779417109e-05, "loss": 0.1572, "step": 13380 }, { "epoch": 0.4865905952467476, "grad_norm": 2.386011838912964, "learning_rate": 4.862998764445091e-05, "loss": 0.1624, "step": 13390 }, { "epoch": 0.48695399374954573, "grad_norm": 10.38249397277832, "learning_rate": 4.866632749473072e-05, "loss": 0.2182, "step": 13400 }, { "epoch": 0.48731739225234394, "grad_norm": 1.1541043519973755, "learning_rate": 4.870266734501054e-05, "loss": 0.1867, "step": 13410 }, { "epoch": 0.4876807907551421, "grad_norm": 0.7680534720420837, "learning_rate": 4.873900719529036e-05, "loss": 0.1619, "step": 13420 }, { "epoch": 0.48804418925794024, "grad_norm": 2.6120142936706543, "learning_rate": 4.877534704557017e-05, "loss": 1.0657, "step": 13430 }, { "epoch": 0.48840758776073845, "grad_norm": 2.1559348106384277, "learning_rate": 4.881168689584999e-05, "loss": 0.1576, "step": 13440 }, { "epoch": 0.4887709862635366, "grad_norm": 8.222488403320312, "learning_rate": 4.884802674612981e-05, "loss": 0.2596, "step": 13450 }, { "epoch": 0.48913438476633475, "grad_norm": 1.7630010843276978, "learning_rate": 4.888436659640962e-05, "loss": 0.1755, "step": 13460 }, { "epoch": 0.48949778326913296, "grad_norm": 1.489050269126892, "learning_rate": 4.892070644668944e-05, "loss": 0.1844, "step": 13470 }, { "epoch": 0.4898611817719311, "grad_norm": 4.412111759185791, "learning_rate": 4.895704629696926e-05, "loss": 0.2114, "step": 13480 }, { "epoch": 0.49022458027472926, "grad_norm": 2.060366630554199, "learning_rate": 4.8993386147249074e-05, "loss": 0.1932, "step": 13490 }, { "epoch": 0.4905879787775274, "grad_norm": 9.488603591918945, "learning_rate": 4.9029725997528893e-05, "loss": 0.303, "step": 13500 }, { "epoch": 0.4909513772803256, "grad_norm": 2.295671224594116, "learning_rate": 4.906606584780871e-05, "loss": 0.1583, "step": 13510 }, { "epoch": 0.49131477578312377, "grad_norm": 4.13812255859375, "learning_rate": 4.9102405698088525e-05, "loss": 1.9041, "step": 13520 }, { "epoch": 0.4916781742859219, "grad_norm": 3.7411348819732666, "learning_rate": 4.9138745548368345e-05, "loss": 0.1927, "step": 13530 }, { "epoch": 0.4920415727887201, "grad_norm": 1.523505449295044, "learning_rate": 4.917508539864816e-05, "loss": 0.1721, "step": 13540 }, { "epoch": 0.4924049712915183, "grad_norm": 8.239662170410156, "learning_rate": 4.921142524892798e-05, "loss": 0.3205, "step": 13550 }, { "epoch": 0.49276836979431643, "grad_norm": 1.8316904306411743, "learning_rate": 4.9247765099207796e-05, "loss": 0.172, "step": 13560 }, { "epoch": 0.49313176829711464, "grad_norm": 4.627805233001709, "learning_rate": 4.928410494948761e-05, "loss": 0.1731, "step": 13570 }, { "epoch": 0.4934951667999128, "grad_norm": 4.277485370635986, "learning_rate": 4.932044479976743e-05, "loss": 0.2522, "step": 13580 }, { "epoch": 0.49385856530271094, "grad_norm": 2.131641149520874, "learning_rate": 4.935678465004725e-05, "loss": 0.1766, "step": 13590 }, { "epoch": 0.49422196380550915, "grad_norm": 2.9195988178253174, "learning_rate": 4.939312450032706e-05, "loss": 0.3745, "step": 13600 }, { "epoch": 0.4945853623083073, "grad_norm": 1.5876374244689941, "learning_rate": 4.942946435060688e-05, "loss": 0.1716, "step": 13610 }, { "epoch": 0.49494876081110545, "grad_norm": 4.506389617919922, "learning_rate": 4.946580420088669e-05, "loss": 0.5847, "step": 13620 }, { "epoch": 0.4953121593139036, "grad_norm": 3.497152090072632, "learning_rate": 4.950214405116651e-05, "loss": 0.2179, "step": 13630 }, { "epoch": 0.4956755578167018, "grad_norm": 1.7728289365768433, "learning_rate": 4.953848390144633e-05, "loss": 0.165, "step": 13640 }, { "epoch": 0.49603895631949996, "grad_norm": 12.01921558380127, "learning_rate": 4.957482375172614e-05, "loss": 0.2447, "step": 13650 }, { "epoch": 0.4964023548222981, "grad_norm": 2.5448553562164307, "learning_rate": 4.961116360200596e-05, "loss": 0.2089, "step": 13660 }, { "epoch": 0.4967657533250963, "grad_norm": 2.3643887042999268, "learning_rate": 4.9647503452285775e-05, "loss": 0.1724, "step": 13670 }, { "epoch": 0.49712915182789447, "grad_norm": 2.096191644668579, "learning_rate": 4.9683843302565594e-05, "loss": 0.1759, "step": 13680 }, { "epoch": 0.4974925503306926, "grad_norm": 0.9760168790817261, "learning_rate": 4.9720183152845414e-05, "loss": 0.1817, "step": 13690 }, { "epoch": 0.4978559488334908, "grad_norm": 3.019702434539795, "learning_rate": 4.9756523003125226e-05, "loss": 0.2275, "step": 13700 }, { "epoch": 0.498219347336289, "grad_norm": 1.0820231437683105, "learning_rate": 4.9789228868377064e-05, "loss": 2.5822, "step": 13710 }, { "epoch": 0.4985827458390871, "grad_norm": 3.2908883094787598, "learning_rate": 4.982556871865688e-05, "loss": 0.1898, "step": 13720 }, { "epoch": 0.49894614434188533, "grad_norm": 3.4303886890411377, "learning_rate": 4.98619085689367e-05, "loss": 0.2295, "step": 13730 }, { "epoch": 0.4993095428446835, "grad_norm": 1.8785525560379028, "learning_rate": 4.9898248419216515e-05, "loss": 0.1699, "step": 13740 }, { "epoch": 0.49967294134748164, "grad_norm": 7.539544105529785, "learning_rate": 4.993458826949633e-05, "loss": 0.2955, "step": 13750 }, { "epoch": 0.5000363398502798, "grad_norm": 1.6091630458831787, "learning_rate": 4.997092811977615e-05, "loss": 0.1696, "step": 13760 }, { "epoch": 0.5003997383530779, "grad_norm": 1.023695945739746, "learning_rate": 4.9999999978456776e-05, "loss": 0.1872, "step": 13770 }, { "epoch": 0.5007631368558761, "grad_norm": 8.364274978637695, "learning_rate": 4.999999922444405e-05, "loss": 0.1844, "step": 13780 }, { "epoch": 0.5011265353586744, "grad_norm": 1.7257829904556274, "learning_rate": 4.99999973932703e-05, "loss": 0.241, "step": 13790 }, { "epoch": 0.5014899338614724, "grad_norm": 7.256163597106934, "learning_rate": 4.999999448493561e-05, "loss": 0.2714, "step": 13800 }, { "epoch": 0.5014899338614724, "eval_loss": 0.4298999607563019, "eval_runtime": 179.7223, "eval_samples_per_second": 41.253, "eval_steps_per_second": 5.158, "eval_wer": 0.22517109299834806, "step": 13800 }, { "epoch": 0.5018533323642707, "grad_norm": 3.087979316711426, "learning_rate": 4.999999049944011e-05, "loss": 0.3094, "step": 13810 }, { "epoch": 0.5022167308670689, "grad_norm": 1.7626384496688843, "learning_rate": 4.999998543678397e-05, "loss": 0.2521, "step": 13820 }, { "epoch": 0.502580129369867, "grad_norm": 2.257432699203491, "learning_rate": 4.999997929696741e-05, "loss": 0.1913, "step": 13830 }, { "epoch": 0.5029435278726652, "grad_norm": 1.7763293981552124, "learning_rate": 4.999997207999069e-05, "loss": 0.1812, "step": 13840 }, { "epoch": 0.5033069263754634, "grad_norm": 8.228759765625, "learning_rate": 4.9999963785854124e-05, "loss": 0.2953, "step": 13850 }, { "epoch": 0.5036703248782615, "grad_norm": 1.200305461883545, "learning_rate": 4.999995441455807e-05, "loss": 0.3246, "step": 13860 }, { "epoch": 0.5040337233810597, "grad_norm": 1.9264732599258423, "learning_rate": 4.999994396610292e-05, "loss": 0.1749, "step": 13870 }, { "epoch": 0.5043971218838579, "grad_norm": 2.547212839126587, "learning_rate": 4.999993244048915e-05, "loss": 0.2714, "step": 13880 }, { "epoch": 0.504760520386656, "grad_norm": 2.7918379306793213, "learning_rate": 4.999991983771723e-05, "loss": 0.1984, "step": 13890 }, { "epoch": 0.5051239188894542, "grad_norm": 16.789764404296875, "learning_rate": 4.999990615778772e-05, "loss": 0.239, "step": 13900 }, { "epoch": 0.5054873173922524, "grad_norm": 1.1825790405273438, "learning_rate": 4.9999891400701205e-05, "loss": 0.1774, "step": 13910 }, { "epoch": 0.5058507158950505, "grad_norm": 2.1524746417999268, "learning_rate": 4.999987556645832e-05, "loss": 0.1956, "step": 13920 }, { "epoch": 0.5062141143978487, "grad_norm": 2.8159048557281494, "learning_rate": 4.999985865505974e-05, "loss": 0.2315, "step": 13930 }, { "epoch": 0.5065775129006469, "grad_norm": 1.7412035465240479, "learning_rate": 4.99998406665062e-05, "loss": 0.2106, "step": 13940 }, { "epoch": 0.506940911403445, "grad_norm": 61.967708587646484, "learning_rate": 4.999982160079848e-05, "loss": 0.3067, "step": 13950 }, { "epoch": 0.5073043099062432, "grad_norm": 2.378682851791382, "learning_rate": 4.9999801457937404e-05, "loss": 0.21, "step": 13960 }, { "epoch": 0.5076677084090414, "grad_norm": 1.3668854236602783, "learning_rate": 4.9999780237923824e-05, "loss": 0.1529, "step": 13970 }, { "epoch": 0.5080311069118395, "grad_norm": 2.2655959129333496, "learning_rate": 4.9999757940758665e-05, "loss": 0.1747, "step": 13980 }, { "epoch": 0.5083945054146377, "grad_norm": 1.5975615978240967, "learning_rate": 4.9999734566442877e-05, "loss": 0.1728, "step": 13990 }, { "epoch": 0.5087579039174358, "grad_norm": 9.869553565979004, "learning_rate": 4.999971011497748e-05, "loss": 0.3207, "step": 14000 }, { "epoch": 0.509121302420234, "grad_norm": 2.3095829486846924, "learning_rate": 4.999968458636353e-05, "loss": 0.1872, "step": 14010 }, { "epoch": 0.5094847009230322, "grad_norm": 2.059575080871582, "learning_rate": 4.999965798060212e-05, "loss": 0.3059, "step": 14020 }, { "epoch": 0.5098480994258303, "grad_norm": 1.7138803005218506, "learning_rate": 4.9999630297694395e-05, "loss": 0.2025, "step": 14030 }, { "epoch": 0.5102114979286285, "grad_norm": 2.831191062927246, "learning_rate": 4.999960153764155e-05, "loss": 0.1685, "step": 14040 }, { "epoch": 0.5105748964314267, "grad_norm": 15.457362174987793, "learning_rate": 4.999957170044482e-05, "loss": 0.2165, "step": 14050 }, { "epoch": 0.5109382949342248, "grad_norm": 3.923633337020874, "learning_rate": 4.999954078610549e-05, "loss": 0.1888, "step": 14060 }, { "epoch": 0.511301693437023, "grad_norm": 0.8243936896324158, "learning_rate": 4.999950879462491e-05, "loss": 0.295, "step": 14070 }, { "epoch": 0.5116650919398212, "grad_norm": 2.921447277069092, "learning_rate": 4.9999475726004434e-05, "loss": 0.3208, "step": 14080 }, { "epoch": 0.5120284904426193, "grad_norm": 0.9395463466644287, "learning_rate": 4.99994415802455e-05, "loss": 0.1936, "step": 14090 }, { "epoch": 0.5123918889454175, "grad_norm": 11.025691986083984, "learning_rate": 4.999940635734958e-05, "loss": 0.2581, "step": 14100 }, { "epoch": 0.5127552874482157, "grad_norm": 2.2102460861206055, "learning_rate": 4.999937005731818e-05, "loss": 0.1888, "step": 14110 }, { "epoch": 0.5131186859510138, "grad_norm": 1.6075447797775269, "learning_rate": 4.9999332680152876e-05, "loss": 0.1557, "step": 14120 }, { "epoch": 0.513482084453812, "grad_norm": 3.0174403190612793, "learning_rate": 4.999929422585528e-05, "loss": 0.2137, "step": 14130 }, { "epoch": 0.5138454829566103, "grad_norm": 3.2911272048950195, "learning_rate": 4.999925469442705e-05, "loss": 0.2249, "step": 14140 }, { "epoch": 0.5142088814594084, "grad_norm": 4.0001444816589355, "learning_rate": 4.999921408586986e-05, "loss": 0.2548, "step": 14150 }, { "epoch": 0.5145722799622066, "grad_norm": 2.7695538997650146, "learning_rate": 4.9999172400185504e-05, "loss": 0.2107, "step": 14160 }, { "epoch": 0.5149356784650048, "grad_norm": 1.420189380645752, "learning_rate": 4.999912963737574e-05, "loss": 0.1887, "step": 14170 }, { "epoch": 0.5152990769678029, "grad_norm": 1.4330711364746094, "learning_rate": 4.9999085797442434e-05, "loss": 0.2295, "step": 14180 }, { "epoch": 0.5156624754706011, "grad_norm": 1.9518648386001587, "learning_rate": 4.999904088038747e-05, "loss": 0.181, "step": 14190 }, { "epoch": 0.5160258739733993, "grad_norm": 9.763446807861328, "learning_rate": 4.999899488621278e-05, "loss": 0.2163, "step": 14200 }, { "epoch": 0.5163892724761974, "grad_norm": 1.63487708568573, "learning_rate": 4.999894781492035e-05, "loss": 0.1675, "step": 14210 }, { "epoch": 0.5167526709789956, "grad_norm": 1.3337619304656982, "learning_rate": 4.99988996665122e-05, "loss": 1.8258, "step": 14220 }, { "epoch": 0.5171160694817938, "grad_norm": 4.741299152374268, "learning_rate": 4.9998850440990414e-05, "loss": 0.199, "step": 14230 }, { "epoch": 0.5174794679845919, "grad_norm": 2.203994035720825, "learning_rate": 4.9998800138357106e-05, "loss": 0.1666, "step": 14240 }, { "epoch": 0.5178428664873901, "grad_norm": 9.144301414489746, "learning_rate": 4.999874875861444e-05, "loss": 0.2567, "step": 14250 }, { "epoch": 0.5182062649901883, "grad_norm": 1.432627558708191, "learning_rate": 4.9998696301764644e-05, "loss": 0.1842, "step": 14260 }, { "epoch": 0.5185696634929864, "grad_norm": 1.5303106307983398, "learning_rate": 4.999864276780998e-05, "loss": 0.1726, "step": 14270 }, { "epoch": 0.5189330619957846, "grad_norm": 13.468036651611328, "learning_rate": 4.999858815675273e-05, "loss": 0.1927, "step": 14280 }, { "epoch": 0.5192964604985827, "grad_norm": 3.7133965492248535, "learning_rate": 4.999853246859526e-05, "loss": 0.1822, "step": 14290 }, { "epoch": 0.5196598590013809, "grad_norm": 10.077652931213379, "learning_rate": 4.999847570333998e-05, "loss": 0.2847, "step": 14300 }, { "epoch": 0.5200232575041791, "grad_norm": 2.3906922340393066, "learning_rate": 4.9998417860989325e-05, "loss": 0.1962, "step": 14310 }, { "epoch": 0.5203866560069772, "grad_norm": 0.8041434288024902, "learning_rate": 4.999835894154579e-05, "loss": 0.1661, "step": 14320 }, { "epoch": 0.5207500545097754, "grad_norm": 4.1071953773498535, "learning_rate": 4.99982989450119e-05, "loss": 0.2012, "step": 14330 }, { "epoch": 0.5211134530125736, "grad_norm": 0.9645094871520996, "learning_rate": 4.999823787139026e-05, "loss": 0.209, "step": 14340 }, { "epoch": 0.5214768515153717, "grad_norm": 19.18789291381836, "learning_rate": 4.9998175720683506e-05, "loss": 0.3019, "step": 14350 }, { "epoch": 0.5218402500181699, "grad_norm": 1.6560392379760742, "learning_rate": 4.999811249289429e-05, "loss": 0.1696, "step": 14360 }, { "epoch": 0.5222036485209681, "grad_norm": 1.993741512298584, "learning_rate": 4.999804818802535e-05, "loss": 0.1895, "step": 14370 }, { "epoch": 0.5225670470237662, "grad_norm": 3.4508492946624756, "learning_rate": 4.999798280607947e-05, "loss": 0.2111, "step": 14380 }, { "epoch": 0.5229304455265644, "grad_norm": 8.431037902832031, "learning_rate": 4.999791634705944e-05, "loss": 0.1898, "step": 14390 }, { "epoch": 0.5232938440293626, "grad_norm": 10.659805297851562, "learning_rate": 4.9997848810968137e-05, "loss": 0.4744, "step": 14400 }, { "epoch": 0.5232938440293626, "eval_loss": 0.40915772318840027, "eval_runtime": 179.6286, "eval_samples_per_second": 41.274, "eval_steps_per_second": 5.161, "eval_wer": 0.2273222357361991, "step": 14400 }, { "epoch": 0.5236572425321607, "grad_norm": 1.676483392715454, "learning_rate": 4.999778019780849e-05, "loss": 0.1856, "step": 14410 }, { "epoch": 0.5240206410349589, "grad_norm": 3.4859771728515625, "learning_rate": 4.9997710507583414e-05, "loss": 0.1641, "step": 14420 }, { "epoch": 0.5243840395377571, "grad_norm": 2.583261251449585, "learning_rate": 4.999763974029595e-05, "loss": 0.2545, "step": 14430 }, { "epoch": 0.5247474380405552, "grad_norm": 2.0467324256896973, "learning_rate": 4.999756789594913e-05, "loss": 0.1974, "step": 14440 }, { "epoch": 0.5251108365433534, "grad_norm": 4.777310848236084, "learning_rate": 4.999749497454605e-05, "loss": 0.2653, "step": 14450 }, { "epoch": 0.5254742350461516, "grad_norm": 1.6312458515167236, "learning_rate": 4.999742097608984e-05, "loss": 0.1503, "step": 14460 }, { "epoch": 0.5258376335489497, "grad_norm": 1.1725629568099976, "learning_rate": 4.999734590058371e-05, "loss": 0.1636, "step": 14470 }, { "epoch": 0.526201032051748, "grad_norm": 3.2061386108398438, "learning_rate": 4.999726974803089e-05, "loss": 0.1988, "step": 14480 }, { "epoch": 0.5265644305545462, "grad_norm": 1.7078185081481934, "learning_rate": 4.9997192518434655e-05, "loss": 0.1763, "step": 14490 }, { "epoch": 0.5269278290573443, "grad_norm": 3.5756313800811768, "learning_rate": 4.999711421179833e-05, "loss": 0.2651, "step": 14500 }, { "epoch": 0.5272912275601425, "grad_norm": 1.8054040670394897, "learning_rate": 4.99970348281253e-05, "loss": 0.1923, "step": 14510 }, { "epoch": 0.5276546260629407, "grad_norm": 2.8949921131134033, "learning_rate": 4.9996954367418976e-05, "loss": 0.1897, "step": 14520 }, { "epoch": 0.5280180245657388, "grad_norm": 2.0020744800567627, "learning_rate": 4.9996872829682825e-05, "loss": 0.2469, "step": 14530 }, { "epoch": 0.528381423068537, "grad_norm": 1.1650570631027222, "learning_rate": 4.999679021492037e-05, "loss": 0.2088, "step": 14540 }, { "epoch": 0.5287448215713352, "grad_norm": 14.624237060546875, "learning_rate": 4.999670652313516e-05, "loss": 0.2918, "step": 14550 }, { "epoch": 0.5291082200741333, "grad_norm": 1.6658445596694946, "learning_rate": 4.99966217543308e-05, "loss": 0.1936, "step": 14560 }, { "epoch": 0.5294716185769315, "grad_norm": 2.0761842727661133, "learning_rate": 4.9996535908510955e-05, "loss": 0.2318, "step": 14570 }, { "epoch": 0.5298350170797297, "grad_norm": 2.475193977355957, "learning_rate": 4.999644898567931e-05, "loss": 0.1682, "step": 14580 }, { "epoch": 0.5301984155825278, "grad_norm": 1.7537975311279297, "learning_rate": 4.9996360985839616e-05, "loss": 0.1528, "step": 14590 }, { "epoch": 0.530561814085326, "grad_norm": 6.486474990844727, "learning_rate": 4.9996271908995666e-05, "loss": 0.2571, "step": 14600 }, { "epoch": 0.5309252125881241, "grad_norm": 2.308250665664673, "learning_rate": 4.9996181755151294e-05, "loss": 0.1764, "step": 14610 }, { "epoch": 0.5312886110909223, "grad_norm": 4.871829032897949, "learning_rate": 4.999609052431039e-05, "loss": 0.3045, "step": 14620 }, { "epoch": 0.5316520095937205, "grad_norm": 2.796844959259033, "learning_rate": 4.999599821647688e-05, "loss": 0.2102, "step": 14630 }, { "epoch": 0.5320154080965186, "grad_norm": 1.9681658744812012, "learning_rate": 4.999590483165475e-05, "loss": 0.1882, "step": 14640 }, { "epoch": 0.5323788065993168, "grad_norm": 5.858233451843262, "learning_rate": 4.9995810369848006e-05, "loss": 0.314, "step": 14650 }, { "epoch": 0.532742205102115, "grad_norm": 6.469663143157959, "learning_rate": 4.9995714831060736e-05, "loss": 0.2103, "step": 14660 }, { "epoch": 0.5331056036049131, "grad_norm": 1.8543453216552734, "learning_rate": 4.999561821529705e-05, "loss": 0.219, "step": 14670 }, { "epoch": 0.5334690021077113, "grad_norm": 2.222320318222046, "learning_rate": 4.99955205225611e-05, "loss": 0.1879, "step": 14680 }, { "epoch": 0.5338324006105095, "grad_norm": 5.018227577209473, "learning_rate": 4.999542175285711e-05, "loss": 0.1437, "step": 14690 }, { "epoch": 0.5341957991133076, "grad_norm": 6.225541114807129, "learning_rate": 4.999532190618933e-05, "loss": 0.268, "step": 14700 }, { "epoch": 0.5345591976161058, "grad_norm": 1.8122676610946655, "learning_rate": 4.999522098256206e-05, "loss": 0.1644, "step": 14710 }, { "epoch": 0.534922596118904, "grad_norm": 2.4057557582855225, "learning_rate": 4.999511898197966e-05, "loss": 0.1663, "step": 14720 }, { "epoch": 0.5352859946217021, "grad_norm": 1.756697416305542, "learning_rate": 4.9995015904446513e-05, "loss": 0.1771, "step": 14730 }, { "epoch": 0.5356493931245003, "grad_norm": 1.5457457304000854, "learning_rate": 4.999491174996706e-05, "loss": 0.1889, "step": 14740 }, { "epoch": 0.5360127916272985, "grad_norm": 3.108682155609131, "learning_rate": 4.999480651854579e-05, "loss": 0.2063, "step": 14750 }, { "epoch": 0.5363761901300966, "grad_norm": 2.2037875652313232, "learning_rate": 4.9994700210187246e-05, "loss": 0.1579, "step": 14760 }, { "epoch": 0.5367395886328948, "grad_norm": 1.2102454900741577, "learning_rate": 4.9994592824895994e-05, "loss": 0.2361, "step": 14770 }, { "epoch": 0.537102987135693, "grad_norm": 6.5722455978393555, "learning_rate": 4.999448436267667e-05, "loss": 0.2165, "step": 14780 }, { "epoch": 0.5374663856384911, "grad_norm": 1.606378197669983, "learning_rate": 4.999437482353395e-05, "loss": 0.1642, "step": 14790 }, { "epoch": 0.5378297841412893, "grad_norm": 24.709177017211914, "learning_rate": 4.999426420747255e-05, "loss": 0.2628, "step": 14800 }, { "epoch": 0.5381931826440876, "grad_norm": 2.543760299682617, "learning_rate": 4.999415251449723e-05, "loss": 0.1883, "step": 14810 }, { "epoch": 0.5385565811468856, "grad_norm": 2.0813279151916504, "learning_rate": 4.999403974461281e-05, "loss": 0.1842, "step": 14820 }, { "epoch": 0.5389199796496839, "grad_norm": 4.744104385375977, "learning_rate": 4.9993925897824144e-05, "loss": 0.1981, "step": 14830 }, { "epoch": 0.5392833781524821, "grad_norm": 3.2407493591308594, "learning_rate": 4.9993810974136146e-05, "loss": 0.2169, "step": 14840 }, { "epoch": 0.5396467766552802, "grad_norm": 13.33681869506836, "learning_rate": 4.999369497355375e-05, "loss": 0.2775, "step": 14850 }, { "epoch": 0.5400101751580784, "grad_norm": 2.3192784786224365, "learning_rate": 4.9993577896081975e-05, "loss": 0.1987, "step": 14860 }, { "epoch": 0.5403735736608766, "grad_norm": 1.6611911058425903, "learning_rate": 4.999345974172586e-05, "loss": 0.188, "step": 14870 }, { "epoch": 0.5407369721636747, "grad_norm": 4.368532180786133, "learning_rate": 4.9993340510490485e-05, "loss": 0.2201, "step": 14880 }, { "epoch": 0.5411003706664729, "grad_norm": 1.4825586080551147, "learning_rate": 4.999322020238099e-05, "loss": 0.185, "step": 14890 }, { "epoch": 0.541463769169271, "grad_norm": 4.346343994140625, "learning_rate": 4.9993098817402564e-05, "loss": 0.2415, "step": 14900 }, { "epoch": 0.5418271676720692, "grad_norm": 1.0175251960754395, "learning_rate": 4.999297635556044e-05, "loss": 0.1991, "step": 14910 }, { "epoch": 0.5421905661748674, "grad_norm": 1.600205421447754, "learning_rate": 4.999285281685989e-05, "loss": 0.1706, "step": 14920 }, { "epoch": 0.5425539646776655, "grad_norm": 4.332497596740723, "learning_rate": 4.999272820130623e-05, "loss": 0.1964, "step": 14930 }, { "epoch": 0.5429173631804637, "grad_norm": 2.0384531021118164, "learning_rate": 4.999260250890484e-05, "loss": 0.1571, "step": 14940 }, { "epoch": 0.5432807616832619, "grad_norm": 11.780756950378418, "learning_rate": 4.999247573966114e-05, "loss": 0.319, "step": 14950 }, { "epoch": 0.54364416018606, "grad_norm": 2.7058663368225098, "learning_rate": 4.999234789358057e-05, "loss": 0.2009, "step": 14960 }, { "epoch": 0.5440075586888582, "grad_norm": 1.966780662536621, "learning_rate": 4.999221897066866e-05, "loss": 0.177, "step": 14970 }, { "epoch": 0.5443709571916564, "grad_norm": 2.2129642963409424, "learning_rate": 4.999208897093096e-05, "loss": 0.2472, "step": 14980 }, { "epoch": 0.5447343556944545, "grad_norm": 2.726358652114868, "learning_rate": 4.9991957894373064e-05, "loss": 0.2239, "step": 14990 }, { "epoch": 0.5450977541972527, "grad_norm": 28.577600479125977, "learning_rate": 4.999182574100063e-05, "loss": 0.2524, "step": 15000 }, { "epoch": 0.5450977541972527, "eval_loss": 0.3972287178039551, "eval_runtime": 180.8086, "eval_samples_per_second": 41.005, "eval_steps_per_second": 5.127, "eval_wer": 0.2289560150307695, "step": 15000 }, { "epoch": 0.5454611527000509, "grad_norm": 1.9243866205215454, "learning_rate": 4.9991692510819335e-05, "loss": 0.1679, "step": 15010 }, { "epoch": 0.545824551202849, "grad_norm": 1.3926585912704468, "learning_rate": 4.9991558203834944e-05, "loss": 0.1933, "step": 15020 }, { "epoch": 0.5461879497056472, "grad_norm": 5.275027751922607, "learning_rate": 4.999142282005322e-05, "loss": 0.2838, "step": 15030 }, { "epoch": 0.5465513482084454, "grad_norm": 2.142784357070923, "learning_rate": 4.999128635948e-05, "loss": 0.1754, "step": 15040 }, { "epoch": 0.5469147467112435, "grad_norm": 40.32966995239258, "learning_rate": 4.999114882212119e-05, "loss": 0.323, "step": 15050 }, { "epoch": 0.5472781452140417, "grad_norm": 1.056662678718567, "learning_rate": 4.999101020798268e-05, "loss": 0.1462, "step": 15060 }, { "epoch": 0.5476415437168399, "grad_norm": 3.7527568340301514, "learning_rate": 4.9990870517070464e-05, "loss": 0.2106, "step": 15070 }, { "epoch": 0.548004942219638, "grad_norm": 3.396487236022949, "learning_rate": 4.9990729749390555e-05, "loss": 0.1995, "step": 15080 }, { "epoch": 0.5483683407224362, "grad_norm": 1.650519609451294, "learning_rate": 4.999058790494902e-05, "loss": 0.195, "step": 15090 }, { "epoch": 0.5487317392252344, "grad_norm": 16.096418380737305, "learning_rate": 4.9990444983751975e-05, "loss": 0.2705, "step": 15100 }, { "epoch": 0.5490951377280325, "grad_norm": 1.273149847984314, "learning_rate": 4.999030098580556e-05, "loss": 0.2216, "step": 15110 }, { "epoch": 0.5494585362308307, "grad_norm": 1.5414496660232544, "learning_rate": 4.9990155911115995e-05, "loss": 0.1876, "step": 15120 }, { "epoch": 0.549821934733629, "grad_norm": 4.707805633544922, "learning_rate": 4.9990009759689524e-05, "loss": 0.1895, "step": 15130 }, { "epoch": 0.550185333236427, "grad_norm": 2.033162832260132, "learning_rate": 4.9989862531532456e-05, "loss": 0.1705, "step": 15140 }, { "epoch": 0.5505487317392252, "grad_norm": 7.349232196807861, "learning_rate": 4.998971422665112e-05, "loss": 0.2815, "step": 15150 }, { "epoch": 0.5509121302420235, "grad_norm": 1.293078064918518, "learning_rate": 4.9989564845051915e-05, "loss": 0.1789, "step": 15160 }, { "epoch": 0.5512755287448216, "grad_norm": 1.7343147993087769, "learning_rate": 4.998941438674127e-05, "loss": 0.1781, "step": 15170 }, { "epoch": 0.5516389272476198, "grad_norm": 2.440030574798584, "learning_rate": 4.9989262851725674e-05, "loss": 0.1927, "step": 15180 }, { "epoch": 0.5520023257504179, "grad_norm": 2.276111364364624, "learning_rate": 4.998911024001165e-05, "loss": 0.1774, "step": 15190 }, { "epoch": 0.5523657242532161, "grad_norm": 9.360533714294434, "learning_rate": 4.9988956551605783e-05, "loss": 0.2761, "step": 15200 }, { "epoch": 0.5527291227560143, "grad_norm": 3.8025522232055664, "learning_rate": 4.998880178651468e-05, "loss": 0.2855, "step": 15210 }, { "epoch": 0.5530925212588124, "grad_norm": 3.816631555557251, "learning_rate": 4.998864594474503e-05, "loss": 0.1559, "step": 15220 }, { "epoch": 0.5534559197616106, "grad_norm": 3.2255067825317383, "learning_rate": 4.998848902630353e-05, "loss": 0.1632, "step": 15230 }, { "epoch": 0.5538193182644088, "grad_norm": 1.077268123626709, "learning_rate": 4.9988331031196944e-05, "loss": 0.1969, "step": 15240 }, { "epoch": 0.5541827167672069, "grad_norm": 5.657801151275635, "learning_rate": 4.998817195943209e-05, "loss": 0.2361, "step": 15250 }, { "epoch": 0.5545461152700051, "grad_norm": 1.180039882659912, "learning_rate": 4.998801181101581e-05, "loss": 0.1779, "step": 15260 }, { "epoch": 0.5549095137728033, "grad_norm": 2.12725830078125, "learning_rate": 4.998785058595501e-05, "loss": 0.1505, "step": 15270 }, { "epoch": 0.5552729122756014, "grad_norm": 2.0784361362457275, "learning_rate": 4.998768828425664e-05, "loss": 0.2221, "step": 15280 }, { "epoch": 0.5556363107783996, "grad_norm": 2.0133538246154785, "learning_rate": 4.998752490592768e-05, "loss": 0.1759, "step": 15290 }, { "epoch": 0.5559997092811978, "grad_norm": 3.3181140422821045, "learning_rate": 4.998736045097518e-05, "loss": 0.229, "step": 15300 }, { "epoch": 0.5563631077839959, "grad_norm": 1.2881536483764648, "learning_rate": 4.998719491940622e-05, "loss": 0.1928, "step": 15310 }, { "epoch": 0.5567265062867941, "grad_norm": 1.0155376195907593, "learning_rate": 4.998702831122794e-05, "loss": 0.1986, "step": 15320 }, { "epoch": 0.5570899047895923, "grad_norm": 7.5557661056518555, "learning_rate": 4.998686062644752e-05, "loss": 0.2317, "step": 15330 }, { "epoch": 0.5574533032923904, "grad_norm": 2.3196377754211426, "learning_rate": 4.9986691865072176e-05, "loss": 0.1827, "step": 15340 }, { "epoch": 0.5578167017951886, "grad_norm": 25.910188674926758, "learning_rate": 4.998652202710918e-05, "loss": 0.2824, "step": 15350 }, { "epoch": 0.5581801002979868, "grad_norm": 1.0091907978057861, "learning_rate": 4.9986351112565846e-05, "loss": 0.1946, "step": 15360 }, { "epoch": 0.5585434988007849, "grad_norm": 3.0022408962249756, "learning_rate": 4.998617912144956e-05, "loss": 0.2028, "step": 15370 }, { "epoch": 0.5589068973035831, "grad_norm": 2.9837419986724854, "learning_rate": 4.99860060537677e-05, "loss": 0.203, "step": 15380 }, { "epoch": 0.5592702958063813, "grad_norm": 2.238867998123169, "learning_rate": 4.9985831909527746e-05, "loss": 0.1392, "step": 15390 }, { "epoch": 0.5596336943091794, "grad_norm": 3.8585119247436523, "learning_rate": 4.9985656688737205e-05, "loss": 0.2289, "step": 15400 }, { "epoch": 0.5599970928119776, "grad_norm": 2.4951331615448, "learning_rate": 4.998548039140361e-05, "loss": 0.1852, "step": 15410 }, { "epoch": 0.5603604913147758, "grad_norm": 1.8404667377471924, "learning_rate": 4.998530301753455e-05, "loss": 0.1813, "step": 15420 }, { "epoch": 0.5607238898175739, "grad_norm": 2.615247964859009, "learning_rate": 4.9985124567137695e-05, "loss": 0.228, "step": 15430 }, { "epoch": 0.5610872883203721, "grad_norm": 1.2074272632598877, "learning_rate": 4.9984945040220715e-05, "loss": 0.1879, "step": 15440 }, { "epoch": 0.5614506868231703, "grad_norm": 38.466712951660156, "learning_rate": 4.9984764436791355e-05, "loss": 0.3965, "step": 15450 }, { "epoch": 0.5618140853259684, "grad_norm": 1.4196547269821167, "learning_rate": 4.998458275685739e-05, "loss": 0.2061, "step": 15460 }, { "epoch": 0.5621774838287666, "grad_norm": 1.2451281547546387, "learning_rate": 4.998440000042664e-05, "loss": 0.2118, "step": 15470 }, { "epoch": 0.5625408823315647, "grad_norm": 3.7021896839141846, "learning_rate": 4.9984216167507005e-05, "loss": 0.2294, "step": 15480 }, { "epoch": 0.562904280834363, "grad_norm": 2.8826780319213867, "learning_rate": 4.998403125810638e-05, "loss": 0.1654, "step": 15490 }, { "epoch": 0.5632676793371612, "grad_norm": 8.366926193237305, "learning_rate": 4.998384527223274e-05, "loss": 0.2467, "step": 15500 }, { "epoch": 0.5636310778399592, "grad_norm": 2.2532148361206055, "learning_rate": 4.99836582098941e-05, "loss": 0.2569, "step": 15510 }, { "epoch": 0.5639944763427575, "grad_norm": 2.164987325668335, "learning_rate": 4.998347007109853e-05, "loss": 0.2167, "step": 15520 }, { "epoch": 0.5643578748455557, "grad_norm": 4.651108264923096, "learning_rate": 4.998328085585411e-05, "loss": 0.2138, "step": 15530 }, { "epoch": 0.5647212733483538, "grad_norm": 1.5128902196884155, "learning_rate": 4.9983090564169024e-05, "loss": 0.1821, "step": 15540 }, { "epoch": 0.565084671851152, "grad_norm": 8.516124725341797, "learning_rate": 4.998289919605145e-05, "loss": 0.2546, "step": 15550 }, { "epoch": 0.5654480703539502, "grad_norm": 1.6480666399002075, "learning_rate": 4.9982706751509635e-05, "loss": 0.2069, "step": 15560 }, { "epoch": 0.5658114688567483, "grad_norm": 1.3768938779830933, "learning_rate": 4.998251323055187e-05, "loss": 0.1775, "step": 15570 }, { "epoch": 0.5661748673595465, "grad_norm": 1.8793795108795166, "learning_rate": 4.998231863318651e-05, "loss": 0.14, "step": 15580 }, { "epoch": 0.5665382658623447, "grad_norm": 1.2361701726913452, "learning_rate": 4.9982122959421924e-05, "loss": 0.1797, "step": 15590 }, { "epoch": 0.5669016643651428, "grad_norm": 14.16727352142334, "learning_rate": 4.998192620926655e-05, "loss": 0.3523, "step": 15600 }, { "epoch": 0.5669016643651428, "eval_loss": 0.40661031007766724, "eval_runtime": 180.2598, "eval_samples_per_second": 41.13, "eval_steps_per_second": 5.143, "eval_wer": 0.21753771307204967, "step": 15600 }, { "epoch": 0.567265062867941, "grad_norm": 2.460245370864868, "learning_rate": 4.9981728382728855e-05, "loss": 0.1824, "step": 15610 }, { "epoch": 0.5676284613707392, "grad_norm": 1.603381633758545, "learning_rate": 4.9981529479817366e-05, "loss": 0.1506, "step": 15620 }, { "epoch": 0.5679918598735373, "grad_norm": 3.650087356567383, "learning_rate": 4.9981329500540664e-05, "loss": 0.2351, "step": 15630 }, { "epoch": 0.5683552583763355, "grad_norm": 2.2338075637817383, "learning_rate": 4.9981128444907354e-05, "loss": 0.1785, "step": 15640 }, { "epoch": 0.5687186568791337, "grad_norm": 7.641642093658447, "learning_rate": 4.998092631292611e-05, "loss": 0.2816, "step": 15650 }, { "epoch": 0.5690820553819318, "grad_norm": 1.5877048969268799, "learning_rate": 4.998072310460562e-05, "loss": 0.1784, "step": 15660 }, { "epoch": 0.56944545388473, "grad_norm": 3.5917787551879883, "learning_rate": 4.998051881995466e-05, "loss": 0.1685, "step": 15670 }, { "epoch": 0.5698088523875282, "grad_norm": 6.459184169769287, "learning_rate": 4.998031345898203e-05, "loss": 0.2031, "step": 15680 }, { "epoch": 0.5701722508903263, "grad_norm": 2.7518184185028076, "learning_rate": 4.9980107021696565e-05, "loss": 0.168, "step": 15690 }, { "epoch": 0.5705356493931245, "grad_norm": 9.814598083496094, "learning_rate": 4.997989950810718e-05, "loss": 0.2778, "step": 15700 }, { "epoch": 0.5708990478959227, "grad_norm": 2.0985398292541504, "learning_rate": 4.9979690918222785e-05, "loss": 0.1864, "step": 15710 }, { "epoch": 0.5712624463987208, "grad_norm": 1.9264591932296753, "learning_rate": 4.997948125205241e-05, "loss": 0.1682, "step": 15720 }, { "epoch": 0.571625844901519, "grad_norm": 4.2961955070495605, "learning_rate": 4.997927050960505e-05, "loss": 0.198, "step": 15730 }, { "epoch": 0.5719892434043172, "grad_norm": 4.524483680725098, "learning_rate": 4.99790586908898e-05, "loss": 0.3235, "step": 15740 }, { "epoch": 0.5723526419071153, "grad_norm": 5.259559154510498, "learning_rate": 4.997884579591578e-05, "loss": 0.335, "step": 15750 }, { "epoch": 0.5727160404099135, "grad_norm": 1.7875639200210571, "learning_rate": 4.997863182469219e-05, "loss": 0.1674, "step": 15760 }, { "epoch": 0.5730794389127116, "grad_norm": 1.1852960586547852, "learning_rate": 4.9978416777228216e-05, "loss": 0.1968, "step": 15770 }, { "epoch": 0.5734428374155098, "grad_norm": 1.253061294555664, "learning_rate": 4.997820065353314e-05, "loss": 0.2177, "step": 15780 }, { "epoch": 0.573806235918308, "grad_norm": 2.0577871799468994, "learning_rate": 4.9977983453616266e-05, "loss": 0.1498, "step": 15790 }, { "epoch": 0.5741696344211061, "grad_norm": 7.4168901443481445, "learning_rate": 4.997776517748696e-05, "loss": 0.3137, "step": 15800 }, { "epoch": 0.5745330329239043, "grad_norm": 2.9957845211029053, "learning_rate": 4.9977545825154625e-05, "loss": 0.1819, "step": 15810 }, { "epoch": 0.5748964314267025, "grad_norm": 1.251610517501831, "learning_rate": 4.997732539662871e-05, "loss": 0.1633, "step": 15820 }, { "epoch": 0.5752598299295006, "grad_norm": 3.229581594467163, "learning_rate": 4.997710389191871e-05, "loss": 0.1888, "step": 15830 }, { "epoch": 0.5756232284322989, "grad_norm": 1.2718089818954468, "learning_rate": 4.997688131103417e-05, "loss": 0.1938, "step": 15840 }, { "epoch": 0.5759866269350971, "grad_norm": 4.77078104019165, "learning_rate": 4.9976657653984694e-05, "loss": 0.2311, "step": 15850 }, { "epoch": 0.5763500254378952, "grad_norm": 1.9487907886505127, "learning_rate": 4.9976432920779904e-05, "loss": 0.7679, "step": 15860 }, { "epoch": 0.5767134239406934, "grad_norm": 2.1322100162506104, "learning_rate": 4.997620711142948e-05, "loss": 0.204, "step": 15870 }, { "epoch": 0.5770768224434916, "grad_norm": 3.0756008625030518, "learning_rate": 4.997598022594316e-05, "loss": 0.205, "step": 15880 }, { "epoch": 0.5774402209462897, "grad_norm": 2.2399511337280273, "learning_rate": 4.997575226433071e-05, "loss": 0.183, "step": 15890 }, { "epoch": 0.5778036194490879, "grad_norm": 4.17095947265625, "learning_rate": 4.997552322660197e-05, "loss": 0.2209, "step": 15900 }, { "epoch": 0.5781670179518861, "grad_norm": 1.7085528373718262, "learning_rate": 4.9975293112766794e-05, "loss": 0.4196, "step": 15910 }, { "epoch": 0.5785304164546842, "grad_norm": 1.6818984746932983, "learning_rate": 4.99750619228351e-05, "loss": 0.1592, "step": 15920 }, { "epoch": 0.5788938149574824, "grad_norm": 2.526503324508667, "learning_rate": 4.9974829656816846e-05, "loss": 0.7523, "step": 15930 }, { "epoch": 0.5792572134602806, "grad_norm": 4.456855297088623, "learning_rate": 4.997459631472205e-05, "loss": 0.1664, "step": 15940 }, { "epoch": 0.5796206119630787, "grad_norm": 28.427839279174805, "learning_rate": 4.9974361896560746e-05, "loss": 0.2891, "step": 15950 }, { "epoch": 0.5799840104658769, "grad_norm": 1.6720882654190063, "learning_rate": 4.997412640234306e-05, "loss": 0.1522, "step": 15960 }, { "epoch": 0.5803474089686751, "grad_norm": 1.6327390670776367, "learning_rate": 4.997388983207911e-05, "loss": 0.1957, "step": 15970 }, { "epoch": 0.5807108074714732, "grad_norm": 1.5792416334152222, "learning_rate": 4.997365218577912e-05, "loss": 0.2325, "step": 15980 }, { "epoch": 0.5810742059742714, "grad_norm": 1.7585738897323608, "learning_rate": 4.9973413463453305e-05, "loss": 0.2023, "step": 15990 }, { "epoch": 0.5814376044770696, "grad_norm": 8.14810562133789, "learning_rate": 4.997317366511196e-05, "loss": 0.2093, "step": 16000 }, { "epoch": 0.5818010029798677, "grad_norm": 3.1430416107177734, "learning_rate": 4.997293279076543e-05, "loss": 0.1742, "step": 16010 }, { "epoch": 0.5821644014826659, "grad_norm": 1.9447312355041504, "learning_rate": 4.997269084042406e-05, "loss": 0.1852, "step": 16020 }, { "epoch": 0.5825277999854641, "grad_norm": 2.1479732990264893, "learning_rate": 4.997244781409831e-05, "loss": 0.2197, "step": 16030 }, { "epoch": 0.5828911984882622, "grad_norm": 3.7066800594329834, "learning_rate": 4.9972203711798625e-05, "loss": 0.1899, "step": 16040 }, { "epoch": 0.5832545969910604, "grad_norm": 4.3598432540893555, "learning_rate": 4.9971958533535544e-05, "loss": 0.237, "step": 16050 }, { "epoch": 0.5836179954938585, "grad_norm": 3.410356283187866, "learning_rate": 4.997171227931962e-05, "loss": 0.1985, "step": 16060 }, { "epoch": 0.5839813939966567, "grad_norm": 1.6299129724502563, "learning_rate": 4.9971464949161454e-05, "loss": 0.1758, "step": 16070 }, { "epoch": 0.5843447924994549, "grad_norm": 1.993067979812622, "learning_rate": 4.9971216543071716e-05, "loss": 0.1822, "step": 16080 }, { "epoch": 0.584708191002253, "grad_norm": 1.2057979106903076, "learning_rate": 4.9970967061061104e-05, "loss": 0.1953, "step": 16090 }, { "epoch": 0.5850715895050512, "grad_norm": 34.54500961303711, "learning_rate": 4.997071650314037e-05, "loss": 0.277, "step": 16100 }, { "epoch": 0.5854349880078494, "grad_norm": 1.243656039237976, "learning_rate": 4.997046486932031e-05, "loss": 0.172, "step": 16110 }, { "epoch": 0.5857983865106475, "grad_norm": 0.6155187487602234, "learning_rate": 4.997021215961176e-05, "loss": 0.1523, "step": 16120 }, { "epoch": 0.5861617850134457, "grad_norm": 2.0203208923339844, "learning_rate": 4.9969958374025615e-05, "loss": 0.1857, "step": 16130 }, { "epoch": 0.5865251835162439, "grad_norm": 1.1912654638290405, "learning_rate": 4.9969703512572805e-05, "loss": 0.2169, "step": 16140 }, { "epoch": 0.586888582019042, "grad_norm": 3.528538227081299, "learning_rate": 4.9969447575264315e-05, "loss": 0.2428, "step": 16150 }, { "epoch": 0.5872519805218402, "grad_norm": 0.9166990518569946, "learning_rate": 4.996919056211117e-05, "loss": 0.2206, "step": 16160 }, { "epoch": 0.5876153790246385, "grad_norm": 1.4956426620483398, "learning_rate": 4.996893247312444e-05, "loss": 0.168, "step": 16170 }, { "epoch": 0.5879787775274365, "grad_norm": 1.4502993822097778, "learning_rate": 4.996867330831526e-05, "loss": 0.1767, "step": 16180 }, { "epoch": 0.5883421760302348, "grad_norm": 0.9337482452392578, "learning_rate": 4.9968413067694775e-05, "loss": 0.2035, "step": 16190 }, { "epoch": 0.588705574533033, "grad_norm": 13.500269889831543, "learning_rate": 4.996815175127422e-05, "loss": 0.2335, "step": 16200 }, { "epoch": 0.588705574533033, "eval_loss": 0.4428017735481262, "eval_runtime": 180.2159, "eval_samples_per_second": 41.14, "eval_steps_per_second": 5.144, "eval_wer": 0.22281118957285748, "step": 16200 }, { "epoch": 0.5890689730358311, "grad_norm": 5.091770648956299, "learning_rate": 4.996788935906483e-05, "loss": 0.1884, "step": 16210 }, { "epoch": 0.5894323715386293, "grad_norm": 2.379033327102661, "learning_rate": 4.996762589107793e-05, "loss": 0.196, "step": 16220 }, { "epoch": 0.5897957700414275, "grad_norm": 2.576484203338623, "learning_rate": 4.996736134732487e-05, "loss": 0.2069, "step": 16230 }, { "epoch": 0.5901591685442256, "grad_norm": 1.4235923290252686, "learning_rate": 4.9967095727817035e-05, "loss": 0.1905, "step": 16240 }, { "epoch": 0.5905225670470238, "grad_norm": 7.119918346405029, "learning_rate": 4.9966829032565886e-05, "loss": 0.2803, "step": 16250 }, { "epoch": 0.590885965549822, "grad_norm": 1.1050286293029785, "learning_rate": 4.99665612615829e-05, "loss": 0.1914, "step": 16260 }, { "epoch": 0.5912493640526201, "grad_norm": 1.403601884841919, "learning_rate": 4.9966292414879625e-05, "loss": 0.1649, "step": 16270 }, { "epoch": 0.5916127625554183, "grad_norm": 5.439052104949951, "learning_rate": 4.9966022492467635e-05, "loss": 0.1897, "step": 16280 }, { "epoch": 0.5919761610582165, "grad_norm": 1.0014379024505615, "learning_rate": 4.996575149435857e-05, "loss": 0.1472, "step": 16290 }, { "epoch": 0.5923395595610146, "grad_norm": 9.480517387390137, "learning_rate": 4.99654794205641e-05, "loss": 0.2351, "step": 16300 }, { "epoch": 0.5927029580638128, "grad_norm": 0.8406987190246582, "learning_rate": 4.9965206271095955e-05, "loss": 0.1795, "step": 16310 }, { "epoch": 0.593066356566611, "grad_norm": 1.378169298171997, "learning_rate": 4.996493204596589e-05, "loss": 0.1597, "step": 16320 }, { "epoch": 0.5934297550694091, "grad_norm": 3.9748549461364746, "learning_rate": 4.996465674518573e-05, "loss": 0.2264, "step": 16330 }, { "epoch": 0.5937931535722073, "grad_norm": 2.2626171112060547, "learning_rate": 4.996438036876734e-05, "loss": 0.1647, "step": 16340 }, { "epoch": 0.5941565520750055, "grad_norm": 3.8039205074310303, "learning_rate": 4.996410291672262e-05, "loss": 0.2204, "step": 16350 }, { "epoch": 0.5945199505778036, "grad_norm": 1.5219416618347168, "learning_rate": 4.996382438906353e-05, "loss": 0.1518, "step": 16360 }, { "epoch": 0.5948833490806018, "grad_norm": 1.4811570644378662, "learning_rate": 4.9963544785802064e-05, "loss": 0.2006, "step": 16370 }, { "epoch": 0.5952467475833999, "grad_norm": 4.7030558586120605, "learning_rate": 4.996326410695028e-05, "loss": 0.2524, "step": 16380 }, { "epoch": 0.5956101460861981, "grad_norm": 1.103624939918518, "learning_rate": 4.996298235252026e-05, "loss": 0.1558, "step": 16390 }, { "epoch": 0.5959735445889963, "grad_norm": 4.654818534851074, "learning_rate": 4.996269952252415e-05, "loss": 0.2746, "step": 16400 }, { "epoch": 0.5963369430917944, "grad_norm": 1.6746747493743896, "learning_rate": 4.996241561697413e-05, "loss": 0.1838, "step": 16410 }, { "epoch": 0.5967003415945926, "grad_norm": 3.1955924034118652, "learning_rate": 4.996213063588245e-05, "loss": 0.1773, "step": 16420 }, { "epoch": 0.5970637400973908, "grad_norm": 1.782669186592102, "learning_rate": 4.996184457926137e-05, "loss": 0.1939, "step": 16430 }, { "epoch": 0.5974271386001889, "grad_norm": 1.2277849912643433, "learning_rate": 4.996155744712322e-05, "loss": 0.1724, "step": 16440 }, { "epoch": 0.5977905371029871, "grad_norm": 25.578798294067383, "learning_rate": 4.996126923948038e-05, "loss": 0.2612, "step": 16450 }, { "epoch": 0.5981539356057853, "grad_norm": 0.984426736831665, "learning_rate": 4.9960979956345254e-05, "loss": 0.1621, "step": 16460 }, { "epoch": 0.5985173341085834, "grad_norm": 2.1299145221710205, "learning_rate": 4.9960689597730315e-05, "loss": 0.161, "step": 16470 }, { "epoch": 0.5988807326113816, "grad_norm": 2.6153085231781006, "learning_rate": 4.996039816364807e-05, "loss": 0.2122, "step": 16480 }, { "epoch": 0.5992441311141798, "grad_norm": 4.464552879333496, "learning_rate": 4.996010565411108e-05, "loss": 0.2417, "step": 16490 }, { "epoch": 0.5996075296169779, "grad_norm": 26.441349029541016, "learning_rate": 4.995981206913194e-05, "loss": 0.3103, "step": 16500 }, { "epoch": 0.5999709281197761, "grad_norm": 2.353302478790283, "learning_rate": 4.995951740872331e-05, "loss": 1.0256, "step": 16510 }, { "epoch": 0.6003343266225744, "grad_norm": 0.8436356782913208, "learning_rate": 4.995922167289788e-05, "loss": 0.1563, "step": 16520 }, { "epoch": 0.6006977251253725, "grad_norm": 3.3516342639923096, "learning_rate": 4.99589248616684e-05, "loss": 0.2441, "step": 16530 }, { "epoch": 0.6010611236281707, "grad_norm": 2.0286059379577637, "learning_rate": 4.995862697504764e-05, "loss": 0.1767, "step": 16540 }, { "epoch": 0.6014245221309689, "grad_norm": 18.248151779174805, "learning_rate": 4.9958328013048464e-05, "loss": 0.3522, "step": 16550 }, { "epoch": 0.601787920633767, "grad_norm": 2.1514463424682617, "learning_rate": 4.995802797568372e-05, "loss": 0.1771, "step": 16560 }, { "epoch": 0.6021513191365652, "grad_norm": 5.868020534515381, "learning_rate": 4.995772686296635e-05, "loss": 0.1776, "step": 16570 }, { "epoch": 0.6025147176393634, "grad_norm": 4.539637565612793, "learning_rate": 4.9957424674909336e-05, "loss": 0.2002, "step": 16580 }, { "epoch": 0.6028781161421615, "grad_norm": 1.7226190567016602, "learning_rate": 4.99571214115257e-05, "loss": 0.1927, "step": 16590 }, { "epoch": 0.6032415146449597, "grad_norm": 22.087247848510742, "learning_rate": 4.9956817072828485e-05, "loss": 0.249, "step": 16600 }, { "epoch": 0.6036049131477579, "grad_norm": 2.4267120361328125, "learning_rate": 4.995651165883083e-05, "loss": 0.1935, "step": 16610 }, { "epoch": 0.603968311650556, "grad_norm": 2.5284249782562256, "learning_rate": 4.995620516954588e-05, "loss": 0.1495, "step": 16620 }, { "epoch": 0.6043317101533542, "grad_norm": 1.5988596677780151, "learning_rate": 4.995589760498684e-05, "loss": 0.2329, "step": 16630 }, { "epoch": 0.6046951086561524, "grad_norm": 1.0771689414978027, "learning_rate": 4.9955588965166966e-05, "loss": 0.1634, "step": 16640 }, { "epoch": 0.6050585071589505, "grad_norm": 8.72423267364502, "learning_rate": 4.995527925009956e-05, "loss": 0.27, "step": 16650 }, { "epoch": 0.6054219056617487, "grad_norm": 1.3176789283752441, "learning_rate": 4.9954968459797955e-05, "loss": 1.1913, "step": 16660 }, { "epoch": 0.6057853041645468, "grad_norm": 1.8307547569274902, "learning_rate": 4.9954656594275555e-05, "loss": 0.188, "step": 16670 }, { "epoch": 0.606148702667345, "grad_norm": 2.783604621887207, "learning_rate": 4.9954343653545795e-05, "loss": 0.1791, "step": 16680 }, { "epoch": 0.6065121011701432, "grad_norm": 1.6639970541000366, "learning_rate": 4.9954029637622146e-05, "loss": 0.1829, "step": 16690 }, { "epoch": 0.6068754996729413, "grad_norm": 11.055110931396484, "learning_rate": 4.995371454651815e-05, "loss": 0.2229, "step": 16700 }, { "epoch": 0.6072388981757395, "grad_norm": 1.8166972398757935, "learning_rate": 4.9953398380247384e-05, "loss": 0.1734, "step": 16710 }, { "epoch": 0.6076022966785377, "grad_norm": 4.851889610290527, "learning_rate": 4.995308113882346e-05, "loss": 0.1716, "step": 16720 }, { "epoch": 0.6079656951813358, "grad_norm": 3.0047857761383057, "learning_rate": 4.9952762822260056e-05, "loss": 0.2125, "step": 16730 }, { "epoch": 0.608329093684134, "grad_norm": 1.1506407260894775, "learning_rate": 4.9952443430570887e-05, "loss": 0.171, "step": 16740 }, { "epoch": 0.6086924921869322, "grad_norm": 4.324979782104492, "learning_rate": 4.995212296376971e-05, "loss": 0.2365, "step": 16750 }, { "epoch": 0.6090558906897303, "grad_norm": 1.2295490503311157, "learning_rate": 4.995180142187033e-05, "loss": 0.2002, "step": 16760 }, { "epoch": 0.6094192891925285, "grad_norm": 1.454434871673584, "learning_rate": 4.995147880488661e-05, "loss": 0.1602, "step": 16770 }, { "epoch": 0.6097826876953267, "grad_norm": 2.6185641288757324, "learning_rate": 4.995115511283244e-05, "loss": 0.1904, "step": 16780 }, { "epoch": 0.6101460861981248, "grad_norm": 1.2603826522827148, "learning_rate": 4.9950830345721774e-05, "loss": 0.1892, "step": 16790 }, { "epoch": 0.610509484700923, "grad_norm": 14.189190864562988, "learning_rate": 4.9950504503568615e-05, "loss": 0.281, "step": 16800 }, { "epoch": 0.610509484700923, "eval_loss": 0.4123116433620453, "eval_runtime": 179.5081, "eval_samples_per_second": 41.302, "eval_steps_per_second": 5.164, "eval_wer": 0.22986367019441972, "step": 16800 }, { "epoch": 0.6108728832037212, "grad_norm": 1.7670204639434814, "learning_rate": 4.995017758638698e-05, "loss": 0.2581, "step": 16810 }, { "epoch": 0.6112362817065193, "grad_norm": 1.2099360227584839, "learning_rate": 4.9949849594190964e-05, "loss": 0.1762, "step": 16820 }, { "epoch": 0.6115996802093175, "grad_norm": 2.7719335556030273, "learning_rate": 4.9949520526994716e-05, "loss": 0.1867, "step": 16830 }, { "epoch": 0.6119630787121157, "grad_norm": 1.5935924053192139, "learning_rate": 4.9949190384812386e-05, "loss": 0.191, "step": 16840 }, { "epoch": 0.6123264772149138, "grad_norm": 3.511439085006714, "learning_rate": 4.994885916765821e-05, "loss": 0.2132, "step": 16850 }, { "epoch": 0.612689875717712, "grad_norm": 1.693789005279541, "learning_rate": 4.994852687554647e-05, "loss": 0.167, "step": 16860 }, { "epoch": 0.6130532742205103, "grad_norm": 2.1199066638946533, "learning_rate": 4.994819350849147e-05, "loss": 0.172, "step": 16870 }, { "epoch": 0.6134166727233084, "grad_norm": 2.724487543106079, "learning_rate": 4.9947859066507575e-05, "loss": 0.2083, "step": 16880 }, { "epoch": 0.6137800712261066, "grad_norm": 0.926547110080719, "learning_rate": 4.99475235496092e-05, "loss": 0.1517, "step": 16890 }, { "epoch": 0.6141434697289048, "grad_norm": 14.503059387207031, "learning_rate": 4.99471869578108e-05, "loss": 0.1945, "step": 16900 }, { "epoch": 0.6145068682317029, "grad_norm": 3.2206919193267822, "learning_rate": 4.994684929112687e-05, "loss": 0.1882, "step": 16910 }, { "epoch": 0.6148702667345011, "grad_norm": 2.004995107650757, "learning_rate": 4.994651054957198e-05, "loss": 0.1876, "step": 16920 }, { "epoch": 0.6152336652372993, "grad_norm": 2.0580127239227295, "learning_rate": 4.99461707331607e-05, "loss": 0.2104, "step": 16930 }, { "epoch": 0.6155970637400974, "grad_norm": 3.3028602600097656, "learning_rate": 4.9945829841907684e-05, "loss": 0.1494, "step": 16940 }, { "epoch": 0.6159604622428956, "grad_norm": 7.572249412536621, "learning_rate": 4.994548787582761e-05, "loss": 0.2381, "step": 16950 }, { "epoch": 0.6163238607456937, "grad_norm": 1.4220709800720215, "learning_rate": 4.9945144834935234e-05, "loss": 0.1916, "step": 16960 }, { "epoch": 0.6166872592484919, "grad_norm": 1.2397724390029907, "learning_rate": 4.994480071924531e-05, "loss": 0.1593, "step": 16970 }, { "epoch": 0.6170506577512901, "grad_norm": 2.2569403648376465, "learning_rate": 4.9944455528772684e-05, "loss": 0.1984, "step": 16980 }, { "epoch": 0.6174140562540882, "grad_norm": 1.811727523803711, "learning_rate": 4.994410926353221e-05, "loss": 0.1838, "step": 16990 }, { "epoch": 0.6177774547568864, "grad_norm": 2.783061981201172, "learning_rate": 4.9943761923538834e-05, "loss": 0.2217, "step": 17000 }, { "epoch": 0.6181408532596846, "grad_norm": 2.816331148147583, "learning_rate": 4.99434135088075e-05, "loss": 0.1911, "step": 17010 }, { "epoch": 0.6185042517624827, "grad_norm": 1.238916039466858, "learning_rate": 4.9943064019353234e-05, "loss": 0.1854, "step": 17020 }, { "epoch": 0.6188676502652809, "grad_norm": 5.16685152053833, "learning_rate": 4.9942713455191075e-05, "loss": 0.1797, "step": 17030 }, { "epoch": 0.6192310487680791, "grad_norm": 2.486461639404297, "learning_rate": 4.9942361816336146e-05, "loss": 0.1926, "step": 17040 }, { "epoch": 0.6195944472708772, "grad_norm": 9.018515586853027, "learning_rate": 4.994200910280359e-05, "loss": 0.2193, "step": 17050 }, { "epoch": 0.6199578457736754, "grad_norm": 1.804166555404663, "learning_rate": 4.994165531460861e-05, "loss": 0.1977, "step": 17060 }, { "epoch": 0.6203212442764736, "grad_norm": 1.2862845659255981, "learning_rate": 4.994130045176644e-05, "loss": 0.1493, "step": 17070 }, { "epoch": 0.6206846427792717, "grad_norm": 4.164750576019287, "learning_rate": 4.994094451429237e-05, "loss": 0.2548, "step": 17080 }, { "epoch": 0.6210480412820699, "grad_norm": 1.577255368232727, "learning_rate": 4.994058750220176e-05, "loss": 0.1703, "step": 17090 }, { "epoch": 0.6214114397848681, "grad_norm": 5.805021286010742, "learning_rate": 4.994022941550996e-05, "loss": 0.2976, "step": 17100 }, { "epoch": 0.6217748382876662, "grad_norm": 0.9706230163574219, "learning_rate": 4.993987025423241e-05, "loss": 0.1454, "step": 17110 }, { "epoch": 0.6221382367904644, "grad_norm": 1.4393014907836914, "learning_rate": 4.993951001838459e-05, "loss": 0.1496, "step": 17120 }, { "epoch": 0.6225016352932626, "grad_norm": 1.839086651802063, "learning_rate": 4.993914870798202e-05, "loss": 0.2256, "step": 17130 }, { "epoch": 0.6228650337960607, "grad_norm": 1.8924603462219238, "learning_rate": 4.993878632304027e-05, "loss": 0.1415, "step": 17140 }, { "epoch": 0.6232284322988589, "grad_norm": 12.03149700164795, "learning_rate": 4.993842286357494e-05, "loss": 0.7236, "step": 17150 }, { "epoch": 0.6235918308016571, "grad_norm": 2.0251877307891846, "learning_rate": 4.993805832960171e-05, "loss": 0.1913, "step": 17160 }, { "epoch": 0.6239552293044552, "grad_norm": 2.341251850128174, "learning_rate": 4.993769272113628e-05, "loss": 0.1734, "step": 17170 }, { "epoch": 0.6243186278072534, "grad_norm": 2.517820358276367, "learning_rate": 4.993732603819438e-05, "loss": 0.18, "step": 17180 }, { "epoch": 0.6246820263100517, "grad_norm": 1.6384356021881104, "learning_rate": 4.993695828079184e-05, "loss": 0.1513, "step": 17190 }, { "epoch": 0.6250454248128497, "grad_norm": 10.794693946838379, "learning_rate": 4.993658944894449e-05, "loss": 0.2282, "step": 17200 }, { "epoch": 0.625408823315648, "grad_norm": 1.2552087306976318, "learning_rate": 4.9936219542668236e-05, "loss": 0.1938, "step": 17210 }, { "epoch": 0.6257722218184462, "grad_norm": 2.423431634902954, "learning_rate": 4.993584856197899e-05, "loss": 0.1487, "step": 17220 }, { "epoch": 0.6261356203212443, "grad_norm": 1.7924834489822388, "learning_rate": 4.9935476506892763e-05, "loss": 0.195, "step": 17230 }, { "epoch": 0.6264990188240425, "grad_norm": 1.6521999835968018, "learning_rate": 4.9935103377425566e-05, "loss": 0.1652, "step": 17240 }, { "epoch": 0.6268624173268406, "grad_norm": 6.472127437591553, "learning_rate": 4.9934729173593494e-05, "loss": 0.2481, "step": 17250 }, { "epoch": 0.6272258158296388, "grad_norm": 1.8962410688400269, "learning_rate": 4.993435389541265e-05, "loss": 0.1487, "step": 17260 }, { "epoch": 0.627589214332437, "grad_norm": 1.2054486274719238, "learning_rate": 4.993397754289922e-05, "loss": 0.1496, "step": 17270 }, { "epoch": 0.6279526128352351, "grad_norm": 3.9840786457061768, "learning_rate": 4.993360011606941e-05, "loss": 0.1776, "step": 17280 }, { "epoch": 0.6283160113380333, "grad_norm": 0.9625970125198364, "learning_rate": 4.9933221614939485e-05, "loss": 0.1652, "step": 17290 }, { "epoch": 0.6286794098408315, "grad_norm": 11.166252136230469, "learning_rate": 4.993284203952575e-05, "loss": 0.233, "step": 17300 }, { "epoch": 0.6290428083436296, "grad_norm": 2.356268882751465, "learning_rate": 4.9932461389844566e-05, "loss": 0.1498, "step": 17310 }, { "epoch": 0.6294062068464278, "grad_norm": 0.9366337656974792, "learning_rate": 4.993207966591234e-05, "loss": 0.1483, "step": 17320 }, { "epoch": 0.629769605349226, "grad_norm": 5.854847431182861, "learning_rate": 4.9931696867745495e-05, "loss": 0.1603, "step": 17330 }, { "epoch": 0.6301330038520241, "grad_norm": 1.0090773105621338, "learning_rate": 4.9931312995360546e-05, "loss": 0.1475, "step": 17340 }, { "epoch": 0.6304964023548223, "grad_norm": 3.896676540374756, "learning_rate": 4.9930928048774024e-05, "loss": 0.244, "step": 17350 }, { "epoch": 0.6308598008576205, "grad_norm": 1.1872800588607788, "learning_rate": 4.993054202800252e-05, "loss": 0.1618, "step": 17360 }, { "epoch": 0.6312231993604186, "grad_norm": 1.8078994750976562, "learning_rate": 4.9930154933062654e-05, "loss": 0.1554, "step": 17370 }, { "epoch": 0.6315865978632168, "grad_norm": 1.8264563083648682, "learning_rate": 4.9929766763971126e-05, "loss": 0.162, "step": 17380 }, { "epoch": 0.631949996366015, "grad_norm": 0.6304519176483154, "learning_rate": 4.992937752074465e-05, "loss": 0.209, "step": 17390 }, { "epoch": 0.6323133948688131, "grad_norm": 4.7621917724609375, "learning_rate": 4.992898720339998e-05, "loss": 0.2393, "step": 17400 }, { "epoch": 0.6323133948688131, "eval_loss": 0.3943130671977997, "eval_runtime": 180.0553, "eval_samples_per_second": 41.176, "eval_steps_per_second": 5.148, "eval_wer": 0.21144734692395664, "step": 17400 }, { "epoch": 0.6326767933716113, "grad_norm": 1.1110138893127441, "learning_rate": 4.992859581195396e-05, "loss": 0.143, "step": 17410 }, { "epoch": 0.6330401918744095, "grad_norm": 1.2453794479370117, "learning_rate": 4.992820334642344e-05, "loss": 0.1454, "step": 17420 }, { "epoch": 0.6334035903772076, "grad_norm": 3.669144630432129, "learning_rate": 4.9927809806825335e-05, "loss": 0.2496, "step": 17430 }, { "epoch": 0.6337669888800058, "grad_norm": 2.7898483276367188, "learning_rate": 4.99274151931766e-05, "loss": 0.1614, "step": 17440 }, { "epoch": 0.634130387382804, "grad_norm": 6.725431442260742, "learning_rate": 4.992701950549423e-05, "loss": 0.2622, "step": 17450 }, { "epoch": 0.6344937858856021, "grad_norm": 1.6481575965881348, "learning_rate": 4.992662274379528e-05, "loss": 0.1713, "step": 17460 }, { "epoch": 0.6348571843884003, "grad_norm": 1.3567384481430054, "learning_rate": 4.9926224908096856e-05, "loss": 0.1725, "step": 17470 }, { "epoch": 0.6352205828911985, "grad_norm": 1.8207722902297974, "learning_rate": 4.9925825998416076e-05, "loss": 0.1973, "step": 17480 }, { "epoch": 0.6355839813939966, "grad_norm": 2.2345893383026123, "learning_rate": 4.9925426014770146e-05, "loss": 0.1847, "step": 17490 }, { "epoch": 0.6359473798967948, "grad_norm": 7.193591594696045, "learning_rate": 4.992502495717629e-05, "loss": 0.2605, "step": 17500 }, { "epoch": 0.636310778399593, "grad_norm": 1.346073865890503, "learning_rate": 4.99246228256518e-05, "loss": 0.1518, "step": 17510 }, { "epoch": 0.6366741769023911, "grad_norm": 1.5637879371643066, "learning_rate": 4.9924219620213995e-05, "loss": 0.1648, "step": 17520 }, { "epoch": 0.6370375754051893, "grad_norm": 3.2450170516967773, "learning_rate": 4.9923815340880236e-05, "loss": 0.1974, "step": 17530 }, { "epoch": 0.6374009739079874, "grad_norm": 0.9553948640823364, "learning_rate": 4.992340998766796e-05, "loss": 0.1694, "step": 17540 }, { "epoch": 0.6377643724107857, "grad_norm": 8.901055335998535, "learning_rate": 4.9923003560594625e-05, "loss": 0.2625, "step": 17550 }, { "epoch": 0.6381277709135839, "grad_norm": 1.7500522136688232, "learning_rate": 4.992259605967774e-05, "loss": 0.1799, "step": 17560 }, { "epoch": 0.638491169416382, "grad_norm": 1.4673160314559937, "learning_rate": 4.9922187484934865e-05, "loss": 0.1698, "step": 17570 }, { "epoch": 0.6388545679191802, "grad_norm": 2.5377135276794434, "learning_rate": 4.992177783638361e-05, "loss": 0.1822, "step": 17580 }, { "epoch": 0.6392179664219784, "grad_norm": 1.660311222076416, "learning_rate": 4.9921367114041625e-05, "loss": 0.1659, "step": 17590 }, { "epoch": 0.6395813649247765, "grad_norm": 8.248649597167969, "learning_rate": 4.9920955317926595e-05, "loss": 0.2384, "step": 17600 }, { "epoch": 0.6399447634275747, "grad_norm": 1.5581409931182861, "learning_rate": 4.992054244805627e-05, "loss": 0.1665, "step": 17610 }, { "epoch": 0.6403081619303729, "grad_norm": 0.9654737710952759, "learning_rate": 4.992012850444844e-05, "loss": 0.3493, "step": 17620 }, { "epoch": 0.640671560433171, "grad_norm": 3.4477317333221436, "learning_rate": 4.9919713487120935e-05, "loss": 0.2097, "step": 17630 }, { "epoch": 0.6410349589359692, "grad_norm": 1.3745356798171997, "learning_rate": 4.9919297396091634e-05, "loss": 0.1459, "step": 17640 }, { "epoch": 0.6413983574387674, "grad_norm": 4.813534259796143, "learning_rate": 4.991888023137849e-05, "loss": 0.1905, "step": 17650 }, { "epoch": 0.6417617559415655, "grad_norm": 3.118452310562134, "learning_rate": 4.9918461992999445e-05, "loss": 0.1527, "step": 17660 }, { "epoch": 0.6421251544443637, "grad_norm": 1.8424941301345825, "learning_rate": 4.991804268097253e-05, "loss": 0.1759, "step": 17670 }, { "epoch": 0.6424885529471619, "grad_norm": 7.301458835601807, "learning_rate": 4.9917622295315826e-05, "loss": 0.1662, "step": 17680 }, { "epoch": 0.64285195144996, "grad_norm": 3.133114814758301, "learning_rate": 4.991720083604743e-05, "loss": 0.1692, "step": 17690 }, { "epoch": 0.6432153499527582, "grad_norm": 11.538620948791504, "learning_rate": 4.99167783031855e-05, "loss": 0.2443, "step": 17700 }, { "epoch": 0.6435787484555564, "grad_norm": 1.3739595413208008, "learning_rate": 4.991635469674825e-05, "loss": 0.1465, "step": 17710 }, { "epoch": 0.6439421469583545, "grad_norm": 1.6855549812316895, "learning_rate": 4.991593001675393e-05, "loss": 0.1819, "step": 17720 }, { "epoch": 0.6443055454611527, "grad_norm": 1.692335844039917, "learning_rate": 4.991550426322083e-05, "loss": 0.1654, "step": 17730 }, { "epoch": 0.6446689439639509, "grad_norm": 1.1132971048355103, "learning_rate": 4.9915077436167313e-05, "loss": 0.1688, "step": 17740 }, { "epoch": 0.645032342466749, "grad_norm": 5.6813201904296875, "learning_rate": 4.9914649535611756e-05, "loss": 0.2235, "step": 17750 }, { "epoch": 0.6453957409695472, "grad_norm": 1.5107471942901611, "learning_rate": 4.99142205615726e-05, "loss": 0.1747, "step": 17760 }, { "epoch": 0.6457591394723454, "grad_norm": 2.4552764892578125, "learning_rate": 4.9913790514068316e-05, "loss": 0.1739, "step": 17770 }, { "epoch": 0.6461225379751435, "grad_norm": 1.5664808750152588, "learning_rate": 4.991335939311744e-05, "loss": 0.1766, "step": 17780 }, { "epoch": 0.6464859364779417, "grad_norm": 2.935850143432617, "learning_rate": 4.9912927198738556e-05, "loss": 0.2148, "step": 17790 }, { "epoch": 0.6468493349807399, "grad_norm": 10.267364501953125, "learning_rate": 4.991249393095028e-05, "loss": 0.2521, "step": 17800 }, { "epoch": 0.647212733483538, "grad_norm": 1.3392564058303833, "learning_rate": 4.9912059589771274e-05, "loss": 0.172, "step": 17810 }, { "epoch": 0.6475761319863362, "grad_norm": 0.895491361618042, "learning_rate": 4.991162417522026e-05, "loss": 0.1379, "step": 17820 }, { "epoch": 0.6479395304891343, "grad_norm": 2.536397695541382, "learning_rate": 4.9911187687315997e-05, "loss": 0.1477, "step": 17830 }, { "epoch": 0.6483029289919325, "grad_norm": 1.7795464992523193, "learning_rate": 4.9910750126077296e-05, "loss": 0.1786, "step": 17840 }, { "epoch": 0.6486663274947307, "grad_norm": 62.683929443359375, "learning_rate": 4.9910311491523e-05, "loss": 0.266, "step": 17850 }, { "epoch": 0.6490297259975288, "grad_norm": 2.0866358280181885, "learning_rate": 4.990987178367201e-05, "loss": 0.1428, "step": 17860 }, { "epoch": 0.649393124500327, "grad_norm": 1.5636661052703857, "learning_rate": 4.990943100254328e-05, "loss": 0.1845, "step": 17870 }, { "epoch": 0.6497565230031253, "grad_norm": 3.540689468383789, "learning_rate": 4.9908989148155796e-05, "loss": 0.2348, "step": 17880 }, { "epoch": 0.6501199215059233, "grad_norm": 1.720421314239502, "learning_rate": 4.990854622052859e-05, "loss": 0.1742, "step": 17890 }, { "epoch": 0.6504833200087216, "grad_norm": 7.7201056480407715, "learning_rate": 4.9908102219680756e-05, "loss": 0.2573, "step": 17900 }, { "epoch": 0.6508467185115198, "grad_norm": 3.826190948486328, "learning_rate": 4.9907701701329876e-05, "loss": 3.6024, "step": 17910 }, { "epoch": 0.6512101170143179, "grad_norm": 2.047307252883911, "learning_rate": 4.990725566141558e-05, "loss": 0.1551, "step": 17920 }, { "epoch": 0.6515735155171161, "grad_norm": 6.462743282318115, "learning_rate": 4.990680854833626e-05, "loss": 0.2109, "step": 17930 }, { "epoch": 0.6519369140199143, "grad_norm": 1.7611109018325806, "learning_rate": 4.9906360362111184e-05, "loss": 0.1959, "step": 17940 }, { "epoch": 0.6523003125227124, "grad_norm": 5.253514766693115, "learning_rate": 4.9905911102759655e-05, "loss": 0.2436, "step": 17950 }, { "epoch": 0.6526637110255106, "grad_norm": 0.9357771873474121, "learning_rate": 4.9905460770301035e-05, "loss": 0.1664, "step": 17960 }, { "epoch": 0.6530271095283088, "grad_norm": 1.219488263130188, "learning_rate": 4.990500936475472e-05, "loss": 0.2286, "step": 17970 }, { "epoch": 0.6533905080311069, "grad_norm": 2.8499608039855957, "learning_rate": 4.990455688614016e-05, "loss": 0.2664, "step": 17980 }, { "epoch": 0.6537539065339051, "grad_norm": 1.5652077198028564, "learning_rate": 4.990410333447686e-05, "loss": 0.1341, "step": 17990 }, { "epoch": 0.6541173050367033, "grad_norm": 5.98219633102417, "learning_rate": 4.9903648709784356e-05, "loss": 0.2338, "step": 18000 }, { "epoch": 0.6541173050367033, "eval_loss": 0.37892404198646545, "eval_runtime": 180.0524, "eval_samples_per_second": 41.177, "eval_steps_per_second": 5.149, "eval_wer": 0.200864087715795, "step": 18000 }, { "epoch": 0.6544807035395014, "grad_norm": 2.105100154876709, "learning_rate": 4.990319301208223e-05, "loss": 0.1764, "step": 18010 }, { "epoch": 0.6548441020422996, "grad_norm": 1.0867921113967896, "learning_rate": 4.990273624139013e-05, "loss": 0.1507, "step": 18020 }, { "epoch": 0.6552075005450978, "grad_norm": 2.9895503520965576, "learning_rate": 4.9902278397727734e-05, "loss": 0.1479, "step": 18030 }, { "epoch": 0.6555708990478959, "grad_norm": 0.9947407841682434, "learning_rate": 4.990181948111475e-05, "loss": 0.1558, "step": 18040 }, { "epoch": 0.6559342975506941, "grad_norm": 7.774895191192627, "learning_rate": 4.9901359491570974e-05, "loss": 0.2202, "step": 18050 }, { "epoch": 0.6562976960534923, "grad_norm": 1.8466017246246338, "learning_rate": 4.990089842911622e-05, "loss": 0.1929, "step": 18060 }, { "epoch": 0.6566610945562904, "grad_norm": 0.8435410261154175, "learning_rate": 4.9900436293770345e-05, "loss": 0.1377, "step": 18070 }, { "epoch": 0.6570244930590886, "grad_norm": 3.10648512840271, "learning_rate": 4.989997308555326e-05, "loss": 0.202, "step": 18080 }, { "epoch": 0.6573878915618868, "grad_norm": 1.112806797027588, "learning_rate": 4.989950880448494e-05, "loss": 0.1486, "step": 18090 }, { "epoch": 0.6577512900646849, "grad_norm": 18.821117401123047, "learning_rate": 4.989904345058538e-05, "loss": 0.2677, "step": 18100 }, { "epoch": 0.6581146885674831, "grad_norm": 1.254798412322998, "learning_rate": 4.989857702387463e-05, "loss": 3.5769, "step": 18110 }, { "epoch": 0.6584780870702812, "grad_norm": 0.9956761002540588, "learning_rate": 4.989810952437277e-05, "loss": 0.1958, "step": 18120 }, { "epoch": 0.6588414855730794, "grad_norm": 2.9471828937530518, "learning_rate": 4.9897640952099975e-05, "loss": 0.1988, "step": 18130 }, { "epoch": 0.6592048840758776, "grad_norm": 1.3806344270706177, "learning_rate": 4.989717130707641e-05, "loss": 0.1552, "step": 18140 }, { "epoch": 0.6595682825786757, "grad_norm": 3.0857722759246826, "learning_rate": 4.989670058932231e-05, "loss": 0.2168, "step": 18150 }, { "epoch": 0.6599316810814739, "grad_norm": 1.8781664371490479, "learning_rate": 4.989622879885798e-05, "loss": 0.1571, "step": 18160 }, { "epoch": 0.6602950795842721, "grad_norm": 1.1139156818389893, "learning_rate": 4.9895755935703725e-05, "loss": 0.1365, "step": 18170 }, { "epoch": 0.6606584780870702, "grad_norm": 2.3965742588043213, "learning_rate": 4.9895281999879925e-05, "loss": 0.1879, "step": 18180 }, { "epoch": 0.6610218765898684, "grad_norm": 1.2575726509094238, "learning_rate": 4.9894806991407e-05, "loss": 0.2197, "step": 18190 }, { "epoch": 0.6613852750926666, "grad_norm": 10.392169952392578, "learning_rate": 4.989433091030542e-05, "loss": 0.2318, "step": 18200 }, { "epoch": 0.6617486735954647, "grad_norm": 0.8268498182296753, "learning_rate": 4.98938537565957e-05, "loss": 0.1416, "step": 18210 }, { "epoch": 0.662112072098263, "grad_norm": 0.9257369637489319, "learning_rate": 4.9893375530298384e-05, "loss": 0.1855, "step": 18220 }, { "epoch": 0.6624754706010612, "grad_norm": 1.7720370292663574, "learning_rate": 4.9892896231434094e-05, "loss": 1.0276, "step": 18230 }, { "epoch": 0.6628388691038593, "grad_norm": 2.2012548446655273, "learning_rate": 4.9892415860023476e-05, "loss": 1.1909, "step": 18240 }, { "epoch": 0.6632022676066575, "grad_norm": 9.690247535705566, "learning_rate": 4.9891934416087224e-05, "loss": 0.2603, "step": 18250 }, { "epoch": 0.6635656661094557, "grad_norm": 2.528682231903076, "learning_rate": 4.989145189964608e-05, "loss": 0.1912, "step": 18260 }, { "epoch": 0.6639290646122538, "grad_norm": 1.4666227102279663, "learning_rate": 4.989096831072084e-05, "loss": 0.2316, "step": 18270 }, { "epoch": 0.664292463115052, "grad_norm": 1.463526725769043, "learning_rate": 4.989048364933234e-05, "loss": 0.1388, "step": 18280 }, { "epoch": 0.6646558616178502, "grad_norm": 1.2156569957733154, "learning_rate": 4.988999791550146e-05, "loss": 0.4086, "step": 18290 }, { "epoch": 0.6650192601206483, "grad_norm": 4.909139156341553, "learning_rate": 4.988951110924913e-05, "loss": 0.2631, "step": 18300 }, { "epoch": 0.6653826586234465, "grad_norm": 1.3692512512207031, "learning_rate": 4.988902323059632e-05, "loss": 0.1525, "step": 18310 }, { "epoch": 0.6657460571262447, "grad_norm": 1.153344988822937, "learning_rate": 4.988853427956406e-05, "loss": 0.1904, "step": 18320 }, { "epoch": 0.6661094556290428, "grad_norm": 2.052828073501587, "learning_rate": 4.988804425617341e-05, "loss": 0.1979, "step": 18330 }, { "epoch": 0.666472854131841, "grad_norm": 1.373213768005371, "learning_rate": 4.988755316044548e-05, "loss": 0.1836, "step": 18340 }, { "epoch": 0.6668362526346392, "grad_norm": 24.185970306396484, "learning_rate": 4.9887060992401436e-05, "loss": 0.2546, "step": 18350 }, { "epoch": 0.6671996511374373, "grad_norm": 1.702205777168274, "learning_rate": 4.988656775206248e-05, "loss": 0.1433, "step": 18360 }, { "epoch": 0.6675630496402355, "grad_norm": 2.279100179672241, "learning_rate": 4.9886073439449864e-05, "loss": 0.1671, "step": 18370 }, { "epoch": 0.6679264481430337, "grad_norm": 3.928740978240967, "learning_rate": 4.98855780545849e-05, "loss": 0.1506, "step": 18380 }, { "epoch": 0.6682898466458318, "grad_norm": 2.2895402908325195, "learning_rate": 4.988508159748891e-05, "loss": 0.1523, "step": 18390 }, { "epoch": 0.66865324514863, "grad_norm": 10.151689529418945, "learning_rate": 4.98845840681833e-05, "loss": 0.2284, "step": 18400 }, { "epoch": 0.6690166436514282, "grad_norm": 1.268561840057373, "learning_rate": 4.9884085466689504e-05, "loss": 0.171, "step": 18410 }, { "epoch": 0.6693800421542263, "grad_norm": 1.0731265544891357, "learning_rate": 4.9883585793029e-05, "loss": 0.7778, "step": 18420 }, { "epoch": 0.6697434406570245, "grad_norm": 1.0762509107589722, "learning_rate": 4.988308504722332e-05, "loss": 0.1966, "step": 18430 }, { "epoch": 0.6701068391598226, "grad_norm": 6.763409614562988, "learning_rate": 4.9882583229294044e-05, "loss": 0.156, "step": 18440 }, { "epoch": 0.6704702376626208, "grad_norm": 8.312501907348633, "learning_rate": 4.988208033926279e-05, "loss": 0.2573, "step": 18450 }, { "epoch": 0.670833636165419, "grad_norm": 1.7566003799438477, "learning_rate": 4.988157637715122e-05, "loss": 0.1639, "step": 18460 }, { "epoch": 0.6711970346682171, "grad_norm": 2.336911916732788, "learning_rate": 4.988107134298105e-05, "loss": 0.1536, "step": 18470 }, { "epoch": 0.6715604331710153, "grad_norm": 2.2477078437805176, "learning_rate": 4.988056523677405e-05, "loss": 0.2734, "step": 18480 }, { "epoch": 0.6719238316738135, "grad_norm": 1.62912917137146, "learning_rate": 4.9880058058552015e-05, "loss": 0.1501, "step": 18490 }, { "epoch": 0.6722872301766116, "grad_norm": 8.896906852722168, "learning_rate": 4.98795498083368e-05, "loss": 0.213, "step": 18500 }, { "epoch": 0.6726506286794098, "grad_norm": 1.804291009902954, "learning_rate": 4.987904048615031e-05, "loss": 0.2175, "step": 18510 }, { "epoch": 0.673014027182208, "grad_norm": 0.9261330366134644, "learning_rate": 4.9878530092014486e-05, "loss": 0.1553, "step": 18520 }, { "epoch": 0.6733774256850061, "grad_norm": 4.854642868041992, "learning_rate": 4.987801862595132e-05, "loss": 0.2065, "step": 18530 }, { "epoch": 0.6737408241878043, "grad_norm": 0.9362125992774963, "learning_rate": 4.987750608798284e-05, "loss": 0.1611, "step": 18540 }, { "epoch": 0.6741042226906026, "grad_norm": 13.348092079162598, "learning_rate": 4.987699247813114e-05, "loss": 0.2834, "step": 18550 }, { "epoch": 0.6744676211934006, "grad_norm": 1.3235937356948853, "learning_rate": 4.987647779641835e-05, "loss": 0.166, "step": 18560 }, { "epoch": 0.6748310196961989, "grad_norm": 1.7941697835922241, "learning_rate": 4.987596204286664e-05, "loss": 0.186, "step": 18570 }, { "epoch": 0.6751944181989971, "grad_norm": 6.945876121520996, "learning_rate": 4.987544521749824e-05, "loss": 0.1859, "step": 18580 }, { "epoch": 0.6755578167017952, "grad_norm": 1.1671024560928345, "learning_rate": 4.98749273203354e-05, "loss": 0.2007, "step": 18590 }, { "epoch": 0.6759212152045934, "grad_norm": 46.817718505859375, "learning_rate": 4.987440835140046e-05, "loss": 0.275, "step": 18600 }, { "epoch": 0.6759212152045934, "eval_loss": 0.4186328053474426, "eval_runtime": 180.6066, "eval_samples_per_second": 41.051, "eval_steps_per_second": 5.133, "eval_wer": 0.21444260896400238, "step": 18600 }, { "epoch": 0.6762846137073916, "grad_norm": 0.9619908928871155, "learning_rate": 4.987388831071575e-05, "loss": 0.2147, "step": 18610 }, { "epoch": 0.6766480122101897, "grad_norm": 1.139666199684143, "learning_rate": 4.9873367198303714e-05, "loss": 0.2591, "step": 18620 }, { "epoch": 0.6770114107129879, "grad_norm": 2.6673026084899902, "learning_rate": 4.9872845014186776e-05, "loss": 0.2013, "step": 18630 }, { "epoch": 0.6773748092157861, "grad_norm": 1.0486637353897095, "learning_rate": 4.987232175838745e-05, "loss": 0.2326, "step": 18640 }, { "epoch": 0.6777382077185842, "grad_norm": 6.457462787628174, "learning_rate": 4.987179743092827e-05, "loss": 0.2395, "step": 18650 }, { "epoch": 0.6781016062213824, "grad_norm": 3.296480178833008, "learning_rate": 4.987127203183183e-05, "loss": 0.1857, "step": 18660 }, { "epoch": 0.6784650047241806, "grad_norm": 2.828460454940796, "learning_rate": 4.987074556112078e-05, "loss": 0.1391, "step": 18670 }, { "epoch": 0.6788284032269787, "grad_norm": 10.424219131469727, "learning_rate": 4.987021801881779e-05, "loss": 0.1583, "step": 18680 }, { "epoch": 0.6791918017297769, "grad_norm": 5.248502254486084, "learning_rate": 4.986968940494559e-05, "loss": 0.1676, "step": 18690 }, { "epoch": 0.6795552002325751, "grad_norm": 8.20375919342041, "learning_rate": 4.986915971952696e-05, "loss": 0.2844, "step": 18700 }, { "epoch": 0.6799185987353732, "grad_norm": 2.415562152862549, "learning_rate": 4.986862896258473e-05, "loss": 0.1634, "step": 18710 }, { "epoch": 0.6802819972381714, "grad_norm": 1.635680079460144, "learning_rate": 4.986809713414176e-05, "loss": 0.509, "step": 18720 }, { "epoch": 0.6806453957409695, "grad_norm": 10.641048431396484, "learning_rate": 4.986756423422095e-05, "loss": 0.2015, "step": 18730 }, { "epoch": 0.6810087942437677, "grad_norm": 1.3304156064987183, "learning_rate": 4.986703026284529e-05, "loss": 0.1598, "step": 18740 }, { "epoch": 0.6813721927465659, "grad_norm": 4.707154750823975, "learning_rate": 4.986649522003778e-05, "loss": 0.2486, "step": 18750 }, { "epoch": 0.681735591249364, "grad_norm": 1.671863317489624, "learning_rate": 4.9865959105821454e-05, "loss": 0.1628, "step": 18760 }, { "epoch": 0.6820989897521622, "grad_norm": 2.4183709621429443, "learning_rate": 4.986542192021942e-05, "loss": 0.1636, "step": 18770 }, { "epoch": 0.6824623882549604, "grad_norm": 120.8931884765625, "learning_rate": 4.9864883663254836e-05, "loss": 2.0172, "step": 18780 }, { "epoch": 0.6828257867577585, "grad_norm": 2.785879135131836, "learning_rate": 4.986434433495089e-05, "loss": 0.1669, "step": 18790 }, { "epoch": 0.6831891852605567, "grad_norm": 3.662753105163574, "learning_rate": 4.98638039353308e-05, "loss": 0.3095, "step": 18800 }, { "epoch": 0.6835525837633549, "grad_norm": 1.1632777452468872, "learning_rate": 4.986326246441787e-05, "loss": 0.1632, "step": 18810 }, { "epoch": 0.683915982266153, "grad_norm": 0.9660913348197937, "learning_rate": 4.986271992223543e-05, "loss": 0.1509, "step": 18820 }, { "epoch": 0.6842793807689512, "grad_norm": 2.810391426086426, "learning_rate": 4.986217630880684e-05, "loss": 0.1507, "step": 18830 }, { "epoch": 0.6846427792717494, "grad_norm": 2.008641242980957, "learning_rate": 4.986163162415554e-05, "loss": 0.1858, "step": 18840 }, { "epoch": 0.6850061777745475, "grad_norm": 3.4007887840270996, "learning_rate": 4.986108586830499e-05, "loss": 0.2389, "step": 18850 }, { "epoch": 0.6853695762773457, "grad_norm": 0.8250002861022949, "learning_rate": 4.986053904127871e-05, "loss": 0.1618, "step": 18860 }, { "epoch": 0.685732974780144, "grad_norm": 0.792607307434082, "learning_rate": 4.986004598111927e-05, "loss": 1.5835, "step": 18870 }, { "epoch": 0.686096373282942, "grad_norm": 2.740478038787842, "learning_rate": 4.985949711892404e-05, "loss": 0.2021, "step": 18880 }, { "epoch": 0.6864597717857402, "grad_norm": 1.1361775398254395, "learning_rate": 4.985894718562153e-05, "loss": 0.2244, "step": 18890 }, { "epoch": 0.6868231702885385, "grad_norm": 2.692542314529419, "learning_rate": 4.985839618123543e-05, "loss": 0.2095, "step": 18900 }, { "epoch": 0.6871865687913365, "grad_norm": 1.2691428661346436, "learning_rate": 4.9857844105789485e-05, "loss": 0.1533, "step": 18910 }, { "epoch": 0.6875499672941348, "grad_norm": 2.087209939956665, "learning_rate": 4.9857290959307483e-05, "loss": 0.1469, "step": 18920 }, { "epoch": 0.687913365796933, "grad_norm": 1.5252209901809692, "learning_rate": 4.985673674181326e-05, "loss": 0.2099, "step": 18930 }, { "epoch": 0.6882767642997311, "grad_norm": 1.81588876247406, "learning_rate": 4.9856181453330685e-05, "loss": 0.174, "step": 18940 }, { "epoch": 0.6886401628025293, "grad_norm": 21.244775772094727, "learning_rate": 4.9855625093883695e-05, "loss": 0.2455, "step": 18950 }, { "epoch": 0.6890035613053275, "grad_norm": 1.53201425075531, "learning_rate": 4.9855067663496255e-05, "loss": 0.1731, "step": 18960 }, { "epoch": 0.6893669598081256, "grad_norm": 0.9922922849655151, "learning_rate": 4.985450916219239e-05, "loss": 0.1569, "step": 18970 }, { "epoch": 0.6897303583109238, "grad_norm": 1.6983296871185303, "learning_rate": 4.985394958999615e-05, "loss": 0.1784, "step": 18980 }, { "epoch": 0.690093756813722, "grad_norm": 2.5069353580474854, "learning_rate": 4.9853388946931654e-05, "loss": 0.1484, "step": 18990 }, { "epoch": 0.6904571553165201, "grad_norm": 52.345367431640625, "learning_rate": 4.985282723302306e-05, "loss": 0.2431, "step": 19000 }, { "epoch": 0.6908205538193183, "grad_norm": 1.5318138599395752, "learning_rate": 4.9852264448294564e-05, "loss": 0.1662, "step": 19010 }, { "epoch": 0.6911839523221164, "grad_norm": 1.5980876684188843, "learning_rate": 4.985170059277041e-05, "loss": 1.3532, "step": 19020 }, { "epoch": 0.6915473508249146, "grad_norm": 2.355023145675659, "learning_rate": 4.9851135666474915e-05, "loss": 0.1688, "step": 19030 }, { "epoch": 0.6919107493277128, "grad_norm": 3.2141480445861816, "learning_rate": 4.98505696694324e-05, "loss": 0.1303, "step": 19040 }, { "epoch": 0.6922741478305109, "grad_norm": 19.482290267944336, "learning_rate": 4.985000260166725e-05, "loss": 0.2337, "step": 19050 }, { "epoch": 0.6926375463333091, "grad_norm": 0.8456101417541504, "learning_rate": 4.9849434463203915e-05, "loss": 0.1732, "step": 19060 }, { "epoch": 0.6930009448361073, "grad_norm": 2.2158889770507812, "learning_rate": 4.9848865254066856e-05, "loss": 0.1524, "step": 19070 }, { "epoch": 0.6933643433389054, "grad_norm": 2.0843331813812256, "learning_rate": 4.9848294974280605e-05, "loss": 0.1943, "step": 19080 }, { "epoch": 0.6937277418417036, "grad_norm": 2.6970462799072266, "learning_rate": 4.9847723623869734e-05, "loss": 0.1697, "step": 19090 }, { "epoch": 0.6940911403445018, "grad_norm": 9.394730567932129, "learning_rate": 4.984715120285887e-05, "loss": 0.2151, "step": 19100 }, { "epoch": 0.6944545388472999, "grad_norm": 1.922090768814087, "learning_rate": 4.9846577711272656e-05, "loss": 0.1737, "step": 19110 }, { "epoch": 0.6948179373500981, "grad_norm": 1.3870245218276978, "learning_rate": 4.9846003149135815e-05, "loss": 0.1694, "step": 19120 }, { "epoch": 0.6951813358528963, "grad_norm": 1.6474970579147339, "learning_rate": 4.9845427516473104e-05, "loss": 0.219, "step": 19130 }, { "epoch": 0.6955447343556944, "grad_norm": 1.4302411079406738, "learning_rate": 4.984485081330932e-05, "loss": 0.1489, "step": 19140 }, { "epoch": 0.6959081328584926, "grad_norm": 3.888967990875244, "learning_rate": 4.984427303966932e-05, "loss": 0.2425, "step": 19150 }, { "epoch": 0.6962715313612908, "grad_norm": 1.2002874612808228, "learning_rate": 4.984369419557798e-05, "loss": 0.1575, "step": 19160 }, { "epoch": 0.6966349298640889, "grad_norm": 1.9064863920211792, "learning_rate": 4.984311428106025e-05, "loss": 0.1526, "step": 19170 }, { "epoch": 0.6969983283668871, "grad_norm": 1.3838772773742676, "learning_rate": 4.984253329614112e-05, "loss": 0.1601, "step": 19180 }, { "epoch": 0.6973617268696853, "grad_norm": 3.6261801719665527, "learning_rate": 4.984195124084563e-05, "loss": 0.1668, "step": 19190 }, { "epoch": 0.6977251253724834, "grad_norm": 7.647263526916504, "learning_rate": 4.984136811519884e-05, "loss": 0.1879, "step": 19200 }, { "epoch": 0.6977251253724834, "eval_loss": 0.3865276575088501, "eval_runtime": 179.6651, "eval_samples_per_second": 41.266, "eval_steps_per_second": 5.16, "eval_wer": 0.20815255867990634, "step": 19200 }, { "epoch": 0.6980885238752816, "grad_norm": 1.7563225030899048, "learning_rate": 4.984078391922589e-05, "loss": 0.1481, "step": 19210 }, { "epoch": 0.6984519223780798, "grad_norm": 1.8016029596328735, "learning_rate": 4.984019865295194e-05, "loss": 0.1713, "step": 19220 }, { "epoch": 0.6988153208808779, "grad_norm": 2.0969181060791016, "learning_rate": 4.983961231640221e-05, "loss": 0.1959, "step": 19230 }, { "epoch": 0.6991787193836762, "grad_norm": 1.6823608875274658, "learning_rate": 4.9839024909601964e-05, "loss": 0.1729, "step": 19240 }, { "epoch": 0.6995421178864744, "grad_norm": 11.533753395080566, "learning_rate": 4.983843643257652e-05, "loss": 0.2264, "step": 19250 }, { "epoch": 0.6999055163892725, "grad_norm": 4.1039204597473145, "learning_rate": 4.983784688535122e-05, "loss": 0.1738, "step": 19260 }, { "epoch": 0.7002689148920707, "grad_norm": 1.1051629781723022, "learning_rate": 4.983725626795147e-05, "loss": 0.155, "step": 19270 }, { "epoch": 0.7006323133948689, "grad_norm": 4.303994178771973, "learning_rate": 4.983666458040273e-05, "loss": 0.1593, "step": 19280 }, { "epoch": 0.700995711897667, "grad_norm": 1.2324292659759521, "learning_rate": 4.983607182273047e-05, "loss": 0.1642, "step": 19290 }, { "epoch": 0.7013591104004652, "grad_norm": 6.101926326751709, "learning_rate": 4.983547799496024e-05, "loss": 0.2338, "step": 19300 }, { "epoch": 0.7017225089032633, "grad_norm": 1.1532049179077148, "learning_rate": 4.983488309711763e-05, "loss": 0.1591, "step": 19310 }, { "epoch": 0.7020859074060615, "grad_norm": 0.8216233253479004, "learning_rate": 4.983428712922828e-05, "loss": 0.1489, "step": 19320 }, { "epoch": 0.7024493059088597, "grad_norm": 1.489461064338684, "learning_rate": 4.983369009131785e-05, "loss": 0.2048, "step": 19330 }, { "epoch": 0.7028127044116578, "grad_norm": 1.0493615865707397, "learning_rate": 4.983309198341207e-05, "loss": 0.1525, "step": 19340 }, { "epoch": 0.703176102914456, "grad_norm": 10.2578706741333, "learning_rate": 4.983249280553672e-05, "loss": 0.2297, "step": 19350 }, { "epoch": 0.7035395014172542, "grad_norm": 1.5366660356521606, "learning_rate": 4.983189255771761e-05, "loss": 0.1644, "step": 19360 }, { "epoch": 0.7039028999200523, "grad_norm": 1.4915844202041626, "learning_rate": 4.9831291239980596e-05, "loss": 0.1599, "step": 19370 }, { "epoch": 0.7042662984228505, "grad_norm": 1.3012590408325195, "learning_rate": 4.98306888523516e-05, "loss": 0.1907, "step": 19380 }, { "epoch": 0.7046296969256487, "grad_norm": 1.5029476881027222, "learning_rate": 4.983008539485656e-05, "loss": 0.1391, "step": 19390 }, { "epoch": 0.7049930954284468, "grad_norm": 3.0202033519744873, "learning_rate": 4.9829480867521495e-05, "loss": 0.2218, "step": 19400 }, { "epoch": 0.705356493931245, "grad_norm": 1.7761317491531372, "learning_rate": 4.9828875270372434e-05, "loss": 0.1605, "step": 19410 }, { "epoch": 0.7057198924340432, "grad_norm": 1.420793890953064, "learning_rate": 4.9828268603435485e-05, "loss": 1.5838, "step": 19420 }, { "epoch": 0.7060832909368413, "grad_norm": 2.079665422439575, "learning_rate": 4.982766086673678e-05, "loss": 0.2146, "step": 19430 }, { "epoch": 0.7064466894396395, "grad_norm": 2.440471887588501, "learning_rate": 4.98270520603025e-05, "loss": 0.1733, "step": 19440 }, { "epoch": 0.7068100879424377, "grad_norm": 7.773731708526611, "learning_rate": 4.982644218415889e-05, "loss": 0.2126, "step": 19450 }, { "epoch": 0.7071734864452358, "grad_norm": 0.9480405449867249, "learning_rate": 4.982583123833221e-05, "loss": 0.1575, "step": 19460 }, { "epoch": 0.707536884948034, "grad_norm": 12.79196548461914, "learning_rate": 4.982521922284881e-05, "loss": 0.2745, "step": 19470 }, { "epoch": 0.7079002834508322, "grad_norm": 4.492150783538818, "learning_rate": 4.982460613773502e-05, "loss": 0.1663, "step": 19480 }, { "epoch": 0.7082636819536303, "grad_norm": 1.2373683452606201, "learning_rate": 4.9823991983017295e-05, "loss": 0.1699, "step": 19490 }, { "epoch": 0.7086270804564285, "grad_norm": 5.8804402351379395, "learning_rate": 4.982337675872207e-05, "loss": 0.242, "step": 19500 }, { "epoch": 0.7089904789592267, "grad_norm": 0.9465837478637695, "learning_rate": 4.982276046487586e-05, "loss": 0.1471, "step": 19510 }, { "epoch": 0.7093538774620248, "grad_norm": 1.6178842782974243, "learning_rate": 4.9822143101505226e-05, "loss": 0.1619, "step": 19520 }, { "epoch": 0.709717275964823, "grad_norm": 2.4963414669036865, "learning_rate": 4.9821524668636766e-05, "loss": 0.1426, "step": 19530 }, { "epoch": 0.7100806744676212, "grad_norm": 1.1380610466003418, "learning_rate": 4.982090516629712e-05, "loss": 0.2364, "step": 19540 }, { "epoch": 0.7104440729704193, "grad_norm": 5.2998046875, "learning_rate": 4.982028459451298e-05, "loss": 0.2661, "step": 19550 }, { "epoch": 0.7108074714732175, "grad_norm": 1.1476637125015259, "learning_rate": 4.9819662953311096e-05, "loss": 0.1306, "step": 19560 }, { "epoch": 0.7111708699760158, "grad_norm": 0.7960777878761292, "learning_rate": 4.981904024271824e-05, "loss": 0.1604, "step": 19570 }, { "epoch": 0.7115342684788138, "grad_norm": 1.9035999774932861, "learning_rate": 4.981841646276124e-05, "loss": 0.1728, "step": 19580 }, { "epoch": 0.711897666981612, "grad_norm": 0.9725393056869507, "learning_rate": 4.981779161346699e-05, "loss": 0.2529, "step": 19590 }, { "epoch": 0.7122610654844101, "grad_norm": 5.759589672088623, "learning_rate": 4.98171656948624e-05, "loss": 0.25, "step": 19600 }, { "epoch": 0.7126244639872084, "grad_norm": 1.3716357946395874, "learning_rate": 4.9816538706974434e-05, "loss": 0.1603, "step": 19610 }, { "epoch": 0.7129878624900066, "grad_norm": 1.4253743886947632, "learning_rate": 4.981591064983011e-05, "loss": 0.1496, "step": 19620 }, { "epoch": 0.7133512609928047, "grad_norm": 2.4253408908843994, "learning_rate": 4.98152815234565e-05, "loss": 0.1694, "step": 19630 }, { "epoch": 0.7137146594956029, "grad_norm": 1.212689757347107, "learning_rate": 4.9814651327880696e-05, "loss": 0.1869, "step": 19640 }, { "epoch": 0.7140780579984011, "grad_norm": 7.003270626068115, "learning_rate": 4.981402006312986e-05, "loss": 0.2709, "step": 19650 }, { "epoch": 0.7144414565011992, "grad_norm": 1.6173512935638428, "learning_rate": 4.981338772923119e-05, "loss": 0.1651, "step": 19660 }, { "epoch": 0.7148048550039974, "grad_norm": 2.2197723388671875, "learning_rate": 4.981275432621192e-05, "loss": 0.1657, "step": 19670 }, { "epoch": 0.7151682535067956, "grad_norm": 1.8906898498535156, "learning_rate": 4.981211985409936e-05, "loss": 2.3111, "step": 19680 }, { "epoch": 0.7155316520095937, "grad_norm": 3.50747013092041, "learning_rate": 4.981148431292084e-05, "loss": 0.1498, "step": 19690 }, { "epoch": 0.7158950505123919, "grad_norm": 4.080805778503418, "learning_rate": 4.981084770270373e-05, "loss": 0.2094, "step": 19700 }, { "epoch": 0.7162584490151901, "grad_norm": 2.1056652069091797, "learning_rate": 4.981021002347547e-05, "loss": 0.157, "step": 19710 }, { "epoch": 0.7166218475179882, "grad_norm": 1.07776939868927, "learning_rate": 4.980957127526354e-05, "loss": 0.2049, "step": 19720 }, { "epoch": 0.7169852460207864, "grad_norm": 3.5387072563171387, "learning_rate": 4.980893145809546e-05, "loss": 0.1706, "step": 19730 }, { "epoch": 0.7173486445235846, "grad_norm": 1.5516027212142944, "learning_rate": 4.980829057199879e-05, "loss": 0.1371, "step": 19740 }, { "epoch": 0.7177120430263827, "grad_norm": 6.618633270263672, "learning_rate": 4.9807648617001145e-05, "loss": 0.1833, "step": 19750 }, { "epoch": 0.7180754415291809, "grad_norm": 1.7093079090118408, "learning_rate": 4.980700559313019e-05, "loss": 0.1592, "step": 19760 }, { "epoch": 0.7184388400319791, "grad_norm": 1.1217936277389526, "learning_rate": 4.9806361500413626e-05, "loss": 0.145, "step": 19770 }, { "epoch": 0.7188022385347772, "grad_norm": 1.869722604751587, "learning_rate": 4.980571633887921e-05, "loss": 0.1605, "step": 19780 }, { "epoch": 0.7191656370375754, "grad_norm": 1.1555829048156738, "learning_rate": 4.980507010855473e-05, "loss": 0.1539, "step": 19790 }, { "epoch": 0.7195290355403736, "grad_norm": 5.0145111083984375, "learning_rate": 4.9804422809468046e-05, "loss": 0.2334, "step": 19800 }, { "epoch": 0.7195290355403736, "eval_loss": 0.394449919462204, "eval_runtime": 180.0311, "eval_samples_per_second": 41.182, "eval_steps_per_second": 5.149, "eval_wer": 0.2100677110752083, "step": 19800 } ], "logging_steps": 10, "max_steps": 165108, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.4817411791479415e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }