{ "best_metric": 0.13551291593297873, "best_model_checkpoint": "./checkpoints/w2v-pa-v2/checkpoint-97200", "epoch": 3.5322334471981973, "eval_steps": 600, "global_step": 97200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00036339850279816846, "grad_norm": 12.712770462036133, "learning_rate": 3.633985027981685e-08, "loss": 8.1015, "step": 10 }, { "epoch": 0.0007267970055963369, "grad_norm": 14.929631233215332, "learning_rate": 7.26797005596337e-08, "loss": 8.5111, "step": 20 }, { "epoch": 0.0010901955083945055, "grad_norm": 12.774781227111816, "learning_rate": 1.0901955083945056e-07, "loss": 8.71, "step": 30 }, { "epoch": 0.0014535940111926739, "grad_norm": 15.27083969116211, "learning_rate": 1.417254160912857e-07, "loss": 9.2894, "step": 40 }, { "epoch": 0.0018169925139908424, "grad_norm": 31.209775924682617, "learning_rate": 1.7806526637110256e-07, "loss": 8.6779, "step": 50 }, { "epoch": 0.002180391016789011, "grad_norm": 12.960335731506348, "learning_rate": 2.1440511665091943e-07, "loss": 8.0399, "step": 60 }, { "epoch": 0.002543789519587179, "grad_norm": 14.452157020568848, "learning_rate": 2.5074496693073626e-07, "loss": 7.9357, "step": 70 }, { "epoch": 0.0029071880223853477, "grad_norm": 12.74867057800293, "learning_rate": 2.8708481721055307e-07, "loss": 8.0764, "step": 80 }, { "epoch": 0.0032705865251835163, "grad_norm": 19.905397415161133, "learning_rate": 3.2342466749036993e-07, "loss": 8.2522, "step": 90 }, { "epoch": 0.003633985027981685, "grad_norm": null, "learning_rate": 3.5613053274220513e-07, "loss": 7.8161, "step": 100 }, { "epoch": 0.003997383530779853, "grad_norm": 15.877684593200684, "learning_rate": 3.92470383022022e-07, "loss": 7.4866, "step": 110 }, { "epoch": 0.004360782033578022, "grad_norm": 19.216800689697266, "learning_rate": 4.2881023330183885e-07, "loss": 7.0324, "step": 120 }, { "epoch": 0.0047241805363761906, "grad_norm": 16.937118530273438, 
"learning_rate": 4.651500835816557e-07, "loss": 6.7873, "step": 130 }, { "epoch": 0.005087579039174358, "grad_norm": 27.858692169189453, "learning_rate": 5.014899338614725e-07, "loss": 7.2063, "step": 140 }, { "epoch": 0.005450977541972527, "grad_norm": null, "learning_rate": 5.341957991133076e-07, "loss": 6.5827, "step": 150 }, { "epoch": 0.005814376044770695, "grad_norm": 21.252164840698242, "learning_rate": 5.669016643651428e-07, "loss": 7.819, "step": 160 }, { "epoch": 0.006177774547568864, "grad_norm": 20.977886199951172, "learning_rate": 6.032415146449597e-07, "loss": 5.4741, "step": 170 }, { "epoch": 0.0065411730503670325, "grad_norm": 35.25390625, "learning_rate": 6.395813649247765e-07, "loss": 5.7225, "step": 180 }, { "epoch": 0.006904571553165201, "grad_norm": 7.165033340454102, "learning_rate": 6.759212152045934e-07, "loss": 4.7475, "step": 190 }, { "epoch": 0.00726797005596337, "grad_norm": 14.877301216125488, "learning_rate": 7.122610654844103e-07, "loss": 4.6375, "step": 200 }, { "epoch": 0.007631368558761538, "grad_norm": 5.826667785644531, "learning_rate": 7.486009157642272e-07, "loss": 4.356, "step": 210 }, { "epoch": 0.007994767061559707, "grad_norm": 6.022212982177734, "learning_rate": 7.84940766044044e-07, "loss": 4.2138, "step": 220 }, { "epoch": 0.008358165564357875, "grad_norm": 4.790489196777344, "learning_rate": 8.212806163238608e-07, "loss": 4.0662, "step": 230 }, { "epoch": 0.008721564067156044, "grad_norm": 4.448057174682617, "learning_rate": 8.576204666036777e-07, "loss": 3.9507, "step": 240 }, { "epoch": 0.009084962569954213, "grad_norm": 47.487003326416016, "learning_rate": 8.939603168834945e-07, "loss": 3.9576, "step": 250 }, { "epoch": 0.009448361072752381, "grad_norm": 8.64856243133545, "learning_rate": 9.303001671633114e-07, "loss": 3.7102, "step": 260 }, { "epoch": 0.009811759575550548, "grad_norm": 8.821709632873535, "learning_rate": 9.66640017443128e-07, "loss": 3.6644, "step": 270 }, { "epoch": 0.010175158078348717, 
"grad_norm": 14.071539878845215, "learning_rate": 1.002979867722945e-06, "loss": 3.6909, "step": 280 }, { "epoch": 0.010538556581146885, "grad_norm": 6.68039083480835, "learning_rate": 1.0393197180027619e-06, "loss": 3.6458, "step": 290 }, { "epoch": 0.010901955083945054, "grad_norm": 20.664649963378906, "learning_rate": 1.0756595682825787e-06, "loss": 3.6332, "step": 300 }, { "epoch": 0.011265353586743222, "grad_norm": 2.5272624492645264, "learning_rate": 1.1119994185623955e-06, "loss": 3.4835, "step": 310 }, { "epoch": 0.01162875208954139, "grad_norm": 8.353235244750977, "learning_rate": 1.1483392688422123e-06, "loss": 3.4892, "step": 320 }, { "epoch": 0.01199215059233956, "grad_norm": 7.0964531898498535, "learning_rate": 1.1846791191220293e-06, "loss": 3.5586, "step": 330 }, { "epoch": 0.012355549095137728, "grad_norm": 4.734161376953125, "learning_rate": 1.2210189694018461e-06, "loss": 3.4996, "step": 340 }, { "epoch": 0.012718947597935897, "grad_norm": 47.409996032714844, "learning_rate": 1.257358819681663e-06, "loss": 3.5565, "step": 350 }, { "epoch": 0.013082346100734065, "grad_norm": 2.880244016647339, "learning_rate": 1.2936986699614797e-06, "loss": 3.4154, "step": 360 }, { "epoch": 0.013445744603532234, "grad_norm": 6.637233734130859, "learning_rate": 1.3300385202412968e-06, "loss": 3.4119, "step": 370 }, { "epoch": 0.013809143106330402, "grad_norm": 11.791736602783203, "learning_rate": 1.3663783705211136e-06, "loss": 3.4347, "step": 380 }, { "epoch": 0.01417254160912857, "grad_norm": 8.274836540222168, "learning_rate": 1.4027182208009304e-06, "loss": 3.4253, "step": 390 }, { "epoch": 0.01453594011192674, "grad_norm": 10.09929084777832, "learning_rate": 1.4390580710807472e-06, "loss": 3.4702, "step": 400 }, { "epoch": 0.014899338614724908, "grad_norm": 6.32951545715332, "learning_rate": 1.4753979213605642e-06, "loss": 3.3513, "step": 410 }, { "epoch": 0.015262737117523077, "grad_norm": 2.4888486862182617, "learning_rate": 1.511737771640381e-06, "loss": 
3.3421, "step": 420 }, { "epoch": 0.015626135620321245, "grad_norm": 3.02103328704834, "learning_rate": 1.5480776219201978e-06, "loss": 3.397, "step": 430 }, { "epoch": 0.015989534123119414, "grad_norm": 7.464268207550049, "learning_rate": 1.5844174722000146e-06, "loss": 3.3582, "step": 440 }, { "epoch": 0.016352932625917582, "grad_norm": 18.908123016357422, "learning_rate": 1.6207573224798317e-06, "loss": 3.4034, "step": 450 }, { "epoch": 0.01671633112871575, "grad_norm": 2.487326145172119, "learning_rate": 1.6570971727596485e-06, "loss": 3.2229, "step": 460 }, { "epoch": 0.01707972963151392, "grad_norm": 2.3999946117401123, "learning_rate": 1.6934370230394653e-06, "loss": 3.2185, "step": 470 }, { "epoch": 0.017443128134312088, "grad_norm": 5.007234573364258, "learning_rate": 1.729776873319282e-06, "loss": 3.2069, "step": 480 }, { "epoch": 0.017806526637110257, "grad_norm": 6.393301963806152, "learning_rate": 1.766116723599099e-06, "loss": 3.0687, "step": 490 }, { "epoch": 0.018169925139908425, "grad_norm": 45.44938278198242, "learning_rate": 1.802456573878916e-06, "loss": 3.112, "step": 500 }, { "epoch": 0.018533323642706594, "grad_norm": 7.32182502746582, "learning_rate": 1.8387964241587327e-06, "loss": 2.951, "step": 510 }, { "epoch": 0.018896722145504762, "grad_norm": 3.3864173889160156, "learning_rate": 1.8751362744385495e-06, "loss": 2.8879, "step": 520 }, { "epoch": 0.019260120648302927, "grad_norm": 5.429958343505859, "learning_rate": 1.911476124718366e-06, "loss": 2.7393, "step": 530 }, { "epoch": 0.019623519151101096, "grad_norm": 5.3577985763549805, "learning_rate": 1.947815974998183e-06, "loss": 2.4813, "step": 540 }, { "epoch": 0.019986917653899264, "grad_norm": 13.970659255981445, "learning_rate": 1.9841558252779998e-06, "loss": 2.3787, "step": 550 }, { "epoch": 0.020350316156697433, "grad_norm": 5.2666754722595215, "learning_rate": 2.0204956755578166e-06, "loss": 2.207, "step": 560 }, { "epoch": 0.0207137146594956, "grad_norm": 4.184991359710693, 
"learning_rate": 2.0568355258376334e-06, "loss": 2.0383, "step": 570 }, { "epoch": 0.02107711316229377, "grad_norm": 6.312343597412109, "learning_rate": 2.09317537611745e-06, "loss": 1.8416, "step": 580 }, { "epoch": 0.02144051166509194, "grad_norm": 4.754147529602051, "learning_rate": 2.1295152263972674e-06, "loss": 1.6002, "step": 590 }, { "epoch": 0.021803910167890107, "grad_norm": 21.47913360595703, "learning_rate": 2.1658550766770842e-06, "loss": 1.6015, "step": 600 }, { "epoch": 0.021803910167890107, "eval_loss": 1.5154471397399902, "eval_runtime": 180.9184, "eval_samples_per_second": 40.98, "eval_steps_per_second": 5.124, "eval_wer": 0.7997531177954872, "step": 600 }, { "epoch": 0.022167308670688276, "grad_norm": 4.2374348640441895, "learning_rate": 2.202194926956901e-06, "loss": 1.4842, "step": 610 }, { "epoch": 0.022530707173486444, "grad_norm": 4.392132759094238, "learning_rate": 2.238534777236718e-06, "loss": 1.3776, "step": 620 }, { "epoch": 0.022894105676284613, "grad_norm": 4.682064533233643, "learning_rate": 2.2748746275165347e-06, "loss": 1.3177, "step": 630 }, { "epoch": 0.02325750417908278, "grad_norm": 4.8396077156066895, "learning_rate": 2.3112144777963515e-06, "loss": 1.0737, "step": 640 }, { "epoch": 0.02362090268188095, "grad_norm": 33.27382278442383, "learning_rate": 2.3475543280761683e-06, "loss": 1.3046, "step": 650 }, { "epoch": 0.02398430118467912, "grad_norm": 5.410325050354004, "learning_rate": 2.383894178355985e-06, "loss": 1.1021, "step": 660 }, { "epoch": 0.024347699687477287, "grad_norm": 3.9523680210113525, "learning_rate": 2.420234028635802e-06, "loss": 1.0602, "step": 670 }, { "epoch": 0.024711098190275456, "grad_norm": 9.141073226928711, "learning_rate": 2.456573878915619e-06, "loss": 1.0631, "step": 680 }, { "epoch": 0.025074496693073624, "grad_norm": 5.3534626960754395, "learning_rate": 2.492913729195436e-06, "loss": 0.8968, "step": 690 }, { "epoch": 0.025437895195871793, "grad_norm": 32.30677795410156, "learning_rate": 
2.5292535794752527e-06, "loss": 1.0439, "step": 700 }, { "epoch": 0.02580129369866996, "grad_norm": 4.310474872589111, "learning_rate": 2.5655934297550696e-06, "loss": 0.954, "step": 710 }, { "epoch": 0.02616469220146813, "grad_norm": 5.586440563201904, "learning_rate": 2.6019332800348864e-06, "loss": 1.0031, "step": 720 }, { "epoch": 0.0265280907042663, "grad_norm": 3.6927313804626465, "learning_rate": 2.638273130314703e-06, "loss": 0.7956, "step": 730 }, { "epoch": 0.026891489207064467, "grad_norm": 4.270529747009277, "learning_rate": 2.67461298059452e-06, "loss": 0.8874, "step": 740 }, { "epoch": 0.027254887709862636, "grad_norm": 23.553489685058594, "learning_rate": 2.710952830874337e-06, "loss": 0.8523, "step": 750 }, { "epoch": 0.027618286212660804, "grad_norm": 5.342041492462158, "learning_rate": 2.747292681154154e-06, "loss": 0.9029, "step": 760 }, { "epoch": 0.027981684715458973, "grad_norm": 3.3802621364593506, "learning_rate": 2.783632531433971e-06, "loss": 0.8378, "step": 770 }, { "epoch": 0.02834508321825714, "grad_norm": 6.378807067871094, "learning_rate": 2.8199723817137876e-06, "loss": 0.8085, "step": 780 }, { "epoch": 0.02870848172105531, "grad_norm": 4.007000923156738, "learning_rate": 2.8563122319936045e-06, "loss": 0.8218, "step": 790 }, { "epoch": 0.02907188022385348, "grad_norm": 68.16226196289062, "learning_rate": 2.8926520822734213e-06, "loss": 1.2055, "step": 800 }, { "epoch": 0.029435278726651647, "grad_norm": 6.70043420791626, "learning_rate": 2.928991932553238e-06, "loss": 0.7641, "step": 810 }, { "epoch": 0.029798677229449816, "grad_norm": 5.498161315917969, "learning_rate": 2.965331782833055e-06, "loss": 0.7739, "step": 820 }, { "epoch": 0.030162075732247984, "grad_norm": 9.515852928161621, "learning_rate": 3.0016716331128717e-06, "loss": 0.8293, "step": 830 }, { "epoch": 0.030525474235046153, "grad_norm": 13.3881196975708, "learning_rate": 3.0380114833926885e-06, "loss": 0.5597, "step": 840 }, { "epoch": 0.03088887273784432, 
"grad_norm": 13.670549392700195, "learning_rate": 3.0743513336725057e-06, "loss": 0.7658, "step": 850 }, { "epoch": 0.03125227124064249, "grad_norm": 3.58305287361145, "learning_rate": 3.1106911839523226e-06, "loss": 0.7036, "step": 860 }, { "epoch": 0.031615669743440655, "grad_norm": 4.119450569152832, "learning_rate": 3.147031034232139e-06, "loss": 0.6842, "step": 870 }, { "epoch": 0.03197906824623883, "grad_norm": 6.412299156188965, "learning_rate": 3.183370884511956e-06, "loss": 0.7148, "step": 880 }, { "epoch": 0.03234246674903699, "grad_norm": 8.700023651123047, "learning_rate": 3.2197107347917726e-06, "loss": 1.4861, "step": 890 }, { "epoch": 0.032705865251835164, "grad_norm": 18.78075408935547, "learning_rate": 3.25605058507159e-06, "loss": 0.7162, "step": 900 }, { "epoch": 0.03306926375463333, "grad_norm": 4.078335762023926, "learning_rate": 3.292390435351406e-06, "loss": 0.643, "step": 910 }, { "epoch": 0.0334326622574315, "grad_norm": 6.603452682495117, "learning_rate": 3.3287302856312234e-06, "loss": 0.6623, "step": 920 }, { "epoch": 0.03379606076022967, "grad_norm": 5.817732334136963, "learning_rate": 3.3650701359110402e-06, "loss": 0.6265, "step": 930 }, { "epoch": 0.03415945926302784, "grad_norm": 8.310086250305176, "learning_rate": 3.4014099861908575e-06, "loss": 0.5343, "step": 940 }, { "epoch": 0.034522857765826004, "grad_norm": null, "learning_rate": 3.4341158514426923e-06, "loss": 0.6999, "step": 950 }, { "epoch": 0.034886256268624176, "grad_norm": 4.416926860809326, "learning_rate": 3.4704557017225087e-06, "loss": 0.6688, "step": 960 }, { "epoch": 0.03524965477142234, "grad_norm": 3.2407495975494385, "learning_rate": 3.506795552002326e-06, "loss": 0.5808, "step": 970 }, { "epoch": 0.03561305327422051, "grad_norm": null, "learning_rate": 3.539501417254161e-06, "loss": 3.0266, "step": 980 }, { "epoch": 0.03597645177701868, "grad_norm": 8.086112022399902, "learning_rate": 3.575841267533978e-06, "loss": 0.5829, "step": 990 }, { "epoch": 
0.03633985027981685, "grad_norm": 252.45077514648438, "learning_rate": 3.612181117813795e-06, "loss": 0.7371, "step": 1000 }, { "epoch": 0.036703248782615015, "grad_norm": 3.5969936847686768, "learning_rate": 3.648520968093612e-06, "loss": 0.6632, "step": 1010 }, { "epoch": 0.03706664728541319, "grad_norm": 3.0116841793060303, "learning_rate": 3.6848608183734285e-06, "loss": 0.537, "step": 1020 }, { "epoch": 0.03743004578821135, "grad_norm": 5.494657039642334, "learning_rate": 3.7212006686532457e-06, "loss": 0.5422, "step": 1030 }, { "epoch": 0.037793444291009524, "grad_norm": 21.526798248291016, "learning_rate": 3.757540518933062e-06, "loss": 0.5003, "step": 1040 }, { "epoch": 0.03815684279380769, "grad_norm": 80.90055084228516, "learning_rate": 3.7938803692128793e-06, "loss": 0.6566, "step": 1050 }, { "epoch": 0.038520241296605855, "grad_norm": 3.7678096294403076, "learning_rate": 3.830220219492696e-06, "loss": 0.5758, "step": 1060 }, { "epoch": 0.03888363979940403, "grad_norm": 4.526616096496582, "learning_rate": 3.866560069772512e-06, "loss": 0.5648, "step": 1070 }, { "epoch": 0.03924703830220219, "grad_norm": 4.571674346923828, "learning_rate": 3.90289992005233e-06, "loss": 0.5864, "step": 1080 }, { "epoch": 0.039610436805000364, "grad_norm": 5.295219421386719, "learning_rate": 3.939239770332146e-06, "loss": 0.4476, "step": 1090 }, { "epoch": 0.03997383530779853, "grad_norm": 16.631162643432617, "learning_rate": 3.975579620611963e-06, "loss": 0.6198, "step": 1100 }, { "epoch": 0.0403372338105967, "grad_norm": 4.685397624969482, "learning_rate": 4.01191947089178e-06, "loss": 0.7512, "step": 1110 }, { "epoch": 0.040700632313394866, "grad_norm": 3.333232879638672, "learning_rate": 4.048259321171597e-06, "loss": 0.5087, "step": 1120 }, { "epoch": 0.04106403081619304, "grad_norm": 5.501911640167236, "learning_rate": 4.084599171451414e-06, "loss": 0.5772, "step": 1130 }, { "epoch": 0.0414274293189912, "grad_norm": 8.066693305969238, "learning_rate": 
4.120939021731231e-06, "loss": 0.4641, "step": 1140 }, { "epoch": 0.041790827821789375, "grad_norm": 13.463829040527344, "learning_rate": 4.1572788720110474e-06, "loss": 0.5192, "step": 1150 }, { "epoch": 0.04215422632458754, "grad_norm": 4.132773399353027, "learning_rate": 4.193618722290864e-06, "loss": 0.4696, "step": 1160 }, { "epoch": 0.04251762482738571, "grad_norm": 6.176777362823486, "learning_rate": 4.229958572570681e-06, "loss": 0.4851, "step": 1170 }, { "epoch": 0.04288102333018388, "grad_norm": 8.26610279083252, "learning_rate": 4.266298422850498e-06, "loss": 0.4967, "step": 1180 }, { "epoch": 0.04324442183298205, "grad_norm": 3.9725544452667236, "learning_rate": 4.302638273130315e-06, "loss": 0.431, "step": 1190 }, { "epoch": 0.043607820335780215, "grad_norm": 22.353294372558594, "learning_rate": 4.338978123410132e-06, "loss": 0.6523, "step": 1200 }, { "epoch": 0.043607820335780215, "eval_loss": 0.5945897102355957, "eval_runtime": 180.5168, "eval_samples_per_second": 41.071, "eval_steps_per_second": 5.135, "eval_wer": 0.3718390908925881, "step": 1200 }, { "epoch": 0.04397121883857839, "grad_norm": 3.7954189777374268, "learning_rate": 4.375317973689948e-06, "loss": 0.4511, "step": 1210 }, { "epoch": 0.04433461734137655, "grad_norm": 5.583435535430908, "learning_rate": 4.411657823969766e-06, "loss": 1.4019, "step": 1220 }, { "epoch": 0.044698015844174724, "grad_norm": 8.544243812561035, "learning_rate": 4.447997674249582e-06, "loss": 0.4467, "step": 1230 }, { "epoch": 0.04506141434697289, "grad_norm": 3.8716418743133545, "learning_rate": 4.4843375245293996e-06, "loss": 0.4347, "step": 1240 }, { "epoch": 0.04542481284977106, "grad_norm": 19.459606170654297, "learning_rate": 4.5206773748092155e-06, "loss": 0.616, "step": 1250 }, { "epoch": 0.045788211352569226, "grad_norm": 5.474793434143066, "learning_rate": 4.557017225089033e-06, "loss": 0.4689, "step": 1260 }, { "epoch": 0.0461516098553674, "grad_norm": 4.705495834350586, "learning_rate": 
4.593357075368849e-06, "loss": 0.4623, "step": 1270 }, { "epoch": 0.04651500835816556, "grad_norm": 6.779942035675049, "learning_rate": 4.629696925648667e-06, "loss": 0.4418, "step": 1280 }, { "epoch": 0.046878406860963735, "grad_norm": 6.802936553955078, "learning_rate": 4.666036775928484e-06, "loss": 0.4429, "step": 1290 }, { "epoch": 0.0472418053637619, "grad_norm": 17.47754669189453, "learning_rate": 4.7023766262083004e-06, "loss": 0.596, "step": 1300 }, { "epoch": 0.04760520386656007, "grad_norm": 4.036036968231201, "learning_rate": 4.738716476488117e-06, "loss": 0.4362, "step": 1310 }, { "epoch": 0.04796860236935824, "grad_norm": 6.022701740264893, "learning_rate": 4.775056326767934e-06, "loss": 0.5092, "step": 1320 }, { "epoch": 0.04833200087215641, "grad_norm": 5.533923625946045, "learning_rate": 4.811396177047751e-06, "loss": 0.4358, "step": 1330 }, { "epoch": 0.048695399374954575, "grad_norm": 3.4037017822265625, "learning_rate": 4.847736027327568e-06, "loss": 0.3684, "step": 1340 }, { "epoch": 0.04905879787775275, "grad_norm": 13.625974655151367, "learning_rate": 4.8840758776073845e-06, "loss": 0.583, "step": 1350 }, { "epoch": 0.04942219638055091, "grad_norm": 3.597294330596924, "learning_rate": 4.920415727887201e-06, "loss": 0.4561, "step": 1360 }, { "epoch": 0.049785594883349084, "grad_norm": 2.8846936225891113, "learning_rate": 4.956755578167018e-06, "loss": 0.409, "step": 1370 }, { "epoch": 0.05014899338614725, "grad_norm": 5.500187397003174, "learning_rate": 4.993095428446836e-06, "loss": 0.4531, "step": 1380 }, { "epoch": 0.05051239188894542, "grad_norm": 3.1203413009643555, "learning_rate": 5.029435278726652e-06, "loss": 0.4004, "step": 1390 }, { "epoch": 0.050875790391743586, "grad_norm": 98.18115234375, "learning_rate": 5.0657751290064685e-06, "loss": 0.522, "step": 1400 }, { "epoch": 0.05123918889454175, "grad_norm": 3.901418924331665, "learning_rate": 5.102114979286285e-06, "loss": 0.4041, "step": 1410 }, { "epoch": 0.05160258739733992, 
"grad_norm": 4.045637130737305, "learning_rate": 5.138454829566102e-06, "loss": 0.4051, "step": 1420 }, { "epoch": 0.05196598590013809, "grad_norm": 6.835183143615723, "learning_rate": 5.174794679845919e-06, "loss": 0.4937, "step": 1430 }, { "epoch": 0.05232938440293626, "grad_norm": 7.708272457122803, "learning_rate": 5.211134530125736e-06, "loss": 0.3818, "step": 1440 }, { "epoch": 0.052692782905734425, "grad_norm": 24.2607364654541, "learning_rate": 5.247474380405553e-06, "loss": 0.5445, "step": 1450 }, { "epoch": 0.0530561814085326, "grad_norm": 3.3517005443573, "learning_rate": 5.283814230685369e-06, "loss": 0.4079, "step": 1460 }, { "epoch": 0.05341957991133076, "grad_norm": 12.727778434753418, "learning_rate": 5.320154080965187e-06, "loss": 0.4285, "step": 1470 }, { "epoch": 0.053782978414128935, "grad_norm": 4.984294891357422, "learning_rate": 5.356493931245003e-06, "loss": 0.5006, "step": 1480 }, { "epoch": 0.0541463769169271, "grad_norm": 3.3041558265686035, "learning_rate": 5.392833781524821e-06, "loss": 0.3729, "step": 1490 }, { "epoch": 0.05450977541972527, "grad_norm": 38.074546813964844, "learning_rate": 5.429173631804637e-06, "loss": 0.5401, "step": 1500 }, { "epoch": 0.05487317392252344, "grad_norm": 5.649720668792725, "learning_rate": 5.465513482084454e-06, "loss": 0.3879, "step": 1510 }, { "epoch": 0.05523657242532161, "grad_norm": 3.107583522796631, "learning_rate": 5.501853332364271e-06, "loss": 0.4144, "step": 1520 }, { "epoch": 0.055599970928119774, "grad_norm": 19.246564865112305, "learning_rate": 5.538193182644088e-06, "loss": 0.4314, "step": 1530 }, { "epoch": 0.055963369430917946, "grad_norm": 4.72367525100708, "learning_rate": 5.574533032923905e-06, "loss": 0.3576, "step": 1540 }, { "epoch": 0.05632676793371611, "grad_norm": 25.88886260986328, "learning_rate": 5.6108728832037215e-06, "loss": 0.5385, "step": 1550 }, { "epoch": 0.05669016643651428, "grad_norm": 3.1524956226348877, "learning_rate": 5.647212733483538e-06, "loss": 0.4075, 
"step": 1560 }, { "epoch": 0.05705356493931245, "grad_norm": 3.883281707763672, "learning_rate": 5.683552583763355e-06, "loss": 0.4242, "step": 1570 }, { "epoch": 0.05741696344211062, "grad_norm": 16.935935974121094, "learning_rate": 5.719892434043172e-06, "loss": 0.6194, "step": 1580 }, { "epoch": 0.057780361944908785, "grad_norm": 4.23909330368042, "learning_rate": 5.756232284322989e-06, "loss": 0.4206, "step": 1590 }, { "epoch": 0.05814376044770696, "grad_norm": 16.6039981842041, "learning_rate": 5.7925721346028056e-06, "loss": 0.4854, "step": 1600 }, { "epoch": 0.05850715895050512, "grad_norm": 2.5220890045166016, "learning_rate": 5.828911984882622e-06, "loss": 0.4186, "step": 1610 }, { "epoch": 0.058870557453303295, "grad_norm": 3.075101613998413, "learning_rate": 5.865251835162439e-06, "loss": 0.8877, "step": 1620 }, { "epoch": 0.05923395595610146, "grad_norm": 5.511383056640625, "learning_rate": 5.901591685442257e-06, "loss": 0.4219, "step": 1630 }, { "epoch": 0.05959735445889963, "grad_norm": 2.9449989795684814, "learning_rate": 5.937931535722073e-06, "loss": 0.2992, "step": 1640 }, { "epoch": 0.0599607529616978, "grad_norm": 31.823612213134766, "learning_rate": 5.9742713860018905e-06, "loss": 0.5939, "step": 1650 }, { "epoch": 0.06032415146449597, "grad_norm": 4.240995407104492, "learning_rate": 6.010611236281706e-06, "loss": 0.4176, "step": 1660 }, { "epoch": 0.060687549967294134, "grad_norm": 2.6084980964660645, "learning_rate": 6.046951086561524e-06, "loss": 0.3542, "step": 1670 }, { "epoch": 0.061050948470092306, "grad_norm": 8.318774223327637, "learning_rate": 6.08329093684134e-06, "loss": 0.3968, "step": 1680 }, { "epoch": 0.06141434697289047, "grad_norm": 5.18604850769043, "learning_rate": 6.119630787121158e-06, "loss": 0.3879, "step": 1690 }, { "epoch": 0.06177774547568864, "grad_norm": 51.732086181640625, "learning_rate": 6.1559706374009745e-06, "loss": 0.5025, "step": 1700 }, { "epoch": 0.06214114397848681, "grad_norm": 2.5876500606536865, 
"learning_rate": 6.192310487680791e-06, "loss": 0.3558, "step": 1710 }, { "epoch": 0.06250454248128498, "grad_norm": 5.071794033050537, "learning_rate": 6.228650337960608e-06, "loss": 0.3534, "step": 1720 }, { "epoch": 0.06286794098408315, "grad_norm": 11.539891242980957, "learning_rate": 6.264990188240424e-06, "loss": 0.4628, "step": 1730 }, { "epoch": 0.06323133948688131, "grad_norm": 3.275383710861206, "learning_rate": 6.301330038520243e-06, "loss": 0.3368, "step": 1740 }, { "epoch": 0.06359473798967948, "grad_norm": 41.4942741394043, "learning_rate": 6.3376698888000586e-06, "loss": 0.53, "step": 1750 }, { "epoch": 0.06395813649247765, "grad_norm": 3.0071399211883545, "learning_rate": 6.374009739079875e-06, "loss": 0.3623, "step": 1760 }, { "epoch": 0.06432153499527582, "grad_norm": 3.385955333709717, "learning_rate": 6.410349589359692e-06, "loss": 0.3476, "step": 1770 }, { "epoch": 0.06468493349807398, "grad_norm": 3.872527599334717, "learning_rate": 6.446689439639508e-06, "loss": 0.3168, "step": 1780 }, { "epoch": 0.06504833200087215, "grad_norm": 4.668768882751465, "learning_rate": 6.483029289919327e-06, "loss": 0.3813, "step": 1790 }, { "epoch": 0.06541173050367033, "grad_norm": 69.33656311035156, "learning_rate": 6.519369140199143e-06, "loss": 0.4557, "step": 1800 }, { "epoch": 0.06541173050367033, "eval_loss": 0.5579342246055603, "eval_runtime": 180.31, "eval_samples_per_second": 41.118, "eval_steps_per_second": 5.141, "eval_wer": 0.34055221740156477, "step": 1800 }, { "epoch": 0.0657751290064685, "grad_norm": 4.2848381996154785, "learning_rate": 6.555708990478959e-06, "loss": 0.5481, "step": 1810 }, { "epoch": 0.06613852750926666, "grad_norm": 11.31700325012207, "learning_rate": 6.592048840758775e-06, "loss": 0.3479, "step": 1820 }, { "epoch": 0.06650192601206482, "grad_norm": 6.088991165161133, "learning_rate": 6.628388691038594e-06, "loss": 0.3994, "step": 1830 }, { "epoch": 0.066865324514863, "grad_norm": 4.342681407928467, "learning_rate": 
6.66472854131841e-06, "loss": 0.2953, "step": 1840 }, { "epoch": 0.06722872301766117, "grad_norm": 75.97467041015625, "learning_rate": 6.701068391598227e-06, "loss": 0.4807, "step": 1850 }, { "epoch": 0.06759212152045933, "grad_norm": 3.8739049434661865, "learning_rate": 6.7374082418780435e-06, "loss": 0.3963, "step": 1860 }, { "epoch": 0.0679555200232575, "grad_norm": 2.6209168434143066, "learning_rate": 6.773748092157861e-06, "loss": 0.422, "step": 1870 }, { "epoch": 0.06831891852605568, "grad_norm": 17.530773162841797, "learning_rate": 6.810087942437678e-06, "loss": 0.3939, "step": 1880 }, { "epoch": 0.06868231702885384, "grad_norm": 3.475748300552368, "learning_rate": 6.846427792717494e-06, "loss": 0.2996, "step": 1890 }, { "epoch": 0.06904571553165201, "grad_norm": 20.979995727539062, "learning_rate": 6.882767642997311e-06, "loss": 0.4528, "step": 1900 }, { "epoch": 0.06940911403445017, "grad_norm": 3.8432774543762207, "learning_rate": 6.919107493277128e-06, "loss": 0.2959, "step": 1910 }, { "epoch": 0.06977251253724835, "grad_norm": 7.830467700958252, "learning_rate": 6.955447343556945e-06, "loss": 0.3378, "step": 1920 }, { "epoch": 0.07013591104004652, "grad_norm": 15.633039474487305, "learning_rate": 6.991787193836762e-06, "loss": 0.4, "step": 1930 }, { "epoch": 0.07049930954284468, "grad_norm": 13.628314971923828, "learning_rate": 7.028127044116578e-06, "loss": 0.3255, "step": 1940 }, { "epoch": 0.07086270804564285, "grad_norm": 33.001773834228516, "learning_rate": 7.064466894396396e-06, "loss": 0.4367, "step": 1950 }, { "epoch": 0.07122610654844103, "grad_norm": 3.5115041732788086, "learning_rate": 7.100806744676212e-06, "loss": 0.3279, "step": 1960 }, { "epoch": 0.07158950505123919, "grad_norm": 3.0497541427612305, "learning_rate": 7.137146594956029e-06, "loss": 2.8797, "step": 1970 }, { "epoch": 0.07195290355403736, "grad_norm": 6.17769718170166, "learning_rate": 7.173486445235845e-06, "loss": 0.3534, "step": 1980 }, { "epoch": 0.07231630205683552, 
"grad_norm": 5.4114789962768555, "learning_rate": 7.209826295515664e-06, "loss": 0.3309, "step": 1990 }, { "epoch": 0.0726797005596337, "grad_norm": 11.600439071655273, "learning_rate": 7.24616614579548e-06, "loss": 0.4382, "step": 2000 }, { "epoch": 0.07304309906243187, "grad_norm": 3.4476027488708496, "learning_rate": 7.2825059960752965e-06, "loss": 0.3487, "step": 2010 }, { "epoch": 0.07340649756523003, "grad_norm": 5.642564296722412, "learning_rate": 7.318845846355113e-06, "loss": 0.3513, "step": 2020 }, { "epoch": 0.0737698960680282, "grad_norm": 7.132052898406982, "learning_rate": 7.355185696634931e-06, "loss": 0.4564, "step": 2030 }, { "epoch": 0.07413329457082637, "grad_norm": 6.583246231079102, "learning_rate": 7.391525546914748e-06, "loss": 0.3376, "step": 2040 }, { "epoch": 0.07449669307362454, "grad_norm": 23.98805809020996, "learning_rate": 7.427865397194564e-06, "loss": 0.4217, "step": 2050 }, { "epoch": 0.0748600915764227, "grad_norm": 3.9135584831237793, "learning_rate": 7.4642052474743805e-06, "loss": 0.3324, "step": 2060 }, { "epoch": 0.07522349007922087, "grad_norm": 3.4022698402404785, "learning_rate": 7.500545097754198e-06, "loss": 0.3391, "step": 2070 }, { "epoch": 0.07558688858201905, "grad_norm": 8.37547779083252, "learning_rate": 7.536884948034015e-06, "loss": 0.3119, "step": 2080 }, { "epoch": 0.07595028708481721, "grad_norm": 6.2167558670043945, "learning_rate": 7.573224798313831e-06, "loss": 0.3247, "step": 2090 }, { "epoch": 0.07631368558761538, "grad_norm": 81.76036834716797, "learning_rate": 7.609564648593648e-06, "loss": 0.4281, "step": 2100 }, { "epoch": 0.07667708409041354, "grad_norm": 2.8961973190307617, "learning_rate": 7.645904498873465e-06, "loss": 0.3368, "step": 2110 }, { "epoch": 0.07704048259321171, "grad_norm": 4.699477195739746, "learning_rate": 7.682244349153282e-06, "loss": 0.3403, "step": 2120 }, { "epoch": 0.07740388109600989, "grad_norm": 4.429138660430908, "learning_rate": 7.718584199433098e-06, "loss": 0.3182, 
"step": 2130 }, { "epoch": 0.07776727959880805, "grad_norm": 2.7269580364227295, "learning_rate": 7.754924049712916e-06, "loss": 0.2828, "step": 2140 }, { "epoch": 0.07813067810160622, "grad_norm": 15.126232147216797, "learning_rate": 7.791263899992732e-06, "loss": 0.4606, "step": 2150 }, { "epoch": 0.07849407660440438, "grad_norm": 10.14072322845459, "learning_rate": 7.82760375027255e-06, "loss": 0.3451, "step": 2160 }, { "epoch": 0.07885747510720256, "grad_norm": 4.95914363861084, "learning_rate": 7.863943600552365e-06, "loss": 0.3612, "step": 2170 }, { "epoch": 0.07922087361000073, "grad_norm": 4.115192413330078, "learning_rate": 7.900283450832183e-06, "loss": 0.3222, "step": 2180 }, { "epoch": 0.07958427211279889, "grad_norm": 5.405594825744629, "learning_rate": 7.936623301111999e-06, "loss": 0.3474, "step": 2190 }, { "epoch": 0.07994767061559706, "grad_norm": 23.328718185424805, "learning_rate": 7.972963151391817e-06, "loss": 0.4797, "step": 2200 }, { "epoch": 0.08031106911839524, "grad_norm": 3.5595099925994873, "learning_rate": 8.009303001671634e-06, "loss": 0.3305, "step": 2210 }, { "epoch": 0.0806744676211934, "grad_norm": 3.048445463180542, "learning_rate": 8.04564285195145e-06, "loss": 0.318, "step": 2220 }, { "epoch": 0.08103786612399157, "grad_norm": 5.857702732086182, "learning_rate": 8.081982702231266e-06, "loss": 0.3497, "step": 2230 }, { "epoch": 0.08140126462678973, "grad_norm": 3.0092968940734863, "learning_rate": 8.118322552511084e-06, "loss": 0.2995, "step": 2240 }, { "epoch": 0.08176466312958791, "grad_norm": 9.337843894958496, "learning_rate": 8.154662402790902e-06, "loss": 0.4517, "step": 2250 }, { "epoch": 0.08212806163238608, "grad_norm": 3.136950969696045, "learning_rate": 8.191002253070718e-06, "loss": 0.2927, "step": 2260 }, { "epoch": 0.08249146013518424, "grad_norm": 4.228198051452637, "learning_rate": 8.227342103350534e-06, "loss": 1.2185, "step": 2270 }, { "epoch": 0.0828548586379824, "grad_norm": 7.404679298400879, "learning_rate": 
8.263681953630351e-06, "loss": 0.3448, "step": 2280 }, { "epoch": 0.08321825714078059, "grad_norm": 7.873497009277344, "learning_rate": 8.300021803910169e-06, "loss": 0.2965, "step": 2290 }, { "epoch": 0.08358165564357875, "grad_norm": 12.266081809997559, "learning_rate": 8.336361654189985e-06, "loss": 0.4631, "step": 2300 }, { "epoch": 0.08394505414637692, "grad_norm": 3.3576557636260986, "learning_rate": 8.3727015044698e-06, "loss": 0.3339, "step": 2310 }, { "epoch": 0.08430845264917508, "grad_norm": 3.0854902267456055, "learning_rate": 8.40904135474962e-06, "loss": 0.3448, "step": 2320 }, { "epoch": 0.08467185115197326, "grad_norm": 6.1308746337890625, "learning_rate": 8.445381205029436e-06, "loss": 0.386, "step": 2330 }, { "epoch": 0.08503524965477142, "grad_norm": 4.458275318145752, "learning_rate": 8.481721055309252e-06, "loss": 0.2916, "step": 2340 }, { "epoch": 0.08539864815756959, "grad_norm": 25.443647384643555, "learning_rate": 8.51806090558907e-06, "loss": 0.4232, "step": 2350 }, { "epoch": 0.08576204666036776, "grad_norm": 324.4353332519531, "learning_rate": 8.554400755868887e-06, "loss": 2.4995, "step": 2360 }, { "epoch": 0.08612544516316593, "grad_norm": 17.593692779541016, "learning_rate": 8.590740606148703e-06, "loss": 0.2952, "step": 2370 }, { "epoch": 0.0864888436659641, "grad_norm": 3.4646732807159424, "learning_rate": 8.62708045642852e-06, "loss": 0.2961, "step": 2380 }, { "epoch": 0.08685224216876226, "grad_norm": 2.9895999431610107, "learning_rate": 8.663420306708337e-06, "loss": 0.2852, "step": 2390 }, { "epoch": 0.08721564067156043, "grad_norm": 24.221176147460938, "learning_rate": 8.699760156988155e-06, "loss": 0.4343, "step": 2400 }, { "epoch": 0.08721564067156043, "eval_loss": 0.47036415338516235, "eval_runtime": 180.1154, "eval_samples_per_second": 41.162, "eval_steps_per_second": 5.147, "eval_wer": 0.28054713453264835, "step": 2400 }, { "epoch": 0.08757903917435861, "grad_norm": 161.69967651367188, "learning_rate": 
8.73610000726797e-06, "loss": 1.4598, "step": 2410 }, { "epoch": 0.08794243767715677, "grad_norm": 10.37559700012207, "learning_rate": 8.772439857547786e-06, "loss": 0.3042, "step": 2420 }, { "epoch": 0.08830583617995494, "grad_norm": 5.90106725692749, "learning_rate": 8.808779707827604e-06, "loss": 0.3385, "step": 2430 }, { "epoch": 0.0886692346827531, "grad_norm": 9.207955360412598, "learning_rate": 8.845119558107422e-06, "loss": 0.2963, "step": 2440 }, { "epoch": 0.08903263318555127, "grad_norm": 22.280956268310547, "learning_rate": 8.881459408387238e-06, "loss": 0.4505, "step": 2450 }, { "epoch": 0.08939603168834945, "grad_norm": 3.090710401535034, "learning_rate": 8.917799258667055e-06, "loss": 0.3114, "step": 2460 }, { "epoch": 0.08975943019114761, "grad_norm": 4.144134044647217, "learning_rate": 8.954139108946871e-06, "loss": 0.2855, "step": 2470 }, { "epoch": 0.09012282869394578, "grad_norm": 4.343112468719482, "learning_rate": 8.990478959226687e-06, "loss": 0.2906, "step": 2480 }, { "epoch": 0.09048622719674394, "grad_norm": 2.6925292015075684, "learning_rate": 9.026818809506505e-06, "loss": 0.284, "step": 2490 }, { "epoch": 0.09084962569954212, "grad_norm": 29.639341354370117, "learning_rate": 9.063158659786323e-06, "loss": 0.3411, "step": 2500 }, { "epoch": 0.09121302420234029, "grad_norm": 4.425374984741211, "learning_rate": 9.099498510066139e-06, "loss": 0.3041, "step": 2510 }, { "epoch": 0.09157642270513845, "grad_norm": 5.6643195152282715, "learning_rate": 9.135838360345955e-06, "loss": 0.3123, "step": 2520 }, { "epoch": 0.09193982120793662, "grad_norm": 3.9098479747772217, "learning_rate": 9.172178210625772e-06, "loss": 0.3664, "step": 2530 }, { "epoch": 0.0923032197107348, "grad_norm": 3.133389949798584, "learning_rate": 9.20851806090559e-06, "loss": 0.2708, "step": 2540 }, { "epoch": 0.09266661821353296, "grad_norm": 43.00468063354492, "learning_rate": 9.244857911185406e-06, "loss": 0.4215, "step": 2550 }, { "epoch": 0.09303001671633113, 
"grad_norm": 3.1411876678466797, "learning_rate": 9.281197761465222e-06, "loss": 0.2983, "step": 2560 }, { "epoch": 0.09339341521912929, "grad_norm": 3.263828754425049, "learning_rate": 9.317537611745041e-06, "loss": 0.3137, "step": 2570 }, { "epoch": 0.09375681372192747, "grad_norm": 3.618751049041748, "learning_rate": 9.353877462024857e-06, "loss": 0.3279, "step": 2580 }, { "epoch": 0.09412021222472564, "grad_norm": 3.6551568508148193, "learning_rate": 9.390217312304673e-06, "loss": 0.2409, "step": 2590 }, { "epoch": 0.0944836107275238, "grad_norm": 8.680901527404785, "learning_rate": 9.42655716258449e-06, "loss": 0.373, "step": 2600 }, { "epoch": 0.09484700923032197, "grad_norm": 4.761026382446289, "learning_rate": 9.462897012864308e-06, "loss": 0.2777, "step": 2610 }, { "epoch": 0.09521040773312014, "grad_norm": 3.142723321914673, "learning_rate": 9.499236863144124e-06, "loss": 0.2882, "step": 2620 }, { "epoch": 0.09557380623591831, "grad_norm": 2.969968795776367, "learning_rate": 9.53557671342394e-06, "loss": 0.3086, "step": 2630 }, { "epoch": 0.09593720473871648, "grad_norm": 3.754549264907837, "learning_rate": 9.571916563703758e-06, "loss": 0.259, "step": 2640 }, { "epoch": 0.09630060324151464, "grad_norm": 23.7288761138916, "learning_rate": 9.608256413983576e-06, "loss": 0.4284, "step": 2650 }, { "epoch": 0.09666400174431282, "grad_norm": 2.7727372646331787, "learning_rate": 9.644596264263392e-06, "loss": 0.2602, "step": 2660 }, { "epoch": 0.09702740024711098, "grad_norm": 14.707064628601074, "learning_rate": 9.680936114543208e-06, "loss": 0.3059, "step": 2670 }, { "epoch": 0.09739079874990915, "grad_norm": 3.8396642208099365, "learning_rate": 9.717275964823025e-06, "loss": 0.2811, "step": 2680 }, { "epoch": 0.09775419725270731, "grad_norm": 2.9460713863372803, "learning_rate": 9.753615815102843e-06, "loss": 0.2686, "step": 2690 }, { "epoch": 0.0981175957555055, "grad_norm": 20.107336044311523, "learning_rate": 9.789955665382659e-06, "loss": 0.4306, "step": 
2700 }, { "epoch": 0.09848099425830366, "grad_norm": 3.1286280155181885, "learning_rate": 9.826295515662477e-06, "loss": 0.3059, "step": 2710 }, { "epoch": 0.09884439276110182, "grad_norm": 6.160215854644775, "learning_rate": 9.862635365942292e-06, "loss": 0.3046, "step": 2720 }, { "epoch": 0.09920779126389999, "grad_norm": 6.1921186447143555, "learning_rate": 9.89897521622211e-06, "loss": 0.285, "step": 2730 }, { "epoch": 0.09957118976669817, "grad_norm": 13.759759902954102, "learning_rate": 9.935315066501926e-06, "loss": 0.2888, "step": 2740 }, { "epoch": 0.09993458826949633, "grad_norm": 13.92764949798584, "learning_rate": 9.971654916781744e-06, "loss": 0.4266, "step": 2750 }, { "epoch": 0.1002979867722945, "grad_norm": 3.3999857902526855, "learning_rate": 1.000799476706156e-05, "loss": 0.2858, "step": 2760 }, { "epoch": 0.10066138527509266, "grad_norm": 4.103928089141846, "learning_rate": 1.0044334617341377e-05, "loss": 0.262, "step": 2770 }, { "epoch": 0.10102478377789084, "grad_norm": 6.15985107421875, "learning_rate": 1.0080674467621195e-05, "loss": 0.2866, "step": 2780 }, { "epoch": 0.10138818228068901, "grad_norm": 4.904097557067871, "learning_rate": 1.0117014317901011e-05, "loss": 0.5057, "step": 2790 }, { "epoch": 0.10175158078348717, "grad_norm": 15.2875337600708, "learning_rate": 1.0153354168180827e-05, "loss": 0.4345, "step": 2800 }, { "epoch": 0.10211497928628534, "grad_norm": 2.4697763919830322, "learning_rate": 1.0189694018460643e-05, "loss": 0.2693, "step": 2810 }, { "epoch": 0.1024783777890835, "grad_norm": 5.04618501663208, "learning_rate": 1.0226033868740462e-05, "loss": 0.2868, "step": 2820 }, { "epoch": 0.10284177629188168, "grad_norm": 5.851120948791504, "learning_rate": 1.0262373719020278e-05, "loss": 0.3425, "step": 2830 }, { "epoch": 0.10320517479467985, "grad_norm": 2.1007258892059326, "learning_rate": 1.0298713569300094e-05, "loss": 0.2394, "step": 2840 }, { "epoch": 0.10356857329747801, "grad_norm": 23.411701202392578, "learning_rate": 
1.0335053419579912e-05, "loss": 0.4125, "step": 2850 }, { "epoch": 0.10393197180027618, "grad_norm": 4.178852558135986, "learning_rate": 1.037139326985973e-05, "loss": 0.2951, "step": 2860 }, { "epoch": 0.10429537030307436, "grad_norm": 1.7873708009719849, "learning_rate": 1.0407733120139545e-05, "loss": 0.3272, "step": 2870 }, { "epoch": 0.10465876880587252, "grad_norm": 7.603367328643799, "learning_rate": 1.0444072970419361e-05, "loss": 0.2779, "step": 2880 }, { "epoch": 0.10502216730867069, "grad_norm": 3.468761444091797, "learning_rate": 1.0480412820699179e-05, "loss": 0.3007, "step": 2890 }, { "epoch": 0.10538556581146885, "grad_norm": 16.35407829284668, "learning_rate": 1.0516752670978997e-05, "loss": 0.3918, "step": 2900 }, { "epoch": 0.10574896431426703, "grad_norm": 3.4226725101470947, "learning_rate": 1.0553092521258813e-05, "loss": 3.7156, "step": 2910 }, { "epoch": 0.1061123628170652, "grad_norm": 9.006295204162598, "learning_rate": 1.058943237153863e-05, "loss": 0.4075, "step": 2920 }, { "epoch": 0.10647576131986336, "grad_norm": 4.993385314941406, "learning_rate": 1.0625772221818446e-05, "loss": 0.3588, "step": 2930 }, { "epoch": 0.10683915982266153, "grad_norm": 3.7684736251831055, "learning_rate": 1.0662112072098264e-05, "loss": 0.2429, "step": 2940 }, { "epoch": 0.1072025583254597, "grad_norm": 40.301170349121094, "learning_rate": 1.069845192237808e-05, "loss": 0.4739, "step": 2950 }, { "epoch": 0.10756595682825787, "grad_norm": 3.772693157196045, "learning_rate": 1.0734791772657898e-05, "loss": 0.3284, "step": 2960 }, { "epoch": 0.10792935533105603, "grad_norm": 3.0183212757110596, "learning_rate": 1.0771131622937714e-05, "loss": 0.38, "step": 2970 }, { "epoch": 0.1082927538338542, "grad_norm": 6.61776876449585, "learning_rate": 1.0807471473217531e-05, "loss": 0.2793, "step": 2980 }, { "epoch": 0.10865615233665238, "grad_norm": 6.112472057342529, "learning_rate": 1.0843811323497347e-05, "loss": 0.2447, "step": 2990 }, { "epoch": 
0.10901955083945054, "grad_norm": 10.800559997558594, "learning_rate": 1.0880151173777165e-05, "loss": 0.373, "step": 3000 }, { "epoch": 0.10901955083945054, "eval_loss": 0.4652940630912781, "eval_runtime": 180.0765, "eval_samples_per_second": 41.171, "eval_steps_per_second": 5.148, "eval_wer": 0.27681667181004593, "step": 3000 }, { "epoch": 0.10938294934224871, "grad_norm": 7.778831958770752, "learning_rate": 1.091649102405698e-05, "loss": 0.29, "step": 3010 }, { "epoch": 0.10974634784504687, "grad_norm": 2.855592966079712, "learning_rate": 1.0952830874336798e-05, "loss": 0.2411, "step": 3020 }, { "epoch": 0.11010974634784505, "grad_norm": 4.229335784912109, "learning_rate": 1.0989170724616616e-05, "loss": 0.3247, "step": 3030 }, { "epoch": 0.11047314485064322, "grad_norm": 3.8145949840545654, "learning_rate": 1.1025510574896432e-05, "loss": 0.2242, "step": 3040 }, { "epoch": 0.11083654335344138, "grad_norm": 22.571304321289062, "learning_rate": 1.1061850425176248e-05, "loss": 0.3959, "step": 3050 }, { "epoch": 0.11119994185623955, "grad_norm": 2.4706461429595947, "learning_rate": 1.1098190275456066e-05, "loss": 0.2466, "step": 3060 }, { "epoch": 0.11156334035903773, "grad_norm": 4.497069358825684, "learning_rate": 1.1134530125735883e-05, "loss": 2.1968, "step": 3070 }, { "epoch": 0.11192673886183589, "grad_norm": 5.060062885284424, "learning_rate": 1.11708699760157e-05, "loss": 0.2921, "step": 3080 }, { "epoch": 0.11229013736463406, "grad_norm": 2.7882325649261475, "learning_rate": 1.1207209826295515e-05, "loss": 0.2534, "step": 3090 }, { "epoch": 0.11265353586743222, "grad_norm": 9.96241569519043, "learning_rate": 1.1243549676575333e-05, "loss": 0.421, "step": 3100 }, { "epoch": 0.1130169343702304, "grad_norm": 31.262916564941406, "learning_rate": 1.127988952685515e-05, "loss": 0.4048, "step": 3110 }, { "epoch": 0.11338033287302857, "grad_norm": 3.472343921661377, "learning_rate": 1.1316229377134967e-05, "loss": 0.2798, "step": 3120 }, { "epoch": 
0.11374373137582673, "grad_norm": 4.074085235595703, "learning_rate": 1.1352569227414783e-05, "loss": 0.299, "step": 3130 }, { "epoch": 0.1141071298786249, "grad_norm": 2.879512310028076, "learning_rate": 1.1388909077694602e-05, "loss": 0.2137, "step": 3140 }, { "epoch": 0.11447052838142306, "grad_norm": 125.17889404296875, "learning_rate": 1.1425248927974418e-05, "loss": 0.5418, "step": 3150 }, { "epoch": 0.11483392688422124, "grad_norm": 4.171487808227539, "learning_rate": 1.1461588778254234e-05, "loss": 0.2685, "step": 3160 }, { "epoch": 0.1151973253870194, "grad_norm": 2.1496529579162598, "learning_rate": 1.1497928628534051e-05, "loss": 0.2421, "step": 3170 }, { "epoch": 0.11556072388981757, "grad_norm": 2.6266047954559326, "learning_rate": 1.1534268478813867e-05, "loss": 0.3288, "step": 3180 }, { "epoch": 0.11592412239261574, "grad_norm": 3.7677230834960938, "learning_rate": 1.1570608329093685e-05, "loss": 0.3093, "step": 3190 }, { "epoch": 0.11628752089541392, "grad_norm": 9.4945707321167, "learning_rate": 1.1606948179373501e-05, "loss": 0.3066, "step": 3200 }, { "epoch": 0.11665091939821208, "grad_norm": 2.5509915351867676, "learning_rate": 1.1643288029653319e-05, "loss": 0.2615, "step": 3210 }, { "epoch": 0.11701431790101025, "grad_norm": 3.066624641418457, "learning_rate": 1.1679627879933135e-05, "loss": 0.3224, "step": 3220 }, { "epoch": 0.11737771640380841, "grad_norm": 6.494440078735352, "learning_rate": 1.1715967730212952e-05, "loss": 0.3017, "step": 3230 }, { "epoch": 0.11774111490660659, "grad_norm": 3.4675605297088623, "learning_rate": 1.1752307580492768e-05, "loss": 0.2152, "step": 3240 }, { "epoch": 0.11810451340940475, "grad_norm": 15.5110445022583, "learning_rate": 1.1788647430772586e-05, "loss": 0.349, "step": 3250 }, { "epoch": 0.11846791191220292, "grad_norm": 1.972530484199524, "learning_rate": 1.1824987281052402e-05, "loss": 0.2728, "step": 3260 }, { "epoch": 0.11883131041500108, "grad_norm": 4.018677711486816, "learning_rate": 
1.186132713133222e-05, "loss": 0.254, "step": 3270 }, { "epoch": 0.11919470891779926, "grad_norm": 4.95416784286499, "learning_rate": 1.1897666981612037e-05, "loss": 0.2465, "step": 3280 }, { "epoch": 0.11955810742059743, "grad_norm": 3.165599822998047, "learning_rate": 1.1934006831891853e-05, "loss": 0.2537, "step": 3290 }, { "epoch": 0.1199215059233956, "grad_norm": 8.508636474609375, "learning_rate": 1.1970346682171669e-05, "loss": 0.3655, "step": 3300 }, { "epoch": 0.12028490442619376, "grad_norm": 2.3892879486083984, "learning_rate": 1.2006686532451487e-05, "loss": 0.252, "step": 3310 }, { "epoch": 0.12064830292899194, "grad_norm": 3.591564178466797, "learning_rate": 1.2043026382731304e-05, "loss": 0.2401, "step": 3320 }, { "epoch": 0.1210117014317901, "grad_norm": 3.891261577606201, "learning_rate": 1.207936623301112e-05, "loss": 0.2909, "step": 3330 }, { "epoch": 0.12137509993458827, "grad_norm": 4.691511154174805, "learning_rate": 1.2115706083290936e-05, "loss": 0.2304, "step": 3340 }, { "epoch": 0.12173849843738643, "grad_norm": 18.415170669555664, "learning_rate": 1.2152045933570754e-05, "loss": 0.35, "step": 3350 }, { "epoch": 0.12210189694018461, "grad_norm": 3.9105615615844727, "learning_rate": 1.2188385783850572e-05, "loss": 0.3112, "step": 3360 }, { "epoch": 0.12246529544298278, "grad_norm": 3.215313196182251, "learning_rate": 1.2224725634130388e-05, "loss": 0.2492, "step": 3370 }, { "epoch": 0.12282869394578094, "grad_norm": 9.30749225616455, "learning_rate": 1.2261065484410204e-05, "loss": 0.2696, "step": 3380 }, { "epoch": 0.12319209244857911, "grad_norm": 4.9797682762146, "learning_rate": 1.2297405334690023e-05, "loss": 0.2197, "step": 3390 }, { "epoch": 0.12355549095137729, "grad_norm": 19.632797241210938, "learning_rate": 1.2333745184969839e-05, "loss": 0.3411, "step": 3400 }, { "epoch": 0.12391888945417545, "grad_norm": 4.509830474853516, "learning_rate": 1.2370085035249655e-05, "loss": 0.2394, "step": 3410 }, { "epoch": 0.12428228795697362, 
"grad_norm": 2.253514051437378, "learning_rate": 1.2406424885529473e-05, "loss": 0.266, "step": 3420 }, { "epoch": 0.12464568645977178, "grad_norm": 3.123828172683716, "learning_rate": 1.244276473580929e-05, "loss": 0.2675, "step": 3430 }, { "epoch": 0.12500908496256996, "grad_norm": 34.37680435180664, "learning_rate": 1.2479104586089106e-05, "loss": 0.279, "step": 3440 }, { "epoch": 0.1253724834653681, "grad_norm": 10.051690101623535, "learning_rate": 1.2515444436368922e-05, "loss": 0.3364, "step": 3450 }, { "epoch": 0.1257358819681663, "grad_norm": 2.1765711307525635, "learning_rate": 1.255178428664874e-05, "loss": 0.2288, "step": 3460 }, { "epoch": 0.12609928047096447, "grad_norm": 2.4910778999328613, "learning_rate": 1.2588124136928556e-05, "loss": 0.2866, "step": 3470 }, { "epoch": 0.12646267897376262, "grad_norm": 7.379613876342773, "learning_rate": 1.2624463987208373e-05, "loss": 0.2618, "step": 3480 }, { "epoch": 0.1268260774765608, "grad_norm": 2.681814432144165, "learning_rate": 1.266080383748819e-05, "loss": 0.2405, "step": 3490 }, { "epoch": 0.12718947597935895, "grad_norm": 83.93474578857422, "learning_rate": 1.2697143687768007e-05, "loss": 0.338, "step": 3500 }, { "epoch": 0.12755287448215713, "grad_norm": 1.5564826726913452, "learning_rate": 1.2733483538047825e-05, "loss": 0.2305, "step": 3510 }, { "epoch": 0.1279162729849553, "grad_norm": 2.6026437282562256, "learning_rate": 1.2769823388327639e-05, "loss": 0.2618, "step": 3520 }, { "epoch": 0.12827967148775346, "grad_norm": 8.228372573852539, "learning_rate": 1.2806163238607458e-05, "loss": 0.2586, "step": 3530 }, { "epoch": 0.12864306999055164, "grad_norm": 2.643139362335205, "learning_rate": 1.2842503088887276e-05, "loss": 0.2197, "step": 3540 }, { "epoch": 0.12900646849334982, "grad_norm": 400.0296325683594, "learning_rate": 1.287884293916709e-05, "loss": 0.3586, "step": 3550 }, { "epoch": 0.12936986699614797, "grad_norm": 1.6349281072616577, "learning_rate": 1.2915182789446908e-05, "loss": 
0.2364, "step": 3560 }, { "epoch": 0.12973326549894615, "grad_norm": 2.6573753356933594, "learning_rate": 1.2951522639726724e-05, "loss": 0.2195, "step": 3570 }, { "epoch": 0.1300966640017443, "grad_norm": 4.2721686363220215, "learning_rate": 1.2987862490006542e-05, "loss": 0.3092, "step": 3580 }, { "epoch": 0.13046006250454248, "grad_norm": 2.9982502460479736, "learning_rate": 1.302420234028636e-05, "loss": 0.2826, "step": 3590 }, { "epoch": 0.13082346100734066, "grad_norm": 8.903009414672852, "learning_rate": 1.3060542190566175e-05, "loss": 0.3367, "step": 3600 }, { "epoch": 0.13082346100734066, "eval_loss": 0.4490436017513275, "eval_runtime": 179.8743, "eval_samples_per_second": 41.218, "eval_steps_per_second": 5.154, "eval_wer": 0.2664058670829778, "step": 3600 }, { "epoch": 0.1311868595101388, "grad_norm": 2.9746363162994385, "learning_rate": 1.3096882040845993e-05, "loss": 0.2418, "step": 3610 }, { "epoch": 0.131550258012937, "grad_norm": 2.274872303009033, "learning_rate": 1.313322189112581e-05, "loss": 0.3052, "step": 3620 }, { "epoch": 0.13191365651573517, "grad_norm": 7.114847660064697, "learning_rate": 1.3169561741405625e-05, "loss": 0.2821, "step": 3630 }, { "epoch": 0.13227705501853332, "grad_norm": 3.2101128101348877, "learning_rate": 1.3205901591685444e-05, "loss": 0.2223, "step": 3640 }, { "epoch": 0.1326404535213315, "grad_norm": 18.914968490600586, "learning_rate": 1.3242241441965258e-05, "loss": 0.3809, "step": 3650 }, { "epoch": 0.13300385202412965, "grad_norm": 2.399569272994995, "learning_rate": 1.3278581292245076e-05, "loss": 0.2221, "step": 3660 }, { "epoch": 0.13336725052692783, "grad_norm": 5.76792573928833, "learning_rate": 1.3314921142524894e-05, "loss": 0.2487, "step": 3670 }, { "epoch": 0.133730649029726, "grad_norm": 3.6859967708587646, "learning_rate": 1.335126099280471e-05, "loss": 0.2781, "step": 3680 }, { "epoch": 0.13409404753252416, "grad_norm": 2.9653141498565674, "learning_rate": 1.3387600843084527e-05, "loss": 0.2258, "step": 
3690 }, { "epoch": 0.13445744603532234, "grad_norm": 19.170753479003906, "learning_rate": 1.3423940693364345e-05, "loss": 0.3902, "step": 3700 }, { "epoch": 0.13482084453812052, "grad_norm": 2.2880115509033203, "learning_rate": 1.3460280543644161e-05, "loss": 0.2745, "step": 3710 }, { "epoch": 0.13518424304091867, "grad_norm": 2.5196125507354736, "learning_rate": 1.3496620393923979e-05, "loss": 0.2293, "step": 3720 }, { "epoch": 0.13554764154371685, "grad_norm": 3.827986001968384, "learning_rate": 1.3532960244203793e-05, "loss": 0.259, "step": 3730 }, { "epoch": 0.135911040046515, "grad_norm": 3.4211530685424805, "learning_rate": 1.356930009448361e-05, "loss": 0.3256, "step": 3740 }, { "epoch": 0.13627443854931318, "grad_norm": 26.879398345947266, "learning_rate": 1.360563994476343e-05, "loss": 0.3208, "step": 3750 }, { "epoch": 0.13663783705211135, "grad_norm": 2.316091775894165, "learning_rate": 1.3641979795043244e-05, "loss": 0.2316, "step": 3760 }, { "epoch": 0.1370012355549095, "grad_norm": 4.098924160003662, "learning_rate": 1.3678319645323062e-05, "loss": 0.2399, "step": 3770 }, { "epoch": 0.13736463405770769, "grad_norm": 6.9372687339782715, "learning_rate": 1.371465949560288e-05, "loss": 0.2858, "step": 3780 }, { "epoch": 0.13772803256050586, "grad_norm": 2.509535789489746, "learning_rate": 1.3750999345882695e-05, "loss": 0.2113, "step": 3790 }, { "epoch": 0.13809143106330402, "grad_norm": 7.7181077003479, "learning_rate": 1.3787339196162513e-05, "loss": 0.3279, "step": 3800 }, { "epoch": 0.1384548295661022, "grad_norm": 2.6843245029449463, "learning_rate": 1.3823679046442329e-05, "loss": 0.2267, "step": 3810 }, { "epoch": 0.13881822806890035, "grad_norm": 3.05159068107605, "learning_rate": 1.3860018896722147e-05, "loss": 0.229, "step": 3820 }, { "epoch": 0.13918162657169852, "grad_norm": 5.029635429382324, "learning_rate": 1.3896358747001964e-05, "loss": 0.2627, "step": 3830 }, { "epoch": 0.1395450250744967, "grad_norm": 2.8287103176116943, 
"learning_rate": 1.3932698597281779e-05, "loss": 0.2294, "step": 3840 }, { "epoch": 0.13990842357729485, "grad_norm": 24.862224578857422, "learning_rate": 1.3969038447561598e-05, "loss": 0.3198, "step": 3850 }, { "epoch": 0.14027182208009303, "grad_norm": 5.624647617340088, "learning_rate": 1.4005378297841412e-05, "loss": 0.2641, "step": 3860 }, { "epoch": 0.14063522058289118, "grad_norm": 1.6199389696121216, "learning_rate": 1.404171814812123e-05, "loss": 0.2279, "step": 3870 }, { "epoch": 0.14099861908568936, "grad_norm": 2.864058017730713, "learning_rate": 1.4078057998401047e-05, "loss": 0.2448, "step": 3880 }, { "epoch": 0.14136201758848754, "grad_norm": 3.897899627685547, "learning_rate": 1.4114397848680863e-05, "loss": 0.2438, "step": 3890 }, { "epoch": 0.1417254160912857, "grad_norm": 42.4840087890625, "learning_rate": 1.4150737698960681e-05, "loss": 0.3604, "step": 3900 }, { "epoch": 0.14208881459408387, "grad_norm": 1.6532913446426392, "learning_rate": 1.4187077549240499e-05, "loss": 0.2469, "step": 3910 }, { "epoch": 0.14245221309688205, "grad_norm": 2.3755931854248047, "learning_rate": 1.4223417399520315e-05, "loss": 0.2233, "step": 3920 }, { "epoch": 0.1428156115996802, "grad_norm": 5.866461277008057, "learning_rate": 1.4259757249800132e-05, "loss": 0.2952, "step": 3930 }, { "epoch": 0.14317901010247838, "grad_norm": 3.171570301055908, "learning_rate": 1.4296097100079947e-05, "loss": 0.2329, "step": 3940 }, { "epoch": 0.14354240860527653, "grad_norm": 23.302635192871094, "learning_rate": 1.4332436950359764e-05, "loss": 0.3592, "step": 3950 }, { "epoch": 0.1439058071080747, "grad_norm": 2.3609213829040527, "learning_rate": 1.4368776800639584e-05, "loss": 1.1975, "step": 3960 }, { "epoch": 0.1442692056108729, "grad_norm": 2.857872486114502, "learning_rate": 1.4405116650919398e-05, "loss": 0.265, "step": 3970 }, { "epoch": 0.14463260411367104, "grad_norm": 6.918335914611816, "learning_rate": 1.4441456501199216e-05, "loss": 0.4057, "step": 3980 }, { 
"epoch": 0.14499600261646922, "grad_norm": 3.8019461631774902, "learning_rate": 1.4477796351479033e-05, "loss": 0.299, "step": 3990 }, { "epoch": 0.1453594011192674, "grad_norm": 30.963428497314453, "learning_rate": 1.451413620175885e-05, "loss": 0.335, "step": 4000 }, { "epoch": 0.14572279962206555, "grad_norm": 2.3968963623046875, "learning_rate": 1.4550476052038667e-05, "loss": 0.224, "step": 4010 }, { "epoch": 0.14608619812486373, "grad_norm": 6.7229485511779785, "learning_rate": 1.4586815902318481e-05, "loss": 0.2657, "step": 4020 }, { "epoch": 0.14644959662766188, "grad_norm": 17.447879791259766, "learning_rate": 1.46231557525983e-05, "loss": 0.2199, "step": 4030 }, { "epoch": 0.14681299513046006, "grad_norm": 2.020756721496582, "learning_rate": 1.4659495602878118e-05, "loss": 0.31, "step": 4040 }, { "epoch": 0.14717639363325824, "grad_norm": 38.28268814086914, "learning_rate": 1.4695835453157932e-05, "loss": 0.3861, "step": 4050 }, { "epoch": 0.1475397921360564, "grad_norm": 2.085073232650757, "learning_rate": 1.473217530343775e-05, "loss": 0.2078, "step": 4060 }, { "epoch": 0.14790319063885457, "grad_norm": 3.453597068786621, "learning_rate": 1.476851515371757e-05, "loss": 0.296, "step": 4070 }, { "epoch": 0.14826658914165275, "grad_norm": 2.3039424419403076, "learning_rate": 1.4804855003997384e-05, "loss": 0.2346, "step": 4080 }, { "epoch": 0.1486299876444509, "grad_norm": 3.217890977859497, "learning_rate": 1.4841194854277201e-05, "loss": 0.2243, "step": 4090 }, { "epoch": 0.14899338614724908, "grad_norm": 12.48748779296875, "learning_rate": 1.4877534704557017e-05, "loss": 0.3378, "step": 4100 }, { "epoch": 0.14935678465004723, "grad_norm": 2.781388282775879, "learning_rate": 1.4913874554836835e-05, "loss": 0.2167, "step": 4110 }, { "epoch": 0.1497201831528454, "grad_norm": 2.564457893371582, "learning_rate": 1.4950214405116653e-05, "loss": 0.2187, "step": 4120 }, { "epoch": 0.1500835816556436, "grad_norm": 9.590895652770996, "learning_rate": 
1.4986554255396469e-05, "loss": 0.2444, "step": 4130 }, { "epoch": 0.15044698015844174, "grad_norm": 2.8055028915405273, "learning_rate": 1.5022894105676286e-05, "loss": 0.2499, "step": 4140 }, { "epoch": 0.15081037866123992, "grad_norm": 7.157045364379883, "learning_rate": 1.5059233955956104e-05, "loss": 0.361, "step": 4150 }, { "epoch": 0.1511737771640381, "grad_norm": 3.369006633758545, "learning_rate": 1.5095573806235918e-05, "loss": 0.259, "step": 4160 }, { "epoch": 0.15153717566683625, "grad_norm": 5.334355354309082, "learning_rate": 1.5131913656515736e-05, "loss": 0.2797, "step": 4170 }, { "epoch": 0.15190057416963443, "grad_norm": 6.667120456695557, "learning_rate": 1.5168253506795552e-05, "loss": 0.2678, "step": 4180 }, { "epoch": 0.15226397267243258, "grad_norm": 1.7419887781143188, "learning_rate": 1.520459335707537e-05, "loss": 0.2432, "step": 4190 }, { "epoch": 0.15262737117523076, "grad_norm": 7.022573947906494, "learning_rate": 1.5240933207355187e-05, "loss": 0.2955, "step": 4200 }, { "epoch": 0.15262737117523076, "eval_loss": 0.4361402690410614, "eval_runtime": 180.5933, "eval_samples_per_second": 41.054, "eval_steps_per_second": 5.133, "eval_wer": 0.2589540181894095, "step": 4200 }, { "epoch": 0.15299076967802894, "grad_norm": 3.108078718185425, "learning_rate": 1.5277273057635e-05, "loss": 3.6147, "step": 4210 }, { "epoch": 0.1533541681808271, "grad_norm": 2.6063787937164307, "learning_rate": 1.531361290791482e-05, "loss": 0.232, "step": 4220 }, { "epoch": 0.15371756668362527, "grad_norm": 3.581697463989258, "learning_rate": 1.5349952758194637e-05, "loss": 0.2451, "step": 4230 }, { "epoch": 0.15408096518642342, "grad_norm": 2.5910837650299072, "learning_rate": 1.5386292608474453e-05, "loss": 0.2283, "step": 4240 }, { "epoch": 0.1544443636892216, "grad_norm": 70.38739013671875, "learning_rate": 1.5422632458754272e-05, "loss": 0.396, "step": 4250 }, { "epoch": 0.15480776219201978, "grad_norm": 3.5658187866210938, "learning_rate": 
1.5458972309034088e-05, "loss": 0.2116, "step": 4260 }, { "epoch": 0.15517116069481793, "grad_norm": 5.393126487731934, "learning_rate": 1.5495312159313904e-05, "loss": 0.2382, "step": 4270 }, { "epoch": 0.1555345591976161, "grad_norm": 10.135586738586426, "learning_rate": 1.5531652009593723e-05, "loss": 0.2485, "step": 4280 }, { "epoch": 0.15589795770041429, "grad_norm": 2.1143031120300293, "learning_rate": 1.5567991859873536e-05, "loss": 0.1936, "step": 4290 }, { "epoch": 0.15626135620321244, "grad_norm": 20.077383041381836, "learning_rate": 1.5604331710153355e-05, "loss": 0.3818, "step": 4300 }, { "epoch": 0.15662475470601062, "grad_norm": 3.793126344680786, "learning_rate": 1.564067156043317e-05, "loss": 0.2245, "step": 4310 }, { "epoch": 0.15698815320880877, "grad_norm": 3.2057955265045166, "learning_rate": 1.5677011410712987e-05, "loss": 0.2551, "step": 4320 }, { "epoch": 0.15735155171160695, "grad_norm": 5.002716064453125, "learning_rate": 1.5713351260992806e-05, "loss": 0.2951, "step": 4330 }, { "epoch": 0.15771495021440513, "grad_norm": 2.2240726947784424, "learning_rate": 1.5749691111272622e-05, "loss": 0.1993, "step": 4340 }, { "epoch": 0.15807834871720328, "grad_norm": 55.30891036987305, "learning_rate": 1.578603096155244e-05, "loss": 0.2803, "step": 4350 }, { "epoch": 0.15844174722000146, "grad_norm": 1.9186596870422363, "learning_rate": 1.5822370811832258e-05, "loss": 0.2234, "step": 4360 }, { "epoch": 0.15880514572279963, "grad_norm": 1.7817661762237549, "learning_rate": 1.5858710662112074e-05, "loss": 0.2038, "step": 4370 }, { "epoch": 0.15916854422559779, "grad_norm": 3.046330690383911, "learning_rate": 1.589505051239189e-05, "loss": 0.2809, "step": 4380 }, { "epoch": 0.15953194272839596, "grad_norm": 5.43302583694458, "learning_rate": 1.5931390362671706e-05, "loss": 0.1896, "step": 4390 }, { "epoch": 0.15989534123119412, "grad_norm": 12.185855865478516, "learning_rate": 1.596773021295152e-05, "loss": 0.2984, "step": 4400 }, { "epoch": 
0.1602587397339923, "grad_norm": 1.9507842063903809, "learning_rate": 1.600407006323134e-05, "loss": 0.2064, "step": 4410 }, { "epoch": 0.16062213823679047, "grad_norm": 4.536543846130371, "learning_rate": 1.6040409913511157e-05, "loss": 0.2433, "step": 4420 }, { "epoch": 0.16098553673958862, "grad_norm": 3.101174831390381, "learning_rate": 1.6076749763790973e-05, "loss": 0.2746, "step": 4430 }, { "epoch": 0.1613489352423868, "grad_norm": 2.2098021507263184, "learning_rate": 1.6113089614070792e-05, "loss": 0.17, "step": 4440 }, { "epoch": 0.16171233374518498, "grad_norm": 59.360809326171875, "learning_rate": 1.6149429464350608e-05, "loss": 0.379, "step": 4450 }, { "epoch": 0.16207573224798313, "grad_norm": 6.364736557006836, "learning_rate": 1.6185769314630424e-05, "loss": 0.2224, "step": 4460 }, { "epoch": 0.1624391307507813, "grad_norm": 3.2455356121063232, "learning_rate": 1.622210916491024e-05, "loss": 0.2195, "step": 4470 }, { "epoch": 0.16280252925357946, "grad_norm": 6.399629592895508, "learning_rate": 1.625844901519006e-05, "loss": 0.266, "step": 4480 }, { "epoch": 0.16316592775637764, "grad_norm": 16.19785499572754, "learning_rate": 1.6294788865469875e-05, "loss": 0.1836, "step": 4490 }, { "epoch": 0.16352932625917582, "grad_norm": 7.909778594970703, "learning_rate": 1.633112871574969e-05, "loss": 0.6016, "step": 4500 }, { "epoch": 0.16389272476197397, "grad_norm": 2.8134663105010986, "learning_rate": 1.636746856602951e-05, "loss": 0.2148, "step": 4510 }, { "epoch": 0.16425612326477215, "grad_norm": 2.667999505996704, "learning_rate": 1.6403808416309327e-05, "loss": 0.2294, "step": 4520 }, { "epoch": 0.1646195217675703, "grad_norm": 3.355242967605591, "learning_rate": 1.6440148266589143e-05, "loss": 0.2097, "step": 4530 }, { "epoch": 0.16498292027036848, "grad_norm": 2.6241908073425293, "learning_rate": 1.647648811686896e-05, "loss": 0.2337, "step": 4540 }, { "epoch": 0.16534631877316666, "grad_norm": 16.759428024291992, "learning_rate": 
1.6512827967148775e-05, "loss": 0.2944, "step": 4550 }, { "epoch": 0.1657097172759648, "grad_norm": 3.098898410797119, "learning_rate": 1.6549167817428594e-05, "loss": 0.1895, "step": 4560 }, { "epoch": 0.166073115778763, "grad_norm": 4.042644023895264, "learning_rate": 1.658550766770841e-05, "loss": 0.2369, "step": 4570 }, { "epoch": 0.16643651428156117, "grad_norm": 7.174807548522949, "learning_rate": 1.6621847517988226e-05, "loss": 0.2331, "step": 4580 }, { "epoch": 0.16679991278435932, "grad_norm": 2.1805012226104736, "learning_rate": 1.6658187368268045e-05, "loss": 0.2422, "step": 4590 }, { "epoch": 0.1671633112871575, "grad_norm": 18.097871780395508, "learning_rate": 1.6694527218547858e-05, "loss": 0.347, "step": 4600 }, { "epoch": 0.16752670978995565, "grad_norm": 3.48561429977417, "learning_rate": 1.6730867068827677e-05, "loss": 0.2985, "step": 4610 }, { "epoch": 0.16789010829275383, "grad_norm": 1.7519229650497437, "learning_rate": 1.6767206919107496e-05, "loss": 0.2204, "step": 4620 }, { "epoch": 0.168253506795552, "grad_norm": 3.7641661167144775, "learning_rate": 1.680354676938731e-05, "loss": 0.2348, "step": 4630 }, { "epoch": 0.16861690529835016, "grad_norm": 3.0688085556030273, "learning_rate": 1.683988661966713e-05, "loss": 0.2147, "step": 4640 }, { "epoch": 0.16898030380114834, "grad_norm": 25.845094680786133, "learning_rate": 1.6876226469946944e-05, "loss": 0.3671, "step": 4650 }, { "epoch": 0.16934370230394652, "grad_norm": 2.841994524002075, "learning_rate": 1.691256632022676e-05, "loss": 0.2182, "step": 4660 }, { "epoch": 0.16970710080674467, "grad_norm": 1.0501997470855713, "learning_rate": 1.694890617050658e-05, "loss": 0.1791, "step": 4670 }, { "epoch": 0.17007049930954285, "grad_norm": 3.3973441123962402, "learning_rate": 1.6985246020786392e-05, "loss": 0.3338, "step": 4680 }, { "epoch": 0.170433897812341, "grad_norm": 1.8442267179489136, "learning_rate": 1.702158587106621e-05, "loss": 0.2528, "step": 4690 }, { "epoch": 0.17079729631513918, 
"grad_norm": 42.373409271240234, "learning_rate": 1.705792572134603e-05, "loss": 0.2892, "step": 4700 }, { "epoch": 0.17116069481793736, "grad_norm": 6.344671726226807, "learning_rate": 1.7094265571625844e-05, "loss": 0.2474, "step": 4710 }, { "epoch": 0.1715240933207355, "grad_norm": 1.6177664995193481, "learning_rate": 1.7130605421905663e-05, "loss": 0.2364, "step": 4720 }, { "epoch": 0.1718874918235337, "grad_norm": 4.98591423034668, "learning_rate": 1.7166945272185482e-05, "loss": 0.2046, "step": 4730 }, { "epoch": 0.17225089032633187, "grad_norm": 7.943169116973877, "learning_rate": 1.7203285122465295e-05, "loss": 0.293, "step": 4740 }, { "epoch": 0.17261428882913002, "grad_norm": 7.402034759521484, "learning_rate": 1.7239624972745114e-05, "loss": 0.2722, "step": 4750 }, { "epoch": 0.1729776873319282, "grad_norm": 13.290019035339355, "learning_rate": 1.727596482302493e-05, "loss": 0.347, "step": 4760 }, { "epoch": 0.17334108583472635, "grad_norm": 1.8591586351394653, "learning_rate": 1.7312304673304746e-05, "loss": 0.2291, "step": 4770 }, { "epoch": 0.17370448433752453, "grad_norm": 2.5220861434936523, "learning_rate": 1.7348644523584565e-05, "loss": 0.2436, "step": 4780 }, { "epoch": 0.1740678828403227, "grad_norm": 1.8692690134048462, "learning_rate": 1.738498437386438e-05, "loss": 0.1782, "step": 4790 }, { "epoch": 0.17443128134312086, "grad_norm": 12.558557510375977, "learning_rate": 1.7421324224144197e-05, "loss": 0.3347, "step": 4800 }, { "epoch": 0.17443128134312086, "eval_loss": 0.4148472547531128, "eval_runtime": 180.0999, "eval_samples_per_second": 41.166, "eval_steps_per_second": 5.147, "eval_wer": 0.23564543358687168, "step": 4800 }, { "epoch": 0.17479467984591904, "grad_norm": 6.168694972991943, "learning_rate": 1.7457664074424017e-05, "loss": 0.2183, "step": 4810 }, { "epoch": 0.17515807834871722, "grad_norm": 5.153416633605957, "learning_rate": 1.749400392470383e-05, "loss": 0.2689, "step": 4820 }, { "epoch": 0.17552147685151537, "grad_norm": 
2.8500893115997314, "learning_rate": 1.753034377498365e-05, "loss": 0.2848, "step": 4830 }, { "epoch": 0.17588487535431355, "grad_norm": 17.89117431640625, "learning_rate": 1.7566683625263465e-05, "loss": 0.2539, "step": 4840 }, { "epoch": 0.1762482738571117, "grad_norm": 19.455005645751953, "learning_rate": 1.760302347554328e-05, "loss": 0.3166, "step": 4850 }, { "epoch": 0.17661167235990988, "grad_norm": 1.7975777387619019, "learning_rate": 1.76393633258231e-05, "loss": 0.1927, "step": 4860 }, { "epoch": 0.17697507086270806, "grad_norm": 4.6790690422058105, "learning_rate": 1.7675703176102916e-05, "loss": 0.2248, "step": 4870 }, { "epoch": 0.1773384693655062, "grad_norm": 3.2644243240356445, "learning_rate": 1.7712043026382732e-05, "loss": 0.2239, "step": 4880 }, { "epoch": 0.1777018678683044, "grad_norm": 1.9375410079956055, "learning_rate": 1.7748382876662548e-05, "loss": 0.2053, "step": 4890 }, { "epoch": 0.17806526637110254, "grad_norm": 15.435178756713867, "learning_rate": 1.7784722726942367e-05, "loss": 0.2903, "step": 4900 }, { "epoch": 0.17842866487390072, "grad_norm": 2.486330270767212, "learning_rate": 1.7821062577222183e-05, "loss": 0.2598, "step": 4910 }, { "epoch": 0.1787920633766989, "grad_norm": 2.5542314052581787, "learning_rate": 1.7857402427502e-05, "loss": 0.2305, "step": 4920 }, { "epoch": 0.17915546187949705, "grad_norm": 3.6416103839874268, "learning_rate": 1.7893742277781815e-05, "loss": 1.046, "step": 4930 }, { "epoch": 0.17951886038229523, "grad_norm": 1.9395058155059814, "learning_rate": 1.7930082128061634e-05, "loss": 0.2466, "step": 4940 }, { "epoch": 0.1798822588850934, "grad_norm": 7.664824962615967, "learning_rate": 1.796642197834145e-05, "loss": 0.2871, "step": 4950 }, { "epoch": 0.18024565738789156, "grad_norm": 2.0301320552825928, "learning_rate": 1.8002761828621266e-05, "loss": 0.1996, "step": 4960 }, { "epoch": 0.18060905589068973, "grad_norm": 8.371182441711426, "learning_rate": 1.8039101678901082e-05, "loss": 0.1947, "step": 
4970 }, { "epoch": 0.18097245439348789, "grad_norm": 2.6746129989624023, "learning_rate": 1.80754415291809e-05, "loss": 0.2679, "step": 4980 }, { "epoch": 0.18133585289628606, "grad_norm": 3.448202133178711, "learning_rate": 1.8111781379460718e-05, "loss": 0.1859, "step": 4990 }, { "epoch": 0.18169925139908424, "grad_norm": 28.57021141052246, "learning_rate": 1.8148121229740534e-05, "loss": 0.3318, "step": 5000 }, { "epoch": 0.1820626499018824, "grad_norm": 4.731750965118408, "learning_rate": 1.8184461080020353e-05, "loss": 0.2354, "step": 5010 }, { "epoch": 0.18242604840468057, "grad_norm": 1.6815394163131714, "learning_rate": 1.822080093030017e-05, "loss": 0.2075, "step": 5020 }, { "epoch": 0.18278944690747875, "grad_norm": 3.868263006210327, "learning_rate": 1.8257140780579985e-05, "loss": 0.2345, "step": 5030 }, { "epoch": 0.1831528454102769, "grad_norm": 1.964240550994873, "learning_rate": 1.82934806308598e-05, "loss": 0.222, "step": 5040 }, { "epoch": 0.18351624391307508, "grad_norm": 11.881858825683594, "learning_rate": 1.8329820481139617e-05, "loss": 0.3251, "step": 5050 }, { "epoch": 0.18387964241587323, "grad_norm": 1.8463056087493896, "learning_rate": 1.8366160331419436e-05, "loss": 0.2255, "step": 5060 }, { "epoch": 0.1842430409186714, "grad_norm": 2.592672348022461, "learning_rate": 1.8402500181699252e-05, "loss": 0.1904, "step": 5070 }, { "epoch": 0.1846064394214696, "grad_norm": 4.0694074630737305, "learning_rate": 1.8438840031979068e-05, "loss": 0.2, "step": 5080 }, { "epoch": 0.18496983792426774, "grad_norm": 2.101837396621704, "learning_rate": 1.8475179882258887e-05, "loss": 0.1927, "step": 5090 }, { "epoch": 0.18533323642706592, "grad_norm": 22.162702560424805, "learning_rate": 1.8511519732538703e-05, "loss": 0.3481, "step": 5100 }, { "epoch": 0.1856966349298641, "grad_norm": 2.7928340435028076, "learning_rate": 1.854785958281852e-05, "loss": 0.2344, "step": 5110 }, { "epoch": 0.18606003343266225, "grad_norm": 1.8618485927581787, "learning_rate": 
1.858419943309834e-05, "loss": 0.2139, "step": 5120 }, { "epoch": 0.18642343193546043, "grad_norm": 2.9611120223999023, "learning_rate": 1.862053928337815e-05, "loss": 0.2194, "step": 5130 }, { "epoch": 0.18678683043825858, "grad_norm": 5.181276321411133, "learning_rate": 1.865687913365797e-05, "loss": 0.2596, "step": 5140 }, { "epoch": 0.18715022894105676, "grad_norm": 10.01041030883789, "learning_rate": 1.8693218983937787e-05, "loss": 0.3122, "step": 5150 }, { "epoch": 0.18751362744385494, "grad_norm": 4.952126979827881, "learning_rate": 1.8729558834217603e-05, "loss": 0.2183, "step": 5160 }, { "epoch": 0.1878770259466531, "grad_norm": 2.19279146194458, "learning_rate": 1.8765898684497422e-05, "loss": 0.2439, "step": 5170 }, { "epoch": 0.18824042444945127, "grad_norm": 3.5189321041107178, "learning_rate": 1.8802238534777238e-05, "loss": 0.2343, "step": 5180 }, { "epoch": 0.18860382295224945, "grad_norm": 2.0936787128448486, "learning_rate": 1.8838578385057054e-05, "loss": 0.1831, "step": 5190 }, { "epoch": 0.1889672214550476, "grad_norm": 12.835061073303223, "learning_rate": 1.8874918235336873e-05, "loss": 0.2561, "step": 5200 }, { "epoch": 0.18933061995784578, "grad_norm": 1.6738308668136597, "learning_rate": 1.8911258085616686e-05, "loss": 1.0257, "step": 5210 }, { "epoch": 0.18969401846064393, "grad_norm": 2.7661142349243164, "learning_rate": 1.8947597935896505e-05, "loss": 0.2398, "step": 5220 }, { "epoch": 0.1900574169634421, "grad_norm": 4.173921585083008, "learning_rate": 1.8983937786176324e-05, "loss": 0.2157, "step": 5230 }, { "epoch": 0.1904208154662403, "grad_norm": 3.7037158012390137, "learning_rate": 1.9020277636456137e-05, "loss": 0.2182, "step": 5240 }, { "epoch": 0.19078421396903844, "grad_norm": 16.288227081298828, "learning_rate": 1.9056617486735956e-05, "loss": 0.2829, "step": 5250 }, { "epoch": 0.19114761247183662, "grad_norm": 2.0504090785980225, "learning_rate": 1.9092957337015772e-05, "loss": 0.201, "step": 5260 }, { "epoch": 
0.19151101097463477, "grad_norm": 1.2266415357589722, "learning_rate": 1.9129297187295588e-05, "loss": 0.2072, "step": 5270 }, { "epoch": 0.19187440947743295, "grad_norm": 4.910546779632568, "learning_rate": 1.9165637037575408e-05, "loss": 0.1824, "step": 5280 }, { "epoch": 0.19223780798023113, "grad_norm": 3.093318223953247, "learning_rate": 1.9201976887855224e-05, "loss": 0.2471, "step": 5290 }, { "epoch": 0.19260120648302928, "grad_norm": 6.74167013168335, "learning_rate": 1.923831673813504e-05, "loss": 0.2912, "step": 5300 }, { "epoch": 0.19296460498582746, "grad_norm": 2.0540058612823486, "learning_rate": 1.927465658841486e-05, "loss": 0.2599, "step": 5310 }, { "epoch": 0.19332800348862564, "grad_norm": 2.407750129699707, "learning_rate": 1.931099643869467e-05, "loss": 0.2478, "step": 5320 }, { "epoch": 0.1936914019914238, "grad_norm": 5.479567527770996, "learning_rate": 1.934733628897449e-05, "loss": 0.5936, "step": 5330 }, { "epoch": 0.19405480049422197, "grad_norm": 1.912705659866333, "learning_rate": 1.9383676139254307e-05, "loss": 0.215, "step": 5340 }, { "epoch": 0.19441819899702012, "grad_norm": 38.24689865112305, "learning_rate": 1.9420015989534123e-05, "loss": 0.286, "step": 5350 }, { "epoch": 0.1947815974998183, "grad_norm": 3.4196550846099854, "learning_rate": 1.9456355839813942e-05, "loss": 0.4764, "step": 5360 }, { "epoch": 0.19514499600261648, "grad_norm": 1.705702781677246, "learning_rate": 1.9492695690093758e-05, "loss": 0.195, "step": 5370 }, { "epoch": 0.19550839450541463, "grad_norm": 2.7188572883605957, "learning_rate": 1.9529035540373574e-05, "loss": 0.2318, "step": 5380 }, { "epoch": 0.1958717930082128, "grad_norm": 5.217918872833252, "learning_rate": 1.9565375390653393e-05, "loss": 0.2288, "step": 5390 }, { "epoch": 0.196235191511011, "grad_norm": 7.094780921936035, "learning_rate": 1.960171524093321e-05, "loss": 0.3607, "step": 5400 }, { "epoch": 0.196235191511011, "eval_loss": 0.3953820765018463, "eval_runtime": 180.5214, 
"eval_samples_per_second": 41.07, "eval_steps_per_second": 5.135, "eval_wer": 0.23525514186650207, "step": 5400 }, { "epoch": 0.19659859001380914, "grad_norm": 2.379298448562622, "learning_rate": 1.9638055091213025e-05, "loss": 0.1981, "step": 5410 }, { "epoch": 0.19696198851660732, "grad_norm": 1.2755372524261475, "learning_rate": 1.967439494149284e-05, "loss": 0.3185, "step": 5420 }, { "epoch": 0.19732538701940547, "grad_norm": 2.6385338306427, "learning_rate": 1.9710734791772657e-05, "loss": 0.2231, "step": 5430 }, { "epoch": 0.19768878552220365, "grad_norm": 4.030337810516357, "learning_rate": 1.9747074642052477e-05, "loss": 0.2417, "step": 5440 }, { "epoch": 0.19805218402500183, "grad_norm": 10.988908767700195, "learning_rate": 1.9783414492332293e-05, "loss": 0.3163, "step": 5450 }, { "epoch": 0.19841558252779998, "grad_norm": 2.8273231983184814, "learning_rate": 1.981975434261211e-05, "loss": 0.2062, "step": 5460 }, { "epoch": 0.19877898103059816, "grad_norm": 1.880952000617981, "learning_rate": 1.9856094192891928e-05, "loss": 0.2103, "step": 5470 }, { "epoch": 0.19914237953339634, "grad_norm": 12.882647514343262, "learning_rate": 1.9892434043171744e-05, "loss": 0.2513, "step": 5480 }, { "epoch": 0.1995057780361945, "grad_norm": 2.8202428817749023, "learning_rate": 1.992877389345156e-05, "loss": 0.2002, "step": 5490 }, { "epoch": 0.19986917653899267, "grad_norm": 11.30123519897461, "learning_rate": 1.9965113743731376e-05, "loss": 0.3399, "step": 5500 }, { "epoch": 0.20023257504179082, "grad_norm": 3.016954183578491, "learning_rate": 2.0001453594011195e-05, "loss": 0.2016, "step": 5510 }, { "epoch": 0.200595973544589, "grad_norm": 1.3506131172180176, "learning_rate": 2.003779344429101e-05, "loss": 0.6008, "step": 5520 }, { "epoch": 0.20095937204738717, "grad_norm": 3.711284637451172, "learning_rate": 2.0074133294570827e-05, "loss": 0.2297, "step": 5530 }, { "epoch": 0.20132277055018533, "grad_norm": 2.8310322761535645, "learning_rate": 2.0110473144850643e-05, 
"loss": 0.19, "step": 5540 }, { "epoch": 0.2016861690529835, "grad_norm": 14.37038516998291, "learning_rate": 2.0146812995130462e-05, "loss": 0.3418, "step": 5550 }, { "epoch": 0.20204956755578168, "grad_norm": 2.037245988845825, "learning_rate": 2.0183152845410278e-05, "loss": 0.2054, "step": 5560 }, { "epoch": 0.20241296605857984, "grad_norm": 2.47495698928833, "learning_rate": 2.0219492695690094e-05, "loss": 0.2102, "step": 5570 }, { "epoch": 0.20277636456137801, "grad_norm": 5.948564529418945, "learning_rate": 2.025583254596991e-05, "loss": 0.2299, "step": 5580 }, { "epoch": 0.20313976306417617, "grad_norm": 2.010765552520752, "learning_rate": 2.029217239624973e-05, "loss": 0.2214, "step": 5590 }, { "epoch": 0.20350316156697434, "grad_norm": 109.07927703857422, "learning_rate": 2.0328512246529546e-05, "loss": 0.327, "step": 5600 }, { "epoch": 0.20386656006977252, "grad_norm": 2.708141565322876, "learning_rate": 2.036485209680936e-05, "loss": 0.2128, "step": 5610 }, { "epoch": 0.20422995857257067, "grad_norm": 4.145051002502441, "learning_rate": 2.040119194708918e-05, "loss": 1.5499, "step": 5620 }, { "epoch": 0.20459335707536885, "grad_norm": 5.204433917999268, "learning_rate": 2.0437531797368993e-05, "loss": 0.2238, "step": 5630 }, { "epoch": 0.204956755578167, "grad_norm": 3.625671625137329, "learning_rate": 2.0473871647648813e-05, "loss": 0.2009, "step": 5640 }, { "epoch": 0.20532015408096518, "grad_norm": 7.134413719177246, "learning_rate": 2.051021149792863e-05, "loss": 0.3236, "step": 5650 }, { "epoch": 0.20568355258376336, "grad_norm": 3.090585708618164, "learning_rate": 2.0546551348208445e-05, "loss": 0.2245, "step": 5660 }, { "epoch": 0.20604695108656151, "grad_norm": 1.5290725231170654, "learning_rate": 2.0582891198488264e-05, "loss": 0.9725, "step": 5670 }, { "epoch": 0.2064103495893597, "grad_norm": 12.433088302612305, "learning_rate": 2.061923104876808e-05, "loss": 0.2755, "step": 5680 }, { "epoch": 0.20677374809215787, "grad_norm": 
4.399518013000488, "learning_rate": 2.0655570899047896e-05, "loss": 0.2136, "step": 5690 }, { "epoch": 0.20713714659495602, "grad_norm": 12.662751197814941, "learning_rate": 2.0691910749327715e-05, "loss": 0.3022, "step": 5700 }, { "epoch": 0.2075005450977542, "grad_norm": 1.8056265115737915, "learning_rate": 2.0728250599607528e-05, "loss": 0.3538, "step": 5710 }, { "epoch": 0.20786394360055235, "grad_norm": 1.3133045434951782, "learning_rate": 2.0764590449887347e-05, "loss": 0.1829, "step": 5720 }, { "epoch": 0.20822734210335053, "grad_norm": 6.10534143447876, "learning_rate": 2.0800930300167167e-05, "loss": 0.2819, "step": 5730 }, { "epoch": 0.2085907406061487, "grad_norm": 4.327618598937988, "learning_rate": 2.083727015044698e-05, "loss": 0.2029, "step": 5740 }, { "epoch": 0.20895413910894686, "grad_norm": 6.878536224365234, "learning_rate": 2.08736100007268e-05, "loss": 0.3301, "step": 5750 }, { "epoch": 0.20931753761174504, "grad_norm": 2.8301913738250732, "learning_rate": 2.0909949851006614e-05, "loss": 0.2144, "step": 5760 }, { "epoch": 0.20968093611454322, "grad_norm": 2.248054265975952, "learning_rate": 2.094628970128643e-05, "loss": 0.2046, "step": 5770 }, { "epoch": 0.21004433461734137, "grad_norm": 4.619300842285156, "learning_rate": 2.098262955156625e-05, "loss": 0.2487, "step": 5780 }, { "epoch": 0.21040773312013955, "grad_norm": 2.6446404457092285, "learning_rate": 2.1018969401846066e-05, "loss": 0.2222, "step": 5790 }, { "epoch": 0.2107711316229377, "grad_norm": 7.827177047729492, "learning_rate": 2.1055309252125882e-05, "loss": 0.2684, "step": 5800 }, { "epoch": 0.21113453012573588, "grad_norm": 5.37054967880249, "learning_rate": 2.10916491024057e-05, "loss": 0.216, "step": 5810 }, { "epoch": 0.21149792862853406, "grad_norm": 1.5430680513381958, "learning_rate": 2.1127988952685514e-05, "loss": 0.1723, "step": 5820 }, { "epoch": 0.2118613271313322, "grad_norm": 4.355040550231934, "learning_rate": 2.1164328802965333e-05, "loss": 0.3078, "step": 5830 
}, { "epoch": 0.2122247256341304, "grad_norm": 2.70613169670105, "learning_rate": 2.1200668653245152e-05, "loss": 0.1857, "step": 5840 }, { "epoch": 0.21258812413692857, "grad_norm": 17.876861572265625, "learning_rate": 2.1237008503524965e-05, "loss": 0.335, "step": 5850 }, { "epoch": 0.21295152263972672, "grad_norm": 2.048499822616577, "learning_rate": 2.1273348353804784e-05, "loss": 0.2588, "step": 5860 }, { "epoch": 0.2133149211425249, "grad_norm": 2.2033607959747314, "learning_rate": 2.13096882040846e-05, "loss": 0.1973, "step": 5870 }, { "epoch": 0.21367831964532305, "grad_norm": 5.563814640045166, "learning_rate": 2.1346028054364416e-05, "loss": 0.2632, "step": 5880 }, { "epoch": 0.21404171814812123, "grad_norm": 1.4629203081130981, "learning_rate": 2.1382367904644236e-05, "loss": 0.1714, "step": 5890 }, { "epoch": 0.2144051166509194, "grad_norm": 9.641836166381836, "learning_rate": 2.141870775492405e-05, "loss": 0.3329, "step": 5900 }, { "epoch": 0.21476851515371756, "grad_norm": 3.0128610134124756, "learning_rate": 2.1455047605203867e-05, "loss": 0.205, "step": 5910 }, { "epoch": 0.21513191365651574, "grad_norm": 6.38659143447876, "learning_rate": 2.1491387455483687e-05, "loss": 0.2066, "step": 5920 }, { "epoch": 0.2154953121593139, "grad_norm": 3.397566080093384, "learning_rate": 2.15277273057635e-05, "loss": 0.2682, "step": 5930 }, { "epoch": 0.21585871066211207, "grad_norm": 1.8110759258270264, "learning_rate": 2.156406715604332e-05, "loss": 0.2357, "step": 5940 }, { "epoch": 0.21622210916491025, "grad_norm": 12.391556739807129, "learning_rate": 2.1600407006323135e-05, "loss": 0.3043, "step": 5950 }, { "epoch": 0.2165855076677084, "grad_norm": 1.8203914165496826, "learning_rate": 2.163674685660295e-05, "loss": 0.2979, "step": 5960 }, { "epoch": 0.21694890617050658, "grad_norm": 3.362252950668335, "learning_rate": 2.167308670688277e-05, "loss": 0.1667, "step": 5970 }, { "epoch": 0.21731230467330476, "grad_norm": 4.1468000411987305, "learning_rate": 
2.1709426557162586e-05, "loss": 0.3419, "step": 5980 }, { "epoch": 0.2176757031761029, "grad_norm": 2.479288339614868, "learning_rate": 2.1745766407442402e-05, "loss": 0.1938, "step": 5990 }, { "epoch": 0.2180391016789011, "grad_norm": 26.185468673706055, "learning_rate": 2.1782106257722218e-05, "loss": 0.2818, "step": 6000 }, { "epoch": 0.2180391016789011, "eval_loss": 0.4106527864933014, "eval_runtime": 179.9044, "eval_samples_per_second": 41.211, "eval_steps_per_second": 5.153, "eval_wer": 0.2305625646704304, "step": 6000 }, { "epoch": 0.21840250018169924, "grad_norm": 2.2452592849731445, "learning_rate": 2.1818446108002037e-05, "loss": 0.2208, "step": 6010 }, { "epoch": 0.21876589868449742, "grad_norm": 2.273920774459839, "learning_rate": 2.1854785958281853e-05, "loss": 0.2268, "step": 6020 }, { "epoch": 0.2191292971872956, "grad_norm": 1.9621226787567139, "learning_rate": 2.189112580856167e-05, "loss": 0.1965, "step": 6030 }, { "epoch": 0.21949269569009375, "grad_norm": 2.866110324859619, "learning_rate": 2.1927465658841485e-05, "loss": 0.223, "step": 6040 }, { "epoch": 0.21985609419289193, "grad_norm": 15.169930458068848, "learning_rate": 2.1963805509121305e-05, "loss": 0.254, "step": 6050 }, { "epoch": 0.2202194926956901, "grad_norm": 2.174626350402832, "learning_rate": 2.200014535940112e-05, "loss": 0.2056, "step": 6060 }, { "epoch": 0.22058289119848826, "grad_norm": 1.9627354145050049, "learning_rate": 2.2036485209680936e-05, "loss": 0.2211, "step": 6070 }, { "epoch": 0.22094628970128644, "grad_norm": 5.444493770599365, "learning_rate": 2.2072825059960752e-05, "loss": 0.2819, "step": 6080 }, { "epoch": 0.2213096882040846, "grad_norm": 2.5131990909576416, "learning_rate": 2.2109164910240572e-05, "loss": 0.2262, "step": 6090 }, { "epoch": 0.22167308670688277, "grad_norm": 15.716779708862305, "learning_rate": 2.2145504760520388e-05, "loss": 0.2833, "step": 6100 }, { "epoch": 0.22203648520968094, "grad_norm": 1.7514111995697021, "learning_rate": 
2.2181844610800204e-05, "loss": 0.2238, "step": 6110 }, { "epoch": 0.2223998837124791, "grad_norm": 1.8236886262893677, "learning_rate": 2.2218184461080023e-05, "loss": 0.1872, "step": 6120 }, { "epoch": 0.22276328221527728, "grad_norm": 4.081092834472656, "learning_rate": 2.225452431135984e-05, "loss": 0.4926, "step": 6130 }, { "epoch": 0.22312668071807545, "grad_norm": 3.3254685401916504, "learning_rate": 2.2290864161639655e-05, "loss": 0.2179, "step": 6140 }, { "epoch": 0.2234900792208736, "grad_norm": 9.953665733337402, "learning_rate": 2.232720401191947e-05, "loss": 0.3221, "step": 6150 }, { "epoch": 0.22385347772367178, "grad_norm": 3.531538724899292, "learning_rate": 2.2363543862199287e-05, "loss": 0.3639, "step": 6160 }, { "epoch": 0.22421687622646994, "grad_norm": 1.6166915893554688, "learning_rate": 2.2399883712479106e-05, "loss": 0.1906, "step": 6170 }, { "epoch": 0.22458027472926811, "grad_norm": 3.0561792850494385, "learning_rate": 2.2436223562758922e-05, "loss": 0.222, "step": 6180 }, { "epoch": 0.2249436732320663, "grad_norm": 7.607283115386963, "learning_rate": 2.2472563413038738e-05, "loss": 0.1769, "step": 6190 }, { "epoch": 0.22530707173486444, "grad_norm": 38.86745834350586, "learning_rate": 2.2508903263318557e-05, "loss": 0.3523, "step": 6200 }, { "epoch": 0.22567047023766262, "grad_norm": 1.2490432262420654, "learning_rate": 2.2545243113598373e-05, "loss": 0.2241, "step": 6210 }, { "epoch": 0.2260338687404608, "grad_norm": 3.8632936477661133, "learning_rate": 2.258158296387819e-05, "loss": 0.2761, "step": 6220 }, { "epoch": 0.22639726724325895, "grad_norm": 6.057976722717285, "learning_rate": 2.261792281415801e-05, "loss": 0.2534, "step": 6230 }, { "epoch": 0.22676066574605713, "grad_norm": 5.2983551025390625, "learning_rate": 2.265426266443782e-05, "loss": 0.1972, "step": 6240 }, { "epoch": 0.22712406424885528, "grad_norm": 7.395950794219971, "learning_rate": 2.269060251471764e-05, "loss": 0.3446, "step": 6250 }, { "epoch": 
0.22748746275165346, "grad_norm": 2.7409260272979736, "learning_rate": 2.2726942364997457e-05, "loss": 0.1894, "step": 6260 }, { "epoch": 0.22785086125445164, "grad_norm": 1.7545270919799805, "learning_rate": 2.2763282215277273e-05, "loss": 0.2376, "step": 6270 }, { "epoch": 0.2282142597572498, "grad_norm": 112.10614013671875, "learning_rate": 2.2799622065557092e-05, "loss": 2.0322, "step": 6280 }, { "epoch": 0.22857765826004797, "grad_norm": 3.6547396183013916, "learning_rate": 2.2835961915836908e-05, "loss": 0.2942, "step": 6290 }, { "epoch": 0.22894105676284612, "grad_norm": 50.726261138916016, "learning_rate": 2.2872301766116724e-05, "loss": 0.3279, "step": 6300 }, { "epoch": 0.2293044552656443, "grad_norm": 1.2374241352081299, "learning_rate": 2.2908641616396543e-05, "loss": 0.1912, "step": 6310 }, { "epoch": 0.22966785376844248, "grad_norm": 1.6278152465820312, "learning_rate": 2.2944981466676356e-05, "loss": 0.1913, "step": 6320 }, { "epoch": 0.23003125227124063, "grad_norm": 7.58544397354126, "learning_rate": 2.2981321316956175e-05, "loss": 0.2393, "step": 6330 }, { "epoch": 0.2303946507740388, "grad_norm": 1.7094483375549316, "learning_rate": 2.3017661167235995e-05, "loss": 0.2333, "step": 6340 }, { "epoch": 0.230758049276837, "grad_norm": 24.214885711669922, "learning_rate": 2.3054001017515807e-05, "loss": 0.3019, "step": 6350 }, { "epoch": 0.23112144777963514, "grad_norm": 1.962106704711914, "learning_rate": 2.3090340867795626e-05, "loss": 0.8948, "step": 6360 }, { "epoch": 0.23148484628243332, "grad_norm": 1.3703123331069946, "learning_rate": 2.3126680718075442e-05, "loss": 0.1936, "step": 6370 }, { "epoch": 0.23184824478523147, "grad_norm": 7.507201194763184, "learning_rate": 2.316302056835526e-05, "loss": 0.2185, "step": 6380 }, { "epoch": 0.23221164328802965, "grad_norm": 2.6310977935791016, "learning_rate": 2.3199360418635078e-05, "loss": 0.1961, "step": 6390 }, { "epoch": 0.23257504179082783, "grad_norm": 4.186092376708984, "learning_rate": 
2.3235700268914894e-05, "loss": 0.2734, "step": 6400 }, { "epoch": 0.23293844029362598, "grad_norm": 1.817269206047058, "learning_rate": 2.327204011919471e-05, "loss": 0.1966, "step": 6410 }, { "epoch": 0.23330183879642416, "grad_norm": 1.9503989219665527, "learning_rate": 2.330837996947453e-05, "loss": 2.7438, "step": 6420 }, { "epoch": 0.23366523729922234, "grad_norm": 3.1107656955718994, "learning_rate": 2.334471981975434e-05, "loss": 0.2534, "step": 6430 }, { "epoch": 0.2340286358020205, "grad_norm": 5.268273830413818, "learning_rate": 2.338105967003416e-05, "loss": 0.1963, "step": 6440 }, { "epoch": 0.23439203430481867, "grad_norm": 9.586852073669434, "learning_rate": 2.3417399520313977e-05, "loss": 0.2342, "step": 6450 }, { "epoch": 0.23475543280761682, "grad_norm": 3.0218632221221924, "learning_rate": 2.3453739370593793e-05, "loss": 0.231, "step": 6460 }, { "epoch": 0.235118831310415, "grad_norm": 1.9708057641983032, "learning_rate": 2.3490079220873612e-05, "loss": 0.2156, "step": 6470 }, { "epoch": 0.23548222981321318, "grad_norm": 3.6212944984436035, "learning_rate": 2.3526419071153428e-05, "loss": 0.2172, "step": 6480 }, { "epoch": 0.23584562831601133, "grad_norm": 2.5205702781677246, "learning_rate": 2.3562758921433244e-05, "loss": 0.4643, "step": 6490 }, { "epoch": 0.2362090268188095, "grad_norm": 4.1570305824279785, "learning_rate": 2.3599098771713063e-05, "loss": 0.2722, "step": 6500 }, { "epoch": 0.2365724253216077, "grad_norm": 1.8376798629760742, "learning_rate": 2.363543862199288e-05, "loss": 0.2027, "step": 6510 }, { "epoch": 0.23693582382440584, "grad_norm": 2.0464930534362793, "learning_rate": 2.3671778472272695e-05, "loss": 0.183, "step": 6520 }, { "epoch": 0.23729922232720402, "grad_norm": 4.8776469230651855, "learning_rate": 2.370811832255251e-05, "loss": 0.2169, "step": 6530 }, { "epoch": 0.23766262083000217, "grad_norm": 1.5764952898025513, "learning_rate": 2.3744458172832327e-05, "loss": 0.1917, "step": 6540 }, { "epoch": 
0.23802601933280035, "grad_norm": 16.132232666015625, "learning_rate": 2.3780798023112147e-05, "loss": 0.2732, "step": 6550 }, { "epoch": 0.23838941783559853, "grad_norm": 8.105748176574707, "learning_rate": 2.3817137873391963e-05, "loss": 0.2055, "step": 6560 }, { "epoch": 0.23875281633839668, "grad_norm": 2.087362051010132, "learning_rate": 2.385347772367178e-05, "loss": 0.186, "step": 6570 }, { "epoch": 0.23911621484119486, "grad_norm": 2.8280205726623535, "learning_rate": 2.3889817573951598e-05, "loss": 0.201, "step": 6580 }, { "epoch": 0.23947961334399304, "grad_norm": 1.2525794506072998, "learning_rate": 2.3926157424231414e-05, "loss": 0.1893, "step": 6590 }, { "epoch": 0.2398430118467912, "grad_norm": 23.419832229614258, "learning_rate": 2.396249727451123e-05, "loss": 0.2554, "step": 6600 }, { "epoch": 0.2398430118467912, "eval_loss": 0.4065987765789032, "eval_runtime": 179.638, "eval_samples_per_second": 41.272, "eval_steps_per_second": 5.16, "eval_wer": 0.24529380797647357, "step": 6600 }, { "epoch": 0.24020641034958937, "grad_norm": 1.3757339715957642, "learning_rate": 2.3998837124791046e-05, "loss": 0.1962, "step": 6610 }, { "epoch": 0.24056980885238752, "grad_norm": 4.00860071182251, "learning_rate": 2.4035176975070865e-05, "loss": 0.1848, "step": 6620 }, { "epoch": 0.2409332073551857, "grad_norm": 5.544015407562256, "learning_rate": 2.407151682535068e-05, "loss": 0.245, "step": 6630 }, { "epoch": 0.24129660585798388, "grad_norm": 1.0618844032287598, "learning_rate": 2.4107856675630497e-05, "loss": 0.191, "step": 6640 }, { "epoch": 0.24166000436078203, "grad_norm": 125.15505981445312, "learning_rate": 2.4144196525910313e-05, "loss": 0.3055, "step": 6650 }, { "epoch": 0.2420234028635802, "grad_norm": 5.015167713165283, "learning_rate": 2.418053637619013e-05, "loss": 0.2701, "step": 6660 }, { "epoch": 0.24238680136637836, "grad_norm": 3.944514274597168, "learning_rate": 2.421687622646995e-05, "loss": 0.2107, "step": 6670 }, { "epoch": 0.24275019986917654, 
"grad_norm": 3.1539418697357178, "learning_rate": 2.4253216076749764e-05, "loss": 0.232, "step": 6680 }, { "epoch": 0.24311359837197472, "grad_norm": 2.980459213256836, "learning_rate": 2.428955592702958e-05, "loss": 0.2391, "step": 6690 }, { "epoch": 0.24347699687477287, "grad_norm": 35.02157211303711, "learning_rate": 2.43258957773094e-05, "loss": 0.3172, "step": 6700 }, { "epoch": 0.24384039537757105, "grad_norm": 1.606570839881897, "learning_rate": 2.4362235627589216e-05, "loss": 1.5707, "step": 6710 }, { "epoch": 0.24420379388036922, "grad_norm": 3.940394401550293, "learning_rate": 2.439857547786903e-05, "loss": 0.1969, "step": 6720 }, { "epoch": 0.24456719238316738, "grad_norm": 3.8990156650543213, "learning_rate": 2.443491532814885e-05, "loss": 0.2475, "step": 6730 }, { "epoch": 0.24493059088596555, "grad_norm": 2.523500442504883, "learning_rate": 2.4471255178428664e-05, "loss": 0.194, "step": 6740 }, { "epoch": 0.2452939893887637, "grad_norm": 4.920846939086914, "learning_rate": 2.4507595028708483e-05, "loss": 0.2417, "step": 6750 }, { "epoch": 0.24565738789156188, "grad_norm": 2.2269723415374756, "learning_rate": 2.4543934878988302e-05, "loss": 0.2148, "step": 6760 }, { "epoch": 0.24602078639436006, "grad_norm": 1.669722557067871, "learning_rate": 2.4580274729268115e-05, "loss": 0.1979, "step": 6770 }, { "epoch": 0.24638418489715821, "grad_norm": 4.581501007080078, "learning_rate": 2.4616614579547934e-05, "loss": 0.2412, "step": 6780 }, { "epoch": 0.2467475833999564, "grad_norm": 2.6605944633483887, "learning_rate": 2.465295442982775e-05, "loss": 0.1992, "step": 6790 }, { "epoch": 0.24711098190275457, "grad_norm": 7.089646816253662, "learning_rate": 2.4689294280107566e-05, "loss": 0.2789, "step": 6800 }, { "epoch": 0.24747438040555272, "grad_norm": 1.9901385307312012, "learning_rate": 2.4725634130387385e-05, "loss": 1.907, "step": 6810 }, { "epoch": 0.2478377789083509, "grad_norm": 2.5120224952697754, "learning_rate": 2.4761973980667198e-05, "loss": 
0.1908, "step": 6820 }, { "epoch": 0.24820117741114905, "grad_norm": 1.553806185722351, "learning_rate": 2.4794679845919035e-05, "loss": 1.6707, "step": 6830 }, { "epoch": 0.24856457591394723, "grad_norm": 2.130095958709717, "learning_rate": 2.4831019696198855e-05, "loss": 0.222, "step": 6840 }, { "epoch": 0.2489279744167454, "grad_norm": 15.832701683044434, "learning_rate": 2.486735954647867e-05, "loss": 0.4634, "step": 6850 }, { "epoch": 0.24929137291954356, "grad_norm": 1.87086820602417, "learning_rate": 2.4903699396758487e-05, "loss": 0.1887, "step": 6860 }, { "epoch": 0.24965477142234174, "grad_norm": 2.32084584236145, "learning_rate": 2.4940039247038303e-05, "loss": 0.1881, "step": 6870 }, { "epoch": 0.2500181699251399, "grad_norm": 3.3228461742401123, "learning_rate": 2.497637909731812e-05, "loss": 0.264, "step": 6880 }, { "epoch": 0.2503815684279381, "grad_norm": 1.8676607608795166, "learning_rate": 2.5012718947597935e-05, "loss": 0.2102, "step": 6890 }, { "epoch": 0.2507449669307362, "grad_norm": 17.540319442749023, "learning_rate": 2.5049058797877757e-05, "loss": 0.2567, "step": 6900 }, { "epoch": 0.25110836543353443, "grad_norm": 1.6276856660842896, "learning_rate": 2.508539864815757e-05, "loss": 0.1917, "step": 6910 }, { "epoch": 0.2514717639363326, "grad_norm": 2.347691059112549, "learning_rate": 2.5121738498437386e-05, "loss": 0.1998, "step": 6920 }, { "epoch": 0.25183516243913073, "grad_norm": 3.5337650775909424, "learning_rate": 2.5158078348717205e-05, "loss": 0.2418, "step": 6930 }, { "epoch": 0.25219856094192894, "grad_norm": 3.7415404319763184, "learning_rate": 2.519441819899702e-05, "loss": 0.2074, "step": 6940 }, { "epoch": 0.2525619594447271, "grad_norm": 16.603042602539062, "learning_rate": 2.5230758049276837e-05, "loss": 0.3104, "step": 6950 }, { "epoch": 0.25292535794752524, "grad_norm": 1.4864579439163208, "learning_rate": 2.5267097899556656e-05, "loss": 0.1771, "step": 6960 }, { "epoch": 0.25328875645032345, "grad_norm": 
1.7935876846313477, "learning_rate": 2.5303437749836472e-05, "loss": 0.1984, "step": 6970 }, { "epoch": 0.2536521549531216, "grad_norm": 3.187351942062378, "learning_rate": 2.533977760011629e-05, "loss": 0.1828, "step": 6980 }, { "epoch": 0.25401555345591975, "grad_norm": 1.7930549383163452, "learning_rate": 2.5376117450396104e-05, "loss": 0.2132, "step": 6990 }, { "epoch": 0.2543789519587179, "grad_norm": 4.86196231842041, "learning_rate": 2.5412457300675924e-05, "loss": 0.2426, "step": 7000 }, { "epoch": 0.2547423504615161, "grad_norm": 2.784335136413574, "learning_rate": 2.544879715095574e-05, "loss": 1.6557, "step": 7010 }, { "epoch": 0.25510574896431426, "grad_norm": 1.460509181022644, "learning_rate": 2.5485137001235552e-05, "loss": 0.1812, "step": 7020 }, { "epoch": 0.2554691474671124, "grad_norm": 2.5204946994781494, "learning_rate": 2.5521476851515375e-05, "loss": 0.3731, "step": 7030 }, { "epoch": 0.2558325459699106, "grad_norm": 1.6122281551361084, "learning_rate": 2.555781670179519e-05, "loss": 0.2256, "step": 7040 }, { "epoch": 0.25619594447270877, "grad_norm": 8.13974666595459, "learning_rate": 2.5594156552075004e-05, "loss": 0.2756, "step": 7050 }, { "epoch": 0.2565593429755069, "grad_norm": 2.1560494899749756, "learning_rate": 2.5630496402354826e-05, "loss": 0.1869, "step": 7060 }, { "epoch": 0.25692274147830513, "grad_norm": 2.938570737838745, "learning_rate": 2.5666836252634642e-05, "loss": 0.187, "step": 7070 }, { "epoch": 0.2572861399811033, "grad_norm": 1.6697754859924316, "learning_rate": 2.5703176102914455e-05, "loss": 0.1841, "step": 7080 }, { "epoch": 0.25764953848390143, "grad_norm": 2.500377655029297, "learning_rate": 2.5739515953194278e-05, "loss": 0.4097, "step": 7090 }, { "epoch": 0.25801293698669964, "grad_norm": 6.614553928375244, "learning_rate": 2.577585580347409e-05, "loss": 0.2779, "step": 7100 }, { "epoch": 0.2583763354894978, "grad_norm": 2.1538803577423096, "learning_rate": 2.5812195653753906e-05, "loss": 0.2035, "step": 7110 
}, { "epoch": 0.25873973399229594, "grad_norm": 2.64719820022583, "learning_rate": 2.584853550403373e-05, "loss": 0.1815, "step": 7120 }, { "epoch": 0.25910313249509415, "grad_norm": 4.064308166503906, "learning_rate": 2.588487535431354e-05, "loss": 0.2115, "step": 7130 }, { "epoch": 0.2594665309978923, "grad_norm": 4.535513877868652, "learning_rate": 2.5921215204593357e-05, "loss": 0.1733, "step": 7140 }, { "epoch": 0.25982992950069045, "grad_norm": 14.761083602905273, "learning_rate": 2.5957555054873173e-05, "loss": 0.3061, "step": 7150 }, { "epoch": 0.2601933280034886, "grad_norm": 2.902010202407837, "learning_rate": 2.5993894905152993e-05, "loss": 0.2539, "step": 7160 }, { "epoch": 0.2605567265062868, "grad_norm": 2.6499462127685547, "learning_rate": 2.603023475543281e-05, "loss": 0.209, "step": 7170 }, { "epoch": 0.26092012500908496, "grad_norm": 2.0298879146575928, "learning_rate": 2.6066574605712625e-05, "loss": 0.1966, "step": 7180 }, { "epoch": 0.2612835235118831, "grad_norm": 5.285839080810547, "learning_rate": 2.6102914455992444e-05, "loss": 0.2416, "step": 7190 }, { "epoch": 0.2616469220146813, "grad_norm": 14.89932918548584, "learning_rate": 2.613925430627226e-05, "loss": 0.2649, "step": 7200 }, { "epoch": 0.2616469220146813, "eval_loss": 0.43822312355041504, "eval_runtime": 180.398, "eval_samples_per_second": 41.098, "eval_steps_per_second": 5.139, "eval_wer": 0.23023580881151634, "step": 7200 }, { "epoch": 0.26201032051747947, "grad_norm": 2.9772818088531494, "learning_rate": 2.6175594156552076e-05, "loss": 0.2158, "step": 7210 }, { "epoch": 0.2623737190202776, "grad_norm": 1.4703949689865112, "learning_rate": 2.6211934006831895e-05, "loss": 0.1925, "step": 7220 }, { "epoch": 0.2627371175230758, "grad_norm": 2.6034176349639893, "learning_rate": 2.624827385711171e-05, "loss": 0.2065, "step": 7230 }, { "epoch": 0.263100516025874, "grad_norm": 2.8392562866210938, "learning_rate": 2.6284613707391527e-05, "loss": 0.2097, "step": 7240 }, { "epoch": 
0.2634639145286721, "grad_norm": 8.892645835876465, "learning_rate": 2.6320953557671347e-05, "loss": 0.2835, "step": 7250 }, { "epoch": 0.26382731303147033, "grad_norm": 1.616268277168274, "learning_rate": 2.6357293407951162e-05, "loss": 0.1875, "step": 7260 }, { "epoch": 0.2641907115342685, "grad_norm": 2.1791138648986816, "learning_rate": 2.6393633258230975e-05, "loss": 0.1722, "step": 7270 }, { "epoch": 0.26455411003706664, "grad_norm": 2.8691608905792236, "learning_rate": 2.642997310851079e-05, "loss": 0.2377, "step": 7280 }, { "epoch": 0.26491750853986484, "grad_norm": 1.5673551559448242, "learning_rate": 2.6466312958790614e-05, "loss": 0.4404, "step": 7290 }, { "epoch": 0.265280907042663, "grad_norm": 7.296738147735596, "learning_rate": 2.6502652809070426e-05, "loss": 0.3198, "step": 7300 }, { "epoch": 0.26564430554546115, "grad_norm": 6.389322757720947, "learning_rate": 2.6538992659350242e-05, "loss": 0.2041, "step": 7310 }, { "epoch": 0.2660077040482593, "grad_norm": 11.64201831817627, "learning_rate": 2.657533250963006e-05, "loss": 0.2014, "step": 7320 }, { "epoch": 0.2663711025510575, "grad_norm": 4.454049587249756, "learning_rate": 2.6611672359909878e-05, "loss": 0.2295, "step": 7330 }, { "epoch": 0.26673450105385565, "grad_norm": 2.091968297958374, "learning_rate": 2.6648012210189694e-05, "loss": 0.1784, "step": 7340 }, { "epoch": 0.2670978995566538, "grad_norm": 6.904966354370117, "learning_rate": 2.6684352060469513e-05, "loss": 0.3303, "step": 7350 }, { "epoch": 0.267461298059452, "grad_norm": 1.6893994808197021, "learning_rate": 2.672069191074933e-05, "loss": 0.2534, "step": 7360 }, { "epoch": 0.26782469656225016, "grad_norm": 1.3456122875213623, "learning_rate": 2.6757031761029145e-05, "loss": 0.1829, "step": 7370 }, { "epoch": 0.2681880950650483, "grad_norm": 7.959611892700195, "learning_rate": 2.6793371611308964e-05, "loss": 0.2425, "step": 7380 }, { "epoch": 0.2685514935678465, "grad_norm": 1.5833840370178223, "learning_rate": 
2.682971146158878e-05, "loss": 0.1988, "step": 7390 }, { "epoch": 0.2689148920706447, "grad_norm": 19.886600494384766, "learning_rate": 2.6866051311868596e-05, "loss": 0.3563, "step": 7400 }, { "epoch": 0.2692782905734428, "grad_norm": 2.55553936958313, "learning_rate": 2.6902391162148415e-05, "loss": 0.1857, "step": 7410 }, { "epoch": 0.26964168907624103, "grad_norm": 2.125661849975586, "learning_rate": 2.693873101242823e-05, "loss": 0.7398, "step": 7420 }, { "epoch": 0.2700050875790392, "grad_norm": 2.577770233154297, "learning_rate": 2.6975070862708047e-05, "loss": 0.5703, "step": 7430 }, { "epoch": 0.27036848608183733, "grad_norm": 2.3848683834075928, "learning_rate": 2.701141071298786e-05, "loss": 0.173, "step": 7440 }, { "epoch": 0.2707318845846355, "grad_norm": 22.96078109741211, "learning_rate": 2.7047750563267683e-05, "loss": 0.293, "step": 7450 }, { "epoch": 0.2710952830874337, "grad_norm": 3.206329822540283, "learning_rate": 2.70840904135475e-05, "loss": 0.4585, "step": 7460 }, { "epoch": 0.27145868159023184, "grad_norm": 2.251904010772705, "learning_rate": 2.712043026382731e-05, "loss": 0.2196, "step": 7470 }, { "epoch": 0.27182208009303, "grad_norm": 3.7445387840270996, "learning_rate": 2.7156770114107134e-05, "loss": 0.2195, "step": 7480 }, { "epoch": 0.2721854785958282, "grad_norm": 1.5370314121246338, "learning_rate": 2.7193109964386947e-05, "loss": 0.2007, "step": 7490 }, { "epoch": 0.27254887709862635, "grad_norm": 18.44324493408203, "learning_rate": 2.7229449814666763e-05, "loss": 0.3091, "step": 7500 }, { "epoch": 0.2729122756014245, "grad_norm": 1.5792795419692993, "learning_rate": 2.7265789664946585e-05, "loss": 0.1601, "step": 7510 }, { "epoch": 0.2732756741042227, "grad_norm": 9.128384590148926, "learning_rate": 2.7302129515226398e-05, "loss": 0.178, "step": 7520 }, { "epoch": 0.27363907260702086, "grad_norm": 2.2285592555999756, "learning_rate": 2.7338469365506214e-05, "loss": 2.4074, "step": 7530 }, { "epoch": 0.274002471109819, 
"grad_norm": 2.2741541862487793, "learning_rate": 2.7374809215786033e-05, "loss": 0.246, "step": 7540 }, { "epoch": 0.2743658696126172, "grad_norm": 17.185470581054688, "learning_rate": 2.741114906606585e-05, "loss": 0.2577, "step": 7550 }, { "epoch": 0.27472926811541537, "grad_norm": 1.1907752752304077, "learning_rate": 2.7447488916345665e-05, "loss": 0.2073, "step": 7560 }, { "epoch": 0.2750926666182135, "grad_norm": 3.535682201385498, "learning_rate": 2.748382876662548e-05, "loss": 0.2012, "step": 7570 }, { "epoch": 0.27545606512101173, "grad_norm": 3.585460662841797, "learning_rate": 2.75201686169053e-05, "loss": 0.2147, "step": 7580 }, { "epoch": 0.2758194636238099, "grad_norm": 1.9034504890441895, "learning_rate": 2.7556508467185116e-05, "loss": 0.1626, "step": 7590 }, { "epoch": 0.27618286212660803, "grad_norm": 39.66155242919922, "learning_rate": 2.7592848317464932e-05, "loss": 0.2617, "step": 7600 }, { "epoch": 0.2765462606294062, "grad_norm": 1.5698285102844238, "learning_rate": 2.762918816774475e-05, "loss": 0.3136, "step": 7610 }, { "epoch": 0.2769096591322044, "grad_norm": 2.4866106510162354, "learning_rate": 2.7665528018024568e-05, "loss": 0.1971, "step": 7620 }, { "epoch": 0.27727305763500254, "grad_norm": 9.244050025939941, "learning_rate": 2.7701867868304384e-05, "loss": 0.2025, "step": 7630 }, { "epoch": 0.2776364561378007, "grad_norm": 2.1344380378723145, "learning_rate": 2.7738207718584203e-05, "loss": 0.2055, "step": 7640 }, { "epoch": 0.2779998546405989, "grad_norm": 13.503227233886719, "learning_rate": 2.777454756886402e-05, "loss": 0.2671, "step": 7650 }, { "epoch": 0.27836325314339705, "grad_norm": 2.238834857940674, "learning_rate": 2.781088741914383e-05, "loss": 0.1714, "step": 7660 }, { "epoch": 0.2787266516461952, "grad_norm": 0.897280216217041, "learning_rate": 2.7847227269423654e-05, "loss": 0.1615, "step": 7670 }, { "epoch": 0.2790900501489934, "grad_norm": 5.808285713195801, "learning_rate": 2.788356711970347e-05, "loss": 0.2052, 
"step": 7680 }, { "epoch": 0.27945344865179156, "grad_norm": 1.8924663066864014, "learning_rate": 2.7919906969983283e-05, "loss": 0.1769, "step": 7690 }, { "epoch": 0.2798168471545897, "grad_norm": 11.939653396606445, "learning_rate": 2.7956246820263105e-05, "loss": 0.2859, "step": 7700 }, { "epoch": 0.2801802456573879, "grad_norm": 2.5077621936798096, "learning_rate": 2.7992586670542918e-05, "loss": 0.1767, "step": 7710 }, { "epoch": 0.28054364416018607, "grad_norm": 2.0336718559265137, "learning_rate": 2.8028926520822734e-05, "loss": 0.6757, "step": 7720 }, { "epoch": 0.2809070426629842, "grad_norm": 3.9547739028930664, "learning_rate": 2.806526637110255e-05, "loss": 0.2322, "step": 7730 }, { "epoch": 0.28127044116578237, "grad_norm": 1.8082466125488281, "learning_rate": 2.810160622138237e-05, "loss": 0.1758, "step": 7740 }, { "epoch": 0.2816338396685806, "grad_norm": 16.173986434936523, "learning_rate": 2.8137946071662185e-05, "loss": 0.2642, "step": 7750 }, { "epoch": 0.28199723817137873, "grad_norm": 3.341475486755371, "learning_rate": 2.8174285921942e-05, "loss": 3.4407, "step": 7760 }, { "epoch": 0.2823606366741769, "grad_norm": 1.7220288515090942, "learning_rate": 2.821062577222182e-05, "loss": 0.1965, "step": 7770 }, { "epoch": 0.2827240351769751, "grad_norm": 3.8534610271453857, "learning_rate": 2.8246965622501637e-05, "loss": 0.1966, "step": 7780 }, { "epoch": 0.28308743367977324, "grad_norm": 1.962780475616455, "learning_rate": 2.8283305472781453e-05, "loss": 0.1859, "step": 7790 }, { "epoch": 0.2834508321825714, "grad_norm": 40.28166961669922, "learning_rate": 2.8319645323061272e-05, "loss": 0.6588, "step": 7800 }, { "epoch": 0.2834508321825714, "eval_loss": 0.42970865964889526, "eval_runtime": 180.6321, "eval_samples_per_second": 41.045, "eval_steps_per_second": 5.132, "eval_wer": 0.2413455080145951, "step": 7800 }, { "epoch": 0.2838142306853696, "grad_norm": 1.748349666595459, "learning_rate": 2.8355985173341088e-05, "loss": 0.1786, "step": 7810 }, { 
"epoch": 0.28417762918816775, "grad_norm": 2.1137237548828125, "learning_rate": 2.8392325023620904e-05, "loss": 0.1803, "step": 7820 }, { "epoch": 0.2845410276909659, "grad_norm": 1.59931218624115, "learning_rate": 2.8428664873900723e-05, "loss": 0.2107, "step": 7830 }, { "epoch": 0.2849044261937641, "grad_norm": 2.263493061065674, "learning_rate": 2.846500472418054e-05, "loss": 0.1967, "step": 7840 }, { "epoch": 0.28526782469656226, "grad_norm": 20.798656463623047, "learning_rate": 2.8501344574460355e-05, "loss": 0.268, "step": 7850 }, { "epoch": 0.2856312231993604, "grad_norm": 3.0182480812072754, "learning_rate": 2.8537684424740168e-05, "loss": 0.1901, "step": 7860 }, { "epoch": 0.2859946217021586, "grad_norm": 6.6378493309021, "learning_rate": 2.857402427501999e-05, "loss": 0.1804, "step": 7870 }, { "epoch": 0.28635802020495676, "grad_norm": 2.5524067878723145, "learning_rate": 2.8610364125299803e-05, "loss": 0.233, "step": 7880 }, { "epoch": 0.2867214187077549, "grad_norm": 2.6409335136413574, "learning_rate": 2.864670397557962e-05, "loss": 0.1717, "step": 7890 }, { "epoch": 0.28708481721055307, "grad_norm": 6.834221363067627, "learning_rate": 2.868304382585944e-05, "loss": 0.2956, "step": 7900 }, { "epoch": 0.2874482157133513, "grad_norm": 2.760669708251953, "learning_rate": 2.8719383676139254e-05, "loss": 0.1789, "step": 7910 }, { "epoch": 0.2878116142161494, "grad_norm": 1.7543925046920776, "learning_rate": 2.875572352641907e-05, "loss": 0.2041, "step": 7920 }, { "epoch": 0.2881750127189476, "grad_norm": 4.784151077270508, "learning_rate": 2.879206337669889e-05, "loss": 0.2259, "step": 7930 }, { "epoch": 0.2885384112217458, "grad_norm": 2.1769356727600098, "learning_rate": 2.8828403226978706e-05, "loss": 0.2023, "step": 7940 }, { "epoch": 0.28890180972454393, "grad_norm": 9.373051643371582, "learning_rate": 2.886474307725852e-05, "loss": 0.3511, "step": 7950 }, { "epoch": 0.2892652082273421, "grad_norm": 1.895190715789795, "learning_rate": 
2.890108292753834e-05, "loss": 0.1976, "step": 7960 }, { "epoch": 0.2896286067301403, "grad_norm": 3.4400076866149902, "learning_rate": 2.8937422777818157e-05, "loss": 0.1902, "step": 7970 }, { "epoch": 0.28999200523293844, "grad_norm": 9.663911819458008, "learning_rate": 2.8973762628097973e-05, "loss": 0.2551, "step": 7980 }, { "epoch": 0.2903554037357366, "grad_norm": 5.1054463386535645, "learning_rate": 2.9010102478377792e-05, "loss": 0.2001, "step": 7990 }, { "epoch": 0.2907188022385348, "grad_norm": 9.06143569946289, "learning_rate": 2.9046442328657608e-05, "loss": 0.2266, "step": 8000 }, { "epoch": 0.29108220074133295, "grad_norm": 1.604077696800232, "learning_rate": 2.9082782178937424e-05, "loss": 0.1883, "step": 8010 }, { "epoch": 0.2914455992441311, "grad_norm": 2.245687246322632, "learning_rate": 2.911912202921724e-05, "loss": 0.2093, "step": 8020 }, { "epoch": 0.29180899774692926, "grad_norm": 3.8099372386932373, "learning_rate": 2.915546187949706e-05, "loss": 0.2283, "step": 8030 }, { "epoch": 0.29217239624972746, "grad_norm": 2.135115623474121, "learning_rate": 2.9191801729776875e-05, "loss": 0.2369, "step": 8040 }, { "epoch": 0.2925357947525256, "grad_norm": 5.596993446350098, "learning_rate": 2.9228141580056688e-05, "loss": 0.2709, "step": 8050 }, { "epoch": 0.29289919325532376, "grad_norm": 1.3212496042251587, "learning_rate": 2.926448143033651e-05, "loss": 0.1968, "step": 8060 }, { "epoch": 0.29326259175812197, "grad_norm": 1.9241231679916382, "learning_rate": 2.9300821280616327e-05, "loss": 0.3883, "step": 8070 }, { "epoch": 0.2936259902609201, "grad_norm": 4.008016109466553, "learning_rate": 2.933716113089614e-05, "loss": 0.2074, "step": 8080 }, { "epoch": 0.2939893887637183, "grad_norm": 1.5871399641036987, "learning_rate": 2.9373500981175962e-05, "loss": 0.1698, "step": 8090 }, { "epoch": 0.2943527872665165, "grad_norm": 19.480670928955078, "learning_rate": 2.9409840831455774e-05, "loss": 0.4023, "step": 8100 }, { "epoch": 0.29471618576931463, 
"grad_norm": 3.8420443534851074, "learning_rate": 2.944618068173559e-05, "loss": 0.181, "step": 8110 }, { "epoch": 0.2950795842721128, "grad_norm": 1.9951499700546265, "learning_rate": 2.9482520532015413e-05, "loss": 0.2872, "step": 8120 }, { "epoch": 0.295442982774911, "grad_norm": 4.958978176116943, "learning_rate": 2.9518860382295226e-05, "loss": 0.2359, "step": 8130 }, { "epoch": 0.29580638127770914, "grad_norm": 1.5531708002090454, "learning_rate": 2.9555200232575042e-05, "loss": 0.2138, "step": 8140 }, { "epoch": 0.2961697797805073, "grad_norm": 5.297884941101074, "learning_rate": 2.9591540082854864e-05, "loss": 0.2694, "step": 8150 }, { "epoch": 0.2965331782833055, "grad_norm": 1.5989892482757568, "learning_rate": 2.9627879933134677e-05, "loss": 0.1686, "step": 8160 }, { "epoch": 0.29689657678610365, "grad_norm": 3.347722291946411, "learning_rate": 2.9664219783414493e-05, "loss": 0.2206, "step": 8170 }, { "epoch": 0.2972599752889018, "grad_norm": 2.9551491737365723, "learning_rate": 2.970055963369431e-05, "loss": 0.2274, "step": 8180 }, { "epoch": 0.29762337379169995, "grad_norm": 2.527963638305664, "learning_rate": 2.973689948397413e-05, "loss": 0.1731, "step": 8190 }, { "epoch": 0.29798677229449816, "grad_norm": 5.818012714385986, "learning_rate": 2.9773239334253944e-05, "loss": 0.265, "step": 8200 }, { "epoch": 0.2983501707972963, "grad_norm": 1.5580624341964722, "learning_rate": 2.980594519950578e-05, "loss": 2.627, "step": 8210 }, { "epoch": 0.29871356930009446, "grad_norm": 1.6011282205581665, "learning_rate": 2.9842285049785594e-05, "loss": 0.1811, "step": 8220 }, { "epoch": 0.29907696780289267, "grad_norm": 44.825157165527344, "learning_rate": 2.987862490006541e-05, "loss": 0.4799, "step": 8230 }, { "epoch": 0.2994403663056908, "grad_norm": 1.520982027053833, "learning_rate": 2.991496475034523e-05, "loss": 0.1935, "step": 8240 }, { "epoch": 0.29980376480848897, "grad_norm": 6.3379058837890625, "learning_rate": 2.9951304600625046e-05, "loss": 0.2435, 
"step": 8250 }, { "epoch": 0.3001671633112872, "grad_norm": 2.2493958473205566, "learning_rate": 2.998764445090486e-05, "loss": 0.1984, "step": 8260 }, { "epoch": 0.30053056181408533, "grad_norm": 3.234196186065674, "learning_rate": 3.002398430118468e-05, "loss": 0.1785, "step": 8270 }, { "epoch": 0.3008939603168835, "grad_norm": 4.99449348449707, "learning_rate": 3.0060324151464497e-05, "loss": 0.1888, "step": 8280 }, { "epoch": 0.3012573588196817, "grad_norm": 1.8624048233032227, "learning_rate": 3.0096664001744313e-05, "loss": 1.6561, "step": 8290 }, { "epoch": 0.30162075732247984, "grad_norm": 7.615640640258789, "learning_rate": 3.0133003852024132e-05, "loss": 0.2918, "step": 8300 }, { "epoch": 0.301984155825278, "grad_norm": 1.6900697946548462, "learning_rate": 3.0169343702303948e-05, "loss": 0.2255, "step": 8310 }, { "epoch": 0.3023475543280762, "grad_norm": 2.2034566402435303, "learning_rate": 3.0205683552583764e-05, "loss": 0.198, "step": 8320 }, { "epoch": 0.30271095283087435, "grad_norm": 2.044597625732422, "learning_rate": 3.0242023402863583e-05, "loss": 0.1946, "step": 8330 }, { "epoch": 0.3030743513336725, "grad_norm": 1.6171079874038696, "learning_rate": 3.02783632531434e-05, "loss": 0.1935, "step": 8340 }, { "epoch": 0.30343774983647065, "grad_norm": 2.8435897827148438, "learning_rate": 3.0314703103423215e-05, "loss": 0.3876, "step": 8350 }, { "epoch": 0.30380114833926886, "grad_norm": 2.023019552230835, "learning_rate": 3.0351042953703035e-05, "loss": 0.1879, "step": 8360 }, { "epoch": 0.304164546842067, "grad_norm": 1.7610963582992554, "learning_rate": 3.038738280398285e-05, "loss": 0.1901, "step": 8370 }, { "epoch": 0.30452794534486516, "grad_norm": 1.9482131004333496, "learning_rate": 3.0423722654262667e-05, "loss": 0.2119, "step": 8380 }, { "epoch": 0.30489134384766337, "grad_norm": 1.6463958024978638, "learning_rate": 3.046006250454248e-05, "loss": 0.2067, "step": 8390 }, { "epoch": 0.3052547423504615, "grad_norm": 10.607688903808594, 
"learning_rate": 3.0496402354822302e-05, "loss": 0.2709, "step": 8400 }, { "epoch": 0.3052547423504615, "eval_loss": 0.3912598192691803, "eval_runtime": 179.9461, "eval_samples_per_second": 41.201, "eval_steps_per_second": 5.152, "eval_wer": 0.22865648882676493, "step": 8400 }, { "epoch": 0.30561814085325967, "grad_norm": 5.675121307373047, "learning_rate": 3.053274220510212e-05, "loss": 0.1937, "step": 8410 }, { "epoch": 0.3059815393560579, "grad_norm": 1.9001195430755615, "learning_rate": 3.056908205538193e-05, "loss": 0.1668, "step": 8420 }, { "epoch": 0.306344937858856, "grad_norm": 6.807525157928467, "learning_rate": 3.060542190566175e-05, "loss": 0.2077, "step": 8430 }, { "epoch": 0.3067083363616542, "grad_norm": 2.067265272140503, "learning_rate": 3.064176175594157e-05, "loss": 0.1596, "step": 8440 }, { "epoch": 0.3070717348644524, "grad_norm": 15.267791748046875, "learning_rate": 3.067810160622138e-05, "loss": 0.2667, "step": 8450 }, { "epoch": 0.30743513336725053, "grad_norm": 1.367903709411621, "learning_rate": 3.07144414565012e-05, "loss": 0.1819, "step": 8460 }, { "epoch": 0.3077985318700487, "grad_norm": 1.531816840171814, "learning_rate": 3.075078130678102e-05, "loss": 0.1681, "step": 8470 }, { "epoch": 0.30816193037284684, "grad_norm": 3.668304204940796, "learning_rate": 3.078712115706083e-05, "loss": 0.2488, "step": 8480 }, { "epoch": 0.30852532887564504, "grad_norm": 2.2622220516204834, "learning_rate": 3.082346100734065e-05, "loss": 0.1866, "step": 8490 }, { "epoch": 0.3088887273784432, "grad_norm": 6.450117111206055, "learning_rate": 3.085980085762047e-05, "loss": 0.2676, "step": 8500 }, { "epoch": 0.30925212588124135, "grad_norm": 2.096731424331665, "learning_rate": 3.0896140707900284e-05, "loss": 0.1952, "step": 8510 }, { "epoch": 0.30961552438403955, "grad_norm": 1.3809120655059814, "learning_rate": 3.09324805581801e-05, "loss": 0.3478, "step": 8520 }, { "epoch": 0.3099789228868377, "grad_norm": 4.2257585525512695, "learning_rate": 
3.096882040845992e-05, "loss": 0.2126, "step": 8530 }, { "epoch": 0.31034232138963586, "grad_norm": 2.8543758392333984, "learning_rate": 3.1005160258739736e-05, "loss": 0.8169, "step": 8540 }, { "epoch": 0.31070571989243406, "grad_norm": 5.897162437438965, "learning_rate": 3.104150010901955e-05, "loss": 0.2421, "step": 8550 }, { "epoch": 0.3110691183952322, "grad_norm": 1.8980865478515625, "learning_rate": 3.107783995929937e-05, "loss": 0.193, "step": 8560 }, { "epoch": 0.31143251689803036, "grad_norm": 2.113833427429199, "learning_rate": 3.111417980957919e-05, "loss": 0.1553, "step": 8570 }, { "epoch": 0.31179591540082857, "grad_norm": 2.7569572925567627, "learning_rate": 3.1150519659859e-05, "loss": 0.2003, "step": 8580 }, { "epoch": 0.3121593139036267, "grad_norm": 2.480473756790161, "learning_rate": 3.118685951013882e-05, "loss": 0.2173, "step": 8590 }, { "epoch": 0.3125227124064249, "grad_norm": 12.174234390258789, "learning_rate": 3.122319936041864e-05, "loss": 0.3081, "step": 8600 }, { "epoch": 0.3128861109092231, "grad_norm": 2.8075544834136963, "learning_rate": 3.125953921069845e-05, "loss": 0.263, "step": 8610 }, { "epoch": 0.31324950941202123, "grad_norm": 16.535009384155273, "learning_rate": 3.129587906097827e-05, "loss": 0.1968, "step": 8620 }, { "epoch": 0.3136129079148194, "grad_norm": 6.4783711433410645, "learning_rate": 3.133221891125809e-05, "loss": 0.2396, "step": 8630 }, { "epoch": 0.31397630641761753, "grad_norm": 0.945353090763092, "learning_rate": 3.13685587615379e-05, "loss": 0.1623, "step": 8640 }, { "epoch": 0.31433970492041574, "grad_norm": 7.135663032531738, "learning_rate": 3.140489861181772e-05, "loss": 0.3006, "step": 8650 }, { "epoch": 0.3147031034232139, "grad_norm": 1.275896430015564, "learning_rate": 3.144123846209754e-05, "loss": 0.1845, "step": 8660 }, { "epoch": 0.31506650192601204, "grad_norm": 2.1660525798797607, "learning_rate": 3.147757831237735e-05, "loss": 0.1614, "step": 8670 }, { "epoch": 0.31542990042881025, 
"grad_norm": 3.878882646560669, "learning_rate": 3.1513918162657166e-05, "loss": 0.2124, "step": 8680 }, { "epoch": 0.3157932989316084, "grad_norm": 3.452864170074463, "learning_rate": 3.155025801293699e-05, "loss": 0.1659, "step": 8690 }, { "epoch": 0.31615669743440655, "grad_norm": 4.0493292808532715, "learning_rate": 3.1586597863216805e-05, "loss": 0.2653, "step": 8700 }, { "epoch": 0.31652009593720476, "grad_norm": 1.9184757471084595, "learning_rate": 3.162293771349662e-05, "loss": 0.2043, "step": 8710 }, { "epoch": 0.3168834944400029, "grad_norm": 4.22302770614624, "learning_rate": 3.165927756377644e-05, "loss": 0.2005, "step": 8720 }, { "epoch": 0.31724689294280106, "grad_norm": 8.557464599609375, "learning_rate": 3.1695617414056256e-05, "loss": 0.2135, "step": 8730 }, { "epoch": 0.31761029144559927, "grad_norm": 1.6090949773788452, "learning_rate": 3.173195726433607e-05, "loss": 0.1565, "step": 8740 }, { "epoch": 0.3179736899483974, "grad_norm": 35.859737396240234, "learning_rate": 3.1768297114615894e-05, "loss": 0.3239, "step": 8750 }, { "epoch": 0.31833708845119557, "grad_norm": 2.837944507598877, "learning_rate": 3.180463696489571e-05, "loss": 0.1902, "step": 8760 }, { "epoch": 0.3187004869539937, "grad_norm": 1.6548888683319092, "learning_rate": 3.184097681517552e-05, "loss": 0.1732, "step": 8770 }, { "epoch": 0.31906388545679193, "grad_norm": 3.840034246444702, "learning_rate": 3.187731666545534e-05, "loss": 0.2318, "step": 8780 }, { "epoch": 0.3194272839595901, "grad_norm": 3.3684277534484863, "learning_rate": 3.191365651573516e-05, "loss": 0.1794, "step": 8790 }, { "epoch": 0.31979068246238823, "grad_norm": 8.668655395507812, "learning_rate": 3.194999636601497e-05, "loss": 0.2745, "step": 8800 }, { "epoch": 0.32015408096518644, "grad_norm": 1.412441611289978, "learning_rate": 3.198633621629479e-05, "loss": 0.1913, "step": 8810 }, { "epoch": 0.3205174794679846, "grad_norm": 1.6273925304412842, "learning_rate": 3.202267606657461e-05, "loss": 0.1905, 
"step": 8820 }, { "epoch": 0.32088087797078274, "grad_norm": 5.704558372497559, "learning_rate": 3.205901591685442e-05, "loss": 0.2217, "step": 8830 }, { "epoch": 0.32124427647358095, "grad_norm": 2.248072385787964, "learning_rate": 3.209535576713424e-05, "loss": 0.1752, "step": 8840 }, { "epoch": 0.3216076749763791, "grad_norm": 8.330979347229004, "learning_rate": 3.213169561741406e-05, "loss": 0.2693, "step": 8850 }, { "epoch": 0.32197107347917725, "grad_norm": 6.713444709777832, "learning_rate": 3.2168035467693873e-05, "loss": 0.1821, "step": 8860 }, { "epoch": 0.32233447198197546, "grad_norm": 1.7717983722686768, "learning_rate": 3.220437531797369e-05, "loss": 0.1572, "step": 8870 }, { "epoch": 0.3226978704847736, "grad_norm": 3.8419570922851562, "learning_rate": 3.224071516825351e-05, "loss": 0.2168, "step": 8880 }, { "epoch": 0.32306126898757176, "grad_norm": 1.8515948057174683, "learning_rate": 3.2277055018533325e-05, "loss": 0.1474, "step": 8890 }, { "epoch": 0.32342466749036997, "grad_norm": 12.963587760925293, "learning_rate": 3.231339486881314e-05, "loss": 0.2349, "step": 8900 }, { "epoch": 0.3237880659931681, "grad_norm": 1.078845500946045, "learning_rate": 3.2349734719092963e-05, "loss": 0.1968, "step": 8910 }, { "epoch": 0.32415146449596627, "grad_norm": 1.5369044542312622, "learning_rate": 3.2386074569372776e-05, "loss": 0.1681, "step": 8920 }, { "epoch": 0.3245148629987644, "grad_norm": 3.8013484477996826, "learning_rate": 3.242241441965259e-05, "loss": 0.2214, "step": 8930 }, { "epoch": 0.3248782615015626, "grad_norm": 2.0259406566619873, "learning_rate": 3.2458754269932415e-05, "loss": 0.4227, "step": 8940 }, { "epoch": 0.3252416600043608, "grad_norm": 6.423609256744385, "learning_rate": 3.249509412021223e-05, "loss": 0.2835, "step": 8950 }, { "epoch": 0.32560505850715893, "grad_norm": 2.363159656524658, "learning_rate": 3.253143397049204e-05, "loss": 0.2038, "step": 8960 }, { "epoch": 0.32596845700995714, "grad_norm": 2.4034435749053955, 
"learning_rate": 3.256777382077186e-05, "loss": 0.1907, "step": 8970 }, { "epoch": 0.3263318555127553, "grad_norm": 4.032980442047119, "learning_rate": 3.260411367105168e-05, "loss": 0.1973, "step": 8980 }, { "epoch": 0.32669525401555344, "grad_norm": 6.102022647857666, "learning_rate": 3.264045352133149e-05, "loss": 0.197, "step": 8990 }, { "epoch": 0.32705865251835164, "grad_norm": 35.67893981933594, "learning_rate": 3.267679337161131e-05, "loss": 0.2682, "step": 9000 }, { "epoch": 0.32705865251835164, "eval_loss": 0.40712428092956543, "eval_runtime": 179.2194, "eval_samples_per_second": 41.368, "eval_steps_per_second": 5.172, "eval_wer": 0.226941020567466, "step": 9000 }, { "epoch": 0.3274220510211498, "grad_norm": 1.8014717102050781, "learning_rate": 3.271313322189113e-05, "loss": 0.1591, "step": 9010 }, { "epoch": 0.32778544952394795, "grad_norm": 1.7404965162277222, "learning_rate": 3.274947307217094e-05, "loss": 0.17, "step": 9020 }, { "epoch": 0.32814884802674615, "grad_norm": 3.7020771503448486, "learning_rate": 3.278581292245076e-05, "loss": 0.2225, "step": 9030 }, { "epoch": 0.3285122465295443, "grad_norm": 1.045998454093933, "learning_rate": 3.282215277273058e-05, "loss": 0.1681, "step": 9040 }, { "epoch": 0.32887564503234246, "grad_norm": 5.282716751098633, "learning_rate": 3.2858492623010394e-05, "loss": 0.2856, "step": 9050 }, { "epoch": 0.3292390435351406, "grad_norm": 3.3956387042999268, "learning_rate": 3.289483247329021e-05, "loss": 0.1782, "step": 9060 }, { "epoch": 0.3296024420379388, "grad_norm": 1.855603575706482, "learning_rate": 3.293117232357003e-05, "loss": 0.1582, "step": 9070 }, { "epoch": 0.32996584054073697, "grad_norm": 7.214013576507568, "learning_rate": 3.2967512173849845e-05, "loss": 0.1691, "step": 9080 }, { "epoch": 0.3303292390435351, "grad_norm": 3.140125036239624, "learning_rate": 3.3003852024129664e-05, "loss": 0.1872, "step": 9090 }, { "epoch": 0.3306926375463333, "grad_norm": 17.094255447387695, "learning_rate": 
3.304019187440948e-05, "loss": 0.2848, "step": 9100 }, { "epoch": 0.3310560360491315, "grad_norm": 1.9439010620117188, "learning_rate": 3.3076531724689296e-05, "loss": 0.1625, "step": 9110 }, { "epoch": 0.3314194345519296, "grad_norm": 1.609747290611267, "learning_rate": 3.311287157496911e-05, "loss": 0.1915, "step": 9120 }, { "epoch": 0.33178283305472783, "grad_norm": 4.03629207611084, "learning_rate": 3.314921142524893e-05, "loss": 0.2291, "step": 9130 }, { "epoch": 0.332146231557526, "grad_norm": 1.9643129110336304, "learning_rate": 3.318555127552875e-05, "loss": 0.1747, "step": 9140 }, { "epoch": 0.33250963006032414, "grad_norm": 9.304847717285156, "learning_rate": 3.322189112580856e-05, "loss": 0.2539, "step": 9150 }, { "epoch": 0.33287302856312234, "grad_norm": 1.991467833518982, "learning_rate": 3.325823097608838e-05, "loss": 3.61, "step": 9160 }, { "epoch": 0.3332364270659205, "grad_norm": 2.7127187252044678, "learning_rate": 3.32945708263682e-05, "loss": 0.1985, "step": 9170 }, { "epoch": 0.33359982556871864, "grad_norm": 2.831299304962158, "learning_rate": 3.333091067664801e-05, "loss": 1.7334, "step": 9180 }, { "epoch": 0.33396322407151685, "grad_norm": 1.5434614419937134, "learning_rate": 3.336725052692783e-05, "loss": 0.1718, "step": 9190 }, { "epoch": 0.334326622574315, "grad_norm": 10.254124641418457, "learning_rate": 3.340359037720765e-05, "loss": 0.3246, "step": 9200 }, { "epoch": 0.33469002107711315, "grad_norm": 1.169886589050293, "learning_rate": 3.343993022748746e-05, "loss": 0.1936, "step": 9210 }, { "epoch": 0.3350534195799113, "grad_norm": 3.697627544403076, "learning_rate": 3.347627007776728e-05, "loss": 0.205, "step": 9220 }, { "epoch": 0.3354168180827095, "grad_norm": 3.15781307220459, "learning_rate": 3.35126099280471e-05, "loss": 0.2222, "step": 9230 }, { "epoch": 0.33578021658550766, "grad_norm": 1.903701663017273, "learning_rate": 3.3548949778326914e-05, "loss": 0.1611, "step": 9240 }, { "epoch": 0.3361436150883058, "grad_norm": 
26.77275848388672, "learning_rate": 3.358528962860673e-05, "loss": 0.2872, "step": 9250 }, { "epoch": 0.336507013591104, "grad_norm": 1.588224172592163, "learning_rate": 3.3621629478886546e-05, "loss": 3.404, "step": 9260 }, { "epoch": 0.33687041209390217, "grad_norm": 1.8802090883255005, "learning_rate": 3.3657969329166365e-05, "loss": 0.1715, "step": 9270 }, { "epoch": 0.3372338105967003, "grad_norm": 5.38352632522583, "learning_rate": 3.3694309179446185e-05, "loss": 0.1906, "step": 9280 }, { "epoch": 0.33759720909949853, "grad_norm": 1.736177921295166, "learning_rate": 3.3730649029726e-05, "loss": 0.1881, "step": 9290 }, { "epoch": 0.3379606076022967, "grad_norm": 17.865558624267578, "learning_rate": 3.3766988880005816e-05, "loss": 0.3003, "step": 9300 }, { "epoch": 0.33832400610509483, "grad_norm": 1.532173991203308, "learning_rate": 3.3803328730285636e-05, "loss": 0.188, "step": 9310 }, { "epoch": 0.33868740460789304, "grad_norm": 3.8595352172851562, "learning_rate": 3.383966858056545e-05, "loss": 0.1869, "step": 9320 }, { "epoch": 0.3390508031106912, "grad_norm": 2.5906641483306885, "learning_rate": 3.387600843084527e-05, "loss": 0.1993, "step": 9330 }, { "epoch": 0.33941420161348934, "grad_norm": 2.5224273204803467, "learning_rate": 3.391234828112508e-05, "loss": 0.1935, "step": 9340 }, { "epoch": 0.33977760011628755, "grad_norm": 11.555095672607422, "learning_rate": 3.39486881314049e-05, "loss": 0.2891, "step": 9350 }, { "epoch": 0.3401409986190857, "grad_norm": 1.3724703788757324, "learning_rate": 3.398502798168472e-05, "loss": 0.1656, "step": 9360 }, { "epoch": 0.34050439712188385, "grad_norm": 2.1549072265625, "learning_rate": 3.402136783196453e-05, "loss": 0.1769, "step": 9370 }, { "epoch": 0.340867795624682, "grad_norm": 1.793492317199707, "learning_rate": 3.405770768224435e-05, "loss": 0.2661, "step": 9380 }, { "epoch": 0.3412311941274802, "grad_norm": 4.038620948791504, "learning_rate": 3.409404753252417e-05, "loss": 0.1871, "step": 9390 }, { 
"epoch": 0.34159459263027836, "grad_norm": 31.7847900390625, "learning_rate": 3.413038738280398e-05, "loss": 0.2967, "step": 9400 }, { "epoch": 0.3419579911330765, "grad_norm": 2.398646354675293, "learning_rate": 3.41667272330838e-05, "loss": 0.2086, "step": 9410 }, { "epoch": 0.3423213896358747, "grad_norm": 2.2226221561431885, "learning_rate": 3.4203067083363615e-05, "loss": 0.1665, "step": 9420 }, { "epoch": 0.34268478813867287, "grad_norm": 39.96380615234375, "learning_rate": 3.4239406933643434e-05, "loss": 0.9468, "step": 9430 }, { "epoch": 0.343048186641471, "grad_norm": 1.5465339422225952, "learning_rate": 3.4275746783923254e-05, "loss": 0.1827, "step": 9440 }, { "epoch": 0.3434115851442692, "grad_norm": 7.941345691680908, "learning_rate": 3.4312086634203066e-05, "loss": 0.2786, "step": 9450 }, { "epoch": 0.3437749836470674, "grad_norm": 1.2575476169586182, "learning_rate": 3.4348426484482885e-05, "loss": 0.1764, "step": 9460 }, { "epoch": 0.34413838214986553, "grad_norm": 1.3529596328735352, "learning_rate": 3.4384766334762705e-05, "loss": 0.207, "step": 9470 }, { "epoch": 0.34450178065266374, "grad_norm": 3.2839174270629883, "learning_rate": 3.442110618504252e-05, "loss": 0.2672, "step": 9480 }, { "epoch": 0.3448651791554619, "grad_norm": 3.246384859085083, "learning_rate": 3.445744603532234e-05, "loss": 0.1906, "step": 9490 }, { "epoch": 0.34522857765826004, "grad_norm": 2.595038652420044, "learning_rate": 3.4493785885602156e-05, "loss": 0.2441, "step": 9500 }, { "epoch": 0.3455919761610582, "grad_norm": 1.3803220987319946, "learning_rate": 3.453012573588197e-05, "loss": 0.1745, "step": 9510 }, { "epoch": 0.3459553746638564, "grad_norm": 1.2091724872589111, "learning_rate": 3.456646558616179e-05, "loss": 0.1441, "step": 9520 }, { "epoch": 0.34631877316665455, "grad_norm": 6.582603931427002, "learning_rate": 3.460280543644161e-05, "loss": 0.1835, "step": 9530 }, { "epoch": 0.3466821716694527, "grad_norm": 2.6845383644104004, "learning_rate": 
3.463914528672142e-05, "loss": 0.2048, "step": 9540 }, { "epoch": 0.3470455701722509, "grad_norm": 11.775678634643555, "learning_rate": 3.467548513700123e-05, "loss": 0.2841, "step": 9550 }, { "epoch": 0.34740896867504906, "grad_norm": 2.256279706954956, "learning_rate": 3.471182498728106e-05, "loss": 0.6472, "step": 9560 }, { "epoch": 0.3477723671778472, "grad_norm": 1.4487576484680176, "learning_rate": 3.474816483756087e-05, "loss": 0.2722, "step": 9570 }, { "epoch": 0.3481357656806454, "grad_norm": 3.843964099884033, "learning_rate": 3.4784504687840684e-05, "loss": 0.1855, "step": 9580 }, { "epoch": 0.34849916418344357, "grad_norm": 1.5561772584915161, "learning_rate": 3.48208445381205e-05, "loss": 0.1908, "step": 9590 }, { "epoch": 0.3488625626862417, "grad_norm": 3.757232666015625, "learning_rate": 3.485718438840032e-05, "loss": 0.2198, "step": 9600 }, { "epoch": 0.3488625626862417, "eval_loss": 0.3895765244960785, "eval_runtime": 179.7435, "eval_samples_per_second": 41.248, "eval_steps_per_second": 5.157, "eval_wer": 0.21512335033674007, "step": 9600 }, { "epoch": 0.3492259611890399, "grad_norm": 1.3912307024002075, "learning_rate": 3.4893524238680135e-05, "loss": 0.1616, "step": 9610 }, { "epoch": 0.3495893596918381, "grad_norm": 2.4036080837249756, "learning_rate": 3.4929864088959954e-05, "loss": 0.1579, "step": 9620 }, { "epoch": 0.3499527581946362, "grad_norm": 2.611175537109375, "learning_rate": 3.4966203939239774e-05, "loss": 0.1746, "step": 9630 }, { "epoch": 0.35031615669743443, "grad_norm": 1.4045140743255615, "learning_rate": 3.5002543789519586e-05, "loss": 0.1594, "step": 9640 }, { "epoch": 0.3506795552002326, "grad_norm": 12.708057403564453, "learning_rate": 3.5038883639799406e-05, "loss": 0.3118, "step": 9650 }, { "epoch": 0.35104295370303074, "grad_norm": 3.0364696979522705, "learning_rate": 3.5075223490079225e-05, "loss": 0.3062, "step": 9660 }, { "epoch": 0.3514063522058289, "grad_norm": 1.4527848958969116, "learning_rate": 
3.511156334035904e-05, "loss": 0.1603, "step": 9670 }, { "epoch": 0.3517697507086271, "grad_norm": 5.697939395904541, "learning_rate": 3.514790319063886e-05, "loss": 0.2069, "step": 9680 }, { "epoch": 0.35213314921142524, "grad_norm": 2.1645712852478027, "learning_rate": 3.5184243040918676e-05, "loss": 0.162, "step": 9690 }, { "epoch": 0.3524965477142234, "grad_norm": 8.024601936340332, "learning_rate": 3.522058289119849e-05, "loss": 0.898, "step": 9700 }, { "epoch": 0.3528599462170216, "grad_norm": 1.4516103267669678, "learning_rate": 3.52569227414783e-05, "loss": 0.189, "step": 9710 }, { "epoch": 0.35322334471981975, "grad_norm": 1.0467925071716309, "learning_rate": 3.529326259175813e-05, "loss": 0.1547, "step": 9720 }, { "epoch": 0.3535867432226179, "grad_norm": 3.9237303733825684, "learning_rate": 3.532960244203794e-05, "loss": 0.1968, "step": 9730 }, { "epoch": 0.3539501417254161, "grad_norm": 2.502257823944092, "learning_rate": 3.536594229231775e-05, "loss": 0.1645, "step": 9740 }, { "epoch": 0.35431354022821426, "grad_norm": 30.662227630615234, "learning_rate": 3.540228214259758e-05, "loss": 0.2847, "step": 9750 }, { "epoch": 0.3546769387310124, "grad_norm": 1.7106624841690063, "learning_rate": 3.543862199287739e-05, "loss": 0.1951, "step": 9760 }, { "epoch": 0.3550403372338106, "grad_norm": 2.169036865234375, "learning_rate": 3.5474961843157204e-05, "loss": 0.172, "step": 9770 }, { "epoch": 0.3554037357366088, "grad_norm": 6.116454124450684, "learning_rate": 3.551130169343703e-05, "loss": 0.1934, "step": 9780 }, { "epoch": 0.3557671342394069, "grad_norm": 1.8530545234680176, "learning_rate": 3.554764154371684e-05, "loss": 0.217, "step": 9790 }, { "epoch": 0.3561305327422051, "grad_norm": 11.060449600219727, "learning_rate": 3.5583981393996655e-05, "loss": 0.2145, "step": 9800 }, { "epoch": 0.3564939312450033, "grad_norm": 7.748067378997803, "learning_rate": 3.5620321244276475e-05, "loss": 0.2114, "step": 9810 }, { "epoch": 0.35685732974780143, "grad_norm": 
3.562528610229492, "learning_rate": 3.5656661094556294e-05, "loss": 0.221, "step": 9820 }, { "epoch": 0.3572207282505996, "grad_norm": 2.798417091369629, "learning_rate": 3.5693000944836107e-05, "loss": 0.2071, "step": 9830 }, { "epoch": 0.3575841267533978, "grad_norm": 2.3908724784851074, "learning_rate": 3.5729340795115926e-05, "loss": 0.1678, "step": 9840 }, { "epoch": 0.35794752525619594, "grad_norm": 7.205004692077637, "learning_rate": 3.5765680645395745e-05, "loss": 0.2953, "step": 9850 }, { "epoch": 0.3583109237589941, "grad_norm": 2.5064749717712402, "learning_rate": 3.580202049567556e-05, "loss": 0.197, "step": 9860 }, { "epoch": 0.3586743222617923, "grad_norm": 2.0985934734344482, "learning_rate": 3.583836034595538e-05, "loss": 0.1441, "step": 9870 }, { "epoch": 0.35903772076459045, "grad_norm": 5.256442070007324, "learning_rate": 3.5874700196235197e-05, "loss": 0.203, "step": 9880 }, { "epoch": 0.3594011192673886, "grad_norm": 2.3590219020843506, "learning_rate": 3.591104004651501e-05, "loss": 0.1811, "step": 9890 }, { "epoch": 0.3597645177701868, "grad_norm": 24.96747398376465, "learning_rate": 3.594737989679482e-05, "loss": 0.293, "step": 9900 }, { "epoch": 0.36012791627298496, "grad_norm": 1.727751612663269, "learning_rate": 3.598371974707465e-05, "loss": 0.1896, "step": 9910 }, { "epoch": 0.3604913147757831, "grad_norm": 2.349269151687622, "learning_rate": 3.602005959735446e-05, "loss": 0.1649, "step": 9920 }, { "epoch": 0.3608547132785813, "grad_norm": 3.139385223388672, "learning_rate": 3.605639944763427e-05, "loss": 0.2181, "step": 9930 }, { "epoch": 0.36121811178137947, "grad_norm": 2.1249756813049316, "learning_rate": 3.60927392979141e-05, "loss": 0.1751, "step": 9940 }, { "epoch": 0.3615815102841776, "grad_norm": 3.6616756916046143, "learning_rate": 3.612907914819391e-05, "loss": 0.2729, "step": 9950 }, { "epoch": 0.36194490878697577, "grad_norm": 1.367600440979004, "learning_rate": 3.6165418998473724e-05, "loss": 0.1592, "step": 9960 }, { 
"epoch": 0.362308307289774, "grad_norm": 1.8141239881515503, "learning_rate": 3.620175884875355e-05, "loss": 0.2867, "step": 9970 }, { "epoch": 0.36267170579257213, "grad_norm": 7.0058794021606445, "learning_rate": 3.623809869903336e-05, "loss": 0.207, "step": 9980 }, { "epoch": 0.3630351042953703, "grad_norm": 1.923048734664917, "learning_rate": 3.6274438549313176e-05, "loss": 0.346, "step": 9990 }, { "epoch": 0.3633985027981685, "grad_norm": 16.30779457092285, "learning_rate": 3.6310778399592995e-05, "loss": 0.3107, "step": 10000 }, { "epoch": 0.36376190130096664, "grad_norm": 1.979866862297058, "learning_rate": 3.6347118249872814e-05, "loss": 0.7999, "step": 10010 }, { "epoch": 0.3641252998037648, "grad_norm": 2.7377023696899414, "learning_rate": 3.638345810015263e-05, "loss": 0.2005, "step": 10020 }, { "epoch": 0.364488698306563, "grad_norm": 5.546159744262695, "learning_rate": 3.6419797950432446e-05, "loss": 0.1964, "step": 10030 }, { "epoch": 0.36485209680936115, "grad_norm": 2.2417142391204834, "learning_rate": 3.6456137800712265e-05, "loss": 0.2078, "step": 10040 }, { "epoch": 0.3652154953121593, "grad_norm": 7.2175092697143555, "learning_rate": 3.649247765099208e-05, "loss": 0.291, "step": 10050 }, { "epoch": 0.3655788938149575, "grad_norm": 2.6172754764556885, "learning_rate": 3.65288175012719e-05, "loss": 0.2037, "step": 10060 }, { "epoch": 0.36594229231775566, "grad_norm": 2.0634214878082275, "learning_rate": 3.656515735155172e-05, "loss": 0.1668, "step": 10070 }, { "epoch": 0.3663056908205538, "grad_norm": 3.5431976318359375, "learning_rate": 3.660149720183153e-05, "loss": 0.475, "step": 10080 }, { "epoch": 0.366669089323352, "grad_norm": 2.147472381591797, "learning_rate": 3.663783705211135e-05, "loss": 0.1869, "step": 10090 }, { "epoch": 0.36703248782615017, "grad_norm": 18.726482391357422, "learning_rate": 3.667417690239117e-05, "loss": 0.2773, "step": 10100 }, { "epoch": 0.3673958863289483, "grad_norm": 1.6554090976715088, "learning_rate": 
3.671051675267098e-05, "loss": 0.1707, "step": 10110 }, { "epoch": 0.36775928483174647, "grad_norm": 1.8967760801315308, "learning_rate": 3.674685660295079e-05, "loss": 0.2159, "step": 10120 }, { "epoch": 0.3681226833345447, "grad_norm": 2.3765788078308105, "learning_rate": 3.678319645323061e-05, "loss": 0.2229, "step": 10130 }, { "epoch": 0.3684860818373428, "grad_norm": 5.890452861785889, "learning_rate": 3.681953630351043e-05, "loss": 0.195, "step": 10140 }, { "epoch": 0.368849480340141, "grad_norm": 5.045167446136475, "learning_rate": 3.6855876153790244e-05, "loss": 0.3111, "step": 10150 }, { "epoch": 0.3692128788429392, "grad_norm": 2.37107253074646, "learning_rate": 3.6892216004070064e-05, "loss": 0.1942, "step": 10160 }, { "epoch": 0.36957627734573734, "grad_norm": 1.9943170547485352, "learning_rate": 3.692855585434988e-05, "loss": 0.1906, "step": 10170 }, { "epoch": 0.3699396758485355, "grad_norm": 3.16873836517334, "learning_rate": 3.6964895704629696e-05, "loss": 0.1791, "step": 10180 }, { "epoch": 0.3703030743513337, "grad_norm": 15.252134323120117, "learning_rate": 3.7001235554909515e-05, "loss": 0.3702, "step": 10190 }, { "epoch": 0.37066647285413185, "grad_norm": 8.845834732055664, "learning_rate": 3.7037575405189334e-05, "loss": 0.2765, "step": 10200 }, { "epoch": 0.37066647285413185, "eval_loss": 0.4178149104118347, "eval_runtime": 179.6523, "eval_samples_per_second": 41.269, "eval_steps_per_second": 5.16, "eval_wer": 0.2237551509430537, "step": 10200 }, { "epoch": 0.37102987135693, "grad_norm": 6.2689313888549805, "learning_rate": 3.707391525546915e-05, "loss": 0.1922, "step": 10210 }, { "epoch": 0.3713932698597282, "grad_norm": 1.00067138671875, "learning_rate": 3.7110255105748966e-05, "loss": 0.1535, "step": 10220 }, { "epoch": 0.37175666836252635, "grad_norm": 2.6602060794830322, "learning_rate": 3.7146594956028786e-05, "loss": 0.1959, "step": 10230 }, { "epoch": 0.3721200668653245, "grad_norm": 4.743015766143799, "learning_rate": 
3.71829348063086e-05, "loss": 0.2058, "step": 10240 }, { "epoch": 0.37248346536812266, "grad_norm": 8.304347038269043, "learning_rate": 3.721927465658842e-05, "loss": 0.3027, "step": 10250 }, { "epoch": 0.37284686387092086, "grad_norm": 1.8180521726608276, "learning_rate": 3.725561450686824e-05, "loss": 0.1708, "step": 10260 }, { "epoch": 0.373210262373719, "grad_norm": 2.05625057220459, "learning_rate": 3.729195435714805e-05, "loss": 0.1824, "step": 10270 }, { "epoch": 0.37357366087651717, "grad_norm": 2.426814317703247, "learning_rate": 3.732829420742787e-05, "loss": 0.197, "step": 10280 }, { "epoch": 0.3739370593793154, "grad_norm": 1.658158540725708, "learning_rate": 3.736463405770768e-05, "loss": 0.1578, "step": 10290 }, { "epoch": 0.3743004578821135, "grad_norm": 10.913407325744629, "learning_rate": 3.74009739079875e-05, "loss": 0.2728, "step": 10300 }, { "epoch": 0.3746638563849117, "grad_norm": 1.6443781852722168, "learning_rate": 3.743731375826732e-05, "loss": 0.1656, "step": 10310 }, { "epoch": 0.3750272548877099, "grad_norm": 1.0702744722366333, "learning_rate": 3.747365360854713e-05, "loss": 0.7132, "step": 10320 }, { "epoch": 0.37539065339050803, "grad_norm": 5.8824052810668945, "learning_rate": 3.750999345882695e-05, "loss": 0.2701, "step": 10330 }, { "epoch": 0.3757540518933062, "grad_norm": 4.373916149139404, "learning_rate": 3.754633330910677e-05, "loss": 0.2053, "step": 10340 }, { "epoch": 0.3761174503961044, "grad_norm": 22.25397300720215, "learning_rate": 3.7582673159386584e-05, "loss": 0.2781, "step": 10350 }, { "epoch": 0.37648084889890254, "grad_norm": 1.8272254467010498, "learning_rate": 3.7619013009666403e-05, "loss": 0.1833, "step": 10360 }, { "epoch": 0.3768442474017007, "grad_norm": 3.286931037902832, "learning_rate": 3.7655352859946216e-05, "loss": 0.1576, "step": 10370 }, { "epoch": 0.3772076459044989, "grad_norm": 5.283690929412842, "learning_rate": 3.7691692710226035e-05, "loss": 0.21, "step": 10380 }, { "epoch": 0.37757104440729705, 
"grad_norm": 1.184476375579834, "learning_rate": 3.7728032560505855e-05, "loss": 0.2597, "step": 10390 }, { "epoch": 0.3779344429100952, "grad_norm": 5.685116767883301, "learning_rate": 3.776437241078567e-05, "loss": 0.2476, "step": 10400 }, { "epoch": 0.37829784141289335, "grad_norm": 1.1873399019241333, "learning_rate": 3.7800712261065487e-05, "loss": 0.1597, "step": 10410 }, { "epoch": 0.37866123991569156, "grad_norm": 1.6136255264282227, "learning_rate": 3.7837052111345306e-05, "loss": 0.188, "step": 10420 }, { "epoch": 0.3790246384184897, "grad_norm": 4.743179798126221, "learning_rate": 3.787339196162512e-05, "loss": 0.1962, "step": 10430 }, { "epoch": 0.37938803692128786, "grad_norm": 2.603379011154175, "learning_rate": 3.790973181190494e-05, "loss": 0.1854, "step": 10440 }, { "epoch": 0.37975143542408607, "grad_norm": 6.267378807067871, "learning_rate": 3.794607166218475e-05, "loss": 0.2569, "step": 10450 }, { "epoch": 0.3801148339268842, "grad_norm": 5.370235919952393, "learning_rate": 3.798241151246457e-05, "loss": 0.1796, "step": 10460 }, { "epoch": 0.3804782324296824, "grad_norm": 2.170964002609253, "learning_rate": 3.801875136274439e-05, "loss": 0.1713, "step": 10470 }, { "epoch": 0.3808416309324806, "grad_norm": 4.134753704071045, "learning_rate": 3.80550912130242e-05, "loss": 0.2269, "step": 10480 }, { "epoch": 0.38120502943527873, "grad_norm": 2.7026259899139404, "learning_rate": 3.809143106330402e-05, "loss": 0.1938, "step": 10490 }, { "epoch": 0.3815684279380769, "grad_norm": 7.368224143981934, "learning_rate": 3.812777091358384e-05, "loss": 0.2617, "step": 10500 }, { "epoch": 0.3819318264408751, "grad_norm": 1.3194938898086548, "learning_rate": 3.816411076386365e-05, "loss": 0.2066, "step": 10510 }, { "epoch": 0.38229522494367324, "grad_norm": 1.901505470275879, "learning_rate": 3.820045061414347e-05, "loss": 0.1716, "step": 10520 }, { "epoch": 0.3826586234464714, "grad_norm": 3.4045536518096924, "learning_rate": 3.823679046442329e-05, "loss": 
0.1625, "step": 10530 }, { "epoch": 0.38302202194926954, "grad_norm": 2.1540184020996094, "learning_rate": 3.8273130314703104e-05, "loss": 0.1829, "step": 10540 }, { "epoch": 0.38338542045206775, "grad_norm": 14.377511024475098, "learning_rate": 3.8309470164982924e-05, "loss": 0.2747, "step": 10550 }, { "epoch": 0.3837488189548659, "grad_norm": 1.9092762470245361, "learning_rate": 3.834581001526274e-05, "loss": 0.1728, "step": 10560 }, { "epoch": 0.38411221745766405, "grad_norm": 1.867458462715149, "learning_rate": 3.8382149865542556e-05, "loss": 0.1752, "step": 10570 }, { "epoch": 0.38447561596046226, "grad_norm": 5.246692657470703, "learning_rate": 3.841848971582237e-05, "loss": 0.1823, "step": 10580 }, { "epoch": 0.3848390144632604, "grad_norm": 2.9294533729553223, "learning_rate": 3.845482956610219e-05, "loss": 0.2052, "step": 10590 }, { "epoch": 0.38520241296605856, "grad_norm": 11.946113586425781, "learning_rate": 3.849116941638201e-05, "loss": 0.309, "step": 10600 }, { "epoch": 0.38556581146885677, "grad_norm": 1.7155182361602783, "learning_rate": 3.852750926666182e-05, "loss": 0.175, "step": 10610 }, { "epoch": 0.3859292099716549, "grad_norm": 1.1520076990127563, "learning_rate": 3.856384911694164e-05, "loss": 0.2129, "step": 10620 }, { "epoch": 0.38629260847445307, "grad_norm": 1.9750351905822754, "learning_rate": 3.860018896722146e-05, "loss": 0.1725, "step": 10630 }, { "epoch": 0.3866560069772513, "grad_norm": 4.309560298919678, "learning_rate": 3.863652881750127e-05, "loss": 0.1516, "step": 10640 }, { "epoch": 0.3870194054800494, "grad_norm": 7.554156303405762, "learning_rate": 3.867286866778109e-05, "loss": 0.3069, "step": 10650 }, { "epoch": 0.3873828039828476, "grad_norm": 3.7965683937072754, "learning_rate": 3.870920851806091e-05, "loss": 0.2014, "step": 10660 }, { "epoch": 0.3877462024856458, "grad_norm": 3.8691935539245605, "learning_rate": 3.874554836834072e-05, "loss": 0.1678, "step": 10670 }, { "epoch": 0.38810960098844394, "grad_norm": 
4.144315719604492, "learning_rate": 3.878188821862054e-05, "loss": 0.2936, "step": 10680 }, { "epoch": 0.3884729994912421, "grad_norm": 1.5667825937271118, "learning_rate": 3.881822806890036e-05, "loss": 0.1871, "step": 10690 }, { "epoch": 0.38883639799404024, "grad_norm": 7.6076788902282715, "learning_rate": 3.885456791918017e-05, "loss": 0.2661, "step": 10700 }, { "epoch": 0.38919979649683845, "grad_norm": 1.7828059196472168, "learning_rate": 3.889090776945999e-05, "loss": 0.1808, "step": 10710 }, { "epoch": 0.3895631949996366, "grad_norm": 7.039370059967041, "learning_rate": 3.892724761973981e-05, "loss": 0.2484, "step": 10720 }, { "epoch": 0.38992659350243475, "grad_norm": 2.1001148223876953, "learning_rate": 3.8963587470019625e-05, "loss": 0.1644, "step": 10730 }, { "epoch": 0.39028999200523296, "grad_norm": 0.9235002398490906, "learning_rate": 3.899992732029944e-05, "loss": 0.172, "step": 10740 }, { "epoch": 0.3906533905080311, "grad_norm": 10.066643714904785, "learning_rate": 3.903626717057926e-05, "loss": 0.2999, "step": 10750 }, { "epoch": 0.39101678901082926, "grad_norm": 2.256965160369873, "learning_rate": 3.9072607020859076e-05, "loss": 0.2116, "step": 10760 }, { "epoch": 0.39138018751362746, "grad_norm": 1.742125153541565, "learning_rate": 3.910894687113889e-05, "loss": 0.1838, "step": 10770 }, { "epoch": 0.3917435860164256, "grad_norm": 5.397392749786377, "learning_rate": 3.9145286721418714e-05, "loss": 0.2213, "step": 10780 }, { "epoch": 0.39210698451922377, "grad_norm": 2.439197540283203, "learning_rate": 3.918162657169853e-05, "loss": 0.1984, "step": 10790 }, { "epoch": 0.392470383022022, "grad_norm": 6.7387895584106445, "learning_rate": 3.921796642197834e-05, "loss": 0.2842, "step": 10800 }, { "epoch": 0.392470383022022, "eval_loss": 0.39516785740852356, "eval_runtime": 180.1522, "eval_samples_per_second": 41.154, "eval_steps_per_second": 5.146, "eval_wer": 0.21758309583023216, "step": 10800 }, { "epoch": 0.3928337815248201, "grad_norm": 
1.5229130983352661, "learning_rate": 3.925430627225816e-05, "loss": 0.1809, "step": 10810 }, { "epoch": 0.3931971800276183, "grad_norm": 1.6385318040847778, "learning_rate": 3.929064612253798e-05, "loss": 0.155, "step": 10820 }, { "epoch": 0.3935605785304164, "grad_norm": 2.403878927230835, "learning_rate": 3.932698597281779e-05, "loss": 0.2837, "step": 10830 }, { "epoch": 0.39392397703321463, "grad_norm": 2.818368434906006, "learning_rate": 3.936332582309761e-05, "loss": 0.2298, "step": 10840 }, { "epoch": 0.3942873755360128, "grad_norm": 6.08942174911499, "learning_rate": 3.939966567337743e-05, "loss": 0.2262, "step": 10850 }, { "epoch": 0.39465077403881094, "grad_norm": 1.2632570266723633, "learning_rate": 3.943600552365724e-05, "loss": 0.2087, "step": 10860 }, { "epoch": 0.39501417254160914, "grad_norm": 2.2119662761688232, "learning_rate": 3.947234537393706e-05, "loss": 0.1974, "step": 10870 }, { "epoch": 0.3953775710444073, "grad_norm": 2.936021089553833, "learning_rate": 3.950868522421688e-05, "loss": 0.1909, "step": 10880 }, { "epoch": 0.39574096954720545, "grad_norm": 1.3898749351501465, "learning_rate": 3.9545025074496693e-05, "loss": 0.184, "step": 10890 }, { "epoch": 0.39610436805000365, "grad_norm": 9.063791275024414, "learning_rate": 3.958136492477651e-05, "loss": 0.338, "step": 10900 }, { "epoch": 0.3964677665528018, "grad_norm": 1.3791584968566895, "learning_rate": 3.961770477505633e-05, "loss": 0.2256, "step": 10910 }, { "epoch": 0.39683116505559995, "grad_norm": 0.9377845525741577, "learning_rate": 3.9654044625336145e-05, "loss": 0.9822, "step": 10920 }, { "epoch": 0.39719456355839816, "grad_norm": 3.9755465984344482, "learning_rate": 3.969038447561596e-05, "loss": 0.2257, "step": 10930 }, { "epoch": 0.3975579620611963, "grad_norm": 1.559699535369873, "learning_rate": 3.9726724325895783e-05, "loss": 0.2116, "step": 10940 }, { "epoch": 0.39792136056399446, "grad_norm": 7.545668601989746, "learning_rate": 3.9763064176175596e-05, "loss": 0.2515, 
"step": 10950 }, { "epoch": 0.39828475906679267, "grad_norm": 1.980197548866272, "learning_rate": 3.979940402645541e-05, "loss": 0.1721, "step": 10960 }, { "epoch": 0.3986481575695908, "grad_norm": 2.5450973510742188, "learning_rate": 3.9835743876735235e-05, "loss": 1.7152, "step": 10970 }, { "epoch": 0.399011556072389, "grad_norm": 3.518233060836792, "learning_rate": 3.987208372701505e-05, "loss": 0.2521, "step": 10980 }, { "epoch": 0.3993749545751871, "grad_norm": 2.678774356842041, "learning_rate": 3.990842357729486e-05, "loss": 0.2025, "step": 10990 }, { "epoch": 0.39973835307798533, "grad_norm": 11.46552848815918, "learning_rate": 3.9944763427574686e-05, "loss": 0.2683, "step": 11000 }, { "epoch": 0.4001017515807835, "grad_norm": 2.3148844242095947, "learning_rate": 3.99811032778545e-05, "loss": 1.5331, "step": 11010 }, { "epoch": 0.40046515008358163, "grad_norm": 1.2145686149597168, "learning_rate": 4.001744312813431e-05, "loss": 0.1931, "step": 11020 }, { "epoch": 0.40082854858637984, "grad_norm": 3.581883192062378, "learning_rate": 4.005378297841413e-05, "loss": 0.18, "step": 11030 }, { "epoch": 0.401191947089178, "grad_norm": 2.4645683765411377, "learning_rate": 4.009012282869395e-05, "loss": 0.2303, "step": 11040 }, { "epoch": 0.40155534559197614, "grad_norm": 13.845566749572754, "learning_rate": 4.012646267897376e-05, "loss": 0.2515, "step": 11050 }, { "epoch": 0.40191874409477435, "grad_norm": 1.6929864883422852, "learning_rate": 4.016280252925358e-05, "loss": 2.9232, "step": 11060 }, { "epoch": 0.4022821425975725, "grad_norm": 1.5453213453292847, "learning_rate": 4.01991423795334e-05, "loss": 0.1703, "step": 11070 }, { "epoch": 0.40264554110037065, "grad_norm": 1.5723987817764282, "learning_rate": 4.0235482229813214e-05, "loss": 0.1694, "step": 11080 }, { "epoch": 0.40300893960316886, "grad_norm": 1.4501444101333618, "learning_rate": 4.027182208009303e-05, "loss": 0.2477, "step": 11090 }, { "epoch": 0.403372338105967, "grad_norm": 20.50950813293457, 
"learning_rate": 4.030816193037285e-05, "loss": 0.2641, "step": 11100 }, { "epoch": 0.40373573660876516, "grad_norm": 1.9846757650375366, "learning_rate": 4.0344501780652665e-05, "loss": 0.1807, "step": 11110 }, { "epoch": 0.40409913511156337, "grad_norm": 1.3933240175247192, "learning_rate": 4.0380841630932484e-05, "loss": 0.1683, "step": 11120 }, { "epoch": 0.4044625336143615, "grad_norm": 2.370534658432007, "learning_rate": 4.0417181481212304e-05, "loss": 0.2476, "step": 11130 }, { "epoch": 0.40482593211715967, "grad_norm": 2.6382100582122803, "learning_rate": 4.0453521331492116e-05, "loss": 0.1723, "step": 11140 }, { "epoch": 0.4051893306199578, "grad_norm": 27.381826400756836, "learning_rate": 4.048986118177193e-05, "loss": 0.2058, "step": 11150 }, { "epoch": 0.40555272912275603, "grad_norm": 1.3622616529464722, "learning_rate": 4.052620103205175e-05, "loss": 0.1744, "step": 11160 }, { "epoch": 0.4059161276255542, "grad_norm": 1.4734828472137451, "learning_rate": 4.056254088233157e-05, "loss": 0.1685, "step": 11170 }, { "epoch": 0.40627952612835233, "grad_norm": 5.694312572479248, "learning_rate": 4.059888073261138e-05, "loss": 0.3549, "step": 11180 }, { "epoch": 0.40664292463115054, "grad_norm": 1.9976438283920288, "learning_rate": 4.06352205828912e-05, "loss": 0.1525, "step": 11190 }, { "epoch": 0.4070063231339487, "grad_norm": 5.735686779022217, "learning_rate": 4.067156043317102e-05, "loss": 0.2642, "step": 11200 }, { "epoch": 0.40736972163674684, "grad_norm": 5.192315101623535, "learning_rate": 4.070790028345083e-05, "loss": 0.1636, "step": 11210 }, { "epoch": 0.40773312013954505, "grad_norm": 2.6324477195739746, "learning_rate": 4.074424013373065e-05, "loss": 0.3451, "step": 11220 }, { "epoch": 0.4080965186423432, "grad_norm": 2.496997356414795, "learning_rate": 4.078057998401047e-05, "loss": 0.1792, "step": 11230 }, { "epoch": 0.40845991714514135, "grad_norm": 3.928255558013916, "learning_rate": 4.081691983429028e-05, "loss": 0.2203, "step": 11240 }, { 
"epoch": 0.40882331564793956, "grad_norm": 14.433273315429688, "learning_rate": 4.08532596845701e-05, "loss": 0.3283, "step": 11250 }, { "epoch": 0.4091867141507377, "grad_norm": 1.9282217025756836, "learning_rate": 4.088959953484992e-05, "loss": 0.2191, "step": 11260 }, { "epoch": 0.40955011265353586, "grad_norm": 1.8360569477081299, "learning_rate": 4.0925939385129734e-05, "loss": 0.1623, "step": 11270 }, { "epoch": 0.409913511156334, "grad_norm": 4.518060207366943, "learning_rate": 4.096227923540955e-05, "loss": 0.2036, "step": 11280 }, { "epoch": 0.4102769096591322, "grad_norm": 1.4292632341384888, "learning_rate": 4.099861908568937e-05, "loss": 0.1515, "step": 11290 }, { "epoch": 0.41064030816193037, "grad_norm": 23.795089721679688, "learning_rate": 4.1034958935969185e-05, "loss": 0.3228, "step": 11300 }, { "epoch": 0.4110037066647285, "grad_norm": 1.7721456289291382, "learning_rate": 4.1071298786249005e-05, "loss": 0.15, "step": 11310 }, { "epoch": 0.4113671051675267, "grad_norm": 3.544579029083252, "learning_rate": 4.110763863652882e-05, "loss": 0.2349, "step": 11320 }, { "epoch": 0.4117305036703249, "grad_norm": 4.25554895401001, "learning_rate": 4.1143978486808636e-05, "loss": 0.5458, "step": 11330 }, { "epoch": 0.41209390217312303, "grad_norm": 3.069894313812256, "learning_rate": 4.1180318337088456e-05, "loss": 0.2131, "step": 11340 }, { "epoch": 0.41245730067592123, "grad_norm": 5.389547348022461, "learning_rate": 4.121665818736827e-05, "loss": 0.2895, "step": 11350 }, { "epoch": 0.4128206991787194, "grad_norm": 2.308717727661133, "learning_rate": 4.125299803764809e-05, "loss": 0.209, "step": 11360 }, { "epoch": 0.41318409768151754, "grad_norm": 2.071504831314087, "learning_rate": 4.12893378879279e-05, "loss": 0.225, "step": 11370 }, { "epoch": 0.41354749618431574, "grad_norm": 10.397724151611328, "learning_rate": 4.132567773820772e-05, "loss": 0.5041, "step": 11380 }, { "epoch": 0.4139108946871139, "grad_norm": 3.3916842937469482, "learning_rate": 
4.136201758848754e-05, "loss": 0.2055, "step": 11390 }, { "epoch": 0.41427429318991205, "grad_norm": 27.703519821166992, "learning_rate": 4.139835743876735e-05, "loss": 0.3002, "step": 11400 }, { "epoch": 0.41427429318991205, "eval_loss": 0.40216270089149475, "eval_runtime": 180.385, "eval_samples_per_second": 41.101, "eval_steps_per_second": 5.139, "eval_wer": 0.22132263510447112, "step": 11400 }, { "epoch": 0.41463769169271025, "grad_norm": 3.024658203125, "learning_rate": 4.143469728904717e-05, "loss": 0.168, "step": 11410 }, { "epoch": 0.4150010901955084, "grad_norm": 2.899369478225708, "learning_rate": 4.147103713932699e-05, "loss": 3.0252, "step": 11420 }, { "epoch": 0.41536448869830656, "grad_norm": 3.960700511932373, "learning_rate": 4.15073769896068e-05, "loss": 0.1972, "step": 11430 }, { "epoch": 0.4157278872011047, "grad_norm": 1.542468786239624, "learning_rate": 4.154371683988662e-05, "loss": 0.1971, "step": 11440 }, { "epoch": 0.4160912857039029, "grad_norm": 16.871423721313477, "learning_rate": 4.158005669016644e-05, "loss": 0.2768, "step": 11450 }, { "epoch": 0.41645468420670106, "grad_norm": 3.142385721206665, "learning_rate": 4.1616396540446254e-05, "loss": 0.2173, "step": 11460 }, { "epoch": 0.4168180827094992, "grad_norm": 0.9852932095527649, "learning_rate": 4.1652736390726074e-05, "loss": 0.2529, "step": 11470 }, { "epoch": 0.4171814812122974, "grad_norm": 2.4834413528442383, "learning_rate": 4.1689076241005886e-05, "loss": 0.9175, "step": 11480 }, { "epoch": 0.4175448797150956, "grad_norm": 2.7286272048950195, "learning_rate": 4.1725416091285705e-05, "loss": 0.1864, "step": 11490 }, { "epoch": 0.4179082782178937, "grad_norm": 5.711360454559326, "learning_rate": 4.1761755941565525e-05, "loss": 0.2285, "step": 11500 }, { "epoch": 0.41827167672069193, "grad_norm": 1.160866379737854, "learning_rate": 4.179809579184534e-05, "loss": 0.1959, "step": 11510 }, { "epoch": 0.4186350752234901, "grad_norm": 2.5051305294036865, "learning_rate": 
4.183443564212516e-05, "loss": 0.185, "step": 11520 }, { "epoch": 0.41899847372628823, "grad_norm": 3.641874313354492, "learning_rate": 4.1870775492404976e-05, "loss": 0.199, "step": 11530 }, { "epoch": 0.41936187222908644, "grad_norm": 1.676038146018982, "learning_rate": 4.190711534268479e-05, "loss": 0.1895, "step": 11540 }, { "epoch": 0.4197252707318846, "grad_norm": 11.47658634185791, "learning_rate": 4.194345519296461e-05, "loss": 0.246, "step": 11550 }, { "epoch": 0.42008866923468274, "grad_norm": 1.7632570266723633, "learning_rate": 4.197979504324443e-05, "loss": 0.1761, "step": 11560 }, { "epoch": 0.4204520677374809, "grad_norm": 2.2994728088378906, "learning_rate": 4.201613489352424e-05, "loss": 0.1799, "step": 11570 }, { "epoch": 0.4208154662402791, "grad_norm": 3.964228391647339, "learning_rate": 4.205247474380406e-05, "loss": 0.7376, "step": 11580 }, { "epoch": 0.42117886474307725, "grad_norm": 1.866466760635376, "learning_rate": 4.208881459408387e-05, "loss": 0.2293, "step": 11590 }, { "epoch": 0.4215422632458754, "grad_norm": 4.722428798675537, "learning_rate": 4.212515444436369e-05, "loss": 0.2303, "step": 11600 }, { "epoch": 0.4219056617486736, "grad_norm": 2.8812968730926514, "learning_rate": 4.2161494294643504e-05, "loss": 0.1628, "step": 11610 }, { "epoch": 0.42226906025147176, "grad_norm": 8.05451488494873, "learning_rate": 4.219783414492332e-05, "loss": 0.1978, "step": 11620 }, { "epoch": 0.4226324587542699, "grad_norm": 3.4176700115203857, "learning_rate": 4.223417399520314e-05, "loss": 0.1986, "step": 11630 }, { "epoch": 0.4229958572570681, "grad_norm": 5.204764366149902, "learning_rate": 4.2270513845482955e-05, "loss": 0.1959, "step": 11640 }, { "epoch": 0.42335925575986627, "grad_norm": 6.184700965881348, "learning_rate": 4.2306853695762774e-05, "loss": 0.2822, "step": 11650 }, { "epoch": 0.4237226542626644, "grad_norm": 2.288935422897339, "learning_rate": 4.2343193546042594e-05, "loss": 0.2073, "step": 11660 }, { "epoch": 
0.42408605276546263, "grad_norm": 3.8856844902038574, "learning_rate": 4.2379533396322406e-05, "loss": 0.2134, "step": 11670 }, { "epoch": 0.4244494512682608, "grad_norm": 4.048069953918457, "learning_rate": 4.2415873246602226e-05, "loss": 0.1922, "step": 11680 }, { "epoch": 0.42481284977105893, "grad_norm": 1.466927409172058, "learning_rate": 4.2452213096882045e-05, "loss": 0.1653, "step": 11690 }, { "epoch": 0.42517624827385714, "grad_norm": 35.94015121459961, "learning_rate": 4.248855294716186e-05, "loss": 0.2398, "step": 11700 }, { "epoch": 0.4255396467766553, "grad_norm": 2.575195789337158, "learning_rate": 4.252489279744168e-05, "loss": 0.2241, "step": 11710 }, { "epoch": 0.42590304527945344, "grad_norm": 1.4232568740844727, "learning_rate": 4.2561232647721496e-05, "loss": 0.1817, "step": 11720 }, { "epoch": 0.4262664437822516, "grad_norm": 2.8543412685394287, "learning_rate": 4.259757249800131e-05, "loss": 0.2094, "step": 11730 }, { "epoch": 0.4266298422850498, "grad_norm": 0.85033118724823, "learning_rate": 4.263391234828113e-05, "loss": 0.1578, "step": 11740 }, { "epoch": 0.42699324078784795, "grad_norm": 7.382369041442871, "learning_rate": 4.267025219856095e-05, "loss": 0.2763, "step": 11750 }, { "epoch": 0.4273566392906461, "grad_norm": 1.3994635343551636, "learning_rate": 4.270659204884076e-05, "loss": 0.199, "step": 11760 }, { "epoch": 0.4277200377934443, "grad_norm": 1.4978888034820557, "learning_rate": 4.274293189912057e-05, "loss": 0.1804, "step": 11770 }, { "epoch": 0.42808343629624246, "grad_norm": 5.206210136413574, "learning_rate": 4.27792717494004e-05, "loss": 0.2483, "step": 11780 }, { "epoch": 0.4284468347990406, "grad_norm": 1.4130820035934448, "learning_rate": 4.281561159968021e-05, "loss": 0.1792, "step": 11790 }, { "epoch": 0.4288102333018388, "grad_norm": 2.60227370262146, "learning_rate": 4.2851951449960024e-05, "loss": 0.214, "step": 11800 }, { "epoch": 0.42917363180463697, "grad_norm": 1.8874465227127075, "learning_rate": 
4.288829130023985e-05, "loss": 0.1894, "step": 11810 }, { "epoch": 0.4295370303074351, "grad_norm": 2.921766519546509, "learning_rate": 4.292463115051966e-05, "loss": 0.1608, "step": 11820 }, { "epoch": 0.4299004288102333, "grad_norm": 2.812821626663208, "learning_rate": 4.2960971000799475e-05, "loss": 0.2381, "step": 11830 }, { "epoch": 0.4302638273130315, "grad_norm": 1.8063637018203735, "learning_rate": 4.2997310851079295e-05, "loss": 0.198, "step": 11840 }, { "epoch": 0.43062722581582963, "grad_norm": 16.433927536010742, "learning_rate": 4.3033650701359114e-05, "loss": 0.3015, "step": 11850 }, { "epoch": 0.4309906243186278, "grad_norm": 1.295142650604248, "learning_rate": 4.3069990551638927e-05, "loss": 0.1678, "step": 11860 }, { "epoch": 0.431354022821426, "grad_norm": 135.4871063232422, "learning_rate": 4.3106330401918746e-05, "loss": 1.8542, "step": 11870 }, { "epoch": 0.43171742132422414, "grad_norm": 2.3314764499664307, "learning_rate": 4.3142670252198565e-05, "loss": 0.1983, "step": 11880 }, { "epoch": 0.4320808198270223, "grad_norm": 1.6635117530822754, "learning_rate": 4.317901010247838e-05, "loss": 0.1737, "step": 11890 }, { "epoch": 0.4324442183298205, "grad_norm": 32.102664947509766, "learning_rate": 4.32153499527582e-05, "loss": 0.3092, "step": 11900 }, { "epoch": 0.43280761683261865, "grad_norm": 2.3491451740264893, "learning_rate": 4.3251689803038017e-05, "loss": 0.1849, "step": 11910 }, { "epoch": 0.4331710153354168, "grad_norm": 3.8088629245758057, "learning_rate": 4.328802965331783e-05, "loss": 0.2023, "step": 11920 }, { "epoch": 0.433534413838215, "grad_norm": 2.7132246494293213, "learning_rate": 4.332436950359764e-05, "loss": 0.1935, "step": 11930 }, { "epoch": 0.43389781234101316, "grad_norm": 1.2917368412017822, "learning_rate": 4.336070935387747e-05, "loss": 0.1918, "step": 11940 }, { "epoch": 0.4342612108438113, "grad_norm": 9.690601348876953, "learning_rate": 4.339704920415728e-05, "loss": 0.3059, "step": 11950 }, { "epoch": 
0.4346246093466095, "grad_norm": 1.2652380466461182, "learning_rate": 4.343338905443709e-05, "loss": 0.1587, "step": 11960 }, { "epoch": 0.43498800784940767, "grad_norm": 0.9622058272361755, "learning_rate": 4.346972890471692e-05, "loss": 0.1755, "step": 11970 }, { "epoch": 0.4353514063522058, "grad_norm": 5.316989898681641, "learning_rate": 4.350606875499673e-05, "loss": 0.1794, "step": 11980 }, { "epoch": 0.435714804855004, "grad_norm": 3.428891181945801, "learning_rate": 4.3542408605276544e-05, "loss": 0.2105, "step": 11990 }, { "epoch": 0.4360782033578022, "grad_norm": 12.879768371582031, "learning_rate": 4.357874845555637e-05, "loss": 0.2904, "step": 12000 }, { "epoch": 0.4360782033578022, "eval_loss": 0.3918191194534302, "eval_runtime": 180.0676, "eval_samples_per_second": 41.173, "eval_steps_per_second": 5.148, "eval_wer": 0.22659611160527893, "step": 12000 }, { "epoch": 0.4364416018606003, "grad_norm": 2.0471973419189453, "learning_rate": 4.361508830583618e-05, "loss": 0.2544, "step": 12010 }, { "epoch": 0.4368050003633985, "grad_norm": 1.3883107900619507, "learning_rate": 4.3651428156115995e-05, "loss": 0.1957, "step": 12020 }, { "epoch": 0.4371683988661967, "grad_norm": 1.786475419998169, "learning_rate": 4.368776800639582e-05, "loss": 0.1732, "step": 12030 }, { "epoch": 0.43753179736899483, "grad_norm": 3.3099594116210938, "learning_rate": 4.3724107856675634e-05, "loss": 0.1871, "step": 12040 }, { "epoch": 0.437895195871793, "grad_norm": 9.09699535369873, "learning_rate": 4.376044770695545e-05, "loss": 0.2745, "step": 12050 }, { "epoch": 0.4382585943745912, "grad_norm": 2.0993807315826416, "learning_rate": 4.3796787557235266e-05, "loss": 0.2076, "step": 12060 }, { "epoch": 0.43862199287738934, "grad_norm": 27.799428939819336, "learning_rate": 4.3833127407515085e-05, "loss": 0.548, "step": 12070 }, { "epoch": 0.4389853913801875, "grad_norm": 3.8897557258605957, "learning_rate": 4.38694672577949e-05, "loss": 0.1799, "step": 12080 }, { "epoch": 
0.4393487898829857, "grad_norm": 3.4620189666748047, "learning_rate": 4.390580710807472e-05, "loss": 0.1735, "step": 12090 }, { "epoch": 0.43971218838578385, "grad_norm": 9.587783813476562, "learning_rate": 4.394214695835454e-05, "loss": 0.3344, "step": 12100 }, { "epoch": 0.440075586888582, "grad_norm": 1.2581641674041748, "learning_rate": 4.397848680863435e-05, "loss": 0.1863, "step": 12110 }, { "epoch": 0.4404389853913802, "grad_norm": 1.3624401092529297, "learning_rate": 4.401482665891417e-05, "loss": 3.7692, "step": 12120 }, { "epoch": 0.44080238389417836, "grad_norm": 2.0099213123321533, "learning_rate": 4.405116650919399e-05, "loss": 0.1999, "step": 12130 }, { "epoch": 0.4411657823969765, "grad_norm": 2.7499871253967285, "learning_rate": 4.40875063594738e-05, "loss": 0.1854, "step": 12140 }, { "epoch": 0.4415291808997747, "grad_norm": 6.473042964935303, "learning_rate": 4.412384620975361e-05, "loss": 0.2843, "step": 12150 }, { "epoch": 0.44189257940257287, "grad_norm": 3.845900535583496, "learning_rate": 4.416018606003344e-05, "loss": 0.1747, "step": 12160 }, { "epoch": 0.442255977905371, "grad_norm": 1.4052759408950806, "learning_rate": 4.419652591031325e-05, "loss": 0.16, "step": 12170 }, { "epoch": 0.4426193764081692, "grad_norm": 3.5824673175811768, "learning_rate": 4.4232865760593064e-05, "loss": 0.7205, "step": 12180 }, { "epoch": 0.4429827749109674, "grad_norm": 1.237358570098877, "learning_rate": 4.426920561087289e-05, "loss": 0.2043, "step": 12190 }, { "epoch": 0.44334617341376553, "grad_norm": 11.106649398803711, "learning_rate": 4.43055454611527e-05, "loss": 0.2537, "step": 12200 }, { "epoch": 0.4437095719165637, "grad_norm": 1.4566165208816528, "learning_rate": 4.4341885311432516e-05, "loss": 0.174, "step": 12210 }, { "epoch": 0.4440729704193619, "grad_norm": 1.4067914485931396, "learning_rate": 4.4378225161712335e-05, "loss": 0.1672, "step": 12220 }, { "epoch": 0.44443636892216004, "grad_norm": 3.1289005279541016, "learning_rate": 
4.4414565011992154e-05, "loss": 0.2459, "step": 12230 }, { "epoch": 0.4447997674249582, "grad_norm": 1.2487775087356567, "learning_rate": 4.445090486227197e-05, "loss": 0.1911, "step": 12240 }, { "epoch": 0.4451631659277564, "grad_norm": 4.373108863830566, "learning_rate": 4.4487244712551786e-05, "loss": 0.265, "step": 12250 }, { "epoch": 0.44552656443055455, "grad_norm": 3.0927655696868896, "learning_rate": 4.4523584562831606e-05, "loss": 0.166, "step": 12260 }, { "epoch": 0.4458899629333527, "grad_norm": 1.4012075662612915, "learning_rate": 4.455992441311142e-05, "loss": 0.1631, "step": 12270 }, { "epoch": 0.4462533614361509, "grad_norm": 3.9944920539855957, "learning_rate": 4.459626426339124e-05, "loss": 0.2616, "step": 12280 }, { "epoch": 0.44661675993894906, "grad_norm": 2.412261962890625, "learning_rate": 4.463260411367106e-05, "loss": 0.1963, "step": 12290 }, { "epoch": 0.4469801584417472, "grad_norm": 8.601739883422852, "learning_rate": 4.466894396395087e-05, "loss": 0.3057, "step": 12300 }, { "epoch": 0.44734355694454536, "grad_norm": 2.1279587745666504, "learning_rate": 4.470528381423069e-05, "loss": 0.1931, "step": 12310 }, { "epoch": 0.44770695544734357, "grad_norm": 2.465534210205078, "learning_rate": 4.474162366451051e-05, "loss": 0.1701, "step": 12320 }, { "epoch": 0.4480703539501417, "grad_norm": 6.147269248962402, "learning_rate": 4.477796351479032e-05, "loss": 0.7176, "step": 12330 }, { "epoch": 0.44843375245293987, "grad_norm": 1.6242046356201172, "learning_rate": 4.481430336507014e-05, "loss": 0.1769, "step": 12340 }, { "epoch": 0.4487971509557381, "grad_norm": 7.065566539764404, "learning_rate": 4.485064321534995e-05, "loss": 0.2967, "step": 12350 }, { "epoch": 0.44916054945853623, "grad_norm": 1.9389359951019287, "learning_rate": 4.488698306562977e-05, "loss": 0.1853, "step": 12360 }, { "epoch": 0.4495239479613344, "grad_norm": 1.011250376701355, "learning_rate": 4.492332291590959e-05, "loss": 0.2036, "step": 12370 }, { "epoch": 
0.4498873464641326, "grad_norm": 2.459062099456787, "learning_rate": 4.4959662766189404e-05, "loss": 0.1865, "step": 12380 }, { "epoch": 0.45025074496693074, "grad_norm": 1.8472875356674194, "learning_rate": 4.499600261646922e-05, "loss": 0.2178, "step": 12390 }, { "epoch": 0.4506141434697289, "grad_norm": 40.6389045715332, "learning_rate": 4.5032342466749036e-05, "loss": 0.2506, "step": 12400 }, { "epoch": 0.4509775419725271, "grad_norm": 3.9729344844818115, "learning_rate": 4.5068682317028855e-05, "loss": 0.1917, "step": 12410 }, { "epoch": 0.45134094047532525, "grad_norm": 1.0262936353683472, "learning_rate": 4.5105022167308675e-05, "loss": 0.2115, "step": 12420 }, { "epoch": 0.4517043389781234, "grad_norm": 1.5356003046035767, "learning_rate": 4.514136201758849e-05, "loss": 0.1907, "step": 12430 }, { "epoch": 0.4520677374809216, "grad_norm": 1.3107296228408813, "learning_rate": 4.5177701867868307e-05, "loss": 0.195, "step": 12440 }, { "epoch": 0.45243113598371976, "grad_norm": 11.025674819946289, "learning_rate": 4.5214041718148126e-05, "loss": 0.2794, "step": 12450 }, { "epoch": 0.4527945344865179, "grad_norm": 1.8793771266937256, "learning_rate": 4.525038156842794e-05, "loss": 0.2143, "step": 12460 }, { "epoch": 0.45315793298931606, "grad_norm": 1.6508142948150635, "learning_rate": 4.528672141870776e-05, "loss": 0.1863, "step": 12470 }, { "epoch": 0.45352133149211427, "grad_norm": 4.942420959472656, "learning_rate": 4.532306126898758e-05, "loss": 0.1997, "step": 12480 }, { "epoch": 0.4538847299949124, "grad_norm": 3.1977925300598145, "learning_rate": 4.535940111926739e-05, "loss": 1.9163, "step": 12490 }, { "epoch": 0.45424812849771057, "grad_norm": 8.74572467803955, "learning_rate": 4.539574096954721e-05, "loss": 0.3186, "step": 12500 }, { "epoch": 0.4546115270005088, "grad_norm": 1.5346311330795288, "learning_rate": 4.543208081982702e-05, "loss": 0.1958, "step": 12510 }, { "epoch": 0.4549749255033069, "grad_norm": 1.622859239578247, "learning_rate": 
4.546842067010684e-05, "loss": 0.1828, "step": 12520 }, { "epoch": 0.4553383240061051, "grad_norm": 1.9394720792770386, "learning_rate": 4.550476052038666e-05, "loss": 0.2198, "step": 12530 }, { "epoch": 0.4557017225089033, "grad_norm": 1.8405578136444092, "learning_rate": 4.554110037066647e-05, "loss": 0.1789, "step": 12540 }, { "epoch": 0.45606512101170144, "grad_norm": 6.24867582321167, "learning_rate": 4.557744022094629e-05, "loss": 0.2593, "step": 12550 }, { "epoch": 0.4564285195144996, "grad_norm": 1.6062959432601929, "learning_rate": 4.561378007122611e-05, "loss": 0.1665, "step": 12560 }, { "epoch": 0.4567919180172978, "grad_norm": 1.1478540897369385, "learning_rate": 4.5650119921505924e-05, "loss": 0.1942, "step": 12570 }, { "epoch": 0.45715531652009594, "grad_norm": 2.0299808979034424, "learning_rate": 4.5686459771785744e-05, "loss": 0.2092, "step": 12580 }, { "epoch": 0.4575187150228941, "grad_norm": 1.6643180847167969, "learning_rate": 4.572279962206556e-05, "loss": 0.1714, "step": 12590 }, { "epoch": 0.45788211352569225, "grad_norm": 10.169012069702148, "learning_rate": 4.5759139472345376e-05, "loss": 0.3101, "step": 12600 }, { "epoch": 0.45788211352569225, "eval_loss": 0.408176064491272, "eval_runtime": 179.7843, "eval_samples_per_second": 41.238, "eval_steps_per_second": 5.156, "eval_wer": 0.24004756113057527, "step": 12600 }, { "epoch": 0.45824551202849045, "grad_norm": 1.8151092529296875, "learning_rate": 4.5795479322625195e-05, "loss": 0.1739, "step": 12610 }, { "epoch": 0.4586089105312886, "grad_norm": 1.1606543064117432, "learning_rate": 4.583181917290501e-05, "loss": 0.1781, "step": 12620 }, { "epoch": 0.45897230903408676, "grad_norm": 2.5139431953430176, "learning_rate": 4.586815902318483e-05, "loss": 0.2101, "step": 12630 }, { "epoch": 0.45933570753688496, "grad_norm": 3.1557183265686035, "learning_rate": 4.590449887346464e-05, "loss": 0.1925, "step": 12640 }, { "epoch": 0.4596991060396831, "grad_norm": 13.978137016296387, "learning_rate": 
4.594083872374446e-05, "loss": 0.3085, "step": 12650 }, { "epoch": 0.46006250454248127, "grad_norm": 1.5187938213348389, "learning_rate": 4.597717857402428e-05, "loss": 0.1909, "step": 12660 }, { "epoch": 0.46042590304527947, "grad_norm": 1.661890983581543, "learning_rate": 4.601351842430409e-05, "loss": 0.1729, "step": 12670 }, { "epoch": 0.4607893015480776, "grad_norm": 5.693175792694092, "learning_rate": 4.604985827458391e-05, "loss": 0.2069, "step": 12680 }, { "epoch": 0.4611527000508758, "grad_norm": 2.5228755474090576, "learning_rate": 4.608619812486373e-05, "loss": 0.1899, "step": 12690 }, { "epoch": 0.461516098553674, "grad_norm": 12.629317283630371, "learning_rate": 4.612253797514354e-05, "loss": 0.2441, "step": 12700 }, { "epoch": 0.46187949705647213, "grad_norm": 1.5003726482391357, "learning_rate": 4.615887782542336e-05, "loss": 0.1845, "step": 12710 }, { "epoch": 0.4622428955592703, "grad_norm": 1.596705675125122, "learning_rate": 4.619521767570318e-05, "loss": 0.1942, "step": 12720 }, { "epoch": 0.4626062940620685, "grad_norm": 4.299325466156006, "learning_rate": 4.623155752598299e-05, "loss": 0.1881, "step": 12730 }, { "epoch": 0.46296969256486664, "grad_norm": 2.242932081222534, "learning_rate": 4.626789737626281e-05, "loss": 0.1655, "step": 12740 }, { "epoch": 0.4633330910676648, "grad_norm": 17.353313446044922, "learning_rate": 4.630423722654263e-05, "loss": 0.3002, "step": 12750 }, { "epoch": 0.46369648957046294, "grad_norm": 1.8967528343200684, "learning_rate": 4.6340577076822444e-05, "loss": 0.1967, "step": 12760 }, { "epoch": 0.46405988807326115, "grad_norm": 1.9839125871658325, "learning_rate": 4.6376916927102264e-05, "loss": 0.1582, "step": 12770 }, { "epoch": 0.4644232865760593, "grad_norm": 1.8139293193817139, "learning_rate": 4.641325677738208e-05, "loss": 0.2527, "step": 12780 }, { "epoch": 0.46478668507885745, "grad_norm": 1.6944659948349, "learning_rate": 4.6449596627661896e-05, "loss": 0.1656, "step": 12790 }, { "epoch": 
0.46515008358165566, "grad_norm": 3.7842020988464355, "learning_rate": 4.648593647794171e-05, "loss": 0.2375, "step": 12800 }, { "epoch": 0.4655134820844538, "grad_norm": 1.8103773593902588, "learning_rate": 4.6522276328221534e-05, "loss": 0.1932, "step": 12810 }, { "epoch": 0.46587688058725196, "grad_norm": 1.4419440031051636, "learning_rate": 4.655861617850135e-05, "loss": 0.1808, "step": 12820 }, { "epoch": 0.46624027909005017, "grad_norm": 6.361825466156006, "learning_rate": 4.659495602878116e-05, "loss": 0.2105, "step": 12830 }, { "epoch": 0.4666036775928483, "grad_norm": 1.4687098264694214, "learning_rate": 4.663129587906098e-05, "loss": 0.18, "step": 12840 }, { "epoch": 0.46696707609564647, "grad_norm": 14.758776664733887, "learning_rate": 4.66676357293408e-05, "loss": 0.3001, "step": 12850 }, { "epoch": 0.4673304745984447, "grad_norm": 1.4836699962615967, "learning_rate": 4.670397557962061e-05, "loss": 0.1713, "step": 12860 }, { "epoch": 0.46769387310124283, "grad_norm": 4.860133171081543, "learning_rate": 4.674031542990043e-05, "loss": 0.1791, "step": 12870 }, { "epoch": 0.468057271604041, "grad_norm": 1.9861228466033936, "learning_rate": 4.677665528018025e-05, "loss": 0.2029, "step": 12880 }, { "epoch": 0.46842067010683913, "grad_norm": 1.9190025329589844, "learning_rate": 4.681299513046006e-05, "loss": 0.1611, "step": 12890 }, { "epoch": 0.46878406860963734, "grad_norm": 4.6381516456604, "learning_rate": 4.684933498073988e-05, "loss": 0.2646, "step": 12900 }, { "epoch": 0.4691474671124355, "grad_norm": 1.2092620134353638, "learning_rate": 4.68856748310197e-05, "loss": 0.1865, "step": 12910 }, { "epoch": 0.46951086561523364, "grad_norm": 2.7816121578216553, "learning_rate": 4.6922014681299513e-05, "loss": 0.2047, "step": 12920 }, { "epoch": 0.46987426411803185, "grad_norm": 0.629324734210968, "learning_rate": 4.6958354531579326e-05, "loss": 0.2404, "step": 12930 }, { "epoch": 0.47023766262083, "grad_norm": 4.156667232513428, "learning_rate": 
4.699469438185915e-05, "loss": 0.1604, "step": 12940 }, { "epoch": 0.47060106112362815, "grad_norm": 1.8534492254257202, "learning_rate": 4.7031034232138965e-05, "loss": 0.2364, "step": 12950 }, { "epoch": 0.47096445962642636, "grad_norm": 1.382408857345581, "learning_rate": 4.706737408241878e-05, "loss": 0.2078, "step": 12960 }, { "epoch": 0.4713278581292245, "grad_norm": 2.499023914337158, "learning_rate": 4.7103713932698603e-05, "loss": 0.1935, "step": 12970 }, { "epoch": 0.47169125663202266, "grad_norm": 2.726032257080078, "learning_rate": 4.7140053782978416e-05, "loss": 0.2143, "step": 12980 }, { "epoch": 0.47205465513482087, "grad_norm": 2.1388118267059326, "learning_rate": 4.717639363325823e-05, "loss": 0.1704, "step": 12990 }, { "epoch": 0.472418053637619, "grad_norm": 5.408501148223877, "learning_rate": 4.7212733483538055e-05, "loss": 0.2492, "step": 13000 }, { "epoch": 0.47278145214041717, "grad_norm": 1.8640841245651245, "learning_rate": 4.724907333381787e-05, "loss": 0.1958, "step": 13010 }, { "epoch": 0.4731448506432154, "grad_norm": 1.4251651763916016, "learning_rate": 4.728541318409768e-05, "loss": 0.1969, "step": 13020 }, { "epoch": 0.4735082491460135, "grad_norm": 2.2603137493133545, "learning_rate": 4.7321753034377506e-05, "loss": 0.1879, "step": 13030 }, { "epoch": 0.4738716476488117, "grad_norm": 1.7813081741333008, "learning_rate": 4.735809288465732e-05, "loss": 0.1627, "step": 13040 }, { "epoch": 0.47423504615160983, "grad_norm": 16.746126174926758, "learning_rate": 4.739443273493713e-05, "loss": 0.3058, "step": 13050 }, { "epoch": 0.47459844465440804, "grad_norm": 2.56193470954895, "learning_rate": 4.743077258521695e-05, "loss": 0.1729, "step": 13060 }, { "epoch": 0.4749618431572062, "grad_norm": 2.1787185668945312, "learning_rate": 4.746711243549677e-05, "loss": 0.1804, "step": 13070 }, { "epoch": 0.47532524166000434, "grad_norm": 3.385338544845581, "learning_rate": 4.750345228577658e-05, "loss": 0.1884, "step": 13080 }, { "epoch": 
0.47568864016280255, "grad_norm": 2.48083233833313, "learning_rate": 4.75397921360564e-05, "loss": 0.1728, "step": 13090 }, { "epoch": 0.4760520386656007, "grad_norm": 47.18072509765625, "learning_rate": 4.757613198633622e-05, "loss": 0.2427, "step": 13100 }, { "epoch": 0.47641543716839885, "grad_norm": 1.3267533779144287, "learning_rate": 4.7612471836616034e-05, "loss": 0.1847, "step": 13110 }, { "epoch": 0.47677883567119705, "grad_norm": 2.098389148712158, "learning_rate": 4.764881168689585e-05, "loss": 0.1682, "step": 13120 }, { "epoch": 0.4771422341739952, "grad_norm": 1.1197071075439453, "learning_rate": 4.768515153717567e-05, "loss": 0.166, "step": 13130 }, { "epoch": 0.47750563267679336, "grad_norm": 1.431281328201294, "learning_rate": 4.7721491387455485e-05, "loss": 0.3262, "step": 13140 }, { "epoch": 0.47786903117959156, "grad_norm": 15.357772827148438, "learning_rate": 4.7757831237735304e-05, "loss": 0.2906, "step": 13150 }, { "epoch": 0.4782324296823897, "grad_norm": 3.03275465965271, "learning_rate": 4.7794171088015124e-05, "loss": 0.207, "step": 13160 }, { "epoch": 0.47859582818518787, "grad_norm": 1.0988962650299072, "learning_rate": 4.7830510938294936e-05, "loss": 0.1788, "step": 13170 }, { "epoch": 0.4789592266879861, "grad_norm": 1.9456548690795898, "learning_rate": 4.786685078857475e-05, "loss": 0.2397, "step": 13180 }, { "epoch": 0.4793226251907842, "grad_norm": 1.7383311986923218, "learning_rate": 4.7903190638854575e-05, "loss": 0.1841, "step": 13190 }, { "epoch": 0.4796860236935824, "grad_norm": 5.512730121612549, "learning_rate": 4.793953048913439e-05, "loss": 0.2708, "step": 13200 }, { "epoch": 0.4796860236935824, "eval_loss": 0.3998795747756958, "eval_runtime": 180.9114, "eval_samples_per_second": 40.981, "eval_steps_per_second": 5.124, "eval_wer": 0.2369433804708915, "step": 13200 }, { "epoch": 0.4800494221963805, "grad_norm": 1.5843122005462646, "learning_rate": 4.79758703394142e-05, "loss": 1.0933, "step": 13210 }, { "epoch": 
0.48041282069917873, "grad_norm": 1.4696934223175049, "learning_rate": 4.8012210189694026e-05, "loss": 0.1771, "step": 13220 }, { "epoch": 0.4807762192019769, "grad_norm": 2.5620357990264893, "learning_rate": 4.804855003997384e-05, "loss": 0.2202, "step": 13230 }, { "epoch": 0.48113961770477504, "grad_norm": 73.08427429199219, "learning_rate": 4.808488989025365e-05, "loss": 0.2471, "step": 13240 }, { "epoch": 0.48150301620757324, "grad_norm": 7.291989803314209, "learning_rate": 4.812122974053347e-05, "loss": 0.2542, "step": 13250 }, { "epoch": 0.4818664147103714, "grad_norm": 1.7582112550735474, "learning_rate": 4.815756959081329e-05, "loss": 0.1884, "step": 13260 }, { "epoch": 0.48222981321316954, "grad_norm": 0.9253680109977722, "learning_rate": 4.81939094410931e-05, "loss": 0.1797, "step": 13270 }, { "epoch": 0.48259321171596775, "grad_norm": 8.042390823364258, "learning_rate": 4.823024929137292e-05, "loss": 0.192, "step": 13280 }, { "epoch": 0.4829566102187659, "grad_norm": 3.2288219928741455, "learning_rate": 4.826658914165274e-05, "loss": 0.2041, "step": 13290 }, { "epoch": 0.48332000872156405, "grad_norm": 7.657989978790283, "learning_rate": 4.8302928991932554e-05, "loss": 0.3034, "step": 13300 }, { "epoch": 0.48368340722436226, "grad_norm": 2.9273271560668945, "learning_rate": 4.833926884221237e-05, "loss": 0.2028, "step": 13310 }, { "epoch": 0.4840468057271604, "grad_norm": 4.2344865798950195, "learning_rate": 4.837560869249219e-05, "loss": 0.1817, "step": 13320 }, { "epoch": 0.48441020422995856, "grad_norm": 4.074464797973633, "learning_rate": 4.8411948542772005e-05, "loss": 0.2197, "step": 13330 }, { "epoch": 0.4847736027327567, "grad_norm": 1.7070029973983765, "learning_rate": 4.8448288393051825e-05, "loss": 0.2374, "step": 13340 }, { "epoch": 0.4851370012355549, "grad_norm": 2.5278494358062744, "learning_rate": 4.8484628243331644e-05, "loss": 0.265, "step": 13350 }, { "epoch": 0.4855003997383531, "grad_norm": 1.4800697565078735, "learning_rate": 
4.8520968093611456e-05, "loss": 0.1597, "step": 13360 }, { "epoch": 0.4858637982411512, "grad_norm": 1.238171935081482, "learning_rate": 4.8557307943891276e-05, "loss": 0.1862, "step": 13370 }, { "epoch": 0.48622719674394943, "grad_norm": 2.7711944580078125, "learning_rate": 4.859364779417109e-05, "loss": 0.1572, "step": 13380 }, { "epoch": 0.4865905952467476, "grad_norm": 2.386011838912964, "learning_rate": 4.862998764445091e-05, "loss": 0.1624, "step": 13390 }, { "epoch": 0.48695399374954573, "grad_norm": 10.38249397277832, "learning_rate": 4.866632749473072e-05, "loss": 0.2182, "step": 13400 }, { "epoch": 0.48731739225234394, "grad_norm": 1.1541043519973755, "learning_rate": 4.870266734501054e-05, "loss": 0.1867, "step": 13410 }, { "epoch": 0.4876807907551421, "grad_norm": 0.7680534720420837, "learning_rate": 4.873900719529036e-05, "loss": 0.1619, "step": 13420 }, { "epoch": 0.48804418925794024, "grad_norm": 2.6120142936706543, "learning_rate": 4.877534704557017e-05, "loss": 1.0657, "step": 13430 }, { "epoch": 0.48840758776073845, "grad_norm": 2.1559348106384277, "learning_rate": 4.881168689584999e-05, "loss": 0.1576, "step": 13440 }, { "epoch": 0.4887709862635366, "grad_norm": 8.222488403320312, "learning_rate": 4.884802674612981e-05, "loss": 0.2596, "step": 13450 }, { "epoch": 0.48913438476633475, "grad_norm": 1.7630010843276978, "learning_rate": 4.888436659640962e-05, "loss": 0.1755, "step": 13460 }, { "epoch": 0.48949778326913296, "grad_norm": 1.489050269126892, "learning_rate": 4.892070644668944e-05, "loss": 0.1844, "step": 13470 }, { "epoch": 0.4898611817719311, "grad_norm": 4.412111759185791, "learning_rate": 4.895704629696926e-05, "loss": 0.2114, "step": 13480 }, { "epoch": 0.49022458027472926, "grad_norm": 2.060366630554199, "learning_rate": 4.8993386147249074e-05, "loss": 0.1932, "step": 13490 }, { "epoch": 0.4905879787775274, "grad_norm": 9.488603591918945, "learning_rate": 4.9029725997528893e-05, "loss": 0.303, "step": 13500 }, { "epoch": 
0.4909513772803256, "grad_norm": 2.295671224594116, "learning_rate": 4.906606584780871e-05, "loss": 0.1583, "step": 13510 }, { "epoch": 0.49131477578312377, "grad_norm": 4.13812255859375, "learning_rate": 4.9102405698088525e-05, "loss": 1.9041, "step": 13520 }, { "epoch": 0.4916781742859219, "grad_norm": 3.7411348819732666, "learning_rate": 4.9138745548368345e-05, "loss": 0.1927, "step": 13530 }, { "epoch": 0.4920415727887201, "grad_norm": 1.523505449295044, "learning_rate": 4.917508539864816e-05, "loss": 0.1721, "step": 13540 }, { "epoch": 0.4924049712915183, "grad_norm": 8.239662170410156, "learning_rate": 4.921142524892798e-05, "loss": 0.3205, "step": 13550 }, { "epoch": 0.49276836979431643, "grad_norm": 1.8316904306411743, "learning_rate": 4.9247765099207796e-05, "loss": 0.172, "step": 13560 }, { "epoch": 0.49313176829711464, "grad_norm": 4.627805233001709, "learning_rate": 4.928410494948761e-05, "loss": 0.1731, "step": 13570 }, { "epoch": 0.4934951667999128, "grad_norm": 4.277485370635986, "learning_rate": 4.932044479976743e-05, "loss": 0.2522, "step": 13580 }, { "epoch": 0.49385856530271094, "grad_norm": 2.131641149520874, "learning_rate": 4.935678465004725e-05, "loss": 0.1766, "step": 13590 }, { "epoch": 0.49422196380550915, "grad_norm": 2.9195988178253174, "learning_rate": 4.939312450032706e-05, "loss": 0.3745, "step": 13600 }, { "epoch": 0.4945853623083073, "grad_norm": 1.5876374244689941, "learning_rate": 4.942946435060688e-05, "loss": 0.1716, "step": 13610 }, { "epoch": 0.49494876081110545, "grad_norm": 4.506389617919922, "learning_rate": 4.946580420088669e-05, "loss": 0.5847, "step": 13620 }, { "epoch": 0.4953121593139036, "grad_norm": 3.497152090072632, "learning_rate": 4.950214405116651e-05, "loss": 0.2179, "step": 13630 }, { "epoch": 0.4956755578167018, "grad_norm": 1.7728289365768433, "learning_rate": 4.953848390144633e-05, "loss": 0.165, "step": 13640 }, { "epoch": 0.49603895631949996, "grad_norm": 12.01921558380127, "learning_rate": 
4.957482375172614e-05, "loss": 0.2447, "step": 13650 }, { "epoch": 0.4964023548222981, "grad_norm": 2.5448553562164307, "learning_rate": 4.961116360200596e-05, "loss": 0.2089, "step": 13660 }, { "epoch": 0.4967657533250963, "grad_norm": 2.3643887042999268, "learning_rate": 4.9647503452285775e-05, "loss": 0.1724, "step": 13670 }, { "epoch": 0.49712915182789447, "grad_norm": 2.096191644668579, "learning_rate": 4.9683843302565594e-05, "loss": 0.1759, "step": 13680 }, { "epoch": 0.4974925503306926, "grad_norm": 0.9760168790817261, "learning_rate": 4.9720183152845414e-05, "loss": 0.1817, "step": 13690 }, { "epoch": 0.4978559488334908, "grad_norm": 3.019702434539795, "learning_rate": 4.9756523003125226e-05, "loss": 0.2275, "step": 13700 }, { "epoch": 0.498219347336289, "grad_norm": 1.0820231437683105, "learning_rate": 4.9789228868377064e-05, "loss": 2.5822, "step": 13710 }, { "epoch": 0.4985827458390871, "grad_norm": 3.2908883094787598, "learning_rate": 4.982556871865688e-05, "loss": 0.1898, "step": 13720 }, { "epoch": 0.49894614434188533, "grad_norm": 3.4303886890411377, "learning_rate": 4.98619085689367e-05, "loss": 0.2295, "step": 13730 }, { "epoch": 0.4993095428446835, "grad_norm": 1.8785525560379028, "learning_rate": 4.9898248419216515e-05, "loss": 0.1699, "step": 13740 }, { "epoch": 0.49967294134748164, "grad_norm": 7.539544105529785, "learning_rate": 4.993458826949633e-05, "loss": 0.2955, "step": 13750 }, { "epoch": 0.5000363398502798, "grad_norm": 1.6091630458831787, "learning_rate": 4.997092811977615e-05, "loss": 0.1696, "step": 13760 }, { "epoch": 0.5003997383530779, "grad_norm": 1.023695945739746, "learning_rate": 4.9999999978456776e-05, "loss": 0.1872, "step": 13770 }, { "epoch": 0.5007631368558761, "grad_norm": 8.364274978637695, "learning_rate": 4.999999922444405e-05, "loss": 0.1844, "step": 13780 }, { "epoch": 0.5011265353586744, "grad_norm": 1.7257829904556274, "learning_rate": 4.99999973932703e-05, "loss": 0.241, "step": 13790 }, { "epoch": 
0.5014899338614724, "grad_norm": 7.256163597106934, "learning_rate": 4.999999448493561e-05, "loss": 0.2714, "step": 13800 }, { "epoch": 0.5014899338614724, "eval_loss": 0.4298999607563019, "eval_runtime": 179.7223, "eval_samples_per_second": 41.253, "eval_steps_per_second": 5.158, "eval_wer": 0.22517109299834806, "step": 13800 }, { "epoch": 0.5018533323642707, "grad_norm": 3.087979316711426, "learning_rate": 4.999999049944011e-05, "loss": 0.3094, "step": 13810 }, { "epoch": 0.5022167308670689, "grad_norm": 1.7626384496688843, "learning_rate": 4.999998543678397e-05, "loss": 0.2521, "step": 13820 }, { "epoch": 0.502580129369867, "grad_norm": 2.257432699203491, "learning_rate": 4.999997929696741e-05, "loss": 0.1913, "step": 13830 }, { "epoch": 0.5029435278726652, "grad_norm": 1.7763293981552124, "learning_rate": 4.999997207999069e-05, "loss": 0.1812, "step": 13840 }, { "epoch": 0.5033069263754634, "grad_norm": 8.228759765625, "learning_rate": 4.9999963785854124e-05, "loss": 0.2953, "step": 13850 }, { "epoch": 0.5036703248782615, "grad_norm": 1.200305461883545, "learning_rate": 4.999995441455807e-05, "loss": 0.3246, "step": 13860 }, { "epoch": 0.5040337233810597, "grad_norm": 1.9264732599258423, "learning_rate": 4.999994396610292e-05, "loss": 0.1749, "step": 13870 }, { "epoch": 0.5043971218838579, "grad_norm": 2.547212839126587, "learning_rate": 4.999993244048915e-05, "loss": 0.2714, "step": 13880 }, { "epoch": 0.504760520386656, "grad_norm": 2.7918379306793213, "learning_rate": 4.999991983771723e-05, "loss": 0.1984, "step": 13890 }, { "epoch": 0.5051239188894542, "grad_norm": 16.789764404296875, "learning_rate": 4.999990615778772e-05, "loss": 0.239, "step": 13900 }, { "epoch": 0.5054873173922524, "grad_norm": 1.1825790405273438, "learning_rate": 4.9999891400701205e-05, "loss": 0.1774, "step": 13910 }, { "epoch": 0.5058507158950505, "grad_norm": 2.1524746417999268, "learning_rate": 4.999987556645832e-05, "loss": 0.1956, "step": 13920 }, { "epoch": 0.5062141143978487, 
"grad_norm": 2.8159048557281494, "learning_rate": 4.999985865505974e-05, "loss": 0.2315, "step": 13930 }, { "epoch": 0.5065775129006469, "grad_norm": 1.7412035465240479, "learning_rate": 4.99998406665062e-05, "loss": 0.2106, "step": 13940 }, { "epoch": 0.506940911403445, "grad_norm": 61.967708587646484, "learning_rate": 4.999982160079848e-05, "loss": 0.3067, "step": 13950 }, { "epoch": 0.5073043099062432, "grad_norm": 2.378682851791382, "learning_rate": 4.9999801457937404e-05, "loss": 0.21, "step": 13960 }, { "epoch": 0.5076677084090414, "grad_norm": 1.3668854236602783, "learning_rate": 4.9999780237923824e-05, "loss": 0.1529, "step": 13970 }, { "epoch": 0.5080311069118395, "grad_norm": 2.2655959129333496, "learning_rate": 4.9999757940758665e-05, "loss": 0.1747, "step": 13980 }, { "epoch": 0.5083945054146377, "grad_norm": 1.5975615978240967, "learning_rate": 4.9999734566442877e-05, "loss": 0.1728, "step": 13990 }, { "epoch": 0.5087579039174358, "grad_norm": 9.869553565979004, "learning_rate": 4.999971011497748e-05, "loss": 0.3207, "step": 14000 }, { "epoch": 0.509121302420234, "grad_norm": 2.3095829486846924, "learning_rate": 4.999968458636353e-05, "loss": 0.1872, "step": 14010 }, { "epoch": 0.5094847009230322, "grad_norm": 2.059575080871582, "learning_rate": 4.999965798060212e-05, "loss": 0.3059, "step": 14020 }, { "epoch": 0.5098480994258303, "grad_norm": 1.7138803005218506, "learning_rate": 4.9999630297694395e-05, "loss": 0.2025, "step": 14030 }, { "epoch": 0.5102114979286285, "grad_norm": 2.831191062927246, "learning_rate": 4.999960153764155e-05, "loss": 0.1685, "step": 14040 }, { "epoch": 0.5105748964314267, "grad_norm": 15.457362174987793, "learning_rate": 4.999957170044482e-05, "loss": 0.2165, "step": 14050 }, { "epoch": 0.5109382949342248, "grad_norm": 3.923633337020874, "learning_rate": 4.999954078610549e-05, "loss": 0.1888, "step": 14060 }, { "epoch": 0.511301693437023, "grad_norm": 0.8243936896324158, "learning_rate": 4.999950879462491e-05, "loss": 0.295, 
"step": 14070 }, { "epoch": 0.5116650919398212, "grad_norm": 2.921447277069092, "learning_rate": 4.9999475726004434e-05, "loss": 0.3208, "step": 14080 }, { "epoch": 0.5120284904426193, "grad_norm": 0.9395463466644287, "learning_rate": 4.99994415802455e-05, "loss": 0.1936, "step": 14090 }, { "epoch": 0.5123918889454175, "grad_norm": 11.025691986083984, "learning_rate": 4.999940635734958e-05, "loss": 0.2581, "step": 14100 }, { "epoch": 0.5127552874482157, "grad_norm": 2.2102460861206055, "learning_rate": 4.999937005731818e-05, "loss": 0.1888, "step": 14110 }, { "epoch": 0.5131186859510138, "grad_norm": 1.6075447797775269, "learning_rate": 4.9999332680152876e-05, "loss": 0.1557, "step": 14120 }, { "epoch": 0.513482084453812, "grad_norm": 3.0174403190612793, "learning_rate": 4.999929422585528e-05, "loss": 0.2137, "step": 14130 }, { "epoch": 0.5138454829566103, "grad_norm": 3.2911272048950195, "learning_rate": 4.999925469442705e-05, "loss": 0.2249, "step": 14140 }, { "epoch": 0.5142088814594084, "grad_norm": 4.0001444816589355, "learning_rate": 4.999921408586986e-05, "loss": 0.2548, "step": 14150 }, { "epoch": 0.5145722799622066, "grad_norm": 2.7695538997650146, "learning_rate": 4.9999172400185504e-05, "loss": 0.2107, "step": 14160 }, { "epoch": 0.5149356784650048, "grad_norm": 1.420189380645752, "learning_rate": 4.999912963737574e-05, "loss": 0.1887, "step": 14170 }, { "epoch": 0.5152990769678029, "grad_norm": 1.4330711364746094, "learning_rate": 4.9999085797442434e-05, "loss": 0.2295, "step": 14180 }, { "epoch": 0.5156624754706011, "grad_norm": 1.9518648386001587, "learning_rate": 4.999904088038747e-05, "loss": 0.181, "step": 14190 }, { "epoch": 0.5160258739733993, "grad_norm": 9.763446807861328, "learning_rate": 4.999899488621278e-05, "loss": 0.2163, "step": 14200 }, { "epoch": 0.5163892724761974, "grad_norm": 1.63487708568573, "learning_rate": 4.999894781492035e-05, "loss": 0.1675, "step": 14210 }, { "epoch": 0.5167526709789956, "grad_norm": 1.3337619304656982, 
"learning_rate": 4.99988996665122e-05, "loss": 1.8258, "step": 14220 }, { "epoch": 0.5171160694817938, "grad_norm": 4.741299152374268, "learning_rate": 4.9998850440990414e-05, "loss": 0.199, "step": 14230 }, { "epoch": 0.5174794679845919, "grad_norm": 2.203994035720825, "learning_rate": 4.9998800138357106e-05, "loss": 0.1666, "step": 14240 }, { "epoch": 0.5178428664873901, "grad_norm": 9.144301414489746, "learning_rate": 4.999874875861444e-05, "loss": 0.2567, "step": 14250 }, { "epoch": 0.5182062649901883, "grad_norm": 1.432627558708191, "learning_rate": 4.9998696301764644e-05, "loss": 0.1842, "step": 14260 }, { "epoch": 0.5185696634929864, "grad_norm": 1.5303106307983398, "learning_rate": 4.999864276780998e-05, "loss": 0.1726, "step": 14270 }, { "epoch": 0.5189330619957846, "grad_norm": 13.468036651611328, "learning_rate": 4.999858815675273e-05, "loss": 0.1927, "step": 14280 }, { "epoch": 0.5192964604985827, "grad_norm": 3.7133965492248535, "learning_rate": 4.999853246859526e-05, "loss": 0.1822, "step": 14290 }, { "epoch": 0.5196598590013809, "grad_norm": 10.077652931213379, "learning_rate": 4.999847570333998e-05, "loss": 0.2847, "step": 14300 }, { "epoch": 0.5200232575041791, "grad_norm": 2.3906922340393066, "learning_rate": 4.9998417860989325e-05, "loss": 0.1962, "step": 14310 }, { "epoch": 0.5203866560069772, "grad_norm": 0.8041434288024902, "learning_rate": 4.999835894154579e-05, "loss": 0.1661, "step": 14320 }, { "epoch": 0.5207500545097754, "grad_norm": 4.1071953773498535, "learning_rate": 4.99982989450119e-05, "loss": 0.2012, "step": 14330 }, { "epoch": 0.5211134530125736, "grad_norm": 0.9645094871520996, "learning_rate": 4.999823787139026e-05, "loss": 0.209, "step": 14340 }, { "epoch": 0.5214768515153717, "grad_norm": 19.18789291381836, "learning_rate": 4.9998175720683506e-05, "loss": 0.3019, "step": 14350 }, { "epoch": 0.5218402500181699, "grad_norm": 1.6560392379760742, "learning_rate": 4.999811249289429e-05, "loss": 0.1696, "step": 14360 }, { "epoch": 
0.5222036485209681, "grad_norm": 1.993741512298584, "learning_rate": 4.999804818802535e-05, "loss": 0.1895, "step": 14370 }, { "epoch": 0.5225670470237662, "grad_norm": 3.4508492946624756, "learning_rate": 4.999798280607947e-05, "loss": 0.2111, "step": 14380 }, { "epoch": 0.5229304455265644, "grad_norm": 8.431037902832031, "learning_rate": 4.999791634705944e-05, "loss": 0.1898, "step": 14390 }, { "epoch": 0.5232938440293626, "grad_norm": 10.659805297851562, "learning_rate": 4.9997848810968137e-05, "loss": 0.4744, "step": 14400 }, { "epoch": 0.5232938440293626, "eval_loss": 0.40915772318840027, "eval_runtime": 179.6286, "eval_samples_per_second": 41.274, "eval_steps_per_second": 5.161, "eval_wer": 0.2273222357361991, "step": 14400 }, { "epoch": 0.5236572425321607, "grad_norm": 1.676483392715454, "learning_rate": 4.999778019780849e-05, "loss": 0.1856, "step": 14410 }, { "epoch": 0.5240206410349589, "grad_norm": 3.4859771728515625, "learning_rate": 4.9997710507583414e-05, "loss": 0.1641, "step": 14420 }, { "epoch": 0.5243840395377571, "grad_norm": 2.583261251449585, "learning_rate": 4.999763974029595e-05, "loss": 0.2545, "step": 14430 }, { "epoch": 0.5247474380405552, "grad_norm": 2.0467324256896973, "learning_rate": 4.999756789594913e-05, "loss": 0.1974, "step": 14440 }, { "epoch": 0.5251108365433534, "grad_norm": 4.777310848236084, "learning_rate": 4.999749497454605e-05, "loss": 0.2653, "step": 14450 }, { "epoch": 0.5254742350461516, "grad_norm": 1.6312458515167236, "learning_rate": 4.999742097608984e-05, "loss": 0.1503, "step": 14460 }, { "epoch": 0.5258376335489497, "grad_norm": 1.1725629568099976, "learning_rate": 4.999734590058371e-05, "loss": 0.1636, "step": 14470 }, { "epoch": 0.526201032051748, "grad_norm": 3.2061386108398438, "learning_rate": 4.999726974803089e-05, "loss": 0.1988, "step": 14480 }, { "epoch": 0.5265644305545462, "grad_norm": 1.7078185081481934, "learning_rate": 4.9997192518434655e-05, "loss": 0.1763, "step": 14490 }, { "epoch": 
0.5269278290573443, "grad_norm": 3.5756313800811768, "learning_rate": 4.999711421179833e-05, "loss": 0.2651, "step": 14500 }, { "epoch": 0.5272912275601425, "grad_norm": 1.8054040670394897, "learning_rate": 4.99970348281253e-05, "loss": 0.1923, "step": 14510 }, { "epoch": 0.5276546260629407, "grad_norm": 2.8949921131134033, "learning_rate": 4.9996954367418976e-05, "loss": 0.1897, "step": 14520 }, { "epoch": 0.5280180245657388, "grad_norm": 2.0020744800567627, "learning_rate": 4.9996872829682825e-05, "loss": 0.2469, "step": 14530 }, { "epoch": 0.528381423068537, "grad_norm": 1.1650570631027222, "learning_rate": 4.999679021492037e-05, "loss": 0.2088, "step": 14540 }, { "epoch": 0.5287448215713352, "grad_norm": 14.624237060546875, "learning_rate": 4.999670652313516e-05, "loss": 0.2918, "step": 14550 }, { "epoch": 0.5291082200741333, "grad_norm": 1.6658445596694946, "learning_rate": 4.99966217543308e-05, "loss": 0.1936, "step": 14560 }, { "epoch": 0.5294716185769315, "grad_norm": 2.0761842727661133, "learning_rate": 4.9996535908510955e-05, "loss": 0.2318, "step": 14570 }, { "epoch": 0.5298350170797297, "grad_norm": 2.475193977355957, "learning_rate": 4.999644898567931e-05, "loss": 0.1682, "step": 14580 }, { "epoch": 0.5301984155825278, "grad_norm": 1.7537975311279297, "learning_rate": 4.9996360985839616e-05, "loss": 0.1528, "step": 14590 }, { "epoch": 0.530561814085326, "grad_norm": 6.486474990844727, "learning_rate": 4.9996271908995666e-05, "loss": 0.2571, "step": 14600 }, { "epoch": 0.5309252125881241, "grad_norm": 2.308250665664673, "learning_rate": 4.9996181755151294e-05, "loss": 0.1764, "step": 14610 }, { "epoch": 0.5312886110909223, "grad_norm": 4.871829032897949, "learning_rate": 4.999609052431039e-05, "loss": 0.3045, "step": 14620 }, { "epoch": 0.5316520095937205, "grad_norm": 2.796844959259033, "learning_rate": 4.999599821647688e-05, "loss": 0.2102, "step": 14630 }, { "epoch": 0.5320154080965186, "grad_norm": 1.9681658744812012, "learning_rate": 
4.999590483165475e-05, "loss": 0.1882, "step": 14640 }, { "epoch": 0.5323788065993168, "grad_norm": 5.858233451843262, "learning_rate": 4.9995810369848006e-05, "loss": 0.314, "step": 14650 }, { "epoch": 0.532742205102115, "grad_norm": 6.469663143157959, "learning_rate": 4.9995714831060736e-05, "loss": 0.2103, "step": 14660 }, { "epoch": 0.5331056036049131, "grad_norm": 1.8543453216552734, "learning_rate": 4.999561821529705e-05, "loss": 0.219, "step": 14670 }, { "epoch": 0.5334690021077113, "grad_norm": 2.222320318222046, "learning_rate": 4.99955205225611e-05, "loss": 0.1879, "step": 14680 }, { "epoch": 0.5338324006105095, "grad_norm": 5.018227577209473, "learning_rate": 4.999542175285711e-05, "loss": 0.1437, "step": 14690 }, { "epoch": 0.5341957991133076, "grad_norm": 6.225541114807129, "learning_rate": 4.999532190618933e-05, "loss": 0.268, "step": 14700 }, { "epoch": 0.5345591976161058, "grad_norm": 1.8122676610946655, "learning_rate": 4.999522098256206e-05, "loss": 0.1644, "step": 14710 }, { "epoch": 0.534922596118904, "grad_norm": 2.4057557582855225, "learning_rate": 4.999511898197966e-05, "loss": 0.1663, "step": 14720 }, { "epoch": 0.5352859946217021, "grad_norm": 1.756697416305542, "learning_rate": 4.9995015904446513e-05, "loss": 0.1771, "step": 14730 }, { "epoch": 0.5356493931245003, "grad_norm": 1.5457457304000854, "learning_rate": 4.999491174996706e-05, "loss": 0.1889, "step": 14740 }, { "epoch": 0.5360127916272985, "grad_norm": 3.108682155609131, "learning_rate": 4.999480651854579e-05, "loss": 0.2063, "step": 14750 }, { "epoch": 0.5363761901300966, "grad_norm": 2.2037875652313232, "learning_rate": 4.9994700210187246e-05, "loss": 0.1579, "step": 14760 }, { "epoch": 0.5367395886328948, "grad_norm": 1.2102454900741577, "learning_rate": 4.9994592824895994e-05, "loss": 0.2361, "step": 14770 }, { "epoch": 0.537102987135693, "grad_norm": 6.5722455978393555, "learning_rate": 4.999448436267667e-05, "loss": 0.2165, "step": 14780 }, { "epoch": 0.5374663856384911, 
"grad_norm": 1.606378197669983, "learning_rate": 4.999437482353395e-05, "loss": 0.1642, "step": 14790 }, { "epoch": 0.5378297841412893, "grad_norm": 24.709177017211914, "learning_rate": 4.999426420747255e-05, "loss": 0.2628, "step": 14800 }, { "epoch": 0.5381931826440876, "grad_norm": 2.543760299682617, "learning_rate": 4.999415251449723e-05, "loss": 0.1883, "step": 14810 }, { "epoch": 0.5385565811468856, "grad_norm": 2.0813279151916504, "learning_rate": 4.999403974461281e-05, "loss": 0.1842, "step": 14820 }, { "epoch": 0.5389199796496839, "grad_norm": 4.744104385375977, "learning_rate": 4.9993925897824144e-05, "loss": 0.1981, "step": 14830 }, { "epoch": 0.5392833781524821, "grad_norm": 3.2407493591308594, "learning_rate": 4.9993810974136146e-05, "loss": 0.2169, "step": 14840 }, { "epoch": 0.5396467766552802, "grad_norm": 13.33681869506836, "learning_rate": 4.999369497355375e-05, "loss": 0.2775, "step": 14850 }, { "epoch": 0.5400101751580784, "grad_norm": 2.3192784786224365, "learning_rate": 4.9993577896081975e-05, "loss": 0.1987, "step": 14860 }, { "epoch": 0.5403735736608766, "grad_norm": 1.6611911058425903, "learning_rate": 4.999345974172586e-05, "loss": 0.188, "step": 14870 }, { "epoch": 0.5407369721636747, "grad_norm": 4.368532180786133, "learning_rate": 4.9993340510490485e-05, "loss": 0.2201, "step": 14880 }, { "epoch": 0.5411003706664729, "grad_norm": 1.4825586080551147, "learning_rate": 4.999322020238099e-05, "loss": 0.185, "step": 14890 }, { "epoch": 0.541463769169271, "grad_norm": 4.346343994140625, "learning_rate": 4.9993098817402564e-05, "loss": 0.2415, "step": 14900 }, { "epoch": 0.5418271676720692, "grad_norm": 1.0175251960754395, "learning_rate": 4.999297635556044e-05, "loss": 0.1991, "step": 14910 }, { "epoch": 0.5421905661748674, "grad_norm": 1.600205421447754, "learning_rate": 4.999285281685989e-05, "loss": 0.1706, "step": 14920 }, { "epoch": 0.5425539646776655, "grad_norm": 4.332497596740723, "learning_rate": 4.999272820130623e-05, "loss": 
0.1964, "step": 14930 }, { "epoch": 0.5429173631804637, "grad_norm": 2.0384531021118164, "learning_rate": 4.999260250890484e-05, "loss": 0.1571, "step": 14940 }, { "epoch": 0.5432807616832619, "grad_norm": 11.780756950378418, "learning_rate": 4.999247573966114e-05, "loss": 0.319, "step": 14950 }, { "epoch": 0.54364416018606, "grad_norm": 2.7058663368225098, "learning_rate": 4.999234789358057e-05, "loss": 0.2009, "step": 14960 }, { "epoch": 0.5440075586888582, "grad_norm": 1.966780662536621, "learning_rate": 4.999221897066866e-05, "loss": 0.177, "step": 14970 }, { "epoch": 0.5443709571916564, "grad_norm": 2.2129642963409424, "learning_rate": 4.999208897093096e-05, "loss": 0.2472, "step": 14980 }, { "epoch": 0.5447343556944545, "grad_norm": 2.726358652114868, "learning_rate": 4.9991957894373064e-05, "loss": 0.2239, "step": 14990 }, { "epoch": 0.5450977541972527, "grad_norm": 28.577600479125977, "learning_rate": 4.999182574100063e-05, "loss": 0.2524, "step": 15000 }, { "epoch": 0.5450977541972527, "eval_loss": 0.3972287178039551, "eval_runtime": 180.8086, "eval_samples_per_second": 41.005, "eval_steps_per_second": 5.127, "eval_wer": 0.2289560150307695, "step": 15000 }, { "epoch": 0.5454611527000509, "grad_norm": 1.9243866205215454, "learning_rate": 4.9991692510819335e-05, "loss": 0.1679, "step": 15010 }, { "epoch": 0.545824551202849, "grad_norm": 1.3926585912704468, "learning_rate": 4.9991558203834944e-05, "loss": 0.1933, "step": 15020 }, { "epoch": 0.5461879497056472, "grad_norm": 5.275027751922607, "learning_rate": 4.999142282005322e-05, "loss": 0.2838, "step": 15030 }, { "epoch": 0.5465513482084454, "grad_norm": 2.142784357070923, "learning_rate": 4.999128635948e-05, "loss": 0.1754, "step": 15040 }, { "epoch": 0.5469147467112435, "grad_norm": 40.32966995239258, "learning_rate": 4.999114882212119e-05, "loss": 0.323, "step": 15050 }, { "epoch": 0.5472781452140417, "grad_norm": 1.056662678718567, "learning_rate": 4.999101020798268e-05, "loss": 0.1462, "step": 15060 }, 
{ "epoch": 0.5476415437168399, "grad_norm": 3.7527568340301514, "learning_rate": 4.9990870517070464e-05, "loss": 0.2106, "step": 15070 }, { "epoch": 0.548004942219638, "grad_norm": 3.396487236022949, "learning_rate": 4.9990729749390555e-05, "loss": 0.1995, "step": 15080 }, { "epoch": 0.5483683407224362, "grad_norm": 1.650519609451294, "learning_rate": 4.999058790494902e-05, "loss": 0.195, "step": 15090 }, { "epoch": 0.5487317392252344, "grad_norm": 16.096418380737305, "learning_rate": 4.9990444983751975e-05, "loss": 0.2705, "step": 15100 }, { "epoch": 0.5490951377280325, "grad_norm": 1.273149847984314, "learning_rate": 4.999030098580556e-05, "loss": 0.2216, "step": 15110 }, { "epoch": 0.5494585362308307, "grad_norm": 1.5414496660232544, "learning_rate": 4.9990155911115995e-05, "loss": 0.1876, "step": 15120 }, { "epoch": 0.549821934733629, "grad_norm": 4.707805633544922, "learning_rate": 4.9990009759689524e-05, "loss": 0.1895, "step": 15130 }, { "epoch": 0.550185333236427, "grad_norm": 2.033162832260132, "learning_rate": 4.9989862531532456e-05, "loss": 0.1705, "step": 15140 }, { "epoch": 0.5505487317392252, "grad_norm": 7.349232196807861, "learning_rate": 4.998971422665112e-05, "loss": 0.2815, "step": 15150 }, { "epoch": 0.5509121302420235, "grad_norm": 1.293078064918518, "learning_rate": 4.9989564845051915e-05, "loss": 0.1789, "step": 15160 }, { "epoch": 0.5512755287448216, "grad_norm": 1.7343147993087769, "learning_rate": 4.998941438674127e-05, "loss": 0.1781, "step": 15170 }, { "epoch": 0.5516389272476198, "grad_norm": 2.440030574798584, "learning_rate": 4.9989262851725674e-05, "loss": 0.1927, "step": 15180 }, { "epoch": 0.5520023257504179, "grad_norm": 2.276111364364624, "learning_rate": 4.998911024001165e-05, "loss": 0.1774, "step": 15190 }, { "epoch": 0.5523657242532161, "grad_norm": 9.360533714294434, "learning_rate": 4.9988956551605783e-05, "loss": 0.2761, "step": 15200 }, { "epoch": 0.5527291227560143, "grad_norm": 3.8025522232055664, "learning_rate": 
4.998880178651468e-05, "loss": 0.2855, "step": 15210 }, { "epoch": 0.5530925212588124, "grad_norm": 3.816631555557251, "learning_rate": 4.998864594474503e-05, "loss": 0.1559, "step": 15220 }, { "epoch": 0.5534559197616106, "grad_norm": 3.2255067825317383, "learning_rate": 4.998848902630353e-05, "loss": 0.1632, "step": 15230 }, { "epoch": 0.5538193182644088, "grad_norm": 1.077268123626709, "learning_rate": 4.9988331031196944e-05, "loss": 0.1969, "step": 15240 }, { "epoch": 0.5541827167672069, "grad_norm": 5.657801151275635, "learning_rate": 4.998817195943209e-05, "loss": 0.2361, "step": 15250 }, { "epoch": 0.5545461152700051, "grad_norm": 1.180039882659912, "learning_rate": 4.998801181101581e-05, "loss": 0.1779, "step": 15260 }, { "epoch": 0.5549095137728033, "grad_norm": 2.12725830078125, "learning_rate": 4.998785058595501e-05, "loss": 0.1505, "step": 15270 }, { "epoch": 0.5552729122756014, "grad_norm": 2.0784361362457275, "learning_rate": 4.998768828425664e-05, "loss": 0.2221, "step": 15280 }, { "epoch": 0.5556363107783996, "grad_norm": 2.0133538246154785, "learning_rate": 4.998752490592768e-05, "loss": 0.1759, "step": 15290 }, { "epoch": 0.5559997092811978, "grad_norm": 3.3181140422821045, "learning_rate": 4.998736045097518e-05, "loss": 0.229, "step": 15300 }, { "epoch": 0.5563631077839959, "grad_norm": 1.2881536483764648, "learning_rate": 4.998719491940622e-05, "loss": 0.1928, "step": 15310 }, { "epoch": 0.5567265062867941, "grad_norm": 1.0155376195907593, "learning_rate": 4.998702831122794e-05, "loss": 0.1986, "step": 15320 }, { "epoch": 0.5570899047895923, "grad_norm": 7.5557661056518555, "learning_rate": 4.998686062644752e-05, "loss": 0.2317, "step": 15330 }, { "epoch": 0.5574533032923904, "grad_norm": 2.3196377754211426, "learning_rate": 4.9986691865072176e-05, "loss": 0.1827, "step": 15340 }, { "epoch": 0.5578167017951886, "grad_norm": 25.910188674926758, "learning_rate": 4.998652202710918e-05, "loss": 0.2824, "step": 15350 }, { "epoch": 0.5581801002979868, 
"grad_norm": 1.0091907978057861, "learning_rate": 4.9986351112565846e-05, "loss": 0.1946, "step": 15360 }, { "epoch": 0.5585434988007849, "grad_norm": 3.0022408962249756, "learning_rate": 4.998617912144956e-05, "loss": 0.2028, "step": 15370 }, { "epoch": 0.5589068973035831, "grad_norm": 2.9837419986724854, "learning_rate": 4.99860060537677e-05, "loss": 0.203, "step": 15380 }, { "epoch": 0.5592702958063813, "grad_norm": 2.238867998123169, "learning_rate": 4.9985831909527746e-05, "loss": 0.1392, "step": 15390 }, { "epoch": 0.5596336943091794, "grad_norm": 3.8585119247436523, "learning_rate": 4.9985656688737205e-05, "loss": 0.2289, "step": 15400 }, { "epoch": 0.5599970928119776, "grad_norm": 2.4951331615448, "learning_rate": 4.998548039140361e-05, "loss": 0.1852, "step": 15410 }, { "epoch": 0.5603604913147758, "grad_norm": 1.8404667377471924, "learning_rate": 4.998530301753455e-05, "loss": 0.1813, "step": 15420 }, { "epoch": 0.5607238898175739, "grad_norm": 2.615247964859009, "learning_rate": 4.9985124567137695e-05, "loss": 0.228, "step": 15430 }, { "epoch": 0.5610872883203721, "grad_norm": 1.2074272632598877, "learning_rate": 4.9984945040220715e-05, "loss": 0.1879, "step": 15440 }, { "epoch": 0.5614506868231703, "grad_norm": 38.466712951660156, "learning_rate": 4.9984764436791355e-05, "loss": 0.3965, "step": 15450 }, { "epoch": 0.5618140853259684, "grad_norm": 1.4196547269821167, "learning_rate": 4.998458275685739e-05, "loss": 0.2061, "step": 15460 }, { "epoch": 0.5621774838287666, "grad_norm": 1.2451281547546387, "learning_rate": 4.998440000042664e-05, "loss": 0.2118, "step": 15470 }, { "epoch": 0.5625408823315647, "grad_norm": 3.7021896839141846, "learning_rate": 4.9984216167507005e-05, "loss": 0.2294, "step": 15480 }, { "epoch": 0.562904280834363, "grad_norm": 2.8826780319213867, "learning_rate": 4.998403125810638e-05, "loss": 0.1654, "step": 15490 }, { "epoch": 0.5632676793371612, "grad_norm": 8.366926193237305, "learning_rate": 4.998384527223274e-05, "loss": 
0.2467, "step": 15500 }, { "epoch": 0.5636310778399592, "grad_norm": 2.2532148361206055, "learning_rate": 4.99836582098941e-05, "loss": 0.2569, "step": 15510 }, { "epoch": 0.5639944763427575, "grad_norm": 2.164987325668335, "learning_rate": 4.998347007109853e-05, "loss": 0.2167, "step": 15520 }, { "epoch": 0.5643578748455557, "grad_norm": 4.651108264923096, "learning_rate": 4.998328085585411e-05, "loss": 0.2138, "step": 15530 }, { "epoch": 0.5647212733483538, "grad_norm": 1.5128902196884155, "learning_rate": 4.9983090564169024e-05, "loss": 0.1821, "step": 15540 }, { "epoch": 0.565084671851152, "grad_norm": 8.516124725341797, "learning_rate": 4.998289919605145e-05, "loss": 0.2546, "step": 15550 }, { "epoch": 0.5654480703539502, "grad_norm": 1.6480666399002075, "learning_rate": 4.9982706751509635e-05, "loss": 0.2069, "step": 15560 }, { "epoch": 0.5658114688567483, "grad_norm": 1.3768938779830933, "learning_rate": 4.998251323055187e-05, "loss": 0.1775, "step": 15570 }, { "epoch": 0.5661748673595465, "grad_norm": 1.8793795108795166, "learning_rate": 4.998231863318651e-05, "loss": 0.14, "step": 15580 }, { "epoch": 0.5665382658623447, "grad_norm": 1.2361701726913452, "learning_rate": 4.9982122959421924e-05, "loss": 0.1797, "step": 15590 }, { "epoch": 0.5669016643651428, "grad_norm": 14.16727352142334, "learning_rate": 4.998192620926655e-05, "loss": 0.3523, "step": 15600 }, { "epoch": 0.5669016643651428, "eval_loss": 0.40661031007766724, "eval_runtime": 180.2598, "eval_samples_per_second": 41.13, "eval_steps_per_second": 5.143, "eval_wer": 0.21753771307204967, "step": 15600 }, { "epoch": 0.567265062867941, "grad_norm": 2.460245370864868, "learning_rate": 4.9981728382728855e-05, "loss": 0.1824, "step": 15610 }, { "epoch": 0.5676284613707392, "grad_norm": 1.603381633758545, "learning_rate": 4.9981529479817366e-05, "loss": 0.1506, "step": 15620 }, { "epoch": 0.5679918598735373, "grad_norm": 3.650087356567383, "learning_rate": 4.9981329500540664e-05, "loss": 0.2351, "step": 
15630 }, { "epoch": 0.5683552583763355, "grad_norm": 2.2338075637817383, "learning_rate": 4.9981128444907354e-05, "loss": 0.1785, "step": 15640 }, { "epoch": 0.5687186568791337, "grad_norm": 7.641642093658447, "learning_rate": 4.998092631292611e-05, "loss": 0.2816, "step": 15650 }, { "epoch": 0.5690820553819318, "grad_norm": 1.5877048969268799, "learning_rate": 4.998072310460562e-05, "loss": 0.1784, "step": 15660 }, { "epoch": 0.56944545388473, "grad_norm": 3.5917787551879883, "learning_rate": 4.998051881995466e-05, "loss": 0.1685, "step": 15670 }, { "epoch": 0.5698088523875282, "grad_norm": 6.459184169769287, "learning_rate": 4.998031345898203e-05, "loss": 0.2031, "step": 15680 }, { "epoch": 0.5701722508903263, "grad_norm": 2.7518184185028076, "learning_rate": 4.9980107021696565e-05, "loss": 0.168, "step": 15690 }, { "epoch": 0.5705356493931245, "grad_norm": 9.814598083496094, "learning_rate": 4.997989950810718e-05, "loss": 0.2778, "step": 15700 }, { "epoch": 0.5708990478959227, "grad_norm": 2.0985398292541504, "learning_rate": 4.9979690918222785e-05, "loss": 0.1864, "step": 15710 }, { "epoch": 0.5712624463987208, "grad_norm": 1.9264591932296753, "learning_rate": 4.997948125205241e-05, "loss": 0.1682, "step": 15720 }, { "epoch": 0.571625844901519, "grad_norm": 4.2961955070495605, "learning_rate": 4.997927050960505e-05, "loss": 0.198, "step": 15730 }, { "epoch": 0.5719892434043172, "grad_norm": 4.524483680725098, "learning_rate": 4.99790586908898e-05, "loss": 0.3235, "step": 15740 }, { "epoch": 0.5723526419071153, "grad_norm": 5.259559154510498, "learning_rate": 4.997884579591578e-05, "loss": 0.335, "step": 15750 }, { "epoch": 0.5727160404099135, "grad_norm": 1.7875639200210571, "learning_rate": 4.997863182469219e-05, "loss": 0.1674, "step": 15760 }, { "epoch": 0.5730794389127116, "grad_norm": 1.1852960586547852, "learning_rate": 4.9978416777228216e-05, "loss": 0.1968, "step": 15770 }, { "epoch": 0.5734428374155098, "grad_norm": 1.253061294555664, "learning_rate": 
4.997820065353314e-05, "loss": 0.2177, "step": 15780 }, { "epoch": 0.573806235918308, "grad_norm": 2.0577871799468994, "learning_rate": 4.9977983453616266e-05, "loss": 0.1498, "step": 15790 }, { "epoch": 0.5741696344211061, "grad_norm": 7.4168901443481445, "learning_rate": 4.997776517748696e-05, "loss": 0.3137, "step": 15800 }, { "epoch": 0.5745330329239043, "grad_norm": 2.9957845211029053, "learning_rate": 4.9977545825154625e-05, "loss": 0.1819, "step": 15810 }, { "epoch": 0.5748964314267025, "grad_norm": 1.251610517501831, "learning_rate": 4.997732539662871e-05, "loss": 0.1633, "step": 15820 }, { "epoch": 0.5752598299295006, "grad_norm": 3.229581594467163, "learning_rate": 4.997710389191871e-05, "loss": 0.1888, "step": 15830 }, { "epoch": 0.5756232284322989, "grad_norm": 1.2718089818954468, "learning_rate": 4.997688131103417e-05, "loss": 0.1938, "step": 15840 }, { "epoch": 0.5759866269350971, "grad_norm": 4.77078104019165, "learning_rate": 4.9976657653984694e-05, "loss": 0.2311, "step": 15850 }, { "epoch": 0.5763500254378952, "grad_norm": 1.9487907886505127, "learning_rate": 4.9976432920779904e-05, "loss": 0.7679, "step": 15860 }, { "epoch": 0.5767134239406934, "grad_norm": 2.1322100162506104, "learning_rate": 4.997620711142948e-05, "loss": 0.204, "step": 15870 }, { "epoch": 0.5770768224434916, "grad_norm": 3.0756008625030518, "learning_rate": 4.997598022594316e-05, "loss": 0.205, "step": 15880 }, { "epoch": 0.5774402209462897, "grad_norm": 2.2399511337280273, "learning_rate": 4.997575226433071e-05, "loss": 0.183, "step": 15890 }, { "epoch": 0.5778036194490879, "grad_norm": 4.17095947265625, "learning_rate": 4.997552322660197e-05, "loss": 0.2209, "step": 15900 }, { "epoch": 0.5781670179518861, "grad_norm": 1.7085528373718262, "learning_rate": 4.9975293112766794e-05, "loss": 0.4196, "step": 15910 }, { "epoch": 0.5785304164546842, "grad_norm": 1.6818984746932983, "learning_rate": 4.99750619228351e-05, "loss": 0.1592, "step": 15920 }, { "epoch": 0.5788938149574824, 
"grad_norm": 2.526503324508667, "learning_rate": 4.9974829656816846e-05, "loss": 0.7523, "step": 15930 }, { "epoch": 0.5792572134602806, "grad_norm": 4.456855297088623, "learning_rate": 4.997459631472205e-05, "loss": 0.1664, "step": 15940 }, { "epoch": 0.5796206119630787, "grad_norm": 28.427839279174805, "learning_rate": 4.9974361896560746e-05, "loss": 0.2891, "step": 15950 }, { "epoch": 0.5799840104658769, "grad_norm": 1.6720882654190063, "learning_rate": 4.997412640234306e-05, "loss": 0.1522, "step": 15960 }, { "epoch": 0.5803474089686751, "grad_norm": 1.6327390670776367, "learning_rate": 4.997388983207911e-05, "loss": 0.1957, "step": 15970 }, { "epoch": 0.5807108074714732, "grad_norm": 1.5792416334152222, "learning_rate": 4.997365218577912e-05, "loss": 0.2325, "step": 15980 }, { "epoch": 0.5810742059742714, "grad_norm": 1.7585738897323608, "learning_rate": 4.9973413463453305e-05, "loss": 0.2023, "step": 15990 }, { "epoch": 0.5814376044770696, "grad_norm": 8.14810562133789, "learning_rate": 4.997317366511196e-05, "loss": 0.2093, "step": 16000 }, { "epoch": 0.5818010029798677, "grad_norm": 3.1430416107177734, "learning_rate": 4.997293279076543e-05, "loss": 0.1742, "step": 16010 }, { "epoch": 0.5821644014826659, "grad_norm": 1.9447312355041504, "learning_rate": 4.997269084042406e-05, "loss": 0.1852, "step": 16020 }, { "epoch": 0.5825277999854641, "grad_norm": 2.1479732990264893, "learning_rate": 4.997244781409831e-05, "loss": 0.2197, "step": 16030 }, { "epoch": 0.5828911984882622, "grad_norm": 3.7066800594329834, "learning_rate": 4.9972203711798625e-05, "loss": 0.1899, "step": 16040 }, { "epoch": 0.5832545969910604, "grad_norm": 4.3598432540893555, "learning_rate": 4.9971958533535544e-05, "loss": 0.237, "step": 16050 }, { "epoch": 0.5836179954938585, "grad_norm": 3.410356283187866, "learning_rate": 4.997171227931962e-05, "loss": 0.1985, "step": 16060 }, { "epoch": 0.5839813939966567, "grad_norm": 1.6299129724502563, "learning_rate": 4.9971464949161454e-05, "loss": 
0.1758, "step": 16070 }, { "epoch": 0.5843447924994549, "grad_norm": 1.993067979812622, "learning_rate": 4.9971216543071716e-05, "loss": 0.1822, "step": 16080 }, { "epoch": 0.584708191002253, "grad_norm": 1.2057979106903076, "learning_rate": 4.9970967061061104e-05, "loss": 0.1953, "step": 16090 }, { "epoch": 0.5850715895050512, "grad_norm": 34.54500961303711, "learning_rate": 4.997071650314037e-05, "loss": 0.277, "step": 16100 }, { "epoch": 0.5854349880078494, "grad_norm": 1.243656039237976, "learning_rate": 4.997046486932031e-05, "loss": 0.172, "step": 16110 }, { "epoch": 0.5857983865106475, "grad_norm": 0.6155187487602234, "learning_rate": 4.997021215961176e-05, "loss": 0.1523, "step": 16120 }, { "epoch": 0.5861617850134457, "grad_norm": 2.0203208923339844, "learning_rate": 4.9969958374025615e-05, "loss": 0.1857, "step": 16130 }, { "epoch": 0.5865251835162439, "grad_norm": 1.1912654638290405, "learning_rate": 4.9969703512572805e-05, "loss": 0.2169, "step": 16140 }, { "epoch": 0.586888582019042, "grad_norm": 3.528538227081299, "learning_rate": 4.9969447575264315e-05, "loss": 0.2428, "step": 16150 }, { "epoch": 0.5872519805218402, "grad_norm": 0.9166990518569946, "learning_rate": 4.996919056211117e-05, "loss": 0.2206, "step": 16160 }, { "epoch": 0.5876153790246385, "grad_norm": 1.4956426620483398, "learning_rate": 4.996893247312444e-05, "loss": 0.168, "step": 16170 }, { "epoch": 0.5879787775274365, "grad_norm": 1.4502993822097778, "learning_rate": 4.996867330831526e-05, "loss": 0.1767, "step": 16180 }, { "epoch": 0.5883421760302348, "grad_norm": 0.9337482452392578, "learning_rate": 4.9968413067694775e-05, "loss": 0.2035, "step": 16190 }, { "epoch": 0.588705574533033, "grad_norm": 13.500269889831543, "learning_rate": 4.996815175127422e-05, "loss": 0.2335, "step": 16200 }, { "epoch": 0.588705574533033, "eval_loss": 0.4428017735481262, "eval_runtime": 180.2159, "eval_samples_per_second": 41.14, "eval_steps_per_second": 5.144, "eval_wer": 0.22281118957285748, "step": 
16200 }, { "epoch": 0.5890689730358311, "grad_norm": 5.091770648956299, "learning_rate": 4.996788935906483e-05, "loss": 0.1884, "step": 16210 }, { "epoch": 0.5894323715386293, "grad_norm": 2.379033327102661, "learning_rate": 4.996762589107793e-05, "loss": 0.196, "step": 16220 }, { "epoch": 0.5897957700414275, "grad_norm": 2.576484203338623, "learning_rate": 4.996736134732487e-05, "loss": 0.2069, "step": 16230 }, { "epoch": 0.5901591685442256, "grad_norm": 1.4235923290252686, "learning_rate": 4.9967095727817035e-05, "loss": 0.1905, "step": 16240 }, { "epoch": 0.5905225670470238, "grad_norm": 7.119918346405029, "learning_rate": 4.9966829032565886e-05, "loss": 0.2803, "step": 16250 }, { "epoch": 0.590885965549822, "grad_norm": 1.1050286293029785, "learning_rate": 4.99665612615829e-05, "loss": 0.1914, "step": 16260 }, { "epoch": 0.5912493640526201, "grad_norm": 1.403601884841919, "learning_rate": 4.9966292414879625e-05, "loss": 0.1649, "step": 16270 }, { "epoch": 0.5916127625554183, "grad_norm": 5.439052104949951, "learning_rate": 4.9966022492467635e-05, "loss": 0.1897, "step": 16280 }, { "epoch": 0.5919761610582165, "grad_norm": 1.0014379024505615, "learning_rate": 4.996575149435857e-05, "loss": 0.1472, "step": 16290 }, { "epoch": 0.5923395595610146, "grad_norm": 9.480517387390137, "learning_rate": 4.99654794205641e-05, "loss": 0.2351, "step": 16300 }, { "epoch": 0.5927029580638128, "grad_norm": 0.8406987190246582, "learning_rate": 4.9965206271095955e-05, "loss": 0.1795, "step": 16310 }, { "epoch": 0.593066356566611, "grad_norm": 1.378169298171997, "learning_rate": 4.996493204596589e-05, "loss": 0.1597, "step": 16320 }, { "epoch": 0.5934297550694091, "grad_norm": 3.9748549461364746, "learning_rate": 4.996465674518573e-05, "loss": 0.2264, "step": 16330 }, { "epoch": 0.5937931535722073, "grad_norm": 2.2626171112060547, "learning_rate": 4.996438036876734e-05, "loss": 0.1647, "step": 16340 }, { "epoch": 0.5941565520750055, "grad_norm": 3.8039205074310303, "learning_rate": 
4.996410291672262e-05, "loss": 0.2204, "step": 16350 }, { "epoch": 0.5945199505778036, "grad_norm": 1.5219416618347168, "learning_rate": 4.996382438906353e-05, "loss": 0.1518, "step": 16360 }, { "epoch": 0.5948833490806018, "grad_norm": 1.4811570644378662, "learning_rate": 4.9963544785802064e-05, "loss": 0.2006, "step": 16370 }, { "epoch": 0.5952467475833999, "grad_norm": 4.7030558586120605, "learning_rate": 4.996326410695028e-05, "loss": 0.2524, "step": 16380 }, { "epoch": 0.5956101460861981, "grad_norm": 1.103624939918518, "learning_rate": 4.996298235252026e-05, "loss": 0.1558, "step": 16390 }, { "epoch": 0.5959735445889963, "grad_norm": 4.654818534851074, "learning_rate": 4.996269952252415e-05, "loss": 0.2746, "step": 16400 }, { "epoch": 0.5963369430917944, "grad_norm": 1.6746747493743896, "learning_rate": 4.996241561697413e-05, "loss": 0.1838, "step": 16410 }, { "epoch": 0.5967003415945926, "grad_norm": 3.1955924034118652, "learning_rate": 4.996213063588245e-05, "loss": 0.1773, "step": 16420 }, { "epoch": 0.5970637400973908, "grad_norm": 1.782669186592102, "learning_rate": 4.996184457926137e-05, "loss": 0.1939, "step": 16430 }, { "epoch": 0.5974271386001889, "grad_norm": 1.2277849912643433, "learning_rate": 4.996155744712322e-05, "loss": 0.1724, "step": 16440 }, { "epoch": 0.5977905371029871, "grad_norm": 25.578798294067383, "learning_rate": 4.996126923948038e-05, "loss": 0.2612, "step": 16450 }, { "epoch": 0.5981539356057853, "grad_norm": 0.984426736831665, "learning_rate": 4.9960979956345254e-05, "loss": 0.1621, "step": 16460 }, { "epoch": 0.5985173341085834, "grad_norm": 2.1299145221710205, "learning_rate": 4.9960689597730315e-05, "loss": 0.161, "step": 16470 }, { "epoch": 0.5988807326113816, "grad_norm": 2.6153085231781006, "learning_rate": 4.996039816364807e-05, "loss": 0.2122, "step": 16480 }, { "epoch": 0.5992441311141798, "grad_norm": 4.464552879333496, "learning_rate": 4.996010565411108e-05, "loss": 0.2417, "step": 16490 }, { "epoch": 
0.5996075296169779, "grad_norm": 26.441349029541016, "learning_rate": 4.995981206913194e-05, "loss": 0.3103, "step": 16500 }, { "epoch": 0.5999709281197761, "grad_norm": 2.353302478790283, "learning_rate": 4.995951740872331e-05, "loss": 1.0256, "step": 16510 }, { "epoch": 0.6003343266225744, "grad_norm": 0.8436356782913208, "learning_rate": 4.995922167289788e-05, "loss": 0.1563, "step": 16520 }, { "epoch": 0.6006977251253725, "grad_norm": 3.3516342639923096, "learning_rate": 4.99589248616684e-05, "loss": 0.2441, "step": 16530 }, { "epoch": 0.6010611236281707, "grad_norm": 2.0286059379577637, "learning_rate": 4.995862697504764e-05, "loss": 0.1767, "step": 16540 }, { "epoch": 0.6014245221309689, "grad_norm": 18.248151779174805, "learning_rate": 4.9958328013048464e-05, "loss": 0.3522, "step": 16550 }, { "epoch": 0.601787920633767, "grad_norm": 2.1514463424682617, "learning_rate": 4.995802797568372e-05, "loss": 0.1771, "step": 16560 }, { "epoch": 0.6021513191365652, "grad_norm": 5.868020534515381, "learning_rate": 4.995772686296635e-05, "loss": 0.1776, "step": 16570 }, { "epoch": 0.6025147176393634, "grad_norm": 4.539637565612793, "learning_rate": 4.9957424674909336e-05, "loss": 0.2002, "step": 16580 }, { "epoch": 0.6028781161421615, "grad_norm": 1.7226190567016602, "learning_rate": 4.99571214115257e-05, "loss": 0.1927, "step": 16590 }, { "epoch": 0.6032415146449597, "grad_norm": 22.087247848510742, "learning_rate": 4.9956817072828485e-05, "loss": 0.249, "step": 16600 }, { "epoch": 0.6036049131477579, "grad_norm": 2.4267120361328125, "learning_rate": 4.995651165883083e-05, "loss": 0.1935, "step": 16610 }, { "epoch": 0.603968311650556, "grad_norm": 2.5284249782562256, "learning_rate": 4.995620516954588e-05, "loss": 0.1495, "step": 16620 }, { "epoch": 0.6043317101533542, "grad_norm": 1.5988596677780151, "learning_rate": 4.995589760498684e-05, "loss": 0.2329, "step": 16630 }, { "epoch": 0.6046951086561524, "grad_norm": 1.0771689414978027, "learning_rate": 
4.9955588965166966e-05, "loss": 0.1634, "step": 16640 }, { "epoch": 0.6050585071589505, "grad_norm": 8.72423267364502, "learning_rate": 4.995527925009956e-05, "loss": 0.27, "step": 16650 }, { "epoch": 0.6054219056617487, "grad_norm": 1.3176789283752441, "learning_rate": 4.9954968459797955e-05, "loss": 1.1913, "step": 16660 }, { "epoch": 0.6057853041645468, "grad_norm": 1.8307547569274902, "learning_rate": 4.9954656594275555e-05, "loss": 0.188, "step": 16670 }, { "epoch": 0.606148702667345, "grad_norm": 2.783604621887207, "learning_rate": 4.9954343653545795e-05, "loss": 0.1791, "step": 16680 }, { "epoch": 0.6065121011701432, "grad_norm": 1.6639970541000366, "learning_rate": 4.9954029637622146e-05, "loss": 0.1829, "step": 16690 }, { "epoch": 0.6068754996729413, "grad_norm": 11.055110931396484, "learning_rate": 4.995371454651815e-05, "loss": 0.2229, "step": 16700 }, { "epoch": 0.6072388981757395, "grad_norm": 1.8166972398757935, "learning_rate": 4.9953398380247384e-05, "loss": 0.1734, "step": 16710 }, { "epoch": 0.6076022966785377, "grad_norm": 4.851889610290527, "learning_rate": 4.995308113882346e-05, "loss": 0.1716, "step": 16720 }, { "epoch": 0.6079656951813358, "grad_norm": 3.0047857761383057, "learning_rate": 4.9952762822260056e-05, "loss": 0.2125, "step": 16730 }, { "epoch": 0.608329093684134, "grad_norm": 1.1506407260894775, "learning_rate": 4.9952443430570887e-05, "loss": 0.171, "step": 16740 }, { "epoch": 0.6086924921869322, "grad_norm": 4.324979782104492, "learning_rate": 4.995212296376971e-05, "loss": 0.2365, "step": 16750 }, { "epoch": 0.6090558906897303, "grad_norm": 1.2295490503311157, "learning_rate": 4.995180142187033e-05, "loss": 0.2002, "step": 16760 }, { "epoch": 0.6094192891925285, "grad_norm": 1.454434871673584, "learning_rate": 4.995147880488661e-05, "loss": 0.1602, "step": 16770 }, { "epoch": 0.6097826876953267, "grad_norm": 2.6185641288757324, "learning_rate": 4.995115511283244e-05, "loss": 0.1904, "step": 16780 }, { "epoch": 
0.6101460861981248, "grad_norm": 1.2603826522827148, "learning_rate": 4.9950830345721774e-05, "loss": 0.1892, "step": 16790 }, { "epoch": 0.610509484700923, "grad_norm": 14.189190864562988, "learning_rate": 4.9950504503568615e-05, "loss": 0.281, "step": 16800 }, { "epoch": 0.610509484700923, "eval_loss": 0.4123116433620453, "eval_runtime": 179.5081, "eval_samples_per_second": 41.302, "eval_steps_per_second": 5.164, "eval_wer": 0.22986367019441972, "step": 16800 }, { "epoch": 0.6108728832037212, "grad_norm": 1.7670204639434814, "learning_rate": 4.995017758638698e-05, "loss": 0.2581, "step": 16810 }, { "epoch": 0.6112362817065193, "grad_norm": 1.2099360227584839, "learning_rate": 4.9949849594190964e-05, "loss": 0.1762, "step": 16820 }, { "epoch": 0.6115996802093175, "grad_norm": 2.7719335556030273, "learning_rate": 4.9949520526994716e-05, "loss": 0.1867, "step": 16830 }, { "epoch": 0.6119630787121157, "grad_norm": 1.5935924053192139, "learning_rate": 4.9949190384812386e-05, "loss": 0.191, "step": 16840 }, { "epoch": 0.6123264772149138, "grad_norm": 3.511439085006714, "learning_rate": 4.994885916765821e-05, "loss": 0.2132, "step": 16850 }, { "epoch": 0.612689875717712, "grad_norm": 1.693789005279541, "learning_rate": 4.994852687554647e-05, "loss": 0.167, "step": 16860 }, { "epoch": 0.6130532742205103, "grad_norm": 2.1199066638946533, "learning_rate": 4.994819350849147e-05, "loss": 0.172, "step": 16870 }, { "epoch": 0.6134166727233084, "grad_norm": 2.724487543106079, "learning_rate": 4.9947859066507575e-05, "loss": 0.2083, "step": 16880 }, { "epoch": 0.6137800712261066, "grad_norm": 0.926547110080719, "learning_rate": 4.99475235496092e-05, "loss": 0.1517, "step": 16890 }, { "epoch": 0.6141434697289048, "grad_norm": 14.503059387207031, "learning_rate": 4.99471869578108e-05, "loss": 0.1945, "step": 16900 }, { "epoch": 0.6145068682317029, "grad_norm": 3.2206919193267822, "learning_rate": 4.994684929112687e-05, "loss": 0.1882, "step": 16910 }, { "epoch": 
0.6148702667345011, "grad_norm": 2.004995107650757, "learning_rate": 4.994651054957198e-05, "loss": 0.1876, "step": 16920 }, { "epoch": 0.6152336652372993, "grad_norm": 2.0580127239227295, "learning_rate": 4.99461707331607e-05, "loss": 0.2104, "step": 16930 }, { "epoch": 0.6155970637400974, "grad_norm": 3.3028602600097656, "learning_rate": 4.9945829841907684e-05, "loss": 0.1494, "step": 16940 }, { "epoch": 0.6159604622428956, "grad_norm": 7.572249412536621, "learning_rate": 4.994548787582761e-05, "loss": 0.2381, "step": 16950 }, { "epoch": 0.6163238607456937, "grad_norm": 1.4220709800720215, "learning_rate": 4.9945144834935234e-05, "loss": 0.1916, "step": 16960 }, { "epoch": 0.6166872592484919, "grad_norm": 1.2397724390029907, "learning_rate": 4.994480071924531e-05, "loss": 0.1593, "step": 16970 }, { "epoch": 0.6170506577512901, "grad_norm": 2.2569403648376465, "learning_rate": 4.9944455528772684e-05, "loss": 0.1984, "step": 16980 }, { "epoch": 0.6174140562540882, "grad_norm": 1.811727523803711, "learning_rate": 4.994410926353221e-05, "loss": 0.1838, "step": 16990 }, { "epoch": 0.6177774547568864, "grad_norm": 2.783061981201172, "learning_rate": 4.9943761923538834e-05, "loss": 0.2217, "step": 17000 }, { "epoch": 0.6181408532596846, "grad_norm": 2.816331148147583, "learning_rate": 4.99434135088075e-05, "loss": 0.1911, "step": 17010 }, { "epoch": 0.6185042517624827, "grad_norm": 1.238916039466858, "learning_rate": 4.9943064019353234e-05, "loss": 0.1854, "step": 17020 }, { "epoch": 0.6188676502652809, "grad_norm": 5.16685152053833, "learning_rate": 4.9942713455191075e-05, "loss": 0.1797, "step": 17030 }, { "epoch": 0.6192310487680791, "grad_norm": 2.486461639404297, "learning_rate": 4.9942361816336146e-05, "loss": 0.1926, "step": 17040 }, { "epoch": 0.6195944472708772, "grad_norm": 9.018515586853027, "learning_rate": 4.994200910280359e-05, "loss": 0.2193, "step": 17050 }, { "epoch": 0.6199578457736754, "grad_norm": 1.804166555404663, "learning_rate": 
4.994165531460861e-05, "loss": 0.1977, "step": 17060 }, { "epoch": 0.6203212442764736, "grad_norm": 1.2862845659255981, "learning_rate": 4.994130045176644e-05, "loss": 0.1493, "step": 17070 }, { "epoch": 0.6206846427792717, "grad_norm": 4.164750576019287, "learning_rate": 4.994094451429237e-05, "loss": 0.2548, "step": 17080 }, { "epoch": 0.6210480412820699, "grad_norm": 1.577255368232727, "learning_rate": 4.994058750220176e-05, "loss": 0.1703, "step": 17090 }, { "epoch": 0.6214114397848681, "grad_norm": 5.805021286010742, "learning_rate": 4.994022941550996e-05, "loss": 0.2976, "step": 17100 }, { "epoch": 0.6217748382876662, "grad_norm": 0.9706230163574219, "learning_rate": 4.993987025423241e-05, "loss": 0.1454, "step": 17110 }, { "epoch": 0.6221382367904644, "grad_norm": 1.4393014907836914, "learning_rate": 4.993951001838459e-05, "loss": 0.1496, "step": 17120 }, { "epoch": 0.6225016352932626, "grad_norm": 1.839086651802063, "learning_rate": 4.993914870798202e-05, "loss": 0.2256, "step": 17130 }, { "epoch": 0.6228650337960607, "grad_norm": 1.8924603462219238, "learning_rate": 4.993878632304027e-05, "loss": 0.1415, "step": 17140 }, { "epoch": 0.6232284322988589, "grad_norm": 12.03149700164795, "learning_rate": 4.993842286357494e-05, "loss": 0.7236, "step": 17150 }, { "epoch": 0.6235918308016571, "grad_norm": 2.0251877307891846, "learning_rate": 4.993805832960171e-05, "loss": 0.1913, "step": 17160 }, { "epoch": 0.6239552293044552, "grad_norm": 2.341251850128174, "learning_rate": 4.993769272113628e-05, "loss": 0.1734, "step": 17170 }, { "epoch": 0.6243186278072534, "grad_norm": 2.517820358276367, "learning_rate": 4.993732603819438e-05, "loss": 0.18, "step": 17180 }, { "epoch": 0.6246820263100517, "grad_norm": 1.6384356021881104, "learning_rate": 4.993695828079184e-05, "loss": 0.1513, "step": 17190 }, { "epoch": 0.6250454248128497, "grad_norm": 10.794693946838379, "learning_rate": 4.993658944894449e-05, "loss": 0.2282, "step": 17200 }, { "epoch": 0.625408823315648, 
"grad_norm": 1.2552087306976318, "learning_rate": 4.9936219542668236e-05, "loss": 0.1938, "step": 17210 }, { "epoch": 0.6257722218184462, "grad_norm": 2.423431634902954, "learning_rate": 4.993584856197899e-05, "loss": 0.1487, "step": 17220 }, { "epoch": 0.6261356203212443, "grad_norm": 1.7924834489822388, "learning_rate": 4.9935476506892763e-05, "loss": 0.195, "step": 17230 }, { "epoch": 0.6264990188240425, "grad_norm": 1.6521999835968018, "learning_rate": 4.9935103377425566e-05, "loss": 0.1652, "step": 17240 }, { "epoch": 0.6268624173268406, "grad_norm": 6.472127437591553, "learning_rate": 4.9934729173593494e-05, "loss": 0.2481, "step": 17250 }, { "epoch": 0.6272258158296388, "grad_norm": 1.8962410688400269, "learning_rate": 4.993435389541265e-05, "loss": 0.1487, "step": 17260 }, { "epoch": 0.627589214332437, "grad_norm": 1.2054486274719238, "learning_rate": 4.993397754289922e-05, "loss": 0.1496, "step": 17270 }, { "epoch": 0.6279526128352351, "grad_norm": 3.9840786457061768, "learning_rate": 4.993360011606941e-05, "loss": 0.1776, "step": 17280 }, { "epoch": 0.6283160113380333, "grad_norm": 0.9625970125198364, "learning_rate": 4.9933221614939485e-05, "loss": 0.1652, "step": 17290 }, { "epoch": 0.6286794098408315, "grad_norm": 11.166252136230469, "learning_rate": 4.993284203952575e-05, "loss": 0.233, "step": 17300 }, { "epoch": 0.6290428083436296, "grad_norm": 2.356268882751465, "learning_rate": 4.9932461389844566e-05, "loss": 0.1498, "step": 17310 }, { "epoch": 0.6294062068464278, "grad_norm": 0.9366337656974792, "learning_rate": 4.993207966591234e-05, "loss": 0.1483, "step": 17320 }, { "epoch": 0.629769605349226, "grad_norm": 5.854847431182861, "learning_rate": 4.9931696867745495e-05, "loss": 0.1603, "step": 17330 }, { "epoch": 0.6301330038520241, "grad_norm": 1.0090773105621338, "learning_rate": 4.9931312995360546e-05, "loss": 0.1475, "step": 17340 }, { "epoch": 0.6304964023548223, "grad_norm": 3.896676540374756, "learning_rate": 4.9930928048774024e-05, "loss": 
0.244, "step": 17350 }, { "epoch": 0.6308598008576205, "grad_norm": 1.1872800588607788, "learning_rate": 4.993054202800252e-05, "loss": 0.1618, "step": 17360 }, { "epoch": 0.6312231993604186, "grad_norm": 1.8078994750976562, "learning_rate": 4.9930154933062654e-05, "loss": 0.1554, "step": 17370 }, { "epoch": 0.6315865978632168, "grad_norm": 1.8264563083648682, "learning_rate": 4.9929766763971126e-05, "loss": 0.162, "step": 17380 }, { "epoch": 0.631949996366015, "grad_norm": 0.6304519176483154, "learning_rate": 4.992937752074465e-05, "loss": 0.209, "step": 17390 }, { "epoch": 0.6323133948688131, "grad_norm": 4.7621917724609375, "learning_rate": 4.992898720339998e-05, "loss": 0.2393, "step": 17400 }, { "epoch": 0.6323133948688131, "eval_loss": 0.3943130671977997, "eval_runtime": 180.0553, "eval_samples_per_second": 41.176, "eval_steps_per_second": 5.148, "eval_wer": 0.21144734692395664, "step": 17400 }, { "epoch": 0.6326767933716113, "grad_norm": 1.1110138893127441, "learning_rate": 4.992859581195396e-05, "loss": 0.143, "step": 17410 }, { "epoch": 0.6330401918744095, "grad_norm": 1.2453794479370117, "learning_rate": 4.992820334642344e-05, "loss": 0.1454, "step": 17420 }, { "epoch": 0.6334035903772076, "grad_norm": 3.669144630432129, "learning_rate": 4.9927809806825335e-05, "loss": 0.2496, "step": 17430 }, { "epoch": 0.6337669888800058, "grad_norm": 2.7898483276367188, "learning_rate": 4.99274151931766e-05, "loss": 0.1614, "step": 17440 }, { "epoch": 0.634130387382804, "grad_norm": 6.725431442260742, "learning_rate": 4.992701950549423e-05, "loss": 0.2622, "step": 17450 }, { "epoch": 0.6344937858856021, "grad_norm": 1.6481575965881348, "learning_rate": 4.992662274379528e-05, "loss": 0.1713, "step": 17460 }, { "epoch": 0.6348571843884003, "grad_norm": 1.3567384481430054, "learning_rate": 4.9926224908096856e-05, "loss": 0.1725, "step": 17470 }, { "epoch": 0.6352205828911985, "grad_norm": 1.8207722902297974, "learning_rate": 4.9925825998416076e-05, "loss": 0.1973, "step": 
17480 }, { "epoch": 0.6355839813939966, "grad_norm": 2.2345893383026123, "learning_rate": 4.9925426014770146e-05, "loss": 0.1847, "step": 17490 }, { "epoch": 0.6359473798967948, "grad_norm": 7.193591594696045, "learning_rate": 4.992502495717629e-05, "loss": 0.2605, "step": 17500 }, { "epoch": 0.636310778399593, "grad_norm": 1.346073865890503, "learning_rate": 4.99246228256518e-05, "loss": 0.1518, "step": 17510 }, { "epoch": 0.6366741769023911, "grad_norm": 1.5637879371643066, "learning_rate": 4.9924219620213995e-05, "loss": 0.1648, "step": 17520 }, { "epoch": 0.6370375754051893, "grad_norm": 3.2450170516967773, "learning_rate": 4.9923815340880236e-05, "loss": 0.1974, "step": 17530 }, { "epoch": 0.6374009739079874, "grad_norm": 0.9553948640823364, "learning_rate": 4.992340998766796e-05, "loss": 0.1694, "step": 17540 }, { "epoch": 0.6377643724107857, "grad_norm": 8.901055335998535, "learning_rate": 4.9923003560594625e-05, "loss": 0.2625, "step": 17550 }, { "epoch": 0.6381277709135839, "grad_norm": 1.7500522136688232, "learning_rate": 4.992259605967774e-05, "loss": 0.1799, "step": 17560 }, { "epoch": 0.638491169416382, "grad_norm": 1.4673160314559937, "learning_rate": 4.9922187484934865e-05, "loss": 0.1698, "step": 17570 }, { "epoch": 0.6388545679191802, "grad_norm": 2.5377135276794434, "learning_rate": 4.992177783638361e-05, "loss": 0.1822, "step": 17580 }, { "epoch": 0.6392179664219784, "grad_norm": 1.660311222076416, "learning_rate": 4.9921367114041625e-05, "loss": 0.1659, "step": 17590 }, { "epoch": 0.6395813649247765, "grad_norm": 8.248649597167969, "learning_rate": 4.9920955317926595e-05, "loss": 0.2384, "step": 17600 }, { "epoch": 0.6399447634275747, "grad_norm": 1.5581409931182861, "learning_rate": 4.992054244805627e-05, "loss": 0.1665, "step": 17610 }, { "epoch": 0.6403081619303729, "grad_norm": 0.9654737710952759, "learning_rate": 4.992012850444844e-05, "loss": 0.3493, "step": 17620 }, { "epoch": 0.640671560433171, "grad_norm": 3.4477317333221436, 
"learning_rate": 4.9919713487120935e-05, "loss": 0.2097, "step": 17630 }, { "epoch": 0.6410349589359692, "grad_norm": 1.3745356798171997, "learning_rate": 4.9919297396091634e-05, "loss": 0.1459, "step": 17640 }, { "epoch": 0.6413983574387674, "grad_norm": 4.813534259796143, "learning_rate": 4.991888023137849e-05, "loss": 0.1905, "step": 17650 }, { "epoch": 0.6417617559415655, "grad_norm": 3.118452310562134, "learning_rate": 4.9918461992999445e-05, "loss": 0.1527, "step": 17660 }, { "epoch": 0.6421251544443637, "grad_norm": 1.8424941301345825, "learning_rate": 4.991804268097253e-05, "loss": 0.1759, "step": 17670 }, { "epoch": 0.6424885529471619, "grad_norm": 7.301458835601807, "learning_rate": 4.9917622295315826e-05, "loss": 0.1662, "step": 17680 }, { "epoch": 0.64285195144996, "grad_norm": 3.133114814758301, "learning_rate": 4.991720083604743e-05, "loss": 0.1692, "step": 17690 }, { "epoch": 0.6432153499527582, "grad_norm": 11.538620948791504, "learning_rate": 4.99167783031855e-05, "loss": 0.2443, "step": 17700 }, { "epoch": 0.6435787484555564, "grad_norm": 1.3739595413208008, "learning_rate": 4.991635469674825e-05, "loss": 0.1465, "step": 17710 }, { "epoch": 0.6439421469583545, "grad_norm": 1.6855549812316895, "learning_rate": 4.991593001675393e-05, "loss": 0.1819, "step": 17720 }, { "epoch": 0.6443055454611527, "grad_norm": 1.692335844039917, "learning_rate": 4.991550426322083e-05, "loss": 0.1654, "step": 17730 }, { "epoch": 0.6446689439639509, "grad_norm": 1.1132971048355103, "learning_rate": 4.9915077436167313e-05, "loss": 0.1688, "step": 17740 }, { "epoch": 0.645032342466749, "grad_norm": 5.6813201904296875, "learning_rate": 4.9914649535611756e-05, "loss": 0.2235, "step": 17750 }, { "epoch": 0.6453957409695472, "grad_norm": 1.5107471942901611, "learning_rate": 4.99142205615726e-05, "loss": 0.1747, "step": 17760 }, { "epoch": 0.6457591394723454, "grad_norm": 2.4552764892578125, "learning_rate": 4.9913790514068316e-05, "loss": 0.1739, "step": 17770 }, { "epoch": 
0.6461225379751435, "grad_norm": 1.5664808750152588, "learning_rate": 4.991335939311744e-05, "loss": 0.1766, "step": 17780 }, { "epoch": 0.6464859364779417, "grad_norm": 2.935850143432617, "learning_rate": 4.9912927198738556e-05, "loss": 0.2148, "step": 17790 }, { "epoch": 0.6468493349807399, "grad_norm": 10.267364501953125, "learning_rate": 4.991249393095028e-05, "loss": 0.2521, "step": 17800 }, { "epoch": 0.647212733483538, "grad_norm": 1.3392564058303833, "learning_rate": 4.9912059589771274e-05, "loss": 0.172, "step": 17810 }, { "epoch": 0.6475761319863362, "grad_norm": 0.895491361618042, "learning_rate": 4.991162417522026e-05, "loss": 0.1379, "step": 17820 }, { "epoch": 0.6479395304891343, "grad_norm": 2.536397695541382, "learning_rate": 4.9911187687315997e-05, "loss": 0.1477, "step": 17830 }, { "epoch": 0.6483029289919325, "grad_norm": 1.7795464992523193, "learning_rate": 4.9910750126077296e-05, "loss": 0.1786, "step": 17840 }, { "epoch": 0.6486663274947307, "grad_norm": 62.683929443359375, "learning_rate": 4.9910311491523e-05, "loss": 0.266, "step": 17850 }, { "epoch": 0.6490297259975288, "grad_norm": 2.0866358280181885, "learning_rate": 4.990987178367201e-05, "loss": 0.1428, "step": 17860 }, { "epoch": 0.649393124500327, "grad_norm": 1.5636661052703857, "learning_rate": 4.990943100254328e-05, "loss": 0.1845, "step": 17870 }, { "epoch": 0.6497565230031253, "grad_norm": 3.540689468383789, "learning_rate": 4.9908989148155796e-05, "loss": 0.2348, "step": 17880 }, { "epoch": 0.6501199215059233, "grad_norm": 1.720421314239502, "learning_rate": 4.990854622052859e-05, "loss": 0.1742, "step": 17890 }, { "epoch": 0.6504833200087216, "grad_norm": 7.7201056480407715, "learning_rate": 4.9908102219680756e-05, "loss": 0.2573, "step": 17900 }, { "epoch": 0.6508467185115198, "grad_norm": 3.826190948486328, "learning_rate": 4.9907701701329876e-05, "loss": 3.6024, "step": 17910 }, { "epoch": 0.6512101170143179, "grad_norm": 2.047307252883911, "learning_rate": 
4.990725566141558e-05, "loss": 0.1551, "step": 17920 }, { "epoch": 0.6515735155171161, "grad_norm": 6.462743282318115, "learning_rate": 4.990680854833626e-05, "loss": 0.2109, "step": 17930 }, { "epoch": 0.6519369140199143, "grad_norm": 1.7611109018325806, "learning_rate": 4.9906360362111184e-05, "loss": 0.1959, "step": 17940 }, { "epoch": 0.6523003125227124, "grad_norm": 5.253514766693115, "learning_rate": 4.9905911102759655e-05, "loss": 0.2436, "step": 17950 }, { "epoch": 0.6526637110255106, "grad_norm": 0.9357771873474121, "learning_rate": 4.9905460770301035e-05, "loss": 0.1664, "step": 17960 }, { "epoch": 0.6530271095283088, "grad_norm": 1.219488263130188, "learning_rate": 4.990500936475472e-05, "loss": 0.2286, "step": 17970 }, { "epoch": 0.6533905080311069, "grad_norm": 2.8499608039855957, "learning_rate": 4.990455688614016e-05, "loss": 0.2664, "step": 17980 }, { "epoch": 0.6537539065339051, "grad_norm": 1.5652077198028564, "learning_rate": 4.990410333447686e-05, "loss": 0.1341, "step": 17990 }, { "epoch": 0.6541173050367033, "grad_norm": 5.98219633102417, "learning_rate": 4.9903648709784356e-05, "loss": 0.2338, "step": 18000 }, { "epoch": 0.6541173050367033, "eval_loss": 0.37892404198646545, "eval_runtime": 180.0524, "eval_samples_per_second": 41.177, "eval_steps_per_second": 5.149, "eval_wer": 0.200864087715795, "step": 18000 }, { "epoch": 0.6544807035395014, "grad_norm": 2.105100154876709, "learning_rate": 4.990319301208223e-05, "loss": 0.1764, "step": 18010 }, { "epoch": 0.6548441020422996, "grad_norm": 1.0867921113967896, "learning_rate": 4.990273624139013e-05, "loss": 0.1507, "step": 18020 }, { "epoch": 0.6552075005450978, "grad_norm": 2.9895503520965576, "learning_rate": 4.9902278397727734e-05, "loss": 0.1479, "step": 18030 }, { "epoch": 0.6555708990478959, "grad_norm": 0.9947407841682434, "learning_rate": 4.990181948111475e-05, "loss": 0.1558, "step": 18040 }, { "epoch": 0.6559342975506941, "grad_norm": 7.774895191192627, "learning_rate": 
4.9901359491570974e-05, "loss": 0.2202, "step": 18050 }, { "epoch": 0.6562976960534923, "grad_norm": 1.8466017246246338, "learning_rate": 4.990089842911622e-05, "loss": 0.1929, "step": 18060 }, { "epoch": 0.6566610945562904, "grad_norm": 0.8435410261154175, "learning_rate": 4.9900436293770345e-05, "loss": 0.1377, "step": 18070 }, { "epoch": 0.6570244930590886, "grad_norm": 3.10648512840271, "learning_rate": 4.989997308555326e-05, "loss": 0.202, "step": 18080 }, { "epoch": 0.6573878915618868, "grad_norm": 1.112806797027588, "learning_rate": 4.989950880448494e-05, "loss": 0.1486, "step": 18090 }, { "epoch": 0.6577512900646849, "grad_norm": 18.821117401123047, "learning_rate": 4.989904345058538e-05, "loss": 0.2677, "step": 18100 }, { "epoch": 0.6581146885674831, "grad_norm": 1.254798412322998, "learning_rate": 4.989857702387463e-05, "loss": 3.5769, "step": 18110 }, { "epoch": 0.6584780870702812, "grad_norm": 0.9956761002540588, "learning_rate": 4.989810952437277e-05, "loss": 0.1958, "step": 18120 }, { "epoch": 0.6588414855730794, "grad_norm": 2.9471828937530518, "learning_rate": 4.9897640952099975e-05, "loss": 0.1988, "step": 18130 }, { "epoch": 0.6592048840758776, "grad_norm": 1.3806344270706177, "learning_rate": 4.989717130707641e-05, "loss": 0.1552, "step": 18140 }, { "epoch": 0.6595682825786757, "grad_norm": 3.0857722759246826, "learning_rate": 4.989670058932231e-05, "loss": 0.2168, "step": 18150 }, { "epoch": 0.6599316810814739, "grad_norm": 1.8781664371490479, "learning_rate": 4.989622879885798e-05, "loss": 0.1571, "step": 18160 }, { "epoch": 0.6602950795842721, "grad_norm": 1.1139156818389893, "learning_rate": 4.9895755935703725e-05, "loss": 0.1365, "step": 18170 }, { "epoch": 0.6606584780870702, "grad_norm": 2.3965742588043213, "learning_rate": 4.9895281999879925e-05, "loss": 0.1879, "step": 18180 }, { "epoch": 0.6610218765898684, "grad_norm": 1.2575726509094238, "learning_rate": 4.9894806991407e-05, "loss": 0.2197, "step": 18190 }, { "epoch": 
0.6613852750926666, "grad_norm": 10.392169952392578, "learning_rate": 4.989433091030542e-05, "loss": 0.2318, "step": 18200 }, { "epoch": 0.6617486735954647, "grad_norm": 0.8268498182296753, "learning_rate": 4.98938537565957e-05, "loss": 0.1416, "step": 18210 }, { "epoch": 0.662112072098263, "grad_norm": 0.9257369637489319, "learning_rate": 4.9893375530298384e-05, "loss": 0.1855, "step": 18220 }, { "epoch": 0.6624754706010612, "grad_norm": 1.7720370292663574, "learning_rate": 4.9892896231434094e-05, "loss": 1.0276, "step": 18230 }, { "epoch": 0.6628388691038593, "grad_norm": 2.2012548446655273, "learning_rate": 4.9892415860023476e-05, "loss": 1.1909, "step": 18240 }, { "epoch": 0.6632022676066575, "grad_norm": 9.690247535705566, "learning_rate": 4.9891934416087224e-05, "loss": 0.2603, "step": 18250 }, { "epoch": 0.6635656661094557, "grad_norm": 2.528682231903076, "learning_rate": 4.989145189964608e-05, "loss": 0.1912, "step": 18260 }, { "epoch": 0.6639290646122538, "grad_norm": 1.4666227102279663, "learning_rate": 4.989096831072084e-05, "loss": 0.2316, "step": 18270 }, { "epoch": 0.664292463115052, "grad_norm": 1.463526725769043, "learning_rate": 4.989048364933234e-05, "loss": 0.1388, "step": 18280 }, { "epoch": 0.6646558616178502, "grad_norm": 1.2156569957733154, "learning_rate": 4.988999791550146e-05, "loss": 0.4086, "step": 18290 }, { "epoch": 0.6650192601206483, "grad_norm": 4.909139156341553, "learning_rate": 4.988951110924913e-05, "loss": 0.2631, "step": 18300 }, { "epoch": 0.6653826586234465, "grad_norm": 1.3692512512207031, "learning_rate": 4.988902323059632e-05, "loss": 0.1525, "step": 18310 }, { "epoch": 0.6657460571262447, "grad_norm": 1.153344988822937, "learning_rate": 4.988853427956406e-05, "loss": 0.1904, "step": 18320 }, { "epoch": 0.6661094556290428, "grad_norm": 2.052828073501587, "learning_rate": 4.988804425617341e-05, "loss": 0.1979, "step": 18330 }, { "epoch": 0.666472854131841, "grad_norm": 1.373213768005371, "learning_rate": 
4.988755316044548e-05, "loss": 0.1836, "step": 18340 }, { "epoch": 0.6668362526346392, "grad_norm": 24.185970306396484, "learning_rate": 4.9887060992401436e-05, "loss": 0.2546, "step": 18350 }, { "epoch": 0.6671996511374373, "grad_norm": 1.702205777168274, "learning_rate": 4.988656775206248e-05, "loss": 0.1433, "step": 18360 }, { "epoch": 0.6675630496402355, "grad_norm": 2.279100179672241, "learning_rate": 4.9886073439449864e-05, "loss": 0.1671, "step": 18370 }, { "epoch": 0.6679264481430337, "grad_norm": 3.928740978240967, "learning_rate": 4.98855780545849e-05, "loss": 0.1506, "step": 18380 }, { "epoch": 0.6682898466458318, "grad_norm": 2.2895402908325195, "learning_rate": 4.988508159748891e-05, "loss": 0.1523, "step": 18390 }, { "epoch": 0.66865324514863, "grad_norm": 10.151689529418945, "learning_rate": 4.98845840681833e-05, "loss": 0.2284, "step": 18400 }, { "epoch": 0.6690166436514282, "grad_norm": 1.268561840057373, "learning_rate": 4.9884085466689504e-05, "loss": 0.171, "step": 18410 }, { "epoch": 0.6693800421542263, "grad_norm": 1.0731265544891357, "learning_rate": 4.9883585793029e-05, "loss": 0.7778, "step": 18420 }, { "epoch": 0.6697434406570245, "grad_norm": 1.0762509107589722, "learning_rate": 4.988308504722332e-05, "loss": 0.1966, "step": 18430 }, { "epoch": 0.6701068391598226, "grad_norm": 6.763409614562988, "learning_rate": 4.9882583229294044e-05, "loss": 0.156, "step": 18440 }, { "epoch": 0.6704702376626208, "grad_norm": 8.312501907348633, "learning_rate": 4.988208033926279e-05, "loss": 0.2573, "step": 18450 }, { "epoch": 0.670833636165419, "grad_norm": 1.7566003799438477, "learning_rate": 4.988157637715122e-05, "loss": 0.1639, "step": 18460 }, { "epoch": 0.6711970346682171, "grad_norm": 2.336911916732788, "learning_rate": 4.988107134298105e-05, "loss": 0.1536, "step": 18470 }, { "epoch": 0.6715604331710153, "grad_norm": 2.2477078437805176, "learning_rate": 4.988056523677405e-05, "loss": 0.2734, "step": 18480 }, { "epoch": 0.6719238316738135, 
"grad_norm": 1.62912917137146, "learning_rate": 4.9880058058552015e-05, "loss": 0.1501, "step": 18490 }, { "epoch": 0.6722872301766116, "grad_norm": 8.896906852722168, "learning_rate": 4.98795498083368e-05, "loss": 0.213, "step": 18500 }, { "epoch": 0.6726506286794098, "grad_norm": 1.804291009902954, "learning_rate": 4.987904048615031e-05, "loss": 0.2175, "step": 18510 }, { "epoch": 0.673014027182208, "grad_norm": 0.9261330366134644, "learning_rate": 4.9878530092014486e-05, "loss": 0.1553, "step": 18520 }, { "epoch": 0.6733774256850061, "grad_norm": 4.854642868041992, "learning_rate": 4.987801862595132e-05, "loss": 0.2065, "step": 18530 }, { "epoch": 0.6737408241878043, "grad_norm": 0.9362125992774963, "learning_rate": 4.987750608798284e-05, "loss": 0.1611, "step": 18540 }, { "epoch": 0.6741042226906026, "grad_norm": 13.348092079162598, "learning_rate": 4.987699247813114e-05, "loss": 0.2834, "step": 18550 }, { "epoch": 0.6744676211934006, "grad_norm": 1.3235937356948853, "learning_rate": 4.987647779641835e-05, "loss": 0.166, "step": 18560 }, { "epoch": 0.6748310196961989, "grad_norm": 1.7941697835922241, "learning_rate": 4.987596204286664e-05, "loss": 0.186, "step": 18570 }, { "epoch": 0.6751944181989971, "grad_norm": 6.945876121520996, "learning_rate": 4.987544521749824e-05, "loss": 0.1859, "step": 18580 }, { "epoch": 0.6755578167017952, "grad_norm": 1.1671024560928345, "learning_rate": 4.98749273203354e-05, "loss": 0.2007, "step": 18590 }, { "epoch": 0.6759212152045934, "grad_norm": 46.817718505859375, "learning_rate": 4.987440835140046e-05, "loss": 0.275, "step": 18600 }, { "epoch": 0.6759212152045934, "eval_loss": 0.4186328053474426, "eval_runtime": 180.6066, "eval_samples_per_second": 41.051, "eval_steps_per_second": 5.133, "eval_wer": 0.21444260896400238, "step": 18600 }, { "epoch": 0.6762846137073916, "grad_norm": 0.9619908928871155, "learning_rate": 4.987388831071575e-05, "loss": 0.2147, "step": 18610 }, { "epoch": 0.6766480122101897, "grad_norm": 
1.139666199684143, "learning_rate": 4.9873367198303714e-05, "loss": 0.2591, "step": 18620 }, { "epoch": 0.6770114107129879, "grad_norm": 2.6673026084899902, "learning_rate": 4.9872845014186776e-05, "loss": 0.2013, "step": 18630 }, { "epoch": 0.6773748092157861, "grad_norm": 1.0486637353897095, "learning_rate": 4.987232175838745e-05, "loss": 0.2326, "step": 18640 }, { "epoch": 0.6777382077185842, "grad_norm": 6.457462787628174, "learning_rate": 4.987179743092827e-05, "loss": 0.2395, "step": 18650 }, { "epoch": 0.6781016062213824, "grad_norm": 3.296480178833008, "learning_rate": 4.987127203183183e-05, "loss": 0.1857, "step": 18660 }, { "epoch": 0.6784650047241806, "grad_norm": 2.828460454940796, "learning_rate": 4.987074556112078e-05, "loss": 0.1391, "step": 18670 }, { "epoch": 0.6788284032269787, "grad_norm": 10.424219131469727, "learning_rate": 4.987021801881779e-05, "loss": 0.1583, "step": 18680 }, { "epoch": 0.6791918017297769, "grad_norm": 5.248502254486084, "learning_rate": 4.986968940494559e-05, "loss": 0.1676, "step": 18690 }, { "epoch": 0.6795552002325751, "grad_norm": 8.20375919342041, "learning_rate": 4.986915971952696e-05, "loss": 0.2844, "step": 18700 }, { "epoch": 0.6799185987353732, "grad_norm": 2.415562152862549, "learning_rate": 4.986862896258473e-05, "loss": 0.1634, "step": 18710 }, { "epoch": 0.6802819972381714, "grad_norm": 1.635680079460144, "learning_rate": 4.986809713414176e-05, "loss": 0.509, "step": 18720 }, { "epoch": 0.6806453957409695, "grad_norm": 10.641048431396484, "learning_rate": 4.986756423422095e-05, "loss": 0.2015, "step": 18730 }, { "epoch": 0.6810087942437677, "grad_norm": 1.3304156064987183, "learning_rate": 4.986703026284529e-05, "loss": 0.1598, "step": 18740 }, { "epoch": 0.6813721927465659, "grad_norm": 4.707154750823975, "learning_rate": 4.986649522003778e-05, "loss": 0.2486, "step": 18750 }, { "epoch": 0.681735591249364, "grad_norm": 1.671863317489624, "learning_rate": 4.9865959105821454e-05, "loss": 0.1628, "step": 18760 
}, { "epoch": 0.6820989897521622, "grad_norm": 2.4183709621429443, "learning_rate": 4.986542192021942e-05, "loss": 0.1636, "step": 18770 }, { "epoch": 0.6824623882549604, "grad_norm": 120.8931884765625, "learning_rate": 4.9864883663254836e-05, "loss": 2.0172, "step": 18780 }, { "epoch": 0.6828257867577585, "grad_norm": 2.785879135131836, "learning_rate": 4.986434433495089e-05, "loss": 0.1669, "step": 18790 }, { "epoch": 0.6831891852605567, "grad_norm": 3.662753105163574, "learning_rate": 4.98638039353308e-05, "loss": 0.3095, "step": 18800 }, { "epoch": 0.6835525837633549, "grad_norm": 1.1632777452468872, "learning_rate": 4.986326246441787e-05, "loss": 0.1632, "step": 18810 }, { "epoch": 0.683915982266153, "grad_norm": 0.9660913348197937, "learning_rate": 4.986271992223543e-05, "loss": 0.1509, "step": 18820 }, { "epoch": 0.6842793807689512, "grad_norm": 2.810391426086426, "learning_rate": 4.986217630880684e-05, "loss": 0.1507, "step": 18830 }, { "epoch": 0.6846427792717494, "grad_norm": 2.008641242980957, "learning_rate": 4.986163162415554e-05, "loss": 0.1858, "step": 18840 }, { "epoch": 0.6850061777745475, "grad_norm": 3.4007887840270996, "learning_rate": 4.986108586830499e-05, "loss": 0.2389, "step": 18850 }, { "epoch": 0.6853695762773457, "grad_norm": 0.8250002861022949, "learning_rate": 4.986053904127871e-05, "loss": 0.1618, "step": 18860 }, { "epoch": 0.685732974780144, "grad_norm": 0.792607307434082, "learning_rate": 4.986004598111927e-05, "loss": 1.5835, "step": 18870 }, { "epoch": 0.686096373282942, "grad_norm": 2.740478038787842, "learning_rate": 4.985949711892404e-05, "loss": 0.2021, "step": 18880 }, { "epoch": 0.6864597717857402, "grad_norm": 1.1361775398254395, "learning_rate": 4.985894718562153e-05, "loss": 0.2244, "step": 18890 }, { "epoch": 0.6868231702885385, "grad_norm": 2.692542314529419, "learning_rate": 4.985839618123543e-05, "loss": 0.2095, "step": 18900 }, { "epoch": 0.6871865687913365, "grad_norm": 1.2691428661346436, "learning_rate": 
4.9857844105789485e-05, "loss": 0.1533, "step": 18910 }, { "epoch": 0.6875499672941348, "grad_norm": 2.087209939956665, "learning_rate": 4.9857290959307483e-05, "loss": 0.1469, "step": 18920 }, { "epoch": 0.687913365796933, "grad_norm": 1.5252209901809692, "learning_rate": 4.985673674181326e-05, "loss": 0.2099, "step": 18930 }, { "epoch": 0.6882767642997311, "grad_norm": 1.81588876247406, "learning_rate": 4.9856181453330685e-05, "loss": 0.174, "step": 18940 }, { "epoch": 0.6886401628025293, "grad_norm": 21.244775772094727, "learning_rate": 4.9855625093883695e-05, "loss": 0.2455, "step": 18950 }, { "epoch": 0.6890035613053275, "grad_norm": 1.53201425075531, "learning_rate": 4.9855067663496255e-05, "loss": 0.1731, "step": 18960 }, { "epoch": 0.6893669598081256, "grad_norm": 0.9922922849655151, "learning_rate": 4.985450916219239e-05, "loss": 0.1569, "step": 18970 }, { "epoch": 0.6897303583109238, "grad_norm": 1.6983296871185303, "learning_rate": 4.985394958999615e-05, "loss": 0.1784, "step": 18980 }, { "epoch": 0.690093756813722, "grad_norm": 2.5069353580474854, "learning_rate": 4.9853388946931654e-05, "loss": 0.1484, "step": 18990 }, { "epoch": 0.6904571553165201, "grad_norm": 52.345367431640625, "learning_rate": 4.985282723302306e-05, "loss": 0.2431, "step": 19000 }, { "epoch": 0.6908205538193183, "grad_norm": 1.5318138599395752, "learning_rate": 4.9852264448294564e-05, "loss": 0.1662, "step": 19010 }, { "epoch": 0.6911839523221164, "grad_norm": 1.5980876684188843, "learning_rate": 4.985170059277041e-05, "loss": 1.3532, "step": 19020 }, { "epoch": 0.6915473508249146, "grad_norm": 2.355023145675659, "learning_rate": 4.9851135666474915e-05, "loss": 0.1688, "step": 19030 }, { "epoch": 0.6919107493277128, "grad_norm": 3.2141480445861816, "learning_rate": 4.98505696694324e-05, "loss": 0.1303, "step": 19040 }, { "epoch": 0.6922741478305109, "grad_norm": 19.482290267944336, "learning_rate": 4.985000260166725e-05, "loss": 0.2337, "step": 19050 }, { "epoch": 
0.6926375463333091, "grad_norm": 0.8456101417541504, "learning_rate": 4.9849434463203915e-05, "loss": 0.1732, "step": 19060 }, { "epoch": 0.6930009448361073, "grad_norm": 2.2158889770507812, "learning_rate": 4.9848865254066856e-05, "loss": 0.1524, "step": 19070 }, { "epoch": 0.6933643433389054, "grad_norm": 2.0843331813812256, "learning_rate": 4.9848294974280605e-05, "loss": 0.1943, "step": 19080 }, { "epoch": 0.6937277418417036, "grad_norm": 2.6970462799072266, "learning_rate": 4.9847723623869734e-05, "loss": 0.1697, "step": 19090 }, { "epoch": 0.6940911403445018, "grad_norm": 9.394730567932129, "learning_rate": 4.984715120285887e-05, "loss": 0.2151, "step": 19100 }, { "epoch": 0.6944545388472999, "grad_norm": 1.922090768814087, "learning_rate": 4.9846577711272656e-05, "loss": 0.1737, "step": 19110 }, { "epoch": 0.6948179373500981, "grad_norm": 1.3870245218276978, "learning_rate": 4.9846003149135815e-05, "loss": 0.1694, "step": 19120 }, { "epoch": 0.6951813358528963, "grad_norm": 1.6474970579147339, "learning_rate": 4.9845427516473104e-05, "loss": 0.219, "step": 19130 }, { "epoch": 0.6955447343556944, "grad_norm": 1.4302411079406738, "learning_rate": 4.984485081330932e-05, "loss": 0.1489, "step": 19140 }, { "epoch": 0.6959081328584926, "grad_norm": 3.888967990875244, "learning_rate": 4.984427303966932e-05, "loss": 0.2425, "step": 19150 }, { "epoch": 0.6962715313612908, "grad_norm": 1.2002874612808228, "learning_rate": 4.984369419557798e-05, "loss": 0.1575, "step": 19160 }, { "epoch": 0.6966349298640889, "grad_norm": 1.9064863920211792, "learning_rate": 4.984311428106025e-05, "loss": 0.1526, "step": 19170 }, { "epoch": 0.6969983283668871, "grad_norm": 1.3838772773742676, "learning_rate": 4.984253329614112e-05, "loss": 0.1601, "step": 19180 }, { "epoch": 0.6973617268696853, "grad_norm": 3.6261801719665527, "learning_rate": 4.984195124084563e-05, "loss": 0.1668, "step": 19190 }, { "epoch": 0.6977251253724834, "grad_norm": 7.647263526916504, "learning_rate": 
4.984136811519884e-05, "loss": 0.1879, "step": 19200 }, { "epoch": 0.6977251253724834, "eval_loss": 0.3865276575088501, "eval_runtime": 179.6651, "eval_samples_per_second": 41.266, "eval_steps_per_second": 5.16, "eval_wer": 0.20815255867990634, "step": 19200 }, { "epoch": 0.6980885238752816, "grad_norm": 1.7563225030899048, "learning_rate": 4.984078391922589e-05, "loss": 0.1481, "step": 19210 }, { "epoch": 0.6984519223780798, "grad_norm": 1.8016029596328735, "learning_rate": 4.984019865295194e-05, "loss": 0.1713, "step": 19220 }, { "epoch": 0.6988153208808779, "grad_norm": 2.0969181060791016, "learning_rate": 4.983961231640221e-05, "loss": 0.1959, "step": 19230 }, { "epoch": 0.6991787193836762, "grad_norm": 1.6823608875274658, "learning_rate": 4.9839024909601964e-05, "loss": 0.1729, "step": 19240 }, { "epoch": 0.6995421178864744, "grad_norm": 11.533753395080566, "learning_rate": 4.983843643257652e-05, "loss": 0.2264, "step": 19250 }, { "epoch": 0.6999055163892725, "grad_norm": 4.1039204597473145, "learning_rate": 4.983784688535122e-05, "loss": 0.1738, "step": 19260 }, { "epoch": 0.7002689148920707, "grad_norm": 1.1051629781723022, "learning_rate": 4.983725626795147e-05, "loss": 0.155, "step": 19270 }, { "epoch": 0.7006323133948689, "grad_norm": 4.303994178771973, "learning_rate": 4.983666458040273e-05, "loss": 0.1593, "step": 19280 }, { "epoch": 0.700995711897667, "grad_norm": 1.2324292659759521, "learning_rate": 4.983607182273047e-05, "loss": 0.1642, "step": 19290 }, { "epoch": 0.7013591104004652, "grad_norm": 6.101926326751709, "learning_rate": 4.983547799496024e-05, "loss": 0.2338, "step": 19300 }, { "epoch": 0.7017225089032633, "grad_norm": 1.1532049179077148, "learning_rate": 4.983488309711763e-05, "loss": 0.1591, "step": 19310 }, { "epoch": 0.7020859074060615, "grad_norm": 0.8216233253479004, "learning_rate": 4.983428712922828e-05, "loss": 0.1489, "step": 19320 }, { "epoch": 0.7024493059088597, "grad_norm": 1.489461064338684, "learning_rate": 
4.983369009131785e-05, "loss": 0.2048, "step": 19330 }, { "epoch": 0.7028127044116578, "grad_norm": 1.0493615865707397, "learning_rate": 4.983309198341207e-05, "loss": 0.1525, "step": 19340 }, { "epoch": 0.703176102914456, "grad_norm": 10.2578706741333, "learning_rate": 4.983249280553672e-05, "loss": 0.2297, "step": 19350 }, { "epoch": 0.7035395014172542, "grad_norm": 1.5366660356521606, "learning_rate": 4.983189255771761e-05, "loss": 0.1644, "step": 19360 }, { "epoch": 0.7039028999200523, "grad_norm": 1.4915844202041626, "learning_rate": 4.9831291239980596e-05, "loss": 0.1599, "step": 19370 }, { "epoch": 0.7042662984228505, "grad_norm": 1.3012590408325195, "learning_rate": 4.98306888523516e-05, "loss": 0.1907, "step": 19380 }, { "epoch": 0.7046296969256487, "grad_norm": 1.5029476881027222, "learning_rate": 4.983008539485656e-05, "loss": 0.1391, "step": 19390 }, { "epoch": 0.7049930954284468, "grad_norm": 3.0202033519744873, "learning_rate": 4.9829480867521495e-05, "loss": 0.2218, "step": 19400 }, { "epoch": 0.705356493931245, "grad_norm": 1.7761317491531372, "learning_rate": 4.9828875270372434e-05, "loss": 0.1605, "step": 19410 }, { "epoch": 0.7057198924340432, "grad_norm": 1.420793890953064, "learning_rate": 4.9828268603435485e-05, "loss": 1.5838, "step": 19420 }, { "epoch": 0.7060832909368413, "grad_norm": 2.079665422439575, "learning_rate": 4.982766086673678e-05, "loss": 0.2146, "step": 19430 }, { "epoch": 0.7064466894396395, "grad_norm": 2.440471887588501, "learning_rate": 4.98270520603025e-05, "loss": 0.1733, "step": 19440 }, { "epoch": 0.7068100879424377, "grad_norm": 7.773731708526611, "learning_rate": 4.982644218415889e-05, "loss": 0.2126, "step": 19450 }, { "epoch": 0.7071734864452358, "grad_norm": 0.9480405449867249, "learning_rate": 4.982583123833221e-05, "loss": 0.1575, "step": 19460 }, { "epoch": 0.707536884948034, "grad_norm": 12.79196548461914, "learning_rate": 4.982521922284881e-05, "loss": 0.2745, "step": 19470 }, { "epoch": 0.7079002834508322, 
"grad_norm": 4.492150783538818, "learning_rate": 4.982460613773502e-05, "loss": 0.1663, "step": 19480 }, { "epoch": 0.7082636819536303, "grad_norm": 1.2373683452606201, "learning_rate": 4.9823991983017295e-05, "loss": 0.1699, "step": 19490 }, { "epoch": 0.7086270804564285, "grad_norm": 5.8804402351379395, "learning_rate": 4.982337675872207e-05, "loss": 0.242, "step": 19500 }, { "epoch": 0.7089904789592267, "grad_norm": 0.9465837478637695, "learning_rate": 4.982276046487586e-05, "loss": 0.1471, "step": 19510 }, { "epoch": 0.7093538774620248, "grad_norm": 1.6178842782974243, "learning_rate": 4.9822143101505226e-05, "loss": 0.1619, "step": 19520 }, { "epoch": 0.709717275964823, "grad_norm": 2.4963414669036865, "learning_rate": 4.9821524668636766e-05, "loss": 0.1426, "step": 19530 }, { "epoch": 0.7100806744676212, "grad_norm": 1.1380610466003418, "learning_rate": 4.982090516629712e-05, "loss": 0.2364, "step": 19540 }, { "epoch": 0.7104440729704193, "grad_norm": 5.2998046875, "learning_rate": 4.982028459451298e-05, "loss": 0.2661, "step": 19550 }, { "epoch": 0.7108074714732175, "grad_norm": 1.1476637125015259, "learning_rate": 4.9819662953311096e-05, "loss": 0.1306, "step": 19560 }, { "epoch": 0.7111708699760158, "grad_norm": 0.7960777878761292, "learning_rate": 4.981904024271824e-05, "loss": 0.1604, "step": 19570 }, { "epoch": 0.7115342684788138, "grad_norm": 1.9035999774932861, "learning_rate": 4.981841646276124e-05, "loss": 0.1728, "step": 19580 }, { "epoch": 0.711897666981612, "grad_norm": 0.9725393056869507, "learning_rate": 4.981779161346699e-05, "loss": 0.2529, "step": 19590 }, { "epoch": 0.7122610654844101, "grad_norm": 5.759589672088623, "learning_rate": 4.98171656948624e-05, "loss": 0.25, "step": 19600 }, { "epoch": 0.7126244639872084, "grad_norm": 1.3716357946395874, "learning_rate": 4.9816538706974434e-05, "loss": 0.1603, "step": 19610 }, { "epoch": 0.7129878624900066, "grad_norm": 1.4253743886947632, "learning_rate": 4.981591064983011e-05, "loss": 0.1496, 
"step": 19620 }, { "epoch": 0.7133512609928047, "grad_norm": 2.4253408908843994, "learning_rate": 4.98152815234565e-05, "loss": 0.1694, "step": 19630 }, { "epoch": 0.7137146594956029, "grad_norm": 1.212689757347107, "learning_rate": 4.9814651327880696e-05, "loss": 0.1869, "step": 19640 }, { "epoch": 0.7140780579984011, "grad_norm": 7.003270626068115, "learning_rate": 4.981402006312986e-05, "loss": 0.2709, "step": 19650 }, { "epoch": 0.7144414565011992, "grad_norm": 1.6173512935638428, "learning_rate": 4.981338772923119e-05, "loss": 0.1651, "step": 19660 }, { "epoch": 0.7148048550039974, "grad_norm": 2.2197723388671875, "learning_rate": 4.981275432621192e-05, "loss": 0.1657, "step": 19670 }, { "epoch": 0.7151682535067956, "grad_norm": 1.8906898498535156, "learning_rate": 4.981211985409936e-05, "loss": 2.3111, "step": 19680 }, { "epoch": 0.7155316520095937, "grad_norm": 3.50747013092041, "learning_rate": 4.981148431292084e-05, "loss": 0.1498, "step": 19690 }, { "epoch": 0.7158950505123919, "grad_norm": 4.080805778503418, "learning_rate": 4.981084770270373e-05, "loss": 0.2094, "step": 19700 }, { "epoch": 0.7162584490151901, "grad_norm": 2.1056652069091797, "learning_rate": 4.981021002347547e-05, "loss": 0.157, "step": 19710 }, { "epoch": 0.7166218475179882, "grad_norm": 1.07776939868927, "learning_rate": 4.980957127526354e-05, "loss": 0.2049, "step": 19720 }, { "epoch": 0.7169852460207864, "grad_norm": 3.5387072563171387, "learning_rate": 4.980893145809546e-05, "loss": 0.1706, "step": 19730 }, { "epoch": 0.7173486445235846, "grad_norm": 1.5516027212142944, "learning_rate": 4.980829057199879e-05, "loss": 0.1371, "step": 19740 }, { "epoch": 0.7177120430263827, "grad_norm": 6.618633270263672, "learning_rate": 4.9807648617001145e-05, "loss": 0.1833, "step": 19750 }, { "epoch": 0.7180754415291809, "grad_norm": 1.7093079090118408, "learning_rate": 4.980700559313019e-05, "loss": 0.1592, "step": 19760 }, { "epoch": 0.7184388400319791, "grad_norm": 1.1217936277389526, 
"learning_rate": 4.9806361500413626e-05, "loss": 0.145, "step": 19770 }, { "epoch": 0.7188022385347772, "grad_norm": 1.869722604751587, "learning_rate": 4.980571633887921e-05, "loss": 0.1605, "step": 19780 }, { "epoch": 0.7191656370375754, "grad_norm": 1.1555829048156738, "learning_rate": 4.980507010855473e-05, "loss": 0.1539, "step": 19790 }, { "epoch": 0.7195290355403736, "grad_norm": 5.0145111083984375, "learning_rate": 4.9804422809468046e-05, "loss": 0.2334, "step": 19800 }, { "epoch": 0.7195290355403736, "eval_loss": 0.394449919462204, "eval_runtime": 180.0311, "eval_samples_per_second": 41.182, "eval_steps_per_second": 5.149, "eval_wer": 0.2100677110752083, "step": 19800 }, { "epoch": 0.7198924340431717, "grad_norm": 1.0865716934204102, "learning_rate": 4.980377444164702e-05, "loss": 0.1569, "step": 19810 }, { "epoch": 0.7202558325459699, "grad_norm": 1.5475140810012817, "learning_rate": 4.980312500511962e-05, "loss": 0.1268, "step": 19820 }, { "epoch": 0.7206192310487681, "grad_norm": 1.9507659673690796, "learning_rate": 4.980247449991381e-05, "loss": 0.2092, "step": 19830 }, { "epoch": 0.7209826295515662, "grad_norm": 1.185339093208313, "learning_rate": 4.980182292605762e-05, "loss": 0.1432, "step": 19840 }, { "epoch": 0.7213460280543644, "grad_norm": 5.294797420501709, "learning_rate": 4.980117028357912e-05, "loss": 0.2459, "step": 19850 }, { "epoch": 0.7217094265571626, "grad_norm": 2.691941976547241, "learning_rate": 4.980051657250645e-05, "loss": 0.1747, "step": 19860 }, { "epoch": 0.7220728250599607, "grad_norm": 1.3377537727355957, "learning_rate": 4.9799861792867756e-05, "loss": 0.1541, "step": 19870 }, { "epoch": 0.7224362235627589, "grad_norm": 3.39907169342041, "learning_rate": 4.979920594469124e-05, "loss": 0.166, "step": 19880 }, { "epoch": 0.722799622065557, "grad_norm": 1.738271951675415, "learning_rate": 4.9798549028005195e-05, "loss": 0.1591, "step": 19890 }, { "epoch": 0.7231630205683552, "grad_norm": 4.062039852142334, "learning_rate": 
4.9797891042837893e-05, "loss": 0.2372, "step": 19900 }, { "epoch": 0.7235264190711534, "grad_norm": 2.46109676361084, "learning_rate": 4.979723198921771e-05, "loss": 0.1606, "step": 19910 }, { "epoch": 0.7238898175739515, "grad_norm": 1.3511689901351929, "learning_rate": 4.9796571867173017e-05, "loss": 0.148, "step": 19920 }, { "epoch": 0.7242532160767498, "grad_norm": 4.831977844238281, "learning_rate": 4.979591067673227e-05, "loss": 0.1832, "step": 19930 }, { "epoch": 0.724616614579548, "grad_norm": 0.9530340433120728, "learning_rate": 4.979524841792397e-05, "loss": 0.1776, "step": 19940 }, { "epoch": 0.724980013082346, "grad_norm": 2.886121988296509, "learning_rate": 4.979458509077663e-05, "loss": 0.217, "step": 19950 }, { "epoch": 0.7253434115851443, "grad_norm": 2.6050822734832764, "learning_rate": 4.979392069531883e-05, "loss": 0.1709, "step": 19960 }, { "epoch": 0.7257068100879425, "grad_norm": 1.1615772247314453, "learning_rate": 4.979325523157921e-05, "loss": 0.1891, "step": 19970 }, { "epoch": 0.7260702085907406, "grad_norm": 5.947473526000977, "learning_rate": 4.979258869958643e-05, "loss": 0.1685, "step": 19980 }, { "epoch": 0.7264336070935388, "grad_norm": 2.2721457481384277, "learning_rate": 4.979192109936922e-05, "loss": 0.1733, "step": 19990 }, { "epoch": 0.726797005596337, "grad_norm": 2.83907413482666, "learning_rate": 4.979125243095635e-05, "loss": 0.2067, "step": 20000 }, { "epoch": 0.7271604040991351, "grad_norm": 1.84774649143219, "learning_rate": 4.9790582694376605e-05, "loss": 0.1634, "step": 20010 }, { "epoch": 0.7275238026019333, "grad_norm": 3.5162901878356934, "learning_rate": 4.978991188965887e-05, "loss": 0.1546, "step": 20020 }, { "epoch": 0.7278872011047315, "grad_norm": 1.3396214246749878, "learning_rate": 4.9789240016832026e-05, "loss": 0.1549, "step": 20030 }, { "epoch": 0.7282505996075296, "grad_norm": 0.8957159519195557, "learning_rate": 4.978856707592503e-05, "loss": 0.4856, "step": 20040 }, { "epoch": 0.7286139981103278, 
"grad_norm": 3.291719913482666, "learning_rate": 4.978789306696688e-05, "loss": 0.1672, "step": 20050 }, { "epoch": 0.728977396613126, "grad_norm": 1.2237446308135986, "learning_rate": 4.978721798998661e-05, "loss": 0.1547, "step": 20060 }, { "epoch": 0.7293407951159241, "grad_norm": 1.5760120153427124, "learning_rate": 4.978654184501331e-05, "loss": 0.1491, "step": 20070 }, { "epoch": 0.7297041936187223, "grad_norm": 2.661914587020874, "learning_rate": 4.978586463207612e-05, "loss": 0.2399, "step": 20080 }, { "epoch": 0.7300675921215205, "grad_norm": 1.4015228748321533, "learning_rate": 4.978518635120421e-05, "loss": 0.1592, "step": 20090 }, { "epoch": 0.7304309906243186, "grad_norm": 11.479881286621094, "learning_rate": 4.9784507002426793e-05, "loss": 0.2478, "step": 20100 }, { "epoch": 0.7307943891271168, "grad_norm": 2.3282432556152344, "learning_rate": 4.9783826585773164e-05, "loss": 0.1565, "step": 20110 }, { "epoch": 0.731157787629915, "grad_norm": 1.0281476974487305, "learning_rate": 4.9783145101272625e-05, "loss": 2.6872, "step": 20120 }, { "epoch": 0.7315211861327131, "grad_norm": 1.4759191274642944, "learning_rate": 4.978246254895455e-05, "loss": 0.1755, "step": 20130 }, { "epoch": 0.7318845846355113, "grad_norm": 1.1100878715515137, "learning_rate": 4.978177892884833e-05, "loss": 0.1519, "step": 20140 }, { "epoch": 0.7322479831383095, "grad_norm": 5.326310157775879, "learning_rate": 4.9781094240983435e-05, "loss": 0.257, "step": 20150 }, { "epoch": 0.7326113816411076, "grad_norm": 8.199230194091797, "learning_rate": 4.978040848538936e-05, "loss": 0.192, "step": 20160 }, { "epoch": 0.7329747801439058, "grad_norm": 1.579663872718811, "learning_rate": 4.9779721662095654e-05, "loss": 0.1738, "step": 20170 }, { "epoch": 0.733338178646704, "grad_norm": 3.319883346557617, "learning_rate": 4.97790337711319e-05, "loss": 0.1809, "step": 20180 }, { "epoch": 0.7337015771495021, "grad_norm": 1.4813331365585327, "learning_rate": 4.977834481252776e-05, "loss": 0.1645, 
"step": 20190 }, { "epoch": 0.7340649756523003, "grad_norm": 4.392731666564941, "learning_rate": 4.9777654786312886e-05, "loss": 0.1897, "step": 20200 }, { "epoch": 0.7344283741550984, "grad_norm": 1.7336299419403076, "learning_rate": 4.9776963692517034e-05, "loss": 0.1751, "step": 20210 }, { "epoch": 0.7347917726578966, "grad_norm": 1.6261765956878662, "learning_rate": 4.977627153116998e-05, "loss": 0.156, "step": 20220 }, { "epoch": 0.7351551711606948, "grad_norm": 1.9801748991012573, "learning_rate": 4.977557830230153e-05, "loss": 0.2069, "step": 20230 }, { "epoch": 0.7355185696634929, "grad_norm": 1.4615390300750732, "learning_rate": 4.977488400594157e-05, "loss": 0.1458, "step": 20240 }, { "epoch": 0.7358819681662911, "grad_norm": 3.78981876373291, "learning_rate": 4.977418864212e-05, "loss": 0.1765, "step": 20250 }, { "epoch": 0.7362453666690894, "grad_norm": 0.813947319984436, "learning_rate": 4.97734922108668e-05, "loss": 0.1482, "step": 20260 }, { "epoch": 0.7366087651718874, "grad_norm": 1.1082271337509155, "learning_rate": 4.977279471221195e-05, "loss": 0.149, "step": 20270 }, { "epoch": 0.7369721636746857, "grad_norm": 4.023866176605225, "learning_rate": 4.9772096146185527e-05, "loss": 0.1797, "step": 20280 }, { "epoch": 0.7373355621774839, "grad_norm": 1.3649333715438843, "learning_rate": 4.977139651281762e-05, "loss": 0.182, "step": 20290 }, { "epoch": 0.737698960680282, "grad_norm": 8.213293075561523, "learning_rate": 4.977069581213837e-05, "loss": 0.2117, "step": 20300 }, { "epoch": 0.7380623591830802, "grad_norm": 1.0769990682601929, "learning_rate": 4.9769994044177976e-05, "loss": 0.1689, "step": 20310 }, { "epoch": 0.7384257576858784, "grad_norm": 1.712949275970459, "learning_rate": 4.9769291208966674e-05, "loss": 0.1402, "step": 20320 }, { "epoch": 0.7387891561886765, "grad_norm": 2.213164806365967, "learning_rate": 4.976858730653473e-05, "loss": 0.193, "step": 20330 }, { "epoch": 0.7391525546914747, "grad_norm": 1.9228605031967163, 
"learning_rate": 4.97678823369125e-05, "loss": 0.1517, "step": 20340 }, { "epoch": 0.7395159531942729, "grad_norm": 8.813825607299805, "learning_rate": 4.976717630013034e-05, "loss": 0.2682, "step": 20350 }, { "epoch": 0.739879351697071, "grad_norm": 1.9778189659118652, "learning_rate": 4.976646919621867e-05, "loss": 0.1701, "step": 20360 }, { "epoch": 0.7402427501998692, "grad_norm": 1.8553961515426636, "learning_rate": 4.976576102520797e-05, "loss": 0.1455, "step": 20370 }, { "epoch": 0.7406061487026674, "grad_norm": 3.1159512996673584, "learning_rate": 4.976505178712874e-05, "loss": 0.2252, "step": 20380 }, { "epoch": 0.7409695472054655, "grad_norm": 1.9035766124725342, "learning_rate": 4.9764341482011545e-05, "loss": 0.1815, "step": 20390 }, { "epoch": 0.7413329457082637, "grad_norm": 2.228940725326538, "learning_rate": 4.976363010988698e-05, "loss": 0.1995, "step": 20400 }, { "epoch": 0.7413329457082637, "eval_loss": 0.35944151878356934, "eval_runtime": 179.8589, "eval_samples_per_second": 41.221, "eval_steps_per_second": 5.154, "eval_wer": 0.200864087715795, "step": 20400 }, { "epoch": 0.7416963442110619, "grad_norm": 1.5204256772994995, "learning_rate": 4.976291767078571e-05, "loss": 1.6497, "step": 20410 }, { "epoch": 0.74205974271386, "grad_norm": 1.3520594835281372, "learning_rate": 4.976220416473842e-05, "loss": 0.1503, "step": 20420 }, { "epoch": 0.7424231412166582, "grad_norm": 2.7322440147399902, "learning_rate": 4.976148959177586e-05, "loss": 0.1784, "step": 20430 }, { "epoch": 0.7427865397194564, "grad_norm": 1.3193668127059937, "learning_rate": 4.9760773951928815e-05, "loss": 0.1685, "step": 20440 }, { "epoch": 0.7431499382222545, "grad_norm": 11.000434875488281, "learning_rate": 4.976005724522812e-05, "loss": 0.2147, "step": 20450 }, { "epoch": 0.7435133367250527, "grad_norm": 1.1825796365737915, "learning_rate": 4.9759339471704656e-05, "loss": 0.2116, "step": 20460 }, { "epoch": 0.7438767352278509, "grad_norm": 1.1518877744674683, 
"learning_rate": 4.975862063138934e-05, "loss": 0.141, "step": 20470 }, { "epoch": 0.744240133730649, "grad_norm": 6.054372310638428, "learning_rate": 4.975790072431316e-05, "loss": 0.1766, "step": 20480 }, { "epoch": 0.7446035322334472, "grad_norm": 1.0629233121871948, "learning_rate": 4.975717975050713e-05, "loss": 0.1641, "step": 20490 }, { "epoch": 0.7449669307362453, "grad_norm": 2.4782843589782715, "learning_rate": 4.97564577100023e-05, "loss": 0.2186, "step": 20500 }, { "epoch": 0.7453303292390435, "grad_norm": 1.5713534355163574, "learning_rate": 4.975573460282979e-05, "loss": 0.1535, "step": 20510 }, { "epoch": 0.7456937277418417, "grad_norm": 0.7279618382453918, "learning_rate": 4.975501042902078e-05, "loss": 0.1372, "step": 20520 }, { "epoch": 0.7460571262446398, "grad_norm": 5.573297500610352, "learning_rate": 4.975428518860643e-05, "loss": 0.161, "step": 20530 }, { "epoch": 0.746420524747438, "grad_norm": 1.022141695022583, "learning_rate": 4.975355888161801e-05, "loss": 0.1645, "step": 20540 }, { "epoch": 0.7467839232502362, "grad_norm": 2.9584996700286865, "learning_rate": 4.9752831508086805e-05, "loss": 0.2085, "step": 20550 }, { "epoch": 0.7471473217530343, "grad_norm": 2.2749557495117188, "learning_rate": 4.975210306804418e-05, "loss": 0.1531, "step": 20560 }, { "epoch": 0.7475107202558325, "grad_norm": 1.877822995185852, "learning_rate": 4.9751373561521484e-05, "loss": 0.1654, "step": 20570 }, { "epoch": 0.7478741187586307, "grad_norm": 7.727886199951172, "learning_rate": 4.975064298855017e-05, "loss": 0.2026, "step": 20580 }, { "epoch": 0.7482375172614288, "grad_norm": 1.2424033880233765, "learning_rate": 4.974991134916171e-05, "loss": 0.1834, "step": 20590 }, { "epoch": 0.748600915764227, "grad_norm": 7.272613525390625, "learning_rate": 4.974917864338764e-05, "loss": 0.2266, "step": 20600 }, { "epoch": 0.7489643142670253, "grad_norm": 0.6424925327301025, "learning_rate": 4.974844487125952e-05, "loss": 0.1496, "step": 20610 }, { "epoch": 
0.7493277127698234, "grad_norm": 2.064819097518921, "learning_rate": 4.974771003280896e-05, "loss": 0.192, "step": 20620 }, { "epoch": 0.7496911112726216, "grad_norm": 2.55157470703125, "learning_rate": 4.974697412806763e-05, "loss": 0.1863, "step": 20630 }, { "epoch": 0.7500545097754198, "grad_norm": 1.10732901096344, "learning_rate": 4.974623715706723e-05, "loss": 0.1452, "step": 20640 }, { "epoch": 0.7504179082782179, "grad_norm": 6.665337562561035, "learning_rate": 4.9745499119839526e-05, "loss": 0.2393, "step": 20650 }, { "epoch": 0.7507813067810161, "grad_norm": 2.315764904022217, "learning_rate": 4.974476001641631e-05, "loss": 0.1724, "step": 20660 }, { "epoch": 0.7511447052838143, "grad_norm": 1.7643327713012695, "learning_rate": 4.974401984682942e-05, "loss": 0.1676, "step": 20670 }, { "epoch": 0.7515081037866124, "grad_norm": 2.556265115737915, "learning_rate": 4.974327861111075e-05, "loss": 0.1706, "step": 20680 }, { "epoch": 0.7518715022894106, "grad_norm": 1.0939987897872925, "learning_rate": 4.9742536309292257e-05, "loss": 0.1514, "step": 20690 }, { "epoch": 0.7522349007922088, "grad_norm": 2.3087685108184814, "learning_rate": 4.97417929414059e-05, "loss": 0.2064, "step": 20700 }, { "epoch": 0.7525982992950069, "grad_norm": 1.6968719959259033, "learning_rate": 4.974104850748372e-05, "loss": 0.65, "step": 20710 }, { "epoch": 0.7529616977978051, "grad_norm": 1.3144559860229492, "learning_rate": 4.974030300755779e-05, "loss": 3.2825, "step": 20720 }, { "epoch": 0.7533250963006033, "grad_norm": 2.346266031265259, "learning_rate": 4.973955644166022e-05, "loss": 0.1621, "step": 20730 }, { "epoch": 0.7536884948034014, "grad_norm": 0.8026605248451233, "learning_rate": 4.973880880982319e-05, "loss": 0.1566, "step": 20740 }, { "epoch": 0.7540518933061996, "grad_norm": 8.70439624786377, "learning_rate": 4.973806011207891e-05, "loss": 0.2671, "step": 20750 }, { "epoch": 0.7544152918089978, "grad_norm": 0.9762817025184631, "learning_rate": 4.973731034845964e-05, 
"loss": 0.1692, "step": 20760 }, { "epoch": 0.7547786903117959, "grad_norm": 1.3316736221313477, "learning_rate": 4.973655951899768e-05, "loss": 0.1605, "step": 20770 }, { "epoch": 0.7551420888145941, "grad_norm": 1.9772186279296875, "learning_rate": 4.9735807623725394e-05, "loss": 0.1551, "step": 20780 }, { "epoch": 0.7555054873173922, "grad_norm": 1.4639058113098145, "learning_rate": 4.9735054662675154e-05, "loss": 0.2075, "step": 20790 }, { "epoch": 0.7558688858201904, "grad_norm": 10.605428695678711, "learning_rate": 4.973430063587943e-05, "loss": 0.2542, "step": 20800 }, { "epoch": 0.7562322843229886, "grad_norm": 1.9553091526031494, "learning_rate": 4.9733545543370684e-05, "loss": 0.1353, "step": 20810 }, { "epoch": 0.7565956828257867, "grad_norm": 2.2855403423309326, "learning_rate": 4.9732789385181466e-05, "loss": 0.5004, "step": 20820 }, { "epoch": 0.7569590813285849, "grad_norm": 1.7468841075897217, "learning_rate": 4.973203216134435e-05, "loss": 0.1433, "step": 20830 }, { "epoch": 0.7573224798313831, "grad_norm": 0.9522268772125244, "learning_rate": 4.973127387189197e-05, "loss": 0.1488, "step": 20840 }, { "epoch": 0.7576858783341812, "grad_norm": 13.445122718811035, "learning_rate": 4.9730514516856996e-05, "loss": 0.2154, "step": 20850 }, { "epoch": 0.7580492768369794, "grad_norm": 1.0712549686431885, "learning_rate": 4.972975409627214e-05, "loss": 0.144, "step": 20860 }, { "epoch": 0.7584126753397776, "grad_norm": 0.6894069314002991, "learning_rate": 4.972899261017017e-05, "loss": 0.1612, "step": 20870 }, { "epoch": 0.7587760738425757, "grad_norm": 2.059844970703125, "learning_rate": 4.9728230058583893e-05, "loss": 0.1664, "step": 20880 }, { "epoch": 0.7591394723453739, "grad_norm": 2.0392911434173584, "learning_rate": 4.972746644154616e-05, "loss": 0.1991, "step": 20890 }, { "epoch": 0.7595028708481721, "grad_norm": 2.9800570011138916, "learning_rate": 4.972670175908989e-05, "loss": 0.2725, "step": 20900 }, { "epoch": 0.7598662693509702, "grad_norm": 
2.390784502029419, "learning_rate": 4.972593601124801e-05, "loss": 0.3158, "step": 20910 }, { "epoch": 0.7602296678537684, "grad_norm": 6.595739364624023, "learning_rate": 4.972516919805352e-05, "loss": 0.1658, "step": 20920 }, { "epoch": 0.7605930663565666, "grad_norm": 2.2043120861053467, "learning_rate": 4.972440131953947e-05, "loss": 0.163, "step": 20930 }, { "epoch": 0.7609564648593647, "grad_norm": 0.9223461747169495, "learning_rate": 4.972363237573894e-05, "loss": 0.1276, "step": 20940 }, { "epoch": 0.761319863362163, "grad_norm": 12.165254592895508, "learning_rate": 4.972286236668505e-05, "loss": 0.2105, "step": 20950 }, { "epoch": 0.7616832618649612, "grad_norm": 1.2093875408172607, "learning_rate": 4.9722091292410984e-05, "loss": 0.1697, "step": 20960 }, { "epoch": 0.7620466603677593, "grad_norm": 0.8847984075546265, "learning_rate": 4.9721396414828535e-05, "loss": 3.043, "step": 20970 }, { "epoch": 0.7624100588705575, "grad_norm": 1.6682274341583252, "learning_rate": 4.9720623316727705e-05, "loss": 0.1841, "step": 20980 }, { "epoch": 0.7627734573733557, "grad_norm": 1.2780869007110596, "learning_rate": 4.971984915350317e-05, "loss": 0.1412, "step": 20990 }, { "epoch": 0.7631368558761538, "grad_norm": 36.68233108520508, "learning_rate": 4.97190739251883e-05, "loss": 0.2059, "step": 21000 }, { "epoch": 0.7631368558761538, "eval_loss": 0.3906314969062805, "eval_runtime": 180.3915, "eval_samples_per_second": 41.1, "eval_steps_per_second": 5.139, "eval_wer": 0.21157441864686768, "step": 21000 }, { "epoch": 0.763500254378952, "grad_norm": 24.036775588989258, "learning_rate": 4.971829763181647e-05, "loss": 0.3942, "step": 21010 }, { "epoch": 0.7638636528817502, "grad_norm": 1.6546601057052612, "learning_rate": 4.971752027342115e-05, "loss": 0.1555, "step": 21020 }, { "epoch": 0.7642270513845483, "grad_norm": 3.100032091140747, "learning_rate": 4.971674185003583e-05, "loss": 0.1917, "step": 21030 }, { "epoch": 0.7645904498873465, "grad_norm": 3.2824084758758545, 
"learning_rate": 4.9715962361694045e-05, "loss": 0.1744, "step": 21040 }, { "epoch": 0.7649538483901447, "grad_norm": 7.680720329284668, "learning_rate": 4.9715181808429376e-05, "loss": 0.2567, "step": 21050 }, { "epoch": 0.7653172468929428, "grad_norm": 1.5478154420852661, "learning_rate": 4.971440019027547e-05, "loss": 0.1949, "step": 21060 }, { "epoch": 0.765680645395741, "grad_norm": 1.1294565200805664, "learning_rate": 4.971361750726598e-05, "loss": 0.1546, "step": 21070 }, { "epoch": 0.7660440438985391, "grad_norm": 3.339749813079834, "learning_rate": 4.971283375943465e-05, "loss": 0.1784, "step": 21080 }, { "epoch": 0.7664074424013373, "grad_norm": 1.9784200191497803, "learning_rate": 4.9712048946815244e-05, "loss": 0.8969, "step": 21090 }, { "epoch": 0.7667708409041355, "grad_norm": 13.550655364990234, "learning_rate": 4.971126306944157e-05, "loss": 0.2037, "step": 21100 }, { "epoch": 0.7671342394069336, "grad_norm": 60.52021408081055, "learning_rate": 4.971047612734749e-05, "loss": 1.0649, "step": 21110 }, { "epoch": 0.7674976379097318, "grad_norm": 1.7544801235198975, "learning_rate": 4.970968812056693e-05, "loss": 0.1619, "step": 21120 }, { "epoch": 0.76786103641253, "grad_norm": 2.0749471187591553, "learning_rate": 4.970889904913382e-05, "loss": 0.1934, "step": 21130 }, { "epoch": 0.7682244349153281, "grad_norm": 2.33097767829895, "learning_rate": 4.970810891308215e-05, "loss": 0.3121, "step": 21140 }, { "epoch": 0.7685878334181263, "grad_norm": 3.5586440563201904, "learning_rate": 4.9707317712445996e-05, "loss": 0.2198, "step": 21150 }, { "epoch": 0.7689512319209245, "grad_norm": 1.7430351972579956, "learning_rate": 4.970652544725942e-05, "loss": 0.1884, "step": 21160 }, { "epoch": 0.7693146304237226, "grad_norm": 1.2475924491882324, "learning_rate": 4.9705732117556574e-05, "loss": 0.183, "step": 21170 }, { "epoch": 0.7696780289265208, "grad_norm": 1.369491457939148, "learning_rate": 4.970493772337164e-05, "loss": 0.1854, "step": 21180 }, { "epoch": 
0.770041427429319, "grad_norm": 1.8093339204788208, "learning_rate": 4.970414226473883e-05, "loss": 0.1389, "step": 21190 }, { "epoch": 0.7704048259321171, "grad_norm": 15.3746919631958, "learning_rate": 4.9703345741692425e-05, "loss": 0.2603, "step": 21200 }, { "epoch": 0.7707682244349153, "grad_norm": 0.9604819416999817, "learning_rate": 4.970254815426675e-05, "loss": 0.1663, "step": 21210 }, { "epoch": 0.7711316229377135, "grad_norm": 1.3457413911819458, "learning_rate": 4.970174950249617e-05, "loss": 0.1784, "step": 21220 }, { "epoch": 0.7714950214405116, "grad_norm": 3.19975209236145, "learning_rate": 4.970094978641509e-05, "loss": 0.2369, "step": 21230 }, { "epoch": 0.7718584199433098, "grad_norm": 1.4974329471588135, "learning_rate": 4.970014900605797e-05, "loss": 0.1553, "step": 21240 }, { "epoch": 0.772221818446108, "grad_norm": 6.426448345184326, "learning_rate": 4.969934716145932e-05, "loss": 0.1848, "step": 21250 }, { "epoch": 0.7725852169489061, "grad_norm": 4.081672668457031, "learning_rate": 4.969854425265368e-05, "loss": 0.2135, "step": 21260 }, { "epoch": 0.7729486154517043, "grad_norm": 0.7796603441238403, "learning_rate": 4.9697740279675635e-05, "loss": 0.2853, "step": 21270 }, { "epoch": 0.7733120139545026, "grad_norm": 1.2303035259246826, "learning_rate": 4.969693524255984e-05, "loss": 0.5319, "step": 21280 }, { "epoch": 0.7736754124573006, "grad_norm": 0.9134958386421204, "learning_rate": 4.9696129141340986e-05, "loss": 0.1789, "step": 21290 }, { "epoch": 0.7740388109600989, "grad_norm": 1.8099846839904785, "learning_rate": 4.969532197605379e-05, "loss": 0.1967, "step": 21300 }, { "epoch": 0.7744022094628971, "grad_norm": 3.75593900680542, "learning_rate": 4.969451374673304e-05, "loss": 0.1908, "step": 21310 }, { "epoch": 0.7747656079656952, "grad_norm": 2.851921319961548, "learning_rate": 4.969370445341355e-05, "loss": 0.1616, "step": 21320 }, { "epoch": 0.7751290064684934, "grad_norm": 2.978349447250366, "learning_rate": 
4.96928940961302e-05, "loss": 0.1682, "step": 21330 }, { "epoch": 0.7754924049712916, "grad_norm": 2.945326089859009, "learning_rate": 4.96920826749179e-05, "loss": 0.1897, "step": 21340 }, { "epoch": 0.7758558034740897, "grad_norm": 5.529159069061279, "learning_rate": 4.9691270189811614e-05, "loss": 0.2351, "step": 21350 }, { "epoch": 0.7762192019768879, "grad_norm": 0.816582441329956, "learning_rate": 4.969045664084634e-05, "loss": 0.255, "step": 21360 }, { "epoch": 0.776582600479686, "grad_norm": 3.373413324356079, "learning_rate": 4.968964202805715e-05, "loss": 0.165, "step": 21370 }, { "epoch": 0.7769459989824842, "grad_norm": 1.4986653327941895, "learning_rate": 4.968882635147912e-05, "loss": 0.1803, "step": 21380 }, { "epoch": 0.7773093974852824, "grad_norm": 4.049030303955078, "learning_rate": 4.968800961114741e-05, "loss": 0.2312, "step": 21390 }, { "epoch": 0.7776727959880805, "grad_norm": 1.8616725206375122, "learning_rate": 4.968719180709721e-05, "loss": 0.2038, "step": 21400 }, { "epoch": 0.7780361944908787, "grad_norm": 0.7410339117050171, "learning_rate": 4.968637293936374e-05, "loss": 0.1736, "step": 21410 }, { "epoch": 0.7783995929936769, "grad_norm": 0.9004227519035339, "learning_rate": 4.968555300798231e-05, "loss": 0.6926, "step": 21420 }, { "epoch": 0.778762991496475, "grad_norm": 1.9912917613983154, "learning_rate": 4.968473201298822e-05, "loss": 0.183, "step": 21430 }, { "epoch": 0.7791263899992732, "grad_norm": 1.5098110437393188, "learning_rate": 4.968390995441686e-05, "loss": 0.1555, "step": 21440 }, { "epoch": 0.7794897885020714, "grad_norm": 1.5687317848205566, "learning_rate": 4.9683086832303655e-05, "loss": 0.199, "step": 21450 }, { "epoch": 0.7798531870048695, "grad_norm": 1.456758975982666, "learning_rate": 4.9682262646684054e-05, "loss": 0.1573, "step": 21460 }, { "epoch": 0.7802165855076677, "grad_norm": 1.152894377708435, "learning_rate": 4.9681437397593575e-05, "loss": 0.136, "step": 21470 }, { "epoch": 0.7805799840104659, 
"grad_norm": 6.458597183227539, "learning_rate": 4.968061108506777e-05, "loss": 0.2111, "step": 21480 }, { "epoch": 0.780943382513264, "grad_norm": 1.3398655652999878, "learning_rate": 4.967978370914226e-05, "loss": 0.1785, "step": 21490 }, { "epoch": 0.7813067810160622, "grad_norm": 12.363832473754883, "learning_rate": 4.967895526985267e-05, "loss": 0.217, "step": 21500 }, { "epoch": 0.7816701795188604, "grad_norm": 3.800936698913574, "learning_rate": 4.967812576723471e-05, "loss": 0.1533, "step": 21510 }, { "epoch": 0.7820335780216585, "grad_norm": 0.9531782865524292, "learning_rate": 4.967729520132411e-05, "loss": 1.353, "step": 21520 }, { "epoch": 0.7823969765244567, "grad_norm": 1.3066377639770508, "learning_rate": 4.967646357215667e-05, "loss": 0.1338, "step": 21530 }, { "epoch": 0.7827603750272549, "grad_norm": 1.1814554929733276, "learning_rate": 4.967563087976821e-05, "loss": 0.1735, "step": 21540 }, { "epoch": 0.783123773530053, "grad_norm": 4.6233367919921875, "learning_rate": 4.967479712419461e-05, "loss": 0.2266, "step": 21550 }, { "epoch": 0.7834871720328512, "grad_norm": 1.366377353668213, "learning_rate": 4.96739623054718e-05, "loss": 0.1595, "step": 21560 }, { "epoch": 0.7838505705356494, "grad_norm": 2.0722217559814453, "learning_rate": 4.967312642363574e-05, "loss": 0.1721, "step": 21570 }, { "epoch": 0.7842139690384475, "grad_norm": 2.186340570449829, "learning_rate": 4.967228947872245e-05, "loss": 0.1653, "step": 21580 }, { "epoch": 0.7845773675412457, "grad_norm": 2.4222512245178223, "learning_rate": 4.9671451470767996e-05, "loss": 0.1446, "step": 21590 }, { "epoch": 0.784940766044044, "grad_norm": 62.15577697753906, "learning_rate": 4.9670612399808467e-05, "loss": 0.2911, "step": 21600 }, { "epoch": 0.784940766044044, "eval_loss": 0.3627218008041382, "eval_runtime": 179.8971, "eval_samples_per_second": 41.212, "eval_steps_per_second": 5.153, "eval_wer": 0.21580409170947773, "step": 21600 }, { "epoch": 0.785304164546842, "grad_norm": 
1.236609935760498, "learning_rate": 4.9669772265880044e-05, "loss": 0.1417, "step": 21610 }, { "epoch": 0.7856675630496402, "grad_norm": 1.2447402477264404, "learning_rate": 4.96689310690189e-05, "loss": 0.1508, "step": 21620 }, { "epoch": 0.7860309615524385, "grad_norm": 4.567975997924805, "learning_rate": 4.966808880926129e-05, "loss": 0.3503, "step": 21630 }, { "epoch": 0.7863943600552366, "grad_norm": 0.9699403047561646, "learning_rate": 4.96672454866435e-05, "loss": 0.1615, "step": 21640 }, { "epoch": 0.7867577585580348, "grad_norm": 11.004621505737305, "learning_rate": 4.966640110120187e-05, "loss": 0.2604, "step": 21650 }, { "epoch": 0.7871211570608329, "grad_norm": 1.3322606086730957, "learning_rate": 4.9665555652972784e-05, "loss": 0.1958, "step": 21660 }, { "epoch": 0.7874845555636311, "grad_norm": 1.0020729303359985, "learning_rate": 4.966470914199266e-05, "loss": 0.1207, "step": 21670 }, { "epoch": 0.7878479540664293, "grad_norm": 3.457019567489624, "learning_rate": 4.9663861568297976e-05, "loss": 0.3774, "step": 21680 }, { "epoch": 0.7882113525692274, "grad_norm": 2.4993362426757812, "learning_rate": 4.9663012931925254e-05, "loss": 0.1537, "step": 21690 }, { "epoch": 0.7885747510720256, "grad_norm": 11.104598999023438, "learning_rate": 4.966216323291106e-05, "loss": 0.2472, "step": 21700 }, { "epoch": 0.7889381495748238, "grad_norm": 1.5027676820755005, "learning_rate": 4.9661312471291996e-05, "loss": 0.154, "step": 21710 }, { "epoch": 0.7893015480776219, "grad_norm": 1.1929068565368652, "learning_rate": 4.9660460647104726e-05, "loss": 0.1416, "step": 21720 }, { "epoch": 0.7896649465804201, "grad_norm": 17.008617401123047, "learning_rate": 4.965960776038594e-05, "loss": 0.3858, "step": 21730 }, { "epoch": 0.7900283450832183, "grad_norm": 1.6043013334274292, "learning_rate": 4.96587538111724e-05, "loss": 0.1624, "step": 21740 }, { "epoch": 0.7903917435860164, "grad_norm": 10.960922241210938, "learning_rate": 4.96578987995009e-05, "loss": 0.2034, "step": 
21750 }, { "epoch": 0.7907551420888146, "grad_norm": 1.4807969331741333, "learning_rate": 4.965704272540826e-05, "loss": 0.1491, "step": 21760 }, { "epoch": 0.7911185405916128, "grad_norm": 0.9724571108818054, "learning_rate": 4.965618558893139e-05, "loss": 0.1455, "step": 21770 }, { "epoch": 0.7914819390944109, "grad_norm": 2.6035313606262207, "learning_rate": 4.965532739010722e-05, "loss": 0.1696, "step": 21780 }, { "epoch": 0.7918453375972091, "grad_norm": 0.7998749017715454, "learning_rate": 4.9654468128972695e-05, "loss": 0.1549, "step": 21790 }, { "epoch": 0.7922087361000073, "grad_norm": 14.13917350769043, "learning_rate": 4.965360780556487e-05, "loss": 0.2124, "step": 21800 }, { "epoch": 0.7925721346028054, "grad_norm": 1.88883638381958, "learning_rate": 4.9652746419920804e-05, "loss": 0.1475, "step": 21810 }, { "epoch": 0.7929355331056036, "grad_norm": 1.5585650205612183, "learning_rate": 4.965188397207761e-05, "loss": 0.1534, "step": 21820 }, { "epoch": 0.7932989316084018, "grad_norm": 2.6418206691741943, "learning_rate": 4.965102046207244e-05, "loss": 0.1608, "step": 21830 }, { "epoch": 0.7936623301111999, "grad_norm": 1.1672085523605347, "learning_rate": 4.965015588994251e-05, "loss": 0.1596, "step": 21840 }, { "epoch": 0.7940257286139981, "grad_norm": 3.009610652923584, "learning_rate": 4.964929025572507e-05, "loss": 0.1805, "step": 21850 }, { "epoch": 0.7943891271167963, "grad_norm": 1.8774985074996948, "learning_rate": 4.964842355945742e-05, "loss": 0.1583, "step": 21860 }, { "epoch": 0.7947525256195944, "grad_norm": 1.1219382286071777, "learning_rate": 4.964755580117689e-05, "loss": 0.1524, "step": 21870 }, { "epoch": 0.7951159241223926, "grad_norm": 6.0511627197265625, "learning_rate": 4.964668698092088e-05, "loss": 0.2349, "step": 21880 }, { "epoch": 0.7954793226251908, "grad_norm": 3.4487464427948, "learning_rate": 4.9645817098726824e-05, "loss": 0.1915, "step": 21890 }, { "epoch": 0.7958427211279889, "grad_norm": 4.096559524536133, 
"learning_rate": 4.9644946154632196e-05, "loss": 0.2067, "step": 21900 }, { "epoch": 0.7962061196307871, "grad_norm": 4.144627571105957, "learning_rate": 4.9644074148674526e-05, "loss": 0.1564, "step": 21910 }, { "epoch": 0.7965695181335853, "grad_norm": 1.3851386308670044, "learning_rate": 4.9643201080891384e-05, "loss": 0.1656, "step": 21920 }, { "epoch": 0.7969329166363834, "grad_norm": 1.3050576448440552, "learning_rate": 4.9642326951320384e-05, "loss": 0.1555, "step": 21930 }, { "epoch": 0.7972963151391816, "grad_norm": 1.578134298324585, "learning_rate": 4.96414517599992e-05, "loss": 0.1637, "step": 21940 }, { "epoch": 0.7976597136419797, "grad_norm": 10.813237190246582, "learning_rate": 4.9640575506965535e-05, "loss": 0.3143, "step": 21950 }, { "epoch": 0.798023112144778, "grad_norm": 0.7118828892707825, "learning_rate": 4.963969819225713e-05, "loss": 0.1581, "step": 21960 }, { "epoch": 0.7983865106475762, "grad_norm": 1.389856219291687, "learning_rate": 4.963881981591182e-05, "loss": 0.1466, "step": 21970 }, { "epoch": 0.7987499091503742, "grad_norm": 1.1921494007110596, "learning_rate": 4.963794037796741e-05, "loss": 0.1604, "step": 21980 }, { "epoch": 0.7991133076531725, "grad_norm": 4.355441093444824, "learning_rate": 4.963705987846182e-05, "loss": 0.1792, "step": 21990 }, { "epoch": 0.7994767061559707, "grad_norm": 8.20235824584961, "learning_rate": 4.963617831743298e-05, "loss": 0.2314, "step": 22000 }, { "epoch": 0.7998401046587688, "grad_norm": 1.3720426559448242, "learning_rate": 4.963529569491887e-05, "loss": 0.1378, "step": 22010 }, { "epoch": 0.800203503161567, "grad_norm": 1.490679383277893, "learning_rate": 4.963441201095752e-05, "loss": 0.1505, "step": 22020 }, { "epoch": 0.8005669016643652, "grad_norm": 1.576416254043579, "learning_rate": 4.963352726558701e-05, "loss": 0.1379, "step": 22030 }, { "epoch": 0.8009303001671633, "grad_norm": 1.547780156135559, "learning_rate": 4.9632641458845454e-05, "loss": 0.1584, "step": 22040 }, { "epoch": 
0.8012936986699615, "grad_norm": 41.95133972167969, "learning_rate": 4.963175459077102e-05, "loss": 0.6762, "step": 22050 }, { "epoch": 0.8016570971727597, "grad_norm": 0.8984355330467224, "learning_rate": 4.963086666140192e-05, "loss": 0.1513, "step": 22060 }, { "epoch": 0.8020204956755578, "grad_norm": 1.6865235567092896, "learning_rate": 4.9629977670776404e-05, "loss": 0.1659, "step": 22070 }, { "epoch": 0.802383894178356, "grad_norm": 5.291965007781982, "learning_rate": 4.96290876189328e-05, "loss": 0.1735, "step": 22080 }, { "epoch": 0.8027472926811542, "grad_norm": 0.9124179482460022, "learning_rate": 4.962819650590943e-05, "loss": 0.163, "step": 22090 }, { "epoch": 0.8031106911839523, "grad_norm": 5.151334762573242, "learning_rate": 4.9627304331744705e-05, "loss": 0.2997, "step": 22100 }, { "epoch": 0.8034740896867505, "grad_norm": 0.7093039155006409, "learning_rate": 4.9626411096477066e-05, "loss": 0.1297, "step": 22110 }, { "epoch": 0.8038374881895487, "grad_norm": 0.7643496990203857, "learning_rate": 4.962551680014499e-05, "loss": 0.1568, "step": 22120 }, { "epoch": 0.8042008866923468, "grad_norm": 2.0619888305664062, "learning_rate": 4.9624621442787005e-05, "loss": 0.1685, "step": 22130 }, { "epoch": 0.804564285195145, "grad_norm": 1.3836963176727295, "learning_rate": 4.9623725024441704e-05, "loss": 0.1597, "step": 22140 }, { "epoch": 0.8049276836979432, "grad_norm": 10.014172554016113, "learning_rate": 4.96228275451477e-05, "loss": 0.2371, "step": 22150 }, { "epoch": 0.8052910822007413, "grad_norm": 0.8201650381088257, "learning_rate": 4.962192900494367e-05, "loss": 0.1457, "step": 22160 }, { "epoch": 0.8056544807035395, "grad_norm": 2.9909164905548096, "learning_rate": 4.962102940386832e-05, "loss": 0.1584, "step": 22170 }, { "epoch": 0.8060178792063377, "grad_norm": 1.8986990451812744, "learning_rate": 4.9620128741960414e-05, "loss": 0.1521, "step": 22180 }, { "epoch": 0.8063812777091358, "grad_norm": 1.2521679401397705, "learning_rate": 
4.9619227019258766e-05, "loss": 0.1398, "step": 22190 }, { "epoch": 0.806744676211934, "grad_norm": 9.087230682373047, "learning_rate": 4.9618324235802214e-05, "loss": 0.2414, "step": 22200 }, { "epoch": 0.806744676211934, "eval_loss": 0.3814217448234558, "eval_runtime": 180.9296, "eval_samples_per_second": 40.977, "eval_steps_per_second": 5.124, "eval_wer": 0.21859059306188394, "step": 22200 }, { "epoch": 0.8071080747147322, "grad_norm": 1.3065155744552612, "learning_rate": 4.9617420391629666e-05, "loss": 0.1382, "step": 22210 }, { "epoch": 0.8074714732175303, "grad_norm": 1.0691299438476562, "learning_rate": 4.961651548678006e-05, "loss": 0.1692, "step": 22220 }, { "epoch": 0.8078348717203285, "grad_norm": 2.515131711959839, "learning_rate": 4.961560952129239e-05, "loss": 0.1719, "step": 22230 }, { "epoch": 0.8081982702231267, "grad_norm": 1.3650884628295898, "learning_rate": 4.9614702495205686e-05, "loss": 0.1918, "step": 22240 }, { "epoch": 0.8085616687259248, "grad_norm": 4.730445384979248, "learning_rate": 4.961379440855903e-05, "loss": 0.2002, "step": 22250 }, { "epoch": 0.808925067228723, "grad_norm": 1.0421544313430786, "learning_rate": 4.9612885261391555e-05, "loss": 0.1544, "step": 22260 }, { "epoch": 0.8092884657315211, "grad_norm": 1.1957643032073975, "learning_rate": 4.961197505374242e-05, "loss": 0.1471, "step": 22270 }, { "epoch": 0.8096518642343193, "grad_norm": 2.936429977416992, "learning_rate": 4.961106378565086e-05, "loss": 0.2068, "step": 22280 }, { "epoch": 0.8100152627371175, "grad_norm": 2.0803070068359375, "learning_rate": 4.961015145715612e-05, "loss": 0.1496, "step": 22290 }, { "epoch": 0.8103786612399156, "grad_norm": 10.564451217651367, "learning_rate": 4.960923806829752e-05, "loss": 0.2549, "step": 22300 }, { "epoch": 0.8107420597427138, "grad_norm": 1.0569120645523071, "learning_rate": 4.9608323619114406e-05, "loss": 0.1624, "step": 22310 }, { "epoch": 0.8111054582455121, "grad_norm": 1.4505226612091064, "learning_rate": 
4.960740810964619e-05, "loss": 0.1523, "step": 22320 }, { "epoch": 0.8114688567483102, "grad_norm": 5.100767135620117, "learning_rate": 4.960649153993231e-05, "loss": 0.1562, "step": 22330 }, { "epoch": 0.8118322552511084, "grad_norm": 2.2787342071533203, "learning_rate": 4.960557391001226e-05, "loss": 0.1691, "step": 22340 }, { "epoch": 0.8121956537539066, "grad_norm": 15.405048370361328, "learning_rate": 4.960465521992558e-05, "loss": 0.2542, "step": 22350 }, { "epoch": 0.8125590522567047, "grad_norm": 0.7388777732849121, "learning_rate": 4.9603735469711845e-05, "loss": 0.1522, "step": 22360 }, { "epoch": 0.8129224507595029, "grad_norm": 0.9490914344787598, "learning_rate": 4.960281465941069e-05, "loss": 0.1317, "step": 22370 }, { "epoch": 0.8132858492623011, "grad_norm": 2.281085252761841, "learning_rate": 4.960189278906179e-05, "loss": 0.1503, "step": 22380 }, { "epoch": 0.8136492477650992, "grad_norm": 0.9328985810279846, "learning_rate": 4.960096985870486e-05, "loss": 0.1556, "step": 22390 }, { "epoch": 0.8140126462678974, "grad_norm": 4.4524617195129395, "learning_rate": 4.960004586837967e-05, "loss": 0.2387, "step": 22400 }, { "epoch": 0.8143760447706956, "grad_norm": 1.5577040910720825, "learning_rate": 4.959912081812603e-05, "loss": 0.1557, "step": 22410 }, { "epoch": 0.8147394432734937, "grad_norm": 2.358896493911743, "learning_rate": 4.95981947079838e-05, "loss": 0.2016, "step": 22420 }, { "epoch": 0.8151028417762919, "grad_norm": 2.1001386642456055, "learning_rate": 4.9597267537992885e-05, "loss": 0.1587, "step": 22430 }, { "epoch": 0.8154662402790901, "grad_norm": 2.7561607360839844, "learning_rate": 4.959633930819323e-05, "loss": 0.1616, "step": 22440 }, { "epoch": 0.8158296387818882, "grad_norm": 4.204514980316162, "learning_rate": 4.959541001862482e-05, "loss": 0.6089, "step": 22450 }, { "epoch": 0.8161930372846864, "grad_norm": 1.3738398551940918, "learning_rate": 4.959447966932771e-05, "loss": 0.1756, "step": 22460 }, { "epoch": 
0.8165564357874846, "grad_norm": 0.705806314945221, "learning_rate": 4.959354826034197e-05, "loss": 0.1213, "step": 22470 }, { "epoch": 0.8169198342902827, "grad_norm": 2.053788661956787, "learning_rate": 4.9592615791707755e-05, "loss": 0.1765, "step": 22480 }, { "epoch": 0.8172832327930809, "grad_norm": 2.0120911598205566, "learning_rate": 4.959168226346521e-05, "loss": 0.1444, "step": 22490 }, { "epoch": 0.8176466312958791, "grad_norm": 6.552361011505127, "learning_rate": 4.959074767565458e-05, "loss": 0.2201, "step": 22500 }, { "epoch": 0.8180100297986772, "grad_norm": 1.3007264137268066, "learning_rate": 4.958981202831613e-05, "loss": 0.1488, "step": 22510 }, { "epoch": 0.8183734283014754, "grad_norm": 1.7885551452636719, "learning_rate": 4.958887532149016e-05, "loss": 2.6491, "step": 22520 }, { "epoch": 0.8187368268042736, "grad_norm": 1.7092806100845337, "learning_rate": 4.9587937555217054e-05, "loss": 0.1946, "step": 22530 }, { "epoch": 0.8191002253070717, "grad_norm": 2.56215238571167, "learning_rate": 4.958699872953719e-05, "loss": 0.1676, "step": 22540 }, { "epoch": 0.8194636238098699, "grad_norm": 2.085753917694092, "learning_rate": 4.958605884449104e-05, "loss": 0.2038, "step": 22550 }, { "epoch": 0.819827022312668, "grad_norm": 0.8225610852241516, "learning_rate": 4.958511790011909e-05, "loss": 0.5185, "step": 22560 }, { "epoch": 0.8201904208154662, "grad_norm": 1.6775872707366943, "learning_rate": 4.9584175896461884e-05, "loss": 0.17, "step": 22570 }, { "epoch": 0.8205538193182644, "grad_norm": 3.4285826683044434, "learning_rate": 4.958323283356001e-05, "loss": 0.164, "step": 22580 }, { "epoch": 0.8209172178210625, "grad_norm": 1.892842411994934, "learning_rate": 4.95822887114541e-05, "loss": 1.2783, "step": 22590 }, { "epoch": 0.8212806163238607, "grad_norm": 4.959444522857666, "learning_rate": 4.9581343530184834e-05, "loss": 0.2062, "step": 22600 }, { "epoch": 0.8216440148266589, "grad_norm": 2.4584267139434814, "learning_rate": 
4.958039728979293e-05, "loss": 0.1443, "step": 22610 }, { "epoch": 0.822007413329457, "grad_norm": 1.118804693222046, "learning_rate": 4.957944999031917e-05, "loss": 0.16, "step": 22620 }, { "epoch": 0.8223708118322552, "grad_norm": 1.5434421300888062, "learning_rate": 4.9578501631804365e-05, "loss": 0.2104, "step": 22630 }, { "epoch": 0.8227342103350535, "grad_norm": 1.3116744756698608, "learning_rate": 4.9577552214289374e-05, "loss": 0.1326, "step": 22640 }, { "epoch": 0.8230976088378515, "grad_norm": 11.34653377532959, "learning_rate": 4.95766017378151e-05, "loss": 0.2231, "step": 22650 }, { "epoch": 0.8234610073406498, "grad_norm": 1.0379194021224976, "learning_rate": 4.957565020242251e-05, "loss": 0.1805, "step": 22660 }, { "epoch": 0.823824405843448, "grad_norm": 1.8218019008636475, "learning_rate": 4.957469760815259e-05, "loss": 0.1287, "step": 22670 }, { "epoch": 0.8241878043462461, "grad_norm": 1.1962164640426636, "learning_rate": 4.957374395504638e-05, "loss": 0.4115, "step": 22680 }, { "epoch": 0.8245512028490443, "grad_norm": 1.9947481155395508, "learning_rate": 4.957278924314499e-05, "loss": 0.1407, "step": 22690 }, { "epoch": 0.8249146013518425, "grad_norm": 25.343172073364258, "learning_rate": 4.957183347248953e-05, "loss": 0.4247, "step": 22700 }, { "epoch": 0.8252779998546406, "grad_norm": 1.4444775581359863, "learning_rate": 4.95708766431212e-05, "loss": 0.1641, "step": 22710 }, { "epoch": 0.8256413983574388, "grad_norm": 1.621640920639038, "learning_rate": 4.9569918755081216e-05, "loss": 0.1289, "step": 22720 }, { "epoch": 0.826004796860237, "grad_norm": 1.018471360206604, "learning_rate": 4.9568959808410854e-05, "loss": 0.1694, "step": 22730 }, { "epoch": 0.8263681953630351, "grad_norm": 3.1913223266601562, "learning_rate": 4.9567999803151424e-05, "loss": 0.1898, "step": 22740 }, { "epoch": 0.8267315938658333, "grad_norm": 8.095772743225098, "learning_rate": 4.956703873934431e-05, "loss": 0.2246, "step": 22750 }, { "epoch": 0.8270949923686315, 
"grad_norm": 1.738887906074524, "learning_rate": 4.956607661703089e-05, "loss": 0.1678, "step": 22760 }, { "epoch": 0.8274583908714296, "grad_norm": 0.9688615202903748, "learning_rate": 4.9565113436252644e-05, "loss": 0.1341, "step": 22770 }, { "epoch": 0.8278217893742278, "grad_norm": 2.2478010654449463, "learning_rate": 4.956414919705106e-05, "loss": 0.1823, "step": 22780 }, { "epoch": 0.828185187877026, "grad_norm": 1.6718928813934326, "learning_rate": 4.956318389946769e-05, "loss": 0.1543, "step": 22790 }, { "epoch": 0.8285485863798241, "grad_norm": 5.168727874755859, "learning_rate": 4.956221754354412e-05, "loss": 0.1795, "step": 22800 }, { "epoch": 0.8285485863798241, "eval_loss": 0.3908107876777649, "eval_runtime": 180.5873, "eval_samples_per_second": 41.055, "eval_steps_per_second": 5.133, "eval_wer": 0.20674569317624847, "step": 22800 }, { "epoch": 0.8289119848826223, "grad_norm": 0.9549854397773743, "learning_rate": 4.956125012932199e-05, "loss": 0.1559, "step": 22810 }, { "epoch": 0.8292753833854205, "grad_norm": 3.2057716846466064, "learning_rate": 4.9560281656842977e-05, "loss": 0.1675, "step": 22820 }, { "epoch": 0.8296387818882186, "grad_norm": 1.7775851488113403, "learning_rate": 4.955931212614882e-05, "loss": 0.1997, "step": 22830 }, { "epoch": 0.8300021803910168, "grad_norm": 1.7028132677078247, "learning_rate": 4.9558341537281274e-05, "loss": 0.1505, "step": 22840 }, { "epoch": 0.8303655788938149, "grad_norm": 2.7027060985565186, "learning_rate": 4.955736989028218e-05, "loss": 0.2009, "step": 22850 }, { "epoch": 0.8307289773966131, "grad_norm": 1.8419814109802246, "learning_rate": 4.955639718519339e-05, "loss": 0.1355, "step": 22860 }, { "epoch": 0.8310923758994113, "grad_norm": 0.8633226156234741, "learning_rate": 4.955542342205682e-05, "loss": 0.178, "step": 22870 }, { "epoch": 0.8314557744022094, "grad_norm": 6.966017723083496, "learning_rate": 4.955444860091442e-05, "loss": 0.1885, "step": 22880 }, { "epoch": 0.8318191729050076, "grad_norm": 
1.9565801620483398, "learning_rate": 4.955347272180819e-05, "loss": 0.1485, "step": 22890 }, { "epoch": 0.8321825714078058, "grad_norm": 22.704593658447266, "learning_rate": 4.9552495784780196e-05, "loss": 0.2294, "step": 22900 }, { "epoch": 0.8325459699106039, "grad_norm": 2.0515658855438232, "learning_rate": 4.95515177898725e-05, "loss": 0.166, "step": 22910 }, { "epoch": 0.8329093684134021, "grad_norm": 2.9277150630950928, "learning_rate": 4.9550538737127275e-05, "loss": 0.8898, "step": 22920 }, { "epoch": 0.8332727669162003, "grad_norm": 3.9280052185058594, "learning_rate": 4.9549558626586676e-05, "loss": 0.171, "step": 22930 }, { "epoch": 0.8336361654189984, "grad_norm": 2.5431272983551025, "learning_rate": 4.954857745829294e-05, "loss": 0.1539, "step": 22940 }, { "epoch": 0.8339995639217966, "grad_norm": 2.815434694290161, "learning_rate": 4.954759523228835e-05, "loss": 0.2126, "step": 22950 }, { "epoch": 0.8343629624245948, "grad_norm": 0.6958141922950745, "learning_rate": 4.9546611948615224e-05, "loss": 0.2069, "step": 22960 }, { "epoch": 0.8347263609273929, "grad_norm": 0.7068191766738892, "learning_rate": 4.9545627607315924e-05, "loss": 0.1287, "step": 22970 }, { "epoch": 0.8350897594301911, "grad_norm": 1.8746801614761353, "learning_rate": 4.954464220843287e-05, "loss": 0.1488, "step": 22980 }, { "epoch": 0.8354531579329894, "grad_norm": 1.5134693384170532, "learning_rate": 4.95436557520085e-05, "loss": 0.1337, "step": 22990 }, { "epoch": 0.8358165564357874, "grad_norm": 4.778042316436768, "learning_rate": 4.9542668238085344e-05, "loss": 0.2172, "step": 23000 }, { "epoch": 0.8361799549385857, "grad_norm": 1.074409008026123, "learning_rate": 4.9541679666705924e-05, "loss": 0.1696, "step": 23010 }, { "epoch": 0.8365433534413839, "grad_norm": 1.6725049018859863, "learning_rate": 4.954069003791286e-05, "loss": 0.136, "step": 23020 }, { "epoch": 0.836906751944182, "grad_norm": 3.194450616836548, "learning_rate": 4.953969935174877e-05, "loss": 0.2067, "step": 
23030 }, { "epoch": 0.8372701504469802, "grad_norm": 7.7923150062561035, "learning_rate": 4.9538707608256345e-05, "loss": 0.1938, "step": 23040 }, { "epoch": 0.8376335489497784, "grad_norm": 8.767574310302734, "learning_rate": 4.953771480747833e-05, "loss": 0.2473, "step": 23050 }, { "epoch": 0.8379969474525765, "grad_norm": 1.3911685943603516, "learning_rate": 4.953672094945748e-05, "loss": 0.1497, "step": 23060 }, { "epoch": 0.8383603459553747, "grad_norm": 0.7775372266769409, "learning_rate": 4.953572603423662e-05, "loss": 0.7581, "step": 23070 }, { "epoch": 0.8387237444581729, "grad_norm": 2.6937413215637207, "learning_rate": 4.9534730061858634e-05, "loss": 0.1849, "step": 23080 }, { "epoch": 0.839087142960971, "grad_norm": 0.7375633716583252, "learning_rate": 4.953373303236642e-05, "loss": 0.1706, "step": 23090 }, { "epoch": 0.8394505414637692, "grad_norm": 3.070746421813965, "learning_rate": 4.953273494580295e-05, "loss": 0.2114, "step": 23100 }, { "epoch": 0.8398139399665674, "grad_norm": 0.7470118403434753, "learning_rate": 4.953173580221121e-05, "loss": 0.13, "step": 23110 }, { "epoch": 0.8401773384693655, "grad_norm": 1.040595531463623, "learning_rate": 4.953073560163426e-05, "loss": 0.2088, "step": 23120 }, { "epoch": 0.8405407369721637, "grad_norm": 3.9858949184417725, "learning_rate": 4.95297343441152e-05, "loss": 0.1528, "step": 23130 }, { "epoch": 0.8409041354749618, "grad_norm": 1.4031178951263428, "learning_rate": 4.952873202969716e-05, "loss": 2.5826, "step": 23140 }, { "epoch": 0.84126753397776, "grad_norm": 16.660646438598633, "learning_rate": 4.952772865842332e-05, "loss": 0.3101, "step": 23150 }, { "epoch": 0.8416309324805582, "grad_norm": 1.21910560131073, "learning_rate": 4.952672423033693e-05, "loss": 0.1326, "step": 23160 }, { "epoch": 0.8419943309833563, "grad_norm": 1.4494057893753052, "learning_rate": 4.952571874548126e-05, "loss": 0.1567, "step": 23170 }, { "epoch": 0.8423577294861545, "grad_norm": 1.1903733015060425, "learning_rate": 
4.952471220389964e-05, "loss": 0.1537, "step": 23180 }, { "epoch": 0.8427211279889527, "grad_norm": 1.0293620824813843, "learning_rate": 4.9523704605635414e-05, "loss": 0.1695, "step": 23190 }, { "epoch": 0.8430845264917508, "grad_norm": 9.536385536193848, "learning_rate": 4.9522695950732025e-05, "loss": 0.2702, "step": 23200 }, { "epoch": 0.843447924994549, "grad_norm": 1.1565468311309814, "learning_rate": 4.9521686239232915e-05, "loss": 0.1452, "step": 23210 }, { "epoch": 0.8438113234973472, "grad_norm": 1.0805953741073608, "learning_rate": 4.9520675471181586e-05, "loss": 0.1478, "step": 23220 }, { "epoch": 0.8441747220001453, "grad_norm": 2.7216696739196777, "learning_rate": 4.95196636466216e-05, "loss": 0.1965, "step": 23230 }, { "epoch": 0.8445381205029435, "grad_norm": 2.2064578533172607, "learning_rate": 4.9518650765596564e-05, "loss": 0.213, "step": 23240 }, { "epoch": 0.8449015190057417, "grad_norm": 11.686285972595215, "learning_rate": 4.951763682815009e-05, "loss": 0.2929, "step": 23250 }, { "epoch": 0.8452649175085398, "grad_norm": 1.6271568536758423, "learning_rate": 4.9516621834325885e-05, "loss": 0.1406, "step": 23260 }, { "epoch": 0.845628316011338, "grad_norm": 2.791619300842285, "learning_rate": 4.951560578416767e-05, "loss": 0.1431, "step": 23270 }, { "epoch": 0.8459917145141362, "grad_norm": 1.9396895170211792, "learning_rate": 4.951458867771923e-05, "loss": 0.1516, "step": 23280 }, { "epoch": 0.8463551130169343, "grad_norm": 0.9364364147186279, "learning_rate": 4.951357051502439e-05, "loss": 0.1935, "step": 23290 }, { "epoch": 0.8467185115197325, "grad_norm": 2.275146007537842, "learning_rate": 4.9512551296127005e-05, "loss": 0.1832, "step": 23300 }, { "epoch": 0.8470819100225307, "grad_norm": 1.4089415073394775, "learning_rate": 4.951153102107101e-05, "loss": 0.1511, "step": 23310 }, { "epoch": 0.8474453085253288, "grad_norm": 1.2446107864379883, "learning_rate": 4.951050968990035e-05, "loss": 0.282, "step": 23320 }, { "epoch": 
0.847808707028127, "grad_norm": 2.595438241958618, "learning_rate": 4.950948730265905e-05, "loss": 0.1643, "step": 23330 }, { "epoch": 0.8481721055309253, "grad_norm": 1.1884585618972778, "learning_rate": 4.950846385939114e-05, "loss": 0.1445, "step": 23340 }, { "epoch": 0.8485355040337234, "grad_norm": 33.609004974365234, "learning_rate": 4.9507439360140716e-05, "loss": 0.185, "step": 23350 }, { "epoch": 0.8488989025365216, "grad_norm": 0.573637068271637, "learning_rate": 4.950641380495194e-05, "loss": 0.1417, "step": 23360 }, { "epoch": 0.8492623010393198, "grad_norm": 1.1126424074172974, "learning_rate": 4.9505387193868975e-05, "loss": 0.1592, "step": 23370 }, { "epoch": 0.8496256995421179, "grad_norm": 2.466045379638672, "learning_rate": 4.9504359526936074e-05, "loss": 0.1507, "step": 23380 }, { "epoch": 0.8499890980449161, "grad_norm": 1.273472547531128, "learning_rate": 4.95033308041975e-05, "loss": 0.174, "step": 23390 }, { "epoch": 0.8503524965477143, "grad_norm": 5.497190475463867, "learning_rate": 4.9502301025697595e-05, "loss": 0.2269, "step": 23400 }, { "epoch": 0.8503524965477143, "eval_loss": 0.3661801218986511, "eval_runtime": 181.0852, "eval_samples_per_second": 40.942, "eval_steps_per_second": 5.119, "eval_wer": 0.198767404287763, "step": 23400 }, { "epoch": 0.8507158950505124, "grad_norm": 0.740798830986023, "learning_rate": 4.950127019148071e-05, "loss": 0.148, "step": 23410 }, { "epoch": 0.8510792935533106, "grad_norm": 1.7785030603408813, "learning_rate": 4.950023830159127e-05, "loss": 0.175, "step": 23420 }, { "epoch": 0.8514426920561087, "grad_norm": 0.7675313949584961, "learning_rate": 4.949920535607374e-05, "loss": 0.1635, "step": 23430 }, { "epoch": 0.8518060905589069, "grad_norm": 0.9880558252334595, "learning_rate": 4.9498171354972617e-05, "loss": 0.1732, "step": 23440 }, { "epoch": 0.8521694890617051, "grad_norm": 5.804686069488525, "learning_rate": 4.9497136298332454e-05, "loss": 0.2142, "step": 23450 }, { "epoch": 0.8525328875645032, 
"grad_norm": 1.063359022140503, "learning_rate": 4.949610018619785e-05, "loss": 0.1529, "step": 23460 }, { "epoch": 0.8528962860673014, "grad_norm": 1.9043885469436646, "learning_rate": 4.949506301861344e-05, "loss": 0.1633, "step": 23470 }, { "epoch": 0.8532596845700996, "grad_norm": 2.0380702018737793, "learning_rate": 4.9494024795623926e-05, "loss": 0.1595, "step": 23480 }, { "epoch": 0.8536230830728977, "grad_norm": 1.65935218334198, "learning_rate": 4.949298551727403e-05, "loss": 0.1526, "step": 23490 }, { "epoch": 0.8539864815756959, "grad_norm": 1.7575215101242065, "learning_rate": 4.9491945183608536e-05, "loss": 0.1924, "step": 23500 }, { "epoch": 0.8543498800784941, "grad_norm": 2.332193374633789, "learning_rate": 4.949090379467226e-05, "loss": 0.1536, "step": 23510 }, { "epoch": 0.8547132785812922, "grad_norm": 1.0475032329559326, "learning_rate": 4.948986135051009e-05, "loss": 0.1322, "step": 23520 }, { "epoch": 0.8550766770840904, "grad_norm": 3.1753509044647217, "learning_rate": 4.948881785116692e-05, "loss": 0.1457, "step": 23530 }, { "epoch": 0.8554400755868886, "grad_norm": 0.7468664646148682, "learning_rate": 4.948777329668772e-05, "loss": 0.1385, "step": 23540 }, { "epoch": 0.8558034740896867, "grad_norm": 6.77406120300293, "learning_rate": 4.9486727687117507e-05, "loss": 0.19, "step": 23550 }, { "epoch": 0.8561668725924849, "grad_norm": 1.6008226871490479, "learning_rate": 4.9485681022501316e-05, "loss": 0.1609, "step": 23560 }, { "epoch": 0.8565302710952831, "grad_norm": 1.1062623262405396, "learning_rate": 4.948463330288425e-05, "loss": 0.1624, "step": 23570 }, { "epoch": 0.8568936695980812, "grad_norm": 1.6599873304367065, "learning_rate": 4.948358452831145e-05, "loss": 0.1532, "step": 23580 }, { "epoch": 0.8572570681008794, "grad_norm": 1.264592170715332, "learning_rate": 4.9482534698828106e-05, "loss": 0.1696, "step": 23590 }, { "epoch": 0.8576204666036776, "grad_norm": 2.027796745300293, "learning_rate": 4.948148381447945e-05, "loss": 
0.1913, "step": 23600 }, { "epoch": 0.8579838651064757, "grad_norm": 1.3213417530059814, "learning_rate": 4.948043187531076e-05, "loss": 0.1517, "step": 23610 }, { "epoch": 0.8583472636092739, "grad_norm": 1.6190669536590576, "learning_rate": 4.9479378881367366e-05, "loss": 0.1517, "step": 23620 }, { "epoch": 0.8587106621120721, "grad_norm": 5.381803512573242, "learning_rate": 4.947832483269464e-05, "loss": 0.1504, "step": 23630 }, { "epoch": 0.8590740606148702, "grad_norm": 3.4807474613189697, "learning_rate": 4.947726972933798e-05, "loss": 0.1887, "step": 23640 }, { "epoch": 0.8594374591176684, "grad_norm": 4.890349864959717, "learning_rate": 4.947621357134287e-05, "loss": 0.219, "step": 23650 }, { "epoch": 0.8598008576204667, "grad_norm": 1.1006419658660889, "learning_rate": 4.947515635875479e-05, "loss": 0.1743, "step": 23660 }, { "epoch": 0.8601642561232647, "grad_norm": 0.9933237433433533, "learning_rate": 4.9474098091619314e-05, "loss": 0.1294, "step": 23670 }, { "epoch": 0.860527654626063, "grad_norm": 3.392524480819702, "learning_rate": 4.947303876998203e-05, "loss": 0.1784, "step": 23680 }, { "epoch": 0.8608910531288612, "grad_norm": 1.466454029083252, "learning_rate": 4.947197839388857e-05, "loss": 0.1828, "step": 23690 }, { "epoch": 0.8612544516316593, "grad_norm": 3.670731544494629, "learning_rate": 4.947091696338465e-05, "loss": 0.1772, "step": 23700 }, { "epoch": 0.8616178501344575, "grad_norm": 1.3586241006851196, "learning_rate": 4.9469854478515976e-05, "loss": 0.1512, "step": 23710 }, { "epoch": 0.8619812486372556, "grad_norm": 0.8312864303588867, "learning_rate": 4.9468790939328336e-05, "loss": 0.1582, "step": 23720 }, { "epoch": 0.8623446471400538, "grad_norm": 0.9825647473335266, "learning_rate": 4.946772634586756e-05, "loss": 0.1662, "step": 23730 }, { "epoch": 0.862708045642852, "grad_norm": 2.7960050106048584, "learning_rate": 4.94666606981795e-05, "loss": 0.226, "step": 23740 }, { "epoch": 0.8630714441456501, "grad_norm": 
5.3017683029174805, "learning_rate": 4.94655939963101e-05, "loss": 0.2065, "step": 23750 }, { "epoch": 0.8634348426484483, "grad_norm": 1.0958201885223389, "learning_rate": 4.946452624030529e-05, "loss": 0.2177, "step": 23760 }, { "epoch": 0.8637982411512465, "grad_norm": 1.0320892333984375, "learning_rate": 4.94634574302111e-05, "loss": 0.1263, "step": 23770 }, { "epoch": 0.8641616396540446, "grad_norm": 1.0401560068130493, "learning_rate": 4.946238756607356e-05, "loss": 0.6474, "step": 23780 }, { "epoch": 0.8645250381568428, "grad_norm": 1.378184199333191, "learning_rate": 4.9461316647938785e-05, "loss": 0.1783, "step": 23790 }, { "epoch": 0.864888436659641, "grad_norm": 7.429476261138916, "learning_rate": 4.9460244675852906e-05, "loss": 0.2744, "step": 23800 }, { "epoch": 0.8652518351624391, "grad_norm": 2.2409234046936035, "learning_rate": 4.945917164986211e-05, "loss": 0.2088, "step": 23810 }, { "epoch": 0.8656152336652373, "grad_norm": 1.1307353973388672, "learning_rate": 4.945809757001264e-05, "loss": 0.1311, "step": 23820 }, { "epoch": 0.8659786321680355, "grad_norm": 1.6061898469924927, "learning_rate": 4.945702243635077e-05, "loss": 0.1683, "step": 23830 }, { "epoch": 0.8663420306708336, "grad_norm": 1.0011060237884521, "learning_rate": 4.945594624892281e-05, "loss": 0.8323, "step": 23840 }, { "epoch": 0.8667054291736318, "grad_norm": 6.631030082702637, "learning_rate": 4.9454869007775154e-05, "loss": 0.177, "step": 23850 }, { "epoch": 0.86706882767643, "grad_norm": 2.8532910346984863, "learning_rate": 4.9453790712954195e-05, "loss": 0.145, "step": 23860 }, { "epoch": 0.8674322261792281, "grad_norm": 2.6437554359436035, "learning_rate": 4.945271136450641e-05, "loss": 0.1496, "step": 23870 }, { "epoch": 0.8677956246820263, "grad_norm": 3.0070180892944336, "learning_rate": 4.945163096247829e-05, "loss": 0.1582, "step": 23880 }, { "epoch": 0.8681590231848245, "grad_norm": 0.8612903356552124, "learning_rate": 4.9450549506916386e-05, "loss": 0.157, "step": 
23890 }, { "epoch": 0.8685224216876226, "grad_norm": 9.475138664245605, "learning_rate": 4.94494669978673e-05, "loss": 0.312, "step": 23900 }, { "epoch": 0.8688858201904208, "grad_norm": 0.789193868637085, "learning_rate": 4.944838343537768e-05, "loss": 0.1385, "step": 23910 }, { "epoch": 0.869249218693219, "grad_norm": 0.9372280240058899, "learning_rate": 4.94472988194942e-05, "loss": 0.1581, "step": 23920 }, { "epoch": 0.8696126171960171, "grad_norm": 4.738519191741943, "learning_rate": 4.94462131502636e-05, "loss": 0.1693, "step": 23930 }, { "epoch": 0.8699760156988153, "grad_norm": 0.9660571217536926, "learning_rate": 4.9445126427732654e-05, "loss": 0.1578, "step": 23940 }, { "epoch": 0.8703394142016135, "grad_norm": 8.137104034423828, "learning_rate": 4.944403865194818e-05, "loss": 0.1857, "step": 23950 }, { "epoch": 0.8707028127044116, "grad_norm": 1.1240946054458618, "learning_rate": 4.944294982295706e-05, "loss": 0.2508, "step": 23960 }, { "epoch": 0.8710662112072098, "grad_norm": 3.6192643642425537, "learning_rate": 4.94418599408062e-05, "loss": 0.1354, "step": 23970 }, { "epoch": 0.871429609710008, "grad_norm": 2.76771879196167, "learning_rate": 4.944076900554256e-05, "loss": 0.1638, "step": 23980 }, { "epoch": 0.8717930082128061, "grad_norm": 1.734529972076416, "learning_rate": 4.9439677017213143e-05, "loss": 0.1414, "step": 23990 }, { "epoch": 0.8721564067156043, "grad_norm": 6.897458553314209, "learning_rate": 4.9438583975864996e-05, "loss": 0.2154, "step": 24000 }, { "epoch": 0.8721564067156043, "eval_loss": 0.37997984886169434, "eval_runtime": 180.3101, "eval_samples_per_second": 41.118, "eval_steps_per_second": 5.141, "eval_wer": 0.20322399114128561, "step": 24000 }, { "epoch": 0.8725198052184026, "grad_norm": 1.5639888048171997, "learning_rate": 4.943748988154523e-05, "loss": 0.1372, "step": 24010 }, { "epoch": 0.8728832037212007, "grad_norm": 4.484424114227295, "learning_rate": 4.943639473430096e-05, "loss": 0.3205, "step": 24020 }, { "epoch": 
0.8732466022239989, "grad_norm": 1.9517849683761597, "learning_rate": 4.9435298534179396e-05, "loss": 0.2085, "step": 24030 }, { "epoch": 0.873610000726797, "grad_norm": 1.3041925430297852, "learning_rate": 4.943420128122776e-05, "loss": 0.1446, "step": 24040 }, { "epoch": 0.8739733992295952, "grad_norm": 29.67850685119629, "learning_rate": 4.943310297549332e-05, "loss": 0.2643, "step": 24050 }, { "epoch": 0.8743367977323934, "grad_norm": 4.462527751922607, "learning_rate": 4.9432003617023405e-05, "loss": 0.2067, "step": 24060 }, { "epoch": 0.8747001962351915, "grad_norm": 1.2176992893218994, "learning_rate": 4.9430903205865384e-05, "loss": 0.1353, "step": 24070 }, { "epoch": 0.8750635947379897, "grad_norm": 2.044191360473633, "learning_rate": 4.9429801742066675e-05, "loss": 0.1632, "step": 24080 }, { "epoch": 0.8754269932407879, "grad_norm": 3.0303845405578613, "learning_rate": 4.942869922567473e-05, "loss": 0.1533, "step": 24090 }, { "epoch": 0.875790391743586, "grad_norm": 4.44179105758667, "learning_rate": 4.942759565673705e-05, "loss": 0.2054, "step": 24100 }, { "epoch": 0.8761537902463842, "grad_norm": 2.158686637878418, "learning_rate": 4.942649103530119e-05, "loss": 0.1457, "step": 24110 }, { "epoch": 0.8765171887491824, "grad_norm": 5.875476837158203, "learning_rate": 4.942538536141473e-05, "loss": 0.1941, "step": 24120 }, { "epoch": 0.8768805872519805, "grad_norm": 1.7252172231674194, "learning_rate": 4.9424278635125335e-05, "loss": 0.155, "step": 24130 }, { "epoch": 0.8772439857547787, "grad_norm": 1.6594487428665161, "learning_rate": 4.9423170856480674e-05, "loss": 0.1736, "step": 24140 }, { "epoch": 0.8776073842575769, "grad_norm": 6.2919697761535645, "learning_rate": 4.9422062025528474e-05, "loss": 0.2313, "step": 24150 }, { "epoch": 0.877970782760375, "grad_norm": 2.1133229732513428, "learning_rate": 4.942095214231651e-05, "loss": 0.1642, "step": 24160 }, { "epoch": 0.8783341812631732, "grad_norm": 1.02867591381073, "learning_rate": 
4.941984120689262e-05, "loss": 0.1554, "step": 24170 }, { "epoch": 0.8786975797659714, "grad_norm": 1.7262704372406006, "learning_rate": 4.941872921930465e-05, "loss": 0.1428, "step": 24180 }, { "epoch": 0.8790609782687695, "grad_norm": 1.095211386680603, "learning_rate": 4.9417616179600526e-05, "loss": 0.1683, "step": 24190 }, { "epoch": 0.8794243767715677, "grad_norm": 9.772414207458496, "learning_rate": 4.94165020878282e-05, "loss": 0.2224, "step": 24200 }, { "epoch": 0.8797877752743659, "grad_norm": 0.6741021871566772, "learning_rate": 4.9415386944035665e-05, "loss": 0.7216, "step": 24210 }, { "epoch": 0.880151173777164, "grad_norm": 0.6714327335357666, "learning_rate": 4.941427074827098e-05, "loss": 0.1321, "step": 24220 }, { "epoch": 0.8805145722799622, "grad_norm": 9.116118431091309, "learning_rate": 4.941315350058223e-05, "loss": 0.1738, "step": 24230 }, { "epoch": 0.8808779707827604, "grad_norm": 1.119581937789917, "learning_rate": 4.941203520101757e-05, "loss": 0.1076, "step": 24240 }, { "epoch": 0.8812413692855585, "grad_norm": 1.5630614757537842, "learning_rate": 4.941091584962516e-05, "loss": 0.1734, "step": 24250 }, { "epoch": 0.8816047677883567, "grad_norm": 3.4376001358032227, "learning_rate": 4.940979544645325e-05, "loss": 0.1567, "step": 24260 }, { "epoch": 0.8819681662911549, "grad_norm": 1.1688649654388428, "learning_rate": 4.94086739915501e-05, "loss": 0.137, "step": 24270 }, { "epoch": 0.882331564793953, "grad_norm": 2.02235746383667, "learning_rate": 4.9407551484964035e-05, "loss": 0.1718, "step": 24280 }, { "epoch": 0.8826949632967512, "grad_norm": 1.7484105825424194, "learning_rate": 4.940642792674341e-05, "loss": 0.1973, "step": 24290 }, { "epoch": 0.8830583617995494, "grad_norm": 7.056839942932129, "learning_rate": 4.940530331693666e-05, "loss": 0.1916, "step": 24300 }, { "epoch": 0.8834217603023475, "grad_norm": 1.4804614782333374, "learning_rate": 4.940417765559221e-05, "loss": 0.1418, "step": 24310 }, { "epoch": 0.8837851588051457, 
"grad_norm": 1.3168327808380127, "learning_rate": 4.940305094275859e-05, "loss": 0.1466, "step": 24320 }, { "epoch": 0.8841485573079438, "grad_norm": 2.4612350463867188, "learning_rate": 4.9401923178484325e-05, "loss": 0.1956, "step": 24330 }, { "epoch": 0.884511955810742, "grad_norm": 0.8389832973480225, "learning_rate": 4.9400794362818005e-05, "loss": 0.1751, "step": 24340 }, { "epoch": 0.8848753543135403, "grad_norm": 2.618521213531494, "learning_rate": 4.939966449580828e-05, "loss": 0.2133, "step": 24350 }, { "epoch": 0.8852387528163383, "grad_norm": 0.767784833908081, "learning_rate": 4.9398533577503826e-05, "loss": 0.1256, "step": 24360 }, { "epoch": 0.8856021513191366, "grad_norm": 1.7649836540222168, "learning_rate": 4.939740160795336e-05, "loss": 0.1925, "step": 24370 }, { "epoch": 0.8859655498219348, "grad_norm": 2.182840347290039, "learning_rate": 4.9396268587205685e-05, "loss": 0.184, "step": 24380 }, { "epoch": 0.8863289483247329, "grad_norm": 1.6524356603622437, "learning_rate": 4.939513451530958e-05, "loss": 0.1582, "step": 24390 }, { "epoch": 0.8866923468275311, "grad_norm": 13.93655776977539, "learning_rate": 4.939399939231394e-05, "loss": 0.1813, "step": 24400 }, { "epoch": 0.8870557453303293, "grad_norm": 1.9153752326965332, "learning_rate": 4.939286321826766e-05, "loss": 0.2093, "step": 24410 }, { "epoch": 0.8874191438331274, "grad_norm": 1.9444178342819214, "learning_rate": 4.9391725993219685e-05, "loss": 0.1489, "step": 24420 }, { "epoch": 0.8877825423359256, "grad_norm": 2.9371562004089355, "learning_rate": 4.939058771721903e-05, "loss": 0.1648, "step": 24430 }, { "epoch": 0.8881459408387238, "grad_norm": 3.127439498901367, "learning_rate": 4.938944839031473e-05, "loss": 0.1756, "step": 24440 }, { "epoch": 0.8885093393415219, "grad_norm": 11.735489845275879, "learning_rate": 4.938830801255588e-05, "loss": 0.2049, "step": 24450 }, { "epoch": 0.8888727378443201, "grad_norm": 1.0685577392578125, "learning_rate": 4.938716658399161e-05, "loss": 
0.147, "step": 24460 }, { "epoch": 0.8892361363471183, "grad_norm": 3.6975417137145996, "learning_rate": 4.93860241046711e-05, "loss": 0.1402, "step": 24470 }, { "epoch": 0.8895995348499164, "grad_norm": 1.703731894493103, "learning_rate": 4.938488057464358e-05, "loss": 0.1418, "step": 24480 }, { "epoch": 0.8899629333527146, "grad_norm": 1.5911983251571655, "learning_rate": 4.938373599395831e-05, "loss": 0.1268, "step": 24490 }, { "epoch": 0.8903263318555128, "grad_norm": 5.278975486755371, "learning_rate": 4.9382590362664613e-05, "loss": 0.2388, "step": 24500 }, { "epoch": 0.8906897303583109, "grad_norm": 1.673403263092041, "learning_rate": 4.9381443680811865e-05, "loss": 0.1568, "step": 24510 }, { "epoch": 0.8910531288611091, "grad_norm": 0.5384930968284607, "learning_rate": 4.938029594844945e-05, "loss": 0.1364, "step": 24520 }, { "epoch": 0.8914165273639073, "grad_norm": 1.4231863021850586, "learning_rate": 4.937914716562683e-05, "loss": 0.1358, "step": 24530 }, { "epoch": 0.8917799258667054, "grad_norm": 1.2151052951812744, "learning_rate": 4.937799733239349e-05, "loss": 0.1673, "step": 24540 }, { "epoch": 0.8921433243695036, "grad_norm": 9.278292655944824, "learning_rate": 4.937684644879899e-05, "loss": 0.2505, "step": 24550 }, { "epoch": 0.8925067228723018, "grad_norm": 2.3570127487182617, "learning_rate": 4.937569451489291e-05, "loss": 0.1447, "step": 24560 }, { "epoch": 0.8928701213750999, "grad_norm": 0.44337037205696106, "learning_rate": 4.937454153072488e-05, "loss": 0.2015, "step": 24570 }, { "epoch": 0.8932335198778981, "grad_norm": 2.4552314281463623, "learning_rate": 4.937338749634458e-05, "loss": 0.1838, "step": 24580 }, { "epoch": 0.8935969183806963, "grad_norm": 0.9864338636398315, "learning_rate": 4.937223241180174e-05, "loss": 0.1356, "step": 24590 }, { "epoch": 0.8939603168834944, "grad_norm": 8.218843460083008, "learning_rate": 4.937107627714612e-05, "loss": 0.2109, "step": 24600 }, { "epoch": 0.8939603168834944, "eval_loss": 
0.38069987297058105, "eval_runtime": 180.4244, "eval_samples_per_second": 41.092, "eval_steps_per_second": 5.138, "eval_wer": 0.20163559460489772, "step": 24600 }, { "epoch": 0.8943237153862926, "grad_norm": 0.7269652485847473, "learning_rate": 4.936991909242753e-05, "loss": 0.1756, "step": 24610 }, { "epoch": 0.8946871138890907, "grad_norm": 0.9835095405578613, "learning_rate": 4.9368760857695836e-05, "loss": 0.1297, "step": 24620 }, { "epoch": 0.8950505123918889, "grad_norm": 3.5632708072662354, "learning_rate": 4.9367601573000944e-05, "loss": 0.146, "step": 24630 }, { "epoch": 0.8954139108946871, "grad_norm": 0.7898311614990234, "learning_rate": 4.93664412383928e-05, "loss": 0.1693, "step": 24640 }, { "epoch": 0.8957773093974852, "grad_norm": 3.8220248222351074, "learning_rate": 4.93652798539214e-05, "loss": 0.1739, "step": 24650 }, { "epoch": 0.8961407079002834, "grad_norm": 0.7946699857711792, "learning_rate": 4.936411741963678e-05, "loss": 0.1271, "step": 24660 }, { "epoch": 0.8965041064030816, "grad_norm": 1.5677101612091064, "learning_rate": 4.936295393558903e-05, "loss": 0.1365, "step": 24670 }, { "epoch": 0.8968675049058797, "grad_norm": 18.39532470703125, "learning_rate": 4.9361789401828285e-05, "loss": 0.2035, "step": 24680 }, { "epoch": 0.897230903408678, "grad_norm": 2.577984094619751, "learning_rate": 4.93606238184047e-05, "loss": 0.127, "step": 24690 }, { "epoch": 0.8975943019114762, "grad_norm": 3.4822871685028076, "learning_rate": 4.9359457185368515e-05, "loss": 0.2335, "step": 24700 }, { "epoch": 0.8979577004142743, "grad_norm": 1.6475412845611572, "learning_rate": 4.935828950277e-05, "loss": 0.1581, "step": 24710 }, { "epoch": 0.8983210989170725, "grad_norm": 2.0972635746002197, "learning_rate": 4.9357120770659446e-05, "loss": 0.1608, "step": 24720 }, { "epoch": 0.8986844974198707, "grad_norm": 3.194946050643921, "learning_rate": 4.9355950989087226e-05, "loss": 0.1911, "step": 24730 }, { "epoch": 0.8990478959226688, "grad_norm": 
1.1382654905319214, "learning_rate": 4.9354780158103744e-05, "loss": 0.1671, "step": 24740 }, { "epoch": 0.899411294425467, "grad_norm": 7.309133052825928, "learning_rate": 4.9353608277759433e-05, "loss": 0.192, "step": 24750 }, { "epoch": 0.8997746929282652, "grad_norm": 1.0215349197387695, "learning_rate": 4.9352435348104786e-05, "loss": 0.1713, "step": 24760 }, { "epoch": 0.9001380914310633, "grad_norm": 2.319836378097534, "learning_rate": 4.935126136919035e-05, "loss": 0.1441, "step": 24770 }, { "epoch": 0.9005014899338615, "grad_norm": 3.443413496017456, "learning_rate": 4.9350086341066716e-05, "loss": 0.2136, "step": 24780 }, { "epoch": 0.9008648884366597, "grad_norm": 0.9862478971481323, "learning_rate": 4.934891026378449e-05, "loss": 0.134, "step": 24790 }, { "epoch": 0.9012282869394578, "grad_norm": 10.3681640625, "learning_rate": 4.934773313739435e-05, "loss": 0.3034, "step": 24800 }, { "epoch": 0.901591685442256, "grad_norm": 0.9848408102989197, "learning_rate": 4.9346554961947014e-05, "loss": 0.1503, "step": 24810 }, { "epoch": 0.9019550839450542, "grad_norm": 1.3456752300262451, "learning_rate": 4.934537573749326e-05, "loss": 2.2881, "step": 24820 }, { "epoch": 0.9023184824478523, "grad_norm": 0.8639931082725525, "learning_rate": 4.9344195464083884e-05, "loss": 0.1565, "step": 24830 }, { "epoch": 0.9026818809506505, "grad_norm": 1.1297109127044678, "learning_rate": 4.9343014141769744e-05, "loss": 0.1338, "step": 24840 }, { "epoch": 0.9030452794534487, "grad_norm": 20.8160343170166, "learning_rate": 4.934183177060173e-05, "loss": 0.2155, "step": 24850 }, { "epoch": 0.9034086779562468, "grad_norm": 0.8113746643066406, "learning_rate": 4.9340648350630804e-05, "loss": 0.126, "step": 24860 }, { "epoch": 0.903772076459045, "grad_norm": 1.7760541439056396, "learning_rate": 4.9339463881907946e-05, "loss": 0.119, "step": 24870 }, { "epoch": 0.9041354749618432, "grad_norm": 1.8657050132751465, "learning_rate": 4.933827836448418e-05, "loss": 0.1772, "step": 24880 
}, { "epoch": 0.9044988734646413, "grad_norm": 1.2576991319656372, "learning_rate": 4.9337091798410594e-05, "loss": 0.1609, "step": 24890 }, { "epoch": 0.9048622719674395, "grad_norm": 4.8249311447143555, "learning_rate": 4.933590418373833e-05, "loss": 1.7033, "step": 24900 }, { "epoch": 0.9052256704702376, "grad_norm": 1.065819501876831, "learning_rate": 4.9334715520518526e-05, "loss": 0.1559, "step": 24910 }, { "epoch": 0.9055890689730358, "grad_norm": 0.961330771446228, "learning_rate": 4.933352580880242e-05, "loss": 0.1459, "step": 24920 }, { "epoch": 0.905952467475834, "grad_norm": 2.0911202430725098, "learning_rate": 4.933233504864126e-05, "loss": 0.2173, "step": 24930 }, { "epoch": 0.9063158659786321, "grad_norm": 0.5074183940887451, "learning_rate": 4.933114324008636e-05, "loss": 0.1544, "step": 24940 }, { "epoch": 0.9066792644814303, "grad_norm": 3.663172483444214, "learning_rate": 4.932995038318907e-05, "loss": 0.2042, "step": 24950 }, { "epoch": 0.9070426629842285, "grad_norm": 1.691545844078064, "learning_rate": 4.9328756478000784e-05, "loss": 0.1616, "step": 24960 }, { "epoch": 0.9074060614870266, "grad_norm": 1.6613342761993408, "learning_rate": 4.9327561524572944e-05, "loss": 0.1212, "step": 24970 }, { "epoch": 0.9077694599898248, "grad_norm": 2.5737128257751465, "learning_rate": 4.9326365522957044e-05, "loss": 0.1753, "step": 24980 }, { "epoch": 0.908132858492623, "grad_norm": 1.717429280281067, "learning_rate": 4.932516847320459e-05, "loss": 0.1436, "step": 24990 }, { "epoch": 0.9084962569954211, "grad_norm": 13.324812889099121, "learning_rate": 4.9323970375367186e-05, "loss": 0.1983, "step": 25000 }, { "epoch": 0.9088596554982193, "grad_norm": 1.374232530593872, "learning_rate": 4.932277122949644e-05, "loss": 0.1588, "step": 25010 }, { "epoch": 0.9092230540010175, "grad_norm": 1.1790850162506104, "learning_rate": 4.932157103564402e-05, "loss": 0.1603, "step": 25020 }, { "epoch": 0.9095864525038156, "grad_norm": 2.7326996326446533, "learning_rate": 
4.932036979386165e-05, "loss": 0.1656, "step": 25030 }, { "epoch": 0.9099498510066139, "grad_norm": 1.2364397048950195, "learning_rate": 4.931916750420107e-05, "loss": 0.2311, "step": 25040 }, { "epoch": 0.9103132495094121, "grad_norm": 3.7070934772491455, "learning_rate": 4.9317964166714095e-05, "loss": 0.2286, "step": 25050 }, { "epoch": 0.9106766480122102, "grad_norm": 2.05336594581604, "learning_rate": 4.931675978145256e-05, "loss": 0.1404, "step": 25060 }, { "epoch": 0.9110400465150084, "grad_norm": 1.3064135313034058, "learning_rate": 4.931555434846837e-05, "loss": 0.1395, "step": 25070 }, { "epoch": 0.9114034450178066, "grad_norm": 1.252254843711853, "learning_rate": 4.931434786781346e-05, "loss": 0.1595, "step": 25080 }, { "epoch": 0.9117668435206047, "grad_norm": 1.399654507637024, "learning_rate": 4.931314033953981e-05, "loss": 0.1495, "step": 25090 }, { "epoch": 0.9121302420234029, "grad_norm": 9.340110778808594, "learning_rate": 4.931193176369945e-05, "loss": 0.2489, "step": 25100 }, { "epoch": 0.9124936405262011, "grad_norm": 1.4071942567825317, "learning_rate": 4.931072214034445e-05, "loss": 0.1409, "step": 25110 }, { "epoch": 0.9128570390289992, "grad_norm": 1.5617743730545044, "learning_rate": 4.9309511469526934e-05, "loss": 0.2026, "step": 25120 }, { "epoch": 0.9132204375317974, "grad_norm": 1.4382219314575195, "learning_rate": 4.930829975129906e-05, "loss": 0.1426, "step": 25130 }, { "epoch": 0.9135838360345956, "grad_norm": 1.0388094186782837, "learning_rate": 4.930708698571303e-05, "loss": 0.132, "step": 25140 }, { "epoch": 0.9139472345373937, "grad_norm": 3.9398436546325684, "learning_rate": 4.9305873172821126e-05, "loss": 0.2257, "step": 25150 }, { "epoch": 0.9143106330401919, "grad_norm": 2.5586395263671875, "learning_rate": 4.930465831267562e-05, "loss": 0.1508, "step": 25160 }, { "epoch": 0.9146740315429901, "grad_norm": 1.6908849477767944, "learning_rate": 4.930344240532886e-05, "loss": 0.1407, "step": 25170 }, { "epoch": 
0.9150374300457882, "grad_norm": 3.980564594268799, "learning_rate": 4.930222545083324e-05, "loss": 0.1749, "step": 25180 }, { "epoch": 0.9154008285485864, "grad_norm": 1.7451142072677612, "learning_rate": 4.930100744924119e-05, "loss": 0.1415, "step": 25190 }, { "epoch": 0.9157642270513845, "grad_norm": 11.09490966796875, "learning_rate": 4.9299788400605194e-05, "loss": 0.248, "step": 25200 }, { "epoch": 0.9157642270513845, "eval_loss": 0.36305877566337585, "eval_runtime": 180.0742, "eval_samples_per_second": 41.172, "eval_steps_per_second": 5.148, "eval_wer": 0.19528200845934612, "step": 25200 }, { "epoch": 0.9161276255541827, "grad_norm": 0.9552545547485352, "learning_rate": 4.929856830497778e-05, "loss": 0.1765, "step": 25210 }, { "epoch": 0.9164910240569809, "grad_norm": 1.0652204751968384, "learning_rate": 4.929734716241151e-05, "loss": 0.1412, "step": 25220 }, { "epoch": 0.916854422559779, "grad_norm": 2.473240375518799, "learning_rate": 4.929612497295899e-05, "loss": 0.1511, "step": 25230 }, { "epoch": 0.9172178210625772, "grad_norm": 2.0563089847564697, "learning_rate": 4.929490173667291e-05, "loss": 0.1562, "step": 25240 }, { "epoch": 0.9175812195653754, "grad_norm": 5.446952819824219, "learning_rate": 4.929367745360593e-05, "loss": 0.2416, "step": 25250 }, { "epoch": 0.9179446180681735, "grad_norm": 0.724795937538147, "learning_rate": 4.929245212381085e-05, "loss": 0.1554, "step": 25260 }, { "epoch": 0.9183080165709717, "grad_norm": 1.0962814092636108, "learning_rate": 4.929122574734043e-05, "loss": 0.1567, "step": 25270 }, { "epoch": 0.9186714150737699, "grad_norm": 1.3689608573913574, "learning_rate": 4.9289998324247524e-05, "loss": 0.1498, "step": 25280 }, { "epoch": 0.919034813576568, "grad_norm": 3.039569139480591, "learning_rate": 4.9288769854585015e-05, "loss": 0.1666, "step": 25290 }, { "epoch": 0.9193982120793662, "grad_norm": 10.71928882598877, "learning_rate": 4.928754033840583e-05, "loss": 0.2487, "step": 25300 }, { "epoch": 
0.9197616105821644, "grad_norm": 0.47624918818473816, "learning_rate": 4.928630977576295e-05, "loss": 0.1457, "step": 25310 }, { "epoch": 0.9201250090849625, "grad_norm": 1.2840664386749268, "learning_rate": 4.9285078166709386e-05, "loss": 0.1437, "step": 25320 }, { "epoch": 0.9204884075877607, "grad_norm": 2.118415117263794, "learning_rate": 4.928384551129822e-05, "loss": 0.1861, "step": 25330 }, { "epoch": 0.9208518060905589, "grad_norm": 0.8363248109817505, "learning_rate": 4.928261180958255e-05, "loss": 0.1494, "step": 25340 }, { "epoch": 0.921215204593357, "grad_norm": 20.23488998413086, "learning_rate": 4.928137706161553e-05, "loss": 0.2414, "step": 25350 }, { "epoch": 0.9215786030961552, "grad_norm": 1.1590826511383057, "learning_rate": 4.928014126745037e-05, "loss": 0.162, "step": 25360 }, { "epoch": 0.9219420015989535, "grad_norm": 1.1986241340637207, "learning_rate": 4.9278904427140315e-05, "loss": 0.1323, "step": 25370 }, { "epoch": 0.9223054001017515, "grad_norm": 5.075083255767822, "learning_rate": 4.927766654073864e-05, "loss": 0.2944, "step": 25380 }, { "epoch": 0.9226687986045498, "grad_norm": 3.1853582859039307, "learning_rate": 4.927642760829871e-05, "loss": 0.1792, "step": 25390 }, { "epoch": 0.923032197107348, "grad_norm": 5.919759273529053, "learning_rate": 4.927518762987388e-05, "loss": 0.2182, "step": 25400 }, { "epoch": 0.9233955956101461, "grad_norm": 1.4639918804168701, "learning_rate": 4.927394660551759e-05, "loss": 0.1277, "step": 25410 }, { "epoch": 0.9237589941129443, "grad_norm": 1.205178141593933, "learning_rate": 4.927270453528331e-05, "loss": 0.1197, "step": 25420 }, { "epoch": 0.9241223926157425, "grad_norm": 1.4328303337097168, "learning_rate": 4.927146141922455e-05, "loss": 0.1522, "step": 25430 }, { "epoch": 0.9244857911185406, "grad_norm": 0.6114678382873535, "learning_rate": 4.927021725739488e-05, "loss": 0.1661, "step": 25440 }, { "epoch": 0.9248491896213388, "grad_norm": 34.13093566894531, "learning_rate": 
4.92689720498479e-05, "loss": 0.2852, "step": 25450 }, { "epoch": 0.925212588124137, "grad_norm": 0.9967424273490906, "learning_rate": 4.9267725796637256e-05, "loss": 0.1433, "step": 25460 }, { "epoch": 0.9255759866269351, "grad_norm": 1.0493268966674805, "learning_rate": 4.926647849781666e-05, "loss": 0.1361, "step": 25470 }, { "epoch": 0.9259393851297333, "grad_norm": 2.582016944885254, "learning_rate": 4.926523015343985e-05, "loss": 0.1829, "step": 25480 }, { "epoch": 0.9263027836325314, "grad_norm": 5.122225284576416, "learning_rate": 4.92639807635606e-05, "loss": 0.1148, "step": 25490 }, { "epoch": 0.9266661821353296, "grad_norm": 8.054966926574707, "learning_rate": 4.9262730328232755e-05, "loss": 0.2363, "step": 25500 }, { "epoch": 0.9270295806381278, "grad_norm": 3.3668735027313232, "learning_rate": 4.926147884751018e-05, "loss": 0.1311, "step": 25510 }, { "epoch": 0.9273929791409259, "grad_norm": 1.0643728971481323, "learning_rate": 4.926022632144681e-05, "loss": 0.1318, "step": 25520 }, { "epoch": 0.9277563776437241, "grad_norm": 1.632354497909546, "learning_rate": 4.9258972750096614e-05, "loss": 0.1958, "step": 25530 }, { "epoch": 0.9281197761465223, "grad_norm": 0.7638659477233887, "learning_rate": 4.9257718133513586e-05, "loss": 0.168, "step": 25540 }, { "epoch": 0.9284831746493204, "grad_norm": 4.14115571975708, "learning_rate": 4.9256462471751796e-05, "loss": 0.1976, "step": 25550 }, { "epoch": 0.9288465731521186, "grad_norm": 39.925689697265625, "learning_rate": 4.925520576486534e-05, "loss": 0.67, "step": 25560 }, { "epoch": 0.9292099716549168, "grad_norm": 1.349623441696167, "learning_rate": 4.9253948012908366e-05, "loss": 0.1475, "step": 25570 }, { "epoch": 0.9295733701577149, "grad_norm": 10.941556930541992, "learning_rate": 4.925268921593508e-05, "loss": 0.1696, "step": 25580 }, { "epoch": 0.9299367686605131, "grad_norm": 1.5406817197799683, "learning_rate": 4.925142937399969e-05, "loss": 0.1444, "step": 25590 }, { "epoch": 0.9303001671633113, 
"grad_norm": 3.9542319774627686, "learning_rate": 4.925016848715651e-05, "loss": 0.216, "step": 25600 }, { "epoch": 0.9306635656661094, "grad_norm": 2.0055665969848633, "learning_rate": 4.924890655545984e-05, "loss": 0.1248, "step": 25610 }, { "epoch": 0.9310269641689076, "grad_norm": 2.145512819290161, "learning_rate": 4.924764357896408e-05, "loss": 0.1278, "step": 25620 }, { "epoch": 0.9313903626717058, "grad_norm": 6.076485633850098, "learning_rate": 4.924637955772361e-05, "loss": 0.1586, "step": 25630 }, { "epoch": 0.9317537611745039, "grad_norm": 0.9482760429382324, "learning_rate": 4.924511449179293e-05, "loss": 0.1547, "step": 25640 }, { "epoch": 0.9321171596773021, "grad_norm": 2.335090398788452, "learning_rate": 4.924384838122653e-05, "loss": 0.1709, "step": 25650 }, { "epoch": 0.9324805581801003, "grad_norm": 2.1309449672698975, "learning_rate": 4.924258122607895e-05, "loss": 0.1425, "step": 25660 }, { "epoch": 0.9328439566828984, "grad_norm": 1.092887282371521, "learning_rate": 4.924131302640482e-05, "loss": 0.1578, "step": 25670 }, { "epoch": 0.9332073551856966, "grad_norm": 0.7325641512870789, "learning_rate": 4.9240043782258746e-05, "loss": 0.1473, "step": 25680 }, { "epoch": 0.9335707536884948, "grad_norm": 1.296338677406311, "learning_rate": 4.9238773493695443e-05, "loss": 0.2279, "step": 25690 }, { "epoch": 0.9339341521912929, "grad_norm": 1.196590542793274, "learning_rate": 4.923750216076963e-05, "loss": 0.1524, "step": 25700 }, { "epoch": 0.9342975506940912, "grad_norm": 1.5417845249176025, "learning_rate": 4.923622978353608e-05, "loss": 0.1385, "step": 25710 }, { "epoch": 0.9346609491968894, "grad_norm": 1.4865704774856567, "learning_rate": 4.923495636204963e-05, "loss": 0.1435, "step": 25720 }, { "epoch": 0.9350243476996875, "grad_norm": 1.6445010900497437, "learning_rate": 4.923368189636513e-05, "loss": 0.223, "step": 25730 }, { "epoch": 0.9353877462024857, "grad_norm": 0.6629343032836914, "learning_rate": 4.9232406386537505e-05, "loss": 
0.1479, "step": 25740 }, { "epoch": 0.9357511447052839, "grad_norm": 8.440834999084473, "learning_rate": 4.923112983262171e-05, "loss": 0.7624, "step": 25750 }, { "epoch": 0.936114543208082, "grad_norm": 1.088809847831726, "learning_rate": 4.922985223467274e-05, "loss": 0.134, "step": 25760 }, { "epoch": 0.9364779417108802, "grad_norm": 1.1839587688446045, "learning_rate": 4.922857359274565e-05, "loss": 0.1284, "step": 25770 }, { "epoch": 0.9368413402136783, "grad_norm": 2.278588056564331, "learning_rate": 4.922729390689553e-05, "loss": 0.1873, "step": 25780 }, { "epoch": 0.9372047387164765, "grad_norm": 1.6524765491485596, "learning_rate": 4.9226013177177515e-05, "loss": 0.1769, "step": 25790 }, { "epoch": 0.9375681372192747, "grad_norm": 18.044713973999023, "learning_rate": 4.922473140364679e-05, "loss": 0.2122, "step": 25800 }, { "epoch": 0.9375681372192747, "eval_loss": 0.3881298005580902, "eval_runtime": 180.2979, "eval_samples_per_second": 41.121, "eval_steps_per_second": 5.141, "eval_wer": 0.1963348884491804, "step": 25800 }, { "epoch": 0.9379315357220728, "grad_norm": 1.1691884994506836, "learning_rate": 4.9223448586358576e-05, "loss": 0.1573, "step": 25810 }, { "epoch": 0.938294934224871, "grad_norm": 1.1012376546859741, "learning_rate": 4.9222164725368156e-05, "loss": 0.1511, "step": 25820 }, { "epoch": 0.9386583327276692, "grad_norm": 2.1937880516052246, "learning_rate": 4.9220879820730844e-05, "loss": 0.1684, "step": 25830 }, { "epoch": 0.9390217312304673, "grad_norm": 1.5964059829711914, "learning_rate": 4.921959387250199e-05, "loss": 0.1897, "step": 25840 }, { "epoch": 0.9393851297332655, "grad_norm": 6.693167209625244, "learning_rate": 4.921830688073701e-05, "loss": 0.2155, "step": 25850 }, { "epoch": 0.9397485282360637, "grad_norm": 1.679046869277954, "learning_rate": 4.921701884549136e-05, "loss": 0.1566, "step": 25860 }, { "epoch": 0.9401119267388618, "grad_norm": 0.648047924041748, "learning_rate": 4.9215729766820536e-05, "loss": 0.1398, "step": 
25870 }, { "epoch": 0.94047532524166, "grad_norm": 0.7286267876625061, "learning_rate": 4.921443964478007e-05, "loss": 0.1598, "step": 25880 }, { "epoch": 0.9408387237444582, "grad_norm": 1.3676726818084717, "learning_rate": 4.921314847942555e-05, "loss": 0.1627, "step": 25890 }, { "epoch": 0.9412021222472563, "grad_norm": 11.982099533081055, "learning_rate": 4.921185627081263e-05, "loss": 0.2181, "step": 25900 }, { "epoch": 0.9415655207500545, "grad_norm": 0.8863544464111328, "learning_rate": 4.9210563018996955e-05, "loss": 0.1296, "step": 25910 }, { "epoch": 0.9419289192528527, "grad_norm": 0.8388992547988892, "learning_rate": 4.9209268724034265e-05, "loss": 0.1406, "step": 25920 }, { "epoch": 0.9422923177556508, "grad_norm": 2.4800333976745605, "learning_rate": 4.9207973385980324e-05, "loss": 0.1694, "step": 25930 }, { "epoch": 0.942655716258449, "grad_norm": 4.2597174644470215, "learning_rate": 4.920667700489093e-05, "loss": 0.9439, "step": 25940 }, { "epoch": 0.9430191147612472, "grad_norm": 5.32108736038208, "learning_rate": 4.920537958082196e-05, "loss": 0.1745, "step": 25950 }, { "epoch": 0.9433825132640453, "grad_norm": 1.3563112020492554, "learning_rate": 4.9204081113829316e-05, "loss": 0.1554, "step": 25960 }, { "epoch": 0.9437459117668435, "grad_norm": 8.575587272644043, "learning_rate": 4.9202781603968926e-05, "loss": 0.2015, "step": 25970 }, { "epoch": 0.9441093102696417, "grad_norm": 6.85026216506958, "learning_rate": 4.920148105129679e-05, "loss": 0.1548, "step": 25980 }, { "epoch": 0.9444727087724398, "grad_norm": 1.2886810302734375, "learning_rate": 4.9200179455868944e-05, "loss": 0.136, "step": 25990 }, { "epoch": 0.944836107275238, "grad_norm": 2.0779457092285156, "learning_rate": 4.919887681774148e-05, "loss": 0.1744, "step": 26000 }, { "epoch": 0.9451995057780362, "grad_norm": Infinity, "learning_rate": 4.919770355196496e-05, "loss": 2.7706, "step": 26010 }, { "epoch": 0.9455629042808343, "grad_norm": 0.9514101147651672, "learning_rate": 
4.919639893286285e-05, "loss": 0.1435, "step": 26020 }, { "epoch": 0.9459263027836325, "grad_norm": 0.3761270046234131, "learning_rate": 4.9195093271224016e-05, "loss": 0.1525, "step": 26030 }, { "epoch": 0.9462897012864308, "grad_norm": 1.2147834300994873, "learning_rate": 4.919378656710469e-05, "loss": 0.1922, "step": 26040 }, { "epoch": 0.9466530997892288, "grad_norm": 15.408570289611816, "learning_rate": 4.919247882056119e-05, "loss": 0.2773, "step": 26050 }, { "epoch": 0.947016498292027, "grad_norm": 2.2306370735168457, "learning_rate": 4.919117003164985e-05, "loss": 0.1446, "step": 26060 }, { "epoch": 0.9473798967948253, "grad_norm": 1.3414242267608643, "learning_rate": 4.918986020042706e-05, "loss": 0.1484, "step": 26070 }, { "epoch": 0.9477432952976234, "grad_norm": 1.9740337133407593, "learning_rate": 4.9188549326949275e-05, "loss": 0.1845, "step": 26080 }, { "epoch": 0.9481066938004216, "grad_norm": 0.7002670764923096, "learning_rate": 4.9187237411272955e-05, "loss": 0.1559, "step": 26090 }, { "epoch": 0.9484700923032197, "grad_norm": 8.308074951171875, "learning_rate": 4.9185924453454635e-05, "loss": 0.223, "step": 26100 }, { "epoch": 0.9488334908060179, "grad_norm": 0.8129051327705383, "learning_rate": 4.9184610453550884e-05, "loss": 0.1459, "step": 26110 }, { "epoch": 0.9491968893088161, "grad_norm": 1.5998592376708984, "learning_rate": 4.918329541161831e-05, "loss": 0.1394, "step": 26120 }, { "epoch": 0.9495602878116142, "grad_norm": 1.8726842403411865, "learning_rate": 4.918197932771359e-05, "loss": 0.1859, "step": 26130 }, { "epoch": 0.9499236863144124, "grad_norm": 1.1915557384490967, "learning_rate": 4.9180662201893424e-05, "loss": 0.1621, "step": 26140 }, { "epoch": 0.9502870848172106, "grad_norm": 6.970126152038574, "learning_rate": 4.917934403421455e-05, "loss": 0.2613, "step": 26150 }, { "epoch": 0.9506504833200087, "grad_norm": 1.0738050937652588, "learning_rate": 4.9178024824733776e-05, "loss": 0.1383, "step": 26160 }, { "epoch": 
0.9510138818228069, "grad_norm": 2.1130123138427734, "learning_rate": 4.9176704573507933e-05, "loss": 0.222, "step": 26170 }, { "epoch": 0.9513772803256051, "grad_norm": 3.1722593307495117, "learning_rate": 4.9175383280593925e-05, "loss": 0.1624, "step": 26180 }, { "epoch": 0.9517406788284032, "grad_norm": 0.9101456999778748, "learning_rate": 4.917406094604865e-05, "loss": 0.16, "step": 26190 }, { "epoch": 0.9521040773312014, "grad_norm": 3.284672260284424, "learning_rate": 4.917273756992911e-05, "loss": 0.1817, "step": 26200 }, { "epoch": 0.9524674758339996, "grad_norm": 2.2083284854888916, "learning_rate": 4.917141315229232e-05, "loss": 0.169, "step": 26210 }, { "epoch": 0.9528308743367977, "grad_norm": 4.354351997375488, "learning_rate": 4.9170087693195335e-05, "loss": 0.2629, "step": 26220 }, { "epoch": 0.9531942728395959, "grad_norm": 2.520522117614746, "learning_rate": 4.916876119269526e-05, "loss": 0.1852, "step": 26230 }, { "epoch": 0.9535576713423941, "grad_norm": 0.8573399186134338, "learning_rate": 4.9167433650849264e-05, "loss": 0.1524, "step": 26240 }, { "epoch": 0.9539210698451922, "grad_norm": 6.314918041229248, "learning_rate": 4.916610506771454e-05, "loss": 0.2685, "step": 26250 }, { "epoch": 0.9542844683479904, "grad_norm": 3.0610973834991455, "learning_rate": 4.916477544334833e-05, "loss": 0.1374, "step": 26260 }, { "epoch": 0.9546478668507886, "grad_norm": 0.9085964560508728, "learning_rate": 4.916344477780793e-05, "loss": 0.1754, "step": 26270 }, { "epoch": 0.9550112653535867, "grad_norm": 3.8550243377685547, "learning_rate": 4.916211307115067e-05, "loss": 0.3546, "step": 26280 }, { "epoch": 0.9553746638563849, "grad_norm": 5.278194904327393, "learning_rate": 4.916078032343392e-05, "loss": 0.1298, "step": 26290 }, { "epoch": 0.9557380623591831, "grad_norm": 2.707965612411499, "learning_rate": 4.9159446534715116e-05, "loss": 0.1689, "step": 26300 }, { "epoch": 0.9561014608619812, "grad_norm": 1.3821223974227905, "learning_rate": 
4.9158111705051716e-05, "loss": 0.2117, "step": 26310 }, { "epoch": 0.9564648593647794, "grad_norm": 1.0195057392120361, "learning_rate": 4.915677583450123e-05, "loss": 0.1151, "step": 26320 }, { "epoch": 0.9568282578675776, "grad_norm": 2.078343629837036, "learning_rate": 4.915543892312124e-05, "loss": 0.1433, "step": 26330 }, { "epoch": 0.9571916563703757, "grad_norm": 1.6972254514694214, "learning_rate": 4.915410097096932e-05, "loss": 0.1307, "step": 26340 }, { "epoch": 0.9575550548731739, "grad_norm": 4.440702438354492, "learning_rate": 4.915276197810313e-05, "loss": 0.1806, "step": 26350 }, { "epoch": 0.9579184533759721, "grad_norm": 0.778567373752594, "learning_rate": 4.9151421944580374e-05, "loss": 0.177, "step": 26360 }, { "epoch": 0.9582818518787702, "grad_norm": 1.2955224514007568, "learning_rate": 4.915008087045877e-05, "loss": 0.1395, "step": 26370 }, { "epoch": 0.9586452503815684, "grad_norm": 2.077195405960083, "learning_rate": 4.9148738755796104e-05, "loss": 0.1583, "step": 26380 }, { "epoch": 0.9590086488843665, "grad_norm": 0.8736408352851868, "learning_rate": 4.914739560065021e-05, "loss": 0.1284, "step": 26390 }, { "epoch": 0.9593720473871648, "grad_norm": 2.9465060234069824, "learning_rate": 4.914605140507895e-05, "loss": 0.1929, "step": 26400 }, { "epoch": 0.9593720473871648, "eval_loss": 0.3894718587398529, "eval_runtime": 180.7535, "eval_samples_per_second": 41.017, "eval_steps_per_second": 5.129, "eval_wer": 0.18666836095630548, "step": 26400 }, { "epoch": 0.959735445889963, "grad_norm": 0.7856747508049011, "learning_rate": 4.9144706169140256e-05, "loss": 0.1548, "step": 26410 }, { "epoch": 0.960098844392761, "grad_norm": 3.3245174884796143, "learning_rate": 4.914335989289208e-05, "loss": 0.1328, "step": 26420 }, { "epoch": 0.9604622428955593, "grad_norm": 4.848336219787598, "learning_rate": 4.914201257639243e-05, "loss": 0.145, "step": 26430 }, { "epoch": 0.9608256413983575, "grad_norm": 2.2661678791046143, "learning_rate": 
4.9140664219699344e-05, "loss": 0.1482, "step": 26440 }, { "epoch": 0.9611890399011556, "grad_norm": 6.279752731323242, "learning_rate": 4.913931482287094e-05, "loss": 0.2087, "step": 26450 }, { "epoch": 0.9615524384039538, "grad_norm": 0.9201165437698364, "learning_rate": 4.913796438596534e-05, "loss": 0.1641, "step": 26460 }, { "epoch": 0.961915836906752, "grad_norm": 1.0935853719711304, "learning_rate": 4.9136612909040746e-05, "loss": 0.1678, "step": 26470 }, { "epoch": 0.9622792354095501, "grad_norm": 5.511369705200195, "learning_rate": 4.913526039215538e-05, "loss": 0.2284, "step": 26480 }, { "epoch": 0.9626426339123483, "grad_norm": 0.8109707832336426, "learning_rate": 4.913390683536751e-05, "loss": 0.1314, "step": 26490 }, { "epoch": 0.9630060324151465, "grad_norm": 20.594274520874023, "learning_rate": 4.9132552238735464e-05, "loss": 0.2612, "step": 26500 }, { "epoch": 0.9633694309179446, "grad_norm": 40.1435661315918, "learning_rate": 4.913119660231761e-05, "loss": 0.5943, "step": 26510 }, { "epoch": 0.9637328294207428, "grad_norm": 0.6886749863624573, "learning_rate": 4.912983992617235e-05, "loss": 0.1445, "step": 26520 }, { "epoch": 0.964096227923541, "grad_norm": 4.847496032714844, "learning_rate": 4.912848221035815e-05, "loss": 0.1645, "step": 26530 }, { "epoch": 0.9644596264263391, "grad_norm": 1.3625943660736084, "learning_rate": 4.912712345493349e-05, "loss": 0.1403, "step": 26540 }, { "epoch": 0.9648230249291373, "grad_norm": 6.022468090057373, "learning_rate": 4.9125763659956934e-05, "loss": 0.2215, "step": 26550 }, { "epoch": 0.9651864234319355, "grad_norm": 1.5577186346054077, "learning_rate": 4.912440282548706e-05, "loss": 0.1401, "step": 26560 }, { "epoch": 0.9655498219347336, "grad_norm": 1.5680512189865112, "learning_rate": 4.91230409515825e-05, "loss": 0.1373, "step": 26570 }, { "epoch": 0.9659132204375318, "grad_norm": 2.3043782711029053, "learning_rate": 4.912167803830193e-05, "loss": 0.1501, "step": 26580 }, { "epoch": 0.96627661894033, 
"grad_norm": 1.5168496370315552, "learning_rate": 4.912031408570409e-05, "loss": 0.1665, "step": 26590 }, { "epoch": 0.9666400174431281, "grad_norm": 10.529095649719238, "learning_rate": 4.911894909384773e-05, "loss": 0.1642, "step": 26600 }, { "epoch": 0.9670034159459263, "grad_norm": 2.458815097808838, "learning_rate": 4.911758306279167e-05, "loss": 3.3307, "step": 26610 }, { "epoch": 0.9673668144487245, "grad_norm": 1.2745519876480103, "learning_rate": 4.911621599259477e-05, "loss": 0.1681, "step": 26620 }, { "epoch": 0.9677302129515226, "grad_norm": 1.877960443496704, "learning_rate": 4.911484788331593e-05, "loss": 0.1445, "step": 26630 }, { "epoch": 0.9680936114543208, "grad_norm": 1.3567255735397339, "learning_rate": 4.911347873501408e-05, "loss": 0.132, "step": 26640 }, { "epoch": 0.968457009957119, "grad_norm": 3.44063138961792, "learning_rate": 4.911210854774825e-05, "loss": 0.205, "step": 26650 }, { "epoch": 0.9688204084599171, "grad_norm": 5.335951805114746, "learning_rate": 4.911073732157744e-05, "loss": 0.1475, "step": 26660 }, { "epoch": 0.9691838069627153, "grad_norm": 3.0675578117370605, "learning_rate": 4.910936505656074e-05, "loss": 0.1602, "step": 26670 }, { "epoch": 0.9695472054655134, "grad_norm": 5.95693826675415, "learning_rate": 4.910799175275729e-05, "loss": 0.1888, "step": 26680 }, { "epoch": 0.9699106039683116, "grad_norm": 1.7128913402557373, "learning_rate": 4.910661741022625e-05, "loss": 0.1402, "step": 26690 }, { "epoch": 0.9702740024711098, "grad_norm": 11.855730056762695, "learning_rate": 4.9105242029026844e-05, "loss": 0.1939, "step": 26700 }, { "epoch": 0.9706374009739079, "grad_norm": 2.21028208732605, "learning_rate": 4.910386560921831e-05, "loss": 0.1365, "step": 26710 }, { "epoch": 0.9710007994767061, "grad_norm": 3.6761391162872314, "learning_rate": 4.910248815085998e-05, "loss": 0.1661, "step": 26720 }, { "epoch": 0.9713641979795044, "grad_norm": 1.9474952220916748, "learning_rate": 4.9101109654011196e-05, "loss": 0.1176, 
"step": 26730 }, { "epoch": 0.9717275964823024, "grad_norm": 4.190001010894775, "learning_rate": 4.909973011873135e-05, "loss": 0.1426, "step": 26740 }, { "epoch": 0.9720909949851007, "grad_norm": 2.785562753677368, "learning_rate": 4.909834954507987e-05, "loss": 0.201, "step": 26750 }, { "epoch": 0.9724543934878989, "grad_norm": 2.296952724456787, "learning_rate": 4.909696793311625e-05, "loss": 1.5478, "step": 26760 }, { "epoch": 0.972817791990697, "grad_norm": 1.471690058708191, "learning_rate": 4.909558528290002e-05, "loss": 0.1254, "step": 26770 }, { "epoch": 0.9731811904934952, "grad_norm": 5.213918685913086, "learning_rate": 4.9094340010048675e-05, "loss": 0.8881, "step": 26780 }, { "epoch": 0.9735445889962934, "grad_norm": 1.5338894128799438, "learning_rate": 4.909295538731665e-05, "loss": 0.1621, "step": 26790 }, { "epoch": 0.9739079874990915, "grad_norm": 4.493140697479248, "learning_rate": 4.909156972650491e-05, "loss": 0.1653, "step": 26800 }, { "epoch": 0.9742713860018897, "grad_norm": 0.9602924585342407, "learning_rate": 4.909018302767313e-05, "loss": 0.1388, "step": 26810 }, { "epoch": 0.9746347845046879, "grad_norm": 1.038445234298706, "learning_rate": 4.9088795290881085e-05, "loss": 0.145, "step": 26820 }, { "epoch": 0.974998183007486, "grad_norm": 3.1368119716644287, "learning_rate": 4.908740651618856e-05, "loss": 0.1732, "step": 26830 }, { "epoch": 0.9753615815102842, "grad_norm": 0.6875894069671631, "learning_rate": 4.908601670365539e-05, "loss": 0.1582, "step": 26840 }, { "epoch": 0.9757249800130824, "grad_norm": 14.604360580444336, "learning_rate": 4.908462585334146e-05, "loss": 0.228, "step": 26850 }, { "epoch": 0.9760883785158805, "grad_norm": 1.819300889968872, "learning_rate": 4.9083233965306694e-05, "loss": 0.5066, "step": 26860 }, { "epoch": 0.9764517770186787, "grad_norm": 5.712610721588135, "learning_rate": 4.908184103961106e-05, "loss": 0.2109, "step": 26870 }, { "epoch": 0.9768151755214769, "grad_norm": 2.8377017974853516, 
"learning_rate": 4.908044707631459e-05, "loss": 0.1417, "step": 26880 }, { "epoch": 0.977178574024275, "grad_norm": 1.0483819246292114, "learning_rate": 4.907905207547733e-05, "loss": 0.1526, "step": 26890 }, { "epoch": 0.9775419725270732, "grad_norm": 3.996112823486328, "learning_rate": 4.907765603715938e-05, "loss": 0.6109, "step": 26900 }, { "epoch": 0.9779053710298714, "grad_norm": 1.336004614830017, "learning_rate": 4.907625896142091e-05, "loss": 0.8939, "step": 26910 }, { "epoch": 0.9782687695326695, "grad_norm": 0.9394060373306274, "learning_rate": 4.907486084832211e-05, "loss": 0.1515, "step": 26920 }, { "epoch": 0.9786321680354677, "grad_norm": 2.356201410293579, "learning_rate": 4.907346169792321e-05, "loss": 0.1567, "step": 26930 }, { "epoch": 0.9789955665382659, "grad_norm": 0.926143229007721, "learning_rate": 4.907206151028449e-05, "loss": 0.1669, "step": 26940 }, { "epoch": 0.979358965041064, "grad_norm": 4.815629482269287, "learning_rate": 4.90706602854663e-05, "loss": 0.2068, "step": 26950 }, { "epoch": 0.9797223635438622, "grad_norm": 1.3679453134536743, "learning_rate": 4.906925802352899e-05, "loss": 0.15, "step": 26960 }, { "epoch": 0.9800857620466603, "grad_norm": 1.1221717596054077, "learning_rate": 4.9067854724533e-05, "loss": 0.223, "step": 26970 }, { "epoch": 0.9804491605494585, "grad_norm": 2.198657512664795, "learning_rate": 4.906645038853878e-05, "loss": 0.1662, "step": 26980 }, { "epoch": 0.9808125590522567, "grad_norm": 1.7014293670654297, "learning_rate": 4.906504501560684e-05, "loss": 0.1601, "step": 26990 }, { "epoch": 0.9811759575550548, "grad_norm": 8.204177856445312, "learning_rate": 4.906363860579774e-05, "loss": 0.2345, "step": 27000 }, { "epoch": 0.9811759575550548, "eval_loss": 0.3534720242023468, "eval_runtime": 180.0448, "eval_samples_per_second": 41.179, "eval_steps_per_second": 5.149, "eval_wer": 0.19012652712981284, "step": 27000 }, { "epoch": 0.981539356057853, "grad_norm": 1.1557930707931519, "learning_rate": 
4.906223115917207e-05, "loss": 0.1357, "step": 27010 }, { "epoch": 0.9819027545606512, "grad_norm": 0.7808053493499756, "learning_rate": 4.906082267579047e-05, "loss": 0.1366, "step": 27020 }, { "epoch": 0.9822661530634493, "grad_norm": 1.4547855854034424, "learning_rate": 4.9059413155713626e-05, "loss": 0.1472, "step": 27030 }, { "epoch": 0.9826295515662475, "grad_norm": 0.5997300148010254, "learning_rate": 4.9058002599002275e-05, "loss": 0.1455, "step": 27040 }, { "epoch": 0.9829929500690457, "grad_norm": 12.663732528686523, "learning_rate": 4.90565910057172e-05, "loss": 0.2728, "step": 27050 }, { "epoch": 0.9833563485718438, "grad_norm": 0.9098420739173889, "learning_rate": 4.9055178375919196e-05, "loss": 0.1467, "step": 27060 }, { "epoch": 0.983719747074642, "grad_norm": 3.4135582447052, "learning_rate": 4.9053764709669156e-05, "loss": 0.2571, "step": 27070 }, { "epoch": 0.9840831455774403, "grad_norm": 1.7984899282455444, "learning_rate": 4.905235000702798e-05, "loss": 0.2026, "step": 27080 }, { "epoch": 0.9844465440802384, "grad_norm": 1.2022316455841064, "learning_rate": 4.9050934268056615e-05, "loss": 0.1378, "step": 27090 }, { "epoch": 0.9848099425830366, "grad_norm": 1.499773621559143, "learning_rate": 4.9049517492816066e-05, "loss": 0.1765, "step": 27100 }, { "epoch": 0.9851733410858348, "grad_norm": 0.8122308254241943, "learning_rate": 4.9048099681367377e-05, "loss": 0.1422, "step": 27110 }, { "epoch": 0.9855367395886329, "grad_norm": 1.203873872756958, "learning_rate": 4.904668083377164e-05, "loss": 0.1436, "step": 27120 }, { "epoch": 0.9859001380914311, "grad_norm": 1.6162346601486206, "learning_rate": 4.9045260950089976e-05, "loss": 0.1667, "step": 27130 }, { "epoch": 0.9862635365942293, "grad_norm": 0.5100680589675903, "learning_rate": 4.904384003038358e-05, "loss": 0.1573, "step": 27140 }, { "epoch": 0.9866269350970274, "grad_norm": 6.364781379699707, "learning_rate": 4.904241807471366e-05, "loss": 0.2245, "step": 27150 }, { "epoch": 
0.9869903335998256, "grad_norm": 0.6124529242515564, "learning_rate": 4.9040995083141495e-05, "loss": 0.2296, "step": 27160 }, { "epoch": 0.9873537321026238, "grad_norm": 1.3477269411087036, "learning_rate": 4.903957105572838e-05, "loss": 0.1173, "step": 27170 }, { "epoch": 0.9877171306054219, "grad_norm": 0.8505461812019348, "learning_rate": 4.903814599253569e-05, "loss": 0.1913, "step": 27180 }, { "epoch": 0.9880805291082201, "grad_norm": 0.928269624710083, "learning_rate": 4.903671989362481e-05, "loss": 0.1449, "step": 27190 }, { "epoch": 0.9884439276110183, "grad_norm": 9.115983963012695, "learning_rate": 4.903529275905719e-05, "loss": 0.2025, "step": 27200 }, { "epoch": 0.9888073261138164, "grad_norm": 0.8631362318992615, "learning_rate": 4.903386458889434e-05, "loss": 0.1313, "step": 27210 }, { "epoch": 0.9891707246166146, "grad_norm": 1.5814399719238281, "learning_rate": 4.9032435383197764e-05, "loss": 0.1547, "step": 27220 }, { "epoch": 0.9895341231194128, "grad_norm": 2.2507669925689697, "learning_rate": 4.9031005142029054e-05, "loss": 2.0754, "step": 27230 }, { "epoch": 0.9898975216222109, "grad_norm": 2.0611305236816406, "learning_rate": 4.902957386544984e-05, "loss": 0.1351, "step": 27240 }, { "epoch": 0.9902609201250091, "grad_norm": 4.219666481018066, "learning_rate": 4.9028141553521785e-05, "loss": 0.194, "step": 27250 }, { "epoch": 0.9906243186278072, "grad_norm": 2.4156904220581055, "learning_rate": 4.90267082063066e-05, "loss": 1.9594, "step": 27260 }, { "epoch": 0.9909877171306054, "grad_norm": 4.805545806884766, "learning_rate": 4.9025273823866046e-05, "loss": 0.1608, "step": 27270 }, { "epoch": 0.9913511156334036, "grad_norm": 3.431521415710449, "learning_rate": 4.902383840626193e-05, "loss": 0.1439, "step": 27280 }, { "epoch": 0.9917145141362017, "grad_norm": 0.9847241640090942, "learning_rate": 4.902240195355609e-05, "loss": 0.1568, "step": 27290 }, { "epoch": 0.9920779126389999, "grad_norm": 4.65169095993042, "learning_rate": 
4.9020964465810426e-05, "loss": 0.2039, "step": 27300 }, { "epoch": 0.9924413111417981, "grad_norm": 1.466956377029419, "learning_rate": 4.9019525943086865e-05, "loss": 0.1649, "step": 27310 }, { "epoch": 0.9928047096445962, "grad_norm": 4.803518772125244, "learning_rate": 4.901808638544739e-05, "loss": 0.1645, "step": 27320 }, { "epoch": 0.9931681081473944, "grad_norm": 3.4496331214904785, "learning_rate": 4.901664579295404e-05, "loss": 0.1751, "step": 27330 }, { "epoch": 0.9935315066501926, "grad_norm": 0.9507334232330322, "learning_rate": 4.9015204165668866e-05, "loss": 0.1228, "step": 27340 }, { "epoch": 0.9938949051529907, "grad_norm": 5.97396993637085, "learning_rate": 4.901376150365399e-05, "loss": 0.2178, "step": 27350 }, { "epoch": 0.9942583036557889, "grad_norm": 1.7720214128494263, "learning_rate": 4.9012317806971573e-05, "loss": 0.1468, "step": 27360 }, { "epoch": 0.9946217021585871, "grad_norm": 0.9194307923316956, "learning_rate": 4.9010873075683825e-05, "loss": 0.1481, "step": 27370 }, { "epoch": 0.9949851006613852, "grad_norm": 2.8458971977233887, "learning_rate": 4.9009427309852986e-05, "loss": 0.1402, "step": 27380 }, { "epoch": 0.9953484991641834, "grad_norm": 1.9232338666915894, "learning_rate": 4.900798050954134e-05, "loss": 0.155, "step": 27390 }, { "epoch": 0.9957118976669816, "grad_norm": 4.017787456512451, "learning_rate": 4.900653267481125e-05, "loss": 0.2279, "step": 27400 }, { "epoch": 0.9960752961697797, "grad_norm": 0.714726448059082, "learning_rate": 4.9005083805725064e-05, "loss": 0.1271, "step": 27410 }, { "epoch": 0.996438694672578, "grad_norm": 0.8059016466140747, "learning_rate": 4.900363390234524e-05, "loss": 0.138, "step": 27420 }, { "epoch": 0.9968020931753762, "grad_norm": 2.650024175643921, "learning_rate": 4.9002182964734234e-05, "loss": 0.1483, "step": 27430 }, { "epoch": 0.9971654916781743, "grad_norm": 1.200749397277832, "learning_rate": 4.900073099295456e-05, "loss": 0.9832, "step": 27440 }, { "epoch": 
0.9975288901809725, "grad_norm": 3.3051798343658447, "learning_rate": 4.8999277987068785e-05, "loss": 0.2136, "step": 27450 }, { "epoch": 0.9978922886837707, "grad_norm": 1.3630801439285278, "learning_rate": 4.899782394713951e-05, "loss": 0.2136, "step": 27460 }, { "epoch": 0.9982556871865688, "grad_norm": 2.5952398777008057, "learning_rate": 4.899636887322939e-05, "loss": 0.1515, "step": 27470 }, { "epoch": 0.998619085689367, "grad_norm": 5.025683879852295, "learning_rate": 4.8994912765401116e-05, "loss": 0.1862, "step": 27480 }, { "epoch": 0.9989824841921652, "grad_norm": 1.1604958772659302, "learning_rate": 4.8993455623717415e-05, "loss": 0.1433, "step": 27490 }, { "epoch": 0.9993458826949633, "grad_norm": 14.864492416381836, "learning_rate": 4.899199744824109e-05, "loss": 0.1752, "step": 27500 }, { "epoch": 0.9997092811977615, "grad_norm": 1.072911024093628, "learning_rate": 4.8990538239034956e-05, "loss": 0.1524, "step": 27510 }, { "epoch": 1.0000726797005597, "grad_norm": 0.41248244047164917, "learning_rate": 4.898907799616188e-05, "loss": 0.1457, "step": 27520 }, { "epoch": 1.000436078203358, "grad_norm": 1.0402699708938599, "learning_rate": 4.89876167196848e-05, "loss": 0.1394, "step": 27530 }, { "epoch": 1.0007994767061559, "grad_norm": 0.8177555203437805, "learning_rate": 4.8986154409666654e-05, "loss": 0.1134, "step": 27540 }, { "epoch": 1.001162875208954, "grad_norm": 1.1209142208099365, "learning_rate": 4.8984691066170465e-05, "loss": 0.1574, "step": 27550 }, { "epoch": 1.0015262737117523, "grad_norm": 1.4969863891601562, "learning_rate": 4.8983226689259264e-05, "loss": 0.1144, "step": 27560 }, { "epoch": 1.0018896722145505, "grad_norm": 0.8014885783195496, "learning_rate": 4.898176127899617e-05, "loss": 0.1616, "step": 27570 }, { "epoch": 1.0022530707173487, "grad_norm": 1.1477352380752563, "learning_rate": 4.89802948354443e-05, "loss": 0.1209, "step": 27580 }, { "epoch": 1.002616469220147, "grad_norm": 1.0199166536331177, "learning_rate": 
4.897882735866686e-05, "loss": 0.1422, "step": 27590 }, { "epoch": 1.002979867722945, "grad_norm": 0.8987438678741455, "learning_rate": 4.897735884872705e-05, "loss": 0.1328, "step": 27600 }, { "epoch": 1.002979867722945, "eval_loss": 0.38110727071762085, "eval_runtime": 180.6687, "eval_samples_per_second": 41.036, "eval_steps_per_second": 5.131, "eval_wer": 0.18068691342785048, "step": 27600 }, { "epoch": 1.003343266225743, "grad_norm": 1.1313180923461914, "learning_rate": 4.897588930568817e-05, "loss": 0.1395, "step": 27610 }, { "epoch": 1.0037066647285413, "grad_norm": 0.6739907264709473, "learning_rate": 4.8974418729613526e-05, "loss": 0.2011, "step": 27620 }, { "epoch": 1.0040700632313395, "grad_norm": 0.986926257610321, "learning_rate": 4.8972947120566475e-05, "loss": 0.1212, "step": 27630 }, { "epoch": 1.0044334617341377, "grad_norm": 0.795300304889679, "learning_rate": 4.8971474478610437e-05, "loss": 0.1404, "step": 27640 }, { "epoch": 1.004796860236936, "grad_norm": 1.7036499977111816, "learning_rate": 4.897000080380885e-05, "loss": 0.1677, "step": 27650 }, { "epoch": 1.005160258739734, "grad_norm": 1.4313631057739258, "learning_rate": 4.896852609622521e-05, "loss": 0.1188, "step": 27660 }, { "epoch": 1.0055236572425321, "grad_norm": 0.5508180260658264, "learning_rate": 4.896705035592306e-05, "loss": 0.1627, "step": 27670 }, { "epoch": 1.0058870557453303, "grad_norm": 2.3307416439056396, "learning_rate": 4.896557358296599e-05, "loss": 0.1173, "step": 27680 }, { "epoch": 1.0062504542481285, "grad_norm": 3.0311474800109863, "learning_rate": 4.896409577741762e-05, "loss": 0.1176, "step": 27690 }, { "epoch": 1.0066138527509267, "grad_norm": 1.8580576181411743, "learning_rate": 4.896261693934163e-05, "loss": 0.1647, "step": 27700 }, { "epoch": 1.0069772512537247, "grad_norm": 1.094754934310913, "learning_rate": 4.896113706880174e-05, "loss": 0.1137, "step": 27710 }, { "epoch": 1.007340649756523, "grad_norm": 0.8240002393722534, "learning_rate": 
4.89596561658617e-05, "loss": 0.1835, "step": 27720 }, { "epoch": 1.0077040482593211, "grad_norm": 1.4678568840026855, "learning_rate": 4.895817423058533e-05, "loss": 0.1612, "step": 27730 }, { "epoch": 1.0080674467621193, "grad_norm": 0.8481863737106323, "learning_rate": 4.8956691263036473e-05, "loss": 0.1211, "step": 27740 }, { "epoch": 1.0084308452649176, "grad_norm": 5.045682907104492, "learning_rate": 4.895520726327903e-05, "loss": 0.2747, "step": 27750 }, { "epoch": 1.0087942437677158, "grad_norm": 2.3443167209625244, "learning_rate": 4.895372223137694e-05, "loss": 0.1437, "step": 27760 }, { "epoch": 1.0091576422705137, "grad_norm": 1.6994588375091553, "learning_rate": 4.895223616739418e-05, "loss": 0.16, "step": 27770 }, { "epoch": 1.009521040773312, "grad_norm": 2.073699712753296, "learning_rate": 4.8950749071394794e-05, "loss": 0.1341, "step": 27780 }, { "epoch": 1.0098844392761102, "grad_norm": 1.1939536333084106, "learning_rate": 4.894926094344284e-05, "loss": 0.1284, "step": 27790 }, { "epoch": 1.0102478377789084, "grad_norm": 1.4820387363433838, "learning_rate": 4.8947771783602444e-05, "loss": 0.1644, "step": 27800 }, { "epoch": 1.0106112362817066, "grad_norm": 1.8140612840652466, "learning_rate": 4.894628159193778e-05, "loss": 0.1681, "step": 27810 }, { "epoch": 1.0109746347845048, "grad_norm": 1.7120946645736694, "learning_rate": 4.894479036851303e-05, "loss": 0.2066, "step": 27820 }, { "epoch": 1.0113380332873028, "grad_norm": 1.0871057510375977, "learning_rate": 4.894329811339247e-05, "loss": 0.1428, "step": 27830 }, { "epoch": 1.011701431790101, "grad_norm": 0.897597074508667, "learning_rate": 4.8941804826640375e-05, "loss": 0.1202, "step": 27840 }, { "epoch": 1.0120648302928992, "grad_norm": 1.2489410638809204, "learning_rate": 4.89403105083211e-05, "loss": 0.1435, "step": 27850 }, { "epoch": 1.0124282287956974, "grad_norm": 1.043281078338623, "learning_rate": 4.893881515849902e-05, "loss": 0.1657, "step": 27860 }, { "epoch": 1.0127916272984956, 
"grad_norm": 1.0345379114151, "learning_rate": 4.893731877723857e-05, "loss": 0.1669, "step": 27870 }, { "epoch": 1.0131550258012938, "grad_norm": 3.5156590938568115, "learning_rate": 4.893582136460423e-05, "loss": 0.1356, "step": 27880 }, { "epoch": 1.0135184243040918, "grad_norm": 1.0468858480453491, "learning_rate": 4.893432292066051e-05, "loss": 0.1201, "step": 27890 }, { "epoch": 1.01388182280689, "grad_norm": 1.3872016668319702, "learning_rate": 4.893282344547197e-05, "loss": 0.1472, "step": 27900 }, { "epoch": 1.0142452213096882, "grad_norm": 0.83976811170578, "learning_rate": 4.893132293910322e-05, "loss": 0.1467, "step": 27910 }, { "epoch": 1.0146086198124864, "grad_norm": 0.625514566898346, "learning_rate": 4.892982140161892e-05, "loss": 0.1661, "step": 27920 }, { "epoch": 1.0149720183152846, "grad_norm": 1.0802186727523804, "learning_rate": 4.892831883308375e-05, "loss": 0.1444, "step": 27930 }, { "epoch": 1.0153354168180828, "grad_norm": 0.567722499370575, "learning_rate": 4.892681523356246e-05, "loss": 0.1003, "step": 27940 }, { "epoch": 1.0156988153208808, "grad_norm": 1.1036186218261719, "learning_rate": 4.892531060311985e-05, "loss": 0.1438, "step": 27950 }, { "epoch": 1.016062213823679, "grad_norm": 1.2610325813293457, "learning_rate": 4.892380494182071e-05, "loss": 0.1478, "step": 27960 }, { "epoch": 1.0164256123264772, "grad_norm": 4.7541913986206055, "learning_rate": 4.892229824972995e-05, "loss": 0.2068, "step": 27970 }, { "epoch": 1.0167890108292754, "grad_norm": 14.794916152954102, "learning_rate": 4.8920790526912464e-05, "loss": 0.4626, "step": 27980 }, { "epoch": 1.0171524093320736, "grad_norm": 0.8083056807518005, "learning_rate": 4.891928177343323e-05, "loss": 0.4919, "step": 27990 }, { "epoch": 1.0175158078348718, "grad_norm": 1.1072735786437988, "learning_rate": 4.8917771989357246e-05, "loss": 0.2863, "step": 28000 }, { "epoch": 1.0178792063376698, "grad_norm": 0.8811991810798645, "learning_rate": 4.891626117474957e-05, "loss": 0.1361, 
"step": 28010 }, { "epoch": 1.018242604840468, "grad_norm": 0.43256062269210815, "learning_rate": 4.8914749329675294e-05, "loss": 0.1668, "step": 28020 }, { "epoch": 1.0186060033432662, "grad_norm": 1.7490280866622925, "learning_rate": 4.891323645419956e-05, "loss": 0.1328, "step": 28030 }, { "epoch": 1.0189694018460644, "grad_norm": 1.5770010948181152, "learning_rate": 4.891172254838755e-05, "loss": 0.1429, "step": 28040 }, { "epoch": 1.0193328003488626, "grad_norm": 0.5603241920471191, "learning_rate": 4.8910207612304495e-05, "loss": 0.1319, "step": 28050 }, { "epoch": 1.0196961988516606, "grad_norm": 1.5490175485610962, "learning_rate": 4.890869164601566e-05, "loss": 0.1292, "step": 28060 }, { "epoch": 1.0200595973544588, "grad_norm": 0.7562422752380371, "learning_rate": 4.8907174649586376e-05, "loss": 0.1978, "step": 28070 }, { "epoch": 1.020422995857257, "grad_norm": 2.67669415473938, "learning_rate": 4.8905656623082e-05, "loss": 0.1367, "step": 28080 }, { "epoch": 1.0207863943600552, "grad_norm": 1.4589964151382446, "learning_rate": 4.890413756656793e-05, "loss": 0.136, "step": 28090 }, { "epoch": 1.0211497928628535, "grad_norm": 0.5042529702186584, "learning_rate": 4.8902617480109626e-05, "loss": 0.1768, "step": 28100 }, { "epoch": 1.0215131913656517, "grad_norm": 3.3886609077453613, "learning_rate": 4.890109636377258e-05, "loss": 0.1827, "step": 28110 }, { "epoch": 1.0218765898684496, "grad_norm": 0.8882365226745605, "learning_rate": 4.889957421762234e-05, "loss": 0.2176, "step": 28120 }, { "epoch": 1.0222399883712479, "grad_norm": 1.5471583604812622, "learning_rate": 4.889805104172447e-05, "loss": 0.1934, "step": 28130 }, { "epoch": 1.022603386874046, "grad_norm": 1.221699595451355, "learning_rate": 4.889652683614461e-05, "loss": 0.1217, "step": 28140 }, { "epoch": 1.0229667853768443, "grad_norm": 1.1075172424316406, "learning_rate": 4.8895001600948444e-05, "loss": 2.944, "step": 28150 }, { "epoch": 1.0233301838796425, "grad_norm": 0.9731149077415466, 
"learning_rate": 4.889347533620167e-05, "loss": 0.1123, "step": 28160 }, { "epoch": 1.0236935823824407, "grad_norm": 0.8448407649993896, "learning_rate": 4.889194804197006e-05, "loss": 0.1755, "step": 28170 }, { "epoch": 1.0240569808852387, "grad_norm": 0.8480188250541687, "learning_rate": 4.8890419718319414e-05, "loss": 0.1434, "step": 28180 }, { "epoch": 1.0244203793880369, "grad_norm": 1.5608705282211304, "learning_rate": 4.8888890365315584e-05, "loss": 0.1309, "step": 28190 }, { "epoch": 1.024783777890835, "grad_norm": 7.765607833862305, "learning_rate": 4.888735998302447e-05, "loss": 0.1584, "step": 28200 }, { "epoch": 1.024783777890835, "eval_loss": 0.3628901541233063, "eval_runtime": 180.7805, "eval_samples_per_second": 41.011, "eval_steps_per_second": 5.128, "eval_wer": 0.1830921996115236, "step": 28200 }, { "epoch": 1.0251471763936333, "grad_norm": 2.208989381790161, "learning_rate": 4.8885828571512e-05, "loss": 0.1206, "step": 28210 }, { "epoch": 1.0255105748964315, "grad_norm": 0.4320629835128784, "learning_rate": 4.8884296130844166e-05, "loss": 0.2776, "step": 28220 }, { "epoch": 1.0258739733992297, "grad_norm": 1.4430392980575562, "learning_rate": 4.888276266108699e-05, "loss": 0.1181, "step": 28230 }, { "epoch": 1.0262373719020277, "grad_norm": 0.893260657787323, "learning_rate": 4.888122816230655e-05, "loss": 0.1141, "step": 28240 }, { "epoch": 1.026600770404826, "grad_norm": 1.9237782955169678, "learning_rate": 4.887969263456895e-05, "loss": 0.1676, "step": 28250 }, { "epoch": 1.026964168907624, "grad_norm": 1.0318949222564697, "learning_rate": 4.8878156077940376e-05, "loss": 0.1256, "step": 28260 }, { "epoch": 1.0273275674104223, "grad_norm": 0.8919249773025513, "learning_rate": 4.8876618492487e-05, "loss": 0.2314, "step": 28270 }, { "epoch": 1.0276909659132205, "grad_norm": 1.31845223903656, "learning_rate": 4.8875079878275085e-05, "loss": 0.1414, "step": 28280 }, { "epoch": 1.0280543644160187, "grad_norm": 8.070326805114746, "learning_rate": 
4.887354023537094e-05, "loss": 0.2391, "step": 28290 }, { "epoch": 1.0284177629188167, "grad_norm": 0.7600485682487488, "learning_rate": 4.887199956384088e-05, "loss": 0.164, "step": 28300 }, { "epoch": 1.028781161421615, "grad_norm": 1.0197162628173828, "learning_rate": 4.88704578637513e-05, "loss": 0.1324, "step": 28310 }, { "epoch": 1.0291445599244131, "grad_norm": 0.5989790558815002, "learning_rate": 4.886891513516861e-05, "loss": 0.2162, "step": 28320 }, { "epoch": 1.0295079584272113, "grad_norm": 1.2145419120788574, "learning_rate": 4.88673713781593e-05, "loss": 1.7629, "step": 28330 }, { "epoch": 1.0298713569300095, "grad_norm": 0.7220103740692139, "learning_rate": 4.8865826592789876e-05, "loss": 0.105, "step": 28340 }, { "epoch": 1.0302347554328075, "grad_norm": 1.2737821340560913, "learning_rate": 4.88642807791269e-05, "loss": 0.172, "step": 28350 }, { "epoch": 1.0305981539356057, "grad_norm": 2.3391408920288086, "learning_rate": 4.886273393723698e-05, "loss": 0.1431, "step": 28360 }, { "epoch": 1.030961552438404, "grad_norm": 1.1937615871429443, "learning_rate": 4.8861186067186756e-05, "loss": 0.1776, "step": 28370 }, { "epoch": 1.0313249509412021, "grad_norm": 0.5789287090301514, "learning_rate": 4.885963716904292e-05, "loss": 0.1412, "step": 28380 }, { "epoch": 1.0316883494440003, "grad_norm": 1.2566107511520386, "learning_rate": 4.885808724287221e-05, "loss": 0.1284, "step": 28390 }, { "epoch": 1.0320517479467985, "grad_norm": 5.225760459899902, "learning_rate": 4.885653628874141e-05, "loss": 0.1411, "step": 28400 }, { "epoch": 1.0324151464495965, "grad_norm": 1.2525557279586792, "learning_rate": 4.885498430671735e-05, "loss": 0.1372, "step": 28410 }, { "epoch": 1.0327785449523947, "grad_norm": 0.5048568844795227, "learning_rate": 4.885343129686688e-05, "loss": 0.1595, "step": 28420 }, { "epoch": 1.033141943455193, "grad_norm": 0.8768513202667236, "learning_rate": 4.8851877259256933e-05, "loss": 0.286, "step": 28430 }, { "epoch": 1.0335053419579912, 
"grad_norm": 1.2799090147018433, "learning_rate": 4.885032219395446e-05, "loss": 0.1431, "step": 28440 }, { "epoch": 1.0338687404607894, "grad_norm": 0.9944593906402588, "learning_rate": 4.8848766101026466e-05, "loss": 0.13, "step": 28450 }, { "epoch": 1.0342321389635876, "grad_norm": 1.3601889610290527, "learning_rate": 4.8847208980539994e-05, "loss": 0.1379, "step": 28460 }, { "epoch": 1.0345955374663856, "grad_norm": 0.6347102522850037, "learning_rate": 4.884565083256213e-05, "loss": 0.1833, "step": 28470 }, { "epoch": 1.0349589359691838, "grad_norm": null, "learning_rate": 4.884424762093241e-05, "loss": 3.779, "step": 28480 }, { "epoch": 1.035322334471982, "grad_norm": 1.6947808265686035, "learning_rate": 4.8842687520905906e-05, "loss": 0.1571, "step": 28490 }, { "epoch": 1.0356857329747802, "grad_norm": 4.521624565124512, "learning_rate": 4.884112639358283e-05, "loss": 0.1429, "step": 28500 }, { "epoch": 1.0360491314775784, "grad_norm": 1.9370489120483398, "learning_rate": 4.883956423903044e-05, "loss": 0.1375, "step": 28510 }, { "epoch": 1.0364125299803766, "grad_norm": 2.3492047786712646, "learning_rate": 4.883800105731606e-05, "loss": 0.1496, "step": 28520 }, { "epoch": 1.0367759284831746, "grad_norm": 1.1862452030181885, "learning_rate": 4.8836436848507026e-05, "loss": 0.1239, "step": 28530 }, { "epoch": 1.0371393269859728, "grad_norm": 2.223708391189575, "learning_rate": 4.883487161267074e-05, "loss": 0.1159, "step": 28540 }, { "epoch": 1.037502725488771, "grad_norm": 5.854187965393066, "learning_rate": 4.8833305349874636e-05, "loss": 0.1732, "step": 28550 }, { "epoch": 1.0378661239915692, "grad_norm": 1.4000542163848877, "learning_rate": 4.883173806018621e-05, "loss": 0.1428, "step": 28560 }, { "epoch": 1.0382295224943674, "grad_norm": 1.8862130641937256, "learning_rate": 4.883016974367298e-05, "loss": 0.2339, "step": 28570 }, { "epoch": 1.0385929209971656, "grad_norm": 1.701545238494873, "learning_rate": 4.8828600400402525e-05, "loss": 0.2063, "step": 
28580 }, { "epoch": 1.0389563194999636, "grad_norm": 3.8795692920684814, "learning_rate": 4.8827030030442466e-05, "loss": 0.1317, "step": 28590 }, { "epoch": 1.0393197180027618, "grad_norm": 1.0597456693649292, "learning_rate": 4.882545863386046e-05, "loss": 0.1783, "step": 28600 }, { "epoch": 1.03968311650556, "grad_norm": 0.8949028849601746, "learning_rate": 4.88238862107242e-05, "loss": 0.1453, "step": 28610 }, { "epoch": 1.0400465150083582, "grad_norm": 0.6270145773887634, "learning_rate": 4.8822312761101456e-05, "loss": 0.2118, "step": 28620 }, { "epoch": 1.0404099135111564, "grad_norm": 0.8819754719734192, "learning_rate": 4.8820738285060016e-05, "loss": 0.1398, "step": 28630 }, { "epoch": 1.0407733120139544, "grad_norm": 1.5963236093521118, "learning_rate": 4.881916278266772e-05, "loss": 0.1592, "step": 28640 }, { "epoch": 1.0411367105167526, "grad_norm": 1.2960532903671265, "learning_rate": 4.8817586253992445e-05, "loss": 0.2044, "step": 28650 }, { "epoch": 1.0415001090195508, "grad_norm": 1.6735124588012695, "learning_rate": 4.881600869910212e-05, "loss": 0.143, "step": 28660 }, { "epoch": 1.041863507522349, "grad_norm": 1.2382493019104004, "learning_rate": 4.8814430118064724e-05, "loss": 0.182, "step": 28670 }, { "epoch": 1.0422269060251472, "grad_norm": 1.614788293838501, "learning_rate": 4.881285051094826e-05, "loss": 0.149, "step": 28680 }, { "epoch": 1.0425903045279454, "grad_norm": 1.549124002456665, "learning_rate": 4.88112698778208e-05, "loss": 0.1238, "step": 28690 }, { "epoch": 1.0429537030307434, "grad_norm": 0.8877584338188171, "learning_rate": 4.8809688218750435e-05, "loss": 0.1541, "step": 28700 }, { "epoch": 1.0433171015335416, "grad_norm": 1.1061103343963623, "learning_rate": 4.8808105533805325e-05, "loss": 0.1209, "step": 28710 }, { "epoch": 1.0436805000363398, "grad_norm": 1.8957878351211548, "learning_rate": 4.880652182305365e-05, "loss": 0.1739, "step": 28720 }, { "epoch": 1.044043898539138, "grad_norm": 0.9069591164588928, 
"learning_rate": 4.880493708656366e-05, "loss": 0.2014, "step": 28730 }, { "epoch": 1.0444072970419362, "grad_norm": 0.7086552381515503, "learning_rate": 4.880335132440364e-05, "loss": 0.1149, "step": 28740 }, { "epoch": 1.0447706955447345, "grad_norm": 0.5514993667602539, "learning_rate": 4.8801764536641883e-05, "loss": 0.163, "step": 28750 }, { "epoch": 1.0451340940475324, "grad_norm": 0.5786269903182983, "learning_rate": 4.880017672334679e-05, "loss": 0.126, "step": 28760 }, { "epoch": 1.0454974925503306, "grad_norm": 0.8554352521896362, "learning_rate": 4.879858788458676e-05, "loss": 0.2564, "step": 28770 }, { "epoch": 1.0458608910531288, "grad_norm": 3.329148769378662, "learning_rate": 4.8796998020430253e-05, "loss": 0.1297, "step": 28780 }, { "epoch": 1.046224289555927, "grad_norm": 1.1520358324050903, "learning_rate": 4.879540713094578e-05, "loss": 0.1156, "step": 28790 }, { "epoch": 1.0465876880587253, "grad_norm": 1.6375194787979126, "learning_rate": 4.879381521620187e-05, "loss": 0.1418, "step": 28800 }, { "epoch": 1.0465876880587253, "eval_loss": 0.35767313838005066, "eval_runtime": 180.3335, "eval_samples_per_second": 41.113, "eval_steps_per_second": 5.14, "eval_wer": 0.18009693757147785, "step": 28800 }, { "epoch": 1.0469510865615235, "grad_norm": 2.6099300384521484, "learning_rate": 4.879222227626712e-05, "loss": 2.0354, "step": 28810 }, { "epoch": 1.0473144850643215, "grad_norm": 0.9497049450874329, "learning_rate": 4.879062831121017e-05, "loss": 0.2014, "step": 28820 }, { "epoch": 1.0476778835671197, "grad_norm": 1.100393533706665, "learning_rate": 4.878903332109969e-05, "loss": 0.1294, "step": 28830 }, { "epoch": 1.0480412820699179, "grad_norm": 0.46238216757774353, "learning_rate": 4.87874373060044e-05, "loss": 0.1103, "step": 28840 }, { "epoch": 1.048404680572716, "grad_norm": 1.111619234085083, "learning_rate": 4.8785840265993085e-05, "loss": 0.1635, "step": 28850 }, { "epoch": 1.0487680790755143, "grad_norm": 1.8693902492523193, 
"learning_rate": 4.8784242201134534e-05, "loss": 0.1145, "step": 28860 }, { "epoch": 1.0491314775783125, "grad_norm": 0.5382725596427917, "learning_rate": 4.878264311149762e-05, "loss": 0.1699, "step": 28870 }, { "epoch": 1.0494948760811105, "grad_norm": 1.3384134769439697, "learning_rate": 4.878104299715123e-05, "loss": 0.1479, "step": 28880 }, { "epoch": 1.0498582745839087, "grad_norm": 3.7112338542938232, "learning_rate": 4.87794418581643e-05, "loss": 2.828, "step": 28890 }, { "epoch": 1.0502216730867069, "grad_norm": 0.8874093890190125, "learning_rate": 4.8777839694605844e-05, "loss": 0.1274, "step": 28900 }, { "epoch": 1.050585071589505, "grad_norm": 2.278064489364624, "learning_rate": 4.877623650654487e-05, "loss": 0.1298, "step": 28910 }, { "epoch": 1.0509484700923033, "grad_norm": 0.8750000596046448, "learning_rate": 4.877463229405046e-05, "loss": 0.2705, "step": 28920 }, { "epoch": 1.0513118685951013, "grad_norm": 0.5634777545928955, "learning_rate": 4.8773027057191735e-05, "loss": 0.13, "step": 28930 }, { "epoch": 1.0516752670978995, "grad_norm": 1.1990102529525757, "learning_rate": 4.877142079603786e-05, "loss": 0.1115, "step": 28940 }, { "epoch": 1.0520386656006977, "grad_norm": 5.793541431427002, "learning_rate": 4.8769813510658035e-05, "loss": 0.1909, "step": 28950 }, { "epoch": 1.052402064103496, "grad_norm": 1.0433887243270874, "learning_rate": 4.876820520112153e-05, "loss": 0.1225, "step": 28960 }, { "epoch": 1.0527654626062941, "grad_norm": 0.8786159753799438, "learning_rate": 4.8766595867497624e-05, "loss": 0.1772, "step": 28970 }, { "epoch": 1.0531288611090923, "grad_norm": 1.1270724534988403, "learning_rate": 4.8764985509855664e-05, "loss": 0.1683, "step": 28980 }, { "epoch": 1.0534922596118903, "grad_norm": 0.9916827082633972, "learning_rate": 4.876337412826504e-05, "loss": 0.1834, "step": 28990 }, { "epoch": 1.0538556581146885, "grad_norm": 1.1295456886291504, "learning_rate": 4.876176172279517e-05, "loss": 0.1677, "step": 29000 }, { "epoch": 
1.0542190566174867, "grad_norm": 1.625546932220459, "learning_rate": 4.876014829351553e-05, "loss": 0.1374, "step": 29010 }, { "epoch": 1.054582455120285, "grad_norm": 0.4282989799976349, "learning_rate": 4.875853384049564e-05, "loss": 0.1826, "step": 29020 }, { "epoch": 1.0549458536230831, "grad_norm": 0.8806937336921692, "learning_rate": 4.875691836380507e-05, "loss": 0.116, "step": 29030 }, { "epoch": 1.0553092521258813, "grad_norm": 0.5082537531852722, "learning_rate": 4.87553018635134e-05, "loss": 0.1867, "step": 29040 }, { "epoch": 1.0556726506286793, "grad_norm": 3.172614336013794, "learning_rate": 4.875368433969031e-05, "loss": 0.1872, "step": 29050 }, { "epoch": 1.0560360491314775, "grad_norm": 4.570537090301514, "learning_rate": 4.875206579240546e-05, "loss": 0.1501, "step": 29060 }, { "epoch": 1.0563994476342757, "grad_norm": 0.9751003980636597, "learning_rate": 4.875044622172862e-05, "loss": 0.1533, "step": 29070 }, { "epoch": 1.056762846137074, "grad_norm": 0.9446988701820374, "learning_rate": 4.874882562772955e-05, "loss": 0.1462, "step": 29080 }, { "epoch": 1.0571262446398721, "grad_norm": 5.769078254699707, "learning_rate": 4.8747204010478086e-05, "loss": 0.1796, "step": 29090 }, { "epoch": 1.0574896431426704, "grad_norm": 6.486478328704834, "learning_rate": 4.8745581370044094e-05, "loss": 0.1674, "step": 29100 }, { "epoch": 1.0578530416454683, "grad_norm": 0.622352123260498, "learning_rate": 4.874395770649748e-05, "loss": 0.137, "step": 29110 }, { "epoch": 1.0582164401482665, "grad_norm": 0.5244133472442627, "learning_rate": 4.8742333019908215e-05, "loss": 0.2035, "step": 29120 }, { "epoch": 1.0585798386510648, "grad_norm": 1.7058534622192383, "learning_rate": 4.87407073103463e-05, "loss": 0.1408, "step": 29130 }, { "epoch": 1.058943237153863, "grad_norm": 0.9428019523620605, "learning_rate": 4.873908057788177e-05, "loss": 0.1128, "step": 29140 }, { "epoch": 1.0593066356566612, "grad_norm": 0.4694746136665344, "learning_rate": 
4.8737452822584724e-05, "loss": 0.142, "step": 29150 }, { "epoch": 1.0596700341594594, "grad_norm": 1.3985977172851562, "learning_rate": 4.873582404452529e-05, "loss": 0.1169, "step": 29160 }, { "epoch": 1.0600334326622574, "grad_norm": 0.8285462856292725, "learning_rate": 4.873419424377366e-05, "loss": 0.1945, "step": 29170 }, { "epoch": 1.0603968311650556, "grad_norm": 1.657012939453125, "learning_rate": 4.8732563420400037e-05, "loss": 0.1904, "step": 29180 }, { "epoch": 1.0607602296678538, "grad_norm": 1.4633735418319702, "learning_rate": 4.87309315744747e-05, "loss": 0.1183, "step": 29190 }, { "epoch": 1.061123628170652, "grad_norm": 0.9722393155097961, "learning_rate": 4.872929870606796e-05, "loss": 0.1608, "step": 29200 }, { "epoch": 1.0614870266734502, "grad_norm": 0.6080673933029175, "learning_rate": 4.872766481525016e-05, "loss": 0.1198, "step": 29210 }, { "epoch": 1.0618504251762482, "grad_norm": 0.7502457499504089, "learning_rate": 4.8726029902091715e-05, "loss": 0.1893, "step": 29220 }, { "epoch": 1.0622138236790464, "grad_norm": 1.7775638103485107, "learning_rate": 4.8724393966663054e-05, "loss": 1.4777, "step": 29230 }, { "epoch": 1.0625772221818446, "grad_norm": 1.1095236539840698, "learning_rate": 4.8722757009034666e-05, "loss": 0.1443, "step": 29240 }, { "epoch": 1.0629406206846428, "grad_norm": 0.6879424452781677, "learning_rate": 4.872111902927709e-05, "loss": 0.2048, "step": 29250 }, { "epoch": 1.063304019187441, "grad_norm": 1.2532442808151245, "learning_rate": 4.8719480027460895e-05, "loss": 0.1343, "step": 29260 }, { "epoch": 1.0636674176902392, "grad_norm": 1.0296350717544556, "learning_rate": 4.87178400036567e-05, "loss": 0.1656, "step": 29270 }, { "epoch": 1.0640308161930372, "grad_norm": 1.0346356630325317, "learning_rate": 4.871619895793517e-05, "loss": 0.1466, "step": 29280 }, { "epoch": 1.0643942146958354, "grad_norm": 1.9428579807281494, "learning_rate": 4.8714556890367e-05, "loss": 0.1496, "step": 29290 }, { "epoch": 
1.0647576131986336, "grad_norm": 2.6400890350341797, "learning_rate": 4.871291380102295e-05, "loss": 0.1857, "step": 29300 }, { "epoch": 1.0651210117014318, "grad_norm": 7.17543888092041, "learning_rate": 4.8711269689973826e-05, "loss": 0.1242, "step": 29310 }, { "epoch": 1.06548441020423, "grad_norm": 1.8619358539581299, "learning_rate": 4.870962455729045e-05, "loss": 0.2137, "step": 29320 }, { "epoch": 1.0658478087070282, "grad_norm": 4.936455726623535, "learning_rate": 4.8707978403043716e-05, "loss": 0.1551, "step": 29330 }, { "epoch": 1.0662112072098262, "grad_norm": 1.2196155786514282, "learning_rate": 4.8706331227304533e-05, "loss": 0.184, "step": 29340 }, { "epoch": 1.0665746057126244, "grad_norm": 2.0982654094696045, "learning_rate": 4.87046830301439e-05, "loss": 0.1566, "step": 29350 }, { "epoch": 1.0669380042154226, "grad_norm": 1.0265774726867676, "learning_rate": 4.8703033811632806e-05, "loss": 0.1263, "step": 29360 }, { "epoch": 1.0673014027182208, "grad_norm": 2.413862705230713, "learning_rate": 4.870138357184233e-05, "loss": 0.135, "step": 29370 }, { "epoch": 1.067664801221019, "grad_norm": 0.800736665725708, "learning_rate": 4.869973231084356e-05, "loss": 0.1169, "step": 29380 }, { "epoch": 1.0680281997238172, "grad_norm": 4.07125186920166, "learning_rate": 4.8698080028707647e-05, "loss": 0.1745, "step": 29390 }, { "epoch": 1.0683915982266152, "grad_norm": 1.1997871398925781, "learning_rate": 4.8696426725505784e-05, "loss": 0.1427, "step": 29400 }, { "epoch": 1.0683915982266152, "eval_loss": 0.3479246199131012, "eval_runtime": 180.3605, "eval_samples_per_second": 41.107, "eval_steps_per_second": 5.14, "eval_wer": 0.1802693920525714, "step": 29400 }, { "epoch": 1.0687549967294134, "grad_norm": 1.5684832334518433, "learning_rate": 4.8694772401309205e-05, "loss": 0.1491, "step": 29410 }, { "epoch": 1.0691183952322116, "grad_norm": 1.3784462213516235, "learning_rate": 4.8693117056189194e-05, "loss": 0.1741, "step": 29420 }, { "epoch": 
1.0694817937350098, "grad_norm": 2.0766236782073975, "learning_rate": 4.869146069021707e-05, "loss": 0.1375, "step": 29430 }, { "epoch": 1.069845192237808, "grad_norm": 0.6553940773010254, "learning_rate": 4.86898033034642e-05, "loss": 0.1371, "step": 29440 }, { "epoch": 1.0702085907406063, "grad_norm": 0.9652252197265625, "learning_rate": 4.868814489600199e-05, "loss": 0.1446, "step": 29450 }, { "epoch": 1.0705719892434042, "grad_norm": 1.123075008392334, "learning_rate": 4.8686485467901896e-05, "loss": 0.1628, "step": 29460 }, { "epoch": 1.0709353877462024, "grad_norm": 1.3370702266693115, "learning_rate": 4.868482501923543e-05, "loss": 0.1822, "step": 29470 }, { "epoch": 1.0712987862490007, "grad_norm": 1.1716543436050415, "learning_rate": 4.868316355007412e-05, "loss": 0.7928, "step": 29480 }, { "epoch": 1.0716621847517989, "grad_norm": 2.255791187286377, "learning_rate": 4.868150106048955e-05, "loss": 0.3897, "step": 29490 }, { "epoch": 1.072025583254597, "grad_norm": 2.2386605739593506, "learning_rate": 4.8679837550553366e-05, "loss": 0.174, "step": 29500 }, { "epoch": 1.072388981757395, "grad_norm": 0.7938382625579834, "learning_rate": 4.867817302033724e-05, "loss": 0.1274, "step": 29510 }, { "epoch": 1.0727523802601933, "grad_norm": 0.8619611263275146, "learning_rate": 4.8676507469912866e-05, "loss": 0.1756, "step": 29520 }, { "epoch": 1.0731157787629915, "grad_norm": 5.2337727546691895, "learning_rate": 4.867484089935205e-05, "loss": 0.1131, "step": 29530 }, { "epoch": 1.0734791772657897, "grad_norm": 1.339237093925476, "learning_rate": 4.867317330872656e-05, "loss": 0.2385, "step": 29540 }, { "epoch": 1.0738425757685879, "grad_norm": 0.9693569540977478, "learning_rate": 4.8671504698108266e-05, "loss": 0.1455, "step": 29550 }, { "epoch": 1.074205974271386, "grad_norm": 0.8684889674186707, "learning_rate": 4.866983506756906e-05, "loss": 0.1516, "step": 29560 }, { "epoch": 1.074569372774184, "grad_norm": 0.5518342852592468, "learning_rate": 
4.866816441718088e-05, "loss": 0.2068, "step": 29570 }, { "epoch": 1.0749327712769823, "grad_norm": 1.049777865409851, "learning_rate": 4.86664927470157e-05, "loss": 0.1473, "step": 29580 }, { "epoch": 1.0752961697797805, "grad_norm": 3.8799684047698975, "learning_rate": 4.8664820057145556e-05, "loss": 0.1398, "step": 29590 }, { "epoch": 1.0756595682825787, "grad_norm": 0.9927829504013062, "learning_rate": 4.866314634764252e-05, "loss": 2.0386, "step": 29600 }, { "epoch": 1.076022966785377, "grad_norm": 1.2022935152053833, "learning_rate": 4.86614716185787e-05, "loss": 0.1539, "step": 29610 }, { "epoch": 1.076386365288175, "grad_norm": 0.7556710243225098, "learning_rate": 4.865979587002625e-05, "loss": 0.1718, "step": 29620 }, { "epoch": 1.076749763790973, "grad_norm": 1.0953086614608765, "learning_rate": 4.865811910205738e-05, "loss": 0.1537, "step": 29630 }, { "epoch": 1.0771131622937713, "grad_norm": 0.49788376688957214, "learning_rate": 4.865644131474434e-05, "loss": 0.1436, "step": 29640 }, { "epoch": 1.0774765607965695, "grad_norm": 2.5504343509674072, "learning_rate": 4.865476250815941e-05, "loss": 0.1535, "step": 29650 }, { "epoch": 1.0778399592993677, "grad_norm": 1.280085802078247, "learning_rate": 4.865308268237492e-05, "loss": 0.1389, "step": 29660 }, { "epoch": 1.078203357802166, "grad_norm": 0.4341859519481659, "learning_rate": 4.865140183746326e-05, "loss": 0.2016, "step": 29670 }, { "epoch": 1.0785667563049641, "grad_norm": 0.744679868221283, "learning_rate": 4.864971997349685e-05, "loss": 0.1216, "step": 29680 }, { "epoch": 1.078930154807762, "grad_norm": 1.449559211730957, "learning_rate": 4.8648037090548154e-05, "loss": 0.1202, "step": 29690 }, { "epoch": 1.0792935533105603, "grad_norm": 3.58284330368042, "learning_rate": 4.8646353188689674e-05, "loss": 0.145, "step": 29700 }, { "epoch": 1.0796569518133585, "grad_norm": 1.7318589687347412, "learning_rate": 4.864466826799398e-05, "loss": 0.1361, "step": 29710 }, { "epoch": 1.0800203503161567, 
"grad_norm": 0.44806694984436035, "learning_rate": 4.864298232853364e-05, "loss": 0.212, "step": 29720 }, { "epoch": 1.080383748818955, "grad_norm": 0.8236504197120667, "learning_rate": 4.864129537038132e-05, "loss": 0.1446, "step": 29730 }, { "epoch": 1.0807471473217531, "grad_norm": 0.920353889465332, "learning_rate": 4.863960739360971e-05, "loss": 0.1472, "step": 29740 }, { "epoch": 1.0811105458245511, "grad_norm": 1.415685772895813, "learning_rate": 4.8637918398291514e-05, "loss": 1.9346, "step": 29750 }, { "epoch": 1.0814739443273493, "grad_norm": 0.7517853379249573, "learning_rate": 4.8636228384499524e-05, "loss": 0.1276, "step": 29760 }, { "epoch": 1.0818373428301475, "grad_norm": 0.5632757544517517, "learning_rate": 4.8634537352306554e-05, "loss": 0.1385, "step": 29770 }, { "epoch": 1.0822007413329457, "grad_norm": 1.5767742395401, "learning_rate": 4.8632845301785455e-05, "loss": 0.1451, "step": 29780 }, { "epoch": 1.082564139835744, "grad_norm": 0.7501896619796753, "learning_rate": 4.8631152233009146e-05, "loss": 0.1337, "step": 29790 }, { "epoch": 1.082927538338542, "grad_norm": 0.7235280871391296, "learning_rate": 4.862945814605056e-05, "loss": 0.1727, "step": 29800 }, { "epoch": 1.0832909368413401, "grad_norm": 0.9608789682388306, "learning_rate": 4.86277630409827e-05, "loss": 0.1346, "step": 29810 }, { "epoch": 1.0836543353441384, "grad_norm": 0.5176007151603699, "learning_rate": 4.862606691787859e-05, "loss": 0.1477, "step": 29820 }, { "epoch": 1.0840177338469366, "grad_norm": 1.1901780366897583, "learning_rate": 4.862436977681133e-05, "loss": 0.1478, "step": 29830 }, { "epoch": 1.0843811323497348, "grad_norm": 4.09995698928833, "learning_rate": 4.8622671617854026e-05, "loss": 0.1369, "step": 29840 }, { "epoch": 1.084744530852533, "grad_norm": 10.049054145812988, "learning_rate": 4.8620972441079855e-05, "loss": 0.8392, "step": 29850 }, { "epoch": 1.085107929355331, "grad_norm": 1.131095051765442, "learning_rate": 4.861927224656202e-05, "loss": 0.1404, 
"step": 29860 }, { "epoch": 1.0854713278581292, "grad_norm": 1.2740205526351929, "learning_rate": 4.861757103437379e-05, "loss": 0.1726, "step": 29870 }, { "epoch": 1.0858347263609274, "grad_norm": 0.9203113317489624, "learning_rate": 4.861586880458845e-05, "loss": 0.1088, "step": 29880 }, { "epoch": 1.0861981248637256, "grad_norm": 0.8646379113197327, "learning_rate": 4.8614165557279345e-05, "loss": 0.124, "step": 29890 }, { "epoch": 1.0865615233665238, "grad_norm": 1.283758521080017, "learning_rate": 4.861246129251987e-05, "loss": 0.1616, "step": 29900 }, { "epoch": 1.086924921869322, "grad_norm": 2.0251550674438477, "learning_rate": 4.861075601038345e-05, "loss": 0.1475, "step": 29910 }, { "epoch": 1.08728832037212, "grad_norm": 0.7173452973365784, "learning_rate": 4.860904971094356e-05, "loss": 0.182, "step": 29920 }, { "epoch": 1.0876517188749182, "grad_norm": 0.7154909372329712, "learning_rate": 4.8607342394273725e-05, "loss": 0.1263, "step": 29930 }, { "epoch": 1.0880151173777164, "grad_norm": 2.5288286209106445, "learning_rate": 4.860563406044749e-05, "loss": 0.1289, "step": 29940 }, { "epoch": 1.0883785158805146, "grad_norm": 1.4772063493728638, "learning_rate": 4.860392470953848e-05, "loss": 0.1556, "step": 29950 }, { "epoch": 1.0887419143833128, "grad_norm": 1.623298168182373, "learning_rate": 4.8602214341620346e-05, "loss": 0.1493, "step": 29960 }, { "epoch": 1.089105312886111, "grad_norm": 0.4302707016468048, "learning_rate": 4.860050295676676e-05, "loss": 0.2303, "step": 29970 }, { "epoch": 1.089468711388909, "grad_norm": 1.4086140394210815, "learning_rate": 4.8598790555051474e-05, "loss": 0.1549, "step": 29980 }, { "epoch": 1.0898321098917072, "grad_norm": 1.1924636363983154, "learning_rate": 4.859707713654828e-05, "loss": 0.1426, "step": 29990 }, { "epoch": 1.0901955083945054, "grad_norm": 0.8468578457832336, "learning_rate": 4.859536270133097e-05, "loss": 0.1607, "step": 30000 }, { "epoch": 1.0901955083945054, "eval_loss": 0.38150739669799805, 
"eval_runtime": 180.8598, "eval_samples_per_second": 40.993, "eval_steps_per_second": 5.126, "eval_wer": 0.18088659756385353, "step": 30000 }, { "epoch": 1.0905589068973036, "grad_norm": 1.3293052911758423, "learning_rate": 4.859364724947345e-05, "loss": 0.1199, "step": 30010 }, { "epoch": 1.0909223054001018, "grad_norm": 1.421976923942566, "learning_rate": 4.859193078104961e-05, "loss": 0.1932, "step": 30020 }, { "epoch": 1.0912857039029, "grad_norm": 5.226151466369629, "learning_rate": 4.8590213296133415e-05, "loss": 0.1365, "step": 30030 }, { "epoch": 1.091649102405698, "grad_norm": 1.6307711601257324, "learning_rate": 4.8588494794798866e-05, "loss": 0.1533, "step": 30040 }, { "epoch": 1.0920125009084962, "grad_norm": 1.190746784210205, "learning_rate": 4.858677527712e-05, "loss": 0.1701, "step": 30050 }, { "epoch": 1.0923758994112944, "grad_norm": 1.1558239459991455, "learning_rate": 4.858505474317091e-05, "loss": 0.1792, "step": 30060 }, { "epoch": 1.0927392979140926, "grad_norm": 0.5284643769264221, "learning_rate": 4.858333319302573e-05, "loss": 0.1789, "step": 30070 }, { "epoch": 1.0931026964168908, "grad_norm": 0.7858747243881226, "learning_rate": 4.858161062675863e-05, "loss": 0.1456, "step": 30080 }, { "epoch": 1.0934660949196888, "grad_norm": 1.2685805559158325, "learning_rate": 4.857988704444383e-05, "loss": 0.1326, "step": 30090 }, { "epoch": 1.093829493422487, "grad_norm": 0.9551296830177307, "learning_rate": 4.8578162446155595e-05, "loss": 0.327, "step": 30100 }, { "epoch": 1.0941928919252852, "grad_norm": 3.6769495010375977, "learning_rate": 4.857643683196823e-05, "loss": 0.1111, "step": 30110 }, { "epoch": 1.0945562904280834, "grad_norm": 0.757580041885376, "learning_rate": 4.8574710201956095e-05, "loss": 0.1622, "step": 30120 }, { "epoch": 1.0949196889308817, "grad_norm": 0.762323796749115, "learning_rate": 4.857298255619357e-05, "loss": 0.1218, "step": 30130 }, { "epoch": 1.0952830874336799, "grad_norm": 0.6065217852592468, "learning_rate": 
4.85712538947551e-05, "loss": 0.1297, "step": 30140 }, { "epoch": 1.0956464859364778, "grad_norm": 1.1257789134979248, "learning_rate": 4.856952421771517e-05, "loss": 0.1862, "step": 30150 }, { "epoch": 1.096009884439276, "grad_norm": 1.128233790397644, "learning_rate": 4.85677935251483e-05, "loss": 0.1377, "step": 30160 }, { "epoch": 1.0963732829420743, "grad_norm": 0.48844701051712036, "learning_rate": 4.856606181712906e-05, "loss": 0.1967, "step": 30170 }, { "epoch": 1.0967366814448725, "grad_norm": 0.682921290397644, "learning_rate": 4.856432909373206e-05, "loss": 0.1268, "step": 30180 }, { "epoch": 1.0971000799476707, "grad_norm": 0.8049948215484619, "learning_rate": 4.856259535503197e-05, "loss": 0.0971, "step": 30190 }, { "epoch": 1.0974634784504689, "grad_norm": 0.6435711979866028, "learning_rate": 4.8560860601103485e-05, "loss": 1.0273, "step": 30200 }, { "epoch": 1.0978268769532669, "grad_norm": 1.014172911643982, "learning_rate": 4.855912483202134e-05, "loss": 0.1137, "step": 30210 }, { "epoch": 1.098190275456065, "grad_norm": 1.4760230779647827, "learning_rate": 4.8557388047860334e-05, "loss": 0.1585, "step": 30220 }, { "epoch": 1.0985536739588633, "grad_norm": 1.4756141901016235, "learning_rate": 4.855565024869529e-05, "loss": 0.1261, "step": 30230 }, { "epoch": 1.0989170724616615, "grad_norm": 2.1977133750915527, "learning_rate": 4.8553911434601085e-05, "loss": 0.1532, "step": 30240 }, { "epoch": 1.0992804709644597, "grad_norm": 1.9084991216659546, "learning_rate": 4.855217160565265e-05, "loss": 0.1611, "step": 30250 }, { "epoch": 1.099643869467258, "grad_norm": 1.3657923936843872, "learning_rate": 4.855043076192494e-05, "loss": 0.1569, "step": 30260 }, { "epoch": 1.1000072679700559, "grad_norm": 0.6824470162391663, "learning_rate": 4.8548688903492943e-05, "loss": 0.1632, "step": 30270 }, { "epoch": 1.100370666472854, "grad_norm": 0.595958948135376, "learning_rate": 4.854694603043175e-05, "loss": 0.1394, "step": 30280 }, { "epoch": 1.1007340649756523, 
"grad_norm": 1.1626547574996948, "learning_rate": 4.854520214281642e-05, "loss": 0.1193, "step": 30290 }, { "epoch": 1.1010974634784505, "grad_norm": 1.2703717947006226, "learning_rate": 4.8543457240722104e-05, "loss": 0.1949, "step": 30300 }, { "epoch": 1.1014608619812487, "grad_norm": 1.7159488201141357, "learning_rate": 4.854171132422399e-05, "loss": 0.1524, "step": 30310 }, { "epoch": 1.101824260484047, "grad_norm": 0.7651236057281494, "learning_rate": 4.85399643933973e-05, "loss": 0.1992, "step": 30320 }, { "epoch": 1.102187658986845, "grad_norm": 0.7985833287239075, "learning_rate": 4.8538216448317286e-05, "loss": 0.1363, "step": 30330 }, { "epoch": 1.102551057489643, "grad_norm": 1.2583733797073364, "learning_rate": 4.853646748905928e-05, "loss": 0.1215, "step": 30340 }, { "epoch": 1.1029144559924413, "grad_norm": 1.1982141733169556, "learning_rate": 4.853471751569864e-05, "loss": 0.1656, "step": 30350 }, { "epoch": 1.1032778544952395, "grad_norm": 2.399423599243164, "learning_rate": 4.853296652831075e-05, "loss": 0.1164, "step": 30360 }, { "epoch": 1.1036412529980377, "grad_norm": 1.5785446166992188, "learning_rate": 4.853121452697107e-05, "loss": 0.1682, "step": 30370 }, { "epoch": 1.1040046515008357, "grad_norm": 1.3818514347076416, "learning_rate": 4.852946151175508e-05, "loss": 0.5349, "step": 30380 }, { "epoch": 1.104368050003634, "grad_norm": 1.6894676685333252, "learning_rate": 4.8527707482738305e-05, "loss": 0.1314, "step": 30390 }, { "epoch": 1.1047314485064321, "grad_norm": 1.1517245769500732, "learning_rate": 4.852595243999633e-05, "loss": 0.1515, "step": 30400 }, { "epoch": 1.1050948470092303, "grad_norm": 1.2149289846420288, "learning_rate": 4.852419638360477e-05, "loss": 0.2572, "step": 30410 }, { "epoch": 1.1054582455120285, "grad_norm": 0.8241190314292908, "learning_rate": 4.852243931363929e-05, "loss": 0.2045, "step": 30420 }, { "epoch": 1.1058216440148267, "grad_norm": 0.8909230828285217, "learning_rate": 4.852068123017559e-05, "loss": 
0.1281, "step": 30430 }, { "epoch": 1.1061850425176247, "grad_norm": 0.7718971967697144, "learning_rate": 4.8518922133289424e-05, "loss": 0.1207, "step": 30440 }, { "epoch": 1.106548441020423, "grad_norm": 3.393324136734009, "learning_rate": 4.8517162023056575e-05, "loss": 0.4812, "step": 30450 }, { "epoch": 1.1069118395232211, "grad_norm": 1.5000587701797485, "learning_rate": 4.85154008995529e-05, "loss": 0.1246, "step": 30460 }, { "epoch": 1.1072752380260193, "grad_norm": 1.3177014589309692, "learning_rate": 4.8513638762854264e-05, "loss": 0.2352, "step": 30470 }, { "epoch": 1.1076386365288176, "grad_norm": 0.8771611452102661, "learning_rate": 4.8511875613036596e-05, "loss": 1.4369, "step": 30480 }, { "epoch": 1.1080020350316158, "grad_norm": 0.46630170941352844, "learning_rate": 4.8510111450175865e-05, "loss": 0.1184, "step": 30490 }, { "epoch": 1.1083654335344137, "grad_norm": 1.582541823387146, "learning_rate": 4.850834627434808e-05, "loss": 0.154, "step": 30500 }, { "epoch": 1.108728832037212, "grad_norm": 0.9425756335258484, "learning_rate": 4.850658008562929e-05, "loss": 0.1632, "step": 30510 }, { "epoch": 1.1090922305400102, "grad_norm": 0.6453799605369568, "learning_rate": 4.8504812884095616e-05, "loss": 0.2024, "step": 30520 }, { "epoch": 1.1094556290428084, "grad_norm": 0.8643505573272705, "learning_rate": 4.850304466982317e-05, "loss": 0.1373, "step": 30530 }, { "epoch": 1.1098190275456066, "grad_norm": 0.6463938355445862, "learning_rate": 4.850127544288816e-05, "loss": 0.1308, "step": 30540 }, { "epoch": 1.1101824260484048, "grad_norm": 1.2465693950653076, "learning_rate": 4.8499505203366816e-05, "loss": 0.1628, "step": 30550 }, { "epoch": 1.1105458245512028, "grad_norm": 1.085317850112915, "learning_rate": 4.84977339513354e-05, "loss": 0.1533, "step": 30560 }, { "epoch": 1.110909223054001, "grad_norm": 0.5834909081459045, "learning_rate": 4.849596168687022e-05, "loss": 0.1811, "step": 30570 }, { "epoch": 1.1112726215567992, "grad_norm": 
1.407309889793396, "learning_rate": 4.849418841004766e-05, "loss": 0.1621, "step": 30580 }, { "epoch": 1.1116360200595974, "grad_norm": 1.0903669595718384, "learning_rate": 4.8492414120944116e-05, "loss": 0.271, "step": 30590 }, { "epoch": 1.1119994185623956, "grad_norm": 1.6495404243469238, "learning_rate": 4.8490638819636036e-05, "loss": 0.1602, "step": 30600 }, { "epoch": 1.1119994185623956, "eval_loss": 0.3786245882511139, "eval_runtime": 180.2563, "eval_samples_per_second": 41.13, "eval_steps_per_second": 5.143, "eval_wer": 0.17622125002269137, "step": 30600 }, { "epoch": 1.1123628170651938, "grad_norm": 1.6046833992004395, "learning_rate": 4.8488862506199905e-05, "loss": 0.142, "step": 30610 }, { "epoch": 1.1127262155679918, "grad_norm": 0.7779229879379272, "learning_rate": 4.848708518071226e-05, "loss": 0.1556, "step": 30620 }, { "epoch": 1.11308961407079, "grad_norm": 6.0123677253723145, "learning_rate": 4.848530684324969e-05, "loss": 0.1379, "step": 30630 }, { "epoch": 1.1134530125735882, "grad_norm": 1.1593163013458252, "learning_rate": 4.8483527493888796e-05, "loss": 0.1091, "step": 30640 }, { "epoch": 1.1138164110763864, "grad_norm": 1.1061301231384277, "learning_rate": 4.848174713270627e-05, "loss": 0.1521, "step": 30650 }, { "epoch": 1.1141798095791846, "grad_norm": 2.006169080734253, "learning_rate": 4.8479965759778804e-05, "loss": 0.1188, "step": 30660 }, { "epoch": 1.1145432080819826, "grad_norm": 0.632653534412384, "learning_rate": 4.8478183375183154e-05, "loss": 0.2039, "step": 30670 }, { "epoch": 1.1149066065847808, "grad_norm": 2.2631378173828125, "learning_rate": 4.847639997899611e-05, "loss": 0.1324, "step": 30680 }, { "epoch": 1.115270005087579, "grad_norm": 0.7694458365440369, "learning_rate": 4.847461557129454e-05, "loss": 0.1043, "step": 30690 }, { "epoch": 1.1156334035903772, "grad_norm": 1.5386550426483154, "learning_rate": 4.847283015215529e-05, "loss": 0.1605, "step": 30700 }, { "epoch": 1.1159968020931754, "grad_norm": 
0.9068945646286011, "learning_rate": 4.847104372165531e-05, "loss": 0.1178, "step": 30710 }, { "epoch": 1.1163602005959736, "grad_norm": 1.4700278043746948, "learning_rate": 4.8469256279871564e-05, "loss": 0.1458, "step": 30720 }, { "epoch": 1.1167235990987716, "grad_norm": 1.125613808631897, "learning_rate": 4.846746782688108e-05, "loss": 0.1212, "step": 30730 }, { "epoch": 1.1170869976015698, "grad_norm": 1.081297516822815, "learning_rate": 4.846567836276089e-05, "loss": 0.1218, "step": 30740 }, { "epoch": 1.117450396104368, "grad_norm": 0.6549712419509888, "learning_rate": 4.846388788758812e-05, "loss": 0.1684, "step": 30750 }, { "epoch": 1.1178137946071662, "grad_norm": 0.7256012558937073, "learning_rate": 4.84620964014399e-05, "loss": 0.1425, "step": 30760 }, { "epoch": 1.1181771931099644, "grad_norm": 0.6661650538444519, "learning_rate": 4.846030390439343e-05, "loss": 0.2043, "step": 30770 }, { "epoch": 1.1185405916127626, "grad_norm": 2.5043599605560303, "learning_rate": 4.845851039652594e-05, "loss": 0.1337, "step": 30780 }, { "epoch": 1.1189039901155606, "grad_norm": 1.7362638711929321, "learning_rate": 4.84567158779147e-05, "loss": 0.1146, "step": 30790 }, { "epoch": 1.1192673886183588, "grad_norm": 2.156850576400757, "learning_rate": 4.845492034863703e-05, "loss": 0.1402, "step": 30800 }, { "epoch": 1.119630787121157, "grad_norm": 6.733970642089844, "learning_rate": 4.8453123808770295e-05, "loss": 0.137, "step": 30810 }, { "epoch": 1.1199941856239553, "grad_norm": 1.2163270711898804, "learning_rate": 4.84513262583919e-05, "loss": 0.2038, "step": 30820 }, { "epoch": 1.1203575841267535, "grad_norm": 1.0911026000976562, "learning_rate": 4.84495276975793e-05, "loss": 0.1247, "step": 30830 }, { "epoch": 1.1207209826295517, "grad_norm": 8.4699125289917, "learning_rate": 4.844772812640998e-05, "loss": 0.1883, "step": 30840 }, { "epoch": 1.1210843811323496, "grad_norm": 1.9448401927947998, "learning_rate": 4.8445927544961486e-05, "loss": 0.1259, "step": 30850 }, 
{ "epoch": 1.1214477796351479, "grad_norm": 1.2070740461349487, "learning_rate": 4.844412595331139e-05, "loss": 0.1478, "step": 30860 }, { "epoch": 1.121811178137946, "grad_norm": 0.5514017939567566, "learning_rate": 4.844232335153733e-05, "loss": 0.2209, "step": 30870 }, { "epoch": 1.1221745766407443, "grad_norm": 0.6462703943252563, "learning_rate": 4.844051973971696e-05, "loss": 0.1182, "step": 30880 }, { "epoch": 1.1225379751435425, "grad_norm": 0.9222347140312195, "learning_rate": 4.8438715117927995e-05, "loss": 0.1079, "step": 30890 }, { "epoch": 1.1229013736463407, "grad_norm": 1.1663174629211426, "learning_rate": 4.8436909486248196e-05, "loss": 0.1516, "step": 30900 }, { "epoch": 1.1232647721491387, "grad_norm": 0.8301449418067932, "learning_rate": 4.8435102844755356e-05, "loss": 0.1204, "step": 30910 }, { "epoch": 1.1236281706519369, "grad_norm": 0.8328074216842651, "learning_rate": 4.8433295193527305e-05, "loss": 0.1432, "step": 30920 }, { "epoch": 1.123991569154735, "grad_norm": 1.0741894245147705, "learning_rate": 4.843148653264195e-05, "loss": 0.1286, "step": 30930 }, { "epoch": 1.1243549676575333, "grad_norm": 1.5792789459228516, "learning_rate": 4.842967686217721e-05, "loss": 0.1235, "step": 30940 }, { "epoch": 1.1247183661603315, "grad_norm": 10.680551528930664, "learning_rate": 4.8427866182211056e-05, "loss": 0.2624, "step": 30950 }, { "epoch": 1.1250817646631295, "grad_norm": 0.753760576248169, "learning_rate": 4.8426054492821503e-05, "loss": 0.1185, "step": 30960 }, { "epoch": 1.1254451631659277, "grad_norm": 0.681735098361969, "learning_rate": 4.8424241794086614e-05, "loss": 0.1249, "step": 30970 }, { "epoch": 1.125808561668726, "grad_norm": 1.0460690259933472, "learning_rate": 4.842242808608449e-05, "loss": 0.1437, "step": 30980 }, { "epoch": 1.126171960171524, "grad_norm": 0.8870137929916382, "learning_rate": 4.8420613368893275e-05, "loss": 0.1321, "step": 30990 }, { "epoch": 1.1265353586743223, "grad_norm": 1.765331506729126, "learning_rate": 
4.841879764259116e-05, "loss": 0.1722, "step": 31000 }, { "epoch": 1.1268987571771205, "grad_norm": 2.9972617626190186, "learning_rate": 4.841698090725638e-05, "loss": 0.095, "step": 31010 }, { "epoch": 1.1272621556799187, "grad_norm": 0.7516260743141174, "learning_rate": 4.841516316296722e-05, "loss": 0.2073, "step": 31020 }, { "epoch": 1.1276255541827167, "grad_norm": 7.545155048370361, "learning_rate": 4.841334440980197e-05, "loss": 0.2066, "step": 31030 }, { "epoch": 1.127988952685515, "grad_norm": 0.7127543687820435, "learning_rate": 4.841152464783903e-05, "loss": 0.1483, "step": 31040 }, { "epoch": 1.1283523511883131, "grad_norm": 0.8178777694702148, "learning_rate": 4.8409703877156786e-05, "loss": 0.122, "step": 31050 }, { "epoch": 1.1287157496911113, "grad_norm": 2.2176194190979004, "learning_rate": 4.84078820978337e-05, "loss": 0.1074, "step": 31060 }, { "epoch": 1.1290791481939095, "grad_norm": 0.5790374279022217, "learning_rate": 4.8406059309948246e-05, "loss": 0.2401, "step": 31070 }, { "epoch": 1.1294425466967075, "grad_norm": 1.335080623626709, "learning_rate": 4.840423551357899e-05, "loss": 0.1412, "step": 31080 }, { "epoch": 1.1298059451995057, "grad_norm": 2.9304592609405518, "learning_rate": 4.840241070880449e-05, "loss": 0.1137, "step": 31090 }, { "epoch": 1.130169343702304, "grad_norm": 0.6828371286392212, "learning_rate": 4.840058489570338e-05, "loss": 0.162, "step": 31100 }, { "epoch": 1.1305327422051021, "grad_norm": 0.7623898983001709, "learning_rate": 4.8398758074354334e-05, "loss": 0.1164, "step": 31110 }, { "epoch": 1.1308961407079003, "grad_norm": 0.44123783707618713, "learning_rate": 4.8396930244836045e-05, "loss": 0.1378, "step": 31120 }, { "epoch": 1.1312595392106983, "grad_norm": 0.880264937877655, "learning_rate": 4.839510140722728e-05, "loss": 0.1168, "step": 31130 }, { "epoch": 1.1316229377134965, "grad_norm": 0.9946479797363281, "learning_rate": 4.839327156160684e-05, "loss": 0.1099, "step": 31140 }, { "epoch": 
1.1319863362162947, "grad_norm": 1.5061123371124268, "learning_rate": 4.8391440708053565e-05, "loss": 0.1167, "step": 31150 }, { "epoch": 1.132349734719093, "grad_norm": 3.687218427658081, "learning_rate": 4.838960884664633e-05, "loss": 0.1141, "step": 31160 }, { "epoch": 1.1327131332218912, "grad_norm": 1.015309453010559, "learning_rate": 4.838777597746408e-05, "loss": 0.1851, "step": 31170 }, { "epoch": 1.1330765317246894, "grad_norm": 0.9765021204948425, "learning_rate": 4.838594210058577e-05, "loss": 0.1276, "step": 31180 }, { "epoch": 1.1334399302274876, "grad_norm": 0.986419677734375, "learning_rate": 4.838410721609041e-05, "loss": 0.107, "step": 31190 }, { "epoch": 1.1338033287302856, "grad_norm": 0.850581169128418, "learning_rate": 4.838227132405709e-05, "loss": 0.1441, "step": 31200 }, { "epoch": 1.1338033287302856, "eval_loss": 0.3806535005569458, "eval_runtime": 180.0975, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.17877176103254852, "step": 31200 }, { "epoch": 1.1341667272330838, "grad_norm": 0.5492041110992432, "learning_rate": 4.8380434424564885e-05, "loss": 0.1644, "step": 31210 }, { "epoch": 1.134530125735882, "grad_norm": 0.6244884729385376, "learning_rate": 4.837859651769295e-05, "loss": 0.1754, "step": 31220 }, { "epoch": 1.1348935242386802, "grad_norm": 0.7327109575271606, "learning_rate": 4.837675760352047e-05, "loss": 0.1182, "step": 31230 }, { "epoch": 1.1352569227414784, "grad_norm": 1.8642997741699219, "learning_rate": 4.837491768212669e-05, "loss": 0.1262, "step": 31240 }, { "epoch": 1.1356203212442764, "grad_norm": 0.7738135457038879, "learning_rate": 4.837307675359086e-05, "loss": 0.1733, "step": 31250 }, { "epoch": 1.1359837197470746, "grad_norm": 1.2241661548614502, "learning_rate": 4.837123481799232e-05, "loss": 0.158, "step": 31260 }, { "epoch": 1.1363471182498728, "grad_norm": 0.5471898317337036, "learning_rate": 4.836939187541043e-05, "loss": 0.1745, "step": 31270 }, { "epoch": 
1.136710516752671, "grad_norm": 1.110005497932434, "learning_rate": 4.836754792592459e-05, "loss": 0.14, "step": 31280 }, { "epoch": 1.1370739152554692, "grad_norm": 18.33467674255371, "learning_rate": 4.836570296961425e-05, "loss": 0.4864, "step": 31290 }, { "epoch": 1.1374373137582674, "grad_norm": 2.1457314491271973, "learning_rate": 4.836385700655891e-05, "loss": 0.1431, "step": 31300 }, { "epoch": 1.1378007122610656, "grad_norm": 1.3444671630859375, "learning_rate": 4.8362010036838096e-05, "loss": 0.1287, "step": 31310 }, { "epoch": 1.1381641107638636, "grad_norm": 1.0178183317184448, "learning_rate": 4.8360162060531395e-05, "loss": 0.1984, "step": 31320 }, { "epoch": 1.1385275092666618, "grad_norm": 1.013101577758789, "learning_rate": 4.835831307771842e-05, "loss": 0.1354, "step": 31330 }, { "epoch": 1.13889090776946, "grad_norm": 1.1397134065628052, "learning_rate": 4.8356463088478855e-05, "loss": 0.1149, "step": 31340 }, { "epoch": 1.1392543062722582, "grad_norm": 0.6582014560699463, "learning_rate": 4.835461209289239e-05, "loss": 0.1581, "step": 31350 }, { "epoch": 1.1396177047750564, "grad_norm": 6.679111480712891, "learning_rate": 4.835276009103878e-05, "loss": 0.1136, "step": 31360 }, { "epoch": 1.1399811032778544, "grad_norm": 1.710073709487915, "learning_rate": 4.835090708299784e-05, "loss": 0.1523, "step": 31370 }, { "epoch": 1.1403445017806526, "grad_norm": 0.8167402148246765, "learning_rate": 4.834905306884939e-05, "loss": 0.1333, "step": 31380 }, { "epoch": 1.1407079002834508, "grad_norm": 1.0377804040908813, "learning_rate": 4.834719804867332e-05, "loss": 0.1484, "step": 31390 }, { "epoch": 1.141071298786249, "grad_norm": 0.6715871691703796, "learning_rate": 4.8345342022549556e-05, "loss": 0.133, "step": 31400 }, { "epoch": 1.1414346972890472, "grad_norm": 0.8593924641609192, "learning_rate": 4.834348499055807e-05, "loss": 0.1381, "step": 31410 }, { "epoch": 1.1417980957918452, "grad_norm": 1.4667985439300537, "learning_rate": 
4.834162695277887e-05, "loss": 0.2026, "step": 31420 }, { "epoch": 1.1421614942946434, "grad_norm": 1.1011070013046265, "learning_rate": 4.8339767909292014e-05, "loss": 0.1324, "step": 31430 }, { "epoch": 1.1425248927974416, "grad_norm": 0.6192152500152588, "learning_rate": 4.83379078601776e-05, "loss": 0.116, "step": 31440 }, { "epoch": 1.1428882913002398, "grad_norm": 0.7945598363876343, "learning_rate": 4.8336046805515775e-05, "loss": 0.1652, "step": 31450 }, { "epoch": 1.143251689803038, "grad_norm": 0.9201329350471497, "learning_rate": 4.833418474538672e-05, "loss": 0.1279, "step": 31460 }, { "epoch": 1.1436150883058362, "grad_norm": 0.7190477252006531, "learning_rate": 4.833232167987067e-05, "loss": 0.1603, "step": 31470 }, { "epoch": 1.1439784868086345, "grad_norm": 0.92894047498703, "learning_rate": 4.83304576090479e-05, "loss": 0.1268, "step": 31480 }, { "epoch": 1.1443418853114324, "grad_norm": 0.7764700055122375, "learning_rate": 4.8328592532998716e-05, "loss": 0.1307, "step": 31490 }, { "epoch": 1.1447052838142306, "grad_norm": 1.3679301738739014, "learning_rate": 4.832672645180348e-05, "loss": 0.1734, "step": 31500 }, { "epoch": 1.1450686823170289, "grad_norm": 2.3595213890075684, "learning_rate": 4.832485936554261e-05, "loss": 0.1335, "step": 31510 }, { "epoch": 1.145432080819827, "grad_norm": 0.610569953918457, "learning_rate": 4.832299127429653e-05, "loss": 0.249, "step": 31520 }, { "epoch": 1.1457954793226253, "grad_norm": 1.4595023393630981, "learning_rate": 4.832112217814575e-05, "loss": 0.1103, "step": 31530 }, { "epoch": 1.1461588778254233, "grad_norm": 5.723475933074951, "learning_rate": 4.831925207717077e-05, "loss": 0.126, "step": 31540 }, { "epoch": 1.1465222763282215, "grad_norm": 1.8982267379760742, "learning_rate": 4.8317380971452205e-05, "loss": 0.1422, "step": 31550 }, { "epoch": 1.1468856748310197, "grad_norm": 0.8732501268386841, "learning_rate": 4.831550886107066e-05, "loss": 0.1364, "step": 31560 }, { "epoch": 1.1472490733338179, 
"grad_norm": 1.0855740308761597, "learning_rate": 4.831363574610679e-05, "loss": 0.1728, "step": 31570 }, { "epoch": 1.147612471836616, "grad_norm": 1.5332953929901123, "learning_rate": 4.8311761626641304e-05, "loss": 0.1336, "step": 31580 }, { "epoch": 1.1479758703394143, "grad_norm": 0.5183860063552856, "learning_rate": 4.8309886502754954e-05, "loss": 0.6253, "step": 31590 }, { "epoch": 1.1483392688422125, "grad_norm": 0.7912465929985046, "learning_rate": 4.830801037452853e-05, "loss": 0.1644, "step": 31600 }, { "epoch": 1.1487026673450105, "grad_norm": 1.5750758647918701, "learning_rate": 4.8306133242042875e-05, "loss": 0.1787, "step": 31610 }, { "epoch": 1.1490660658478087, "grad_norm": 0.5864933133125305, "learning_rate": 4.830425510537886e-05, "loss": 0.1751, "step": 31620 }, { "epoch": 1.1494294643506069, "grad_norm": 0.9228208661079407, "learning_rate": 4.830237596461741e-05, "loss": 0.8842, "step": 31630 }, { "epoch": 1.149792862853405, "grad_norm": 1.0034486055374146, "learning_rate": 4.8300495819839486e-05, "loss": 0.1047, "step": 31640 }, { "epoch": 1.1501562613562033, "grad_norm": 1.125537395477295, "learning_rate": 4.82986146711261e-05, "loss": 0.1447, "step": 31650 }, { "epoch": 1.1505196598590013, "grad_norm": 2.6315014362335205, "learning_rate": 4.829673251855831e-05, "loss": 0.116, "step": 31660 }, { "epoch": 1.1508830583617995, "grad_norm": 0.8134027719497681, "learning_rate": 4.82948493622172e-05, "loss": 0.1889, "step": 31670 }, { "epoch": 1.1512464568645977, "grad_norm": 1.003691554069519, "learning_rate": 4.8292965202183916e-05, "loss": 0.1477, "step": 31680 }, { "epoch": 1.151609855367396, "grad_norm": 1.7551583051681519, "learning_rate": 4.829108003853964e-05, "loss": 0.1156, "step": 31690 }, { "epoch": 1.1519732538701941, "grad_norm": 1.5970351696014404, "learning_rate": 4.8289193871365594e-05, "loss": 0.3698, "step": 31700 }, { "epoch": 1.152336652372992, "grad_norm": 4.339359760284424, "learning_rate": 4.828730670074305e-05, "loss": 
0.1259, "step": 31710 }, { "epoch": 1.1527000508757903, "grad_norm": 0.76881343126297, "learning_rate": 4.828541852675331e-05, "loss": 0.3217, "step": 31720 }, { "epoch": 1.1530634493785885, "grad_norm": 3.1044371128082275, "learning_rate": 4.828352934947774e-05, "loss": 0.13, "step": 31730 }, { "epoch": 1.1534268478813867, "grad_norm": 0.5273496508598328, "learning_rate": 4.828163916899774e-05, "loss": 0.1197, "step": 31740 }, { "epoch": 1.153790246384185, "grad_norm": 0.8816530108451843, "learning_rate": 4.827974798539473e-05, "loss": 0.1633, "step": 31750 }, { "epoch": 1.1541536448869831, "grad_norm": 1.220786452293396, "learning_rate": 4.827785579875022e-05, "loss": 0.1293, "step": 31760 }, { "epoch": 1.1545170433897813, "grad_norm": 2.700749397277832, "learning_rate": 4.827596260914572e-05, "loss": 0.1427, "step": 31770 }, { "epoch": 1.1548804418925793, "grad_norm": 1.6649949550628662, "learning_rate": 4.827406841666281e-05, "loss": 3.8743, "step": 31780 }, { "epoch": 1.1552438403953775, "grad_norm": 1.2381266355514526, "learning_rate": 4.827217322138311e-05, "loss": 0.1322, "step": 31790 }, { "epoch": 1.1556072388981757, "grad_norm": 0.6668787598609924, "learning_rate": 4.8270277023388255e-05, "loss": 0.1566, "step": 31800 }, { "epoch": 1.1556072388981757, "eval_loss": 0.35771968960762024, "eval_runtime": 257.3302, "eval_samples_per_second": 28.811, "eval_steps_per_second": 3.602, "eval_wer": 0.1810772051482201, "step": 31800 }, { "epoch": 1.155970637400974, "grad_norm": 1.1917558908462524, "learning_rate": 4.826837982275996e-05, "loss": 0.1274, "step": 31810 }, { "epoch": 1.1563340359037722, "grad_norm": 0.5347509980201721, "learning_rate": 4.8266481619579973e-05, "loss": 0.1872, "step": 31820 }, { "epoch": 1.1566974344065701, "grad_norm": 1.023681402206421, "learning_rate": 4.8264582413930076e-05, "loss": 0.1505, "step": 31830 }, { "epoch": 1.1570608329093683, "grad_norm": 1.00868821144104, "learning_rate": 4.82626822058921e-05, "loss": 0.1364, "step": 
31840 }, { "epoch": 1.1574242314121665, "grad_norm": 1.4557231664657593, "learning_rate": 4.8260780995547905e-05, "loss": 0.1799, "step": 31850 }, { "epoch": 1.1577876299149648, "grad_norm": 1.1228946447372437, "learning_rate": 4.8258878782979434e-05, "loss": 0.1392, "step": 31860 }, { "epoch": 1.158151028417763, "grad_norm": 0.6818620562553406, "learning_rate": 4.825716593483377e-05, "loss": 0.1876, "step": 31870 }, { "epoch": 1.1585144269205612, "grad_norm": 1.4734445810317993, "learning_rate": 4.8255261818264976e-05, "loss": 0.1502, "step": 31880 }, { "epoch": 1.1588778254233594, "grad_norm": 0.8915801048278809, "learning_rate": 4.825335669970969e-05, "loss": 0.1207, "step": 31890 }, { "epoch": 1.1592412239261574, "grad_norm": 0.324372798204422, "learning_rate": 4.825145057925e-05, "loss": 0.1674, "step": 31900 }, { "epoch": 1.1596046224289556, "grad_norm": 1.1831437349319458, "learning_rate": 4.824954345696803e-05, "loss": 0.1192, "step": 31910 }, { "epoch": 1.1599680209317538, "grad_norm": 0.5911235809326172, "learning_rate": 4.824763533294596e-05, "loss": 0.1652, "step": 31920 }, { "epoch": 1.160331419434552, "grad_norm": 2.4116334915161133, "learning_rate": 4.8245726207265997e-05, "loss": 0.1297, "step": 31930 }, { "epoch": 1.1606948179373502, "grad_norm": 0.5179087519645691, "learning_rate": 4.8243816080010404e-05, "loss": 0.1066, "step": 31940 }, { "epoch": 1.1610582164401482, "grad_norm": 0.6537795066833496, "learning_rate": 4.824190495126148e-05, "loss": 0.1409, "step": 31950 }, { "epoch": 1.1614216149429464, "grad_norm": 1.0357365608215332, "learning_rate": 4.823999282110155e-05, "loss": 0.1146, "step": 31960 }, { "epoch": 1.1617850134457446, "grad_norm": 0.4709915220737457, "learning_rate": 4.823807968961303e-05, "loss": 0.1726, "step": 31970 }, { "epoch": 1.1621484119485428, "grad_norm": 0.9699262380599976, "learning_rate": 4.823616555687833e-05, "loss": 0.125, "step": 31980 }, { "epoch": 1.162511810451341, "grad_norm": 1.2052152156829834, 
"learning_rate": 4.8234250422979946e-05, "loss": 0.1164, "step": 31990 }, { "epoch": 1.1628752089541392, "grad_norm": 1.1892735958099365, "learning_rate": 4.823233428800037e-05, "loss": 0.1412, "step": 32000 }, { "epoch": 1.1632386074569372, "grad_norm": 0.7427589893341064, "learning_rate": 4.8230417152022165e-05, "loss": 0.1315, "step": 32010 }, { "epoch": 1.1636020059597354, "grad_norm": 0.7582072019577026, "learning_rate": 4.8228499015127945e-05, "loss": 0.1783, "step": 32020 }, { "epoch": 1.1639654044625336, "grad_norm": 1.1409790515899658, "learning_rate": 4.8226579877400345e-05, "loss": 0.1303, "step": 32030 }, { "epoch": 1.1643288029653318, "grad_norm": 1.1382596492767334, "learning_rate": 4.822465973892206e-05, "loss": 0.1426, "step": 32040 }, { "epoch": 1.16469220146813, "grad_norm": 1.27096688747406, "learning_rate": 4.822273859977583e-05, "loss": 0.1505, "step": 32050 }, { "epoch": 1.1650555999709282, "grad_norm": 1.5508397817611694, "learning_rate": 4.822081646004441e-05, "loss": 0.1366, "step": 32060 }, { "epoch": 1.1654189984737262, "grad_norm": 0.2970573604106903, "learning_rate": 4.821889331981063e-05, "loss": 0.1505, "step": 32070 }, { "epoch": 1.1657823969765244, "grad_norm": 0.9228662848472595, "learning_rate": 4.821696917915736e-05, "loss": 0.1112, "step": 32080 }, { "epoch": 1.1661457954793226, "grad_norm": 0.770660936832428, "learning_rate": 4.821504403816748e-05, "loss": 0.1284, "step": 32090 }, { "epoch": 1.1665091939821208, "grad_norm": 0.8875694274902344, "learning_rate": 4.8213117896923954e-05, "loss": 0.1421, "step": 32100 }, { "epoch": 1.166872592484919, "grad_norm": 0.9725656509399414, "learning_rate": 4.821119075550978e-05, "loss": 0.1269, "step": 32110 }, { "epoch": 1.167235990987717, "grad_norm": 0.5882539749145508, "learning_rate": 4.820926261400797e-05, "loss": 0.159, "step": 32120 }, { "epoch": 1.1675993894905152, "grad_norm": 0.9551408886909485, "learning_rate": 4.820733347250162e-05, "loss": 0.1788, "step": 32130 }, { "epoch": 
1.1679627879933134, "grad_norm": 0.929642915725708, "learning_rate": 4.820540333107384e-05, "loss": 0.1329, "step": 32140 }, { "epoch": 1.1683261864961116, "grad_norm": 1.707643747329712, "learning_rate": 4.8203472189807795e-05, "loss": 0.1317, "step": 32150 }, { "epoch": 1.1686895849989098, "grad_norm": 1.401150107383728, "learning_rate": 4.82015400487867e-05, "loss": 0.126, "step": 32160 }, { "epoch": 1.169052983501708, "grad_norm": 0.7058550715446472, "learning_rate": 4.8199606908093785e-05, "loss": 0.1811, "step": 32170 }, { "epoch": 1.1694163820045063, "grad_norm": 1.2024914026260376, "learning_rate": 4.8197672767812366e-05, "loss": 0.1524, "step": 32180 }, { "epoch": 1.1697797805073042, "grad_norm": 0.8119955062866211, "learning_rate": 4.819573762802575e-05, "loss": 0.1281, "step": 32190 }, { "epoch": 1.1701431790101025, "grad_norm": 0.8531884551048279, "learning_rate": 4.8193801488817336e-05, "loss": 0.1362, "step": 32200 }, { "epoch": 1.1705065775129007, "grad_norm": 0.7170140743255615, "learning_rate": 4.819186435027054e-05, "loss": 0.1276, "step": 32210 }, { "epoch": 1.1708699760156989, "grad_norm": 1.7031468152999878, "learning_rate": 4.8189926212468825e-05, "loss": 0.1369, "step": 32220 }, { "epoch": 1.171233374518497, "grad_norm": 1.639916181564331, "learning_rate": 4.81879870754957e-05, "loss": 0.1318, "step": 32230 }, { "epoch": 1.171596773021295, "grad_norm": 1.0148886442184448, "learning_rate": 4.8186046939434716e-05, "loss": 0.119, "step": 32240 }, { "epoch": 1.1719601715240933, "grad_norm": 1.4640549421310425, "learning_rate": 4.818410580436947e-05, "loss": 0.1603, "step": 32250 }, { "epoch": 1.1723235700268915, "grad_norm": 1.0362626314163208, "learning_rate": 4.818216367038358e-05, "loss": 0.1318, "step": 32260 }, { "epoch": 1.1726869685296897, "grad_norm": 0.507990837097168, "learning_rate": 4.818022053756076e-05, "loss": 0.1681, "step": 32270 }, { "epoch": 1.1730503670324879, "grad_norm": 0.7118284106254578, "learning_rate": 
4.81782764059847e-05, "loss": 0.1129, "step": 32280 }, { "epoch": 1.173413765535286, "grad_norm": 2.637918472290039, "learning_rate": 4.8176331275739175e-05, "loss": 0.1866, "step": 32290 }, { "epoch": 1.173777164038084, "grad_norm": 1.5417594909667969, "learning_rate": 4.817438514690801e-05, "loss": 0.1382, "step": 32300 }, { "epoch": 1.1741405625408823, "grad_norm": 1.4842432737350464, "learning_rate": 4.817243801957503e-05, "loss": 0.1381, "step": 32310 }, { "epoch": 1.1745039610436805, "grad_norm": 2.0502350330352783, "learning_rate": 4.817048989382415e-05, "loss": 0.1515, "step": 32320 }, { "epoch": 1.1748673595464787, "grad_norm": 1.8963838815689087, "learning_rate": 4.81685407697393e-05, "loss": 0.1453, "step": 32330 }, { "epoch": 1.175230758049277, "grad_norm": 0.6867222785949707, "learning_rate": 4.8166590647404466e-05, "loss": 0.1566, "step": 32340 }, { "epoch": 1.175594156552075, "grad_norm": 1.4324911832809448, "learning_rate": 4.8164639526903665e-05, "loss": 0.1261, "step": 32350 }, { "epoch": 1.175957555054873, "grad_norm": 4.706410884857178, "learning_rate": 4.8162687408320963e-05, "loss": 0.1168, "step": 32360 }, { "epoch": 1.1763209535576713, "grad_norm": 0.6849080324172974, "learning_rate": 4.8160734291740476e-05, "loss": 0.1587, "step": 32370 }, { "epoch": 1.1766843520604695, "grad_norm": 3.180955171585083, "learning_rate": 4.815878017724636e-05, "loss": 0.1312, "step": 32380 }, { "epoch": 1.1770477505632677, "grad_norm": 0.5583860278129578, "learning_rate": 4.81568250649228e-05, "loss": 0.1385, "step": 32390 }, { "epoch": 1.177411149066066, "grad_norm": 0.8250964283943176, "learning_rate": 4.8154868954854036e-05, "loss": 0.1393, "step": 32400 }, { "epoch": 1.177411149066066, "eval_loss": 0.38513997197151184, "eval_runtime": 179.4965, "eval_samples_per_second": 41.304, "eval_steps_per_second": 5.164, "eval_wer": 0.18090475066712655, "step": 32400 }, { "epoch": 1.177774547568864, "grad_norm": 2.3377466201782227, "learning_rate": 
4.815291184712437e-05, "loss": 0.1197, "step": 32410 }, { "epoch": 1.1781379460716621, "grad_norm": 0.7508591413497925, "learning_rate": 4.81509537418181e-05, "loss": 0.1786, "step": 32420 }, { "epoch": 1.1785013445744603, "grad_norm": 0.8103131651878357, "learning_rate": 4.81489946390196e-05, "loss": 0.1617, "step": 32430 }, { "epoch": 1.1788647430772585, "grad_norm": 1.2582241296768188, "learning_rate": 4.814703453881329e-05, "loss": 0.1326, "step": 32440 }, { "epoch": 1.1792281415800567, "grad_norm": 1.110107660293579, "learning_rate": 4.8145073441283613e-05, "loss": 0.1504, "step": 32450 }, { "epoch": 1.179591540082855, "grad_norm": 0.9912093281745911, "learning_rate": 4.814311134651509e-05, "loss": 0.133, "step": 32460 }, { "epoch": 1.1799549385856531, "grad_norm": 1.1711434125900269, "learning_rate": 4.814114825459223e-05, "loss": 0.1328, "step": 32470 }, { "epoch": 1.1803183370884511, "grad_norm": 3.884737491607666, "learning_rate": 4.813918416559963e-05, "loss": 0.1225, "step": 32480 }, { "epoch": 1.1806817355912493, "grad_norm": 0.9459224939346313, "learning_rate": 4.8137219079621906e-05, "loss": 0.1507, "step": 32490 }, { "epoch": 1.1810451340940475, "grad_norm": 1.7159967422485352, "learning_rate": 4.813525299674374e-05, "loss": 0.1823, "step": 32500 }, { "epoch": 1.1814085325968458, "grad_norm": 1.3824647665023804, "learning_rate": 4.8133285917049844e-05, "loss": 0.1393, "step": 32510 }, { "epoch": 1.181771931099644, "grad_norm": 214.12107849121094, "learning_rate": 4.813131784062496e-05, "loss": 4.0762, "step": 32520 }, { "epoch": 1.182135329602442, "grad_norm": 2.5384116172790527, "learning_rate": 4.812934876755389e-05, "loss": 0.1379, "step": 32530 }, { "epoch": 1.1824987281052401, "grad_norm": 1.4254207611083984, "learning_rate": 4.812737869792148e-05, "loss": 0.1461, "step": 32540 }, { "epoch": 1.1828621266080384, "grad_norm": 1.571662187576294, "learning_rate": 4.812540763181261e-05, "loss": 0.178, "step": 32550 }, { "epoch": 1.1832255251108366, 
"grad_norm": 5.712926864624023, "learning_rate": 4.8123435569312206e-05, "loss": 0.1071, "step": 32560 }, { "epoch": 1.1835889236136348, "grad_norm": 0.8147953152656555, "learning_rate": 4.812146251050523e-05, "loss": 0.1211, "step": 32570 }, { "epoch": 1.183952322116433, "grad_norm": 1.1877583265304565, "learning_rate": 4.8119488455476714e-05, "loss": 0.1668, "step": 32580 }, { "epoch": 1.184315720619231, "grad_norm": 0.7466074824333191, "learning_rate": 4.8117513404311686e-05, "loss": 0.098, "step": 32590 }, { "epoch": 1.1846791191220292, "grad_norm": 1.6904805898666382, "learning_rate": 4.8115537357095265e-05, "loss": 0.1626, "step": 32600 }, { "epoch": 1.1850425176248274, "grad_norm": 0.879503607749939, "learning_rate": 4.811356031391259e-05, "loss": 0.1129, "step": 32610 }, { "epoch": 1.1854059161276256, "grad_norm": 2.447317600250244, "learning_rate": 4.811158227484883e-05, "loss": 0.1255, "step": 32620 }, { "epoch": 1.1857693146304238, "grad_norm": 0.9513424038887024, "learning_rate": 4.810960323998922e-05, "loss": 0.1347, "step": 32630 }, { "epoch": 1.186132713133222, "grad_norm": 0.46179428696632385, "learning_rate": 4.810762320941903e-05, "loss": 0.1002, "step": 32640 }, { "epoch": 1.18649611163602, "grad_norm": 0.7595782279968262, "learning_rate": 4.8105642183223585e-05, "loss": 0.1585, "step": 32650 }, { "epoch": 1.1868595101388182, "grad_norm": 1.8892844915390015, "learning_rate": 4.8103660161488216e-05, "loss": 0.1475, "step": 32660 }, { "epoch": 1.1872229086416164, "grad_norm": 3.290606737136841, "learning_rate": 4.810167714429834e-05, "loss": 0.1603, "step": 32670 }, { "epoch": 1.1875863071444146, "grad_norm": 1.3222955465316772, "learning_rate": 4.809969313173939e-05, "loss": 0.1251, "step": 32680 }, { "epoch": 1.1879497056472128, "grad_norm": 1.8568757772445679, "learning_rate": 4.809770812389686e-05, "loss": 0.1517, "step": 32690 }, { "epoch": 1.1883131041500108, "grad_norm": 1.3318365812301636, "learning_rate": 4.8095722120856255e-05, "loss": 
0.1778, "step": 32700 }, { "epoch": 1.188676502652809, "grad_norm": 1.513069748878479, "learning_rate": 4.8093735122703164e-05, "loss": 0.1325, "step": 32710 }, { "epoch": 1.1890399011556072, "grad_norm": 0.37486693263053894, "learning_rate": 4.809174712952319e-05, "loss": 0.1482, "step": 32720 }, { "epoch": 1.1894032996584054, "grad_norm": 3.7855522632598877, "learning_rate": 4.8089758141402e-05, "loss": 0.1237, "step": 32730 }, { "epoch": 1.1897666981612036, "grad_norm": 0.6902849674224854, "learning_rate": 4.8087768158425285e-05, "loss": 0.1099, "step": 32740 }, { "epoch": 1.1901300966640018, "grad_norm": 0.6842343211174011, "learning_rate": 4.808577718067878e-05, "loss": 0.17, "step": 32750 }, { "epoch": 1.1904934951668, "grad_norm": 0.9745518565177917, "learning_rate": 4.808378520824829e-05, "loss": 0.1446, "step": 32760 }, { "epoch": 1.190856893669598, "grad_norm": 1.468474268913269, "learning_rate": 4.808179224121962e-05, "loss": 0.1563, "step": 32770 }, { "epoch": 1.1912202921723962, "grad_norm": 1.6509790420532227, "learning_rate": 4.807979827967864e-05, "loss": 0.1416, "step": 32780 }, { "epoch": 1.1915836906751944, "grad_norm": 0.9928446412086487, "learning_rate": 4.8077803323711277e-05, "loss": 1.6808, "step": 32790 }, { "epoch": 1.1919470891779926, "grad_norm": 3.463270425796509, "learning_rate": 4.807580737340348e-05, "loss": 0.1462, "step": 32800 }, { "epoch": 1.1923104876807908, "grad_norm": 1.0357753038406372, "learning_rate": 4.807381042884125e-05, "loss": 0.1475, "step": 32810 }, { "epoch": 1.1926738861835888, "grad_norm": 0.6824864745140076, "learning_rate": 4.807181249011062e-05, "loss": 0.1884, "step": 32820 }, { "epoch": 1.193037284686387, "grad_norm": 8.779791831970215, "learning_rate": 4.8069813557297685e-05, "loss": 0.128, "step": 32830 }, { "epoch": 1.1934006831891852, "grad_norm": 1.07723867893219, "learning_rate": 4.806781363048856e-05, "loss": 0.1433, "step": 32840 }, { "epoch": 1.1937640816919834, "grad_norm": 1.9113037586212158, 
"learning_rate": 4.806581270976942e-05, "loss": 0.1575, "step": 32850 }, { "epoch": 1.1941274801947817, "grad_norm": 1.2443821430206299, "learning_rate": 4.806381079522648e-05, "loss": 0.1585, "step": 32860 }, { "epoch": 1.1944908786975799, "grad_norm": 0.46389827132225037, "learning_rate": 4.8061807886946e-05, "loss": 0.2578, "step": 32870 }, { "epoch": 1.1948542772003778, "grad_norm": 0.9189543128013611, "learning_rate": 4.8059803985014274e-05, "loss": 0.125, "step": 32880 }, { "epoch": 1.195217675703176, "grad_norm": 0.8623115420341492, "learning_rate": 4.805779908951763e-05, "loss": 0.1094, "step": 32890 }, { "epoch": 1.1955810742059743, "grad_norm": 0.5328871607780457, "learning_rate": 4.805579320054247e-05, "loss": 0.142, "step": 32900 }, { "epoch": 1.1959444727087725, "grad_norm": 0.8360912799835205, "learning_rate": 4.805378631817522e-05, "loss": 0.1465, "step": 32910 }, { "epoch": 1.1963078712115707, "grad_norm": 0.4089026153087616, "learning_rate": 4.805177844250234e-05, "loss": 0.1779, "step": 32920 }, { "epoch": 1.1966712697143689, "grad_norm": 1.4934437274932861, "learning_rate": 4.8049769573610336e-05, "loss": 0.1577, "step": 32930 }, { "epoch": 1.1970346682171669, "grad_norm": 0.991147518157959, "learning_rate": 4.8047759711585784e-05, "loss": 0.1302, "step": 32940 }, { "epoch": 1.197398066719965, "grad_norm": 4.548572540283203, "learning_rate": 4.804574885651526e-05, "loss": 0.1184, "step": 32950 }, { "epoch": 1.1977614652227633, "grad_norm": 1.7906454801559448, "learning_rate": 4.8043737008485424e-05, "loss": 0.138, "step": 32960 }, { "epoch": 1.1981248637255615, "grad_norm": 0.4827491044998169, "learning_rate": 4.804172416758294e-05, "loss": 0.1573, "step": 32970 }, { "epoch": 1.1984882622283597, "grad_norm": 0.8055851459503174, "learning_rate": 4.803971033389455e-05, "loss": 0.1203, "step": 32980 }, { "epoch": 1.1988516607311577, "grad_norm": 0.7492426633834839, "learning_rate": 4.8037695507507016e-05, "loss": 0.1158, "step": 32990 }, { "epoch": 
1.1992150592339559, "grad_norm": 0.8737430572509766, "learning_rate": 4.8035679688507154e-05, "loss": 0.1672, "step": 33000 }, { "epoch": 1.1992150592339559, "eval_loss": 0.36745160818099976, "eval_runtime": 180.2517, "eval_samples_per_second": 41.131, "eval_steps_per_second": 5.143, "eval_wer": 0.17665692450124348, "step": 33000 }, { "epoch": 1.199578457736754, "grad_norm": 0.9123022556304932, "learning_rate": 4.803366287698182e-05, "loss": 0.1182, "step": 33010 }, { "epoch": 1.1999418562395523, "grad_norm": 0.5147042870521545, "learning_rate": 4.803164507301789e-05, "loss": 0.1293, "step": 33020 }, { "epoch": 1.2003052547423505, "grad_norm": 2.508376359939575, "learning_rate": 4.8029626276702336e-05, "loss": 0.1518, "step": 33030 }, { "epoch": 1.2006686532451487, "grad_norm": 1.3006081581115723, "learning_rate": 4.802760648812213e-05, "loss": 0.1503, "step": 33040 }, { "epoch": 1.201032051747947, "grad_norm": 1.490337610244751, "learning_rate": 4.802558570736427e-05, "loss": 0.1589, "step": 33050 }, { "epoch": 1.201395450250745, "grad_norm": 0.6895734667778015, "learning_rate": 4.802356393451587e-05, "loss": 0.137, "step": 33060 }, { "epoch": 1.201758848753543, "grad_norm": 0.45895853638648987, "learning_rate": 4.8021541169664006e-05, "loss": 0.2112, "step": 33070 }, { "epoch": 1.2021222472563413, "grad_norm": 1.6609526872634888, "learning_rate": 4.801951741289585e-05, "loss": 0.1392, "step": 33080 }, { "epoch": 1.2024856457591395, "grad_norm": 1.1131823062896729, "learning_rate": 4.801749266429858e-05, "loss": 0.124, "step": 33090 }, { "epoch": 1.2028490442619377, "grad_norm": 0.547478973865509, "learning_rate": 4.8015466923959465e-05, "loss": 0.166, "step": 33100 }, { "epoch": 1.2032124427647357, "grad_norm": 0.778753936290741, "learning_rate": 4.801344019196576e-05, "loss": 0.1414, "step": 33110 }, { "epoch": 1.203575841267534, "grad_norm": 1.1527098417282104, "learning_rate": 4.801141246840481e-05, "loss": 0.1719, "step": 33120 }, { "epoch": 
1.2039392397703321, "grad_norm": 0.9628286361694336, "learning_rate": 4.800938375336395e-05, "loss": 0.1168, "step": 33130 }, { "epoch": 1.2043026382731303, "grad_norm": 1.7359286546707153, "learning_rate": 4.8007354046930624e-05, "loss": 0.1145, "step": 33140 }, { "epoch": 1.2046660367759285, "grad_norm": 0.8443882465362549, "learning_rate": 4.8005323349192276e-05, "loss": 0.138, "step": 33150 }, { "epoch": 1.2050294352787267, "grad_norm": 1.166198968887329, "learning_rate": 4.8003291660236396e-05, "loss": 0.164, "step": 33160 }, { "epoch": 1.2053928337815247, "grad_norm": 0.42992278933525085, "learning_rate": 4.800125898015052e-05, "loss": 0.1786, "step": 33170 }, { "epoch": 1.205756232284323, "grad_norm": 0.7348678112030029, "learning_rate": 4.799922530902223e-05, "loss": 0.1175, "step": 33180 }, { "epoch": 1.2061196307871211, "grad_norm": 1.4282450675964355, "learning_rate": 4.799719064693917e-05, "loss": 0.1397, "step": 33190 }, { "epoch": 1.2064830292899194, "grad_norm": 0.9985376596450806, "learning_rate": 4.7995154993988974e-05, "loss": 0.1382, "step": 33200 }, { "epoch": 1.2068464277927176, "grad_norm": 0.7168998718261719, "learning_rate": 4.799311835025937e-05, "loss": 0.1123, "step": 33210 }, { "epoch": 1.2072098262955158, "grad_norm": 0.521123468875885, "learning_rate": 4.799108071583811e-05, "loss": 0.1753, "step": 33220 }, { "epoch": 1.2075732247983137, "grad_norm": 1.0951159000396729, "learning_rate": 4.7989042090812976e-05, "loss": 0.1182, "step": 33230 }, { "epoch": 1.207936623301112, "grad_norm": 1.108727216720581, "learning_rate": 4.798700247527182e-05, "loss": 1.4247, "step": 33240 }, { "epoch": 1.2083000218039102, "grad_norm": 0.4534373879432678, "learning_rate": 4.7984961869302516e-05, "loss": 0.1715, "step": 33250 }, { "epoch": 1.2086634203067084, "grad_norm": 0.6849185824394226, "learning_rate": 4.798292027299298e-05, "loss": 0.1367, "step": 33260 }, { "epoch": 1.2090268188095066, "grad_norm": 0.8563576340675354, "learning_rate": 
4.7980877686431195e-05, "loss": 0.2058, "step": 33270 }, { "epoch": 1.2093902173123046, "grad_norm": 0.5488440990447998, "learning_rate": 4.797883410970514e-05, "loss": 0.1246, "step": 33280 }, { "epoch": 1.2097536158151028, "grad_norm": 0.5783109068870544, "learning_rate": 4.7976789542902895e-05, "loss": 0.1135, "step": 33290 }, { "epoch": 1.210117014317901, "grad_norm": 2.218514919281006, "learning_rate": 4.7974743986112536e-05, "loss": 0.8269, "step": 33300 }, { "epoch": 1.2104804128206992, "grad_norm": 1.6320664882659912, "learning_rate": 4.79726974394222e-05, "loss": 0.1185, "step": 33310 }, { "epoch": 1.2108438113234974, "grad_norm": 1.287618637084961, "learning_rate": 4.797064990292007e-05, "loss": 0.1815, "step": 33320 }, { "epoch": 1.2112072098262956, "grad_norm": 2.3232581615448, "learning_rate": 4.796860137669437e-05, "loss": 0.1285, "step": 33330 }, { "epoch": 1.2115706083290938, "grad_norm": 1.2804290056228638, "learning_rate": 4.796655186083335e-05, "loss": 0.1339, "step": 33340 }, { "epoch": 1.2119340068318918, "grad_norm": 0.6492500901222229, "learning_rate": 4.796450135542534e-05, "loss": 0.1278, "step": 33350 }, { "epoch": 1.21229740533469, "grad_norm": 1.7094756364822388, "learning_rate": 4.796244986055867e-05, "loss": 0.1337, "step": 33360 }, { "epoch": 1.2126608038374882, "grad_norm": 1.5763776302337646, "learning_rate": 4.796039737632173e-05, "loss": 0.2283, "step": 33370 }, { "epoch": 1.2130242023402864, "grad_norm": 0.631926417350769, "learning_rate": 4.795834390280296e-05, "loss": 0.2165, "step": 33380 }, { "epoch": 1.2133876008430846, "grad_norm": 1.4329982995986938, "learning_rate": 4.795628944009084e-05, "loss": 0.1255, "step": 33390 }, { "epoch": 1.2137509993458826, "grad_norm": 0.5400133728981018, "learning_rate": 4.795423398827389e-05, "loss": 0.1361, "step": 33400 }, { "epoch": 1.2141143978486808, "grad_norm": 0.8651421070098877, "learning_rate": 4.795217754744067e-05, "loss": 0.1336, "step": 33410 }, { "epoch": 1.214477796351479, 
"grad_norm": 0.32640397548675537, "learning_rate": 4.795012011767977e-05, "loss": 0.208, "step": 33420 }, { "epoch": 1.2148411948542772, "grad_norm": 2.425781726837158, "learning_rate": 4.794806169907987e-05, "loss": 0.1107, "step": 33430 }, { "epoch": 1.2152045933570754, "grad_norm": 1.9098165035247803, "learning_rate": 4.794600229172963e-05, "loss": 0.1087, "step": 33440 }, { "epoch": 1.2155679918598736, "grad_norm": 1.4842039346694946, "learning_rate": 4.794394189571779e-05, "loss": 0.141, "step": 33450 }, { "epoch": 1.2159313903626716, "grad_norm": 1.6379314661026, "learning_rate": 4.794188051113313e-05, "loss": 0.1382, "step": 33460 }, { "epoch": 1.2162947888654698, "grad_norm": 1.6831467151641846, "learning_rate": 4.7939818138064474e-05, "loss": 0.1564, "step": 33470 }, { "epoch": 1.216658187368268, "grad_norm": 0.4303675889968872, "learning_rate": 4.793775477660067e-05, "loss": 0.1153, "step": 33480 }, { "epoch": 1.2170215858710662, "grad_norm": 0.5871365070343018, "learning_rate": 4.7935690426830624e-05, "loss": 0.1122, "step": 33490 }, { "epoch": 1.2173849843738644, "grad_norm": 0.7488551735877991, "learning_rate": 4.7933625088843287e-05, "loss": 1.8561, "step": 33500 }, { "epoch": 1.2177483828766626, "grad_norm": 1.4515953063964844, "learning_rate": 4.793155876272764e-05, "loss": 0.127, "step": 33510 }, { "epoch": 1.2181117813794606, "grad_norm": 0.9288650155067444, "learning_rate": 4.7929491448572716e-05, "loss": 0.2149, "step": 33520 }, { "epoch": 1.2184751798822588, "grad_norm": 1.544545292854309, "learning_rate": 4.792742314646759e-05, "loss": 0.6921, "step": 33530 }, { "epoch": 1.218838578385057, "grad_norm": 1.1275858879089355, "learning_rate": 4.792535385650138e-05, "loss": 0.1592, "step": 33540 }, { "epoch": 1.2192019768878553, "grad_norm": 0.7861330509185791, "learning_rate": 4.7923283578763236e-05, "loss": 0.156, "step": 33550 }, { "epoch": 1.2195653753906535, "grad_norm": 1.7547698020935059, "learning_rate": 4.792121231334237e-05, "loss": 
0.135, "step": 33560 }, { "epoch": 1.2199287738934514, "grad_norm": 0.9989791512489319, "learning_rate": 4.7919140060328014e-05, "loss": 0.2015, "step": 33570 }, { "epoch": 1.2202921723962497, "grad_norm": 0.8089576959609985, "learning_rate": 4.791706681980945e-05, "loss": 2.7874, "step": 33580 }, { "epoch": 1.2206555708990479, "grad_norm": 1.2729178667068481, "learning_rate": 4.791499259187603e-05, "loss": 0.1749, "step": 33590 }, { "epoch": 1.221018969401846, "grad_norm": 1.6203336715698242, "learning_rate": 4.7912917376617106e-05, "loss": 0.1524, "step": 33600 }, { "epoch": 1.221018969401846, "eval_loss": 0.3562403917312622, "eval_runtime": 180.3906, "eval_samples_per_second": 41.1, "eval_steps_per_second": 5.139, "eval_wer": 0.1815582623849547, "step": 33600 }, { "epoch": 1.2213823679046443, "grad_norm": 1.8868520259857178, "learning_rate": 4.7910841174122104e-05, "loss": 0.1514, "step": 33610 }, { "epoch": 1.2217457664074425, "grad_norm": 1.3601691722869873, "learning_rate": 4.7908763984480465e-05, "loss": 0.1675, "step": 33620 }, { "epoch": 1.2221091649102407, "grad_norm": 1.2268040180206299, "learning_rate": 4.790668580778169e-05, "loss": 0.1363, "step": 33630 }, { "epoch": 1.2224725634130387, "grad_norm": 1.918747901916504, "learning_rate": 4.790460664411534e-05, "loss": 0.1397, "step": 33640 }, { "epoch": 1.2228359619158369, "grad_norm": 0.6259877681732178, "learning_rate": 4.790252649357098e-05, "loss": 0.1555, "step": 33650 }, { "epoch": 1.223199360418635, "grad_norm": 2.5940511226654053, "learning_rate": 4.7900445356238235e-05, "loss": 0.1508, "step": 33660 }, { "epoch": 1.2235627589214333, "grad_norm": 1.1692243814468384, "learning_rate": 4.7898363232206785e-05, "loss": 0.1642, "step": 33670 }, { "epoch": 1.2239261574242315, "grad_norm": 1.459763526916504, "learning_rate": 4.789628012156633e-05, "loss": 0.1325, "step": 33680 }, { "epoch": 1.2242895559270295, "grad_norm": 0.4898362159729004, "learning_rate": 4.789419602440663e-05, "loss": 0.1604, 
"step": 33690 }, { "epoch": 1.2246529544298277, "grad_norm": 1.6771429777145386, "learning_rate": 4.7892110940817495e-05, "loss": 0.217, "step": 33700 }, { "epoch": 1.225016352932626, "grad_norm": 1.0040748119354248, "learning_rate": 4.789002487088874e-05, "loss": 0.1428, "step": 33710 }, { "epoch": 1.225379751435424, "grad_norm": 0.5210689306259155, "learning_rate": 4.788793781471025e-05, "loss": 0.1777, "step": 33720 }, { "epoch": 1.2257431499382223, "grad_norm": 2.0783729553222656, "learning_rate": 4.788584977237196e-05, "loss": 0.1373, "step": 33730 }, { "epoch": 1.2261065484410205, "grad_norm": 0.8238822221755981, "learning_rate": 4.788376074396384e-05, "loss": 0.1246, "step": 33740 }, { "epoch": 1.2264699469438185, "grad_norm": 1.1031908988952637, "learning_rate": 4.7881670729575875e-05, "loss": 0.1488, "step": 33750 }, { "epoch": 1.2268333454466167, "grad_norm": 1.3136149644851685, "learning_rate": 4.787957972929814e-05, "loss": 0.1382, "step": 33760 }, { "epoch": 1.227196743949415, "grad_norm": 0.9418723583221436, "learning_rate": 4.7877487743220726e-05, "loss": 0.1531, "step": 33770 }, { "epoch": 1.2275601424522131, "grad_norm": 1.3498002290725708, "learning_rate": 4.7875394771433755e-05, "loss": 0.1345, "step": 33780 }, { "epoch": 1.2279235409550113, "grad_norm": 1.0489355325698853, "learning_rate": 4.7873300814027415e-05, "loss": 0.1522, "step": 33790 }, { "epoch": 1.2282869394578095, "grad_norm": 1.1034955978393555, "learning_rate": 4.7871205871091926e-05, "loss": 0.1721, "step": 33800 }, { "epoch": 1.2286503379606075, "grad_norm": 1.1162317991256714, "learning_rate": 4.786910994271756e-05, "loss": 0.1774, "step": 33810 }, { "epoch": 1.2290137364634057, "grad_norm": 0.6511724591255188, "learning_rate": 4.786701302899461e-05, "loss": 0.1491, "step": 33820 }, { "epoch": 1.229377134966204, "grad_norm": 0.730034589767456, "learning_rate": 4.786491513001343e-05, "loss": 0.1304, "step": 33830 }, { "epoch": 1.2297405334690021, "grad_norm": 0.3531613051891327, 
"learning_rate": 4.786281624586441e-05, "loss": 0.1022, "step": 33840 }, { "epoch": 1.2301039319718003, "grad_norm": 0.8404261469841003, "learning_rate": 4.786071637663798e-05, "loss": 0.1366, "step": 33850 }, { "epoch": 1.2304673304745983, "grad_norm": 1.0911661386489868, "learning_rate": 4.785861552242462e-05, "loss": 0.1132, "step": 33860 }, { "epoch": 1.2308307289773965, "grad_norm": 0.9053283333778381, "learning_rate": 4.785651368331485e-05, "loss": 0.1393, "step": 33870 }, { "epoch": 1.2311941274801947, "grad_norm": 1.065520167350769, "learning_rate": 4.7854410859399236e-05, "loss": 0.1277, "step": 33880 }, { "epoch": 1.231557525982993, "grad_norm": 0.3727855384349823, "learning_rate": 4.785230705076837e-05, "loss": 0.1213, "step": 33890 }, { "epoch": 1.2319209244857912, "grad_norm": 1.7203010320663452, "learning_rate": 4.78502022575129e-05, "loss": 0.1735, "step": 33900 }, { "epoch": 1.2322843229885894, "grad_norm": 0.7186889052391052, "learning_rate": 4.7848096479723516e-05, "loss": 0.1195, "step": 33910 }, { "epoch": 1.2326477214913876, "grad_norm": 1.0675809383392334, "learning_rate": 4.784598971749095e-05, "loss": 0.1699, "step": 33920 }, { "epoch": 1.2330111199941856, "grad_norm": 1.6882377862930298, "learning_rate": 4.784388197090597e-05, "loss": 0.1275, "step": 33930 }, { "epoch": 1.2333745184969838, "grad_norm": 0.4500318765640259, "learning_rate": 4.78417732400594e-05, "loss": 0.1248, "step": 33940 }, { "epoch": 1.233737916999782, "grad_norm": 1.0862751007080078, "learning_rate": 4.783966352504209e-05, "loss": 0.1585, "step": 33950 }, { "epoch": 1.2341013155025802, "grad_norm": 0.9130736589431763, "learning_rate": 4.7837552825944943e-05, "loss": 0.1489, "step": 33960 }, { "epoch": 1.2344647140053784, "grad_norm": 0.47646433115005493, "learning_rate": 4.783544114285891e-05, "loss": 0.135, "step": 33970 }, { "epoch": 1.2348281125081764, "grad_norm": 0.7090937495231628, "learning_rate": 4.783332847587495e-05, "loss": 0.1231, "step": 33980 }, { "epoch": 
1.2351915110109746, "grad_norm": 2.1009280681610107, "learning_rate": 4.7831214825084117e-05, "loss": 0.1239, "step": 33990 }, { "epoch": 1.2355549095137728, "grad_norm": 0.6040928363800049, "learning_rate": 4.782910019057747e-05, "loss": 0.1757, "step": 34000 }, { "epoch": 1.235918308016571, "grad_norm": 3.8224098682403564, "learning_rate": 4.782698457244612e-05, "loss": 0.1201, "step": 34010 }, { "epoch": 1.2362817065193692, "grad_norm": 0.4506910741329193, "learning_rate": 4.782486797078122e-05, "loss": 0.1381, "step": 34020 }, { "epoch": 1.2366451050221674, "grad_norm": 1.0445079803466797, "learning_rate": 4.782275038567398e-05, "loss": 0.1386, "step": 34030 }, { "epoch": 1.2370085035249654, "grad_norm": 1.1579469442367554, "learning_rate": 4.7820631817215625e-05, "loss": 0.1221, "step": 34040 }, { "epoch": 1.2373719020277636, "grad_norm": 1.023468017578125, "learning_rate": 4.781851226549743e-05, "loss": 0.1524, "step": 34050 }, { "epoch": 1.2377353005305618, "grad_norm": 1.0542868375778198, "learning_rate": 4.781639173061074e-05, "loss": 0.1268, "step": 34060 }, { "epoch": 1.23809869903336, "grad_norm": 0.7573347687721252, "learning_rate": 4.7814270212646915e-05, "loss": 0.2058, "step": 34070 }, { "epoch": 1.2384620975361582, "grad_norm": 1.2218323945999146, "learning_rate": 4.781214771169736e-05, "loss": 0.1141, "step": 34080 }, { "epoch": 1.2388254960389564, "grad_norm": 0.7725077867507935, "learning_rate": 4.781002422785352e-05, "loss": 0.1221, "step": 34090 }, { "epoch": 1.2391888945417544, "grad_norm": 2.2234578132629395, "learning_rate": 4.78078997612069e-05, "loss": 1.9034, "step": 34100 }, { "epoch": 1.2395522930445526, "grad_norm": 1.260764718055725, "learning_rate": 4.780577431184902e-05, "loss": 0.1205, "step": 34110 }, { "epoch": 1.2399156915473508, "grad_norm": 0.5173097252845764, "learning_rate": 4.780364787987148e-05, "loss": 0.2101, "step": 34120 }, { "epoch": 1.240279090050149, "grad_norm": 0.9755317568778992, "learning_rate": 
4.780152046536588e-05, "loss": 0.1041, "step": 34130 }, { "epoch": 1.2406424885529472, "grad_norm": 1.4319573640823364, "learning_rate": 4.77993920684239e-05, "loss": 0.1382, "step": 34140 }, { "epoch": 1.2410058870557452, "grad_norm": 0.8623887896537781, "learning_rate": 4.7797262689137224e-05, "loss": 0.1646, "step": 34150 }, { "epoch": 1.2413692855585434, "grad_norm": 1.1775789260864258, "learning_rate": 4.779513232759762e-05, "loss": 0.124, "step": 34160 }, { "epoch": 1.2417326840613416, "grad_norm": 23.601593017578125, "learning_rate": 4.779300098389687e-05, "loss": 0.304, "step": 34170 }, { "epoch": 1.2420960825641398, "grad_norm": 0.9336787462234497, "learning_rate": 4.77908686581268e-05, "loss": 0.1376, "step": 34180 }, { "epoch": 1.242459481066938, "grad_norm": 0.7417952418327332, "learning_rate": 4.77887353503793e-05, "loss": 0.1208, "step": 34190 }, { "epoch": 1.2428228795697362, "grad_norm": 1.48567795753479, "learning_rate": 4.778660106074626e-05, "loss": 0.1198, "step": 34200 }, { "epoch": 1.2428228795697362, "eval_loss": 0.3608033359050751, "eval_runtime": 180.8757, "eval_samples_per_second": 40.989, "eval_steps_per_second": 5.125, "eval_wer": 0.18227530996423838, "step": 34200 }, { "epoch": 1.2431862780725345, "grad_norm": 4.077025890350342, "learning_rate": 4.778446578931967e-05, "loss": 0.13, "step": 34210 }, { "epoch": 1.2435496765753324, "grad_norm": 1.267830729484558, "learning_rate": 4.7782329536191504e-05, "loss": 0.183, "step": 34220 }, { "epoch": 1.2439130750781306, "grad_norm": 0.9263830780982971, "learning_rate": 4.778019230145383e-05, "loss": 0.135, "step": 34230 }, { "epoch": 1.2442764735809289, "grad_norm": 1.3920031785964966, "learning_rate": 4.777805408519872e-05, "loss": 0.1485, "step": 34240 }, { "epoch": 1.244639872083727, "grad_norm": 1.263641357421875, "learning_rate": 4.7775914887518306e-05, "loss": 0.1503, "step": 34250 }, { "epoch": 1.2450032705865253, "grad_norm": 1.514445185661316, "learning_rate": 4.777377470850475e-05, 
"loss": 0.1604, "step": 34260 }, { "epoch": 1.2453666690893233, "grad_norm": 0.9733619093894958, "learning_rate": 4.7771633548250266e-05, "loss": 0.1674, "step": 34270 }, { "epoch": 1.2457300675921215, "grad_norm": 1.6468124389648438, "learning_rate": 4.776949140684712e-05, "loss": 0.1229, "step": 34280 }, { "epoch": 1.2460934660949197, "grad_norm": 0.9954056739807129, "learning_rate": 4.77673482843876e-05, "loss": 0.1237, "step": 34290 }, { "epoch": 1.2464568645977179, "grad_norm": 1.7785327434539795, "learning_rate": 4.776520418096406e-05, "loss": 0.1784, "step": 34300 }, { "epoch": 1.246820263100516, "grad_norm": 1.952333688735962, "learning_rate": 4.776305909666886e-05, "loss": 0.1355, "step": 34310 }, { "epoch": 1.2471836616033143, "grad_norm": 0.7019221782684326, "learning_rate": 4.7760913031594445e-05, "loss": 0.1856, "step": 34320 }, { "epoch": 1.2475470601061123, "grad_norm": 2.3900887966156006, "learning_rate": 4.775876598583327e-05, "loss": 0.1279, "step": 34330 }, { "epoch": 1.2479104586089105, "grad_norm": 2.4521565437316895, "learning_rate": 4.7756617959477834e-05, "loss": 0.1384, "step": 34340 }, { "epoch": 1.2482738571117087, "grad_norm": 1.043819546699524, "learning_rate": 4.7754468952620704e-05, "loss": 0.1485, "step": 34350 }, { "epoch": 1.248637255614507, "grad_norm": 2.2905571460723877, "learning_rate": 4.775231896535446e-05, "loss": 0.1342, "step": 34360 }, { "epoch": 1.249000654117305, "grad_norm": 1.3930597305297852, "learning_rate": 4.7750167997771756e-05, "loss": 0.1989, "step": 34370 }, { "epoch": 1.2493640526201033, "grad_norm": 1.1254252195358276, "learning_rate": 4.7748016049965255e-05, "loss": 0.113, "step": 34380 }, { "epoch": 1.2497274511229013, "grad_norm": 0.8257030248641968, "learning_rate": 4.774586312202768e-05, "loss": 0.1212, "step": 34390 }, { "epoch": 1.2500908496256995, "grad_norm": 1.2986866235733032, "learning_rate": 4.774370921405179e-05, "loss": 0.1446, "step": 34400 }, { "epoch": 1.2504542481284977, "grad_norm": 
2.2006325721740723, "learning_rate": 4.77415543261304e-05, "loss": 0.1567, "step": 34410 }, { "epoch": 1.250817646631296, "grad_norm": 0.6778092384338379, "learning_rate": 4.7739398458356335e-05, "loss": 0.1768, "step": 34420 }, { "epoch": 1.2511810451340941, "grad_norm": 1.152696132659912, "learning_rate": 4.773724161082251e-05, "loss": 0.1024, "step": 34430 }, { "epoch": 1.251544443636892, "grad_norm": 2.375783681869507, "learning_rate": 4.7735083783621835e-05, "loss": 0.2642, "step": 34440 }, { "epoch": 1.2519078421396903, "grad_norm": 1.3765895366668701, "learning_rate": 4.77329249768473e-05, "loss": 0.1861, "step": 34450 }, { "epoch": 1.2522712406424885, "grad_norm": 1.7743607759475708, "learning_rate": 4.773076519059191e-05, "loss": 0.1429, "step": 34460 }, { "epoch": 1.2526346391452867, "grad_norm": 1.4731152057647705, "learning_rate": 4.772860442494872e-05, "loss": 0.1508, "step": 34470 }, { "epoch": 1.252998037648085, "grad_norm": 1.5612653493881226, "learning_rate": 4.7726442680010836e-05, "loss": 0.1291, "step": 34480 }, { "epoch": 1.2533614361508831, "grad_norm": 1.9972872734069824, "learning_rate": 4.77242799558714e-05, "loss": 0.1154, "step": 34490 }, { "epoch": 1.2537248346536813, "grad_norm": 0.7144235372543335, "learning_rate": 4.772211625262359e-05, "loss": 0.1793, "step": 34500 }, { "epoch": 1.2540882331564793, "grad_norm": 10.059864044189453, "learning_rate": 4.7719951570360636e-05, "loss": 0.1346, "step": 34510 }, { "epoch": 1.2544516316592775, "grad_norm": 1.0801091194152832, "learning_rate": 4.771778590917581e-05, "loss": 0.1918, "step": 34520 }, { "epoch": 1.2548150301620757, "grad_norm": 2.0628061294555664, "learning_rate": 4.771561926916242e-05, "loss": 0.1477, "step": 34530 }, { "epoch": 1.255178428664874, "grad_norm": 2.5143215656280518, "learning_rate": 4.771345165041381e-05, "loss": 0.1226, "step": 34540 }, { "epoch": 1.2555418271676722, "grad_norm": 1.197352409362793, "learning_rate": 4.7711283053023394e-05, "loss": 0.152, "step": 
34550 }, { "epoch": 1.2559052256704701, "grad_norm": 0.9427943825721741, "learning_rate": 4.7709113477084595e-05, "loss": 0.1359, "step": 34560 }, { "epoch": 1.2562686241732683, "grad_norm": 1.0930500030517578, "learning_rate": 4.770694292269089e-05, "loss": 0.1659, "step": 34570 }, { "epoch": 1.2566320226760666, "grad_norm": 0.7914316654205322, "learning_rate": 4.770477138993581e-05, "loss": 0.1224, "step": 34580 }, { "epoch": 1.2569954211788648, "grad_norm": 0.6064370274543762, "learning_rate": 4.770259887891292e-05, "loss": 0.1153, "step": 34590 }, { "epoch": 1.257358819681663, "grad_norm": 0.8653318285942078, "learning_rate": 4.770042538971581e-05, "loss": 0.1715, "step": 34600 }, { "epoch": 1.257722218184461, "grad_norm": 0.5470715761184692, "learning_rate": 4.7698250922438145e-05, "loss": 0.1447, "step": 34610 }, { "epoch": 1.2580856166872594, "grad_norm": 0.8058337569236755, "learning_rate": 4.769607547717361e-05, "loss": 0.1742, "step": 34620 }, { "epoch": 1.2584490151900574, "grad_norm": 2.5231611728668213, "learning_rate": 4.7693899054015926e-05, "loss": 1.9069, "step": 34630 }, { "epoch": 1.2588124136928556, "grad_norm": 0.603464663028717, "learning_rate": 4.7691721653058886e-05, "loss": 0.1244, "step": 34640 }, { "epoch": 1.2591758121956538, "grad_norm": 0.7844828963279724, "learning_rate": 4.76895432743963e-05, "loss": 0.3787, "step": 34650 }, { "epoch": 1.259539210698452, "grad_norm": 0.7887173295021057, "learning_rate": 4.7687363918122016e-05, "loss": 0.1268, "step": 34660 }, { "epoch": 1.2599026092012502, "grad_norm": 0.669452965259552, "learning_rate": 4.768518358432994e-05, "loss": 0.1572, "step": 34670 }, { "epoch": 1.2602660077040482, "grad_norm": 1.193303108215332, "learning_rate": 4.768300227311403e-05, "loss": 0.5754, "step": 34680 }, { "epoch": 1.2606294062068464, "grad_norm": 0.8210042715072632, "learning_rate": 4.7680819984568246e-05, "loss": 0.1372, "step": 34690 }, { "epoch": 1.2609928047096446, "grad_norm": 2.98244309425354, 
"learning_rate": 4.767863671878663e-05, "loss": 0.2028, "step": 34700 }, { "epoch": 1.2613562032124428, "grad_norm": 1.3739604949951172, "learning_rate": 4.767645247586325e-05, "loss": 0.164, "step": 34710 }, { "epoch": 1.261719601715241, "grad_norm": 0.6770296096801758, "learning_rate": 4.7674267255892226e-05, "loss": 0.2234, "step": 34720 }, { "epoch": 1.262083000218039, "grad_norm": 0.5827689170837402, "learning_rate": 4.767208105896769e-05, "loss": 0.1405, "step": 34730 }, { "epoch": 1.2624463987208372, "grad_norm": 0.7818326354026794, "learning_rate": 4.766989388518385e-05, "loss": 0.1194, "step": 34740 }, { "epoch": 1.2628097972236354, "grad_norm": 0.8514626026153564, "learning_rate": 4.7667705734634946e-05, "loss": 0.1529, "step": 34750 }, { "epoch": 1.2631731957264336, "grad_norm": 0.7973842024803162, "learning_rate": 4.766551660741525e-05, "loss": 0.1224, "step": 34760 }, { "epoch": 1.2635365942292318, "grad_norm": 1.019089937210083, "learning_rate": 4.766332650361909e-05, "loss": 0.1768, "step": 34770 }, { "epoch": 1.26389999273203, "grad_norm": 1.0458087921142578, "learning_rate": 4.766113542334082e-05, "loss": 0.1382, "step": 34780 }, { "epoch": 1.2642633912348282, "grad_norm": 1.0272470712661743, "learning_rate": 4.765894336667486e-05, "loss": 0.1272, "step": 34790 }, { "epoch": 1.2646267897376262, "grad_norm": 2.0589025020599365, "learning_rate": 4.765675033371565e-05, "loss": 0.1682, "step": 34800 }, { "epoch": 1.2646267897376262, "eval_loss": 0.3476085662841797, "eval_runtime": 181.248, "eval_samples_per_second": 40.905, "eval_steps_per_second": 5.115, "eval_wer": 0.17651169967505945, "step": 34800 }, { "epoch": 1.2649901882404244, "grad_norm": 1.1303410530090332, "learning_rate": 4.7654556324557685e-05, "loss": 0.1348, "step": 34810 }, { "epoch": 1.2653535867432226, "grad_norm": 0.799231231212616, "learning_rate": 4.765236133929549e-05, "loss": 0.1645, "step": 34820 }, { "epoch": 1.2657169852460208, "grad_norm": 1.2402738332748413, "learning_rate": 
4.765016537802364e-05, "loss": 0.1235, "step": 34830 }, { "epoch": 1.266080383748819, "grad_norm": 4.433220386505127, "learning_rate": 4.7647968440836753e-05, "loss": 0.1706, "step": 34840 }, { "epoch": 1.266443782251617, "grad_norm": 0.8201845288276672, "learning_rate": 4.764577052782949e-05, "loss": 0.1704, "step": 34850 }, { "epoch": 1.2668071807544152, "grad_norm": 1.2809802293777466, "learning_rate": 4.764357163909655e-05, "loss": 0.1368, "step": 34860 }, { "epoch": 1.2671705792572134, "grad_norm": 0.7995765209197998, "learning_rate": 4.7641371774732676e-05, "loss": 0.1773, "step": 34870 }, { "epoch": 1.2675339777600116, "grad_norm": 1.1338168382644653, "learning_rate": 4.763917093483264e-05, "loss": 0.138, "step": 34880 }, { "epoch": 1.2678973762628098, "grad_norm": 0.85684734582901, "learning_rate": 4.763696911949129e-05, "loss": 0.1387, "step": 34890 }, { "epoch": 1.2682607747656078, "grad_norm": 1.212156057357788, "learning_rate": 4.763476632880348e-05, "loss": 0.1377, "step": 34900 }, { "epoch": 1.2686241732684063, "grad_norm": 2.2248573303222656, "learning_rate": 4.7632562562864125e-05, "loss": 0.1295, "step": 34910 }, { "epoch": 1.2689875717712042, "grad_norm": 1.2567734718322754, "learning_rate": 4.763035782176818e-05, "loss": 0.2109, "step": 34920 }, { "epoch": 1.2693509702740025, "grad_norm": 0.9226292967796326, "learning_rate": 4.7628152105610624e-05, "loss": 0.1315, "step": 34930 }, { "epoch": 1.2697143687768007, "grad_norm": 0.9735257029533386, "learning_rate": 4.762594541448651e-05, "loss": 0.1139, "step": 34940 }, { "epoch": 1.2700777672795989, "grad_norm": 3.8411102294921875, "learning_rate": 4.7623737748490914e-05, "loss": 0.5175, "step": 34950 }, { "epoch": 1.270441165782397, "grad_norm": 1.0780479907989502, "learning_rate": 4.762152910771895e-05, "loss": 0.1226, "step": 34960 }, { "epoch": 1.270804564285195, "grad_norm": 0.7885404229164124, "learning_rate": 4.761931949226579e-05, "loss": 0.1821, "step": 34970 }, { "epoch": 
1.2711679627879933, "grad_norm": 3.387125015258789, "learning_rate": 4.761710890222663e-05, "loss": 0.1608, "step": 34980 }, { "epoch": 1.2715313612907915, "grad_norm": 0.9549399614334106, "learning_rate": 4.761489733769672e-05, "loss": 0.1071, "step": 34990 }, { "epoch": 1.2718947597935897, "grad_norm": 1.3453798294067383, "learning_rate": 4.761268479877134e-05, "loss": 0.1466, "step": 35000 }, { "epoch": 1.2722581582963879, "grad_norm": 0.9733071327209473, "learning_rate": 4.761047128554584e-05, "loss": 0.1646, "step": 35010 }, { "epoch": 1.2726215567991859, "grad_norm": 0.5874946713447571, "learning_rate": 4.760825679811557e-05, "loss": 0.2291, "step": 35020 }, { "epoch": 1.272984955301984, "grad_norm": 1.9506993293762207, "learning_rate": 4.7606041336575965e-05, "loss": 0.2256, "step": 35030 }, { "epoch": 1.2733483538047823, "grad_norm": 1.2343640327453613, "learning_rate": 4.760382490102247e-05, "loss": 0.133, "step": 35040 }, { "epoch": 1.2737117523075805, "grad_norm": 0.38314980268478394, "learning_rate": 4.7601607491550574e-05, "loss": 0.1682, "step": 35050 }, { "epoch": 1.2740751508103787, "grad_norm": 0.928424596786499, "learning_rate": 4.7599389108255846e-05, "loss": 0.0998, "step": 35060 }, { "epoch": 1.274438549313177, "grad_norm": 1.730793833732605, "learning_rate": 4.7597169751233833e-05, "loss": 0.1682, "step": 35070 }, { "epoch": 1.2748019478159751, "grad_norm": 1.0121824741363525, "learning_rate": 4.7594949420580184e-05, "loss": 0.1093, "step": 35080 }, { "epoch": 1.275165346318773, "grad_norm": 1.4093934297561646, "learning_rate": 4.759272811639055e-05, "loss": 0.156, "step": 35090 }, { "epoch": 1.2755287448215713, "grad_norm": 0.9377339482307434, "learning_rate": 4.759050583876066e-05, "loss": 0.1577, "step": 35100 }, { "epoch": 1.2758921433243695, "grad_norm": 1.6535552740097046, "learning_rate": 4.7588282587786246e-05, "loss": 0.1117, "step": 35110 }, { "epoch": 1.2762555418271677, "grad_norm": 0.7115573287010193, "learning_rate": 
4.75860583635631e-05, "loss": 0.1443, "step": 35120 }, { "epoch": 1.276618940329966, "grad_norm": 1.6995899677276611, "learning_rate": 4.7583833166187065e-05, "loss": 0.126, "step": 35130 }, { "epoch": 1.276982338832764, "grad_norm": 1.9350817203521729, "learning_rate": 4.7581606995754005e-05, "loss": 0.1224, "step": 35140 }, { "epoch": 1.2773457373355621, "grad_norm": 1.8662594556808472, "learning_rate": 4.757937985235985e-05, "loss": 0.1735, "step": 35150 }, { "epoch": 1.2777091358383603, "grad_norm": 3.6977062225341797, "learning_rate": 4.7577151736100554e-05, "loss": 0.162, "step": 35160 }, { "epoch": 1.2780725343411585, "grad_norm": 0.5185838341712952, "learning_rate": 4.757492264707213e-05, "loss": 0.1593, "step": 35170 }, { "epoch": 1.2784359328439567, "grad_norm": 0.6665944457054138, "learning_rate": 4.7572692585370596e-05, "loss": 0.1144, "step": 35180 }, { "epoch": 1.2787993313467547, "grad_norm": 1.1617207527160645, "learning_rate": 4.757046155109206e-05, "loss": 0.1162, "step": 35190 }, { "epoch": 1.2791627298495531, "grad_norm": 1.3124502897262573, "learning_rate": 4.756822954433264e-05, "loss": 0.1441, "step": 35200 }, { "epoch": 1.2795261283523511, "grad_norm": 1.4907313585281372, "learning_rate": 4.756599656518851e-05, "loss": 0.1275, "step": 35210 }, { "epoch": 1.2798895268551493, "grad_norm": 1.0033677816390991, "learning_rate": 4.756376261375587e-05, "loss": 0.1795, "step": 35220 }, { "epoch": 1.2802529253579475, "grad_norm": 0.9439616799354553, "learning_rate": 4.756152769013099e-05, "loss": 0.1158, "step": 35230 }, { "epoch": 1.2806163238607458, "grad_norm": 2.641103506088257, "learning_rate": 4.755929179441016e-05, "loss": 0.1102, "step": 35240 }, { "epoch": 1.280979722363544, "grad_norm": 1.1864644289016724, "learning_rate": 4.7557054926689694e-05, "loss": 0.136, "step": 35250 }, { "epoch": 1.281343120866342, "grad_norm": 0.7801216244697571, "learning_rate": 4.755481708706601e-05, "loss": 0.1436, "step": 35260 }, { "epoch": 
1.2817065193691402, "grad_norm": 0.5771633386611938, "learning_rate": 4.7552578275635494e-05, "loss": 0.1687, "step": 35270 }, { "epoch": 1.2820699178719384, "grad_norm": 1.247269630432129, "learning_rate": 4.755033849249463e-05, "loss": 0.1168, "step": 35280 }, { "epoch": 1.2824333163747366, "grad_norm": 1.3845196962356567, "learning_rate": 4.7548097737739905e-05, "loss": 0.1122, "step": 35290 }, { "epoch": 1.2827967148775348, "grad_norm": 1.6793280839920044, "learning_rate": 4.754585601146788e-05, "loss": 0.168, "step": 35300 }, { "epoch": 1.2831601133803328, "grad_norm": 0.8970069289207458, "learning_rate": 4.754361331377514e-05, "loss": 0.1172, "step": 35310 }, { "epoch": 1.283523511883131, "grad_norm": 0.6038461923599243, "learning_rate": 4.7541369644758315e-05, "loss": 0.1962, "step": 35320 }, { "epoch": 1.2838869103859292, "grad_norm": 0.9473531246185303, "learning_rate": 4.753912500451407e-05, "loss": 0.1288, "step": 35330 }, { "epoch": 1.2842503088887274, "grad_norm": 1.5339337587356567, "learning_rate": 4.753687939313912e-05, "loss": 0.1644, "step": 35340 }, { "epoch": 1.2846137073915256, "grad_norm": 0.9454206228256226, "learning_rate": 4.753463281073023e-05, "loss": 0.1319, "step": 35350 }, { "epoch": 1.2849771058943238, "grad_norm": 4.068907260894775, "learning_rate": 4.753238525738419e-05, "loss": 0.1241, "step": 35360 }, { "epoch": 1.285340504397122, "grad_norm": 1.1616491079330444, "learning_rate": 4.753013673319784e-05, "loss": 0.2266, "step": 35370 }, { "epoch": 1.28570390289992, "grad_norm": 1.232142448425293, "learning_rate": 4.7527887238268065e-05, "loss": 0.1438, "step": 35380 }, { "epoch": 1.2860673014027182, "grad_norm": 0.5053390860557556, "learning_rate": 4.7525636772691775e-05, "loss": 0.1214, "step": 35390 }, { "epoch": 1.2864306999055164, "grad_norm": 0.9241679310798645, "learning_rate": 4.752338533656594e-05, "loss": 0.3857, "step": 35400 }, { "epoch": 1.2864306999055164, "eval_loss": 0.33814677596092224, "eval_runtime": 180.0316, 
"eval_samples_per_second": 41.182, "eval_steps_per_second": 5.149, "eval_wer": 0.1815401092816817, "step": 35400 }, { "epoch": 1.2867940984083146, "grad_norm": 1.1576555967330933, "learning_rate": 4.7521132929987575e-05, "loss": 0.1429, "step": 35410 }, { "epoch": 1.2871574969111128, "grad_norm": 0.4765828549861908, "learning_rate": 4.751887955305372e-05, "loss": 0.1976, "step": 35420 }, { "epoch": 1.2875208954139108, "grad_norm": 0.8831065893173218, "learning_rate": 4.751662520586148e-05, "loss": 0.1543, "step": 35430 }, { "epoch": 1.287884293916709, "grad_norm": 1.9074327945709229, "learning_rate": 4.751436988850796e-05, "loss": 0.133, "step": 35440 }, { "epoch": 1.2882476924195072, "grad_norm": 1.6553431749343872, "learning_rate": 4.7512113601090356e-05, "loss": 0.1731, "step": 35450 }, { "epoch": 1.2886110909223054, "grad_norm": 1.2409085035324097, "learning_rate": 4.750985634370587e-05, "loss": 0.119, "step": 35460 }, { "epoch": 1.2889744894251036, "grad_norm": 4.575315475463867, "learning_rate": 4.7507598116451763e-05, "loss": 0.2287, "step": 35470 }, { "epoch": 1.2893378879279016, "grad_norm": 1.1517298221588135, "learning_rate": 4.7505338919425334e-05, "loss": 0.1263, "step": 35480 }, { "epoch": 1.2897012864307, "grad_norm": 0.8862209320068359, "learning_rate": 4.7503078752723935e-05, "loss": 0.9246, "step": 35490 }, { "epoch": 1.290064684933498, "grad_norm": 2.2056024074554443, "learning_rate": 4.750081761644493e-05, "loss": 0.1316, "step": 35500 }, { "epoch": 1.2904280834362962, "grad_norm": 1.9249043464660645, "learning_rate": 4.749855551068576e-05, "loss": 0.1341, "step": 35510 }, { "epoch": 1.2907914819390944, "grad_norm": 0.4050438404083252, "learning_rate": 4.749629243554387e-05, "loss": 0.1876, "step": 35520 }, { "epoch": 1.2911548804418926, "grad_norm": 0.8166261911392212, "learning_rate": 4.74940283911168e-05, "loss": 0.1141, "step": 35530 }, { "epoch": 1.2915182789446908, "grad_norm": 0.4988127648830414, "learning_rate": 4.749176337750206e-05, 
"loss": 0.1548, "step": 35540 }, { "epoch": 1.2918816774474888, "grad_norm": 1.80185067653656, "learning_rate": 4.748949739479728e-05, "loss": 0.1658, "step": 35550 }, { "epoch": 1.292245075950287, "grad_norm": 0.9549736380577087, "learning_rate": 4.748723044310006e-05, "loss": 0.1373, "step": 35560 }, { "epoch": 1.2926084744530852, "grad_norm": 1.3219162225723267, "learning_rate": 4.74849625225081e-05, "loss": 0.2101, "step": 35570 }, { "epoch": 1.2929718729558835, "grad_norm": 1.4803717136383057, "learning_rate": 4.74826936331191e-05, "loss": 0.121, "step": 35580 }, { "epoch": 1.2933352714586817, "grad_norm": 2.6438705921173096, "learning_rate": 4.7480423775030834e-05, "loss": 0.13, "step": 35590 }, { "epoch": 1.2936986699614796, "grad_norm": 2.5969061851501465, "learning_rate": 4.7478152948341094e-05, "loss": 0.165, "step": 35600 }, { "epoch": 1.2940620684642778, "grad_norm": 0.6594710946083069, "learning_rate": 4.74758811531477e-05, "loss": 0.3067, "step": 35610 }, { "epoch": 1.294425466967076, "grad_norm": 1.1262328624725342, "learning_rate": 4.747360838954858e-05, "loss": 0.1782, "step": 35620 }, { "epoch": 1.2947888654698743, "grad_norm": 0.9950854182243347, "learning_rate": 4.747133465764163e-05, "loss": 0.1254, "step": 35630 }, { "epoch": 1.2951522639726725, "grad_norm": 0.7515049576759338, "learning_rate": 4.746905995752482e-05, "loss": 0.144, "step": 35640 }, { "epoch": 1.2955156624754707, "grad_norm": 0.9141899347305298, "learning_rate": 4.746678428929616e-05, "loss": 0.1263, "step": 35650 }, { "epoch": 1.2958790609782689, "grad_norm": 1.5138301849365234, "learning_rate": 4.74645076530537e-05, "loss": 0.143, "step": 35660 }, { "epoch": 1.2962424594810669, "grad_norm": 0.6234374046325684, "learning_rate": 4.746223004889554e-05, "loss": 0.1492, "step": 35670 }, { "epoch": 1.296605857983865, "grad_norm": 0.6530427932739258, "learning_rate": 4.745995147691981e-05, "loss": 0.1579, "step": 35680 }, { "epoch": 1.2969692564866633, "grad_norm": 
0.9193394780158997, "learning_rate": 4.745767193722468e-05, "loss": 0.141, "step": 35690 }, { "epoch": 1.2973326549894615, "grad_norm": 0.8602085709571838, "learning_rate": 4.745539142990837e-05, "loss": 0.1302, "step": 35700 }, { "epoch": 1.2976960534922597, "grad_norm": 0.7672144174575806, "learning_rate": 4.745310995506914e-05, "loss": 0.1632, "step": 35710 }, { "epoch": 1.2980594519950577, "grad_norm": 0.728992760181427, "learning_rate": 4.74508275128053e-05, "loss": 0.161, "step": 35720 }, { "epoch": 1.2984228504978559, "grad_norm": 0.923298716545105, "learning_rate": 4.7448544103215164e-05, "loss": 0.125, "step": 35730 }, { "epoch": 1.298786249000654, "grad_norm": 0.6956040859222412, "learning_rate": 4.744625972639715e-05, "loss": 0.1071, "step": 35740 }, { "epoch": 1.2991496475034523, "grad_norm": 0.7756535410881042, "learning_rate": 4.7443974382449664e-05, "loss": 0.1452, "step": 35750 }, { "epoch": 1.2995130460062505, "grad_norm": 1.3024572134017944, "learning_rate": 4.7441688071471174e-05, "loss": 0.1328, "step": 35760 }, { "epoch": 1.2998764445090485, "grad_norm": 1.8605810403823853, "learning_rate": 4.7439400793560196e-05, "loss": 0.2063, "step": 35770 }, { "epoch": 1.300239843011847, "grad_norm": 1.6558598279953003, "learning_rate": 4.743711254881528e-05, "loss": 0.1282, "step": 35780 }, { "epoch": 1.300603241514645, "grad_norm": 0.8223969340324402, "learning_rate": 4.743482333733501e-05, "loss": 0.1348, "step": 35790 }, { "epoch": 1.300966640017443, "grad_norm": 1.5702069997787476, "learning_rate": 4.743253315921803e-05, "loss": 0.1656, "step": 35800 }, { "epoch": 1.3013300385202413, "grad_norm": 0.6384185552597046, "learning_rate": 4.743024201456301e-05, "loss": 0.1369, "step": 35810 }, { "epoch": 1.3016934370230395, "grad_norm": 1.1003926992416382, "learning_rate": 4.7427949903468667e-05, "loss": 0.1606, "step": 35820 }, { "epoch": 1.3020568355258377, "grad_norm": 1.1869399547576904, "learning_rate": 4.742565682603376e-05, "loss": 0.131, "step": 
35830 }, { "epoch": 1.3024202340286357, "grad_norm": 1.0088342428207397, "learning_rate": 4.7423362782357096e-05, "loss": 0.1183, "step": 35840 }, { "epoch": 1.302783632531434, "grad_norm": 0.8730582594871521, "learning_rate": 4.7421067772537506e-05, "loss": 0.1228, "step": 35850 }, { "epoch": 1.3031470310342321, "grad_norm": 1.3678339719772339, "learning_rate": 4.7418771796673886e-05, "loss": 0.119, "step": 35860 }, { "epoch": 1.3035104295370303, "grad_norm": 0.5349250435829163, "learning_rate": 4.7416474854865154e-05, "loss": 0.1571, "step": 35870 }, { "epoch": 1.3038738280398285, "grad_norm": 1.4240535497665405, "learning_rate": 4.741417694721028e-05, "loss": 0.1221, "step": 35880 }, { "epoch": 1.3042372265426265, "grad_norm": 2.4048521518707275, "learning_rate": 4.741187807380827e-05, "loss": 0.1265, "step": 35890 }, { "epoch": 1.3046006250454247, "grad_norm": 0.9620640277862549, "learning_rate": 4.740957823475818e-05, "loss": 0.1601, "step": 35900 }, { "epoch": 1.304964023548223, "grad_norm": 2.351884603500366, "learning_rate": 4.740727743015909e-05, "loss": 0.1134, "step": 35910 }, { "epoch": 1.3053274220510211, "grad_norm": 0.6285625100135803, "learning_rate": 4.7404975660110146e-05, "loss": 0.4726, "step": 35920 }, { "epoch": 1.3056908205538194, "grad_norm": 0.6645105481147766, "learning_rate": 4.740267292471051e-05, "loss": 0.1163, "step": 35930 }, { "epoch": 1.3060542190566176, "grad_norm": 1.6493772268295288, "learning_rate": 4.7400369224059415e-05, "loss": 1.652, "step": 35940 }, { "epoch": 1.3064176175594158, "grad_norm": 0.6978940367698669, "learning_rate": 4.739806455825611e-05, "loss": 0.1534, "step": 35950 }, { "epoch": 1.3067810160622138, "grad_norm": 3.241497039794922, "learning_rate": 4.739575892739989e-05, "loss": 0.1196, "step": 35960 }, { "epoch": 1.307144414565012, "grad_norm": 1.1746867895126343, "learning_rate": 4.739345233159011e-05, "loss": 0.1576, "step": 35970 }, { "epoch": 1.3075078130678102, "grad_norm": 0.9227213859558105, 
"learning_rate": 4.7391144770926144e-05, "loss": 0.5262, "step": 35980 }, { "epoch": 1.3078712115706084, "grad_norm": 2.3250370025634766, "learning_rate": 4.738883624550741e-05, "loss": 0.1231, "step": 35990 }, { "epoch": 1.3082346100734066, "grad_norm": 1.344184398651123, "learning_rate": 4.738652675543339e-05, "loss": 0.1508, "step": 36000 }, { "epoch": 1.3082346100734066, "eval_loss": 0.3749592900276184, "eval_runtime": 180.4784, "eval_samples_per_second": 41.08, "eval_steps_per_second": 5.136, "eval_wer": 0.17836331620890591, "step": 36000 }, { "epoch": 1.3085980085762046, "grad_norm": 2.1917757987976074, "learning_rate": 4.738421630080358e-05, "loss": 0.1232, "step": 36010 }, { "epoch": 1.3089614070790028, "grad_norm": 0.7760763764381409, "learning_rate": 4.738190488171753e-05, "loss": 0.1965, "step": 36020 }, { "epoch": 1.309324805581801, "grad_norm": 1.6439956426620483, "learning_rate": 4.737959249827484e-05, "loss": 0.13, "step": 36030 }, { "epoch": 1.3096882040845992, "grad_norm": 1.612452507019043, "learning_rate": 4.7377279150575137e-05, "loss": 0.1335, "step": 36040 }, { "epoch": 1.3100516025873974, "grad_norm": 0.9884024858474731, "learning_rate": 4.737496483871809e-05, "loss": 1.4829, "step": 36050 }, { "epoch": 1.3104150010901954, "grad_norm": 0.8376805782318115, "learning_rate": 4.737264956280342e-05, "loss": 0.1502, "step": 36060 }, { "epoch": 1.3107783995929938, "grad_norm": 0.7544919848442078, "learning_rate": 4.7370333322930884e-05, "loss": 0.1799, "step": 36070 }, { "epoch": 1.3111417980957918, "grad_norm": 1.0415360927581787, "learning_rate": 4.736801611920028e-05, "loss": 0.133, "step": 36080 }, { "epoch": 1.31150519659859, "grad_norm": 1.3284482955932617, "learning_rate": 4.736569795171144e-05, "loss": 0.6017, "step": 36090 }, { "epoch": 1.3118685951013882, "grad_norm": 0.766444742679596, "learning_rate": 4.736337882056425e-05, "loss": 0.1629, "step": 36100 }, { "epoch": 1.3122319936041864, "grad_norm": 1.8423712253570557, "learning_rate": 
4.7361058725858645e-05, "loss": 0.1417, "step": 36110 }, { "epoch": 1.3125953921069846, "grad_norm": 0.7671094536781311, "learning_rate": 4.735873766769458e-05, "loss": 0.1498, "step": 36120 }, { "epoch": 1.3129587906097826, "grad_norm": 0.7939559817314148, "learning_rate": 4.735641564617206e-05, "loss": 0.1101, "step": 36130 }, { "epoch": 1.3133221891125808, "grad_norm": 1.1651771068572998, "learning_rate": 4.735409266139113e-05, "loss": 0.1478, "step": 36140 }, { "epoch": 1.313685587615379, "grad_norm": 0.3433835506439209, "learning_rate": 4.735176871345188e-05, "loss": 0.1775, "step": 36150 }, { "epoch": 1.3140489861181772, "grad_norm": 1.2903847694396973, "learning_rate": 4.734944380245445e-05, "loss": 0.129, "step": 36160 }, { "epoch": 1.3144123846209754, "grad_norm": 1.4051779508590698, "learning_rate": 4.734711792849901e-05, "loss": 0.2257, "step": 36170 }, { "epoch": 1.3147757831237734, "grad_norm": 0.7781183123588562, "learning_rate": 4.734479109168577e-05, "loss": 0.1072, "step": 36180 }, { "epoch": 1.3151391816265716, "grad_norm": 3.805746078491211, "learning_rate": 4.734246329211498e-05, "loss": 0.1327, "step": 36190 }, { "epoch": 1.3155025801293698, "grad_norm": 1.0254390239715576, "learning_rate": 4.734013452988694e-05, "loss": 0.1479, "step": 36200 }, { "epoch": 1.315865978632168, "grad_norm": 1.2095835208892822, "learning_rate": 4.7337804805101994e-05, "loss": 0.1286, "step": 36210 }, { "epoch": 1.3162293771349662, "grad_norm": 0.7073403596878052, "learning_rate": 4.733547411786052e-05, "loss": 0.1385, "step": 36220 }, { "epoch": 1.3165927756377644, "grad_norm": 6.6172709465026855, "learning_rate": 4.7333142468262924e-05, "loss": 0.1202, "step": 36230 }, { "epoch": 1.3169561741405627, "grad_norm": 0.9846429228782654, "learning_rate": 4.733080985640969e-05, "loss": 0.1202, "step": 36240 }, { "epoch": 1.3173195726433606, "grad_norm": 1.7326525449752808, "learning_rate": 4.7328476282401313e-05, "loss": 0.1269, "step": 36250 }, { "epoch": 
1.3176829711461588, "grad_norm": 1.7362529039382935, "learning_rate": 4.7326141746338334e-05, "loss": 0.139, "step": 36260 }, { "epoch": 1.318046369648957, "grad_norm": 0.5070465207099915, "learning_rate": 4.732380624832135e-05, "loss": 0.1963, "step": 36270 }, { "epoch": 1.3184097681517553, "grad_norm": 2.0635170936584473, "learning_rate": 4.7321469788450976e-05, "loss": 0.1251, "step": 36280 }, { "epoch": 1.3187731666545535, "grad_norm": 17.00756072998047, "learning_rate": 4.731913236682789e-05, "loss": 0.4956, "step": 36290 }, { "epoch": 1.3191365651573514, "grad_norm": 0.4699925184249878, "learning_rate": 4.7316793983552806e-05, "loss": 0.1397, "step": 36300 }, { "epoch": 1.3194999636601497, "grad_norm": 1.5378074645996094, "learning_rate": 4.731445463872647e-05, "loss": 0.1444, "step": 36310 }, { "epoch": 1.3198633621629479, "grad_norm": 0.6688012480735779, "learning_rate": 4.731211433244967e-05, "loss": 0.2393, "step": 36320 }, { "epoch": 1.320226760665746, "grad_norm": 1.1277016401290894, "learning_rate": 4.7309773064823274e-05, "loss": 0.6115, "step": 36330 }, { "epoch": 1.3205901591685443, "grad_norm": 0.7446697354316711, "learning_rate": 4.7307430835948114e-05, "loss": 0.1687, "step": 36340 }, { "epoch": 1.3209535576713423, "grad_norm": 1.6629223823547363, "learning_rate": 4.730508764592514e-05, "loss": 0.1681, "step": 36350 }, { "epoch": 1.3213169561741407, "grad_norm": 1.4008903503417969, "learning_rate": 4.73027434948553e-05, "loss": 0.1343, "step": 36360 }, { "epoch": 1.3216803546769387, "grad_norm": 0.6811515092849731, "learning_rate": 4.7300398382839586e-05, "loss": 0.186, "step": 36370 }, { "epoch": 1.3220437531797369, "grad_norm": 0.822640597820282, "learning_rate": 4.7298052309979055e-05, "loss": 0.1336, "step": 36380 }, { "epoch": 1.322407151682535, "grad_norm": 1.145392894744873, "learning_rate": 4.729570527637479e-05, "loss": 0.1267, "step": 36390 }, { "epoch": 1.3227705501853333, "grad_norm": 1.6381548643112183, "learning_rate": 
4.729335728212792e-05, "loss": 0.1213, "step": 36400 }, { "epoch": 1.3231339486881315, "grad_norm": 1.882562518119812, "learning_rate": 4.729100832733959e-05, "loss": 0.1384, "step": 36410 }, { "epoch": 1.3234973471909295, "grad_norm": 0.7675313949584961, "learning_rate": 4.728865841211103e-05, "loss": 0.1554, "step": 36420 }, { "epoch": 1.3238607456937277, "grad_norm": 0.900806725025177, "learning_rate": 4.728630753654349e-05, "loss": 0.1174, "step": 36430 }, { "epoch": 1.324224144196526, "grad_norm": 1.4791582822799683, "learning_rate": 4.7283955700738235e-05, "loss": 0.1449, "step": 36440 }, { "epoch": 1.324587542699324, "grad_norm": 0.5582447052001953, "learning_rate": 4.728160290479663e-05, "loss": 0.1754, "step": 36450 }, { "epoch": 1.3249509412021223, "grad_norm": 2.113154411315918, "learning_rate": 4.727924914882002e-05, "loss": 0.1091, "step": 36460 }, { "epoch": 1.3253143397049203, "grad_norm": 0.9747204184532166, "learning_rate": 4.727689443290985e-05, "loss": 0.144, "step": 36470 }, { "epoch": 1.3256777382077185, "grad_norm": 0.6118887662887573, "learning_rate": 4.727453875716755e-05, "loss": 0.1144, "step": 36480 }, { "epoch": 1.3260411367105167, "grad_norm": 1.146438717842102, "learning_rate": 4.727218212169464e-05, "loss": 0.1138, "step": 36490 }, { "epoch": 1.326404535213315, "grad_norm": 1.2453789710998535, "learning_rate": 4.7269824526592636e-05, "loss": 0.1642, "step": 36500 }, { "epoch": 1.3267679337161131, "grad_norm": 2.001384973526001, "learning_rate": 4.726746597196313e-05, "loss": 0.1317, "step": 36510 }, { "epoch": 1.3271313322189113, "grad_norm": 0.4389583170413971, "learning_rate": 4.726510645790775e-05, "loss": 0.1477, "step": 36520 }, { "epoch": 1.3274947307217095, "grad_norm": 0.5064995884895325, "learning_rate": 4.726274598452815e-05, "loss": 0.1208, "step": 36530 }, { "epoch": 1.3278581292245075, "grad_norm": 1.937470555305481, "learning_rate": 4.726038455192603e-05, "loss": 0.1425, "step": 36540 }, { "epoch": 1.3282215277273057, 
"grad_norm": 0.7836539149284363, "learning_rate": 4.725802216020315e-05, "loss": 0.1942, "step": 36550 }, { "epoch": 1.328584926230104, "grad_norm": 0.7476559281349182, "learning_rate": 4.725565880946129e-05, "loss": 0.1269, "step": 36560 }, { "epoch": 1.3289483247329021, "grad_norm": 1.584917426109314, "learning_rate": 4.725329449980227e-05, "loss": 0.1939, "step": 36570 }, { "epoch": 1.3293117232357003, "grad_norm": 1.6503409147262573, "learning_rate": 4.7250929231327975e-05, "loss": 0.1281, "step": 36580 }, { "epoch": 1.3296751217384983, "grad_norm": 1.2938586473464966, "learning_rate": 4.72485630041403e-05, "loss": 0.1505, "step": 36590 }, { "epoch": 1.3300385202412965, "grad_norm": 0.7093682289123535, "learning_rate": 4.724619581834121e-05, "loss": 0.1517, "step": 36600 }, { "epoch": 1.3300385202412965, "eval_loss": 0.3578657805919647, "eval_runtime": 181.1919, "eval_samples_per_second": 40.918, "eval_steps_per_second": 5.116, "eval_wer": 0.17275400729754753, "step": 36600 }, { "epoch": 1.3304019187440947, "grad_norm": 2.326840877532959, "learning_rate": 4.72438276740327e-05, "loss": 0.1333, "step": 36610 }, { "epoch": 1.330765317246893, "grad_norm": 1.0283209085464478, "learning_rate": 4.7241458571316794e-05, "loss": 0.177, "step": 36620 }, { "epoch": 1.3311287157496912, "grad_norm": 1.416473388671875, "learning_rate": 4.7239325559518525e-05, "loss": 3.9838, "step": 36630 }, { "epoch": 1.3314921142524891, "grad_norm": 0.40842917561531067, "learning_rate": 4.7236954636109833e-05, "loss": 0.1116, "step": 36640 }, { "epoch": 1.3318555127552876, "grad_norm": 2.1062543392181396, "learning_rate": 4.7234582754589886e-05, "loss": 0.121, "step": 36650 }, { "epoch": 1.3322189112580856, "grad_norm": 0.6768646836280823, "learning_rate": 4.723220991506088e-05, "loss": 0.1271, "step": 36660 }, { "epoch": 1.3325823097608838, "grad_norm": 1.1778359413146973, "learning_rate": 4.7229836117625044e-05, "loss": 0.1289, "step": 36670 }, { "epoch": 1.332945708263682, "grad_norm": 
2.1446762084960938, "learning_rate": 4.7227461362384664e-05, "loss": 0.163, "step": 36680 }, { "epoch": 1.3333091067664802, "grad_norm": 0.9764724373817444, "learning_rate": 4.7225085649442063e-05, "loss": 0.1131, "step": 36690 }, { "epoch": 1.3336725052692784, "grad_norm": 0.6134273409843445, "learning_rate": 4.72227089788996e-05, "loss": 0.1269, "step": 36700 }, { "epoch": 1.3340359037720764, "grad_norm": 0.8482096195220947, "learning_rate": 4.722033135085967e-05, "loss": 0.1211, "step": 36710 }, { "epoch": 1.3343993022748746, "grad_norm": 1.1198707818984985, "learning_rate": 4.7217952765424734e-05, "loss": 0.1681, "step": 36720 }, { "epoch": 1.3347627007776728, "grad_norm": 1.5771534442901611, "learning_rate": 4.721557322269725e-05, "loss": 0.7935, "step": 36730 }, { "epoch": 1.335126099280471, "grad_norm": 1.8235740661621094, "learning_rate": 4.721319272277977e-05, "loss": 0.1249, "step": 36740 }, { "epoch": 1.3354894977832692, "grad_norm": 2.7422354221343994, "learning_rate": 4.7210811265774845e-05, "loss": 0.1637, "step": 36750 }, { "epoch": 1.3358528962860672, "grad_norm": 3.130943775177002, "learning_rate": 4.720842885178509e-05, "loss": 0.1437, "step": 36760 }, { "epoch": 1.3362162947888654, "grad_norm": 0.48167362809181213, "learning_rate": 4.720604548091316e-05, "loss": 0.2081, "step": 36770 }, { "epoch": 1.3365796932916636, "grad_norm": 1.3520551919937134, "learning_rate": 4.720366115326174e-05, "loss": 0.1566, "step": 36780 }, { "epoch": 1.3369430917944618, "grad_norm": 0.9920271635055542, "learning_rate": 4.720127586893355e-05, "loss": 0.0995, "step": 36790 }, { "epoch": 1.33730649029726, "grad_norm": 0.6343932747840881, "learning_rate": 4.7198889628031376e-05, "loss": 0.6462, "step": 36800 }, { "epoch": 1.3376698888000582, "grad_norm": 0.8678078651428223, "learning_rate": 4.719650243065804e-05, "loss": 0.1224, "step": 36810 }, { "epoch": 1.3380332873028564, "grad_norm": 1.1642274856567383, "learning_rate": 4.719411427691639e-05, "loss": 0.1438, 
"step": 36820 }, { "epoch": 1.3383966858056544, "grad_norm": 0.6214116811752319, "learning_rate": 4.719172516690932e-05, "loss": 0.1172, "step": 36830 }, { "epoch": 1.3387600843084526, "grad_norm": 0.9811148643493652, "learning_rate": 4.7189335100739764e-05, "loss": 0.1229, "step": 36840 }, { "epoch": 1.3391234828112508, "grad_norm": 0.6965753436088562, "learning_rate": 4.718694407851072e-05, "loss": 0.1432, "step": 36850 }, { "epoch": 1.339486881314049, "grad_norm": 2.3423101902008057, "learning_rate": 4.718455210032519e-05, "loss": 0.1578, "step": 36860 }, { "epoch": 1.3398502798168472, "grad_norm": 1.1584868431091309, "learning_rate": 4.718215916628625e-05, "loss": 0.1792, "step": 36870 }, { "epoch": 1.3402136783196452, "grad_norm": 1.1057560443878174, "learning_rate": 4.717976527649698e-05, "loss": 0.129, "step": 36880 }, { "epoch": 1.3405770768224434, "grad_norm": 1.6027841567993164, "learning_rate": 4.7177370431060554e-05, "loss": 0.0984, "step": 36890 }, { "epoch": 1.3409404753252416, "grad_norm": 1.4970412254333496, "learning_rate": 4.717497463008014e-05, "loss": 0.1764, "step": 36900 }, { "epoch": 1.3413038738280398, "grad_norm": 7.802013397216797, "learning_rate": 4.717257787365897e-05, "loss": 0.1511, "step": 36910 }, { "epoch": 1.341667272330838, "grad_norm": 0.6998898983001709, "learning_rate": 4.717018016190031e-05, "loss": 0.1874, "step": 36920 }, { "epoch": 1.342030670833636, "grad_norm": 1.1323654651641846, "learning_rate": 4.716778149490747e-05, "loss": 0.0968, "step": 36930 }, { "epoch": 1.3423940693364345, "grad_norm": 0.9213439226150513, "learning_rate": 4.716538187278379e-05, "loss": 0.11, "step": 36940 }, { "epoch": 1.3427574678392324, "grad_norm": 3.241694688796997, "learning_rate": 4.7162981295632676e-05, "loss": 0.1695, "step": 36950 }, { "epoch": 1.3431208663420307, "grad_norm": 2.2264153957366943, "learning_rate": 4.716057976355755e-05, "loss": 0.1568, "step": 36960 }, { "epoch": 1.3434842648448289, "grad_norm": 2.439816474914551, 
"learning_rate": 4.715817727666189e-05, "loss": 0.2052, "step": 36970 }, { "epoch": 1.343847663347627, "grad_norm": 0.8145691752433777, "learning_rate": 4.715577383504921e-05, "loss": 0.1501, "step": 36980 }, { "epoch": 1.3442110618504253, "grad_norm": 0.8044644594192505, "learning_rate": 4.7153369438823074e-05, "loss": 0.1088, "step": 36990 }, { "epoch": 1.3445744603532233, "grad_norm": 1.2818701267242432, "learning_rate": 4.715096408808707e-05, "loss": 0.1492, "step": 37000 }, { "epoch": 1.3449378588560215, "grad_norm": 3.879460096359253, "learning_rate": 4.714855778294482e-05, "loss": 0.1407, "step": 37010 }, { "epoch": 1.3453012573588197, "grad_norm": 0.7606347799301147, "learning_rate": 4.714615052350004e-05, "loss": 0.2055, "step": 37020 }, { "epoch": 1.3456646558616179, "grad_norm": 14.105179786682129, "learning_rate": 4.714374230985642e-05, "loss": 0.2572, "step": 37030 }, { "epoch": 1.346028054364416, "grad_norm": 0.9659761786460876, "learning_rate": 4.714133314211774e-05, "loss": 0.1126, "step": 37040 }, { "epoch": 1.346391452867214, "grad_norm": 0.8018509745597839, "learning_rate": 4.7138923020387785e-05, "loss": 0.1731, "step": 37050 }, { "epoch": 1.3467548513700123, "grad_norm": 1.450352668762207, "learning_rate": 4.7136511944770414e-05, "loss": 0.139, "step": 37060 }, { "epoch": 1.3471182498728105, "grad_norm": 0.4030288755893707, "learning_rate": 4.71340999153695e-05, "loss": 0.1317, "step": 37070 }, { "epoch": 1.3474816483756087, "grad_norm": 1.5737247467041016, "learning_rate": 4.713168693228898e-05, "loss": 0.1183, "step": 37080 }, { "epoch": 1.347845046878407, "grad_norm": 0.9841533303260803, "learning_rate": 4.712927299563281e-05, "loss": 0.126, "step": 37090 }, { "epoch": 1.348208445381205, "grad_norm": 0.9880457520484924, "learning_rate": 4.7126858105505004e-05, "loss": 0.1518, "step": 37100 }, { "epoch": 1.3485718438840033, "grad_norm": 1.250982403755188, "learning_rate": 4.7124442262009605e-05, "loss": 0.1241, "step": 37110 }, { "epoch": 
1.3489352423868013, "grad_norm": 0.8015254139900208, "learning_rate": 4.712202546525071e-05, "loss": 0.1294, "step": 37120 }, { "epoch": 1.3492986408895995, "grad_norm": 1.3923901319503784, "learning_rate": 4.711960771533245e-05, "loss": 0.1273, "step": 37130 }, { "epoch": 1.3496620393923977, "grad_norm": 1.7166532278060913, "learning_rate": 4.7117189012359e-05, "loss": 0.1309, "step": 37140 }, { "epoch": 1.350025437895196, "grad_norm": 1.0079472064971924, "learning_rate": 4.711476935643456e-05, "loss": 0.1275, "step": 37150 }, { "epoch": 1.3503888363979941, "grad_norm": 0.7986971139907837, "learning_rate": 4.71123487476634e-05, "loss": 0.1422, "step": 37160 }, { "epoch": 1.350752234900792, "grad_norm": 0.3768475353717804, "learning_rate": 4.71099271861498e-05, "loss": 0.1943, "step": 37170 }, { "epoch": 1.3511156334035903, "grad_norm": 1.1428521871566772, "learning_rate": 4.7107504671998115e-05, "loss": 0.1335, "step": 37180 }, { "epoch": 1.3514790319063885, "grad_norm": 0.627876341342926, "learning_rate": 4.7105081205312715e-05, "loss": 0.1301, "step": 37190 }, { "epoch": 1.3518424304091867, "grad_norm": 1.823798656463623, "learning_rate": 4.710265678619801e-05, "loss": 0.1732, "step": 37200 }, { "epoch": 1.3518424304091867, "eval_loss": 0.3695838451385498, "eval_runtime": 181.0815, "eval_samples_per_second": 40.943, "eval_steps_per_second": 5.119, "eval_wer": 0.17419717900775136, "step": 37200 }, { "epoch": 1.352205828911985, "grad_norm": 1.1491807699203491, "learning_rate": 4.710023141475846e-05, "loss": 0.1267, "step": 37210 }, { "epoch": 1.352569227414783, "grad_norm": 0.9230825901031494, "learning_rate": 4.709780509109858e-05, "loss": 0.1339, "step": 37220 }, { "epoch": 1.3529326259175813, "grad_norm": 1.4637092351913452, "learning_rate": 4.7095377815322893e-05, "loss": 0.1199, "step": 37230 }, { "epoch": 1.3532960244203793, "grad_norm": 2.766608476638794, "learning_rate": 4.7092949587536e-05, "loss": 0.1153, "step": 37240 }, { "epoch": 1.3536594229231775, 
"grad_norm": 0.6508689522743225, "learning_rate": 4.7090520407842516e-05, "loss": 0.1263, "step": 37250 }, { "epoch": 1.3540228214259757, "grad_norm": 1.2959270477294922, "learning_rate": 4.70880902763471e-05, "loss": 0.207, "step": 37260 }, { "epoch": 1.354386219928774, "grad_norm": 0.4432971477508545, "learning_rate": 4.708565919315447e-05, "loss": 0.189, "step": 37270 }, { "epoch": 1.3547496184315722, "grad_norm": 1.1359493732452393, "learning_rate": 4.708322715836936e-05, "loss": 0.1161, "step": 37280 }, { "epoch": 1.3551130169343701, "grad_norm": 1.1669936180114746, "learning_rate": 4.708079417209657e-05, "loss": 0.1298, "step": 37290 }, { "epoch": 1.3554764154371683, "grad_norm": 1.0905638933181763, "learning_rate": 4.707836023444092e-05, "loss": 0.1763, "step": 37300 }, { "epoch": 1.3558398139399666, "grad_norm": 1.1597601175308228, "learning_rate": 4.707592534550729e-05, "loss": 0.1346, "step": 37310 }, { "epoch": 1.3562032124427648, "grad_norm": 0.3999848961830139, "learning_rate": 4.707348950540057e-05, "loss": 0.1614, "step": 37320 }, { "epoch": 1.356566610945563, "grad_norm": 1.9580241441726685, "learning_rate": 4.7071052714225736e-05, "loss": 0.134, "step": 37330 }, { "epoch": 1.356930009448361, "grad_norm": 0.7617779970169067, "learning_rate": 4.7068614972087764e-05, "loss": 0.1372, "step": 37340 }, { "epoch": 1.3572934079511592, "grad_norm": 1.103390097618103, "learning_rate": 4.706617627909169e-05, "loss": 0.1162, "step": 37350 }, { "epoch": 1.3576568064539574, "grad_norm": 1.1971250772476196, "learning_rate": 4.70637366353426e-05, "loss": 0.1358, "step": 37360 }, { "epoch": 1.3580202049567556, "grad_norm": 0.47730955481529236, "learning_rate": 4.70612960409456e-05, "loss": 0.1518, "step": 37370 }, { "epoch": 1.3583836034595538, "grad_norm": 1.2747211456298828, "learning_rate": 4.705885449600584e-05, "loss": 0.1264, "step": 37380 }, { "epoch": 1.358747001962352, "grad_norm": 1.5373166799545288, "learning_rate": 4.705641200062854e-05, "loss": 0.1362, 
"step": 37390 }, { "epoch": 1.3591104004651502, "grad_norm": 7.401641368865967, "learning_rate": 4.705396855491891e-05, "loss": 0.1524, "step": 37400 }, { "epoch": 1.3594737989679482, "grad_norm": 1.0285519361495972, "learning_rate": 4.705152415898225e-05, "loss": 0.1343, "step": 37410 }, { "epoch": 1.3598371974707464, "grad_norm": 0.9249128699302673, "learning_rate": 4.704907881292387e-05, "loss": 0.1857, "step": 37420 }, { "epoch": 1.3602005959735446, "grad_norm": 1.0541716814041138, "learning_rate": 4.7046632516849135e-05, "loss": 0.1519, "step": 37430 }, { "epoch": 1.3605639944763428, "grad_norm": 0.9401641488075256, "learning_rate": 4.704418527086345e-05, "loss": 0.1239, "step": 37440 }, { "epoch": 1.360927392979141, "grad_norm": 1.179436445236206, "learning_rate": 4.7041737075072254e-05, "loss": 0.1345, "step": 37450 }, { "epoch": 1.361290791481939, "grad_norm": 0.5075955390930176, "learning_rate": 4.703928792958103e-05, "loss": 0.1119, "step": 37460 }, { "epoch": 1.3616541899847372, "grad_norm": 1.7815593481063843, "learning_rate": 4.7036837834495306e-05, "loss": 0.1378, "step": 37470 }, { "epoch": 1.3620175884875354, "grad_norm": 1.0368989706039429, "learning_rate": 4.7034386789920646e-05, "loss": 0.1193, "step": 37480 }, { "epoch": 1.3623809869903336, "grad_norm": 0.9185715913772583, "learning_rate": 4.703193479596266e-05, "loss": 0.1539, "step": 37490 }, { "epoch": 1.3627443854931318, "grad_norm": 1.1198723316192627, "learning_rate": 4.7029481852726996e-05, "loss": 0.1483, "step": 37500 }, { "epoch": 1.3631077839959298, "grad_norm": 0.8500091433525085, "learning_rate": 4.702702796031934e-05, "loss": 0.1742, "step": 37510 }, { "epoch": 1.3634711824987282, "grad_norm": 1.0554280281066895, "learning_rate": 4.7024573118845414e-05, "loss": 0.1406, "step": 37520 }, { "epoch": 1.3638345810015262, "grad_norm": 1.524234414100647, "learning_rate": 4.702211732841101e-05, "loss": 0.1242, "step": 37530 }, { "epoch": 1.3641979795043244, "grad_norm": 1.3234226703643799, 
"learning_rate": 4.701966058912191e-05, "loss": 0.1114, "step": 37540 }, { "epoch": 1.3645613780071226, "grad_norm": 4.683910846710205, "learning_rate": 4.701720290108399e-05, "loss": 0.1377, "step": 37550 }, { "epoch": 1.3649247765099208, "grad_norm": 1.4473618268966675, "learning_rate": 4.701474426440313e-05, "loss": 0.1136, "step": 37560 }, { "epoch": 1.365288175012719, "grad_norm": 1.2548261880874634, "learning_rate": 4.701228467918527e-05, "loss": 0.1376, "step": 37570 }, { "epoch": 1.365651573515517, "grad_norm": 0.5335317850112915, "learning_rate": 4.7009824145536385e-05, "loss": 0.1103, "step": 37580 }, { "epoch": 1.3660149720183152, "grad_norm": 2.0894274711608887, "learning_rate": 4.700736266356249e-05, "loss": 0.2852, "step": 37590 }, { "epoch": 1.3663783705211134, "grad_norm": 4.296121120452881, "learning_rate": 4.700490023336963e-05, "loss": 0.1475, "step": 37600 }, { "epoch": 1.3667417690239116, "grad_norm": 3.073425054550171, "learning_rate": 4.700243685506393e-05, "loss": 0.1254, "step": 37610 }, { "epoch": 1.3671051675267099, "grad_norm": 0.5121023058891296, "learning_rate": 4.69999725287515e-05, "loss": 0.1737, "step": 37620 }, { "epoch": 1.3674685660295078, "grad_norm": 0.8064444661140442, "learning_rate": 4.699750725453853e-05, "loss": 0.1045, "step": 37630 }, { "epoch": 1.367831964532306, "grad_norm": 0.5956308841705322, "learning_rate": 4.699504103253124e-05, "loss": 0.1059, "step": 37640 }, { "epoch": 1.3681953630351043, "grad_norm": 1.8260743618011475, "learning_rate": 4.699257386283589e-05, "loss": 0.1677, "step": 37650 }, { "epoch": 1.3685587615379025, "grad_norm": 2.3063583374023438, "learning_rate": 4.699010574555879e-05, "loss": 0.1406, "step": 37660 }, { "epoch": 1.3689221600407007, "grad_norm": 0.5565524697303772, "learning_rate": 4.698763668080627e-05, "loss": 0.1556, "step": 37670 }, { "epoch": 1.3692855585434989, "grad_norm": 1.148147702217102, "learning_rate": 4.698516666868471e-05, "loss": 0.124, "step": 37680 }, { "epoch": 
1.369648957046297, "grad_norm": 1.5487512350082397, "learning_rate": 4.698269570930055e-05, "loss": 0.1478, "step": 37690 }, { "epoch": 1.370012355549095, "grad_norm": 1.8273712396621704, "learning_rate": 4.698022380276024e-05, "loss": 0.1591, "step": 37700 }, { "epoch": 1.3703757540518933, "grad_norm": 1.7402414083480835, "learning_rate": 4.6977750949170294e-05, "loss": 0.1075, "step": 37710 }, { "epoch": 1.3707391525546915, "grad_norm": 0.7992825508117676, "learning_rate": 4.697527714863726e-05, "loss": 0.1653, "step": 37720 }, { "epoch": 1.3711025510574897, "grad_norm": 5.170393943786621, "learning_rate": 4.697280240126772e-05, "loss": 0.1563, "step": 37730 }, { "epoch": 1.371465949560288, "grad_norm": 1.3735640048980713, "learning_rate": 4.697032670716831e-05, "loss": 0.1231, "step": 37740 }, { "epoch": 1.3718293480630859, "grad_norm": 1.8720015287399292, "learning_rate": 4.696785006644569e-05, "loss": 1.526, "step": 37750 }, { "epoch": 1.372192746565884, "grad_norm": 1.9550750255584717, "learning_rate": 4.696537247920657e-05, "loss": 0.1503, "step": 37760 }, { "epoch": 1.3725561450686823, "grad_norm": 0.5374103784561157, "learning_rate": 4.6962893945557704e-05, "loss": 0.1949, "step": 37770 }, { "epoch": 1.3729195435714805, "grad_norm": 1.097432255744934, "learning_rate": 4.6960414465605876e-05, "loss": 0.1157, "step": 37780 }, { "epoch": 1.3732829420742787, "grad_norm": 0.40494269132614136, "learning_rate": 4.695793403945793e-05, "loss": 0.126, "step": 37790 }, { "epoch": 1.3736463405770767, "grad_norm": 1.9734747409820557, "learning_rate": 4.695545266722073e-05, "loss": 0.1457, "step": 37800 }, { "epoch": 1.3736463405770767, "eval_loss": 0.3492252230644226, "eval_runtime": 181.0909, "eval_samples_per_second": 40.941, "eval_steps_per_second": 5.119, "eval_wer": 0.18146749686858968, "step": 37800 }, { "epoch": 1.3740097390798751, "grad_norm": 19.56266212463379, "learning_rate": 4.6952970349001204e-05, "loss": 0.421, "step": 37810 }, { "epoch": 
1.374373137582673, "grad_norm": 0.8468944430351257, "learning_rate": 4.695048708490628e-05, "loss": 0.2035, "step": 37820 }, { "epoch": 1.3747365360854713, "grad_norm": 0.6463280320167542, "learning_rate": 4.6948002875042976e-05, "loss": 0.1323, "step": 37830 }, { "epoch": 1.3750999345882695, "grad_norm": 1.1540967226028442, "learning_rate": 4.694551771951831e-05, "loss": 1.2856, "step": 37840 }, { "epoch": 1.3754633330910677, "grad_norm": 1.295023798942566, "learning_rate": 4.6943031618439374e-05, "loss": 0.7144, "step": 37850 }, { "epoch": 1.375826731593866, "grad_norm": 1.5403015613555908, "learning_rate": 4.694054457191328e-05, "loss": 0.1429, "step": 37860 }, { "epoch": 1.376190130096664, "grad_norm": 1.434574842453003, "learning_rate": 4.693805658004718e-05, "loss": 0.1686, "step": 37870 }, { "epoch": 1.3765535285994621, "grad_norm": 0.6648684740066528, "learning_rate": 4.693556764294829e-05, "loss": 0.1098, "step": 37880 }, { "epoch": 1.3769169271022603, "grad_norm": 0.7901143431663513, "learning_rate": 4.6933077760723824e-05, "loss": 0.115, "step": 37890 }, { "epoch": 1.3772803256050585, "grad_norm": 0.9131706953048706, "learning_rate": 4.693058693348108e-05, "loss": 0.1266, "step": 37900 }, { "epoch": 1.3776437241078567, "grad_norm": 1.2103451490402222, "learning_rate": 4.692809516132738e-05, "loss": 0.1205, "step": 37910 }, { "epoch": 1.3780071226106547, "grad_norm": 0.42282989621162415, "learning_rate": 4.6925602444370075e-05, "loss": 0.1753, "step": 37920 }, { "epoch": 1.3783705211134532, "grad_norm": 0.51373291015625, "learning_rate": 4.692310878271658e-05, "loss": 0.1635, "step": 37930 }, { "epoch": 1.3787339196162511, "grad_norm": 0.7226901650428772, "learning_rate": 4.692061417647431e-05, "loss": 0.1226, "step": 37940 }, { "epoch": 1.3790973181190493, "grad_norm": 5.131813049316406, "learning_rate": 4.6918118625750784e-05, "loss": 0.1682, "step": 37950 }, { "epoch": 1.3794607166218475, "grad_norm": 1.30665922164917, "learning_rate": 
4.6915622130653506e-05, "loss": 0.128, "step": 37960 }, { "epoch": 1.3798241151246458, "grad_norm": 0.3638138175010681, "learning_rate": 4.691312469129006e-05, "loss": 0.1852, "step": 37970 }, { "epoch": 1.380187513627444, "grad_norm": 1.5417994260787964, "learning_rate": 4.691062630776802e-05, "loss": 0.119, "step": 37980 }, { "epoch": 1.380550912130242, "grad_norm": 1.4151712656021118, "learning_rate": 4.6908126980195055e-05, "loss": 0.1079, "step": 37990 }, { "epoch": 1.3809143106330402, "grad_norm": 1.137370228767395, "learning_rate": 4.6905626708678855e-05, "loss": 0.2152, "step": 38000 }, { "epoch": 1.3812777091358384, "grad_norm": 1.0430890321731567, "learning_rate": 4.690312549332714e-05, "loss": 0.1243, "step": 38010 }, { "epoch": 1.3816411076386366, "grad_norm": 1.260365605354309, "learning_rate": 4.690062333424767e-05, "loss": 0.1755, "step": 38020 }, { "epoch": 1.3820045061414348, "grad_norm": 0.8367292284965515, "learning_rate": 4.689812023154827e-05, "loss": 1.1881, "step": 38030 }, { "epoch": 1.3823679046442328, "grad_norm": 1.2440451383590698, "learning_rate": 4.6895616185336775e-05, "loss": 0.1292, "step": 38040 }, { "epoch": 1.382731303147031, "grad_norm": 2.23522686958313, "learning_rate": 4.6893111195721094e-05, "loss": 0.1491, "step": 38050 }, { "epoch": 1.3830947016498292, "grad_norm": 1.0018017292022705, "learning_rate": 4.6890605262809145e-05, "loss": 0.112, "step": 38060 }, { "epoch": 1.3834581001526274, "grad_norm": 0.6445533037185669, "learning_rate": 4.68880983867089e-05, "loss": 0.1597, "step": 38070 }, { "epoch": 1.3838214986554256, "grad_norm": 1.5876944065093994, "learning_rate": 4.6885590567528375e-05, "loss": 0.1341, "step": 38080 }, { "epoch": 1.3841848971582236, "grad_norm": 1.5150282382965088, "learning_rate": 4.6883081805375616e-05, "loss": 0.1167, "step": 38090 }, { "epoch": 1.384548295661022, "grad_norm": 1.7657722234725952, "learning_rate": 4.688057210035873e-05, "loss": 0.1608, "step": 38100 }, { "epoch": 1.38491169416382, 
"grad_norm": 1.027761459350586, "learning_rate": 4.687806145258584e-05, "loss": 0.1566, "step": 38110 }, { "epoch": 1.3852750926666182, "grad_norm": 0.3475823998451233, "learning_rate": 4.6875549862165126e-05, "loss": 0.1502, "step": 38120 }, { "epoch": 1.3856384911694164, "grad_norm": 0.7863835692405701, "learning_rate": 4.687303732920481e-05, "loss": 0.1761, "step": 38130 }, { "epoch": 1.3860018896722146, "grad_norm": 2.0150928497314453, "learning_rate": 4.687052385381313e-05, "loss": 0.1417, "step": 38140 }, { "epoch": 1.3863652881750128, "grad_norm": 0.6676269769668579, "learning_rate": 4.6868009436098386e-05, "loss": 0.1307, "step": 38150 }, { "epoch": 1.3867286866778108, "grad_norm": 1.6957210302352905, "learning_rate": 4.6865494076168934e-05, "loss": 0.1385, "step": 38160 }, { "epoch": 1.387092085180609, "grad_norm": 0.8534975051879883, "learning_rate": 4.686297777413313e-05, "loss": 0.1568, "step": 38170 }, { "epoch": 1.3874554836834072, "grad_norm": 0.7309104800224304, "learning_rate": 4.6860460530099416e-05, "loss": 0.5466, "step": 38180 }, { "epoch": 1.3878188821862054, "grad_norm": 1.2103863954544067, "learning_rate": 4.6857942344176225e-05, "loss": 0.1227, "step": 38190 }, { "epoch": 1.3881822806890036, "grad_norm": 0.7991679906845093, "learning_rate": 4.685542321647207e-05, "loss": 0.1603, "step": 38200 }, { "epoch": 1.3885456791918016, "grad_norm": 1.146906852722168, "learning_rate": 4.685290314709549e-05, "loss": 0.1268, "step": 38210 }, { "epoch": 1.3889090776946, "grad_norm": 1.020175576210022, "learning_rate": 4.685038213615508e-05, "loss": 0.1517, "step": 38220 }, { "epoch": 1.389272476197398, "grad_norm": 1.1214244365692139, "learning_rate": 4.684786018375944e-05, "loss": 0.1185, "step": 38230 }, { "epoch": 1.3896358747001962, "grad_norm": 0.830916166305542, "learning_rate": 4.6845337290017235e-05, "loss": 0.1281, "step": 38240 }, { "epoch": 1.3899992732029944, "grad_norm": 0.5939742922782898, "learning_rate": 4.684281345503718e-05, "loss": 
0.1599, "step": 38250 }, { "epoch": 1.3903626717057926, "grad_norm": 4.498940467834473, "learning_rate": 4.6840288678928003e-05, "loss": 0.1159, "step": 38260 }, { "epoch": 1.3907260702085908, "grad_norm": 0.6612393856048584, "learning_rate": 4.6837762961798495e-05, "loss": 0.1842, "step": 38270 }, { "epoch": 1.3910894687113888, "grad_norm": 2.456289529800415, "learning_rate": 4.683523630375748e-05, "loss": 0.1558, "step": 38280 }, { "epoch": 1.391452867214187, "grad_norm": 0.5414180159568787, "learning_rate": 4.683270870491383e-05, "loss": 0.1347, "step": 38290 }, { "epoch": 1.3918162657169852, "grad_norm": 2.1812076568603516, "learning_rate": 4.683018016537644e-05, "loss": 0.1558, "step": 38300 }, { "epoch": 1.3921796642197835, "grad_norm": 1.2050772905349731, "learning_rate": 4.682765068525425e-05, "loss": 0.1591, "step": 38310 }, { "epoch": 1.3925430627225817, "grad_norm": 1.050423264503479, "learning_rate": 4.6825120264656266e-05, "loss": 0.1578, "step": 38320 }, { "epoch": 1.3929064612253796, "grad_norm": 2.753676652908325, "learning_rate": 4.68225889036915e-05, "loss": 0.1373, "step": 38330 }, { "epoch": 1.3932698597281779, "grad_norm": 2.3123908042907715, "learning_rate": 4.682005660246902e-05, "loss": 0.1198, "step": 38340 }, { "epoch": 1.393633258230976, "grad_norm": 0.6317697167396545, "learning_rate": 4.681752336109794e-05, "loss": 0.157, "step": 38350 }, { "epoch": 1.3939966567337743, "grad_norm": 1.788620948791504, "learning_rate": 4.681498917968741e-05, "loss": 0.1424, "step": 38360 }, { "epoch": 1.3943600552365725, "grad_norm": 1.064799189567566, "learning_rate": 4.68124540583466e-05, "loss": 0.1762, "step": 38370 }, { "epoch": 1.3947234537393705, "grad_norm": 1.3951762914657593, "learning_rate": 4.6809917997184764e-05, "loss": 0.1198, "step": 38380 }, { "epoch": 1.3950868522421689, "grad_norm": 1.0863114595413208, "learning_rate": 4.6807380996311154e-05, "loss": 0.1393, "step": 38390 }, { "epoch": 1.3954502507449669, "grad_norm": 1.141787052154541, 
"learning_rate": 4.6804843055835105e-05, "loss": 0.1603, "step": 38400 }, { "epoch": 1.3954502507449669, "eval_loss": 0.3523618280887604, "eval_runtime": 180.8149, "eval_samples_per_second": 41.003, "eval_steps_per_second": 5.127, "eval_wer": 0.17399749487174831, "step": 38400 }, { "epoch": 1.395813649247765, "grad_norm": 1.5692111253738403, "learning_rate": 4.6802304175865936e-05, "loss": 0.1328, "step": 38410 }, { "epoch": 1.3961770477505633, "grad_norm": 0.4182591140270233, "learning_rate": 4.679976435651305e-05, "loss": 0.1562, "step": 38420 }, { "epoch": 1.3965404462533615, "grad_norm": 0.6963622570037842, "learning_rate": 4.67972235978859e-05, "loss": 0.1131, "step": 38430 }, { "epoch": 1.3969038447561597, "grad_norm": 1.0345783233642578, "learning_rate": 4.679468190009392e-05, "loss": 0.1231, "step": 38440 }, { "epoch": 1.3972672432589577, "grad_norm": 1.6084190607070923, "learning_rate": 4.679213926324665e-05, "loss": 0.133, "step": 38450 }, { "epoch": 1.3976306417617559, "grad_norm": 1.2635602951049805, "learning_rate": 4.678959568745364e-05, "loss": 0.1344, "step": 38460 }, { "epoch": 1.397994040264554, "grad_norm": 0.6128044724464417, "learning_rate": 4.678705117282447e-05, "loss": 0.1639, "step": 38470 }, { "epoch": 1.3983574387673523, "grad_norm": 1.128151297569275, "learning_rate": 4.6784505719468795e-05, "loss": 0.1342, "step": 38480 }, { "epoch": 1.3987208372701505, "grad_norm": 1.6067559719085693, "learning_rate": 4.678195932749627e-05, "loss": 0.1344, "step": 38490 }, { "epoch": 1.3990842357729485, "grad_norm": 0.4303024113178253, "learning_rate": 4.677941199701662e-05, "loss": 0.1519, "step": 38500 }, { "epoch": 1.399447634275747, "grad_norm": 3.09531307220459, "learning_rate": 4.6776863728139596e-05, "loss": 0.1197, "step": 38510 }, { "epoch": 1.399811032778545, "grad_norm": 1.2062981128692627, "learning_rate": 4.6774314520975e-05, "loss": 0.1488, "step": 38520 }, { "epoch": 1.4001744312813431, "grad_norm": 0.7981544733047485, "learning_rate": 
4.6771764375632664e-05, "loss": 0.1155, "step": 38530 }, { "epoch": 1.4005378297841413, "grad_norm": 0.6589852571487427, "learning_rate": 4.676921329222247e-05, "loss": 0.1463, "step": 38540 }, { "epoch": 1.4009012282869395, "grad_norm": 1.496664047241211, "learning_rate": 4.676666127085433e-05, "loss": 0.1811, "step": 38550 }, { "epoch": 1.4012646267897377, "grad_norm": 0.7335402965545654, "learning_rate": 4.676410831163819e-05, "loss": 0.1364, "step": 38560 }, { "epoch": 1.4016280252925357, "grad_norm": 0.8753761053085327, "learning_rate": 4.676155441468407e-05, "loss": 0.1954, "step": 38570 }, { "epoch": 1.401991423795334, "grad_norm": 1.4288660287857056, "learning_rate": 4.6758999580101994e-05, "loss": 0.1347, "step": 38580 }, { "epoch": 1.4023548222981321, "grad_norm": 1.1383757591247559, "learning_rate": 4.675644380800205e-05, "loss": 0.1604, "step": 38590 }, { "epoch": 1.4027182208009303, "grad_norm": 1.4642599821090698, "learning_rate": 4.6753887098494344e-05, "loss": 0.1878, "step": 38600 }, { "epoch": 1.4030816193037285, "grad_norm": 0.9396153688430786, "learning_rate": 4.675132945168905e-05, "loss": 0.1201, "step": 38610 }, { "epoch": 1.4034450178065265, "grad_norm": 0.835436999797821, "learning_rate": 4.674877086769636e-05, "loss": 0.4225, "step": 38620 }, { "epoch": 1.4038084163093247, "grad_norm": 0.9934596419334412, "learning_rate": 4.674621134662651e-05, "loss": 0.1145, "step": 38630 }, { "epoch": 1.404171814812123, "grad_norm": 1.5066030025482178, "learning_rate": 4.674365088858979e-05, "loss": 0.101, "step": 38640 }, { "epoch": 1.4045352133149211, "grad_norm": 2.4759950637817383, "learning_rate": 4.674108949369652e-05, "loss": 0.1225, "step": 38650 }, { "epoch": 1.4048986118177194, "grad_norm": 2.4329168796539307, "learning_rate": 4.6738527162057054e-05, "loss": 0.1469, "step": 38660 }, { "epoch": 1.4052620103205173, "grad_norm": 0.7068483829498291, "learning_rate": 4.67359638937818e-05, "loss": 0.237, "step": 38670 }, { "epoch": 
1.4056254088233158, "grad_norm": 2.3423826694488525, "learning_rate": 4.6733399688981207e-05, "loss": 0.1095, "step": 38680 }, { "epoch": 1.4059888073261138, "grad_norm": 0.7500453591346741, "learning_rate": 4.673083454776575e-05, "loss": 0.1337, "step": 38690 }, { "epoch": 1.406352205828912, "grad_norm": 2.1220805644989014, "learning_rate": 4.6728268470245937e-05, "loss": 0.1489, "step": 38700 }, { "epoch": 1.4067156043317102, "grad_norm": 3.195551633834839, "learning_rate": 4.672570145653234e-05, "loss": 0.1456, "step": 38710 }, { "epoch": 1.4070790028345084, "grad_norm": 0.8845533728599548, "learning_rate": 4.672313350673558e-05, "loss": 0.1529, "step": 38720 }, { "epoch": 1.4074424013373066, "grad_norm": 1.278830885887146, "learning_rate": 4.6720564620966294e-05, "loss": 0.1209, "step": 38730 }, { "epoch": 1.4078057998401046, "grad_norm": 1.8450745344161987, "learning_rate": 4.671799479933515e-05, "loss": 0.0863, "step": 38740 }, { "epoch": 1.4081691983429028, "grad_norm": 0.5718597173690796, "learning_rate": 4.6715424041952894e-05, "loss": 0.1597, "step": 38750 }, { "epoch": 1.408532596845701, "grad_norm": 3.594273090362549, "learning_rate": 4.671285234893027e-05, "loss": 0.1373, "step": 38760 }, { "epoch": 1.4088959953484992, "grad_norm": 1.3270690441131592, "learning_rate": 4.671027972037809e-05, "loss": 0.1727, "step": 38770 }, { "epoch": 1.4092593938512974, "grad_norm": 88.81269073486328, "learning_rate": 4.670770615640721e-05, "loss": 1.6965, "step": 38780 }, { "epoch": 1.4096227923540954, "grad_norm": 1.1233614683151245, "learning_rate": 4.670513165712851e-05, "loss": 0.1316, "step": 38790 }, { "epoch": 1.4099861908568938, "grad_norm": 0.49995678663253784, "learning_rate": 4.6702556222652905e-05, "loss": 0.1492, "step": 38800 }, { "epoch": 1.4103495893596918, "grad_norm": 0.7330392599105835, "learning_rate": 4.669997985309138e-05, "loss": 0.1065, "step": 38810 }, { "epoch": 1.41071298786249, "grad_norm": 0.49762871861457825, "learning_rate": 
4.6697402548554925e-05, "loss": 0.167, "step": 38820 }, { "epoch": 1.4110763863652882, "grad_norm": 3.198273181915283, "learning_rate": 4.6694824309154596e-05, "loss": 0.5685, "step": 38830 }, { "epoch": 1.4114397848680864, "grad_norm": 0.7750107645988464, "learning_rate": 4.6692245135001476e-05, "loss": 0.1291, "step": 38840 }, { "epoch": 1.4118031833708846, "grad_norm": 0.6449529528617859, "learning_rate": 4.66896650262067e-05, "loss": 0.1522, "step": 38850 }, { "epoch": 1.4121665818736826, "grad_norm": 0.7553302049636841, "learning_rate": 4.668708398288142e-05, "loss": 0.1089, "step": 38860 }, { "epoch": 1.4125299803764808, "grad_norm": 0.3948783576488495, "learning_rate": 4.6684502005136864e-05, "loss": 0.1421, "step": 38870 }, { "epoch": 1.412893378879279, "grad_norm": 0.7775730490684509, "learning_rate": 4.668191909308426e-05, "loss": 0.1014, "step": 38880 }, { "epoch": 1.4132567773820772, "grad_norm": 0.3911081850528717, "learning_rate": 4.667933524683492e-05, "loss": 0.1504, "step": 38890 }, { "epoch": 1.4136201758848754, "grad_norm": 0.48814857006073, "learning_rate": 4.667675046650015e-05, "loss": 0.138, "step": 38900 }, { "epoch": 1.4139835743876734, "grad_norm": 2.613859176635742, "learning_rate": 4.667416475219133e-05, "loss": 0.1158, "step": 38910 }, { "epoch": 1.4143469728904716, "grad_norm": 0.9073649048805237, "learning_rate": 4.667157810401987e-05, "loss": 0.1753, "step": 38920 }, { "epoch": 1.4147103713932698, "grad_norm": 1.4347561597824097, "learning_rate": 4.666899052209722e-05, "loss": 0.1397, "step": 38930 }, { "epoch": 1.415073769896068, "grad_norm": 1.014145851135254, "learning_rate": 4.666640200653486e-05, "loss": 0.1425, "step": 38940 }, { "epoch": 1.4154371683988662, "grad_norm": 0.5404003858566284, "learning_rate": 4.6663812557444334e-05, "loss": 0.1918, "step": 38950 }, { "epoch": 1.4158005669016644, "grad_norm": 0.7507174015045166, "learning_rate": 4.66612221749372e-05, "loss": 0.1492, "step": 38960 }, { "epoch": 1.4161639654044627, 
"grad_norm": 0.47643178701400757, "learning_rate": 4.665863085912508e-05, "loss": 0.136, "step": 38970 }, { "epoch": 1.4165273639072606, "grad_norm": 1.3153865337371826, "learning_rate": 4.66560386101196e-05, "loss": 0.1421, "step": 38980 }, { "epoch": 1.4168907624100588, "grad_norm": 0.7111690640449524, "learning_rate": 4.665344542803248e-05, "loss": 0.1184, "step": 38990 }, { "epoch": 1.417254160912857, "grad_norm": 5.245561599731445, "learning_rate": 4.665085131297544e-05, "loss": 0.1343, "step": 39000 }, { "epoch": 1.417254160912857, "eval_loss": 0.34118154644966125, "eval_runtime": 180.7794, "eval_samples_per_second": 41.011, "eval_steps_per_second": 5.128, "eval_wer": 0.17092962041861057, "step": 39000 }, { "epoch": 1.4176175594156553, "grad_norm": 1.7938792705535889, "learning_rate": 4.664825626506025e-05, "loss": 0.1703, "step": 39010 }, { "epoch": 1.4179809579184535, "grad_norm": 0.7494391202926636, "learning_rate": 4.664566028439873e-05, "loss": 0.1644, "step": 39020 }, { "epoch": 1.4183443564212515, "grad_norm": 0.7234100699424744, "learning_rate": 4.664306337110272e-05, "loss": 0.1115, "step": 39030 }, { "epoch": 1.4187077549240497, "grad_norm": 0.6583457589149475, "learning_rate": 4.6640465525284114e-05, "loss": 0.1448, "step": 39040 }, { "epoch": 1.4190711534268479, "grad_norm": 1.7157262563705444, "learning_rate": 4.663786674705484e-05, "loss": 0.176, "step": 39050 }, { "epoch": 1.419434551929646, "grad_norm": 1.9635696411132812, "learning_rate": 4.663526703652688e-05, "loss": 0.1453, "step": 39060 }, { "epoch": 1.4197979504324443, "grad_norm": 0.5495097041130066, "learning_rate": 4.663266639381224e-05, "loss": 0.1114, "step": 39070 }, { "epoch": 1.4201613489352423, "grad_norm": 1.3244194984436035, "learning_rate": 4.663006481902298e-05, "loss": 0.146, "step": 39080 }, { "epoch": 1.4205247474380407, "grad_norm": 1.1683903932571411, "learning_rate": 4.662746231227119e-05, "loss": 0.1351, "step": 39090 }, { "epoch": 1.4208881459408387, "grad_norm": 
2.392890214920044, "learning_rate": 4.662485887366899e-05, "loss": 0.2378, "step": 39100 }, { "epoch": 1.4212515444436369, "grad_norm": 1.7971110343933105, "learning_rate": 4.662225450332856e-05, "loss": 0.1168, "step": 39110 }, { "epoch": 1.421614942946435, "grad_norm": 1.0140278339385986, "learning_rate": 4.6619649201362124e-05, "loss": 0.1332, "step": 39120 }, { "epoch": 1.4219783414492333, "grad_norm": 3.1083390712738037, "learning_rate": 4.661704296788193e-05, "loss": 0.1483, "step": 39130 }, { "epoch": 1.4223417399520315, "grad_norm": 0.6209553480148315, "learning_rate": 4.661443580300026e-05, "loss": 0.1212, "step": 39140 }, { "epoch": 1.4227051384548295, "grad_norm": 0.6733147501945496, "learning_rate": 4.661182770682946e-05, "loss": 0.1343, "step": 39150 }, { "epoch": 1.4230685369576277, "grad_norm": 1.1502153873443604, "learning_rate": 4.660921867948189e-05, "loss": 0.1508, "step": 39160 }, { "epoch": 1.423431935460426, "grad_norm": 0.33853545784950256, "learning_rate": 4.660660872106999e-05, "loss": 0.1661, "step": 39170 }, { "epoch": 1.423795333963224, "grad_norm": 1.0470768213272095, "learning_rate": 4.660399783170618e-05, "loss": 0.1231, "step": 39180 }, { "epoch": 1.4241587324660223, "grad_norm": 0.5467321872711182, "learning_rate": 4.660138601150298e-05, "loss": 0.1166, "step": 39190 }, { "epoch": 1.4245221309688203, "grad_norm": 1.3816486597061157, "learning_rate": 4.659877326057291e-05, "loss": 0.1177, "step": 39200 }, { "epoch": 1.4248855294716185, "grad_norm": 1.0260194540023804, "learning_rate": 4.659615957902855e-05, "loss": 0.153, "step": 39210 }, { "epoch": 1.4252489279744167, "grad_norm": 0.7443385124206543, "learning_rate": 4.6593544966982524e-05, "loss": 0.1735, "step": 39220 }, { "epoch": 1.425612326477215, "grad_norm": 1.1526659727096558, "learning_rate": 4.659092942454746e-05, "loss": 0.1491, "step": 39230 }, { "epoch": 1.4259757249800131, "grad_norm": 0.8841147422790527, "learning_rate": 4.658831295183608e-05, "loss": 0.1611, "step": 
39240 }, { "epoch": 1.4263391234828113, "grad_norm": 1.596132516860962, "learning_rate": 4.65856955489611e-05, "loss": 0.1543, "step": 39250 }, { "epoch": 1.4267025219856095, "grad_norm": 2.1355278491973877, "learning_rate": 4.65830772160353e-05, "loss": 0.1489, "step": 39260 }, { "epoch": 1.4270659204884075, "grad_norm": 0.9080690145492554, "learning_rate": 4.6580457953171496e-05, "loss": 0.28, "step": 39270 }, { "epoch": 1.4274293189912057, "grad_norm": 0.9187225699424744, "learning_rate": 4.6577837760482546e-05, "loss": 0.1243, "step": 39280 }, { "epoch": 1.427792717494004, "grad_norm": 1.0374051332473755, "learning_rate": 4.6575216638081335e-05, "loss": 0.1088, "step": 39290 }, { "epoch": 1.4281561159968021, "grad_norm": 0.5795188546180725, "learning_rate": 4.657259458608081e-05, "loss": 0.1288, "step": 39300 }, { "epoch": 1.4285195144996004, "grad_norm": 1.0621544122695923, "learning_rate": 4.656997160459394e-05, "loss": 0.1311, "step": 39310 }, { "epoch": 1.4288829130023983, "grad_norm": 0.35324281454086304, "learning_rate": 4.656734769373373e-05, "loss": 0.1635, "step": 39320 }, { "epoch": 1.4292463115051965, "grad_norm": 1.4020544290542603, "learning_rate": 4.656472285361326e-05, "loss": 0.1281, "step": 39330 }, { "epoch": 1.4296097100079947, "grad_norm": 0.9644222259521484, "learning_rate": 4.65620970843456e-05, "loss": 0.111, "step": 39340 }, { "epoch": 1.429973108510793, "grad_norm": 3.3897273540496826, "learning_rate": 4.65594703860439e-05, "loss": 0.2219, "step": 39350 }, { "epoch": 1.4303365070135912, "grad_norm": 1.1418486833572388, "learning_rate": 4.655684275882132e-05, "loss": 0.1347, "step": 39360 }, { "epoch": 1.4306999055163891, "grad_norm": 0.7159132361412048, "learning_rate": 4.655421420279109e-05, "loss": 0.2765, "step": 39370 }, { "epoch": 1.4310633040191876, "grad_norm": 1.4189454317092896, "learning_rate": 4.655158471806647e-05, "loss": 0.1247, "step": 39380 }, { "epoch": 1.4314267025219856, "grad_norm": 1.0472137928009033, 
"learning_rate": 4.6548954304760725e-05, "loss": 0.1193, "step": 39390 }, { "epoch": 1.4317901010247838, "grad_norm": 1.6883853673934937, "learning_rate": 4.654632296298723e-05, "loss": 0.1336, "step": 39400 }, { "epoch": 1.432153499527582, "grad_norm": 3.4792749881744385, "learning_rate": 4.654369069285933e-05, "loss": 0.1224, "step": 39410 }, { "epoch": 1.4325168980303802, "grad_norm": 0.9245648384094238, "learning_rate": 4.654105749449046e-05, "loss": 0.1684, "step": 39420 }, { "epoch": 1.4328802965331784, "grad_norm": 0.7134508490562439, "learning_rate": 4.653842336799406e-05, "loss": 0.1268, "step": 39430 }, { "epoch": 1.4332436950359764, "grad_norm": 0.8069209456443787, "learning_rate": 4.6535788313483624e-05, "loss": 0.1224, "step": 39440 }, { "epoch": 1.4336070935387746, "grad_norm": 1.5594000816345215, "learning_rate": 4.6533152331072706e-05, "loss": 0.178, "step": 39450 }, { "epoch": 1.4339704920415728, "grad_norm": 0.6994547843933105, "learning_rate": 4.653051542087486e-05, "loss": 0.1184, "step": 39460 }, { "epoch": 1.434333890544371, "grad_norm": 1.0693833827972412, "learning_rate": 4.6527877583003714e-05, "loss": 0.1696, "step": 39470 }, { "epoch": 1.4346972890471692, "grad_norm": 1.625401496887207, "learning_rate": 4.652523881757292e-05, "loss": 0.9774, "step": 39480 }, { "epoch": 1.4350606875499672, "grad_norm": 1.0620099306106567, "learning_rate": 4.652259912469618e-05, "loss": 0.141, "step": 39490 }, { "epoch": 1.4354240860527654, "grad_norm": 0.8238838315010071, "learning_rate": 4.6519958504487206e-05, "loss": 0.2848, "step": 39500 }, { "epoch": 1.4357874845555636, "grad_norm": 4.339720726013184, "learning_rate": 4.6517316957059796e-05, "loss": 0.1307, "step": 39510 }, { "epoch": 1.4361508830583618, "grad_norm": 1.6212254762649536, "learning_rate": 4.6514674482527754e-05, "loss": 0.1486, "step": 39520 }, { "epoch": 1.43651428156116, "grad_norm": 1.0148829221725464, "learning_rate": 4.651203108100494e-05, "loss": 0.1216, "step": 39530 }, { 
"epoch": 1.4368776800639582, "grad_norm": 0.8317530155181885, "learning_rate": 4.650938675260525e-05, "loss": 0.1058, "step": 39540 }, { "epoch": 1.4372410785667564, "grad_norm": 1.1663634777069092, "learning_rate": 4.6506741497442614e-05, "loss": 0.2442, "step": 39550 }, { "epoch": 1.4376044770695544, "grad_norm": 1.4678232669830322, "learning_rate": 4.6504095315631006e-05, "loss": 0.1353, "step": 39560 }, { "epoch": 1.4379678755723526, "grad_norm": 0.8588351011276245, "learning_rate": 4.6501448207284446e-05, "loss": 0.157, "step": 39570 }, { "epoch": 1.4383312740751508, "grad_norm": 0.7000893950462341, "learning_rate": 4.6498800172516985e-05, "loss": 0.1191, "step": 39580 }, { "epoch": 1.438694672577949, "grad_norm": 1.0854928493499756, "learning_rate": 4.649615121144271e-05, "loss": 0.1258, "step": 39590 }, { "epoch": 1.4390580710807472, "grad_norm": 1.003110647201538, "learning_rate": 4.649350132417577e-05, "loss": 0.1264, "step": 39600 }, { "epoch": 1.4390580710807472, "eval_loss": 0.34889447689056396, "eval_runtime": 180.6213, "eval_samples_per_second": 41.047, "eval_steps_per_second": 5.132, "eval_wer": 0.17173743351425927, "step": 39600 }, { "epoch": 1.4394214695835452, "grad_norm": 0.8560311198234558, "learning_rate": 4.649085051083033e-05, "loss": 0.129, "step": 39610 }, { "epoch": 1.4397848680863434, "grad_norm": 1.6158629655838013, "learning_rate": 4.6488198771520605e-05, "loss": 0.1359, "step": 39620 }, { "epoch": 1.4401482665891416, "grad_norm": 0.6946542263031006, "learning_rate": 4.6485546106360856e-05, "loss": 0.1195, "step": 39630 }, { "epoch": 1.4405116650919398, "grad_norm": 0.6091057062149048, "learning_rate": 4.648289251546536e-05, "loss": 0.1321, "step": 39640 }, { "epoch": 1.440875063594738, "grad_norm": 0.37464994192123413, "learning_rate": 4.648023799894847e-05, "loss": 0.1339, "step": 39650 }, { "epoch": 1.441238462097536, "grad_norm": 0.5378652215003967, "learning_rate": 4.647758255692456e-05, "loss": 0.1127, "step": 39660 }, { "epoch": 
1.4416018606003345, "grad_norm": 1.4586265087127686, "learning_rate": 4.647492618950802e-05, "loss": 0.2027, "step": 39670 }, { "epoch": 1.4419652591031324, "grad_norm": 0.6388387084007263, "learning_rate": 4.647226889681333e-05, "loss": 0.1197, "step": 39680 }, { "epoch": 1.4423286576059307, "grad_norm": 0.7849758267402649, "learning_rate": 4.646961067895496e-05, "loss": 0.1165, "step": 39690 }, { "epoch": 1.4426920561087289, "grad_norm": 1.16459059715271, "learning_rate": 4.6466951536047464e-05, "loss": 0.1799, "step": 39700 }, { "epoch": 1.443055454611527, "grad_norm": 3.2792208194732666, "learning_rate": 4.64642914682054e-05, "loss": 0.1191, "step": 39710 }, { "epoch": 1.4434188531143253, "grad_norm": 0.3408263921737671, "learning_rate": 4.64616304755434e-05, "loss": 0.1971, "step": 39720 }, { "epoch": 1.4437822516171233, "grad_norm": 0.45033156871795654, "learning_rate": 4.645896855817609e-05, "loss": 0.1441, "step": 39730 }, { "epoch": 1.4441456501199215, "grad_norm": 2.284130096435547, "learning_rate": 4.645630571621817e-05, "loss": 0.122, "step": 39740 }, { "epoch": 1.4445090486227197, "grad_norm": 3.047889232635498, "learning_rate": 4.645364194978439e-05, "loss": 0.172, "step": 39750 }, { "epoch": 1.4448724471255179, "grad_norm": 1.2850980758666992, "learning_rate": 4.645097725898951e-05, "loss": 0.1088, "step": 39760 }, { "epoch": 1.445235845628316, "grad_norm": 0.8555011749267578, "learning_rate": 4.644831164394834e-05, "loss": 0.1516, "step": 39770 }, { "epoch": 1.445599244131114, "grad_norm": 0.9414917230606079, "learning_rate": 4.644564510477574e-05, "loss": 0.1104, "step": 39780 }, { "epoch": 1.4459626426339123, "grad_norm": 1.658109188079834, "learning_rate": 4.644297764158659e-05, "loss": 0.1361, "step": 39790 }, { "epoch": 1.4463260411367105, "grad_norm": 1.9984872341156006, "learning_rate": 4.644030925449583e-05, "loss": 0.5671, "step": 39800 }, { "epoch": 1.4466894396395087, "grad_norm": 2.086899995803833, "learning_rate": 
4.6437639943618424e-05, "loss": 0.124, "step": 39810 }, { "epoch": 1.447052838142307, "grad_norm": 0.39096391201019287, "learning_rate": 4.64349697090694e-05, "loss": 0.2021, "step": 39820 }, { "epoch": 1.447416236645105, "grad_norm": 0.9561779499053955, "learning_rate": 4.643229855096378e-05, "loss": 0.1371, "step": 39830 }, { "epoch": 1.4477796351479033, "grad_norm": 1.6168954372406006, "learning_rate": 4.6429626469416685e-05, "loss": 0.1149, "step": 39840 }, { "epoch": 1.4481430336507013, "grad_norm": 1.4393991231918335, "learning_rate": 4.642695346454323e-05, "loss": 0.1472, "step": 39850 }, { "epoch": 1.4485064321534995, "grad_norm": 1.9806978702545166, "learning_rate": 4.642427953645859e-05, "loss": 0.1443, "step": 39860 }, { "epoch": 1.4488698306562977, "grad_norm": 1.012040615081787, "learning_rate": 4.642160468527797e-05, "loss": 0.155, "step": 39870 }, { "epoch": 1.449233229159096, "grad_norm": 0.620448112487793, "learning_rate": 4.641892891111662e-05, "loss": 0.12, "step": 39880 }, { "epoch": 1.4495966276618941, "grad_norm": 0.5192741751670837, "learning_rate": 4.6416252214089834e-05, "loss": 0.1096, "step": 39890 }, { "epoch": 1.449960026164692, "grad_norm": 1.4234672784805298, "learning_rate": 4.641357459431294e-05, "loss": 0.1548, "step": 39900 }, { "epoch": 1.4503234246674903, "grad_norm": 1.2218151092529297, "learning_rate": 4.641089605190131e-05, "loss": 0.1452, "step": 39910 }, { "epoch": 1.4506868231702885, "grad_norm": 0.5271123051643372, "learning_rate": 4.6408216586970344e-05, "loss": 0.1238, "step": 39920 }, { "epoch": 1.4510502216730867, "grad_norm": 1.0670936107635498, "learning_rate": 4.640553619963549e-05, "loss": 0.7119, "step": 39930 }, { "epoch": 1.451413620175885, "grad_norm": 2.9407644271850586, "learning_rate": 4.6402854890012256e-05, "loss": 0.1246, "step": 39940 }, { "epoch": 1.451777018678683, "grad_norm": 0.8619846701622009, "learning_rate": 4.6400172658216144e-05, "loss": 0.1524, "step": 39950 }, { "epoch": 1.4521404171814813, 
"grad_norm": 1.110069751739502, "learning_rate": 4.639748950436275e-05, "loss": 0.1147, "step": 39960 }, { "epoch": 1.4525038156842793, "grad_norm": 0.6605796813964844, "learning_rate": 4.639480542856764e-05, "loss": 0.1266, "step": 39970 }, { "epoch": 1.4528672141870775, "grad_norm": 0.8681196570396423, "learning_rate": 4.639212043094651e-05, "loss": 0.1168, "step": 39980 }, { "epoch": 1.4532306126898757, "grad_norm": 0.7025002241134644, "learning_rate": 4.6389434511615015e-05, "loss": 0.1117, "step": 39990 }, { "epoch": 1.453594011192674, "grad_norm": 1.203703761100769, "learning_rate": 4.6386747670688897e-05, "loss": 0.1524, "step": 40000 }, { "epoch": 1.4539574096954722, "grad_norm": 2.274060010910034, "learning_rate": 4.638405990828391e-05, "loss": 0.113, "step": 40010 }, { "epoch": 1.4543208081982701, "grad_norm": 1.424842357635498, "learning_rate": 4.638137122451587e-05, "loss": 0.1986, "step": 40020 }, { "epoch": 1.4546842067010683, "grad_norm": 2.1440541744232178, "learning_rate": 4.637868161950062e-05, "loss": 0.1406, "step": 40030 }, { "epoch": 1.4550476052038666, "grad_norm": 0.9488077759742737, "learning_rate": 4.6375991093354035e-05, "loss": 0.1827, "step": 40040 }, { "epoch": 1.4554110037066648, "grad_norm": 7.7812724113464355, "learning_rate": 4.637329964619206e-05, "loss": 0.2488, "step": 40050 }, { "epoch": 1.455774402209463, "grad_norm": 1.2816716432571411, "learning_rate": 4.6370607278130646e-05, "loss": 0.1125, "step": 40060 }, { "epoch": 1.456137800712261, "grad_norm": 0.6140567660331726, "learning_rate": 4.63679139892858e-05, "loss": 0.1526, "step": 40070 }, { "epoch": 1.4565011992150592, "grad_norm": 1.3745895624160767, "learning_rate": 4.636521977977357e-05, "loss": 0.1406, "step": 40080 }, { "epoch": 1.4568645977178574, "grad_norm": 0.934893786907196, "learning_rate": 4.636252464971004e-05, "loss": 0.1236, "step": 40090 }, { "epoch": 1.4572279962206556, "grad_norm": 4.1601738929748535, "learning_rate": 4.635982859921132e-05, "loss": 
0.1256, "step": 40100 }, { "epoch": 1.4575913947234538, "grad_norm": 1.3297815322875977, "learning_rate": 4.635713162839359e-05, "loss": 0.1076, "step": 40110 }, { "epoch": 1.457954793226252, "grad_norm": 1.6878186464309692, "learning_rate": 4.6354433737373055e-05, "loss": 0.2212, "step": 40120 }, { "epoch": 1.4583181917290502, "grad_norm": 1.2743428945541382, "learning_rate": 4.6351734926265946e-05, "loss": 0.105, "step": 40130 }, { "epoch": 1.4586815902318482, "grad_norm": 1.5052075386047363, "learning_rate": 4.634903519518854e-05, "loss": 0.234, "step": 40140 }, { "epoch": 1.4590449887346464, "grad_norm": 1.7959517240524292, "learning_rate": 4.634633454425718e-05, "loss": 0.1572, "step": 40150 }, { "epoch": 1.4594083872374446, "grad_norm": 0.578747034072876, "learning_rate": 4.63436329735882e-05, "loss": 0.1111, "step": 40160 }, { "epoch": 1.4597717857402428, "grad_norm": 0.3820185959339142, "learning_rate": 4.634093048329803e-05, "loss": 0.1589, "step": 40170 }, { "epoch": 1.460135184243041, "grad_norm": 1.621700406074524, "learning_rate": 4.633822707350309e-05, "loss": 0.1401, "step": 40180 }, { "epoch": 1.460498582745839, "grad_norm": 1.2941464185714722, "learning_rate": 4.633552274431987e-05, "loss": 0.1678, "step": 40190 }, { "epoch": 1.4608619812486372, "grad_norm": 0.9888546466827393, "learning_rate": 4.633281749586488e-05, "loss": 0.1649, "step": 40200 }, { "epoch": 1.4608619812486372, "eval_loss": 0.3407399654388428, "eval_runtime": 180.8259, "eval_samples_per_second": 41.001, "eval_steps_per_second": 5.126, "eval_wer": 0.17137437144879916, "step": 40200 }, { "epoch": 1.4612253797514354, "grad_norm": 0.6030024290084839, "learning_rate": 4.633011132825469e-05, "loss": 0.1086, "step": 40210 }, { "epoch": 1.4615887782542336, "grad_norm": 2.6498842239379883, "learning_rate": 4.63274042416059e-05, "loss": 0.1341, "step": 40220 }, { "epoch": 1.4619521767570318, "grad_norm": 1.0614917278289795, "learning_rate": 4.632469623603514e-05, "loss": 0.1197, "step": 
40230 }, { "epoch": 1.4623155752598298, "grad_norm": 1.7546344995498657, "learning_rate": 4.63219873116591e-05, "loss": 0.1518, "step": 40240 }, { "epoch": 1.4626789737626282, "grad_norm": 2.726959705352783, "learning_rate": 4.631927746859448e-05, "loss": 0.1514, "step": 40250 }, { "epoch": 1.4630423722654262, "grad_norm": 1.0468662977218628, "learning_rate": 4.6316566706958055e-05, "loss": 0.1309, "step": 40260 }, { "epoch": 1.4634057707682244, "grad_norm": 0.7446948885917664, "learning_rate": 4.631385502686661e-05, "loss": 0.176, "step": 40270 }, { "epoch": 1.4637691692710226, "grad_norm": 2.804288148880005, "learning_rate": 4.6311142428436996e-05, "loss": 0.103, "step": 40280 }, { "epoch": 1.4641325677738208, "grad_norm": 1.1131904125213623, "learning_rate": 4.630842891178607e-05, "loss": 0.1168, "step": 40290 }, { "epoch": 1.464495966276619, "grad_norm": 1.084128975868225, "learning_rate": 4.6305714477030766e-05, "loss": 0.5731, "step": 40300 }, { "epoch": 1.464859364779417, "grad_norm": 2.9369328022003174, "learning_rate": 4.630299912428803e-05, "loss": 0.12, "step": 40310 }, { "epoch": 1.4652227632822152, "grad_norm": 0.449259489774704, "learning_rate": 4.630028285367485e-05, "loss": 0.1396, "step": 40320 }, { "epoch": 1.4655861617850134, "grad_norm": 0.6570121049880981, "learning_rate": 4.6297565665308276e-05, "loss": 0.1143, "step": 40330 }, { "epoch": 1.4659495602878116, "grad_norm": 0.9117491841316223, "learning_rate": 4.629484755930537e-05, "loss": 0.0996, "step": 40340 }, { "epoch": 1.4663129587906099, "grad_norm": 1.197102665901184, "learning_rate": 4.629212853578325e-05, "loss": 0.1522, "step": 40350 }, { "epoch": 1.4666763572934078, "grad_norm": 2.8265323638916016, "learning_rate": 4.6289408594859075e-05, "loss": 0.1239, "step": 40360 }, { "epoch": 1.467039755796206, "grad_norm": 0.4153755307197571, "learning_rate": 4.628668773665002e-05, "loss": 0.1448, "step": 40370 }, { "epoch": 1.4674031542990043, "grad_norm": 0.9625080823898315, "learning_rate": 
4.628396596127335e-05, "loss": 0.1249, "step": 40380 }, { "epoch": 1.4677665528018025, "grad_norm": 3.2815330028533936, "learning_rate": 4.62812432688463e-05, "loss": 0.1238, "step": 40390 }, { "epoch": 1.4681299513046007, "grad_norm": 0.5435966849327087, "learning_rate": 4.627851965948619e-05, "loss": 0.149, "step": 40400 }, { "epoch": 1.4684933498073989, "grad_norm": 1.5212714672088623, "learning_rate": 4.6275795133310383e-05, "loss": 0.1458, "step": 40410 }, { "epoch": 1.468856748310197, "grad_norm": 0.9295603632926941, "learning_rate": 4.627306969043627e-05, "loss": 0.2099, "step": 40420 }, { "epoch": 1.469220146812995, "grad_norm": 1.0282838344573975, "learning_rate": 4.627034333098127e-05, "loss": 0.1408, "step": 40430 }, { "epoch": 1.4695835453157933, "grad_norm": 1.5155305862426758, "learning_rate": 4.6267616055062855e-05, "loss": 0.2335, "step": 40440 }, { "epoch": 1.4699469438185915, "grad_norm": 0.6780584454536438, "learning_rate": 4.626488786279854e-05, "loss": 0.1611, "step": 40450 }, { "epoch": 1.4703103423213897, "grad_norm": 1.1376898288726807, "learning_rate": 4.626215875430586e-05, "loss": 0.1282, "step": 40460 }, { "epoch": 1.470673740824188, "grad_norm": 9.535052299499512, "learning_rate": 4.6259428729702414e-05, "loss": 0.3186, "step": 40470 }, { "epoch": 1.4710371393269859, "grad_norm": 1.4366358518600464, "learning_rate": 4.625669778910582e-05, "loss": 0.1202, "step": 40480 }, { "epoch": 1.471400537829784, "grad_norm": 0.9380308985710144, "learning_rate": 4.625396593263376e-05, "loss": 0.1125, "step": 40490 }, { "epoch": 1.4717639363325823, "grad_norm": 1.2832533121109009, "learning_rate": 4.6251233160403916e-05, "loss": 2.9509, "step": 40500 }, { "epoch": 1.4721273348353805, "grad_norm": 3.405505895614624, "learning_rate": 4.624849947253406e-05, "loss": 0.1369, "step": 40510 }, { "epoch": 1.4724907333381787, "grad_norm": 0.7077997326850891, "learning_rate": 4.624576486914196e-05, "loss": 0.1233, "step": 40520 }, { "epoch": 
1.4728541318409767, "grad_norm": 0.7341346740722656, "learning_rate": 4.624302935034545e-05, "loss": 0.1146, "step": 40530 }, { "epoch": 1.4732175303437751, "grad_norm": 0.9452338218688965, "learning_rate": 4.6240292916262376e-05, "loss": 0.1696, "step": 40540 }, { "epoch": 1.473580928846573, "grad_norm": 0.8162540197372437, "learning_rate": 4.623755556701066e-05, "loss": 0.138, "step": 40550 }, { "epoch": 1.4739443273493713, "grad_norm": 0.551977276802063, "learning_rate": 4.623481730270824e-05, "loss": 0.2382, "step": 40560 }, { "epoch": 1.4743077258521695, "grad_norm": 0.6273486018180847, "learning_rate": 4.623207812347309e-05, "loss": 0.1902, "step": 40570 }, { "epoch": 1.4746711243549677, "grad_norm": 0.5531787872314453, "learning_rate": 4.622933802942324e-05, "loss": 0.1103, "step": 40580 }, { "epoch": 1.475034522857766, "grad_norm": 0.4982399344444275, "learning_rate": 4.622659702067675e-05, "loss": 0.1081, "step": 40590 }, { "epoch": 1.475397921360564, "grad_norm": 0.9091404676437378, "learning_rate": 4.622385509735172e-05, "loss": 0.3927, "step": 40600 }, { "epoch": 1.4757613198633621, "grad_norm": 0.6481756567955017, "learning_rate": 4.622111225956629e-05, "loss": 0.1186, "step": 40610 }, { "epoch": 1.4761247183661603, "grad_norm": 1.1302157640457153, "learning_rate": 4.621836850743864e-05, "loss": 0.1667, "step": 40620 }, { "epoch": 1.4764881168689585, "grad_norm": 2.197112560272217, "learning_rate": 4.6215623841086974e-05, "loss": 0.1226, "step": 40630 }, { "epoch": 1.4768515153717567, "grad_norm": 3.960108995437622, "learning_rate": 4.621287826062957e-05, "loss": 0.1343, "step": 40640 }, { "epoch": 1.4772149138745547, "grad_norm": 1.0748779773712158, "learning_rate": 4.6210131766184714e-05, "loss": 0.1492, "step": 40650 }, { "epoch": 1.477578312377353, "grad_norm": 1.8779007196426392, "learning_rate": 4.620738435787075e-05, "loss": 0.1446, "step": 40660 }, { "epoch": 1.4779417108801511, "grad_norm": 0.5000749230384827, "learning_rate": 
4.620463603580605e-05, "loss": 0.1762, "step": 40670 }, { "epoch": 1.4783051093829493, "grad_norm": 0.6756991147994995, "learning_rate": 4.620188680010903e-05, "loss": 0.1375, "step": 40680 }, { "epoch": 1.4786685078857476, "grad_norm": 0.7340139746665955, "learning_rate": 4.619941170692398e-05, "loss": 2.0738, "step": 40690 }, { "epoch": 1.4790319063885458, "grad_norm": 4.0170722007751465, "learning_rate": 4.6196660735651925e-05, "loss": 0.157, "step": 40700 }, { "epoch": 1.479395304891344, "grad_norm": 0.8254412412643433, "learning_rate": 4.619390885109118e-05, "loss": 0.1385, "step": 40710 }, { "epoch": 1.479758703394142, "grad_norm": 0.7477695345878601, "learning_rate": 4.619115605336031e-05, "loss": 0.1772, "step": 40720 }, { "epoch": 1.4801221018969402, "grad_norm": 0.9684391617774963, "learning_rate": 4.618840234257792e-05, "loss": 3.1891, "step": 40730 }, { "epoch": 1.4804855003997384, "grad_norm": 0.9600037336349487, "learning_rate": 4.6185647718862655e-05, "loss": 0.1289, "step": 40740 }, { "epoch": 1.4808488989025366, "grad_norm": 1.848919153213501, "learning_rate": 4.6182892182333226e-05, "loss": 0.1588, "step": 40750 }, { "epoch": 1.4812122974053348, "grad_norm": 1.707576036453247, "learning_rate": 4.6180135733108335e-05, "loss": 0.1578, "step": 40760 }, { "epoch": 1.4815756959081328, "grad_norm": 0.9908716678619385, "learning_rate": 4.617737837130675e-05, "loss": 0.1763, "step": 40770 }, { "epoch": 1.481939094410931, "grad_norm": 1.638818383216858, "learning_rate": 4.617462009704728e-05, "loss": 0.1323, "step": 40780 }, { "epoch": 1.4823024929137292, "grad_norm": 2.1605606079101562, "learning_rate": 4.6171860910448774e-05, "loss": 0.123, "step": 40790 }, { "epoch": 1.4826658914165274, "grad_norm": 0.7089453935623169, "learning_rate": 4.6169100811630106e-05, "loss": 0.1489, "step": 40800 }, { "epoch": 1.4826658914165274, "eval_loss": 0.3524834215641022, "eval_runtime": 180.6874, "eval_samples_per_second": 41.032, "eval_steps_per_second": 5.13, 
"eval_wer": 0.17247263419681594, "step": 40800 }, { "epoch": 1.4830292899193256, "grad_norm": 0.8966375589370728, "learning_rate": 4.616633980071021e-05, "loss": 0.1114, "step": 40810 }, { "epoch": 1.4833926884221236, "grad_norm": 0.8344945907592773, "learning_rate": 4.616357787780804e-05, "loss": 0.1765, "step": 40820 }, { "epoch": 1.483756086924922, "grad_norm": 1.2400190830230713, "learning_rate": 4.616081504304259e-05, "loss": 0.1805, "step": 40830 }, { "epoch": 1.48411948542772, "grad_norm": 0.9686151742935181, "learning_rate": 4.615805129653292e-05, "loss": 0.1259, "step": 40840 }, { "epoch": 1.4844828839305182, "grad_norm": 1.00034499168396, "learning_rate": 4.615528663839811e-05, "loss": 0.1632, "step": 40850 }, { "epoch": 1.4848462824333164, "grad_norm": 3.2312328815460205, "learning_rate": 4.6152521068757256e-05, "loss": 0.116, "step": 40860 }, { "epoch": 1.4852096809361146, "grad_norm": 0.4837055206298828, "learning_rate": 4.6149754587729535e-05, "loss": 0.1534, "step": 40870 }, { "epoch": 1.4855730794389128, "grad_norm": 8.643519401550293, "learning_rate": 4.614698719543413e-05, "loss": 0.1122, "step": 40880 }, { "epoch": 1.4859364779417108, "grad_norm": 0.9113799333572388, "learning_rate": 4.61442188919903e-05, "loss": 0.1046, "step": 40890 }, { "epoch": 1.486299876444509, "grad_norm": 0.7763462662696838, "learning_rate": 4.61414496775173e-05, "loss": 0.1515, "step": 40900 }, { "epoch": 1.4866632749473072, "grad_norm": 1.2019357681274414, "learning_rate": 4.6138679552134464e-05, "loss": 0.1372, "step": 40910 }, { "epoch": 1.4870266734501054, "grad_norm": 1.1948570013046265, "learning_rate": 4.6135908515961136e-05, "loss": 0.2073, "step": 40920 }, { "epoch": 1.4873900719529036, "grad_norm": 1.3027549982070923, "learning_rate": 4.6133136569116706e-05, "loss": 0.12, "step": 40930 }, { "epoch": 1.4877534704557016, "grad_norm": 1.4980496168136597, "learning_rate": 4.613036371172062e-05, "loss": 2.4225, "step": 40940 }, { "epoch": 1.4881168689584998, 
"grad_norm": 0.7265346050262451, "learning_rate": 4.612758994389234e-05, "loss": 0.1631, "step": 40950 }, { "epoch": 1.488480267461298, "grad_norm": 0.6485431790351868, "learning_rate": 4.612481526575138e-05, "loss": 0.1175, "step": 40960 }, { "epoch": 1.4888436659640962, "grad_norm": 0.9532496333122253, "learning_rate": 4.612203967741729e-05, "loss": 0.2368, "step": 40970 }, { "epoch": 1.4892070644668944, "grad_norm": 3.3696892261505127, "learning_rate": 4.6119263179009676e-05, "loss": 0.1388, "step": 40980 }, { "epoch": 1.4895704629696926, "grad_norm": 0.7628744840621948, "learning_rate": 4.611648577064814e-05, "loss": 0.1475, "step": 40990 }, { "epoch": 1.4899338614724909, "grad_norm": 1.4854507446289062, "learning_rate": 4.611370745245237e-05, "loss": 0.1717, "step": 41000 }, { "epoch": 1.4902972599752888, "grad_norm": 1.2280082702636719, "learning_rate": 4.6110928224542074e-05, "loss": 0.1389, "step": 41010 }, { "epoch": 1.490660658478087, "grad_norm": 0.5658448934555054, "learning_rate": 4.6108148087036984e-05, "loss": 0.1625, "step": 41020 }, { "epoch": 1.4910240569808852, "grad_norm": 1.1708754301071167, "learning_rate": 4.6105367040056903e-05, "loss": 0.1283, "step": 41030 }, { "epoch": 1.4913874554836835, "grad_norm": 1.175658106803894, "learning_rate": 4.610258508372165e-05, "loss": 0.1197, "step": 41040 }, { "epoch": 1.4917508539864817, "grad_norm": 1.0719672441482544, "learning_rate": 4.609980221815109e-05, "loss": 0.1361, "step": 41050 }, { "epoch": 1.4921142524892796, "grad_norm": 0.7982541918754578, "learning_rate": 4.6097018443465114e-05, "loss": 0.1302, "step": 41060 }, { "epoch": 1.4924776509920779, "grad_norm": 0.360454797744751, "learning_rate": 4.609423375978369e-05, "loss": 0.2231, "step": 41070 }, { "epoch": 1.492841049494876, "grad_norm": 0.64405757188797, "learning_rate": 4.609144816722678e-05, "loss": 0.1212, "step": 41080 }, { "epoch": 1.4932044479976743, "grad_norm": 0.7874402403831482, "learning_rate": 4.608866166591441e-05, "loss": 
3.1348, "step": 41090 }, { "epoch": 1.4935678465004725, "grad_norm": 1.059163212776184, "learning_rate": 4.608587425596665e-05, "loss": 0.1464, "step": 41100 }, { "epoch": 1.4939312450032705, "grad_norm": 1.5717148780822754, "learning_rate": 4.608308593750359e-05, "loss": 0.1104, "step": 41110 }, { "epoch": 1.4942946435060689, "grad_norm": 0.6417020559310913, "learning_rate": 4.6080296710645365e-05, "loss": 0.1573, "step": 41120 }, { "epoch": 1.4946580420088669, "grad_norm": 0.8871016502380371, "learning_rate": 4.607750657551216e-05, "loss": 0.1087, "step": 41130 }, { "epoch": 1.495021440511665, "grad_norm": 2.3125686645507812, "learning_rate": 4.6074715532224196e-05, "loss": 0.1379, "step": 41140 }, { "epoch": 1.4953848390144633, "grad_norm": 2.087214708328247, "learning_rate": 4.607192358090172e-05, "loss": 0.1311, "step": 41150 }, { "epoch": 1.4957482375172615, "grad_norm": 3.1915369033813477, "learning_rate": 4.6069130721665035e-05, "loss": 0.1231, "step": 41160 }, { "epoch": 1.4961116360200597, "grad_norm": 0.4626937508583069, "learning_rate": 4.606633695463447e-05, "loss": 0.1544, "step": 41170 }, { "epoch": 1.4964750345228577, "grad_norm": 161.15541076660156, "learning_rate": 4.6063542279930395e-05, "loss": 3.413, "step": 41180 }, { "epoch": 1.496838433025656, "grad_norm": 0.9905474185943604, "learning_rate": 4.606074669767323e-05, "loss": 0.1285, "step": 41190 }, { "epoch": 1.497201831528454, "grad_norm": 0.5389920473098755, "learning_rate": 4.6057950207983426e-05, "loss": 0.1184, "step": 41200 }, { "epoch": 1.4975652300312523, "grad_norm": 2.7976090908050537, "learning_rate": 4.605515281098147e-05, "loss": 0.2461, "step": 41210 }, { "epoch": 1.4979286285340505, "grad_norm": 0.4971259534358978, "learning_rate": 4.60523545067879e-05, "loss": 0.2034, "step": 41220 }, { "epoch": 1.4982920270368485, "grad_norm": 1.5046378374099731, "learning_rate": 4.6049555295523274e-05, "loss": 0.1342, "step": 41230 }, { "epoch": 1.4986554255396467, "grad_norm": 
1.337195634841919, "learning_rate": 4.60467551773082e-05, "loss": 0.1198, "step": 41240 }, { "epoch": 1.499018824042445, "grad_norm": 1.2729612588882446, "learning_rate": 4.6043954152263336e-05, "loss": 0.1312, "step": 41250 }, { "epoch": 1.4993822225452431, "grad_norm": 0.9693030714988708, "learning_rate": 4.6041152220509365e-05, "loss": 0.1554, "step": 41260 }, { "epoch": 1.4997456210480413, "grad_norm": 0.48035889863967896, "learning_rate": 4.6038349382167e-05, "loss": 0.1611, "step": 41270 }, { "epoch": 1.5001090195508393, "grad_norm": 1.015608787536621, "learning_rate": 4.603554563735702e-05, "loss": 0.1322, "step": 41280 }, { "epoch": 1.5004724180536377, "grad_norm": 1.900895595550537, "learning_rate": 4.603274098620023e-05, "loss": 0.1819, "step": 41290 }, { "epoch": 1.5008358165564357, "grad_norm": 1.780765414237976, "learning_rate": 4.602993542881745e-05, "loss": 0.1396, "step": 41300 }, { "epoch": 1.501199215059234, "grad_norm": 3.2523162364959717, "learning_rate": 4.602712896532959e-05, "loss": 0.1333, "step": 41310 }, { "epoch": 1.5015626135620321, "grad_norm": 0.344933420419693, "learning_rate": 4.6024321595857554e-05, "loss": 0.1417, "step": 41320 }, { "epoch": 1.5019260120648303, "grad_norm": 0.7336893081665039, "learning_rate": 4.6021513320522304e-05, "loss": 0.1551, "step": 41330 }, { "epoch": 1.5022894105676285, "grad_norm": 0.9252750873565674, "learning_rate": 4.601870413944484e-05, "loss": 0.1049, "step": 41340 }, { "epoch": 1.5026528090704265, "grad_norm": 2.0064470767974854, "learning_rate": 4.60158940527462e-05, "loss": 0.1593, "step": 41350 }, { "epoch": 1.503016207573225, "grad_norm": 1.2280207872390747, "learning_rate": 4.601308306054746e-05, "loss": 0.1276, "step": 41360 }, { "epoch": 1.503379606076023, "grad_norm": 0.7326213717460632, "learning_rate": 4.601027116296974e-05, "loss": 0.1853, "step": 41370 }, { "epoch": 1.5037430045788212, "grad_norm": 2.208380937576294, "learning_rate": 4.600745836013418e-05, "loss": 0.1343, "step": 41380 
}, { "epoch": 1.5041064030816194, "grad_norm": 0.7113050818443298, "learning_rate": 4.6004644652161996e-05, "loss": 0.198, "step": 41390 }, { "epoch": 1.5044698015844173, "grad_norm": 1.8392037153244019, "learning_rate": 4.60018300391744e-05, "loss": 0.1283, "step": 41400 }, { "epoch": 1.5044698015844173, "eval_loss": 0.35653889179229736, "eval_runtime": 180.3372, "eval_samples_per_second": 41.112, "eval_steps_per_second": 5.14, "eval_wer": 0.16764390872619675, "step": 41400 }, { "epoch": 1.5048332000872158, "grad_norm": 1.0530060529708862, "learning_rate": 4.5999014521292674e-05, "loss": 0.1072, "step": 41410 }, { "epoch": 1.5051965985900138, "grad_norm": 1.0648863315582275, "learning_rate": 4.599619809863813e-05, "loss": 0.1939, "step": 41420 }, { "epoch": 1.505559997092812, "grad_norm": 1.4178556203842163, "learning_rate": 4.599338077133212e-05, "loss": 0.1325, "step": 41430 }, { "epoch": 1.5059233955956102, "grad_norm": 0.6156584024429321, "learning_rate": 4.5990562539496015e-05, "loss": 0.1337, "step": 41440 }, { "epoch": 1.5062867940984082, "grad_norm": 0.9399839639663696, "learning_rate": 4.598774340325126e-05, "loss": 0.1697, "step": 41450 }, { "epoch": 1.5066501926012066, "grad_norm": 0.9702737927436829, "learning_rate": 4.598492336271931e-05, "loss": 0.0978, "step": 41460 }, { "epoch": 1.5070135911040046, "grad_norm": 0.8199527263641357, "learning_rate": 4.598210241802169e-05, "loss": 0.185, "step": 41470 }, { "epoch": 1.5073769896068028, "grad_norm": 1.122827172279358, "learning_rate": 4.597928056927993e-05, "loss": 0.1234, "step": 41480 }, { "epoch": 1.507740388109601, "grad_norm": 1.9142221212387085, "learning_rate": 4.5976457816615606e-05, "loss": 0.1346, "step": 41490 }, { "epoch": 1.5081037866123992, "grad_norm": 1.0756717920303345, "learning_rate": 4.5973634160150345e-05, "loss": 0.1431, "step": 41500 }, { "epoch": 1.5084671851151974, "grad_norm": 1.6231876611709595, "learning_rate": 4.5970809600005826e-05, "loss": 0.1608, "step": 41510 }, { 
"epoch": 1.5088305836179954, "grad_norm": 0.3704961836338043, "learning_rate": 4.596798413630373e-05, "loss": 0.1501, "step": 41520 }, { "epoch": 1.5091939821207938, "grad_norm": 0.7752798199653625, "learning_rate": 4.59651577691658e-05, "loss": 0.1344, "step": 41530 }, { "epoch": 1.5095573806235918, "grad_norm": 2.622103214263916, "learning_rate": 4.596233049871382e-05, "loss": 0.1232, "step": 41540 }, { "epoch": 1.50992077912639, "grad_norm": 0.4142579436302185, "learning_rate": 4.595950232506961e-05, "loss": 0.1227, "step": 41550 }, { "epoch": 1.5102841776291882, "grad_norm": 0.9995001554489136, "learning_rate": 4.5956673248355e-05, "loss": 0.1143, "step": 41560 }, { "epoch": 1.5106475761319862, "grad_norm": 2.1356821060180664, "learning_rate": 4.595384326869191e-05, "loss": 0.1969, "step": 41570 }, { "epoch": 1.5110109746347846, "grad_norm": 0.9950689673423767, "learning_rate": 4.5951012386202274e-05, "loss": 0.1362, "step": 41580 }, { "epoch": 1.5113743731375826, "grad_norm": 0.6441085934638977, "learning_rate": 4.5948180601008054e-05, "loss": 0.1557, "step": 41590 }, { "epoch": 1.5117377716403808, "grad_norm": 2.1033713817596436, "learning_rate": 4.594534791323127e-05, "loss": 0.1718, "step": 41600 }, { "epoch": 1.512101170143179, "grad_norm": 1.3968003988265991, "learning_rate": 4.5942514322993965e-05, "loss": 0.2915, "step": 41610 }, { "epoch": 1.5124645686459772, "grad_norm": 0.7833322882652283, "learning_rate": 4.593967983041823e-05, "loss": 0.1379, "step": 41620 }, { "epoch": 1.5128279671487754, "grad_norm": 1.0050405263900757, "learning_rate": 4.5936844435626196e-05, "loss": 0.1307, "step": 41630 }, { "epoch": 1.5131913656515734, "grad_norm": 1.9530189037322998, "learning_rate": 4.593400813874003e-05, "loss": 0.139, "step": 41640 }, { "epoch": 1.5135547641543718, "grad_norm": 0.45743170380592346, "learning_rate": 4.593117093988194e-05, "loss": 0.1422, "step": 41650 }, { "epoch": 1.5139181626571698, "grad_norm": 1.310746431350708, "learning_rate": 
4.592833283917416e-05, "loss": 1.58, "step": 41660 }, { "epoch": 1.514281561159968, "grad_norm": 0.6696259379386902, "learning_rate": 4.592549383673898e-05, "loss": 0.1466, "step": 41670 }, { "epoch": 1.5146449596627662, "grad_norm": 1.0350476503372192, "learning_rate": 4.5922653932698734e-05, "loss": 0.1114, "step": 41680 }, { "epoch": 1.5150083581655642, "grad_norm": 1.5413391590118408, "learning_rate": 4.591981312717577e-05, "loss": 0.1225, "step": 41690 }, { "epoch": 1.5153717566683627, "grad_norm": 0.8129068613052368, "learning_rate": 4.5916971420292485e-05, "loss": 0.1951, "step": 41700 }, { "epoch": 1.5157351551711606, "grad_norm": 1.1114506721496582, "learning_rate": 4.591412881217133e-05, "loss": 0.1227, "step": 41710 }, { "epoch": 1.5160985536739588, "grad_norm": 0.5106993317604065, "learning_rate": 4.5911285302934775e-05, "loss": 0.1985, "step": 41720 }, { "epoch": 1.516461952176757, "grad_norm": 1.2125110626220703, "learning_rate": 4.590844089270534e-05, "loss": 0.1233, "step": 41730 }, { "epoch": 1.516825350679555, "grad_norm": 1.3580394983291626, "learning_rate": 4.590559558160558e-05, "loss": 0.1227, "step": 41740 }, { "epoch": 1.5171887491823535, "grad_norm": 0.4338432252407074, "learning_rate": 4.590274936975809e-05, "loss": 0.1462, "step": 41750 }, { "epoch": 1.5175521476851515, "grad_norm": 0.9010568857192993, "learning_rate": 4.58999022572855e-05, "loss": 0.1372, "step": 41760 }, { "epoch": 1.5179155461879497, "grad_norm": 0.737705647945404, "learning_rate": 4.589705424431048e-05, "loss": 0.1538, "step": 41770 }, { "epoch": 1.5182789446907479, "grad_norm": 1.0285004377365112, "learning_rate": 4.589420533095575e-05, "loss": 0.1101, "step": 41780 }, { "epoch": 1.518642343193546, "grad_norm": 0.5717383027076721, "learning_rate": 4.589135551734405e-05, "loss": 0.1157, "step": 41790 }, { "epoch": 1.5190057416963443, "grad_norm": 1.1417220830917358, "learning_rate": 4.588850480359818e-05, "loss": 0.1359, "step": 41800 }, { "epoch": 1.5193691401991423, 
"grad_norm": 2.673459768295288, "learning_rate": 4.588565318984095e-05, "loss": 0.1238, "step": 41810 }, { "epoch": 1.5197325387019407, "grad_norm": 1.1211605072021484, "learning_rate": 4.588280067619524e-05, "loss": 0.1642, "step": 41820 }, { "epoch": 1.5200959372047387, "grad_norm": 2.358137369155884, "learning_rate": 4.587994726278395e-05, "loss": 0.1234, "step": 41830 }, { "epoch": 1.5204593357075369, "grad_norm": 0.8301489949226379, "learning_rate": 4.587709294973002e-05, "loss": 0.1274, "step": 41840 }, { "epoch": 1.520822734210335, "grad_norm": 2.1138226985931396, "learning_rate": 4.587423773715644e-05, "loss": 0.1326, "step": 41850 }, { "epoch": 1.521186132713133, "grad_norm": 0.7757201194763184, "learning_rate": 4.587138162518623e-05, "loss": 0.1183, "step": 41860 }, { "epoch": 1.5215495312159315, "grad_norm": 0.7807698249816895, "learning_rate": 4.586852461394243e-05, "loss": 0.1485, "step": 41870 }, { "epoch": 1.5219129297187295, "grad_norm": 2.2938053607940674, "learning_rate": 4.586566670354817e-05, "loss": 0.1152, "step": 41880 }, { "epoch": 1.5222763282215277, "grad_norm": 1.2340235710144043, "learning_rate": 4.5862807894126566e-05, "loss": 0.1766, "step": 41890 }, { "epoch": 1.522639726724326, "grad_norm": 0.9382178783416748, "learning_rate": 4.5859948185800806e-05, "loss": 0.1273, "step": 41900 }, { "epoch": 1.523003125227124, "grad_norm": 4.5072526931762695, "learning_rate": 4.58570875786941e-05, "loss": 0.1333, "step": 41910 }, { "epoch": 1.5233665237299223, "grad_norm": 0.41228216886520386, "learning_rate": 4.5854226072929696e-05, "loss": 0.1766, "step": 41920 }, { "epoch": 1.5237299222327203, "grad_norm": 0.869669497013092, "learning_rate": 4.5851363668630886e-05, "loss": 0.1271, "step": 41930 }, { "epoch": 1.5240933207355187, "grad_norm": 1.169318675994873, "learning_rate": 4.584850036592101e-05, "loss": 0.083, "step": 41940 }, { "epoch": 1.5244567192383167, "grad_norm": 3.336904287338257, "learning_rate": 4.5845636164923426e-05, "loss": 
0.1357, "step": 41950 }, { "epoch": 1.524820117741115, "grad_norm": 1.167758584022522, "learning_rate": 4.584277106576156e-05, "loss": 0.1162, "step": 41960 }, { "epoch": 1.5251835162439131, "grad_norm": 0.9635423421859741, "learning_rate": 4.5839905068558835e-05, "loss": 0.2177, "step": 41970 }, { "epoch": 1.5255469147467111, "grad_norm": 1.3818042278289795, "learning_rate": 4.583703817343876e-05, "loss": 0.1246, "step": 41980 }, { "epoch": 1.5259103132495095, "grad_norm": 1.1299431324005127, "learning_rate": 4.583417038052484e-05, "loss": 0.1359, "step": 41990 }, { "epoch": 1.5262737117523075, "grad_norm": 2.181351661682129, "learning_rate": 4.583130168994065e-05, "loss": 0.1706, "step": 42000 }, { "epoch": 1.5262737117523075, "eval_loss": 0.3528802692890167, "eval_runtime": 181.0519, "eval_samples_per_second": 40.95, "eval_steps_per_second": 5.12, "eval_wer": 0.17613956105796286, "step": 42000 }, { "epoch": 1.5266371102551057, "grad_norm": 1.0958346128463745, "learning_rate": 4.582843210180979e-05, "loss": 0.1187, "step": 42010 }, { "epoch": 1.527000508757904, "grad_norm": 0.463438481092453, "learning_rate": 4.58255616162559e-05, "loss": 0.1539, "step": 42020 }, { "epoch": 1.527363907260702, "grad_norm": 0.5655350685119629, "learning_rate": 4.5822690233402656e-05, "loss": 0.1503, "step": 42030 }, { "epoch": 1.5277273057635004, "grad_norm": 1.5692224502563477, "learning_rate": 4.5819817953373764e-05, "loss": 0.1219, "step": 42040 }, { "epoch": 1.5280907042662983, "grad_norm": 0.48884958028793335, "learning_rate": 4.5816944776293016e-05, "loss": 0.1455, "step": 42050 }, { "epoch": 1.5284541027690965, "grad_norm": 0.8623284697532654, "learning_rate": 4.5814070702284175e-05, "loss": 0.1498, "step": 42060 }, { "epoch": 1.5288175012718948, "grad_norm": 0.5985013246536255, "learning_rate": 4.581119573147108e-05, "loss": 0.4594, "step": 42070 }, { "epoch": 1.529180899774693, "grad_norm": 0.9812720417976379, "learning_rate": 4.580831986397761e-05, "loss": 0.1234, "step": 
42080 }, { "epoch": 1.5295442982774912, "grad_norm": 0.5680709481239319, "learning_rate": 4.5805443099927666e-05, "loss": 0.1061, "step": 42090 }, { "epoch": 1.5299076967802892, "grad_norm": 0.6387588977813721, "learning_rate": 4.5802565439445225e-05, "loss": 0.1436, "step": 42100 }, { "epoch": 1.5302710952830876, "grad_norm": 1.1865098476409912, "learning_rate": 4.5799686882654236e-05, "loss": 0.1155, "step": 42110 }, { "epoch": 1.5306344937858856, "grad_norm": 0.7588171362876892, "learning_rate": 4.579680742967875e-05, "loss": 0.1799, "step": 42120 }, { "epoch": 1.5309978922886838, "grad_norm": 0.9183505773544312, "learning_rate": 4.579392708064283e-05, "loss": 0.1133, "step": 42130 }, { "epoch": 1.531361290791482, "grad_norm": 1.1988872289657593, "learning_rate": 4.5791045835670575e-05, "loss": 0.1107, "step": 42140 }, { "epoch": 1.53172468929428, "grad_norm": 0.6209965944290161, "learning_rate": 4.578816369488613e-05, "loss": 0.1518, "step": 42150 }, { "epoch": 1.5320880877970784, "grad_norm": 1.3487142324447632, "learning_rate": 4.5785280658413674e-05, "loss": 0.1126, "step": 42160 }, { "epoch": 1.5324514862998764, "grad_norm": 0.6516602039337158, "learning_rate": 4.578239672637743e-05, "loss": 0.1498, "step": 42170 }, { "epoch": 1.5328148848026746, "grad_norm": 2.4193315505981445, "learning_rate": 4.577951189890166e-05, "loss": 0.1408, "step": 42180 }, { "epoch": 1.5331782833054728, "grad_norm": 0.6747106313705444, "learning_rate": 4.577662617611065e-05, "loss": 0.1226, "step": 42190 }, { "epoch": 1.533541681808271, "grad_norm": 3.124244451522827, "learning_rate": 4.5773739558128744e-05, "loss": 0.1512, "step": 42200 }, { "epoch": 1.5339050803110692, "grad_norm": 0.8625807762145996, "learning_rate": 4.5770852045080314e-05, "loss": 0.1187, "step": 42210 }, { "epoch": 1.5342684788138672, "grad_norm": 0.9007976651191711, "learning_rate": 4.576796363708977e-05, "loss": 0.2001, "step": 42220 }, { "epoch": 1.5346318773166656, "grad_norm": 0.7381039261817932, 
"learning_rate": 4.576507433428157e-05, "loss": 0.1063, "step": 42230 }, { "epoch": 1.5349952758194636, "grad_norm": 0.9550501704216003, "learning_rate": 4.57621841367802e-05, "loss": 0.1448, "step": 42240 }, { "epoch": 1.5353586743222618, "grad_norm": 0.5087346434593201, "learning_rate": 4.5759293044710175e-05, "loss": 0.1665, "step": 42250 }, { "epoch": 1.53572207282506, "grad_norm": 0.4684658646583557, "learning_rate": 4.575640105819609e-05, "loss": 0.1089, "step": 42260 }, { "epoch": 1.536085471327858, "grad_norm": 0.6353893876075745, "learning_rate": 4.575350817736252e-05, "loss": 0.2437, "step": 42270 }, { "epoch": 1.5364488698306564, "grad_norm": 0.7524349689483643, "learning_rate": 4.575061440233414e-05, "loss": 0.1858, "step": 42280 }, { "epoch": 1.5368122683334544, "grad_norm": 0.9425112009048462, "learning_rate": 4.57477197332356e-05, "loss": 0.0948, "step": 42290 }, { "epoch": 1.5371756668362526, "grad_norm": 1.419872522354126, "learning_rate": 4.574482417019165e-05, "loss": 0.1272, "step": 42300 }, { "epoch": 1.5375390653390508, "grad_norm": 0.6511875987052917, "learning_rate": 4.574192771332703e-05, "loss": 0.176, "step": 42310 }, { "epoch": 1.5379024638418488, "grad_norm": 1.2612382173538208, "learning_rate": 4.573903036276655e-05, "loss": 0.1681, "step": 42320 }, { "epoch": 1.5382658623446472, "grad_norm": 0.828471839427948, "learning_rate": 4.573613211863504e-05, "loss": 0.1218, "step": 42330 }, { "epoch": 1.5386292608474452, "grad_norm": 0.7098140716552734, "learning_rate": 4.573323298105737e-05, "loss": 0.1264, "step": 42340 }, { "epoch": 1.5389926593502434, "grad_norm": 0.612920343875885, "learning_rate": 4.573033295015847e-05, "loss": 0.1457, "step": 42350 }, { "epoch": 1.5393560578530416, "grad_norm": 2.700010299682617, "learning_rate": 4.572743202606328e-05, "loss": 0.1416, "step": 42360 }, { "epoch": 1.5397194563558398, "grad_norm": 0.4544985890388489, "learning_rate": 4.5724530208896784e-05, "loss": 0.2174, "step": 42370 }, { "epoch": 
1.540082854858638, "grad_norm": 1.7702118158340454, "learning_rate": 4.5721627498784025e-05, "loss": 0.7935, "step": 42380 }, { "epoch": 1.540446253361436, "grad_norm": 2.3855764865875244, "learning_rate": 4.571872389585007e-05, "loss": 0.1142, "step": 42390 }, { "epoch": 1.5408096518642345, "grad_norm": 1.9382286071777344, "learning_rate": 4.5715819400220004e-05, "loss": 0.1349, "step": 42400 }, { "epoch": 1.5411730503670324, "grad_norm": 1.8577841520309448, "learning_rate": 4.5712914012019003e-05, "loss": 0.1154, "step": 42410 }, { "epoch": 1.5415364488698307, "grad_norm": 1.4880726337432861, "learning_rate": 4.571000773137223e-05, "loss": 0.1402, "step": 42420 }, { "epoch": 1.5418998473726289, "grad_norm": 0.6903501152992249, "learning_rate": 4.570710055840491e-05, "loss": 0.1137, "step": 42430 }, { "epoch": 1.5422632458754268, "grad_norm": 1.4438791275024414, "learning_rate": 4.57041924932423e-05, "loss": 0.1285, "step": 42440 }, { "epoch": 1.5426266443782253, "grad_norm": 0.41870322823524475, "learning_rate": 4.57012835360097e-05, "loss": 0.1366, "step": 42450 }, { "epoch": 1.5429900428810233, "grad_norm": 0.9365738034248352, "learning_rate": 4.569837368683245e-05, "loss": 0.1051, "step": 42460 }, { "epoch": 1.5433534413838215, "grad_norm": 1.940673828125, "learning_rate": 4.569546294583593e-05, "loss": 0.157, "step": 42470 }, { "epoch": 1.5437168398866197, "grad_norm": 1.1944515705108643, "learning_rate": 4.5692551313145536e-05, "loss": 1.5159, "step": 42480 }, { "epoch": 1.5440802383894179, "grad_norm": 0.6140870451927185, "learning_rate": 4.568963878888673e-05, "loss": 0.0986, "step": 42490 }, { "epoch": 1.544443636892216, "grad_norm": 1.8208271265029907, "learning_rate": 4.5686725373185016e-05, "loss": 0.1519, "step": 42500 }, { "epoch": 1.544807035395014, "grad_norm": 1.2457455396652222, "learning_rate": 4.56838110661659e-05, "loss": 0.1634, "step": 42510 }, { "epoch": 1.5451704338978125, "grad_norm": 0.5140019655227661, "learning_rate": 
4.568089586795496e-05, "loss": 0.1628, "step": 42520 }, { "epoch": 1.5455338324006105, "grad_norm": 0.8539334535598755, "learning_rate": 4.5677979778677796e-05, "loss": 0.1243, "step": 42530 }, { "epoch": 1.5458972309034087, "grad_norm": 1.2581802606582642, "learning_rate": 4.567506279846006e-05, "loss": 0.1715, "step": 42540 }, { "epoch": 1.546260629406207, "grad_norm": 1.8808507919311523, "learning_rate": 4.567214492742743e-05, "loss": 0.1415, "step": 42550 }, { "epoch": 1.5466240279090049, "grad_norm": 1.948970079421997, "learning_rate": 4.566922616570562e-05, "loss": 0.1147, "step": 42560 }, { "epoch": 1.5469874264118033, "grad_norm": 1.5000864267349243, "learning_rate": 4.566630651342041e-05, "loss": 0.1614, "step": 42570 }, { "epoch": 1.5473508249146013, "grad_norm": 1.5625576972961426, "learning_rate": 4.566338597069757e-05, "loss": 0.1145, "step": 42580 }, { "epoch": 1.5477142234173995, "grad_norm": 1.2443382740020752, "learning_rate": 4.566046453766295e-05, "loss": 0.1203, "step": 42590 }, { "epoch": 1.5480776219201977, "grad_norm": 1.5014569759368896, "learning_rate": 4.5657542214442426e-05, "loss": 0.1459, "step": 42600 }, { "epoch": 1.5480776219201977, "eval_loss": 0.351544588804245, "eval_runtime": 180.3519, "eval_samples_per_second": 41.109, "eval_steps_per_second": 5.14, "eval_wer": 0.17858115344818196, "step": 42600 }, { "epoch": 1.5484410204229957, "grad_norm": 1.0584172010421753, "learning_rate": 4.565461900116191e-05, "loss": 0.1046, "step": 42610 }, { "epoch": 1.5488044189257941, "grad_norm": 0.6157267689704895, "learning_rate": 4.565169489794735e-05, "loss": 0.135, "step": 42620 }, { "epoch": 1.549167817428592, "grad_norm": 0.898263692855835, "learning_rate": 4.564876990492474e-05, "loss": 0.1157, "step": 42630 }, { "epoch": 1.5495312159313903, "grad_norm": 0.6782193779945374, "learning_rate": 4.5645844022220096e-05, "loss": 0.2191, "step": 42640 }, { "epoch": 1.5498946144341885, "grad_norm": 0.6636195182800293, "learning_rate": 
4.5642917249959493e-05, "loss": 0.1709, "step": 42650 }, { "epoch": 1.5502580129369867, "grad_norm": 1.3367676734924316, "learning_rate": 4.563998958826904e-05, "loss": 0.1197, "step": 42660 }, { "epoch": 1.550621411439785, "grad_norm": 0.470985472202301, "learning_rate": 4.563706103727486e-05, "loss": 0.1395, "step": 42670 }, { "epoch": 1.550984809942583, "grad_norm": 1.1232322454452515, "learning_rate": 4.563413159710316e-05, "loss": 0.1139, "step": 42680 }, { "epoch": 1.5513482084453813, "grad_norm": 1.0105756521224976, "learning_rate": 4.563120126788013e-05, "loss": 0.1243, "step": 42690 }, { "epoch": 1.5517116069481793, "grad_norm": 0.785205602645874, "learning_rate": 4.562827004973206e-05, "loss": 0.1588, "step": 42700 }, { "epoch": 1.5520750054509775, "grad_norm": 1.4863699674606323, "learning_rate": 4.5625337942785224e-05, "loss": 0.0913, "step": 42710 }, { "epoch": 1.5524384039537757, "grad_norm": 0.33174383640289307, "learning_rate": 4.562240494716596e-05, "loss": 0.158, "step": 42720 }, { "epoch": 1.5528018024565737, "grad_norm": 0.7735195159912109, "learning_rate": 4.5619471063000644e-05, "loss": 0.1295, "step": 42730 }, { "epoch": 1.5531652009593722, "grad_norm": 3.2964320182800293, "learning_rate": 4.561653629041568e-05, "loss": 0.1144, "step": 42740 }, { "epoch": 1.5535285994621701, "grad_norm": 0.6756449937820435, "learning_rate": 4.5613600629537526e-05, "loss": 0.119, "step": 42750 }, { "epoch": 1.5538919979649684, "grad_norm": 1.7608799934387207, "learning_rate": 4.5610664080492655e-05, "loss": 0.1239, "step": 42760 }, { "epoch": 1.5542553964677666, "grad_norm": 0.8312143087387085, "learning_rate": 4.5607726643407614e-05, "loss": 0.1434, "step": 42770 }, { "epoch": 1.5546187949705648, "grad_norm": 1.3083513975143433, "learning_rate": 4.560478831840894e-05, "loss": 0.125, "step": 42780 }, { "epoch": 1.554982193473363, "grad_norm": 1.4495130777359009, "learning_rate": 4.560184910562326e-05, "loss": 0.1172, "step": 42790 }, { "epoch": 
1.555345591976161, "grad_norm": 0.5549319982528687, "learning_rate": 4.559890900517721e-05, "loss": 1.7985, "step": 42800 }, { "epoch": 1.5557089904789594, "grad_norm": 1.0677647590637207, "learning_rate": 4.5595968017197446e-05, "loss": 0.1485, "step": 42810 }, { "epoch": 1.5560723889817574, "grad_norm": 0.5432078242301941, "learning_rate": 4.559302614181071e-05, "loss": 0.1372, "step": 42820 }, { "epoch": 1.5564357874845556, "grad_norm": 2.0982048511505127, "learning_rate": 4.559008337914375e-05, "loss": 0.1543, "step": 42830 }, { "epoch": 1.5567991859873538, "grad_norm": 2.8568451404571533, "learning_rate": 4.558713972932335e-05, "loss": 0.1271, "step": 42840 }, { "epoch": 1.5571625844901518, "grad_norm": 0.9933029413223267, "learning_rate": 4.558419519247635e-05, "loss": 1.0891, "step": 42850 }, { "epoch": 1.5575259829929502, "grad_norm": 0.6010461449623108, "learning_rate": 4.5581249768729614e-05, "loss": 0.1509, "step": 42860 }, { "epoch": 1.5578893814957482, "grad_norm": 0.6242499351501465, "learning_rate": 4.557830345821006e-05, "loss": 0.1527, "step": 42870 }, { "epoch": 1.5582527799985464, "grad_norm": 0.48831334710121155, "learning_rate": 4.557535626104463e-05, "loss": 0.1451, "step": 42880 }, { "epoch": 1.5586161785013446, "grad_norm": 1.1660668849945068, "learning_rate": 4.55724081773603e-05, "loss": 0.1558, "step": 42890 }, { "epoch": 1.5589795770041426, "grad_norm": 1.067808747291565, "learning_rate": 4.5569459207284106e-05, "loss": 0.1634, "step": 42900 }, { "epoch": 1.559342975506941, "grad_norm": 1.6434768438339233, "learning_rate": 4.556650935094309e-05, "loss": 0.1269, "step": 42910 }, { "epoch": 1.559706374009739, "grad_norm": 0.4303635358810425, "learning_rate": 4.556355860846437e-05, "loss": 0.1536, "step": 42920 }, { "epoch": 1.5600697725125372, "grad_norm": 3.148212194442749, "learning_rate": 4.5560606979975075e-05, "loss": 0.1062, "step": 42930 }, { "epoch": 1.5604331710153354, "grad_norm": 3.3599109649658203, "learning_rate": 
4.5557654465602376e-05, "loss": 0.1158, "step": 42940 }, { "epoch": 1.5607965695181336, "grad_norm": 3.2170286178588867, "learning_rate": 4.5554701065473494e-05, "loss": 0.1491, "step": 42950 }, { "epoch": 1.5611599680209318, "grad_norm": 1.1147798299789429, "learning_rate": 4.555174677971567e-05, "loss": 0.1143, "step": 42960 }, { "epoch": 1.5615233665237298, "grad_norm": 0.4949367046356201, "learning_rate": 4.5548791608456206e-05, "loss": 0.1639, "step": 42970 }, { "epoch": 1.5618867650265282, "grad_norm": 0.7166339755058289, "learning_rate": 4.554583555182244e-05, "loss": 0.137, "step": 42980 }, { "epoch": 1.5622501635293262, "grad_norm": 0.48903581500053406, "learning_rate": 4.55428786099417e-05, "loss": 0.126, "step": 42990 }, { "epoch": 1.5626135620321244, "grad_norm": 0.43728914856910706, "learning_rate": 4.553992078294142e-05, "loss": 0.1371, "step": 43000 }, { "epoch": 1.5629769605349226, "grad_norm": 0.7486665844917297, "learning_rate": 4.5536962070949035e-05, "loss": 0.1233, "step": 43010 }, { "epoch": 1.5633403590377206, "grad_norm": 0.7540434002876282, "learning_rate": 4.5534002474092025e-05, "loss": 0.1356, "step": 43020 }, { "epoch": 1.563703757540519, "grad_norm": 1.2763710021972656, "learning_rate": 4.55310419924979e-05, "loss": 0.1234, "step": 43030 }, { "epoch": 1.564067156043317, "grad_norm": 0.5709404945373535, "learning_rate": 4.552808062629424e-05, "loss": 0.1224, "step": 43040 }, { "epoch": 1.5644305545461152, "grad_norm": 0.5243006348609924, "learning_rate": 4.552511837560862e-05, "loss": 0.1175, "step": 43050 }, { "epoch": 1.5647939530489134, "grad_norm": 1.3225644826889038, "learning_rate": 4.552215524056867e-05, "loss": 0.1408, "step": 43060 }, { "epoch": 1.5651573515517117, "grad_norm": 0.2830749452114105, "learning_rate": 4.551919122130208e-05, "loss": 0.1588, "step": 43070 }, { "epoch": 1.5655207500545099, "grad_norm": 1.7666617631912231, "learning_rate": 4.551622631793654e-05, "loss": 0.109, "step": 43080 }, { "epoch": 
1.5658841485573078, "grad_norm": 0.6468254327774048, "learning_rate": 4.551326053059981e-05, "loss": 0.1199, "step": 43090 }, { "epoch": 1.5662475470601063, "grad_norm": 0.7526164650917053, "learning_rate": 4.551029385941967e-05, "loss": 0.1648, "step": 43100 }, { "epoch": 1.5666109455629043, "grad_norm": 3.8184330463409424, "learning_rate": 4.550732630452394e-05, "loss": 0.1392, "step": 43110 }, { "epoch": 1.5669743440657025, "grad_norm": 0.9396213293075562, "learning_rate": 4.550435786604049e-05, "loss": 0.1659, "step": 43120 }, { "epoch": 1.5673377425685007, "grad_norm": 1.536440372467041, "learning_rate": 4.550168551604358e-05, "loss": 0.1227, "step": 43130 }, { "epoch": 1.5677011410712987, "grad_norm": 1.6777888536453247, "learning_rate": 4.549871539909584e-05, "loss": 0.128, "step": 43140 }, { "epoch": 1.568064539574097, "grad_norm": 21.312944412231445, "learning_rate": 4.5495744398931396e-05, "loss": 0.2651, "step": 43150 }, { "epoch": 1.568427938076895, "grad_norm": 0.8739009499549866, "learning_rate": 4.549277251567824e-05, "loss": 0.12, "step": 43160 }, { "epoch": 1.5687913365796933, "grad_norm": 0.3690776526927948, "learning_rate": 4.548979974946444e-05, "loss": 0.1665, "step": 43170 }, { "epoch": 1.5691547350824915, "grad_norm": 1.3902113437652588, "learning_rate": 4.548682610041807e-05, "loss": 0.1502, "step": 43180 }, { "epoch": 1.5695181335852895, "grad_norm": 0.9234703779220581, "learning_rate": 4.5483851568667244e-05, "loss": 0.1168, "step": 43190 }, { "epoch": 1.569881532088088, "grad_norm": 0.7674643397331238, "learning_rate": 4.5480876154340145e-05, "loss": 0.1404, "step": 43200 }, { "epoch": 1.569881532088088, "eval_loss": 0.3601061701774597, "eval_runtime": 180.5599, "eval_samples_per_second": 41.061, "eval_steps_per_second": 5.134, "eval_wer": 0.16855156388984696, "step": 43200 }, { "epoch": 1.5702449305908859, "grad_norm": 0.7668557167053223, "learning_rate": 4.5477899857564966e-05, "loss": 0.1842, "step": 43210 }, { "epoch": 
1.570608329093684, "grad_norm": 0.7534570693969727, "learning_rate": 4.5474922678469936e-05, "loss": 0.1558, "step": 43220 }, { "epoch": 1.5709717275964823, "grad_norm": 0.9190795421600342, "learning_rate": 4.547194461718334e-05, "loss": 0.1808, "step": 43230 }, { "epoch": 1.5713351260992805, "grad_norm": 0.4574483633041382, "learning_rate": 4.54689656738335e-05, "loss": 0.1146, "step": 43240 }, { "epoch": 1.5716985246020787, "grad_norm": 1.1554951667785645, "learning_rate": 4.5465985848548744e-05, "loss": 0.8771, "step": 43250 }, { "epoch": 1.5720619231048767, "grad_norm": 1.175336480140686, "learning_rate": 4.546300514145748e-05, "loss": 0.1337, "step": 43260 }, { "epoch": 1.5724253216076751, "grad_norm": 0.4004783630371094, "learning_rate": 4.5460023552688136e-05, "loss": 0.1963, "step": 43270 }, { "epoch": 1.572788720110473, "grad_norm": 0.5944772362709045, "learning_rate": 4.5457041082369164e-05, "loss": 0.1223, "step": 43280 }, { "epoch": 1.5731521186132713, "grad_norm": 0.7069734334945679, "learning_rate": 4.545405773062909e-05, "loss": 3.2472, "step": 43290 }, { "epoch": 1.5735155171160695, "grad_norm": 1.0471086502075195, "learning_rate": 4.545107349759644e-05, "loss": 0.1558, "step": 43300 }, { "epoch": 1.5738789156188675, "grad_norm": 0.6987308263778687, "learning_rate": 4.54480883833998e-05, "loss": 0.4641, "step": 43310 }, { "epoch": 1.574242314121666, "grad_norm": 0.599287211894989, "learning_rate": 4.5445102388167785e-05, "loss": 0.1592, "step": 43320 }, { "epoch": 1.574605712624464, "grad_norm": 0.9643434286117554, "learning_rate": 4.544211551202904e-05, "loss": 0.1165, "step": 43330 }, { "epoch": 1.5749691111272621, "grad_norm": 0.5655382871627808, "learning_rate": 4.5439127755112285e-05, "loss": 0.1234, "step": 43340 }, { "epoch": 1.5753325096300603, "grad_norm": 1.7126801013946533, "learning_rate": 4.5436139117546235e-05, "loss": 0.1647, "step": 43350 }, { "epoch": 1.5756959081328585, "grad_norm": 0.6298018097877502, "learning_rate": 
4.543314959945966e-05, "loss": 0.1028, "step": 43360 }, { "epoch": 1.5760593066356567, "grad_norm": 0.5706765651702881, "learning_rate": 4.543015920098137e-05, "loss": 0.5641, "step": 43370 }, { "epoch": 1.5764227051384547, "grad_norm": 0.9098716974258423, "learning_rate": 4.542716792224022e-05, "loss": 0.1233, "step": 43380 }, { "epoch": 1.5767861036412532, "grad_norm": 1.0217915773391724, "learning_rate": 4.5424175763365075e-05, "loss": 0.1306, "step": 43390 }, { "epoch": 1.5771495021440511, "grad_norm": 0.651685893535614, "learning_rate": 4.5421182724484866e-05, "loss": 0.1433, "step": 43400 }, { "epoch": 1.5775129006468493, "grad_norm": 0.6281771659851074, "learning_rate": 4.541818880572856e-05, "loss": 0.1313, "step": 43410 }, { "epoch": 1.5778762991496476, "grad_norm": 1.7486456632614136, "learning_rate": 4.541519400722514e-05, "loss": 0.122, "step": 43420 }, { "epoch": 1.5782396976524455, "grad_norm": 1.2109237909317017, "learning_rate": 4.541219832910364e-05, "loss": 0.1297, "step": 43430 }, { "epoch": 1.578603096155244, "grad_norm": 1.041900634765625, "learning_rate": 4.540920177149315e-05, "loss": 0.1014, "step": 43440 }, { "epoch": 1.578966494658042, "grad_norm": 0.7674359083175659, "learning_rate": 4.540620433452277e-05, "loss": 0.1838, "step": 43450 }, { "epoch": 1.5793298931608402, "grad_norm": 1.9548803567886353, "learning_rate": 4.540320601832165e-05, "loss": 0.1345, "step": 43460 }, { "epoch": 1.5796932916636384, "grad_norm": 0.39995163679122925, "learning_rate": 4.540020682301898e-05, "loss": 0.1305, "step": 43470 }, { "epoch": 1.5800566901664364, "grad_norm": 0.9415978789329529, "learning_rate": 4.539720674874398e-05, "loss": 0.123, "step": 43480 }, { "epoch": 1.5804200886692348, "grad_norm": 0.8457926511764526, "learning_rate": 4.539420579562592e-05, "loss": 0.1145, "step": 43490 }, { "epoch": 1.5807834871720328, "grad_norm": 2.9950082302093506, "learning_rate": 4.539120396379409e-05, "loss": 0.1551, "step": 43500 }, { "epoch": 
1.581146885674831, "grad_norm": 1.8456460237503052, "learning_rate": 4.5388201253377834e-05, "loss": 0.0885, "step": 43510 }, { "epoch": 1.5815102841776292, "grad_norm": 0.4476306736469269, "learning_rate": 4.538519766450653e-05, "loss": 0.1351, "step": 43520 }, { "epoch": 1.5818736826804274, "grad_norm": 0.7363295555114746, "learning_rate": 4.5382193197309584e-05, "loss": 0.1045, "step": 43530 }, { "epoch": 1.5822370811832256, "grad_norm": 2.1484272480010986, "learning_rate": 4.5379187851916463e-05, "loss": 0.1304, "step": 43540 }, { "epoch": 1.5826004796860236, "grad_norm": 0.5627908706665039, "learning_rate": 4.537618162845664e-05, "loss": 0.1454, "step": 43550 }, { "epoch": 1.582963878188822, "grad_norm": 1.4841351509094238, "learning_rate": 4.537317452705964e-05, "loss": 0.1301, "step": 43560 }, { "epoch": 1.58332727669162, "grad_norm": 0.7127716541290283, "learning_rate": 4.537016654785505e-05, "loss": 0.1608, "step": 43570 }, { "epoch": 1.5836906751944182, "grad_norm": 1.0103297233581543, "learning_rate": 4.536715769097246e-05, "loss": 0.1137, "step": 43580 }, { "epoch": 1.5840540736972164, "grad_norm": 0.8980743288993835, "learning_rate": 4.536414795654151e-05, "loss": 0.1213, "step": 43590 }, { "epoch": 1.5844174722000144, "grad_norm": 0.5678355097770691, "learning_rate": 4.536113734469188e-05, "loss": 0.1253, "step": 43600 }, { "epoch": 1.5847808707028128, "grad_norm": 0.6713634729385376, "learning_rate": 4.535812585555328e-05, "loss": 0.1144, "step": 43610 }, { "epoch": 1.5851442692056108, "grad_norm": 0.4925456643104553, "learning_rate": 4.5355113489255484e-05, "loss": 0.1448, "step": 43620 }, { "epoch": 1.585507667708409, "grad_norm": 1.3464380502700806, "learning_rate": 4.5352100245928267e-05, "loss": 0.1213, "step": 43630 }, { "epoch": 1.5858710662112072, "grad_norm": 1.3755130767822266, "learning_rate": 4.5349086125701456e-05, "loss": 0.1277, "step": 43640 }, { "epoch": 1.5862344647140054, "grad_norm": 1.2649788856506348, "learning_rate": 
4.534607112870494e-05, "loss": 0.1379, "step": 43650 }, { "epoch": 1.5865978632168036, "grad_norm": 0.6860102415084839, "learning_rate": 4.53430552550686e-05, "loss": 0.1209, "step": 43660 }, { "epoch": 1.5869612617196016, "grad_norm": 0.9149149656295776, "learning_rate": 4.534003850492239e-05, "loss": 0.158, "step": 43670 }, { "epoch": 1.5873246602224, "grad_norm": 1.1880120038986206, "learning_rate": 4.53370208783963e-05, "loss": 0.1283, "step": 43680 }, { "epoch": 1.587688058725198, "grad_norm": 2.6330199241638184, "learning_rate": 4.533400237562033e-05, "loss": 0.1414, "step": 43690 }, { "epoch": 1.5880514572279962, "grad_norm": 0.7637589573860168, "learning_rate": 4.533098299672455e-05, "loss": 0.1267, "step": 43700 }, { "epoch": 1.5884148557307944, "grad_norm": 1.7144758701324463, "learning_rate": 4.5327962741839044e-05, "loss": 0.1222, "step": 43710 }, { "epoch": 1.5887782542335924, "grad_norm": 1.0269776582717896, "learning_rate": 4.532494161109396e-05, "loss": 0.1862, "step": 43720 }, { "epoch": 1.5891416527363909, "grad_norm": 0.8622583746910095, "learning_rate": 4.532191960461946e-05, "loss": 0.1894, "step": 43730 }, { "epoch": 1.5895050512391888, "grad_norm": 1.0310677289962769, "learning_rate": 4.531889672254575e-05, "loss": 0.1284, "step": 43740 }, { "epoch": 1.589868449741987, "grad_norm": 2.753690242767334, "learning_rate": 4.531587296500306e-05, "loss": 0.1404, "step": 43750 }, { "epoch": 1.5902318482447853, "grad_norm": 0.5997269749641418, "learning_rate": 4.53128483321217e-05, "loss": 0.1119, "step": 43760 }, { "epoch": 1.5905952467475832, "grad_norm": 0.8589096665382385, "learning_rate": 4.5309822824031976e-05, "loss": 0.1319, "step": 43770 }, { "epoch": 1.5909586452503817, "grad_norm": 0.7129044532775879, "learning_rate": 4.530679644086425e-05, "loss": 0.2389, "step": 43780 }, { "epoch": 1.5913220437531796, "grad_norm": 0.6947050094604492, "learning_rate": 4.530376918274892e-05, "loss": 0.115, "step": 43790 }, { "epoch": 1.5916854422559779, 
"grad_norm": 0.9983404278755188, "learning_rate": 4.530074104981641e-05, "loss": 0.1446, "step": 43800 }, { "epoch": 1.5916854422559779, "eval_loss": 0.3569597005844116, "eval_runtime": 180.6536, "eval_samples_per_second": 41.04, "eval_steps_per_second": 5.131, "eval_wer": 0.17362535625465172, "step": 43800 }, { "epoch": 1.592048840758776, "grad_norm": 0.712482750415802, "learning_rate": 4.529771204219721e-05, "loss": 0.1434, "step": 43810 }, { "epoch": 1.5924122392615743, "grad_norm": 0.5298041105270386, "learning_rate": 4.5294682160021806e-05, "loss": 0.1771, "step": 43820 }, { "epoch": 1.5927756377643725, "grad_norm": 1.337560772895813, "learning_rate": 4.529165140342076e-05, "loss": 0.1144, "step": 43830 }, { "epoch": 1.5931390362671705, "grad_norm": 0.5129504203796387, "learning_rate": 4.5288619772524654e-05, "loss": 0.1001, "step": 43840 }, { "epoch": 1.593502434769969, "grad_norm": 0.7407031059265137, "learning_rate": 4.528558726746411e-05, "loss": 0.1302, "step": 43850 }, { "epoch": 1.5938658332727669, "grad_norm": 0.9279839992523193, "learning_rate": 4.5282553888369785e-05, "loss": 0.1452, "step": 43860 }, { "epoch": 1.594229231775565, "grad_norm": 0.5245470404624939, "learning_rate": 4.5279519635372374e-05, "loss": 0.1756, "step": 43870 }, { "epoch": 1.5945926302783633, "grad_norm": 0.6099745631217957, "learning_rate": 4.527648450860262e-05, "loss": 0.2019, "step": 43880 }, { "epoch": 1.5949560287811613, "grad_norm": 0.9615786075592041, "learning_rate": 4.52734485081913e-05, "loss": 0.1252, "step": 43890 }, { "epoch": 1.5953194272839597, "grad_norm": 1.52881920337677, "learning_rate": 4.527041163426921e-05, "loss": 1.8751, "step": 43900 }, { "epoch": 1.5956828257867577, "grad_norm": 0.8344588875770569, "learning_rate": 4.526737388696721e-05, "loss": 0.129, "step": 43910 }, { "epoch": 1.596046224289556, "grad_norm": 0.5732100605964661, "learning_rate": 4.526433526641617e-05, "loss": 0.1475, "step": 43920 }, { "epoch": 1.596409622792354, "grad_norm": 
0.8947811722755432, "learning_rate": 4.526129577274704e-05, "loss": 0.4153, "step": 43930 }, { "epoch": 1.5967730212951523, "grad_norm": 1.6199461221694946, "learning_rate": 4.5258255406090746e-05, "loss": 0.1379, "step": 43940 }, { "epoch": 1.5971364197979505, "grad_norm": 1.3465640544891357, "learning_rate": 4.525521416657832e-05, "loss": 0.1515, "step": 43950 }, { "epoch": 1.5974998183007485, "grad_norm": 1.7875219583511353, "learning_rate": 4.525217205434078e-05, "loss": 0.1119, "step": 43960 }, { "epoch": 1.597863216803547, "grad_norm": 0.5457040071487427, "learning_rate": 4.52491290695092e-05, "loss": 0.1499, "step": 43970 }, { "epoch": 1.598226615306345, "grad_norm": 1.2962692975997925, "learning_rate": 4.52460852122147e-05, "loss": 0.1311, "step": 43980 }, { "epoch": 1.5985900138091431, "grad_norm": 0.679913341999054, "learning_rate": 4.5243040482588426e-05, "loss": 0.1298, "step": 43990 }, { "epoch": 1.5989534123119413, "grad_norm": 1.5390740633010864, "learning_rate": 4.523999488076156e-05, "loss": 0.1483, "step": 44000 }, { "epoch": 1.5993168108147393, "grad_norm": 3.566751003265381, "learning_rate": 4.523694840686532e-05, "loss": 0.1303, "step": 44010 }, { "epoch": 1.5996802093175377, "grad_norm": 0.7023512125015259, "learning_rate": 4.5233901061030984e-05, "loss": 0.1305, "step": 44020 }, { "epoch": 1.6000436078203357, "grad_norm": 1.47295343875885, "learning_rate": 4.523085284338985e-05, "loss": 0.1173, "step": 44030 }, { "epoch": 1.600407006323134, "grad_norm": 0.7622318863868713, "learning_rate": 4.522780375407324e-05, "loss": 0.1494, "step": 44040 }, { "epoch": 1.6007704048259321, "grad_norm": 2.0168585777282715, "learning_rate": 4.522475379321254e-05, "loss": 0.1575, "step": 44050 }, { "epoch": 1.6011338033287301, "grad_norm": 0.9191824793815613, "learning_rate": 4.522170296093916e-05, "loss": 0.1111, "step": 44060 }, { "epoch": 1.6014972018315285, "grad_norm": 0.5007340908050537, "learning_rate": 4.521865125738455e-05, "loss": 0.193, "step": 
44070 }, { "epoch": 1.6018606003343265, "grad_norm": 0.8389549851417542, "learning_rate": 4.5215598682680186e-05, "loss": 0.1227, "step": 44080 }, { "epoch": 1.6022239988371247, "grad_norm": 0.7387205362319946, "learning_rate": 4.521254523695761e-05, "loss": 0.2035, "step": 44090 }, { "epoch": 1.602587397339923, "grad_norm": 1.1978685855865479, "learning_rate": 4.520949092034837e-05, "loss": 0.1739, "step": 44100 }, { "epoch": 1.6029507958427212, "grad_norm": 1.9989899396896362, "learning_rate": 4.5206435732984085e-05, "loss": 0.1285, "step": 44110 }, { "epoch": 1.6033141943455194, "grad_norm": 0.6451914310455322, "learning_rate": 4.5203379674996365e-05, "loss": 0.1466, "step": 44120 }, { "epoch": 1.6036775928483173, "grad_norm": 0.6689841747283936, "learning_rate": 4.5200322746516904e-05, "loss": 0.113, "step": 44130 }, { "epoch": 1.6040409913511158, "grad_norm": 1.1558260917663574, "learning_rate": 4.519726494767741e-05, "loss": 0.1005, "step": 44140 }, { "epoch": 1.6044043898539138, "grad_norm": 13.844839096069336, "learning_rate": 4.519420627860963e-05, "loss": 0.1279, "step": 44150 }, { "epoch": 1.604767788356712, "grad_norm": 0.6856222152709961, "learning_rate": 4.519114673944536e-05, "loss": 0.1147, "step": 44160 }, { "epoch": 1.6051311868595102, "grad_norm": 0.7829769253730774, "learning_rate": 4.5188086330316405e-05, "loss": 0.1336, "step": 44170 }, { "epoch": 1.6054945853623082, "grad_norm": 1.3698971271514893, "learning_rate": 4.518502505135465e-05, "loss": 0.1158, "step": 44180 }, { "epoch": 1.6058579838651066, "grad_norm": 1.3197015523910522, "learning_rate": 4.5181962902691975e-05, "loss": 0.1293, "step": 44190 }, { "epoch": 1.6062213823679046, "grad_norm": 0.8092926740646362, "learning_rate": 4.517889988446033e-05, "loss": 0.1466, "step": 44200 }, { "epoch": 1.6065847808707028, "grad_norm": 2.015113115310669, "learning_rate": 4.5175835996791684e-05, "loss": 0.1228, "step": 44210 }, { "epoch": 1.606948179373501, "grad_norm": 1.2220087051391602, 
"learning_rate": 4.5172771239818056e-05, "loss": 0.199, "step": 44220 }, { "epoch": 1.6073115778762992, "grad_norm": 0.5432813167572021, "learning_rate": 4.516970561367149e-05, "loss": 0.1453, "step": 44230 }, { "epoch": 1.6076749763790974, "grad_norm": 0.6337705850601196, "learning_rate": 4.516663911848407e-05, "loss": 0.1257, "step": 44240 }, { "epoch": 1.6080383748818954, "grad_norm": 0.6741940379142761, "learning_rate": 4.5163571754387915e-05, "loss": 0.1062, "step": 44250 }, { "epoch": 1.6084017733846938, "grad_norm": 2.3033409118652344, "learning_rate": 4.516050352151521e-05, "loss": 0.1452, "step": 44260 }, { "epoch": 1.6087651718874918, "grad_norm": 0.4420888125896454, "learning_rate": 4.515743441999814e-05, "loss": 0.1358, "step": 44270 }, { "epoch": 1.60912857039029, "grad_norm": 1.5571812391281128, "learning_rate": 4.515436444996893e-05, "loss": 0.1102, "step": 44280 }, { "epoch": 1.6094919688930882, "grad_norm": 1.084507703781128, "learning_rate": 4.5151293611559865e-05, "loss": 0.1099, "step": 44290 }, { "epoch": 1.6098553673958862, "grad_norm": 0.7025009989738464, "learning_rate": 4.514822190490327e-05, "loss": 0.2296, "step": 44300 }, { "epoch": 1.6102187658986846, "grad_norm": 2.125432252883911, "learning_rate": 4.514514933013147e-05, "loss": 0.1189, "step": 44310 }, { "epoch": 1.6105821644014826, "grad_norm": 0.47693368792533875, "learning_rate": 4.5142075887376856e-05, "loss": 0.1488, "step": 44320 }, { "epoch": 1.6109455629042808, "grad_norm": 0.7935511469841003, "learning_rate": 4.5139001576771865e-05, "loss": 1.8833, "step": 44330 }, { "epoch": 1.611308961407079, "grad_norm": 0.6441402435302734, "learning_rate": 4.513592639844896e-05, "loss": 0.1173, "step": 44340 }, { "epoch": 1.611672359909877, "grad_norm": 1.3646268844604492, "learning_rate": 4.513285035254062e-05, "loss": 0.1171, "step": 44350 }, { "epoch": 1.6120357584126754, "grad_norm": 1.0334749221801758, "learning_rate": 4.512977343917939e-05, "loss": 0.1069, "step": 44360 }, { 
"epoch": 1.6123991569154734, "grad_norm": 0.3879293203353882, "learning_rate": 4.5126695658497856e-05, "loss": 0.1244, "step": 44370 }, { "epoch": 1.6127625554182716, "grad_norm": 0.6635248064994812, "learning_rate": 4.5123617010628606e-05, "loss": 0.1102, "step": 44380 }, { "epoch": 1.6131259539210698, "grad_norm": 0.8040985465049744, "learning_rate": 4.51205374957043e-05, "loss": 0.1455, "step": 44390 }, { "epoch": 1.613489352423868, "grad_norm": 0.5279836654663086, "learning_rate": 4.511745711385763e-05, "loss": 0.1547, "step": 44400 }, { "epoch": 1.613489352423868, "eval_loss": 0.34678882360458374, "eval_runtime": 179.7828, "eval_samples_per_second": 41.239, "eval_steps_per_second": 5.156, "eval_wer": 0.17216403144117487, "step": 44400 }, { "epoch": 1.6138527509266662, "grad_norm": 1.1375586986541748, "learning_rate": 4.51143758652213e-05, "loss": 0.14, "step": 44410 }, { "epoch": 1.6142161494294642, "grad_norm": 1.5960606336593628, "learning_rate": 4.511129374992809e-05, "loss": 0.1336, "step": 44420 }, { "epoch": 1.6145795479322627, "grad_norm": 0.5347716808319092, "learning_rate": 4.5108210768110785e-05, "loss": 0.1083, "step": 44430 }, { "epoch": 1.6149429464350606, "grad_norm": 1.816926121711731, "learning_rate": 4.510512691990222e-05, "loss": 0.1122, "step": 44440 }, { "epoch": 1.6153063449378589, "grad_norm": 1.2517473697662354, "learning_rate": 4.510204220543528e-05, "loss": 0.144, "step": 44450 }, { "epoch": 1.615669743440657, "grad_norm": 1.0830953121185303, "learning_rate": 4.509895662484286e-05, "loss": 0.1851, "step": 44460 }, { "epoch": 1.616033141943455, "grad_norm": 0.45219525694847107, "learning_rate": 4.50958701782579e-05, "loss": 0.1589, "step": 44470 }, { "epoch": 1.6163965404462535, "grad_norm": 0.940949559211731, "learning_rate": 4.509278286581341e-05, "loss": 0.113, "step": 44480 }, { "epoch": 1.6167599389490515, "grad_norm": 0.7262178659439087, "learning_rate": 4.5089694687642394e-05, "loss": 0.1294, "step": 44490 }, { "epoch": 
1.6171233374518497, "grad_norm": 0.8851106762886047, "learning_rate": 4.508660564387791e-05, "loss": 0.1563, "step": 44500 }, { "epoch": 1.6174867359546479, "grad_norm": 1.4259148836135864, "learning_rate": 4.508351573465306e-05, "loss": 0.1298, "step": 44510 }, { "epoch": 1.617850134457446, "grad_norm": 1.7158180475234985, "learning_rate": 4.508042496010098e-05, "loss": 0.197, "step": 44520 }, { "epoch": 1.6182135329602443, "grad_norm": 1.1961179971694946, "learning_rate": 4.507733332035482e-05, "loss": 2.6746, "step": 44530 }, { "epoch": 1.6185769314630423, "grad_norm": 1.0735702514648438, "learning_rate": 4.507424081554782e-05, "loss": 0.1132, "step": 44540 }, { "epoch": 1.6189403299658407, "grad_norm": 0.8479132056236267, "learning_rate": 4.507114744581319e-05, "loss": 0.1411, "step": 44550 }, { "epoch": 1.6193037284686387, "grad_norm": 0.804205596446991, "learning_rate": 4.506805321128424e-05, "loss": 0.1301, "step": 44560 }, { "epoch": 1.6196671269714369, "grad_norm": 0.4933542013168335, "learning_rate": 4.506495811209428e-05, "loss": 0.1765, "step": 44570 }, { "epoch": 1.620030525474235, "grad_norm": 1.0244536399841309, "learning_rate": 4.506186214837666e-05, "loss": 0.131, "step": 44580 }, { "epoch": 1.620393923977033, "grad_norm": 0.4374043941497803, "learning_rate": 4.5058765320264784e-05, "loss": 0.102, "step": 44590 }, { "epoch": 1.6207573224798315, "grad_norm": 0.5329868197441101, "learning_rate": 4.505566762789208e-05, "loss": 0.1168, "step": 44600 }, { "epoch": 1.6211207209826295, "grad_norm": 0.9576613306999207, "learning_rate": 4.5052569071392014e-05, "loss": 0.0948, "step": 44610 }, { "epoch": 1.6214841194854277, "grad_norm": 0.3620557188987732, "learning_rate": 4.50494696508981e-05, "loss": 0.1693, "step": 44620 }, { "epoch": 1.621847517988226, "grad_norm": 119.01215362548828, "learning_rate": 4.504636936654387e-05, "loss": 2.0014, "step": 44630 }, { "epoch": 1.622210916491024, "grad_norm": 0.491005003452301, "learning_rate": 
4.504326821846291e-05, "loss": 0.0958, "step": 44640 }, { "epoch": 1.6225743149938223, "grad_norm": 0.8035761713981628, "learning_rate": 4.504016620678883e-05, "loss": 0.6652, "step": 44650 }, { "epoch": 1.6229377134966203, "grad_norm": 1.4501937627792358, "learning_rate": 4.5037063331655305e-05, "loss": 0.1282, "step": 44660 }, { "epoch": 1.6233011119994185, "grad_norm": 0.3285962641239166, "learning_rate": 4.503395959319601e-05, "loss": 0.6589, "step": 44670 }, { "epoch": 1.6236645105022167, "grad_norm": 1.3429205417633057, "learning_rate": 4.5030854991544666e-05, "loss": 0.1224, "step": 44680 }, { "epoch": 1.624027909005015, "grad_norm": 0.6868845224380493, "learning_rate": 4.502774952683506e-05, "loss": 0.1229, "step": 44690 }, { "epoch": 1.6243913075078131, "grad_norm": 0.7645006775856018, "learning_rate": 4.502464319920099e-05, "loss": 0.1587, "step": 44700 }, { "epoch": 1.6247547060106111, "grad_norm": 1.2401680946350098, "learning_rate": 4.502153600877628e-05, "loss": 0.1274, "step": 44710 }, { "epoch": 1.6251181045134095, "grad_norm": 0.4394826292991638, "learning_rate": 4.501842795569483e-05, "loss": 0.1434, "step": 44720 }, { "epoch": 1.6254815030162075, "grad_norm": 0.5105617046356201, "learning_rate": 4.5015319040090545e-05, "loss": 0.1089, "step": 44730 }, { "epoch": 1.6258449015190057, "grad_norm": 1.5043278932571411, "learning_rate": 4.5012209262097365e-05, "loss": 0.1391, "step": 44740 }, { "epoch": 1.626208300021804, "grad_norm": 0.8561335802078247, "learning_rate": 4.5009098621849296e-05, "loss": 0.2735, "step": 44750 }, { "epoch": 1.626571698524602, "grad_norm": 1.71244478225708, "learning_rate": 4.500598711948037e-05, "loss": 0.1855, "step": 44760 }, { "epoch": 1.6269350970274004, "grad_norm": 0.6392226815223694, "learning_rate": 4.500287475512463e-05, "loss": 0.1675, "step": 44770 }, { "epoch": 1.6272984955301983, "grad_norm": 0.9670777916908264, "learning_rate": 4.4999761528916194e-05, "loss": 0.1201, "step": 44780 }, { "epoch": 
1.6276618940329965, "grad_norm": 0.6879392862319946, "learning_rate": 4.4996647440989195e-05, "loss": 0.15, "step": 44790 }, { "epoch": 1.6280252925357948, "grad_norm": 1.038004994392395, "learning_rate": 4.49935324914778e-05, "loss": 0.148, "step": 44800 }, { "epoch": 1.628388691038593, "grad_norm": 1.1731406450271606, "learning_rate": 4.499041668051624e-05, "loss": 0.1225, "step": 44810 }, { "epoch": 1.6287520895413912, "grad_norm": 1.0449947118759155, "learning_rate": 4.498730000823873e-05, "loss": 0.1348, "step": 44820 }, { "epoch": 1.6291154880441892, "grad_norm": 0.7107880115509033, "learning_rate": 4.498418247477959e-05, "loss": 0.1185, "step": 44830 }, { "epoch": 1.6294788865469876, "grad_norm": 0.9275081157684326, "learning_rate": 4.498106408027313e-05, "loss": 0.1405, "step": 44840 }, { "epoch": 1.6298422850497856, "grad_norm": 1.5348129272460938, "learning_rate": 4.497794482485371e-05, "loss": 0.1401, "step": 44850 }, { "epoch": 1.6302056835525838, "grad_norm": 1.6144418716430664, "learning_rate": 4.497482470865574e-05, "loss": 0.1191, "step": 44860 }, { "epoch": 1.630569082055382, "grad_norm": 1.1674468517303467, "learning_rate": 4.497170373181363e-05, "loss": 0.7629, "step": 44870 }, { "epoch": 1.63093248055818, "grad_norm": 0.9818703532218933, "learning_rate": 4.496858189446187e-05, "loss": 0.1275, "step": 44880 }, { "epoch": 1.6312958790609784, "grad_norm": 8.3660249710083, "learning_rate": 4.496545919673496e-05, "loss": 0.1166, "step": 44890 }, { "epoch": 1.6316592775637764, "grad_norm": 0.7371792793273926, "learning_rate": 4.496233563876746e-05, "loss": 0.1559, "step": 44900 }, { "epoch": 1.6320226760665746, "grad_norm": 0.9537221789360046, "learning_rate": 4.4959211220693945e-05, "loss": 0.125, "step": 44910 }, { "epoch": 1.6323860745693728, "grad_norm": 0.9887855648994446, "learning_rate": 4.495608594264902e-05, "loss": 0.1406, "step": 44920 }, { "epoch": 1.632749473072171, "grad_norm": 0.7480888962745667, "learning_rate": 4.495295980476737e-05, 
"loss": 0.1227, "step": 44930 }, { "epoch": 1.6331128715749692, "grad_norm": 0.784050703048706, "learning_rate": 4.494983280718367e-05, "loss": 0.1068, "step": 44940 }, { "epoch": 1.6334762700777672, "grad_norm": 2.8426759243011475, "learning_rate": 4.494670495003265e-05, "loss": 0.1327, "step": 44950 }, { "epoch": 1.6338396685805654, "grad_norm": 3.448587417602539, "learning_rate": 4.494357623344909e-05, "loss": 0.142, "step": 44960 }, { "epoch": 1.6342030670833636, "grad_norm": 0.669575572013855, "learning_rate": 4.4940446657567784e-05, "loss": 0.1398, "step": 44970 }, { "epoch": 1.6345664655861618, "grad_norm": 1.1868761777877808, "learning_rate": 4.493731622252358e-05, "loss": 0.117, "step": 44980 }, { "epoch": 1.63492986408896, "grad_norm": 0.8725171685218811, "learning_rate": 4.4934184928451364e-05, "loss": 0.0914, "step": 44990 }, { "epoch": 1.635293262591758, "grad_norm": 1.300013780593872, "learning_rate": 4.493105277548605e-05, "loss": 0.1761, "step": 45000 }, { "epoch": 1.635293262591758, "eval_loss": 0.3556674122810364, "eval_runtime": 180.1231, "eval_samples_per_second": 41.161, "eval_steps_per_second": 5.146, "eval_wer": 0.17496868589685408, "step": 45000 }, { "epoch": 1.6356566610945564, "grad_norm": 2.436525583267212, "learning_rate": 4.4927919763762574e-05, "loss": 0.1164, "step": 45010 }, { "epoch": 1.6360200595973544, "grad_norm": 1.3236192464828491, "learning_rate": 4.492478589341594e-05, "loss": 0.1496, "step": 45020 }, { "epoch": 1.6363834581001526, "grad_norm": 0.6237584948539734, "learning_rate": 4.4921651164581185e-05, "loss": 0.1449, "step": 45030 }, { "epoch": 1.6367468566029508, "grad_norm": 0.5335447192192078, "learning_rate": 4.491851557739336e-05, "loss": 0.1349, "step": 45040 }, { "epoch": 1.6371102551057488, "grad_norm": 1.4458340406417847, "learning_rate": 4.491537913198757e-05, "loss": 0.1994, "step": 45050 }, { "epoch": 1.6374736536085472, "grad_norm": 1.4140558242797852, "learning_rate": 4.4912241828498944e-05, "loss": 0.1432, 
"step": 45060 }, { "epoch": 1.6378370521113452, "grad_norm": 1.127317190170288, "learning_rate": 4.4909103667062666e-05, "loss": 0.1932, "step": 45070 }, { "epoch": 1.6382004506141434, "grad_norm": 3.4496073722839355, "learning_rate": 4.490596464781395e-05, "loss": 0.1057, "step": 45080 }, { "epoch": 1.6385638491169416, "grad_norm": 0.663720428943634, "learning_rate": 4.490282477088805e-05, "loss": 0.1081, "step": 45090 }, { "epoch": 1.6389272476197398, "grad_norm": 0.8442180156707764, "learning_rate": 4.4899684036420244e-05, "loss": 0.1354, "step": 45100 }, { "epoch": 1.639290646122538, "grad_norm": 1.3163623809814453, "learning_rate": 4.489654244454585e-05, "loss": 0.1247, "step": 45110 }, { "epoch": 1.639654044625336, "grad_norm": 0.639021635055542, "learning_rate": 4.489339999540023e-05, "loss": 0.1598, "step": 45120 }, { "epoch": 1.6400174431281345, "grad_norm": 0.549207329750061, "learning_rate": 4.489025668911879e-05, "loss": 0.1157, "step": 45130 }, { "epoch": 1.6403808416309325, "grad_norm": 3.4274439811706543, "learning_rate": 4.488711252583696e-05, "loss": 0.1635, "step": 45140 }, { "epoch": 1.6407442401337307, "grad_norm": 3.244072914123535, "learning_rate": 4.488396750569022e-05, "loss": 0.1216, "step": 45150 }, { "epoch": 1.6411076386365289, "grad_norm": 1.9557846784591675, "learning_rate": 4.4880821628814054e-05, "loss": 0.1197, "step": 45160 }, { "epoch": 1.6414710371393268, "grad_norm": 0.8653383851051331, "learning_rate": 4.487767489534402e-05, "loss": 0.1929, "step": 45170 }, { "epoch": 1.6418344356421253, "grad_norm": 0.9569295048713684, "learning_rate": 4.4874527305415706e-05, "loss": 0.1064, "step": 45180 }, { "epoch": 1.6421978341449233, "grad_norm": 0.9595149159431458, "learning_rate": 4.487137885916473e-05, "loss": 0.1087, "step": 45190 }, { "epoch": 1.6425612326477215, "grad_norm": 1.06610906124115, "learning_rate": 4.486822955672673e-05, "loss": 0.1126, "step": 45200 }, { "epoch": 1.6429246311505197, "grad_norm": 0.5096926689147949, 
"learning_rate": 4.4865079398237407e-05, "loss": 0.1175, "step": 45210 }, { "epoch": 1.6432880296533179, "grad_norm": 0.6575452089309692, "learning_rate": 4.48619283838325e-05, "loss": 0.2033, "step": 45220 }, { "epoch": 1.643651428156116, "grad_norm": 1.5489494800567627, "learning_rate": 4.485877651364777e-05, "loss": 0.1283, "step": 45230 }, { "epoch": 1.644014826658914, "grad_norm": 1.2202279567718506, "learning_rate": 4.485562378781901e-05, "loss": 0.129, "step": 45240 }, { "epoch": 1.6443782251617123, "grad_norm": 2.960289716720581, "learning_rate": 4.485247020648208e-05, "loss": 0.1547, "step": 45250 }, { "epoch": 1.6447416236645105, "grad_norm": 1.178314447402954, "learning_rate": 4.4849315769772835e-05, "loss": 0.1424, "step": 45260 }, { "epoch": 1.6451050221673087, "grad_norm": 0.47237566113471985, "learning_rate": 4.484616047782719e-05, "loss": 0.1003, "step": 45270 }, { "epoch": 1.645468420670107, "grad_norm": 0.7487808465957642, "learning_rate": 4.484300433078112e-05, "loss": 1.3485, "step": 45280 }, { "epoch": 1.6458318191729049, "grad_norm": 1.5242539644241333, "learning_rate": 4.483984732877059e-05, "loss": 0.1301, "step": 45290 }, { "epoch": 1.6461952176757033, "grad_norm": 1.1392406225204468, "learning_rate": 4.4836689471931624e-05, "loss": 0.1385, "step": 45300 }, { "epoch": 1.6465586161785013, "grad_norm": 0.856468677520752, "learning_rate": 4.483353076040029e-05, "loss": 0.1029, "step": 45310 }, { "epoch": 1.6469220146812995, "grad_norm": 0.414461225271225, "learning_rate": 4.483037119431268e-05, "loss": 0.1697, "step": 45320 }, { "epoch": 1.6472854131840977, "grad_norm": 1.7020654678344727, "learning_rate": 4.482721077380494e-05, "loss": 0.1102, "step": 45330 }, { "epoch": 1.6476488116868957, "grad_norm": 0.9631456136703491, "learning_rate": 4.482404949901323e-05, "loss": 0.1193, "step": 45340 }, { "epoch": 1.6480122101896941, "grad_norm": 0.5286620855331421, "learning_rate": 4.482088737007376e-05, "loss": 0.628, "step": 45350 }, { "epoch": 
1.6483756086924921, "grad_norm": 1.0761183500289917, "learning_rate": 4.481772438712277e-05, "loss": 0.1131, "step": 45360 }, { "epoch": 1.6487390071952903, "grad_norm": 0.46266233921051025, "learning_rate": 4.481456055029656e-05, "loss": 0.1641, "step": 45370 }, { "epoch": 1.6491024056980885, "grad_norm": 1.1350431442260742, "learning_rate": 4.481139585973142e-05, "loss": 0.122, "step": 45380 }, { "epoch": 1.6494658042008867, "grad_norm": 4.3756632804870605, "learning_rate": 4.4808230315563735e-05, "loss": 0.1424, "step": 45390 }, { "epoch": 1.649829202703685, "grad_norm": 1.386616826057434, "learning_rate": 4.480506391792988e-05, "loss": 0.1579, "step": 45400 }, { "epoch": 1.650192601206483, "grad_norm": 0.6531800031661987, "learning_rate": 4.480189666696629e-05, "loss": 0.1275, "step": 45410 }, { "epoch": 1.6505559997092814, "grad_norm": 1.8200130462646484, "learning_rate": 4.479872856280942e-05, "loss": 0.1943, "step": 45420 }, { "epoch": 1.6509193982120793, "grad_norm": 0.6366170048713684, "learning_rate": 4.47955596055958e-05, "loss": 0.1172, "step": 45430 }, { "epoch": 1.6512827967148775, "grad_norm": 0.8036410808563232, "learning_rate": 4.479238979546193e-05, "loss": 0.1181, "step": 45440 }, { "epoch": 1.6516461952176757, "grad_norm": 0.7934151291847229, "learning_rate": 4.47892191325444e-05, "loss": 0.1319, "step": 45450 }, { "epoch": 1.6520095937204737, "grad_norm": 1.5175780057907104, "learning_rate": 4.4786047616979845e-05, "loss": 0.1328, "step": 45460 }, { "epoch": 1.6523729922232722, "grad_norm": 31.52168083190918, "learning_rate": 4.478287524890489e-05, "loss": 0.4917, "step": 45470 }, { "epoch": 1.6527363907260701, "grad_norm": 0.8360010981559753, "learning_rate": 4.477970202845623e-05, "loss": 0.1071, "step": 45480 }, { "epoch": 1.6530997892288684, "grad_norm": 0.5976376533508301, "learning_rate": 4.4776527955770586e-05, "loss": 0.1098, "step": 45490 }, { "epoch": 1.6534631877316666, "grad_norm": 0.779091477394104, "learning_rate": 
4.4773353030984715e-05, "loss": 0.1621, "step": 45500 }, { "epoch": 1.6538265862344648, "grad_norm": 0.7147294282913208, "learning_rate": 4.477017725423542e-05, "loss": 0.2085, "step": 45510 }, { "epoch": 1.654189984737263, "grad_norm": 1.0562430620193481, "learning_rate": 4.4767000625659525e-05, "loss": 0.1489, "step": 45520 }, { "epoch": 1.654553383240061, "grad_norm": 0.6119662523269653, "learning_rate": 4.4763823145393906e-05, "loss": 0.0975, "step": 45530 }, { "epoch": 1.6549167817428592, "grad_norm": 2.1033360958099365, "learning_rate": 4.476064481357547e-05, "loss": 0.1151, "step": 45540 }, { "epoch": 1.6552801802456574, "grad_norm": 0.5644105672836304, "learning_rate": 4.4757465630341154e-05, "loss": 0.1479, "step": 45550 }, { "epoch": 1.6556435787484556, "grad_norm": 1.2466843128204346, "learning_rate": 4.475428559582794e-05, "loss": 0.1047, "step": 45560 }, { "epoch": 1.6560069772512538, "grad_norm": 0.6210132241249084, "learning_rate": 4.475110471017285e-05, "loss": 0.1703, "step": 45570 }, { "epoch": 1.6563703757540518, "grad_norm": 0.5731077194213867, "learning_rate": 4.474792297351293e-05, "loss": 0.1154, "step": 45580 }, { "epoch": 1.6567337742568502, "grad_norm": 1.2748225927352905, "learning_rate": 4.474474038598527e-05, "loss": 0.1106, "step": 45590 }, { "epoch": 1.6570971727596482, "grad_norm": 1.0353822708129883, "learning_rate": 4.4741556947727e-05, "loss": 0.1453, "step": 45600 }, { "epoch": 1.6570971727596482, "eval_loss": 0.35429847240448, "eval_runtime": 180.0899, "eval_samples_per_second": 41.168, "eval_steps_per_second": 5.147, "eval_wer": 0.16677255976909253, "step": 45600 }, { "epoch": 1.6574605712624464, "grad_norm": 0.8197756409645081, "learning_rate": 4.4738372658875286e-05, "loss": 0.6064, "step": 45610 }, { "epoch": 1.6578239697652446, "grad_norm": 1.9029946327209473, "learning_rate": 4.473518751956732e-05, "loss": 0.122, "step": 45620 }, { "epoch": 1.6581873682680426, "grad_norm": 0.897566020488739, "learning_rate": 
4.473200152994035e-05, "loss": 0.5106, "step": 45630 }, { "epoch": 1.658550766770841, "grad_norm": 0.448548823595047, "learning_rate": 4.472881469013163e-05, "loss": 0.1316, "step": 45640 }, { "epoch": 1.658914165273639, "grad_norm": 0.9315693974494934, "learning_rate": 4.472562700027849e-05, "loss": 0.1616, "step": 45650 }, { "epoch": 1.6592775637764372, "grad_norm": 0.6731955409049988, "learning_rate": 4.4722438460518255e-05, "loss": 0.1138, "step": 45660 }, { "epoch": 1.6596409622792354, "grad_norm": 0.949320375919342, "learning_rate": 4.4719249070988325e-05, "loss": 0.1464, "step": 45670 }, { "epoch": 1.6600043607820336, "grad_norm": 1.0242235660552979, "learning_rate": 4.471605883182611e-05, "loss": 0.1135, "step": 45680 }, { "epoch": 1.6603677592848318, "grad_norm": 2.5394222736358643, "learning_rate": 4.471318689025813e-05, "loss": 1.5802, "step": 45690 }, { "epoch": 1.6607311577876298, "grad_norm": 0.5729508996009827, "learning_rate": 4.4709995037173305e-05, "loss": 0.44, "step": 45700 }, { "epoch": 1.6610945562904282, "grad_norm": 1.372788906097412, "learning_rate": 4.470680233485492e-05, "loss": 0.1286, "step": 45710 }, { "epoch": 1.6614579547932262, "grad_norm": 1.5759491920471191, "learning_rate": 4.470360878344055e-05, "loss": 0.1221, "step": 45720 }, { "epoch": 1.6618213532960244, "grad_norm": 1.0494245290756226, "learning_rate": 4.470041438306778e-05, "loss": 0.126, "step": 45730 }, { "epoch": 1.6621847517988226, "grad_norm": 0.469928115606308, "learning_rate": 4.469721913387424e-05, "loss": 0.1295, "step": 45740 }, { "epoch": 1.6625481503016206, "grad_norm": 0.9547176361083984, "learning_rate": 4.469402303599761e-05, "loss": 0.1724, "step": 45750 }, { "epoch": 1.662911548804419, "grad_norm": 0.5945098400115967, "learning_rate": 4.469082608957561e-05, "loss": 0.1282, "step": 45760 }, { "epoch": 1.663274947307217, "grad_norm": 0.8782799243927002, "learning_rate": 4.468762829474597e-05, "loss": 0.1594, "step": 45770 }, { "epoch": 1.6636383458100152, 
"grad_norm": 0.8542808294296265, "learning_rate": 4.4684429651646476e-05, "loss": 0.1147, "step": 45780 }, { "epoch": 1.6640017443128134, "grad_norm": 1.189684271812439, "learning_rate": 4.4681230160414946e-05, "loss": 0.1348, "step": 45790 }, { "epoch": 1.6643651428156117, "grad_norm": 0.9197025895118713, "learning_rate": 4.467802982118923e-05, "loss": 0.1528, "step": 45800 }, { "epoch": 1.6647285413184099, "grad_norm": 0.5935563445091248, "learning_rate": 4.4674828634107226e-05, "loss": 0.1356, "step": 45810 }, { "epoch": 1.6650919398212078, "grad_norm": 0.7441408038139343, "learning_rate": 4.467162659930686e-05, "loss": 0.1553, "step": 45820 }, { "epoch": 1.665455338324006, "grad_norm": 0.5700821280479431, "learning_rate": 4.466842371692609e-05, "loss": 0.1206, "step": 45830 }, { "epoch": 1.6658187368268043, "grad_norm": 1.0566598176956177, "learning_rate": 4.466521998710292e-05, "loss": 0.1137, "step": 45840 }, { "epoch": 1.6661821353296025, "grad_norm": 0.8243798613548279, "learning_rate": 4.4662015409975406e-05, "loss": 0.1531, "step": 45850 }, { "epoch": 1.6665455338324007, "grad_norm": 1.1144201755523682, "learning_rate": 4.465880998568159e-05, "loss": 0.1122, "step": 45860 }, { "epoch": 1.6669089323351987, "grad_norm": 0.8346664309501648, "learning_rate": 4.46556037143596e-05, "loss": 0.1365, "step": 45870 }, { "epoch": 1.667272330837997, "grad_norm": 1.140259027481079, "learning_rate": 4.46523965961476e-05, "loss": 0.1105, "step": 45880 }, { "epoch": 1.667635729340795, "grad_norm": 1.7616723775863647, "learning_rate": 4.464918863118374e-05, "loss": 0.1092, "step": 45890 }, { "epoch": 1.6679991278435933, "grad_norm": 0.5135784149169922, "learning_rate": 4.464597981960625e-05, "loss": 0.1502, "step": 45900 }, { "epoch": 1.6683625263463915, "grad_norm": 1.542801022529602, "learning_rate": 4.464277016155339e-05, "loss": 0.125, "step": 45910 }, { "epoch": 1.6687259248491895, "grad_norm": 0.31144529581069946, "learning_rate": 4.463955965716346e-05, "loss": 
0.21, "step": 45920 }, { "epoch": 1.669089323351988, "grad_norm": 0.642985463142395, "learning_rate": 4.463634830657478e-05, "loss": 0.1213, "step": 45930 }, { "epoch": 1.6694527218547859, "grad_norm": 2.417689800262451, "learning_rate": 4.4633136109925716e-05, "loss": 0.1244, "step": 45940 }, { "epoch": 1.669816120357584, "grad_norm": 0.5426376461982727, "learning_rate": 4.462992306735467e-05, "loss": 0.1789, "step": 45950 }, { "epoch": 1.6701795188603823, "grad_norm": 0.5924781560897827, "learning_rate": 4.4626709179000094e-05, "loss": 0.118, "step": 45960 }, { "epoch": 1.6705429173631805, "grad_norm": 0.5799354314804077, "learning_rate": 4.4623494445000435e-05, "loss": 0.1714, "step": 45970 }, { "epoch": 1.6709063158659787, "grad_norm": 0.6282142400741577, "learning_rate": 4.462027886549423e-05, "loss": 0.1099, "step": 45980 }, { "epoch": 1.6712697143687767, "grad_norm": 1.1201330423355103, "learning_rate": 4.461706244062002e-05, "loss": 0.1281, "step": 45990 }, { "epoch": 1.6716331128715751, "grad_norm": 1.1922492980957031, "learning_rate": 4.461384517051638e-05, "loss": 0.1245, "step": 46000 }, { "epoch": 1.671996511374373, "grad_norm": 1.0188281536102295, "learning_rate": 4.461062705532194e-05, "loss": 0.1715, "step": 46010 }, { "epoch": 1.6723599098771713, "grad_norm": 0.5861912369728088, "learning_rate": 4.4607408095175364e-05, "loss": 0.1391, "step": 46020 }, { "epoch": 1.6727233083799695, "grad_norm": 0.6984696388244629, "learning_rate": 4.4604188290215324e-05, "loss": 0.116, "step": 46030 }, { "epoch": 1.6730867068827675, "grad_norm": 0.5184624791145325, "learning_rate": 4.460096764058057e-05, "loss": 0.1173, "step": 46040 }, { "epoch": 1.673450105385566, "grad_norm": 0.39695462584495544, "learning_rate": 4.4597746146409856e-05, "loss": 0.1325, "step": 46050 }, { "epoch": 1.673813503888364, "grad_norm": 0.687271237373352, "learning_rate": 4.459452380784199e-05, "loss": 0.1123, "step": 46060 }, { "epoch": 1.6741769023911621, "grad_norm": 
0.8372097015380859, "learning_rate": 4.459130062501582e-05, "loss": 0.1895, "step": 46070 }, { "epoch": 1.6745403008939603, "grad_norm": 1.8692165613174438, "learning_rate": 4.4588076598070206e-05, "loss": 0.1213, "step": 46080 }, { "epoch": 1.6749036993967585, "grad_norm": 12.919623374938965, "learning_rate": 4.458485172714406e-05, "loss": 0.115, "step": 46090 }, { "epoch": 1.6752670978995567, "grad_norm": 0.6733956933021545, "learning_rate": 4.458162601237634e-05, "loss": 0.1473, "step": 46100 }, { "epoch": 1.6756304964023547, "grad_norm": 0.8653566241264343, "learning_rate": 4.457839945390603e-05, "loss": 0.0995, "step": 46110 }, { "epoch": 1.675993894905153, "grad_norm": 0.536120593547821, "learning_rate": 4.4575172051872145e-05, "loss": 0.1494, "step": 46120 }, { "epoch": 1.6763572934079511, "grad_norm": 3.844902753829956, "learning_rate": 4.4571943806413743e-05, "loss": 0.1086, "step": 46130 }, { "epoch": 1.6767206919107494, "grad_norm": 2.0951857566833496, "learning_rate": 4.4568714717669926e-05, "loss": 0.111, "step": 46140 }, { "epoch": 1.6770840904135476, "grad_norm": 2.954204559326172, "learning_rate": 4.456548478577981e-05, "loss": 0.1481, "step": 46150 }, { "epoch": 1.6774474889163455, "grad_norm": 0.7243287563323975, "learning_rate": 4.456225401088258e-05, "loss": 0.1192, "step": 46160 }, { "epoch": 1.677810887419144, "grad_norm": 1.103082299232483, "learning_rate": 4.455902239311741e-05, "loss": 0.156, "step": 46170 }, { "epoch": 1.678174285921942, "grad_norm": 1.2734848260879517, "learning_rate": 4.455578993262357e-05, "loss": 0.1154, "step": 46180 }, { "epoch": 1.6785376844247402, "grad_norm": 0.9912572503089905, "learning_rate": 4.455255662954032e-05, "loss": 0.1228, "step": 46190 }, { "epoch": 1.6789010829275384, "grad_norm": 0.8736640214920044, "learning_rate": 4.454932248400697e-05, "loss": 0.12, "step": 46200 }, { "epoch": 1.6789010829275384, "eval_loss": 0.3570244014263153, "eval_runtime": 179.9492, "eval_samples_per_second": 41.201, 
"eval_steps_per_second": 5.151, "eval_wer": 0.17022164939096338, "step": 46200 }, { "epoch": 1.6792644814303364, "grad_norm": 3.907130241394043, "learning_rate": 4.454608749616287e-05, "loss": 0.1108, "step": 46210 }, { "epoch": 1.6796278799331348, "grad_norm": 0.3899100720882416, "learning_rate": 4.4542851666147404e-05, "loss": 0.1699, "step": 46220 }, { "epoch": 1.6799912784359328, "grad_norm": 1.6752989292144775, "learning_rate": 4.45396149941e-05, "loss": 0.124, "step": 46230 }, { "epoch": 1.680354676938731, "grad_norm": 5.3016886711120605, "learning_rate": 4.453637748016011e-05, "loss": 0.1239, "step": 46240 }, { "epoch": 1.6807180754415292, "grad_norm": 2.723459482192993, "learning_rate": 4.453313912446722e-05, "loss": 0.1245, "step": 46250 }, { "epoch": 1.6810814739443274, "grad_norm": 2.0152530670166016, "learning_rate": 4.4529899927160854e-05, "loss": 0.1056, "step": 46260 }, { "epoch": 1.6814448724471256, "grad_norm": 0.7301231622695923, "learning_rate": 4.452665988838059e-05, "loss": 0.1899, "step": 46270 }, { "epoch": 1.6818082709499236, "grad_norm": 0.7544482350349426, "learning_rate": 4.4523419008266045e-05, "loss": 0.1233, "step": 46280 }, { "epoch": 1.682171669452722, "grad_norm": 0.9912691712379456, "learning_rate": 4.4520177286956824e-05, "loss": 0.1263, "step": 46290 }, { "epoch": 1.68253506795552, "grad_norm": 2.0335001945495605, "learning_rate": 4.451693472459262e-05, "loss": 0.1328, "step": 46300 }, { "epoch": 1.6828984664583182, "grad_norm": 0.5679906606674194, "learning_rate": 4.451369132131314e-05, "loss": 0.0828, "step": 46310 }, { "epoch": 1.6832618649611164, "grad_norm": 1.3262155055999756, "learning_rate": 4.451044707725814e-05, "loss": 0.1426, "step": 46320 }, { "epoch": 1.6836252634639144, "grad_norm": 1.1101963520050049, "learning_rate": 4.4507201992567386e-05, "loss": 0.1385, "step": 46330 }, { "epoch": 1.6839886619667128, "grad_norm": 0.8079712390899658, "learning_rate": 4.4503956067380704e-05, "loss": 0.1278, "step": 46340 }, { 
"epoch": 1.6843520604695108, "grad_norm": 0.79506516456604, "learning_rate": 4.450070930183795e-05, "loss": 0.1281, "step": 46350 }, { "epoch": 1.684715458972309, "grad_norm": 1.1767312288284302, "learning_rate": 4.4497461696079024e-05, "loss": 0.1299, "step": 46360 }, { "epoch": 1.6850788574751072, "grad_norm": 0.7996610403060913, "learning_rate": 4.449421325024384e-05, "loss": 0.156, "step": 46370 }, { "epoch": 1.6854422559779054, "grad_norm": 0.638761579990387, "learning_rate": 4.449096396447237e-05, "loss": 0.1197, "step": 46380 }, { "epoch": 1.6858056544807036, "grad_norm": 3.2339584827423096, "learning_rate": 4.448771383890461e-05, "loss": 0.0992, "step": 46390 }, { "epoch": 1.6861690529835016, "grad_norm": 1.0168710947036743, "learning_rate": 4.448446287368059e-05, "loss": 0.1471, "step": 46400 }, { "epoch": 1.6865324514862998, "grad_norm": 0.7783123850822449, "learning_rate": 4.44812110689404e-05, "loss": 0.1012, "step": 46410 }, { "epoch": 1.686895849989098, "grad_norm": 0.7757607698440552, "learning_rate": 4.447795842482414e-05, "loss": 0.1355, "step": 46420 }, { "epoch": 1.6872592484918962, "grad_norm": 1.9442954063415527, "learning_rate": 4.447470494147195e-05, "loss": 0.0953, "step": 46430 }, { "epoch": 1.6876226469946944, "grad_norm": 1.0810720920562744, "learning_rate": 4.447145061902401e-05, "loss": 0.1037, "step": 46440 }, { "epoch": 1.6879860454974924, "grad_norm": 5.812492847442627, "learning_rate": 4.4468195457620556e-05, "loss": 0.1376, "step": 46450 }, { "epoch": 1.6883494440002909, "grad_norm": 2.3588967323303223, "learning_rate": 4.4464939457401825e-05, "loss": 0.1164, "step": 46460 }, { "epoch": 1.6887128425030888, "grad_norm": 1.6339848041534424, "learning_rate": 4.4461682618508106e-05, "loss": 0.1604, "step": 46470 }, { "epoch": 1.689076241005887, "grad_norm": 1.6590059995651245, "learning_rate": 4.445842494107973e-05, "loss": 0.1529, "step": 46480 }, { "epoch": 1.6894396395086853, "grad_norm": 0.8776388764381409, "learning_rate": 
4.445516642525705e-05, "loss": 0.1165, "step": 46490 }, { "epoch": 1.6898030380114832, "grad_norm": 2.1173806190490723, "learning_rate": 4.4451907071180474e-05, "loss": 0.1431, "step": 46500 }, { "epoch": 1.6901664365142817, "grad_norm": 1.3882853984832764, "learning_rate": 4.444864687899043e-05, "loss": 0.134, "step": 46510 }, { "epoch": 1.6905298350170797, "grad_norm": 0.5224485993385315, "learning_rate": 4.4445385848827395e-05, "loss": 0.1586, "step": 46520 }, { "epoch": 1.6908932335198779, "grad_norm": 1.3461922407150269, "learning_rate": 4.444212398083187e-05, "loss": 0.119, "step": 46530 }, { "epoch": 1.691256632022676, "grad_norm": 0.7005299925804138, "learning_rate": 4.4438861275144395e-05, "loss": 0.1046, "step": 46540 }, { "epoch": 1.6916200305254743, "grad_norm": 2.497610092163086, "learning_rate": 4.4435597731905554e-05, "loss": 0.1164, "step": 46550 }, { "epoch": 1.6919834290282725, "grad_norm": 4.168522357940674, "learning_rate": 4.443233335125596e-05, "loss": 0.1342, "step": 46560 }, { "epoch": 1.6923468275310705, "grad_norm": 0.59686678647995, "learning_rate": 4.442906813333626e-05, "loss": 0.1536, "step": 46570 }, { "epoch": 1.692710226033869, "grad_norm": 0.9300062656402588, "learning_rate": 4.442580207828715e-05, "loss": 0.1176, "step": 46580 }, { "epoch": 1.6930736245366669, "grad_norm": 0.8389412760734558, "learning_rate": 4.442253518624934e-05, "loss": 0.1085, "step": 46590 }, { "epoch": 1.693437023039465, "grad_norm": 1.0124256610870361, "learning_rate": 4.441926745736359e-05, "loss": 0.1357, "step": 46600 }, { "epoch": 1.6938004215422633, "grad_norm": 1.5312106609344482, "learning_rate": 4.4415998891770704e-05, "loss": 0.0911, "step": 46610 }, { "epoch": 1.6941638200450613, "grad_norm": 0.7724300622940063, "learning_rate": 4.441272948961151e-05, "loss": 0.1338, "step": 46620 }, { "epoch": 1.6945272185478597, "grad_norm": 0.9552409052848816, "learning_rate": 4.4409459251026864e-05, "loss": 0.1181, "step": 46630 }, { "epoch": 
1.6948906170506577, "grad_norm": 0.7531419992446899, "learning_rate": 4.440618817615768e-05, "loss": 0.1301, "step": 46640 }, { "epoch": 1.695254015553456, "grad_norm": 1.4831467866897583, "learning_rate": 4.44029162651449e-05, "loss": 0.1734, "step": 46650 }, { "epoch": 1.695617414056254, "grad_norm": 1.008574366569519, "learning_rate": 4.43996435181295e-05, "loss": 0.1205, "step": 46660 }, { "epoch": 1.6959808125590523, "grad_norm": 0.7653814554214478, "learning_rate": 4.4396369935252475e-05, "loss": 0.1886, "step": 46670 }, { "epoch": 1.6963442110618505, "grad_norm": 0.668803334236145, "learning_rate": 4.439309551665488e-05, "loss": 0.1066, "step": 46680 }, { "epoch": 1.6967076095646485, "grad_norm": 0.6227422952651978, "learning_rate": 4.438982026247781e-05, "loss": 0.1191, "step": 46690 }, { "epoch": 1.6970710080674467, "grad_norm": 4.826232433319092, "learning_rate": 4.438654417286237e-05, "loss": 0.1594, "step": 46700 }, { "epoch": 1.697434406570245, "grad_norm": 7.682708740234375, "learning_rate": 4.4383267247949714e-05, "loss": 0.1131, "step": 46710 }, { "epoch": 1.6977978050730431, "grad_norm": 1.0136793851852417, "learning_rate": 4.4379989487881046e-05, "loss": 0.1575, "step": 46720 }, { "epoch": 1.6981612035758413, "grad_norm": 2.774850606918335, "learning_rate": 4.437671089279758e-05, "loss": 0.1384, "step": 46730 }, { "epoch": 1.6985246020786393, "grad_norm": 11.532723426818848, "learning_rate": 4.4373431462840584e-05, "loss": 0.1338, "step": 46740 }, { "epoch": 1.6988880005814377, "grad_norm": 1.9600322246551514, "learning_rate": 4.437015119815136e-05, "loss": 0.1311, "step": 46750 }, { "epoch": 1.6992513990842357, "grad_norm": 0.5886809229850769, "learning_rate": 4.436687009887124e-05, "loss": 0.1647, "step": 46760 }, { "epoch": 1.699614797587034, "grad_norm": 2.5982067584991455, "learning_rate": 4.436358816514159e-05, "loss": 0.2078, "step": 46770 }, { "epoch": 1.6999781960898321, "grad_norm": 0.7927113771438599, "learning_rate": 
4.436030539710383e-05, "loss": 0.1017, "step": 46780 }, { "epoch": 1.7003415945926301, "grad_norm": 0.5954931974411011, "learning_rate": 4.435702179489939e-05, "loss": 0.1354, "step": 46790 }, { "epoch": 1.7007049930954286, "grad_norm": 1.10712468624115, "learning_rate": 4.4353737358669755e-05, "loss": 0.1531, "step": 46800 }, { "epoch": 1.7007049930954286, "eval_loss": 0.3568388819694519, "eval_runtime": 179.9769, "eval_samples_per_second": 41.194, "eval_steps_per_second": 5.151, "eval_wer": 0.16614627770617388, "step": 46800 }, { "epoch": 1.7010683915982265, "grad_norm": 2.1790926456451416, "learning_rate": 4.435045208855644e-05, "loss": 0.1336, "step": 46810 }, { "epoch": 1.7014317901010247, "grad_norm": 0.3032105267047882, "learning_rate": 4.434716598470099e-05, "loss": 0.1234, "step": 46820 }, { "epoch": 1.701795188603823, "grad_norm": 0.8836687207221985, "learning_rate": 4.434387904724499e-05, "loss": 0.1007, "step": 46830 }, { "epoch": 1.7021585871066212, "grad_norm": 0.9671638011932373, "learning_rate": 4.4340591276330075e-05, "loss": 0.3498, "step": 46840 }, { "epoch": 1.7025219856094194, "grad_norm": 3.225950002670288, "learning_rate": 4.4337302672097894e-05, "loss": 0.1493, "step": 46850 }, { "epoch": 1.7028853841122173, "grad_norm": 1.0754051208496094, "learning_rate": 4.4334013234690144e-05, "loss": 0.11, "step": 46860 }, { "epoch": 1.7032487826150158, "grad_norm": 0.3170652687549591, "learning_rate": 4.433072296424855e-05, "loss": 0.1543, "step": 46870 }, { "epoch": 1.7036121811178138, "grad_norm": 2.504772424697876, "learning_rate": 4.4327431860914885e-05, "loss": 0.1221, "step": 46880 }, { "epoch": 1.703975579620612, "grad_norm": 3.071819305419922, "learning_rate": 4.4324139924830956e-05, "loss": 2.0837, "step": 46890 }, { "epoch": 1.7043389781234102, "grad_norm": 0.6085606813430786, "learning_rate": 4.4320847156138584e-05, "loss": 0.2395, "step": 46900 }, { "epoch": 1.7047023766262082, "grad_norm": 1.1668941974639893, "learning_rate": 
4.431755355497965e-05, "loss": 1.7169, "step": 46910 }, { "epoch": 1.7050657751290066, "grad_norm": 2.0856447219848633, "learning_rate": 4.431425912149607e-05, "loss": 0.1911, "step": 46920 }, { "epoch": 1.7054291736318046, "grad_norm": 2.468442916870117, "learning_rate": 4.431096385582979e-05, "loss": 0.1408, "step": 46930 }, { "epoch": 1.7057925721346028, "grad_norm": 0.5875902771949768, "learning_rate": 4.430766775812278e-05, "loss": 0.0959, "step": 46940 }, { "epoch": 1.706155970637401, "grad_norm": 0.7292661070823669, "learning_rate": 4.430437082851706e-05, "loss": 0.1416, "step": 46950 }, { "epoch": 1.7065193691401992, "grad_norm": 0.9893856644630432, "learning_rate": 4.43010730671547e-05, "loss": 0.1233, "step": 46960 }, { "epoch": 1.7068827676429974, "grad_norm": 0.5555475950241089, "learning_rate": 4.4297774474177755e-05, "loss": 0.1817, "step": 46970 }, { "epoch": 1.7072461661457954, "grad_norm": 2.520148754119873, "learning_rate": 4.429447504972838e-05, "loss": 0.123, "step": 46980 }, { "epoch": 1.7076095646485938, "grad_norm": 0.4278533160686493, "learning_rate": 4.429117479394873e-05, "loss": 0.1161, "step": 46990 }, { "epoch": 1.7079729631513918, "grad_norm": 1.8297946453094482, "learning_rate": 4.428787370698099e-05, "loss": 0.1347, "step": 47000 }, { "epoch": 1.70833636165419, "grad_norm": 1.1131938695907593, "learning_rate": 4.42845717889674e-05, "loss": 0.113, "step": 47010 }, { "epoch": 1.7086997601569882, "grad_norm": 0.6185646057128906, "learning_rate": 4.428126904005022e-05, "loss": 0.179, "step": 47020 }, { "epoch": 1.7090631586597862, "grad_norm": 0.5343379378318787, "learning_rate": 4.4277965460371775e-05, "loss": 0.1108, "step": 47030 }, { "epoch": 1.7094265571625846, "grad_norm": 1.2087206840515137, "learning_rate": 4.427466105007437e-05, "loss": 0.1308, "step": 47040 }, { "epoch": 1.7097899556653826, "grad_norm": 1.3067313432693481, "learning_rate": 4.4271355809300416e-05, "loss": 0.126, "step": 47050 }, { "epoch": 1.7101533541681808, 
"grad_norm": 3.69439697265625, "learning_rate": 4.42680497381923e-05, "loss": 0.1235, "step": 47060 }, { "epoch": 1.710516752670979, "grad_norm": 1.7959043979644775, "learning_rate": 4.4264742836892484e-05, "loss": 0.1596, "step": 47070 }, { "epoch": 1.710880151173777, "grad_norm": 0.7556483149528503, "learning_rate": 4.4261435105543434e-05, "loss": 0.139, "step": 47080 }, { "epoch": 1.7112435496765754, "grad_norm": 1.0691754817962646, "learning_rate": 4.425812654428768e-05, "loss": 0.1137, "step": 47090 }, { "epoch": 1.7116069481793734, "grad_norm": 0.39985036849975586, "learning_rate": 4.425481715326778e-05, "loss": 0.1147, "step": 47100 }, { "epoch": 1.7119703466821716, "grad_norm": 0.8595932126045227, "learning_rate": 4.425150693262631e-05, "loss": 0.124, "step": 47110 }, { "epoch": 1.7123337451849698, "grad_norm": 1.0184409618377686, "learning_rate": 4.424819588250591e-05, "loss": 0.1196, "step": 47120 }, { "epoch": 1.712697143687768, "grad_norm": 0.9141554236412048, "learning_rate": 4.4244884003049234e-05, "loss": 0.1139, "step": 47130 }, { "epoch": 1.7130605421905662, "grad_norm": 3.891220808029175, "learning_rate": 4.424157129439897e-05, "loss": 0.1123, "step": 47140 }, { "epoch": 1.7134239406933642, "grad_norm": 1.1739959716796875, "learning_rate": 4.4238257756697875e-05, "loss": 0.1472, "step": 47150 }, { "epoch": 1.7137873391961627, "grad_norm": 1.062530517578125, "learning_rate": 4.423494339008869e-05, "loss": 0.1222, "step": 47160 }, { "epoch": 1.7141507376989606, "grad_norm": 0.840980052947998, "learning_rate": 4.423162819471424e-05, "loss": 0.209, "step": 47170 }, { "epoch": 1.7145141362017589, "grad_norm": 1.5960949659347534, "learning_rate": 4.4228312170717353e-05, "loss": 0.2366, "step": 47180 }, { "epoch": 1.714877534704557, "grad_norm": 1.9222055673599243, "learning_rate": 4.4224995318240914e-05, "loss": 0.1195, "step": 47190 }, { "epoch": 1.715240933207355, "grad_norm": 0.9651756286621094, "learning_rate": 4.422167763742783e-05, "loss": 1.1511, 
"step": 47200 }, { "epoch": 1.7156043317101535, "grad_norm": 0.7599210739135742, "learning_rate": 4.421835912842105e-05, "loss": 0.1067, "step": 47210 }, { "epoch": 1.7159677302129515, "grad_norm": 0.9147046804428101, "learning_rate": 4.4215039791363546e-05, "loss": 0.2011, "step": 47220 }, { "epoch": 1.7163311287157497, "grad_norm": 3.473452091217041, "learning_rate": 4.421171962639835e-05, "loss": 0.1297, "step": 47230 }, { "epoch": 1.7166945272185479, "grad_norm": 0.8271628618240356, "learning_rate": 4.420839863366851e-05, "loss": 0.1042, "step": 47240 }, { "epoch": 1.717057925721346, "grad_norm": 1.2343850135803223, "learning_rate": 4.4205076813317115e-05, "loss": 0.1401, "step": 47250 }, { "epoch": 1.7174213242241443, "grad_norm": 2.5408592224121094, "learning_rate": 4.420175416548729e-05, "loss": 0.1331, "step": 47260 }, { "epoch": 1.7177847227269423, "grad_norm": 0.9061927795410156, "learning_rate": 4.41984306903222e-05, "loss": 0.151, "step": 47270 }, { "epoch": 1.7181481212297407, "grad_norm": 1.4675298929214478, "learning_rate": 4.419510638796505e-05, "loss": 0.133, "step": 47280 }, { "epoch": 1.7185115197325387, "grad_norm": 0.5093546509742737, "learning_rate": 4.4191781258559044e-05, "loss": 0.1091, "step": 47290 }, { "epoch": 1.718874918235337, "grad_norm": 1.7886688709259033, "learning_rate": 4.418845530224748e-05, "loss": 0.1364, "step": 47300 }, { "epoch": 1.719238316738135, "grad_norm": 0.9077005982398987, "learning_rate": 4.418512851917365e-05, "loss": 0.0886, "step": 47310 }, { "epoch": 1.719601715240933, "grad_norm": 0.9992018938064575, "learning_rate": 4.418180090948088e-05, "loss": 0.204, "step": 47320 }, { "epoch": 1.7199651137437315, "grad_norm": 3.1602091789245605, "learning_rate": 4.417847247331257e-05, "loss": 0.1099, "step": 47330 }, { "epoch": 1.7203285122465295, "grad_norm": 0.867824912071228, "learning_rate": 4.417514321081212e-05, "loss": 0.1015, "step": 47340 }, { "epoch": 1.7206919107493277, "grad_norm": 0.8763206005096436, 
"learning_rate": 4.4171813122122966e-05, "loss": 0.1669, "step": 47350 }, { "epoch": 1.721055309252126, "grad_norm": 0.579663097858429, "learning_rate": 4.4168482207388604e-05, "loss": 0.1216, "step": 47360 }, { "epoch": 1.721418707754924, "grad_norm": 0.9902794361114502, "learning_rate": 4.416515046675255e-05, "loss": 0.1507, "step": 47370 }, { "epoch": 1.7217821062577223, "grad_norm": 1.082514762878418, "learning_rate": 4.4161817900358334e-05, "loss": 0.1218, "step": 47380 }, { "epoch": 1.7221455047605203, "grad_norm": 1.6784402132034302, "learning_rate": 4.415848450834958e-05, "loss": 0.1112, "step": 47390 }, { "epoch": 1.7225089032633185, "grad_norm": 1.4955846071243286, "learning_rate": 4.415515029086989e-05, "loss": 0.1539, "step": 47400 }, { "epoch": 1.7225089032633185, "eval_loss": 0.3519718050956726, "eval_runtime": 180.5197, "eval_samples_per_second": 41.07, "eval_steps_per_second": 5.135, "eval_wer": 0.1726087824713635, "step": 47400 }, { "epoch": 1.7228723017661167, "grad_norm": 1.609122395515442, "learning_rate": 4.415181524806293e-05, "loss": 0.1218, "step": 47410 }, { "epoch": 1.723235700268915, "grad_norm": 0.5392347574234009, "learning_rate": 4.4148479380072386e-05, "loss": 0.1304, "step": 47420 }, { "epoch": 1.7235990987717131, "grad_norm": 1.874009132385254, "learning_rate": 4.4145142687042e-05, "loss": 0.1251, "step": 47430 }, { "epoch": 1.7239624972745111, "grad_norm": 0.5162834525108337, "learning_rate": 4.4141805169115534e-05, "loss": 0.0922, "step": 47440 }, { "epoch": 1.7243258957773095, "grad_norm": 0.5982137322425842, "learning_rate": 4.41384668264368e-05, "loss": 0.1274, "step": 47450 }, { "epoch": 1.7246892942801075, "grad_norm": 3.50940203666687, "learning_rate": 4.413512765914961e-05, "loss": 0.134, "step": 47460 }, { "epoch": 1.7250526927829057, "grad_norm": 0.49185237288475037, "learning_rate": 4.413178766739786e-05, "loss": 0.1573, "step": 47470 }, { "epoch": 1.725416091285704, "grad_norm": 0.7658770680427551, "learning_rate": 
4.412844685132545e-05, "loss": 0.1231, "step": 47480 }, { "epoch": 1.725779489788502, "grad_norm": 0.7022835612297058, "learning_rate": 4.4125105211076324e-05, "loss": 0.124, "step": 47490 }, { "epoch": 1.7261428882913004, "grad_norm": 0.7009884119033813, "learning_rate": 4.4121762746794456e-05, "loss": 0.1207, "step": 47500 }, { "epoch": 1.7265062867940983, "grad_norm": 1.4415069818496704, "learning_rate": 4.4118419458623875e-05, "loss": 0.1223, "step": 47510 }, { "epoch": 1.7268696852968966, "grad_norm": 0.8234976530075073, "learning_rate": 4.411507534670862e-05, "loss": 0.1524, "step": 47520 }, { "epoch": 1.7272330837996948, "grad_norm": 0.6677774786949158, "learning_rate": 4.411173041119278e-05, "loss": 0.1298, "step": 47530 }, { "epoch": 1.727596482302493, "grad_norm": 9.5011625289917, "learning_rate": 4.410838465222048e-05, "loss": 0.1264, "step": 47540 }, { "epoch": 1.7279598808052912, "grad_norm": 1.5103870630264282, "learning_rate": 4.410503806993587e-05, "loss": 0.1182, "step": 47550 }, { "epoch": 1.7283232793080892, "grad_norm": 1.5079401731491089, "learning_rate": 4.410169066448314e-05, "loss": 0.1295, "step": 47560 }, { "epoch": 1.7286866778108876, "grad_norm": 1.1625335216522217, "learning_rate": 4.4098342436006536e-05, "loss": 0.169, "step": 47570 }, { "epoch": 1.7290500763136856, "grad_norm": 0.8692395687103271, "learning_rate": 4.40949933846503e-05, "loss": 0.147, "step": 47580 }, { "epoch": 1.7294134748164838, "grad_norm": 0.49405890703201294, "learning_rate": 4.409164351055873e-05, "loss": 0.1297, "step": 47590 }, { "epoch": 1.729776873319282, "grad_norm": 2.2081878185272217, "learning_rate": 4.408829281387619e-05, "loss": 0.161, "step": 47600 }, { "epoch": 1.73014027182208, "grad_norm": 2.5445384979248047, "learning_rate": 4.408494129474701e-05, "loss": 0.105, "step": 47610 }, { "epoch": 1.7305036703248784, "grad_norm": 0.4841686487197876, "learning_rate": 4.408158895331562e-05, "loss": 0.1352, "step": 47620 }, { "epoch": 1.7308670688276764, 
"grad_norm": 91.45899963378906, "learning_rate": 4.407823578972646e-05, "loss": 1.4893, "step": 47630 }, { "epoch": 1.7312304673304746, "grad_norm": 1.3897613286972046, "learning_rate": 4.4074881804124e-05, "loss": 0.1109, "step": 47640 }, { "epoch": 1.7315938658332728, "grad_norm": 2.621211528778076, "learning_rate": 4.407152699665275e-05, "loss": 0.1332, "step": 47650 }, { "epoch": 1.7319572643360708, "grad_norm": 0.8939427733421326, "learning_rate": 4.406817136745726e-05, "loss": 0.1101, "step": 47660 }, { "epoch": 1.7323206628388692, "grad_norm": 0.9848506450653076, "learning_rate": 4.4064814916682105e-05, "loss": 0.1855, "step": 47670 }, { "epoch": 1.7326840613416672, "grad_norm": 0.7746869921684265, "learning_rate": 4.406145764447192e-05, "loss": 0.1791, "step": 47680 }, { "epoch": 1.7330474598444654, "grad_norm": 0.6325744390487671, "learning_rate": 4.405809955097133e-05, "loss": 0.1067, "step": 47690 }, { "epoch": 1.7334108583472636, "grad_norm": 0.4917563498020172, "learning_rate": 4.405474063632505e-05, "loss": 0.1405, "step": 47700 }, { "epoch": 1.7337742568500618, "grad_norm": 0.8248608708381653, "learning_rate": 4.405138090067779e-05, "loss": 0.2285, "step": 47710 }, { "epoch": 1.73413765535286, "grad_norm": 1.1260930299758911, "learning_rate": 4.404802034417431e-05, "loss": 0.1606, "step": 47720 }, { "epoch": 1.734501053855658, "grad_norm": 1.282873272895813, "learning_rate": 4.404465896695941e-05, "loss": 0.1251, "step": 47730 }, { "epoch": 1.7348644523584564, "grad_norm": 1.245103120803833, "learning_rate": 4.404129676917791e-05, "loss": 0.1009, "step": 47740 }, { "epoch": 1.7352278508612544, "grad_norm": 0.8262288570404053, "learning_rate": 4.4037933750974686e-05, "loss": 0.2019, "step": 47750 }, { "epoch": 1.7355912493640526, "grad_norm": 0.6815000772476196, "learning_rate": 4.403456991249464e-05, "loss": 0.1395, "step": 47760 }, { "epoch": 1.7359546478668508, "grad_norm": 0.3317665159702301, "learning_rate": 4.403120525388269e-05, "loss": 0.1484, 
"step": 47770 }, { "epoch": 1.7363180463696488, "grad_norm": 3.1392834186553955, "learning_rate": 4.402783977528383e-05, "loss": 0.1404, "step": 47780 }, { "epoch": 1.7366814448724472, "grad_norm": 0.8862357139587402, "learning_rate": 4.4024473476843043e-05, "loss": 0.1012, "step": 47790 }, { "epoch": 1.7370448433752452, "grad_norm": 0.40148264169692993, "learning_rate": 4.402110635870539e-05, "loss": 0.1284, "step": 47800 }, { "epoch": 1.7374082418780434, "grad_norm": 0.6237661838531494, "learning_rate": 4.401773842101594e-05, "loss": 0.1583, "step": 47810 }, { "epoch": 1.7377716403808416, "grad_norm": 0.6144997477531433, "learning_rate": 4.4014369663919805e-05, "loss": 0.2531, "step": 47820 }, { "epoch": 1.7381350388836398, "grad_norm": 0.6017129421234131, "learning_rate": 4.4011000087562135e-05, "loss": 0.1141, "step": 47830 }, { "epoch": 1.738498437386438, "grad_norm": 1.1838932037353516, "learning_rate": 4.400762969208812e-05, "loss": 0.145, "step": 47840 }, { "epoch": 1.738861835889236, "grad_norm": 1.8152750730514526, "learning_rate": 4.400425847764297e-05, "loss": 0.1485, "step": 47850 }, { "epoch": 1.7392252343920345, "grad_norm": 1.8269041776657104, "learning_rate": 4.400088644437193e-05, "loss": 0.1248, "step": 47860 }, { "epoch": 1.7395886328948325, "grad_norm": 0.36247947812080383, "learning_rate": 4.39975135924203e-05, "loss": 0.1293, "step": 47870 }, { "epoch": 1.7399520313976307, "grad_norm": 0.5409418344497681, "learning_rate": 4.399413992193341e-05, "loss": 0.0932, "step": 47880 }, { "epoch": 1.7403154299004289, "grad_norm": 0.8623117804527283, "learning_rate": 4.3990765433056616e-05, "loss": 0.1024, "step": 47890 }, { "epoch": 1.7406788284032269, "grad_norm": 1.7151434421539307, "learning_rate": 4.39873901259353e-05, "loss": 0.1591, "step": 47900 }, { "epoch": 1.7410422269060253, "grad_norm": 0.580970287322998, "learning_rate": 4.39840140007149e-05, "loss": 0.1124, "step": 47910 }, { "epoch": 1.7414056254088233, "grad_norm": 0.7153110504150391, 
"learning_rate": 4.3980637057540884e-05, "loss": 0.2013, "step": 47920 }, { "epoch": 1.7417690239116215, "grad_norm": 0.6043591499328613, "learning_rate": 4.397725929655875e-05, "loss": 1.2071, "step": 47930 }, { "epoch": 1.7421324224144197, "grad_norm": 1.4633545875549316, "learning_rate": 4.397388071791403e-05, "loss": 0.1533, "step": 47940 }, { "epoch": 1.7424958209172177, "grad_norm": 2.011232376098633, "learning_rate": 4.3970501321752314e-05, "loss": 0.1288, "step": 47950 }, { "epoch": 1.742859219420016, "grad_norm": 1.238098382949829, "learning_rate": 4.396712110821918e-05, "loss": 0.1553, "step": 47960 }, { "epoch": 1.743222617922814, "grad_norm": 0.30882275104522705, "learning_rate": 4.3963740077460285e-05, "loss": 0.1547, "step": 47970 }, { "epoch": 1.7435860164256123, "grad_norm": 0.6614134311676025, "learning_rate": 4.39603582296213e-05, "loss": 0.0926, "step": 47980 }, { "epoch": 1.7439494149284105, "grad_norm": 0.8839965462684631, "learning_rate": 4.3956975564847944e-05, "loss": 0.1228, "step": 47990 }, { "epoch": 1.7443128134312087, "grad_norm": 1.4926025867462158, "learning_rate": 4.395359208328597e-05, "loss": 0.1692, "step": 48000 }, { "epoch": 1.7443128134312087, "eval_loss": 0.3166210651397705, "eval_runtime": 179.7213, "eval_samples_per_second": 41.253, "eval_steps_per_second": 5.158, "eval_wer": 0.1702307259425999, "step": 48000 }, { "epoch": 1.744676211934007, "grad_norm": 0.8344828486442566, "learning_rate": 4.395020778508114e-05, "loss": 0.115, "step": 48010 }, { "epoch": 1.7450396104368049, "grad_norm": 0.7698808908462524, "learning_rate": 4.394682267037928e-05, "loss": 0.11, "step": 48020 }, { "epoch": 1.7454030089396033, "grad_norm": 0.7840538024902344, "learning_rate": 4.394343673932625e-05, "loss": 0.1209, "step": 48030 }, { "epoch": 1.7457664074424013, "grad_norm": 0.9926084876060486, "learning_rate": 4.394004999206792e-05, "loss": 0.1711, "step": 48040 }, { "epoch": 1.7461298059451995, "grad_norm": 0.8097075819969177, "learning_rate": 
4.3936662428750234e-05, "loss": 0.1564, "step": 48050 }, { "epoch": 1.7464932044479977, "grad_norm": 1.1143872737884521, "learning_rate": 4.393327404951915e-05, "loss": 0.1254, "step": 48060 }, { "epoch": 1.7468566029507957, "grad_norm": 0.8600324988365173, "learning_rate": 4.392988485452063e-05, "loss": 0.1512, "step": 48070 }, { "epoch": 1.7472200014535941, "grad_norm": 1.2914844751358032, "learning_rate": 4.3926494843900745e-05, "loss": 0.1215, "step": 48080 }, { "epoch": 1.7475833999563921, "grad_norm": 2.0937047004699707, "learning_rate": 4.3923104017805524e-05, "loss": 0.1222, "step": 48090 }, { "epoch": 1.7479467984591903, "grad_norm": 0.5339716076850891, "learning_rate": 4.391971237638108e-05, "loss": 0.1561, "step": 48100 }, { "epoch": 1.7483101969619885, "grad_norm": 2.8374857902526855, "learning_rate": 4.391631991977356e-05, "loss": 0.1204, "step": 48110 }, { "epoch": 1.7486735954647867, "grad_norm": 0.6171733140945435, "learning_rate": 4.39129266481291e-05, "loss": 0.1742, "step": 48120 }, { "epoch": 1.749036993967585, "grad_norm": 1.0918267965316772, "learning_rate": 4.390953256159394e-05, "loss": 0.1062, "step": 48130 }, { "epoch": 1.749400392470383, "grad_norm": 1.273437738418579, "learning_rate": 4.3906137660314296e-05, "loss": 0.1166, "step": 48140 }, { "epoch": 1.7497637909731814, "grad_norm": 0.9305084943771362, "learning_rate": 4.390274194443645e-05, "loss": 0.1264, "step": 48150 }, { "epoch": 1.7501271894759793, "grad_norm": 1.1687837839126587, "learning_rate": 4.389934541410671e-05, "loss": 0.1437, "step": 48160 }, { "epoch": 1.7504905879787775, "grad_norm": 0.5981254577636719, "learning_rate": 4.389594806947142e-05, "loss": 0.155, "step": 48170 }, { "epoch": 1.7508539864815758, "grad_norm": 0.8380612730979919, "learning_rate": 4.389254991067695e-05, "loss": 0.118, "step": 48180 }, { "epoch": 1.7512173849843737, "grad_norm": 0.9481167197227478, "learning_rate": 4.388915093786973e-05, "loss": 0.1164, "step": 48190 }, { "epoch": 
1.7515807834871722, "grad_norm": 0.4648977220058441, "learning_rate": 4.3885751151196206e-05, "loss": 0.139, "step": 48200 }, { "epoch": 1.7519441819899702, "grad_norm": 1.5835154056549072, "learning_rate": 4.388235055080287e-05, "loss": 0.1288, "step": 48210 }, { "epoch": 1.7523075804927684, "grad_norm": 2.212986469268799, "learning_rate": 4.387894913683622e-05, "loss": 0.1271, "step": 48220 }, { "epoch": 1.7526709789955666, "grad_norm": 1.1150215864181519, "learning_rate": 4.3875887168782186e-05, "loss": 3.4347, "step": 48230 }, { "epoch": 1.7530343774983645, "grad_norm": 4.514034271240234, "learning_rate": 4.3872484209430055e-05, "loss": 0.1084, "step": 48240 }, { "epoch": 1.753397776001163, "grad_norm": 1.3234679698944092, "learning_rate": 4.386908043692973e-05, "loss": 0.1258, "step": 48250 }, { "epoch": 1.753761174503961, "grad_norm": 0.579300045967102, "learning_rate": 4.3865675851427856e-05, "loss": 0.1375, "step": 48260 }, { "epoch": 1.7541245730067592, "grad_norm": null, "learning_rate": 4.3862611029481096e-05, "loss": 3.7136, "step": 48270 }, { "epoch": 1.7544879715095574, "grad_norm": 0.5648366808891296, "learning_rate": 4.3859204899680476e-05, "loss": 0.1164, "step": 48280 }, { "epoch": 1.7548513700123556, "grad_norm": 2.9147396087646484, "learning_rate": 4.38557979573038e-05, "loss": 0.1269, "step": 48290 }, { "epoch": 1.7552147685151538, "grad_norm": 1.5536600351333618, "learning_rate": 4.385239020249789e-05, "loss": 0.1275, "step": 48300 }, { "epoch": 1.7555781670179518, "grad_norm": 1.4855754375457764, "learning_rate": 4.384898163540956e-05, "loss": 0.1281, "step": 48310 }, { "epoch": 1.7559415655207502, "grad_norm": 182.5458984375, "learning_rate": 4.384557225618567e-05, "loss": 2.7885, "step": 48320 }, { "epoch": 1.7563049640235482, "grad_norm": 1.1808582544326782, "learning_rate": 4.3842162064973134e-05, "loss": 0.1378, "step": 48330 }, { "epoch": 1.7566683625263464, "grad_norm": 2.970055341720581, "learning_rate": 4.383875106191888e-05,
"loss": 0.1319, "step": 48340 }, { "epoch": 1.7570317610291446, "grad_norm": 1.0342578887939453, "learning_rate": 4.383533924716986e-05, "loss": 0.1254, "step": 48350 }, { "epoch": 1.7573951595319426, "grad_norm": 2.821300745010376, "learning_rate": 4.383192662087309e-05, "loss": 0.1098, "step": 48360 }, { "epoch": 1.757758558034741, "grad_norm": 4.711688041687012, "learning_rate": 4.382851318317561e-05, "loss": 0.2667, "step": 48370 }, { "epoch": 1.758121956537539, "grad_norm": 0.6478423476219177, "learning_rate": 4.382509893422448e-05, "loss": 0.1041, "step": 48380 }, { "epoch": 1.7584853550403372, "grad_norm": 0.4265103042125702, "learning_rate": 4.382168387416683e-05, "loss": 0.0854, "step": 48390 }, { "epoch": 1.7588487535431354, "grad_norm": 1.3017734289169312, "learning_rate": 4.381826800314979e-05, "loss": 0.1411, "step": 48400 }, { "epoch": 1.7592121520459336, "grad_norm": 2.5378410816192627, "learning_rate": 4.3814851321320524e-05, "loss": 0.1361, "step": 48410 }, { "epoch": 1.7595755505487318, "grad_norm": 1.133738398551941, "learning_rate": 4.381143382882627e-05, "loss": 0.1628, "step": 48420 }, { "epoch": 1.7599389490515298, "grad_norm": 0.851696252822876, "learning_rate": 4.3808015525814254e-05, "loss": 0.1237, "step": 48430 }, { "epoch": 1.7603023475543282, "grad_norm": 0.6289827823638916, "learning_rate": 4.380459641243177e-05, "loss": 0.1126, "step": 48440 }, { "epoch": 1.7606657460571262, "grad_norm": 0.9123024940490723, "learning_rate": 4.380117648882614e-05, "loss": 0.1588, "step": 48450 }, { "epoch": 1.7610291445599244, "grad_norm": 0.9411369562149048, "learning_rate": 4.379775575514471e-05, "loss": 0.1505, "step": 48460 }, { "epoch": 1.7613925430627226, "grad_norm": 0.7461705207824707, "learning_rate": 4.379433421153486e-05, "loss": 0.1404, "step": 48470 }, { "epoch": 1.7617559415655206, "grad_norm": 3.351199150085449, "learning_rate": 4.3790911858144025e-05, "loss": 0.1126, "step": 48480 }, { "epoch": 1.762119340068319, "grad_norm": 
0.8594498634338379, "learning_rate": 4.378748869511965e-05, "loss": 0.0944, "step": 48490 }, { "epoch": 1.762482738571117, "grad_norm": 8.456245422363281, "learning_rate": 4.378406472260924e-05, "loss": 0.1368, "step": 48500 }, { "epoch": 1.7628461370739152, "grad_norm": 2.1628379821777344, "learning_rate": 4.3780639940760306e-05, "loss": 0.1052, "step": 48510 }, { "epoch": 1.7632095355767134, "grad_norm": 0.7315357327461243, "learning_rate": 4.377721434972043e-05, "loss": 0.1247, "step": 48520 }, { "epoch": 1.7635729340795114, "grad_norm": 0.8521216511726379, "learning_rate": 4.377378794963719e-05, "loss": 4.0948, "step": 48530 }, { "epoch": 1.7639363325823099, "grad_norm": 1.6623167991638184, "learning_rate": 4.377036074065823e-05, "loss": 0.1117, "step": 48540 }, { "epoch": 1.7642997310851078, "grad_norm": 0.8901768326759338, "learning_rate": 4.3766932722931206e-05, "loss": 0.1484, "step": 48550 }, { "epoch": 1.764663129587906, "grad_norm": 1.522369623184204, "learning_rate": 4.3763503896603826e-05, "loss": 0.1206, "step": 48560 }, { "epoch": 1.7650265280907043, "grad_norm": 0.5716699957847595, "learning_rate": 4.3760074261823824e-05, "loss": 0.1744, "step": 48570 }, { "epoch": 1.7653899265935025, "grad_norm": 2.5722410678863525, "learning_rate": 4.375664381873896e-05, "loss": 0.1224, "step": 48580 }, { "epoch": 1.7657533250963007, "grad_norm": 2.1870505809783936, "learning_rate": 4.3753212567497065e-05, "loss": 0.1261, "step": 48590 }, { "epoch": 1.7661167235990987, "grad_norm": 0.8871011137962341, "learning_rate": 4.374978050824596e-05, "loss": 0.1399, "step": 48600 }, { "epoch": 1.7661167235990987, "eval_loss": 0.3405693769454956, "eval_runtime": 179.6168, "eval_samples_per_second": 41.277, "eval_steps_per_second": 5.161, "eval_wer": 0.16414035979450686, "step": 48600 }, { "epoch": 1.766480122101897, "grad_norm": 1.1147382259368896, "learning_rate": 4.374634764113352e-05, "loss": 0.1198, "step": 48610 }, { "epoch": 1.766843520604695, "grad_norm": 
0.48618343472480774, "learning_rate": 4.374291396630767e-05, "loss": 0.199, "step": 48620 }, { "epoch": 1.7672069191074933, "grad_norm": 0.9088806509971619, "learning_rate": 4.373947948391633e-05, "loss": 0.1168, "step": 48630 }, { "epoch": 1.7675703176102915, "grad_norm": 0.4064035415649414, "learning_rate": 4.373604419410751e-05, "loss": 0.0935, "step": 48640 }, { "epoch": 1.7679337161130895, "grad_norm": 0.7718721032142639, "learning_rate": 4.373260809702921e-05, "loss": 0.1229, "step": 48650 }, { "epoch": 1.768297114615888, "grad_norm": 0.9764898419380188, "learning_rate": 4.3729171192829465e-05, "loss": 0.1043, "step": 48660 }, { "epoch": 1.7686605131186859, "grad_norm": 1.2039941549301147, "learning_rate": 4.372573348165638e-05, "loss": 0.142, "step": 48670 }, { "epoch": 1.769023911621484, "grad_norm": 0.5939382314682007, "learning_rate": 4.3722294963658064e-05, "loss": 0.1373, "step": 48680 }, { "epoch": 1.7693873101242823, "grad_norm": 1.034637451171875, "learning_rate": 4.3718855638982664e-05, "loss": 0.114, "step": 48690 }, { "epoch": 1.7697507086270805, "grad_norm": 1.1438782215118408, "learning_rate": 4.371541550777838e-05, "loss": 0.123, "step": 48700 }, { "epoch": 1.7701141071298787, "grad_norm": 0.8836175799369812, "learning_rate": 4.3711974570193435e-05, "loss": 0.1159, "step": 48710 }, { "epoch": 1.7704775056326767, "grad_norm": 0.4500117897987366, "learning_rate": 4.370853282637609e-05, "loss": 0.157, "step": 48720 }, { "epoch": 1.7708409041354751, "grad_norm": 0.8643542528152466, "learning_rate": 4.370509027647462e-05, "loss": 0.4179, "step": 48730 }, { "epoch": 1.771204302638273, "grad_norm": 1.6022706031799316, "learning_rate": 4.370164692063737e-05, "loss": 0.1222, "step": 48740 }, { "epoch": 1.7715677011410713, "grad_norm": 2.06821870803833, "learning_rate": 4.3698202759012685e-05, "loss": 0.1353, "step": 48750 }, { "epoch": 1.7719310996438695, "grad_norm": 0.6547145843505859, "learning_rate": 4.369475779174898e-05, "loss": 0.1053, "step": 
48760 }, { "epoch": 1.7722944981466675, "grad_norm": 1.154436707496643, "learning_rate": 4.369131201899468e-05, "loss": 0.1642, "step": 48770 }, { "epoch": 1.772657896649466, "grad_norm": 1.4460755586624146, "learning_rate": 4.3687865440898243e-05, "loss": 0.1194, "step": 48780 }, { "epoch": 1.773021295152264, "grad_norm": 0.8164231777191162, "learning_rate": 4.368441805760818e-05, "loss": 0.1205, "step": 48790 }, { "epoch": 1.7733846936550621, "grad_norm": 3.0235085487365723, "learning_rate": 4.3680969869273016e-05, "loss": 0.2118, "step": 48800 }, { "epoch": 1.7737480921578603, "grad_norm": 1.397639513015747, "learning_rate": 4.367752087604134e-05, "loss": 0.1163, "step": 48810 }, { "epoch": 1.7741114906606583, "grad_norm": 0.5514954328536987, "learning_rate": 4.3674071078061726e-05, "loss": 0.1373, "step": 48820 }, { "epoch": 1.7744748891634567, "grad_norm": 1.3183518648147583, "learning_rate": 4.3670620475482836e-05, "loss": 0.1059, "step": 48830 }, { "epoch": 1.7748382876662547, "grad_norm": 0.6846873164176941, "learning_rate": 4.366716906845335e-05, "loss": 0.1139, "step": 48840 }, { "epoch": 1.775201686169053, "grad_norm": 1.2583421468734741, "learning_rate": 4.366371685712196e-05, "loss": 0.1248, "step": 48850 }, { "epoch": 1.7755650846718511, "grad_norm": 0.7057945728302002, "learning_rate": 4.366026384163742e-05, "loss": 0.1089, "step": 48860 }, { "epoch": 1.7759284831746494, "grad_norm": 1.1777584552764893, "learning_rate": 4.36568100221485e-05, "loss": 0.1842, "step": 48870 }, { "epoch": 1.7762918816774476, "grad_norm": 0.8768916726112366, "learning_rate": 4.3653355398804025e-05, "loss": 0.1095, "step": 48880 }, { "epoch": 1.7766552801802455, "grad_norm": 1.5699349641799927, "learning_rate": 4.364989997175283e-05, "loss": 0.0982, "step": 48890 }, { "epoch": 1.777018678683044, "grad_norm": 1.1270577907562256, "learning_rate": 4.36464437411438e-05, "loss": 0.1329, "step": 48900 }, { "epoch": 1.777382077185842, "grad_norm": 0.545153021812439, 
"learning_rate": 4.3642986707125856e-05, "loss": 0.1169, "step": 48910 }, { "epoch": 1.7777454756886402, "grad_norm": 1.2134042978286743, "learning_rate": 4.363952886984795e-05, "loss": 0.1704, "step": 48920 }, { "epoch": 1.7781088741914384, "grad_norm": 1.079684853553772, "learning_rate": 4.3636070229459055e-05, "loss": 0.1077, "step": 48930 }, { "epoch": 1.7784722726942364, "grad_norm": 0.6559361815452576, "learning_rate": 4.3632610786108205e-05, "loss": 0.1044, "step": 48940 }, { "epoch": 1.7788356711970348, "grad_norm": 0.9042558670043945, "learning_rate": 4.3629150539944454e-05, "loss": 0.8377, "step": 48950 }, { "epoch": 1.7791990696998328, "grad_norm": 2.102360725402832, "learning_rate": 4.362568949111689e-05, "loss": 0.1407, "step": 48960 }, { "epoch": 1.779562468202631, "grad_norm": 2.581956148147583, "learning_rate": 4.3622227639774635e-05, "loss": 0.182, "step": 48970 }, { "epoch": 1.7799258667054292, "grad_norm": 0.9113497138023376, "learning_rate": 4.361876498606685e-05, "loss": 0.0965, "step": 48980 }, { "epoch": 1.7802892652082274, "grad_norm": 2.428302049636841, "learning_rate": 4.361530153014273e-05, "loss": 0.1093, "step": 48990 }, { "epoch": 1.7806526637110256, "grad_norm": 1.9562546014785767, "learning_rate": 4.361183727215149e-05, "loss": 0.1437, "step": 49000 }, { "epoch": 1.7810160622138236, "grad_norm": 0.7445639967918396, "learning_rate": 4.360837221224241e-05, "loss": 0.1037, "step": 49010 }, { "epoch": 1.781379460716622, "grad_norm": 0.9966205954551697, "learning_rate": 4.360490635056478e-05, "loss": 0.1649, "step": 49020 }, { "epoch": 1.78174285921942, "grad_norm": 1.8854800462722778, "learning_rate": 4.360143968726793e-05, "loss": 0.1139, "step": 49030 }, { "epoch": 1.7821062577222182, "grad_norm": 0.5688827037811279, "learning_rate": 4.3597972222501225e-05, "loss": 0.1134, "step": 49040 }, { "epoch": 1.7824696562250164, "grad_norm": 0.7284519076347351, "learning_rate": 4.359450395641408e-05, "loss": 0.1636, "step": 49050 }, { "epoch": 
1.7828330547278144, "grad_norm": 0.7459525465965271, "learning_rate": 4.359103488915591e-05, "loss": 0.1282, "step": 49060 }, { "epoch": 1.7831964532306128, "grad_norm": 0.3692184090614319, "learning_rate": 4.35875650208762e-05, "loss": 0.1491, "step": 49070 }, { "epoch": 1.7835598517334108, "grad_norm": 1.8872483968734741, "learning_rate": 4.358409435172443e-05, "loss": 0.1701, "step": 49080 }, { "epoch": 1.783923250236209, "grad_norm": 2.4084055423736572, "learning_rate": 4.358062288185018e-05, "loss": 0.1038, "step": 49090 }, { "epoch": 1.7842866487390072, "grad_norm": 1.3348972797393799, "learning_rate": 4.357715061140299e-05, "loss": 0.1723, "step": 49100 }, { "epoch": 1.7846500472418052, "grad_norm": 1.2129530906677246, "learning_rate": 4.357367754053248e-05, "loss": 0.1127, "step": 49110 }, { "epoch": 1.7850134457446036, "grad_norm": 0.3813287615776062, "learning_rate": 4.3570203669388285e-05, "loss": 0.1193, "step": 49120 }, { "epoch": 1.7853768442474016, "grad_norm": 0.5162807703018188, "learning_rate": 4.356672899812009e-05, "loss": 0.1023, "step": 49130 }, { "epoch": 1.7857402427501998, "grad_norm": 1.0435444116592407, "learning_rate": 4.356325352687761e-05, "loss": 0.0866, "step": 49140 }, { "epoch": 1.786103641252998, "grad_norm": 1.3136024475097656, "learning_rate": 4.355977725581058e-05, "loss": 0.1726, "step": 49150 }, { "epoch": 1.7864670397557962, "grad_norm": 3.7956295013427734, "learning_rate": 4.355630018506878e-05, "loss": 0.1215, "step": 49160 }, { "epoch": 1.7868304382585944, "grad_norm": 0.8079971075057983, "learning_rate": 4.3552822314802025e-05, "loss": 0.1389, "step": 49170 }, { "epoch": 1.7871938367613924, "grad_norm": 0.7602683901786804, "learning_rate": 4.354934364516018e-05, "loss": 0.1433, "step": 49180 }, { "epoch": 1.7875572352641909, "grad_norm": 1.5899375677108765, "learning_rate": 4.3545864176293104e-05, "loss": 0.1194, "step": 49190 }, { "epoch": 1.7879206337669888, "grad_norm": 57.75679016113281, "learning_rate": 
4.354238390835073e-05, "loss": 0.4817, "step": 49200 }, { "epoch": 1.7879206337669888, "eval_loss": 0.3314359784126282, "eval_runtime": 180.9298, "eval_samples_per_second": 40.977, "eval_steps_per_second": 5.124, "eval_wer": 0.17101130938333908, "step": 49200 }, { "epoch": 1.788284032269787, "grad_norm": 0.689213216304779, "learning_rate": 4.353890284148301e-05, "loss": 0.1062, "step": 49210 }, { "epoch": 1.7886474307725853, "grad_norm": 0.43430793285369873, "learning_rate": 4.3535420975839924e-05, "loss": 0.1184, "step": 49220 }, { "epoch": 1.7890108292753832, "grad_norm": 0.5645721554756165, "learning_rate": 4.353193831157151e-05, "loss": 0.5598, "step": 49230 }, { "epoch": 1.7893742277781817, "grad_norm": 0.9227817058563232, "learning_rate": 4.352845484882779e-05, "loss": 0.1123, "step": 49240 }, { "epoch": 1.7897376262809797, "grad_norm": 1.031924843788147, "learning_rate": 4.35249705877589e-05, "loss": 0.1266, "step": 49250 }, { "epoch": 1.7901010247837779, "grad_norm": 0.7044590711593628, "learning_rate": 4.3521485528514914e-05, "loss": 0.1203, "step": 49260 }, { "epoch": 1.790464423286576, "grad_norm": 0.669763445854187, "learning_rate": 4.3517999671246034e-05, "loss": 0.1206, "step": 49270 }, { "epoch": 1.7908278217893743, "grad_norm": 1.149156928062439, "learning_rate": 4.351451301610243e-05, "loss": 0.1309, "step": 49280 }, { "epoch": 1.7911912202921725, "grad_norm": 0.42814984917640686, "learning_rate": 4.3511025563234334e-05, "loss": 0.6053, "step": 49290 }, { "epoch": 1.7915546187949705, "grad_norm": 1.2521979808807373, "learning_rate": 4.350753731279201e-05, "loss": 0.1397, "step": 49300 }, { "epoch": 1.791918017297769, "grad_norm": 1.2080492973327637, "learning_rate": 4.3504048264925756e-05, "loss": 0.1416, "step": 49310 }, { "epoch": 1.7922814158005669, "grad_norm": 0.479490727186203, "learning_rate": 4.3500558419785897e-05, "loss": 0.1247, "step": 49320 }, { "epoch": 1.792644814303365, "grad_norm": 0.6005672216415405, "learning_rate": 
4.349706777752279e-05, "loss": 0.1049, "step": 49330 }, { "epoch": 1.7930082128061633, "grad_norm": 0.7790218591690063, "learning_rate": 4.349357633828687e-05, "loss": 0.111, "step": 49340 }, { "epoch": 1.7933716113089613, "grad_norm": 0.9393801689147949, "learning_rate": 4.3490084102228523e-05, "loss": 0.1228, "step": 49350 }, { "epoch": 1.7937350098117597, "grad_norm": 6.127364158630371, "learning_rate": 4.348659106949825e-05, "loss": 0.1369, "step": 49360 }, { "epoch": 1.7940984083145577, "grad_norm": 0.746756911277771, "learning_rate": 4.3483097240246546e-05, "loss": 0.1515, "step": 49370 }, { "epoch": 1.794461806817356, "grad_norm": 0.8121843934059143, "learning_rate": 4.347960261462394e-05, "loss": 0.1007, "step": 49380 }, { "epoch": 1.794825205320154, "grad_norm": 1.695778250694275, "learning_rate": 4.347610719278101e-05, "loss": 0.1302, "step": 49390 }, { "epoch": 1.795188603822952, "grad_norm": 0.9138917922973633, "learning_rate": 4.3472610974868354e-05, "loss": 0.1758, "step": 49400 }, { "epoch": 1.7955520023257505, "grad_norm": 0.6245046257972717, "learning_rate": 4.3469113961036625e-05, "loss": 0.1269, "step": 49410 }, { "epoch": 1.7959154008285485, "grad_norm": 0.4156048595905304, "learning_rate": 4.3465616151436484e-05, "loss": 0.1782, "step": 49420 }, { "epoch": 1.7962787993313467, "grad_norm": 0.7024033069610596, "learning_rate": 4.346211754621865e-05, "loss": 0.1817, "step": 49430 }, { "epoch": 1.796642197834145, "grad_norm": 1.7370237112045288, "learning_rate": 4.345861814553385e-05, "loss": 0.1453, "step": 49440 }, { "epoch": 1.7970055963369431, "grad_norm": 2.4251365661621094, "learning_rate": 4.3455117949532875e-05, "loss": 1.928, "step": 49450 }, { "epoch": 1.7973689948397413, "grad_norm": 0.7227508425712585, "learning_rate": 4.3451616958366524e-05, "loss": 0.1225, "step": 49460 }, { "epoch": 1.7977323933425393, "grad_norm": 0.27948006987571716, "learning_rate": 4.344811517218566e-05, "loss": 0.1819, "step": 49470 }, { "epoch": 
1.7980957918453377, "grad_norm": 0.583686888217926, "learning_rate": 4.344461259114116e-05, "loss": 0.1155, "step": 49480 }, { "epoch": 1.7984591903481357, "grad_norm": 0.7126079797744751, "learning_rate": 4.344110921538391e-05, "loss": 0.1466, "step": 49490 }, { "epoch": 1.798822588850934, "grad_norm": 1.089173674583435, "learning_rate": 4.343760504506488e-05, "loss": 0.142, "step": 49500 }, { "epoch": 1.7991859873537321, "grad_norm": 0.950932502746582, "learning_rate": 4.343410008033506e-05, "loss": 0.1036, "step": 49510 }, { "epoch": 1.7995493858565301, "grad_norm": 0.6006519794464111, "learning_rate": 4.343059432134545e-05, "loss": 0.2374, "step": 49520 }, { "epoch": 1.7999127843593286, "grad_norm": 2.3363699913024902, "learning_rate": 4.342708776824711e-05, "loss": 0.1084, "step": 49530 }, { "epoch": 1.8002761828621265, "grad_norm": 0.5308919548988342, "learning_rate": 4.342358042119111e-05, "loss": 0.1194, "step": 49540 }, { "epoch": 1.8006395813649247, "grad_norm": 0.5925958752632141, "learning_rate": 4.3420072280328594e-05, "loss": 0.1374, "step": 49550 }, { "epoch": 1.801002979867723, "grad_norm": 0.9156503081321716, "learning_rate": 4.34165633458107e-05, "loss": 0.1201, "step": 49560 }, { "epoch": 1.8013663783705212, "grad_norm": 1.0838543176651, "learning_rate": 4.341305361778862e-05, "loss": 0.1442, "step": 49570 }, { "epoch": 1.8017297768733194, "grad_norm": 0.5390272736549377, "learning_rate": 4.340954309641357e-05, "loss": 0.1318, "step": 49580 }, { "epoch": 1.8020931753761174, "grad_norm": 1.546189785003662, "learning_rate": 4.340603178183681e-05, "loss": 0.1106, "step": 49590 }, { "epoch": 1.8024565738789158, "grad_norm": 0.6207401752471924, "learning_rate": 4.340251967420963e-05, "loss": 0.1309, "step": 49600 }, { "epoch": 1.8028199723817138, "grad_norm": 0.7735322713851929, "learning_rate": 4.339900677368335e-05, "loss": 0.1116, "step": 49610 }, { "epoch": 1.803183370884512, "grad_norm": 0.30109134316444397, "learning_rate": 
4.3395493080409335e-05, "loss": 0.1424, "step": 49620 }, { "epoch": 1.8035467693873102, "grad_norm": 0.9261472225189209, "learning_rate": 4.339197859453897e-05, "loss": 0.1243, "step": 49630 }, { "epoch": 1.8039101678901082, "grad_norm": 1.4092273712158203, "learning_rate": 4.3388463316223696e-05, "loss": 0.0978, "step": 49640 }, { "epoch": 1.8042735663929066, "grad_norm": 1.4663946628570557, "learning_rate": 4.338494724561496e-05, "loss": 0.1159, "step": 49650 }, { "epoch": 1.8046369648957046, "grad_norm": 0.9966018795967102, "learning_rate": 4.338143038286425e-05, "loss": 0.1606, "step": 49660 }, { "epoch": 1.8050003633985028, "grad_norm": 1.0136394500732422, "learning_rate": 4.3377912728123124e-05, "loss": 0.147, "step": 49670 }, { "epoch": 1.805363761901301, "grad_norm": 0.647540807723999, "learning_rate": 4.337439428154312e-05, "loss": 3.8271, "step": 49680 }, { "epoch": 1.805727160404099, "grad_norm": 0.9579293131828308, "learning_rate": 4.3370875043275835e-05, "loss": 0.1021, "step": 49690 }, { "epoch": 1.8060905589068974, "grad_norm": 0.9937068819999695, "learning_rate": 4.3367355013472924e-05, "loss": 0.1295, "step": 49700 }, { "epoch": 1.8064539574096954, "grad_norm": 0.7757039070129395, "learning_rate": 4.3363834192286026e-05, "loss": 0.1347, "step": 49710 }, { "epoch": 1.8068173559124936, "grad_norm": 0.4056714177131653, "learning_rate": 4.336031257986685e-05, "loss": 0.1225, "step": 49720 }, { "epoch": 1.8071807544152918, "grad_norm": 0.7865206003189087, "learning_rate": 4.335679017636714e-05, "loss": 0.1149, "step": 49730 }, { "epoch": 1.80754415291809, "grad_norm": 0.8593509793281555, "learning_rate": 4.335326698193864e-05, "loss": 0.2783, "step": 49740 }, { "epoch": 1.8079075514208882, "grad_norm": 1.0015538930892944, "learning_rate": 4.334974299673318e-05, "loss": 0.1143, "step": 49750 }, { "epoch": 1.8082709499236862, "grad_norm": 0.6025313138961792, "learning_rate": 4.334621822090258e-05, "loss": 0.0951, "step": 49760 }, { "epoch": 
1.8086343484264846, "grad_norm": 0.5842722058296204, "learning_rate": 4.33426926545987e-05, "loss": 0.1468, "step": 49770 }, { "epoch": 1.8089977469292826, "grad_norm": 0.5118249654769897, "learning_rate": 4.333916629797348e-05, "loss": 0.1095, "step": 49780 }, { "epoch": 1.8093611454320808, "grad_norm": 2.118723154067993, "learning_rate": 4.333563915117882e-05, "loss": 0.1106, "step": 49790 }, { "epoch": 1.809724543934879, "grad_norm": 0.7443241477012634, "learning_rate": 4.3332111214366714e-05, "loss": 0.1103, "step": 49800 }, { "epoch": 1.809724543934879, "eval_loss": 0.3337153196334839, "eval_runtime": 180.2527, "eval_samples_per_second": 41.131, "eval_steps_per_second": 5.143, "eval_wer": 0.1598834570769873, "step": 49800 }, { "epoch": 1.810087942437677, "grad_norm": 1.2676368951797485, "learning_rate": 4.332858248768916e-05, "loss": 0.1016, "step": 49810 }, { "epoch": 1.8104513409404754, "grad_norm": 2.088113307952881, "learning_rate": 4.3325052971298195e-05, "loss": 0.1931, "step": 49820 }, { "epoch": 1.8108147394432734, "grad_norm": 0.6147329807281494, "learning_rate": 4.332152266534591e-05, "loss": 0.0989, "step": 49830 }, { "epoch": 1.8111781379460716, "grad_norm": 1.3880411386489868, "learning_rate": 4.3317991569984384e-05, "loss": 0.1297, "step": 49840 }, { "epoch": 1.8115415364488698, "grad_norm": 1.794396996498108, "learning_rate": 4.331445968536579e-05, "loss": 0.1191, "step": 49850 }, { "epoch": 1.811904934951668, "grad_norm": 1.7588627338409424, "learning_rate": 4.331092701164229e-05, "loss": 0.1594, "step": 49860 }, { "epoch": 1.8122683334544663, "grad_norm": 0.9384113550186157, "learning_rate": 4.330739354896609e-05, "loss": 0.1634, "step": 49870 }, { "epoch": 1.8126317319572642, "grad_norm": 0.5903241634368896, "learning_rate": 4.330385929748945e-05, "loss": 0.1436, "step": 49880 }, { "epoch": 1.8129951304600627, "grad_norm": 0.6767405271530151, "learning_rate": 4.330032425736462e-05, "loss": 0.1139, "step": 49890 }, { "epoch": 
1.8133585289628606, "grad_norm": 0.8860883116722107, "learning_rate": 4.329678842874395e-05, "loss": 0.1279, "step": 49900 }, { "epoch": 1.8137219274656589, "grad_norm": 58.24811553955078, "learning_rate": 4.3293251811779755e-05, "loss": 0.5721, "step": 49910 }, { "epoch": 1.814085325968457, "grad_norm": 0.965320348739624, "learning_rate": 4.328971440662443e-05, "loss": 0.1297, "step": 49920 }, { "epoch": 1.814448724471255, "grad_norm": 0.5799686908721924, "learning_rate": 4.328617621343039e-05, "loss": 0.1166, "step": 49930 }, { "epoch": 1.8148121229740535, "grad_norm": 2.6377480030059814, "learning_rate": 4.3282637232350074e-05, "loss": 0.0991, "step": 49940 }, { "epoch": 1.8151755214768515, "grad_norm": 0.9475420117378235, "learning_rate": 4.327909746353597e-05, "loss": 0.119, "step": 49950 }, { "epoch": 1.8155389199796497, "grad_norm": 0.36662977933883667, "learning_rate": 4.32755569071406e-05, "loss": 0.1224, "step": 49960 }, { "epoch": 1.8159023184824479, "grad_norm": 1.361423373222351, "learning_rate": 4.3272015563316506e-05, "loss": 0.1463, "step": 49970 }, { "epoch": 1.8162657169852459, "grad_norm": 1.3705862760543823, "learning_rate": 4.326847343221627e-05, "loss": 0.9571, "step": 49980 }, { "epoch": 1.8166291154880443, "grad_norm": 1.0312581062316895, "learning_rate": 4.326493051399251e-05, "loss": 0.1055, "step": 49990 }, { "epoch": 1.8169925139908423, "grad_norm": 1.8485617637634277, "learning_rate": 4.326138680879789e-05, "loss": 0.1521, "step": 50000 }, { "epoch": 1.8173559124936405, "grad_norm": 0.8536475300788879, "learning_rate": 4.325784231678508e-05, "loss": 0.1067, "step": 50010 }, { "epoch": 1.8177193109964387, "grad_norm": 0.8633929491043091, "learning_rate": 4.325429703810681e-05, "loss": 0.1326, "step": 50020 }, { "epoch": 1.818082709499237, "grad_norm": 0.9228955507278442, "learning_rate": 4.325075097291582e-05, "loss": 0.1123, "step": 50030 }, { "epoch": 1.818446108002035, "grad_norm": 0.6627784967422485, "learning_rate": 
4.324720412136491e-05, "loss": 0.1049, "step": 50040 }, { "epoch": 1.818809506504833, "grad_norm": 2.006086826324463, "learning_rate": 4.324365648360691e-05, "loss": 0.1699, "step": 50050 }, { "epoch": 1.8191729050076315, "grad_norm": 0.8955428600311279, "learning_rate": 4.3240108059794646e-05, "loss": 0.1357, "step": 50060 }, { "epoch": 1.8195363035104295, "grad_norm": 164.89707946777344, "learning_rate": 4.3236558850081036e-05, "loss": 3.7378, "step": 50070 }, { "epoch": 1.8198997020132277, "grad_norm": 1.0500569343566895, "learning_rate": 4.3233008854618994e-05, "loss": 0.3275, "step": 50080 }, { "epoch": 1.820263100516026, "grad_norm": 23.444902420043945, "learning_rate": 4.3229458073561466e-05, "loss": 0.1081, "step": 50090 }, { "epoch": 1.820626499018824, "grad_norm": 2.446769952774048, "learning_rate": 4.322590650706145e-05, "loss": 0.1738, "step": 50100 }, { "epoch": 1.8209898975216223, "grad_norm": 4.417498588562012, "learning_rate": 4.322235415527198e-05, "loss": 0.1196, "step": 50110 }, { "epoch": 1.8213532960244203, "grad_norm": 0.8139522671699524, "learning_rate": 4.321880101834609e-05, "loss": 0.1339, "step": 50120 }, { "epoch": 1.8217166945272185, "grad_norm": 0.6750831007957458, "learning_rate": 4.32152470964369e-05, "loss": 0.1169, "step": 50130 }, { "epoch": 1.8220800930300167, "grad_norm": 0.6926230192184448, "learning_rate": 4.3211692389697514e-05, "loss": 0.1192, "step": 50140 }, { "epoch": 1.822443491532815, "grad_norm": 0.5890200138092041, "learning_rate": 4.3208136898281106e-05, "loss": 0.1431, "step": 50150 }, { "epoch": 1.8228068900356131, "grad_norm": 1.0174696445465088, "learning_rate": 4.3204580622340865e-05, "loss": 0.7309, "step": 50160 }, { "epoch": 1.8231702885384111, "grad_norm": 0.9049250483512878, "learning_rate": 4.320102356203001e-05, "loss": 0.1451, "step": 50170 }, { "epoch": 1.8235336870412096, "grad_norm": 0.7444465160369873, "learning_rate": 4.3197465717501815e-05, "loss": 0.1111, "step": 50180 }, { "epoch": 
1.8238970855440075, "grad_norm": 1.0209647417068481, "learning_rate": 4.319390708890957e-05, "loss": 0.0911, "step": 50190 }, { "epoch": 1.8242604840468057, "grad_norm": 0.7396380305290222, "learning_rate": 4.31903476764066e-05, "loss": 0.1367, "step": 50200 }, { "epoch": 1.824623882549604, "grad_norm": 0.7910483479499817, "learning_rate": 4.318678748014626e-05, "loss": 0.0978, "step": 50210 }, { "epoch": 1.824987281052402, "grad_norm": 0.5519534349441528, "learning_rate": 4.318322650028197e-05, "loss": 0.1502, "step": 50220 }, { "epoch": 1.8253506795552004, "grad_norm": 0.7698003053665161, "learning_rate": 4.317966473696714e-05, "loss": 0.1257, "step": 50230 }, { "epoch": 1.8257140780579983, "grad_norm": 0.6308223605155945, "learning_rate": 4.3176102190355246e-05, "loss": 0.1048, "step": 50240 }, { "epoch": 1.8260774765607966, "grad_norm": 0.7210573554039001, "learning_rate": 4.317253886059978e-05, "loss": 2.1908, "step": 50250 }, { "epoch": 1.8264408750635948, "grad_norm": 1.0156967639923096, "learning_rate": 4.316897474785426e-05, "loss": 0.1612, "step": 50260 }, { "epoch": 1.8268042735663927, "grad_norm": 1.366592288017273, "learning_rate": 4.3165409852272276e-05, "loss": 0.1524, "step": 50270 }, { "epoch": 1.8271676720691912, "grad_norm": 1.1312644481658936, "learning_rate": 4.3161844174007406e-05, "loss": 0.1118, "step": 50280 }, { "epoch": 1.8275310705719892, "grad_norm": 0.3259322941303253, "learning_rate": 4.3158277713213295e-05, "loss": 0.1043, "step": 50290 }, { "epoch": 1.8278944690747874, "grad_norm": 0.8572397828102112, "learning_rate": 4.31547104700436e-05, "loss": 0.1346, "step": 50300 }, { "epoch": 1.8282578675775856, "grad_norm": 1.2105157375335693, "learning_rate": 4.3151142444652035e-05, "loss": 0.1135, "step": 50310 }, { "epoch": 1.8286212660803838, "grad_norm": 0.6126898527145386, "learning_rate": 4.314757363719232e-05, "loss": 0.1719, "step": 50320 }, { "epoch": 1.828984664583182, "grad_norm": 0.7111977338790894, "learning_rate": 
4.314400404781822e-05, "loss": 0.1089, "step": 50330 }, { "epoch": 1.82934806308598, "grad_norm": 1.016365647315979, "learning_rate": 4.314043367668355e-05, "loss": 0.0869, "step": 50340 }, { "epoch": 1.8297114615887784, "grad_norm": 1.1696245670318604, "learning_rate": 4.3136862523942136e-05, "loss": 0.1241, "step": 50350 }, { "epoch": 1.8300748600915764, "grad_norm": 1.2339487075805664, "learning_rate": 4.313329058974784e-05, "loss": 0.1202, "step": 50360 }, { "epoch": 1.8304382585943746, "grad_norm": 1.6147994995117188, "learning_rate": 4.312971787425458e-05, "loss": 0.1502, "step": 50370 }, { "epoch": 1.8308016570971728, "grad_norm": 0.8028876781463623, "learning_rate": 4.312614437761628e-05, "loss": 0.1153, "step": 50380 }, { "epoch": 1.8311650555999708, "grad_norm": 0.6366049647331238, "learning_rate": 4.312257009998691e-05, "loss": 0.1367, "step": 50390 }, { "epoch": 1.8315284541027692, "grad_norm": 1.6343673467636108, "learning_rate": 4.311899504152047e-05, "loss": 0.1502, "step": 50400 }, { "epoch": 1.8315284541027692, "eval_loss": 0.34016337990760803, "eval_runtime": 180.2837, "eval_samples_per_second": 41.124, "eval_steps_per_second": 5.142, "eval_wer": 0.16422204875923538, "step": 50400 }, { "epoch": 1.8318918526055672, "grad_norm": 0.5833923816680908, "learning_rate": 4.311541920237101e-05, "loss": 0.1262, "step": 50410 }, { "epoch": 1.8322552511083654, "grad_norm": 0.5683671832084656, "learning_rate": 4.311184258269258e-05, "loss": 0.1626, "step": 50420 }, { "epoch": 1.8326186496111636, "grad_norm": 2.4269814491271973, "learning_rate": 4.3108265182639304e-05, "loss": 0.1293, "step": 50430 }, { "epoch": 1.8329820481139618, "grad_norm": 0.8372895121574402, "learning_rate": 4.310468700236532e-05, "loss": 0.1004, "step": 50440 }, { "epoch": 1.83334544661676, "grad_norm": 0.7744470238685608, "learning_rate": 4.3101108042024776e-05, "loss": 0.126, "step": 50450 }, { "epoch": 1.833708845119558, "grad_norm": 1.1404407024383545, "learning_rate": 
4.3097528301771895e-05, "loss": 0.1267, "step": 50460 }, { "epoch": 1.8340722436223564, "grad_norm": 0.4898841381072998, "learning_rate": 4.30939477817609e-05, "loss": 0.1353, "step": 50470 }, { "epoch": 1.8344356421251544, "grad_norm": 0.949220597743988, "learning_rate": 4.3090366482146085e-05, "loss": 0.1188, "step": 50480 }, { "epoch": 1.8347990406279526, "grad_norm": 0.2803521156311035, "learning_rate": 4.308678440308175e-05, "loss": 0.0978, "step": 50490 }, { "epoch": 1.8351624391307508, "grad_norm": 1.3130167722702026, "learning_rate": 4.308320154472221e-05, "loss": 0.1228, "step": 50500 }, { "epoch": 1.8355258376335488, "grad_norm": 1.2615669965744019, "learning_rate": 4.307961790722187e-05, "loss": 0.1146, "step": 50510 }, { "epoch": 1.8358892361363472, "grad_norm": 1.296720266342163, "learning_rate": 4.307603349073512e-05, "loss": 0.1497, "step": 50520 }, { "epoch": 1.8362526346391452, "grad_norm": 0.7248766422271729, "learning_rate": 4.30724482954164e-05, "loss": 2.0, "step": 50530 }, { "epoch": 1.8366160331419434, "grad_norm": 0.9493942260742188, "learning_rate": 4.306886232142018e-05, "loss": 0.1092, "step": 50540 }, { "epoch": 1.8369794316447416, "grad_norm": 0.8784381151199341, "learning_rate": 4.306527556890097e-05, "loss": 0.1332, "step": 50550 }, { "epoch": 1.8373428301475396, "grad_norm": 1.1002815961837769, "learning_rate": 4.306168803801332e-05, "loss": 0.1166, "step": 50560 }, { "epoch": 1.837706228650338, "grad_norm": 1.0042141675949097, "learning_rate": 4.3058099728911795e-05, "loss": 0.1494, "step": 50570 }, { "epoch": 1.838069627153136, "grad_norm": 0.8974900841712952, "learning_rate": 4.305451064175101e-05, "loss": 0.1281, "step": 50580 }, { "epoch": 1.8384330256559342, "grad_norm": 6.133358001708984, "learning_rate": 4.3050920776685587e-05, "loss": 0.1136, "step": 50590 }, { "epoch": 1.8387964241587325, "grad_norm": 1.828365445137024, "learning_rate": 4.304733013387022e-05, "loss": 0.1634, "step": 50600 }, { "epoch": 1.8391598226615307, 
"grad_norm": 1.1756354570388794, "learning_rate": 4.3043738713459605e-05, "loss": 0.1039, "step": 50610 }, { "epoch": 1.8395232211643289, "grad_norm": 0.3747727870941162, "learning_rate": 4.304014651560849e-05, "loss": 0.1966, "step": 50620 }, { "epoch": 1.8398866196671269, "grad_norm": 2.3143324851989746, "learning_rate": 4.303655354047166e-05, "loss": 0.1294, "step": 50630 }, { "epoch": 1.8402500181699253, "grad_norm": 0.408648818731308, "learning_rate": 4.3032959788203894e-05, "loss": 0.1097, "step": 50640 }, { "epoch": 1.8406134166727233, "grad_norm": 2.8158206939697266, "learning_rate": 4.3029365258960065e-05, "loss": 0.1418, "step": 50650 }, { "epoch": 1.8409768151755215, "grad_norm": 1.1815099716186523, "learning_rate": 4.3025769952895036e-05, "loss": 0.1053, "step": 50660 }, { "epoch": 1.8413402136783197, "grad_norm": 0.5927807688713074, "learning_rate": 4.3022173870163705e-05, "loss": 0.1731, "step": 50670 }, { "epoch": 1.8417036121811177, "grad_norm": 0.945854663848877, "learning_rate": 4.301857701092103e-05, "loss": 0.1355, "step": 50680 }, { "epoch": 1.842067010683916, "grad_norm": 1.1901466846466064, "learning_rate": 4.301497937532199e-05, "loss": 0.1237, "step": 50690 }, { "epoch": 1.842430409186714, "grad_norm": 2.328880548477173, "learning_rate": 4.301138096352158e-05, "loss": 0.184, "step": 50700 }, { "epoch": 1.8427938076895123, "grad_norm": 1.652759075164795, "learning_rate": 4.3007781775674846e-05, "loss": 0.1085, "step": 50710 }, { "epoch": 1.8431572061923105, "grad_norm": 0.7879970669746399, "learning_rate": 4.3004181811936874e-05, "loss": 0.1291, "step": 50720 }, { "epoch": 1.8435206046951087, "grad_norm": 0.9201347827911377, "learning_rate": 4.3000581072462765e-05, "loss": 0.1069, "step": 50730 }, { "epoch": 1.843884003197907, "grad_norm": 0.4384756088256836, "learning_rate": 4.299697955740766e-05, "loss": 0.0966, "step": 50740 }, { "epoch": 1.844247401700705, "grad_norm": 1.022146463394165, "learning_rate": 4.299337726692674e-05, "loss": 
0.4218, "step": 50750 }, { "epoch": 1.8446108002035033, "grad_norm": 1.298736810684204, "learning_rate": 4.298977420117521e-05, "loss": 0.1266, "step": 50760 }, { "epoch": 1.8449741987063013, "grad_norm": 0.3361910283565521, "learning_rate": 4.2986170360308324e-05, "loss": 0.1357, "step": 50770 }, { "epoch": 1.8453375972090995, "grad_norm": 1.3413841724395752, "learning_rate": 4.298256574448135e-05, "loss": 0.1116, "step": 50780 }, { "epoch": 1.8457009957118977, "grad_norm": 1.324555516242981, "learning_rate": 4.2978960353849606e-05, "loss": 0.1171, "step": 50790 }, { "epoch": 1.8460643942146957, "grad_norm": 2.6282169818878174, "learning_rate": 4.297535418856843e-05, "loss": 0.1341, "step": 50800 }, { "epoch": 1.8464277927174941, "grad_norm": 1.0654820203781128, "learning_rate": 4.297174724879319e-05, "loss": 0.1358, "step": 50810 }, { "epoch": 1.8467911912202921, "grad_norm": 0.45108261704444885, "learning_rate": 4.296813953467931e-05, "loss": 0.21, "step": 50820 }, { "epoch": 1.8471545897230903, "grad_norm": 0.4607618749141693, "learning_rate": 4.296453104638222e-05, "loss": 0.1004, "step": 50830 }, { "epoch": 1.8475179882258885, "grad_norm": 1.834341049194336, "learning_rate": 4.296092178405741e-05, "loss": 0.1092, "step": 50840 }, { "epoch": 1.8478813867286865, "grad_norm": 2.6476006507873535, "learning_rate": 4.295731174786039e-05, "loss": 0.1255, "step": 50850 }, { "epoch": 1.848244785231485, "grad_norm": 0.6069791316986084, "learning_rate": 4.2953700937946696e-05, "loss": 0.1166, "step": 50860 }, { "epoch": 1.848608183734283, "grad_norm": 0.9340389966964722, "learning_rate": 4.2950089354471915e-05, "loss": 0.1548, "step": 50870 }, { "epoch": 1.8489715822370811, "grad_norm": 1.0438172817230225, "learning_rate": 4.294647699759163e-05, "loss": 0.1996, "step": 50880 }, { "epoch": 1.8493349807398793, "grad_norm": 0.9623711109161377, "learning_rate": 4.294286386746152e-05, "loss": 0.11, "step": 50890 }, { "epoch": 1.8496983792426775, "grad_norm": 
0.7084048986434937, "learning_rate": 4.2939249964237246e-05, "loss": 0.1411, "step": 50900 }, { "epoch": 1.8500617777454758, "grad_norm": 2.6798365116119385, "learning_rate": 4.293563528807453e-05, "loss": 0.1253, "step": 50910 }, { "epoch": 1.8504251762482737, "grad_norm": 1.17062246799469, "learning_rate": 4.2932019839129087e-05, "loss": 0.1676, "step": 50920 }, { "epoch": 1.8507885747510722, "grad_norm": 3.6233065128326416, "learning_rate": 4.2928403617556714e-05, "loss": 0.1128, "step": 50930 }, { "epoch": 1.8511519732538702, "grad_norm": 1.1630785465240479, "learning_rate": 4.2924786623513225e-05, "loss": 0.084, "step": 50940 }, { "epoch": 1.8515153717566684, "grad_norm": 1.654990553855896, "learning_rate": 4.292116885715446e-05, "loss": 0.1527, "step": 50950 }, { "epoch": 1.8518787702594666, "grad_norm": 1.1175593137741089, "learning_rate": 4.291755031863628e-05, "loss": 0.1132, "step": 50960 }, { "epoch": 1.8522421687622646, "grad_norm": 0.46370136737823486, "learning_rate": 4.291393100811462e-05, "loss": 0.1335, "step": 50970 }, { "epoch": 1.852605567265063, "grad_norm": 1.1095346212387085, "learning_rate": 4.2910310925745404e-05, "loss": 0.1024, "step": 50980 }, { "epoch": 1.852968965767861, "grad_norm": 1.7305604219436646, "learning_rate": 4.290669007168462e-05, "loss": 0.0964, "step": 50990 }, { "epoch": 1.8533323642706592, "grad_norm": 0.5683947801589966, "learning_rate": 4.290306844608827e-05, "loss": 0.1324, "step": 51000 }, { "epoch": 1.8533323642706592, "eval_loss": 0.35921338200569153, "eval_runtime": 179.6912, "eval_samples_per_second": 41.26, "eval_steps_per_second": 5.159, "eval_wer": 0.164648646686151, "step": 51000 }, { "epoch": 1.8536957627734574, "grad_norm": 0.417925089597702, "learning_rate": 4.289944604911239e-05, "loss": 0.156, "step": 51010 }, { "epoch": 1.8540591612762556, "grad_norm": 0.3907199501991272, "learning_rate": 4.2895822880913076e-05, "loss": 0.1322, "step": 51020 }, { "epoch": 1.8544225597790538, "grad_norm": 
0.8322422504425049, "learning_rate": 4.2892198941646436e-05, "loss": 0.1278, "step": 51030 }, { "epoch": 1.8547859582818518, "grad_norm": 1.3560541868209839, "learning_rate": 4.2888574231468595e-05, "loss": 0.0933, "step": 51040 }, { "epoch": 1.8551493567846502, "grad_norm": 1.353043556213379, "learning_rate": 4.288494875053573e-05, "loss": 0.1185, "step": 51050 }, { "epoch": 1.8555127552874482, "grad_norm": 7.476738929748535, "learning_rate": 4.2881322499004076e-05, "loss": 0.127, "step": 51060 }, { "epoch": 1.8558761537902464, "grad_norm": 0.5096439719200134, "learning_rate": 4.2877695477029844e-05, "loss": 0.1768, "step": 51070 }, { "epoch": 1.8562395522930446, "grad_norm": 2.3075900077819824, "learning_rate": 4.2874067684769325e-05, "loss": 0.1288, "step": 51080 }, { "epoch": 1.8566029507958426, "grad_norm": 0.615508496761322, "learning_rate": 4.287043912237883e-05, "loss": 0.0903, "step": 51090 }, { "epoch": 1.856966349298641, "grad_norm": 2.022796154022217, "learning_rate": 4.286680979001469e-05, "loss": 0.1372, "step": 51100 }, { "epoch": 1.857329747801439, "grad_norm": 0.8577511310577393, "learning_rate": 4.28631796878333e-05, "loss": 0.1216, "step": 51110 }, { "epoch": 1.8576931463042372, "grad_norm": 0.5876504182815552, "learning_rate": 4.285954881599104e-05, "loss": 0.1672, "step": 51120 }, { "epoch": 1.8580565448070354, "grad_norm": 1.0467904806137085, "learning_rate": 4.2855917174644374e-05, "loss": 0.1175, "step": 51130 }, { "epoch": 1.8584199433098334, "grad_norm": 0.6273336410522461, "learning_rate": 4.285228476394977e-05, "loss": 0.1048, "step": 51140 }, { "epoch": 1.8587833418126318, "grad_norm": 0.7880851030349731, "learning_rate": 4.284865158406372e-05, "loss": 0.1418, "step": 51150 }, { "epoch": 1.8591467403154298, "grad_norm": 1.316256046295166, "learning_rate": 4.284501763514279e-05, "loss": 0.1373, "step": 51160 }, { "epoch": 1.859510138818228, "grad_norm": 0.46383532881736755, "learning_rate": 4.284138291734355e-05, "loss": 0.167, "step": 
51170 }, { "epoch": 1.8598735373210262, "grad_norm": 0.7432321310043335, "learning_rate": 4.283774743082259e-05, "loss": 0.1534, "step": 51180 }, { "epoch": 1.8602369358238244, "grad_norm": 0.673897922039032, "learning_rate": 4.2834111175736555e-05, "loss": 0.1122, "step": 51190 }, { "epoch": 1.8606003343266226, "grad_norm": 0.7635305523872375, "learning_rate": 4.2830474152242136e-05, "loss": 0.1054, "step": 51200 }, { "epoch": 1.8609637328294206, "grad_norm": 1.281503677368164, "learning_rate": 4.282683636049602e-05, "loss": 0.1175, "step": 51210 }, { "epoch": 1.861327131332219, "grad_norm": 1.249510407447815, "learning_rate": 4.282319780065496e-05, "loss": 0.1624, "step": 51220 }, { "epoch": 1.861690529835017, "grad_norm": 1.7470375299453735, "learning_rate": 4.281955847287571e-05, "loss": 0.1173, "step": 51230 }, { "epoch": 1.8620539283378152, "grad_norm": 0.4860547184944153, "learning_rate": 4.2815918377315096e-05, "loss": 0.1109, "step": 51240 }, { "epoch": 1.8624173268406135, "grad_norm": 1.2206679582595825, "learning_rate": 4.281227751412995e-05, "loss": 0.139, "step": 51250 }, { "epoch": 1.8627807253434114, "grad_norm": 5.966470241546631, "learning_rate": 4.2808635883477134e-05, "loss": 0.0989, "step": 51260 }, { "epoch": 1.8631441238462099, "grad_norm": 0.6490101218223572, "learning_rate": 4.280499348551357e-05, "loss": 0.1876, "step": 51270 }, { "epoch": 1.8635075223490078, "grad_norm": 1.0723692178726196, "learning_rate": 4.280135032039618e-05, "loss": 0.1081, "step": 51280 }, { "epoch": 1.863870920851806, "grad_norm": 1.9690381288528442, "learning_rate": 4.2797706388281944e-05, "loss": 0.1112, "step": 51290 }, { "epoch": 1.8642343193546043, "grad_norm": 5.197057723999023, "learning_rate": 4.279406168932787e-05, "loss": 0.1374, "step": 51300 }, { "epoch": 1.8645977178574025, "grad_norm": 1.063194751739502, "learning_rate": 4.279041622369098e-05, "loss": 0.1221, "step": 51310 }, { "epoch": 1.8649611163602007, "grad_norm": 0.9989191889762878, 
"learning_rate": 4.2786769991528356e-05, "loss": 0.1602, "step": 51320 }, { "epoch": 1.8653245148629987, "grad_norm": 1.2675135135650635, "learning_rate": 4.278312299299711e-05, "loss": 0.1038, "step": 51330 }, { "epoch": 1.865687913365797, "grad_norm": 0.7109481692314148, "learning_rate": 4.277947522825435e-05, "loss": 0.1323, "step": 51340 }, { "epoch": 1.866051311868595, "grad_norm": 0.522361695766449, "learning_rate": 4.2775826697457265e-05, "loss": 0.1339, "step": 51350 }, { "epoch": 1.8664147103713933, "grad_norm": 0.6734838485717773, "learning_rate": 4.277217740076306e-05, "loss": 0.0991, "step": 51360 }, { "epoch": 1.8667781088741915, "grad_norm": 0.7802498936653137, "learning_rate": 4.276852733832897e-05, "loss": 0.1614, "step": 51370 }, { "epoch": 1.8671415073769895, "grad_norm": 0.7883875966072083, "learning_rate": 4.276487651031225e-05, "loss": 0.1093, "step": 51380 }, { "epoch": 1.867504905879788, "grad_norm": 2.097398519515991, "learning_rate": 4.27612249168702e-05, "loss": 0.0925, "step": 51390 }, { "epoch": 1.8678683043825859, "grad_norm": 2.9690377712249756, "learning_rate": 4.2757572558160176e-05, "loss": 0.1693, "step": 51400 }, { "epoch": 1.868231702885384, "grad_norm": 0.681706428527832, "learning_rate": 4.275391943433953e-05, "loss": 0.1045, "step": 51410 }, { "epoch": 1.8685951013881823, "grad_norm": 0.32064223289489746, "learning_rate": 4.275026554556566e-05, "loss": 0.1511, "step": 51420 }, { "epoch": 1.8689584998909803, "grad_norm": 0.30609723925590515, "learning_rate": 4.2746610891996006e-05, "loss": 0.1089, "step": 51430 }, { "epoch": 1.8693218983937787, "grad_norm": 0.5510174036026001, "learning_rate": 4.274295547378803e-05, "loss": 0.097, "step": 51440 }, { "epoch": 1.8696852968965767, "grad_norm": 1.0738519430160522, "learning_rate": 4.2739299291099233e-05, "loss": 0.1309, "step": 51450 }, { "epoch": 1.870048695399375, "grad_norm": 0.7475055456161499, "learning_rate": 4.2735642344087144e-05, "loss": 0.2601, "step": 51460 }, { "epoch": 
1.8704120939021731, "grad_norm": 0.5625230669975281, "learning_rate": 4.273198463290934e-05, "loss": 0.1464, "step": 51470 }, { "epoch": 1.8707754924049713, "grad_norm": 1.03018057346344, "learning_rate": 4.2728326157723396e-05, "loss": 0.1346, "step": 51480 }, { "epoch": 1.8711388909077695, "grad_norm": 0.7019144892692566, "learning_rate": 4.272466691868696e-05, "loss": 0.1281, "step": 51490 }, { "epoch": 1.8715022894105675, "grad_norm": 1.6843324899673462, "learning_rate": 4.2721006915957695e-05, "loss": 0.8711, "step": 51500 }, { "epoch": 1.871865687913366, "grad_norm": 1.4415316581726074, "learning_rate": 4.271734614969329e-05, "loss": 0.1193, "step": 51510 }, { "epoch": 1.872229086416164, "grad_norm": 0.5036882162094116, "learning_rate": 4.271368462005148e-05, "loss": 0.1821, "step": 51520 }, { "epoch": 1.8725924849189621, "grad_norm": 80.8698959350586, "learning_rate": 4.2710022327190026e-05, "loss": 1.5666, "step": 51530 }, { "epoch": 1.8729558834217603, "grad_norm": 1.3852354288101196, "learning_rate": 4.2706359271266716e-05, "loss": 0.1154, "step": 51540 }, { "epoch": 1.8733192819245583, "grad_norm": 2.0087506771087646, "learning_rate": 4.270269545243939e-05, "loss": 0.1289, "step": 51550 }, { "epoch": 1.8736826804273568, "grad_norm": 18.74397087097168, "learning_rate": 4.26990308708659e-05, "loss": 0.166, "step": 51560 }, { "epoch": 1.8740460789301547, "grad_norm": 0.5033942461013794, "learning_rate": 4.2695365526704144e-05, "loss": 0.2648, "step": 51570 }, { "epoch": 1.874409477432953, "grad_norm": 0.7396907210350037, "learning_rate": 4.269169942011205e-05, "loss": 0.1235, "step": 51580 }, { "epoch": 1.8747728759357511, "grad_norm": 1.0756418704986572, "learning_rate": 4.2688032551247574e-05, "loss": 0.1113, "step": 51590 }, { "epoch": 1.8751362744385494, "grad_norm": 0.8277359008789062, "learning_rate": 4.268436492026871e-05, "loss": 0.1556, "step": 51600 }, { "epoch": 1.8751362744385494, "eval_loss": 0.3514460623264313, "eval_runtime": 180.5008, 
"eval_samples_per_second": 41.075, "eval_steps_per_second": 5.136, "eval_wer": 0.16721731079928115, "step": 51600 }, { "epoch": 1.8754996729413476, "grad_norm": 1.8270063400268555, "learning_rate": 4.268069652733349e-05, "loss": 0.1154, "step": 51610 }, { "epoch": 1.8758630714441455, "grad_norm": 1.2547001838684082, "learning_rate": 4.267702737259995e-05, "loss": 0.1256, "step": 51620 }, { "epoch": 1.876226469946944, "grad_norm": 2.664400100708008, "learning_rate": 4.26733574562262e-05, "loss": 0.1301, "step": 51630 }, { "epoch": 1.876589868449742, "grad_norm": 1.9625864028930664, "learning_rate": 4.266968677837037e-05, "loss": 0.1347, "step": 51640 }, { "epoch": 1.8769532669525402, "grad_norm": 1.5494035482406616, "learning_rate": 4.266601533919059e-05, "loss": 0.1537, "step": 51650 }, { "epoch": 1.8773166654553384, "grad_norm": 1.1424529552459717, "learning_rate": 4.2662343138845076e-05, "loss": 0.1212, "step": 51660 }, { "epoch": 1.8776800639581364, "grad_norm": 0.8773604035377502, "learning_rate": 4.265867017749203e-05, "loss": 0.2055, "step": 51670 }, { "epoch": 1.8780434624609348, "grad_norm": 0.9486932158470154, "learning_rate": 4.265499645528972e-05, "loss": 0.114, "step": 51680 }, { "epoch": 1.8784068609637328, "grad_norm": 0.5126560926437378, "learning_rate": 4.265132197239643e-05, "loss": 0.1068, "step": 51690 }, { "epoch": 1.878770259466531, "grad_norm": 1.4426672458648682, "learning_rate": 4.264764672897049e-05, "loss": 0.1268, "step": 51700 }, { "epoch": 1.8791336579693292, "grad_norm": 1.2795157432556152, "learning_rate": 4.264397072517023e-05, "loss": 0.1, "step": 51710 }, { "epoch": 1.8794970564721272, "grad_norm": 0.34891799092292786, "learning_rate": 4.2640293961154055e-05, "loss": 0.1683, "step": 51720 }, { "epoch": 1.8798604549749256, "grad_norm": 0.6939824223518372, "learning_rate": 4.2636616437080366e-05, "loss": 0.1131, "step": 51730 }, { "epoch": 1.8802238534777236, "grad_norm": 1.4243013858795166, "learning_rate": 4.2632938153107636e-05, 
"loss": 0.1019, "step": 51740 }, { "epoch": 1.8805872519805218, "grad_norm": 0.699863851070404, "learning_rate": 4.2629259109394335e-05, "loss": 0.1652, "step": 51750 }, { "epoch": 1.88095065048332, "grad_norm": 1.4008554220199585, "learning_rate": 4.2625579306098994e-05, "loss": 0.1062, "step": 51760 }, { "epoch": 1.8813140489861182, "grad_norm": 1.0460174083709717, "learning_rate": 4.2621898743380144e-05, "loss": 0.1216, "step": 51770 }, { "epoch": 1.8816774474889164, "grad_norm": 1.277803897857666, "learning_rate": 4.2618217421396375e-05, "loss": 1.8922, "step": 51780 }, { "epoch": 1.8820408459917144, "grad_norm": 2.2651615142822266, "learning_rate": 4.2614535340306314e-05, "loss": 0.1014, "step": 51790 }, { "epoch": 1.8824042444945128, "grad_norm": 0.8989794254302979, "learning_rate": 4.2610852500268586e-05, "loss": 0.1089, "step": 51800 }, { "epoch": 1.8827676429973108, "grad_norm": 0.9130983948707581, "learning_rate": 4.2607168901441885e-05, "loss": 0.1098, "step": 51810 }, { "epoch": 1.883131041500109, "grad_norm": 0.5734561085700989, "learning_rate": 4.260348454398493e-05, "loss": 0.1674, "step": 51820 }, { "epoch": 1.8834944400029072, "grad_norm": 0.9285537600517273, "learning_rate": 4.259979942805645e-05, "loss": 0.1197, "step": 51830 }, { "epoch": 1.8838578385057052, "grad_norm": 1.951344609260559, "learning_rate": 4.259611355381524e-05, "loss": 0.1009, "step": 51840 }, { "epoch": 1.8842212370085036, "grad_norm": 0.6593104004859924, "learning_rate": 4.2592426921420106e-05, "loss": 0.1121, "step": 51850 }, { "epoch": 1.8845846355113016, "grad_norm": 0.35744279623031616, "learning_rate": 4.258873953102987e-05, "loss": 0.1029, "step": 51860 }, { "epoch": 1.8849480340140998, "grad_norm": 0.7135227918624878, "learning_rate": 4.2585051382803455e-05, "loss": 0.153, "step": 51870 }, { "epoch": 1.885311432516898, "grad_norm": 1.8943212032318115, "learning_rate": 4.258136247689973e-05, "loss": 0.1079, "step": 51880 }, { "epoch": 1.8856748310196962, "grad_norm": 
1.6160852909088135, "learning_rate": 4.2577672813477656e-05, "loss": 0.1259, "step": 51890 }, { "epoch": 1.8860382295224944, "grad_norm": 0.8660845160484314, "learning_rate": 4.25739823926962e-05, "loss": 0.1151, "step": 51900 }, { "epoch": 1.8864016280252924, "grad_norm": 1.074818730354309, "learning_rate": 4.2570291214714365e-05, "loss": 0.1181, "step": 51910 }, { "epoch": 1.8867650265280909, "grad_norm": 0.4410496950149536, "learning_rate": 4.2566599279691205e-05, "loss": 0.1296, "step": 51920 }, { "epoch": 1.8871284250308888, "grad_norm": 0.6509966254234314, "learning_rate": 4.2562906587785776e-05, "loss": 0.1096, "step": 51930 }, { "epoch": 1.887491823533687, "grad_norm": 0.8408392667770386, "learning_rate": 4.25592131391572e-05, "loss": 0.0968, "step": 51940 }, { "epoch": 1.8878552220364853, "grad_norm": 0.6419994831085205, "learning_rate": 4.25555189339646e-05, "loss": 0.1336, "step": 51950 }, { "epoch": 1.8882186205392832, "grad_norm": 1.4039828777313232, "learning_rate": 4.2551823972367156e-05, "loss": 0.1102, "step": 51960 }, { "epoch": 1.8885820190420817, "grad_norm": 1.1315640211105347, "learning_rate": 4.2548128254524066e-05, "loss": 0.1265, "step": 51970 }, { "epoch": 1.8889454175448797, "grad_norm": 0.6739282011985779, "learning_rate": 4.254443178059456e-05, "loss": 0.101, "step": 51980 }, { "epoch": 1.8893088160476779, "grad_norm": 0.9321909546852112, "learning_rate": 4.254073455073792e-05, "loss": 0.1093, "step": 51990 }, { "epoch": 1.889672214550476, "grad_norm": 8.199972152709961, "learning_rate": 4.2537036565113435e-05, "loss": 0.2084, "step": 52000 }, { "epoch": 1.890035613053274, "grad_norm": 0.5499328970909119, "learning_rate": 4.253333782388044e-05, "loss": 0.138, "step": 52010 }, { "epoch": 1.8903990115560725, "grad_norm": 1.2447484731674194, "learning_rate": 4.252963832719831e-05, "loss": 0.1353, "step": 52020 }, { "epoch": 1.8907624100588705, "grad_norm": 1.7561428546905518, "learning_rate": 4.252593807522642e-05, "loss": 0.1267, "step": 
52030 }, { "epoch": 1.8911258085616687, "grad_norm": 0.6353381872177124, "learning_rate": 4.252223706812423e-05, "loss": 0.1105, "step": 52040 }, { "epoch": 1.8914892070644669, "grad_norm": 1.8371816873550415, "learning_rate": 4.251853530605118e-05, "loss": 0.1498, "step": 52050 }, { "epoch": 1.891852605567265, "grad_norm": 2.9866833686828613, "learning_rate": 4.251483278916678e-05, "loss": 0.1133, "step": 52060 }, { "epoch": 1.8922160040700633, "grad_norm": 0.9184136986732483, "learning_rate": 4.2511129517630555e-05, "loss": 0.114, "step": 52070 }, { "epoch": 1.8925794025728613, "grad_norm": 1.407132863998413, "learning_rate": 4.250742549160206e-05, "loss": 0.1055, "step": 52080 }, { "epoch": 1.8929428010756597, "grad_norm": 1.2703722715377808, "learning_rate": 4.250372071124089e-05, "loss": 0.1429, "step": 52090 }, { "epoch": 1.8933061995784577, "grad_norm": 1.260004997253418, "learning_rate": 4.2500385764093334e-05, "loss": 1.4537, "step": 52100 }, { "epoch": 1.893669598081256, "grad_norm": 0.9646703004837036, "learning_rate": 4.249667955093988e-05, "loss": 0.1465, "step": 52110 }, { "epoch": 1.894032996584054, "grad_norm": 0.7287250757217407, "learning_rate": 4.249297258391677e-05, "loss": 0.1387, "step": 52120 }, { "epoch": 1.894396395086852, "grad_norm": 1.042417049407959, "learning_rate": 4.24892648631837e-05, "loss": 0.1031, "step": 52130 }, { "epoch": 1.8947597935896505, "grad_norm": 0.9328198432922363, "learning_rate": 4.248555638890043e-05, "loss": 0.1324, "step": 52140 }, { "epoch": 1.8951231920924485, "grad_norm": 3.1417503356933594, "learning_rate": 4.2481847161226764e-05, "loss": 0.1798, "step": 52150 }, { "epoch": 1.8954865905952467, "grad_norm": 0.8507013916969299, "learning_rate": 4.247813718032249e-05, "loss": 0.1182, "step": 52160 }, { "epoch": 1.895849989098045, "grad_norm": 1.0147353410720825, "learning_rate": 4.247442644634748e-05, "loss": 0.1341, "step": 52170 }, { "epoch": 1.8962133876008431, "grad_norm": 2.492661476135254, "learning_rate": 
4.2470714959461614e-05, "loss": 0.1109, "step": 52180 }, { "epoch": 1.8965767861036413, "grad_norm": 1.8121393918991089, "learning_rate": 4.246700271982479e-05, "loss": 0.1381, "step": 52190 }, { "epoch": 1.8969401846064393, "grad_norm": 1.2550605535507202, "learning_rate": 4.2463289727596965e-05, "loss": 0.1469, "step": 52200 }, { "epoch": 1.8969401846064393, "eval_loss": 0.3317066729068756, "eval_runtime": 179.6114, "eval_samples_per_second": 41.278, "eval_steps_per_second": 5.161, "eval_wer": 0.16958629077640822, "step": 52200 }, { "epoch": 1.8973035831092377, "grad_norm": 0.7204797863960266, "learning_rate": 4.245957598293813e-05, "loss": 0.1328, "step": 52210 }, { "epoch": 1.8976669816120357, "grad_norm": 0.5142366290092468, "learning_rate": 4.245586148600829e-05, "loss": 0.1457, "step": 52220 }, { "epoch": 1.898030380114834, "grad_norm": 0.8868045210838318, "learning_rate": 4.2452146236967474e-05, "loss": 0.1344, "step": 52230 }, { "epoch": 1.8983937786176321, "grad_norm": 0.7489217519760132, "learning_rate": 4.2448430235975777e-05, "loss": 0.1119, "step": 52240 }, { "epoch": 1.8987571771204301, "grad_norm": 1.2009568214416504, "learning_rate": 4.244471348319331e-05, "loss": 0.1503, "step": 52250 }, { "epoch": 1.8991205756232286, "grad_norm": 1.1081483364105225, "learning_rate": 4.24409959787802e-05, "loss": 0.1265, "step": 52260 }, { "epoch": 1.8994839741260265, "grad_norm": 0.42917948961257935, "learning_rate": 4.243727772289663e-05, "loss": 0.1825, "step": 52270 }, { "epoch": 1.8998473726288247, "grad_norm": 1.7099511623382568, "learning_rate": 4.2433558715702804e-05, "loss": 0.133, "step": 52280 }, { "epoch": 1.900210771131623, "grad_norm": 1.212544560432434, "learning_rate": 4.242983895735896e-05, "loss": 0.1306, "step": 52290 }, { "epoch": 1.900574169634421, "grad_norm": 0.48001641035079956, "learning_rate": 4.242611844802538e-05, "loss": 0.1739, "step": 52300 }, { "epoch": 1.9009375681372194, "grad_norm": 2.3596603870391846, "learning_rate": 
4.242239718786235e-05, "loss": 0.129, "step": 52310 }, { "epoch": 1.9013009666400174, "grad_norm": 0.41326409578323364, "learning_rate": 4.241867517703022e-05, "loss": 0.1218, "step": 52320 }, { "epoch": 1.9016643651428156, "grad_norm": 0.9740013480186462, "learning_rate": 4.241495241568935e-05, "loss": 0.1248, "step": 52330 }, { "epoch": 1.9020277636456138, "grad_norm": 0.8275489807128906, "learning_rate": 4.2411228904000136e-05, "loss": 0.1067, "step": 52340 }, { "epoch": 1.902391162148412, "grad_norm": 9.790162086486816, "learning_rate": 4.240750464212303e-05, "loss": 0.1548, "step": 52350 }, { "epoch": 1.9027545606512102, "grad_norm": 3.4635374546051025, "learning_rate": 4.240377963021847e-05, "loss": 0.1157, "step": 52360 }, { "epoch": 1.9031179591540082, "grad_norm": 0.5103577971458435, "learning_rate": 4.2400053868446976e-05, "loss": 0.1356, "step": 52370 }, { "epoch": 1.9034813576568066, "grad_norm": 1.608657956123352, "learning_rate": 4.239632735696908e-05, "loss": 0.114, "step": 52380 }, { "epoch": 1.9038447561596046, "grad_norm": 0.9395160675048828, "learning_rate": 4.2392600095945324e-05, "loss": 0.1079, "step": 52390 }, { "epoch": 1.9042081546624028, "grad_norm": 1.1032116413116455, "learning_rate": 4.2388872085536314e-05, "loss": 0.1789, "step": 52400 }, { "epoch": 1.904571553165201, "grad_norm": 0.765036940574646, "learning_rate": 4.2385143325902675e-05, "loss": 0.0984, "step": 52410 }, { "epoch": 1.904934951667999, "grad_norm": 2.460920572280884, "learning_rate": 4.238141381720507e-05, "loss": 0.1201, "step": 52420 }, { "epoch": 1.9052983501707974, "grad_norm": 0.6005275845527649, "learning_rate": 4.237768355960418e-05, "loss": 0.1145, "step": 52430 }, { "epoch": 1.9056617486735954, "grad_norm": 0.606640636920929, "learning_rate": 4.2373952553260745e-05, "loss": 0.1204, "step": 52440 }, { "epoch": 1.9060251471763936, "grad_norm": 1.0981110334396362, "learning_rate": 4.237022079833551e-05, "loss": 0.1237, "step": 52450 }, { "epoch": 
1.9063885456791918, "grad_norm": 1.2138440608978271, "learning_rate": 4.236648829498926e-05, "loss": 0.1027, "step": 52460 }, { "epoch": 1.90675194418199, "grad_norm": 0.38126930594444275, "learning_rate": 4.2362755043382816e-05, "loss": 0.1787, "step": 52470 }, { "epoch": 1.9071153426847882, "grad_norm": 1.1713272333145142, "learning_rate": 4.235902104367704e-05, "loss": 0.1098, "step": 52480 }, { "epoch": 1.9074787411875862, "grad_norm": 1.0597947835922241, "learning_rate": 4.235528629603282e-05, "loss": 0.1085, "step": 52490 }, { "epoch": 1.9078421396903846, "grad_norm": 0.5749408602714539, "learning_rate": 4.235155080061105e-05, "loss": 0.1295, "step": 52500 }, { "epoch": 1.9082055381931826, "grad_norm": 1.4702091217041016, "learning_rate": 4.234781455757269e-05, "loss": 0.1281, "step": 52510 }, { "epoch": 1.9085689366959808, "grad_norm": 0.586208164691925, "learning_rate": 4.234407756707873e-05, "loss": 0.1412, "step": 52520 }, { "epoch": 1.908932335198779, "grad_norm": 0.8572281002998352, "learning_rate": 4.2340339829290174e-05, "loss": 0.1059, "step": 52530 }, { "epoch": 1.909295733701577, "grad_norm": 0.7896180152893066, "learning_rate": 4.233660134436809e-05, "loss": 0.1144, "step": 52540 }, { "epoch": 1.9096591322043754, "grad_norm": 0.9928715825080872, "learning_rate": 4.233286211247351e-05, "loss": 0.1507, "step": 52550 }, { "epoch": 1.9100225307071734, "grad_norm": 1.1396877765655518, "learning_rate": 4.23291221337676e-05, "loss": 0.1306, "step": 52560 }, { "epoch": 1.9103859292099716, "grad_norm": 0.743976891040802, "learning_rate": 4.232538140841146e-05, "loss": 0.1839, "step": 52570 }, { "epoch": 1.9107493277127698, "grad_norm": 0.40765443444252014, "learning_rate": 4.232163993656628e-05, "loss": 0.1303, "step": 52580 }, { "epoch": 1.911112726215568, "grad_norm": 0.6623360514640808, "learning_rate": 4.231789771839326e-05, "loss": 0.1202, "step": 52590 }, { "epoch": 1.9114761247183663, "grad_norm": 0.9128944873809814, "learning_rate": 
4.2314154754053656e-05, "loss": 0.827, "step": 52600 }, { "epoch": 1.9118395232211642, "grad_norm": 0.9086483716964722, "learning_rate": 4.231041104370872e-05, "loss": 0.1147, "step": 52610 }, { "epoch": 1.9122029217239624, "grad_norm": 1.0172945261001587, "learning_rate": 4.2306666587519765e-05, "loss": 0.1501, "step": 52620 }, { "epoch": 1.9125663202267607, "grad_norm": 1.759474277496338, "learning_rate": 4.2302921385648126e-05, "loss": 0.1389, "step": 52630 }, { "epoch": 1.9129297187295589, "grad_norm": 1.5807387828826904, "learning_rate": 4.229917543825517e-05, "loss": 0.1067, "step": 52640 }, { "epoch": 1.913293117232357, "grad_norm": 0.8961324095726013, "learning_rate": 4.2295428745502284e-05, "loss": 0.1204, "step": 52650 }, { "epoch": 1.913656515735155, "grad_norm": 1.3519996404647827, "learning_rate": 4.229168130755092e-05, "loss": 0.1163, "step": 52660 }, { "epoch": 1.9140199142379535, "grad_norm": 1.4970946311950684, "learning_rate": 4.2287933124562526e-05, "loss": 0.1532, "step": 52670 }, { "epoch": 1.9143833127407515, "grad_norm": 48.62047576904297, "learning_rate": 4.2284184196698615e-05, "loss": 0.474, "step": 52680 }, { "epoch": 1.9147467112435497, "grad_norm": 1.5001195669174194, "learning_rate": 4.22804345241207e-05, "loss": 0.1108, "step": 52690 }, { "epoch": 1.9151101097463479, "grad_norm": 0.6537098288536072, "learning_rate": 4.227668410699034e-05, "loss": 0.1457, "step": 52700 }, { "epoch": 1.9154735082491459, "grad_norm": 1.2610722780227661, "learning_rate": 4.227293294546914e-05, "loss": 0.1223, "step": 52710 }, { "epoch": 1.9158369067519443, "grad_norm": 1.0688477754592896, "learning_rate": 4.226918103971871e-05, "loss": 0.1319, "step": 52720 }, { "epoch": 1.9162003052547423, "grad_norm": 0.532785952091217, "learning_rate": 4.226542838990072e-05, "loss": 0.1355, "step": 52730 }, { "epoch": 1.9165637037575405, "grad_norm": 0.6391937136650085, "learning_rate": 4.226167499617684e-05, "loss": 0.1215, "step": 52740 }, { "epoch": 
1.9169271022603387, "grad_norm": 0.9662737250328064, "learning_rate": 4.225792085870881e-05, "loss": 0.1522, "step": 52750 }, { "epoch": 1.917290500763137, "grad_norm": 1.3882033824920654, "learning_rate": 4.225416597765838e-05, "loss": 0.1234, "step": 52760 }, { "epoch": 1.917653899265935, "grad_norm": 1.2721084356307983, "learning_rate": 4.225041035318732e-05, "loss": 0.1645, "step": 52770 }, { "epoch": 1.918017297768733, "grad_norm": 1.06475830078125, "learning_rate": 4.224665398545745e-05, "loss": 0.1278, "step": 52780 }, { "epoch": 1.9183806962715315, "grad_norm": 1.8718911409378052, "learning_rate": 4.224289687463063e-05, "loss": 0.1081, "step": 52790 }, { "epoch": 1.9187440947743295, "grad_norm": 0.7336494326591492, "learning_rate": 4.223913902086874e-05, "loss": 0.1398, "step": 52800 }, { "epoch": 1.9187440947743295, "eval_loss": 0.33621227741241455, "eval_runtime": 180.2286, "eval_samples_per_second": 41.137, "eval_steps_per_second": 5.143, "eval_wer": 0.16169876740428776, "step": 52800 }, { "epoch": 1.9191074932771277, "grad_norm": 1.407049298286438, "learning_rate": 4.223538042433368e-05, "loss": 0.1219, "step": 52810 }, { "epoch": 1.919470891779926, "grad_norm": 0.27716466784477234, "learning_rate": 4.22316210851874e-05, "loss": 0.1303, "step": 52820 }, { "epoch": 1.919834290282724, "grad_norm": 1.0262128114700317, "learning_rate": 4.222786100359188e-05, "loss": 0.1053, "step": 52830 }, { "epoch": 1.9201976887855223, "grad_norm": 0.6818228960037231, "learning_rate": 4.222410017970913e-05, "loss": 0.09, "step": 52840 }, { "epoch": 1.9205610872883203, "grad_norm": 0.6761994361877441, "learning_rate": 4.2220338613701185e-05, "loss": 0.108, "step": 52850 }, { "epoch": 1.9209244857911185, "grad_norm": 4.313242435455322, "learning_rate": 4.2216576305730104e-05, "loss": 0.1262, "step": 52860 }, { "epoch": 1.9212878842939167, "grad_norm": 0.6098904609680176, "learning_rate": 4.221281325595803e-05, "loss": 0.2005, "step": 52870 }, { "epoch": 1.921651282796715, 
"grad_norm": 1.2213470935821533, "learning_rate": 4.2209049464547064e-05, "loss": 0.1088, "step": 52880 }, { "epoch": 1.9220146812995131, "grad_norm": 0.705827534198761, "learning_rate": 4.220528493165938e-05, "loss": 0.1207, "step": 52890 }, { "epoch": 1.9223780798023111, "grad_norm": 0.8161284327507019, "learning_rate": 4.22015196574572e-05, "loss": 0.1855, "step": 52900 }, { "epoch": 1.9227414783051093, "grad_norm": 0.7296738028526306, "learning_rate": 4.2197753642102734e-05, "loss": 0.1224, "step": 52910 }, { "epoch": 1.9231048768079075, "grad_norm": 1.1311039924621582, "learning_rate": 4.2193986885758255e-05, "loss": 0.1331, "step": 52920 }, { "epoch": 1.9234682753107057, "grad_norm": 1.0949995517730713, "learning_rate": 4.219021938858605e-05, "loss": 0.1172, "step": 52930 }, { "epoch": 1.923831673813504, "grad_norm": 2.3175034523010254, "learning_rate": 4.2186451150748465e-05, "loss": 0.1061, "step": 52940 }, { "epoch": 1.924195072316302, "grad_norm": 0.4657406806945801, "learning_rate": 4.2182682172407853e-05, "loss": 0.1099, "step": 52950 }, { "epoch": 1.9245584708191004, "grad_norm": 1.0153266191482544, "learning_rate": 4.2178912453726585e-05, "loss": 0.1028, "step": 52960 }, { "epoch": 1.9249218693218983, "grad_norm": 0.48774194717407227, "learning_rate": 4.217514199486712e-05, "loss": 0.1196, "step": 52970 }, { "epoch": 1.9252852678246966, "grad_norm": 0.5909627079963684, "learning_rate": 4.2171370795991886e-05, "loss": 3.5511, "step": 52980 }, { "epoch": 1.9256486663274948, "grad_norm": 1.0662988424301147, "learning_rate": 4.216759885726338e-05, "loss": 0.2095, "step": 52990 }, { "epoch": 1.9260120648302927, "grad_norm": 1.2562239170074463, "learning_rate": 4.2163826178844124e-05, "loss": 0.6666, "step": 53000 }, { "epoch": 1.9263754633330912, "grad_norm": 0.6966450214385986, "learning_rate": 4.216005276089666e-05, "loss": 0.1059, "step": 53010 }, { "epoch": 1.9267388618358892, "grad_norm": 0.7130870819091797, "learning_rate": 4.215627860358359e-05, 
"loss": 0.1822, "step": 53020 }, { "epoch": 1.9271022603386874, "grad_norm": 0.8667415380477905, "learning_rate": 4.215250370706752e-05, "loss": 0.1297, "step": 53030 }, { "epoch": 1.9274656588414856, "grad_norm": 0.8106217384338379, "learning_rate": 4.214872807151108e-05, "loss": 0.1198, "step": 53040 }, { "epoch": 1.9278290573442838, "grad_norm": 0.6625964045524597, "learning_rate": 4.214495169707697e-05, "loss": 0.1554, "step": 53050 }, { "epoch": 1.928192455847082, "grad_norm": 1.328296422958374, "learning_rate": 4.214117458392789e-05, "loss": 0.1275, "step": 53060 }, { "epoch": 1.92855585434988, "grad_norm": 0.5741416811943054, "learning_rate": 4.213739673222659e-05, "loss": 0.122, "step": 53070 }, { "epoch": 1.9289192528526784, "grad_norm": 0.6884883046150208, "learning_rate": 4.213361814213584e-05, "loss": 0.2229, "step": 53080 }, { "epoch": 1.9292826513554764, "grad_norm": 1.364357590675354, "learning_rate": 4.212983881381844e-05, "loss": 0.1169, "step": 53090 }, { "epoch": 1.9296460498582746, "grad_norm": 1.6540427207946777, "learning_rate": 4.2126058747437236e-05, "loss": 0.1273, "step": 53100 }, { "epoch": 1.9300094483610728, "grad_norm": 1.8838560581207275, "learning_rate": 4.21222779431551e-05, "loss": 0.1395, "step": 53110 }, { "epoch": 1.9303728468638708, "grad_norm": 1.0048059225082397, "learning_rate": 4.2118496401134925e-05, "loss": 0.1516, "step": 53120 }, { "epoch": 1.9307362453666692, "grad_norm": 1.0288422107696533, "learning_rate": 4.211471412153965e-05, "loss": 0.114, "step": 53130 }, { "epoch": 1.9310996438694672, "grad_norm": 0.8214828968048096, "learning_rate": 4.2110931104532236e-05, "loss": 0.1154, "step": 53140 }, { "epoch": 1.9314630423722654, "grad_norm": 1.7350075244903564, "learning_rate": 4.210714735027568e-05, "loss": 0.1351, "step": 53150 }, { "epoch": 1.9318264408750636, "grad_norm": 1.1846505403518677, "learning_rate": 4.210336285893302e-05, "loss": 0.1213, "step": 53160 }, { "epoch": 1.9321898393778618, "grad_norm": 
0.36710694432258606, "learning_rate": 4.2099577630667295e-05, "loss": 0.1328, "step": 53170 }, { "epoch": 1.93255323788066, "grad_norm": 1.5242916345596313, "learning_rate": 4.209579166564162e-05, "loss": 0.1068, "step": 53180 }, { "epoch": 1.932916636383458, "grad_norm": 0.5341594219207764, "learning_rate": 4.209200496401911e-05, "loss": 0.1132, "step": 53190 }, { "epoch": 1.9332800348862562, "grad_norm": 1.32260000705719, "learning_rate": 4.2088217525962914e-05, "loss": 0.2021, "step": 53200 }, { "epoch": 1.9336434333890544, "grad_norm": 1.7666555643081665, "learning_rate": 4.208442935163622e-05, "loss": 0.1199, "step": 53210 }, { "epoch": 1.9340068318918526, "grad_norm": 0.7060844302177429, "learning_rate": 4.2080640441202265e-05, "loss": 0.2058, "step": 53220 }, { "epoch": 1.9343702303946508, "grad_norm": 0.6064701676368713, "learning_rate": 4.207685079482428e-05, "loss": 0.1163, "step": 53230 }, { "epoch": 1.9347336288974488, "grad_norm": 0.8445596694946289, "learning_rate": 4.2073060412665554e-05, "loss": 0.1094, "step": 53240 }, { "epoch": 1.9350970274002472, "grad_norm": 1.8160717487335205, "learning_rate": 4.20692692948894e-05, "loss": 0.1421, "step": 53250 }, { "epoch": 1.9354604259030452, "grad_norm": 0.8465480208396912, "learning_rate": 4.206547744165918e-05, "loss": 0.1151, "step": 53260 }, { "epoch": 1.9358238244058434, "grad_norm": 0.4361567795276642, "learning_rate": 4.206168485313823e-05, "loss": 0.1343, "step": 53270 }, { "epoch": 1.9361872229086416, "grad_norm": 0.4682723581790924, "learning_rate": 4.2057891529490004e-05, "loss": 0.2349, "step": 53280 }, { "epoch": 1.9365506214114396, "grad_norm": 0.7894558310508728, "learning_rate": 4.205409747087792e-05, "loss": 0.1211, "step": 53290 }, { "epoch": 1.936914019914238, "grad_norm": 1.9797241687774658, "learning_rate": 4.205030267746545e-05, "loss": 0.1403, "step": 53300 }, { "epoch": 1.937277418417036, "grad_norm": 0.7554487586021423, "learning_rate": 4.20465071494161e-05, "loss": 0.133, "step": 
53310 }, { "epoch": 1.9376408169198343, "grad_norm": 0.5056400895118713, "learning_rate": 4.2042710886893414e-05, "loss": 0.1429, "step": 53320 }, { "epoch": 1.9380042154226325, "grad_norm": 4.12957763671875, "learning_rate": 4.203891389006096e-05, "loss": 0.1154, "step": 53330 }, { "epoch": 1.9383676139254307, "grad_norm": 0.7138916850090027, "learning_rate": 4.203511615908232e-05, "loss": 0.1235, "step": 53340 }, { "epoch": 1.9387310124282289, "grad_norm": 0.6101375818252563, "learning_rate": 4.2031317694121144e-05, "loss": 0.1015, "step": 53350 }, { "epoch": 1.9390944109310269, "grad_norm": 0.9244548082351685, "learning_rate": 4.202751849534108e-05, "loss": 0.104, "step": 53360 }, { "epoch": 1.9394578094338253, "grad_norm": 0.38235339522361755, "learning_rate": 4.202371856290583e-05, "loss": 0.2562, "step": 53370 }, { "epoch": 1.9398212079366233, "grad_norm": 1.2204453945159912, "learning_rate": 4.201991789697912e-05, "loss": 0.1074, "step": 53380 }, { "epoch": 1.9401846064394215, "grad_norm": 0.9025306105613708, "learning_rate": 4.2016116497724715e-05, "loss": 0.1154, "step": 53390 }, { "epoch": 1.9405480049422197, "grad_norm": 0.6132228970527649, "learning_rate": 4.201231436530637e-05, "loss": 0.1332, "step": 53400 }, { "epoch": 1.9405480049422197, "eval_loss": 0.34726399183273315, "eval_runtime": 180.5053, "eval_samples_per_second": 41.074, "eval_steps_per_second": 5.136, "eval_wer": 0.16638226804872294, "step": 53400 }, { "epoch": 1.9409114034450177, "grad_norm": 1.0227421522140503, "learning_rate": 4.2008511499887945e-05, "loss": 0.1042, "step": 53410 }, { "epoch": 1.941274801947816, "grad_norm": 1.9135148525238037, "learning_rate": 4.2004707901633274e-05, "loss": 0.1953, "step": 53420 }, { "epoch": 1.941638200450614, "grad_norm": 1.0358216762542725, "learning_rate": 4.200090357070624e-05, "loss": 0.1029, "step": 53430 }, { "epoch": 1.9420015989534123, "grad_norm": 0.9207081198692322, "learning_rate": 4.199709850727076e-05, "loss": 0.1171, "step": 53440 }, 
{ "epoch": 1.9423649974562105, "grad_norm": 0.558474600315094, "learning_rate": 4.1993292711490784e-05, "loss": 0.1185, "step": 53450 }, { "epoch": 1.9427283959590087, "grad_norm": 1.7064687013626099, "learning_rate": 4.198948618353029e-05, "loss": 0.1429, "step": 53460 }, { "epoch": 1.943091794461807, "grad_norm": 0.4840683341026306, "learning_rate": 4.198567892355328e-05, "loss": 0.1566, "step": 53470 }, { "epoch": 1.943455192964605, "grad_norm": 2.152949810028076, "learning_rate": 4.19818709317238e-05, "loss": 0.11, "step": 53480 }, { "epoch": 1.943818591467403, "grad_norm": 1.0784387588500977, "learning_rate": 4.197806220820592e-05, "loss": 0.1211, "step": 53490 }, { "epoch": 1.9441819899702013, "grad_norm": 0.9039841890335083, "learning_rate": 4.197425275316376e-05, "loss": 0.1167, "step": 53500 }, { "epoch": 1.9445453884729995, "grad_norm": 0.8237749934196472, "learning_rate": 4.1970442566761436e-05, "loss": 0.1443, "step": 53510 }, { "epoch": 1.9449087869757977, "grad_norm": 0.5178882479667664, "learning_rate": 4.196663164916313e-05, "loss": 0.1251, "step": 53520 }, { "epoch": 1.9452721854785957, "grad_norm": 0.883787989616394, "learning_rate": 4.196282000053301e-05, "loss": 0.1078, "step": 53530 }, { "epoch": 1.9456355839813941, "grad_norm": 0.6376329064369202, "learning_rate": 4.195900762103535e-05, "loss": 0.1286, "step": 53540 }, { "epoch": 1.9459989824841921, "grad_norm": 1.3312426805496216, "learning_rate": 4.1955194510834394e-05, "loss": 0.1421, "step": 53550 }, { "epoch": 1.9463623809869903, "grad_norm": 0.8462713360786438, "learning_rate": 4.1951380670094424e-05, "loss": 0.1183, "step": 53560 }, { "epoch": 1.9467257794897885, "grad_norm": 0.4300178587436676, "learning_rate": 4.194756609897978e-05, "loss": 0.11, "step": 53570 }, { "epoch": 1.9470891779925865, "grad_norm": 0.5355455875396729, "learning_rate": 4.1943750797654816e-05, "loss": 0.1197, "step": 53580 }, { "epoch": 1.947452576495385, "grad_norm": 0.8750283122062683, "learning_rate": 
4.193993476628391e-05, "loss": 0.1024, "step": 53590 }, { "epoch": 1.947815974998183, "grad_norm": 1.2552978992462158, "learning_rate": 4.193611800503148e-05, "loss": 0.152, "step": 53600 }, { "epoch": 1.9481793735009811, "grad_norm": 0.8852622509002686, "learning_rate": 4.1932300514062e-05, "loss": 0.1077, "step": 53610 }, { "epoch": 1.9485427720037793, "grad_norm": 0.6841835379600525, "learning_rate": 4.192848229353992e-05, "loss": 0.1281, "step": 53620 }, { "epoch": 1.9489061705065776, "grad_norm": 1.0521607398986816, "learning_rate": 4.192466334362978e-05, "loss": 0.1136, "step": 53630 }, { "epoch": 1.9492695690093758, "grad_norm": 4.119276523590088, "learning_rate": 4.192084366449612e-05, "loss": 0.0939, "step": 53640 }, { "epoch": 1.9496329675121737, "grad_norm": 0.8290958404541016, "learning_rate": 4.19170232563035e-05, "loss": 0.1625, "step": 53650 }, { "epoch": 1.9499963660149722, "grad_norm": 0.6359632015228271, "learning_rate": 4.191320211921654e-05, "loss": 0.1208, "step": 53660 }, { "epoch": 1.9503597645177702, "grad_norm": 0.699052631855011, "learning_rate": 4.1909380253399875e-05, "loss": 0.1248, "step": 53670 }, { "epoch": 1.9507231630205684, "grad_norm": 17.17115592956543, "learning_rate": 4.190555765901819e-05, "loss": 0.3458, "step": 53680 }, { "epoch": 1.9510865615233666, "grad_norm": 3.899052858352661, "learning_rate": 4.190173433623618e-05, "loss": 0.1144, "step": 53690 }, { "epoch": 1.9514499600261646, "grad_norm": 1.1907508373260498, "learning_rate": 4.1897910285218556e-05, "loss": 0.152, "step": 53700 }, { "epoch": 1.951813358528963, "grad_norm": 0.7645424008369446, "learning_rate": 4.189408550613011e-05, "loss": 0.1258, "step": 53710 }, { "epoch": 1.952176757031761, "grad_norm": 0.2523237466812134, "learning_rate": 4.1890259999135625e-05, "loss": 0.1213, "step": 53720 }, { "epoch": 1.9525401555345592, "grad_norm": 1.3578497171401978, "learning_rate": 4.188643376439993e-05, "loss": 1.5489, "step": 53730 }, { "epoch": 1.9529035540373574, 
"grad_norm": 0.6249386072158813, "learning_rate": 4.1882606802087896e-05, "loss": 0.1699, "step": 53740 }, { "epoch": 1.9532669525401556, "grad_norm": 0.9699862599372864, "learning_rate": 4.1878779112364394e-05, "loss": 0.1724, "step": 53750 }, { "epoch": 1.9536303510429538, "grad_norm": 1.3478792905807495, "learning_rate": 4.187495069539437e-05, "loss": 0.133, "step": 53760 }, { "epoch": 1.9539937495457518, "grad_norm": 0.6324986815452576, "learning_rate": 4.187112155134275e-05, "loss": 0.1354, "step": 53770 }, { "epoch": 1.95435714804855, "grad_norm": 2.009544610977173, "learning_rate": 4.186729168037453e-05, "loss": 0.1347, "step": 53780 }, { "epoch": 1.9547205465513482, "grad_norm": 0.6510929465293884, "learning_rate": 4.186346108265472e-05, "loss": 0.1227, "step": 53790 }, { "epoch": 1.9550839450541464, "grad_norm": 1.5079245567321777, "learning_rate": 4.185962975834838e-05, "loss": 0.1347, "step": 53800 }, { "epoch": 1.9554473435569446, "grad_norm": 3.214449882507324, "learning_rate": 4.1855797707620586e-05, "loss": 0.1138, "step": 53810 }, { "epoch": 1.9558107420597426, "grad_norm": 0.7995330095291138, "learning_rate": 4.1851964930636434e-05, "loss": 0.1522, "step": 53820 }, { "epoch": 1.956174140562541, "grad_norm": 1.6713122129440308, "learning_rate": 4.184813142756108e-05, "loss": 0.125, "step": 53830 }, { "epoch": 1.956537539065339, "grad_norm": 0.7136033177375793, "learning_rate": 4.184429719855968e-05, "loss": 0.1267, "step": 53840 }, { "epoch": 1.9569009375681372, "grad_norm": 0.5580174922943115, "learning_rate": 4.1840462243797444e-05, "loss": 0.1126, "step": 53850 }, { "epoch": 1.9572643360709354, "grad_norm": 0.8671419024467468, "learning_rate": 4.183662656343961e-05, "loss": 0.1209, "step": 53860 }, { "epoch": 1.9576277345737334, "grad_norm": 0.6624314188957214, "learning_rate": 4.183279015765145e-05, "loss": 0.1397, "step": 53870 }, { "epoch": 1.9579911330765318, "grad_norm": 1.4401901960372925, "learning_rate": 4.182895302659825e-05, "loss": 
0.0894, "step": 53880 }, { "epoch": 1.9583545315793298, "grad_norm": 0.9187797904014587, "learning_rate": 4.182511517044534e-05, "loss": 0.127, "step": 53890 }, { "epoch": 1.958717930082128, "grad_norm": 1.2426072359085083, "learning_rate": 4.1821276589358084e-05, "loss": 0.1381, "step": 53900 }, { "epoch": 1.9590813285849262, "grad_norm": 0.8035231828689575, "learning_rate": 4.1817437283501865e-05, "loss": 0.0953, "step": 53910 }, { "epoch": 1.9594447270877244, "grad_norm": 0.32439205050468445, "learning_rate": 4.1813597253042115e-05, "loss": 0.138, "step": 53920 }, { "epoch": 1.9598081255905226, "grad_norm": 1.0287327766418457, "learning_rate": 4.180975649814428e-05, "loss": 3.1039, "step": 53930 }, { "epoch": 1.9601715240933206, "grad_norm": 1.3450182676315308, "learning_rate": 4.180591501897384e-05, "loss": 0.1081, "step": 53940 }, { "epoch": 1.960534922596119, "grad_norm": 7.1403961181640625, "learning_rate": 4.180207281569633e-05, "loss": 0.1484, "step": 53950 }, { "epoch": 1.960898321098917, "grad_norm": 1.2163225412368774, "learning_rate": 4.179822988847728e-05, "loss": 0.1041, "step": 53960 }, { "epoch": 1.9612617196017152, "grad_norm": 0.9922796487808228, "learning_rate": 4.179438623748228e-05, "loss": 0.1343, "step": 53970 }, { "epoch": 1.9616251181045135, "grad_norm": 2.245447874069214, "learning_rate": 4.1790541862876906e-05, "loss": 0.1015, "step": 53980 }, { "epoch": 1.9619885166073114, "grad_norm": 2.284679651260376, "learning_rate": 4.178669676482685e-05, "loss": 0.0913, "step": 53990 }, { "epoch": 1.9623519151101099, "grad_norm": 0.9692349433898926, "learning_rate": 4.178285094349775e-05, "loss": 0.1282, "step": 54000 }, { "epoch": 1.9623519151101099, "eval_loss": 0.3314037322998047, "eval_runtime": 180.606, "eval_samples_per_second": 41.051, "eval_steps_per_second": 5.133, "eval_wer": 0.16505709150979359, "step": 54000 }, { "epoch": 1.9627153136129079, "grad_norm": 0.4108816683292389, "learning_rate": 4.177900439905531e-05, "loss": 0.1272, 
"step": 54010 }, { "epoch": 1.963078712115706, "grad_norm": 0.3358526825904846, "learning_rate": 4.1775157131665276e-05, "loss": 0.1453, "step": 54020 }, { "epoch": 1.9634421106185043, "grad_norm": 1.476314663887024, "learning_rate": 4.177130914149341e-05, "loss": 0.1162, "step": 54030 }, { "epoch": 1.9638055091213025, "grad_norm": 0.7912114262580872, "learning_rate": 4.17674604287055e-05, "loss": 0.1056, "step": 54040 }, { "epoch": 1.9641689076241007, "grad_norm": 0.4801596403121948, "learning_rate": 4.176361099346738e-05, "loss": 0.1478, "step": 54050 }, { "epoch": 1.9645323061268987, "grad_norm": 0.7710531949996948, "learning_rate": 4.175976083594491e-05, "loss": 0.1131, "step": 54060 }, { "epoch": 1.9648957046296969, "grad_norm": 0.6709341406822205, "learning_rate": 4.175590995630398e-05, "loss": 0.1586, "step": 54070 }, { "epoch": 1.965259103132495, "grad_norm": 1.3941307067871094, "learning_rate": 4.17520583547105e-05, "loss": 0.1131, "step": 54080 }, { "epoch": 1.9656225016352933, "grad_norm": 0.759842038154602, "learning_rate": 4.174820603133043e-05, "loss": 0.0985, "step": 54090 }, { "epoch": 1.9659859001380915, "grad_norm": 0.9153608679771423, "learning_rate": 4.174435298632976e-05, "loss": 0.1547, "step": 54100 }, { "epoch": 1.9663492986408895, "grad_norm": 1.4363652467727661, "learning_rate": 4.174049921987449e-05, "loss": 0.1127, "step": 54110 }, { "epoch": 1.966712697143688, "grad_norm": 0.7368317246437073, "learning_rate": 4.173664473213067e-05, "loss": 0.1302, "step": 54120 }, { "epoch": 1.967076095646486, "grad_norm": 1.2740521430969238, "learning_rate": 4.173278952326438e-05, "loss": 0.1294, "step": 54130 }, { "epoch": 1.967439494149284, "grad_norm": 2.7798774242401123, "learning_rate": 4.1728933593441735e-05, "loss": 0.1011, "step": 54140 }, { "epoch": 1.9678028926520823, "grad_norm": 1.9629179239273071, "learning_rate": 4.172507694282885e-05, "loss": 0.3149, "step": 54150 }, { "epoch": 1.9681662911548803, "grad_norm": 3.5863332748413086, 
"learning_rate": 4.1721219571591915e-05, "loss": 0.1323, "step": 54160 }, { "epoch": 1.9685296896576787, "grad_norm": 0.29740679264068604, "learning_rate": 4.1717361479897116e-05, "loss": 0.1725, "step": 54170 }, { "epoch": 1.9688930881604767, "grad_norm": 1.0469319820404053, "learning_rate": 4.17135026679107e-05, "loss": 0.1138, "step": 54180 }, { "epoch": 1.969256486663275, "grad_norm": 0.5336177945137024, "learning_rate": 4.170964313579891e-05, "loss": 0.2207, "step": 54190 }, { "epoch": 1.9696198851660731, "grad_norm": 0.973862886428833, "learning_rate": 4.1705782883728055e-05, "loss": 0.1328, "step": 54200 }, { "epoch": 1.9699832836688713, "grad_norm": 0.8640954494476318, "learning_rate": 4.170192191186446e-05, "loss": 0.1315, "step": 54210 }, { "epoch": 1.9703466821716695, "grad_norm": 0.47578397393226624, "learning_rate": 4.169806022037447e-05, "loss": 0.1823, "step": 54220 }, { "epoch": 1.9707100806744675, "grad_norm": 1.4527409076690674, "learning_rate": 4.169419780942448e-05, "loss": 2.5822, "step": 54230 }, { "epoch": 1.971073479177266, "grad_norm": 0.48623302578926086, "learning_rate": 4.1690334679180896e-05, "loss": 0.1093, "step": 54240 }, { "epoch": 1.971436877680064, "grad_norm": 1.1767234802246094, "learning_rate": 4.1686470829810185e-05, "loss": 0.1329, "step": 54250 }, { "epoch": 1.9718002761828621, "grad_norm": 1.128841519355774, "learning_rate": 4.1682606261478816e-05, "loss": 0.1102, "step": 54260 }, { "epoch": 1.9721636746856603, "grad_norm": 1.4685746431350708, "learning_rate": 4.16787409743533e-05, "loss": 0.14, "step": 54270 }, { "epoch": 1.9725270731884583, "grad_norm": 0.9918948411941528, "learning_rate": 4.167487496860018e-05, "loss": 0.1093, "step": 54280 }, { "epoch": 1.9728904716912568, "grad_norm": 0.5849924683570862, "learning_rate": 4.167100824438602e-05, "loss": 0.3633, "step": 54290 }, { "epoch": 1.9732538701940547, "grad_norm": 1.0083026885986328, "learning_rate": 4.1667140801877433e-05, "loss": 0.3471, "step": 54300 }, { 
"epoch": 1.973617268696853, "grad_norm": 4.210540771484375, "learning_rate": 4.1663272641241056e-05, "loss": 0.111, "step": 54310 }, { "epoch": 1.9739806671996512, "grad_norm": 0.47457021474838257, "learning_rate": 4.165940376264354e-05, "loss": 0.1304, "step": 54320 }, { "epoch": 1.9743440657024494, "grad_norm": 0.6626879572868347, "learning_rate": 4.1655534166251596e-05, "loss": 0.1362, "step": 54330 }, { "epoch": 1.9747074642052476, "grad_norm": 1.0823551416397095, "learning_rate": 4.1651663852231946e-05, "loss": 0.1009, "step": 54340 }, { "epoch": 1.9750708627080455, "grad_norm": 1.6723361015319824, "learning_rate": 4.164779282075134e-05, "loss": 0.1539, "step": 54350 }, { "epoch": 1.9754342612108438, "grad_norm": 1.5842360258102417, "learning_rate": 4.1643921071976584e-05, "loss": 0.1342, "step": 54360 }, { "epoch": 1.975797659713642, "grad_norm": 1.055336594581604, "learning_rate": 4.164004860607448e-05, "loss": 0.176, "step": 54370 }, { "epoch": 1.9761610582164402, "grad_norm": 0.81571364402771, "learning_rate": 4.16361754232119e-05, "loss": 0.1187, "step": 54380 }, { "epoch": 1.9765244567192384, "grad_norm": 1.0346819162368774, "learning_rate": 4.1632301523555693e-05, "loss": 0.1255, "step": 54390 }, { "epoch": 1.9768878552220364, "grad_norm": 1.1211163997650146, "learning_rate": 4.162842690727281e-05, "loss": 0.1165, "step": 54400 }, { "epoch": 1.9772512537248348, "grad_norm": 0.5160552263259888, "learning_rate": 4.162455157453017e-05, "loss": 0.1393, "step": 54410 }, { "epoch": 1.9776146522276328, "grad_norm": 0.767784833908081, "learning_rate": 4.1620675525494746e-05, "loss": 0.1552, "step": 54420 }, { "epoch": 1.977978050730431, "grad_norm": 1.101317286491394, "learning_rate": 4.1616798760333554e-05, "loss": 0.1182, "step": 54430 }, { "epoch": 1.9783414492332292, "grad_norm": 0.7279396653175354, "learning_rate": 4.161292127921363e-05, "loss": 0.12, "step": 54440 }, { "epoch": 1.9787048477360272, "grad_norm": 1.5998153686523438, "learning_rate": 
4.1609043082302036e-05, "loss": 0.1335, "step": 54450 }, { "epoch": 1.9790682462388256, "grad_norm": 0.8245583772659302, "learning_rate": 4.160516416976587e-05, "loss": 0.1249, "step": 54460 }, { "epoch": 1.9794316447416236, "grad_norm": 0.5749397277832031, "learning_rate": 4.1601284541772255e-05, "loss": 0.1939, "step": 54470 }, { "epoch": 1.9797950432444218, "grad_norm": 0.7786006927490234, "learning_rate": 4.159740419848837e-05, "loss": 1.8059, "step": 54480 }, { "epoch": 1.98015844174722, "grad_norm": 0.41233259439468384, "learning_rate": 4.159352314008138e-05, "loss": 0.1208, "step": 54490 }, { "epoch": 1.9805218402500182, "grad_norm": 0.5091323256492615, "learning_rate": 4.158964136671852e-05, "loss": 0.1279, "step": 54500 }, { "epoch": 1.9808852387528164, "grad_norm": 4.300207138061523, "learning_rate": 4.158575887856704e-05, "loss": 0.1744, "step": 54510 }, { "epoch": 1.9812486372556144, "grad_norm": 0.7447227239608765, "learning_rate": 4.1581875675794226e-05, "loss": 0.1652, "step": 54520 }, { "epoch": 1.9816120357584128, "grad_norm": 0.6846696734428406, "learning_rate": 4.157799175856738e-05, "loss": 0.1027, "step": 54530 }, { "epoch": 1.9819754342612108, "grad_norm": 0.8642467260360718, "learning_rate": 4.157410712705386e-05, "loss": 0.1165, "step": 54540 }, { "epoch": 1.982338832764009, "grad_norm": 0.8407902121543884, "learning_rate": 4.157022178142104e-05, "loss": 0.1623, "step": 54550 }, { "epoch": 1.9827022312668072, "grad_norm": 0.8839777708053589, "learning_rate": 4.156633572183631e-05, "loss": 0.1131, "step": 54560 }, { "epoch": 1.9830656297696052, "grad_norm": 1.39069402217865, "learning_rate": 4.1562448948467126e-05, "loss": 0.1906, "step": 54570 }, { "epoch": 1.9834290282724036, "grad_norm": 2.1196155548095703, "learning_rate": 4.1558561461480936e-05, "loss": 0.1261, "step": 54580 }, { "epoch": 1.9837924267752016, "grad_norm": 1.092934250831604, "learning_rate": 4.155467326104525e-05, "loss": 0.1029, "step": 54590 }, { "epoch": 
1.9841558252779998, "grad_norm": 0.7902958989143372, "learning_rate": 4.1550784347327607e-05, "loss": 0.1159, "step": 54600 }, { "epoch": 1.9841558252779998, "eval_loss": 0.3433511555194855, "eval_runtime": 180.0868, "eval_samples_per_second": 41.169, "eval_steps_per_second": 5.148, "eval_wer": 0.16009221776462687, "step": 54600 }, { "epoch": 1.984519223780798, "grad_norm": 1.3083094358444214, "learning_rate": 4.1546894720495546e-05, "loss": 0.1172, "step": 54610 }, { "epoch": 1.9848826222835962, "grad_norm": 1.9061583280563354, "learning_rate": 4.154300438071666e-05, "loss": 0.1335, "step": 54620 }, { "epoch": 1.9852460207863944, "grad_norm": 1.9469786882400513, "learning_rate": 4.153911332815859e-05, "loss": 0.1014, "step": 54630 }, { "epoch": 1.9856094192891924, "grad_norm": 6.232102394104004, "learning_rate": 4.153522156298896e-05, "loss": 0.1216, "step": 54640 }, { "epoch": 1.9859728177919909, "grad_norm": 0.6339765191078186, "learning_rate": 4.153132908537547e-05, "loss": 0.1236, "step": 54650 }, { "epoch": 1.9863362162947888, "grad_norm": 0.9476169943809509, "learning_rate": 4.152743589548582e-05, "loss": 0.0962, "step": 54660 }, { "epoch": 1.986699614797587, "grad_norm": 1.0691879987716675, "learning_rate": 4.152354199348777e-05, "loss": 0.3789, "step": 54670 }, { "epoch": 1.9870630133003853, "grad_norm": 0.9338876605033875, "learning_rate": 4.1519647379549084e-05, "loss": 0.0914, "step": 54680 }, { "epoch": 1.9874264118031832, "grad_norm": 0.6754772663116455, "learning_rate": 4.151575205383758e-05, "loss": 0.1044, "step": 54690 }, { "epoch": 1.9877898103059817, "grad_norm": 0.6961863040924072, "learning_rate": 4.151185601652107e-05, "loss": 0.1322, "step": 54700 }, { "epoch": 1.9881532088087797, "grad_norm": 1.1425034999847412, "learning_rate": 4.150795926776744e-05, "loss": 0.1381, "step": 54710 }, { "epoch": 1.9885166073115779, "grad_norm": 1.4080971479415894, "learning_rate": 4.150406180774458e-05, "loss": 0.1234, "step": 54720 }, { "epoch": 
1.988880005814376, "grad_norm": 0.7941197752952576, "learning_rate": 4.1500163636620414e-05, "loss": 0.0903, "step": 54730 }, { "epoch": 1.989243404317174, "grad_norm": 0.8813301920890808, "learning_rate": 4.149626475456291e-05, "loss": 0.0965, "step": 54740 }, { "epoch": 1.9896068028199725, "grad_norm": 0.727293848991394, "learning_rate": 4.1492365161740054e-05, "loss": 0.1269, "step": 54750 }, { "epoch": 1.9899702013227705, "grad_norm": 3.7548305988311768, "learning_rate": 4.148846485831986e-05, "loss": 0.0992, "step": 54760 }, { "epoch": 1.9903335998255687, "grad_norm": 0.5141910910606384, "learning_rate": 4.148456384447037e-05, "loss": 0.1275, "step": 54770 }, { "epoch": 1.9906969983283669, "grad_norm": 0.5424654483795166, "learning_rate": 4.1480662120359696e-05, "loss": 0.6733, "step": 54780 }, { "epoch": 1.991060396831165, "grad_norm": 0.8342083096504211, "learning_rate": 4.147675968615592e-05, "loss": 0.1126, "step": 54790 }, { "epoch": 1.9914237953339633, "grad_norm": 0.3992403745651245, "learning_rate": 4.147285654202719e-05, "loss": 0.1589, "step": 54800 }, { "epoch": 1.9917871938367613, "grad_norm": 0.9092950820922852, "learning_rate": 4.146895268814169e-05, "loss": 0.1217, "step": 54810 }, { "epoch": 1.9921505923395597, "grad_norm": 0.4327254295349121, "learning_rate": 4.1465048124667605e-05, "loss": 0.1615, "step": 54820 }, { "epoch": 1.9925139908423577, "grad_norm": 1.1109565496444702, "learning_rate": 4.146114285177319e-05, "loss": 0.1965, "step": 54830 }, { "epoch": 1.992877389345156, "grad_norm": 10.526979446411133, "learning_rate": 4.145723686962669e-05, "loss": 0.2047, "step": 54840 }, { "epoch": 1.993240787847954, "grad_norm": 1.4240983724594116, "learning_rate": 4.1453330178396415e-05, "loss": 0.1261, "step": 54850 }, { "epoch": 1.993604186350752, "grad_norm": 3.436688184738159, "learning_rate": 4.144942277825068e-05, "loss": 0.1194, "step": 54860 }, { "epoch": 1.9939675848535505, "grad_norm": 0.3504880666732788, "learning_rate": 
4.1445514669357846e-05, "loss": 0.1269, "step": 54870 }, { "epoch": 1.9943309833563485, "grad_norm": 1.8600322008132935, "learning_rate": 4.14416058518863e-05, "loss": 0.1202, "step": 54880 }, { "epoch": 1.9946943818591467, "grad_norm": 0.7843186259269714, "learning_rate": 4.1437696326004456e-05, "loss": 0.1047, "step": 54890 }, { "epoch": 1.995057780361945, "grad_norm": 1.593837022781372, "learning_rate": 4.1433786091880765e-05, "loss": 0.1269, "step": 54900 }, { "epoch": 1.9954211788647431, "grad_norm": 1.9453426599502563, "learning_rate": 4.14298751496837e-05, "loss": 0.1054, "step": 54910 }, { "epoch": 1.9957845773675413, "grad_norm": 0.7861382365226746, "learning_rate": 4.142596349958177e-05, "loss": 0.1467, "step": 54920 }, { "epoch": 1.9961479758703393, "grad_norm": 0.9338520169258118, "learning_rate": 4.142205114174352e-05, "loss": 0.1014, "step": 54930 }, { "epoch": 1.9965113743731377, "grad_norm": 3.8717129230499268, "learning_rate": 4.1418138076337516e-05, "loss": 0.1426, "step": 54940 }, { "epoch": 1.9968747728759357, "grad_norm": 0.579759418964386, "learning_rate": 4.141422430353236e-05, "loss": 0.1154, "step": 54950 }, { "epoch": 1.997238171378734, "grad_norm": 1.129913091659546, "learning_rate": 4.141030982349668e-05, "loss": 0.1019, "step": 54960 }, { "epoch": 1.9976015698815321, "grad_norm": 0.5852164626121521, "learning_rate": 4.140639463639913e-05, "loss": 0.1719, "step": 54970 }, { "epoch": 1.9979649683843301, "grad_norm": 3.1367127895355225, "learning_rate": 4.1402478742408415e-05, "loss": 0.0909, "step": 54980 }, { "epoch": 1.9983283668871286, "grad_norm": 0.5207622051239014, "learning_rate": 4.1398562141693253e-05, "loss": 0.1212, "step": 54990 }, { "epoch": 1.9986917653899265, "grad_norm": 0.5118950605392456, "learning_rate": 4.1394644834422394e-05, "loss": 0.1217, "step": 55000 }, { "epoch": 1.9990551638927248, "grad_norm": 0.45482707023620605, "learning_rate": 4.1390726820764614e-05, "loss": 0.0986, "step": 55010 }, { "epoch": 
1.999418562395523, "grad_norm": 1.9805399179458618, "learning_rate": 4.138680810088875e-05, "loss": 0.1356, "step": 55020 }, { "epoch": 1.999781960898321, "grad_norm": 1.0094414949417114, "learning_rate": 4.138288867496362e-05, "loss": 0.1751, "step": 55030 }, { "epoch": 2.0001453594011194, "grad_norm": 1.6492732763290405, "learning_rate": 4.1378968543158106e-05, "loss": 0.1792, "step": 55040 }, { "epoch": 2.0005087579039174, "grad_norm": 6.960714340209961, "learning_rate": 4.137504770564111e-05, "loss": 0.1707, "step": 55050 }, { "epoch": 2.000872156406716, "grad_norm": 0.483518123626709, "learning_rate": 4.1371126162581576e-05, "loss": 0.1043, "step": 55060 }, { "epoch": 2.0012355549095138, "grad_norm": 0.5076984763145447, "learning_rate": 4.1367203914148464e-05, "loss": 0.1356, "step": 55070 }, { "epoch": 2.0015989534123118, "grad_norm": 2.341773509979248, "learning_rate": 4.136328096051077e-05, "loss": 0.1096, "step": 55080 }, { "epoch": 2.00196235191511, "grad_norm": 0.5860946178436279, "learning_rate": 4.135935730183752e-05, "loss": 0.1076, "step": 55090 }, { "epoch": 2.002325750417908, "grad_norm": 0.4653785824775696, "learning_rate": 4.1355432938297774e-05, "loss": 0.1517, "step": 55100 }, { "epoch": 2.0026891489207066, "grad_norm": 1.198096513748169, "learning_rate": 4.135150787006061e-05, "loss": 0.369, "step": 55110 }, { "epoch": 2.0030525474235046, "grad_norm": 1.07427978515625, "learning_rate": 4.134758209729516e-05, "loss": 0.1476, "step": 55120 }, { "epoch": 2.0034159459263026, "grad_norm": 0.7984631657600403, "learning_rate": 4.134365562017055e-05, "loss": 0.0972, "step": 55130 }, { "epoch": 2.003779344429101, "grad_norm": 1.2470594644546509, "learning_rate": 4.133972843885598e-05, "loss": 0.0884, "step": 55140 }, { "epoch": 2.004142742931899, "grad_norm": 0.6046581268310547, "learning_rate": 4.133580055352064e-05, "loss": 0.2083, "step": 55150 }, { "epoch": 2.0045061414346974, "grad_norm": 0.8026099801063538, "learning_rate": 4.133187196433379e-05, 
"loss": 0.1278, "step": 55160 }, { "epoch": 2.0048695399374954, "grad_norm": 0.6957481503486633, "learning_rate": 4.132794267146467e-05, "loss": 0.1106, "step": 55170 }, { "epoch": 2.005232938440294, "grad_norm": 1.2208986282348633, "learning_rate": 4.13240126750826e-05, "loss": 0.1058, "step": 55180 }, { "epoch": 2.005596336943092, "grad_norm": 0.9665369391441345, "learning_rate": 4.132008197535692e-05, "loss": 0.1195, "step": 55190 }, { "epoch": 2.00595973544589, "grad_norm": 1.0869636535644531, "learning_rate": 4.131615057245696e-05, "loss": 0.1004, "step": 55200 }, { "epoch": 2.00595973544589, "eval_loss": 0.3372127115726471, "eval_runtime": 180.3164, "eval_samples_per_second": 41.117, "eval_steps_per_second": 5.141, "eval_wer": 0.16414035979450686, "step": 55200 }, { "epoch": 2.006323133948688, "grad_norm": 1.0461617708206177, "learning_rate": 4.131221846655212e-05, "loss": 0.1003, "step": 55210 }, { "epoch": 2.006686532451486, "grad_norm": 1.1234357357025146, "learning_rate": 4.130828565781183e-05, "loss": 0.131, "step": 55220 }, { "epoch": 2.0070499309542846, "grad_norm": 0.792592465877533, "learning_rate": 4.1304352146405544e-05, "loss": 0.1236, "step": 55230 }, { "epoch": 2.0074133294570826, "grad_norm": 2.0296480655670166, "learning_rate": 4.130041793250273e-05, "loss": 0.1162, "step": 55240 }, { "epoch": 2.0077767279598806, "grad_norm": 0.8490334153175354, "learning_rate": 4.12964830162729e-05, "loss": 0.0891, "step": 55250 }, { "epoch": 2.008140126462679, "grad_norm": 2.996204376220703, "learning_rate": 4.129254739788561e-05, "loss": 0.088, "step": 55260 }, { "epoch": 2.008503524965477, "grad_norm": 0.785502016544342, "learning_rate": 4.128861107751041e-05, "loss": 2.8838, "step": 55270 }, { "epoch": 2.0088669234682754, "grad_norm": 0.9276618957519531, "learning_rate": 4.128467405531693e-05, "loss": 0.1125, "step": 55280 }, { "epoch": 2.0092303219710734, "grad_norm": 0.6827619671821594, "learning_rate": 4.128073633147477e-05, "loss": 0.1538, "step": 
55290 }, { "epoch": 2.009593720473872, "grad_norm": 0.5531404614448547, "learning_rate": 4.1276797906153614e-05, "loss": 0.1451, "step": 55300 }, { "epoch": 2.00995711897667, "grad_norm": 1.3195756673812866, "learning_rate": 4.127285877952315e-05, "loss": 0.0831, "step": 55310 }, { "epoch": 2.010320517479468, "grad_norm": 1.291306734085083, "learning_rate": 4.12689189517531e-05, "loss": 0.1168, "step": 55320 }, { "epoch": 2.0106839159822663, "grad_norm": 0.7740198373794556, "learning_rate": 4.126497842301322e-05, "loss": 0.1293, "step": 55330 }, { "epoch": 2.0110473144850642, "grad_norm": 0.619372546672821, "learning_rate": 4.126103719347329e-05, "loss": 0.1151, "step": 55340 }, { "epoch": 2.0114107129878627, "grad_norm": 0.6809590458869934, "learning_rate": 4.1257095263303114e-05, "loss": 0.0808, "step": 55350 }, { "epoch": 2.0117741114906607, "grad_norm": 0.7653446197509766, "learning_rate": 4.125315263267255e-05, "loss": 0.0847, "step": 55360 }, { "epoch": 2.0121375099934586, "grad_norm": 0.7010202407836914, "learning_rate": 4.124920930175148e-05, "loss": 0.1856, "step": 55370 }, { "epoch": 2.012500908496257, "grad_norm": 0.8760896921157837, "learning_rate": 4.1245265270709786e-05, "loss": 0.0966, "step": 55380 }, { "epoch": 2.012864306999055, "grad_norm": 0.8872328400611877, "learning_rate": 4.124132053971741e-05, "loss": 0.1225, "step": 55390 }, { "epoch": 2.0132277055018535, "grad_norm": 0.7111076712608337, "learning_rate": 4.123737510894433e-05, "loss": 0.0917, "step": 55400 }, { "epoch": 2.0135911040046515, "grad_norm": 0.2959582209587097, "learning_rate": 4.1233428978560515e-05, "loss": 0.084, "step": 55410 }, { "epoch": 2.0139545025074495, "grad_norm": 0.5472272038459778, "learning_rate": 4.122948214873602e-05, "loss": 0.1165, "step": 55420 }, { "epoch": 2.014317901010248, "grad_norm": 1.7232263088226318, "learning_rate": 4.1225534619640874e-05, "loss": 0.1483, "step": 55430 }, { "epoch": 2.014681299513046, "grad_norm": 0.6070485711097717, 
"learning_rate": 4.1221586391445164e-05, "loss": 0.1181, "step": 55440 }, { "epoch": 2.0150446980158443, "grad_norm": 0.42631739377975464, "learning_rate": 4.121763746431903e-05, "loss": 0.2435, "step": 55450 }, { "epoch": 2.0154080965186423, "grad_norm": 0.4716903865337372, "learning_rate": 4.1213687838432594e-05, "loss": 0.102, "step": 55460 }, { "epoch": 2.0157714950214407, "grad_norm": 1.0024840831756592, "learning_rate": 4.120973751395604e-05, "loss": 2.0817, "step": 55470 }, { "epoch": 2.0161348935242387, "grad_norm": 0.6983594298362732, "learning_rate": 4.1205786491059565e-05, "loss": 0.1091, "step": 55480 }, { "epoch": 2.0164982920270367, "grad_norm": 3.18595814704895, "learning_rate": 4.1201834769913405e-05, "loss": 0.1334, "step": 55490 }, { "epoch": 2.016861690529835, "grad_norm": 1.0065993070602417, "learning_rate": 4.119788235068785e-05, "loss": 0.0893, "step": 55500 }, { "epoch": 2.017225089032633, "grad_norm": 1.9013348817825317, "learning_rate": 4.119392923355315e-05, "loss": 0.1055, "step": 55510 }, { "epoch": 2.0175884875354315, "grad_norm": 0.727342963218689, "learning_rate": 4.118997541867968e-05, "loss": 0.1577, "step": 55520 }, { "epoch": 2.0179518860382295, "grad_norm": 1.3305946588516235, "learning_rate": 4.118602090623777e-05, "loss": 0.1156, "step": 55530 }, { "epoch": 2.0183152845410275, "grad_norm": 166.5440673828125, "learning_rate": 4.11820656963978e-05, "loss": 3.13, "step": 55540 }, { "epoch": 2.018678683043826, "grad_norm": 1.1718511581420898, "learning_rate": 4.11781097893302e-05, "loss": 1.2695, "step": 55550 }, { "epoch": 2.019042081546624, "grad_norm": 0.7488642930984497, "learning_rate": 4.117415318520541e-05, "loss": 0.1179, "step": 55560 }, { "epoch": 2.0194054800494223, "grad_norm": 0.8934155702590942, "learning_rate": 4.117019588419391e-05, "loss": 0.0957, "step": 55570 }, { "epoch": 2.0197688785522203, "grad_norm": 1.2470290660858154, "learning_rate": 4.11662378864662e-05, "loss": 0.0974, "step": 55580 }, { "epoch": 
2.0201322770550187, "grad_norm": 2.387202501296997, "learning_rate": 4.116227919219282e-05, "loss": 0.2065, "step": 55590 }, { "epoch": 2.0204956755578167, "grad_norm": 0.9765509963035583, "learning_rate": 4.115831980154434e-05, "loss": 0.083, "step": 55600 }, { "epoch": 2.0208590740606147, "grad_norm": 1.544554591178894, "learning_rate": 4.115435971469135e-05, "loss": 0.1067, "step": 55610 }, { "epoch": 2.021222472563413, "grad_norm": 1.8516936302185059, "learning_rate": 4.1150398931804465e-05, "loss": 0.1292, "step": 55620 }, { "epoch": 2.021585871066211, "grad_norm": 1.211599349975586, "learning_rate": 4.114643745305437e-05, "loss": 0.098, "step": 55630 }, { "epoch": 2.0219492695690096, "grad_norm": 0.8160383105278015, "learning_rate": 4.114247527861173e-05, "loss": 0.0919, "step": 55640 }, { "epoch": 2.0223126680718075, "grad_norm": 0.8116459846496582, "learning_rate": 4.1138512408647256e-05, "loss": 0.0929, "step": 55650 }, { "epoch": 2.0226760665746055, "grad_norm": 0.9536616206169128, "learning_rate": 4.113454884333171e-05, "loss": 0.1269, "step": 55660 }, { "epoch": 2.023039465077404, "grad_norm": 0.6211200952529907, "learning_rate": 4.113058458283586e-05, "loss": 0.1285, "step": 55670 }, { "epoch": 2.023402863580202, "grad_norm": 1.3393282890319824, "learning_rate": 4.112661962733052e-05, "loss": 0.1211, "step": 55680 }, { "epoch": 2.0237662620830004, "grad_norm": 0.9137499928474426, "learning_rate": 4.1122653976986514e-05, "loss": 0.1492, "step": 55690 }, { "epoch": 2.0241296605857984, "grad_norm": 8.595315933227539, "learning_rate": 4.1118687631974705e-05, "loss": 0.0813, "step": 55700 }, { "epoch": 2.0244930590885963, "grad_norm": 8.519613265991211, "learning_rate": 4.111472059246601e-05, "loss": 0.0971, "step": 55710 }, { "epoch": 2.0248564575913948, "grad_norm": 0.906406819820404, "learning_rate": 4.111075285863133e-05, "loss": 0.1068, "step": 55720 }, { "epoch": 2.0252198560941927, "grad_norm": 0.6413214206695557, "learning_rate": 
4.1106784430641634e-05, "loss": 0.0904, "step": 55730 }, { "epoch": 2.025583254596991, "grad_norm": 1.054943561553955, "learning_rate": 4.110281530866791e-05, "loss": 0.1087, "step": 55740 }, { "epoch": 2.025946653099789, "grad_norm": 0.686661958694458, "learning_rate": 4.1098845492881164e-05, "loss": 0.1022, "step": 55750 }, { "epoch": 2.0263100516025876, "grad_norm": 1.9529190063476562, "learning_rate": 4.109487498345245e-05, "loss": 0.1089, "step": 55760 }, { "epoch": 2.0266734501053856, "grad_norm": 0.5279061198234558, "learning_rate": 4.109090378055284e-05, "loss": 0.1115, "step": 55770 }, { "epoch": 2.0270368486081836, "grad_norm": 1.3651883602142334, "learning_rate": 4.108693188435343e-05, "loss": 0.1206, "step": 55780 }, { "epoch": 2.027400247110982, "grad_norm": 0.9911472201347351, "learning_rate": 4.108295929502536e-05, "loss": 0.1235, "step": 55790 }, { "epoch": 2.02776364561378, "grad_norm": 1.1165162324905396, "learning_rate": 4.107898601273981e-05, "loss": 0.0944, "step": 55800 }, { "epoch": 2.02776364561378, "eval_loss": 0.342909038066864, "eval_runtime": 179.8346, "eval_samples_per_second": 41.227, "eval_steps_per_second": 5.155, "eval_wer": 0.16486648392542705, "step": 55800 }, { "epoch": 2.0281270441165784, "grad_norm": 0.5809179544448853, "learning_rate": 4.107501203766795e-05, "loss": 0.0912, "step": 55810 }, { "epoch": 2.0284904426193764, "grad_norm": 0.5710409283638, "learning_rate": 4.1071037369981025e-05, "loss": 0.1165, "step": 55820 }, { "epoch": 2.0288538411221744, "grad_norm": 1.5615267753601074, "learning_rate": 4.1067062009850276e-05, "loss": 0.1126, "step": 55830 }, { "epoch": 2.029217239624973, "grad_norm": 0.5823513269424438, "learning_rate": 4.106308595744699e-05, "loss": 0.1649, "step": 55840 }, { "epoch": 2.029580638127771, "grad_norm": 1.1584099531173706, "learning_rate": 4.105910921294249e-05, "loss": 3.2733, "step": 55850 }, { "epoch": 2.029944036630569, "grad_norm": 3.6284244060516357, "learning_rate": 4.105513177650811e-05, 
"loss": 0.1123, "step": 55860 }, { "epoch": 2.030307435133367, "grad_norm": 0.2692999839782715, "learning_rate": 4.105115364831522e-05, "loss": 0.1022, "step": 55870 }, { "epoch": 2.0306708336361656, "grad_norm": 0.7533041834831238, "learning_rate": 4.1047174828535236e-05, "loss": 0.1051, "step": 55880 }, { "epoch": 2.0310342321389636, "grad_norm": 1.988377332687378, "learning_rate": 4.104319531733958e-05, "loss": 0.1112, "step": 55890 }, { "epoch": 2.0313976306417616, "grad_norm": 1.1997753381729126, "learning_rate": 4.103921511489972e-05, "loss": 0.1166, "step": 55900 }, { "epoch": 2.03176102914456, "grad_norm": 0.9296682476997375, "learning_rate": 4.1035234221387154e-05, "loss": 0.0914, "step": 55910 }, { "epoch": 2.032124427647358, "grad_norm": 1.5428096055984497, "learning_rate": 4.1031252636973394e-05, "loss": 0.1383, "step": 55920 }, { "epoch": 2.0324878261501564, "grad_norm": 1.2502493858337402, "learning_rate": 4.1027270361829995e-05, "loss": 0.0876, "step": 55930 }, { "epoch": 2.0328512246529544, "grad_norm": 0.9851539731025696, "learning_rate": 4.102328739612855e-05, "loss": 0.1441, "step": 55940 }, { "epoch": 2.0332146231557524, "grad_norm": 0.6906759738922119, "learning_rate": 4.101930374004066e-05, "loss": 0.0894, "step": 55950 }, { "epoch": 2.033578021658551, "grad_norm": 0.6129600405693054, "learning_rate": 4.101531939373796e-05, "loss": 0.0935, "step": 55960 }, { "epoch": 2.033941420161349, "grad_norm": 0.7423244118690491, "learning_rate": 4.101133435739214e-05, "loss": 0.0996, "step": 55970 }, { "epoch": 2.0343048186641473, "grad_norm": 0.9887922406196594, "learning_rate": 4.100734863117489e-05, "loss": 0.1159, "step": 55980 }, { "epoch": 2.0346682171669452, "grad_norm": 0.701602041721344, "learning_rate": 4.100336221525794e-05, "loss": 0.1278, "step": 55990 }, { "epoch": 2.0350316156697437, "grad_norm": 1.088302493095398, "learning_rate": 4.099937510981304e-05, "loss": 0.0966, "step": 56000 }, { "epoch": 2.0353950141725417, "grad_norm": 
1.022271990776062, "learning_rate": 4.099538731501201e-05, "loss": 0.1055, "step": 56010 }, { "epoch": 2.0357584126753396, "grad_norm": 1.5955030918121338, "learning_rate": 4.099139883102664e-05, "loss": 0.1654, "step": 56020 }, { "epoch": 2.036121811178138, "grad_norm": 1.2459834814071655, "learning_rate": 4.0987409658028805e-05, "loss": 0.1179, "step": 56030 }, { "epoch": 2.036485209680936, "grad_norm": 2.4748172760009766, "learning_rate": 4.098341979619036e-05, "loss": 0.1511, "step": 56040 }, { "epoch": 2.0368486081837345, "grad_norm": 1.0897467136383057, "learning_rate": 4.097942924568323e-05, "loss": 0.1033, "step": 56050 }, { "epoch": 2.0372120066865325, "grad_norm": 0.9883999228477478, "learning_rate": 4.097543800667935e-05, "loss": 0.0879, "step": 56060 }, { "epoch": 2.0375754051893304, "grad_norm": 0.9798412919044495, "learning_rate": 4.097144607935068e-05, "loss": 0.9193, "step": 56070 }, { "epoch": 2.037938803692129, "grad_norm": 0.8659210801124573, "learning_rate": 4.0967453463869233e-05, "loss": 0.1121, "step": 56080 }, { "epoch": 2.038302202194927, "grad_norm": 0.825116753578186, "learning_rate": 4.096346016040703e-05, "loss": 0.1276, "step": 56090 }, { "epoch": 2.0386656006977253, "grad_norm": 0.6097813844680786, "learning_rate": 4.0959466169136115e-05, "loss": 0.1045, "step": 56100 }, { "epoch": 2.0390289992005233, "grad_norm": 2.39689564704895, "learning_rate": 4.0955471490228604e-05, "loss": 0.1286, "step": 56110 }, { "epoch": 2.0393923977033213, "grad_norm": 0.4547784626483917, "learning_rate": 4.095147612385658e-05, "loss": 0.1266, "step": 56120 }, { "epoch": 2.0397557962061197, "grad_norm": 1.1596136093139648, "learning_rate": 4.094748007019221e-05, "loss": 0.1022, "step": 56130 }, { "epoch": 2.0401191947089177, "grad_norm": 1.1768062114715576, "learning_rate": 4.094348332940767e-05, "loss": 0.1214, "step": 56140 }, { "epoch": 2.040482593211716, "grad_norm": 0.6755580902099609, "learning_rate": 4.0939485901675153e-05, "loss": 0.1078, "step": 
56150 }, { "epoch": 2.040845991714514, "grad_norm": 0.7698992490768433, "learning_rate": 4.0935487787166914e-05, "loss": 0.0923, "step": 56160 }, { "epoch": 2.0412093902173125, "grad_norm": 0.6949880123138428, "learning_rate": 4.093148898605519e-05, "loss": 0.1065, "step": 56170 }, { "epoch": 2.0415727887201105, "grad_norm": 0.5093110203742981, "learning_rate": 4.09274894985123e-05, "loss": 0.1358, "step": 56180 }, { "epoch": 2.0419361872229085, "grad_norm": 0.8544941544532776, "learning_rate": 4.092348932471055e-05, "loss": 0.1215, "step": 56190 }, { "epoch": 2.042299585725707, "grad_norm": 0.8509432673454285, "learning_rate": 4.091948846482231e-05, "loss": 0.1024, "step": 56200 }, { "epoch": 2.042662984228505, "grad_norm": 0.5665140151977539, "learning_rate": 4.091548691901995e-05, "loss": 0.089, "step": 56210 }, { "epoch": 2.0430263827313033, "grad_norm": 0.9858969449996948, "learning_rate": 4.0911484687475886e-05, "loss": 0.1107, "step": 56220 }, { "epoch": 2.0433897812341013, "grad_norm": 0.8955181241035461, "learning_rate": 4.0907481770362556e-05, "loss": 0.113, "step": 56230 }, { "epoch": 2.0437531797368993, "grad_norm": 0.9829466938972473, "learning_rate": 4.090347816785244e-05, "loss": 0.1631, "step": 56240 }, { "epoch": 2.0441165782396977, "grad_norm": 0.5513119101524353, "learning_rate": 4.089947388011803e-05, "loss": 0.0959, "step": 56250 }, { "epoch": 2.0444799767424957, "grad_norm": 0.4322792887687683, "learning_rate": 4.089546890733187e-05, "loss": 0.0982, "step": 56260 }, { "epoch": 2.044843375245294, "grad_norm": 1.2782713174819946, "learning_rate": 4.0891463249666504e-05, "loss": 0.1112, "step": 56270 }, { "epoch": 2.045206773748092, "grad_norm": 0.9792034029960632, "learning_rate": 4.088745690729453e-05, "loss": 0.0975, "step": 56280 }, { "epoch": 2.0455701722508906, "grad_norm": 2.263601064682007, "learning_rate": 4.088344988038857e-05, "loss": 0.1418, "step": 56290 }, { "epoch": 2.0459335707536885, "grad_norm": 1.011856198310852, 
"learning_rate": 4.087944216912126e-05, "loss": 0.1022, "step": 56300 }, { "epoch": 2.0462969692564865, "grad_norm": 1.0281102657318115, "learning_rate": 4.0875433773665286e-05, "loss": 0.0925, "step": 56310 }, { "epoch": 2.046660367759285, "grad_norm": 0.5272021889686584, "learning_rate": 4.087142469419336e-05, "loss": 0.1297, "step": 56320 }, { "epoch": 2.047023766262083, "grad_norm": 0.9789879322052002, "learning_rate": 4.0867414930878224e-05, "loss": 0.0946, "step": 56330 }, { "epoch": 2.0473871647648814, "grad_norm": 0.8782264590263367, "learning_rate": 4.086340448389262e-05, "loss": 0.1246, "step": 56340 }, { "epoch": 2.0477505632676793, "grad_norm": 0.66651850938797, "learning_rate": 4.0859393353409364e-05, "loss": 0.0898, "step": 56350 }, { "epoch": 2.0481139617704773, "grad_norm": 1.6209585666656494, "learning_rate": 4.085538153960128e-05, "loss": 0.1016, "step": 56360 }, { "epoch": 2.0484773602732758, "grad_norm": 1.2438985109329224, "learning_rate": 4.085136904264121e-05, "loss": 0.1419, "step": 56370 }, { "epoch": 2.0488407587760737, "grad_norm": 0.7311316728591919, "learning_rate": 4.0847355862702055e-05, "loss": 0.102, "step": 56380 }, { "epoch": 2.049204157278872, "grad_norm": 1.2164160013198853, "learning_rate": 4.084334199995672e-05, "loss": 0.0936, "step": 56390 }, { "epoch": 2.04956755578167, "grad_norm": 0.746296226978302, "learning_rate": 4.083932745457815e-05, "loss": 0.1268, "step": 56400 }, { "epoch": 2.04956755578167, "eval_loss": 0.31779325008392334, "eval_runtime": 178.9992, "eval_samples_per_second": 41.419, "eval_steps_per_second": 5.179, "eval_wer": 0.15986530397371432, "step": 56400 }, { "epoch": 2.049930954284468, "grad_norm": 0.94898521900177, "learning_rate": 4.083531222673931e-05, "loss": 0.1344, "step": 56410 }, { "epoch": 2.0502943527872666, "grad_norm": 0.5653538703918457, "learning_rate": 4.083129631661322e-05, "loss": 0.1026, "step": 56420 }, { "epoch": 2.0506577512900646, "grad_norm": 0.6599397659301758, "learning_rate": 
4.0827279724372884e-05, "loss": 0.0923, "step": 56430 }, { "epoch": 2.051021149792863, "grad_norm": 2.0571577548980713, "learning_rate": 4.082326245019139e-05, "loss": 0.1894, "step": 56440 }, { "epoch": 2.051384548295661, "grad_norm": 1.0707124471664429, "learning_rate": 4.081924449424182e-05, "loss": 0.1098, "step": 56450 }, { "epoch": 2.0517479467984594, "grad_norm": 0.450382798910141, "learning_rate": 4.081522585669728e-05, "loss": 0.0928, "step": 56460 }, { "epoch": 2.0521113453012574, "grad_norm": 0.9119880795478821, "learning_rate": 4.081120653773093e-05, "loss": 0.1102, "step": 56470 }, { "epoch": 2.0524747438040554, "grad_norm": 0.39113524556159973, "learning_rate": 4.080718653751595e-05, "loss": 0.1048, "step": 56480 }, { "epoch": 2.052838142306854, "grad_norm": 1.069718837738037, "learning_rate": 4.080316585622554e-05, "loss": 0.1165, "step": 56490 }, { "epoch": 2.053201540809652, "grad_norm": 1.0856863260269165, "learning_rate": 4.0799144494032936e-05, "loss": 0.0966, "step": 56500 }, { "epoch": 2.05356493931245, "grad_norm": 0.9092361927032471, "learning_rate": 4.079512245111142e-05, "loss": 0.1041, "step": 56510 }, { "epoch": 2.053928337815248, "grad_norm": 0.6025703549385071, "learning_rate": 4.079109972763428e-05, "loss": 0.1167, "step": 56520 }, { "epoch": 2.054291736318046, "grad_norm": 2.7288074493408203, "learning_rate": 4.078707632377483e-05, "loss": 0.1092, "step": 56530 }, { "epoch": 2.0546551348208446, "grad_norm": 1.0255563259124756, "learning_rate": 4.078305223970643e-05, "loss": 0.13, "step": 56540 }, { "epoch": 2.0550185333236426, "grad_norm": 0.556890070438385, "learning_rate": 4.0779429982609526e-05, "loss": 1.9074, "step": 56550 }, { "epoch": 2.055381931826441, "grad_norm": 0.8369362950325012, "learning_rate": 4.077540460662182e-05, "loss": 0.0925, "step": 56560 }, { "epoch": 2.055745330329239, "grad_norm": 0.6321738958358765, "learning_rate": 4.0771378550928064e-05, "loss": 0.1274, "step": 56570 }, { "epoch": 2.0561087288320374, 
"grad_norm": 2.1743392944335938, "learning_rate": 4.076735181570172e-05, "loss": 0.1147, "step": 56580 }, { "epoch": 2.0564721273348354, "grad_norm": 0.35284000635147095, "learning_rate": 4.076332440111629e-05, "loss": 0.2721, "step": 56590 }, { "epoch": 2.0568355258376334, "grad_norm": 0.4702494740486145, "learning_rate": 4.0759296307345285e-05, "loss": 0.0972, "step": 56600 }, { "epoch": 2.057198924340432, "grad_norm": 0.5263124704360962, "learning_rate": 4.075526753456229e-05, "loss": 0.1004, "step": 56610 }, { "epoch": 2.05756232284323, "grad_norm": 0.5063189268112183, "learning_rate": 4.0751238082940864e-05, "loss": 0.1254, "step": 56620 }, { "epoch": 2.0579257213460282, "grad_norm": 0.8294627070426941, "learning_rate": 4.074720795265463e-05, "loss": 0.1067, "step": 56630 }, { "epoch": 2.0582891198488262, "grad_norm": 0.8625883460044861, "learning_rate": 4.0743177143877244e-05, "loss": 0.1176, "step": 56640 }, { "epoch": 2.058652518351624, "grad_norm": 0.7036715745925903, "learning_rate": 4.073914565678236e-05, "loss": 0.0807, "step": 56650 }, { "epoch": 2.0590159168544226, "grad_norm": 1.748412013053894, "learning_rate": 4.07351134915437e-05, "loss": 0.0877, "step": 56660 }, { "epoch": 2.0593793153572206, "grad_norm": 1.1321426630020142, "learning_rate": 4.0731080648334975e-05, "loss": 0.097, "step": 56670 }, { "epoch": 2.059742713860019, "grad_norm": 2.1829307079315186, "learning_rate": 4.0727047127329964e-05, "loss": 0.1159, "step": 56680 }, { "epoch": 2.060106112362817, "grad_norm": 1.0120956897735596, "learning_rate": 4.0723012928702443e-05, "loss": 0.1096, "step": 56690 }, { "epoch": 2.060469510865615, "grad_norm": 0.6774507761001587, "learning_rate": 4.071897805262624e-05, "loss": 0.0925, "step": 56700 }, { "epoch": 2.0608329093684135, "grad_norm": 0.7925111651420593, "learning_rate": 4.07149424992752e-05, "loss": 0.0944, "step": 56710 }, { "epoch": 2.0611963078712114, "grad_norm": 0.41430070996284485, "learning_rate": 4.07109062688232e-05, "loss": 
0.1082, "step": 56720 }, { "epoch": 2.06155970637401, "grad_norm": 0.5457510948181152, "learning_rate": 4.070686936144415e-05, "loss": 0.1724, "step": 56730 }, { "epoch": 2.061923104876808, "grad_norm": 1.1867283582687378, "learning_rate": 4.070283177731199e-05, "loss": 0.121, "step": 56740 }, { "epoch": 2.0622865033796063, "grad_norm": 0.5466375946998596, "learning_rate": 4.0698793516600676e-05, "loss": 0.0729, "step": 56750 }, { "epoch": 2.0626499018824043, "grad_norm": 0.802174985408783, "learning_rate": 4.0694754579484204e-05, "loss": 0.0983, "step": 56760 }, { "epoch": 2.0630133003852023, "grad_norm": 0.37869808077812195, "learning_rate": 4.06907149661366e-05, "loss": 0.1225, "step": 56770 }, { "epoch": 2.0633766988880007, "grad_norm": 1.0356521606445312, "learning_rate": 4.068667467673192e-05, "loss": 0.1046, "step": 56780 }, { "epoch": 2.0637400973907987, "grad_norm": 1.9794261455535889, "learning_rate": 4.068263371144423e-05, "loss": 0.1497, "step": 56790 }, { "epoch": 2.064103495893597, "grad_norm": 0.7426532506942749, "learning_rate": 4.067859207044766e-05, "loss": 0.1153, "step": 56800 }, { "epoch": 2.064466894396395, "grad_norm": 0.4637458622455597, "learning_rate": 4.0674549753916344e-05, "loss": 0.0885, "step": 56810 }, { "epoch": 2.064830292899193, "grad_norm": 0.44504934549331665, "learning_rate": 4.067050676202445e-05, "loss": 0.1483, "step": 56820 }, { "epoch": 2.0651936914019915, "grad_norm": 0.8600061535835266, "learning_rate": 4.066646309494617e-05, "loss": 0.113, "step": 56830 }, { "epoch": 2.0655570899047895, "grad_norm": 1.6054418087005615, "learning_rate": 4.0662418752855746e-05, "loss": 0.1626, "step": 56840 }, { "epoch": 2.065920488407588, "grad_norm": 0.9366486072540283, "learning_rate": 4.0658373735927415e-05, "loss": 0.1035, "step": 56850 }, { "epoch": 2.066283886910386, "grad_norm": 0.9057123064994812, "learning_rate": 4.065432804433548e-05, "loss": 0.0976, "step": 56860 }, { "epoch": 2.0666472854131843, "grad_norm": 
0.7718061804771423, "learning_rate": 4.065028167825424e-05, "loss": 1.6687, "step": 56870 }, { "epoch": 2.0670106839159823, "grad_norm": 1.0670592784881592, "learning_rate": 4.064623463785805e-05, "loss": 0.0958, "step": 56880 }, { "epoch": 2.0673740824187803, "grad_norm": 0.6497521996498108, "learning_rate": 4.064218692332128e-05, "loss": 0.1301, "step": 56890 }, { "epoch": 2.0677374809215787, "grad_norm": 0.5239264369010925, "learning_rate": 4.063813853481833e-05, "loss": 0.0843, "step": 56900 }, { "epoch": 2.0681008794243767, "grad_norm": 0.7263264060020447, "learning_rate": 4.0634089472523626e-05, "loss": 0.0928, "step": 56910 }, { "epoch": 2.068464277927175, "grad_norm": 0.6024682521820068, "learning_rate": 4.063003973661164e-05, "loss": 0.1402, "step": 56920 }, { "epoch": 2.068827676429973, "grad_norm": 0.8949540853500366, "learning_rate": 4.0625989327256855e-05, "loss": 0.1171, "step": 56930 }, { "epoch": 2.069191074932771, "grad_norm": 0.9099026322364807, "learning_rate": 4.062193824463378e-05, "loss": 0.1184, "step": 56940 }, { "epoch": 2.0695544734355695, "grad_norm": 8.538558959960938, "learning_rate": 4.0617886488916976e-05, "loss": 0.0981, "step": 56950 }, { "epoch": 2.0699178719383675, "grad_norm": 0.8870179653167725, "learning_rate": 4.061383406028101e-05, "loss": 0.0796, "step": 56960 }, { "epoch": 2.070281270441166, "grad_norm": 0.8997694253921509, "learning_rate": 4.060978095890049e-05, "loss": 0.1289, "step": 56970 }, { "epoch": 2.070644668943964, "grad_norm": 1.018744707107544, "learning_rate": 4.060572718495004e-05, "loss": 0.1006, "step": 56980 }, { "epoch": 2.071008067446762, "grad_norm": 0.5158216953277588, "learning_rate": 4.0601672738604346e-05, "loss": 0.161, "step": 56990 }, { "epoch": 2.0713714659495603, "grad_norm": 1.025295615196228, "learning_rate": 4.059761762003807e-05, "loss": 0.086, "step": 57000 }, { "epoch": 2.0713714659495603, "eval_loss": 0.34686627984046936, "eval_runtime": 180.463, "eval_samples_per_second": 41.083, 
"eval_steps_per_second": 5.137, "eval_wer": 0.16041897362354093, "step": 57000 }, { "epoch": 2.0717348644523583, "grad_norm": 3.3658320903778076, "learning_rate": 4.0593561829425955e-05, "loss": 0.1124, "step": 57010 }, { "epoch": 2.0720982629551568, "grad_norm": 7.979375839233398, "learning_rate": 4.058950536694274e-05, "loss": 0.133, "step": 57020 }, { "epoch": 2.0724616614579547, "grad_norm": 0.7676217555999756, "learning_rate": 4.058544823276321e-05, "loss": 0.1096, "step": 57030 }, { "epoch": 2.072825059960753, "grad_norm": 0.6934232711791992, "learning_rate": 4.058139042706216e-05, "loss": 0.1132, "step": 57040 }, { "epoch": 2.073188458463551, "grad_norm": 0.9430510401725769, "learning_rate": 4.057733195001444e-05, "loss": 0.0998, "step": 57050 }, { "epoch": 2.073551856966349, "grad_norm": 3.497431993484497, "learning_rate": 4.057327280179491e-05, "loss": 0.089, "step": 57060 }, { "epoch": 2.0739152554691476, "grad_norm": 0.7105191349983215, "learning_rate": 4.056921298257847e-05, "loss": 0.1243, "step": 57070 }, { "epoch": 2.0742786539719456, "grad_norm": 7.004267692565918, "learning_rate": 4.0565152492540034e-05, "loss": 0.1229, "step": 57080 }, { "epoch": 2.074642052474744, "grad_norm": 0.42751577496528625, "learning_rate": 4.0561091331854555e-05, "loss": 0.1073, "step": 57090 }, { "epoch": 2.075005450977542, "grad_norm": 1.4434239864349365, "learning_rate": 4.055702950069702e-05, "loss": 0.1044, "step": 57100 }, { "epoch": 2.07536884948034, "grad_norm": 0.6759265661239624, "learning_rate": 4.055296699924244e-05, "loss": 0.089, "step": 57110 }, { "epoch": 2.0757322479831384, "grad_norm": 0.48018431663513184, "learning_rate": 4.0548903827665846e-05, "loss": 0.106, "step": 57120 }, { "epoch": 2.0760956464859364, "grad_norm": 1.510313630104065, "learning_rate": 4.054483998614231e-05, "loss": 0.1295, "step": 57130 }, { "epoch": 2.076459044988735, "grad_norm": 0.807949960231781, "learning_rate": 4.054077547484693e-05, "loss": 0.4319, "step": 57140 }, { "epoch": 
2.0768224434915328, "grad_norm": 1.3790713548660278, "learning_rate": 4.0536710293954824e-05, "loss": 0.1006, "step": 57150 }, { "epoch": 2.077185841994331, "grad_norm": 0.507022500038147, "learning_rate": 4.0532644443641156e-05, "loss": 0.0751, "step": 57160 }, { "epoch": 2.077549240497129, "grad_norm": 1.7080292701721191, "learning_rate": 4.0528577924081104e-05, "loss": 0.1266, "step": 57170 }, { "epoch": 2.077912638999927, "grad_norm": 1.9344823360443115, "learning_rate": 4.052451073544987e-05, "loss": 0.094, "step": 57180 }, { "epoch": 2.0782760375027256, "grad_norm": 1.0933985710144043, "learning_rate": 4.0520442877922715e-05, "loss": 0.1295, "step": 57190 }, { "epoch": 2.0786394360055236, "grad_norm": 0.6466109752655029, "learning_rate": 4.05163743516749e-05, "loss": 0.108, "step": 57200 }, { "epoch": 2.079002834508322, "grad_norm": 0.5679341554641724, "learning_rate": 4.051230515688171e-05, "loss": 0.1205, "step": 57210 }, { "epoch": 2.07936623301112, "grad_norm": 0.5203921794891357, "learning_rate": 4.0508235293718495e-05, "loss": 0.1202, "step": 57220 }, { "epoch": 2.079729631513918, "grad_norm": 4.9159393310546875, "learning_rate": 4.050416476236059e-05, "loss": 0.0901, "step": 57230 }, { "epoch": 2.0800930300167164, "grad_norm": 0.7785301208496094, "learning_rate": 4.05000935629834e-05, "loss": 0.0885, "step": 57240 }, { "epoch": 2.0804564285195144, "grad_norm": 1.5235596895217896, "learning_rate": 4.049602169576232e-05, "loss": 0.1163, "step": 57250 }, { "epoch": 2.080819827022313, "grad_norm": 0.7558295726776123, "learning_rate": 4.0491949160872805e-05, "loss": 0.0969, "step": 57260 }, { "epoch": 2.081183225525111, "grad_norm": 0.8465888500213623, "learning_rate": 4.048787595849032e-05, "loss": 0.1061, "step": 57270 }, { "epoch": 2.081546624027909, "grad_norm": 1.5089519023895264, "learning_rate": 4.048380208879037e-05, "loss": 0.0918, "step": 57280 }, { "epoch": 2.0819100225307072, "grad_norm": 0.5132701992988586, "learning_rate": 
4.047972755194847e-05, "loss": 0.0971, "step": 57290 }, { "epoch": 2.082273421033505, "grad_norm": 2.1400113105773926, "learning_rate": 4.047565234814019e-05, "loss": 0.0934, "step": 57300 }, { "epoch": 2.0826368195363036, "grad_norm": 0.6013107299804688, "learning_rate": 4.047157647754112e-05, "loss": 0.1349, "step": 57310 }, { "epoch": 2.0830002180391016, "grad_norm": 2.676640272140503, "learning_rate": 4.046749994032686e-05, "loss": 0.13, "step": 57320 }, { "epoch": 2.0833636165419, "grad_norm": 0.9156673550605774, "learning_rate": 4.046342273667306e-05, "loss": 0.1068, "step": 57330 }, { "epoch": 2.083727015044698, "grad_norm": 1.0060288906097412, "learning_rate": 4.04593448667554e-05, "loss": 0.1091, "step": 57340 }, { "epoch": 2.084090413547496, "grad_norm": 2.746476650238037, "learning_rate": 4.0455266330749567e-05, "loss": 0.1001, "step": 57350 }, { "epoch": 2.0844538120502945, "grad_norm": 1.1911275386810303, "learning_rate": 4.04511871288313e-05, "loss": 0.0981, "step": 57360 }, { "epoch": 2.0848172105530924, "grad_norm": 10.354631423950195, "learning_rate": 4.044710726117636e-05, "loss": 0.1354, "step": 57370 }, { "epoch": 2.085180609055891, "grad_norm": 1.2562741041183472, "learning_rate": 4.044302672796053e-05, "loss": 0.0962, "step": 57380 }, { "epoch": 2.085544007558689, "grad_norm": 0.48360708355903625, "learning_rate": 4.043894552935962e-05, "loss": 0.1203, "step": 57390 }, { "epoch": 2.085907406061487, "grad_norm": 1.9491641521453857, "learning_rate": 4.043486366554948e-05, "loss": 0.0984, "step": 57400 }, { "epoch": 2.0862708045642853, "grad_norm": 0.48460692167282104, "learning_rate": 4.0430781136705975e-05, "loss": 0.0984, "step": 57410 }, { "epoch": 2.0866342030670832, "grad_norm": 0.9770491719245911, "learning_rate": 4.042669794300502e-05, "loss": 0.1173, "step": 57420 }, { "epoch": 2.0869976015698817, "grad_norm": 0.4919109642505646, "learning_rate": 4.042261408462255e-05, "loss": 0.1162, "step": 57430 }, { "epoch": 2.0873610000726797, 
"grad_norm": 0.555167019367218, "learning_rate": 4.0418529561734495e-05, "loss": 0.1137, "step": 57440 }, { "epoch": 2.087724398575478, "grad_norm": 0.8190045356750488, "learning_rate": 4.041444437451687e-05, "loss": 0.0972, "step": 57450 }, { "epoch": 2.088087797078276, "grad_norm": 0.5673350691795349, "learning_rate": 4.041035852314568e-05, "loss": 0.0985, "step": 57460 }, { "epoch": 2.088451195581074, "grad_norm": 2.584392547607422, "learning_rate": 4.040627200779697e-05, "loss": 0.1159, "step": 57470 }, { "epoch": 2.0888145940838725, "grad_norm": 3.240104913711548, "learning_rate": 4.040218482864682e-05, "loss": 0.0886, "step": 57480 }, { "epoch": 2.0891779925866705, "grad_norm": 1.0577195882797241, "learning_rate": 4.039809698587132e-05, "loss": 0.1079, "step": 57490 }, { "epoch": 2.089541391089469, "grad_norm": 1.1150219440460205, "learning_rate": 4.039400847964661e-05, "loss": 0.091, "step": 57500 }, { "epoch": 2.089904789592267, "grad_norm": 0.42998915910720825, "learning_rate": 4.038991931014885e-05, "loss": 0.1038, "step": 57510 }, { "epoch": 2.090268188095065, "grad_norm": 1.2772380113601685, "learning_rate": 4.0385829477554216e-05, "loss": 0.1114, "step": 57520 }, { "epoch": 2.0906315865978633, "grad_norm": 0.6975306868553162, "learning_rate": 4.0381738982038944e-05, "loss": 0.1107, "step": 57530 }, { "epoch": 2.0909949851006613, "grad_norm": 0.5228861570358276, "learning_rate": 4.0377647823779257e-05, "loss": 0.1217, "step": 57540 }, { "epoch": 2.0913583836034597, "grad_norm": 0.8819922208786011, "learning_rate": 4.0373556002951444e-05, "loss": 0.1149, "step": 57550 }, { "epoch": 2.0917217821062577, "grad_norm": 0.47613778710365295, "learning_rate": 4.036946351973181e-05, "loss": 0.1037, "step": 57560 }, { "epoch": 2.0920851806090557, "grad_norm": 1.3058334589004517, "learning_rate": 4.0365370374296666e-05, "loss": 0.1195, "step": 57570 }, { "epoch": 2.092448579111854, "grad_norm": 0.9610320329666138, "learning_rate": 4.0361276566822383e-05, "loss": 
0.1286, "step": 57580 }, { "epoch": 2.092811977614652, "grad_norm": 0.9065276980400085, "learning_rate": 4.035718209748536e-05, "loss": 0.1146, "step": 57590 }, { "epoch": 2.0931753761174505, "grad_norm": 1.189386248588562, "learning_rate": 4.0353086966461984e-05, "loss": 0.0853, "step": 57600 }, { "epoch": 2.0931753761174505, "eval_loss": 0.35443732142448425, "eval_runtime": 179.8419, "eval_samples_per_second": 41.225, "eval_steps_per_second": 5.155, "eval_wer": 0.16203459981483834, "step": 57600 }, { "epoch": 2.0935387746202485, "grad_norm": 1.9661606550216675, "learning_rate": 4.034899117392873e-05, "loss": 0.0915, "step": 57610 }, { "epoch": 2.093902173123047, "grad_norm": 1.0128490924835205, "learning_rate": 4.0344894720062055e-05, "loss": 2.6837, "step": 57620 }, { "epoch": 2.094265571625845, "grad_norm": 0.9373286962509155, "learning_rate": 4.0340797605038464e-05, "loss": 0.1149, "step": 57630 }, { "epoch": 2.094628970128643, "grad_norm": 0.7361924052238464, "learning_rate": 4.033669982903449e-05, "loss": 0.1473, "step": 57640 }, { "epoch": 2.0949923686314413, "grad_norm": 0.6584343314170837, "learning_rate": 4.0332601392226673e-05, "loss": 0.0983, "step": 57650 }, { "epoch": 2.0953557671342393, "grad_norm": 3.030869960784912, "learning_rate": 4.0328502294791634e-05, "loss": 0.0874, "step": 57660 }, { "epoch": 2.0957191656370378, "grad_norm": 0.4622768759727478, "learning_rate": 4.0324402536905964e-05, "loss": 0.122, "step": 57670 }, { "epoch": 2.0960825641398357, "grad_norm": 0.7545061111450195, "learning_rate": 4.0320302118746314e-05, "loss": 0.1077, "step": 57680 }, { "epoch": 2.0964459626426337, "grad_norm": 1.838789939880371, "learning_rate": 4.0316201040489355e-05, "loss": 0.1814, "step": 57690 }, { "epoch": 2.096809361145432, "grad_norm": 0.5931621789932251, "learning_rate": 4.031209930231179e-05, "loss": 0.1053, "step": 57700 }, { "epoch": 2.09717275964823, "grad_norm": 0.698026180267334, "learning_rate": 4.0307996904390336e-05, "loss": 0.0843, 
"step": 57710 }, { "epoch": 2.0975361581510286, "grad_norm": 0.663277804851532, "learning_rate": 4.030389384690177e-05, "loss": 0.1109, "step": 57720 }, { "epoch": 2.0978995566538265, "grad_norm": 0.6599337458610535, "learning_rate": 4.0299790130022874e-05, "loss": 0.1007, "step": 57730 }, { "epoch": 2.098262955156625, "grad_norm": 0.5328543186187744, "learning_rate": 4.0295685753930454e-05, "loss": 0.1004, "step": 57740 }, { "epoch": 2.098626353659423, "grad_norm": 0.5420628190040588, "learning_rate": 4.029158071880136e-05, "loss": 0.0959, "step": 57750 }, { "epoch": 2.098989752162221, "grad_norm": 1.3125580549240112, "learning_rate": 4.028747502481245e-05, "loss": 0.0835, "step": 57760 }, { "epoch": 2.0993531506650194, "grad_norm": 0.6729845404624939, "learning_rate": 4.028336867214064e-05, "loss": 0.1596, "step": 57770 }, { "epoch": 2.0997165491678174, "grad_norm": 0.8784998655319214, "learning_rate": 4.0279261660962854e-05, "loss": 0.1261, "step": 57780 }, { "epoch": 2.100079947670616, "grad_norm": 0.8162268996238708, "learning_rate": 4.027515399145605e-05, "loss": 0.0996, "step": 57790 }, { "epoch": 2.1004433461734138, "grad_norm": 0.8188743591308594, "learning_rate": 4.02710456637972e-05, "loss": 0.1043, "step": 57800 }, { "epoch": 2.1008067446762118, "grad_norm": 2.6283457279205322, "learning_rate": 4.0266936678163333e-05, "loss": 0.1207, "step": 57810 }, { "epoch": 2.10117014317901, "grad_norm": 0.9076483249664307, "learning_rate": 4.0262827034731486e-05, "loss": 0.1283, "step": 57820 }, { "epoch": 2.101533541681808, "grad_norm": 1.4384301900863647, "learning_rate": 4.025871673367873e-05, "loss": 0.0942, "step": 57830 }, { "epoch": 2.1018969401846066, "grad_norm": 0.7651816010475159, "learning_rate": 4.025460577518215e-05, "loss": 0.1171, "step": 57840 }, { "epoch": 2.1022603386874046, "grad_norm": 1.075475811958313, "learning_rate": 4.025049415941889e-05, "loss": 0.1002, "step": 57850 }, { "epoch": 2.1026237371902026, "grad_norm": 0.5640189051628113, 
"learning_rate": 4.02463818865661e-05, "loss": 0.0797, "step": 57860 }, { "epoch": 2.102987135693001, "grad_norm": 2.052508592605591, "learning_rate": 4.024226895680097e-05, "loss": 0.114, "step": 57870 }, { "epoch": 2.103350534195799, "grad_norm": 0.8014973998069763, "learning_rate": 4.023815537030068e-05, "loss": 0.1304, "step": 57880 }, { "epoch": 2.1037139326985974, "grad_norm": 0.9665643572807312, "learning_rate": 4.02340411272425e-05, "loss": 0.1191, "step": 57890 }, { "epoch": 2.1040773312013954, "grad_norm": 1.0654706954956055, "learning_rate": 4.02299262278037e-05, "loss": 0.0836, "step": 57900 }, { "epoch": 2.104440729704194, "grad_norm": 1.1803388595581055, "learning_rate": 4.022581067216157e-05, "loss": 0.0988, "step": 57910 }, { "epoch": 2.104804128206992, "grad_norm": 0.5792093276977539, "learning_rate": 4.022169446049342e-05, "loss": 0.1177, "step": 57920 }, { "epoch": 2.10516752670979, "grad_norm": 0.9450294375419617, "learning_rate": 4.021757759297662e-05, "loss": 0.094, "step": 57930 }, { "epoch": 2.1055309252125882, "grad_norm": 0.5335323810577393, "learning_rate": 4.021346006978854e-05, "loss": 0.1358, "step": 57940 }, { "epoch": 2.105894323715386, "grad_norm": 1.1108689308166504, "learning_rate": 4.02093418911066e-05, "loss": 0.0964, "step": 57950 }, { "epoch": 2.1062577222181846, "grad_norm": 0.3482346534729004, "learning_rate": 4.020522305710823e-05, "loss": 0.0928, "step": 57960 }, { "epoch": 2.1066211207209826, "grad_norm": 0.6527045369148254, "learning_rate": 4.02011035679709e-05, "loss": 0.1033, "step": 57970 }, { "epoch": 2.1069845192237806, "grad_norm": 0.9047361612319946, "learning_rate": 4.019698342387211e-05, "loss": 0.0939, "step": 57980 }, { "epoch": 2.107347917726579, "grad_norm": 1.3960262537002563, "learning_rate": 4.019286262498937e-05, "loss": 0.1275, "step": 57990 }, { "epoch": 2.107711316229377, "grad_norm": 0.49838632345199585, "learning_rate": 4.0188741171500234e-05, "loss": 0.1133, "step": 58000 }, { "epoch": 
2.1080747147321754, "grad_norm": 0.6651538014411926, "learning_rate": 4.0184619063582284e-05, "loss": 0.1361, "step": 58010 }, { "epoch": 2.1084381132349734, "grad_norm": 0.7778026461601257, "learning_rate": 4.018049630141313e-05, "loss": 0.117, "step": 58020 }, { "epoch": 2.108801511737772, "grad_norm": 1.0851924419403076, "learning_rate": 4.0176372885170396e-05, "loss": 0.096, "step": 58030 }, { "epoch": 2.10916491024057, "grad_norm": 0.5920321345329285, "learning_rate": 4.017224881503176e-05, "loss": 0.1812, "step": 58040 }, { "epoch": 2.109528308743368, "grad_norm": 1.2104512453079224, "learning_rate": 4.0168124091174896e-05, "loss": 0.1002, "step": 58050 }, { "epoch": 2.1098917072461663, "grad_norm": 0.8000385761260986, "learning_rate": 4.016399871377754e-05, "loss": 0.099, "step": 58060 }, { "epoch": 2.1102551057489642, "grad_norm": 0.9628605246543884, "learning_rate": 4.015987268301742e-05, "loss": 0.1322, "step": 58070 }, { "epoch": 2.1106185042517627, "grad_norm": 1.1031752824783325, "learning_rate": 4.015574599907235e-05, "loss": 0.1089, "step": 58080 }, { "epoch": 2.1109819027545607, "grad_norm": 0.7440558075904846, "learning_rate": 4.0151618662120084e-05, "loss": 0.1255, "step": 58090 }, { "epoch": 2.1113453012573586, "grad_norm": 0.7492482662200928, "learning_rate": 4.0147490672338494e-05, "loss": 0.0787, "step": 58100 }, { "epoch": 2.111708699760157, "grad_norm": 1.2699692249298096, "learning_rate": 4.0143362029905415e-05, "loss": 0.0835, "step": 58110 }, { "epoch": 2.112072098262955, "grad_norm": 0.5075403451919556, "learning_rate": 4.013923273499876e-05, "loss": 0.113, "step": 58120 }, { "epoch": 2.1124354967657535, "grad_norm": 0.47074371576309204, "learning_rate": 4.013510278779643e-05, "loss": 0.1045, "step": 58130 }, { "epoch": 2.1127988952685515, "grad_norm": 1.9055145978927612, "learning_rate": 4.013097218847636e-05, "loss": 0.1096, "step": 58140 }, { "epoch": 2.1131622937713495, "grad_norm": 0.8922753930091858, "learning_rate": 
4.0126840937216545e-05, "loss": 0.1129, "step": 58150 }, { "epoch": 2.113525692274148, "grad_norm": 1.5678116083145142, "learning_rate": 4.012270903419497e-05, "loss": 0.1314, "step": 58160 }, { "epoch": 2.113889090776946, "grad_norm": 1.4676604270935059, "learning_rate": 4.0118576479589675e-05, "loss": 0.119, "step": 58170 }, { "epoch": 2.1142524892797443, "grad_norm": 1.0103446245193481, "learning_rate": 4.0114443273578714e-05, "loss": 0.1036, "step": 58180 }, { "epoch": 2.1146158877825423, "grad_norm": 0.5744931101799011, "learning_rate": 4.011030941634016e-05, "loss": 0.1493, "step": 58190 }, { "epoch": 2.1149792862853407, "grad_norm": 1.430180311203003, "learning_rate": 4.010617490805214e-05, "loss": 0.0928, "step": 58200 }, { "epoch": 2.1149792862853407, "eval_loss": 0.3442366421222687, "eval_runtime": 180.0152, "eval_samples_per_second": 41.185, "eval_steps_per_second": 5.15, "eval_wer": 0.16051881569154247, "step": 58200 }, { "epoch": 2.1153426847881387, "grad_norm": 0.7342690825462341, "learning_rate": 4.0102039748892786e-05, "loss": 0.0878, "step": 58210 }, { "epoch": 2.1157060832909367, "grad_norm": 1.540487289428711, "learning_rate": 4.0097903939040284e-05, "loss": 0.1158, "step": 58220 }, { "epoch": 2.116069481793735, "grad_norm": 0.9415495991706848, "learning_rate": 4.009376747867281e-05, "loss": 0.105, "step": 58230 }, { "epoch": 2.116432880296533, "grad_norm": 0.8002855181694031, "learning_rate": 4.008963036796861e-05, "loss": 0.0855, "step": 58240 }, { "epoch": 2.1167962787993315, "grad_norm": 0.7064021825790405, "learning_rate": 4.008549260710591e-05, "loss": 0.1319, "step": 58250 }, { "epoch": 2.1171596773021295, "grad_norm": 0.5867117047309875, "learning_rate": 4.008135419626302e-05, "loss": 0.1232, "step": 58260 }, { "epoch": 2.1175230758049275, "grad_norm": 0.7439972162246704, "learning_rate": 4.007721513561824e-05, "loss": 0.1359, "step": 58270 }, { "epoch": 2.117886474307726, "grad_norm": 0.9335612058639526, "learning_rate": 
4.007307542534989e-05, "loss": 0.0763, "step": 58280 }, { "epoch": 2.118249872810524, "grad_norm": 0.6899220943450928, "learning_rate": 4.006893506563637e-05, "loss": 0.129, "step": 58290 }, { "epoch": 2.1186132713133223, "grad_norm": 0.9896695613861084, "learning_rate": 4.006479405665604e-05, "loss": 0.0756, "step": 58300 }, { "epoch": 2.1189766698161203, "grad_norm": 0.8844881057739258, "learning_rate": 4.0060652398587335e-05, "loss": 0.111, "step": 58310 }, { "epoch": 2.1193400683189187, "grad_norm": 0.3384082615375519, "learning_rate": 4.0056510091608706e-05, "loss": 0.1182, "step": 58320 }, { "epoch": 2.1197034668217167, "grad_norm": 1.8488768339157104, "learning_rate": 4.005236713589863e-05, "loss": 0.1045, "step": 58330 }, { "epoch": 2.1200668653245147, "grad_norm": 4.640181064605713, "learning_rate": 4.004822353163561e-05, "loss": 0.1085, "step": 58340 }, { "epoch": 2.120430263827313, "grad_norm": 2.7104008197784424, "learning_rate": 4.004407927899817e-05, "loss": 0.0996, "step": 58350 }, { "epoch": 2.120793662330111, "grad_norm": 0.8320967555046082, "learning_rate": 4.00399343781649e-05, "loss": 0.0887, "step": 58360 }, { "epoch": 2.1211570608329096, "grad_norm": 0.5715747475624084, "learning_rate": 4.003578882931436e-05, "loss": 0.0961, "step": 58370 }, { "epoch": 2.1215204593357075, "grad_norm": 1.0619550943374634, "learning_rate": 4.003164263262518e-05, "loss": 0.1733, "step": 58380 }, { "epoch": 2.1218838578385055, "grad_norm": 0.6880344748497009, "learning_rate": 4.0027495788275995e-05, "loss": 0.1722, "step": 58390 }, { "epoch": 2.122247256341304, "grad_norm": 0.503822922706604, "learning_rate": 4.0023348296445483e-05, "loss": 0.0772, "step": 58400 }, { "epoch": 2.122610654844102, "grad_norm": 0.4914768636226654, "learning_rate": 4.001920015731235e-05, "loss": 0.0752, "step": 58410 }, { "epoch": 2.1229740533469004, "grad_norm": 0.7141969799995422, "learning_rate": 4.001505137105532e-05, "loss": 0.1247, "step": 58420 }, { "epoch": 2.1233374518496984, 
"grad_norm": 1.7771844863891602, "learning_rate": 4.0010901937853164e-05, "loss": 0.123, "step": 58430 }, { "epoch": 2.1237008503524963, "grad_norm": 0.8636963367462158, "learning_rate": 4.0006751857884636e-05, "loss": 0.1224, "step": 58440 }, { "epoch": 2.1240642488552948, "grad_norm": 0.6579970121383667, "learning_rate": 4.000260113132857e-05, "loss": 0.0992, "step": 58450 }, { "epoch": 2.1244276473580928, "grad_norm": 0.5212269425392151, "learning_rate": 3.99984497583638e-05, "loss": 0.1097, "step": 58460 }, { "epoch": 2.124791045860891, "grad_norm": 0.44934549927711487, "learning_rate": 3.999429773916919e-05, "loss": 0.1304, "step": 58470 }, { "epoch": 2.125154444363689, "grad_norm": 0.7750062942504883, "learning_rate": 3.999014507392365e-05, "loss": 0.1233, "step": 58480 }, { "epoch": 2.1255178428664876, "grad_norm": 0.9064908623695374, "learning_rate": 3.9985991762806087e-05, "loss": 0.2681, "step": 58490 }, { "epoch": 2.1258812413692856, "grad_norm": 0.9376353025436401, "learning_rate": 3.998183780599546e-05, "loss": 0.0911, "step": 58500 }, { "epoch": 2.1262446398720836, "grad_norm": 1.2456096410751343, "learning_rate": 3.9977683203670755e-05, "loss": 0.1072, "step": 58510 }, { "epoch": 2.126608038374882, "grad_norm": 1.1492791175842285, "learning_rate": 3.997352795601096e-05, "loss": 0.1181, "step": 58520 }, { "epoch": 2.12697143687768, "grad_norm": 1.6713447570800781, "learning_rate": 3.996937206319513e-05, "loss": 0.1018, "step": 58530 }, { "epoch": 2.1273348353804784, "grad_norm": 1.8490865230560303, "learning_rate": 3.996521552540231e-05, "loss": 0.1267, "step": 58540 }, { "epoch": 2.1276982338832764, "grad_norm": 0.8250418305397034, "learning_rate": 3.9961058342811606e-05, "loss": 0.1118, "step": 58550 }, { "epoch": 2.1280616323860744, "grad_norm": 1.141861915588379, "learning_rate": 3.995690051560213e-05, "loss": 0.0958, "step": 58560 }, { "epoch": 2.128425030888873, "grad_norm": 0.9268454313278198, "learning_rate": 3.995274204395303e-05, "loss": 
0.1196, "step": 58570 }, { "epoch": 2.128788429391671, "grad_norm": 0.6160836219787598, "learning_rate": 3.994858292804347e-05, "loss": 0.1017, "step": 58580 }, { "epoch": 2.129151827894469, "grad_norm": 0.9815055131912231, "learning_rate": 3.994442316805266e-05, "loss": 0.0977, "step": 58590 }, { "epoch": 2.129515226397267, "grad_norm": 0.887614369392395, "learning_rate": 3.994026276415983e-05, "loss": 0.0924, "step": 58600 }, { "epoch": 2.1298786249000656, "grad_norm": 1.7379142045974731, "learning_rate": 3.993610171654424e-05, "loss": 0.1115, "step": 58610 }, { "epoch": 2.1302420234028636, "grad_norm": 0.9149182438850403, "learning_rate": 3.993194002538516e-05, "loss": 0.4902, "step": 58620 }, { "epoch": 2.1306054219056616, "grad_norm": 0.4498516619205475, "learning_rate": 3.992777769086192e-05, "loss": 0.1172, "step": 58630 }, { "epoch": 2.13096882040846, "grad_norm": 0.8547645807266235, "learning_rate": 3.992361471315385e-05, "loss": 0.1816, "step": 58640 }, { "epoch": 2.131332218911258, "grad_norm": 0.6961509585380554, "learning_rate": 3.991945109244032e-05, "loss": 0.1024, "step": 58650 }, { "epoch": 2.1316956174140564, "grad_norm": 0.989095151424408, "learning_rate": 3.9915286828900725e-05, "loss": 0.0871, "step": 58660 }, { "epoch": 2.1320590159168544, "grad_norm": 0.6588122844696045, "learning_rate": 3.9911121922714496e-05, "loss": 0.2563, "step": 58670 }, { "epoch": 2.1324224144196524, "grad_norm": 0.6134093999862671, "learning_rate": 3.9906956374061075e-05, "loss": 0.1274, "step": 58680 }, { "epoch": 2.132785812922451, "grad_norm": 1.8236083984375, "learning_rate": 3.990279018311993e-05, "loss": 0.1083, "step": 58690 }, { "epoch": 2.133149211425249, "grad_norm": 0.8734591007232666, "learning_rate": 3.989862335007059e-05, "loss": 0.0925, "step": 58700 }, { "epoch": 2.1335126099280473, "grad_norm": 0.5155262351036072, "learning_rate": 3.9894455875092587e-05, "loss": 0.1428, "step": 58710 }, { "epoch": 2.1338760084308452, "grad_norm": 1.4302911758422852, 
"learning_rate": 3.989028775836546e-05, "loss": 0.1089, "step": 58720 }, { "epoch": 2.1342394069336432, "grad_norm": 1.1335387229919434, "learning_rate": 3.988611900006882e-05, "loss": 0.1031, "step": 58730 }, { "epoch": 2.1346028054364417, "grad_norm": 0.45461785793304443, "learning_rate": 3.988194960038228e-05, "loss": 0.1125, "step": 58740 }, { "epoch": 2.1349662039392396, "grad_norm": 2.3098812103271484, "learning_rate": 3.9877779559485484e-05, "loss": 0.6339, "step": 58750 }, { "epoch": 2.135329602442038, "grad_norm": 2.491065502166748, "learning_rate": 3.98736088775581e-05, "loss": 0.1011, "step": 58760 }, { "epoch": 2.135693000944836, "grad_norm": 2.0698654651641846, "learning_rate": 3.986943755477983e-05, "loss": 1.3794, "step": 58770 }, { "epoch": 2.1360563994476345, "grad_norm": 1.4950264692306519, "learning_rate": 3.9865265591330394e-05, "loss": 0.108, "step": 58780 }, { "epoch": 2.1364197979504325, "grad_norm": 0.3976856768131256, "learning_rate": 3.986109298738957e-05, "loss": 0.1407, "step": 58790 }, { "epoch": 2.1367831964532304, "grad_norm": 1.319399356842041, "learning_rate": 3.985691974313711e-05, "loss": 0.1168, "step": 58800 }, { "epoch": 2.1367831964532304, "eval_loss": 0.33521324396133423, "eval_runtime": 180.0938, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.16158984878464974, "step": 58800 }, { "epoch": 2.137146594956029, "grad_norm": 3.924207925796509, "learning_rate": 3.985274585875284e-05, "loss": 0.0899, "step": 58810 }, { "epoch": 2.137509993458827, "grad_norm": 0.7248135805130005, "learning_rate": 3.984857133441661e-05, "loss": 0.1256, "step": 58820 }, { "epoch": 2.1378733919616253, "grad_norm": 0.5945442914962769, "learning_rate": 3.984439617030826e-05, "loss": 0.0891, "step": 58830 }, { "epoch": 2.1382367904644233, "grad_norm": 2.0642237663269043, "learning_rate": 3.98402203666077e-05, "loss": 0.1359, "step": 58840 }, { "epoch": 2.1386001889672213, "grad_norm": 1.0051828622817993, "learning_rate": 
3.983604392349485e-05, "loss": 0.099, "step": 58850 }, { "epoch": 2.1389635874700197, "grad_norm": 1.7241709232330322, "learning_rate": 3.983186684114965e-05, "loss": 0.1353, "step": 58860 }, { "epoch": 2.1393269859728177, "grad_norm": 0.6430028080940247, "learning_rate": 3.9827689119752076e-05, "loss": 0.6511, "step": 58870 }, { "epoch": 2.139690384475616, "grad_norm": 0.76287442445755, "learning_rate": 3.9823510759482134e-05, "loss": 0.1082, "step": 58880 }, { "epoch": 2.140053782978414, "grad_norm": 0.6280699372291565, "learning_rate": 3.981933176051986e-05, "loss": 0.114, "step": 58890 }, { "epoch": 2.1404171814812125, "grad_norm": 0.8308879733085632, "learning_rate": 3.9815152123045305e-05, "loss": 0.1072, "step": 58900 }, { "epoch": 2.1407805799840105, "grad_norm": 0.5416497588157654, "learning_rate": 3.981097184723856e-05, "loss": 0.1809, "step": 58910 }, { "epoch": 2.1411439784868085, "grad_norm": 0.5450316071510315, "learning_rate": 3.9806790933279745e-05, "loss": 0.1198, "step": 58920 }, { "epoch": 2.141507376989607, "grad_norm": 0.6177099347114563, "learning_rate": 3.980260938134898e-05, "loss": 0.0926, "step": 58930 }, { "epoch": 2.141870775492405, "grad_norm": 1.365262746810913, "learning_rate": 3.9798427191626455e-05, "loss": 0.0998, "step": 58940 }, { "epoch": 2.1422341739952033, "grad_norm": 0.4065784513950348, "learning_rate": 3.979424436429234e-05, "loss": 0.0958, "step": 58950 }, { "epoch": 2.1425975724980013, "grad_norm": 0.7803066372871399, "learning_rate": 3.979006089952688e-05, "loss": 0.0997, "step": 58960 }, { "epoch": 2.1429609710007993, "grad_norm": 0.41044801473617554, "learning_rate": 3.978587679751032e-05, "loss": 0.1265, "step": 58970 }, { "epoch": 2.1433243695035977, "grad_norm": 1.1145354509353638, "learning_rate": 3.9781692058422936e-05, "loss": 0.0965, "step": 58980 }, { "epoch": 2.1436877680063957, "grad_norm": 0.6286850571632385, "learning_rate": 3.977750668244504e-05, "loss": 0.1237, "step": 58990 }, { "epoch": 
2.144051166509194, "grad_norm": 0.7701926827430725, "learning_rate": 3.977332066975695e-05, "loss": 0.0984, "step": 59000 }, { "epoch": 2.144414565011992, "grad_norm": 2.205230236053467, "learning_rate": 3.976913402053904e-05, "loss": 0.1007, "step": 59010 }, { "epoch": 2.14477796351479, "grad_norm": 0.7837009429931641, "learning_rate": 3.97649467349717e-05, "loss": 0.1412, "step": 59020 }, { "epoch": 2.1451413620175885, "grad_norm": 1.4856473207473755, "learning_rate": 3.9760758813235336e-05, "loss": 0.1069, "step": 59030 }, { "epoch": 2.1455047605203865, "grad_norm": 0.7916889190673828, "learning_rate": 3.975657025551039e-05, "loss": 0.1216, "step": 59040 }, { "epoch": 2.145868159023185, "grad_norm": 2.3275558948516846, "learning_rate": 3.975238106197734e-05, "loss": 0.0862, "step": 59050 }, { "epoch": 2.146231557525983, "grad_norm": 1.2247077226638794, "learning_rate": 3.974819123281668e-05, "loss": 0.09, "step": 59060 }, { "epoch": 2.1465949560287814, "grad_norm": 3.578880548477173, "learning_rate": 3.9744000768208926e-05, "loss": 0.1694, "step": 59070 }, { "epoch": 2.1469583545315793, "grad_norm": 0.7688897848129272, "learning_rate": 3.973980966833465e-05, "loss": 0.1064, "step": 59080 }, { "epoch": 2.1473217530343773, "grad_norm": 0.7360697388648987, "learning_rate": 3.973561793337441e-05, "loss": 0.1038, "step": 59090 }, { "epoch": 2.1476851515371758, "grad_norm": 1.5406807661056519, "learning_rate": 3.9731425563508826e-05, "loss": 0.0949, "step": 59100 }, { "epoch": 2.1480485500399737, "grad_norm": 1.3897796869277954, "learning_rate": 3.972723255891853e-05, "loss": 0.1097, "step": 59110 }, { "epoch": 2.148411948542772, "grad_norm": 0.9940290451049805, "learning_rate": 3.9723038919784176e-05, "loss": 0.1342, "step": 59120 }, { "epoch": 2.14877534704557, "grad_norm": 1.5705652236938477, "learning_rate": 3.971884464628647e-05, "loss": 0.1225, "step": 59130 }, { "epoch": 2.149138745548368, "grad_norm": 0.8528106212615967, "learning_rate": 3.971464973860611e-05, 
"loss": 0.1127, "step": 59140 }, { "epoch": 2.1495021440511666, "grad_norm": 0.5715293884277344, "learning_rate": 3.971045419692385e-05, "loss": 0.1089, "step": 59150 }, { "epoch": 2.1498655425539646, "grad_norm": 1.5109196901321411, "learning_rate": 3.970625802142046e-05, "loss": 0.0809, "step": 59160 }, { "epoch": 2.150228941056763, "grad_norm": 0.4277292788028717, "learning_rate": 3.9702061212276744e-05, "loss": 0.1368, "step": 59170 }, { "epoch": 2.150592339559561, "grad_norm": 0.692513644695282, "learning_rate": 3.969786376967351e-05, "loss": 0.1399, "step": 59180 }, { "epoch": 2.1509557380623594, "grad_norm": 3.4921178817749023, "learning_rate": 3.969366569379162e-05, "loss": 0.1315, "step": 59190 }, { "epoch": 2.1513191365651574, "grad_norm": 1.5540839433670044, "learning_rate": 3.9689466984811964e-05, "loss": 0.1015, "step": 59200 }, { "epoch": 2.1516825350679554, "grad_norm": 0.6076385378837585, "learning_rate": 3.9685267642915436e-05, "loss": 0.111, "step": 59210 }, { "epoch": 2.152045933570754, "grad_norm": 0.5078336596488953, "learning_rate": 3.968106766828298e-05, "loss": 0.1122, "step": 59220 }, { "epoch": 2.152409332073552, "grad_norm": 1.294973373413086, "learning_rate": 3.967686706109554e-05, "loss": 0.1202, "step": 59230 }, { "epoch": 2.15277273057635, "grad_norm": 0.5963008999824524, "learning_rate": 3.967350612002765e-05, "loss": 6.9715, "step": 59240 }, { "epoch": 2.153136129079148, "grad_norm": 0.9680716395378113, "learning_rate": 3.966930437469738e-05, "loss": 0.761, "step": 59250 }, { "epoch": 2.153499527581946, "grad_norm": 0.5637746453285217, "learning_rate": 3.966510199731898e-05, "loss": 0.127, "step": 59260 }, { "epoch": 2.1538629260847446, "grad_norm": 0.5631716251373291, "learning_rate": 3.9660898988073514e-05, "loss": 0.1065, "step": 59270 }, { "epoch": 2.1542263245875426, "grad_norm": 2.773534059524536, "learning_rate": 3.965669534714208e-05, "loss": 0.1039, "step": 59280 }, { "epoch": 2.154589723090341, "grad_norm": 
0.5603722333908081, "learning_rate": 3.965249107470579e-05, "loss": 0.1243, "step": 59290 }, { "epoch": 2.154953121593139, "grad_norm": 0.8897901177406311, "learning_rate": 3.964828617094579e-05, "loss": 0.0867, "step": 59300 }, { "epoch": 2.155316520095937, "grad_norm": 0.9018154144287109, "learning_rate": 3.9644080636043255e-05, "loss": 0.1066, "step": 59310 }, { "epoch": 2.1556799185987354, "grad_norm": 1.247503399848938, "learning_rate": 3.963987447017939e-05, "loss": 0.1193, "step": 59320 }, { "epoch": 2.1560433171015334, "grad_norm": 0.5965039730072021, "learning_rate": 3.963566767353544e-05, "loss": 0.1065, "step": 59330 }, { "epoch": 2.156406715604332, "grad_norm": 0.6746231913566589, "learning_rate": 3.9631460246292616e-05, "loss": 0.1096, "step": 59340 }, { "epoch": 2.15677011410713, "grad_norm": 0.8131401538848877, "learning_rate": 3.9627252188632246e-05, "loss": 0.0903, "step": 59350 }, { "epoch": 2.1571335126099282, "grad_norm": 0.8984467387199402, "learning_rate": 3.962304350073562e-05, "loss": 0.1095, "step": 59360 }, { "epoch": 2.1574969111127262, "grad_norm": 0.7640008926391602, "learning_rate": 3.961883418278408e-05, "loss": 0.1255, "step": 59370 }, { "epoch": 2.157860309615524, "grad_norm": 0.522688627243042, "learning_rate": 3.961462423495899e-05, "loss": 0.1144, "step": 59380 }, { "epoch": 2.1582237081183226, "grad_norm": 0.4221755266189575, "learning_rate": 3.961041365744174e-05, "loss": 0.1031, "step": 59390 }, { "epoch": 2.1585871066211206, "grad_norm": 1.1756844520568848, "learning_rate": 3.960620245041374e-05, "loss": 0.1034, "step": 59400 }, { "epoch": 2.1585871066211206, "eval_loss": 0.33832496404647827, "eval_runtime": 179.7531, "eval_samples_per_second": 41.245, "eval_steps_per_second": 5.157, "eval_wer": 0.16059142810463448, "step": 59400 }, { "epoch": 2.158950505123919, "grad_norm": 0.6600112915039062, "learning_rate": 3.960199061405646e-05, "loss": 0.1055, "step": 59410 }, { "epoch": 2.159313903626717, "grad_norm": 
0.6152768135070801, "learning_rate": 3.959777814855135e-05, "loss": 0.1349, "step": 59420 }, { "epoch": 2.159677302129515, "grad_norm": 0.9786444306373596, "learning_rate": 3.959356505407992e-05, "loss": 0.1021, "step": 59430 }, { "epoch": 2.1600407006323135, "grad_norm": 1.3649888038635254, "learning_rate": 3.9589351330823697e-05, "loss": 0.1002, "step": 59440 }, { "epoch": 2.1604040991351114, "grad_norm": 0.8674107789993286, "learning_rate": 3.958513697896423e-05, "loss": 0.0963, "step": 59450 }, { "epoch": 2.16076749763791, "grad_norm": 0.7542990446090698, "learning_rate": 3.9580921998683114e-05, "loss": 0.0837, "step": 59460 }, { "epoch": 2.161130896140708, "grad_norm": 1.032072901725769, "learning_rate": 3.957670639016194e-05, "loss": 0.1991, "step": 59470 }, { "epoch": 2.1614942946435063, "grad_norm": 0.5288215279579163, "learning_rate": 3.9572490153582354e-05, "loss": 0.0821, "step": 59480 }, { "epoch": 2.1618576931463043, "grad_norm": 1.010878562927246, "learning_rate": 3.956827328912602e-05, "loss": 0.1697, "step": 59490 }, { "epoch": 2.1622210916491023, "grad_norm": 0.9703467488288879, "learning_rate": 3.956405579697462e-05, "loss": 0.135, "step": 59500 }, { "epoch": 2.1625844901519007, "grad_norm": 0.8474395275115967, "learning_rate": 3.9559837677309874e-05, "loss": 0.0969, "step": 59510 }, { "epoch": 2.1629478886546987, "grad_norm": 0.6262643933296204, "learning_rate": 3.955561893031353e-05, "loss": 0.6284, "step": 59520 }, { "epoch": 2.163311287157497, "grad_norm": 1.7965657711029053, "learning_rate": 3.955139955616735e-05, "loss": 0.103, "step": 59530 }, { "epoch": 2.163674685660295, "grad_norm": 1.317929744720459, "learning_rate": 3.954717955505314e-05, "loss": 0.1266, "step": 59540 }, { "epoch": 2.164038084163093, "grad_norm": 2.5945920944213867, "learning_rate": 3.954295892715272e-05, "loss": 0.2541, "step": 59550 }, { "epoch": 2.1644014826658915, "grad_norm": 0.8854953050613403, "learning_rate": 3.9538737672647955e-05, "loss": 0.0872, "step": 
59560 }, { "epoch": 2.1647648811686895, "grad_norm": 1.2449252605438232, "learning_rate": 3.953451579172069e-05, "loss": 0.1297, "step": 59570 }, { "epoch": 2.165128279671488, "grad_norm": 0.9489690661430359, "learning_rate": 3.9530293284552876e-05, "loss": 0.1213, "step": 59580 }, { "epoch": 2.165491678174286, "grad_norm": 1.2009365558624268, "learning_rate": 3.952607015132642e-05, "loss": 0.1116, "step": 59590 }, { "epoch": 2.165855076677084, "grad_norm": 2.0308213233947754, "learning_rate": 3.952184639222327e-05, "loss": 0.104, "step": 59600 }, { "epoch": 2.1662184751798823, "grad_norm": 0.9132998585700989, "learning_rate": 3.951762200742544e-05, "loss": 0.0821, "step": 59610 }, { "epoch": 2.1665818736826803, "grad_norm": 0.3481888473033905, "learning_rate": 3.951339699711493e-05, "loss": 0.1061, "step": 59620 }, { "epoch": 2.1669452721854787, "grad_norm": 1.2526309490203857, "learning_rate": 3.950917136147378e-05, "loss": 0.134, "step": 59630 }, { "epoch": 2.1673086706882767, "grad_norm": 1.3150311708450317, "learning_rate": 3.950494510068407e-05, "loss": 0.1387, "step": 59640 }, { "epoch": 2.167672069191075, "grad_norm": 0.6540773510932922, "learning_rate": 3.950071821492787e-05, "loss": 0.1038, "step": 59650 }, { "epoch": 2.168035467693873, "grad_norm": 0.7014539837837219, "learning_rate": 3.949649070438732e-05, "loss": 0.1047, "step": 59660 }, { "epoch": 2.168398866196671, "grad_norm": 1.7086548805236816, "learning_rate": 3.9492262569244566e-05, "loss": 0.1298, "step": 59670 }, { "epoch": 2.1687622646994695, "grad_norm": 0.5339615941047668, "learning_rate": 3.9488033809681785e-05, "loss": 0.0818, "step": 59680 }, { "epoch": 2.1691256632022675, "grad_norm": 1.4150161743164062, "learning_rate": 3.9483804425881167e-05, "loss": 0.0952, "step": 59690 }, { "epoch": 2.169489061705066, "grad_norm": 1.182112216949463, "learning_rate": 3.947957441802496e-05, "loss": 0.0855, "step": 59700 }, { "epoch": 2.169852460207864, "grad_norm": 22.265352249145508, 
"learning_rate": 3.94753437862954e-05, "loss": 0.2064, "step": 59710 }, { "epoch": 2.170215858710662, "grad_norm": 1.3365362882614136, "learning_rate": 3.9471112530874784e-05, "loss": 0.1314, "step": 59720 }, { "epoch": 2.1705792572134603, "grad_norm": 0.5914321541786194, "learning_rate": 3.946688065194543e-05, "loss": 0.1072, "step": 59730 }, { "epoch": 2.1709426557162583, "grad_norm": 1.0717413425445557, "learning_rate": 3.946264814968964e-05, "loss": 0.1144, "step": 59740 }, { "epoch": 2.1713060542190568, "grad_norm": 0.7842442393302917, "learning_rate": 3.945841502428981e-05, "loss": 0.0989, "step": 59750 }, { "epoch": 2.1716694527218547, "grad_norm": 0.4757680594921112, "learning_rate": 3.9454181275928315e-05, "loss": 0.0909, "step": 59760 }, { "epoch": 2.172032851224653, "grad_norm": 0.9192887544631958, "learning_rate": 3.944994690478758e-05, "loss": 2.1207, "step": 59770 }, { "epoch": 2.172396249727451, "grad_norm": 1.9832956790924072, "learning_rate": 3.9445711911050055e-05, "loss": 0.1235, "step": 59780 }, { "epoch": 2.172759648230249, "grad_norm": 12.941081047058105, "learning_rate": 3.944147629489819e-05, "loss": 0.3816, "step": 59790 }, { "epoch": 2.1731230467330476, "grad_norm": 1.5549241304397583, "learning_rate": 3.9437240056514504e-05, "loss": 0.109, "step": 59800 }, { "epoch": 2.1734864452358456, "grad_norm": 3.1633951663970947, "learning_rate": 3.9433003196081495e-05, "loss": 0.1156, "step": 59810 }, { "epoch": 2.173849843738644, "grad_norm": 1.274003505706787, "learning_rate": 3.9428765713781744e-05, "loss": 0.0984, "step": 59820 }, { "epoch": 2.174213242241442, "grad_norm": 0.5220558047294617, "learning_rate": 3.9424527609797825e-05, "loss": 0.1151, "step": 59830 }, { "epoch": 2.17457664074424, "grad_norm": 1.241507887840271, "learning_rate": 3.942028888431232e-05, "loss": 0.1219, "step": 59840 }, { "epoch": 2.1749400392470384, "grad_norm": 0.5816989541053772, "learning_rate": 3.9416049537507875e-05, "loss": 0.0976, "step": 59850 }, { "epoch": 
2.1753034377498364, "grad_norm": 0.6653616428375244, "learning_rate": 3.941180956956715e-05, "loss": 0.1196, "step": 59860 }, { "epoch": 2.175666836252635, "grad_norm": 0.6018986105918884, "learning_rate": 3.940756898067283e-05, "loss": 0.1151, "step": 59870 }, { "epoch": 2.176030234755433, "grad_norm": 0.5224238038063049, "learning_rate": 3.940332777100762e-05, "loss": 0.0892, "step": 59880 }, { "epoch": 2.1763936332582308, "grad_norm": 0.7985048294067383, "learning_rate": 3.939908594075427e-05, "loss": 0.1244, "step": 59890 }, { "epoch": 2.176757031761029, "grad_norm": 1.0602693557739258, "learning_rate": 3.9394843490095535e-05, "loss": 0.107, "step": 59900 }, { "epoch": 2.177120430263827, "grad_norm": 0.789055347442627, "learning_rate": 3.939060041921421e-05, "loss": 0.1354, "step": 59910 }, { "epoch": 2.1774838287666256, "grad_norm": 0.27713751792907715, "learning_rate": 3.9386356728293123e-05, "loss": 0.1047, "step": 59920 }, { "epoch": 2.1778472272694236, "grad_norm": 1.9695335626602173, "learning_rate": 3.9382112417515106e-05, "loss": 0.0788, "step": 59930 }, { "epoch": 2.178210625772222, "grad_norm": 1.5898009538650513, "learning_rate": 3.937786748706304e-05, "loss": 0.1194, "step": 59940 }, { "epoch": 2.17857402427502, "grad_norm": 1.2933491468429565, "learning_rate": 3.937362193711981e-05, "loss": 0.0878, "step": 59950 }, { "epoch": 2.178937422777818, "grad_norm": 0.5345110297203064, "learning_rate": 3.9369375767868355e-05, "loss": 0.1, "step": 59960 }, { "epoch": 2.1793008212806164, "grad_norm": 0.5044030547142029, "learning_rate": 3.936512897949163e-05, "loss": 0.1144, "step": 59970 }, { "epoch": 2.1796642197834144, "grad_norm": 0.5815631151199341, "learning_rate": 3.9360881572172605e-05, "loss": 0.0789, "step": 59980 }, { "epoch": 2.180027618286213, "grad_norm": 0.8639971613883972, "learning_rate": 3.9356633546094297e-05, "loss": 0.0971, "step": 59990 }, { "epoch": 2.180391016789011, "grad_norm": 1.318261981010437, "learning_rate": 
3.935238490143972e-05, "loss": 0.0979, "step": 60000 }, { "epoch": 2.180391016789011, "eval_loss": 0.339672327041626, "eval_runtime": 179.2051, "eval_samples_per_second": 41.372, "eval_steps_per_second": 5.173, "eval_wer": 0.1550910378129141, "step": 60000 }, { "epoch": 2.180754415291809, "grad_norm": 1.4749493598937988, "learning_rate": 3.934813563839195e-05, "loss": 0.0857, "step": 60010 }, { "epoch": 2.1811178137946072, "grad_norm": 0.6420970559120178, "learning_rate": 3.934388575713407e-05, "loss": 0.1378, "step": 60020 }, { "epoch": 2.181481212297405, "grad_norm": 2.692276954650879, "learning_rate": 3.9339635257849176e-05, "loss": 0.1229, "step": 60030 }, { "epoch": 2.1818446108002036, "grad_norm": 0.6107433438301086, "learning_rate": 3.9335384140720435e-05, "loss": 0.1196, "step": 60040 }, { "epoch": 2.1822080093030016, "grad_norm": 8.781155586242676, "learning_rate": 3.933113240593098e-05, "loss": 0.1229, "step": 60050 }, { "epoch": 2.1825714078058, "grad_norm": 2.4440197944641113, "learning_rate": 3.9326880053664026e-05, "loss": 0.1012, "step": 60060 }, { "epoch": 2.182934806308598, "grad_norm": 0.6593974828720093, "learning_rate": 3.932262708410279e-05, "loss": 0.0975, "step": 60070 }, { "epoch": 2.183298204811396, "grad_norm": 1.740123987197876, "learning_rate": 3.931837349743051e-05, "loss": 0.1086, "step": 60080 }, { "epoch": 2.1836616033141945, "grad_norm": 0.8486297130584717, "learning_rate": 3.9314119293830466e-05, "loss": 0.1325, "step": 60090 }, { "epoch": 2.1840250018169924, "grad_norm": 1.1630836725234985, "learning_rate": 3.9309864473485945e-05, "loss": 0.0936, "step": 60100 }, { "epoch": 2.184388400319791, "grad_norm": 1.5026519298553467, "learning_rate": 3.930560903658028e-05, "loss": 0.1088, "step": 60110 }, { "epoch": 2.184751798822589, "grad_norm": 0.8840125799179077, "learning_rate": 3.9301352983296816e-05, "loss": 0.7203, "step": 60120 }, { "epoch": 2.185115197325387, "grad_norm": 1.5866588354110718, "learning_rate": 
3.929709631381895e-05, "loss": 0.1023, "step": 60130 }, { "epoch": 2.1854785958281853, "grad_norm": 1.0091042518615723, "learning_rate": 3.9292839028330065e-05, "loss": 0.1165, "step": 60140 }, { "epoch": 2.1858419943309833, "grad_norm": 0.8317708969116211, "learning_rate": 3.9288581127013603e-05, "loss": 0.084, "step": 60150 }, { "epoch": 2.1862053928337817, "grad_norm": 0.5231217741966248, "learning_rate": 3.9284322610053016e-05, "loss": 0.0832, "step": 60160 }, { "epoch": 2.1865687913365797, "grad_norm": 1.9025609493255615, "learning_rate": 3.928006347763179e-05, "loss": 0.1349, "step": 60170 }, { "epoch": 2.1869321898393776, "grad_norm": 1.5179822444915771, "learning_rate": 3.927580372993344e-05, "loss": 0.1029, "step": 60180 }, { "epoch": 2.187295588342176, "grad_norm": 1.7581968307495117, "learning_rate": 3.9271543367141494e-05, "loss": 0.1232, "step": 60190 }, { "epoch": 2.187658986844974, "grad_norm": 1.4503281116485596, "learning_rate": 3.926728238943953e-05, "loss": 0.0832, "step": 60200 }, { "epoch": 2.1880223853477725, "grad_norm": 1.222233533859253, "learning_rate": 3.926302079701113e-05, "loss": 0.0918, "step": 60210 }, { "epoch": 2.1883857838505705, "grad_norm": 2.6328423023223877, "learning_rate": 3.9258758590039915e-05, "loss": 0.1229, "step": 60220 }, { "epoch": 2.188749182353369, "grad_norm": 1.2800387144088745, "learning_rate": 3.925449576870952e-05, "loss": 0.1132, "step": 60230 }, { "epoch": 2.189112580856167, "grad_norm": 1.5218274593353271, "learning_rate": 3.925023233320362e-05, "loss": 0.1508, "step": 60240 }, { "epoch": 2.189475979358965, "grad_norm": 0.6339848041534424, "learning_rate": 3.9245968283705916e-05, "loss": 0.0934, "step": 60250 }, { "epoch": 2.1898393778617633, "grad_norm": 0.6518699526786804, "learning_rate": 3.924170362040012e-05, "loss": 0.0979, "step": 60260 }, { "epoch": 2.1902027763645613, "grad_norm": 0.6267105340957642, "learning_rate": 3.923743834346999e-05, "loss": 0.3877, "step": 60270 }, { "epoch": 
2.1905661748673597, "grad_norm": 0.5715605616569519, "learning_rate": 3.92331724530993e-05, "loss": 0.1171, "step": 60280 }, { "epoch": 2.1909295733701577, "grad_norm": 1.068161129951477, "learning_rate": 3.922890594947185e-05, "loss": 0.1452, "step": 60290 }, { "epoch": 2.1912929718729557, "grad_norm": 0.9280456304550171, "learning_rate": 3.9224638832771475e-05, "loss": 0.0951, "step": 60300 }, { "epoch": 2.191656370375754, "grad_norm": 1.1696865558624268, "learning_rate": 3.922037110318201e-05, "loss": 0.1019, "step": 60310 }, { "epoch": 2.192019768878552, "grad_norm": 0.8494959473609924, "learning_rate": 3.921610276088736e-05, "loss": 0.1189, "step": 60320 }, { "epoch": 2.1923831673813505, "grad_norm": 3.686048746109009, "learning_rate": 3.921183380607142e-05, "loss": 0.1161, "step": 60330 }, { "epoch": 2.1927465658841485, "grad_norm": 1.5831258296966553, "learning_rate": 3.920756423891814e-05, "loss": 0.1309, "step": 60340 }, { "epoch": 2.193109964386947, "grad_norm": 1.9985876083374023, "learning_rate": 3.920329405961145e-05, "loss": 0.1393, "step": 60350 }, { "epoch": 2.193473362889745, "grad_norm": 4.160605430603027, "learning_rate": 3.919902326833536e-05, "loss": 0.1535, "step": 60360 }, { "epoch": 2.193836761392543, "grad_norm": 0.43690192699432373, "learning_rate": 3.919475186527388e-05, "loss": 0.1186, "step": 60370 }, { "epoch": 2.1942001598953413, "grad_norm": 0.8073493242263794, "learning_rate": 3.9190479850611044e-05, "loss": 0.1047, "step": 60380 }, { "epoch": 2.1945635583981393, "grad_norm": 9.085131645202637, "learning_rate": 3.9186207224530925e-05, "loss": 0.1332, "step": 60390 }, { "epoch": 2.1949269569009378, "grad_norm": 1.6787877082824707, "learning_rate": 3.9181933987217614e-05, "loss": 0.0998, "step": 60400 }, { "epoch": 2.1952903554037357, "grad_norm": 0.6496911644935608, "learning_rate": 3.917766013885522e-05, "loss": 0.103, "step": 60410 }, { "epoch": 2.1956537539065337, "grad_norm": 0.9650323987007141, "learning_rate": 
3.9173385679627896e-05, "loss": 0.1055, "step": 60420 }, { "epoch": 2.196017152409332, "grad_norm": 2.345998525619507, "learning_rate": 3.916911060971981e-05, "loss": 0.1, "step": 60430 }, { "epoch": 2.19638055091213, "grad_norm": 0.6440123915672302, "learning_rate": 3.9164834929315165e-05, "loss": 0.1562, "step": 60440 }, { "epoch": 2.1967439494149286, "grad_norm": 6.226611614227295, "learning_rate": 3.916055863859818e-05, "loss": 0.0971, "step": 60450 }, { "epoch": 2.1971073479177265, "grad_norm": 6.518206596374512, "learning_rate": 3.915628173775311e-05, "loss": 0.0858, "step": 60460 }, { "epoch": 2.1974707464205245, "grad_norm": 0.48097607493400574, "learning_rate": 3.915200422696423e-05, "loss": 0.1248, "step": 60470 }, { "epoch": 2.197834144923323, "grad_norm": 1.0158125162124634, "learning_rate": 3.914772610641584e-05, "loss": 0.0952, "step": 60480 }, { "epoch": 2.198197543426121, "grad_norm": 0.9592711925506592, "learning_rate": 3.914344737629226e-05, "loss": 0.1202, "step": 60490 }, { "epoch": 2.1985609419289194, "grad_norm": 0.8496592044830322, "learning_rate": 3.9139168036777864e-05, "loss": 0.1377, "step": 60500 }, { "epoch": 2.1989243404317174, "grad_norm": 0.9268959760665894, "learning_rate": 3.913488808805702e-05, "loss": 0.1058, "step": 60510 }, { "epoch": 2.199287738934516, "grad_norm": 1.091874122619629, "learning_rate": 3.913060753031414e-05, "loss": 0.1232, "step": 60520 }, { "epoch": 2.1996511374373138, "grad_norm": 45.63993835449219, "learning_rate": 3.912632636373367e-05, "loss": 0.4101, "step": 60530 }, { "epoch": 2.2000145359401118, "grad_norm": 1.5052204132080078, "learning_rate": 3.912204458850005e-05, "loss": 0.1542, "step": 60540 }, { "epoch": 2.20037793444291, "grad_norm": 0.9882798790931702, "learning_rate": 3.911776220479777e-05, "loss": 0.1096, "step": 60550 }, { "epoch": 2.200741332945708, "grad_norm": 2.0385029315948486, "learning_rate": 3.9113479212811356e-05, "loss": 0.0945, "step": 60560 }, { "epoch": 2.2011047314485066, 
"grad_norm": 0.5360209345817566, "learning_rate": 3.910919561272533e-05, "loss": 0.1064, "step": 60570 }, { "epoch": 2.2014681299513046, "grad_norm": 2.028599739074707, "learning_rate": 3.910491140472428e-05, "loss": 0.1076, "step": 60580 }, { "epoch": 2.2018315284541026, "grad_norm": 2.3928070068359375, "learning_rate": 3.910062658899277e-05, "loss": 0.1278, "step": 60590 }, { "epoch": 2.202194926956901, "grad_norm": 0.851287305355072, "learning_rate": 3.9096341165715436e-05, "loss": 0.0905, "step": 60600 }, { "epoch": 2.202194926956901, "eval_loss": 0.3481411039829254, "eval_runtime": 180.2524, "eval_samples_per_second": 41.131, "eval_steps_per_second": 5.143, "eval_wer": 0.16283333635885056, "step": 60600 }, { "epoch": 2.202558325459699, "grad_norm": 2.9646081924438477, "learning_rate": 3.9092055135076915e-05, "loss": 0.1062, "step": 60610 }, { "epoch": 2.2029217239624974, "grad_norm": 0.6181505918502808, "learning_rate": 3.908776849726188e-05, "loss": 0.102, "step": 60620 }, { "epoch": 2.2032851224652954, "grad_norm": 0.49643078446388245, "learning_rate": 3.908348125245502e-05, "loss": 0.1266, "step": 60630 }, { "epoch": 2.203648520968094, "grad_norm": 4.227423667907715, "learning_rate": 3.907919340084106e-05, "loss": 0.1613, "step": 60640 }, { "epoch": 2.204011919470892, "grad_norm": 0.5859548449516296, "learning_rate": 3.9074904942604764e-05, "loss": 0.0863, "step": 60650 }, { "epoch": 2.20437531797369, "grad_norm": 0.9373226761817932, "learning_rate": 3.9070615877930886e-05, "loss": 0.1071, "step": 60660 }, { "epoch": 2.2047387164764882, "grad_norm": 0.8272415399551392, "learning_rate": 3.906632620700422e-05, "loss": 0.1139, "step": 60670 }, { "epoch": 2.205102114979286, "grad_norm": 1.1634105443954468, "learning_rate": 3.9062035930009625e-05, "loss": 0.0981, "step": 60680 }, { "epoch": 2.2054655134820846, "grad_norm": 1.0491262674331665, "learning_rate": 3.905774504713192e-05, "loss": 0.1312, "step": 60690 }, { "epoch": 2.2058289119848826, "grad_norm": 
0.6341159343719482, "learning_rate": 3.905345355855601e-05, "loss": 0.0847, "step": 60700 }, { "epoch": 2.2061923104876806, "grad_norm": 0.8382464647293091, "learning_rate": 3.904916146446678e-05, "loss": 0.0945, "step": 60710 }, { "epoch": 2.206555708990479, "grad_norm": 0.5253706574440002, "learning_rate": 3.904486876504917e-05, "loss": 0.1328, "step": 60720 }, { "epoch": 2.206919107493277, "grad_norm": 3.4987101554870605, "learning_rate": 3.904057546048815e-05, "loss": 0.0862, "step": 60730 }, { "epoch": 2.2072825059960755, "grad_norm": 1.807373285293579, "learning_rate": 3.903628155096867e-05, "loss": 0.1005, "step": 60740 }, { "epoch": 2.2076459044988734, "grad_norm": 1.1272157430648804, "learning_rate": 3.9031987036675774e-05, "loss": 0.1044, "step": 60750 }, { "epoch": 2.2080093030016714, "grad_norm": 0.47526538372039795, "learning_rate": 3.902769191779448e-05, "loss": 0.0947, "step": 60760 }, { "epoch": 2.20837270150447, "grad_norm": 0.8546761274337769, "learning_rate": 3.9023396194509846e-05, "loss": 0.1129, "step": 60770 }, { "epoch": 2.208736100007268, "grad_norm": 0.557783305644989, "learning_rate": 3.901909986700697e-05, "loss": 0.1198, "step": 60780 }, { "epoch": 2.2090994985100663, "grad_norm": 0.5007415413856506, "learning_rate": 3.901480293547096e-05, "loss": 0.1154, "step": 60790 }, { "epoch": 2.2094628970128642, "grad_norm": 1.8647228479385376, "learning_rate": 3.901050540008696e-05, "loss": 0.1997, "step": 60800 }, { "epoch": 2.2098262955156627, "grad_norm": 0.7277741432189941, "learning_rate": 3.900620726104012e-05, "loss": 0.1032, "step": 60810 }, { "epoch": 2.2101896940184607, "grad_norm": 0.4809872806072235, "learning_rate": 3.9001908518515656e-05, "loss": 0.1162, "step": 60820 }, { "epoch": 2.2105530925212586, "grad_norm": 0.7930201888084412, "learning_rate": 3.899760917269877e-05, "loss": 0.1207, "step": 60830 }, { "epoch": 2.210916491024057, "grad_norm": 1.0866421461105347, "learning_rate": 3.89933092237747e-05, "loss": 0.1334, "step": 
60840 }, { "epoch": 2.211279889526855, "grad_norm": 2.5568645000457764, "learning_rate": 3.898900867192874e-05, "loss": 0.0934, "step": 60850 }, { "epoch": 2.2116432880296535, "grad_norm": 1.1865488290786743, "learning_rate": 3.8984707517346154e-05, "loss": 0.1072, "step": 60860 }, { "epoch": 2.2120066865324515, "grad_norm": 1.0457924604415894, "learning_rate": 3.8980405760212284e-05, "loss": 0.1108, "step": 60870 }, { "epoch": 2.2123700850352495, "grad_norm": 1.0669806003570557, "learning_rate": 3.897610340071247e-05, "loss": 0.1128, "step": 60880 }, { "epoch": 2.212733483538048, "grad_norm": 4.467153072357178, "learning_rate": 3.897180043903209e-05, "loss": 0.1347, "step": 60890 }, { "epoch": 2.213096882040846, "grad_norm": 0.48086392879486084, "learning_rate": 3.896749687535655e-05, "loss": 0.1018, "step": 60900 }, { "epoch": 2.2134602805436443, "grad_norm": 0.6917502284049988, "learning_rate": 3.8963192709871253e-05, "loss": 0.0779, "step": 60910 }, { "epoch": 2.2138236790464423, "grad_norm": 0.9939578771591187, "learning_rate": 3.8958887942761665e-05, "loss": 0.1278, "step": 60920 }, { "epoch": 2.2141870775492407, "grad_norm": 0.8723199963569641, "learning_rate": 3.895458257421327e-05, "loss": 0.1034, "step": 60930 }, { "epoch": 2.2145504760520387, "grad_norm": 2.1347460746765137, "learning_rate": 3.8950276604411554e-05, "loss": 0.1086, "step": 60940 }, { "epoch": 2.2149138745548367, "grad_norm": 0.6032381653785706, "learning_rate": 3.894597003354206e-05, "loss": 0.1141, "step": 60950 }, { "epoch": 2.215277273057635, "grad_norm": 0.540093719959259, "learning_rate": 3.894166286179033e-05, "loss": 0.0892, "step": 60960 }, { "epoch": 2.215640671560433, "grad_norm": 0.6019798517227173, "learning_rate": 3.893735508934197e-05, "loss": 0.0911, "step": 60970 }, { "epoch": 2.2160040700632315, "grad_norm": 1.290984869003296, "learning_rate": 3.893304671638254e-05, "loss": 0.1283, "step": 60980 }, { "epoch": 2.2163674685660295, "grad_norm": 0.5830800533294678, 
"learning_rate": 3.892873774309772e-05, "loss": 0.1094, "step": 60990 }, { "epoch": 2.2167308670688275, "grad_norm": 1.1006908416748047, "learning_rate": 3.892442816967315e-05, "loss": 0.1157, "step": 61000 }, { "epoch": 2.217094265571626, "grad_norm": 0.42782625555992126, "learning_rate": 3.8920117996294505e-05, "loss": 0.0852, "step": 61010 }, { "epoch": 2.217457664074424, "grad_norm": 1.036010503768921, "learning_rate": 3.8915807223147506e-05, "loss": 0.1175, "step": 61020 }, { "epoch": 2.2178210625772223, "grad_norm": 1.0316133499145508, "learning_rate": 3.891149585041789e-05, "loss": 0.1007, "step": 61030 }, { "epoch": 2.2181844610800203, "grad_norm": 1.3433195352554321, "learning_rate": 3.890718387829141e-05, "loss": 2.4829, "step": 61040 }, { "epoch": 2.2185478595828183, "grad_norm": 1.0637513399124146, "learning_rate": 3.890287130695386e-05, "loss": 0.1012, "step": 61050 }, { "epoch": 2.2189112580856167, "grad_norm": 0.9853934645652771, "learning_rate": 3.8898558136591055e-05, "loss": 0.0983, "step": 61060 }, { "epoch": 2.2192746565884147, "grad_norm": 0.6070169806480408, "learning_rate": 3.889424436738882e-05, "loss": 0.0933, "step": 61070 }, { "epoch": 2.219638055091213, "grad_norm": 0.9032323360443115, "learning_rate": 3.8889929999533045e-05, "loss": 0.1039, "step": 61080 }, { "epoch": 2.220001453594011, "grad_norm": 1.1702359914779663, "learning_rate": 3.888561503320961e-05, "loss": 0.1674, "step": 61090 }, { "epoch": 2.2203648520968096, "grad_norm": 1.5377318859100342, "learning_rate": 3.888129946860442e-05, "loss": 0.0977, "step": 61100 }, { "epoch": 2.2207282505996075, "grad_norm": 0.8765788078308105, "learning_rate": 3.887698330590342e-05, "loss": 0.1133, "step": 61110 }, { "epoch": 2.2210916491024055, "grad_norm": 1.543609857559204, "learning_rate": 3.887266654529259e-05, "loss": 0.1335, "step": 61120 }, { "epoch": 2.221455047605204, "grad_norm": 2.144033908843994, "learning_rate": 3.886834918695792e-05, "loss": 0.1097, "step": 61130 }, { "epoch": 
2.221818446108002, "grad_norm": 0.9922833442687988, "learning_rate": 3.886403123108542e-05, "loss": 0.1245, "step": 61140 }, { "epoch": 2.2221818446108004, "grad_norm": 0.7214832305908203, "learning_rate": 3.885971267786115e-05, "loss": 0.3578, "step": 61150 }, { "epoch": 2.2225452431135984, "grad_norm": 0.3823475241661072, "learning_rate": 3.8855393527471175e-05, "loss": 0.1396, "step": 61160 }, { "epoch": 2.2229086416163963, "grad_norm": 0.4039243459701538, "learning_rate": 3.885107378010158e-05, "loss": 0.0998, "step": 61170 }, { "epoch": 2.2232720401191948, "grad_norm": 0.6202207207679749, "learning_rate": 3.884675343593851e-05, "loss": 0.1278, "step": 61180 }, { "epoch": 2.2236354386219928, "grad_norm": 1.5638877153396606, "learning_rate": 3.884243249516809e-05, "loss": 0.1162, "step": 61190 }, { "epoch": 2.223998837124791, "grad_norm": 2.9136829376220703, "learning_rate": 3.8838110957976514e-05, "loss": 0.1007, "step": 61200 }, { "epoch": 2.223998837124791, "eval_loss": 0.3254208564758301, "eval_runtime": 180.3346, "eval_samples_per_second": 41.112, "eval_steps_per_second": 5.14, "eval_wer": 0.15656143917802748, "step": 61200 }, { "epoch": 2.224362235627589, "grad_norm": 0.40365439653396606, "learning_rate": 3.883378882454998e-05, "loss": 0.1016, "step": 61210 }, { "epoch": 2.2247256341303876, "grad_norm": 0.48598694801330566, "learning_rate": 3.882946609507468e-05, "loss": 0.1089, "step": 61220 }, { "epoch": 2.2250890326331856, "grad_norm": 1.7332137823104858, "learning_rate": 3.882514276973692e-05, "loss": 0.1101, "step": 61230 }, { "epoch": 2.2254524311359836, "grad_norm": 2.3783786296844482, "learning_rate": 3.882081884872293e-05, "loss": 0.0936, "step": 61240 }, { "epoch": 2.225815829638782, "grad_norm": 0.684394896030426, "learning_rate": 3.881649433221904e-05, "loss": 0.0868, "step": 61250 }, { "epoch": 2.22617922814158, "grad_norm": 0.43269750475883484, "learning_rate": 3.881216922041156e-05, "loss": 0.1026, "step": 61260 }, { "epoch": 
2.2265426266443784, "grad_norm": 0.9126709699630737, "learning_rate": 3.8807843513486866e-05, "loss": 0.1436, "step": 61270 }, { "epoch": 2.2269060251471764, "grad_norm": 1.7345128059387207, "learning_rate": 3.880351721163131e-05, "loss": 0.0992, "step": 61280 }, { "epoch": 2.2272694236499744, "grad_norm": 1.4722065925598145, "learning_rate": 3.879919031503131e-05, "loss": 0.1637, "step": 61290 }, { "epoch": 2.227632822152773, "grad_norm": 0.6145905256271362, "learning_rate": 3.879486282387331e-05, "loss": 0.0881, "step": 61300 }, { "epoch": 2.227996220655571, "grad_norm": 0.5936566591262817, "learning_rate": 3.879053473834374e-05, "loss": 0.0947, "step": 61310 }, { "epoch": 2.228359619158369, "grad_norm": 2.5217325687408447, "learning_rate": 3.87862060586291e-05, "loss": 0.1251, "step": 61320 }, { "epoch": 2.228723017661167, "grad_norm": 2.556070327758789, "learning_rate": 3.878187678491589e-05, "loss": 0.129, "step": 61330 }, { "epoch": 2.229086416163965, "grad_norm": 1.7533297538757324, "learning_rate": 3.877754691739065e-05, "loss": 0.1331, "step": 61340 }, { "epoch": 2.2294498146667636, "grad_norm": 0.6436717510223389, "learning_rate": 3.877321645623994e-05, "loss": 0.0836, "step": 61350 }, { "epoch": 2.2298132131695616, "grad_norm": 0.5834245085716248, "learning_rate": 3.8768885401650325e-05, "loss": 0.0953, "step": 61360 }, { "epoch": 2.23017661167236, "grad_norm": 2.3103013038635254, "learning_rate": 3.8764553753808436e-05, "loss": 0.1138, "step": 61370 }, { "epoch": 2.230540010175158, "grad_norm": 1.5668505430221558, "learning_rate": 3.87602215129009e-05, "loss": 0.1569, "step": 61380 }, { "epoch": 2.2309034086779564, "grad_norm": 0.719791054725647, "learning_rate": 3.875588867911437e-05, "loss": 0.108, "step": 61390 }, { "epoch": 2.2312668071807544, "grad_norm": 0.729350745677948, "learning_rate": 3.875155525263555e-05, "loss": 0.0832, "step": 61400 }, { "epoch": 2.2316302056835524, "grad_norm": 1.3647226095199585, "learning_rate": 3.874722123365113e-05, 
"loss": 0.0913, "step": 61410 }, { "epoch": 2.231993604186351, "grad_norm": 0.6896275877952576, "learning_rate": 3.8742886622347876e-05, "loss": 0.1133, "step": 61420 }, { "epoch": 2.232357002689149, "grad_norm": 0.8130580186843872, "learning_rate": 3.8738551418912526e-05, "loss": 0.0909, "step": 61430 }, { "epoch": 2.2327204011919473, "grad_norm": 1.155916690826416, "learning_rate": 3.873421562353188e-05, "loss": 0.114, "step": 61440 }, { "epoch": 2.2330837996947452, "grad_norm": 1.4737950563430786, "learning_rate": 3.872987923639274e-05, "loss": 0.4289, "step": 61450 }, { "epoch": 2.2334471981975432, "grad_norm": 0.41144660115242004, "learning_rate": 3.8725542257681966e-05, "loss": 0.0862, "step": 61460 }, { "epoch": 2.2338105967003417, "grad_norm": 0.5804570913314819, "learning_rate": 3.872120468758641e-05, "loss": 0.1067, "step": 61470 }, { "epoch": 2.2341739952031396, "grad_norm": 0.8408393263816833, "learning_rate": 3.871686652629296e-05, "loss": 0.1097, "step": 61480 }, { "epoch": 2.234537393705938, "grad_norm": 1.0146747827529907, "learning_rate": 3.871252777398854e-05, "loss": 0.1391, "step": 61490 }, { "epoch": 2.234900792208736, "grad_norm": 0.8638483881950378, "learning_rate": 3.8708188430860084e-05, "loss": 0.5518, "step": 61500 }, { "epoch": 2.2352641907115345, "grad_norm": 0.9493032693862915, "learning_rate": 3.8703848497094565e-05, "loss": 0.3308, "step": 61510 }, { "epoch": 2.2356275892143325, "grad_norm": 2.7466158866882324, "learning_rate": 3.8699507972878974e-05, "loss": 0.1196, "step": 61520 }, { "epoch": 2.2359909877171305, "grad_norm": 0.737774133682251, "learning_rate": 3.869516685840032e-05, "loss": 0.0988, "step": 61530 }, { "epoch": 2.236354386219929, "grad_norm": 0.4675132632255554, "learning_rate": 3.8690825153845667e-05, "loss": 0.1314, "step": 61540 }, { "epoch": 2.236717784722727, "grad_norm": 0.9533403515815735, "learning_rate": 3.8686482859402055e-05, "loss": 0.1024, "step": 61550 }, { "epoch": 2.2370811832255253, "grad_norm": 
0.7988652586936951, "learning_rate": 3.8682139975256605e-05, "loss": 0.1002, "step": 61560 }, { "epoch": 2.2374445817283233, "grad_norm": 0.45931610465049744, "learning_rate": 3.867779650159642e-05, "loss": 0.1012, "step": 61570 }, { "epoch": 2.2378079802311213, "grad_norm": 2.8576176166534424, "learning_rate": 3.8673452438608646e-05, "loss": 0.2177, "step": 61580 }, { "epoch": 2.2381713787339197, "grad_norm": 1.2942947149276733, "learning_rate": 3.8669107786480464e-05, "loss": 0.1286, "step": 61590 }, { "epoch": 2.2385347772367177, "grad_norm": 0.4589090049266815, "learning_rate": 3.866476254539906e-05, "loss": 0.0818, "step": 61600 }, { "epoch": 2.238898175739516, "grad_norm": 0.5710172057151794, "learning_rate": 3.866041671555166e-05, "loss": 0.2093, "step": 61610 }, { "epoch": 2.239261574242314, "grad_norm": 0.6458502411842346, "learning_rate": 3.86560702971255e-05, "loss": 0.1259, "step": 61620 }, { "epoch": 2.239624972745112, "grad_norm": 1.265261173248291, "learning_rate": 3.865172329030786e-05, "loss": 0.1009, "step": 61630 }, { "epoch": 2.2399883712479105, "grad_norm": 0.49177274107933044, "learning_rate": 3.8647375695286036e-05, "loss": 0.1111, "step": 61640 }, { "epoch": 2.2403517697507085, "grad_norm": 1.8626538515090942, "learning_rate": 3.864302751224736e-05, "loss": 0.1194, "step": 61650 }, { "epoch": 2.240715168253507, "grad_norm": 0.9763522148132324, "learning_rate": 3.8638678741379166e-05, "loss": 0.0953, "step": 61660 }, { "epoch": 2.241078566756305, "grad_norm": 2.4940896034240723, "learning_rate": 3.863432938286883e-05, "loss": 0.1028, "step": 61670 }, { "epoch": 2.2414419652591033, "grad_norm": 1.7410259246826172, "learning_rate": 3.862997943690375e-05, "loss": 0.1435, "step": 61680 }, { "epoch": 2.2418053637619013, "grad_norm": 2.1346585750579834, "learning_rate": 3.862562890367135e-05, "loss": 0.1286, "step": 61690 }, { "epoch": 2.2421687622646993, "grad_norm": 0.6110004782676697, "learning_rate": 3.862127778335909e-05, "loss": 0.1131, 
"step": 61700 }, { "epoch": 2.2425321607674977, "grad_norm": 0.7446867227554321, "learning_rate": 3.8616926076154426e-05, "loss": 0.0888, "step": 61710 }, { "epoch": 2.2428955592702957, "grad_norm": 0.820365846157074, "learning_rate": 3.861257378224488e-05, "loss": 0.104, "step": 61720 }, { "epoch": 2.243258957773094, "grad_norm": 0.5953546166419983, "learning_rate": 3.860822090181795e-05, "loss": 0.0993, "step": 61730 }, { "epoch": 2.243622356275892, "grad_norm": 1.5128546953201294, "learning_rate": 3.86038674350612e-05, "loss": 0.2106, "step": 61740 }, { "epoch": 2.24398575477869, "grad_norm": 0.7002906799316406, "learning_rate": 3.859951338216221e-05, "loss": 0.0863, "step": 61750 }, { "epoch": 2.2443491532814885, "grad_norm": 0.7450056076049805, "learning_rate": 3.859515874330857e-05, "loss": 0.0991, "step": 61760 }, { "epoch": 2.2447125517842865, "grad_norm": 0.5604157447814941, "learning_rate": 3.859080351868792e-05, "loss": 0.1255, "step": 61770 }, { "epoch": 2.245075950287085, "grad_norm": 1.1846556663513184, "learning_rate": 3.85864477084879e-05, "loss": 0.1329, "step": 61780 }, { "epoch": 2.245439348789883, "grad_norm": 1.1680017709732056, "learning_rate": 3.8582091312896186e-05, "loss": 0.1409, "step": 61790 }, { "epoch": 2.2458027472926814, "grad_norm": 0.34896320104599, "learning_rate": 3.857773433210048e-05, "loss": 1.4069, "step": 61800 }, { "epoch": 2.2458027472926814, "eval_loss": 0.31015458703041077, "eval_runtime": 179.4469, "eval_samples_per_second": 41.316, "eval_steps_per_second": 5.166, "eval_wer": 0.15887595984533556, "step": 61800 }, { "epoch": 2.2461661457954794, "grad_norm": 8.44802188873291, "learning_rate": 3.8573376766288515e-05, "loss": 0.0953, "step": 61810 }, { "epoch": 2.2465295442982773, "grad_norm": 0.44796204566955566, "learning_rate": 3.8569018615648034e-05, "loss": 0.1058, "step": 61820 }, { "epoch": 2.2468929428010758, "grad_norm": 0.7886875867843628, "learning_rate": 3.8564659880366826e-05, "loss": 0.163, "step": 61830 }, { 
"epoch": 2.2472563413038738, "grad_norm": 0.5576759576797485, "learning_rate": 3.856030056063269e-05, "loss": 0.1326, "step": 61840 }, { "epoch": 2.247619739806672, "grad_norm": 0.8255923986434937, "learning_rate": 3.855594065663345e-05, "loss": 0.1045, "step": 61850 }, { "epoch": 2.24798313830947, "grad_norm": 1.2470930814743042, "learning_rate": 3.855158016855695e-05, "loss": 0.0788, "step": 61860 }, { "epoch": 2.248346536812268, "grad_norm": 0.9577877521514893, "learning_rate": 3.854721909659108e-05, "loss": 0.1024, "step": 61870 }, { "epoch": 2.2487099353150666, "grad_norm": 1.3195165395736694, "learning_rate": 3.854285744092375e-05, "loss": 0.0975, "step": 61880 }, { "epoch": 2.2490733338178646, "grad_norm": 0.8952762484550476, "learning_rate": 3.853849520174286e-05, "loss": 0.1479, "step": 61890 }, { "epoch": 2.249436732320663, "grad_norm": 0.9849411249160767, "learning_rate": 3.85341323792364e-05, "loss": 0.0954, "step": 61900 }, { "epoch": 2.249800130823461, "grad_norm": 1.1869410276412964, "learning_rate": 3.8529768973592325e-05, "loss": 0.104, "step": 61910 }, { "epoch": 2.250163529326259, "grad_norm": 0.7452064752578735, "learning_rate": 3.852540498499864e-05, "loss": 0.1, "step": 61920 }, { "epoch": 2.2505269278290574, "grad_norm": 0.7757828831672668, "learning_rate": 3.8521040413643385e-05, "loss": 0.1397, "step": 61930 }, { "epoch": 2.2508903263318554, "grad_norm": 1.0734906196594238, "learning_rate": 3.8516675259714594e-05, "loss": 0.1162, "step": 61940 }, { "epoch": 2.251253724834654, "grad_norm": 1.4619065523147583, "learning_rate": 3.851230952340037e-05, "loss": 0.6661, "step": 61950 }, { "epoch": 2.251617123337452, "grad_norm": 1.221156120300293, "learning_rate": 3.850794320488881e-05, "loss": 0.1048, "step": 61960 }, { "epoch": 2.2519805218402498, "grad_norm": 1.1556357145309448, "learning_rate": 3.8503576304368025e-05, "loss": 0.1435, "step": 61970 }, { "epoch": 2.252343920343048, "grad_norm": 0.5849198698997498, "learning_rate": 
3.849920882202619e-05, "loss": 0.1031, "step": 61980 }, { "epoch": 2.252707318845846, "grad_norm": 0.5589366555213928, "learning_rate": 3.849484075805148e-05, "loss": 0.1123, "step": 61990 }, { "epoch": 2.2530707173486446, "grad_norm": 1.709695816040039, "learning_rate": 3.849047211263209e-05, "loss": 0.1071, "step": 62000 }, { "epoch": 2.2534341158514426, "grad_norm": 5.30033016204834, "learning_rate": 3.848610288595626e-05, "loss": 0.1276, "step": 62010 }, { "epoch": 2.253797514354241, "grad_norm": 1.242638349533081, "learning_rate": 3.848173307821224e-05, "loss": 0.1183, "step": 62020 }, { "epoch": 2.254160912857039, "grad_norm": 0.650566816329956, "learning_rate": 3.84773626895883e-05, "loss": 0.1074, "step": 62030 }, { "epoch": 2.2545243113598374, "grad_norm": 0.8243488669395447, "learning_rate": 3.847299172027277e-05, "loss": 0.2269, "step": 62040 }, { "epoch": 2.2548877098626354, "grad_norm": 0.5993553996086121, "learning_rate": 3.846862017045396e-05, "loss": 0.1093, "step": 62050 }, { "epoch": 2.2552511083654334, "grad_norm": 1.5640254020690918, "learning_rate": 3.846424804032023e-05, "loss": 0.1403, "step": 62060 }, { "epoch": 2.255614506868232, "grad_norm": 2.9386844635009766, "learning_rate": 3.8459875330059946e-05, "loss": 0.1261, "step": 62070 }, { "epoch": 2.25597790537103, "grad_norm": 0.45292994379997253, "learning_rate": 3.845550203986154e-05, "loss": 0.0919, "step": 62080 }, { "epoch": 2.2563413038738283, "grad_norm": 1.122269868850708, "learning_rate": 3.845112816991341e-05, "loss": 0.1646, "step": 62090 }, { "epoch": 2.2567047023766262, "grad_norm": 0.33831652998924255, "learning_rate": 3.844675372040403e-05, "loss": 0.0923, "step": 62100 }, { "epoch": 2.257068100879424, "grad_norm": 0.6775882244110107, "learning_rate": 3.844237869152188e-05, "loss": 0.092, "step": 62110 }, { "epoch": 2.2574314993822227, "grad_norm": 1.5221953392028809, "learning_rate": 3.843800308345547e-05, "loss": 0.1027, "step": 62120 }, { "epoch": 2.2577948978850206, 
"grad_norm": 1.1137598752975464, "learning_rate": 3.8433626896393306e-05, "loss": 0.1145, "step": 62130 }, { "epoch": 2.258158296387819, "grad_norm": 1.5517561435699463, "learning_rate": 3.842925013052395e-05, "loss": 0.0914, "step": 62140 }, { "epoch": 2.258521694890617, "grad_norm": 0.9319009184837341, "learning_rate": 3.8424872786036006e-05, "loss": 0.0987, "step": 62150 }, { "epoch": 2.258885093393415, "grad_norm": 1.056016206741333, "learning_rate": 3.842049486311805e-05, "loss": 0.0768, "step": 62160 }, { "epoch": 2.2592484918962135, "grad_norm": 0.3143411874771118, "learning_rate": 3.8416116361958724e-05, "loss": 0.124, "step": 62170 }, { "epoch": 2.2596118903990114, "grad_norm": 0.5706644058227539, "learning_rate": 3.841173728274668e-05, "loss": 0.0781, "step": 62180 }, { "epoch": 2.25997528890181, "grad_norm": 0.7634672522544861, "learning_rate": 3.840735762567058e-05, "loss": 0.1179, "step": 62190 }, { "epoch": 2.260338687404608, "grad_norm": 1.0519330501556396, "learning_rate": 3.840297739091916e-05, "loss": 0.0966, "step": 62200 }, { "epoch": 2.260702085907406, "grad_norm": 0.7548292875289917, "learning_rate": 3.839859657868112e-05, "loss": 0.1004, "step": 62210 }, { "epoch": 2.2610654844102043, "grad_norm": 0.5876504182815552, "learning_rate": 3.8394215189145236e-05, "loss": 0.1199, "step": 62220 }, { "epoch": 2.2614288829130023, "grad_norm": 0.7557339668273926, "learning_rate": 3.838983322250028e-05, "loss": 0.1043, "step": 62230 }, { "epoch": 2.2617922814158007, "grad_norm": 0.9950221180915833, "learning_rate": 3.838545067893504e-05, "loss": 0.103, "step": 62240 }, { "epoch": 2.2621556799185987, "grad_norm": 1.2867968082427979, "learning_rate": 3.838106755863836e-05, "loss": 0.0987, "step": 62250 }, { "epoch": 2.2625190784213967, "grad_norm": 0.8998819589614868, "learning_rate": 3.837668386179909e-05, "loss": 0.1041, "step": 62260 }, { "epoch": 2.262882476924195, "grad_norm": 1.0797913074493408, "learning_rate": 3.837229958860611e-05, "loss": 0.1054, 
"step": 62270 }, { "epoch": 2.263245875426993, "grad_norm": 1.4692394733428955, "learning_rate": 3.836791473924831e-05, "loss": 0.1027, "step": 62280 }, { "epoch": 2.2636092739297915, "grad_norm": 1.2375293970108032, "learning_rate": 3.836352931391464e-05, "loss": 0.1983, "step": 62290 }, { "epoch": 2.2639726724325895, "grad_norm": 1.2827754020690918, "learning_rate": 3.8359143312794035e-05, "loss": 0.0914, "step": 62300 }, { "epoch": 2.264336070935388, "grad_norm": 0.5154075622558594, "learning_rate": 3.835475673607547e-05, "loss": 0.0992, "step": 62310 }, { "epoch": 2.264699469438186, "grad_norm": 0.3848717510700226, "learning_rate": 3.8350369583947956e-05, "loss": 0.115, "step": 62320 }, { "epoch": 2.2650628679409843, "grad_norm": 0.6954711675643921, "learning_rate": 3.834598185660052e-05, "loss": 0.1018, "step": 62330 }, { "epoch": 2.2654262664437823, "grad_norm": 1.0320098400115967, "learning_rate": 3.834159355422221e-05, "loss": 0.1365, "step": 62340 }, { "epoch": 2.2657896649465803, "grad_norm": 0.6527755856513977, "learning_rate": 3.83372046770021e-05, "loss": 0.0761, "step": 62350 }, { "epoch": 2.2661530634493787, "grad_norm": 1.2087364196777344, "learning_rate": 3.8332815225129303e-05, "loss": 0.0941, "step": 62360 }, { "epoch": 2.2665164619521767, "grad_norm": 1.7340302467346191, "learning_rate": 3.8328425198792926e-05, "loss": 0.1125, "step": 62370 }, { "epoch": 2.266879860454975, "grad_norm": 1.7903550863265991, "learning_rate": 3.8324034598182135e-05, "loss": 0.1045, "step": 62380 }, { "epoch": 2.267243258957773, "grad_norm": 0.498909592628479, "learning_rate": 3.8319643423486105e-05, "loss": 0.1317, "step": 62390 }, { "epoch": 2.267606657460571, "grad_norm": 1.1796486377716064, "learning_rate": 3.831525167489403e-05, "loss": 0.0968, "step": 62400 }, { "epoch": 2.267606657460571, "eval_loss": 0.3475956916809082, "eval_runtime": 179.5468, "eval_samples_per_second": 41.293, "eval_steps_per_second": 5.163, "eval_wer": 0.1566340515911195, "step": 62400 }, 
{ "epoch": 2.2679700559633695, "grad_norm": 0.7915635704994202, "learning_rate": 3.831085935259513e-05, "loss": 0.0949, "step": 62410 }, { "epoch": 2.2683334544661675, "grad_norm": 0.5292233824729919, "learning_rate": 3.8306466456778655e-05, "loss": 0.1073, "step": 62420 }, { "epoch": 2.268696852968966, "grad_norm": 0.5092893242835999, "learning_rate": 3.8302072987633895e-05, "loss": 0.1053, "step": 62430 }, { "epoch": 2.269060251471764, "grad_norm": 0.5169047117233276, "learning_rate": 3.829767894535013e-05, "loss": 0.1986, "step": 62440 }, { "epoch": 2.269423649974562, "grad_norm": 0.4594692587852478, "learning_rate": 3.829328433011671e-05, "loss": 0.1058, "step": 62450 }, { "epoch": 2.2697870484773603, "grad_norm": 0.4674893021583557, "learning_rate": 3.8288889142122955e-05, "loss": 0.1126, "step": 62460 }, { "epoch": 2.2701504469801583, "grad_norm": 1.422492504119873, "learning_rate": 3.828449338155825e-05, "loss": 0.1232, "step": 62470 }, { "epoch": 2.2705138454829568, "grad_norm": 2.171562671661377, "learning_rate": 3.828009704861199e-05, "loss": 0.1213, "step": 62480 }, { "epoch": 2.2708772439857547, "grad_norm": 1.4158885478973389, "learning_rate": 3.8275700143473595e-05, "loss": 0.1294, "step": 62490 }, { "epoch": 2.2712406424885527, "grad_norm": 0.7011764049530029, "learning_rate": 3.827130266633253e-05, "loss": 0.0875, "step": 62500 }, { "epoch": 2.271604040991351, "grad_norm": 2.2935948371887207, "learning_rate": 3.8266904617378235e-05, "loss": 0.2261, "step": 62510 }, { "epoch": 2.271967439494149, "grad_norm": 0.653005063533783, "learning_rate": 3.826250599680023e-05, "loss": 0.1304, "step": 62520 }, { "epoch": 2.2723308379969476, "grad_norm": 0.6509010791778564, "learning_rate": 3.8258106804788035e-05, "loss": 0.0971, "step": 62530 }, { "epoch": 2.2726942364997456, "grad_norm": 1.473751425743103, "learning_rate": 3.8253707041531186e-05, "loss": 0.1029, "step": 62540 }, { "epoch": 2.2730576350025435, "grad_norm": 0.31367307901382446, "learning_rate": 
3.824930670721926e-05, "loss": 0.0996, "step": 62550 }, { "epoch": 2.273421033505342, "grad_norm": 0.6324036121368408, "learning_rate": 3.824490580204185e-05, "loss": 0.0984, "step": 62560 }, { "epoch": 2.27378443200814, "grad_norm": 1.8539944887161255, "learning_rate": 3.824050432618858e-05, "loss": 0.157, "step": 62570 }, { "epoch": 2.2741478305109384, "grad_norm": 1.1299885511398315, "learning_rate": 3.823610227984907e-05, "loss": 0.1997, "step": 62580 }, { "epoch": 2.2745112290137364, "grad_norm": 1.0749928951263428, "learning_rate": 3.823169966321302e-05, "loss": 0.092, "step": 62590 }, { "epoch": 2.274874627516535, "grad_norm": 1.3616483211517334, "learning_rate": 3.822729647647011e-05, "loss": 0.1034, "step": 62600 }, { "epoch": 2.275238026019333, "grad_norm": 1.2886927127838135, "learning_rate": 3.8222892719810057e-05, "loss": 0.0943, "step": 62610 }, { "epoch": 2.275601424522131, "grad_norm": 0.5466746091842651, "learning_rate": 3.82184883934226e-05, "loss": 0.1075, "step": 62620 }, { "epoch": 2.275964823024929, "grad_norm": 0.6999200582504272, "learning_rate": 3.821408349749751e-05, "loss": 0.1001, "step": 62630 }, { "epoch": 2.276328221527727, "grad_norm": 0.6271117329597473, "learning_rate": 3.820967803222458e-05, "loss": 0.0887, "step": 62640 }, { "epoch": 2.2766916200305256, "grad_norm": 1.088416337966919, "learning_rate": 3.820527199779362e-05, "loss": 0.2975, "step": 62650 }, { "epoch": 2.2770550185333236, "grad_norm": 0.5583050847053528, "learning_rate": 3.820086539439448e-05, "loss": 0.0849, "step": 62660 }, { "epoch": 2.277418417036122, "grad_norm": 0.5963543057441711, "learning_rate": 3.819645822221701e-05, "loss": 0.1082, "step": 62670 }, { "epoch": 2.27778181553892, "grad_norm": 2.868208408355713, "learning_rate": 3.819205048145113e-05, "loss": 0.0928, "step": 62680 }, { "epoch": 2.278145214041718, "grad_norm": 0.9108635187149048, "learning_rate": 3.8187642172286706e-05, "loss": 0.1155, "step": 62690 }, { "epoch": 2.2785086125445164, 
"grad_norm": 0.9071031808853149, "learning_rate": 3.8183233294913725e-05, "loss": 0.0974, "step": 62700 }, { "epoch": 2.2788720110473144, "grad_norm": 0.5449077486991882, "learning_rate": 3.817882384952212e-05, "loss": 0.0807, "step": 62710 }, { "epoch": 2.279235409550113, "grad_norm": 0.7269715666770935, "learning_rate": 3.817441383630187e-05, "loss": 0.1273, "step": 62720 }, { "epoch": 2.279598808052911, "grad_norm": 1.493605375289917, "learning_rate": 3.817000325544302e-05, "loss": 0.1112, "step": 62730 }, { "epoch": 2.279962206555709, "grad_norm": 0.6935878992080688, "learning_rate": 3.816559210713558e-05, "loss": 0.1291, "step": 62740 }, { "epoch": 2.2803256050585072, "grad_norm": 1.932387113571167, "learning_rate": 3.8161180391569625e-05, "loss": 0.0937, "step": 62750 }, { "epoch": 2.280689003561305, "grad_norm": 1.2899200916290283, "learning_rate": 3.8156768108935226e-05, "loss": 0.0894, "step": 62760 }, { "epoch": 2.2810524020641036, "grad_norm": 1.262176752090454, "learning_rate": 3.815235525942251e-05, "loss": 0.4695, "step": 62770 }, { "epoch": 2.2814158005669016, "grad_norm": 0.48227742314338684, "learning_rate": 3.8147941843221604e-05, "loss": 0.1126, "step": 62780 }, { "epoch": 2.2817791990696996, "grad_norm": 1.0351576805114746, "learning_rate": 3.814352786052266e-05, "loss": 0.1762, "step": 62790 }, { "epoch": 2.282142597572498, "grad_norm": 1.1177520751953125, "learning_rate": 3.813911331151586e-05, "loss": 0.1046, "step": 62800 }, { "epoch": 2.282505996075296, "grad_norm": 7.0832295417785645, "learning_rate": 3.8134698196391427e-05, "loss": 0.1436, "step": 62810 }, { "epoch": 2.2828693945780945, "grad_norm": 0.9384248852729797, "learning_rate": 3.8130282515339576e-05, "loss": 0.1185, "step": 62820 }, { "epoch": 2.2832327930808924, "grad_norm": 2.5718233585357666, "learning_rate": 3.812586626855057e-05, "loss": 0.1172, "step": 62830 }, { "epoch": 2.2835961915836904, "grad_norm": 0.9541994333267212, "learning_rate": 3.812144945621469e-05, "loss": 
0.1141, "step": 62840 }, { "epoch": 2.283959590086489, "grad_norm": 0.7058838605880737, "learning_rate": 3.811703207852224e-05, "loss": 0.0813, "step": 62850 }, { "epoch": 2.284322988589287, "grad_norm": 0.6324445605278015, "learning_rate": 3.811261413566354e-05, "loss": 0.1308, "step": 62860 }, { "epoch": 2.2846863870920853, "grad_norm": 0.5424672365188599, "learning_rate": 3.810819562782896e-05, "loss": 0.1055, "step": 62870 }, { "epoch": 2.2850497855948833, "grad_norm": 0.5509172677993774, "learning_rate": 3.810377655520887e-05, "loss": 0.1323, "step": 62880 }, { "epoch": 2.2854131840976817, "grad_norm": 1.111088752746582, "learning_rate": 3.8099356917993664e-05, "loss": 0.142, "step": 62890 }, { "epoch": 2.2857765826004797, "grad_norm": 0.44855383038520813, "learning_rate": 3.8094936716373784e-05, "loss": 0.0943, "step": 62900 }, { "epoch": 2.286139981103278, "grad_norm": 3.367194890975952, "learning_rate": 3.8090515950539674e-05, "loss": 2.8216, "step": 62910 }, { "epoch": 2.286503379606076, "grad_norm": 0.8625146746635437, "learning_rate": 3.80860946206818e-05, "loss": 0.1108, "step": 62920 }, { "epoch": 2.286866778108874, "grad_norm": 0.6024346351623535, "learning_rate": 3.808167272699067e-05, "loss": 0.2589, "step": 62930 }, { "epoch": 2.2872301766116725, "grad_norm": 0.3697529733181, "learning_rate": 3.8077250269656813e-05, "loss": 0.1722, "step": 62940 }, { "epoch": 2.2875935751144705, "grad_norm": 1.8003566265106201, "learning_rate": 3.807282724887077e-05, "loss": 0.1144, "step": 62950 }, { "epoch": 2.287956973617269, "grad_norm": 0.6778300404548645, "learning_rate": 3.806840366482311e-05, "loss": 0.0946, "step": 62960 }, { "epoch": 2.288320372120067, "grad_norm": 0.5251741409301758, "learning_rate": 3.806397951770444e-05, "loss": 0.1181, "step": 62970 }, { "epoch": 2.288683770622865, "grad_norm": 1.101876974105835, "learning_rate": 3.805955480770537e-05, "loss": 0.1195, "step": 62980 }, { "epoch": 2.2890471691256633, "grad_norm": 0.5283622741699219, 
"learning_rate": 3.805512953501655e-05, "loss": 0.1299, "step": 62990 }, { "epoch": 2.2894105676284613, "grad_norm": 0.3856213390827179, "learning_rate": 3.8050703699828636e-05, "loss": 0.0909, "step": 63000 }, { "epoch": 2.2894105676284613, "eval_loss": 0.32783856987953186, "eval_runtime": 179.5121, "eval_samples_per_second": 41.301, "eval_steps_per_second": 5.164, "eval_wer": 0.15972915569916676, "step": 63000 }, { "epoch": 2.2897739661312597, "grad_norm": 0.5770326256752014, "learning_rate": 3.8046277302332357e-05, "loss": 0.1017, "step": 63010 }, { "epoch": 2.2901373646340577, "grad_norm": 0.5281986594200134, "learning_rate": 3.804185034271839e-05, "loss": 0.1164, "step": 63020 }, { "epoch": 2.2905007631368557, "grad_norm": 1.011020302772522, "learning_rate": 3.803742282117751e-05, "loss": 0.0986, "step": 63030 }, { "epoch": 2.290864161639654, "grad_norm": 0.9110655784606934, "learning_rate": 3.803299473790046e-05, "loss": 0.1498, "step": 63040 }, { "epoch": 2.291227560142452, "grad_norm": 1.5773357152938843, "learning_rate": 3.8028566093078036e-05, "loss": 0.0975, "step": 63050 }, { "epoch": 2.2915909586452505, "grad_norm": 3.258551597595215, "learning_rate": 3.802413688690105e-05, "loss": 0.0939, "step": 63060 }, { "epoch": 2.2919543571480485, "grad_norm": 0.6953330039978027, "learning_rate": 3.801970711956036e-05, "loss": 0.1111, "step": 63070 }, { "epoch": 2.2923177556508465, "grad_norm": 0.8726534843444824, "learning_rate": 3.80152767912468e-05, "loss": 0.1196, "step": 63080 }, { "epoch": 2.292681154153645, "grad_norm": 0.5163739323616028, "learning_rate": 3.801084590215128e-05, "loss": 0.1048, "step": 63090 }, { "epoch": 2.293044552656443, "grad_norm": 2.2029974460601807, "learning_rate": 3.80064144524647e-05, "loss": 0.099, "step": 63100 }, { "epoch": 2.2934079511592413, "grad_norm": 0.8436546921730042, "learning_rate": 3.8001982442378004e-05, "loss": 0.0945, "step": 63110 }, { "epoch": 2.2937713496620393, "grad_norm": 0.5407220721244812, 
"learning_rate": 3.799754987208214e-05, "loss": 0.1066, "step": 63120 }, { "epoch": 2.2941347481648373, "grad_norm": 0.7019248008728027, "learning_rate": 3.7993116741768095e-05, "loss": 0.11, "step": 63130 }, { "epoch": 2.2944981466676357, "grad_norm": 0.5901986956596375, "learning_rate": 3.7988683051626886e-05, "loss": 0.119, "step": 63140 }, { "epoch": 2.2948615451704337, "grad_norm": 0.6483830809593201, "learning_rate": 3.798424880184954e-05, "loss": 0.0827, "step": 63150 }, { "epoch": 2.295224943673232, "grad_norm": 0.41235288977622986, "learning_rate": 3.7979813992627103e-05, "loss": 0.0921, "step": 63160 }, { "epoch": 2.29558834217603, "grad_norm": 3.996107339859009, "learning_rate": 3.797537862415066e-05, "loss": 0.0967, "step": 63170 }, { "epoch": 2.2959517406788286, "grad_norm": 2.476738452911377, "learning_rate": 3.7970942696611335e-05, "loss": 0.1186, "step": 63180 }, { "epoch": 2.2963151391816266, "grad_norm": 0.8279284238815308, "learning_rate": 3.7966506210200224e-05, "loss": 0.1086, "step": 63190 }, { "epoch": 2.296678537684425, "grad_norm": 5.163793087005615, "learning_rate": 3.79620691651085e-05, "loss": 0.0827, "step": 63200 }, { "epoch": 2.297041936187223, "grad_norm": 0.577820360660553, "learning_rate": 3.795763156152734e-05, "loss": 0.1126, "step": 63210 }, { "epoch": 2.297405334690021, "grad_norm": 0.29767242074012756, "learning_rate": 3.7953193399647934e-05, "loss": 0.1169, "step": 63220 }, { "epoch": 2.2977687331928194, "grad_norm": 1.0364243984222412, "learning_rate": 3.794875467966152e-05, "loss": 0.1004, "step": 63230 }, { "epoch": 2.2981321316956174, "grad_norm": 0.5417031049728394, "learning_rate": 3.794475935465031e-05, "loss": 4.3853, "step": 63240 }, { "epoch": 2.298495530198416, "grad_norm": 0.8863941431045532, "learning_rate": 3.794031957478746e-05, "loss": 0.0942, "step": 63250 }, { "epoch": 2.2988589287012138, "grad_norm": 0.6005067825317383, "learning_rate": 3.7935879237372296e-05, "loss": 0.0879, "step": 63260 }, { "epoch": 
2.2992223272040118, "grad_norm": 0.5702997446060181, "learning_rate": 3.793143834259612e-05, "loss": 0.1344, "step": 63270 }, { "epoch": 2.29958572570681, "grad_norm": 0.5447559356689453, "learning_rate": 3.7926996890650265e-05, "loss": 0.1249, "step": 63280 }, { "epoch": 2.299949124209608, "grad_norm": 0.9382325410842896, "learning_rate": 3.7922554881726125e-05, "loss": 0.1639, "step": 63290 }, { "epoch": 2.3003125227124066, "grad_norm": 1.128554344177246, "learning_rate": 3.791811231601506e-05, "loss": 0.124, "step": 63300 }, { "epoch": 2.3006759212152046, "grad_norm": 1.2001831531524658, "learning_rate": 3.7913669193708505e-05, "loss": 0.1115, "step": 63310 }, { "epoch": 2.3010393197180026, "grad_norm": 0.5141827464103699, "learning_rate": 3.790922551499789e-05, "loss": 0.1934, "step": 63320 }, { "epoch": 2.301402718220801, "grad_norm": 1.1889158487319946, "learning_rate": 3.7904781280074674e-05, "loss": 0.112, "step": 63330 }, { "epoch": 2.301766116723599, "grad_norm": 1.0070478916168213, "learning_rate": 3.7900336489130355e-05, "loss": 0.1434, "step": 63340 }, { "epoch": 2.3021295152263974, "grad_norm": 0.6805721521377563, "learning_rate": 3.789589114235643e-05, "loss": 0.088, "step": 63350 }, { "epoch": 2.3024929137291954, "grad_norm": 0.8101871013641357, "learning_rate": 3.789144523994445e-05, "loss": 0.0786, "step": 63360 }, { "epoch": 2.3028563122319934, "grad_norm": 0.5728216767311096, "learning_rate": 3.788699878208595e-05, "loss": 0.1176, "step": 63370 }, { "epoch": 2.303219710734792, "grad_norm": 6.430160999298096, "learning_rate": 3.788255176897253e-05, "loss": 0.1395, "step": 63380 }, { "epoch": 2.30358310923759, "grad_norm": 0.8273718953132629, "learning_rate": 3.78781042007958e-05, "loss": 0.1157, "step": 63390 }, { "epoch": 2.3039465077403882, "grad_norm": 0.7474293112754822, "learning_rate": 3.787365607774736e-05, "loss": 0.6462, "step": 63400 }, { "epoch": 2.304309906243186, "grad_norm": 4.311099052429199, "learning_rate": 
3.7869207400018905e-05, "loss": 0.1136, "step": 63410 }, { "epoch": 2.304673304745984, "grad_norm": 0.8261300921440125, "learning_rate": 3.7864758167802074e-05, "loss": 0.1357, "step": 63420 }, { "epoch": 2.3050367032487826, "grad_norm": 2.123488187789917, "learning_rate": 3.78603083812886e-05, "loss": 0.0907, "step": 63430 }, { "epoch": 2.3054001017515806, "grad_norm": 0.7351600527763367, "learning_rate": 3.7855858040670175e-05, "loss": 0.1308, "step": 63440 }, { "epoch": 2.305763500254379, "grad_norm": 3.070939064025879, "learning_rate": 3.785140714613859e-05, "loss": 2.963, "step": 63450 }, { "epoch": 2.306126898757177, "grad_norm": 0.4340088963508606, "learning_rate": 3.7846955697885586e-05, "loss": 0.0927, "step": 63460 }, { "epoch": 2.3064902972599755, "grad_norm": 0.6686544418334961, "learning_rate": 3.7842503696102976e-05, "loss": 0.1441, "step": 63470 }, { "epoch": 2.3068536957627734, "grad_norm": 7.206737041473389, "learning_rate": 3.7838051140982575e-05, "loss": 0.1299, "step": 63480 }, { "epoch": 2.307217094265572, "grad_norm": 0.6773508191108704, "learning_rate": 3.7833598032716225e-05, "loss": 0.1177, "step": 63490 }, { "epoch": 2.30758049276837, "grad_norm": 0.5695934295654297, "learning_rate": 3.78291443714958e-05, "loss": 0.0918, "step": 63500 }, { "epoch": 2.307943891271168, "grad_norm": 0.6884729862213135, "learning_rate": 3.782469015751319e-05, "loss": 0.1068, "step": 63510 }, { "epoch": 2.3083072897739663, "grad_norm": 0.5272583365440369, "learning_rate": 3.782023539096031e-05, "loss": 0.1021, "step": 63520 }, { "epoch": 2.3086706882767642, "grad_norm": 3.166252613067627, "learning_rate": 3.7815780072029103e-05, "loss": 0.1035, "step": 63530 }, { "epoch": 2.3090340867795627, "grad_norm": 0.47669315338134766, "learning_rate": 3.781132420091153e-05, "loss": 0.1203, "step": 63540 }, { "epoch": 2.3093974852823607, "grad_norm": 1.5463957786560059, "learning_rate": 3.780686777779958e-05, "loss": 0.0845, "step": 63550 }, { "epoch": 2.3097608837851586, 
"grad_norm": 0.9264553785324097, "learning_rate": 3.780241080288527e-05, "loss": 0.0906, "step": 63560 }, { "epoch": 2.310124282287957, "grad_norm": 0.604017436504364, "learning_rate": 3.7797953276360624e-05, "loss": 0.128, "step": 63570 }, { "epoch": 2.310487680790755, "grad_norm": 0.6354121565818787, "learning_rate": 3.779349519841771e-05, "loss": 0.1044, "step": 63580 }, { "epoch": 2.3108510792935535, "grad_norm": 0.6733710169792175, "learning_rate": 3.7789036569248606e-05, "loss": 0.1096, "step": 63590 }, { "epoch": 2.3112144777963515, "grad_norm": 0.6780581474304199, "learning_rate": 3.778457738904542e-05, "loss": 0.1325, "step": 63600 }, { "epoch": 2.3112144777963515, "eval_loss": 0.33778947591781616, "eval_runtime": 180.1308, "eval_samples_per_second": 41.159, "eval_steps_per_second": 5.146, "eval_wer": 0.15696988400167008, "step": 63600 }, { "epoch": 2.3115778762991495, "grad_norm": 0.42744994163513184, "learning_rate": 3.778011765800028e-05, "loss": 0.0901, "step": 63610 }, { "epoch": 2.311941274801948, "grad_norm": 0.5375288724899292, "learning_rate": 3.777565737630534e-05, "loss": 0.1126, "step": 63620 }, { "epoch": 2.312304673304746, "grad_norm": 0.6710574626922607, "learning_rate": 3.777119654415279e-05, "loss": 0.4501, "step": 63630 }, { "epoch": 2.3126680718075443, "grad_norm": 1.513808012008667, "learning_rate": 3.77667351617348e-05, "loss": 0.1793, "step": 63640 }, { "epoch": 2.3130314703103423, "grad_norm": 0.4935424029827118, "learning_rate": 3.776227322924364e-05, "loss": 0.0848, "step": 63650 }, { "epoch": 2.3133948688131403, "grad_norm": 1.5648393630981445, "learning_rate": 3.775781074687152e-05, "loss": 0.0965, "step": 63660 }, { "epoch": 2.3137582673159387, "grad_norm": 0.3886503279209137, "learning_rate": 3.775334771481073e-05, "loss": 0.1081, "step": 63670 }, { "epoch": 2.3141216658187367, "grad_norm": 1.0196889638900757, "learning_rate": 3.7748884133253566e-05, "loss": 0.0974, "step": 63680 }, { "epoch": 2.314485064321535, "grad_norm": 
1.0317192077636719, "learning_rate": 3.7744420002392345e-05, "loss": 0.1129, "step": 63690 }, { "epoch": 2.314848462824333, "grad_norm": 1.3416907787322998, "learning_rate": 3.773995532241941e-05, "loss": 0.0985, "step": 63700 }, { "epoch": 2.3152118613271315, "grad_norm": 1.2515931129455566, "learning_rate": 3.7735490093527126e-05, "loss": 0.1034, "step": 63710 }, { "epoch": 2.3155752598299295, "grad_norm": 0.6103869676589966, "learning_rate": 3.773102431590789e-05, "loss": 0.1153, "step": 63720 }, { "epoch": 2.3159386583327275, "grad_norm": 2.0320076942443848, "learning_rate": 3.772655798975412e-05, "loss": 0.1065, "step": 63730 }, { "epoch": 2.316302056835526, "grad_norm": 0.9758360385894775, "learning_rate": 3.772209111525824e-05, "loss": 0.1187, "step": 63740 }, { "epoch": 2.316665455338324, "grad_norm": 1.36004638671875, "learning_rate": 3.771762369261272e-05, "loss": 0.1045, "step": 63750 }, { "epoch": 2.3170288538411223, "grad_norm": 0.45251816511154175, "learning_rate": 3.771315572201004e-05, "loss": 0.1054, "step": 63760 }, { "epoch": 2.3173922523439203, "grad_norm": 3.3651912212371826, "learning_rate": 3.7708687203642724e-05, "loss": 0.1215, "step": 63770 }, { "epoch": 2.3177556508467188, "grad_norm": 0.9686463475227356, "learning_rate": 3.7704218137703284e-05, "loss": 0.1114, "step": 63780 }, { "epoch": 2.3181190493495167, "grad_norm": 0.7810651659965515, "learning_rate": 3.769974852438429e-05, "loss": 0.1284, "step": 63790 }, { "epoch": 2.3184824478523147, "grad_norm": 0.600099503993988, "learning_rate": 3.7695278363878325e-05, "loss": 0.0929, "step": 63800 }, { "epoch": 2.318845846355113, "grad_norm": 0.9034928679466248, "learning_rate": 3.769080765637798e-05, "loss": 0.099, "step": 63810 }, { "epoch": 2.319209244857911, "grad_norm": 1.4133280515670776, "learning_rate": 3.7686336402075885e-05, "loss": 0.1076, "step": 63820 }, { "epoch": 2.3195726433607096, "grad_norm": 0.6236594319343567, "learning_rate": 3.768186460116469e-05, "loss": 0.1036, "step": 
63830 }, { "epoch": 2.3199360418635075, "grad_norm": 2.116008996963501, "learning_rate": 3.7677392253837076e-05, "loss": 0.1521, "step": 63840 }, { "epoch": 2.3202994403663055, "grad_norm": 1.0746735334396362, "learning_rate": 3.767291936028574e-05, "loss": 0.5176, "step": 63850 }, { "epoch": 2.320662838869104, "grad_norm": 1.0380078554153442, "learning_rate": 3.766844592070339e-05, "loss": 0.0889, "step": 63860 }, { "epoch": 2.321026237371902, "grad_norm": 0.835041344165802, "learning_rate": 3.766397193528278e-05, "loss": 0.1305, "step": 63870 }, { "epoch": 2.3213896358747004, "grad_norm": 3.784654140472412, "learning_rate": 3.7659497404216685e-05, "loss": 0.1531, "step": 63880 }, { "epoch": 2.3217530343774984, "grad_norm": 1.1239734888076782, "learning_rate": 3.765502232769789e-05, "loss": 0.1244, "step": 63890 }, { "epoch": 2.3221164328802963, "grad_norm": 0.5810584425926208, "learning_rate": 3.7650546705919204e-05, "loss": 0.1013, "step": 63900 }, { "epoch": 2.3224798313830948, "grad_norm": 0.6790658831596375, "learning_rate": 3.7646070539073475e-05, "loss": 0.1047, "step": 63910 }, { "epoch": 2.3228432298858928, "grad_norm": 0.3619256615638733, "learning_rate": 3.7641593827353556e-05, "loss": 0.0927, "step": 63920 }, { "epoch": 2.323206628388691, "grad_norm": 2.329050064086914, "learning_rate": 3.7637116570952346e-05, "loss": 0.12, "step": 63930 }, { "epoch": 2.323570026891489, "grad_norm": 1.9159663915634155, "learning_rate": 3.763263877006273e-05, "loss": 0.1296, "step": 63940 }, { "epoch": 2.323933425394287, "grad_norm": 1.206432819366455, "learning_rate": 3.762816042487768e-05, "loss": 0.0802, "step": 63950 }, { "epoch": 2.3242968238970856, "grad_norm": 0.9730502963066101, "learning_rate": 3.762368153559012e-05, "loss": 0.1171, "step": 63960 }, { "epoch": 2.3246602223998836, "grad_norm": 0.3301490247249603, "learning_rate": 3.761920210239303e-05, "loss": 0.1111, "step": 63970 }, { "epoch": 2.325023620902682, "grad_norm": 0.580382227897644, "learning_rate": 
3.7614722125479425e-05, "loss": 0.0951, "step": 63980 }, { "epoch": 2.32538701940548, "grad_norm": 1.5714104175567627, "learning_rate": 3.761024160504232e-05, "loss": 0.1407, "step": 63990 }, { "epoch": 2.3257504179082784, "grad_norm": 0.6567360162734985, "learning_rate": 3.7605760541274784e-05, "loss": 0.0823, "step": 64000 }, { "epoch": 2.3261138164110764, "grad_norm": 1.1323597431182861, "learning_rate": 3.760127893436988e-05, "loss": 0.108, "step": 64010 }, { "epoch": 2.3264772149138744, "grad_norm": 0.9358565807342529, "learning_rate": 3.7596796784520684e-05, "loss": 0.1014, "step": 64020 }, { "epoch": 2.326840613416673, "grad_norm": 0.7020303010940552, "learning_rate": 3.759231409192034e-05, "loss": 0.098, "step": 64030 }, { "epoch": 2.327204011919471, "grad_norm": 1.7113333940505981, "learning_rate": 3.7587830856761996e-05, "loss": 0.1149, "step": 64040 }, { "epoch": 2.3275674104222692, "grad_norm": 2.211527109146118, "learning_rate": 3.75833470792388e-05, "loss": 0.085, "step": 64050 }, { "epoch": 2.327930808925067, "grad_norm": 0.6617085933685303, "learning_rate": 3.7578862759543954e-05, "loss": 0.1596, "step": 64060 }, { "epoch": 2.3282942074278656, "grad_norm": 0.6133392453193665, "learning_rate": 3.757437789787066e-05, "loss": 0.1044, "step": 64070 }, { "epoch": 2.3286576059306636, "grad_norm": 5.026115894317627, "learning_rate": 3.7569892494412175e-05, "loss": 0.1376, "step": 64080 }, { "epoch": 2.3290210044334616, "grad_norm": 1.0616756677627563, "learning_rate": 3.756540654936174e-05, "loss": 0.1029, "step": 64090 }, { "epoch": 2.32938440293626, "grad_norm": 0.9439811706542969, "learning_rate": 3.756092006291264e-05, "loss": 0.0779, "step": 64100 }, { "epoch": 2.329747801439058, "grad_norm": 1.4717971086502075, "learning_rate": 3.755643303525819e-05, "loss": 0.0926, "step": 64110 }, { "epoch": 2.3301111999418564, "grad_norm": 0.3808611035346985, "learning_rate": 3.7551945466591716e-05, "loss": 0.1099, "step": 64120 }, { "epoch": 2.3304745984446544, 
"grad_norm": 1.0548149347305298, "learning_rate": 3.754745735710657e-05, "loss": 0.1072, "step": 64130 }, { "epoch": 2.3308379969474524, "grad_norm": 2.6092560291290283, "learning_rate": 3.7542968706996136e-05, "loss": 0.1365, "step": 64140 }, { "epoch": 2.331201395450251, "grad_norm": 1.3453460931777954, "learning_rate": 3.7538479516453805e-05, "loss": 0.0904, "step": 64150 }, { "epoch": 2.331564793953049, "grad_norm": 0.8132860660552979, "learning_rate": 3.7533989785673e-05, "loss": 0.0848, "step": 64160 }, { "epoch": 2.3319281924558473, "grad_norm": 1.1206045150756836, "learning_rate": 3.7529499514847175e-05, "loss": 0.1255, "step": 64170 }, { "epoch": 2.3322915909586452, "grad_norm": 1.248970866203308, "learning_rate": 3.7525008704169795e-05, "loss": 0.0924, "step": 64180 }, { "epoch": 2.3326549894614432, "grad_norm": 0.9841907620429993, "learning_rate": 3.752051735383436e-05, "loss": 0.1479, "step": 64190 }, { "epoch": 2.3330183879642417, "grad_norm": 2.1250979900360107, "learning_rate": 3.7516025464034376e-05, "loss": 0.0934, "step": 64200 }, { "epoch": 2.3330183879642417, "eval_loss": 0.3528118431568146, "eval_runtime": 179.4186, "eval_samples_per_second": 41.322, "eval_steps_per_second": 5.167, "eval_wer": 0.15433768402708442, "step": 64200 }, { "epoch": 2.3333817864670396, "grad_norm": 0.7961970567703247, "learning_rate": 3.7511533034963384e-05, "loss": 0.0977, "step": 64210 }, { "epoch": 2.333745184969838, "grad_norm": 0.8082739114761353, "learning_rate": 3.750704006681495e-05, "loss": 0.1188, "step": 64220 }, { "epoch": 2.334108583472636, "grad_norm": 1.3309545516967773, "learning_rate": 3.7502546559782656e-05, "loss": 0.1256, "step": 64230 }, { "epoch": 2.334471981975434, "grad_norm": 0.8950253129005432, "learning_rate": 3.749805251406013e-05, "loss": 0.097, "step": 64240 }, { "epoch": 2.3348353804782325, "grad_norm": 1.537735939025879, "learning_rate": 3.7493557929840974e-05, "loss": 0.1024, "step": 64250 }, { "epoch": 2.3351987789810305, "grad_norm": 
0.9132232666015625, "learning_rate": 3.748906280731887e-05, "loss": 0.1118, "step": 64260 }, { "epoch": 2.335562177483829, "grad_norm": 0.540766179561615, "learning_rate": 3.7484567146687485e-05, "loss": 0.1203, "step": 64270 }, { "epoch": 2.335925575986627, "grad_norm": 0.5811611413955688, "learning_rate": 3.748007094814051e-05, "loss": 0.1067, "step": 64280 }, { "epoch": 2.3362889744894253, "grad_norm": 0.715090274810791, "learning_rate": 3.747557421187169e-05, "loss": 0.1685, "step": 64290 }, { "epoch": 2.3366523729922233, "grad_norm": 0.6315838694572449, "learning_rate": 3.747107693807477e-05, "loss": 0.1196, "step": 64300 }, { "epoch": 2.3370157714950213, "grad_norm": 1.2922756671905518, "learning_rate": 3.7466579126943514e-05, "loss": 0.0906, "step": 64310 }, { "epoch": 2.3373791699978197, "grad_norm": 9.100321769714355, "learning_rate": 3.746208077867172e-05, "loss": 0.1205, "step": 64320 }, { "epoch": 2.3377425685006177, "grad_norm": 0.5777522921562195, "learning_rate": 3.74575818934532e-05, "loss": 0.0787, "step": 64330 }, { "epoch": 2.338105967003416, "grad_norm": 0.5407727360725403, "learning_rate": 3.74530824714818e-05, "loss": 0.0998, "step": 64340 }, { "epoch": 2.338469365506214, "grad_norm": 0.6790062785148621, "learning_rate": 3.744858251295139e-05, "loss": 0.0855, "step": 64350 }, { "epoch": 2.3388327640090125, "grad_norm": 0.552946925163269, "learning_rate": 3.744408201805585e-05, "loss": 0.0864, "step": 64360 }, { "epoch": 2.3391961625118105, "grad_norm": 0.36451128125190735, "learning_rate": 3.743958098698909e-05, "loss": 0.1093, "step": 64370 }, { "epoch": 2.3395595610146085, "grad_norm": 0.834068775177002, "learning_rate": 3.743507941994505e-05, "loss": 0.1181, "step": 64380 }, { "epoch": 2.339922959517407, "grad_norm": 1.2418774366378784, "learning_rate": 3.743057731711768e-05, "loss": 0.1128, "step": 64390 }, { "epoch": 2.340286358020205, "grad_norm": 1.0964419841766357, "learning_rate": 3.7426074678700964e-05, "loss": 0.1012, "step": 64400 
}, { "epoch": 2.3406497565230033, "grad_norm": 0.7740904092788696, "learning_rate": 3.74215715048889e-05, "loss": 0.0835, "step": 64410 }, { "epoch": 2.3410131550258013, "grad_norm": 0.6739581823348999, "learning_rate": 3.741706779587551e-05, "loss": 0.1302, "step": 64420 }, { "epoch": 2.3413765535285993, "grad_norm": 1.141020655632019, "learning_rate": 3.7412563551854854e-05, "loss": 0.1881, "step": 64430 }, { "epoch": 2.3417399520313977, "grad_norm": 0.7994565367698669, "learning_rate": 3.7408058773020994e-05, "loss": 0.1099, "step": 64440 }, { "epoch": 2.3421033505341957, "grad_norm": 0.7365929484367371, "learning_rate": 3.740355345956804e-05, "loss": 0.088, "step": 64450 }, { "epoch": 2.342466749036994, "grad_norm": 0.5093470215797424, "learning_rate": 3.7399047611690095e-05, "loss": 0.1143, "step": 64460 }, { "epoch": 2.342830147539792, "grad_norm": 1.3585693836212158, "learning_rate": 3.7394541229581295e-05, "loss": 0.12, "step": 64470 }, { "epoch": 2.34319354604259, "grad_norm": 0.9446144700050354, "learning_rate": 3.739003431343583e-05, "loss": 2.1812, "step": 64480 }, { "epoch": 2.3435569445453885, "grad_norm": 0.4501352310180664, "learning_rate": 3.738552686344786e-05, "loss": 0.1086, "step": 64490 }, { "epoch": 2.3439203430481865, "grad_norm": 0.5260722637176514, "learning_rate": 3.73810188798116e-05, "loss": 0.1806, "step": 64500 }, { "epoch": 2.344283741550985, "grad_norm": 2.6056125164031982, "learning_rate": 3.73765103627213e-05, "loss": 0.1072, "step": 64510 }, { "epoch": 2.344647140053783, "grad_norm": 0.6981383562088013, "learning_rate": 3.73720013123712e-05, "loss": 0.1415, "step": 64520 }, { "epoch": 2.345010538556581, "grad_norm": 0.5633025169372559, "learning_rate": 3.7367491728955585e-05, "loss": 0.0986, "step": 64530 }, { "epoch": 2.3453739370593794, "grad_norm": 0.5558316111564636, "learning_rate": 3.7362981612668745e-05, "loss": 0.1791, "step": 64540 }, { "epoch": 2.3457373355621773, "grad_norm": 0.543397068977356, "learning_rate": 
3.735847096370503e-05, "loss": 0.1002, "step": 64550 }, { "epoch": 2.3461007340649758, "grad_norm": 0.5885327458381653, "learning_rate": 3.7353959782258755e-05, "loss": 0.0944, "step": 64560 }, { "epoch": 2.3464641325677738, "grad_norm": 0.9266073107719421, "learning_rate": 3.7349448068524325e-05, "loss": 1.9036, "step": 64570 }, { "epoch": 2.346827531070572, "grad_norm": 0.4478204548358917, "learning_rate": 3.7344935822696116e-05, "loss": 0.1028, "step": 64580 }, { "epoch": 2.34719092957337, "grad_norm": 1.6678454875946045, "learning_rate": 3.7340423044968534e-05, "loss": 0.1299, "step": 64590 }, { "epoch": 2.347554328076168, "grad_norm": 2.0704760551452637, "learning_rate": 3.733590973553604e-05, "loss": 0.086, "step": 64600 }, { "epoch": 2.3479177265789666, "grad_norm": 1.1192750930786133, "learning_rate": 3.733139589459308e-05, "loss": 0.0977, "step": 64610 }, { "epoch": 2.3482811250817646, "grad_norm": 1.0189874172210693, "learning_rate": 3.732688152233415e-05, "loss": 0.0966, "step": 64620 }, { "epoch": 2.348644523584563, "grad_norm": 0.8110418319702148, "learning_rate": 3.7322366618953755e-05, "loss": 0.1015, "step": 64630 }, { "epoch": 2.349007922087361, "grad_norm": 0.7661551833152771, "learning_rate": 3.731785118464642e-05, "loss": 0.1056, "step": 64640 }, { "epoch": 2.3493713205901594, "grad_norm": 0.6133613586425781, "learning_rate": 3.731333521960672e-05, "loss": 0.0862, "step": 64650 }, { "epoch": 2.3497347190929574, "grad_norm": 1.6493825912475586, "learning_rate": 3.73088187240292e-05, "loss": 0.1103, "step": 64660 }, { "epoch": 2.3500981175957554, "grad_norm": 0.7170090675354004, "learning_rate": 3.7304301698108486e-05, "loss": 1.3385, "step": 64670 }, { "epoch": 2.350461516098554, "grad_norm": 0.3917316794395447, "learning_rate": 3.7299784142039186e-05, "loss": 0.0838, "step": 64680 }, { "epoch": 2.350824914601352, "grad_norm": 0.85912024974823, "learning_rate": 3.729526605601595e-05, "loss": 0.1025, "step": 64690 }, { "epoch": 2.35118831310415, 
"grad_norm": 0.3880862295627594, "learning_rate": 3.729074744023345e-05, "loss": 0.0867, "step": 64700 }, { "epoch": 2.351551711606948, "grad_norm": 0.5538926124572754, "learning_rate": 3.728622829488637e-05, "loss": 0.1349, "step": 64710 }, { "epoch": 2.351915110109746, "grad_norm": 0.4051951766014099, "learning_rate": 3.7281708620169424e-05, "loss": 0.1023, "step": 64720 }, { "epoch": 2.3522785086125446, "grad_norm": 3.5087623596191406, "learning_rate": 3.7277188416277354e-05, "loss": 0.1052, "step": 64730 }, { "epoch": 2.3526419071153426, "grad_norm": 0.7415525317192078, "learning_rate": 3.727266768340492e-05, "loss": 0.1682, "step": 64740 }, { "epoch": 2.353005305618141, "grad_norm": 0.41486695408821106, "learning_rate": 3.7268146421746895e-05, "loss": 0.123, "step": 64750 }, { "epoch": 2.353368704120939, "grad_norm": 1.6847058534622192, "learning_rate": 3.726362463149811e-05, "loss": 0.108, "step": 64760 }, { "epoch": 2.353732102623737, "grad_norm": 0.6038152575492859, "learning_rate": 3.7259102312853356e-05, "loss": 0.1165, "step": 64770 }, { "epoch": 2.3540955011265354, "grad_norm": 1.413368821144104, "learning_rate": 3.7254579466007505e-05, "loss": 0.0947, "step": 64780 }, { "epoch": 2.3544588996293334, "grad_norm": 0.9277619123458862, "learning_rate": 3.7250056091155427e-05, "loss": 0.1269, "step": 64790 }, { "epoch": 2.354822298132132, "grad_norm": 1.1914100646972656, "learning_rate": 3.724553218849202e-05, "loss": 0.0882, "step": 64800 }, { "epoch": 2.354822298132132, "eval_loss": 0.34669631719589233, "eval_runtime": 180.4215, "eval_samples_per_second": 41.093, "eval_steps_per_second": 5.138, "eval_wer": 0.15537241091364568, "step": 64800 }, { "epoch": 2.35518569663493, "grad_norm": 0.8063227534294128, "learning_rate": 3.7241007758212195e-05, "loss": 0.0882, "step": 64810 }, { "epoch": 2.355549095137728, "grad_norm": 8.477306365966797, "learning_rate": 3.723648280051091e-05, "loss": 0.1379, "step": 64820 }, { "epoch": 2.3559124936405262, "grad_norm": 
1.4196289777755737, "learning_rate": 3.723195731558311e-05, "loss": 0.0822, "step": 64830 }, { "epoch": 2.3562758921433242, "grad_norm": 0.8000519275665283, "learning_rate": 3.722743130362379e-05, "loss": 0.0988, "step": 64840 }, { "epoch": 2.3566392906461227, "grad_norm": 5.073339939117432, "learning_rate": 3.722290476482796e-05, "loss": 0.0932, "step": 64850 }, { "epoch": 2.3570026891489206, "grad_norm": 0.8329682946205139, "learning_rate": 3.7218377699390666e-05, "loss": 0.091, "step": 64860 }, { "epoch": 2.357366087651719, "grad_norm": 33.570316314697266, "learning_rate": 3.7213850107506936e-05, "loss": 2.4337, "step": 64870 }, { "epoch": 2.357729486154517, "grad_norm": 0.5240826606750488, "learning_rate": 3.720932198937187e-05, "loss": 0.118, "step": 64880 }, { "epoch": 2.358092884657315, "grad_norm": 0.7187747955322266, "learning_rate": 3.720479334518056e-05, "loss": 0.1019, "step": 64890 }, { "epoch": 2.3584562831601135, "grad_norm": 5.0828351974487305, "learning_rate": 3.720026417512812e-05, "loss": 0.0869, "step": 64900 }, { "epoch": 2.3588196816629114, "grad_norm": 0.688025176525116, "learning_rate": 3.719573447940972e-05, "loss": 0.1029, "step": 64910 }, { "epoch": 2.35918308016571, "grad_norm": 1.203792691230774, "learning_rate": 3.71912042582205e-05, "loss": 0.1196, "step": 64920 }, { "epoch": 2.359546478668508, "grad_norm": 0.5731534361839294, "learning_rate": 3.718667351175567e-05, "loss": 0.0968, "step": 64930 }, { "epoch": 2.3599098771713063, "grad_norm": 1.7429757118225098, "learning_rate": 3.718214224021044e-05, "loss": 0.104, "step": 64940 }, { "epoch": 2.3602732756741043, "grad_norm": 1.9315886497497559, "learning_rate": 3.7177610443780045e-05, "loss": 0.1186, "step": 64950 }, { "epoch": 2.3606366741769023, "grad_norm": 0.8713351488113403, "learning_rate": 3.717307812265974e-05, "loss": 0.0962, "step": 64960 }, { "epoch": 2.3610000726797007, "grad_norm": 1.1917448043823242, "learning_rate": 3.716854527704482e-05, "loss": 0.1117, "step": 64970 
}, { "epoch": 2.3613634711824987, "grad_norm": 1.092644214630127, "learning_rate": 3.716401190713057e-05, "loss": 0.0889, "step": 64980 }, { "epoch": 2.361726869685297, "grad_norm": 1.172472357749939, "learning_rate": 3.715947801311233e-05, "loss": 0.1126, "step": 64990 }, { "epoch": 2.362090268188095, "grad_norm": 1.0360251665115356, "learning_rate": 3.715494359518545e-05, "loss": 0.104, "step": 65000 }, { "epoch": 2.362453666690893, "grad_norm": 0.88475102186203, "learning_rate": 3.715040865354529e-05, "loss": 0.0905, "step": 65010 }, { "epoch": 2.3628170651936915, "grad_norm": 2.172114849090576, "learning_rate": 3.714587318838726e-05, "loss": 0.1047, "step": 65020 }, { "epoch": 2.3631804636964895, "grad_norm": 1.0699172019958496, "learning_rate": 3.7141337199906766e-05, "loss": 0.1052, "step": 65030 }, { "epoch": 2.363543862199288, "grad_norm": 0.365556925535202, "learning_rate": 3.713680068829925e-05, "loss": 0.1084, "step": 65040 }, { "epoch": 2.363907260702086, "grad_norm": 0.6626974940299988, "learning_rate": 3.713226365376018e-05, "loss": 0.0883, "step": 65050 }, { "epoch": 2.364270659204884, "grad_norm": 1.7596914768218994, "learning_rate": 3.7127726096485026e-05, "loss": 0.0966, "step": 65060 }, { "epoch": 2.3646340577076823, "grad_norm": 0.5741199254989624, "learning_rate": 3.712318801666932e-05, "loss": 0.1532, "step": 65070 }, { "epoch": 2.3649974562104803, "grad_norm": 1.754315733909607, "learning_rate": 3.711864941450856e-05, "loss": 0.1066, "step": 65080 }, { "epoch": 2.3653608547132787, "grad_norm": 0.7265182137489319, "learning_rate": 3.711411029019833e-05, "loss": 0.1318, "step": 65090 }, { "epoch": 2.3657242532160767, "grad_norm": 0.5546099543571472, "learning_rate": 3.7109570643934185e-05, "loss": 0.1076, "step": 65100 }, { "epoch": 2.3660876517188747, "grad_norm": 0.5260456800460815, "learning_rate": 3.7105030475911716e-05, "loss": 0.0945, "step": 65110 }, { "epoch": 2.366451050221673, "grad_norm": 0.733099639415741, "learning_rate": 
3.710048978632657e-05, "loss": 0.0959, "step": 65120 }, { "epoch": 2.366814448724471, "grad_norm": 0.7349701523780823, "learning_rate": 3.709594857537436e-05, "loss": 0.0935, "step": 65130 }, { "epoch": 2.3671778472272695, "grad_norm": 1.6328225135803223, "learning_rate": 3.7091406843250774e-05, "loss": 0.158, "step": 65140 }, { "epoch": 2.3675412457300675, "grad_norm": 0.4439161717891693, "learning_rate": 3.7086864590151484e-05, "loss": 0.0965, "step": 65150 }, { "epoch": 2.367904644232866, "grad_norm": 1.500626564025879, "learning_rate": 3.70823218162722e-05, "loss": 0.0838, "step": 65160 }, { "epoch": 2.368268042735664, "grad_norm": 0.5546636581420898, "learning_rate": 3.7077778521808656e-05, "loss": 0.1117, "step": 65170 }, { "epoch": 2.368631441238462, "grad_norm": 2.3335354328155518, "learning_rate": 3.707323470695662e-05, "loss": 0.5259, "step": 65180 }, { "epoch": 2.3689948397412603, "grad_norm": 1.3475418090820312, "learning_rate": 3.706869037191185e-05, "loss": 0.1113, "step": 65190 }, { "epoch": 2.3693582382440583, "grad_norm": 1.5157225131988525, "learning_rate": 3.706414551687015e-05, "loss": 0.1229, "step": 65200 }, { "epoch": 2.3697216367468568, "grad_norm": 0.707976758480072, "learning_rate": 3.7059600142027354e-05, "loss": 0.1022, "step": 65210 }, { "epoch": 2.3700850352496547, "grad_norm": 0.48478442430496216, "learning_rate": 3.7055054247579285e-05, "loss": 0.1455, "step": 65220 }, { "epoch": 2.370448433752453, "grad_norm": 1.4668298959732056, "learning_rate": 3.7050507833721824e-05, "loss": 0.1318, "step": 65230 }, { "epoch": 2.370811832255251, "grad_norm": 0.6836544275283813, "learning_rate": 3.704596090065085e-05, "loss": 0.1028, "step": 65240 }, { "epoch": 2.371175230758049, "grad_norm": 0.5317667722702026, "learning_rate": 3.70414134485623e-05, "loss": 0.082, "step": 65250 }, { "epoch": 2.3715386292608476, "grad_norm": 0.5413720607757568, "learning_rate": 3.703686547765208e-05, "loss": 0.0988, "step": 65260 }, { "epoch": 2.3719020277636456, 
"grad_norm": 2.2720227241516113, "learning_rate": 3.703231698811614e-05, "loss": 0.1121, "step": 65270 }, { "epoch": 2.372265426266444, "grad_norm": 1.079412579536438, "learning_rate": 3.7027767980150485e-05, "loss": 0.1999, "step": 65280 }, { "epoch": 2.372628824769242, "grad_norm": 2.772294282913208, "learning_rate": 3.70232184539511e-05, "loss": 0.1109, "step": 65290 }, { "epoch": 2.37299222327204, "grad_norm": 1.1685398817062378, "learning_rate": 3.701866840971401e-05, "loss": 0.0708, "step": 65300 }, { "epoch": 2.3733556217748384, "grad_norm": 1.359842300415039, "learning_rate": 3.701411784763526e-05, "loss": 0.074, "step": 65310 }, { "epoch": 2.3737190202776364, "grad_norm": 0.42569172382354736, "learning_rate": 3.700956676791092e-05, "loss": 0.1001, "step": 65320 }, { "epoch": 2.374082418780435, "grad_norm": 0.6070738434791565, "learning_rate": 3.700501517073707e-05, "loss": 0.1083, "step": 65330 }, { "epoch": 2.374445817283233, "grad_norm": 5.960649490356445, "learning_rate": 3.700046305630984e-05, "loss": 0.1321, "step": 65340 }, { "epoch": 2.3748092157860308, "grad_norm": 0.613503098487854, "learning_rate": 3.699591042482536e-05, "loss": 0.1093, "step": 65350 }, { "epoch": 2.375172614288829, "grad_norm": 0.5209415555000305, "learning_rate": 3.699135727647977e-05, "loss": 0.103, "step": 65360 }, { "epoch": 2.375536012791627, "grad_norm": 0.7532001733779907, "learning_rate": 3.698680361146926e-05, "loss": 0.1061, "step": 65370 }, { "epoch": 2.3758994112944256, "grad_norm": 0.7915641665458679, "learning_rate": 3.6982249429990035e-05, "loss": 0.0951, "step": 65380 }, { "epoch": 2.3762628097972236, "grad_norm": 0.6081142425537109, "learning_rate": 3.697769473223832e-05, "loss": 0.115, "step": 65390 }, { "epoch": 2.3766262083000216, "grad_norm": 6.751429080963135, "learning_rate": 3.697313951841035e-05, "loss": 0.1017, "step": 65400 }, { "epoch": 2.3766262083000216, "eval_loss": 0.341545969247818, "eval_runtime": 180.2661, "eval_samples_per_second": 41.128, 
"eval_steps_per_second": 5.142, "eval_wer": 0.15476428195400005, "step": 65400 }, { "epoch": 2.37698960680282, "grad_norm": 0.7073554992675781, "learning_rate": 3.69685837887024e-05, "loss": 0.0784, "step": 65410 }, { "epoch": 2.377353005305618, "grad_norm": 1.2818964719772339, "learning_rate": 3.696402754331076e-05, "loss": 0.1119, "step": 65420 }, { "epoch": 2.3777164038084164, "grad_norm": 1.005615234375, "learning_rate": 3.695947078243174e-05, "loss": 0.1153, "step": 65430 }, { "epoch": 2.3780798023112144, "grad_norm": 0.8593710660934448, "learning_rate": 3.695491350626168e-05, "loss": 0.0793, "step": 65440 }, { "epoch": 2.378443200814013, "grad_norm": 2.337388038635254, "learning_rate": 3.695035571499692e-05, "loss": 0.097, "step": 65450 }, { "epoch": 2.378806599316811, "grad_norm": 0.9329900741577148, "learning_rate": 3.694579740883387e-05, "loss": 0.1108, "step": 65460 }, { "epoch": 2.379169997819609, "grad_norm": 0.7032762765884399, "learning_rate": 3.69412385879689e-05, "loss": 0.1192, "step": 65470 }, { "epoch": 2.3795333963224072, "grad_norm": 0.7048949599266052, "learning_rate": 3.693667925259845e-05, "loss": 0.0932, "step": 65480 }, { "epoch": 2.379896794825205, "grad_norm": 0.8367437124252319, "learning_rate": 3.693211940291896e-05, "loss": 0.139, "step": 65490 }, { "epoch": 2.3802601933280036, "grad_norm": 0.9763396978378296, "learning_rate": 3.69275590391269e-05, "loss": 0.0914, "step": 65500 }, { "epoch": 2.3806235918308016, "grad_norm": 1.1304420232772827, "learning_rate": 3.6922998161418764e-05, "loss": 0.0912, "step": 65510 }, { "epoch": 2.3809869903336, "grad_norm": 4.609717845916748, "learning_rate": 3.691843676999105e-05, "loss": 0.099, "step": 65520 }, { "epoch": 2.381350388836398, "grad_norm": 1.2089684009552002, "learning_rate": 3.6913874865040307e-05, "loss": 0.1087, "step": 65530 }, { "epoch": 2.381713787339196, "grad_norm": 0.7825998663902283, "learning_rate": 3.690931244676309e-05, "loss": 0.1141, "step": 65540 }, { "epoch": 
2.3820771858419945, "grad_norm": 0.3670007586479187, "learning_rate": 3.690474951535597e-05, "loss": 0.0831, "step": 65550 }, { "epoch": 2.3824405843447924, "grad_norm": 0.8454808592796326, "learning_rate": 3.6900186071015545e-05, "loss": 2.7237, "step": 65560 }, { "epoch": 2.382803982847591, "grad_norm": 0.6918748617172241, "learning_rate": 3.689562211393845e-05, "loss": 0.4282, "step": 65570 }, { "epoch": 2.383167381350389, "grad_norm": 0.8183717727661133, "learning_rate": 3.6891057644321326e-05, "loss": 0.0964, "step": 65580 }, { "epoch": 2.383530779853187, "grad_norm": 1.358555793762207, "learning_rate": 3.688649266236083e-05, "loss": 0.1215, "step": 65590 }, { "epoch": 2.3838941783559853, "grad_norm": 0.7757040858268738, "learning_rate": 3.688192716825366e-05, "loss": 0.0865, "step": 65600 }, { "epoch": 2.3842575768587833, "grad_norm": 2.875025510787964, "learning_rate": 3.687736116219652e-05, "loss": 0.0834, "step": 65610 }, { "epoch": 2.3846209753615817, "grad_norm": 0.5162243247032166, "learning_rate": 3.6872794644386156e-05, "loss": 0.1043, "step": 65620 }, { "epoch": 2.3849843738643797, "grad_norm": 0.7602340579032898, "learning_rate": 3.68682276150193e-05, "loss": 0.1051, "step": 65630 }, { "epoch": 2.3853477723671777, "grad_norm": 0.4563780128955841, "learning_rate": 3.686366007429276e-05, "loss": 0.1144, "step": 65640 }, { "epoch": 2.385711170869976, "grad_norm": 1.0391710996627808, "learning_rate": 3.685909202240331e-05, "loss": 0.0894, "step": 65650 }, { "epoch": 2.386074569372774, "grad_norm": 0.5064871907234192, "learning_rate": 3.685452345954778e-05, "loss": 0.0882, "step": 65660 }, { "epoch": 2.3864379678755725, "grad_norm": 1.3617416620254517, "learning_rate": 3.684995438592301e-05, "loss": 0.1068, "step": 65670 }, { "epoch": 2.3868013663783705, "grad_norm": 0.7488900423049927, "learning_rate": 3.684538480172587e-05, "loss": 0.1205, "step": 65680 }, { "epoch": 2.3871647648811685, "grad_norm": 0.4754915237426758, "learning_rate": 
3.684081470715325e-05, "loss": 0.1073, "step": 65690 }, { "epoch": 2.387528163383967, "grad_norm": 1.4789927005767822, "learning_rate": 3.6836244102402053e-05, "loss": 0.1681, "step": 65700 }, { "epoch": 2.387891561886765, "grad_norm": 0.6496606469154358, "learning_rate": 3.68316729876692e-05, "loss": 0.0993, "step": 65710 }, { "epoch": 2.3882549603895633, "grad_norm": 0.8786084651947021, "learning_rate": 3.6827101363151676e-05, "loss": 0.1326, "step": 65720 }, { "epoch": 2.3886183588923613, "grad_norm": 0.7775259613990784, "learning_rate": 3.682252922904641e-05, "loss": 0.1014, "step": 65730 }, { "epoch": 2.3889817573951597, "grad_norm": 1.226577877998352, "learning_rate": 3.681795658555044e-05, "loss": 0.1135, "step": 65740 }, { "epoch": 2.3893451558979577, "grad_norm": 0.6108711957931519, "learning_rate": 3.681338343286077e-05, "loss": 0.1047, "step": 65750 }, { "epoch": 2.3897085544007557, "grad_norm": 1.8602646589279175, "learning_rate": 3.6808809771174435e-05, "loss": 0.0888, "step": 65760 }, { "epoch": 2.390071952903554, "grad_norm": 0.9307143092155457, "learning_rate": 3.6804235600688503e-05, "loss": 0.1109, "step": 65770 }, { "epoch": 2.390435351406352, "grad_norm": 0.7531790733337402, "learning_rate": 3.679966092160005e-05, "loss": 0.1228, "step": 65780 }, { "epoch": 2.3907987499091505, "grad_norm": 0.608249843120575, "learning_rate": 3.679508573410621e-05, "loss": 0.1062, "step": 65790 }, { "epoch": 2.3911621484119485, "grad_norm": 1.1337485313415527, "learning_rate": 3.679051003840408e-05, "loss": 0.5558, "step": 65800 }, { "epoch": 2.391525546914747, "grad_norm": 0.5861150622367859, "learning_rate": 3.678593383469083e-05, "loss": 0.1104, "step": 65810 }, { "epoch": 2.391888945417545, "grad_norm": 0.3788084089756012, "learning_rate": 3.678135712316362e-05, "loss": 0.1413, "step": 65820 }, { "epoch": 2.392252343920343, "grad_norm": 2.359208106994629, "learning_rate": 3.6776779904019656e-05, "loss": 0.1204, "step": 65830 }, { "epoch": 2.3926157424231413, 
"grad_norm": 0.5361478328704834, "learning_rate": 3.677220217745614e-05, "loss": 0.1073, "step": 65840 }, { "epoch": 2.3929791409259393, "grad_norm": 1.1404966115951538, "learning_rate": 3.676762394367032e-05, "loss": 0.0842, "step": 65850 }, { "epoch": 2.3933425394287378, "grad_norm": 0.6131421327590942, "learning_rate": 3.676304520285946e-05, "loss": 0.0872, "step": 65860 }, { "epoch": 2.3937059379315357, "grad_norm": 0.7355049848556519, "learning_rate": 3.675846595522082e-05, "loss": 0.1181, "step": 65870 }, { "epoch": 2.3940693364343337, "grad_norm": 0.7013423442840576, "learning_rate": 3.675388620095174e-05, "loss": 0.0889, "step": 65880 }, { "epoch": 2.394432734937132, "grad_norm": 0.5543515086174011, "learning_rate": 3.674930594024951e-05, "loss": 0.1213, "step": 65890 }, { "epoch": 2.39479613343993, "grad_norm": 0.709343671798706, "learning_rate": 3.674472517331149e-05, "loss": 0.0912, "step": 65900 }, { "epoch": 2.3951595319427286, "grad_norm": 0.6905022859573364, "learning_rate": 3.674014390033506e-05, "loss": 0.6845, "step": 65910 }, { "epoch": 2.3955229304455266, "grad_norm": 0.6566099524497986, "learning_rate": 3.6735562121517593e-05, "loss": 0.1347, "step": 65920 }, { "epoch": 2.3958863289483245, "grad_norm": 1.4284336566925049, "learning_rate": 3.673097983705651e-05, "loss": 0.1064, "step": 65930 }, { "epoch": 2.396249727451123, "grad_norm": 0.9759535193443298, "learning_rate": 3.672639704714925e-05, "loss": 0.149, "step": 65940 }, { "epoch": 2.396613125953921, "grad_norm": 1.247986078262329, "learning_rate": 3.6721813751993255e-05, "loss": 0.1248, "step": 65950 }, { "epoch": 2.3969765244567194, "grad_norm": 0.7816616296768188, "learning_rate": 3.671722995178603e-05, "loss": 0.0935, "step": 65960 }, { "epoch": 2.3973399229595174, "grad_norm": 2.142498016357422, "learning_rate": 3.671264564672503e-05, "loss": 0.1141, "step": 65970 }, { "epoch": 2.3977033214623154, "grad_norm": 0.7998883724212646, "learning_rate": 3.670806083700782e-05, "loss": 0.0958, 
"step": 65980 }, { "epoch": 2.398066719965114, "grad_norm": 1.2408504486083984, "learning_rate": 3.6703475522831924e-05, "loss": 0.1669, "step": 65990 }, { "epoch": 2.3984301184679118, "grad_norm": 0.7468869686126709, "learning_rate": 3.669888970439491e-05, "loss": 0.0939, "step": 66000 }, { "epoch": 2.3984301184679118, "eval_loss": 0.3319300711154938, "eval_runtime": 179.03, "eval_samples_per_second": 41.412, "eval_steps_per_second": 5.178, "eval_wer": 0.1522319240474159, "step": 66000 }, { "epoch": 2.39879351697071, "grad_norm": 0.3617503046989441, "learning_rate": 3.669430338189436e-05, "loss": 0.0804, "step": 66010 }, { "epoch": 2.399156915473508, "grad_norm": 1.2790522575378418, "learning_rate": 3.668971655552788e-05, "loss": 0.1073, "step": 66020 }, { "epoch": 2.3995203139763066, "grad_norm": 0.5524618029594421, "learning_rate": 3.668512922549312e-05, "loss": 0.1024, "step": 66030 }, { "epoch": 2.3998837124791046, "grad_norm": 0.6617368459701538, "learning_rate": 3.6680541391987706e-05, "loss": 0.1241, "step": 66040 }, { "epoch": 2.4002471109819026, "grad_norm": 1.515463948249817, "learning_rate": 3.667595305520933e-05, "loss": 0.0991, "step": 66050 }, { "epoch": 2.400610509484701, "grad_norm": 1.0713670253753662, "learning_rate": 3.667136421535567e-05, "loss": 0.0983, "step": 66060 }, { "epoch": 2.400973907987499, "grad_norm": 0.4194028973579407, "learning_rate": 3.666677487262446e-05, "loss": 0.1354, "step": 66070 }, { "epoch": 2.4013373064902974, "grad_norm": 1.1584357023239136, "learning_rate": 3.6662185027213436e-05, "loss": 0.1073, "step": 66080 }, { "epoch": 2.4017007049930954, "grad_norm": 0.9621077179908752, "learning_rate": 3.6657594679320346e-05, "loss": 0.1, "step": 66090 }, { "epoch": 2.402064103495894, "grad_norm": 0.8532549738883972, "learning_rate": 3.665300382914298e-05, "loss": 0.0972, "step": 66100 }, { "epoch": 2.402427501998692, "grad_norm": 0.44833171367645264, "learning_rate": 3.664841247687914e-05, "loss": 0.094, "step": 66110 }, { 
"epoch": 2.40279090050149, "grad_norm": 0.8976952433586121, "learning_rate": 3.6643820622726654e-05, "loss": 0.1537, "step": 66120 }, { "epoch": 2.4031542990042882, "grad_norm": 1.280044674873352, "learning_rate": 3.663922826688336e-05, "loss": 0.1071, "step": 66130 }, { "epoch": 2.403517697507086, "grad_norm": 1.037636160850525, "learning_rate": 3.6634635409547144e-05, "loss": 0.1271, "step": 66140 }, { "epoch": 2.4038810960098846, "grad_norm": 0.6089548468589783, "learning_rate": 3.663004205091588e-05, "loss": 0.106, "step": 66150 }, { "epoch": 2.4042444945126826, "grad_norm": 0.5719799995422363, "learning_rate": 3.662544819118748e-05, "loss": 0.1048, "step": 66160 }, { "epoch": 2.4046078930154806, "grad_norm": 1.0621087551116943, "learning_rate": 3.662131328915747e-05, "loss": 0.1121, "step": 66170 }, { "epoch": 2.404971291518279, "grad_norm": 0.9576284289360046, "learning_rate": 3.6616718477889837e-05, "loss": 0.1075, "step": 66180 }, { "epoch": 2.405334690021077, "grad_norm": 0.6212823987007141, "learning_rate": 3.661212316609915e-05, "loss": 0.1262, "step": 66190 }, { "epoch": 2.4056980885238755, "grad_norm": 0.9172229170799255, "learning_rate": 3.660752735398338e-05, "loss": 0.0759, "step": 66200 }, { "epoch": 2.4060614870266734, "grad_norm": 0.7851585745811462, "learning_rate": 3.660293104174057e-05, "loss": 0.0873, "step": 66210 }, { "epoch": 2.4064248855294714, "grad_norm": 0.6783828735351562, "learning_rate": 3.659833422956873e-05, "loss": 0.1069, "step": 66220 }, { "epoch": 2.40678828403227, "grad_norm": 3.5662567615509033, "learning_rate": 3.659373691766594e-05, "loss": 0.0983, "step": 66230 }, { "epoch": 2.407151682535068, "grad_norm": 0.9401397705078125, "learning_rate": 3.658913910623028e-05, "loss": 0.1096, "step": 66240 }, { "epoch": 2.4075150810378663, "grad_norm": 0.5327457189559937, "learning_rate": 3.658454079545985e-05, "loss": 0.0867, "step": 66250 }, { "epoch": 2.4078784795406643, "grad_norm": 0.5370202660560608, "learning_rate": 
3.657994198555278e-05, "loss": 0.0901, "step": 66260 }, { "epoch": 2.4082418780434622, "grad_norm": 0.2850395143032074, "learning_rate": 3.65753426767072e-05, "loss": 0.1159, "step": 66270 }, { "epoch": 2.4086052765462607, "grad_norm": 3.4857585430145264, "learning_rate": 3.65707428691213e-05, "loss": 0.117, "step": 66280 }, { "epoch": 2.4089686750490586, "grad_norm": 0.8752036690711975, "learning_rate": 3.656614256299325e-05, "loss": 0.1518, "step": 66290 }, { "epoch": 2.409332073551857, "grad_norm": 0.7939157485961914, "learning_rate": 3.656154175852128e-05, "loss": 0.0889, "step": 66300 }, { "epoch": 2.409695472054655, "grad_norm": 0.5964920520782471, "learning_rate": 3.6556940455903603e-05, "loss": 0.0838, "step": 66310 }, { "epoch": 2.4100588705574535, "grad_norm": 0.3993948996067047, "learning_rate": 3.655233865533848e-05, "loss": 0.1318, "step": 66320 }, { "epoch": 2.4104222690602515, "grad_norm": 0.5623260736465454, "learning_rate": 3.65477363570242e-05, "loss": 0.1193, "step": 66330 }, { "epoch": 2.4107856675630495, "grad_norm": 0.5477907061576843, "learning_rate": 3.654313356115903e-05, "loss": 0.1197, "step": 66340 }, { "epoch": 2.411149066065848, "grad_norm": 0.8918854594230652, "learning_rate": 3.653853026794132e-05, "loss": 0.1546, "step": 66350 }, { "epoch": 2.411512464568646, "grad_norm": 0.5984349250793457, "learning_rate": 3.6533926477569384e-05, "loss": 0.0822, "step": 66360 }, { "epoch": 2.4118758630714443, "grad_norm": 0.3398670554161072, "learning_rate": 3.65293221902416e-05, "loss": 0.1134, "step": 66370 }, { "epoch": 2.4122392615742423, "grad_norm": 1.7309616804122925, "learning_rate": 3.652471740615634e-05, "loss": 0.0953, "step": 66380 }, { "epoch": 2.4126026600770407, "grad_norm": 0.5632598996162415, "learning_rate": 3.6520112125512016e-05, "loss": 0.1023, "step": 66390 }, { "epoch": 2.4129660585798387, "grad_norm": 1.3867424726486206, "learning_rate": 3.6515506348507054e-05, "loss": 0.0856, "step": 66400 }, { "epoch": 2.4133294570826367, 
"grad_norm": 1.5078961849212646, "learning_rate": 3.651090007533989e-05, "loss": 0.0795, "step": 66410 }, { "epoch": 2.413692855585435, "grad_norm": 0.596082866191864, "learning_rate": 3.650629330620899e-05, "loss": 0.1231, "step": 66420 }, { "epoch": 2.414056254088233, "grad_norm": 0.9665220379829407, "learning_rate": 3.6501686041312865e-05, "loss": 0.1076, "step": 66430 }, { "epoch": 2.4144196525910315, "grad_norm": 0.4780147075653076, "learning_rate": 3.649753907919114e-05, "loss": 3.842, "step": 66440 }, { "epoch": 2.4147830510938295, "grad_norm": 0.7537965774536133, "learning_rate": 3.6492930872887963e-05, "loss": 0.0878, "step": 66450 }, { "epoch": 2.4151464495966275, "grad_norm": 2.1421070098876953, "learning_rate": 3.6488322171395295e-05, "loss": 0.0812, "step": 66460 }, { "epoch": 2.415509848099426, "grad_norm": 0.36758169531822205, "learning_rate": 3.648371297491169e-05, "loss": 0.1109, "step": 66470 }, { "epoch": 2.415873246602224, "grad_norm": 1.2690719366073608, "learning_rate": 3.647910328363577e-05, "loss": 0.1199, "step": 66480 }, { "epoch": 2.4162366451050223, "grad_norm": 1.2424167394638062, "learning_rate": 3.647449309776612e-05, "loss": 0.1087, "step": 66490 }, { "epoch": 2.4166000436078203, "grad_norm": 0.6070811748504639, "learning_rate": 3.6469882417501386e-05, "loss": 0.1042, "step": 66500 }, { "epoch": 2.4169634421106183, "grad_norm": 0.3652547597885132, "learning_rate": 3.646527124304024e-05, "loss": 0.13, "step": 66510 }, { "epoch": 2.4173268406134167, "grad_norm": 0.8389589190483093, "learning_rate": 3.646065957458134e-05, "loss": 0.1059, "step": 66520 }, { "epoch": 2.4176902391162147, "grad_norm": 4.236841678619385, "learning_rate": 3.64560474123234e-05, "loss": 0.1248, "step": 66530 }, { "epoch": 2.418053637619013, "grad_norm": 0.4040025770664215, "learning_rate": 3.645143475646514e-05, "loss": 0.1224, "step": 66540 }, { "epoch": 2.418417036121811, "grad_norm": 1.0393097400665283, "learning_rate": 3.6446821607205294e-05, "loss": 
0.0945, "step": 66550 }, { "epoch": 2.418780434624609, "grad_norm": 1.010204792022705, "learning_rate": 3.644220796474264e-05, "loss": 0.0797, "step": 66560 }, { "epoch": 2.4191438331274076, "grad_norm": 0.8821393847465515, "learning_rate": 3.643759382927595e-05, "loss": 0.1042, "step": 66570 }, { "epoch": 2.4195072316302055, "grad_norm": 0.35728177428245544, "learning_rate": 3.643297920100404e-05, "loss": 0.0965, "step": 66580 }, { "epoch": 2.419870630133004, "grad_norm": 1.833901286125183, "learning_rate": 3.642836408012573e-05, "loss": 0.176, "step": 66590 }, { "epoch": 2.420234028635802, "grad_norm": 1.3145054578781128, "learning_rate": 3.6423748466839884e-05, "loss": 0.0881, "step": 66600 }, { "epoch": 2.420234028635802, "eval_loss": 0.3247428834438324, "eval_runtime": 179.5083, "eval_samples_per_second": 41.302, "eval_steps_per_second": 5.164, "eval_wer": 0.15563563091110424, "step": 66600 }, { "epoch": 2.4205974271386004, "grad_norm": 0.4729728102684021, "learning_rate": 3.6419132361345366e-05, "loss": 0.1078, "step": 66610 }, { "epoch": 2.4209608256413984, "grad_norm": 0.780598521232605, "learning_rate": 3.6414515763841054e-05, "loss": 0.104, "step": 66620 }, { "epoch": 2.4213242241441963, "grad_norm": 0.8436282873153687, "learning_rate": 3.6409898674525865e-05, "loss": 0.1148, "step": 66630 }, { "epoch": 2.4216876226469948, "grad_norm": 1.6270266771316528, "learning_rate": 3.640528109359875e-05, "loss": 1.2437, "step": 66640 }, { "epoch": 2.4220510211497928, "grad_norm": 0.6060745716094971, "learning_rate": 3.640066302125865e-05, "loss": 0.0958, "step": 66650 }, { "epoch": 2.422414419652591, "grad_norm": 1.076560139656067, "learning_rate": 3.6396044457704535e-05, "loss": 0.105, "step": 66660 }, { "epoch": 2.422777818155389, "grad_norm": 0.4505023956298828, "learning_rate": 3.6391425403135425e-05, "loss": 0.1123, "step": 66670 }, { "epoch": 2.4231412166581876, "grad_norm": 0.5208647847175598, "learning_rate": 3.6386805857750315e-05, "loss": 0.0909, "step": 
66680 }, { "epoch": 2.4235046151609856, "grad_norm": 0.7721276879310608, "learning_rate": 3.638218582174826e-05, "loss": 0.113, "step": 66690 }, { "epoch": 2.4238680136637836, "grad_norm": 1.676924467086792, "learning_rate": 3.6377565295328316e-05, "loss": 0.1015, "step": 66700 }, { "epoch": 2.424231412166582, "grad_norm": 0.7819331288337708, "learning_rate": 3.6372944278689566e-05, "loss": 0.1092, "step": 66710 }, { "epoch": 2.42459481066938, "grad_norm": 0.6924077272415161, "learning_rate": 3.636832277203111e-05, "loss": 0.1739, "step": 66720 }, { "epoch": 2.4249582091721784, "grad_norm": 0.48950478434562683, "learning_rate": 3.636370077555208e-05, "loss": 0.1288, "step": 66730 }, { "epoch": 2.4253216076749764, "grad_norm": 1.9735438823699951, "learning_rate": 3.6359078289451604e-05, "loss": 0.1444, "step": 66740 }, { "epoch": 2.4256850061777744, "grad_norm": 2.684687852859497, "learning_rate": 3.635445531392887e-05, "loss": 0.0867, "step": 66750 }, { "epoch": 2.426048404680573, "grad_norm": 0.4811551570892334, "learning_rate": 3.634983184918305e-05, "loss": 0.1158, "step": 66760 }, { "epoch": 2.426411803183371, "grad_norm": 1.0460630655288696, "learning_rate": 3.6345207895413367e-05, "loss": 0.1113, "step": 66770 }, { "epoch": 2.4267752016861692, "grad_norm": 0.47594699263572693, "learning_rate": 3.634058345281903e-05, "loss": 0.119, "step": 66780 }, { "epoch": 2.427138600188967, "grad_norm": 1.4716179370880127, "learning_rate": 3.633595852159931e-05, "loss": 0.1607, "step": 66790 }, { "epoch": 2.427501998691765, "grad_norm": 2.8937737941741943, "learning_rate": 3.6331333101953465e-05, "loss": 0.0732, "step": 66800 }, { "epoch": 2.4278653971945636, "grad_norm": 0.4008066654205322, "learning_rate": 3.63267071940808e-05, "loss": 0.108, "step": 66810 }, { "epoch": 2.4282287956973616, "grad_norm": 0.6345723271369934, "learning_rate": 3.632208079818062e-05, "loss": 0.1021, "step": 66820 }, { "epoch": 2.42859219420016, "grad_norm": 4.007993221282959, "learning_rate": 
3.631745391445226e-05, "loss": 0.1041, "step": 66830 }, { "epoch": 2.428955592702958, "grad_norm": 1.5959880352020264, "learning_rate": 3.631282654309508e-05, "loss": 0.1387, "step": 66840 }, { "epoch": 2.429318991205756, "grad_norm": 2.597745180130005, "learning_rate": 3.6308198684308465e-05, "loss": 0.1163, "step": 66850 }, { "epoch": 2.4296823897085544, "grad_norm": 0.8064637184143066, "learning_rate": 3.630357033829179e-05, "loss": 0.1064, "step": 66860 }, { "epoch": 2.4300457882113524, "grad_norm": 0.9430283308029175, "learning_rate": 3.629894150524449e-05, "loss": 0.1267, "step": 66870 }, { "epoch": 2.430409186714151, "grad_norm": 0.7025822997093201, "learning_rate": 3.629431218536601e-05, "loss": 0.0967, "step": 66880 }, { "epoch": 2.430772585216949, "grad_norm": 1.0002391338348389, "learning_rate": 3.628968237885579e-05, "loss": 0.1241, "step": 66890 }, { "epoch": 2.4311359837197473, "grad_norm": 1.6046959161758423, "learning_rate": 3.628505208591334e-05, "loss": 0.0894, "step": 66900 }, { "epoch": 2.4314993822225452, "grad_norm": 0.773638129234314, "learning_rate": 3.628042130673814e-05, "loss": 0.0885, "step": 66910 }, { "epoch": 2.4318627807253432, "grad_norm": 0.7153804898262024, "learning_rate": 3.627579004152972e-05, "loss": 0.127, "step": 66920 }, { "epoch": 2.4322261792281417, "grad_norm": 0.8669637441635132, "learning_rate": 3.627115829048763e-05, "loss": 0.0891, "step": 66930 }, { "epoch": 2.4325895777309396, "grad_norm": 2.438815116882324, "learning_rate": 3.6266526053811434e-05, "loss": 3.7705, "step": 66940 }, { "epoch": 2.432952976233738, "grad_norm": 0.46661120653152466, "learning_rate": 3.626189333170071e-05, "loss": 0.0928, "step": 66950 }, { "epoch": 2.433316374736536, "grad_norm": 1.2738080024719238, "learning_rate": 3.625726012435508e-05, "loss": 0.0838, "step": 66960 }, { "epoch": 2.4336797732393345, "grad_norm": 0.8235649466514587, "learning_rate": 3.6252626431974155e-05, "loss": 0.1173, "step": 66970 }, { "epoch": 2.4340431717421325, 
"grad_norm": 0.8627928495407104, "learning_rate": 3.62479922547576e-05, "loss": 0.0915, "step": 66980 }, { "epoch": 2.4344065702449305, "grad_norm": 0.746405839920044, "learning_rate": 3.624335759290509e-05, "loss": 0.2557, "step": 66990 }, { "epoch": 2.434769968747729, "grad_norm": 1.1601886749267578, "learning_rate": 3.6238722446616285e-05, "loss": 0.0987, "step": 67000 }, { "epoch": 2.435133367250527, "grad_norm": 1.9349639415740967, "learning_rate": 3.623408681609093e-05, "loss": 0.0899, "step": 67010 }, { "epoch": 2.4354967657533253, "grad_norm": 0.6410073637962341, "learning_rate": 3.622945070152874e-05, "loss": 0.164, "step": 67020 }, { "epoch": 2.4358601642561233, "grad_norm": 0.46642959117889404, "learning_rate": 3.622481410312948e-05, "loss": 0.1156, "step": 67030 }, { "epoch": 2.4362235627589213, "grad_norm": 1.0162826776504517, "learning_rate": 3.6220177021092916e-05, "loss": 0.1193, "step": 67040 }, { "epoch": 2.4365869612617197, "grad_norm": 1.3133575916290283, "learning_rate": 3.621553945561884e-05, "loss": 0.0849, "step": 67050 }, { "epoch": 2.4369503597645177, "grad_norm": 0.6921333074569702, "learning_rate": 3.621090140690708e-05, "loss": 0.0855, "step": 67060 }, { "epoch": 2.437313758267316, "grad_norm": 0.8446233868598938, "learning_rate": 3.620626287515746e-05, "loss": 0.9796, "step": 67070 }, { "epoch": 2.437677156770114, "grad_norm": 1.3895478248596191, "learning_rate": 3.620162386056985e-05, "loss": 0.1147, "step": 67080 }, { "epoch": 2.438040555272912, "grad_norm": 0.5276104807853699, "learning_rate": 3.619698436334412e-05, "loss": 0.0987, "step": 67090 }, { "epoch": 2.4384039537757105, "grad_norm": 1.7694755792617798, "learning_rate": 3.619234438368018e-05, "loss": 0.1291, "step": 67100 }, { "epoch": 2.4387673522785085, "grad_norm": 0.5948963761329651, "learning_rate": 3.618770392177794e-05, "loss": 0.0976, "step": 67110 }, { "epoch": 2.439130750781307, "grad_norm": 0.2391016185283661, "learning_rate": 3.618306297783734e-05, "loss": 
0.0982, "step": 67120 }, { "epoch": 2.439494149284105, "grad_norm": 0.9383694529533386, "learning_rate": 3.617842155205835e-05, "loss": 0.0995, "step": 67130 }, { "epoch": 2.439857547786903, "grad_norm": 0.9149391055107117, "learning_rate": 3.617377964464094e-05, "loss": 0.1012, "step": 67140 }, { "epoch": 2.4402209462897013, "grad_norm": 0.5762970447540283, "learning_rate": 3.616913725578513e-05, "loss": 0.0943, "step": 67150 }, { "epoch": 2.4405843447924993, "grad_norm": 0.7008225321769714, "learning_rate": 3.6164494385690936e-05, "loss": 0.0838, "step": 67160 }, { "epoch": 2.4409477432952977, "grad_norm": 1.0070174932479858, "learning_rate": 3.61598510345584e-05, "loss": 0.1395, "step": 67170 }, { "epoch": 2.4413111417980957, "grad_norm": 0.7962942123413086, "learning_rate": 3.6155207202587596e-05, "loss": 0.1115, "step": 67180 }, { "epoch": 2.441674540300894, "grad_norm": 0.4024165868759155, "learning_rate": 3.615056288997859e-05, "loss": 0.0848, "step": 67190 }, { "epoch": 2.442037938803692, "grad_norm": 0.39084872603416443, "learning_rate": 3.6145918096931515e-05, "loss": 0.0967, "step": 67200 }, { "epoch": 2.442037938803692, "eval_loss": 0.33589035272598267, "eval_runtime": 179.6199, "eval_samples_per_second": 41.276, "eval_steps_per_second": 5.161, "eval_wer": 0.15488227712527455, "step": 67200 }, { "epoch": 2.44240133730649, "grad_norm": 0.6485455632209778, "learning_rate": 3.614127282364648e-05, "loss": 1.4456, "step": 67210 }, { "epoch": 2.4427647358092885, "grad_norm": 0.4933464229106903, "learning_rate": 3.613662707032364e-05, "loss": 0.1259, "step": 67220 }, { "epoch": 2.4431281343120865, "grad_norm": 2.199694871902466, "learning_rate": 3.613198083716317e-05, "loss": 0.1013, "step": 67230 }, { "epoch": 2.443491532814885, "grad_norm": 1.2690855264663696, "learning_rate": 3.612733412436524e-05, "loss": 0.1256, "step": 67240 }, { "epoch": 2.443854931317683, "grad_norm": 1.8013975620269775, "learning_rate": 3.612268693213009e-05, "loss": 0.4379, "step": 
67250 }, { "epoch": 2.4442183298204814, "grad_norm": 4.287527561187744, "learning_rate": 3.611803926065792e-05, "loss": 0.0803, "step": 67260 }, { "epoch": 2.4445817283232794, "grad_norm": 0.6265177726745605, "learning_rate": 3.6113391110149006e-05, "loss": 0.1046, "step": 67270 }, { "epoch": 2.4449451268260773, "grad_norm": 1.527327537536621, "learning_rate": 3.6108742480803606e-05, "loss": 0.1095, "step": 67280 }, { "epoch": 2.4453085253288758, "grad_norm": 1.2177270650863647, "learning_rate": 3.6104093372822026e-05, "loss": 0.0972, "step": 67290 }, { "epoch": 2.4456719238316738, "grad_norm": 0.7354857921600342, "learning_rate": 3.609944378640457e-05, "loss": 0.0893, "step": 67300 }, { "epoch": 2.446035322334472, "grad_norm": 0.8578464984893799, "learning_rate": 3.609479372175156e-05, "loss": 0.0914, "step": 67310 }, { "epoch": 2.44639872083727, "grad_norm": 0.5541604161262512, "learning_rate": 3.6090143179063374e-05, "loss": 0.113, "step": 67320 }, { "epoch": 2.446762119340068, "grad_norm": 0.7503251433372498, "learning_rate": 3.608549215854037e-05, "loss": 0.116, "step": 67330 }, { "epoch": 2.4471255178428666, "grad_norm": 0.7713415026664734, "learning_rate": 3.608084066038297e-05, "loss": 0.1122, "step": 67340 }, { "epoch": 2.4474889163456646, "grad_norm": 2.4603497982025146, "learning_rate": 3.607618868479156e-05, "loss": 0.0932, "step": 67350 }, { "epoch": 2.447852314848463, "grad_norm": 0.4980012774467468, "learning_rate": 3.607153623196658e-05, "loss": 0.0905, "step": 67360 }, { "epoch": 2.448215713351261, "grad_norm": 0.5134033560752869, "learning_rate": 3.606688330210851e-05, "loss": 0.1666, "step": 67370 }, { "epoch": 2.448579111854059, "grad_norm": 0.5784050822257996, "learning_rate": 3.60622298954178e-05, "loss": 0.1092, "step": 67380 }, { "epoch": 2.4489425103568574, "grad_norm": 0.4290425777435303, "learning_rate": 3.605757601209497e-05, "loss": 0.1189, "step": 67390 }, { "epoch": 2.4493059088596554, "grad_norm": 1.0926011800765991, "learning_rate": 
3.605292165234053e-05, "loss": 0.0879, "step": 67400 }, { "epoch": 2.449669307362454, "grad_norm": 1.1270503997802734, "learning_rate": 3.604826681635504e-05, "loss": 0.0893, "step": 67410 }, { "epoch": 2.450032705865252, "grad_norm": 0.6691473126411438, "learning_rate": 3.604361150433903e-05, "loss": 0.7363, "step": 67420 }, { "epoch": 2.4503961043680498, "grad_norm": 1.2996752262115479, "learning_rate": 3.603895571649308e-05, "loss": 0.0946, "step": 67430 }, { "epoch": 2.450759502870848, "grad_norm": 1.3618733882904053, "learning_rate": 3.603429945301783e-05, "loss": 0.1242, "step": 67440 }, { "epoch": 2.451122901373646, "grad_norm": 0.7978112101554871, "learning_rate": 3.6029642714113853e-05, "loss": 0.0783, "step": 67450 }, { "epoch": 2.4514862998764446, "grad_norm": 1.727400302886963, "learning_rate": 3.602498549998183e-05, "loss": 0.106, "step": 67460 }, { "epoch": 2.4518496983792426, "grad_norm": 0.9686618447303772, "learning_rate": 3.602032781082241e-05, "loss": 0.1259, "step": 67470 }, { "epoch": 2.452213096882041, "grad_norm": 0.4624063968658447, "learning_rate": 3.601566964683627e-05, "loss": 0.1066, "step": 67480 }, { "epoch": 2.452576495384839, "grad_norm": 0.38952404260635376, "learning_rate": 3.601101100822412e-05, "loss": 0.0993, "step": 67490 }, { "epoch": 2.452939893887637, "grad_norm": 1.37151300907135, "learning_rate": 3.600635189518668e-05, "loss": 0.0988, "step": 67500 }, { "epoch": 2.4533032923904354, "grad_norm": 0.4988241195678711, "learning_rate": 3.60016923079247e-05, "loss": 0.0942, "step": 67510 }, { "epoch": 2.4536666908932334, "grad_norm": 0.8300676941871643, "learning_rate": 3.599703224663894e-05, "loss": 0.1087, "step": 67520 }, { "epoch": 2.454030089396032, "grad_norm": 9.264083862304688, "learning_rate": 3.599237171153019e-05, "loss": 0.1155, "step": 67530 }, { "epoch": 2.45439348789883, "grad_norm": 0.9220635294914246, "learning_rate": 3.598771070279926e-05, "loss": 0.1134, "step": 67540 }, { "epoch": 2.4547568864016283, 
"grad_norm": 0.6584560871124268, "learning_rate": 3.598304922064696e-05, "loss": 0.0906, "step": 67550 }, { "epoch": 2.4551202849044262, "grad_norm": 2.7506167888641357, "learning_rate": 3.5978387265274157e-05, "loss": 0.1129, "step": 67560 }, { "epoch": 2.4554836834072242, "grad_norm": 1.5210083723068237, "learning_rate": 3.5973724836881694e-05, "loss": 0.1005, "step": 67570 }, { "epoch": 2.4558470819100227, "grad_norm": 0.7032837271690369, "learning_rate": 3.596906193567049e-05, "loss": 0.0681, "step": 67580 }, { "epoch": 2.4562104804128206, "grad_norm": 1.5217934846878052, "learning_rate": 3.596439856184142e-05, "loss": 0.1203, "step": 67590 }, { "epoch": 2.456573878915619, "grad_norm": 0.5665151476860046, "learning_rate": 3.595973471559544e-05, "loss": 0.0865, "step": 67600 }, { "epoch": 2.456937277418417, "grad_norm": 1.023913025856018, "learning_rate": 3.595507039713348e-05, "loss": 0.0941, "step": 67610 }, { "epoch": 2.457300675921215, "grad_norm": 0.6718622446060181, "learning_rate": 3.595040560665651e-05, "loss": 0.1392, "step": 67620 }, { "epoch": 2.4576640744240135, "grad_norm": 0.5096120238304138, "learning_rate": 3.594574034436553e-05, "loss": 0.1164, "step": 67630 }, { "epoch": 2.4580274729268115, "grad_norm": 0.776214063167572, "learning_rate": 3.594107461046154e-05, "loss": 0.1106, "step": 67640 }, { "epoch": 2.45839087142961, "grad_norm": 1.91248619556427, "learning_rate": 3.5936408405145575e-05, "loss": 4.1324, "step": 67650 }, { "epoch": 2.458754269932408, "grad_norm": 1.217971920967102, "learning_rate": 3.593174172861868e-05, "loss": 0.0972, "step": 67660 }, { "epoch": 2.459117668435206, "grad_norm": 6.793942451477051, "learning_rate": 3.5927074581081935e-05, "loss": 0.7676, "step": 67670 }, { "epoch": 2.4594810669380043, "grad_norm": 0.5515997409820557, "learning_rate": 3.592240696273643e-05, "loss": 0.0907, "step": 67680 }, { "epoch": 2.4598444654408023, "grad_norm": 0.4186965227127075, "learning_rate": 3.591773887378326e-05, "loss": 0.0876, 
"step": 67690 }, { "epoch": 2.4602078639436007, "grad_norm": 0.4198078215122223, "learning_rate": 3.5913070314423575e-05, "loss": 0.0872, "step": 67700 }, { "epoch": 2.4605712624463987, "grad_norm": 0.7509788870811462, "learning_rate": 3.5908401284858514e-05, "loss": 0.0912, "step": 67710 }, { "epoch": 2.4609346609491967, "grad_norm": 0.8919647336006165, "learning_rate": 3.590373178528926e-05, "loss": 0.1003, "step": 67720 }, { "epoch": 2.461298059451995, "grad_norm": 1.2128369808197021, "learning_rate": 3.5899061815917e-05, "loss": 0.1129, "step": 67730 }, { "epoch": 2.461661457954793, "grad_norm": 0.5779681205749512, "learning_rate": 3.589439137694293e-05, "loss": 0.1169, "step": 67740 }, { "epoch": 2.4620248564575915, "grad_norm": 0.6092358827590942, "learning_rate": 3.588972046856831e-05, "loss": 0.0884, "step": 67750 }, { "epoch": 2.4623882549603895, "grad_norm": 1.222869873046875, "learning_rate": 3.588504909099438e-05, "loss": 0.0993, "step": 67760 }, { "epoch": 2.462751653463188, "grad_norm": 0.26627829670906067, "learning_rate": 3.5880377244422416e-05, "loss": 0.1261, "step": 67770 }, { "epoch": 2.463115051965986, "grad_norm": 1.2034231424331665, "learning_rate": 3.58757049290537e-05, "loss": 0.0899, "step": 67780 }, { "epoch": 2.463478450468784, "grad_norm": 0.3671499192714691, "learning_rate": 3.5871032145089565e-05, "loss": 0.1387, "step": 67790 }, { "epoch": 2.4638418489715823, "grad_norm": 0.5502142310142517, "learning_rate": 3.586635889273133e-05, "loss": 0.1053, "step": 67800 }, { "epoch": 2.4638418489715823, "eval_loss": 0.32282206416130066, "eval_runtime": 179.8955, "eval_samples_per_second": 41.213, "eval_steps_per_second": 5.153, "eval_wer": 0.1538112440321673, "step": 67800 }, { "epoch": 2.4642052474743803, "grad_norm": 1.090920329093933, "learning_rate": 3.5861685172180346e-05, "loss": 0.1039, "step": 67810 }, { "epoch": 2.4645686459771787, "grad_norm": 0.406110018491745, "learning_rate": 3.5857010983638e-05, "loss": 0.1042, "step": 67820 }, { 
"epoch": 2.4649320444799767, "grad_norm": 1.2592461109161377, "learning_rate": 3.585233632730568e-05, "loss": 0.0835, "step": 67830 }, { "epoch": 2.465295442982775, "grad_norm": 0.5883360505104065, "learning_rate": 3.58476612033848e-05, "loss": 0.1341, "step": 67840 }, { "epoch": 2.465658841485573, "grad_norm": 1.322466492652893, "learning_rate": 3.58429856120768e-05, "loss": 0.0797, "step": 67850 }, { "epoch": 2.466022239988371, "grad_norm": 0.4922407567501068, "learning_rate": 3.583830955358312e-05, "loss": 0.0859, "step": 67860 }, { "epoch": 2.4663856384911695, "grad_norm": 0.7841882705688477, "learning_rate": 3.583363302810525e-05, "loss": 0.1096, "step": 67870 }, { "epoch": 2.4667490369939675, "grad_norm": 0.7191815376281738, "learning_rate": 3.582895603584467e-05, "loss": 0.0956, "step": 67880 }, { "epoch": 2.467112435496766, "grad_norm": 0.43222716450691223, "learning_rate": 3.5824278577002925e-05, "loss": 0.139, "step": 67890 }, { "epoch": 2.467475833999564, "grad_norm": 1.4954817295074463, "learning_rate": 3.581960065178151e-05, "loss": 0.0903, "step": 67900 }, { "epoch": 2.467839232502362, "grad_norm": 0.6472924947738647, "learning_rate": 3.5814922260382e-05, "loss": 0.0989, "step": 67910 }, { "epoch": 2.4682026310051604, "grad_norm": 1.0343185663223267, "learning_rate": 3.581024340300598e-05, "loss": 0.0951, "step": 67920 }, { "epoch": 2.4685660295079583, "grad_norm": 0.6948789358139038, "learning_rate": 3.580556407985503e-05, "loss": 0.3052, "step": 67930 }, { "epoch": 2.4689294280107568, "grad_norm": 0.5896201729774475, "learning_rate": 3.580088429113077e-05, "loss": 0.0787, "step": 67940 }, { "epoch": 2.4692928265135548, "grad_norm": 0.7022304534912109, "learning_rate": 3.5796204037034834e-05, "loss": 0.086, "step": 67950 }, { "epoch": 2.4696562250163527, "grad_norm": 0.6120296120643616, "learning_rate": 3.579152331776888e-05, "loss": 0.101, "step": 67960 }, { "epoch": 2.470019623519151, "grad_norm": 0.7050819993019104, "learning_rate": 
3.5786842133534584e-05, "loss": 0.1042, "step": 67970 }, { "epoch": 2.470383022021949, "grad_norm": 0.728625476360321, "learning_rate": 3.578216048453364e-05, "loss": 0.1194, "step": 67980 }, { "epoch": 2.4707464205247476, "grad_norm": 5.270279884338379, "learning_rate": 3.577747837096776e-05, "loss": 0.1007, "step": 67990 }, { "epoch": 2.4711098190275456, "grad_norm": 1.098525047302246, "learning_rate": 3.577279579303868e-05, "loss": 0.1017, "step": 68000 }, { "epoch": 2.4714732175303435, "grad_norm": 2.74465012550354, "learning_rate": 3.576811275094817e-05, "loss": 0.3871, "step": 68010 }, { "epoch": 2.471836616033142, "grad_norm": 0.6227459907531738, "learning_rate": 3.576342924489799e-05, "loss": 0.1103, "step": 68020 }, { "epoch": 2.47220001453594, "grad_norm": 2.293656349182129, "learning_rate": 3.5758745275089945e-05, "loss": 0.0953, "step": 68030 }, { "epoch": 2.4725634130387384, "grad_norm": 1.2598451375961304, "learning_rate": 3.575406084172584e-05, "loss": 0.1743, "step": 68040 }, { "epoch": 2.4729268115415364, "grad_norm": 1.4611924886703491, "learning_rate": 3.574937594500751e-05, "loss": 0.0955, "step": 68050 }, { "epoch": 2.473290210044335, "grad_norm": 0.6100664138793945, "learning_rate": 3.5744690585136834e-05, "loss": 0.0935, "step": 68060 }, { "epoch": 2.473653608547133, "grad_norm": 1.22284996509552, "learning_rate": 3.574000476231566e-05, "loss": 0.1435, "step": 68070 }, { "epoch": 2.4740170070499308, "grad_norm": 0.8457713723182678, "learning_rate": 3.5735318476745887e-05, "loss": 0.0832, "step": 68080 }, { "epoch": 2.474380405552729, "grad_norm": 1.3872827291488647, "learning_rate": 3.573063172862944e-05, "loss": 0.1453, "step": 68090 }, { "epoch": 2.474743804055527, "grad_norm": 1.066683292388916, "learning_rate": 3.572594451816826e-05, "loss": 0.0809, "step": 68100 }, { "epoch": 2.4751072025583256, "grad_norm": 1.5101946592330933, "learning_rate": 3.5721256845564286e-05, "loss": 0.0854, "step": 68110 }, { "epoch": 2.4754706010611236, 
"grad_norm": 0.6682563424110413, "learning_rate": 3.571656871101951e-05, "loss": 0.1077, "step": 68120 }, { "epoch": 2.475833999563922, "grad_norm": 1.0795047283172607, "learning_rate": 3.5711880114735917e-05, "loss": 0.0855, "step": 68130 }, { "epoch": 2.47619739806672, "grad_norm": 4.4557671546936035, "learning_rate": 3.570719105691551e-05, "loss": 0.1676, "step": 68140 }, { "epoch": 2.476560796569518, "grad_norm": 0.7962543368339539, "learning_rate": 3.570250153776035e-05, "loss": 0.0869, "step": 68150 }, { "epoch": 2.4769241950723164, "grad_norm": 12.166545867919922, "learning_rate": 3.569781155747247e-05, "loss": 0.2161, "step": 68160 }, { "epoch": 2.4772875935751144, "grad_norm": 0.4934634864330292, "learning_rate": 3.569312111625396e-05, "loss": 0.1146, "step": 68170 }, { "epoch": 2.477650992077913, "grad_norm": 1.008591651916504, "learning_rate": 3.56884302143069e-05, "loss": 0.1029, "step": 68180 }, { "epoch": 2.478014390580711, "grad_norm": 1.2141749858856201, "learning_rate": 3.568373885183342e-05, "loss": 0.1215, "step": 68190 }, { "epoch": 2.478377789083509, "grad_norm": 1.004011631011963, "learning_rate": 3.567904702903564e-05, "loss": 0.0831, "step": 68200 }, { "epoch": 2.4787411875863072, "grad_norm": 31.751787185668945, "learning_rate": 3.567435474611572e-05, "loss": 0.4307, "step": 68210 }, { "epoch": 2.4791045860891052, "grad_norm": 0.7640292048454285, "learning_rate": 3.566966200327584e-05, "loss": 0.1086, "step": 68220 }, { "epoch": 2.4794679845919037, "grad_norm": 0.5559817552566528, "learning_rate": 3.566496880071817e-05, "loss": 0.1082, "step": 68230 }, { "epoch": 2.4798313830947016, "grad_norm": 2.7342145442962646, "learning_rate": 3.566027513864496e-05, "loss": 0.1049, "step": 68240 }, { "epoch": 2.4801947815974996, "grad_norm": 1.2804802656173706, "learning_rate": 3.565558101725841e-05, "loss": 0.0957, "step": 68250 }, { "epoch": 2.480558180100298, "grad_norm": 6.4595770835876465, "learning_rate": 3.565088643676079e-05, "loss": 0.0967, 
"step": 68260 }, { "epoch": 2.480921578603096, "grad_norm": 0.7362810373306274, "learning_rate": 3.564619139735437e-05, "loss": 0.1271, "step": 68270 }, { "epoch": 2.4812849771058945, "grad_norm": 2.1541872024536133, "learning_rate": 3.564149589924145e-05, "loss": 0.1168, "step": 68280 }, { "epoch": 2.4816483756086924, "grad_norm": 1.1019583940505981, "learning_rate": 3.563679994262433e-05, "loss": 0.1151, "step": 68290 }, { "epoch": 2.4820117741114904, "grad_norm": 0.7224584817886353, "learning_rate": 3.563210352770534e-05, "loss": 0.2149, "step": 68300 }, { "epoch": 2.482375172614289, "grad_norm": 0.6910248398780823, "learning_rate": 3.562740665468684e-05, "loss": 0.0971, "step": 68310 }, { "epoch": 2.482738571117087, "grad_norm": 1.294913411140442, "learning_rate": 3.56227093237712e-05, "loss": 0.1336, "step": 68320 }, { "epoch": 2.4831019696198853, "grad_norm": 0.5386795401573181, "learning_rate": 3.561801153516082e-05, "loss": 0.1147, "step": 68330 }, { "epoch": 2.4834653681226833, "grad_norm": 0.5479850769042969, "learning_rate": 3.561331328905809e-05, "loss": 0.0878, "step": 68340 }, { "epoch": 2.4838287666254817, "grad_norm": 0.24666792154312134, "learning_rate": 3.560861458566546e-05, "loss": 0.9362, "step": 68350 }, { "epoch": 2.4841921651282797, "grad_norm": 0.776744544506073, "learning_rate": 3.560391542518537e-05, "loss": 0.1084, "step": 68360 }, { "epoch": 2.4845555636310777, "grad_norm": 0.7053751945495605, "learning_rate": 3.55992158078203e-05, "loss": 0.1096, "step": 68370 }, { "epoch": 2.484918962133876, "grad_norm": 0.5632005929946899, "learning_rate": 3.559451573377272e-05, "loss": 0.1125, "step": 68380 }, { "epoch": 2.485282360636674, "grad_norm": 0.5601955652236938, "learning_rate": 3.558981520324516e-05, "loss": 0.1011, "step": 68390 }, { "epoch": 2.4856457591394725, "grad_norm": 0.9751861691474915, "learning_rate": 3.558511421644014e-05, "loss": 0.1193, "step": 68400 }, { "epoch": 2.4856457591394725, "eval_loss": 0.3299192190170288, 
"eval_runtime": 180.0784, "eval_samples_per_second": 41.171, "eval_steps_per_second": 5.148, "eval_wer": 0.15812260605950587, "step": 68400 }, { "epoch": 2.4860091576422705, "grad_norm": 3.060753107070923, "learning_rate": 3.5580412773560214e-05, "loss": 0.1417, "step": 68410 }, { "epoch": 2.486372556145069, "grad_norm": 0.9213599562644958, "learning_rate": 3.557571087480794e-05, "loss": 0.1066, "step": 68420 }, { "epoch": 2.486735954647867, "grad_norm": 0.6596553921699524, "learning_rate": 3.557100852038592e-05, "loss": 0.0984, "step": 68430 }, { "epoch": 2.487099353150665, "grad_norm": 0.7937065362930298, "learning_rate": 3.556630571049675e-05, "loss": 0.1673, "step": 68440 }, { "epoch": 2.4874627516534633, "grad_norm": 1.1487483978271484, "learning_rate": 3.556160244534307e-05, "loss": 0.0982, "step": 68450 }, { "epoch": 2.4878261501562613, "grad_norm": 0.7516663074493408, "learning_rate": 3.5556898725127504e-05, "loss": 0.0879, "step": 68460 }, { "epoch": 2.4881895486590597, "grad_norm": 3.729604721069336, "learning_rate": 3.5552194550052745e-05, "loss": 0.1866, "step": 68470 }, { "epoch": 2.4885529471618577, "grad_norm": 0.6454250812530518, "learning_rate": 3.554748992032146e-05, "loss": 0.1261, "step": 68480 }, { "epoch": 2.4889163456646557, "grad_norm": 1.3000408411026, "learning_rate": 3.554278483613637e-05, "loss": 0.1297, "step": 68490 }, { "epoch": 2.489279744167454, "grad_norm": 1.060686707496643, "learning_rate": 3.5538079297700185e-05, "loss": 0.0863, "step": 68500 }, { "epoch": 2.489643142670252, "grad_norm": 1.2778925895690918, "learning_rate": 3.5533373305215665e-05, "loss": 0.0819, "step": 68510 }, { "epoch": 2.4900065411730505, "grad_norm": 0.9975671172142029, "learning_rate": 3.5528666858885565e-05, "loss": 0.101, "step": 68520 }, { "epoch": 2.4903699396758485, "grad_norm": 0.8623627424240112, "learning_rate": 3.5523959958912666e-05, "loss": 0.1161, "step": 68530 }, { "epoch": 2.4907333381786465, "grad_norm": 0.5452187061309814, "learning_rate": 
3.551925260549979e-05, "loss": 0.0967, "step": 68540 }, { "epoch": 2.491096736681445, "grad_norm": 0.7726628184318542, "learning_rate": 3.5514544798849736e-05, "loss": 0.1111, "step": 68550 }, { "epoch": 2.491460135184243, "grad_norm": 2.074589490890503, "learning_rate": 3.550983653916536e-05, "loss": 0.0911, "step": 68560 }, { "epoch": 2.4918235336870413, "grad_norm": 0.777515709400177, "learning_rate": 3.550512782664952e-05, "loss": 0.1118, "step": 68570 }, { "epoch": 2.4921869321898393, "grad_norm": 0.7411642074584961, "learning_rate": 3.55004186615051e-05, "loss": 0.0832, "step": 68580 }, { "epoch": 2.4925503306926373, "grad_norm": 1.0494729280471802, "learning_rate": 3.5495709043935e-05, "loss": 0.1126, "step": 68590 }, { "epoch": 2.4929137291954357, "grad_norm": 0.825706422328949, "learning_rate": 3.5490998974142144e-05, "loss": 2.8725, "step": 68600 }, { "epoch": 2.4932771276982337, "grad_norm": 0.7414544820785522, "learning_rate": 3.548628845232947e-05, "loss": 0.1034, "step": 68610 }, { "epoch": 2.493640526201032, "grad_norm": 1.752670168876648, "learning_rate": 3.548157747869993e-05, "loss": 0.4002, "step": 68620 }, { "epoch": 2.49400392470383, "grad_norm": 0.9184174537658691, "learning_rate": 3.547686605345651e-05, "loss": 0.101, "step": 68630 }, { "epoch": 2.4943673232066286, "grad_norm": 0.540532112121582, "learning_rate": 3.547215417680222e-05, "loss": 0.1, "step": 68640 }, { "epoch": 2.4947307217094266, "grad_norm": 0.7241819500923157, "learning_rate": 3.5467441848940056e-05, "loss": 0.0812, "step": 68650 }, { "epoch": 2.4950941202122245, "grad_norm": 0.5261086225509644, "learning_rate": 3.546272907007307e-05, "loss": 0.1093, "step": 68660 }, { "epoch": 2.495457518715023, "grad_norm": 0.5485601425170898, "learning_rate": 3.545801584040431e-05, "loss": 0.7212, "step": 68670 }, { "epoch": 2.495820917217821, "grad_norm": 0.5442925691604614, "learning_rate": 3.545330216013687e-05, "loss": 0.1235, "step": 68680 }, { "epoch": 2.4961843157206194, 
"grad_norm": 0.6182003021240234, "learning_rate": 3.5448588029473825e-05, "loss": 0.1382, "step": 68690 }, { "epoch": 2.4965477142234174, "grad_norm": 0.8053919076919556, "learning_rate": 3.5443873448618296e-05, "loss": 0.1266, "step": 68700 }, { "epoch": 2.496911112726216, "grad_norm": 2.04055118560791, "learning_rate": 3.5439158417773424e-05, "loss": 0.1026, "step": 68710 }, { "epoch": 2.497274511229014, "grad_norm": 0.5255793929100037, "learning_rate": 3.5434442937142354e-05, "loss": 0.1031, "step": 68720 }, { "epoch": 2.4976379097318118, "grad_norm": 1.7394444942474365, "learning_rate": 3.5429727006928266e-05, "loss": 0.081, "step": 68730 }, { "epoch": 2.49800130823461, "grad_norm": 1.1095107793807983, "learning_rate": 3.542501062733435e-05, "loss": 0.1198, "step": 68740 }, { "epoch": 2.498364706737408, "grad_norm": 1.0827983617782593, "learning_rate": 3.542029379856382e-05, "loss": 0.0985, "step": 68750 }, { "epoch": 2.4987281052402066, "grad_norm": 0.5815703868865967, "learning_rate": 3.54155765208199e-05, "loss": 0.0946, "step": 68760 }, { "epoch": 2.4990915037430046, "grad_norm": 1.133452296257019, "learning_rate": 3.541085879430585e-05, "loss": 0.0897, "step": 68770 }, { "epoch": 2.4994549022458026, "grad_norm": 1.6809009313583374, "learning_rate": 3.5406140619224936e-05, "loss": 0.1182, "step": 68780 }, { "epoch": 2.499818300748601, "grad_norm": 0.6066719889640808, "learning_rate": 3.540142199578045e-05, "loss": 0.1223, "step": 68790 }, { "epoch": 2.500181699251399, "grad_norm": 0.45101696252822876, "learning_rate": 3.53967029241757e-05, "loss": 0.0951, "step": 68800 }, { "epoch": 2.5005450977541974, "grad_norm": 2.0316238403320312, "learning_rate": 3.5391983404614e-05, "loss": 0.0941, "step": 68810 }, { "epoch": 2.5009084962569954, "grad_norm": 0.8582636117935181, "learning_rate": 3.538726343729873e-05, "loss": 0.1308, "step": 68820 }, { "epoch": 2.5012718947597934, "grad_norm": 1.0573068857192993, "learning_rate": 3.538254302243322e-05, "loss": 0.1064, 
"step": 68830 }, { "epoch": 2.501635293262592, "grad_norm": 1.7201263904571533, "learning_rate": 3.537782216022088e-05, "loss": 0.1303, "step": 68840 }, { "epoch": 2.50199869176539, "grad_norm": 0.8848857879638672, "learning_rate": 3.53731008508651e-05, "loss": 0.0885, "step": 68850 }, { "epoch": 2.5023620902681882, "grad_norm": 0.6936333775520325, "learning_rate": 3.5368379094569325e-05, "loss": 0.0989, "step": 68860 }, { "epoch": 2.502725488770986, "grad_norm": 0.7901983261108398, "learning_rate": 3.536365689153698e-05, "loss": 0.1984, "step": 68870 }, { "epoch": 2.503088887273784, "grad_norm": 0.5054183602333069, "learning_rate": 3.5358934241971534e-05, "loss": 0.0928, "step": 68880 }, { "epoch": 2.5034522857765826, "grad_norm": 1.7566126585006714, "learning_rate": 3.535421114607647e-05, "loss": 0.1212, "step": 68890 }, { "epoch": 2.5038156842793806, "grad_norm": 0.5128380656242371, "learning_rate": 3.5349487604055274e-05, "loss": 0.0774, "step": 68900 }, { "epoch": 2.504179082782179, "grad_norm": 0.994647741317749, "learning_rate": 3.53447636161115e-05, "loss": 0.1288, "step": 68910 }, { "epoch": 2.504542481284977, "grad_norm": 0.35602259635925293, "learning_rate": 3.534003918244866e-05, "loss": 0.1006, "step": 68920 }, { "epoch": 2.504905879787775, "grad_norm": 0.9458356499671936, "learning_rate": 3.533531430327032e-05, "loss": 0.1199, "step": 68930 }, { "epoch": 2.5052692782905734, "grad_norm": 1.100160837173462, "learning_rate": 3.533058897878006e-05, "loss": 0.0892, "step": 68940 }, { "epoch": 2.505632676793372, "grad_norm": 0.695726215839386, "learning_rate": 3.532586320918147e-05, "loss": 0.0928, "step": 68950 }, { "epoch": 2.50599607529617, "grad_norm": 1.826897382736206, "learning_rate": 3.532113699467819e-05, "loss": 0.105, "step": 68960 }, { "epoch": 2.506359473798968, "grad_norm": 1.4014049768447876, "learning_rate": 3.531641033547383e-05, "loss": 0.2298, "step": 68970 }, { "epoch": 2.5067228723017663, "grad_norm": 1.4749367237091064, 
"learning_rate": 3.531168323177206e-05, "loss": 0.0966, "step": 68980 }, { "epoch": 2.5070862708045643, "grad_norm": 4.613848686218262, "learning_rate": 3.530695568377655e-05, "loss": 0.1281, "step": 68990 }, { "epoch": 2.5074496693073627, "grad_norm": 0.9928845167160034, "learning_rate": 3.5302227691690984e-05, "loss": 0.1213, "step": 69000 }, { "epoch": 2.5074496693073627, "eval_loss": 0.30671653151512146, "eval_runtime": 179.555, "eval_samples_per_second": 41.291, "eval_steps_per_second": 5.163, "eval_wer": 0.1598017681122588, "step": 69000 }, { "epoch": 2.5078130678101607, "grad_norm": 0.24582041800022125, "learning_rate": 3.5297499255719094e-05, "loss": 0.0949, "step": 69010 }, { "epoch": 2.5081764663129587, "grad_norm": 0.4762285053730011, "learning_rate": 3.529277037606458e-05, "loss": 0.0983, "step": 69020 }, { "epoch": 2.508539864815757, "grad_norm": 0.6749287843704224, "learning_rate": 3.528804105293123e-05, "loss": 0.0911, "step": 69030 }, { "epoch": 2.508903263318555, "grad_norm": 0.4179406762123108, "learning_rate": 3.528331128652279e-05, "loss": 0.1979, "step": 69040 }, { "epoch": 2.5092666618213535, "grad_norm": 1.1406326293945312, "learning_rate": 3.5278581077043047e-05, "loss": 0.0918, "step": 69050 }, { "epoch": 2.5096300603241515, "grad_norm": 0.8093327879905701, "learning_rate": 3.527385042469583e-05, "loss": 0.0978, "step": 69060 }, { "epoch": 2.5099934588269495, "grad_norm": 1.7931946516036987, "learning_rate": 3.5269119329684945e-05, "loss": 0.109, "step": 69070 }, { "epoch": 2.510356857329748, "grad_norm": 0.6986146569252014, "learning_rate": 3.526438779221425e-05, "loss": 0.0993, "step": 69080 }, { "epoch": 2.510720255832546, "grad_norm": 1.2395824193954468, "learning_rate": 3.5259655812487604e-05, "loss": 0.1468, "step": 69090 }, { "epoch": 2.5110836543353443, "grad_norm": 3.537288188934326, "learning_rate": 3.525492339070889e-05, "loss": 0.0997, "step": 69100 }, { "epoch": 2.5114470528381423, "grad_norm": 0.8501663208007812, 
"learning_rate": 3.525019052708202e-05, "loss": 0.0933, "step": 69110 }, { "epoch": 2.5118104513409403, "grad_norm": 1.3228484392166138, "learning_rate": 3.524545722181091e-05, "loss": 0.1387, "step": 69120 }, { "epoch": 2.5121738498437387, "grad_norm": 1.2074254751205444, "learning_rate": 3.52407234750995e-05, "loss": 0.1062, "step": 69130 }, { "epoch": 2.5125372483465367, "grad_norm": 0.6108558177947998, "learning_rate": 3.523598928715174e-05, "loss": 0.1207, "step": 69140 }, { "epoch": 2.512900646849335, "grad_norm": 0.6959209442138672, "learning_rate": 3.523125465817164e-05, "loss": 0.0823, "step": 69150 }, { "epoch": 2.513264045352133, "grad_norm": 0.5447746515274048, "learning_rate": 3.5226519588363164e-05, "loss": 0.1009, "step": 69160 }, { "epoch": 2.513627443854931, "grad_norm": 6.87611198425293, "learning_rate": 3.522178407793036e-05, "loss": 0.1082, "step": 69170 }, { "epoch": 2.5139908423577295, "grad_norm": 1.2013996839523315, "learning_rate": 3.5217048127077246e-05, "loss": 0.1041, "step": 69180 }, { "epoch": 2.5143542408605275, "grad_norm": 2.1484246253967285, "learning_rate": 3.521231173600787e-05, "loss": 0.1174, "step": 69190 }, { "epoch": 2.514717639363326, "grad_norm": 0.6024388670921326, "learning_rate": 3.520757490492633e-05, "loss": 0.0968, "step": 69200 }, { "epoch": 2.515081037866124, "grad_norm": 0.621998131275177, "learning_rate": 3.5202837634036696e-05, "loss": 0.1441, "step": 69210 }, { "epoch": 2.515444436368922, "grad_norm": 0.7772573828697205, "learning_rate": 3.519809992354309e-05, "loss": 0.1199, "step": 69220 }, { "epoch": 2.5158078348717203, "grad_norm": 0.8994972109794617, "learning_rate": 3.519336177364966e-05, "loss": 0.1099, "step": 69230 }, { "epoch": 2.5161712333745188, "grad_norm": 0.7937003970146179, "learning_rate": 3.5188623184560524e-05, "loss": 0.1091, "step": 69240 }, { "epoch": 2.5165346318773167, "grad_norm": 1.3785254955291748, "learning_rate": 3.518388415647986e-05, "loss": 0.1035, "step": 69250 }, { "epoch": 
2.5168980303801147, "grad_norm": 0.6472801566123962, "learning_rate": 3.517914468961188e-05, "loss": 0.1054, "step": 69260 }, { "epoch": 2.517261428882913, "grad_norm": 2.0437135696411133, "learning_rate": 3.517440478416076e-05, "loss": 0.1224, "step": 69270 }, { "epoch": 2.517624827385711, "grad_norm": 0.9029390811920166, "learning_rate": 3.516966444033074e-05, "loss": 0.0865, "step": 69280 }, { "epoch": 2.5179882258885096, "grad_norm": 0.801255464553833, "learning_rate": 3.5164923658326064e-05, "loss": 0.0891, "step": 69290 }, { "epoch": 2.5183516243913076, "grad_norm": 1.0700057744979858, "learning_rate": 3.5160182438350995e-05, "loss": 0.0928, "step": 69300 }, { "epoch": 2.5187150228941055, "grad_norm": 0.6255751848220825, "learning_rate": 3.515544078060982e-05, "loss": 0.1071, "step": 69310 }, { "epoch": 2.519078421396904, "grad_norm": 0.784589409828186, "learning_rate": 3.515069868530683e-05, "loss": 0.0892, "step": 69320 }, { "epoch": 2.519441819899702, "grad_norm": 0.8623689413070679, "learning_rate": 3.514595615264635e-05, "loss": 0.1024, "step": 69330 }, { "epoch": 2.5198052184025004, "grad_norm": 1.3670728206634521, "learning_rate": 3.514121318283272e-05, "loss": 0.1021, "step": 69340 }, { "epoch": 2.5201686169052984, "grad_norm": 1.2742701768875122, "learning_rate": 3.513646977607029e-05, "loss": 0.0916, "step": 69350 }, { "epoch": 2.5205320154080963, "grad_norm": 2.6667962074279785, "learning_rate": 3.513172593256345e-05, "loss": 0.0921, "step": 69360 }, { "epoch": 2.5208954139108948, "grad_norm": 0.8958526849746704, "learning_rate": 3.512698165251659e-05, "loss": 0.0989, "step": 69370 }, { "epoch": 2.5212588124136928, "grad_norm": 1.1172994375228882, "learning_rate": 3.512223693613412e-05, "loss": 0.1104, "step": 69380 }, { "epoch": 2.521622210916491, "grad_norm": 0.5839262008666992, "learning_rate": 3.5117491783620475e-05, "loss": 0.128, "step": 69390 }, { "epoch": 2.521985609419289, "grad_norm": 0.9729129672050476, "learning_rate": 
3.51127461951801e-05, "loss": 0.1229, "step": 69400 }, { "epoch": 2.522349007922087, "grad_norm": 3.964264154434204, "learning_rate": 3.510800017101749e-05, "loss": 0.096, "step": 69410 }, { "epoch": 2.5227124064248856, "grad_norm": 0.4221835732460022, "learning_rate": 3.51032537113371e-05, "loss": 0.1109, "step": 69420 }, { "epoch": 2.5230758049276836, "grad_norm": 0.6467729806900024, "learning_rate": 3.5098506816343466e-05, "loss": 0.116, "step": 69430 }, { "epoch": 2.523439203430482, "grad_norm": 3.9705393314361572, "learning_rate": 3.50937594862411e-05, "loss": 0.1349, "step": 69440 }, { "epoch": 2.52380260193328, "grad_norm": 1.3955297470092773, "learning_rate": 3.508901172123455e-05, "loss": 0.1116, "step": 69450 }, { "epoch": 2.524166000436078, "grad_norm": 0.8039283156394958, "learning_rate": 3.508426352152838e-05, "loss": 0.0905, "step": 69460 }, { "epoch": 2.5245293989388764, "grad_norm": 1.1199578046798706, "learning_rate": 3.507951488732718e-05, "loss": 0.1136, "step": 69470 }, { "epoch": 2.5248927974416744, "grad_norm": 0.7925732731819153, "learning_rate": 3.507476581883555e-05, "loss": 0.1058, "step": 69480 }, { "epoch": 2.525256195944473, "grad_norm": 0.8125994205474854, "learning_rate": 3.5070016316258106e-05, "loss": 0.1033, "step": 69490 }, { "epoch": 2.525619594447271, "grad_norm": 0.4621226489543915, "learning_rate": 3.5065266379799475e-05, "loss": 1.4773, "step": 69500 }, { "epoch": 2.525982992950069, "grad_norm": 1.0948034524917603, "learning_rate": 3.506051600966434e-05, "loss": 0.0797, "step": 69510 }, { "epoch": 2.526346391452867, "grad_norm": 1.1567878723144531, "learning_rate": 3.5055765206057354e-05, "loss": 0.1143, "step": 69520 }, { "epoch": 2.5267097899556656, "grad_norm": 0.95686936378479, "learning_rate": 3.505101396918324e-05, "loss": 0.1188, "step": 69530 }, { "epoch": 2.5270731884584636, "grad_norm": 0.34038084745407104, "learning_rate": 3.504626229924669e-05, "loss": 0.1076, "step": 69540 }, { "epoch": 2.5274365869612616, 
"grad_norm": 4.851949214935303, "learning_rate": 3.504151019645243e-05, "loss": 0.0955, "step": 69550 }, { "epoch": 2.52779998546406, "grad_norm": 0.8883131742477417, "learning_rate": 3.503675766100524e-05, "loss": 0.1427, "step": 69560 }, { "epoch": 2.528163383966858, "grad_norm": 0.7588313221931458, "learning_rate": 3.5032004693109866e-05, "loss": 0.1198, "step": 69570 }, { "epoch": 2.5285267824696565, "grad_norm": 0.5408293604850769, "learning_rate": 3.50272512929711e-05, "loss": 0.1115, "step": 69580 }, { "epoch": 2.5288901809724544, "grad_norm": 1.0919950008392334, "learning_rate": 3.5022497460793754e-05, "loss": 0.7792, "step": 69590 }, { "epoch": 2.5292535794752524, "grad_norm": 0.9922258853912354, "learning_rate": 3.501774319678266e-05, "loss": 0.079, "step": 69600 }, { "epoch": 2.5292535794752524, "eval_loss": 0.3091621398925781, "eval_runtime": 180.6731, "eval_samples_per_second": 41.035, "eval_steps_per_second": 5.131, "eval_wer": 0.15795015157841233, "step": 69600 }, { "epoch": 2.529616977978051, "grad_norm": 1.5794726610183716, "learning_rate": 3.501298850114266e-05, "loss": 0.1154, "step": 69610 }, { "epoch": 2.529980376480849, "grad_norm": 3.069139003753662, "learning_rate": 3.5008233374078594e-05, "loss": 0.1161, "step": 69620 }, { "epoch": 2.5303437749836473, "grad_norm": 0.8879293203353882, "learning_rate": 3.500347781579537e-05, "loss": 0.0929, "step": 69630 }, { "epoch": 2.5307071734864452, "grad_norm": 2.097984552383423, "learning_rate": 3.4998721826497885e-05, "loss": 0.0873, "step": 69640 }, { "epoch": 2.5310705719892432, "grad_norm": 0.8583676218986511, "learning_rate": 3.499396540639104e-05, "loss": 0.6541, "step": 69650 }, { "epoch": 2.5314339704920417, "grad_norm": 0.44445595145225525, "learning_rate": 3.498920855567979e-05, "loss": 0.0748, "step": 69660 }, { "epoch": 2.5317973689948396, "grad_norm": 0.9186582565307617, "learning_rate": 3.4984451274569094e-05, "loss": 0.1022, "step": 69670 }, { "epoch": 2.532160767497638, "grad_norm": 
1.34561288356781, "learning_rate": 3.497969356326391e-05, "loss": 0.0962, "step": 69680 }, { "epoch": 2.532524166000436, "grad_norm": 1.5889935493469238, "learning_rate": 3.497493542196923e-05, "loss": 0.1013, "step": 69690 }, { "epoch": 2.532887564503234, "grad_norm": 1.0599699020385742, "learning_rate": 3.4970176850890085e-05, "loss": 0.1048, "step": 69700 }, { "epoch": 2.5332509630060325, "grad_norm": 0.7291392087936401, "learning_rate": 3.496541785023149e-05, "loss": 0.1002, "step": 69710 }, { "epoch": 2.5336143615088305, "grad_norm": 0.5541179180145264, "learning_rate": 3.4960658420198494e-05, "loss": 0.1062, "step": 69720 }, { "epoch": 2.533977760011629, "grad_norm": 1.0008395910263062, "learning_rate": 3.495589856099617e-05, "loss": 0.2525, "step": 69730 }, { "epoch": 2.534341158514427, "grad_norm": 0.7523865699768066, "learning_rate": 3.49511382728296e-05, "loss": 0.117, "step": 69740 }, { "epoch": 2.534704557017225, "grad_norm": 1.8582743406295776, "learning_rate": 3.4946377555903886e-05, "loss": 0.0834, "step": 69750 }, { "epoch": 2.5350679555200233, "grad_norm": 0.44991886615753174, "learning_rate": 3.494161641042415e-05, "loss": 0.0895, "step": 69760 }, { "epoch": 2.5354313540228213, "grad_norm": 0.46044957637786865, "learning_rate": 3.4936854836595545e-05, "loss": 0.1333, "step": 69770 }, { "epoch": 2.5357947525256197, "grad_norm": 2.098876476287842, "learning_rate": 3.493209283462321e-05, "loss": 0.1073, "step": 69780 }, { "epoch": 2.5361581510284177, "grad_norm": 0.5006657838821411, "learning_rate": 3.492733040471234e-05, "loss": 0.1205, "step": 69790 }, { "epoch": 2.5365215495312157, "grad_norm": 1.2363359928131104, "learning_rate": 3.492256754706813e-05, "loss": 0.0865, "step": 69800 }, { "epoch": 2.536884948034014, "grad_norm": 0.5873517394065857, "learning_rate": 3.491780426189577e-05, "loss": 0.0842, "step": 69810 }, { "epoch": 2.5372483465368125, "grad_norm": 0.5149590373039246, "learning_rate": 3.491304054940053e-05, "loss": 0.302, "step": 
69820 }, { "epoch": 2.5376117450396105, "grad_norm": 0.613667368888855, "learning_rate": 3.4908276409787635e-05, "loss": 0.1106, "step": 69830 }, { "epoch": 2.5379751435424085, "grad_norm": 1.8323549032211304, "learning_rate": 3.490351184326236e-05, "loss": 0.1301, "step": 69840 }, { "epoch": 2.538338542045207, "grad_norm": 1.859044075012207, "learning_rate": 3.4898746850030005e-05, "loss": 0.0863, "step": 69850 }, { "epoch": 2.538701940548005, "grad_norm": 1.0749214887619019, "learning_rate": 3.4893981430295864e-05, "loss": 0.0798, "step": 69860 }, { "epoch": 2.5390653390508033, "grad_norm": 0.9566397070884705, "learning_rate": 3.488921558426527e-05, "loss": 0.1183, "step": 69870 }, { "epoch": 2.5394287375536013, "grad_norm": 1.2835750579833984, "learning_rate": 3.4884449312143555e-05, "loss": 0.104, "step": 69880 }, { "epoch": 2.5397921360563993, "grad_norm": 0.6767297387123108, "learning_rate": 3.48796826141361e-05, "loss": 0.1889, "step": 69890 }, { "epoch": 2.5401555345591977, "grad_norm": 1.499045729637146, "learning_rate": 3.487491549044826e-05, "loss": 0.1031, "step": 69900 }, { "epoch": 2.5405189330619957, "grad_norm": 0.3522442877292633, "learning_rate": 3.487014794128545e-05, "loss": 0.1065, "step": 69910 }, { "epoch": 2.540882331564794, "grad_norm": 0.6056109070777893, "learning_rate": 3.486537996685309e-05, "loss": 0.1181, "step": 69920 }, { "epoch": 2.541245730067592, "grad_norm": 2.347325563430786, "learning_rate": 3.48606115673566e-05, "loss": 0.3176, "step": 69930 }, { "epoch": 2.54160912857039, "grad_norm": 2.3445467948913574, "learning_rate": 3.4855842743001446e-05, "loss": 0.1717, "step": 69940 }, { "epoch": 2.5419725270731885, "grad_norm": 0.9979462027549744, "learning_rate": 3.485107349399309e-05, "loss": 0.0845, "step": 69950 }, { "epoch": 2.5423359255759865, "grad_norm": 3.576714038848877, "learning_rate": 3.484630382053704e-05, "loss": 0.1516, "step": 69960 }, { "epoch": 2.542699324078785, "grad_norm": 0.4525027573108673, "learning_rate": 
3.484153372283878e-05, "loss": 0.1062, "step": 69970 }, { "epoch": 2.543062722581583, "grad_norm": 1.1381046772003174, "learning_rate": 3.4836763201103854e-05, "loss": 0.1246, "step": 69980 }, { "epoch": 2.543426121084381, "grad_norm": 0.6374491453170776, "learning_rate": 3.48319922555378e-05, "loss": 0.1775, "step": 69990 }, { "epoch": 2.5437895195871794, "grad_norm": 1.7682280540466309, "learning_rate": 3.482722088634618e-05, "loss": 0.1135, "step": 70000 }, { "epoch": 2.5441529180899773, "grad_norm": 1.1015331745147705, "learning_rate": 3.482244909373458e-05, "loss": 0.0904, "step": 70010 }, { "epoch": 2.5445163165927758, "grad_norm": 4.6638689041137695, "learning_rate": 3.481767687790859e-05, "loss": 0.2748, "step": 70020 }, { "epoch": 2.5448797150955738, "grad_norm": 0.8912318348884583, "learning_rate": 3.481290423907384e-05, "loss": 0.094, "step": 70030 }, { "epoch": 2.5452431135983717, "grad_norm": 2.43723726272583, "learning_rate": 3.480813117743596e-05, "loss": 0.1276, "step": 70040 }, { "epoch": 2.54560651210117, "grad_norm": 3.3461971282958984, "learning_rate": 3.480335769320061e-05, "loss": 0.0971, "step": 70050 }, { "epoch": 2.545969910603968, "grad_norm": 3.6578071117401123, "learning_rate": 3.479858378657346e-05, "loss": 0.1575, "step": 70060 }, { "epoch": 2.5463333091067666, "grad_norm": 1.831850290298462, "learning_rate": 3.479380945776018e-05, "loss": 0.0947, "step": 70070 }, { "epoch": 2.5466967076095646, "grad_norm": 0.8772917985916138, "learning_rate": 3.478903470696651e-05, "loss": 0.1189, "step": 70080 }, { "epoch": 2.5470601061123626, "grad_norm": 7.842989921569824, "learning_rate": 3.478425953439816e-05, "loss": 0.1245, "step": 70090 }, { "epoch": 2.547423504615161, "grad_norm": 1.8557602167129517, "learning_rate": 3.4779483940260885e-05, "loss": 0.1099, "step": 70100 }, { "epoch": 2.5477869031179594, "grad_norm": 1.2630740404129028, "learning_rate": 3.477470792476044e-05, "loss": 0.076, "step": 70110 }, { "epoch": 2.5481503016207574, 
"grad_norm": 0.5257185697555542, "learning_rate": 3.4769931488102606e-05, "loss": 0.1972, "step": 70120 }, { "epoch": 2.5485137001235554, "grad_norm": 0.5321794748306274, "learning_rate": 3.4765154630493194e-05, "loss": 0.087, "step": 70130 }, { "epoch": 2.548877098626354, "grad_norm": 0.7569301128387451, "learning_rate": 3.4760377352138e-05, "loss": 0.1349, "step": 70140 }, { "epoch": 2.549240497129152, "grad_norm": 3.5890607833862305, "learning_rate": 3.475559965324289e-05, "loss": 1.1846, "step": 70150 }, { "epoch": 2.5496038956319502, "grad_norm": 0.8748692870140076, "learning_rate": 3.475082153401368e-05, "loss": 0.0899, "step": 70160 }, { "epoch": 2.549967294134748, "grad_norm": 0.45375722646713257, "learning_rate": 3.474604299465628e-05, "loss": 0.1166, "step": 70170 }, { "epoch": 2.550330692637546, "grad_norm": 4.401093006134033, "learning_rate": 3.474126403537656e-05, "loss": 0.1247, "step": 70180 }, { "epoch": 2.5506940911403446, "grad_norm": 0.7887241244316101, "learning_rate": 3.473648465638043e-05, "loss": 0.1344, "step": 70190 }, { "epoch": 2.5510574896431426, "grad_norm": 1.8106690645217896, "learning_rate": 3.4731704857873826e-05, "loss": 0.095, "step": 70200 }, { "epoch": 2.5510574896431426, "eval_loss": 0.32158222794532776, "eval_runtime": 180.3244, "eval_samples_per_second": 41.115, "eval_steps_per_second": 5.141, "eval_wer": 0.15185070887868282, "step": 70200 }, { "epoch": 2.551420888145941, "grad_norm": 0.9934507012367249, "learning_rate": 3.4726924640062676e-05, "loss": 0.0928, "step": 70210 }, { "epoch": 2.551784286648739, "grad_norm": 2.932734966278076, "learning_rate": 3.472214400315296e-05, "loss": 0.1131, "step": 70220 }, { "epoch": 2.552147685151537, "grad_norm": 0.6811621189117432, "learning_rate": 3.471736294735065e-05, "loss": 0.1616, "step": 70230 }, { "epoch": 2.5525110836543354, "grad_norm": 3.0019402503967285, "learning_rate": 3.471258147286173e-05, "loss": 0.1317, "step": 70240 }, { "epoch": 2.5528744821571334, "grad_norm": 
0.5437862873077393, "learning_rate": 3.470779957989225e-05, "loss": 0.0941, "step": 70250 }, { "epoch": 2.553237880659932, "grad_norm": 1.201907992362976, "learning_rate": 3.470301726864822e-05, "loss": 0.0959, "step": 70260 }, { "epoch": 2.55360127916273, "grad_norm": 0.8288230299949646, "learning_rate": 3.469823453933569e-05, "loss": 0.1101, "step": 70270 }, { "epoch": 2.553964677665528, "grad_norm": 0.6374495625495911, "learning_rate": 3.469345139216075e-05, "loss": 0.0854, "step": 70280 }, { "epoch": 2.5543280761683262, "grad_norm": 0.5856258273124695, "learning_rate": 3.468866782732948e-05, "loss": 0.1128, "step": 70290 }, { "epoch": 2.5546914746711242, "grad_norm": 0.42517444491386414, "learning_rate": 3.4683883845047985e-05, "loss": 0.1721, "step": 70300 }, { "epoch": 2.5550548731739227, "grad_norm": 3.486084222793579, "learning_rate": 3.467909944552239e-05, "loss": 0.0961, "step": 70310 }, { "epoch": 2.5554182716767206, "grad_norm": 0.40620315074920654, "learning_rate": 3.467431462895884e-05, "loss": 0.1109, "step": 70320 }, { "epoch": 2.5557816701795186, "grad_norm": 0.5691574811935425, "learning_rate": 3.466952939556349e-05, "loss": 0.1062, "step": 70330 }, { "epoch": 2.556145068682317, "grad_norm": 1.281260371208191, "learning_rate": 3.466474374554252e-05, "loss": 0.096, "step": 70340 }, { "epoch": 2.556508467185115, "grad_norm": 0.9632150530815125, "learning_rate": 3.465995767910213e-05, "loss": 0.0934, "step": 70350 }, { "epoch": 2.5568718656879135, "grad_norm": 1.603409767150879, "learning_rate": 3.4655171196448544e-05, "loss": 0.0931, "step": 70360 }, { "epoch": 2.5572352641907115, "grad_norm": 0.9560374021530151, "learning_rate": 3.465038429778798e-05, "loss": 0.0984, "step": 70370 }, { "epoch": 2.5575986626935094, "grad_norm": 0.6290355920791626, "learning_rate": 3.464559698332669e-05, "loss": 0.113, "step": 70380 }, { "epoch": 2.557962061196308, "grad_norm": 156.17115783691406, "learning_rate": 3.464080925327094e-05, "loss": 0.4685, "step": 70390 
}, { "epoch": 2.5583254596991063, "grad_norm": 1.0737193822860718, "learning_rate": 3.4636021107827026e-05, "loss": 0.0767, "step": 70400 }, { "epoch": 2.5586888582019043, "grad_norm": 0.8538148403167725, "learning_rate": 3.463123254720125e-05, "loss": 0.087, "step": 70410 }, { "epoch": 2.5590522567047023, "grad_norm": 0.826351523399353, "learning_rate": 3.462644357159993e-05, "loss": 0.1008, "step": 70420 }, { "epoch": 2.5594156552075007, "grad_norm": 0.4948084056377411, "learning_rate": 3.462165418122941e-05, "loss": 0.0961, "step": 70430 }, { "epoch": 2.5597790537102987, "grad_norm": 0.9462293982505798, "learning_rate": 3.4616864376296046e-05, "loss": 0.135, "step": 70440 }, { "epoch": 2.560142452213097, "grad_norm": 0.5528499484062195, "learning_rate": 3.4612074157006206e-05, "loss": 0.101, "step": 70450 }, { "epoch": 2.560505850715895, "grad_norm": 0.822938859462738, "learning_rate": 3.4607283523566294e-05, "loss": 0.095, "step": 70460 }, { "epoch": 2.560869249218693, "grad_norm": 0.6554206013679504, "learning_rate": 3.460249247618271e-05, "loss": 0.0966, "step": 70470 }, { "epoch": 2.5612326477214915, "grad_norm": 0.8792755007743835, "learning_rate": 3.4597701015061904e-05, "loss": 0.1041, "step": 70480 }, { "epoch": 2.5615960462242895, "grad_norm": 0.8316457867622375, "learning_rate": 3.4592909140410304e-05, "loss": 0.1166, "step": 70490 }, { "epoch": 2.561959444727088, "grad_norm": 1.183933138847351, "learning_rate": 3.458811685243438e-05, "loss": 0.0911, "step": 70500 }, { "epoch": 2.562322843229886, "grad_norm": 0.6310432553291321, "learning_rate": 3.458332415134062e-05, "loss": 0.0833, "step": 70510 }, { "epoch": 2.562686241732684, "grad_norm": 0.5768032670021057, "learning_rate": 3.457853103733552e-05, "loss": 0.1941, "step": 70520 }, { "epoch": 2.5630496402354823, "grad_norm": 0.5388504266738892, "learning_rate": 3.457373751062559e-05, "loss": 0.1083, "step": 70530 }, { "epoch": 2.5634130387382803, "grad_norm": 9.278057098388672, "learning_rate": 
3.4568943571417376e-05, "loss": 0.102, "step": 70540 }, { "epoch": 2.5637764372410787, "grad_norm": 0.8533729910850525, "learning_rate": 3.456414921991744e-05, "loss": 0.0916, "step": 70550 }, { "epoch": 2.5641398357438767, "grad_norm": 0.8473436832427979, "learning_rate": 3.455935445633234e-05, "loss": 0.0953, "step": 70560 }, { "epoch": 2.5645032342466747, "grad_norm": 0.8911932706832886, "learning_rate": 3.455455928086866e-05, "loss": 0.0992, "step": 70570 }, { "epoch": 2.564866632749473, "grad_norm": 0.9488405585289001, "learning_rate": 3.4549763693733026e-05, "loss": 0.1191, "step": 70580 }, { "epoch": 2.565230031252271, "grad_norm": 0.6498254537582397, "learning_rate": 3.454496769513204e-05, "loss": 0.1442, "step": 70590 }, { "epoch": 2.5655934297550695, "grad_norm": 0.5127254724502563, "learning_rate": 3.4540171285272374e-05, "loss": 0.0843, "step": 70600 }, { "epoch": 2.5659568282578675, "grad_norm": 2.8321163654327393, "learning_rate": 3.453537446436066e-05, "loss": 0.076, "step": 70610 }, { "epoch": 2.5663202267606655, "grad_norm": 0.8829347491264343, "learning_rate": 3.4530577232603584e-05, "loss": 0.1044, "step": 70620 }, { "epoch": 2.566683625263464, "grad_norm": 1.8622163534164429, "learning_rate": 3.452577959020785e-05, "loss": 0.1057, "step": 70630 }, { "epoch": 2.567047023766262, "grad_norm": 0.5306766629219055, "learning_rate": 3.452098153738017e-05, "loss": 0.1118, "step": 70640 }, { "epoch": 2.5674104222690604, "grad_norm": 0.5810162425041199, "learning_rate": 3.451618307432727e-05, "loss": 0.0862, "step": 70650 }, { "epoch": 2.5677738207718583, "grad_norm": 0.790539026260376, "learning_rate": 3.4511384201255895e-05, "loss": 0.1227, "step": 70660 }, { "epoch": 2.5681372192746563, "grad_norm": 3.3890788555145264, "learning_rate": 3.450658491837282e-05, "loss": 0.0883, "step": 70670 }, { "epoch": 2.5685006177774548, "grad_norm": 1.1996808052062988, "learning_rate": 3.4501785225884816e-05, "loss": 0.0989, "step": 70680 }, { "epoch": 
2.568864016280253, "grad_norm": 0.81224524974823, "learning_rate": 3.449698512399871e-05, "loss": 0.1149, "step": 70690 }, { "epoch": 2.569227414783051, "grad_norm": 1.3377439975738525, "learning_rate": 3.4492184612921305e-05, "loss": 0.1048, "step": 70700 }, { "epoch": 2.569590813285849, "grad_norm": 0.9538800716400146, "learning_rate": 3.4487383692859423e-05, "loss": 0.0946, "step": 70710 }, { "epoch": 2.5699542117886476, "grad_norm": 0.49254775047302246, "learning_rate": 3.448258236401994e-05, "loss": 0.1008, "step": 70720 }, { "epoch": 2.5703176102914456, "grad_norm": 0.44506704807281494, "learning_rate": 3.447778062660973e-05, "loss": 0.1111, "step": 70730 }, { "epoch": 2.570681008794244, "grad_norm": 0.8836443424224854, "learning_rate": 3.4472978480835674e-05, "loss": 0.1064, "step": 70740 }, { "epoch": 2.571044407297042, "grad_norm": 0.8320255279541016, "learning_rate": 3.4468175926904666e-05, "loss": 0.0926, "step": 70750 }, { "epoch": 2.57140780579984, "grad_norm": 0.6895723342895508, "learning_rate": 3.446337296502366e-05, "loss": 0.0766, "step": 70760 }, { "epoch": 2.5717712043026384, "grad_norm": 0.6943153738975525, "learning_rate": 3.445856959539958e-05, "loss": 0.1134, "step": 70770 }, { "epoch": 2.5721346028054364, "grad_norm": 0.7596734166145325, "learning_rate": 3.4453765818239387e-05, "loss": 0.0969, "step": 70780 }, { "epoch": 2.572498001308235, "grad_norm": 0.42216864228248596, "learning_rate": 3.4448961633750066e-05, "loss": 0.1094, "step": 70790 }, { "epoch": 2.572861399811033, "grad_norm": 0.8295478224754333, "learning_rate": 3.44441570421386e-05, "loss": 0.0825, "step": 70800 }, { "epoch": 2.572861399811033, "eval_loss": 0.3259897530078888, "eval_runtime": 180.0538, "eval_samples_per_second": 41.177, "eval_steps_per_second": 5.148, "eval_wer": 0.15045291992666146, "step": 70800 }, { "epoch": 2.5732247983138308, "grad_norm": 0.8128442168235779, "learning_rate": 3.4439352043612015e-05, "loss": 0.093, "step": 70810 }, { "epoch": 
2.573588196816629, "grad_norm": 0.5261029601097107, "learning_rate": 3.4434546638377334e-05, "loss": 0.1067, "step": 70820 }, { "epoch": 2.573951595319427, "grad_norm": 2.5018603801727295, "learning_rate": 3.442974082664161e-05, "loss": 0.114, "step": 70830 }, { "epoch": 2.5743149938222256, "grad_norm": 0.37377244234085083, "learning_rate": 3.44249346086119e-05, "loss": 0.1822, "step": 70840 }, { "epoch": 2.5746783923250236, "grad_norm": 6.293512344360352, "learning_rate": 3.4420127984495295e-05, "loss": 0.1651, "step": 70850 }, { "epoch": 2.5750417908278216, "grad_norm": 1.2653559446334839, "learning_rate": 3.4415320954498894e-05, "loss": 0.0936, "step": 70860 }, { "epoch": 2.57540518933062, "grad_norm": 0.4816114008426666, "learning_rate": 3.4410513518829806e-05, "loss": 0.1242, "step": 70870 }, { "epoch": 2.575768587833418, "grad_norm": 0.6479201316833496, "learning_rate": 3.440570567769518e-05, "loss": 0.0924, "step": 70880 }, { "epoch": 2.5761319863362164, "grad_norm": 1.3513591289520264, "learning_rate": 3.440089743130216e-05, "loss": 0.1385, "step": 70890 }, { "epoch": 2.5764953848390144, "grad_norm": 0.4289826452732086, "learning_rate": 3.4396088779857917e-05, "loss": 0.0769, "step": 70900 }, { "epoch": 2.5768587833418124, "grad_norm": 1.5458887815475464, "learning_rate": 3.4391279723569635e-05, "loss": 0.1007, "step": 70910 }, { "epoch": 2.577222181844611, "grad_norm": 0.5470010638237, "learning_rate": 3.438647026264453e-05, "loss": 0.1041, "step": 70920 }, { "epoch": 2.577585580347409, "grad_norm": 0.7723416090011597, "learning_rate": 3.438166039728982e-05, "loss": 0.128, "step": 70930 }, { "epoch": 2.5779489788502072, "grad_norm": 0.7723271250724792, "learning_rate": 3.437685012771274e-05, "loss": 0.1332, "step": 70940 }, { "epoch": 2.5783123773530052, "grad_norm": 0.6610028147697449, "learning_rate": 3.4372039454120556e-05, "loss": 0.093, "step": 70950 }, { "epoch": 2.578675775855803, "grad_norm": 0.8244014978408813, "learning_rate": 
3.436722837672053e-05, "loss": 0.0913, "step": 70960 }, { "epoch": 2.5790391743586016, "grad_norm": 3.8544437885284424, "learning_rate": 3.4362416895719966e-05, "loss": 0.1292, "step": 70970 }, { "epoch": 2.5794025728614, "grad_norm": 0.570715069770813, "learning_rate": 3.4357605011326164e-05, "loss": 0.1142, "step": 70980 }, { "epoch": 2.579765971364198, "grad_norm": 0.6846952438354492, "learning_rate": 3.435279272374647e-05, "loss": 0.1157, "step": 70990 }, { "epoch": 2.580129369866996, "grad_norm": 0.8145487904548645, "learning_rate": 3.4347980033188203e-05, "loss": 0.0972, "step": 71000 }, { "epoch": 2.5804927683697945, "grad_norm": 0.3357942998409271, "learning_rate": 3.434316693985874e-05, "loss": 0.0952, "step": 71010 }, { "epoch": 2.5808561668725924, "grad_norm": 0.6499632000923157, "learning_rate": 3.433835344396546e-05, "loss": 0.2527, "step": 71020 }, { "epoch": 2.581219565375391, "grad_norm": 1.1719329357147217, "learning_rate": 3.4333539545715754e-05, "loss": 0.0954, "step": 71030 }, { "epoch": 2.581582963878189, "grad_norm": 2.707500457763672, "learning_rate": 3.432872524531704e-05, "loss": 0.1674, "step": 71040 }, { "epoch": 2.581946362380987, "grad_norm": 1.7513278722763062, "learning_rate": 3.432391054297674e-05, "loss": 0.0758, "step": 71050 }, { "epoch": 2.5823097608837853, "grad_norm": 0.9239100813865662, "learning_rate": 3.431909543890231e-05, "loss": 0.0919, "step": 71060 }, { "epoch": 2.5826731593865833, "grad_norm": 0.21719126403331757, "learning_rate": 3.431427993330122e-05, "loss": 0.1458, "step": 71070 }, { "epoch": 2.5830365578893817, "grad_norm": 0.7722142338752747, "learning_rate": 3.430946402638095e-05, "loss": 0.0977, "step": 71080 }, { "epoch": 2.5833999563921797, "grad_norm": 0.7812473773956299, "learning_rate": 3.430464771834899e-05, "loss": 0.1204, "step": 71090 }, { "epoch": 2.5837633548949777, "grad_norm": 1.0319454669952393, "learning_rate": 3.429983100941287e-05, "loss": 0.0921, "step": 71100 }, { "epoch": 2.584126753397776, 
"grad_norm": 0.8233940601348877, "learning_rate": 3.429501389978013e-05, "loss": 0.0931, "step": 71110 }, { "epoch": 2.584490151900574, "grad_norm": 0.5543156862258911, "learning_rate": 3.42901963896583e-05, "loss": 0.0936, "step": 71120 }, { "epoch": 2.5848535504033725, "grad_norm": 0.849062979221344, "learning_rate": 3.4285378479254964e-05, "loss": 0.1034, "step": 71130 }, { "epoch": 2.5852169489061705, "grad_norm": 0.7621930837631226, "learning_rate": 3.428056016877771e-05, "loss": 0.1219, "step": 71140 }, { "epoch": 2.5855803474089685, "grad_norm": 1.097886323928833, "learning_rate": 3.427574145843413e-05, "loss": 0.1034, "step": 71150 }, { "epoch": 2.585943745911767, "grad_norm": 1.2844264507293701, "learning_rate": 3.4270922348431866e-05, "loss": 0.0961, "step": 71160 }, { "epoch": 2.586307144414565, "grad_norm": 0.6416186094284058, "learning_rate": 3.4266102838978544e-05, "loss": 0.0982, "step": 71170 }, { "epoch": 2.5866705429173633, "grad_norm": 1.0426020622253418, "learning_rate": 3.426128293028181e-05, "loss": 0.108, "step": 71180 }, { "epoch": 2.5870339414201613, "grad_norm": 1.2115471363067627, "learning_rate": 3.425646262254935e-05, "loss": 0.0981, "step": 71190 }, { "epoch": 2.5873973399229593, "grad_norm": 1.362383484840393, "learning_rate": 3.425164191598885e-05, "loss": 0.0936, "step": 71200 }, { "epoch": 2.5877607384257577, "grad_norm": 1.0915354490280151, "learning_rate": 3.4246820810808025e-05, "loss": 0.0832, "step": 71210 }, { "epoch": 2.5881241369285557, "grad_norm": 1.0872890949249268, "learning_rate": 3.424199930721459e-05, "loss": 0.1135, "step": 71220 }, { "epoch": 2.588487535431354, "grad_norm": 0.5933959484100342, "learning_rate": 3.4237177405416276e-05, "loss": 0.0971, "step": 71230 }, { "epoch": 2.588850933934152, "grad_norm": 1.2194724082946777, "learning_rate": 3.423235510562086e-05, "loss": 0.1171, "step": 71240 }, { "epoch": 2.58921433243695, "grad_norm": 1.0390851497650146, "learning_rate": 3.422753240803612e-05, "loss": 1.326, 
"step": 71250 }, { "epoch": 2.5895777309397485, "grad_norm": 1.630076289176941, "learning_rate": 3.4222709312869825e-05, "loss": 0.0837, "step": 71260 }, { "epoch": 2.589941129442547, "grad_norm": 0.4006626307964325, "learning_rate": 3.421788582032981e-05, "loss": 0.1116, "step": 71270 }, { "epoch": 2.590304527945345, "grad_norm": 0.5512908697128296, "learning_rate": 3.4213061930623884e-05, "loss": 0.1169, "step": 71280 }, { "epoch": 2.590667926448143, "grad_norm": 1.0259326696395874, "learning_rate": 3.420823764395991e-05, "loss": 0.1221, "step": 71290 }, { "epoch": 2.5910313249509414, "grad_norm": 1.1377673149108887, "learning_rate": 3.420341296054574e-05, "loss": 0.1241, "step": 71300 }, { "epoch": 2.5913947234537393, "grad_norm": 0.9114333987236023, "learning_rate": 3.419858788058924e-05, "loss": 0.1012, "step": 71310 }, { "epoch": 2.5917581219565378, "grad_norm": 0.6368651390075684, "learning_rate": 3.4193762404298327e-05, "loss": 0.1096, "step": 71320 }, { "epoch": 2.5921215204593357, "grad_norm": 1.080757975578308, "learning_rate": 3.4188936531880894e-05, "loss": 0.1046, "step": 71330 }, { "epoch": 2.5924849189621337, "grad_norm": 11.998626708984375, "learning_rate": 3.418411026354489e-05, "loss": 0.2426, "step": 71340 }, { "epoch": 2.592848317464932, "grad_norm": 1.4404159784317017, "learning_rate": 3.417928359949824e-05, "loss": 0.0751, "step": 71350 }, { "epoch": 2.59321171596773, "grad_norm": 0.42481374740600586, "learning_rate": 3.417445653994893e-05, "loss": 0.0891, "step": 71360 }, { "epoch": 2.5935751144705286, "grad_norm": 0.6405854225158691, "learning_rate": 3.416962908510493e-05, "loss": 0.1497, "step": 71370 }, { "epoch": 2.5939385129733266, "grad_norm": 0.6601307392120361, "learning_rate": 3.416480123517424e-05, "loss": 0.1041, "step": 71380 }, { "epoch": 2.5943019114761245, "grad_norm": 6.7601318359375, "learning_rate": 3.415997299036486e-05, "loss": 0.1162, "step": 71390 }, { "epoch": 2.594665309978923, "grad_norm": 0.7878421545028687, 
"learning_rate": 3.415514435088485e-05, "loss": 0.089, "step": 71400 }, { "epoch": 2.594665309978923, "eval_loss": 0.32694903016090393, "eval_runtime": 180.1568, "eval_samples_per_second": 41.153, "eval_steps_per_second": 5.146, "eval_wer": 0.15186886198195582, "step": 71400 }, { "epoch": 2.595028708481721, "grad_norm": 0.8284702897071838, "learning_rate": 3.415031531694224e-05, "loss": 0.1116, "step": 71410 }, { "epoch": 2.5953921069845194, "grad_norm": 0.6205730438232422, "learning_rate": 3.41454858887451e-05, "loss": 0.1055, "step": 71420 }, { "epoch": 2.5957555054873174, "grad_norm": 0.6210823655128479, "learning_rate": 3.414065606650151e-05, "loss": 0.1, "step": 71430 }, { "epoch": 2.5961189039901154, "grad_norm": 1.2746903896331787, "learning_rate": 3.4135825850419576e-05, "loss": 0.0938, "step": 71440 }, { "epoch": 2.596482302492914, "grad_norm": 0.6673762202262878, "learning_rate": 3.4130995240707406e-05, "loss": 1.6946, "step": 71450 }, { "epoch": 2.5968457009957118, "grad_norm": 0.6017360687255859, "learning_rate": 3.4126164237573145e-05, "loss": 0.0974, "step": 71460 }, { "epoch": 2.59720909949851, "grad_norm": 0.5965964198112488, "learning_rate": 3.4121332841224926e-05, "loss": 0.1109, "step": 71470 }, { "epoch": 2.597572498001308, "grad_norm": 0.8033668398857117, "learning_rate": 3.411650105187094e-05, "loss": 0.1007, "step": 71480 }, { "epoch": 2.597935896504106, "grad_norm": 0.9280270338058472, "learning_rate": 3.411166886971936e-05, "loss": 0.1341, "step": 71490 }, { "epoch": 2.5982992950069046, "grad_norm": 0.6610667705535889, "learning_rate": 3.4106836294978386e-05, "loss": 0.088, "step": 71500 }, { "epoch": 2.5986626935097026, "grad_norm": 0.6038778424263, "learning_rate": 3.410200332785624e-05, "loss": 0.0901, "step": 71510 }, { "epoch": 2.599026092012501, "grad_norm": 0.673305094242096, "learning_rate": 3.409716996856115e-05, "loss": 0.1105, "step": 71520 }, { "epoch": 2.599389490515299, "grad_norm": 0.5786300301551819, "learning_rate": 
3.409233621730139e-05, "loss": 0.0854, "step": 71530 }, { "epoch": 2.599752889018097, "grad_norm": 0.8676998615264893, "learning_rate": 3.40875020742852e-05, "loss": 0.1241, "step": 71540 }, { "epoch": 2.6001162875208954, "grad_norm": 1.203029990196228, "learning_rate": 3.4083151010791036e-05, "loss": 2.9622, "step": 71550 }, { "epoch": 2.600479686023694, "grad_norm": 0.34861287474632263, "learning_rate": 3.40783161240115e-05, "loss": 0.1029, "step": 71560 }, { "epoch": 2.600843084526492, "grad_norm": 0.649398148059845, "learning_rate": 3.407348084607961e-05, "loss": 0.1265, "step": 71570 }, { "epoch": 2.60120648302929, "grad_norm": 1.2412714958190918, "learning_rate": 3.406864517720373e-05, "loss": 0.0755, "step": 71580 }, { "epoch": 2.6015698815320882, "grad_norm": 0.3836827278137207, "learning_rate": 3.406380911759219e-05, "loss": 0.1238, "step": 71590 }, { "epoch": 2.601933280034886, "grad_norm": 0.7698721885681152, "learning_rate": 3.405897266745337e-05, "loss": 0.09, "step": 71600 }, { "epoch": 2.6022966785376846, "grad_norm": 3.227402448654175, "learning_rate": 3.4054135826995636e-05, "loss": 0.5652, "step": 71610 }, { "epoch": 2.6026600770404826, "grad_norm": 0.5352892279624939, "learning_rate": 3.4049298596427415e-05, "loss": 0.1299, "step": 71620 }, { "epoch": 2.6030234755432806, "grad_norm": 1.8280853033065796, "learning_rate": 3.40444609759571e-05, "loss": 0.0985, "step": 71630 }, { "epoch": 2.603386874046079, "grad_norm": 0.6967837810516357, "learning_rate": 3.403962296579316e-05, "loss": 0.1074, "step": 71640 }, { "epoch": 2.603750272548877, "grad_norm": 0.4568573832511902, "learning_rate": 3.403478456614402e-05, "loss": 0.0929, "step": 71650 }, { "epoch": 2.6041136710516755, "grad_norm": 0.7679555416107178, "learning_rate": 3.402994577721816e-05, "loss": 0.0802, "step": 71660 }, { "epoch": 2.6044770695544734, "grad_norm": 0.9601152539253235, "learning_rate": 3.402510659922407e-05, "loss": 0.1268, "step": 71670 }, { "epoch": 2.6048404680572714, 
"grad_norm": 0.4837740659713745, "learning_rate": 3.4020267032370245e-05, "loss": 0.172, "step": 71680 }, { "epoch": 2.60520386656007, "grad_norm": 0.4992314279079437, "learning_rate": 3.401542707686521e-05, "loss": 0.1196, "step": 71690 }, { "epoch": 2.605567265062868, "grad_norm": 0.8764163255691528, "learning_rate": 3.4010586732917495e-05, "loss": 0.4405, "step": 71700 }, { "epoch": 2.6059306635656663, "grad_norm": 1.7109190225601196, "learning_rate": 3.400574600073566e-05, "loss": 0.1298, "step": 71710 }, { "epoch": 2.6062940620684643, "grad_norm": 1.2784879207611084, "learning_rate": 3.4000904880528275e-05, "loss": 0.1156, "step": 71720 }, { "epoch": 2.6066574605712622, "grad_norm": 8.60650634765625, "learning_rate": 3.399606337250392e-05, "loss": 0.125, "step": 71730 }, { "epoch": 2.6070208590740607, "grad_norm": 0.4801369309425354, "learning_rate": 3.39912214768712e-05, "loss": 0.1208, "step": 71740 }, { "epoch": 2.6073842575768587, "grad_norm": 4.337435722351074, "learning_rate": 3.398637919383873e-05, "loss": 0.1001, "step": 71750 }, { "epoch": 2.607747656079657, "grad_norm": 0.7785841822624207, "learning_rate": 3.398153652361517e-05, "loss": 0.088, "step": 71760 }, { "epoch": 2.608111054582455, "grad_norm": 0.7540931105613708, "learning_rate": 3.3976693466409155e-05, "loss": 0.0973, "step": 71770 }, { "epoch": 2.608474453085253, "grad_norm": 0.8233292698860168, "learning_rate": 3.3971850022429354e-05, "loss": 0.1088, "step": 71780 }, { "epoch": 2.6088378515880515, "grad_norm": 3.5180065631866455, "learning_rate": 3.396700619188446e-05, "loss": 0.1259, "step": 71790 }, { "epoch": 2.6092012500908495, "grad_norm": 0.8610531687736511, "learning_rate": 3.396216197498317e-05, "loss": 0.0909, "step": 71800 }, { "epoch": 2.609564648593648, "grad_norm": 0.7979753613471985, "learning_rate": 3.395731737193421e-05, "loss": 0.0987, "step": 71810 }, { "epoch": 2.609928047096446, "grad_norm": 3.379258871078491, "learning_rate": 3.3952472382946313e-05, "loss": 0.0889, 
"step": 71820 }, { "epoch": 2.610291445599244, "grad_norm": 1.4110392332077026, "learning_rate": 3.394762700822824e-05, "loss": 0.0718, "step": 71830 }, { "epoch": 2.6106548441020423, "grad_norm": 0.41368210315704346, "learning_rate": 3.3942781247988754e-05, "loss": 0.0993, "step": 71840 }, { "epoch": 2.6110182426048407, "grad_norm": 0.6575911045074463, "learning_rate": 3.3937935102436636e-05, "loss": 0.0845, "step": 71850 }, { "epoch": 2.6113816411076387, "grad_norm": 0.545257568359375, "learning_rate": 3.39330885717807e-05, "loss": 0.0896, "step": 71860 }, { "epoch": 2.6117450396104367, "grad_norm": 3.2522082328796387, "learning_rate": 3.392824165622976e-05, "loss": 0.1424, "step": 71870 }, { "epoch": 2.612108438113235, "grad_norm": 1.4843670129776, "learning_rate": 3.392339435599265e-05, "loss": 0.0788, "step": 71880 }, { "epoch": 2.612471836616033, "grad_norm": 1.475480318069458, "learning_rate": 3.3918546671278235e-05, "loss": 0.1693, "step": 71890 }, { "epoch": 2.6128352351188315, "grad_norm": 0.9142501354217529, "learning_rate": 3.3913698602295376e-05, "loss": 0.0975, "step": 71900 }, { "epoch": 2.6131986336216295, "grad_norm": 1.1580731868743896, "learning_rate": 3.390885014925295e-05, "loss": 0.1173, "step": 71910 }, { "epoch": 2.6135620321244275, "grad_norm": 0.5489696860313416, "learning_rate": 3.3904001312359874e-05, "loss": 0.111, "step": 71920 }, { "epoch": 2.613925430627226, "grad_norm": 1.6776854991912842, "learning_rate": 3.3899152091825064e-05, "loss": 0.1057, "step": 71930 }, { "epoch": 2.614288829130024, "grad_norm": 0.8543124198913574, "learning_rate": 3.3894302487857446e-05, "loss": 0.1383, "step": 71940 }, { "epoch": 2.6146522276328223, "grad_norm": 0.4847543239593506, "learning_rate": 3.388945250066599e-05, "loss": 0.1408, "step": 71950 }, { "epoch": 2.6150156261356203, "grad_norm": 0.8432245254516602, "learning_rate": 3.388460213045965e-05, "loss": 0.1051, "step": 71960 }, { "epoch": 2.6153790246384183, "grad_norm": 0.6041918396949768, 
"learning_rate": 3.387975137744742e-05, "loss": 0.0875, "step": 71970 }, { "epoch": 2.6157424231412167, "grad_norm": 0.6646948456764221, "learning_rate": 3.387490024183829e-05, "loss": 0.0961, "step": 71980 }, { "epoch": 2.6161058216440147, "grad_norm": 1.7802671194076538, "learning_rate": 3.387004872384129e-05, "loss": 0.1365, "step": 71990 }, { "epoch": 2.616469220146813, "grad_norm": 5.307714462280273, "learning_rate": 3.3865196823665454e-05, "loss": 0.0987, "step": 72000 }, { "epoch": 2.616469220146813, "eval_loss": 0.3264125883579254, "eval_runtime": 179.4492, "eval_samples_per_second": 41.315, "eval_steps_per_second": 5.166, "eval_wer": 0.15330295714052317, "step": 72000 }, { "epoch": 2.616832618649611, "grad_norm": 2.6637954711914062, "learning_rate": 3.386034454151982e-05, "loss": 0.0894, "step": 72010 }, { "epoch": 2.617196017152409, "grad_norm": 1.1595145463943481, "learning_rate": 3.385549187761347e-05, "loss": 0.1152, "step": 72020 }, { "epoch": 2.6175594156552076, "grad_norm": 0.6032492518424988, "learning_rate": 3.3850638832155486e-05, "loss": 0.097, "step": 72030 }, { "epoch": 2.6179228141580055, "grad_norm": 0.9775734543800354, "learning_rate": 3.3845785405354955e-05, "loss": 0.109, "step": 72040 }, { "epoch": 2.618286212660804, "grad_norm": 1.095192790031433, "learning_rate": 3.384093159742102e-05, "loss": 3.8308, "step": 72050 }, { "epoch": 2.618649611163602, "grad_norm": 0.4287867248058319, "learning_rate": 3.383607740856278e-05, "loss": 0.109, "step": 72060 }, { "epoch": 2.6190130096664, "grad_norm": 0.8537576794624329, "learning_rate": 3.3831222838989416e-05, "loss": 0.1129, "step": 72070 }, { "epoch": 2.6193764081691984, "grad_norm": 0.6496450901031494, "learning_rate": 3.382636788891008e-05, "loss": 0.1073, "step": 72080 }, { "epoch": 2.6197398066719964, "grad_norm": 1.3108080625534058, "learning_rate": 3.382151255853396e-05, "loss": 0.0974, "step": 72090 }, { "epoch": 2.620103205174795, "grad_norm": 0.6950684785842896, "learning_rate": 
3.381665684807024e-05, "loss": 0.0906, "step": 72100 }, { "epoch": 2.6204666036775928, "grad_norm": 0.5899102091789246, "learning_rate": 3.381180075772815e-05, "loss": 0.0891, "step": 72110 }, { "epoch": 2.6208300021803907, "grad_norm": 1.007866382598877, "learning_rate": 3.380694428771692e-05, "loss": 0.1396, "step": 72120 }, { "epoch": 2.621193400683189, "grad_norm": 1.360654592514038, "learning_rate": 3.38020874382458e-05, "loss": 0.1011, "step": 72130 }, { "epoch": 2.6215567991859876, "grad_norm": 0.9132879972457886, "learning_rate": 3.3797230209524046e-05, "loss": 0.1458, "step": 72140 }, { "epoch": 2.6219201976887856, "grad_norm": 0.9610080122947693, "learning_rate": 3.379237260176093e-05, "loss": 0.0807, "step": 72150 }, { "epoch": 2.6222835961915836, "grad_norm": 0.8496606349945068, "learning_rate": 3.378751461516578e-05, "loss": 0.1041, "step": 72160 }, { "epoch": 2.622646994694382, "grad_norm": 0.5927808284759521, "learning_rate": 3.3782656249947894e-05, "loss": 1.5747, "step": 72170 }, { "epoch": 2.62301039319718, "grad_norm": 0.6998677849769592, "learning_rate": 3.3777797506316586e-05, "loss": 0.1019, "step": 72180 }, { "epoch": 2.6233737916999784, "grad_norm": 0.7553665637969971, "learning_rate": 3.3772938384481225e-05, "loss": 0.1208, "step": 72190 }, { "epoch": 2.6237371902027764, "grad_norm": 0.3797171115875244, "learning_rate": 3.376807888465116e-05, "loss": 0.1391, "step": 72200 }, { "epoch": 2.6241005887055744, "grad_norm": 0.8174279928207397, "learning_rate": 3.376321900703576e-05, "loss": 0.0969, "step": 72210 }, { "epoch": 2.624463987208373, "grad_norm": 3.2531015872955322, "learning_rate": 3.375835875184445e-05, "loss": 0.1024, "step": 72220 }, { "epoch": 2.624827385711171, "grad_norm": 1.0118309259414673, "learning_rate": 3.3753498119286616e-05, "loss": 0.1033, "step": 72230 }, { "epoch": 2.6251907842139692, "grad_norm": 1.3845003843307495, "learning_rate": 3.374863710957169e-05, "loss": 0.0972, "step": 72240 }, { "epoch": 2.625554182716767, 
"grad_norm": 0.5679929256439209, "learning_rate": 3.3743775722909124e-05, "loss": 0.0885, "step": 72250 }, { "epoch": 2.625917581219565, "grad_norm": 0.30551066994667053, "learning_rate": 3.373891395950838e-05, "loss": 0.0825, "step": 72260 }, { "epoch": 2.6262809797223636, "grad_norm": 0.40965649485588074, "learning_rate": 3.373405181957891e-05, "loss": 0.1317, "step": 72270 }, { "epoch": 2.6266443782251616, "grad_norm": 0.6095037460327148, "learning_rate": 3.3729189303330236e-05, "loss": 0.0956, "step": 72280 }, { "epoch": 2.62700777672796, "grad_norm": 0.6658949851989746, "learning_rate": 3.3724326410971844e-05, "loss": 0.7257, "step": 72290 }, { "epoch": 2.627371175230758, "grad_norm": 3.251826524734497, "learning_rate": 3.371946314271327e-05, "loss": 0.1154, "step": 72300 }, { "epoch": 2.627734573733556, "grad_norm": 0.6652829051017761, "learning_rate": 3.371459949876406e-05, "loss": 0.0783, "step": 72310 }, { "epoch": 2.6280979722363544, "grad_norm": 7.321001052856445, "learning_rate": 3.370973547933376e-05, "loss": 0.1234, "step": 72320 }, { "epoch": 2.6284613707391524, "grad_norm": 0.5794792771339417, "learning_rate": 3.370487108463195e-05, "loss": 0.0981, "step": 72330 }, { "epoch": 2.628824769241951, "grad_norm": 2.157670736312866, "learning_rate": 3.370000631486822e-05, "loss": 0.1266, "step": 72340 }, { "epoch": 2.629188167744749, "grad_norm": 0.7778168320655823, "learning_rate": 3.369514117025216e-05, "loss": 0.1093, "step": 72350 }, { "epoch": 2.629551566247547, "grad_norm": 0.4507717490196228, "learning_rate": 3.3690275650993416e-05, "loss": 0.0935, "step": 72360 }, { "epoch": 2.6299149647503453, "grad_norm": 0.2999439835548401, "learning_rate": 3.36854097573016e-05, "loss": 0.1175, "step": 72370 }, { "epoch": 2.6302783632531432, "grad_norm": 0.6613568663597107, "learning_rate": 3.36805434893864e-05, "loss": 0.0982, "step": 72380 }, { "epoch": 2.6306417617559417, "grad_norm": 2.211897611618042, "learning_rate": 3.367567684745745e-05, "loss": 0.0877, 
"step": 72390 }, { "epoch": 2.6310051602587397, "grad_norm": 2.100520372390747, "learning_rate": 3.367080983172446e-05, "loss": 0.1534, "step": 72400 }, { "epoch": 2.6313685587615376, "grad_norm": 0.6530410647392273, "learning_rate": 3.366594244239713e-05, "loss": 0.0819, "step": 72410 }, { "epoch": 2.631731957264336, "grad_norm": 0.8508390188217163, "learning_rate": 3.366107467968517e-05, "loss": 1.7912, "step": 72420 }, { "epoch": 2.6320953557671345, "grad_norm": 2.8727540969848633, "learning_rate": 3.365620654379831e-05, "loss": 0.1243, "step": 72430 }, { "epoch": 2.6324587542699325, "grad_norm": 0.42828473448753357, "learning_rate": 3.3651338034946314e-05, "loss": 0.1423, "step": 72440 }, { "epoch": 2.6328221527727305, "grad_norm": 0.5628288388252258, "learning_rate": 3.364646915333895e-05, "loss": 0.4869, "step": 72450 }, { "epoch": 2.633185551275529, "grad_norm": 0.48309531807899475, "learning_rate": 3.364159989918598e-05, "loss": 0.095, "step": 72460 }, { "epoch": 2.633548949778327, "grad_norm": 0.5635913014411926, "learning_rate": 3.3636730272697234e-05, "loss": 0.0948, "step": 72470 }, { "epoch": 2.6339123482811253, "grad_norm": 0.7630922198295593, "learning_rate": 3.3631860274082504e-05, "loss": 0.108, "step": 72480 }, { "epoch": 2.6342757467839233, "grad_norm": 1.6271787881851196, "learning_rate": 3.3626989903551626e-05, "loss": 0.1169, "step": 72490 }, { "epoch": 2.6346391452867213, "grad_norm": 1.0612133741378784, "learning_rate": 3.3622119161314446e-05, "loss": 0.1574, "step": 72500 }, { "epoch": 2.6350025437895197, "grad_norm": 0.5140257477760315, "learning_rate": 3.361724804758083e-05, "loss": 0.0765, "step": 72510 }, { "epoch": 2.6353659422923177, "grad_norm": 1.7995847463607788, "learning_rate": 3.361237656256066e-05, "loss": 0.1069, "step": 72520 }, { "epoch": 2.635729340795116, "grad_norm": 0.6248586177825928, "learning_rate": 3.360750470646383e-05, "loss": 0.0842, "step": 72530 }, { "epoch": 2.636092739297914, "grad_norm": 0.8050362467765808, 
"learning_rate": 3.360263247950023e-05, "loss": 0.2037, "step": 72540 }, { "epoch": 2.636456137800712, "grad_norm": 1.0852928161621094, "learning_rate": 3.359775988187983e-05, "loss": 0.0985, "step": 72550 }, { "epoch": 2.6368195363035105, "grad_norm": 0.591783344745636, "learning_rate": 3.359288691381253e-05, "loss": 0.0948, "step": 72560 }, { "epoch": 2.6371829348063085, "grad_norm": 0.6417847275733948, "learning_rate": 3.358801357550831e-05, "loss": 0.1011, "step": 72570 }, { "epoch": 2.637546333309107, "grad_norm": 0.8405566811561584, "learning_rate": 3.358313986717714e-05, "loss": 0.1016, "step": 72580 }, { "epoch": 2.637909731811905, "grad_norm": 1.3981549739837646, "learning_rate": 3.357826578902901e-05, "loss": 0.1164, "step": 72590 }, { "epoch": 2.638273130314703, "grad_norm": 0.6139928698539734, "learning_rate": 3.357339134127393e-05, "loss": 0.091, "step": 72600 }, { "epoch": 2.638273130314703, "eval_loss": 0.3287167251110077, "eval_runtime": 180.0474, "eval_samples_per_second": 41.178, "eval_steps_per_second": 5.149, "eval_wer": 0.15324849783070416, "step": 72600 }, { "epoch": 2.6386365288175013, "grad_norm": 0.5749704241752625, "learning_rate": 3.356851652412193e-05, "loss": 0.1381, "step": 72610 }, { "epoch": 2.6389999273202993, "grad_norm": 0.48276287317276, "learning_rate": 3.3563641337783035e-05, "loss": 0.1009, "step": 72620 }, { "epoch": 2.6393633258230977, "grad_norm": 1.9593979120254517, "learning_rate": 3.35587657824673e-05, "loss": 0.1092, "step": 72630 }, { "epoch": 2.6397267243258957, "grad_norm": 1.3498990535736084, "learning_rate": 3.35538898583848e-05, "loss": 0.1033, "step": 72640 }, { "epoch": 2.6400901228286937, "grad_norm": 4.862055778503418, "learning_rate": 3.354901356574563e-05, "loss": 0.0958, "step": 72650 }, { "epoch": 2.640453521331492, "grad_norm": 0.3533124029636383, "learning_rate": 3.354413690475987e-05, "loss": 0.0717, "step": 72660 }, { "epoch": 2.64081691983429, "grad_norm": 1.32888662815094, "learning_rate": 
3.3539259875637664e-05, "loss": 0.1151, "step": 72670 }, { "epoch": 2.6411803183370886, "grad_norm": 0.8028721213340759, "learning_rate": 3.353438247858912e-05, "loss": 0.1194, "step": 72680 }, { "epoch": 2.6415437168398865, "grad_norm": 0.8875879645347595, "learning_rate": 3.352950471382441e-05, "loss": 0.1026, "step": 72690 }, { "epoch": 2.6419071153426845, "grad_norm": 0.956411600112915, "learning_rate": 3.3524626581553684e-05, "loss": 0.0575, "step": 72700 }, { "epoch": 2.642270513845483, "grad_norm": 2.3979716300964355, "learning_rate": 3.351974808198713e-05, "loss": 0.1078, "step": 72710 }, { "epoch": 2.6426339123482814, "grad_norm": 3.241748571395874, "learning_rate": 3.351486921533495e-05, "loss": 0.1079, "step": 72720 }, { "epoch": 2.6429973108510794, "grad_norm": 0.5039170384407043, "learning_rate": 3.350998998180735e-05, "loss": 0.112, "step": 72730 }, { "epoch": 2.6433607093538773, "grad_norm": 1.3268622159957886, "learning_rate": 3.350511038161456e-05, "loss": 0.115, "step": 72740 }, { "epoch": 2.6437241078566758, "grad_norm": 0.8192645907402039, "learning_rate": 3.350023041496682e-05, "loss": 0.0873, "step": 72750 }, { "epoch": 2.6440875063594738, "grad_norm": 0.7651393413543701, "learning_rate": 3.34953500820744e-05, "loss": 0.0978, "step": 72760 }, { "epoch": 2.644450904862272, "grad_norm": 3.417607069015503, "learning_rate": 3.3490469383147564e-05, "loss": 0.1212, "step": 72770 }, { "epoch": 2.64481430336507, "grad_norm": 6.00585412979126, "learning_rate": 3.348558831839661e-05, "loss": 0.1212, "step": 72780 }, { "epoch": 2.645177701867868, "grad_norm": 0.36312735080718994, "learning_rate": 3.3480706888031865e-05, "loss": 0.1236, "step": 72790 }, { "epoch": 2.6455411003706666, "grad_norm": 0.6871273517608643, "learning_rate": 3.347582509226362e-05, "loss": 0.0875, "step": 72800 }, { "epoch": 2.6459044988734646, "grad_norm": 1.383457899093628, "learning_rate": 3.3470942931302236e-05, "loss": 0.1213, "step": 72810 }, { "epoch": 2.646267897376263, 
"grad_norm": 0.7942748665809631, "learning_rate": 3.346606040535805e-05, "loss": 0.0837, "step": 72820 }, { "epoch": 2.646631295879061, "grad_norm": 1.3881112337112427, "learning_rate": 3.346117751464146e-05, "loss": 0.085, "step": 72830 }, { "epoch": 2.646994694381859, "grad_norm": 2.80416202545166, "learning_rate": 3.345629425936283e-05, "loss": 4.2814, "step": 72840 }, { "epoch": 2.6473580928846574, "grad_norm": 0.7120780348777771, "learning_rate": 3.345141063973256e-05, "loss": 0.0874, "step": 72850 }, { "epoch": 2.6477214913874554, "grad_norm": 1.7287395000457764, "learning_rate": 3.344652665596108e-05, "loss": 0.1066, "step": 72860 }, { "epoch": 2.648084889890254, "grad_norm": 0.6621074080467224, "learning_rate": 3.344164230825882e-05, "loss": 0.1096, "step": 72870 }, { "epoch": 2.648448288393052, "grad_norm": 1.1361407041549683, "learning_rate": 3.343675759683623e-05, "loss": 0.1008, "step": 72880 }, { "epoch": 2.64881168689585, "grad_norm": 0.6596959829330444, "learning_rate": 3.3431872521903766e-05, "loss": 0.1222, "step": 72890 }, { "epoch": 2.649175085398648, "grad_norm": 1.1007713079452515, "learning_rate": 3.342698708367192e-05, "loss": 0.0837, "step": 72900 }, { "epoch": 2.649538483901446, "grad_norm": 0.754059910774231, "learning_rate": 3.342210128235119e-05, "loss": 0.1006, "step": 72910 }, { "epoch": 2.6499018824042446, "grad_norm": 0.3821747303009033, "learning_rate": 3.341721511815208e-05, "loss": 0.1176, "step": 72920 }, { "epoch": 2.6502652809070426, "grad_norm": 1.3366892337799072, "learning_rate": 3.341232859128511e-05, "loss": 0.0974, "step": 72930 }, { "epoch": 2.6506286794098406, "grad_norm": 0.8863315582275391, "learning_rate": 3.340744170196084e-05, "loss": 0.1181, "step": 72940 }, { "epoch": 2.650992077912639, "grad_norm": 0.8968802094459534, "learning_rate": 3.3402554450389826e-05, "loss": 0.1012, "step": 72950 }, { "epoch": 2.651355476415437, "grad_norm": 0.6080183982849121, "learning_rate": 3.339766683678262e-05, "loss": 0.0997, 
"step": 72960 }, { "epoch": 2.6517188749182354, "grad_norm": 0.8404228687286377, "learning_rate": 3.339277886134985e-05, "loss": 0.098, "step": 72970 }, { "epoch": 2.6520822734210334, "grad_norm": 0.5841569900512695, "learning_rate": 3.338789052430208e-05, "loss": 0.1032, "step": 72980 }, { "epoch": 2.6524456719238314, "grad_norm": 1.0763466358184814, "learning_rate": 3.3383001825849966e-05, "loss": 0.0971, "step": 72990 }, { "epoch": 2.65280907042663, "grad_norm": 2.3036322593688965, "learning_rate": 3.337811276620412e-05, "loss": 1.71, "step": 73000 }, { "epoch": 2.6531724689294283, "grad_norm": 0.29687023162841797, "learning_rate": 3.337322334557521e-05, "loss": 0.1814, "step": 73010 }, { "epoch": 2.6535358674322262, "grad_norm": 1.0260635614395142, "learning_rate": 3.3368333564173905e-05, "loss": 0.1138, "step": 73020 }, { "epoch": 2.6538992659350242, "grad_norm": 1.9731409549713135, "learning_rate": 3.3363443422210875e-05, "loss": 0.1022, "step": 73030 }, { "epoch": 2.6542626644378227, "grad_norm": 1.9720983505249023, "learning_rate": 3.335855291989682e-05, "loss": 0.1169, "step": 73040 }, { "epoch": 2.6546260629406206, "grad_norm": 0.8619999885559082, "learning_rate": 3.335366205744246e-05, "loss": 0.0857, "step": 73050 }, { "epoch": 2.654989461443419, "grad_norm": 1.4044042825698853, "learning_rate": 3.334877083505853e-05, "loss": 0.0945, "step": 73060 }, { "epoch": 2.655352859946217, "grad_norm": 0.6870516538619995, "learning_rate": 3.3343879252955765e-05, "loss": 0.0916, "step": 73070 }, { "epoch": 2.655716258449015, "grad_norm": 1.0098764896392822, "learning_rate": 3.3338987311344935e-05, "loss": 0.1125, "step": 73080 }, { "epoch": 2.6560796569518135, "grad_norm": 3.241283655166626, "learning_rate": 3.333409501043681e-05, "loss": 0.1192, "step": 73090 }, { "epoch": 2.6564430554546115, "grad_norm": 0.5080071687698364, "learning_rate": 3.332920235044219e-05, "loss": 0.0923, "step": 73100 }, { "epoch": 2.65680645395741, "grad_norm": 0.7751922607421875, 
"learning_rate": 3.332430933157187e-05, "loss": 0.1025, "step": 73110 }, { "epoch": 2.657169852460208, "grad_norm": 1.3310539722442627, "learning_rate": 3.3319415954036674e-05, "loss": 0.1309, "step": 73120 }, { "epoch": 2.657533250963006, "grad_norm": 2.5508885383605957, "learning_rate": 3.331452221804745e-05, "loss": 0.1121, "step": 73130 }, { "epoch": 2.6578966494658043, "grad_norm": 4.273713111877441, "learning_rate": 3.330962812381505e-05, "loss": 0.1043, "step": 73140 }, { "epoch": 2.6582600479686023, "grad_norm": 0.32448074221611023, "learning_rate": 3.3304733671550336e-05, "loss": 0.865, "step": 73150 }, { "epoch": 2.6586234464714007, "grad_norm": 0.39604759216308594, "learning_rate": 3.329983886146419e-05, "loss": 0.0736, "step": 73160 }, { "epoch": 2.6589868449741987, "grad_norm": 2.2893431186676025, "learning_rate": 3.3294943693767536e-05, "loss": 0.1108, "step": 73170 }, { "epoch": 2.6593502434769967, "grad_norm": 1.7954707145690918, "learning_rate": 3.3290048168671256e-05, "loss": 0.0905, "step": 73180 }, { "epoch": 2.659713641979795, "grad_norm": 0.5648924708366394, "learning_rate": 3.3285152286386305e-05, "loss": 0.1076, "step": 73190 }, { "epoch": 2.660077040482593, "grad_norm": 0.6963376998901367, "learning_rate": 3.3280256047123614e-05, "loss": 0.0812, "step": 73200 }, { "epoch": 2.660077040482593, "eval_loss": 0.3232385516166687, "eval_runtime": 181.4857, "eval_samples_per_second": 40.852, "eval_steps_per_second": 5.108, "eval_wer": 0.1502078530324759, "step": 73200 }, { "epoch": 2.6604404389853915, "grad_norm": 15.696502685546875, "learning_rate": 3.3275359451094157e-05, "loss": 0.0925, "step": 73210 }, { "epoch": 2.6608038374881895, "grad_norm": 0.43037521839141846, "learning_rate": 3.327046249850891e-05, "loss": 0.1148, "step": 73220 }, { "epoch": 2.6611672359909875, "grad_norm": 2.093749523162842, "learning_rate": 3.326556518957885e-05, "loss": 0.098, "step": 73230 }, { "epoch": 2.661530634493786, "grad_norm": 0.5571810603141785, 
"learning_rate": 3.3260667524514996e-05, "loss": 0.1274, "step": 73240 }, { "epoch": 2.661894032996584, "grad_norm": 0.46131211519241333, "learning_rate": 3.3255769503528374e-05, "loss": 0.0872, "step": 73250 }, { "epoch": 2.6622574314993823, "grad_norm": 2.3554348945617676, "learning_rate": 3.325087112683002e-05, "loss": 0.1181, "step": 73260 }, { "epoch": 2.6626208300021803, "grad_norm": 0.8151417970657349, "learning_rate": 3.324597239463097e-05, "loss": 0.1176, "step": 73270 }, { "epoch": 2.6629842285049783, "grad_norm": 0.97243332862854, "learning_rate": 3.324107330714233e-05, "loss": 0.1183, "step": 73280 }, { "epoch": 2.6633476270077767, "grad_norm": 0.8408851623535156, "learning_rate": 3.3236173864575154e-05, "loss": 0.1119, "step": 73290 }, { "epoch": 2.663711025510575, "grad_norm": 0.8413365483283997, "learning_rate": 3.323127406714055e-05, "loss": 0.1135, "step": 73300 }, { "epoch": 2.664074424013373, "grad_norm": 1.1940609216690063, "learning_rate": 3.3226373915049636e-05, "loss": 0.114, "step": 73310 }, { "epoch": 2.664437822516171, "grad_norm": 0.761283814907074, "learning_rate": 3.3221473408513534e-05, "loss": 0.0885, "step": 73320 }, { "epoch": 2.6648012210189695, "grad_norm": 7.94679594039917, "learning_rate": 3.3216572547743396e-05, "loss": 0.0983, "step": 73330 }, { "epoch": 2.6651646195217675, "grad_norm": 0.7168159484863281, "learning_rate": 3.321167133295038e-05, "loss": 0.1152, "step": 73340 }, { "epoch": 2.665528018024566, "grad_norm": 1.2092477083206177, "learning_rate": 3.3206769764345676e-05, "loss": 0.0965, "step": 73350 }, { "epoch": 2.665891416527364, "grad_norm": 0.47513625025749207, "learning_rate": 3.320186784214045e-05, "loss": 0.4253, "step": 73360 }, { "epoch": 2.666254815030162, "grad_norm": 1.22835111618042, "learning_rate": 3.319696556654592e-05, "loss": 0.1073, "step": 73370 }, { "epoch": 2.6666182135329604, "grad_norm": 0.9767407774925232, "learning_rate": 3.319206293777332e-05, "loss": 0.1118, "step": 73380 }, { "epoch": 
2.6669816120357583, "grad_norm": 0.8150458335876465, "learning_rate": 3.318715995603387e-05, "loss": 0.0999, "step": 73390 }, { "epoch": 2.6673450105385568, "grad_norm": 0.8495771288871765, "learning_rate": 3.3182256621538826e-05, "loss": 0.0753, "step": 73400 }, { "epoch": 2.6677084090413548, "grad_norm": 1.7628092765808105, "learning_rate": 3.317735293449946e-05, "loss": 0.1248, "step": 73410 }, { "epoch": 2.6680718075441527, "grad_norm": 0.7195779085159302, "learning_rate": 3.317244889512704e-05, "loss": 0.1246, "step": 73420 }, { "epoch": 2.668435206046951, "grad_norm": 1.0770323276519775, "learning_rate": 3.316754450363289e-05, "loss": 0.0859, "step": 73430 }, { "epoch": 2.668798604549749, "grad_norm": 0.6442772746086121, "learning_rate": 3.31626397602283e-05, "loss": 0.1092, "step": 73440 }, { "epoch": 2.6691620030525476, "grad_norm": 0.9785648584365845, "learning_rate": 3.315773466512461e-05, "loss": 0.1202, "step": 73450 }, { "epoch": 2.6695254015553456, "grad_norm": 0.7215647101402283, "learning_rate": 3.315282921853316e-05, "loss": 0.1158, "step": 73460 }, { "epoch": 2.6698888000581436, "grad_norm": 0.5394707918167114, "learning_rate": 3.31479234206653e-05, "loss": 0.1174, "step": 73470 }, { "epoch": 2.670252198560942, "grad_norm": 0.6166718602180481, "learning_rate": 3.3143017271732416e-05, "loss": 0.1115, "step": 73480 }, { "epoch": 2.67061559706374, "grad_norm": 0.7951743006706238, "learning_rate": 3.3138110771945876e-05, "loss": 0.0974, "step": 73490 }, { "epoch": 2.6709789955665384, "grad_norm": 1.5483492612838745, "learning_rate": 3.313320392151711e-05, "loss": 0.0771, "step": 73500 }, { "epoch": 2.6713423940693364, "grad_norm": 0.6423888206481934, "learning_rate": 3.3128296720657524e-05, "loss": 0.0939, "step": 73510 }, { "epoch": 2.6717057925721344, "grad_norm": 0.5538840293884277, "learning_rate": 3.3123389169578556e-05, "loss": 0.108, "step": 73520 }, { "epoch": 2.672069191074933, "grad_norm": 0.5549068450927734, "learning_rate": 
3.311848126849165e-05, "loss": 0.1466, "step": 73530 }, { "epoch": 2.6724325895777308, "grad_norm": 0.7564308643341064, "learning_rate": 3.311357301760827e-05, "loss": 0.1241, "step": 73540 }, { "epoch": 2.672795988080529, "grad_norm": 0.683080792427063, "learning_rate": 3.310866441713989e-05, "loss": 0.0924, "step": 73550 }, { "epoch": 2.673159386583327, "grad_norm": 0.5799837708473206, "learning_rate": 3.3103755467298024e-05, "loss": 0.1, "step": 73560 }, { "epoch": 2.673522785086125, "grad_norm": 0.9366778135299683, "learning_rate": 3.309884616829416e-05, "loss": 0.0942, "step": 73570 }, { "epoch": 2.6738861835889236, "grad_norm": 2.1698505878448486, "learning_rate": 3.309393652033984e-05, "loss": 0.1085, "step": 73580 }, { "epoch": 2.674249582091722, "grad_norm": 0.8438801169395447, "learning_rate": 3.308902652364658e-05, "loss": 1.4071, "step": 73590 }, { "epoch": 2.67461298059452, "grad_norm": 0.5139632225036621, "learning_rate": 3.308411617842595e-05, "loss": 0.0993, "step": 73600 }, { "epoch": 2.674976379097318, "grad_norm": 0.5203242301940918, "learning_rate": 3.3079205484889534e-05, "loss": 0.0798, "step": 73610 }, { "epoch": 2.6753397776001164, "grad_norm": 0.8717512488365173, "learning_rate": 3.307429444324888e-05, "loss": 0.1186, "step": 73620 }, { "epoch": 2.6757031761029144, "grad_norm": 2.0422654151916504, "learning_rate": 3.3069383053715617e-05, "loss": 0.1053, "step": 73630 }, { "epoch": 2.676066574605713, "grad_norm": 1.5940972566604614, "learning_rate": 3.306447131650135e-05, "loss": 0.129, "step": 73640 }, { "epoch": 2.676429973108511, "grad_norm": 0.8113188743591309, "learning_rate": 3.30595592318177e-05, "loss": 0.0888, "step": 73650 }, { "epoch": 2.676793371611309, "grad_norm": 0.5296781063079834, "learning_rate": 3.305464679987632e-05, "loss": 0.0927, "step": 73660 }, { "epoch": 2.6771567701141072, "grad_norm": 1.491728663444519, "learning_rate": 3.304973402088887e-05, "loss": 0.1386, "step": 73670 }, { "epoch": 2.6775201686169052, 
"grad_norm": 0.6483957767486572, "learning_rate": 3.304482089506703e-05, "loss": 0.0994, "step": 73680 }, { "epoch": 2.6778835671197037, "grad_norm": 1.6856306791305542, "learning_rate": 3.303990742262247e-05, "loss": 0.1227, "step": 73690 }, { "epoch": 2.6782469656225016, "grad_norm": 1.752510905265808, "learning_rate": 3.3034993603766906e-05, "loss": 0.0956, "step": 73700 }, { "epoch": 2.6786103641252996, "grad_norm": 0.6596788167953491, "learning_rate": 3.303007943871206e-05, "loss": 0.1126, "step": 73710 }, { "epoch": 2.678973762628098, "grad_norm": 1.1651583909988403, "learning_rate": 3.302516492766966e-05, "loss": 0.1116, "step": 73720 }, { "epoch": 2.679337161130896, "grad_norm": 25.031896591186523, "learning_rate": 3.302025007085146e-05, "loss": 0.1139, "step": 73730 }, { "epoch": 2.6797005596336945, "grad_norm": 0.4031795561313629, "learning_rate": 3.301533486846922e-05, "loss": 0.0996, "step": 73740 }, { "epoch": 2.6800639581364925, "grad_norm": 0.6333705186843872, "learning_rate": 3.301041932073472e-05, "loss": 0.4087, "step": 73750 }, { "epoch": 2.6804273566392904, "grad_norm": 0.9752139449119568, "learning_rate": 3.300550342785975e-05, "loss": 0.1121, "step": 73760 }, { "epoch": 2.680790755142089, "grad_norm": 0.5102497339248657, "learning_rate": 3.300058719005612e-05, "loss": 0.1125, "step": 73770 }, { "epoch": 2.681154153644887, "grad_norm": 0.661267876625061, "learning_rate": 3.299567060753565e-05, "loss": 0.116, "step": 73780 }, { "epoch": 2.6815175521476853, "grad_norm": 4.50182580947876, "learning_rate": 3.2990753680510194e-05, "loss": 0.1047, "step": 73790 }, { "epoch": 2.6818809506504833, "grad_norm": 1.0736654996871948, "learning_rate": 3.298583640919159e-05, "loss": 0.0967, "step": 73800 }, { "epoch": 2.6818809506504833, "eval_loss": 0.3250260353088379, "eval_runtime": 180.3631, "eval_samples_per_second": 41.106, "eval_steps_per_second": 5.14, "eval_wer": 0.150552761994663, "step": 73800 }, { "epoch": 2.6822443491532812, "grad_norm": 
0.7733132839202881, "learning_rate": 3.2980918793791706e-05, "loss": 0.0863, "step": 73810 }, { "epoch": 2.6826077476560797, "grad_norm": 1.0003288984298706, "learning_rate": 3.2976000834522424e-05, "loss": 0.1159, "step": 73820 }, { "epoch": 2.6829711461588777, "grad_norm": 1.0312319993972778, "learning_rate": 3.2971082531595646e-05, "loss": 0.1081, "step": 73830 }, { "epoch": 2.683334544661676, "grad_norm": 0.9150594472885132, "learning_rate": 3.2966163885223286e-05, "loss": 0.1042, "step": 73840 }, { "epoch": 2.683697943164474, "grad_norm": 1.0201760530471802, "learning_rate": 3.2961244895617265e-05, "loss": 0.0894, "step": 73850 }, { "epoch": 2.684061341667272, "grad_norm": 0.7284217476844788, "learning_rate": 3.295632556298953e-05, "loss": 0.0844, "step": 73860 }, { "epoch": 2.6844247401700705, "grad_norm": 0.5448662042617798, "learning_rate": 3.2951405887552033e-05, "loss": 0.0978, "step": 73870 }, { "epoch": 2.684788138672869, "grad_norm": 0.558380126953125, "learning_rate": 3.294648586951675e-05, "loss": 0.1168, "step": 73880 }, { "epoch": 2.685151537175667, "grad_norm": 0.5779474377632141, "learning_rate": 3.2941565509095676e-05, "loss": 0.0737, "step": 73890 }, { "epoch": 2.685514935678465, "grad_norm": 1.018198847770691, "learning_rate": 3.293664480650078e-05, "loss": 0.0783, "step": 73900 }, { "epoch": 2.6858783341812633, "grad_norm": 1.5002427101135254, "learning_rate": 3.293172376194411e-05, "loss": 0.103, "step": 73910 }, { "epoch": 2.6862417326840613, "grad_norm": 0.6482456922531128, "learning_rate": 3.2926802375637675e-05, "loss": 0.1128, "step": 73920 }, { "epoch": 2.6866051311868597, "grad_norm": 0.38328006863594055, "learning_rate": 3.292188064779354e-05, "loss": 0.09, "step": 73930 }, { "epoch": 2.6869685296896577, "grad_norm": 0.3640558421611786, "learning_rate": 3.2916958578623746e-05, "loss": 0.0808, "step": 73940 }, { "epoch": 2.6873319281924557, "grad_norm": 0.7267985343933105, "learning_rate": 3.2912036168340376e-05, "loss": 0.0819, 
"step": 73950 }, { "epoch": 2.687695326695254, "grad_norm": 0.9190502166748047, "learning_rate": 3.2907113417155525e-05, "loss": 0.0973, "step": 73960 }, { "epoch": 2.688058725198052, "grad_norm": 1.3987020254135132, "learning_rate": 3.290219032528128e-05, "loss": 0.0987, "step": 73970 }, { "epoch": 2.6884221237008505, "grad_norm": 0.7504600286483765, "learning_rate": 3.289726689292978e-05, "loss": 0.1197, "step": 73980 }, { "epoch": 2.6887855222036485, "grad_norm": 0.8399171829223633, "learning_rate": 3.2892343120313144e-05, "loss": 0.1476, "step": 73990 }, { "epoch": 2.6891489207064465, "grad_norm": 0.37751835584640503, "learning_rate": 3.288741900764353e-05, "loss": 0.0819, "step": 74000 }, { "epoch": 2.689512319209245, "grad_norm": 49.60390853881836, "learning_rate": 3.288249455513308e-05, "loss": 0.8904, "step": 74010 }, { "epoch": 2.689875717712043, "grad_norm": 0.541854977607727, "learning_rate": 3.2877569762994e-05, "loss": 0.1234, "step": 74020 }, { "epoch": 2.6902391162148414, "grad_norm": 1.2786191701889038, "learning_rate": 3.2872644631438466e-05, "loss": 0.0906, "step": 74030 }, { "epoch": 2.6906025147176393, "grad_norm": 0.8454063534736633, "learning_rate": 3.2867719160678676e-05, "loss": 0.1212, "step": 74040 }, { "epoch": 2.6909659132204373, "grad_norm": 0.7831797003746033, "learning_rate": 3.286279335092687e-05, "loss": 0.0792, "step": 74050 }, { "epoch": 2.6913293117232358, "grad_norm": 0.46578606963157654, "learning_rate": 3.285786720239526e-05, "loss": 0.0707, "step": 74060 }, { "epoch": 2.6916927102260337, "grad_norm": 0.9215951561927795, "learning_rate": 3.285294071529613e-05, "loss": 0.1329, "step": 74070 }, { "epoch": 2.692056108728832, "grad_norm": 0.5061529278755188, "learning_rate": 3.284801388984171e-05, "loss": 0.1024, "step": 74080 }, { "epoch": 2.69241950723163, "grad_norm": 1.1045541763305664, "learning_rate": 3.2843086726244307e-05, "loss": 0.0937, "step": 74090 }, { "epoch": 2.692782905734428, "grad_norm": 0.4563618004322052, 
"learning_rate": 3.28381592247162e-05, "loss": 0.0796, "step": 74100 }, { "epoch": 2.6931463042372266, "grad_norm": 0.6136996150016785, "learning_rate": 3.28332313854697e-05, "loss": 0.0797, "step": 74110 }, { "epoch": 2.6935097027400245, "grad_norm": 0.3746108114719391, "learning_rate": 3.2828303208717125e-05, "loss": 0.0847, "step": 74120 }, { "epoch": 2.693873101242823, "grad_norm": 9.6745023727417, "learning_rate": 3.282337469467082e-05, "loss": 0.1017, "step": 74130 }, { "epoch": 2.694236499745621, "grad_norm": 0.8015979528427124, "learning_rate": 3.281844584354314e-05, "loss": 0.7825, "step": 74140 }, { "epoch": 2.694599898248419, "grad_norm": 0.7004625797271729, "learning_rate": 3.281351665554644e-05, "loss": 0.1076, "step": 74150 }, { "epoch": 2.6949632967512174, "grad_norm": 0.49726399779319763, "learning_rate": 3.2808587130893107e-05, "loss": 0.09, "step": 74160 }, { "epoch": 2.695326695254016, "grad_norm": 0.7523669600486755, "learning_rate": 3.280365726979555e-05, "loss": 0.1085, "step": 74170 }, { "epoch": 2.695690093756814, "grad_norm": 0.8919450044631958, "learning_rate": 3.279872707246615e-05, "loss": 0.1102, "step": 74180 }, { "epoch": 2.6960534922596118, "grad_norm": 2.7779541015625, "learning_rate": 3.279379653911736e-05, "loss": 0.1118, "step": 74190 }, { "epoch": 2.69641689076241, "grad_norm": 1.5841718912124634, "learning_rate": 3.27888656699616e-05, "loss": 0.214, "step": 74200 }, { "epoch": 2.696780289265208, "grad_norm": 1.1387628316879272, "learning_rate": 3.278393446521134e-05, "loss": 0.0854, "step": 74210 }, { "epoch": 2.6971436877680066, "grad_norm": 3.4580607414245605, "learning_rate": 3.277900292507904e-05, "loss": 0.1282, "step": 74220 }, { "epoch": 2.6975070862708046, "grad_norm": 1.5573906898498535, "learning_rate": 3.277407104977717e-05, "loss": 0.1394, "step": 74230 }, { "epoch": 2.6978704847736026, "grad_norm": 1.1080759763717651, "learning_rate": 3.276913883951824e-05, "loss": 0.1507, "step": 74240 }, { "epoch": 
2.698233883276401, "grad_norm": 0.7681221961975098, "learning_rate": 3.276420629451476e-05, "loss": 0.0866, "step": 74250 }, { "epoch": 2.698597281779199, "grad_norm": 0.5694213509559631, "learning_rate": 3.275927341497927e-05, "loss": 0.0853, "step": 74260 }, { "epoch": 2.6989606802819974, "grad_norm": 1.044425129890442, "learning_rate": 3.275434020112428e-05, "loss": 0.1089, "step": 74270 }, { "epoch": 2.6993240787847954, "grad_norm": 0.5060895681381226, "learning_rate": 3.274940665316237e-05, "loss": 0.0757, "step": 74280 }, { "epoch": 2.6996874772875934, "grad_norm": 3.3999178409576416, "learning_rate": 3.274447277130611e-05, "loss": 0.112, "step": 74290 }, { "epoch": 2.700050875790392, "grad_norm": 1.1808927059173584, "learning_rate": 3.273953855576805e-05, "loss": 0.0899, "step": 74300 }, { "epoch": 2.70041427429319, "grad_norm": 0.481868177652359, "learning_rate": 3.273460400676083e-05, "loss": 0.0935, "step": 74310 }, { "epoch": 2.7007776727959882, "grad_norm": 3.8246326446533203, "learning_rate": 3.272966912449703e-05, "loss": 0.094, "step": 74320 }, { "epoch": 2.7011410712987862, "grad_norm": 0.8496780395507812, "learning_rate": 3.27247339091893e-05, "loss": 0.1554, "step": 74330 }, { "epoch": 2.701504469801584, "grad_norm": 1.1947115659713745, "learning_rate": 3.271979836105026e-05, "loss": 0.1302, "step": 74340 }, { "epoch": 2.7018678683043826, "grad_norm": 2.5246219635009766, "learning_rate": 3.271486248029258e-05, "loss": 0.088, "step": 74350 }, { "epoch": 2.7022312668071806, "grad_norm": 1.4565434455871582, "learning_rate": 3.270992626712893e-05, "loss": 0.0862, "step": 74360 }, { "epoch": 2.702594665309979, "grad_norm": 0.473636269569397, "learning_rate": 3.270498972177198e-05, "loss": 0.1168, "step": 74370 }, { "epoch": 2.702958063812777, "grad_norm": 2.2672886848449707, "learning_rate": 3.270005284443445e-05, "loss": 0.2479, "step": 74380 }, { "epoch": 2.703321462315575, "grad_norm": 2.7625412940979004, "learning_rate": 3.269511563532903e-05, 
"loss": 0.1127, "step": 74390 }, { "epoch": 2.7036848608183734, "grad_norm": 1.6143194437026978, "learning_rate": 3.2690178094668455e-05, "loss": 0.1106, "step": 74400 }, { "epoch": 2.7036848608183734, "eval_loss": 0.30357053875923157, "eval_runtime": 179.9214, "eval_samples_per_second": 41.207, "eval_steps_per_second": 5.152, "eval_wer": 0.15205946956632235, "step": 74400 }, { "epoch": 2.7040482593211714, "grad_norm": 1.560831069946289, "learning_rate": 3.268524022266547e-05, "loss": 0.0948, "step": 74410 }, { "epoch": 2.70441165782397, "grad_norm": 0.5551114082336426, "learning_rate": 3.2680302019532836e-05, "loss": 0.1299, "step": 74420 }, { "epoch": 2.704775056326768, "grad_norm": 1.2796604633331299, "learning_rate": 3.2675363485483304e-05, "loss": 0.1122, "step": 74430 }, { "epoch": 2.705138454829566, "grad_norm": 1.2246671915054321, "learning_rate": 3.2670424620729675e-05, "loss": 0.1183, "step": 74440 }, { "epoch": 2.7055018533323643, "grad_norm": 2.03642201423645, "learning_rate": 3.266548542548474e-05, "loss": 0.0848, "step": 74450 }, { "epoch": 2.7058652518351627, "grad_norm": 1.559633493423462, "learning_rate": 3.2660545899961305e-05, "loss": 0.0841, "step": 74460 }, { "epoch": 2.7062286503379607, "grad_norm": 0.7185676097869873, "learning_rate": 3.265560604437221e-05, "loss": 0.104, "step": 74470 }, { "epoch": 2.7065920488407587, "grad_norm": 0.627554178237915, "learning_rate": 3.265066585893029e-05, "loss": 0.1016, "step": 74480 }, { "epoch": 2.706955447343557, "grad_norm": 0.7802649736404419, "learning_rate": 3.26457253438484e-05, "loss": 0.0987, "step": 74490 }, { "epoch": 2.707318845846355, "grad_norm": 0.4407544732093811, "learning_rate": 3.2640784499339404e-05, "loss": 0.0948, "step": 74500 }, { "epoch": 2.7076822443491535, "grad_norm": 0.7778434157371521, "learning_rate": 3.263584332561619e-05, "loss": 0.0943, "step": 74510 }, { "epoch": 2.7080456428519515, "grad_norm": 0.46505939960479736, "learning_rate": 3.263090182289166e-05, "loss": 0.1109, 
"step": 74520 }, { "epoch": 2.7084090413547495, "grad_norm": 0.6025874614715576, "learning_rate": 3.2625959991378715e-05, "loss": 0.0936, "step": 74530 }, { "epoch": 2.708772439857548, "grad_norm": 1.0582184791564941, "learning_rate": 3.262101783129029e-05, "loss": 0.0809, "step": 74540 }, { "epoch": 2.709135838360346, "grad_norm": 1.1490769386291504, "learning_rate": 3.261607534283932e-05, "loss": 0.0848, "step": 74550 }, { "epoch": 2.7094992368631443, "grad_norm": 0.6131067276000977, "learning_rate": 3.2611132526238766e-05, "loss": 0.084, "step": 74560 }, { "epoch": 2.7098626353659423, "grad_norm": 1.1936076879501343, "learning_rate": 3.26061893817016e-05, "loss": 0.0876, "step": 74570 }, { "epoch": 2.7102260338687403, "grad_norm": 1.389906406402588, "learning_rate": 3.260124590944078e-05, "loss": 0.0985, "step": 74580 }, { "epoch": 2.7105894323715387, "grad_norm": 1.8453727960586548, "learning_rate": 3.259630210966933e-05, "loss": 0.0959, "step": 74590 }, { "epoch": 2.7109528308743367, "grad_norm": 0.9837631583213806, "learning_rate": 3.259135798260025e-05, "loss": 0.0882, "step": 74600 }, { "epoch": 2.711316229377135, "grad_norm": 1.1993751525878906, "learning_rate": 3.2586413528446555e-05, "loss": 0.0676, "step": 74610 }, { "epoch": 2.711679627879933, "grad_norm": 6.072772026062012, "learning_rate": 3.25814687474213e-05, "loss": 0.0994, "step": 74620 }, { "epoch": 2.712043026382731, "grad_norm": 1.2597107887268066, "learning_rate": 3.257652363973753e-05, "loss": 0.1082, "step": 74630 }, { "epoch": 2.7124064248855295, "grad_norm": 1.3290597200393677, "learning_rate": 3.257157820560831e-05, "loss": 0.128, "step": 74640 }, { "epoch": 2.7127698233883275, "grad_norm": 2.4594569206237793, "learning_rate": 3.256663244524673e-05, "loss": 0.0824, "step": 74650 }, { "epoch": 2.713133221891126, "grad_norm": 0.3981126844882965, "learning_rate": 3.256168635886588e-05, "loss": 0.1033, "step": 74660 }, { "epoch": 2.713496620393924, "grad_norm": 0.6342089772224426, 
"learning_rate": 3.255673994667887e-05, "loss": 0.1291, "step": 74670 }, { "epoch": 2.713860018896722, "grad_norm": 1.3177196979522705, "learning_rate": 3.2551793208898826e-05, "loss": 0.1083, "step": 74680 }, { "epoch": 2.7142234173995203, "grad_norm": 1.9709926843643188, "learning_rate": 3.2546846145738873e-05, "loss": 0.1416, "step": 74690 }, { "epoch": 2.7145868159023183, "grad_norm": 2.019582986831665, "learning_rate": 3.2541898757412174e-05, "loss": 0.1671, "step": 74700 }, { "epoch": 2.7149502144051167, "grad_norm": 0.7411203980445862, "learning_rate": 3.25369510441319e-05, "loss": 0.0905, "step": 74710 }, { "epoch": 2.7153136129079147, "grad_norm": 0.7094716429710388, "learning_rate": 3.2532003006111215e-05, "loss": 0.1127, "step": 74720 }, { "epoch": 2.7156770114107127, "grad_norm": 0.6261359453201294, "learning_rate": 3.252705464356332e-05, "loss": 0.0934, "step": 74730 }, { "epoch": 2.716040409913511, "grad_norm": 0.4718007445335388, "learning_rate": 3.252210595670142e-05, "loss": 0.094, "step": 74740 }, { "epoch": 2.7164038084163096, "grad_norm": 0.8705607652664185, "learning_rate": 3.2517156945738734e-05, "loss": 0.097, "step": 74750 }, { "epoch": 2.7167672069191076, "grad_norm": 0.4735512435436249, "learning_rate": 3.251220761088851e-05, "loss": 0.0741, "step": 74760 }, { "epoch": 2.7171306054219055, "grad_norm": 0.6891077756881714, "learning_rate": 3.250725795236398e-05, "loss": 0.1007, "step": 74770 }, { "epoch": 2.717494003924704, "grad_norm": 2.493716239929199, "learning_rate": 3.250230797037843e-05, "loss": 0.0849, "step": 74780 }, { "epoch": 2.717857402427502, "grad_norm": 1.8947222232818604, "learning_rate": 3.249735766514512e-05, "loss": 0.1433, "step": 74790 }, { "epoch": 2.7182208009303004, "grad_norm": 0.5233703255653381, "learning_rate": 3.2492407036877334e-05, "loss": 0.0772, "step": 74800 }, { "epoch": 2.7185841994330984, "grad_norm": 0.4244493544101715, "learning_rate": 3.2487456085788395e-05, "loss": 0.0771, "step": 74810 }, { "epoch": 
2.7189475979358964, "grad_norm": 0.5100713968276978, "learning_rate": 3.24825048120916e-05, "loss": 0.0915, "step": 74820 }, { "epoch": 2.719310996438695, "grad_norm": 1.360521674156189, "learning_rate": 3.2477553216000314e-05, "loss": 0.1032, "step": 74830 }, { "epoch": 2.7196743949414928, "grad_norm": 0.9262562990188599, "learning_rate": 3.2472601297727853e-05, "loss": 0.0992, "step": 74840 }, { "epoch": 2.720037793444291, "grad_norm": 1.0304359197616577, "learning_rate": 3.246764905748759e-05, "loss": 1.7738, "step": 74850 }, { "epoch": 2.720401191947089, "grad_norm": 0.621614933013916, "learning_rate": 3.246269649549291e-05, "loss": 0.0865, "step": 74860 }, { "epoch": 2.720764590449887, "grad_norm": 0.544062077999115, "learning_rate": 3.245774361195718e-05, "loss": 0.0947, "step": 74870 }, { "epoch": 2.7211279889526856, "grad_norm": 0.8662183284759521, "learning_rate": 3.2452790407093814e-05, "loss": 0.1065, "step": 74880 }, { "epoch": 2.7214913874554836, "grad_norm": 0.6098036170005798, "learning_rate": 3.244783688111622e-05, "loss": 0.1058, "step": 74890 }, { "epoch": 2.721854785958282, "grad_norm": 0.5739080309867859, "learning_rate": 3.2442883034237845e-05, "loss": 1.0575, "step": 74900 }, { "epoch": 2.72221818446108, "grad_norm": 0.8640351891517639, "learning_rate": 3.2437928866672124e-05, "loss": 0.0948, "step": 74910 }, { "epoch": 2.722581582963878, "grad_norm": 1.041825294494629, "learning_rate": 3.2432974378632504e-05, "loss": 0.1079, "step": 74920 }, { "epoch": 2.7229449814666764, "grad_norm": 1.7764942646026611, "learning_rate": 3.242801957033247e-05, "loss": 0.1058, "step": 74930 }, { "epoch": 2.7233083799694744, "grad_norm": 0.5012884140014648, "learning_rate": 3.24230644419855e-05, "loss": 0.0924, "step": 74940 }, { "epoch": 2.723671778472273, "grad_norm": 1.0178241729736328, "learning_rate": 3.241810899380509e-05, "loss": 0.0794, "step": 74950 }, { "epoch": 2.724035176975071, "grad_norm": 0.5228786468505859, "learning_rate": 
3.241315322600476e-05, "loss": 0.0947, "step": 74960 }, { "epoch": 2.724398575477869, "grad_norm": 0.7717702388763428, "learning_rate": 3.2408197138798035e-05, "loss": 0.1196, "step": 74970 }, { "epoch": 2.724761973980667, "grad_norm": 0.5628354549407959, "learning_rate": 3.240324073239846e-05, "loss": 0.086, "step": 74980 }, { "epoch": 2.725125372483465, "grad_norm": 0.535048246383667, "learning_rate": 3.239828400701957e-05, "loss": 0.1447, "step": 74990 }, { "epoch": 2.7254887709862636, "grad_norm": 0.42276647686958313, "learning_rate": 3.2393326962874953e-05, "loss": 0.0784, "step": 75000 }, { "epoch": 2.7254887709862636, "eval_loss": 0.3183054029941559, "eval_runtime": 180.0805, "eval_samples_per_second": 41.17, "eval_steps_per_second": 5.148, "eval_wer": 0.1470129068564271, "step": 75000 }, { "epoch": 2.7258521694890616, "grad_norm": 1.1505578756332397, "learning_rate": 3.238836960017818e-05, "loss": 0.0862, "step": 75010 }, { "epoch": 2.7262155679918596, "grad_norm": 2.7842421531677246, "learning_rate": 3.238341191914285e-05, "loss": 0.1111, "step": 75020 }, { "epoch": 2.726578966494658, "grad_norm": 0.8966996073722839, "learning_rate": 3.237845391998257e-05, "loss": 0.085, "step": 75030 }, { "epoch": 2.7269423649974565, "grad_norm": 2.870903491973877, "learning_rate": 3.237349560291096e-05, "loss": 0.103, "step": 75040 }, { "epoch": 2.7273057635002544, "grad_norm": 1.8146476745605469, "learning_rate": 3.236853696814167e-05, "loss": 0.0806, "step": 75050 }, { "epoch": 2.7276691620030524, "grad_norm": 1.370487928390503, "learning_rate": 3.236357801588833e-05, "loss": 0.0923, "step": 75060 }, { "epoch": 2.728032560505851, "grad_norm": 0.7427790760993958, "learning_rate": 3.235861874636462e-05, "loss": 0.1101, "step": 75070 }, { "epoch": 2.728395959008649, "grad_norm": 0.7309651970863342, "learning_rate": 3.23536591597842e-05, "loss": 0.1063, "step": 75080 }, { "epoch": 2.7287593575114473, "grad_norm": 0.5744786262512207, "learning_rate": 3.2348699256360784e-05, 
"loss": 0.1202, "step": 75090 }, { "epoch": 2.7291227560142453, "grad_norm": 0.7631998658180237, "learning_rate": 3.234373903630806e-05, "loss": 0.7446, "step": 75100 }, { "epoch": 2.7294861545170432, "grad_norm": 0.4925616979598999, "learning_rate": 3.233877849983974e-05, "loss": 0.0862, "step": 75110 }, { "epoch": 2.7298495530198417, "grad_norm": 2.4710123538970947, "learning_rate": 3.233381764716958e-05, "loss": 0.0956, "step": 75120 }, { "epoch": 2.7302129515226397, "grad_norm": 0.8318620920181274, "learning_rate": 3.23288564785113e-05, "loss": 0.1057, "step": 75130 }, { "epoch": 2.730576350025438, "grad_norm": 0.6408945918083191, "learning_rate": 3.2323894994078674e-05, "loss": 0.123, "step": 75140 }, { "epoch": 2.730939748528236, "grad_norm": 1.6067343950271606, "learning_rate": 3.2318933194085474e-05, "loss": 0.1345, "step": 75150 }, { "epoch": 2.731303147031034, "grad_norm": 0.9870972633361816, "learning_rate": 3.231397107874548e-05, "loss": 0.0888, "step": 75160 }, { "epoch": 2.7316665455338325, "grad_norm": 0.7536956071853638, "learning_rate": 3.23090086482725e-05, "loss": 0.1541, "step": 75170 }, { "epoch": 2.7320299440366305, "grad_norm": 0.45174309611320496, "learning_rate": 3.2304045902880334e-05, "loss": 0.0858, "step": 75180 }, { "epoch": 2.732393342539429, "grad_norm": 0.9457273483276367, "learning_rate": 3.229908284278283e-05, "loss": 0.1079, "step": 75190 }, { "epoch": 2.732756741042227, "grad_norm": 0.7832821011543274, "learning_rate": 3.229411946819381e-05, "loss": 0.0844, "step": 75200 }, { "epoch": 2.733120139545025, "grad_norm": 0.7583007216453552, "learning_rate": 3.228915577932713e-05, "loss": 0.1036, "step": 75210 }, { "epoch": 2.7334835380478233, "grad_norm": 0.38852185010910034, "learning_rate": 3.2284191776396675e-05, "loss": 0.092, "step": 75220 }, { "epoch": 2.7338469365506213, "grad_norm": 0.9784302115440369, "learning_rate": 3.22792274596163e-05, "loss": 0.1215, "step": 75230 }, { "epoch": 2.7342103350534197, "grad_norm": 
0.6491109728813171, "learning_rate": 3.227426282919992e-05, "loss": 0.118, "step": 75240 }, { "epoch": 2.7345737335562177, "grad_norm": 1.2278261184692383, "learning_rate": 3.226929788536143e-05, "loss": 0.7961, "step": 75250 }, { "epoch": 2.7349371320590157, "grad_norm": 0.7250826358795166, "learning_rate": 3.226433262831477e-05, "loss": 0.2077, "step": 75260 }, { "epoch": 2.735300530561814, "grad_norm": 0.6161037683486938, "learning_rate": 3.2259367058273855e-05, "loss": 0.0958, "step": 75270 }, { "epoch": 2.735663929064612, "grad_norm": 0.6900900602340698, "learning_rate": 3.2254401175452646e-05, "loss": 0.0848, "step": 75280 }, { "epoch": 2.7360273275674105, "grad_norm": 1.1437780857086182, "learning_rate": 3.2249434980065106e-05, "loss": 0.1336, "step": 75290 }, { "epoch": 2.7363907260702085, "grad_norm": 2.3000125885009766, "learning_rate": 3.2244468472325194e-05, "loss": 0.0859, "step": 75300 }, { "epoch": 2.7367541245730065, "grad_norm": 1.2666622400283813, "learning_rate": 3.2239501652446926e-05, "loss": 0.1021, "step": 75310 }, { "epoch": 2.737117523075805, "grad_norm": 0.4458122253417969, "learning_rate": 3.2234534520644275e-05, "loss": 0.0954, "step": 75320 }, { "epoch": 2.7374809215786033, "grad_norm": 1.7729071378707886, "learning_rate": 3.2229567077131285e-05, "loss": 1.6003, "step": 75330 }, { "epoch": 2.7378443200814013, "grad_norm": 0.8443679213523865, "learning_rate": 3.222459932212196e-05, "loss": 0.0879, "step": 75340 }, { "epoch": 2.7382077185841993, "grad_norm": 0.7365388870239258, "learning_rate": 3.221963125583037e-05, "loss": 0.0823, "step": 75350 }, { "epoch": 2.7385711170869977, "grad_norm": 0.6535968780517578, "learning_rate": 3.2214662878470546e-05, "loss": 0.1252, "step": 75360 }, { "epoch": 2.7389345155897957, "grad_norm": 0.674757719039917, "learning_rate": 3.220969419025657e-05, "loss": 0.1166, "step": 75370 }, { "epoch": 2.739297914092594, "grad_norm": 0.6909737586975098, "learning_rate": 3.220472519140253e-05, "loss": 0.1192, 
"step": 75380 }, { "epoch": 2.739661312595392, "grad_norm": 0.7484961152076721, "learning_rate": 3.219975588212251e-05, "loss": 0.1064, "step": 75390 }, { "epoch": 2.74002471109819, "grad_norm": 2.3563716411590576, "learning_rate": 3.219478626263063e-05, "loss": 0.0755, "step": 75400 }, { "epoch": 2.7403881096009886, "grad_norm": 0.4629516005516052, "learning_rate": 3.2189816333141004e-05, "loss": 0.0974, "step": 75410 }, { "epoch": 2.7407515081037865, "grad_norm": 0.487054705619812, "learning_rate": 3.2184846093867774e-05, "loss": 0.1004, "step": 75420 }, { "epoch": 2.741114906606585, "grad_norm": 3.042552947998047, "learning_rate": 3.2179875545025096e-05, "loss": 0.1113, "step": 75430 }, { "epoch": 2.741478305109383, "grad_norm": 0.5376294255256653, "learning_rate": 3.2174904686827114e-05, "loss": 0.0972, "step": 75440 }, { "epoch": 2.741841703612181, "grad_norm": 0.9033780694007874, "learning_rate": 3.216993351948803e-05, "loss": 0.0784, "step": 75450 }, { "epoch": 2.7422051021149794, "grad_norm": 0.29921913146972656, "learning_rate": 3.2164962043222015e-05, "loss": 0.076, "step": 75460 }, { "epoch": 2.7425685006177773, "grad_norm": 0.30889561772346497, "learning_rate": 3.2159990258243286e-05, "loss": 0.09, "step": 75470 }, { "epoch": 2.742931899120576, "grad_norm": 4.390368938446045, "learning_rate": 3.2155018164766044e-05, "loss": 0.087, "step": 75480 }, { "epoch": 2.7432952976233738, "grad_norm": 2.617569923400879, "learning_rate": 3.2150045763004526e-05, "loss": 0.0989, "step": 75490 }, { "epoch": 2.7436586961261717, "grad_norm": 0.5497812032699585, "learning_rate": 3.214507305317298e-05, "loss": 0.0869, "step": 75500 }, { "epoch": 2.74402209462897, "grad_norm": 2.1748311519622803, "learning_rate": 3.214010003548566e-05, "loss": 0.1087, "step": 75510 }, { "epoch": 2.744385493131768, "grad_norm": 0.7790930271148682, "learning_rate": 3.213512671015683e-05, "loss": 1.0444, "step": 75520 }, { "epoch": 2.7447488916345666, "grad_norm": 0.8997694849967957, 
"learning_rate": 3.2130153077400784e-05, "loss": 0.2153, "step": 75530 }, { "epoch": 2.7451122901373646, "grad_norm": 0.6659709811210632, "learning_rate": 3.2125179137431805e-05, "loss": 0.1144, "step": 75540 }, { "epoch": 2.7454756886401626, "grad_norm": 0.6743984818458557, "learning_rate": 3.212020489046421e-05, "loss": 0.0858, "step": 75550 }, { "epoch": 2.745839087142961, "grad_norm": 0.6624968647956848, "learning_rate": 3.2115230336712316e-05, "loss": 0.0913, "step": 75560 }, { "epoch": 2.7462024856457594, "grad_norm": 0.6814375519752502, "learning_rate": 3.211025547639047e-05, "loss": 0.089, "step": 75570 }, { "epoch": 2.7465658841485574, "grad_norm": 0.9000943303108215, "learning_rate": 3.210528030971301e-05, "loss": 0.1184, "step": 75580 }, { "epoch": 2.7469292826513554, "grad_norm": 1.0685985088348389, "learning_rate": 3.21003048368943e-05, "loss": 0.1682, "step": 75590 }, { "epoch": 2.7472926811541534, "grad_norm": 0.4757719933986664, "learning_rate": 3.209532905814872e-05, "loss": 0.1029, "step": 75600 }, { "epoch": 2.7472926811541534, "eval_loss": 0.304624080657959, "eval_runtime": 179.9156, "eval_samples_per_second": 41.208, "eval_steps_per_second": 5.152, "eval_wer": 0.1494363461433732, "step": 75600 }, { "epoch": 2.747656079656952, "grad_norm": 1.4344089031219482, "learning_rate": 3.209035297369066e-05, "loss": 0.0954, "step": 75610 }, { "epoch": 2.7480194781597502, "grad_norm": 0.8026723265647888, "learning_rate": 3.208537658373451e-05, "loss": 0.1161, "step": 75620 }, { "epoch": 2.748382876662548, "grad_norm": 0.8414619565010071, "learning_rate": 3.20803998884947e-05, "loss": 0.0969, "step": 75630 }, { "epoch": 2.748746275165346, "grad_norm": 0.46038514375686646, "learning_rate": 3.2075422888185645e-05, "loss": 0.1183, "step": 75640 }, { "epoch": 2.7491096736681446, "grad_norm": 0.6400403380393982, "learning_rate": 3.207044558302179e-05, "loss": 0.0962, "step": 75650 }, { "epoch": 2.7494730721709426, "grad_norm": 0.637139618396759, "learning_rate": 
3.206596574790073e-05, "loss": 1.817, "step": 75660 }, { "epoch": 2.749836470673741, "grad_norm": 1.1132330894470215, "learning_rate": 3.206098786410359e-05, "loss": 0.1059, "step": 75670 }, { "epoch": 2.750199869176539, "grad_norm": 0.6702316999435425, "learning_rate": 3.2056009676073615e-05, "loss": 0.0735, "step": 75680 }, { "epoch": 2.750563267679337, "grad_norm": 0.7996656894683838, "learning_rate": 3.205103118402528e-05, "loss": 0.1147, "step": 75690 }, { "epoch": 2.7509266661821354, "grad_norm": 0.6503117680549622, "learning_rate": 3.204605238817311e-05, "loss": 0.0837, "step": 75700 }, { "epoch": 2.7512900646849334, "grad_norm": 0.7380549907684326, "learning_rate": 3.204107328873161e-05, "loss": 0.0742, "step": 75710 }, { "epoch": 2.751653463187732, "grad_norm": 1.3897452354431152, "learning_rate": 3.203609388591531e-05, "loss": 0.1324, "step": 75720 }, { "epoch": 2.75201686169053, "grad_norm": 0.6932911276817322, "learning_rate": 3.203111417993876e-05, "loss": 0.0847, "step": 75730 }, { "epoch": 2.752380260193328, "grad_norm": 0.9964193105697632, "learning_rate": 3.2026134171016516e-05, "loss": 0.098, "step": 75740 }, { "epoch": 2.7527436586961262, "grad_norm": 2.219566822052002, "learning_rate": 3.2021153859363154e-05, "loss": 0.0866, "step": 75750 }, { "epoch": 2.7531070571989242, "grad_norm": 0.4468567967414856, "learning_rate": 3.201617324519325e-05, "loss": 0.0779, "step": 75760 }, { "epoch": 2.7534704557017227, "grad_norm": 1.1400572061538696, "learning_rate": 3.2011192328721406e-05, "loss": 0.1104, "step": 75770 }, { "epoch": 2.7538338542045206, "grad_norm": 0.7320595383644104, "learning_rate": 3.2006211110162234e-05, "loss": 0.0955, "step": 75780 }, { "epoch": 2.7541972527073186, "grad_norm": 1.0638219118118286, "learning_rate": 3.200122958973034e-05, "loss": 0.1557, "step": 75790 }, { "epoch": 2.754560651210117, "grad_norm": 0.8229318261146545, "learning_rate": 3.1996247767640385e-05, "loss": 0.0807, "step": 75800 }, { "epoch": 2.754924049712915, 
"grad_norm": 0.5131879448890686, "learning_rate": 3.1991265644107005e-05, "loss": 0.0859, "step": 75810 }, { "epoch": 2.7552874482157135, "grad_norm": 0.7201241850852966, "learning_rate": 3.198628321934486e-05, "loss": 0.1255, "step": 75820 }, { "epoch": 2.7556508467185115, "grad_norm": 0.8084592819213867, "learning_rate": 3.198130049356863e-05, "loss": 0.0959, "step": 75830 }, { "epoch": 2.7560142452213094, "grad_norm": 0.9655843377113342, "learning_rate": 3.197631746699301e-05, "loss": 0.1137, "step": 75840 }, { "epoch": 2.756377643724108, "grad_norm": 0.6856592893600464, "learning_rate": 3.197133413983268e-05, "loss": 0.0953, "step": 75850 }, { "epoch": 2.7567410422269063, "grad_norm": 0.8975215554237366, "learning_rate": 3.196635051230237e-05, "loss": 0.2702, "step": 75860 }, { "epoch": 2.7571044407297043, "grad_norm": 1.0239101648330688, "learning_rate": 3.19613665846168e-05, "loss": 0.8354, "step": 75870 }, { "epoch": 2.7574678392325023, "grad_norm": 1.5141791105270386, "learning_rate": 3.195638235699072e-05, "loss": 0.1123, "step": 75880 }, { "epoch": 2.7578312377353003, "grad_norm": 0.787190318107605, "learning_rate": 3.195139782963887e-05, "loss": 0.1198, "step": 75890 }, { "epoch": 2.7581946362380987, "grad_norm": 0.6342429518699646, "learning_rate": 3.1946413002776024e-05, "loss": 0.0838, "step": 75900 }, { "epoch": 2.758558034740897, "grad_norm": 2.490267753601074, "learning_rate": 3.194142787661695e-05, "loss": 0.0861, "step": 75910 }, { "epoch": 2.758921433243695, "grad_norm": 0.7890759110450745, "learning_rate": 3.1936442451376454e-05, "loss": 0.1203, "step": 75920 }, { "epoch": 2.759284831746493, "grad_norm": 1.0969079732894897, "learning_rate": 3.193145672726933e-05, "loss": 0.1042, "step": 75930 }, { "epoch": 2.7596482302492915, "grad_norm": 0.42303451895713806, "learning_rate": 3.1926470704510395e-05, "loss": 0.0979, "step": 75940 }, { "epoch": 2.7600116287520895, "grad_norm": 0.7856914401054382, "learning_rate": 3.192148438331448e-05, "loss": 
0.081, "step": 75950 }, { "epoch": 2.760375027254888, "grad_norm": 0.7306569814682007, "learning_rate": 3.191649776389644e-05, "loss": 0.0688, "step": 75960 }, { "epoch": 2.760738425757686, "grad_norm": 0.43717941641807556, "learning_rate": 3.1911510846471115e-05, "loss": 0.1219, "step": 75970 }, { "epoch": 2.761101824260484, "grad_norm": 0.6672983169555664, "learning_rate": 3.190652363125337e-05, "loss": 0.1003, "step": 75980 }, { "epoch": 2.7614652227632823, "grad_norm": 2.3338167667388916, "learning_rate": 3.190153611845811e-05, "loss": 0.117, "step": 75990 }, { "epoch": 2.7618286212660803, "grad_norm": 0.893578052520752, "learning_rate": 3.1896548308300206e-05, "loss": 0.0922, "step": 76000 }, { "epoch": 2.7621920197688787, "grad_norm": 1.831598162651062, "learning_rate": 3.189156020099458e-05, "loss": 0.0921, "step": 76010 }, { "epoch": 2.7625554182716767, "grad_norm": 0.4112573564052582, "learning_rate": 3.1886571796756136e-05, "loss": 0.1237, "step": 76020 }, { "epoch": 2.7629188167744747, "grad_norm": 2.476116418838501, "learning_rate": 3.1881583095799816e-05, "loss": 0.097, "step": 76030 }, { "epoch": 2.763282215277273, "grad_norm": 0.5987531542778015, "learning_rate": 3.1876594098340575e-05, "loss": 0.117, "step": 76040 }, { "epoch": 2.763645613780071, "grad_norm": 0.4382152855396271, "learning_rate": 3.187160480459335e-05, "loss": 0.0773, "step": 76050 }, { "epoch": 2.7640090122828695, "grad_norm": 0.4360668659210205, "learning_rate": 3.186661521477313e-05, "loss": 0.0785, "step": 76060 }, { "epoch": 2.7643724107856675, "grad_norm": 2.2010788917541504, "learning_rate": 3.1861625329094894e-05, "loss": 0.1354, "step": 76070 }, { "epoch": 2.7647358092884655, "grad_norm": 0.8447809815406799, "learning_rate": 3.185663514777363e-05, "loss": 0.0982, "step": 76080 }, { "epoch": 2.765099207791264, "grad_norm": 1.7606275081634521, "learning_rate": 3.185164467102436e-05, "loss": 0.1429, "step": 76090 }, { "epoch": 2.765462606294062, "grad_norm": 0.8025608062744141, 
"learning_rate": 3.1846653899062094e-05, "loss": 0.0833, "step": 76100 }, { "epoch": 2.7658260047968604, "grad_norm": 1.0630611181259155, "learning_rate": 3.184166283210188e-05, "loss": 0.0663, "step": 76110 }, { "epoch": 2.7661894032996583, "grad_norm": 0.47696417570114136, "learning_rate": 3.1836671470358744e-05, "loss": 0.1282, "step": 76120 }, { "epoch": 2.7665528018024563, "grad_norm": 4.783881187438965, "learning_rate": 3.183167981404777e-05, "loss": 0.1214, "step": 76130 }, { "epoch": 2.7669162003052548, "grad_norm": 1.0744116306304932, "learning_rate": 3.1826687863384006e-05, "loss": 0.1492, "step": 76140 }, { "epoch": 2.767279598808053, "grad_norm": 1.5960917472839355, "learning_rate": 3.182169561858257e-05, "loss": 0.0879, "step": 76150 }, { "epoch": 2.767642997310851, "grad_norm": 0.9969580769538879, "learning_rate": 3.1816703079858535e-05, "loss": 0.0919, "step": 76160 }, { "epoch": 2.768006395813649, "grad_norm": 0.3668254613876343, "learning_rate": 3.181171024742701e-05, "loss": 0.1718, "step": 76170 }, { "epoch": 2.768369794316447, "grad_norm": 0.7729851603507996, "learning_rate": 3.180671712150314e-05, "loss": 0.0842, "step": 76180 }, { "epoch": 2.7687331928192456, "grad_norm": 0.6386042833328247, "learning_rate": 3.1801723702302034e-05, "loss": 0.1199, "step": 76190 }, { "epoch": 2.769096591322044, "grad_norm": 2.096891164779663, "learning_rate": 3.179672999003887e-05, "loss": 0.1033, "step": 76200 }, { "epoch": 2.769096591322044, "eval_loss": 0.31721433997154236, "eval_runtime": 179.2645, "eval_samples_per_second": 41.358, "eval_steps_per_second": 5.171, "eval_wer": 0.14798409788153286, "step": 76200 }, { "epoch": 2.769459989824842, "grad_norm": 2.548231601715088, "learning_rate": 3.1791735984928784e-05, "loss": 0.1199, "step": 76210 }, { "epoch": 2.76982338832764, "grad_norm": 1.2589582204818726, "learning_rate": 3.178674168718696e-05, "loss": 0.1165, "step": 76220 }, { "epoch": 2.7701867868304384, "grad_norm": 1.0916184186935425, 
"learning_rate": 3.178174709702858e-05, "loss": 0.09, "step": 76230 }, { "epoch": 2.7705501853332364, "grad_norm": 0.47342580556869507, "learning_rate": 3.177675221466885e-05, "loss": 0.0816, "step": 76240 }, { "epoch": 2.770913583836035, "grad_norm": 1.0653049945831299, "learning_rate": 3.177175704032298e-05, "loss": 0.1169, "step": 76250 }, { "epoch": 2.771276982338833, "grad_norm": 0.5227024555206299, "learning_rate": 3.176676157420619e-05, "loss": 0.0903, "step": 76260 }, { "epoch": 2.771640380841631, "grad_norm": 1.8630784749984741, "learning_rate": 3.1761765816533726e-05, "loss": 0.1136, "step": 76270 }, { "epoch": 2.772003779344429, "grad_norm": 1.7517484426498413, "learning_rate": 3.175676976752083e-05, "loss": 0.0897, "step": 76280 }, { "epoch": 2.772367177847227, "grad_norm": 0.7945340871810913, "learning_rate": 3.175177342738276e-05, "loss": 0.1031, "step": 76290 }, { "epoch": 2.7727305763500256, "grad_norm": 0.6702117919921875, "learning_rate": 3.174677679633481e-05, "loss": 0.3309, "step": 76300 }, { "epoch": 2.7730939748528236, "grad_norm": 0.8197999000549316, "learning_rate": 3.174177987459223e-05, "loss": 0.1137, "step": 76310 }, { "epoch": 2.7734573733556216, "grad_norm": 0.35699373483657837, "learning_rate": 3.1736782662370354e-05, "loss": 0.092, "step": 76320 }, { "epoch": 2.77382077185842, "grad_norm": 0.566719114780426, "learning_rate": 3.173178515988449e-05, "loss": 0.1059, "step": 76330 }, { "epoch": 2.774184170361218, "grad_norm": 0.6718754172325134, "learning_rate": 3.172678736734995e-05, "loss": 0.1299, "step": 76340 }, { "epoch": 2.7745475688640164, "grad_norm": 0.5272148847579956, "learning_rate": 3.1721789284982075e-05, "loss": 0.1943, "step": 76350 }, { "epoch": 2.7749109673668144, "grad_norm": 0.7239329218864441, "learning_rate": 3.1716790912996214e-05, "loss": 0.0834, "step": 76360 }, { "epoch": 2.7752743658696124, "grad_norm": 1.7969343662261963, "learning_rate": 3.171179225160774e-05, "loss": 0.1131, "step": 76370 }, { "epoch": 
2.775637764372411, "grad_norm": 1.348568320274353, "learning_rate": 3.1706793301032e-05, "loss": 0.1328, "step": 76380 }, { "epoch": 2.776001162875209, "grad_norm": 0.6011419892311096, "learning_rate": 3.170179406148441e-05, "loss": 0.1409, "step": 76390 }, { "epoch": 2.7763645613780072, "grad_norm": 1.8177915811538696, "learning_rate": 3.169679453318036e-05, "loss": 0.0737, "step": 76400 }, { "epoch": 2.7767279598808052, "grad_norm": 0.592851996421814, "learning_rate": 3.1691794716335266e-05, "loss": 0.0998, "step": 76410 }, { "epoch": 2.777091358383603, "grad_norm": 1.3811548948287964, "learning_rate": 3.168679461116454e-05, "loss": 0.0754, "step": 76420 }, { "epoch": 2.7774547568864016, "grad_norm": 2.7338156700134277, "learning_rate": 3.168179421788363e-05, "loss": 0.1265, "step": 76430 }, { "epoch": 2.7778181553892, "grad_norm": 0.604120135307312, "learning_rate": 3.167679353670798e-05, "loss": 0.1086, "step": 76440 }, { "epoch": 2.778181553891998, "grad_norm": 0.8220155239105225, "learning_rate": 3.1671792567853045e-05, "loss": 1.4637, "step": 76450 }, { "epoch": 2.778544952394796, "grad_norm": 0.8582079410552979, "learning_rate": 3.166679131153432e-05, "loss": 0.1023, "step": 76460 }, { "epoch": 2.778908350897594, "grad_norm": 0.5946437120437622, "learning_rate": 3.166178976796727e-05, "loss": 0.1233, "step": 76470 }, { "epoch": 2.7792717494003925, "grad_norm": 1.113297700881958, "learning_rate": 3.165678793736741e-05, "loss": 0.1044, "step": 76480 }, { "epoch": 2.779635147903191, "grad_norm": 96.74727630615234, "learning_rate": 3.165178581995023e-05, "loss": 1.901, "step": 76490 }, { "epoch": 2.779998546405989, "grad_norm": 0.4184577465057373, "learning_rate": 3.164678341593127e-05, "loss": 0.0768, "step": 76500 }, { "epoch": 2.780361944908787, "grad_norm": 0.558016300201416, "learning_rate": 3.164178072552606e-05, "loss": 0.2095, "step": 76510 }, { "epoch": 2.7807253434115853, "grad_norm": 0.7608421444892883, "learning_rate": 3.1636777748950156e-05, 
"loss": 0.0934, "step": 76520 }, { "epoch": 2.7810887419143833, "grad_norm": 0.43345919251441956, "learning_rate": 3.163177448641911e-05, "loss": 0.0852, "step": 76530 }, { "epoch": 2.7814521404171817, "grad_norm": 3.386565923690796, "learning_rate": 3.1626770938148496e-05, "loss": 0.4973, "step": 76540 }, { "epoch": 2.7818155389199797, "grad_norm": null, "learning_rate": 3.1622267500575804e-05, "loss": 2.2209, "step": 76550 }, { "epoch": 2.7821789374227777, "grad_norm": 1.2026207447052002, "learning_rate": 3.161726340999396e-05, "loss": 0.091, "step": 76560 }, { "epoch": 2.782542335925576, "grad_norm": 0.6033660769462585, "learning_rate": 3.1612259034297784e-05, "loss": 0.1076, "step": 76570 }, { "epoch": 2.782905734428374, "grad_norm": 0.6164398193359375, "learning_rate": 3.1607254373702885e-05, "loss": 0.1115, "step": 76580 }, { "epoch": 2.7832691329311725, "grad_norm": 0.4211709201335907, "learning_rate": 3.1602249428424916e-05, "loss": 0.1061, "step": 76590 }, { "epoch": 2.7836325314339705, "grad_norm": 0.8229207396507263, "learning_rate": 3.1597244198679496e-05, "loss": 0.1703, "step": 76600 }, { "epoch": 2.7839959299367685, "grad_norm": 0.2660597860813141, "learning_rate": 3.159223868468231e-05, "loss": 0.0873, "step": 76610 }, { "epoch": 2.784359328439567, "grad_norm": 0.8367421627044678, "learning_rate": 3.1587232886649006e-05, "loss": 0.0906, "step": 76620 }, { "epoch": 2.784722726942365, "grad_norm": 0.4646151661872864, "learning_rate": 3.158222680479527e-05, "loss": 0.0922, "step": 76630 }, { "epoch": 2.7850861254451633, "grad_norm": 1.0064074993133545, "learning_rate": 3.1577220439336814e-05, "loss": 0.0827, "step": 76640 }, { "epoch": 2.7854495239479613, "grad_norm": 0.7360056638717651, "learning_rate": 3.157221379048932e-05, "loss": 0.1526, "step": 76650 }, { "epoch": 2.7858129224507593, "grad_norm": 0.4394819736480713, "learning_rate": 3.1567206858468524e-05, "loss": 0.0802, "step": 76660 }, { "epoch": 2.7861763209535577, "grad_norm": 
2.16237735748291, "learning_rate": 3.1562199643490156e-05, "loss": 0.1079, "step": 76670 }, { "epoch": 2.7865397194563557, "grad_norm": 1.211832046508789, "learning_rate": 3.155719214576994e-05, "loss": 0.0896, "step": 76680 }, { "epoch": 2.786903117959154, "grad_norm": 0.561252772808075, "learning_rate": 3.1552184365523654e-05, "loss": 0.1002, "step": 76690 }, { "epoch": 2.787266516461952, "grad_norm": 0.947999894618988, "learning_rate": 3.1547176302967046e-05, "loss": 0.075, "step": 76700 }, { "epoch": 2.78762991496475, "grad_norm": 0.3279600739479065, "learning_rate": 3.154216795831591e-05, "loss": 0.0808, "step": 76710 }, { "epoch": 2.7879933134675485, "grad_norm": 0.4340432584285736, "learning_rate": 3.1537159331786046e-05, "loss": 0.1208, "step": 76720 }, { "epoch": 2.788356711970347, "grad_norm": 1.4407846927642822, "learning_rate": 3.1532150423593234e-05, "loss": 0.0973, "step": 76730 }, { "epoch": 2.788720110473145, "grad_norm": 1.0775196552276611, "learning_rate": 3.152714123395331e-05, "loss": 0.1013, "step": 76740 }, { "epoch": 2.789083508975943, "grad_norm": 1.0762931108474731, "learning_rate": 3.152213176308209e-05, "loss": 0.0797, "step": 76750 }, { "epoch": 2.789446907478741, "grad_norm": 0.645371675491333, "learning_rate": 3.1517122011195414e-05, "loss": 0.0918, "step": 76760 }, { "epoch": 2.7898103059815393, "grad_norm": 0.4548865556716919, "learning_rate": 3.151211197850914e-05, "loss": 0.1365, "step": 76770 }, { "epoch": 2.7901737044843378, "grad_norm": 0.9523658156394958, "learning_rate": 3.1507101665239136e-05, "loss": 0.103, "step": 76780 }, { "epoch": 2.7905371029871358, "grad_norm": 0.6093083024024963, "learning_rate": 3.150209107160127e-05, "loss": 0.0966, "step": 76790 }, { "epoch": 2.7909005014899337, "grad_norm": 1.1917424201965332, "learning_rate": 3.149708019781143e-05, "loss": 0.0723, "step": 76800 }, { "epoch": 2.7909005014899337, "eval_loss": 0.33958899974823, "eval_runtime": 180.4871, "eval_samples_per_second": 41.078, 
"eval_steps_per_second": 5.136, "eval_wer": 0.14761195926443627, "step": 76800 }, { "epoch": 2.791263899992732, "grad_norm": 0.5231362581253052, "learning_rate": 3.149206904408553e-05, "loss": 0.0774, "step": 76810 }, { "epoch": 2.79162729849553, "grad_norm": 0.3892790973186493, "learning_rate": 3.148705761063947e-05, "loss": 0.1298, "step": 76820 }, { "epoch": 2.7919906969983286, "grad_norm": 1.2342190742492676, "learning_rate": 3.1482045897689174e-05, "loss": 0.0784, "step": 76830 }, { "epoch": 2.7923540955011266, "grad_norm": 0.6379334926605225, "learning_rate": 3.147703390545059e-05, "loss": 0.1218, "step": 76840 }, { "epoch": 2.7927174940039245, "grad_norm": 4.186805248260498, "learning_rate": 3.1472021634139656e-05, "loss": 0.1091, "step": 76850 }, { "epoch": 2.793080892506723, "grad_norm": 0.9693048596382141, "learning_rate": 3.146700908397234e-05, "loss": 0.081, "step": 76860 }, { "epoch": 2.793444291009521, "grad_norm": 0.41208523511886597, "learning_rate": 3.146199625516461e-05, "loss": 0.1323, "step": 76870 }, { "epoch": 2.7938076895123194, "grad_norm": 0.6204960346221924, "learning_rate": 3.145698314793245e-05, "loss": 0.0843, "step": 76880 }, { "epoch": 2.7941710880151174, "grad_norm": 0.9049692153930664, "learning_rate": 3.145196976249187e-05, "loss": 0.1092, "step": 76890 }, { "epoch": 2.7945344865179154, "grad_norm": 0.9340922832489014, "learning_rate": 3.144695609905887e-05, "loss": 0.1021, "step": 76900 }, { "epoch": 2.794897885020714, "grad_norm": 0.40798988938331604, "learning_rate": 3.144194215784946e-05, "loss": 0.1096, "step": 76910 }, { "epoch": 2.7952612835235118, "grad_norm": 0.4020129144191742, "learning_rate": 3.143692793907968e-05, "loss": 0.0928, "step": 76920 }, { "epoch": 2.79562468202631, "grad_norm": 0.621026873588562, "learning_rate": 3.1431913442965585e-05, "loss": 0.0978, "step": 76930 }, { "epoch": 2.795988080529108, "grad_norm": 0.428535133600235, "learning_rate": 3.1426898669723225e-05, "loss": 0.1034, "step": 76940 }, { 
"epoch": 2.796351479031906, "grad_norm": 1.3301656246185303, "learning_rate": 3.1421883619568665e-05, "loss": 0.0813, "step": 76950 }, { "epoch": 2.7967148775347046, "grad_norm": 0.4475337266921997, "learning_rate": 3.141686829271799e-05, "loss": 0.0865, "step": 76960 }, { "epoch": 2.7970782760375026, "grad_norm": 2.5869007110595703, "learning_rate": 3.1411852689387294e-05, "loss": 0.1085, "step": 76970 }, { "epoch": 2.797441674540301, "grad_norm": 1.98558509349823, "learning_rate": 3.140683680979268e-05, "loss": 0.0887, "step": 76980 }, { "epoch": 2.797805073043099, "grad_norm": 0.4044126570224762, "learning_rate": 3.1401820654150267e-05, "loss": 0.1273, "step": 76990 }, { "epoch": 2.798168471545897, "grad_norm": 0.5848196148872375, "learning_rate": 3.139680422267617e-05, "loss": 0.9749, "step": 77000 }, { "epoch": 2.7985318700486954, "grad_norm": 1.0990394353866577, "learning_rate": 3.139178751558655e-05, "loss": 0.0864, "step": 77010 }, { "epoch": 2.798895268551494, "grad_norm": 0.5997377634048462, "learning_rate": 3.138677053309753e-05, "loss": 0.1654, "step": 77020 }, { "epoch": 2.799258667054292, "grad_norm": 2.957549571990967, "learning_rate": 3.138175327542531e-05, "loss": 0.0771, "step": 77030 }, { "epoch": 2.79962206555709, "grad_norm": 1.3711345195770264, "learning_rate": 3.137673574278604e-05, "loss": 0.1682, "step": 77040 }, { "epoch": 2.799985464059888, "grad_norm": 1.1909111738204956, "learning_rate": 3.137171793539591e-05, "loss": 0.1002, "step": 77050 }, { "epoch": 2.8003488625626862, "grad_norm": 0.622058093547821, "learning_rate": 3.136669985347113e-05, "loss": 0.0842, "step": 77060 }, { "epoch": 2.8007122610654847, "grad_norm": 1.0402450561523438, "learning_rate": 3.136168149722791e-05, "loss": 0.1096, "step": 77070 }, { "epoch": 2.8010756595682826, "grad_norm": 4.3762969970703125, "learning_rate": 3.135666286688247e-05, "loss": 0.0888, "step": 77080 }, { "epoch": 2.8014390580710806, "grad_norm": 1.1344795227050781, "learning_rate": 
3.135164396265103e-05, "loss": 0.1097, "step": 77090 }, { "epoch": 2.801802456573879, "grad_norm": 0.5318688154220581, "learning_rate": 3.134662478474987e-05, "loss": 0.0926, "step": 77100 }, { "epoch": 2.802165855076677, "grad_norm": 1.101820468902588, "learning_rate": 3.1341605333395216e-05, "loss": 0.0989, "step": 77110 }, { "epoch": 2.8025292535794755, "grad_norm": 0.3960217237472534, "learning_rate": 3.133658560880336e-05, "loss": 0.1064, "step": 77120 }, { "epoch": 2.8028926520822735, "grad_norm": 0.6371271014213562, "learning_rate": 3.133156561119057e-05, "loss": 0.0957, "step": 77130 }, { "epoch": 2.8032560505850714, "grad_norm": 0.6579133868217468, "learning_rate": 3.132654534077315e-05, "loss": 0.0861, "step": 77140 }, { "epoch": 2.80361944908787, "grad_norm": 0.3245817720890045, "learning_rate": 3.13215247977674e-05, "loss": 2.9585, "step": 77150 }, { "epoch": 2.803982847590668, "grad_norm": 0.4539554715156555, "learning_rate": 3.131650398238963e-05, "loss": 3.7161, "step": 77160 }, { "epoch": 2.8043462460934663, "grad_norm": 0.5258297324180603, "learning_rate": 3.1311482894856194e-05, "loss": 0.0938, "step": 77170 }, { "epoch": 2.8047096445962643, "grad_norm": 1.8863434791564941, "learning_rate": 3.13064615353834e-05, "loss": 0.1022, "step": 77180 }, { "epoch": 2.8050730430990622, "grad_norm": 0.4523390531539917, "learning_rate": 3.130143990418763e-05, "loss": 0.1094, "step": 77190 }, { "epoch": 2.8054364416018607, "grad_norm": 1.1494712829589844, "learning_rate": 3.1296418001485225e-05, "loss": 0.0911, "step": 77200 }, { "epoch": 2.8057998401046587, "grad_norm": 1.3108868598937988, "learning_rate": 3.129139582749258e-05, "loss": 0.1123, "step": 77210 }, { "epoch": 2.806163238607457, "grad_norm": 0.6619325280189514, "learning_rate": 3.128637338242607e-05, "loss": 0.1178, "step": 77220 }, { "epoch": 2.806526637110255, "grad_norm": 0.9496577978134155, "learning_rate": 3.128135066650209e-05, "loss": 0.0903, "step": 77230 }, { "epoch": 2.806890035613053, 
"grad_norm": 1.0447412729263306, "learning_rate": 3.127632767993707e-05, "loss": 0.1962, "step": 77240 }, { "epoch": 2.8072534341158515, "grad_norm": 0.6359366774559021, "learning_rate": 3.127130442294742e-05, "loss": 0.0872, "step": 77250 }, { "epoch": 2.8076168326186495, "grad_norm": 0.36711859703063965, "learning_rate": 3.126628089574957e-05, "loss": 0.1117, "step": 77260 }, { "epoch": 2.807980231121448, "grad_norm": 0.46006709337234497, "learning_rate": 3.1261257098559975e-05, "loss": 0.1075, "step": 77270 }, { "epoch": 2.808343629624246, "grad_norm": 0.4880903959274292, "learning_rate": 3.125623303159509e-05, "loss": 0.0933, "step": 77280 }, { "epoch": 2.808707028127044, "grad_norm": 0.6547648906707764, "learning_rate": 3.125120869507138e-05, "loss": 0.0927, "step": 77290 }, { "epoch": 2.8090704266298423, "grad_norm": 0.9286164045333862, "learning_rate": 3.124618408920533e-05, "loss": 0.0701, "step": 77300 }, { "epoch": 2.8094338251326407, "grad_norm": 0.7774373292922974, "learning_rate": 3.1241159214213436e-05, "loss": 0.0885, "step": 77310 }, { "epoch": 2.8097972236354387, "grad_norm": 0.6464956402778625, "learning_rate": 3.12361340703122e-05, "loss": 0.1133, "step": 77320 }, { "epoch": 2.8101606221382367, "grad_norm": 0.6208813190460205, "learning_rate": 3.123110865771813e-05, "loss": 0.1143, "step": 77330 }, { "epoch": 2.8105240206410347, "grad_norm": 0.5069448351860046, "learning_rate": 3.122608297664776e-05, "loss": 0.0991, "step": 77340 }, { "epoch": 2.810887419143833, "grad_norm": 0.6329632997512817, "learning_rate": 3.122105702731762e-05, "loss": 0.0925, "step": 77350 }, { "epoch": 2.8112508176466315, "grad_norm": 0.8677617311477661, "learning_rate": 3.121603080994428e-05, "loss": 0.0819, "step": 77360 }, { "epoch": 2.8116142161494295, "grad_norm": 0.7725453972816467, "learning_rate": 3.1211004324744274e-05, "loss": 0.1081, "step": 77370 }, { "epoch": 2.8119776146522275, "grad_norm": 0.7517724633216858, "learning_rate": 3.12059775719342e-05, "loss": 
0.0873, "step": 77380 }, { "epoch": 2.812341013155026, "grad_norm": 0.8779316544532776, "learning_rate": 3.1200950551730636e-05, "loss": 0.1038, "step": 77390 }, { "epoch": 2.812704411657824, "grad_norm": 0.4052380323410034, "learning_rate": 3.119592326435016e-05, "loss": 0.0791, "step": 77400 }, { "epoch": 2.812704411657824, "eval_loss": 0.3295031487941742, "eval_runtime": 180.5257, "eval_samples_per_second": 41.069, "eval_steps_per_second": 5.135, "eval_wer": 0.14710367237279212, "step": 77400 }, { "epoch": 2.8130678101606224, "grad_norm": 0.4020283818244934, "learning_rate": 3.1190895710009416e-05, "loss": 0.1089, "step": 77410 }, { "epoch": 2.8134312086634203, "grad_norm": 0.852902889251709, "learning_rate": 3.118586788892499e-05, "loss": 0.0949, "step": 77420 }, { "epoch": 2.8137946071662183, "grad_norm": 0.7513383030891418, "learning_rate": 3.1180839801313536e-05, "loss": 0.11, "step": 77430 }, { "epoch": 2.8141580056690167, "grad_norm": 0.7311908006668091, "learning_rate": 3.117581144739168e-05, "loss": 0.1077, "step": 77440 }, { "epoch": 2.8145214041718147, "grad_norm": 0.7238545417785645, "learning_rate": 3.117078282737608e-05, "loss": 0.0743, "step": 77450 }, { "epoch": 2.814884802674613, "grad_norm": 0.6685813069343567, "learning_rate": 3.116575394148341e-05, "loss": 0.0895, "step": 77460 }, { "epoch": 2.815248201177411, "grad_norm": 0.6721900105476379, "learning_rate": 3.116072478993034e-05, "loss": 2.3187, "step": 77470 }, { "epoch": 2.815611599680209, "grad_norm": 0.5871604084968567, "learning_rate": 3.1155695372933553e-05, "loss": 0.0879, "step": 77480 }, { "epoch": 2.8159749981830076, "grad_norm": 0.6194286942481995, "learning_rate": 3.1150665690709755e-05, "loss": 0.1139, "step": 77490 }, { "epoch": 2.8163383966858055, "grad_norm": 2.4753482341766357, "learning_rate": 3.114563574347566e-05, "loss": 0.0882, "step": 77500 }, { "epoch": 2.816701795188604, "grad_norm": 0.6420596241950989, "learning_rate": 3.1140605531447985e-05, "loss": 0.0677, "step": 
77510 }, { "epoch": 2.817065193691402, "grad_norm": 0.7851647734642029, "learning_rate": 3.1135575054843464e-05, "loss": 0.0943, "step": 77520 }, { "epoch": 2.8174285921942, "grad_norm": 0.5447911024093628, "learning_rate": 3.113054431387885e-05, "loss": 0.0936, "step": 77530 }, { "epoch": 2.8177919906969984, "grad_norm": 1.930080771446228, "learning_rate": 3.1125513308770886e-05, "loss": 0.0979, "step": 77540 }, { "epoch": 2.8181553891997964, "grad_norm": 1.2513458728790283, "learning_rate": 3.112048203973636e-05, "loss": 0.0968, "step": 77550 }, { "epoch": 2.818518787702595, "grad_norm": 0.7506178617477417, "learning_rate": 3.1115450506992025e-05, "loss": 0.0754, "step": 77560 }, { "epoch": 2.8188821862053928, "grad_norm": 2.6080875396728516, "learning_rate": 3.11104187107547e-05, "loss": 0.902, "step": 77570 }, { "epoch": 2.8192455847081908, "grad_norm": 1.5008831024169922, "learning_rate": 3.110538665124117e-05, "loss": 0.1221, "step": 77580 }, { "epoch": 2.819608983210989, "grad_norm": 0.6769622564315796, "learning_rate": 3.1100354328668244e-05, "loss": 0.1255, "step": 77590 }, { "epoch": 2.8199723817137876, "grad_norm": 1.557826280593872, "learning_rate": 3.109532174325277e-05, "loss": 0.4934, "step": 77600 }, { "epoch": 2.8203357802165856, "grad_norm": 0.48424941301345825, "learning_rate": 3.1090288895211554e-05, "loss": 0.1016, "step": 77610 }, { "epoch": 2.8206991787193836, "grad_norm": 0.5457457900047302, "learning_rate": 3.1085255784761466e-05, "loss": 0.1036, "step": 77620 }, { "epoch": 2.821062577222182, "grad_norm": 0.729720950126648, "learning_rate": 3.108022241211936e-05, "loss": 0.0936, "step": 77630 }, { "epoch": 2.82142597572498, "grad_norm": 3.1648247241973877, "learning_rate": 3.1075188777502104e-05, "loss": 0.1059, "step": 77640 }, { "epoch": 2.8217893742277784, "grad_norm": 2.022939920425415, "learning_rate": 3.107015488112658e-05, "loss": 2.3178, "step": 77650 }, { "epoch": 2.8221527727305764, "grad_norm": 0.5269419550895691, 
"learning_rate": 3.1065120723209676e-05, "loss": 0.1035, "step": 77660 }, { "epoch": 2.8225161712333744, "grad_norm": 2.0776426792144775, "learning_rate": 3.10600863039683e-05, "loss": 0.1209, "step": 77670 }, { "epoch": 2.822879569736173, "grad_norm": 1.718711495399475, "learning_rate": 3.105505162361936e-05, "loss": 0.0931, "step": 77680 }, { "epoch": 2.823242968238971, "grad_norm": 0.9783419370651245, "learning_rate": 3.10500166823798e-05, "loss": 0.1366, "step": 77690 }, { "epoch": 2.8236063667417692, "grad_norm": 0.3474352955818176, "learning_rate": 3.1044981480466544e-05, "loss": 0.0823, "step": 77700 }, { "epoch": 2.823969765244567, "grad_norm": 0.4555971622467041, "learning_rate": 3.103994601809655e-05, "loss": 0.0809, "step": 77710 }, { "epoch": 2.824333163747365, "grad_norm": 0.5148143768310547, "learning_rate": 3.103491029548676e-05, "loss": 0.1185, "step": 77720 }, { "epoch": 2.8246965622501636, "grad_norm": 0.7520084381103516, "learning_rate": 3.102987431285416e-05, "loss": 0.0865, "step": 77730 }, { "epoch": 2.8250599607529616, "grad_norm": 2.1148874759674072, "learning_rate": 3.102483807041574e-05, "loss": 0.0923, "step": 77740 }, { "epoch": 2.82542335925576, "grad_norm": 0.8044182658195496, "learning_rate": 3.1019801568388476e-05, "loss": 0.0848, "step": 77750 }, { "epoch": 2.825786757758558, "grad_norm": 0.5779685974121094, "learning_rate": 3.1014764806989385e-05, "loss": 0.0902, "step": 77760 }, { "epoch": 2.826150156261356, "grad_norm": 0.9560330510139465, "learning_rate": 3.1009727786435474e-05, "loss": 1.5009, "step": 77770 }, { "epoch": 2.8265135547641544, "grad_norm": 0.7544772624969482, "learning_rate": 3.100469050694378e-05, "loss": 0.1239, "step": 77780 }, { "epoch": 2.8268769532669524, "grad_norm": 0.824269711971283, "learning_rate": 3.099965296873134e-05, "loss": 0.0934, "step": 77790 }, { "epoch": 2.827240351769751, "grad_norm": 0.6971185803413391, "learning_rate": 3.099461517201519e-05, "loss": 0.0923, "step": 77800 }, { "epoch": 
2.827603750272549, "grad_norm": 0.81715989112854, "learning_rate": 3.098957711701241e-05, "loss": 0.1041, "step": 77810 }, { "epoch": 2.827967148775347, "grad_norm": 0.5701466798782349, "learning_rate": 3.098453880394006e-05, "loss": 0.1016, "step": 77820 }, { "epoch": 2.8283305472781453, "grad_norm": 6.4445881843566895, "learning_rate": 3.0979500233015224e-05, "loss": 0.0902, "step": 77830 }, { "epoch": 2.8286939457809432, "grad_norm": 2.981534004211426, "learning_rate": 3.0974461404455e-05, "loss": 0.1224, "step": 77840 }, { "epoch": 2.8290573442837417, "grad_norm": 0.878025472164154, "learning_rate": 3.096942231847649e-05, "loss": 0.0965, "step": 77850 }, { "epoch": 2.8294207427865397, "grad_norm": 0.45175373554229736, "learning_rate": 3.096438297529681e-05, "loss": 0.0821, "step": 77860 }, { "epoch": 2.8297841412893376, "grad_norm": 1.207901954650879, "learning_rate": 3.0959343375133096e-05, "loss": 0.1296, "step": 77870 }, { "epoch": 2.830147539792136, "grad_norm": 2.8622663021087646, "learning_rate": 3.0954303518202476e-05, "loss": 0.0949, "step": 77880 }, { "epoch": 2.8305109382949345, "grad_norm": 1.028940200805664, "learning_rate": 3.0949263404722104e-05, "loss": 0.1271, "step": 77890 }, { "epoch": 2.8308743367977325, "grad_norm": 0.8942374587059021, "learning_rate": 3.094422303490913e-05, "loss": 1.3726, "step": 77900 }, { "epoch": 2.8312377353005305, "grad_norm": 1.3904191255569458, "learning_rate": 3.093918240898075e-05, "loss": 0.0858, "step": 77910 }, { "epoch": 2.831601133803329, "grad_norm": 3.551011562347412, "learning_rate": 3.093414152715412e-05, "loss": 0.106, "step": 77920 }, { "epoch": 2.831964532306127, "grad_norm": 1.3634971380233765, "learning_rate": 3.092910038964645e-05, "loss": 0.1102, "step": 77930 }, { "epoch": 2.8323279308089253, "grad_norm": 0.9616494178771973, "learning_rate": 3.092405899667494e-05, "loss": 0.0946, "step": 77940 }, { "epoch": 2.8326913293117233, "grad_norm": 1.1661938428878784, "learning_rate": 
3.0919017348456805e-05, "loss": 0.0848, "step": 77950 }, { "epoch": 2.8330547278145213, "grad_norm": 1.016788125038147, "learning_rate": 3.091397544520927e-05, "loss": 0.0958, "step": 77960 }, { "epoch": 2.8334181263173197, "grad_norm": 0.3791126012802124, "learning_rate": 3.090893328714958e-05, "loss": 0.0954, "step": 77970 }, { "epoch": 2.8337815248201177, "grad_norm": 1.1772645711898804, "learning_rate": 3.0903890874494975e-05, "loss": 0.117, "step": 77980 }, { "epoch": 2.834144923322916, "grad_norm": 0.7023350596427917, "learning_rate": 3.089884820746272e-05, "loss": 0.0857, "step": 77990 }, { "epoch": 2.834508321825714, "grad_norm": 0.8230845332145691, "learning_rate": 3.0893805286270085e-05, "loss": 0.085, "step": 78000 }, { "epoch": 2.834508321825714, "eval_loss": 0.34080591797828674, "eval_runtime": 179.5232, "eval_samples_per_second": 41.298, "eval_steps_per_second": 5.164, "eval_wer": 0.1498084847604698, "step": 78000 }, { "epoch": 2.834871720328512, "grad_norm": 0.7763819098472595, "learning_rate": 3.088876211113435e-05, "loss": 0.0876, "step": 78010 }, { "epoch": 2.8352351188313105, "grad_norm": 0.9296404719352722, "learning_rate": 3.088371868227281e-05, "loss": 0.115, "step": 78020 }, { "epoch": 2.8355985173341085, "grad_norm": 0.7724182605743408, "learning_rate": 3.087867499990276e-05, "loss": 0.1133, "step": 78030 }, { "epoch": 2.835961915836907, "grad_norm": 0.6081514954566956, "learning_rate": 3.087363106424152e-05, "loss": 0.1185, "step": 78040 }, { "epoch": 2.836325314339705, "grad_norm": 0.692362904548645, "learning_rate": 3.086858687550642e-05, "loss": 0.0796, "step": 78050 }, { "epoch": 2.836688712842503, "grad_norm": 0.7458900213241577, "learning_rate": 3.0863542433914794e-05, "loss": 0.0985, "step": 78060 }, { "epoch": 2.8370521113453013, "grad_norm": 0.2584981322288513, "learning_rate": 3.0858497739683984e-05, "loss": 0.102, "step": 78070 }, { "epoch": 2.8374155098480993, "grad_norm": 1.8372403383255005, "learning_rate": 
3.085345279303136e-05, "loss": 0.1121, "step": 78080 }, { "epoch": 2.8377789083508977, "grad_norm": 2.2560524940490723, "learning_rate": 3.0848407594174266e-05, "loss": 0.1086, "step": 78090 }, { "epoch": 2.8381423068536957, "grad_norm": 1.1488791704177856, "learning_rate": 3.0843362143330104e-05, "loss": 1.6247, "step": 78100 }, { "epoch": 2.8385057053564937, "grad_norm": 1.206886649131775, "learning_rate": 3.083831644071626e-05, "loss": 0.0846, "step": 78110 }, { "epoch": 2.838869103859292, "grad_norm": 0.6960283517837524, "learning_rate": 3.083327048655013e-05, "loss": 0.1261, "step": 78120 }, { "epoch": 2.83923250236209, "grad_norm": 0.6910631656646729, "learning_rate": 3.082822428104914e-05, "loss": 0.0979, "step": 78130 }, { "epoch": 2.8395959008648886, "grad_norm": 1.3443272113800049, "learning_rate": 3.082317782443069e-05, "loss": 0.1082, "step": 78140 }, { "epoch": 2.8399592993676865, "grad_norm": 0.290623277425766, "learning_rate": 3.081813111691223e-05, "loss": 0.0773, "step": 78150 }, { "epoch": 2.8403226978704845, "grad_norm": 0.5689085721969604, "learning_rate": 3.08130841587112e-05, "loss": 0.0972, "step": 78160 }, { "epoch": 2.840686096373283, "grad_norm": 0.45181599259376526, "learning_rate": 3.080803695004506e-05, "loss": 0.1002, "step": 78170 }, { "epoch": 2.8410494948760814, "grad_norm": 0.31175151467323303, "learning_rate": 3.080298949113127e-05, "loss": 0.0951, "step": 78180 }, { "epoch": 2.8414128933788794, "grad_norm": 0.6608039140701294, "learning_rate": 3.0797941782187314e-05, "loss": 0.1207, "step": 78190 }, { "epoch": 2.8417762918816774, "grad_norm": 0.9375587701797485, "learning_rate": 3.079289382343068e-05, "loss": 0.0875, "step": 78200 }, { "epoch": 2.842139690384476, "grad_norm": 0.581164538860321, "learning_rate": 3.078784561507885e-05, "loss": 0.204, "step": 78210 }, { "epoch": 2.8425030888872738, "grad_norm": 0.8400561809539795, "learning_rate": 3.078279715734935e-05, "loss": 0.106, "step": 78220 }, { "epoch": 2.842866487390072, 
"grad_norm": 0.5431386232376099, "learning_rate": 3.07777484504597e-05, "loss": 0.1134, "step": 78230 }, { "epoch": 2.84322988589287, "grad_norm": 0.5342890620231628, "learning_rate": 3.077269949462742e-05, "loss": 0.1102, "step": 78240 }, { "epoch": 2.843593284395668, "grad_norm": 0.6654142737388611, "learning_rate": 3.076765029007006e-05, "loss": 0.0818, "step": 78250 }, { "epoch": 2.8439566828984666, "grad_norm": 0.49340054392814636, "learning_rate": 3.076260083700518e-05, "loss": 0.0899, "step": 78260 }, { "epoch": 2.8443200814012646, "grad_norm": 0.5866402387619019, "learning_rate": 3.0757551135650325e-05, "loss": 0.1185, "step": 78270 }, { "epoch": 2.844683479904063, "grad_norm": 0.45987945795059204, "learning_rate": 3.075250118622308e-05, "loss": 0.1405, "step": 78280 }, { "epoch": 2.845046878406861, "grad_norm": 1.2310110330581665, "learning_rate": 3.0747450988941025e-05, "loss": 0.1569, "step": 78290 }, { "epoch": 2.845410276909659, "grad_norm": 1.251125693321228, "learning_rate": 3.074240054402175e-05, "loss": 0.0897, "step": 78300 }, { "epoch": 2.8457736754124574, "grad_norm": 0.369094580411911, "learning_rate": 3.0737349851682876e-05, "loss": 0.0805, "step": 78310 }, { "epoch": 2.8461370739152554, "grad_norm": 0.8887357711791992, "learning_rate": 3.0732298912142e-05, "loss": 0.1093, "step": 78320 }, { "epoch": 2.846500472418054, "grad_norm": 0.629465639591217, "learning_rate": 3.072724772561677e-05, "loss": 0.1114, "step": 78330 }, { "epoch": 2.846863870920852, "grad_norm": 1.0231704711914062, "learning_rate": 3.072219629232481e-05, "loss": 0.1372, "step": 78340 }, { "epoch": 2.84722726942365, "grad_norm": 0.8119713664054871, "learning_rate": 3.071714461248377e-05, "loss": 0.0918, "step": 78350 }, { "epoch": 2.847590667926448, "grad_norm": 0.886022686958313, "learning_rate": 3.071209268631131e-05, "loss": 0.1005, "step": 78360 }, { "epoch": 2.847954066429246, "grad_norm": 0.7678380012512207, "learning_rate": 3.07070405140251e-05, "loss": 0.1497, "step": 
78370 }, { "epoch": 2.8483174649320446, "grad_norm": 1.3270221948623657, "learning_rate": 3.070198809584283e-05, "loss": 0.1596, "step": 78380 }, { "epoch": 2.8486808634348426, "grad_norm": 0.3739996552467346, "learning_rate": 3.0696935431982165e-05, "loss": 0.1301, "step": 78390 }, { "epoch": 2.8490442619376406, "grad_norm": 1.079307198524475, "learning_rate": 3.0691882522660834e-05, "loss": 0.0879, "step": 78400 }, { "epoch": 2.849407660440439, "grad_norm": 2.8571724891662598, "learning_rate": 3.068682936809652e-05, "loss": 0.0914, "step": 78410 }, { "epoch": 2.849771058943237, "grad_norm": 0.8942508697509766, "learning_rate": 3.068177596850698e-05, "loss": 0.1367, "step": 78420 }, { "epoch": 2.8501344574460354, "grad_norm": 0.7308377027511597, "learning_rate": 3.0676722324109924e-05, "loss": 0.0927, "step": 78430 }, { "epoch": 2.8504978559488334, "grad_norm": 0.5024714469909668, "learning_rate": 3.06716684351231e-05, "loss": 0.114, "step": 78440 }, { "epoch": 2.8508612544516314, "grad_norm": 0.8501279354095459, "learning_rate": 3.066661430176426e-05, "loss": 0.0976, "step": 78450 }, { "epoch": 2.85122465295443, "grad_norm": 1.5030112266540527, "learning_rate": 3.066155992425118e-05, "loss": 0.0904, "step": 78460 }, { "epoch": 2.8515880514572283, "grad_norm": 0.9223312139511108, "learning_rate": 3.065650530280162e-05, "loss": 0.088, "step": 78470 }, { "epoch": 2.8519514499600263, "grad_norm": 0.8321495652198792, "learning_rate": 3.0651450437633375e-05, "loss": 0.0871, "step": 78480 }, { "epoch": 2.8523148484628242, "grad_norm": 0.5586594343185425, "learning_rate": 3.064639532896423e-05, "loss": 0.0893, "step": 78490 }, { "epoch": 2.8526782469656227, "grad_norm": 2.804748296737671, "learning_rate": 3.064133997701201e-05, "loss": 0.0791, "step": 78500 }, { "epoch": 2.8530416454684207, "grad_norm": 0.7210208177566528, "learning_rate": 3.063628438199453e-05, "loss": 0.0806, "step": 78510 }, { "epoch": 2.853405043971219, "grad_norm": 0.3784767687320709, 
"learning_rate": 3.063122854412959e-05, "loss": 0.1137, "step": 78520 }, { "epoch": 2.853768442474017, "grad_norm": 0.754591703414917, "learning_rate": 3.062617246363506e-05, "loss": 0.0763, "step": 78530 }, { "epoch": 2.854131840976815, "grad_norm": 0.5262603759765625, "learning_rate": 3.062111614072877e-05, "loss": 0.1768, "step": 78540 }, { "epoch": 2.8544952394796135, "grad_norm": 0.6021800637245178, "learning_rate": 3.0616059575628596e-05, "loss": 0.0806, "step": 78550 }, { "epoch": 2.8548586379824115, "grad_norm": 0.5301395654678345, "learning_rate": 3.061100276855239e-05, "loss": 0.1013, "step": 78560 }, { "epoch": 2.85522203648521, "grad_norm": 0.6743770837783813, "learning_rate": 3.060594571971804e-05, "loss": 0.0996, "step": 78570 }, { "epoch": 2.855585434988008, "grad_norm": 0.4729292094707489, "learning_rate": 3.060088842934344e-05, "loss": 0.1166, "step": 78580 }, { "epoch": 2.855948833490806, "grad_norm": 1.35919189453125, "learning_rate": 3.059583089764648e-05, "loss": 0.0929, "step": 78590 }, { "epoch": 2.8563122319936043, "grad_norm": 1.0586267709732056, "learning_rate": 3.059077312484507e-05, "loss": 0.1016, "step": 78600 }, { "epoch": 2.8563122319936043, "eval_loss": 0.3411506116390228, "eval_runtime": 179.4197, "eval_samples_per_second": 41.322, "eval_steps_per_second": 5.167, "eval_wer": 0.1493818868335542, "step": 78600 }, { "epoch": 2.8566756304964023, "grad_norm": 1.362656593322754, "learning_rate": 3.0585715111157145e-05, "loss": 0.0997, "step": 78610 }, { "epoch": 2.8570390289992007, "grad_norm": 0.3799169361591339, "learning_rate": 3.058065685680063e-05, "loss": 0.0939, "step": 78620 }, { "epoch": 2.8574024275019987, "grad_norm": 0.5108311772346497, "learning_rate": 3.0575598361993476e-05, "loss": 0.1104, "step": 78630 }, { "epoch": 2.8577658260047967, "grad_norm": 0.7714293599128723, "learning_rate": 3.057053962695361e-05, "loss": 0.1018, "step": 78640 }, { "epoch": 2.858129224507595, "grad_norm": 0.46769365668296814, "learning_rate": 
3.056548065189902e-05, "loss": 0.0867, "step": 78650 }, { "epoch": 2.858492623010393, "grad_norm": 0.6245691180229187, "learning_rate": 3.056042143704767e-05, "loss": 0.1026, "step": 78660 }, { "epoch": 2.8588560215131915, "grad_norm": 0.45852673053741455, "learning_rate": 3.055536198261755e-05, "loss": 0.3827, "step": 78670 }, { "epoch": 2.8592194200159895, "grad_norm": 1.1499156951904297, "learning_rate": 3.055030228882663e-05, "loss": 0.1123, "step": 78680 }, { "epoch": 2.8595828185187875, "grad_norm": 0.426826536655426, "learning_rate": 3.054524235589295e-05, "loss": 0.095, "step": 78690 }, { "epoch": 2.859946217021586, "grad_norm": 0.8750178217887878, "learning_rate": 3.05401821840345e-05, "loss": 0.0875, "step": 78700 }, { "epoch": 2.860309615524384, "grad_norm": 0.49052000045776367, "learning_rate": 3.053512177346932e-05, "loss": 0.0902, "step": 78710 }, { "epoch": 2.8606730140271823, "grad_norm": 0.41709479689598083, "learning_rate": 3.0530061124415426e-05, "loss": 0.1248, "step": 78720 }, { "epoch": 2.8610364125299803, "grad_norm": 1.0585765838623047, "learning_rate": 3.052500023709088e-05, "loss": 0.0996, "step": 78730 }, { "epoch": 2.8613998110327783, "grad_norm": 1.3335462808609009, "learning_rate": 3.051993911171373e-05, "loss": 0.1188, "step": 78740 }, { "epoch": 2.8617632095355767, "grad_norm": 4.254768371582031, "learning_rate": 3.051487774850204e-05, "loss": 1.4491, "step": 78750 }, { "epoch": 2.862126608038375, "grad_norm": 0.6429024934768677, "learning_rate": 3.0509816147673897e-05, "loss": 0.0981, "step": 78760 }, { "epoch": 2.862490006541173, "grad_norm": 0.5360010266304016, "learning_rate": 3.050475430944738e-05, "loss": 0.1222, "step": 78770 }, { "epoch": 2.862853405043971, "grad_norm": 0.7741981148719788, "learning_rate": 3.0499692234040577e-05, "loss": 0.0939, "step": 78780 }, { "epoch": 2.8632168035467696, "grad_norm": 0.3562833368778229, "learning_rate": 3.0494629921671614e-05, "loss": 0.1132, "step": 78790 }, { "epoch": 
2.8635802020495675, "grad_norm": 0.3981534242630005, "learning_rate": 3.0489567372558585e-05, "loss": 0.1176, "step": 78800 }, { "epoch": 2.863943600552366, "grad_norm": 0.6364398002624512, "learning_rate": 3.0484504586919643e-05, "loss": 0.1225, "step": 78810 }, { "epoch": 2.864306999055164, "grad_norm": 0.9307785034179688, "learning_rate": 3.047944156497291e-05, "loss": 0.1134, "step": 78820 }, { "epoch": 2.864670397557962, "grad_norm": 0.3588425815105438, "learning_rate": 3.047437830693653e-05, "loss": 0.1112, "step": 78830 }, { "epoch": 2.8650337960607604, "grad_norm": 2.3050026893615723, "learning_rate": 3.0469314813028672e-05, "loss": 0.1121, "step": 78840 }, { "epoch": 2.8653971945635583, "grad_norm": 3.113504648208618, "learning_rate": 3.0464251083467492e-05, "loss": 0.085, "step": 78850 }, { "epoch": 2.8657605930663568, "grad_norm": 1.4952332973480225, "learning_rate": 3.0459187118471177e-05, "loss": 0.0785, "step": 78860 }, { "epoch": 2.8661239915691548, "grad_norm": 3.147885322570801, "learning_rate": 3.0454122918257915e-05, "loss": 0.1065, "step": 78870 }, { "epoch": 2.8664873900719527, "grad_norm": 1.8741129636764526, "learning_rate": 3.0449058483045906e-05, "loss": 0.1066, "step": 78880 }, { "epoch": 2.866850788574751, "grad_norm": 0.5779256224632263, "learning_rate": 3.044399381305335e-05, "loss": 0.1182, "step": 78890 }, { "epoch": 2.867214187077549, "grad_norm": 0.8008689880371094, "learning_rate": 3.043892890849847e-05, "loss": 0.0872, "step": 78900 }, { "epoch": 2.8675775855803476, "grad_norm": 0.38459739089012146, "learning_rate": 3.0433863769599498e-05, "loss": 0.0808, "step": 78910 }, { "epoch": 2.8679409840831456, "grad_norm": 0.524728536605835, "learning_rate": 3.0428798396574663e-05, "loss": 0.0871, "step": 78920 }, { "epoch": 2.8683043825859436, "grad_norm": 0.9773525595664978, "learning_rate": 3.042373278964223e-05, "loss": 0.0752, "step": 78930 }, { "epoch": 2.868667781088742, "grad_norm": 2.2767083644866943, "learning_rate": 
3.041866694902045e-05, "loss": 0.164, "step": 78940 }, { "epoch": 2.86903117959154, "grad_norm": 1.0827412605285645, "learning_rate": 3.0413600874927578e-05, "loss": 0.0826, "step": 78950 }, { "epoch": 2.8693945780943384, "grad_norm": 0.5473418831825256, "learning_rate": 3.040853456758192e-05, "loss": 0.0948, "step": 78960 }, { "epoch": 2.8697579765971364, "grad_norm": 1.4309738874435425, "learning_rate": 3.0403468027201742e-05, "loss": 0.1169, "step": 78970 }, { "epoch": 2.8701213750999344, "grad_norm": 0.6939014196395874, "learning_rate": 3.0398401254005353e-05, "loss": 0.0897, "step": 78980 }, { "epoch": 2.870484773602733, "grad_norm": 1.1855500936508179, "learning_rate": 3.0393334248211064e-05, "loss": 0.115, "step": 78990 }, { "epoch": 2.870848172105531, "grad_norm": 0.3316340446472168, "learning_rate": 3.0388267010037193e-05, "loss": 0.0837, "step": 79000 }, { "epoch": 2.871211570608329, "grad_norm": 0.5338824987411499, "learning_rate": 3.0383199539702067e-05, "loss": 1.6163, "step": 79010 }, { "epoch": 2.871574969111127, "grad_norm": 0.6817033290863037, "learning_rate": 3.0378131837424024e-05, "loss": 0.0984, "step": 79020 }, { "epoch": 2.871938367613925, "grad_norm": 1.228437066078186, "learning_rate": 3.0373063903421416e-05, "loss": 0.088, "step": 79030 }, { "epoch": 2.8723017661167236, "grad_norm": 1.157533884048462, "learning_rate": 3.0367995737912604e-05, "loss": 0.1136, "step": 79040 }, { "epoch": 2.872665164619522, "grad_norm": 0.695222795009613, "learning_rate": 3.0362927341115954e-05, "loss": 0.074, "step": 79050 }, { "epoch": 2.87302856312232, "grad_norm": 0.658486008644104, "learning_rate": 3.0357858713249844e-05, "loss": 0.9512, "step": 79060 }, { "epoch": 2.873391961625118, "grad_norm": 0.7332690358161926, "learning_rate": 3.035278985453267e-05, "loss": 0.12, "step": 79070 }, { "epoch": 2.8737553601279164, "grad_norm": 0.8998427987098694, "learning_rate": 3.034772076518283e-05, "loss": 0.087, "step": 79080 }, { "epoch": 2.8741187586307144, 
"grad_norm": 0.6652089953422546, "learning_rate": 3.034265144541872e-05, "loss": 0.1126, "step": 79090 }, { "epoch": 2.874482157133513, "grad_norm": 0.5899316668510437, "learning_rate": 3.0337581895458773e-05, "loss": 0.097, "step": 79100 }, { "epoch": 2.874845555636311, "grad_norm": 2.4437944889068604, "learning_rate": 3.0332512115521407e-05, "loss": 0.0967, "step": 79110 }, { "epoch": 2.875208954139109, "grad_norm": 1.678469181060791, "learning_rate": 3.0327442105825076e-05, "loss": 0.1091, "step": 79120 }, { "epoch": 2.8755723526419072, "grad_norm": 2.5215954780578613, "learning_rate": 3.032237186658821e-05, "loss": 0.0954, "step": 79130 }, { "epoch": 2.8759357511447052, "grad_norm": 0.5362206697463989, "learning_rate": 3.031730139802929e-05, "loss": 0.0925, "step": 79140 }, { "epoch": 2.8762991496475037, "grad_norm": 0.8719716668128967, "learning_rate": 3.0312230700366766e-05, "loss": 0.0721, "step": 79150 }, { "epoch": 2.8766625481503016, "grad_norm": 0.5796521306037903, "learning_rate": 3.030715977381912e-05, "loss": 0.085, "step": 79160 }, { "epoch": 2.8770259466530996, "grad_norm": 0.5834314227104187, "learning_rate": 3.0302088618604844e-05, "loss": 0.0967, "step": 79170 }, { "epoch": 2.877389345155898, "grad_norm": 0.9963647127151489, "learning_rate": 3.029701723494243e-05, "loss": 0.1186, "step": 79180 }, { "epoch": 2.877752743658696, "grad_norm": 0.852916419506073, "learning_rate": 3.02919456230504e-05, "loss": 0.1209, "step": 79190 }, { "epoch": 2.8781161421614945, "grad_norm": 0.6775915622711182, "learning_rate": 3.028687378314725e-05, "loss": 0.093, "step": 79200 }, { "epoch": 2.8781161421614945, "eval_loss": 0.3184477686882019, "eval_runtime": 179.9919, "eval_samples_per_second": 41.191, "eval_steps_per_second": 5.15, "eval_wer": 0.14663169168769402, "step": 79200 }, { "epoch": 2.8784795406642925, "grad_norm": 1.629595160484314, "learning_rate": 3.028180171545153e-05, "loss": 0.0897, "step": 79210 }, { "epoch": 2.8788429391670904, "grad_norm": 
0.8693638443946838, "learning_rate": 3.027672942018176e-05, "loss": 0.8562, "step": 79220 }, { "epoch": 2.879206337669889, "grad_norm": 0.8512022495269775, "learning_rate": 3.02716568975565e-05, "loss": 0.0919, "step": 79230 }, { "epoch": 2.879569736172687, "grad_norm": 0.3746062219142914, "learning_rate": 3.0266584147794295e-05, "loss": 0.1221, "step": 79240 }, { "epoch": 2.8799331346754853, "grad_norm": 0.9358767867088318, "learning_rate": 3.026151117111372e-05, "loss": 0.1888, "step": 79250 }, { "epoch": 2.8802965331782833, "grad_norm": 0.5931565165519714, "learning_rate": 3.025643796773335e-05, "loss": 0.1583, "step": 79260 }, { "epoch": 2.8806599316810813, "grad_norm": 0.6608014702796936, "learning_rate": 3.0251364537871767e-05, "loss": 0.1107, "step": 79270 }, { "epoch": 2.8810233301838797, "grad_norm": 0.8677799105644226, "learning_rate": 3.0246290881747574e-05, "loss": 0.1313, "step": 79280 }, { "epoch": 2.8813867286866777, "grad_norm": 1.321589469909668, "learning_rate": 3.0241216999579368e-05, "loss": 0.1476, "step": 79290 }, { "epoch": 2.881750127189476, "grad_norm": 1.2019727230072021, "learning_rate": 3.0236142891585777e-05, "loss": 0.0955, "step": 79300 }, { "epoch": 2.882113525692274, "grad_norm": 0.7772002220153809, "learning_rate": 3.023106855798542e-05, "loss": 0.0799, "step": 79310 }, { "epoch": 2.882476924195072, "grad_norm": 0.4837106764316559, "learning_rate": 3.0225993998996925e-05, "loss": 0.1102, "step": 79320 }, { "epoch": 2.8828403226978705, "grad_norm": 1.1968311071395874, "learning_rate": 3.0220919214838946e-05, "loss": 0.0921, "step": 79330 }, { "epoch": 2.883203721200669, "grad_norm": 0.2687516510486603, "learning_rate": 3.021584420573013e-05, "loss": 0.4854, "step": 79340 }, { "epoch": 2.883567119703467, "grad_norm": 0.661100447177887, "learning_rate": 3.0210768971889152e-05, "loss": 0.0993, "step": 79350 }, { "epoch": 2.883930518206265, "grad_norm": 0.4699995219707489, "learning_rate": 3.0205693513534672e-05, "loss": 0.1043, "step": 
79360 }, { "epoch": 2.8842939167090633, "grad_norm": 0.5575037598609924, "learning_rate": 3.0200617830885386e-05, "loss": 0.113, "step": 79370 }, { "epoch": 2.8846573152118613, "grad_norm": 0.5147402286529541, "learning_rate": 3.0195541924159974e-05, "loss": 0.0983, "step": 79380 }, { "epoch": 2.8850207137146597, "grad_norm": 0.8548463582992554, "learning_rate": 3.0190465793577155e-05, "loss": 0.0788, "step": 79390 }, { "epoch": 2.8853841122174577, "grad_norm": 0.46500858664512634, "learning_rate": 3.018538943935563e-05, "loss": 0.0641, "step": 79400 }, { "epoch": 2.8857475107202557, "grad_norm": 0.479408860206604, "learning_rate": 3.018031286171412e-05, "loss": 0.1539, "step": 79410 }, { "epoch": 2.886110909223054, "grad_norm": 0.9867441654205322, "learning_rate": 3.0175236060871366e-05, "loss": 0.0999, "step": 79420 }, { "epoch": 2.886474307725852, "grad_norm": 0.8628416061401367, "learning_rate": 3.0170159037046096e-05, "loss": 0.1166, "step": 79430 }, { "epoch": 2.8868377062286505, "grad_norm": 1.9563437700271606, "learning_rate": 3.0165081790457077e-05, "loss": 0.1211, "step": 79440 }, { "epoch": 2.8872011047314485, "grad_norm": 0.6726937890052795, "learning_rate": 3.0160004321323053e-05, "loss": 0.0926, "step": 79450 }, { "epoch": 2.8875645032342465, "grad_norm": 0.509483814239502, "learning_rate": 3.0154926629862813e-05, "loss": 0.0803, "step": 79460 }, { "epoch": 2.887927901737045, "grad_norm": 0.7619544863700867, "learning_rate": 3.014984871629512e-05, "loss": 0.1251, "step": 79470 }, { "epoch": 2.888291300239843, "grad_norm": 1.0748878717422485, "learning_rate": 3.0144770580838767e-05, "loss": 0.1073, "step": 79480 }, { "epoch": 2.8886546987426414, "grad_norm": 0.7058316469192505, "learning_rate": 3.0139692223712555e-05, "loss": 0.1058, "step": 79490 }, { "epoch": 2.8890180972454393, "grad_norm": 1.453240990638733, "learning_rate": 3.013461364513529e-05, "loss": 0.105, "step": 79500 }, { "epoch": 2.8893814957482373, "grad_norm": 1.7408169507980347, 
"learning_rate": 3.0129534845325803e-05, "loss": 1.6102, "step": 79510 }, { "epoch": 2.8897448942510358, "grad_norm": 0.4194059669971466, "learning_rate": 3.0124455824502894e-05, "loss": 0.0936, "step": 79520 }, { "epoch": 2.8901082927538337, "grad_norm": 7.034887790679932, "learning_rate": 3.0119376582885427e-05, "loss": 0.1072, "step": 79530 }, { "epoch": 2.890471691256632, "grad_norm": 1.7293283939361572, "learning_rate": 3.0114297120692236e-05, "loss": 0.1205, "step": 79540 }, { "epoch": 2.89083508975943, "grad_norm": 3.1167123317718506, "learning_rate": 3.0109217438142184e-05, "loss": 0.0817, "step": 79550 }, { "epoch": 2.891198488262228, "grad_norm": 1.0231302976608276, "learning_rate": 3.0104137535454124e-05, "loss": 0.1408, "step": 79560 }, { "epoch": 2.8915618867650266, "grad_norm": 0.3560333251953125, "learning_rate": 3.0099057412846942e-05, "loss": 0.127, "step": 79570 }, { "epoch": 2.8919252852678246, "grad_norm": 0.43482980132102966, "learning_rate": 3.009397707053952e-05, "loss": 0.079, "step": 79580 }, { "epoch": 2.892288683770623, "grad_norm": 0.8485931158065796, "learning_rate": 3.008889650875074e-05, "loss": 0.1206, "step": 79590 }, { "epoch": 2.892652082273421, "grad_norm": 2.297492504119873, "learning_rate": 3.0083815727699526e-05, "loss": 0.0991, "step": 79600 }, { "epoch": 2.893015480776219, "grad_norm": 0.5528286695480347, "learning_rate": 3.0078734727604775e-05, "loss": 0.0757, "step": 79610 }, { "epoch": 2.8933788792790174, "grad_norm": 0.5743618607521057, "learning_rate": 3.0073653508685424e-05, "loss": 0.1121, "step": 79620 }, { "epoch": 2.893742277781816, "grad_norm": 0.8158531785011292, "learning_rate": 3.006857207116039e-05, "loss": 0.1045, "step": 79630 }, { "epoch": 2.894105676284614, "grad_norm": 0.7049798369407654, "learning_rate": 3.0063490415248613e-05, "loss": 0.1012, "step": 79640 }, { "epoch": 2.8944690747874118, "grad_norm": 0.3521101772785187, "learning_rate": 3.0058408541169057e-05, "loss": 0.0765, "step": 79650 }, { 
"epoch": 2.89483247329021, "grad_norm": 1.0171892642974854, "learning_rate": 3.0053326449140674e-05, "loss": 0.1011, "step": 79660 }, { "epoch": 2.895195871793008, "grad_norm": 0.5262839794158936, "learning_rate": 3.0048244139382438e-05, "loss": 0.1322, "step": 79670 }, { "epoch": 2.8955592702958066, "grad_norm": 1.3580801486968994, "learning_rate": 3.0043161612113313e-05, "loss": 0.0946, "step": 79680 }, { "epoch": 2.8959226687986046, "grad_norm": 1.4954004287719727, "learning_rate": 3.0038078867552306e-05, "loss": 0.0929, "step": 79690 }, { "epoch": 2.8962860673014026, "grad_norm": 0.6515393257141113, "learning_rate": 3.0032995905918405e-05, "loss": 0.1044, "step": 79700 }, { "epoch": 2.896649465804201, "grad_norm": 0.647331953048706, "learning_rate": 3.002791272743061e-05, "loss": 0.0747, "step": 79710 }, { "epoch": 2.897012864306999, "grad_norm": 0.9838120341300964, "learning_rate": 3.0022829332307962e-05, "loss": 0.1169, "step": 79720 }, { "epoch": 2.8973762628097974, "grad_norm": 0.6499975919723511, "learning_rate": 3.001774572076945e-05, "loss": 0.1144, "step": 79730 }, { "epoch": 2.8977396613125954, "grad_norm": 0.8443338871002197, "learning_rate": 3.0012661893034143e-05, "loss": 0.1184, "step": 79740 }, { "epoch": 2.8981030598153934, "grad_norm": 0.49458226561546326, "learning_rate": 3.0007577849321062e-05, "loss": 0.0847, "step": 79750 }, { "epoch": 2.898466458318192, "grad_norm": 0.5407196283340454, "learning_rate": 3.0002493589849272e-05, "loss": 0.0981, "step": 79760 }, { "epoch": 2.89882985682099, "grad_norm": 0.4729011654853821, "learning_rate": 2.9997409114837833e-05, "loss": 0.2098, "step": 79770 }, { "epoch": 2.8991932553237882, "grad_norm": 0.9962542653083801, "learning_rate": 2.9992324424505815e-05, "loss": 0.1022, "step": 79780 }, { "epoch": 2.8995566538265862, "grad_norm": 0.4466484487056732, "learning_rate": 2.9987239519072296e-05, "loss": 0.0995, "step": 79790 }, { "epoch": 2.899920052329384, "grad_norm": 1.0443413257598877, "learning_rate": 
2.9982154398756372e-05, "loss": 0.0956, "step": 79800 }, { "epoch": 2.899920052329384, "eval_loss": 0.33830875158309937, "eval_runtime": 179.6694, "eval_samples_per_second": 41.265, "eval_steps_per_second": 5.159, "eval_wer": 0.14741227512843322, "step": 79800 }, { "epoch": 2.9002834508321826, "grad_norm": 0.4343029856681824, "learning_rate": 2.9977577606928674e-05, "loss": 3.598, "step": 79810 }, { "epoch": 2.9006468493349806, "grad_norm": 0.4609208405017853, "learning_rate": 2.9972492078939808e-05, "loss": 0.3644, "step": 79820 }, { "epoch": 2.901010247837779, "grad_norm": 0.8700145483016968, "learning_rate": 2.9967406336703952e-05, "loss": 0.1192, "step": 79830 }, { "epoch": 2.901373646340577, "grad_norm": 0.6192082762718201, "learning_rate": 2.9962320380440228e-05, "loss": 0.1008, "step": 79840 }, { "epoch": 2.901737044843375, "grad_norm": 0.41174137592315674, "learning_rate": 2.995723421036778e-05, "loss": 0.1181, "step": 79850 }, { "epoch": 2.9021004433461735, "grad_norm": 3.049891710281372, "learning_rate": 2.9952147826705745e-05, "loss": 0.0981, "step": 79860 }, { "epoch": 2.9024638418489714, "grad_norm": 0.4502425193786621, "learning_rate": 2.9947061229673275e-05, "loss": 0.1306, "step": 79870 }, { "epoch": 2.90282724035177, "grad_norm": 0.5543062686920166, "learning_rate": 2.9941974419489545e-05, "loss": 0.0982, "step": 79880 }, { "epoch": 2.903190638854568, "grad_norm": 0.9228424429893494, "learning_rate": 2.9936887396373715e-05, "loss": 0.1304, "step": 79890 }, { "epoch": 2.903554037357366, "grad_norm": 0.6268784999847412, "learning_rate": 2.9931800160544975e-05, "loss": 0.0823, "step": 79900 }, { "epoch": 2.9039174358601643, "grad_norm": 0.6508156657218933, "learning_rate": 2.9926712712222516e-05, "loss": 0.0938, "step": 79910 }, { "epoch": 2.9042808343629627, "grad_norm": 0.45321550965309143, "learning_rate": 2.9921625051625533e-05, "loss": 0.1246, "step": 79920 }, { "epoch": 2.9046442328657607, "grad_norm": 0.6320390105247498, "learning_rate": 
2.9916537178973242e-05, "loss": 0.0835, "step": 79930 }, { "epoch": 2.9050076313685587, "grad_norm": 0.7819294929504395, "learning_rate": 2.9911449094484852e-05, "loss": 0.1086, "step": 79940 }, { "epoch": 2.905371029871357, "grad_norm": 0.7224891185760498, "learning_rate": 2.9906360798379594e-05, "loss": 0.0825, "step": 79950 }, { "epoch": 2.905734428374155, "grad_norm": 0.7941370606422424, "learning_rate": 2.990127229087671e-05, "loss": 0.0839, "step": 79960 }, { "epoch": 2.9060978268769535, "grad_norm": 0.5782437920570374, "learning_rate": 2.9896183572195442e-05, "loss": 0.097, "step": 79970 }, { "epoch": 2.9064612253797515, "grad_norm": 1.7239668369293213, "learning_rate": 2.989109464255504e-05, "loss": 0.0919, "step": 79980 }, { "epoch": 2.9068246238825495, "grad_norm": 1.2387197017669678, "learning_rate": 2.988600550217478e-05, "loss": 0.0851, "step": 79990 }, { "epoch": 2.907188022385348, "grad_norm": 2.639697313308716, "learning_rate": 2.9880916151273926e-05, "loss": 0.0829, "step": 80000 }, { "epoch": 2.907551420888146, "grad_norm": 0.9989453554153442, "learning_rate": 2.9875826590071754e-05, "loss": 0.0789, "step": 80010 }, { "epoch": 2.9079148193909443, "grad_norm": 1.7626229524612427, "learning_rate": 2.987073681878757e-05, "loss": 0.0905, "step": 80020 }, { "epoch": 2.9082782178937423, "grad_norm": 2.506680488586426, "learning_rate": 2.986564683764066e-05, "loss": 0.1006, "step": 80030 }, { "epoch": 2.9086416163965403, "grad_norm": 1.299718976020813, "learning_rate": 2.9860556646850347e-05, "loss": 0.1024, "step": 80040 }, { "epoch": 2.9090050148993387, "grad_norm": 1.0177974700927734, "learning_rate": 2.9855466246635943e-05, "loss": 0.0788, "step": 80050 }, { "epoch": 2.9093684134021367, "grad_norm": 0.6795012354850769, "learning_rate": 2.9850375637216767e-05, "loss": 0.1015, "step": 80060 }, { "epoch": 2.909731811904935, "grad_norm": 0.24664345383644104, "learning_rate": 2.9845284818812164e-05, "loss": 0.0944, "step": 80070 }, { "epoch": 
2.910095210407733, "grad_norm": 0.5156140923500061, "learning_rate": 2.984019379164148e-05, "loss": 0.0917, "step": 80080 }, { "epoch": 2.910458608910531, "grad_norm": 1.331092357635498, "learning_rate": 2.9835102555924065e-05, "loss": 0.1069, "step": 80090 }, { "epoch": 2.9108220074133295, "grad_norm": 0.8283532857894897, "learning_rate": 2.983001111187928e-05, "loss": 0.0973, "step": 80100 }, { "epoch": 2.9111854059161275, "grad_norm": 0.8295063972473145, "learning_rate": 2.9824919459726507e-05, "loss": 0.0805, "step": 80110 }, { "epoch": 2.911548804418926, "grad_norm": 1.1419576406478882, "learning_rate": 2.981982759968513e-05, "loss": 0.0974, "step": 80120 }, { "epoch": 2.911912202921724, "grad_norm": 0.5481380224227905, "learning_rate": 2.9814735531974513e-05, "loss": 0.1007, "step": 80130 }, { "epoch": 2.912275601424522, "grad_norm": 1.488004207611084, "learning_rate": 2.9809643256814092e-05, "loss": 0.1599, "step": 80140 }, { "epoch": 2.9126389999273203, "grad_norm": 1.346227765083313, "learning_rate": 2.980455077442324e-05, "loss": 0.0826, "step": 80150 }, { "epoch": 2.9130023984301183, "grad_norm": 0.6229421496391296, "learning_rate": 2.9799458085021396e-05, "loss": 0.1042, "step": 80160 }, { "epoch": 2.9133657969329168, "grad_norm": 0.7724307179450989, "learning_rate": 2.979436518882798e-05, "loss": 0.1068, "step": 80170 }, { "epoch": 2.9137291954357147, "grad_norm": 0.4120637774467468, "learning_rate": 2.9789272086062426e-05, "loss": 0.0949, "step": 80180 }, { "epoch": 2.9140925939385127, "grad_norm": 0.8677302598953247, "learning_rate": 2.9784178776944178e-05, "loss": 0.1253, "step": 80190 }, { "epoch": 2.914455992441311, "grad_norm": 0.5817800760269165, "learning_rate": 2.9779085261692686e-05, "loss": 0.0754, "step": 80200 }, { "epoch": 2.9148193909441096, "grad_norm": 2.1507725715637207, "learning_rate": 2.977399154052742e-05, "loss": 0.2402, "step": 80210 }, { "epoch": 2.9151827894469076, "grad_norm": 0.4526066184043884, "learning_rate": 
2.9768897613667836e-05, "loss": 0.1243, "step": 80220 }, { "epoch": 2.9155461879497055, "grad_norm": 1.391575574874878, "learning_rate": 2.976380348133343e-05, "loss": 0.1129, "step": 80230 }, { "epoch": 2.915909586452504, "grad_norm": 0.7592736482620239, "learning_rate": 2.9758709143743678e-05, "loss": 0.1376, "step": 80240 }, { "epoch": 2.916272984955302, "grad_norm": 0.671796441078186, "learning_rate": 2.975361460111808e-05, "loss": 0.0608, "step": 80250 }, { "epoch": 2.9166363834581004, "grad_norm": 2.7102513313293457, "learning_rate": 2.9748519853676142e-05, "loss": 0.0919, "step": 80260 }, { "epoch": 2.9169997819608984, "grad_norm": 1.3529448509216309, "learning_rate": 2.974342490163738e-05, "loss": 0.1079, "step": 80270 }, { "epoch": 2.9173631804636964, "grad_norm": 0.8166323900222778, "learning_rate": 2.9738329745221317e-05, "loss": 0.076, "step": 80280 }, { "epoch": 2.917726578966495, "grad_norm": 0.824936032295227, "learning_rate": 2.973323438464748e-05, "loss": 0.1155, "step": 80290 }, { "epoch": 2.9180899774692928, "grad_norm": 0.4837649166584015, "learning_rate": 2.972813882013542e-05, "loss": 0.0775, "step": 80300 }, { "epoch": 2.918453375972091, "grad_norm": 1.6542714834213257, "learning_rate": 2.9723043051904676e-05, "loss": 0.1013, "step": 80310 }, { "epoch": 2.918816774474889, "grad_norm": 0.2745031416416168, "learning_rate": 2.9717947080174813e-05, "loss": 0.0914, "step": 80320 }, { "epoch": 2.919180172977687, "grad_norm": 1.0925464630126953, "learning_rate": 2.9712850905165402e-05, "loss": 0.1577, "step": 80330 }, { "epoch": 2.9195435714804856, "grad_norm": 0.6618020534515381, "learning_rate": 2.9707754527096004e-05, "loss": 0.1105, "step": 80340 }, { "epoch": 2.9199069699832836, "grad_norm": 0.9204813838005066, "learning_rate": 2.9702657946186223e-05, "loss": 0.0855, "step": 80350 }, { "epoch": 2.920270368486082, "grad_norm": 0.6669716835021973, "learning_rate": 2.9697561162655634e-05, "loss": 0.1003, "step": 80360 }, { "epoch": 
2.92063376698888, "grad_norm": 0.4183257818222046, "learning_rate": 2.9692464176723855e-05, "loss": 0.0998, "step": 80370 }, { "epoch": 2.920997165491678, "grad_norm": 6.260075569152832, "learning_rate": 2.9687366988610493e-05, "loss": 0.1007, "step": 80380 }, { "epoch": 2.9213605639944764, "grad_norm": 2.819657325744629, "learning_rate": 2.9682269598535162e-05, "loss": 0.0982, "step": 80390 }, { "epoch": 2.9217239624972744, "grad_norm": 0.5224602818489075, "learning_rate": 2.967717200671749e-05, "loss": 0.1009, "step": 80400 }, { "epoch": 2.9217239624972744, "eval_loss": 0.3313393294811249, "eval_runtime": 180.6766, "eval_samples_per_second": 41.035, "eval_steps_per_second": 5.131, "eval_wer": 0.14564234755931527, "step": 80400 }, { "epoch": 2.922087361000073, "grad_norm": 0.504077672958374, "learning_rate": 2.9672074213377122e-05, "loss": 0.0892, "step": 80410 }, { "epoch": 2.922450759502871, "grad_norm": 0.34101128578186035, "learning_rate": 2.96669762187337e-05, "loss": 0.1225, "step": 80420 }, { "epoch": 2.922814158005669, "grad_norm": 0.7131312489509583, "learning_rate": 2.966187802300688e-05, "loss": 0.0804, "step": 80430 }, { "epoch": 2.9231775565084672, "grad_norm": 1.1708521842956543, "learning_rate": 2.9656779626416314e-05, "loss": 0.1506, "step": 80440 }, { "epoch": 2.923540955011265, "grad_norm": 1.453748106956482, "learning_rate": 2.965168102918169e-05, "loss": 0.1353, "step": 80450 }, { "epoch": 2.9239043535140636, "grad_norm": 0.7086220979690552, "learning_rate": 2.964658223152268e-05, "loss": 0.0892, "step": 80460 }, { "epoch": 2.9242677520168616, "grad_norm": 0.8955743312835693, "learning_rate": 2.9641483233658973e-05, "loss": 0.0989, "step": 80470 }, { "epoch": 2.9246311505196596, "grad_norm": 0.5515048503875732, "learning_rate": 2.9636384035810265e-05, "loss": 0.1187, "step": 80480 }, { "epoch": 2.924994549022458, "grad_norm": 1.164106011390686, "learning_rate": 2.963128463819627e-05, "loss": 0.1057, "step": 80490 }, { "epoch": 
2.9253579475252565, "grad_norm": 0.8334591388702393, "learning_rate": 2.9626185041036696e-05, "loss": 0.4326, "step": 80500 }, { "epoch": 2.9257213460280544, "grad_norm": 0.49100545048713684, "learning_rate": 2.9621085244551272e-05, "loss": 0.087, "step": 80510 }, { "epoch": 2.9260847445308524, "grad_norm": 0.5234443545341492, "learning_rate": 2.9615985248959722e-05, "loss": 0.0982, "step": 80520 }, { "epoch": 2.926448143033651, "grad_norm": 1.0264884233474731, "learning_rate": 2.961088505448179e-05, "loss": 0.0965, "step": 80530 }, { "epoch": 2.926811541536449, "grad_norm": 0.7391953468322754, "learning_rate": 2.9605784661337233e-05, "loss": 0.4112, "step": 80540 }, { "epoch": 2.9271749400392473, "grad_norm": 4.123349666595459, "learning_rate": 2.9600684069745803e-05, "loss": 0.0957, "step": 80550 }, { "epoch": 2.9275383385420453, "grad_norm": 2.021984338760376, "learning_rate": 2.9595583279927257e-05, "loss": 0.0952, "step": 80560 }, { "epoch": 2.9279017370448432, "grad_norm": 0.546364426612854, "learning_rate": 2.9590482292101383e-05, "loss": 0.1167, "step": 80570 }, { "epoch": 2.9282651355476417, "grad_norm": 0.6582480669021606, "learning_rate": 2.9585381106487963e-05, "loss": 0.1149, "step": 80580 }, { "epoch": 2.9286285340504397, "grad_norm": 0.6442203521728516, "learning_rate": 2.958027972330678e-05, "loss": 0.114, "step": 80590 }, { "epoch": 2.928991932553238, "grad_norm": 0.36104217171669006, "learning_rate": 2.9575178142777643e-05, "loss": 0.1073, "step": 80600 }, { "epoch": 2.929355331056036, "grad_norm": 1.933703064918518, "learning_rate": 2.9570076365120363e-05, "loss": 0.1025, "step": 80610 }, { "epoch": 2.929718729558834, "grad_norm": 0.6946010589599609, "learning_rate": 2.956497439055475e-05, "loss": 0.098, "step": 80620 }, { "epoch": 2.9300821280616325, "grad_norm": 0.6851116418838501, "learning_rate": 2.9559872219300633e-05, "loss": 0.089, "step": 80630 }, { "epoch": 2.9304455265644305, "grad_norm": 0.7841261625289917, "learning_rate": 
2.9554769851577847e-05, "loss": 0.0956, "step": 80640 }, { "epoch": 2.930808925067229, "grad_norm": 0.7110610008239746, "learning_rate": 2.9549667287606225e-05, "loss": 3.0796, "step": 80650 }, { "epoch": 2.931172323570027, "grad_norm": 1.172564148902893, "learning_rate": 2.954456452760564e-05, "loss": 0.1005, "step": 80660 }, { "epoch": 2.931535722072825, "grad_norm": 0.44309452176094055, "learning_rate": 2.9539461571795928e-05, "loss": 0.1204, "step": 80670 }, { "epoch": 2.9318991205756233, "grad_norm": 1.936556339263916, "learning_rate": 2.9534358420396978e-05, "loss": 0.1174, "step": 80680 }, { "epoch": 2.9322625190784213, "grad_norm": 0.7564278841018677, "learning_rate": 2.9529255073628653e-05, "loss": 0.1195, "step": 80690 }, { "epoch": 2.9326259175812197, "grad_norm": 0.443446546792984, "learning_rate": 2.952415153171084e-05, "loss": 0.081, "step": 80700 }, { "epoch": 2.9329893160840177, "grad_norm": 1.2691240310668945, "learning_rate": 2.9519047794863434e-05, "loss": 0.0897, "step": 80710 }, { "epoch": 2.9333527145868157, "grad_norm": 0.9182204008102417, "learning_rate": 2.9513943863306337e-05, "loss": 0.1097, "step": 80720 }, { "epoch": 2.933716113089614, "grad_norm": 0.8882256150245667, "learning_rate": 2.950883973725947e-05, "loss": 0.0928, "step": 80730 }, { "epoch": 2.934079511592412, "grad_norm": 0.7944663166999817, "learning_rate": 2.9503735416942735e-05, "loss": 0.0942, "step": 80740 }, { "epoch": 2.9344429100952105, "grad_norm": 0.4034799337387085, "learning_rate": 2.9498630902576057e-05, "loss": 0.0881, "step": 80750 }, { "epoch": 2.9348063085980085, "grad_norm": 0.37826791405677795, "learning_rate": 2.949352619437939e-05, "loss": 0.0925, "step": 80760 }, { "epoch": 2.9351697071008065, "grad_norm": 0.5798398852348328, "learning_rate": 2.948842129257266e-05, "loss": 0.2732, "step": 80770 }, { "epoch": 2.935533105603605, "grad_norm": 1.879731297492981, "learning_rate": 2.948331619737583e-05, "loss": 0.1367, "step": 80780 }, { "epoch": 
2.9358965041064033, "grad_norm": 0.9150714874267578, "learning_rate": 2.9478210909008856e-05, "loss": 0.1096, "step": 80790 }, { "epoch": 2.9362599026092013, "grad_norm": 1.1447314023971558, "learning_rate": 2.947310542769171e-05, "loss": 0.0902, "step": 80800 }, { "epoch": 2.9366233011119993, "grad_norm": 4.874231338500977, "learning_rate": 2.9467999753644367e-05, "loss": 0.1206, "step": 80810 }, { "epoch": 2.9369866996147977, "grad_norm": 0.6774386763572693, "learning_rate": 2.9462893887086807e-05, "loss": 1.3499, "step": 80820 }, { "epoch": 2.9373500981175957, "grad_norm": 1.529350996017456, "learning_rate": 2.945778782823903e-05, "loss": 0.0881, "step": 80830 }, { "epoch": 2.937713496620394, "grad_norm": 1.6456125974655151, "learning_rate": 2.945268157732104e-05, "loss": 0.1505, "step": 80840 }, { "epoch": 2.938076895123192, "grad_norm": 0.8206561207771301, "learning_rate": 2.944757513455284e-05, "loss": 0.0873, "step": 80850 }, { "epoch": 2.93844029362599, "grad_norm": 1.830176830291748, "learning_rate": 2.9442468500154453e-05, "loss": 0.1002, "step": 80860 }, { "epoch": 2.9388036921287886, "grad_norm": 1.3282181024551392, "learning_rate": 2.9437361674345905e-05, "loss": 0.113, "step": 80870 }, { "epoch": 2.9391670906315865, "grad_norm": 1.0826009511947632, "learning_rate": 2.943225465734723e-05, "loss": 0.0938, "step": 80880 }, { "epoch": 2.939530489134385, "grad_norm": 0.4472619891166687, "learning_rate": 2.9427147449378467e-05, "loss": 0.0999, "step": 80890 }, { "epoch": 2.939893887637183, "grad_norm": 0.5009836554527283, "learning_rate": 2.9422040050659682e-05, "loss": 0.0867, "step": 80900 }, { "epoch": 2.940257286139981, "grad_norm": 1.1963528394699097, "learning_rate": 2.941693246141092e-05, "loss": 0.1074, "step": 80910 }, { "epoch": 2.9406206846427794, "grad_norm": 0.3960399627685547, "learning_rate": 2.941182468185225e-05, "loss": 0.1298, "step": 80920 }, { "epoch": 2.9409840831455774, "grad_norm": 0.7249006032943726, "learning_rate": 
2.940671671220376e-05, "loss": 0.093, "step": 80930 }, { "epoch": 2.941347481648376, "grad_norm": 0.5996330976486206, "learning_rate": 2.9401608552685517e-05, "loss": 0.1213, "step": 80940 }, { "epoch": 2.9417108801511738, "grad_norm": 1.9566066265106201, "learning_rate": 2.9396500203517628e-05, "loss": 0.0795, "step": 80950 }, { "epoch": 2.9420742786539718, "grad_norm": 1.3713960647583008, "learning_rate": 2.9391391664920186e-05, "loss": 0.0879, "step": 80960 }, { "epoch": 2.94243767715677, "grad_norm": 0.639777660369873, "learning_rate": 2.9386282937113306e-05, "loss": 0.121, "step": 80970 }, { "epoch": 2.942801075659568, "grad_norm": 0.6077538728713989, "learning_rate": 2.9381174020317093e-05, "loss": 0.1197, "step": 80980 }, { "epoch": 2.9431644741623666, "grad_norm": 1.1664655208587646, "learning_rate": 2.937606491475169e-05, "loss": 0.1132, "step": 80990 }, { "epoch": 2.9435278726651646, "grad_norm": 0.44675686955451965, "learning_rate": 2.9370955620637213e-05, "loss": 0.0951, "step": 81000 }, { "epoch": 2.9435278726651646, "eval_loss": 0.3302467167377472, "eval_runtime": 180.9443, "eval_samples_per_second": 40.974, "eval_steps_per_second": 5.123, "eval_wer": 0.14665892134260353, "step": 81000 }, { "epoch": 2.9438912711679626, "grad_norm": 0.5193939208984375, "learning_rate": 2.936584613819381e-05, "loss": 0.0736, "step": 81010 }, { "epoch": 2.944254669670761, "grad_norm": 4.043830394744873, "learning_rate": 2.9360736467641632e-05, "loss": 0.146, "step": 81020 }, { "epoch": 2.944618068173559, "grad_norm": 0.8960684537887573, "learning_rate": 2.9355626609200832e-05, "loss": 0.0933, "step": 81030 }, { "epoch": 2.9449814666763574, "grad_norm": 0.4079131782054901, "learning_rate": 2.9350516563091586e-05, "loss": 0.1555, "step": 81040 }, { "epoch": 2.9453448651791554, "grad_norm": 0.5445089936256409, "learning_rate": 2.934540632953406e-05, "loss": 0.1163, "step": 81050 }, { "epoch": 2.9457082636819534, "grad_norm": 4.034743309020996, "learning_rate": 
2.934029590874843e-05, "loss": 0.0817, "step": 81060 }, { "epoch": 2.946071662184752, "grad_norm": 1.0341869592666626, "learning_rate": 2.933518530095489e-05, "loss": 0.1143, "step": 81070 }, { "epoch": 2.9464350606875502, "grad_norm": 1.2305265665054321, "learning_rate": 2.9330074506373644e-05, "loss": 0.0945, "step": 81080 }, { "epoch": 2.946798459190348, "grad_norm": 1.0462018251419067, "learning_rate": 2.9324963525224897e-05, "loss": 0.1181, "step": 81090 }, { "epoch": 2.947161857693146, "grad_norm": 1.0071947574615479, "learning_rate": 2.9319852357728857e-05, "loss": 0.0935, "step": 81100 }, { "epoch": 2.9475252561959446, "grad_norm": 0.3526189625263214, "learning_rate": 2.931474100410575e-05, "loss": 0.0801, "step": 81110 }, { "epoch": 2.9478886546987426, "grad_norm": 0.22515632212162018, "learning_rate": 2.9309629464575804e-05, "loss": 0.0955, "step": 81120 }, { "epoch": 2.948252053201541, "grad_norm": 1.0929369926452637, "learning_rate": 2.930451773935926e-05, "loss": 0.1307, "step": 81130 }, { "epoch": 2.948615451704339, "grad_norm": 1.116426944732666, "learning_rate": 2.9299405828676364e-05, "loss": 0.1067, "step": 81140 }, { "epoch": 2.948978850207137, "grad_norm": 1.081275224685669, "learning_rate": 2.9294293732747358e-05, "loss": 0.09, "step": 81150 }, { "epoch": 2.9493422487099354, "grad_norm": 0.4264758825302124, "learning_rate": 2.9289181451792526e-05, "loss": 0.0715, "step": 81160 }, { "epoch": 2.9497056472127334, "grad_norm": 0.48512741923332214, "learning_rate": 2.928406898603212e-05, "loss": 0.0862, "step": 81170 }, { "epoch": 2.950069045715532, "grad_norm": 1.373490571975708, "learning_rate": 2.9278956335686426e-05, "loss": 0.0913, "step": 81180 }, { "epoch": 2.95043244421833, "grad_norm": 0.8015252351760864, "learning_rate": 2.9273843500975728e-05, "loss": 0.1061, "step": 81190 }, { "epoch": 2.950795842721128, "grad_norm": 1.1844451427459717, "learning_rate": 2.926873048212032e-05, "loss": 0.0869, "step": 81200 }, { "epoch": 
2.9511592412239263, "grad_norm": 0.6692541241645813, "learning_rate": 2.9263617279340504e-05, "loss": 0.0701, "step": 81210 }, { "epoch": 2.9515226397267242, "grad_norm": 0.5705549120903015, "learning_rate": 2.9258503892856585e-05, "loss": 0.0923, "step": 81220 }, { "epoch": 2.9518860382295227, "grad_norm": 3.464637041091919, "learning_rate": 2.925339032288889e-05, "loss": 0.0887, "step": 81230 }, { "epoch": 2.9522494367323207, "grad_norm": 1.3816555738449097, "learning_rate": 2.9248276569657745e-05, "loss": 0.1061, "step": 81240 }, { "epoch": 2.9526128352351186, "grad_norm": 0.42636001110076904, "learning_rate": 2.9243162633383463e-05, "loss": 0.0889, "step": 81250 }, { "epoch": 2.952976233737917, "grad_norm": 1.3730337619781494, "learning_rate": 2.9238048514286413e-05, "loss": 0.0846, "step": 81260 }, { "epoch": 2.953339632240715, "grad_norm": 0.708302915096283, "learning_rate": 2.9232934212586925e-05, "loss": 0.1031, "step": 81270 }, { "epoch": 2.9537030307435135, "grad_norm": 0.8678138852119446, "learning_rate": 2.9227819728505367e-05, "loss": 0.1032, "step": 81280 }, { "epoch": 2.9540664292463115, "grad_norm": 1.3964858055114746, "learning_rate": 2.9222705062262096e-05, "loss": 0.0804, "step": 81290 }, { "epoch": 2.9544298277491094, "grad_norm": 0.6818703413009644, "learning_rate": 2.921759021407749e-05, "loss": 0.0871, "step": 81300 }, { "epoch": 2.954793226251908, "grad_norm": 1.078911542892456, "learning_rate": 2.9212475184171932e-05, "loss": 0.084, "step": 81310 }, { "epoch": 2.955156624754706, "grad_norm": 9.535181999206543, "learning_rate": 2.9207359972765795e-05, "loss": 0.0994, "step": 81320 }, { "epoch": 2.9555200232575043, "grad_norm": 0.8145607113838196, "learning_rate": 2.9202244580079496e-05, "loss": 0.0932, "step": 81330 }, { "epoch": 2.9558834217603023, "grad_norm": 0.7624212503433228, "learning_rate": 2.919712900633343e-05, "loss": 0.0864, "step": 81340 }, { "epoch": 2.9562468202631003, "grad_norm": 0.8702255487442017, "learning_rate": 
2.9192013251748007e-05, "loss": 0.0634, "step": 81350 }, { "epoch": 2.9566102187658987, "grad_norm": 0.5847821235656738, "learning_rate": 2.9186897316543644e-05, "loss": 0.0777, "step": 81360 }, { "epoch": 2.956973617268697, "grad_norm": 0.6588742136955261, "learning_rate": 2.9181781200940776e-05, "loss": 0.1158, "step": 81370 }, { "epoch": 2.957337015771495, "grad_norm": 0.8579931855201721, "learning_rate": 2.917666490515984e-05, "loss": 0.0941, "step": 81380 }, { "epoch": 2.957700414274293, "grad_norm": 0.9896190166473389, "learning_rate": 2.9171548429421264e-05, "loss": 0.1239, "step": 81390 }, { "epoch": 2.9580638127770915, "grad_norm": 3.1119399070739746, "learning_rate": 2.9166431773945514e-05, "loss": 0.0973, "step": 81400 }, { "epoch": 2.9584272112798895, "grad_norm": 0.6430637240409851, "learning_rate": 2.9161314938953037e-05, "loss": 0.0964, "step": 81410 }, { "epoch": 2.958790609782688, "grad_norm": 0.16759249567985535, "learning_rate": 2.9156197924664312e-05, "loss": 0.0869, "step": 81420 }, { "epoch": 2.959154008285486, "grad_norm": 0.5628185272216797, "learning_rate": 2.915108073129981e-05, "loss": 0.0885, "step": 81430 }, { "epoch": 2.959517406788284, "grad_norm": 1.6805976629257202, "learning_rate": 2.914596335908e-05, "loss": 0.1402, "step": 81440 }, { "epoch": 2.9598808052910823, "grad_norm": 0.996425986289978, "learning_rate": 2.9140845808225388e-05, "loss": 0.0823, "step": 81450 }, { "epoch": 2.9602442037938803, "grad_norm": 0.6585590243339539, "learning_rate": 2.9135728078956453e-05, "loss": 0.0735, "step": 81460 }, { "epoch": 2.9606076022966787, "grad_norm": 12.562392234802246, "learning_rate": 2.9130610171493722e-05, "loss": 0.1128, "step": 81470 }, { "epoch": 2.9609710007994767, "grad_norm": 0.5774283409118652, "learning_rate": 2.9125492086057682e-05, "loss": 0.0844, "step": 81480 }, { "epoch": 2.9613343993022747, "grad_norm": 0.5809444785118103, "learning_rate": 2.912037382286888e-05, "loss": 0.1085, "step": 81490 }, { "epoch": 
2.961697797805073, "grad_norm": 0.5010820627212524, "learning_rate": 2.9115255382147827e-05, "loss": 0.0803, "step": 81500 }, { "epoch": 2.962061196307871, "grad_norm": 1.3801108598709106, "learning_rate": 2.9110136764115055e-05, "loss": 0.0661, "step": 81510 }, { "epoch": 2.9624245948106696, "grad_norm": 1.027056336402893, "learning_rate": 2.9105017968991123e-05, "loss": 0.109, "step": 81520 }, { "epoch": 2.9627879933134675, "grad_norm": 0.978444516658783, "learning_rate": 2.909989899699656e-05, "loss": 0.0935, "step": 81530 }, { "epoch": 2.9631513918162655, "grad_norm": 2.2912325859069824, "learning_rate": 2.909477984835195e-05, "loss": 0.0905, "step": 81540 }, { "epoch": 2.963514790319064, "grad_norm": 1.8258506059646606, "learning_rate": 2.9089660523277833e-05, "loss": 0.0858, "step": 81550 }, { "epoch": 2.963878188821862, "grad_norm": 1.961013674736023, "learning_rate": 2.9084541021994804e-05, "loss": 0.0858, "step": 81560 }, { "epoch": 2.9642415873246604, "grad_norm": 0.25523585081100464, "learning_rate": 2.9079421344723435e-05, "loss": 0.0943, "step": 81570 }, { "epoch": 2.9646049858274583, "grad_norm": 0.8947268724441528, "learning_rate": 2.9074301491684313e-05, "loss": 0.1359, "step": 81580 }, { "epoch": 2.9649683843302563, "grad_norm": 0.5048563480377197, "learning_rate": 2.906918146309804e-05, "loss": 0.101, "step": 81590 }, { "epoch": 2.9653317828330548, "grad_norm": 0.5647857785224915, "learning_rate": 2.9064061259185206e-05, "loss": 0.0856, "step": 81600 }, { "epoch": 2.9653317828330548, "eval_loss": 0.3227691650390625, "eval_runtime": 180.1665, "eval_samples_per_second": 41.151, "eval_steps_per_second": 5.145, "eval_wer": 0.14830177718881044, "step": 81600 }, { "epoch": 2.9656951813358527, "grad_norm": 0.39782488346099854, "learning_rate": 2.905894088016644e-05, "loss": 0.0894, "step": 81610 }, { "epoch": 2.966058579838651, "grad_norm": 0.8222696185112, "learning_rate": 2.9053820326262354e-05, "loss": 0.1044, "step": 81620 }, { "epoch": 
2.966421978341449, "grad_norm": 1.1362643241882324, "learning_rate": 2.904869959769357e-05, "loss": 0.0877, "step": 81630 }, { "epoch": 2.966785376844247, "grad_norm": 1.2552587985992432, "learning_rate": 2.904357869468073e-05, "loss": 0.1026, "step": 81640 }, { "epoch": 2.9671487753470456, "grad_norm": 0.7745124101638794, "learning_rate": 2.9038457617444466e-05, "loss": 0.0873, "step": 81650 }, { "epoch": 2.967512173849844, "grad_norm": 2.436732769012451, "learning_rate": 2.9033336366205432e-05, "loss": 0.0781, "step": 81660 }, { "epoch": 2.967875572352642, "grad_norm": 0.5839672684669495, "learning_rate": 2.9028214941184284e-05, "loss": 0.1152, "step": 81670 }, { "epoch": 2.96823897085544, "grad_norm": 0.9086770415306091, "learning_rate": 2.9023093342601694e-05, "loss": 0.0919, "step": 81680 }, { "epoch": 2.9686023693582384, "grad_norm": 0.9452347159385681, "learning_rate": 2.901797157067832e-05, "loss": 0.1022, "step": 81690 }, { "epoch": 2.9689657678610364, "grad_norm": 0.7514773607254028, "learning_rate": 2.9012849625634847e-05, "loss": 1.7501, "step": 81700 }, { "epoch": 2.969329166363835, "grad_norm": 1.142943263053894, "learning_rate": 2.9007727507691966e-05, "loss": 0.0906, "step": 81710 }, { "epoch": 2.969692564866633, "grad_norm": 1.344696283340454, "learning_rate": 2.9002605217070363e-05, "loss": 0.106, "step": 81720 }, { "epoch": 2.970055963369431, "grad_norm": 0.7408128976821899, "learning_rate": 2.899748275399074e-05, "loss": 0.1039, "step": 81730 }, { "epoch": 2.970419361872229, "grad_norm": 1.7010142803192139, "learning_rate": 2.8992360118673816e-05, "loss": 0.1071, "step": 81740 }, { "epoch": 2.970782760375027, "grad_norm": 1.1163867712020874, "learning_rate": 2.8987237311340286e-05, "loss": 0.0827, "step": 81750 }, { "epoch": 2.9711461588778256, "grad_norm": 2.118901491165161, "learning_rate": 2.8982114332210903e-05, "loss": 0.1033, "step": 81760 }, { "epoch": 2.9715095573806236, "grad_norm": 0.469307541847229, "learning_rate": 
2.897699118150637e-05, "loss": 0.1066, "step": 81770 }, { "epoch": 2.9718729558834216, "grad_norm": 0.7060539722442627, "learning_rate": 2.8971867859447444e-05, "loss": 0.1095, "step": 81780 }, { "epoch": 2.97223635438622, "grad_norm": 1.0039600133895874, "learning_rate": 2.8966744366254856e-05, "loss": 0.1412, "step": 81790 }, { "epoch": 2.972599752889018, "grad_norm": 1.0242499113082886, "learning_rate": 2.8961620702149373e-05, "loss": 0.0833, "step": 81800 }, { "epoch": 2.9729631513918164, "grad_norm": 0.5664736032485962, "learning_rate": 2.8956496867351752e-05, "loss": 0.0793, "step": 81810 }, { "epoch": 2.9733265498946144, "grad_norm": 0.5009458065032959, "learning_rate": 2.8951372862082753e-05, "loss": 0.0925, "step": 81820 }, { "epoch": 2.9736899483974124, "grad_norm": 1.5864497423171997, "learning_rate": 2.894624868656316e-05, "loss": 0.09, "step": 81830 }, { "epoch": 2.974053346900211, "grad_norm": 4.366128444671631, "learning_rate": 2.894112434101375e-05, "loss": 0.2078, "step": 81840 }, { "epoch": 2.974416745403009, "grad_norm": 0.7028205394744873, "learning_rate": 2.8935999825655312e-05, "loss": 0.2251, "step": 81850 }, { "epoch": 2.9747801439058073, "grad_norm": 0.5308458805084229, "learning_rate": 2.8930875140708645e-05, "loss": 0.0903, "step": 81860 }, { "epoch": 2.9751435424086052, "grad_norm": 3.357011318206787, "learning_rate": 2.892575028639456e-05, "loss": 0.1105, "step": 81870 }, { "epoch": 2.975506940911403, "grad_norm": 0.9237788319587708, "learning_rate": 2.8920625262933864e-05, "loss": 0.0774, "step": 81880 }, { "epoch": 2.9758703394142016, "grad_norm": 0.480029821395874, "learning_rate": 2.891550007054737e-05, "loss": 0.1154, "step": 81890 }, { "epoch": 2.9762337379169996, "grad_norm": 0.6338282823562622, "learning_rate": 2.891037470945591e-05, "loss": 4.1728, "step": 81900 }, { "epoch": 2.976597136419798, "grad_norm": 0.628488302230835, "learning_rate": 2.8905249179880318e-05, "loss": 0.078, "step": 81910 }, { "epoch": 2.976960534922596, 
"grad_norm": 1.6177397966384888, "learning_rate": 2.8900123482041437e-05, "loss": 0.089, "step": 81920 }, { "epoch": 2.977323933425394, "grad_norm": 1.245584487915039, "learning_rate": 2.8894997616160103e-05, "loss": 0.1122, "step": 81930 }, { "epoch": 2.9776873319281925, "grad_norm": 0.6176816821098328, "learning_rate": 2.888987158245719e-05, "loss": 0.0911, "step": 81940 }, { "epoch": 2.978050730430991, "grad_norm": 0.7683790326118469, "learning_rate": 2.888474538115355e-05, "loss": 2.4532, "step": 81950 }, { "epoch": 2.978414128933789, "grad_norm": 0.7443512082099915, "learning_rate": 2.8879619012470045e-05, "loss": 0.109, "step": 81960 }, { "epoch": 2.978777527436587, "grad_norm": 1.6451610326766968, "learning_rate": 2.8874492476627568e-05, "loss": 0.1023, "step": 81970 }, { "epoch": 2.9791409259393853, "grad_norm": 0.39093294739723206, "learning_rate": 2.886936577384699e-05, "loss": 0.0989, "step": 81980 }, { "epoch": 2.9795043244421833, "grad_norm": 0.4320976436138153, "learning_rate": 2.886423890434922e-05, "loss": 0.1103, "step": 81990 }, { "epoch": 2.9798677229449817, "grad_norm": 0.45375579595565796, "learning_rate": 2.8859111868355128e-05, "loss": 0.1922, "step": 82000 }, { "epoch": 2.9802311214477797, "grad_norm": 1.53719162940979, "learning_rate": 2.8853984666085644e-05, "loss": 0.5349, "step": 82010 }, { "epoch": 2.9805945199505777, "grad_norm": 0.5638075470924377, "learning_rate": 2.8848857297761676e-05, "loss": 0.1004, "step": 82020 }, { "epoch": 2.980957918453376, "grad_norm": 0.7393288612365723, "learning_rate": 2.8843729763604138e-05, "loss": 0.1003, "step": 82030 }, { "epoch": 2.981321316956174, "grad_norm": 0.5188916325569153, "learning_rate": 2.8838602063833962e-05, "loss": 0.109, "step": 82040 }, { "epoch": 2.9816847154589725, "grad_norm": 1.9435770511627197, "learning_rate": 2.883347419867208e-05, "loss": 0.0678, "step": 82050 }, { "epoch": 2.9820481139617705, "grad_norm": 0.795820415019989, "learning_rate": 2.882834616833944e-05, "loss": 
0.0863, "step": 82060 }, { "epoch": 2.9824115124645685, "grad_norm": 0.5096336007118225, "learning_rate": 2.882321797305697e-05, "loss": 0.0888, "step": 82070 }, { "epoch": 2.982774910967367, "grad_norm": 1.0155811309814453, "learning_rate": 2.881808961304565e-05, "loss": 0.0809, "step": 82080 }, { "epoch": 2.983138309470165, "grad_norm": 0.9700034856796265, "learning_rate": 2.8812961088526436e-05, "loss": 0.1124, "step": 82090 }, { "epoch": 2.9835017079729633, "grad_norm": 1.9299287796020508, "learning_rate": 2.8807832399720292e-05, "loss": 0.0942, "step": 82100 }, { "epoch": 2.9838651064757613, "grad_norm": 0.7534053325653076, "learning_rate": 2.8802703546848204e-05, "loss": 0.1195, "step": 82110 }, { "epoch": 2.9842285049785593, "grad_norm": 1.0273375511169434, "learning_rate": 2.8797574530131138e-05, "loss": 0.1052, "step": 82120 }, { "epoch": 2.9845919034813577, "grad_norm": 0.5971968173980713, "learning_rate": 2.8792445349790108e-05, "loss": 0.0975, "step": 82130 }, { "epoch": 2.9849553019841557, "grad_norm": 1.0438076257705688, "learning_rate": 2.8787316006046096e-05, "loss": 0.1066, "step": 82140 }, { "epoch": 2.985318700486954, "grad_norm": 1.0743767023086548, "learning_rate": 2.8782186499120116e-05, "loss": 0.0779, "step": 82150 }, { "epoch": 2.985682098989752, "grad_norm": 0.9644399285316467, "learning_rate": 2.8777056829233172e-05, "loss": 0.085, "step": 82160 }, { "epoch": 2.98604549749255, "grad_norm": 0.2966119647026062, "learning_rate": 2.8771926996606297e-05, "loss": 0.0879, "step": 82170 }, { "epoch": 2.9864088959953485, "grad_norm": 0.8855867981910706, "learning_rate": 2.87667970014605e-05, "loss": 0.1638, "step": 82180 }, { "epoch": 2.9867722944981465, "grad_norm": 0.5556718111038208, "learning_rate": 2.8761666844016822e-05, "loss": 0.1023, "step": 82190 }, { "epoch": 2.987135693000945, "grad_norm": 0.35973209142684937, "learning_rate": 2.8756536524496313e-05, "loss": 0.0726, "step": 82200 }, { "epoch": 2.987135693000945, "eval_loss": 
0.32470783591270447, "eval_runtime": 179.3561, "eval_samples_per_second": 41.337, "eval_steps_per_second": 5.168, "eval_wer": 0.1449252999800316, "step": 82200 }, { "epoch": 2.987499091503743, "grad_norm": 0.5686795711517334, "learning_rate": 2.8751406043119998e-05, "loss": 0.0811, "step": 82210 }, { "epoch": 2.987862490006541, "grad_norm": 0.5881648659706116, "learning_rate": 2.8746275400108956e-05, "loss": 0.1118, "step": 82220 }, { "epoch": 2.9882258885093393, "grad_norm": 0.4261440336704254, "learning_rate": 2.8741144595684227e-05, "loss": 0.0932, "step": 82230 }, { "epoch": 2.9885892870121378, "grad_norm": 0.6575589776039124, "learning_rate": 2.8736013630066894e-05, "loss": 0.1028, "step": 82240 }, { "epoch": 2.9889526855149358, "grad_norm": 0.4109443426132202, "learning_rate": 2.8730882503478024e-05, "loss": 0.0768, "step": 82250 }, { "epoch": 2.9893160840177337, "grad_norm": 0.4477255046367645, "learning_rate": 2.8725751216138706e-05, "loss": 0.0736, "step": 82260 }, { "epoch": 2.989679482520532, "grad_norm": 0.5772513747215271, "learning_rate": 2.8720619768270023e-05, "loss": 0.0978, "step": 82270 }, { "epoch": 2.99004288102333, "grad_norm": 0.8295323252677917, "learning_rate": 2.871548816009307e-05, "loss": 0.0975, "step": 82280 }, { "epoch": 2.9904062795261286, "grad_norm": 1.1587345600128174, "learning_rate": 2.8710356391828953e-05, "loss": 0.2043, "step": 82290 }, { "epoch": 2.9907696780289266, "grad_norm": 0.5980029702186584, "learning_rate": 2.8705224463698778e-05, "loss": 0.0955, "step": 82300 }, { "epoch": 2.9911330765317246, "grad_norm": 0.8250631093978882, "learning_rate": 2.8700092375923666e-05, "loss": 0.0931, "step": 82310 }, { "epoch": 2.991496475034523, "grad_norm": 0.9287375211715698, "learning_rate": 2.8694960128724735e-05, "loss": 0.1066, "step": 82320 }, { "epoch": 2.991859873537321, "grad_norm": 1.5283560752868652, "learning_rate": 2.868982772232312e-05, "loss": 0.0921, "step": 82330 }, { "epoch": 2.9922232720401194, "grad_norm": 
1.8111027479171753, "learning_rate": 2.8684695156939955e-05, "loss": 0.1164, "step": 82340 }, { "epoch": 2.9925866705429174, "grad_norm": 0.3591112494468689, "learning_rate": 2.86795624327964e-05, "loss": 0.0832, "step": 82350 }, { "epoch": 2.9929500690457154, "grad_norm": 1.000104308128357, "learning_rate": 2.8674429550113578e-05, "loss": 0.0898, "step": 82360 }, { "epoch": 2.993313467548514, "grad_norm": 3.1828064918518066, "learning_rate": 2.8669296509112666e-05, "loss": 0.144, "step": 82370 }, { "epoch": 2.993676866051312, "grad_norm": 0.5351777076721191, "learning_rate": 2.866416331001482e-05, "loss": 0.0879, "step": 82380 }, { "epoch": 2.99404026455411, "grad_norm": 0.941906213760376, "learning_rate": 2.865902995304121e-05, "loss": 0.1301, "step": 82390 }, { "epoch": 2.994403663056908, "grad_norm": 2.9968576431274414, "learning_rate": 2.8653896438413024e-05, "loss": 0.0856, "step": 82400 }, { "epoch": 2.994767061559706, "grad_norm": 0.454728364944458, "learning_rate": 2.8648762766351438e-05, "loss": 0.0795, "step": 82410 }, { "epoch": 2.9951304600625046, "grad_norm": 0.9574378728866577, "learning_rate": 2.864362893707765e-05, "loss": 0.0924, "step": 82420 }, { "epoch": 2.9954938585653026, "grad_norm": 0.537486732006073, "learning_rate": 2.8638494950812854e-05, "loss": 0.0825, "step": 82430 }, { "epoch": 2.995857257068101, "grad_norm": 0.4603738486766815, "learning_rate": 2.863336080777826e-05, "loss": 0.1493, "step": 82440 }, { "epoch": 2.996220655570899, "grad_norm": 1.2842907905578613, "learning_rate": 2.862822650819507e-05, "loss": 0.0693, "step": 82450 }, { "epoch": 2.996584054073697, "grad_norm": 1.0182005167007446, "learning_rate": 2.862309205228451e-05, "loss": 0.1398, "step": 82460 }, { "epoch": 2.9969474525764954, "grad_norm": 0.6332042217254639, "learning_rate": 2.8617957440267806e-05, "loss": 0.1113, "step": 82470 }, { "epoch": 2.9973108510792934, "grad_norm": 0.7729670405387878, "learning_rate": 2.861282267236619e-05, "loss": 0.0994, "step": 82480 
}, { "epoch": 2.997674249582092, "grad_norm": 1.1133567094802856, "learning_rate": 2.8607687748800898e-05, "loss": 0.1356, "step": 82490 }, { "epoch": 2.99803764808489, "grad_norm": 0.6192561388015747, "learning_rate": 2.860255266979318e-05, "loss": 0.0742, "step": 82500 }, { "epoch": 2.998401046587688, "grad_norm": 0.7631018757820129, "learning_rate": 2.8597417435564282e-05, "loss": 0.0794, "step": 82510 }, { "epoch": 2.9987644450904862, "grad_norm": 0.49496951699256897, "learning_rate": 2.859228204633547e-05, "loss": 0.1244, "step": 82520 }, { "epoch": 2.9991278435932847, "grad_norm": 0.5484241247177124, "learning_rate": 2.8587146502328e-05, "loss": 0.0882, "step": 82530 }, { "epoch": 2.9994912420960826, "grad_norm": 0.8066346049308777, "learning_rate": 2.8582010803763153e-05, "loss": 0.1345, "step": 82540 }, { "epoch": 2.9998546405988806, "grad_norm": 0.8499393463134766, "learning_rate": 2.8576874950862205e-05, "loss": 0.0929, "step": 82550 }, { "epoch": 3.000218039101679, "grad_norm": 0.6916410326957703, "learning_rate": 2.8571738943846445e-05, "loss": 0.0913, "step": 82560 }, { "epoch": 3.000581437604477, "grad_norm": 0.7126309871673584, "learning_rate": 2.8566602782937162e-05, "loss": 0.0894, "step": 82570 }, { "epoch": 3.000944836107275, "grad_norm": 0.8689286708831787, "learning_rate": 2.8561466468355648e-05, "loss": 0.2882, "step": 82580 }, { "epoch": 3.0013082346100735, "grad_norm": 0.8167956471443176, "learning_rate": 2.8556330000323223e-05, "loss": 0.0969, "step": 82590 }, { "epoch": 3.0016716331128714, "grad_norm": 1.8583896160125732, "learning_rate": 2.8551193379061186e-05, "loss": 0.0697, "step": 82600 }, { "epoch": 3.00203503161567, "grad_norm": 0.6422175168991089, "learning_rate": 2.8546056604790866e-05, "loss": 0.0794, "step": 82610 }, { "epoch": 3.002398430118468, "grad_norm": 0.8441867232322693, "learning_rate": 2.8540919677733584e-05, "loss": 0.0738, "step": 82620 }, { "epoch": 3.0027618286212663, "grad_norm": 0.6802086234092712, 
"learning_rate": 2.8535782598110672e-05, "loss": 0.0804, "step": 82630 }, { "epoch": 3.0031252271240643, "grad_norm": 1.3518569469451904, "learning_rate": 2.8530645366143467e-05, "loss": 0.0901, "step": 82640 }, { "epoch": 3.0034886256268623, "grad_norm": 0.32762405276298523, "learning_rate": 2.852550798205331e-05, "loss": 0.0784, "step": 82650 }, { "epoch": 3.0038520241296607, "grad_norm": 0.43205514550209045, "learning_rate": 2.8520370446061567e-05, "loss": 0.0897, "step": 82660 }, { "epoch": 3.0042154226324587, "grad_norm": 0.6062584519386292, "learning_rate": 2.8515232758389582e-05, "loss": 0.0806, "step": 82670 }, { "epoch": 3.004578821135257, "grad_norm": 0.8513742089271545, "learning_rate": 2.8510094919258722e-05, "loss": 0.0741, "step": 82680 }, { "epoch": 3.004942219638055, "grad_norm": 0.43795377016067505, "learning_rate": 2.850495692889037e-05, "loss": 0.0878, "step": 82690 }, { "epoch": 3.005305618140853, "grad_norm": 1.4670331478118896, "learning_rate": 2.8499818787505884e-05, "loss": 0.0879, "step": 82700 }, { "epoch": 3.0056690166436515, "grad_norm": 0.5501256585121155, "learning_rate": 2.8494680495326665e-05, "loss": 0.073, "step": 82710 }, { "epoch": 3.0060324151464495, "grad_norm": 0.9265702962875366, "learning_rate": 2.84895420525741e-05, "loss": 0.0781, "step": 82720 }, { "epoch": 3.006395813649248, "grad_norm": 0.5779339075088501, "learning_rate": 2.8484403459469576e-05, "loss": 0.0829, "step": 82730 }, { "epoch": 3.006759212152046, "grad_norm": 0.31466546654701233, "learning_rate": 2.8479264716234504e-05, "loss": 0.0862, "step": 82740 }, { "epoch": 3.0071226106548443, "grad_norm": 2.829972505569458, "learning_rate": 2.84741258230903e-05, "loss": 0.0945, "step": 82750 }, { "epoch": 3.0074860091576423, "grad_norm": 0.4769342243671417, "learning_rate": 2.8468986780258382e-05, "loss": 0.0896, "step": 82760 }, { "epoch": 3.0078494076604403, "grad_norm": 0.9461230635643005, "learning_rate": 2.8463847587960157e-05, "loss": 0.0749, "step": 82770 }, { 
"epoch": 3.0082128061632387, "grad_norm": 0.4310390055179596, "learning_rate": 2.8458708246417064e-05, "loss": 0.1645, "step": 82780 }, { "epoch": 3.0085762046660367, "grad_norm": 0.46994635462760925, "learning_rate": 2.845356875585054e-05, "loss": 0.0835, "step": 82790 }, { "epoch": 3.008939603168835, "grad_norm": 1.9111509323120117, "learning_rate": 2.844842911648203e-05, "loss": 0.0781, "step": 82800 }, { "epoch": 3.008939603168835, "eval_loss": 0.33264267444610596, "eval_runtime": 179.8864, "eval_samples_per_second": 41.215, "eval_steps_per_second": 5.153, "eval_wer": 0.14235663586690145, "step": 82800 }, { "epoch": 3.009303001671633, "grad_norm": 0.5043010711669922, "learning_rate": 2.8443289328532973e-05, "loss": 0.0914, "step": 82810 }, { "epoch": 3.009666400174431, "grad_norm": 1.3565329313278198, "learning_rate": 2.8438149392224832e-05, "loss": 0.0642, "step": 82820 }, { "epoch": 3.0100297986772295, "grad_norm": 0.710573136806488, "learning_rate": 2.843300930777907e-05, "loss": 0.0714, "step": 82830 }, { "epoch": 3.0103931971800275, "grad_norm": 0.28599199652671814, "learning_rate": 2.8427869075417145e-05, "loss": 0.0736, "step": 82840 }, { "epoch": 3.010756595682826, "grad_norm": 1.063194751739502, "learning_rate": 2.8422728695360546e-05, "loss": 0.087, "step": 82850 }, { "epoch": 3.011119994185624, "grad_norm": 1.3806395530700684, "learning_rate": 2.841758816783074e-05, "loss": 0.1306, "step": 82860 }, { "epoch": 3.011483392688422, "grad_norm": 0.4422304332256317, "learning_rate": 2.841244749304922e-05, "loss": 0.0665, "step": 82870 }, { "epoch": 3.0118467911912203, "grad_norm": 0.5602436661720276, "learning_rate": 2.840730667123748e-05, "loss": 0.0796, "step": 82880 }, { "epoch": 3.0122101896940183, "grad_norm": 0.39872846007347107, "learning_rate": 2.8402165702617016e-05, "loss": 0.0937, "step": 82890 }, { "epoch": 3.0125735881968168, "grad_norm": 0.5337156057357788, "learning_rate": 2.8397024587409344e-05, "loss": 0.08, "step": 82900 }, { "epoch": 
3.0129369866996147, "grad_norm": 0.46881571412086487, "learning_rate": 2.8391883325835962e-05, "loss": 0.0913, "step": 82910 }, { "epoch": 3.013300385202413, "grad_norm": 2.061661720275879, "learning_rate": 2.8386741918118404e-05, "loss": 0.0662, "step": 82920 }, { "epoch": 3.013663783705211, "grad_norm": 1.4211331605911255, "learning_rate": 2.838160036447818e-05, "loss": 0.0677, "step": 82930 }, { "epoch": 3.014027182208009, "grad_norm": 1.1780022382736206, "learning_rate": 2.837645866513683e-05, "loss": 0.1062, "step": 82940 }, { "epoch": 3.0143905807108076, "grad_norm": 1.3207381963729858, "learning_rate": 2.837131682031589e-05, "loss": 0.1001, "step": 82950 }, { "epoch": 3.0147539792136056, "grad_norm": 0.9408227801322937, "learning_rate": 2.8366174830236896e-05, "loss": 0.0934, "step": 82960 }, { "epoch": 3.015117377716404, "grad_norm": 10.08785629272461, "learning_rate": 2.8361032695121416e-05, "loss": 0.0665, "step": 82970 }, { "epoch": 3.015480776219202, "grad_norm": 0.6198272705078125, "learning_rate": 2.835589041519099e-05, "loss": 0.0794, "step": 82980 }, { "epoch": 3.015844174722, "grad_norm": 0.44829145073890686, "learning_rate": 2.8350747990667192e-05, "loss": 0.0853, "step": 82990 }, { "epoch": 3.0162075732247984, "grad_norm": 0.6157195568084717, "learning_rate": 2.834560542177158e-05, "loss": 0.0791, "step": 83000 }, { "epoch": 3.0165709717275964, "grad_norm": 0.40746578574180603, "learning_rate": 2.8340462708725735e-05, "loss": 0.076, "step": 83010 }, { "epoch": 3.016934370230395, "grad_norm": 5.672430038452148, "learning_rate": 2.833531985175124e-05, "loss": 0.0816, "step": 83020 }, { "epoch": 3.0172977687331928, "grad_norm": 1.2975281476974487, "learning_rate": 2.8330176851069672e-05, "loss": 0.0738, "step": 83030 }, { "epoch": 3.017661167235991, "grad_norm": 0.22850178182125092, "learning_rate": 2.8325033706902643e-05, "loss": 0.1293, "step": 83040 }, { "epoch": 3.018024565738789, "grad_norm": 1.8061107397079468, "learning_rate": 
2.8319890419471728e-05, "loss": 0.0873, "step": 83050 }, { "epoch": 3.018387964241587, "grad_norm": 0.5460423827171326, "learning_rate": 2.8314746988998564e-05, "loss": 0.0753, "step": 83060 }, { "epoch": 3.0187513627443856, "grad_norm": 0.9149671196937561, "learning_rate": 2.8310117779454733e-05, "loss": 3.5039, "step": 83070 }, { "epoch": 3.0191147612471836, "grad_norm": 1.0777734518051147, "learning_rate": 2.83049740778118e-05, "loss": 0.0834, "step": 83080 }, { "epoch": 3.019478159749982, "grad_norm": 1.6478363275527954, "learning_rate": 2.829983023376929e-05, "loss": 0.1151, "step": 83090 }, { "epoch": 3.01984155825278, "grad_norm": 0.5667656660079956, "learning_rate": 2.8294686247548845e-05, "loss": 0.099, "step": 83100 }, { "epoch": 3.020204956755578, "grad_norm": 0.7810095548629761, "learning_rate": 2.828954211937209e-05, "loss": 0.0826, "step": 83110 }, { "epoch": 3.0205683552583764, "grad_norm": 0.45061448216438293, "learning_rate": 2.8284397849460664e-05, "loss": 0.0758, "step": 83120 }, { "epoch": 3.0209317537611744, "grad_norm": 2.199354887008667, "learning_rate": 2.8279253438036228e-05, "loss": 0.0767, "step": 83130 }, { "epoch": 3.021295152263973, "grad_norm": 0.7062342166900635, "learning_rate": 2.827410888532042e-05, "loss": 0.0847, "step": 83140 }, { "epoch": 3.021658550766771, "grad_norm": 1.1551927328109741, "learning_rate": 2.826896419153492e-05, "loss": 0.0789, "step": 83150 }, { "epoch": 3.022021949269569, "grad_norm": 2.4032013416290283, "learning_rate": 2.826381935690137e-05, "loss": 0.1152, "step": 83160 }, { "epoch": 3.0223853477723672, "grad_norm": 1.0286635160446167, "learning_rate": 2.825867438164147e-05, "loss": 0.0724, "step": 83170 }, { "epoch": 3.022748746275165, "grad_norm": 0.9156087636947632, "learning_rate": 2.8253529265976886e-05, "loss": 0.0808, "step": 83180 }, { "epoch": 3.0231121447779636, "grad_norm": 0.3878247141838074, "learning_rate": 2.8248384010129293e-05, "loss": 0.0736, "step": 83190 }, { "epoch": 
3.0234755432807616, "grad_norm": 0.7282688021659851, "learning_rate": 2.8243238614320394e-05, "loss": 0.0954, "step": 83200 }, { "epoch": 3.02383894178356, "grad_norm": 0.40208378434181213, "learning_rate": 2.8238093078771876e-05, "loss": 0.0893, "step": 83210 }, { "epoch": 3.024202340286358, "grad_norm": 4.144686698913574, "learning_rate": 2.823294740370546e-05, "loss": 0.079, "step": 83220 }, { "epoch": 3.024565738789156, "grad_norm": 1.0858464241027832, "learning_rate": 2.8227801589342838e-05, "loss": 0.0808, "step": 83230 }, { "epoch": 3.0249291372919545, "grad_norm": 1.1500715017318726, "learning_rate": 2.822265563590573e-05, "loss": 0.0714, "step": 83240 }, { "epoch": 3.0252925357947524, "grad_norm": 2.532526731491089, "learning_rate": 2.8217509543615862e-05, "loss": 0.0838, "step": 83250 }, { "epoch": 3.025655934297551, "grad_norm": 0.6312755942344666, "learning_rate": 2.8212363312694945e-05, "loss": 0.0853, "step": 83260 }, { "epoch": 3.026019332800349, "grad_norm": 0.5501546859741211, "learning_rate": 2.8207216943364734e-05, "loss": 0.0705, "step": 83270 }, { "epoch": 3.026382731303147, "grad_norm": 6.8282060623168945, "learning_rate": 2.8202070435846946e-05, "loss": 0.076, "step": 83280 }, { "epoch": 3.0267461298059453, "grad_norm": 0.48019352555274963, "learning_rate": 2.8196923790363344e-05, "loss": 0.0919, "step": 83290 }, { "epoch": 3.0271095283087432, "grad_norm": 0.48283651471138, "learning_rate": 2.8191777007135667e-05, "loss": 0.0932, "step": 83300 }, { "epoch": 3.0274729268115417, "grad_norm": 0.7604033350944519, "learning_rate": 2.8186630086385672e-05, "loss": 0.1037, "step": 83310 }, { "epoch": 3.0278363253143397, "grad_norm": 1.1358963251113892, "learning_rate": 2.818148302833513e-05, "loss": 0.0804, "step": 83320 }, { "epoch": 3.028199723817138, "grad_norm": 1.2052534818649292, "learning_rate": 2.81763358332058e-05, "loss": 0.0753, "step": 83330 }, { "epoch": 3.028563122319936, "grad_norm": 0.5695772767066956, "learning_rate": 
2.8171188501219464e-05, "loss": 0.0833, "step": 83340 }, { "epoch": 3.028926520822734, "grad_norm": 0.7455033659934998, "learning_rate": 2.81660410325979e-05, "loss": 0.0772, "step": 83350 }, { "epoch": 3.0292899193255325, "grad_norm": 0.7446203231811523, "learning_rate": 2.816089342756289e-05, "loss": 0.1103, "step": 83360 }, { "epoch": 3.0296533178283305, "grad_norm": 2.548922061920166, "learning_rate": 2.8155745686336234e-05, "loss": 0.0707, "step": 83370 }, { "epoch": 3.030016716331129, "grad_norm": 1.2039028406143188, "learning_rate": 2.815059780913971e-05, "loss": 0.0767, "step": 83380 }, { "epoch": 3.030380114833927, "grad_norm": 0.5070446729660034, "learning_rate": 2.8145449796195152e-05, "loss": 0.1082, "step": 83390 }, { "epoch": 3.030743513336725, "grad_norm": 3.70131778717041, "learning_rate": 2.814030164772434e-05, "loss": 0.0848, "step": 83400 }, { "epoch": 3.030743513336725, "eval_loss": 0.3220129907131195, "eval_runtime": 179.9233, "eval_samples_per_second": 41.206, "eval_steps_per_second": 5.152, "eval_wer": 0.14291938206836458, "step": 83400 }, { "epoch": 3.0311069118395233, "grad_norm": 0.40809836983680725, "learning_rate": 2.813515336394911e-05, "loss": 0.0843, "step": 83410 }, { "epoch": 3.0314703103423213, "grad_norm": 0.9957777261734009, "learning_rate": 2.8130004945091283e-05, "loss": 3.893, "step": 83420 }, { "epoch": 3.0318337088451197, "grad_norm": 0.45973557233810425, "learning_rate": 2.8124856391372673e-05, "loss": 0.0776, "step": 83430 }, { "epoch": 3.0321971073479177, "grad_norm": 0.7182989120483398, "learning_rate": 2.811970770301512e-05, "loss": 0.0772, "step": 83440 }, { "epoch": 3.0325605058507157, "grad_norm": 0.5667139291763306, "learning_rate": 2.811455888024046e-05, "loss": 0.0785, "step": 83450 }, { "epoch": 3.032923904353514, "grad_norm": 0.8791239857673645, "learning_rate": 2.810940992327054e-05, "loss": 0.0729, "step": 83460 }, { "epoch": 3.033287302856312, "grad_norm": 1.0834791660308838, "learning_rate": 
2.810426083232721e-05, "loss": 0.0763, "step": 83470 }, { "epoch": 3.0336507013591105, "grad_norm": 1.9368480443954468, "learning_rate": 2.809911160763233e-05, "loss": 0.0822, "step": 83480 }, { "epoch": 3.0340140998619085, "grad_norm": 1.531395673751831, "learning_rate": 2.8093962249407755e-05, "loss": 0.1231, "step": 83490 }, { "epoch": 3.034377498364707, "grad_norm": 1.5916883945465088, "learning_rate": 2.8088812757875347e-05, "loss": 0.236, "step": 83500 }, { "epoch": 3.034740896867505, "grad_norm": 0.34502482414245605, "learning_rate": 2.8083663133256998e-05, "loss": 0.0986, "step": 83510 }, { "epoch": 3.035104295370303, "grad_norm": 1.0849754810333252, "learning_rate": 2.8078513375774574e-05, "loss": 0.0704, "step": 83520 }, { "epoch": 3.0354676938731013, "grad_norm": 0.6940005421638489, "learning_rate": 2.8073363485649962e-05, "loss": 0.078, "step": 83530 }, { "epoch": 3.0358310923758993, "grad_norm": 0.4291388988494873, "learning_rate": 2.8068213463105054e-05, "loss": 0.0762, "step": 83540 }, { "epoch": 3.0361944908786977, "grad_norm": 1.4870105981826782, "learning_rate": 2.8063063308361736e-05, "loss": 0.0967, "step": 83550 }, { "epoch": 3.0365578893814957, "grad_norm": 0.8705921769142151, "learning_rate": 2.805791302164193e-05, "loss": 0.069, "step": 83560 }, { "epoch": 3.0369212878842937, "grad_norm": 1.3815371990203857, "learning_rate": 2.805276260316752e-05, "loss": 0.0682, "step": 83570 }, { "epoch": 3.037284686387092, "grad_norm": 0.4686858654022217, "learning_rate": 2.8047612053160443e-05, "loss": 0.0705, "step": 83580 }, { "epoch": 3.03764808488989, "grad_norm": 1.2113518714904785, "learning_rate": 2.80424613718426e-05, "loss": 0.3061, "step": 83590 }, { "epoch": 3.0380114833926886, "grad_norm": 1.3406014442443848, "learning_rate": 2.803731055943592e-05, "loss": 0.0936, "step": 83600 }, { "epoch": 3.0383748818954865, "grad_norm": 2.528625726699829, "learning_rate": 2.803215961616234e-05, "loss": 0.0922, "step": 83610 }, { "epoch": 
3.038738280398285, "grad_norm": 0.6336283683776855, "learning_rate": 2.8027008542243784e-05, "loss": 0.086, "step": 83620 }, { "epoch": 3.039101678901083, "grad_norm": 1.0377708673477173, "learning_rate": 2.8021857337902208e-05, "loss": 0.0734, "step": 83630 }, { "epoch": 3.039465077403881, "grad_norm": 0.5187166929244995, "learning_rate": 2.8016706003359545e-05, "loss": 0.0859, "step": 83640 }, { "epoch": 3.0398284759066794, "grad_norm": 0.650485634803772, "learning_rate": 2.801155453883775e-05, "loss": 0.091, "step": 83650 }, { "epoch": 3.0401918744094774, "grad_norm": 1.6549148559570312, "learning_rate": 2.800640294455879e-05, "loss": 0.0801, "step": 83660 }, { "epoch": 3.040555272912276, "grad_norm": 0.75333172082901, "learning_rate": 2.8001251220744628e-05, "loss": 0.0705, "step": 83670 }, { "epoch": 3.0409186714150738, "grad_norm": 1.4784330129623413, "learning_rate": 2.799609936761723e-05, "loss": 0.0881, "step": 83680 }, { "epoch": 3.0412820699178718, "grad_norm": 0.5964809656143188, "learning_rate": 2.7990947385398563e-05, "loss": 0.0899, "step": 83690 }, { "epoch": 3.04164546842067, "grad_norm": 0.6587772369384766, "learning_rate": 2.7985795274310622e-05, "loss": 0.081, "step": 83700 }, { "epoch": 3.042008866923468, "grad_norm": 0.6639897227287292, "learning_rate": 2.7980643034575375e-05, "loss": 0.1012, "step": 83710 }, { "epoch": 3.0423722654262666, "grad_norm": 0.4024466872215271, "learning_rate": 2.797549066641484e-05, "loss": 0.0641, "step": 83720 }, { "epoch": 3.0427356639290646, "grad_norm": 0.6157781481742859, "learning_rate": 2.797033817005099e-05, "loss": 0.074, "step": 83730 }, { "epoch": 3.0430990624318626, "grad_norm": 0.43719834089279175, "learning_rate": 2.7965185545705835e-05, "loss": 0.0881, "step": 83740 }, { "epoch": 3.043462460934661, "grad_norm": 1.6078770160675049, "learning_rate": 2.796003279360139e-05, "loss": 0.0706, "step": 83750 }, { "epoch": 3.043825859437459, "grad_norm": 0.7965851426124573, "learning_rate": 
2.7954879913959658e-05, "loss": 0.0649, "step": 83760 }, { "epoch": 3.0441892579402574, "grad_norm": 0.8984066843986511, "learning_rate": 2.7949726907002672e-05, "loss": 0.0769, "step": 83770 }, { "epoch": 3.0445526564430554, "grad_norm": 2.9094557762145996, "learning_rate": 2.794457377295244e-05, "loss": 0.0899, "step": 83780 }, { "epoch": 3.044916054945854, "grad_norm": 0.49150341749191284, "learning_rate": 2.793942051203101e-05, "loss": 1.4083, "step": 83790 }, { "epoch": 3.045279453448652, "grad_norm": 2.3720130920410156, "learning_rate": 2.7934267124460407e-05, "loss": 0.0894, "step": 83800 }, { "epoch": 3.04564285195145, "grad_norm": 0.31627193093299866, "learning_rate": 2.7929113610462665e-05, "loss": 0.0815, "step": 83810 }, { "epoch": 3.046006250454248, "grad_norm": 0.43195366859436035, "learning_rate": 2.792395997025985e-05, "loss": 0.0851, "step": 83820 }, { "epoch": 3.046369648957046, "grad_norm": 0.6193608641624451, "learning_rate": 2.7918806204073995e-05, "loss": 0.0718, "step": 83830 }, { "epoch": 3.0467330474598446, "grad_norm": 0.6569747924804688, "learning_rate": 2.791365231212717e-05, "loss": 0.097, "step": 83840 }, { "epoch": 3.0470964459626426, "grad_norm": 2.10140323638916, "learning_rate": 2.7908498294641437e-05, "loss": 0.0959, "step": 83850 }, { "epoch": 3.0474598444654406, "grad_norm": 0.7795642018318176, "learning_rate": 2.7903344151838856e-05, "loss": 0.0731, "step": 83860 }, { "epoch": 3.047823242968239, "grad_norm": 0.37401872873306274, "learning_rate": 2.7898189883941507e-05, "loss": 0.6129, "step": 83870 }, { "epoch": 3.048186641471037, "grad_norm": 0.9135899543762207, "learning_rate": 2.7893035491171466e-05, "loss": 0.0889, "step": 83880 }, { "epoch": 3.0485500399738354, "grad_norm": 0.572894275188446, "learning_rate": 2.7887880973750817e-05, "loss": 0.0953, "step": 83890 }, { "epoch": 3.0489134384766334, "grad_norm": 0.4888230264186859, "learning_rate": 2.7882726331901655e-05, "loss": 0.0821, "step": 83900 }, { "epoch": 
3.049276836979432, "grad_norm": 0.5517320036888123, "learning_rate": 2.787757156584608e-05, "loss": 0.0905, "step": 83910 }, { "epoch": 3.04964023548223, "grad_norm": 0.5624126195907593, "learning_rate": 2.7872416675806174e-05, "loss": 0.0658, "step": 83920 }, { "epoch": 3.050003633985028, "grad_norm": 0.606499969959259, "learning_rate": 2.7867261662004058e-05, "loss": 0.0772, "step": 83930 }, { "epoch": 3.0503670324878263, "grad_norm": 0.30192533135414124, "learning_rate": 2.7862106524661835e-05, "loss": 0.0782, "step": 83940 }, { "epoch": 3.0507304309906242, "grad_norm": 0.5810015797615051, "learning_rate": 2.7856951264001623e-05, "loss": 0.0724, "step": 83950 }, { "epoch": 3.0510938294934227, "grad_norm": 0.8986351490020752, "learning_rate": 2.7851795880245556e-05, "loss": 0.082, "step": 83960 }, { "epoch": 3.0514572279962207, "grad_norm": 0.655348539352417, "learning_rate": 2.7846640373615745e-05, "loss": 0.0694, "step": 83970 }, { "epoch": 3.0518206264990186, "grad_norm": 0.9153086543083191, "learning_rate": 2.784148474433433e-05, "loss": 0.0749, "step": 83980 }, { "epoch": 3.052184025001817, "grad_norm": 0.4834425151348114, "learning_rate": 2.783632899262345e-05, "loss": 0.072, "step": 83990 }, { "epoch": 3.052547423504615, "grad_norm": 0.8401370644569397, "learning_rate": 2.783117311870524e-05, "loss": 0.0781, "step": 84000 }, { "epoch": 3.052547423504615, "eval_loss": 0.3286387026309967, "eval_runtime": 180.0952, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.14074100967560405, "step": 84000 }, { "epoch": 3.0529108220074135, "grad_norm": 0.7353171706199646, "learning_rate": 2.7826017122801855e-05, "loss": 0.0685, "step": 84010 }, { "epoch": 3.0532742205102115, "grad_norm": 0.4934822916984558, "learning_rate": 2.7820861005135445e-05, "loss": 0.0677, "step": 84020 }, { "epoch": 3.0536376190130095, "grad_norm": 0.6039226651191711, "learning_rate": 2.781570476592818e-05, "loss": 0.071, "step": 84030 }, { "epoch": 
3.054001017515808, "grad_norm": 0.7619723677635193, "learning_rate": 2.7810548405402215e-05, "loss": 0.0811, "step": 84040 }, { "epoch": 3.054364416018606, "grad_norm": 0.9137590527534485, "learning_rate": 2.7805391923779704e-05, "loss": 0.104, "step": 84050 }, { "epoch": 3.0547278145214043, "grad_norm": 0.40205273032188416, "learning_rate": 2.7800235321282854e-05, "loss": 0.0756, "step": 84060 }, { "epoch": 3.0550912130242023, "grad_norm": 0.5475990772247314, "learning_rate": 2.779507859813381e-05, "loss": 0.0918, "step": 84070 }, { "epoch": 3.0554546115270007, "grad_norm": 0.5701851844787598, "learning_rate": 2.778992175455479e-05, "loss": 0.0744, "step": 84080 }, { "epoch": 3.0558180100297987, "grad_norm": 0.2219076156616211, "learning_rate": 2.7784764790767957e-05, "loss": 0.2494, "step": 84090 }, { "epoch": 3.0561814085325967, "grad_norm": 4.648733615875244, "learning_rate": 2.7779607706995524e-05, "loss": 0.1106, "step": 84100 }, { "epoch": 3.056544807035395, "grad_norm": 0.6483043432235718, "learning_rate": 2.7774450503459687e-05, "loss": 0.0938, "step": 84110 }, { "epoch": 3.056908205538193, "grad_norm": 0.7879406213760376, "learning_rate": 2.7769293180382645e-05, "loss": 0.0727, "step": 84120 }, { "epoch": 3.0572716040409915, "grad_norm": 0.540431022644043, "learning_rate": 2.7764135737986613e-05, "loss": 0.0692, "step": 84130 }, { "epoch": 3.0576350025437895, "grad_norm": 0.498374879360199, "learning_rate": 2.7758978176493805e-05, "loss": 0.0985, "step": 84140 }, { "epoch": 3.0579984010465875, "grad_norm": 2.0039825439453125, "learning_rate": 2.775382049612645e-05, "loss": 0.1214, "step": 84150 }, { "epoch": 3.058361799549386, "grad_norm": 0.6101000905036926, "learning_rate": 2.7748662697106757e-05, "loss": 0.0777, "step": 84160 }, { "epoch": 3.058725198052184, "grad_norm": 1.092410683631897, "learning_rate": 2.7743504779656976e-05, "loss": 0.0606, "step": 84170 }, { "epoch": 3.0590885965549823, "grad_norm": 0.5803566575050354, "learning_rate": 
2.773834674399934e-05, "loss": 0.0756, "step": 84180 }, { "epoch": 3.0594519950577803, "grad_norm": 0.4680107831954956, "learning_rate": 2.7733188590356073e-05, "loss": 0.0878, "step": 84190 }, { "epoch": 3.0598153935605787, "grad_norm": 4.1969380378723145, "learning_rate": 2.7728030318949448e-05, "loss": 0.0911, "step": 84200 }, { "epoch": 3.0601787920633767, "grad_norm": 0.45293834805488586, "learning_rate": 2.7722871930001694e-05, "loss": 0.1119, "step": 84210 }, { "epoch": 3.0605421905661747, "grad_norm": 1.130631685256958, "learning_rate": 2.7718229279634755e-05, "loss": 2.6943, "step": 84220 }, { "epoch": 3.060905589068973, "grad_norm": 0.3156570494174957, "learning_rate": 2.7713070667971197e-05, "loss": 0.0641, "step": 84230 }, { "epoch": 3.061268987571771, "grad_norm": 0.860851526260376, "learning_rate": 2.7707911939411078e-05, "loss": 0.093, "step": 84240 }, { "epoch": 3.0616323860745696, "grad_norm": 1.0920404195785522, "learning_rate": 2.7702753094176664e-05, "loss": 0.1426, "step": 84250 }, { "epoch": 3.0619957845773675, "grad_norm": 0.4944891333580017, "learning_rate": 2.769759413249024e-05, "loss": 0.1016, "step": 84260 }, { "epoch": 3.0623591830801655, "grad_norm": 0.503200113773346, "learning_rate": 2.7692435054574084e-05, "loss": 0.0753, "step": 84270 }, { "epoch": 3.062722581582964, "grad_norm": 0.9437207579612732, "learning_rate": 2.7687275860650476e-05, "loss": 0.0665, "step": 84280 }, { "epoch": 3.063085980085762, "grad_norm": 0.91017085313797, "learning_rate": 2.7682116550941716e-05, "loss": 0.1268, "step": 84290 }, { "epoch": 3.0634493785885604, "grad_norm": 1.9001520872116089, "learning_rate": 2.7676957125670096e-05, "loss": 0.0818, "step": 84300 }, { "epoch": 3.0638127770913584, "grad_norm": 0.6754614114761353, "learning_rate": 2.767179758505792e-05, "loss": 0.0902, "step": 84310 }, { "epoch": 3.0641761755941563, "grad_norm": 1.6156865358352661, "learning_rate": 2.766663792932749e-05, "loss": 0.0744, "step": 84320 }, { "epoch": 
3.0645395740969548, "grad_norm": 0.5703209042549133, "learning_rate": 2.7661478158701122e-05, "loss": 0.082, "step": 84330 }, { "epoch": 3.0649029725997528, "grad_norm": 0.4021599590778351, "learning_rate": 2.7656318273401128e-05, "loss": 0.0933, "step": 84340 }, { "epoch": 3.065266371102551, "grad_norm": 0.5536443591117859, "learning_rate": 2.7651158273649825e-05, "loss": 0.0867, "step": 84350 }, { "epoch": 3.065629769605349, "grad_norm": 0.4906123876571655, "learning_rate": 2.7645998159669557e-05, "loss": 0.0784, "step": 84360 }, { "epoch": 3.0659931681081476, "grad_norm": 1.6186575889587402, "learning_rate": 2.7640837931682627e-05, "loss": 0.1294, "step": 84370 }, { "epoch": 3.0663565666109456, "grad_norm": 1.145196557044983, "learning_rate": 2.76356775899114e-05, "loss": 0.0663, "step": 84380 }, { "epoch": 3.0667199651137436, "grad_norm": 0.4692659378051758, "learning_rate": 2.76305171345782e-05, "loss": 0.091, "step": 84390 }, { "epoch": 3.067083363616542, "grad_norm": 3.6405162811279297, "learning_rate": 2.7625356565905376e-05, "loss": 0.0835, "step": 84400 }, { "epoch": 3.06744676211934, "grad_norm": 1.138875126838684, "learning_rate": 2.762019588411528e-05, "loss": 0.1015, "step": 84410 }, { "epoch": 3.0678101606221384, "grad_norm": 0.8834431767463684, "learning_rate": 2.7615035089430262e-05, "loss": 0.0729, "step": 84420 }, { "epoch": 3.0681735591249364, "grad_norm": 0.36101579666137695, "learning_rate": 2.7609874182072693e-05, "loss": 0.0694, "step": 84430 }, { "epoch": 3.0685369576277344, "grad_norm": 0.5472438931465149, "learning_rate": 2.760471316226492e-05, "loss": 0.0904, "step": 84440 }, { "epoch": 3.068900356130533, "grad_norm": 0.7334174513816833, "learning_rate": 2.7599552030229337e-05, "loss": 0.1055, "step": 84450 }, { "epoch": 3.069263754633331, "grad_norm": 3.81465220451355, "learning_rate": 2.75943907861883e-05, "loss": 0.0912, "step": 84460 }, { "epoch": 3.069627153136129, "grad_norm": 0.5324450135231018, "learning_rate": 
2.7589229430364196e-05, "loss": 0.3292, "step": 84470 }, { "epoch": 3.069990551638927, "grad_norm": 0.7580345273017883, "learning_rate": 2.758406796297941e-05, "loss": 0.1129, "step": 84480 }, { "epoch": 3.0703539501417256, "grad_norm": 0.8137314915657043, "learning_rate": 2.757890638425632e-05, "loss": 0.0886, "step": 84490 }, { "epoch": 3.0707173486445236, "grad_norm": 1.109212875366211, "learning_rate": 2.7573744694417343e-05, "loss": 0.0996, "step": 84500 }, { "epoch": 3.0710807471473216, "grad_norm": 0.991165816783905, "learning_rate": 2.7568582893684852e-05, "loss": 0.1194, "step": 84510 }, { "epoch": 3.07144414565012, "grad_norm": 0.8121623396873474, "learning_rate": 2.7563420982281275e-05, "loss": 0.0811, "step": 84520 }, { "epoch": 3.071807544152918, "grad_norm": 0.5951569080352783, "learning_rate": 2.7558258960429007e-05, "loss": 0.0876, "step": 84530 }, { "epoch": 3.0721709426557164, "grad_norm": 0.7901829481124878, "learning_rate": 2.7553096828350454e-05, "loss": 0.0911, "step": 84540 }, { "epoch": 3.0725343411585144, "grad_norm": 0.6474528312683105, "learning_rate": 2.7547934586268048e-05, "loss": 0.1048, "step": 84550 }, { "epoch": 3.0728977396613124, "grad_norm": 0.7994125485420227, "learning_rate": 2.75427722344042e-05, "loss": 0.0767, "step": 84560 }, { "epoch": 3.073261138164111, "grad_norm": 0.5310266017913818, "learning_rate": 2.7537609772981353e-05, "loss": 0.0821, "step": 84570 }, { "epoch": 3.073624536666909, "grad_norm": 0.7052491307258606, "learning_rate": 2.753244720222193e-05, "loss": 0.0586, "step": 84580 }, { "epoch": 3.0739879351697073, "grad_norm": 0.32586991786956787, "learning_rate": 2.7527284522348362e-05, "loss": 0.096, "step": 84590 }, { "epoch": 3.0743513336725052, "grad_norm": 1.7913528680801392, "learning_rate": 2.7522121733583102e-05, "loss": 0.0981, "step": 84600 }, { "epoch": 3.0743513336725052, "eval_loss": 0.33099600672721863, "eval_runtime": 180.5075, "eval_samples_per_second": 41.073, "eval_steps_per_second": 5.136, 
"eval_wer": 0.14155789932288926, "step": 84600 }, { "epoch": 3.0747147321753037, "grad_norm": 0.8068686723709106, "learning_rate": 2.7516958836148583e-05, "loss": 0.121, "step": 84610 }, { "epoch": 3.0750781306781017, "grad_norm": 2.0272300243377686, "learning_rate": 2.7511795830267268e-05, "loss": 0.2779, "step": 84620 }, { "epoch": 3.0754415291808996, "grad_norm": 0.571723997592926, "learning_rate": 2.7506632716161603e-05, "loss": 0.0819, "step": 84630 }, { "epoch": 3.075804927683698, "grad_norm": 0.5370312929153442, "learning_rate": 2.7501469494054056e-05, "loss": 0.1944, "step": 84640 }, { "epoch": 3.076168326186496, "grad_norm": 0.9437683820724487, "learning_rate": 2.749630616416709e-05, "loss": 0.0791, "step": 84650 }, { "epoch": 3.0765317246892945, "grad_norm": 0.29546335339546204, "learning_rate": 2.749114272672317e-05, "loss": 0.1011, "step": 84660 }, { "epoch": 3.0768951231920925, "grad_norm": 1.31322181224823, "learning_rate": 2.7485979181944778e-05, "loss": 0.0709, "step": 84670 }, { "epoch": 3.0772585216948904, "grad_norm": 0.3159250319004059, "learning_rate": 2.748081553005438e-05, "loss": 0.0715, "step": 84680 }, { "epoch": 3.077621920197689, "grad_norm": 0.9355630278587341, "learning_rate": 2.747565177127448e-05, "loss": 0.5317, "step": 84690 }, { "epoch": 3.077985318700487, "grad_norm": 0.462240993976593, "learning_rate": 2.7470487905827542e-05, "loss": 0.0819, "step": 84700 }, { "epoch": 3.0783487172032853, "grad_norm": 1.28692626953125, "learning_rate": 2.7465323933936077e-05, "loss": 0.0753, "step": 84710 }, { "epoch": 3.0787121157060833, "grad_norm": 0.413011372089386, "learning_rate": 2.7460159855822577e-05, "loss": 0.0573, "step": 84720 }, { "epoch": 3.0790755142088813, "grad_norm": 0.4972487986087799, "learning_rate": 2.745499567170953e-05, "loss": 0.7906, "step": 84730 }, { "epoch": 3.0794389127116797, "grad_norm": 0.48013266921043396, "learning_rate": 2.744983138181947e-05, "loss": 0.0815, "step": 84740 }, { "epoch": 3.0798023112144777, 
"grad_norm": 0.597145676612854, "learning_rate": 2.7444666986374877e-05, "loss": 0.0945, "step": 84750 }, { "epoch": 3.080165709717276, "grad_norm": 1.169415831565857, "learning_rate": 2.7439502485598296e-05, "loss": 0.0948, "step": 84760 }, { "epoch": 3.080529108220074, "grad_norm": 0.42434608936309814, "learning_rate": 2.7434337879712217e-05, "loss": 0.937, "step": 84770 }, { "epoch": 3.0808925067228725, "grad_norm": 1.248987078666687, "learning_rate": 2.7429173168939194e-05, "loss": 0.0861, "step": 84780 }, { "epoch": 3.0812559052256705, "grad_norm": 0.3468557894229889, "learning_rate": 2.742400835350174e-05, "loss": 0.1311, "step": 84790 }, { "epoch": 3.0816193037284685, "grad_norm": 0.5820382237434387, "learning_rate": 2.7418843433622387e-05, "loss": 0.0835, "step": 84800 }, { "epoch": 3.081982702231267, "grad_norm": 0.7529087066650391, "learning_rate": 2.741367840952368e-05, "loss": 0.0695, "step": 84810 }, { "epoch": 3.082346100734065, "grad_norm": 0.4199211895465851, "learning_rate": 2.7408513281428156e-05, "loss": 0.0736, "step": 84820 }, { "epoch": 3.0827094992368633, "grad_norm": 0.3389349579811096, "learning_rate": 2.7403348049558363e-05, "loss": 0.0825, "step": 84830 }, { "epoch": 3.0830728977396613, "grad_norm": 0.3386542797088623, "learning_rate": 2.739818271413686e-05, "loss": 0.0791, "step": 84840 }, { "epoch": 3.0834362962424593, "grad_norm": 1.0355015993118286, "learning_rate": 2.7393017275386186e-05, "loss": 0.0781, "step": 84850 }, { "epoch": 3.0837996947452577, "grad_norm": 0.6449688673019409, "learning_rate": 2.7387851733528918e-05, "loss": 0.078, "step": 84860 }, { "epoch": 3.0841630932480557, "grad_norm": 0.6027774214744568, "learning_rate": 2.7382686088787613e-05, "loss": 0.0954, "step": 84870 }, { "epoch": 3.084526491750854, "grad_norm": 0.3841930627822876, "learning_rate": 2.737752034138485e-05, "loss": 0.0701, "step": 84880 }, { "epoch": 3.084889890253652, "grad_norm": 0.4170621633529663, "learning_rate": 2.7372354491543184e-05, "loss": 
0.0901, "step": 84890 }, { "epoch": 3.0852532887564506, "grad_norm": 0.8120031356811523, "learning_rate": 2.736718853948521e-05, "loss": 0.0809, "step": 84900 }, { "epoch": 3.0856166872592485, "grad_norm": 0.36227869987487793, "learning_rate": 2.73620224854335e-05, "loss": 0.1081, "step": 84910 }, { "epoch": 3.0859800857620465, "grad_norm": 0.6383411884307861, "learning_rate": 2.7356856329610646e-05, "loss": 0.0825, "step": 84920 }, { "epoch": 3.086343484264845, "grad_norm": 0.4589132070541382, "learning_rate": 2.7351690072239246e-05, "loss": 0.0631, "step": 84930 }, { "epoch": 3.086706882767643, "grad_norm": 0.5622755885124207, "learning_rate": 2.734652371354188e-05, "loss": 0.0708, "step": 84940 }, { "epoch": 3.0870702812704414, "grad_norm": 2.5855283737182617, "learning_rate": 2.7341357253741155e-05, "loss": 0.0931, "step": 84950 }, { "epoch": 3.0874336797732393, "grad_norm": 0.4554671049118042, "learning_rate": 2.7336190693059688e-05, "loss": 0.0817, "step": 84960 }, { "epoch": 3.0877970782760373, "grad_norm": 0.824824869632721, "learning_rate": 2.7331024031720065e-05, "loss": 0.0966, "step": 84970 }, { "epoch": 3.0881604767788358, "grad_norm": 1.0286164283752441, "learning_rate": 2.7325857269944916e-05, "loss": 0.0838, "step": 84980 }, { "epoch": 3.0885238752816337, "grad_norm": 0.5027440190315247, "learning_rate": 2.7320690407956844e-05, "loss": 0.0819, "step": 84990 }, { "epoch": 3.088887273784432, "grad_norm": 0.5407890677452087, "learning_rate": 2.7315523445978486e-05, "loss": 0.1, "step": 85000 }, { "epoch": 3.08925067228723, "grad_norm": 0.7916487455368042, "learning_rate": 2.731035638423246e-05, "loss": 0.0742, "step": 85010 }, { "epoch": 3.089614070790028, "grad_norm": 189.06448364257812, "learning_rate": 2.7305189222941403e-05, "loss": 3.2542, "step": 85020 }, { "epoch": 3.0899774692928266, "grad_norm": 1.7703773975372314, "learning_rate": 2.730002196232794e-05, "loss": 0.0749, "step": 85030 }, { "epoch": 3.0903408677956246, "grad_norm": 
0.6317950487136841, "learning_rate": 2.7294854602614712e-05, "loss": 0.0938, "step": 85040 }, { "epoch": 3.090704266298423, "grad_norm": 1.0689524412155151, "learning_rate": 2.7289687144024368e-05, "loss": 0.0934, "step": 85050 }, { "epoch": 3.091067664801221, "grad_norm": 0.41926833987236023, "learning_rate": 2.7284519586779545e-05, "loss": 0.0997, "step": 85060 }, { "epoch": 3.0914310633040194, "grad_norm": 0.8323477506637573, "learning_rate": 2.727935193110291e-05, "loss": 0.0701, "step": 85070 }, { "epoch": 3.0917944618068174, "grad_norm": 1.0824620723724365, "learning_rate": 2.7274184177217104e-05, "loss": 0.0732, "step": 85080 }, { "epoch": 3.0921578603096154, "grad_norm": 0.6392784118652344, "learning_rate": 2.72690163253448e-05, "loss": 0.1011, "step": 85090 }, { "epoch": 3.092521258812414, "grad_norm": 1.3124550580978394, "learning_rate": 2.7263848375708646e-05, "loss": 0.0908, "step": 85100 }, { "epoch": 3.092884657315212, "grad_norm": 0.42463892698287964, "learning_rate": 2.7258680328531326e-05, "loss": 0.0768, "step": 85110 }, { "epoch": 3.09324805581801, "grad_norm": 0.8673943281173706, "learning_rate": 2.725351218403551e-05, "loss": 0.101, "step": 85120 }, { "epoch": 3.093611454320808, "grad_norm": 0.26485633850097656, "learning_rate": 2.7248343942443866e-05, "loss": 0.0684, "step": 85130 }, { "epoch": 3.093974852823606, "grad_norm": 1.3009984493255615, "learning_rate": 2.7243175603979088e-05, "loss": 0.1174, "step": 85140 }, { "epoch": 3.0943382513264046, "grad_norm": 1.3685442209243774, "learning_rate": 2.723800716886385e-05, "loss": 0.0942, "step": 85150 }, { "epoch": 3.0947016498292026, "grad_norm": 1.106117844581604, "learning_rate": 2.7232838637320846e-05, "loss": 0.1083, "step": 85160 }, { "epoch": 3.095065048332001, "grad_norm": 0.48407310247421265, "learning_rate": 2.722767000957277e-05, "loss": 0.0596, "step": 85170 }, { "epoch": 3.095428446834799, "grad_norm": 0.6600604057312012, "learning_rate": 2.7222501285842315e-05, "loss": 0.0893, 
"step": 85180 }, { "epoch": 3.0957918453375974, "grad_norm": 0.9965558648109436, "learning_rate": 2.7217332466352192e-05, "loss": 0.1042, "step": 85190 }, { "epoch": 3.0961552438403954, "grad_norm": 0.9833908081054688, "learning_rate": 2.7212163551325105e-05, "loss": 0.0964, "step": 85200 }, { "epoch": 3.0961552438403954, "eval_loss": 0.3095722794532776, "eval_runtime": 179.4671, "eval_samples_per_second": 41.311, "eval_steps_per_second": 5.165, "eval_wer": 0.14271969793236153, "step": 85200 }, { "epoch": 3.0965186423431934, "grad_norm": 2.14083194732666, "learning_rate": 2.720699454098376e-05, "loss": 0.087, "step": 85210 }, { "epoch": 3.096882040845992, "grad_norm": 1.5555180311203003, "learning_rate": 2.720182543555087e-05, "loss": 0.0806, "step": 85220 }, { "epoch": 3.09724543934879, "grad_norm": 0.7186011075973511, "learning_rate": 2.7196656235249157e-05, "loss": 0.0788, "step": 85230 }, { "epoch": 3.0976088378515882, "grad_norm": 0.28633779287338257, "learning_rate": 2.719148694030134e-05, "loss": 0.1674, "step": 85240 }, { "epoch": 3.0979722363543862, "grad_norm": 0.8628358244895935, "learning_rate": 2.718631755093016e-05, "loss": 0.0893, "step": 85250 }, { "epoch": 3.098335634857184, "grad_norm": 0.41827306151390076, "learning_rate": 2.718114806735832e-05, "loss": 0.0989, "step": 85260 }, { "epoch": 3.0986990333599826, "grad_norm": 0.5456732511520386, "learning_rate": 2.7175978489808577e-05, "loss": 0.0693, "step": 85270 }, { "epoch": 3.0990624318627806, "grad_norm": 0.7632073760032654, "learning_rate": 2.7170808818503667e-05, "loss": 0.1152, "step": 85280 }, { "epoch": 3.099425830365579, "grad_norm": 0.419622540473938, "learning_rate": 2.716563905366632e-05, "loss": 0.0821, "step": 85290 }, { "epoch": 3.099789228868377, "grad_norm": 0.8410579562187195, "learning_rate": 2.71604691955193e-05, "loss": 0.0906, "step": 85300 }, { "epoch": 3.100152627371175, "grad_norm": 1.2106753587722778, "learning_rate": 2.7155299244285343e-05, "loss": 0.0966, "step": 85310 
}, { "epoch": 3.1005160258739735, "grad_norm": 1.0297480821609497, "learning_rate": 2.7150129200187213e-05, "loss": 0.0814, "step": 85320 }, { "epoch": 3.1008794243767714, "grad_norm": 0.5016379356384277, "learning_rate": 2.7144959063447667e-05, "loss": 0.0808, "step": 85330 }, { "epoch": 3.10124282287957, "grad_norm": 0.8718425035476685, "learning_rate": 2.7139788834289463e-05, "loss": 0.094, "step": 85340 }, { "epoch": 3.101606221382368, "grad_norm": 1.8094639778137207, "learning_rate": 2.7134618512935372e-05, "loss": 0.097, "step": 85350 }, { "epoch": 3.1019696198851663, "grad_norm": 0.45816463232040405, "learning_rate": 2.7129448099608167e-05, "loss": 0.0649, "step": 85360 }, { "epoch": 3.1023330183879643, "grad_norm": 5.5397868156433105, "learning_rate": 2.7124277594530624e-05, "loss": 0.0717, "step": 85370 }, { "epoch": 3.1026964168907623, "grad_norm": 0.3643113076686859, "learning_rate": 2.7119106997925513e-05, "loss": 0.0649, "step": 85380 }, { "epoch": 3.1030598153935607, "grad_norm": 1.1016013622283936, "learning_rate": 2.711393631001562e-05, "loss": 0.0926, "step": 85390 }, { "epoch": 3.1034232138963587, "grad_norm": 2.1157453060150146, "learning_rate": 2.7108765531023733e-05, "loss": 0.0879, "step": 85400 }, { "epoch": 3.103786612399157, "grad_norm": 0.9492454528808594, "learning_rate": 2.7103594661172644e-05, "loss": 0.0777, "step": 85410 }, { "epoch": 3.104150010901955, "grad_norm": 0.3126516342163086, "learning_rate": 2.7098423700685143e-05, "loss": 0.0567, "step": 85420 }, { "epoch": 3.104513409404753, "grad_norm": 0.3263763189315796, "learning_rate": 2.7093252649784035e-05, "loss": 0.067, "step": 85430 }, { "epoch": 3.1048768079075515, "grad_norm": 0.8375474214553833, "learning_rate": 2.7088081508692115e-05, "loss": 0.0907, "step": 85440 }, { "epoch": 3.1052402064103495, "grad_norm": 0.8583451509475708, "learning_rate": 2.7082910277632194e-05, "loss": 0.1196, "step": 85450 }, { "epoch": 3.105603604913148, "grad_norm": 1.2481297254562378, 
"learning_rate": 2.707773895682708e-05, "loss": 0.0821, "step": 85460 }, { "epoch": 3.105967003415946, "grad_norm": 0.3148916959762573, "learning_rate": 2.7072567546499585e-05, "loss": 0.0726, "step": 85470 }, { "epoch": 3.1063304019187443, "grad_norm": 0.3623039126396179, "learning_rate": 2.7067396046872533e-05, "loss": 0.0876, "step": 85480 }, { "epoch": 3.1066938004215423, "grad_norm": 0.5890102982521057, "learning_rate": 2.7062224458168733e-05, "loss": 0.1002, "step": 85490 }, { "epoch": 3.1070571989243403, "grad_norm": 1.0585196018218994, "learning_rate": 2.705705278061103e-05, "loss": 0.0956, "step": 85500 }, { "epoch": 3.1074205974271387, "grad_norm": 0.3192172944545746, "learning_rate": 2.7051881014422232e-05, "loss": 0.1025, "step": 85510 }, { "epoch": 3.1077839959299367, "grad_norm": 0.47871172428131104, "learning_rate": 2.7046709159825184e-05, "loss": 0.092, "step": 85520 }, { "epoch": 3.108147394432735, "grad_norm": 0.516779899597168, "learning_rate": 2.704153721704273e-05, "loss": 0.0672, "step": 85530 }, { "epoch": 3.108510792935533, "grad_norm": 0.5075317621231079, "learning_rate": 2.7036365186297685e-05, "loss": 0.0803, "step": 85540 }, { "epoch": 3.108874191438331, "grad_norm": 0.664806604385376, "learning_rate": 2.703119306781292e-05, "loss": 0.0831, "step": 85550 }, { "epoch": 3.1092375899411295, "grad_norm": 0.8135377168655396, "learning_rate": 2.702602086181127e-05, "loss": 0.1029, "step": 85560 }, { "epoch": 3.1096009884439275, "grad_norm": 1.3879525661468506, "learning_rate": 2.702084856851559e-05, "loss": 0.0779, "step": 85570 }, { "epoch": 3.109964386946726, "grad_norm": 0.43086570501327515, "learning_rate": 2.7015676188148732e-05, "loss": 0.0809, "step": 85580 }, { "epoch": 3.110327785449524, "grad_norm": 0.5117005705833435, "learning_rate": 2.7010503720933567e-05, "loss": 0.103, "step": 85590 }, { "epoch": 3.110691183952322, "grad_norm": 1.2577043771743774, "learning_rate": 2.7005331167092945e-05, "loss": 0.1145, "step": 85600 }, { 
"epoch": 3.1110545824551203, "grad_norm": 0.43926846981048584, "learning_rate": 2.7000158526849734e-05, "loss": 0.5229, "step": 85610 }, { "epoch": 3.1114179809579183, "grad_norm": 0.6876174807548523, "learning_rate": 2.6994985800426813e-05, "loss": 0.0711, "step": 85620 }, { "epoch": 3.1117813794607168, "grad_norm": 0.5573021769523621, "learning_rate": 2.6989812988047053e-05, "loss": 0.0801, "step": 85630 }, { "epoch": 3.1121447779635147, "grad_norm": 0.8710441589355469, "learning_rate": 2.6984640089933332e-05, "loss": 0.0774, "step": 85640 }, { "epoch": 3.112508176466313, "grad_norm": 0.9022230505943298, "learning_rate": 2.697946710630853e-05, "loss": 0.0983, "step": 85650 }, { "epoch": 3.112871574969111, "grad_norm": 1.3726192712783813, "learning_rate": 2.6974294037395533e-05, "loss": 0.1686, "step": 85660 }, { "epoch": 3.113234973471909, "grad_norm": 0.6950212121009827, "learning_rate": 2.6969120883417228e-05, "loss": 0.0732, "step": 85670 }, { "epoch": 3.1135983719747076, "grad_norm": 0.5118197202682495, "learning_rate": 2.696394764459651e-05, "loss": 0.1856, "step": 85680 }, { "epoch": 3.1139617704775056, "grad_norm": 0.7940520644187927, "learning_rate": 2.6958774321156278e-05, "loss": 0.0907, "step": 85690 }, { "epoch": 3.114325168980304, "grad_norm": 4.099023342132568, "learning_rate": 2.6953600913319427e-05, "loss": 0.0699, "step": 85700 }, { "epoch": 3.114688567483102, "grad_norm": 0.6201004385948181, "learning_rate": 2.694842742130887e-05, "loss": 0.0649, "step": 85710 }, { "epoch": 3.1150519659859, "grad_norm": 1.4649277925491333, "learning_rate": 2.6943253845347506e-05, "loss": 0.0812, "step": 85720 }, { "epoch": 3.1154153644886984, "grad_norm": 1.2290292978286743, "learning_rate": 2.6938080185658242e-05, "loss": 0.088, "step": 85730 }, { "epoch": 3.1157787629914964, "grad_norm": 3.397437334060669, "learning_rate": 2.6932906442464005e-05, "loss": 0.1178, "step": 85740 }, { "epoch": 3.116142161494295, "grad_norm": 1.0771185159683228, "learning_rate": 
2.69277326159877e-05, "loss": 0.086, "step": 85750 }, { "epoch": 3.116505559997093, "grad_norm": 0.9583094716072083, "learning_rate": 2.692255870645226e-05, "loss": 0.0721, "step": 85760 }, { "epoch": 3.116868958499891, "grad_norm": 0.3439558744430542, "learning_rate": 2.6917384714080606e-05, "loss": 0.0629, "step": 85770 }, { "epoch": 3.117232357002689, "grad_norm": 0.909960150718689, "learning_rate": 2.691221063909567e-05, "loss": 0.0839, "step": 85780 }, { "epoch": 3.117595755505487, "grad_norm": 0.5029392242431641, "learning_rate": 2.6907036481720377e-05, "loss": 0.09, "step": 85790 }, { "epoch": 3.1179591540082856, "grad_norm": 0.8775815963745117, "learning_rate": 2.6901862242177667e-05, "loss": 0.0989, "step": 85800 }, { "epoch": 3.1179591540082856, "eval_loss": 0.33487364649772644, "eval_runtime": 179.4409, "eval_samples_per_second": 41.317, "eval_steps_per_second": 5.166, "eval_wer": 0.14279231034545356, "step": 85800 }, { "epoch": 3.1183225525110836, "grad_norm": 0.7273783683776855, "learning_rate": 2.689668792069048e-05, "loss": 0.0869, "step": 85810 }, { "epoch": 3.118685951013882, "grad_norm": 0.24581924080848694, "learning_rate": 2.689151351748176e-05, "loss": 0.0647, "step": 85820 }, { "epoch": 3.11904934951668, "grad_norm": 0.9781889915466309, "learning_rate": 2.688633903277445e-05, "loss": 0.0781, "step": 85830 }, { "epoch": 3.119412748019478, "grad_norm": 0.4281591773033142, "learning_rate": 2.688116446679151e-05, "loss": 0.1053, "step": 85840 }, { "epoch": 3.1197761465222764, "grad_norm": 1.24199378490448, "learning_rate": 2.6875989819755876e-05, "loss": 0.0792, "step": 85850 }, { "epoch": 3.1201395450250744, "grad_norm": 1.8490883111953735, "learning_rate": 2.6870815091890523e-05, "loss": 0.1136, "step": 85860 }, { "epoch": 3.120502943527873, "grad_norm": 0.7298690676689148, "learning_rate": 2.6865640283418398e-05, "loss": 0.082, "step": 85870 }, { "epoch": 3.120866342030671, "grad_norm": 0.5065509080886841, "learning_rate": 
2.6860465394562478e-05, "loss": 0.0723, "step": 85880 }, { "epoch": 3.121229740533469, "grad_norm": 0.9484612941741943, "learning_rate": 2.6855290425545713e-05, "loss": 0.1147, "step": 85890 }, { "epoch": 3.1215931390362672, "grad_norm": 0.5501681566238403, "learning_rate": 2.68501153765911e-05, "loss": 0.1155, "step": 85900 }, { "epoch": 3.121956537539065, "grad_norm": 1.5682904720306396, "learning_rate": 2.684494024792159e-05, "loss": 0.1119, "step": 85910 }, { "epoch": 3.1223199360418636, "grad_norm": 0.8644644618034363, "learning_rate": 2.6839765039760168e-05, "loss": 0.0715, "step": 85920 }, { "epoch": 3.1226833345446616, "grad_norm": 0.5618588924407959, "learning_rate": 2.683458975232982e-05, "loss": 0.0808, "step": 85930 }, { "epoch": 3.12304673304746, "grad_norm": 0.45507627725601196, "learning_rate": 2.682941438585352e-05, "loss": 0.0747, "step": 85940 }, { "epoch": 3.123410131550258, "grad_norm": 0.4180305302143097, "learning_rate": 2.682423894055427e-05, "loss": 0.0642, "step": 85950 }, { "epoch": 3.123773530053056, "grad_norm": 1.2814691066741943, "learning_rate": 2.6819063416655054e-05, "loss": 0.1001, "step": 85960 }, { "epoch": 3.1241369285558545, "grad_norm": 0.5419327020645142, "learning_rate": 2.6813887814378864e-05, "loss": 1.3428, "step": 85970 }, { "epoch": 3.1245003270586524, "grad_norm": 0.9683062434196472, "learning_rate": 2.6808712133948705e-05, "loss": 0.0685, "step": 85980 }, { "epoch": 3.124863725561451, "grad_norm": 1.1266988515853882, "learning_rate": 2.6803536375587572e-05, "loss": 0.103, "step": 85990 }, { "epoch": 3.125227124064249, "grad_norm": 0.9783998727798462, "learning_rate": 2.679836053951848e-05, "loss": 0.0788, "step": 86000 }, { "epoch": 3.125590522567047, "grad_norm": 2.445962905883789, "learning_rate": 2.6793184625964425e-05, "loss": 0.0841, "step": 86010 }, { "epoch": 3.1259539210698453, "grad_norm": 0.41320154070854187, "learning_rate": 2.678800863514843e-05, "loss": 0.0663, "step": 86020 }, { "epoch": 
3.1263173195726432, "grad_norm": 0.4643288850784302, "learning_rate": 2.6782832567293504e-05, "loss": 0.0801, "step": 86030 }, { "epoch": 3.1266807180754417, "grad_norm": 0.5545969605445862, "learning_rate": 2.677765642262266e-05, "loss": 0.0976, "step": 86040 }, { "epoch": 3.1270441165782397, "grad_norm": 0.8363248109817505, "learning_rate": 2.677248020135893e-05, "loss": 0.0715, "step": 86050 }, { "epoch": 3.127407515081038, "grad_norm": 1.5308492183685303, "learning_rate": 2.6767303903725332e-05, "loss": 0.1622, "step": 86060 }, { "epoch": 3.127770913583836, "grad_norm": 2.161001682281494, "learning_rate": 2.6762127529944903e-05, "loss": 0.081, "step": 86070 }, { "epoch": 3.128134312086634, "grad_norm": 0.6405352354049683, "learning_rate": 2.6756951080240662e-05, "loss": 0.0689, "step": 86080 }, { "epoch": 3.1284977105894325, "grad_norm": 12.528480529785156, "learning_rate": 2.675177455483565e-05, "loss": 0.9386, "step": 86090 }, { "epoch": 3.1288611090922305, "grad_norm": 3.0616352558135986, "learning_rate": 2.674659795395291e-05, "loss": 0.0755, "step": 86100 }, { "epoch": 3.129224507595029, "grad_norm": 0.49330297112464905, "learning_rate": 2.6741421277815475e-05, "loss": 0.0792, "step": 86110 }, { "epoch": 3.129587906097827, "grad_norm": 7.257319450378418, "learning_rate": 2.6736244526646398e-05, "loss": 0.0865, "step": 86120 }, { "epoch": 3.129951304600625, "grad_norm": 1.1979786157608032, "learning_rate": 2.6731067700668712e-05, "loss": 0.0974, "step": 86130 }, { "epoch": 3.1303147031034233, "grad_norm": 0.2251403033733368, "learning_rate": 2.6725890800105486e-05, "loss": 0.0755, "step": 86140 }, { "epoch": 3.1306781016062213, "grad_norm": 0.9207643270492554, "learning_rate": 2.6720713825179767e-05, "loss": 0.1739, "step": 86150 }, { "epoch": 3.1310415001090197, "grad_norm": 0.5833568572998047, "learning_rate": 2.671553677611461e-05, "loss": 0.0747, "step": 86160 }, { "epoch": 3.1314048986118177, "grad_norm": 0.5385452508926392, "learning_rate": 
2.6710359653133078e-05, "loss": 2.187, "step": 86170 }, { "epoch": 3.1317682971146157, "grad_norm": 0.7017218470573425, "learning_rate": 2.670518245645823e-05, "loss": 0.0838, "step": 86180 }, { "epoch": 3.132131695617414, "grad_norm": 0.682905375957489, "learning_rate": 2.670000518631314e-05, "loss": 0.0946, "step": 86190 }, { "epoch": 3.132495094120212, "grad_norm": 0.5854523181915283, "learning_rate": 2.669482784292087e-05, "loss": 0.1085, "step": 86200 }, { "epoch": 3.1328584926230105, "grad_norm": 0.4975448548793793, "learning_rate": 2.6689650426504504e-05, "loss": 0.0851, "step": 86210 }, { "epoch": 3.1332218911258085, "grad_norm": 3.5613021850585938, "learning_rate": 2.6684472937287115e-05, "loss": 0.0911, "step": 86220 }, { "epoch": 3.1335852896286065, "grad_norm": 1.3843308687210083, "learning_rate": 2.6679295375491776e-05, "loss": 0.0627, "step": 86230 }, { "epoch": 3.133948688131405, "grad_norm": 3.7349839210510254, "learning_rate": 2.6674117741341575e-05, "loss": 0.0982, "step": 86240 }, { "epoch": 3.134312086634203, "grad_norm": 0.9458445906639099, "learning_rate": 2.66689400350596e-05, "loss": 0.1023, "step": 86250 }, { "epoch": 3.1346754851370013, "grad_norm": 0.45296090841293335, "learning_rate": 2.6663762256868928e-05, "loss": 0.0625, "step": 86260 }, { "epoch": 3.1350388836397993, "grad_norm": 0.7236283421516418, "learning_rate": 2.665858440699267e-05, "loss": 0.0708, "step": 86270 }, { "epoch": 3.1354022821425978, "grad_norm": 2.072012424468994, "learning_rate": 2.665340648565391e-05, "loss": 0.0683, "step": 86280 }, { "epoch": 3.1357656806453957, "grad_norm": 0.8755659461021423, "learning_rate": 2.6648228493075744e-05, "loss": 0.0866, "step": 86290 }, { "epoch": 3.1361290791481937, "grad_norm": 0.7825614213943481, "learning_rate": 2.6643050429481275e-05, "loss": 0.0887, "step": 86300 }, { "epoch": 3.136492477650992, "grad_norm": 0.9423545002937317, "learning_rate": 2.6637872295093612e-05, "loss": 0.093, "step": 86310 }, { "epoch": 
3.13685587615379, "grad_norm": 0.37489980459213257, "learning_rate": 2.6632694090135856e-05, "loss": 0.0797, "step": 86320 }, { "epoch": 3.1372192746565886, "grad_norm": 0.4927680790424347, "learning_rate": 2.6627515814831126e-05, "loss": 0.0708, "step": 86330 }, { "epoch": 3.1375826731593865, "grad_norm": 0.7253931760787964, "learning_rate": 2.6622337469402537e-05, "loss": 0.0974, "step": 86340 }, { "epoch": 3.137946071662185, "grad_norm": 0.8817862272262573, "learning_rate": 2.6617159054073182e-05, "loss": 0.0893, "step": 86350 }, { "epoch": 3.138309470164983, "grad_norm": 0.9500261545181274, "learning_rate": 2.6611980569066208e-05, "loss": 0.0715, "step": 86360 }, { "epoch": 3.138672868667781, "grad_norm": 0.5115967988967896, "learning_rate": 2.660680201460472e-05, "loss": 0.0713, "step": 86370 }, { "epoch": 3.1390362671705794, "grad_norm": 0.752263069152832, "learning_rate": 2.6601623390911857e-05, "loss": 0.0685, "step": 86380 }, { "epoch": 3.1393996656733774, "grad_norm": 0.39566388726234436, "learning_rate": 2.6596444698210738e-05, "loss": 1.6043, "step": 86390 }, { "epoch": 3.139763064176176, "grad_norm": 0.3248102068901062, "learning_rate": 2.6591265936724495e-05, "loss": 0.2024, "step": 86400 }, { "epoch": 3.139763064176176, "eval_loss": 0.30238452553749084, "eval_runtime": 179.688, "eval_samples_per_second": 41.26, "eval_steps_per_second": 5.159, "eval_wer": 0.142565396554541, "step": 86400 }, { "epoch": 3.1401264626789738, "grad_norm": 0.7450832724571228, "learning_rate": 2.6586087106676272e-05, "loss": 0.0831, "step": 86410 }, { "epoch": 3.1404898611817718, "grad_norm": 0.35402756929397583, "learning_rate": 2.658090820828919e-05, "loss": 0.0802, "step": 86420 }, { "epoch": 3.14085325968457, "grad_norm": 0.7039386034011841, "learning_rate": 2.65757292417864e-05, "loss": 0.0693, "step": 86430 }, { "epoch": 3.141216658187368, "grad_norm": 0.5584103465080261, "learning_rate": 2.6570550207391043e-05, "loss": 0.0747, "step": 86440 }, { "epoch": 
3.1415800566901666, "grad_norm": 0.5645958185195923, "learning_rate": 2.656537110532627e-05, "loss": 0.0887, "step": 86450 }, { "epoch": 3.1419434551929646, "grad_norm": 0.7481971979141235, "learning_rate": 2.656019193581522e-05, "loss": 0.1027, "step": 86460 }, { "epoch": 3.1423068536957626, "grad_norm": 0.48159149289131165, "learning_rate": 2.6555012699081057e-05, "loss": 0.0642, "step": 86470 }, { "epoch": 3.142670252198561, "grad_norm": 0.6558395028114319, "learning_rate": 2.654983339534693e-05, "loss": 0.0767, "step": 86480 }, { "epoch": 3.143033650701359, "grad_norm": 0.8240875601768494, "learning_rate": 2.654465402483599e-05, "loss": 0.1036, "step": 86490 }, { "epoch": 3.1433970492041574, "grad_norm": 1.9741828441619873, "learning_rate": 2.6539474587771406e-05, "loss": 0.0872, "step": 86500 }, { "epoch": 3.1437604477069554, "grad_norm": 0.4783799350261688, "learning_rate": 2.6534295084376337e-05, "loss": 3.1495, "step": 86510 }, { "epoch": 3.1441238462097534, "grad_norm": 0.5813152194023132, "learning_rate": 2.652911551487396e-05, "loss": 0.078, "step": 86520 }, { "epoch": 3.144487244712552, "grad_norm": 1.1872795820236206, "learning_rate": 2.6523935879487432e-05, "loss": 0.0817, "step": 86530 }, { "epoch": 3.14485064321535, "grad_norm": 0.5647823214530945, "learning_rate": 2.6518756178439925e-05, "loss": 1.1158, "step": 86540 }, { "epoch": 3.1452140417181482, "grad_norm": 1.1907984018325806, "learning_rate": 2.6513576411954627e-05, "loss": 0.1104, "step": 86550 }, { "epoch": 3.145577440220946, "grad_norm": 0.5368396043777466, "learning_rate": 2.6508396580254697e-05, "loss": 0.0929, "step": 86560 }, { "epoch": 3.1459408387237446, "grad_norm": 0.34880682826042175, "learning_rate": 2.650321668356333e-05, "loss": 0.0714, "step": 86570 }, { "epoch": 3.1463042372265426, "grad_norm": 0.4075338542461395, "learning_rate": 2.6498036722103703e-05, "loss": 0.0836, "step": 86580 }, { "epoch": 3.1466676357293406, "grad_norm": 5.59214973449707, "learning_rate": 
2.6492856696099006e-05, "loss": 0.0829, "step": 86590 }, { "epoch": 3.147031034232139, "grad_norm": 0.9753894209861755, "learning_rate": 2.6487676605772426e-05, "loss": 0.0899, "step": 86600 }, { "epoch": 3.147394432734937, "grad_norm": 0.3571523129940033, "learning_rate": 2.648249645134715e-05, "loss": 0.0722, "step": 86610 }, { "epoch": 3.1477578312377354, "grad_norm": 0.5043341517448425, "learning_rate": 2.647731623304638e-05, "loss": 0.0663, "step": 86620 }, { "epoch": 3.1481212297405334, "grad_norm": 5.329759120941162, "learning_rate": 2.647213595109331e-05, "loss": 0.0844, "step": 86630 }, { "epoch": 3.148484628243332, "grad_norm": 0.3437232971191406, "learning_rate": 2.6466955605711136e-05, "loss": 0.1001, "step": 86640 }, { "epoch": 3.14884802674613, "grad_norm": 2.181110143661499, "learning_rate": 2.646177519712307e-05, "loss": 0.0972, "step": 86650 }, { "epoch": 3.149211425248928, "grad_norm": 0.8751040101051331, "learning_rate": 2.6456594725552302e-05, "loss": 0.0811, "step": 86660 }, { "epoch": 3.1495748237517263, "grad_norm": 0.36447498202323914, "learning_rate": 2.6451414191222062e-05, "loss": 0.0606, "step": 86670 }, { "epoch": 3.1499382222545242, "grad_norm": 0.47651347517967224, "learning_rate": 2.644623359435554e-05, "loss": 0.0665, "step": 86680 }, { "epoch": 3.1503016207573227, "grad_norm": 0.4340047836303711, "learning_rate": 2.6441052935175964e-05, "loss": 0.0787, "step": 86690 }, { "epoch": 3.1506650192601207, "grad_norm": 6.734339714050293, "learning_rate": 2.6435872213906538e-05, "loss": 0.0808, "step": 86700 }, { "epoch": 3.1510284177629186, "grad_norm": 0.3784515857696533, "learning_rate": 2.6430691430770494e-05, "loss": 0.0739, "step": 86710 }, { "epoch": 3.151391816265717, "grad_norm": 1.0528838634490967, "learning_rate": 2.6425510585991047e-05, "loss": 0.0704, "step": 86720 }, { "epoch": 3.151755214768515, "grad_norm": 0.8241010308265686, "learning_rate": 2.6420329679791412e-05, "loss": 0.0657, "step": 86730 }, { "epoch": 
3.1521186132713135, "grad_norm": 0.5599542260169983, "learning_rate": 2.6415148712394833e-05, "loss": 0.0743, "step": 86740 }, { "epoch": 3.1524820117741115, "grad_norm": 1.3045130968093872, "learning_rate": 2.6409967684024522e-05, "loss": 0.0808, "step": 86750 }, { "epoch": 3.1528454102769095, "grad_norm": 0.8639160394668579, "learning_rate": 2.640478659490373e-05, "loss": 0.0784, "step": 86760 }, { "epoch": 3.153208808779708, "grad_norm": 0.4310116767883301, "learning_rate": 2.6399605445255677e-05, "loss": 0.0792, "step": 86770 }, { "epoch": 3.153572207282506, "grad_norm": 0.40688809752464294, "learning_rate": 2.6394424235303606e-05, "loss": 0.069, "step": 86780 }, { "epoch": 3.1539356057853043, "grad_norm": 0.7720523476600647, "learning_rate": 2.638924296527076e-05, "loss": 0.0791, "step": 86790 }, { "epoch": 3.1542990042881023, "grad_norm": 0.520601212978363, "learning_rate": 2.638406163538037e-05, "loss": 0.0901, "step": 86800 }, { "epoch": 3.1546624027909003, "grad_norm": 0.37290602922439575, "learning_rate": 2.6378880245855698e-05, "loss": 0.1093, "step": 86810 }, { "epoch": 3.1550258012936987, "grad_norm": 0.9572335481643677, "learning_rate": 2.637369879691997e-05, "loss": 0.6554, "step": 86820 }, { "epoch": 3.1553891997964967, "grad_norm": 0.8544325232505798, "learning_rate": 2.6368517288796456e-05, "loss": 0.0846, "step": 86830 }, { "epoch": 3.155752598299295, "grad_norm": 1.4652429819107056, "learning_rate": 2.6363335721708403e-05, "loss": 0.1105, "step": 86840 }, { "epoch": 3.156115996802093, "grad_norm": 3.825007438659668, "learning_rate": 2.6358154095879063e-05, "loss": 0.095, "step": 86850 }, { "epoch": 3.1564793953048915, "grad_norm": 1.822260856628418, "learning_rate": 2.6352972411531696e-05, "loss": 0.0825, "step": 86860 }, { "epoch": 3.1568427938076895, "grad_norm": 0.46824485063552856, "learning_rate": 2.6347790668889553e-05, "loss": 0.0716, "step": 86870 }, { "epoch": 3.1572061923104875, "grad_norm": 1.1813397407531738, "learning_rate": 
2.6342608868175916e-05, "loss": 0.0891, "step": 86880 }, { "epoch": 3.157569590813286, "grad_norm": 0.7336288690567017, "learning_rate": 2.6337427009614034e-05, "loss": 0.1021, "step": 86890 }, { "epoch": 3.157932989316084, "grad_norm": 0.4944972097873688, "learning_rate": 2.6332245093427187e-05, "loss": 0.0873, "step": 86900 }, { "epoch": 3.1582963878188823, "grad_norm": 0.5169403553009033, "learning_rate": 2.6327063119838634e-05, "loss": 0.0966, "step": 86910 }, { "epoch": 3.1586597863216803, "grad_norm": 0.6052831411361694, "learning_rate": 2.6321881089071655e-05, "loss": 0.0665, "step": 86920 }, { "epoch": 3.1590231848244787, "grad_norm": 0.4833454489707947, "learning_rate": 2.6316699001349526e-05, "loss": 0.0732, "step": 86930 }, { "epoch": 3.1593865833272767, "grad_norm": 0.6335532069206238, "learning_rate": 2.6311516856895512e-05, "loss": 0.0874, "step": 86940 }, { "epoch": 3.1597499818300747, "grad_norm": 1.6006765365600586, "learning_rate": 2.630633465593291e-05, "loss": 0.1257, "step": 86950 }, { "epoch": 3.160113380332873, "grad_norm": 0.3963083028793335, "learning_rate": 2.6301152398684998e-05, "loss": 0.0822, "step": 86960 }, { "epoch": 3.160476778835671, "grad_norm": 0.44627153873443604, "learning_rate": 2.6295970085375054e-05, "loss": 0.0622, "step": 86970 }, { "epoch": 3.1608401773384696, "grad_norm": 0.7904446125030518, "learning_rate": 2.629078771622637e-05, "loss": 0.0753, "step": 86980 }, { "epoch": 3.1612035758412675, "grad_norm": 0.5941157937049866, "learning_rate": 2.628560529146224e-05, "loss": 0.0884, "step": 86990 }, { "epoch": 3.1615669743440655, "grad_norm": 0.6121344566345215, "learning_rate": 2.6280422811305948e-05, "loss": 0.0678, "step": 87000 }, { "epoch": 3.1615669743440655, "eval_loss": 0.33216938376426697, "eval_runtime": 180.0386, "eval_samples_per_second": 41.18, "eval_steps_per_second": 5.149, "eval_wer": 0.1409860765697896, "step": 87000 }, { "epoch": 3.161930372846864, "grad_norm": 1.1521292924880981, "learning_rate": 
2.6275240275980795e-05, "loss": 0.0904, "step": 87010 }, { "epoch": 3.162293771349662, "grad_norm": 0.48100772500038147, "learning_rate": 2.6270057685710074e-05, "loss": 0.0725, "step": 87020 }, { "epoch": 3.1626571698524604, "grad_norm": 0.4358821511268616, "learning_rate": 2.6264875040717092e-05, "loss": 0.0714, "step": 87030 }, { "epoch": 3.1630205683552584, "grad_norm": 0.907632052898407, "learning_rate": 2.625969234122514e-05, "loss": 0.0949, "step": 87040 }, { "epoch": 3.1633839668580563, "grad_norm": 0.6242339015007019, "learning_rate": 2.6254509587457527e-05, "loss": 0.0856, "step": 87050 }, { "epoch": 3.1637473653608548, "grad_norm": 0.8153877854347229, "learning_rate": 2.6249326779637555e-05, "loss": 0.1291, "step": 87060 }, { "epoch": 3.1641107638636528, "grad_norm": 1.0637160539627075, "learning_rate": 2.6244143917988544e-05, "loss": 0.0709, "step": 87070 }, { "epoch": 3.164474162366451, "grad_norm": 0.33303794264793396, "learning_rate": 2.6238961002733796e-05, "loss": 0.0828, "step": 87080 }, { "epoch": 3.164837560869249, "grad_norm": 0.676785945892334, "learning_rate": 2.623377803409663e-05, "loss": 0.1132, "step": 87090 }, { "epoch": 3.165200959372047, "grad_norm": 1.7492424249649048, "learning_rate": 2.6228595012300356e-05, "loss": 0.0876, "step": 87100 }, { "epoch": 3.1655643578748456, "grad_norm": 0.5225327610969543, "learning_rate": 2.622341193756829e-05, "loss": 0.1208, "step": 87110 }, { "epoch": 3.1659277563776436, "grad_norm": 0.5629643201828003, "learning_rate": 2.6218228810123763e-05, "loss": 0.0862, "step": 87120 }, { "epoch": 3.166291154880442, "grad_norm": 1.335174560546875, "learning_rate": 2.6213045630190084e-05, "loss": 0.0746, "step": 87130 }, { "epoch": 3.16665455338324, "grad_norm": 0.6635318398475647, "learning_rate": 2.6207862397990597e-05, "loss": 0.0903, "step": 87140 }, { "epoch": 3.1670179518860384, "grad_norm": 0.9929222464561462, "learning_rate": 2.620267911374861e-05, "loss": 0.1024, "step": 87150 }, { "epoch": 
3.1673813503888364, "grad_norm": 0.5913928151130676, "learning_rate": 2.619749577768745e-05, "loss": 0.114, "step": 87160 }, { "epoch": 3.1677447488916344, "grad_norm": 0.9411669373512268, "learning_rate": 2.6192312390030472e-05, "loss": 0.0878, "step": 87170 }, { "epoch": 3.168108147394433, "grad_norm": 2.4124414920806885, "learning_rate": 2.6187128951000982e-05, "loss": 0.0937, "step": 87180 }, { "epoch": 3.168471545897231, "grad_norm": 0.3180709481239319, "learning_rate": 2.6181945460822343e-05, "loss": 0.0956, "step": 87190 }, { "epoch": 3.168834944400029, "grad_norm": 0.9713842272758484, "learning_rate": 2.6176761919717867e-05, "loss": 0.111, "step": 87200 }, { "epoch": 3.169198342902827, "grad_norm": 0.7940172553062439, "learning_rate": 2.6171578327910918e-05, "loss": 0.0794, "step": 87210 }, { "epoch": 3.1695617414056256, "grad_norm": 0.7418053150177002, "learning_rate": 2.6166394685624823e-05, "loss": 0.0692, "step": 87220 }, { "epoch": 3.1699251399084236, "grad_norm": 0.4988052546977997, "learning_rate": 2.6161210993082925e-05, "loss": 0.0702, "step": 87230 }, { "epoch": 3.1702885384112216, "grad_norm": 0.41006624698638916, "learning_rate": 2.6156027250508587e-05, "loss": 0.1156, "step": 87240 }, { "epoch": 3.17065193691402, "grad_norm": 0.7204731702804565, "learning_rate": 2.615084345812514e-05, "loss": 0.0822, "step": 87250 }, { "epoch": 3.171015335416818, "grad_norm": 0.5459019541740417, "learning_rate": 2.6145659616155948e-05, "loss": 0.076, "step": 87260 }, { "epoch": 3.1713787339196164, "grad_norm": 0.5608823299407959, "learning_rate": 2.6140475724824355e-05, "loss": 0.076, "step": 87270 }, { "epoch": 3.1717421324224144, "grad_norm": 0.6672724485397339, "learning_rate": 2.613529178435372e-05, "loss": 0.0617, "step": 87280 }, { "epoch": 3.1721055309252124, "grad_norm": 0.4979981482028961, "learning_rate": 2.6130107794967412e-05, "loss": 0.0852, "step": 87290 }, { "epoch": 3.172468929428011, "grad_norm": 4.004956245422363, "learning_rate": 
2.612492375688877e-05, "loss": 0.1044, "step": 87300 }, { "epoch": 3.172832327930809, "grad_norm": 0.3019026517868042, "learning_rate": 2.611973967034117e-05, "loss": 0.1081, "step": 87310 }, { "epoch": 3.1731957264336073, "grad_norm": 0.5312141180038452, "learning_rate": 2.6114555535547964e-05, "loss": 0.072, "step": 87320 }, { "epoch": 3.1735591249364052, "grad_norm": 0.5587911605834961, "learning_rate": 2.6109371352732537e-05, "loss": 0.0663, "step": 87330 }, { "epoch": 3.1739225234392032, "grad_norm": 0.7223249077796936, "learning_rate": 2.610418712211824e-05, "loss": 0.0689, "step": 87340 }, { "epoch": 3.1742859219420017, "grad_norm": 0.9574065208435059, "learning_rate": 2.6099002843928444e-05, "loss": 0.1149, "step": 87350 }, { "epoch": 3.1746493204447996, "grad_norm": 1.298269510269165, "learning_rate": 2.6093818518386535e-05, "loss": 0.0749, "step": 87360 }, { "epoch": 3.175012718947598, "grad_norm": 215.23536682128906, "learning_rate": 2.6088634145715867e-05, "loss": 3.8479, "step": 87370 }, { "epoch": 3.175376117450396, "grad_norm": 0.8245293498039246, "learning_rate": 2.608344972613984e-05, "loss": 0.0947, "step": 87380 }, { "epoch": 3.1757395159531945, "grad_norm": 0.462643027305603, "learning_rate": 2.6078265259881805e-05, "loss": 0.0876, "step": 87390 }, { "epoch": 3.1761029144559925, "grad_norm": 1.069300651550293, "learning_rate": 2.607308074716517e-05, "loss": 0.0977, "step": 87400 }, { "epoch": 3.1764663129587904, "grad_norm": 0.46647369861602783, "learning_rate": 2.6067896188213296e-05, "loss": 0.1076, "step": 87410 }, { "epoch": 3.176829711461589, "grad_norm": 0.46599775552749634, "learning_rate": 2.6062711583249578e-05, "loss": 0.1663, "step": 87420 }, { "epoch": 3.177193109964387, "grad_norm": 0.6090976595878601, "learning_rate": 2.60575269324974e-05, "loss": 0.0798, "step": 87430 }, { "epoch": 3.1775565084671853, "grad_norm": 3.0198044776916504, "learning_rate": 2.6052342236180144e-05, "loss": 0.0952, "step": 87440 }, { "epoch": 
3.1779199069699833, "grad_norm": 0.8521358370780945, "learning_rate": 2.604715749452121e-05, "loss": 0.0942, "step": 87450 }, { "epoch": 3.1782833054727813, "grad_norm": 1.1309806108474731, "learning_rate": 2.604197270774398e-05, "loss": 0.0858, "step": 87460 }, { "epoch": 3.1786467039755797, "grad_norm": 2.8067383766174316, "learning_rate": 2.603678787607186e-05, "loss": 0.0718, "step": 87470 }, { "epoch": 3.1790101024783777, "grad_norm": 1.6722538471221924, "learning_rate": 2.603160299972824e-05, "loss": 0.0787, "step": 87480 }, { "epoch": 3.179373500981176, "grad_norm": 5.742082595825195, "learning_rate": 2.602641807893651e-05, "loss": 0.1014, "step": 87490 }, { "epoch": 3.179736899483974, "grad_norm": 1.0743470191955566, "learning_rate": 2.6021233113920078e-05, "loss": 0.1036, "step": 87500 }, { "epoch": 3.1801002979867725, "grad_norm": 2.879175901412964, "learning_rate": 2.6016048104902345e-05, "loss": 0.1001, "step": 87510 }, { "epoch": 3.1804636964895705, "grad_norm": 0.37631672620773315, "learning_rate": 2.601086305210672e-05, "loss": 0.0747, "step": 87520 }, { "epoch": 3.1808270949923685, "grad_norm": 0.38719940185546875, "learning_rate": 2.6005677955756603e-05, "loss": 0.8272, "step": 87530 }, { "epoch": 3.181190493495167, "grad_norm": 0.6106435060501099, "learning_rate": 2.6000492816075395e-05, "loss": 0.085, "step": 87540 }, { "epoch": 3.181553891997965, "grad_norm": 0.6239719986915588, "learning_rate": 2.5995307633286515e-05, "loss": 0.079, "step": 87550 }, { "epoch": 3.1819172905007633, "grad_norm": 0.8448728919029236, "learning_rate": 2.5990122407613366e-05, "loss": 0.0744, "step": 87560 }, { "epoch": 3.1822806890035613, "grad_norm": 0.7143642902374268, "learning_rate": 2.598493713927937e-05, "loss": 0.0686, "step": 87570 }, { "epoch": 3.1826440875063593, "grad_norm": 0.44550129771232605, "learning_rate": 2.597975182850793e-05, "loss": 0.0585, "step": 87580 }, { "epoch": 3.1830074860091577, "grad_norm": 0.43901458382606506, "learning_rate": 
2.5974566475522478e-05, "loss": 0.0831, "step": 87590 }, { "epoch": 3.1833708845119557, "grad_norm": 1.0826005935668945, "learning_rate": 2.5969381080546417e-05, "loss": 0.0793, "step": 87600 }, { "epoch": 3.1833708845119557, "eval_loss": 0.3327307105064392, "eval_runtime": 180.1166, "eval_samples_per_second": 41.162, "eval_steps_per_second": 5.147, "eval_wer": 0.1424837075898125, "step": 87600 }, { "epoch": 3.183734283014754, "grad_norm": 0.43947404623031616, "learning_rate": 2.596419564380318e-05, "loss": 0.1618, "step": 87610 }, { "epoch": 3.184097681517552, "grad_norm": 1.2502785921096802, "learning_rate": 2.5959010165516185e-05, "loss": 0.081, "step": 87620 }, { "epoch": 3.18446108002035, "grad_norm": 0.45034274458885193, "learning_rate": 2.5953824645908848e-05, "loss": 0.0821, "step": 87630 }, { "epoch": 3.1848244785231485, "grad_norm": 0.5315303206443787, "learning_rate": 2.5948639085204607e-05, "loss": 0.1093, "step": 87640 }, { "epoch": 3.1851878770259465, "grad_norm": 1.181911826133728, "learning_rate": 2.5943453483626873e-05, "loss": 0.0766, "step": 87650 }, { "epoch": 3.185551275528745, "grad_norm": 0.4337679147720337, "learning_rate": 2.5938267841399096e-05, "loss": 0.0886, "step": 87660 }, { "epoch": 3.185914674031543, "grad_norm": 4.992573261260986, "learning_rate": 2.5933082158744686e-05, "loss": 0.0955, "step": 87670 }, { "epoch": 3.1862780725343414, "grad_norm": 0.401584267616272, "learning_rate": 2.5927896435887094e-05, "loss": 0.0802, "step": 87680 }, { "epoch": 3.1866414710371394, "grad_norm": 0.39341244101524353, "learning_rate": 2.592271067304975e-05, "loss": 0.092, "step": 87690 }, { "epoch": 3.1870048695399373, "grad_norm": 0.8146529197692871, "learning_rate": 2.5917524870456074e-05, "loss": 0.0875, "step": 87700 }, { "epoch": 3.1873682680427358, "grad_norm": 0.4977721869945526, "learning_rate": 2.5912339028329524e-05, "loss": 0.0709, "step": 87710 }, { "epoch": 3.1877316665455337, "grad_norm": 0.9400126338005066, "learning_rate": 
2.5907153146893527e-05, "loss": 0.0756, "step": 87720 }, { "epoch": 3.188095065048332, "grad_norm": 1.8934528827667236, "learning_rate": 2.590196722637153e-05, "loss": 0.1116, "step": 87730 }, { "epoch": 3.18845846355113, "grad_norm": 0.3290179669857025, "learning_rate": 2.5896781266986974e-05, "loss": 0.0794, "step": 87740 }, { "epoch": 3.188821862053928, "grad_norm": 0.42296847701072693, "learning_rate": 2.589159526896331e-05, "loss": 0.0917, "step": 87750 }, { "epoch": 3.1891852605567266, "grad_norm": 1.1522185802459717, "learning_rate": 2.5886409232523966e-05, "loss": 0.0855, "step": 87760 }, { "epoch": 3.1895486590595246, "grad_norm": 0.9515447616577148, "learning_rate": 2.5881223157892415e-05, "loss": 0.0809, "step": 87770 }, { "epoch": 3.189912057562323, "grad_norm": 0.8773075342178345, "learning_rate": 2.587603704529209e-05, "loss": 0.0754, "step": 87780 }, { "epoch": 3.190275456065121, "grad_norm": 0.6473771333694458, "learning_rate": 2.587085089494644e-05, "loss": 0.0887, "step": 87790 }, { "epoch": 3.1906388545679194, "grad_norm": 1.6101555824279785, "learning_rate": 2.586566470707893e-05, "loss": 0.0901, "step": 87800 }, { "epoch": 3.1910022530707174, "grad_norm": 0.25950807332992554, "learning_rate": 2.5860478481912996e-05, "loss": 0.1501, "step": 87810 }, { "epoch": 3.1913656515735154, "grad_norm": 1.339440941810608, "learning_rate": 2.585529221967212e-05, "loss": 0.0645, "step": 87820 }, { "epoch": 3.191729050076314, "grad_norm": 0.8550190329551697, "learning_rate": 2.5850105920579736e-05, "loss": 0.0686, "step": 87830 }, { "epoch": 3.192092448579112, "grad_norm": 0.5492476224899292, "learning_rate": 2.584491958485932e-05, "loss": 0.0913, "step": 87840 }, { "epoch": 3.19245584708191, "grad_norm": 0.8055387735366821, "learning_rate": 2.5839733212734323e-05, "loss": 0.0886, "step": 87850 }, { "epoch": 3.192819245584708, "grad_norm": 0.5844281315803528, "learning_rate": 2.5834546804428207e-05, "loss": 0.0589, "step": 87860 }, { "epoch": 
3.193182644087506, "grad_norm": 0.47842904925346375, "learning_rate": 2.582936036016444e-05, "loss": 0.0664, "step": 87870 }, { "epoch": 3.1935460425903046, "grad_norm": 0.5088633894920349, "learning_rate": 2.5824173880166486e-05, "loss": 0.0858, "step": 87880 }, { "epoch": 3.1939094410931026, "grad_norm": 0.4572700262069702, "learning_rate": 2.581898736465781e-05, "loss": 0.0813, "step": 87890 }, { "epoch": 3.194272839595901, "grad_norm": 1.1615513563156128, "learning_rate": 2.5813800813861884e-05, "loss": 0.1006, "step": 87900 }, { "epoch": 3.194636238098699, "grad_norm": 0.410319060087204, "learning_rate": 2.5808614228002174e-05, "loss": 0.0918, "step": 87910 }, { "epoch": 3.194999636601497, "grad_norm": 0.28735676407814026, "learning_rate": 2.5803427607302154e-05, "loss": 0.0691, "step": 87920 }, { "epoch": 3.1953630351042954, "grad_norm": 0.9969823956489563, "learning_rate": 2.5798240951985302e-05, "loss": 0.0749, "step": 87930 }, { "epoch": 3.1957264336070934, "grad_norm": 1.1910429000854492, "learning_rate": 2.579305426227509e-05, "loss": 0.1894, "step": 87940 }, { "epoch": 3.196089832109892, "grad_norm": 0.9283407926559448, "learning_rate": 2.5787867538394982e-05, "loss": 0.0854, "step": 87950 }, { "epoch": 3.19645323061269, "grad_norm": 0.5121541619300842, "learning_rate": 2.5782680780568476e-05, "loss": 0.4152, "step": 87960 }, { "epoch": 3.1968166291154883, "grad_norm": 0.6104360222816467, "learning_rate": 2.577749398901903e-05, "loss": 0.0793, "step": 87970 }, { "epoch": 3.1971800276182862, "grad_norm": 0.3884080648422241, "learning_rate": 2.577230716397014e-05, "loss": 0.0808, "step": 87980 }, { "epoch": 3.197543426121084, "grad_norm": 1.0838251113891602, "learning_rate": 2.5767120305645277e-05, "loss": 0.0883, "step": 87990 }, { "epoch": 3.1979068246238826, "grad_norm": 0.7806040048599243, "learning_rate": 2.5761933414267936e-05, "loss": 0.0721, "step": 88000 }, { "epoch": 3.1982702231266806, "grad_norm": 0.4943070709705353, "learning_rate": 
2.575674649006159e-05, "loss": 0.0751, "step": 88010 }, { "epoch": 3.198633621629479, "grad_norm": 1.1027365922927856, "learning_rate": 2.575155953324973e-05, "loss": 0.0714, "step": 88020 }, { "epoch": 3.198997020132277, "grad_norm": 0.5191725492477417, "learning_rate": 2.5746372544055847e-05, "loss": 0.0699, "step": 88030 }, { "epoch": 3.199360418635075, "grad_norm": 0.9866086840629578, "learning_rate": 2.5741185522703424e-05, "loss": 0.1314, "step": 88040 }, { "epoch": 3.1997238171378735, "grad_norm": 2.110804319381714, "learning_rate": 2.5735998469415956e-05, "loss": 0.0873, "step": 88050 }, { "epoch": 3.2000872156406714, "grad_norm": 0.9099189639091492, "learning_rate": 2.573081138441693e-05, "loss": 0.1216, "step": 88060 }, { "epoch": 3.20045061414347, "grad_norm": 0.6158716082572937, "learning_rate": 2.572562426792985e-05, "loss": 0.0683, "step": 88070 }, { "epoch": 3.200814012646268, "grad_norm": 0.6630299091339111, "learning_rate": 2.5720437120178186e-05, "loss": 0.089, "step": 88080 }, { "epoch": 3.2011774111490663, "grad_norm": 0.4174249768257141, "learning_rate": 2.5715249941385467e-05, "loss": 0.0949, "step": 88090 }, { "epoch": 3.2015408096518643, "grad_norm": 0.7170994877815247, "learning_rate": 2.5710062731775164e-05, "loss": 0.0864, "step": 88100 }, { "epoch": 3.2019042081546623, "grad_norm": 0.5610363483428955, "learning_rate": 2.5704875491570784e-05, "loss": 0.0878, "step": 88110 }, { "epoch": 3.2022676066574607, "grad_norm": 0.41790106892585754, "learning_rate": 2.5699688220995834e-05, "loss": 3.9644, "step": 88120 }, { "epoch": 3.2026310051602587, "grad_norm": 0.8751020431518555, "learning_rate": 2.5694500920273795e-05, "loss": 0.0729, "step": 88130 }, { "epoch": 3.202994403663057, "grad_norm": 0.5966963768005371, "learning_rate": 2.5689313589628193e-05, "loss": 0.187, "step": 88140 }, { "epoch": 3.203357802165855, "grad_norm": 0.8189564943313599, "learning_rate": 2.5684126229282516e-05, "loss": 0.0749, "step": 88150 }, { "epoch": 
3.203721200668653, "grad_norm": 1.0785588026046753, "learning_rate": 2.5678938839460283e-05, "loss": 0.0835, "step": 88160 }, { "epoch": 3.2040845991714515, "grad_norm": 1.5373785495758057, "learning_rate": 2.5673751420384983e-05, "loss": 0.0789, "step": 88170 }, { "epoch": 3.2044479976742495, "grad_norm": 0.6006679534912109, "learning_rate": 2.566856397228013e-05, "loss": 0.0694, "step": 88180 }, { "epoch": 3.204811396177048, "grad_norm": 0.9026358127593994, "learning_rate": 2.5663376495369242e-05, "loss": 0.0847, "step": 88190 }, { "epoch": 3.205174794679846, "grad_norm": 1.896236777305603, "learning_rate": 2.565818898987581e-05, "loss": 0.0814, "step": 88200 }, { "epoch": 3.205174794679846, "eval_loss": 0.3309068977832794, "eval_runtime": 180.8021, "eval_samples_per_second": 41.006, "eval_steps_per_second": 5.127, "eval_wer": 0.142601702761087, "step": 88200 }, { "epoch": 3.205538193182644, "grad_norm": 0.5485680103302002, "learning_rate": 2.565300145602336e-05, "loss": 0.0943, "step": 88210 }, { "epoch": 3.2059015916854423, "grad_norm": 0.5330252647399902, "learning_rate": 2.564833265149393e-05, "loss": 0.8654, "step": 88220 }, { "epoch": 3.2062649901882403, "grad_norm": 1.7009663581848145, "learning_rate": 2.564314506437512e-05, "loss": 0.0911, "step": 88230 }, { "epoch": 3.2066283886910387, "grad_norm": 0.3746320903301239, "learning_rate": 2.563795744954548e-05, "loss": 0.0656, "step": 88240 }, { "epoch": 3.2069917871938367, "grad_norm": 1.1512928009033203, "learning_rate": 2.5632769807228512e-05, "loss": 0.0897, "step": 88250 }, { "epoch": 3.207355185696635, "grad_norm": 3.660646915435791, "learning_rate": 2.562758213764776e-05, "loss": 0.0859, "step": 88260 }, { "epoch": 3.207718584199433, "grad_norm": 0.3683335781097412, "learning_rate": 2.5622394441026716e-05, "loss": 0.1157, "step": 88270 }, { "epoch": 3.208081982702231, "grad_norm": 0.9600638151168823, "learning_rate": 2.561720671758891e-05, "loss": 0.077, "step": 88280 }, { "epoch": 3.2084453812050295, 
"grad_norm": 1.4279944896697998, "learning_rate": 2.5612018967557866e-05, "loss": 0.0889, "step": 88290 }, { "epoch": 3.2088087797078275, "grad_norm": 0.37430229783058167, "learning_rate": 2.5606831191157103e-05, "loss": 0.0924, "step": 88300 }, { "epoch": 3.209172178210626, "grad_norm": 0.4506620764732361, "learning_rate": 2.5601643388610137e-05, "loss": 0.095, "step": 88310 }, { "epoch": 3.209535576713424, "grad_norm": 0.5026054382324219, "learning_rate": 2.5596455560140504e-05, "loss": 0.0703, "step": 88320 }, { "epoch": 3.209898975216222, "grad_norm": 0.2740514278411865, "learning_rate": 2.559126770597173e-05, "loss": 0.081, "step": 88330 }, { "epoch": 3.2102623737190203, "grad_norm": 0.4658343493938446, "learning_rate": 2.558607982632732e-05, "loss": 0.1092, "step": 88340 }, { "epoch": 3.2106257722218183, "grad_norm": 0.7769015431404114, "learning_rate": 2.5580891921430822e-05, "loss": 0.0985, "step": 88350 }, { "epoch": 3.2109891707246168, "grad_norm": 0.4521372616291046, "learning_rate": 2.5575703991505758e-05, "loss": 0.0765, "step": 88360 }, { "epoch": 3.2113525692274147, "grad_norm": 0.41286054253578186, "learning_rate": 2.5570516036775656e-05, "loss": 0.0758, "step": 88370 }, { "epoch": 3.211715967730213, "grad_norm": 0.5163076519966125, "learning_rate": 2.5565328057464044e-05, "loss": 0.0822, "step": 88380 }, { "epoch": 3.212079366233011, "grad_norm": 0.8043489456176758, "learning_rate": 2.556014005379447e-05, "loss": 0.0742, "step": 88390 }, { "epoch": 3.212442764735809, "grad_norm": 5.606634616851807, "learning_rate": 2.555495202599044e-05, "loss": 0.0879, "step": 88400 }, { "epoch": 3.2128061632386076, "grad_norm": 0.7967556118965149, "learning_rate": 2.554976397427551e-05, "loss": 0.0851, "step": 88410 }, { "epoch": 3.2131695617414056, "grad_norm": 0.8802252411842346, "learning_rate": 2.5544575898873208e-05, "loss": 0.067, "step": 88420 }, { "epoch": 3.213532960244204, "grad_norm": 1.1108994483947754, "learning_rate": 2.553938780000706e-05, "loss": 
0.092, "step": 88430 }, { "epoch": 3.213896358747002, "grad_norm": 0.37617167830467224, "learning_rate": 2.5534199677900618e-05, "loss": 0.1041, "step": 88440 }, { "epoch": 3.2142597572498, "grad_norm": 0.6323724389076233, "learning_rate": 2.5529011532777407e-05, "loss": 0.0805, "step": 88450 }, { "epoch": 3.2146231557525984, "grad_norm": 1.353440284729004, "learning_rate": 2.5523823364860978e-05, "loss": 0.0927, "step": 88460 }, { "epoch": 3.2149865542553964, "grad_norm": 0.5883220434188843, "learning_rate": 2.551863517437486e-05, "loss": 0.0714, "step": 88470 }, { "epoch": 3.215349952758195, "grad_norm": 0.545572817325592, "learning_rate": 2.551344696154259e-05, "loss": 0.063, "step": 88480 }, { "epoch": 3.215713351260993, "grad_norm": 1.073169469833374, "learning_rate": 2.550825872658773e-05, "loss": 0.1216, "step": 88490 }, { "epoch": 3.2160767497637908, "grad_norm": 0.45323723554611206, "learning_rate": 2.5503070469733804e-05, "loss": 0.0881, "step": 88500 }, { "epoch": 3.216440148266589, "grad_norm": 0.47871801257133484, "learning_rate": 2.5497882191204365e-05, "loss": 0.1577, "step": 88510 }, { "epoch": 3.216803546769387, "grad_norm": 0.21129682660102844, "learning_rate": 2.5492693891222952e-05, "loss": 0.0599, "step": 88520 }, { "epoch": 3.2171669452721856, "grad_norm": 1.4315932989120483, "learning_rate": 2.548750557001311e-05, "loss": 0.1047, "step": 88530 }, { "epoch": 3.2175303437749836, "grad_norm": 0.4861602187156677, "learning_rate": 2.5482317227798393e-05, "loss": 0.0807, "step": 88540 }, { "epoch": 3.217893742277782, "grad_norm": 0.9959172010421753, "learning_rate": 2.547712886480233e-05, "loss": 0.0929, "step": 88550 }, { "epoch": 3.21825714078058, "grad_norm": 1.7969993352890015, "learning_rate": 2.5471940481248496e-05, "loss": 0.0884, "step": 88560 }, { "epoch": 3.218620539283378, "grad_norm": 0.49897995591163635, "learning_rate": 2.5466752077360417e-05, "loss": 0.069, "step": 88570 }, { "epoch": 3.2189839377861764, "grad_norm": 
1.5426241159439087, "learning_rate": 2.5461563653361658e-05, "loss": 0.0803, "step": 88580 }, { "epoch": 3.2193473362889744, "grad_norm": 0.3128204345703125, "learning_rate": 2.5456375209475765e-05, "loss": 0.08, "step": 88590 }, { "epoch": 3.219710734791773, "grad_norm": 1.4503716230392456, "learning_rate": 2.5451186745926276e-05, "loss": 0.0908, "step": 88600 }, { "epoch": 3.220074133294571, "grad_norm": 1.7355505228042603, "learning_rate": 2.5445998262936765e-05, "loss": 0.0784, "step": 88610 }, { "epoch": 3.220437531797369, "grad_norm": 0.8922900557518005, "learning_rate": 2.5440809760730773e-05, "loss": 0.1035, "step": 88620 }, { "epoch": 3.2208009303001672, "grad_norm": 0.41246405243873596, "learning_rate": 2.543562123953186e-05, "loss": 0.0966, "step": 88630 }, { "epoch": 3.221164328802965, "grad_norm": 0.5685162544250488, "learning_rate": 2.5430432699563577e-05, "loss": 0.0869, "step": 88640 }, { "epoch": 3.2215277273057636, "grad_norm": 1.1315709352493286, "learning_rate": 2.5425244141049477e-05, "loss": 0.096, "step": 88650 }, { "epoch": 3.2218911258085616, "grad_norm": 0.5966361165046692, "learning_rate": 2.542005556421312e-05, "loss": 0.0861, "step": 88660 }, { "epoch": 3.22225452431136, "grad_norm": 0.6581082344055176, "learning_rate": 2.541486696927806e-05, "loss": 0.0604, "step": 88670 }, { "epoch": 3.222617922814158, "grad_norm": 0.4752594530582428, "learning_rate": 2.540967835646787e-05, "loss": 0.2735, "step": 88680 }, { "epoch": 3.222981321316956, "grad_norm": 1.1282252073287964, "learning_rate": 2.5404489726006085e-05, "loss": 0.0928, "step": 88690 }, { "epoch": 3.2233447198197545, "grad_norm": 0.7483789324760437, "learning_rate": 2.5399301078116287e-05, "loss": 0.1104, "step": 88700 }, { "epoch": 3.2237081183225524, "grad_norm": 0.338220477104187, "learning_rate": 2.5394112413022024e-05, "loss": 0.0564, "step": 88710 }, { "epoch": 3.224071516825351, "grad_norm": 0.5026964545249939, "learning_rate": 2.538892373094685e-05, "loss": 0.0771, "step": 
88720 }, { "epoch": 3.224434915328149, "grad_norm": 0.5595789551734924, "learning_rate": 2.5383735032114348e-05, "loss": 0.0752, "step": 88730 }, { "epoch": 3.224798313830947, "grad_norm": 0.37049752473831177, "learning_rate": 2.5378546316748066e-05, "loss": 0.0846, "step": 88740 }, { "epoch": 3.2251617123337453, "grad_norm": 0.7598865628242493, "learning_rate": 2.5373357585071568e-05, "loss": 0.0657, "step": 88750 }, { "epoch": 3.2255251108365433, "grad_norm": 2.472294569015503, "learning_rate": 2.536816883730842e-05, "loss": 0.1018, "step": 88760 }, { "epoch": 3.2258885093393417, "grad_norm": 0.44734901189804077, "learning_rate": 2.5362980073682186e-05, "loss": 0.08, "step": 88770 }, { "epoch": 3.2262519078421397, "grad_norm": 0.9893713593482971, "learning_rate": 2.5357791294416433e-05, "loss": 0.097, "step": 88780 }, { "epoch": 3.2266153063449377, "grad_norm": 0.5891297459602356, "learning_rate": 2.5352602499734725e-05, "loss": 0.0738, "step": 88790 }, { "epoch": 3.226978704847736, "grad_norm": 1.1289516687393188, "learning_rate": 2.5347413689860634e-05, "loss": 0.0851, "step": 88800 }, { "epoch": 3.226978704847736, "eval_loss": 0.32641828060150146, "eval_runtime": 181.2974, "eval_samples_per_second": 40.894, "eval_steps_per_second": 5.113, "eval_wer": 0.13833572349193093, "step": 88800 }, { "epoch": 3.227342103350534, "grad_norm": 1.0237905979156494, "learning_rate": 2.5342224865017715e-05, "loss": 0.0795, "step": 88810 }, { "epoch": 3.2277055018533325, "grad_norm": 1.2821190357208252, "learning_rate": 2.5337036025429555e-05, "loss": 0.0745, "step": 88820 }, { "epoch": 3.2280689003561305, "grad_norm": 0.4639264941215515, "learning_rate": 2.5331847171319707e-05, "loss": 0.0825, "step": 88830 }, { "epoch": 3.228432298858929, "grad_norm": 0.4527970254421234, "learning_rate": 2.5327177190389585e-05, "loss": 1.9595, "step": 88840 }, { "epoch": 3.228795697361727, "grad_norm": 4.547738552093506, "learning_rate": 2.532198830930447e-05, "loss": 0.0768, "step": 88850 }, 
{ "epoch": 3.229159095864525, "grad_norm": 1.0073570013046265, "learning_rate": 2.5316799414346026e-05, "loss": 0.0892, "step": 88860 }, { "epoch": 3.2295224943673233, "grad_norm": 0.5932965874671936, "learning_rate": 2.5311610505737827e-05, "loss": 0.064, "step": 88870 }, { "epoch": 3.2298858928701213, "grad_norm": 0.6178048253059387, "learning_rate": 2.530642158370343e-05, "loss": 0.0592, "step": 88880 }, { "epoch": 3.2302492913729197, "grad_norm": 0.31980982422828674, "learning_rate": 2.5301232648466427e-05, "loss": 0.1194, "step": 88890 }, { "epoch": 3.2306126898757177, "grad_norm": 0.910460889339447, "learning_rate": 2.529604370025037e-05, "loss": 0.084, "step": 88900 }, { "epoch": 3.2309760883785157, "grad_norm": 0.4696938991546631, "learning_rate": 2.5290854739278857e-05, "loss": 0.1013, "step": 88910 }, { "epoch": 3.231339486881314, "grad_norm": 0.5140429139137268, "learning_rate": 2.5286184663683343e-05, "loss": 1.1616, "step": 88920 }, { "epoch": 3.231702885384112, "grad_norm": 1.7606545686721802, "learning_rate": 2.5280995679092382e-05, "loss": 0.0786, "step": 88930 }, { "epoch": 3.2320662838869105, "grad_norm": 1.0249334573745728, "learning_rate": 2.527580668239431e-05, "loss": 0.0797, "step": 88940 }, { "epoch": 3.2324296823897085, "grad_norm": 0.7938006520271301, "learning_rate": 2.5270617673812712e-05, "loss": 0.0956, "step": 88950 }, { "epoch": 3.232793080892507, "grad_norm": 0.998557984828949, "learning_rate": 2.526542865357116e-05, "loss": 0.0837, "step": 88960 }, { "epoch": 3.233156479395305, "grad_norm": 3.8933629989624023, "learning_rate": 2.5260239621893232e-05, "loss": 0.0633, "step": 88970 }, { "epoch": 3.233519877898103, "grad_norm": 0.6665277481079102, "learning_rate": 2.525556948378978e-05, "loss": 2.7581, "step": 88980 }, { "epoch": 3.2338832764009013, "grad_norm": 3.3742940425872803, "learning_rate": 2.52503804309987e-05, "loss": 0.0782, "step": 88990 }, { "epoch": 3.2342466749036993, "grad_norm": 0.37812352180480957, "learning_rate": 
2.524519136741961e-05, "loss": 0.0937, "step": 89000 }, { "epoch": 3.2346100734064978, "grad_norm": 0.42430004477500916, "learning_rate": 2.5240002293276092e-05, "loss": 0.067, "step": 89010 }, { "epoch": 3.2349734719092957, "grad_norm": 0.669116735458374, "learning_rate": 2.523481320879174e-05, "loss": 0.0901, "step": 89020 }, { "epoch": 3.2353368704120937, "grad_norm": 0.8977360725402832, "learning_rate": 2.5229624114190113e-05, "loss": 0.0802, "step": 89030 }, { "epoch": 3.235700268914892, "grad_norm": 0.6342353224754333, "learning_rate": 2.5224435009694808e-05, "loss": 0.316, "step": 89040 }, { "epoch": 3.23606366741769, "grad_norm": 1.2511061429977417, "learning_rate": 2.5219245895529385e-05, "loss": 0.0862, "step": 89050 }, { "epoch": 3.2364270659204886, "grad_norm": 0.7103098034858704, "learning_rate": 2.5214056771917448e-05, "loss": 0.0743, "step": 89060 }, { "epoch": 3.2367904644232866, "grad_norm": 1.1459332704544067, "learning_rate": 2.5208867639082562e-05, "loss": 0.0808, "step": 89070 }, { "epoch": 3.2371538629260845, "grad_norm": 0.4795287251472473, "learning_rate": 2.5203678497248314e-05, "loss": 0.0596, "step": 89080 }, { "epoch": 3.237517261428883, "grad_norm": 1.7111159563064575, "learning_rate": 2.5198489346638282e-05, "loss": 0.0803, "step": 89090 }, { "epoch": 3.237880659931681, "grad_norm": 0.9549298286437988, "learning_rate": 2.519330018747605e-05, "loss": 0.0808, "step": 89100 }, { "epoch": 3.2382440584344794, "grad_norm": 1.708971381187439, "learning_rate": 2.5188111019985204e-05, "loss": 0.072, "step": 89110 }, { "epoch": 3.2386074569372774, "grad_norm": 1.0324857234954834, "learning_rate": 2.5182921844389317e-05, "loss": 0.0593, "step": 89120 }, { "epoch": 3.238970855440076, "grad_norm": 0.7414169311523438, "learning_rate": 2.517773266091199e-05, "loss": 0.067, "step": 89130 }, { "epoch": 3.2393342539428738, "grad_norm": 0.739587128162384, "learning_rate": 2.5172543469776783e-05, "loss": 0.0832, "step": 89140 }, { "epoch": 
3.2396976524456718, "grad_norm": 0.6406792402267456, "learning_rate": 2.5167354271207298e-05, "loss": 0.0713, "step": 89150 }, { "epoch": 3.24006105094847, "grad_norm": 0.40510469675064087, "learning_rate": 2.5162165065427114e-05, "loss": 0.0698, "step": 89160 }, { "epoch": 3.240424449451268, "grad_norm": 0.508594810962677, "learning_rate": 2.515697585265981e-05, "loss": 0.0691, "step": 89170 }, { "epoch": 3.2407878479540666, "grad_norm": 0.5306017994880676, "learning_rate": 2.5151786633128983e-05, "loss": 0.0975, "step": 89180 }, { "epoch": 3.2411512464568646, "grad_norm": 1.0172330141067505, "learning_rate": 2.51465974070582e-05, "loss": 0.0897, "step": 89190 }, { "epoch": 3.2415146449596626, "grad_norm": 1.291803240776062, "learning_rate": 2.514140817467106e-05, "loss": 0.0812, "step": 89200 }, { "epoch": 3.241878043462461, "grad_norm": 0.5256122946739197, "learning_rate": 2.513621893619115e-05, "loss": 0.1297, "step": 89210 }, { "epoch": 3.242241441965259, "grad_norm": 1.4141759872436523, "learning_rate": 2.513102969184204e-05, "loss": 0.0578, "step": 89220 }, { "epoch": 3.2426048404680574, "grad_norm": 0.35552000999450684, "learning_rate": 2.5125840441847336e-05, "loss": 0.0774, "step": 89230 }, { "epoch": 3.2429682389708554, "grad_norm": 0.5986491441726685, "learning_rate": 2.512065118643061e-05, "loss": 0.096, "step": 89240 }, { "epoch": 3.243331637473654, "grad_norm": 0.6903038024902344, "learning_rate": 2.5115461925815458e-05, "loss": 0.0827, "step": 89250 }, { "epoch": 3.243695035976452, "grad_norm": 0.6188151240348816, "learning_rate": 2.5110272660225454e-05, "loss": 0.0887, "step": 89260 }, { "epoch": 3.24405843447925, "grad_norm": 0.27323612570762634, "learning_rate": 2.5105083389884205e-05, "loss": 1.8973, "step": 89270 }, { "epoch": 3.2444218329820482, "grad_norm": 0.4321115016937256, "learning_rate": 2.5099894115015284e-05, "loss": 0.091, "step": 89280 }, { "epoch": 3.244785231484846, "grad_norm": 0.5513198375701904, "learning_rate": 
2.5094704835842276e-05, "loss": 0.0713, "step": 89290 }, { "epoch": 3.2451486299876446, "grad_norm": 0.5108364224433899, "learning_rate": 2.5089515552588776e-05, "loss": 0.0746, "step": 89300 }, { "epoch": 3.2455120284904426, "grad_norm": 0.5740257501602173, "learning_rate": 2.5084326265478365e-05, "loss": 0.0713, "step": 89310 }, { "epoch": 3.2458754269932406, "grad_norm": 0.46265411376953125, "learning_rate": 2.507913697473464e-05, "loss": 0.0768, "step": 89320 }, { "epoch": 3.246238825496039, "grad_norm": 0.573864758014679, "learning_rate": 2.5073947680581184e-05, "loss": 0.0928, "step": 89330 }, { "epoch": 3.246602223998837, "grad_norm": 0.34501180052757263, "learning_rate": 2.5068758383241586e-05, "loss": 0.0843, "step": 89340 }, { "epoch": 3.2469656225016355, "grad_norm": 1.3891141414642334, "learning_rate": 2.5063569082939437e-05, "loss": 0.0825, "step": 89350 }, { "epoch": 3.2473290210044334, "grad_norm": 0.40869995951652527, "learning_rate": 2.505837977989831e-05, "loss": 2.1322, "step": 89360 }, { "epoch": 3.2476924195072314, "grad_norm": 0.3573366403579712, "learning_rate": 2.505319047434182e-05, "loss": 0.0659, "step": 89370 }, { "epoch": 3.24805581801003, "grad_norm": 1.208050012588501, "learning_rate": 2.5048001166493536e-05, "loss": 0.0684, "step": 89380 }, { "epoch": 3.248419216512828, "grad_norm": 2.167421340942383, "learning_rate": 2.5042811856577054e-05, "loss": 0.5461, "step": 89390 }, { "epoch": 3.2487826150156263, "grad_norm": 0.7320494055747986, "learning_rate": 2.5037622544815958e-05, "loss": 0.0968, "step": 89400 }, { "epoch": 3.2487826150156263, "eval_loss": 0.28867700695991516, "eval_runtime": 180.0941, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.1410223827763356, "step": 89400 }, { "epoch": 3.2491460135184242, "grad_norm": 0.4674376845359802, "learning_rate": 2.5032433231433845e-05, "loss": 0.1201, "step": 89410 }, { "epoch": 3.2495094120212227, "grad_norm": 0.5038551688194275, "learning_rate": 
2.5027243916654296e-05, "loss": 0.0698, "step": 89420 }, { "epoch": 3.2498728105240207, "grad_norm": 0.5582560896873474, "learning_rate": 2.5022054600700902e-05, "loss": 0.0666, "step": 89430 }, { "epoch": 3.2502362090268186, "grad_norm": 3.2635886669158936, "learning_rate": 2.501686528379726e-05, "loss": 0.0858, "step": 89440 }, { "epoch": 3.250599607529617, "grad_norm": 6.544740676879883, "learning_rate": 2.5011675966166947e-05, "loss": 0.0792, "step": 89450 }, { "epoch": 3.250963006032415, "grad_norm": 0.33596140146255493, "learning_rate": 2.500648664803356e-05, "loss": 0.0678, "step": 89460 }, { "epoch": 3.2513264045352135, "grad_norm": 0.7997293472290039, "learning_rate": 2.500129732962069e-05, "loss": 0.0636, "step": 89470 }, { "epoch": 3.2516898030380115, "grad_norm": 0.2458629459142685, "learning_rate": 2.4996108011151922e-05, "loss": 0.0711, "step": 89480 }, { "epoch": 3.25205320154081, "grad_norm": 0.5491876006126404, "learning_rate": 2.4990918692850834e-05, "loss": 0.0982, "step": 89490 }, { "epoch": 3.252416600043608, "grad_norm": 1.1274678707122803, "learning_rate": 2.4985729374941038e-05, "loss": 0.1037, "step": 89500 }, { "epoch": 3.252779998546406, "grad_norm": 0.576329231262207, "learning_rate": 2.4980540057646117e-05, "loss": 0.0813, "step": 89510 }, { "epoch": 3.2531433970492043, "grad_norm": 2.285304069519043, "learning_rate": 2.497535074118965e-05, "loss": 0.0752, "step": 89520 }, { "epoch": 3.2535067955520023, "grad_norm": 1.1110707521438599, "learning_rate": 2.4970161425795233e-05, "loss": 0.0663, "step": 89530 }, { "epoch": 3.2538701940548007, "grad_norm": 0.6601670980453491, "learning_rate": 2.4964972111686443e-05, "loss": 0.0782, "step": 89540 }, { "epoch": 3.2542335925575987, "grad_norm": 1.520925521850586, "learning_rate": 2.4959782799086897e-05, "loss": 0.0842, "step": 89550 }, { "epoch": 3.2545969910603967, "grad_norm": 0.5269960165023804, "learning_rate": 2.495459348822016e-05, "loss": 0.0606, "step": 89560 }, { "epoch": 
3.254960389563195, "grad_norm": 2.8767483234405518, "learning_rate": 2.494940417930983e-05, "loss": 1.3044, "step": 89570 }, { "epoch": 3.255323788065993, "grad_norm": 0.3898687958717346, "learning_rate": 2.4944214872579484e-05, "loss": 0.1111, "step": 89580 }, { "epoch": 3.2556871865687915, "grad_norm": 0.7580005526542664, "learning_rate": 2.4939025568252732e-05, "loss": 0.1151, "step": 89590 }, { "epoch": 3.2560505850715895, "grad_norm": 1.3324828147888184, "learning_rate": 2.4933836266553147e-05, "loss": 0.0821, "step": 89600 }, { "epoch": 3.2564139835743875, "grad_norm": 0.6260429620742798, "learning_rate": 2.4928646967704323e-05, "loss": 0.0501, "step": 89610 }, { "epoch": 3.256777382077186, "grad_norm": 0.3619268238544464, "learning_rate": 2.492345767192985e-05, "loss": 0.0667, "step": 89620 }, { "epoch": 3.257140780579984, "grad_norm": 1.3483206033706665, "learning_rate": 2.4918268379453302e-05, "loss": 0.062, "step": 89630 }, { "epoch": 3.2575041790827823, "grad_norm": 31.592674255371094, "learning_rate": 2.491307909049829e-05, "loss": 0.2416, "step": 89640 }, { "epoch": 3.2578675775855803, "grad_norm": 0.5905751585960388, "learning_rate": 2.490788980528839e-05, "loss": 0.0816, "step": 89650 }, { "epoch": 3.2582309760883783, "grad_norm": 1.89798903465271, "learning_rate": 2.4902700524047194e-05, "loss": 0.0913, "step": 89660 }, { "epoch": 3.2585943745911767, "grad_norm": 0.883533239364624, "learning_rate": 2.489751124699828e-05, "loss": 0.0588, "step": 89670 }, { "epoch": 3.2589577730939747, "grad_norm": 0.48883989453315735, "learning_rate": 2.4892321974365235e-05, "loss": 0.0794, "step": 89680 }, { "epoch": 3.259321171596773, "grad_norm": 0.4255521595478058, "learning_rate": 2.4887132706371663e-05, "loss": 0.0727, "step": 89690 }, { "epoch": 3.259684570099571, "grad_norm": 1.9494534730911255, "learning_rate": 2.488194344324114e-05, "loss": 0.0798, "step": 89700 }, { "epoch": 3.260047968602369, "grad_norm": 0.4608819782733917, "learning_rate": 
2.4876754185197255e-05, "loss": 0.1128, "step": 89710 }, { "epoch": 3.2604113671051675, "grad_norm": 0.2869090139865875, "learning_rate": 2.487156493246359e-05, "loss": 0.0729, "step": 89720 }, { "epoch": 3.2607747656079655, "grad_norm": 0.4189178943634033, "learning_rate": 2.4866375685263733e-05, "loss": 0.0711, "step": 89730 }, { "epoch": 3.261138164110764, "grad_norm": 0.8501338958740234, "learning_rate": 2.4861186443821273e-05, "loss": 0.0915, "step": 89740 }, { "epoch": 3.261501562613562, "grad_norm": 1.5623184442520142, "learning_rate": 2.48559972083598e-05, "loss": 0.0871, "step": 89750 }, { "epoch": 3.2618649611163604, "grad_norm": 0.3259228765964508, "learning_rate": 2.4850807979102895e-05, "loss": 0.0864, "step": 89760 }, { "epoch": 3.2622283596191584, "grad_norm": 1.2263481616973877, "learning_rate": 2.4845618756274147e-05, "loss": 0.0599, "step": 89770 }, { "epoch": 3.262591758121957, "grad_norm": 4.90409517288208, "learning_rate": 2.484042954009712e-05, "loss": 0.0895, "step": 89780 }, { "epoch": 3.2629551566247548, "grad_norm": 0.4526398479938507, "learning_rate": 2.483524033079543e-05, "loss": 0.0734, "step": 89790 }, { "epoch": 3.2633185551275528, "grad_norm": 0.7492665648460388, "learning_rate": 2.483005112859265e-05, "loss": 0.0904, "step": 89800 }, { "epoch": 3.263681953630351, "grad_norm": 0.5449044108390808, "learning_rate": 2.482486193371236e-05, "loss": 0.0764, "step": 89810 }, { "epoch": 3.264045352133149, "grad_norm": 0.5052310824394226, "learning_rate": 2.4819672746378137e-05, "loss": 0.0617, "step": 89820 }, { "epoch": 3.2644087506359476, "grad_norm": 0.5267361998558044, "learning_rate": 2.4814483566813582e-05, "loss": 0.077, "step": 89830 }, { "epoch": 3.2647721491387456, "grad_norm": 1.3752968311309814, "learning_rate": 2.4809294395242273e-05, "loss": 0.0719, "step": 89840 }, { "epoch": 3.2651355476415436, "grad_norm": 1.6243940591812134, "learning_rate": 2.4804105231887787e-05, "loss": 0.1557, "step": 89850 }, { "epoch": 
3.265498946144342, "grad_norm": 1.946386694908142, "learning_rate": 2.479891607697371e-05, "loss": 0.0984, "step": 89860 }, { "epoch": 3.26586234464714, "grad_norm": 1.5876291990280151, "learning_rate": 2.4793726930723615e-05, "loss": 0.0768, "step": 89870 }, { "epoch": 3.2662257431499384, "grad_norm": 0.359464555978775, "learning_rate": 2.4788537793361104e-05, "loss": 0.0734, "step": 89880 }, { "epoch": 3.2665891416527364, "grad_norm": 1.2184336185455322, "learning_rate": 2.4783348665109743e-05, "loss": 0.0963, "step": 89890 }, { "epoch": 3.2669525401555344, "grad_norm": 0.5689897537231445, "learning_rate": 2.477815954619312e-05, "loss": 0.0818, "step": 89900 }, { "epoch": 3.267315938658333, "grad_norm": 0.8023828268051147, "learning_rate": 2.4773489347334143e-05, "loss": 0.0947, "step": 89910 }, { "epoch": 3.267679337161131, "grad_norm": 0.41300055384635925, "learning_rate": 2.476830024676948e-05, "loss": 0.0646, "step": 89920 }, { "epoch": 3.268042735663929, "grad_norm": 1.4045326709747314, "learning_rate": 2.4763111156187922e-05, "loss": 0.0702, "step": 89930 }, { "epoch": 3.268406134166727, "grad_norm": 0.2815437614917755, "learning_rate": 2.4757922075813077e-05, "loss": 0.0844, "step": 89940 }, { "epoch": 3.268769532669525, "grad_norm": 0.8915033340454102, "learning_rate": 2.475273300586851e-05, "loss": 0.1112, "step": 89950 }, { "epoch": 3.2691329311723236, "grad_norm": 0.7407945394515991, "learning_rate": 2.474754394657779e-05, "loss": 0.0744, "step": 89960 }, { "epoch": 3.2694963296751216, "grad_norm": 0.6766940951347351, "learning_rate": 2.474235489816451e-05, "loss": 0.771, "step": 89970 }, { "epoch": 3.26985972817792, "grad_norm": 0.9781772494316101, "learning_rate": 2.4737165860852227e-05, "loss": 0.5104, "step": 89980 }, { "epoch": 3.270223126680718, "grad_norm": 0.4817918539047241, "learning_rate": 2.4731976834864544e-05, "loss": 0.105, "step": 89990 }, { "epoch": 3.270586525183516, "grad_norm": 0.5263279676437378, "learning_rate": 
2.4726787820425023e-05, "loss": 0.0872, "step": 90000 }, { "epoch": 3.270586525183516, "eval_loss": 0.29889926314353943, "eval_runtime": 179.1872, "eval_samples_per_second": 41.376, "eval_steps_per_second": 5.173, "eval_wer": 0.14040517726505347, "step": 90000 }, { "epoch": 3.2709499236863144, "grad_norm": 21.08318328857422, "learning_rate": 2.4721598817757247e-05, "loss": 0.3013, "step": 90010 }, { "epoch": 3.2713133221891124, "grad_norm": 0.4224644601345062, "learning_rate": 2.4716409827084783e-05, "loss": 0.079, "step": 90020 }, { "epoch": 3.271676720691911, "grad_norm": 0.4048929214477539, "learning_rate": 2.4711220848631203e-05, "loss": 0.0684, "step": 90030 }, { "epoch": 3.272040119194709, "grad_norm": 0.522287905216217, "learning_rate": 2.47060318826201e-05, "loss": 0.1021, "step": 90040 }, { "epoch": 3.2724035176975073, "grad_norm": 1.5194038152694702, "learning_rate": 2.4700842929275033e-05, "loss": 0.0878, "step": 90050 }, { "epoch": 3.2727669162003052, "grad_norm": 0.3342646062374115, "learning_rate": 2.4695653988819584e-05, "loss": 0.0697, "step": 90060 }, { "epoch": 3.2731303147031037, "grad_norm": 0.7479965686798096, "learning_rate": 2.469046506147732e-05, "loss": 0.0573, "step": 90070 }, { "epoch": 3.2734937132059017, "grad_norm": 0.4107944071292877, "learning_rate": 2.4685276147471804e-05, "loss": 0.0553, "step": 90080 }, { "epoch": 3.2738571117086996, "grad_norm": 0.8151494860649109, "learning_rate": 2.468008724702663e-05, "loss": 2.1021, "step": 90090 }, { "epoch": 3.274220510211498, "grad_norm": 0.6547772884368896, "learning_rate": 2.4674898360365357e-05, "loss": 0.0704, "step": 90100 }, { "epoch": 3.274583908714296, "grad_norm": 0.7743812203407288, "learning_rate": 2.4669709487711562e-05, "loss": 0.0689, "step": 90110 }, { "epoch": 3.2749473072170945, "grad_norm": 0.546514630317688, "learning_rate": 2.4664520629288795e-05, "loss": 0.0693, "step": 90120 }, { "epoch": 3.2753107057198925, "grad_norm": 1.609788179397583, "learning_rate": 
2.4659331785320652e-05, "loss": 0.0814, "step": 90130 }, { "epoch": 3.2756741042226905, "grad_norm": 0.8801899552345276, "learning_rate": 2.4654142956030692e-05, "loss": 0.0745, "step": 90140 }, { "epoch": 3.276037502725489, "grad_norm": 0.5829627513885498, "learning_rate": 2.464895414164248e-05, "loss": 0.0597, "step": 90150 }, { "epoch": 3.276400901228287, "grad_norm": 0.7144574522972107, "learning_rate": 2.4643765342379584e-05, "loss": 0.1288, "step": 90160 }, { "epoch": 3.2767642997310853, "grad_norm": 0.4121115505695343, "learning_rate": 2.4638576558465562e-05, "loss": 0.0911, "step": 90170 }, { "epoch": 3.2771276982338833, "grad_norm": 0.4146971106529236, "learning_rate": 2.4633387790124003e-05, "loss": 0.066, "step": 90180 }, { "epoch": 3.2774910967366813, "grad_norm": 0.6736780405044556, "learning_rate": 2.4628199037578457e-05, "loss": 0.0708, "step": 90190 }, { "epoch": 3.2778544952394797, "grad_norm": 2.1393494606018066, "learning_rate": 2.462301030105249e-05, "loss": 0.0841, "step": 90200 }, { "epoch": 3.2782178937422777, "grad_norm": 1.2542799711227417, "learning_rate": 2.461782158076967e-05, "loss": 0.0797, "step": 90210 }, { "epoch": 3.278581292245076, "grad_norm": 1.3703997135162354, "learning_rate": 2.461263287695355e-05, "loss": 0.0578, "step": 90220 }, { "epoch": 3.278944690747874, "grad_norm": 0.32917872071266174, "learning_rate": 2.4607444189827708e-05, "loss": 0.0609, "step": 90230 }, { "epoch": 3.279308089250672, "grad_norm": 0.6270015835762024, "learning_rate": 2.4602255519615702e-05, "loss": 0.0925, "step": 90240 }, { "epoch": 3.2796714877534705, "grad_norm": 0.564975917339325, "learning_rate": 2.4597066866541087e-05, "loss": 0.0892, "step": 90250 }, { "epoch": 3.2800348862562685, "grad_norm": 0.6385096311569214, "learning_rate": 2.4591878230827433e-05, "loss": 0.0876, "step": 90260 }, { "epoch": 3.280398284759067, "grad_norm": 1.597822666168213, "learning_rate": 2.4586689612698277e-05, "loss": 0.1932, "step": 90270 }, { "epoch": 
3.280761683261865, "grad_norm": 0.7112919688224792, "learning_rate": 2.458150101237721e-05, "loss": 0.0928, "step": 90280 }, { "epoch": 3.281125081764663, "grad_norm": 0.4111541509628296, "learning_rate": 2.4576312430087772e-05, "loss": 0.068, "step": 90290 }, { "epoch": 3.2814884802674613, "grad_norm": 0.9870650172233582, "learning_rate": 2.4571123866053525e-05, "loss": 0.08, "step": 90300 }, { "epoch": 3.2818518787702593, "grad_norm": 2.479681968688965, "learning_rate": 2.4565935320498016e-05, "loss": 4.4447, "step": 90310 }, { "epoch": 3.2822152772730577, "grad_norm": 0.686183512210846, "learning_rate": 2.456074679364482e-05, "loss": 0.0719, "step": 90320 }, { "epoch": 3.2825786757758557, "grad_norm": 0.5197970271110535, "learning_rate": 2.455555828571748e-05, "loss": 0.0548, "step": 90330 }, { "epoch": 3.282942074278654, "grad_norm": 1.1354804039001465, "learning_rate": 2.4550369796939547e-05, "loss": 0.0908, "step": 90340 }, { "epoch": 3.283305472781452, "grad_norm": 0.7994649410247803, "learning_rate": 2.4545181327534588e-05, "loss": 0.0862, "step": 90350 }, { "epoch": 3.2836688712842506, "grad_norm": 1.2239160537719727, "learning_rate": 2.4539992877726135e-05, "loss": 0.2734, "step": 90360 }, { "epoch": 3.2840322697870485, "grad_norm": 2.116908311843872, "learning_rate": 2.4534804447737762e-05, "loss": 0.0638, "step": 90370 }, { "epoch": 3.2843956682898465, "grad_norm": 0.5922090411186218, "learning_rate": 2.452961603779301e-05, "loss": 0.0883, "step": 90380 }, { "epoch": 3.284759066792645, "grad_norm": 0.7629362940788269, "learning_rate": 2.4524427648115427e-05, "loss": 0.07, "step": 90390 }, { "epoch": 3.285122465295443, "grad_norm": 0.5463785529136658, "learning_rate": 2.451923927892857e-05, "loss": 0.0813, "step": 90400 }, { "epoch": 3.2854858637982414, "grad_norm": 0.6801964044570923, "learning_rate": 2.4514050930455965e-05, "loss": 0.0673, "step": 90410 }, { "epoch": 3.2858492623010394, "grad_norm": 1.022977590560913, "learning_rate": 
2.450886260292119e-05, "loss": 0.0685, "step": 90420 }, { "epoch": 3.2862126608038373, "grad_norm": 0.8831082582473755, "learning_rate": 2.450367429654778e-05, "loss": 0.0728, "step": 90430 }, { "epoch": 3.2865760593066358, "grad_norm": 0.6400480270385742, "learning_rate": 2.4498486011559277e-05, "loss": 0.0967, "step": 90440 }, { "epoch": 3.2869394578094338, "grad_norm": 0.8099062442779541, "learning_rate": 2.4493297748179225e-05, "loss": 0.066, "step": 90450 }, { "epoch": 3.287302856312232, "grad_norm": 0.3613360524177551, "learning_rate": 2.4488109506631163e-05, "loss": 0.1143, "step": 90460 }, { "epoch": 3.28766625481503, "grad_norm": 0.5503920316696167, "learning_rate": 2.448292128713865e-05, "loss": 0.0591, "step": 90470 }, { "epoch": 3.288029653317828, "grad_norm": 0.34772297739982605, "learning_rate": 2.447773308992522e-05, "loss": 0.0738, "step": 90480 }, { "epoch": 3.2883930518206266, "grad_norm": 1.3450576066970825, "learning_rate": 2.4472544915214414e-05, "loss": 0.0832, "step": 90490 }, { "epoch": 3.2887564503234246, "grad_norm": 0.5817814469337463, "learning_rate": 2.4467356763229756e-05, "loss": 0.0962, "step": 90500 }, { "epoch": 3.289119848826223, "grad_norm": 0.669734001159668, "learning_rate": 2.446216863419481e-05, "loss": 0.0779, "step": 90510 }, { "epoch": 3.289483247329021, "grad_norm": 1.3286347389221191, "learning_rate": 2.445698052833311e-05, "loss": 0.07, "step": 90520 }, { "epoch": 3.289846645831819, "grad_norm": 1.7495746612548828, "learning_rate": 2.4451792445868186e-05, "loss": 0.07, "step": 90530 }, { "epoch": 3.2902100443346174, "grad_norm": 0.6228033304214478, "learning_rate": 2.4446604387023572e-05, "loss": 0.1081, "step": 90540 }, { "epoch": 3.2905734428374154, "grad_norm": 0.8681657910346985, "learning_rate": 2.4441416352022793e-05, "loss": 0.0826, "step": 90550 }, { "epoch": 3.290936841340214, "grad_norm": 1.1657065153121948, "learning_rate": 2.4436228341089412e-05, "loss": 0.0801, "step": 90560 }, { "epoch": 3.291300239843012, 
"grad_norm": 1.196022391319275, "learning_rate": 2.4431040354446944e-05, "loss": 0.0665, "step": 90570 }, { "epoch": 3.2916636383458098, "grad_norm": 0.8635385632514954, "learning_rate": 2.442585239231892e-05, "loss": 0.0694, "step": 90580 }, { "epoch": 3.292027036848608, "grad_norm": 0.664546549320221, "learning_rate": 2.442066445492888e-05, "loss": 0.0938, "step": 90590 }, { "epoch": 3.292390435351406, "grad_norm": 0.6976110339164734, "learning_rate": 2.441547654250033e-05, "loss": 0.0818, "step": 90600 }, { "epoch": 3.292390435351406, "eval_loss": 0.33689337968826294, "eval_runtime": 179.0908, "eval_samples_per_second": 41.398, "eval_steps_per_second": 5.176, "eval_wer": 0.1381995752173834, "step": 90600 }, { "epoch": 3.2927538338542046, "grad_norm": 0.48501139879226685, "learning_rate": 2.441028865525683e-05, "loss": 0.0915, "step": 90610 }, { "epoch": 3.2931172323570026, "grad_norm": 0.7468106746673584, "learning_rate": 2.4405100793421892e-05, "loss": 0.1325, "step": 90620 }, { "epoch": 3.293480630859801, "grad_norm": 0.38419654965400696, "learning_rate": 2.4399912957219048e-05, "loss": 0.071, "step": 90630 }, { "epoch": 3.293844029362599, "grad_norm": 0.44008463621139526, "learning_rate": 2.4394725146871817e-05, "loss": 0.0885, "step": 90640 }, { "epoch": 3.2942074278653974, "grad_norm": 4.322512149810791, "learning_rate": 2.4389537362603714e-05, "loss": 0.0892, "step": 90650 }, { "epoch": 3.2945708263681954, "grad_norm": 0.5549167990684509, "learning_rate": 2.4384349604638285e-05, "loss": 0.0908, "step": 90660 }, { "epoch": 3.2949342248709934, "grad_norm": 0.5810611248016357, "learning_rate": 2.437916187319904e-05, "loss": 0.0805, "step": 90670 }, { "epoch": 3.295297623373792, "grad_norm": 0.5657238364219666, "learning_rate": 2.4373974168509507e-05, "loss": 0.0607, "step": 90680 }, { "epoch": 3.29566102187659, "grad_norm": 0.6547468304634094, "learning_rate": 2.436878649079318e-05, "loss": 0.0987, "step": 90690 }, { "epoch": 3.2960244203793883, "grad_norm": 
1.354154109954834, "learning_rate": 2.4363598840273617e-05, "loss": 0.0898, "step": 90700 }, { "epoch": 3.2963878188821862, "grad_norm": 3.738417387008667, "learning_rate": 2.4358411217174308e-05, "loss": 0.0716, "step": 90710 }, { "epoch": 3.2967512173849842, "grad_norm": 1.3510026931762695, "learning_rate": 2.435322362171878e-05, "loss": 0.0855, "step": 90720 }, { "epoch": 3.2971146158877827, "grad_norm": 1.4833030700683594, "learning_rate": 2.4348036054130545e-05, "loss": 0.0859, "step": 90730 }, { "epoch": 3.2974780143905806, "grad_norm": 0.327749639749527, "learning_rate": 2.4342848514633107e-05, "loss": 0.0798, "step": 90740 }, { "epoch": 3.297841412893379, "grad_norm": 2.8632781505584717, "learning_rate": 2.4337661003449998e-05, "loss": 0.0958, "step": 90750 }, { "epoch": 3.298204811396177, "grad_norm": 1.3341703414916992, "learning_rate": 2.433247352080472e-05, "loss": 2.2308, "step": 90760 }, { "epoch": 3.298568209898975, "grad_norm": 1.0177751779556274, "learning_rate": 2.4327286066920785e-05, "loss": 0.0721, "step": 90770 }, { "epoch": 3.2989316084017735, "grad_norm": 0.5713348388671875, "learning_rate": 2.4322098642021697e-05, "loss": 0.0657, "step": 90780 }, { "epoch": 3.2992950069045714, "grad_norm": 0.32323822379112244, "learning_rate": 2.4316911246330955e-05, "loss": 0.2474, "step": 90790 }, { "epoch": 3.29965840540737, "grad_norm": 0.4968611001968384, "learning_rate": 2.431172388007209e-05, "loss": 0.0831, "step": 90800 }, { "epoch": 3.300021803910168, "grad_norm": 0.9985103607177734, "learning_rate": 2.430653654346859e-05, "loss": 0.0802, "step": 90810 }, { "epoch": 3.300385202412966, "grad_norm": 0.5295760631561279, "learning_rate": 2.430134923674397e-05, "loss": 0.0702, "step": 90820 }, { "epoch": 3.3007486009157643, "grad_norm": 0.6976117491722107, "learning_rate": 2.429616196012172e-05, "loss": 0.0797, "step": 90830 }, { "epoch": 3.3011119994185623, "grad_norm": 0.5984934568405151, "learning_rate": 2.4290974713825338e-05, "loss": 0.3996, 
"step": 90840 }, { "epoch": 3.3014753979213607, "grad_norm": 0.9891178607940674, "learning_rate": 2.4285787498078343e-05, "loss": 0.0631, "step": 90850 }, { "epoch": 3.3018387964241587, "grad_norm": 0.42125028371810913, "learning_rate": 2.4280600313104227e-05, "loss": 0.0814, "step": 90860 }, { "epoch": 3.3022021949269567, "grad_norm": 0.25509488582611084, "learning_rate": 2.427541315912648e-05, "loss": 0.0648, "step": 90870 }, { "epoch": 3.302565593429755, "grad_norm": 0.5609679222106934, "learning_rate": 2.42702260363686e-05, "loss": 0.0945, "step": 90880 }, { "epoch": 3.302928991932553, "grad_norm": 1.2793158292770386, "learning_rate": 2.4265038945054076e-05, "loss": 0.0853, "step": 90890 }, { "epoch": 3.3032923904353515, "grad_norm": 0.8435468673706055, "learning_rate": 2.425985188540642e-05, "loss": 0.1439, "step": 90900 }, { "epoch": 3.3036557889381495, "grad_norm": 1.7698181867599487, "learning_rate": 2.425466485764911e-05, "loss": 0.073, "step": 90910 }, { "epoch": 3.304019187440948, "grad_norm": 0.8225585222244263, "learning_rate": 2.4249477862005646e-05, "loss": 0.0733, "step": 90920 }, { "epoch": 3.304382585943746, "grad_norm": 1.2218375205993652, "learning_rate": 2.4244290898699494e-05, "loss": 0.0723, "step": 90930 }, { "epoch": 3.3047459844465443, "grad_norm": 2.20505952835083, "learning_rate": 2.423910396795417e-05, "loss": 0.0815, "step": 90940 }, { "epoch": 3.3051093829493423, "grad_norm": 0.4661131501197815, "learning_rate": 2.423391706999315e-05, "loss": 0.0872, "step": 90950 }, { "epoch": 3.3054727814521403, "grad_norm": 0.6685827970504761, "learning_rate": 2.4228730205039916e-05, "loss": 0.066, "step": 90960 }, { "epoch": 3.3058361799549387, "grad_norm": 0.89743572473526, "learning_rate": 2.4223543373317954e-05, "loss": 0.0665, "step": 90970 }, { "epoch": 3.3061995784577367, "grad_norm": 0.9550055265426636, "learning_rate": 2.4218356575050734e-05, "loss": 0.0722, "step": 90980 }, { "epoch": 3.306562976960535, "grad_norm": 0.4779164791107178, 
"learning_rate": 2.421316981046176e-05, "loss": 0.0724, "step": 90990 }, { "epoch": 3.306926375463333, "grad_norm": 1.0296430587768555, "learning_rate": 2.4207983079774498e-05, "loss": 0.0718, "step": 91000 }, { "epoch": 3.307289773966131, "grad_norm": 0.6730550527572632, "learning_rate": 2.4202796383212427e-05, "loss": 0.0622, "step": 91010 }, { "epoch": 3.3076531724689295, "grad_norm": 0.5089064240455627, "learning_rate": 2.4197609720999027e-05, "loss": 0.056, "step": 91020 }, { "epoch": 3.3080165709717275, "grad_norm": 1.3337105512619019, "learning_rate": 2.4192423093357754e-05, "loss": 0.0799, "step": 91030 }, { "epoch": 3.308379969474526, "grad_norm": 0.7988773584365845, "learning_rate": 2.418723650051211e-05, "loss": 0.094, "step": 91040 }, { "epoch": 3.308743367977324, "grad_norm": 2.004700183868408, "learning_rate": 2.4182049942685553e-05, "loss": 0.073, "step": 91050 }, { "epoch": 3.309106766480122, "grad_norm": 0.5091060996055603, "learning_rate": 2.4176863420101553e-05, "loss": 0.0869, "step": 91060 }, { "epoch": 3.3094701649829203, "grad_norm": 1.5232765674591064, "learning_rate": 2.417167693298358e-05, "loss": 0.0926, "step": 91070 }, { "epoch": 3.3098335634857183, "grad_norm": 2.1751208305358887, "learning_rate": 2.416649048155509e-05, "loss": 0.0679, "step": 91080 }, { "epoch": 3.3101969619885168, "grad_norm": 0.7265444397926331, "learning_rate": 2.416130406603957e-05, "loss": 0.6392, "step": 91090 }, { "epoch": 3.3105603604913147, "grad_norm": 1.931279182434082, "learning_rate": 2.4156117686660473e-05, "loss": 0.097, "step": 91100 }, { "epoch": 3.3109237589941127, "grad_norm": 0.5885264277458191, "learning_rate": 2.4150931343641264e-05, "loss": 0.087, "step": 91110 }, { "epoch": 3.311287157496911, "grad_norm": 1.2313237190246582, "learning_rate": 2.4145745037205388e-05, "loss": 0.0718, "step": 91120 }, { "epoch": 3.311650555999709, "grad_norm": 0.4470736086368561, "learning_rate": 2.4140558767576336e-05, "loss": 0.0614, "step": 91130 }, { "epoch": 
3.3120139545025076, "grad_norm": 0.9219857454299927, "learning_rate": 2.4135372534977542e-05, "loss": 0.1004, "step": 91140 }, { "epoch": 3.3123773530053056, "grad_norm": 0.5581304430961609, "learning_rate": 2.4130186339632473e-05, "loss": 0.0861, "step": 91150 }, { "epoch": 3.3127407515081035, "grad_norm": 0.6495917439460754, "learning_rate": 2.412500018176458e-05, "loss": 0.0892, "step": 91160 }, { "epoch": 3.313104150010902, "grad_norm": 0.8792677521705627, "learning_rate": 2.4119814061597304e-05, "loss": 0.0804, "step": 91170 }, { "epoch": 3.3134675485137, "grad_norm": 0.7889745235443115, "learning_rate": 2.411462797935412e-05, "loss": 0.068, "step": 91180 }, { "epoch": 3.3138309470164984, "grad_norm": 0.883449137210846, "learning_rate": 2.4109441935258465e-05, "loss": 0.1013, "step": 91190 }, { "epoch": 3.3141943455192964, "grad_norm": 0.7440003156661987, "learning_rate": 2.410425592953379e-05, "loss": 0.0737, "step": 91200 }, { "epoch": 3.3141943455192964, "eval_loss": 0.3343234956264496, "eval_runtime": 179.7554, "eval_samples_per_second": 41.245, "eval_steps_per_second": 5.157, "eval_wer": 0.13966997658249677, "step": 91200 }, { "epoch": 3.314557744022095, "grad_norm": 0.6900414228439331, "learning_rate": 2.409906996240353e-05, "loss": 0.0944, "step": 91210 }, { "epoch": 3.314921142524893, "grad_norm": 1.4145492315292358, "learning_rate": 2.4093884034091148e-05, "loss": 0.0583, "step": 91220 }, { "epoch": 3.315284541027691, "grad_norm": 1.502017855644226, "learning_rate": 2.4088698144820076e-05, "loss": 0.073, "step": 91230 }, { "epoch": 3.315647939530489, "grad_norm": 0.4413808584213257, "learning_rate": 2.4083512294813765e-05, "loss": 0.0817, "step": 91240 }, { "epoch": 3.316011338033287, "grad_norm": 4.0073957443237305, "learning_rate": 2.4078326484295642e-05, "loss": 0.0913, "step": 91250 }, { "epoch": 3.3163747365360856, "grad_norm": 1.807396650314331, "learning_rate": 2.407314071348915e-05, "loss": 0.0713, "step": 91260 }, { "epoch": 
3.3167381350388836, "grad_norm": 0.6919573545455933, "learning_rate": 2.4067954982617726e-05, "loss": 0.0856, "step": 91270 }, { "epoch": 3.317101533541682, "grad_norm": 0.31657874584198, "learning_rate": 2.4062769291904814e-05, "loss": 0.0806, "step": 91280 }, { "epoch": 3.31746493204448, "grad_norm": 2.0308666229248047, "learning_rate": 2.4057583641573835e-05, "loss": 0.0987, "step": 91290 }, { "epoch": 3.317828330547278, "grad_norm": 1.023868441581726, "learning_rate": 2.4052398031848224e-05, "loss": 0.0692, "step": 91300 }, { "epoch": 3.3181917290500764, "grad_norm": 0.5633085370063782, "learning_rate": 2.4047212462951402e-05, "loss": 0.0779, "step": 91310 }, { "epoch": 3.3185551275528744, "grad_norm": 0.48355531692504883, "learning_rate": 2.4042026935106812e-05, "loss": 0.0692, "step": 91320 }, { "epoch": 3.318918526055673, "grad_norm": 1.3398791551589966, "learning_rate": 2.4036841448537876e-05, "loss": 0.0805, "step": 91330 }, { "epoch": 3.319281924558471, "grad_norm": 0.36602282524108887, "learning_rate": 2.4031656003468016e-05, "loss": 0.0789, "step": 91340 }, { "epoch": 3.319645323061269, "grad_norm": 0.6087961196899414, "learning_rate": 2.4026470600120643e-05, "loss": 0.0816, "step": 91350 }, { "epoch": 3.3200087215640672, "grad_norm": 0.5523571372032166, "learning_rate": 2.402128523871919e-05, "loss": 0.0945, "step": 91360 }, { "epoch": 3.320372120066865, "grad_norm": 0.5356813669204712, "learning_rate": 2.401609991948708e-05, "loss": 0.0543, "step": 91370 }, { "epoch": 3.3207355185696636, "grad_norm": 0.7490220665931702, "learning_rate": 2.4010914642647725e-05, "loss": 0.0802, "step": 91380 }, { "epoch": 3.3210989170724616, "grad_norm": 0.3810897469520569, "learning_rate": 2.4005729408424538e-05, "loss": 0.0986, "step": 91390 }, { "epoch": 3.3214623155752596, "grad_norm": 2.465951919555664, "learning_rate": 2.4000544217040926e-05, "loss": 0.0977, "step": 91400 }, { "epoch": 3.321825714078058, "grad_norm": 0.69112628698349, "learning_rate": 
2.399535906872031e-05, "loss": 0.0679, "step": 91410 }, { "epoch": 3.322189112580856, "grad_norm": 0.594582200050354, "learning_rate": 2.3990173963686103e-05, "loss": 0.0622, "step": 91420 }, { "epoch": 3.3225525110836545, "grad_norm": 0.5401979088783264, "learning_rate": 2.3984988902161706e-05, "loss": 0.0557, "step": 91430 }, { "epoch": 3.3229159095864524, "grad_norm": 0.34108448028564453, "learning_rate": 2.397980388437052e-05, "loss": 0.0941, "step": 91440 }, { "epoch": 3.3232793080892504, "grad_norm": 0.8815538287162781, "learning_rate": 2.3974618910535958e-05, "loss": 0.0491, "step": 91450 }, { "epoch": 3.323642706592049, "grad_norm": 1.5742990970611572, "learning_rate": 2.3969433980881417e-05, "loss": 0.151, "step": 91460 }, { "epoch": 3.324006105094847, "grad_norm": 0.6522462368011475, "learning_rate": 2.3964249095630302e-05, "loss": 0.0642, "step": 91470 }, { "epoch": 3.3243695035976453, "grad_norm": 2.5433623790740967, "learning_rate": 2.3959064255006012e-05, "loss": 0.0702, "step": 91480 }, { "epoch": 3.3247329021004433, "grad_norm": 0.345841646194458, "learning_rate": 2.395387945923193e-05, "loss": 0.1386, "step": 91490 }, { "epoch": 3.3250963006032417, "grad_norm": 0.6606214642524719, "learning_rate": 2.394869470853146e-05, "loss": 0.3363, "step": 91500 }, { "epoch": 3.3254596991060397, "grad_norm": 0.638674795627594, "learning_rate": 2.3943510003128004e-05, "loss": 0.0987, "step": 91510 }, { "epoch": 3.325823097608838, "grad_norm": 0.4758436381816864, "learning_rate": 2.393832534324494e-05, "loss": 0.061, "step": 91520 }, { "epoch": 3.326186496111636, "grad_norm": 4.208878040313721, "learning_rate": 2.393314072910565e-05, "loss": 0.0707, "step": 91530 }, { "epoch": 3.326549894614434, "grad_norm": 0.49438315629959106, "learning_rate": 2.3927956160933544e-05, "loss": 0.0792, "step": 91540 }, { "epoch": 3.3269132931172325, "grad_norm": 1.3179932832717896, "learning_rate": 2.3922771638951983e-05, "loss": 0.0857, "step": 91550 }, { "epoch": 
3.3272766916200305, "grad_norm": 0.5983903408050537, "learning_rate": 2.3917587163384365e-05, "loss": 0.0743, "step": 91560 }, { "epoch": 3.327640090122829, "grad_norm": 0.30007684230804443, "learning_rate": 2.3912402734454063e-05, "loss": 0.0538, "step": 91570 }, { "epoch": 3.328003488625627, "grad_norm": 2.637645959854126, "learning_rate": 2.3907218352384452e-05, "loss": 0.066, "step": 91580 }, { "epoch": 3.328366887128425, "grad_norm": 0.9720343351364136, "learning_rate": 2.3902034017398923e-05, "loss": 0.0899, "step": 91590 }, { "epoch": 3.3287302856312233, "grad_norm": 0.5020787715911865, "learning_rate": 2.3896849729720834e-05, "loss": 0.0858, "step": 91600 }, { "epoch": 3.3290936841340213, "grad_norm": 0.7373344898223877, "learning_rate": 2.3891665489573573e-05, "loss": 0.0704, "step": 91610 }, { "epoch": 3.3294570826368197, "grad_norm": 0.8118528127670288, "learning_rate": 2.3886481297180494e-05, "loss": 0.0735, "step": 91620 }, { "epoch": 3.3298204811396177, "grad_norm": 0.37041768431663513, "learning_rate": 2.388129715276498e-05, "loss": 0.0747, "step": 91630 }, { "epoch": 3.3301838796424157, "grad_norm": 0.9788088798522949, "learning_rate": 2.3876113056550392e-05, "loss": 0.0728, "step": 91640 }, { "epoch": 3.330547278145214, "grad_norm": 1.582607626914978, "learning_rate": 2.3870929008760087e-05, "loss": 0.0798, "step": 91650 }, { "epoch": 3.330910676648012, "grad_norm": 4.744373321533203, "learning_rate": 2.386574500961744e-05, "loss": 0.0922, "step": 91660 }, { "epoch": 3.3312740751508105, "grad_norm": 0.6684284806251526, "learning_rate": 2.38605610593458e-05, "loss": 0.0615, "step": 91670 }, { "epoch": 3.3316374736536085, "grad_norm": 0.49481749534606934, "learning_rate": 2.3855377158168535e-05, "loss": 0.0838, "step": 91680 }, { "epoch": 3.3320008721564065, "grad_norm": 0.9517963528633118, "learning_rate": 2.385019330630899e-05, "loss": 0.0822, "step": 91690 }, { "epoch": 3.332364270659205, "grad_norm": 1.185115098953247, "learning_rate": 
2.384500950399053e-05, "loss": 0.0902, "step": 91700 }, { "epoch": 3.332727669162003, "grad_norm": 0.7998439073562622, "learning_rate": 2.38398257514365e-05, "loss": 0.0977, "step": 91710 }, { "epoch": 3.3330910676648013, "grad_norm": 0.7033588886260986, "learning_rate": 2.383464204887025e-05, "loss": 4.2273, "step": 91720 }, { "epoch": 3.3334544661675993, "grad_norm": 0.37441256642341614, "learning_rate": 2.3829458396515128e-05, "loss": 0.0806, "step": 91730 }, { "epoch": 3.3338178646703973, "grad_norm": 1.9060165882110596, "learning_rate": 2.3824274794594473e-05, "loss": 1.8747, "step": 91740 }, { "epoch": 3.3341812631731957, "grad_norm": 1.7355316877365112, "learning_rate": 2.3819091243331643e-05, "loss": 0.0927, "step": 91750 }, { "epoch": 3.3345446616759937, "grad_norm": 0.8641614317893982, "learning_rate": 2.381390774294996e-05, "loss": 0.1018, "step": 91760 }, { "epoch": 3.334908060178792, "grad_norm": 1.4142506122589111, "learning_rate": 2.380872429367278e-05, "loss": 0.0714, "step": 91770 }, { "epoch": 3.33527145868159, "grad_norm": 0.9375418424606323, "learning_rate": 2.3803540895723433e-05, "loss": 0.0922, "step": 91780 }, { "epoch": 3.3356348571843886, "grad_norm": 0.28046151995658875, "learning_rate": 2.3798357549325245e-05, "loss": 0.0794, "step": 91790 }, { "epoch": 3.3359982556871866, "grad_norm": 0.9596878290176392, "learning_rate": 2.3793174254701557e-05, "loss": 0.0864, "step": 91800 }, { "epoch": 3.3359982556871866, "eval_loss": 0.3040144741535187, "eval_runtime": 179.6763, "eval_samples_per_second": 41.263, "eval_steps_per_second": 5.159, "eval_wer": 0.1398333545119538, "step": 91800 }, { "epoch": 3.336361654189985, "grad_norm": 1.36545729637146, "learning_rate": 2.3787991012075697e-05, "loss": 2.5138, "step": 91810 }, { "epoch": 3.336725052692783, "grad_norm": 1.0148299932479858, "learning_rate": 2.3782807821670993e-05, "loss": 0.0763, "step": 91820 }, { "epoch": 3.337088451195581, "grad_norm": 1.4392248392105103, "learning_rate": 
2.3777624683710768e-05, "loss": 0.0623, "step": 91830 }, { "epoch": 3.3374518496983794, "grad_norm": 0.4060908854007721, "learning_rate": 2.3772441598418347e-05, "loss": 0.1011, "step": 91840 }, { "epoch": 3.3378152482011774, "grad_norm": 2.786815643310547, "learning_rate": 2.3767258566017045e-05, "loss": 0.0724, "step": 91850 }, { "epoch": 3.338178646703976, "grad_norm": 0.658647894859314, "learning_rate": 2.3762075586730194e-05, "loss": 0.0953, "step": 91860 }, { "epoch": 3.338542045206774, "grad_norm": 0.6986158490180969, "learning_rate": 2.3756892660781096e-05, "loss": 0.0742, "step": 91870 }, { "epoch": 3.3389054437095718, "grad_norm": 0.4815951883792877, "learning_rate": 2.375170978839307e-05, "loss": 0.0624, "step": 91880 }, { "epoch": 3.33926884221237, "grad_norm": 0.5518103241920471, "learning_rate": 2.3746526969789432e-05, "loss": 0.0692, "step": 91890 }, { "epoch": 3.339632240715168, "grad_norm": 0.5782762765884399, "learning_rate": 2.374134420519348e-05, "loss": 0.0755, "step": 91900 }, { "epoch": 3.3399956392179666, "grad_norm": 0.35663267970085144, "learning_rate": 2.3736161494828535e-05, "loss": 0.0877, "step": 91910 }, { "epoch": 3.3403590377207646, "grad_norm": 0.7183496952056885, "learning_rate": 2.373097883891789e-05, "loss": 0.0501, "step": 91920 }, { "epoch": 3.3407224362235626, "grad_norm": 0.6491569876670837, "learning_rate": 2.3725796237684853e-05, "loss": 0.1178, "step": 91930 }, { "epoch": 3.341085834726361, "grad_norm": 0.3948687016963959, "learning_rate": 2.372061369135272e-05, "loss": 0.0991, "step": 91940 }, { "epoch": 3.341449233229159, "grad_norm": 0.5275573134422302, "learning_rate": 2.3715431200144793e-05, "loss": 0.1027, "step": 91950 }, { "epoch": 3.3418126317319574, "grad_norm": 0.5675976872444153, "learning_rate": 2.371024876428437e-05, "loss": 0.0734, "step": 91960 }, { "epoch": 3.3421760302347554, "grad_norm": 2.3037304878234863, "learning_rate": 2.3705066383994738e-05, "loss": 0.0692, "step": 91970 }, { "epoch": 
3.3425394287375534, "grad_norm": 0.8419054746627808, "learning_rate": 2.369988405949918e-05, "loss": 0.0693, "step": 91980 }, { "epoch": 3.342902827240352, "grad_norm": 0.7216833829879761, "learning_rate": 2.3694701791020994e-05, "loss": 0.9264, "step": 91990 }, { "epoch": 3.34326622574315, "grad_norm": 0.7542405724525452, "learning_rate": 2.3689519578783467e-05, "loss": 0.0725, "step": 92000 }, { "epoch": 3.3436296242459482, "grad_norm": 1.4204015731811523, "learning_rate": 2.368433742300988e-05, "loss": 0.0826, "step": 92010 }, { "epoch": 3.343993022748746, "grad_norm": 3.0590789318084717, "learning_rate": 2.3679155323923514e-05, "loss": 0.0686, "step": 92020 }, { "epoch": 3.344356421251544, "grad_norm": 0.6067277789115906, "learning_rate": 2.3673973281747634e-05, "loss": 0.0887, "step": 92030 }, { "epoch": 3.3447198197543426, "grad_norm": 0.8979749083518982, "learning_rate": 2.3668791296705533e-05, "loss": 0.0774, "step": 92040 }, { "epoch": 3.3450832182571406, "grad_norm": 2.3768551349639893, "learning_rate": 2.3663609369020484e-05, "loss": 0.0991, "step": 92050 }, { "epoch": 3.345446616759939, "grad_norm": 0.9666934609413147, "learning_rate": 2.365842749891575e-05, "loss": 0.0693, "step": 92060 }, { "epoch": 3.345810015262737, "grad_norm": 0.48444709181785583, "learning_rate": 2.3653245686614603e-05, "loss": 0.0599, "step": 92070 }, { "epoch": 3.3461734137655355, "grad_norm": 0.5083462595939636, "learning_rate": 2.36480639323403e-05, "loss": 0.0561, "step": 92080 }, { "epoch": 3.3465368122683334, "grad_norm": 0.41698160767555237, "learning_rate": 2.3642882236316115e-05, "loss": 0.0903, "step": 92090 }, { "epoch": 3.346900210771132, "grad_norm": 0.29507341980934143, "learning_rate": 2.3637700598765313e-05, "loss": 0.0729, "step": 92100 }, { "epoch": 3.34726360927393, "grad_norm": 0.414693146944046, "learning_rate": 2.3632519019911142e-05, "loss": 0.1065, "step": 92110 }, { "epoch": 3.347627007776728, "grad_norm": 0.46231353282928467, "learning_rate": 
2.3627337499976855e-05, "loss": 0.5836, "step": 92120 }, { "epoch": 3.3479904062795263, "grad_norm": 0.6952545046806335, "learning_rate": 2.362215603918571e-05, "loss": 0.0865, "step": 92130 }, { "epoch": 3.3483538047823243, "grad_norm": 0.5076987743377686, "learning_rate": 2.361697463776097e-05, "loss": 0.0992, "step": 92140 }, { "epoch": 3.3487172032851227, "grad_norm": 0.7665526866912842, "learning_rate": 2.3611793295925865e-05, "loss": 0.0715, "step": 92150 }, { "epoch": 3.3490806017879207, "grad_norm": 0.5041813254356384, "learning_rate": 2.360661201390365e-05, "loss": 0.0631, "step": 92160 }, { "epoch": 3.3494440002907186, "grad_norm": 0.6250981092453003, "learning_rate": 2.360143079191756e-05, "loss": 0.0595, "step": 92170 }, { "epoch": 3.349807398793517, "grad_norm": 0.5737594962120056, "learning_rate": 2.3596249630190846e-05, "loss": 0.0698, "step": 92180 }, { "epoch": 3.350170797296315, "grad_norm": 0.33314749598503113, "learning_rate": 2.359106852894674e-05, "loss": 0.058, "step": 92190 }, { "epoch": 3.3505341957991135, "grad_norm": 1.2376643419265747, "learning_rate": 2.3585887488408483e-05, "loss": 0.0721, "step": 92200 }, { "epoch": 3.3508975943019115, "grad_norm": 6.422807216644287, "learning_rate": 2.35807065087993e-05, "loss": 0.1657, "step": 92210 }, { "epoch": 3.3512609928047095, "grad_norm": 1.1383922100067139, "learning_rate": 2.357552559034241e-05, "loss": 0.0738, "step": 92220 }, { "epoch": 3.351624391307508, "grad_norm": 0.5397285223007202, "learning_rate": 2.357034473326107e-05, "loss": 0.0631, "step": 92230 }, { "epoch": 3.351987789810306, "grad_norm": 0.4672096073627472, "learning_rate": 2.3565163937778485e-05, "loss": 0.0836, "step": 92240 }, { "epoch": 3.3523511883131043, "grad_norm": 0.714462161064148, "learning_rate": 2.3559983204117886e-05, "loss": 0.0699, "step": 92250 }, { "epoch": 3.3527145868159023, "grad_norm": 0.5036824941635132, "learning_rate": 2.355480253250248e-05, "loss": 0.6057, "step": 92260 }, { "epoch": 
3.3530779853187003, "grad_norm": 0.6620817184448242, "learning_rate": 2.3549621923155486e-05, "loss": 0.0665, "step": 92270 }, { "epoch": 3.3534413838214987, "grad_norm": 0.5807569622993469, "learning_rate": 2.354444137630013e-05, "loss": 0.0691, "step": 92280 }, { "epoch": 3.3538047823242967, "grad_norm": 0.5693409442901611, "learning_rate": 2.3539260892159618e-05, "loss": 0.0721, "step": 92290 }, { "epoch": 3.354168180827095, "grad_norm": 0.7940452098846436, "learning_rate": 2.3534080470957157e-05, "loss": 0.0749, "step": 92300 }, { "epoch": 3.354531579329893, "grad_norm": 0.3544544577598572, "learning_rate": 2.352890011291594e-05, "loss": 0.0782, "step": 92310 }, { "epoch": 3.354894977832691, "grad_norm": 0.5041877031326294, "learning_rate": 2.3523719818259196e-05, "loss": 0.0592, "step": 92320 }, { "epoch": 3.3552583763354895, "grad_norm": 0.4897719919681549, "learning_rate": 2.3518539587210112e-05, "loss": 0.0678, "step": 92330 }, { "epoch": 3.3556217748382875, "grad_norm": 1.1827727556228638, "learning_rate": 2.3513359419991884e-05, "loss": 0.0838, "step": 92340 }, { "epoch": 3.355985173341086, "grad_norm": 0.646219789981842, "learning_rate": 2.3508179316827713e-05, "loss": 0.068, "step": 92350 }, { "epoch": 3.356348571843884, "grad_norm": 0.49982723593711853, "learning_rate": 2.3502999277940772e-05, "loss": 0.0703, "step": 92360 }, { "epoch": 3.3567119703466823, "grad_norm": 0.8181835412979126, "learning_rate": 2.3497819303554276e-05, "loss": 0.0847, "step": 92370 }, { "epoch": 3.3570753688494803, "grad_norm": 1.0289931297302246, "learning_rate": 2.3492639393891408e-05, "loss": 0.0778, "step": 92380 }, { "epoch": 3.3574387673522788, "grad_norm": 0.6994947791099548, "learning_rate": 2.348745954917534e-05, "loss": 0.0865, "step": 92390 }, { "epoch": 3.3578021658550767, "grad_norm": 0.9630132913589478, "learning_rate": 2.348227976962926e-05, "loss": 0.0974, "step": 92400 }, { "epoch": 3.3578021658550767, "eval_loss": 0.31169602274894714, "eval_runtime": 
178.7618, "eval_samples_per_second": 41.474, "eval_steps_per_second": 5.186, "eval_wer": 0.14020549312905042, "step": 92400 }, { "epoch": 3.3581655643578747, "grad_norm": 0.2899853587150574, "learning_rate": 2.3477100055476334e-05, "loss": 0.0749, "step": 92410 }, { "epoch": 3.358528962860673, "grad_norm": 0.3742106854915619, "learning_rate": 2.347192040693976e-05, "loss": 0.0738, "step": 92420 }, { "epoch": 3.358892361363471, "grad_norm": 0.5455346703529358, "learning_rate": 2.3466740824242695e-05, "loss": 0.063, "step": 92430 }, { "epoch": 3.3592557598662696, "grad_norm": 0.29540014266967773, "learning_rate": 2.3461561307608315e-05, "loss": 0.0981, "step": 92440 }, { "epoch": 3.3596191583690675, "grad_norm": 1.146060824394226, "learning_rate": 2.3456381857259785e-05, "loss": 0.0866, "step": 92450 }, { "epoch": 3.3599825568718655, "grad_norm": 0.8182836174964905, "learning_rate": 2.345120247342026e-05, "loss": 0.1022, "step": 92460 }, { "epoch": 3.360345955374664, "grad_norm": 0.33180689811706543, "learning_rate": 2.3446023156312915e-05, "loss": 0.0737, "step": 92470 }, { "epoch": 3.360709353877462, "grad_norm": 0.6751521229743958, "learning_rate": 2.3440843906160907e-05, "loss": 0.0652, "step": 92480 }, { "epoch": 3.3610727523802604, "grad_norm": 0.37743857502937317, "learning_rate": 2.3435664723187384e-05, "loss": 0.0711, "step": 92490 }, { "epoch": 3.3614361508830584, "grad_norm": 0.6196742057800293, "learning_rate": 2.3430485607615494e-05, "loss": 0.0684, "step": 92500 }, { "epoch": 3.3617995493858563, "grad_norm": 0.6973705291748047, "learning_rate": 2.3425306559668404e-05, "loss": 0.1036, "step": 92510 }, { "epoch": 3.3621629478886548, "grad_norm": 0.2943952679634094, "learning_rate": 2.3420127579569257e-05, "loss": 0.131, "step": 92520 }, { "epoch": 3.3625263463914528, "grad_norm": 0.7704665660858154, "learning_rate": 2.3414948667541187e-05, "loss": 0.062, "step": 92530 }, { "epoch": 3.362889744894251, "grad_norm": 0.4366964101791382, "learning_rate": 
2.3409769823807337e-05, "loss": 0.2334, "step": 92540 }, { "epoch": 3.363253143397049, "grad_norm": 1.0481702089309692, "learning_rate": 2.340459104859084e-05, "loss": 0.0911, "step": 92550 }, { "epoch": 3.363616541899847, "grad_norm": 0.21111765503883362, "learning_rate": 2.339941234211485e-05, "loss": 0.0729, "step": 92560 }, { "epoch": 3.3639799404026456, "grad_norm": 0.42436930537223816, "learning_rate": 2.3394233704602484e-05, "loss": 0.6901, "step": 92570 }, { "epoch": 3.3643433389054436, "grad_norm": 1.4946538209915161, "learning_rate": 2.3389055136276874e-05, "loss": 0.0645, "step": 92580 }, { "epoch": 3.364706737408242, "grad_norm": 0.7017802000045776, "learning_rate": 2.3383876637361148e-05, "loss": 0.0914, "step": 92590 }, { "epoch": 3.36507013591104, "grad_norm": 0.6439974904060364, "learning_rate": 2.337869820807842e-05, "loss": 0.0786, "step": 92600 }, { "epoch": 3.365433534413838, "grad_norm": 0.9561066627502441, "learning_rate": 2.337351984865182e-05, "loss": 0.0977, "step": 92610 }, { "epoch": 3.3657969329166364, "grad_norm": 0.6108697652816772, "learning_rate": 2.336834155930447e-05, "loss": 0.0608, "step": 92620 }, { "epoch": 3.3661603314194344, "grad_norm": 0.5634490251541138, "learning_rate": 2.3363163340259476e-05, "loss": 0.2946, "step": 92630 }, { "epoch": 3.366523729922233, "grad_norm": 0.5420652031898499, "learning_rate": 2.335798519173995e-05, "loss": 0.0797, "step": 92640 }, { "epoch": 3.366887128425031, "grad_norm": 0.5687423944473267, "learning_rate": 2.3352807113968985e-05, "loss": 0.069, "step": 92650 }, { "epoch": 3.3672505269278292, "grad_norm": 0.5029795169830322, "learning_rate": 2.3347629107169715e-05, "loss": 0.0894, "step": 92660 }, { "epoch": 3.367613925430627, "grad_norm": 0.4098545014858246, "learning_rate": 2.3342451171565227e-05, "loss": 0.0689, "step": 92670 }, { "epoch": 3.3679773239334256, "grad_norm": 0.4281129539012909, "learning_rate": 2.333727330737862e-05, "loss": 0.0883, "step": 92680 }, { "epoch": 
3.3683407224362236, "grad_norm": 1.2608349323272705, "learning_rate": 2.333209551483298e-05, "loss": 0.0812, "step": 92690 }, { "epoch": 3.3687041209390216, "grad_norm": 0.6354079842567444, "learning_rate": 2.332691779415142e-05, "loss": 0.1057, "step": 92700 }, { "epoch": 3.36906751944182, "grad_norm": 0.5533850193023682, "learning_rate": 2.3321740145557018e-05, "loss": 0.081, "step": 92710 }, { "epoch": 3.369430917944618, "grad_norm": 0.7266316413879395, "learning_rate": 2.3316562569272865e-05, "loss": 0.0744, "step": 92720 }, { "epoch": 3.3697943164474164, "grad_norm": 0.7804214358329773, "learning_rate": 2.3311385065522038e-05, "loss": 0.0609, "step": 92730 }, { "epoch": 3.3701577149502144, "grad_norm": 1.9746454954147339, "learning_rate": 2.330620763452761e-05, "loss": 0.09, "step": 92740 }, { "epoch": 3.3705211134530124, "grad_norm": 0.7240809202194214, "learning_rate": 2.330103027651268e-05, "loss": 0.2196, "step": 92750 }, { "epoch": 3.370884511955811, "grad_norm": 0.37045255303382874, "learning_rate": 2.3295852991700314e-05, "loss": 0.0786, "step": 92760 }, { "epoch": 3.371247910458609, "grad_norm": 0.32502445578575134, "learning_rate": 2.3290675780313577e-05, "loss": 0.0598, "step": 92770 }, { "epoch": 3.3716113089614073, "grad_norm": 0.5612372159957886, "learning_rate": 2.3285498642575535e-05, "loss": 0.0646, "step": 92780 }, { "epoch": 3.3719747074642052, "grad_norm": 0.3779931664466858, "learning_rate": 2.328032157870925e-05, "loss": 0.0859, "step": 92790 }, { "epoch": 3.3723381059670032, "grad_norm": 1.5073649883270264, "learning_rate": 2.3275144588937797e-05, "loss": 0.0893, "step": 92800 }, { "epoch": 3.3727015044698017, "grad_norm": 2.095792293548584, "learning_rate": 2.3269967673484227e-05, "loss": 0.0537, "step": 92810 }, { "epoch": 3.3730649029725996, "grad_norm": 1.275193452835083, "learning_rate": 2.32647908325716e-05, "loss": 0.0813, "step": 92820 }, { "epoch": 3.373428301475398, "grad_norm": 0.4080447256565094, "learning_rate": 
2.3259614066422957e-05, "loss": 0.0716, "step": 92830 }, { "epoch": 3.373791699978196, "grad_norm": 0.6313503980636597, "learning_rate": 2.325443737526134e-05, "loss": 0.0792, "step": 92840 }, { "epoch": 3.374155098480994, "grad_norm": 0.5047944188117981, "learning_rate": 2.324926075930982e-05, "loss": 0.0532, "step": 92850 }, { "epoch": 3.3745184969837925, "grad_norm": 1.3697469234466553, "learning_rate": 2.3244084218791422e-05, "loss": 0.0832, "step": 92860 }, { "epoch": 3.3748818954865905, "grad_norm": 0.7875816226005554, "learning_rate": 2.3238907753929188e-05, "loss": 0.0736, "step": 92870 }, { "epoch": 3.375245293989389, "grad_norm": 0.43678218126296997, "learning_rate": 2.3233731364946143e-05, "loss": 0.5897, "step": 92880 }, { "epoch": 3.375608692492187, "grad_norm": 0.36059579253196716, "learning_rate": 2.322855505206534e-05, "loss": 0.1002, "step": 92890 }, { "epoch": 3.375972090994985, "grad_norm": 0.7359516620635986, "learning_rate": 2.3223378815509795e-05, "loss": 0.1078, "step": 92900 }, { "epoch": 3.3763354894977833, "grad_norm": 0.4600794017314911, "learning_rate": 2.3218202655502538e-05, "loss": 0.0678, "step": 92910 }, { "epoch": 3.3766988880005813, "grad_norm": 0.5198982357978821, "learning_rate": 2.321302657226659e-05, "loss": 0.0595, "step": 92920 }, { "epoch": 3.3770622865033797, "grad_norm": 1.273722529411316, "learning_rate": 2.320785056602495e-05, "loss": 0.0675, "step": 92930 }, { "epoch": 3.3774256850061777, "grad_norm": 0.33449608087539673, "learning_rate": 2.3202674637000675e-05, "loss": 0.0971, "step": 92940 }, { "epoch": 3.377789083508976, "grad_norm": 0.9987308382987976, "learning_rate": 2.3197498785416746e-05, "loss": 0.127, "step": 92950 }, { "epoch": 3.378152482011774, "grad_norm": 0.3726591467857361, "learning_rate": 2.3192323011496186e-05, "loss": 0.0732, "step": 92960 }, { "epoch": 3.3785158805145725, "grad_norm": 1.3593406677246094, "learning_rate": 2.3187147315461994e-05, "loss": 0.0619, "step": 92970 }, { "epoch": 
3.3788792790173705, "grad_norm": 0.44492968916893005, "learning_rate": 2.3181971697537165e-05, "loss": 0.0628, "step": 92980 }, { "epoch": 3.3792426775201685, "grad_norm": 0.7739204168319702, "learning_rate": 2.3176796157944713e-05, "loss": 0.0881, "step": 92990 }, { "epoch": 3.379606076022967, "grad_norm": 0.6815133690834045, "learning_rate": 2.317162069690763e-05, "loss": 0.0798, "step": 93000 }, { "epoch": 3.379606076022967, "eval_loss": 0.3195069134235382, "eval_runtime": 179.6087, "eval_samples_per_second": 41.279, "eval_steps_per_second": 5.161, "eval_wer": 0.13676548005881606, "step": 93000 }, { "epoch": 3.379969474525765, "grad_norm": 0.5755239725112915, "learning_rate": 2.316644531464891e-05, "loss": 0.072, "step": 93010 }, { "epoch": 3.3803328730285633, "grad_norm": 0.6259827017784119, "learning_rate": 2.3161270011391535e-05, "loss": 0.0585, "step": 93020 }, { "epoch": 3.3806962715313613, "grad_norm": 0.35530751943588257, "learning_rate": 2.315609478735848e-05, "loss": 0.0735, "step": 93030 }, { "epoch": 3.3810596700341593, "grad_norm": 0.5004699230194092, "learning_rate": 2.3150919642772752e-05, "loss": 0.0884, "step": 93040 }, { "epoch": 3.3814230685369577, "grad_norm": 0.8541852235794067, "learning_rate": 2.3145744577857316e-05, "loss": 0.0844, "step": 93050 }, { "epoch": 3.3817864670397557, "grad_norm": 1.4642248153686523, "learning_rate": 2.314056959283515e-05, "loss": 0.0759, "step": 93060 }, { "epoch": 3.382149865542554, "grad_norm": 1.696931004524231, "learning_rate": 2.3135394687929225e-05, "loss": 0.0605, "step": 93070 }, { "epoch": 3.382513264045352, "grad_norm": 2.729449987411499, "learning_rate": 2.31302198633625e-05, "loss": 0.068, "step": 93080 }, { "epoch": 3.38287666254815, "grad_norm": 0.6647607684135437, "learning_rate": 2.3125045119357953e-05, "loss": 0.0832, "step": 93090 }, { "epoch": 3.3832400610509485, "grad_norm": 0.4802834093570709, "learning_rate": 2.3119870456138545e-05, "loss": 0.0764, "step": 93100 }, { "epoch": 
3.3836034595537465, "grad_norm": 1.4223417043685913, "learning_rate": 2.311469587392723e-05, "loss": 0.0824, "step": 93110 }, { "epoch": 3.383966858056545, "grad_norm": 1.8741366863250732, "learning_rate": 2.310952137294695e-05, "loss": 0.0715, "step": 93120 }, { "epoch": 3.384330256559343, "grad_norm": 0.6077579855918884, "learning_rate": 2.3104346953420676e-05, "loss": 0.0657, "step": 93130 }, { "epoch": 3.384693655062141, "grad_norm": 0.47796136140823364, "learning_rate": 2.3099172615571353e-05, "loss": 0.0814, "step": 93140 }, { "epoch": 3.3850570535649394, "grad_norm": 1.0871938467025757, "learning_rate": 2.3093998359621916e-05, "loss": 0.0979, "step": 93150 }, { "epoch": 3.3854204520677373, "grad_norm": 0.46470001339912415, "learning_rate": 2.3088824185795305e-05, "loss": 0.0796, "step": 93160 }, { "epoch": 3.3857838505705358, "grad_norm": 0.7592546343803406, "learning_rate": 2.3083650094314453e-05, "loss": 0.0859, "step": 93170 }, { "epoch": 3.3861472490733338, "grad_norm": 0.29902932047843933, "learning_rate": 2.307847608540231e-05, "loss": 0.0703, "step": 93180 }, { "epoch": 3.3865106475761317, "grad_norm": 0.5462153553962708, "learning_rate": 2.307330215928179e-05, "loss": 0.069, "step": 93190 }, { "epoch": 3.38687404607893, "grad_norm": 1.0044230222702026, "learning_rate": 2.3068128316175834e-05, "loss": 0.0714, "step": 93200 }, { "epoch": 3.387237444581728, "grad_norm": 0.9057084918022156, "learning_rate": 2.306295455630735e-05, "loss": 0.0883, "step": 93210 }, { "epoch": 3.3876008430845266, "grad_norm": 0.3063741624355316, "learning_rate": 2.3057780879899252e-05, "loss": 0.0672, "step": 93220 }, { "epoch": 3.3879642415873246, "grad_norm": 0.4650433361530304, "learning_rate": 2.3052607287174475e-05, "loss": 0.0631, "step": 93230 }, { "epoch": 3.388327640090123, "grad_norm": 0.5927343368530273, "learning_rate": 2.3047433778355925e-05, "loss": 0.1448, "step": 93240 }, { "epoch": 3.388691038592921, "grad_norm": 0.8953503370285034, "learning_rate": 
2.3042260353666503e-05, "loss": 0.0677, "step": 93250 }, { "epoch": 3.3890544370957194, "grad_norm": 1.3331018686294556, "learning_rate": 2.303708701332912e-05, "loss": 0.0651, "step": 93260 }, { "epoch": 3.3894178355985174, "grad_norm": 0.5606054067611694, "learning_rate": 2.303191375756666e-05, "loss": 0.1045, "step": 93270 }, { "epoch": 3.3897812341013154, "grad_norm": 0.4888154864311218, "learning_rate": 2.3026740586602043e-05, "loss": 0.0742, "step": 93280 }, { "epoch": 3.390144632604114, "grad_norm": 0.45617663860321045, "learning_rate": 2.3021567500658156e-05, "loss": 0.0822, "step": 93290 }, { "epoch": 3.390508031106912, "grad_norm": 0.9321984052658081, "learning_rate": 2.3016394499957886e-05, "loss": 0.0726, "step": 93300 }, { "epoch": 3.39087142960971, "grad_norm": 0.3022707402706146, "learning_rate": 2.3011221584724108e-05, "loss": 0.0992, "step": 93310 }, { "epoch": 3.391234828112508, "grad_norm": 1.342934250831604, "learning_rate": 2.3006048755179723e-05, "loss": 0.0897, "step": 93320 }, { "epoch": 3.391598226615306, "grad_norm": 0.9461100101470947, "learning_rate": 2.3000876011547607e-05, "loss": 0.0695, "step": 93330 }, { "epoch": 3.3919616251181046, "grad_norm": 0.7840179204940796, "learning_rate": 2.299570335405063e-05, "loss": 0.0822, "step": 93340 }, { "epoch": 3.3923250236209026, "grad_norm": 0.6755959391593933, "learning_rate": 2.2990530782911664e-05, "loss": 0.1182, "step": 93350 }, { "epoch": 3.392688422123701, "grad_norm": 0.8153521418571472, "learning_rate": 2.2985358298353566e-05, "loss": 0.0852, "step": 93360 }, { "epoch": 3.393051820626499, "grad_norm": 0.6111595630645752, "learning_rate": 2.2980185900599222e-05, "loss": 0.0635, "step": 93370 }, { "epoch": 3.393415219129297, "grad_norm": 0.3639895021915436, "learning_rate": 2.297501358987148e-05, "loss": 0.1101, "step": 93380 }, { "epoch": 3.3937786176320954, "grad_norm": 0.7763181924819946, "learning_rate": 2.2969841366393195e-05, "loss": 0.088, "step": 93390 }, { "epoch": 
3.3941420161348934, "grad_norm": 1.595831274986267, "learning_rate": 2.2964669230387228e-05, "loss": 0.1059, "step": 93400 }, { "epoch": 3.394505414637692, "grad_norm": 0.5875428915023804, "learning_rate": 2.2959497182076408e-05, "loss": 0.0743, "step": 93410 }, { "epoch": 3.39486881314049, "grad_norm": 1.4419046640396118, "learning_rate": 2.2954325221683606e-05, "loss": 0.085, "step": 93420 }, { "epoch": 3.395232211643288, "grad_norm": 0.7197487354278564, "learning_rate": 2.294915334943165e-05, "loss": 0.0828, "step": 93430 }, { "epoch": 3.3955956101460862, "grad_norm": 0.403689444065094, "learning_rate": 2.294398156554338e-05, "loss": 0.0969, "step": 93440 }, { "epoch": 3.3959590086488842, "grad_norm": 0.3563007712364197, "learning_rate": 2.2938809870241632e-05, "loss": 0.1025, "step": 93450 }, { "epoch": 3.3963224071516827, "grad_norm": 0.41774362325668335, "learning_rate": 2.2933638263749218e-05, "loss": 0.09, "step": 93460 }, { "epoch": 3.3966858056544806, "grad_norm": 1.4661532640457153, "learning_rate": 2.2928466746288993e-05, "loss": 0.5754, "step": 93470 }, { "epoch": 3.3970492041572786, "grad_norm": 0.6082340478897095, "learning_rate": 2.2923295318083766e-05, "loss": 0.0627, "step": 93480 }, { "epoch": 3.397412602660077, "grad_norm": 1.2264482975006104, "learning_rate": 2.2918123979356353e-05, "loss": 0.0981, "step": 93490 }, { "epoch": 3.397776001162875, "grad_norm": 1.3761318922042847, "learning_rate": 2.2912952730329555e-05, "loss": 0.0723, "step": 93500 }, { "epoch": 3.3981393996656735, "grad_norm": 0.6967355608940125, "learning_rate": 2.290778157122622e-05, "loss": 0.0643, "step": 93510 }, { "epoch": 3.3985027981684715, "grad_norm": 0.5545636415481567, "learning_rate": 2.2902610502269122e-05, "loss": 0.0619, "step": 93520 }, { "epoch": 3.39886619667127, "grad_norm": 0.4898998737335205, "learning_rate": 2.289743952368108e-05, "loss": 1.7187, "step": 93530 }, { "epoch": 3.399229595174068, "grad_norm": 0.593694806098938, "learning_rate": 
2.2892268635684885e-05, "loss": 0.0942, "step": 93540 }, { "epoch": 3.3995929936768663, "grad_norm": 0.8465686440467834, "learning_rate": 2.2887097838503327e-05, "loss": 0.0883, "step": 93550 }, { "epoch": 3.3999563921796643, "grad_norm": 0.9347935318946838, "learning_rate": 2.2881927132359214e-05, "loss": 0.0872, "step": 93560 }, { "epoch": 3.4003197906824623, "grad_norm": 0.5704132914543152, "learning_rate": 2.287675651747533e-05, "loss": 1.4816, "step": 93570 }, { "epoch": 3.4006831891852607, "grad_norm": 0.25210240483283997, "learning_rate": 2.287158599407445e-05, "loss": 0.0696, "step": 93580 }, { "epoch": 3.4010465876880587, "grad_norm": 0.7095610499382019, "learning_rate": 2.2866415562379356e-05, "loss": 0.0818, "step": 93590 }, { "epoch": 3.401409986190857, "grad_norm": 1.3713339567184448, "learning_rate": 2.2861245222612812e-05, "loss": 0.088, "step": 93600 }, { "epoch": 3.401409986190857, "eval_loss": 0.29974231123924255, "eval_runtime": 178.5228, "eval_samples_per_second": 41.53, "eval_steps_per_second": 5.193, "eval_wer": 0.13912538348430664, "step": 93600 }, { "epoch": 3.401773384693655, "grad_norm": 1.1637344360351562, "learning_rate": 2.285659199560597e-05, "loss": 3.4301, "step": 93610 }, { "epoch": 3.402136783196453, "grad_norm": 0.5053747296333313, "learning_rate": 2.285142183111744e-05, "loss": 0.0693, "step": 93620 }, { "epoch": 3.4025001816992515, "grad_norm": 0.740875780582428, "learning_rate": 2.2846251759203496e-05, "loss": 0.0629, "step": 93630 }, { "epoch": 3.4028635802020495, "grad_norm": 5.731196880340576, "learning_rate": 2.2841081780086904e-05, "loss": 0.0998, "step": 93640 }, { "epoch": 3.403226978704848, "grad_norm": 0.7567720413208008, "learning_rate": 2.2835911893990414e-05, "loss": 0.0844, "step": 93650 }, { "epoch": 3.403590377207646, "grad_norm": 0.4909075200557709, "learning_rate": 2.283074210113677e-05, "loss": 0.0724, "step": 93660 }, { "epoch": 3.403953775710444, "grad_norm": 0.5206305980682373, "learning_rate": 
2.282557240174874e-05, "loss": 0.0583, "step": 93670 }, { "epoch": 3.4043171742132423, "grad_norm": 0.6271891593933105, "learning_rate": 2.2820402796049063e-05, "loss": 0.0776, "step": 93680 }, { "epoch": 3.4046805727160403, "grad_norm": 0.6527193784713745, "learning_rate": 2.281523328426047e-05, "loss": 0.085, "step": 93690 }, { "epoch": 3.4050439712188387, "grad_norm": 0.8757163882255554, "learning_rate": 2.2810063866605706e-05, "loss": 0.0707, "step": 93700 }, { "epoch": 3.4054073697216367, "grad_norm": 0.8993749022483826, "learning_rate": 2.280489454330748e-05, "loss": 0.0713, "step": 93710 }, { "epoch": 3.4057707682244347, "grad_norm": 0.611003577709198, "learning_rate": 2.2799725314588555e-05, "loss": 0.0635, "step": 93720 }, { "epoch": 3.406134166727233, "grad_norm": 0.4539841413497925, "learning_rate": 2.2794556180671636e-05, "loss": 0.0718, "step": 93730 }, { "epoch": 3.406497565230031, "grad_norm": 0.2736055850982666, "learning_rate": 2.2789387141779445e-05, "loss": 0.1861, "step": 93740 }, { "epoch": 3.4068609637328295, "grad_norm": 0.6625291109085083, "learning_rate": 2.2784218198134695e-05, "loss": 0.107, "step": 93750 }, { "epoch": 3.4072243622356275, "grad_norm": 0.49704796075820923, "learning_rate": 2.277904934996009e-05, "loss": 0.0621, "step": 93760 }, { "epoch": 3.4075877607384255, "grad_norm": 0.471886545419693, "learning_rate": 2.2773880597478356e-05, "loss": 0.1414, "step": 93770 }, { "epoch": 3.407951159241224, "grad_norm": 0.4752335548400879, "learning_rate": 2.2768711940912185e-05, "loss": 0.0657, "step": 93780 }, { "epoch": 3.408314557744022, "grad_norm": 0.6662150025367737, "learning_rate": 2.276354338048428e-05, "loss": 0.0933, "step": 93790 }, { "epoch": 3.4086779562468204, "grad_norm": 0.9255740642547607, "learning_rate": 2.275837491641732e-05, "loss": 0.1099, "step": 93800 }, { "epoch": 3.4090413547496183, "grad_norm": 0.515019953250885, "learning_rate": 2.2753206548934024e-05, "loss": 0.0861, "step": 93810 }, { "epoch": 
3.4094047532524168, "grad_norm": 0.33219701051712036, "learning_rate": 2.2748038278257063e-05, "loss": 0.0842, "step": 93820 }, { "epoch": 3.4097681517552147, "grad_norm": 0.40631362795829773, "learning_rate": 2.2742870104609114e-05, "loss": 0.0673, "step": 93830 }, { "epoch": 3.410131550258013, "grad_norm": 0.4353393316268921, "learning_rate": 2.2737702028212868e-05, "loss": 0.0768, "step": 93840 }, { "epoch": 3.410494948760811, "grad_norm": 1.10258150100708, "learning_rate": 2.273253404929098e-05, "loss": 0.0884, "step": 93850 }, { "epoch": 3.410858347263609, "grad_norm": 3.386838912963867, "learning_rate": 2.2727366168066142e-05, "loss": 0.1114, "step": 93860 }, { "epoch": 3.4112217457664076, "grad_norm": 1.1398248672485352, "learning_rate": 2.2722198384761008e-05, "loss": 0.0541, "step": 93870 }, { "epoch": 3.4115851442692056, "grad_norm": 0.950499951839447, "learning_rate": 2.2717030699598245e-05, "loss": 0.0961, "step": 93880 }, { "epoch": 3.411948542772004, "grad_norm": 0.48193359375, "learning_rate": 2.2711863112800506e-05, "loss": 0.4101, "step": 93890 }, { "epoch": 3.412311941274802, "grad_norm": 0.827944815158844, "learning_rate": 2.270669562459043e-05, "loss": 0.0864, "step": 93900 }, { "epoch": 3.4126753397776, "grad_norm": 1.7731389999389648, "learning_rate": 2.270152823519069e-05, "loss": 0.1077, "step": 93910 }, { "epoch": 3.4130387382803984, "grad_norm": 0.750033438205719, "learning_rate": 2.2696360944823923e-05, "loss": 0.1013, "step": 93920 }, { "epoch": 3.4134021367831964, "grad_norm": 1.206369161605835, "learning_rate": 2.269119375371277e-05, "loss": 0.3487, "step": 93930 }, { "epoch": 3.413765535285995, "grad_norm": 0.38662195205688477, "learning_rate": 2.2686026662079858e-05, "loss": 0.1012, "step": 93940 }, { "epoch": 3.414128933788793, "grad_norm": 0.5544074177742004, "learning_rate": 2.2680859670147815e-05, "loss": 0.0925, "step": 93950 }, { "epoch": 3.4144923322915908, "grad_norm": 1.074537754058838, "learning_rate": 
2.267569277813929e-05, "loss": 0.0875, "step": 93960 }, { "epoch": 3.414855730794389, "grad_norm": 3.1776864528656006, "learning_rate": 2.267052598627689e-05, "loss": 3.4631, "step": 93970 }, { "epoch": 3.415219129297187, "grad_norm": 0.5976073741912842, "learning_rate": 2.266535929478324e-05, "loss": 0.0741, "step": 93980 }, { "epoch": 3.4155825277999856, "grad_norm": 0.507327675819397, "learning_rate": 2.2660192703880935e-05, "loss": 0.0834, "step": 93990 }, { "epoch": 3.4159459263027836, "grad_norm": 0.46804341673851013, "learning_rate": 2.2655026213792617e-05, "loss": 0.0912, "step": 94000 }, { "epoch": 3.4163093248055816, "grad_norm": 0.6629424095153809, "learning_rate": 2.2649859824740876e-05, "loss": 0.0829, "step": 94010 }, { "epoch": 3.41667272330838, "grad_norm": 1.2816437482833862, "learning_rate": 2.2644693536948315e-05, "loss": 0.0895, "step": 94020 }, { "epoch": 3.417036121811178, "grad_norm": 0.32198429107666016, "learning_rate": 2.2639527350637525e-05, "loss": 0.0852, "step": 94030 }, { "epoch": 3.4173995203139764, "grad_norm": 0.49124446511268616, "learning_rate": 2.263436126603109e-05, "loss": 0.5835, "step": 94040 }, { "epoch": 3.4177629188167744, "grad_norm": 0.45547664165496826, "learning_rate": 2.262919528335163e-05, "loss": 0.0716, "step": 94050 }, { "epoch": 3.4181263173195724, "grad_norm": 0.4709664285182953, "learning_rate": 2.2624029402821705e-05, "loss": 0.0837, "step": 94060 }, { "epoch": 3.418489715822371, "grad_norm": 0.8313547372817993, "learning_rate": 2.2618863624663898e-05, "loss": 0.0755, "step": 94070 }, { "epoch": 3.418853114325169, "grad_norm": 0.8527863025665283, "learning_rate": 2.2613697949100782e-05, "loss": 0.0617, "step": 94080 }, { "epoch": 3.4192165128279672, "grad_norm": 0.39365309476852417, "learning_rate": 2.2608532376354932e-05, "loss": 0.08, "step": 94090 }, { "epoch": 3.419579911330765, "grad_norm": 0.9611566662788391, "learning_rate": 2.2603366906648916e-05, "loss": 0.0964, "step": 94100 }, { "epoch": 
3.4199433098335636, "grad_norm": 0.5890967845916748, "learning_rate": 2.2598201540205294e-05, "loss": 0.0627, "step": 94110 }, { "epoch": 3.4203067083363616, "grad_norm": 0.24214434623718262, "learning_rate": 2.259303627724662e-05, "loss": 0.0614, "step": 94120 }, { "epoch": 3.42067010683916, "grad_norm": 0.6111648678779602, "learning_rate": 2.2587871117995445e-05, "loss": 0.0712, "step": 94130 }, { "epoch": 3.421033505341958, "grad_norm": 3.44565486907959, "learning_rate": 2.2582706062674325e-05, "loss": 0.6448, "step": 94140 }, { "epoch": 3.421396903844756, "grad_norm": 2.6556754112243652, "learning_rate": 2.25775411115058e-05, "loss": 0.0893, "step": 94150 }, { "epoch": 3.4217603023475545, "grad_norm": 1.2639325857162476, "learning_rate": 2.257237626471241e-05, "loss": 0.0819, "step": 94160 }, { "epoch": 3.4221237008503524, "grad_norm": 0.7145587801933289, "learning_rate": 2.2567211522516685e-05, "loss": 0.0752, "step": 94170 }, { "epoch": 3.422487099353151, "grad_norm": 1.2810157537460327, "learning_rate": 2.2562046885141167e-05, "loss": 0.0675, "step": 94180 }, { "epoch": 3.422850497855949, "grad_norm": 0.4140676259994507, "learning_rate": 2.2556882352808367e-05, "loss": 0.0819, "step": 94190 }, { "epoch": 3.423213896358747, "grad_norm": 0.9687098860740662, "learning_rate": 2.2551717925740817e-05, "loss": 0.0944, "step": 94200 }, { "epoch": 3.423213896358747, "eval_loss": 0.31988459825515747, "eval_runtime": 180.0602, "eval_samples_per_second": 41.175, "eval_steps_per_second": 5.148, "eval_wer": 0.1369107048850001, "step": 94200 }, { "epoch": 3.4235772948615453, "grad_norm": 0.8577378392219543, "learning_rate": 2.2546553604161032e-05, "loss": 0.0767, "step": 94210 }, { "epoch": 3.4239406933643433, "grad_norm": 0.5929591655731201, "learning_rate": 2.254138938829152e-05, "loss": 0.0623, "step": 94220 }, { "epoch": 3.4243040918671417, "grad_norm": 0.5392001867294312, "learning_rate": 2.2536225278354787e-05, "loss": 0.0848, "step": 94230 }, { "epoch": 
3.4246674903699397, "grad_norm": 0.5557697415351868, "learning_rate": 2.253106127457335e-05, "loss": 0.0784, "step": 94240 }, { "epoch": 3.4250308888727377, "grad_norm": 0.5030058026313782, "learning_rate": 2.2525897377169696e-05, "loss": 0.0786, "step": 94250 }, { "epoch": 3.425394287375536, "grad_norm": 1.1300536394119263, "learning_rate": 2.2520733586366323e-05, "loss": 0.0868, "step": 94260 }, { "epoch": 3.425757685878334, "grad_norm": 0.6011260747909546, "learning_rate": 2.2515569902385714e-05, "loss": 0.0811, "step": 94270 }, { "epoch": 3.4261210843811325, "grad_norm": 0.9162232279777527, "learning_rate": 2.2510406325450357e-05, "loss": 0.0749, "step": 94280 }, { "epoch": 3.4264844828839305, "grad_norm": 1.2079869508743286, "learning_rate": 2.2505242855782737e-05, "loss": 0.0844, "step": 94290 }, { "epoch": 3.4268478813867285, "grad_norm": 0.703209638595581, "learning_rate": 2.2500079493605327e-05, "loss": 0.0921, "step": 94300 }, { "epoch": 3.427211279889527, "grad_norm": 1.7158406972885132, "learning_rate": 2.249491623914059e-05, "loss": 0.0942, "step": 94310 }, { "epoch": 3.427574678392325, "grad_norm": 0.5088964700698853, "learning_rate": 2.248975309261101e-05, "loss": 0.0584, "step": 94320 }, { "epoch": 3.4279380768951233, "grad_norm": 0.3864693343639374, "learning_rate": 2.2484590054239024e-05, "loss": 0.0579, "step": 94330 }, { "epoch": 3.4283014753979213, "grad_norm": 0.4104454517364502, "learning_rate": 2.2479427124247117e-05, "loss": 0.1607, "step": 94340 }, { "epoch": 3.4286648739007197, "grad_norm": 1.0903159379959106, "learning_rate": 2.247426430285772e-05, "loss": 0.0783, "step": 94350 }, { "epoch": 3.4290282724035177, "grad_norm": 0.8514654636383057, "learning_rate": 2.2469101590293284e-05, "loss": 0.065, "step": 94360 }, { "epoch": 3.4293916709063157, "grad_norm": 0.6110685467720032, "learning_rate": 2.246393898677626e-05, "loss": 0.0635, "step": 94370 }, { "epoch": 3.429755069409114, "grad_norm": 0.38304954767227173, "learning_rate": 
2.245877649252908e-05, "loss": 0.069, "step": 94380 }, { "epoch": 3.430118467911912, "grad_norm": 1.2440117597579956, "learning_rate": 2.245361410777418e-05, "loss": 0.1056, "step": 94390 }, { "epoch": 3.4304818664147105, "grad_norm": 0.7492786645889282, "learning_rate": 2.2448451832733987e-05, "loss": 0.0809, "step": 94400 }, { "epoch": 3.4308452649175085, "grad_norm": 0.4058247208595276, "learning_rate": 2.244328966763093e-05, "loss": 0.0934, "step": 94410 }, { "epoch": 3.431208663420307, "grad_norm": 1.7672019004821777, "learning_rate": 2.243812761268742e-05, "loss": 0.0696, "step": 94420 }, { "epoch": 3.431572061923105, "grad_norm": 0.4631694555282593, "learning_rate": 2.2432965668125878e-05, "loss": 2.0515, "step": 94430 }, { "epoch": 3.431935460425903, "grad_norm": 1.0094584226608276, "learning_rate": 2.2427803834168716e-05, "loss": 0.0686, "step": 94440 }, { "epoch": 3.4322988589287013, "grad_norm": 0.7469279766082764, "learning_rate": 2.2422642111038328e-05, "loss": 0.0753, "step": 94450 }, { "epoch": 3.4326622574314993, "grad_norm": 0.5456721186637878, "learning_rate": 2.2417480498957126e-05, "loss": 0.1019, "step": 94460 }, { "epoch": 3.4330256559342978, "grad_norm": 2.7943344116210938, "learning_rate": 2.2412318998147492e-05, "loss": 0.0705, "step": 94470 }, { "epoch": 3.4333890544370957, "grad_norm": 5.648090839385986, "learning_rate": 2.2407157608831836e-05, "loss": 0.0853, "step": 94480 }, { "epoch": 3.4337524529398937, "grad_norm": 0.7561296224594116, "learning_rate": 2.2401996331232528e-05, "loss": 0.0796, "step": 94490 }, { "epoch": 3.434115851442692, "grad_norm": 1.078397274017334, "learning_rate": 2.2396835165571954e-05, "loss": 0.0706, "step": 94500 }, { "epoch": 3.43447924994549, "grad_norm": 0.31419476866722107, "learning_rate": 2.2391674112072498e-05, "loss": 0.0871, "step": 94510 }, { "epoch": 3.4348426484482886, "grad_norm": 0.4185982644557953, "learning_rate": 2.2386513170956513e-05, "loss": 0.0643, "step": 94520 }, { "epoch": 
3.4352060469510866, "grad_norm": 0.8538812398910522, "learning_rate": 2.2381352342446385e-05, "loss": 0.0627, "step": 94530 }, { "epoch": 3.4355694454538845, "grad_norm": 0.42258143424987793, "learning_rate": 2.2376191626764462e-05, "loss": 0.0774, "step": 94540 }, { "epoch": 3.435932843956683, "grad_norm": 0.7472050786018372, "learning_rate": 2.237103102413311e-05, "loss": 0.0717, "step": 94550 }, { "epoch": 3.436296242459481, "grad_norm": 1.003833293914795, "learning_rate": 2.2365870534774678e-05, "loss": 0.1762, "step": 94560 }, { "epoch": 3.4366596409622794, "grad_norm": 3.8594932556152344, "learning_rate": 2.2360710158911507e-05, "loss": 0.0672, "step": 94570 }, { "epoch": 3.4370230394650774, "grad_norm": 2.909346103668213, "learning_rate": 2.235554989676595e-05, "loss": 0.0676, "step": 94580 }, { "epoch": 3.4373864379678754, "grad_norm": 0.6960200667381287, "learning_rate": 2.235038974856033e-05, "loss": 0.0823, "step": 94590 }, { "epoch": 3.437749836470674, "grad_norm": 0.5080627202987671, "learning_rate": 2.2345229714516998e-05, "loss": 0.1036, "step": 94600 }, { "epoch": 3.4381132349734718, "grad_norm": 0.4168925881385803, "learning_rate": 2.2340069794858267e-05, "loss": 0.0925, "step": 94610 }, { "epoch": 3.43847663347627, "grad_norm": 0.32866325974464417, "learning_rate": 2.233490998980647e-05, "loss": 0.0729, "step": 94620 }, { "epoch": 3.438840031979068, "grad_norm": 0.5904275178909302, "learning_rate": 2.2329750299583913e-05, "loss": 0.2434, "step": 94630 }, { "epoch": 3.4392034304818666, "grad_norm": 0.8439253568649292, "learning_rate": 2.232459072441292e-05, "loss": 0.0738, "step": 94640 }, { "epoch": 3.4395668289846646, "grad_norm": 2.668860912322998, "learning_rate": 2.2319431264515792e-05, "loss": 0.0934, "step": 94650 }, { "epoch": 3.4399302274874626, "grad_norm": 0.3184053301811218, "learning_rate": 2.231427192011483e-05, "loss": 0.0986, "step": 94660 }, { "epoch": 3.440293625990261, "grad_norm": 1.6480865478515625, "learning_rate": 
2.2309112691432337e-05, "loss": 3.1534, "step": 94670 }, { "epoch": 3.440657024493059, "grad_norm": 1.2210397720336914, "learning_rate": 2.2303953578690602e-05, "loss": 0.0899, "step": 94680 }, { "epoch": 3.4410204229958574, "grad_norm": 0.8659685254096985, "learning_rate": 2.2298794582111922e-05, "loss": 0.0755, "step": 94690 }, { "epoch": 3.4413838214986554, "grad_norm": 1.4027395248413086, "learning_rate": 2.229363570191857e-05, "loss": 0.0773, "step": 94700 }, { "epoch": 3.441747220001454, "grad_norm": 0.47845137119293213, "learning_rate": 2.228847693833282e-05, "loss": 0.0872, "step": 94710 }, { "epoch": 3.442110618504252, "grad_norm": 1.3525196313858032, "learning_rate": 2.228331829157695e-05, "loss": 0.0653, "step": 94720 }, { "epoch": 3.44247401700705, "grad_norm": 1.1353908777236938, "learning_rate": 2.2278159761873235e-05, "loss": 0.0758, "step": 94730 }, { "epoch": 3.4428374155098482, "grad_norm": 1.6163307428359985, "learning_rate": 2.2273001349443935e-05, "loss": 0.8463, "step": 94740 }, { "epoch": 3.443200814012646, "grad_norm": 0.5957239866256714, "learning_rate": 2.22678430545113e-05, "loss": 0.0654, "step": 94750 }, { "epoch": 3.4435642125154446, "grad_norm": 1.3498693704605103, "learning_rate": 2.2262684877297586e-05, "loss": 0.0716, "step": 94760 }, { "epoch": 3.4439276110182426, "grad_norm": 4.1495208740234375, "learning_rate": 2.2257526818025036e-05, "loss": 0.0578, "step": 94770 }, { "epoch": 3.4442910095210406, "grad_norm": 0.5842284560203552, "learning_rate": 2.2252368876915903e-05, "loss": 0.0851, "step": 94780 }, { "epoch": 3.444654408023839, "grad_norm": 0.6782490015029907, "learning_rate": 2.2247211054192425e-05, "loss": 0.0831, "step": 94790 }, { "epoch": 3.445017806526637, "grad_norm": 1.1757545471191406, "learning_rate": 2.224205335007682e-05, "loss": 0.0995, "step": 94800 }, { "epoch": 3.445017806526637, "eval_loss": 0.31772205233573914, "eval_runtime": 178.9226, "eval_samples_per_second": 41.437, "eval_steps_per_second": 5.181, 
"eval_wer": 0.1385807903861165, "step": 94800 }, { "epoch": 3.4453812050294355, "grad_norm": 2.117018461227417, "learning_rate": 2.223689576479132e-05, "loss": 0.0569, "step": 94810 }, { "epoch": 3.4457446035322334, "grad_norm": 0.7748499512672424, "learning_rate": 2.2231738298558158e-05, "loss": 0.0602, "step": 94820 }, { "epoch": 3.4461080020350314, "grad_norm": 0.43736356496810913, "learning_rate": 2.2226580951599544e-05, "loss": 0.0952, "step": 94830 }, { "epoch": 3.44647140053783, "grad_norm": 0.4574269950389862, "learning_rate": 2.222142372413769e-05, "loss": 0.07, "step": 94840 }, { "epoch": 3.446834799040628, "grad_norm": 0.883139431476593, "learning_rate": 2.2216266616394793e-05, "loss": 0.089, "step": 94850 }, { "epoch": 3.4471981975434263, "grad_norm": 0.8173096179962158, "learning_rate": 2.2211109628593067e-05, "loss": 0.1002, "step": 94860 }, { "epoch": 3.4475615960462243, "grad_norm": 0.5318263173103333, "learning_rate": 2.2205952760954704e-05, "loss": 0.0586, "step": 94870 }, { "epoch": 3.4479249945490222, "grad_norm": 0.18389153480529785, "learning_rate": 2.2200796013701898e-05, "loss": 0.0589, "step": 94880 }, { "epoch": 3.4482883930518207, "grad_norm": 0.47492220997810364, "learning_rate": 2.2195639387056833e-05, "loss": 0.6377, "step": 94890 }, { "epoch": 3.4486517915546187, "grad_norm": 0.3797650635242462, "learning_rate": 2.219048288124168e-05, "loss": 0.086, "step": 94900 }, { "epoch": 3.449015190057417, "grad_norm": 1.8727638721466064, "learning_rate": 2.218532649647863e-05, "loss": 0.0876, "step": 94910 }, { "epoch": 3.449378588560215, "grad_norm": 1.5461180210113525, "learning_rate": 2.218017023298985e-05, "loss": 0.0739, "step": 94920 }, { "epoch": 3.4497419870630135, "grad_norm": 0.7635065317153931, "learning_rate": 2.2175014090997497e-05, "loss": 0.0724, "step": 94930 }, { "epoch": 3.4501053855658115, "grad_norm": 0.9960238337516785, "learning_rate": 2.216985807072374e-05, "loss": 0.1, "step": 94940 }, { "epoch": 3.4504687840686095, 
"grad_norm": 0.8764038681983948, "learning_rate": 2.2164702172390717e-05, "loss": 0.0761, "step": 94950 }, { "epoch": 3.450832182571408, "grad_norm": 0.46660447120666504, "learning_rate": 2.21595463962206e-05, "loss": 0.0609, "step": 94960 }, { "epoch": 3.451195581074206, "grad_norm": 0.7150638103485107, "learning_rate": 2.215439074243552e-05, "loss": 0.0697, "step": 94970 }, { "epoch": 3.4515589795770043, "grad_norm": 1.3236603736877441, "learning_rate": 2.2149235211257624e-05, "loss": 0.0812, "step": 94980 }, { "epoch": 3.4519223780798023, "grad_norm": 0.8594760894775391, "learning_rate": 2.214407980290903e-05, "loss": 0.0899, "step": 94990 }, { "epoch": 3.4522857765826007, "grad_norm": 0.6767681837081909, "learning_rate": 2.2138924517611874e-05, "loss": 0.0807, "step": 95000 }, { "epoch": 3.4526491750853987, "grad_norm": 0.4284761846065521, "learning_rate": 2.213376935558829e-05, "loss": 0.0719, "step": 95010 }, { "epoch": 3.4530125735881967, "grad_norm": 0.35260239243507385, "learning_rate": 2.2128614317060385e-05, "loss": 0.0685, "step": 95020 }, { "epoch": 3.453375972090995, "grad_norm": 0.7802332043647766, "learning_rate": 2.2123459402250275e-05, "loss": 0.0772, "step": 95030 }, { "epoch": 3.453739370593793, "grad_norm": 0.5842748284339905, "learning_rate": 2.211830461138005e-05, "loss": 0.0744, "step": 95040 }, { "epoch": 3.4541027690965915, "grad_norm": 0.6803625822067261, "learning_rate": 2.2113149944671842e-05, "loss": 2.0802, "step": 95050 }, { "epoch": 3.4544661675993895, "grad_norm": 3.472691774368286, "learning_rate": 2.2107995402347726e-05, "loss": 0.1059, "step": 95060 }, { "epoch": 3.4548295661021875, "grad_norm": 0.8646115660667419, "learning_rate": 2.21028409846298e-05, "loss": 0.0745, "step": 95070 }, { "epoch": 3.455192964604986, "grad_norm": 0.9967368245124817, "learning_rate": 2.2097686691740148e-05, "loss": 0.0703, "step": 95080 }, { "epoch": 3.455556363107784, "grad_norm": 3.0647103786468506, "learning_rate": 2.2092532523900842e-05, 
"loss": 0.0838, "step": 95090 }, { "epoch": 3.4559197616105823, "grad_norm": 1.3772906064987183, "learning_rate": 2.208737848133397e-05, "loss": 0.1081, "step": 95100 }, { "epoch": 3.4562831601133803, "grad_norm": 0.9632165431976318, "learning_rate": 2.20822245642616e-05, "loss": 0.0736, "step": 95110 }, { "epoch": 3.4566465586161783, "grad_norm": 0.6966424584388733, "learning_rate": 2.207707077290579e-05, "loss": 0.0656, "step": 95120 }, { "epoch": 3.4570099571189767, "grad_norm": 0.45801427960395813, "learning_rate": 2.2071917107488604e-05, "loss": 0.0616, "step": 95130 }, { "epoch": 3.4573733556217747, "grad_norm": 0.4734851121902466, "learning_rate": 2.206676356823208e-05, "loss": 0.0897, "step": 95140 }, { "epoch": 3.457736754124573, "grad_norm": 0.842993438243866, "learning_rate": 2.2061610155358287e-05, "loss": 0.0648, "step": 95150 }, { "epoch": 3.458100152627371, "grad_norm": 4.05435848236084, "learning_rate": 2.2056456869089256e-05, "loss": 0.0896, "step": 95160 }, { "epoch": 3.458463551130169, "grad_norm": 0.8322821855545044, "learning_rate": 2.2051303709647027e-05, "loss": 0.07, "step": 95170 }, { "epoch": 3.4588269496329676, "grad_norm": 0.5541922450065613, "learning_rate": 2.2046150677253618e-05, "loss": 0.0852, "step": 95180 }, { "epoch": 3.4591903481357655, "grad_norm": 0.5772917866706848, "learning_rate": 2.2040997772131077e-05, "loss": 0.0837, "step": 95190 }, { "epoch": 3.459553746638564, "grad_norm": 0.5851882100105286, "learning_rate": 2.2035844994501418e-05, "loss": 0.089, "step": 95200 }, { "epoch": 3.459917145141362, "grad_norm": 1.0712413787841797, "learning_rate": 2.2030692344586647e-05, "loss": 0.1188, "step": 95210 }, { "epoch": 3.4602805436441604, "grad_norm": 0.4828985333442688, "learning_rate": 2.202553982260878e-05, "loss": 0.0737, "step": 95220 }, { "epoch": 3.4606439421469584, "grad_norm": 0.3994056284427643, "learning_rate": 2.2020387428789807e-05, "loss": 0.0601, "step": 95230 }, { "epoch": 3.4610073406497563, "grad_norm": 
0.8096089363098145, "learning_rate": 2.201523516335175e-05, "loss": 0.081, "step": 95240 }, { "epoch": 3.4613707391525548, "grad_norm": 1.1060764789581299, "learning_rate": 2.2010083026516588e-05, "loss": 0.0827, "step": 95250 }, { "epoch": 3.4617341376553528, "grad_norm": 0.3895101547241211, "learning_rate": 2.2004931018506313e-05, "loss": 0.0759, "step": 95260 }, { "epoch": 3.462097536158151, "grad_norm": 0.4792887568473816, "learning_rate": 2.1999779139542903e-05, "loss": 0.0839, "step": 95270 }, { "epoch": 3.462460934660949, "grad_norm": 0.8363034129142761, "learning_rate": 2.1994627389848325e-05, "loss": 0.0668, "step": 95280 }, { "epoch": 3.4628243331637476, "grad_norm": 0.4887206256389618, "learning_rate": 2.198947576964457e-05, "loss": 0.0653, "step": 95290 }, { "epoch": 3.4631877316665456, "grad_norm": 4.43966817855835, "learning_rate": 2.198432427915359e-05, "loss": 0.113, "step": 95300 }, { "epoch": 3.4635511301693436, "grad_norm": 0.7550996541976929, "learning_rate": 2.197917291859735e-05, "loss": 0.1061, "step": 95310 }, { "epoch": 3.463914528672142, "grad_norm": 0.64354407787323, "learning_rate": 2.1974021688197797e-05, "loss": 0.0585, "step": 95320 }, { "epoch": 3.46427792717494, "grad_norm": 1.6076833009719849, "learning_rate": 2.1968870588176877e-05, "loss": 0.0663, "step": 95330 }, { "epoch": 3.4646413256777384, "grad_norm": 0.5022009015083313, "learning_rate": 2.1963719618756548e-05, "loss": 0.1249, "step": 95340 }, { "epoch": 3.4650047241805364, "grad_norm": 0.5584505796432495, "learning_rate": 2.1958568780158736e-05, "loss": 0.082, "step": 95350 }, { "epoch": 3.4653681226833344, "grad_norm": 1.0235668420791626, "learning_rate": 2.1953418072605375e-05, "loss": 0.079, "step": 95360 }, { "epoch": 3.465731521186133, "grad_norm": 1.6643403768539429, "learning_rate": 2.194826749631839e-05, "loss": 0.0644, "step": 95370 }, { "epoch": 3.466094919688931, "grad_norm": 0.6343129873275757, "learning_rate": 2.1943117051519688e-05, "loss": 0.0642, "step": 
95380 }, { "epoch": 3.4664583181917292, "grad_norm": 0.3787631392478943, "learning_rate": 2.193796673843121e-05, "loss": 0.0764, "step": 95390 }, { "epoch": 3.466821716694527, "grad_norm": 0.6121103763580322, "learning_rate": 2.1932816557274846e-05, "loss": 0.1018, "step": 95400 }, { "epoch": 3.466821716694527, "eval_loss": 0.32295721769332886, "eval_runtime": 179.2759, "eval_samples_per_second": 41.355, "eval_steps_per_second": 5.171, "eval_wer": 0.13823588142392942, "step": 95400 }, { "epoch": 3.467185115197325, "grad_norm": 0.6954236626625061, "learning_rate": 2.1927666508272505e-05, "loss": 0.0636, "step": 95410 }, { "epoch": 3.4675485137001236, "grad_norm": 1.4941720962524414, "learning_rate": 2.1922516591646072e-05, "loss": 0.0725, "step": 95420 }, { "epoch": 3.4679119122029216, "grad_norm": 0.487307071685791, "learning_rate": 2.1917366807617463e-05, "loss": 0.0668, "step": 95430 }, { "epoch": 3.46827531070572, "grad_norm": 0.6700599193572998, "learning_rate": 2.1912217156408547e-05, "loss": 0.0679, "step": 95440 }, { "epoch": 3.468638709208518, "grad_norm": 0.6767346858978271, "learning_rate": 2.1907067638241208e-05, "loss": 0.0813, "step": 95450 }, { "epoch": 3.469002107711316, "grad_norm": 2.5367612838745117, "learning_rate": 2.190191825333732e-05, "loss": 0.1077, "step": 95460 }, { "epoch": 3.4693655062141144, "grad_norm": 0.5402595400810242, "learning_rate": 2.1896769001918742e-05, "loss": 0.073, "step": 95470 }, { "epoch": 3.4697289047169124, "grad_norm": 0.6329225301742554, "learning_rate": 2.1891619884207354e-05, "loss": 0.085, "step": 95480 }, { "epoch": 3.470092303219711, "grad_norm": 0.5831683874130249, "learning_rate": 2.1886470900425008e-05, "loss": 0.0892, "step": 95490 }, { "epoch": 3.470455701722509, "grad_norm": 2.0767431259155273, "learning_rate": 2.188132205079355e-05, "loss": 0.0894, "step": 95500 }, { "epoch": 3.4708191002253073, "grad_norm": 0.6538608074188232, "learning_rate": 2.1876173335534835e-05, "loss": 0.0625, "step": 95510 }, { 
"epoch": 3.4711824987281052, "grad_norm": 2.3269765377044678, "learning_rate": 2.1871024754870677e-05, "loss": 0.066, "step": 95520 }, { "epoch": 3.4715458972309032, "grad_norm": 0.8417708873748779, "learning_rate": 2.1865876309022947e-05, "loss": 0.0716, "step": 95530 }, { "epoch": 3.4719092957337017, "grad_norm": 0.220072403550148, "learning_rate": 2.186072799821345e-05, "loss": 0.0811, "step": 95540 }, { "epoch": 3.4722726942364996, "grad_norm": 0.7579740285873413, "learning_rate": 2.185557982266402e-05, "loss": 0.0944, "step": 95550 }, { "epoch": 3.472636092739298, "grad_norm": 0.6758162379264832, "learning_rate": 2.1850431782596466e-05, "loss": 0.0807, "step": 95560 }, { "epoch": 3.472999491242096, "grad_norm": 1.1941717863082886, "learning_rate": 2.1845283878232585e-05, "loss": 0.0713, "step": 95570 }, { "epoch": 3.4733628897448945, "grad_norm": 1.0327314138412476, "learning_rate": 2.1840136109794213e-05, "loss": 0.094, "step": 95580 }, { "epoch": 3.4737262882476925, "grad_norm": 0.4671393036842346, "learning_rate": 2.183498847750313e-05, "loss": 0.074, "step": 95590 }, { "epoch": 3.4740896867504905, "grad_norm": 0.4938909113407135, "learning_rate": 2.1829840981581134e-05, "loss": 0.0846, "step": 95600 }, { "epoch": 3.474453085253289, "grad_norm": 1.201877474784851, "learning_rate": 2.182469362225e-05, "loss": 0.0852, "step": 95610 }, { "epoch": 3.474816483756087, "grad_norm": 0.8695741295814514, "learning_rate": 2.181954639973153e-05, "loss": 0.0635, "step": 95620 }, { "epoch": 3.4751798822588853, "grad_norm": 0.5013503432273865, "learning_rate": 2.1814399314247492e-05, "loss": 0.0752, "step": 95630 }, { "epoch": 3.4755432807616833, "grad_norm": 0.45891714096069336, "learning_rate": 2.180925236601965e-05, "loss": 0.0795, "step": 95640 }, { "epoch": 3.4759066792644813, "grad_norm": 0.9783682823181152, "learning_rate": 2.1804105555269772e-05, "loss": 0.0782, "step": 95650 }, { "epoch": 3.4762700777672797, "grad_norm": 0.3729395270347595, "learning_rate": 
2.17989588822196e-05, "loss": 0.0787, "step": 95660 }, { "epoch": 3.4766334762700777, "grad_norm": 0.9214646220207214, "learning_rate": 2.1793812347090918e-05, "loss": 0.0526, "step": 95670 }, { "epoch": 3.476996874772876, "grad_norm": 0.4130672216415405, "learning_rate": 2.178866595010545e-05, "loss": 0.0806, "step": 95680 }, { "epoch": 3.477360273275674, "grad_norm": 0.38258814811706543, "learning_rate": 2.178351969148494e-05, "loss": 0.08, "step": 95690 }, { "epoch": 3.477723671778472, "grad_norm": 0.7837095260620117, "learning_rate": 2.1778373571451124e-05, "loss": 0.187, "step": 95700 }, { "epoch": 3.4780870702812705, "grad_norm": 1.1877186298370361, "learning_rate": 2.177322759022572e-05, "loss": 0.0982, "step": 95710 }, { "epoch": 3.4784504687840685, "grad_norm": 0.8703027367591858, "learning_rate": 2.1768081748030463e-05, "loss": 0.066, "step": 95720 }, { "epoch": 3.478813867286867, "grad_norm": 0.47968795895576477, "learning_rate": 2.176293604508707e-05, "loss": 0.0758, "step": 95730 }, { "epoch": 3.479177265789665, "grad_norm": 0.5117019414901733, "learning_rate": 2.175779048161724e-05, "loss": 0.0838, "step": 95740 }, { "epoch": 3.479540664292463, "grad_norm": 0.738413393497467, "learning_rate": 2.1752645057842686e-05, "loss": 0.1082, "step": 95750 }, { "epoch": 3.4799040627952613, "grad_norm": 1.208146333694458, "learning_rate": 2.1747499773985092e-05, "loss": 0.0773, "step": 95760 }, { "epoch": 3.4802674612980593, "grad_norm": 0.4587624967098236, "learning_rate": 2.1742354630266172e-05, "loss": 0.8829, "step": 95770 }, { "epoch": 3.4806308598008577, "grad_norm": 0.5824334025382996, "learning_rate": 2.1737209626907594e-05, "loss": 0.0732, "step": 95780 }, { "epoch": 3.4809942583036557, "grad_norm": 0.649512767791748, "learning_rate": 2.173206476413105e-05, "loss": 0.0883, "step": 95790 }, { "epoch": 3.481357656806454, "grad_norm": 1.6176427602767944, "learning_rate": 2.1726920042158194e-05, "loss": 0.1347, "step": 95800 }, { "epoch": 3.481721055309252, 
"grad_norm": 0.5751357674598694, "learning_rate": 2.1721775461210718e-05, "loss": 0.0736, "step": 95810 }, { "epoch": 3.48208445381205, "grad_norm": 0.701714813709259, "learning_rate": 2.171663102151028e-05, "loss": 0.0742, "step": 95820 }, { "epoch": 3.4824478523148485, "grad_norm": 0.4845195710659027, "learning_rate": 2.1711486723278522e-05, "loss": 0.0668, "step": 95830 }, { "epoch": 3.4828112508176465, "grad_norm": 0.7643341422080994, "learning_rate": 2.1706342566737105e-05, "loss": 0.0767, "step": 95840 }, { "epoch": 3.483174649320445, "grad_norm": 0.4030478894710541, "learning_rate": 2.170119855210765e-05, "loss": 0.0836, "step": 95850 }, { "epoch": 3.483538047823243, "grad_norm": 0.9113463163375854, "learning_rate": 2.1696054679611828e-05, "loss": 0.1111, "step": 95860 }, { "epoch": 3.4839014463260414, "grad_norm": 0.46975961327552795, "learning_rate": 2.1690910949471255e-05, "loss": 0.0592, "step": 95870 }, { "epoch": 3.4842648448288394, "grad_norm": 0.4419771134853363, "learning_rate": 2.1685767361907554e-05, "loss": 0.0996, "step": 95880 }, { "epoch": 3.4846282433316373, "grad_norm": 0.6320810914039612, "learning_rate": 2.168062391714235e-05, "loss": 0.1194, "step": 95890 }, { "epoch": 3.4849916418344358, "grad_norm": 0.3942996859550476, "learning_rate": 2.1675480615397234e-05, "loss": 0.0825, "step": 95900 }, { "epoch": 3.4853550403372338, "grad_norm": 0.3566824197769165, "learning_rate": 2.167033745689384e-05, "loss": 0.0666, "step": 95910 }, { "epoch": 3.485718438840032, "grad_norm": 0.48564252257347107, "learning_rate": 2.1665194441853765e-05, "loss": 0.0687, "step": 95920 }, { "epoch": 3.48608183734283, "grad_norm": 0.3420655429363251, "learning_rate": 2.166005157049859e-05, "loss": 0.0627, "step": 95930 }, { "epoch": 3.486445235845628, "grad_norm": 0.3921029567718506, "learning_rate": 2.165490884304991e-05, "loss": 0.0621, "step": 95940 }, { "epoch": 3.4868086343484266, "grad_norm": 0.609154224395752, "learning_rate": 2.1649766259729298e-05, "loss": 
0.0788, "step": 95950 }, { "epoch": 3.4871720328512246, "grad_norm": 0.4822045564651489, "learning_rate": 2.1644623820758347e-05, "loss": 0.086, "step": 95960 }, { "epoch": 3.487535431354023, "grad_norm": 0.9078019261360168, "learning_rate": 2.1639481526358616e-05, "loss": 0.0632, "step": 95970 }, { "epoch": 3.487898829856821, "grad_norm": 0.43268144130706787, "learning_rate": 2.163433937675167e-05, "loss": 0.704, "step": 95980 }, { "epoch": 3.488262228359619, "grad_norm": 0.4717707633972168, "learning_rate": 2.1629197372159055e-05, "loss": 0.0839, "step": 95990 }, { "epoch": 3.4886256268624174, "grad_norm": 1.3348411321640015, "learning_rate": 2.162405551280234e-05, "loss": 0.0634, "step": 96000 }, { "epoch": 3.4886256268624174, "eval_loss": 0.3231545090675354, "eval_runtime": 179.599, "eval_samples_per_second": 41.281, "eval_steps_per_second": 5.161, "eval_wer": 0.13457803111441902, "step": 96000 }, { "epoch": 3.4889890253652154, "grad_norm": 0.6468439698219299, "learning_rate": 2.1618913798903064e-05, "loss": 0.0726, "step": 96010 }, { "epoch": 3.489352423868014, "grad_norm": 0.6067370772361755, "learning_rate": 2.1613772230682762e-05, "loss": 0.0662, "step": 96020 }, { "epoch": 3.489715822370812, "grad_norm": 4.149559020996094, "learning_rate": 2.1608630808362966e-05, "loss": 0.0685, "step": 96030 }, { "epoch": 3.4900792208736098, "grad_norm": 0.5562649369239807, "learning_rate": 2.1603489532165194e-05, "loss": 0.0805, "step": 96040 }, { "epoch": 3.490442619376408, "grad_norm": 1.4775289297103882, "learning_rate": 2.159834840231098e-05, "loss": 0.0917, "step": 96050 }, { "epoch": 3.490806017879206, "grad_norm": 0.4842393100261688, "learning_rate": 2.1593207419021833e-05, "loss": 0.0956, "step": 96060 }, { "epoch": 3.4911694163820046, "grad_norm": 0.5330924391746521, "learning_rate": 2.1588066582519257e-05, "loss": 1.4342, "step": 96070 }, { "epoch": 3.4915328148848026, "grad_norm": 0.2691187858581543, "learning_rate": 2.158292589302475e-05, "loss": 0.0785, 
"step": 96080 }, { "epoch": 3.491896213387601, "grad_norm": 0.42408475279808044, "learning_rate": 2.15777853507598e-05, "loss": 0.0903, "step": 96090 }, { "epoch": 3.492259611890399, "grad_norm": 2.1866910457611084, "learning_rate": 2.157264495594591e-05, "loss": 0.0759, "step": 96100 }, { "epoch": 3.492623010393197, "grad_norm": 0.2777433693408966, "learning_rate": 2.1567504708804557e-05, "loss": 0.097, "step": 96110 }, { "epoch": 3.4929864088959954, "grad_norm": 0.4376242756843567, "learning_rate": 2.156236460955721e-05, "loss": 0.0691, "step": 96120 }, { "epoch": 3.4933498073987934, "grad_norm": 0.6688746213912964, "learning_rate": 2.1557224658425347e-05, "loss": 0.1149, "step": 96130 }, { "epoch": 3.493713205901592, "grad_norm": 1.0745078325271606, "learning_rate": 2.155208485563041e-05, "loss": 0.0955, "step": 96140 }, { "epoch": 3.49407660440439, "grad_norm": 2.5494561195373535, "learning_rate": 2.154694520139388e-05, "loss": 0.1092, "step": 96150 }, { "epoch": 3.4944400029071883, "grad_norm": 0.681524932384491, "learning_rate": 2.1541805695937192e-05, "loss": 0.0966, "step": 96160 }, { "epoch": 3.4948034014099862, "grad_norm": 0.8694483637809753, "learning_rate": 2.1536666339481797e-05, "loss": 0.2408, "step": 96170 }, { "epoch": 3.4951667999127842, "grad_norm": 0.36922356486320496, "learning_rate": 2.1531527132249113e-05, "loss": 0.0785, "step": 96180 }, { "epoch": 3.4955301984155827, "grad_norm": 0.3221088945865631, "learning_rate": 2.1526388074460594e-05, "loss": 0.0954, "step": 96190 }, { "epoch": 3.4958935969183806, "grad_norm": 0.868403434753418, "learning_rate": 2.1521249166337658e-05, "loss": 0.086, "step": 96200 }, { "epoch": 3.496256995421179, "grad_norm": 0.6299740076065063, "learning_rate": 2.1516110408101714e-05, "loss": 0.0829, "step": 96210 }, { "epoch": 3.496620393923977, "grad_norm": 0.744305431842804, "learning_rate": 2.1510971799974177e-05, "loss": 0.0669, "step": 96220 }, { "epoch": 3.496983792426775, "grad_norm": 0.9115591049194336, 
"learning_rate": 2.1505833342176442e-05, "loss": 0.0635, "step": 96230 }, { "epoch": 3.4973471909295735, "grad_norm": 0.4782477617263794, "learning_rate": 2.1500695034929926e-05, "loss": 0.0879, "step": 96240 }, { "epoch": 3.4977105894323715, "grad_norm": 0.6979982852935791, "learning_rate": 2.1495556878456014e-05, "loss": 0.0725, "step": 96250 }, { "epoch": 3.49807398793517, "grad_norm": 0.4281218945980072, "learning_rate": 2.1490932666723033e-05, "loss": 3.0583, "step": 96260 }, { "epoch": 3.498437386437968, "grad_norm": 2.243551254272461, "learning_rate": 2.148579479732697e-05, "loss": 0.0783, "step": 96270 }, { "epoch": 3.498800784940766, "grad_norm": 0.8817722797393799, "learning_rate": 2.1480657079345505e-05, "loss": 0.0782, "step": 96280 }, { "epoch": 3.4991641834435643, "grad_norm": 0.6434484124183655, "learning_rate": 2.1475519513000002e-05, "loss": 0.0924, "step": 96290 }, { "epoch": 3.4995275819463623, "grad_norm": 0.7872079014778137, "learning_rate": 2.1470382098511813e-05, "loss": 0.0852, "step": 96300 }, { "epoch": 3.4998909804491607, "grad_norm": 1.4359321594238281, "learning_rate": 2.1465244836102312e-05, "loss": 0.0759, "step": 96310 }, { "epoch": 3.5002543789519587, "grad_norm": 0.307167112827301, "learning_rate": 2.1460107725992838e-05, "loss": 0.0755, "step": 96320 }, { "epoch": 3.5006177774547567, "grad_norm": 0.6518979072570801, "learning_rate": 2.1454970768404724e-05, "loss": 0.063, "step": 96330 }, { "epoch": 3.500981175957555, "grad_norm": 1.06710684299469, "learning_rate": 2.1449833963559293e-05, "loss": 0.0851, "step": 96340 }, { "epoch": 3.501344574460353, "grad_norm": 0.6126148104667664, "learning_rate": 2.14446973116779e-05, "loss": 0.0888, "step": 96350 }, { "epoch": 3.5017079729631515, "grad_norm": 0.7071236968040466, "learning_rate": 2.1439560812981848e-05, "loss": 0.1182, "step": 96360 }, { "epoch": 3.5020713714659495, "grad_norm": 2.222172737121582, "learning_rate": 2.1434424467692455e-05, "loss": 0.1948, "step": 96370 }, { 
"epoch": 3.5024347699687475, "grad_norm": 1.0301228761672974, "learning_rate": 2.142928827603102e-05, "loss": 0.0754, "step": 96380 }, { "epoch": 3.502798168471546, "grad_norm": 1.2635794878005981, "learning_rate": 2.142415223821884e-05, "loss": 0.0892, "step": 96390 }, { "epoch": 3.5031615669743443, "grad_norm": 0.6022728681564331, "learning_rate": 2.141901635447723e-05, "loss": 0.0853, "step": 96400 }, { "epoch": 3.5035249654771423, "grad_norm": 1.4563450813293457, "learning_rate": 2.1413880625027462e-05, "loss": 0.1804, "step": 96410 }, { "epoch": 3.5038883639799403, "grad_norm": 0.5189762711524963, "learning_rate": 2.140874505009082e-05, "loss": 0.068, "step": 96420 }, { "epoch": 3.5042517624827387, "grad_norm": 1.153496503829956, "learning_rate": 2.1403609629888578e-05, "loss": 0.1057, "step": 96430 }, { "epoch": 3.5046151609855367, "grad_norm": 0.8835933208465576, "learning_rate": 2.139847436464199e-05, "loss": 0.0967, "step": 96440 }, { "epoch": 3.504978559488335, "grad_norm": 0.9221778512001038, "learning_rate": 2.1393339254572334e-05, "loss": 0.0876, "step": 96450 }, { "epoch": 3.505341957991133, "grad_norm": 0.32772889733314514, "learning_rate": 2.138820429990086e-05, "loss": 0.0628, "step": 96460 }, { "epoch": 3.505705356493931, "grad_norm": 0.482295960187912, "learning_rate": 2.138306950084881e-05, "loss": 0.0768, "step": 96470 }, { "epoch": 3.5060687549967295, "grad_norm": 0.7036164999008179, "learning_rate": 2.1377934857637427e-05, "loss": 0.0804, "step": 96480 }, { "epoch": 3.5064321534995275, "grad_norm": 0.6191168427467346, "learning_rate": 2.137280037048793e-05, "loss": 0.0966, "step": 96490 }, { "epoch": 3.506795552002326, "grad_norm": 1.3983656167984009, "learning_rate": 2.1367666039621575e-05, "loss": 0.0961, "step": 96500 }, { "epoch": 3.507158950505124, "grad_norm": 0.36156126856803894, "learning_rate": 2.1362531865259564e-05, "loss": 0.1172, "step": 96510 }, { "epoch": 3.507522349007922, "grad_norm": 0.7854031324386597, "learning_rate": 
2.135739784762311e-05, "loss": 0.0675, "step": 96520 }, { "epoch": 3.5078857475107204, "grad_norm": 1.4320755004882812, "learning_rate": 2.135226398693342e-05, "loss": 0.0677, "step": 96530 }, { "epoch": 3.5082491460135183, "grad_norm": 1.0837007761001587, "learning_rate": 2.13471302834117e-05, "loss": 0.064, "step": 96540 }, { "epoch": 3.5086125445163168, "grad_norm": 2.1308250427246094, "learning_rate": 2.134199673727914e-05, "loss": 0.094, "step": 96550 }, { "epoch": 3.5089759430191148, "grad_norm": 0.3453007638454437, "learning_rate": 2.1336863348756927e-05, "loss": 0.1269, "step": 96560 }, { "epoch": 3.5093393415219127, "grad_norm": 0.2881056070327759, "learning_rate": 2.133173011806624e-05, "loss": 0.1384, "step": 96570 }, { "epoch": 3.509702740024711, "grad_norm": 1.0903687477111816, "learning_rate": 2.1326597045428236e-05, "loss": 0.0777, "step": 96580 }, { "epoch": 3.510066138527509, "grad_norm": 0.44503089785575867, "learning_rate": 2.1321464131064105e-05, "loss": 0.0847, "step": 96590 }, { "epoch": 3.5104295370303076, "grad_norm": 0.562317430973053, "learning_rate": 2.1316331375195002e-05, "loss": 0.0654, "step": 96600 }, { "epoch": 3.5104295370303076, "eval_loss": 0.3297887444496155, "eval_runtime": 180.5411, "eval_samples_per_second": 41.065, "eval_steps_per_second": 5.135, "eval_wer": 0.13554922213952475, "step": 96600 }, { "epoch": 3.5107929355331056, "grad_norm": 0.9230484962463379, "learning_rate": 2.131119877804207e-05, "loss": 0.0732, "step": 96610 }, { "epoch": 3.5111563340359035, "grad_norm": 4.310789585113525, "learning_rate": 2.1306066339826457e-05, "loss": 0.1152, "step": 96620 }, { "epoch": 3.511519732538702, "grad_norm": 0.3380495011806488, "learning_rate": 2.1300934060769296e-05, "loss": 0.2172, "step": 96630 }, { "epoch": 3.5118831310415, "grad_norm": 0.524796187877655, "learning_rate": 2.129580194109173e-05, "loss": 0.0933, "step": 96640 }, { "epoch": 3.5122465295442984, "grad_norm": 0.6239128708839417, "learning_rate": 
2.1290669981014882e-05, "loss": 0.0738, "step": 96650 }, { "epoch": 3.5126099280470964, "grad_norm": 0.976793646812439, "learning_rate": 2.128553818075987e-05, "loss": 0.0777, "step": 96660 }, { "epoch": 3.5129733265498944, "grad_norm": 1.0099037885665894, "learning_rate": 2.1280406540547794e-05, "loss": 0.0661, "step": 96670 }, { "epoch": 3.513336725052693, "grad_norm": 0.6158185601234436, "learning_rate": 2.127527506059976e-05, "loss": 0.0782, "step": 96680 }, { "epoch": 3.513700123555491, "grad_norm": 0.28984469175338745, "learning_rate": 2.1270143741136884e-05, "loss": 0.0849, "step": 96690 }, { "epoch": 3.514063522058289, "grad_norm": 1.0383470058441162, "learning_rate": 2.126501258238024e-05, "loss": 0.0993, "step": 96700 }, { "epoch": 3.514426920561087, "grad_norm": 0.32447656989097595, "learning_rate": 2.1259881584550912e-05, "loss": 0.0801, "step": 96710 }, { "epoch": 3.5147903190638856, "grad_norm": 0.6273922920227051, "learning_rate": 2.1254750747869972e-05, "loss": 0.0771, "step": 96720 }, { "epoch": 3.5151537175666836, "grad_norm": 0.7431948781013489, "learning_rate": 2.1249620072558508e-05, "loss": 0.09, "step": 96730 }, { "epoch": 3.515517116069482, "grad_norm": 0.4066786468029022, "learning_rate": 2.124448955883757e-05, "loss": 0.0772, "step": 96740 }, { "epoch": 3.51588051457228, "grad_norm": 0.31640565395355225, "learning_rate": 2.1239359206928214e-05, "loss": 0.0722, "step": 96750 }, { "epoch": 3.516243913075078, "grad_norm": 0.4770644009113312, "learning_rate": 2.1234229017051488e-05, "loss": 0.0742, "step": 96760 }, { "epoch": 3.5166073115778764, "grad_norm": 0.25661101937294006, "learning_rate": 2.122909898942843e-05, "loss": 2.1078, "step": 96770 }, { "epoch": 3.5169707100806744, "grad_norm": 0.7563055157661438, "learning_rate": 2.1223969124280086e-05, "loss": 0.0827, "step": 96780 }, { "epoch": 3.517334108583473, "grad_norm": 0.690726637840271, "learning_rate": 2.1218839421827474e-05, "loss": 0.0743, "step": 96790 }, { "epoch": 
3.517697507086271, "grad_norm": 1.4046658277511597, "learning_rate": 2.1213709882291623e-05, "loss": 0.0789, "step": 96800 }, { "epoch": 3.518060905589069, "grad_norm": 0.5457620620727539, "learning_rate": 2.1208580505893542e-05, "loss": 0.1015, "step": 96810 }, { "epoch": 3.5184243040918672, "grad_norm": 1.5679082870483398, "learning_rate": 2.1203451292854222e-05, "loss": 0.0695, "step": 96820 }, { "epoch": 3.5187877025946652, "grad_norm": 0.6426934003829956, "learning_rate": 2.119832224339469e-05, "loss": 0.0706, "step": 96830 }, { "epoch": 3.5191511010974637, "grad_norm": 2.433039665222168, "learning_rate": 2.119319335773593e-05, "loss": 0.1022, "step": 96840 }, { "epoch": 3.5195144996002616, "grad_norm": 1.6734727621078491, "learning_rate": 2.118806463609892e-05, "loss": 0.0889, "step": 96850 }, { "epoch": 3.5198778981030596, "grad_norm": 0.5065173506736755, "learning_rate": 2.1182936078704636e-05, "loss": 0.1207, "step": 96860 }, { "epoch": 3.520241296605858, "grad_norm": 0.8844788670539856, "learning_rate": 2.117780768577406e-05, "loss": 0.0761, "step": 96870 }, { "epoch": 3.520604695108656, "grad_norm": 0.48743927478790283, "learning_rate": 2.117267945752815e-05, "loss": 0.0719, "step": 96880 }, { "epoch": 3.5209680936114545, "grad_norm": 0.8191256523132324, "learning_rate": 2.116755139418787e-05, "loss": 0.0796, "step": 96890 }, { "epoch": 3.5213314921142524, "grad_norm": 0.6994876265525818, "learning_rate": 2.116242349597416e-05, "loss": 0.0961, "step": 96900 }, { "epoch": 3.5216948906170504, "grad_norm": 0.2623302638530731, "learning_rate": 2.115729576310796e-05, "loss": 0.0902, "step": 96910 }, { "epoch": 3.522058289119849, "grad_norm": 0.44012176990509033, "learning_rate": 2.1152168195810222e-05, "loss": 0.0695, "step": 96920 }, { "epoch": 3.522421687622647, "grad_norm": 0.39194273948669434, "learning_rate": 2.114704079430187e-05, "loss": 0.0626, "step": 96930 }, { "epoch": 3.5227850861254453, "grad_norm": 2.427734851837158, "learning_rate": 
2.1141913558803818e-05, "loss": 0.084, "step": 96940 }, { "epoch": 3.5231484846282433, "grad_norm": 0.9978201985359192, "learning_rate": 2.1136786489536975e-05, "loss": 0.1378, "step": 96950 }, { "epoch": 3.5235118831310412, "grad_norm": 0.9334393739700317, "learning_rate": 2.113165958672226e-05, "loss": 0.0942, "step": 96960 }, { "epoch": 3.5238752816338397, "grad_norm": 0.5911192893981934, "learning_rate": 2.1126532850580578e-05, "loss": 0.0585, "step": 96970 }, { "epoch": 3.524238680136638, "grad_norm": 0.35171112418174744, "learning_rate": 2.112140628133281e-05, "loss": 0.0747, "step": 96980 }, { "epoch": 3.524602078639436, "grad_norm": 0.4741590917110443, "learning_rate": 2.1116279879199846e-05, "loss": 0.1048, "step": 96990 }, { "epoch": 3.524965477142234, "grad_norm": 4.185072422027588, "learning_rate": 2.1111153644402558e-05, "loss": 0.095, "step": 97000 }, { "epoch": 3.5253288756450325, "grad_norm": 0.4775453209877014, "learning_rate": 2.110602757716182e-05, "loss": 0.0866, "step": 97010 }, { "epoch": 3.5256922741478305, "grad_norm": 4.56588077545166, "learning_rate": 2.110090167769851e-05, "loss": 0.0696, "step": 97020 }, { "epoch": 3.526055672650629, "grad_norm": 0.5117329359054565, "learning_rate": 2.109577594623347e-05, "loss": 0.0655, "step": 97030 }, { "epoch": 3.526419071153427, "grad_norm": 0.45523038506507874, "learning_rate": 2.109065038298755e-05, "loss": 0.0838, "step": 97040 }, { "epoch": 3.526782469656225, "grad_norm": 0.7840531468391418, "learning_rate": 2.10855249881816e-05, "loss": 0.0915, "step": 97050 }, { "epoch": 3.5271458681590233, "grad_norm": 0.42048537731170654, "learning_rate": 2.1080399762036447e-05, "loss": 0.0882, "step": 97060 }, { "epoch": 3.5275092666618213, "grad_norm": 0.36128684878349304, "learning_rate": 2.1075274704772924e-05, "loss": 0.0609, "step": 97070 }, { "epoch": 3.5278726651646197, "grad_norm": 0.4942743182182312, "learning_rate": 2.107014981661185e-05, "loss": 0.0883, "step": 97080 }, { "epoch": 
3.5282360636674177, "grad_norm": 0.442184716463089, "learning_rate": 2.1065025097774034e-05, "loss": 0.1055, "step": 97090 }, { "epoch": 3.5285994621702157, "grad_norm": 0.8446380496025085, "learning_rate": 2.105990054848029e-05, "loss": 0.069, "step": 97100 }, { "epoch": 3.528962860673014, "grad_norm": 0.5443778038024902, "learning_rate": 2.1054776168951412e-05, "loss": 0.0868, "step": 97110 }, { "epoch": 3.529326259175812, "grad_norm": 0.6392617225646973, "learning_rate": 2.1049651959408194e-05, "loss": 0.0639, "step": 97120 }, { "epoch": 3.5296896576786105, "grad_norm": 0.7572323083877563, "learning_rate": 2.104452792007141e-05, "loss": 0.0894, "step": 97130 }, { "epoch": 3.5300530561814085, "grad_norm": 1.0398154258728027, "learning_rate": 2.1039404051161852e-05, "loss": 0.0756, "step": 97140 }, { "epoch": 3.5304164546842065, "grad_norm": 1.265731930732727, "learning_rate": 2.1034280352900277e-05, "loss": 0.0822, "step": 97150 }, { "epoch": 3.530779853187005, "grad_norm": 0.5231419205665588, "learning_rate": 2.1029156825507453e-05, "loss": 0.0723, "step": 97160 }, { "epoch": 3.531143251689803, "grad_norm": 0.6905182600021362, "learning_rate": 2.1024033469204134e-05, "loss": 0.0611, "step": 97170 }, { "epoch": 3.5315066501926013, "grad_norm": 0.5370905995368958, "learning_rate": 2.1018910284211067e-05, "loss": 0.0803, "step": 97180 }, { "epoch": 3.5318700486953993, "grad_norm": 0.41777387261390686, "learning_rate": 2.1013787270748992e-05, "loss": 0.0868, "step": 97190 }, { "epoch": 3.5322334471981973, "grad_norm": 0.6612346172332764, "learning_rate": 2.1008664429038633e-05, "loss": 0.0969, "step": 97200 }, { "epoch": 3.5322334471981973, "eval_loss": 0.33734017610549927, "eval_runtime": 179.3542, "eval_samples_per_second": 41.337, "eval_steps_per_second": 5.169, "eval_wer": 0.13551291593297873, "step": 97200 } ], "logging_steps": 10, "max_steps": 165108, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1800, "stateful_callbacks": { 
"TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.181422551839662e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }